diff --git a/CMakeLists.txt b/CMakeLists.txt index c01c54d..aba8597 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,150 +1,150 @@ # # @file CMakeLists.txt # # @brief # # @copyright # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) # SPC (Swiss Plasma Center) # -# spclibs is free software: you can redistribute it and/or modify it under +# SPClibs is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) # any later version. # -# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . # # @authors # (in alphabetical order) # @author Nicolas Richart # @author Trach-Minh Tran # cmake_minimum_required (VERSION 2.8.8) #cmake_policy(SET CMP0053 NEW) project(bsplines_root Fortran C) #enable_language(Fortran) enable_testing() option(BSPLINES_USE_MUMPS "Activate the mumps interface" ON) if(NOT DEFINED BSPLINES_EXAMPLES) option(BSPLINES_EXAMPLES "Compiles the examples" ON) endif() set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PROJECT_SOURCE_DIR}/cmake") # Assume we are on CRAY if ftn is the Fortran compiler if (${CMAKE_Fortran_COMPILER} MATCHES "ftn$") set(CRAY TRUE) if(${CMAKE_Fortran_COMPILER_ID} MATCHES "Cray") set(cray_suffix cray) elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "PGI") set(cray_suffix pgi) elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "Intel") set(cray_suffix intel) elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "GNU") set(cray_suffix gnu) endif() else() set(CRAY FALSE) endif() if(POLICY CMP0074) cmake_policy(SET CMP0074 NEW) endif() include(CMakeFlagsHandling) # Compiler flags for debug/optimization if(${CMAKE_Fortran_COMPILER_ID} MATCHES "Cray") elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "Intel") set(CMAKE_AR ${XIAR}) add_flags(LANG Fortran TYPE DEBUG -traceback "-check bounds" "-warn unused") add_flags(LANG Fortran TYPE RELEASE -xHost) elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "GNU") add_flags(LANG Fortran TYPE DEBUG -fbounds-check -fbacktrace) endif() if(NOT MUMPS) set(MUMPS $ENV{MUMPS_ROOT}) endif() # Installation root directory if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) set(PREFIX $ENV{PREFIX}) if(PREFIX) set(${CMAKE_INSTALL_PREFIX} ${PREFIX}) else() set(CMAKE_INSTALL_PREFIX ${CMAKE_CURRENT_SOURCE_DIR} CACHE PATH "..." FORCE) endif() message(STATUS "CMAKE_INSTALL_PREFIX is " ${CMAKE_INSTALL_PREFIX}) endif() # Search and load the FUTILS configuration file if(NOT TARGET futils) find_package(futils PATHS ${FUTILS}/lib/cmake REQUIRED) endif() if(BSPLINES_USE_MUMPS) find_package(Mumps REQUIRED) set(HAS_MUMPS ${MUMPS_FOUND}) else() set(HAS_MUMPS FALSE) endif() # Find lapack/blas. Skip it if on CRAY! if(CRAY) set(BSPLINES_USE_PARDISO OFF) endif() include(blas) if(NOT BSPLINES_EXPORT_TARGETS) set(BSPLINES_EXPORT_TARGETS bsplines-targets) endif() find_package(MPI COMPONENTS Fortran REQUIRED) include(GNUInstallDirs) add_subdirectory(pppack) add_subdirectory(pputils2) add_subdirectory(fft) add_subdirectory(src) if(HAS_MUMPS AND BSPLINES_EXAMPLES) add_subdirectory(multigrid) endif() if(BSPLINES_EXAMPLES) add_subdirectory(examples) add_subdirectory(wk) endif() export(TARGETS pppack pputils2 bsplines fft FILE "${CMAKE_BINARY_DIR}/bsplinesLibraryDepends.cmake") export(PACKAGE bsplines) # install configuration files if(BSPLINES_EXPORT_TARGETS MATCHES "bsplines-targets") install(EXPORT bsplines-targets DESTINATION lib/cmake ) configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/cmake/bsplines-config.cmake.in ${CMAKE_CURRENT_BINARY_DIR}/cmake/bsplines-config.cmake @ONLY ) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/cmake/bsplines-config.cmake DESTINATION lib/cmake ) endif() # enable packaging with CPack include(CPack) diff --git a/cmake/CMakeFlagsHandling.cmake b/cmake/CMakeFlagsHandling.cmake index d9955fe..ccab47d 100644 --- a/cmake/CMakeFlagsHandling.cmake +++ b/cmake/CMakeFlagsHandling.cmake @@ -1,100 +1,100 @@ # # @file CMakeFlagsHandling.cmake # # @brief # # @copyright # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) # SPC (Swiss Plasma Center) # -# spclibs is free software: you can redistribute it and/or modify it under +# SPClibs is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) # any later version. # -# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . # # @authors # (in alphabetical order) # @author Nicolas Richart # if(_CMAKE_FLAGS_HANDLING) return() endif() set(_CMAKE_FLAGS_HANDLING TRUE) #=============================================================================== # Compilation options handling #=============================================================================== macro(_get_flags_message lang type desc) if(${lang} MATCHES "C.." OR ${lang} MATCHES "Fortran") set(${desc} "Flags used by the compiler") elseif(${lang} MATCHES ".*_LINKER") set(${desc} "Flags used by the linker") endif() if(${lang} MATCHES "SHARED_LINKER") set(${desc} "${desc} during the creation of shared libraries") elseif(${lang} MATCHES "MODULE_LINKER") set(${desc} "${desc} during the creation of modules") elseif(${lang} MATCHES "STATIC_LINKER") set(${desc} "${desc} linker during the creation of static libraries") endif() if(${type} MATCHES "ALL") set(${desc} "${desc} during all build types") else() set(${desc} "${desc} during ${type} builds") endif() endmacro() #=============================================================================== function(handle_flags) include(CMakeParseArguments) cmake_parse_arguments(_flags "ADD;REMOVE" "LANG;TYPE" "" ${ARGN} ) if(NOT _flags_LANG) set(_flags_LANG ${FLAGS_HANDLING_DEFAULT_LANGUAGE}) endif() set(_variable CMAKE_${_flags_LANG}_FLAGS) if (_flags_TYPE) set(_variable ${_variable}_${_flags_TYPE}) else() set(_flags_TYPE "ALL") endif() _get_flags_message(${_flags_LANG} ${_flags_TYPE} _desc) foreach(flag ${_flags_UNPARSED_ARGUMENTS}) if (_flags_ADD) string(REPLACE "${flag}" "match" _temp_var "${${_variable}}") if(NOT _temp_var MATCHES "match") set(${_variable} "${flag} ${${_variable}}" CACHE STRING ${_desc} FORCE) endif() elseif(_flags_REMOVE) string(REPLACE "${flag} " "" ${_variable} "${${_variable}}") set(${_variable} "${${_variable}}" CACHE STRING ${_desc} FORCE) endif() endforeach() endfunction() #=============================================================================== function(add_flags) handle_flags(ADD ${ARGN}) endfunction() #=============================================================================== function(remove_flags) handle_flags(REMOVE ${ARGN}) endfunction() #=============================================================================== diff --git a/cmake/CheckFindMumps.c b/cmake/CheckFindMumps.c index 3b006f7..b402816 100644 --- a/cmake/CheckFindMumps.c +++ b/cmake/CheckFindMumps.c @@ -1,105 +1,105 @@ /** * @file CheckFindMumps.c * * @brief * * @copyright * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) * SPC (Swiss Plasma Center) * - * spclibs is free software: you can redistribute it and/or modify it under + * SPClibs is free software: you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free * Software Foundation, either version 3 of the License, or (at your option) * any later version. * - * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY + * SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see . * * @authors * (in alphabetical order) * @author Nicolas Richart */ #include #if !defined(MUMPS_SEQ) # include #endif #define JOB_INIT -1 #define JOB_END -2 #define JOB_COMPLETE 6 #define USE_COMM_WORLD -987654 #define icntl(n) id.icntl[n - 1] int main(int argc, char **argv) { int n = 2; int nz = 2; int irn[2] = {1, 2}; int jcn[2] = {1, 2}; Real a[2]; Real rhs[2]; #if !defined(MUMPS_SEQ) MPI_Init(&argc, &argv); #endif rhs[0] = 1.0; rhs[1]=4.0; a[0] = 1.0; a[1] = 2.0; id.job = JOB_INIT; id.par = 1; id.sym = 0; #if !defined(MUMPS_SEQ) id.comm_fortran = USE_COMM_WORLD; #endif mumps_c(&id); // Default Scaling icntl(8) = 77; // Assembled matrix icntl(5) = 0; /// Default centralized dense second member icntl(20) = 0; icntl(21) = 0; // automatic choice for analysis analysis icntl(28) = 0; // fully distributed icntl(18) = 3; id.n = n; id.nz_loc = nz; id.irn_loc = irn; id.jcn_loc = jcn; id.a_loc = a; id.rhs = rhs; icntl(1) = -1; icntl(2) = -1; icntl(3) = -1; icntl(4) = 0; id.job = JOB_COMPLETE; mumps_c(&id); id.job=JOB_END; mumps_c(&id); printf("Solution is : (%8.2f %8.2f)\n", rhs[0], rhs[1]); return 0; } diff --git a/cmake/FindFFTW.cmake b/cmake/FindFFTW.cmake index 855fa81..7abde8a 100644 --- a/cmake/FindFFTW.cmake +++ b/cmake/FindFFTW.cmake @@ -1,46 +1,46 @@ # # @file FindFFTW.cmake # # @brief # # @copyright # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) # SPC (Swiss Plasma Center) # -# spclibs is free software: you can redistribute it and/or modify it under +# SPClibs is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) # any later version. # -# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . # # @authors # (in alphabetical order) # @author Nicolas Richart # if(CMAKE_C_COMPILER_ID MATCHES "Cray") set(_cray TRUE) endif() # Find FFTW2 if (${_cray}) # set(FFTW_LIBRARY "-ldfftw") set(FFTW_LIBRARY "${FFTW_DIR}/libdfftw.a") else() find_library(FFTW_LIBRARY NAMES fftw PATHS ${FFTW}/lib) find_library(FFTW_LIBRARY NAMES fftw3 PATHS ${FFTW}/lib) find_path(FFTW_INCLUDES fftw_f77.h ${FFTW}/include) find_path(FFTW_INCLUDES fftw_f77.i ${FFTW}/include) find_path(FFTW_INCLUDES fftw.h ${FFTW}/include) find_path(FFTW_INCLUDES fftw3.h ${FFTW}/include) endif() mark_as_advanced(FFTW_LIBRARY FFTW_INCLUDES) include(FindPackageHandleStandardArgs) find_package_handle_standard_args(FFTW DEFAULT_MSG FFTW_LIBRARY) diff --git a/cmake/FindMETIS.cmake b/cmake/FindMETIS.cmake index 879b0a9..f194320 100644 --- a/cmake/FindMETIS.cmake +++ b/cmake/FindMETIS.cmake @@ -1,62 +1,62 @@ # # @file FindMETIS.cmake # # @brief # # @copyright # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) # SPC (Swiss Plasma Center) # -# spclibs is free software: you can redistribute it and/or modify it under +# SPClibs is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) # any later version. # -# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . # # @authors # (in alphabetical order) # @author Nicolas Richart # find_path(METIS_INCLUDE_DIR metis.h PATHS "${METIS_DIR}" ENV METIS_DIR PATH_SUFFIXES include ) find_library(METIS_LIBRARY NAMES metis PATHS "${METIS_DIR}" ENV METIS_DIR PATH_SUFFIXES lib ) mark_as_advanced(METIS_LIBRARY METIS_INCLUDE_DIR) #=============================================================================== include(FindPackageHandleStandardArgs) if(CMAKE_VERSION VERSION_GREATER 2.8.12) if(METIS_INCLUDE_DIR) file(STRINGS ${METIS_INCLUDE_DIR}/metis.h _versions REGEX "^#define\ +METIS_VER_(MAJOR|MINOR|SUBMINOR) .*") foreach(_ver ${_versions}) string(REGEX MATCH "METIS_VER_(MAJOR|MINOR|SUBMINOR) *([0-9.]+)" _tmp "${_ver}") set(_metis_${CMAKE_MATCH_1} ${CMAKE_MATCH_2}) endforeach() set(METIS_VERSION "${_metis_MAJOR}.${_metis_MINOR}" CACHE INTERNAL "") endif() find_package_handle_standard_args(METIS REQUIRED_VARS METIS_LIBRARY METIS_INCLUDE_DIR VERSION_VAR METIS_VERSION) else() find_package_handle_standard_args(METIS DEFAULT_MSG METIS_LIBRARY METIS_INCLUDE_DIR) endif() diff --git a/cmake/FindPETSc.cmake b/cmake/FindPETSc.cmake index 67cfc35..27a5299 100644 --- a/cmake/FindPETSc.cmake +++ b/cmake/FindPETSc.cmake @@ -1,92 +1,92 @@ # # @file FindPETSc.cmake # # @brief # # @copyright # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) # SPC (Swiss Plasma Center) # -# spclibs is free software: you can redistribute it and/or modify it under +# SPClibs is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) # any later version. # -# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . # # @authors # (in alphabetical order) # @author Nicolas Richart # # - Try to find PETSc # PETSC_FOUND - system has PETSc # PETSC_INCLUDE_DIRS - the PETSc include directories # PETSC_LIBRARIES - Link these to use PETSc # PETSC_VERSION - Version string (MAJOR.MINOR.SUBMINOR) if(PETSc_FIND_REQUIRED) find_package(PkgConfig REQUIRED) else() find_package(PkgConfig QUIET) if(NOT PKG_CONFIG_FOUND) return() endif() endif() pkg_search_module(_petsc PETSc) # Some debug code #get_property(_vars DIRECTORY PROPERTY VARIABLES) #foreach(_var ${_vars}) # if ("${_var}" MATCHES "^_petsc") # message("${_var} -> ${${_var}}") # endif() #endforeach() if(_petsc_FOUND AND _petsc_VERSION) set(PETSC_VERSION ${_petsc_VERSION}) endif() if(_petsc_FOUND) set(_petsc_libs) foreach(_lib ${_petsc_LIBRARIES}) string(TOUPPER "${_lib}" _u_lib) find_library(PETSC_LIBRARY_${_u_lib} ${_lib} PATHS ${_petsc_LIBRARY_DIRS}) list(APPEND _petsc_libs ${PETSC_LIBRARY_${_u_lib}}) mark_as_advanced(PETSC_LIBRARY_${_u_lib}) endforeach() if (NOT _petsc_INCLUDE_DIRS) pkg_get_variable(_petsc_INCLUDE_DIRS ${_petsc_MODULE_NAME} includedir) #message(${_petsc_INCLUDE_DIRS}) endif() find_path(PETSC_Fortran_INCLUDE_DIRS "finclude/petsc.h" PATHS ${_petsc_INCLUDE_DIRS}/petsc NO_CMAKE_PATH NO_DEFAULT_PATH ) set(PETSC_LIBRARIES ${_petsc_libs} CACHE FILEPATH "") set(PETSC_INCLUDE_DIRS ${_petsc_INCLUDE_DIRS} CACHE PATH "") add_library(petsc::petsc INTERFACE IMPORTED) set_property(TARGET petsc::petsc PROPERTY INTERFACE_LINK_LIBRARIES ${PETSC_LIBRARIES}) set_property(TARGET petsc::petsc PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${PETSC_INCLUDE_DIRS}) add_library(petsc::petscf INTERFACE IMPORTED) target_link_libraries(petsc::petscf INTERFACE petsc::petsc) set_property(TARGET petsc::petscf PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${PETSC_Fortran_INCLUDE_DIRS}) endif() include (FindPackageHandleStandardArgs) find_package_handle_standard_args(PETSc REQUIRED_VARS PETSC_LIBRARIES PETSC_INCLUDE_DIRS VERSION_VAR PETSC_VERSION) diff --git a/cmake/FindParMETIS.cmake b/cmake/FindParMETIS.cmake index 1f0c0df..f0c5425 100644 --- a/cmake/FindParMETIS.cmake +++ b/cmake/FindParMETIS.cmake @@ -1,62 +1,62 @@ # # @file FindParMETIS.cmake # # @brief # # @copyright # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) # SPC (Swiss Plasma Center) # -# spclibs is free software: you can redistribute it and/or modify it under +# SPClibs is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) # any later version. # -# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . # # @authors # (in alphabetical order) # @author Nicolas Richart # find_path(PARMETIS_INCLUDE_DIR parmetis.h PATHS "${PARMETIS_DIR}" ENV PARMETIS_DIR PATH_SUFFIXES include ) find_library(PARMETIS_LIBRARY NAMES parmetis PATHS "${PARMETIS_DIR}" ENV PARMETIS_DIR PATH_SUFFIXES lib ) mark_as_advanced(PARMETIS_LIBRARY PARMETIS_INCLUDE_DIR) #=============================================================================== include(FindPackageHandleStandardArgs) if(CMAKE_VERSION VERSION_GREATER 2.8.12) if(PARMETIS_INCLUDE_DIR) file(STRINGS ${PARMETIS_INCLUDE_DIR}/parmetis.h _versions REGEX "^#define\ +PARMETIS_(MAJOR|MINOR|SUBMINOR)_VERSION .*") foreach(_ver ${_versions}) string(REGEX MATCH "PARMETIS_(MAJOR|MINOR|SUBMINOR)_VERSION *([0-9.]+)" _tmp "${_ver}") set(_parmetis_${CMAKE_MATCH_1} ${CMAKE_MATCH_2}) endforeach() set(PARMETIS_VERSION "${_parmetis_MAJOR}.${_parmetis_MINOR}" CACHE INTERNAL "") endif() find_package_handle_standard_args(ParMETIS REQUIRED_VARS PARMETIS_LIBRARY PARMETIS_INCLUDE_DIR VERSION_VAR PARMETIS_VERSION) else() find_package_handle_standard_args(ParMETIS DEFAULT_MSG PARMETIS_LIBRARY PARMETIS_INCLUDE_DIR) endif() diff --git a/cmake/blas.cmake b/cmake/blas.cmake index f4ef4d5..00fa014 100644 --- a/cmake/blas.cmake +++ b/cmake/blas.cmake @@ -1,63 +1,63 @@ # # @file blas.cmake # # @brief # # @copyright # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) # SPC (Swiss Plasma Center) # -# spclibs is free software: you can redistribute it and/or modify it under +# SPClibs is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) # any later version. # -# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . # # @authors # (in alphabetical order) # @author Nicolas Richart # set(_default_blas $ENV{BLA_VENDOR}) if(NOT _default_blas) set(_default_blas All) endif() set(BSPLINES_USE_BLAS_VENDOR "${_default_blas}" CACHE STRING "Version of blas to use") mark_as_advanced(BSPLINES_USE_BLAS_VENDOR) set_property(CACHE BSPLINES_USE_BLAS_VENDOR PROPERTY STRINGS All ACML ACML_GPU ACML_MP ATLAS Apple CXML DXML Generic Goto IBMESSL Intel Intel10_32 Intel10_64lp Intel10_64lp_seq NAS OpenBLAS PhiPACK SCSL SGIMATH SunPerf ) if(BSPLINES_USE_PARDISO) set(BSPLINES_USE_BLAS_VENDOR Intel10_64lp CACHE STRING "" INTERNAL) endif() set(ENV{BLA_VENDOR} ${BSPLINES_USE_BLAS_VENDOR}) find_package(BLAS REQUIRED) find_package(LAPACK REQUIRED) diff --git a/cmake/bsplines-config.cmake.in b/cmake/bsplines-config.cmake.in index 31605c5..bf7ad55 100644 --- a/cmake/bsplines-config.cmake.in +++ b/cmake/bsplines-config.cmake.in @@ -1,54 +1,54 @@ # # @file bsplines-config.cmake.in # # @brief # # @copyright # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) # SPC (Swiss Plasma Center) # -# spclibs is free software: you can redistribute it and/or modify it under +# SPClibs is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) # any later version. # -# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . # # @authors # (in alphabetical order) # @author Trach-Minh Tran # # - Config file for the BSPLINES package # It defines the target the following variables: # FFLAGS - Fortran compile flags # BSPLINES_MODS - include directories for bsplines modules # BSPLINES_LIBS - bsplines library # BSPLINES_EXTRA_INCS - additional include directories # BSPLINES_EXTRA_LIBS - additional libraries # HAS_PARDISO - BSPLINES built with PARDISO # HAS_MUMPS - BSPLINES built with MUMPS # MPIEXEC - MPI launcher # MPIEXEC_NUMPROC_FLAG - Number of MPI processes flag # Compute paths get_filename_component(_dir "${CMAKE_CURRENT_LIST_FILE}" PATH) get_filename_component(_prefix "${_dir}/../.." ABSOLUTE) # Import the targets include("${_prefix}/lib/cmake/bsplines-targets.cmake") # Report other information set(FFLAGS "@CMAKE_Fortran_FLAGS@") set(BSPLINES_MODS "${_prefix}/include") set(BSPLINES_LIBS fft bsplines pppack pputils2) set(BSPLINES_EXTRA_INCS "@EXTRA_INCS@") set(BSPLINES_EXTRA_LIBS "@EXTRA_LIBS@") set(HAS_PARDISO "@HAS_PARDISO@") set(HAS_MUMPS "@HAS_MUMPS@") set(MPIEXEC "@MPIEXEC@") set(MPIEXEC_NUMPROC_FLAG "@MPIEXEC_NUMPROC_FLAG@") diff --git a/docs/manual/Makefile b/docs/manual/Makefile index 206a081..f77ff93 100644 --- a/docs/manual/Makefile +++ b/docs/manual/Makefile @@ -1,59 +1,59 @@ # # @file bsplines.tex # # @brief # # @copyright # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) # SPC (Swiss Plasma Center) # -# spclibs is free software: you can redistribute it and/or modify it under +# SPClibs is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) # any later version. # -# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . # # @authors # (in alphabetical order) # @author Trach-Minh Tran # dvi: bsplines.dvi pdf: bsplines.pdf ps: bsplines.ps .SUFFIXES: .SUFFIXES: .sgml .html .tex .dvi .pdf .ps .txt .tex.dvi: latex $< @while ( grep "Rerun to get cross-references" \ ${<:tex=log} > /dev/null ); do \ latex $<; \ done latex $< .dvi.pdf: dvipdf $< .dvi.ps: dvips $< bsplines.dvi: bsplines.tex driv1.eps fit.eps solvers.dvi: solvers.tex dirichlet_2d.dvi: dirichlet_2d.tex using_bsplines.dvi: using_bsplines.tex clean: rm -f *~ *.dvi *.log *.aux *.out *~ *.toc *.flc *.bbl *.blg distclean: clean rm -f bsplines.ps diff --git a/docs/manual/bsplines.tex b/docs/manual/bsplines.tex index ecc06a9..9b21e88 100644 --- a/docs/manual/bsplines.tex +++ b/docs/manual/bsplines.tex @@ -1,1297 +1,1297 @@ % % @file bsplines.tex % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % @author Stephan Brunner % \documentclass[a4paper]{article} \usepackage{linuxdoc-sgml} %\usepackage{a4wide} \usepackage{graphicx} \usepackage{hyperref} \usepackage{amsmath} %\usepackage{fancybox} %\usepackage[notref]{showkeys} \title{\tt BSPLINES Reference Guide} \author{Trach-Minh Tran, Stephan Brunner, Kurt Appert} \date{v0.3, February 2012} \abstract{Generalized splines of any order on irregular grids for interpolation and solving PDEs with FEM.} \begin{document} \maketitle \tableofcontents \section{Properties of Splines} In this section, several properties of splines will be shown in a more or less rigorous way. The aim is mainly to provide a minimum mathematical background for using the module \texttt{BSPLINES} for the \emph{interpolation} problem as well as the \emph{Finite Element Method} to solve PDEs. More rigorous mathematical proofs can be found in the book by de Boor~\cite{deBoor}. \subsection{Recurrence Relation} We start by defining a finite interval $[a,b]$ subdivided into $N_x$ intervals: \begin{equation} a=t_0 \le t_1 \le \ldots \le t_{N_x}=b. \end{equation} The sequence $t_i, i=0,\ldots,N_x$ can be irregularly spaced. The $j^{th}$ spline of degree $p$ defined on this sequence of grid points (also called \textbf{knots}), is denoted by $\Lambda_{j}^{p}$ and can be constructed using the following recurrence relation. Starting with the \emph{constant} spline \begin{equation} \Lambda_i^0(x) = \begin{cases} 1& \text{if $t_i \le x < t_{i+1}$}, \\ 0& \text{otherwise}. \end{cases} \end{equation} the splines of degree $p>0$ for $t_i \le x < t_{i+1}$ can be constructed from \begin{eqnarray} \Lambda_i^p &=& w_i^p\Lambda_i^{p-1} + (1- w_{i+1}^p)\Lambda_{i+1}^{p-1}, \label{eq:recRel}\\ w_{i}^{p} &=& \frac{x-t_i}{t_{i+p}-t_i}. \end{eqnarray} Thus the values of all \emph{non-zero} splines up to degree $p$ in the interval $[t_i,t_{i+1}]$ fit into the triangular array as shown in Fig.~\ref{fig:allSpl}. Starting from the first column with $\Lambda_i^0=1$, one can compute each of the $p+1$ entries in a subsequent column with Eq.~(\ref{eq:recRel}). Applying this procedure to generate splines on every intervals $[t_i,t_{i+1}], i=0,\ldots,N_{x}-1$ would produce the sequence of $N_{x}+p$ splines of degree $p$: \( \Lambda_{-p}^{p},\ldots, \Lambda_{N_x-1}^{p}\). \subsection{Support and positivity} The linear spline \begin{equation*} \Lambda_i^1 = w_i^1\Lambda_i^0 + (1-w_{i+1}^1)\Lambda_{i+1}^{0} =\frac{x-t_i}{t_{i+1}-t_i}\Lambda_i^0 + \frac{t_{i+2}-x}{t_{i+2}-t_{i+1}}\Lambda_{i+1}^0 \end{equation*} consists of 2 \emph{linear pieces} on $[t_i,t_{i+2}]$, forming a $C^0$ function which breaks at $t_{i+1}$ and vanishes outside of this interval. Likewise, the quadratic spline \begin{eqnarray*} \Lambda_i^2 &=& w_i^2\Lambda_i^1 + (1-w_{i+1}^2)\Lambda_{i+1}^{1} \\ &=& w^2_i w^1_i \Lambda_i^0 + [w^2_i(1-w^1_{i+1})+w^1_{i+1}(1-w^2_{i+1})] \Lambda_{i+1}^0 + (1-w^2_{i+1})(1-w^1_{i+2})\Lambda_{i+2}^0 \end{eqnarray*} consists of 3 \emph{parabolic pieces} on $[t_i,t_{i+3}]$ that join to form a $C^1$ function which breaks at $t_{i+1}$ and $t_{i+2}$ and vanishes outside of this interval. In general the spline of degree $p$ can be expressed as: \begin{equation} \Lambda^p_i = \sum_{r=0}^{p}\, b_{i+r}^p\Lambda_{i+r}^0 \end{equation} where $b_{i+r}^p$ is a sum of products of $p$ linear functions, resulting in $p+1$ polynomials of degree $p$, joining to form a $C^{p-1}$ function which breaks at $t_i,\ldots,t_{i+p+1}$ and vanishes outside of the \emph{support} $[t_i,t_{i+p+1}]$. From its construction, $\Lambda^p_i$ is clearly \emph{strictly positive} on the interior of $[t_i,t_{i+p+1}]$. \begin{equation} \Lambda^p_i (x) > 0, \qquad t_i1$: \[ \sum_{j=i-p+1}^{i}\,\Lambda_j^{p-1} = 1, \] or that the sum of the next to last column in Fig.~\ref{fig:allSpl} is $1$, we have, using the recurrence relation (\ref{eq:recRel}) \begin{eqnarray*} \sum_{j=i-p}^{i}\,\Lambda_j^{p} &=& \sum_{j=i-p}^{i}\, \left( w^p_j\Lambda_j^{p-1} +(1-w^p_{j+1})\Lambda_{j+1}^{p-1} \right) \\ &=& \sum_{j=i-p+1}^{i}\,w^p_j\Lambda_j^{p-1} + \sum_{j=i-p+1}^{i}\,(1-w^p_j)\Lambda_j^{p-1} \\ &=& \sum_{j=i-p+1}^{i}\,\Lambda_j^{p-1} = 1. \end{eqnarray*} \subsection{Derivative of Splines} The derivative of the splines of degree $p$ can be expressed in terms of the splines of degree $p-1$ by the following relation: \begin{equation} \label{derivative of splines} \frac{d}{dx}\Lambda_i^p = p\left( \frac{\Lambda_i^{p-1}}{t_{i+p}-t_i} - \frac{\Lambda_{i+1}^{p-1}}{t_{i+p+1}-t_{i+1}} \right). \end{equation} A straightforward consequence of this relation is that the splines of order $p$ are $C^{p-1}$ continuous. The demonstration of Eq.(\ref{derivative of splines}) is done by induction. One starts with the case $p=1$: \begin{eqnarray*} \frac{d}{dx}\Lambda_i^1 & = & \frac{d}{dx}\left[ w_i^1\Lambda_i^0 + (1-w^1_{i+1})\Lambda_{i+1}^0 \right] \\ & = & \frac{d\,w_i^1}{dx}\Lambda_i^0 + \frac{d\,(1-w^1_{i+1})}{dx}\Lambda_{i+1}^0 + w_i^1\frac{d\,\Lambda_i^0}{dx} + (1-w^1_{i+1})\frac{d\,\Lambda_{i+1}^0}{dx} \\ & = & \frac{1}{t_{i+1}-t_i}\Lambda_i^0 - \frac{1}{t_{i+2}-t_{i+1}}\Lambda_{i+1}^0, \end{eqnarray*} having used Eq.(\ref{eq:recRel}) and $d\,\Lambda_i^0/dx = 0$. One then assumes Eq.(\ref{derivative of splines}) true for $p-1$ and demonstrates that it remains true for $p$. This is done as follows: \begin{eqnarray} \label{demo deriv. 1} \frac{d}{dx}\Lambda_i^p & = & \frac{d}{dx}\left[ w_i^p\Lambda_i^{p-1} + (1-w^p_{i+1})\Lambda_{i+1}^{p-1} \right] \\ \nonumber & = & \frac{d\,w_i^p}{dx}\Lambda_i^{p-1} + \frac{d\,(1-w^p_{i+1})}{dx}\Lambda_{i+1}^{p-1} + w_i^p\frac{d\,\Lambda_i^{p-1}}{dx} + (1-w^p_{i+1})\frac{d\,\Lambda_{i+1}^{p-1}}{dx} \\ \nonumber & = & \frac{\Lambda_i^{p-1}}{t_{i+p}-t_i} -\frac{\Lambda_{i+1}^{p-1}}{t_{i+p+1}-t_{i+1}} \\ \label{demo deriv. 2} && + w_i^p (p-1)\left( \frac{\Lambda_i^{p-2}}{t_{i+p-1}-t_i} - \frac{\Lambda_{i+1}^{p-2}}{t_{i+p}-t_{i+1}} \right) + (1-w_{i+1}^p) (p-1)\left( \frac{\Lambda_{i+1}^{p-2}}{t_{i+p}-t_{i+1}} - \frac{\Lambda_{i+2}^{p-2}}{t_{i+p+1}-t_{i+2}} \right) \end{eqnarray} having used Eq.(\ref{eq:recRel}) to obtain (\ref{demo deriv. 1}), and the induction hypothesis to obtain Eq.(\ref{demo deriv. 2}). Now, rearranging the last two terms of Eq.(\ref{demo deriv. 2}), one easily obtains: \begin{eqnarray} \nonumber \frac{d}{dx}\Lambda_i^p & = & \frac{\Lambda_i^{p-1}}{t_{i+p}-t_i} -\frac{\Lambda_{i+1}^{p-1}}{t_{i+p+1}-t_{i+1}} \\ \nonumber && +(p-1)\left[ \frac{1}{t_{i+p}-t_i} \left( \frac{x-t_i}{t_{i+p-1}-t_i}\Lambda_i^{p-2} +\frac{t_{i+p}-x}{t_{i+p}-t_{i+1}}\Lambda_{i+1}^{p-2} \right) \right. \\ \nonumber && \hspace{2.cm} \left. -\frac{1}{t_{i+p+1}-t_{i+1}} \left( \frac{x-t_{i+1}}{t_{i+p}-t_{i+1}}\Lambda_{i+1}^{p-2} +\frac{t_{i+p+1}-x}{t_{i+p+1}-t_{i+2}}\Lambda_{i+2}^{p-2} \right) \right] \\ \label{demo deriv. 3} & = & \frac{\Lambda_i^{p-1}}{t_{i+p}-t_i} -\frac{\Lambda_{i+1}^{p-1}}{t_{i+p+1}-t_{i+1}} + (p-1)\left( \frac{\Lambda_i^{p-1}}{t_{i+p}-t_i} -\frac{\Lambda_{i+1}^{p-1}}{t_{i+p+1}-t_{i+1}} \right) \\ \nonumber & = & p\left( \frac{\Lambda_i^{p-1} }{t_{i+p}-t_i} -\frac{\Lambda_{i+1}^{p-1}}{t_{i+p+1}-t_{i+1}} \right) \end{eqnarray} having again used Eq.(\ref{eq:recRel}) to obtain (\ref{demo deriv. 3}). This completes the demonstration of relation (\ref{derivative of splines}). \subsection{Integrals of Splines} With the proper normalization all splines of all degrees have unitary surface: \begin{equation} \label{integrals of splines} \frac{p+1}{t_{i+p+1}-t_i}\int \Lambda_i^p(x)dx = 1. \end{equation} This relation holds trivially for $p=0$ and $p=1$. A recursive proof of the general statement (\ref{integrals of splines}) starts assuming \begin{equation} \label{previousInt} \frac{p}{t_{i+p}-t_i}\int \Lambda_i^{p-1}(x)dx = 1 \end{equation} to be true. Then using Eq.(\ref{derivative of splines}) multiplied by $x$ and integrating one obtains: \begin{equation} \nonumber \int x \frac{d}{dx}\Lambda_i^p dx = -\int \Lambda_i^p dx = p\int\left( \frac{x\Lambda_i^{p-1}}{t_{i+p}-t_i} -\frac{x\Lambda_{i+1}^{p-1}}{t_{i+p+1}-t_{i+1}} \right)dx. \end{equation} Completing the fractions in the big parentheses in view of using Eq.(\ref{eq:recRel}) one has \begin{eqnarray} \nonumber \int \Lambda_i^p dx & = & -\, p\int \frac{x-t_i}{t_{i+p}-t_i}\Lambda_i^{p-1} dx - p\int \frac{t_i}{t_{i+p}-t_i}\Lambda_i^{p-1} dx \\ \nonumber & & -\, p\int \frac{t_{i+p+1}-x}{t_{i+p+1}-t_{i+1}}\Lambda_{i+1}^{p-1} dx + p\int \frac{t_{i+p+1}}{t_{i+p+1}-t_{i+1}}\Lambda_{i+1}^{p-1} dx, \end{eqnarray} where the first and the third terms on the right correspond to $-p\int\Lambda_i^p dx $, Eq.(\ref{eq:recRel}), and can be combined with the left side to yield \begin{equation} \label{proofIntegrals} (1+p)\int\Lambda_i^p dx = t_{i+p+1}-t_i, \end{equation} where relation (\ref{previousInt}) has been used for the rest on the right hand side. This concludes the proof of Eq.(\ref{integrals of splines}). \subsection{Boundary Conditions} Applying the recurrence relation to generate \emph{all} the splines on the finite domain $[t_0,t_{N_x}]$ yields the $N_x+p$ splines of degree $p$: \begin{equation} \Lambda^p_{-p},\Lambda^p_{-p+1},\ldots, \Lambda^p_{N_x-1}. \label{eq:splSeq} \end{equation} Note that \emph{additional} knots beyond both ends of $[t_0,t_{N_x}]$ have to be defined to generate all these splines. \subsubsection{Periodic splines} The extra knots are simply defined through periodicity.: \begin{eqnarray} t_{-\nu} &=& t_{N_x-\nu}-(b-a), \\ t_{N_{x}+\nu} &=& t_{\nu}+(b-a), \qquad \nu=0,\ldots,p. \end{eqnarray} The $p+1$ leftmost splines in (\ref{eq:splSeq}) are thus identical to the rightmost splines: \begin{equation} \Lambda^p_{-\nu} = \Lambda^p_{N_x-\nu}, \qquad \nu=0,\ldots,p. \end{equation} \subsubsection{Non-periodic splines} The choice made in \texttt{BSPLINES} is simply: \begin{equation} t_{-p} = \cdots = t_{0} = a, \qquad b=t_{N_x}=\cdots=t_{N_x+p}. \end{equation} Thus in the first interval $[t_0,t_1]$, the first spline $\Lambda^p_{-p}$ is constructed (refer to the first entry on each of the column of Fig.~\ref{fig:allSpl}, with $i=0$) as follow: \begin{eqnarray*} \Lambda^1_{-1} &=& (1-w^1_{0})\Lambda^0_{0} = \frac{t_{1}-x}{t_{1}-t_{0}}\Lambda^0_{0}\\ \Lambda^2_{-2} &=& (1-w^2_{-1})\Lambda^1_{-1} =\frac{t_{1}-x}{t_{1}-t_{-1}}\Lambda^1_{-1} =\left(\frac{t_{1}-x}{t_{1}-t_{0}}\right)^{2}\Lambda^0_{0}\\ \cdot & & \qquad\cdot\qquad\qquad\qquad\qquad \cdot \\ \Lambda^p_{-p} &=& (1-w^p_{-p+1})\Lambda^p_{-p+1} =\frac{t_{1}-x}{t_{1}-t_{-p+1}}\Lambda^{p-1}_{-p+1} =\left(\frac{t_{1}-x}{t_{1}-t_{0}}\right)^{p}\Lambda^0_{0} \end{eqnarray*} In the same manner, the generation of the \emph{last} spline $\Lambda^p_{N_x-1}$ (last entry on each of the column of Fig.~\ref{fig:allSpl}, with $i=N_{x}-1$) yields: \begin{eqnarray*} \Lambda^1_{N_x-1} &=& w^1_{N_x-1}\Lambda^0_{N_x-1} = \frac{x-t_{N_x-1}}{t_{N_x}-t_{N_x-1}}\Lambda^0_{N_x-1} \\ \Lambda^2_{N_x-1} &=& w^2_{N_x-1}\Lambda^1_{N_x-1} = \frac{x-t_{N_x-1}}{t_{N_x+1}-t_{N_x-1}}\Lambda^1_{N_x-1} = \left(\frac{x-t_{N_x-1}}{t_{N_x}-t_{N_x-1}}\right)^{2}\Lambda^0_{N_x-1}\\ \cdot & & \qquad\cdot\qquad\qquad\qquad\qquad \cdot \\ \Lambda^p_{N_x-1} &=& w^p_{N_x-1}\Lambda^p_{N_x-1} = \frac{x-t_{N_x-1}}{t_{N_x+p-1}-t_{N_x-1}}\Lambda^1_{N_x-1} = \left(\frac{x-t_{N_x-1}}{t_{N_x}-t_{N_x-1}}\right)^{p}\Lambda^0_{N_x-1} \end{eqnarray*} Since the sum of all splines is 1 and using the positivity of splines, all the non-periodic splines, except the first (last) spline should vanish at $x=a$ ($x=b$): \begin{equation} \Lambda^p_{r}(a) = \delta_{r,-p}, \qquad\Lambda^p_{r}(b) = \delta_{r,N_x-1} \end{equation} The spline derivatives at the boundaries $x=a$ and $x=b$ can be derived using Eq.(\ref{derivative of splines}) as follow. At $x=a$ (interval $[t_0,t_1]$), by noting that only the spline $\Lambda^{p-1}_{-p+1}$ is non-zero at $x=a$ (see next to last column of Fig.{\ref{fig:allSpl}, with $i=0$), it is easy to see that there are only 2 non-zero derivatives given by \begin{equation} \begin{split} \frac{d}{dx}\Lambda^p_{-p}(a) =& -\frac{p\,\Lambda^{p-1}_{-p+1}(a)}{t_1-t_{-p}} = -\frac{p}{t_1-t_0}, \\ \frac{d}{dx}\Lambda^p_{-p+1}(a) =& \frac{p\,\Lambda^{p-1}_{-p+1}(a)}{t_1-t_{-p+1}} = \frac{p}{t_1-t_0}, \end{split} \end{equation} where we have used $t_0=t_{-1}=\ldots=t_{-p}=a$. Likewise, the 2 non-zero derivatives of spline at the other boundary $x=b$ are \begin{equation} \begin{split} \frac{d}{dx}\Lambda^p_{N_x-2}(b) =& -\frac{p\,\Lambda^{p-1}_{N_x-1}(b)}{t_{N_x+p-1}-t_{N_x-1}} = -\frac{p}{t_{N_x}-t_{N_x-1}}, \\ \frac{d}{dx}\Lambda^p_{N_x-1}(b) =& \frac{p\,\Lambda^{p-1}_{N_x-1}(b)}{t_{N_x+p-1}-t_{N_x-1}} = \frac{p}{t_{N_x}-t_{N_x-1}}, \end{split} \end{equation} where we have used $t_{N_x}=t_{N_x+1}=\ldots=t_{N_x+p}=b$. \subsubsection{Spline expansion} In summary, the approximation of a function $f$ defined in the interval $[a,b]$ using a basis (\textbf{Is this obvious?) }of splines of degree $p$ associated with the sequence of knots $t_i, i=-p,\ldots,N_{x}+p$ can be written as \begin{equation} f(x) = \sum_{j=-p}^{N_x-1}\, c_j\Lambda^p_j(x), \qquad \begin{array}{l} \mbox{support of $\Lambda^p_j$:}\quad [t_{j},t_{j+p+1}],\\ t_i \leq x < t_{i+1} \Longrightarrow \Lambda^p_{i-p}(x),\ldots, \Lambda^p_{i}(x) \ge 0. \end{array} \end{equation} Note that the \emph{last} spline in the interval $[t_i,t_{i+1}]$, which can be written as \[ \Lambda^p_{i}(x)=w^p_i(x) \Lambda^{p-1}_{i}(x)=\ldots=w^p_i(x) w^{p-1}_i(x) \ldots w^{1}_i(x)\Lambda^{0}_{i}(x) \] \emph{vanishes at the knot} $x=t_i$. Thus at any position $x$, the sum involves $p+1$ terms except at the knots $t_i$ where there are only $p$ terms. It is sometimes more convenient to renumber the spline index $j$ so that it starts from $0$. With this new numbering, the spline expansion becomes \begin{equation} f(x) = \sum_{j=0}^{N_x+p-1}\, c_j\Lambda^p_j(x), \qquad \begin{array}{l} \mbox{support of $\Lambda^p_j$:}\quad [t_{j-p},t_{j+1}], \\ t_i \leq x < t_{i+1} \Longrightarrow \Lambda^p_{i}(x),\ldots, \Lambda^p_{i+p}(x) \ge 0. \end{array} \label{eq:splExp} \end{equation} In the \emph{periodic} case, there are $N_{x}$ \emph{independent} spline coefficients since \begin{equation} c_{N_{x}+\nu} = c_{\nu}, \qquad \nu=0,\ldots,p-1. \label{eq:perSp} \end{equation} In the \emph{non-periodic} case, the first and the last spline coefficients $c_{0},\,c_{N_x+p-1}$ are respectively the values of $f$ at the end points $a$ and $b$. The basis functions for both non-periodic and periodic cubic splines ($p=3$) are shown in Fig~.\ref{fig:cubic_splines} where this new numbering is used. \begin{figure}[htbp] \centering \includegraphics[angle=0,width=\hsize]{driv1} \caption{The basis of non-periodic and periodic cubic splines. The periodic splines $\Lambda_{10}$, $\Lambda_{11}$, $\Lambda_{12}$ denote the same splines as $\Lambda_{0}$, $\Lambda_{1}$, $\Lambda_{2}$ respectively. } \label{fig:cubic_splines} \end{figure} \subsection{Spline Initialization with \texttt{SET\_SPLINE}} The initialization of a spline is performed by calling the routine \texttt{SET\_SPLINE}, passing the desired degree $p$ and the sequence of grid points (or knots) $t_j, j=0,\ldots,N_x$. If Gauss points on each of the intervals $[t_j,t_{j+1}]$ are needed, a non-zero value of \texttt{NGAUSS} should be specified. The other input argument is the \emph{optional} \texttt{LOGICAL} argument \texttt{PERIOD} to define the periodicity of the splines. By default it is \texttt{.FALSE.}. The routine returns the 1d spline \texttt{SP} which is of type \texttt{TYPE(spline1d)}: \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE set_spline(p, ngauss, grid, sp, period) INTEGER, INTENT(in) :: p, ngauss DOUBLE PRECISION, INTENT(in) :: grid(:) LOGICAL, OPTIONAL, INTENT(in) :: period TYPE(spline1d), INTENT(out) :: sp LOGICAL, OPTIONAL, INTENT(in) :: period \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} Besides the main characteristics of the spline (degree $p$ of splines, number of grid intervals, dimension of splines $N_x+p$, etc.) the following quantities will be determined and stored in \texttt{SP}: \begin{itemize} \item values and all the $p$ derivatives of the $p+1$ non-vanishing splines on each knots $t_j$. These quantities will be used to speed up computation of the spline expansion (\ref{eq:splExp}). \item integrals of splines \(I_i=\int\Lambda_i(x)\,dx\). \end{itemize} For a 2d spline \begin{equation} \Lambda^{p+q}_{ij}(x,y) = \Lambda^p_i(x)\Lambda^q_j(y), \end{equation} on a 2d structured mesh defined by the grid points \texttt{grid1(0:N1), grid2(0:N2)}, the same call as in the 1d case can be used, except that the scalars \texttt{p, ngauss, period} become 2 element arrays and the output \texttt{SP} is now of type \texttt{TYPE(spline2d)}: \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} INTEGER :: p(2), ngauss(2) LOGICAL, OPTIONAL :: period(2) DOUBLE PRECISION, dimension(:) :: grid1, grid2 TYPE(spline2d) :: sp2d ... CALL set_spline(p, ngauss, grid1, grid2, sp2d, period) \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} The derived type \texttt{spline2d} is a \emph{wrapper} of 2 \texttt{spline1d} objects which can be accessed through \texttt{sp2d\%sp1} and \texttt{sp2d\%sp2}. Once \texttt{SET\_SPLINE} is called, the routine \texttt{GET\_DIM} can be called to inquire the spline's essential characteristics such as dimension, number of intervals and degree, for both 1d and 2d splines: \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE get_dim(sp, dim, nx, nidbas) TYPE(spline1d), INTENT(in) :: sp INTEGER, INTENT(out) :: dim INTEGER, OPTIONAL, INTENT(out) :: nx, nidbas \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} Integral of function $\int^b_a\,f(x)dx$ is computed from its spline \texttt{sp} and splines coefficients in: \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} DOUBLE PRECISION FUNCTION fintg(sp, c) TYPE(spline1d), INTENT(in) :: sp DOUBLE PRECISION, INTENT(in) :: c(:) \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} For a 2d functions, the same function should be called with a 2d spline \texttt{sp} and 2d array $c$. Finally \texttt{DESTROY\_SP(sp)} should be called when a spline \texttt{sp} is not needed anymore to clean up memory space. \subsection{Generating Splines with \texttt{DEF\_BASFUN}} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE def_basfun(xp, sp, fun, left) DOUBLE PRECISION, INTENT(in) :: xp TYPE(spline1d), INTENT(in) :: sp DOUBLE PRECISION, INTENT(out) :: fun(:,:) INTEGER, OPTIONAL, INTENT(out) :: left \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} This routine computes, for a given point $\mbox{xp}\in [t_0,t_{N_x}]$, the value and optionally the $m$ derivatives of the $p+1$ splines \texttt{sp} which were previously defined and returns them in \texttt{fun(1:p+1,1:m+1)} with $m\leq p$. The maximum number of computed derivatives $m$ is determined by the size of the second dimension of the array \texttt{fun}. The subroutine will return the optional integer \texttt{left} defined such that: \[ t_{\mbox{left}} \leq xp < t_{\mbox{left+1}}, \qquad 0\leq \mbox{left} \leq N_{x.-1}. \] \subsection{Example 1: Values and derivatives of all splines} In this example, we first initialize a cubic spline with the knot sequence $t_0,\ldots,t_{N_x}$ with \texttt{SET\_SPLINE} and then call \texttt{DEF\_BASFUN} to compute its values, first and second derivatives on the mesh points \texttt{xp(1:npts)}. \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} USE BSPLINES INTEGER, PARAMETER :: nx=10, npts=100 DOUBLE PRECISION :: t(0:nx), xp(npts) DOUBLE PRECISION, ALLOCATABLE :: fxp0(:,:), fxp1(:,:), fxp2(:,:) DOUBLE PRECISION :: fun(4,3) ! 4 cubic splines at a given xp ! plus first and second derivatives. INTEGER :: i, dim, left TYPE(spline1d) :: sp ! ! Define t(0:nx), xp(npts) ! CALL set_spline(3, 0, t, sp, period=.FALSE.) CALL get_dim(sp, dim) ALLOCATE(fxp0(npts,0:dim-1), fxp1(npts,0:dim-1), fxp2(npts,0:dim-1) fxp0 = 0.0 fxp1 = 0.0 fxp2 = 0.0 DO i=1,npts CALL def_basfun(xp(i), sp, fun, left=left) fxp0(i, left:left+3) = fun(1:4, 1) ! Value fxp1(i, left:left+3) = fun(1:4, 2) ! 1st derivative fxp2(i, left:left+3) = fun(1:4, 3) ! 2nd derivative END DO DEALLOCATE(fxp0, fxp1, fxp2) CALL destroy_sp(sp) \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} This code fragment will store \texttt{dim=nx+3=13} splines and theirs first 2 derivatives in \texttt{fxp0}, \texttt{fxp1} and \texttt{fxp2}. Change the \texttt{period} to \texttt{.TRUE.} to obtain \emph{periodic} splines. \section{Spline Interpolation} Given the interval $[a,b]$ discretized into $\{x_k,\,k=0,\ldots,N_g\}$ with $x_0=a$ and $x_{N_g}=b$, the problem of interpolating $f(x), x\in [a,b]$ with splines of degree $p$ is to solve the following equations for the spline coefficients $c_i$: \begin{equation} \sum_{i=0}^{N_x+p-1}\,c_i\Lambda^p_{i}(x_k) = f(x_k),\quad k=0,\ldots,N_g. \label{eq:intEq} \end{equation} The sequence of knots $t_0,\ldots,t_{N_x}$ defines completely the splines $\Lambda^p_i$ and its choice will be described in the following section. \subsection{Choice of knots} If Eqs.~(\ref{eq:intEq}) are the only conditions for our interpolation problem, the number of equations should match the number of unknowns $c_i$. The number of knot intervals $N_x$ hence has to verify \begin{equation} N_x = N_g-p+1.\label{eq:knotNum} \end{equation} For the \emph{periodic} case, taking into account the $p$ periodic spline conditions (\ref{eq:perSp}) on $c_i$ and $f(a)=f(b)$, this condition reduces to: \begin{equation} N_x = N_g. \end{equation} For \emph{odd} values of the spline degree $p$, the knots $t_i$ could be placed on the \emph{interpolation sites} $x_k$ while when $p$ is even, $t_i$ should not be on $x_k$ to avoid a \emph{badly conditioned} linear system when solving Eq.~(\ref{eq:intEq}). This leads to the following choice for $t_i$ in \texttt{BSPLINES}: \subsubsection{Periodic splines} The number of knots $N_x+1$ is \emph{equal} to the number of interpolation points $N_g+1$ with \begin{equation} t_i = \begin{cases} x_i & \text{$p$ odd} \\ (x_{i-1}+x_i)/2 & \text{$p$ even} \end{cases} ,\qquad i=0,\ldots,N_x \end{equation} \subsubsection{Non-Periodic splines} In order to satisfy the equality (\ref{eq:knotNum}), first, the 2 end points are retained as knots: \begin{equation} t_0=x_0, \qquad t_{N_x} = x_{N_g}. \end{equation} For even $p$, the first $p/2$ interpolation intervals are \emph{skipped}: \begin{equation} t_i = (x_{i+p/2-1} + x_{i+p/2})/2, \qquad i=1,\ldots,N_x-1, \label{eq:evenKnots} \end{equation} while for odd $p$, $(p-1)/2$ interpolation points are \emph{skipped}: \begin{equation} t_i = x_{i+(p-1)/2 }, \qquad i=1,\ldots,N_x-1. \label{eq:oddKnots} \end{equation} Instead of skipping grid points, an alternative would be to supplement the system of equations (\ref{eq:intEq}) with conditions on derivatives of $f(x)$ at one or both ends of $[a,b]$. This type of boundary conditions is not implemented in the present version of the \texttt{BSPLINES} module. \subsection{The collocation matrix} The \emph{collocation matrix} $\Lambda^p_i(x_k)$ of the interpolation problem (\ref{eq:intEq}) is a square matrix. Each row has at most $p+1$ non-zero terms. Let us consider separately the non-periodic and the periodic cases. \subsubsection{The non-periodic case} \paragraph{Even spline degree} From (\ref{eq:evenKnots}), there are $p/2+1$ interpolation points \(x_{0}, \ldots, x_{p/2}\) in the first knot interval $[t_0,t_1)$. Since there are at most $p+1$ non-zero splines for any points in each interval (except for $x_0$ where $\Lambda_i(x_0)=\delta_{i,0}$, the collocation matrix starts as: \begin{equation} \left(\begin{array}{llllll} \Lambda_0(x_{0}) & 0 &\cdots & \cdots & \cdots &\cdots \\ \Lambda_0(x_{1}) &\Lambda_1(x_{1}) &\cdots &\Lambda_p(x_{1}) & 0 &\cdots \\ \vdots &\vdots &\cdots &\vdots & 0 &\cdots \\ \Lambda_0(x_{p/2})&\Lambda_1(x_{p/2}) &\cdots &\Lambda_p(x_{p/2}) & 0 &\cdots \\ 0 &\Lambda_1(x_{p/2+1})&\cdots &\Lambda_p(x_{p/2+1})& \Lambda_{p+1}(x_{p/2+1})&0 \\ 0 &\ddots &\ddots &\ddots & \ddots &\ddots \end{array}\right) \end{equation} The number of \emph{upper-diagonals} (non including the diagonal) is obviously determined by the second row of the matrix above, which yields $p-1$. Since the knot placement is identical for both ends of the interpolation mesh, the matrix $\Lambda_i(x_k)$ is \emph{banded} with half-bandwidths \begin{equation} kl=ku=p-1 \end{equation} \paragraph{Odd spline degree} Applying the same procedure, it is straightforward to show for $p$ odd and from (\ref{eq:oddKnots}), that $x_0,\ldots.x_{(p-1)/2}$ are located in the first knot interval $[t_0,t_1)$ and that the matrix has again the same half-bandwidths as in the even $p$ case. The resulting interpolation problem can then be solved with the usual \emph{banded matrix factorization} followed by a \emph{back-solve} phase. \subsubsection{The periodic case} Let consider the matrix for $p=3$ and $N_x=10$ (see lower figure of Fig.~(\ref{fig:cubic_splines}): \begin{equation} \left(\begin{array}{llllll} \Lambda_0(x_{0}) & \Lambda_1(x_{0}) & \Lambda_2(x_{0}) & 0 & \cdots & \\ 0 & \Lambda_1(x_{1}) & \Lambda_2(x_{1}) & \Lambda_3(x_{1}) & 0 &\cdots \\ \vdots & \ddots & \ddots & \ddots & \ddots &\ddots \\ 0 & \cdots & 0 & \Lambda_7(x_{7}) & \Lambda_8(x_{7}) & \Lambda_9(x_{7}) \\ \Lambda_0(x_{8}) & 0 & 0 & \cdots &\Lambda_8(x_{8}) & \Lambda_9(x_{8}) \\ \Lambda_0(x_{9}) &\Lambda_1(x_{9}) & 0 & 0 & \cdots & \Lambda_9(x_{9}) \end{array}\right) \label{eq:perMat} \end{equation} The matrix is ``almost triangular'' (except for the last 2 rows) and is not \emph{diagonally dominant}! A more satisfactory (and symmetric in shape) matrix is however obtained by simply renumbering the splines such that the sequence starts with $-\lfloor p/2 \rfloor$ instead of $0$. This renumbered splines are shown in Fig.~\ref{fig:fitSpl} for the cubic and quadratic periodic splines. With this renumbering, the matrix (\ref{eq:perMat}) has a more symmetric shape and is diagonally dominant: \begin{equation} \left(\begin{array}{lllll} \Lambda_0(x_{0}) & \Lambda_1(x_{0}) & 0 & \cdots & \Lambda_9(x_{0}) \\ \Lambda_0(x_{1}) & \Lambda_1(x_{1}) & \Lambda_2(x_{1}) & 0 &\cdots \\ \vdots & \ddots & \ddots & \ddots &\ddots \\ 0 & \cdots & \Lambda_7(x_{8}) & \Lambda_8(x_{8}) & \Lambda_9(x_{8}) \\ \Lambda_0(x_{9}) & 0 & 0 & \Lambda_8(x_{9}) & \Lambda_9(x_{9}) \end{array}\right) \label{eq:perMatnew} \end{equation} In general, for arbitrary $p$ (even and odd values), the collocation matrix $A=\Lambda_j(x_i)$ can be written as \begin{equation} A = B + UV^T \end{equation} where $B$ is a banded matrix with half-bandwidths $kl=ku=b=\lfloor p/2\rfloor$ and rank $N_x$. $U$ and $V$ are $N_x\times 2b$ sparse matrices: \begin{equation} U = \left( \begin{matrix} I & 0 \\ 0 & 0 \\ 0 & I \end{matrix}\right), \qquad V = \left( \begin{matrix} 0 & D^T \\ 0 & 0 \\ C^T & 0 \end{matrix}\right), \qquad V^T = \left( \begin{matrix} 0 & 0 & C \\ D & 0 & 0 \end{matrix}\right), \qquad \end{equation} where $C$, $D$ are the $b\times b$ \emph{off-band} sub-matrices and $I$, the identity matrix. In the cubic spline example considered above, the \emph{off-band} matrices are simply $1\times 1$ matrices with $C=\Lambda_9(x_0)$ and $D=\Lambda_0(x_9)$. The inverse of $A$ can be deduced from the \emph{Sherman-Morrison-Woodbury formula} \cite{Golub}: \begin{eqnarray*} A^{-1} &=& B^{-1} - B^{-1}U(1+V^{T}B^{-1}U)^{-1}V^{T}B^{-1} \\ &=& B^{-1} - ZW^{T}B^{-1}, \end{eqnarray*} where \begin{eqnarray*} Z &=& B^{-1}U, \\ H &=& 1+V^{T}B^{-1}U \\ W^T &=& H^{-1}V^{T}. \end{eqnarray*} The solution of the interpolation problem $Ax=f$ can then be reduced to a \emph{factorization} and a \emph{back-solve} phase: \begin{enumerate} \item Factorization \begin{enumerate} \item Factor: \( B \longleftarrow L_BU_B \) \item Solve: \( (L_BU_B)Z = U, \quad U\longleftarrow Z \) \item Compute: \( H = 1+V^{T}Z \) \item Factor: \( H=L_HU_H \) \item Solve: \( (L_HU_H)W^{T} = V^{T}, \quad V^{T}\longleftarrow W^{T} \) \end{enumerate} \item Back-solve \begin{enumerate} \item Solve: \( (L_BU_B)y = f \) \item Compute: \( t = W^{T}y \) \item Compute: \( x = y - Zt \) \end{enumerate} \end{enumerate} At the end of the factorization, only the (updated) matrices $B$, $U$ and $V^{T}$, required in the back-solve phase, need to be saved. Note that we avoid to store the product $ZW^T$ because it is a \emph{big} $N_x\times N_x$ matrix. After the \emph{back-solve} step, the solution $x$ is \emph{shifted back} (by $\lfloor p/2\rfloor$) and the appropriate periodicity condition is applied to obtain the spline coefficients $c_j,\, j=0,\ldots,N_x+p-1$, as defined in (\ref{eq:splExp}). \begin{figure}[htbp] \centering \includegraphics[angle=0,width=\hsize]{fit} \caption{The periodic cubic and quadratic splines used for interpolation. The spline knots are indicated by \emph{blue full circles} and the interpolation points, by \emph{dashed vertical lines} } \label{fig:fitSpl} \end{figure} \subsection{\texttt{PP} representation} The computation of $f(x)$ using directly the spline expansion Eq.~(\ref{eq:splExp}) can be costly, because of the evaluation of the splines $\Lambda^p_j(x)$, especially when interpolating on large number of points. Expanding $f(x)$, using truncated Taylor series in each interval $[t_\mu,t_{\mu+1}]$, we obtain the following \emph{Piecewise Polynomial Function} representation (or \texttt{ppform}) of $f(x)$: \begin{equation} f(x) = \sum^p_{k=0}\, \Pi_{k\mu}(x-t_\mu)^k, \quad t_\mu\leq x. % % @authors % (in alphabetical order) % @author Trach-Minh Tran % \documentclass[a4paper]{article} \usepackage{amsmath} \title{\tt Some Notes on Boundary Conditions} \author{Trach-Minh Tran} \date{March 2012} \begin{document} \maketitle \section{Neumann BC as an {essential} BC} The original equation: \begin{equation} \mathbf{A \cdot u} = \mathbf{b} \end{equation} with the Neumann BC (1D case): \begin{equation} \alpha u_1 + \beta u_2 = c. \end{equation} From Eq.(20) of \cite{BSPLINES}: \begin{equation} \beta = -\alpha =\frac{p}{\Delta_1} \end{equation} where $p$ is the degree of spline and $\Delta_1$ is the lenght of the first insterval. Transformation $(u_1, \ldots, u_n) \Rightarrow (\hat u_1, \ldots, \hat u_n)$ defined by \begin{equation} \begin{array}{ccc} \alpha u_1 + \beta u_2 = \hat u_1 & & u_1 = \frac{1}{\alpha}\hat u_1 - \frac{\beta}{\alpha}\hat u_2 \\ u_2 = \hat u_2 & & u_2 = \hat u_2 \\ \vdots & \Longrightarrow & \vdots \\ u_N = \hat u_N & & u_N = \hat u_N. \end{array} \end{equation} The original Neumann BC becomes now a \emph{inhomogeneous Dirichlet} BC on $\mathbf{\hat u}$: \begin{equation} \hat u_1 = c. \end{equation} The transformed linear system can be written as: \begin{equation} \mathbf{(U^T\cdot A \cdot U)\cdot \hat u} = \mathbf{U^T\cdot b}, \end{equation} where $\mathbf{U}$ is given by \begin{equation} \mathbf{U} = \left(\begin{matrix} \frac{1}{\alpha} & -\frac{\beta}{\alpha} & \dots & 0 \\ 0 & 1 & \dots & 0 \\ & & \ddots& \vdots \\ 0 & 0 & \dots & 1 \end{matrix}\right) \end{equation} Thus, all the symmetry, hermiticity or positivity properties of the original matrix are preserved with this matrix transformation! \section{Neumann BC as a \emph{natural} BC} Multiplying the 1D Sturm-Liouville equation (see section 1.1.1 of \cite{SOLVERS}) by spline $\Lambda_j(x)$ and integrating by parts, we obtain the following boundary terms: \begin{equation} -\Lambda_j(L) C_1(L) \phi'(L) + \Lambda_j(0) C_1(0) \phi'(0) \end{equation} To impose $\phi'(0) = a$ and noting that $\Lambda_j(0)=\delta_{j1}$, you only need to add $[-aC_1(0)]$ to the first element of the RHS. Likewise, for the BC $\phi'(L) = b$ you only need to add $[bC_1(L)]$ to the last element of the RHS. No matrix manipulation (as for the \emph{essential} BC) is required! Notice that if $a$ or $b$ is zero, nothing needs to be done to impose these BC. That's the reason why it is called \emph{natural} BC! A subtle point to be noted here is that using \emph{natural} BC, $\phi'(0)$ \emph{is not} exaclty equal to $a$, althought it should converge to $a$ as $(\Delta x)^p$ where $p$ is the spline degree, while using the \emph{essential} BC, $\phi'(0)=a$ is \emph{exact}! \section{Diffusion Equation using second order time implicit method} Let rewrite Eq.(74) of your notes in vector form and replace the unkowns $n$ by $f$: \begin{equation} \mathbf{B} \frac{d \mathbf{f}}{dt} = \mathbf{M\cdot f}. \end{equation} Using a \emph{second order time centered} discretization, \begin{equation} \begin{split} \mathbf{B} \left(\frac{\mathbf{f}^{n+1}-\mathbf{f}^{n}}{\Delta t}\right) &= \mathbf{M} \left(\frac{\mathbf{f}^{n+1}+\mathbf{f}^{n}}{2}\right) \\ \Rightarrow & \left(\mathbf{B} -\frac{\Delta t}{2} \mathbf{M}\right)\mathbf{f}^{n+1} = \left(\mathbf{B} +\frac{\Delta t}{2} \mathbf{M}\right)\mathbf{f}^{n} \end{split} \end{equation} \emph{Essential} BC has to be imposed on the matrix \begin{equation} \mathbf{B} -\frac{\Delta t}{2} \mathbf{M} \end{equation} while \emph{natural} BC is introduced while deriving the weak form leading to the matrix $M$. This method is \emph{unconditionnaly stable} and second order in time. When linear splines are used for the space discretization, this scheme is similar to the well-known \emph{Cranck-Nicolson} (see for example Wikipedia) discretization for parabolic PDE. \begin{thebibliography}{99} \bibitem{BSPLINES} {\tt BSPLINES} Reference Guide. \bibitem{SOLVERS} {\tt The SOLVERS in BSPLINES} Reference Guide. \end{thebibliography} \end{document} diff --git a/docs/manual/solvers.tex b/docs/manual/solvers.tex index 8abdf13..5c4bbf8 100644 --- a/docs/manual/solvers.tex +++ b/docs/manual/solvers.tex @@ -1,2818 +1,2818 @@ % % @file solvers.tex % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % \documentclass[a4paper]{article} \usepackage{linuxdoc-sgml} \usepackage{graphicx} \usepackage{hyperref} \usepackage{amsmath} %\usepackage{verbatim} %\usepackage[notref]{showkeys} \title{\tt The Solvers in BSPLINES} \author{Trach-Minh Tran} \date{v0.6, December 2011} \abstract{Implementation of a common simple interface to popular solver packages (LAPACK, PARDISO, WSMP, PETSc, etc.). The main goal is to provide an easy access to these packages in order to solve elliptic and parabolic as well as some types of integro-differential equations.} \begin{document} \maketitle \tableofcontents \section{Matrix form of the problem} \subsection{Getting started} \subsubsection{A one-dimensional problem} Let us start with the one-dimensional Sturm-Liouville differential equation: \begin{equation*} -\frac{d}{dx} \left[C_1(x) \frac{d\phi}{dx}\right] + C_2(x)\phi = \rho, \end{equation*} on the domain $0\leq x \leq L$ with suitable boundary conditions. On a grid with $N$ intervals, the discretized solution $\phi$, using the splines $\Lambda_i(x)$ of order $p$ can be written as \begin{equation} \label{sol1d} \phi(x) = \sum_{i=0}^{d-1} \phi_i\Lambda_i(x), \end{equation} where \cite{BSPLINES} \begin{equation*} d = \begin{cases} N & \text{if $\phi$ is periodic}, \\ N+p & \text{otherwise}, \end{cases} \end{equation*} and $\phi_i$ are the unknowns of the following matrix equation: \begin{equation} \label{matEq1d} \sum_{i'=0}^{d-1} A_{ii'}\phi_{i'} = \rho_i, \qquad i=0,\ldots, d-1. \end{equation} Here the matrix $A_{ii'}$ and the right-hand-side $\rho_i$ are respectively given by: \begin{equation} \begin{split} \label{matCoef1d} A_{ii'} =& \int_{0}^{L}\!dx\,C_1\Lambda_{i}^{'}\Lambda_{i'}^{'} + \int_{0}^{L}\!dx\,C_2\Lambda_{i}\Lambda_{i'}, \\ \rho_i =& \int_{0}^{L}dx \rho\Lambda_i.\\ \end{split} \end{equation} For more general differential operators, the matrix coefficients $A_{ii'}$ can be written as a sum of contributing matrices of the form \begin{equation} \label{mat1d} A_{ii'} = \int_{0}^{L}\!dx\,C\Lambda_{i}^{\alpha}\Lambda_{i'}^{\alpha'}, \end{equation} where $\Lambda_{i}^{\alpha}$ denotes the $\alpha^\text{th}$ derivative of $\Lambda_{i}$. As the splines $\Lambda_i$ have a support of $p+1$ intervals, the matrix is sparse and its coefficients $A_{ii'}$ are non-zero only for $|i-i'| \leq p$: hence the matrix has a band structure of bandwidth equal to $2p+1$ if the operator is purely differential. For an integral equation such as \begin{equation*} \int_{0}^{L}\!dx' K(x,x')\phi(x') = \rho(x), \end{equation*} the discretization results in a \emph{dense} matrix of the form \begin{equation} \label{matIntg1d} A_{ii'} = \int_{0}^{L}\!dx \Lambda_{i}(x) \int_{0}^{L}\!dx' K(x,x')\Lambda_{i'}(x'). \end{equation} Note that when the kernel is separable $K(x,x') = U(x)V(x')$, the matrix $A_{ii'}$ is a \emph{dyadic}: \begin{equation} A_{ii'} = \int_{0}^{L}\!dx U(x) \Lambda_{i}(x) \int_{0}^{L}\!dx V(x)\Lambda_{i'}(x) = U_iV_{i'}. \end{equation} \subsubsection{Periodic boundary conditions} The splines $\Lambda_i$ are $N$-periodic ($\Lambda_{i+N}(x)=\Lambda_i(x-L)$). This property can be easily enforced while constructing both $\rho_i$ and the matrix $A_{ii'}$. This results in a solution $\phi_i$ which is also $N$-periodic. \subsubsection{Non-periodic boundary conditions} In {\tt BSPLINES} \cite{BSPLINES}, the constructed non-periodic splines are such that at the boundaries $x=0$ and $x=L$: \begin{equation} \Lambda_i(0) = \delta_{i,0}, \qquad \Lambda_i(L) = \delta_{i,N+p-1}, \end{equation} which imply that, using (\ref{sol1d}) \begin{equation} \phi(0) = \phi_0, \qquad \phi(L) = \phi_{N+p-1}. \end{equation} It is thus possible to impose the Dirichlet boundary conditions by a simple modification of the matrix $A_{ii'}$ as shown in Appendix~\ref{DirichletCond}. \subsection{Problems in more dimensions} \subsubsection{Two-dimensional equations} The results obtained above can be extended in a straightforward manner. Assuming, for example a \emph{polar like} $(r,\theta)$ coordinate system, with the discretized solution and the right-hand side written as: \begin{equation} \label{discreteEq2d} \begin{split} \phi(r,\theta) &= \sum_{i=0}^{N_r+p_r-1}\sum_{j=0}^{N_\theta-1} \phi_{ij}\Lambda_i(r) \Lambda_j(\theta) \\ \rho_{ij} &= \int_{0}^{R}\!dr \int_{0}^{2\pi} \!d\theta J(r,\theta) \rho(r,\theta) \Lambda_i(r) \Lambda_j(\theta),\\ \end{split} \end{equation} where $J(r,\theta)$ is the Jacobian, the matrix equation to solve is \begin{equation} \sum_{i'=0}^{N_r+p_r-1}\sum_{j'=0}^{N_\theta-1} A_{iji'j'}\phi_{i'j'} = \rho_{ij}, \end{equation} with the matrix $A_{iji'j'}$ expressed as a sum of matrices of the form: \begin{equation} \label{mat2d} A_{iji'j'} = \int_{0}^{R}\!dr \int_{0}^{2\pi}\!d\theta\, C(r,\theta)\,\Lambda_{i}^{\alpha}(r)\Lambda_{i'}^{\alpha'}(r)\, \Lambda_{j}^{\beta}(\theta)\Lambda_{j'}^{\beta'}(\theta). \end{equation} \subsubsection{Three-dimensional equations} Likewise, for the three-dimension case, assuming for example a \emph{toroidal like} $(r,\theta, \varphi)$ coordinate system, with the discretized solution and the right-hand side written as: \begin{equation} \label{discreteEq3d} \begin{split} \phi(r,\theta, \varphi) &= \sum_{i=0}^{N_r+p_r-1}\sum_{j=0}^{N_\theta-1} \sum_{k=0}^{N_\varphi-1} \phi_{ijk}\Lambda_i(r) \Lambda_j(\theta) \Lambda_k(\varphi) \\ \rho_{ijk} &= \int_{0}^{R}\!dr \int_{0}^{2\pi} \!d\theta \int_{0}^{2\pi} \!d\varphi J(r,\theta,\varphi) \rho(r,\theta,\varphi) \Lambda_i(r) \Lambda_j(\theta) \Lambda_k(\varphi),\\ \end{split} \end{equation} where $J(r,\theta,\varphi)$ is the Jacobian, the matrix equation to solve is \begin{equation} \sum_{i'=0}^{N_r+p_r-1}\sum_{j'=0}^{N_\theta-1}\sum_{k'=0}^{N_\varphi-1} A_{ijki'j'k'}\phi_{i'j'k'} = \rho_{ijk}, \end{equation} with the matrix $A_{ijki'j'k'}$ expressed as a sum of matrices of the form: \begin{equation} \label{mat3d} A_{ijki'j'k'} = \int_{0}^{R}\!dr \int_{0}^{2\pi}\!d\theta\ \int_{0}^{2\pi}\!d\varphi\, C(r,\theta,\varphi)\,\Lambda_{i}^{\alpha}(r)\Lambda_{i'}^{\alpha'}(r)\, \Lambda_{j}^{\beta}(\theta)\Lambda_{j'}^{\beta'}(\theta) \Lambda_{k}^{\gamma}(\varphi)\Lambda_{k'}^{\gamma'}(\varphi). \end{equation} \subsubsection{Unicity condition} In the case of the polar coordinates $(r,\theta)$ considered above, the unicity condition on the axis $r=0$ should be imposed. It can be enforced by modifications of the matrix $A$ as described in Appendix~\ref{unicityCond}. \subsubsection{One-dimensional numbering} For two-dimensional (three-dimensional) problems, the solution $\phi_{ij}$ ($\phi_{ijk}$) as well as the right-hand-side $\rho_{ij}$ ($\rho_{ijk}$) can be conveniently casted into one-dimensional arrays. As an example, by numbering first the last index, we obtain the following mappings: \begin{equation} \label{map1d} \mu = \begin{cases} j + iN_\theta & \text{two-dimensional case} \\ k + (j + iN_\theta)N_\varphi & \text{three-dimensional case} \\ \end{cases} \end{equation} Using such a one-dimensional numbering, the matrix equation for the two and three dimensional cases takes a more conventional form: \begin{equation} \sum_{\mu'=0}^{r-1} A_{\mu\mu'} \phi_{\mu'} = \rho_\mu, \end{equation} with the respective matrix ranks $r=(N_r+p_r)N_\theta$ and $r=(N_r+p_r)N_\theta N_\varphi$. For a pure differential operator, the matrix $A_{\mu\mu'}$ has a band structure of bandwidth $b=2(p_r+1)N_\theta-1$ and $b=2(p_r+1)N_\theta N_\varphi-1$ respectively. It is important to note that, except for the one-dimensional problem, there are \emph{many} zeros inside the matrix band! \section{The module {\tt MATRIX}} \subsection{Interface} The Fortran module {\tt MATRIX} contains easy-to-use routines to solve the matrix equation formulated in the previous section, using the direct solvers of LAPACK. The different matrix storage formats are defined, using the Fortran derived datatypes. The different types in the present version are listed in Table~\ref{matTypes}. \begin{table}[h] \centering \begin{tabular}{|l|l|}\hline {\tt gemat} & General dense matrix \\ {\tt gbmat} & General band matrix \\ {\tt pbmat} & Symmetric positive-definite band matrix \\ {\tt periodic\_mat} & Matrix obtained for example in \\ & one-dimensional periodic problems\\ \hline \end{tabular} \caption{The matrix types} \label{matTypes} \end{table} These types define {\tt DOUBLE PRECISION} real matrices. {\tt DOUBLE COMPLEX} matrices are declared by prefixing these types with the letter ``{\tt z}'', for example {\tt zgbmat}. Note that {\tt zpbmat} defines a \emph{hermitian} positive-definite complex matrix. The \emph{generic} routines are defined for each of these types in Table~\ref{matRoutines}. Note that the routine {\tt updtmat} is mainly used for the matrix assembly while {\tt getxxx} and {\tt putxxx} are rather used to modify the matrix, for example to impose boundary conditions. \begin{table}[h] \centering \begin{tabular}{|l|l|} \hline {\tt init} & Initializes the data structure \\ {\tt destroy} & Free the data structure memory \\ \hline {\tt updtmat} & Accumulates a value to the element $A_{ij}$ \\ {\tt putele, putrow, putcol} & Overwrites the matrix element $(i,j)$, row $i$, column $j$ \\ {\tt getele, getrow, getcol} & Returns the matrix element $(i,j)$, row $i$, column $j$ \\ {\tt vmx} & Returns the matrix-vector product \\ {\tt mcopy} & Copy a matrix to another matrix \\ {\tt maddto} & Adds 2 matrices: $A \leftarrow A+\alpha B$ \\ {\tt determinant} & Returns the matrix determinant \\ \hline {\tt factor} & Computes the LU (Cholesky for symmetric/hermitian \\ & matrix) factorization \\ {\tt bsolve} & Solves the linear system using the factorized matrix \\ \hline \end{tabular} \caption{The generic routines in the {\tt MATRIX} module} \label{matRoutines} \end{table} The complete description of each routine is given in Appendix~\ref{matRef}. More information on how to use it can be obtained by {\tt greping} its name on the examples found in the {\tt examples/} directory. \subsection{A two-dimensional example} \label{twodEx} Let's consider the Poisson equation using the cylindrical coordinates $(r,\theta)$: \begin{equation} -\frac{1}{r}\frac{\partial}{\partial r} \left[r\frac{\partial\phi}{\partial r}\right] -\frac{1}{r^2}\frac{\partial^2\phi}{\partial\theta^2} = \rho, \qquad \phi(r=1,\theta) = 0. \end{equation} Assuming the exact solution \begin{equation*} \phi(r,\theta) = (1-r^2)r^m\cos m\theta, \end{equation*} the right-hand-side becomes \begin{equation*} \rho=4(m+1)r^{m}\cos m\theta. \end{equation*} The matrix and the right hand-side of the discretized problem are computed as \begin{equation} \begin{split} A_{iji'j'} &= \int_{0}^1\!dr \int_{0}^{2\pi}\!d\theta\,\left[ r\,\Lambda'_{i}(r)\Lambda'_{i'}(r)\, \Lambda_{j}(\theta)\Lambda_{j'}(\theta) + \frac{1}{r}\,\Lambda_{i}(r)\Lambda_{i'}(r)\, \Lambda'_{j}(\theta)\Lambda'_{j'}(\theta) \right] \\ \rho_{ij} &= \int_{0}^1\!dr \int_{0}^{2\pi} \!d\theta\,\,r \rho(r,\theta) \,\,\Lambda_i(r) \Lambda_j(\theta). \end{split} \end{equation} In the example {\tt pde2d.f90} this problem is treated in detail. In the following, only the calls to the {\tt MATRIX} routines are reviewed to show how the matrix problem is solved using the {\tt MATRIX} module. \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} ! ! Declare a General Band matrix ! USE matrix USE conmat_mod TYPE(gbmat) :: mat ! ! Rank and bandwidth. nidbas(1) is the spline order in ! the first dimension r. ! nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix kl = (nidbas(1)+1)*ny -1 ! Number of sub-diagnonals ku = kl ! Number of super-diagnonals nterms = 2 ! Number of terms in the weak form ! ! Initialize the matrix data structure ! CALL init(kl, ku, nrank, nterms, mat) ! ! Construct the matrix, using 2D spline splxy ! and impose boundary conditions ! CALL conmat(splxy, mat, coefeq_poisson) CALL ibcmat(mat, ny) ! ! Compute the RHS, using the 2D spline splxy ! and impose boundary conditions ! CALL disrhs(mbess, splxy, rhs) CALL ibcrhs(rhs, ny) ! ! Factor the matrix and solve ! CALL factor(mat) CALL bsolve(mat, rhs, sol) ! ... CONTAINS SUBROUTINE coefeq_poisson(x, y, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x, y INTEGER, INTENT(out) :: idt(:,:), idw(:,:) DOUBLE PRECISION, INTENT(out) :: c(:) ! c(1) = x idt(1,1) = 1; idt(1,2) = 0 idw(1,1) = 1; idw(1,2) = 0 ! c(2) = 1.d0/x idt(2,1) = 0; idt(2,2) = 1 idw(2,1) = 0; idw(2,2) = 1 END SUBROUTINE coefeq_poisson \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} Some explanations and remarks: \begin{itemize} \item The matrix construction is performed by {\tt conmat} which will be described later in section \ref{secCONMAT}. The \emph{weak form} is defined in the \emph{internal} procedure {\tt coefeq\_poisson} and passed as an argument to {\tt conmat}. See section \ref{secCONMAT} for a detailed description of the variables {\tt c, idt, idw} returned by {\tt coefeq\_poisson}. \item Boundary conditions are imposed by modifications of the matrix in subroutine {\tt ibcmat} (see file {\tt the ibcmat.f90}), using the {\tt MATRIX} routines {\tt getrow, putrow, getcol, putcol}. \item The construction of the right-hand-side in {\tt disrhs} (see the file {\tt disrhs.f90}) is computed using a Gauss quadrature.. \item Using the {\tt pbmat} type instead of {\tt gbmat} (the matrix in this example is symmetric and positive-definite!) requires only a few modifications of the program (see the complete example {\tt pde2d\_pb.f90}): \begin{itemize} \item Change the type {\tt gbmat} to {\tt pbmat} in all matrix declarations \item Change the list of arguments in the routine {\tt init} to ({\tt ku, nrank, nterms, mat}) \item Small changes in the boundary conditions ({\tt ibcmat} and {\tt ibcrhs}) to take into account the symmetry. \end{itemize} \item The module {\tt MATRIX} can be used independently of {\tt BSPLINES} (which is used here only to compute the matrix and right-hand-side), for example in a problem discretized using Finite Differences. \end{itemize} \section{Sparse matrix storage} Using the \emph{band matrix format} for a pure differential operator requires to store a full bandwidth $b=2(p_r+1)N_\theta-1$ for the two-dimensional problem as shown in section 1, while there are only $(2p_r+1)^2$ non-zero elements per matrix row. In three-dimensional problem, it is much worse since $b=2(p_r+1)N_\theta N\varphi-1$ for $(2p_r+1)^3$ non-zero elements. In order to reduce the matrix storage, a solution consists of just storing the matrix non-zero elements and use the \emph{Sparse Direct Solvers}. With an optimal \emph{renumbering} strategy (or \emph{fill-in reducing ordering}), the size of the factored matrix can be expected to be smaller than the corresponding band matrix. An alternative is to use \emph{iterative} methods which usually need less memory. Such a sparse matrix is implemented in the {\tt SPARSE} module where each matrix row is represented by a \emph{\tt linked} list of elements with sorted column index. The data structure of this sparse matrix is wrapped up in a the Fortran data type {\tt spmat} for a real matrix and {\tt zspmat} for a complex matrix. Most of the generic routines which are already defined in the {\tt MATRIX} module are overloaded for these matrix types. They are listed in Table~\ref{spmatRoutines}. The complete documentation of these routines can be found in Appendix~\ref{spmatRef}. \begin{table}[h] \centering \begin{tabular}{|l|l|} \hline \emph{Matrix types} & {\tt spmat, zspmat} \\ \hline\hline {\tt init} & Initializes the data structure \\ {\tt destroy} & Free the data structure memory \\ \hline {\tt updtmat} & Accumulates a value to the element $A_{ij}$ \\ {\tt putele, putrow, putcol} & Overwrites the matrix element $(i,j)$, row $i$, column $j$ \\ {\tt getele, getrow, getcol} & Returns the matrix element $(i,j)$, row $i$, column $j$ \\ \hline {\tt get\_count} & Get the number of non-zero elements in matrix \\ \hline \end{tabular} \caption{The generic routines in the {\tt SPARSE} module} \label{spmatRoutines} \end{table} It should be noted that this module is \emph{not} used directly in solver problems. One usually uses instead modules which are specific to a type of (direct or iterative) solver. As will be shown in the next section, it is the routines in this solver module which directly calls the routines defined in the {\tt SPARSE} module during the matrix assembly. \section{Solvers using the module {\tt SPARSE}} All the solvers discussed in this section use initially the module {\tt SPARSE} to construct the sparse matrix. Once this construction procedure is complete, this matrix is converted to the (usually more efficient) format used by the solver. In a time-dependent simulation where the problem matrix changes but not the sparsity pattern, the subsequent matrix assembly will be performed directly on this solver's format. Thus for example, the first time {\tt updtmat} is called on a new matrix, it is the version from {\tt SPARSE}. Next, if {\tt updtmat} is called again to modify the matrix, it will be the solver's version, unless the matrix is re-initialized by a call to {\tt destroy} followed by {\tt init}. This switch is completely transparent for the user as shown through an example in the next section. \subsection{The PARDISO direct solver} The interface to PARDISO~\cite{PARDISO} is implemented in the module {\tt PARDISO\_BSPLINES}. The matrix type (symmetric, hermitian, positive-definite) is set by the arguments {\tt nlsym, nlherm} and {\tt nlpos} passed to the generic routine {\tt init}. All the other generic routines defined in the module {MATRIX}, plus routines specific to the sparse solver, are listed in Table~\ref{pardisoRoutines}. The complete documentation of these routines is given in Appendix~\ref{pardisoRef}. \begin{table}[h] \centering \begin{tabular}{|l|l|} \hline \emph{Matrix types} & {\tt pardiso\_mat, zpardiso\_mat} \\ \hline\hline {\tt init} & Initializes the data structure \\ {\tt destroy} & Free the data structure memory \\ \hline {\tt updtmat} & Accumulates a value to the element $A_{ij}$ \\ {\tt putele, putrow, putcol} & Overwrites the matrix element $(i,j)$, row $i$, column $j$ \\ {\tt getele, getrow, getcol} & Returns the matrix element $(i,j)$, row $i$, column $j$ \\ {\tt vmx} & Returns the matrix-vector product \\ {\tt mcopy} & Copy a matrix to another matrix \\ {\tt maddto} & Adds 2 matrices: $A \leftarrow A+\alpha B$ \\ {\tt clear\_mat}& Set the matrix elements to zero.\\ {\tt psum\_mat} & Parallel sum of matrices \\ {\tt p2p\_mat} & Point-to-point combine sparse matrix between 2 processes\\ {\tt get\_count}& Get the number of non-zero elements in matrix \\ \hline {\tt factor} & Factorization \\ {\tt bsolve} & Solves the linear system using the factorized matrix \\ {\tt to\_mat} & Convert to PARDISO CSR matrix format \\ {\tt reord\_mat}& Reordering and symbolic factorization \\ {\tt numfact} & Numerical factorization \\ \hline \end{tabular} \caption{The generic routines in the {\tt PARDISO\_BSPLINES} module} \label{pardisoRoutines} \end{table} Below, a complete example solving a simple two-dimensional Poisson discretized by the 5 point Finite Difference method illustrates how to use the {\tt PARDISO\_BSPLINES} module. \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} PROGRAM main USE pardiso_bsplines IMPLICIT NONE TYPE(pardiso_mat) :: amat DOUBLE PRECISION, ALLOCATABLE :: rhs(:), sol(:), arow(:) INTEGER :: nx=5, ny=4 INTEGER :: n, nnz INTEGER :: i, j, irow, jcol ! WRITE(*,'(a)', advance='no') 'Enter nx, ny: ' READ(*,*) nx, ny n = nx*ny ! Rank of the matrix ALLOCATE(rhs(n)) ALLOCATE(sol(n)) ALLOCATE(arow(n)) ! CALL init(n, 1, amat, nlsym=.TRUE.) ! Pardiso mat, symmetric case ! ! Construct the matrix and RHS ! DO j=1,ny DO i=1,nx arow = 0.0d0 irow = numb(i,j) arow(irow) = 4.0d0 IF(i.GT.1) arow(numb(i-1,j)) = -1.0d0 IF(i.LT.nx) arow(numb(i+1,j)) = -1.0d0 IF(j.GT.1) arow(numb(i,j-1)) = -1.0d0 IF(j.LT.ny) arow(numb(i,j+1)) = -1.0d0 CALL putrow(amat, irow, arow) rhs(irow) = SUM(arow) ! => the exact solution is 1 END DO END DO ! WRITE(*,'(a,i6)') 'Number of non-zeros of matrix', get_count(amat) ! ! Factor the amat matrix (Reordering, symbolic and numerical factorization) ! CALL factor(amat) ! ! Back solve ! CALL bsolve(amat, rhs, sol) ! ! Check solutions ! WRITE(*,'(/a/(10f8.4))') 'Computed sol', sol WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(sol-1.0d0)) ! ! Clean up ! DEALLOCATE(rhs) DEALLOCATE(sol) DEALLOCATE(arow) CALL destroy(amat) CONTAINS INTEGER FUNCTION numb(i,j) ! ! One-dimensional numbering ! Number first x then y ! INTEGER, INTENT(in) :: i, j numb = (j-1)*nx + i END FUNCTION numb END PROGRAM main \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} It should be noted that \begin{itemize} \item The routine {\tt putrow} in the matrix construction loop uses the version from the {\tt SPARSE} module to create dynamically the matrix row using the linked list. \item The routine {\tt factor} calls successively the matrix conversion {\tt to\_mat}, the reordering and symbolic factorization routine {\tt reord\_mat} and finally the numerical factorization {\tt numfact}. One could indeed call these three routines separately instead of the single call to {\tt factor}, \item After solving the linear system, if the matrix is modified by calling for example {\tt putrow} again, it will modify directly the converted matrix and not on the {\tt spmat} matrix which is anyway \emph{destroyed} at the end of {\tt to\_mat}. \item If the matrix sparsity changes, the matrix should be re-initialized by calling the {\tt destroy} and {\tt init} routines. \end{itemize} Other examples can be found by running ``{\tt grep pardiso\_mat}'' on the F90 files in the directory {\tt examples/}. \subsection{The WSMP direct solver} The interface to WSMP~\cite{WSMP} is implemented in the module {\tt WSMP\_BSPLINES}. The matrix type (symmetric, hermitian, positive-definite) is set by the arguments {\tt nlsym, nlherm} and {\tt nlpos} passed to the generic routine {\tt init}. All the other generic routines defined in the module {MATRIX}, plus routines specific to the sparse solver, are listed in Table~\ref{wsmpRoutines}. The complete documentation of these routines is given in Appendix~\ref{wsmpRef}. \begin{table}[h] \centering \begin{tabular}{|l|l|} \hline \emph{Matrix types} & {\tt wsmp\_mat, zwsmp\_mat} \\ \hline\hline {\tt init} & Initializes the data structure \\ {\tt destroy} & Free the data structure memory \\ \hline {\tt updtmat} & Accumulates a value to the element $A_{ij}$ \\ {\tt putele, putrow, putcol} & Overwrites the matrix element $(i,j)$, row $i$, column $j$ \\ {\tt getele, getrow, getcol} & Returns the matrix element $(i,j)$, row $i$, column $j$ \\ {\tt vmx} & Returns the matrix-vector product \\ {\tt mcopy} & Copy a matrix to another matrix \\ {\tt maddto} & Adds 2 matrices: $A \leftarrow A+\alpha B$ \\ {\tt clear\_mat}& Set the matrix elements to zero.\\ {\tt psum\_mat} & Parallel sum of matrices \\ {\tt p2p\_mat} & Point-to-point combine sparse matrix between 2 processes\\ {\tt get\_count}& Get the number of non-zero elements in matrix \\ \hline {\tt factor} & Factorization \\ {\tt bsolve} & Solves the linear system using the factorized matrix \\ {\tt to\_mat} & Convert to WSMP CSR matrix format \\ {\tt reord\_mat}& Reordering and symbolic factorization \\ {\tt numfact} & Numerical factorization \\ \hline \end{tabular} \caption{The generic routines in the {\tt WSMP\_BSPLINES} module} \label{wsmpRoutines} \end{table} The simple Poisson example using the {\tt PARDISO\_BSPLINES} module shown in the previous section can be easily adapted to the WSMP interface since there are only two lines to change: the {\tt USE} and the matrix {\tt TYPE} lines. Other examples of how to use this interface can be found by running ``{\tt grep wsmp\_mat}'' on the F90 files the directory {\tt examples/}. The same solver functionality can be found in both the PARDISO and WSMP solvers as one can verify by comparing Table~\ref{pardisoRoutines} and Table~\ref{wsmpRoutines} or the description of routines in Appendix~\ref{pardisoRef} and Appendix~\ref{wsmpRef}. However, there is an important difference. While in PARDISO (and indeed also in LAPACK), it is possible to define several matrices to solve simultaneously, it appears that in WSMP, this is possible \emph{only} for symmetric and hermitian matrices: in the present 10.9 version, the routines to store and recall the solver context {\tt WSTOREMAT/WRECALLMAT} which are present in the symmetric version of the library are missing in the general version! A separate module named {\tt PWSMP\_BSPLINES} added the MPI \emph{parallelization} capability provided by WSMP. This parallel version implements the same user interface as shown in Table~\ref{wsmpRoutines}. The following considerations should be however taken in to account: \begin{enumerate} \item The coefficient matrix {\tt amat} is partitioned into blocks of \emph{contiguous} rows, with their indices defined in the interval [{\tt amat\%istart,amat\%iend}] which is defined after the call to {\tt init}. \item Calls to the routine {\tt updtmat} to update the matrix coefficients should not specify a row index \emph{outside} this interval. On the other hand, {\tt getxxx} will return 0 and {\tt putxxx} will ignore it if a row index \emph{not} in the range [{\tt amat\%istart,amat\%iend}] is passed to them. \item An \emph{optional} MPI communicator can be given to {\tt init} using the keyword {\tt comm\_in}. By default, the communicator {\tt MPI\_COMM\_WORLD} is used. \end{enumerate} A complete example using {\tt PWSMP\_BSPLINES} can be found in {\tt examples/pde2d\_pwsmp.f90}. \subsection{The MUMPS direct solver} {\tt MUMPS}~\cite{MUMPS} is a \emph{parallel sparse direct solver} using {\tt MPI} and is implemented in the module {\tt MUMPS\_BSPLINES}. User program using this module should be compiled and linked with {\tt MPI} even if only the serial version of the solver is needed, in which case the {\tt MPI\_COMM\_SELF} is passed to the initialization routine {\tt init} as an \emph{optional} argument with the keyword {\tt comm\_in}. Otherwise a valid {\tt MPI} communicator should be passed. By default {\tt comm\_in=MPI\_COMM\_SELF}. Note that it is possible to use both serial and parallel solvers in the same program to solve different matrix problems. As for {\tt PARDISO} and {\tt WSMP}, the same user interface to the {\tt MUMPS} solver is used and summarized in Table~\ref{mumpsRoutines}. The complete documentation of these routines is given in Appendix~\ref{mumpsRef}. \begin{table}[h] \centering \begin{tabular}{|l|l|} \hline \emph{Matrix types} & {\tt mumps\_mat, zmumps\_mat} \\ \hline\hline {\tt init} & Initializes the data structure \\ {\tt destroy} & Free the data structure memory \\ \hline {\tt updtmat} & Accumulates a value to the element $A_{ij}$ \\ {\tt putele, putrow, putcol} & Overwrites the matrix element $(i,j)$, row $i$, column $j$ \\ {\tt getele, getrow, getcol} & Returns the matrix element $(i,j)$, row $i$, column $j$ \\ {\tt vmx} & Returns the matrix-vector product \\ {\tt mcopy} & Copy a matrix to another matrix \\ {\tt maddto} & Adds 2 matrices: $A \leftarrow A+\alpha B$ \\ {\tt clear\_mat}& Set the matrix elements to zero.\\ {\tt psum\_mat} & Parallel sum of matrices \\ {\tt p2p\_mat} & Point-to-point combine sparse matrix between 2 processes\\ {\tt get\_count}& Get the number of non-zero elements in matrix \\ \hline {\tt factor} & Factorization \\ {\tt bsolve} & Solves the linear system using the factorized matrix \\ {\tt to\_mat} & Convert to WSMP CSR matrix format \\ {\tt reord\_mat}& Reordering and symbolic factorization \\ {\tt numfact} & Numerical factorization \\ \hline \end{tabular} \caption{The generic routines in the {\tt MUMPS\_BSPLINES} module} \label{mumpsRoutines} \end{table} \section{Fourier solver \cite{McMillan}} \subsection{The matrix equation in Fourier space} For a periodic one-dimensional problem, the solution $\phi_i$ and the right-hand-side $\rho_i$ in (\ref{matEq1d}) are $N$-periodic. Their Discrete Fourier Transform (DFT) can be defined as \begin{equation} \begin{split} \hat{\phi}_k = \sum_{j=0}^{N-1} \phi_j e^{i\frac{2\pi}{N}kj}, &\qquad \hat{\rho}_k = \sum_{j=0}^{N-1} \rho_j e^{i\frac{2\pi}{N}kj}, \\ \phi_j = \frac{1}{N}\sum_{k=0}^{N-1} \hat{\phi}_k e^{-i\frac{2\pi}{N}kj}, &\qquad \rho_j = \frac{1}{N}\sum_{k=0}^{N-1} \hat{\rho}_k e^{-i\frac{2\pi}{N}kj}. \end{split} \end{equation} Taking the DFT of Eq.~(\ref{matEq1d}), we obtain the following matrix equation in Fourier space: \begin{equation} \label{Fourier1d} \sum_{k'=0}^{N-1} \hat{A}_{kk'}\hat{\phi}_{k'} = \hat{\rho}_{k}, \end{equation} where $\hat{A}_{kk'}$ is the DFT of the original matrix. Following the notations in Eq.~(\ref{mat1d}) and assuming an \emph{equidistant} mesh with the interval $\Delta=L/N$, each of the DFT matrices of the weak form can be written as \begin{equation} \begin{split} \hat{A}_{kk'} &= \frac{1}{N}\sum_{j=0}^{N-1} e^{i\frac{2\pi}{N}kj} \sum_{j'=0}^{N-1} A_{jj'}e^{-i\frac{2\pi}{N}k'j'} \\ &= \frac{1}{N}\int_{0}^L\!\!dx\,C(x) \, \sum_{j=0}^{N-1} e^{i\frac{2\pi}{N}kj} \Lambda_j^\alpha (x)\, \sum_{j'=0}^{N-1} e^{-i\frac{2\pi}{N}k'j'} \Lambda_{j'}^{\alpha'} (x) \\ &= \frac{1}{N}\sum_{J=0}^{N-1}\int_{J\Delta}^{(J+1)\Delta}\!\!dx\,C(x) \, \sum_{j=0}^{N-1} e^{i\frac{2\pi}{N}kj} \Lambda_j^\alpha (x)\, \sum_{j'=0}^{N-1} e^{-i\frac{2\pi}{N}k'j'} \Lambda_{j'}^{\alpha'} (x) \end{split} \end{equation} Note that each of the last two sums is over the splines which are non-zero at a given $x$. Using the translational symmetry of the periodic splines: \begin{equation*} \sum_j e^{i\frac{2\pi}{N}kj} \Lambda_j^\alpha (x) = \sum_j e^{i\frac{2\pi}{N}kj} \Lambda_{j-J}^\alpha(x-J\Delta) = e^{i\frac{2\pi}{N}kJ}\, \hat{\Lambda}_{k}^\alpha(x-J\Delta), \end{equation*} where we have defined the DFT of splines $\hat{\Lambda}_{k}(x)$ as \begin{equation} \hat{\Lambda}_{k}^\alpha(x) = \sum_j\Lambda_j^\alpha(x) e^{i\frac{2\pi}{N}kj}, \end{equation} which are computed by the routine {\tt ft\_basfun} in the module {\tt BSPLINES} for any spline order $p$ and derivative $\alpha \le p$. The DFT matrices can now be written as: \begin{equation*} \hat{A}_{kk'} = \frac{1}{N}\sum_{J=0}^{N-1}\int_{J\Delta}^{(J+1)\Delta}\!\!dx\,C(x) \, e^{i\frac{2\pi}{N}J(k-k')} \hat{\Lambda}_k^\alpha (x-J\Delta)\, \left[\hat{\Lambda}_{k'}^{\alpha'} (x-J\Delta)\right]^{*}. \end{equation*} Finally, making the variable transform $x\rightarrow x+J\Delta$ and defining the DFT of the weak-form coefficient $C$ as \begin{equation} \hat{C}_{k}(x) = \sum_{J=0}^{N-1}C(x+J\Delta)\,e^{i\frac{2\pi}{N}Jk}, \end{equation} the DFT of the matrix $\hat{A}_{kk'}$ can be calculated as an integration over the first interval: \begin{equation} \hat{A}_{kk'} = \frac{1}{N}\int_0^\Delta\!\!dx\, \hat{C}_{k-k'}(x) \,\hat{\Lambda}_{k}^\alpha(x) \left[\hat{\Lambda}_{k'}^{\alpha'}(x)\right]^{*}, \end{equation} which can be computed using again the same Gauss formula as before. For uniform $C$, $\hat A_{kk'}$ is diagonal and the matrix equation (\ref{Fourier1d}) reduces to a system of equations for the uncoupled Fourier modes. When $C$ is non-uniform, $\hat A_{kk'}$ is \emph{dense}. However in problems where the solution is expected to be ``smooth'', one can keep only a small number of Fourier modes, reducing thus the rank of $\hat A_{kk'}$. Furthermore, if the coefficients $C(x)$ of the differential equations are very smooth, peaked at a few (low order) modes, the DFT matrix can become \emph{sparse}! The generalization to the two-dimensional problem (\ref{mat2d}) is straightforward: \begin{gather} \hat{A}_{im,i'm'} = \frac{1}{N_\theta}\int_0^R\!\!dr\left\{\int_0^{\Delta\theta}\!\!d\theta \, \hat{C}_{m-m'}(r,\theta) \,\hat{\Lambda}_{m}^\beta(\theta) \left[\hat{\Lambda}_{m'}^{\beta'}(\theta)\right]^{*}\right\} \Lambda_{i}^\alpha(r)\Lambda_{i'}^{\alpha'}(r) \\ \hat{C}_{m}(r,\theta) = \sum_{j=0}^{N_\theta-1}C(r,\theta+j\Delta\theta)\, e^{i\frac{2\pi}{N_\theta}jm}. \end{gather} Likewise, for the three-dimensional problem (\ref{mat3d}), we obtain \begin{gather} \hat{A}_{imn,i'm'n'} = \frac{1}{N_\theta N_\varphi}\int_0^R\!\!dr\left\{\int_0^{\Delta\theta}\!\!d\theta \int_0^{\Delta\varphi}\!\!d\varphi \, \hat{C}_{m-m',n-n'}(r,\theta,\varphi) \,\hat{\Lambda}_{m}^\beta(\theta) \left[\hat{\Lambda}_{m'}^{\beta'}(\theta)\right]^{*}\,\hat{\Lambda}_{n}^\gamma(\varphi) \left[\hat{\Lambda}_{n'}^{\gamma'}(\varphi)\right]^{*}\right\} \Lambda_{i}^\alpha(r)\Lambda_{i'}^{\alpha}(r) \\ \hat{C}_{mn}(r,\theta,\varphi) = \sum_{j=0}^{N_\theta-1}\sum_{k=0}^{N_\varphi-1}C(r,\theta+j\Delta\theta,\varphi+ k\Delta\varphi)\, e^{i\frac{2\pi}{N_\theta}jm}\,e^{i\frac{2\pi}{N_\varphi}kn}. \end{gather} Note that for axi-symmetric systems where the coefficients $C$ do not depend on $\varphi$ \begin{equation} \hat{C}_{mn} = \hat{C}_{mn}\delta_{n,0} \end{equation} and thus the three-dimensional problem reduces to a set of independent two-dimensional problems with \begin{equation} \begin{split} \hat{A}^n_{im.i'm'} &= M_n \hat{A}_{im.i'm'} \\ M_n &= \int_0^{\Delta\varphi}\!\!d\varphi \left|\hat{\Lambda}_n(\varphi)\right|^2. \end{split} \end{equation} \subsection{Integral equation} The DFT matrices for differential operators derived above can be extended to an integral operator of the following form: \begin{equation} \int_{0}^{L}\!dx' K(x,x')\,\phi(x') = \rho(x), \end{equation} where $\phi(x)$ is $L$-periodic. Using the same FE discretization as above results in the following matrix in \emph{real} space: \begin{equation} A_{jj'} = \int_{0}^{L}\!dx\,\Lambda_j(x)\,\int_{0}^{L}\!dx'\, K(x,x')\,\Lambda_{j'}(x'), \end{equation} and its DFT \begin{equation} \hat{A}_{kk'} = \frac{1}{N} \sum_{J=0}^{N-1}\int_{J\Delta}^{(J+1)\Delta}\!dx\, \sum_{J'=0}^{N-1}\int_{J'\Delta}^{(J'+1)\Delta}\!dx'\,K(x,x')\, e^{i\frac{2\pi}{N}kJ}\hat{\Lambda}_k(x-J\Delta)\, e^{-i\frac{2\pi}{N}k'J'}\left[\hat{\Lambda}_{k'}(x-J'\Delta)\right]^{*}, \end{equation} Now, defining the DFT of the kernel as \begin{equation} \hat{K}_{kk'}(x,x') = \sum_{J=0}^{N-1}\sum_{J'=0}^{N-1}K(x+J\Delta,x'+J'\Delta)\, e^{i\frac{2\pi}{N}kJ}\,e^{-i\frac{2\pi}{N}k'J'}, \end{equation} the final expression for the DFT of the matrix $\hat{A}_{kk'}$ reduces to \begin{equation} \hat{A}_{kk'} = \frac{1}{N} \int_0^\Delta\!dx\int_0^\Delta\!dx'\,\hat{K}_{kk'}(x,x')\, \hat{\Lambda}_k (x)\, \left[\hat{\Lambda}_{k'}(x')\right]^{*}. \end{equation} Again, notice that the dense matrix $\hat{A}$ can become \emph{sparse} if only a limited number of Fourier modes are retained in the DFT of the kernel $\hat{K}$. \subsection{A two-dimensional example with a non-uniform coefficient} As a check, we considered here a two-dimensional example similar to the example in section \ref{twodEx} but with a non-uniform coefficient: \begin{equation} -\frac{1}{r}\frac{\partial}{\partial r} \left[rC\frac{\partial\phi}{\partial r}\right] -\frac{1}{r^2}\frac{\partial}{\partial \theta} \left[C\frac{\partial\phi}{\partial \theta}\right] = \rho. \end{equation} With $C(r,\theta) = 1+\epsilon r\cos\theta$, assuming the same exact solution as in section (\ref{twodEx}) \begin{equation} \phi(r,\theta) = (1-r^2)r^m\cos m\theta, \end{equation} the right-hand side becomes \begin{equation} \begin{split} \rho(r,\theta) = 4(m+1)r^m\cos m\theta & + \frac{\epsilon r^m}{2}(4+5m-m/r^2)\cos(m-1)\theta \\ &+ \frac{\epsilon r^m}{2}(4+3m+m/r^2)\cos(m+1)\theta. \end{split} \end{equation} This problem is solved in real space and Fourier space respectively in example {\tt pde2d\_sym\_pardiso.f90} and example {\tt pde2d\_sym\_pardiso\_dft.f90}. Both use the {\tt PARDISO\_BSPLINES} module to solve the sparse matrix equation. It should be noted that the Fourier method should yield the \emph{same solution} as found with the solver in real space if \emph{all} the $N_\theta$ Fourier modes are kept. For the problem defined above with {\tt m=3}, by keeping only the seven Fourier modes in $[-3,3]$ and the three mode couplings $[-1,0,1]$ in the Fourier solver, we found that both methods yield the same (up to 5 digits) \emph{relative discretization error}. Furthermore, increasing the number of Fourier modes to $[-4,4]$ (note that the $m=\pm 4$ Fourier components of the right hand side $\rho$ are not null) does not increase the accuracy of the computed solution! In this example, the matrix in Fourier space has a rank which is $N_\theta/7$ times smaller than in the solver in real space. The number of non-zeros is also reduced by a factor of $(2p+1)/3$ since only 3 Fourier mode coupling terms are considered. In general, the efficiency of such a \emph{matrix filter} is expected to be problem-dependent. The Fourier solver should be tested in real simulations. \section{The matrix construction module {\tt CONMAT\_MOD}} \label{secCONMAT} The module implements the generic matrix construction subroutine {\tt conmat}, using the algorithm detailed in Appendix~\ref{matAssembly}, for 1D and 2D differential equations. The computed matrix is returned in the argument {\tt mat} which can be a Lapack band matrix as well as a PARDISO, WSMP or MUMPS sparse matrix. The complete interface of the subroutine is given below. \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE conmat(spl, mat, coefeq, maxder) TYPE(spline1d|spline2d) :: spl TYPE([z]gbmat|[z]pbmat|[z]periodic_mat|[z]pardiso|...) :: mat INTEGER, INTENT(in), OPTIONAL :: maxder[(2)] INTERFACE SUBROUTINE coefeq(x, [y], idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x, [y] INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE PRECISION, INTENT(out) :: c(:) END SUBROUTINE coefeq END INTERFACE \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Construct the FE matrix for 1D or 2D differential operator. \item[Arguments:] \mbox{} \begin{verbatim} spl : 1D or 2D spline mat : matrix object coefeq : user provided subroutine (see below) maxder : Maximum order of the derivatives in the weak form. Equal to 1 (first derivative) by default. \end{verbatim} \end{description} The subroutine {\tt conmat} includes, in addition to the arguments {\tt spl, mat} and {\tt maxder} described above, an user provided subroutine as the third argument {\tt coefeq} which computes all the weak form coefficients defined in Eq.(\ref{locMat1d}) and Eq.(\ref{locMat2d}) for a given point ($x$ for 1D case or $(x,y)$ for 2D case). The output array {\tt c} will contain all the computed $C$ with its corresponding derivative orders $(d,d')$ returned in {\tt idt, idw} respectively. Other quantities required to calculate the coefficients $C$ could be communicated to {\tt coefeq}, using for example a {\tt COMMON} block or a {\tt MODULE}. An example of using this module can be found in section \ref{twodEx}. \appendix \section{Matrix assembly for differential operators} \label{matAssembly} \subsection{1D case} \subsubsection{Local matrix} The contribution to the discretized weak-form from the interval $[x_i, x_{i+1}]$ where $i=1,\ldots,N$, is a sum of the \emph{local matrices} \begin{equation} \label{locMat1d} \begin{split} A^i_{\alpha\alpha'} &= \int_{x_i}^{x_{i+1}}\!\!dx \;C(x) \Lambda^d_{\alpha}(x)\Lambda^{d'}_{\alpha'}(x) \\ &\simeq \sum_{g=1}^{G}\, \underbrace{w_g\Lambda^d_{\alpha}(x_g)\Lambda^{d'}_{\alpha'}(x_g)}_{F_{\alpha\alpha'g}} \,\underbrace{C(x_g)}_{c_g}, \end{split} \end{equation} where a $G$ point Gauss quadrature over the interval $[x_i, x_{i+1}]$ is used to approximate the integral and $\Lambda^d_{\alpha}$ denotes the $d^{th}$ derivative of splines which are non zero in the interval $[x_i, x_{i+1}]$. For splines of degree $p$, $\alpha=0,\ldots,p$. Note that the matrix can be written as a \emph{matrix-vector product}: \begin{equation} \mathbf{A}= \mathbf{F} \cdot \mathbf{c}. \end{equation} \subsubsection{Mapping to global matrix} For $N$ intervals, the number of spline elements of degree $p$, is $N_e=N+p$, or $N_e=N$ if the system is \emph{periodic}. Once the local matrix $A_{\alpha\alpha'}$ is formed, it can be added to the \emph{global} matrix using the mapping: \begin{equation} \begin{split} A^g_{II'} &\leftarrow A^g_{II'} + A^i_{\alpha\alpha'} \\ I = i+\alpha, & \qquad I' = i+\alpha' \end{split} \end{equation} For periodic problems, the indices $I,I'$ are further transformed by taking into account the periodicity $N$, using for example the following {\tt FORTRAN} statement \begin{center} \tt I = MODULO(I-1,N) + 1 \\ \end{center} \subsection{2D case} \subsubsection{Local matrix} In this case, the local matrix obtained for the grid cell $[x_i, x_{i+1}]\times[y_j, y_{j+1}]$ takes the form: \begin{equation} \label{locMat2d} \begin{split} A_{\alpha\alpha'\beta\beta'} &= \int_{x_i}^{x_{i+1}}\!\!dx \int_{y_j}^{y_{j+1}}\!\!dy\,\Lambda^{d_1}_{\alpha}(x)\Lambda^{d_1'}_{\alpha'}(x)\;C(x,y) \,\Lambda^{d_2}_{\beta}(y)\Lambda^{d_2'}_{\beta'}(y) \\ &\simeq \sum_{g_1=1}^{G_1}\, \underbrace{w_{g_1}\Lambda^{d_1}_{\alpha}(x_{g_1})\Lambda^{{d_1}'}_{\alpha'}(x_{g_1})}_{F_{\alpha\alpha'g_1}} \;\sum_{g_2=1}^{G_2}\,\underbrace{C(x_{g_1},y_{g_2})}_{C_{g_1g_2}} \underbrace{w_{g_2}\Lambda^{d_2}_{\beta}(y_{g_2})\Lambda^{{d_2}'}_{\beta'}(y_{g_2})}_{G_{\beta\beta'g_2}}, \end{split} \end{equation} which can be computed efficiently as \emph{matrix-matrix products} \begin{equation} \mathbf{A} = \mathbf{F}\cdot\mathbf{C}\cdot\mathbf{G^{T}} \end{equation} \subsubsection{Mapping to global matrix} The local to global element indices mapping on each of the two dimensions can be defined as previously as \begin{equation} \begin{split} I = i+\alpha, & \qquad I' = i+\alpha' \\ J = j+\beta, & \qquad J' = j+\beta' \end{split} \end{equation} If any of the 2 dimensions is periodic, the periodic condition have to be applied to the corresponding global element index as explained above. Furthermore, in order to reduce the 4 dimension array $A^g_{II'JJ'}$ to the standard 2 dimension matrix, we number first the elements in $y$ coordinate and obtain the following index transformation: \begin{equation} \mu = J + N^y_e(I-1), \qquad \mu' = J' + N^y_e(I'-1), \end{equation} where $N^y_e$ is the number of elements along the $y$ coordinate. The \emph{global} matrix is then constructed from \begin{equation} A^g_{\mu\mu'} \leftarrow A^g_{\mu\mu'} + A_{\alpha\alpha'\beta\beta'} \end{equation} \section{The boundary conditions} \subsection{Dirichlet condition} \label{DirichletCond} \subsubsection{1D case} Let us consider the boundary condition $u(0)=c$. Since all the splines are 0 at $x=0$, except for the first spline which is equal to 1, $\Lambda_i(0)=\delta_{i,1}$, we have simply \begin{equation} c=u(0) = \sum_{i=1}^N u_i \Lambda_i(0) \quad \Longrightarrow \quad u_{1} = c. \end{equation} The discretized linear system of equations, taking into account of this BC, can thus be written as \begin{equation} \begin{split} u_1 &= c\\ \sum_{j=2}^N A_{ij}u_j &= f_i - A_{i1}c, \quad i=2,\ldots, N \end{split} \end{equation} or in the following matrix form: \begin{equation} \left(\begin{matrix} 1 & 0 & \cdots \\ 0 & A_{22} & \cdots \\ \vdots & \vdots & \ddots \\ \end{matrix}\right) \left(\begin{matrix} u_{1} \\ u_{2} \\ \vdots\\ u_{N} \\ \end{matrix}\right) = \left(\begin{matrix} c \\ f_{2} -cA_{21}\\ \vdots\\ f_{N} -cA_{N1}\\ \end{matrix}\right) \end{equation} Note that (1) the transformed matrix preserves any symmetry or positivity of the original matrix, (2) the first column of the original matrix has to be saved in order to modify the RHS $f_i$ but only for non zero $c$ and (3) in that case, one needs to save only $[A_{i1}]_{i=2}^{p+1}$, where $p$ is the spline order. In summary, the procedure for imposing the Dirichlet BC $u_1=c$ can be summarized as follows: \begin{enumerate} \item Matrix transformation \begin{enumerate} \item Clear the matrix row $i=1$ and set its diagonal term $A_{11}$ to 1. \item Get the matrix column $A_{j1}, \quad j=2,\ldots,p+1$ and save it. \item Clear the matrix column $j=1$ and set its diagonal term $A_{11}$ to 1. \end{enumerate} \item RHS transformation \begin{enumerate} \item Set $f_1\leftarrow c$. \item Modify the RHS: $f_i\leftarrow f_i-A_{i1}c, \quad i=2,\ldots,p+1$. \end{enumerate} \end{enumerate} If the original matrix \emph{is not symmetric}, only the steps (1a) and (2a) are required, since the other steps are only necessary to preserve the symmetry of the original matrix. \subsubsection{2D case} In that case, let us write the solution $u(x,y)$ as \begin{equation} u(x,y) = \sum_{i=1}^{N_1}\sum_{j=1}^{N_2} u_{ij} \Lambda_i(x)\Lambda_j(y), \end{equation} where $N_1, N_2$ are the number of elements in each dimension. Assuming the BC $u(0,y) = g(y)$, and since $\Lambda_i(0)=\delta_{i1}$, the solutions $u_{ij}$ should satisfy \begin{equation} \label{dirich_2d} \sum_{j=1}^{N_2} u_{1j} \Lambda_j(y) = g(y). \end{equation} If $g(y)$ is constant $g(y)=c$, we obtain the trivial solution $u_{1,j}=c$ since $\sum_{j=1}^{N_2} \Lambda_j(y)=1$ \cite{BSPLINES}. For non-uniform $g$, at least 2 methods can be used to obtain the $N_2$ unknowns $u_{1j}$ satisfying the equation above: \begin{enumerate} \item By \emph{collocating} Eq.(\ref{dirich_2d}) on a \emph{suitable} set of points $[y_k]_{ k=1}^{N_2}$, the problem is reduced to an \emph{interpolation} one (see section ``Spline Interpolation'' in \cite{BSPLINES}). \item By \emph{minimizing} the residual norm of Eq.(\ref{dirich_2d}) defined as follows: \begin{gather} R = \left\|\sum_{j=1}^{N_2} c_{j} \Lambda_j(y)-g(y)\right\|^2 = \int\!\!dy\left\{\left[\sum_{j=1}^{N_2} c_{j} \Lambda_j(y)\right]^2 - 2g(y)\sum_{j=1}^{N_2} c_{j} \Lambda_j(y) +g^2(y)\right\}\\ \frac{\partial R}{\partial c_k} = 2 \int\!\!dy\left[ \sum_{j=1}^{N_2} c_{j} \Lambda_j(y)\Lambda_k(y) -g(y)\Lambda_k(y)\right] = 0, \quad k=1,\ldots,N_2, \end{gather} the boundary solutions $c_j$ can be calculated by solving the following \emph{weak-form} of Eq.(\ref{dirich_2d}): \begin{equation} \sum_{j=1}^{N_2} c_{j} \int\!\!dy\Lambda_j(y)\Lambda_k(y) = \int\!\!dy\Lambda_k(y) g(y), \qquad k=1,\ldots,N_2. \end{equation} \end{enumerate} Once the values of $c_j$ known, the procedure described for the 1D case above can be applied to satisfy each of the $N_2$ conditions $u_{1j}=c_j$. A full example for solving the cylindrical Laplace equation in cylindrical coordinates: \begin{equation} \begin{split} \frac{1}{r}\frac{\partial}{\partial r} \left(r\frac{\partial\phi}{\partial r}\right) &+\frac{1}{r^2} \frac{\partial^2\phi}{\partial \theta^2} = 0 \\ \phi(r=1,\theta) &= \cos m\theta. \end{split} \end{equation} is given in {\tt bpslines/examples/dirichlet/poisson.f90}. \subsection{Unicity on the axis} \label{unicityCond} Denoting the $N$ solutions at the axis by $(u_1, \ldots, u_N)$ , and their transforms by $(\hat u_1, \ldots, \hat u_N)$ defined by \begin{equation} \begin{array}{ccc} u_1-u_N = \hat u_1 & & u_1 = \hat u_1 + \hat u_N \\ u_2-u_N = \hat u_2 & & u_2 = \hat u_2 + \hat u_N \\ \vdots & \Longrightarrow & \vdots \\ u_{N-1}-u_N = \hat u_{N-1} & & u_{N-1} = \hat u_{N-1} + \hat u_N \\ u_N = \hat u_N & & u_N = \hat u_N, \end{array} \label{eq:unicity1} \end{equation} the unicity condition can be specified by simply imposing \begin{equation} \hat u_1=\hat u_2=\ldots=\hat u_{N-1}=0. \label{eq:unicity2} \end{equation} From (\ref{eq:unicity1}), the \emph{transformation matrix} \(\mathbf U\) is defined as \begin{equation} \mathbf{u} = \mathbf{ U \cdot\hat u}, \qquad \mathbf{U} = \left(\begin{matrix} 1 & 0 & \dots & 0 & 1 \\ 0 & 1 & \dots & 0 & 1 \\ & & \ddots& & \vdots \\ 0 & 0 & \dots & 1 & 1 \\ 0 & 0 & \dots & 0 & 1 \end{matrix}\right), \quad \mathbf{U^{T}} = \left(\begin{matrix} 1 & 0 & \dots & 0 & 0 \\ 0 & 1 & \dots & 0 & 0 \\ & & \ddots& & \vdots \\ 0 & 0 & \dots & 1 & 0 \\ 1 & 1 & \dots & 1 & 1 \end{matrix}\right). \end{equation} \paragraph{Matrix product \( \mathbf{A\cdot U}\)} \begin{equation} \mathbf{ A\cdot U} = \left(\begin{array}{lllll} A_{1,1} & A_{1,2} & \dots & A_{1,N-1} & \sum_{j} A_{1,j} \\ A_{2,1} & A_{2,2} & \dots & A_{2,N-1} & \sum_{j} A_{2,j} \\ & & \ddots& & \vdots \\ A_{N-1,1} & A_{N-1,2} & \dots & A_{N-1,N-1} & \sum_{j}A_{N-1,j} \\ A_{N,1} & A_{N,2} & \dots & A_{N,N-1} & \sum_{j}A_{N,j} \end{array}\right). \end{equation} Thus \emph{right multiply by \(\mathbf{U}\)} is equivalent to put the \emph{the sum of each row on the last column}. \paragraph{Matrix product \( \mathbf{ U^T \cdot A}\)} \begin{equation} \mathbf{ U^T \cdot A} = \left(\begin{array}{lllll} A_{1,1} & A_{1,2} & \dots & A_{1,N-1} & A_{1,N} \\ A_{2,1} & A_{2,2} & \dots & A_{2,N-1} & A_{2,N} \\ & & \ddots& & \vdots \\ A_{N-1,1} & A_{N-1,2} & \dots & A_{N-1,N-1} & A_{N-1,N} \\ \sum_{i}A_{i,1} & \sum_{i}A_{i,2} & \dots & \sum_{i}A_{i,N-1} & \sum_{i}A_{i,N} \end{array}\right). \end{equation} Thus \emph{left multiply by \(\mathbf{\hat U}\)} is equivalent to put the \emph{the sum of each column on the last row}. \paragraph{Product \( \mathbf{\hat U \cdot b}\)} \begin{equation} \mathbf{\hat b} = \mathbf{U^T\cdot b} = \left(\begin{array}{l} b_1 \\ b_2 \\ \vdots \\ b_{N-1} \\ \sum_{i} b_{i} \end{array}\right), \end{equation} \paragraph{Transformation of the original matrix equation} The full original linear system, obtained from the discretization of the 2D \(r,\theta\) polar coordinates can be written as: \begin{equation} \left(\begin{array}{ll} \mathbf{A} & \mathbf{B} \\ \mathbf{C} & \mathbf{D} \end{array}\right) \left(\begin{array}{l} \mathbf{u} \\ \mathbf{v} \end{array}\right) = \left(\begin{array}{l} \mathbf{b} \\ \mathbf{c} \end{array}\right), \label{eq:orig_matrix_eq} \end{equation} where the solution array is split into the solutions \(\mathbf{u}\) at \(r=0\) and the solutions \(\mathbf{v}\) on the remaining domain. The transformed system can thus be written as \begin{equation*} \left(\begin{array}{ll} \mathbf{U^T} & 0 \\ 0 & \mathbf{I} \end{array}\right) \left(\begin{array}{ll} \mathbf{A} & \mathbf{B} \\ \mathbf{C} & \mathbf{D} \end{array}\right) \left(\begin{array}{ll} \mathbf{U} & 0 \\ 0 & \mathbf{I} \end{array}\right) \left(\begin{array}{l} \mathbf{\hat u} \\ \mathbf{v} \end{array}\right) = \left(\begin{array}{ll} \mathbf{U^T} &0 \\ 0 & \mathbf{I} \end{array}\right) \left(\begin{array}{l} \mathbf{b} \\ \mathbf{c} \end{array}\right), \end{equation*} \begin{equation} \Longrightarrow \left(\begin{array}{cc} \mathbf{U^TAU} & \mathbf{U^TB} \\ \mathbf{CU} & \mathbf{D} \end{array}\right) \left(\begin{array}{l} \mathbf{\hat u} \\ \mathbf{v} \end{array}\right) = \left(\begin{array}{c} \mathbf{U^Tb} \\ \mathbf{c} \end{array}\right), \end{equation} Notice that the transformation preserves any symmetry existing in the original system (\ref{eq:orig_matrix_eq}). The transformed matrix is finally given in the following where only the modified elements are shown and the sum is only over the first \(N\) points in \(\theta\) direction. The \(\times\) symbol denotes unmodified elements. \begin{equation} \left(\begin{array}{lllllll} \times & \times & \times & \times & \sum_{j} A_{1,j} & \times & \times \\ \times & \times & \times & \times & \sum_{j} A_{2,j} & \times & \times \\ \times & \times & \times & \times & \vdots & \times & \times \\ \times & \times & \times & \times & \sum_{j} A_{N-1,j} & \times & \times \\ \sum_{i}A_{i,1} & \sum_{i}A_{i,2} & \dots & \sum_{i}A_{i,N-1} & \sum_{i,j}A_{i,j} & \sum_{i}A_{i,N+1} & \dots \\ \times & \times & \times & \times & \sum_{j} A_{N+1,j} & \times & \times \\ \times & \times & \times & \times & \vdots & \times & \times \end{array}\right) \end{equation} Only the \(N^{th}\) column and the \(N^{th}\) row are affected by the transformation. Applying now the unicity condition (\ref{eq:unicity2}) the final transformed system reads: \begin{equation} \left(\begin{array}{lllllll} 1 & 0 & \dots & 0 & 0 & 0 & 0 \\ 0 & 1 & \dots & 0 & 0 & 0 & 0 \\ 0 & 0 & \ddots & 0 & \vdots & 0 & 0 \\ 0 & 0 & \dots & 1 & 0 & 0 & 0 \\ 0 & 0 & \dots & 0 & \sum_{i,j}A_{i,j} & \sum_{i}A_{i,N+1} & \dots \\ 0 & 0 & \dots & 0 & \sum_{j} A_{N+1,j} & \times & \times \\ 0 & 0 & \dots & 0 & \vdots & \times & \times \end{array}\right) \left(\begin{array}{l} \hat u_1 \\ \hat u_2 \\ \vdots\\ \hat u_{N-1}\\ \hat u_{N} \\ u_{N+1} \\ \vdots \end{array}\right) = \left(\begin{array}{l} 0 \\ 0 \\ \vdots\\ 0 \\ \sum_{i} b_{i} \\ b_{N+1} \\ \vdots \end{array}\right). \end{equation} \section{{\tt MATRIX} Reference} \label{matRef} The following conventions are adopted in the routine descriptions: \begin{itemize} \item {\tt [z]} means optional: for example {\tt TYPE([z]gemat)} declares a variable which can be of type {\tt gemat} or {\tt zgemat}. \item The symbol ``$|$'' is the logical {\tt OR} operator. Thus \begin{verbatim} TYPE([z]gemat|[z]gbmat) :: mat \end{verbatim} declares that {\tt mat} can be either of type {\tt gemat}, {\tt zgemat}, {\tt pbmat} or {\tt zpbmat}. \item In a same declaration block, if a scalar or array of type {\tt DOUBLE PRECISION|COMPLEX} is declared together with a matrix object which can be also complex, both should be either real or complex. For example in the routine {\tt updtmat}, if {\tt mat} of type {\tt zgbmat}, {\tt val} should be complex. \end{itemize} \subsection{init} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} TYPE([z]gemat) :: mat SUBROUTINE init(n, nterms, mat ,kmat) TYPE([z]gbmat) :: mat SUBROUTINE init(kl, ku, n, nterms, mat, kmat) TYPE([z]pbmat) :: mat SUBROUTINE init(ku, n, nterms, mat, kmat) TYPE([z]periodic_mat) :: mat SUBROUTINE init(kl, ku, n, nterms, mat, kmat) INTEGER, INTENT(in) :: kl, ku, n, nterms INTEGER, OPTIONAL :: kmat \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Initialize data structure for matrix \item[Arguments:] \mbox{} \begin{verbatim} n : rank of matrix kl, ku : number of sub and super diagonals nterms : number of terms in weak form kmat : matrix id mat : matrix object \end{verbatim} \end{description} \subsection{destroy} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE destroy(mat) TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Free matrix memory \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object \end{verbatim} \end{description} \subsection{updmat} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE updtmat(mat, i, j, val) TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat INTEGER, INTENT(IN) :: i, j DOUBLE PRECISION|COMPLEX, INTENT(IN) :: val \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Update (accumulate) element $A_{ij}$ \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object i : row index j : column index val : input value \end{verbatim} \end{description} \subsection{putele} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE putele(mat, i, j, val) TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat INTEGER, INTENT(IN) :: i, j DOUBLE PRECISION|COMPLEX, INTENT(IN) :: val \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Overwrite element $A_{ij}$ \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object i : row index j : column index val : input value \end{verbatim} \end{description} \subsection{putrow} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE putrow(mat, i, arr) TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat INTEGER, INTENT(IN) :: i DOUBLE PRECISION|COMPLEX, INTENT(IN) :: arr(:) \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Overwrite a matrix row \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object i : row index arr : input array \end{verbatim} \end{description} \subsection{putcol} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE putcol(mat, j, arr) TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat INTEGER, INTENT(IN) :: j DOUBLE PRECISION|COMPLEX, INTENT(IN) :: arr(:) \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Overwrite a matrix row \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object j : column index arr : input array \end{verbatim} \end{description} \subsection{getele} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE getele(mat, i, j, val) TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat INTEGER, INTENT(IN) :: i, j DOUBLE PRECISION|COMPLEX, INTENT(OUT) :: val \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Get element $A_{ij}$ \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object i : row index j : column index val : output value \end{verbatim} \end{description} \subsection{getrow} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE getrow(mat, i, arr) TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat INTEGER, INTENT(IN) :: i DOUBLE PRECISION|COMPLEX, INTENT(OUT) :: arr(:) \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Get a matrix row \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object i : row index arr : output array \end{verbatim} \end{description} \subsection{getcol} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE getcol(mat, j, arr) TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat INTEGER, INTENT(IN) :: j DOUBLE PRECISION|COMPLEX, INTENT(OUT) :: arr(:) \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Get a matrix column \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object i : column index arr : output array \end{verbatim} \end{description} \subsection{vmx} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} FUNCTION vmx(mat, x) TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat DOUBLE PRECISION|COMPLEX, DIMENSION(:), INTENT(in) :: x DOUBLE PRECISION|COMPLEX, DIMENSION(SIZE(x)) :: vmx \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Matrix-vector product $Ax$ \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object x : input array vmx : output array \end{verbatim} \end{description} \subsection{mcopy} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE mcopy(mata, matb) TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mata, matb \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Matrix copy: $B = A$ \item[Arguments:] \mbox{} \begin{verbatim} mata : input matrix object matb : output matrix object \end{verbatim} \end{description} \subsection{maddto} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE maddto(mata, alpha, matb) TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mata, matb DOUBLE PRECISION|COMPLEX : alpha \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Matrix addition: $A \leftarrow A+\alpha B$ \item[Arguments:] \mbox{} \begin{verbatim} mata : input matrix object matb : output matrix object alpha : input scalar \end{verbatim} \end{description} \subsection{determinant} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE determinant(mat, base, pow) TYPE([z]gemat|[z]gbmat|[z]pbmat) :: mat INTEGER, INTENT(out) :: pow DOUBLE PRECISION|COMPLEX : base \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Returns the determinant of matrix as $D = \text{base}\times 10^{\text{pow}}$ \item[Arguments:] \mbox{} \begin{verbatim} mat : input matrix object base : mantissa of determinant pow : exponent of determinant \end{verbatim} \end{description} \subsection{factor} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE factor(mat) TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} LU (Cholesky for symmetric/hermitian matrix) factorization \item[Arguments:] \mbox{} \begin{verbatim} mat : inout matrix object \end{verbatim} \end{description} \subsection{bsolve} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE bsolve(mat) TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat DOUBLE PRECISION|COMPLEX, DIMENSION [(:)] :: rhs DOUBLE PRECISION|COMPLEX, DIMENSION [(:),] OPTIONAL, INTENT (out) :: sol \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Solve the linear system using the factored matrix, for a single or multiple right-hand-side \item[Arguments:] \mbox{} \begin{verbatim} mat : input factored matrix object rhs : input right-hand-side, overwriten by the solution if sol is not present sol : contains solution \end{verbatim} \end{description} \section{{\tt SPMAT} Reference} \label{spmatRef} The following conventions are adopted in the routine descriptions: \begin{itemize} \item {\tt [z]} means optional: for example {\tt TYPE([z]gemat)} declares a variable which can be of type {\tt gemat} or {\tt zgemat}. \item The symbol ``$|$'' is the logical {\tt OR} operator. Thus \begin{verbatim} TYPE([z]gemat|[z]gbmat) :: mat \end{verbatim} declares that {\tt mat} can be either of type {\tt gemat}, {\tt zgemat}, {\tt pbmat} or {\tt zpbmat}. \item In a same declaration block, if a scalar or array of type {\tt DOUBLE PRECISION|COMPLEX} is declared together with a matrix object which can be also complex, both should be either real or complex. For example in the routine {\tt updtmat}, if {\tt mat} of type {\tt zgbmat}, {\tt val} should be complex. \end{itemize} \subsection{init} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE init(n, mat, istart, iend) TYPE([z]spmat) :: mat INTEGER, INTENT(in), OPTIONAL :: istart, iend INTEGER, INTENT(in) :: n \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Initialize an empty sparse matrix of $n$ rows. \item[Arguments:] \mbox{} \begin{verbatim} n : rank of matrix mat : matrix object istart, iend : range of row indices. By default istart=1, iend=n \end{verbatim} \end{description} \subsection{destroy} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE destroy(mat) TYPE([z]spmat) :: mat \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Free matrix memory \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object \end{verbatim} \end{description} \subsection{updmat} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE updtmat(mat, i, j, val) TYPE([z]spmat) :: mat INTEGER, INTENT(IN) :: i, j DOUBLE PRECISION|COMPLEX, INTENT(IN) :: val \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Update (accumulate) an existing element $A_{ij}$ or insert it in the linked list in an increasing order in the column index j. \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object i : row index j : column index val : input value \end{verbatim} \end{description} \subsection{putele} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE putele(mat, i, j, val, nlforce_zero) TYPE([z]pbmat) :: mat INTEGER, INTENT(IN) :: i, j DOUBLE PRECISION|COMPLEX, INTENT(IN) :: val LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Overwrite an existing element $A_{ij}$ or insert it in the linked list in an increasing order in the column index j. \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object i : row index j : column index val : input value nlforce_zero : Never remove an existing element when input is zero if TRUE FALSE by default \end{verbatim} \end{description} \subsection{putrow} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE putrow(mat, i, arr, col, nlforce_zero) TYPE([z]spmat) :: mat INTEGER, INTENT(IN) :: i DOUBLE PRECISION|COMPLEX, INTENT(IN) :: arr(:) INTEGER, INTENT(in), OPTIONAL :: col(:) LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Overwrite a matrix row \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object i : row index arr : input array col : input array containing column indices nlforce_zero : Never remove an existing element when input is zero if TRUE FALSE by default \end{verbatim} \end{description} \subsection{putcol} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE putcol(mat, j, arr, nlforce_zero) TYPE([z]spmat) :: mat INTEGER, INTENT(IN) :: j DOUBLE PRECISION|COMPLEX, INTENT(IN) :: arr(:) \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Overwrite a matrix row \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object j : column index arr : input array nlforce_zero : Never remove an existing non-zero element if .TRUE. .FALSE. by default \end{verbatim} \end{description} \subsection{getele} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE getele(mat, i, j, val) TYPE([z]spmat) :: mat INTEGER, INTENT(IN) :: i, j DOUBLE PRECISION|COMPLEX, INTENT(OUT) :: val \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Get element $A_{ij}$ \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object i : row index j : column index val : output value \end{verbatim} \end{description} \subsection{getrow} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE getrow(mat, i, arr, col) TYPE([z]spmat) :: mat INTEGER, INTENT(IN) :: i DOUBLE PRECISION|COMPLEX, INTENT(OUT) :: arr(:) INTEGER, INTENT(out), OPTIONAL :: col(:) \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Get a matrix row and optionally the column indices \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object i : row index arr : output array col : output array containing column indices \end{verbatim} \end{description} \subsection{getcol} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE getcol(mat, j, arr) TYPE([z]spmat) :: mat INTEGER, INTENT(IN) :: j DOUBLE PRECISION|COMPLEX, INTENT(OUT) :: arr(:) \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Get a matrix column \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object i : column index arr : output array \end{verbatim} \end{description} \subsection{get\_count} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} INTEGER FUNCTION get_count(mat, nnz) TYPE([z]spmat) :: mat INTEGER, INTENT(out), OPTIONAL :: nnz(:) \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Returns the number of non-zeros and optionally an array of numbers of non-zeros on each row \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object nnz : array containing numbers of non-zeros on each row. \end{verbatim} \end{description} \section{{\tt PARDISO\_BSPLINES} Reference} \label{pardisoRef} The subroutines {\tt updmat, putele, putrow, putcol, getele, getrow, getcol, vmx, mcopy, maddto} and {\tt destroy} have \emph{exactly} the same list of arguments as those from the {\tt MATRIX} module (as documented in Appendix~\ref{matRef}), except for the matrix types. Below, we show only the routines that have different arguments. The same conventions as before are used for the routine description. \subsection{init} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE init(n, nterms, mat, kmat, nlsym, [nlherm,] nlpos, & & nlforce_zero) INTEGER, INTENT(in) :: n, nterms TYPE([z]pardiso_mat) :: mat INTEGER, OPTIONAL, INTENT(in) :: kmat LOGICAL, OPTIONAL, INTENT(in) :: nlsym LOGICAL, OPTIONAL, INTENT(in) :: nlpos LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Initialize the PARDISO solver. A SPMAT matrix of $n$ empty rows is initialized. \item[Arguments:] \mbox{} \begin{verbatim} n : rank of matrix nterms : number of terms in weak form kmat : matrix id mat : matrix object nlsym : symmetric or not. Default is .FALSE. nlherm : Hermitian or not for complex matrix . Default is .FALSE. nlpos : Positive-definite or not. Default is .TRUE. nlforce_zero : Never remove an existing non-zero element if .TRUE. .TRUE. by default \end{verbatim} \end{description} \subsection{clear\_mat} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE clear_mat(mat) TYPE([z]pardiso_mat) :: mat \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Clear matrix, keeping its sparse structure unchanged \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object \end{verbatim} \end{description} \subsection{psum\_mat} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE sum_mat(mat, comm) TYPE([z]pardiso_mat) :: mat INTEGER, INTENT(in) :: comm \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Parallel sum of matrices. Result matrix is placed in the sparse matrix mat\%mat on all processes of comm. \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object comm : communicator \end{verbatim} \end{description} \subsection{p2p\_mat} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE p2p_mat(mat, dest, extyp, op, comm) TYPE([z]pardiso_mat) :: mat INTEGER, INTENT(in) :: dest CHARACTER(len=*), INTENT(in) :: extyp ! ('send', 'recv', 'sendrecv') CHARACTER(len=*), INTENT(in) :: op ! ('put', 'updt') INTEGER, INTENT(in) :: comm \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Point-to-point combine sparse matrix between 2 processes. \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object dest : rank of remote process extyp : exchange type ('send', 'recv', 'sendrecv') op : operation type ('put', 'updt') comm : communicator \end{verbatim} \end{description} \subsection{get\_count} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} INTEGER FUNCTION get_count(mat, nnz) TYPE([z]pardiso_mat) :: mat INTEGER, INTENT(out), OPTIONAL :: nnz(:) \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Returns the number of non-zeros and optionally an array of numbers of non-zeros on each row \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object nnz : array containing numbers of non-zeros on each row. \end{verbatim} \end{description} \subsection{factor} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE factor(mat, nlreord, nlmetis, debug) TYPE([z]pardiso_mat) :: mat LOGICAL, OPTIONAL, INTENT(in) :: nlreord LOGICAL, OPTIONAL, INTENT(in) :: nlmetis LOGICAL, OPTIONAL, INTENT(in) :: debug \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Wrapper of to\_mat, reord\_mat and numfact \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object nlreord : call reord_mat if .TRUE. (default is .TRUE.) nlmetis : use METIS nested dissection for reoredering. Default is minimum degree alogorithm. debug : verbose output from PARDISO if .TRUE. Default is .FALSE. \end{verbatim} \end{description} \subsection{bsolve} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE bsolve_pardiso_mat1(mat, rhs, sol, nref, debug) TYPE([z]pardiso_mat) :: mat DOUBLE PRECISION|COMPLEX :: rhs(:) DOUBLE PRECISION|COMPLEX, OPTIONAL :: sol(:) INTEGER, OPTIONAL :: nref LOGICAL, OPTIONAL, INTENT(in) :: debug \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Wrapper of to\_mat, reord\_mat and numfact \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object rhs : input right-hand-side, overwriten by the solution if sol is not present sol : contains solution ref : maximum number of refinement steps. Default is 0 (no refinement). debug : verbose output from PARDISO if .TRUE. Default is .FALSE. \end{verbatim} \end{description} \subsection{to\_mat} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE to_mat(mat) TYPE([z]pardiso_mat) :: mat \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Convert linked list spmat to pardiso matrix structure \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object \end{verbatim} \end{description} \subsection{reord\_mat} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE reord_mat(mat, nlmetis, debug) TYPE([z]pardiso_mat) :: mat LOGICAL, OPTIONAL, INTENT(in) :: nlmetis LOGICAL, OPTIONAL, INTENT(in) :: debug \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Reordering and symbolic factorization \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object nlmetis : use METIS nested dissection for reoredering. Default is minimum degree alogorithm. debug : verbose output from PARDISO if .TRUE. Default is .FALSE. \end{verbatim} \end{description} \subsection{numfact} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE numfact(mat, debug) TYPE([z]pardiso_mat) :: mat LOGICAL, OPTIONAL, INTENT(in) :: debug \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Numerical factorization \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object debug : verbose output from PARDISO if .TRUE. Default is .FALSE. \end{verbatim} \end{description} \section{{\tt [P]WSMP\_BSPLINES} Reference} \label{wsmpRef} The subroutines {\tt updmat, putele, putrow, putcol, getele, getrow, getcol, vmx, mcopy, maddto} and {\tt destroy} have \emph{exactly} the same list of arguments as those from the {\tt MATRIX} module (as documented in Appendix~\ref{matRef}), except for the matrix types. Below, we show only the routines that have different arguments. The same conventions as before are used for the routine description. \subsection{init} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE init(n, nterms, mat, kmat, nlsym, [nlherm,] nlpos, & & nlforce_zero, [comm_in]) INTEGER, INTENT(in) :: n, nterms TYPE([z]wsmp_mat) :: mat INTEGER, OPTIONAL, INTENT(in) :: kmat LOGICAL, OPTIONAL, INTENT(in) :: nlsym LOGICAL, OPTIONAL, INTENT(in) :: nlpos LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero INTEGER, OPTIONAL, INTENT(in) :: comm_in \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Initialize the WSMP solver. A SPMAT matrix of $n$ empty rows is initialized. \item[Arguments:] \mbox{} \begin{verbatim} n : rank of matrix nterms : number of terms in weak form kmat : matrix id mat : matrix object nlsym : symmetric or not. Default is .FALSE. nlherm : Hermitian or not for complex matrix . Default is .FALSE. nlpos : Positive-definite or not. Default is .TRUE. nlforce_zero : Never remove an existing non-zero element if .TRUE. .TRUE. by default comm_in : MPI communicator. By default MPI_COMM_WORLD (only in PWSMP_BSPLINES) \end{verbatim} \end{description} \subsection{clear\_mat} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE clear_mat(mat) TYPE([z]wsmp_mat) :: mat \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Clear matrix, keeping its sparse structure unchanged \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object \end{verbatim} \end{description} \subsection{psum\_mat} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE sum_mat(mat, comm) TYPE([z]wsmp_mat) :: mat INTEGER, INTENT(in) :: comm \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Parallel sum of matrices. Result matrix is placed in the sparse matrix mat\%mat on all processes of comm. \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object comm : communicator \end{verbatim} \end{description} \subsection{p2p\_mat} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE p2p_mat(mat, dest, extyp, op, comm) TYPE([z]wsmp_mat) :: mat INTEGER, INTENT(in) :: dest CHARACTER(len=*), INTENT(in) :: extyp ! ('send', 'recv', 'sendrecv') CHARACTER(len=*), INTENT(in) :: op ! ('put', 'updt') INTEGER, INTENT(in) :: comm \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Point-to-point combine sparse matrix between 2 processes. \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object dest : rank of remote process extyp : exchange type ('send', 'recv', 'sendrecv') op : operation type ('put', 'updt') comm : communicator \end{verbatim} \end{description} \subsection{get\_count} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} INTEGER FUNCTION get_count(mat, nnz) TYPE([z]wsmp_mat) :: mat INTEGER, INTENT(out), OPTIONAL :: nnz(:) \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Returns the number of non-zeros and optionally an array of numbers of non-zeros on each row \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object nnz : array containing numbers of non-zeros on each row. \end{verbatim} \end{description} \subsection{factor} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE factor(mat, nlreord) TYPE([z]wsmp_mat) :: mat LOGICAL, OPTIONAL, INTENT(in) :: nlreord \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Wrapper of to\_mat, reord\_mat and numfact \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object nlreord : call reord_mat if .TRUE. (default is .TRUE.) \end{verbatim} \end{description} \subsection{bsolve} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE bsolve_wsmp_mat1(mat, rhs, sol, nref) TYPE([z]wsmp_mat) :: mat DOUBLE PRECISION|COMPLEX :: rhs(:) DOUBLE PRECISION|COMPLEX, OPTIONAL :: sol(:) INTEGER, OPTIONAL :: nref \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Wrapper of to\_mat, reord\_mat and numfact \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object rhs : input right-hand-side, overwriten by the solution if sol is not present sol : contains solution ref : maximum number of refinement steps. Default is 0 (no refinement). debug : verbose output from WSMP if .TRUE. Default is .FALSE. \end{verbatim} \end{description} \subsection{to\_mat} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE to_mat(mat) TYPE([z]wsmp_mat) :: mat \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Convert linked list spmat to wsmp matrix structure \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object \end{verbatim} \end{description} \subsection{reord\_mat} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE reord_mat(mat) TYPE([z]wsmp_mat) :: mat \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Reordering and symbolic factorization \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object \end{verbatim} \end{description} \subsection{numfact} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE numfact(mat) TYPE([z]wsmp_mat) :: mat \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Numerical factorization \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object \end{verbatim} \end{description} \section{{\tt MUMPS\_BSPLINES} Reference} \label{mumpsRef} The subroutines {\tt updmat, putele, putrow, putcol, getele, getrow, getcol, vmx, mcopy, maddto} and {\tt destroy} have \emph{exactly} the same list of arguments as those from the {\tt MATRIX} module (as documented in Appendix~\ref{matRef}), except for the matrix types. Below, we show only the routines that have different arguments. The same conventions as before are used for the routine description. \subsection{init} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE init(n, nterms, mat, kmat, nlsym, [nlherm,] nlpos, & & nlforce_zero, comm_in) INTEGER, INTENT(in) :: n, nterms TYPE([z]mumps_mat) :: mat INTEGER, OPTIONAL, INTENT(in) :: kmat LOGICAL, OPTIONAL, INTENT(in) :: nlsym LOGICAL, OPTIONAL, INTENT(in) :: nlpos LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero INTEGER, OPTIONAL, INTENT(in) :: comm_in \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Initialize the MUMPS solver. A SPMAT matrix of $n$ empty rows is initialized. \item[Arguments:] \mbox{} \begin{verbatim} n : rank of matrix nterms : number of terms in weak form kmat : matrix id mat : matrix object nlsym : symmetric or not. Default is .FALSE. nlherm : Hermitian or not for complex matrix . Default is .FALSE. nlpos : Positive-definite or not. Default is .TRUE. nlforce_zero : Never remove an existing non-zero element if .TRUE. .TRUE. by default comm_in : MPI communicator. By default MPI_COMM_SELF (serial mode). \end{verbatim} \end{description} \subsection{clear\_mat} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE clear_mat(mat) TYPE([z]mumps_mat) :: mat \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Clear matrix, keeping its sparse structure unchanged \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object \end{verbatim} \end{description} \subsection{psum\_mat} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE sum_mat(mat, comm) TYPE([z]mumps_mat) :: mat INTEGER, INTENT(in) :: comm \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Parallel sum of matrices. Result matrix is placed in the sparse matrix mat\%mat on all processes of comm. \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object comm : communicator \end{verbatim} \end{description} \subsection{p2p\_mat} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE p2p_mat(mat, dest, extyp, op, comm) TYPE([z]mumps_mat) :: mat INTEGER, INTENT(in) :: dest CHARACTER(len=*), INTENT(in) :: extyp ! ('send', 'recv', 'sendrecv') CHARACTER(len=*), INTENT(in) :: op ! ('put', 'updt') INTEGER, INTENT(in) :: comm \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Point-to-point combine sparse matrix between 2 processes. \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object dest : rank of remote process extyp : exchange type ('send', 'recv', 'sendrecv') op : operation type ('put', 'updt') comm : communicator \end{verbatim} \end{description} \subsection{get\_count} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} INTEGER FUNCTION get_count(mat, nnz) TYPE([z]mumps_mat) :: mat INTEGER, INTENT(out), OPTIONAL :: nnz(:) \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Returns the number of non-zeros and optionally an array of numbers of non-zeros on each row \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object nnz : array containing numbers of non-zeros on each row. \end{verbatim} \end{description} \subsection{factor} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE factor(mat, nlreord) TYPE([z]mumps_mat) :: mat LOGICAL, OPTIONAL, INTENT(in) :: nlreord \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Wrapper of to\_mat, reord\_mat and numfact \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object nlreord : call reord_mat if .TRUE. (default is .TRUE.) \end{verbatim} \end{description} \subsection{bsolve} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE bsolve_mumps_mat1(mat, rhs, sol, nref) TYPE([z]mumps_mat) :: mat DOUBLE PRECISION|COMPLEX :: rhs(:) DOUBLE PRECISION|COMPLEX, OPTIONAL :: sol(:) INTEGER, OPTIONAL :: nref \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Wrapper of to\_mat, reord\_mat and numfact \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object rhs : input right-hand-side, overwriten by the solution if sol is not present sol : contains solution ref : maximum number of refinement steps. Default is 0 (no refinement). debug : verbose output from MUMPS if .TRUE. Default is .FALSE. \end{verbatim} \end{description} \subsection{to\_mat} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE to_mat(mat) TYPE([z]mumps_mat) :: mat \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Convert linked list spmat to mumps matrix structure \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object \end{verbatim} \end{description} \subsection{reord\_mat} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE reord_mat(mat) TYPE([z]mumps_mat) :: mat \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Reordering and symbolic factorization \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object \end{verbatim} \end{description} \subsection{numfact} \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} SUBROUTINE numfact(mat) TYPE([z]mumps_mat) :: mat \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} \begin{description} \item[Purpose:] \mbox{} Numerical factorization \item[Arguments:] \mbox{} \begin{verbatim} mat : matrix object \end{verbatim} \end{description} \begin{thebibliography}{99} \bibitem{BSPLINES} {\tt BSPLINES} Reference Guide. \bibitem{PARDISO} \url{http://www.pardiso-project.org/} \bibitem{WSMP} \url{http://www-users.cs.umn.edu/~agupta/wsmp.html} \bibitem{MUMPS} \url{http://graal.ens-lyon.fr/MUMPS/} \bibitem{McMillan} B. F. McMillan, et. al. \emph{Rapid Fourier space solution of linear partial integro-differential equations in toroidal magnetic confinement geometries}, Computer Physics Communications 181(4), 715-719 (2010) \end{thebibliography} \end{document} diff --git a/docs/manual/using_bsplines.tex b/docs/manual/using_bsplines.tex index fec665a..974735d 100644 --- a/docs/manual/using_bsplines.tex +++ b/docs/manual/using_bsplines.tex @@ -1,366 +1,366 @@ % % @file using_bsplines.tex % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % \documentclass[a4paper]{article} \usepackage{linuxdoc-sgml} \usepackage{graphicx} \usepackage{hyperref} \usepackage{amsmath} %\usepackage{verbatim} %\usepackage[notref]{showkeys} \title{\tt Using BSPLINES in Particle Codes} \author{Trach-Minh Tran} \date{v0.1, March 2012} \abstract{These notes present some practical considerations on using BSPLINES in particle codes, in particular for the charge or current assignment as well as the field interpolation. Performance measurements are done on an Intel Xeon X5570 and the more recent Xeon E5-2680.} \begin{document} \maketitle %\tableofcontents \section{Introduction} For simplicity, we assume in these notes that we are dealing with a 2D electrostatic particle code and the 2D Poisson equation is to be solved using the Finite Element Method. Starting from the \emph{weak form} and using the \emph{splines} for both \emph{basis} and \emph{test} functions, the electrostatic field potential together with its gradient and the right hand side can be computed from \begin{equation} \begin{split} \phi(x,y) &= \sum_{ij}\,c_{ij}\,\Lambda_i(x)\Lambda_j(y) \\ \frac{\partial\phi}{\partial x} &= \sum_{ij}\,c_{ij}\,\Lambda'_i(x)\Lambda_j(y) \\ \frac{\partial\phi}{\partial y} &= \sum_{ij}\,c_{ij}\,\Lambda_i(x)\Lambda'_j(y) \\ S_{ij} &= \sum_{\mu=1}^{N_p}\, q_\mu\Lambda_i(x_\mu)\Lambda_j(y_\mu), \end{split} \end{equation} where $c_{ij}$ are the solutions of the discretized Poisson equation and $\{x_\mu,y_\mu\}$ are the coordinates of the $N_p$ simulation particles. At each time step, the calculation of both the field $\phi$ and its gradient (\emph{field interpolation}) for the particle pusher and the construction of the RHS $S_{i}$ (\emph{charge assignment}) involve thus the computation of a large number of splines $\Lambda$ and its derivatives $\Lambda'$. Notice that the construction of the solver matrix requires also the calculations of the splines. This operation is however performed only once at the initial timestep in the (most common) case where the matrix is time independent and thus will not be considered in further these notes. \section{Computation of splines} Let consider the grid defined by $x_i$, $i=1,\ldots,N+1$. Inside the interval $[x_i, x_{i+1}]$, the $p+1$ non-zero splines of degree $p$ can be computed efficiently using its polynomial representation given by \begin{equation} \begin{split} \Lambda_{i+\alpha}(x) &= \sum_{k=0}^{p}\, V^{i}_{k\alpha} (x-x_i)^k, \qquad \alpha=1,\ldots,p+1, \\ V^i_{k\alpha} &= \left.\frac{1}{k!}\frac{d^k}{dx^k}\Lambda_{i+\alpha}(x)\right|_{x=x_i}. \end{split} \end{equation} The $(p+1)^2N$ coefficients $V^i_{k\alpha}$ are precalculated and stored during the spline initialization (in routine {\tt SET\_SPLINE}) by using the \emph{recurrence relation} \cite{BSPLINES} to compute the spline and all its $p$ derivatives. Note that for periodic splines on an equidistant mesh, only $(p+1)^2$ coefficients $V_{k\alpha}$ are required since the splines have \emph{translational invariance}. For a polynomial $P(x)=a_0+a_1x+\ldots +a_px^p$, its value can be calculated together with it first derivative, using Horner's rule as: \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} f = a(p) fp = f DO i=p-1,1,-1 f = a(i) + x*f fp = f + x*fp END DO f = a(0) + x*f \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} showing that exactly $4p-2$ floating operations (flops) per point are required. If only the value of the polynomial is needed, only $2p$ flops per point are required. \section{Field interpolation} \subsection{1D case} Let considered first the 1D case. The spline expansion of $\phi$ for $x_i\le x < x_{i+1}$ are expressed as \begin{equation} \phi(x) = \sum_{\alpha=0}^pc_{i+\alpha}\Lambda_{i+\alpha}(x). \end{equation} To calculate the field using this spline expansion, $p+1$ splines have to be first calculated followed by the sum above, which yields a total cost of $2(p+1)^2\sim 2p^2$ flops per point. This cost can be reduced by observing that $\phi(x)$ is a \emph{piecewise polynomial} (PP) of degree $p$ in each interval. Its PP coefficients can be obtained from \begin{equation} \begin{split} \phi(x) &= \sum_{\alpha=0}^pc_{i+\alpha}\sum_{k=0}^{p}\, V^{i}_{k\alpha} (x-x_i)^k \\ &= \sum_{k=0}^{p}\, \Pi^{i}_{k}(x-x_i)^k, \qquad \Pi^{i}_{k}= \sum_{\alpha=0}^pc_{i+\alpha} V^{i}_{k\alpha} \end{split} \end{equation} Once the $N(p+1)$ PP coefficients $\Pi^{i}_{k}$ have been calculated from the spline expansion coefficients $c_{i+\alpha}$, only $2p$ flops per point are required to obtain the field value, using the Horner's rule described previously. \subsection{2D case} Extension for the spline expansion and the PP representation for $\phi(x,y)$ is straightforwards and yields, for $x_i\le x < x_{i+1}$, $y_j\le y < y_{j+1}$: \begin{equation} \begin{split} \phi(x,y) &= \sum_{\alpha=0}^{p1}\sum_{\beta=0}^{p2}c_{i+\alpha,j+\beta} \Lambda_{i+\alpha}(x)\Lambda_{j+\beta}(y) \\ \phi(x,y) &= \sum_{k=0}^{p1}\sum_{l=0}^{p2}\,\Pi^{ij}_{kl}(x-x_i)^k(y-y_j)^l, \qquad \Pi^{ij}_{kl}= \sum_{\alpha=0}^{p1}\sum_{\beta=0}^{p2}c_{i+\alpha,j+\beta} V^{i}_{k\alpha}V^{j}_{l\beta}, \end{split} \end{equation} where $ V^{i}_{k\alpha}$ and $V^{j}_{l\beta}$ are the PP coefficients of the splines $\Lambda_{i+\alpha}(x)$ and $\Lambda_{j+\beta}(y)$ respectively. Assuming the same spline order $p$ in both $x$ and $y$, the flop counts per point for the 2 representations are respectively $2(3p+2)(p+1)\sim 6p^2$ and $2p(p+2)\sim 2p^2$, while the storages required for the spline coefficients $c$ and the PP coefficients $\Pi$ are $(N+p)^2\sim N^2$ and $N^2(p+1)^2$ respectively. \subsection{Implementation in BSPLINES} The PP representation is selected by default in BSPLINES, \emph{unless} the logical keyword {\tt NLPPFORM} is set to {\tt .FALSE.} when calling the spline initialization routine {\tt SET\_SPLINE}. The flop counts per point for both methods are summarized in the table below \begin{center} \begin{tabular}{|l|c|c|} \hline & 1D & 2D \\\hline Spline expansion & $2(p+1)^{2}$ & $2(3p+2)(p+1)$ \\ PP representation & $2p$ & $2p(p+2)$ \\\hline \end{tabular} \end{center} The routine {\tt GRIDVAL} computes the value of the field or one of its derivatives. The first call to this routine computes the PP coefficients $\Pi$ if {\tt NLPPFORM=.TRUE.} is selected or just store the spline coefficients $c$ in the spline internal data otherwise. In the following calls to {\tt GRIDVAL}, $c$ should not be passed to the routines. Notice that the PP representation requires to store the $N^2(p+1)^2$ PP coefficients in the 2D case, which is still acceptable. In the 3D case, this storage requirement becomes $N^3(p+1)^3$ which can be prohibitive! In this case the less efficient \emph{Spline expansion} formulation should be selected. In the \emph{particle loop}, the routine {\tt GETGRAD} which computes the function and all its first partial derivatives at once should be called instead of {\tt GRIDVAL}. \section{Particle localization({\tt locintv})} In both charge assignment and field interpolation, finding in which interval of the spatial grid the particle is localized should be first performed. This operation is trivial for the case of an equidistant mesh. For non-equidistant mesh, an \emph{equidistant fine} mesh and its mapping to the actual mesh are first constructed in the spline initialization routine {\tt SET\_SPLINE} and used to localize the particles in the routine {\tt LOCINTV}. \section{Performances} From the considerations above, using BSPLINES to perform the charge assignment and field interpolation in 2D and 3D particle codes might result in large overheads because of the large number of calls to the routines {\tt BASFUN} to compute the splines or {\tt GETGRAD} to perform the field interpolation at a \emph{single} particle position. In the following, the performances the 2D linearized gyrokinetic code GYGLES which has been adapted to use BSPLINES are analyzed. Vectorization by grouping the particles for both charge assignment and field interpolation is then proposed as a way to speed up these two operations when using BSPLINES. \subsection{Scalar performances} Optimization of the scalar versions of {\tt BASFUN} and {\tt GETGRAD} (when these routines are called with a \emph{single} particle) is performed essentially by \begin{itemize} \item Minimizing the flop counts and reducing redundant operations. \item Unrolling small loops, for example the loop over the $p+1$ splines that are non-zero at a given position, for small $p$. \item Define all routines called by {\tt BASFUN} and {\tt GETGRAD} as \emph{internal procedures}. \item Rearranging the memory layout of the multi-dimension array containing the PP coefficients of the spline. \end{itemize} The timings of the charge and current assignment (assign), the particle pusher (push) and the main time loop for a 5 time step run of GYGLES, on an Intel Xeon X5570 (hpcff.fz-juelich.de), using 4 MPI processes and Intel Fortran 12.1.2 are summarized in the following table \begin{center} \begin{tabular}{lrrrrr} \hline & $T_0$(s) & $T_1$(s) & $T_2$(s) & $T_1/T_0$ & $T_2/T_1$ \\ \hline assign & 1.454E+01 & 2.126E+01 & 2.259E+01 & 1.46 & 1.06 \\ push & 2.536E+01 & 3.080E+01 & 3.144E+01 & 1.21 & 1.02 \\ mainloop & 4.197E+01 & 5.955E+01 & 6.149E+01 & 1.42 & 1.03 \\ \hline \end{tabular} \end{center} where $T_0$ is the time in seconds obtained with the original code while $T_1$ and $T_2$ are the times obtained with BSPLINES, respectively using an \emph{equidistant} and \emph{non-equidistant} radial mesh. In all the 3 runs, a quadratic splines were used. The small difference between \emph{equidistant} and \emph{non-equidistant} mesh comes mainly from the particle localization. The same run on an Intel Xeon E5-2680 (helios.iferc-csc.org), using the same Intel compiler (with AVX instructions) yields \begin{center} \begin{tabular}{lrrrrr} \hline & $T_0$(s) & $T_1$(s) & $T_2$(s) & $T_1/T_0$ & $T_2/T_1$ \\ \hline assign & 1.093E+01 & 1.987E+01 & 2.086E+01 & 1.82 & 1.05 \\ push & 2.385E+01 & 2.868E+01 & 2.994E+01 & 1.20 & 1.04 \\ mainloop & 3.656E+01 & 5.411E+01 & 5.598E+01 & 1.48 & 1.03 \\ \hline \end{tabular} \end{center} \subsection{Speed up by vectorization} As found in the last section, using external routines from BSPLINES instead of \emph{hard coding} the spline computations results in a slowing down of 40--50\% for the main time loop. As will shown later, this problem could be solved by \emph{grouping} the particles and using the vectorized {\tt BASFUN} and {\tt GETGRAD} routines. Such particle grouping can be done for example, by replacing the usual particle loop by the following Fortran code fragment \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{verbatim} nset = npart/ngroup IF(MODULO(npt, ngroup).NE.0) nset = nset+1 i2 = 0 DO is=1,nset i1 = i2+1 i2 = MIN(i2+ngroup,npart) CALL basfun(x(i1:i2), ...) END DO \end{verbatim} \nopagebreak\hrule \addvspace{\medskipamount} where {\tt npart} particles are partitioned into {\tt nset} groups, each containing at most {\tt ngroup} particles. Vectorization of the routines {\tt BASFUN} and {\tt GETGRAD} is achieved by moving whenever is possible the loop over the {\tt ngroup} particles into the innermost loop. The vectorization performances shown in Fig~.\ref{fig:basfun_hpcff} and Fig~.\ref{fig:getgrad_hpcff}, respectively for {\tt BASFUN} and {\tt GETGRAD} are obtained using version $12.1.2$ of Intel compiler on an Intel Xeon X5570 (hpcff.fz-juelich.de). With a speedup of at least 2 for quadratic splines, the slowing down found previously in the scalar version could be likely compensated. The new AVX instructions present in the recent Intel Xeon E5-2680 (helios.iferc-csc.org) seems to improve somewhat the vectorization performance as shown in Fig~.\ref{fig:basfun_helios} and Fig~.\ref{fig:getgrad_helios}. \begin{figure} \centering \includegraphics[angle=0,width=\hsize]{basfun_perf_hpcff} \caption{In this test, $10^5$ particles are distributed randomly on an equidistant mesh of 64 intervals. On each point, all the $p+1$ splines are computed. The particle localization routine {\tt locintv} is included in the timing. In order to have a good statistics in the measurements, $1'000$ iterations of the particle loop are considered.} \label{fig:basfun_hpcff} \end{figure} \begin{figure} \centering \includegraphics[angle=0,width=\hsize]{getgrad_perf_hpcff} \caption{In this test, $10^5$ particles are distributed randomly on an equidistant 2D $(x,y)$ mesh of $64\times 64$ intervals, where the coordinate $y$ is periodic. On each point, the function together with its gradient are computed, using the PP representation. The particle localization routine {\tt locintv} is included in the timing. In order to have a good statistics in the measurements, $100$ iterations of the particle loop are considered.} \label{fig:getgrad_hpcff} \end{figure} \begin{figure} \centering \includegraphics[angle=0,width=\hsize]{basfun_perf_helios} \caption{In this test, $10^5$ particles are distributed randomly on an equidistant mesh of 64 intervals. On each point, all the $p+1$ splines are computed. The particle localization routine {\tt locintv} is included in the timing. In order to have a good statistics in the measurements, $1'000$ iterations of the particle loop are considered.} \label{fig:basfun_helios} \end{figure} \begin{figure} \centering \includegraphics[angle=0,width=\hsize]{getgrad_perf_helios} \caption{In this test, $10^5$ particles are distributed randomly on an equidistant 2D $(x,y)$ mesh of $64\times 64$ intervals, where the coordinate $y$ is periodic. On each point, the function together with its gradient are computed, using the PP representation. The particle localization routine {\tt locintv} is included in the timing. In order to have a good statistics in the measurements, $100$ iterations of the particle loop are considered.} \label{fig:getgrad_helios} \end{figure} \begin{thebibliography}{99} \bibitem{BSPLINES} {\tt BSPLINES} Reference Guide. \end{thebibliography} \end{document} diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 4169dde..4b28ada 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -1,77 +1,77 @@ # # @file CMakeLists.txt # # @brief # # @copyright # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) # SPC (Swiss Plasma Center) # -# spclibs is free software: you can redistribute it and/or modify it under +# SPClibs is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) # any later version. # -# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . # # @authors # (in alphabetical order) # @author Nicolas Richart # @author Trach-Minh Tran # project(bsplines_tests) add_library(local_util STATIC pde1dp_mod.f90 pde3d_mod.f90 ppde3d_mod.f90 ppde3d_pb_mod.f90 tcdsmat_mod.f90 meshdist.f90 dismat.f90 ibcmat.f90 disrhs.f90 ) target_link_libraries(local_util PUBLIC bsplines) set(BS_TESTS driv1 driv2 driv3 driv4 pde1d pde1dp pde1dp_cmpl pde2d pde2d_pb pde1dp_cmpl_dft pde3d ppde3d ppde3d_pb fit1d fit1dbc fit1dp fit2d fit2d1d fit2d_cmpl fit2dbc fit2dbc_x fit2dbc_y moments optim1 optim2 optim3 tcdsmat tmassmat tbasfun tsparse1 basfun_perf getgrad_perf gridval_perf test_kron ) if(HAS_PARDISO) set(BS_TESTS ${BS_TESTS} pde1dp_cmpl_pardiso pde2d_pardiso pde2d_sym_pardiso pde2d_sym_pardiso_dft tsparse2 ) endif() if(HAS_MUMPS) set(BS_TESTS ${BS_TESTS} pde2d_mumps pde1dp_cmpl_mumps ) endif() foreach(test ${BS_TESTS}) add_executable(${test} ${test}.f90) target_link_libraries(${test} local_util ${LIBS} ${EXTRA_LIBS}) endforeach() diff --git a/examples/Makefile b/examples/Makefile index 552d03f..0e45321 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -1,506 +1,506 @@ # # @file Makefile # # @brief # # @copyright # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) # SPC (Swiss Plasma Center) # -# spclibs is free software: you can redistribute it and/or modify it under +# SPClibs is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) # any later version. # -# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . # # @authors # (in alphabetical order) # @author Emmanuel Lanti # @author Trach-Minh Tran # PREFIX=$(HOME) XGRAFIX=/usr/local/xgrafix_1.2/src-double # FUTILS=/usr/local/crpp/futils # BSPLINES=/usr/local/crpp/bsplines # PPUTILS2=../pputils2 # PPPACK=../pppack # SLATEC=/usr/local/slatec # FFTW=/usr/local/fftw-2.1.5-opt ARPACK=/usr/local/ARPACK LAPACK95=$(MKL)/../../../mkl/include/intel64/lp64 MPIF90 = mpif90 F90 = ifort LD = $(MPIF90) debug = -g -traceback -check bounds -warn alignments -warn unused optim = -O3 -xHOST OPT=$(debug) #OPT=$(optim) F90FLAGS = $(OPT) -fPIC -I../fft -I$(BSPLINES)/include -I$(FUTILS)/include \ -I$(FFTW)/include -I$(MKL)/../../include/intel64/lp64 LDFLAGS = $(OPT) -fPIC -L$(FUTILS)/lib -L$(BSPLINES)/lib -L${HDF5}/lib -L$(FFTW)/lib64 \ -L$(SLATEC)/lib -L$(ARPACK) CC = cc CFLAGS = -O2 LIBS = -mkl=cluster -lbsplines -lpppack -lpppack -lpputils2 -lfutils -lfft -larpack \ -lfftw -lhdf5_fortran -lhdf5 -lz -lpthread LIBS1 = -mkl=cluster -lbsplines1 -lpppack -lfutils \ -lhdf5_fortran -lhdf5 -lz -lsz -lpthread ifdef WSMP LDFLAGS += -L$(WSMP) LIBS += -lwsmp64 LIBS1 += -lpwsmp64 endif ifdef MUMPS F90FLAGS += -I$(MUMPS)/include -I$(LAPACK95) LDFLAGS += -L$(MUMPS)/lib -L$(PARMETIS)/lib LIBS += $(MUMPSLIBS) endif ifdef PETSC_DIR include ${PETSC_DIR}/conf/variables F90FLAGS += -I$(PETSC_DIR)/include -I$(PETSC_DIR)/$(PETSC_ARCH)/include \ -I$(MKL)/../../include LIBS += ${PETSC_FORTRAN_LIB} ${PETSC_KSP_LIB} endif PDE1DOBJS = pde1d.o PDE2DOBJS = pde2d.o dismat.o disrhs.o ibcmat.o FIT1DOBJJS = fit1d.o .SUFFIXES: .SUFFIXES: .o .c .f90 .f .f90.o: $(MPIF90) $(F90FLAGS) -c $< .f.o: $(F90) $(F90FLAGS) -c $< all: examples tmat EX_FILES = driv1 driv2 driv3 driv4 pde1d pde1dp pde1dp_cmpl pde3d ppde3d ppde3d_pb \ pde2d pde2d_pb fit1d fit1dbc \ fit1dp fit2d fit2d1d fit2d_cmpl fit2dbc fit2dbc_x fit2dbc_y \ moments optim1 optim2 optim3 tmassmat tbasfun tcdsmat tsparse1 tsparse2 \ pde2d_pardiso pde2d_sym_pardiso pde1dp_cmpl_pardiso pde1dp_cmpl_dft \ pde2d_sym_pardiso_dft \ pde1d_eig_csr pde1d_eig_gb pde1d_eig_ge ifdef WSMP EX_FILES += pde2d_wsmp pde2d_pwsmp pde2d_sym_wsmp pde1dp_cmpl_wsmp endif ifdef MUMPS EX_FILES += pde2d_mumps pde1dp_cmpl_mumps pde1d_eig_zmumps endif ifdef PETSC_DIR EX_FILES += pde2d_petsc endif examples: $(EX_FILES) tmat: tmatrix_gb tmatrix_pb tmatrix_zpb adv: adv.o extra.o $(LD) $(LDFLAGS) -L$(XGRAFIX) -o $@ $< extra.o $(LIBS) \ -lfftw -lXGF -lXGC -lX11 cp -p $@ ../bin/ driv1: driv1.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) cp -p $@ ../bin/ driv2: driv2.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) cp -p $@ ../bin/ driv3: driv3.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) cp -p $@ ../bin/ driv4: driv4.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) cp -p $@ ../bin/ pde1d: $(PDE1DOBJS) $(LD) $(LDFLAGS) -o $@ $(PDE1DOBJS) $(LIBS) cp -p $@ ../bin/ pde1d_eig_csr: pde1d_eig_csr.o $(LD) $(LDFLAGS) -o $@ pde1d_eig_csr.o $(LIBS) -lpputils2 cp -p $@ ../bin/ pde1d_eig_zcsr: pde1d_eig_zcsr.o $(LD) $(LDFLAGS) -o $@ pde1d_eig_zcsr.o $(LIBS) cp -p $@ ../bin/ pde1d_eig_zmumps: pde1d_eig_zmumps.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) -lpputils2 cp -p $@ ../bin/ pde1d_eig_zcsr: pde1d_eig_zcsr.o $(LD) $(LDFLAGS) -o $@ pde1d_eig_zcsr.o $(LIBS) cp -p $@ ../bin/ pde1d_eig_gb: pde1d_eig_gb.o $(LD) $(LDFLAGS) -o $@ pde1d_eig_gb.o $(LIBS) cp -p $@ ../bin/ pde1d_eig_ge: pde1d_eig_ge.o $(LD) $(LDFLAGS) -o $@ pde1d_eig_ge.o $(LIBS) cp -p $@ ../bin/ pde1dp: pde1dp.o pde1dp_mod.o $(LD) $(LDFLAGS) -o $@ pde1dp.o pde1dp_mod.o $(LIBS) cp -p $@ ../bin/ pde1dp_cmpl: pde1dp_cmpl.o $(LD) $(LDFLAGS) -o $@ pde1dp_cmpl.o $(LIBS) cp -p $@ ../bin/ pde1dp_KA: pde1dp_KA.o pde1dp_mod_KA.o $(LD) $(LDFLAGS) -o $@ pde1dp_KA.o pde1dp_mod_KA.o $(LIBS) cp -p $@ ../bin/ pde2d: $(PDE2DOBJS) $(LD) $(LDFLAGS) -o $@ $(PDE2DOBJS) $(LIBS) cp -p $@ ../bin/ pde3d: pde3d.o pde3d_mod.o $(LD) $(LDFLAGS) -o $@ pde3d.o pde3d_mod.o $(LIBS) cp -p $@ ../bin/ ppde3d: ppde3d.o ppde3d_mod.o $(LD) $(LDFLAGS) -o $@ ppde3d.o ppde3d_mod.o $(LIBS) cp -p $@ ../bin/ ppde3d_pb: ppde3d_pb.o ppde3d_pb_mod.o $(LD) $(LDFLAGS) -o $@ ppde3d_pb.o ppde3d_pb_mod.o $(LIBS) cp -p $@ ../bin/ pde2d_pb: pde2d_pb.o $(LD) $(LDFLAGS) -o $@ pde2d_pb.o $(LIBS) cp -p $@ ../bin/ pde2d_nh: pde2d_nh.o $(LD) $(LDFLAGS) -o $@ pde2d_nh.o $(LIBS) cp -p $@ ../bin/ tcdsmat: tcdsmat.o tcdsmat_mod.o meshdist.o $(LD) $(LDFLAGS) -o $@ tcdsmat.o tcdsmat_mod.o meshdist.o $(LIBS) cp -p $@ ../bin/ tmatrix_pb: tmatrix_pb.o $(LD) $(LDFLAGS) -o $@ tmatrix_pb.o $(LIBS) cp -p $@ ../bin/ tmatrix_zpb: tmatrix_zpb.o $(LD) $(LDFLAGS) -o $@ tmatrix_zpb.o $(LIBS) cp -p $@ ../bin/ tmatrix_gb: tmatrix_gb.o $(LD) $(LDFLAGS) -o $@ tmatrix_gb.o $(LIBS) cp -p $@ ../bin/ fit1d: $(FIT1DOBJJS) $(LD) $(LDFLAGS) -o $@ $(FIT1DOBJJS) $(LIBS) cp -p $@ ../bin/ fit1d_cmpl: fit1d_cmpl.o $(LD) $(LDFLAGS) -o $@ fit1d_cmpl.o $(LIBS) cp -p $@ ../bin/ gyro: gyro.o $(LD) $(LDFLAGS) -o $@ gyro.o -lslatec $(LIBS) cp -p $@ ../bin/ fit1dbc: fit1dbc.o $(LD) $(LDFLAGS) -o $@ fit1dbc.o $(LIBS) cp -p $@ ../bin/ fit1dp: fit1dp.o $(LD) $(LDFLAGS) -o $@ fit1dp.o $(LIBS) cp -p $@ ../bin/ fit2d: fit2d.o meshdist.o $(LD) $(LDFLAGS) -o $@ fit2d.o meshdist.o $(LIBS) cp -p $@ ../bin/ fit2d1d: fit2d1d.o meshdist.o $(LD) $(LDFLAGS) -o $@ fit2d1d.o meshdist.o $(LIBS) cp -p $@ ../bin/ fit2d_cmpl: fit2d_cmpl.o meshdist.o $(LD) $(LDFLAGS) -o $@ fit2d_cmpl.o meshdist.o $(LIBS) cp -p $@ ../bin/ fit2dbc: fit2dbc.o meshdist.o $(LD) $(LDFLAGS) -o $@ fit2dbc.o meshdist.o $(LIBS) cp -p $@ ../bin/ fit2dbc_x: fit2dbc_x.o meshdist.o $(LD) $(LDFLAGS) -o $@ fit2dbc_x.o meshdist.o $(LIBS) cp -p $@ ../bin/ fit2dbc_y: fit2dbc_y.o meshdist.o $(LD) $(LDFLAGS) -o $@ fit2dbc_y.o meshdist.o $(LIBS) cp -p $@ ../bin/ moments: moments.o $(LD) $(LDFLAGS) -o $@ moments.o $(LIBS) cp -p $@ ../bin/ mesh: mesh.o meshdist.o $(LD) $(LDFLAGS) -o $@ mesh.o meshdist.o $(LIBS) cp -p $@ ../bin/ optim1: optim1.o $(LD) $(LDFLAGS) -o $@ optim1.o $(LIBS) cp -p $@ ../bin/ optim2: optim2.o $(LD) $(LDFLAGS) -o $@ optim2.o $(LIBS) cp -p $@ ../bin/ optim3: optim3.o $(LD) $(LDFLAGS) -o $@ optim3.o $(LIBS) cp -p $@ ../bin/ tmassmat: tmassmat.o $(LD) $(LDFLAGS) -o $@ tmassmat.o $(LIBS) cp -p $@ ../bin/ tbasfun: tbasfun.o $(LD) $(LDFLAGS) -o $@ tbasfun.o $(LIBS) cp -p $@ ../bin/ basfun_perf: basfun_perf.o $(LD) $(LDFLAGS) -o $@ basfun_perf.o $(LIBS) cp -p $@ ../bin/ gridval_perf: gridval_perf.o $(LD) $(LDFLAGS) -o $@ gridval_perf.o $(LIBS) cp -p $@ ../bin/ getgrad_perf: getgrad_perf.o $(LD) $(LDFLAGS) -o $@ getgrad_perf.o $(LIBS) cp -p $@ ../bin/ basfun_perf1: basfun_perf1.o $(LD) $(LDFLAGS) -o $@ basfun_perf1.o $(LIBS) cp -p $@ ../bin/ tlocintv: tlocintv.o meshdist.o $(LD) $(LDFLAGS) -o $@ tlocintv.o meshdist.o $(LIBS) cp -p $@ ../bin/ tgausleg: tgausleg.o meshdist.o $(LD) $(LDFLAGS) -o $@ tgausleg.o meshdist.o $(LIBS) cp -p $@ ../bin/ poisson: poisson.o $(LD) $(LDFLAGS) -o $@ poisson.o $(LIBS) cp -p $@ ../bin/ poisson_mumps: poisson_mumps.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) cp -p $@ ../bin/ poisson_petsc: poisson_petsc.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) cp -p $@ ../bin/ tsparse1: tsparse1.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) cp -p $@ ../bin/ tsparse2: tsparse2.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) cp -p $@ ../bin/ pde2d_pardiso: pde2d_pardiso.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) cp -p $@ ../bin/ pde2d_mumps: pde2d_mumps.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) cp -p $@ ../bin/ pde2d_petsc: pde2d_petsc.o $(PPUTILS2)/pputils2.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) cp -p $@ ../bin/ pde2d_sym_pardiso: pde2d_sym_pardiso.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) cp -p $@ ../bin/ pde2d_sym_pardiso_dft: pde2d_sym_pardiso_dft.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) cp -p $@ ../bin/ pde2d_sym_wsmp_dft: pde2d_sym_wsmp_dft.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) cp -p $@ ../bin/ zssmp_ex1: zssmp_ex1.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) cp -p $@ ../bin/ zpardiso_ex1: zpardiso_ex1.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) cp -p $@ ../bin/ pde2d_sym_wsmp: pde2d_sym_wsmp.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) cp -p $@ ../bin/ pde2d_wsmp: pde2d_wsmp.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) cp -p $@ ../bin/ pde2d_pwsmp: pde2d_pwsmp.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS1) cp -p $@ ../bin/ pde1dp_cmpl_pardiso: pde1dp_cmpl_pardiso.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) cp -p $@ ../bin/ pde1dp_cmpl_mumps: pde1dp_cmpl_mumps.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) cp -p $@ ../bin/ pde1dp_cmpl_dft: pde1dp_cmpl_dft.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) cp -p $@ ../bin/ pde1dp_cmpl_wsmp: pde1dp_cmpl_wsmp.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) cp -p $@ ../bin/ tspline: tspline.o $(LD) $(LDFLAGS) -o $@ tspline.o $(LIBS) cp -p $@ ../bin/ tpsum_mat: tpsum_mat.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) cp -p $@ ../bin/ tp2p_mat: tp2p_mat.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) cp -p $@ ../bin/ test_pwsmp: test_pwsmp.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS1) cp -p $@ ../bin/ driv1.o: driv2.o: driv3.o: driv4.o: pde1d.o: pde1dp.o: pde1dp_mod.o pde1dp_cmpl.o: pde1dp_mod.o: pde1dp_KA.o: pde1dp_mod_KA.o pde1dp_mod_KA.o: pde2d.o: pde3d.o: pde3d_mod.o pde3d_mod.o: ppde3d.o: ppde3d_mod.o ppde3d_mod.o: ppde3d_pb.o: ppde3d_pb_mod.o ppde3d_pb_mod.o: pde2d_pb.o: pde2d_nh.o: tcdsmat.o: tcdsmat_mod.o tcdsmat_mod.o: fit1d.o: fit1d_cmpl.o: gyro.o: fit1dbc.o: fit1dp.o: fit2d.o: fit2d_cmpl.o: fit2dbc.o: dismat.o: disrhs.o: ibcmat.o: adv.o: tmatrix_pb.o: tmatrix_zpb.o: tmatrix_gb.o: moments.o: mesh.o: optim1.o: optim2.o: optim3.o: tmassmat.o: tbasfun.o: basfun_perf.o: basfun_perf1.o: tlocintv.o: tgausleg.o: poisson.o: poisson_mumps.o: tsparse1.o: tsparse2.o: pde2d_pardiso.o: pde2d_mumps.o: pde2d_petsc.o: pde2d_sym_pardiso.o: pde2d_sym_pardiso_dft.o: pde2d_sym_wsmp_dft.o: pde2d_wsmp.o: pde2d_pwsmp.o: pde2d_sym_wsmp.o: pde1dp_cmpl_pardiso.o: pde1dp_cmpl_mumps.o: pde1dp_cmpl_dft.o: pde1dp_cmpl_wsmp.o: tpsum_mat.o: tp2p_mat.o: poisson_petsc.o: tags: etags *.f *.f90 ../src/*.f90 $(PPPACK)/*.f90 clean: rm -f *.o *.mod *~ a.out distclean: clean # $(MAKE) -C ../src distclean # $(MAKE) -C ../fft distclean # $(MAKE) -C $(PPUTILS2) distclean rm -f *.a *.mod pde1d pde1dp pde1dp_cmpl pde1dp_KA driv1 driv2 driv3 driv4 \ tmatrix_pb tmatrix_gb tmatrix_zpb \ pde2d pde2d_pb pde2d_nh pde3d ppde3d ppde3d_pb\ fit1d fit1d_cmpl gyro fit1dbc fit1dp \ fit2d fit2d1d fit2d_cmpl fit2dbc \ fit2dbc_x fit2dbc_y adv moments tcdsmat poisson poisson_mumps\ mesh optim1 optim2 optim3 tmassmat tbasfun \ basfun_perf gridval_perf getgrad_perf tlocintv \ tsparse1 tsparse2 \ pde2d_pardiso pde2d_sym_pardiso pde2d_wsmp pde2d_sym_wsmp \ pde1dp_cmpl_dft pde1dp_cmpl_wsmp pde1d_eig_csr pde1d_eig_pb pde1d_eig_ge \ pde2d_sym_pardiso_dft pde1dp_cmpl_pardiso \ pde2d_mumps pde1dp_cmpl_mumps tpsum_mat tp2p_mat pde2d_sym_wsmp_dft \ poisson_petsc pde2d_petsc \ pde1d_eig_csr pde1d_eig_zcsr pde1d_eig_zmumps pde1d_eig_gb pde1d_eig.ge \ ../bin/* diff --git a/examples/adv.f90 b/examples/adv.f90 index 8c40a29..c3fc591 100644 --- a/examples/adv.f90 +++ b/examples/adv.f90 @@ -1,320 +1,320 @@ !> !> @file adv.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! 1d Periodic Advection: F(x,t) = F(x-u*dt,t-dt) ! using module bsplines ! USE bsplines ! IMPLICIT NONE INCLUDE 'fftw_f77.h' TYPE(spline1d) :: spl INTEGER, PARAMETER :: nhistmx=1000, ncomb=4 INTEGER :: nx, nidbas, dim INTEGER :: nstep, nskipt, nhist, mhist DOUBLE PRECISION :: a, b, dt, u, w, coefx(5) DOUBLE PRECISION, DIMENSION(0:nhistmx) :: thist, tmass, tfmin, tfmax, ermass DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), fgrid(:), xshft(:) DOUBLE PRECISION, ALLOCATABLE :: coefs(:), ferr(:), kx(:), ampl(:) DOUBLE COMPLEX, ALLOCATABLE :: cfgrid(:), ffft(:) INTEGER(8) :: forw DOUBLE PRECISION :: time INTEGER :: i NAMELIST /newrun/ nx, nidbas, a, b, dt, u, w, coefx !=========================================================================== ! 1.0 Prologue ! ! Read in data specific to run ! nx = 100 ! Number of intevals in x a = 0.0 ! Left boundary of interval b = 100.0 ! Right boundary of interval dt = 0.1 ! Time step u = 1.0 ! Velocity w = 2.0 ! Shape of initial function coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function ! READ(*,newrun) WRITE(*,newrun) !=========================================================================== ! 2.0 Define initial conditions ! ! Set up mesh ! ALLOCATE(xgrid(0:nx), xshft(0:nx), fgrid(0:nx), ferr(0:nx)) xgrid(0) = a xgrid(nx) = b CALL meshdist(coefx, xgrid, nx) ! ! Set up the spline interpolation ! CALL set_splcoef(nidbas, xgrid, spl, period=.TRUE.) CALL get_dim(spl, dim) WRITE(*,'(a,i6)') 'dimension of splines', dim ALLOCATE(coefs(dim)) ! ! Initial conditions ! time = 0.0d0 nstep = 0 nskipt = 1 DO i=0,nx-1 fgrid(i) = finit(xgrid(i)) END DO fgrid(nx) = fgrid(0) ferr = 0.0 CALL get_splcoef(spl, fgrid, coefs) WRITE(*,'(a/(10f8.3))') 'knots', spl%knots WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid WRITE(*,'(a/(10f8.3))') 'fgrid', fgrid WRITE(*,'(a/(10f8.3))') 'coefs', coefs ! ! Set up FFT ! ALLOCATE(kx(-nx/2+1:nx/2), ampl(-nx/2+1:nx/2)) ALLOCATE(cfgrid(0:nx-1), ffft(0:nx-1)) DO i=-nx/2+1,nx/2 kx(i) = i END DO cfgrid(0:nx-1) = fgrid(0:nx-1) CALL fftw_f77_create_plan(forw, nx, FFTW_FORWARD, FFTW_ESTIMATE) CALL fftw_f77_one(forw, cfgrid, ffft) ampl(0:nx/2) = ABS(ffft(0:nx/2)) ampl(-nx/2+1:-1) = ABS(ffft(nx/2+1:nx-1)) ! ! Set up history arrays ! nhist = 0 thist(nhist) = 0.0 !!$ tmass(nhist) = SUM(coefs(1:nx)) tmass(nhist) = SUM(fgrid(1:nx)) tfmin(nhist) = MINVAL(fgrid) tfmax(nhist) = MAXVAL(fgrid) ermass(nhist) = tmass(nhist)-tmass(0) WRITE(*,'(a,(10f8.3))') 'Initial mass', tmass(nhist) ! ! Initialize Xgrafix ! CALL xginit(3,'ADV','adv',' ',' ',' ',' ',time) ! CALL xgset2d('linlin', 'X', 'F', 'open', 220, 60, 1.d0, 1.d0, & & .FALSE., .FALSE., xgrid(0), xgrid(nx), -0.2d0, 1.2d0) CALL xgcurve(xgrid, fgrid, nx+1, 1) ! CALL xgset2d('linlin', 'X', 'FERR', 'open', 620, 60, 1.d0, 1.d0, & & .FALSE., .FALSE., xgrid(0), xgrid(nx), -1.d0, 1.d0) CALL xgcurve(xgrid, ferr, nx+1, 1) ! CALL xgset2d('linlin', 'Time', 'Error Mass', 'open', 820, 400, 1.d0, 1.d0, & & .TRUE., .TRUE., 0.d0, 1.d0, 0.d0, 1.d0) CALL xgcurve(thist, ermass, nhist, 1) ! CALL xgset2d('linlin', 'Time', 'Min/Max', 'open', 420, 400, 1.d0, 1.d0, & & .TRUE., .TRUE., 0.d0, 1.d0, 0.d0, 1.d0) CALL xgcurve(thist, tfmin, nhist, 1) CALL xgcurve(thist, tfmax, nhist, 2) ! CALL xgset2d('linlin', 'kx', 'Amplitude of F', 'open', 20, 400, 1.d0, 1.d0, & & .FALSE., .FALSE., kx(-nx/2+1), kx(nx/2), & & 0.0d0, MAXVAL(ampl)) CALL xgcurve(kx, ampl, nx, 1) ! CALL xgupdate !=========================================================================== ! 3.0 Time loop ! nskipt = 1 DO nstep = nstep+1 time = time+dt CALL xgevent ! ! Shift x ! CALL get_splcoef(spl, fgrid, coefs) xshft(0:nx) = xgrid(0:nx) - u*dt CALL gridval(spl, xshft, fgrid, 0, coefs) ! xshft(0:nx) = xgrid(0:nx) - u*time DO i =0,nx ferr(i) = fgrid(i) - finit(xshft(i)) END DO ! cfgrid(0:nx-1) = fgrid(0:nx-1) CALL fftw_f77_one(forw, cfgrid, ffft) ampl(0:nx/2) = ABS(ffft(0:nx/2)) ampl(-nx/2+1:-1) = ABS(ffft(nx/2+1:nx-1)) ! ! Diagnostics ! IF( MOD(nstep,nskipt) .EQ. 0 ) THEN nhist = nhist+1 IF( nhist .GT. nhistmx ) THEN nskipt = ncomb*nskipt mhist = nhist-1 CALL packarr(mhist, thist, ncomb, nhist) CALL packarr(mhist, tmass, ncomb, nhist) CALL packarr(mhist, tfmin, ncomb, nhist) CALL packarr(mhist, tfmax, ncomb, nhist) CALL packarr(mhist, ermass, ncomb, nhist) END IF thist(nhist) = time tmass(nhist) = SUM(fgrid(1:nx)) !!$ tmass(nhist) = SUM(coefs(1:nx)) tfmin(nhist) = MINVAL(fgrid) tfmax(nhist) = MAXVAL(fgrid) ermass(nhist) = (tmass(nhist)-tmass(0))/tmass(0) END IF ! CALL xgupdate END DO !=========================================================================== ! 9.0 Prologue ! CALL fftw_f77_destroy_plan(forw) DEALLOCATE(xgrid, fgrid, xshft, coefs) CONTAINS DOUBLE PRECISION FUNCTION finit(xx) ! ! A "box" function ! DOUBLE PRECISION, INTENT(in) :: xx DOUBLE PRECISION :: xl, xr, xl0, xr0, h, x, xlen INTEGER :: kl, kr, klflag, krflag ! xlen = b-a x = a + MODULO(xx-a+xlen, xlen) ! xl = 0.375*(b-a) xr = 0.624*(b-a) CALL interv(xgrid, nx+1, xl, kl, klflag) CALL interv(xgrid, nx+1, xr, kr, krflag) xl0 = xl + w*(xgrid(kl)-xgrid(kl-1)) xr0 = xr - w*(xgrid(kr)-xgrid(kr-1)) CALL interv(xgrid, nx+1, xl0, kl, klflag) CALL interv(xgrid, nx+1, xr0, kr, krflag) IF( x .LT. xl0 ) THEN h = xgrid(kl)-xgrid(kl-1) finit = EXP(-((x-xl0)/(w*h))**2) ELSE IF( x .GT. xr0) THEN h = xgrid(kr)-xgrid(kr-1) finit = EXP(-((x-xr0)/(w*h))**2) ELSE finit = 1.0d0 END IF END FUNCTION finit END PROGRAM main !+++ SUBROUTINE meshdist(c, x, nx) ! ! Construct an 1d non-equidistant mesh given a ! mesh distribution function. ! IMPLICIT NONE DOUBLE PRECISION, INTENT(in) :: c(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) INTEGER :: nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal !!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) END FUNCTION fdist END SUBROUTINE meshdist !+++ SUBROUTINE packarr(n, arr, skip, nhist) IMPLICIT NONE INTEGER :: n, skip, i, ii, nhist DOUBLE PRECISION :: arr(0:n) ii = 0 DO i=0,n,skip arr(ii) = arr(i) ii=ii+1 END DO nhist = ii END SUBROUTINE packarr !+++ SUBROUTINE dump(filename, l) ! ! Is invoked when button "Dump" is pressed. ! IMPLICIT NONE CHARACTER(len=*) :: filename INTEGER :: l WRITE(*,'(a,a,a1)') 'Dumpfile = "', filename(1:l),'"' END SUBROUTINE dump SUBROUTINE quit() ! ! Is invoked when button "Quit" is pressed ! IMPLICIT NONE PRINT*, 'Program terminated ...' END SUBROUTINE quit diff --git a/examples/basfun_perf.f90 b/examples/basfun_perf.f90 index 7cab71f..69c54d3 100644 --- a/examples/basfun_perf.f90 +++ b/examples/basfun_perf.f90 @@ -1,170 +1,170 @@ !> !> @file basfun_perf.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Performance of scalar and vector versions of def_basfun ! USE bsplines IMPLICIT NONE INTEGER :: nx, nidbas, nrank, npt=10, jdermx DOUBLE PRECISION :: dx DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid DOUBLE PRECISION, ALLOCATABLE :: xpt(:), fun(:, :) INTEGER :: left, i, i1, i2 INTEGER :: ngroup, nset, nremain TYPE(spline1d) :: splx DOUBLE PRECISION :: t0, t1, seconds DOUBLE PRECISION :: t_loop, t_locintv1, t_basfun1, t_locintv, t_basfun INTEGER :: its, nits INTEGER, ALLOCATABLE :: vleft(:) DOUBLE PRECISION, ALLOCATABLE :: vfun(:,:,:) LOGICAL :: nlperiod ! NAMELIST /newrun/ nx, nidbas, npt, nits, ngroup, jdermx, nlperiod ! !=============================================================================== ! ! 1D grid ! nx = 10 nidbas = 3 npt = 1000000 nits = 100 ngroup = 10 jdermx = 0 nlperiod = .FALSE. READ(*,newrun) WRITE(*,newrun) ALLOCATE(xgrid(0:nx)) dx = 1.0d0/REAL(nx) xgrid = (/ (i*dx,i=0,nx) /) WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid ! ! Set up spline ! CALL set_spline(nidbas, 4, xgrid, splx, period=nlperiod) nrank = splx%dim WRITE(*,'(a, i5)') 'nrank =', nrank WRITE(*,'(a/(10f8.3))') 'knots', splx%knots ! ALLOCATE(xpt(npt)) ALLOCATE(fun(0:nidbas,0:jdermx)) ! Values and first derivatives of all Splines CALL RANDOM_NUMBER(xpt) !=============================================================================== ! 1.0 Scalar version ! ! loop t0 = seconds() DO its=1,nits DO i=1,npt END DO END DO t_loop = (seconds()-t0)/REAL(nits*npt,8) ! ! locintv t0 = seconds() DO its=1,nits DO i=1,npt CALL locintv(splx, xpt(i), left) END DO END DO t_locintv1 = (seconds()-t0)/REAL(nits*npt,8) ! ! def_basfun t0 = seconds() DO its=1,nits DO i=1,npt CALL locintv(splx, xpt(i), left) CALL basfun(xpt(i), splx, fun, left+1) END DO END DO t_basfun1 = (seconds()-t0)/REAL(nits*npt,8) ! WRITE(*,'(6x,3a12)') 'loop', 'locintv', 'basfun' WRITE(*,'(6x,8(1pe12.3))') t_loop, t_locintv1, t_basfun1 !=============================================================================== ! 2.0 Vector version ! ngroup = 1 DO WHILE (ngroup .LT. npt/2) ALLOCATE(vleft(ngroup)) ALLOCATE(vfun(0:nidbas, 0:jdermx, ngroup)) nset = npt/ngroup nremain = MODULO(npt, ngroup) IF(nremain.NE.0) nset = nset+1 ! ! loop t0 = seconds() DO its=1,nits i2 = 0 DO i=1,nset i1 = i2+1 i2 = MIN(i2+ngroup,npt) END DO END DO t_loop = (seconds()-t0)/REAL(nits*nset,8) ! ! locintv t0 = seconds() DO its=1,nits i2 = 0 DO i=1,nset i1 = i2+1 i2 = MIN(i2+ngroup,npt) CALL locintv(splx, xpt(i1:i2), vleft) END DO END DO t_locintv = (seconds()-t0)/REAL(nits*npt,8) ! ! basfun t0 = seconds() DO its=1,nits i2 = 0 DO i=1,nset i1 = i2+1 i2 = MIN(i2+ngroup,npt) CALL locintv(splx, xpt(i1:i2), vleft) CALL basfun(xpt(i1:i2), splx, vfun, vleft+1) END DO END DO t_basfun = (seconds()-t0)/REAL(nits*npt,8) ! WRITE(*,'(i6,8(1pe12.3))') ngroup, t_loop, t_locintv, t_basfun, & & t_locintv1/t_locintv, t_basfun1/t_basfun DEALLOCATE(vleft) DEALLOCATE(vfun) ngroup = ngroup*2 END DO !=============================================================================== ! ! Clean up ! CALL destroy_sp(splx) DEALLOCATE(xgrid) DEALLOCATE(xpt) DEALLOCATE(fun) END PROGRAM main diff --git a/examples/dirichlet/Makefile b/examples/dirichlet/Makefile index 4f0edc9..0b6276f 100644 --- a/examples/dirichlet/Makefile +++ b/examples/dirichlet/Makefile @@ -1,62 +1,62 @@ # # @file Makefile # # @brief # # @copyright # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) # SPC (Swiss Plasma Center) # -# spclibs is free software: you can redistribute it and/or modify it under +# SPClibs is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) # any later version. # -# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . # # @authors # (in alphabetical order) # @author Emmanuel Lanti # @author Trach-Minh Tran # #BSPLINES = $(HOME)/bsplines #FUTILS = $(HOME)/futils F90 = mpif90 LD = $(F90) debug = -g -traceback -check bounds -warn alignments -warn nounused optim = -O3 -xHOST F90FLAGS = $(OPT) -I$(BSPLINES)/include -I$(FUTILS)/include LDFLAGS = $(OPT) -L$(BSPLINES)/lib -L$(FUTILS)/lib -L$(HDF5)/lib LIBS = -lbsplines -lpppack -lfutils -lhdf5_fortran -lhdf5 -lz LDFLAGS += -g -L$(MKL) LIBS += -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread OPT=$(debug) #OPT=$(optim) .SUFFIXES: .SUFFIXES: .o .c .f90 .f90.o: $(F90) $(F90FLAGS) -c $< all: poisson poisson: poisson.o poisson_mod.o $(LD) $(LDFLAGS) -o $@ $^ $(LIBS) poisson.o: poisson_mod.o clean: rm -f *.o *.mod distclean: clean rm -f poisson a.out *~ *.h5 *.fig *.eps *.pdf diff --git a/examples/dirichlet/poisson.f90 b/examples/dirichlet/poisson.f90 index fa96f4a..f6048e7 100644 --- a/examples/dirichlet/poisson.f90 +++ b/examples/dirichlet/poisson.f90 @@ -1,383 +1,383 @@ !> !> @file poisson.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Solving the following 2d Poisson in cylibdrical coordinates, using splines: ! ! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 0, with f(x=1,y) = cos(my) ! exact solution: f(x,y) = r^m cos(my) ! USE bsplines USE matrix USE conmat_mod USE poisson_mod USE futils ! IMPLICIT NONE INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, dirmeth, nterms LOGICAL :: nlppform INTEGER :: i, j, ij, dimx, dimy, nrank, kl, ku, jder(2), it DOUBLE PRECISION :: pi, coefx(5), coefy(5) DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol TYPE(spline2d) :: splxy TYPE(pbmat) :: mat ! DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol DOUBLE PRECISION :: seconds, mem, dopla DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2, shifty DOUBLE PRECISION :: err00, err10, err01 INTEGER :: nits=500 ! CHARACTER(len=128) :: file='poisson.h5' INTEGER :: fid ! ! Dirichlet BC properties encapsulated in a derived datatype ! TYPE(dirich) :: right_bc ! NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, dirmeth, & & coefx, coefy !=========================================================================== ! 1.0 Prologue ! ! Read in data specific to run ! nx = 8 ! Number of intervals in x ny = 8 ! Number of intervals in y nidbas = (/3,3/) ! Degree of splines ngauss = (/4,4/) ! Number of Gauss points/interval mbess = 2 ! Exponent of differential problem nterms = 2 ! Number of terms in weak form nlppform = .TRUE. ! Use PPFORM for gridval or not dirmeth = 1 ! 1: use spline interpolation in Dirichlet BC ! 2: residual minimization in Dirichlet BC coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function ! READ(*,newrun) WRITE(*,newrun) ! ! Define grid on x (=radial) & y (=poloidal) axis ! pi = 4.0d0*ATAN(1.0d0) ALLOCATE(xgrid(0:nx), ygrid(0:ny)) xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 CALL meshdist(coefx, xgrid, nx) ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi CALL meshdist(coefy, ygrid, ny) ! WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny) ! ! Create hdf5 file ! CALL creatf(file, fid, 'PDE2D Result File', real_prec='d') !=========================================================================== ! 2.0 Discretize the PDE ! ! Set up spline ! CALL set_spline(nidbas, ngauss, & & xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform) ! ! FE matrix assembly ! nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix kl = (nidbas(1)+1)*ny -1 ! Number of sub-diagnonals ku = kl ! Number of super-diagnonals WRITE(*,'(a,5i6)') 'nrank, kl, ku', nrank, kl, ku ! CALL init(ku, nrank, nterms, mat) t0 = seconds() CALL conmat(splxy, mat, coefeq) tmat = seconds() - t0 ALLOCATE(arr(nrank)) ! ! RHS assembly ! ALLOCATE(rhs(nrank), sol(nrank)) CALL disrhs(mbess, splxy, rhs) ! ! Store some usefull parameters in right_bc ! right_bc%meth = dirmeth right_bc%mbess = mbess right_bc%n1 = nx right_bc%n2 = ny right_bc%nidbas1 = nidbas(1) right_bc%nidbas2 = nidbas(2) ! ! BC on Matrix and RHS ! CALL ibcmat(mat, right_bc) CALL ibcrhs(rhs, ygrid, right_bc) WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem() !=========================================================================== ! 3.0 Solve the dicretized system ! t0 = seconds() CALL factor(mat) tfact = seconds() - t0 gflops1 = dopla('DPBTRF',nrank,nrank,kl,ku,0) / tfact / 1.d9 t0 = seconds() CALL bsolve(mat, rhs, sol) ! ! Backtransform of solution ! sol(1:ny-1) = sol(ny) ! ! Spline coefficients, taking into account of periodicity in y ! Note: in SOL, y was numbered first. ! dimx = splxy%sp1%dim dimy = splxy%sp2%dim ALLOCATE(bcoef(0:dimx-1, 0:dimy-1)) DO j=0,dimy-1 DO i=0,dimx-1 ij = MODULO(j,ny) + i*ny + 1 bcoef(i,j) = sol(ij) END DO END DO ! tsolv = seconds() - t0 gflops2 = dopla('DPBTRS',nrank,1,kl,ku,0) / tsolv / 1.d9 !=========================================================================== ! 4.0 Check the solution ! ! ! Check function values computed with various method ! ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny)) DO i=0,nx DO j=0,ny solana(i,j) = f_exact(mbess, xgrid(i), ygrid(j)) END DO END DO jder = (/0,0/) ! ! Compute PPFORM at first call to gridval IF(nlppform) THEN CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef) END IF ! WRITE(*,'(/a)') '*** Checking solutions' t0 = seconds() DO it=1,nits ! nits iterations for timing CALL gridval(splxy, xgrid, ygrid, solcal, jder) END DO tgrid = (seconds() - t0)/REAL(nits) ! errsol = solana - solcal WRITE(*,'(a/(8(1pe12.3)))') 'Error at the boundary r = 1', errsol(nx,:) err00 = err2_norm(splxy, jder, mbess, f_exact) ! CALL putarr(fid, '/xgrid', xgrid, 'r') CALL putarr(fid, '/ygrid', ygrid, '\theta') CALL putarr(fid, '/sol', solcal, 'Solutions') CALL putarr(fid, '/solana', solana,'Exact solutions') CALL putarr(fid, '/errors', errsol, 'Errors') ! ! Check derivatives d/dx and d/dy ! WRITE(*,'(/a)') '*** Checking gradient' DO i=0,nx DO j=0,ny IF( mbess .EQ. 0 ) THEN solana(i,j) = 0.0d0 ELSE solana(i,j) = fx_exact(mbess,xgrid(i),ygrid(j)) END IF END DO END DO ! jder = (/1,0/) CALL gridval(splxy, xgrid, ygrid, solcal, jder) CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions') ! errsol = solana - solcal err10 = err2_norm(splxy, jder, mbess, fx_exact) ! DO i=0,nx DO j=0,ny IF( mbess .EQ. 0 ) THEN solana(i,j) = 0.0d0 ELSE solana(i,j) = fy_exact(mbess, xgrid(i), ygrid(j)) END IF END DO END DO ! jder = (/0,1/) CALL gridval(splxy, xgrid, ygrid, solcal, jder) CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions') ! errsol = solana - solcal err01 = err2_norm(splxy, jder, mbess, fy_exact) ! WRITE(*,'(/a,3(1pe12.3))') 'Discretization errors', err00, err10, err01 !=========================================================================== ! 9.0 Epilogue ! WRITE(*,'(/a)') '---' WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid WRITE(*,'(a,2f10.3)') 'Factor/solve Gflop/s', gflops1, gflops2 ! DEALLOCATE(xgrid, rhs, sol) DEALLOCATE(solcal, solana, errsol) DEALLOCATE(bcoef) DEALLOCATE(arr) CALL destroy_sp(splxy) CALL destroy(mat) CALL closef(fid) ! !=========================================================================== ! CONTAINS !-- DOUBLE PRECISION FUNCTION f_exact(m,x,y) INTEGER,INTENT(in) :: m DOUBLE PRECISION, INTENT(in) :: x, y f_exact = (x**m)*COS(m*y) END FUNCTION f_exact !-- DOUBLE PRECISION FUNCTION fx_exact(m,x,y) INTEGER,INTENT(in) :: m DOUBLE PRECISION, INTENT(in) :: x, y fx_exact = m*(x**(m-1))*COS(m*y) END FUNCTION fx_exact !-- DOUBLE PRECISION FUNCTION fy_exact(m,x,y) INTEGER,INTENT(in) :: m DOUBLE PRECISION, INTENT(in) :: x, y fy_exact = -m*(x**m)*SIN(m*y) END FUNCTION fy_exact !-- SUBROUTINE prntmat(str, a) DOUBLE PRECISION, DIMENSION(:,:) :: a CHARACTER(len=*) :: str INTEGER :: i WRITE(*,'(a)') TRIM(str) DO i=1,SIZE(a,1) WRITE(*,'(10f8.1)') a(i,:) END DO END SUBROUTINE prntmat !-- FUNCTION norm2(x) ! ! Compute the 2-norm of array x ! IMPLICIT NONE DOUBLE PRECISION :: norm2 DOUBLE PRECISION, INTENT(in) :: x(:,:) DOUBLE PRECISION :: sum2 INTEGER :: i, j ! sum2 = 0.0d0 DO i=1,SIZE(x,1) DO j=1,SIZE(x,2) sum2 = sum2 + x(i,j)**2 END DO END DO norm2 = SQRT(sum2) END FUNCTION norm2 !-- SUBROUTINE prnmat(label, mat) CHARACTER(len=*) :: label DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat INTEGER :: i WRITE(*,'(/a)') TRIM(label) DO i=1,SIZE(mat,1) WRITE(*,'(10(1pe12.3))') mat(i,:) END DO END SUBROUTINE prnmat END PROGRAM main ! !+++ SUBROUTINE meshdist(c, x, nx) ! ! Construct an 1d non-equidistant mesh given a ! mesh distribution function. ! IMPLICIT NONE DOUBLE PRECISION, INTENT(in) :: c(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) INTEGER :: nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal !!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS !-- DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) END FUNCTION fdist END SUBROUTINE meshdist !+++ diff --git a/examples/dirichlet/poisson.m b/examples/dirichlet/poisson.m index 3a56ac8..bb5c2e3 100644 --- a/examples/dirichlet/poisson.m +++ b/examples/dirichlet/poisson.m @@ -1,88 +1,88 @@ % % @file poisson.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % file='poisson.h5'; m=3; % % Get data from data sets % r=hdf5read(file,'/xgrid'); t=hdf5read(file,'/ygrid'); sol=hdf5read(file,'/sol')'; solexact=hdf5read(file,'/solana')'; err=hdf5read(file,'/errors')'; solr=hdf5read(file,'/derivx')'; solt=hdf5read(file,'/derivy')'; [R,T]=meshgrid(r,t); x = R.*cos(T); y= R.*sin(T); solx = cos(T).*solr - sin(T)./R.*solt; soly = sin(T).*solr + cos(T)./R.*solt; figure subplot(221) pcolor(double(r),double(t),double(sol)); shading interp hold on, quiver(r,t,solr,solt) xlabel('r'); ylabel('\theta') title('R-THETA plane') colorbar subplot(222) pcolor(double(x),double(y),double(sol)) shading interp hold on, quiver(x,y,solx,soly) hold off, axis image xlabel('X'); ylabel('Y') title('X-Y plane') colorbar subplot(223) surfc(double(x),double(y),double(sol)) xlabel('X'); ylabel('Y'); title('Solutions') subplot(224) surfc(double(x),double(y),double(err)) xlabel('X'); ylabel('Y'); title('Errors') figure subplot(211) plot(r,sol(1,:),'o',r,solexact(1,:)) xlabel('r') ylabel('Solutions at \theta=0') grid on subplot(212) tt=0:0.01:2*pi; plot(t,sol(:,end),'o',tt,cos(m.*tt)) xlabel('\theta') ylabel('Solutions at r=1') grid on diff --git a/examples/dirichlet/poisson_mod.f90 b/examples/dirichlet/poisson_mod.f90 index ae234c5..96e9417 100644 --- a/examples/dirichlet/poisson_mod.f90 +++ b/examples/dirichlet/poisson_mod.f90 @@ -1,354 +1,354 @@ !> !> @file poisson_mod.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> MODULE poisson_mod IMPLICIT NONE ! ! Dirichlet BC encapsulated in a derived datatype ! TYPE dirich INTEGER :: meth, mbess, n1, n2, nidbas1, nidbas2 INTEGER :: i0, i1 DOUBLE PRECISION, POINTER :: amat(:,:) => NULL() DOUBLE PRECISION, POINTER :: g(:) => NULL() END TYPE dirich ! CONTAINS SUBROUTINE disrhs(mbess, spl, rhs) ! ! Assembly the RHS using 2d spline spl ! USE bsplines INTEGER, INTENT(in) :: mbess TYPE(spline2d), INTENT(in) :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:) ! ! The RHS is 0 ! rhs(:) = 0.0d0 END SUBROUTINE disrhs ! SUBROUTINE ibcmat(mat, bc) ! ! Apply BC on matrix ! USE matrix TYPE(pbmat), INTENT(inout) :: mat TYPE(dirich) :: bc INTEGER :: ny INTEGER :: kl, ku, nrank, i, j, k DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:) INTEGER :: i0, i1, ii !=========================================================================== ! 1.0 Prologue ! ku = mat%ku kl = ku nrank = mat%rank ny = bc%n2 !=========================================================================== ! 2.0 Unicity at the axis ! ! The vertical sum on the NY-th row ! ALLOCATE(zsum(nrank)) ALLOCATE(arr(nrank)) zsum = 0.0d0 DO i=1,ny arr = 0.0d0 CALL getrow(mat, i, arr) DO j=1,ny+ku zsum(j) = zsum(j) + arr(j) END DO END DO ! zsum(ny) = SUM(zsum(1:ny)) ! using symmetry CALL putrow(mat, ny, zsum) DEALLOCATE(zsum) ! ! The away operator ! DO i = 1,ny-1 arr = 0.0d0; arr(i) = 1.0d0 CALL putrow(mat, i, arr) END DO DEALLOCATE(arr) !=========================================================================== ! 3.0 Dirichlet on right boundary ! !!$ i0 = nrank - ku !!$ i1 = nrank - ny i0 = (bc%n1-1)*bc%n2 + 1 i1 = nrank - bc%n2 bc%i0 = i0 bc%i1 = i1 ! IF(ASSOCIATED(bc%amat)) DEALLOCATE(bc%amat) IF(ASSOCIATED(bc%g)) DEALLOCATE(bc%g) ALLOCATE(bc%amat(i0:i1,ny)) ALLOCATE(bc%g(ny)) ! WRITE(*,'(/a,2i6)') 'IBCMAT: i0, i1 =', i0, i1 ! ! Extract and save the last ny columns of matrix ! ALLOCATE(arr(nrank)) DO k=1,ny j = nrank-ny+k CALL getcol(mat, j, arr) bc%amat(i0:i1,k) = arr(i0:i1) IF( ANY(arr(1:i0-1) .NE. 0.0d0) ) THEN WRITE(*,'(a,i4)') 'i0 is underestimated for j =', j END IF END DO ! ! The away operator ! DO k=1,ny j = nrank-ny+k arr = 0.0d0; arr(j) = 1.0d0 CALL putrow(mat, j, arr) END DO ! DEALLOCATE(arr) ! END SUBROUTINE ibcmat !+++ SUBROUTINE ibcrhs(rhs, ygrid, bc) ! ! Apply BC on RHS ! DOUBLE PRECISION, INTENT(inout) :: rhs(:) DOUBLE PRECISION, INTENT(in) :: ygrid(:) TYPE(dirich) :: bc ! INTEGER :: nrank, ny, m, i0, i1 DOUBLE PRECISION :: zsum !=========================================================================== ! 1.0 Prologue ! nrank = SIZE(rhs,1) ny = bc%n2 m = bc%mbess !=========================================================================== ! 2.0 Unicity at the axis ! ! The vertical sum ! zsum = SUM(rhs(1:ny)) rhs(ny) = zsum rhs(1:ny-1) = 0.0d0 !=========================================================================== ! 3.0 Dirichlet on right boundary ! ! Get spline coefs at boundary r=1 ! SELECT CASE (bc%meth) CASE(1) CALL dirich_interp(ygrid, bc, frhs) CASE(2) CALL dirich_minres(ygrid, bc, frhs) END SELECT ! ! Modify RHS ! i0 = bc%i0 i1 = bc%i1 rhs(i0:i1) = rhs(i0:i1) - MATMUL(bc%amat, bc%g) rhs(i1+1:nrank) = bc%g(1:ny) CONTAINS DOUBLE PRECISION FUNCTION frhs(x) DOUBLE PRECISION, INTENT(in) :: x frhs = COS(m*x) END FUNCTION frhs END SUBROUTINE ibcrhs !++++ SUBROUTINE coefeq(x, y, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x, y INTEGER, INTENT(out) :: idt(:,:), idw(:,:) DOUBLE PRECISION, INTENT(out) :: c(:) ! ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy ! c(1) = x ! idt(1,1) = 1 idt(1,2) = 0 idw(1,1) = 1 idw(1,2) = 0 ! c(2) = 1.d0/x idt(2,1) = 0 idt(2,2) = 1 idw(2,1) = 0 idw(2,2) = 1 END SUBROUTINE coefeq !++++ SUBROUTINE dirich_interp(ygrid, bc, frhs) ! ! Dirichlet BC by interpolation ! USE bsplines DOUBLE PRECISION, INTENT(in) :: ygrid(:) TYPE(dirich) :: bc INTERFACE DOUBLE PRECISION FUNCTION frhs(x) DOUBLE PRECISION, INTENT(in) :: x END FUNCTION frhs END INTERFACE ! INTEGER :: nidbas, dim, n2, i DOUBLE PRECISION :: shifty DOUBLE PRECISION :: gval(SIZE(ygrid)) DOUBLE PRECISION, ALLOCATABLE :: coefs(:) DOUBLE PRECISION :: ygrid_interp(SIZE(ygrid)) TYPE(spline1d) :: spl_interp ! nidbas = bc%nidbas2 n2 = bc%n2 ! IF(MODULO(nidbas,2) .EQ. 0 ) THEN shifty = 0.5d0*(ygrid(2)-ygrid(1)) ygrid_interp(:) = ygrid(:) + shifty ELSE ygrid_interp(:) = ygrid(:) END IF CALL set_splcoef(nidbas, ygrid_interp, spl_interp, period=.TRUE.) CALL get_dim(spl_interp, dim) ALLOCATE(coefs(dim)) ! DO i=1,SIZE(ygrid) gval(i) = frhs(ygrid_interp(i)) END DO CALL get_splcoef(spl_interp, gval, coefs) ! ! Store spline coefs in bc ! bc%g(1:n2) = coefs(1:n2) ! DEALLOCATE(coefs) CALL destroy_sp(spl_interp) END SUBROUTINE dirich_interp !++++ SUBROUTINE dirich_minres(xgrid, bc, frhs) ! ! Dirichlet BC by minimization of residual ! USE bsplines USE matrix USE conmat_mod DOUBLE PRECISION, INTENT(in) :: xgrid(:) TYPE(dirich) :: bc INTERFACE DOUBLE PRECISION FUNCTION frhs(x) DOUBLE PRECISION, INTENT(in) :: x END FUNCTION frhs END INTERFACE ! INTEGER :: nx, nidbas, ngauss, kl, ku TYPE(periodic_mat) :: mass_mat TYPE(spline1d) :: spl ! nidbas = bc%nidbas2 ngauss = nidbas+1 nx = bc%n2 kl = nidbas ku = kl ! CALL set_spline(nidbas, ngauss, xgrid, spl, period=.TRUE.) CALL init(kl, ku, nx, 1, mass_mat) CALL conmat(spl, mass_mat, coefeq_mass) CALL conrhs(spl, bc%g, frhs) CALL factor(mass_mat) CALL bsolve(mass_mat, bc%g) ! CALL destroy(mass_mat) CALL destroy_sp(spl) ! CONTAINS SUBROUTINE coefeq_mass(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE PRECISION, INTENT(out) :: c(:) c(1) = 1.0d0 idt(1) = 0 idw(1) = 0 END SUBROUTINE coefeq_mass END SUBROUTINE dirich_minres !++++ DOUBLE PRECISION FUNCTION err2_norm(spl, jder, mbess, fexact) ! ! Compute error L2 norm unsing Gauss points ! USE bsplines TYPE(spline2d) :: spl INTEGER, INTENT(in) :: jder(:) INTEGER, INTENT(in) :: mbess INTERFACE DOUBLE PRECISION FUNCTION fexact(m,x,y) INTEGER, INTENT(in) :: m DOUBLE PRECISION, INTENT(in) :: x, y END FUNCTION fexact END INTERFACE ! DOUBLE PRECISION, POINTER :: xg1(:,:), xg2(:,:), wg1(:,:), wg2(:,:) INTEGER :: i1, ig1, n1, nidbas1, ndim1, ng1 INTEGER :: i2, ig2, n2, nidbas2, ndim2, ng2 DOUBLE PRECISION :: contrib DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:), sol(:,:) ! ! Gauss points and weights on all intervals ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) xg1 => spl%sp1%gausx ! xg1(ng1,n1) wg1 => spl%sp1%gausw ! wg1(ng1,n1) ng1 = SIZE(xg1,1) xg2 => spl%sp2%gausx wg2 => spl%sp2%gausw ng2 = SIZE(xg2,1) ! err2_norm = 0.0d0 ALLOCATE(x(ng1), y(ng2)) ALLOCATE(sol(ng1,ng2)) DO i1=1,n1 x=xg1(:,i1) DO i2=1,n2 y=xg2(:,i2) CALL gridval(spl, x, y, sol, jder) DO ig1=1,ng1 DO ig2=1,ng2 contrib = wg1(ig1,i1)*wg2(ig2,i2)*(sol(ig1,ig2) - & & fexact(mbess,x(ig1),y(ig2)))**2 err2_norm = err2_norm + x(ig1)*contrib !use same inner-product in weak-form END DO END DO END DO END DO DEALLOCATE(x) DEALLOCATE(y) DEALLOCATE(sol) err2_norm = SQRT(err2_norm) END FUNCTION err2_norm END MODULE poisson_mod diff --git a/examples/dirichlet/run_poisson.sh b/examples/dirichlet/run_poisson.sh index dafc434..7833785 100644 --- a/examples/dirichlet/run_poisson.sh +++ b/examples/dirichlet/run_poisson.sh @@ -1,46 +1,46 @@ # # @file run_poisson.sh # # @brief # # @copyright # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) # SPC (Swiss Plasma Center) # -# spclibs is free software: you can redistribute it and/or modify it under +# SPClibs is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) # any later version. # -# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . # # @authors # (in alphabetical order) # @author Trach-Minh Tran # #!/bin/sh EXEC=./poisson cat > in0 < in1 $EXEC < in1 | grep 'Discretization errors ' done rm -f in? diff --git a/examples/dismat.f90 b/examples/dismat.f90 index a15e8df..c4bf0ed 100644 --- a/examples/dismat.f90 +++ b/examples/dismat.f90 @@ -1,157 +1,157 @@ !> !> @file dismat.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> SUBROUTINE dismat(spl, mat) ! ! Assembly of FE matrix mat using spline spl ! USE bsplines USE matrix IMPLICIT NONE TYPE(spline2d), INTENT(in) :: spl TYPE(gbmat), INTENT(inout) :: mat ! INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: n2, nidbas2, ndim2, ng2 INTEGER :: i, j, ig1, ig2 INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:) DOUBLE PRECISION:: contrib ! INTEGER :: kterms ! Number of terms in weak form INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:) ! Derivative order DOUBLE PRECISION, ALLOCATABLE :: coefs(:,:,:) ! Terms in weak form INTEGER, ALLOCATABLE :: left1(:), left2(:) !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1 WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2 ! ! Gauss quadature ! CALL get_gauss(spl%sp1, ng1) CALL get_gauss(spl%sp2, ng2) ALLOCATE(xg1(ng1), wg1(ng1)) ALLOCATE(xg2(ng1), wg2(ng1)) ! ! Weak form ! kterms = mat%nterms ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2)) ALLOCATE(coefs(kterms,ng1,ng2)) ! ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! !=========================================================================== ! 2.0 Assembly loop ! ALLOCATE(left1(ng1)) ALLOCATE(left2(ng2)) DO i=1,n1 CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) left1 = i CALL basfun(xg1, spl%sp1, fun1, left1) DO j=1,n2 CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) left2 = j CALL basfun(xg2, spl%sp2, fun2, left2) ! DO ig1=1,ng1 DO ig2=1,ng2 CALL coefeq(xg1(ig1), xg2(ig2), & & idert(:,:,ig1,ig2), & & iderw(:,:,ig1,ig2), & & coefs(:,ig1,ig2)) END DO END DO ! DO iw1=0,nidbas1 ! Weight function in dir 1 igw1 = i+iw1 DO iw2=0,nidbas2 ! Weight function in dir 2 igw2 = MODULO(j+iw2-1, n2) + 1 irow = igw2 + (igw1-1)*n2 DO it1=0,nidbas1 ! Test function in dir 1 igt1 = i+it1 DO it2=0,nidbas2 ! Test function in dir 2 igt2 = MODULO(j+it2-1, n2) + 1 jcol = igt2 + (igt1-1)*n2 !------------- contrib = 0.0d0 DO ig1=1,ng1 DO ig2=1,ng2 DO iterm=1,kterms contrib = contrib + & & fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * & & fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * & & coefs(iterm,ig1,ig2) * & & fun2(it2,idert(iterm,2,ig1,ig2),ig2) * & & fun1(it1,idert(iterm,1,ig1,ig2),ig1) * & & wg1(ig1) * wg2(ig2) END DO END DO END DO CALL updtmat(mat, irow, jcol, contrib) !------------- END DO END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xg1, wg1, fun1) DEALLOCATE(xg2, wg2, fun2) DEALLOCATE(idert, iderw, coefs) DEALLOCATE(left1,left2) ! CONTAINS SUBROUTINE coefeq(x, y, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x, y INTEGER, INTENT(out) :: idt(:,:), idw(:,:) DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1)) ! ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy ! c(1) = x ! idt(1,1) = 1 idt(1,2) = 0 idw(1,1) = 1 idw(1,2) = 0 ! c(2) = 1.d0/x idt(2,1) = 0 idt(2,2) = 1 idw(2,1) = 0 idw(2,2) = 1 END SUBROUTINE coefeq END SUBROUTINE dismat diff --git a/examples/disrhs.f90 b/examples/disrhs.f90 index 24cbb84..cf29fd5 100644 --- a/examples/disrhs.f90 +++ b/examples/disrhs.f90 @@ -1,198 +1,198 @@ !> !> @file disrhs.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> SUBROUTINE disrhs(mbess, spl, rhs) ! ! Assembly the RHS using 2d spline spl ! USE bsplines IMPLICIT NONE INTEGER, INTENT(in) :: mbess TYPE(spline2d), INTENT(in) :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:) INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: n2, nidbas2, ndim2, ng2 INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) DOUBLE PRECISION:: contrib !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) ! ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) ! ! Gauss quadature ! CALL get_gauss(spl%sp1, ng1) CALL get_gauss(spl%sp2, ng2) WRITE(*,'(/a, 2i3)') 'Gauss points and weights, ngauss =', ng1, ng2 ALLOCATE(xg1(ng1), wg1(ng1)) ALLOCATE(xg2(ng1), wg2(ng1)) !=========================================================================== ! 2.0 Assembly loop ! nrank = SIZE(rhs) rhs(1:nrank) = 0.0d0 ! DO i=1,n1 CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) DO ig1=1,ng1 CALL basfun(xg1(ig1), spl%sp1, fun1, i) DO j=1,n2 CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) DO ig2=1,ng2 CALL basfun(xg2(ig2), spl%sp2, fun2, j) contrib = wg1(ig1)*wg2(ig2) * & & rhseq(xg1(ig1),xg2(ig2), mbess) DO k1=0,nidbas1 i1 = i+k1 DO k2=0,nidbas2 j2 = MODULO(j+k2-1,n2) + 1 ij = j2 + (i1-1)*n2 rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1) END DO END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xg1, wg1, fun1) DEALLOCATE(xg2, wg2, fun2) ! CONTAINS DOUBLE PRECISION FUNCTION rhseq(x1, x2, m) DOUBLE PRECISION, INTENT(in) :: x1, x2 INTEGER, INTENT(in) :: m rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2) END FUNCTION rhseq END SUBROUTINE disrhs ! !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE disrhs3(mbess, npow, spl, rhs) ! ! Assembly the RHS using 3d spline spl ! USE bsplines IMPLICIT NONE INTEGER, INTENT(in) :: mbess, npow TYPE(spline2d1d), TARGET :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:,:) ! TYPE(spline1d), POINTER :: sp1, sp2, sp3 INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: n2, nidbas2, ndim2, ng2 INTEGER :: n3, nidbas3, ndim3, ng3 INTEGER :: i, j, k, ig1, ig2, ig3, k1, k2, k3, i1, j2, ij, kk, nrank DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) DOUBLE PRECISION, ALLOCATABLE :: xg3(:), wg3(:), fun3(:,:) DOUBLE PRECISION:: contrib !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! sp1 => spl%sp12%sp1 sp2 => spl%sp12%sp2 sp3 => spl%sp3 ! CALL get_dim(sp1, ndim1, n1, nidbas1) CALL get_dim(sp2, ndim2, n2, nidbas2) CALL get_dim(sp3, ndim3, n3, nidbas3) ! ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) ALLOCATE(fun3(0:nidbas3,1)) ! needs only basis functions (no derivatives) ! ! Gauss quadature ! CALL get_gauss(sp1, ng1) CALL get_gauss(sp2, ng2) CALL get_gauss(sp3, ng3) WRITE(*,'(/a, 3i3)') 'Gauss points and weights, ngauss =', ng1, ng2, ng3 ALLOCATE(xg1(ng1), wg1(ng1)) ALLOCATE(xg2(ng2), wg2(ng2)) ALLOCATE(xg3(ng3), wg3(ng3)) !=========================================================================== ! 2.0 Assembly loop ! nrank = SIZE(rhs,1) rhs(1:nrank,1:n3) = 0.0d0 ! DO i=1,n1 CALL get_gauss(sp1, ng1, i, xg1, wg1) DO ig1=1,ng1 CALL basfun(xg1(ig1), sp1, fun1, i) DO j=1,n2 CALL get_gauss(sp2, ng2, j, xg2, wg2) DO ig2=1,ng2 CALL basfun(xg2(ig2), sp2, fun2, j) DO k=1,n3 CALL get_gauss(sp3, ng3, k, xg3, wg3) DO ig3=1,ng3 CALL basfun(xg3(ig3), sp3, fun3, k) contrib = wg1(ig1)*wg2(ig2)*wg3(ig3) * & & rhseq(xg1(ig1), xg2(ig2), xg3(ig3), mbess, npow) DO k1=0,nidbas1 i1 = i+k1 DO k2=0,nidbas2 j2 = MODULO(j+k2-1,n2) + 1 ij = j2 + (i1-1)*n2 DO k3=0,nidbas3 kk = MODULO(k+k3-1,n3) + 1 rhs(ij,kk) = rhs(ij, kk) + & & contrib*fun1(k1,1)*fun2(k2,1)*fun3(k3,1) END DO END DO END DO END DO END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xg1, wg1, fun1) DEALLOCATE(xg2, wg2, fun2) DEALLOCATE(xg3, wg3, fun3) ! CONTAINS DOUBLE PRECISION FUNCTION rhseq(x1, x2, x3, m, n) DOUBLE PRECISION, INTENT(in) :: x1, x2, x3 INTEGER, INTENT(in) :: m, n rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)*COS(x3)**n END FUNCTION rhseq END SUBROUTINE disrhs3 diff --git a/examples/driv1.f90 b/examples/driv1.f90 index 03a9329..e725b38 100644 --- a/examples/driv1.f90 +++ b/examples/driv1.f90 @@ -1,189 +1,189 @@ !> !> @file driv1.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Basis splines on a 2d grid. ! USE bsplines USE futils IMPLICIT NONE TYPE(spline1d) :: spx, spy INTEGER :: nx=10, ny=8, nidbas=2, ngauss=4, npts=1000 DOUBLE PRECISION :: a, b, coefx(5), coefy(5) DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), ygrid(:), fun(:,:) DOUBLE PRECISION, ALLOCATABLE :: xp(:), funxp(:,:), yp(:), funyp(:,:) DOUBLE PRECISION :: dx, dy INTEGER :: i, j, left CHARACTER(len=256) :: title INTEGER :: fid NAMELIST /newrun/ nx, ny, nidbas, ngauss, a, b, coefx, coefy !=========================================================================== nidbas = 3 ngauss = 4 nx = 10 ! Number of intevals in x ny = 8 ! Number of intevals in y a = 0.0d0 ! Left boundary of interval b = 1.0d0 ! Right boundary of interval coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function READ(*,newrun) WRITE(*,newrun) !=========================================================================== ! 1.0 Set up grids ! ALLOCATE( xgrid(0:nx) ) xgrid(0) = a xgrid(nx) = b CALL meshdist(coefx, xgrid, nx) ! !!$ dy = 2.d0*pi/REAL(ny) dy = 1.0d0 ALLOCATE( ygrid(0:ny) ) ygrid(0) = a ygrid(ny) = b CALL meshdist(coefy, ygrid, ny) !=========================================================================== ! 2.0 Set up splines on (x,y) ! CALL set_spline(nidbas, ngauss, xgrid, spx) CALL set_spline(nidbas, ngauss, ygrid, spy, period=.TRUE.) WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of x-splines', LBOUND(spx%knots), & & ':',UBOUND(spx%knots), spx%knots WRITE(*,'(2(a,i5, 2x))') 'NX =', nx, 'DIM =', spx%dim WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of y-splines', LBOUND(spy%knots), & & ':',UBOUND(spy%knots), spy%knots WRITE(*,'(2(a,i5, 2x))') 'NY =', ny, 'DIM =', spy%dim !=========================================================================== ! 3.0 Graph the splines on (x,y) ! ALLOCATE( fun(nidbas+1,1) ) ! Only 0-th derivative !!$ ALLOCATE( fun(nidbas+1,0:1) ) ! Only 0-th derivative ALLOCATE( xp(npts), funxp(npts,0:spx%dim-1) ) ALLOCATE( yp(npts-1), funyp(npts-1,0:spy%dim-1) ) ! ! Splines in X (non-peridic) ! WRITE(*,'(a)') 'Splines in x' dx = (xgrid(nx)-xgrid(0)) / REAL(NPTS-1) DO i=1,npts xp(i) = xgrid(0) + (i-1)*dx CALL locintv(spx, xp(i), left) CALL basfun(xp(i), spx, fun, left+1) funxp(i,left:left+nidbas) = fun(:,1) END DO ! ! Splines in Y (periodic) ! WRITE(*,'(a)') 'Splines in y' dy = (ygrid(ny)-ygrid(0)) / REAL(NPTS-1) DO i=1,npts-1 yp(i) = ygrid(0) + (i-1)*dy CALL locintv(spy, yp(i), left) CALL basfun(yp(i), spy, fun, left+1) funyp(i,left:left+nidbas) = fun(:,1) END DO ! ! Create hdf5 file ! CALL creatf('driv1.h5', fid, real_prec='d') ! WRITE(title,'(a,i3,5x,a,i6)') 'Splines of degree =', nidbas, 'NX =', nx CALL putarr(fid, 'X', xp) CALL putarr(fid, 'KNOTSX', spx%knots) CALL putarr(fid, 'splinesx', funxp, TRIM(title)) CALL putarr(fid, 'KNOTSY', spy%knots) ! WRITE(title,'(a,i3,5x,a,i6)') 'Periodic splines of degree =', nidbas, 'NY =', ny CALL putarr(fid, 'Y', yp) CALL putarr(fid, 'splinesy', funyp, TRIM(title)) CALL closef(fid) !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(fun) DEALLOCATE(xgrid, ygrid) DEALLOCATE(xp, funxp) CALL destroy_sp(spx) CALL destroy_sp(spy) END PROGRAM main !+++ SUBROUTINE meshdist(c, x, nx) ! ! Construct an 1d non-equidistant mesh given a ! mesh distribution function. ! IMPLICIT NONE DOUBLE PRECISION, INTENT(in) :: c(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) INTEGER :: nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal !!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) END FUNCTION fdist END SUBROUTINE meshdist !+++ diff --git a/examples/driv2.f90 b/examples/driv2.f90 index b67b578..720f624 100644 --- a/examples/driv2.f90 +++ b/examples/driv2.f90 @@ -1,180 +1,180 @@ !> !> @file driv2.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Integration of splines ! USE bsplines ! IMPLICIT NONE TYPE(spline1d) :: spx INTEGER :: nx, nidbas, ngauss DOUBLE PRECISION :: a, b, coefx(5) DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), fun(:,:), finteg(:) DOUBLE PRECISION, ALLOCATABLE :: xg(:), wg(:) DOUBLE PRECISION :: support, res, err INTEGER :: i, ig, j, jj, left INTEGER :: dim, ng LOGICAL :: periodic NAMELIST /newrun/ periodic, nx, nidbas, ngauss, a, b, coefx !=========================================================================== ! 1.0 Set up grids ! ! Read in data specific to run ! periodic = .FALSE. nidbas = 3 ngauss = 4 nx = 10 ! Number of intevals in x a = 0.0d0 ! Left boundary of interval b = 1.0d0 ! Right boundary of interval coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function READ(*,newrun) WRITE(*,newrun) ! ! Define grid/knots ! ALLOCATE(xgrid(0:nx)) xgrid(0) = a xgrid(nx) = b CALL meshdist(coefx, xgrid, nx) WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx) WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx) !=========================================================================== ! 2.0 Set up splines ! CALL set_spline(nidbas, ngauss, xgrid, spx, periodic) WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of x-splines', LBOUND(spx%knots), & & ':',UBOUND(spx%knots), spx%knots WRITE(*,'(2(a,i5, 2x))') 'NX =', nx, 'DIM =', spx%dim !=========================================================================== ! 3.0 Integrate all splines ! CALL get_dim(spx, dim) ALLOCATE( finteg(0:dim-1), xg(ngauss), wg(ngauss), fun(0:nidbas,1) ) finteg = 0.0 fun = 0.0 DO i=1,nx ! Loop thru the intervals CALL get_gauss(spx, ng, i, xg, wg) DO ig=1,ng ! Loop thru Gauss points CALL basfun(xg(ig), spx, fun, i) left = i-1 DO j=0,nidbas ! Loop thru the splines in this interval jj = left+j IF( periodic ) jj = MODULO(left+j, nx) finteg(jj) = finteg(jj) + wg(ig)*fun(j,1) END DO END DO END DO !!$ IF( periodic ) THEN !!$ DO i=nx,dim-1 !!$ finteg(i) = finteg(i-nx) !!$ END DO !!$ END IF ! WRITE(*,'(a/(10f10.5))') 'Integrals of splines', finteg PRINT*, 'Sum of finteg', SUM(finteg) !!$ IF( periodic ) THEN !!$ PRINT*, 'Sum of finteg', SUM(finteg(0:nx-1)) !!$ ELSE !!$ PRINT*, 'Sum of finteg', SUM(finteg) !!$ END IF ! WRITE(*,'(a/(10f10.5))') 'Integrals of splines from module', spx%intspl PRINT*, 'Sum of finteg', SUM(spx%intspl) WRITE(*,'(a5,4a12)') '#', 'I', 'S', '(p+1)I/S', '(p+1)I/S-1' DO i=0,spx%dim-1 support = spx%knots(i+1)-spx%knots(i-nidbas) res = spx%intspl(i)/support*(nidbas+1) err = res - 1.0d0 WRITE(*,'(i5,4(1pe12.4))') i, spx%intspl(i), support, res, err END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE( finteg, xg, wg, fun ) DEALLOCATE(xgrid) CALL destroy_sp(spx) END PROGRAM main !+++ SUBROUTINE meshdist(c, x, nx) ! ! Construct an 1d non-equidistant mesh given a ! mesh distribution function. ! IMPLICIT NONE DOUBLE PRECISION, INTENT(in) :: c(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) INTEGER :: nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal !!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) END FUNCTION fdist END SUBROUTINE meshdist !+++ diff --git a/examples/driv3.f90 b/examples/driv3.f90 index 8df6e99..cce04fd 100644 --- a/examples/driv3.f90 +++ b/examples/driv3.f90 @@ -1,159 +1,159 @@ !> !> @file driv3.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Computation of croos mass matrix between two splines sp1 & sp2 ! sp1 and sp2 can be splines of any type (i.e. either set up with set_spline or ! set_splcoef) and of any order. ! USE bsplines ! IMPLICIT NONE TYPE(spline1d) :: sp1, sp2 INTEGER :: nx, nidbas1, nidbas2, ngauss INTEGER :: i, j DOUBLE PRECISION :: a, b, coefx(5) DOUBLE PRECISION, ALLOCATABLE :: xgrid(:) DOUBLE PRECISION, DIMENSION(:, :), POINTER :: MassMat LOGICAL :: periodic1, periodic2 NAMELIST /newrun/ nx, a, b, coefx, nidbas1, nidbas2, periodic1, periodic2 !=========================================================================== ! 1.0 Set up grids ! ! Read in data specific to run ! nx = 8 ! Number of intevals in x a = 0.0d0 ! Left boundary of interval b = 1.0d0 ! Right boundary of interval coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function periodic1 = .FALSE. periodic2 = .FALSE. nidbas1 = 3 nidbas2 = 2 READ(*,newrun) WRITE(*,newrun) ! ! Define grid/knots ! ALLOCATE(xgrid(0:nx)) xgrid(0 ) = a xgrid(nx) = b CALL meshdist(coefx, xgrid, nx) WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) !=========================================================================== ! 2.0 Set up splines ! ngauss = 1 ! Gauss points initialized with set_spline are in fact not used ! for computing cross mass matrix ! First spline set up as for solving a PDE with FEMs CALL set_spline(nidbas1, ngauss, xgrid, sp1, periodic1) ! Second spline set up as for interpolation CALL set_splcoef(nidbas2, xgrid, sp2, periodic2) WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of spline sp1', LBOUND(sp1%knots), & & ':',UBOUND(sp1%knots), sp1%knots WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of spline sp2', LBOUND(sp2%knots), & & ':',UBOUND(sp2%knots), sp2%knots WRITE(*,'(3(a,i5, 2x))') 'NX =', nx, 'DIM sp1 =', sp1%dim, 'DIM sp2 =', sp2%dim !=========================================================================== ! 3.0 Compute cross mass matrix ! CALL CompMassMatrix(sp1, sp2, a, b, MassMat) WRITE(*, "(a)") "Cross-mass matrix between splines sp1 & sp2:" DO i = 1, SIZE(MassMat, 1) WRITE(*, "(15f13.5)") (MassMat(i, j), j = 1, MIN(SIZE(MassMat, 2), 15)) END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(MassMat) DEALLOCATE(xgrid) CALL destroy_sp(sp1) CALL destroy_sp(sp2) END PROGRAM main !+++ SUBROUTINE meshdist(c, x, nx) ! ! Construct an 1d non-equidistant mesh given a ! mesh distribution function. ! IMPLICIT NONE DOUBLE PRECISION, INTENT(in) :: c(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) INTEGER :: nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal !!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) END FUNCTION fdist END SUBROUTINE meshdist !+++ diff --git a/examples/driv4.f90 b/examples/driv4.f90 index 63711b7..3e7cad4 100644 --- a/examples/driv4.f90 +++ b/examples/driv4.f90 @@ -1,225 +1,225 @@ !> !> @file driv4.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Computation of croos mass matrix between two splines sp1 & sp2 ! sp1 and sp2 can be splines of any type (i.e. either set up with set_spline or ! set_splcoef) and of any order. ! USE bsplines USE matrix ! IMPLICIT NONE TYPE(gbmat) :: matm INTEGER :: mrows, ncols, kl, ku DOUBLE PRECISION, ALLOCATABLE :: avec(:,:), bvec(:,:), matfull(:,:) ! TYPE(zgbmat) :: zmatm DOUBLE COMPLEX, ALLOCATABLE :: zavec(:,:), zbvec(:,:) DOUBLE PRECISION :: dznrm2 ! TYPE(spline1d) :: sp1, sp2 INTEGER :: nx, nidbas1, nidbas2, ngauss INTEGER :: i, j DOUBLE PRECISION :: a, b, coefx(5) DOUBLE PRECISION, ALLOCATABLE :: xgrid(:) DOUBLE PRECISION, DIMENSION(:, :), POINTER :: MassMat LOGICAL :: periodic1, periodic2 NAMELIST /newrun/ nx, a, b, coefx, nidbas1, nidbas2, periodic1, periodic2 !=========================================================================== ! 1.0 Set up grids ! ! Read in data specific to run ! nx = 8 ! Number of intevals in x a = 0.0d0 ! Left boundary of interval b = 1.0d0 ! Right boundary of interval coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function periodic1 = .FALSE. periodic2 = .FALSE. nidbas1 = 3 nidbas2 = 2 READ(*,newrun) WRITE(*,newrun) ! ! Define grid/knots ! ALLOCATE(xgrid(0:nx)) xgrid(0 ) = a xgrid(nx) = b CALL meshdist(coefx, xgrid, nx) WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) !=========================================================================== ! 2.0 Set up splines ! ngauss = 1 ! Gauss points initialized with set_spline are in fact not used ! for computing cross mass matrix ! First spline set up as for solving a PDE with FEMs CALL set_spline(nidbas1, ngauss, xgrid, sp1, periodic1) ! Second spline set up as for interpolation !!$ CALL set_splcoef(nidbas2, xgrid, sp2, periodic2) CALL set_spline(nidbas2, ngauss, xgrid, sp2, periodic2) WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of spline sp1', LBOUND(sp1%knots), & & ':',UBOUND(sp1%knots), sp1%knots WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of spline sp2', LBOUND(sp2%knots), & & ':',UBOUND(sp2%knots), sp2%knots WRITE(*,'(3(a,i5, 2x))') 'NX =', nx, 'DIM sp1 =', sp1%dim, 'DIM sp2 =', sp2%dim !=========================================================================== ! 3.0 Compute cross mass matrix ! CALL CompMassMatrix(sp1, sp2, a, b, MassMat) WRITE(*, "(a)") "Cross-mass matrix between splines sp1 & sp2:" DO i = 1, SIZE(MassMat, 1) WRITE(*, "(15f10.5)") (MassMat(i, j), j = 1, MIN(SIZE(MassMat, 2), 15)) END DO ! ! Should equal to 1 for splines i "not close to the boundaries": ! p1 .LT. i .LE. N ! WRITE(*,'(/a/(15f8.5))') 'Sum of cols * NX', SUM(MassMat,dim=2)*REAL(nx,8) !=========================================================================== ! 3.0 Use DGB matrice ! !!$ mrows = nx+nidbas1 !!$ ncols = nx+nidbas2 CALL get_dim(sp1, mrows) CALL get_dim(sp2, ncols) kl = nidbas1 ku = nidbas2 CALL init(kl, ku, ncols, 1, matm, mrows=mrows) WRITE(*,'(/a, 2i3)') 'Band matrix:, kl, ku =', kl, ku ! CALL CompMassMatrix(sp1, sp2, a, b, matm) ! DO i=1,SIZE(matm%val,1) WRITE(*,'(15f10.5)') matm%val(i,:) END DO ! WRITE(*,'(/a)') 'Full matrix' ALLOCATE(matfull(mrows,ncols)) matfull = 0.0d0 DO i=1,mrows CALL getrow(matm, i, matfull(i,:)) WRITE(*,'(15f10.5)') matfull(i,:) END DO WRITE(*,'(a,1pe12.4)') 'error =', MAXVAL(ABS(matfull-MassMat)) ! ! Check VMX ALLOCATE(avec(ncols,2)) ALLOCATE(bvec(mrows,2)) avec = 1.0d0 bvec = vmx(matm,avec)*REAL(nx,8) WRITE(*,'(a)') 'M*a, with a=1' DO j=1,2 WRITE(*,'(15f8.5)') bvec(:,j) END DO !=========================================================================== ! 4.0 Test complex version ! CALL init(kl, ku, ncols, 1, zmatm, mrows=mrows) CALL CompMassMatrix(sp1, sp2, a, b, zmatm) ALLOCATE(zavec(ncols,2)) ALLOCATE(zbvec(mrows,2)) zavec = (1.0d0,0.0d0) zbvec = vmx(zmatm,zavec)*REAL(nx,8) zbvec = zbvec-bvec WRITE(*,'(/a)') 'Check complex version' WRITE(*,'(a,2(1pe12.4))') 'Norm of errors =', & & (dznrm2(mrows, zbvec(1,j), 1), j=1,2) zmatm%val = zmatm%val-matm%val WRITE(*,'(a,1pe12.4)') 'Diff of matrix elements =', MAXVAL(ABS(zmatm%val)) !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(MassMat) DEALLOCATE(xgrid) CALL destroy_sp(sp1) CALL destroy_sp(sp2) call destroy(matm) END PROGRAM main !+++ SUBROUTINE meshdist(c, x, nx) ! ! Construct an 1d non-equidistant mesh given a ! mesh distribution function. ! IMPLICIT NONE DOUBLE PRECISION, INTENT(in) :: c(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) INTEGER :: nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal !!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) END FUNCTION fdist END SUBROUTINE meshdist !+++ diff --git a/examples/extra.c b/examples/extra.c index 572cecd..b406698 100644 --- a/examples/extra.c +++ b/examples/extra.c @@ -1,49 +1,49 @@ /** * @file extra.c * * @brief * * @copyright * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) * SPC (Swiss Plasma Center) * - * spclibs is free software: you can redistribute it and/or modify it under + * SPClibs is free software: you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free * Software Foundation, either version 3 of the License, or (at your option) * any later version. * - * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY + * SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see . * * @authors * (in alphabetical order) * @author Trach-Minh Tran */ /**********************************************/ #include void quit_(); void dump_(char *filename, int *l); void Dump(filename) char *filename; { /* The user's dump routine should go here. */ int l = strlen(filename); dump_(filename, &l); } /* End DUMP */ /**********************************************/ void Quit() { /* The user's quit routine should go here. */ quit_(); } /* End QUIT */ diff --git a/examples/fit1d.f90 b/examples/fit1d.f90 index db3b786..88ae5e4 100644 --- a/examples/fit1d.f90 +++ b/examples/fit1d.f90 @@ -1,251 +1,251 @@ !> !> @file fit1d.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Fit grid value function to a spline of any order ! USE bsplines USE futils ! IMPLICIT NONE INTEGER :: nx, nidbas DOUBLE PRECISION :: a, b, coefx(5) !!$ DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), fgrid(:), coefs(:) DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), fgrid(:,:), coefs(:,:) INTEGER :: i, dim, left TYPE(spline1d) :: spl DOUBLE PRECISION :: dx INTEGER :: npts DOUBLE PRECISION, ALLOCATABLE :: xpt(:), fcalc(:), fexact(:), err(:) DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) DOUBLE PRECISION, POINTER :: splines(:,:) => null() ! CHARACTER(len=128) :: file='fit1d.h5' INTEGER :: fid ! NAMELIST /newrun/ nx, nidbas, a, b, coefx !=========================================================================== ! 1.0 Prologue ! ! Read in data specific to run ! nx = 10 ! Number of intevals in x a = 0.0d0 ! Left boundary of interval b = 1.0d0 ! Right boundary of interval coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function ! READ(*,newrun) WRITE(*,newrun) ! ! Define grid and function values ! ALLOCATE(xgrid(0:nx), fgrid(0:nx,1)) xgrid(0) = a xgrid(nx) = b CALL meshdist(coefx, xgrid, nx) fgrid(:,1) = func(xgrid) WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid WRITE(*,'(a/(10f8.3))') 'fgrid', fgrid ! ! Create hdf5 file ! CALL creatf(file, fid, 'FIT1D Result File') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NIDBAS', nidbas) !=========================================================================== ! 2.0 Spline interpolation ! ! Set up the spline interpolation !!$ CALL splcoef_setup(nidbas, xgrid, spl) CALL set_splcoef(nidbas, xgrid, spl) PRINT*, 'nlequid =', spl%nlequid ! ! Compute spline values and derivatives at Boundaries ALLOCATE(fun(nidbas+1,0:nidbas)) WRITE(*,'(/a)') 'spline at the left boundary' CALL locintv(spl, a, left) CALL basfun(a, spl, fun, left+1) DO i=0,nidbas WRITE(*,'(8(1pe12.4))') fun(:,i) END DO ! WRITE(*,'(/a)') 'spline at the right boundary' CALL locintv(spl, b, left) CALL basfun(b, spl, fun, left+1) DO i=0,nidbas WRITE(*,'(8(1pe12.4))') fun(:,i) END DO DEALLOCATE(fun) ! CALL get_dim(spl, dim) ALLOCATE(coefs(dim,1)) WRITE(*,'(2(a,i5, 2x))') 'NX =', nx, 'DIM =', dim WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of splines', LBOUND(spl%knots), & & ':',UBOUND(spl%knots), spl%knots ! ! From given grid values fgrid, compute the spline coefs CALL get_splcoef(spl, fgrid, coefs) WRITE(*,'(a/(10f8.3))') 'coefs', coefs ! ! Plot all splines npts = 100 ALLOCATE(xpt(npts)) dx = (b-a)/REAL(npts-1) DO i=1,npts xpt(i) = a + (i-1)*dx END DO CALL allsplines(spl, xpt, splines) CALL putarr(fid, '/X', xpt) CALL putarr(fid,'/SPLINES', splines) ! ! Check interpolation ALLOCATE(fcalc(npts), fexact(npts), err(npts)) fexact = func(xpt) ! ! Function values CALL gridval(spl, xpt, fcalc, 0, coefs(:,1)) err = fexact - fcalc CALL putarr(fid, '/FEXACT', fexact, 'Exact values') CALL putarr(fid, '/FCALC', fcalc, 'Interpolated values') CALL putarr(fid, '/ERROR', err, 'Interpolation errors') WRITE(*,'(/a,1pe12.3)') 'Norm of error', norm2(err)/norm2(fexact) ! ! Derivatives values CALL gridval(spl, xpt, fcalc, 1) fexact = func1(xpt) err = fexact - fcalc CALL putarr(fid, '/FEXACT1', fexact, 'Exact values of first derivative') CALL putarr(fid, '/FCALC1', fcalc, 'Interpolated first derivative') CALL putarr(fid, '/ERROR1', err, 'Interpolation errors on first derivative') WRITE(*,'(/a,1pe12.3)') 'Norm of error', norm2(err)/norm2(fexact) ! !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xpt, splines, fexact, fcalc, err) DEALLOCATE(xgrid, fgrid) CALL destroy_sp(spl) CALL closef(fid) !=========================================================================== ! CONTAINS FUNCTION func(x) DOUBLE PRECISION, INTENT(in) :: x(:) DOUBLE PRECISION :: func(SIZE(x)) !!$ INTEGER :: n !!$ n = SIZE(x) !!$ func = 1.d0+x*(1.d0+x*(1.d0+x)) !!$ func(1:n/2) = 1.0d0 !!$ func(n/2+1:n) = 0.5d0 func = EXP(-8.*x*x) END FUNCTION func ! FUNCTION func1(x) DOUBLE PRECISION, INTENT(in) :: x(:) DOUBLE PRECISION :: func1(SIZE(x)) !!$ INTEGER :: n !!$ n = SIZE(x) !!$ func = 1.d0+x*(1.d0+x*(1.d0+x)) !!$ func(1:n/2) = 1.0d0 !!$ func(n/2+1:n) = 0.5d0 func1 = -16.d0*x*EXP(-8.*x*x) END FUNCTION func1 ! FUNCTION norm2(x) ! ! Compute the 2-norm of array x ! DOUBLE PRECISION :: norm2 DOUBLE PRECISION, INTENT(in) :: x(:) DOUBLE PRECISION :: sum2 INTEGER :: i ! sum2 = 0.0d0 DO i=1,SIZE(x,1) sum2 = sum2 + x(i)**2 END DO norm2 = SQRT(sum2) END FUNCTION norm2 END PROGRAM main !+++ SUBROUTINE meshdist(c, x, nx) ! ! Construct an 1d non-equidistant mesh given a ! mesh distribution function. ! IMPLICIT NONE DOUBLE PRECISION, INTENT(in) :: c(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) INTEGER :: nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal !!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) END FUNCTION fdist END SUBROUTINE meshdist !+++ diff --git a/examples/fit1d_cmpl.f90 b/examples/fit1d_cmpl.f90 index ba3c7db..acae6c0 100644 --- a/examples/fit1d_cmpl.f90 +++ b/examples/fit1d_cmpl.f90 @@ -1,106 +1,106 @@ !> !> @file fit1d_cmpl.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Fit a 1d complex function ! USE bsplines IMPLICIT NONE INTEGER,PARAMETER :: NX=10, NIDBAS=3, NPTS=40 DOUBLE PRECISION :: pi, dx, xgrid(0:NX), xpt(NPTS), err DOUBLE COMPLEX, ALLOCATABLE :: coefs(:) DOUBLE COMPLEX :: fgrid(0:NX), fexact(NPTS), fcalc(NPTS) INTEGER :: dim, i TYPE(spline1d) :: spl !================================================================================ ! ! Define grid and function values on grid ! pi = 4.0d0*ATAN(1.0d0) xgrid(0) = 0.0d0 dx = 2.0d0*pi/NX DO i=1,NX xgrid(i) = xgrid(0) + i*dx END DO ! fgrid = func(xgrid) ! WRITE(*,'(2a10)') 'x', 'f' DO i=0,NX WRITE(*,'(3f10.4)') xgrid(i), fgrid(i) END DO ! ! Set up spline ! CALL set_splcoef(NIDBAS, xgrid, spl, period=.TRUE.) CALL get_dim(spl, dim) ALLOCATE(coefs(dim)) ! ! Get Spline coefficients ! CALL get_splcoef(spl, fgrid, coefs) WRITE(*,'(a)') 'Interpolation coefs' DO i=1,dim WRITE(*,'(2(1pe12.3))') coefs(i) END DO ! ! Check interpolation ! CALL RANDOM_NUMBER(xpt) xpt = (2.0d0*pi) * xpt fexact = func(xpt) ! CALL gridval(spl, xpt, fcalc, 0, coefs) ! WRITE(*,'(a10,2a20)') 'x', 'fexact', 'fcacl' DO i=1,NPTS WRITE(*,'(5f10.4)') xpt(i), fexact(i), fcalc(i) END DO err = norm2(fcalc-fexact) WRITE(*,'(a,1pe12.3)') 'error', err ! ! Clean up ! DEALLOCATE(coefs) CALL destroy_sp(spl) !================================================================================ CONTAINS FUNCTION func(x) DOUBLE PRECISION, INTENT(in) :: x(:) DOUBLE COMPLEX :: func(size(x)) func = EXP( CMPLX(0.0d0, x)) END FUNCTION func ! FUNCTION norm2(x) ! ! Compute the 2-norm of vector x ! DOUBLE PRECISION :: norm2 DOUBLE COMPLEX, INTENT(in) :: x(:) ! norm2 = SQRT(DOT_PRODUCT(x,x)) END FUNCTION norm2 END PROGRAM main diff --git a/examples/fit1dbc.f90 b/examples/fit1dbc.f90 index 503a469..bb1af10 100644 --- a/examples/fit1dbc.f90 +++ b/examples/fit1dbc.f90 @@ -1,254 +1,254 @@ !> !> @file fit1dbc.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Fit grid value function to a spline of any order ! BC using derivatives at both ends. ! USE bsplines USE futils ! IMPLICIT NONE INTEGER :: nx, nidbas DOUBLE PRECISION :: a, b, coefx(5) !!$ DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), fgrid(:), coefs(:) DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), fgrid(:,:), coefs(:,:) INTEGER :: i, dim TYPE(spline1d) :: spl DOUBLE PRECISION :: dx INTEGER :: npts DOUBLE PRECISION, ALLOCATABLE :: xpt(:), fcalc(:), fexact(:), err(:) DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) DOUBLE PRECISION, POINTER :: splines(:,:) => null() INTEGER :: ibc(2,10) !!$ DOUBLE PRECISION :: fbc(2,10) DOUBLE PRECISION :: fbc(2,10,1) ! CHARACTER(len=128) :: file='fit1d.h5' INTEGER :: fid ! NAMELIST /newrun/ nx, nidbas, a, b, coefx, ibc, fbc !=========================================================================== ! 1.0 Prologue ! ! Read in data specific to run ! nx = 10 ! Number of intevals in x a = 0.0d0 ! Left boundary of interval b = 1.0d0 ! Right boundary of interval coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function ibc(1,1:10) = (/2,3,4,5,6,7,8,9,10,11/) ibc(2,1:10) = ibc(1,1:10) fbc = 0.0 ! READ(*,newrun) WRITE(*,newrun) ! ! Define grid and function values ! ALLOCATE(xgrid(0:nx), fgrid(0:nx,1)) xgrid(0) = a xgrid(nx) = b CALL meshdist(coefx, xgrid, nx) fgrid(:,1) = func(xgrid) WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid WRITE(*,'(a/(10f8.3))') 'fgrid', fgrid ! ! Create hdf5 file ! CALL creatf(file, fid, 'FIT1D Result File') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NIDBAS', nidbas) !=========================================================================== ! 2.0 Spline interpolation ! ! Set up the spline interpolation CALL set_splcoef(nidbas, xgrid, spl, ibc=ibc) ! ! Compute spline values and derivatives at Boundaries ALLOCATE(fun(nidbas+1,0:nidbas)) WRITE(*,'(/a)') 'spline at the left boundary' CALL basfun(a, spl, fun, 1) DO i=0,nidbas WRITE(*,'(8(1pe12.4))') fun(:,i) END DO ! WRITE(*,'(/a)') 'spline at the right boundary' CALL basfun(b, spl, fun, nx) DO i=0,nidbas WRITE(*,'(8(1pe12.4))') fun(:,i) END DO DEALLOCATE(fun) ! CALL get_dim(spl, dim) ALLOCATE(coefs(dim,1)) WRITE(*,'(2(a,i5, 2x))') 'NX =', nx, 'DIM =', dim WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of splines', LBOUND(spl%knots), & & ':',UBOUND(spl%knots), spl%knots ! ! From given grid values fgrid, compute the spline coefs CALL get_splcoef(spl, fgrid, coefs, fbc) WRITE(*,'(a/(10f8.3))') 'coefs', coefs ! ! Plot all splines npts = 100 ALLOCATE(xpt(npts)) dx = (b-a)/REAL(npts-1) DO i=1,npts xpt(i) = a + (i-1)*dx END DO CALL allsplines(spl, xpt, splines) CALL putarr(fid, '/X', xpt) CALL putarr(fid,'/SPLINES', splines) ! ! Check interpolation ALLOCATE(fcalc(npts), fexact(npts), err(npts)) fexact = func(xpt) ! ! Function values CALL gridval(spl, xpt, fcalc, 0, coefs(:,1)) err = fexact - fcalc CALL putarr(fid, '/FEXACT', fexact, 'Exact values') CALL putarr(fid, '/FCALC', fcalc, 'Interpolated values') CALL putarr(fid, '/ERROR', err, 'Interpolation errors') WRITE(*,'(/a,1pe12.3)') 'Norm of error', norm2(err)/norm2(fexact) ! ! Derivatives values CALL gridval(spl, xpt, fcalc, 1) fexact = func1(xpt) err = fexact - fcalc CALL putarr(fid, '/FEXACT1', fexact, 'Exact values of first derivative') CALL putarr(fid, '/FCALC1', fcalc, 'Interpolated first derivative') CALL putarr(fid, '/ERROR1', err, 'Interpolation errors on first derivative') WRITE(*,'(/a,1pe12.3)') 'Norm of error', norm2(err)/norm2(fexact) ! !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xpt, splines, fexact, fcalc, err) DEALLOCATE(xgrid, fgrid) CALL destroy_sp(spl) CALL closef(fid) !=========================================================================== ! CONTAINS FUNCTION func(x) DOUBLE PRECISION, INTENT(in) :: x(:) DOUBLE PRECISION :: func(SIZE(x)) !!$ INTEGER :: n !!$ n = SIZE(x) !!$ func = 1.d0+x*(1.d0+x*(1.d0+x)) !!$ func(1:n/2) = 1.0d0 !!$ func(n/2+1:n) = 0.5d0 func = EXP(-8.*x*x) END FUNCTION func ! FUNCTION func1(x) DOUBLE PRECISION, INTENT(in) :: x(:) DOUBLE PRECISION :: func1(SIZE(x)) !!$ INTEGER :: n !!$ n = SIZE(x) !!$ func = 1.d0+x*(1.d0+x*(1.d0+x)) !!$ func(1:n/2) = 1.0d0 !!$ func(n/2+1:n) = 0.5d0 func1 = -16.d0*x*EXP(-8.*x*x) END FUNCTION func1 ! FUNCTION norm2(x) ! ! Compute the 2-norm of array x ! DOUBLE PRECISION :: norm2 DOUBLE PRECISION, INTENT(in) :: x(:) DOUBLE PRECISION :: sum2 INTEGER :: i ! sum2 = 0.0d0 DO i=1,SIZE(x,1) sum2 = sum2 + x(i)**2 END DO norm2 = SQRT(sum2) END FUNCTION norm2 END PROGRAM main !+++ SUBROUTINE meshdist(c, x, nx) ! ! Construct an 1d non-equidistant mesh given a ! mesh distribution function. ! IMPLICIT NONE DOUBLE PRECISION, INTENT(in) :: c(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) INTEGER :: nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal !!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) END FUNCTION fdist END SUBROUTINE meshdist !+++ diff --git a/examples/fit1dp.f90 b/examples/fit1dp.f90 index 97ca6be..29082d0 100644 --- a/examples/fit1dp.f90 +++ b/examples/fit1dp.f90 @@ -1,227 +1,227 @@ !> !> @file fit1dp.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Fit grid value function to a spline of any order ! Periodic case. ! USE bsplines USE futils ! IMPLICIT NONE INTEGER :: nx, nidbas DOUBLE PRECISION :: a, b, coefx(5) DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), fgrid(:,:), coefs(:,:) INTEGER :: i, dim TYPE(spline1d) :: spl DOUBLE PRECISION :: dx, x0, x1 INTEGER :: npts DOUBLE PRECISION, ALLOCATABLE :: xpt(:), fcalc(:), fexact(:), err(:) DOUBLE PRECISION, POINTER :: splines(:,:) => null() ! CHARACTER(len=128) :: file='fit1d.h5' INTEGER :: fid ! NAMELIST /newrun/ nx, nidbas, a, b, coefx !=========================================================================== ! 1.0 Prologue ! ! Read in data specific to run ! nx = 10 ! Number of intevals in x a = 0.0d0 ! Left boundary of interval b = 1.0d0 ! Right boundary of interval coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function ! READ(*,newrun) WRITE(*,newrun) ! ! Define grid and function values ! ALLOCATE(xgrid(0:nx), fgrid(0:nx,1)) xgrid(0) = a xgrid(nx) = b CALL meshdist(coefx, xgrid, nx) fgrid(:,1) = func(xgrid) WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid WRITE(*,'(a/(10f8.3))') 'fgrid', fgrid ! ! Create hdf5 file ! CALL creatf(file, fid, 'FIT1D Result File') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NIDBAS', nidbas) !=========================================================================== ! 2.0 Spline interpolation ! ! Set up the spline interpolation CALL set_splcoef(nidbas, xgrid, spl, period=.TRUE.) CALL get_dim(spl, dim) WRITE(*,'(2(a,i5, 2x))') 'NX =', nx, 'DIM =', dim WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of splines', LBOUND(spl%knots), & & ':',UBOUND(spl%knots), spl%knots ! ALLOCATE(coefs(dim,1)) ! ! From given grid values fgrid, compute the spline coefs CALL get_splcoef(spl, fgrid, coefs) WRITE(*,'(a/(10f8.3))') 'coefs', coefs ! ! Plot all splines ! npts = 100 ALLOCATE(xpt(npts)) !!$ x0 = a !!$ x1 = b x0 = spl%knots(0) x1 = spl%knots(nx) dx = (x1 -x0)/REAL(npts) ! Last point b not inluded DO i=1,npts xpt(i) = x0 + (i-1)*dx END DO CALL allsplines(spl, xpt, splines) CALL putarr(fid, '/X', xpt) CALL putarr(fid,'/SPLINES', splines) ! ! Check interpolation ! ALLOCATE(fcalc(npts), fexact(npts), err(npts)) fexact = func(xpt) ! CALL gridval(spl, xpt, fcalc, 0, coefs(:,1)) err = fexact - fcalc CALL putarr(fid, '/FEXACT', fexact, 'Exact values') CALL putarr(fid, '/FCALC', fcalc, 'Interpolated values') CALL putarr(fid, '/ERROR', err, 'Interpolation errors') WRITE(*,'(/a,1pe12.3)') 'Norm of error', norm2(err)/norm2(fexact) ! ! Derivatives values CALL gridval(spl, xpt, fcalc, 1) fexact = func1(xpt) err = fexact - fcalc CALL putarr(fid, '/FEXACT1', fexact, 'Exact values of first derivative') CALL putarr(fid, '/FCALC1', fcalc, 'Interpolated first derivative') CALL putarr(fid, '/ERROR1', err, 'Interpolation errors on first derivative') WRITE(*,'(/a,1pe12.3)') 'Norm of error', norm2(err)/norm2(fexact) !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xpt, splines, fexact, fcalc, err) DEALLOCATE(xgrid, fgrid) CALL destroy_sp(spl) CALL closef(fid) !=========================================================================== ! CONTAINS FUNCTION func(x) DOUBLE PRECISION, INTENT(in) :: x(:) DOUBLE PRECISION :: func(SIZE(x)) DOUBLE PRECISION :: pi pi = 4.0*ATAN(1.0d0) func = SIN(2.d0*pi*x) + 2.0d0*COS(8.d0*pi*x) END FUNCTION func FUNCTION func1(x) DOUBLE PRECISION, INTENT(in) :: x(:) DOUBLE PRECISION :: func1(SIZE(x)) DOUBLE PRECISION :: pi pi = 4.0*ATAN(1.0d0) func1 = 2.d0*pi*COS(2.d0*pi*x) - 16.0d0*pi*SIN(8.d0*pi*x) END FUNCTION func1 ! FUNCTION norm2(x) ! ! Compute the 2-norm of array x ! DOUBLE PRECISION :: norm2 DOUBLE PRECISION, INTENT(in) :: x(:) DOUBLE PRECISION :: sum2 INTEGER :: i ! sum2 = 0.0d0 DO i=1,SIZE(x,1) sum2 = sum2 + x(i)**2 END DO norm2 = SQRT(sum2) END FUNCTION norm2 END PROGRAM main !+++ SUBROUTINE meshdist(c, x, nx) ! ! Construct an 1d non-equidistant mesh given a ! mesh distribution function. ! IMPLICIT NONE DOUBLE PRECISION, INTENT(in) :: c(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) INTEGER :: nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal !!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) END FUNCTION fdist END SUBROUTINE meshdist !+++ diff --git a/examples/fit2d.f90 b/examples/fit2d.f90 index ab598de..563cd26 100644 --- a/examples/fit2d.f90 +++ b/examples/fit2d.f90 @@ -1,159 +1,159 @@ !> !> @file fit2d.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Fit 2d grid value function to a 2d spline of any order ! USE bsplines USE futils ! IMPLICIT NONE CHARACTER(len=128) :: file='fit2d.h5' INTEGER :: fid INTEGER :: nx, ny, nidbas(2), mbes, dims(2) INTEGER, PARAMETER :: nptx=100, npty=100 DOUBLE PRECISION :: pi, coefx(5), coefy(5) DOUBLE PRECISION, ALLOCATABLE :: xgrid(:),ygrid(:),fgrid(:,:),bcoefs(:,:) DOUBLE PRECISION :: dx, dy, xpt(nptx), ypt(npty) DOUBLE PRECISION, DIMENSION(nptx,npty) :: fcalc, fexact, errs TYPE(spline2d) :: splxy DOUBLE PRECISION :: mem INTEGER :: i, j ! NAMELIST /newrun/ nx, ny, nidbas, mbes, coefx, coefy !=========================================================================== ! 1.0 Prologue ! ! Read in data specific to run ! nx = 8 ! Number of intevals in x ny = 8 ! Number of intevals in y nidbas = (/3,3/) ! Degree of splines mbes = 2 coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function ! READ(*,newrun) WRITE(*,newrun) ! ! Define grid on x axis ! pi = 4.0d0*ATAN(1.0d0) ALLOCATE(xgrid(0:nx), ygrid(0:ny), fgrid(0:nx,0:ny)) xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 CALL meshdist(coefx, xgrid, nx) ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi CALL meshdist(coefy, ygrid, ny) fgrid = func(xgrid,ygrid) ! WRITE(*,'(a/(12f8.3))') 'XGRID', xgrid(0:nx) WRITE(*,'(a/(12f8.3))') 'YGRID', ygrid(0:ny) IF( nx.LE.10 .AND. ny.LE.10 ) THEN WRITE(*,'(a)') 'FGRID' DO j=0,ny WRITE(*,'(12f8.3)') fgrid(:,j) END DO WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem() END IF ! ! Create hdf5 file ! CALL creatf(file, fid, 'PDE2D Result File') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NY', ny) CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) CALL attach(fid, '/', 'MBESS', mbes) !=========================================================================== ! 2.0 Spline interpolation ! ! Setup the spline interpolation CALL set_splcoef(nidbas, xgrid, ygrid, splxy, (/.FALSE., .TRUE./)) WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem() ! ! Compute spline interpolation coefficients CALL get_dim(splxy, dims) ALLOCATE(bcoefs(dims(1),dims(2))) WRITE(*,'(a,2i4)') 'Dims of spline', dims ! CALL get_splcoef(splxy, fgrid, bcoefs) !=========================================================================== ! 2.0 Check interpolation ! dx=(xgrid(nx)-xgrid(0))/REAL(nptx-1) dy=(ygrid(ny)-ygrid(0))/REAL(npty-1) DO i=1,nptx xpt(i) = xgrid(0) + (i-1)*dx END DO DO i=1,npty ypt(i) = ygrid(0) + (i-1)*dy END DO fexact = func(xpt,ypt) CALL gridval(splxy, xpt, ypt, fcalc, (/0,0/), bcoefs) errs = fcalc-fexact WRITE(*,'(a,2(1pe12.3))') 'Min max or errors', MINVAL(errs), MAXVAL(errs) ! CALL putarr(fid, '/xpt', xpt, 'r') CALL putarr(fid, '/ypt', ypt, '\theta') CALL putarr(fid, '/fcalc', fcalc, 'Interpolated') CALL putarr(fid, '/fexact', fexact, 'Exact') CALL putarr(fid, '/errs', errs, 'Errors') !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(bcoefs) DEALLOCATE(xgrid, ygrid, fgrid) CALL destroy_sp(splxy) CALL closef(fid) ! CONTAINS FUNCTION func(x,y) DOUBLE PRECISION, INTENT(in) :: x(:), y(:) DOUBLE PRECISION :: func(SIZE(x), SIZE(y)) DOUBLE PRECISION :: zy INTEGER :: j DO j=1,SIZE(y) zy = -mbes * SIN(mbes*y(j)) func(:,j) =(1-x(:)**2) * x(:)**mbes * zy END DO END FUNCTION func ! FUNCTION norm2(x) ! ! Compute the 2-norm of array x ! DOUBLE PRECISION :: norm2 DOUBLE PRECISION, INTENT(in) :: x(:) DOUBLE PRECISION :: sum2 INTEGER :: i ! sum2 = 0.0d0 DO i=1,SIZE(x,1) sum2 = sum2 + x(i)**2 END DO norm2 = SQRT(sum2) END FUNCTION norm2 END PROGRAM main diff --git a/examples/fit2d1d.f90 b/examples/fit2d1d.f90 index e0ffec3..23a919f 100644 --- a/examples/fit2d1d.f90 +++ b/examples/fit2d1d.f90 @@ -1,154 +1,154 @@ !> !> @file fit2d1d.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Fit 2d grid value function to a 2d spline of any order ! Interpolating on an grid (x_i,y_j) or a set of particle ! positions (x_p,y_p). ! USE bsplines ! IMPLICIT NONE INTEGER :: nx, ny, nidbas(2), mbes, dims(2) INTEGER, PARAMETER :: nptx=100, npty=100 DOUBLE PRECISION :: pi, coefx(5), coefy(5) DOUBLE PRECISION, ALLOCATABLE :: xgrid(:),ygrid(:),fgrid(:,:),bcoefs(:,:) DOUBLE PRECISION :: dx, dy, xpt(nptx), ypt(npty) DOUBLE PRECISION, DIMENSION(nptx,npty) :: fcalc, fexact, errs DOUBLE PRECISION, DIMENSION(nptx*npty) :: xp, yp, fcalcp, fexactp, errsp TYPE(spline2d) :: splxy DOUBLE PRECISION :: mem INTEGER :: i, j ! NAMELIST /newrun/ nx, ny, nidbas, mbes, coefx, coefy !=========================================================================== ! 1.0 Prologue ! ! Read in data specific to run ! nx = 8 ! Number of intevals in x ny = 8 ! Number of intevals in y nidbas = (/3,3/) ! Degree of splines mbes = 2 coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function ! READ(*,newrun) WRITE(*,newrun) ! ! Define grid on x axis ! pi = 4.0d0*ATAN(1.0d0) ALLOCATE(xgrid(0:nx), ygrid(0:ny), fgrid(0:nx,0:ny)) xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 CALL meshdist(coefx, xgrid, nx) ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi CALL meshdist(coefy, ygrid, ny) fgrid = func(xgrid,ygrid) !=========================================================================== ! 2.0 Spline interpolation ! ! Setup the spline interpolation CALL set_splcoef(nidbas, xgrid, ygrid, splxy, (/.FALSE., .TRUE./)) ! ! Compute spline interpolation coefficients CALL get_dim(splxy, dims) ALLOCATE(bcoefs(dims(1),dims(2))) WRITE(*,'(a,2i4)') 'Dims of spline', dims ! CALL get_splcoef(splxy, fgrid, bcoefs) !=========================================================================== ! 2.0 Check interpolation ! dx=(xgrid(nx)-xgrid(0))/REAL(nptx-1) dy=(ygrid(ny)-ygrid(0))/REAL(npty-1) DO i=1,nptx xpt(i) = xgrid(0) + (i-1)*dx END DO DO i=1,npty ypt(i) = ygrid(0) + (i-1)*dy END DO fexact = func(xpt,ypt) CALL gridval(splxy, xpt, ypt, fcalc, (/0,0/), bcoefs) errs = fcalc-fexact WRITE(*,*) 'Using the GRIDVAL2D2D' WRITE(*,'(a,2(1pe12.3))') 'Min max or errors', MINVAL(errs), MAXVAL(errs) ! ! The 2d1d version WRITE(*,*) 'Using the GRIDVAL2D1D' CALL RANDOM_NUMBER(xp) CALL RANDOM_NUMBER(yp) yp=2.0*pi*yp fexactp = func1(xp,yp) !!$ CALL gridval(splxy, xp, yp, fcalcp, (/0,0/), bcoefs) CALL gridval(splxy, xp, yp, fcalcp, (/0,0/)) errsp = fcalcp-fexactp WRITE(*,'(a,2(1pe12.3))') 'Min max or errors', MINVAL(errsp), MAXVAL(errsp) !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(bcoefs) DEALLOCATE(xgrid, ygrid, fgrid) CALL destroy_sp(splxy) ! CONTAINS FUNCTION func(x,y) DOUBLE PRECISION, INTENT(in) :: x(:), y(:) DOUBLE PRECISION :: func(SIZE(x), SIZE(y)) DOUBLE PRECISION :: zy INTEGER :: j DO j=1,SIZE(y) zy = -mbes * SIN(mbes*y(j)) func(:,j) =(1-x(:)**2) * x(:)**mbes * zy END DO END FUNCTION func FUNCTION func1(x,y) DOUBLE PRECISION, INTENT(in) :: x(:), y(:) DOUBLE PRECISION :: func1(SIZE(x)) DOUBLE PRECISION :: zy INTEGER :: j DO j=1,SIZE(x) zy = -mbes * SIN(mbes*y(j)) func1(j) =(1-x(j)**2) * x(j)**mbes * zy END DO END FUNCTION func1 ! FUNCTION norm2(x) ! ! Compute the 2-norm of array x ! DOUBLE PRECISION :: norm2 DOUBLE PRECISION, INTENT(in) :: x(:) DOUBLE PRECISION :: sum2 INTEGER :: i ! sum2 = 0.0d0 DO i=1,SIZE(x,1) sum2 = sum2 + x(i)**2 END DO norm2 = SQRT(sum2) END FUNCTION norm2 END PROGRAM main diff --git a/examples/fit2d_cmpl.f90 b/examples/fit2d_cmpl.f90 index 6e6a4ee..4436ed0 100644 --- a/examples/fit2d_cmpl.f90 +++ b/examples/fit2d_cmpl.f90 @@ -1,132 +1,132 @@ !> !> @file fit2d_cmpl.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Fit 2d grid value function to a 2d spline of any order ! USE bsplines ! IMPLICIT NONE INTEGER :: nx, ny, nidbas(2), mbes, dims(2) INTEGER, PARAMETER :: npt=10000 DOUBLE PRECISION :: pi, coefx(5), coefy(5) DOUBLE PRECISION, ALLOCATABLE :: xgrid(:),ygrid(:) DOUBLE COMPLEX, ALLOCATABLE :: fgrid(:,:), fgrid_calc(:,:), bcoefs(:,:) DOUBLE PRECISION :: dx, dy, xpt(npt), ypt(npt), errs(npt) DOUBLE COMPLEX, DIMENSION(npt) :: fcalc, fexact TYPE(spline2d) :: splxy INTEGER :: i, j ! NAMELIST /newrun/ nx, ny, nidbas, mbes, coefx, coefy !=========================================================================== ! 1.0 Prologue ! ! Read in data specific to run ! nx = 8 ! Number of intevals in x ny = 8 ! Number of intevals in y nidbas = (/3,3/) ! Degree of splines mbes = 2 coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function ! READ(*,newrun) WRITE(*,newrun) ! ! Define grid on x axis ! pi = 4.0d0*ATAN(1.0d0) ALLOCATE(xgrid(0:nx), ygrid(0:ny)) ALLOCATE(fgrid(0:nx,0:ny), fgrid_calc(0:nx,0:ny)) xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 CALL meshdist(coefx, xgrid, nx) ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi CALL meshdist(coefy, ygrid, ny) fgrid = func2(xgrid,ygrid) !=========================================================================== ! 2.0 Spline interpolation ! ! Setup the spline interpolation CALL set_splcoef(nidbas, xgrid, ygrid, splxy, (/.FALSE., .TRUE./)) ! ! Compute spline interpolation coefficients CALL get_dim(splxy, dims) ALLOCATE(bcoefs(dims(1),dims(2))) WRITE(*,'(a,2i4)') 'Dims of spline', dims ! CALL get_splcoef(splxy, fgrid, bcoefs) !=========================================================================== ! 2.0 Check interpolation ! CALL RANDOM_NUMBER(xpt) CALL RANDOM_NUMBER(ypt) ypt(:) = ypt(:)*2.0*pi fexact = func1(xpt,ypt) ! CALL gridval(splxy, xpt, ypt, fcalc, (/0,0/), bcoefs) errs = ABS(fcalc-fexact) WRITE(*,'(a,2(1pe12.3))') 'Max errors (on random points)', MAXVAL(errs) ! CALL gridval(splxy, xgrid, ygrid, fgrid_calc, (/0,0/)) WRITE(*,'(a,2(1pe12.3))') 'Max errors (on grid points)', & & MAXVAL(ABS(fgrid_calc-fgrid)) ! fgrid_calc = 0.0 DO j=0,ny ypt(1:nx+1) = ygrid(j) CALL gridval(splxy, xgrid, ypt(1:nx+1), fgrid_calc(:,j), (/0,0/)) END DO WRITE(*,'(a,2(1pe12.3))') 'Max errors (on grid points)', & & MAXVAL(ABS(fgrid_calc-fgrid)) !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(bcoefs) DEALLOCATE(xgrid, ygrid, fgrid, fgrid_calc) CALL destroy_sp(splxy) ! CONTAINS FUNCTION func2(x,y) DOUBLE PRECISION, INTENT(in) :: x(:), y(:) DOUBLE COMPLEX :: func2(SIZE(x), SIZE(y)) DOUBLE COMPLEX :: zy INTEGER :: j DO j=1,SIZE(y) zy = -mbes * CMPLX(SIN(mbes*y(j)), COS(mbes*y(j))) func2(:,j) =(1-x(:)**2) * x(:)**mbes * zy END DO END FUNCTION func2 FUNCTION func1(x,y) DOUBLE PRECISION, INTENT(in) :: x(:), y(:) DOUBLE COMPLEX :: func1(SIZE(x)) DOUBLE COMPLEX :: zy INTEGER :: j DO j=1,SIZE(x) zy = -mbes * CMPLX(SIN(mbes*y(j)), COS(mbes*y(j))) func1(j) =(1-x(j)**2) * x(j)**mbes * zy END DO END FUNCTION func1 END PROGRAM main diff --git a/examples/fit2dbc.f90 b/examples/fit2dbc.f90 index 75862c0..cae0d98 100644 --- a/examples/fit2dbc.f90 +++ b/examples/fit2dbc.f90 @@ -1,183 +1,183 @@ !> !> @file fit2dbc.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Fit 2d grid value function to a 2d spline of any order ! BC using derivatives at both ends, in the non-periodic direction. ! USE bsplines USE futils ! IMPLICIT NONE CHARACTER(len=128) :: file='fit2d.h5' INTEGER :: fid INTEGER :: nx, ny, nidbas(2), mbes, dims(2) INTEGER, PARAMETER :: nptx=100, npty=100 DOUBLE PRECISION :: pi, coefx(5), coefy(5) DOUBLE PRECISION, ALLOCATABLE :: xgrid(:),ygrid(:),fgrid(:,:),bcoefs(:,:) DOUBLE PRECISION :: dx, dy, xpt(nptx), ypt(npty) DOUBLE PRECISION, DIMENSION(nptx,npty) :: fcalc, fexact, errs TYPE(spline2d) :: splxy DOUBLE PRECISION :: mem INTEGER :: i, j, ii INTEGER :: ibc1(2,10), ibc2(2,10) DOUBLE PRECISION, ALLOCATABLE:: fbc(:,:,:) ! NAMELIST /newrun/ nx, ny, nidbas, mbes, coefx, coefy !=========================================================================== ! 1.0 Prologue ! ! Read in data specific to run ! nx = 8 ! Number of intevals in x ny = 8 ! Number of intevals in y nidbas = (/3,3/) ! Degree of splines mbes = 2 coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function ! READ(*,newrun) WRITE(*,newrun) ! ! Define grid on x axis ! pi = 4.0d0*ATAN(1.0d0) ALLOCATE(xgrid(0:nx), ygrid(0:ny), fgrid(0:nx,0:ny)) xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 CALL meshdist(coefx, xgrid, nx) ygrid(0) = 0.0d0; ygrid(ny) = 1.0d0 CALL meshdist(coefy, ygrid, ny) fgrid = func(xgrid,ygrid) ! WRITE(*,'(a/(12f8.3))') 'XGRID', xgrid(0:nx) WRITE(*,'(a/(12f8.3))') 'YGRID', ygrid(0:ny) IF( nx.LE.10 .AND. ny.LE.10 ) THEN WRITE(*,'(a)') 'FGRID' DO j=0,ny WRITE(*,'(12f8.3)') fgrid(:,j) END DO WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem() END IF ! ! Create hdf5 file ! CALL creatf(file, fid, 'PDE2D Result File') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NY', ny) CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) CALL attach(fid, '/', 'MBESS', mbes) !=========================================================================== ! 2.0 Spline interpolation ! ! Setup the spline interpolation ii=1 ! Start with first derivative DO i = 1, nidbas(1)/2 ibc1(1,i) = ii+i-1 ibc1(2,i) = ii+i-1 END DO ibc2 = ibc1 CALL set_splcoef(nidbas, xgrid, ygrid, splxy, (/.FALSE., .FALSE./),& & ibc1=ibc1, ibc2=ibc2) WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem() ! ! Compute spline interpolation coefficients CALL get_dim(splxy, dims) ALLOCATE(bcoefs(dims(1),dims(2))) WRITE(*,'(a,2i4)') 'Dims of spline', dims ! ALLOCATE(fbc(2, nidbas(1)/2, 0:ny)) fbc=0.0d0 ! WRITE(*,'(a/(10f8.3))') 'fbc(1)', fbc(1,1,:) WRITE(*,'(a/(10f8.3))') 'fbc(2)', fbc(2,1,:) ! CALL get_splcoef(splxy, fgrid, bcoefs, fbc1=fbc, fbc2=fbc) ! DEALLOCATE(fbc) !=========================================================================== ! 2.0 Check interpolation ! dx=(xgrid(nx)-xgrid(0))/REAL(nptx-1) dy=(ygrid(ny)-ygrid(0))/REAL(npty-1) DO i=1,nptx xpt(i) = xgrid(0) + (i-1)*dx END DO DO i=1,npty ypt(i) = ygrid(0) + (i-1)*dy END DO fexact = func(xpt,ypt) CALL gridval(splxy, xpt, ypt, fcalc, (/0,0/), bcoefs) errs = fcalc-fexact WRITE(*,'(a,2(1pe12.3))') 'Min max or errors', MINVAL(errs), MAXVAL(errs) ! CALL putarr(fid, '/xpt', xpt, 'r') CALL putarr(fid, '/ypt', ypt, '\theta') CALL putarr(fid, '/bcoefs', bcoefs, 'bcoefs') CALL putarr(fid, '/fcalc', fcalc, 'Interpolated') CALL putarr(fid, '/fexact', fexact, 'Exact') CALL putarr(fid, '/errs', errs, 'Errors') !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(bcoefs) DEALLOCATE(xgrid, ygrid, fgrid) CALL destroy_sp(splxy) CALL closef(fid) ! !=========================================================================== CONTAINS FUNCTION func(x,y) ! ! A function with zeo derivatives at both ends ! DOUBLE PRECISION, INTENT(in) :: x(:), y(:) DOUBLE PRECISION :: func(SIZE(x), SIZE(y)) DOUBLE PRECISION :: zy INTEGER :: j DO j=1,SIZE(y) zy = y(j)*y(j)*(y(j)-1.5d0) func(:,j) = x(:)*x(:)*(x(:)-1.5d0) + zy END DO END FUNCTION func ! FUNCTION norm2(x) ! ! Compute the 2-norm of array x ! DOUBLE PRECISION :: norm2 DOUBLE PRECISION, INTENT(in) :: x(:) DOUBLE PRECISION :: sum2 INTEGER :: i ! sum2 = 0.0d0 DO i=1,SIZE(x,1) sum2 = sum2 + x(i)**2 END DO norm2 = SQRT(sum2) END FUNCTION norm2 END PROGRAM main diff --git a/examples/fit2dbc_x.f90 b/examples/fit2dbc_x.f90 index 8795668..bf64d80 100644 --- a/examples/fit2dbc_x.f90 +++ b/examples/fit2dbc_x.f90 @@ -1,202 +1,202 @@ !> !> @file fit2dbc_x.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Fit 2d grid value function to a 2d spline of any order ! BC using derivatives at both ends, in the non-periodic direction. ! ! Testing BC on derivative along first direction ! USE bsplines USE futils ! IMPLICIT NONE CHARACTER(len=128) :: file='fit2d.h5' INTEGER :: fid INTEGER :: nx, ny, nidbas(2), mbes, dims(2) INTEGER, PARAMETER :: nptx=100, npty=100 DOUBLE PRECISION :: pi, coefx(5), coefy(5) DOUBLE PRECISION, ALLOCATABLE :: xgrid(:),ygrid(:),fgrid(:,:),bcoefs(:,:) DOUBLE PRECISION :: dx, dy, xpt(nptx), ypt(npty) DOUBLE PRECISION, DIMENSION(nptx,npty) :: fcalc, fexact, errs TYPE(spline2d) :: splxy DOUBLE PRECISION :: mem INTEGER :: i, j, ii INTEGER :: ibc(2,10) DOUBLE PRECISION, ALLOCATABLE:: fbc(:,:,:) ! NAMELIST /newrun/ nx, ny, nidbas, mbes, coefx, coefy !=========================================================================== ! 1.0 Prologue ! ! Read in data specific to run ! nx = 8 ! Number of intevals in x ny = 8 ! Number of intevals in y nidbas = (/3,3/) ! Degree of splines mbes = 2 coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function ! READ(*,newrun) WRITE(*,newrun) ! ! Define grid on x axis ! pi = 4.0d0*ATAN(1.0d0) ALLOCATE(xgrid(0:nx), ygrid(0:ny), fgrid(0:nx,0:ny)) xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 CALL meshdist(coefx, xgrid, nx) ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi CALL meshdist(coefy, ygrid, ny) fgrid = func(xgrid,ygrid) ! WRITE(*,'(a/(12f8.3))') 'XGRID', xgrid(0:nx) WRITE(*,'(a/(12f8.3))') 'YGRID', ygrid(0:ny) IF( nx.LE.10 .AND. ny.LE.10 ) THEN WRITE(*,'(a)') 'FGRID(x, y)' DO j=0,ny WRITE(*,'(12f8.3)') fgrid(:,j) END DO WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem() END IF ! ! Create hdf5 file ! CALL creatf(file, fid, 'PDE2D Result File') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NY', ny) CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) CALL attach(fid, '/', 'MBESS', mbes) !=========================================================================== ! 2.0 Spline interpolation ! ! Setup the spline interpolation ii=1 ! Start with first derivative DO i = 1, nidbas(1)/2 ibc(1,i) = ii+i-1 ibc(2,i) = ii+i-1 END DO CALL set_splcoef(nidbas, xgrid, ygrid, splxy, (/.FALSE., .TRUE./),& & ibc1=ibc) WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem() ! ! Compute spline interpolation coefficients CALL get_dim(splxy, dims) ALLOCATE(bcoefs(dims(1),dims(2))) WRITE(*,'(a,2i4)') 'Dims of spline', dims ! ALLOCATE(fbc(2, nidbas(1)/2, 0:ny)) fbc=0.0d0 ! !!$! Exact first derivatives at boundaries !!$ fbc(1,1:1,:) = func1(xgrid(0:0), ygrid(0:ny)) !!$ fbc(2,1:1,:) = func1(xgrid(nx:nx), ygrid(0:ny)) ! !!$! Derivatives at boundaries approximated with FD !!$ DO j=0,ny !!$ fbc(1,1,j+1) = fgrid(1,j)-fgrid(0,j) !!$ fbc(2,1,j+1) = fgrid(nx,j)-fgrid(nx-1,j) !!$ END DO !!$ fbc(1,1,:) = fbc(1,1,:)/(xgrid(1)-xgrid(0)) !!$ fbc(2,1,:) = fbc(2,1,:)/(xgrid(nx)-xgrid(nx-1)) ! WRITE(*,'(a/(10f8.3))') 'fbc(1)', fbc(1,1,:) WRITE(*,'(a/(10f8.3))') 'fbc(2)', fbc(2,1,:) ! CALL get_splcoef(splxy, fgrid, bcoefs, fbc1=fbc) ! DEALLOCATE(fbc) !=========================================================================== ! 2.0 Check interpolation ! dx=(xgrid(nx)-xgrid(0))/REAL(nptx-1) dy=(ygrid(ny)-ygrid(0))/REAL(npty-1) DO i=1,nptx xpt(i) = xgrid(0) + (i-1)*dx END DO DO i=1,npty ypt(i) = ygrid(0) + (i-1)*dy END DO fexact = func(xpt,ypt) CALL gridval(splxy, xpt, ypt, fcalc, (/0,0/), bcoefs) errs = fcalc-fexact WRITE(*,'(a,2(1pe12.3))') 'Min max or errors', MINVAL(errs), MAXVAL(errs) ! CALL putarr(fid, '/xpt', xpt, 'r') CALL putarr(fid, '/ypt', ypt, '\theta') CALL putarr(fid, '/fcalc', fcalc, 'Interpolated') CALL putarr(fid, '/fexact', fexact, 'Exact') CALL putarr(fid, '/errs', errs, 'Errors') !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(bcoefs) DEALLOCATE(xgrid, ygrid, fgrid) CALL destroy_sp(splxy) CALL closef(fid) ! !=========================================================================== CONTAINS FUNCTION func(x,y) DOUBLE PRECISION, INTENT(in) :: x(:), y(:) DOUBLE PRECISION :: func(SIZE(x), SIZE(y)) DOUBLE PRECISION :: zy INTEGER :: j DO j=1,SIZE(y) zy = -mbes * SIN(mbes*y(j)) func(:,j) =(1-x(:)**2) * x(:)**mbes * zy END DO END FUNCTION func FUNCTION func1(x,y) DOUBLE PRECISION, INTENT(in) :: x(:), y(:) DOUBLE PRECISION :: func1(SIZE(x), SIZE(y)) DOUBLE PRECISION :: zy INTEGER :: j DO j=1,SIZE(y) zy = -mbes * SIN(mbes*y(j)) func1(:,j) = (mbes - (mbes+2.0d0)*x(:)**2) * x(:)**(mbes-1) * zy END DO END FUNCTION func1 ! FUNCTION norm2(x) ! ! Compute the 2-norm of array x ! DOUBLE PRECISION :: norm2 DOUBLE PRECISION, INTENT(in) :: x(:) DOUBLE PRECISION :: sum2 INTEGER :: i ! sum2 = 0.0d0 DO i=1,SIZE(x,1) sum2 = sum2 + x(i)**2 END DO norm2 = SQRT(sum2) END FUNCTION norm2 END PROGRAM main diff --git a/examples/fit2dbc_y.f90 b/examples/fit2dbc_y.f90 index ff73d65..08b4dcb 100644 --- a/examples/fit2dbc_y.f90 +++ b/examples/fit2dbc_y.f90 @@ -1,202 +1,202 @@ !> !> @file fit2dbc_y.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Fit 2d grid value function to a 2d spline of any order ! BC using derivatives at both ends, in the non-periodic direction. ! ! Testing BC on derivative along second direction ! USE bsplines USE futils ! IMPLICIT NONE CHARACTER(len=128) :: file='fit2d.h5' INTEGER :: fid INTEGER :: nx, ny, nidbas(2), mbes, dims(2) INTEGER, PARAMETER :: nptx=100, npty=100 DOUBLE PRECISION :: pi, coefx(5), coefy(5) DOUBLE PRECISION, ALLOCATABLE :: xgrid(:),ygrid(:),fgrid(:,:),bcoefs(:,:) DOUBLE PRECISION :: dx, dy, xpt(nptx), ypt(npty) DOUBLE PRECISION, DIMENSION(npty,nptx) :: fcalc, fexact, errs TYPE(spline2d) :: splxy DOUBLE PRECISION :: mem INTEGER :: i, j, ii INTEGER :: ibc(2,10) DOUBLE PRECISION, ALLOCATABLE:: fbc(:,:,:) ! NAMELIST /newrun/ nx, ny, nidbas, mbes, coefx, coefy !=========================================================================== ! 1.0 Prologue ! ! Read in data specific to run ! nx = 8 ! Number of intevals in x ny = 8 ! Number of intevals in y nidbas = (/3,3/) ! Degree of splines mbes = 2 coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function ! READ(*,newrun) WRITE(*,newrun) ! ! Define grid on x axis ! pi = 4.0d0*ATAN(1.0d0) ALLOCATE(xgrid(0:nx), ygrid(0:ny), fgrid(0:ny,0:nx)) xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 CALL meshdist(coefx, xgrid, nx) ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi CALL meshdist(coefy, ygrid, ny) fgrid = TRANSPOSE(func(xgrid,ygrid)) ! WRITE(*,'(a/(12f8.3))') 'XGRID', xgrid(0:nx) WRITE(*,'(a/(12f8.3))') 'YGRID', ygrid(0:ny) IF( nx.LE.10 .AND. ny.LE.10 ) THEN WRITE(*,'(a)') 'FGRID(y, x)' DO j=0,ny WRITE(*,'(12f8.3)') fgrid(j,:) END DO WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem() END IF ! ! Create hdf5 file ! CALL creatf(file, fid, 'PDE2D Result File') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NY', ny) CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) CALL attach(fid, '/', 'MBESS', mbes) !=========================================================================== ! 2.0 Spline interpolation ! ! Setup the spline interpolation ii=1 ! Start with first derivative DO i = 1, nidbas(1)/2 ibc(1,i) = ii+i-1 ibc(2,i) = ii+i-1 END DO CALL set_splcoef((/nidbas(2), nidbas(1)/), ygrid, xgrid, splxy, (/.TRUE., .FALSE./),& & ibc2=ibc) WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem() ! ! Compute spline interpolation coefficients CALL get_dim(splxy, dims) ALLOCATE(bcoefs(dims(1),dims(2))) WRITE(*,'(a,2i4)') 'Dims of spline', dims ! ALLOCATE(fbc(2, nidbas(1)/2, 0:ny)) fbc=0.0d0 ! !!$! Exact first derivatives at boundaries !!$ fbc(1,1:1,:) = func1(xgrid(0:0), ygrid(0:ny)) !!$ fbc(2,1:1,:) = func1(xgrid(nx:nx), ygrid(0:ny)) ! !!$! Derivatives at boundaries approximated with FD !!$ DO j=0,ny !!$ fbc(1,1,j+1) = fgrid(1,j)-fgrid(0,j) !!$ fbc(2,1,j+1) = fgrid(nx,j)-fgrid(nx-1,j) !!$ END DO !!$ fbc(1,1,:) = fbc(1,1,:)/(xgrid(1)-xgrid(0)) !!$ fbc(2,1,:) = fbc(2,1,:)/(xgrid(nx)-xgrid(nx-1)) ! WRITE(*,'(a/(10f8.3))') 'fbc(1)', fbc(1,1,:) WRITE(*,'(a/(10f8.3))') 'fbc(2)', fbc(2,1,:) ! CALL get_splcoef(splxy, fgrid, bcoefs, fbc2=fbc) ! DEALLOCATE(fbc) !=========================================================================== ! 2.0 Check interpolation ! dx=(xgrid(nx)-xgrid(0))/REAL(nptx-1) dy=(ygrid(ny)-ygrid(0))/REAL(npty-1) DO i=1,nptx xpt(i) = xgrid(0) + (i-1)*dx END DO DO i=1,npty ypt(i) = ygrid(0) + (i-1)*dy END DO fexact = TRANSPOSE(func(xpt,ypt)) CALL gridval(splxy, ypt, xpt, fcalc, (/0,0/), bcoefs) errs = fcalc-fexact WRITE(*,'(a,2(1pe12.3))') 'Min max or errors', MINVAL(errs), MAXVAL(errs) ! CALL putarr(fid, '/xpt', xpt, 'r') CALL putarr(fid, '/ypt', ypt, '\theta') CALL putarr(fid, '/fcalc', fcalc, 'Interpolated') CALL putarr(fid, '/fexact', fexact, 'Exact') CALL putarr(fid, '/errs', errs, 'Errors') !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(bcoefs) DEALLOCATE(xgrid, ygrid, fgrid) CALL destroy_sp(splxy) CALL closef(fid) ! !=========================================================================== CONTAINS FUNCTION func(x,y) DOUBLE PRECISION, INTENT(in) :: x(:), y(:) DOUBLE PRECISION :: func(SIZE(x), SIZE(y)) DOUBLE PRECISION :: zy INTEGER :: j DO j=1,SIZE(y) zy = -mbes * SIN(mbes*y(j)) func(:,j) =(1-x(:)**2) * x(:)**mbes * zy END DO END FUNCTION func FUNCTION func1(x,y) DOUBLE PRECISION, INTENT(in) :: x(:), y(:) DOUBLE PRECISION :: func1(SIZE(x), SIZE(y)) DOUBLE PRECISION :: zy INTEGER :: j DO j=1,SIZE(y) zy = -mbes * SIN(mbes*y(j)) func1(:,j) = (mbes - (mbes+2.0d0)*x(:)**2) * x(:)**(mbes-1) * zy END DO END FUNCTION func1 ! FUNCTION norm2(x) ! ! Compute the 2-norm of array x ! DOUBLE PRECISION :: norm2 DOUBLE PRECISION, INTENT(in) :: x(:) DOUBLE PRECISION :: sum2 INTEGER :: i ! sum2 = 0.0d0 DO i=1,SIZE(x,1) sum2 = sum2 + x(i)**2 END DO norm2 = SQRT(sum2) END FUNCTION norm2 END PROGRAM main diff --git a/examples/getgrad_perf.f90 b/examples/getgrad_perf.f90 index 0a168c9..79a22f0 100644 --- a/examples/getgrad_perf.f90 +++ b/examples/getgrad_perf.f90 @@ -1,221 +1,221 @@ !> !> @file getgrad_perf.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Test and compare performance of using "spline" and ! "pp" forms. 2D case ! USE bsplines ! IMPLICIT NONE INTEGER :: nx, ny, ngauss(2), nidbas(2), nits INTEGER :: npt, d1, d2 INTEGER :: i, j, its, ngroup=4 INTEGER :: i1, i2, nset, nremain DOUBLE PRECISION :: pi, dx, dy DOUBLE PRECISION :: seconds, t0, t1, tscal, tvec DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: coefs DOUBLE PRECISION, ALLOCATABLE :: xpt(:), ypt(:), fgrid00(:), fgrid01(:), fgrid10(:) DOUBLE PRECISION, ALLOCATABLE :: fgrad00(:), fgrad01(:), fgrad10(:) TYPE(spline2d) :: splxy ! NAMELIST /newrun/ nx, ny, nidbas, npt, nits !=============================================================================== ! 0.0 Prologue ! ! 2D grid ! nx = 8 ny = 8 nidbas = (/ 3, 3 /) npt = 100000 nits =100 READ(*,newrun) WRITE(*,newrun) ! pi = 4.0d0*ATAN(1.0d0) ALLOCATE(xgrid(0:nx), ygrid(0:ny)) dx = 1.0d0/REAL(nx) xgrid = (/ (i*dx,i=0,nx) /) dy = 2.0d0*pi/REAL(ny) ygrid = (/ (j*dy,j=0,ny) /) ! WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny) ! ! Set up spline ! ngauss = 4 CALL set_spline(nidbas, ngauss, xgrid, ygrid, splxy, (/.FALSE., .TRUE./)) d1 = splxy%sp1%dim d2 = splxy%sp2%dim WRITE(*,'(a,3i4)') 'd1, d2 =', d1, d2 WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots ! ALLOCATE(xpt(npt), ypt(npt)) CALL RANDOM_NUMBER(xpt) CALL RANDOM_NUMBER(ypt); ypt = 2.0d0*pi*ypt ! ALLOCATE(coefs(d1,d2)) ALLOCATE(fgrad00(npt), fgrad01(npt), fgrad10(npt)) ALLOCATE(fgrid00(npt), fgrid01(npt), fgrid10(npt)) ! !=============================================================================== ! 1.0 PPFORM ! coefs = 1.0d0 ! => f=1, all derivatives = 0! ! splxy%sp1%nlppform = .TRUE. splxy%sp2%nlppform = .TRUE. CALL gridval(splxy, xpt, ypt, fgrid00, (/0,0/), coefs) ! ! Vector GRIDVAL WRITE(*,'(/a/a5,2a12)') 'Vector ppform', 'N', 't(s)', 'SpeedUp' ngroup = 1 DO WHILE (ngroup.LT.npt/2) nset = npt/ngroup nremain = MODULO(npt, ngroup) IF(nremain.NE.0) nset = nset+1 t0 = seconds() i2 = 0 DO i=1,nset i1 = i2+1 i2 = MIN(i2+ngroup,npt) DO its=1,nits CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fgrid00(i1:i2), (/0,0/)) CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fgrid01(i1:i2), (/0,1/)) CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fgrid10(i1:i2), (/1,0/)) END DO END DO t1 = seconds()-t0 tvec = t1/REAL(npt*nits) IF(ngroup.EQ.1) tscal=tvec WRITE(*,'(i5,3(1pe12.3))') ngroup, tvec, tscal/tvec ngroup = 2*ngroup END DO WRITE(*,'(/a,3(1pe12.3))') 'GRIDVAL PPFORM: Max errors', & & MAXVAL(ABS(fgrid00-1.0d0)), MAXVAL(ABS(fgrid01)), MAXVAL(ABS(fgrid10)) ! ! Vector GETGRAD WRITE(*,'(/a/a5,2a12)') 'Vector ppform', 'N', 't(s)', 'SpeedUp' ngroup = 1 DO WHILE (ngroup.LT.npt/2) nset = npt/ngroup nremain = MODULO(npt, ngroup) IF(nremain.NE.0) nset = nset+1 t0 = seconds() i2 = 0 DO i=1,nset i1 = i2+1 i2 = MIN(i2+ngroup,npt) DO its=1,nits CALL getgrad(splxy, xpt(i1:i2), ypt(i1:i2), & & fgrad00(i1:i2), fgrad10(i1:i2), fgrad01(i1:i2)) END DO END DO t1 = seconds()-t0 tvec = t1/REAL(npt*nits) IF(ngroup.EQ.1) tscal=tvec WRITE(*,'(i5,3(1pe12.3))') ngroup, tvec, tscal/tvec ngroup = 2*ngroup END DO WRITE(*,'(/a,3(1pe12.3))') 'GETGRAD PPFORM: Max errors', & & MAXVAL(ABS(fgrad00-fgrid00)), MAXVAL(ABS(fgrad01-fgrid01)), & & MAXVAL(ABS(fgrad10-fgrid10)) !=============================================================================== ! 2.0 Spline expansion ! coefs = 1.0d0 ! => f=1, all derivatives = 0! ! splxy%sp1%nlppform = .FALSE. splxy%sp2%nlppform = .FALSE. CALL gridval(splxy, xpt, ypt, fgrid00, (/0,0/), coefs) ! ! Vector GRIDVAL WRITE(*,'(/a/a5,2a12)') 'Vector spline', 'N', 't(s)', 'SpeedUp' ngroup = 1 DO WHILE (ngroup.LT.npt/2) nset = npt/ngroup nremain = MODULO(npt, ngroup) IF(nremain.NE.0) nset = nset+1 t0 = seconds() i2 = 0 DO i=1,nset i1 = i2+1 i2 = MIN(i2+ngroup,npt) DO its=1,nits CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fgrid00(i1:i2), (/0,0/)) CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fgrid01(i1:i2), (/0,1/)) CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fgrid10(i1:i2), (/1,0/)) END DO END DO t1 = seconds()-t0 tvec = t1/REAL(npt*nits) IF(ngroup.EQ.1) tscal=tvec WRITE(*,'(i5,3(1pe12.3))') ngroup, tvec, tscal/tvec ngroup = 2*ngroup END DO WRITE(*,'(/a,3(1pe12.3))') 'GRIDVAL SPLINE: Max errors', & & MAXVAL(ABS(fgrid00-1.0d0)), MAXVAL(ABS(fgrid01)), MAXVAL(ABS(fgrid10)) ! ! Vector GETGRAD WRITE(*,'(/a/a5,2a12)') 'Vector spline', 'N', 't(s)', 'SpeedUp' ngroup = 1 DO WHILE (ngroup.LT.npt/2) nset = npt/ngroup nremain = MODULO(npt, ngroup) IF(nremain.NE.0) nset = nset+1 t0 = seconds() i2 = 0 DO i=1,nset i1 = i2+1 i2 = MIN(i2+ngroup,npt) DO its=1,nits CALL getgrad(splxy, xpt(i1:i2), ypt(i1:i2), & & fgrad00(i1:i2), fgrad10(i1:i2), fgrad01(i1:i2)) END DO END DO t1 = seconds()-t0 tvec = t1/REAL(npt*nits) IF(ngroup.EQ.1) tscal=tvec WRITE(*,'(i5,3(1pe12.3))') ngroup, tvec, tscal/tvec ngroup = 2*ngroup END DO WRITE(*,'(/a,3(1pe12.3))') 'GETGRAD SPLINE: Max errors', & & MAXVAL(ABS(fgrad00-fgrid00)), MAXVAL(ABS(fgrad01-fgrid01)), & & MAXVAL(ABS(fgrad10-fgrid10)) !=============================================================================== ! ! Clean up ! CALL destroy_sp(splxy) DEALLOCATE(xgrid, ygrid, coefs) DEALLOCATE(xpt, ypt, fgrid00, fgrid01, fgrid10) DEALLOCATE(fgrad00, fgrad01, fgrad10) END PROGRAM main diff --git a/examples/gridval_perf.f90 b/examples/gridval_perf.f90 index d098531..f8d7e5d 100644 --- a/examples/gridval_perf.f90 +++ b/examples/gridval_perf.f90 @@ -1,194 +1,194 @@ !> !> @file gridval_perf.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Test and compare performance of using "spline" and ! "pp" forms. 2D case ! USE bsplines ! IMPLICIT NONE INTEGER :: nx, ny, ngauss(2), nidbas(2), nits INTEGER :: npt, d1, d2 INTEGER :: i, j, its, ngroup=4 INTEGER :: i1, i2, nset, nremain DOUBLE PRECISION :: pi, dx, dy DOUBLE PRECISION :: seconds, t0, t1, tscal, tvec DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: coefs DOUBLE PRECISION, ALLOCATABLE :: xpt(:), ypt(:), fpt00(:), fpt01(:), fpt10(:) DOUBLE PRECISION, ALLOCATABLE :: fscal00(:), fscal01(:), fscal10(:) TYPE(spline2d) :: splxy ! NAMELIST /newrun/ nx, ny, nidbas, npt, nits !=============================================================================== ! 0.0 Prologue ! ! 2D grid ! nx = 8 ny = 8 nidbas = (/ 3, 3 /) npt = 100000 nits =100 READ(*,newrun) WRITE(*,newrun) ! pi = 4.0d0*ATAN(1.0d0) ALLOCATE(xgrid(0:nx), ygrid(0:ny)) dx = 1.0d0/REAL(nx) xgrid = (/ (i*dx,i=0,nx) /) dy = 2.0d0*pi/REAL(ny) ygrid = (/ (j*dy,j=0,ny) /) ! WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny) ! ! Set up spline ! ngauss = 4 CALL set_spline(nidbas, ngauss, xgrid, ygrid, splxy, (/.FALSE., .TRUE./)) d1 = splxy%sp1%dim d2 = splxy%sp2%dim WRITE(*,'(a,3i4)') 'd1, d2 =', d1, d2 WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots !=============================================================================== ! 1.0 PPFORM ! ALLOCATE(xpt(npt), ypt(npt)) CALL RANDOM_NUMBER(xpt) CALL RANDOM_NUMBER(ypt); ypt = 2.0d0*pi*ypt ! ALLOCATE(coefs(d1,d2)) ALLOCATE(fscal00(npt), fscal01(npt), fscal10(npt)) ALLOCATE(fpt00(npt), fpt01(npt), fpt10(npt)) ! coefs = 1.0d0 ! => f=1, all derivatives = 0! ! splxy%sp1%nlppform = .TRUE. splxy%sp2%nlppform = .TRUE. CALL gridval(splxy, xpt, ypt, fscal00, (/0,0/), coefs) ! ! Scalar PPFORM t0 = seconds() DO i=1,npt DO its=1,nits CALL gridval(splxy, xpt(i), ypt(i), fscal00(i), (/0,0/)) CALL gridval(splxy, xpt(i), ypt(i), fscal01(i), (/0,1/)) CALL gridval(splxy, xpt(i), ypt(i), fscal10(i), (/1,0/)) END DO END DO t1 = seconds()-t0 tscal = t1/REAL(npt*nits,8) WRITE(*,'(/a,3(1pe12.3))') 'Scalar PPFORM: Max errors', & & MAXVAL(ABS(fscal00-1.0d0)), MAXVAL(ABS(fscal01)), MAXVAL(ABS(fscal10)) WRITE(*,'(a,3(1pe12.3))') 'time(s)', tscal ! ! Vector PPFORM WRITE(*,'(/a/a5,2a12)') 'Vector ppform', 'N', 't(s)', 'SpeedUp' ngroup = 1 DO WHILE (ngroup.LT.npt/2) nset = npt/ngroup nremain = MODULO(npt, ngroup) IF(nremain.NE.0) nset = nset+1 t0 = seconds() i2 = 0 DO i=1,nset i1 = i2+1 i2 = MIN(i2+ngroup,npt) DO its=1,nits CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fpt00(i1:i2), (/0,0/)) CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fpt01(i1:i2), (/0,1/)) CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fpt10(i1:i2), (/1,0/)) END DO END DO t1 = seconds()-t0 tvec = t1/REAL(npt*nits) WRITE(*,'(i5,3(1pe12.3))') ngroup, tvec, tscal/tvec ngroup = 2*ngroup END DO WRITE(*,'(/a,3(1pe12.3))') 'Vector PPFORM: Max errors', & & MAXVAL(ABS(fpt00-fscal00)), MAXVAL(ABS(fpt01-fscal01)), MAXVAL(ABS(fpt10-fscal10)) !=============================================================================== ! 2.0 Sline expansion ! coefs = 1.0d0 ! => f=1, all derivatives = 0! ! splxy%sp1%nlppform = .FALSE. splxy%sp2%nlppform = .FALSE. CALL gridval(splxy, xpt, ypt, fscal00, (/0,0/), coefs) ! ! Scalar SPLINE t0 = seconds() DO i=1,npt DO its=1,nits CALL gridval(splxy, xpt(i), ypt(i), fscal00(i), (/0,0/)) CALL gridval(splxy, xpt(i), ypt(i), fscal01(i), (/0,1/)) CALL gridval(splxy, xpt(i), ypt(i), fscal10(i), (/1,0/)) END DO END DO t1 = seconds()-t0 tscal = t1/REAL(npt*nits,8) WRITE(*,'(/a,3(1pe12.3))') 'Scalar SPLINE: Max errors', & & MAXVAL(ABS(fscal00-1.0d0)), MAXVAL(ABS(fscal01)), MAXVAL(ABS(fscal10)) WRITE(*,'(a,3(1pe12.3))') 'time(s)', tscal ! ! Vector SPLINE WRITE(*,'(/a/a5,2a12)') 'Vector spline', 'N', 't(s)', 'SpeedUp' ngroup = 1 DO WHILE (ngroup.LT.npt/2) nset = npt/ngroup nremain = MODULO(npt, ngroup) IF(nremain.NE.0) nset = nset+1 t0 = seconds() i2 = 0 DO i=1,nset i1 = i2+1 i2 = MIN(i2+ngroup,npt) DO its=1,nits CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fpt00(i1:i2), (/0,0/)) CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fpt01(i1:i2), (/0,1/)) CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fpt10(i1:i2), (/1,0/)) END DO END DO t1 = seconds()-t0 tvec = t1/REAL(npt*nits) WRITE(*,'(i5,3(1pe12.3))') ngroup, tvec, tscal/tvec ngroup = 2*ngroup END DO WRITE(*,'(/a,3(1pe12.3))') 'Vector SPLINE: Max errors', & & MAXVAL(ABS(fpt00-fscal00)), MAXVAL(ABS(fpt01-fscal01)), MAXVAL(ABS(fpt10-fscal10)) !=============================================================================== ! ! Clean up ! CALL destroy_sp(splxy) DEALLOCATE(xgrid, ygrid, coefs) DEALLOCATE(xpt, ypt, fpt00, fpt01, fpt10) DEALLOCATE(fscal00, fscal01, fscal10) END PROGRAM main diff --git a/examples/gyro.f90 b/examples/gyro.f90 index d5a79e8..3c7cb9f 100644 --- a/examples/gyro.f90 +++ b/examples/gyro.f90 @@ -1,179 +1,179 @@ !> !> @file gyro.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Gyro-average using splines. ! F(x)=cos(x) => \bar F(x,rho) = J_0(rho) cos(x) ! USE bsplines USE futils ! IMPLICIT NONE INTEGER, PARAMETER :: nx=10, nidbas=3, dim=nx+nidbas, npt=100, & & nrho=21, nnq=5 DOUBLE PRECISION :: xgrid(0:nx), fgrid(0:nx), coefs(dim) DOUBLE PRECISION :: xpt(npt), fcalc(npt), fexact(npt) DOUBLE PRECISION :: averf(0:nx), averfexact(0:nx) DOUBLE PRECISION, POINTER :: splines(:,:) TYPE(spline1d) :: spl DOUBLE PRECISION :: pi, twopi, dx, lperiod, dth DOUBLE PRECISION :: drho, rho(nrho), erraver(nrho,nnq) INTEGER :: i, j, nq(nnq) DOUBLE PRECISION :: dbesj0 ! CHARACTER(len=128) :: file='gyro.h5' INTEGER :: fid ! INTERFACE SUBROUTINE gyro(spl, xgrid, coefs, rho, nq, averf) USE bsplines TYPE(spline1d) :: spl DOUBLE PRECISION, INTENT(in) :: xgrid(0:), coefs(:), rho INTEGER, INTENT(in) :: nq DOUBLE PRECISION, INTENT(out) :: averf(0:) END SUBROUTINE gyro END INTERFACE ! pi = 4.0d0*ATAN(1.0d0) twopi = 2.0d0*pi ! ! Create hdf5 file CALL creatf(file, fid, 'gyro Result File') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NIDBAS', nidbas) ! ! Grid and function values dx = twopi/nx xgrid(0) = 0.0d0 DO i=1,nx xgrid(i) = xgrid(0) + i*dx END DO lperiod = xgrid(nx)-xgrid(0) fgrid = func(xgrid) ! ! Spline interpolation CALL set_splcoef(nidbas, xgrid, spl, period=.TRUE.) CALL get_splcoef(spl, fgrid, coefs) WRITE(*,'(a)') 'Spline coefficients' WRITE(*,'(i5,f12.4)') (i-1, coefs(i), i=1,dim) ! ! Error of interpolation CALL RANDOM_NUMBER(xpt) xpt = twopi*xpt CALL gridval(spl, xpt, fcalc, 0, coefs) fexact = func(xpt) !!$ WRITE(*,'(a)') 'Interpolated and exact f' !!$ WRITE(*,'(3(1pe12.3))') (xpt(i), fcalc(i), fexact(i), i=1,npt) WRITE(*,'(a,1pe12.3)') 'Interpolation error', norm2(fcalc-fexact) CALL putarr(fid, '/X', xpt) CALL putarr(fid, '/FEXACT', fexact, 'Exact values') CALL putarr(fid, '/FCALC', fcalc, 'Interpolated values') ! ! Gyro-averaged of F at grid points xgrid(0:nx-1) drho = 5.0d0/nrho DO i=1,nrho rho(i) = i*drho DO j=1,nnq nq(j) = 2**(j+1) CALL gyro(spl, xgrid, coefs, rho(i), nq(j), averf) averfexact = dbesj0(rho(i))*COS(xgrid) erraver(i,j) = norm2(averfexact-averf) END DO END DO ! WRITE(*,'(a,f8.3,i5)') 'averaged f at rho, nq =', rho(nrho), nq(nnq) WRITE(*,'(3(1pe12.3))') (xgrid(i),averf(i),averfexact(i), i=0,nx) CALL putarr(fid, '/XGRID', xgrid) CALL putarr(fid, '/AVERF', averf, 'Averaged F') CALL putarr(fid, '/AVERFEXACT', averfexact, 'Averaged F exact') ! CALL putarr(fid, '/RHO', rho) CALL putarr(fid, '/NQ', nq) CALL putarr(fid, '/ERRAVER', erraver) ! ! Clean up CALL destroy_sp(spl) CALL closef(fid) CONTAINS FUNCTION func(x) DOUBLE PRECISION, INTENT(in) :: x(:) DOUBLE PRECISION :: func(SIZE(x)) func = COS(x) END FUNCTION func FUNCTION norm2(x) DOUBLE PRECISION :: norm2 DOUBLE PRECISION, INTENT(in) :: x(:) norm2 = SQRT(DOT_PRODUCT(x,x)) END FUNCTION norm2 END PROGRAM main ! SUBROUTINE gyro(spl, xgrid, coefs, rho, nq, averf) ! ! Gyro-average using spline SPL and NQ point quadratire for ! theta-integration. ! USE bsplines IMPLICIT NONE TYPE(spline1d) :: spl DOUBLE PRECISION, INTENT(in) :: xgrid(0:), coefs(:), rho INTEGER, INTENT(in) :: nq DOUBLE PRECISION, INTENT(out) :: averf(0:) ! DOUBLE PRECISION :: th(nq), wth(nq), xq(nq) DOUBLE PRECISION, ALLOCATABLE :: avermat(:,:) DOUBLE PRECISION :: pi, twopi, lperiod, dth DOUBLE PRECISION, POINTER :: splines(:,:) INTEGER :: i, j, iq, nx, dim ! pi = 4.0d0*ATAN(1.0d0) twopi = 2.0d0*pi ! ! Quadrature in theta dth = twopi/nq th(1) = -pi + dth/2.0d0 DO iq=2,nq th(iq) = th(iq-1)+dth END DO wth = dth ! ! Gyro-averaging matrix CALL get_dim(spl, dim, nx) lperiod = xgrid(nx)-xgrid(0) ALLOCATE(avermat(0:nx,dim)) DO i=0,nx xq = xgrid(i) + rho*COS(th) xq = xgrid(0) + MODULO(xq-xgrid(0), lperiod) CALL allsplines(spl, xq, splines) DO j=1,dim avermat(i,j) = DOT_PRODUCT(wth, splines(:,j))/twopi END DO END DO ! ! Gyro-averaged of F at grid points xgrid(0:nx) averf = MATMUL(avermat, coefs) ! DEALLOCATE(avermat) END SUBROUTINE gyro diff --git a/examples/ibcmat.f90 b/examples/ibcmat.f90 index d48ea56..12ec56f 100644 --- a/examples/ibcmat.f90 +++ b/examples/ibcmat.f90 @@ -1,176 +1,176 @@ !> !> @file ibcmat.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> SUBROUTINE ibcmat(mat, ny) ! ! Apply BC on matrix ! USE matrix IMPLICIT NONE TYPE(gbmat), INTENT(inout) :: mat INTEGER, INTENT(in) :: ny INTEGER :: kl, ku, nrank, i, j DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:) INTEGER :: i0, ii INTEGER :: i0_arr(ny), col(ny) !=========================================================================== ! 1.0 Prologue ! kl = mat%kl ku = mat%ku nrank = mat%rank ALLOCATE(zsum(nrank), arr(nrank)) ! i0 = nrank - ku WRITE(*,'(a,i6)') 'Estimated i0', i0 DO i=1,ny arr = 0.0d0 col(i) = nrank-ny+i CALL getcol(mat, nrank-ny+i, arr) DO ii=1,nrank i0_arr(i)=ii IF(arr(ii) .NE. 0.0d0) EXIT END DO END DO !!$ WRITE(*,'(a/(10i6))') 'col', col !!$ WRITE(*,'(a/(10i6))') 'i0_arr', i0_arr ! !=========================================================================== ! 2.0 Unicity at the axis ! ! The vertical sum on the NY-th row ! zsum = 0.0d0 DO i=1,ny arr = 0.0d0 CALL getrow(mat, i, arr) DO j=1,ny+ku zsum(j) = zsum(j) + arr(j) END DO END DO CALL putrow(mat, ny, zsum) ! ! The horizontal sum on the NY-th column ! zsum = 0.0d0 DO j=1,ny arr = 0.0d0 CALL getcol(mat, j, arr) DO i=ny,ny+kl zsum(i) = zsum(i) + arr(i) END DO END DO CALL putcol(mat, ny, zsum) ! ! The away operator ! DO j = 1,ny-1 arr = 0.0d0; arr(j) = 1.0d0 CALL putcol(mat, j, arr) END DO ! DO i = 1,ny-1 arr = 0.0d0; arr(i) = 1.0d0 CALL putrow(mat, i, arr) END DO ! !=========================================================================== ! 3.0 Dirichlet on right boundary ! DO j = nrank, nrank-ny+1, -1 arr = 0.0d0; arr(j) = 1.0d0 CALL putcol(mat, j, arr) END DO ! DO i = nrank, nrank-ny+1, -1 arr = 0.0d0; arr(i) = 1.0d0 CALL putrow(mat, i, arr) END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(zsum, arr) ! END SUBROUTINE ibcmat !+++ SUBROUTINE ibcrhs(rhs, ny) ! ! Apply BC on RHS ! IMPLICIT NONE DOUBLE PRECISION, INTENT(inout) :: rhs(:) INTEGER, INTENT(in) :: ny INTEGER :: nrank DOUBLE PRECISION :: zsum !=========================================================================== ! 1.0 Prologue ! nrank = SIZE(rhs,1) !=========================================================================== ! 2.0 Unicity at the axis ! ! The vertical sum ! zsum = SUM(rhs(1:ny)) rhs(ny) = zsum rhs(1:ny-1) = 0.0d0 !=========================================================================== ! 3.0 Dirichlet on right boundary ! rhs(nrank-ny+1:nrank) = 0.0d0 END SUBROUTINE ibcrhs !+++ SUBROUTINE ibcrhs3(rhs, ny) ! ! Apply BC on RHS ! IMPLICIT NONE DOUBLE PRECISION, INTENT(inout) :: rhs(:,:) INTEGER, INTENT(in) :: ny INTEGER :: nrank, nz, k DOUBLE PRECISION :: zsum !=========================================================================== ! 1.0 Prologue ! nrank = SIZE(rhs,1) nz = SIZE(rhs,2) !=========================================================================== ! 2.0 Unicity at the axis ! ! The vertical sum ! DO k=1,nz zsum = SUM(rhs(1:ny,k)) rhs(ny,k) = zsum rhs(1:ny-1,k) = 0.0d0 END DO !=========================================================================== ! 3.0 Dirichlet on right boundary ! DO k=1,nz rhs(nrank-ny+1:nrank,k) = 0.0d0 END DO END SUBROUTINE ibcrhs3 diff --git a/examples/mesh.f90 b/examples/mesh.f90 index aa102a9..a532f08 100644 --- a/examples/mesh.f90 +++ b/examples/mesh.f90 @@ -1,66 +1,66 @@ !> !> @file mesh.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Equidistant and non-equidistant mesh ! USE bsplines IMPLICIT NONE INTEGER :: i, nx DOUBLE PRECISION :: coefs(5), dev, dx DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid ! INTERFACE SUBROUTINE meshdist(coefs, x, nx) DOUBLE PRECISION, INTENT(in) :: coefs(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) END SUBROUTINE meshdist END INTERFACE ! NAMELIST /newrun/ nx, coefs ! nx = 8 ! Number oh intevals in x coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function ! see function FDIST in MESHDIST ! READ(*,newrun) WRITE(*,newrun) ! ALLOCATE(xgrid(0:nx)) xgrid(0) = 0.0d0 xgrid(nx) = 1.0d0 CALL meshdist(coefs, xgrid, nx) PRINT*, 'Mesh is equidistant?', is_equid(xgrid, dev) PRINT*, 'dev =', dev ! dx = 1.0d0/REAL(nx,8) xgrid = (/ (i*dx, i=0,nx) /) PRINT*, 'Mesh is equidistant?', is_equid(xgrid, dev) PRINT*, 'dev =', dev ! DEALLOCATE(xgrid) END PROGRAM main diff --git a/examples/meshdist.f90 b/examples/meshdist.f90 index a20c2fd..f6e24f8 100644 --- a/examples/meshdist.f90 +++ b/examples/meshdist.f90 @@ -1,82 +1,82 @@ !> !> @file meshdist.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> SUBROUTINE meshdist(c, x, nx) ! ! Construct a 1d non-equidistant mesh given a ! mesh distribution function. ! IMPLICIT NONE DOUBLE PRECISION, INTENT(in) :: c(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) INTEGER :: nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal !!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) END FUNCTION fdist END SUBROUTINE meshdist diff --git a/examples/moments.f90 b/examples/moments.f90 index 0cb925b..ac4b874 100644 --- a/examples/moments.f90 +++ b/examples/moments.f90 @@ -1,228 +1,228 @@ !> !> @file moments.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> ! ! Compute moments of f(x), using its Spline representation ! MODULE globals USE bsplines USE matrix IMPLICIT NONE DOUBLE PRECISION, PARAMETER :: pi = 3.14159265359d0 DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), rhs(:), sol(:) DOUBLE PRECISION, ALLOCATABLE :: finteg(:,:), moms(:) TYPE(spline1d), SAVE :: splx TYPE(gbmat), SAVE :: mat CONTAINS !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE dismat(spl, mat) ! ! Assembly FE matrix mat using spline spl ! TYPE(spline1d), INTENT(in) :: spl TYPE(gbmat), INTENT(inout) :: mat INTEGER :: nrank, nx, nidbas, ngauss INTEGER :: i, igauss, iterm, iw, jt, irow, jcol DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) DOUBLE PRECISION :: contrib ! INTEGER :: kterms ! Number of terms in weak form INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order DOUBLE PRECISION, ALLOCATABLE :: coefs(:) !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl, nrank, nx, nidbas) ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative ! ! Weak form ! kterms = mat%nterms ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms)) ! ! Gauss quadature ! CALL get_gauss(spl, ngauss) ALLOCATE(xgauss(ngauss), wgauss(ngauss)) !=========================================================================== ! 2.0 Assembly loop ! DO i=1,nx CALL get_gauss(spl, ngauss, i, xgauss, wgauss) DO igauss=1,ngauss CALL basfun(xgauss(igauss), spl, fun, i) CALL coefeq(xgauss(igauss), idert, iderw, coefs) DO iterm=1,kterms DO jt=0,nidbas DO iw=0,nidbas contrib = fun(jt,idert(iterm)) * coefs(iterm) * & & fun(iw,iderw(iterm)) * wgauss(igauss) irow=i+iw; jcol=i+jt CALL updtmat(mat, irow, jcol, contrib) END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(fun) DEALLOCATE(xgauss, wgauss) DEALLOCATE(iderw, idert, coefs) ! CONTAINS SUBROUTINE coefeq(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt)) ! ! Mass matrix ! c(1) = 1.d0 idt(1) = 0 idw(1) = 0 END SUBROUTINE coefeq END SUBROUTINE dismat !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE disrhs(spl, rhs) ! ! Assenbly the RHS using spline spl ! TYPE(spline1d), INTENT(in) :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:) INTEGER :: nrank, nx, nidbas, ngauss INTEGER :: i, igauss DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) DOUBLE PRECISION:: contrib !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl, nrank, nx, nidbas) ! ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives) ! ! Gauss quadature ! CALL get_gauss(spl, ngauss) ALLOCATE(xgauss(ngauss), wgauss(ngauss)) !=========================================================================== ! 2.0 Assembly loop ! rhs(1:nrank) = 0.0d0 ! DO i=1,nx CALL get_gauss(spl, ngauss, i, xgauss, wgauss) DO igauss=1,ngauss CALL basfun(xgauss(igauss), spl, fun, i) contrib = wgauss(igauss)*rhseq(xgauss(igauss)) rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1) END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(fun) DEALLOCATE(xgauss, wgauss) ! CONTAINS DOUBLE PRECISION FUNCTION rhseq(v) DOUBLE PRECISION, INTENT(in) :: v rhseq = SQRT(1.0d0/(2.0d0*pi)) * EXP(-0.5d0*v**2) END FUNCTION rhseq END SUBROUTINE disrhs END MODULE globals ! !================================================================================ PROGRAM main USE bsplines USE globals IMPLICIT NONE INTEGER :: nidbas, nx, nmoms, ngauss, rank, kl, ku INTEGER :: i DOUBLE PRECISION :: a, b, dx ! ! Input ! WRITE(*,'(a)') 'Enter, nidbas, a, b, nx, nmoms' READ(*,*) nidbas, a, b, nx, nmoms ! ! Equidistant mesh ! ALLOCATE(xgrid(0:nx)) dx = (b-a)/REAL(nx) DO i=0, nx xgrid(i) = a + i*dx END DO WRITE(*,'(a/(8(1pe12.4)))') 'XGRID', xgrid ! ! Set up spline ! ngauss = nidbas+1 CALL set_spline(nidbas, ngauss, xgrid, splx) ! ! Mass matrix ! CALL get_dim(splx, rank) ! Rank of the FE Mass matrix kl = nidbas ku = kl CALL init(kl, ku, rank, 1, mat) WRITE(*,'(a,3i6)') 'kl, ku, rank', kl, ku, rank CALL dismat(splx, mat) ! ! RHS ! ALLOCATE(rhs(rank), sol(rank)) CALL disrhs(splx, rhs) ! ! Solve for Spline coefs ! CALL factor(mat) CALL bsolve(mat, rhs, sol) WRITE(*,'(a/(8(1pe12.4)))') 'SOL', sol WRITE(*,'(a,1pe20.12)') ' Integral of sol using FINTG', fintg(splx, sol) ! ! Moments ! ALLOCATE(finteg(rank,0:nmoms), moms(0:nmoms)) CALL calc_integ(splx, finteg) DO i=0,nmoms moms(i) = DOT_PRODUCT(sol(:), finteg(:,i)) END DO WRITE(*,'(a,i3)') 'Moments of orders from 0 to', nmoms WRITE(*,'(8(1pe20.12))') moms ! DEALLOCATE(finteg, moms) DEALLOCATE(rhs, sol) DEALLOCATE(xgrid) CALL destroy(mat) CALL destroy_sp(splx) END PROGRAM main diff --git a/examples/optim1.f90 b/examples/optim1.f90 index 8db612e..017aead 100644 --- a/examples/optim1.f90 +++ b/examples/optim1.f90 @@ -1,138 +1,138 @@ !> !> @file optim1.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Test and compare performance of using "spline" and ! "pp" forms. 1D case ! USE bsplines ! IMPLICIT NONE INTEGER :: nx, nidbas, nrank, npt=1000000 INTEGER :: i DOUBLE PRECISION :: dx DOUBLE PRECISION :: seconds, t0, t1 DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, coefs DOUBLE PRECISION, ALLOCATABLE :: xpt(:), fpt0(:), fpt1(:), fun(:, :) INTEGER, ALLOCATABLE :: left(:) TYPE(spline1d) :: splx ! NAMELIST /newrun/ nx, nidbas, npt !=============================================================================== ! ! 1D grid ! nx = 10 nidbas = 3 npt = 1000000 READ(*,newrun) WRITE(*,newrun) ALLOCATE(xgrid(0:nx)) dx = 1.0d0/REAL(nx) xgrid = (/ (i*dx,i=0,nx) /) WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid ! ! Set up spline ! CALL set_spline(nidbas, 4, xgrid, splx) nrank = splx%dim WRITE(*,'(a, i5)') 'nrank =', nrank WRITE(*,'(a/(10f8.3))') 'knots', splx%knots ! ALLOCATE(xpt(npt)) ALLOCATE(left(npt)) ALLOCATE(fun(0:nidbas,0:1)) ! Values and first derivatives of all Splines CALL RANDOM_NUMBER(xpt) CALL locintv(splx, xpt, left) !=============================================================================== ! ! Check def_basfun_opt ! CALL basfun_recur(xpt(101), splx, fun, left(101)+1) WRITE(*,'(/a,f20.15,i4/(2f20.15))') 'BASFUN_RECUR at X=', xpt(101), left(101),& & (fun(:,i), i=0,1) ! !!$ CALL def_basfun(xpt(101), splx, fun) CALL basfun(xpt(101), splx, fun, left(101)+1) WRITE(*,'(/a,f20.15/(2f20.15))') 'DEF_BASFUN at X=', xpt(101), & & (fun(:,i), i=0,1) ! ! Performance of basis function computations ! t0 = seconds() DO i=1,npt CALL basfun_recur(xpt(i), splx, fun, left(i)+1) END DO WRITE(*,'(/a,1pe12.3)') 'BASFUN_RECUR time (s)', (seconds()-t0)/REAL(npt) ! t0 = seconds() DO i=1,npt !!$ CALL def_basfun(xpt(i), splx, fun) CALL basfun(xpt(i), splx, fun, left(i)+1) END DO WRITE(*,'(/a,1pe12.3)') 'DEF_BASFUN time (s)', (seconds()-t0)/REAL(npt) !=============================================================================== ! ! Check and performance of GRIDVAL using PPFORM and SPLINE expansion ! ALLOCATE(coefs(nrank)) DEALLOCATE(xpt) ALLOCATE(xpt(npt), fpt0(npt), fpt1(npt)) CALL RANDOM_NUMBER(xpt) ! splx%nlppform = .TRUE. coefs = 1.0d0 ! CALL gridval(splx, xpt(1:1), fpt0(1:1), 0, coefs) ! t0 = seconds() CALL gridval(splx, xpt, fpt1, 1) CALL gridval(splx, xpt, fpt0, 0) t1 = seconds()-t0 WRITE(*,'(/a,2(1pe12.3))') 'PPFORM: Max errors', & & MAXVAL(ABS(fpt0-1.0d0)) ,MAXVAL(ABS(fpt1)) WRITE(*,'(a,3(1pe12.3))') 'time(s)', t1/REAL(npt) ! splx%nlppform = .FALSE. coefs = 1.0d0 CALL gridval(splx, xpt(1:1), fpt0(1:1), 0, coefs) t0 = seconds() CALL gridval(splx, xpt, fpt1, 1) CALL gridval(splx, xpt, fpt0, 0) t1 = seconds()-t0 WRITE(*,'(/a,2(1pe12.3))') 'SPLINES: Max errors', & & MAXVAL(ABS(fpt0-1.0d0)) ,MAXVAL(ABS(fpt1)) WRITE(*,'(a,3(1pe12.3))') 'time(s)', t1/REAL(npt) !=============================================================================== ! ! Clean up ! CALL destroy_sp(splx) DEALLOCATE(xgrid, coefs) DEALLOCATE(xpt, fpt0, fpt1) DEALLOCATE(fun) END PROGRAM main diff --git a/examples/optim2.f90 b/examples/optim2.f90 index ffb9773..bf18a77 100644 --- a/examples/optim2.f90 +++ b/examples/optim2.f90 @@ -1,119 +1,119 @@ !> !> @file optim2.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Test and compare performance of using "spline" and ! "pp" forms. 2D case ! USE bsplines ! IMPLICIT NONE INTEGER :: nx, ny, ngauss(2), nidbas(2) INTEGER :: npt, d1, d2 INTEGER :: i, j DOUBLE PRECISION :: pi, dx, dy DOUBLE PRECISION :: seconds, t0, t1 DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: coefs DOUBLE PRECISION, ALLOCATABLE :: xpt(:), ypt(:), fpt00(:), fpt01(:), fpt10(:) TYPE(spline2d) :: splxy ! NAMELIST /newrun/ nx, ny, nidbas, npt !=============================================================================== ! ! 2D grid ! nx = 8 ny = 8 nidbas = (/ 3, 3 /) npt = 1000000 READ(*,newrun) WRITE(*,newrun) ! pi = 4.0d0*ATAN(1.0d0) ALLOCATE(xgrid(0:nx), ygrid(0:ny)) dx = 1.0d0/REAL(nx) xgrid = (/ (i*dx,i=0,nx) /) dy = 2.0d0*pi/REAL(ny) ygrid = (/ (j*dy,j=0,ny) /) ! WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny) ! ! Set up spline ! ngauss = 4 CALL set_spline(nidbas, ngauss, xgrid, ygrid, splxy, (/.FALSE., .TRUE./)) d1 = splxy%sp1%dim d2 = splxy%sp2%dim WRITE(*,'(a,3i4)') 'd1, d2 =', d1, d2 WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots !=============================================================================== ! ! Check and performance of GRIDVAL using PPFORM and SPLINE expansion ! ALLOCATE(xpt(npt), ypt(npt)) CALL RANDOM_NUMBER(xpt) CALL RANDOM_NUMBER(ypt); ypt = 2.0d0*pi*ypt ! ALLOCATE(coefs(d1,d2)) ALLOCATE(fpt00(npt), fpt01(npt), fpt10(npt)) ! coefs = 1.0d0 ! splxy%sp1%nlppform = .TRUE. splxy%sp2%nlppform = .TRUE. CALL gridval(splxy, xpt, ypt, fpt00, (/0,0/), coefs) ! t0 = seconds() CALL gridval(splxy, xpt, ypt, fpt00, (/0,0/)) CALL gridval(splxy, xpt, ypt, fpt01, (/0,1/)) CALL gridval(splxy, xpt, ypt, fpt10, (/1,0/)) t1 = seconds()-t0 WRITE(*,'(/a,3(1pe12.3))') 'PPFORM: Max errors', & & MAXVAL(ABS(fpt00-1.0d0)), MAXVAL(ABS(fpt01)), MAXVAL(ABS(fpt10)) WRITE(*,'(a,3(1pe12.3))') 'time(s)', t1/REAL(npt) ! splxy%sp1%nlppform = .FALSE. splxy%sp2%nlppform = .FALSE. CALL gridval(splxy, xpt, ypt, fpt00, (/0,0/), coefs) t0 = seconds() CALL gridval(splxy, xpt, ypt, fpt00, (/0,0/)) CALL gridval(splxy, xpt, ypt, fpt01, (/0,1/)) CALL gridval(splxy, xpt, ypt, fpt10, (/1,0/)) t1 = seconds()-t0 WRITE(*,'(/a,3(1pe12.3))') 'SPLINES: Max errors', & & MAXVAL(ABS(fpt00-1.0d0)), MAXVAL(ABS(fpt01)), MAXVAL(ABS(fpt10)) WRITE(*,'(a,3(1pe12.3))') 'time(s)', t1/REAL(npt) !=============================================================================== ! ! Clean up ! CALL destroy_sp(splxy) DEALLOCATE(xgrid, ygrid, coefs) DEALLOCATE(xpt, ypt, fpt00, fpt01, fpt10) END PROGRAM main diff --git a/examples/optim3.f90 b/examples/optim3.f90 index 9a617a0..5c50aba 100644 --- a/examples/optim3.f90 +++ b/examples/optim3.f90 @@ -1,137 +1,137 @@ !> !> @file optim3.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Test and compare performance of using "spline" and ! "pp" forms. 2D1D case ! USE bsplines ! IMPLICIT NONE INTEGER :: nx, ny, nz, ngauss(3), nidbas(3) INTEGER :: npt, d1, d2, d3 INTEGER :: i, j, k DOUBLE PRECISION :: pi, dx, dy, dz DOUBLE PRECISION :: seconds, t0, t1 DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, zgrid DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: coefs DOUBLE PRECISION, ALLOCATABLE :: xpt(:), ypt(:), zpt(:) DOUBLE PRECISION, ALLOCATABLE :: fpt000(:), fpt100(:), fpt010(:), fpt001(:) TYPE(spline2d1d) :: splxyz ! NAMELIST /newrun/ nx, ny, nz, nidbas, npt !=============================================================================== ! ! 2D grid ! nx = 8 ny = 8 nz = 8 nidbas = (/ 3, 3, 3 /) npt = 1000000 READ(*,newrun) WRITE(*,newrun) ! pi = 4.0d0*ATAN(1.0d0) ALLOCATE(xgrid(0:nx), ygrid(0:ny), zgrid(0:nz)) dx = 1.0d0/REAL(nx) xgrid = (/ (i*dx,i=0,nx) /) dy = 2.0d0*pi/REAL(ny) ygrid = (/ (j*dy,j=0,ny) /) dz = 2.0d0*pi/REAL(nz) zgrid = (/ (k*dz,k=0,nz) /) ! WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny) WRITE(*,'(a/(10f8.3))') 'ZGRID', zgrid(0:nz) ! ! Set up spline ! ngauss = 4 CALL set_spline(nidbas, ngauss, xgrid, ygrid, zgrid, splxyz, & & (/.FALSE., .TRUE., .TRUE./)) d1 = splxyz%sp12%sp1%dim d2 = splxyz%sp12%sp2%dim d3 = splxyz%sp3%dim WRITE(*,'(a,3i4)') 'd1, d2, d3 =', d1, d2, d3 WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxyz%sp12%sp1%knots WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxyz%sp12%sp2%knots WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Z', splxyz%sp3%knots !=============================================================================== ! ! Check and performance of GRIDVAL using PPFORM and SPLINE expansion ! ALLOCATE(xpt(npt), ypt(npt), zpt(npt)) CALL RANDOM_NUMBER(xpt) CALL RANDOM_NUMBER(ypt); ypt = 2.0d0*pi*ypt CALL RANDOM_NUMBER(zpt); zpt = 2.0d0*pi*zpt ! ALLOCATE(coefs(d1,d2,d3)) ALLOCATE(fpt000(npt), fpt100(npt), fpt010(npt), fpt001(npt)) ! coefs = 1.0d0 ! splxyz%sp12%sp1%nlppform = .TRUE. splxyz%sp12%sp2%nlppform = .TRUE. splxyz%sp3%nlppform = .TRUE. CALL gridval(splxyz, xpt, ypt, zpt, fpt000, (/0,0,0/), coefs) t0 = seconds() CALL gridval(splxyz, xpt, ypt, zpt, fpt000, (/0,0,0/)) CALL gridval(splxyz, xpt, ypt, zpt, fpt100, (/1,0,0/)) CALL gridval(splxyz, xpt, ypt, zpt, fpt010, (/0,1,0/)) CALL gridval(splxyz, xpt, ypt, zpt, fpt001, (/0,0,1/)) t1 = seconds()-t0 WRITE(*,'(/a,4(1pe12.3))') 'PPFORM: Max errors', & & MAXVAL(ABS(fpt000-1.0d0)), & & MAXVAL(ABS(fpt100)), & & MAXVAL(ABS(fpt010)), & & MAXVAL(ABS(fpt001)) WRITE(*,'(a,3(1pe12.3))') 'time(s)', t1/REAL(npt) ! splxyz%sp12%sp1%nlppform = .FALSE. splxyz%sp12%sp2%nlppform = .FALSE. splxyz%sp3%nlppform = .FALSE. CALL gridval(splxyz, xpt, ypt, zpt, fpt000, (/0,0,0/), coefs) t0 = seconds() CALL gridval(splxyz, xpt, ypt, zpt, fpt000, (/0,0,0/)) CALL gridval(splxyz, xpt, ypt, zpt, fpt100, (/1,0,0/)) CALL gridval(splxyz, xpt, ypt, zpt, fpt010, (/0,1,0/)) CALL gridval(splxyz, xpt, ypt, zpt, fpt001, (/0,0,1/)) t1 = seconds()-t0 WRITE(*,'(/a,4(1pe12.3))') 'SPLINES: Max errors', & & MAXVAL(ABS(fpt000-1.0d0)), & & MAXVAL(ABS(fpt100)), & & MAXVAL(ABS(fpt010)), & & MAXVAL(ABS(fpt001)) WRITE(*,'(a,3(1pe12.3))') 'time(s)', t1/REAL(npt) !=============================================================================== ! ! Clean up ! CALL destroy_sp(splxyz) DEALLOCATE(xgrid, ygrid, zgrid, coefs) DEALLOCATE(xpt, ypt, fpt000,fpt100, fpt010, fpt001) END PROGRAM main diff --git a/examples/pde1d.f90 b/examples/pde1d.f90 index 86584b8..74fd2ea 100644 --- a/examples/pde1d.f90 +++ b/examples/pde1d.f90 @@ -1,419 +1,419 @@ !> !> @file pde1d.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Solving the following 1d differential eqation using splines: ! ! -d/dr[r d/dr] f = k^2 r^(k-1), with f(r=1) = 0 ! exact solution: f(r) = 1 - r^k ! USE bsplines USE matrix USE futils USE conmat_mod IMPLICIT NONE INTEGER :: nx, nidbas, ngauss, kdiff INTEGER :: i, nrank, kl, ku LOGICAL :: nlppform DOUBLE PRECISION :: coefs(5) DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, rhs, sol TYPE(spline1d) :: splx TYPE(gbmat) :: mat ! CHARACTER(len=128) :: file='pde1d.h5' INTEGER :: fid DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: solcal, solana, errsol DOUBLE PRECISION :: seconds, t0, tmat, tfact, tsolv ! INTERFACE SUBROUTINE dismat(spl, mat) USE bsplines USE matrix TYPE(spline1d), INTENT(in) :: spl TYPE(gbmat), INTENT(inout) :: mat END SUBROUTINE dismat SUBROUTINE disrhs(kdiff, spl, rhs) USE bsplines INTEGER, INTENT(in) :: kdiff TYPE(spline1d), INTENT(in) :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:) END SUBROUTINE disrhs SUBROUTINE meshdist(coefs, x, nx) DOUBLE PRECISION, INTENT(in) :: coefs(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) END SUBROUTINE meshdist END INTERFACE ! NAMELIST /newrun/ nx, nidbas, ngauss, kdiff, nlppform, coefs !=========================================================================== ! 1.0 Prologue ! ! Read in data specific to run ! nx = 8 ! Number oh intevals in x nidbas = 3 ! Degree of splines ngauss = 4 ! Number of Gauss points/interval kdiff = 2 ! Exponent of differential problem nlppform = .TRUE. ! Use PPFORM for gridval or not coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function ! see function FDIST in MESHDIST ! READ(*,newrun) WRITE(*,newrun) ! ! Define grid on x axis ! ALLOCATE(xgrid(0:nx)) xgrid(0) = 0.0d0 xgrid(nx) = 1.0d0 CALL meshdist(coefs, xgrid, nx) WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx) WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx) ! ! Create hdf5 file ! CALL creatf(file, fid, 'PDE1D Result File') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NIDBAS', nidbas) CALL attach(fid, '/', 'NGAUSS', ngauss) CALL attach(fid, '/', 'KDIFF', kdiff) !=========================================================================== ! 2.0 Discretize the PDE ! ! Set up spline ! t0 = seconds() CALL set_spline(nidbas, ngauss, xgrid, splx, nlppform=nlppform) CALL get_dim(splx, nrank) ! Rank of the FE matrix WRITE(*,'(a,l6)') 'splx%nlequid', splx%nlequid ! ! FE matrix assembly ! kl = nidbas ku = kl CALL init(kl, ku, nrank, 1, mat) WRITE(*,'(a,3i6)') 'kl, ku, nrank', kl, ku, nrank !!$ CALL dismat(splx, mat) CALL conmat(splx, mat, coefeq) ! ALLOCATE(arr(nrank)) !!$ WRITE(*,'(/a)') 'Matrice before BC' !!$ DO i=1,nrank !!$ CALL getrow(mat, i, arr) !!$ WRITE(*,'(12f8.3)') arr, SUM(arr) !!$ END DO ! ! RHS assembly ! ALLOCATE(rhs(nrank), sol(nrank)) CALL disrhs(kdiff, splx, rhs) !!$ WRITE(*,'(/a,/(12(f8.3)))') 'RHS before BC', rhs ! ! Set BC f(r=1) = 0 on matrix ! arr(1:nrank-1) = 0.0d0 arr(nrank) = 1.0d0 CALL putrow(mat, nrank, arr) CALL putcol(mat, nrank, arr) tmat = seconds() - t0 !!$ WRITE(*,'(/a)') 'Matrice after BC' !!$ DO i=1,nrank !!$ CALL getrow(mat, i, arr) !!$ WRITE(*,'(12f8.3)') arr !!$ END DO ! ! Set BC f(r=1) = 0 on RHS ! rhs(nrank) = 0.0d0 !!$ WRITE(*,'(/a,/(12(f8.3)))') 'RHS after BC', rhs CALL putarr(fid, '/RHS', rhs, 'RHS of linear system') CALL putarr(fid, '/MAT', mat%val, 'GB matrice with BC') CALL attach(fid, '/MAT', 'KL', mat%kl) CALL attach(fid, '/MAT', 'KU', mat%ku) CALL attach(fid, '/MAT', 'RANK', mat%rank) !=========================================================================== ! 3.0 Solve the dicretized system ! t0 = seconds() CALL factor(mat) tfact = seconds() - t0 t0 = seconds() CALL bsolve(mat, rhs, sol) tsolv = seconds() - t0 !!$ WRITE(*,'(/a,/(12(f8.3)))') 'SOL', sol CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution') ! WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splx, sol), & & fintg(splx, sol)-REAL(kdiff,8)/REAL(kdiff+1,8) !=========================================================================== ! 4.0 Check the solution and its 1st derivative ! ALLOCATE(solcal(0:nx,0:2), solana(0:nx,0:2), errsol(0:nx,0:2)) DO i =0,nx solana(i,0) = 1.0d0-xgrid(i)**kdiff solana(i,1) = -kdiff*xgrid(i)**(kdiff-1) solana(i,2) = -kdiff*(kdiff-1)*xgrid(i)**(kdiff-2) END DO CALL gridval(splx, xgrid, solcal(:,0), 0, sol) ! Compute PPFORM and grid values CALL gridval(splx, xgrid, solcal(:,1), 1) ! 1st derivative CALL gridval(splx, xgrid, solcal(:,2), 2) ! 2nd derivative errsol = solana - solcal ! CALL putarr(fid, '/XGRID', xgrid) CALL putarr(fid, '/SOLCAL', solcal) CALL putarr(fid, '/SOLANA', solana) CALL putarr(fid, '/ERR', errsol) ! CALL creatg(fid, '/spline') CALL attach(fid, '/spline', 'order', splx%order) CALL putarr(fid, '/spline/knots', splx%knots, 'Spline knots') WRITE(*,'(a,3(1pe12.3))') 'Rel. discretization errors (solution and derivatives).', & & (SQRT( DOT_PRODUCT(errsol(:,i),errsol(:,i)) / & & DOT_PRODUCT(solana(:,i),solana(:,i)) ), i=0,2) ! WRITE(*,'(a)') '---' WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xgrid, rhs, sol) DEALLOCATE(solcal, solana, errsol) DEALLOCATE(arr) CALL destroy(mat) CALL destroy_sp(splx) CALL closef(fid) CONTAINS SUBROUTINE coefeq(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE PRECISION, INTENT(out) :: c(:) ! ! Weak form = Int(x*dw/dx*dt/dx)dx ! c(1) = x idt(1) = 1 idw(1) = 1 END SUBROUTINE coefeq END PROGRAM main ! !+++ SUBROUTINE meshdist(c, x, nx) ! ! Construct an 1d non-equidistant mesh given a ! mesh distribution function defined in FDIST ! IMPLICIT NONE DOUBLE PRECISION, INTENT(in) :: c(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) INTEGER :: nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal !!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) END FUNCTION fdist END SUBROUTINE meshdist !+++ SUBROUTINE dismat(spl, mat) ! ! Assembly FE matrix mat using spline spl ! USE bsplines USE matrix IMPLICIT NONE TYPE(spline1d), INTENT(in) :: spl TYPE(gbmat), INTENT(inout) :: mat INTEGER :: nrank, nx, nidbas, ngauss INTEGER :: i, igauss, iterm, iw, jt, irow, jcol DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) DOUBLE PRECISION :: contrib ! INTEGER :: kterms ! Number of terms in weak form INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order DOUBLE PRECISION, ALLOCATABLE :: coefs(:) !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl, nrank, nx, nidbas) ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative ! ! Weak form ! kterms = mat%nterms ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms)) ! ! Gauss quadature ! CALL get_gauss(spl, ngauss) ALLOCATE(xgauss(ngauss), wgauss(ngauss)) !=========================================================================== ! 2.0 Assembly loop ! DO i=1,nx CALL get_gauss(spl, ngauss, i, xgauss, wgauss) DO igauss=1,ngauss CALL basfun(xgauss(igauss), spl, fun, i) CALL coefeq(xgauss(igauss), idert, iderw, coefs) DO iterm=1,kterms DO jt=0,nidbas DO iw=0,nidbas contrib = fun(jt,idert(iterm)) * coefs(iterm) * & & fun(iw,iderw(iterm)) * wgauss(igauss) irow=i+iw; jcol=i+jt CALL updtmat(mat, irow, jcol, contrib) END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(fun) DEALLOCATE(xgauss, wgauss) DEALLOCATE(iderw, idert, coefs) ! CONTAINS SUBROUTINE coefeq(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt)) ! ! Weak form = Int(x*dw/dx*dt/dx)dx ! c(1) = x idt(1) = 1 idw(1) = 1 END SUBROUTINE coefeq END SUBROUTINE dismat !+++ SUBROUTINE disrhs(kdiff, spl, rhs) ! ! Assenbly the RHS using spline spl ! USE bsplines IMPLICIT NONE INTEGER, INTENT(in) :: kdiff TYPE(spline1d), INTENT(in) :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:) INTEGER :: nrank, nx, nidbas, ngauss INTEGER :: i, igauss, left DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) DOUBLE PRECISION:: contrib !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl, nrank, nx, nidbas) !!$ WRITE(*,'(/a, 5i3)') 'nrank, nx, nidbas =', nrank, nx, nidbas ! ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives) ! ! Gauss quadature ! CALL get_gauss(spl, ngauss) !!$ WRITE(*,'(/a, i3)') 'Gauss points and weights, ngauss =', ngauss ALLOCATE(xgauss(ngauss), wgauss(ngauss)) !=========================================================================== ! 2.0 Assembly loop ! rhs(1:nrank) = 0.0d0 ! DO i=1,nx CALL get_gauss(spl, ngauss, i, xgauss, wgauss) !!$ WRITE(*,'(a,i3,(8f10.4))') 'i=',i, xgauss, wgauss DO igauss=1,ngauss CALL basfun(xgauss(igauss), spl, fun, i) left = i-1 !!$ WRITE(*,'(a,5i4)') '>>>igauss, left', igauss, left contrib = wgauss(igauss)*rhseq(xgauss(igauss), kdiff) rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1) END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(fun) DEALLOCATE(xgauss, wgauss) ! CONTAINS DOUBLE PRECISION FUNCTION rhseq(x,k) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(in) :: k rhseq = k*k*x**(k-1) END FUNCTION rhseq END SUBROUTINE disrhs diff --git a/examples/pde1d_eig.f90 b/examples/pde1d_eig.f90 index 698efea..4105b1b 100644 --- a/examples/pde1d_eig.f90 +++ b/examples/pde1d_eig.f90 @@ -1,459 +1,459 @@ !> !> @file pde1d_eig.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Solving the following 1d differential eqation using splines: ! ! -d/dr[r d/dr] f = k^2 r^(k-1), with f(r=1) = 0 ! exact solution: f(r) = 1 - r^k ! USE bsplines USE matrix USE futils USE conmat_mod IMPLICIT NONE INTEGER :: nx, nidbas, ngauss, kdiff INTEGER :: i, nrank, kl, ku LOGICAL :: nlppform DOUBLE PRECISION :: coefs(5) DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, rhs, sol TYPE(spline1d) :: splx TYPE(gemat) :: mat !!$ TYPE(gbmat) :: mat ! CHARACTER(len=128) :: file='pde1d.h5' INTEGER :: fid DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr DOUBLE PRECISION :: seconds, t0, tmat, teig, tarpack ! DOUBLE PRECISION, ALLOCATABLE :: mata(:,:), eigvals(:), work(:) INTEGER :: j, info ! INTEGER, PARAMETER :: maxn=256, maxnev=maxn, maxncv=25, & & lworkl=maxncv*(maxncv+8), zero=0.0d0 DOUBLE PRECISION :: v(maxn,maxncv), workl(lworkl), workd(3*maxn), & & d(maxncv,2), resid(maxn), w(maxn), & & tol=0.0d0, sigma DOUBLE PRECISION, EXTERNAL :: dnrm2 INTEGER :: nev=10, ncv=20, ishfts=1, maxitr=300, nconv, & & mode1=1, ierr INTEGER :: ido, ipntr(11), iparam(11) CHARACTER(len=1) :: bmat='I' CHARACTER(len=2) :: which='SA' LOGICAL :: rvec, select(maxncv) ! INTERFACE SUBROUTINE dismat(spl, mat) USE bsplines USE matrix TYPE(spline1d), INTENT(in) :: spl TYPE(gbmat), INTENT(inout) :: mat END SUBROUTINE dismat SUBROUTINE disrhs(kdiff, spl, rhs) USE bsplines INTEGER, INTENT(in) :: kdiff TYPE(spline1d), INTENT(in) :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:) END SUBROUTINE disrhs SUBROUTINE meshdist(coefs, x, nx) DOUBLE PRECISION, INTENT(in) :: coefs(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) END SUBROUTINE meshdist END INTERFACE ! NAMELIST /newrun/ nx, nidbas, ngauss, kdiff, nlppform, coefs, nev, ncv, which !=========================================================================== ! 1.0 Prologue ! ! Read in data specific to run ! nx = 8 ! Number oh intevals in x nidbas = 3 ! Degree of splines ngauss = 4 ! Number of Gauss points/interval kdiff = 2 ! Exponent of differential problem nlppform = .TRUE. ! Use PPFORM for gridval or not coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function ! see function FDIST in MESHDIST ! READ(*,newrun) WRITE(*,newrun) ! ! Define grid on x axis ! ALLOCATE(xgrid(0:nx)) xgrid(0) = 0.0d0 xgrid(nx) = 1.0d0 CALL meshdist(coefs, xgrid, nx) WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx) WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx) ! ! Create hdf5 file ! CALL creatf(file, fid, 'PDE1D Result File') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NIDBAS', nidbas) CALL attach(fid, '/', 'NGAUSS', ngauss) CALL attach(fid, '/', 'KDIFF', kdiff) !=========================================================================== ! 2.0 Discretize the PDE ! ! Set up spline ! t0 = seconds() CALL set_spline(nidbas, ngauss, xgrid, splx, nlppform=nlppform) CALL get_dim(splx, nrank) ! Rank of the FE matrix WRITE(*,'(a,l6)') 'splx%nlequid', splx%nlequid ! ! FE matrix assembly ! kl = nidbas ku = kl !!$ CALL init(kl, ku, nrank, 1, mat) WRITE(*,'(a,3i6)') 'kl, ku, nrank', kl, ku, nrank CALL init(nrank, 1, mat) !!$ CALL dismat(splx, mat) CALL conmat(splx, mat, coefeq) ! ALLOCATE(arr(nrank)) !!$ WRITE(*,'(/a)') 'Matrice before BC' !!$ DO i=1,nrank !!$ CALL getrow(mat, i, arr) !!$ WRITE(*,'(12f8.3)') arr, SUM(arr) !!$ END DO ! ! RHS assembly ! ALLOCATE(rhs(nrank), sol(nrank)) CALL disrhs(kdiff, splx, rhs) !!$ WRITE(*,'(/a,/(12(f8.3)))') 'RHS before BC', rhs ! ! Set BC f(r=1) = 0 on matrix ! arr(1:nrank-1) = 0.0d0 arr(nrank) = 1.0d0 CALL putrow(mat, nrank, arr) CALL putcol(mat, nrank, arr) tmat = seconds() - t0 ! !=========================================================================== ! 3.0 Eigevalue problem ! ! Using Lapack dsyev ! t0 = seconds() ALLOCATE(mata(nrank,nrank)) ALLOCATE(eigvals(nrank)) ALLOCATE(work(3*nrank)) mata=0.0d0 DO j=1,nrank mata(j:nrank,j) = mat%val(j:nrank,j) END DO CALL putarr(fid, '/MAT', mata, 'matrix A') CALL dsyev('V', 'L', nrank, mata, nrank, eigvals, work, SIZE(work), info) teig = seconds() - t0 PRINT*,'Info from DSYEV', info, arr(1) IF(info.EQ.0) THEN CALL putarr(fid, '/EIGVS', eigvals, 'eigenvalues of A') CALL putarr(fid, '/EIGVECS', mata, 'eigenvectors of A') WRITE(*,'(a/(10f10.4))') 'eigval', eigvals END IF ! ! Using Arpack ! ido = 0 iparam(1) = ishfts iparam(3) = maxitr iparam(7) = mode1 ! IF(nev.GT.0) THEN t0 = seconds() DO ! ARPACK reverse communication loop CALL dsaupd(ido, bmat, nrank, which, nev, tol, resid, & & ncv, v, maxn, iparam, ipntr, workd, workl,& & lworkl, info) IF(ido.EQ.-1 .OR. ido.EQ.1) THEN !!$ WRITE(*,'(a/(10i4))') 'ipntr', ipntr CALL av(nrank,workd(ipntr(1)), workd(ipntr(2))) CYCLE END IF IF(info .LT. 0) THEN PRINT*, 'Error in _saupd with info =', info ELSE rvec = .TRUE. CALL dseupd(rvec, 'All', SELECT, d, v, maxn, sigma, & & bmat, nrank, which, nev, tol, resid, ncv, v, maxn, & & iparam, ipntr, workd, workl, lworkl, ierr ) IF( ierr .NE. 0 ) THEN PRINT*,'Error in _seupd with ierr =', ierr ELSE nconv = iparam(5) PRINT*, '--- eigenvalues and error ---' DO j=1,nconv ! Residual norms !!$ CALL av(nrank,v(1,j), w) ! d(1,j) is the j^th eigenvalue !!$ CALL daxpy(nrank, -d(j,1), v(1,j), 1, w, 1) !!$ d(j,2) = dnrm2(nrank, w, 1) !!$ d(j,2) = d(j,2)/abs(d(j,1)) WRITE(*,'(2(1pe12.4))') d(j,1), eigvals(j)-d(j,1) !!$ CALL putarr(fid, '/EIGVS', d(:,1), 'ARPACK eigenvalues of A') !!$ CALL putarr(fid, '/EIGVECS', v, 'ARPACK eigenvectors of A') END DO EXIT END IF END IF END DO ! End of ARPACK reverse communication loop tarpack = seconds()-t0 END IF !=========================================================================== ! 9.0 Epilogue ! WRITE(*,'(a)') '---' WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat WRITE(*,'(a,1pe12.3)') 'Lapack: Matrice eigvalue time (s)', teig WRITE(*,'(a,1pe12.3)') 'Arpack: Matrice eigvalue time (s)', tarpack ! DEALLOCATE(xgrid, rhs, sol) DEALLOCATE(arr) CALL destroy(mat) CALL destroy_sp(splx) CALL closef(fid) CONTAINS SUBROUTINE av(n,v,w) ! ! Matrix vector product: w <- Av INTEGER, INTENT(in) :: n DOUBLE PRECISION, INTENT(in) :: v(*) DOUBLE PRECISION, INTENT(out) :: w(*) w(1:n) = vmx(mat,v(1:n)) END SUBROUTINE av SUBROUTINE coefeq(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE PRECISION, INTENT(out) :: c(:) ! ! Weak form = Int(x*dw/dx*dt/dx)dx ! c(1) = x idt(1) = 1 idw(1) = 1 END SUBROUTINE coefeq END PROGRAM main ! !+++ SUBROUTINE meshdist(c, x, nx) ! ! Construct an 1d non-equidistant mesh given a ! mesh distribution function defined in FDIST ! IMPLICIT NONE DOUBLE PRECISION, INTENT(in) :: c(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) INTEGER :: nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal !!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) END FUNCTION fdist END SUBROUTINE meshdist !+++ SUBROUTINE dismat(spl, mat) ! ! Assembly FE matrix mat using spline spl ! USE bsplines USE matrix IMPLICIT NONE TYPE(spline1d), INTENT(in) :: spl TYPE(gbmat), INTENT(inout) :: mat INTEGER :: nrank, nx, nidbas, ngauss INTEGER :: i, igauss, iterm, iw, jt, irow, jcol DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) DOUBLE PRECISION :: contrib ! INTEGER :: kterms ! Number of terms in weak form INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order DOUBLE PRECISION, ALLOCATABLE :: coefs(:) !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl, nrank, nx, nidbas) ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative ! ! Weak form ! kterms = mat%nterms ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms)) ! ! Gauss quadature ! CALL get_gauss(spl, ngauss) ALLOCATE(xgauss(ngauss), wgauss(ngauss)) !=========================================================================== ! 2.0 Assembly loop ! DO i=1,nx CALL get_gauss(spl, ngauss, i, xgauss, wgauss) DO igauss=1,ngauss CALL basfun(xgauss(igauss), spl, fun, i) CALL coefeq(xgauss(igauss), idert, iderw, coefs) DO iterm=1,kterms DO jt=0,nidbas DO iw=0,nidbas contrib = fun(jt,idert(iterm)) * coefs(iterm) * & & fun(iw,iderw(iterm)) * wgauss(igauss) irow=i+iw; jcol=i+jt CALL updtmat(mat, irow, jcol, contrib) END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(fun) DEALLOCATE(xgauss, wgauss) DEALLOCATE(iderw, idert, coefs) ! CONTAINS SUBROUTINE coefeq(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt)) ! ! Weak form = Int(x*dw/dx*dt/dx)dx ! c(1) = x idt(1) = 1 idw(1) = 1 END SUBROUTINE coefeq END SUBROUTINE dismat !+++ SUBROUTINE disrhs(kdiff, spl, rhs) ! ! Assenbly the RHS using spline spl ! USE bsplines IMPLICIT NONE INTEGER, INTENT(in) :: kdiff TYPE(spline1d), INTENT(in) :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:) INTEGER :: nrank, nx, nidbas, ngauss INTEGER :: i, igauss, left DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) DOUBLE PRECISION:: contrib !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl, nrank, nx, nidbas) !!$ WRITE(*,'(/a, 5i3)') 'nrank, nx, nidbas =', nrank, nx, nidbas ! ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives) ! ! Gauss quadature ! CALL get_gauss(spl, ngauss) !!$ WRITE(*,'(/a, i3)') 'Gauss points and weights, ngauss =', ngauss ALLOCATE(xgauss(ngauss), wgauss(ngauss)) !=========================================================================== ! 2.0 Assembly loop ! rhs(1:nrank) = 0.0d0 ! DO i=1,nx CALL get_gauss(spl, ngauss, i, xgauss, wgauss) !!$ WRITE(*,'(a,i3,(8f10.4))') 'i=',i, xgauss, wgauss DO igauss=1,ngauss CALL basfun(xgauss(igauss), spl, fun, i) left = i-1 !!$ WRITE(*,'(a,5i4)') '>>>igauss, left', igauss, left contrib = wgauss(igauss)*rhseq(xgauss(igauss), kdiff) rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1) END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(fun) DEALLOCATE(xgauss, wgauss) ! CONTAINS DOUBLE PRECISION FUNCTION rhseq(x,k) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(in) :: k rhseq = k*k*x**(k-1) END FUNCTION rhseq END SUBROUTINE disrhs diff --git a/examples/pde1d_eig_csr.f90 b/examples/pde1d_eig_csr.f90 index 02996ca..d8e4067 100644 --- a/examples/pde1d_eig_csr.f90 +++ b/examples/pde1d_eig_csr.f90 @@ -1,469 +1,469 @@ !> !> @file pde1d_eig_csr.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Solving the following 1d differential eqation using splines: ! ! -d/dr[r d/dr] f = k^2 r^(k-1), with f(r=1) = 0 ! exact solution: f(r) = 1 - r^k ! USE bsplines USE csr USE futils USE conmat_mod IMPLICIT NONE INTEGER :: nx, nidbas, ngauss, kdiff INTEGER :: i, nrank, kl, ku LOGICAL :: nlppform DOUBLE PRECISION :: coefs(5) DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, rhs, sol TYPE(spline1d) :: splx TYPE(csr_mat) :: mat !!$ TYPE(gemat) :: mat !!$ TYPE(gbmat) :: mat ! CHARACTER(len=128) :: file='pde1d.h5' INTEGER :: fid, ffid DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr DOUBLE PRECISION :: seconds, t0, tmat, teig, tarpack ! DOUBLE PRECISION, ALLOCATABLE :: mata(:,:), eigvals(:), work(:) INTEGER :: j, info ! INTEGER, PARAMETER :: maxn=256, maxnev=maxn, maxncv=25, & & lworkl=maxncv*(maxncv+8), zero=0.0d0 DOUBLE PRECISION :: v(maxn,maxncv), workl(lworkl), workd(3*maxn), & & d(maxncv,2), resid(maxn), w(maxn), & & tol=0.0d0, sigma DOUBLE PRECISION, EXTERNAL :: dnrm2 INTEGER :: nev=10, ncv=20, ishfts=1, maxitr=300, nconv, & & mode1=1, ierr INTEGER :: ido, ipntr(11), iparam(11) CHARACTER(len=1) :: bmat='I' CHARACTER(len=2) :: which='SA' LOGICAL :: rvec, select(maxncv) ! INTERFACE SUBROUTINE dismat(spl, mat) USE bsplines USE matrix TYPE(spline1d), INTENT(in) :: spl TYPE(gbmat), INTENT(inout) :: mat END SUBROUTINE dismat SUBROUTINE disrhs(kdiff, spl, rhs) USE bsplines INTEGER, INTENT(in) :: kdiff TYPE(spline1d), INTENT(in) :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:) END SUBROUTINE disrhs SUBROUTINE meshdist(coefs, x, nx) DOUBLE PRECISION, INTENT(in) :: coefs(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) END SUBROUTINE meshdist END INTERFACE ! NAMELIST /newrun/ nx, nidbas, ngauss, kdiff, nlppform, coefs, nev, ncv, which !=========================================================================== ! 1.0 Prologue ! ! Read in data specific to run ! nx = 8 ! Number oh intevals in x nidbas = 3 ! Degree of splines ngauss = 4 ! Number of Gauss points/interval kdiff = 2 ! Exponent of differential problem nlppform = .TRUE. ! Use PPFORM for gridval or not coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function ! see function FDIST in MESHDIST ! READ(*,newrun) WRITE(*,newrun) ! ! Define grid on x axis ! ALLOCATE(xgrid(0:nx)) xgrid(0) = 0.0d0 xgrid(nx) = 1.0d0 CALL meshdist(coefs, xgrid, nx) WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx) WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx) ! ! Create hdf5 file ! CALL creatf(file, fid, 'PDE1D Result File') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NIDBAS', nidbas) CALL attach(fid, '/', 'NGAUSS', ngauss) CALL attach(fid, '/', 'KDIFF', kdiff) !=========================================================================== ! 2.0 Discretize the PDE ! ! Set up spline ! t0 = seconds() CALL set_spline(nidbas, ngauss, xgrid, splx, nlppform=nlppform) CALL get_dim(splx, nrank) ! Rank of the FE matrix WRITE(*,'(a,l6)') 'splx%nlequid', splx%nlequid ! ! FE matrix assembly ! kl = nidbas ku = kl WRITE(*,'(a,3i6)') 'kl, ku, nrank', kl, ku, nrank CALL init(nrank, 1, mat) !!$ CALL dismat(splx, mat) CALL conmat(splx, mat, coefeq) CALL to_mat(mat) CALL creatf('pde1d_eig.h5', ffid, 'PDE1D Result File') PRINT*, 'rank', mat%rank PRINT*, 'nnz', mat%nnz PRINT*, 'irow', mat%irow PRINT*, 'cols', mat%cols CALL putmat(ffid,'/MAT',mat,'FE matrix') CALL closef(ffid) ! ! ALLOCATE(arr(nrank)) !!$ WRITE(*,'(/a)') 'Matrice before BC' !!$ DO i=1,nrank !!$ CALL getrow(mat, i, arr) !!$ WRITE(*,'(12f8.3)') arr, SUM(arr) !!$ END DO ! ! RHS assembly ! ALLOCATE(rhs(nrank), sol(nrank)) CALL disrhs(kdiff, splx, rhs) !!$ WRITE(*,'(/a,/(12(f8.3)))') 'RHS before BC', rhs !!$! !!$! Set BC f(r=1) = 0 on matrix !!$! !!$ arr(1:nrank-1) = 0.0d0 !!$ arr(nrank) = 1.0d0 !!$ CALL putrow(mat, nrank, arr) !!$ CALL putcol(mat, nrank, arr) CALL putmat(fid,'/MATA', mat, 'FE matrix') tmat = seconds() - t0 ! !=========================================================================== ! 3.0 Eigevalue problem ! ! Using Lapack dsyev ! t0 = seconds() ALLOCATE(mata(nrank,nrank)) ALLOCATE(eigvals(nrank)) ALLOCATE(work(3*nrank)) mata=0.0d0 DO j=1,nrank CALL getcol(mat, j, mata(:,j)) ! mata is a dense matrix END DO CALL putarr(fid, '/MAT', mata, 'matrix A') CALL dsyev('V', 'L', nrank, mata, nrank, eigvals, work, SIZE(work), info) teig = seconds() - t0 PRINT*,'Info from DSYEV', info, arr(1) IF(info.EQ.0) THEN CALL putarr(fid, '/EIGVS', eigvals, 'eigenvalues of A') CALL putarr(fid, '/EIGVECS', mata, 'eigenvectors of A') WRITE(*,'(a/(10f10.4))') 'eigval', eigvals END IF ! ! Using Arpack ! ido = 0 iparam(1) = ishfts iparam(3) = maxitr iparam(7) = mode1 ! IF(nev.GT.0) THEN t0 = seconds() DO ! ARPACK reverse communication loop CALL dsaupd(ido, bmat, nrank, which, nev, tol, resid, & & ncv, v, maxn, iparam, ipntr, workd, workl,& & lworkl, info) IF(ido.EQ.-1 .OR. ido.EQ.1) THEN !!$ WRITE(*,'(a/(10i4))') 'ipntr', ipntr CALL av(nrank,workd(ipntr(1)), workd(ipntr(2))) CYCLE END IF IF(info .LT. 0) THEN PRINT*, 'Error in _saupd with info =', info ELSE rvec = .TRUE. CALL dseupd(rvec, 'All', SELECT, d, v, maxn, sigma, & & bmat, nrank, which, nev, tol, resid, ncv, v, maxn, & & iparam, ipntr, workd, workl, lworkl, ierr ) IF( ierr .NE. 0 ) THEN PRINT*,'Error in _seupd with ierr =', ierr ELSE nconv = iparam(5) PRINT*, '--- eigenvalues and error ---' DO j=1,nconv ! Residual norms !!$ CALL av(nrank,v(1,j), w) ! d(1,j) is the j^th eigenvalue !!$ CALL daxpy(nrank, -d(j,1), v(1,j), 1, w, 1) !!$ d(j,2) = dnrm2(nrank, w, 1) !!$ d(j,2) = d(j,2)/abs(d(j,1)) WRITE(*,'(2(1pe12.4))') d(j,1), eigvals(j)-d(j,1) !!$ CALL putarr(fid, '/EIGVS', d(:,1), 'ARPACK eigenvalues of A') !!$ CALL putarr(fid, '/EIGVECS', v, 'ARPACK eigenvectors of A') END DO EXIT END IF END IF END DO ! End of ARPACK reverse communication loop tarpack = seconds()-t0 END IF !=========================================================================== ! 9.0 Epilogue ! WRITE(*,'(a)') '---' WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat WRITE(*,'(a,1pe12.3)') 'Lapack: Matrice eigvalue time (s)', teig WRITE(*,'(a,1pe12.3)') 'Arpack: Matrice eigvalue time (s)', tarpack ! DEALLOCATE(xgrid, rhs, sol) DEALLOCATE(arr) CALL destroy(mat) CALL destroy_sp(splx) CALL closef(fid) CONTAINS SUBROUTINE av(n,v,w) ! ! Matrix vector product: w <- Av INTEGER, INTENT(in) :: n DOUBLE PRECISION, INTENT(in) :: v(*) DOUBLE PRECISION, INTENT(out) :: w(*) w(1:n) = vmx(mat,v(1:n)) END SUBROUTINE av SUBROUTINE coefeq(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE PRECISION, INTENT(out) :: c(:) ! ! Weak form = Int(x*dw/dx*dt/dx)dx ! c(1) = x idt(1) = 1 idw(1) = 1 END SUBROUTINE coefeq END PROGRAM main ! !+++ SUBROUTINE meshdist(c, x, nx) ! ! Construct an 1d non-equidistant mesh given a ! mesh distribution function defined in FDIST ! IMPLICIT NONE DOUBLE PRECISION, INTENT(in) :: c(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) INTEGER :: nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal !!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) END FUNCTION fdist END SUBROUTINE meshdist !+++ SUBROUTINE dismat(spl, mat) ! ! Assembly FE matrix mat using spline spl ! USE bsplines USE matrix IMPLICIT NONE TYPE(spline1d), INTENT(in) :: spl TYPE(gbmat), INTENT(inout) :: mat INTEGER :: nrank, nx, nidbas, ngauss INTEGER :: i, igauss, iterm, iw, jt, irow, jcol DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) DOUBLE PRECISION :: contrib ! INTEGER :: kterms ! Number of terms in weak form INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order DOUBLE PRECISION, ALLOCATABLE :: coefs(:) !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl, nrank, nx, nidbas) ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative ! ! Weak form ! kterms = mat%nterms ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms)) ! ! Gauss quadature ! CALL get_gauss(spl, ngauss) ALLOCATE(xgauss(ngauss), wgauss(ngauss)) !=========================================================================== ! 2.0 Assembly loop ! DO i=1,nx CALL get_gauss(spl, ngauss, i, xgauss, wgauss) DO igauss=1,ngauss CALL basfun(xgauss(igauss), spl, fun, i) CALL coefeq(xgauss(igauss), idert, iderw, coefs) DO iterm=1,kterms DO jt=0,nidbas DO iw=0,nidbas contrib = fun(jt,idert(iterm)) * coefs(iterm) * & & fun(iw,iderw(iterm)) * wgauss(igauss) irow=i+iw; jcol=i+jt CALL updtmat(mat, irow, jcol, contrib) END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(fun) DEALLOCATE(xgauss, wgauss) DEALLOCATE(iderw, idert, coefs) ! CONTAINS SUBROUTINE coefeq(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt)) ! ! Weak form = Int(x*dw/dx*dt/dx)dx ! c(1) = x idt(1) = 1 idw(1) = 1 END SUBROUTINE coefeq END SUBROUTINE dismat !+++ SUBROUTINE disrhs(kdiff, spl, rhs) ! ! Assenbly the RHS using spline spl ! USE bsplines IMPLICIT NONE INTEGER, INTENT(in) :: kdiff TYPE(spline1d), INTENT(in) :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:) INTEGER :: nrank, nx, nidbas, ngauss INTEGER :: i, igauss, left DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) DOUBLE PRECISION:: contrib !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl, nrank, nx, nidbas) !!$ WRITE(*,'(/a, 5i3)') 'nrank, nx, nidbas =', nrank, nx, nidbas ! ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives) ! ! Gauss quadature ! CALL get_gauss(spl, ngauss) !!$ WRITE(*,'(/a, i3)') 'Gauss points and weights, ngauss =', ngauss ALLOCATE(xgauss(ngauss), wgauss(ngauss)) !=========================================================================== ! 2.0 Assembly loop ! rhs(1:nrank) = 0.0d0 ! DO i=1,nx CALL get_gauss(spl, ngauss, i, xgauss, wgauss) !!$ WRITE(*,'(a,i3,(8f10.4))') 'i=',i, xgauss, wgauss DO igauss=1,ngauss CALL basfun(xgauss(igauss), spl, fun, i) left = i-1 !!$ WRITE(*,'(a,5i4)') '>>>igauss, left', igauss, left contrib = wgauss(igauss)*rhseq(xgauss(igauss), kdiff) rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1) END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(fun) DEALLOCATE(xgauss, wgauss) ! CONTAINS DOUBLE PRECISION FUNCTION rhseq(x,k) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(in) :: k rhseq = k*k*x**(k-1) END FUNCTION rhseq END SUBROUTINE disrhs diff --git a/examples/pde1d_eig_gb.f90 b/examples/pde1d_eig_gb.f90 index c80f22d..4b1224f 100644 --- a/examples/pde1d_eig_gb.f90 +++ b/examples/pde1d_eig_gb.f90 @@ -1,460 +1,460 @@ !> !> @file pde1d_eig_gb.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Solving the following 1d differential eqation using splines: ! ! -d/dr[r d/dr] f = k^2 r^(k-1), with f(r=1) = 0 ! exact solution: f(r) = 1 - r^k ! USE bsplines USE matrix USE futils USE conmat_mod IMPLICIT NONE INTEGER :: nx, nidbas, ngauss, kdiff INTEGER :: i, nrank, kl, ku LOGICAL :: nlppform DOUBLE PRECISION :: coefs(5) DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, rhs, sol TYPE(spline1d) :: splx TYPE(gemat) :: mat !!$ TYPE(gbmat) :: mat ! CHARACTER(len=128) :: file='pde1d.h5' INTEGER :: fid DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr DOUBLE PRECISION :: seconds, t0, tmat, teig, tarpack ! DOUBLE PRECISION, ALLOCATABLE :: mata(:,:), eigvals(:), work(:) INTEGER :: j, info ! INTEGER, PARAMETER :: maxn=256, maxnev=maxn, maxncv=25, & & lworkl=maxncv*(maxncv+8), zero=0.0d0 DOUBLE PRECISION :: v(maxn,maxncv), workl(lworkl), workd(3*maxn), & & d(maxncv,2), resid(maxn), w(maxn), & & tol=0.0d0, sigma DOUBLE PRECISION, EXTERNAL :: dnrm2 INTEGER :: nev=10, ncv=20, ishfts=1, maxitr=300, nconv, & & mode1=1, ierr INTEGER :: ido, ipntr(11), iparam(11) CHARACTER(len=1) :: bmat='I' CHARACTER(len=2) :: which='SA' LOGICAL :: rvec, select(maxncv) ! INTERFACE SUBROUTINE dismat(spl, mat) USE bsplines USE matrix TYPE(spline1d), INTENT(in) :: spl TYPE(gbmat), INTENT(inout) :: mat END SUBROUTINE dismat SUBROUTINE disrhs(kdiff, spl, rhs) USE bsplines INTEGER, INTENT(in) :: kdiff TYPE(spline1d), INTENT(in) :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:) END SUBROUTINE disrhs SUBROUTINE meshdist(coefs, x, nx) DOUBLE PRECISION, INTENT(in) :: coefs(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) END SUBROUTINE meshdist END INTERFACE ! NAMELIST /newrun/ nx, nidbas, ngauss, kdiff, nlppform, coefs, nev, ncv, which !=========================================================================== ! 1.0 Prologue ! ! Read in data specific to run ! nx = 8 ! Number oh intevals in x nidbas = 3 ! Degree of splines ngauss = 4 ! Number of Gauss points/interval kdiff = 2 ! Exponent of differential problem nlppform = .TRUE. ! Use PPFORM for gridval or not coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function ! see function FDIST in MESHDIST ! READ(*,newrun) WRITE(*,newrun) ! ! Define grid on x axis ! ALLOCATE(xgrid(0:nx)) xgrid(0) = 0.0d0 xgrid(nx) = 1.0d0 CALL meshdist(coefs, xgrid, nx) WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx) WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx) ! ! Create hdf5 file ! CALL creatf(file, fid, 'PDE1D Result File') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NIDBAS', nidbas) CALL attach(fid, '/', 'NGAUSS', ngauss) CALL attach(fid, '/', 'KDIFF', kdiff) !=========================================================================== ! 2.0 Discretize the PDE ! ! Set up spline ! t0 = seconds() CALL set_spline(nidbas, ngauss, xgrid, splx, nlppform=nlppform) CALL get_dim(splx, nrank) ! Rank of the FE matrix WRITE(*,'(a,l6)') 'splx%nlequid', splx%nlequid ! ! FE matrix assembly ! kl = nidbas ku = kl !!$ CALL init(kl, ku, nrank, 1, mat) !!$ WRITE(*,'(a,3i6)') 'kl, ku, nrank', kl, ku, nrank CALL init(nrank, 1, mat) !!$ CALL dismat(splx, mat) CALL conmat(splx, mat, coefeq) ! ALLOCATE(arr(nrank)) !!$ WRITE(*,'(/a)') 'Matrice before BC' !!$ DO i=1,nrank !!$ CALL getrow(mat, i, arr) !!$ WRITE(*,'(12f8.3)') arr, SUM(arr) !!$ END DO ! ! RHS assembly ! ALLOCATE(rhs(nrank), sol(nrank)) CALL disrhs(kdiff, splx, rhs) !!$ WRITE(*,'(/a,/(12(f8.3)))') 'RHS before BC', rhs ! ! Set BC f(r=1) = 0 on matrix ! arr(1:nrank-1) = 0.0d0 arr(nrank) = 1.0d0 CALL putrow(mat, nrank, arr) CALL putcol(mat, nrank, arr) tmat = seconds() - t0 ! !=========================================================================== ! 3.0 Eigevalue problem ! ! Using Lapack dsyev ! t0 = seconds() ALLOCATE(mata(nrank,nrank)) ALLOCATE(eigvals(nrank)) ALLOCATE(work(3*nrank)) mata=0.0d0 DO j=1,nrank mata(j:nrank,j) = mat%val(j:nrank,j) END DO CALL putarr(fid, '/MAT', mata, 'matrix A') CALL dsyev('V', 'L', nrank, mata, nrank, eigvals, work, SIZE(work), info) teig = seconds() - t0 PRINT*,'Info from DSYEV', info, arr(1) IF(info.EQ.0) THEN CALL putarr(fid, '/EIGVS', eigvals, 'eigenvalues of A') CALL putarr(fid, '/EIGVECS', mata, 'eigenvectors of A') WRITE(*,'(a/(10f10.4))') 'eigval', eigvals END IF ! ! Using Arpack ! ido = 0 iparam(1) = ishfts iparam(3) = maxitr iparam(7) = mode1 ! IF(nev.GT.0) THEN t0 = seconds() DO ! ARPACK reverse communication loop CALL dsaupd(ido, bmat, nrank, which, nev, tol, resid, & & ncv, v, maxn, iparam, ipntr, workd, workl,& & lworkl, info) IF(ido.EQ.-1 .OR. ido.EQ.1) THEN PRINT*, 'Error in _saupd with info =', info WRITE(*,'(a/(10i4))') 'ipntr', ipntr CALL av(nrank,workd(ipntr(1)), workd(ipntr(2))) CYCLE END IF IF(info .LT. 0) THEN PRINT*, 'Error in _saupd with info =', info ELSE rvec = .TRUE. CALL dseupd(rvec, 'All', SELECT, d, v, maxn, sigma, & & bmat, nrank, which, nev, tol, resid, ncv, v, maxn, & & iparam, ipntr, workd, workl, lworkl, ierr ) IF( ierr .NE. 0 ) THEN PRINT*,'Error in _seupd with ierr =', ierr ELSE nconv = iparam(5) PRINT*, '--- eigenvalues and error ---' DO j=1,nconv ! Residual norms !!$ CALL av(nrank,v(1,j), w) ! d(1,j) is the j^th eigenvalue !!$ CALL daxpy(nrank, -d(j,1), v(1,j), 1, w, 1) !!$ d(j,2) = dnrm2(nrank, w, 1) !!$ d(j,2) = d(j,2)/abs(d(j,1)) WRITE(*,'(i3,2(1pe12.4))') j, d(j,1), eigvals(j)-d(j,1) !!$ CALL putarr(fid, '/EIGVS', d(:,1), 'ARPACK eigenvalues of A') !!$ CALL putarr(fid, '/EIGVECS', v, 'ARPACK eigenvectors of A') END DO EXIT END IF END IF END DO ! End of ARPACK reverse communication loop tarpack = seconds()-t0 END IF !=========================================================================== ! 9.0 Epilogue ! WRITE(*,'(a)') '---' WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat WRITE(*,'(a,1pe12.3)') 'Lapack: Matrice eigvalue time (s)', teig WRITE(*,'(a,1pe12.3)') 'Arpack: Matrice eigvalue time (s)', tarpack ! DEALLOCATE(xgrid, rhs, sol) DEALLOCATE(arr) CALL destroy(mat) CALL destroy_sp(splx) CALL closef(fid) CONTAINS SUBROUTINE av(n,v,w) ! ! Matrix vector product: w <- Av INTEGER, INTENT(in) :: n DOUBLE PRECISION, INTENT(in) :: v(*) DOUBLE PRECISION, INTENT(out) :: w(*) w(1:n) = vmx(mat,v(1:n)) END SUBROUTINE av SUBROUTINE coefeq(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE PRECISION, INTENT(out) :: c(:) ! ! Weak form = Int(x*dw/dx*dt/dx)dx ! c(1) = x idt(1) = 1 idw(1) = 1 END SUBROUTINE coefeq END PROGRAM main ! !+++ SUBROUTINE meshdist(c, x, nx) ! ! Construct an 1d non-equidistant mesh given a ! mesh distribution function defined in FDIST ! IMPLICIT NONE DOUBLE PRECISION, INTENT(in) :: c(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) INTEGER :: nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal !!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) END FUNCTION fdist END SUBROUTINE meshdist !+++ SUBROUTINE dismat(spl, mat) ! ! Assembly FE matrix mat using spline spl ! USE bsplines USE matrix IMPLICIT NONE TYPE(spline1d), INTENT(in) :: spl TYPE(gbmat), INTENT(inout) :: mat INTEGER :: nrank, nx, nidbas, ngauss INTEGER :: i, igauss, iterm, iw, jt, irow, jcol DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) DOUBLE PRECISION :: contrib ! INTEGER :: kterms ! Number of terms in weak form INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order DOUBLE PRECISION, ALLOCATABLE :: coefs(:) !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl, nrank, nx, nidbas) ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative ! ! Weak form ! kterms = mat%nterms ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms)) ! ! Gauss quadature ! CALL get_gauss(spl, ngauss) ALLOCATE(xgauss(ngauss), wgauss(ngauss)) !=========================================================================== ! 2.0 Assembly loop ! DO i=1,nx CALL get_gauss(spl, ngauss, i, xgauss, wgauss) DO igauss=1,ngauss CALL basfun(xgauss(igauss), spl, fun, i) CALL coefeq(xgauss(igauss), idert, iderw, coefs) DO iterm=1,kterms DO jt=0,nidbas DO iw=0,nidbas contrib = fun(jt,idert(iterm)) * coefs(iterm) * & & fun(iw,iderw(iterm)) * wgauss(igauss) irow=i+iw; jcol=i+jt CALL updtmat(mat, irow, jcol, contrib) END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(fun) DEALLOCATE(xgauss, wgauss) DEALLOCATE(iderw, idert, coefs) ! CONTAINS SUBROUTINE coefeq(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt)) ! ! Weak form = Int(x*dw/dx*dt/dx)dx ! c(1) = x idt(1) = 1 idw(1) = 1 END SUBROUTINE coefeq END SUBROUTINE dismat !+++ SUBROUTINE disrhs(kdiff, spl, rhs) ! ! Assenbly the RHS using spline spl ! USE bsplines IMPLICIT NONE INTEGER, INTENT(in) :: kdiff TYPE(spline1d), INTENT(in) :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:) INTEGER :: nrank, nx, nidbas, ngauss INTEGER :: i, igauss, left DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) DOUBLE PRECISION:: contrib !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl, nrank, nx, nidbas) !!$ WRITE(*,'(/a, 5i3)') 'nrank, nx, nidbas =', nrank, nx, nidbas ! ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives) ! ! Gauss quadature ! CALL get_gauss(spl, ngauss) !!$ WRITE(*,'(/a, i3)') 'Gauss points and weights, ngauss =', ngauss ALLOCATE(xgauss(ngauss), wgauss(ngauss)) !=========================================================================== ! 2.0 Assembly loop ! rhs(1:nrank) = 0.0d0 ! DO i=1,nx CALL get_gauss(spl, ngauss, i, xgauss, wgauss) !!$ WRITE(*,'(a,i3,(8f10.4))') 'i=',i, xgauss, wgauss DO igauss=1,ngauss CALL basfun(xgauss(igauss), spl, fun, i) left = i-1 !!$ WRITE(*,'(a,5i4)') '>>>igauss, left', igauss, left contrib = wgauss(igauss)*rhseq(xgauss(igauss), kdiff) rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1) END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(fun) DEALLOCATE(xgauss, wgauss) ! CONTAINS DOUBLE PRECISION FUNCTION rhseq(x,k) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(in) :: k rhseq = k*k*x**(k-1) END FUNCTION rhseq END SUBROUTINE disrhs diff --git a/examples/pde1d_eig_ge.f90 b/examples/pde1d_eig_ge.f90 index 713a66e..a36f688 100644 --- a/examples/pde1d_eig_ge.f90 +++ b/examples/pde1d_eig_ge.f90 @@ -1,474 +1,474 @@ !> !> @file pde1d_eig_ge.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Solving the following 1d differential eqation using splines: ! ! -d/dr[r d/dr] f = k^2 r^(k-1), with f(r=1) = 0 ! exact solution: f(r) = 1 - r^k ! USE bsplines USE matrix USE futils USE conmat_mod IMPLICIT NONE INTEGER :: nx, nidbas, ngauss, kdiff INTEGER :: i, nrank, kl, ku LOGICAL :: nlppform DOUBLE PRECISION :: coefs(5) DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, rhs, sol TYPE(spline1d) :: splx TYPE(gemat) :: mat ! CHARACTER(len=128) :: file='pde1d.h5' INTEGER :: fid DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr DOUBLE PRECISION :: seconds, t0, tmat, teig, tarpack ! DOUBLE PRECISION, ALLOCATABLE :: mata(:,:), eigvecsr(:,:), eigvecsl(:,:), & & wr(:), wi(:), work(:) INTEGER :: j, info ! INTEGER, PARAMETER :: maxn=256, maxnev=maxn, maxncv=maxn DOUBLE PRECISION :: v(maxn,maxncv),workd(3*maxn), workev(3*maxncv), & & d(maxncv,2), resid(maxn), w(maxncv,maxn), & & zero=0.0d0, tol=0.0d0, sigmar, sigmai DOUBLE PRECISION, ALLOCATABLE :: workl(:) DOUBLE PRECISION, EXTERNAL :: dnrm2 INTEGER :: nev=10, ncv=30, ishfts=1, maxitr=300, nconv, & & mode1=1, ierr, lworkl INTEGER :: ido, ipntr(11), iparam(11) CHARACTER(len=1) :: bmat='I' CHARACTER(len=2) :: which='SA' LOGICAL :: rvec, select(maxncv) ! INTERFACE SUBROUTINE dismat(spl, mat) USE bsplines USE matrix TYPE(spline1d), INTENT(in) :: spl TYPE(gbmat), INTENT(inout) :: mat END SUBROUTINE dismat SUBROUTINE disrhs(kdiff, spl, rhs) USE bsplines INTEGER, INTENT(in) :: kdiff TYPE(spline1d), INTENT(in) :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:) END SUBROUTINE disrhs SUBROUTINE meshdist(coefs, x, nx) DOUBLE PRECISION, INTENT(in) :: coefs(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) END SUBROUTINE meshdist END INTERFACE ! NAMELIST /newrun/ nx, nidbas, ngauss, kdiff, nlppform, coefs, & & nev, ncv, which !=========================================================================== ! 1.0 Prologue ! ! Read in data specific to run ! nx = 8 ! Number oh intevals in x nidbas = 3 ! Degree of splines ngauss = 4 ! Number of Gauss points/interval kdiff = 2 ! Exponent of differential problem nlppform = .TRUE. ! Use PPFORM for gridval or not coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function ! see function FDIST in MESHDIST ! READ(*,newrun) WRITE(*,newrun) ! ! Define grid on x axis ! ALLOCATE(xgrid(0:nx)) xgrid(0) = 0.0d0 xgrid(nx) = 1.0d0 CALL meshdist(coefs, xgrid, nx) WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx) WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx) ! ! Create hdf5 file ! CALL creatf(file, fid, 'PDE1D Result File') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NIDBAS', nidbas) CALL attach(fid, '/', 'NGAUSS', ngauss) CALL attach(fid, '/', 'KDIFF', kdiff) !=========================================================================== ! 2.0 Discretize the PDE ! ! Set up spline ! t0 = seconds() CALL set_spline(nidbas, ngauss, xgrid, splx, nlppform=nlppform) CALL get_dim(splx, nrank) ! Rank of the FE matrix WRITE(*,'(a,l6)') 'splx%nlequid', splx%nlequid ! ! FE matrix assembly ! kl = nidbas ku = kl !!$ CALL init(kl, ku, nrank, 1, mat) WRITE(*,'(a,3i6)') 'kl, ku, nrank', kl, ku, nrank CALL init(nrank, 1, mat) !!$ CALL dismat(splx, mat) CALL conmat(splx, mat, coefeq) ! ALLOCATE(arr(nrank)) !!$ WRITE(*,'(/a)') 'Matrice before BC' !!$ DO i=1,nrank !!$ CALL getrow(mat, i, arr) !!$ WRITE(*,'(12f8.3)') arr, SUM(arr) !!$ END DO ! ! RHS assembly ! ALLOCATE(rhs(nrank), sol(nrank)) CALL disrhs(kdiff, splx, rhs) !!$ WRITE(*,'(/a,/(12(f8.3)))') 'RHS before BC', rhs ! ! Set BC f(r=1) = 0 on matrix ! arr(1:nrank-1) = 0.0d0 arr(nrank) = 1.0d0 CALL putrow(mat, nrank, arr) CALL putcol(mat, nrank, arr) tmat = seconds() - t0 ! !=========================================================================== ! 3.0 Eigevalue problem ! ! Using Lapack dgeev ! t0 = seconds() ALLOCATE(mata(nrank,nrank)) ALLOCATE(eigvecsr(nrank,nrank)) ALLOCATE(eigvecsl(nrank,nrank)) ALLOCATE(work(4*nrank)) ALLOCATE(wr(nrank), wi(nrank)) mata(:,:) = mat%val(:,:) CALL putarr(fid, '/MAT', mata, 'matrix A') !!$ CALL dsyev('V', 'L', nrank, mata, nrank, eigvals, work, SIZE(work), info) CALL dgeev('N', 'V', nrank, mata, nrank, wr, wi, eigvecsl, SIZE(eigvecsl,1), & & eigvecsr, SIZE(eigvecsr,1), work, SIZE(work), info) teig = seconds() - t0 PRINT*,'Info from DGEEV', info, arr(1) IF(info.EQ.0) THEN CALL putarr(fid, '/REIGVS', wr, 'Real of eigenvalues of A') CALL putarr(fid, '/IEIGVS', wi, 'Imag of eigenvalues of A') CALL putarr(fid, '/EIGVECL', eigvecsl, 'left eigenvalues of A') CALL putarr(fid, '/EIGVECR', eigvecsr, 'right eigenvalues of A') WRITE(*,'(a/(10f10.4))') 'Real eigval', wr WRITE(*,'(a/(10f10.4))') 'Imag eigval', wi END IF ! ! Using Arpack ! ido = 0 iparam(1) = ishfts iparam(3) = maxitr iparam(7) = mode1 ! lworkl = 3*ncv**2+6*ncv ALLOCATE(workl(lworkl)) ! t0 = seconds() DO ! ARPACK reverse communication loop CALL dnaupd(ido, bmat, nrank, which, nev, tol, resid, & & ncv, v, maxn, iparam, ipntr, workd, workl,& & lworkl, info) IF(ido.EQ.-1 .OR. ido.EQ.1) THEN CALL av(nrank,workd(ipntr(1)), workd(ipntr(2))) CYCLE END IF PRINT*, 'INFO =', info IF(info .LT. 0) THEN PRINT*, 'Error in dnaupd with info =', info ELSE rvec = .TRUE. CALL dneupd(rvec, 'A', select, d, d(1,2), v, size(v,1), & & sigmar, sigmai, workev, bmat, nrank, which, nev, tol, & & resid, ncv, v, size(v,1), iparam, ipntr, workd, workl, & & lworkl, ierr ) IF( ierr .NE. 0 ) THEN PRINT*,'Error in dneupd with ierr =', ierr ELSE nconv = iparam(5) PRINT*, '--- Real eigenvalues and comprison with Lapack results ---' ! eiegvalues and diff with Lapack results DO j=1,nconv WRITE(*,'(i3,3(1pe12.4))') j, d(j,1), wr(j), wr(j)-d(j,1) END DO PRINT*, '--- Imag eigenvalues and comprison with Lapack results ---' DO j=1,nconv WRITE(*,'(i3,3(1pe12.4))') j, d(j,2), wi(j), wi(j)-d(j,2) END DO !!$ CALL av(nrank,v(1,j), w) ! d(1,j) is the j^th eigenvalue !!$ CALL daxpy(nrank, -d(j,1), v(1,j), 1, w, 1) !!$ d(j,2) = dnrm2(nrank, w, 1) !!$ d(j,2) = d(j,2)/abs(d(j,1)) CALL putarr(fid, '/EIGVS', d(1:nconv,:), 'ARPACK eigenvalues of A') !!$ CALL putarr(fid, '/EIGVECS', v, 'ARPACK eigenvectors of A') !!$ END DO END IF EXIT END IF END DO ! End of ARPACK reverse communication loop tarpack = seconds()-t0 !=========================================================================== ! 9.0 Epilogue ! WRITE(*,'(a)') '---' WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat WRITE(*,'(a,1pe12.3)') 'Lapack: Matrice eigvalue time (s)', teig WRITE(*,'(a,1pe12.3)') 'Arpack: Matrice eigvalue time (s)', tarpack ! DEALLOCATE(xgrid, rhs, sol) DEALLOCATE(arr) CALL destroy(mat) CALL destroy_sp(splx) CALL closef(fid) CONTAINS SUBROUTINE av(n,v,w) ! ! Matrix vector product: w <- Av INTEGER, INTENT(in) :: n DOUBLE PRECISION, INTENT(in) :: v(*) DOUBLE PRECISION, INTENT(out) :: w(*) w(1:n) = vmx(mat,v(1:n)) END SUBROUTINE av SUBROUTINE coefeq(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE PRECISION, INTENT(out) :: c(:) ! ! Weak form = Int(x*dw/dx*dt/dx)dx ! c(1) = x idt(1) = 1 idw(1) = 1 END SUBROUTINE coefeq END PROGRAM main ! !+++ SUBROUTINE meshdist(c, x, nx) ! ! Construct an 1d non-equidistant mesh given a ! mesh distribution function defined in FDIST ! IMPLICIT NONE DOUBLE PRECISION, INTENT(in) :: c(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) INTEGER :: nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal !!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) END FUNCTION fdist END SUBROUTINE meshdist !+++ SUBROUTINE dismat(spl, mat) ! ! Assembly FE matrix mat using spline spl ! USE bsplines USE matrix IMPLICIT NONE TYPE(spline1d), INTENT(in) :: spl TYPE(gbmat), INTENT(inout) :: mat INTEGER :: nrank, nx, nidbas, ngauss INTEGER :: i, igauss, iterm, iw, jt, irow, jcol DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) DOUBLE PRECISION :: contrib ! INTEGER :: kterms ! Number of terms in weak form INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order DOUBLE PRECISION, ALLOCATABLE :: coefs(:) !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl, nrank, nx, nidbas) ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative ! ! Weak form ! kterms = mat%nterms ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms)) ! ! Gauss quadature ! CALL get_gauss(spl, ngauss) ALLOCATE(xgauss(ngauss), wgauss(ngauss)) !=========================================================================== ! 2.0 Assembly loop ! DO i=1,nx CALL get_gauss(spl, ngauss, i, xgauss, wgauss) DO igauss=1,ngauss CALL basfun(xgauss(igauss), spl, fun, i) CALL coefeq(xgauss(igauss), idert, iderw, coefs) DO iterm=1,kterms DO jt=0,nidbas DO iw=0,nidbas contrib = fun(jt,idert(iterm)) * coefs(iterm) * & & fun(iw,iderw(iterm)) * wgauss(igauss) irow=i+iw; jcol=i+jt CALL updtmat(mat, irow, jcol, contrib) END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(fun) DEALLOCATE(xgauss, wgauss) DEALLOCATE(iderw, idert, coefs) ! CONTAINS SUBROUTINE coefeq(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt)) ! ! Weak form = Int(x*dw/dx*dt/dx)dx ! c(1) = x idt(1) = 1 idw(1) = 1 END SUBROUTINE coefeq END SUBROUTINE dismat !+++ SUBROUTINE disrhs(kdiff, spl, rhs) ! ! Assenbly the RHS using spline spl ! USE bsplines IMPLICIT NONE INTEGER, INTENT(in) :: kdiff TYPE(spline1d), INTENT(in) :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:) INTEGER :: nrank, nx, nidbas, ngauss INTEGER :: i, igauss, left DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) DOUBLE PRECISION:: contrib !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl, nrank, nx, nidbas) !!$ WRITE(*,'(/a, 5i3)') 'nrank, nx, nidbas =', nrank, nx, nidbas ! ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives) ! ! Gauss quadature ! CALL get_gauss(spl, ngauss) !!$ WRITE(*,'(/a, i3)') 'Gauss points and weights, ngauss =', ngauss ALLOCATE(xgauss(ngauss), wgauss(ngauss)) !=========================================================================== ! 2.0 Assembly loop ! rhs(1:nrank) = 0.0d0 ! DO i=1,nx CALL get_gauss(spl, ngauss, i, xgauss, wgauss) !!$ WRITE(*,'(a,i3,(8f10.4))') 'i=',i, xgauss, wgauss DO igauss=1,ngauss CALL basfun(xgauss(igauss), spl, fun, i) left = i-1 !!$ WRITE(*,'(a,5i4)') '>>>igauss, left', igauss, left contrib = wgauss(igauss)*rhseq(xgauss(igauss), kdiff) rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1) END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(fun) DEALLOCATE(xgauss, wgauss) ! CONTAINS DOUBLE PRECISION FUNCTION rhseq(x,k) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(in) :: k rhseq = k*k*x**(k-1) END FUNCTION rhseq END SUBROUTINE disrhs diff --git a/examples/pde1d_eig_zcsr.f90 b/examples/pde1d_eig_zcsr.f90 index d467456..eaaebe4 100644 --- a/examples/pde1d_eig_zcsr.f90 +++ b/examples/pde1d_eig_zcsr.f90 @@ -1,481 +1,481 @@ !> !> @file pde1d_eig_zcsr.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Solving the following 1d differential eqation using splines: ! ! -d/dr[r d/dr] f = k^2 r^(k-1), with f(r=1) = 0 ! exact solution: f(r) = 1 - r^k ! USE bsplines USE csr USE futils USE f95_precision, ONLY: WP => DP USE lapack95, ONLY: geev IMPLICIT NONE INTEGER :: nx, nidbas, ngauss, kdiff INTEGER :: i, nrank, kl, ku LOGICAL :: nlppform DOUBLE PRECISION :: coefs(5) DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, rhs, sol TYPE(spline1d) :: splx TYPE(zcsr_mat) :: mat ! CHARACTER(len=128) :: file='pde1d.h5' INTEGER :: fid, ffid DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr DOUBLE PRECISION :: seconds, t0, tmat, teig, tarpack ! ! Lapack95 GEEV arguments COMPLEX(WP), ALLOCATABLE :: mata(:,:), w(:) REAL(WP), ALLOCATABLE :: WR(:), WI(:) INTEGER :: j, info ! INTEGER, PARAMETER :: maxn=256, maxnev=maxn, maxncv=25 INTEGER :: lworkl, zero=0.0d0 DOUBLE PRECISION :: d(maxncv,2), tol=0.0d0, rwork(maxncv) DOUBLE COMPLEX :: v(maxn,maxncv), resid(maxn), sigma DOUBLE COMPLEX :: workd(3*maxncv), lwork(2*maxn) DOUBLE COMPLEX, ALLOCATABLE :: workl(:), vl(:,:), vr(:,:) DOUBLE PRECISION, EXTERNAL :: dnrm2 INTEGER :: nev=10, ncv=20, ishfts=1, maxitr=300, nconv, & & mode1=1, ierr INTEGER :: ido, ipntr(14), iparam(11) CHARACTER(len=1) :: bmat='I' CHARACTER(len=2) :: which='SA' LOGICAL :: rvec, select(maxncv) ! INTERFACE SUBROUTINE dismat(spl, mat) USE bsplines USE csr TYPE(spline1d), INTENT(in) :: spl TYPE(zcsr_mat), INTENT(inout) :: mat END SUBROUTINE dismat SUBROUTINE disrhs(kdiff, spl, rhs) USE bsplines INTEGER, INTENT(in) :: kdiff TYPE(spline1d), INTENT(in) :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:) END SUBROUTINE disrhs SUBROUTINE meshdist(coefs, x, nx) DOUBLE PRECISION, INTENT(in) :: coefs(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) END SUBROUTINE meshdist END INTERFACE ! NAMELIST /newrun/ nx, nidbas, ngauss, kdiff, nlppform, coefs, nev, ncv, which !=========================================================================== ! 1.0 Prologue ! ! Read in data specific to run ! nx = 8 ! Number oh intevals in x nidbas = 3 ! Degree of splines ngauss = 4 ! Number of Gauss points/interval kdiff = 2 ! Exponent of differential problem nlppform = .TRUE. ! Use PPFORM for gridval or not coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function ! see function FDIST in MESHDIST ! READ(*,newrun) WRITE(*,newrun) ! ! Define grid on x axis ! ALLOCATE(xgrid(0:nx)) xgrid(0) = 0.0d0 xgrid(nx) = 1.0d0 CALL meshdist(coefs, xgrid, nx) WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx) WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx) ! ! Create hdf5 file ! CALL creatf(file, fid, 'PDE1D Result File') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NIDBAS', nidbas) CALL attach(fid, '/', 'NGAUSS', ngauss) CALL attach(fid, '/', 'KDIFF', kdiff) !=========================================================================== ! 2.0 Discretize the PDE ! ! Set up spline ! t0 = seconds() CALL set_spline(nidbas, ngauss, xgrid, splx, nlppform=nlppform) CALL get_dim(splx, nrank) ! Rank of the FE matrix WRITE(*,'(a,l6)') 'splx%nlequid', splx%nlequid ! ! FE matrix assembly ! kl = nidbas ku = kl WRITE(*,'(a,3i6)') 'kl, ku, nrank', kl, ku, nrank CALL init(nrank, 1, mat) CALL dismat(splx, mat) !!$ CALL conmat(splx, mat, coefeq) CALL to_mat(mat) PRINT*,'MAT after to_mat', mat%val CALL creatf('pde1d_eig.h5', ffid, 'PDE1D Result File', real_prec='D') PRINT*, 'rank', mat%rank PRINT*, 'nnz', mat%nnz PRINT*, 'irow', mat%irow PRINT*, 'cols', mat%cols CALL putmat(ffid,'/MAT',mat,'FE matrix') PRINT*, 'MAT',mat%val CALL closef(ffid) STOP ! ! ALLOCATE(arr(nrank)) !!$ WRITE(*,'(/a)') 'Matrice before BC' !!$ DO i=1,nrank !!$ CALL getrow(mat, i, arr) !!$ WRITE(*,'(12f8.3)') arr, SUM(arr) !!$ END DO ! ! RHS assembly ! ALLOCATE(rhs(nrank), sol(nrank)) CALL disrhs(kdiff, splx, rhs) !!$ WRITE(*,'(/a,/(12(f8.3)))') 'RHS before BC', rhs !!$! !!$! Set BC f(r=1) = 0 on matrix !!$! !!$ arr(1:nrank-1) = 0.0d0 !!$ arr(nrank) = 1.0d0 !!$ CALL putrow(mat, nrank, arr) !!$ CALL putcol(mat, nrank, arr) CALL putmat(fid,'/MATA', mat, 'FE matrix') tmat = seconds() - t0 ! !=========================================================================== ! 3.0 Eigevalue problem ! ! Using Lapack dsyev ! t0 = seconds() ALLOCATE(mata(nrank,nrank)) ALLOCATE(w(nrank)) ALLOCATE(wr(nrank), wi(nrank)) ALLOCATE(vl(nrank,nrank), vr(nrank,nrank)) mata=0.0d0 DO j=1,nrank CALL getcol(mat, j, mata(:,j)) ! convert to dense matrix mata END DO CALL putarr(fid, '/MAT', mata, 'matrix A') CALL geev(mata, w) wr(:) = REAL(w(:)) wi(:) = AIMAG(w(:)) teig = seconds() - t0 PRINT*,'Info from ZGEEV', info, arr(1) IF(info.EQ.0) THEN CALL putarr(fid, '/REIGVS', wr, 'eigenvalues of A') CALL putarr(fid, '/IEIGVS', wi, 'eigenvectors of A') WRITE(*,'(a/(10f10.4))') 'Real eigval', wr WRITE(*,'(a/(10f10.4))') 'Imag eigval', wi END IF ! ! Using Arpack ! ido = 0 iparam(1) = ishfts iparam(3) = maxitr iparam(7) = mode1 ! lworkl = 3*ncv**2+5*ncv ALLOCATE(workl(lworkl)) ! IF(nev.GT.0) THEN t0 = seconds() DO ! ARPACK reverse communication loop CALL dsaupd(ido, bmat, nrank, which, nev, tol, resid, ncv, & & v, SIZE(v,1), iparam, ipntr, workd, workl, lworkl, & & rwork, info) IF(ido.EQ.-1 .OR. ido.EQ.1) THEN !!$ WRITE(*,'(a/(10i4))') 'ipntr', ipntr CALL av(nrank,workd(ipntr(1)), workd(ipntr(2))) CYCLE END IF IF(info .LT. 0) THEN PRINT*, 'Error in _saupd with info =', info ELSE rvec = .TRUE. CALL dseupd(rvec, 'All', SELECT, d, v, maxn, sigma, & & bmat, nrank, which, nev, tol, resid, ncv, v, maxn, & & iparam, ipntr, workd, workl, lworkl, ierr ) IF( ierr .NE. 0 ) THEN PRINT*,'Error in _seupd with ierr =', ierr ELSE nconv = iparam(5) PRINT*, '--- eigenvalues and error ---' DO j=1,nconv ! Residual norms !!$ CALL av(nrank,v(1,j), w) ! d(1,j) is the j^th eigenvalue !!$ CALL daxpy(nrank, -d(j,1), v(1,j), 1, w, 1) !!$ d(j,2) = dnrm2(nrank, w, 1) !!$ d(j,2) = d(j,2)/abs(d(j,1)) WRITE(*,'(2(1pe12.4))') d(j,1), eigvals(j)-d(j,1) !!$ CALL putarr(fid, '/EIGVS', d(:,1), 'ARPACK eigenvalues of A') !!$ CALL putarr(fid, '/EIGVECS', v, 'ARPACK eigenvectors of A') END DO EXIT END IF END IF END DO ! End of ARPACK reverse communication loop tarpack = seconds()-t0 END IF !=========================================================================== ! 9.0 Epilogue ! WRITE(*,'(a)') '---' WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat WRITE(*,'(a,1pe12.3)') 'Lapack: Matrice eigvalue time (s)', teig WRITE(*,'(a,1pe12.3)') 'Arpack: Matrice eigvalue time (s)', tarpack ! DEALLOCATE(xgrid, rhs, sol) DEALLOCATE(arr) CALL destroy(mat) CALL destroy_sp(splx) CALL closef(fid) CONTAINS SUBROUTINE av(n,v,w) ! ! Matrix vector product: w <- Av INTEGER, INTENT(in) :: n DOUBLE COMPLEX, INTENT(in) :: v(*) DOUBLE COMPLEX, INTENT(out) :: w(*) w(1:n) = vmx(mat,v(1:n)) END SUBROUTINE av SUBROUTINE coefeq(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE PRECISION, INTENT(out) :: c(:) ! ! Weak form = Int(x*dw/dx*dt/dx)dx ! c(1) = x idt(1) = 1 idw(1) = 1 END SUBROUTINE coefeq END PROGRAM main ! !+++ SUBROUTINE meshdist(c, x, nx) ! ! Construct an 1d non-equidistant mesh given a ! mesh distribution function defined in FDIST ! IMPLICIT NONE DOUBLE PRECISION, INTENT(in) :: c(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) INTEGER :: nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal !!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) END FUNCTION fdist END SUBROUTINE meshdist !+++ SUBROUTINE dismat(spl, mat) ! ! Assembly FE matrix mat using spline spl ! USE bsplines USE csr IMPLICIT NONE TYPE(spline1d), INTENT(in) :: spl TYPE(zcsr_mat), INTENT(inout) :: mat INTEGER :: nrank, nx, nidbas, ngauss INTEGER :: i, igauss, iterm, iw, jt, irow, jcol DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) DOUBLE COMPLEX :: contrib ! INTEGER :: kterms ! Number of terms in weak form INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order DOUBLE PRECISION, ALLOCATABLE :: coefs(:) !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl, nrank, nx, nidbas) ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative ! ! Weak form ! kterms = mat%nterms ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms)) ! ! Gauss quadature ! CALL get_gauss(spl, ngauss) ALLOCATE(xgauss(ngauss), wgauss(ngauss)) !=========================================================================== ! 2.0 Assembly loop ! DO i=1,nx CALL get_gauss(spl, ngauss, i, xgauss, wgauss) DO igauss=1,ngauss CALL basfun(xgauss(igauss), spl, fun, i) CALL coefeq(xgauss(igauss), idert, iderw, coefs) DO iterm=1,kterms DO jt=0,nidbas DO iw=0,nidbas contrib = fun(jt,idert(iterm)) * coefs(iterm) * & & fun(iw,iderw(iterm)) * wgauss(igauss) irow=i+iw; jcol=i+jt CALL updtmat(mat, irow, jcol, contrib) END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(fun) DEALLOCATE(xgauss, wgauss) DEALLOCATE(iderw, idert, coefs) ! CONTAINS SUBROUTINE coefeq(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt)) ! ! Weak form = Int(x*dw/dx*dt/dx)dx ! c(1) = x idt(1) = 1 idw(1) = 1 END SUBROUTINE coefeq END SUBROUTINE dismat !+++ SUBROUTINE disrhs(kdiff, spl, rhs) ! ! Assenbly the RHS using spline spl ! USE bsplines IMPLICIT NONE INTEGER, INTENT(in) :: kdiff TYPE(spline1d), INTENT(in) :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:) INTEGER :: nrank, nx, nidbas, ngauss INTEGER :: i, igauss, left DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) DOUBLE PRECISION:: contrib !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl, nrank, nx, nidbas) !!$ WRITE(*,'(/a, 5i3)') 'nrank, nx, nidbas =', nrank, nx, nidbas ! ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives) ! ! Gauss quadature ! CALL get_gauss(spl, ngauss) !!$ WRITE(*,'(/a, i3)') 'Gauss points and weights, ngauss =', ngauss ALLOCATE(xgauss(ngauss), wgauss(ngauss)) !=========================================================================== ! 2.0 Assembly loop ! rhs(1:nrank) = 0.0d0 ! DO i=1,nx CALL get_gauss(spl, ngauss, i, xgauss, wgauss) !!$ WRITE(*,'(a,i3,(8f10.4))') 'i=',i, xgauss, wgauss DO igauss=1,ngauss CALL basfun(xgauss(igauss), spl, fun, i) left = i-1 !!$ WRITE(*,'(a,5i4)') '>>>igauss, left', igauss, left contrib = wgauss(igauss)*rhseq(xgauss(igauss), kdiff) rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1) END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(fun) DEALLOCATE(xgauss, wgauss) ! CONTAINS DOUBLE PRECISION FUNCTION rhseq(x,k) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(in) :: k rhseq = k*k*x**(k-1) END FUNCTION rhseq END SUBROUTINE disrhs diff --git a/examples/pde1d_eig_zmumps.f90 b/examples/pde1d_eig_zmumps.f90 index ec17841..644f680 100644 --- a/examples/pde1d_eig_zmumps.f90 +++ b/examples/pde1d_eig_zmumps.f90 @@ -1,460 +1,460 @@ !> !> @file pde1d_eig_zmumps.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Solving the following 1d differential eqation using splines: ! ! Solve the standard eigenvalue: ! A*x = \lambda *x or inv(A)*x = 1/\lambda * x using Arpack and MUMPS. ! where A is obtained from discretozation of ! -d/dr[r d/dr] f = k^2 r^(k-1) ! USE bsplines USE mumps_bsplines USE futils IMPLICIT NONE INTEGER :: nx, nidbas, ngauss, kdiff INTEGER :: i, nrank LOGICAL :: nlppform DOUBLE PRECISION :: coefs(5) DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid TYPE(spline1d) :: splx TYPE(zmumps_mat) :: mat ! INTEGER :: ierr INTEGER :: fid CHARACTER(len=32) :: str DOUBLE PRECISION :: seconds, t0, tmat, tfac, tarpack ! ! Arpack: Solve the standard eigenvalue problem ! INTEGER :: nev = 10, ncv = 10 LOGICAL :: nlinv = .FALSE. ! Solve inv(A) = 1/\lambda * x if nlinv=.TRUE. CHARACTER(len=2) :: which='SM' INTEGER :: info=0 ! Use random vector to start the Arnoldi iterations INTEGER :: ido=0 ! Reverse communications LOGICAL :: rvec LOGICAL, ALLOCATABLE :: select(:) INTEGER :: iparam(11), ipntr(14), nconv DOUBLE PRECISION :: tol=0.0d0 CHARACTER(len=1) :: bmat='I' ! INTEGER :: lworkl DOUBLE COMPLEX, ALLOCATABLE :: workl(:), workd(:), workev(:) DOUBLE COMPLEX, ALLOCATABLE :: eig_vals(:), eig_vecs(:,:), resid(:) DOUBLE COMPLEX :: sigma DOUBLE PRECISION, ALLOCATABLE :: rwork(:) ! INTERFACE SUBROUTINE dismat(spl, mat) USE bsplines USE mumps_bsplines TYPE(spline1d), INTENT(in) :: spl TYPE(zmumps_mat), INTENT(inout) :: mat END SUBROUTINE dismat SUBROUTINE disrhs(kdiff, spl, rhs) USE bsplines INTEGER, INTENT(in) :: kdiff TYPE(spline1d), INTENT(in) :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:) END SUBROUTINE disrhs SUBROUTINE meshdist(coefs, x, nx) DOUBLE PRECISION, INTENT(in) :: coefs(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) END SUBROUTINE meshdist END INTERFACE ! NAMELIST /newrun/ nx, nidbas, ngauss, kdiff, nlppform, coefs, & & nev, ncv, nlinv, which, tol !=========================================================================== ! 1.0 Prologue ! CALL mpi_init(ierr) ! ! Read in data specific to run ! nx = 8 ! Number oh intevals in x nidbas = 3 ! Degree of splines ngauss = 4 ! Number of Gauss points/interval kdiff = 2 ! Exponent of differential problem nlppform = .TRUE. ! Use PPFORM for gridval or not coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function ! see function FDIST in MESHDIST ! READ(*,newrun) WRITE(*,newrun) ! ! Define grid on x axis ! ALLOCATE(xgrid(0:nx)) xgrid(0) = 0.0d0 xgrid(nx) = 1.0d0 CALL meshdist(coefs, xgrid, nx) WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx) WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx) !=========================================================================== ! 2.0 Discretize the PDE ! ! Set up spline ! t0 = seconds() CALL set_spline(nidbas, ngauss, xgrid, splx, nlppform=nlppform) CALL get_dim(splx, nrank) ! Rank of the FE matrix WRITE(*,'(a,l6)') 'splx%nlequid', splx%nlequid ! ! FE matrix assembly ! WRITE(*,'(a,i6)') ' nrank', nrank CALL init(nrank, 1, mat) CALL dismat(splx, mat) CALL to_mat(mat) tmat = seconds() - t0 ! mat%mumps_par%IRN => mat%mumps_par%IRN_loc ! Work around for single proc. PRINT*, 'nnz_loc', mat%nnz_loc PRINT*, 'mat%mumps_par%N', mat%mumps_par%N PRINT*, 'mat%mumps_par%NZ_loc', mat%mumps_par%NZ_loc PRINT*, 'size of mat%mumps_par%IRN', SIZE(mat%mumps_par%IRN) PRINT*, 'mat%istart,mat%iend', mat%istart,mat%iend ! CALL creatf('pde1d_eig_zmumps.h5', fid, 'PDE1D Result File', real_prec='d') PRINT*, 'rank', mat%rank PRINT*, 'nnz', mat%nnz ! CALL putmat(fid,'/MAT',mat,'FE matrix') ! IF(nlinv) THEN t0 = seconds() CALL factor(mat) tfac = seconds()-t0 END IF !=========================================================================== ! 3.0 Solve the standard eigenvalue problem ! lworkl = 3*ncv**2 + 5*ncv ALLOCATE(workl(lworkl)) ALLOCATE(workd(3*nrank)) ALLOCATE(workev(2*ncv)) ALLOCATE(eig_vals(ncv), eig_vecs(nrank,ncv)) ALLOCATE(resid(nrank)) ALLOCATE(rwork(ncv)) ! iparam(1) = 1 ! shfts iparam(3) = 300 ! Max. number of iterations iparam(7) = 1 ! Regular mode ! ! The reverse communication loop ! t0 = seconds() DO CALL znaupd (ido, bmat, nrank, which, nev, tol, resid, ncv, & & eig_vecs, nrank, iparam, ipntr, workd, workl, lworkl, & & rwork, info ) ! IF(ido .EQ. -1 .OR. ido .EQ. 1) THEN ! Compute A*v CALL av(nrank, workd(ipntr(1)), workd(ipntr(2))) CYCLE END IF ! IF(info .LT. 0) THEN ! Error PRINT*, 'Error in _naupd with info =', info EXIT ELSE rvec = .TRUE. ALLOCATE(select(ncv)) CALL zneupd (rvec, 'A', select, eig_vals, eig_vecs, nrank, & & sigma, workev, bmat, nrank, which, nev, tol, resid, & & ncv, eig_vecs, nrank, iparam, ipntr, workd, workl, lworkl,& & rwork, ierr) IF(ierr .NE. 0) THEN PRINT*, 'Error in _neupd with ierr =', ierr EXIT ELSE nconv = iparam(5) PRINT*,'Number of converged eigenvalues', nconv IF(nlinv) THEN eig_vals(1:nconv) = (1.d0,0.0d0) / eig_vals(1:nconv) END IF WRITE(*,'(2(1pe12.3))') eig_vals(1:nconv) CALL putarr(fid, '/eig_vals', eig_vals(1:nconv)) CALL putarr(fid, '/eig_vecs', eig_vecs(1:nrank,1:nconv)) DO i=1,nconv !!$ WRITE(*,'(/a,2(pe20.6))') '*** eigen value =', eig_vals(i) !!$ WRITE(*,'(a/(10(1pe12.4)))') 'Real of eigen vector', & !!$ & REAL(eig_vecs(1:nrank,i)) !!$ WRITE(*,'(a/(10(1pe12.4)))') 'Imag of eigen vector', & !!$ & aimag(eig_vecs(1:nrank,i)) WRITE(str,'(a,i3.3)') '/eig_vecs_',i CALL putarr(fid, TRIM(str), eig_vecs(1:nrank,i)) END DO EXIT END IF END IF END DO ! End of reverse commuinication loop IF(info .EQ. 1) THEN PRINT*, 'Maximum number of iterations reached!' PRINT*, 'IPARAM(5) =', iparam(5) END IF PRINT*, 'Number of Arnoldi iterations', iparam(3) tarpack = seconds() - t0 !=========================================================================== ! 9.0 Epilogue ! WRITE(*,'(a)') '---' WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat IF(nlinv) THEN WRITE(*,'(a,1pe12.3)') 'Matrice factorization time (s) ', tfac END IF WRITE(*,'(a,1pe12.3)') 'Arpack time (s) ', tarpack ! DEALLOCATE(xgrid) CALL destroy(mat) CALL destroy_sp(splx) CALL closef(fid) CALL mpi_finalize(ierr) ! CONTAINS SUBROUTINE av(n,v,w) ! INTEGER, INTENT(in) :: n DOUBLE COMPLEX, INTENT(in) :: v(*) DOUBLE COMPLEX, INTENT(out) :: w(*) ! IF(nlinv) THEN w(1:n) = v(1:n) CALL bsolve(mat,w(1:n)) ! Solve A*w = v or w=inv(A)*v ELSE w(1:n) = vmx(mat,v(1:n)) ! A*v matrix product END IF END SUBROUTINE av ! SUBROUTINE coefeq(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE PRECISION, INTENT(out) :: c(:) ! ! Weak form = Int(x*dw/dx*dt/dx)dx ! c(1) = x idt(1) = 1 idw(1) = 1 END SUBROUTINE coefeq END PROGRAM main ! !+++ SUBROUTINE meshdist(c, x, nx) ! ! Construct an 1d non-equidistant mesh given a ! mesh distribution function defined in FDIST ! IMPLICIT NONE DOUBLE PRECISION, INTENT(in) :: c(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) INTEGER :: nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal !!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) END FUNCTION fdist END SUBROUTINE meshdist !+++ SUBROUTINE dismat(spl, mat) ! ! Assembly FE matrix mat using spline spl ! USE bsplines USE mumps_bsplines IMPLICIT NONE TYPE(spline1d), INTENT(in) :: spl TYPE(zmumps_mat), INTENT(inout) :: mat INTEGER :: nrank, nx, nidbas, ngauss INTEGER :: i, igauss, iterm, iw, jt, irow, jcol DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) DOUBLE COMPLEX :: contrib ! INTEGER :: kterms ! Number of terms in weak form INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order DOUBLE PRECISION, ALLOCATABLE :: coefs(:) !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl, nrank, nx, nidbas) ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative ! ! Weak form ! kterms = mat%nterms ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms)) ! ! Gauss quadature ! CALL get_gauss(spl, ngauss) ALLOCATE(xgauss(ngauss), wgauss(ngauss)) !=========================================================================== ! 2.0 Assembly loop ! DO i=1,nx CALL get_gauss(spl, ngauss, i, xgauss, wgauss) DO igauss=1,ngauss CALL basfun(xgauss(igauss), spl, fun, i) CALL coefeq(xgauss(igauss), idert, iderw, coefs) DO iterm=1,kterms DO jt=0,nidbas DO iw=0,nidbas contrib = fun(jt,idert(iterm)) * coefs(iterm) * & & fun(iw,iderw(iterm)) * wgauss(igauss) irow=i+iw; jcol=i+jt CALL updtmat(mat, irow, jcol, contrib) END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(fun) DEALLOCATE(xgauss, wgauss) DEALLOCATE(iderw, idert, coefs) ! CONTAINS SUBROUTINE coefeq(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt)) ! ! Weak form = Int(x*dw/dx*dt/dx)dx ! c(1) = x idt(1) = 1 idw(1) = 1 END SUBROUTINE coefeq END SUBROUTINE dismat !+++ SUBROUTINE disrhs(kdiff, spl, rhs) ! ! Assenbly the RHS using spline spl ! USE bsplines IMPLICIT NONE INTEGER, INTENT(in) :: kdiff TYPE(spline1d), INTENT(in) :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:) INTEGER :: nrank, nx, nidbas, ngauss INTEGER :: i, igauss, left DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) DOUBLE PRECISION:: contrib !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl, nrank, nx, nidbas) !!$ WRITE(*,'(/a, 5i3)') 'nrank, nx, nidbas =', nrank, nx, nidbas ! ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives) ! ! Gauss quadature ! CALL get_gauss(spl, ngauss) !!$ WRITE(*,'(/a, i3)') 'Gauss points and weights, ngauss =', ngauss ALLOCATE(xgauss(ngauss), wgauss(ngauss)) !=========================================================================== ! 2.0 Assembly loop ! rhs(1:nrank) = 0.0d0 ! DO i=1,nx CALL get_gauss(spl, ngauss, i, xgauss, wgauss) !!$ WRITE(*,'(a,i3,(8f10.4))') 'i=',i, xgauss, wgauss DO igauss=1,ngauss CALL basfun(xgauss(igauss), spl, fun, i) left = i-1 !!$ WRITE(*,'(a,5i4)') '>>>igauss, left', igauss, left contrib = wgauss(igauss)*rhseq(xgauss(igauss), kdiff) rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1) END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(fun) DEALLOCATE(xgauss, wgauss) ! CONTAINS DOUBLE PRECISION FUNCTION rhseq(x,k) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(in) :: k rhseq = k*k*x**(k-1) END FUNCTION rhseq END SUBROUTINE disrhs diff --git a/examples/pde1dp.f90 b/examples/pde1dp.f90 index 7eb32bd..36c93d4 100644 --- a/examples/pde1dp.f90 +++ b/examples/pde1dp.f90 @@ -1,170 +1,170 @@ !> !> @file pde1dp.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! 1D PDE with priodic BC ! USE pde1dp_mod USE bsplines USE matrix USE futils USE conmat_mod ! IMPLICIT NONE CHARACTER(len=128) :: file='pde1dp.h5' INTEGER :: fid INTEGER :: nx, nidbas, ngauss, ibcoef INTEGER :: nrank, kl, ku, dim DOUBLE PRECISION :: coefs(5) DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, sol, rhs INTEGER, PARAMETER :: npts=100 DOUBLE PRECISION, DIMENSION(0:npts-1) :: xpts, frhs DOUBLE PRECISION :: dx, errmx DOUBLE PRECISION, ALLOCATABLE :: arr(:,:) TYPE(periodic_mat) :: mat INTEGER :: i, j ! NAMELIST /newrun/ nx, nidbas, ngauss, ibcoef, coefs !=========================================================================== ! 1.0 Prologue ! ! Read in data specific to run ! nx = 8 ! Number oh intevals in x nidbas = 3 ! Degree of splines ngauss = 4 ! Number of Gauss points/interval ibcoef = 1 ! Index of non-zero spline coef for RHS coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function ! see function FDIST in MESHDIST ! READ(*,newrun) WRITE(*,newrun) ! ! Define grid on x axis ! ALLOCATE(xgrid(0:nx)) xgrid(0) = 0.0d0 xgrid(nx) = 1.0d0 CALL meshdist(coefs, xgrid, nx) WRITE(*,'(/a/(10f8.3))') 'XGRID', xgrid(0:nx) ! ! Create hdf5 file ! CALL creatf(file, fid, 'PDE1DP Result File') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NIDBAS', nidbas) CALL attach(fid, '/', 'NGAUSS', ngauss) !=========================================================================== ! 2.0 Discretize the PDE ! ! Set up periodic spline ! CALL set_spline(nidbas, ngauss, xgrid, splx, period=.TRUE.) nrank = nx ! Rank of the FE matrix ! ! Mass matrix assembly ! kl = nidbas ku = kl CALL init(kl, ku, nrank, 1, mat) CALL get_dim(splx, dim) WRITE(*,'(/a,4i6)') 'kl, ku, nrank, dim', kl, ku, nrank, dim !!$ CALL dismat(splx, mat) CALL conmat(splx, mat, coefeq_mass) ! ! Store matrix in hdf5 file ! ALLOCATE(arr(nrank,nrank)) DO j=1,nrank CALL getcol(mat, j, arr(:,j)) END DO CALL putarr(fid, '/mata', arr) DEALLOCATE(arr) ! ! Check RHS constructed using input spline coefs. ! ALLOCATE(bcoef(0:dim-1)) bcoef = 0.0d0; bcoef(ibcoef-1) = 1.0d0 ! DO i=nrank,dim-1 ! Periodicity to fill array of spline coefs bcoef(i) = bcoef(MODULO(i,nrank)) END DO WRITE(*,'(/a/(10f8.3))') 'bcoef from input', bcoef dx = (xgrid(nx)-xgrid(0))/npts DO i=0,npts-1 xpts(i) = xgrid(0) + i*dx frhs(i) = rhseq(xpts(i)) END DO CALL creatg(fid, '/rhs') CALL putarr(fid,'/rhs/x', xpts) CALL putarr(fid,'/rhs/f', frhs) ! ! Assembly RHS and check A*x = f, using method vmx ! ALLOCATE(rhs(nrank), sol(nrank)) CALL disrhs(splx, rhs) sol = vmx(mat, bcoef(0:nrank-1)) WRITE(*,'(/6x,3a12)') 'A*x', 'rhs', 'Err' errmx = 0.0d0 DO i=1,nrank WRITE(*,'(i6,3(1pe12.3))') i, sol(i), rhs(i), sol(i)-rhs(i) errmx=MAX(errmx,ABS(sol(i)-rhs(i))) END DO WRITE(*,'(a,1pe12.3)') 'Max. error =', errmx ! ! Factor and solve ! CALL factor(mat) CALL bsolve(mat, rhs, sol) WRITE(*,'(/6x,3a12)') 'Computed', 'Exact', 'Err' errmx = 0.0d0 DO i=1,nrank WRITE(*,'(i6,3(1pe12.3))') i, sol(i), bcoef(i-1), sol(i)-bcoef(i-1) errmx=MAX(errmx,ABS(sol(i)-bcoef(i-1))) END DO WRITE(*,'(a,1pe12.3)') 'Max. error =', errmx !=========================================================================== ! 9.0 Clean up ! DEALLOCATE(xgrid) DEALLOCATE(bcoef) DEALLOCATE(rhs, sol) CALL destroy(mat) CALL destroy_sp(splx) CALL closef(fid) CONTAINS SUBROUTINE coefeq_mass(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE PRECISION, INTENT(out) :: c(:) ! ! Mass matrix ! c(1) = 1.0d0 idt(1) = 0 idw(1) = 0 END SUBROUTINE coefeq_mass END PROGRAM main diff --git a/examples/pde1dp_cmpl.f90 b/examples/pde1dp_cmpl.f90 index 48b2d1e..44ca51b 100644 --- a/examples/pde1dp_cmpl.f90 +++ b/examples/pde1dp_cmpl.f90 @@ -1,403 +1,403 @@ !> !> @file pde1dp_cmpl.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> MODULE pde1dp_cmpl_mod USE bsplines USE matrix IMPLICIT NONE ! CONTAINS SUBROUTINE disrhs(spl, rhs, mmode, alpha, beta) ! TYPE(spline1d) :: spl INTEGER, INTENT(in) :: mmode DOUBLE COMPLEX, INTENT(in) :: alpha, beta DOUBLE COMPLEX, INTENT(out) :: rhs(:) ! INTEGER :: dim, nrank, nx, nidbas, ngauss INTEGER :: i, igauss, it, irow DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) DOUBLE COMPLEX :: contrib !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! nrank = SIZE(rhs) CALL get_dim(spl, dim, nx, nidbas) ! ALLOCATE(fun(0:nidbas,1)) ! needs only basis functions (no derivatives) ! ! Gauss quadature ! CALL get_gauss(spl, ngauss) ALLOCATE(xgauss(ngauss), wgauss(ngauss)) !=========================================================================== ! 2.0 Assembly loop ! rhs(:) = 0.0d0 ! DO i=1,nx CALL get_gauss(spl, ngauss, i, xgauss, wgauss) DO igauss=1,ngauss CALL basfun(xgauss(igauss), spl, fun, i) contrib = wgauss(igauss) * rhseq(xgauss(igauss)) DO it=0,nidbas irow=MODULO(i+it-1,nx) + 1 ! Periodic BC rhs(irow) = rhs(irow) + contrib*fun(it,1) END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(fun) DEALLOCATE(xgauss, wgauss) CONTAINS DOUBLE COMPLEX FUNCTION rhseq(x) DOUBLE PRECISION, INTENT(in) :: x DOUBLE PRECISION :: arg arg = mmode*x rhseq = (mmode**2*alpha-beta)*COS(arg) END FUNCTION rhseq END SUBROUTINE disrhs !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE dismat(spl, mat, alpha, beta) ! TYPE(spline1d) :: spl TYPE(zperiodic_mat) :: mat DOUBLE COMPLEX, INTENT(in) :: alpha, beta ! INTEGER :: dim, nx, nidbas, ngauss INTEGER :: i, igauss, iterm, iw, jt, irow, jcol DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) DOUBLE COMPLEX :: contrib ! INTEGER :: kterms ! Number of terms in weak form INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order DOUBLE COMPLEX, ALLOCATABLE :: coefs(:) !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl, dim, nx, nidbas) ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative ! ! Weak form ! kterms = mat%mat%nterms ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms)) ! ! Gauss quadature ! CALL get_gauss(spl, ngauss) ALLOCATE(xgauss(ngauss), wgauss(ngauss)) !=========================================================================== ! 2.0 Assembly loop ! DO i=1,nx CALL get_gauss(spl, ngauss, i, xgauss, wgauss) DO igauss=1,ngauss CALL basfun(xgauss(igauss), spl, fun, i) CALL coefeq(xgauss(igauss), idert, iderw, coefs) DO iterm=1,kterms DO jt=0,nidbas DO iw=0,nidbas contrib = fun(jt,idert(iterm)) * coefs(iterm) * & & fun(iw,iderw(iterm)) * wgauss(igauss) irow=MODULO(i+iw-1,nx) + 1 ! Periodic BC jcol=MODULO(i+jt-1,nx) + 1 CALL updtmat(mat, irow, jcol, contrib) END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(fun) DEALLOCATE(xgauss, wgauss) DEALLOCATE(iderw, idert, coefs) ! CONTAINS SUBROUTINE coefeq(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE COMPLEX, INTENT(out) :: c(SIZE(idt)) ! c(1) = alpha idt(1) = 1 idw(1) = 1 ! c(2) = -beta idt(2) = 0 idw(2) = 0 END SUBROUTINE coefeq END SUBROUTINE dismat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE save_mat(fid, label, mat) ! ! Save zperiodic_mat in dense format ! USE futils ! INTEGER, INTENT(in) :: fid CHARACTER(len=*), INTENT(in) :: label TYPE(zperiodic_mat) :: mat INTEGER :: j, n DOUBLE COMPLEX, ALLOCATABLE :: fullmat(:,:) ! n=mat%mat%rank ALLOCATE(fullmat(n,n)) DO j=1,n CALL getcol(mat, j, fullmat(:,j)) END DO CALL putarr(fid, label, fullmat) DEALLOCATE(fullmat) END SUBROUTINE save_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ FUNCTION norm2(x) ! ! Compute the 2-norm of complex array x ! IMPLICIT NONE DOUBLE PRECISION :: norm2 DOUBLE COMPLEX, INTENT(in) :: x(:) DOUBLE PRECISION :: sum2 ! sum2 = DOT_PRODUCT(x,x) norm2 = SQRT(sum2) END FUNCTION norm2 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE meshdist(mmode, x) ! ! Construct a 1d non-equidistant mesh given a ! mesh distribution function. ! INTEGER, INTENT(in) :: mmode DOUBLE PRECISION, INTENT(inout) :: x(0:) INTEGER :: nx, nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! nx = SIZE(x)-1 a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = 2.0 + COS(mmode*x) END FUNCTION fdist END SUBROUTINE meshdist !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ END MODULE pde1dp_cmpl_mod !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ! PROGRAM main ! ! 1D complex PDE with periodic BC ! USE pde1dp_cmpl_mod USE bsplines USE matrix USE futils ! IMPLICIT NONE TYPE(spline1d) :: splx TYPE(zperiodic_mat) :: mat INTEGER :: kl, ku, nrank ! CHARACTER(len=128) :: file='pde1dp_cmpl.h5' INTEGER :: fid INTEGER :: nx, nidbas, ngauss, mmode, npt, dim LOGICAL :: nlequid DOUBLE PRECISION, PARAMETER :: pi=3.1415926535897931d0 DOUBLE PRECISION :: dx DOUBLE COMPLEX :: alpha, beta DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, x, err DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: sol, rhs, bcoef DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: solcal, solana DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) DOUBLE PRECISION :: err_norm INTEGER :: i ! NAMELIST /newrun/ nx, nidbas, ngauss, nlequid, alpha, beta, mmode, npt !=========================================================================== ! 1.0 Prologue ! ! Read in data specific to run ! nx = 8 ! Number oh intevals in x nidbas = 3 ! Degree of splines ngauss = 4 ! Number of Gauss points/interval nlequid = .TRUE. ! Use exact sol. as mesh dist. function if .FALSE. mmode = 1 ! Fourier mode alpha = (1.0, 1.0) ! Complex "diffusion" beta = 1.0 npt = 100 ! READ(*,newrun) WRITE(*,newrun) ! ! Define grid on x axis ! ALLOCATE(xgrid(0:nx)) dx = 2.d0*pi/REAL(nx,8) xgrid = (/ (i*dx,i=0,nx) /) IF( .NOT. nlequid ) THEN CALL meshdist(mmode, xgrid) END IF WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid ! ! Create hdf5 file ! CALL creatf(file, fid, 'PDE1DP Result File', real_prec='d') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NIDBAS', nidbas) CALL putarr(fid, '/xgrid', xgrid) !=========================================================================== ! 2.0 Discretize the PDE ! ! Set up periodic spline ! CALL set_spline(nidbas, ngauss, xgrid, splx, period=.TRUE.) WRITE(*,'(a,l6)') 'nlequid =', nlequid nrank = nx ! Rank of the FE matrix ! ! FE matrix assembly ! kl = nidbas ku = kl CALL init(kl, ku, nrank, 2, mat) CALL get_dim(splx, dim) WRITE(*,'(/a,4i6)') 'kl, ku, nrank, dim', kl, ku, nrank, dim CALL dismat(splx, mat, alpha, beta) ! ! RHS assembly ! ALLOCATE(rhs(nrank), sol(nrank)) CALL disrhs(splx, rhs, mmode, alpha, beta) ! CALL save_mat(fid, '/mat', mat) CALL putarr(fid, '/rhs', rhs) ! ! Factor and solve ! CALL factor(mat) CALL bsolve(mat, rhs, sol) CALL putarr(fid, '/sol', sol) !=========================================================================== ! 3.0 Check solution ! ! Exact solution ALLOCATE(x(0:npt), solcal(0:npt), solana(0:npt), err(0:npt)) dx=2.0d0*pi/REAL(npt,8) x = (/ (i*dx, i=0,npt) /) solana = COS(mmode*x) ! ! Prolongate solution using periodicity ! ALLOCATE(bcoef(dim)) bcoef(1:nrank) = sol(1:nrank) DO i=nrank+1,dim bcoef(i) = bcoef(MODULO(i-1,nrank)+1) END DO ! ! Interpolate field ! CALL gridval(splx, x, solcal, 0, bcoef) ! err = ABS(solcal-solana) CALL putarr(fid, '/x', x) CALL putarr(fid, '/solana', solana) CALL putarr(fid, '/solcal', solcal) CALL putarr(fid, '/err', err) ! ! Compute discretization error norm by Gauss integration ! err_norm=0.0 ALLOCATE(xgauss(ngauss), wgauss(ngauss)) DO i=1,nx CALL get_gauss(splx, ngauss, i, xgauss, wgauss) CALL gridval(splx, xgauss, solcal(1:ngauss), 0) solana(1:ngauss) = COS(mmode*xgauss) err(1:ngauss) = DOT_PRODUCT(solana(1:ngauss)-solcal(1:ngauss), & & solana(1:ngauss)-solcal(1:ngauss)) err_norm = err_norm + SUM(wgauss*err(1:ngauss)) END DO err_norm = SQRT(err_norm) WRITE(*,'(a,1pe12.3)') 'Discretization error ', err_norm ! DEALLOCATE(x, solcal, solana, err) DEALLOCATE(xgauss, wgauss) DEALLOCATE(bcoef) !=========================================================================== ! 9.0 Clean up ! DEALLOCATE(xgrid) DEALLOCATE(rhs, sol) CALL destroy(mat) CALL destroy_sp(splx) CALL closef(fid) END PROGRAM main diff --git a/examples/pde1dp_cmpl_dft.f90 b/examples/pde1dp_cmpl_dft.f90 index d866885..9fbf2dc 100644 --- a/examples/pde1dp_cmpl_dft.f90 +++ b/examples/pde1dp_cmpl_dft.f90 @@ -1,290 +1,290 @@ !> !> @file pde1dp_cmpl_dft.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> MODULE pde1dp_cmpl_dft_mod USE bsplines IMPLICIT NONE ! CONTAINS !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE dismat(spl, mat, alpha, beta) ! USE bsplines TYPE(spline1d) :: spl DOUBLE COMPLEX :: mat(:) DOUBLE COMPLEX, INTENT(in) :: alpha, beta ! INTEGER :: dim, nx, nidbas, ngauss, intv, igauss DOUBLE COMPLEX, ALLOCATABLE :: ft_fun(:,:) DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl, dim, nx, nidbas) CALL get_gauss(spl, ngauss) ALLOCATE(xgauss(ngauss), wgauss(ngauss)) ALLOCATE(ft_fun(0:nx-1,2)) ! Up to first derivative ! ! Weak form: integration on first interval ! intv = 1 CALL get_gauss(spl, ngauss, intv, xgauss, wgauss) mat = 0.0d0 DO igauss=1,ngauss CALL ft_basfun(xgauss(igauss), spl, ft_fun, intv) mat(:) = mat(:) + wgauss(igauss) * ( & & alpha*ft_fun(:,2)*CONJG(ft_fun(:,2)) & & - beta*ft_fun(:,1)*CONJG(ft_fun(:,1)) & & ) END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(ft_fun) DEALLOCATE(xgauss, wgauss) ! END SUBROUTINE dismat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE disrhs(spl, rhs, mmode, alpha, beta) ! TYPE(spline1d) :: spl INTEGER, INTENT(in) :: mmode DOUBLE COMPLEX, INTENT(in) :: alpha, beta DOUBLE COMPLEX, INTENT(out) :: rhs(:) ! INTEGER :: dim, nrank, nx, nidbas, ngauss INTEGER :: i, igauss, it, irow DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) DOUBLE COMPLEX :: contrib !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! nrank = SIZE(rhs) CALL get_dim(spl, dim, nx, nidbas) ! ALLOCATE(fun(0:nidbas,1)) ! needs only basis functions (no derivatives) ! ! Gauss quadature ! CALL get_gauss(spl, ngauss) ALLOCATE(xgauss(ngauss), wgauss(ngauss)) !=========================================================================== ! 2.0 Assembly loop ! rhs(:) = 0.0d0 ! DO i=1,nx CALL get_gauss(spl, ngauss, i, xgauss, wgauss) DO igauss=1,ngauss CALL basfun(xgauss(igauss), spl, fun, i) contrib = wgauss(igauss) * rhseq(xgauss(igauss)) DO it=0,nidbas irow=MODULO(i+it-1,nx) + 1 ! Periodic BC rhs(irow) = rhs(irow) + contrib*fun(it,1) END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(fun) DEALLOCATE(xgauss, wgauss) CONTAINS DOUBLE COMPLEX FUNCTION rhseq(x) DOUBLE PRECISION, INTENT(in) :: x DOUBLE PRECISION :: arg arg = mmode*x rhseq = (mmode**2*alpha-beta)*COS(arg) END FUNCTION rhseq END SUBROUTINE disrhs END MODULE pde1dp_cmpl_dft_mod !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ! PROGRAM main ! ! 1D complex PDE with periodic BC, using DFT ! USE pde1dp_cmpl_dft_mod USE bsplines USE matrix USE futils USE fft ! IMPLICIT NONE TYPE(spline1d) :: splx DOUBLE COMPLEX, ALLOCATABLE :: mat(:) INTEGER ::nrank ! CHARACTER(len=128) :: file='pde1dp_cmpl_dft.h5' INTEGER :: fid INTEGER :: nx, nidbas, ngauss, mmode, npt, dim DOUBLE PRECISION, PARAMETER :: pi=3.1415926535897931d0 DOUBLE PRECISION :: dx DOUBLE COMPLEX :: alpha, beta DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, x, err DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: sol, rhs, bcoef DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: sol_shifted, rhs_shifted DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: solcal, solana DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) DOUBLE PRECISION :: err_norm INTEGER :: i INTEGER :: k, kmin, kmax ! NAMELIST /newrun/ nx, nidbas, ngauss, alpha, beta, mmode, npt !=========================================================================== ! 1.0 Prologue ! ! Read in data specific to run ! nx = 8 ! Number oh intevals in x nidbas = 3 ! Degree of splines ngauss = 4 ! Number of Gauss points/interval mmode = 1 ! Fourier mode alpha = (1.0, 1.0) ! Complex "diffusion" beta = 1.0 npt = 100 ! READ(*,newrun) WRITE(*,newrun) ! ! Define grid on x axis ! ALLOCATE(xgrid(0:nx)) dx = 2.d0*pi/REAL(nx,8) xgrid = (/ (i*dx,i=0,nx) /) WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid ! ! Create hdf5 file ! CALL creatf(file, fid, 'PDE1DP Result File', real_prec='d') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NIDBAS', nidbas) CALL putarr(fid, '/xgrid', xgrid) !=========================================================================== ! 2.0 Discretize the PDE ! ! Set up periodic spline ! CALL set_spline(nidbas, ngauss, xgrid, splx, period=.TRUE.) nrank = nx ! Rank of the FE matrix CALL get_dim(splx, dim) WRITE(*,'(/a,4i6)') 'nrank, dim', nrank, dim ! ! Init DFT kmin = -nx/2 kmax = nx/2-1 CALL init_dft(splx, kmin, kmax) ! ! FE matrix assembly in Fourier space ! ALLOCATE(mat(0:nx-1)) CALL dismat(splx, mat, alpha, beta) CALL putarr(fid, '/mat', mat) ! ! RHS assembly in real space ! ALLOCATE(rhs(nrank), sol(nrank)) CALL disrhs(splx, rhs, mmode, alpha, beta) !=========================================================================== ! 3.0 Solve the dicretized system ! ! Fourier solve ! CALL putarr(fid, '/rhs', rhs) ! CALL fourcol(rhs, 1) ! ALLOCATE(rhs_shifted(kmin:kmax)) ALLOCATE(sol_shifted(kmin:kmax)) DO k=kmin,kmax rhs_shifted(k) = rhs(MODULO(k+nx,nx)+1)/REAL(nx,8) END DO sol_shifted = rhs_shifted / mat DO k=kmin,kmax sol(MODULO(k+nx,nx)+1) = sol_shifted(k) END DO ! CALL putarr(fid, '/rhs_fft', rhs) CALL putarr(fid, '/sol_fft', sol) ! ! Solution in real space ! CALL fourcol(sol,-1) CALL putarr(fid, '/sol', sol) !=========================================================================== ! 4.0 Check solution ! ! Exact solution ALLOCATE(x(0:npt), solcal(0:npt), solana(0:npt), err(0:npt)) dx=2.0d0*pi/REAL(npt,8) x = (/ (i*dx, i=0,npt) /) solana = COS(mmode*x) ! ! Prolongate solution using periodicity ! ALLOCATE(bcoef(dim)) bcoef(1:nrank) = sol(1:nrank) DO i=nrank+1,dim bcoef(i) = bcoef(MODULO(i-1,nrank)+1) END DO ! ! Interpolate field ! CALL gridval(splx, x, solcal, 0, bcoef) ! err = ABS(solcal-solana) CALL putarr(fid, '/x', x) CALL putarr(fid, '/solana', solana) CALL putarr(fid, '/solcal', solcal) CALL putarr(fid, '/err', err) ! ! Compute discretization error norm by Gauss integration ! err_norm=0.0 ALLOCATE(xgauss(ngauss), wgauss(ngauss)) DO i=1,nx CALL get_gauss(splx, ngauss, i, xgauss, wgauss) CALL gridval(splx, xgauss, solcal(1:ngauss), 0) solana(1:ngauss) = COS(mmode*xgauss) err(1:ngauss) = DOT_PRODUCT(solana(1:ngauss)-solcal(1:ngauss), & & solana(1:ngauss)-solcal(1:ngauss)) err_norm = err_norm + SUM(wgauss*err(1:ngauss)) END DO err_norm = SQRT(err_norm) WRITE(*,'(a,1pe12.3)') 'Discretization error ', err_norm !! !=========================================================================== ! 9.0 Clean up ! DEALLOCATE(xgrid) DEALLOCATE(mat) CALL destroy_sp(splx) CALL closef(fid) END PROGRAM main diff --git a/examples/pde1dp_cmpl_mumps.f90 b/examples/pde1dp_cmpl_mumps.f90 index ab14f2a..ef2f1d8 100644 --- a/examples/pde1dp_cmpl_mumps.f90 +++ b/examples/pde1dp_cmpl_mumps.f90 @@ -1,478 +1,478 @@ !> !> @file pde1dp_cmpl_mumps.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> MODULE pde1dp_cmpl_mumps_mod USE bsplines USE mumps_bsplines IMPLICIT NONE ! CONTAINS SUBROUTINE disrhs(spl, rhs, mmode, alpha, beta) ! TYPE(spline1d) :: spl INTEGER, INTENT(in) :: mmode DOUBLE COMPLEX, INTENT(in) :: alpha, beta DOUBLE COMPLEX, INTENT(out) :: rhs(:) ! INTEGER :: dim, nrank, nx, nidbas, ngauss INTEGER :: i, igauss, it, irow DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) DOUBLE COMPLEX :: contrib !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! nrank = SIZE(rhs) CALL get_dim(spl, dim, nx, nidbas) ! ALLOCATE(fun(0:nidbas,1)) ! needs only basis functions (no derivatives) ! ! Gauss quadature ! CALL get_gauss(spl, ngauss) ALLOCATE(xgauss(ngauss), wgauss(ngauss)) !=========================================================================== ! 2.0 Assembly loop ! rhs(:) = 0.0d0 ! DO i=1,nx CALL get_gauss(spl, ngauss, i, xgauss, wgauss) DO igauss=1,ngauss CALL basfun(xgauss(igauss), spl, fun, i) contrib = wgauss(igauss) * rhseq(xgauss(igauss)) DO it=0,nidbas irow=MODULO(i+it-1,nx) + 1 ! Periodic BC rhs(irow) = rhs(irow) + contrib*fun(it,1) END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(fun) DEALLOCATE(xgauss, wgauss) CONTAINS DOUBLE COMPLEX FUNCTION rhseq(x) DOUBLE PRECISION, INTENT(in) :: x DOUBLE PRECISION :: arg arg = mmode*x rhseq = (mmode**2*alpha-beta)*COS(arg) END FUNCTION rhseq END SUBROUTINE disrhs !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE dismat(spl, mat, alpha, beta) ! TYPE(spline1d) :: spl TYPE(zmumps_mat) :: mat DOUBLE COMPLEX, INTENT(in) :: alpha, beta ! INTEGER :: dim, nx, nidbas, ngauss INTEGER :: i, igauss, iterm, iw, jt, irow, jcol DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) DOUBLE COMPLEX :: contrib ! INTEGER :: kterms ! Number of terms in weak form INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order DOUBLE COMPLEX, ALLOCATABLE :: coefs(:) ! INTEGER :: istart, iend !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl, dim, nx, nidbas) ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative ! ! Weak form ! kterms = mat%nterms ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms)) ! ! Gauss quadature ! CALL get_gauss(spl, ngauss) ALLOCATE(xgauss(ngauss), wgauss(ngauss)) !=========================================================================== ! 2.0 Assembly loop ! istart = mat%istart iend = mat%iend DO i=1,nx CALL get_gauss(spl, ngauss, i, xgauss, wgauss) DO igauss=1,ngauss CALL basfun(xgauss(igauss), spl, fun, i) CALL coefeq(xgauss(igauss), idert, iderw, coefs) DO iterm=1,kterms DO iw=0,nidbas irow=MODULO(i+iw-1,nx) + 1 ! Periodic BC IF( irow.GE.istart .AND. irow.LE.iend) THEN DO jt=0,nidbas contrib = fun(jt,idert(iterm)) * coefs(iterm) * & & fun(iw,iderw(iterm)) * wgauss(igauss) jcol=MODULO(i+jt-1,nx) + 1 CALL updtmat(mat, irow, jcol, contrib) END DO END IF END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(fun) DEALLOCATE(xgauss, wgauss) DEALLOCATE(iderw, idert, coefs) ! CONTAINS SUBROUTINE coefeq(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE COMPLEX, INTENT(out) :: c(SIZE(idt)) ! c(1) = alpha idt(1) = 1 idw(1) = 1 ! c(2) = -beta idt(2) = 0 idw(2) = 0 END SUBROUTINE coefeq END SUBROUTINE dismat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ FUNCTION norm2(x) ! ! Compute the 2-norm of complex array x ! IMPLICIT NONE DOUBLE PRECISION :: norm2 DOUBLE COMPLEX, INTENT(in) :: x(:) DOUBLE PRECISION :: sum2 ! sum2 = DOT_PRODUCT(x,x) norm2 = SQRT(sum2) END FUNCTION norm2 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE meshdist(mmode, x) ! ! Construct a 1d non-equidistant mesh given a ! mesh distribution function. ! INTEGER, INTENT(in) :: mmode DOUBLE PRECISION, INTENT(inout) :: x(0:) INTEGER :: nx, nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! nx = SIZE(x)-1 a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = 2.0 + COS(mmode*x) END FUNCTION fdist END SUBROUTINE meshdist !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ END MODULE pde1dp_cmpl_mumps_mod !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ! PROGRAM main ! ! 1D complex PDE with periodic BC ! USE pde1dp_cmpl_mumps_mod USE futils ! IMPLICIT NONE INCLUDE 'mpif.h' TYPE(spline1d) :: splx TYPE(zmumps_mat) :: mat TYPE(zmumps_mat) :: newmat INTEGER :: kl, ku, nrank ! CHARACTER(len=128) :: file='pde1dp_cmpl_mumps.h5' INTEGER :: fid INTEGER :: nx, nidbas, ngauss, mmode, npt, dim LOGICAL :: nlequid LOGICAL :: nlsym, nlherm, nlpos DOUBLE PRECISION, PARAMETER :: pi=3.1415926535897931d0 DOUBLE PRECISION :: dx DOUBLE COMPLEX :: alpha, beta DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, x, err DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: sol, rhs, bcoef DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: newsol, arow DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: solcal, solana DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) DOUBLE PRECISION :: err_norm INTEGER :: i INTEGER :: ierr, me INTEGER :: nzfact ! NAMELIST /newrun/ nx, nidbas, ngauss, nlequid, alpha, beta, mmode, npt, & & nlsym, nlherm, nlpos !=========================================================================== ! 1.0 Prologue ! CALL mpi_init(ierr) CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) ! ! Read in data specific to run ! nx = 8 ! Number oh intevals in x nidbas = 3 ! Degree of splines ngauss = 4 ! Number of Gauss points/interval nlequid = .TRUE. ! Use exact sol. as mesh dist. function if .FALSE. mmode = 1 ! Fourier mode alpha = (1.0, 1.0) ! Complex "diffusion" beta = 1.0 npt = 100 nlsym = .TRUE. ! Is matrice symmetric nlherm = .FALSE. ! Is matrice hermitian nlpos = .TRUE. ! and positive definite ? ! IF(me.EQ.0) THEN READ(*,newrun) WRITE(*,newrun) END IF CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nidbas, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(ngauss, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nlequid, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(mmode, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(alpha, 1, MPI_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(beta, 1, MPI_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(npt, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nlsym, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nlherm, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nlpos, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) ! ! Define grid on x axis ! ALLOCATE(xgrid(0:nx)) dx = 2.d0*pi/REAL(nx,8) xgrid = (/ (i*dx,i=0,nx) /) IF( .NOT. nlequid ) THEN CALL meshdist(mmode, xgrid) END IF IF(me.EQ.0) WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid ! ! Create hdf5 file ! IF(me.EQ.0) THEN CALL creatf(file, fid, 'PDE1DP Result File', real_prec='d') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NIDBAS', nidbas) CALL putarr(fid, '/xgrid', xgrid) END IF !=========================================================================== ! 2.0 Discretize the PDE ! ! Set up periodic spline ! CALL set_spline(nidbas, ngauss, xgrid, splx, period=.TRUE.) CALL get_dim(splx, dim) nrank = nx ! Rank of the FE matrix ! ! FE matrix assembly ! CALL init(nrank, 2, mat, nlsym=nlsym, nlherm=nlherm, nlpos=nlpos) WRITE(*,'(a,i4.4,a,3i6)') 'PE', me, ' istart, iend, nloc', mat%istart, mat%iend, & & mat%iend-mat%istart+1 ! CALL dismat(splx, mat, alpha, beta) ! ! RHS assembly ! ALLOCATE(rhs(nrank), sol(nrank)) CALL disrhs(splx, rhs, mmode, alpha, beta) ! IF(me.EQ.0) CALL putarr(fid, '/rhs', rhs) ! ! Factor and solve ! CALL factor(mat, debug=.FALSE.) CALL bsolve(mat, rhs, sol, debug=.FALSE.) ! nzfact = mat%mumps_par%INFOG(29) IF(nzfact<0) THEN nzfact = -nzfact*1000000 END IF IF(me.EQ.0) THEN CALL putarr(fid, '/sol', sol) WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ',get_count(mat) WRITE(*,'(a,i8)') 'Number of nonzeros in factors of A = ',nzfact ! ! Compute residue ! WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(mat,sol)-rhs) END IF !=========================================================================== ! 3.0 Check solution ! IF(me.EQ.0) THEN ! ! Exact solution ALLOCATE(x(0:npt), solcal(0:npt), solana(0:npt), err(0:npt)) dx=2.0d0*pi/REAL(npt,8) x = (/ (i*dx, i=0,npt) /) solana = COS(mmode*x) ! ! Prolongate solution using periodicity ! ALLOCATE(bcoef(dim)) bcoef(1:nrank) = sol(1:nrank) DO i=nrank+1,dim bcoef(i) = bcoef(MODULO(i-1,nrank)+1) END DO ! ! Interpolate field ! CALL gridval(splx, x, solcal, 0, bcoef) ! err = ABS(solcal-solana) CALL putarr(fid, '/x', x) CALL putarr(fid, '/solana', solana) CALL putarr(fid, '/solcal', solcal) CALL putarr(fid, '/err', err) ! ! Compute discretization error norm by Gauss integration ! err_norm=0.0 ALLOCATE(xgauss(ngauss), wgauss(ngauss)) DO i=1,nx CALL get_gauss(splx, ngauss, i, xgauss, wgauss) CALL gridval(splx, xgauss, solcal(1:ngauss), 0) solana(1:ngauss) = COS(mmode*xgauss) err(1:ngauss) = DOT_PRODUCT(solana(1:ngauss)-solcal(1:ngauss), & & solana(1:ngauss)-solcal(1:ngauss)) err_norm = err_norm + SUM(wgauss*err(1:ngauss)) END DO err_norm = SQRT(err_norm) WRITE(*,'(a,1pe12.3)') 'Discretization error ', err_norm END IF ! !=========================================================================== ! 4.0 Test of getrow/putrow, getcol/putcol and mcopy ! CALL init(nrank, 2, newmat, nlsym=nlsym, nlherm=nlherm, nlpos=nlpos) ALLOCATE(arow(nrank), newsol(nrank)) ! DO i=1,nrank CALL getrow(mat, i, arow) CALL putrow(newmat, i, arow) END DO CALL factor(newmat) CALL bsolve(newmat, rhs, newsol) WRITE(*,'(/a)') 'putrow/getrow ...' WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(newmat,newsol)-rhs) WRITE(*,'(a,1pe12.3)') 'Error ', norm2(sol-newsol) ! DO i=1,nrank CALL getcol(mat, i, arow) CALL putcol(newmat, i, arow) END DO CALL factor(newmat) CALL bsolve(newmat, rhs, newsol) WRITE(*,'(/a)') 'putcol/getcol ...' WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(newmat,newsol)-rhs) WRITE(*,'(a,1pe12.3)') 'Error ', norm2(sol-newsol) ! CALL clear_mat(newmat) CALL mcopy(mat, newmat) WRITE(*,'(/a)') 'mcopy ...' newmat%val = (1000.0d0,0.0d0)*newmat%val CALL factor(newmat) CALL bsolve(newmat, rhs, newsol) WRITE(*,'(a)') 'Backsolve the new system' WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(newsol) ! WRITE(*,'(a)') 'Destroy NEWMAT ...' CALL destroy(newmat) ! CALL bsolve(mat, rhs, sol) WRITE(*,'(/a)') 'Backsolve the old system' WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(sol) ! WRITE(*,'(a)') 'Destroy MAT ...' CALL destroy(mat) ! !!$ WRITE(*,'(/a)') 'Should crash since NEWMAT is gone!' !!$ CALL bsolve(newmat, rhs, newsol) !=========================================================================== ! 9.0 Clean up ! IF(me.EQ.0) THEN DEALLOCATE(x, solcal, solana, err) DEALLOCATE(bcoef) DEALLOCATE(xgauss, wgauss) END IF DEALLOCATE(xgrid) DEALLOCATE(rhs, sol) DEALLOCATE(arow, newsol) CALL destroy_sp(splx) IF(me.EQ.0) CALL closef(fid) CALL mpi_finalize(ierr) END PROGRAM main diff --git a/examples/pde1dp_cmpl_pardiso.f90 b/examples/pde1dp_cmpl_pardiso.f90 index e3cf4a5..fccd3f6 100644 --- a/examples/pde1dp_cmpl_pardiso.f90 +++ b/examples/pde1dp_cmpl_pardiso.f90 @@ -1,457 +1,457 @@ !> !> @file pde1dp_cmpl_pardiso.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> MODULE pde1dp_cmpl_pardiso_mod USE bsplines USE pardiso_bsplines IMPLICIT NONE ! CONTAINS SUBROUTINE disrhs(spl, rhs, mmode, alpha, beta) ! TYPE(spline1d) :: spl INTEGER, INTENT(in) :: mmode DOUBLE COMPLEX, INTENT(in) :: alpha, beta DOUBLE COMPLEX, INTENT(out) :: rhs(:) ! INTEGER :: dim, nrank, nx, nidbas, ngauss INTEGER :: i, igauss, it, irow DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) DOUBLE COMPLEX :: contrib !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! nrank = SIZE(rhs) CALL get_dim(spl, dim, nx, nidbas) ! ALLOCATE(fun(0:nidbas,1)) ! needs only basis functions (no derivatives) ! ! Gauss quadature ! CALL get_gauss(spl, ngauss) ALLOCATE(xgauss(ngauss), wgauss(ngauss)) !=========================================================================== ! 2.0 Assembly loop ! rhs(:) = 0.0d0 ! DO i=1,nx CALL get_gauss(spl, ngauss, i, xgauss, wgauss) DO igauss=1,ngauss CALL basfun(xgauss(igauss), spl, fun, i) contrib = wgauss(igauss) * rhseq(xgauss(igauss)) DO it=0,nidbas irow=MODULO(i+it-1,nx) + 1 ! Periodic BC rhs(irow) = rhs(irow) + contrib*fun(it,1) END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(fun) DEALLOCATE(xgauss, wgauss) CONTAINS DOUBLE COMPLEX FUNCTION rhseq(x) DOUBLE PRECISION, INTENT(in) :: x DOUBLE PRECISION :: arg arg = mmode*x rhseq = (mmode**2*alpha-beta)*COS(arg) END FUNCTION rhseq END SUBROUTINE disrhs !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE dismat(spl, mat, alpha, beta) ! TYPE(spline1d) :: spl TYPE(zpardiso_mat) :: mat DOUBLE COMPLEX, INTENT(in) :: alpha, beta ! INTEGER :: dim, nx, nidbas, ngauss INTEGER :: i, igauss, iterm, iw, jt, irow, jcol DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) DOUBLE COMPLEX :: contrib ! INTEGER :: kterms ! Number of terms in weak form INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order DOUBLE COMPLEX, ALLOCATABLE :: coefs(:) !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl, dim, nx, nidbas) ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative ! ! Weak form ! kterms = mat%nterms ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms)) ! ! Gauss quadature ! CALL get_gauss(spl, ngauss) ALLOCATE(xgauss(ngauss), wgauss(ngauss)) !=========================================================================== ! 2.0 Assembly loop ! DO i=1,nx CALL get_gauss(spl, ngauss, i, xgauss, wgauss) DO igauss=1,ngauss CALL basfun(xgauss(igauss), spl, fun, i) CALL coefeq(xgauss(igauss), idert, iderw, coefs) DO iterm=1,kterms DO jt=0,nidbas DO iw=0,nidbas contrib = fun(jt,idert(iterm)) * coefs(iterm) * & & fun(iw,iderw(iterm)) * wgauss(igauss) irow=MODULO(i+iw-1,nx) + 1 ! Periodic BC jcol=MODULO(i+jt-1,nx) + 1 CALL updtmat(mat, irow, jcol, contrib) END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(fun) DEALLOCATE(xgauss, wgauss) DEALLOCATE(iderw, idert, coefs) ! CONTAINS SUBROUTINE coefeq(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE COMPLEX, INTENT(out) :: c(SIZE(idt)) ! c(1) = alpha idt(1) = 1 idw(1) = 1 ! c(2) = -beta idt(2) = 0 idw(2) = 0 END SUBROUTINE coefeq END SUBROUTINE dismat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ FUNCTION norm2(x) ! ! Compute the 2-norm of complex array x ! IMPLICIT NONE DOUBLE PRECISION :: norm2 DOUBLE COMPLEX, INTENT(in) :: x(:) DOUBLE PRECISION :: sum2 ! sum2 = DOT_PRODUCT(x,x) norm2 = SQRT(sum2) END FUNCTION norm2 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE meshdist(mmode, x) ! ! Construct a 1d non-equidistant mesh given a ! mesh distribution function. ! INTEGER, INTENT(in) :: mmode DOUBLE PRECISION, INTENT(inout) :: x(0:) INTEGER :: nx, nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! nx = SIZE(x)-1 a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = 2.0 + COS(mmode*x) END FUNCTION fdist END SUBROUTINE meshdist !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ END MODULE pde1dp_cmpl_pardiso_mod !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ! PROGRAM main ! ! 1D complex PDE with periodic BC ! USE pde1dp_cmpl_pardiso_mod USE futils USE conmat_mod ! IMPLICIT NONE TYPE(spline1d) :: splx TYPE(zpardiso_mat) :: mat TYPE(zpardiso_mat) :: newmat INTEGER :: kl, ku, nrank ! CHARACTER(len=128) :: file='pde1dp_cmpl_pardiso.h5' INTEGER :: fid INTEGER :: nx, nidbas, ngauss, mmode, npt, dim LOGICAL :: nlequid LOGICAL :: nlsym, nlherm, nlpos DOUBLE PRECISION, PARAMETER :: pi=3.1415926535897931d0 DOUBLE PRECISION :: dx DOUBLE COMPLEX :: alpha, beta DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, x, err DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: sol, rhs, bcoef DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: newsol, arow DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: solcal, solana DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) DOUBLE PRECISION :: err_norm INTEGER :: i ! NAMELIST /newrun/ nx, nidbas, ngauss, nlequid, alpha, beta, mmode, npt, & & nlsym, nlherm, nlpos !=========================================================================== ! 1.0 Prologue ! ! Read in data specific to run ! nx = 8 ! Number oh intevals in x nidbas = 3 ! Degree of splines ngauss = 4 ! Number of Gauss points/interval nlequid = .TRUE. ! Use exact sol. as mesh dist. function if .FALSE. mmode = 1 ! Fourier mode alpha = (1.0, 1.0) ! Complex "diffusion" beta = 1.0 npt = 100 nlsym = .TRUE. ! Is matrice symmetric nlherm = .FALSE. ! Is matrice hermitian nlpos = .TRUE. ! and positive definite ? ! READ(*,newrun) WRITE(*,newrun) ! ! Define grid on x axis ! ALLOCATE(xgrid(0:nx)) dx = 2.d0*pi/REAL(nx,8) xgrid = (/ (i*dx,i=0,nx) /) IF( .NOT. nlequid ) THEN CALL meshdist(mmode, xgrid) END IF WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid ! ! Create hdf5 file ! CALL creatf(file, fid, 'PDE1DP Result File', real_prec='d') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NIDBAS', nidbas) CALL putarr(fid, '/xgrid', xgrid) !=========================================================================== ! 2.0 Discretize the PDE ! ! Set up periodic spline ! CALL set_spline(nidbas, ngauss, xgrid, splx, period=.TRUE.) WRITE(*,'(a,l6)') 'nlequid =', nlequid nrank = nx ! Rank of the FE matrix ! ! FE matrix assembly ! CALL init(nrank, 2, mat, nlsym=nlsym, nlherm=nlherm, nlpos=nlpos) CALL get_dim(splx, dim) WRITE(*,'(/a,4i6)') 'nrank, dim', nrank, dim !!$ CALL dismat(splx, mat, alpha, beta) CALL conmat(splx, mat, coefeq) ! ! RHS assembly ! ALLOCATE(rhs(nrank), sol(nrank)) CALL disrhs(splx, rhs, mmode, alpha, beta) ! CALL putarr(fid, '/rhs', rhs) ! ! Factor and solve ! WRITE(*,'(a/(10i6))') 'iparm', mat%p%iparm CALL factor(mat) CALL putmat(fid,'/MAT', mat) CALL bsolve(mat, rhs, sol) CALL putarr(fid, '/sol', sol) WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ',get_count(mat) WRITE(*,'(a,i8)') 'Number of nonzeros in factors of A = ',mat%p%iparm(18) ! ! Compute residue ! WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(mat,sol)-rhs) !=========================================================================== ! 3.0 Check solution ! ! Exact solution ALLOCATE(x(0:npt), solcal(0:npt), solana(0:npt), err(0:npt)) dx=2.0d0*pi/REAL(npt,8) x = (/ (i*dx, i=0,npt) /) solana = COS(mmode*x) ! ! Prolongate solution using periodicity ! ALLOCATE(bcoef(dim)) bcoef(1:nrank) = sol(1:nrank) DO i=nrank+1,dim bcoef(i) = bcoef(MODULO(i-1,nrank)+1) END DO ! ! Interpolate field ! CALL gridval(splx, x, solcal, 0, bcoef) ! err = ABS(solcal-solana) CALL putarr(fid, '/x', x) CALL putarr(fid, '/solana', solana) CALL putarr(fid, '/solcal', solcal) CALL putarr(fid, '/err', err) ! ! Compute discretization error norm by Gauss integration ! err_norm=0.0 ALLOCATE(xgauss(ngauss), wgauss(ngauss)) DO i=1,nx CALL get_gauss(splx, ngauss, i, xgauss, wgauss) CALL gridval(splx, xgauss, solcal(1:ngauss), 0) solana(1:ngauss) = COS(mmode*xgauss) err(1:ngauss) = DOT_PRODUCT(solana(1:ngauss)-solcal(1:ngauss), & & solana(1:ngauss)-solcal(1:ngauss)) err_norm = err_norm + SUM(wgauss*err(1:ngauss)) END DO err_norm = SQRT(err_norm) WRITE(*,'(a,1pe12.3)') 'Discretization error ', err_norm ! !=========================================================================== ! 4.0 Test of getrow/putrow, getcol/putcol and mcopy ! CALL init(nrank, 2, newmat, nlsym=nlsym, nlherm=nlherm, nlpos=nlpos) ALLOCATE(arow(nrank), newsol(nrank)) ! DO i=1,nrank CALL getrow(mat, i, arow) CALL putrow(newmat, i, arow) END DO CALL factor(newmat) CALL bsolve(newmat, rhs, newsol) WRITE(*,'(/a)') 'putrow/getrow ...' WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(newmat,newsol)-rhs) WRITE(*,'(a,1pe12.3)') 'Error ', norm2(sol-newsol) ! DO i=1,nrank CALL getcol(mat, i, arow) CALL putcol(newmat, i, arow) END DO CALL factor(newmat) CALL bsolve(newmat, rhs, newsol) WRITE(*,'(/a)') 'putcol/getcol ...' WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(newmat,newsol)-rhs) WRITE(*,'(a,1pe12.3)') 'Error ', norm2(sol-newsol) ! CALL clear_mat(newmat) CALL mcopy(mat, newmat) WRITE(*,'(/a)') 'mcopy ...' newmat%val = (1000.0d0,0.0d0)*newmat%val CALL factor(newmat) CALL bsolve(newmat, rhs, newsol) WRITE(*,'(a)') 'Backsolve the new system' WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(newsol) ! WRITE(*,'(a)') 'Destroy NEWMAT ...' CALL destroy(newmat) ! CALL bsolve(mat, rhs, sol) WRITE(*,'(/a)') 'Backsolve the old system' WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(sol) ! WRITE(*,'(a)') 'Destroy MAT ...' CALL destroy(mat) !!$! !!$ WRITE(*,'(/a)') 'Should crash since NEWMAT is gone!' !!$ CALL bsolve(newmat, rhs, newsol) !!$ WRITE(*,'(a)') 'Backsolve the new system' !!$ WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(newsol) !=========================================================================== ! 9.0 Clean up ! DEALLOCATE(x, solcal, solana, err) DEALLOCATE(xgauss, wgauss) DEALLOCATE(bcoef) DEALLOCATE(xgrid) DEALLOCATE(rhs, sol) DEALLOCATE(arow, newsol) CALL destroy_sp(splx) CALL closef(fid) CONTAINS SUBROUTINE coefeq(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE COMPLEX, INTENT(out) :: c(:) ! c(1) = alpha idt(1) = 1 idw(1) = 1 ! c(2) = -beta idt(2) = 0 idw(2) = 0 END SUBROUTINE coefeq END PROGRAM main diff --git a/examples/pde1dp_cmpl_wsmp.f90 b/examples/pde1dp_cmpl_wsmp.f90 index 1760563..3e12386 100644 --- a/examples/pde1dp_cmpl_wsmp.f90 +++ b/examples/pde1dp_cmpl_wsmp.f90 @@ -1,436 +1,436 @@ !> !> @file pde1dp_cmpl_wsmp.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> MODULE pde1dp_cmpl_wsmp_mod USE bsplines USE wsmp_bsplines IMPLICIT NONE ! CONTAINS SUBROUTINE disrhs(spl, rhs, mmode, alpha, beta) ! TYPE(spline1d) :: spl INTEGER, INTENT(in) :: mmode DOUBLE COMPLEX, INTENT(in) :: alpha, beta DOUBLE COMPLEX, INTENT(out) :: rhs(:) ! INTEGER :: dim, nrank, nx, nidbas, ngauss INTEGER :: i, igauss, it, irow DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) DOUBLE COMPLEX :: contrib !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! nrank = SIZE(rhs) CALL get_dim(spl, dim, nx, nidbas) ! ALLOCATE(fun(0:nidbas,1)) ! needs only basis functions (no derivatives) ! ! Gauss quadature ! CALL get_gauss(spl, ngauss) ALLOCATE(xgauss(ngauss), wgauss(ngauss)) !=========================================================================== ! 2.0 Assembly loop ! rhs(:) = 0.0d0 ! DO i=1,nx CALL get_gauss(spl, ngauss, i, xgauss, wgauss) DO igauss=1,ngauss CALL basfun(xgauss(igauss), spl, fun, i) contrib = wgauss(igauss) * rhseq(xgauss(igauss)) DO it=0,nidbas irow=MODULO(i+it-1,nx) + 1 ! Periodic BC rhs(irow) = rhs(irow) + contrib*fun(it,1) END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(fun) DEALLOCATE(xgauss, wgauss) CONTAINS DOUBLE COMPLEX FUNCTION rhseq(x) DOUBLE PRECISION, INTENT(in) :: x DOUBLE PRECISION :: arg arg = mmode*x rhseq = (mmode**2*alpha-beta)*COS(arg) END FUNCTION rhseq END SUBROUTINE disrhs !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE dismat(spl, mat, alpha, beta) ! TYPE(spline1d) :: spl TYPE(zwsmp_mat) :: mat DOUBLE COMPLEX, INTENT(in) :: alpha, beta ! INTEGER :: dim, nx, nidbas, ngauss INTEGER :: i, igauss, iterm, iw, jt, irow, jcol DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) DOUBLE COMPLEX :: contrib ! INTEGER :: kterms ! Number of terms in weak form INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order DOUBLE COMPLEX, ALLOCATABLE :: coefs(:) !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl, dim, nx, nidbas) ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative ! ! Weak form ! kterms = mat%nterms ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms)) ! ! Gauss quadature ! CALL get_gauss(spl, ngauss) ALLOCATE(xgauss(ngauss), wgauss(ngauss)) !=========================================================================== ! 2.0 Assembly loop ! DO i=1,nx CALL get_gauss(spl, ngauss, i, xgauss, wgauss) DO igauss=1,ngauss CALL basfun(xgauss(igauss), spl, fun, i) CALL coefeq(xgauss(igauss), idert, iderw, coefs) DO iterm=1,kterms DO jt=0,nidbas DO iw=0,nidbas contrib = fun(jt,idert(iterm)) * coefs(iterm) * & & fun(iw,iderw(iterm)) * wgauss(igauss) irow=MODULO(i+iw-1,nx) + 1 ! Periodic BC jcol=MODULO(i+jt-1,nx) + 1 CALL updtmat(mat, irow, jcol, contrib) END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(fun) DEALLOCATE(xgauss, wgauss) DEALLOCATE(iderw, idert, coefs) ! CONTAINS SUBROUTINE coefeq(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE COMPLEX, INTENT(out) :: c(SIZE(idt)) ! c(1) = alpha idt(1) = 1 idw(1) = 1 ! c(2) = -beta idt(2) = 0 idw(2) = 0 END SUBROUTINE coefeq END SUBROUTINE dismat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ FUNCTION norm2(x) ! ! Compute the 2-norm of complex array x ! IMPLICIT NONE DOUBLE PRECISION :: norm2 DOUBLE COMPLEX, INTENT(in) :: x(:) DOUBLE PRECISION :: sum2 ! sum2 = DOT_PRODUCT(x,x) norm2 = SQRT(sum2) END FUNCTION norm2 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE meshdist(mmode, x) ! ! Construct a 1d non-equidistant mesh given a ! mesh distribution function. ! INTEGER, INTENT(in) :: mmode DOUBLE PRECISION, INTENT(inout) :: x(0:) INTEGER :: nx, nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! nx = SIZE(x)-1 a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = 2.0 + COS(mmode*x) END FUNCTION fdist END SUBROUTINE meshdist !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ END MODULE pde1dp_cmpl_wsmp_mod !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ! PROGRAM main ! ! 1D complex PDE with periodic BC ! USE pde1dp_cmpl_wsmp_mod USE futils ! IMPLICIT NONE TYPE(spline1d) :: splx TYPE(zwsmp_mat) :: mat TYPE(zwsmp_mat) :: newmat INTEGER :: kl, ku, nrank ! CHARACTER(len=128) :: file='pde1dp_cmpl_wsmp.h5' INTEGER :: fid INTEGER :: nx, nidbas, ngauss, mmode, npt, dim LOGICAL :: nlequid LOGICAL :: nlsym, nlherm, nlpos DOUBLE PRECISION, PARAMETER :: pi=3.1415926535897931d0 DOUBLE PRECISION :: dx DOUBLE COMPLEX :: alpha, beta DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, x, err DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: sol, rhs, bcoef DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: newsol, arow DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: solcal, solana DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) DOUBLE PRECISION :: err_norm INTEGER :: i ! NAMELIST /newrun/ nx, nidbas, ngauss, nlequid, alpha, beta, mmode, npt, & & nlsym, nlherm, nlpos !=========================================================================== ! 1.0 Prologue ! ! Read in data specific to run ! nx = 8 ! Number oh intevals in x nidbas = 3 ! Degree of splines ngauss = 4 ! Number of Gauss points/interval nlequid = .TRUE. ! Use exact sol. as mesh dist. function if .FALSE. mmode = 1 ! Fourier mode alpha = (1.0, 1.0) ! Complex "diffusion" beta = 1.0 npt = 100 nlsym = .TRUE. ! Is matrice symmetric nlherm = .FALSE. ! Is matrice hermitian nlpos = .TRUE. ! and positive definite ? ! READ(*,newrun) WRITE(*,newrun) ! ! Define grid on x axis ! ALLOCATE(xgrid(0:nx)) dx = 2.d0*pi/REAL(nx,8) xgrid = (/ (i*dx,i=0,nx) /) IF( .NOT. nlequid ) THEN CALL meshdist(mmode, xgrid) END IF WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid ! ! Create hdf5 file ! CALL creatf(file, fid, 'PDE1DP Result File', real_prec='d') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NIDBAS', nidbas) CALL putarr(fid, '/xgrid', xgrid) !=========================================================================== ! 2.0 Discretize the PDE ! ! Set up periodic spline ! CALL set_spline(nidbas, ngauss, xgrid, splx, period=.TRUE.) WRITE(*,'(a,l6)') 'nlequid =', nlequid nrank = nx ! Rank of the FE matrix ! ! FE matrix assembly ! CALL init(nrank, 2, mat, nlsym=nlsym, nlherm=nlherm, nlpos=nlpos) CALL get_dim(splx, dim) WRITE(*,'(/a,4i6)') 'nrank, dim', nrank, dim CALL dismat(splx, mat, alpha, beta) ! ! RHS assembly ! ALLOCATE(rhs(nrank), sol(nrank)) CALL disrhs(splx, rhs, mmode, alpha, beta) ! CALL putarr(fid, '/rhs', rhs) ! ! Factor and solve ! !!$ CALL factor(mat, nlmetis=.TRUE.) CALL factor(mat) CALL putmat(fid,'/MAT', mat) CALL bsolve(mat, rhs, sol) CALL putarr(fid, '/sol', sol) WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ',get_count(mat) WRITE(*,'(a,i8)') 'Number of nonzeros in factors of A = ',mat%p%iparm(18) ! ! Compute residue ! WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(mat,sol)-rhs) !=========================================================================== ! 3.0 Check solution ! ! Exact solution ALLOCATE(x(0:npt), solcal(0:npt), solana(0:npt), err(0:npt)) dx=2.0d0*pi/REAL(npt,8) x = (/ (i*dx, i=0,npt) /) solana = COS(mmode*x) ! ! Prolongate solution using periodicity ! ALLOCATE(bcoef(dim)) bcoef(1:nrank) = sol(1:nrank) DO i=nrank+1,dim bcoef(i) = bcoef(MODULO(i-1,nrank)+1) END DO ! ! Interpolate field ! CALL gridval(splx, x, solcal, 0, bcoef) ! err = ABS(solcal-solana) CALL putarr(fid, '/x', x) CALL putarr(fid, '/solana', solana) CALL putarr(fid, '/solcal', solcal) CALL putarr(fid, '/err', err) ! ! Compute discretization error norm by Gauss integration ! err_norm=0.0 ALLOCATE(xgauss(ngauss), wgauss(ngauss)) DO i=1,nx CALL get_gauss(splx, ngauss, i, xgauss, wgauss) CALL gridval(splx, xgauss, solcal(1:ngauss), 0) solana(1:ngauss) = COS(mmode*xgauss) err(1:ngauss) = DOT_PRODUCT(solana(1:ngauss)-solcal(1:ngauss), & & solana(1:ngauss)-solcal(1:ngauss)) err_norm = err_norm + SUM(wgauss*err(1:ngauss)) END DO err_norm = SQRT(err_norm) WRITE(*,'(a,1pe12.3)') 'Discretization error ', err_norm ! !=========================================================================== ! 4.0 Test of getrow/putrow, getcol/putcol and mcopy ! CALL init(nrank, 2, newmat, nlsym=nlsym, nlherm=nlherm, nlpos=nlpos) ALLOCATE(arow(nrank), newsol(nrank)) ! DO i=1,nrank CALL getrow(mat, i, arow) CALL putrow(newmat, i, arow) END DO CALL factor(newmat) CALL bsolve(newmat, rhs, newsol) WRITE(*,'(/a)') 'putrow/getrow ...' WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(newmat,newsol)-rhs) WRITE(*,'(a,1pe12.3)') 'Error ', norm2(sol-newsol) ! DO i=1,nrank CALL getcol(mat, i, arow) CALL putcol(newmat, i, arow) END DO CALL factor(newmat) CALL bsolve(newmat, rhs, newsol) WRITE(*,'(/a)') 'putcol/getcol ...' WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(newmat,newsol)-rhs) WRITE(*,'(a,1pe12.3)') 'Error ', norm2(sol-newsol) ! CALL clear_mat(newmat) CALL mcopy(mat, newmat) WRITE(*,'(/a)') 'mcopy ...' newmat%val = (1000.0d0,0.0d0)*newmat%val CALL factor(newmat) CALL bsolve(newmat, rhs, newsol) WRITE(*,'(a, i3)') 'Backsolve the system', newmat%matid WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(newsol) ! CALL bsolve(mat, rhs, sol) WRITE(*,'(a, i3)') 'Backsolve the system', mat%matid WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(sol) ! CALL bsolve(newmat, rhs, newsol) WRITE(*,'(a, i3)') 'Backsolve the system', newmat%matid WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(newsol) !=========================================================================== ! 9.0 Clean up ! DEALLOCATE(x, solcal, solana, err) DEALLOCATE(xgauss, wgauss) DEALLOCATE(bcoef) DEALLOCATE(xgrid) DEALLOCATE(rhs, sol) DEALLOCATE(arow, newsol) CALL destroy(mat) CALL destroy(newmat) CALL destroy_sp(splx) CALL closef(fid) END PROGRAM main diff --git a/examples/pde1dp_mod.f90 b/examples/pde1dp_mod.f90 index 0a508ac..7c8f6ba 100644 --- a/examples/pde1dp_mod.f90 +++ b/examples/pde1dp_mod.f90 @@ -1,225 +1,225 @@ !> !> @file pde1dp_mod.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> MODULE pde1dp_mod ! USE bsplines USE matrix IMPLICIT NONE DOUBLE PRECISION, ALLOCATABLE :: bcoef(:) TYPE(spline1d), SAVE :: splx ! CONTAINS SUBROUTINE meshdist(c, x, nx) ! ! Construct an 1d non-equidistant mesh given a ! mesh distribution function defined in FDIST ! DOUBLE PRECISION, INTENT(in) :: c(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) INTEGER :: nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) END FUNCTION fdist END SUBROUTINE meshdist !+++ SUBROUTINE dismat(spl, mat) ! ! Assembly FE matrix (with periodic BC) mat using spline spl ! TYPE(spline1d), INTENT(in) :: spl TYPE(periodic_mat), INTENT(inout) :: mat INTEGER :: dim, nx, nidbas, ngauss INTEGER :: i, igauss, iterm, iw, jt, irow, jcol DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) DOUBLE PRECISION :: contrib ! INTEGER :: kterms ! Number of terms in weak form INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order DOUBLE PRECISION, ALLOCATABLE :: coefs(:) !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl, dim, nx, nidbas) ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative ! ! Weak form ! kterms = mat%mat%nterms ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms)) ! ! Gauss quadature ! CALL get_gauss(spl, ngauss) ALLOCATE(xgauss(ngauss), wgauss(ngauss)) !=========================================================================== ! 2.0 Assembly loop ! DO i=1,nx CALL get_gauss(spl, ngauss, i, xgauss, wgauss) DO igauss=1,ngauss CALL basfun(xgauss(igauss), spl, fun, i) CALL coefeq(xgauss(igauss), idert, iderw, coefs) DO iterm=1,kterms DO jt=0,nidbas DO iw=0,nidbas contrib = fun(jt,idert(iterm)) * coefs(iterm) * & & fun(iw,iderw(iterm)) * wgauss(igauss) irow=MODULO(i+iw-1,nx) + 1 ! Periodic BC jcol=MODULO(i+jt-1,nx) + 1 CALL updtmat(mat, irow, jcol, contrib) END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(fun) DEALLOCATE(xgauss, wgauss) DEALLOCATE(iderw, idert, coefs) END SUBROUTINE dismat ! SUBROUTINE coefeq(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt)) ! ! Mass matrix ! c(1) = 1.0d0 idt(1) = 0 idw(1) = 0 END SUBROUTINE coefeq !+++ SUBROUTINE disrhs(spl, rhs) ! ! Assenbly the RHS using spline spl ! TYPE(spline1d), INTENT(in) :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:) INTEGER :: dim, nrank, nx, nidbas, ngauss INTEGER :: i, igauss, it, irow DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) DOUBLE PRECISION:: contrib !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! nrank = SIZE(rhs) CALL get_dim(spl, dim, nx, nidbas) ! ALLOCATE(fun(0:nidbas,1)) ! needs only basis functions (no derivatives) ! ! Gauss quadature ! CALL get_gauss(spl, ngauss) ALLOCATE(xgauss(ngauss), wgauss(ngauss)) !=========================================================================== ! 2.0 Assembly loop ! rhs(:) = 0.0d0 ! DO i=1,nx CALL get_gauss(spl, ngauss, i, xgauss, wgauss) DO igauss=1,ngauss CALL basfun(xgauss(igauss), spl, fun, i) contrib = wgauss(igauss) * rhseq(xgauss(igauss)) DO it=0,nidbas irow=MODULO(i+it-1,nx) + 1 ! Periodic BC rhs(irow) = rhs(irow) + contrib*fun(it,1) END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(fun) DEALLOCATE(xgauss, wgauss) END SUBROUTINE disrhs ! DOUBLE PRECISION FUNCTION rhseq(x) DOUBLE PRECISION, INTENT(in) :: x DOUBLE PRECISION :: xarr(1), farr(1) INTEGER, SAVE :: icall =0 xarr(1) = x IF( icall.EQ.0 ) THEN icall = icall+1 CALL gridval(splx, xarr, farr, 0, bcoef) ELSE CALL gridval(splx, xarr, farr, 0) END IF rhseq = farr(1) END FUNCTION rhseq END MODULE pde1dp_mod diff --git a/examples/pde2d.f90 b/examples/pde2d.f90 index b881711..f46883f 100644 --- a/examples/pde2d.f90 +++ b/examples/pde2d.f90 @@ -1,409 +1,409 @@ !> !> @file pde2d.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Solving the following 2d PDE using splines: ! ! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0 ! exact solution: f(x,y) = (1-x^2) x^m cos(my) ! USE bsplines USE matrix USE futils USE conmat_mod ! IMPLICIT NONE INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms LOGICAL :: nlppform, nlconmat INTEGER :: i, j, ij, dimx, dimy, nrank, kl, ku, jder(2), it DOUBLE PRECISION :: pi, coefx(5), coefy(5) DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol TYPE(spline2d) :: splxy TYPE(gbmat) :: mat ! CHARACTER(len=128) :: file='pde2d.h5' INTEGER :: fid DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol DOUBLE PRECISION :: seconds, mem, dopla DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2 INTEGER :: nits=500 ! INTERFACE SUBROUTINE dismat(spl, mat) USE bsplines USE matrix TYPE(spline2d), INTENT(in) :: spl TYPE(gbmat), INTENT(inout) :: mat END SUBROUTINE dismat SUBROUTINE disrhs(mbess, spl, rhs) USE bsplines INTEGER, INTENT(in) :: mbess TYPE(spline2d), INTENT(in) :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:) END SUBROUTINE disrhs SUBROUTINE meshdist(coefs, x, nx) DOUBLE PRECISION, INTENT(in) :: coefs(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) END SUBROUTINE meshdist SUBROUTINE ibcmat(mat, ny) USE matrix TYPE(gbmat), INTENT(inout) :: mat INTEGER, INTENT(in) :: ny END SUBROUTINE ibcmat SUBROUTINE ibcrhs(rhs, ny) DOUBLE PRECISION, INTENT(inout) :: rhs(:) INTEGER, INTENT(in) :: ny END SUBROUTINE ibcrhs !!$ SUBROUTINE coefeq_poisson(x, y, idt, idw, c) !!$ DOUBLE PRECISION, INTENT(in) :: x, y !!$ INTEGER, INTENT(out) :: idt(:,:), idw(:,:) !!$ DOUBLE PRECISION, INTENT(out) :: c(:) !!$ END SUBROUTINE coefeq_poisson END INTERFACE ! NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlconmat, & & coefx, coefy !=========================================================================== ! 1.0 Prologue ! ! Read in data specific to run ! nx = 8 ! Number of intervals in x ny = 8 ! Number of intervals in y nidbas = (/3,3/) ! Degree of splines ngauss = (/4,4/) ! Number of Gauss points/interval mbess = 2 ! Exponent of differential problem nterms = 2 ! Number of terms in weak form nlppform = .TRUE. ! Use PPFORM for gridval or not nlconmat = .TRUE. ! Use CONMAT instead of DISMAT coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function ! READ(*,newrun) WRITE(*,newrun) ! ! Define grid on x (=radial) & y (=poloidal) axis ! pi = 4.0d0*ATAN(1.0d0) ALLOCATE(xgrid(0:nx), ygrid(0:ny)) xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 CALL meshdist(coefx, xgrid, nx) ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi CALL meshdist(coefy, ygrid, ny) ! WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny) ! ! Create hdf5 file ! CALL creatf(file, fid, 'PDE2D Result File', real_prec='d') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NY', ny) CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) CALL attach(fid, '/', 'NGAUSS1', ngauss(1)) CALL attach(fid, '/', 'NGAUSS2', ngauss(2)) CALL attach(fid, '/', 'MBESS', mbess) !=========================================================================== ! 2.0 Discretize the PDE ! ! Set up spline ! CALL set_spline(nidbas, ngauss, & & xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform) !!$ WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots !!$ WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots ! ! FE matrix assembly ! nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix kl = (nidbas(1)+1)*ny -1 ! Number of sub-diagnonals ku = kl ! Number of super-diagnonals WRITE(*,'(a,5i6)') 'nrank, kl, ku', nrank, kl, ku ! CALL init(kl, ku, nrank, nterms, mat) t0 = seconds() IF(nlconmat) THEN CALL conmat(splxy, mat, coefeq_poisson) ELSE CALL dismat(splxy, mat) END IF tmat = seconds() - t0 CALL putmat(fid, '/MAT0', mat, 'Assembled GB matrice') ALLOCATE(arr(nrank)) ! ! BC on Matrix ! IF(nrank.LT.100) & & WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', mat%val(kl+ku+1,:) CALL ibcmat(mat, ny) IF(nrank.LT.100) & & WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', mat%val(kl+ku+1,:) ! ! RHS assembly ! ALLOCATE(rhs(nrank), sol(nrank)) CALL disrhs(mbess, splxy, rhs) ! ! BC on RHS ! CALL ibcrhs(rhs, ny) CALL putarr(fid, '/RHS', rhs, 'RHS of linear system') CALL putmat(fid, '/MAT1', mat, 'GB matrice with BC') WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem() !=========================================================================== ! 3.0 Solve the dicretized system ! t0 = seconds() CALL factor(mat) tfact = seconds() - t0 gflops1 = dopla('DGBTRF',nrank,nrank,kl,ku,0) / tfact / 1.d9 t0 = seconds() CALL bsolve(mat, rhs, sol) ! ! Backtransform of solution ! sol(1:ny-1) = sol(ny) ! ! Spline coefficients, taking into account of periodicity in y ! Note: in SOL, y was numbered first. ! dimx = splxy%sp1%dim dimy = splxy%sp2%dim ALLOCATE(bcoef(0:dimx-1, 0:dimy-1)) DO j=0,dimy-1 DO i=0,dimx-1 ij = MODULO(j,ny) + i*ny + 1 bcoef(i,j) = sol(ij) END DO END DO WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef) ! tsolv = seconds() - t0 gflops2 = dopla('DGBTRS',nrank,1,kl,ku,0) / tsolv / 1.d9 CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution') !=========================================================================== ! 4.0 Check the solution ! ! Check function values computed with various method ! ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny)) DO i=0,nx DO j=0,ny solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j)) END DO END DO jder = (/0,0/) ! ! Compute PPFORM/BCOEFS at first call to gridval CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef) ! WRITE(*,'(/a)') '*** Checking solutions' t0 = seconds() DO it=1,nits ! nits iterations for timing CALL gridval(splxy, xgrid, ygrid, solcal, jder) END DO tgrid = (seconds() - t0)/REAL(nits) errsol = solana - solcal IF( SIZE(bcoef,2) .LE. 10 ) THEN CALL prnmat('BCOEF', bcoef) CALL prnmat('SOLANA', solana) CALL prnmat('SOLCAL', solcal) CALL prnmat('ERRSOL', errsol) END IF WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & & norm2(errsol) / norm2(solana) WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid ! CALL putarr(fid, '/xgrid', xgrid, 'r') CALL putarr(fid, '/ygrid', ygrid, '\theta') CALL putarr(fid, '/sol', solcal, 'Solutions') CALL putarr(fid, '/solana', solana,'Exact solutions') CALL putarr(fid, '/errors', errsol, 'Errors') ! ! Check derivatives d/dx and d/dy ! WRITE(*,'(/a)') '*** Checking gradient' DO i=0,nx DO j=0,ny IF( mbess .EQ. 0 ) THEN solana(i,j) = -2.0d0 * xgrid(i) ELSE solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * & & xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) END IF END DO END DO ! jder = (/1,0/) CALL gridval(splxy, xgrid, ygrid, solcal, jder) errsol = solana - solcal CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions') WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol) ! DO i=0,nx DO j=0,ny solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j)) END DO END DO ! jder = (/0,1/) CALL gridval(splxy, xgrid, ygrid, solcal, jder) CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions') errsol = solana - solcal WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol) !=========================================================================== ! 9.0 Epilogue ! WRITE(*,'(/a)') '---' WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv WRITE(*,'(a,2f10.3)') 'Factor/solve Gflop/s', gflops1, gflops2 ! DEALLOCATE(xgrid, rhs, sol) DEALLOCATE(solcal, solana, errsol) DEALLOCATE(bcoef) DEALLOCATE(arr) CALL destroy_sp(splxy) CALL destroy(mat) ! CALL closef(fid) !=========================================================================== ! CONTAINS SUBROUTINE coefeq_poisson(x, y, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x, y INTEGER, INTENT(out) :: idt(:,:), idw(:,:) DOUBLE PRECISION, INTENT(out) :: c(:) ! ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy ! c(1) = x ! idt(1,1) = 1 idt(1,2) = 0 idw(1,1) = 1 idw(1,2) = 0 ! c(2) = 1.d0/x idt(2,1) = 0 idt(2,2) = 1 idw(2,1) = 0 idw(2,2) = 1 END SUBROUTINE coefeq_poisson ! !+++ FUNCTION norm2(x) ! ! Compute the 2-norm of array x ! IMPLICIT NONE DOUBLE PRECISION :: norm2 DOUBLE PRECISION, INTENT(in) :: x(:,:) DOUBLE PRECISION :: sum2 INTEGER :: i, j ! sum2 = 0.0d0 DO i=1,SIZE(x,1) DO j=1,SIZE(x,2) sum2 = sum2 + x(i,j)**2 END DO END DO norm2 = SQRT(sum2) END FUNCTION norm2 SUBROUTINE prnmat(label, mat) CHARACTER(len=*) :: label DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat INTEGER :: i WRITE(*,'(/a)') TRIM(label) DO i=1,SIZE(mat,1) WRITE(*,'(10(1pe12.3))') mat(i,:) END DO END SUBROUTINE prnmat END PROGRAM main ! !+++ SUBROUTINE meshdist(c, x, nx) ! ! Construct an 1d non-equidistant mesh given a ! mesh distribution function. ! IMPLICIT NONE DOUBLE PRECISION, INTENT(in) :: c(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) INTEGER :: nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal !!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) END FUNCTION fdist END SUBROUTINE meshdist !+++ diff --git a/examples/pde2d_mumps.f90 b/examples/pde2d_mumps.f90 index 772fce2..8a2e120 100644 --- a/examples/pde2d_mumps.f90 +++ b/examples/pde2d_mumps.f90 @@ -1,937 +1,937 @@ !> !> @file pde2d_mumps.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> ! ! Solving the 2d PDE using splines and MUMPS non-symmetric and symmetric ! matrix ! ! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0 ! exact solution: f(x,y) = (1-x^2) x^m cos(my) ! MODULE pde2d_mumps_mod USE bsplines USE mumps_bsplines IMPLICIT NONE CONTAINS !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE dismat(spl, mat) ! ! Assembly of FE matrix mat using spline spl ! TYPE(spline2d), INTENT(in) :: spl TYPE(mumps_mat), INTENT(inout) :: mat ! INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: n2, nidbas2, ndim2, ng2 INTEGER :: i, j, ig1, ig2 INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:) DOUBLE PRECISION:: contrib ! INTEGER :: kterms ! Number of terms in weak form INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:) ! Derivative order DOUBLE PRECISION, ALLOCATABLE :: coefs(:,:,:) ! Terms in weak form INTEGER, ALLOCATABLE :: left1(:), left2(:) ! INTEGER :: istart, iend !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) !!$ WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1 !!$ WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2 ! ! Gauss quadature ! CALL get_gauss(spl%sp1, ng1) CALL get_gauss(spl%sp2, ng2) ALLOCATE(xg1(ng1), wg1(ng1)) ALLOCATE(xg2(ng1), wg2(ng1)) ! ! Weak form ! kterms = mat%nterms ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2)) ALLOCATE(coefs(kterms,ng1,ng2)) ! ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! ! ! Matrix partition ! istart = mat%istart iend = mat%iend !=========================================================================== ! 2.0 Assembly loop ! ALLOCATE(left1(ng1)) ALLOCATE(left2(ng2)) DO i=1,n1 CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) left1 = i CALL basfun(xg1, spl%sp1, fun1, left1) DO j=1,n2 CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) left2 = j CALL basfun(xg2, spl%sp2, fun2, left2) ! DO ig1=1,ng1 DO ig2=1,ng2 CALL coefeq(xg1(ig1), xg2(ig2), & & idert(:,:,ig1,ig2), & & iderw(:,:,ig1,ig2), & & coefs(:,ig1,ig2)) END DO END DO ! DO iw1=0,nidbas1 ! Weight function in dir 1 igw1 = i+iw1 DO iw2=0,nidbas2 ! Weight function in dir 2 igw2 = MODULO(j+iw2-1, n2) + 1 irow = igw2 + (igw1-1)*n2 IF( irow.GE.istart .AND. irow.LE.iend) THEN DO it1=0,nidbas1 ! Test function in dir 1 igt1 = i+it1 DO it2=0,nidbas2 ! Test function in dir 2 igt2 = MODULO(j+it2-1, n2) + 1 jcol = igt2 + (igt1-1)*n2 !------------- contrib = 0.0d0 DO ig1=1,ng1 DO ig2=1,ng2 DO iterm=1,kterms contrib = contrib + & & fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * & & fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * & & coefs(iterm,ig1,ig2) * & & fun2(it2,idert(iterm,2,ig1,ig2),ig2) * & & fun1(it1,idert(iterm,1,ig1,ig2),ig1) * & & wg1(ig1) * wg2(ig2) END DO END DO END DO CALL updtmat(mat, irow, jcol, contrib) !------------- END DO END DO END IF END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xg1, wg1, fun1) DEALLOCATE(xg2, wg2, fun2) DEALLOCATE(idert, iderw, coefs) DEALLOCATE(left1,left2) ! CONTAINS SUBROUTINE coefeq(x, y, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x, y INTEGER, INTENT(out) :: idt(:,:), idw(:,:) DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1)) ! ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy ! c(1) = x ! idt(1,1) = 1 idt(1,2) = 0 idw(1,1) = 1 idw(1,2) = 0 ! c(2) = 1.d0/x idt(2,1) = 0 idt(2,2) = 1 idw(2,1) = 0 idw(2,2) = 1 END SUBROUTINE coefeq END SUBROUTINE dismat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE disrhs(mbess, spl, rhs) ! ! Assembly the RHS using 2d spline spl ! INTEGER, INTENT(in) :: mbess TYPE(spline2d), INTENT(in) :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:) INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: n2, nidbas2, ndim2, ng2 INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) DOUBLE PRECISION:: contrib !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) ! ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) ! ! Gauss quadature ! CALL get_gauss(spl%sp1, ng1) CALL get_gauss(spl%sp2, ng2) ALLOCATE(xg1(ng1), wg1(ng1)) ALLOCATE(xg2(ng1), wg2(ng1)) !=========================================================================== ! 2.0 Assembly loop ! nrank = SIZE(rhs) rhs(1:nrank) = 0.0d0 ! DO i=1,n1 CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) DO ig1=1,ng1 CALL basfun(xg1(ig1), spl%sp1, fun1, i) DO j=1,n2 CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) DO ig2=1,ng2 CALL basfun(xg2(ig2), spl%sp2, fun2, j) contrib = wg1(ig1)*wg2(ig2) * & & rhseq(xg1(ig1),xg2(ig2), mbess) DO k1=0,nidbas1 i1 = i+k1 DO k2=0,nidbas2 j2 = MODULO(j+k2-1,n2) + 1 ij = j2 + (i1-1)*n2 rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1) END DO END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xg1, wg1, fun1) DEALLOCATE(xg2, wg2, fun2) ! CONTAINS DOUBLE PRECISION FUNCTION rhseq(x1, x2, m) DOUBLE PRECISION, INTENT(in) :: x1, x2 INTEGER, INTENT(in) :: m rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2) END FUNCTION rhseq END SUBROUTINE disrhs ! !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE ibcmat(mat, ny) ! ! Apply BC on matrix ! TYPE(mumps_mat), INTENT(inout) :: mat INTEGER, INTENT(in) :: ny INTEGER :: nrank, i, j DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:) !=========================================================================== ! 1.0 Prologue ! nrank = mat%rank !=========================================================================== ! 2.0 Unicity at the axis ! ! The vertical sum on the NY-th row ! ALLOCATE(zsum(nrank), arr(nrank)) zsum = 0.0d0 DO i=1,ny arr = 0.0d0 CALL getrow(mat, i, arr) zsum(:) = zsum(:) + arr(:) END DO IF(mat%nlsym) THEN zsum(ny) = SUM(zsum(1:ny)) ! using symmetry END IF CALL putrow(mat, ny, zsum) ! ! The horizontal sum on the NY-th column ! IF( .NOT.mat%nlsym) THEN zsum = 0.0d0 DO j=1,ny arr = 0.0d0 CALL getcol(mat, j, arr) zsum(ny:) = zsum(ny:) + arr(ny:) END DO CALL putcol(mat, ny, zsum) END IF ! ! The away operator ! IF( .NOT.mat%nlsym) THEN DO j = 1,ny-1 arr = 0.0d0; arr(j) = 1.0d0 CALL putcol(mat, j, arr) END DO END IF ! DO i = 1,ny-1 arr = 0.0d0; arr(i) = 1.0d0 CALL putrow(mat, i, arr) END DO DEALLOCATE(zsum) DEALLOCATE(arr) ! !=========================================================================== ! 3.0 Dirichlet on right boundary ! ALLOCATE(arr(nrank)) IF( .NOT.mat%nlsym) THEN DO j = nrank, nrank-ny+1, -1 arr = 0.0d0; arr(j) = 1.0d0 CALL putcol(mat, j, arr) END DO END IF ! DO i = nrank, nrank-ny+1, -1 arr = 0.0d0; arr(i) = 1.0d0 CALL putrow(mat, i, arr) END DO DEALLOCATE(arr) !=========================================================================== ! 9.0 Epilogue ! END SUBROUTINE ibcmat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE ibcrhs(rhs, ny) ! ! Apply BC on RHS ! DOUBLE PRECISION, INTENT(inout) :: rhs(:) INTEGER, INTENT(in) :: ny INTEGER :: nrank DOUBLE PRECISION :: zsum !=========================================================================== ! 1.0 Prologue ! nrank = SIZE(rhs,1) !=========================================================================== ! 2.0 Unicity at the axis ! ! The vertical sum ! zsum = SUM(rhs(1:ny)) rhs(ny) = zsum rhs(1:ny-1) = 0.0d0 !=========================================================================== ! 3.0 Dirichlet on right boundary ! rhs(nrank-ny+1:nrank) = 0.0d0 END SUBROUTINE ibcrhs !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE write_matrix(lun, mat, comm) ! ! Write the distribute matrix to (single) file ! INCLUDE 'mpif.h' ! INTEGER :: lun TYPE(mumps_mat) :: mat INTEGER, INTENT(in) :: comm ! INTEGER :: nprocs, me, ierr INTEGER :: nrank, nnz, nnz_loc, istart, iend, nloc INTEGER :: i INTEGER, ALLOCATABLE :: displs(:), nlocs(:), cols(:), irow(:) DOUBLE PRECISION, ALLOCATABLE :: val(:) ! CALL mpi_comm_size(comm, nprocs, ierr) CALL mpi_comm_rank(comm, me, ierr) ! IF(.NOT.ASSOCIATED(mat%val)) THEN WRITE(*,'(a)') 'WRITE_MATRIX: MUMPS matrix does not exist!' STOP END IF ! ! Info on matrix ! !!$ IF(me.EQ.0) THEN !!$ s0 = mat%nnz_start-1 !!$ DO i=mat%istart,mat%iend !!$ s=mat%irow(i)-s0 !!$ e=mat%irow(i+1)-1-s0 !!$ WRITE(*,'(a,i6,1pe12.3)') 'nnz, Sum(val)', e-s+1, SUM(mat%val(s:e)) !!$ END DO !!$ END IF ! nrank = mat%rank nnz_loc = mat%nnz_loc nnz = mat%nnz istart = mat%istart iend = mat%iend ! IF(me.EQ.0) THEN WRITE(lun) nrank, nnz END IF ! ! Write irow ! nloc = iend-istart+1 IF (me.EQ.0) THEN ALLOCATE(displs(0:nprocs)) ALLOCATE(nlocs(0:nprocs-1)) ALLOCATE(irow(nrank+1)) END IF CALL mpi_gather(nloc, 1, MPI_INTEGER, nlocs, 1, MPI_INTEGER, 0, comm, ierr) IF(me.EQ.0) THEN displs(0) = 0 DO i=0,nprocs-1 displs(i+1) = displs(i)+nlocs(i) END DO END IF CALL mpi_gatherv(mat%irow, nloc, MPI_INTEGER, & & irow, nlocs, displs, MPI_INTEGER, 0, comm, ierr) IF(me.EQ.0) THEN irow(nrank+1) = nnz+1 WRITE(lun) irow DEALLOCATE(irow) END IF ! ! Write cols ! nloc = mat%nnz_loc IF(me.EQ.0) THEN ALLOCATE(cols(nnz)) END IF CALL mpi_gather(nloc, 1, MPI_INTEGER, nlocs, 1, MPI_INTEGER, 0, comm, ierr) IF(me.EQ.0) THEN displs(0) = 0 DO i=0,nprocs-1 displs(i+1) = displs(i)+nlocs(i) END DO END IF CALL mpi_gatherv(mat%cols, nloc, MPI_INTEGER, & & cols, nlocs, displs, MPI_INTEGER, 0, comm, ierr) IF(me.EQ.0) THEN WRITE(lun) cols DEALLOCATE(cols) END IF ! ! Write val (Same data partition as "cols" ! IF(me.EQ.0) THEN ALLOCATE(val(nnz)) END IF CALL mpi_gatherv(mat%val, nloc, MPI_DOUBLE_PRECISION, & & val, nlocs, displs, MPI_DOUBLE_PRECISION, 0, comm, ierr) IF(me.EQ.0) THEN WRITE(lun) val DEALLOCATE(val) END IF ! ! Epilogue ! IF(me.EQ.0) THEN DEALLOCATE(displs, nlocs) END IF END SUBROUTINE write_matrix !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ END MODULE pde2d_mumps_mod PROGRAM main USE pde2d_mumps_mod USE futils ! IMPLICIT NONE INCLUDE 'mpif.h' INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms LOGICAL :: debug_mumps=.FALSE. LOGICAL :: nlppform INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it DOUBLE PRECISION :: pi, coefx(5), coefy(5) DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol TYPE(spline2d) :: splxy TYPE(mumps_mat) :: mat ! CHARACTER(len=128) :: file='pde2d_mumps.h5' INTEGER :: fid DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol DOUBLE PRECISION :: seconds, mem DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1 DOUBLE PRECISION :: tconv, treord INTEGER :: nits=100 LOGICAL :: nlsym, nlpos LOGICAL :: nlmetis, nlforce_zero LOGICAL :: nlserial ! INTEGER :: ierr, me INTEGER(kind=8) :: nzfact DOUBLE PRECISION :: mem_loc ! CHARACTER(len=128) :: matfile='' ! NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlsym, nlpos,& & nlmetis, nlforce_zero, nlserial, coefx, coefy, matfile, & & debug_mumps !=========================================================================== ! 1.0 Prologue ! CALL mpi_init(ierr) CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) ! ! Read in data specific to run ! nx = 8 ! Number of intervals in x ny = 8 ! Number of intervals in y nidbas = (/3,3/) ! Degree of splines ngauss = (/4,4/) ! Number of Gauss points/interval mbess = 2 ! Exponent of differential problem nterms = 2 ! Number of terms in weak form nlppform = .TRUE. ! Use PPFORM for gridval or not nlsym = .FALSE. ! Symmetric or unsymmetric matrix nlpos = .TRUE. ! Positive definite matrix nlmetis = .FALSE. ! Use metis ordering or minimum degree nlserial = .TRUE. ! Serial. The solver is duplicated on each process. Otherwise ! the solver matrix is partionned among the processes. nlforce_zero = .FALSE. ! Remove existing nodes with zero val in putele/row/ele coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function matfile = '' ! Save matrix file to matfile if not empty ! IF(me.EQ.0) THEN READ(*,newrun) WRITE(*,newrun) END IF CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(mbess, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nterms, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nlppform, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nlsym, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nlpos, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nlmetis, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nlforce_zero, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(debug_mumps, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nlserial, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(coefx, 5, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(coefy, 5, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(matfile, 128, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) ! ! Define grid on x (=radial) & y (=poloidal) axis ! pi = 4.0d0*ATAN(1.0d0) ALLOCATE(xgrid(0:nx), ygrid(0:ny)) xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 CALL meshdist(coefx, xgrid, nx) ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi CALL meshdist(coefy, ygrid, ny) ! ! Create hdf5 file ! IF(me.EQ.0) THEN CALL creatf(file, fid, 'PDE2D Result File', real_prec='d') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NY', ny) CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) CALL attach(fid, '/', 'NGAUSS1', ngauss(1)) CALL attach(fid, '/', 'NGAUSS2', ngauss(2)) CALL attach(fid, '/', 'MBESS', mbess) END IF !=========================================================================== ! 2.0 Discretize the PDE ! ! Set up spline ! t0 = seconds() CALL set_spline(nidbas, ngauss, & & xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform) ! ! FE matrix assembly ! nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix IF(me.EQ.0) WRITE(*,'(a,i8)') 'nrank', nrank ! IF(nlserial) THEN ! The solver is duplicated CALL init(nrank, nterms, mat, nlforce_zero=nlforce_zero,& & nlsym=nlsym, nlpos=nlpos) ELSE ! The solver is distributed CALL init(nrank, nterms, mat, nlforce_zero=nlforce_zero,& & nlsym=nlsym, nlpos=nlpos, comm_in=MPI_COMM_WORLD) END IF mat%mumps_par%ICNTL(23) = 400 IF(me.EQ.0) THEN WRITE(*,'(a/(20i6))') 'ICNTL =', mat%mumps_par%ICNTL END IF WRITE(*,'(a,i4.4,a,3i16)') 'PE', me, ' istart, iend, nloc', mat%istart, mat%iend, & & mat%iend-mat%istart+1 ! CALL dismat(splxy, mat) ! ! BC on Matrix ! CALL ibcmat(mat, ny) tmat = seconds() - t0 ! ! RHS assembly ! ALLOCATE(rhs(nrank), sol(nrank)) CALL disrhs(mbess, splxy, rhs) ! ! BC on RHS ! CALL ibcrhs(rhs, ny) ! mem_loc = mem() CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after DISMAT') IF(me.EQ.0) THEN CALL putarr(fid, '/RHS', rhs, 'RHS of linear system') END IF !=========================================================================== ! 3.0 Solve the dicretized system ! t0 = seconds() CALL to_mat(mat) WRITE(*,'(a/(10i6))') 'MInmax IRN_loc', MINVAL(mat%mumps_par%IRN_loc), MAXVAL(mat%mumps_par%IRN_loc) WRITE(*,'(a/(10i6))') 'JCN_loc', MINVAL(mat%mumps_par%JCN_loc), MAXVAL(mat%mumps_par%JCN_loc) tconv = seconds() -t0 CALL minmax_i(mat%nnz_loc, MPI_COMM_WORLD, 'local nnz') IF(me.EQ.0) THEN WRITE(*,'(a,i16)') 'Number of non-zeros of matrix = ', mat%nnz END IF ! ! Write Matrix and RHS to file ! IF(LEN_TRIM(matfile).GT.0) THEN IF(me.EQ.0) THEN OPEN(99, file=matfile, form='unformatted') END IF CALL write_matrix(99, mat, MPI_COMM_WORLD) END IF ! t0 = seconds() CALL reord_mat(mat, nlmetis=nlmetis, debug=debug_mumps) treord = seconds() - t0 ! t0 = seconds() CALL numfact(mat, debug=debug_mumps) tfact = seconds() - t0 ! ! mem_loc = mem() CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after FACTOR') IF(me.EQ.0) THEN nzfact = mat%mumps_par%INFOG(29) IF(nzfact<0) THEN nzfact = -nzfact*1000000 END IF WRITE(*,'(/a,i12)') 'Number of nonzeros in factors of A = ',nzfact WRITE(*,'(a,f12.2)') 'Number of factorization MFLOPS = ',& & mat%mumps_par%RINFOG(3)/1.e6 END IF gflops1 = mat%mumps_par%RINFOG(3) / tfact / 1.d9 ! CALL bsolve(mat, rhs, sol, debug=debug_mumps) ! IF(LEN_TRIM(matfile).GT.0) THEN IF(me.EQ.0) THEN WRITE(99) rhs WRITE(99) sol CLOSE(99) END IF END IF ! t0 = seconds() DO it=1,nits ! nits iterations for timing CALL bsolve(mat, rhs, sol) sol(1:ny-1) = sol(ny) END DO tsolv = (seconds() - t0)/REAL(nits) ! mem_loc = mem() CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after BSOLVE') ! ! Spline coefficients, taking into account of periodicity in y ! Note: in SOL, y was numbered first. ! dimx = splxy%sp1%dim dimy = splxy%sp2%dim ALLOCATE(bcoef(0:dimx-1, 0:dimy-1)) DO j=0,dimy-1 DO i=0,dimx-1 ij = MODULO(j,ny) + i*ny + 1 bcoef(i,j) = sol(ij) END DO END DO IF(me.EQ.0) THEN WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef) CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution') END IF !=========================================================================== ! 4.0 Check the solution ! ! Check function values computed with various method ! IF(me.EQ.0) THEN ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny)) DO i=0,nx DO j=0,ny solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j)) END DO END DO jder = (/0,0/) ! ! Compute PPFORM/BCOEFS at first call to gridval CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef) ! WRITE(*,'(/a)') '*** Checking solutions' t0 = seconds() DO it=1,nits ! nits iterations for timing CALL gridval(splxy, xgrid, ygrid, solcal, jder) END DO tgrid = (seconds() - t0)/REAL(nits) errsol = solana - solcal IF( SIZE(bcoef,2) .LE. 10 ) THEN CALL prnmat('BCOEF', bcoef) CALL prnmat('SOLANA', solana) CALL prnmat('SOLCAL', solcal) CALL prnmat('ERRSOL', errsol) END IF WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & & norm2(errsol) / norm2(solana) WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid ! CALL putarr(fid, '/xgrid', xgrid, 'r') CALL putarr(fid, '/ygrid', ygrid, '\theta') CALL putarr(fid, '/sol', solcal, 'Solutions') CALL putarr(fid, '/solana', solana,'Exact solutions') CALL putarr(fid, '/errors', errsol, 'Errors') ! ! Check derivatives d/dx and d/dy ! WRITE(*,'(/a)') '*** Checking gradient' DO i=0,nx DO j=0,ny IF( mbess .EQ. 0 ) THEN solana(i,j) = -2.0d0 * xgrid(i) ELSE solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * & & xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) END IF END DO END DO ! jder = (/1,0/) CALL gridval(splxy, xgrid, ygrid, solcal, jder) errsol = solana - solcal CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions') WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol) ! DO i=0,nx DO j=0,ny solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j)) END DO END DO ! jder = (/0,1/) CALL gridval(splxy, xgrid, ygrid, solcal, jder) CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions') errsol = solana - solcal WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol) ! WRITE(*,'(/a)') '---' WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s) ', tconv WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s) ', treord WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1 DEALLOCATE(solcal, solana, errsol) END IF !=========================================================================== ! 5.0 Clear the matrix and recompute ! IF(me.EQ.0) WRITE(*,'(/a)') 'Recompute the solver ...' t0 = seconds() CALL clear_mat(mat) CALL dismat(splxy, mat) CALL ibcmat(mat, ny) tmat = seconds()-t0 ! t0 = seconds() CALL numfact(mat, debug=debug_mumps) tfact = seconds()-t0 gflops1 = mat%mumps_par%RINFOG(3) / tfact / 1.d9 ! t0 = seconds() ALLOCATE(newsol(nrank)) CALL bsolve(mat, rhs, newsol) newsol(1:ny-1) = newsol(ny) tsolv = seconds()-t0 ! IF(me.EQ.0) THEN WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol)) WRITE(*,'(/a)') '---' WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv WRITE(*,'(a,1pe12.3)') 'Total (s) ', tmat+tfact+tsolv WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1 END IF ! DEALLOCATE(newsol) !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xgrid, rhs, sol) DEALLOCATE(bcoef) CALL destroy_sp(splxy) CALL destroy(mat) ! IF(me.EQ.0) CALL closef(fid) CALL mpi_finalize(ierr) !=========================================================================== ! CONTAINS FUNCTION norm2(x) ! ! Compute the 2-norm of array x ! IMPLICIT NONE DOUBLE PRECISION :: norm2 DOUBLE PRECISION, INTENT(in) :: x(:,:) DOUBLE PRECISION :: sum2 INTEGER :: i, j ! sum2 = 0.0d0 DO i=1,SIZE(x,1) DO j=1,SIZE(x,2) sum2 = sum2 + x(i,j)**2 END DO END DO norm2 = SQRT(sum2) END FUNCTION norm2 ! SUBROUTINE prnmat(label, mat) CHARACTER(len=*) :: label DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat INTEGER :: i WRITE(*,'(/a)') TRIM(label) DO i=1,SIZE(mat,1) WRITE(*,'(10(1pe12.3))') mat(i,:) END DO END SUBROUTINE prnmat ! SUBROUTINE minmax_i(k, comm, str) CHARACTER(len=*), INTENT(in) :: str INTEGER, INTENT(in) :: k INTEGER, INTENT(in) :: comm INTEGER :: me, ierr, kmin, kmax CALL mpi_comm_rank(comm, me, ierr) CALL mpi_reduce(k, kmin, 1, MPI_INTEGER, MPI_MIN, 0, comm, ierr) CALL mpi_reduce(k, kmax, 1, MPI_INTEGER, MPI_MAX, 0, comm, ierr) IF( me.EQ.0 ) THEN WRITE(*,'(a,2i16)') 'Minmax of ' // TRIM(str), kmin, kmax END IF END SUBROUTINE minmax_i ! SUBROUTINE minmax_r(x, comm, str) CHARACTER(len=*), INTENT(in) :: str DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(in) :: comm INTEGER :: me, ierr DOUBLE PRECISION :: xmin, xmax CALL mpi_comm_rank(comm, me, ierr) CALL mpi_reduce(x, xmin, 1, MPI_DOUBLE_PRECISION, MPI_MIN, 0, comm, ierr) CALL mpi_reduce(x, xmax, 1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierr) IF( me.EQ.0 ) THEN WRITE(*,'(a,2(1pe12.4))') 'Minmax of ' // TRIM(str), xmin, xmax END IF END SUBROUTINE minmax_r END PROGRAM main ! !+++ SUBROUTINE meshdist(c, x, nx) ! ! Construct an 1d non-equidistant mesh given a ! mesh distribution function. ! IMPLICIT NONE DOUBLE PRECISION, INTENT(in) :: c(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) INTEGER :: nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal !!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) END FUNCTION fdist END SUBROUTINE meshdist !+++ diff --git a/examples/pde2d_nh.f90 b/examples/pde2d_nh.f90 index ee5a008..98607b9 100644 --- a/examples/pde2d_nh.f90 +++ b/examples/pde2d_nh.f90 @@ -1,684 +1,684 @@ !> !> @file pde2d_nh.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> ! ! Solving the following 2d PDE using splines: ! ! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), ! with BC: f(x=1,y) = cos(y) ! ! Exact solution: f(x,y) = (1-x^2) x^m cos(my) + x*cos(y) ! MODULE pde2d_nh_mod USE bsplines USE matrix IMPLICIT NONE ! LOGICAL :: nlfix CONTAINS SUBROUTINE dismat(spl, mat) ! ! Assembly of FE matrix mat using spline spl ! TYPE(spline2d), INTENT(in) :: spl TYPE(gbmat), INTENT(inout) :: mat ! INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: n2, nidbas2, ndim2, ng2 INTEGER :: i, j, ig1, ig2 INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) DOUBLE PRECISION:: contrib ! INTEGER :: kterms ! Number of terms in weak form INTEGER, DIMENSION(:,:), ALLOCATABLE :: idert, iderw ! Derivative order DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: coefs ! Terms in weak form !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1 WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2 ! ! Weak form ! kterms = mat%nterms ALLOCATE(idert(kterms,2), iderw(kterms,2), coefs(kterms)) ! ALLOCATE(fun1(0:nidbas1,0:1)) ! Spline and 1st derivative ALLOCATE(fun2(0:nidbas2,0:1)) ! ! ! Gauss quadature ! CALL get_gauss(spl%sp1, ng1) CALL get_gauss(spl%sp2, ng2) ALLOCATE(xg1(ng1), wg1(ng1)) ALLOCATE(xg2(ng2), wg2(ng2)) !=========================================================================== ! 2.0 Assembly loop ! DO i=1,n1 CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) DO ig1=1,ng1 CALL basfun(xg1(ig1), spl%sp1, fun1, i) DO j=1,n2 CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) DO ig2=1,ng2 CALL basfun(xg2(ig2), spl%sp2, fun2, j) CALL coefeq(xg1(ig1), xg2(ig2), idert, iderw, coefs) DO iterm=1,kterms DO iw1=0,nidbas1 ! Weight function in dir 1 igw1 = i+iw1 DO iw2=0,nidbas2 ! Weight function in dir 2 igw2 = MODULO(j+iw2-1, n2) + 1 irow = igw2 + (igw1-1)*n2 DO it1=0,nidbas1 ! Test function in dir 1 igt1 = i+it1 DO it2=0,nidbas2 ! Test function in dir 2 igt2 = MODULO(j+it2-1, n2) + 1 jcol = igt2 + (igt1-1)*n2 contrib = fun1(iw1,iderw(iterm,1)) * & & fun2(iw2,iderw(iterm,2)) * & & coefs(iterm) * & & fun2(it2,idert(iterm,2)) * & & fun1(it1,idert(iterm,1)) * & & wg1(ig1) * wg2(ig2) CALL updtmat(mat, irow, jcol, contrib) END DO END DO END DO END DO END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xg1, wg1, fun1) DEALLOCATE(xg2, wg2, fun2) DEALLOCATE(idert, iderw, coefs) ! CONTAINS SUBROUTINE coefeq(x, y, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x, y INTEGER, INTENT(out) :: idt(:,:), idw(:,:) DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1)) ! ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy ! c(1) = x ! idt(1,1) = 1 idt(1,2) = 0 idw(1,1) = 1 idw(1,2) = 0 ! c(2) = 1.d0/x idt(2,1) = 0 idt(2,2) = 1 idw(2,1) = 0 idw(2,2) = 1 END SUBROUTINE coefeq END SUBROUTINE dismat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE disrhs(mbess, spl, rhs) ! ! Assembly the RHS using 2d spline spl ! IMPLICIT NONE INTEGER, INTENT(in) :: mbess TYPE(spline2d), INTENT(in) :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:) INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: n2, nidbas2, ndim2, ng2 INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) DOUBLE PRECISION:: contrib !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) ! ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) ! ! Gauss quadature ! CALL get_gauss(spl%sp1, ng1) CALL get_gauss(spl%sp2, ng2) WRITE(*,'(/a, 2i3)') 'Gauss points and weights, ngauss =', ng1, ng2 ALLOCATE(xg1(ng1), wg1(ng1)) ALLOCATE(xg2(ng2), wg2(ng2)) !=========================================================================== ! 2.0 Assembly loop ! nrank = SIZE(rhs) rhs(1:nrank) = 0.0d0 ! DO i=1,n1 CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) DO ig1=1,ng1 CALL basfun(xg1(ig1), spl%sp1, fun1, i) DO j=1,n2 CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) DO ig2=1,ng2 CALL basfun(xg2(ig2), spl%sp2, fun2, j) contrib = wg1(ig1)*wg2(ig2) * & & rhseq(xg1(ig1),xg2(ig2), mbess) DO k1=0,nidbas1 i1 = i+k1 DO k2=0,nidbas2 j2 = MODULO(j+k2-1,n2) + 1 ij = j2 + (i1-1)*n2 rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1) END DO END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xg1, wg1, fun1) DEALLOCATE(xg2, wg2, fun2) ! CONTAINS DOUBLE PRECISION FUNCTION rhseq(x1, x2, m) DOUBLE PRECISION, INTENT(in) :: x1, x2 INTEGER, INTENT(in) :: m rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2) END FUNCTION rhseq END SUBROUTINE disrhs !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE ibcmat(mat, spl) ! ! Apply BC on matrix ! IMPLICIT NONE TYPE(gbmat), INTENT(inout) :: mat TYPE(spline2d) :: spl INTEGER :: nx, ndim1, nidbas1 INTEGER :: ny, ndim2, nidbas2 INTEGER :: kl, ku, nrank, i, j INTEGER :: krow, kcol, jf DOUBLE PRECISION :: yg DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:), fun(:,:) !=========================================================================== ! 1.0 Prologue ! CALL get_dim(spl%sp1, ndim1, nx, nidbas1) CALL get_dim(spl%sp2, ndim2, ny, nidbas2) ! kl = mat%kl ku = mat%ku nrank = mat%rank ALLOCATE(zsum(nrank), arr(nrank)) ALLOCATE(fun(0:nidbas2,1)) !=========================================================================== ! 2.0 Unicity at the axis ! ! The vertical sum on the NY-th row ! zsum = 0.0d0 DO i=1,ny arr = 0.0d0 CALL getrow(mat, i, arr) DO j=1,ny+ku zsum(j) = zsum(j) + arr(j) END DO END DO CALL putrow(mat, ny, zsum) ! ! The horizontal sum on the NY-th column ! zsum = 0.0d0 DO j=1,ny arr = 0.0d0 CALL getcol(mat, j, arr) DO i=ny,ny+kl zsum(i) = zsum(i) + arr(i) END DO END DO CALL putcol(mat, ny, zsum) ! ! The away operator ! DO j = 1,ny-1 arr = 0.0d0; arr(j) = 1.0d0 CALL putcol(mat, j, arr) END DO ! DO i = 1,ny-1 arr = 0.0d0; arr(i) = 1.0d0 CALL putrow(mat, i, arr) END DO ! !=========================================================================== ! 3.0 Dirichlet on right boundary ! i=nx+nidbas1 ! The last spline in X DO j=1,ny krow=(i-1)*ny+j IF(MODULO(nidbas2,2) .EQ. 0 .AND. nlfix) THEN yg = (spl%sp2%knots(j-1)+spl%sp2%knots(j))/2.0d0 ELSE yg = spl%sp2%knots(j-1) END IF CALL basfun(yg, spl%sp2, fun, j) arr = 0.0d0 DO jf=0,nidbas2 kcol=(i-1)*ny + MODULO(jf+j-1,ny)+1 arr(kcol) = arr(kcol)+fun(jf,1) END DO CALL putrow(mat, krow, arr) END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(zsum, arr) DEALLOCATE(fun) ! END SUBROUTINE ibcmat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE ibcrhs(rhs, spl) ! ! Apply BC on RHS ! IMPLICIT NONE DOUBLE PRECISION, INTENT(inout) :: rhs(:) TYPE(spline2d) :: spl INTEGER :: nx, ndim1, nidbas1 INTEGER :: ny, ndim2, nidbas2 INTEGER :: nrank INTEGER :: i, j, k DOUBLE PRECISION :: xg, yg, zsum !=========================================================================== ! 1.0 Prologue ! CALL get_dim(spl%sp1, ndim1, nx, nidbas1) CALL get_dim(spl%sp2, ndim2, ny, nidbas2) nrank = SIZE(rhs,1) !=========================================================================== ! 2.0 Unicity at the axis ! ! The vertical sum ! zsum = SUM(rhs(1:ny)) rhs(ny) = zsum rhs(1:ny-1) = 0.0d0 !=========================================================================== ! 3.0 Dirichlet on right boundary ! i = nx+nidbas1 ! The last spline index on x xg = spl%sp1%knots(nx) ! Right boundary radial coordinate DO j=1,ny k = (i-1)*ny + j IF(MODULO(nidbas2,2) .EQ. 0 .AND. nlfix) THEN yg = (spl%sp2%knots(j-1)+spl%sp2%knots(j))/2.0d0 ELSE yg = spl%sp2%knots(j-1) END IF rhs(k) = xg*COS(yg) END DO END SUBROUTINE ibcrhs !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE meshdist(c, x, nx) ! ! Construct an 1d non-equidistant mesh given a ! mesh distribution function. ! IMPLICIT NONE DOUBLE PRECISION, INTENT(in) :: c(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) INTEGER :: nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) END FUNCTION fdist END SUBROUTINE meshdist !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ END MODULE pde2d_nh_mod !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ PROGRAM main ! USE pde2d_nh_mod USE bsplines USE matrix USE futils ! IMPLICIT NONE INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms LOGICAL :: nlppform INTEGER :: i, j, ij, dimx, dimy, nrank, kl, ku, jder(2), it DOUBLE PRECISION :: pi, coefx(5), coefy(5) DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol TYPE(spline2d) :: splxy TYPE(gbmat) :: mat ! CHARACTER(len=128) :: file='pde2d_nh.h5' INTEGER :: fid DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol DOUBLE PRECISION :: seconds, mem, dopla DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2 INTEGER :: nits=500 ! NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, coefx, coefy, nlfix !=========================================================================== ! 1.0 Prologue ! ! Read in data specific to run ! nx = 8 ! Number of intervals in x ny = 8 ! Number of intervals in y nidbas = (/3,3/) ! Degree of splines ngauss = (/4,4/) ! Number of Gauss points/interval mbess = 2 ! Exponent of differential problem nterms = 2 ! Number of terms in weak form nlppform = .TRUE. ! Use PPFORM for gridval or not coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function nlfix = .TRUE. ! Fix or not for even nidbas2 ! READ(*,newrun) WRITE(*,newrun) ! ! Define grid on x (=radial) & y (=poloidal) axis ! pi = 4.0d0*ATAN(1.0d0) ALLOCATE(xgrid(0:nx), ygrid(0:ny)) xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 CALL meshdist(coefx, xgrid, nx) ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi CALL meshdist(coefy, ygrid, ny) ! WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny) ! ! Create hdf5 file ! CALL creatf(file, fid, 'PDE2D Result File', real_prec='d') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NY', ny) CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) CALL attach(fid, '/', 'NGAUSS1', ngauss(1)) CALL attach(fid, '/', 'NGAUSS2', ngauss(2)) CALL attach(fid, '/', 'MBESS', mbess) !=========================================================================== ! 2.0 Discretize the PDE ! ! Set up spline ! t0 = seconds() CALL set_spline(nidbas, ngauss, & & xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform) WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots ! ! FE matrix assembly ! nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix kl = (nidbas(1)+1)*ny -1 ! Number of sub-diagnonals ku = kl ! Number of super-diagnonals WRITE(*,'(a,5i6)') 'nrank, kl, ku', nrank, kl, ku ! CALL init(kl, ku, nrank, nterms, mat) CALL dismat(splxy, mat) CALL putmat(fid, '/MAT0', mat, 'Assembled GB matrice') ALLOCATE(arr(nrank)) ! ! BC on Matrix ! IF(nrank.LT.100) & & WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', mat%val(kl+ku+1,:) CALL ibcmat(mat, splxy) tmat = seconds() - t0 IF(nrank.LT.100) & & WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', mat%val(kl+ku+1,:) ! ! RHS assembly ! ALLOCATE(rhs(nrank), sol(nrank)) CALL disrhs(mbess, splxy, rhs) ! ! BC on RHS ! CALL ibcrhs(rhs, splxy) CALL putarr(fid, '/RHS', rhs, 'RHS of linear system') CALL putmat(fid, '/MAT1', mat, 'GB matrice with BC') WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem() !=========================================================================== ! 3.0 Solve the dicretized system ! t0 = seconds() CALL factor(mat) tfact = seconds() - t0 gflops1 = dopla('DGBTRF',nrank,nrank,kl,ku,0) / tfact / 1.d9 t0 = seconds() CALL bsolve(mat, rhs, sol) ! ! Backtransform of solution ! sol(1:ny-1) = sol(ny) ! ! Spline coefficients, taking into account of periodicity in y ! Note: in SOL, y was numbered first. ! dimx = splxy%sp1%dim dimy = splxy%sp2%dim ALLOCATE(bcoef(0:dimx-1, 0:dimy-1)) DO j=0,dimy-1 DO i=0,dimx-1 ij = MODULO(j,ny) + i*ny + 1 bcoef(i,j) = sol(ij) END DO END DO WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef) ! tsolv = seconds() - t0 gflops2 = dopla('DGBTRS',nrank,1,kl,ku,0) / tsolv / 1.d9 CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution') !=========================================================================== ! 4.0 Check the solution ! ! Check function values computed with various method ! ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny)) DO i=0,nx DO j=0,ny solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j)) & & + xgrid(i)*COS(ygrid(j)) END DO END DO jder = (/0,0/) ! ! Compute PPFORM/BCOEFS at first call to gridval CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef) ! WRITE(*,'(/a)') '*** Checking solutions' t0 = seconds() DO it=1,nits ! nits iterations for timing CALL gridval(splxy, xgrid, ygrid, solcal, jder) END DO tgrid = (seconds() - t0)/REAL(nits) errsol = solana - solcal IF( SIZE(bcoef,2) .LE. 10 ) THEN CALL prnmat('BCOEF', bcoef) CALL prnmat('SOLANA', solana) CALL prnmat('SOLCAL', solcal) CALL prnmat('ERRSOL', errsol) END IF WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & & norm2(errsol) / norm2(solana) WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid ! CALL putarr(fid, '/xgrid', xgrid, 'r') CALL putarr(fid, '/ygrid', ygrid, '\theta') CALL putarr(fid, '/sol', solcal, 'Solutions') CALL putarr(fid, '/solana', solana,'Exact solutions') CALL putarr(fid, '/errors', errsol, 'Errors') ! ! Check derivatives d/dx and d/dy ! WRITE(*,'(/a)') '*** Checking gradient' DO i=0,nx DO j=0,ny IF( mbess .EQ. 0 ) THEN solana(i,j) = -2.0d0 * xgrid(i) ELSE solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * & & xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) & & + COS(ygrid(j)) END IF END DO END DO ! jder = (/1,0/) CALL gridval(splxy, xgrid, ygrid, solcal, jder) errsol = solana - solcal CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions') CALL putarr(fid, '/errors_x', errsol, 'Errors in d/dx') WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol) ! DO i=0,nx DO j=0,ny solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * & & SIN(mbess*ygrid(j)) & & -xgrid(i)*SIN(ygrid(j)) END DO END DO ! jder = (/0,1/) CALL gridval(splxy, xgrid, ygrid, solcal, jder) CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions') errsol = solana - solcal CALL putarr(fid, '/errors_y', errsol, 'Errors in d/dy') WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol) !=========================================================================== ! 9.0 Epilogue ! WRITE(*,'(/a)') '---' WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv WRITE(*,'(a,2f10.3)') 'Factor/solve Gflop/s', gflops1, gflops2 ! DEALLOCATE(xgrid, rhs, sol) DEALLOCATE(solcal, solana, errsol) DEALLOCATE(bcoef) DEALLOCATE(arr) CALL destroy_sp(splxy) CALL destroy(mat) ! CALL closef(fid) !=========================================================================== ! CONTAINS FUNCTION norm2(x) ! ! Compute the 2-norm of array x ! IMPLICIT NONE DOUBLE PRECISION :: norm2 DOUBLE PRECISION, INTENT(in) :: x(:,:) DOUBLE PRECISION :: sum2 INTEGER :: i, j ! sum2 = 0.0d0 DO i=1,SIZE(x,1) DO j=1,SIZE(x,2) sum2 = sum2 + x(i,j)**2 END DO END DO norm2 = SQRT(sum2) END FUNCTION norm2 SUBROUTINE prnmat(label, mat) CHARACTER(len=*) :: label DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat INTEGER :: i WRITE(*,'(/a)') TRIM(label) DO i=1,SIZE(mat,1) WRITE(*,'(10(1pe12.3))') mat(i,:) END DO END SUBROUTINE prnmat END PROGRAM main ! !+++ !+++ diff --git a/examples/pde2d_pardiso.f90 b/examples/pde2d_pardiso.f90 index 276f727..53619d3 100644 --- a/examples/pde2d_pardiso.f90 +++ b/examples/pde2d_pardiso.f90 @@ -1,741 +1,741 @@ !> !> @file pde2d_pardiso.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> ! ! Solving the 2d PDE using splines and PARDISO non-symmetric matrix ! ! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0 ! exact solution: f(x,y) = (1-x^2) x^m cos(my) ! MODULE pde2d_pardiso_mod USE bsplines USE pardiso_bsplines IMPLICIT NONE CONTAINS !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE dismat(spl, mat) ! ! Assembly of FE matrix mat using spline spl ! TYPE(spline2d), INTENT(in) :: spl TYPE(pardiso_mat), INTENT(inout) :: mat ! INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: n2, nidbas2, ndim2, ng2 INTEGER :: i, j, ig1, ig2 INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:) DOUBLE PRECISION:: contrib ! INTEGER :: kterms ! Number of terms in weak form INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:) ! Derivative order DOUBLE PRECISION, ALLOCATABLE :: coefs(:,:,:) ! Terms in weak form INTEGER, ALLOCATABLE :: left1(:), left2(:) !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1 WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2 ! ! Gauss quadature ! CALL get_gauss(spl%sp1, ng1) CALL get_gauss(spl%sp2, ng2) ALLOCATE(xg1(ng1), wg1(ng1)) ALLOCATE(xg2(ng1), wg2(ng1)) ! ! Weak form ! kterms = mat%nterms ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2)) ALLOCATE(coefs(kterms,ng1,ng2)) ! ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! !=========================================================================== ! 2.0 Assembly loop ! ALLOCATE(left1(ng1)) ALLOCATE(left2(ng2)) DO i=1,n1 CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) left1 = i CALL basfun(xg1, spl%sp1, fun1, left1) DO j=1,n2 CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) left2 = j CALL basfun(xg2, spl%sp2, fun2, left2) ! DO ig1=1,ng1 DO ig2=1,ng2 CALL coefeq(xg1(ig1), xg2(ig2), & & idert(:,:,ig1,ig2), & & iderw(:,:,ig1,ig2), & & coefs(:,ig1,ig2)) END DO END DO ! DO iw1=0,nidbas1 ! Weight function in dir 1 igw1 = i+iw1 DO iw2=0,nidbas2 ! Weight function in dir 2 igw2 = MODULO(j+iw2-1, n2) + 1 irow = igw2 + (igw1-1)*n2 DO it1=0,nidbas1 ! Test function in dir 1 igt1 = i+it1 DO it2=0,nidbas2 ! Test function in dir 2 igt2 = MODULO(j+it2-1, n2) + 1 jcol = igt2 + (igt1-1)*n2 !------------- contrib = 0.0d0 DO ig1=1,ng1 DO ig2=1,ng2 DO iterm=1,kterms contrib = contrib + & & fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * & & fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * & & coefs(iterm,ig1,ig2) * & & fun2(it2,idert(iterm,2,ig1,ig2),ig2) * & & fun1(it1,idert(iterm,1,ig1,ig2),ig1) * & & wg1(ig1) * wg2(ig2) END DO END DO END DO CALL updtmat(mat, irow, jcol, contrib) !------------- END DO END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xg1, wg1, fun1) DEALLOCATE(xg2, wg2, fun2) DEALLOCATE(idert, iderw, coefs) DEALLOCATE(left1,left2) ! CONTAINS SUBROUTINE coefeq(x, y, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x, y INTEGER, INTENT(out) :: idt(:,:), idw(:,:) DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1)) ! ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy ! c(1) = x ! idt(1,1) = 1 idt(1,2) = 0 idw(1,1) = 1 idw(1,2) = 0 ! c(2) = 1.d0/x idt(2,1) = 0 idt(2,2) = 1 idw(2,1) = 0 idw(2,2) = 1 END SUBROUTINE coefeq END SUBROUTINE dismat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE disrhs(mbess, spl, rhs) ! ! Assembly the RHS using 2d spline spl ! INTEGER, INTENT(in) :: mbess TYPE(spline2d), INTENT(in) :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:) INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: n2, nidbas2, ndim2, ng2 INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) DOUBLE PRECISION:: contrib !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) ! ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) ! ! Gauss quadature ! CALL get_gauss(spl%sp1, ng1) CALL get_gauss(spl%sp2, ng2) ALLOCATE(xg1(ng1), wg1(ng1)) ALLOCATE(xg2(ng1), wg2(ng1)) !=========================================================================== ! 2.0 Assembly loop ! nrank = SIZE(rhs) rhs(1:nrank) = 0.0d0 ! DO i=1,n1 CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) DO ig1=1,ng1 CALL basfun(xg1(ig1), spl%sp1, fun1, i) DO j=1,n2 CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) DO ig2=1,ng2 CALL basfun(xg2(ig2), spl%sp2, fun2, j) contrib = wg1(ig1)*wg2(ig2) * & & rhseq(xg1(ig1),xg2(ig2), mbess) DO k1=0,nidbas1 i1 = i+k1 DO k2=0,nidbas2 j2 = MODULO(j+k2-1,n2) + 1 ij = j2 + (i1-1)*n2 rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1) END DO END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xg1, wg1, fun1) DEALLOCATE(xg2, wg2, fun2) ! CONTAINS DOUBLE PRECISION FUNCTION rhseq(x1, x2, m) DOUBLE PRECISION, INTENT(in) :: x1, x2 INTEGER, INTENT(in) :: m rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2) END FUNCTION rhseq END SUBROUTINE disrhs ! !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE ibcmat(mat, ny) ! ! Apply BC on matrix ! TYPE(pardiso_mat), INTENT(inout) :: mat INTEGER, INTENT(in) :: ny INTEGER :: nrank, i, j DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:) !=========================================================================== ! 1.0 Prologue ! nrank = mat%rank ALLOCATE(zsum(nrank), arr(nrank)) !=========================================================================== ! 2.0 Unicity at the axis ! ! The vertical sum on the NY-th row ! zsum = 0.0d0 DO i=1,ny arr = 0.0d0 CALL getrow(mat, i, arr) zsum(:) = zsum(:) + arr(:) END DO CALL putrow(mat, ny, zsum) ! ! The horizontal sum on the NY-th column ! zsum = 0.0d0 DO j=1,ny arr = 0.0d0 CALL getcol(mat, j, arr) zsum(ny:) = zsum(ny:) + arr(ny:) END DO CALL putcol(mat, ny, zsum) ! ! The away operator ! DO j = 1,ny-1 arr = 0.0d0; arr(j) = 1.0d0 CALL putcol(mat, j, arr) END DO ! DO i = 1,ny-1 arr = 0.0d0; arr(i) = 1.0d0 CALL putrow(mat, i, arr) END DO ! !=========================================================================== ! 3.0 Dirichlet on right boundary ! DO j = nrank, nrank-ny+1, -1 arr = 0.0d0; arr(j) = 1.0d0 CALL putcol(mat, j, arr) END DO ! DO i = nrank, nrank-ny+1, -1 arr = 0.0d0; arr(i) = 1.0d0 CALL putrow(mat, i, arr) END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(zsum, arr) ! END SUBROUTINE ibcmat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE ibcrhs(rhs, ny) ! ! Apply BC on RHS ! DOUBLE PRECISION, INTENT(inout) :: rhs(:) INTEGER, INTENT(in) :: ny INTEGER :: nrank DOUBLE PRECISION :: zsum !=========================================================================== ! 1.0 Prologue ! nrank = SIZE(rhs,1) !=========================================================================== ! 2.0 Unicity at the axis ! ! The vertical sum ! zsum = SUM(rhs(1:ny)) rhs(ny) = zsum rhs(1:ny-1) = 0.0d0 !=========================================================================== ! 3.0 Dirichlet on right boundary ! rhs(nrank-ny+1:nrank) = 0.0d0 END SUBROUTINE ibcrhs !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE coefeq_poisson(x, y, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x, y INTEGER, INTENT(out) :: idt(:,:), idw(:,:) DOUBLE PRECISION, INTENT(out) :: c(:) ! ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy ! c(1) = x ! idt(1,1) = 1 idt(1,2) = 0 idw(1,1) = 1 idw(1,2) = 0 ! c(2) = 1.d0/x idt(2,1) = 0 idt(2,2) = 1 idw(2,1) = 0 idw(2,2) = 1 END SUBROUTINE coefeq_poisson !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ END MODULE pde2d_pardiso_mod PROGRAM main USE pde2d_pardiso_mod USE futils USE conmat_mod ! IMPLICIT NONE INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms LOGICAL :: nlppform, nlconmat INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it DOUBLE PRECISION :: pi, coefx(5), coefy(5) DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol TYPE(spline2d) :: splxy TYPE(pardiso_mat) :: mat ! CHARACTER(len=128) :: file='pde2d_pardiso.h5' INTEGER :: fid DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol DOUBLE PRECISION :: seconds, mem, dopla DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2 DOUBLE PRECISION :: tconv, treord INTEGER :: nits=100 LOGICAL :: nlmetis, nlforce_zero ! NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlmetis, & & nlforce_zero, nlconmat, coefx, coefy !=========================================================================== ! 1.0 Prologue ! ! Read in data specific to run ! nx = 8 ! Number of intervals in x ny = 8 ! Number of intervals in y nidbas = (/3,3/) ! Degree of splines ngauss = (/4,4/) ! Number of Gauss points/interval mbess = 2 ! Exponent of differential problem nterms = 2 ! Number of terms in weak form nlppform = .TRUE. ! Use PPFORM for gridval or not nlmetis = .FALSE. ! Use metis ordering or minimum degree nlforce_zero = .FALSE. ! Remove existing nodes with zero val in putele/row/ele nlconmat = .TRUE. ! Use CONMAT instead of DISMAT coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function ! READ(*,newrun) WRITE(*,newrun) ! ! Define grid on x (=radial) & y (=poloidal) axis ! pi = 4.0d0*ATAN(1.0d0) ALLOCATE(xgrid(0:nx), ygrid(0:ny)) xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 CALL meshdist(coefx, xgrid, nx) ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi CALL meshdist(coefy, ygrid, ny) ! ! Create hdf5 file ! CALL creatf(file, fid, 'PDE2D Result File', real_prec='d') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NY', ny) CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) CALL attach(fid, '/', 'NGAUSS1', ngauss(1)) CALL attach(fid, '/', 'NGAUSS2', ngauss(2)) CALL attach(fid, '/', 'MBESS', mbess) !=========================================================================== ! 2.0 Discretize the PDE ! ! Set up spline ! CALL set_spline(nidbas, ngauss, & & xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform) !!$ WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots !!$ WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots ! ! FE matrix assembly ! nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix WRITE(*,'(a,i8)') 'nrank', nrank ! CALL init(nrank, nterms, mat, nlforce_zero=nlforce_zero) t0 = seconds() IF(nlconmat) THEN CALL conmat(splxy, mat, coefeq_poisson) ELSE CALL dismat(splxy, mat) END IF tmat = seconds() - t0 ALLOCATE(arr(nrank)) IF(nrank.LT.100) THEN DO i=1,nrank CALL getele(mat, i, i, arr(i)) END DO WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', arr END IF ! ! BC on Matrix ! WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat) CALL ibcmat(mat, ny) IF(nrank.LT.100) THEN DO i=1,nrank CALL getele(mat, i, i, arr(i)) END DO WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', arr WRITE(*,'(a)') 'Last rows' DO i=nrank-ny,nrank CALL getrow(mat, i, arr) WRITE(*,'(10(1pe12.3))') arr END DO END IF ! ! RHS assembly ! ALLOCATE(rhs(nrank), sol(nrank)) CALL disrhs(mbess, splxy, rhs) ! ! BC on RHS ! CALL ibcrhs(rhs, ny) ! CALL putarr(fid, '/RHS', rhs, 'RHS of linear system') WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat) WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem() !=========================================================================== ! 3.0 Solve the dicretized system ! t0 = seconds() CALL to_mat(mat) tconv = seconds() -t0 WRITE(*,'(/a,l4)') 'associated(mat%mat)', ASSOCIATED(mat%mat) WRITE(*,'(a,1pe12.3)') 'Mem used (MB) after TO_MAT', mem() ! t0 = seconds() CALL reord_mat(mat, nlmetis=nlmetis, debug=.FALSE.) CALL putmat(fid, '/MAT', mat) treord = seconds() - t0 ! t0 = seconds() CALL numfact(mat, debug=.FALSE.) tfact = seconds() - t0 WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem() WRITE(*,'(/a,i12)') 'Number of nonzeros in factors of A = ',mat%p%iparm(18) WRITE(*,'(a,i12)') 'Number of factorization MFLOPS = ',mat%p%iparm(19) gflops1 = mat%p%iparm(19) / tfact / 1.d3 ! CALL bsolve(mat, rhs, sol, debug=.FALSE.) t0 = seconds() DO it=1,nits ! nits iterations for timing CALL bsolve(mat, rhs, sol) sol(1:ny-1) = sol(ny) END DO WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem() tsolv = (seconds() - t0)/REAL(nits) ! ! Spline coefficients, taking into account of periodicity in y ! Note: in SOL, y was numbered first. ! dimx = splxy%sp1%dim dimy = splxy%sp2%dim ALLOCATE(bcoef(0:dimx-1, 0:dimy-1)) DO j=0,dimy-1 DO i=0,dimx-1 ij = MODULO(j,ny) + i*ny + 1 bcoef(i,j) = sol(ij) END DO END DO WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef) CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution') !=========================================================================== ! 4.0 Check the solution ! ! Check function values computed with various method ! ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny)) DO i=0,nx DO j=0,ny solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j)) END DO END DO jder = (/0,0/) ! ! Compute PPFORM/BCOEFS at first call to gridval CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef) ! WRITE(*,'(/a)') '*** Checking solutions' t0 = seconds() DO it=1,nits ! nits iterations for timing CALL gridval(splxy, xgrid, ygrid, solcal, jder) END DO tgrid = (seconds() - t0)/REAL(nits) errsol = solana - solcal IF( SIZE(bcoef,2) .LE. 10 ) THEN CALL prnmat('BCOEF', bcoef) CALL prnmat('SOLANA', solana) CALL prnmat('SOLCAL', solcal) CALL prnmat('ERRSOL', errsol) END IF WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & & norm2(errsol) / norm2(solana) WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid ! CALL putarr(fid, '/xgrid', xgrid, 'r') CALL putarr(fid, '/ygrid', ygrid, '\theta') CALL putarr(fid, '/sol', solcal, 'Solutions') CALL putarr(fid, '/solana', solana,'Exact solutions') CALL putarr(fid, '/errors', errsol, 'Errors') ! ! Check derivatives d/dx and d/dy ! WRITE(*,'(/a)') '*** Checking gradient' DO i=0,nx DO j=0,ny IF( mbess .EQ. 0 ) THEN solana(i,j) = -2.0d0 * xgrid(i) ELSE solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * & & xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) END IF END DO END DO ! jder = (/1,0/) CALL gridval(splxy, xgrid, ygrid, solcal, jder) errsol = solana - solcal CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions') WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol) ! DO i=0,nx DO j=0,ny solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j)) END DO END DO ! jder = (/0,1/) CALL gridval(splxy, xgrid, ygrid, solcal, jder) CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions') errsol = solana - solcal WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol) ! WRITE(*,'(/a)') '---' WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s) ', tconv WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s) ', treord WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1 !=========================================================================== ! 5.0 Clear the matrix and recompute ! WRITE(*,'(/a)') 'Recompute the solver ...' CALL clear_mat(mat) t0 = seconds() IF(nlconmat) THEN CALL conmat(splxy, mat, coefeq_poisson) ELSE CALL dismat(splxy, mat) END IF tmat = seconds()-t0 CALL ibcmat(mat, ny) ! t0 = seconds() CALL numfact(mat, debug=.FALSE.) tfact = seconds()-t0 gflops1 = mat%p%iparm(19) / tfact / 1.d3 ! t0 = seconds() ALLOCATE(newsol(nrank)) CALL bsolve(mat, rhs, newsol) newsol(1:ny-1) = newsol(ny) tsolv = seconds()-t0 ! WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol)) WRITE(*,'(/a)') '---' WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv WRITE(*,'(a,1pe12.3)') 'Total (s) ', tmat+tfact+tsolv WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1 ! DEALLOCATE(newsol) !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xgrid, rhs, sol) DEALLOCATE(solcal, solana, errsol) DEALLOCATE(bcoef) DEALLOCATE(arr) CALL destroy_sp(splxy) CALL destroy(mat) ! CALL closef(fid) !=========================================================================== ! CONTAINS FUNCTION norm2(x) ! ! Compute the 2-norm of array x ! IMPLICIT NONE DOUBLE PRECISION :: norm2 DOUBLE PRECISION, INTENT(in) :: x(:,:) DOUBLE PRECISION :: sum2 INTEGER :: i, j ! sum2 = 0.0d0 DO i=1,SIZE(x,1) DO j=1,SIZE(x,2) sum2 = sum2 + x(i,j)**2 END DO END DO norm2 = SQRT(sum2) END FUNCTION norm2 SUBROUTINE prnmat(label, mat) CHARACTER(len=*) :: label DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat INTEGER :: i WRITE(*,'(/a)') TRIM(label) DO i=1,SIZE(mat,1) WRITE(*,'(10(1pe12.3))') mat(i,:) END DO END SUBROUTINE prnmat END PROGRAM main ! !+++ SUBROUTINE meshdist(c, x, nx) ! ! Construct an 1d non-equidistant mesh given a ! mesh distribution function. ! IMPLICIT NONE DOUBLE PRECISION, INTENT(in) :: c(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) INTEGER :: nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal !!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) END FUNCTION fdist END SUBROUTINE meshdist !+++ diff --git a/examples/pde2d_pb.f90 b/examples/pde2d_pb.f90 index e764273..bd18342 100644 --- a/examples/pde2d_pb.f90 +++ b/examples/pde2d_pb.f90 @@ -1,696 +1,696 @@ !> !> @file pde2d_pb.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Solving the following 2d PDE using splines: ! ! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0 ! exact solution: f(x,y) = (1-x^2) x^m cos(my) ! USE bsplines USE matrix USE conmat_mod ! IMPLICIT NONE INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms LOGICAL :: nlppform, nlconmat INTEGER :: i, j, ij, dimx, dimy, nrank, kl, ku, jder(2), it DOUBLE PRECISION :: pi, coefx(5), coefy(5) DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol TYPE(spline2d) :: splxy TYPE(pbmat) :: mat ! DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol DOUBLE PRECISION :: seconds, mem, dopla DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2 INTEGER :: nits=500 ! INTERFACE SUBROUTINE dismat(spl, mat) USE bsplines USE matrix TYPE(spline2d), INTENT(in) :: spl TYPE(pbmat), INTENT(inout) :: mat END SUBROUTINE dismat SUBROUTINE disrhs(mbess, spl, rhs) USE bsplines INTEGER, INTENT(in) :: mbess TYPE(spline2d), INTENT(in) :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:) END SUBROUTINE disrhs SUBROUTINE meshdist(coefs, x, nx) DOUBLE PRECISION, INTENT(in) :: coefs(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) END SUBROUTINE meshdist SUBROUTINE ibcmat(mat, ny) USE matrix TYPE(pbmat), INTENT(inout) :: mat INTEGER, INTENT(in) :: ny END SUBROUTINE ibcmat SUBROUTINE ibcrhs(rhs, ny) DOUBLE PRECISION, INTENT(inout) :: rhs(:) INTEGER, INTENT(in) :: ny END SUBROUTINE ibcrhs SUBROUTINE coefeq(x, y, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x, y INTEGER, INTENT(out) :: idt(:,:), idw(:,:) DOUBLE PRECISION, INTENT(out) :: c(:) END SUBROUTINE coefeq END INTERFACE ! NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlconmat, & & coefx, coefy !=========================================================================== ! 1.0 Prologue ! ! Read in data specific to run ! nx = 8 ! Number of intervals in x ny = 8 ! Number of intervals in y nidbas = (/3,3/) ! Degree of splines ngauss = (/4,4/) ! Number of Gauss points/interval mbess = 2 ! Exponent of differential problem nterms = 2 ! Number of terms in weak form nlppform = .TRUE. ! Use PPFORM for gridval or not nlconmat = .TRUE. ! Use CONMAT instead of DISMAT coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function ! READ(*,newrun) WRITE(*,newrun) ! ! Define grid on x (=radial) & y (=poloidal) axis ! pi = 4.0d0*ATAN(1.0d0) ALLOCATE(xgrid(0:nx), ygrid(0:ny)) xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 CALL meshdist(coefx, xgrid, nx) ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi CALL meshdist(coefy, ygrid, ny) ! WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny) !=========================================================================== ! 2.0 Discretize the PDE ! ! Set up spline ! CALL set_spline(nidbas, ngauss, & & xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform) !!$ WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots !!$ WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots ! ! FE matrix assembly ! nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix kl = (nidbas(1)+1)*ny -1 ! Number of sub-diagnonals ku = kl ! Number of super-diagnonals WRITE(*,'(a,5i6)') 'nrank, kl, ku', nrank, kl, ku ! CALL init(ku, nrank, nterms, mat) t0 = seconds() IF(nlconmat) THEN CALL conmat(splxy, mat, coefeq) ELSE CALL dismat(splxy, mat) END IF tmat = seconds() - t0 ALLOCATE(arr(nrank)) ! ! BC on Matrix ! IF(nrank.LT.100) & & WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', mat%val(ku+1,:) CALL ibcmat(mat, ny) IF(nrank.LT.100) & & WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', mat%val(ku+1,:) ! ! RHS assembly ! ALLOCATE(rhs(nrank), sol(nrank)) CALL disrhs(mbess, splxy, rhs) ! ! BC on RHS ! CALL ibcrhs(rhs, ny) WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem() !=========================================================================== ! 3.0 Solve the dicretized system ! t0 = seconds() CALL factor(mat) tfact = seconds() - t0 gflops1 = dopla('DPBTRF',nrank,nrank,kl,ku,0) / tfact / 1.d9 t0 = seconds() CALL bsolve(mat, rhs, sol) ! ! Backtransform of solution ! sol(1:ny-1) = sol(ny) ! ! Spline coefficients, taking into account of periodicity in y ! Note: in SOL, y was numbered first. ! dimx = splxy%sp1%dim dimy = splxy%sp2%dim ALLOCATE(bcoef(0:dimx-1, 0:dimy-1)) DO j=0,dimy-1 DO i=0,dimx-1 ij = MODULO(j,ny) + i*ny + 1 bcoef(i,j) = sol(ij) END DO END DO WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef) ! tsolv = seconds() - t0 gflops2 = dopla('DPBTRS',nrank,1,kl,ku,0) / tsolv / 1.d9 !=========================================================================== ! 4.0 Check the solution ! ! Check function values computed with various method ! ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny)) DO i=0,nx DO j=0,ny solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j)) END DO END DO jder = (/0,0/) ! ! Compute PPFORM at first call to gridval IF(nlppform) THEN CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef) END IF ! WRITE(*,'(/a)') '*** Checking solutions' t0 = seconds() DO it=1,nits ! nits iterations for timing CALL gridval(splxy, xgrid, ygrid, solcal, jder) END DO tgrid = (seconds() - t0)/REAL(nits) errsol = solana - solcal IF( SIZE(bcoef,2) .LE. 10 ) THEN CALL prnmat('BCOEF', bcoef) CALL prnmat('SOLANA', solana) CALL prnmat('SOLCAL', solcal) CALL prnmat('ERRSOL', errsol) END IF WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & & norm2(errsol) / norm2(solana) WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid ! ! Check derivatives d/dx and d/dy ! WRITE(*,'(/a)') '*** Checking gradient' DO i=0,nx DO j=0,ny IF( mbess .EQ. 0 ) THEN solana(i,j) = -2.0d0 * xgrid(i) ELSE solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * & & xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) END IF END DO END DO ! jder = (/1,0/) CALL gridval(splxy, xgrid, ygrid, solcal, jder) errsol = solana - solcal WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol) ! DO i=0,nx DO j=0,ny solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j)) END DO END DO ! jder = (/0,1/) CALL gridval(splxy, xgrid, ygrid, solcal, jder) errsol = solana - solcal WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol) !=========================================================================== ! 9.0 Epilogue ! WRITE(*,'(/a)') '---' WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv WRITE(*,'(a,2f10.3)') 'Factor/solve Gflop/s', gflops1, gflops2 ! DEALLOCATE(xgrid, rhs, sol) DEALLOCATE(solcal, solana, errsol) DEALLOCATE(bcoef) DEALLOCATE(arr) CALL destroy_sp(splxy) CALL destroy(mat) ! !=========================================================================== ! CONTAINS SUBROUTINE prntmat(str, a) DOUBLE PRECISION, DIMENSION(:,:) :: a CHARACTER(len=*) :: str INTEGER :: i WRITE(*,'(a)') TRIM(str) DO i=1,SIZE(a,1) WRITE(*,'(10f8.1)') a(i,:) END DO END SUBROUTINE prntmat FUNCTION norm2(x) ! ! Compute the 2-norm of array x ! IMPLICIT NONE DOUBLE PRECISION :: norm2 DOUBLE PRECISION, INTENT(in) :: x(:,:) DOUBLE PRECISION :: sum2 INTEGER :: i, j ! sum2 = 0.0d0 DO i=1,SIZE(x,1) DO j=1,SIZE(x,2) sum2 = sum2 + x(i,j)**2 END DO END DO norm2 = SQRT(sum2) END FUNCTION norm2 SUBROUTINE prnmat(label, mat) CHARACTER(len=*) :: label DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat INTEGER :: i WRITE(*,'(/a)') TRIM(label) DO i=1,SIZE(mat,1) WRITE(*,'(10(1pe12.3))') mat(i,:) END DO END SUBROUTINE prnmat END PROGRAM main ! !+++ SUBROUTINE meshdist(c, x, nx) ! ! Construct an 1d non-equidistant mesh given a ! mesh distribution function. ! IMPLICIT NONE DOUBLE PRECISION, INTENT(in) :: c(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) INTEGER :: nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal !!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) END FUNCTION fdist END SUBROUTINE meshdist !+++ SUBROUTINE dismat(spl, mat) ! ! Assembly of FE matrix mat using spline spl ! USE bsplines USE matrix IMPLICIT NONE TYPE(spline2d), INTENT(in) :: spl TYPE(pbmat), INTENT(inout) :: mat ! INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: n2, nidbas2, ndim2, ng2 INTEGER :: i, j, ig1, ig2 INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:) DOUBLE PRECISION:: contrib ! INTEGER :: kterms ! Number of terms in weak form INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:) ! Derivative order DOUBLE PRECISION, ALLOCATABLE :: coefs(:,:,:) ! Terms in weak form INTEGER, ALLOCATABLE :: left1(:), left2(:) !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1 WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2 ! ! Gauss quadature ! CALL get_gauss(spl%sp1, ng1) CALL get_gauss(spl%sp2, ng2) ALLOCATE(xg1(ng1), wg1(ng1)) ALLOCATE(xg2(ng1), wg2(ng1)) ! ! Weak form ! kterms = mat%nterms ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2)) ALLOCATE(coefs(kterms,ng1,ng2)) ! ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! !=========================================================================== ! 2.0 Assembly loop ! ALLOCATE(left1(ng1)) ALLOCATE(left2(ng2)) DO i=1,n1 CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) left1 = i CALL basfun(xg1, spl%sp1, fun1, left1) DO j=1,n2 CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) left2 = j CALL basfun(xg2, spl%sp2, fun2, left2) ! DO ig1=1,ng1 DO ig2=1,ng2 CALL coefeq(xg1(ig1), xg2(ig2), & & idert(:,:,ig1,ig2), & & iderw(:,:,ig1,ig2), & & coefs(:,ig1,ig2)) END DO END DO ! DO iw1=0,nidbas1 ! Weight function in dir 1 igw1 = i+iw1 DO iw2=0,nidbas2 ! Weight function in dir 2 igw2 = MODULO(j+iw2-1, n2) + 1 irow = igw2 + (igw1-1)*n2 DO it1=0,nidbas1 ! Test function in dir 1 igt1 = i+it1 DO it2=0,nidbas2 ! Test function in dir 2 igt2 = MODULO(j+it2-1, n2) + 1 jcol = igt2 + (igt1-1)*n2 !------------- contrib = 0.0d0 DO ig1=1,ng1 DO ig2=1,ng2 DO iterm=1,kterms contrib = contrib + & & fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * & & fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * & & coefs(iterm,ig1,ig2) * & & fun2(it2,idert(iterm,2,ig1,ig2),ig2) * & & fun1(it1,idert(iterm,1,ig1,ig2),ig1) * & & wg1(ig1) * wg2(ig2) END DO END DO END DO CALL updtmat(mat, irow, jcol, contrib) !------------- END DO END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xg1, wg1, fun1) DEALLOCATE(xg2, wg2, fun2) DEALLOCATE(idert, iderw, coefs) DEALLOCATE(left1,left2) ! CONTAINS SUBROUTINE coefeq(x, y, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x, y INTEGER, INTENT(out) :: idt(:,:), idw(:,:) DOUBLE PRECISION, INTENT(out) :: c(:) ! ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy ! c(1) = x ! idt(1,1) = 1 idt(1,2) = 0 idw(1,1) = 1 idw(1,2) = 0 ! c(2) = 1.d0/x idt(2,1) = 0 idt(2,2) = 1 idw(2,1) = 0 idw(2,2) = 1 END SUBROUTINE coefeq END SUBROUTINE dismat SUBROUTINE disrhs(mbess, spl, rhs) ! ! Assembly the RHS using 2d spline spl ! USE bsplines IMPLICIT NONE INTEGER, INTENT(in) :: mbess TYPE(spline2d), INTENT(in) :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:) INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: n2, nidbas2, ndim2, ng2 INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) DOUBLE PRECISION:: contrib !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) ! ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) ! ! Gauss quadature ! CALL get_gauss(spl%sp1, ng1) CALL get_gauss(spl%sp2, ng2) WRITE(*,'(/a, 2i3)') 'Gauss points and weights, ngauss =', ng1, ng2 ALLOCATE(xg1(ng1), wg1(ng1)) ALLOCATE(xg2(ng1), wg2(ng1)) !=========================================================================== ! 2.0 Assembly loop ! nrank = SIZE(rhs) rhs(1:nrank) = 0.0d0 ! DO i=1,n1 CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) DO ig1=1,ng1 CALL basfun(xg1(ig1), spl%sp1, fun1, i) DO j=1,n2 CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) DO ig2=1,ng2 CALL basfun(xg2(ig2), spl%sp2, fun2, j) contrib = wg1(ig1)*wg2(ig2) * & & rhseq(xg1(ig1),xg2(ig2), mbess) DO k1=0,nidbas1 i1 = i+k1 DO k2=0,nidbas2 j2 = MODULO(j+k2-1,n2) + 1 ij = j2 + (i1-1)*n2 rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1) END DO END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xg1, wg1, fun1) DEALLOCATE(xg2, wg2, fun2) ! CONTAINS DOUBLE PRECISION FUNCTION rhseq(x1, x2, m) DOUBLE PRECISION, INTENT(in) :: x1, x2 INTEGER, INTENT(in) :: m rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2) END FUNCTION rhseq END SUBROUTINE disrhs SUBROUTINE ibcmat(mat, ny) ! ! Apply BC on matrix ! USE matrix IMPLICIT NONE TYPE(pbmat), INTENT(inout) :: mat INTEGER, INTENT(in) :: ny INTEGER :: kl, ku, nrank, i, j DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:) INTEGER :: i0, ii INTEGER :: i0_arr(ny) !=========================================================================== ! 1.0 Prologue ! ku = mat%ku kl = ku nrank = mat%rank ALLOCATE(zsum(nrank), arr(nrank)) ! i0 = nrank - ku WRITE(*,'(a,i6)') 'Estimated i0', i0 DO i=1,ny CALL getcol(mat, nrank-ny+i, arr) DO ii=1,nrank i0_arr(i)=ii IF(arr(ii) .NE. 0.0d0) EXIT END DO END DO !!$ WRITE(*,'(a/(10i6))') 'i0_arr', i0_arr ! !=========================================================================== ! 2.0 Unicity at the axis ! ! The vertical sum on the NY-th row ! zsum = 0.0d0 DO i=1,ny arr = 0.0d0 CALL getrow(mat, i, arr) DO j=1,ny+ku zsum(j) = zsum(j) + arr(j) END DO END DO ! zsum(ny) = SUM(zsum(1:ny)) ! using symmetry CALL putrow(mat, ny, zsum) ! ! The away operator ! DO i = 1,ny-1 arr = 0.0d0; arr(i) = 1.0d0 CALL putrow(mat, i, arr) END DO !=========================================================================== ! 3.0 Dirichlet on right boundary ! DO i = nrank, nrank-ny+1, -1 CALL getcol(mat, i, arr) arr = 0.0d0; arr(i) = 1.0d0 CALL putrow(mat, i, arr) END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(zsum, arr) ! END SUBROUTINE ibcmat !+++ SUBROUTINE ibcrhs(rhs, ny) ! ! Apply BC on RHS ! IMPLICIT NONE DOUBLE PRECISION, INTENT(inout) :: rhs(:) INTEGER, INTENT(in) :: ny INTEGER :: nrank DOUBLE PRECISION :: zsum !=========================================================================== ! 1.0 Prologue ! nrank = SIZE(rhs,1) !=========================================================================== ! 2.0 Unicity at the axis ! ! The vertical sum ! zsum = SUM(rhs(1:ny)) rhs(ny) = zsum rhs(1:ny-1) = 0.0d0 !=========================================================================== ! 3.0 Dirichlet on right boundary ! rhs(nrank-ny+1:nrank) = 0.0d0 END SUBROUTINE ibcrhs !++++ SUBROUTINE coefeq(x, y, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x, y INTEGER, INTENT(out) :: idt(:,:), idw(:,:) DOUBLE PRECISION, INTENT(out) :: c(:) ! ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy ! c(1) = x ! idt(1,1) = 1 idt(1,2) = 0 idw(1,1) = 1 idw(1,2) = 0 ! c(2) = 1.d0/x idt(2,1) = 0 idt(2,2) = 1 idw(2,1) = 0 idw(2,2) = 1 END SUBROUTINE coefeq diff --git a/examples/pde2d_petsc.f90 b/examples/pde2d_petsc.f90 index 4378b3f..54cd8db 100644 --- a/examples/pde2d_petsc.f90 +++ b/examples/pde2d_petsc.f90 @@ -1,795 +1,795 @@ !> !> @file pde2d_petsc.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> ! ! Solving the 2d PDE using splines and PETSC matrix ! ! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0 ! exact solution: f(x,y) = (1-x^2) x^m cos(my) ! MODULE pde2d_petsc_mod USE bsplines USE petsc_bsplines IMPLICIT NONE CONTAINS !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE dismat(spl, mat) ! ! Assembly of FE matrix mat using spline spl ! TYPE(spline2d), INTENT(in) :: spl TYPE(petsc_mat), INTENT(inout) :: mat ! INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: n2, nidbas2, ndim2, ng2 INTEGER :: i, j, ig1, ig2 INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:) DOUBLE PRECISION:: contrib ! INTEGER :: kterms ! Number of terms in weak form INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:) ! Derivative order DOUBLE PRECISION, ALLOCATABLE :: coefs(:,:,:) ! Terms in weak form INTEGER, ALLOCATABLE :: left1(:), left2(:) ! INTEGER :: istart, iend !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) !!$ WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1 !!$ WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2 ! ! Gauss quadature ! CALL get_gauss(spl%sp1, ng1) CALL get_gauss(spl%sp2, ng2) ALLOCATE(xg1(ng1), wg1(ng1)) ALLOCATE(xg2(ng1), wg2(ng1)) ! ! Weak form ! kterms = mat%nterms ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2)) ALLOCATE(coefs(kterms,ng1,ng2)) ! ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! ! ! Matrix partition ! istart = mat%istart iend = mat%iend !=========================================================================== ! 2.0 Assembly loop ! ALLOCATE(left1(ng1)) ALLOCATE(left2(ng2)) DO i=1,n1 CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) left1 = i CALL basfun(xg1, spl%sp1, fun1, left1) DO j=1,n2 CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) left2 = j CALL basfun(xg2, spl%sp2, fun2, left2) ! DO ig1=1,ng1 DO ig2=1,ng2 CALL coefeq(xg1(ig1), xg2(ig2), & & idert(:,:,ig1,ig2), & & iderw(:,:,ig1,ig2), & & coefs(:,ig1,ig2)) END DO END DO ! DO iw1=0,nidbas1 ! Weight function in dir 1 igw1 = i+iw1 DO iw2=0,nidbas2 ! Weight function in dir 2 igw2 = MODULO(j+iw2-1, n2) + 1 irow = igw2 + (igw1-1)*n2 IF( irow.GE.istart .AND. irow.LE.iend) THEN DO it1=0,nidbas1 ! Test function in dir 1 igt1 = i+it1 DO it2=0,nidbas2 ! Test function in dir 2 igt2 = MODULO(j+it2-1, n2) + 1 jcol = igt2 + (igt1-1)*n2 !------------- contrib = 0.0d0 DO ig1=1,ng1 DO ig2=1,ng2 DO iterm=1,kterms contrib = contrib + & & fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * & & fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * & & coefs(iterm,ig1,ig2) * & & fun2(it2,idert(iterm,2,ig1,ig2),ig2) * & & fun1(it1,idert(iterm,1,ig1,ig2),ig1) * & & wg1(ig1) * wg2(ig2) END DO END DO END DO CALL updtmat(mat, irow, jcol, contrib) !------------- END DO END DO END IF END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xg1, wg1, fun1) DEALLOCATE(xg2, wg2, fun2) DEALLOCATE(idert, iderw, coefs) DEALLOCATE(left1,left2) ! CONTAINS SUBROUTINE coefeq(x, y, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x, y INTEGER, INTENT(out) :: idt(:,:), idw(:,:) DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1)) ! ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy ! c(1) = x ! idt(1,1) = 1 idt(1,2) = 0 idw(1,1) = 1 idw(1,2) = 0 ! c(2) = 1.d0/x idt(2,1) = 0 idt(2,2) = 1 idw(2,1) = 0 idw(2,2) = 1 END SUBROUTINE coefeq END SUBROUTINE dismat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE disrhs(mbess, spl, rhs) ! ! Assembly the RHS using 2d spline spl ! INTEGER, INTENT(in) :: mbess TYPE(spline2d), INTENT(in) :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:) INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: n2, nidbas2, ndim2, ng2 INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) DOUBLE PRECISION:: contrib !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) ! ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) ! ! Gauss quadature ! CALL get_gauss(spl%sp1, ng1) CALL get_gauss(spl%sp2, ng2) ALLOCATE(xg1(ng1), wg1(ng1)) ALLOCATE(xg2(ng1), wg2(ng1)) !=========================================================================== ! 2.0 Assembly loop ! nrank = SIZE(rhs) rhs(1:nrank) = 0.0d0 ! DO i=1,n1 CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) DO ig1=1,ng1 CALL basfun(xg1(ig1), spl%sp1, fun1, i) DO j=1,n2 CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) DO ig2=1,ng2 CALL basfun(xg2(ig2), spl%sp2, fun2, j) contrib = wg1(ig1)*wg2(ig2) * & & rhseq(xg1(ig1),xg2(ig2), mbess) DO k1=0,nidbas1 i1 = i+k1 DO k2=0,nidbas2 j2 = MODULO(j+k2-1,n2) + 1 ij = j2 + (i1-1)*n2 rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1) END DO END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xg1, wg1, fun1) DEALLOCATE(xg2, wg2, fun2) ! CONTAINS DOUBLE PRECISION FUNCTION rhseq(x1, x2, m) DOUBLE PRECISION, INTENT(in) :: x1, x2 INTEGER, INTENT(in) :: m rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2) END FUNCTION rhseq END SUBROUTINE disrhs ! !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE ibcmat(mat, ny) ! ! Apply BC on matrix ! TYPE(petsc_mat), INTENT(inout) :: mat INTEGER, INTENT(in) :: ny INTEGER :: nrank, i, j DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:) !=========================================================================== ! 1.0 Prologue ! nrank = mat%rank !=========================================================================== ! 2.0 Unicity at the axis ! ! The vertical sum on the NY-th row ! ALLOCATE(zsum(nrank), arr(nrank)) zsum = 0.0d0 DO i=1,ny arr = 0.0d0 CALL getrow(mat, i, arr) zsum(:) = zsum(:) + arr(:) END DO IF(mat%nlsym) THEN zsum(ny) = SUM(zsum(1:ny)) ! using symmetry END IF CALL putrow(mat, ny, zsum) ! ! The horizontal sum on the NY-th column ! IF( .NOT.mat%nlsym) THEN zsum = 0.0d0 DO j=1,ny arr = 0.0d0 CALL getcol(mat, j, arr) zsum(ny:) = zsum(ny:) + arr(ny:) END DO CALL putcol(mat, ny, zsum) END IF ! ! The away operator ! IF( .NOT.mat%nlsym) THEN DO j = 1,ny-1 arr = 0.0d0; arr(j) = 1.0d0 CALL putcol(mat, j, arr) END DO END IF ! DO i = 1,ny-1 arr = 0.0d0; arr(i) = 1.0d0 CALL putrow(mat, i, arr) END DO DEALLOCATE(zsum) DEALLOCATE(arr) ! !=========================================================================== ! 3.0 Dirichlet on right boundary ! ALLOCATE(arr(nrank)) IF( .NOT.mat%nlsym) THEN DO j = nrank, nrank-ny+1, -1 arr = 0.0d0; arr(j) = 1.0d0 CALL putcol(mat, j, arr) END DO END IF ! DO i = nrank, nrank-ny+1, -1 arr = 0.0d0; arr(i) = 1.0d0 CALL putrow(mat, i, arr) END DO DEALLOCATE(arr) !=========================================================================== ! 9.0 Epilogue ! END SUBROUTINE ibcmat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE ibcrhs(rhs, ny) ! ! Apply BC on RHS ! DOUBLE PRECISION, INTENT(inout) :: rhs(:) INTEGER, INTENT(in) :: ny INTEGER :: nrank DOUBLE PRECISION :: zsum !=========================================================================== ! 1.0 Prologue ! nrank = SIZE(rhs,1) !=========================================================================== ! 2.0 Unicity at the axis ! ! The vertical sum ! zsum = SUM(rhs(1:ny)) rhs(ny) = zsum rhs(1:ny-1) = 0.0d0 !=========================================================================== ! 3.0 Dirichlet on right boundary ! rhs(nrank-ny+1:nrank) = 0.0d0 END SUBROUTINE ibcrhs !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ END MODULE pde2d_petsc_mod PROGRAM main USE pde2d_petsc_mod USE futils ! IMPLICIT NONE INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms INTEGER :: nitmax=10000, nits, nits0, ntrials=0 DOUBLE PRECISION :: rtol=1.e-9 LOGICAL :: nlppform INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it DOUBLE PRECISION :: pi, coefx(5), coefy(5) DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol DOUBLE PRECISION, ALLOCATABLE :: row_sum(:), row(:) TYPE(spline2d) :: splxy TYPE(petsc_mat) :: mat ! CHARACTER(len=128) :: file='pde2d_petsc.h5' INTEGER :: fid DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol DOUBLE PRECISION :: mem, dopla DOUBLE PRECISION :: t0, tmat, tsolv, tsolv0, tgrid, gflops1, gflops2 LOGICAL :: nlsym LOGICAL :: nlforce_zero ! INTEGER :: ierr, me INTEGER(kind=8) :: nzfact INTEGER :: nnz_loc, nnz DOUBLE PRECISION :: mem_loc, mem_min, mem_max ! CHARACTER(len=128) :: matfile='mat.dat' logical :: file_exist ! NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlsym,& & nlforce_zero, coefx, coefy, nitmax, rtol, ntrials, & & matfile !=========================================================================== ! 1.0 Prologue ! CALL mpi_init(ierr) CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) ! ! Read in data specific to run ! nx = 8 ! Number of intervals in x ny = 8 ! Number of intervals in y nidbas = (/3,3/) ! Degree of splines ngauss = (/4,4/) ! Number of Gauss points/interval mbess = 2 ! Exponent of differential problem nterms = 2 ! Number of terms in weak form nlppform = .TRUE. ! Use PPFORM for gridval or not nlsym = .FALSE. ! Symmetric or unsymmetric matrix nlforce_zero = .FALSE. ! Remove existing nodes with zero val in putele/row/ele coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function nitmax = 10000 ! Max number ofviterations rtol = 1.e-9 ! Relative tolerance ntrials = 0 ! Run ntrials solution steps after setup matfile = '' ! Save matrix file to matfile if not empty ! IF(me.EQ.0) THEN READ(*,newrun) WRITE(*,newrun) END IF CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(mbess, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nterms, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nlppform, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nlsym, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nlforce_zero, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(coefx, 5, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(coefy, 5, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nitmax, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(rtol, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(ntrials, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(matfile, 128, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) ! ! Define grid on x (=radial) & y (=poloidal) axis ! pi = 4.0d0*ATAN(1.0d0) ALLOCATE(xgrid(0:nx), ygrid(0:ny)) xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 CALL meshdist(coefx, xgrid, nx) ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi CALL meshdist(coefy, ygrid, ny) ! ! Create hdf5 file ! IF(me.EQ.0) THEN CALL creatf(file, fid, 'PDE2D Result File', real_prec='d') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NY', ny) CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) CALL attach(fid, '/', 'NGAUSS1', ngauss(1)) CALL attach(fid, '/', 'NGAUSS2', ngauss(2)) CALL attach(fid, '/', 'MBESS', mbess) END IF !=========================================================================== ! 2.0 Discretize the PDE ! ! Set up spline ! t0 = mpi_wtime() CALL set_spline(nidbas, ngauss, & & xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform) ! ! FE matrix assembly ! nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix IF(me.EQ.0) WRITE(*,'(a,i12 )') 'nrank', nrank ! CALL init(nrank, nterms, mat, comm=MPI_COMM_WORLD) ! INQUIRE(file=TRIM(matfile), exist=file_exist) IF( file_exist ) THEN t0 = mpi_wtime() CALL load_mat(mat, matfile) tmat = mpi_wtime()-t0 if(me.eq.0) WRITE(*,'(a,1pe12.3)') 'Mat read time (s) ', tmat ELSE t0 = mpi_wtime() CALL dismat(splxy, mat) CALL ibcmat(mat, ny) ! !!$ ALLOCATE(row_sum(mat%istart:mat%iend)) !!$ ALLOCATE(row(mat%rank)) !!$ DO i=mat%istart,mat%iend !!$ row = 0.0d0 !!$ CALL getrow(mat, i, row) !!$ row_sum(i) = SUM(row) !!$ END DO !!$ WRITE(*,'(a,i3.3,a,(10(1pe12.3)))') 'PE', me, ': row_sum', row_sum ! CALL to_mat(mat) ! !!$ DO i=mat%istart,mat%iend !!$ row = 0.0d0 !!$ CALL getrow(mat, i, row) !!$ row_sum(i) = SUM(row) !!$ END DO !!$ WRITE(*,'(a,i3.3,a,(10(1pe12.3)))') 'PE', me, ': row_sum(after)', row_sum CALL save_mat(mat, matfile) tmat = mpi_wtime() - t0 IF(me.EQ.0) WRITE(*,'(a,1pe12.3)') 'Mat construction time (s) ', tmat END IF ! IF(me.EQ.0) THEN WRITE(*,'(a,2i16)') 'Mat rank, nnz', mat%rank, mat%nnz END IF ! ! RHS assembly ! t0=mpi_wtime() ALLOCATE(rhs(nrank), sol(nrank)) CALL disrhs(mbess, splxy, rhs) ! ! BC on RHS ! CALL ibcrhs(rhs, ny) ! mem_loc = mem() CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after DISMAT') IF(me.EQ.0) THEN CALL putarr(fid, '/RHS', rhs, 'RHS of linear system') END IF IF(me.EQ.0) WRITE(*,'(a,1pe12.3)') 'RHS construction time (s) ', mpi_wtime()-t0 !=========================================================================== ! 3.0 Solve the dicretized system ! CALL minmax_i8(mat%nnz_loc, MPI_COMM_WORLD, 'local nnz') IF(me.EQ.0) THEN WRITE(*,'(a,i16)') 'Number of non-zeros of matrix = ', mat%nnz END IF ! t0 = mpi_wtime() CALL bsolve(mat, rhs, sol, rtol, nitmax, nits0) tsolv0 = mpi_wtime() - t0 IF(me.EQ.0) WRITE(*,'(a,1pe12.3,i8)') 'Solve+setup time(s) and nits ', tsolv0, nits0 ! IF(ntrials .GT. 0) THEN t0 = mpi_wtime() DO it=1,ntrials ! ntrials iterations for timing sol = 0.0d0 CALL bsolve(mat, rhs, sol, rtol, nitmax, nits) sol(1:ny-1) = sol(ny) END DO tsolv = (mpi_wtime() - t0)/REAL(ntrials) IF(me.EQ.0) WRITE(*,'(a,1pe12.3,i8)') 'Solve time(s) and nits ', tsolv, nits END IF ! mem_loc = mem() CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after BSOLVE') ! CALL destroy(mat) ! ! Spline coefficients, taking into account of periodicity in y ! Note: in SOL, y was numbered first. ! dimx = splxy%sp1%dim dimy = splxy%sp2%dim ALLOCATE(bcoef(0:dimx-1, 0:dimy-1)) DO j=0,dimy-1 DO i=0,dimx-1 ij = MODULO(j,ny) + i*ny + 1 bcoef(i,j) = sol(ij) END DO END DO ! mem_loc = mem() CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after setting bcoef') !=========================================================================== ! 4.0 Check the solution ! ! Check function values computed with various method ! IF(me.EQ.0) THEN ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny)) DO i=0,nx DO j=0,ny solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j)) END DO END DO jder = (/0,0/) ! ! Compute PPFORM/BCOEFS at first call to gridval CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef) ! WRITE(*,'(/a)') '*** Checking solutions' errsol = solana - solcal WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & & norm2(errsol) / norm2(solana) ! CALL putarr(fid, '/xgrid', xgrid, 'r') CALL putarr(fid, '/ygrid', ygrid, '\theta') CALL putarr(fid, '/sol', solcal, 'Solutions') CALL putarr(fid, '/solana', solana,'Exact solutions') CALL putarr(fid, '/errors', errsol, 'Errors') ! ! Check derivatives d/dx and d/dy ! WRITE(*,'(/a)') '*** Checking gradient' DO i=0,nx DO j=0,ny IF( mbess .EQ. 0 ) THEN solana(i,j) = -2.0d0 * xgrid(i) ELSE solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * & & xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) END IF END DO END DO ! jder = (/1,0/) CALL gridval(splxy, xgrid, ygrid, solcal, jder) errsol = solana - solcal CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions') WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol) ! DO i=0,nx DO j=0,ny solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j)) END DO END DO ! jder = (/0,1/) CALL gridval(splxy, xgrid, ygrid, solcal, jder) CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions') errsol = solana - solcal WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol) ! DEALLOCATE(solcal, solana, errsol) END IF !!$!=========================================================================== !!$! 5.0 Clear the matrix and recompute !!$! !!$ IF(me.EQ.0) WRITE(*,'(/a)') 'Recompute the solver ...' !!$ t0 = mpi_wtime()() !!$ CALL clear_mat(mat) !!$ CALL dismat(splxy, mat) !!$ CALL ibcmat(mat, ny) !!$ tmat = mpi_wtime()()-t0 !!$! !!$ t0 = mpi_wtime()() !!$ CALL numfact(mat, debug=.FALSE.) !!$ tfact = mpi_wtime()()-t0 !!$ gflops1 = mat%petsc_par%RINFOG(3) / tfact / 1.d9 !!$! !!$ t0 = mpi_wtime()() !!$ ALLOCATE(newsol(nrank)) !!$ CALL bsolve(mat, rhs, newsol) !!$ newsol(1:ny-1) = newsol(ny) !!$ tsolv = mpi_wtime()()-t0 !!$! !!$ IF(me.EQ.0) THEN !!$ WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol)) !!$ WRITE(*,'(/a)') '---' !!$ WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat !!$ WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact !!$ WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv !!$ WRITE(*,'(a,1pe12.3)') 'Total (s) ', tmat+tfact+tsolv !!$ WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1 !!$ END IF !!$! !!$ DEALLOCATE(newsol) !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xgrid, rhs, sol) DEALLOCATE(bcoef) !!$ CALL destroy(mat) CALL destroy_sp(splxy) ! IF(me.EQ.0) CALL closef(fid) CALL mpi_finalize(ierr) !=========================================================================== ! CONTAINS FUNCTION norm2(x) ! ! Compute the 2-norm of array x ! IMPLICIT NONE DOUBLE PRECISION :: norm2 DOUBLE PRECISION, INTENT(in) :: x(:,:) DOUBLE PRECISION :: sum2 INTEGER :: i, j ! sum2 = 0.0d0 DO i=1,SIZE(x,1) DO j=1,SIZE(x,2) sum2 = sum2 + x(i,j)**2 END DO END DO norm2 = SQRT(sum2) END FUNCTION norm2 ! SUBROUTINE minmax_i(k, comm, str) CHARACTER(len=*), INTENT(in) :: str INTEGER, INTENT(in) :: k INTEGER, INTENT(in) :: comm INTEGER :: me, ierr, kmin, kmax CALL mpi_comm_rank(comm, me, ierr) CALL mpi_reduce(k, kmin, 1, MPI_INTEGER, MPI_MIN, 0, comm, ierr) CALL mpi_reduce(k, kmax, 1, MPI_INTEGER, MPI_MAX, 0, comm, ierr) IF( me.EQ.0 ) THEN WRITE(*,'(a,2i16)') 'Minmax of ' // TRIM(str), kmin, kmax END IF END SUBROUTINE minmax_i ! SUBROUTINE minmax_i8(k, comm, str) CHARACTER(len=*), INTENT(in) :: str INTEGER(8), INTENT(in) :: k INTEGER, INTENT(in) :: comm INTEGER :: me, ierr INTEGER(8) :: kmin, kmax CALL mpi_comm_rank(comm, me, ierr) CALL mpi_reduce(k, kmin, 1, MPI_INTEGER8, MPI_MIN, 0, comm, ierr) CALL mpi_reduce(k, kmax, 1, MPI_INTEGER8, MPI_MAX, 0, comm, ierr) IF( me.EQ.0 ) THEN WRITE(*,'(a,2i16)') 'Minmax of ' // TRIM(str), kmin, kmax END IF END SUBROUTINE minmax_i8 !! SUBROUTINE minmax_r(x, comm, str) CHARACTER(len=*), INTENT(in) :: str DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(in) :: comm INTEGER :: me, ierr DOUBLE PRECISION :: xmin, xmax CALL mpi_comm_rank(comm, me, ierr) CALL mpi_reduce(x, xmin, 1, MPI_DOUBLE_PRECISION, MPI_MIN, 0, comm, ierr) CALL mpi_reduce(x, xmax, 1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierr) IF( me.EQ.0 ) THEN WRITE(*,'(a,2(1pe12.4))') 'Minmax of ' // TRIM(str), xmin, xmax END IF END SUBROUTINE minmax_r END PROGRAM main ! !+++ SUBROUTINE meshdist(c, x, nx) ! ! Construct an 1d non-equidistant mesh given a ! mesh distribution function. ! IMPLICIT NONE DOUBLE PRECISION, INTENT(in) :: c(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) INTEGER :: nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal !!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) END FUNCTION fdist END SUBROUTINE meshdist !+++ diff --git a/examples/pde2d_pwsmp.f90 b/examples/pde2d_pwsmp.f90 index 1cd7592..7e001a3 100644 --- a/examples/pde2d_pwsmp.f90 +++ b/examples/pde2d_pwsmp.f90 @@ -1,776 +1,776 @@ !> !> @file pde2d_pwsmp.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> ! ! Solving the 2d PDE using splines and WSMP non-symmetric matrix ! ! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0 ! exact solution: f(x,y) = (1-x^2) x^m cos(my) ! MODULE pde2d_pwsmp_mod USE bsplines USE pwsmp_bsplines IMPLICIT NONE CONTAINS !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE dismat(spl, mat) ! ! Assembly of FE matrix mat using spline spl ! TYPE(spline2d), INTENT(in) :: spl TYPE(wsmp_mat), INTENT(inout) :: mat ! INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: n2, nidbas2, ndim2, ng2 INTEGER :: i, j, ig1, ig2 INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:) DOUBLE PRECISION:: contrib ! INTEGER :: kterms ! Number of terms in weak form INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:) ! Derivative order DOUBLE PRECISION, ALLOCATABLE :: coefs(:,:,:) ! Terms in weak form INTEGER, ALLOCATABLE :: left1(:), left2(:) ! INTEGER :: istart, iend !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) ! ! Gauss quadature ! CALL get_gauss(spl%sp1, ng1) CALL get_gauss(spl%sp2, ng2) ALLOCATE(xg1(ng1), wg1(ng1)) ALLOCATE(xg2(ng1), wg2(ng1)) ! ! Weak form ! kterms = mat%nterms ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2)) ALLOCATE(coefs(kterms,ng1,ng2)) ! ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! ! ! Matrix partition ! istart = mat%istart iend = mat%iend !=========================================================================== ! 2.0 Assembly loop ! ALLOCATE(left1(ng1)) ALLOCATE(left2(ng2)) DO i=1,n1 CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) left1 = i CALL basfun(xg1, spl%sp1, fun1, left1) DO j=1,n2 CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) left2 = j CALL basfun(xg2, spl%sp2, fun2, left2) ! DO ig1=1,ng1 DO ig2=1,ng2 CALL coefeq(xg1(ig1), xg2(ig2), & & idert(:,:,ig1,ig2), & & iderw(:,:,ig1,ig2), & & coefs(:,ig1,ig2)) END DO END DO ! DO iw1=0,nidbas1 ! Weight function in dir 1 igw1 = i+iw1 DO iw2=0,nidbas2 ! Weight function in dir 2 igw2 = MODULO(j+iw2-1, n2) + 1 irow = igw2 + (igw1-1)*n2 IF( irow.GE.istart .AND. irow.LE.iend) THEN DO it1=0,nidbas1 ! Test function in dir 1 igt1 = i+it1 DO it2=0,nidbas2 ! Test function in dir 2 igt2 = MODULO(j+it2-1, n2) + 1 jcol = igt2 + (igt1-1)*n2 !------------- contrib = 0.0d0 DO ig1=1,ng1 DO ig2=1,ng2 DO iterm=1,kterms contrib = contrib + & & fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * & & fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * & & coefs(iterm,ig1,ig2) * & & fun2(it2,idert(iterm,2,ig1,ig2),ig2) * & & fun1(it1,idert(iterm,1,ig1,ig2),ig1) * & & wg1(ig1) * wg2(ig2) END DO END DO END DO CALL updtmat(mat, irow, jcol, contrib) !------------- END DO END DO END IF END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xg1, wg1, fun1) DEALLOCATE(xg2, wg2, fun2) DEALLOCATE(idert, iderw, coefs) DEALLOCATE(left1,left2) ! CONTAINS SUBROUTINE coefeq(x, y, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x, y INTEGER, INTENT(out) :: idt(:,:), idw(:,:) DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1)) ! ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy ! c(1) = x ! idt(1,1) = 1 idt(1,2) = 0 idw(1,1) = 1 idw(1,2) = 0 ! c(2) = 1.d0/x idt(2,1) = 0 idt(2,2) = 1 idw(2,1) = 0 idw(2,2) = 1 END SUBROUTINE coefeq END SUBROUTINE dismat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE disrhs(mbess, spl, rhs) ! ! Assembly the RHS using 2d spline spl ! INTEGER, INTENT(in) :: mbess TYPE(spline2d), INTENT(in) :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:) INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: n2, nidbas2, ndim2, ng2 INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) DOUBLE PRECISION:: contrib !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) ! ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) ! ! Gauss quadature ! CALL get_gauss(spl%sp1, ng1) CALL get_gauss(spl%sp2, ng2) ALLOCATE(xg1(ng1), wg1(ng1)) ALLOCATE(xg2(ng1), wg2(ng1)) !=========================================================================== ! 2.0 Assembly loop ! nrank = SIZE(rhs) rhs(1:nrank) = 0.0d0 ! DO i=1,n1 CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) DO ig1=1,ng1 CALL basfun(xg1(ig1), spl%sp1, fun1, i) DO j=1,n2 CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) DO ig2=1,ng2 CALL basfun(xg2(ig2), spl%sp2, fun2, j) contrib = wg1(ig1)*wg2(ig2) * & & rhseq(xg1(ig1),xg2(ig2), mbess) DO k1=0,nidbas1 i1 = i+k1 DO k2=0,nidbas2 j2 = MODULO(j+k2-1,n2) + 1 ij = j2 + (i1-1)*n2 rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1) END DO END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xg1, wg1, fun1) DEALLOCATE(xg2, wg2, fun2) ! CONTAINS DOUBLE PRECISION FUNCTION rhseq(x1, x2, m) DOUBLE PRECISION, INTENT(in) :: x1, x2 INTEGER, INTENT(in) :: m rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2) END FUNCTION rhseq END SUBROUTINE disrhs ! !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE ibcmat(mat, ny) ! ! Apply BC on matrix ! TYPE(wsmp_mat), INTENT(inout) :: mat INTEGER, INTENT(in) :: ny INTEGER :: nrank, i, j DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:) !=========================================================================== ! 1.0 Prologue ! nrank = mat%rank ALLOCATE(zsum(nrank), arr(nrank)) !=========================================================================== ! 2.0 Unicity at the axis ! ! The vertical sum on the NY-th row ! zsum = 0.0d0 DO i=1,ny arr = 0.0d0 CALL getrow(mat, i, arr) zsum(:) = zsum(:) + arr(:) END DO CALL putrow(mat, ny, zsum) ! ! The horizontal sum on the NY-th column ! IF( .NOT.mat%nlsym) THEN zsum = 0.0d0 DO j=1,ny arr = 0.0d0 CALL getcol(mat, j, arr) zsum(ny:) = zsum(ny:) + arr(ny:) END DO CALL putcol(mat, ny, zsum) END IF ! ! The away operator ! IF( .NOT.mat%nlsym) THEN DO j = 1,ny-1 arr = 0.0d0; arr(j) = 1.0d0 CALL putcol(mat, j, arr) END DO END IF ! DO i = 1,ny-1 arr = 0.0d0; arr(i) = 1.0d0 CALL putrow(mat, i, arr) END DO ! !=========================================================================== ! 3.0 Dirichlet on right boundary ! DO j = nrank, nrank-ny+1, -1 arr = 0.0d0; arr(j) = 1.0d0 CALL putcol(mat, j, arr) END DO ! DO i = nrank, nrank-ny+1, -1 arr = 0.0d0; arr(i) = 1.0d0 CALL putrow(mat, i, arr) END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(zsum, arr) ! END SUBROUTINE ibcmat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE ibcrhs(rhs, ny) ! ! Apply BC on RHS ! DOUBLE PRECISION, INTENT(inout) :: rhs(:) INTEGER, INTENT(in) :: ny INTEGER :: nrank DOUBLE PRECISION :: zsum !=========================================================================== ! 1.0 Prologue ! nrank = SIZE(rhs,1) !=========================================================================== ! 2.0 Unicity at the axis ! ! The vertical sum ! zsum = SUM(rhs(1:ny)) rhs(ny) = zsum rhs(1:ny-1) = 0.0d0 !=========================================================================== ! 3.0 Dirichlet on right boundary ! rhs(nrank-ny+1:nrank) = 0.0d0 END SUBROUTINE ibcrhs !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ END MODULE pde2d_pwsmp_mod PROGRAM main USE pde2d_pwsmp_mod USE futils ! IMPLICIT NONE INCLUDE 'mpif.h' INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms LOGICAL :: nlppform INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it DOUBLE PRECISION :: pi, coefx(5), coefy(5) DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol TYPE(spline2d) :: splxy TYPE(wsmp_mat) :: mat ! CHARACTER(len=128) :: file='pde2d_wsmp.h5' INTEGER :: fid DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol DOUBLE PRECISION :: seconds, mem, dopla DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2 DOUBLE PRECISION :: tconv, treord INTEGER :: nits=100 LOGICAL :: nlsym, nlforce_zero ! INTEGER :: ierr, me, nprocs DOUBLE PRECISION :: mem_loc, mem_min, mem_max ! NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, & & nlsym, nlforce_zero, coefx, coefy !=========================================================================== ! 1.0 Prologue ! CALL mpi_init(ierr) CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) CALL mpi_comm_size(MPI_COMM_WORLD, nprocs, ierr) ! ! Read in data specific to run ! nx = 8 ! Number of intervals in x ny = 8 ! Number of intervals in y nidbas = (/3,3/) ! Degree of splines ngauss = (/4,4/) ! Number of Gauss points/interval mbess = 2 ! Exponent of differential problem nterms = 2 ! Number of terms in weak form nlppform = .TRUE. ! Use PPFORM for gridval or not nlsym = .TRUE. ! Symmetric matrix or not nlforce_zero = .FALSE. ! Remove existing nodes with zero val in putele/row/ele coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function ! IF(me.EQ.0) THEN READ(*,newrun) WRITE(*,newrun) END IF CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(mbess, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nterms, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nlppform, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nlforce_zero, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nlsym, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(coefx, 5, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(coefy, 5, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) ! ! Define grid on x (=radial) & y (=poloidal) axis ! pi = 4.0d0*ATAN(1.0d0) ALLOCATE(xgrid(0:nx), ygrid(0:ny)) xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 CALL meshdist(coefx, xgrid, nx) ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi CALL meshdist(coefy, ygrid, ny) ! ! Create hdf5 file ! if(me.eq.0) then CALL creatf(file, fid, 'PDE2D Result File', real_prec='d') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NY', ny) CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) CALL attach(fid, '/', 'NGAUSS1', ngauss(1)) CALL attach(fid, '/', 'NGAUSS2', ngauss(2)) CALL attach(fid, '/', 'MBESS', mbess) end if !=========================================================================== ! 2.0 Discretize the PDE ! ! Set up spline ! t0 = seconds() CALL set_spline(nidbas, ngauss, & & xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform) ! ! FE matrix assembly ! nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix IF(me.EQ.0) THEN WRITE(*,'(a,i8,a,i4)') 'nrank =', nrank, ' nprocs =', nprocs END IF ! CALL init(nrank, nterms, mat, nlforce_zero=nlforce_zero, nlsym=nlsym, & & comm_in=MPI_COMM_WORLD) WRITE(*,'(a,i4.4,a,3i16)') 'PE', me, ' istart, iend, nloc', mat%istart, mat%iend, & & mat%iend-mat%istart+1 ! CALL dismat(splxy, mat) ! ! BC on Matrix ! CALL ibcmat(mat, ny) tmat = seconds() - t0 ! ! RHS assembly ! ALLOCATE(rhs(nrank), sol(nrank)) CALL disrhs(mbess, splxy, rhs) ! ! BC on RHS ! CALL ibcrhs(rhs, ny) ! mem_loc = mem() CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after DISMAT') IF(me.EQ.0) THEN CALL putarr(fid, '/RHS', rhs, 'RHS of linear system') END IF !=========================================================================== ! 3.0 Solve the dicretized system ! t0 = seconds() CALL to_mat(mat) tconv = seconds() -t0 CALL minmax_i(mat%nnz_loc, MPI_COMM_WORLD, 'local nnz') IF(me.EQ.0) THEN WRITE(*,'(a,i16)') 'Number of non-zeros of matrix = ', mat%nnz END IF ! t0 = seconds() CALL reord_mat(mat) treord = seconds() - t0 ! t0 = seconds() CALL numfact(mat) tfact = seconds() - t0 ! mem_loc = mem() CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after FACTOR') ! IF(me.EQ.0) THEN WRITE(*,'(/a,i6)') 'Number of nonzeros in factor (/1000) = ',mat%p%iparm(24) WRITE(*,'(a,1pe12.3)') 'Number of factorization GFLOPS = ',mat%p%dparm(23)/1.d9 END IF gflops1 = mat%p%dparm(23) / tfact / 1.d9 ! CALL bsolve(mat, rhs, sol) ! t0 = seconds() DO it=1,nits ! nits iterations for timing CALL bsolve(mat, rhs, sol) sol(1:ny-1) = sol(ny) END DO tsolv = (seconds() - t0)/REAL(nits) ! mem_loc = mem() CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after BSOLVE') ! ! Spline coefficients, taking into account of periodicity in y ! Note: in SOL, y was numbered first. ! dimx = splxy%sp1%dim dimy = splxy%sp2%dim ALLOCATE(bcoef(0:dimx-1, 0:dimy-1)) DO j=0,dimy-1 DO i=0,dimx-1 ij = MODULO(j,ny) + i*ny + 1 bcoef(i,j) = sol(ij) END DO END DO IF(me.EQ.0) THEN WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef) CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution') END IF !=========================================================================== ! 4.0 Check the solution ! ! Check function values computed with various method (only on proc 0) ! IF(me.EQ.0) THEN ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny)) DO i=0,nx DO j=0,ny solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j)) END DO END DO jder = (/0,0/) ! ! Compute PPFORM/BCOEFS at first call to gridval CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef) ! WRITE(*,'(/a)') '*** Checking solutions' t0 = seconds() DO it=1,nits ! nits iterations for timing CALL gridval(splxy, xgrid, ygrid, solcal, jder) END DO tgrid = (seconds() - t0)/REAL(nits) errsol = solana - solcal IF( SIZE(bcoef,2) .LE. 10 ) THEN CALL prnmat('BCOEF', bcoef) CALL prnmat('SOLANA', solana) CALL prnmat('SOLCAL', solcal) CALL prnmat('ERRSOL', errsol) END IF WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & & norm2(errsol) / norm2(solana) WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid ! CALL putarr(fid, '/xgrid', xgrid, 'r') CALL putarr(fid, '/ygrid', ygrid, '\theta') CALL putarr(fid, '/sol', solcal, 'Solutions') CALL putarr(fid, '/solana', solana,'Exact solutions') CALL putarr(fid, '/errors', errsol, 'Errors') ! ! Check derivatives d/dx and d/dy ! WRITE(*,'(/a)') '*** Checking gradient' DO i=0,nx DO j=0,ny IF( mbess .EQ. 0 ) THEN solana(i,j) = -2.0d0 * xgrid(i) ELSE solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * & & xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) END IF END DO END DO ! jder = (/1,0/) CALL gridval(splxy, xgrid, ygrid, solcal, jder) errsol = solana - solcal CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions') WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol) ! DO i=0,nx DO j=0,ny solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j)) END DO END DO ! jder = (/0,1/) CALL gridval(splxy, xgrid, ygrid, solcal, jder) CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions') errsol = solana - solcal WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol) ! WRITE(*,'(/a)') '---' WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s) ', tconv WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s) ', treord WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s) ', tsolv WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1 DEALLOCATE(solcal) DEALLOCATE(solana) DEALLOCATE(errsol) END IF !=========================================================================== ! 5.0 Clear the matrix and recompute ! IF(me.EQ.0) WRITE(*,'(/a)') 'Recompute the solver ...' t0 = seconds() CALL clear_mat(mat) CALL dismat(splxy, mat) CALL ibcmat(mat, ny) tmat = seconds()-t0 ! t0 = seconds() !!$ CALL numfact(mat) CALL factor(mat, nlreord=.FALSE.) tfact = seconds()-t0 gflops1 = mat%p%dparm(23) / tfact / 1.d9 ! t0 = seconds() ALLOCATE(newsol(nrank)) CALL bsolve(mat, rhs, newsol) newsol(1:ny-1) = newsol(ny) tsolv = seconds()-t0 ! IF(me.EQ.0) THEN WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol)) WRITE(*,'(/a)') '---' WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact WRITE(*,'(a,1pe12.3)') 'Backsolve(2) time (s) ', tsolv WRITE(*,'(a,1pe12.3)') 'Total (s) ', tmat+tfact+tsolv WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1 END IF ! DEALLOCATE(newsol) !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(bcoef) DEALLOCATE(xgrid, rhs, sol) 9999 CONTINUE CALL mpi_barrier(MPI_COMM_WORLD, ierr) CALL destroy_sp(splxy) CALL destroy(mat) ! IF(me.EQ.0) CALL closef(fid) CALL mpi_finalize(ierr) !=========================================================================== ! CONTAINS FUNCTION norm2(x) ! ! Compute the 2-norm of array x ! IMPLICIT NONE DOUBLE PRECISION :: norm2 DOUBLE PRECISION, INTENT(in) :: x(:,:) DOUBLE PRECISION :: sum2 INTEGER :: i, j ! sum2 = 0.0d0 DO i=1,SIZE(x,1) DO j=1,SIZE(x,2) sum2 = sum2 + x(i,j)**2 END DO END DO norm2 = SQRT(sum2) END FUNCTION norm2 SUBROUTINE prnmat(label, mat) CHARACTER(len=*) :: label DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat INTEGER :: i WRITE(*,'(/a)') TRIM(label) DO i=1,SIZE(mat,1) WRITE(*,'(10(1pe12.3))') mat(i,:) END DO END SUBROUTINE prnmat ! SUBROUTINE minmax_i(k, comm, str) CHARACTER(len=*), INTENT(in) :: str INTEGER, INTENT(in) :: k INTEGER, INTENT(in) :: comm INTEGER :: me, ierr, kmin, kmax CALL mpi_comm_rank(comm, me, ierr) CALL mpi_reduce(k, kmin, 1, MPI_INTEGER, MPI_MIN, 0, comm, ierr) CALL mpi_reduce(k, kmax, 1, MPI_INTEGER, MPI_MAX, 0, comm, ierr) IF( me.EQ.0 ) THEN WRITE(*,'(a,2i16)') 'Minmax of ' // TRIM(str), kmin, kmax END IF END SUBROUTINE minmax_i ! SUBROUTINE minmax_r(x, comm, str) CHARACTER(len=*), INTENT(in) :: str DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(in) :: comm INTEGER :: me, ierr DOUBLE PRECISION :: xmin, xmax CALL mpi_comm_rank(comm, me, ierr) CALL mpi_reduce(x, xmin, 1, MPI_DOUBLE_PRECISION, MPI_MIN, 0, comm, ierr) CALL mpi_reduce(x, xmax, 1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierr) IF( me.EQ.0 ) THEN WRITE(*,'(a,2(1pe12.4))') 'Minmax of ' // TRIM(str), xmin, xmax END IF END SUBROUTINE minmax_r END PROGRAM main ! !+++ SUBROUTINE meshdist(c, x, nx) ! ! Construct an 1d non-equidistant mesh given a ! mesh distribution function. ! IMPLICIT NONE DOUBLE PRECISION, INTENT(in) :: c(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) INTEGER :: nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal !!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) END FUNCTION fdist END SUBROUTINE meshdist !+++ diff --git a/examples/pde2d_sym_pardiso.f90 b/examples/pde2d_sym_pardiso.f90 index 47c7cda..fcd23bf 100644 --- a/examples/pde2d_sym_pardiso.f90 +++ b/examples/pde2d_sym_pardiso.f90 @@ -1,715 +1,715 @@ !> !> @file pde2d_sym_pardiso.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> ! ! Solving the 2d PDE using splines and PARDISO symmetric matrix ! ! -d/dx[x C d/dx]f - 1x/d/dy[Cd/dy] f = \rho, with f(x=1,y) = 0 ! C(x,y) = 1 + \epsilon x cos(y) ! exact solution: f(x,y) = (1-x^2) x^m cos(my) ! MODULE pde2d_sym_pardiso_mod USE bsplines USE pardiso_bsplines IMPLICIT NONE CONTAINS !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE dismat(spl, epsi, mat) ! ! Assembly of FE matrix mat using spline spl ! TYPE(spline2d), INTENT(in) :: spl DOUBLE PRECISION, INTENT(in) :: epsi TYPE(pardiso_mat), INTENT(inout) :: mat ! INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: n2, nidbas2, ndim2, ng2 INTEGER :: i, j, ig1, ig2 INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:) DOUBLE PRECISION:: contrib ! INTEGER :: kterms ! Number of terms in weak form INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:) ! Derivative order DOUBLE PRECISION, ALLOCATABLE :: coefs(:,:,:) ! Terms in weak form INTEGER, ALLOCATABLE :: left1(:), left2(:) !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1 WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2 ! ! Gauss quadature ! CALL get_gauss(spl%sp1, ng1) CALL get_gauss(spl%sp2, ng2) ALLOCATE(xg1(ng1), wg1(ng1)) ALLOCATE(xg2(ng1), wg2(ng1)) ! ! Weak form ! kterms = mat%nterms ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2)) ALLOCATE(coefs(kterms,ng1,ng2)) ! ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! !=========================================================================== ! 2.0 Assembly loop ! ALLOCATE(left1(ng1)) ALLOCATE(left2(ng2)) DO i=1,n1 CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) left1 = i CALL basfun(xg1, spl%sp1, fun1, left1) DO j=1,n2 CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) left2 = j CALL basfun(xg2, spl%sp2, fun2, left2) ! DO ig1=1,ng1 DO ig2=1,ng2 CALL coefeq(xg1(ig1), xg2(ig2), & & idert(:,:,ig1,ig2), & & iderw(:,:,ig1,ig2), & & coefs(:,ig1,ig2)) END DO END DO ! DO iw1=0,nidbas1 ! Weight function in dir 1 igw1 = i+iw1 DO iw2=0,nidbas2 ! Weight function in dir 2 igw2 = MODULO(j+iw2-1, n2) + 1 irow = igw2 + (igw1-1)*n2 DO it1=0,nidbas1 ! Test function in dir 1 igt1 = i+it1 DO it2=0,nidbas2 ! Test function in dir 2 igt2 = MODULO(j+it2-1, n2) + 1 jcol = igt2 + (igt1-1)*n2 !------------- contrib = 0.0d0 DO ig1=1,ng1 DO ig2=1,ng2 DO iterm=1,kterms contrib = contrib + & & fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * & & fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * & & coefs(iterm,ig1,ig2) * & & fun2(it2,idert(iterm,2,ig1,ig2),ig2) * & & fun1(it1,idert(iterm,1,ig1,ig2),ig1) * & & wg1(ig1) * wg2(ig2) END DO END DO END DO CALL updtmat(mat, irow, jcol, contrib) !------------- END DO END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xg1, wg1, fun1) DEALLOCATE(xg2, wg2, fun2) DEALLOCATE(idert, iderw, coefs) DEALLOCATE(left1,left2) ! CONTAINS SUBROUTINE coefeq(x, y, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x, y INTEGER, INTENT(out) :: idt(:,:), idw(:,:) DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1)) ! DOUBLE PRECISION :: zcoef ! ! Weak form = Int(x*C*dw/dx*dt/dx + C/x*dw/dy*dt/dy)dxdy ! C(x,y) = 1 + epsilon*x*cos(y) ! zcoef = 1.0d0 + epsi*x*COS(y) ! c(1) = x*zcoef ! idt(1,1) = 1 idt(1,2) = 0 idw(1,1) = 1 idw(1,2) = 0 ! c(2) = zcoef/x idt(2,1) = 0 idt(2,2) = 1 idw(2,1) = 0 idw(2,2) = 1 END SUBROUTINE coefeq END SUBROUTINE dismat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE disrhs(mbess, epsi, spl, rhs) ! ! Assembly the RHS using 2d spline spl ! INTEGER, INTENT(in) :: mbess DOUBLE PRECISION, INTENT(in) :: epsi TYPE(spline2d), INTENT(in) :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:) ! INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: n2, nidbas2, ndim2, ng2 INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) DOUBLE PRECISION:: contrib !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) ! ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) ! ! Gauss quadature ! CALL get_gauss(spl%sp1, ng1) CALL get_gauss(spl%sp2, ng2) ALLOCATE(xg1(ng1), wg1(ng1)) ALLOCATE(xg2(ng1), wg2(ng1)) !=========================================================================== ! 2.0 Assembly loop ! nrank = SIZE(rhs) rhs(1:nrank) = 0.0d0 ! DO i=1,n1 CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) DO ig1=1,ng1 CALL basfun(xg1(ig1), spl%sp1, fun1, i) DO j=1,n2 CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) DO ig2=1,ng2 CALL basfun(xg2(ig2), spl%sp2, fun2, j) contrib = wg1(ig1)*wg2(ig2) * & & rhseq(xg1(ig1),xg2(ig2), mbess) DO k1=0,nidbas1 i1 = i+k1 DO k2=0,nidbas2 j2 = MODULO(j+k2-1,n2) + 1 ij = j2 + (i1-1)*n2 rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1) END DO END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xg1, wg1, fun1) DEALLOCATE(xg2, wg2, fun2) ! CONTAINS DOUBLE PRECISION FUNCTION rhseq(x, y, m) DOUBLE PRECISION, INTENT(in) :: x, y INTEGER, INTENT(in) :: m DOUBLE PRECISION :: xm ! xm = REAL(m,8) rhseq = x**(m+1) * ( 4.0d0*(xm+1.0d0)*COS(xm*y) + & & epsi*x*( & & ( (3.0d0*(xm+1.0d0) - xm/x**2)*COS((xm-1.0d0)*y) + & & (3.0d0+2.0d0*xm)*COS((xm+1.0d0)*y) ) & & )) END FUNCTION rhseq END SUBROUTINE disrhs ! !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE ibcmat(mat, ny) ! ! Apply BC on matrix ! TYPE(pardiso_mat), INTENT(inout) :: mat INTEGER, INTENT(in) :: ny INTEGER :: nrank, i, j DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:) !=========================================================================== ! 1.0 Prologue ! nrank = mat%rank ALLOCATE(zsum(nrank), arr(nrank)) !=========================================================================== ! 2.0 Unicity at the axis ! ! The vertical sum on the NY-th row ! zsum = 0.0d0 DO i=1,ny arr = 0.0d0 CALL getrow(mat, i, arr) zsum(:) = zsum(:) + arr(:) END DO zsum(ny) = SUM(zsum(1:ny)) ! using symmetry CALL putrow(mat, ny, zsum) ! ! The away operator ! DO i = 1,ny-1 arr = 0.0d0; arr(i) = 1.0d0 CALL putrow(mat, i, arr) END DO ! !=========================================================================== ! 3.0 Dirichlet on right boundary ! DO i = nrank, nrank-ny+1, -1 arr = 0.0d0; arr(i) = 1.0d0 CALL putrow(mat, i, arr) END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(zsum, arr) ! END SUBROUTINE ibcmat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE ibcrhs(rhs, ny) ! ! Apply BC on RHS ! DOUBLE PRECISION, INTENT(inout) :: rhs(:) INTEGER, INTENT(in) :: ny INTEGER :: nrank DOUBLE PRECISION :: zsum !=========================================================================== ! 1.0 Prologue ! nrank = SIZE(rhs,1) !=========================================================================== ! 2.0 Unicity at the axis ! ! The vertical sum ! zsum = SUM(rhs(1:ny)) rhs(ny) = zsum rhs(1:ny-1) = 0.0d0 !=========================================================================== ! 3.0 Dirichlet on right boundary ! rhs(nrank-ny+1:nrank) = 0.0d0 END SUBROUTINE ibcrhs !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ END MODULE pde2d_sym_pardiso_mod PROGRAM main USE pde2d_sym_pardiso_mod USE futils ! IMPLICIT NONE INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms LOGICAL :: nlppform INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it DOUBLE PRECISION :: pi, epsi, coefx(5), coefy(5) DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol TYPE(spline2d) :: splxy TYPE(pardiso_mat) :: mat ! CHARACTER(len=128) :: file='pde2d_sym_pardiso.h5' INTEGER :: fid DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol DOUBLE PRECISION :: seconds, mem, dopla DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2 DOUBLE PRECISION :: tconv, treord INTEGER :: nits=100 LOGICAL :: nlmetis, nlforce_zero, nlpos ! NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, epsi, nlppform, nlmetis, & & nlforce_zero, nlpos, coefx, coefy !=========================================================================== ! 1.0 Prologue ! ! Read in data specific to run ! nx = 8 ! Number of intervals in x ny = 8 ! Number of intervals in y nidbas = (/3,3/) ! Degree of splines ngauss = (/4,4/) ! Number of Gauss points/interval mbess = 2 ! Exponent of differential problem epsi = 0.5 ! Non-uniformity in the Laplacian coefficicient nterms = 2 ! Number of terms in weak form nlppform = .TRUE. ! Use PPFORM for gridval or not nlmetis = .FALSE. ! Use metis ordering or minimum degree nlforce_zero = .FALSE. ! Remove existing nodes with zero val in putele/row/ele nlpos = .TRUE. ! Matrix is positive definite coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function ! READ(*,newrun) WRITE(*,newrun) ! ! Define grid on x (=radial) & y (=poloidal) axis ! pi = 4.0d0*ATAN(1.0d0) ALLOCATE(xgrid(0:nx), ygrid(0:ny)) xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 CALL meshdist(coefx, xgrid, nx) ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi CALL meshdist(coefy, ygrid, ny) ! ! Create hdf5 file ! CALL creatf(file, fid, 'PDE2D Result File', real_prec='d') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NY', ny) CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) CALL attach(fid, '/', 'NGAUSS1', ngauss(1)) CALL attach(fid, '/', 'NGAUSS2', ngauss(2)) CALL attach(fid, '/', 'MBESS', mbess) CALL attach(fid, '/', 'EPSI', epsi) !=========================================================================== ! 2.0 Discretize the PDE ! ! Set up spline ! CALL set_spline(nidbas, ngauss, & & xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform) ! ! FE matrix assembly ! nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix WRITE(*,'(a,i8)') 'nrank', nrank ! t0 = seconds() CALL init(nrank, nterms, mat, nlsym=.TRUE.) CALL dismat(splxy, epsi, mat) ALLOCATE(arr(nrank)) IF(nrank.LT.100) THEN DO i=1,nrank CALL getele(mat, i, i, arr(i)) END DO WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', arr END IF ! ! BC on Matrix ! WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat) CALL ibcmat(mat, ny) IF(nrank.LT.100) THEN DO i=1,nrank CALL getele(mat, i, i, arr(i)) END DO WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', arr WRITE(*,'(a)') 'Last rows' DO i=nrank-ny,nrank CALL getrow(mat, i, arr) WRITE(*,'(10(1pe12.3))') arr END DO END IF tmat = seconds() - t0 ! ! RHS assembly ! ALLOCATE(rhs(nrank), sol(nrank)) CALL disrhs(mbess, epsi, splxy, rhs) ! ! BC on RHS ! CALL ibcrhs(rhs, ny) ! CALL putarr(fid, '/RHS', rhs, 'RHS of linear system') WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat) WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem() !=========================================================================== ! 3.0 Solve the dicretized system ! t0 = seconds() !!$ CALL factor(mat) ! ! The call to "factor" could be split into the ! 3 following calls ! CALL to_mat(mat) tconv = seconds() -t0 WRITE(*,'(/a,l4)') 'associated(mat%mat)', ASSOCIATED(mat%mat) WRITE(*,'(a,1pe12.3)') 'Mem used (MB) after TO_MAT', mem() ! t0 = seconds() CALL reord_mat(mat, nlmetis=nlmetis, debug=.FALSE.) CALL putmat(fid, '/MAT', mat) treord = seconds() - t0 ! t0 = seconds() CALL numfact(mat, debug=.FALSE.) tfact = seconds() - t0 WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem() WRITE(*,'(/a,i12)') 'Number of nonzeros in factors of A = ',mat%p%iparm(18) WRITE(*,'(a,i12)') 'Number of factorization MFLOPS = ',mat%p%iparm(19) gflops1 = mat%p%iparm(19) / tfact / 1.d3 ! CALL bsolve(mat, rhs, sol, debug=.FALSE.) t0 = seconds() DO it=1,nits ! nits iterations for timing CALL bsolve(mat, rhs, sol) sol(1:ny-1) = sol(ny) END DO WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem() tsolv = (seconds() - t0)/REAL(nits) ! ! Spline coefficients, taking into account of periodicity in y ! Note: in SOL, y was numbered first. ! dimx = splxy%sp1%dim dimy = splxy%sp2%dim ALLOCATE(bcoef(0:dimx-1, 0:dimy-1)) DO j=0,dimy-1 DO i=0,dimx-1 ij = MODULO(j,ny) + i*ny + 1 bcoef(i,j) = sol(ij) END DO END DO WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef) CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution') !=========================================================================== ! 4.0 Check the solution ! ! Check function values computed with various method ! ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny)) DO i=0,nx DO j=0,ny solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j)) END DO END DO jder = (/0,0/) ! ! Compute PPFORM/BCOEFS at first call to gridval CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef) ! WRITE(*,'(/a)') '*** Checking solutions' t0 = seconds() DO it=1,nits ! nits iterations for timing CALL gridval(splxy, xgrid, ygrid, solcal, jder) END DO tgrid = (seconds() - t0)/REAL(nits) errsol = solana - solcal IF( SIZE(bcoef,2) .LE. 10 ) THEN CALL prnmat('BCOEF', bcoef) CALL prnmat('SOLANA', solana) CALL prnmat('SOLCAL', solcal) CALL prnmat('ERRSOL', errsol) END IF WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & & norm2(errsol) / norm2(solana) WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid ! CALL putarr(fid, '/xgrid', xgrid, 'r') CALL putarr(fid, '/ygrid', ygrid, '\theta') CALL putarr(fid, '/sol', solcal, 'Solutions') CALL putarr(fid, '/solana', solana,'Exact solutions') CALL putarr(fid, '/errors', errsol, 'Errors') ! ! Check derivatives d/dx and d/dy ! WRITE(*,'(/a)') '*** Checking gradient' DO i=0,nx DO j=0,ny IF( mbess .EQ. 0 ) THEN solana(i,j) = -2.0d0 * xgrid(i) ELSE solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * & & xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) END IF END DO END DO ! jder = (/1,0/) CALL gridval(splxy, xgrid, ygrid, solcal, jder) errsol = solana - solcal CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions') WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol) ! DO i=0,nx DO j=0,ny solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j)) END DO END DO ! jder = (/0,1/) CALL gridval(splxy, xgrid, ygrid, solcal, jder) CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions') errsol = solana - solcal WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol) ! WRITE(*,'(/a)') '---' WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s) ', tconv WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s) ', treord WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s) ', tsolv WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1 !=========================================================================== ! 5.0 Clear the matrix and recompute ! WRITE(*,'(/a)') 'Recompute the solver ...' t0 = seconds() CALL clear_mat(mat) CALL dismat(splxy, epsi, mat) CALL ibcmat(mat, ny) tmat = seconds()-t0 ! t0 = seconds() !!$ CALL numfact(mat, debug=.FALSE.) CALL factor(mat, nlreord=.FALSE., debug=.FALSE.) tfact = seconds()-t0 gflops1 = mat%p%iparm(19) / tfact / 1.d3 ! t0 = seconds() ALLOCATE(newsol(nrank)) CALL bsolve(mat, rhs, newsol) newsol(1:ny-1) = newsol(ny) tsolv = seconds()-t0 ! WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol)) WRITE(*,'(/a)') '---' WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact WRITE(*,'(a,1pe12.3)') 'Backsolve(2) time (s) ', tsolv WRITE(*,'(a,1pe12.3)') 'Total (s) ', tmat+tfact+tsolv WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1 ! DEALLOCATE(newsol) !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xgrid, rhs, sol) DEALLOCATE(solcal, solana, errsol) DEALLOCATE(bcoef) DEALLOCATE(arr) CALL destroy_sp(splxy) CALL destroy(mat) ! CALL closef(fid) !=========================================================================== ! CONTAINS FUNCTION norm2(x) ! ! Compute the 2-norm of array x ! IMPLICIT NONE DOUBLE PRECISION :: norm2 DOUBLE PRECISION, INTENT(in) :: x(:,:) DOUBLE PRECISION :: sum2 INTEGER :: i, j ! sum2 = 0.0d0 DO i=1,SIZE(x,1) DO j=1,SIZE(x,2) sum2 = sum2 + x(i,j)**2 END DO END DO norm2 = SQRT(sum2) END FUNCTION norm2 SUBROUTINE prnmat(label, mat) CHARACTER(len=*) :: label DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat INTEGER :: i WRITE(*,'(/a)') TRIM(label) DO i=1,SIZE(mat,1) WRITE(*,'(10(1pe12.3))') mat(i,:) END DO END SUBROUTINE prnmat END PROGRAM main ! !+++ SUBROUTINE meshdist(c, x, nx) ! ! Construct an 1d non-equidistant mesh given a ! mesh distribution function. ! IMPLICIT NONE DOUBLE PRECISION, INTENT(in) :: c(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) INTEGER :: nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal !!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) END FUNCTION fdist END SUBROUTINE meshdist !+++ diff --git a/examples/pde2d_sym_pardiso_dft.f90 b/examples/pde2d_sym_pardiso_dft.f90 index 42426e6..3b77cf5 100644 --- a/examples/pde2d_sym_pardiso_dft.f90 +++ b/examples/pde2d_sym_pardiso_dft.f90 @@ -1,1034 +1,1034 @@ !> !> @file pde2d_sym_pardiso_dft.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> ! ! Solving the 2d PDE using splines and PARDISO symmetric matrix. ! The periodic coordinate y is discrete Fourier transformed. ! ! -d/dx[x C d/dx]f - 1x/d/dy[Cd/dy] f = \rho, with f(x=1,y) = 0 ! C(x,y) = 1 + \epsilon x cos(y) ! exact solution: f(x,y) = (1-x^2) x^m cos(my) ! MODULE pde2d_sym_pardiso_dft_mod USE bsplines USE pardiso_bsplines IMPLICIT NONE ! CONTAINS !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE dismat(spl, epsi, mat) ! ! Assembly of FE matrix mat using spline spl ! TYPE(spline2d), INTENT(in) :: spl DOUBLE PRECISION, INTENT(in) :: epsi TYPE(zpardiso_mat), INTENT(inout) :: mat ! INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: n2, nidbas2, ndim2, ng2 INTEGER :: kmin, kmax, dk INTEGER :: i, j, ig1, ig2, kc INTEGER :: iterm, iw1, mw, igw1, it1, mt, igt1, irow, jcol DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:) DOUBLE COMPLEX, ALLOCATABLE :: ft_fun2(:,:,:), fft_temp(:) DOUBLE COMPLEX :: contrib ! INTEGER :: kterms ! Number of terms in weak form INTEGER :: kcoupl ! Number of mode couplings INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:) ! Derivative order DOUBLE COMPLEX, ALLOCATABLE :: coefs(:,:,:,:) ! Terms in weak form INTEGER, ALLOCATABLE :: left1(:), left2(:) !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) kmin = spl%sp2%dft%kmin kmax = spl%sp2%dft%kmax dk = spl%sp2%dft%dk WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1 WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2 WRITE(*,'(a, 5i6)') 'kmin, kmax, dk =', kmin, kmax, dk ! ! Gauss quadature ! CALL get_gauss(spl%sp1, ng1) CALL get_gauss(spl%sp2, ng2) ALLOCATE(xg1(ng1), wg1(ng1)) ALLOCATE(xg2(ng2), wg2(ng2)) ! ! Weak form ! kterms = mat%nterms kcoupl = SIZE(spl%sp2%dft%mode_couplings) ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2)) ALLOCATE(coefs(kterms,kcoupl,ng1,ng2)) ! ! Splines and derivatives at all Gauss points ! ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative ALLOCATE(ft_fun2(kmin:kmax,0:1,ng2)) ! DFT of splines and 1st derivative ALLOCATE(fft_temp(0:n2-1)) ! Used in coefeq !=========================================================================== ! 2.0 Assembly loop ! ALLOCATE(left1(ng1)) ALLOCATE(left2(ng2)) ! ! First interval in 2nd (periodic) coordinate ! j = 1 CALL get_gauss(spl%sp2, ng2, 1, xg2, wg2) left2 = j CALL ft_basfun(xg2, spl%sp2, ft_fun2, left2) DO i=1,n1 CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) left1 = i CALL basfun(xg1, spl%sp1, fun1, left1) DO ig1=1,ng1 DO ig2=1,ng2 CALL coefeq(xg1(ig1), xg2(ig2), & & idert(:,:,ig1,ig2), & & iderw(:,:,ig1,ig2), & & coefs(:,:,ig1,ig2)) END DO END DO ! DO iw1=0,nidbas1 ! Weight function in dir 1 igw1 = i+iw1 DO it1=0,nidbas1 ! Test function in dir 1 igt1 = i+it1 DO mt=kmin,kmax ! Test Fourier mode DO kc=1,kcoupl mw = mt + spl%sp2%dft%mode_couplings(kc) IF(mw.LT.kmin .OR. mw.GT.kmax) CYCLE !------------- contrib = (0.0d0, 0.0d0) DO ig1=1,ng1 DO ig2=1,ng2 DO iterm=1,kterms contrib = contrib + & & fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * & & ft_fun2(mw,iderw(iterm,2,ig1,ig2),ig2) * & & coefs(iterm,kc,ig1,ig2) * & & CONJG(ft_fun2(mt,idert(iterm,2,ig1,ig2),ig2)) * & & fun1(it1,idert(iterm,1,ig1,ig2),ig1) * & & wg1(ig1) * wg2(ig2) /REAL(n2,8) END DO END DO END DO irow = (igw1-1)*dk + (mw-kmin)+1 ! Number first mode m then radial coord. jcol = (igt1-1)*dk + (mt-kmin)+1 CALL updtmat(mat, irow, jcol, contrib) !------------- END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xg1, wg1, fun1) DEALLOCATE(xg2, wg2, ft_fun2) DEALLOCATE(idert, iderw, coefs) DEALLOCATE(left1,left2) DEALLOCATE(fft_temp) ! CONTAINS SUBROUTINE coefeq(x, y, idt, idw, c) USE fft DOUBLE PRECISION, INTENT(in) :: x, y INTEGER, INTENT(out) :: idt(:,:), idw(:,:) DOUBLE COMPLEX, INTENT(out) :: c(:,:) ! DOUBLE PRECISION :: zcoef, dy INTEGER :: j, k, kc, kp ! ! Weak form = Int(x*C*dw/dx*dt/dx + C/x*dw/dy*dt/dy)dxdy ! C(x,y) = 1 + epsilon*x*cos(y) ! dy = spl%sp2%dft%dx kc = SIZE(spl%sp2%dft%mode_couplings) DO j=0,n2-1 fft_temp(j) = 1.0d0+epsi*x*COS(y+j*dy) END DO CALL fourcol(fft_temp,1) DO k=1,kc kp = spl%sp2%dft%mode_couplings(k) IF(kp.LT.0) kp=kp+n2 c(1,k) = x*fft_temp(kp) c(2,k) = fft_temp(kp)/x END DO !!$ WRITE(*,'(a/(10(1pe12.4)))') 'fft_temp', ABS(fft_temp) !!$ WRITE(*,'(a/(10(1pe12.4)))') 'c1', ABS(c(1,:)) !!$ WRITE(*,'(a/(10(1pe12.4)))') 'c2', ABS(c(2,:)) ! idt(1,1) = 1 idt(1,2) = 0 idw(1,1) = 1 idw(1,2) = 0 ! idt(2,1) = 0 idt(2,2) = 1 idw(2,1) = 0 idw(2,2) = 1 END SUBROUTINE coefeq END SUBROUTINE dismat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE disrhs(mbess, epsi, spl, rhs) ! ! Assembly the RHS using 2d spline spl ! INTEGER, INTENT(in) :: mbess DOUBLE PRECISION, INTENT(in) :: epsi TYPE(spline2d), INTENT(in) :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:) ! INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: n2, nidbas2, ndim2, ng2 INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) DOUBLE PRECISION:: contrib !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) ! ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) ! ! Gauss quadature ! CALL get_gauss(spl%sp1, ng1) CALL get_gauss(spl%sp2, ng2) ALLOCATE(xg1(ng1), wg1(ng1)) ALLOCATE(xg2(ng1), wg2(ng1)) !=========================================================================== ! 2.0 Assembly loop ! nrank = SIZE(rhs) rhs(1:nrank) = 0.0d0 ! DO i=1,n1 CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) DO ig1=1,ng1 CALL basfun(xg1(ig1), spl%sp1, fun1, i) DO j=1,n2 CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) DO ig2=1,ng2 CALL basfun(xg2(ig2), spl%sp2, fun2, j) contrib = wg1(ig1)*wg2(ig2) * & & rhseq(xg1(ig1),xg2(ig2), mbess) DO k1=0,nidbas1 i1 = i+k1 DO k2=0,nidbas2 j2 = MODULO(j+k2-1,n2) + 1 ij = j2 + (i1-1)*n2 rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1) END DO END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xg1, wg1, fun1) DEALLOCATE(xg2, wg2, fun2) ! CONTAINS DOUBLE PRECISION FUNCTION rhseq(x, y, m) DOUBLE PRECISION, INTENT(in) :: x, y INTEGER, INTENT(in) :: m ! DOUBLE PRECISION :: xm xm = REAL(m,8) rhseq = x**(m+1) * ( 4.0d0*(xm+1.0d0)*COS(xm*y) + & & epsi*x*( & & ( (3.0d0*(xm+1.0d0) - xm/x**2)*COS((xm-1.0d0)*y) + & & (3.0d0+2.0d0*xm)*COS((xm+1.0d0)*y) ) & & )) END FUNCTION rhseq END SUBROUTINE disrhs ! !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE ibcmat(mat, dft) ! ! Apply BC on matrix ! TYPE(zpardiso_mat), INTENT(inout) :: mat TYPE(dftmap), INTENT(in) :: dft INTEGER :: nrank, k, kmin, kmax, dk, i DOUBLE COMPLEX :: arr(mat%rank) !=========================================================================== ! 1.0 Prologue ! nrank = mat%rank kmin = dft%kmin kmax = dft%kmax dk = dft%dk !=========================================================================== ! 2.0 BC at the axis ! ! zero for non-zero modes ! DO k=kmin,kmax IF(k.NE.0) THEN i = k-kmin+1 arr = 0.0d0; arr(i) = 1.0d0 CALL putrow(mat, i, arr) END IF END DO !=========================================================================== ! 3.0 Dirichlet on right boundary ! DO i = nrank, nrank-dk+1, -1 arr = 0.0d0; arr(i) = 1.0d0 CALL putrow(mat, i, arr) END DO ! END SUBROUTINE ibcmat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE ibcrhs(rhs, dft) ! ! Apply BC on RHS ! DOUBLE COMPLEX, INTENT(inout) :: rhs(:) TYPE(dftmap), INTENT(in) :: dft INTEGER :: nrank, kmin, kmax, dk, k !=========================================================================== ! 1.0 Prologue ! nrank = SIZE(rhs,1) kmin = dft%kmin kmax = dft%kmax dk = dft%dk !=========================================================================== ! 2.0 BC at the axis ! ! zero for non-zero modes ! DO k=kmin,kmax IF(k.NE.0) rhs(k-kmin+1) = 0.0 END DO !=========================================================================== ! 3.0 Dirichlet on right boundary ! rhs(nrank-dk+1:nrank) = 0.0d0 END SUBROUTINE ibcrhs !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE spectrum0(spl, carr, xpt, cspec) ! ! DFT modes at xpt (integration on the first interval) ! DOUBLE COMPLEX, PARAMETER :: ci = (0.0d0,1.0d0) DOUBLE PRECISION, PARAMETER :: pi = 3.141592653589793d0 TYPE(spline2d), INTENT(in) :: spl DOUBLE COMPLEX, INTENT(in) :: carr(:) DOUBLE PRECISION, INTENT(in) :: xpt DOUBLE COMPLEX, INTENT(out) :: cspec(:) ! INTEGER :: ndim1, n1, nidbas1, ndim2, n2, nidbas2 INTEGER :: k, kmin, kmax, dk, kk INTEGER :: ng2, ig2 INTEGER, ALLOCATABLE :: left2(:) DOUBLE PRECISION :: temp(1) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:) DOUBLE COMPLEX, ALLOCATABLE :: ft_fun2(:,:,:), psi(:), coefs(:,:) ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) kmin = spl%sp2%dft%kmin kmax = spl%sp2%dft%kmax dk = spl%sp2%dft%dk ! CALL get_gauss(spl%sp2, ng2) ALLOCATE(left2(ng2)) ALLOCATE(xg2(ng2), wg2(ng2)) ALLOCATE(ft_fun2(kmin:kmax,1,ng2)) ! DFT of splines ALLOCATE(psi(kmin:kmax)) ! ! Integration over first interval ! left2 = 1 CALL get_gauss(spl%sp2, ng2, 1, xg2, wg2) CALL ft_basfun(xg2, spl%sp2, ft_fun2, left2) psi = (0.0d0,0.0d0) DO k=kmin,kmax DO ig2=1,ng2 psi(k) = psi(k) + wg2(ig2)*EXP(k*ci*xg2(ig2))*CONJG(ft_fun2(k,1,ig2)) END DO END DO ! ALLOCATE(coefs(dk,ndim1)) coefs = RESHAPE(carr, SHAPE(coefs)) temp = xpt DO kk=kmin,kmax k=kk-kmin+1 coefs(k,:) = psi(kk)*coefs(k,:) CALL gridval(spl%sp1, temp, cspec(k:k), 0, coefs(k,:)) END DO cspec = cspec/(2.0d0*pi) ! DEALLOCATE(left2) DEALLOCATE(xg2, wg2) DEALLOCATE(ft_fun2) DEALLOCATE(psi) DEALLOCATE(coefs) END SUBROUTINE spectrum0 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE spectrum1(spl, carr, xpt, ypt0, cspec) ! ! DFT modes at xpt (at the initial ypt0) ! DOUBLE COMPLEX, PARAMETER :: ci = (0.0d0,1.0d0) ! TYPE(spline2d), INTENT(in) :: spl DOUBLE COMPLEX, INTENT(in) :: carr(:) DOUBLE PRECISION, INTENT(in) :: xpt, ypt0 DOUBLE COMPLEX, INTENT(out) :: cspec(:) ! INTEGER :: ndim1, n1, nidbas1, ndim2, n2, nidbas2 INTEGER :: k, kmin, kmax, dk DOUBLE PRECISION :: temp(1) DOUBLE COMPLEX :: ctemp(1) DOUBLE COMPLEX, ALLOCATABLE :: ft_fun2(:,:), coefs(:,:) ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) kmin = spl%sp2%dft%kmin kmax = spl%sp2%dft%kmax dk = spl%sp2%dft%dk ! ! DFT of splines at ypt0 ALLOCATE(ft_fun2(kmin:kmax,1)) ALLOCATE(coefs(kmin:kmax,ndim1)) CALL ft_basfun(ypt0, spl%sp2, ft_fun2, 1) coefs = RESHAPE(carr, SHAPE(coefs)) ! temp = xpt DO k=kmin,kmax CALL gridval(spl%sp1, temp, ctemp, 0, coefs(k,:)) cspec(k-kmin+1) = CONJG(ft_fun2(k,1))*ctemp(1)*EXP(k*ci*ypt0) END DO cspec = cspec/REAL(n2,8) ! DEALLOCATE(ft_fun2) DEALLOCATE(coefs) END SUBROUTINE spectrum1 !!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE spectrum2(spl, xpt, ypt0, cspec) ! ! DFT modes at xpt (at the initial ypt0) ! USE fft USE bsplines ! DOUBLE COMPLEX, PARAMETER :: ci = (0.0d0,1.0d0) ! TYPE(spline2d) :: spl DOUBLE PRECISION, INTENT(in) :: xpt, ypt0 DOUBLE COMPLEX, INTENT(out) :: cspec(:) ! INTEGER :: ndim1, n1, nidbas1, ndim2, n2, nidbas2 INTEGER :: k, kmin, kmax, dk DOUBLE PRECISION, ALLOCATABLE :: ypt(:), fun(:,:) DOUBLE COMPLEX, ALLOCATABLE :: ft_fun(:) DOUBLE PRECISION :: temp(1) ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) kmin = spl%sp2%dft%kmin kmax = spl%sp2%dft%kmax dk = spl%sp2%dft%dk ! ALLOCATE(ypt(0:n2-1)) ALLOCATE(fun(1, 0:n2-1)) ALLOCATE(ft_fun(0:n2-1)) ! ! Function values at points ypt ! ypt(0:n2-1) = ypt0 + spl%sp2%knots(0:n2-1) temp = xpt CALL gridval(spl, temp, ypt, fun, (/0,0/)) ft_fun = fun(1,:) ! ! Discrete Fourier Transform ! CALL fourcol(ft_fun, 1) DO k=kmin,kmax IF(k.LT.0) THEN cspec(k-kmin+1) = ft_fun(k+n2)*EXP(k*ci*ypt0) ELSE cspec(k-kmin+1) = ft_fun(k)*EXP(k*ci*ypt0) END IF END DO cspec = cspec/REAL(n2,8) ! DEALLOCATE(ypt) DEALLOCATE(fun) DEALLOCATE(ft_fun) END SUBROUTINE spectrum2 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ END MODULE pde2d_sym_pardiso_dft_mod PROGRAM main USE pde2d_sym_pardiso_dft_mod USE futils USE fft ! IMPLICIT NONE INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms, kmin, kmax, dk INTEGER :: n_mode_couplings INTEGER, ALLOCATABLE :: mode_couplings(:) LOGICAL :: nlppform INTEGER :: i, j, ij, dimx, dimy, nrank, nrank_full, jder(2), it, i0, i0_r INTEGER :: k, kp, ik DOUBLE PRECISION :: pi, epsi, coefx(5), coefy(5) DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol DOUBLE COMPLEX, ALLOCATABLE :: crhs(:), crhs_r(:), csol(:), csol_r(:) TYPE(spline2d) :: splxy TYPE(zpardiso_mat) :: mat ! CHARACTER(len=128) :: file='pde2d_sym_pardiso_dft.h5' INTEGER :: fid DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: arr, srow DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol DOUBLE PRECISION :: seconds, mem, dopla DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tfour, tfour0, tgrid, gflops1 INTEGER :: nits=100 LOGICAL :: nlmetis, nlforce_zero, nlpos ! DOUBLE PRECISION :: xpt, ypt0 DOUBLE COMPLEX, ALLOCATABLE :: cspec0(:), cspec(:), energy_k(:) DOUBLE COMPLEX :: energy, energy_exact ! NAMELIST /newrun/ nx, ny, nidbas, ngauss, kmin, kmax, mbess, epsi, & & nlppform, nlmetis, nlforce_zero, nlpos, coefx, coefy !=========================================================================== ! 1.0 Prologue ! ! Read in data specific to run ! nx = 8 ! Number of intervals in x ny = 8 ! Number of intervals in y nidbas = (/3,3/) ! Degree of splines ngauss = (/4,4/) ! Number of Gauss points/interval kmin = -3 ! Minimum Fourier mode number kmax = 3 ! Maximum Fourier mode number mbess = 2 ! Exponent of differential problem epsi = 0.5 ! Non-uniformity in the Laplacian coefficicient nterms = 2 ! Number of terms in weak form nlppform = .TRUE. ! Use PPFORM for gridval or not nlmetis = .FALSE. ! Use metis ordering or minimum degree nlforce_zero = .FALSE. ! Remove existing nodes with zero val in putele/row/ele nlpos = .TRUE. ! Matrix is positive definite coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function ! READ(*,newrun) WRITE(*,newrun) !! ! Read table of mode couplings ! READ(*,*) n_mode_couplings ALLOCATE(mode_couplings(n_mode_couplings)) READ(*,*) mode_couplings WRITE(*,'(/a/(20i4))') 'Mode couplings', mode_couplings ! ! Define grid on x (=radial) & y (=poloidal) axis ! pi = 4.0d0*ATAN(1.0d0) ALLOCATE(xgrid(0:nx), ygrid(0:ny)) xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 CALL meshdist(coefx, xgrid, nx) ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi CALL meshdist(coefy, ygrid, ny) ! ! Exact energy ! energy_exact = 2.0d0*pi/REAL(2+mbess,8) ! ! Create hdf5 file ! CALL creatf(file, fid, 'PDE2D Result File', real_prec='d') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NY', ny) CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) CALL attach(fid, '/', 'NGAUSS1', ngauss(1)) CALL attach(fid, '/', 'NGAUSS2', ngauss(2)) CALL attach(fid, '/', 'MBESS', mbess) CALL attach(fid, '/', 'EPSI', epsi) CALL attach(fid, '/', 'KMIN', kmin) CALL attach(fid, '/', 'KMAX', kmax) !=========================================================================== ! 2.0 Discretize the PDE ! ! Set up spline ! CALL set_spline(nidbas, ngauss, xgrid, ygrid, splxy, & & (/.FALSE., .TRUE./)) ! ! Init DFT for spline in 2nd direction ! CALL init_dft(splxy%sp2, kmin, kmax, mode_couplings) dk = splxy%sp2%dft%dk ! ! FE matrix assembly ! dimx = splxy%sp1%dim dimy = splxy%sp2%dim nrank = (nx+nidbas(1))*dk ! Rank of restricted matrix nrank_full = (nx+nidbas(1))*ny ! Rank of full matrix ! ALLOCATE(rhs(nrank_full), sol(nrank_full)) ALLOCATE(crhs(nrank_full), csol(nrank_full)) ALLOCATE(crhs_r(nrank), csol_r(nrank)) ! WRITE(*,'(a,i8)') 'nrank_full', nrank_full WRITE(*,'(a,i8)') 'nrank ', nrank ! t0 = seconds() CALL init(nrank, nterms, mat, nlherm=.TRUE.) CALL dismat(splxy, epsi, mat) ALLOCATE(arr(nrank)) ALLOCATE(srow(nrank)) DO i=1,nrank CALL getrow(mat, i, arr) srow(i) = SUM(arr) END DO !!$ WRITE(*,'(a/(10(1pe12.3)))') 'Real of Sum of matrix rows before BC', REAL(srow) !!$ WRITE(*,'(a/(10(1pe12.3)))') 'Imag of Sum of matrix rows before BC', AIMAG(srow) PRINT*, 'Sum of mat before BC', SUM(srow) ! ! BC on Matrix ! WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat) CALL ibcmat(mat, splxy%sp2%dft) tmat = seconds() - t0 DO i=1,nrank CALL getrow(mat, i, arr) srow(i) = SUM(arr) END DO !!$ WRITE(*,'(a/(10(1pe12.3)))') 'Real of Sum of matrix rows after BC', REAL(srow) !!$ WRITE(*,'(a/(10(1pe12.3)))') 'Imag of Sum of matrix rows after BC', AIMAG(srow) PRINT*, 'Sum of mat after BC', SUM(srow) ! WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat) WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem() ! ! RHS assembly ! CALL disrhs(mbess, epsi, splxy, rhs) ! ! Init FFT ! t0 = seconds() CALL fourcol(crhs(1:ny),1) CALL fourcol(crhs(1:ny),-1) tfour0 = seconds()-t0 crhs = crhs/REAL(ny,8) ! ! DFT of RHS ! t0 = seconds() crhs = rhs DO i=1,nx+nidbas(1) i0 = (i-1)*ny CALL fourcol(crhs(i0+1:i0+ny), 1) END DO tfour = seconds()-t0 ! ! Restriction in Fourier space ! k = kmin:kmax (restricted) ! kp = 0:ny-1 (full) ! DO i=1,nx+nidbas(1) i0 = (i-1)*ny i0_r = (i-1)*dk DO k=kmin,kmax kp = k IF(kp.LT.0) kp = kp+ny crhs_r(i0_r+k-kmin+1) = crhs(i0+kp+1) END DO END DO ! ! BC on RHS ! CALL ibcrhs(crhs_r, splxy%sp2%dft) ! IF(nrank.LT.100) THEN WRITE(*,'(a/(10(1pe12.3)))') 'Real of crhs', REAL(crhs) WRITE(*,'(a/(10(1pe12.3)))') 'Imag of crhs', AIMAG(crhs) END IF !=========================================================================== ! 3.0 Solve the dicretized system ! ! Matrix factorization ! t0 = seconds() !!$ CALL factor(mat, nlmetis=nlmetis) CALL to_mat(mat) CALL reord_mat(mat, nlmetis=nlmetis); CALL putmat(fid, '/MAT1', mat) CALL numfact(mat) tfact = seconds() - t0 DO i=1,nrank CALL getrow(mat, i, arr) srow(i) = SUM(arr) END DO PRINT*, 'Sum of mat after factor', SUM(srow) WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem() WRITE(*,'(/a,i12)') 'Number of nonzeros in factors of A = ',mat%p%iparm(18) WRITE(*,'(a,i12)') 'Number of factorization MFLOPS = ',mat%p%iparm(19) gflops1 = mat%p%iparm(19) / tfact / 1.d3 ! ! Backsolve ! t0 = seconds() PRINT*, 'SUM of crhs_r', SUM(crhs_r) CALL bsolve(mat, crhs_r, csol_r, debug=.FALSE.) WRITE(*,'(a,1pe12.4)') 'Residue =', cnorm2(vmx(mat,csol_r)-crhs_r) tsolv = seconds() - t0 PRINT*, 'SUM of csol_r', SUM(csol_r) ! CALL putarr(fid, '/FT_RHS', crhs_r, 'DFT of RHS') CALL putarr(fid, '/FT_SOL', csol_r, 'DFT of Spline coefficients') !=========================================================================== ! 4.0 Perform some diagnostics in Fourier space ! ! Fourier spectrum at xpt ! xpt = SQRT(REAL(mbess,8)/REAL(mbess+2,8)) ALLOCATE(cspec0(dk)) CALL spectrum0(splxy, csol_r, xpt, cspec0) WRITE(*,'(/a,f10.5)') 'DFT spectrum (by integration) at x = ', xpt DO k=kmin,kmax WRITE(*,'(i5,3(1pe15.3))') k, cspec0(k-kmin+1), ABS(cspec0(k-kmin+1)) END DO ! ypt0 = 0.0d0 WRITE(*,'(/a,2f10.5)') 'DFT spectrum1 at x, y0 = ', xpt, ypt0 CALL spectrum1(splxy, csol_r, xpt, ypt0, cspec0) DO k=kmin,kmax WRITE(*,'(i5,3(1pe15.3))') k, cspec0(k-kmin+1), ABS(cspec0(k-kmin+1)) END DO ypt0 = splxy%sp2%dft%dx/2.0d0 ! Center of first interval WRITE(*,'(/a,2f10.5)') 'DFT spectrum1 at x, y0 = ', xpt, ypt0 CALL spectrum1(splxy, csol_r, xpt, ypt0, cspec0) DO k=kmin,kmax WRITE(*,'(i5,3(1pe15.3))') k, cspec0(k-kmin+1), ABS(cspec0(k-kmin+1)) END DO ! ! Spectral energy ! WRITE(*,'(/a)') 'Spectral energies' ALLOCATE(energy_k(kmin:kmax)) energy_k = (0.0d0,0.0d0) DO i=1,dimx i0_r = (i-1)*dk DO k=kmin,kmax ik = i0_r+k-kmin+1 energy_k(k) = energy_k(k) + csol_r(ik)*CONJG(crhs_r(ik)) END DO END DO energy_k = energy_k/REAL(ny,8) energy = SUM(energy_k) DO k=kmin,kmax WRITE(*,'(i5,3(1pe15.3))') k, energy_k(k), ABS(energy_k(k)) END DO WRITE(*,'(a5,4(1pe15.3))') 'Sum', energy, ABS(energy), REAL(energy-energy_exact) ! CALL putarr(fid, '/ENERGY_K', energy_k, 'Spectral energies') !=========================================================================== ! 5.0 Transform back to real space ! ! Expand to full Fourier space ! k = kmin:kmax (restricted) ! kp = 0:ny-1 (full) ! crhs = (0.0d0,0.0d0) DO i=1,nx+nidbas(1) i0 = (i-1)*ny i0_r = (i-1)*dk DO k=kmin,kmax kp = k IF(kp.LT.0) kp = kp+ny csol(i0+kp+1) = csol_r(i0_r+k-kmin+1) END DO END DO ! ! Fourier transform back to real space ! t0 = seconds() DO i=1,nx+nidbas(1) i0 = (i-1)*ny CALL fourcol(csol(i0+1:i0+ny),-1) END DO sol = REAL(csol)/REAL(ny,8) tfour = tfour + seconds()-t0 ! ! ! Spline coefficients, taking into account of periodicity in y ! Note: in SOL, y was numbered first. ! ALLOCATE(bcoef(0:dimx-1, 0:dimy-1)) DO j=0,dimy-1 DO i=0,dimx-1 ij = MODULO(j,ny) + i*ny + 1 bcoef(i,j) = sol(ij) END DO END DO CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution') ! ! Total energy ! WRITE(*,'(/a, 2(1pe15.3))') 'Total energy and error(real space)', & & DOT_PRODUCT(rhs,sol), & & DOT_PRODUCT(rhs,sol)-REAL(energy_exact) WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem() !=========================================================================== ! 6.0 Check the solution ! ! Check function values computed with various method ! ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny)) DO i=0,nx DO j=0,ny solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j)) END DO END DO jder = (/0,0/) ! ! Compute PPFORM/BCOEFS at first call to gridval ! CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef) ! ! Fourier spectrum at xpt ! xpt = SQRT(REAL(mbess,8)/REAL(mbess+2,8)) ALLOCATE(cspec(dk)) ! ypt0 = 0.0d0 WRITE(*,'(/a,2f10.5)') 'DFT spectrum2 at x, y0 = ', xpt, ypt0 CALL spectrum2(splxy, xpt, ypt0, cspec) DO k=kmin,kmax WRITE(*,'(i5,3(1pe15.3))') k, cspec(k-kmin+1), ABS(cspec(k-kmin+1)) END DO ! ypt0 = splxy%sp2%dft%dx/2.0d0 ! Center of first interval WRITE(*,'(/a,2f10.5)') 'DFT spectrum2 at x, y0 = ', xpt, ypt0 CALL spectrum2(splxy, xpt, ypt0, cspec) DO k=kmin,kmax WRITE(*,'(i5,3(1pe15.3))') k, cspec(k-kmin+1), ABS(cspec(k-kmin+1)) END DO ! WRITE(*,'(/a)') '*** Checking solutions' t0 = seconds() DO it=1,nits ! nits iterations for timing CALL gridval(splxy, xgrid, ygrid, solcal, jder) END DO tgrid = (seconds() - t0)/REAL(nits) errsol = solana - solcal IF( SIZE(bcoef,2) .LE. 10 ) THEN CALL prnmat('BCOEF', bcoef) CALL prnmat('SOLANA', solana) CALL prnmat('SOLCAL', solcal) CALL prnmat('ERRSOL', errsol) END IF WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & & norm2(errsol) / norm2(solana) WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid ! CALL putarr(fid, '/xgrid', xgrid, 'r') CALL putarr(fid, '/ygrid', ygrid, '\theta') CALL putarr(fid, '/sol', solcal, 'Solutions') CALL putarr(fid, '/solana', solana,'Exact solutions') CALL putarr(fid, '/errors', errsol, 'Errors') ! ! Check derivatives d/dx and d/dy ! WRITE(*,'(/a)') '*** Checking gradient' DO i=0,nx DO j=0,ny IF( mbess .EQ. 0 ) THEN solana(i,j) = -2.0d0 * xgrid(i) ELSE solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * & & xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) END IF END DO END DO ! jder = (/1,0/) CALL gridval(splxy, xgrid, ygrid, solcal, jder) errsol = solana - solcal CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions') WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol) ! DO i=0,nx DO j=0,ny solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j)) END DO END DO ! jder = (/0,1/) CALL gridval(splxy, xgrid, ygrid, solcal, jder) CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions') errsol = solana - solcal WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol) ! WRITE(*,'(/a)') '---' WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s) ', tsolv WRITE(*,'(a,1pe12.3)') 'Init FFT time (s) ', tfour0 WRITE(*,'(a,1pe12.3)') 'FFT time (s) ', tfour WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1 !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(cspec0, cspec) DEALLOCATE(mode_couplings) DEALLOCATE(xgrid, ygrid, rhs, sol) DEALLOCATE(crhs, csol) DEALLOCATE(crhs_r, csol_r) DEALLOCATE(solcal, solana, errsol) DEALLOCATE(bcoef) DEALLOCATE(arr) DEALLOCATE(srow) DEALLOCATE(energy_k) CALL destroy_sp(splxy) CALL destroy(mat) ! CALL closef(fid) !=========================================================================== ! CONTAINS FUNCTION norm2(x) ! ! Compute the 2-norm of array x ! IMPLICIT NONE DOUBLE PRECISION :: norm2 DOUBLE PRECISION, INTENT(in) :: x(:,:) DOUBLE PRECISION :: sum2 INTEGER :: i, j ! sum2 = 0.0d0 DO i=1,SIZE(x,1) DO j=1,SIZE(x,2) sum2 = sum2 + x(i,j)**2 END DO END DO norm2 = SQRT(sum2) END FUNCTION norm2 ! FUNCTION cnorm2(x) DOUBLE COMPLEX, INTENT(in) :: x(:) DOUBLE PRECISION :: cnorm2 cnorm2 = SQRT(DOT_PRODUCT(x,x)) END FUNCTION cnorm2 ! SUBROUTINE prnmat(label, mat) CHARACTER(len=*) :: label DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat INTEGER :: i WRITE(*,'(/a)') TRIM(label) DO i=1,SIZE(mat,1) WRITE(*,'(10(1pe12.3))') mat(i,:) END DO END SUBROUTINE prnmat END PROGRAM main ! !+++ SUBROUTINE meshdist(c, x, nx) ! ! Construct an 1d non-equidistant mesh given a ! mesh distribution function. ! IMPLICIT NONE DOUBLE PRECISION, INTENT(in) :: c(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) INTEGER :: nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal !!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) END FUNCTION fdist END SUBROUTINE meshdist !+++ diff --git a/examples/pde2d_sym_wsmp.f90 b/examples/pde2d_sym_wsmp.f90 index 2d0c56e..8d44461 100644 --- a/examples/pde2d_sym_wsmp.f90 +++ b/examples/pde2d_sym_wsmp.f90 @@ -1,696 +1,696 @@ !> !> @file pde2d_sym_wsmp.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> ! ! Solving the 2d PDE using splines and WSMP symmetric matrix ! ! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0 ! exact solution: f(x,y) = (1-x^2) x^m cos(my) ! MODULE pde2d_sym_wsmp_mod USE bsplines USE wsmp_bsplines IMPLICIT NONE CONTAINS !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE dismat(spl, mat) ! ! Assembly of FE matrix mat using spline spl ! TYPE(spline2d), INTENT(in) :: spl TYPE(wsmp_mat), INTENT(inout) :: mat ! INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: n2, nidbas2, ndim2, ng2 INTEGER :: i, j, ig1, ig2 INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:) DOUBLE PRECISION:: contrib ! INTEGER :: kterms ! Number of terms in weak form INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:) ! Derivative order DOUBLE PRECISION, ALLOCATABLE :: coefs(:,:,:) ! Terms in weak form INTEGER, ALLOCATABLE :: left1(:), left2(:) !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1 WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2 ! ! Gauss quadature ! CALL get_gauss(spl%sp1, ng1) CALL get_gauss(spl%sp2, ng2) ALLOCATE(xg1(ng1), wg1(ng1)) ALLOCATE(xg2(ng1), wg2(ng1)) ! ! Weak form ! kterms = mat%nterms ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2)) ALLOCATE(coefs(kterms,ng1,ng2)) ! ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! !=========================================================================== ! 2.0 Assembly loop ! ALLOCATE(left1(ng1)) ALLOCATE(left2(ng2)) DO i=1,n1 CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) left1 = i CALL basfun(xg1, spl%sp1, fun1, left1) DO j=1,n2 CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) left2 = j CALL basfun(xg2, spl%sp2, fun2, left2) ! DO ig1=1,ng1 DO ig2=1,ng2 CALL coefeq(xg1(ig1), xg2(ig2), & & idert(:,:,ig1,ig2), & & iderw(:,:,ig1,ig2), & & coefs(:,ig1,ig2)) END DO END DO ! DO iw1=0,nidbas1 ! Weight function in dir 1 igw1 = i+iw1 DO iw2=0,nidbas2 ! Weight function in dir 2 igw2 = MODULO(j+iw2-1, n2) + 1 irow = igw2 + (igw1-1)*n2 DO it1=0,nidbas1 ! Test function in dir 1 igt1 = i+it1 DO it2=0,nidbas2 ! Test function in dir 2 igt2 = MODULO(j+it2-1, n2) + 1 jcol = igt2 + (igt1-1)*n2 !------------- contrib = 0.0d0 DO ig1=1,ng1 DO ig2=1,ng2 DO iterm=1,kterms contrib = contrib + & & fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * & & fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * & & coefs(iterm,ig1,ig2) * & & fun2(it2,idert(iterm,2,ig1,ig2),ig2) * & & fun1(it1,idert(iterm,1,ig1,ig2),ig1) * & & wg1(ig1) * wg2(ig2) END DO END DO END DO CALL updtmat(mat, irow, jcol, contrib) !------------- END DO END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xg1, wg1, fun1) DEALLOCATE(xg2, wg2, fun2) DEALLOCATE(idert, iderw, coefs) DEALLOCATE(left1,left2) ! CONTAINS SUBROUTINE coefeq(x, y, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x, y INTEGER, INTENT(out) :: idt(:,:), idw(:,:) DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1)) ! ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy ! c(1) = x ! idt(1,1) = 1 idt(1,2) = 0 idw(1,1) = 1 idw(1,2) = 0 ! c(2) = 1.d0/x idt(2,1) = 0 idt(2,2) = 1 idw(2,1) = 0 idw(2,2) = 1 END SUBROUTINE coefeq END SUBROUTINE dismat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE disrhs(mbess, spl, rhs) ! ! Assembly the RHS using 2d spline spl ! INTEGER, INTENT(in) :: mbess TYPE(spline2d), INTENT(in) :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:) INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: n2, nidbas2, ndim2, ng2 INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) DOUBLE PRECISION:: contrib !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) ! ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) ! ! Gauss quadature ! CALL get_gauss(spl%sp1, ng1) CALL get_gauss(spl%sp2, ng2) ALLOCATE(xg1(ng1), wg1(ng1)) ALLOCATE(xg2(ng1), wg2(ng1)) !=========================================================================== ! 2.0 Assembly loop ! nrank = SIZE(rhs) rhs(1:nrank) = 0.0d0 ! DO i=1,n1 CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) DO ig1=1,ng1 CALL basfun(xg1(ig1), spl%sp1, fun1, i) DO j=1,n2 CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) DO ig2=1,ng2 CALL basfun(xg2(ig2), spl%sp2, fun2, j) contrib = wg1(ig1)*wg2(ig2) * & & rhseq(xg1(ig1),xg2(ig2), mbess) DO k1=0,nidbas1 i1 = i+k1 DO k2=0,nidbas2 j2 = MODULO(j+k2-1,n2) + 1 ij = j2 + (i1-1)*n2 rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1) END DO END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xg1, wg1, fun1) DEALLOCATE(xg2, wg2, fun2) ! CONTAINS DOUBLE PRECISION FUNCTION rhseq(x1, x2, m) DOUBLE PRECISION, INTENT(in) :: x1, x2 INTEGER, INTENT(in) :: m rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2) END FUNCTION rhseq END SUBROUTINE disrhs ! !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE ibcmat(mat, ny) ! ! Apply BC on matrix ! TYPE(wsmp_mat), INTENT(inout) :: mat INTEGER, INTENT(in) :: ny INTEGER :: nrank, i, j DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:) !=========================================================================== ! 1.0 Prologue ! nrank = mat%rank ALLOCATE(zsum(nrank), arr(nrank)) !=========================================================================== ! 2.0 Unicity at the axis ! ! The vertical sum on the NY-th row ! zsum = 0.0d0 DO i=1,ny arr = 0.0d0 CALL getrow(mat, i, arr) zsum(:) = zsum(:) + arr(:) END DO zsum(ny) = SUM(zsum(1:ny)) ! using symmetry CALL putrow(mat, ny, zsum) ! ! The away operator ! DO i = 1,ny-1 arr = 0.0d0; arr(i) = 1.0d0 CALL putrow(mat, i, arr) END DO ! !=========================================================================== ! 3.0 Dirichlet on right boundary ! DO i = nrank, nrank-ny+1, -1 arr = 0.0d0; arr(i) = 1.0d0 CALL putrow(mat, i, arr) END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(zsum, arr) ! END SUBROUTINE ibcmat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE ibcrhs(rhs, ny) ! ! Apply BC on RHS ! DOUBLE PRECISION, INTENT(inout) :: rhs(:) INTEGER, INTENT(in) :: ny INTEGER :: nrank DOUBLE PRECISION :: zsum !=========================================================================== ! 1.0 Prologue ! nrank = SIZE(rhs,1) !=========================================================================== ! 2.0 Unicity at the axis ! ! The vertical sum ! zsum = SUM(rhs(1:ny)) rhs(ny) = zsum rhs(1:ny-1) = 0.0d0 !=========================================================================== ! 3.0 Dirichlet on right boundary ! rhs(nrank-ny+1:nrank) = 0.0d0 END SUBROUTINE ibcrhs !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ END MODULE pde2d_sym_wsmp_mod PROGRAM main USE pde2d_sym_wsmp_mod USE futils ! IMPLICIT NONE INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms LOGICAL :: nlppform INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it DOUBLE PRECISION :: pi, coefx(5), coefy(5) DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol TYPE(spline2d) :: splxy TYPE(wsmp_mat) :: mat ! CHARACTER(len=128) :: file='pde2d_sym_wsmp.h5' INTEGER :: fid DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol DOUBLE PRECISION :: seconds, mem, dopla DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2 DOUBLE PRECISION :: tconv, treord INTEGER :: nits=100 LOGICAL :: nlforce_zero, nlpos ! NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlpos, & & nlforce_zero, coefx, coefy !=========================================================================== ! 1.0 Prologue ! ! Read in data specific to run ! nx = 8 ! Number of intervals in x ny = 8 ! Number of intervals in y nidbas = (/3,3/) ! Degree of splines ngauss = (/4,4/) ! Number of Gauss points/interval mbess = 2 ! Exponent of differential problem nterms = 2 ! Number of terms in weak form nlppform = .TRUE. ! Use PPFORM for gridval or not nlforce_zero = .FALSE. ! Remove existing nodes with zero val in putele/row/ele nlpos = .TRUE. ! Matrix is positive definite coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function ! READ(*,newrun) WRITE(*,newrun) ! ! Define grid on x (=radial) & y (=poloidal) axis ! pi = 4.0d0*ATAN(1.0d0) ALLOCATE(xgrid(0:nx), ygrid(0:ny)) xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 CALL meshdist(coefx, xgrid, nx) ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi CALL meshdist(coefy, ygrid, ny) ! ! Create hdf5 file ! CALL creatf(file, fid, 'PDE2D Result File', real_prec='d') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NY', ny) CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) CALL attach(fid, '/', 'NGAUSS1', ngauss(1)) CALL attach(fid, '/', 'NGAUSS2', ngauss(2)) CALL attach(fid, '/', 'MBESS', mbess) !=========================================================================== ! 2.0 Discretize the PDE ! ! Set up spline ! t0 = seconds() CALL set_spline(nidbas, ngauss, & & xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform) ! ! FE matrix assembly ! nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix WRITE(*,'(a,i8)') 'nrank', nrank ! CALL init(nrank, nterms, mat, nlsym=.TRUE., nlpos=nlpos) CALL dismat(splxy, mat) ALLOCATE(arr(nrank)) IF(nrank.LT.100) THEN DO i=1,nrank CALL getele(mat, i, i, arr(i)) END DO WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', arr END IF ! ! BC on Matrix ! WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat) CALL ibcmat(mat, ny) IF(nrank.LT.100) THEN DO i=1,nrank CALL getele(mat, i, i, arr(i)) END DO WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', arr WRITE(*,'(a)') 'Last rows' DO i=nrank-ny,nrank CALL getrow(mat, i, arr) WRITE(*,'(10(1pe12.3))') arr END DO END IF tmat = seconds() - t0 ! ! RHS assembly ! ALLOCATE(rhs(nrank), sol(nrank)) CALL disrhs(mbess, splxy, rhs) ! ! BC on RHS ! CALL ibcrhs(rhs, ny) CALL putarr(fid, '/RHS', rhs, 'RHS of linear system') WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat) WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem() !=========================================================================== ! 3.0 Solve the dicretized system ! t0 = seconds() CALL factor(mat) ! ! The call to "factor" could be split into the ! 3 following calls ! !!$ CALL to_mat(mat) !!$ tconv = seconds() -t0 !!$ WRITE(*,'(/a,l4)') 'associated(mat%mat)', ASSOCIATED(mat%mat) !!$ WRITE(*,'(a,1pe12.3)') 'Mem used (MB) after TO_MAT', mem() !!$! !!$ t0 = seconds() !!$ CALL reord_mat(mat) !!$ CALL putmat(fid, '/MAT', mat) !!$ treord = seconds() - t0 !!$! !!$ t0 = seconds() !!$ CALL numfact(mat) tfact = seconds() - t0 WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem() WRITE(*,'(/a,i6)') 'Number of nonzeros in factor (/1000) = ',mat%p%iparm(24) WRITE(*,'(a,1pe12.3)') 'Number of factorization GFLOPS = ',mat%p%dparm(23)/1.d9 gflops1 = mat%p%dparm(23) / tfact / 1.d9 ! CALL bsolve(mat, rhs, sol) t0 = seconds() DO it=1,nits ! nits iterations for timing CALL bsolve(mat, rhs, sol) sol(1:ny-1) = sol(ny) END DO WRITE(*,'(/a,i6)') 'Number of refinement steps = ',mat%p%iparm(6) WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem() tsolv = (seconds() - t0)/REAL(nits) ! ! Spline coefficients, taking into account of periodicity in y ! Note: in SOL, y was numbered first. ! dimx = splxy%sp1%dim dimy = splxy%sp2%dim ALLOCATE(bcoef(0:dimx-1, 0:dimy-1)) DO j=0,dimy-1 DO i=0,dimx-1 ij = MODULO(j,ny) + i*ny + 1 bcoef(i,j) = sol(ij) END DO END DO WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef) CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution') !=========================================================================== ! 4.0 Check the solution ! ! Check function values computed with various method ! ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny)) DO i=0,nx DO j=0,ny solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j)) END DO END DO jder = (/0,0/) ! ! Compute PPFORM/BCOEFS at first call to gridval CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef) ! WRITE(*,'(/a)') '*** Checking solutions' t0 = seconds() DO it=1,nits ! nits iterations for timing CALL gridval(splxy, xgrid, ygrid, solcal, jder) END DO tgrid = (seconds() - t0)/REAL(nits) errsol = solana - solcal IF( SIZE(bcoef,2) .LE. 10 ) THEN CALL prnmat('BCOEF', bcoef) CALL prnmat('SOLANA', solana) CALL prnmat('SOLCAL', solcal) CALL prnmat('ERRSOL', errsol) END IF WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & & norm2(errsol) / norm2(solana) WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid ! CALL putarr(fid, '/xgrid', xgrid, 'r') CALL putarr(fid, '/ygrid', ygrid, '\theta') CALL putarr(fid, '/sol', solcal, 'Solutions') CALL putarr(fid, '/solana', solana,'Exact solutions') CALL putarr(fid, '/errors', errsol, 'Errors') ! ! Check derivatives d/dx and d/dy ! WRITE(*,'(/a)') '*** Checking gradient' DO i=0,nx DO j=0,ny IF( mbess .EQ. 0 ) THEN solana(i,j) = -2.0d0 * xgrid(i) ELSE solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * & & xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) END IF END DO END DO ! jder = (/1,0/) CALL gridval(splxy, xgrid, ygrid, solcal, jder) errsol = solana - solcal CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions') WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol) ! DO i=0,nx DO j=0,ny solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j)) END DO END DO ! jder = (/0,1/) CALL gridval(splxy, xgrid, ygrid, solcal, jder) CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions') errsol = solana - solcal WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol) ! WRITE(*,'(/a)') '---' WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s) ', tconv WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s) ', treord WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s) ', tsolv WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1 !=========================================================================== ! 5.0 Clear the matrix and recompute ! WRITE(*,'(/a)') 'Recompute the solver ...' t0 = seconds() CALL clear_mat(mat) CALL dismat(splxy, mat) CALL ibcmat(mat, ny) tmat = seconds()-t0 ! t0 = seconds() CALL numfact(mat) tfact = seconds()-t0 gflops1 = mat%p%dparm(23) / tfact / 1.d9 ! t0 = seconds() ALLOCATE(newsol(nrank)) CALL bsolve(mat, rhs, newsol) newsol(1:ny-1) = newsol(ny) tsolv = seconds()-t0 ! WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol)) WRITE(*,'(/a)') '---' WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact WRITE(*,'(a,1pe12.3)') 'Backsolve(2) time (s) ', tsolv WRITE(*,'(a,1pe12.3)') 'Total (s) ', tmat+tfact+tsolv WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1 ! DEALLOCATE(newsol) !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xgrid, rhs, sol) DEALLOCATE(solcal, solana, errsol) DEALLOCATE(bcoef) DEALLOCATE(arr) CALL destroy_sp(splxy) CALL destroy(mat) ! CALL closef(fid) !=========================================================================== ! CONTAINS FUNCTION norm2(x) ! ! Compute the 2-norm of array x ! IMPLICIT NONE DOUBLE PRECISION :: norm2 DOUBLE PRECISION, INTENT(in) :: x(:,:) DOUBLE PRECISION :: sum2 INTEGER :: i, j ! sum2 = 0.0d0 DO i=1,SIZE(x,1) DO j=1,SIZE(x,2) sum2 = sum2 + x(i,j)**2 END DO END DO norm2 = SQRT(sum2) END FUNCTION norm2 SUBROUTINE prnmat(label, mat) CHARACTER(len=*) :: label DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat INTEGER :: i WRITE(*,'(/a)') TRIM(label) DO i=1,SIZE(mat,1) WRITE(*,'(10(1pe12.3))') mat(i,:) END DO END SUBROUTINE prnmat END PROGRAM main ! !+++ SUBROUTINE meshdist(c, x, nx) ! ! Construct an 1d non-equidistant mesh given a ! mesh distribution function. ! IMPLICIT NONE DOUBLE PRECISION, INTENT(in) :: c(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) INTEGER :: nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal !!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) END FUNCTION fdist END SUBROUTINE meshdist !+++ diff --git a/examples/pde2d_sym_wsmp_dft.f90 b/examples/pde2d_sym_wsmp_dft.f90 index f3226e9..01625ff 100644 --- a/examples/pde2d_sym_wsmp_dft.f90 +++ b/examples/pde2d_sym_wsmp_dft.f90 @@ -1,1039 +1,1039 @@ !> !> @file pde2d_sym_wsmp_dft.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Ben McMillan !> @author Trach-Minh Tran !> ! ! Solving the 2d PDE using splines and WSMP symmetric matrix. ! The periodic coordinate y is discrete Fourier transformed. ! ! -d/dx[x C d/dx]f - 1x/d/dy[Cd/dy] f = \rho, with f(x=1,y) = 0 ! C(x,y) = 1 + \epsilon x cos(y) ! exact solution: f(x,y) = (1-x^2) x^m cos(my) ! MODULE pde2d_sym_wsmp_dft_mod USE bsplines USE wsmp_bsplines IMPLICIT NONE ! CONTAINS !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE dismat(spl, epsi, mat) ! ! Assembly of FE matrix mat using spline spl ! TYPE(spline2d), INTENT(in) :: spl DOUBLE PRECISION, INTENT(in) :: epsi TYPE(zwsmp_mat), INTENT(inout) :: mat ! INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: n2, nidbas2, ndim2, ng2 INTEGER :: kmin, kmax, dk INTEGER :: i, j, ig1, ig2, kc INTEGER :: iterm, iw1, mw, igw1, it1, mt, igt1, irow, jcol DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:) DOUBLE COMPLEX, ALLOCATABLE :: ft_fun2(:,:,:), fft_temp(:) DOUBLE COMPLEX :: contrib ! INTEGER :: kterms ! Number of terms in weak form INTEGER :: kcoupl ! Number of mode couplings INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:) ! Derivative order DOUBLE COMPLEX, ALLOCATABLE :: coefs(:,:,:,:) ! Terms in weak form INTEGER, ALLOCATABLE :: left1(:), left2(:) !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) kmin = spl%sp2%dft%kmin kmax = spl%sp2%dft%kmax dk = spl%sp2%dft%dk WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1 WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2 WRITE(*,'(a, 5i6)') 'kmin, kmax, dk =', kmin, kmax, dk ! ! Gauss quadature ! CALL get_gauss(spl%sp1, ng1) CALL get_gauss(spl%sp2, ng2) ALLOCATE(xg1(ng1), wg1(ng1)) ALLOCATE(xg2(ng2), wg2(ng2)) ! ! Weak form ! kterms = mat%nterms kcoupl = SIZE(spl%sp2%dft%mode_couplings) ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2)) ALLOCATE(coefs(kterms,kcoupl,ng1,ng2)) ! ! Splines and derivatives at all Gauss points ! ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative ALLOCATE(ft_fun2(kmin:kmax,0:1,ng2)) ! DFT of splines and 1st derivative ALLOCATE(fft_temp(0:n2-1)) ! Used in coefeq !=========================================================================== ! 2.0 Assembly loop ! ALLOCATE(left1(ng1)) ALLOCATE(left2(ng2)) ! ! First interval in 2nd (periodic) coordinate ! j = 1 CALL get_gauss(spl%sp2, ng2, 1, xg2, wg2) left2 = j CALL ft_basfun(xg2, spl%sp2, ft_fun2, left2) DO i=1,n1 CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) left1 = i CALL basfun(xg1, spl%sp1, fun1, left1) DO ig1=1,ng1 DO ig2=1,ng2 CALL coefeq(xg1(ig1), xg2(ig2), & & idert(:,:,ig1,ig2), & & iderw(:,:,ig1,ig2), & & coefs(:,:,ig1,ig2)) END DO END DO ! DO iw1=0,nidbas1 ! Weight function in dir 1 igw1 = i+iw1 DO it1=0,nidbas1 ! Test function in dir 1 igt1 = i+it1 DO mt=kmin,kmax ! Test Fourier mode DO kc=1,kcoupl mw = mt + spl%sp2%dft%mode_couplings(kc) IF(mw.LT.kmin .OR. mw.GT.kmax) CYCLE !------------- contrib = (0.0d0, 0.0d0) DO ig1=1,ng1 DO ig2=1,ng2 DO iterm=1,kterms contrib = contrib + & & fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * & & ft_fun2(mw,iderw(iterm,2,ig1,ig2),ig2) * & & coefs(iterm,kc,ig1,ig2) * & & CONJG(ft_fun2(mt,idert(iterm,2,ig1,ig2),ig2)) * & & fun1(it1,idert(iterm,1,ig1,ig2),ig1) * & & wg1(ig1) * wg2(ig2) /REAL(n2,8) END DO END DO END DO irow = (igw1-1)*dk + (mw-kmin)+1 ! Number first mode m then radial coord. jcol = (igt1-1)*dk + (mt-kmin)+1 CALL updtmat(mat, irow, jcol, contrib) !------------- END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xg1, wg1, fun1) DEALLOCATE(xg2, wg2, ft_fun2) DEALLOCATE(idert, iderw, coefs) DEALLOCATE(left1,left2) DEALLOCATE(fft_temp) ! CONTAINS SUBROUTINE coefeq(x, y, idt, idw, c) USE fft DOUBLE PRECISION, INTENT(in) :: x, y INTEGER, INTENT(out) :: idt(:,:), idw(:,:) DOUBLE COMPLEX, INTENT(out) :: c(:,:) ! DOUBLE PRECISION :: zcoef, dy INTEGER :: j, k, kc, kp ! ! Weak form = Int(x*C*dw/dx*dt/dx + C/x*dw/dy*dt/dy)dxdy ! C(x,y) = 1 + epsilon*x*cos(y) ! dy = spl%sp2%dft%dx kc = SIZE(spl%sp2%dft%mode_couplings) DO j=0,n2-1 fft_temp(j) = 1.0d0+epsi*x*COS(y+j*dy) END DO CALL fourcol(fft_temp,1) DO k=1,kc kp = spl%sp2%dft%mode_couplings(k) IF(kp.LT.0) kp=kp+n2 c(1,k) = x*fft_temp(kp) c(2,k) = fft_temp(kp)/x END DO !!$ WRITE(*,'(a/(10(1pe12.4)))') 'fft_temp', ABS(fft_temp) !!$ WRITE(*,'(a/(10(1pe12.4)))') 'c1', ABS(c(1,:)) !!$ WRITE(*,'(a/(10(1pe12.4)))') 'c2', ABS(c(2,:)) ! idt(1,1) = 1 idt(1,2) = 0 idw(1,1) = 1 idw(1,2) = 0 ! idt(2,1) = 0 idt(2,2) = 1 idw(2,1) = 0 idw(2,2) = 1 END SUBROUTINE coefeq END SUBROUTINE dismat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE disrhs(mbess, epsi, spl, rhs) ! ! Assembly the RHS using 2d spline spl ! INTEGER, INTENT(in) :: mbess DOUBLE PRECISION, INTENT(in) :: epsi TYPE(spline2d), INTENT(in) :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:) ! INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: n2, nidbas2, ndim2, ng2 INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) DOUBLE PRECISION:: contrib !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) ! ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) ! ! Gauss quadature ! CALL get_gauss(spl%sp1, ng1) CALL get_gauss(spl%sp2, ng2) ALLOCATE(xg1(ng1), wg1(ng1)) ALLOCATE(xg2(ng1), wg2(ng1)) !=========================================================================== ! 2.0 Assembly loop ! nrank = SIZE(rhs) rhs(1:nrank) = 0.0d0 ! DO i=1,n1 CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) DO ig1=1,ng1 CALL basfun(xg1(ig1), spl%sp1, fun1, i) DO j=1,n2 CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) DO ig2=1,ng2 CALL basfun(xg2(ig2), spl%sp2, fun2, j) contrib = wg1(ig1)*wg2(ig2) * & & rhseq(xg1(ig1),xg2(ig2), mbess) DO k1=0,nidbas1 i1 = i+k1 DO k2=0,nidbas2 j2 = MODULO(j+k2-1,n2) + 1 ij = j2 + (i1-1)*n2 rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1) END DO END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xg1, wg1, fun1) DEALLOCATE(xg2, wg2, fun2) ! CONTAINS DOUBLE PRECISION FUNCTION rhseq(x, y, m) DOUBLE PRECISION, INTENT(in) :: x, y INTEGER, INTENT(in) :: m ! DOUBLE PRECISION :: xm xm = REAL(m,8) rhseq = x**(m+1) * ( 4.0d0*(xm+1.0d0)*COS(xm*y) + & & epsi*x*( & & ( (3.0d0*(xm+1.0d0) - xm/x**2)*COS((xm-1.0d0)*y) + & & (3.0d0+2.0d0*xm)*COS((xm+1.0d0)*y) ) & & )) END FUNCTION rhseq END SUBROUTINE disrhs ! !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE ibcmat(mat, dft) ! ! Apply BC on matrix ! TYPE(zwsmp_mat), INTENT(inout) :: mat TYPE(dftmap), INTENT(in) :: dft INTEGER :: nrank, k, kmin, kmax, dk, i DOUBLE COMPLEX :: arr(mat%rank) !=========================================================================== ! 1.0 Prologue ! nrank = mat%rank kmin = dft%kmin kmax = dft%kmax dk = dft%dk !=========================================================================== ! 2.0 BC at the axis ! ! zero for non-zero modes ! DO k=kmin,kmax IF(k.NE.0) THEN i = k-kmin+1 arr = 0.0d0; arr(i) = 1.0d0 CALL putrow(mat, i, arr) END IF END DO !=========================================================================== ! 3.0 Dirichlet on right boundary ! DO i = nrank, nrank-dk+1, -1 arr = 0.0d0; arr(i) = 1.0d0 CALL putrow(mat, i, arr) END DO ! END SUBROUTINE ibcmat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE ibcrhs(rhs, dft) ! ! Apply BC on RHS ! DOUBLE COMPLEX, INTENT(inout) :: rhs(:) TYPE(dftmap), INTENT(in) :: dft INTEGER :: nrank, kmin, kmax, dk, k !=========================================================================== ! 1.0 Prologue ! nrank = SIZE(rhs,1) kmin = dft%kmin kmax = dft%kmax dk = dft%dk !=========================================================================== ! 2.0 BC at the axis ! ! zero for non-zero modes ! DO k=kmin,kmax IF(k.NE.0) rhs(k-kmin+1) = 0.0 END DO !=========================================================================== ! 3.0 Dirichlet on right boundary ! rhs(nrank-dk+1:nrank) = 0.0d0 END SUBROUTINE ibcrhs !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE spectrum0(spl, carr, xpt, cspec) ! ! DFT modes at xpt (integration on the first interval) ! DOUBLE COMPLEX, PARAMETER :: ci = (0.0d0,1.0d0) DOUBLE PRECISION, PARAMETER :: pi = 3.141592653589793d0 TYPE(spline2d), INTENT(in) :: spl DOUBLE COMPLEX, INTENT(in) :: carr(:) DOUBLE PRECISION, INTENT(in) :: xpt DOUBLE COMPLEX, INTENT(out) :: cspec(:) ! INTEGER :: ndim1, n1, nidbas1, ndim2, n2, nidbas2 INTEGER :: k, kmin, kmax, dk, kk INTEGER :: ng2, ig2 INTEGER, ALLOCATABLE :: left2(:) DOUBLE PRECISION :: temp(1) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:) DOUBLE COMPLEX, ALLOCATABLE :: ft_fun2(:,:,:), psi(:), coefs(:,:) ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) kmin = spl%sp2%dft%kmin kmax = spl%sp2%dft%kmax dk = spl%sp2%dft%dk ! CALL get_gauss(spl%sp2, ng2) ALLOCATE(left2(ng2)) ALLOCATE(xg2(ng2), wg2(ng2)) ALLOCATE(ft_fun2(kmin:kmax,1,ng2)) ! DFT of splines ALLOCATE(psi(kmin:kmax)) ! ! Integration over first interval ! left2 = 1 CALL get_gauss(spl%sp2, ng2, 1, xg2, wg2) CALL ft_basfun(xg2, spl%sp2, ft_fun2, left2) psi = (0.0d0,0.0d0) DO k=kmin,kmax DO ig2=1,ng2 psi(k) = psi(k) + wg2(ig2)*EXP(k*ci*xg2(ig2))*CONJG(ft_fun2(k,1,ig2)) END DO END DO ! ALLOCATE(coefs(dk,ndim1)) coefs = RESHAPE(carr, SHAPE(coefs)) temp = xpt DO kk=kmin,kmax k=kk-kmin+1 coefs(k,:) = psi(kk)*coefs(k,:) CALL gridval(spl%sp1, temp, cspec(k:k), 0, coefs(k,:)) END DO cspec = cspec/(2.0d0*pi) ! DEALLOCATE(left2) DEALLOCATE(xg2, wg2) DEALLOCATE(ft_fun2) DEALLOCATE(psi) DEALLOCATE(coefs) END SUBROUTINE spectrum0 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE spectrum1(spl, carr, xpt, ypt0, cspec) ! ! DFT modes at xpt (at the initial ypt0) ! DOUBLE COMPLEX, PARAMETER :: ci = (0.0d0,1.0d0) ! TYPE(spline2d), INTENT(in) :: spl DOUBLE COMPLEX, INTENT(in) :: carr(:) DOUBLE PRECISION, INTENT(in) :: xpt, ypt0 DOUBLE COMPLEX, INTENT(out) :: cspec(:) ! INTEGER :: ndim1, n1, nidbas1, ndim2, n2, nidbas2 INTEGER :: k, kmin, kmax, dk DOUBLE PRECISION :: temp(1) DOUBLE COMPLEX :: ctemp(1) DOUBLE COMPLEX, ALLOCATABLE :: ft_fun2(:,:), coefs(:,:) ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) kmin = spl%sp2%dft%kmin kmax = spl%sp2%dft%kmax dk = spl%sp2%dft%dk ! ! DFT of splines at ypt0 ALLOCATE(ft_fun2(kmin:kmax,1)) ALLOCATE(coefs(kmin:kmax,ndim1)) CALL ft_basfun(ypt0, spl%sp2, ft_fun2, 1) coefs = RESHAPE(carr, SHAPE(coefs)) ! temp = xpt DO k=kmin,kmax CALL gridval(spl%sp1, temp, ctemp, 0, coefs(k,:)) cspec(k-kmin+1) = CONJG(ft_fun2(k,1))*ctemp(1)*EXP(k*ci*ypt0) END DO cspec = cspec/REAL(n2,8) ! DEALLOCATE(ft_fun2) DEALLOCATE(coefs) END SUBROUTINE spectrum1 !!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE spectrum2(spl, xpt, ypt0, cspec) ! ! DFT modes at xpt (at the initial ypt0) ! USE fft USE bsplines ! DOUBLE COMPLEX, PARAMETER :: ci = (0.0d0,1.0d0) ! TYPE(spline2d) :: spl DOUBLE PRECISION, INTENT(in) :: xpt, ypt0 DOUBLE COMPLEX, INTENT(out) :: cspec(:) ! INTEGER :: ndim1, n1, nidbas1, ndim2, n2, nidbas2 INTEGER :: k, kmin, kmax, dk DOUBLE PRECISION, ALLOCATABLE :: ypt(:), fun(:,:) DOUBLE COMPLEX, ALLOCATABLE :: ft_fun(:) DOUBLE PRECISION :: temp(1) ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) kmin = spl%sp2%dft%kmin kmax = spl%sp2%dft%kmax dk = spl%sp2%dft%dk ! ALLOCATE(ypt(0:n2-1)) ALLOCATE(fun(1, 0:n2-1)) ALLOCATE(ft_fun(0:n2-1)) ! ! Function values at points ypt ! ypt(0:n2-1) = ypt0 + spl%sp2%knots(0:n2-1) temp = xpt CALL gridval(spl, temp, ypt, fun, (/0,0/)) ft_fun = fun(1,:) ! ! Discrete Fourier Transform ! CALL fourcol(ft_fun, 1) DO k=kmin,kmax IF(k.LT.0) THEN cspec(k-kmin+1) = ft_fun(k+n2)*EXP(k*ci*ypt0) ELSE cspec(k-kmin+1) = ft_fun(k)*EXP(k*ci*ypt0) END IF END DO cspec = cspec/REAL(n2,8) ! DEALLOCATE(ypt) DEALLOCATE(fun) DEALLOCATE(ft_fun) END SUBROUTINE spectrum2 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ END MODULE pde2d_sym_wsmp_dft_mod PROGRAM main USE pde2d_sym_wsmp_dft_mod USE futils USE fft ! IMPLICIT NONE INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms, kmin, kmax, dk INTEGER :: n_mode_couplings INTEGER, ALLOCATABLE :: mode_couplings(:) LOGICAL :: nlppform INTEGER :: i, j, ij, dimx, dimy, nrank, nrank_full, jder(2), it, i0, i0_r INTEGER :: k, kp, ik DOUBLE PRECISION :: pi, epsi, coefx(5), coefy(5) DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol DOUBLE COMPLEX, ALLOCATABLE :: crhs(:), crhs_r(:), csol(:), csol_r(:) TYPE(spline2d) :: splxy TYPE(zwsmp_mat) :: mat ! CHARACTER(len=128) :: file='pde2d_sym_wsmp_dft.h5' INTEGER :: fid DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: arr, srow DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol DOUBLE PRECISION :: seconds, mem, dopla DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tfour, tfour0, tgrid, gflops1 INTEGER :: nits=100 LOGICAL :: nlmetis, nlforce_zero, nlpos ! DOUBLE PRECISION :: xpt, ypt0 DOUBLE COMPLEX, ALLOCATABLE :: cspec0(:), cspec(:), energy_k(:) DOUBLE COMPLEX :: energy, energy_exact ! NAMELIST /newrun/ nx, ny, nidbas, ngauss, kmin, kmax, mbess, epsi, & & nlppform, nlmetis, nlforce_zero, nlpos, coefx, coefy !=========================================================================== ! 1.0 Prologue ! ! Read in data specific to run ! nx = 8 ! Number of intervals in x ny = 8 ! Number of intervals in y nidbas = (/3,3/) ! Degree of splines ngauss = (/4,4/) ! Number of Gauss points/interval kmin = -3 ! Minimum Fourier mode number kmax = 3 ! Maximum Fourier mode number mbess = 2 ! Exponent of differential problem epsi = 0.5 ! Non-uniformity in the Laplacian coefficicient nterms = 2 ! Number of terms in weak form nlppform = .TRUE. ! Use PPFORM for gridval or not nlmetis = .FALSE. ! Use metis ordering or minimum degree nlforce_zero = .FALSE. ! Remove existing nodes with zero val in putele/row/ele nlpos = .TRUE. ! Matrix is positive definite coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function ! READ(*,newrun) WRITE(*,newrun) !! ! Read table of mode couplings ! READ(*,*) n_mode_couplings ALLOCATE(mode_couplings(n_mode_couplings)) READ(*,*) mode_couplings WRITE(*,'(/a/(20i4))') 'Mode couplings', mode_couplings ! ! Define grid on x (=radial) & y (=poloidal) axis ! pi = 4.0d0*ATAN(1.0d0) ALLOCATE(xgrid(0:nx), ygrid(0:ny)) xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 CALL meshdist(coefx, xgrid, nx) ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi CALL meshdist(coefy, ygrid, ny) ! ! Exact energy ! energy_exact = 2.0d0*pi/REAL(2+mbess,8) ! ! Create hdf5 file ! CALL creatf(file, fid, 'PDE2D Result File', real_prec='d') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NY', ny) CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) CALL attach(fid, '/', 'NGAUSS1', ngauss(1)) CALL attach(fid, '/', 'NGAUSS2', ngauss(2)) CALL attach(fid, '/', 'MBESS', mbess) CALL attach(fid, '/', 'EPSI', epsi) CALL attach(fid, '/', 'KMIN', kmin) CALL attach(fid, '/', 'KMAX', kmax) !=========================================================================== ! 2.0 Discretize the PDE ! ! Set up spline ! CALL set_spline(nidbas, ngauss, xgrid, ygrid, splxy, & & (/.FALSE., .TRUE./)) ! ! Init DFT for spline in 2nd direction ! CALL init_dft(splxy%sp2, kmin, kmax, mode_couplings) dk = splxy%sp2%dft%dk ! ! FE matrix assembly ! dimx = splxy%sp1%dim dimy = splxy%sp2%dim nrank = (nx+nidbas(1))*dk ! Rank of restricted matrix nrank_full = (nx+nidbas(1))*ny ! Rank of full matrix ! ALLOCATE(rhs(nrank_full), sol(nrank_full)) ALLOCATE(crhs(nrank_full), csol(nrank_full)) ALLOCATE(crhs_r(nrank), csol_r(nrank)) ! WRITE(*,'(a,i8)') 'nrank_full', nrank_full WRITE(*,'(a,i8)') 'nrank ', nrank ! t0 = seconds() CALL init(nrank, nterms, mat, nlherm=.TRUE., nlpos=nlpos) CALL dismat(splxy, epsi, mat) ALLOCATE(arr(nrank)) ALLOCATE(srow(nrank)) DO i=1,nrank CALL getrow(mat, i, arr) srow(i) = SUM(arr) END DO !!$ WRITE(*,'(a/(10(1pe12.3)))') 'Real of Sum of matrix rows before BC', REAL(srow) !!$ WRITE(*,'(a/(10(1pe12.3)))') 'Imag of Sum of matrix rows before BC', AIMAG(srow) PRINT*, 'Sum of mat before BC', SUM(srow) ! ! BC on Matrix ! WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat) CALL ibcmat(mat, splxy%sp2%dft) tmat = seconds() - t0 DO i=1,nrank CALL getrow(mat, i, arr) srow(i) = SUM(arr) END DO !!$ WRITE(*,'(a/(10(1pe12.3)))') 'Real of Sum of matrix rows after BC', REAL(srow) !!$ WRITE(*,'(a/(10(1pe12.3)))') 'Imag of Sum of matrix rows after BC', AIMAG(srow) PRINT*, 'Sum of mat after BC', SUM(srow) ! WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat) WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem() ! ! RHS assembly ! CALL disrhs(mbess, epsi, splxy, rhs) ! ! Init FFT ! t0 = seconds() CALL fourcol(crhs(1:ny),1) CALL fourcol(crhs(1:ny),-1) tfour0 = seconds()-t0 crhs = crhs/REAL(ny,8) ! ! DFT of RHS ! t0 = seconds() crhs = rhs DO i=1,nx+nidbas(1) i0 = (i-1)*ny CALL fourcol(crhs(i0+1:i0+ny), 1) END DO tfour = seconds()-t0 ! ! Restriction in Fourier space ! k = kmin:kmax (restricted) ! kp = 0:ny-1 (full) ! DO i=1,nx+nidbas(1) i0 = (i-1)*ny i0_r = (i-1)*dk DO k=kmin,kmax kp = k IF(kp.LT.0) kp = kp+ny crhs_r(i0_r+k-kmin+1) = crhs(i0+kp+1) END DO END DO ! ! BC on RHS ! CALL ibcrhs(crhs_r, splxy%sp2%dft) ! IF(nrank.LT.100) THEN WRITE(*,'(a/(10(1pe12.3)))') 'Real of crhs', REAL(crhs) WRITE(*,'(a/(10(1pe12.3)))') 'Imag of crhs', AIMAG(crhs) END IF !=========================================================================== ! 3.0 Solve the dicretized system ! ! Matrix factorization ! t0 = seconds() !!$ CALL factor(mat) CALL to_mat(mat) CALL reord_mat(mat); CALL putmat(fid, '/MAT1', mat) CALL numfact(mat) tfact = seconds() - t0 DO i=1,nrank CALL getrow(mat, i, arr) srow(i) = SUM(arr) END DO PRINT*, 'Sum of mat after factor', SUM(srow) PRINT*, 'iparm(64) after factor', mat%p%iparm(64) WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem() WRITE(*,'(/a,i6)') 'Number of nonzeros in factor (/1000) = ',mat%p%iparm(24) WRITE(*,'(a,1pe12.3)') 'Number of factorization GFLOPS = ',mat%p%dparm(23)/1.d9 gflops1 = mat%p%dparm(23) / tfact / 1.d9 ! ! Backsolve ! t0 = seconds() PRINT*, 'SUM of crhs_r', SUM(crhs_r) CALL bsolve(mat, crhs_r, csol_r) tsolv = seconds() - t0 WRITE(*,'(a,1pe12.4)') 'Residue =', cnorm2(vmx(mat,csol_r)-crhs_r) PRINT*, 'SUM of csol_r', SUM(csol_r) PRINT*, 'iparm(64) after bsolve', mat%p%iparm(64) PRINT*, 'Residue from WSMP', mat%p%dparm(7) WRITE(*,'(a/(20i4))') 'iparm', mat%p%iparm ! CALL putarr(fid, '/FT_RHS', crhs_r, 'DFT of RHS') CALL putarr(fid, '/FT_SOL', csol_r, 'DFT of Spline coefficients') !=========================================================================== ! 4.0 Perform some diagnostics in Fourier space ! ! Fourier spectrum at xpt ! xpt = SQRT(REAL(mbess,8)/REAL(mbess+2,8)) ALLOCATE(cspec0(dk)) CALL spectrum0(splxy, csol_r, xpt, cspec0) WRITE(*,'(/a,f10.5)') 'DFT spectrum (by integration) at x = ', xpt DO k=kmin,kmax WRITE(*,'(i5,3(1pe15.3))') k, cspec0(k-kmin+1), ABS(cspec0(k-kmin+1)) END DO ! ypt0 = 0.0d0 WRITE(*,'(/a,2f10.5)') 'DFT spectrum1 at x, y0 = ', xpt, ypt0 CALL spectrum1(splxy, csol_r, xpt, ypt0, cspec0) DO k=kmin,kmax WRITE(*,'(i5,3(1pe15.3))') k, cspec0(k-kmin+1), ABS(cspec0(k-kmin+1)) END DO ypt0 = splxy%sp2%dft%dx/2.0d0 ! Center of first interval WRITE(*,'(/a,2f10.5)') 'DFT spectrum1 at x, y0 = ', xpt, ypt0 CALL spectrum1(splxy, csol_r, xpt, ypt0, cspec0) DO k=kmin,kmax WRITE(*,'(i5,3(1pe15.3))') k, cspec0(k-kmin+1), ABS(cspec0(k-kmin+1)) END DO ! ! Spectral energy ! WRITE(*,'(/a)') 'Spectral energies' ALLOCATE(energy_k(kmin:kmax)) energy_k = (0.0d0,0.0d0) DO i=1,dimx i0_r = (i-1)*dk DO k=kmin,kmax ik = i0_r+k-kmin+1 energy_k(k) = energy_k(k) + csol_r(ik)*CONJG(crhs_r(ik)) END DO END DO energy_k = energy_k/REAL(ny,8) energy = SUM(energy_k) DO k=kmin,kmax WRITE(*,'(i5,3(1pe15.3))') k, energy_k(k), ABS(energy_k(k)) END DO WRITE(*,'(a5,4(1pe15.3))') 'Sum', energy, ABS(energy), REAL(energy-energy_exact) ! CALL putarr(fid, '/ENERGY_K', energy_k, 'Spectral energies') !=========================================================================== ! 5.0 Transform back to real space ! ! Expand to full Fourier space ! k = kmin:kmax (restricted) ! kp = 0:ny-1 (full) ! crhs = (0.0d0,0.0d0) DO i=1,nx+nidbas(1) i0 = (i-1)*ny i0_r = (i-1)*dk DO k=kmin,kmax kp = k IF(kp.LT.0) kp = kp+ny csol(i0+kp+1) = csol_r(i0_r+k-kmin+1) END DO END DO ! ! Fourier transform back to real space ! t0 = seconds() DO i=1,nx+nidbas(1) i0 = (i-1)*ny CALL fourcol(csol(i0+1:i0+ny),-1) END DO sol = REAL(csol)/REAL(ny,8) tfour = tfour + seconds()-t0 ! ! ! Spline coefficients, taking into account of periodicity in y ! Note: in SOL, y was numbered first. ! ALLOCATE(bcoef(0:dimx-1, 0:dimy-1)) DO j=0,dimy-1 DO i=0,dimx-1 ij = MODULO(j,ny) + i*ny + 1 bcoef(i,j) = sol(ij) END DO END DO CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution') ! ! Total energy ! WRITE(*,'(/a, 2(1pe15.3))') 'Total energy and error(real space)', & & DOT_PRODUCT(rhs,sol), & & DOT_PRODUCT(rhs,sol)-REAL(energy_exact) WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem() !=========================================================================== ! 6.0 Check the solution ! ! Check function values computed with various method ! ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny)) DO i=0,nx DO j=0,ny solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j)) END DO END DO jder = (/0,0/) ! ! Compute PPFORM/BCOEFS at first call to gridval ! CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef) ! ! Fourier spectrum at xpt ! xpt = SQRT(REAL(mbess,8)/REAL(mbess+2,8)) ALLOCATE(cspec(dk)) ! ypt0 = 0.0d0 WRITE(*,'(/a,2f10.5)') 'DFT spectrum2 at x, y0 = ', xpt, ypt0 CALL spectrum2(splxy, xpt, ypt0, cspec) DO k=kmin,kmax WRITE(*,'(i5,3(1pe15.3))') k, cspec(k-kmin+1), ABS(cspec(k-kmin+1)) END DO ! ypt0 = splxy%sp2%dft%dx/2.0d0 ! Center of first interval WRITE(*,'(/a,2f10.5)') 'DFT spectrum2 at x, y0 = ', xpt, ypt0 CALL spectrum2(splxy, xpt, ypt0, cspec) DO k=kmin,kmax WRITE(*,'(i5,3(1pe15.3))') k, cspec(k-kmin+1), ABS(cspec(k-kmin+1)) END DO ! WRITE(*,'(/a)') '*** Checking solutions' t0 = seconds() DO it=1,nits ! nits iterations for timing CALL gridval(splxy, xgrid, ygrid, solcal, jder) END DO tgrid = (seconds() - t0)/REAL(nits) errsol = solana - solcal IF( SIZE(bcoef,2) .LE. 10 ) THEN CALL prnmat('BCOEF', bcoef) CALL prnmat('SOLANA', solana) CALL prnmat('SOLCAL', solcal) CALL prnmat('ERRSOL', errsol) END IF WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & & norm2(errsol) / norm2(solana) WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid ! CALL putarr(fid, '/xgrid', xgrid, 'r') CALL putarr(fid, '/ygrid', ygrid, '\theta') CALL putarr(fid, '/sol', solcal, 'Solutions') CALL putarr(fid, '/solana', solana,'Exact solutions') CALL putarr(fid, '/errors', errsol, 'Errors') ! ! Check derivatives d/dx and d/dy ! WRITE(*,'(/a)') '*** Checking gradient' DO i=0,nx DO j=0,ny IF( mbess .EQ. 0 ) THEN solana(i,j) = -2.0d0 * xgrid(i) ELSE solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * & & xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) END IF END DO END DO ! jder = (/1,0/) CALL gridval(splxy, xgrid, ygrid, solcal, jder) errsol = solana - solcal CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions') WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol) ! DO i=0,nx DO j=0,ny solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j)) END DO END DO ! jder = (/0,1/) CALL gridval(splxy, xgrid, ygrid, solcal, jder) CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions') errsol = solana - solcal WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol) ! WRITE(*,'(/a)') '---' WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s) ', tsolv WRITE(*,'(a,1pe12.3)') 'Init FFT time (s) ', tfour0 WRITE(*,'(a,1pe12.3)') 'FFT time (s) ', tfour WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1 !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(cspec0, cspec) DEALLOCATE(mode_couplings) DEALLOCATE(xgrid, ygrid, rhs, sol) DEALLOCATE(crhs, csol) DEALLOCATE(crhs_r, csol_r) DEALLOCATE(solcal, solana, errsol) DEALLOCATE(bcoef) DEALLOCATE(arr) DEALLOCATE(srow) DEALLOCATE(energy_k) CALL destroy_sp(splxy) CALL destroy(mat) ! CALL closef(fid) !=========================================================================== ! CONTAINS FUNCTION norm2(x) ! ! Compute the 2-norm of array x ! IMPLICIT NONE DOUBLE PRECISION :: norm2 DOUBLE PRECISION, INTENT(in) :: x(:,:) DOUBLE PRECISION :: sum2 INTEGER :: i, j ! sum2 = 0.0d0 DO i=1,SIZE(x,1) DO j=1,SIZE(x,2) sum2 = sum2 + x(i,j)**2 END DO END DO norm2 = SQRT(sum2) END FUNCTION norm2 ! FUNCTION cnorm2(x) DOUBLE COMPLEX, INTENT(in) :: x(:) DOUBLE PRECISION :: cnorm2 cnorm2 = SQRT(DOT_PRODUCT(x,x)) END FUNCTION cnorm2 ! SUBROUTINE prnmat(label, mat) CHARACTER(len=*) :: label DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat INTEGER :: i WRITE(*,'(/a)') TRIM(label) DO i=1,SIZE(mat,1) WRITE(*,'(10(1pe12.3))') mat(i,:) END DO END SUBROUTINE prnmat END PROGRAM main ! !+++ SUBROUTINE meshdist(c, x, nx) ! ! Construct an 1d non-equidistant mesh given a ! mesh distribution function. ! IMPLICIT NONE DOUBLE PRECISION, INTENT(in) :: c(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) INTEGER :: nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal !!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) END FUNCTION fdist END SUBROUTINE meshdist !+++ diff --git a/examples/pde2d_wsmp.f90 b/examples/pde2d_wsmp.f90 index 6a20630..f227e92 100644 --- a/examples/pde2d_wsmp.f90 +++ b/examples/pde2d_wsmp.f90 @@ -1,711 +1,711 @@ !> !> @file pde2d_wsmp.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> ! ! Solving the 2d PDE using splines and WSMP non-symmetric matrix ! ! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0 ! exact solution: f(x,y) = (1-x^2) x^m cos(my) ! MODULE pde2d_wsmp_mod USE bsplines USE wsmp_bsplines IMPLICIT NONE CONTAINS !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE dismat(spl, mat) ! ! Assembly of FE matrix mat using spline spl ! TYPE(spline2d), INTENT(in) :: spl TYPE(wsmp_mat), INTENT(inout) :: mat ! INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: n2, nidbas2, ndim2, ng2 INTEGER :: i, j, ig1, ig2 INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:) DOUBLE PRECISION:: contrib ! INTEGER :: kterms ! Number of terms in weak form INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:) ! Derivative order DOUBLE PRECISION, ALLOCATABLE :: coefs(:,:,:) ! Terms in weak form INTEGER, ALLOCATABLE :: left1(:), left2(:) !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1 WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2 ! ! Gauss quadature ! CALL get_gauss(spl%sp1, ng1) CALL get_gauss(spl%sp2, ng2) ALLOCATE(xg1(ng1), wg1(ng1)) ALLOCATE(xg2(ng1), wg2(ng1)) ! ! Weak form ! kterms = mat%nterms ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2)) ALLOCATE(coefs(kterms,ng1,ng2)) ! ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! !=========================================================================== ! 2.0 Assembly loop ! ALLOCATE(left1(ng1)) ALLOCATE(left2(ng2)) DO i=1,n1 CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) left1 = i CALL basfun(xg1, spl%sp1, fun1, left1) DO j=1,n2 CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) left2 = j CALL basfun(xg2, spl%sp2, fun2, left2) ! DO ig1=1,ng1 DO ig2=1,ng2 CALL coefeq(xg1(ig1), xg2(ig2), & & idert(:,:,ig1,ig2), & & iderw(:,:,ig1,ig2), & & coefs(:,ig1,ig2)) END DO END DO ! DO iw1=0,nidbas1 ! Weight function in dir 1 igw1 = i+iw1 DO iw2=0,nidbas2 ! Weight function in dir 2 igw2 = MODULO(j+iw2-1, n2) + 1 irow = igw2 + (igw1-1)*n2 DO it1=0,nidbas1 ! Test function in dir 1 igt1 = i+it1 DO it2=0,nidbas2 ! Test function in dir 2 igt2 = MODULO(j+it2-1, n2) + 1 jcol = igt2 + (igt1-1)*n2 !------------- contrib = 0.0d0 DO ig1=1,ng1 DO ig2=1,ng2 DO iterm=1,kterms contrib = contrib + & & fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * & & fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * & & coefs(iterm,ig1,ig2) * & & fun2(it2,idert(iterm,2,ig1,ig2),ig2) * & & fun1(it1,idert(iterm,1,ig1,ig2),ig1) * & & wg1(ig1) * wg2(ig2) END DO END DO END DO CALL updtmat(mat, irow, jcol, contrib) !------------- END DO END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xg1, wg1, fun1) DEALLOCATE(xg2, wg2, fun2) DEALLOCATE(idert, iderw, coefs) DEALLOCATE(left1,left2) ! CONTAINS SUBROUTINE coefeq(x, y, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x, y INTEGER, INTENT(out) :: idt(:,:), idw(:,:) DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1)) ! ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy ! c(1) = x ! idt(1,1) = 1 idt(1,2) = 0 idw(1,1) = 1 idw(1,2) = 0 ! c(2) = 1.d0/x idt(2,1) = 0 idt(2,2) = 1 idw(2,1) = 0 idw(2,2) = 1 END SUBROUTINE coefeq END SUBROUTINE dismat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE disrhs(mbess, spl, rhs) ! ! Assembly the RHS using 2d spline spl ! INTEGER, INTENT(in) :: mbess TYPE(spline2d), INTENT(in) :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:) INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: n2, nidbas2, ndim2, ng2 INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) DOUBLE PRECISION:: contrib !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) ! ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) ! ! Gauss quadature ! CALL get_gauss(spl%sp1, ng1) CALL get_gauss(spl%sp2, ng2) ALLOCATE(xg1(ng1), wg1(ng1)) ALLOCATE(xg2(ng1), wg2(ng1)) !=========================================================================== ! 2.0 Assembly loop ! nrank = SIZE(rhs) rhs(1:nrank) = 0.0d0 ! DO i=1,n1 CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) DO ig1=1,ng1 CALL basfun(xg1(ig1), spl%sp1, fun1, i) DO j=1,n2 CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) DO ig2=1,ng2 CALL basfun(xg2(ig2), spl%sp2, fun2, j) contrib = wg1(ig1)*wg2(ig2) * & & rhseq(xg1(ig1),xg2(ig2), mbess) DO k1=0,nidbas1 i1 = i+k1 DO k2=0,nidbas2 j2 = MODULO(j+k2-1,n2) + 1 ij = j2 + (i1-1)*n2 rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1) END DO END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xg1, wg1, fun1) DEALLOCATE(xg2, wg2, fun2) ! CONTAINS DOUBLE PRECISION FUNCTION rhseq(x1, x2, m) DOUBLE PRECISION, INTENT(in) :: x1, x2 INTEGER, INTENT(in) :: m rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2) END FUNCTION rhseq END SUBROUTINE disrhs ! !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE ibcmat(mat, ny) ! ! Apply BC on matrix ! TYPE(wsmp_mat), INTENT(inout) :: mat INTEGER, INTENT(in) :: ny INTEGER :: nrank, i, j DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:) !=========================================================================== ! 1.0 Prologue ! nrank = mat%rank ALLOCATE(zsum(nrank), arr(nrank)) !=========================================================================== ! 2.0 Unicity at the axis ! ! The vertical sum on the NY-th row ! zsum = 0.0d0 DO i=1,ny arr = 0.0d0 CALL getrow(mat, i, arr) zsum(:) = zsum(:) + arr(:) END DO CALL putrow(mat, ny, zsum) ! ! The horizontal sum on the NY-th column ! zsum = 0.0d0 DO j=1,ny arr = 0.0d0 CALL getcol(mat, j, arr) zsum(ny:) = zsum(ny:) + arr(ny:) END DO CALL putcol(mat, ny, zsum) ! ! The away operator ! DO j = 1,ny-1 arr = 0.0d0; arr(j) = 1.0d0 CALL putcol(mat, j, arr) END DO ! DO i = 1,ny-1 arr = 0.0d0; arr(i) = 1.0d0 CALL putrow(mat, i, arr) END DO ! !=========================================================================== ! 3.0 Dirichlet on right boundary ! DO j = nrank, nrank-ny+1, -1 arr = 0.0d0; arr(j) = 1.0d0 CALL putcol(mat, j, arr) END DO ! DO i = nrank, nrank-ny+1, -1 arr = 0.0d0; arr(i) = 1.0d0 CALL putrow(mat, i, arr) END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(zsum, arr) ! END SUBROUTINE ibcmat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE ibcrhs(rhs, ny) ! ! Apply BC on RHS ! DOUBLE PRECISION, INTENT(inout) :: rhs(:) INTEGER, INTENT(in) :: ny INTEGER :: nrank DOUBLE PRECISION :: zsum !=========================================================================== ! 1.0 Prologue ! nrank = SIZE(rhs,1) !=========================================================================== ! 2.0 Unicity at the axis ! ! The vertical sum ! zsum = SUM(rhs(1:ny)) rhs(ny) = zsum rhs(1:ny-1) = 0.0d0 !=========================================================================== ! 3.0 Dirichlet on right boundary ! rhs(nrank-ny+1:nrank) = 0.0d0 END SUBROUTINE ibcrhs !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ END MODULE pde2d_wsmp_mod PROGRAM main USE pde2d_wsmp_mod USE futils ! IMPLICIT NONE INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms LOGICAL :: nlppform INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it DOUBLE PRECISION :: pi, coefx(5), coefy(5) DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol TYPE(spline2d) :: splxy TYPE(wsmp_mat) :: mat ! CHARACTER(len=128) :: file='pde2d_wsmp.h5' INTEGER :: fid DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol DOUBLE PRECISION :: seconds, mem, dopla DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2 DOUBLE PRECISION :: tconv, treord INTEGER :: nits=100 LOGICAL :: nlforce_zero LOGICAL :: nlserial ! NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, & & nlforce_zero, nlserial, coefx, coefy !=========================================================================== ! 1.0 Prologue ! ! Read in data specific to run ! nx = 8 ! Number of intervals in x ny = 8 ! Number of intervals in y nidbas = (/3,3/) ! Degree of splines ngauss = (/4,4/) ! Number of Gauss points/interval mbess = 2 ! Exponent of differential problem nterms = 2 ! Number of terms in weak form nlppform = .TRUE. ! Use PPFORM for gridval or not nlforce_zero = .FALSE. ! Remove existing nodes with zero val in putele/row/ele coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function ! READ(*,newrun) WRITE(*,newrun) ! ! Define grid on x (=radial) & y (=poloidal) axis ! pi = 4.0d0*ATAN(1.0d0) ALLOCATE(xgrid(0:nx), ygrid(0:ny)) xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 CALL meshdist(coefx, xgrid, nx) ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi CALL meshdist(coefy, ygrid, ny) ! ! Create hdf5 file ! CALL creatf(file, fid, 'PDE2D Result File', real_prec='d') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NY', ny) CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) CALL attach(fid, '/', 'NGAUSS1', ngauss(1)) CALL attach(fid, '/', 'NGAUSS2', ngauss(2)) CALL attach(fid, '/', 'MBESS', mbess) !=========================================================================== ! 2.0 Discretize the PDE ! ! Set up spline ! t0 = seconds() CALL set_spline(nidbas, ngauss, & & xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform) ! ! FE matrix assembly ! nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix WRITE(*,'(a,i8)') 'nrank', nrank ! CALL init(nrank, nterms, mat, nlforce_zero=nlforce_zero) CALL dismat(splxy, mat) ALLOCATE(arr(nrank)) IF(nrank.LT.100) THEN DO i=1,nrank CALL getele(mat, i, i, arr(i)) END DO WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', arr END IF ! ! BC on Matrix ! WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat) CALL ibcmat(mat, ny) IF(nrank.LT.100) THEN DO i=1,nrank CALL getele(mat, i, i, arr(i)) END DO WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', arr WRITE(*,'(a)') 'Last rows' DO i=nrank-ny,nrank CALL getrow(mat, i, arr) WRITE(*,'(10(1pe12.3))') arr END DO END IF tmat = seconds() - t0 ! ! RHS assembly ! ALLOCATE(rhs(nrank), sol(nrank)) CALL disrhs(mbess, splxy, rhs) ! ! BC on RHS ! CALL ibcrhs(rhs, ny) ! CALL putarr(fid, '/RHS', rhs, 'RHS of linear system') WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat) WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem() !=========================================================================== ! 3.0 Solve the dicretized system ! t0 = seconds() CALL to_mat(mat) tconv = seconds() -t0 WRITE(*,'(/a,l4)') 'associated(mat%mat)', ASSOCIATED(mat%mat) WRITE(*,'(a,1pe12.3)') 'Mem used (MB) after TO_MAT', mem() ! t0 = seconds() CALL reord_mat(mat) CALL putmat(fid, '/MAT', mat) treord = seconds() - t0 ! t0 = seconds() CALL numfact(mat) tfact = seconds() - t0 WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem() WRITE(*,'(/a,i6)') 'Number of nonzeros in factor (/1000) = ',mat%p%iparm(24) WRITE(*,'(a,1pe12.3)') 'Number of factorization GFLOPS = ',mat%p%dparm(23)/1.d9 gflops1 = mat%p%dparm(23) / tfact / 1.d9 ! CALL bsolve(mat, rhs, sol) t0 = seconds() DO it=1,nits ! nits iterations for timing CALL bsolve(mat, rhs, sol) sol(1:ny-1) = sol(ny) END DO WRITE(*,'(/a,i6)') 'Number of refinement steps = ',mat%p%iparm(26) WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem() tsolv = (seconds() - t0)/REAL(nits) ! ! Spline coefficients, taking into account of periodicity in y ! Note: in SOL, y was numbered first. ! dimx = splxy%sp1%dim dimy = splxy%sp2%dim ALLOCATE(bcoef(0:dimx-1, 0:dimy-1)) DO j=0,dimy-1 DO i=0,dimx-1 ij = MODULO(j,ny) + i*ny + 1 bcoef(i,j) = sol(ij) END DO END DO WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef) CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution') !=========================================================================== ! 4.0 Check the solution ! ! Check function values computed with various method ! ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny)) DO i=0,nx DO j=0,ny solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j)) END DO END DO jder = (/0,0/) ! ! Compute PPFORM/BCOEFS at first call to gridval CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef) ! WRITE(*,'(/a)') '*** Checking solutions' t0 = seconds() DO it=1,nits ! nits iterations for timing CALL gridval(splxy, xgrid, ygrid, solcal, jder) END DO tgrid = (seconds() - t0)/REAL(nits) errsol = solana - solcal IF( SIZE(bcoef,2) .LE. 10 ) THEN CALL prnmat('BCOEF', bcoef) CALL prnmat('SOLANA', solana) CALL prnmat('SOLCAL', solcal) CALL prnmat('ERRSOL', errsol) END IF WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & & norm2(errsol) / norm2(solana) WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid ! CALL putarr(fid, '/xgrid', xgrid, 'r') CALL putarr(fid, '/ygrid', ygrid, '\theta') CALL putarr(fid, '/sol', solcal, 'Solutions') CALL putarr(fid, '/solana', solana,'Exact solutions') CALL putarr(fid, '/errors', errsol, 'Errors') ! ! Check derivatives d/dx and d/dy ! WRITE(*,'(/a)') '*** Checking gradient' DO i=0,nx DO j=0,ny IF( mbess .EQ. 0 ) THEN solana(i,j) = -2.0d0 * xgrid(i) ELSE solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * & & xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) END IF END DO END DO ! jder = (/1,0/) CALL gridval(splxy, xgrid, ygrid, solcal, jder) errsol = solana - solcal CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions') WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol) ! DO i=0,nx DO j=0,ny solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j)) END DO END DO ! jder = (/0,1/) CALL gridval(splxy, xgrid, ygrid, solcal, jder) CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions') errsol = solana - solcal WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol) ! WRITE(*,'(/a)') '---' WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s) ', tconv WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s) ', treord WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s) ', tsolv WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1 !=========================================================================== ! 5.0 Clear the matrix and recompute ! WRITE(*,'(/a)') 'Recompute the solver ...' t0 = seconds() CALL clear_mat(mat) CALL dismat(splxy, mat) CALL ibcmat(mat, ny) tmat = seconds()-t0 ! t0 = seconds() !!$ CALL numfact(mat) CALL factor(mat, nlreord=.FALSE.) tfact = seconds()-t0 gflops1 = mat%p%dparm(23) / tfact / 1.d9 ! t0 = seconds() ALLOCATE(newsol(nrank)) CALL bsolve(mat, rhs, newsol) newsol(1:ny-1) = newsol(ny) tsolv = seconds()-t0 ! WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol)) WRITE(*,'(/a)') '---' WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact WRITE(*,'(a,1pe12.3)') 'Backsolve(2) time (s) ', tsolv WRITE(*,'(a,1pe12.3)') 'Total (s) ', tmat+tfact+tsolv WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1 ! DEALLOCATE(newsol) !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xgrid, rhs, sol) DEALLOCATE(solcal, solana, errsol) DEALLOCATE(bcoef) DEALLOCATE(arr) CALL destroy_sp(splxy) CALL destroy(mat) ! CALL closef(fid) !=========================================================================== ! CONTAINS FUNCTION norm2(x) ! ! Compute the 2-norm of array x ! IMPLICIT NONE DOUBLE PRECISION :: norm2 DOUBLE PRECISION, INTENT(in) :: x(:,:) DOUBLE PRECISION :: sum2 INTEGER :: i, j ! sum2 = 0.0d0 DO i=1,SIZE(x,1) DO j=1,SIZE(x,2) sum2 = sum2 + x(i,j)**2 END DO END DO norm2 = SQRT(sum2) END FUNCTION norm2 SUBROUTINE prnmat(label, mat) CHARACTER(len=*) :: label DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat INTEGER :: i WRITE(*,'(/a)') TRIM(label) DO i=1,SIZE(mat,1) WRITE(*,'(10(1pe12.3))') mat(i,:) END DO END SUBROUTINE prnmat END PROGRAM main ! !+++ SUBROUTINE meshdist(c, x, nx) ! ! Construct an 1d non-equidistant mesh given a ! mesh distribution function. ! IMPLICIT NONE DOUBLE PRECISION, INTENT(in) :: c(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) INTEGER :: nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal !!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) END FUNCTION fdist END SUBROUTINE meshdist !+++ diff --git a/examples/pde3d.f90 b/examples/pde3d.f90 index 9fed6e9..b60e046 100644 --- a/examples/pde3d.f90 +++ b/examples/pde3d.f90 @@ -1,396 +1,396 @@ !> !> @file pde3d.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Solving the following 3d PDE using splines: ! ! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my)cos(z)^n, with f(x=1,y) = 0 ! exact solution: f(x,y) = (1-x^2) x^m cos(my)cos(z)^n ! USE futils USE fft USE pde3d_mod ! IMPLICIT NONE INTEGER :: nx, ny, nz, nidbas(3), ngauss(3), mbess, npow, nterms LOGICAL :: nlppform INTEGER :: i, j, k, kk, ij, dimx, dimy, dimz, nrank, kl, ku INTEGER :: jder(3), it DOUBLE PRECISION :: pi, coefx(5) DOUBLE PRECISION :: dy, dz DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, zgrid DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: fftmass DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: rhs, sol DOUBLE COMPLEX, DIMENSION(:,:), ALLOCATABLE :: crhs ! TYPE(spline2d1d), TARGET :: splxyz TYPE(spline2d), POINTER :: splxy TYPE(gbmat) :: mat ! CHARACTER(len=128) :: file='pde3d.h5' INTEGER :: fid DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol DOUBLE PRECISION :: seconds, mem, dopla DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2 INTEGER :: nits=500 ! INTEGER, PARAMETER :: npart=10 DOUBLE PRECISION, DIMENSION(npart) :: xp, yp, zp, fp_calc, fp_anal ! INTEGER :: kmin, kmax DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: fftmass_shifted ! NAMELIST /newrun/ nx, ny, nz, nidbas, ngauss, mbess, npow, nlppform, coefx !=========================================================================== ! 1.0 Prologue ! ! Read in data specific to run ! nx = 8 ! Number of intervals in x ny = 8 ! Number of intervals in y nz = 8 ! Number of intervals in z nidbas = (/3,3,3/) ! Degree of splines ngauss = (/4,4, 4/) ! Number of Gauss points/interval mbess = 2 ! Exponent of differential problem npow = 2 ! Exponent of differential problem nterms = 2 ! Number of terms in weak form nlppform = .TRUE. ! Use PPFORM for gridval or not coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function ! READ(*,newrun) WRITE(*,newrun) ! ! Define grid on x (=radial) & y (=poloidal) axis ! pi = 4.0d0*ATAN(1.0d0) ALLOCATE(xgrid(0:nx), ygrid(0:ny), zgrid(0:nz)) ! xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 CALL meshdist(coefx, xgrid, nx) ! dy = 2.d0*pi/REAL(ny,8) ! Equidistant in y ygrid = (/ (j*dy, j=0,ny) /) ! dz = 2.0d0*pi/REAL(nz,8) ! Equidistant in z zgrid = (/ (k*dz, k=0,nz) /) ! WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny) WRITE(*,'(a/(10f8.3))') 'ZGRID', zgrid(0:nz) ! ! Create hdf5 file ! CALL creatf(file, fid, 'PDE2D Result File', real_prec='d') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NY', ny) CALL attach(fid, '/', 'NZ', nz) CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) CALL attach(fid, '/', 'NIDBAS3', nidbas(3)) CALL attach(fid, '/', 'NGAUSS1', ngauss(1)) CALL attach(fid, '/', 'NGAUSS2', ngauss(2)) CALL attach(fid, '/', 'NGAUSS2', ngauss(3)) CALL attach(fid, '/', 'MBESS', mbess) !=========================================================================== ! 2.0 Discretize the PDE ! ! Set up spline ! t0 = seconds() CALL set_spline(nidbas, ngauss, xgrid, ygrid, zgrid, splxyz, & & (/.FALSE., .TRUE., .TRUE./), nlppform=nlppform) splxy => splxyz%sp12 ! WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Z', splxyz%sp3%knots ! ! 2D FE matrix assembly (in plane x-y) ! nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix kl = (nidbas(1)+1)*ny -1 ! Number of sub-diagnonals ku = kl ! Number of super-diagnonals WRITE(*,'(a,5i6)') 'nrank, kl, ku', nrank, kl, ku ! CALL init(kl, ku, nrank, nterms, mat) CALL dismat(splxy, mat) !!$ CALL putmat(fid, '/MAT0', mat, 'Assembled GB matrice') ALLOCATE(arr(nrank)) ! ! BC on Matrix ! CALL ibcmat(mat, ny) tmat = seconds() - t0 ! ! 3D RHS assembly ! ALLOCATE(rhs(nrank,0:nz-1), sol(nrank,0:nz-1)) CALL disrhs3(mbess, npow, splxyz, rhs) ! ! FFT in z of RHS ! ALLOCATE(crhs(nrank,0:nz-1)) crhs = rhs CALL fourrow(crhs, 1) crhs = crhs/REAL(nz,8) ! ! Apply Mass matrix to crhs ! kmin =-nz/2 kmax = nz/2-1 CALL init_dft(splxyz%sp3, kmin, kmax) ALLOCATE(fftmass_shifted(kmin:kmax)) ALLOCATE(fftmass(0:nz-1)) CALL calc_fftmass(splxyz%sp3, fftmass_shifted) DO k=kmin,kmax fftmass(MODULO(k+nz,nz)) = fftmass_shifted(k) END DO DO k=0,nz-1 crhs(:,k) = crhs(:,k)/fftmass(k) END DO ! ! Fourier transform back crhs to real space in z ! CALL fourrow(crhs, -1) rhs(:,:) = REAL(crhs(:,:),8) ! ! BC on RHS ! CALL ibcrhs3(rhs, ny) ! CALL putarr(fid, '/RHS', rhs, 'RHS of linear system') WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem() !=========================================================================== ! 3.0 Solve the dicretized system ! t0 = seconds() CALL factor(mat) tfact = seconds() - t0 gflops1 = dopla('DGBTRF',nrank,nrank,kl,ku,0) / tfact / 1.d9 t0 = seconds() CALL bsolve(mat, rhs, sol) ! ! Backtransform of solution ! DO k=0,nz-1 sol(1:ny-1,k) = sol(ny,k) END DO tsolv = seconds() - t0 gflops2 = dopla('DGBTRS',nrank,1,kl,ku,0) / tsolv / 1.d9 CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution') ! ! Spline coefficients, taking into account of periodicity in y and z ! Note: in SOL, y was numbered first. ! dimx = splxy%sp1%dim dimy = splxy%sp2%dim dimz = splxyz%sp3%dim WRITE(*,'(/a,3i6)') 'dimx, dimy, dimz =', dimx, dimy, dimz ALLOCATE(bcoef(0:dimx-1, 0:dimy-1, 0:dimz-1)) DO j=0,dimy-1 DO i=0,dimx-1 ij = MODULO(j,ny) + i*ny + 1 DO k=0,dimz-1 kk = MODULO(k,nz) bcoef(i,j,k) = sol(ij,kk) END DO END DO END DO CALL putarr(fid, '/BCOEF', bcoef, 'Spline coefficients of solution') !=========================================================================== ! 4.0 Check the solution ! ! Check function values computed with various method ! WRITE(*,'(/a)') 'Check with analytical solutions ...' CALL RANDOM_NUMBER(xp) yp=0.0d0 zp=0.0d0 jder = (/0,0,0/) CALL gridval(splxyz, xp, yp, zp, fp_calc, jder, bcoef) !!$ WRITE(*,'(4a12)') 'X', 'CALC', 'ANAL', 'ERROR' !!$ DO i=1,npart !!$ fp_anal(i) = (1-xp(i)**2) * xp(i)**mbess & !!$ & * COS(mbess*yp(i)) * COS(zp(i))**npow !!$ WRITE(*,'(4(1pe12.3))') xp(i), fp_calc(i), fp_anal(i), fp_calc(i)-fp_anal(i) !!$ END DO ! ALLOCATE(solcal(0:nx,0:ny,0:nz)) ALLOCATE(solana(0:nx,0:ny,0:nz)) ALLOCATE(errsol(0:nx,0:ny,0:nz)) DO i=0,nx DO j=0,ny DO k=0,nz solana(i,j,k) = (1-xgrid(i)**2) * xgrid(i)**mbess & & * COS(mbess*ygrid(j)) * COS(zgrid(k))**npow END DO END DO END DO ! jder = (/0,0,0/) CALL gridval(splxyz, xgrid, ygrid, zgrid, solcal, jder, bcoef) t0 = seconds() CALL gridval(splxyz, xgrid, ygrid, zgrid, solcal, jder) tgrid = seconds()-t0 errsol = solana - solcal WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & & norm2(errsol) / norm2(solana) ! CALL putarr(fid, '/xgrid', xgrid, 'r') CALL putarr(fid, '/ygrid', ygrid, '\theta') CALL putarr(fid, '/zgrid', zgrid, '\phi') CALL putarr(fid, '/sol', solcal, 'Solutions') CALL putarr(fid, '/solana', solana,'Exact solutions') ! ! Check derivative d/dx ! DO i=0,nx DO j=0,ny DO k=0,nz IF( mbess .EQ. 0 ) THEN solana(i,j,k) = -2.0d0 * xgrid(i) * COS(zgrid(k))**npow ELSE solana(i,j,k) = (-(mbess+2)*xgrid(i)**2 + mbess) * & & xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) * & & COS(zgrid(k))**npow END IF END DO END DO END DO ! jder = (/1,0,0/) t0 = seconds() CALL gridval(splxyz, xgrid, ygrid, zgrid, solcal, jder) tgrid = tgrid + seconds()-t0 errsol = solana - solcal CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions') CALL putarr(fid, '/derivx_exact', solana) WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dx', norm2(errsol)/norm2(solana) ! ! Check derivative d/dy ! DO i=0,nx DO j=0,ny DO k=0,nz solana(i,j,k) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * & & SIN(mbess*ygrid(j))* COS(zgrid(k))**npow END DO END DO END DO ! jder = (/0,1,0/) t0 = seconds() CALL gridval(splxyz, xgrid, ygrid, zgrid, solcal, jder) tgrid = tgrid + seconds()-t0 CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions') CALL putarr(fid, '/derivy_exact', solana) errsol = solana - solcal WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dy', norm2(errsol)/norm2(solana) ! ! Check derivative d/dz ! DO i=0,nx DO j=0,ny DO k=0,nz solana(i,j,k) = -npow*(1-xgrid(i)**2) * xgrid(i)**mbess & & * COS(mbess*ygrid(j)) * COS(zgrid(k))**(npow-1) & & * SIN(zgrid(k)) END DO END DO END DO ! jder = (/0,0,1/) t0 = seconds() IF(nlppform) THEN CALL gridval(splxyz, xgrid, ygrid, zgrid, solcal, jder) ELSE CALL gridval(splxyz, xgrid, ygrid, zgrid, solcal, jder, bcoef) END IF tgrid = tgrid + seconds()-t0 CALL putarr(fid, '/derivz', solcal, 'd/dz of solutions') CALL putarr(fid, '/derivz_exact', solana) errsol = solana - solcal WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dz', norm2(errsol)/norm2(solana) !=========================================================================== ! 9.0 Epilogue ! WRITE(*,'(/a)') '---' WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv WRITE(*,'(a,1pe12.3)') 'gridval time (s) ', tgrid WRITE(*,'(a,2f10.3)') 'Factor/solve Gflop/s', gflops1, gflops2 WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem() ! DEALLOCATE(xgrid, ygrid, zgrid) DEALLOCATE(rhs, sol) DEALLOCATE(crhs) DEALLOCATE(fftmass) DEALLOCATE(fftmass_shifted) DEALLOCATE(bcoef) DEALLOCATE(solcal, solana, errsol) DEALLOCATE(arr) CALL destroy_sp(splxyz) CALL destroy(mat) ! CALL closef(fid) !=========================================================================== ! CONTAINS FUNCTION norm2(x) ! ! Compute the 2-norm of array x ! IMPLICIT NONE DOUBLE PRECISION :: norm2 DOUBLE PRECISION, INTENT(in) :: x(:,:,:) DOUBLE PRECISION :: sum2 INTEGER :: i, j ! sum2 = 0.0d0 DO i=1,SIZE(x,1) DO j=1,SIZE(x,2) DO k=1,SIZE(x,3) sum2 = sum2 + x(i,j,k)**2 END DO END DO END DO norm2 = SQRT(sum2) END FUNCTION norm2 SUBROUTINE prnmat(label, mat) CHARACTER(len=*) :: label DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat INTEGER :: i WRITE(*,'(/a)') TRIM(label) DO i=1,SIZE(mat,1) WRITE(*,'(10(1pe12.3))') mat(i,:) END DO END SUBROUTINE prnmat END PROGRAM main ! !+++ diff --git a/examples/pde3d_mod.f90 b/examples/pde3d_mod.f90 index 79c3edf..20de074 100644 --- a/examples/pde3d_mod.f90 +++ b/examples/pde3d_mod.f90 @@ -1,397 +1,397 @@ !> !> @file pde3d_mod.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> MODULE pde3d_mod USE bsplines USE matrix IMPLICIT NONE ! CONTAINS SUBROUTINE dismat(spl, mat) ! ! Assembly of FE matrix mat using spline spl ! TYPE(spline2d), INTENT(in) :: spl TYPE(gbmat), INTENT(inout) :: mat ! INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: n2, nidbas2, ndim2, ng2 INTEGER :: i, j, ig1, ig2 INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) DOUBLE PRECISION:: contrib ! INTEGER :: kterms ! Number of terms in weak form INTEGER, DIMENSION(:,:), ALLOCATABLE :: idert, iderw ! Derivative order DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: coefs ! Terms in weak form !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1 WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2 ! ! Weak form ! kterms = mat%nterms ALLOCATE(idert(kterms,2), iderw(kterms,2), coefs(kterms)) ! ALLOCATE(fun1(0:nidbas1,0:1)) ! Spline and 1st derivative ALLOCATE(fun2(0:nidbas2,0:1)) ! ! ! Gauss quadature ! CALL get_gauss(spl%sp1, ng1) CALL get_gauss(spl%sp2, ng2) ALLOCATE(xg1(ng1), wg1(ng1)) ALLOCATE(xg2(ng1), wg2(ng1)) !=========================================================================== ! 2.0 Assembly loop ! DO i=1,n1 CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) DO ig1=1,ng1 CALL basfun(xg1(ig1), spl%sp1, fun1, i) DO j=1,n2 CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) DO ig2=1,ng2 CALL basfun(xg2(ig2), spl%sp2, fun2, j) CALL coefeq(xg1(ig1), xg2(ig2), idert, iderw, coefs) DO iterm=1,kterms DO iw1=0,nidbas1 ! Weight function in dir 1 igw1 = i+iw1 DO iw2=0,nidbas2 ! Weight function in dir 2 igw2 = MODULO(j+iw2-1, n2) + 1 irow = igw2 + (igw1-1)*n2 DO it1=0,nidbas1 ! Test function in dir 1 igt1 = i+it1 DO it2=0,nidbas2 ! Test function in dir 2 igt2 = MODULO(j+it2-1, n2) + 1 jcol = igt2 + (igt1-1)*n2 contrib = fun1(iw1,iderw(iterm,1)) * & & fun2(iw2,iderw(iterm,2)) * & & coefs(iterm) * & & fun2(it2,idert(iterm,2)) * & & fun1(it1,idert(iterm,1)) * & & wg1(ig1) * wg2(ig2) CALL updtmat(mat, irow, jcol, contrib) END DO END DO END DO END DO END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xg1, wg1, fun1) DEALLOCATE(xg2, wg2, fun2) DEALLOCATE(idert, iderw, coefs) ! CONTAINS SUBROUTINE coefeq(x, y, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x, y INTEGER, INTENT(out) :: idt(:,:), idw(:,:) DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1)) ! ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy ! c(1) = x ! idt(1,1) = 1 idt(1,2) = 0 idw(1,1) = 1 idw(1,2) = 0 ! c(2) = 1.d0/x idt(2,1) = 0 idt(2,2) = 1 idw(2,1) = 0 idw(2,2) = 1 END SUBROUTINE coefeq END SUBROUTINE dismat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE disrhs3(mbess, npow, spl, rhs) ! ! Assembly the RHS using 3d spline spl ! INTEGER, INTENT(in) :: mbess, npow TYPE(spline2d1d), TARGET :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:,:) ! TYPE(spline1d), POINTER :: sp1, sp2, sp3 INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: n2, nidbas2, ndim2, ng2 INTEGER :: n3, nidbas3, ndim3, ng3 INTEGER :: i, j, k, ig1, ig2, ig3, k1, k2, k3, i1, j2, ij, kk, nrank DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) DOUBLE PRECISION, ALLOCATABLE :: xg3(:), wg3(:), fun3(:,:) DOUBLE PRECISION:: contrib !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! sp1 => spl%sp12%sp1 sp2 => spl%sp12%sp2 sp3 => spl%sp3 ! CALL get_dim(sp1, ndim1, n1, nidbas1) CALL get_dim(sp2, ndim2, n2, nidbas2) CALL get_dim(sp3, ndim3, n3, nidbas3) ! ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) ALLOCATE(fun3(0:nidbas3,1)) ! needs only basis functions (no derivatives) ! ! Gauss quadature ! CALL get_gauss(sp1, ng1) CALL get_gauss(sp2, ng2) CALL get_gauss(sp3, ng3) WRITE(*,'(/a, 3i3)') 'Gauss points and weights, ngauss =', ng1, ng2, ng3 ALLOCATE(xg1(ng1), wg1(ng1)) ALLOCATE(xg2(ng2), wg2(ng2)) ALLOCATE(xg3(ng3), wg3(ng3)) !=========================================================================== ! 2.0 Assembly loop ! nrank = SIZE(rhs,1) rhs(1:nrank,1:n3) = 0.0d0 ! DO i=1,n1 CALL get_gauss(sp1, ng1, i, xg1, wg1) DO ig1=1,ng1 CALL basfun(xg1(ig1), sp1, fun1, i) DO j=1,n2 CALL get_gauss(sp2, ng2, j, xg2, wg2) DO ig2=1,ng2 CALL basfun(xg2(ig2), sp2, fun2, j) DO k=1,n3 CALL get_gauss(sp3, ng3, k, xg3, wg3) DO ig3=1,ng3 CALL basfun(xg3(ig3), sp3, fun3, k) contrib = wg1(ig1)*wg2(ig2)*wg3(ig3) * & & rhseq(xg1(ig1), xg2(ig2), xg3(ig3), mbess, npow) DO k1=0,nidbas1 i1 = i+k1 DO k2=0,nidbas2 j2 = MODULO(j+k2-1,n2) + 1 ij = j2 + (i1-1)*n2 DO k3=0,nidbas3 kk = MODULO(k+k3-1,n3) + 1 rhs(ij,kk) = rhs(ij, kk) + & & contrib*fun1(k1,1)*fun2(k2,1)*fun3(k3,1) END DO END DO END DO END DO END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xg1, wg1, fun1) DEALLOCATE(xg2, wg2, fun2) DEALLOCATE(xg3, wg3, fun3) ! CONTAINS DOUBLE PRECISION FUNCTION rhseq(x1, x2, x3, m, n) DOUBLE PRECISION, INTENT(in) :: x1, x2, x3 INTEGER, INTENT(in) :: m, n rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)*COS(x3)**n END FUNCTION rhseq END SUBROUTINE disrhs3 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE meshdist(c, x, nx) ! ! Construct an 1d non-equidistant mesh given a ! mesh distribution function. ! DOUBLE PRECISION, INTENT(in) :: c(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) INTEGER :: nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) END FUNCTION fdist END SUBROUTINE meshdist !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE ibcmat(mat, ny) ! ! Apply BC on matrix ! TYPE(gbmat), INTENT(inout) :: mat INTEGER, INTENT(in) :: ny INTEGER :: kl, ku, nrank, i, j DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:) !=========================================================================== ! 1.0 Prologue ! kl = mat%kl ku = mat%ku nrank = mat%rank ALLOCATE(zsum(nrank), arr(nrank)) !=========================================================================== ! 2.0 Unicity at the axis ! ! The vertical sum on the NY-th row ! zsum = 0.0d0 DO i=1,ny arr = 0.0d0 CALL getrow(mat, i, arr) DO j=1,ny+ku zsum(j) = zsum(j) + arr(j) END DO END DO CALL putrow(mat, ny, zsum) ! ! The horizontal sum on the NY-th column ! zsum = 0.0d0 DO j=1,ny arr = 0.0d0 CALL getcol(mat, j, arr) DO i=ny,ny+kl zsum(i) = zsum(i) + arr(i) END DO END DO CALL putcol(mat, ny, zsum) ! ! The away operator ! DO j = 1,ny-1 arr = 0.0d0; arr(j) = 1.0d0 CALL putcol(mat, j, arr) END DO ! DO i = 1,ny-1 arr = 0.0d0; arr(i) = 1.0d0 CALL putrow(mat, i, arr) END DO ! !=========================================================================== ! 3.0 Dirichlet on right boundary ! DO j = nrank, nrank-ny+1, -1 arr = 0.0d0; arr(j) = 1.0d0 CALL putcol(mat, j, arr) END DO ! DO i = nrank, nrank-ny+1, -1 arr = 0.0d0; arr(i) = 1.0d0 CALL putrow(mat, i, arr) END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(zsum, arr) ! END SUBROUTINE ibcmat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE ibcrhs3(rhs, ny) ! ! Apply BC on RHS ! DOUBLE PRECISION, INTENT(inout) :: rhs(:,:) INTEGER, INTENT(in) :: ny INTEGER :: nrank, nz, k DOUBLE PRECISION :: zsum !=========================================================================== ! 1.0 Prologue ! nrank = SIZE(rhs,1) nz = SIZE(rhs,2) !=========================================================================== ! 2.0 Unicity at the axis ! ! The vertical sum ! DO k=1,nz zsum = SUM(rhs(1:ny,k)) rhs(ny,k) = zsum rhs(1:ny-1,k) = 0.0d0 END DO !=========================================================================== ! 3.0 Dirichlet on right boundary ! DO k=1,nz rhs(nrank-ny+1:nrank,k) = 0.0d0 END DO END SUBROUTINE ibcrhs3 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ END MODULE pde3d_mod diff --git a/examples/poisson_mumps.f90 b/examples/poisson_mumps.f90 index 941a715..f495bd9 100644 --- a/examples/poisson_mumps.f90 +++ b/examples/poisson_mumps.f90 @@ -1,169 +1,169 @@ !> !> @file poisson_mumps.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main USE mumps_bsplines USE cds ! IMPLICIT NONE INCLUDE 'mpif.h' TYPE(mumps_mat) :: amat TYPE(cds_mat) :: amat_cds DOUBLE PRECISION, ALLOCATABLE :: rhs(:), sol(:), arow(:) INTEGER :: nx=5, ny=4 INTEGER :: n INTEGER :: i, j, irow INTEGER :: ierr, me INTEGER, ALLOCATABLE :: dists(:) DOUBLE PRECISION :: t0 ! CALL mpi_init(ierr) CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) ! IF(me.EQ.0) THEN WRITE(*,'(a)', advance='no') 'Enter nx, ny: ' READ(*,*) nx, ny END IF CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) n = nx*ny ! Rank of the matrix ALLOCATE(rhs(n)) ALLOCATE(sol(n)) ALLOCATE(arow(n)) ! WRITE(*,'(/a)') 'Mumps using CSR mat ...' CALL init(n, 1, amat) ! ! Construct the matrix and RHS ! t0 = mpi_wtime(0) DO j=1,ny DO i=1,nx arow = 0.0d0 irow = numb(i,j) arow(irow) = 4.0d0 IF(i.GT.1) arow(numb(i-1,j)) = -1.0d0 IF(i.LT.nx) arow(numb(i+1,j)) = -1.0d0 IF(j.GT.1) arow(numb(i,j-1)) = -1.0d0 IF(j.LT.ny) arow(numb(i,j+1)) = -1.0d0 CALL putrow(amat, irow, arow) rhs(irow) = SUM(arow) ! => the exact solution is 1 END DO END DO ! WRITE(*,'(a,i6)') 'Rank of matrix', n WRITE(*,'(a,i6)') 'Number of non-zeros of matrix', get_count(amat) WRITE(*,'(a,1pe12.3)') 'Matrix construction time (s)', mpi_wtime()-t0 ! ! Factor the amat matrix (Reordering, symbolic and numerical factorization) ! t0 = mpi_wtime(0) CALL factor(amat, nlmetis=.TRUE.) sol=rhs CALL bsolve(amat, sol) WRITE(*,'(a,1pe12.3)') 'Direct solve time (s)', mpi_wtime()-t0 ! IF(me.EQ.0) THEN WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(sol-1.0d0)) END IF CALL destroy(amat) ! ! CDS matrix ! WRITE(*,'(/a)') 'Mumps using CDS mat ...' IF(ALLOCATED(dists)) DEALLOCATE(dists) ALLOCATE(dists(-2:2)) dists = [-nx, -1, 0, 1, nx] WRITE(*,'(a/(20i4))') 'dists used in INIT =', dists CALL init(n, dists, 1, amat_cds) ! t0 = mpi_wtime(0) DO j=1,ny DO i=1,nx arow = 0.0d0 irow = numb(i,j) amat_cds%val(irow,0) = 4.0d0 IF(i.GT.1) amat_cds%val(irow,-1) = -1.0d0 IF(i.LT.nx) amat_cds%val(irow,+1) = -1.0d0 IF(j.GT.1) amat_cds%val(irow,-2) = -1.0d0 IF(j.LT.ny) amat_cds%val(irow,+2) = -1.0d0 END DO END DO WRITE(*,'(a,1pe12.3)') 'Matrix construction time (s)', mpi_wtime()-t0 ! ! Compute dists of amat PRINT*, 'stat of mata%mat', ASSOCIATED(amat%mat) PRINT*, 'rank of mata', amat%mat%rank CALL mstruct(amat%mat, dists) WRITE(*,'(A/(20i4))') 'dists from MSTRUCT=', dists ! t0 = mpi_wtime(0) CALL cds2mumps(amat_cds, amat) CALL factor(amat, debug=.FALSE.) sol = rhs CALL bsolve(amat, sol) WRITE(*,'(a,1pe12.3)') 'Direct solve time (s)', mpi_wtime()-t0 ! IF(me.EQ.0) THEN WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(sol-1.0d0)) END IF ! ! Clean up ! DEALLOCATE(rhs) DEALLOCATE(sol) DEALLOCATE(arow) CALL destroy(amat) CALL mpi_finalize(ierr) CONTAINS SUBROUTINE mstruct(mat, dists) TYPE(spmat), INTENT(in) :: mat INTEGER, ALLOCATABLE, INTENT(inout) :: dists(:) TYPE(elt), POINTER :: t INTEGER :: n, i, j0 j0 = LBOUND(dists,1) n = mat%rank PRINT*, 'rank of mat', n DO i=1,n ! scan the matrix rows t => mat%row(i)%row0 DO WHILE(ASSOCIATED(t)) ! walk thru the linked list row(i) j = t%index IF(ABS(t%val) .LE. EPSILON(0.0d0)) THEN dists(j0) = t%index-i ! distance from main diag j0 = j0+1 END IF t => t%next END DO END DO END SUBROUTINE mstruct INTEGER FUNCTION numb(i,j) ! ! One-dimensional numbering ! Number first x then y ! INTEGER, INTENT(in) :: i, j numb = (j-1)*nx + i END FUNCTION numb END PROGRAM main diff --git a/examples/poisson_petsc.f90 b/examples/poisson_petsc.f90 index 341afba..87dde81 100644 --- a/examples/poisson_petsc.f90 +++ b/examples/poisson_petsc.f90 @@ -1,218 +1,218 @@ !> !> @file poisson_petsc.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main USE petsc_bsplines IMPLICIT NONE TYPE(petsc_mat) :: amat DOUBLE PRECISION, ALLOCATABLE :: rhs(:), sol(:), arow(:) INTEGER :: nx=5, ny=4, ntrials=10 INTEGER :: nitmax=10000, nits DOUBLE PRECISION :: rtol=1.e-9 INTEGER :: n, nnz, nnz_loc INTEGER :: i, j, irow, jcol INTEGER :: ierr, me, npes, istart, iend DOUBLE PRECISION :: t0 INTEGER :: ncols, cols(5) ! Max nnz by row .LE. 5 ! CHARACTER(len=128) :: matfile='mat.dat', rhsfile='rhs.dat' LOGICAL :: file_exist ! NAMELIST /newrun/ nx, ny, nitmax, rtol, matfile, rhsfile ! CALL mpi_init(ierr) CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) ! IF(me.EQ.0) THEN READ(*, newrun) WRITE(*, newrun) WRITE(*,'(a,i6)') 'npes =', npes END IF CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nitmax, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(rtol, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(matfile, 128, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(rhsfile, 128, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) ! n = nx*ny ! Rank of the matrix ! ! Initialize matrix ! CALL init(n, 1, amat, comm=MPI_COMM_WORLD) istart = amat%istart iend = amat%iend !!$ WRITE(*,'(a,i3.3,a,3i6)') 'PE', me, ': istart, iend', istart, iend ! ! INQUIRE(file=TRIM(matfile), exist=file_exist) ! IF( file_exist ) THEN CALL mpi_barrier(MPI_COMM_WORLD,ierr) t0 = mpi_wtime() CALL load_mat(amat, matfile) CALL mpi_barrier(MPI_COMM_WORLD,ierr) IF(me.EQ.0) WRITE(*,'(a,1pe12.3)') 'Mat. loading time (s)', mpi_wtime()-t0 ELSE ! ! Construct the matrix ! CALL mpi_barrier(MPI_COMM_WORLD,ierr) t0 = mpi_wtime() ALLOCATE(arow(5)) ! Max nnz per row .LE. 5 DO j=1,ny DO i=1,nx irow = numb(i,j) IF( irow.GE.istart .AND. irow.LE.iend) THEN ncols = 1; cols(ncols) = irow; arow(ncols) = 4.0d0 IF(i.GT.1) THEN ncols = ncols+1 cols(ncols) = numb(i-1,j); arow(ncols) = -1.0d0 END IF IF(i.LT.nx) THEN ncols = ncols+1 cols(ncols) = numb(i+1,j); arow(ncols) = -1.0d0 END IF IF(j.GT.1) THEN ncols = ncols+1 cols(ncols) = numb(i,j-1); arow(ncols) = -1.0d0 END IF IF(j.LT.ny) THEN ncols = ncols+1 cols(ncols) = numb(i,j+1); arow(ncols) = -1.0d0 END IF CALL putrow(amat, irow, arow(1:ncols), cols(1:ncols)) END IF END DO END DO DEALLOCATE(arow) CALL mpi_barrier(MPI_COMM_WORLD,ierr) IF(me.EQ.0) WRITE(*,'(a,1pe12.3)') 'Mat. construction time (s)', mpi_wtime()-t0 ! ! Convert to PETSC mat ! CALL mpi_barrier(MPI_COMM_WORLD,ierr) t0=mpi_wtime() CALL to_mat(amat) CALL mpi_barrier(MPI_COMM_WORLD,ierr) IF(me.EQ.0) WRITE(*,'(a,1pe12.3)') 'Mat. conversion time (s)', mpi_wtime()-t0 ! CALL save_mat(amat, matfile) END IF ! ! Matrix size and partition could have changed after loading from file! ! n = amat%rank istart = amat%istart iend = amat%iend ! nnz_loc = get_count(amat) CALL mpi_reduce(nnz_loc, nnz, 1, MPI_INTEGER, mpi_sum, 0, MPI_COMM_WORLD, ierr) IF(npes.LE.4) THEN WRITE(*,'(a,i3.3,a,3i6)') 'PE', me, ': istart, iend (after), nloc, nnz_loc', & & istart, iend, iend-istart+1, nnz_loc END IF ! ! Construct or read RHS ! CALL mpi_barrier(MPI_COMM_WORLD,ierr) t0 = mpi_wtime() ALLOCATE(rhs(n)) INQUIRE(file=TRIM(rhsfile), exist=file_exist) IF( file_exist ) THEN OPEN(unit=99, file=TRIM(rhsfile), status='old', form='unformatted') READ(99) rhs(1:n) CLOSE(99) CALL mpi_barrier(MPI_COMM_WORLD,ierr) IF(me.EQ.0) THEN WRITE(*,'(a,1pe12.3)') 'RHS read time (s)', mpi_wtime()-t0 END IF ELSE rhs = 0.0d0 ALLOCATE(arow(n)) DO i=istart, iend arow = 0.0d0 CALL getrow(amat, i, arow) rhs(i) = SUM(arow) ! => the exact solution is 1 END DO arow = rhs CALL mpi_allreduce(arow, rhs, n, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, ierr) DEALLOCATE(arow) IF( me.EQ.0 ) THEN ! All processes have the gobla rhs OPEN(unit=99, file=TRIM(rhsfile), status='new', form='unformatted') WRITE(99) rhs(1:n) CLOSE(99) END IF CALL mpi_barrier(MPI_COMM_WORLD,ierr) IF(me.EQ.0) THEN WRITE(*,'(a,1pe12.3)') 'RHS construction time (s)', mpi_wtime()-t0 END IF END IF CLOSE(99) CALL mpi_barrier(MPI_COMM_WORLD,ierr) ! ! Back solve ! ALLOCATE(sol(n)) ! CALL mpi_barrier(MPI_COMM_WORLD,ierr) sol = 0.0d0 t0=mpi_wtime() CALL bsolve(amat, rhs, sol, rtol, nitmax, nits) CALL mpi_barrier(MPI_COMM_WORLD,ierr) IF(me.EQ.0) THEN WRITE(*,'(a,1pe12.3,i8,1pe12.3)') 'Error, nits, solve time(s)', & & MAXVAL(ABS(sol-1.0d0)), nits, mpi_wtime()-t0 END IF CALL mpi_barrier(MPI_COMM_WORLD,ierr) t0=mpi_wtime() DO i=1,ntrials sol = 0.0d0 CALL bsolve(amat, rhs, sol, rtol, nitmax, nits) END DO CALL mpi_barrier(MPI_COMM_WORLD,ierr) IF(me.EQ.0) THEN WRITE(*,'(a,1pe12.3,i8,1pe12.3)') 'Error, nits, solve time(s)', & & MAXVAL(ABS(sol-1.0d0)), nits, (mpi_wtime()-t0)/REAL(ntrials) END IF ! ! Clean up ! DEALLOCATE(rhs) DEALLOCATE(sol) CALL destroy(amat) CALL PetscFinalize(ierr) CALL mpi_finalize(ierr) CONTAINS INTEGER FUNCTION numb(i,j) ! ! One-dimensional numbering ! Number first x then y ! INTEGER, INTENT(in) :: i, j numb = (j-1)*nx + i END FUNCTION numb END PROGRAM main diff --git a/examples/ppde3d.f90 b/examples/ppde3d.f90 index 00e4f06..6a895e5 100644 --- a/examples/ppde3d.f90 +++ b/examples/ppde3d.f90 @@ -1,510 +1,510 @@ !> !> @file ppde3d.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Solving the following 3d PDE using splines: ! ! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my)cos(z)^n, with f(x=1,y) = 0 ! exact solution: f(x,y) = (1-x^2) x^m cos(my)cos(z)^n ! USE futils USE fft USE pputils2, ONLY : pptransp USE ppde3d_mod ! IMPLICIT NONE ! CHARACTER(len=128) :: infile="ppde3d.in" INTEGER :: l INTEGER :: nx, ny, nz, nidbas(3), ngauss(3), mbess, npow, nterms INTEGER :: startz, endz, nzloc INTEGER :: start_rank, end_rank, nrank_loc LOGICAL :: nlppform INTEGER :: i, j, k, kk, ij, dimx, dimy, dimz, nrank, kl, ku INTEGER :: jder(3), it DOUBLE PRECISION :: pi, coefx(5) DOUBLE PRECISION :: dy, dz DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, zgrid DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: fftmass DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: rhs, sol, rhs_t DOUBLE COMPLEX, DIMENSION(:,:), ALLOCATABLE :: crhs_t ! TYPE(spline2d1d), TARGET :: splxyz TYPE(spline2d), POINTER :: splxy TYPE(spline1d) :: splz TYPE(gbmat) :: mat ! CHARACTER(len=128) :: file='ppde3d.h5' INTEGER :: fid DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol DOUBLE PRECISION :: seconds, mem, dopla DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2 INTEGER :: nits=500 ! INTEGER, PARAMETER :: npart=10000000 INTEGER :: nploc DOUBLE PRECISION, DIMENSION(npart) :: xp, yp, zp, fp_calc, fp_anal DOUBLE PRECISION zsuml, zsumg, errnorm2 ! NAMELIST /newrun/ nx, ny, nz, nidbas, ngauss, mbess, npow, nlppform, coefx !=========================================================================== ! 1.0 Prologue ! ! Init MPI ! CALL mpi_init(ierr) CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) ! ! Get input file name from command argument ! IF( COMMAND_ARGUMENT_COUNT() .EQ. 1 ) THEN CALL GET_COMMAND_ARGUMENT(1, infile, l, ierr) END IF ! ! Read in data specific to run ! nx = 8 ! Number of intervals in x ny = 8 ! Number of intervals in y nz = 8 ! Number of intervals in z nidbas = (/3,3,3/) ! Degree of splines ngauss = (/4,4, 4/) ! Number of Gauss points/interval mbess = 2 ! Exponent of differential problem npow = 2 ! Exponent of differential problem nterms = 2 ! Number of terms in weak form nlppform = .TRUE. ! Use PPFORM for gridval or not coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function ! OPEN(unit=99, file=TRIM(infile), status='old', action='read') READ(99,newrun) IF( me.EQ.0) THEN WRITE(*,newrun) END IF CLOSE(99) ! ! Define grid on x (=radial) & y (=poloidal) axis ! pi = 4.0d0*ATAN(1.0d0) ALLOCATE(xgrid(0:nx), ygrid(0:ny), zgrid(0:nz)) ! xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 CALL meshdist(coefx, xgrid, nx) ! dy = 2.d0*pi/REAL(ny,8) ! Equidistant in y ygrid = (/ (j*dy, j=0,ny) /) ! ! Partitionned toroidal grid z ! dz = 2.0d0*pi/REAL(nz,8) ! Equidistant in z zgrid = (/ (k*dz, k=0,nz) /) CALL dist1d(0, nz, startz, nzloc) endz = startz+nzloc !!$ PRINT*, 'PE', me, ' startz, endz, nzloc', startz, endz, nzloc ! IF( me.EQ.0) THEN WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny) WRITE(*,'(a/(10f8.3))') 'ZGRID', zgrid(0:nz) END IF ! ! Create hdf5 file ! CALL creatf(file, fid, 'PDE2D Result File', real_prec='d', & & mpicomm=MPI_COMM_WORLD) CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NY', ny) CALL attach(fid, '/', 'NZ', nz) CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) CALL attach(fid, '/', 'NIDBAS3', nidbas(3)) CALL attach(fid, '/', 'NGAUSS1', ngauss(1)) CALL attach(fid, '/', 'NGAUSS2', ngauss(2)) CALL attach(fid, '/', 'NGAUSS2', ngauss(3)) CALL attach(fid, '/', 'MBESS', mbess) CALL putarr(fid, '/xgrid', xgrid, 'r') CALL putarr(fid, '/ygrid', ygrid, '\theta') CALL putarr(fid, '/zgrid', zgrid(0:nz-1), '\phi') !=========================================================================== ! 2.0 Discretize the PDE ! ! Set up spline ! t0 = seconds() CALL set_spline(nidbas, ngauss, xgrid, ygrid, zgrid(startz:endz), & & splxyz, (/.FALSE., .TRUE., .TRUE./), nlppform=nlppform) splxy => splxyz%sp12 ! IF( me.EQ.0) THEN WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots END IF CALL disp(splxyz%sp3%knots, 'KNOTS in Z', MPI_COMM_WORLD) ! ! 2D FE matrix assembly (in plane x-y) ! nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix kl = (nidbas(1)+1)*ny -1 ! Number of sub-diagnonals ku = kl ! Number of super-diagnonals IF(me.EQ.0) THEN WRITE(*,'(a,5i6)') 'nrank, kl, ku', nrank, kl, ku END IF ! CALL init(kl, ku, nrank, nterms, mat) CALL dismat(splxy, mat) ALLOCATE(arr(nrank)) ! ! BC on Matrix ! CALL ibcmat(mat, ny) tmat = seconds() - t0 ! ! 3D RHS assembly ! ALLOCATE(rhs(nrank,0:nzloc+nidbas(3)-1)) ! With right guard cells nzloc:nzloc+nidbas3-1 ALLOCATE(sol(nrank,0:nzloc-1)) CALL disrhs3(mbess, npow, splxyz, rhs) ! zsuml = SUM(ABS(rhs(:,0:nzloc-1))) CALL mpi_reduce(zsuml, zsumg, 1, MPI_DOUBLE_PRECISION, MPI_SUM , 0, & & MPI_COMM_WORLD, ierr) IF(me.EQ.0) PRINT*, 'sum of rhs after DISRHS3', zsumg ! ! FFT in z of RHS ! CALL dist1d(1, nrank, start_rank, nrank_loc) end_rank = start_rank+nrank_loc-1 ALLOCATE(rhs_t(0:nz-1,nrank_loc), crhs_t(0:nz-1,nrank_loc)) ! CALL pptransp(MPI_COMM_WORLD, rhs(:,0:nzloc-1), rhs_t) crhs_t = rhs_t CALL fourcol(crhs_t,1) crhs_t = crhs_t/REAL(nz,8) ! ! Apply Mass matrix to crhs ! PRINT*, 4 CALL set_spline(nidbas(3), ngauss(3), zgrid, splz, .TRUE.) ALLOCATE(fftmass(0:nz-1)) PRINT*, 5 !!$ CALL calc_fftmass(splz, fftmass) CALL calc_fftmass_old(splz, fftmass) IF(me.EQ.0) THEN WRITE(*,'(/a/(10(1pe12.3)))') 'Mass matrix', fftmass END IF DO k=0,nz-1 crhs_t(k,:) = crhs_t(k,:)/fftmass(k) END DO ! ! Fourier transform back crhs to real space in z ! CALL fourcol(crhs_t, -1) rhs_t(:,:) = REAL(crhs_t(:,:),8) CALL pptransp(MPI_COMM_WORLD, rhs_t, sol) ! Put the final RHS in SOL ! ! BC on RHS ! CALL ibcrhs3(sol, ny) ! IF(me.EQ.0) THEN WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem() END IF !=========================================================================== ! 3.0 Solve the dicretized system ! t0 = seconds() CALL factor(mat) tfact = seconds() - t0 gflops1 = dopla('DGBTRF',nrank,nrank,kl,ku,0) / tfact / 1.d9 t0 = seconds() CALL bsolve(mat, sol) ! ! Backtransform of solution ! DO k=0,nzloc-1 sol(1:ny-1,k) = sol(ny,k) END DO ! zsuml = SUM(ABS(sol)) CALL mpi_reduce(zsuml, zsumg, 1, MPI_DOUBLE_PRECISION, MPI_SUM , 0, & & MPI_COMM_WORLD, ierr) IF(me.EQ.0) PRINT*, 'sum of sol', zsumg ! tsolv = seconds() - t0 gflops2 = dopla('DGBTRS',nrank,1,kl,ku,0) / tsolv / 1.d9 ! ! Spline coefficients, taking into account of periodicity in y and z ! Note: in SOL, y was numbered first. ! dimx = splxy%sp1%dim dimy = splxy%sp2%dim dimz = splxyz%sp3%dim ALLOCATE(bcoef(0:dimx-1, 0:dimy-1, 0:dimz-1)) ! ! Get 3D array of spline coefs. ! DO j=0,dimy-1 DO i=0,dimx-1 ij = MODULO(j,ny) + i*ny + 1 DO k=0,nzloc-1 bcoef(i,j,k) = sol(ij,k) END DO END DO END DO ! ! Get missing coefs from remote guard cells ! prev = MODULO(me-1,npes) next = MODULO(me+1,npes) count = dimx*dimy DO i=0,nidbas(3)-1 CALL mpi_sendrecv(bcoef(0,0,i), count, MPI_DOUBLE_PRECISION, prev, 0, & & bcoef(0,0,nzloc+i), count, MPI_DOUBLE_PRECISION, next, 0, & & MPI_COMM_WORLD, status, ierr) END DO ! IF(me.EQ.0) THEN WRITE(*,'(/a,3i6)') 'dimx, dimy, dimz =', dimx, dimy, dimz END IF !=========================================================================== ! 4.0 Check the solution ! ! Check function values computed with various method ! CALL RANDOM_NUMBER(xp) CALL RANDOM_NUMBER(yp); yp = 2.d0*pi*yp CALL RANDOM_NUMBER(zp); zp = 2.d0*pi*zp nploc = 0 DO i=1,npart IF(zp(i).GE.zgrid(startz) .AND. zp(i).LT.zgrid(endz)) THEN nploc = nploc+1 xp(nploc) = xp(i) yp(nploc) = yp(i) zp(nploc) = zp(i) END IF END DO jder = (/0,0,0/) CALL gridval(splxyz, xp(1:nploc), yp(1:nploc), zp(1:nploc), fp_calc(1:nploc), jder, bcoef) DO i=1,nploc fp_anal(i) = (1-xp(i)**2) * xp(i)**mbess & & * COS(mbess*yp(i)) * COS(zp(i))**npow END DO errnorm2 = norm21(fp_calc(1:nploc)-fp_anal(1:nploc))/norm21(fp_calc(1:nploc)) IF(me.EQ.0) THEN WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors, using random points', & & errnorm2 END IF ! ALLOCATE(solcal(0:nx,0:ny,0:nzloc-1)) ALLOCATE(solana(0:nx,0:ny,0:nzloc-1)) ALLOCATE(errsol(0:nx,0:ny,0:nzloc-1)) ! DO i=0,nx DO j=0,ny DO k=0,nzloc-1 kk=startz+k solana(i,j,k) = (1-xgrid(i)**2) * xgrid(i)**mbess & & * COS(mbess*ygrid(j)) * COS(zgrid(kk))**npow END DO END DO END DO ! jder = (/0,0,0/) CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder, bcoef) t0 = seconds() CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder) tgrid = seconds()-t0 errsol = solana - solcal ! errnorm2 = norm2(errsol) / norm2(solana) IF(me.EQ.0) THEN WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & & errnorm2 END IF CALL putarr(fid, '/sol', solcal,pardim=3) CALL putarr(fid, '/solana', solana,pardim=3) ! ! Check derivative d/dx ! DO i=0,nx DO j=0,ny DO k=0,nzloc-1 IF( mbess .EQ. 0 ) THEN solana(i,j,k) = -2.0d0 * xgrid(i) * COS(zgrid(k+startz))**npow ELSE solana(i,j,k) = (-(mbess+2)*xgrid(i)**2 + mbess) * & & xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) * & & COS(zgrid(k+startz))**npow END IF END DO END DO END DO ! jder = (/1,0,0/) t0 = seconds() CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder) tgrid = tgrid + seconds()-t0 errsol = solana - solcal errnorm2 = norm2(errsol) / norm2(solana) IF(me.EQ.0) THEN WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dx', errnorm2 END IF CALL putarr(fid, '/derivx', solcal, pardim=3) CALL putarr(fid, '/derivx_exact', solana,pardim=3) ! ! Check derivative d/dy ! DO i=0,nx DO j=0,ny DO k=0,nzloc-1 solana(i,j,k) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * & & SIN(mbess*ygrid(j))* COS(zgrid(k+startz))**npow END DO END DO END DO ! jder = (/0,1,0/) t0 = seconds() CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder) tgrid = tgrid + seconds()-t0 errsol = solana - solcal errnorm2 = norm2(errsol) / norm2(solana) IF(me.EQ.0) THEN WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dy', errnorm2 END IF CALL putarr(fid, '/derivy', solcal, pardim=3) CALL putarr(fid, '/derivy_exact', solana,pardim=3) ! ! Check derivative d/dz ! DO i=0,nx DO j=0,ny DO k=0,nzloc-1 solana(i,j,k) = -npow*(1-xgrid(i)**2) * xgrid(i)**mbess & & * COS(mbess*ygrid(j)) * COS(zgrid(k+startz))**(npow-1) & & * SIN(zgrid(k+startz)) END DO END DO END DO ! jder = (/0,0,1/) t0 = seconds() IF(nlppform) THEN CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder) ELSE CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder, bcoef) END IF tgrid = tgrid + seconds()-t0 errsol = solana - solcal errnorm2 = norm2(errsol) / norm2(solana) IF(me.EQ.0) THEN WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dz', errnorm2 END IF CALL putarr(fid, '/derivz', solcal, pardim=3) CALL putarr(fid, '/derivz_exact', solana,pardim=3) !=========================================================================== ! 9.0 Epilogue ! IF(me.EQ.0) THEN WRITE(*,'(/a)') '---' WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv WRITE(*,'(a,1pe12.3)') 'gridval time (s) ', tgrid WRITE(*,'(a,2f10.3)') 'Factor/solve Gflop/s', gflops1, gflops2 WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem() END IF ! DEALLOCATE(xgrid, ygrid, zgrid) DEALLOCATE(fftmass) DEALLOCATE(rhs) DEALLOCATE(sol) DEALLOCATE(rhs_t) DEALLOCATE(crhs_t) DEALLOCATE(bcoef) DEALLOCATE(solcal, solana, errsol) DEALLOCATE(arr) CALL destroy_sp(splxyz) CALL destroy_sp(splz) CALL destroy(mat) ! CALL closef(fid) ! CALL mpi_finalize(ierr) !=========================================================================== ! CONTAINS FUNCTION norm2(x) ! ! Compute the 2-norm of array x ! IMPLICIT NONE DOUBLE PRECISION :: norm2 DOUBLE PRECISION, INTENT(in) :: x(:,:,:) DOUBLE PRECISION :: sum2, sum2g INTEGER :: i, j ! sum2 = 0.0d0 DO i=1,SIZE(x,1) DO j=1,SIZE(x,2) DO k=1,SIZE(x,3) sum2 = sum2 + x(i,j,k)**2 END DO END DO END DO CALL mpi_allreduce(sum2, sum2g, 1, MPI_DOUBLE_PRECISION, MPI_SUM, & & MPI_COMM_WORLD, ierr) norm2 = SQRT(sum2g) END FUNCTION norm2 ! FUNCTION norm21(x) ! ! Compute the 2-norm of array x ! IMPLICIT NONE DOUBLE PRECISION :: norm21 DOUBLE PRECISION, INTENT(in) :: x(:) DOUBLE PRECISION :: sum2, sum2g INTEGER :: i, j ! sum2 = DOT_PRODUCT(x,x) CALL mpi_allreduce(sum2, sum2g, 1, MPI_DOUBLE_PRECISION, MPI_SUM, & & MPI_COMM_WORLD, ierr) norm21 = SQRT(sum2g) END FUNCTION norm21 SUBROUTINE prnmat(label, mat) CHARACTER(len=*) :: label DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat INTEGER :: i WRITE(*,'(/a)') TRIM(label) DO i=1,SIZE(mat,1) WRITE(*,'(10(1pe12.3))') mat(i,:) END DO END SUBROUTINE prnmat END PROGRAM main ! !+++ diff --git a/examples/ppde3d_mod.f90 b/examples/ppde3d_mod.f90 index c04c788..f4f7627 100644 --- a/examples/ppde3d_mod.f90 +++ b/examples/ppde3d_mod.f90 @@ -1,473 +1,473 @@ !> !> @file ppde3d_mod.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> MODULE ppde3d_mod USE bsplines USE matrix IMPLICIT NONE INCLUDE "mpif.h" ! INTEGER :: me, npes INTEGER :: prev, next INTEGER :: count, status(MPI_STATUS_SIZE), ierr ! CONTAINS SUBROUTINE dismat(spl, mat) ! ! Assembly of FE matrix mat using spline spl ! TYPE(spline2d), INTENT(in) :: spl TYPE(gbmat), INTENT(inout) :: mat ! INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: n2, nidbas2, ndim2, ng2 INTEGER :: i, j, ig1, ig2 INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) DOUBLE PRECISION:: contrib ! INTEGER :: kterms ! Number of terms in weak form INTEGER, DIMENSION(:,:), ALLOCATABLE :: idert, iderw ! Derivative order DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: coefs ! Terms in weak form !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) ! ! Weak form ! kterms = mat%nterms ALLOCATE(idert(kterms,2), iderw(kterms,2), coefs(kterms)) ! ALLOCATE(fun1(0:nidbas1,0:1)) ! Spline and 1st derivative ALLOCATE(fun2(0:nidbas2,0:1)) ! ! ! Gauss quadature ! CALL get_gauss(spl%sp1, ng1) CALL get_gauss(spl%sp2, ng2) ALLOCATE(xg1(ng1), wg1(ng1)) ALLOCATE(xg2(ng1), wg2(ng1)) !=========================================================================== ! 2.0 Assembly loop ! DO i=1,n1 CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) DO ig1=1,ng1 CALL basfun(xg1(ig1), spl%sp1, fun1, i) DO j=1,n2 CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) DO ig2=1,ng2 CALL basfun(xg2(ig2), spl%sp2, fun2, j) CALL coefeq(xg1(ig1), xg2(ig2), idert, iderw, coefs) DO iterm=1,kterms DO iw1=0,nidbas1 ! Weight function in dir 1 igw1 = i+iw1 DO iw2=0,nidbas2 ! Weight function in dir 2 igw2 = MODULO(j+iw2-1, n2) + 1 irow = igw2 + (igw1-1)*n2 DO it1=0,nidbas1 ! Test function in dir 1 igt1 = i+it1 DO it2=0,nidbas2 ! Test function in dir 2 igt2 = MODULO(j+it2-1, n2) + 1 jcol = igt2 + (igt1-1)*n2 contrib = fun1(iw1,iderw(iterm,1)) * & & fun2(iw2,iderw(iterm,2)) * & & coefs(iterm) * & & fun2(it2,idert(iterm,2)) * & & fun1(it1,idert(iterm,1)) * & & wg1(ig1) * wg2(ig2) CALL updtmat(mat, irow, jcol, contrib) END DO END DO END DO END DO END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xg1, wg1, fun1) DEALLOCATE(xg2, wg2, fun2) DEALLOCATE(idert, iderw, coefs) ! CONTAINS SUBROUTINE coefeq(x, y, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x, y INTEGER, INTENT(out) :: idt(:,:), idw(:,:) DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1)) ! ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy ! c(1) = x ! idt(1,1) = 1 idt(1,2) = 0 idw(1,1) = 1 idw(1,2) = 0 ! c(2) = 1.d0/x idt(2,1) = 0 idt(2,2) = 1 idw(2,1) = 0 idw(2,2) = 1 END SUBROUTINE coefeq END SUBROUTINE dismat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE disrhs3(mbess, npow, spl, rhs) ! ! Assembly the RHS using 3d spline spl ! INTEGER, INTENT(in) :: mbess, npow TYPE(spline2d1d), TARGET :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:,:) ! TYPE(spline1d), POINTER :: sp1, sp2, sp3 INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: n2, nidbas2, ndim2, ng2 INTEGER :: n3, nidbas3, ndim3, ng3 INTEGER :: i, j, k, ig1, ig2, ig3, k1, k2, k3, i1, j2, ij, kk, nrank DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) DOUBLE PRECISION, ALLOCATABLE :: xg3(:), wg3(:), fun3(:,:) DOUBLE PRECISION, ALLOCATABLE :: buf(:) DOUBLE PRECISION:: contrib !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! sp1 => spl%sp12%sp1 sp2 => spl%sp12%sp2 sp3 => spl%sp3 ! CALL get_dim(sp1, ndim1, n1, nidbas1) CALL get_dim(sp2, ndim2, n2, nidbas2) CALL get_dim(sp3, ndim3, n3, nidbas3) ! ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) ALLOCATE(fun3(0:nidbas3,1)) ! needs only basis functions (no derivatives) ! ! Gauss quadature ! CALL get_gauss(sp1, ng1) CALL get_gauss(sp2, ng2) CALL get_gauss(sp3, ng3) ALLOCATE(xg1(ng1), wg1(ng1)) ALLOCATE(xg2(ng2), wg2(ng2)) ALLOCATE(xg3(ng3), wg3(ng3)) !=========================================================================== ! 2.0 Assembly loop ! nrank = SIZE(rhs,1) rhs = 0.0d0 ! DO i=1,n1 CALL get_gauss(sp1, ng1, i, xg1, wg1) DO ig1=1,ng1 CALL basfun(xg1(ig1), sp1, fun1, i) DO j=1,n2 CALL get_gauss(sp2, ng2, j, xg2, wg2) DO ig2=1,ng2 CALL basfun(xg2(ig2), sp2, fun2, j) DO k=1,n3 CALL get_gauss(sp3, ng3, k, xg3, wg3) DO ig3=1,ng3 CALL basfun(xg3(ig3), sp3, fun3, k) contrib = wg1(ig1)*wg2(ig2)*wg3(ig3) * & & rhseq(xg1(ig1), xg2(ig2), xg3(ig3), mbess, npow) DO k1=0,nidbas1 i1 = i+k1 DO k2=0,nidbas2 j2 = MODULO(j+k2-1,n2) + 1 ij = j2 + (i1-1)*n2 DO k3=0,nidbas3 kk = k+k3 rhs(ij,kk) = rhs(ij, kk) + & & contrib*fun1(k1,1)*fun2(k2,1)*fun3(k3,1) END DO END DO END DO END DO END DO END DO END DO END DO END DO ! ! Update from remote guard cells ! CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) next = MODULO(me+1,npes) prev = MODULO(me-1,npes) count = nrank ALLOCATE(buf(nrank)) DO i=nidbas3,1,-1 CALL mpi_sendrecv(rhs(1,n3+i), count, MPI_DOUBLE_PRECISION, next, 0, & & buf, count, MPI_DOUBLE_PRECISION, prev, 0, & & MPI_COMM_WORLD, status, ierr) rhs(:,i) = rhs(:,i) + buf(:) END DO DEALLOCATE(buf) !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xg1, wg1, fun1) DEALLOCATE(xg2, wg2, fun2) DEALLOCATE(xg3, wg3, fun3) ! CONTAINS DOUBLE PRECISION FUNCTION rhseq(x1, x2, x3, m, n) DOUBLE PRECISION, INTENT(in) :: x1, x2, x3 INTEGER, INTENT(in) :: m, n rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)*COS(x3)**n END FUNCTION rhseq END SUBROUTINE disrhs3 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE meshdist(c, x, nx) ! ! Construct an 1d non-equidistant mesh given a ! mesh distribution function. ! DOUBLE PRECISION, INTENT(in) :: c(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) INTEGER :: nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) END FUNCTION fdist END SUBROUTINE meshdist !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE ibcmat(mat, ny) ! ! Apply BC on matrix ! TYPE(gbmat), INTENT(inout) :: mat INTEGER, INTENT(in) :: ny INTEGER :: kl, ku, nrank, i, j DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:) !=========================================================================== ! 1.0 Prologue ! kl = mat%kl ku = mat%ku nrank = mat%rank ALLOCATE(zsum(nrank), arr(nrank)) !=========================================================================== ! 2.0 Unicity at the axis ! ! The vertical sum on the NY-th row ! zsum = 0.0d0 DO i=1,ny arr = 0.0d0 CALL getrow(mat, i, arr) DO j=1,ny+ku zsum(j) = zsum(j) + arr(j) END DO END DO CALL putrow(mat, ny, zsum) ! ! The horizontal sum on the NY-th column ! zsum = 0.0d0 DO j=1,ny arr = 0.0d0 CALL getcol(mat, j, arr) DO i=ny,ny+kl zsum(i) = zsum(i) + arr(i) END DO END DO CALL putcol(mat, ny, zsum) ! ! The away operator ! DO j = 1,ny-1 arr = 0.0d0; arr(j) = 1.0d0 CALL putcol(mat, j, arr) END DO ! DO i = 1,ny-1 arr = 0.0d0; arr(i) = 1.0d0 CALL putrow(mat, i, arr) END DO ! !=========================================================================== ! 3.0 Dirichlet on right boundary ! DO j = nrank, nrank-ny+1, -1 arr = 0.0d0; arr(j) = 1.0d0 CALL putcol(mat, j, arr) END DO ! DO i = nrank, nrank-ny+1, -1 arr = 0.0d0; arr(i) = 1.0d0 CALL putrow(mat, i, arr) END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(zsum, arr) ! END SUBROUTINE ibcmat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE ibcrhs3(rhs, ny) ! ! Apply BC on RHS ! DOUBLE PRECISION, INTENT(inout) :: rhs(:,:) INTEGER, INTENT(in) :: ny INTEGER :: nrank, nz, k DOUBLE PRECISION :: zsum !=========================================================================== ! 1.0 Prologue ! nrank = SIZE(rhs,1) nz = SIZE(rhs,2) !=========================================================================== ! 2.0 Unicity at the axis ! ! The vertical sum ! DO k=1,nz zsum = SUM(rhs(1:ny,k)) rhs(ny,k) = zsum rhs(1:ny-1,k) = 0.0d0 END DO !=========================================================================== ! 3.0 Dirichlet on right boundary ! DO k=1,nz rhs(nrank-ny+1:nrank,k) = 0.0d0 END DO END SUBROUTINE ibcrhs3 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE dist1d(s0, ntot, s, nloc) INCLUDE 'mpif.h' INTEGER, INTENT(in) :: s0, ntot INTEGER, INTENT(out) :: s, nloc INTEGER :: me, npes, ierr, naver, rem ! CALL MPI_COMM_SIZE(MPI_COMM_WORLD, npes, ierr) CALL MPI_COMM_RANK(MPI_COMM_WORLD, me, ierr) naver = ntot/npes rem = MODULO(ntot,npes) s = s0 + MIN(rem,me) + me*naver nloc = naver IF( me.LT.rem ) nloc = nloc+1 END SUBROUTINE dist1d !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE disp(a, str, comm) ! ! Gather partitionned 1d array to 0 and print it ! INCLUDE 'mpif.h' DOUBLE PRECISION, INTENT(in) :: a(:) INTEGER, INTENT(in) :: comm CHARACTER(len=*), INTENT(in) :: str INTEGER :: n, ntot, npes, me, ierr, i DOUBLE PRECISION, ALLOCATABLE, DIMENSION(:) :: c INTEGER, ALLOCATABLE, DIMENSION(:) :: counts, displs ! CALL mpi_comm_rank(comm, me, ierr) CALL mpi_comm_size(comm, npes, ierr) n = SIZE(a) IF(me.EQ.0) THEN ALLOCATE(counts(npes), displs(npes+1)) END IF CALL mpi_gather(n, 1, MPI_INTEGER, counts, 1, MPI_INTEGER, 0, comm, ierr) IF(me.EQ.0) THEN displs(1) = 0 DO i=2,npes+1 displs(i) = displs(i-1)+counts(i-1) END DO ntot = displs(npes+1) ALLOCATE(c(ntot)) c = 0.0d0 END IF CALL mpi_gatherv(a, n, MPI_DOUBLE_PRECISION, c, counts, displs, & & MPI_DOUBLE_PRECISION, 0, comm, ierr) IF( me.EQ.0 ) THEN WRITE(*,'(/a)') TRIM(str) DO i=1,npes WRITE(*,'(a,i3.3/(10(1pe12.3)))') 'PE', i-1, & & c(displs(i)+1:displs(i+1)) END DO DEALLOCATE(c) DEALLOCATE(counts) DEALLOCATE(displs) END IF END SUBROUTINE disp !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ END MODULE ppde3d_mod diff --git a/examples/ppde3d_pb.f90 b/examples/ppde3d_pb.f90 index 850056b..800226f 100644 --- a/examples/ppde3d_pb.f90 +++ b/examples/ppde3d_pb.f90 @@ -1,518 +1,518 @@ !> !> @file ppde3d_pb.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Solving the following 3d PDE using splines: ! ! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my)cos(z)^n, with f(x=1,y) = 0 ! exact solution: f(x,y) = (1-x^2) x^m cos(my)cos(z)^n ! USE futils USE fft USE pputils2, ONLY : pptransp USE ppde3d_pb_mod ! IMPLICIT NONE ! CHARACTER(len=128) :: infile="ppde3d_pb.in" INTEGER :: l INTEGER :: nx, ny, nz, nidbas(3), ngauss(3), mbess, npow, nterms INTEGER :: startz, endz, nzloc INTEGER :: start_rank, end_rank, nrank_loc LOGICAL :: nlppform INTEGER :: i, j, k, kk, ij, dimx, dimy, dimz, nrank, kl, ku INTEGER :: jder(3), it DOUBLE PRECISION :: pi, coefx(5) DOUBLE PRECISION :: dy, dz DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, zgrid DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: fftmass DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: rhs, sol, rhs_t DOUBLE COMPLEX, DIMENSION(:,:), ALLOCATABLE :: crhs_t ! TYPE(spline2d1d), TARGET :: splxyz TYPE(spline2d), POINTER :: splxy TYPE(spline1d) :: splz TYPE(pbmat) :: mat ! CHARACTER(len=128) :: file='ppde3d_pb.h5' INTEGER :: fid DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol DOUBLE PRECISION :: seconds, mem, dopla DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2 INTEGER :: nits=500 ! INTEGER, PARAMETER :: npart=10000000 INTEGER :: nploc DOUBLE PRECISION, DIMENSION(npart) :: xp, yp, zp, fp_calc, fp_anal DOUBLE PRECISION zsuml, zsumg, errnorm2 ! INTEGER :: kmin, kmax DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: fftmass_shifted ! NAMELIST /newrun/ nx, ny, nz, nidbas, ngauss, mbess, npow, nlppform, coefx !=========================================================================== ! 1.0 Prologue ! ! Init MPI ! CALL mpi_init(ierr) CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) ! ! Get input file name from command argument ! IF( COMMAND_ARGUMENT_COUNT() .EQ. 1 ) THEN CALL GET_COMMAND_ARGUMENT(1, infile, l, ierr) END IF ! ! Read in data specific to run ! nx = 8 ! Number of intervals in x ny = 8 ! Number of intervals in y nz = 8 ! Number of intervals in z nidbas = (/3,3,3/) ! Degree of splines ngauss = (/4,4, 4/) ! Number of Gauss points/interval mbess = 2 ! Exponent of differential problem npow = 2 ! Exponent of differential problem nterms = 2 ! Number of terms in weak form nlppform = .TRUE. ! Use PPFORM for gridval or not coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function ! OPEN(unit=99, file=TRIM(infile), status='old', action='read') READ(99,newrun) IF( me.EQ.0) THEN WRITE(*,newrun) END IF CLOSE(99) ! ! Define grid on x (=radial) & y (=poloidal) axis ! pi = 4.0d0*ATAN(1.0d0) ALLOCATE(xgrid(0:nx), ygrid(0:ny), zgrid(0:nz)) ! xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 CALL meshdist(coefx, xgrid, nx) ! dy = 2.d0*pi/REAL(ny,8) ! Equidistant in y ygrid = (/ (j*dy, j=0,ny) /) ! ! Partitionned toroidal grid z ! dz = 2.0d0*pi/REAL(nz,8) ! Equidistant in z zgrid = (/ (k*dz, k=0,nz) /) CALL dist1d(0, nz, startz, nzloc) endz = startz+nzloc !!$ PRINT*, 'PE', me, ' startz, endz, nzloc', startz, endz, nzloc ! IF( me.EQ.0) THEN WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny) WRITE(*,'(a/(10f8.3))') 'ZGRID', zgrid(0:nz) END IF ! ! Create hdf5 file ! CALL creatf(file, fid, 'PDE2D Result File', real_prec='d', & & mpicomm=MPI_COMM_WORLD) CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NY', ny) CALL attach(fid, '/', 'NZ', nz) CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) CALL attach(fid, '/', 'NIDBAS3', nidbas(3)) CALL attach(fid, '/', 'NGAUSS1', ngauss(1)) CALL attach(fid, '/', 'NGAUSS2', ngauss(2)) CALL attach(fid, '/', 'NGAUSS2', ngauss(3)) CALL attach(fid, '/', 'MBESS', mbess) CALL putarr(fid, '/xgrid', xgrid, 'r') CALL putarr(fid, '/ygrid', ygrid, '\theta') CALL putarr(fid, '/zgrid', zgrid(0:nz-1), '\phi') !=========================================================================== ! 2.0 Discretize the PDE ! ! Set up spline ! t0 = seconds() CALL set_spline(nidbas, ngauss, xgrid, ygrid, zgrid(startz:endz), & & splxyz, (/.FALSE., .TRUE., .TRUE./), nlppform=nlppform) splxy => splxyz%sp12 ! IF( me.EQ.0) THEN WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots END IF CALL disp(splxyz%sp3%knots, 'KNOTS in Z', MPI_COMM_WORLD) ! ! 2D FE matrix assembly (in plane x-y) ! nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix kl = (nidbas(1)+1)*ny -1 ! Number of sub-diagnonals ku = kl ! Number of super-diagnonals IF(me.EQ.0) THEN WRITE(*,'(a,5i6)') 'nrank, kl, ku', nrank, kl, ku END IF ! CALL init(ku, nrank, nterms, mat) CALL dismat(splxy, mat) ALLOCATE(arr(nrank)) ! ! BC on Matrix ! CALL ibcmat(mat, ny) tmat = seconds() - t0 ! ! 3D RHS assembly ! ALLOCATE(rhs(nrank,0:nzloc+nidbas(3)-1)) ! With right guard cells nzloc:nzloc+nidbas3-1 ALLOCATE(sol(nrank,0:nzloc-1)) CALL disrhs3(mbess, npow, splxyz, rhs) ! zsuml = SUM(ABS(rhs(:,0:nzloc-1))) CALL mpi_reduce(zsuml, zsumg, 1, MPI_DOUBLE_PRECISION, MPI_SUM , 0, & & MPI_COMM_WORLD, ierr) IF(me.EQ.0) PRINT*, 'sum of rhs after DISRHS3', zsumg ! ! FFT in z of RHS ! CALL dist1d(1, nrank, start_rank, nrank_loc) end_rank = start_rank+nrank_loc-1 ALLOCATE(rhs_t(0:nz-1,nrank_loc), crhs_t(0:nz-1,nrank_loc)) ! CALL pptransp(MPI_COMM_WORLD, rhs(:,0:nzloc-1), rhs_t) crhs_t = rhs_t CALL fourcol(crhs_t,1) crhs_t = crhs_t/REAL(nz,8) ! ! Apply Mass matrix to crhs ! CALL set_spline(nidbas(3), ngauss(3), zgrid, splz, .TRUE.) kmin =-nz/2 kmax = nz/2-1 CALL init_dft(splz, kmin, kmax) ALLOCATE(fftmass_shifted(kmin:kmax)) ALLOCATE(fftmass(0:nz-1)) CALL calc_fftmass_old(splz, fftmass_shifted) DO k=kmin,kmax fftmass(MODULO(k+nz,nz)) = fftmass_shifted(k) END DO IF(me.EQ.0) THEN WRITE(*,'(/a/(10(1pe12.3)))') 'Mass matrix', fftmass END IF DO k=0,nz-1 crhs_t(k,:) = crhs_t(k,:)/fftmass(k) END DO ! ! Fourier transform back crhs to real space in z ! CALL fourcol(crhs_t, -1) rhs_t(:,:) = REAL(crhs_t(:,:),8) CALL pptransp(MPI_COMM_WORLD, rhs_t, sol) ! Put the final RHS in SOL ! ! BC on RHS ! CALL ibcrhs3(sol, ny) ! IF(me.EQ.0) THEN WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem() END IF !=========================================================================== ! 3.0 Solve the dicretized system ! t0 = seconds() CALL factor(mat) tfact = seconds() - t0 gflops1 = dopla('DPBTRF',nrank,nrank,kl,ku,0) / tfact / 1.d9 t0 = seconds() CALL bsolve(mat, sol) ! ! Backtransform of solution ! DO k=0,nzloc-1 sol(1:ny-1,k) = sol(ny,k) END DO ! zsuml = SUM(ABS(sol)) CALL mpi_reduce(zsuml, zsumg, 1, MPI_DOUBLE_PRECISION, MPI_SUM , 0, & & MPI_COMM_WORLD, ierr) IF(me.EQ.0) PRINT*, 'sum of sol', zsumg ! tsolv = seconds() - t0 gflops2 = dopla('DPBTRS',nrank,1,kl,ku,0) / tsolv / 1.d9 ! ! Spline coefficients, taking into account of periodicity in y and z ! Note: in SOL, y was numbered first. ! dimx = splxy%sp1%dim dimy = splxy%sp2%dim dimz = splxyz%sp3%dim ALLOCATE(bcoef(0:dimx-1, 0:dimy-1, 0:dimz-1)) ! ! Get 3D array of spline coefs. ! DO j=0,dimy-1 DO i=0,dimx-1 ij = MODULO(j,ny) + i*ny + 1 DO k=0,nzloc-1 bcoef(i,j,k) = sol(ij,k) END DO END DO END DO ! ! Get missing coefs from remote guard cells ! prev = MODULO(me-1,npes) next = MODULO(me+1,npes) count = dimx*dimy DO i=0,nidbas(3)-1 CALL mpi_sendrecv(bcoef(0,0,i), count, MPI_DOUBLE_PRECISION, prev, 0, & & bcoef(0,0,nzloc+i), count, MPI_DOUBLE_PRECISION, next, 0, & & MPI_COMM_WORLD, status, ierr) END DO ! IF(me.EQ.0) THEN WRITE(*,'(/a,3i6)') 'dimx, dimy, dimz =', dimx, dimy, dimz END IF !=========================================================================== ! 4.0 Check the solution ! ! Check function values computed with various method ! CALL RANDOM_NUMBER(xp) CALL RANDOM_NUMBER(yp); yp = 2.d0*pi*yp CALL RANDOM_NUMBER(zp); zp = 2.d0*pi*zp nploc = 0 DO i=1,npart IF(zp(i).GE.zgrid(startz) .AND. zp(i).LT.zgrid(endz)) THEN nploc = nploc+1 xp(nploc) = xp(i) yp(nploc) = yp(i) zp(nploc) = zp(i) END IF END DO jder = (/0,0,0/) CALL gridval(splxyz, xp(1:nploc), yp(1:nploc), zp(1:nploc), fp_calc(1:nploc), jder, bcoef) DO i=1,nploc fp_anal(i) = (1-xp(i)**2) * xp(i)**mbess & & * COS(mbess*yp(i)) * COS(zp(i))**npow END DO errnorm2 = norm21(fp_calc(1:nploc)-fp_anal(1:nploc))/norm21(fp_calc(1:nploc)) IF(me.EQ.0) THEN WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors, using random points', & & errnorm2 END IF ! ALLOCATE(solcal(0:nx,0:ny,0:nzloc-1)) ALLOCATE(solana(0:nx,0:ny,0:nzloc-1)) ALLOCATE(errsol(0:nx,0:ny,0:nzloc-1)) ! DO i=0,nx DO j=0,ny DO k=0,nzloc-1 kk=startz+k solana(i,j,k) = (1-xgrid(i)**2) * xgrid(i)**mbess & & * COS(mbess*ygrid(j)) * COS(zgrid(kk))**npow END DO END DO END DO ! jder = (/0,0,0/) CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder, bcoef) t0 = seconds() CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder) tgrid = seconds()-t0 errsol = solana - solcal ! errnorm2 = norm2(errsol) / norm2(solana) IF(me.EQ.0) THEN WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & & errnorm2 END IF CALL putarr(fid, '/sol', solcal,pardim=3) CALL putarr(fid, '/solana', solana,pardim=3) ! ! Check derivative d/dx ! DO i=0,nx DO j=0,ny DO k=0,nzloc-1 IF( mbess .EQ. 0 ) THEN solana(i,j,k) = -2.0d0 * xgrid(i) * COS(zgrid(k+startz))**npow ELSE solana(i,j,k) = (-(mbess+2)*xgrid(i)**2 + mbess) * & & xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) * & & COS(zgrid(k+startz))**npow END IF END DO END DO END DO ! jder = (/1,0,0/) t0 = seconds() CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder) tgrid = tgrid + seconds()-t0 errsol = solana - solcal errnorm2 = norm2(errsol) / norm2(solana) IF(me.EQ.0) THEN WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dx', errnorm2 END IF CALL putarr(fid, '/derivx', solcal, pardim=3) CALL putarr(fid, '/derivx_exact', solana,pardim=3) ! ! Check derivative d/dy ! DO i=0,nx DO j=0,ny DO k=0,nzloc-1 solana(i,j,k) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * & & SIN(mbess*ygrid(j))* COS(zgrid(k+startz))**npow END DO END DO END DO ! jder = (/0,1,0/) t0 = seconds() CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder) tgrid = tgrid + seconds()-t0 errsol = solana - solcal errnorm2 = norm2(errsol) / norm2(solana) IF(me.EQ.0) THEN WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dy', errnorm2 END IF CALL putarr(fid, '/derivy', solcal, pardim=3) CALL putarr(fid, '/derivy_exact', solana,pardim=3) ! ! Check derivative d/dz ! DO i=0,nx DO j=0,ny DO k=0,nzloc-1 solana(i,j,k) = -npow*(1-xgrid(i)**2) * xgrid(i)**mbess & & * COS(mbess*ygrid(j)) * COS(zgrid(k+startz))**(npow-1) & & * SIN(zgrid(k+startz)) END DO END DO END DO ! jder = (/0,0,1/) t0 = seconds() IF(nlppform) THEN CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder) ELSE CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder, bcoef) END IF tgrid = tgrid + seconds()-t0 errsol = solana - solcal errnorm2 = norm2(errsol) / norm2(solana) IF(me.EQ.0) THEN WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dz', errnorm2 END IF CALL putarr(fid, '/derivz', solcal, pardim=3) CALL putarr(fid, '/derivz_exact', solana,pardim=3) !=========================================================================== ! 9.0 Epilogue ! IF(me.EQ.0) THEN WRITE(*,'(/a)') '---' WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv WRITE(*,'(a,1pe12.3)') 'gridval time (s) ', tgrid WRITE(*,'(a,2f10.3)') 'Factor/solve Gflop/s', gflops1, gflops2 WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem() END IF ! DEALLOCATE(xgrid, ygrid, zgrid) DEALLOCATE(fftmass) DEALLOCATE(fftmass_shifted) DEALLOCATE(rhs) DEALLOCATE(sol) DEALLOCATE(rhs_t) DEALLOCATE(crhs_t) DEALLOCATE(bcoef) DEALLOCATE(solcal, solana, errsol) DEALLOCATE(arr) CALL destroy_sp(splxyz) CALL destroy_sp(splz) CALL destroy(mat) ! CALL closef(fid) ! CALL mpi_finalize(ierr) !=========================================================================== ! CONTAINS FUNCTION norm2(x) ! ! Compute the 2-norm of array x ! IMPLICIT NONE DOUBLE PRECISION :: norm2 DOUBLE PRECISION, INTENT(in) :: x(:,:,:) DOUBLE PRECISION :: sum2, sum2g INTEGER :: i, j ! sum2 = 0.0d0 DO i=1,SIZE(x,1) DO j=1,SIZE(x,2) DO k=1,SIZE(x,3) sum2 = sum2 + x(i,j,k)**2 END DO END DO END DO CALL mpi_allreduce(sum2, sum2g, 1, MPI_DOUBLE_PRECISION, MPI_SUM, & & MPI_COMM_WORLD, ierr) norm2 = SQRT(sum2g) END FUNCTION norm2 ! FUNCTION norm21(x) ! ! Compute the 2-norm of array x ! IMPLICIT NONE DOUBLE PRECISION :: norm21 DOUBLE PRECISION, INTENT(in) :: x(:) DOUBLE PRECISION :: sum2, sum2g INTEGER :: i, j ! sum2 = DOT_PRODUCT(x,x) CALL mpi_allreduce(sum2, sum2g, 1, MPI_DOUBLE_PRECISION, MPI_SUM, & & MPI_COMM_WORLD, ierr) norm21 = SQRT(sum2g) END FUNCTION norm21 SUBROUTINE prnmat(label, mat) CHARACTER(len=*) :: label DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat INTEGER :: i WRITE(*,'(/a)') TRIM(label) DO i=1,SIZE(mat,1) WRITE(*,'(10(1pe12.3))') mat(i,:) END DO END SUBROUTINE prnmat END PROGRAM main ! !+++ diff --git a/examples/ppde3d_pb_mod.f90 b/examples/ppde3d_pb_mod.f90 index 72c4917..6995b7a 100644 --- a/examples/ppde3d_pb_mod.f90 +++ b/examples/ppde3d_pb_mod.f90 @@ -1,452 +1,452 @@ !> !> @file ppde3d_pb_mod.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> MODULE ppde3d_pb_mod USE bsplines USE matrix IMPLICIT NONE INCLUDE "mpif.h" ! INTEGER :: me, npes INTEGER :: prev, next INTEGER :: count, status(MPI_STATUS_SIZE), ierr ! CONTAINS SUBROUTINE dismat(spl, mat) ! ! Assembly of FE matrix mat using spline spl ! TYPE(spline2d), INTENT(in) :: spl TYPE(pbmat), INTENT(inout) :: mat ! INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: n2, nidbas2, ndim2, ng2 INTEGER :: i, j, ig1, ig2 INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) DOUBLE PRECISION:: contrib ! INTEGER :: kterms ! Number of terms in weak form INTEGER, DIMENSION(:,:), ALLOCATABLE :: idert, iderw ! Derivative order DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: coefs ! Terms in weak form !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) ! ! Weak form ! kterms = mat%nterms ALLOCATE(idert(kterms,2), iderw(kterms,2), coefs(kterms)) ! ALLOCATE(fun1(0:nidbas1,0:1)) ! Spline and 1st derivative ALLOCATE(fun2(0:nidbas2,0:1)) ! ! ! Gauss quadature ! CALL get_gauss(spl%sp1, ng1) CALL get_gauss(spl%sp2, ng2) ALLOCATE(xg1(ng1), wg1(ng1)) ALLOCATE(xg2(ng1), wg2(ng1)) !=========================================================================== ! 2.0 Assembly loop ! DO i=1,n1 CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) DO ig1=1,ng1 CALL basfun(xg1(ig1), spl%sp1, fun1, i) DO j=1,n2 CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) DO ig2=1,ng2 CALL basfun(xg2(ig2), spl%sp2, fun2, j) CALL coefeq(xg1(ig1), xg2(ig2), idert, iderw, coefs) DO iterm=1,kterms DO iw1=0,nidbas1 ! Weight function in dir 1 igw1 = i+iw1 DO iw2=0,nidbas2 ! Weight function in dir 2 igw2 = MODULO(j+iw2-1, n2) + 1 irow = igw2 + (igw1-1)*n2 DO it1=0,nidbas1 ! Test function in dir 1 igt1 = i+it1 DO it2=0,nidbas2 ! Test function in dir 2 igt2 = MODULO(j+it2-1, n2) + 1 jcol = igt2 + (igt1-1)*n2 contrib = fun1(iw1,iderw(iterm,1)) * & & fun2(iw2,iderw(iterm,2)) * & & coefs(iterm) * & & fun2(it2,idert(iterm,2)) * & & fun1(it1,idert(iterm,1)) * & & wg1(ig1) * wg2(ig2) CALL updtmat(mat, irow, jcol, contrib) END DO END DO END DO END DO END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xg1, wg1, fun1) DEALLOCATE(xg2, wg2, fun2) DEALLOCATE(idert, iderw, coefs) ! CONTAINS SUBROUTINE coefeq(x, y, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x, y INTEGER, INTENT(out) :: idt(:,:), idw(:,:) DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1)) ! ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy ! c(1) = x ! idt(1,1) = 1 idt(1,2) = 0 idw(1,1) = 1 idw(1,2) = 0 ! c(2) = 1.d0/x idt(2,1) = 0 idt(2,2) = 1 idw(2,1) = 0 idw(2,2) = 1 END SUBROUTINE coefeq END SUBROUTINE dismat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE disrhs3(mbess, npow, spl, rhs) ! ! Assembly the RHS using 3d spline spl ! INTEGER, INTENT(in) :: mbess, npow TYPE(spline2d1d), TARGET :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:,:) ! TYPE(spline1d), POINTER :: sp1, sp2, sp3 INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: n2, nidbas2, ndim2, ng2 INTEGER :: n3, nidbas3, ndim3, ng3 INTEGER :: i, j, k, ig1, ig2, ig3, k1, k2, k3, i1, j2, ij, kk, nrank DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) DOUBLE PRECISION, ALLOCATABLE :: xg3(:), wg3(:), fun3(:,:) DOUBLE PRECISION, ALLOCATABLE :: buf(:) DOUBLE PRECISION:: contrib !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! sp1 => spl%sp12%sp1 sp2 => spl%sp12%sp2 sp3 => spl%sp3 ! CALL get_dim(sp1, ndim1, n1, nidbas1) CALL get_dim(sp2, ndim2, n2, nidbas2) CALL get_dim(sp3, ndim3, n3, nidbas3) ! ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) ALLOCATE(fun3(0:nidbas3,1)) ! needs only basis functions (no derivatives) ! ! Gauss quadature ! CALL get_gauss(sp1, ng1) CALL get_gauss(sp2, ng2) CALL get_gauss(sp3, ng3) ALLOCATE(xg1(ng1), wg1(ng1)) ALLOCATE(xg2(ng2), wg2(ng2)) ALLOCATE(xg3(ng3), wg3(ng3)) !=========================================================================== ! 2.0 Assembly loop ! nrank = SIZE(rhs,1) rhs = 0.0d0 ! DO i=1,n1 CALL get_gauss(sp1, ng1, i, xg1, wg1) DO ig1=1,ng1 CALL basfun(xg1(ig1), sp1, fun1, i) DO j=1,n2 CALL get_gauss(sp2, ng2, j, xg2, wg2) DO ig2=1,ng2 CALL basfun(xg2(ig2), sp2, fun2, j) DO k=1,n3 CALL get_gauss(sp3, ng3, k, xg3, wg3) DO ig3=1,ng3 CALL basfun(xg3(ig3), sp3, fun3, k) contrib = wg1(ig1)*wg2(ig2)*wg3(ig3) * & & rhseq(xg1(ig1), xg2(ig2), xg3(ig3), mbess, npow) DO k1=0,nidbas1 i1 = i+k1 DO k2=0,nidbas2 j2 = MODULO(j+k2-1,n2) + 1 ij = j2 + (i1-1)*n2 DO k3=0,nidbas3 kk = k+k3 rhs(ij,kk) = rhs(ij, kk) + & & contrib*fun1(k1,1)*fun2(k2,1)*fun3(k3,1) END DO END DO END DO END DO END DO END DO END DO END DO END DO ! ! Update from remote guard cells ! CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) next = MODULO(me+1,npes) prev = MODULO(me-1,npes) count = nrank ALLOCATE(buf(nrank)) DO i=nidbas3,1,-1 CALL mpi_sendrecv(rhs(1,n3+i), count, MPI_DOUBLE_PRECISION, next, 0, & & buf, count, MPI_DOUBLE_PRECISION, prev, 0, & & MPI_COMM_WORLD, status, ierr) rhs(:,i) = rhs(:,i) + buf(:) END DO DEALLOCATE(buf) !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xg1, wg1, fun1) DEALLOCATE(xg2, wg2, fun2) DEALLOCATE(xg3, wg3, fun3) ! CONTAINS DOUBLE PRECISION FUNCTION rhseq(x1, x2, x3, m, n) DOUBLE PRECISION, INTENT(in) :: x1, x2, x3 INTEGER, INTENT(in) :: m, n rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)*COS(x3)**n END FUNCTION rhseq END SUBROUTINE disrhs3 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE meshdist(c, x, nx) ! ! Construct an 1d non-equidistant mesh given a ! mesh distribution function. ! DOUBLE PRECISION, INTENT(in) :: c(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) INTEGER :: nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) END FUNCTION fdist END SUBROUTINE meshdist !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE ibcmat(mat, ny) ! ! Apply BC on matrix ! TYPE(pbmat), INTENT(inout) :: mat INTEGER, INTENT(in) :: ny INTEGER :: kl, ku, nrank, i, j DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:) !=========================================================================== ! 1.0 Prologue ! ku = mat%ku kl = ku nrank = mat%rank ALLOCATE(zsum(nrank), arr(nrank)) !=========================================================================== ! 2.0 Unicity at the axis ! ! The vertical sum on the NY-th row ! zsum = 0.0d0 DO i=1,ny arr = 0.0d0 CALL getrow(mat, i, arr) DO j=1,ny+ku zsum(j) = zsum(j) + arr(j) END DO END DO ! zsum(ny) = SUM(zsum(1:ny)) ! using symmetry CALL putrow(mat, ny, zsum) ! ! The away operator ! DO i = 1,ny-1 arr = 0.0d0; arr(i) = 1.0d0 CALL putrow(mat, i, arr) END DO !=========================================================================== ! 3.0 Dirichlet on right boundary ! DO i = nrank, nrank-ny+1, -1 arr = 0.0d0; arr(i) = 1.0d0 CALL putrow(mat, i, arr) END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(zsum, arr) ! END SUBROUTINE ibcmat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE ibcrhs3(rhs, ny) ! ! Apply BC on RHS ! DOUBLE PRECISION, INTENT(inout) :: rhs(:,:) INTEGER, INTENT(in) :: ny INTEGER :: nrank, nz, k DOUBLE PRECISION :: zsum !=========================================================================== ! 1.0 Prologue ! nrank = SIZE(rhs,1) nz = SIZE(rhs,2) !=========================================================================== ! 2.0 Unicity at the axis ! ! The vertical sum ! DO k=1,nz zsum = SUM(rhs(1:ny,k)) rhs(ny,k) = zsum rhs(1:ny-1,k) = 0.0d0 END DO !=========================================================================== ! 3.0 Dirichlet on right boundary ! DO k=1,nz rhs(nrank-ny+1:nrank,k) = 0.0d0 END DO END SUBROUTINE ibcrhs3 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE dist1d(s0, ntot, s, nloc) INCLUDE 'mpif.h' INTEGER, INTENT(in) :: s0, ntot INTEGER, INTENT(out) :: s, nloc INTEGER :: me, npes, ierr, naver, rem ! CALL MPI_COMM_SIZE(MPI_COMM_WORLD, npes, ierr) CALL MPI_COMM_RANK(MPI_COMM_WORLD, me, ierr) naver = ntot/npes rem = MODULO(ntot,npes) s = s0 + MIN(rem,me) + me*naver nloc = naver IF( me.LT.rem ) nloc = nloc+1 END SUBROUTINE dist1d !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE disp(a, str, comm) ! ! Gather partitionned 1d array to 0 and print it ! INCLUDE 'mpif.h' DOUBLE PRECISION, INTENT(in) :: a(:) INTEGER, INTENT(in) :: comm CHARACTER(len=*), INTENT(in) :: str INTEGER :: n, ntot, npes, me, ierr, i DOUBLE PRECISION, ALLOCATABLE, DIMENSION(:) :: c INTEGER, ALLOCATABLE, DIMENSION(:) :: counts, displs ! CALL mpi_comm_rank(comm, me, ierr) CALL mpi_comm_size(comm, npes, ierr) n = SIZE(a) IF(me.EQ.0) THEN ALLOCATE(counts(npes), displs(npes+1)) END IF CALL mpi_gather(n, 1, MPI_INTEGER, counts, 1, MPI_INTEGER, 0, comm, ierr) IF(me.EQ.0) THEN displs(1) = 0 DO i=2,npes+1 displs(i) = displs(i-1)+counts(i-1) END DO ntot = displs(npes+1) ALLOCATE(c(ntot)) c = 0.0d0 END IF CALL mpi_gatherv(a, n, MPI_DOUBLE_PRECISION, c, counts, displs, & & MPI_DOUBLE_PRECISION, 0, comm, ierr) IF( me.EQ.0 ) THEN WRITE(*,'(/a)') TRIM(str) DO i=1,npes WRITE(*,'(a,i3.3/(10(1pe12.3)))') 'PE', i-1, & & c(displs(i)+1:displs(i+1)) END DO DEALLOCATE(c) DEALLOCATE(counts) DEALLOCATE(displs) END IF END SUBROUTINE disp !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ END MODULE ppde3d_pb_mod diff --git a/examples/tbasfun.f90 b/examples/tbasfun.f90 index 26bf500..cec95a8 100644 --- a/examples/tbasfun.f90 +++ b/examples/tbasfun.f90 @@ -1,137 +1,137 @@ !> !> @file tbasfun.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Test scalar and vector versions of def_basfun ! USE bsplines IMPLICIT NONE INTEGER :: nx, nidbas, nrank, npt, jdermx DOUBLE PRECISION :: dx DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid DOUBLE PRECISION, ALLOCATABLE :: xpt(:), fun(:, :), vfun(:,:,:) DOUBLE PRECISION, ALLOCATABLE :: fun1(:, :), vfun1(:,:,:) DOUBLE PRECISION :: errfun INTEGER :: left, i, nerrs, k INTEGER, ALLOCATABLE :: vleft(:) LOGICAL :: nlper=.FALSE. TYPE(spline1d) :: splx ! NAMELIST /newrun/ nx, nidbas, npt, jdermx, nlper ! !=============================================================================== ! ! 1D grid ! nx = 10 nidbas = 3 npt = 1000000 jdermx = 0 READ(*,newrun) WRITE(*,newrun) ALLOCATE(xgrid(0:nx)) dx = 1.0d0/REAL(nx) xgrid = (/ (i*dx,i=0,nx) /) WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid ! ! Set up spline ! CALL set_spline(nidbas, 4, xgrid, splx, period=nlper) nrank = splx%dim WRITE(*,'(a, i5)') 'nrank =', nrank WRITE(*,'(a/(10f8.3))') 'knots', splx%knots ! IF(nx.LE.2) THEN WRITE(*,'(/a)') 'VAL0' DO i=1,nx WRITE(*,'(a,i3)') 'Interval', i DO k=1,nidbas+1 ! Spline number WRITE(*,'(10f12.4)') splx%val0(:,k,i) END DO END DO IF(nlper) THEN WRITE(*,'(/a)') 'VALC' DO k=1,nidbas+1 ! Spline number WRITE(*,'(10f12.4)') splx%valc(:,k) END DO END IF END IF ! ALLOCATE(xpt(npt)) ALLOCATE(vleft(npt)) ALLOCATE(fun(0:nidbas,0:jdermx)) ! Values and derivatives of all Splines ALLOCATE(vfun(0:nidbas,0:jdermx,npt)) ALLOCATE(fun1(0:nidbas,0:jdermx)) ! Values and derivatives of all Splines ALLOCATE(vfun1(0:nidbas,0:jdermx,npt)) CALL RANDOM_NUMBER(xpt) !=============================================================================== ! ! Check def_basfun ! CALL def_basfun(xpt, splx, vfun, vleft) ! WRITE(*,'(/a)') 'vector def_basfun versus scalar def_basfun' WRITE(*,'(a6,a12, 2a6, a12)') 'i', 'x', 'left', 'vleft', 'Max. err' DO i=1,npt CALL def_basfun(xpt(i), splx, fun, left) errfun= MAXVAL(ABS(fun(:,:)-vfun(:,:,i))) WRITE(*,'(i6,1pe12.4,2i6,1pe12.4)') i, xpt(i), left, vleft(i), errfun END DO ! IF(npt.LE.10) THEN WRITE(*,'(/a)') 'Scalar/vector basfun' DO i=1,npt CALL basfun(xpt(i), splx, fun, vleft(i)+1) WRITE(*,'(a,1pe12.4/10(1pe12.4))') 'x = ', xpt(i), fun(:,:) WRITE(*,'(10(1pe12.4))') vfun(:,:,i) END DO END IF !=============================================================================== ! ! Check basfun ! CALL basfun(xpt, splx, vfun1, vleft+1) WRITE(*,'(/a,1pe12.4)') 'vector basfun versus vector def_basun: Max err', & & MAXVAL(ABS(vfun-vfun1)) ! WRITE(*,'(/a)') 'vector basfun versus scalar basfun' WRITE(*,'(a6,a12,a12)') 'i', 'x', 'Max. err' DO i=1,npt CALL basfun(xpt(i), splx, fun1, vleft(i)+1) errfun= MAXVAL(ABS(fun1(:,:)-vfun1(:,:,i))) WRITE(*,'(i6,1pe12.4,1pe12.4)') i, xpt(i), errfun END DO !!=============================================================================== ! ! Clean up ! CALL destroy_sp(splx) DEALLOCATE(xgrid) DEALLOCATE(xpt) DEALLOCATE(vleft) DEALLOCATE(fun) END PROGRAM main diff --git a/examples/tcdsmat.f90 b/examples/tcdsmat.f90 index f832abf..0e3f565 100644 --- a/examples/tcdsmat.f90 +++ b/examples/tcdsmat.f90 @@ -1,283 +1,283 @@ !> !> @file tcdsmat.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Solving the following 2d PDE using splines and iterative method ! ! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0 ! exact solution: f(x,y) = (1-x^2) x^m cos(my) ! USE tcdsmat_mod USE bsplines USE cds USE futils ! IMPLICIT NONE INCLUDE 'mpif.h' ! INTEGER :: nints(2), nidbas(2), ngauss(2), mbess, nterms DOUBLE PRECISION :: pi, coefx(5), coefy(5) DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), ygrid(:) DOUBLE PRECISION, ALLOCATABLE :: rhs(:), sol(:) INTEGER :: mrank, bw0 TYPE(spline2d) :: splxy TYPE(cds_mat) :: mat LOGICAL :: readmat, verbose CHARACTER(len=128) :: file='tcdsmat.h5' CHARACTER(len=128) :: filein INTEGER :: fid, fidin DOUBLE PRECISION :: mem, seconds DOUBLE PRECISION :: t0, tmat, tbal, tsolv, tgrid, tmumps(2) INTEGER :: nitmx=100, niter, nssor DOUBLE PRECISION :: rtolmx=1.0d-6, omega=0.0d0, resid ! INTEGER :: i, j, ij, dimx, dimy DOUBLE PRECISION, ALLOCATABLE :: bcoef(:,:) DOUBLE PRECISION, ALLOCATABLE :: solcal(:,:), solana(:,:), errsol(:,:) INTEGER, ALLOCATABLE :: dists(:) ! INTEGER :: ierr, me TYPE(mumps_mat) :: mat_mumps ! NAMELIST /newrun/ nints, nidbas, ngauss, mbess, coefx, coefy, & & nitmx, rtolmx, omega, nssor, readmat, verbose, filein !=========================================================================== ! 1.0 Prologue ! CALL mpi_init(ierr) CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) ! ! Read in data specific to run ! WRITE(*,'(/a)') 'Prologue ...' readmat = .FALSE. ! Read matrix and rhs from file filein = 'mat.h5' nints = (/8,8/) ! Number of intervals in x, y nidbas = (/3,3/) ! Degree of splines ngauss = (/4,4/) ! Number of Gauss points/interval mbess = 2 ! Exponent of differential problem nterms = 2 ! Number of terms in weak form nitmx = 1000 ! Max number of iterations rtolmx = 1.e-12 ! Max relative tolerance nssor = 1 ! Number of SSOR precond steps verbose = .FALSE. ! Output residue at each iteration coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function ! READ(*,newrun) ! ! Overwrite some input by reading in "filein" if required ! IF(readmat) THEN CALL openf(filein, fidin, mode='r') CALL getatt(fidin, '/', 'NX', nints(1)) CALL getatt(fidin, '/', 'NY', nints(2)) CALL getatt(fidin, '/', 'NIDBAS1', nidbas(1)) CALL getatt(fidin, '/', 'NIDBAS2', nidbas(2)) CALL getatt(fidin, '/', 'NGAUSS1', ngauss(1)) CALL getatt(fidin, '/', 'NGAUSS2', ngauss(2)) CALL getatt(fidin, '/', 'MBESS', mbess) END IF WRITE(*,newrun) ! ! Define grid on x (=radial) & y (=poloidal) axis ! pi = 4.0d0*ATAN(1.0d0) ALLOCATE(xgrid(0:nints(1)), ygrid(0:nints(2))) xgrid(0) = 0.0d0; xgrid(nints(1)) = 1.0d0 CALL meshdist(coefx, xgrid, nints(1)) ygrid(0) = 0.0d0; ygrid(nints(2)) = 2.d0*pi CALL meshdist(coefy, ygrid, nints(2)) ! ! Create hdf5 file ! CALL creatf(file, fid, 'TCDSMAT Result File', real_prec='d') CALL attach(fid, '/', 'NX', nints(1)) CALL attach(fid, '/', 'NY', nints(2)) CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) CALL attach(fid, '/', 'NGAUSS1', ngauss(1)) CALL attach(fid, '/', 'NGAUSS2', ngauss(2)) CALL attach(fid, '/', 'MBESS', mbess) !=========================================================================== ! 2.0 Discretize the PDE ! ! Set up spline ! WRITE(*,'(/a)') 'Discretize the PDE ...' t0 = seconds() CALL set_spline(nidbas, ngauss, & & xgrid, ygrid, splxy, (/.FALSE., .TRUE./) ) ! ! Set up CDS matrix for solver ! CALL mstruct(nidbas, nints, mrank, dists) bw0=(nidbas(1)+1)*nints(2) ! Half band including all zero diagonals CALL init(mrank, dists, nterms, mat, bw0=bw0) WRITE(*,'(a,4i8)') 'rank, kl, ku, bw0 = ', mat%rank, mat%kl, mat%ku, bw0 WRITE(*,'(i4,a/(10i8))') mat%ndiags, ' diagonals:', mat%dists ! ! FE matrix assembly and apply BC ! IF(readmat) THEN CALL getmat(fidin, '/MAT1', mat) ELSE CALL dismat(splxy, mat) CALL ibcmat(mat, nints(2), nidbas(1)) END IF ! WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem() ! ! Assembly RHS and apply BC ! ALLOCATE(rhs(mrank), sol(mrank)) ! IF(readmat) THEN CALL getarr(fidin, '/RHS', rhs) ELSE CALL disrhs(mbess, splxy, rhs) CALL ibcrhs(rhs, nints(2)) CALL putarr(fid, '/RHS', rhs, 'RHS of linear system') END IF tmat = seconds() - t0 !=========================================================================== ! 3.0 Diagonal balance of matrix ! WRITE(*,'(/a)') 'Diagonal balance of matrix ...' tbal = seconds() IF( .NOT. readmat ) THEN CALL diagbal(mat) CALL putmat(fid,'/MAT1', mat, 'CDS matrix with BC') END IF rhs = mat%bal * rhs tbal = seconds()-tbal !=========================================================================== ! 4.0 Analytical solutions ! ALLOCATE(solcal(0:nints(1),0:nints(2))) ALLOCATE(solana(0:nints(1),0:nints(2))) ALLOCATE(errsol(0:nints(1),0:nints(2))) DO i=0,nints(1) DO j=0,nints(2) solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j)) END DO END DO !=========================================================================== ! 5.0 Direct solve with MUMPS ! ! WRITE(*,'(/a)') 'Solve the linear system using MUMPS ...' ! tmumps(1) = seconds() CALL cds2mumps(mat, mat_mumps) CALL factor(mat_mumps, debug=.FALSE.) tmumps(2) = seconds() sol = rhs CALL bsolve(mat_mumps, sol, debug=.FALSE.) sol = mat%bal * sol sol(1:nints(2)-1) = sol(nints(2)) ! Unicity tmumps(1:2) = seconds()-tmumps(1:2) ! tgrid = seconds() dimx = splxy%sp1%dim dimy = splxy%sp2%dim ALLOCATE(bcoef(0:dimx-1, 0:dimy-1)) ! DO j=0,dimy-1 DO i=0,dimx-1 ij = MODULO(j,nints(2)) + i*nints(2) + 1 bcoef(i,j) = sol(ij) END DO END DO CALL gridval(splxy, xgrid, ygrid, solcal, (/0,0/), bcoef) errsol = solana - solcal tgrid = seconds() - tgrid ! PRINT*, 'Relative discretization errors', norm2(errsol) / norm2(solana) !!$ WRITE(*,'(a,2(1pe15.6))') 'Relative discretization errors', & !!$ & norm2(errsol) / norm2(solana) !=========================================================================== ! 5.0 Solve the linear system using CG ! WRITE(*,'(/a)') 'Solve the linear system using CG ...' ! tsolv = seconds() sol(:) = 0.0d0 ! Initial guest for solution IF( nssor .EQ. 0 ) THEN CALL cg(mat, rhs, omega, nitmx, rtolmx, sol, resid, niter, & & verbose=verbose) ELSE CALL cg(mat, rhs, omega, nitmx, rtolmx, sol, resid, niter, & & verbose=verbose, nssor=nssor) END IF sol = mat%bal * sol sol(1:nints(2)-1) = sol(nints(2)) ! Unicity tsolv = seconds()-tsolv ! DO j=0,dimy-1 DO i=0,dimx-1 ij = MODULO(j,nints(2)) + i*nints(2) + 1 bcoef(i,j) = sol(ij) END DO END DO CALL gridval(splxy, xgrid, ygrid, solcal, (/0,0/), bcoef) errsol = solana - solcal ! PRINT*, 'Relative discretization errors', norm2(errsol) / norm2(solana) !=========================================================================== ! 9.0 Epilogue ! CALL putarr(fid, '/SOL', sol) CALL putarr(fid, '/xgrid', xgrid, 'r') CALL putarr(fid, '/ygrid', ygrid, '\theta') CALL putarr(fid, '/sol', solcal, 'Solutions') CALL putarr(fid, '/solana', solana,'Exact solutions') CALL putarr(fid, '/errors', errsol, 'Errors') ! WRITE(*,'(/a)') '---' WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat WRITE(*,'(a,1pe12.3)') 'Matrice balancing time (s) ', tbal WRITE(*,'(a,2(1pe12.3))') 'MUMPS solver time (s) ', tmumps WRITE(*,'(a,1pe12.3)') 'Solution at grid time (s) ', tgrid WRITE(*,'(a,i8,2(1pe12.3))') 'nits, resid, t(s)', niter, resid, tsolv ! DEALLOCATE(xgrid,ygrid) DEALLOCATE(rhs, sol) DEALLOCATE(dists) DEALLOCATE(bcoef) DEALLOCATE(solcal,solana,errsol) CALL destroy_sp(splxy) CALL destroy(mat) CALL closef(fid) IF(readmat) THEN CALL closef(fidin) END IF ! CALL mpi_finalize(ierr) !=========================================================================== END PROGRAM main diff --git a/examples/tcdsmat_mod.f90 b/examples/tcdsmat_mod.f90 index 776e422..9be9434 100644 --- a/examples/tcdsmat_mod.f90 +++ b/examples/tcdsmat_mod.f90 @@ -1,635 +1,635 @@ !> !> @file tcdsmat_mod.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> MODULE tcdsmat_mod IMPLICIT NONE ! INTERFACE SUBROUTINE meshdist(coefs, x, nx) DOUBLE PRECISION, INTENT(in) :: coefs(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) END SUBROUTINE meshdist END INTERFACE ! INTERFACE norm2 MODULE PROCEDURE norm2_1d, norm2_2d END INTERFACE CONTAINS !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE mstruct(p, n, rank, dist) ! ! It is assumed that: ! . 2nd dimension is number first ! . 2nd dimension is periodic ! INTEGER, INTENT(in) :: p(2), n(2) INTEGER, INTENT(out) :: rank INTEGER, ALLOCATABLE :: dist(:) ! INTEGER, ALLOCATABLE :: pvect(:,:) INTEGER :: kl, ku, i ! rank = (n(1)+p(1))*n(2) ! Rank of the FE matrix ku = (p(1)+1)*(2*p(2)+1)-1 kl = ku ALLOCATE(pvect(0:2*p(2),0:p(1))) IF( ALLOCATED(dist)) DEALLOCATE(dist) ALLOCATE(dist(-kl:ku)) ! ! Upper (North) points pvect(0:p(2),0) = (/(i,i=0,p(2))/) ! ! Lower (South) points and periodicity of 2nd dim. DO i=1,p(2) pvect(p(2)+i,0) = n(2)-pvect(p(2)-i+1,0) END DO ! ! Shift by N2 for points on the right (West) side DO i=1,p(1) pvect(:,i) = pvect(:,i-1)+n(2) END DO ! ! Super-diagonals including the diagonal dist(0:ku) = RESHAPE(pvect, (/ku+1/)) ! ! Sub-diagonals DO i=-1,-kl,-1 dist(i) = -dist(-i) END DO ! DEALLOCATE(pvect) END SUBROUTINE mstruct !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE dismat(spl, mat) ! ! Assembly of FE matrix mat using spline spl ! USE bsplines USE cds ! TYPE(spline2d), INTENT(in) :: spl TYPE(cds_mat), INTENT(inout) :: mat ! INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: n2, nidbas2, ndim2, ng2 INTEGER :: i, j, ig1, ig2 INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) DOUBLE PRECISION:: contrib ! INTEGER :: kterms ! Number of terms in weak form INTEGER, DIMENSION(:,:), ALLOCATABLE :: idert, iderw ! Derivative order DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: coefs ! Terms in weak form !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1 WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2 ! ! Weak form ! kterms = mat%nterms ALLOCATE(idert(kterms,2), iderw(kterms,2), coefs(kterms)) ! ALLOCATE(fun1(0:nidbas1,0:1)) ! Spline and 1st derivative ALLOCATE(fun2(0:nidbas2,0:1)) ! ! ! Gauss quadature ! CALL get_gauss(spl%sp1, ng1) CALL get_gauss(spl%sp2, ng2) ALLOCATE(xg1(ng1), wg1(ng1)) ALLOCATE(xg2(ng1), wg2(ng1)) !=========================================================================== ! 2.0 Assembly loop ! DO i=1,n1 CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) DO ig1=1,ng1 CALL basfun(xg1(ig1), spl%sp1, fun1, i) DO j=1,n2 CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) DO ig2=1,ng2 CALL basfun(xg2(ig2), spl%sp2, fun2, j) CALL coefeq(xg1(ig1), xg2(ig2), idert, iderw, coefs) DO iterm=1,kterms DO iw1=0,nidbas1 ! Weight function in dir 1 igw1 = i+iw1 DO iw2=0,nidbas2 ! Weight function in dir 2 igw2 = MODULO(j+iw2-1, n2) + 1 irow = igw2 + (igw1-1)*n2 DO it1=0,nidbas1 ! Test function in dir 1 igt1 = i+it1 DO it2=0,nidbas2 ! Test function in dir 2 igt2 = MODULO(j+it2-1, n2) + 1 jcol = igt2 + (igt1-1)*n2 contrib = fun1(iw1,iderw(iterm,1)) * & & fun2(iw2,iderw(iterm,2)) * & & coefs(iterm) * & & fun2(it2,idert(iterm,2)) * & & fun1(it1,idert(iterm,1)) * & & wg1(ig1) * wg2(ig2) CALL updtmat(mat, irow, jcol, contrib) END DO END DO END DO END DO END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xg1, wg1, fun1) DEALLOCATE(xg2, wg2, fun2) DEALLOCATE(idert, iderw, coefs) ! CONTAINS SUBROUTINE coefeq(x, y, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x, y INTEGER, INTENT(out) :: idt(:,:), idw(:,:) DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1)) ! ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy ! c(1) = x ! idt(1,1) = 1 idt(1,2) = 0 idw(1,1) = 1 idw(1,2) = 0 ! c(2) = 1.d0/x idt(2,1) = 0 idt(2,2) = 1 idw(2,1) = 0 idw(2,2) = 1 END SUBROUTINE coefeq END SUBROUTINE dismat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE ibcmat(mat, ny, px) ! ! Apply BC on matrix ! USE cds IMPLICIT NONE TYPE(cds_mat), INTENT(inout) :: mat INTEGER, INTENT(in) :: ny, px ! INTEGER :: kl, ku, n, bw0, i, j DOUBLE PRECISION :: zsum(mat%rank), arr(mat%rank) !=========================================================================== ! 1.0 Prologue ! kl = mat%kl ku = mat%ku n = mat%rank mat%ny = ny ! ! Size of row ny and column ny ! bw0 = SIZE(mat%rowv) !=========================================================================== ! 2.0 Unicity at the axis ! ! The vertical sum on the NY-th row ! Store the sums at row ny in MAT%ROWV ! zsum(1:n) = 0.0d0 DO i=1,ny CALL getrow(mat, i, arr) zsum(1:bw0) = zsum(1:bw0) + arr(1:bw0) IF( i .LE. ny ) THEN ! Clear rows 1:(ny-1) arr = 0.0d0; arr(i) = 1.0d0 CALL putrow(mat, i, arr) END IF END DO mat%rowv(1:bw0) = zsum(1:bw0) ! row ny !!$ WRITE(*,'(/a,/(10(f8.3)))') 'rowv', mat%rowv(1:130) ! ! The horizontal sum on the NY-th column ! The NY-th row of matrix was stored in mat%rowv ! Store the sums ar column ny at MAT%COLH ! zsum(1:n) = 0.0d0 DO j=1,ny CALL getcol(mat, j, arr) zsum(ny) = zsum(ny) + mat%rowv(j) zsum(ny+1:bw0) = zsum(ny+1:bw0) + arr(ny+1:bw0) IF( j .NE. ny ) THEN ! Clear columns 1:(ny-1) mat%rowv(j) = 0.0d0 arr = 0.0d0; arr(j) = 1.0d0 CALL putcol(mat, j, arr) END IF END DO mat%rowv(ny) = 0.0d0 ! Its value is now in mat%colh(ny) arr = 0.0d0 CALL putcol(mat, ny, arr) CALL putrow(mat, ny, arr) mat%colh(1:bw0) = zsum(1:bw0) ! column ny ! ! Move the diagonal term from mat%colh back to main diagonal ! CALL putele(mat, ny, ny, mat%colh(ny)) mat%colh(ny) = 0.0d0 !!$ WRITE(*,'(/a,/(10(f8.3)))') 'rowv', mat%rowv(1:130) !!$ WRITE(*,'(/a,/(10(f8.3)))') 'colh', mat%colh(1:130) !!$ WRITE(*,'(/a,/(10(f8.3)))') 'colh', mat%val(1:ny,0) !=========================================================================== ! 3.0 Dirichlet on right boundary ! DO j = n, n-ny+1, -1 arr = 0.0d0; arr(j) = 1.0d0 CALL putcol(mat, j, arr) END DO ! DO i = n, n-ny+1, -1 arr = 0.0d0; arr(i) = 1.0d0 CALL putrow(mat, i, arr) END DO !!$ WRITE(*,'(/a,/(10(f8.3)))') 'diag 1', mat%val(n-ny-1:n,1) !!$ WRITE(*,'(/a,/(10(f8.3)))') 'diag 0', mat%val(n-ny:n,0) !!$ WRITE(*,'(/a,/(10(f8.3)))') 'diag -1', mat%val(n-ny-1:n,-1) !=========================================================================== ! 9.0 Epilogue ! END SUBROUTINE ibcmat !=========================================================================== SUBROUTINE disrhs(mbess, spl, rhs) ! ! Assembly the RHS using 2d spline spl ! USE bsplines INTEGER, INTENT(in) :: mbess TYPE(spline2d), INTENT(in) :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:) ! INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: n2, nidbas2, ndim2, ng2 INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) DOUBLE PRECISION:: contrib !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) ! ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) ! ! Gauss quadature ! CALL get_gauss(spl%sp1, ng1) CALL get_gauss(spl%sp2, ng2) WRITE(*,'(/a, 2i3)') 'Gauss points and weights, ngauss =', ng1, ng2 ALLOCATE(xg1(ng1), wg1(ng1)) ALLOCATE(xg2(ng1), wg2(ng1)) !=========================================================================== ! 2.0 Assembly loop ! nrank = SIZE(rhs) rhs(1:nrank) = 0.0d0 ! DO i=1,n1 CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) DO ig1=1,ng1 CALL basfun(xg1(ig1), spl%sp1, fun1, i) DO j=1,n2 CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) DO ig2=1,ng2 CALL basfun(xg2(ig2), spl%sp2, fun2, j) contrib = wg1(ig1)*wg2(ig2) * & & rhseq(xg1(ig1),xg2(ig2), mbess) DO k1=0,nidbas1 i1 = i+k1 DO k2=0,nidbas2 j2 = MODULO(j+k2-1,n2) + 1 ij = j2 + (i1-1)*n2 rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1) END DO END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xg1, wg1, fun1) DEALLOCATE(xg2, wg2, fun2) ! CONTAINS DOUBLE PRECISION FUNCTION rhseq(x1, x2, m) DOUBLE PRECISION, INTENT(in) :: x1, x2 INTEGER, INTENT(in) :: m rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2) END FUNCTION rhseq END SUBROUTINE disrhs !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE ibcrhs(rhs, ny) ! ! Apply BC on RHS ! DOUBLE PRECISION, INTENT(inout) :: rhs(:) INTEGER, INTENT(in) :: ny ! INTEGER :: nrank DOUBLE PRECISION :: zsum !=========================================================================== ! 1.0 Prologue ! nrank = SIZE(rhs,1) !=========================================================================== ! 2.0 Unicity at the axis ! ! The vertical sum ! zsum = SUM(rhs(1:ny)) rhs(ny) = zsum rhs(1:ny-1) = 0.0d0 !=========================================================================== ! 3.0 Dirichlet on right boundary ! rhs(nrank-ny+1:nrank) = 0.0d0 END SUBROUTINE ibcrhs !=========================================================================== SUBROUTINE cg(mat, rhs, omega, nitmx, rtolmx, sol, resid, nit, verbose, & & nssor) ! ! Preconditionned Conjugate Gradient solver ! USE cds TYPE(cds_mat) :: mat DOUBLE PRECISION, INTENT(in) :: rhs(:), omega, rtolmx INTEGER, INTENT(in) :: nitmx DOUBLE PRECISION, INTENT(inout) :: sol(:) DOUBLE PRECISION, OPTIONAL, INTENT(out) :: resid INTEGER, OPTIONAL, INTENT(out) :: nit INTEGER, OPTIONAL, INTENT(in) :: nssor LOGICAL, OPTIONAL, INTENT(in) :: verbose ! DOUBLE PRECISION, DIMENSION(SIZE(rhs,1)) :: wr, wz, wp, wq DOUBLE PRECISION :: bnrm2, residue, rho0, rho1, alpha, beta INTEGER :: it ! bnrm2 = SQRT(DOT_PRODUCT(rhs,rhs)) ! Euclidian norm of RHS it = 0 wr = rhs-vmx(mat,sol) ! !... Iteration loop (see fig. 2.5, p.15 of "Templates...") DO it = it+1 IF( PRESENT(nssor) ) THEN CALL psolve(mat, wz, wr, omega, nssor) ELSE wz = wr END IF rho1 = DOT_PRODUCT(wr,wz) IF( it .EQ. 1 ) THEN wp = wz ELSE beta = rho1/rho0 wp = wz + beta*wp END IF wq = vmx(mat,wp) alpha = rho1 / DOT_PRODUCT(wp,wq) sol = sol + alpha*wp wr = wr - alpha*wq residue = SQRT(DOT_PRODUCT(wr,wr)) / bnrm2 IF( PRESENT(verbose) ) THEN IF(verbose) WRITE(*,'(a,i8,1pe12.3)') 'it, resid', it, residue END IF IF( residue .LE. rtolmx .OR. it .GE. nitmx) EXIT rho0 = rho1 END DO IF(PRESENT(resid)) resid = residue IF(PRESENT(nit)) nit = it END SUBROUTINE cg !=========================================================================== SUBROUTINE psolve(mat, x, b, omega, niter_in) ! ! Preconditionners ! USE cds TYPE(cds_mat) :: mat DOUBLE PRECISION, INTENT(out) :: x(:) DOUBLE PRECISION, INTENT(in) :: b(:) DOUBLE PRECISION, INTENT(in) :: omega INTEGER, OPTIONAL, INTENT(in) :: niter_in ! INTEGER :: niter DOUBLE PRECISION :: rtolmx !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ! 1. No-preconditionning ! IF( omega .LT. 0.0d0 ) THEN x = b RETURN END IF !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ! 2. SSOR Preconditionner ! niter = 1 rtolmx = 1.d-6 IF(PRESENT(niter_in)) THEN niter = niter_in END IF CALL ssor(mat, b, omega, niter, rtolmx, x) !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ END SUBROUTINE psolve ! !=========================================================================== SUBROUTINE ssor(mat, b, omega, nitmx, rtolmx, x, resid, nit, verbose) ! ! Solve Ax = b using SSOR method ! USE cds TYPE(cds_mat) :: mat DOUBLE PRECISION, INTENT(out) :: x(:) DOUBLE PRECISION, INTENT(in) :: b(:) DOUBLE PRECISION, INTENT(in) :: omega, rtolmx INTEGER, INTENT(in) :: nitmx DOUBLE PRECISION, OPTIONAL, INTENT(out) :: resid INTEGER, OPTIONAL, INTENT(out) :: nit LOGICAL, OPTIONAL, INTENT(in) :: verbose ! INTEGER :: n, iter INTEGER :: k, i, j, d, bw0, ny DOUBLE PRECISION :: omega1, bnrm2, residue DOUBLE PRECISION :: rhs(SIZE(x)) !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ! 1. Initialization n = SIZE(x) bw0 = SIZE(mat%rowv) ny = mat%ny bnrm2 = norm2(b) ! Euclidian norm of RHS omega1 = 1.0d0-omega iter = 0 DO iter = iter+1 ! ! 2. Forward SOR ! Set RHS rhs = omega*b ! rhs <- omega*b IF( iter .GT. 1 ) THEN rhs = rhs + omega1*x ! rhs <- rhs + (1-omega)*x0 DO k=1,mat%ku ! rhs <- rhs - omega*U*x0 d = mat%dists(k) DO i=MAX(1,1-d),MIN(n,n-d) rhs(i) = rhs(i) - omega*mat%val(i,k)*x(i+d) END DO END DO IF( ny .NE. 0 ) THEN ! Contributions from unicity BC rhs(ny) = rhs(ny) - omega*DOT_PRODUCT(mat%rowv(ny+1:bw0),x(ny+1:bw0)) END IF END IF ! ! Solve (1+omega*L) x = rhs x = rhs IF( ny .NE. 0 ) THEN ! Contributions from unicity BC rhs(ny+1:bw0) = rhs(ny+1:bw0) - omega*mat%colh(ny:bw0)*x(ny) END IF DO i=ny+1,n DO k=-1,-mat%kl,-1 d = mat%dists(k) j=i+d IF( j.LE.0 ) EXIT x(i) = x(i) - omega*mat%val(i,k)*x(j) END DO END DO ! ! 3. Backward SOR ! Set RHS rhs = omega*b + omega1*x ! rhs <- omega*b + (1-omega)*x0 IF( ny .NE. 0 ) THEN ! Contributions from unicity BC rhs(ny+1:bw0) = rhs(ny+1:bw0) - omega*mat%colh(ny:bw0)*x(ny) END IF DO k=-mat%kl,-1 ! rhs <- rhs - omega*L*x0 d = mat%dists(k) DO i=MAX(1,1-d),MIN(n,n-d) rhs(i) = rhs(i) - omega*mat%val(i,k)*x(i+d) END DO END DO ! ! Solve (1+omega*U) x = rhs x = rhs DO i=n-1,ny+1,-1 DO k=1,mat%ku d = mat%dists(k) j = i+d IF( j.GT.n ) EXIT x(i) = x(i) - omega*mat%val(i,k)*x(j) END DO END DO IF( ny .NE. 0 ) THEN ! Contributions from unicity BC x(ny) = x(ny) - omega*DOT_PRODUCT(mat%rowv(ny+1:bw0),x(ny+1:bw0)) END IF ! ! 4. Compute residue ! IF( PRESENT(resid) ) THEN residue = norm2(b-vmx(mat,x)) / bnrm2 IF(PRESENT(verbose)) THEN IF(verbose) WRITE(*,'(a,i8,1pe12.3)') 'it, resid', iter, residue END IF IF( residue .LT. rtolmx ) EXIT END IF ! IF( iter .GE. nitmx ) EXIT END DO ! End of SSOR iterations IF(PRESENT(nit)) nit = iter IF(PRESENT(resid)) resid = residue END SUBROUTINE ssor !=========================================================================== SUBROUTINE diagbal(mat) ! ! Diagonal matrix balancing: store D^(-1/2) in mat%bal ! USE cds TYPE(cds_mat) :: mat INTEGER :: n, bw0, ny, d, i, k DOUBLE PRECISION :: diag(mat%rank) ! n = mat%rank ny = mat%ny bw0 = SIZE(mat%colh) diag(1:n) = mat%val(1:n,0) IF( MINVAL(diag) .LE. 0.0d0 ) THEN WRITE(*,'(a)') 'Diagonal elements of matrix are not stricly positive!' STOP END IF diag(1:n) = 1.0d0/SQRT(diag(1:n)) ! ! Scale the matrix !$OMP parallel do private (k,d,i) DO k=-mat%kl,mat%ku d = mat%dists(k) DO i=MAX(1,1-d),MIN(n,n-d) mat%val(i,k) = diag(i)*diag(i+d)*mat%val(i,k) END DO END DO !$OMP end parallel do ! ! The ny^th column and row IF( ny.NE.0 ) THEN mat%rowv(1:bw0) = diag(1:bw0)*diag(ny)*mat%rowv(1:bw0) mat%colh(1:bw0) = diag(ny)*diag(1:bw0)*mat%colh(1:bw0) END IF ! ! Save D^(-1/2) mat%bal(:) = diag(:) END SUBROUTINE diagbal !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ FUNCTION norm2_1d(x) ! ! Compute the 2-norm of 1d array ! DOUBLE PRECISION :: x(:) DOUBLE PRECISION :: norm2_1d norm2_1d = SQRT(SUM(x*x)) END FUNCTION norm2_1d !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ FUNCTION norm2_2d(x) ! ! Compute the 2-norm of 2d array ! DOUBLE PRECISION :: x(:,:) DOUBLE PRECISION :: norm2_2d norm2_2d = SQRT(SUM(x*x)) END FUNCTION norm2_2d !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ END MODULE tcdsmat_mod diff --git a/examples/test_kron.f90 b/examples/test_kron.f90 index ee9d5fe..52a5a2c 100644 --- a/examples/test_kron.f90 +++ b/examples/test_kron.f90 @@ -1,125 +1,125 @@ !> !> @file test_kron.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Test Kronecker product for both GE and CSR versions. ! USE iso_fortran_env, ONLY : rkind => real64 USE matrix, ONLY : gemat, init, kron, vmx USE csr, ONLY : csr_mat, init, full_to_csr, kron, vmx IMPLICIT NONE ! TYPE(gemat) :: mata, matb, matc TYPE(gemat) :: matu1 TYPE(csr_mat) :: mata_csr, matb_csr, matc_csr INTEGER, PARAMETER :: m=3, n=2 REAL(rkind) :: a(m*m)=[ 1.1, 0.0, 0.0, 0.2, 1.5, 1.0, 0.5, 0.0, 1.0 ] REAL(rkind) :: b(n*n)=[ 2.0, 1.0, 0.0, 3.0 ] REAL(rkind), TARGET :: u(m,n), uu(m,n), v(m,n) REAL(rkind), POINTER :: u1d(:), uu1d(:) INTEGER :: i, s, e ! CALL init(m, 0, mata) CALL init(n, 0, matb) mata%val(1:m,1:m) = RESHAPE(a, [m,m]) matb%val(1:n,1:n) = RESHAPE(b, [n,n]) ! CALL printmat_ge('Matrix A', mata) CALL printmat_ge('Matrix B', matb) ! u1d(1:m*n) => u ! u1d = vec(u) u1d = [ (REAL(i,rkind), i=1,m*n) ] CALL printmat('Array U', u) ! ! Compute (A.U).B^T ! CALL init(n, 0, matu1, mrows=m) v = TRANSPOSE(vmx(matb, TRANSPOSE(vmx(mata, u)))) CALL printmat('(A.U).B^T', v) ! ! Compute (BxA).vec(U) ! CALL kron(matb, mata, matc) uu1d(1:m*n) => uu uu1d = vmx(matc, u1d) CALL printmat_ge('Matrix C', matc) CALL printmat('(BxA).vec(U)', uu) !---------------------------------------------------------------------- ! ! Using CSR matrices ! CALL full_to_csr(mata%val, mata_csr) CALL full_to_csr(matb%val, matb_csr) CALL printmat_csr('Matrix A', mata_csr) CALL printmat_csr('Matrix B', matb_csr) ! CALL kron(matb_csr, mata_csr, matc_csr) uu1d = vmx(matc_csr, u1d) ! CALL printmat_csr('Matrix C', matc_csr) CALL printmat('(BxA).vec(U)', uu) ! CONTAINS SUBROUTINE printmat(str, a) CHARACTER(len=*) :: str REAL(rkind), INTENT(in) :: a(:,:) INTEGER :: i,m,n WRITE(*,'(/a)') TRIM(str) m=SIZE(a,1) n=SIZE(a,2) DO i=1,m WRITE(*,'(12f8.3)') a(i,:) END DO END SUBROUTINE printmat SUBROUTINE printmat_ge(str, a) CHARACTER(len=*) :: str TYPE(gemat) :: a INTEGER :: i WRITE(*,'(/a)') TRIM(str) DO i=1,a%mrows WRITE(*,'(12f8.3)') a%val(i,:) END DO END SUBROUTINE printmat_ge SUBROUTINE printmat_csr(str, a) CHARACTER(len=*) :: str TYPE(csr_mat) :: a INTEGER :: i, s, e REAL(rkind) :: arow(a%ncols) WRITE(*,'(/a,a,3i4)') TRIM(str), ': m, n, nnz', a%mrows, a%ncols, a%nnz DO i=1,a%mrows arow = 0.0_rkind s = a%irow(i) e = a%irow(i+1)-1 arow(a%cols(s:e)) = a%val(s:e) WRITE(*,'(12f8.3)') arow END DO IF(SIZE(a%idiag) .GT. 0) THEN WRITE(*,'(a,(20i4))') 'idiag =', a%idiag WRITE(*,'(a,12f8.3)') 'diag = ', a%val(a%idiag) END IF ! END SUBROUTINE printmat_csr END PROGRAM main diff --git a/examples/test_pwsmp.f90 b/examples/test_pwsmp.f90 index 0537296..29ebbf1 100644 --- a/examples/test_pwsmp.f90 +++ b/examples/test_pwsmp.f90 @@ -1,287 +1,287 @@ !> !> @file test_pwsmp.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main !!$ USE futils IMPLICIT NONE INCLUDE 'mpif.h' ! INTEGER :: npes, me, ierr, comm=MPI_COMM_WORLD INTEGER :: l, i, lun=99 INTEGER :: nrank, nnz, s, e, nrank_loc, nnz_loc, nnz_sum INTEGER :: istart, iend INTEGER, ALLOCATABLE :: irow(:), cols(:) INTEGER, ALLOCATABLE :: irow_loc(:), cols_loc(:) DOUBLE PRECISION, ALLOCATABLE :: val(:), val_loc(:) DOUBLE PRECISION, ALLOCATABLE :: rhs(:), rhs_loc(:) DOUBLE PRECISION, ALLOCATABLE :: sol(:), sol_loc(:) DOUBLE PRECISION :: mem CHARACTER(len=128) :: fname = "mat.dat" DOUBLE PRECISION :: mem_loc, mem_min, mem_max DOUBLE PRECISION :: err, err_max, err_norm DOUBLE PRECISION :: t0, tfact, tsolv INTEGER :: it, nits=100 ! ! PWSMP vars ! DOUBLE PRECISION :: dparm(64) INTEGER :: iparm(64) INTEGER, ALLOCATABLE :: perm(:), invp(:) ! INTEGER :: mrp ! just a placeholder in this program DOUBLE PRECISION :: aux, diag ! just placeholders in this program INTEGER :: naux=0, nrhs=1 !=========================================================================== ! 1.0 Prologue ! CALL mpi_init(ierr) CALL mpi_comm_size(comm, npes, ierr) CALL mpi_comm_rank(comm, me, ierr) !=========================================================================== ! 2.0 Read matrix ! ! File header IF( command_argument_count() > 0 ) THEN CALL get_command_argument(1, fname, l, ierr) END IF OPEN(unit=lun, file=fname, form="unformatted") READ(lun) nrank, nnz IF(me.EQ.0) WRITE(*,'(a,3i16)') 'npes, nrank, nnz', npes, nrank, nnz ! ! Matrix partition CALL dist1d(comm, 1, nrank, istart, nrank_loc) iend = istart+nrank_loc-1 WRITE(*,'(a,i3.3,a,2i12)') 'PE', me, ':istart, iend', istart, iend ALLOCATE(irow_loc(nrank_loc+1)) ! ! Read irow ALLOCATE(irow(nrank+1)) READ(lun) irow nnz_loc = irow(iend+1)-irow(istart) CALL mpi_reduce(nnz_loc, nnz_sum, 1, MPI_INTEGER, MPI_SUM, 0, comm, ierr) IF(me.EQ.0) THEN PRINT*, 'nnz_sum', nnz_sum END IF irow_loc(:) = irow(istart:iend+1) ! Still unshifted DEALLOCATE(irow) ! ALLOCATE(cols_loc(nnz_loc)) ALLOCATE(val_loc(nnz_loc)) ALLOCATE(rhs_loc(nrank_loc)) ALLOCATE(sol_loc(nrank_loc)) ! s = irow_loc(1) e = irow_loc(nrank_loc+1)-1 irow_loc(:) = irow_loc(:)-s+1 ! Shifted relative irow WRITE(*,'(a,i3.3,a,3i12)') 'PE', me, ':s, e, nnz_loc', s, e, nnz_loc ! ! Read cols ALLOCATE(cols(nnz)) READ(lun) cols cols_loc(:) = cols(s:e) DEALLOCATE(cols) ! ! Read vals ALLOCATE(val(nnz)) READ(lun) val val_loc(:) = val(s:e) DEALLOCATE(val) ! ! Read RHS ALLOCATE(rhs(nrank)) READ(lun) rhs rhs_loc(:) = rhs(istart:iend) DEALLOCATE(rhs) ! !!$ mem_loc = mem() !!$ CALL minmax_r(mem_loc, comm, 'mem used (MB) after matrix read') !=========================================================================== ! 3.0 Call PWSMP ! ! Initializing of PWSMP. ! !!$ CALL pwsmp_initialize ALLOCATE(invp(nrank), perm(nrank)) ! ! Fill 'iparm' and 'dparm' arrays with default values. iparm(1:3) = 0 CALL pwssmp (nrank_loc, irow_loc, cols_loc, val_loc, diag, perm, invp, & & rhs_loc, nrank_loc, nrhs, & & aux, naux, mrp, iparm, dparm) IF(iparm(64).NE.0) THEN PRINT*, 'WSMP init failed with iparm(64) =', iparm(64) CALL mpi_abort(comm, iparm(64), ierr) ELSE IF(me.EQ.0) PRINT*, 'WSMP init ok' END IF ! ! Ordering iparm(2) = 1 iparm(3) = 1 CALL pwssmp (nrank_loc, irow_loc, cols_loc, val_loc, diag, perm, invp, & & rhs_loc, nrank_loc, nrhs, & & aux, naux, mrp, iparm, dparm) IF(iparm(64).NE.0) THEN PRINT*, 'WSMP ordering failed with iparm(64) =', iparm(64) CALL mpi_abort(comm, iparm(64), ierr) ELSE IF(me.EQ.0) PRINT*, 'WSMP ordering ok' END IF ! ! Symbolic factorization iparm(2) = 2 iparm(3) = 2 CALL pwssmp (nrank_loc, irow_loc, cols_loc, val_loc, diag, perm, invp, & & rhs_loc, nrank_loc, nrhs, & & aux, naux, mrp, iparm, dparm) IF(iparm(64).NE.0) THEN PRINT*, 'WSMP symbolic failed with iparm(64) =', iparm(64) CALL mpi_abort(comm, iparm(64), ierr) ELSE IF(me.EQ.0) PRINT*, 'WSMP symbolic ok' END IF IF(me.EQ.0) THEN PRINT *,'Number of nonzeros in factor L = 1000 X ',iparm(24) PRINT *,'Number of FLOPS in factorization = ',dparm(23) PRINT *,'Double words needed to factor on 0 = 1000 X ',iparm(23) END IF ! ! Cholesky factorizarion iparm(2) = 3 iparm(3) = 3 t0 = mpi_wtime() CALL pwssmp (nrank_loc, irow_loc, cols_loc, val_loc, diag, perm, invp, & & rhs_loc, nrank_loc, nrhs, & & aux, naux, mrp, iparm, dparm) tfact = mpi_wtime()-t0 IF(iparm(64).NE.0) THEN PRINT*, 'WSMP Choleski failed with iparm(64) =', iparm(64) CALL mpi_abort(comm, iparm(64), ierr) ELSE IF(me.EQ.0) PRINT*, 'WSMP Choleski ok' END IF ! ! Backsolve t0 = mpi_wtime() DO it=1,nits sol_loc=rhs_loc iparm(2) = 4 iparm(3) = 4 CALL pwssmp (nrank_loc, irow_loc, cols_loc, val_loc, diag, perm, invp, & & sol_loc, nrank_loc, nrhs, & & aux, naux, mrp, iparm, dparm) END DO rhs_loc=sol_loc tsolv = (mpi_wtime()-t0)/REAL(nits,8) IF(iparm(64).NE.0) THEN PRINT*, 'WSMP backsolve failed with iparm(64) =', iparm(64) CALL mpi_abort(comm, iparm(64), ierr) ELSE IF(me.EQ.0) PRINT*, 'WSMP backsolve ok' END IF ! ! Iterative refinement iparm(2) = 5 iparm(3) = 5 CALL pwssmp (nrank_loc, irow_loc, cols_loc, val_loc, diag, perm, invp, & & rhs_loc, nrank_loc, nrhs, & & aux, naux, mrp, iparm, dparm) IF(iparm(64).NE.0) THEN PRINT*, 'WSMP refinement failed with iparm(64) =', iparm(64) CALL mpi_abort(comm, iparm(64), ierr) ELSE IF(me.EQ.0) PRINT*, 'WSMP refinement ok' END IF ! !!$ mem_loc = mem() !!$ CALL minmax_r(mem_loc, comm, 'mem used (MB) after PWSMP') !=========================================================================== ! 4.0 Check SOL ! ! Read SOL ALLOCATE(sol(nrank)) READ(lun) sol sol_loc(:) = sol(istart:iend) DEALLOCATE(sol) PRINT*, 'Comp. sol', SUM(rhs_loc) PRINT*, 'Exact sol', SUM(sol_loc) ! err=MAXVAL(ABS(sol_loc-rhs_loc)) CALL mpi_reduce(err, err_max, 1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierr) IF(me.EQ.0) THEN PRINT*, 'Max. error', err_max END IF rhs_loc = rhs_loc-sol_loc err = DOT_PRODUCT(rhs_loc,rhs_loc) CALL mpi_reduce(err, err_norm, 1, MPI_DOUBLE_PRECISION, MPI_SUM, 0, comm, ierr) IF(me.EQ.0) THEN PRINT*, 'Norm of error', SQRT(err_norm) END IF ! !!$ mem_loc = mem() !!$ CALL minmax_r(mem_loc, comm, 'mem used (MB)') !=========================================================================== ! 9.0 Epilogue ! CALL minmax_r(tfact, comm, 'Factorisation time(s)') CALL minmax_r(tsolv, comm, ' Backsolve time(s)') CALL mpi_finalize(ierr) ! CONTAINS SUBROUTINE dist1d(comm, s0, ntot, s, nloc) ! ! 1d distribute ntot elements, returns offset s and local number of ! elements nloc. ! IMPLICIT NONE INCLUDE 'mpif.h' INTEGER, INTENT(in) :: s0, ntot INTEGER, INTENT(out) :: s, nloc INTEGER :: comm, me, npes, ierr, naver, rem ! CALL MPI_COMM_SIZE(comm, npes, ierr) CALL MPI_COMM_RANK(comm, me, ierr) naver = ntot/npes rem = MODULO(ntot,npes) s = s0 + MIN(rem,me) + me*naver nloc = naver IF( me.LT.rem ) nloc = nloc+1 END SUBROUTINE dist1d ! SUBROUTINE minmax_r(x, comm, str) CHARACTER(len=*), INTENT(in) :: str DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(in) :: comm INTEGER :: me, ierr DOUBLE PRECISION :: xmin, xmax CALL mpi_comm_rank(comm, me, ierr) CALL mpi_reduce(x, xmin, 1, MPI_DOUBLE_PRECISION, MPI_MIN, 0, comm, ierr) CALL mpi_reduce(x, xmax, 1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierr) IF( me.EQ.0 ) THEN WRITE(*,'(a,2(1pe12.4))') 'Minmax of ' // TRIM(str), xmin, xmax END IF END SUBROUTINE minmax_r ! END PROGRAM main diff --git a/examples/tlocintv.f90 b/examples/tlocintv.f90 index 0208d29..a6970c2 100644 --- a/examples/tlocintv.f90 +++ b/examples/tlocintv.f90 @@ -1,140 +1,140 @@ !> !> @file tlocintv.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Optimization of locintv ! USE bsplines IMPLICIT NONE INTEGER :: nx, nidbas, ngauss, np, nits DOUBLE PRECISION :: a, b, coefs(5) DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid TYPE(spline1d) :: splx ! INTEGER :: i, nerrs, it DOUBLE PRECISION :: t0, t1, seconds, tscal, tscal_new, tvec, tvec_new DOUBLE PRECISION, ALLOCATABLE :: xp(:) INTEGER, ALLOCATABLE :: left(:) ! INTERFACE SUBROUTINE meshdist(coefs, x, nx) DOUBLE PRECISION, INTENT(in) :: coefs(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) END SUBROUTINE meshdist END INTERFACE ! NAMELIST /newrun/ nx, nidbas, ngauss, np, nits, a, b, coefs !=========================================================================== ! Read in data ! nx = 8 ! Number oh intevals in x nidbas = 3 ! Degree of splines ngauss = 4 ! Number of Gauss points/interval np = 10 ! Number of random points in [a,b] nits = 1000000 a = 0.0 b = 1.0 coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function ! see function FDIST in MESHDIST ! READ(*,newrun) WRITE(*,newrun) ! ! Define grid on x axis ! ALLOCATE(xgrid(0:nx)) xgrid(0) = a xgrid(nx) = b CALL meshdist(coefs, xgrid, nx) WRITE(*,'(/a/(10f8.3))') 'XGRID', xgrid(0:nx) WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx) ! ! Set up spline ! CALL set_spline(nidbas, ngauss, xgrid, splx) WRITE(*,'(a,l1)') 'Is mesh equidistant? ', splx%nlequid ! ! Test locintv ! ALLOCATE(xp(np)) ALLOCATE(left(np)) xp(:) = (b-a)*xp(:) + a tscal = 0.0d0 tscal_new = 0.0d0 tvec = 0.0d0 tvec_new = 0.0d0 ! nerrs = 0 DO it=1,nits CALL RANDOM_NUMBER(xp) t0 = seconds() DO i=1,np CALL locintv_old(splx, xp(i), left(i)) END DO tscal = tscal + seconds()-t0 nerrs = nerrs + COUNT(.NOT.in_interv(xp, left)) ! t0 = seconds() DO i=1,np CALL locintv(splx, xp(i), left(i)) END DO tscal_new = tscal_new + seconds()-t0 nerrs = nerrs + COUNT(.NOT.in_interv(xp, left)) ! t0 = seconds() CALL locintv_old(splx, xp, left) tvec = tvec + seconds()-t0 nerrs = nerrs + COUNT(.NOT.in_interv(xp, left)) ! t0 = seconds() CALL locintv(splx, xp, left) tvec_new = tvec_new + seconds()-t0 nerrs = nerrs + COUNT(.NOT.in_interv(xp, left)) END DO PRINT*, 'nerrs =', nerrs ! tscal = tscal/(REAL(nits*np,8)) tscal_new = tscal_new/(REAL(nits*np,8)) tvec = tvec/(REAL(nits*np,8)) tvec_new = tvec_new/(REAL(nits*np,8)) WRITE(*,'(4a12)') 'scalar', 'scalar new', 'vector', 'vector new' WRITE(*,'(4(1pe12.3))') tscal, tscal_new, tvec, tvec_new ! ! Clean up ! DEALLOCATE(xp) DEALLOCATE(left) DEALLOCATE(xgrid) CALL destroy_sp(splx) ! CONTAINS LOGICAL ELEMENTAL FUNCTION in_interv(x, l) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(in) :: l in_interv = x.GE.xgrid(l) .AND. x.LT.xgrid(l+1) END FUNCTION in_interv END PROGRAM main diff --git a/examples/tmassmat.f90 b/examples/tmassmat.f90 index 679f965..b284969 100644 --- a/examples/tmassmat.f90 +++ b/examples/tmassmat.f90 @@ -1,189 +1,189 @@ !> !> @file tmassmat.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! FT of mass matrix ! USE bsplines USE matrix ! IMPLICIT NONE INTEGER :: nx, nidbas INTEGER :: ngauss, nrank, kl, ku INTEGER :: i, k, kmin, kmax DOUBLE PRECISION :: pi, xlenght, dx, arg0, arg DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), arow(:) DOUBLE PRECISION, ALLOCATABLE :: fftmassm1(:), fftmassm2(:), fftmassm3(:) DOUBLE PRECISION, ALLOCATABLE :: fftmass_shifted(:) TYPE(spline1d) :: splx TYPE(periodic_mat) :: massm ! NAMELIST /newrun/ nx, nidbas, xlenght !================================================================================ ! 1.0 Prologue ! pi = 4.0d0*ATAN(1.0d0) ! nx = 8 nidbas = 3 xlenght = 2.0d0*pi ! READ(*,newrun) WRITE(*,newrun) ! ngauss = nidbas+1 ! Exact integration for polynomials of degree 2*nidbas ! ALLOCATE(xgrid(0:nx)) dx = xlenght/REAL(nx) xgrid = (/ (i*dx,i=0,nx) /) WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid ! CALL set_spline(nidbas, ngauss, xgrid, splx, .TRUE.) !=========================================================================== ! 2.0 Mass matrix nrank = nx kl = nidbas ku = kl CALL init(kl, ku, nrank, 1, massm) CALL dismat(splx, massm) ! ALLOCATE(arow(nrank)) !!$ WRITE(*,'(/a)') 'Mass matrix' !!$ DO i=1,nrank !!$ CALL getrow(massm, i, arow) !!$ WRITE(*,'(10(1pe12.4))') arow !!$ END DO !=========================================================================== ! 3.0 Fourier transform of Mass matrix ! ALLOCATE(fftmassm1(0:nx-1)) ALLOCATE(fftmassm2(0:nx-1)) ALLOCATE(fftmassm3(0:nx-1)) IF(nidbas.LE.3) THEN CALL analytic(nidbas, fftmassm1) fftmassm1 = dx*fftmassm1 WRITE(*,'(/a/(10(1pe12.4)))') 'Analytic', fftmassm1 END IF ! CALL calc_fftmass_old(splx, fftmassm2) WRITE(*,'(/a/(10(1pe12.4)))') 'Old version', fftmassm2 IF(nidbas.LE.3) THEN WRITE(*,'(a,1pe12.4)') 'error =', MAXVAL(ABS(fftmassm2-fftmassm1)) END IF ! ! Init DFT kmin = -nx/2 kmax = nx/2-1 CALL init_dft(splx, kmin, kmax) ! ALLOCATE(fftmass_shifted(kmin:kmax)) CALL calc_fftmass(splx, fftmass_shifted) DO k=kmin, kmax fftmassm3(MODULO(k+nx,nx))=fftmass_shifted(k) END DO WRITE(*,'(/a/(10(1pe12.4)))') 'New version', fftmassm3 WRITE(*,'(a,1pe12.4)') 'error =', MAXVAL(ABS(fftmassm3-fftmassm2)) ! ! Check dftcoefs WRITE(*,'(/a)') 'Check DFT of splines' PRINT*, 'dims of dftcoefs', SHAPE(splx%dft%coefs) DO i=0,nidbas WRITE(*,'(a,i3,2(1pe12.4))') 'Sum of coefs for spline', i, & & SUM(splx%dft%coefs(:,i)) END DO !=========================================================================== ! 9.0 Clean up ! DEALLOCATE(xgrid) DEALLOCATE(fftmassm1) DEALLOCATE(fftmassm2) DEALLOCATE(fftmassm3) DEALLOCATE(fftmass_shifted) DEALLOCATE(arow) CALL destroy(massm) CALL destroy_sp(splx) CONTAINS !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE dismat(spl, mat) TYPE(spline1d) :: spl TYPE(periodic_mat) :: mat INTEGER :: dim, nx, nidbas, ngauss DOUBLE PRECISION, ALLOCATABLE :: fun(:,:), xgauss(:), wgauss(:) INTEGER :: i, igauss, iw, jt, irow, jcol DOUBLE PRECISION :: contrib ! CALL get_dim(spl, dim, nx, nidbas) ALLOCATE(fun(0:nidbas,1)) ! Spline CALL get_gauss(spl, ngauss) ALLOCATE(xgauss(ngauss), wgauss(ngauss)) ! DO i=1,nx CALL get_gauss(spl, ngauss, i, xgauss, wgauss) DO igauss=1,ngauss CALL basfun(xgauss(igauss), spl, fun, i) DO jt=0,nidbas DO iw=0,nidbas contrib = fun(jt,1) * fun(iw,1) * wgauss(igauss) irow=MODULO(i+iw-1,nx) + 1 ! Periodic BC jcol=MODULO(i+jt-1,nx) + 1 CALL updtmat(mat, irow, jcol, contrib) END DO END DO END DO END DO ! DEALLOCATE(fun) DEALLOCATE(xgauss, wgauss) END SUBROUTINE dismat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE analytic(nidbas, mat) ! ! Analytic form for nidbas .le. 3 ! INTEGER, INTENT(in) :: nidbas DOUBLE PRECISION, INTENT(out) :: mat(0:) DOUBLE PRECISION :: arg0, arg, cosk INTEGER :: n, k ! n = SIZE(mat) arg0 = 2.0d0*pi/REAL(n,8) DO k=0,n-1 arg = k*arg0 cosk = COS(arg) SELECT CASE (nidbas) CASE (1) mat(k) = (2.0d0 + cosk)/3.0d0 CASE (2) mat(k) = (16.0d0 + cosk*(13.0d0+cosk))/30.0d0 CASE (3) mat(k) = (272.0d0 + cosk*(297.0d0+cosk*(60.0d0+cosk)))/630.0d0 CASE default WRITE(*,'(a,i4,a)') 'ANALYTIC: nidbas =', nidbas, ' is not implemented!' STOP END SELECT END DO END SUBROUTINE analytic END PROGRAM main diff --git a/examples/tmatrix_gb.f90 b/examples/tmatrix_gb.f90 index 6183335..106c0b7 100644 --- a/examples/tmatrix_gb.f90 +++ b/examples/tmatrix_gb.f90 @@ -1,114 +1,114 @@ !> !> @file tmatrix_gb.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Test some routines of module matrix ! USE matrix IMPLICIT NONE TYPE(gbmat) :: mata INTEGER, PARAMETER :: n=5, ku=3, kl=3 DOUBLE PRECISION :: arr(n), fulla(n,n), base DOUBLE PRECISION, DIMENSION(:,:), POINTER :: p INTEGER :: i, j, iaway, pow CHARACTER(len=32) :: str ! CALL init(ku, ku, n, 0, mata) CALL getvalp(mata, p) PRINT*, 'shape of A: ', SHAPE(p) ! ! Test updtmat p = 0.0d0 DO j=1,n DO i=1,n arr(i) = 10*i + j IF( ABS(j-i) .LT. ku+1 ) CALL updtmat(mata, i, j, arr(i)) END DO END DO CALL prntmat('Test of UPDTMAT', p) ! ! Test PUTCOL p = 0.0d0 DO j=1,n DO i=1,n arr(i) = 10*i + j END DO CALL putcol(mata, j, arr) END DO CALL prntmat('Test of PUTCOL', p) ! ! ! Test PUTROW p = 0.0d0 DO i=1,n DO j=1,n arr(j) = 10*i + j END DO CALL putrow(mata, i, arr) END DO CALL prntmat('Test of PUTROW', p) ! ! Test GETCOL fulla = 0.0 DO j=1,n CALL getcol(mata, j, fulla(:,j)) END DO CALL prntmat('Test of GETCOL', fulla) ! iaway=4 arr = 0.0d0 arr(iaway) =1.0 CALL putrow(mata, iaway, arr) CALL putcol(mata, iaway, arr) WRITE(str,'(a,i3)') 'Away on i = j =',iaway CALL prntmat(TRIM(str), p) ! ! Test GETCOL fulla = 0.0 DO j=1,n CALL getcol(mata, j, fulla(:,j)) END DO CALL prntmat('Matrix full', fulla) ! ! Test of determinant CALL determinant(mata, base, pow) CALL prntmat('Factored A (gb)', p) PRINT*, 'Prod. of factored A diagnonals', PRODUCT(p(kl+ku+1,:)) WRITE(*,'(a,1pe15.6,i6)') 'Determinant(bas,power) =', base, pow PRINT*, 'Pivots ', mata%piv ! call destroy(mata) CONTAINS SUBROUTINE prntmat(str, a) DOUBLE PRECISION, DIMENSION(:,:) :: a CHARACTER(len=*) :: str INTEGER :: i WRITE(*,'(a)') TRIM(str) DO i=1,SIZE(a,1) WRITE(*,'(10f8.1)') a(i,:) END DO END SUBROUTINE prntmat END PROGRAM main diff --git a/examples/tmatrix_pb.f90 b/examples/tmatrix_pb.f90 index eae0fd4..01d7f92 100644 --- a/examples/tmatrix_pb.f90 +++ b/examples/tmatrix_pb.f90 @@ -1,143 +1,143 @@ !> !> @file tmatrix_pb.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Test some routines of module matrix ! USE matrix IMPLICIT NONE TYPE(pbmat) :: mata, matb INTEGER, PARAMETER :: n=5, ku=3 DOUBLE PRECISION :: arr(n), fulla(n,n), fullb(n,n), base DOUBLE PRECISION, DIMENSION(:,:), POINTER :: p, pb INTEGER :: i, j, info, pow ! CALL init(ku, n, 0, mata) CALL init(1, n, 0, matb) CALL getvalp(mata, p) CALL getvalp(matb, pb) PRINT*, 'shape of A: ', SHAPE(p) ! ! Test updtmat p = 0.0d0 DO i=1,n DO j=i,n arr(j) = 10*i + j IF( ABS(j-i) .LT. ku+1 ) CALL updtmat(mata, i, j, arr(j)) END DO END DO CALL prntmat('Test of UPDTMAT', p) ! ! Test GETCOL fulla = 0.0 DO j=1,n CALL getcol(mata, j, fulla(:,j)) END DO CALL prntmat('Full matrix from GETCOL', fulla) ! ! Test GETROW fulla = 0.0 DO i=1,n CALL getrow(mata, i, fulla(i,:)) END DO CALL prntmat('Full matrix from GETROW', fulla) ! ! Test PUTCOL p = 0.0d0 DO j=1,n DO i=1,n arr(i) = 10*i + j END DO CALL putcol(mata, j, arr) END DO CALL prntmat('Test of PUTCOL', p) ! ! Test PUTROW p = 0.0d0 DO i=1,n DO j=1,n arr(j) = 10*i + j END DO CALL putrow(mata, i, arr) END DO CALL prntmat('Test of PUTROW', p) ! arr = 0.0d0 arr(2) =1.0 CALL putrow(mata, 2, arr) CALL prntmat('Away on i=2, j=2', p) ! ! Test GETCOL fulla = 0.0 DO j=1,n CALL getcol(mata, j, fulla(:,j)) END DO CALL prntmat('Full matrix from GETCOL', fulla) ! ! Test GETROW fulla = 0.0 DO i=1,n CALL getrow(mata, i, fulla(i,:)) END DO CALL prntmat('Full matrix from GETROW', fulla) ! ! Test GETELE fulla = 0.0 DO i=1,n DO j=1,n IF(ABS(j-i).LT.ku+1) CALL getele(mata,i,j,fulla(i,j)) END DO END DO CALL prntmat('Full matrix from GETELE', fulla) ! ! Test of determinant fullb = 0.0 DO i=1,n fullb(i,i) = 2.0d0 IF(i.LT.n) fullb(i,i+1)=-1.0d0 IF(i.GT.1) fullb(i,i-1)=-1.0d0 END DO DO j=1,n CALL putcol(matb, j, fullb(:,j)) END DO CALL prntmat('Mat. A (full)', fullb) CALL prntmat('Mat. A (pb)', pb) CALL determinant(matb, base, pow) WRITE(*,'(a,1pe15.6,i6)') 'Determinant(bas,power) =', base, pow ! CALL destroy(mata) CALL destroy(matb) CONTAINS SUBROUTINE prntmat(str, a) DOUBLE PRECISION, DIMENSION(:,:) :: a CHARACTER(len=*) :: str INTEGER :: i WRITE(*,'(a)') TRIM(str) DO i=1,SIZE(a,1) WRITE(*,'(10f8.1)') a(i,:) END DO END SUBROUTINE prntmat END PROGRAM main diff --git a/examples/tmatrix_zpb.f90 b/examples/tmatrix_zpb.f90 index ba7c30d..2340d0b 100644 --- a/examples/tmatrix_zpb.f90 +++ b/examples/tmatrix_zpb.f90 @@ -1,137 +1,137 @@ !> !> @file tmatrix_zpb.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Test some routines of module matrix ! USE matrix IMPLICIT NONE TYPE(zpbmat) :: mata, matb INTEGER, PARAMETER :: n=5, ku=3 DOUBLE COMPLEX :: arr(n), fulla(n,n), fullb(n,n), base DOUBLE COMPLEX, DIMENSION(:,:), POINTER :: p, pb INTEGER :: i, j, pow ! CALL init(ku, n, 0, mata) CALL init(1, n, 0, matb) CALL getvalp(mata, p) CALL getvalp(matb, pb) PRINT*, 'shape of A: ', SHAPE(p) ! ! Test updtmat p = 0.0d0 DO i=1,n DO j=i,n arr(j) = CMPLX(10*i + j, j-i) IF( ABS(j-i) .LT. ku+1 ) CALL updtmat(mata, i, j, arr(j)) END DO END DO CALL prntmat('Test of UPDTMAT', p) ! ! Test GETCOL fulla = 0.0 DO j=1,n CALL getcol(mata, j, fulla(:,j)) END DO CALL prntmat('Full matrix from GETCOL', fulla) ! ! Test GETROW fulla = 0.0 DO i=1,n CALL getrow(mata, i, fulla(i,:)) END DO CALL prntmat('Full matrix from GETROW', fulla) ! ! Test PUTCOL p = 0.0d0 DO j=1,n CALL putcol(mata, j, fulla(:,j)) END DO CALL prntmat('Test of PUTCOL', p) ! ! Test PUTROW p = 0.0d0 DO i=1,n CALL putrow(mata, i, fulla(i,:)) END DO CALL prntmat('Test of PUTROW', p) ! arr = 0.0d0 arr(2) =1.0 CALL putrow(mata, 2, arr) CALL prntmat('Away on i=2, j=2', p) ! ! Test GETCOL fulla = 0.0 DO j=1,n CALL getcol(mata, j, fulla(:,j)) END DO CALL prntmat('Full matrix from GETCOL', fulla) ! ! Test GETROW fulla = 0.0 DO i=1,n CALL getrow(mata, i, fulla(i,:)) END DO CALL prntmat('Full matrix from GETROW', fulla) ! ! Test GETELE fulla = 0.0 DO i=1,n DO j=1,n IF(ABS(j-i).LT.ku+1) CALL getele(mata,i,j,fulla(i,j)) END DO END DO CALL prntmat('Full matrix from GETELE', fulla) ! ! Test of determinant fullb = 0.0 DO i=1,n fullb(i,i) = 2.0d0 IF(i.LT.n) fullb(i,i+1)=-1.0d0 IF(i.GT.1) fullb(i,i-1)=-1.0d0 END DO DO j=1,n CALL putcol(matb, j, fullb(:,j)) END DO CALL prntmat('Mat. A (full)', fullb) CALL prntmat('Mat. A (pb)', pb) CALL determinant(matb, base, pow) WRITE(*,'(a,2f8.5,i3)') 'Determinant(base,power) = ', base, pow ! CALL destroy(mata) CALL destroy(matb) CONTAINS SUBROUTINE prntmat(str, a) DOUBLE COMPLEX, DIMENSION(:,:) :: a CHARACTER(len=*) :: str INTEGER :: i WRITE(*,'(a)') TRIM(str) DO i=1,SIZE(a,1) WRITE(*,'(5(5x,"(",f5.1,",",f5.1,")"))') a(i,:) END DO END SUBROUTINE prntmat END PROGRAM main diff --git a/examples/tp2p_mat.f90 b/examples/tp2p_mat.f90 index d5084fb..3a6b318 100644 --- a/examples/tp2p_mat.f90 +++ b/examples/tp2p_mat.f90 @@ -1,108 +1,108 @@ !> !> @file tp2p_mat.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main USE pardiso_bsplines IMPLICIT NONE INCLUDE 'mpif.h' ! INTEGER :: me, npes, ierr INTEGER :: next INTEGER :: i, j, rank DOUBLE PRECISION :: val DOUBLE PRECISION, ALLOCATABLE :: arow(:) TYPE(pardiso_mat) :: mat ! CALL mpi_init(ierr) CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) IF(npes.NE.2) THEN PRINT*, 'Should run with 2 procs!' CALL mpi_abort(MPI_COMM_WORLD, -1, ierr) END IF ! ! Define local matrix ! rank = npes CALL init(npes, 0, mat) DO i=1,rank ! Fill row me+1 val = me+1 j = me+1 CALL updtmat(mat, i, me+1, val) END DO ! ! Exchange matrix ! CALL disp_mat('Original matrix') next=MODULO(me+1,2) ! IF(me.EQ.0) THEN CALL p2p_mat(mat, 1, 'send', 'put', MPI_COMM_WORLD) ELSE CALL p2p_mat(mat, 0, 'recv', 'put', MPI_COMM_WORLD) END IF CALL disp_mat('Matrix after 0->1/put') ! CALL p2p_mat(mat, next, 'sendrecv', 'put', MPI_COMM_WORLD) CALL disp_mat('Matrix after sendrev/put') ! CALL p2p_mat(mat, next, 'sendrecv', 'updt', MPI_COMM_WORLD) CALL disp_mat('Matrix after sendrev/updt') ! IF(me.EQ.1) THEN CALL p2p_mat(mat, 0, 'send', 'updt', MPI_COMM_WORLD) ELSE CALL p2p_mat(mat, 1, 'recv', 'updt', MPI_COMM_WORLD) END IF CALL disp_mat('Matrix after 1->0/updt') ! IF(me.EQ.1) THEN CALL p2p_mat(mat, 0, 'send', 'put', MPI_COMM_WORLD) ELSE CALL p2p_mat(mat, 1, 'recv', 'put', MPI_COMM_WORLD) END IF CALL disp_mat('Matrix after 1->0/put') ! CALL mpi_finalize(ierr) CONTAINS SUBROUTINE disp_mat(str) CHARACTER(len=*), INTENT(in) :: str INTEGER :: p DO p=0,npes-1 IF(me.EQ.p) THEN WRITE(*,'(a,i3.3)') str//' on PE', me CALL to_mat(mat, nlkeep=.TRUE.) ALLOCATE(arow(mat%rank)) DO i=1,mat%rank CALL getrow(mat, i, arow) WRITE(*,'(10f8.2)') arow END DO DEALLOCATE(arow) CALL FLUSH(6) END IF CALL mpi_barrier(MPI_COMM_WORLD, ierr) END DO END SUBROUTINE disp_mat END PROGRAM main diff --git a/examples/tpsum_mat.f90 b/examples/tpsum_mat.f90 index 5f01441..30d30f9 100644 --- a/examples/tpsum_mat.f90 +++ b/examples/tpsum_mat.f90 @@ -1,77 +1,77 @@ !> !> @file tpsum_mat.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main !!$ USE pardiso_bsplines !!$ USE wsmp_bsplines USE mumps_bsplines IMPLICIT NONE INCLUDE 'mpif.h' ! INTEGER :: me, npes, ierr INTEGER :: i, j, rank DOUBLE PRECISION :: val DOUBLE PRECISION, ALLOCATABLE :: arow(:) !!$ TYPE(pardiso_mat) :: mat !!$ TYPE(wsmp_mat) :: mat TYPE(mumps_mat) :: mat ! CALL mpi_init(ierr) CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) ! rank = npes CALL init(npes, 1, mat) DO i=1,rank ! Fill row me+1 val = me+1 j = me+1 CALL updtmat(mat, i, me+1, val) END DO ! !!$ CALL disp_mat('Original matrix') CALL psum_mat(mat, MPI_COMM_WORLD) CALL disp_mat('Global sum of matrix') ! CALL mpi_finalize(ierr) CONTAINS SUBROUTINE disp_mat(str) CHARACTER(len=*), INTENT(in) :: str INTEGER :: p DO p=0,npes-1 IF(me.EQ.p) THEN CALL to_mat(mat, nlkeep=.TRUE.) WRITE(*,'(a,i3.3,a,2i6)') str//' on PE', me, ': rank, nnz', mat%rank, mat%nnz ALLOCATE(arow(mat%rank)) DO i=1,mat%rank CALL getrow(mat, i, arow) WRITE(*,'(10f8.2)') arow END DO DEALLOCATE(arow) CALL FLUSH(6) END IF CALL mpi_barrier(MPI_COMM_WORLD, ierr) END DO END SUBROUTINE disp_mat END PROGRAM main diff --git a/examples/tsparse1.f90 b/examples/tsparse1.f90 index c22a07f..eb3e44c 100644 --- a/examples/tsparse1.f90 +++ b/examples/tsparse1.f90 @@ -1,117 +1,117 @@ !> !> @file tsparse1.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! MAIN !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! PROGRAM main USE sparse IMPLICIT none ! TYPE(zspmat) :: amat TYPE(zsprow) :: arow ! LOGICAL :: found INTEGER :: n=10 INTEGER :: jcol, nnz, i DOUBLE PRECISION :: valr, vali DOUBLE COMPLEX :: val, zero=(0.,0.) DOUBLE COMPLEX, ALLOCATABLE :: arr(:), farr(:) INTEGER, ALLOCATABLE :: col(:), newcol(:) ! ! Initialize the sparse matrix amat ! CALL init(n, amat) ! ! Use UPDT_SP to update a sparse row ! WRITE(*,*) 'Enter a list of positive indices, terminate with a zero' DO READ(*,*) jcol IF(jcol .LE. 0) EXIT CALL RANDOM_NUMBER(valr) vali = jcol val = CMPLX(valr, vali) CALL updtmat(arow, jcol, val) END DO ! ! Convert a sparse row to a sequential row ! nnz = arow%nnz WRITE(*,'(a,i5)') 'nnz =', nnz ! ALLOCATE(arr(nnz), col(nnz), newcol(nnz)) CALL getrow(arow, arr, col) WRITE(*, '(a/(10i8))') 'col', col WRITE(*, '(a/(10f8.4))') 'arr', arr ! ALLOCATE(farr(MAXVAL(col))) CALL getrow(arow, farr) WRITE(*, '(/a/(10f8.4))') 'farr', farr ! ! Clear element by element of row ! DO i=1,nnz CALL putele(arow, col(i), zero) CALL getrow(arow, arr, newcol) WRITE(*, '(/a,i6/(10i8))') 'col', arow%nnz, newcol(1:arow%nnz) END DO ! ! Re-create row using PUTROW and full row ! CALL putrow(arow, farr) CALL getrow(arow, arr, col) WRITE(*,'(/a,i5)') 'nnz =', arow%nnz WRITE(*, '(a/(10i8))') 'col', col WRITE(*, '(a/(10f8.4))') 'arr', arr ! ! Clear row using DESTROY ! CALL destroy(arow) CALL getrow(arow, arr, newcol) nnz = arow%nnz WRITE(*, '(/a,i6/(10i8))') 'col', nnz, newcol(1:nnz) ! ! Re-create row using PUTROW and sparse row ! CALL putrow(arow, arr, col) CALL getrow(arow, arr, newcol, nnz) WRITE(*, '(/a,i6/(10i8))') 'col', nnz, newcol(1:nnz) WRITE(*, '(a/(10f8.4))') 'arr', arr ! ! Test GETELE ! i=111;val=0 CALL getele(arow, i, val, found) WRITE(*,'(/i8,2f8.4,l3)') i, val, found DO i=1,nnz CALL getele(arow, col(i), val, found) WRITE(*,'(i8,2f8.4,l3)') col(i), val, found END DO ! ! Test destroy_spmat ! CALL destroy(amat) END PROGRAM main diff --git a/examples/tsparse2.f90 b/examples/tsparse2.f90 index 23d3fcf..98efd21 100644 --- a/examples/tsparse2.f90 +++ b/examples/tsparse2.f90 @@ -1,151 +1,151 @@ !> !> @file tsparse2.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Simple 2D Poisson using 5 points FD ! USE pardiso_bsplines IMPLICIT NONE ! TYPE(pardiso_mat) :: amat, bmat DOUBLE PRECISION, ALLOCATABLE :: arow(:), rhs(:), sol(:) DOUBLE PRECISION, ALLOCATABLE :: arown(:,:), rhsn(:,:), soln(:,:) INTEGER :: nx=5, ny=4, n INTEGER :: i, j, irow, jcol ! DOUBLE PRECISION :: mem, seconds ! n = nx*ny CALL init(n, 1, amat) ! Non-symmetric matrix CALL init(n, 1, bmat, nlsym=.TRUE.) ! Symmetric matrix ALLOCATE(arow(n), arown(n,2)) ALLOCATE(rhs(n), rhsn(n,2)) ALLOCATE(sol(n), soln(n,2)) ! ! Construct the FD matrix amat, using sparse rows (linked lists) ! DO j=1,ny DO i=1,nx arow = 0.0d0 irow = numb(i,j) arow(irow) = 4.0d0 IF(i.GT.1) arow(numb(i-1,j)) = -1.0d0 IF(i.LT.nx) arow(numb(i+1,j)) = -1.0d0 IF(j.GT.1) arow(numb(i,j-1)) = -1.0d0 IF(j.LT.ny) arow(numb(i,j+1)) = -1.0d0 rhs(irow) = SUM(arow) CALL putrow(amat, irow, arow) ! General matrix CALL putrow(bmat, irow, arow) ! Symmetric matrix END DO END DO ! ! Print the matrices ! WRITE(*,'(/a)') 'Matrix A' DO i=1,n CALL getrow(amat, i, arow) WRITE(*,'(30f4.0)') arow END DO PRINT*, 'nnz from get_count', get_count(amat) ! WRITE(*,'(/a)') 'Matrix B' DO i=1,n CALL getrow(bmat, i, arow) WRITE(*,'(30f4.0)') arow END DO PRINT*, 'nnz from get_count', get_count(bmat) ! ! Factor the matrix using Pardiso ! CALL factor(amat, nlmetis=.TRUE.) WRITE(*,'(/a,i5)') 'Number of nonzeros in factors of A = ',amat%p%iparm(18) WRITE(*,'(a,i5)') 'Number of factorization MFLOPS = ',amat%p%iparm(19) ! CALL factor(bmat, nlmetis=.TRUE.) WRITE(*,'(/a,i5)') 'Number of nonzeros in factors of B = ',bmat%p%iparm(18) WRITE(*,'(a,i5)') 'Number of factorization MFLOPS = ',bmat%p%iparm(19) ! WRITE(*,'(/a/(10f8.4))') 'rhs', rhs ! ! Backsolve Ax = b, using Pardiso ! sol = rhs CALL bsolve(amat, sol, debug=.FALSE.) WRITE(*,'(/a/(10f8.4))') 'sol (non-sym)', sol WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(sol-1.0d0)) ! ! Backsolve Bx = b, using Pardiso ! sol = rhs CALL bsolve(bmat, sol, debug=.FALSE.) WRITE(*,'(/a/(10f8.4))') 'sol (sym)', sol WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(sol-1.0d0)) ! arow = vmx(amat, sol) WRITE(*,'(/a/(10f8.4))') 'A*x (non-sym)', arow WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(arow-rhs)) ! arow = vmx(bmat, sol) WRITE(*,'(/a/(10f8.4))') 'B*x (sym)', arow WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(arow-rhs)) ! ! Multiple RHS ! rhsn(:,1) = -rhs(:) rhsn(:,2) = 2.d0*rhs(:) CALL bsolve(amat, rhsn, soln) WRITE(*,'(/a/(10f8.4))') 'soln (non-sym)', soln WRITE(*,'(a,2(1pe12.3))') 'Error', MAXVAL(ABS(soln(:,1)+1.0d0)), & & MAXVAL(ABS(soln(:,2)-2.0d0)) ! CALL bsolve(bmat, rhsn, soln) WRITE(*,'(/a/(10f8.4))') 'soln (sym)', soln WRITE(*,'(a,2(1pe12.3))') 'Error', MAXVAL(ABS(soln(:,1)+1.0d0)), & & MAXVAL(ABS(soln(:,2)-2.0d0)) ! arown = vmx(amat, soln) WRITE(*,'(/a/(10f8.4))') 'A*x (non-sym)', arown WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(arown-rhsn)) ! arown = vmx(bmat, soln) WRITE(*,'(/a/(10f8.4))') 'A*x (sym)', arown WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(arown-rhsn)) ! ! Clean up ! DEALLOCATE(arow,arown) DEALLOCATE(rhs,rhsn) DEALLOCATE(sol,soln) CALL destroy(amat) CALL destroy(bmat) CONTAINS INTEGER FUNCTION numb(i,j) INTEGER, INTENT(in) :: i, j numb = (j-1)*nx + i END FUNCTION numb ! END PROGRAM main diff --git a/examples/zpardiso_ex1.f b/examples/zpardiso_ex1.f index 7979d88..33027c6 100644 --- a/examples/zpardiso_ex1.f +++ b/examples/zpardiso_ex1.f @@ -1,96 +1,96 @@ !> !> @file zpardiso_ex1.f !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main USE wsmp_bsplines USE pardiso_bsplines IMPLICIT NONE c INTEGER :: n=9 INTEGER ia(10) INTEGER ja(29) COMPLEX*16 avals(29) COMPLEX*16 b(9), sol(9), arow(9) c c$$$ type(zwsmp_mat) :: mat type(zpardiso_mat) :: mat integer :: i, k c DATA ia /1,5,9,13,17,21,25,27,29,30/ data ja 1 /1, 3, 7, 8, 2 2, 3, 8, 9, 3 3, 7, 8, 9, 4 4, 6, 7, 8, 5 5, 6, 8, 9, 6 6, 7, 8, 9, 7 7, 8, 8 8, 9, 9 9/ data avals 1 /(14.d0,0.d0), (-1.d0,-5.d-2), (-1.d0,-5.0d-2), (-3.d0,-1.5d-1), 2 (14.d0,0.d0), (-1.d0,-5.d-2), (-3.d0,-1.5d-1), (-1.d0,-5.0d-2), 3 (16.d0,0.d0), (-2.d0,-1.d-1), (-4.d0,-2.0d-1), (-2.d0,-1.0d-1), 4 (14.d0,0.d0), (-1.d0,-5.d-2), (-1.d0,-5.0d-2), (-3.d0,-1.5d-1), 5 (14.d0,0.d0), (-1.d0,-5.d-2), (-3.d0,-1.5d-1), (-1.d0,-5.0d-2), 6 (16.d0,0.d0), (-2.d0,-1.d-1), (-4.d0,-2.0d-1), (-2.d0,-1.0d-1), 7 (16.d0,0.d0), (-4.d0,-2.d-1), 8 (71.d0,0.d0), (-4.d0,-2.d-1), 9 (16.d0,0.d0)/ c c$$$ call init(n, 1, mat, nlherm=.false., nlsym=.true., nlpos=.true.) call init(n, 1, mat, nlherm=.true., nlpos=.true.) do i=1,n do k=ia(i),ia(i+1)-1 call putele(mat, i, ja(k), avals(k)) end do end do c call factor(mat) c print*, 'diff of val', cnorm2(avals-mat%val) print*, 'diff of ia', ia-mat%irow print*,' diff ja', ja-mat%cols c print*, 'The RHS:' do i = 1, n call getrow(mat,i, arow) b(i) = sum(arow) print *,i,' : ',b(i) end do call bsolve(mat,b,sol) print *,'The solution of the system is as follows:' do i = 1, n print *,i,' : ',sol(i) end do print*, 'Residue =', cnorm2(vmx(mat,sol)-b) contains FUNCTION cnorm2(x) DOUBLE COMPLEX, INTENT(in) :: x(:) DOUBLE PRECISION :: cnorm2 cnorm2 = SQRT(DOT_PRODUCT(x,x)) END FUNCTION cnorm2 c END PROGRAM main diff --git a/examples/zssmp_ex1.f b/examples/zssmp_ex1.f index cae1c08..dbe84fc 100644 --- a/examples/zssmp_ex1.f +++ b/examples/zssmp_ex1.f @@ -1,111 +1,111 @@ !> !> @file zssmp_ex1.f !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main USE wsmp_bsplines USE pardiso_bsplines IMPLICIT NONE c INTEGER :: n=9 INTEGER ia(10) INTEGER ja(29) COMPLEX*16 avals(29) COMPLEX*16 b(9), sol(9), arow(9) c type(zwsmp_mat) :: mat integer :: i, k c DATA ia /1,5,9,13,17,21,25,27,29,30/ data ja 1 /1, 3, 7, 8, 2 2, 3, 8, 9, 3 3, 7, 8, 9, 4 4, 6, 7, 8, 5 5, 6, 8, 9, 6 6, 7, 8, 9, 7 7, 8, 8 8, 9, 9 9/ data avals 1 /(14.d0,0.d0), (-1.d0,-5.d-2), (-1.d0,-5.0d-2), (-3.d0,-1.5d-1), 2 (14.d0,0.d0), (-1.d0,-5.d-2), (-3.d0,-1.5d-1), (-1.d0,-5.0d-2), 3 (16.d0,0.d0), (-2.d0,-1.d-1), (-4.d0,-2.0d-1), (-2.d0,-1.0d-1), 4 (14.d0,0.d0), (-1.d0,-5.d-2), (-1.d0,-5.0d-2), (-3.d0,-1.5d-1), 5 (14.d0,0.d0), (-1.d0,-5.d-2), (-3.d0,-1.5d-1), (-1.d0,-5.0d-2), 6 (16.d0,0.d0), (-2.d0,-1.d-1), (-4.d0,-2.0d-1), (-2.d0,-1.0d-1), 7 (16.d0,0.d0), (-4.d0,-2.d-1), 8 (71.d0,0.d0), (-4.d0,-2.d-1), 9 (16.d0,0.d0)/ c call init(n, 1, mat, nlherm=.true., nlpos=.true.) do i=1,n do k=ia(i),ia(i+1)-1 call putele(mat, i, ja(k), avals(k)) end do end do c print*, 'The RHS before tomat:' do i = 1, n call getrow(mat,i, arow) b(i) = sum(arow) print *,i,' : ',b(i) end do c call factor(mat) c write(*,'(a/(20f6.2))') 'avals', avals write(*,'(a/(20f6.2))') 'mat%val', mat%val print*, 'diff of val', cnorm2(avals-mat%val) print*, 'diff of ia', ia-mat%irow print*,' diff ja', ja-mat%cols c print*, 'Check getrow' do i = 1, n call getrow(mat,i, arow) write(*,'(i3,": ",(20f6.2))') i, arow(i:n) end do c print*, 'The RHS:' do i = 1, n call getrow(mat,i, arow) b(i) = sum(arow) print *,i,' : ',b(i) end do call bsolve(mat,b,sol) print *,'Norm of Residual = ',mat%p%dparm(7) print *,'The solution of the system is as follows:' do i = 1, n print *,i,' : ',sol(i) end do print*, 'Residue =', cnorm2(vmx(mat,sol)-b) c contains FUNCTION cnorm2(x) DOUBLE COMPLEX, INTENT(in) :: x(:) DOUBLE PRECISION :: cnorm2 cnorm2 = SQRT(DOT_PRODUCT(x,x)) END FUNCTION cnorm2 c END PROGRAM main diff --git a/fft/CMakeLists.txt b/fft/CMakeLists.txt index 82f6ec0..1b099c2 100644 --- a/fft/CMakeLists.txt +++ b/fft/CMakeLists.txt @@ -1,69 +1,69 @@ # # @file CMakeLists.txt # # @brief Principal CMake configuration file for the fft library # # @copyright # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) # SPC (Swiss Plasma Center) # -# spclibs is free software: you can redistribute it and/or modify it under +# SPClibs is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) # any later version. # -# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . # # @authors # (in alphabetical order) # @author Nicolas Richart # @author Trach-Minh Tran # project(fft) find_package(FFTW REQUIRED) set(fft_w $ENV{fft_w}) set(SRCS fft_fftw.F90 ) set(EXAMPLES tfft.f90) add_library(fft STATIC ${SRCS}) target_include_directories(fft PRIVATE $ ${FFTW_INCLUDES} INTERFACE $ $ ${FFTW_INCLUDES} ) if (${fft_w} MATCHES "fft_w2") target_compile_options(fft PRIVATE "-Dfft_w2") else() target_compile_options(fft PRIVATE "-Dfft_w3") endif() target_link_libraries(fft PUBLIC ${FFTW_LIBRARY} ${MPI_Fortran_LIBRARIES}) # set_property(TARGET fft PROPERTY PUBLIC_HEADER ${CMAKE_CURRENT_BINARY_DIR}/modules/fft.mod) add_executable(tfft tfft.f90) target_link_libraries(tfft fft ${MPI_Fortran_LIBRARIES}) #add_test(tfft ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 1 # ${CMAKE_CURRENT_BINARY_DIR}/tfft < ${fft_SOURCE_DIR}/in) install(TARGETS fft EXPORT ${BSPLINES_EXPORT_TARGETS} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} ) diff --git a/fft/Makefile b/fft/Makefile index bb7a818..895fdb9 100644 --- a/fft/Makefile +++ b/fft/Makefile @@ -1,68 +1,68 @@ # # @file Makefile # # @brief Makefile for the fft library # # @copyright # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) # SPC (Swiss Plasma Center) # -# spclibs is free software: you can redistribute it and/or modify it under +# SPClibs is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) # any later version. # -# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . # # @authors # (in alphabetical order) # @author Trach-Minh Tran # FFTW=$(FFTW_HOME) F90 = ifort LD = ifort debug = -g -traceback -CB optim = -O3 OPT=$(debug) #OPT=$(optim) F90FLAGS = $(OPT) -Dfft_w3 -I$(FFTW)/include LDFLAGS = $(OPT) -L. -L$(FFTW)/lib LIBS = -lfft -lfftw3 .SUFFIXES: .SUFFIXES: .o .c .F90 .f90 .f .f90.o: $(F90) $(F90FLAGS) -c $< .F90.o: $(F90) $(F90FLAGS) -c $< .f.o: $(F90) $(F90FLAGS) -c $< all: tfft lib: libfft.a libfft.a: fft_fftw.o xiar r $@ $? ranlib $@ tfft: tfft.o $(LD) $(LDFLAGS) -o $@ tfft.o fft_fftw.o $(LIBS) tfft.o: lib clean: rm -f *.o *.mod *~ a.out distclean: clean rm -f tfft diff --git a/fft/fft_fftw.F90 b/fft/fft_fftw.F90 index ff9e7ca..cd404a8 100644 --- a/fft/fft_fftw.F90 +++ b/fft/fft_fftw.F90 @@ -1,1376 +1,1376 @@ !> !> @file fft_fftw.F90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> #if defined(fft_w2) MODULE fft ! IMPLICIT NONE ! PRIVATE ! PUBLIC four1, fourcol, fourrow ! ! Global parameters ! INTEGER, PARAMETER :: MXPLAN=8 ! ! Global variables ! INTEGER ::n1d_saved=0 INTEGER*8, DIMENSION(MXPLAN) :: plan1d INTEGER, DIMENSION(MXPLAN) :: n1d REAL, DIMENSION(:), ALLOCATABLE :: scr1_real DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: scr1 INTERFACE fourcol MODULE PROCEDURE four1, fourcol_ra, fourcol_raa END INTERFACE CONTAINS ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! SUBROUTINE four1(arr, isign) ! ! A single 1D complex FFT ! INCLUDE 'fftw_f77.h' ! ! Dummy arguments ! DOUBLE COMPLEX, DIMENSION(:), INTENT(INOUT) :: arr INTEGER, INTENT(IN) :: isign ! ! Local variables ! INTEGER :: n, id ! n = SIZE(arr) IF( .NOT. ALLOCATED(scr1) ) THEN ALLOCATE(scr1(n)) ELSE IF ( SIZE(scr1) < n ) THEN DEALLOCATE(scr1) ALLOCATE(scr1(n)) END IF END IF ! CALL getplan(n, isign, id, 1) CALL fftw_f77_one(plan1d(id), arr, scr1) END SUBROUTINE four1 ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! SUBROUTINE fourcol_ra(arr, isign) ! ! 1D complex FFT of columns of arr(1:N,1:howmany) ! INCLUDE 'fftw_f77.h' ! ! Dummy arguments ! DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr INTEGER, INTENT(IN) :: isign ! ! Local variables ! INTEGER :: n, howmany, id ! n = SIZE(arr,1) howmany = SIZE(arr,2) ! IF( .NOT. ALLOCATED(scr1) ) THEN ALLOCATE(scr1(n)) ELSE IF ( SIZE(scr1) < n ) THEN DEALLOCATE(scr1) ALLOCATE(scr1(n)) END IF END IF ! CALL getplan(n, isign, id,1) CALL fftw_f77(plan1d(id), howmany, arr, 1, n, scr1, 1, n) END SUBROUTINE fourcol_ra ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! SUBROUTINE fourcol_raa(arr, isign) ! ! 1D complex FFT of columns of arr(1:N,1:howmany) ! INCLUDE 'fftw_f77.h' ! ! Dummy arguments ! DOUBLE COMPLEX, DIMENSION(:,:,:), INTENT(INOUT) :: arr INTEGER, INTENT(IN) :: isign ! ! Local variables ! INTEGER :: n, howmany, id ! n = SIZE(arr,1) howmany = SIZE(arr,2)*SIZE(arr,3) ! IF( .NOT. ALLOCATED(scr1) ) THEN ALLOCATE(scr1(n)) ELSE IF ( SIZE(scr1) < n ) THEN DEALLOCATE(scr1) ALLOCATE(scr1(n)) END IF END IF ! CALL getplan(n, isign, id, 1) CALL fftw_f77(plan1d(id), howmany, arr, 1, n, scr1, 1, n) END SUBROUTINE fourcol_raa ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! SUBROUTINE fourrow(arr, isign) ! ! 1D complex FFT of rows of arr(1:howmany,1:N) ! INCLUDE 'fftw_f77.h' ! ! Dummy arguments ! DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr INTEGER, INTENT(IN) :: isign ! ! Local variables ! INTEGER :: n, howmany, id ! n = SIZE(arr,2) howmany = SIZE(arr,1) ! IF( .NOT. ALLOCATED(scr1) ) THEN ALLOCATE(scr1(n)) ELSE IF ( SIZE(scr1) < n ) THEN DEALLOCATE(scr1) ALLOCATE(scr1(n)) END IF END IF ! CALL getplan(n, isign, id, 1) CALL fftw_f77(plan1d(id), howmany, arr, howmany, 1, & & scr1, howmany, 1) END SUBROUTINE fourrow ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! SUBROUTINE getplan(n, sign, id, complex_fftw) ! ! Create or get an already created FFT plan (depends only on N.) ! INCLUDE 'fftw_f77.h' ! ! Dummy arguments ! INTEGER, INTENT(IN) :: n ! size of transform INTEGER, INTENT(IN) :: sign ! dir. of transform -1=>FORWARD, +1=>BACKWARD INTEGER, INTENT(OUT) :: id ! id of FFT plan INTEGER, INTENT(IN) :: complex_fftw ! Create complex<->complex transform if =1 ! ! Local variables ! INTEGER :: k, i, dir ! k = sign*(2*n+complex_fftw) DO i = 1,n1d_saved IF( k == n1d(i)) THEN id = i RETURN END IF END DO IF( n1d_saved == MXPLAN) THEN PRINT*, 'Module fft: MXPLAN too small! Increase it and recompile' STOP END IF n1d_saved = n1d_saved+1 n1d(n1d_saved) = k id = n1d_saved dir = FFTW_FORWARD IF( sign == +1 ) dir = FFTW_BACKWARD IF (complex_fftw == 1) THEN CALL fftw_f77_create_plan(plan1d(id), n, dir, FFTW_ESTIMATE + FFTW_IN_PLACE) !!$ ELSE !!$ CALL rfftw_f77_create_plan(plan1d(id), n, dir, FFTW_ESTIMATE + FFTW_IN_PLACE) END IF END SUBROUTINE getplan END MODULE fft #endif ! #if defined(fft_w3) MODULE fft ! IMPLICIT NONE ! PRIVATE PUBLIC :: four1, fourcol, fourrow ! INCLUDE 'fftw3.f' ! TYPE int_para INTEGER, DIMENSION(2) :: par ! size of transform END TYPE int_para ! ! Global parameters ! INTEGER, PARAMETER :: MXPLAN=16 ! define the maximum number of plans. ! ! Global variables ! INTEGER*8, DIMENSION(MXPLAN,4), SAVE :: plan1d ! plans for 1-dim FFT TYPE(int_para), DIMENSION(MXPLAN,4), SAVE :: n1d_par INTEGER, DIMENSION(4), SAVE :: n1d_saved=0 ! number of plans saved ! INTERFACE fourcol MODULE PROCEDURE four1, fourcol_ra, fourcol_raa END INTERFACE ! CONTAINS ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! SUBROUTINE four1(vec, isign) ! ! Dummy arguments ! DOUBLE COMPLEX, DIMENSION(:), INTENT(INOUT) :: vec INTEGER, INTENT(IN) :: isign ! ! Local parameters ! INTEGER, PARAMETER :: NUM=1 ! ! Local variables ! INTEGER :: k INTEGER :: dim1, i, id, istat, n DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: vec_tmp ! ! n = SIZE(vec) ! ! test if a plan that fits is already created. ! id = -1 k = isign*n DO i = 1,n1d_saved(NUM) IF (k == n1d_par(i,NUM)%par(1)) THEN id = i EXIT END IF END DO ! SELECT CASE (id) CASE (-1) ! ! test if the maximal number of plans is alredy reached. ! IF (n1d_saved(NUM) == MXPLAN) THEN WRITE(*,*) 'FOUR1: MXPLAN too small! Increase it and recompile' STOP END IF ! dim1 = SIZE(vec) ALLOCATE(vec_tmp(dim1), stat=istat) IF (istat /= 0) THEN WRITE(*,*) 'FOUR1: Allocation of vec_tmp failed!' STOP END IF ! n1d_saved(NUM) = n1d_saved(NUM)+1 n1d_par(n1d_saved(NUM),NUM)%par(1) = k id = n1d_saved(NUM) ! SELECT CASE (isign) CASE (-1) CALL dfftw_plan_dft_1d(plan1d(id,NUM), n, vec_tmp(1), vec_tmp(1), & FFTW_FORWARD, FFTW_ESTIMATE) CASE (1) CALL dfftw_plan_dft_1d(plan1d(id,NUM), n, vec_tmp(1), vec_tmp(1), & FFTW_BACKWARD, FFTW_ESTIMATE) END SELECT ! DEALLOCATE(vec_tmp, stat=istat) IF (istat /= 0) THEN WRITE(*,*) 'FOUR1: Dellocation of vec_tmp failed!' STOP END IF ! END SELECT ! CALL dfftw_execute_dft(plan1d(id,NUM), vec(1), vec(1)) ! END SUBROUTINE four1 ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! SUBROUTINE fourcol_ra(arr, isign) ! ! Dummy arguments ! DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr INTEGER, INTENT(IN) :: isign ! ! Local parameters ! INTEGER, PARAMETER :: NUM=2, RANK=1 ! ! Local variables ! INTEGER :: k INTEGER :: dim1, dim2, howmany, i, id, istat, n INTEGER, DIMENSION(RANK) :: n_arr, nembed DOUBLE COMPLEX, DIMENSION(:,:), ALLOCATABLE :: arr_tmp ! dim1 = SIZE(arr,1) dim2 = SIZE(arr,2) howmany = SIZE(arr,2) ! ! test if a plan that fits is already created. ! id = -1 k = isign*dim1 DO i = 1,n1d_saved(NUM) IF (k == n1d_par(i,NUM)%par(1) .AND. & howmany == n1d_par(i,NUM)%par(2)) THEN id = i EXIT END IF END DO ! SELECT CASE (id) CASE (-1) ! ! test if the maximal number of plans is alredy reached. ! IF (n1d_saved(NUM) == MXPLAN) THEN WRITE(*,*) 'FOURCOL_RA: MXPLAN too small! Increase it and recompile' STOP END IF ! ALLOCATE(arr_tmp(dim1, dim2), stat=istat) IF (istat /= 0) THEN WRITE(*,*) 'FOURCOL_RA: Allocation of arr_tmp failed!' STOP END IF ! nembed(1) = SIZE(arr) n_arr(1) = dim1 n = n_arr(1) ! n1d_saved(NUM) = n1d_saved(NUM)+1 n1d_par(n1d_saved(NUM),NUM)%par(1) = k n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany id = n1d_saved(NUM) ! SELECT CASE (isign) CASE (-1) CALL dfftw_plan_many_dft(plan1d(id,NUM), RANK, n_arr, howmany, & arr_tmp(1,1), nembed, 1, n, arr_tmp(1,1), nembed, 1, n, & FFTW_FORWARD, FFTW_ESTIMATE) CASE (1) CALL dfftw_plan_many_dft(plan1d(id,NUM), RANK, n_arr, howmany, & arr_tmp(1,1), nembed, 1, n, arr_tmp(1,1), nembed, 1, n, & FFTW_BACKWARD, FFTW_ESTIMATE) END SELECT ! DEALLOCATE(arr_tmp, stat=istat) IF (istat /= 0) THEN WRITE(*,*) 'FOURCOL_RA: Dellocation of arr_tmp failed!' STOP END IF ! END SELECT ! CALL dfftw_execute_dft(plan1d(id,NUM), arr(1,1), arr(1,1)) ! END SUBROUTINE fourcol_ra ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! SUBROUTINE fourcol_raa(arr, isign) ! ! Dummy arguments ! DOUBLE COMPLEX, DIMENSION(:,:,:), INTENT(INOUT) :: arr INTEGER, INTENT(IN) :: isign ! ! Local parameters ! INTEGER, PARAMETER :: NUM=3, RANK=1 ! ! Local variables ! INTEGER :: k INTEGER :: dim1, dim2, dim3, howmany, i, id, istat, n INTEGER, DIMENSION(RANK) :: n_arr, nembed DOUBLE COMPLEX, DIMENSION(:,:,:), ALLOCATABLE :: arr_tmp ! dim1 = SIZE(arr,1) dim2 = SIZE(arr,2) dim3 = SIZE(arr,3) howmany = dim2*dim3 ! ! test if a plan that fits is already created. ! id = -1 k = isign*dim1 DO i = 1,n1d_saved(NUM) IF (k == n1d_par(i,NUM)%par(1) .AND. & howmany == n1d_par(i,NUM)%par(2)) THEN id = i EXIT END IF END DO ! SELECT CASE (id) CASE (-1) ! ! test if the maximal number of plans is alredy reached. ! IF (n1d_saved(NUM) == MXPLAN) THEN WRITE(*,*) 'FOURCOL_RAA: MXPLAN too small! Increase it and recompile' STOP END IF ! ALLOCATE(arr_tmp(dim1, dim2, dim3), stat=istat) IF (istat /= 0) THEN WRITE(*,*) 'FOURCOL_RAA: Allocation of arr_tmp failed!' STOP END IF ! nembed(1) = SIZE(arr) n_arr(1) = dim1 n = n_arr(1) ! n1d_saved(NUM) = n1d_saved(NUM)+1 n1d_par(n1d_saved(NUM),NUM)%par(1) = k n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany id = n1d_saved(NUM) ! SELECT CASE (isign) CASE (-1) CALL dfftw_plan_many_dft(plan1d(id,NUM), RANK, n_arr, howmany, & arr_tmp(1,1,1), nembed, 1, n, arr_tmp(1,1,1), nembed, 1, n, & FFTW_FORWARD, FFTW_ESTIMATE) CASE (1) CALL dfftw_plan_many_dft(plan1d(id,NUM), RANK, n_arr, howmany, & arr_tmp(1,1,1), nembed, 1, n, arr_tmp(1,1,1), nembed, 1, n, & FFTW_BACKWARD, FFTW_ESTIMATE) END SELECT ! DEALLOCATE(arr_tmp, stat=istat) IF (istat /= 0) THEN WRITE(*,*) 'FOURCOL_RAA: Dellocation of arr_tmp failed!' STOP END IF ! END SELECT ! CALL dfftw_execute_dft(plan1d(id,NUM), arr(1,1,1), arr(1,1,1)) ! END SUBROUTINE fourcol_raa ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! SUBROUTINE fourrow(arr, isign) ! ! Dummy arguments ! DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr INTEGER, INTENT(IN) :: isign ! ! Local parameters ! INTEGER, PARAMETER :: NUM=4, RANK=1 ! ! Local variables ! INTEGER :: k INTEGER :: dim1, dim2, howmany, i, id, istat, n INTEGER, DIMENSION(RANK) :: n_arr, nembed DOUBLE COMPLEX, DIMENSION(:,:), ALLOCATABLE :: arr_tmp ! dim1 = SIZE(arr,1) dim2 = SIZE(arr,2) howmany = dim1 ! ! test if a plan that fits is already created. ! id = -1 k = isign*dim2 DO i = 1,n1d_saved(NUM) IF (k == n1d_par(i,NUM)%par(1) .AND. & howmany == n1d_par(i,NUM)%par(2)) THEN id = i EXIT END IF END DO ! SELECT CASE (id) CASE (-1) ! ! test if the maximal number of plans is alredy reached. ! IF (n1d_saved(NUM) == MXPLAN) THEN WRITE(*,*) 'FOURROW: MXPLAN too small! Increase it and recompile' STOP END IF ! ALLOCATE(arr_tmp(dim1, dim2), stat=istat) IF (istat /= 0) THEN WRITE(*,*) 'FOURROW: Allocation of arr_tmp failed!' STOP END IF ! nembed(1) = SIZE(arr) n_arr(1) = SIZE(arr,2) n = n_arr(1) howmany = SIZE(arr,1) ! n1d_saved(NUM) = n1d_saved(NUM)+1 n1d_par(n1d_saved(NUM),NUM)%par(1) = k n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany id = n1d_saved(NUM) ! SELECT CASE (isign) CASE (-1) CALL dfftw_plan_many_dft(plan1d(id,NUM), RANK, n_arr, howmany, & arr_tmp(1,1), nembed, howmany, 1, arr_tmp(1,1), nembed, howmany, 1, & FFTW_FORWARD, FFTW_ESTIMATE) CASE (1) CALL dfftw_plan_many_dft(plan1d(id,NUM), RANK, n_arr, howmany, & arr_tmp(1,1), nembed, howmany, 1, arr_tmp(1,1), nembed, howmany, 1, & FFTW_BACKWARD, FFTW_ESTIMATE) END SELECT ! DEALLOCATE(arr_tmp, stat=istat) IF (istat /= 0) THEN WRITE(*,*) 'FOUR1: Dellocation of arr_tmp failed!' STOP END IF ! END SELECT ! CALL dfftw_execute_dft(plan1d(id,NUM), arr(1,1), arr(1,1)) ! END SUBROUTINE fourrow END MODULE fft #endif ! #if defined(fft_essl) MODULE fft ! IMPLICIT NONE ! PRIVATE ! PUBLIC :: four1, fourcol, fourrow ! TYPE pointer_ra REAL, DIMENSION(:), POINTER :: poi_ra END TYPE pointer_ra ! TYPE int_para INTEGER, DIMENSION(2) :: par END TYPE int_para ! ! Global parameters ! INTEGER, PARAMETER :: MXPLAN=16 ! define the maximum number of work arrays. ! ! Global variables ! EXTERNAL :: ENOTRM LOGICAL, SAVE :: initflag=.TRUE. ! initialization of the module CHARACTER (len=8), SAVE :: S2015 ! string to copy error list entry REAL, DIMENSION(8) :: aux1 ! auxilary array REAL, DIMENSION(1) :: aux2 ! auxilary array TYPE(pointer_ra), DIMENSION(:,:), ALLOCATABLE, SAVE :: aux1_poi, aux2_poi ! work arrays for the ESSL routine TYPE(int_para), DIMENSION(MXPLAN,4), SAVE :: n1d_par ! size of transform INTEGER, DIMENSION(4), SAVE :: n1d_saved=0 ! number of plans saved ! INTERFACE fourcol MODULE PROCEDURE fourcol_ra, fourcol_raa END INTERFACE ! CONTAINS ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! SUBROUTINE four1(vec, isign) ! ! Dummy arguments ! DOUBLE COMPLEX, DIMENSION(:), INTENT(INOUT) :: vec INTEGER, INTENT(IN) :: isign ! ! Local parameters ! INTEGER, PARAMETER :: NUM=1 ! ! Local variables ! INTEGER :: i, id, istat, k, n, naux1, naux2 ! IF (initflag) THEN initflag = .FALSE. ! CALL EINFO(0) CALL ERRSAV(2015,S2015) ! ALLOCATE(aux1_poi(MXPLAN,4), stat=istat) IF (istat /= 0) THEN WRITE(*,*) 'FOUR1: 1. Allocation of aux1_poi failed!' STOP END IF ! ALLOCATE(aux2_poi(MXPLAN,4), stat=istat) IF (istat /= 0) THEN WRITE(*,*) 'FOUR1: 1. Allocation of aux2_poi failed!' STOP END IF ! END IF ! n = SIZE(vec) ! ! test if a plan that fits is already created. ! id = -1 k = isign*n DO i = 1,n1d_saved(NUM) IF (k == n1d_par(i,NUM)%par(1)) THEN id = i EXIT END IF END DO ! SELECT CASE (id) CASE (-1) ! n1d_saved(NUM) = n1d_saved(NUM)+1 n1d_par(n1d_saved(NUM),NUM)%par(1) = k id = n1d_saved(NUM) ! CALL ERRSET(2015,0,-1,1,ENOTRM,0) ! naux1 = SIZE(aux1) naux2 = SIZE(aux2) ! CALL dcft(1, vec(1), 1, n, vec(1), 1, n, & n, 1, -isign, 1.0, aux1, naux1, aux2, naux2) ! CALL ERRSTR(2015,S2015) ! ! dynamic allocation of the work arrays. ! ALLOCATE(aux1_poi(id,NUM)%poi_ra(naux1), stat=istat) IF (istat /= 0) THEN WRITE(*,*) 'FOUR1: 2. Allocation of aux1_poi failed!' STOP ENDIF ! ALLOCATE(aux2_poi(id,NUM)%poi_ra(naux2), stat=istat) IF (istat /= 0) THEN WRITE(*,*) 'FOUR1: 2. Allocation of aux2_poi failed!' STOP ENDIF ! CALL dcft(1, vec(1), 1, n, vec(1), 1, n, & n, 1, -isign, 1.0, & aux1_poi(id,NUM)%poi_ra(1), naux1, & aux2_poi(id,NUM)%poi_ra(1), naux2) ! END SELECT ! CALL dcft(0, vec(1), 1, n, vec(1), 1, n, & n, 1, -isign, 1.0, & aux1_poi(id,NUM)%poi_ra(1), SIZE(aux1_poi(id,NUM)%poi_ra), & aux2_poi(id,NUM)%poi_ra(1), SIZE(aux2_poi(id,NUM)%poi_ra)) ! END SUBROUTINE four1 ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! SUBROUTINE fourcol_ra(arr, isign) ! ! Dummy arguments ! DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr INTEGER, INTENT(IN) :: isign ! ! Local parameters ! INTEGER, PARAMETER :: NUM=2 ! ! Local variables ! INTEGER :: dim1, howmany, i, id, istat, k, naux1, naux2 ! IF (initflag) THEN initflag = .FALSE. ! CALL EINFO(0) CALL ERRSAV(2015,S2015) ! ALLOCATE(aux1_poi(MXPLAN,4), stat=istat) IF (istat /= 0) THEN WRITE(*,*) 'FOURCOL_RA: 1. Allocation of aux1_poi failed!' STOP END IF ! ALLOCATE(aux2_poi(MXPLAN,4), stat=istat) IF (istat /= 0) THEN WRITE(*,*) 'FOURCOL_RA: 1. Allocation of aux2_poi failed!' STOP END IF ! END IF ! dim1 = SIZE(arr,1) howmany = SIZE(arr,2) ! ! test if a plan that fits is already created. ! id = -1 k = isign*dim1 DO i = 1,n1d_saved(NUM) IF (k == n1d_par(i,NUM)%par(1) .AND. & howmany == n1d_par(i,NUM)%par(2)) THEN id = i EXIT END IF END DO ! SELECT CASE (id) CASE (-1) ! n1d_saved(NUM) = n1d_saved(NUM)+1 n1d_par(n1d_saved(NUM),NUM)%par(1) = k n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany id = n1d_saved(NUM) ! CALL ERRSET(2015,0,-1,1,ENOTRM,0) ! naux1 = SIZE(aux1) naux2 = SIZE(aux2) ! CALL dcft(1, arr(1,1), 1, dim1, arr(1,1), 1, dim1, & dim1, howmany, -isign, 1.0, aux1, naux1, aux2, naux2) ! CALL ERRSTR(2015,S2015) ! ! dynamic allocation of the work arrays. ! ALLOCATE(aux1_poi(id,NUM)%poi_ra(naux1), stat=istat) IF (istat /= 0) THEN WRITE(*,*) 'FOURCOL_RA: 2. Allocation of aux1_poi failed!' STOP ENDIF ! ALLOCATE(aux2_poi(id,NUM)%poi_ra(naux2), stat=istat) IF (istat /= 0) THEN WRITE(*,*) 'FOURCOL_RA: 2. Allocation of aux2_poi failed!' STOP ENDIF ! CALL dcft(1, arr(1,1), 1, dim1, arr(1,1), 1, dim1, & dim1, howmany, -isign, 1.0, & aux1_poi(id,NUM)%poi_ra(1), naux1, & aux2_poi(id,NUM)%poi_ra(1), naux2) ! END SELECT ! CALL dcft(0, arr(1,1), 1, dim1, arr(1,1), 1, dim1, & dim1, howmany, -isign, 1.0, & aux1_poi(id,NUM)%poi_ra(1), SIZE(aux1_poi(id,NUM)%poi_ra), & aux2_poi(id,NUM)%poi_ra(1), SIZE(aux2_poi(id,NUM)%poi_ra)) ! END SUBROUTINE fourcol_ra ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! SUBROUTINE fourcol_raa(arr, isign) ! ! Dummy arguments ! DOUBLE COMPLEX, DIMENSION(:,:,:), INTENT(INOUT) :: arr INTEGER, INTENT(IN) :: isign ! ! Local parameters ! INTEGER, PARAMETER :: NUM=3 ! ! Local variables ! INTEGER :: dim1, howmany, i, id, istat, k, naux1, naux2 ! IF (initflag) THEN initflag = .FALSE. ! CALL EINFO(0) CALL ERRSAV(2015,S2015) ! ALLOCATE(aux1_poi(MXPLAN,NUM), stat=istat) IF (istat /= 0) THEN WRITE(*,*) 'FOURCOL_RAA: 1. Allocation of aux1_poi failed!' STOP END IF ! ALLOCATE(aux2_poi(MXPLAN,NUM), stat=istat) IF (istat /= 0) THEN WRITE(*,*) 'FOURCOL_RAA: 1. Allocation of aux2_poi failed!' STOP END IF ! END IF ! dim1 = SIZE(arr,1) howmany = SIZE(arr,2)*SIZE(arr,3) ! ! test if a plan that fits is already created. ! id = -1 k = isign*dim1 DO i = 1,n1d_saved(NUM) IF (k == n1d_par(i,NUM)%par(1) .AND. & howmany == n1d_par(i,NUM)%par(2)) THEN id = i EXIT END IF END DO ! SELECT CASE (id) CASE (-1) ! n1d_saved(NUM) = n1d_saved(NUM)+1 n1d_par(n1d_saved(NUM),NUM)%par(1) = k n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany id = n1d_saved(NUM) ! CALL ERRSET(2015,0,-1,1,ENOTRM,0) ! naux1 = SIZE(aux1) naux2 = SIZE(aux2) ! CALL dcft(1, arr(1,1,1), 1, dim1, arr(1,1,1), 1, dim1, & dim1, howmany, -isign, 1.0, aux1, naux1, aux2, naux2) ! CALL ERRSTR(2015,S2015) ! ! dynamic allocation of the work arrays. ! ALLOCATE(aux1_poi(id,NUM)%poi_ra(naux1), stat=istat) IF (istat /= 0) THEN WRITE(*,*) 'FOURCOL_RAA: 2. Allocation of aux1_poi failed!' STOP ENDIF ! ALLOCATE(aux2_poi(id,NUM)%poi_ra(naux2), stat=istat) IF (istat /= 0) THEN WRITE(*,*) 'FOURCOL_RAA: 2. Allocation of aux2_poi failed!' STOP ENDIF ! CALL dcft(1, arr(1,1,1), 1, dim1, arr(1,1,1), 1, dim1, & dim1, howmany, -isign, 1.0, & aux1_poi(id,NUM)%poi_ra(1), naux1, & aux2_poi(id,NUM)%poi_ra(1), naux2) ! END SELECT ! CALL dcft(0, arr(1,1,1), 1, dim1, arr(1,1,1), 1, dim1, & dim1, howmany, -isign, 1.0, & aux1_poi(id,NUM)%poi_ra(1), SIZE(aux1_poi(id,NUM)%poi_ra), & aux2_poi(id,NUM)%poi_ra(1), SIZE(aux2_poi(id,NUM)%poi_ra)) ! END SUBROUTINE fourcol_raa ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! SUBROUTINE fourrow(arr, isign) ! ! Dummy arguments ! DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr INTEGER, INTENT(IN) :: isign ! ! Local parameters ! INTEGER, PARAMETER :: NUM=4 ! ! Local variables ! INTEGER :: dim1, dim2, i, id, istat, k, naux1, naux2 ! IF (initflag) THEN initflag = .FALSE. ! CALL EINFO(0) CALL ERRSAV(2015,S2015) ! ALLOCATE(aux1_poi(MXPLAN,NUM), stat=istat) IF (istat /= 0) THEN WRITE(*,*) 'FOURROW: 1. Allocation of aux1_poi failed!' STOP END IF ! ALLOCATE(aux2_poi(MXPLAN,NUM), stat=istat) IF (istat /= 0) THEN WRITE(*,*) 'FOURROW: 1. Allocation of aux2_poi failed!' STOP END IF ! END IF ! dim1 = SIZE(arr,1) dim2 = SIZE(arr,2) ! ! test if a plan that fits is already created. ! id = -1 k = isign*dim1 DO i = 1,n1d_saved(NUM) IF (k == n1d_par(i,NUM)%par(1) .AND. & dim2 == n1d_par(i,NUM)%par(2)) THEN id = i EXIT END IF END DO ! SELECT CASE (id) CASE (-1) ! n1d_saved(NUM) = n1d_saved(NUM)+1 n1d_par(n1d_saved(NUM),NUM)%par(1) = k n1d_par(n1d_saved(NUM),NUM)%par(2) = dim2 id = n1d_saved(NUM) ! CALL ERRSET(2015,0,-1,1,ENOTRM,0) ! naux1 = SIZE(aux1) naux2 = SIZE(aux2) ! CALL dcft(1, arr(1,1), dim2, 1, arr(1,1), dim2, 1, & dim1, dim2, -isign, 1.0, aux1, naux1, aux2, naux2) ! CALL ERRSTR(2015,S2015) ! ! dynamic allocation of the work arrays. ! ALLOCATE(aux1_poi(id,NUM)%poi_ra(naux1), stat=istat) IF (istat /= 0) THEN WRITE(*,*) 'FOURROW: 2. Allocation of aux1_poi failed!' STOP ENDIF ! ALLOCATE(aux2_poi(id,NUM)%poi_ra(naux2), stat=istat) IF (istat /= 0) THEN WRITE(*,*) 'FOURROW: 2. Allocation of aux2_poi failed!' STOP ENDIF ! CALL dcft(1, arr(1,1), dim1, 1, arr(1,1), dim1, 1, & dim1, dim2, -isign, 1.0, & aux1_poi(id,NUM)%poi_ra(1), naux1, & aux2_poi(id,NUM)%poi_ra(1), naux2) ! END SELECT ! CALL dcft(0, arr(1,1), dim1, 1, arr(1,1), dim1, 1, & dim1, dim2, -isign, 1.0, & aux1_poi(id,NUM)%poi_ra(1), SIZE(aux1_poi(id,NUM)%poi_ra), & aux2_poi(id,NUM)%poi_ra(1), SIZE(aux2_poi(id,NUM)%poi_ra)) ! END SUBROUTINE fourrow END MODULE fft #endif ! #if defined(fft_mkl) MODULE fft ! USE mkl_dfti ! IMPLICIT NONE ! PRIVATE ! PUBLIC :: pointer_r, handle1d PUBLIC :: four1, fourcol, fourrow ! TYPE pointer_r TYPE(DFTI_DESCRIPTOR), POINTER :: desc_handle END TYPE pointer_r ! TYPE int_para INTEGER, DIMENSION(2) :: par END TYPE int_para ! ! Global parameters ! INTEGER, PARAMETER :: MXPLAN=16 ! define the maximum number of plans. ! ! Global variables ! TYPE(DFTI_DESCRIPTOR), POINTER :: desc_handle TYPE(pointer_r), DIMENSION(MXPLAN,4), SAVE :: handle1d ! descriptor handles for 1-dim FFT TYPE(int_para), DIMENSION(MXPLAN,4), SAVE :: n1d_par ! size of transform INTEGER, DIMENSION(4), SAVE :: n1d_saved=0 ! number of descriptor handles saved ! INTERFACE fourcol MODULE PROCEDURE fourcol_ra, fourcol_raa END INTERFACE ! CONTAINS ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! SUBROUTINE four1(vec, isign) ! ! Dummy arguments ! INTEGER, INTENT(IN) :: isign DOUBLE COMPLEX, DIMENSION(:), INTENT(INOUT) :: vec ! ! Local parameters ! INTEGER, PARAMETER :: NUM=1 ! ! Local variables ! INTEGER :: dim1, id, i, k, status LOGICAL :: init_flag ! ! test if a plan that fits is already created. ! id = -1 k = isign*SIZE(vec) DO i = 1,n1d_saved(NUM) IF (k == n1d_par(i,NUM)%par(1)) THEN id = i EXIT END IF END DO ! SELECT CASE (id) CASE (-1) init_flag = .TRUE. n1d_saved(NUM) = n1d_saved(NUM)+1 n1d_par(n1d_saved(NUM),NUM)%par(1) = k id = n1d_saved(NUM) ! CASE default init_flag = .FALSE. END SELECT ! dim1 = SIZE(vec,1) ! CALL fourcol_mkl(vec(1), dim1, 1, isign, init_flag, id, NUM) ! END SUBROUTINE four1 ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! SUBROUTINE fourcol_ra(arr, isign) ! ! Dummy arguments ! INTEGER, INTENT(IN) :: isign DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr ! ! Local parameters ! INTEGER, PARAMETER :: NUM=2 ! ! Local variables ! INTEGER :: dim1, howmany, id, i, k, status LOGICAL :: init_flag ! dim1 = SIZE(arr,1) howmany = SIZE(arr,2) ! ! test if a plan that fits is already created. ! id = -1 k = isign*SIZE(arr) DO i = 1,n1d_saved(NUM) IF (k == n1d_par(i,NUM)%par(1) .AND. & howmany == n1d_par(i,NUM)%par(2)) THEN id = i EXIT END IF END DO ! SELECT CASE (id) CASE (-1) init_flag = .TRUE. n1d_saved(NUM) = n1d_saved(NUM)+1 n1d_par(n1d_saved(NUM),NUM)%par(1) = k n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany id = n1d_saved(NUM) ! CASE default init_flag = .FALSE. END SELECT ! CALL fourcol_mkl(arr(1,1), dim1, howmany, isign, init_flag, id, NUM) ! END SUBROUTINE fourcol_ra ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! SUBROUTINE fourcol_raa(arr, isign) ! ! Dummy arguments ! INTEGER, INTENT(IN) :: isign DOUBLE COMPLEX, DIMENSION(:,:,:), INTENT(INOUT) :: arr ! ! Local parameters ! INTEGER, PARAMETER :: NUM=3 ! ! Local variables ! INTEGER :: dim1, howmany, id, i, k, status LOGICAL :: init_flag ! dim1 = SIZE(arr,1) howmany = SIZE(arr,2)*SIZE(arr,3) ! ! test if a plan that fits is already created. ! id = -1 k = isign*SIZE(arr) DO i = 1,n1d_saved(NUM) IF (k == n1d_par(i,NUM)%par(1) .AND. & howmany == n1d_par(i,NUM)%par(2)) THEN id = i EXIT END IF END DO ! SELECT CASE (id) CASE (-1) init_flag = .TRUE. n1d_saved(NUM) = n1d_saved(NUM)+1 n1d_par(n1d_saved(NUM),NUM)%par(1) = k n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany id = n1d_saved(NUM) ! CASE default init_flag = .FALSE. END SELECT ! CALL fourcol_mkl(arr(1,1,1), dim1, howmany, isign, init_flag, id, NUM) ! END SUBROUTINE fourcol_raa ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! SUBROUTINE fourrow(arr, isign) ! ! Dummy arguments ! INTEGER, INTENT(IN) :: isign DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr ! ! Local parameters ! INTEGER, PARAMETER :: NUM=4 ! ! Local variables ! INTEGER :: dim2, howmany, id, i, k, status LOGICAL :: init_flag ! howmany = SIZE(arr,1) dim2 = SIZE(arr,2) ! ! test if a plan that fits is already created. ! id = -1 k = isign*SIZE(arr) DO i = 1,n1d_saved(NUM) IF (k == n1d_par(i,NUM)%par(1) .AND. & howmany == n1d_par(i,NUM)%par(2)) THEN id = i EXIT END IF END DO ! SELECT CASE (id) CASE (-1) init_flag = .TRUE. n1d_saved(NUM) = n1d_saved(NUM)+1 n1d_par(n1d_saved(NUM),NUM)%par(1) = k n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany id = n1d_saved(NUM) ! CASE default init_flag = .FALSE. END SELECT ! CALL fourrow_mkl(arr(1,1), howmany, dim2, isign, init_flag, id, NUM) ! END SUBROUTINE fourrow END MODULE fft ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! SUBROUTINE fourcol_mkl(arr, dim1, howmany, isign, init_flag, id, num) ! ! COMMENT: This subroutine is necessary to prevent the Lahey/Fujitsu ! compiler from making a copy of array arr when passing ! arguments. USE fft, ONLY: handle1d ! USE mkl_dfti ! IMPLICIT NONE ! ! Dummy arguments ! DOUBLE COMPLEX, DIMENSION(*), INTENT(INOUT) :: arr INTEGER, INTENT(IN) :: dim1 INTEGER, INTENT(IN) :: howmany INTEGER, INTENT(IN) :: isign LOGICAL, INTENT(IN) :: init_flag INTEGER, INTENT(IN) :: id INTEGER, INTENT(IN) :: num ! ! Local variables ! INTEGER :: i, status ! IF (init_flag) THEN ! status = DftiCreateDescriptor(handle1d(id,num)%desc_handle, & DFTI_DOUBLE, DFTI_COMPLEX, 1, dim1) IF (status /= 0) WRITE(*,*) 'FOURCOL_MKL 0:', DftiErrorMessage(status) status = DftiSetValue(handle1d(id,num)%desc_handle, & DFTI_NUMBER_OF_TRANSFORMS, howmany) IF (status /= 0) WRITE(*,*) 'FOURCOL_MKL 1:', DftiErrorMessage(status) status = DftiSetValue(handle1d(id,num)%desc_handle, & DFTI_INPUT_DISTANCE, dim1) IF (status /= 0) WRITE(*,*) 'FOURCOL_MKL 2:', DftiErrorMessage(status) status = DftiSetValue(handle1d(id,num)%desc_handle, & DFTI_OUTPUT_DISTANCE, dim1) IF (status /= 0) WRITE(*,*) 'FOURCOL_MKL 3:', DftiErrorMessage(status) ! status = DftiCommitDescriptor(handle1d(id,num)%desc_handle) IF (status /= 0) WRITE(*,*) 'FOURCOL_MKL 4:', DftiErrorMessage(status) ! END IF ! SELECT CASE (isign) CASE (-1) status = DftiComputeForward(handle1d(id,num)%desc_handle, arr) CASE (1) status = DftiComputeBackward(handle1d(id,num)%desc_handle, arr) END SELECT ! END SUBROUTINE fourcol_mkl ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! SUBROUTINE fourrow_mkl(arr, howmany, dim2, isign, init_flag, id, num) ! ! COMMENT: This subroutine is necessary to prevent the Lahey/Fujitsu ! compiler from making a copy of array arr when passing ! arguments. ! USE fft, ONLY: handle1d ! USE mkl_dfti ! IMPLICIT NONE ! ! Dummy arguments ! DOUBLE COMPLEX, DIMENSION(*), INTENT(INOUT) :: arr INTEGER, INTENT(IN) :: howmany INTEGER, INTENT(IN) :: dim2 INTEGER, INTENT(IN) :: isign LOGICAL, INTENT(IN) :: init_flag INTEGER, INTENT(IN) :: id INTEGER, INTENT(IN) :: num ! ! Local variables ! INTEGER :: i, status INTEGER, DIMENSION(2) :: stride ! IF (init_flag) THEN ! stride(1) = 0 stride(2) = howmany ! status = DftiCreateDescriptor(handle1d(id,num)%desc_handle, & DFTI_DOUBLE, DFTI_COMPLEX, 1, dim2) IF (status /= 0) WRITE(*,*) 'FOURROW_MKL 0:', DftiErrorMessage(status) status = DftiSetValue(handle1d(id,num)%desc_handle, & DFTI_NUMBER_OF_TRANSFORMS, dim2) IF (status /= 0) WRITE(*,*) 'FOURROW_MKL 1:', DftiErrorMessage(status) ! status = DftiSetValue(handle1d(id,num)%desc_handle, & DFTI_INPUT_DISTANCE, 1) IF (status /= 0) WRITE(*,*) 'FOURROW_MKL 2:', DftiErrorMessage(status) status = DftiSetValue(handle1d(id,num)%desc_handle, & DFTI_INPUT_STRIDES, stride) IF (status /= 0) WRITE(*,*) 'FOURROW_MKL 3:', DftiErrorMessage(status) ! status = DftiSetValue(handle1d(id,num)%desc_handle, & DFTI_OUTPUT_DISTANCE, 1) IF (status /= 0) WRITE(*,*) 'FOURROW_MKL 4:', DftiErrorMessage(status) status = DftiSetValue(handle1d(id,num)%desc_handle, & DFTI_OUTPUT_STRIDES, stride) IF (status /= 0) WRITE(*,*) 'FOURROW_MKL 5:', DftiErrorMessage(status) ! status = DftiCommitDescriptor(handle1d(id,num)%desc_handle) IF (status /= 0) WRITE(*,*) 'FOURROW_MKL 6:', DftiErrorMessage(status) ! END IF ! SELECT CASE (isign) CASE (-1) status = DftiComputeForward(handle1d(id,num)%desc_handle, arr) CASE (1) status = DftiComputeBackward(handle1d(id,num)%desc_handle, arr) END SELECT ! END SUBROUTINE fourrow_mkl #endif ! diff --git a/fft/fftw_f77.h b/fft/fftw_f77.h index b825f23..a566a02 100644 --- a/fft/fftw_f77.h +++ b/fft/fftw_f77.h @@ -1,53 +1,53 @@ !> !> @file fftw_f77.h !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> ! This file contains PARAMETER statements for various constants ! that can be passed to FFTW routines. You should include ! this file in any FORTRAN program that calls the fftw_f77 ! routines (either directly or with an #include statement ! if you use the C preprocessor). integer FFTW_FORWARD,FFTW_BACKWARD parameter (FFTW_FORWARD=-1,FFTW_BACKWARD=1) integer FFTW_REAL_TO_COMPLEX,FFTW_COMPLEX_TO_REAL parameter (FFTW_REAL_TO_COMPLEX=-1,FFTW_COMPLEX_TO_REAL=1) integer FFTW_ESTIMATE,FFTW_MEASURE parameter (FFTW_ESTIMATE=0,FFTW_MEASURE=1) integer FFTW_OUT_OF_PLACE,FFTW_IN_PLACE,FFTW_USE_WISDOM parameter (FFTW_OUT_OF_PLACE=0) parameter (FFTW_IN_PLACE=8,FFTW_USE_WISDOM=16) integer FFTW_THREADSAFE parameter (FFTW_THREADSAFE=128) ! Constants for the MPI wrappers: integer FFTW_TRANSPOSED_ORDER, FFTW_NORMAL_ORDER integer FFTW_SCRAMBLED_INPUT, FFTW_SCRAMBLED_OUTPUT parameter(FFTW_TRANSPOSED_ORDER=1, FFTW_NORMAL_ORDER=0) parameter(FFTW_SCRAMBLED_INPUT=8192) parameter(FFTW_SCRAMBLED_OUTPUT=16384) diff --git a/fft/tfft.f90 b/fft/tfft.f90 index 866a66b..18da423 100644 --- a/fft/tfft.f90 +++ b/fft/tfft.f90 @@ -1,141 +1,141 @@ !> !> @file tfft.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Test the FFT routines exported by module fft ! USE fft IMPLICIT NONE INTEGER :: nx=8 DOUBLE COMPLEX, DIMENSION(:,:), ALLOCATABLE :: a,b,c DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: rn1, rn2 DOUBLE PRECISION :: pi, argx INTEGER :: ix, nx0=2 ! PRINT*, 'Enter array dim. nx' READ(*,*) nx ALLOCATE(a(nx,4), b(nx,4), c(4,nx)) ALLOCATE(rn1(nx,4), rn2(nx,4)) ! ! Create initial array pi = 4.0d0*ATAN(1.0d0) WRITE(*,*) 'Enter mode nx0' READ(*,*) nx0 DO ix=0,nx-1 argx = 2.0d0*pi/nx*nx0*ix a(ix+1,:) = COS(argx) END DO ! WRITE(*,*) '-----------------------' WRITE(*,*) 'Reals of original array' WRITE(*,*) '-----------------------' WRITE(*,'(10f10.4)') REAL(a) !________________________________________________________________________________ ! WRITE(*,*) '-----------------' WRITE(*,*) 'testing four1 ...' WRITE(*,*) '-----------------' ! ! Forward transform b=a CALL four1(b(:,1), -1) ! ! Check WRITE(*,'(a/(10f10.4))') 'Reals of FFT', REAL(b(:,1)) WRITE(*,'(a/(10f10.4))') 'Imag of FFT', AIMAG(b(:,1)) ! ! Backward transform CALL four1(b(:,1), 1) b = b/REAL(nx) ! ! Check WRITE(*,'(a/(10f10.4))') 'Reals of FFT', REAL(b(:,1)) WRITE(*,'(a/(10f10.4))') 'Imag of FFT', AIMAG(b(:,1)) ! b(:,1) = b(:,1)-a(:,1) rn1(:,1) = REAL(b(:,1),8) rn2(:,1) = AIMAG(b(:,1)) PRINT *, 'Min. max err of real', MINVAL(rn1(:,1)), MAXVAL(rn1(:,1)) PRINT *, 'Min. max err of imag', MINVAL(rn2(:,1)), MAXVAL(rn2(:,1)) !________________________________________________________________________________ ! WRITE(*,*) '-------------------' WRITE(*,*) 'testing fourcol ...' WRITE(*,*) '-------------------' ! ! Forward transform b=a CALL fourcol(b(:,:), -1) ! ! Check WRITE(*,'(a/(10f10.4))') 'Reals of FFT', REAL(b(:,1)) WRITE(*,'(a/(10f10.4))') 'Imag of FFT', AIMAG(b(:,1)) ! ! Backward transform CALL fourcol(b(:,:), 1) b = b/REAL(nx) ! ! Check WRITE(*,'(a/(10f10.4))') 'Reals of FFT', REAL(b(:,1)) WRITE(*,'(a/(10f10.4))') 'Imag of FFT', AIMAG(b(:,1)) ! b = b-a rn1 = REAL(b,8) rn2 = AIMAG(b) PRINT *, 'Min. max err of real', MINVAL(rn1), MAXVAL(rn1) PRINT *, 'Min. max err of imag', MINVAL(rn2), MAXVAL(rn2) !________________________________________________________________________________ ! WRITE(*,*) '-------------------' WRITE(*,*) 'testing fourrow ...' WRITE(*,*) '-------------------' ! ! Forward transform b=a c = TRANSPOSE(b) CALL fourrow(c, -1) ! ! Check WRITE(*,'(a/(10f10.4))') 'Reals of FFT', REAL(c(1,:)) WRITE(*,'(a/(10f10.4))') 'Imag of FFT', AIMAG(c(1,:)) ! ! Backward transform CALL fourrow(c, 1) c = c/REAL(nx) ! ! Check WRITE(*,'(a/(10f10.4))') 'Reals of FFT', REAL(c(1,:)) WRITE(*,'(a/(10f10.4))') 'Imag of FFT', AIMAG(c(1,:)) ! b = TRANSPOSE(c)-a rn1 = REAL(b,8) rn2 = AIMAG(b) PRINT *, 'Min. max err of real', MINVAL(rn1), MAXVAL(rn1) PRINT *, 'Min. max err of imag', MINVAL(rn2), MAXVAL(rn2) ! ! Clean up DEALLOCATE(a,b,c, rn1,rn2) END PROGRAM main diff --git a/matlab/cds_mat.m b/matlab/cds_mat.m index e2a79d4..25e1c65 100644 --- a/matlab/cds_mat.m +++ b/matlab/cds_mat.m @@ -1,44 +1,44 @@ % % @file cds_mat.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % function [mata, diag] = cds_mat(file, dset) n=double(hdf5read(file,dset, 'RANK')); dists=double(hdf5read(file, strcat(dset,'/dists'))); val=hdf5read(file, strcat(dset,'/vals')); %% Shift the off-diagonals %% for k=1:length(dists) d=dists(k); if d < 0 val(1:n+d,k) = val(1-d:n,k); elseif d > 0 val(n:-1:d+1,k) = val(n-d:-1:1,k); end end mata = spdiags(val, dists, n,n); if nargout == 2 idiag = find(dists==0); diag = val(:,idiag); end diff --git a/matlab/csr_mat.m b/matlab/csr_mat.m index ddfa679..7dd9ac5 100644 --- a/matlab/csr_mat.m +++ b/matlab/csr_mat.m @@ -1,45 +1,45 @@ % % @file csr_mat.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % function [mata, diag] = csr_mat(file, dset) n=hdf5read(file,dset, 'RANK'); nnz=hdf5read(file,dset, 'NNZ'); cols=hdf5read(file, strcat(dset,'/cols')); irow=hdf5read(file, strcat(dset,'/irow')); val=hdf5read(file, strcat(dset,'/val')); idiag=hdf5read(file, strcat(dset,'/idiag')); for i=1:n s = irow(i); e = irow(i+1)-1; rows(s:e) = i; end cols=double(cols); rows=double(rows); vals = double(val); mata = sparse(rows,cols,vals); if nargout == 2 diag = val(idiag); end diff --git a/matlab/driv1.m b/matlab/driv1.m index 2d28ea7..cafb273 100644 --- a/matlab/driv1.m +++ b/matlab/driv1.m @@ -1,68 +1,68 @@ % % @file driv1.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % file='driv1.h5'; x = hdf5read(file, '/X'); knotsx = hdf5read(file, 'KNOTSX'); splinesx = hdf5read(file, '/splinesx','V71Dimensions', true); y = hdf5read(file, '/Y'); knotsy = hdf5read(file, 'KNOTSY'); splinesy = hdf5read(file, '/splinesy','V71Dimensions', true); c=['b','g','r','c','m','y','k']; nc=size(c,2); figure %subplot(211) hold on ns = size(splinesx,1); attr=hdf5read(file,'/splinesx/title'); title_ann=attr.Data; for i = 1:ns cc = mod(i-1,nc)+1; plot(x,splinesx(i,:),c(cc)) end yk=zeros(size(knotsx)); plot(knotsx,yk,'ro'); grid on title(title_ann) ylabel('Splines') figure %subplot(212) hold on ns = size(splinesy,1); attr=hdf5read(file,'/splinesy/title'); title_ann=attr.Data; for i = 1:ns cc = mod(i-1,nc)+1; plot(y,splinesy(i,:),c(cc)) end yk=zeros(size(knotsy)); i1=find(knotsy==y(1)); i2=find(knotsy>=y(size(y,1)),1); plot(knotsy(i1:i2),yk(i1:i2),'ro'); grid on title(title_ann) ylabel('Splines') diff --git a/matlab/fit.m b/matlab/fit.m index 64b90e8..7f6674a 100644 --- a/matlab/fit.m +++ b/matlab/fit.m @@ -1,38 +1,38 @@ % % @file fit.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % P1=polyfit(log(n),log(err1),1) P2=polyfit(log(n),log(err2),1) P3=polyfit(log(n),log(err3),1) figure loglog(n,err1,'o', n, exp(P1(2)).*n.^P1(1), 'b') hold on loglog(n,err2,'rh', n, exp(P2(2)).*n.^P2(1),'r') loglog(n,err3,'*k', n, exp(P3(2)).*n.^P3(1), 'k') grid on xlabel('Number of intervals N'); ylabel('Discretization Error') title('2D Cylindrical problem with m=1, s=10') diff --git a/matlab/fit1d.m b/matlab/fit1d.m index 6475e77..f96663d 100644 --- a/matlab/fit1d.m +++ b/matlab/fit1d.m @@ -1,94 +1,94 @@ % % @file fit1d.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % file='fit1d.h5'; x = hdf5read(file, '/X'); f = hdf5read(file, '/FCALC'); fexact = hdf5read(file, '/FEXACT'); error = hdf5read(file, '/ERROR'); f1 = hdf5read(file, '/FCALC1'); fexact1 = hdf5read(file, '/FEXACT1'); error1 = hdf5read(file, '/ERROR1'); splines = hdf5read(file, '/SPLINES'); splines = splines'; % % Attributes % nidbas = hdf5read(file,'/NIDBAS'); nx = hdf5read(file,'/NX'); attr=hdf5read(file,'/FEXACT/title'); fexact_ann=attr.Data; attr=hdf5read(file,'/FCALC/title'); f_ann=attr.Data; attr=hdf5read(file,'/ERROR/title'); error_ann=attr.Data; attr=hdf5read(file,'/FEXACT1/title'); fexact1_ann=attr.Data; attr=hdf5read(file,'/FCALC1/title'); f1_ann=attr.Data; attr=hdf5read(file,'/ERROR1/title'); error1_ann=attr.Data; label=sprintf('Splines of degree %d, NX =%d', nidbas, nx); ns = size(splines,1); c=['b','g','r','c','m','y','k']; nc=size(c,2); figure subplot(511) hold on for i = 1:ns cc = mod(i-1,nc)+1; plot(x,splines(i,:),c(cc)) end grid on ylabel('Splines') xlabel('X') title(label); hold off subplot(512) plot(x, f, 'o', x, fexact) legend(f_ann, fexact_ann) xlabel('X') grid on subplot(513) plot(x, error) ylabel(error_ann) xlabel('X') grid on subplot(514) plot(x, f1, 'h', x, fexact1) legend(f1_ann, fexact1_ann) xlabel('X') grid on subplot(515) plot(x, error1) ylabel(error1_ann) xlabel('X') grid on diff --git a/matlab/fit2d.m b/matlab/fit2d.m index f225459..3d69116 100644 --- a/matlab/fit2d.m +++ b/matlab/fit2d.m @@ -1,80 +1,80 @@ % % @file fit2d.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % file='fit2d.h5'; % % Get data from data sets % r=hdf5read(file,'/xpt'); t=hdf5read(file,'/ypt'); fcalc=hdf5read(file,'/fcalc'); fexact=hdf5read(file,'/fexact'); errs=hdf5read(file,'/errs'); % % Attributes % NR=hdf5read(file,'/NX'); NTH=hdf5read(file,'/NY'); NIDBAS1=hdf5read(file,'/NIDBAS1'); NIDBAS2=hdf5read(file,'/NIDBAS2'); MBESS=hdf5read(file,'/MBESS'); LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), mbess = %d', NR, NTH, ... NIDBAS1, NIDBAS2, MBESS); attr=hdf5read(file,'/xpt/title'); x_ann=attr.Data; attr=hdf5read(file,'/ypt/title'); y_ann=attr.Data; attr=hdf5read(file,'/fcalc/title'); fcalc_ann=attr.Data; attr=hdf5read(file,'/fexact/title'); fexact_ann=attr.Data; attr=hdf5read(file,'/errs/title');errs_ann=attr.Data; [R,T]=meshgrid(r,t); x = R.*cos(T); y= R.*sin(T); figure subplot(221) pcolor(double(x),double(y),double(fcalc)); shading interp xlabel('X'); ylabel('Y') title(LABEL) colorbar subplot(222) pcolor(double(x),double(y),double(fexact)) shading interp axis image xlabel('X'); ylabel('Y') title('X-Y plane') colorbar subplot(223) surfc(double(x),double(y),double(errs)) xlabel('X'); ylabel('Y'); title(errs_ann) %% Plot error at theta ~ pi/4 k = max(find(t. % % @authors % (in alphabetical order) % @author Trach-Minh Tran % function [yfit, P] = fitlog(x,y) P=polyfit(log(x),log(y),1); yfit = exp(P(2)).*x.^P(1); diff --git a/matlab/fourier_gs.m b/matlab/fourier_gs.m index 09bbb05..d8e94bb 100644 --- a/matlab/fourier_gs.m +++ b/matlab/fourier_gs.m @@ -1,69 +1,69 @@ % % @file fourier_gs.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % clear all tau=0; alpha=0.5; theta=-pi:0.02:pi; [x,y] = meshgrid(theta,theta); n1=length(theta); n2=n1; %%%% %%%% Gauss-Seidel relaxations %%%% str_title=sprintf('tau = %.1f, alpha = %.2f',tau, alpha); ee = exp(i.*theta); eep= conj(ee); csin= alpha.*complex(alpha, (tau/2).*imag(ee)); G=zeros(n1,n2); for ii=1:n1 for jj=1:n2 num = ee(ii) + csin(ii)*ee(jj); G(ii,jj) = num / (2*(1+alpha^2) - conj(num)); end end figure hold off G0=(ee+csin)./(2*(1+alpha^2)-(eep+conj(csin))); plot(theta, abs(G(:,1)), 'r', 'LineWidth', 2) hold on plot(theta, abs(G0), 'g', 'LineWidth', 2) for jj=1:20:n2 plot(theta, abs(G(:,jj)), 'b') end xlabel('\theta_1'); ylabel('Amplification Factor for Gauss-Seidel') title(str_title) % $$$ figure % $$$ mesh(x,y,abs(G)) % $$$ xlabel('\theta_1'); ylabel('\theta_2') % $$$ title(str_title); % $$$ view(-120,25) max(max(abs(G))) diff --git a/matlab/fourier_jac.m b/matlab/fourier_jac.m index 4e9da71..ae518ab 100644 --- a/matlab/fourier_jac.m +++ b/matlab/fourier_jac.m @@ -1,69 +1,69 @@ % % @file fourier_jac.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % clear all omega=0.8; tau=-2; alpha=1; c=2*omega/(1+alpha^2); str_title=sprintf('omega = %.1f, tau = %.1f, alpha = %.2f', omega, ... tau, alpha) theta1=-pi:0.01:pi; theta2=-pi:0.01:pi; [x,y] = meshgrid(theta1,theta2); n1=length(theta1); n2=length(theta2); %%%% %%%% Damped Jacobi relaxations %%%% G=zeros(n1,n2); for ii=1:n1 for jj=1:n2 G(ii,jj) = 1-c.*( sin(theta1(ii)/2)^2 + alpha^2*sin(theta2(jj)/2)^2 ... + 0.25*alpha*tau*sin(theta1(ii))*sin(theta2(jj)) ); end end figure hold off G0 = 1-c.*sin(theta1./2).^2; plot(theta1, G(:,1), 'r', 'LineWidth', 2) hold on plot(theta1, G0, 'g', 'LineWidth', 2) for jj=1:20:n2 plot(theta1, G(:,jj), 'b') end xlabel('\theta_1'); ylabel('Amplification Factor for Jacobi') title(str_title) % $$$ figure % $$$ mesh(x,y,G) % $$$ xlabel('\theta_1'); ylabel('\theta_2') % $$$ title(str_title); max(max(abs(G))) diff --git a/matlab/fourier_smooth.m b/matlab/fourier_smooth.m index 45e8a7d..ed3ddd8 100644 --- a/matlab/fourier_smooth.m +++ b/matlab/fourier_smooth.m @@ -1,106 +1,106 @@ % % @file fourier_smooth.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % % Brute force computation of \mu = MAX |G| for % damped Jacobi and Gauss-Seidel relaxation % % Find optimal damping factor \omega for Jacobi relaxations % if ~exist('tau','var'), tau=0; end if ~exist('alpha','var'), alpha=1.0; end dth=0.01; theta1=-pi+dth:dth:pi; theta2=-pi+dth:dth:pi; nth1=length(theta1); nth2=length(theta2); sint12=sin(theta1./2).^2; sint22=sin(theta2./2).^2; sint1=sin(theta1); sint2=sin(theta2); [S1,S2]=meshgrid(sint1,sint2); ctau=0.25*alpha*tau; omega=0.5:0.002:1; n=length(omega); G = zeros(nth1,nth2); for i=1:n c=2*omega(i)/(1+alpha^2); for i1=1:nth1 for i2=1:nth2 if or(abs(theta1(i1))>= pi/2, abs(theta2(i2)) >= pi/2); G(i1,i2) = abs(1-c*(sint12(i1)+alpha^2*sint22(i2) + ... ctau*sint1(i1)*sint2(i2))); end end end [gmax,imax]=max(G); [mu(i),jmax]=max(gmax); theta1_opt(i) = theta1(imax(jmax)); theta2_opt(i) = theta2(jmax); end [mu_min,i_min]=min(mu); omega_opt=omega(i_min); str_title=sprintf(['omega = %.3f, mu = %.3f, alpha = %.2f, tau = ' ... '%.1f'], omega_opt, mu_min, alpha, tau); figure subplot(211) plot(omega,mu,'LineWidth',2); xlabel('\omega'); ylabel('\mu') grid on title(str_title); subplot(212) plot(omega, theta1_opt, omega, theta2_opt,'LineWidth',2); legend('\theta_{1opt}', '\theta_{2opt}') xlabel('\omega'); ylabel('optimum \theta') grid on % % \mu for Gauss-Seidel relaxation % Ggs = zeros(nth1,nth2); exp1=complex(cos(theta1),sin(theta1)); exp2=complex(cos(theta2),sin(theta2)); c=2*(1+alpha^2); ctau=complex(alpha^2, (0.5*alpha*tau).*sint1); for i1=1:nth1 for i2=1:nth2 if or(abs(theta1(i1))>= pi/2, abs(theta2(i2)) >= pi/2); Num = exp1(i1) + ctau(i1)*exp2(i2); Ggs(i1,i2) = abs( Num/(c-conj(Num)) ); end end end [gmax,imax]=max(Ggs); [mugs,jmax]=max(gmax); theta1_gs_opt = theta1(imax(jmax)); theta2_gs_opt = theta2(jmax); fprintf('alpha = %.2f, tau = %.1f, Ggs = %.4f, theta1 = %.4f, theta2 = %.4f\n', alpha, tau, ... mugs, theta1_gs_opt, theta2_gs_opt); subplot(211) hold on plot(omega,repmat(mugs,1,length(omega)),'r--','LineWidth',2) diff --git a/matlab/gb_mat.m b/matlab/gb_mat.m index a15b31d..64440af 100644 --- a/matlab/gb_mat.m +++ b/matlab/gb_mat.m @@ -1,43 +1,43 @@ % % @file gb_mat.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % function [mata] = gb_mat(file, dset) rank=h5readatt(file, dset, 'RANK'); ku=h5readatt(file, dset, 'KU'); kl=h5readatt(file, dset, 'KL'); gbmat=h5read(file, dset); m=rank; n=rank; mata = zeros(m,n); for i=1:m jmin = max(1,i-kl); jmax = min(n,i+ku); for j=jmin:jmax ib = kl+ku+i-j+1; mata(i,j)=gbmat(ib,j); end end clear gbmat; \ No newline at end of file diff --git a/matlab/gbmat.m b/matlab/gbmat.m index 3bb7c5e..dc14e74 100644 --- a/matlab/gbmat.m +++ b/matlab/gbmat.m @@ -1,60 +1,60 @@ % % @file gbmat.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % file='pde2d.h5'; % % Attributes of GB matrix % NR=hdf5read(file,'/NX'); NTH=hdf5read(file,'/NY'); NIDBAS1=hdf5read(file,'/NIDBAS1'); NIDBAS2=hdf5read(file,'/NIDBAS2'); %mat='/MAT1'; rank=hdf5read(file,strcat(mat,'/RANK')); ku=hdf5read(file,strcat(mat,'/KU')); kl=hdf5read(file,strcat(mat,'/KL')); gb_mat=hdf5read(file,mat); rhs0=hdf5read(file,'/RHS'); sol0=hdf5read(file,'/SOL'); % m=rank; n=rank; a = zeros(m,n); for i=1:m jmin = max(1,i-kl); jmax = min(n,i+ku); for j=jmin:jmax ib = kl+ku+i-j+1; a(i,j)=gb_mat(ib,j); end end %clear gb_mat; S = sparse(a); %clear a; figure spy(S); LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), rank = %d', NR, NTH, ... NIDBAS1, NIDBAS2, rank); title(LABEL) diff --git a/matlab/gs_fd.m b/matlab/gs_fd.m index 8e8c1b6..e812c93 100644 --- a/matlab/gs_fd.m +++ b/matlab/gs_fd.m @@ -1,66 +1,66 @@ % % @file gs_fd.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % clear all N=1024; L=100; n0=1; Narr=[128 256 512 1024]; for i=1:length(Narr); N=Narr(i); h=L/N; h2=h^2; title_str=sprintf(['N=%d, L=%.1f, n0=%.1f'], N, L, n0); v0=-2.+n0*h2; u0=1.; dom(i)= 2*abs(u0)/abs(v0); v=v0.*ones(N,1); u=u0*ones(N-1,1); mata=diag(u,1) + diag(u,-1) + diag(v); matl= tril(mata,0); % D+L lambda = eig(-triu(mata,1),matl); rho(i)=max(abs(lambda)); % $$$ figure % $$$ plot(lambda,'o') % $$$ xlabel('Real eigenvalue'), ylabel('Imag eigenvalue') % $$$ grid on % $$$ axis equal % $$$ title_str=sprintf(['N=%d, L=%.1f, n0=%.1f, Spec. Radius=%.4f, dom=%.4f'], N, L, ... % $$$ n0, rho(i),dom(i)); % $$$ title(title_str); fprintf(1, 'Spectral Radius of GS relaxation matrix = %.4f, dom=%.4f\n', ... rho(i),dom(i)) end figure plot(Narr, rho,'o-') xlabel('N'); ylabel('GS spectral radius') title_str=sprintf(['FD scheme, L=%.1f, n0=%.1f'], L, n0); title(title_str); grid on diff --git a/matlab/gs_fe.m b/matlab/gs_fe.m index 7dfaa33..9f77cec 100644 --- a/matlab/gs_fe.m +++ b/matlab/gs_fe.m @@ -1,66 +1,66 @@ % % @file gs_fe.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % clear all N=1024; L=100; n0=1; Narr=[128 256 512 1024]; for i=1:length(Narr); N=Narr(i); h=L/N; h2=h^2; title_str=sprintf(['N=%d, L=%.1f, n0=%.1f'], N, L, n0); v0=-2.+2.0*n0*h2/6.0; u0=1.+n0*h2/6.0; dom(i)= 2*abs(u0)/abs(v0); v=v0.*ones(N,1); u=u0*ones(N-1,1); mata=diag(u,1) + diag(u,-1) + diag(v); matl= tril(mata,0); % D+L lambda = eig(-triu(mata,1),matl); rho(i)=max(abs(lambda)); % $$$ figure % $$$ plot(lambda,'o') % $$$ xlabel('Real eigenvalue'), ylabel('Imag eigenvalue') % $$$ grid on % $$$ axis equal % $$$ title_str=sprintf(['N=%d, L=%.1f, n0=%.1f, Spec. Radius=%.4f, dom=%.4f'], N, L, ... % $$$ n0, rho(i),dom(i)); % $$$ title(title_str); fprintf(1, 'Spectral Radius of GS relaxation matrix = %.4f, dom=%.4f\n', ... rho(i),dom(i)) end figure plot(Narr, rho,'o-') xlabel('N'); ylabel('GS spectral radius') title_str=sprintf(['FE scheme, L=%.1f, n0=%.1f'], L, n0); title(title_str); grid on diff --git a/matlab/h5Complex.m b/matlab/h5Complex.m index ab338f9..c2f8774 100644 --- a/matlab/h5Complex.m +++ b/matlab/h5Complex.m @@ -1,50 +1,50 @@ % % @file h5Complex.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % function z = h5Complex(file, dset) data = hdf5read(file, dset); dims = size(data); rank = size(dims,2); switch rank case {1} for i=1:dims(1) z(i)=complex(cell2mat(data(i,1).Data(1)), cell2mat(data(i,1).Data(2))); end case {2} for i=1:dims(1) for j=1:dims(2) z(i,j)=complex(cell2mat(data(i,j).Data(1)), cell2mat(data(i,j).Data(2))); end end case {3} for i=1:dims(1) for j=1:dims(2) for k=1:dims(3) z(i,j,k)=complex(cell2mat(data(i,j,k).Data(1)), cell2mat(data(i,j,k).Data(2))); end end end end diff --git a/matlab/h5Complex_ll.m b/matlab/h5Complex_ll.m index ad43007..de9bf9d 100644 --- a/matlab/h5Complex_ll.m +++ b/matlab/h5Complex_ll.m @@ -1,33 +1,33 @@ % % @file h5Complex_ll.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % function z = h5Complex_ll(file, dset) fid=H5F.open(file, 'H5F_ACC_RDONLY', 'H5P_DEFAULT'); dset_id=H5D.open(fid, dset); dxpl = 'H5P_DEFAULT'; data = H5D.read(dset_id,'H5ML_DEFAULT','H5S_ALL','H5S_ALL', dxpl); z = complex(data.real, data.imaginary); H5D.close(dset_id); H5F.close(fid); diff --git a/matlab/jac_opt.m b/matlab/jac_opt.m index 82d6f70..04b2c09 100644 --- a/matlab/jac_opt.m +++ b/matlab/jac_opt.m @@ -1,78 +1,78 @@ % % @file jac_opt.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % % % Find optimal damping factor \omega for Jacobi relaxations % clear all alpha=0.5; theta2=pi/2:0.01:pi; theta1=0:0.01:pi; [x,y]=meshgrid(theta1,theta1); sint12=sin(theta1./2).^2; sint22=sin(theta2./2).^2; [ksi,eta]=meshgrid(sint12,sint22); nth1=length(theta1); nth2=length(theta2); omega=0.1:0.01:1; n=length(omega); for i=1:n c=2*omega(i)/(1+alpha^2); G=abs(1 - c.*(ksi+(alpha^2).*eta )); [gmax,imax]=max(G); [mu(i),jmax]=max(gmax); eta_opt(i)=eta(imax(jmax),jmax); ksi_opt(i)=ksi(imax(jmax),jmax); end [mu_min,i_min]=min(mu); omega_opt=omega(i_min); str_title=sprintf('omega = %.3f, mu = %.3f, alpha = %.2f', omega_opt, ... mu_min, alpha); figure plot(omega,mu,'o-', omega, ksi_opt, '*-', omega, eta_opt, '^-'); legend('\mu', '\xi_{opt}', '\eta_{opt}') xlabel('\omega'); ylabel('\mu') grid on title(str_title); % $$$ % $$$ c=2*omega_opt/(1+alpha^2); % $$$ G=1-c.*(ksi+(alpha^2).*eta); % $$$ figure % $$$ hold off % $$$ plot(theta1, G(1,:), 'r', 'LineWidth', 2) % $$$ hold on % $$$ plot(theta1, G(nth2,:), 'g', 'LineWidth', 2) % $$$ for jj=1:20:nth2 % $$$ plot(theta1, G(jj,:), 'b') % $$$ end % $$$ xlabel('\theta_1'); ylabel('Amplification Factor for Jacobi') % $$$ title(str_title) % $$$ grid on diff --git a/matlab/modes.m b/matlab/modes.m index ccb5c06..0dd4521 100644 --- a/matlab/modes.m +++ b/matlab/modes.m @@ -1,84 +1,84 @@ % % @file modes.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % N=8; kmodes=N-1; %% modes on fine grid %% xh=(1/N).*(0:1:N); wh=zeros(N+1,kmodes); for k=1:kmodes wh(:,k) = sin((k*pi).*xh); end %% Restriction %% R=[ 1 0 0 0 0 0 0 0 0 0 0.5 1 0.5 0 0 0 0 0 0 0 0 0.5 1 0.5 0 0 0 0 0 0 0 0 0.5 1 0.5 0 0 0 0 0 0 0 0 0 1 ]; %% Null space of Restriction %% ns = [ 0 0 0 0 2 0 0 0 -1 -1 0 0 0 2 0 0 0 -1 -1 0 0 0 2 0 0 0 -1 -1 0 0 0 2 0 0 0 0]; figure subplot(211) plot(ns,'o-'); title('Basis of Null space of Restriction') subplot(212) plot(R','o-'); title('Basis of Range of Prolongation') %% modes on coarse grid %% N2h = N/2; x2h=(1/N2h).*(0:1:N2h); w2h = R*wh; x=0:0.01:1.; figure for k=1:kmodes subplot(3,3,k) plot(xh,wh(:,k),'o', x, sin((k*pi).*x),'b-', x2h, w2h(:,k), 'ro-'); grid on end figure for k=1:N/2 subplot(2,2,k) plot(xh,wh(:,k),'o-', xh,wh(:,N-k),'r*-') grid on end diff --git a/matlab/pde1d.m b/matlab/pde1d.m index 86b6642..3f67aa7 100644 --- a/matlab/pde1d.m +++ b/matlab/pde1d.m @@ -1,72 +1,72 @@ % % @file pde1d.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % file='pde1d.h5'; fprintf(1,'NX = %d\n', hdf5read(file,'/NX')); fprintf(1,'NIDBAS = %d\n', hdf5read(file,'/NIDBAS')); fprintf(1,'NGAUSS = %d\n', hdf5read(file,'/NGAUSS')); fprintf(1,'KDIFF = %d\n', hdf5read(file,'/KDIFF')); x = hdf5read(file, '/XGRID'); f= hdf5read(file, '/SOLCAL'); fexact= hdf5read(file, '/SOLANA'); err=hdf5read(file, '/ERR'); f=f'; fexact=fexact'; err=err'; figure subplot(311) plot(x,f(1,:),'o',x,fexact(1,:)) xlabel('X') ylabel('Function') grid on subplot(312) plot(x,f(2,:),'o',x,fexact(2,:)) xlabel('X') ylabel('1st Derivative') grid on subplot(313) plot(x,f(3,:),'o',x,fexact(3,:)) xlabel('X') ylabel('2nd Derivative') grid on figure subplot(311) plot(x,err(1,:),'o-') xlabel('X'); ylabel('Errors on function'); grid on subplot(312) plot(x,err(2,:),'o-') xlabel('X'); ylabel('Errors on 1st derivative'); grid on subplot(313) plot(x,err(3,:),'o-') xlabel('X'); ylabel('Errors on 2nd derivative'); grid on diff --git a/matlab/pde1d_eig.m b/matlab/pde1d_eig.m index 8b39b43..70b5434 100644 --- a/matlab/pde1d_eig.m +++ b/matlab/pde1d_eig.m @@ -1,30 +1,30 @@ % % @file pde1d_eig.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % file='pde1d_eig.h5'; %%% get sparse matrix and its diagonal elelments in dig %%% [mata,diag]=zcsr_mat(file,'/MAT'); spy(mata,12); diff --git a/matlab/pde1d_eig_zcsr.m b/matlab/pde1d_eig_zcsr.m index c6fa3e7..5cbe4d1 100644 --- a/matlab/pde1d_eig_zcsr.m +++ b/matlab/pde1d_eig_zcsr.m @@ -1,30 +1,30 @@ % % @file pde1d_eig_zcsr.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % file='pde1d_eig.h5'; %%% get sparse matrix and its diagonal elelments in dig %%% [mata,diag]=zcsr_mat(file,'/MAT'); spy(mata,12); diff --git a/matlab/pde1d_eig_zmumps.m b/matlab/pde1d_eig_zmumps.m index 2a431bd..b9c721c 100644 --- a/matlab/pde1d_eig_zmumps.m +++ b/matlab/pde1d_eig_zmumps.m @@ -1,55 +1,55 @@ % % @file pde1d_eig_zmumps.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % file='pde1d_eig_zmumps.h5'; %%% Read mumps matrix and convert to Matlab sparse format [mata,diag_ele]=zmumps_mat(file,'/MAT'); n=size(mata,1); %spy(mata,12); arpack_vals=h5Complex_ll(file,'/eig_vals'); nev=size(arpack_vals,1); arpack_vecs = h5Complex_ll(file, '/eig_vecs'); %%% Compute eigen values and vectors, using EIGS [V,D,FLAG]=eigs(mata,nev,'SM'); [eigs_vals,perm]=sort(diag(D)); eigs_vecs=V(:,perm); fprintf('Eigenvalues from Arpack and Matlab eigs\n'); for i=1:nev fprintf('%i (%.5e %.5e), %.5e\n',i,real(arpack_vals(i)),imag(arpack_vals(i)),eigs_vals(i)); end fprintf('Norm of difference %.3e\n', norm(arpack_vals-eigs_vals,Inf)); %%% Renormalize EIGS %%% fprintf('\n\nDiff of Eigenvectors from Arpack and Matlab eigs\n'); for i=1:nev nrm=arpack_vecs(1,i); eigs_vecs(:,i) = (eigs_vecs(:,i)./eigs_vecs(1,i)).*nrm; diff_vecs = norm(arpack_vecs(:,i)-eigs_vecs(:,i),Inf); fprintf('%i %10.3e\n', i, diff_vecs); end diff --git a/matlab/pde1dp.m b/matlab/pde1dp.m index 97b7a6e..e464d62 100644 --- a/matlab/pde1dp.m +++ b/matlab/pde1dp.m @@ -1,41 +1,41 @@ % % @file pde1dp.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % file='pde1dp.h5'; fprintf(1,'NX = %d\n', hdf5read(file,'/NX')); fprintf(1,'NIDBAS = %d\n', hdf5read(file,'/NIDBAS')); fprintf(1,'NGAUSS = %d\n', hdf5read(file,'/NGAUSS')); mata=hdf5read(file,'/mata'); xpts = hdf5read(file, '/rhs/x'); frhs = hdf5read(file, '/rhs/f'); figure plot(xpts,frhs); xlabel('X') ylabel('RHS') grid on diff --git a/matlab/pde1dp_cmpl.m b/matlab/pde1dp_cmpl.m index e07fc31..53f7348 100644 --- a/matlab/pde1dp_cmpl.m +++ b/matlab/pde1dp_cmpl.m @@ -1,49 +1,49 @@ % % @file pde1dp_cmpl.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % file='pde1dp_cmpl.h5'; xgrid=hdf5read(file,'/xgrid'); nx = size(xgrid)-1; rhs=h5Complex(file, '/rhs'); sol=h5Complex(file, '/sol'); %mat=h5Complex(file, '/mat'); x=hdf5read(file,'/x'); solana=h5Complex(file,'/solana'); solcal=h5Complex(file,'/solcal'); err=hdf5read(file, '/err'); figure subplot(211) plot(x, real(solana),x,imag(solana),x,real(solcal),'o',x, ... imag(solcal), '*') legend('Exact Real', 'Exact Imag', 'Calc. Real', 'Calc. Imag') xlabel('X'); ylabel('SOL'); subplot(212) plot(x, err, 'o-'); xlabel('X'); ylabel('|Error|') grid on diff --git a/matlab/pde1dp_cmpl_dft.m b/matlab/pde1dp_cmpl_dft.m index 10aa61a..72c8b9d 100644 --- a/matlab/pde1dp_cmpl_dft.m +++ b/matlab/pde1dp_cmpl_dft.m @@ -1,70 +1,70 @@ % % @file pde1dp_cmpl_dft.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % file='pde1dp_cmpl_dft.h5'; xgrid=hdf5read(file,'/xgrid'); nx = size(xgrid)-1; mode=0:nx-1; rhs=h5Complex(file, '/rhs'); sol=h5Complex(file, '/sol'); rhs_fft=h5Complex(file, '/rhs_fft'); sol_fft=h5Complex(file, '/sol_fft'); mat=h5Complex(file, '/mat'); x=hdf5read(file,'/x'); solana=h5Complex(file,'/solana'); solcal=h5Complex(file,'/solcal'); err=hdf5read(file, '/err'); figure subplot(211) plot(x, real(solana),x,imag(solana),x,real(solcal),'o',x, ... imag(solcal), '*') legend('Exact Real', 'Exact Imag', 'Calc. Real', 'Calc. Imag') xlabel('X'); ylabel('SOL'); subplot(212) plot(x, err, 'o-'); xlabel('X'); ylabel('|Error|') grid on figure subplot(311) plot(mode, real(mat), 'o', mode, imag(mat), '*') xlabel('mode'); ylabel('DFT of MAT') legend('Real', 'Imag') grid on subplot(312) plot(mode, real(rhs_fft), 'o', mode, imag(rhs_fft), '*') xlabel('mode'); ylabel('DFT of RHS') legend('Real', 'Imag') grid on subplot(313) plot(mode, real(sol_fft), 'o', mode, imag(sol_fft), '*') xlabel('mode'); ylabel('DFT of SOL') legend('Real', 'Imag') grid on diff --git a/matlab/pde1dp_cmpl_pardiso.m b/matlab/pde1dp_cmpl_pardiso.m index 1878e8e..bcc75b6 100644 --- a/matlab/pde1dp_cmpl_pardiso.m +++ b/matlab/pde1dp_cmpl_pardiso.m @@ -1,72 +1,72 @@ % % @file pde1dp_cmpl_pardiso.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % file='pde1dp_cmpl_pardiso.h5'; xgrid=hdf5read(file,'/xgrid'); nx = size(xgrid)-1; rhs=h5Complex(file, '/rhs'); sol=h5Complex(file, '/sol'); cols=hdf5read(file, '/MAT/cols'); irow=hdf5read(file, '/MAT/irow'); val=h5Complex(file, '/MAT/val'); perm=hdf5read(file, '/MAT/perm'); cols=double(cols); irow=double(irow); perm=double(perm); n = size(perm,1); nnz=size(val,1); rows = zeros(nnz,1); for i=1:n s = irow(i); e = irow(i+1)-1; rows(s:e) = i; end mat = sparse(rows,cols,val); figure subplot(121); spy(mat); subplot(122); spy(mat(perm,perm)); x=hdf5read(file,'/x'); solana=h5Complex(file,'/solana'); solcal=h5Complex(file,'/solcal'); err=hdf5read(file, '/err'); figure subplot(211) plot(x, real(solana),x,imag(solana),x,real(solcal),'o',x, ... imag(solcal), '*') legend('Exact Real', 'Exact Imag', 'Calc. Real', 'Calc. Imag') xlabel('X'); ylabel('SOL'); subplot(212) plot(x, err, 'o-'); xlabel('X'); ylabel('|Error|') grid on diff --git a/matlab/pde2d.m b/matlab/pde2d.m index c375cef..e3ee9fe 100644 --- a/matlab/pde2d.m +++ b/matlab/pde2d.m @@ -1,85 +1,85 @@ % % @file pde2d.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % file='pde2d.h5'; % % Get data from data sets % r=hdf5read(file,'/xgrid'); t=hdf5read(file,'/ygrid'); sol=hdf5read(file,'/sol')'; solexact=hdf5read(file,'/solana')'; err=hdf5read(file,'/errors')'; solr=hdf5read(file,'/derivx')'; solt=hdf5read(file,'/derivy')'; % % Attributes % NR=hdf5read(file,'/NX'); NTH=hdf5read(file,'/NY'); NIDBAS1=hdf5read(file,'/NIDBAS1'); NIDBAS2=hdf5read(file,'/NIDBAS2'); MBESS=hdf5read(file,'/MBESS'); LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), mbess = %d', NR, NTH, ... NIDBAS1, NIDBAS2, MBESS); attr=hdf5read(file,'/xgrid/title'); x_ann=attr.Data; attr=hdf5read(file,'/ygrid/title'); y_ann=attr.Data; attr=hdf5read(file,'/sol/title'); sol_ann=attr.Data; attr=hdf5read(file,'/solana/title'); solexact_ann=attr.Data; attr=hdf5read(file,'/errors/title');err_ann=attr.Data; [R,T]=meshgrid(r,t); x = R.*cos(T); y= R.*sin(T); solx = cos(T).*solr - sin(T)./R.*solt; soly = sin(T).*solr + cos(T)./R.*solt; figure subplot(221) pcolor(double(r),double(t),double(sol)); shading interp hold on, quiver(r,t,solr,solt) xlabel(x_ann); ylabel(y_ann) title(LABEL) colorbar subplot(222) pcolor(double(x),double(y),double(sol)) shading interp hold on, quiver(x,y,solx,soly) hold off, axis image xlabel('X'); ylabel('Y') title('X-Y plane') colorbar subplot(223) surfc(double(x),double(y),double(sol)) xlabel('X'); ylabel('Y'); title(sol_ann) subplot(224) surfc(double(x),double(y),double(err)) xlabel('X'); ylabel('Y'); title(err_ann) diff --git a/matlab/pde2d_mumps.m b/matlab/pde2d_mumps.m index 268f2ed..62950d8 100644 --- a/matlab/pde2d_mumps.m +++ b/matlab/pde2d_mumps.m @@ -1,97 +1,97 @@ % % @file pde2d_mumps.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % file='pde2d_mumps.h5'; nr=hdf5read(file,'/', 'NX'); nth=hdf5read(file,'/', 'NY'); NIDBAS1=hdf5read(file,'/','NIDBAS1'); NIDBAS2=hdf5read(file,'/','NIDBAS2'); MBESS=hdf5read(file,'/','MBESS'); r=hdf5read(file,'/xgrid'); t=hdf5read(file,'/ygrid'); sol=hdf5read(file,'/sol')'; solexact=hdf5read(file,'/solana')'; err=hdf5read(file,'/errors')'; solr=hdf5read(file,'/derivx')'; solt=hdf5read(file,'/derivy')'; LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), mbess = %d', nr, nth, ... NIDBAS1, NIDBAS2, MBESS); figure subplot(211) plot(r, sol(nth/2,:), 'o', r, solexact(nth/2,:)) xlabel('r') grid on title(LABEL) subplot(212) if MBESS == 0 plot(t, sol(:,1), 'o', t, solexact(:,1)) else plot(t, sol(:,nr/2), 'o', t, solexact(:,nr/2)) end xlabel('\theta') grid on % $$$ if verLessThan('matlab', '7.9'); % $$$ n = hdf5read(file,'/MAT/RANK'); % $$$ nnz = hdf5read(file,'/MAT/NNZ'); % $$$ nlsym = hdf5read(file,'/MAT/NLSYM'); % $$$ else % $$$ n = hdf5read(file,'/MAT/', 'RANK'); % $$$ nnz = hdf5read(file,'/MAT/', 'NNZ'); % $$$ nlsym = hdf5read(file,'/MAT/', 'NLSYM'); % $$$ end % $$$ % $$$ cols=hdf5read(file, '/MAT/cols'); % $$$ irow=hdf5read(file, '/MAT/irow'); % $$$ val=hdf5read(file, '/MAT/val'); % $$$ perm=hdf5read(file, '/MAT/perm'); % $$$ % $$$ rows = zeros(nnz,1); % $$$ cols=double(cols); % $$$ irow=double(irow); % $$$ perm=double(perm); % $$$ % $$$ for i=1:n % $$$ s = irow(i); % $$$ e = irow(i+1)-1; % $$$ rows(s:e) = i; % $$$ end % $$$ % $$$ mat = sparse(rows,cols,val); % $$$ figure % $$$ subplot(121); % $$$ spy(mat(perm,perm)); % $$$ title('Matrix structure') % $$$ subplot(122); % $$$ spy(chol(mat(perm,perm))); % $$$ title('Factor L^T') diff --git a/matlab/pde2d_nh.m b/matlab/pde2d_nh.m index cd12593..b60a4fe 100644 --- a/matlab/pde2d_nh.m +++ b/matlab/pde2d_nh.m @@ -1,101 +1,101 @@ % % @file pde2d_nh.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % file='pde2d_nh.h5'; % % Get data from data sets % r=hdf5read(file,'/xgrid'); t=hdf5read(file,'/ygrid'); sol=hdf5read(file,'/sol')'; solexact=hdf5read(file,'/solana')'; err=hdf5read(file,'/errors')'; errx=hdf5read(file,'/errors_x')'; erry=hdf5read(file,'/errors_y')'; solr=hdf5read(file,'/derivx')'; solt=hdf5read(file,'/derivy')'; % % Attributes % NR=hdf5read(file,'/NX'); NTH=hdf5read(file,'/NY'); NIDBAS1=hdf5read(file,'/NIDBAS1'); NIDBAS2=hdf5read(file,'/NIDBAS2'); MBESS=hdf5read(file,'/MBESS'); LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), mbess = %d', NR, NTH, ... NIDBAS1, NIDBAS2, MBESS); attr=hdf5read(file,'/xgrid/title'); x_ann=attr.Data; attr=hdf5read(file,'/ygrid/title'); y_ann=attr.Data; attr=hdf5read(file,'/sol/title'); sol_ann=attr.Data; attr=hdf5read(file,'/solana/title'); solexact_ann=attr.Data; attr=hdf5read(file,'/errors/title');err_ann=attr.Data; [R,T]=meshgrid(r,t); x = R.*cos(T); y= R.*sin(T); solx = cos(T).*solr - sin(T)./R.*solt; soly = sin(T).*solr + cos(T)./R.*solt; figure subplot(221) pcolor(double(r),double(t),double(sol)); shading interp hold on, quiver(r,t,solr,solt) xlabel(x_ann); ylabel(y_ann) title(LABEL) colorbar subplot(222) pcolor(double(x),double(y),double(sol)) shading interp hold on, quiver(x,y,solx,soly) hold off, axis image xlabel('X'); ylabel('Y') title('X-Y plane') colorbar subplot(223) surfc(double(x),double(y),double(sol)) xlabel('X'); ylabel('Y'); title(sol_ann) subplot(224) surfc(double(x),double(y),double(err)) xlabel('X'); ylabel('Y'); title(err_ann) figure subplot(311) plot(t,err(:,NR+1),'o-') xlabel('\theta'); ylabel('Error on solution') grid on title('Error on Boundary r=1'); subplot(312) plot(t,errx(:,NR+1),'o-') xlabel('\theta'); ylabel('Error on x-derivative') grid on subplot(313) plot(t,erry(:,NR+1),'o-') xlabel('\theta'); ylabel('Error on y-derivative') grid on diff --git a/matlab/pde2d_pardiso.m b/matlab/pde2d_pardiso.m index 1ec3ae3..5b4604c 100644 --- a/matlab/pde2d_pardiso.m +++ b/matlab/pde2d_pardiso.m @@ -1,63 +1,63 @@ % % @file pde2d_pardiso.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % file='pde2d_pardiso.h5'; if verLessThan('matlab', '7.9'); n = hdf5read(file,'/MAT/RANK'); nnz = hdf5read(file,'/MAT/NNZ'); nlsym = hdf5read(file,'/MAT/NLSYM'); else n = hdf5read(file,'/MAT/', 'RANK'); nnz = hdf5read(file,'/MAT/', 'NNZ'); nlsym = hdf5read(file,'/MAT/', 'NLSYM'); end cols=hdf5read(file, '/MAT/cols'); irow=hdf5read(file, '/MAT/irow'); val=hdf5read(file, '/MAT/val'); perm=hdf5read(file, '/MAT/perm'); rows = zeros(nnz,1); cols=double(cols); irow=double(irow); perm=double(perm); for i=1:n s = irow(i); e = irow(i+1)-1; rows(s:e) = i; end mat = sparse(rows,cols,val); figure subplot(121); spy(mat(perm,perm)); title('Matrix structure') subplot(122); spy(chol(mat(perm,perm))); title('Factor L^T') diff --git a/matlab/pde2d_sym_pardiso.m b/matlab/pde2d_sym_pardiso.m index 7eb2c8f..b23f0a7 100644 --- a/matlab/pde2d_sym_pardiso.m +++ b/matlab/pde2d_sym_pardiso.m @@ -1,136 +1,136 @@ % % @file pde2d_sym_pardiso.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % file='pde2d_sym_pardiso.h5'; % % Get data from data sets % r=hdf5read(file,'/xgrid'); t=hdf5read(file,'/ygrid'); sol=hdf5read(file,'/sol')'; solexact=hdf5read(file,'/solana')'; err=hdf5read(file,'/errors')'; solr=hdf5read(file,'/derivx')'; solt=hdf5read(file,'/derivy')'; % % Attributes % if verLessThan('matlab', '7.9'); NR=hdf5read(file,'/NX'); NTH=hdf5read(file,'/NY'); NIDBAS1=hdf5read(file,'/NIDBAS1'); NIDBAS2=hdf5read(file,'/NIDBAS2'); MBESS=hdf5read(file,'/MBESS'); EPSI=hdf5read(file,'/EPSI'); attr=hdf5read(file,'/xgrid/title'); x_ann=attr.Data; attr=hdf5read(file,'/ygrid/title'); y_ann=attr.Data; attr=hdf5read(file,'/sol/title'); sol_ann=attr.Data; attr=hdf5read(file,'/solana/title'); solexact_ann=attr.Data; attr=hdf5read(file,'/errors/title');err_ann=attr.Data; else NR=hdf5read(file,'/','NX'); NTH=hdf5read(file,'/','NY'); NIDBAS1=hdf5read(file,'/','NIDBAS1'); NIDBAS2=hdf5read(file,'/','NIDBAS2'); MBESS=hdf5read(file,'/','MBESS'); EPSI=hdf5read(file,'/','EPSI'); attr=hdf5read(file,'/xgrid/','title'); x_ann=attr.Data; attr=hdf5read(file,'/ygrid/','title'); y_ann=attr.Data; attr=hdf5read(file,'/sol/','title'); sol_ann=attr.Data; attr=hdf5read(file,'/solana/','title'); solexact_ann=attr.Data; attr=hdf5read(file,'/errors/','title');err_ann=attr.Data; end LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), mbess = %d, epsi = %3.2f', ... NR, NTH, NIDBAS1, NIDBAS2, MBESS, EPSI); [R,T]=meshgrid(r,t); x = R.*cos(T); y= R.*sin(T); solx = cos(T).*solr - sin(T)./R.*solt; soly = sin(T).*solr + cos(T)./R.*solt; figure subplot(221) pcolor(double(r),double(t),double(sol)); shading interp hold on, quiver(r,t,solr,solt) xlabel(x_ann); ylabel(y_ann) title(LABEL) colorbar subplot(222) pcolor(double(x),double(y),double(sol)) shading interp hold on, quiver(x,y,solx,soly) hold off, axis image xlabel('X'); ylabel('Y') title('X-Y plane') colorbar subplot(223) surfc(double(x),double(y),double(sol)) xlabel('X'); ylabel('Y'); title(sol_ann) subplot(224) surfc(double(x),double(y),double(err)) xlabel('X'); ylabel('Y'); title(err_ann) if verLessThan('matlab', '7.9'); n = hdf5read(file,'/MAT/RANK'); nnz = hdf5read(file,'/MAT/NNZ'); nlsym = hdf5read(file,'/MAT/NLSYM'); else n = hdf5read(file,'/MAT/', 'RANK'); nnz = hdf5read(file,'/MAT/', 'NNZ'); nlsym = hdf5read(file,'/MAT/', 'NLSYM'); end cols=hdf5read(file, '/MAT/cols'); irow=hdf5read(file, '/MAT/irow'); val=hdf5read(file, '/MAT/val'); perm=hdf5read(file, '/MAT/perm'); rows = zeros(nnz,1); cols=double(cols); irow=double(irow); perm=double(perm); for i=1:n s = irow(i); e = irow(i+1)-1; rows(s:e) = i; end mat = sparse(rows,cols,val); figure subplot(121) spy(mat) title('Original Matrix structure') subplot(122) spy(mat(perm,perm)) title('Permuted Matrix structure') diff --git a/matlab/pde2d_sym_pardiso_dft.m b/matlab/pde2d_sym_pardiso_dft.m index 5b0f948..b5103b7 100644 --- a/matlab/pde2d_sym_pardiso_dft.m +++ b/matlab/pde2d_sym_pardiso_dft.m @@ -1,171 +1,171 @@ % % @file pde2d_sym_pardiso_dft.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % mat_disp=1; file='pde2d_sym_pardiso_dft.h5'; if verLessThan('matlab', '7.9'); n = hdf5read(file,'/MAT1/RANK'); nnz = hdf5read(file,'/MAT1/NNZ'); nlsym = hdf5read(file,'/MAT1/NLSYM'); NR=hdf5read(file,'/NX'); NTH=hdf5read(file,'/NY'); NIDBAS1=hdf5read(file,'/NIDBAS1'); NIDBAS2=hdf5read(file,'/NIDBAS2'); MBESS=hdf5read(file,'/MBESS'); EPSI=hdf5read(file,'/EPSI'); KMIN=hdf5read(file,'/KMIN'); KMAX=hdf5read(file,'/KMAX'); else n = hdf5read(file,'/MAT1/', 'RANK'); nnz = hdf5read(file,'/MAT1/', 'NNZ'); nlsym = hdf5read(file,'/MAT1/', 'NLSYM'); NR=hdf5read(file,'/', 'NX'); NTH=hdf5read(file,'/', 'NY'); NIDBAS1=hdf5read(file,'/', 'NIDBAS1'); NIDBAS2=hdf5read(file,'/', 'NIDBAS2'); MBESS=hdf5read(file,'/', 'MBESS'); EPSI=hdf5read(file,'/','EPSI'); KMIN=hdf5read(file,'/','KMIN'); KMAX=hdf5read(file,'/','KMAX'); end LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), mbess = %d, epsi = %3.2f', NR, NTH, ... NIDBAS1, NIDBAS2, MBESS, EPSI); DK = KMAX-KMIN+1; attr=hdf5read(file,'/xgrid/','title'); x_ann=attr.Data; attr=hdf5read(file,'/ygrid/','title'); y_ann=attr.Data; attr=hdf5read(file,'/sol/','title'); sol_ann=attr.Data; attr=hdf5read(file,'/solana/','title'); solexact_ann=attr.Data; attr=hdf5read(file,'/errors/','title');err_ann=attr.Data; if mat_disp == 1 cols=hdf5read(file, '/MAT1/cols'); irow=hdf5read(file, '/MAT1/irow'); val=h5Complex(file, '/MAT1/val'); perm=hdf5read(file, '/MAT1/perm'); rows = zeros(nnz,1); cols=double(cols); irow=double(irow); perm=double(perm); for i=1:n s = irow(i); e = irow(i+1)-1; rows(s:e) = i; end valr=real(val); vali=imag(val); mat = sparse(rows,cols,valr); figure subplot(121) spy(mat,8) title('Original Matrix structure') subplot(122) spy(mat(perm,perm),8) title('Permuted Matrix structure') end r=hdf5read(file,'/xgrid'); t=hdf5read(file,'/ygrid'); sol=hdf5read(file,'/sol')'; solexact=hdf5read(file,'/solana')'; err=hdf5read(file,'/errors')'; solr=hdf5read(file,'/derivx')'; solt=hdf5read(file,'/derivy')'; [R,T]=meshgrid(r,t); x = R.*cos(T); y= R.*sin(T); solx = cos(T).*solr - sin(T)./R.*solt; soly = sin(T).*solr + cos(T)./R.*solt; figure subplot(221) pcolor(r,t,sol); shading interp hold on, quiver(r,t,solr,solt) xlabel(x_ann); ylabel(y_ann) title(LABEL) colorbar subplot(222) pcolor(x,y,sol) shading interp hold on, quiver(x,y,solx,soly) hold off, axis image xlabel('X'); ylabel('Y') title('X-Y plane') colorbar subplot(223) surfc(x,y,sol) xlabel('X'); ylabel('Y'); title(sol_ann) subplot(224) surfc(x,y,err) xlabel('X'); ylabel('Y'); title(err_ann) figure ft_sol=h5Complex(file,'/FT_SOL'); ft_sol=reshape(ft_sol,DK,[]); m=[KMIN:KMAX]; sp=1:NR+NIDBAS1; subplot(121) stem3(sp,m, real(ft_sol), 'filled') shading interp xlabel('Radial spline number'); ylabel('m') title('Real(\phi)') subplot(122) stem3(sp,m, imag(ft_sol),'filled') shading interp xlabel('Radial spline number'); ylabel('m') title('Imag(\phi)') figure ft_rhs=h5Complex(file,'/FT_RHS'); ft_rhs=reshape(ft_rhs,DK,[]); m=[KMIN:KMAX]; sp=1:NR+NIDBAS1; subplot(121) stem3(sp,m, real(ft_rhs), 'filled') shading interp xlabel('Radial spline number'); ylabel('m') title('Real(\rho)') subplot(122) stem3(sp,m, imag(ft_rhs),'filled') shading interp xlabel('Radial spline number'); ylabel('m') title('Imag(\rho)') figure energy_k = h5Complex(file,'/ENERGY_K'); subplot(211) stem(m, real(energy_k)); xlabel('m'); ylabel('Real(\phi)'); title('Spectral energy') subplot(212) stem(m, imag(energy_k)); xlabel('m'); ylabel('Imag(\phi)'); title('Spectral energy') diff --git a/matlab/pde3d.m b/matlab/pde3d.m index f8cb948..17bb7d5 100644 --- a/matlab/pde3d.m +++ b/matlab/pde3d.m @@ -1,124 +1,124 @@ % % @file pde3d.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % file='pde3d.h5'; % % Get data from data sets % x=hdf5read(file,'/xgrid'); y=hdf5read(file,'/ygrid'); z=hdf5read(file,'/zgrid'); % nx=size(x); ny=size(y); nz=size(z); % rhs=hdf5read(file,'/RHS'); coefs=hdf5read(file,'/SOL'); bcoef=hdf5read(file,'/BCOEF'); sol=hdf5read(file,'/sol'); solexact=hdf5read(file,'/solana'); solx=hdf5read(file,'/derivx'); soly=hdf5read(file,'/derivy'); solz=hdf5read(file,'/derivz'); solx_exact=hdf5read(file,'/derivx_exact'); soly_exact=hdf5read(file,'/derivy_exact'); solz_exact=hdf5read(file,'/derivz_exact'); figure k=ceil(nz(1)/2); subplot(211); pcolor(x,y,sol(:,:,k)'); shading interp xlabel('x'); ylabel('y') title('Caculated solution') colorbar subplot(212); pcolor(x,y,solexact(:,:,k)'); shading interp xlabel('x'); ylabel('y') title('Analytical solution') colorbar figure err=sol-solexact; pcolor(x,y,err(:,:,k)'); shading interp xlabel('x'); ylabel('y') title('Discretization error') colorbar figure subplot(211); pcolor(x,y,solx(:,:,k)'); shading interp xlabel('x'); ylabel('y') title('Caculated d/dx') colorbar subplot(212); pcolor(x,y,solx_exact(:,:,k)'); shading interp xlabel('x'); ylabel('y') title('Analytical d/dx') colorbar figure k=ceil(nz(1)/2); subplot(211); pcolor(x,y,soly(:,:,k)'); shading interp xlabel('x'); ylabel('y') title('Caculated d/dy') colorbar subplot(212); pcolor(x,y,soly_exact(:,:,k)'); shading interp xlabel('x'); ylabel('y') title('Analytical d/dy') colorbar figure k=ceil(nz(1)/6); subplot(211); pcolor(x,y,solz(:,:,k)'); shading interp xlabel('x'); ylabel('y') title('Caculated d/dz') colorbar subplot(212); pcolor(x,y,solz_exact(:,:,k)'); shading interp xlabel('x'); ylabel('y') title('Analytical d/dz') colorbar figure i=ceil(nx(1)/2); plot(z, squeeze(sol(i,1,:)), 'o', z, squeeze(solexact(i,1,:)),'r') xlabel('z'); diff --git a/matlab/poisson_fe.m b/matlab/poisson_fe.m index 92012f6..a12b08b 100644 --- a/matlab/poisson_fe.m +++ b/matlab/poisson_fe.m @@ -1,123 +1,123 @@ % % @file poisson_fe.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % file='poisson_fe.h5'; % nx=h5readatt(file,'/','NX'); ny=h5readatt(file,'/','NY'); kx=h5readatt(file,'/','KX'); ky=h5readatt(file,'/','KY'); nidbas1=h5readatt(file,'/','NIDBAS1'); nidbas2=h5readatt(file,'/','NIDBAS2'); relax=h5readatt(file,'/','RELAX'); nlevels=h5readatt(file,'/','LEVELS'); nu1=h5readatt(file,'/','NU1'); nu2=h5readatt(file,'/','NU2'); mu=h5readatt(file,'/','MU'); title_str=sprintf(['N=(%d,%d), NIDBAS=(%d,%d), relax=%s, nu1=%d, ' ... 'nu2=%d, mu=%d, LEVELS=%d, KX=%d, KY=%d'], ... nx,ny,nidbas1,nidbas2,relax,nu1,nu2,mu,nlevels,kx,ky); % % Prolongation matrices at the coarsest grid % levels=nlevels; mglevel=sprintf('/mglevels/level.%.2d', levels); dset=strcat(mglevel,'/matpx'); matpx=h5read(file,dset); dset=strcat(mglevel,'/matpy'); matpy=h5read(file,dset); % % FE matrix at the finest grid % levels=1; mglevel=sprintf('/mglevels/level.%.2d', levels); dset=strcat(mglevel,'/mata'); [mata,diag]=csr_mat(file,dset); f=h5read(file,strcat(mglevel,'/f')); v=h5read(file,strcat(mglevel,'/v')); f1d=h5read(file,strcat(mglevel,'/f1d')); v1d=h5read(file,strcat(mglevel,'/v1d')); % $$$ figure % $$$ spy(mata) % % Solutions at the finest grid % x=h5read(file,'/solutions/xg'); y=h5read(file,'/solutions/yg'); dense=h5read(file,'/solutions/dense'); sol_anal=h5read(file,'/solutions/anal'); sol_calc=h5read(file,'/solutions/calc'); sol_direct=h5read(file,'/solutions/direct'); nx=int32(size(x,1)); ny=int32(size(y,1)); figure surf(x,y,sol_direct'-sol_anal') xlabel('X'); ylabel('Y'); title('Error on the finest grid') figure subplot(211) [yy,iy] = max(abs(sol_anal),[],2); [xx,ix] = max(yy); iy0=iy(ix); str=sprintf('Solution at y = %.4f', y(iy0)); plot(x, sol_anal(:,iy0),x, sol_direct(:,iy0),'o') xlabel('x'); ylabel(str); grid on legend('Analytic Solution', 'Direct Solution') title(title_str) subplot(212) [xx,ix] = max(abs(sol_anal)); [yy,iy] = max(xx); ix0=ix(iy); str=sprintf('Solution at x = %.4f', x(ix0)); plot(y, sol_anal(ix0,:),y, sol_direct(ix0,:),'o') xlabel('y'); ylabel(str); grid on title(title_str) % % Iterations % dset='/Iterations/'; disc_err=h5read(file, strcat(dset,'disc_errors')); resid=h5read(file, strcat(dset,'residues')); its=0:1:size(resid,1)-1; figure subplot(211) semilogy(its,resid,'o-') grid on xlabel('Iterations'); ylabel('Norm of residue'); title(title_str); subplot(212) semilogy(its,disc_err,'h-') grid on xlabel('Iterations'); ylabel('Norm of error'); diff --git a/matlab/poisson_mg.m b/matlab/poisson_mg.m index e407dd6..10f23a3 100644 --- a/matlab/poisson_mg.m +++ b/matlab/poisson_mg.m @@ -1,140 +1,140 @@ % % @file poisson_mg.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % if ~exist('file'), file='poisson_mg.h5'; end % nx=h5readatt(file,'/','NX'); ny=h5readatt(file,'/','NY'); lx=h5readatt(file,'/','LX'); ly=h5readatt(file,'/','LY'); kx=h5readatt(file,'/','KX'); ky=h5readatt(file,'/','KY'); beta=h5readatt(file,'/','BETA'); omega=h5readatt(file,'/','OMEGA'); relax=h5readatt(file,'/','RELAX'); mat_type=h5readatt(file,'/','MAT_TYPE'); nlevels=h5readatt(file,'/','LEVELS'); mu=h5readatt(file,'/','MU'); nnu=h5readatt(file,'/','NNU'); nu1=h5read(file,'/nu1'); nu2=h5read(file,'/nu2'); title_str=sprintf(['N=(%d,%d), Lx=%d, Ly=%d, beta=%.4f, relax=%s, V(%d,%d), ' ... 'LEVELS=%d, KX=%d, KY=%d'], ... nx,ny,lx,ly,beta,relax,nu1(nnu),nu2(nnu),nlevels,kx,ky); % % Prolongation matrices at the coarsest grid % levels=nlevels; mglevel=sprintf('/mglevels/level.%.2d', levels); dset=strcat(mglevel,'/matpx'); matpx=csr_mat(file,dset); dset=strcat(mglevel,'/matpy'); matpy=csr_mat(file,dset); % % FE matrix at the finest grid % levels=1; mglevel=sprintf('/mglevels/level.%.2d', levels); dset=strcat(mglevel,'/mata'); if mat_type == 'csr' [mata,diag]=csr_mat(file,dset); else [mata,diag]=cds_mat(file,dset); end n=size(diag,1); % $$$ figure % $$$ spy(mata) % % Spectral radius of GS Iteration Matrix % Rg = -(D+L)^(-1) * U % % $$$ matl= tril(mata,0); % D+L % $$$ lambda = eigs(-triu(mata,1),matl) % $$$ fprintf(1, 'Spectral Radius of GS relaxation matrix = %g\n', max(abs(lambda))) % $$$ figure % $$$ plot(lambda, 'o') % $$$ axis equal % $$$ grid on % $$$ xlabel('Real of eigenvalues'); ylabel('Imag of eigenvalues') % $$$ title(title_str) % % Solutions at the finest grid % dense=h5read(file,'/dense'); x=h5read(file,'/solutions/xg'); y=h5read(file,'/solutions/yg'); sol_anal=h5read(file,'/solutions/anal'); %sol_direct=h5read(file,'/solutions/direct'); sol_calc=h5read(file,'/solutions/calc'); figure % $$$ surf(x,y,sol_calc'-sol_anal') pcolor(x,y,sol_calc'-sol_anal') shading interp colorbar xlabel('X'); ylabel('Y'); zlabel('Error'); title(title_str) figure subplot(211) [yy,iy] = max(abs(sol_anal),[],2); [xx,ix] = max(yy); iy0=iy(ix); str=sprintf('Solution at y = %.4f', y(iy0)); plot(x, sol_anal(:,iy0),x, sol_calc(:,iy0),'o') xlabel('x'); ylabel(str); grid on legend('Analytic Solution', 'MG Solution') title(title_str) subplot(212) [xx,ix] = max(abs(sol_anal)); [yy,iy] = max(xx); ix0=ix(iy); str=sprintf('Solution at x = %.4f', x(ix0)); plot(y, sol_anal(ix0,:),y, sol_calc(ix0,:),'o') xlabel('y'); ylabel(str); grid on title(title_str) % % Iterations % dset='/Iterations/'; disc_err=h5read(file, strcat(dset,'disc_errors')); resid=h5read(file, strcat(dset,'residues')); its=0:1:size(resid,1)-1; figure subplot(211) semilogy(its,resid,'o-') grid on xlabel('Iterations'); ylabel('Norm of residue'); title(title_str); subplot(212) semilogy(its,disc_err,'h-') grid on xlabel('Iterations'); ylabel('Norm of error'); diff --git a/matlab/ppde3d.m b/matlab/ppde3d.m index ed333f2..de73cc6 100644 --- a/matlab/ppde3d.m +++ b/matlab/ppde3d.m @@ -1,145 +1,145 @@ % % @file ppde3d.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % file='ppde3d.h5'; % % Get data from data sets % x=hdf5read(file,'/xgrid'); y=hdf5read(file,'/ygrid'); z=hdf5read(file,'/zgrid'); % nx=size(x); ny=size(y); nz=size(z); % sol=hdf5read(file,'/sol'); solexact=hdf5read(file,'/solana'); solx=hdf5read(file,'/derivx'); soly=hdf5read(file,'/derivy'); solz=hdf5read(file,'/derivz'); solx_exact=hdf5read(file,'/derivx_exact'); soly_exact=hdf5read(file,'/derivy_exact'); solz_exact=hdf5read(file,'/derivz_exact'); figure k=ceil(nz(1)/2); subplot(311); pcolor(x,y,sol(:,:,k)'); shading interp xlabel('x'); ylabel('y') title('Caculated solution') colorbar subplot(312); pcolor(x,y,solexact(:,:,k)'); shading interp xlabel('x'); ylabel('y') title('Analytical solution') colorbar subplot(313); err=sol-solexact; pcolor(x,y,err(:,:,k)'); shading interp xlabel('x'); ylabel('y') title('Discretization error') colorbar figure subplot(311); pcolor(x,y,solx(:,:,k)'); shading interp xlabel('x'); ylabel('y') title('Caculated d/dx') colorbar subplot(312); pcolor(x,y,solx_exact(:,:,k)'); shading interp xlabel('x'); ylabel('y') title('Analytical d/dx') colorbar subplot(313); err=solx-solx_exact; pcolor(x,y,err(:,:,k)'); shading interp xlabel('x'); ylabel('y') title('Discretization error') colorbar figure k=ceil(nz(1)/2); subplot(311); pcolor(x,y,soly(:,:,k)'); shading interp xlabel('x'); ylabel('y') title('Caculated d/dy') colorbar subplot(312); pcolor(x,y,soly_exact(:,:,k)'); shading interp xlabel('x'); ylabel('y') title('Analytical d/dy') colorbar subplot(313); err=soly-soly_exact; pcolor(x,y,err(:,:,k)'); shading interp xlabel('x'); ylabel('y') title('Discretization error') colorbar figure k=ceil(nz(1)/6); subplot(311); pcolor(x,y,solz(:,:,k)'); shading interp xlabel('x'); ylabel('y') title('Caculated d/dz') colorbar subplot(312); pcolor(x,y,solz_exact(:,:,k)'); shading interp xlabel('x'); ylabel('y') title('Analytical d/dz') colorbar subplot(313); err=solz-solz_exact; pcolor(x,y,err(:,:,k)'); shading interp xlabel('x'); ylabel('y') title('Discretization error') colorbar figure i=ceil(nx(1)/2); plot(z, squeeze(sol(i,1,:)), 'o', z, squeeze(solexact(i,1,:)),'r') xlabel('z'); diff --git a/matlab/ppde3d_pb.m b/matlab/ppde3d_pb.m index 0702db8..8541704 100644 --- a/matlab/ppde3d_pb.m +++ b/matlab/ppde3d_pb.m @@ -1,145 +1,145 @@ % % @file ppde3d_pb.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % file='ppde3d_pb.h5'; % % Get data from data sets % x=hdf5read(file,'/xgrid'); y=hdf5read(file,'/ygrid'); z=hdf5read(file,'/zgrid'); % nx=size(x); ny=size(y); nz=size(z); % sol=hdf5read(file,'/sol'); solexact=hdf5read(file,'/solana'); solx=hdf5read(file,'/derivx'); soly=hdf5read(file,'/derivy'); solz=hdf5read(file,'/derivz'); solx_exact=hdf5read(file,'/derivx_exact'); soly_exact=hdf5read(file,'/derivy_exact'); solz_exact=hdf5read(file,'/derivz_exact'); figure k=ceil(nz(1)/2); subplot(311); pcolor(x,y,sol(:,:,k)'); shading interp xlabel('x'); ylabel('y') title('Caculated solution') colorbar subplot(312); pcolor(x,y,solexact(:,:,k)'); shading interp xlabel('x'); ylabel('y') title('Analytical solution') colorbar subplot(313); err=sol-solexact; pcolor(x,y,err(:,:,k)'); shading interp xlabel('x'); ylabel('y') title('Discretization error') colorbar figure subplot(311); pcolor(x,y,solx(:,:,k)'); shading interp xlabel('x'); ylabel('y') title('Caculated d/dx') colorbar subplot(312); pcolor(x,y,solx_exact(:,:,k)'); shading interp xlabel('x'); ylabel('y') title('Analytical d/dx') colorbar subplot(313); err=solx-solx_exact; pcolor(x,y,err(:,:,k)'); shading interp xlabel('x'); ylabel('y') title('Discretization error') colorbar figure k=ceil(nz(1)/2); subplot(311); pcolor(x,y,soly(:,:,k)'); shading interp xlabel('x'); ylabel('y') title('Caculated d/dy') colorbar subplot(312); pcolor(x,y,soly_exact(:,:,k)'); shading interp xlabel('x'); ylabel('y') title('Analytical d/dy') colorbar subplot(313); err=soly-soly_exact; pcolor(x,y,err(:,:,k)'); shading interp xlabel('x'); ylabel('y') title('Discretization error') colorbar figure k=ceil(nz(1)/6); subplot(311); pcolor(x,y,solz(:,:,k)'); shading interp xlabel('x'); ylabel('y') title('Caculated d/dz') colorbar subplot(312); pcolor(x,y,solz_exact(:,:,k)'); shading interp xlabel('x'); ylabel('y') title('Analytical d/dz') colorbar subplot(313); err=solz-solz_exact; pcolor(x,y,err(:,:,k)'); shading interp xlabel('x'); ylabel('y') title('Discretization error') colorbar figure i=ceil(nx(1)/2); plot(z, squeeze(sol(i,1,:)), 'o', z, squeeze(solexact(i,1,:)),'r') xlabel('z'); diff --git a/matlab/ppoisson_fd.m b/matlab/ppoisson_fd.m index a573749..e098384 100644 --- a/matlab/ppoisson_fd.m +++ b/matlab/ppoisson_fd.m @@ -1,110 +1,110 @@ % % @file ppoisson_fd.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % if ~exist('file'), file='ppoisson_fd.h5'; end prb=h5readatt(file,'/','PRB'); nx=h5readatt(file,'/','NX'); ny=h5readatt(file,'/','NY'); lx=h5readatt(file,'/','LX'); ly=h5readatt(file,'/','LY'); kx=h5readatt(file,'/','KX'); ky=h5readatt(file,'/','KY'); beta=h5readatt(file,'/','BETA'); omega=h5readatt(file,'/','OMEGA'); relax=h5readatt(file,'/','RELAX'); nlevels=h5readatt(file,'/','LEVELS'); mu=h5readatt(file,'/','MU'); nu1=h5readatt(file,'/','NU1'); nu2=h5readatt(file,'/','NU2'); direct_solve_nits=h5readatt(file,'/','DIRECT_SOLVE_NITS'); title_str=sprintf(['PRB=%s, N=(%d,%d), relax=%s, V(%d,%d), LEVELS=%d, DIRECT SOLVE=%d'], ... prb, nx, ny, relax, nu1, nu2, nlevels, direct_solve_nits); x = h5read(file, '/xgrid'); y = h5read(file, '/ygrid'); [X,Y]=meshgrid(y,x); n1=size(x,1); n2=size(y,1); n=n1*n2 mat = stencil_mat(file, '/MAT'); % $$$ figure % $$$ spy(mat) f = h5read(file,'/f'); f1d=reshape(f,n,1); v = h5read(file,'/v'); v1d=reshape(v,n,1); u = h5read(file,'/u'); u1d=reshape(u,n,1); % $$$ udirect1d = mat\f1d; % $$$ udirect=reshape(udirect1d,n1,n2); % $$$ fprintf('Residual of direct solution = %.3e\n', norm(mat*udirect1d-f1d)); % $$$ fprintf('Error of direct solution = %.3e\n', norm(udirect1d- ... % $$$ v1d)); figure subplot(221) pcolor(x,y,v'-u') xlabel('X'); ylabel('Y'); shading interp colorbar title('Error on the finest grid') subplot(222) [yy,iy] = max(abs(v),[],2); [xx,ix] = max(yy); iy0=iy(ix); str=sprintf('Solution at y = %.4f', y(iy0)); plot(x, v(:,iy0),x, u(:,iy0),'o') xlabel('x'); ylabel(str); grid on legend('Analytic Solution', 'Computed Solution') title(title_str) subplot(223) [xx,ix] = max(abs(v)); [yy,iy] = max(xx); ix0=ix(iy); str=sprintf('Solution at x = %.4f', x(ix0)); plot(y, v(ix0,:),y, u(ix0,:),'o') xlabel('y'); ylabel(str); grid on figure resid_it=h5read(file, '/resid'); err_it=h5read(file, '/error'); nits = size(resid_it,1)-1; it=0:nits; subplot(211) semilogy(it,resid_it,'o-') xlabel('Iterations') ylabel('Residual norm') grid on title(title_str) subplot(212) semilogy(it, err_it, 'o-') xlabel('Iterations') ylabel('Norm of Discretization Error') grid on diff --git a/matlab/relax.m b/matlab/relax.m index 37d3fca..bfbaa29 100644 --- a/matlab/relax.m +++ b/matlab/relax.m @@ -1,55 +1,55 @@ % % @file relax.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % load relax.mat figure subplot(221) semilogy(jac_1(:,1),jac_1(:,2),gs_1(:,1),gs_1(:,2)) grid on legend('Jacobi', 'GS') xlabel('Iterations'); ylabel('Error') title('NX=32, P=1') subplot(222) semilogy(jac_3(:,1),jac_3(:,2),gs_3(:,1),gs_3(:,2)) grid on legend('Jacobi', 'GS') xlabel('Iterations'); ylabel('Error') title('NX=32, P=3') subplot(223) semilogy(jac_1(:,1),jac_1(:,4),gs_1(:,1),gs_1(:,4)) grid on legend('Jacobi', 'GS') xlabel('Iterations'); ylabel('Discretization error') title('NX=32, P=1') subplot(224) semilogy(jac_3(:,1),jac_3(:,4),gs_3(:,1),gs_3(:,4)) grid on legend('Jacobi', 'GS') xlabel('Iterations'); ylabel('Discretization error') title('NX=32, P=3') diff --git a/matlab/stencil_mat.m b/matlab/stencil_mat.m index 4283768..6c7030c 100644 --- a/matlab/stencil_mat.m +++ b/matlab/stencil_mat.m @@ -1,49 +1,49 @@ % % @file stencil_mat.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % function [mata, diag] = stencil_mat(file, dset) id = double(h5read(file, strcat(dset,'/dists'))); val = h5read(file, strcat(dset,'/val')); n1 = size(val,1); n2 = size(val,2); n = n1*n2; ndiag = size(val,3); dists = id(:,1) + n1*id(:,2); val = reshape(val,n,ndiag); %% Shift the off-diagonals %% for k=1:length(dists) d=dists(k); if d < 0 val(1:n+d,k) = val(1-d:n,k); elseif d > 0 val(n:-1:d+1,k) = val(n-d:-1:1,k); end end mata = spdiags(val, dists, n,n); if nargout == 2 idiag = find(dists==0); diag = val(:,idiag); end diff --git a/matlab/tcdsmat.m b/matlab/tcdsmat.m index 5331df7..e5a974a 100644 --- a/matlab/tcdsmat.m +++ b/matlab/tcdsmat.m @@ -1,91 +1,91 @@ % % @file tcdsmat.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % mat='/MAT1'; gbmat; clear S gb_mat; file='tcdsmat.h5'; nx=hdf5read(file,'/NX'); ny=hdf5read(file,'/NY'); dists=hdf5read(file,strcat(mat,'/dists')); vals=hdf5read(file,strcat(mat,'/vals')); rowv=hdf5read(file,strcat(mat,'/rowv')); colh=hdf5read(file,strcat(mat,'/colh')); n=hdf5read(file,strcat(mat,'/RANK')); nd=hdf5read(file,strcat(mat,'/NDIAGS')); err=zeros(n,nd); % Diagonal balancing of matrix dbal = 1./sqrt(diag(a)); a = diag(dbal)*a*diag(dbal); % Check CDS mat except row ny and column ny for k=1:nd d=dists(k); i1=max(1,1-d); i2=min(n,n-d); fprintf(1,'%8d %8d %8d\n',d,i1,i2); for i=i1:i2 j=i+d; if (i~=ny && j~=ny) err(i,k) = a(i,j)-vals(i,k); end end end fprintf(1,'min/max of err: %8.4e, %8.4e\n',min(min(err)), max(max(err))); % Check row ny and j .ne. ny i=ny; bw0=size(rowv,1); for k=1:nd d=dists(k); j=i+d; if ((j >= ny+1) && (j <= bw0)) err(i,k)=a(i,j)-rowv(j); end end fprintf(1,'min/max of err: %8.4e, %8.4e\n',min(min(err)), max(max(err))); % Check column ny j=ny; for k=1:nd d=dists(k); i=j-d; if ((i >= ny+1) && (i <= bw0)) err(i,k)=a(i,j)-colh(i); end end fprintf(1,'min/max of err: %8.4e, %8.4e\n',min(min(err)), max(max(err))); % Check RHS rhs=hdf5read(file,'/RHS'); fprintf('Err in RHS: %8.3e\n', max(max(abs(rhs-rhs0)))) % Check SOL sol=hdf5read(file,'/SOL'); err= sol-sol0; fprintf('Err SOL: %8.3e\n', max(max(abs(err)))); diff --git a/matlab/tcdsmat_plot_sol.m b/matlab/tcdsmat_plot_sol.m index 0754929..68234c5 100644 --- a/matlab/tcdsmat_plot_sol.m +++ b/matlab/tcdsmat_plot_sol.m @@ -1,79 +1,79 @@ % % @file tcdsmat_plot_sol.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % file='tcdsmat.h5' % % Get data from data sets % r=hdf5read(file,'/xgrid'); t=hdf5read(file,'/ygrid'); sol=hdf5read(file,'/sol')'; solexact=hdf5read(file,'/solana')'; err=hdf5read(file,'/errors')'; % % Attributes % NR=hdf5read(file,'/NX'); NTH=hdf5read(file,'/NY'); NIDBAS1=hdf5read(file,'/NIDBAS1'); NIDBAS2=hdf5read(file,'/NIDBAS2'); MBESS=hdf5read(file,'/MBESS'); LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), mbess = %d', NR, NTH, ... NIDBAS1, NIDBAS2, MBESS); attr=hdf5read(file,'/xgrid/title'); x_ann=attr.Data; attr=hdf5read(file,'/ygrid/title'); y_ann=attr.Data; attr=hdf5read(file,'/sol/title'); sol_ann=attr.Data; attr=hdf5read(file,'/solana/title'); solexact_ann=attr.Data; attr=hdf5read(file,'/errors/title');err_ann=attr.Data; [R,T]=meshgrid(r,t); x = R.*cos(T); y= R.*sin(T); figure subplot(221) pcolor(double(r),double(t),double(sol)); shading interp xlabel(x_ann); ylabel(y_ann) title(LABEL) colorbar subplot(222) pcolor(double(x),double(y),double(sol)) shading interp hold off, axis image xlabel('X'); ylabel('Y') title('X-Y plane') colorbar subplot(223) surfc(double(x),double(y),double(sol)) xlabel('X'); ylabel('Y'); title(sol_ann) subplot(224) surfc(double(x),double(y),double(err)) xlabel('X'); ylabel('Y'); title(err_ann) diff --git a/matlab/test_intergrid.m b/matlab/test_intergrid.m index 4d89561..5b22016 100644 --- a/matlab/test_intergrid.m +++ b/matlab/test_intergrid.m @@ -1,102 +1,102 @@ % % @file test_intergrid.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % if ~exist('file'), file='test_intergrid0.h5'; end % nx=h5readatt(file,'/','NX'); ny=h5readatt(file,'/','NY'); lx=h5readatt(file,'/','LX'); ly=h5readatt(file,'/','LY'); kx=h5readatt(file,'/','KX'); ky=h5readatt(file,'/','KY'); nlevels=h5readatt(file,'/','LEVELS'); title_str=sprintf(['N=(%d,%d), Lx=%d, Ly=%d, LEVELS=%d, KX=%d, KY=%d'], ... nx,ny,lx,ly,nlevels,kx,ky); if nlevels ~= 2 disp 'levels should be 2!' return end % % Prolongation matrices at the coarsest grid % levels=nlevels; mglevel=sprintf('/mglevels/level.%.2d', levels); dset=strcat(mglevel,'/matpx'); matpx=csr_mat(file,dset); dset=strcat(mglevel,'/matpy'); matpy=csr_mat(file,dset); for l=1:2 mglevel=sprintf('/mglevels/level.%.2d', l); x=h5read(file,strcat(mglevel,'/x')); y=h5read(file,strcat(mglevel,'/y')); f=h5read(file,strcat(mglevel,'/f')); v=h5read(file,strcat(mglevel,'/v')); figure subplot(221) [yy,iy] = max(abs(f),[],2); [xx,ix] = max(yy); iy0=iy(ix); str=sprintf('f at y = %.4f', y(iy0)); plot(x, f(:,iy0),'o-') xlabel('x'); ylabel(str); grid on title(title_str) subplot(222) [xx,ix] = max(abs(f)); [yy,iy] = max(xx); ix0=ix(iy); str=sprintf('f at x = %.4f', x(ix0)); plot(y, f(ix0,:),'o-') xlabel('y'); ylabel(str); grid on title(title_str) subplot(223) [yy,iy] = max(abs(v),[],2); [xx,ix] = max(yy); iy0=iy(ix); str=sprintf('v at y = %.4f', y(iy0)); plot(x, v(:,iy0),'ro-') xlabel('x'); ylabel(str); grid on title(title_str) subplot(224) [xx,ix] = max(abs(v)); [yy,iy] = max(xx); ix0=ix(iy); str=sprintf('v at x = %.4f', x(ix0)); plot(y, v(ix0,:),'ro-') xlabel('y'); ylabel(str); grid on title(title_str) if l==1 ffine=f; vfine=v; else fcoarse=f; vcoarse=v; end end %% Check err_restriction = matpx'*ffine*matpy./4 - fcoarse; err_prolong = matpx*vcoarse*matpy' - vfine; max(max(abs(err_restriction))) max(max(abs(err_prolong))) diff --git a/matlab/test_jacobi.m b/matlab/test_jacobi.m index 2a71117..875f559 100644 --- a/matlab/test_jacobi.m +++ b/matlab/test_jacobi.m @@ -1,96 +1,96 @@ % % @file test_jacobi.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % if ~exist('file'), file='test_jacobi.h5'; end x = h5read(file, '/xgrid'); y = h5read(file, '/ygrid'); [X,Y]=meshgrid(y,x); n1=size(x,1); n2=size(y,1); n=n1*n2 mat = stencil_mat(file, '/MAT'); % $$$ figure % $$$ spy(mat) f = h5read(file,'/f'); f1d=reshape(f,n,1); v = h5read(file,'/v'); v1d=reshape(v,n,1); u = h5read(file,'/u'); u1d=reshape(u,n,1); resids = h5read(file,'/resids'); errs = h5read(file,'/errs'); udirect1d = mat\f1d; udirect=reshape(udirect1d,n1,n2); fprintf('Residual of direct solution = %.3e\n', norm(mat*udirect1d-f1d)); fprintf('Error of direct solution = %.3e\n', norm(udirect1d-v1d)); figure subplot(211) [yy,iy] = max(abs(v),[],2); [xx,ix] = max(yy); iy0=iy(ix); str=sprintf('Solution at y = %.4f', y(iy0)); plot(x, v(:,iy0),x, u(:,iy0),'o') xlabel('x'); ylabel(str); grid on legend('Analytic Solution', 'Computed Solution') subplot(212) [xx,ix] = max(abs(v)); [yy,iy] = max(xx); ix0=ix(iy); str=sprintf('Solution at x = %.4f', x(ix0)); plot(y, v(ix0,:),y, u(ix0,:),'o') xlabel('y'); ylabel(str); grid on % $$$ figure % $$$ subplot(321) % $$$ surf(x,y,v'); title('Exact solution') % $$$ xlabel('X'); ylabel('Y') % $$$ subplot(322) % $$$ surf(x,y,f'); title('RHS') % $$$ xlabel('X'); ylabel('Y') % $$$ subplot(323) % $$$ surf(x,y,u'); title('Num. solution') % $$$ subplot(324) % $$$ surf(x,y,udirect'); title('Direct. solution') % $$$ xlabel('X'); ylabel('Y') % $$$ subplot(325) % $$$ surf(x,y,resids'); title('Residuals') % $$$ xlabel('X'); ylabel('Y') % $$$ subplot(326) % $$$ surf(x,y,errs'); title('Errors') % $$$ xlabel('X'); ylabel('Y') resid_it=h5read(file, '/resid'); err_it=h5read(file, '/error'); nits = size(resid_it,1)-1; it=0:nits; figure semilogy(it,resid_it, it, err_it) legend('Residual norm', 'Discretization error') xlabel('Iterations') grid on diff --git a/matlab/test_jacobig.m b/matlab/test_jacobig.m index a0e31d2..39dee82 100644 --- a/matlab/test_jacobig.m +++ b/matlab/test_jacobig.m @@ -1,84 +1,84 @@ % % @file test_jacobig.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % if ~exist('file'), file='test_jacobig.h5'; end x = h5read(file, '/xgrid'); y = h5read(file, '/ygrid'); [X,Y]=meshgrid(y,x); n1=size(x,1); n2=size(y,1); n=n1*n2 mat = stencil_mat(file, '/MAT'); % $$$ figure % $$$ spy(mat) f = h5read(file,'/f'); f1d=reshape(f,n,1); v = h5read(file,'/v'); v1d=reshape(v,n,1); u = h5read(file,'/u'); u1d=reshape(u,n,1); udirect1d = mat\f1d; udirect=reshape(udirect1d,n1,n2); fprintf('Residual of direct solution = %.3e\n', norm(mat*udirect1d-f1d)); fprintf('Error of direct solution = %.3e\n', norm(udirect1d- ... v1d)); figure subplot(221) pcolor(x,y,v'-u') xlabel('X'); ylabel('Y'); shading interp colorbar title('Error') subplot(223) [yy,iy] = max(abs(v),[],2); [xx,ix] = max(yy); iy0=iy(ix); str=sprintf('Solution at y = %.4f', y(iy0)); plot(x, v(:,iy0),x, u(:,iy0),'o') xlabel('x'); ylabel(str); grid on legend('Analytic Solution', 'Computed Solution') subplot(224) [xx,ix] = max(abs(v)); [yy,iy] = max(xx); ix0=ix(iy); str=sprintf('Solution at x = %.4f', x(ix0)); plot(y, v(ix0,:),y, u(ix0,:),'o') xlabel('y'); ylabel(str); grid on resid_it=h5read(file, '/resid'); err_it=h5read(file, '/error'); nits = size(resid_it,1)-1; it=0:nits; subplot(222) semilogy(it,resid_it, it, err_it) legend('Residual norm', 'Discretization error') xlabel('Iterations') grid on diff --git a/matlab/test_mg.m b/matlab/test_mg.m index d0ed10b..5f0b5fd 100644 --- a/matlab/test_mg.m +++ b/matlab/test_mg.m @@ -1,164 +1,164 @@ % % @file test_mg.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % file='test_mg.h5'; % nx=h5readatt(file,'/','NX'); nidbas=h5readatt(file,'/','NIDBAS'); relax=h5readatt(file,'/','RELAX'); levels=h5readatt(file,'/','LEVELS'); nu1=h5readatt(file,'/','NU1'); nu2=h5readatt(file,'/','NU2'); alpha=h5readatt(file,'/','ALPHA'); omega=h5readatt(file,'/','OMEGA'); if alpha == 0 kx=h5readatt(file,'/','KMODE'); title_str=sprintf('N=%d, NIDBAS=%d, KX=%d, relax=%s, omega=%.3f, levels = %d, nu1 = %d, nu2 = %d', ... nx,nidbas,kx,relax, omega, levels, nu1, nu2); else modem=h5readatt(file,'/','MODEM'); modep=h5readatt(file,'/','MODEP'); title_str=sprintf('N=%d, NIDBAS=%d, modem=%d, modep=%d, relax=%s, omega=%.3f, levels = %d, nu1 = %d, nu2 = %d', ... nx,nidbas,modem,modep,relax, omega, levels, nu1, nu2); end % % Read matrices at coarset grid % for lev=2:levels % % FE mat at fine grid mglevel=sprintf('/mglevels/level.%.2d', lev-1); dset=strcat(mglevel,'/mata'); mata_f = gb_mat(file, dset); % % FE mat at coarse grid mglevel=sprintf('/mglevels/level.%.2d', lev); dset=strcat(mglevel,'/mata'); mata_c = gb_mat(file, dset); % % Prolong mat dset=strcat(mglevel,'/matp'); matp=h5read(file,dset); % % Check fprintf(1,'Level %d: ||A_coarse - P''*A_fine*P|| = %g\n', lev, norm(matp'*mata_f*matp ... - mata_c)) end % % Iterations dset='/Iterations/'; err=h5read(file, strcat(dset,'errors')); disc_err=h5read(file, strcat(dset,'disc_errors')); resid=h5read(file, strcat(dset,'residues')); its=0:1:size(err,1)-1; figure subplot(212) semilogy(its,resid,'o-', its, disc_err,'h-') legend('Residue', 'Error') grid on xlabel('Iterations'); ylabel('Norm od residue and error'); title(title_str); % % Plot grid values at the last iteration xgrid=h5read(file, '/Iterations/xgrid'); u_calc=h5read(file, '/Iterations/u_calc'); u_exact=h5read(file, '/Iterations/u_exact'); u_direct=h5read(file, '/Iterations/u_direct'); subplot(211) plot(xgrid, u_exact, xgrid,u_calc,'o') legend('Analytic', 'Calculated') xlabel('X');ylabel('Grid values of solution') grid on title(title_str); % % % $$$ mglevel=sprintf('/mglevels/level.%.2d', 1); % $$$ dset=strcat(mglevel,'/mata'); % $$$ A = gb_mat(file, dset); % $$$ D = diag(diag(A),0); % $$$ n=rank(A); % $$$ k=1:1:n; % $$$ if relax(1:2) == 'ja' % $$$ % % $$$ % Compute eigenvalues of Rj = D^(-1)*A % $$$ % % $$$ [V, l] = eig(A,D); % $$$ [lambda, iss] = sort(diag(l)); % $$$ V = V(1:end,iss); % $$$ % % $$$ % Spectral radius of Jacobi iteration matrix % $$$ % R(omega) = max |1-omega*lambda| % $$$ % % $$$ om=0:0.01:1; % $$$ for i=1:size(om,2) % $$$ rho(i) = max(abs(1-om(i).*lambda)); % $$$ end % $$$ fprintf(1, 'Spectral Radius of relaxation matrix = %g\n', max(abs(1-omega*lambda))) % $$$ % $$$ figure % $$$ subplot(211) % $$$ plot(k, 1-omega*lambda, 'o-') % $$$ xlabel('mode k'); ylabel('Eigen value of inv(D)*A') % $$$ grid on % $$$ title(title_str) % $$$ subplot(212) % $$$ plot(om, rho) % $$$ xlabel('\omega'); ylabel('Spectral Radius') % $$$ grid on % $$$ % $$$ for i=1:n % $$$ k=mod(i-1,4*5)+1; % $$$ if k==1 % $$$ figure % $$$ title(title_str) % $$$ end % $$$ subplot(4,5,k) % $$$ str = sprintf('Mode = %d, ||R|| = %.3f', i, 1-omega*lambda(i)); % $$$ plot(V(:,i)); grid on % $$$ title(str) % $$$ end % $$$ elseif relax(1:2) == 'gs' % $$$ % % $$$ % Spectral radius of GS Iteration Matrix % $$$ % Rg = (D-L)^(-1) * U % $$$ % % $$$ B = tril(A,0); % D-L % $$$ lambda = eig(-triu(A,1),B); % $$$ fprintf(1, 'Spectral Radius of relaxation matrix = %g\n', max(abs(lambda))) % $$$ figure % $$$ plot(lambda, 'o', 'MarkerSize', 6) % $$$ axis equal % $$$ grid on % $$$ xlabel('Real of eigenvalues'); ylabel('Imag of eigenvalues') % $$$ title(title_str) % $$$ end diff --git a/matlab/test_mg2d.m b/matlab/test_mg2d.m index cac6b9d..279ad29 100644 --- a/matlab/test_mg2d.m +++ b/matlab/test_mg2d.m @@ -1,102 +1,102 @@ % % @file test_mg2d.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % file='test_mg2d.h5'; % nx=h5readatt(file,'/','NX'); ny=h5readatt(file,'/','NY'); kx=h5readatt(file,'/','KX'); ky=h5readatt(file,'/','KY'); nidbas1=h5readatt(file,'/','NIDBAS1'); nidbas2=h5readatt(file,'/','NIDBAS2'); relax=h5readatt(file,'/','RELAX'); nlevels=h5readatt(file,'/','LEVELS'); nu1=h5readatt(file,'/','NU1'); nu2=h5readatt(file,'/','NU2'); mu=h5readatt(file,'/','MU'); title_str=sprintf(['N=(%d,%d), NIDBAS=(%d,%d), relax=%s, nu1=%d, ' ... 'nu2=%d, mu=%d, LEVELS=%d, KX=%d, KY=%d'], ... nx,ny,nidbas1,nidbas2,relax,nu1,nu2,mu,nlevels,kx,ky); % % Prolongation matrices at the coarsest grid % levels=nlevels; mglevel=sprintf('/mglevels/level.%.2d', levels); dset=strcat(mglevel,'/matpx'); matpx=csr_mat(file,dset); dset=strcat(mglevel,'/matpy'); matpy=csr_mat(file,dset); % % FE matrix at the finest grid % levels=1; mglevel=sprintf('/mglevels/level.%.2d', levels); dset=strcat(mglevel,'/mata'); [mata,diag]=csr_mat(file,dset); f=h5read(file,strcat(mglevel,'/f')); v=h5read(file,strcat(mglevel,'/v')); f1d=h5read(file,strcat(mglevel,'/f1d')); v1d=h5read(file,strcat(mglevel,'/v1d')); % $$$ figure % $$$ spy(mata) % % Solutions at the finest grid % x=h5read(file,'/solutions/xg'); y=h5read(file,'/solutions/yg'); sol_anal=h5read(file,'/solutions/anal'); sol_calc=h5read(file,'/solutions/calc'); % $$$ figure % $$$ subplot(211) % $$$ surf(x,y,sol_anal') % $$$ xlabel('X'); ylabel('Y'); % $$$ title('Analytical solution on the finest grid') % $$$ subplot(212) % $$$ surf(x,y,sol_calc') % $$$ xlabel('X'); ylabel('Y'); % $$$ title('Calculated solution on the finest grid') % % Iterations % dset='/Iterations/'; disc_err=h5read(file, strcat(dset,'disc_errors')); resid=h5read(file, strcat(dset,'residues')); its=0:1:size(resid,1)-1; figure subplot(211) semilogy(its,resid,'o-') grid on xlabel('Iterations'); ylabel('Norm of residue'); title(title_str); subplot(212) semilogy(its,disc_err,'h-') grid on xlabel('Iterations'); ylabel('Norm of error'); diff --git a/matlab/test_mg2d_cyl.m b/matlab/test_mg2d_cyl.m index 3258da6..86f6ec5 100644 --- a/matlab/test_mg2d_cyl.m +++ b/matlab/test_mg2d_cyl.m @@ -1,141 +1,141 @@ % % @file test_mg2d_cyl.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % file='test_mg2d_cyl.h5'; % nx=h5readatt(file,'/','NX'); ny=h5readatt(file,'/','NY'); nidbas1=h5readatt(file,'/','NIDBAS1'); nidbas2=h5readatt(file,'/','NIDBAS2'); modem=h5readatt(file,'/','MODEM'); modep=h5readatt(file,'/','MODEP'); relax=h5readatt(file,'/','RELAX'); nlevels=h5readatt(file,'/','LEVELS'); nu1=h5readatt(file,'/','NU1'); nu2=h5readatt(file,'/','NU2'); mu=h5readatt(file,'/','MU'); title_str=sprintf(['N=(%d,%d), NIDBAS=(%d,%d), relax=%s, nu1=%d, ' ... 'nu2=%d, mu=%d, LEVELS=%d, MODEM=%d, MODEP=%d'], ... nx,ny,nidbas1,nidbas2,relax,nu1,nu2,mu,nlevels,modem,modep); % % Prolongation matrices at the coarsest grid % levels=nlevels; mglevel=sprintf('/mglevels/level.%.2d', levels); dset=strcat(mglevel,'/matpx'); matpx=csr_mat(file,dset); dset=strcat(mglevel,'/matpy'); matpy=csr_mat(file,dset); % % FE matrix at the finest grid % levels=1; mglevel=sprintf('/mglevels/level.%.2d', levels); dset=strcat(mglevel,'/mata'); [mata,diag]=csr_mat(file,dset); f=h5read(file,strcat(mglevel,'/f')); v=h5read(file,strcat(mglevel,'/v')); f1d=h5read(file,strcat(mglevel,'/f1d')); v1d=h5read(file,strcat(mglevel,'/v1d')); % $$$ figure % $$$ spy(mata) % % Solutions at the finest grid % x=h5read(file,'/solutions/xg'); y=h5read(file,'/solutions/yg'); sol_anal=h5read(file,'/solutions/anal'); sol_calc=h5read(file,'/solutions/calc'); sol_direct=h5read(file,'/solutions/direct'); nx=int32(size(x,1)); ny=int32(size(y,1)); figure subplot(211) surf(x,y,sol_anal') xlabel('X'); ylabel('Y'); title('Analytical solution on the finest grid') subplot(212) surf(x,y,sol_calc') % $$$ surf(x,y, (abs(sol_calc'-sol_anal'))) xlabel('X'); ylabel('Y'); title('Calculated solution on the finest grid') figure subplot(211) plot(x, sol_anal(:,ny/2),x, sol_calc(:,ny/2),'o') xlabel('r'); grid on legend('Analytic Solution', 'MG Solution') title(title_str) subplot(212) if modem == 0 plot(y, sol_anal(1,:),y, sol_calc(1,:),'o') else plot(y, sol_anal(nx/2,:),y, sol_calc(nx/2,:),'o') end xlabel('\theta'); grid on title(title_str) % $$$ figure % $$$ subplot(211) % $$$ semilogy(x, abs(sol_anal(:,ny/2)-sol_calc(:,ny/2)),'o') % $$$ xlabel('r'); ylabel('Error') % $$$ grid on % $$$ title(title_str) % $$$ subplot(212) % $$$ if modem == 0 % $$$ semilogy(y, abs(sol_anal(1,:)-sol_calc(1,:)),'o') % $$$ else % $$$ semilogy(y, abs(sol_anal(nx/2,:)-sol_calc(nx/2,:)),'o') % $$$ end % $$$ xlabel('\theta'); ylabel('Error'); % $$$ grid on % $$$ title(title_str) % $$$ % % Iterations % dset='/Iterations/'; disc_err=h5read(file, strcat(dset,'disc_errors')); resid=h5read(file, strcat(dset,'residues')); its=0:1:size(resid,1)-1; figure subplot(211) semilogy(its,resid,'o-') grid on xlabel('Iterations'); ylabel('Norm of residue'); title(title_str); subplot(212) semilogy(its,disc_err,'h-') grid on xlabel('Iterations'); ylabel('Norm of error'); diff --git a/matlab/test_mgp.m b/matlab/test_mgp.m index 4f5fb1b..69d5214 100644 --- a/matlab/test_mgp.m +++ b/matlab/test_mgp.m @@ -1,98 +1,98 @@ % % @file test_mgp.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % file='test_mgp.h5'; % nx=h5readatt(file,'/','NX'); nidbas=h5readatt(file,'/','NIDBAS'); relax=h5readatt(file,'/','RELAX'); levels=h5readatt(file,'/','LEVELS'); nu1=h5readatt(file,'/','NU1'); nu2=h5readatt(file,'/','NU2'); title_str=sprintf('NX = %d, NIDBAS = %d, levels = %d, nu1 = %d, nu2 = %d', nx, nidbas, levels, nu1, nu2); % % Read matrices at coarset grid % for lev=2:levels % % FE mat at fine grid mglevel=sprintf('/mglevels/level.%.2d', lev-1); dset=strcat(mglevel,'/mata'); mata_f=h5read(file,dset); n=size(mata_f,1); % % FE mat at coarse grid mglevel=sprintf('/mglevels/level.%.2d', lev); dset=strcat(mglevel,'/mata'); mata_c=h5read(file,dset); n=size(mata_c,1); % % Prolong mat dset=strcat(mglevel,'/matp'); matp=h5read(file,dset); % % Check fprintf(1,'Level %d: ||A_coarse - P''*A_fine*P|| = %g\n', lev, norm(matp'*mata_f*matp ... - mata_c)) end % % Iterations dset='/Iterations/'; err=h5read(file, strcat(dset,'errors')); disc_err=h5read(file, strcat(dset,'disc_errors')); resid=h5read(file, strcat(dset,'residues')); its=0:1:size(err,1)-1; figure subplot(221) semilogy(its,resid,'o-', its, disc_err,'h-') legend('Residue', 'Error') grid on xlabel('Iterations'); ylabel('Norm od residue and error'); title(title_str); % % Plot grid values at the last iteration xgrid=h5read(file, '/Iterations/xgrid'); u_calc=h5read(file, '/Iterations/u_calc'); u_exact=h5read(file, '/Iterations/u_exact'); u_direct=h5read(file, '/Iterations/u_direct'); subplot(222) plot(xgrid,u_exact, xgrid,u_calc,'o') xlabel('X');ylabel('Grid values of solution') grid on title(title_str); subplot(223) semilogy(xgrid,abs(u_calc-u_direct)) xlabel('X');ylabel('Diff with direct solution') grid on title(title_str); subplot(224) semilogy(xgrid,abs(u_calc-u_exact)) xlabel('X');ylabel('Diff with exact solution') grid on title(title_str); diff --git a/matlab/test_relax.m b/matlab/test_relax.m index 938b605..8d8fbbb 100644 --- a/matlab/test_relax.m +++ b/matlab/test_relax.m @@ -1,157 +1,157 @@ % % @file test_relax.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % file='test_relax.h5'; % nx=h5readatt(file,'/','NX'); alpha=h5readatt(file,'/','ALPHA'); nidbas=h5readatt(file,'/','NIDBAS'); relax=h5readatt(file,'/','RELAX'); omega=h5readatt(file,'/','OMEGA'); if alpha == 0 kx=h5readatt(file,'/','KX'); title_str=sprintf('N=%d, NIDBAS=%d, KX=%d, relax=%s, omega=%.3f', ... nx,nidbas,kx,relax, omega); else modem=h5readatt(file,'/','MODEM'); modep=h5readatt(file,'/','MODEP'); title_str=sprintf('N=%d, NIDBAS=%d, modem=%d, modep=%d, relax=%s, omega=%.3f', ... nx,nidbas,modem,modep,relax, omega); end % % Solutions at the finest grid % x=h5read(file,'/solutions/xg'); sol_direct=h5read(file,'/solutions/direct'); sol_anal=h5read(file,'/solutions/anal'); sol_calc=h5read(file,'/solutions/calc'); figure subplot(211) plot(x, sol_anal, x, sol_calc, 'o') legend('Analytic', 'Calculated') xlabel('X') grid on title(title_str); % % Relaxations % errdisc=h5read(file,'/relaxation/errdisc'); resid=h5read(file,'/relaxation/resid'); its=0:1:size(errdisc)-1; subplot(212) semilogy(its,errdisc,its,resid) legend('Discretisation Error', 'Residual Norm') xlabel('Iterations') grid on title(title_str) % % FE Matrix % dset = '/MATA/'; A = gb_mat(file, dset); D = diag(diag(A),0); n=rank(A); k=1:1:n; if relax(1:2) == 'ja' % % Compute eigenvalues of Rj = D^(-1)*A % [V, l] = eig(A,D); [lambda, iss] = sort(diag(l)); V = V(1:end,iss); % % Spectral radius of Jacobi iteration matrix % R(omega) = max |1-omega*lambda| % om=0:0.01:1; for i=1:size(om,2) rho(i) = max(abs(1-om(i).*lambda)); end fprintf(1, 'Spectral Radius of relaxation matrix = %g\n', max(abs(1-omega*lambda))) figure subplot(211) plot(k, 1-omega*lambda, 'o-') xlabel('mode k'); ylabel('Eigen value of inv(D)*A') grid on title(title_str) subplot(212) plot(om, rho) omega_c = 2.0/max(lambda); str = sprintf('Critical omega = %.3f', omega_c) title(str) xlabel('\omega'); ylabel('Spectral Radius') grid on elseif relax(1:2) == 'gs' % % Spectral radius of GS Iteration Matrix % Rg = -(D+L)^(-1) * U % B = tril(A,0); % D+L % $$$ [V, l] = eig(-triu(A,1),B); lambda=diag(l); % $$$ [V, l] = eig(B,A); lambda = 1 - 1./diag(l); [V, l] = eig(A,B); lambda = 1 - diag(l); [lambda, iss] = sort(lambda, 'descend'); V = V(1:end,iss); fprintf(1, 'Spectral Radius of relaxation matrix = %g\n', max(abs(lambda))) figure subplot(211) plot(real(lambda), imag(lambda), 'o') xlabel('Real of eigenvalues'); ylabel(['Imag of ' ... 'eigenvalues']) axis equal title(title_str) grid on subplot(212) plot(k, abs(lambda), 'o-') xlabel('Mode'); ylabel('eigenvalues') grid on end % % Plot eigenvectors neig=size(lambda,1); for i=1:neig k=mod(i-1,4*5)+1; if k==1 figure title(title_str) end subplot(4,5,k) if relax(1:2) == 'ja' str = sprintf('Mode = %d, ||R|| = %.4f', i, 1-omega* ... lambda(i)); else str = sprintf('Mode = %d, ||R|| = %.4f', i, ... lambda(i)); end plot(V(:,i)); grid on title(str) end diff --git a/matlab/test_relax2d.m b/matlab/test_relax2d.m index 3e5f778..2370781 100644 --- a/matlab/test_relax2d.m +++ b/matlab/test_relax2d.m @@ -1,86 +1,86 @@ % % @file test_relax2d.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % file='test_relax2d.h5'; % nx=h5readatt(file,'/','NX'); ny=h5readatt(file,'/','NY'); kx=h5readatt(file,'/','KX'); ky=h5readatt(file,'/','KY'); nidbas1=h5readatt(file,'/','NIDBAS1'); nidbas2=h5readatt(file,'/','NIDBAS2'); levels=h5readatt(file,'/','LEVELS'); relax=h5readatt(file,'/','RELAX'); title_str=sprintf('N=(%d,%d), NIDBAS=(%d,%d), KX=%d, KY=%d, relax=%s', ... nx,ny,nidbas1,nidbas2,kx,ky,relax); % % Prolongation matrices at the coarsest grid % mglevel=sprintf('/mglevels/level.%.2d', levels); dset=strcat(mglevel,'/matpx'); matpx=csr_mat(file,dset); dset=strcat(mglevel,'/matpy'); matpy=csr_mat(file,dset); % % FE matrix at the finest grid % levels=1; mglevel=sprintf('/mglevels/level.%.2d', levels); dset=strcat(mglevel,'/mata'); [mata,diag]=csr_mat(file,dset); x=h5read(file,strcat(mglevel,'/x')); y=h5read(file,strcat(mglevel,'/y')); f=h5read(file,strcat(mglevel,'/f')); v=h5read(file,strcat(mglevel,'/v')); f1d=h5read(file,strcat(mglevel,'/f1d')); v1d=h5read(file,strcat(mglevel,'/v1d')); figure spy(mata) % % Solutions at te finest grid % sol_direct=h5read(file,'/solutions/direct'); sol_anal=h5read(file,'/solutions/anal'); figure surf(x,y,sol_direct') xlabel('X'); ylabel('Y'); % % Relaxations % errdisc=h5read(file,'/relaxation/errdisc'); resid=h5read(file,'/relaxation/resid'); its=0:1:size(errdisc)-1; figure semilogy(its,errdisc,its,resid) legend('Discretisation Error', 'Residual Norm') xlabel('Iterations') grid on title(title_str) diff --git a/matlab/test_relax2d_cyl.m b/matlab/test_relax2d_cyl.m index e01c146..78f4c4c 100644 --- a/matlab/test_relax2d_cyl.m +++ b/matlab/test_relax2d_cyl.m @@ -1,183 +1,183 @@ % % @file test_relax2d_cyl.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % file='test_relax2d_cyl.h5'; % nx=h5readatt(file,'/','NX'); ny=h5readatt(file,'/','NY'); modem=h5readatt(file,'/','MODEM'); modep=h5readatt(file,'/','MODEP'); nidbas1=h5readatt(file,'/','NIDBAS1'); nidbas2=h5readatt(file,'/','NIDBAS2'); levels=h5readatt(file,'/','LEVELS'); omega=h5readatt(file,'/','OMEGA'); relax=h5readatt(file,'/','RELAX'); if relax(1:2) == 'ja' title_str=sprintf('N=(%d,%d), NIDBAS=(%d,%d), MODEM=%d, MODEP=%d, relax=%s, omega=%.3f', ... nx,ny,nidbas1,nidbas2,modem,modep,relax, omega); else title_str=sprintf('N=(%d,%d), NIDBAS=(%d,%d), MODEM=%d, MODEP=%d, relax=%s,', ... nx,ny,nidbas1,nidbas2,modem,modep,relax); end % % Prolongation matrices at the coarsest grid % mglevel=sprintf('/mglevels/level.%.2d', levels); dset=strcat(mglevel,'/matpx'); matpx=csr_mat(file,dset); dset=strcat(mglevel,'/matpy'); matpy=csr_mat(file,dset); % % Solutions at the finest grid % mglevel=sprintf('/mglevels/level.%.2d', 1); x=h5read(file,strcat(mglevel,'/x')); y=h5read(file,strcat(mglevel,'/y')); f=h5read(file,strcat(mglevel,'/f')); v=h5read(file,strcat(mglevel,'/v')); f1d=h5read(file,strcat(mglevel,'/f1d')); v1d=h5read(file,strcat(mglevel,'/v1d')); sol_direct=h5read(file,'/solutions/direct'); sol_relax=h5read(file,'/solutions/relax'); sol_anal=h5read(file,'/solutions/anal'); % $$$ figure % $$$ surf(x,y,sol_direct') % $$$ xlabel('r'); ylabel('\theta'); % $$$ title(title_str) % $$$ figure % $$$ subplot(211) % $$$ plot(x, sol_anal(:,ny/2),x, sol_direct(:,ny/2),'o') % $$$ xlabel('r'); ylabel('Direct solution') % $$$ grid on % $$$ title(title_str) % $$$ subplot(212) % $$$ if modem == 0 % $$$ plot(x, sol_anal(1,:),x, sol_direct(1,:),'o') % $$$ else % $$$ plot(x, sol_anal(nx/2,:),x, sol_direct(nx/2,:),'o') % $$$ end % $$$ xlabel('\theta'); ylabel('Direct solution') % $$$ grid on % $$$ title(title_str) % % Relaxations % errdisc=h5read(file,'/relaxation/errdisc'); resid=h5read(file,'/relaxation/resid'); its=0:1:size(errdisc)-1; figure semilogy(its,errdisc,its,resid) legend('Discretisation Error', 'Residual Norm') xlabel('Iterations') grid on title(title_str) % $$$ figure % $$$ subplot(211) % $$$ plot(x, sol_anal(:,ny/2),x, sol_relax(:,ny/2),'o') % $$$ xlabel('r'); ; ylabel('Relaxed solution') % $$$ grid on % $$$ title(title_str) % $$$ subplot(212) % $$$ if modem == 0 % $$$ plot(x, sol_anal(1,:),x, sol_relax(1,:),'o') % $$$ else % $$$ plot(x, sol_anal(nx/2,:),x, sol_relax(nx/2,:),'o') % $$$ end % $$$ xlabel('\theta'); ylabel('Relaxed solution') % $$$ grid on % $$$ title(title_str) figure subplot(211) plot(x, sol_anal(:,ny/2),x, sol_direct(:,ny/2),'o') xlabel('r'); ; ylabel('Direct solution') grid on title(title_str) subplot(212) plot(x, sol_anal(1,:),x, sol_direct(1,:),'o') xlabel('\theta'); ylabel('Direct solution at the axis') grid on title(title_str) % % FE matrix at the finest grid % levels=1; mglevel=sprintf('/mglevels/level.%.2d', levels); dset=strcat(mglevel,'/mata'); [mata,diag]=csr_mat(file,dset); n=size(diag,1); % $$$ figure % $$$ spy(mata) % $$$ title(title_str) if relax(1:2) == 'ja' % % Eigenvalues of inv(D)*A % matd=spdiags(diag,0,n,n); lambda = eigs(mata, matd, n); om=0:0.01:0.8; clear rho; for i=1:size(om,2) rho(i) = max(abs(1-om(i).*lambda)); end figure subplot(211) plot(lambda,'o') ylabel('Eigenvalue of inv(D)*A') grid on title(title_str) subplot(212) plot(om,rho) xlabel('\omega'); ylabel('\rho') grid on omega_c = 2.0/max(lambda); lambda_min = eigs(mata, matd, 1, 'SM'); fprintf(1, 'Spectral Radius of relaxation matrix = %g\n', abs(1-omega*lambda_min)) str = sprintf('Critical omega = %.3f', omega_c) title(str) else % % Spectral radius of GS Iteration Matrix % Rg = -(D+L)^(-1) * U % matl= tril(mata,0); % D+L lambda = eigs(-triu(mata,1),matl); fprintf(1, 'Spectral Radius of relaxation matrix = %g\n', max(abs(lambda))) figure plot(lambda, 'o') axis equal grid on xlabel('Real of eigenvalues'); ylabel('Imag of eigenvalues') title(title_str) end diff --git a/matlab/test_stencil.m b/matlab/test_stencil.m index db11cc9..174267a 100644 --- a/matlab/test_stencil.m +++ b/matlab/test_stencil.m @@ -1,62 +1,62 @@ % % @file test_stencil.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % if ~exist('file'), file='test_stencil.h5'; end x = h5read(file, '/xgrid'); y = h5read(file, '/ygrid'); b1 = h5read(file, '/barr1'); b2 = h5read(file, '/barr2'); b3 = h5read(file, '/barr3'); n1=size(x,1); n2=size(y,1); n=n1*n2 mat = stencil_mat(file, '/MAT'); % $$$ figure % $$$ spy(mat) fprintf('||B1|| = %e\n', norm(reshape(b1,n,1))); fprintf('||B2|| = %e\n', norm(reshape(b2,n,1))); fprintf('||B3|| = %e\n', norm(reshape(b3,n,1))); % $$$ figure % $$$ subplot(321) % $$$ surf(x,y,v'); title('Exact solution') % $$$ xlabel('X'); ylabel('Y') % $$$ subplot(322) % $$$ surf(x,y,f'); title('RHS') % $$$ xlabel('X'); ylabel('Y') % $$$ subplot(323) % $$$ surf(x,y,u'); title('Num. solution') % $$$ subplot(324) % $$$ surf(x,y,udirect'); title('Direct. solution') % $$$ xlabel('X'); ylabel('Y') % $$$ subplot(325) % $$$ surf(x,y,resids'); title('Residuals') % $$$ xlabel('X'); ylabel('Y') % $$$ subplot(326) % $$$ surf(x,y,errs'); title('Errors') % $$$ xlabel('X'); ylabel('Y') diff --git a/matlab/test_stencilg.m b/matlab/test_stencilg.m index fb7dccd..fae5111 100644 --- a/matlab/test_stencilg.m +++ b/matlab/test_stencilg.m @@ -1,63 +1,63 @@ % % @file test_stencilg.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % if ~exist('file'), file='test_stencilg.h5'; end x = h5read(file, '/xgrid'); y = h5read(file, '/ygrid'); a = h5read(file, '/arr'); b1 = h5read(file, '/barr1'); b2 = h5read(file, '/barr2'); b3 = h5read(file, '/barr3'); n1=size(x,1); n2=size(y,1); n=n1*n2 mat = stencil_mat(file, '/MAT'); % $$$ figure % $$$ spy(mat) fprintf('||B1|| = %e\n', norm(reshape(b1,n,1))); fprintf('||B2|| = %e\n', norm(reshape(b2,n,1))); fprintf('||B3|| = %e\n', norm(reshape(b3,n,1))); % $$$ figure % $$$ subplot(321) % $$$ surf(x,y,v'); title('Exact solution') % $$$ xlabel('X'); ylabel('Y') % $$$ subplot(322) % $$$ surf(x,y,f'); title('RHS') % $$$ xlabel('X'); ylabel('Y') % $$$ subplot(323) % $$$ surf(x,y,u'); title('Num. solution') % $$$ subplot(324) % $$$ surf(x,y,udirect'); title('Direct. solution') % $$$ xlabel('X'); ylabel('Y') % $$$ subplot(325) % $$$ surf(x,y,resids'); title('Residuals') % $$$ xlabel('X'); ylabel('Y') % $$$ subplot(326) % $$$ surf(x,y,errs'); title('Errors') % $$$ xlabel('X'); ylabel('Y') diff --git a/matlab/test_transf2d.m b/matlab/test_transf2d.m index 506c5ad..9936481 100644 --- a/matlab/test_transf2d.m +++ b/matlab/test_transf2d.m @@ -1,105 +1,105 @@ % % @file test_transf2d.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % file='test_transf2d.h5'; % nx(1)=h5readatt(file,'/','NX'); ny(1)=h5readatt(file,'/','NY'); kx=h5readatt(file,'/','KX'); ky=h5readatt(file,'/','KY'); nidbas1=h5readatt(file,'/','NIDBAS1'); nidbas2=h5readatt(file,'/','NIDBAS2'); nlevels=h5readatt(file,'/','LEVELS'); title_str=sprintf('N=(%d,%d), NIDBAS=(%d,%d), KX=%d, KY=%d', ... nx(1),ny(1),nidbas1,nidbas2,kx,ky); % % Grid sizes on each levels % for l=2:nlevels nx(l) = nx(l-1)/2; ny(l) = ny(l-1)/2; end % % Prolongation matrices at the coarsest grid % levels=nlevels; mglevel=sprintf('/mglevels/level.%.2d', levels); dset=strcat(mglevel,'/matpx'); matpx=csr_mat(file,dset); dset=strcat(mglevel,'/matpy'); matpy=csr_mat(file,dset); % % FE matrix at the finest grid % levels=1; mglevel=sprintf('/mglevels/level.%.2d', levels); dset=strcat(mglevel,'/mata'); [mata,diag]=csr_mat(file,dset); x=h5read(file,strcat(mglevel,'/x')); y=h5read(file,strcat(mglevel,'/y')); f=h5read(file,strcat(mglevel,'/f')); v=h5read(file,strcat(mglevel,'/v')); f1d=h5read(file,strcat(mglevel,'/f1d')); v1d=h5read(file,strcat(mglevel,'/v1d')); figure spy(mata) % % Solutions at the finest grid % sol_direct=h5read(file,'/solutions/direct'); sol_anal=h5read(file,'/solutions/anal'); vfine=h5read(file,'/solutions/vfine'); figure subplot(211) surf(x,y,sol_direct') xlabel('X'); ylabel('Y'); title('Direct Solve on the finest grid') subplot(212) surf(x,y,vfine'-sol_direct') xlabel('X'); ylabel('Y'); title('Prolongation solution on the finest grid') % % Errors % errdisc = h5read(file,'/errors/errdisc'); resid = h5read(file,'/errors/resid'); restrict = h5read(file,'/errors/restrict'); prolong = h5read(file,'/errors/prolong'); errdisc_prolong = h5read(file,'/errors/disc_err_prolong'); figure subplot(211) loglog(nx, errdisc, 'o-', nx(1:end-1), errdisc_prolong, 'h-') grid on; legend('Direct Solution', 'Prolonged Solution') xlabel('N'); ylabel('Discretization Errors') title(title_str); subplot(212) loglog(nx(2:end), restrict, 'o-', nx(1:end-1), prolong, 'h-') grid on; legend('Restricted RHS', 'Prolonged Solution') xlabel('N'); ylabel('Discretization Errors') diff --git a/matlab/test_transf2d_cyl.m b/matlab/test_transf2d_cyl.m index 1466f02..63b4c00 100644 --- a/matlab/test_transf2d_cyl.m +++ b/matlab/test_transf2d_cyl.m @@ -1,117 +1,117 @@ % % @file test_transf2d_cyl.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % file='test_transf2d_cyl.h5'; % nx(1)=h5readatt(file,'/','NX'); ny(1)=h5readatt(file,'/','NY'); modem=h5readatt(file,'/','MODEM'); modep=h5readatt(file,'/','MODEP'); nidbas1=h5readatt(file,'/','NIDBAS1'); nidbas2=h5readatt(file,'/','NIDBAS2'); nlevels=h5readatt(file,'/','LEVELS'); title_str=sprintf('N=(%d,%d), NIDBAS=(%d,%d), MODEM=%d, MODEP=%d', ... nx(1),ny(1),nidbas1,nidbas2,modem,modep); % % Grid sizes on each levels % for l=2:nlevels nx(l) = nx(l-1)/2; ny(l) = ny(l-1)/2; end % % Prolongation matrices at the coarsest grid % levels=nlevels; mglevel=sprintf('/mglevels/level.%.2d', levels); dset=strcat(mglevel,'/matpx'); matpx=csr_mat(file,dset); dset=strcat(mglevel,'/matpy'); matpy=csr_mat(file,dset); % % FE matrix at the finest grid % levels=1; mglevel=sprintf('/mglevels/level.%.2d', levels); dset=strcat(mglevel,'/mata'); [mata,diag]=csr_mat(file,dset); x=h5read(file,strcat(mglevel,'/x')); y=h5read(file,strcat(mglevel,'/y')); f=h5read(file,strcat(mglevel,'/f')); v=h5read(file,strcat(mglevel,'/v')); f1d=h5read(file,strcat(mglevel,'/f1d')); v1d=h5read(file,strcat(mglevel,'/v1d')); % $$$ figure % $$$ spy(mata) % % Solutions at the finest grid % sol_direct=h5read(file,'/solutions/direct'); sol_anal=h5read(file,'/solutions/anal'); vfine=h5read(file,'/solutions/vfine'); figure subplot(211) surf(x,y,sol_direct') xlabel('X'); ylabel('Y'); title('Direct Solve on the finest grid') subplot(212) surf(x,y,vfine') xlabel('X'); ylabel('Y'); title('Prolongation solution on the finest grid') figure subplot(311) plot(x, sol_direct(:,ny(1)/2),x, vfine(:,ny(1)/2),'o') xlabel('r'); grid on legend('Direct Solution', 'Prolonged Solution') title(title_str) subplot(313) plot(y, sol_direct(1,:),y, vfine(1,:),'o') xlabel('\theta'); ylabel('On axis') grid on title(title_str) subplot(312) plot(y, sol_direct(nx(1)/2,:),y, vfine(nx(1)/2,:),'o') xlabel('\theta'); ylabel('Off axis') grid on title(title_str) % % Errors % errdisc = h5read(file,'/errors/errdisc'); resid = h5read(file,'/errors/resid'); restrict = h5read(file,'/errors/restrict'); prolong = h5read(file,'/errors/prolong'); errdisc_prolong = h5read(file,'/errors/disc_err_prolong'); figure loglog(nx, errdisc, 'o-', nx(2:end), errdisc_prolong, 'h-') grid on; legend('Direct Solution', 'Prolonged Solution') xlabel('N'); ylabel('Discretization Errors') title(title_str); diff --git a/matlab/tpardiso.m b/matlab/tpardiso.m index 5efb0a1..7609732 100644 --- a/matlab/tpardiso.m +++ b/matlab/tpardiso.m @@ -1,72 +1,72 @@ % % @file tpardiso.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % mat='/MAT1'; gbmat; clear S gb_mat; % Diagonal balancing of matrix dbal = 1./sqrt(diag(a)); a = diag(dbal)*a*diag(dbal); file='tpardiso.h5'; mat='/MAT'; n=hdf5read(file,strcat(mat,'/N')); nz=hdf5read(file,strcat(mat,'/NZ')); irow=hdf5read(file,strcat(mat,'/irow')); cols=hdf5read(file,strcat(mat,'/cols')); val=hdf5read(file,strcat(mat,'/val')); perm=hdf5read(file,strcat(mat,'/perm')); amat=zeros(n,n); % Check PARDISO mat for i=1:n for k=irow(i):irow(i+1)-1 j=cols(k); amat(i,j) = val(k); amat(j,i) = val(k); end end err = a-amat; errmx = max(max(abs(err))); fprintf(1,'Max. error = %e\n', errmx); figure spy(sparse(amat(perm,perm)),'r.'); LABEL=sprintf('n = %d, nz =%d', n, nz); title(LABEL) % $$$ pmat=zeros(n); % $$$ for i=1:n % $$$ pmat(i,perm(i))=1; % $$$ end % $$$ amod=pmat*amat*pmat'; % $$$ S=sparse(amod); % $$$ figure % $$$ spy(S,'r.'); % $$$ LABEL=sprintf('n = %d', n); % $$$ title(LABEL) diff --git a/matlab/two_grid.m b/matlab/two_grid.m index 3be72de..d6a6a0d 100644 --- a/matlab/two_grid.m +++ b/matlab/two_grid.m @@ -1,92 +1,92 @@ % % @file two_grid.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % file='two_grid.h5'; % nx=h5readatt(file,'/','NX'); nidbas=h5readatt(file,'/','NIDBAS'); levels=h5readatt(file,'/','LEVELS'); title_str=sprintf('NX = %d, NIDBAS = %d, levels = %d', nx, nidbas, levels); % % Read matrices at coarset grid % for lev=2:levels % % FE mat at fine grid mglevel=sprintf('/mglevels/level.%.2d', lev-1); dset=strcat(mglevel,'/mata'); ku=h5readatt(file,dset,'KU'); kl=ku; n=h5readatt(file,dset,'RANK'); gbmat=h5read(file,dset); mata_f=zeros(n,n); for i=1:n jmin = max(1,i-kl); jmax = min(n,i+ku); for j=jmin:jmax ib = kl+ku+i-j+1; mata_f(i,j)=gbmat(ib,j); end end dset=strcat(mglevel,'/f'); f_fine = h5read(file,dset); dset=strcat(mglevel,'/v'); v_fine = h5read(file,dset); % % FE mat at coarse grid mglevel=sprintf('/mglevels/level.%.2d', lev); dset=strcat(mglevel,'/mata'); ku=h5readatt(file,dset,'KU'); kl=ku; n=h5readatt(file,dset,'RANK'); gbmat=h5read(file,dset); mata_c=zeros(n,n); for i=1:n jmin = max(1,i-kl); jmax = min(n,i+ku); for j=jmin:jmax ib = kl+ku+i-j+1; mata_c(i,j)=gbmat(ib,j); end end dset=strcat(mglevel,'/f'); f_coarse = h5read(file,dset); dset=strcat(mglevel,'/v'); v_coarse = h5read(file,dset); % % Prolong mat dset=strcat(mglevel,'/matp'); matp=h5read(file,dset); % % Check fprintf(1,'Level %d: ||A_coarse - P''*A_fine*P|| = %g\n', lev, norm(matp'*mata_f*matp ... - mata_c)) end % v_prolong = h5read(file,'/v_prolong'); figure plot(v_fine) hold on plot(v_prolong, 'r') diff --git a/matlab/zcsr_mat.m b/matlab/zcsr_mat.m index 0b1b491..9411e3c 100644 --- a/matlab/zcsr_mat.m +++ b/matlab/zcsr_mat.m @@ -1,44 +1,44 @@ % % @file zcsr_mat.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % function [mata, diag] = zcsr_mat(file, dset) n=hdf5read(file,dset, 'RANK'); nnz=hdf5read(file,dset, 'NNZ'); cols=hdf5read(file, strcat(dset,'/cols')); irow=hdf5read(file, strcat(dset,'/irow')); val=h5Complex_ll(file, strcat(dset,'/val')); idiag=hdf5read(file, strcat(dset,'/idiag')); for i=1:n s = irow(i); e = irow(i+1)-1; rows(s:e) = i; end cols=double(cols); rows=double(rows); mata = sparse(rows,cols,val); if nargout == 2 diag = val(idiag); end diff --git a/matlab/zmumps_mat.m b/matlab/zmumps_mat.m index 2db15b7..4558886 100644 --- a/matlab/zmumps_mat.m +++ b/matlab/zmumps_mat.m @@ -1,44 +1,44 @@ % % @file zmumps_mat.m % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % function [mata, diag] = zmumps_mat(file, dset) n=hdf5read(file,dset, 'RANK'); Nnz=hdf5read(file,dset, 'NNZ'); cols=hdf5read(file, strcat(dset,'/cols')); irow=hdf5read(file, strcat(dset,'/irow')); irn=hdf5read(file, strcat(dset,'/mumps_par/IRN')); val=h5Complex_ll(file, strcat(dset,'/val')); idiag=int32(find((irn-cols)==0)); for i=1:n s = irow(i); e = irow(i+1)-1; rows(s:e) = i; end cols=double(cols); rows=double(rows); mata = sparse(rows,cols,val); if nargout == 2 diag = val(idiag); end diff --git a/multigrid/CMakeLists.txt b/multigrid/CMakeLists.txt index 27779d3..b99a423 100644 --- a/multigrid/CMakeLists.txt +++ b/multigrid/CMakeLists.txt @@ -1,32 +1,32 @@ # # @file CMakeLists.txt # # @brief # # @copyright # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) # SPC (Swiss Plasma Center) # -# spclibs is free software: you can redistribute it and/or modify it under +# SPClibs is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) # any later version. # -# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . # # @authors # (in alphabetical order) # @author Nicolas Richart # @author Trach-Minh Tran # add_subdirectory(src) if(BSPLINES_EXAMPLES) add_subdirectory(wk) endif() #add_subdirectory(halpern) diff --git a/multigrid/docs/Makefile b/multigrid/docs/Makefile index e144969..d18fa33 100644 --- a/multigrid/docs/Makefile +++ b/multigrid/docs/Makefile @@ -1,52 +1,52 @@ # # @file Makefile # # @brief # # @copyright # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) # SPC (Swiss Plasma Center) # -# spclibs is free software: you can redistribute it and/or modify it under +# SPClibs is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) # any later version. # -# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . # # @authors # (in alphabetical order) # @author Trach-Minh Tran # all: multigrid.pdf multigrid_2d.pdf mg_gbs.pdf grid.eps: grid.tex tex grid.tex dvips -o grid.eps grid.dvi mg_gbs.dvi: grid.eps clean: rm -f grid.eps *.dvi *.log *.aux *~ *.toc *.flc *.bbl *.blg *.out *~ distclean: clean .SUFFIXES: .SUFFIXES: .tex .dvi .pdf %.dvi: %.tex latex $< @while ( grep "Rerun to get cross-references" \ ${<:tex=log} > /dev/null ); do \ latex $<; \ done latex $< %.pdf: %.dvi dvipdf $< diff --git a/multigrid/docs/grid.tex b/multigrid/docs/grid.tex index 13b90a0..40091d9 100644 --- a/multigrid/docs/grid.tex +++ b/multigrid/docs/grid.tex @@ -1,70 +1,70 @@ % % @file grid.tex % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % \hoffset=-2truecm \voffset=-2truecm \special{papersize=9cm,5cm} \hsize=9truecm \vsize=5truecm \parindent=0pt \nopagenumbers \input pstricks \pspicture(4,4) \psline[linestyle=solid](0,0)(8,0) \psline[linestyle=dotted](0,1)(8,1) \psline[linestyle=solid](0,2)(8,2) \psline[linestyle=dotted](0,3)(8,3) \psline[linestyle=solid](0,4)(8,4) \psline[linestyle=solid](0,0)(0,4) \psline[linestyle=dotted](1,0)(1,4) \psline[linestyle=solid](2,0)(2,4) \psline[linestyle=dotted](3,0)(3,4) \psline[linestyle=solid](4,0)(4,4) \psline[linestyle=dotted](5,0)(5,4) \psline[linestyle=solid](6,0)(6,4) \psline[linestyle=dotted](7,0)(7,4) \psline[linestyle=solid](8,0)(8,4) \psdots[dotstyle=square,dotscale=2](0,0)(2,0)(4,0)(6,0)(8,0) \psdots[dotstyle=square,dotscale=2](0,2)(2,2)(4,2)(6,2)(8,2) \psdots[dotstyle=square,dotscale=2](0,4)(2,4)(4,4)(6,4)(8,4) \psdots[dotstyle=*](0,0)(1,0)(2,0)(3,0)(4,0)(5,0)(6,0)(7,0)(8,0) \psdots[dotstyle=*](0,1)(1,1)(2,1)(3,1)(4,1)(5,1)(6,1)(7,1)(8,1) \psdots[dotstyle=*](0,2)(1,2)(2,2)(3,2)(4,2)(5,2)(6,2)(7,2)(8,2) \psdots[dotstyle=*](0,3)(1,3)(2,3)(3,3)(4,3)(5,3)(6,3)(7,3)(8,3) \psdots[dotstyle=*](0,4)(1,4)(2,4)(3,4)(4,4)(5,4)(6,4)(7,4)(8,4) \endpspicture \bye diff --git a/multigrid/docs/mg_gbs.tex b/multigrid/docs/mg_gbs.tex index 4c1eec0..4fff2b6 100644 --- a/multigrid/docs/mg_gbs.tex +++ b/multigrid/docs/mg_gbs.tex @@ -1,1623 +1,1623 @@ % % @file mg_gbs.tex % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % \documentclass[a4paper]{article} \usepackage{linuxdoc-sgml} \usepackage{graphicx} \usepackage{hyperref} %\usepackage{amsmath} \usepackage{amssymb} \usepackage{mathtools} \usepackage{placeins} \usepackage{multirow} \usepackage{latexsym} \usepackage{listings} \usepackage{xcolor} \usepackage{rotating} \def\RepFigures{FIGURES_mg_gbs} \title{\tt Multigrid Solver for GBS} \author{Trach-Minh Tran, Federico Halpern\\CRPP/EPFL} \date{v1.0, June 2015} \begin{document} \maketitle \tableofcontents \section{The PDE} The PDE considered is \begin{equation} \label{eq:pde} \left[\frac{\partial^2}{\partial x^2} + \tau\frac{\partial^2}{\partial x\partial y} + \frac{\partial^2}{\partial y^2} - a(x,y)\right] u(x,y) = f(x,y), \qquad 0\le x \le L_x, \; 0\le y \le L_y. \end{equation} On the four boundaries, homogeneous Dirichlet boundary condition $u=0$ as well as Neumann boundary condition $\partial u/\partial n=0$ can be applied. \section{Discretization} The grid points $(x_i,y_j)$ are defined by \begin{equation} \begin{split} x_i &= ih_x = i\frac{L_x}{N_x}, \quad i=0,\ldots, N_x \\ y_j &= jh_y = j\frac{L_y}{N_y}, \quad j=0,\ldots, N_y \\ \end{split} \end{equation} Second order Finite Difference discretization of Eq.\ref{eq:pde} leads to the following 9-point stencil \begin{equation} \label{eq:stencil} S_{ij} = \frac{1}{h_x^2} \begin{bmatrix} -\tau\alpha/4 & \alpha^2 & \tau\alpha/4 \\ 1 & -2(1+\alpha^2)-h_x^2a_{ij} & 1 \\ \tau\alpha/4 & \alpha^2 &-\tau\alpha/4 \\ \end{bmatrix} , \qquad \mbox{where $\alpha=h_x/h_y$}. \end{equation} Note that the mesh aspect ratio $\alpha$ results in the same stencil for the \emph{anisotropic} Poisson equation with $h_x=h_y$: \begin{equation} \frac{\partial^2 u}{\partial x^2} + \alpha^2 \frac{\partial^2 u}{\partial y^2} = f. \end{equation} It is shown in \cite[p.~119]{Briggs} that this anisotropy can degrade the performance of multigrid using standard relaxations such as Gauss-Seidel or damped Jacobi can be strongly degraded. With the \emph{lexicographic} numbering \begin{equation} I = j(N_x+1) + i+1, \end{equation} for the $(N_x+1)(N_y+1)$ nodes, the discretized problem can be expressed as a matrix problem \begin{equation} \label{eq:matrix} \mathbf{Au} = \mathbf{f}, \end{equation} where $\mathbf{A}$ is a 9-diagonal matrix, assembled using the stencil defined above. Homogeneous Dirichlet boundary condition can be imposed, for example, on the face $j=0$ simply by \emph{clearing} the matrix rows and columns $1,2,\ldots, N_x+1$, and setting the diagonal terms to 1. Neumann boundary condition $\partial u/\partial x=0$ at the face $i=0$, can be simply implemented by imposing $u_{-1j}=u_{1j}$. The stencil for the boundary nodes $(0,j)$ can thus be modified as \begin{equation} S_{0j} = \frac{1}{h_x^2} \begin{bmatrix} 0 &\alpha^2 & 0 \\ 0 &-2(1+\alpha^2)-h_x^2a_{0j} & 2 \\ 0 &\alpha^2 & 0 \\ \end{bmatrix} . \end{equation} Two model problems are considered in this report: \begin{description} \item[\texttt{\textbf{DDDD}} problem:] Homogeneous Dirichlet BC at all the 4 boundaries. The \emph{analytic solution} is \begin{equation} u(x,y) = \sin\frac{2\pi k_xx}{L_x}\sin\frac{2\pi k_yy}{L_y}, \qquad \mbox{where $k_x$, $k_y$ are positive integers}. \end{equation} \item[\texttt{\textbf{NNDD}} problem:] Neumann boundary conditions $\partial u/\partial x=0$ at $x=0$ and $x=L_x$, homogeneous Dirichlet BC at $y=0$ and $y=L_y$. The \emph{analytic solution} is \begin{equation} u(x,y) = \cos\frac{2\pi k_xx}{L_x}\sin\frac{2\pi k_yy}{L_y}, \qquad \mbox{where $k_x$, $k_y$ are positive integers}. \end{equation} \end{description} In both problems, $a$ depends only on $x$: \begin{equation} \label{eq:density} a(x,y)= \exp\left[-\frac{(x-L_x/3)^2}{(L_x/2)^2}\right]. \end{equation} The sparse direct solver MUMPS \cite{MUMPS} is used to solve (\ref{eq:matrix}) in order to check the convergence of the schema described above. Fig.\ref{fig:convergence} shows the expected \emph{quadratic} convergence of the error with respect to $h_x$ with fixed $\alpha=h_x/h_y=0.5$ for both problems, when the grid size is varied from $16\times 64$ to $512\times 2048$. \begin{figure}[hbt] \centering \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/convergence} \caption{Convergence of the error $\| u_{calc}- u_{anal}\|_\infty$ wrt the number of intervals in the $x$ direction $N_x$ for $L_x=100$, $L_y=800$, $k_x=k_y=4$, $\tau=1$ and $N_y=4N_x$.} \label{fig:convergence} \end{figure} \section{Multigrid $V$-cycle} \label{sec-mgProc} Given an approximate $\mathbf{u}^h$ and right hand side $\mathbf{f}^h$ defined at some grid level represented by the grid spacing $h$, the following MG $V$-cycle procedure \begin{equation*} \boxed{\mathbf{u}^h \leftarrow MG^h(\mathbf{u}^h,\mathbf{f}^h)} \end{equation*} will compute a \emph{new} $\mathbf{u}^h$. It is defined recursively by the following steps: \begin{enumerate} \item If $h$ is the coarsest mesh size, \begin{itemize} \item Direct solve $\mathbf{A}^h\mathbf{u}^h=\mathbf{f}^h$ \item Goto 3. \end{itemize} \item Else \begin{itemize} \item Relax $\mathbf{u}^h$ $\nu_1$ times. \item $\mathbf{f}^{2h} \leftarrow {\mathbf{R}}(\mathbf{f}^h-\mathbf{A}^h\mathbf{u}^h)$. \item $\mathbf{u}^{2h} \leftarrow MG^{2h}(\mathbf{u}^{2h},\mathbf{f}^{2h})$ $\mu$ times. \item $\mathbf{u}^h\leftarrow \mathbf{u}^h+{\mathbf{P}}\mathbf{u}^{2h}$. \item Relax $\mathbf{u}^h$ $\nu_2$ times. \end{itemize} \item Return \end{enumerate} In the procedure above, the operators $\mathbf{R}$ and $\mathbf{P}$ denote respectively the \emph{restriction} (from \emph{fine} to \emph{coarse} grid) and the \emph{prolongation} (from \emph{coarse} to \emph{fine} grid). Notice that in this multigrid procedure, $\mathbf{R}$ applies only to the \emph{right hand side} while $\mathbf{P}$ applies only to the \emph{solution}. The standard $V(\nu_1,\nu_2)$ cycle is obtained by calling this $MG^h$ procedure with $\mathbf{f}^h$ defined at the \emph{finest} grid level, a guess $\mathbf{u}^h=0$ and $\mu=1$, while $\mu=2$ results in the $W(\nu_1,\nu_2)$ cycle. Details on the grid coarsening, the inter-grid transfers and methods of relaxation are given in the following. \subsection{Grid coarsening} Let start with the one-dimensional \emph{fine} grid defined by $x_i,\, i=0,\ldots,N$, assuming that $N$ is even. The next coarse grid (with $N/2$ intervals) is obtained by simply discarding the grid points with \emph{odd} indices. In order to get a \emph{smallest coarsest} grid (so that it is possible to solve \emph{cheaply} the problem with a \emph{direct} method), $N$ should be $N=N_c2^{L-1}$ where $L$ the total number of grid levels and $N_c$ is either 2 or a \emph{small odd} integer. As an example, the fine grid with $N=768$ can have up to 9 grid levels, and a coarsest grid with 3 intervals, see Table~\ref{tab:level}. \begin{table}[hbt] \centering \begin{tabular}{|l||r|r|r|r|r|}\hline $L$ & \multicolumn{5}{c|}{$N$} \\ \hline 1 & 2 & 3 & 5 & 7 & 9\\ 2 & 4 & 6 & 10 & 14 & 18\\ 3 & 8 & 12 & 20 & 28 & 36\\ 4 & 16 & 24 & 40 & 56 & 72\\ 5 & 32 & 48 & 80 & 112 & 144\\ 6 & 64 & 96 & 160 & 224 & 288\\ 7 & 128 & 192 & 320 & 448 & 576\\ 8 & 256 & 384 & 640 & 896 & 1152\\ 9 & 512 & 768 & 1280 & 1792 & 2304\\ 10 & 1024 & 1536 & 2560 & 3584 & 4608\\ \hline \end{tabular} \caption{Set of values of the \emph{fine} grid number of intervals $N$ to obtain a \emph{coarsest} grid size at most equal to $9$ with at most $10$ grid levels.} \label{tab:level} \end{table} For a two-dimensional grid, the same procedure is applied to both dimensions. The result of such procedure is illustrated in Fig.~\ref{fig:2d_coarsening}, for a $8\times4$ fine grid. \begin{figure}[htb] \centering \includegraphics[angle=0,width=0.6\hsize]{grid} \caption{A \emph{coarse} $4\times 2$ grid ($\Box$) obtained from a $8\times4$ fine grid ($\bullet$).} \label{fig:2d_coarsening} \end{figure} \subsection{Inter-grid transfers} The one-dimensional \emph{prolongation} operator for the second-order FD discretization is chosen the same as the one obtained with the \emph{linear Finite Elements} \cite{MG1D}. For a $N=8$ grid, it can be represented as a $9\times 5$ matrix given by \begin{equation} \label{eq:1dprolongation} \mathbf{P} = \left( \begin{matrix} 1 & 0 & 0 & 0 & 0 \\ 1/2 & 1/2 & 0 & 0 & 0 \\ 0 & 1 & 0 & 0 & 0 \\ 0 & 1/2 & 1/2 & 0 & 0 \\ 0 & 0 & 1 & 0 & 0 \\ 0 & 0 & 1/2 & 1/2 & 0 \\ 0 & 0 & 0 & 1 & 0 \\ 0 & 0 & 0 & 1/2 & 1/2\\ 0 & 0 & 0 & 0 & 1 \\ \end{matrix}\right) \end{equation} The \emph{restriction} matrix $\mathbf{R}$ is simply related to $\mathbf{P}$ by \begin{equation} \label{eq:1drestriction} \mathbf{R} = \frac{1}{2}\mathbf{P}^{T}=\frac{1}{2}\left( \begin{matrix} 1&1/2&0&0&0&0&0&0&0\\ 0&1/2&1&1/2&0&0&0&0&0\\ 0&0&0&1/2&1&1/2&0&0&0\\ 0&0&0&0&0&1/2&1&1/2&0\\ 0&0&0&0&0&0&0&1/2&1\\ \end{matrix} \right). \end{equation} For Dirichlet BC imposed on the \emph{left} boundary one has to set ${P}_{21}=R_{12}=0$, while for Dirichlet BC imposed on the \emph{right} boundary, ${P}_{N,N/2+1}=R_{N/2+1,N}=0$. Notice that these inter-grid operators are identical to the standard \emph{linear interpolation} and \emph{full weighting} operators. For a two-dimensional problem, using the property that the grid is a \emph{tensor product} of two one-dimensional grids, the restriction of the right hand side $f^{h}_{ij}$ and the prolongation of the solution $u^{2h}_{ij}$ can be computed as \begin{equation} \label{eq:2dintergrid} \begin{split} \mathbf{f}^{2h} &= \mathbf{R}^x \cdot \mathbf{f}^{h}\cdot (\mathbf{R}^y)^T \\ \mathbf{u}^{h} &= \mathbf{\mathbf{P}}^x \cdot \mathbf{u}^{2h}\cdot (\mathbf{\mathbf{P}}^y)^T \\ \end{split} \end{equation} \subsection{Relaxations} Gauss-Seidel and damped Jacobi iterations are used as relaxation methods in the multigrid $V$ cycle. In general, Gauss-Seidel method is more efficient but much more difficult to \emph{parallelize} than the Jacobi method. It should be noted that if $a(x,y)$ in Eq.~\ref{eq:pde} is non-positive, both relaxations diverge! This can be seen by considering the following one-dimensional FD equation with uniform $a$: \begin{equation} u_{j-1} -(2+ah^2)u_j + u_{j+1} = h^2f_j. \end{equation} Using the damped Jacobi relaxation, the error $\epsilon^{(m)}_j\equiv u_{anal}(x_j)-u_j^{(m)}$ at iteration $m+1$ is given by \begin{equation} \epsilon^{(m+1)}_j = \frac{\omega}{2+h^2a}(\epsilon^{(m)}_{j-1}+\epsilon^{(m)}_{j+1}) +(1-\omega)\epsilon^{(m)}_j . \end{equation} Performing a \emph{local mode analysis} (or Fourier analysis) (see \cite[p.~48]{Briggs}), assuming that $\epsilon^{(m)}_j=A(m)e^{ij\theta}$, where $\theta$ is related to the mode number $k$ by $\theta=2\pi k/N$, the \emph{amplification factor} $G(\theta)$ is obtained as \begin{equation} \begin{split} G(\theta) &= \frac{A(m+1)}{A(m)} = \frac{2\omega}{2+h^2a}\cos\theta + (1-\omega) \\ &= G_0(\theta) -\frac{ \omega h^2a}{2+h^2a}\cos\theta \simeq G_0(\theta) -\frac{ \omega h^2a}{2}\cos\theta, \\ G_0(\theta) &=1-2\omega\sin^2\frac{\theta}{2}, \\ \end{split} \end{equation} where $G_0(\theta)$ is the amplification factor for $a=0$. Note that $|G_0(\theta)|< 1$ for \emph{all} $\theta$ and $0<\omega< 1$ but $\displaystyle{\max_{|\theta|<\pi}|G(\theta)|>1}$ if $a<0$. In Gauss-Seidel relaxation method, the errors evolve as: \begin{equation} \epsilon^{(m+1)}_j = \frac{\epsilon^{(m+1)}_{j-1}+\epsilon^{(m)}_{j+1}}{2+h^2a}. \end{equation} Applying again the same Fourier analysis yields the following complex amplification factor: \begin{equation} \begin{split} G(\theta) &\simeq G_0(\theta)\left(1-\frac{h^2a}{2-e^{-i\theta}}\right) \\ G_0(\theta) &=\frac{e^{i\theta}}{2-e^{-i\theta}}, \quad |G_0(\theta)| < 1 \end{split} \end{equation} which show that the Gauss-Seidel relaxations \emph{diverge} if $a<0$. Notice that when $a>0$, the effect of $a$ on the amplification is negligible and is thus ignored in the following two-dimensional analysis. Applying the damped Jacobi scheme on the stencil~(\ref{eq:stencil}), the error at the iteration $m+1$ is given by: \begin{equation} \begin{split} \epsilon^{(m+1)}_{ij} = & \frac{\omega}{2(1+\alpha^2)} \left[ \epsilon^{(m)}_{i-1,j} + \epsilon^{(m)}_{i+1,j} + \alpha^2( \epsilon^{(m)}_{i,j-1} + \epsilon^{(m)}_{i,j+1}) + \frac{\tau\alpha}{4}( \epsilon^{(m)}_{i+1,j+1}+\epsilon^{(m)}_{i-1,j-1} - \epsilon^{(m)}_{i-1,j+1}-\epsilon^{(m)}_{i+1,j-1}) \right] \\ & + (1-\omega)\epsilon^{(m)}_{ij}. \\ \end{split} \end{equation} Using the two-dimensional Fourier mode expression \begin{equation} \epsilon^{(m)}_{ij} = A(m)e^{i(\theta_1+\theta_2)}, \quad -\pi <\theta_1, \theta_2 \le \pi, \end{equation} the amplification factor $G=A(m+1)/A(m)$ is given by \begin{equation} \label{eq:amp_jac} G(\theta_1,\theta_2;\omega,\alpha,\tau)=1 - \frac{2\omega}{1+\alpha^2} \left( \sin^2\frac{\theta_1}{2} + \alpha^2\sin^2\frac{\theta_2}{2} + \frac{\tau\alpha}{4}\sin\theta_1\,\sin\theta_2 \right). \end{equation} The errors in Gauss-Seidel method, assuming a \emph{lexicographic} ordering for the unknowns (increasing first $i$ then $j$), are updated according to \begin{equation} \epsilon^{(m+1)}_{ij} = \frac{1}{2(1+\alpha^2)} \left[ \epsilon^{(m+1)}_{i-1,j} + \epsilon^{(m)}_{i+1,j} + \alpha^2( \epsilon^{(m+1)}_{i,j-1} + \epsilon^{(m)}_{i,j+1}) + \frac{\tau\alpha}{4}( \epsilon^{(m)}_{i+1,j+1}+\epsilon^{(m+1)}_{i-1,j-1} - \epsilon^{(m)}_{i-1,j+1}-\epsilon^{(m+1)}_{i+1,j-1}) \right]. \end{equation} The Fourier mode analysis then leads to the following complex amplification factor \begin{equation} \label{eq:amp_gs} G(\theta_1,\theta_2;\alpha,\tau) = \frac{e^{i\theta_1} + \left(\alpha^2+i\dfrac{\tau\alpha}{2}\sin\theta_1\right)e^{i\theta_2}} {2(1+\alpha^2) - \left[e^{-i\theta_1}+\left(\alpha^2 - i\dfrac{\tau\alpha}{2}\sin\theta_1\right)e^{-i\theta_2}\right]}. \end{equation} Curves of $G$ for \emph{fixed} $\theta_2$ are plotted in Fig.~\ref{fig:fourier_jac} showing \emph{convergence} ($\max|G|<1$) for $\tau=-1,0,1,2$, using the damped Jacobi method. The same conclusions are obtained for Gauss-Seidel relaxations as shown in Fig.~\ref{fig:fourier_gs} where the absolute values of the complex amplification factor $G$ are plotted. However, for larger $|\tau|>2$, both methods diverge as can be seen in Fig.~\ref{fig:relax_diverge}.Notice however that the PDE (\ref{eq:pde}) is \emph{elliptic} only when $|\tau|<2$ is satisfied! \begin{figure}[htb] \centering \includegraphics[angle=0,width=0.85\hsize]{\RepFigures/fourier_jac} \caption{Amplification factor for damped Jacobi relaxations with $\omega=0.8$ and $\alpha=h_x/h_y=1$ and $\tau=-1,0,1,2$ displayed as curves of constant $\theta_2$. $\theta_2=0$ on the \emph{green} curve and $\pi$ on the \emph{red} curve.} \label{fig:fourier_jac} \end{figure} \begin{figure}[hbt] \centering \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/fourier_gs} \caption{Absolute value of the amplification factor for Gauss-Seidel relaxations with $\alpha=h_x/h_y=1$ and $\tau=-1,0,1,2$, displayed as curves of constant $\theta_2$. $\theta_2=0$ on the \emph{green} curve and $\pi$ on the \emph{red} curve.} \label{fig:fourier_gs} \end{figure} \begin{figure}[htb] \centering \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/relax_diverge} \caption{Amplification factor for Jacobi (left) and Gauss-Seidel (right) relaxations for $|\tau|=3,5$, $\alpha=h_x/h_y=1$, displayed as curves of constant $\theta_2$. $\theta_2=0$ on the \emph{green} curve and $\pi$ on the \emph{red} curve.} \label{fig:relax_diverge} \end{figure} In summary, the local mode analysis predicts that \begin{itemize} \item Negative values of the coefficient $a$ and large mixed derivative ($|\tau| > 2$) can make both damped Jacobi and Gauss-Seidel relaxations diverge. \item Positive values of $a$ can decrease the amplification factor (improving thus the convergence rate) but its contributions $h^2a$ decrease for increasing grid resolution. \end{itemize} These predictions will be checked against numerical experiments in the next section. \FloatBarrier \section{Numerical Experiments} \label{sec:NumExp1} In the following numerical experiments, we look at the convergence rate of the residual norm and the error norm which are defined at the iteration $m$ by \begin{equation} \begin{split} r^{(m)} &= \|\mathbf{f}-\mathbf{A}\mathbf{u}^{(m)}\|_\infty, \\ e^{(m)} &= \|\mathbf{u}^{(m)}-\mathbf{u}_{anal}\|_\infty.\\ \end{split} \end{equation} The iterations are stopped when the number of iterations reach an user supplied \emph{maximum} of iterations or when the residual norm is smaller than either a given \emph{relative} tolerance {\tt rtol} \cite[p.~51]{TEMPL} or \emph{absolute} tolerance {\tt atol}: \begin{equation} \begin{split} r^{(m)} &< \mbox{\tt rtol} \cdot(\|\mathbf{A}\|_\infty\cdot\|\mathbf{u}^{(m)}\|_\infty + \|\mathbf{f}\|_\infty), \\ r^{(m)} &< \mbox{\tt atol}.\\ \end{split} \end{equation} An additional stopping criteria consists of stopping the iterations when the change of the discretization error between successive iteration is small enough: \begin{equation} \frac{e^{(m)}-e^{(m-1)}}{e^{(m-1)}}< \mbox{\tt etol}. \end{equation} \subsection{$V$-cycle performances} Table~\ref{tab:iternum} shows the numbers of $V$-cycles required to reach the \emph{relative tolerance} $\mbox{\tt rtol}=10^{-8}$. In these runs where $\alpha=0.5$, $\tau=1$ and $a(x,y)$ given by Eq.~\ref{eq:density}, we observe that the biggest improvement is obtained at $\nu_1=\nu_2=2$. For larger $\nu_1,\nu_2$, the number of required iterations is relatively insensitive to the grid sizes. As can be seen in Fig.~\ref{fig:mg_iterations} which plots the evolution of the error $e^{(m)}$, it is clear that the level of discretization error has been largely reached. Finally the times used by these runs are shown in Fig.~\ref{fig:dddd_pc220} and Fig.~\ref{fig:nndd_pc220} where the times spent in the direct solver using MUMPS \cite{MUMPS} are included for comparison. For the $512\times 2048$ grid (the largest case using the direct solver), the multigrid $V(3,3)$ is about $30$ times faster! \begin{table}[htb] \centering \begin{tabular}{|l||c|c|c|c||c|c|c|c|}\hline & \multicolumn{4}{c||}{\texttt{\textbf{DDDD}} problem} & \multicolumn{4}{c|}{\texttt{\textbf{NNDD}} problem} \\ \cline{2-9} Grid size & $V(1,1)$ & $V(2,2)$ & $V(3,3)$ & $V(4,4)$ & $V(1,1)$ & $V(2,2)$ & $V(3,3)$ & $V(4,4)$ \\ \hline $16\times 64$ & 3 & 2 & 2 & 1 & 4 & 2 & 2 & 1\\ $32\times 128$ & 5 & 3 & 2 & 2 & 5 & 3 & 2 & 2\\ $64\times 256$ & 7 & 4 & 3 & 3 & 7 & 4 & 3 & 3\\ $128\times 512$ & 10 & 6 & 4 & 4 & 10 & 6 & 5 & 4\\ $256\times 1024$ & 11 & 6 & 5 & 4 & 11 & 6 & 5 & 4\\ $512\times 2048$ & 11 & 6 & 5 & 4 & 11 & 6 & 5 & 4\\ $1024\times 4096$ & 10 & 6 & 4 & 4 & 10 & 6 & 4 & 4\\ $1536\times 6144$ & 9 & 6 & 4 & 4 & 9 & 5 & 4 & 3\\ \hline \end{tabular} \caption{Multigrid $V$-cycle results for the {\tt DDDD} and {\tt NNDD} model problems with $k_x=k_y=4$, $L_x=100$, $L_y=800$, $\tau=1$ and $a(x,y)$ given by Eq.~\ref{eq:density}. Shown are the numbers of multigrid $V$-cycles required to reduce the \emph{relative} residual norm to less than $10^{-8}$ for different grid sizes and numbers of pre and post relaxation sweeps. Gauss-Seidel relaxation is used. The coarsest grid size of the $1536\times 6144$ case is $3\times 12$ while all the others have a coarsest grid of size $2\times 8$.} \label{tab:iternum} \end{table} \begin{figure}[htb!] \centering \includegraphics[width=\textwidth]{\RepFigures/mg_iterations} \caption{Performance of the $V(2,2)$-cycle using the Gauss-Seidel relaxation scheme for the {\tt DDDD} (upper curve) and {\tt NNDD} (lower curve) problem. The relative tolerance {\tt rtol} is set to $10^{-8}$ the coarsest grid size for all the problem size is fixed to $2\times 8$.} \label{fig:mg_iterations} \end{figure} The fittings of the obtained data show that the multigrid $V$ cycle cost scales almost \emph{linearly} with the number of unknowns $N=(N_x+1)(N_y+1)$ (as does the backsolve stage of MUMPS) while the \emph{total} direct solve time scales as $N^{1.4}$. \begin{figure}[htb] \centering \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/dddd_pc220} \caption{Times used by the multigrid $V$ cycles for the runs reported in Table~\ref{tab:iternum} for the \texttt{\textbf{DDDD}} problem. The last 6 $V(3,3)$ data points are used for the multigrid fit. The MUMPS direct solver's cost is included for comparison. All the timing results are obtained on an Intel Nehalem i7 processor, using the Intel compiler version 13.0.1 and MUMPS-4.10.0. } \label{fig:dddd_pc220} \end{figure} \begin{figure}[htb] \centering \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/nndd_pc220} \caption{As in Fig.~\ref{fig:dddd_pc220} for the \texttt{\textbf{NNDD}} problem.} \label{fig:nndd_pc220} \end{figure} \FloatBarrier \subsection{Effects of the mesh aspect ratio $\alpha$} From Table~\ref{tab:anisotropy}, one can observe that the required number of $V(2,2)$ cycles increase quickly when $\alpha<0.5$ and $\alpha>2$. Advanced \emph{relaxation} methods and \emph{coarsening} strategies \cite[chap. 7]{Wesseling} can solve this performance degradation but are generally more difficult to parallelize. \begin{table}[htb] \centering \begin{tabular}{|l||c|c|c|}\hline $\alpha$ & \texttt{\textbf{DDDD}} & \texttt{\textbf{NNDD}} \\ \hline 0.125 & 19 & 22 \\ 0.25 & 12 & 12 \\ 0.5 & 6 & 6 \\ 1.0 & 5 & 5 \\ 2.0 & 7 & 7 \\ 4.0 & 20 & 19 \\ \hline \end{tabular} \caption{Effects of the \emph{mesh aspect ratio} $\alpha=h_x/h_y$ on the number of $V(2,2)$ cycles required to reach $\mbox{\tt rtol}=10^{-8}$ for {\tt DDDD} and {\tt NNDD} model problems. The listed $\alpha$'s are obtained by fixing $N_x=256$, $N_y=1024$, $L_x=100$ and varying $L_y$.} \label{tab:anisotropy} \end{table} \subsection{Effects of the mixed partial derivative} When $|\tau|>2$, Table~\ref{tab:mixedterm} shows that the multigrid $V$-cycle diverge, as predicted from the local mode analysis based on the amplification factor given in Eq.~\ref{eq:amp_gs}. Although, a non-negative coefficient $a$ has a \emph{stabilizing} effect, the latter disappears already for a $256\times 1024$ grid. \begin{table}[htb] \centering \begin{tabular}{|l|l|c|c|c|c|c|c|c|c|}\hline &Grid size & $\tau=-3$ & $\tau=-2$ & $\tau=-1$ & $\tau=0$ & $\tau=1$ & $\tau=2$ & $\tau=3$ \\ \hline \multirow{4}{*}{\texttt{\textbf{DDDD}}} &$128\times 512(a=0)$ & - & 39 & 7 & 5 & 7 & 38 & - \\ & $128\times 512$ &16 & 6 & 5 & 4 & 4 & 6 & 17 \\ & $256\times 1024$ &- & 8 & 5 & 4 & 5 & 7 & - \\ & $512\times 2048$ &- & 9 & 5 & 4 & 5 & 9 & - \\ \hline\hline \multirow{4}{*}{\texttt{\textbf{NNDD}}} &$128\times 512(a=0)$ & - & 42 & 7 & 5 & 7 & 41 & - \\ & $128\times 512$ &13 & 6 & 5 & 4 & 5 & 5 & 13 \\ & $256\times 1024$ &- & 7 & 5 & 4 & 5 & 7 & - \\ & $512\times 2048$ &- & 7 & 5 & 4 & 5 & 7 & - \\ \hline \end{tabular} \caption{Effects of the mixed derivative term $\tau$ on the performances of the $V(3,3)$ cycle. The dashes indicate that the $V$-cycle diverges. In theses runs, $a(x,y)$ is given by Eq.~\ref{eq:density} except for the cases where it is set to 0. Notice the \emph{stabilizing} effect of $a\ne 0$ for the $128\times 512$ grid at $\tau = \pm 3$.} \label{tab:mixedterm} \end{table} \subsection{Using the damped Jacobi relaxation} The optimum Jacobi damping factor $\omega$ can be determined by minimizing the \emph{smoothing factor} defined as the maximum amplification coefficient (\ref{eq:amp_jac}) restricted to the \emph{oscillatory modes}: \begin{equation} \label{eq:mu_jac} \mu(\omega,\alpha,\tau) = \max_{(\theta_1,\theta_2)\in\Omega} |G(\theta_1,\theta_2,\omega,\alpha,\tau)|, \qquad \Omega = [|\theta_1|>\pi/2]\,\bigcup\,[|\theta_2|>\pi/2]. \end{equation} Results from numerical computation of (\ref{eq:mu_jac} are shown in Fig.~\ref{fig:jac_opt}. An analytic expression for $\tau=0$ assuming $\alpha\le 1$ is derived in \cite[p.~119]{salpha}: \begin{gather} \mu(\omega,\alpha,\tau=0) = \max\left(\left|1-2\omega\right|,\;\left|1-\frac{\alpha^2}{1+\alpha^2}\omega\right|\right), \nonumber\\ \mu_{\mbox{opt}} = \frac{2+\alpha^2}{2+3\alpha^2} \quad \mbox{at} \quad \omega_{\mbox{opt}} = \frac{2+2\alpha^2}{2+3\alpha^2}. \end{gather} Notice that the smoothing factor increases as $\alpha$ departs from 1 and for increasing $\tau$. For Gauss-Seidel relaxation, the same numerical procedure applied to (\ref{eq:amp_gs}) yields a smoothing factor $\mu$ equal to respectively $0.5$, $ 0.68$ and $0.70$ for the three cases shown in Fig.~\ref{fig:jac_opt}, which result in a better smoothing property than the damped Jacobi relaxation. \begin{figure}[htb] \centering \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/jac_opt} \caption{The smoothing factor for damped Jacobi relaxation for different values of $\alpha$ and $\tau$.} \label{fig:jac_opt} \end{figure} Numerical experiments with the reference case ($\alpha=0.5$, $\tau=1$, $a(x,y)$ given by Eq.~\ref{eq:density}) and the $128\times 512$ grid using damped Jacobi relaxation, are shown in Table~\ref{tab:jac_opt} and confirm that $\omega=0.9$ is the optimum damping factor and that it is less efficient than the Gauss-Seidel relaxation, in agreement with the Fourier analysis. \begin{table}[htb] \centering \begin{tabular}{l c c c c c c}\hline &$\omega=0.5$ &$\omega=0.6$ &$\omega=0.7$ &$\omega=0.8$ &$\omega=0.9$ &$\omega=1.0$ \\ \cline{2-7} \texttt{\textbf{DDDD}}& 12 & 10 & 9 & 8 & 7 & 15 \\ \texttt{\textbf{NNDD}}& 12 & 11 & 9 & 8 & 7 & 18 \\ \hline \end{tabular} \caption{The number of $V(3,3)$ cycles required to obtain $\mbox{\texttt{rtol}}=10^{-8}$ versus the Jacobi \emph{damped factor} $\omega$. The grid size is $128\times 512$ with $\alpha=0.5$, $\tau=1$ and $a(x,y)$ given by Eq.~\ref{eq:density}.} \label{tab:jac_opt} \end{table} \subsection{Matrix storage} Initially the \emph{Compressed Sparse Row} storage format (CSR or CRS) (see \cite[p.~58--59]{TEMPL}) was used to store the discretized finite difference matrix. With this choice, the CPU time used by the matrix construction (and boundary condition setting) is found to be always larger than the multigrid solver time as shown in Fig.~\ref{fig:matcon_time}. Fortunately, switching to the \emph{Compressed Diagonal Storage} (CDS), where the 9 diagonal structure of the matrix is fully exploited, the matrix construction time is considerably reduced as shown in the same figure. On the other hand, no difference in the multigrid solver performance is noticeable between the two matrix storage. \begin{figure}[htb] \centering \includegraphics[angle=0,width=\hsize]{\RepFigures/matcon_time} \caption{CPU time used by the matrix construction for CSR and CDS matrix storage compared to the multigrid $V(3,3)$ cycle time for the \textbf{DDDD} and \textbf{NNDD} model problems. The timing is obtained using the same conditions as in Fig.~\ref{fig:dddd_pc220}.} \label{fig:matcon_time} \end{figure} \FloatBarrier \section{Modified PDE} Here, the following modified PDE is considered: \begin{equation} \label{eq:new_pde} \left[\frac{\partial^2}{\partial x^2} + \tau\frac{\partial^2}{\partial x\partial y} + (1+\tau^2/4)\frac{\partial^2}{\partial y^2} - a(x,y)\right] u(x,y) = f(x,y), \qquad 0\le x \le L_x, \; 0\le y \le L_y. \end{equation} This PDE which is obtained from the $\hat s-\alpha$ model \cite[Eq.10]{salpha} is \emph{elliptic} for any value of $\tau$. The resulting stencil is changed from the previous stencil \ref{eq:stencil} to \begin{equation} \label{eq:new_stencil} S_{ij} = \frac{1}{h_x^2} \begin{bmatrix} -\tau\alpha/4 & \alpha^2(1+\tau^2/4) & \tau\alpha/4 \\ 1 & -2\left[1+\alpha^2(1+\tau^2/4)\right]-h_x^2a_{ij} & 1 \\ \tau\alpha/4 & \alpha^2(1+\tau^2/4) &-\tau\alpha/4 \\ \end{bmatrix} . \end{equation} Note that the \emph{anisotropy} of the resulting Finite Difference discretization is now $\alpha^2(1+\tau^2/4)$ and could be controlled by adjusting both the mesh aspect ratio $\alpha$ and the \emph{shear} term $\tau$. Numerical calculations show that the multigrid $V$-cycles do always converge, as shown in Table~\ref{tab:new_mixedterm}. \begin{table}[htb] \centering \begin{tabular}{|l|l|c|c|c|c|c|c|c|}\hline &Grid size & $\tau=0$ & $\tau=1$ & $\tau=2$ & $\tau=4$ & $\tau=8$ & $\tau=16$ \\ \hline \multirow{3}{*}{\texttt{\textbf{DDDD}}} & $128\times 512$ &4 & 4 & 5 & 6 & 9 & 20 (6) \\ & $256\times 1024$ &4 & 4 & 5 & 6 & 11 & 25 (8)\\ & $512\times 2048$ &4 & 4 & 5 & 7 & 12 & 29 (8)\\ \hline\hline \multirow{3}{*}{\texttt{\textbf{NNDD}}} & $128\times 512$ &4 & 4 & 4 & 5 & 8 & 17 (5) \\ & $256\times 1024$ &4 & 5 & 5 & 5 & 8 & 19 (6) \\ & $512\times 2048$ &4 & 4 & 4 & 5 & 7 & 18 (6) \\ \hline \end{tabular} \caption{Effects of the mixed derivative term $\tau$ on the performances of the $V(3,3)$ cycle. In theses runs, $a(x,y)$ is given by Eq.~\ref{eq:density}. The mesh aspect ratio $\alpha=0.5$ was used. On the last column and shown in parenthesis are the numbers of $V(3,3)$ cycles when $\alpha$ is reduced to 0.125 by increasing the length $L_y$ while keeping all the other values fixed.} \label{tab:new_mixedterm} \end{table} \section{Parallel Multigrid} In order to maximize the parallel efficiency and the flexibility of utilization, a two-dimensional domain partition scheme is chosen to parallelize the multigrid solver. As shown below, generalization of this procedure for higher dimensions is straightforward. \subsection{Distributed grid coarsening} The coarsening algorithm can be summarized as follow: \begin{itemize} \item Partition the grid points on each dimension at the \emph{finest} grid level, as evenly as possible. \item The range for each sub-grid, using \emph{global indexing} is thus specified by $[s,e]$, with $s=0$ for the first sub-grid and $e=N$ for the last sub-grid, $N$ being the number of grid intervals. \item The next coarse sub-grid is thus obtained by discarding all the \emph{odd} indexed grid points, as in the serial case. \item This process can continue (as long as the total of number of intervals is even) until there exists a prescribed \emph{minimum} number of grid points on any sub-grid is reached. \end{itemize} \subsection{Matrix-free formulation} Using standard \emph{matrix} to represent the discretized 2D (or higher dimensional) operators imply an \emph{one-dimensional numbering} of the grid nodes. For example on a 2D $N_x\times N_y$ grid, the 1D numbering of the node $(x_{i_1},y_{i_2})$ could be defined as \[ k=i_1+i_2\times N_x, \quad i_1=0:N_x,\;i_2=0:N_y. \] However, using 2D domain partition defined by \begin{equation} \label{eq:2dnumber} i_1=s_1:e_1,\;i_2=s_2:e_2, \end{equation} with $s=(s_1,s_2)$ and $e=(e_1,e_2)$ denoting respectively the \emph{starting} and \emph{ending} indices of a rectangular sub-domain, result in a \emph{non-contiguous} set of the indices ${k}$ and in a complicate structure of the partitioned matrix for the linear operator. On the other hand, using the \emph{stencil notation} introduced in \cite[chap. 5.2]{Wesseling} based on the \emph{multidimensional} node labeling as defined by (\ref{eq:2dnumber}) for a 2D problem, one can define a simple data structure for the partitioned operator, $A(i,\delta)$, where the $d$-tuple $i=(i_1,\ldots,i_d)$ represents a node of the $d$-dimensional grid and the $d$-tuple $\delta=(\delta_1,\ldots,\delta_d)$, the \emph{distance} between the connected nodes. The result of $\mathbf{A}u$ can thus be defined as \begin{equation} \label{eq:vmx} (\mathbf{A}u)_i = \sum_{\delta\in\mathbb{Z}^d} A(i,\delta)u_{i+\delta}, \quad i=s:e. \end{equation} In (\ref{eq:vmx}), the sum is performed over all indices $\delta$ such that $A(i,\delta)$ is non-zero. For the 2D nine-point stencil defined in (\ref{eq:stencil}), the 2-tuple $\delta$ can be specified as the 9 columns of the following \emph{structure} matrix \begin{equation} \label{5points} S_\delta = \left( \begin{array}{rrrrrrrrr} 0 & -1 & 0 & 1 & -1 & 1 & -1& 0 & 1 \\ 0 & -1 & -1&-1 & 0 & 0 & 1 & 1 & 1 \\ \end{array} \right). \end{equation} In the general case of a $d$-dimensional grid and $\mathcal{N}$ point stencil, $S_\delta$ is a $d\times\mathcal{N}$ matrix. By noting that the subscript $i+\delta$ of $u$ on the right hand side of (\ref{eq:vmx}) should be in the range $[0,N]$ \emph{only} for sub-domains which are \emph{adjacent} to the boundary, one can deduce that for a \emph{fixed} $\delta$, the lower and upper bounds of the indices $i$ should be \begin{equation} \begin{split} i_{\mbox{min}} &= \max (0, -\delta, s), \\ i_{\mbox{max}} &= \min (N, N-\delta, e) \\ \end{split} \end{equation} where $N=(N_1,N_2,\ldots,N_d)$ specify the number of intervals, since, for sub-domains \emph{not adjacent} to the boundary, $u$ should include values at the \emph{ghost} cells $s-g$ and $e+g$ where $g$ is given by \begin{equation} g = \max|S_\delta| \end{equation} with the operator max taken along the \emph{rows} of the matrix. The formula defined in (\ref{eq:vmx}) can then be implemented as in the \emph{pseudo} Fortran code \par \addvspace{\medskipamount} \nopagebreak\hrule \begin{lstlisting}[mathescape] do k=1,SIZE($S_{\delta}$,2) ! loop over the stencil points $\delta$ = $S_{\delta}$(:,k) lb = MAX(0,-$\delta$,$s$) ub = MIN($N$,$N-\delta$,e) do i=lb,ub Au(i) = Au(i) + A(i,$\delta$)*u(i+$\delta$) enddo enddo \end{lstlisting} \nopagebreak\hrule \addvspace{\medskipamount} On the other hand, if the values of $u$ at the ghost cells of the sub-domains \emph{adjacent} to the boundary are set to 0 \begin{equation*} u_{-g} = u_{N+g} = 0, \end{equation*} the lower and upper bounds of the inner loop can be simply set to $lb=s$ and $ub=e$. Note that the inner loop should be interpreted as $d$ nested loops over the $d$-tuple $i=(i_1,\ldots,i_d)$ for a $d$-dimensional problem. \subsection{Inter-grid transfers} \subsubsection{Restriction} Using the definition in the first equation of (\ref{eq:2dintergrid}) together with (\ref{eq:1drestriction}), the 2D restriction operator can be represented by the following 9-point stencil: \begin{equation} \label{eq:2drestriction} \mathbf{R}_i = \frac{1}{16} \begin{pmatrix} 1 & 2 & 1 \\ 2 & 4 & 2 \\ 1 & 2 & 1 \\ \end{pmatrix}, \end{equation} and the restriction of $f$ can be computed as \begin{equation} \bar{f}_i = (\mathbf{R}f)_i = \sum_{\delta\in\mathbb{Z}^2} R(i,\delta)f_{2i+\delta}, \quad i=\bar{s}:\bar{e}, \end{equation} where $\bar{s},\bar{e}$ denote the partitioned domain boundary indices on the \emph{coarse} grid, using the same algorithm described previously. \subsubsection{BC for the restriction operator} \label{sec:restrict_bc} Dirichlel boundary conditions can be imposed by modifying the \emph{restriction stencil} on each of the four boundaries as follow: \begin{equation} \mathbf{R}_{0,.} = \frac{1}{16}\begin{pmatrix} 1 & 2 & 0 \\ 2 & 4 & 0 \\ 1 & 2 & 0 \\ \end{pmatrix},\quad \mathbf{R}_{N_x,.} = \frac{1}{16}\begin{pmatrix} 0 & 2 & 1 \\ 0 & 4 & 2 \\ 0 & 2 & 1 \\ \end{pmatrix},\quad \mathbf{R}_{.,0} = \frac{1}{16}\begin{pmatrix} 0 & 0 & 0 \\ 2 & 4 & 2 \\ 1 & 2 & 1 \\ \end{pmatrix},\quad \mathbf{R}_{.,N_y} = \frac{1}{16}\begin{pmatrix} 1 & 2 & 1 \\ 2 & 4 & 2 \\ 0 & 0 & 0 \\ \end{pmatrix}. \end{equation} With the natural Neumann BC, no change of the restriction operator is needed. \subsubsection{Prolongation} Stencil notation for \emph{prolongation} operators is less obvious to formulate, see \cite[chap. 5.2]{Wesseling}. A more straightforward implementation is obtained in the 2D case, by simply applying \emph{bilinear interpolation} on the \emph{coarse grid}: \begin{equation} \label{eq:2dprolongation} \begin{split} (\mathbf{P}\bar{u})_{2i} &= \bar{u}_{i}, \\ (\mathbf{P}\bar{u})_{2i+e_1} &= (\bar{u}_{i} + \bar{u}_{i+e_1})/2, \quad (\mathbf{P}\bar{u})_{2i+e_2} = (\bar{u}_{i} + \bar{u}_{i+e_2})/2, \\ (\mathbf{P}\bar{u})_{2i+e_1+e_2} &= (\bar{u}_{i} + \bar{u}_{i+e_1} + \bar{u}_{i+e_2} + \bar{u}_{i+e_1+e_2})/4, \\ \end{split} \end{equation} \subsection{Relaxations} While the Gauss-Seidel proves to be more efficient, the damped Jacobi method, at least for a first version of the parallel multigrid solver, is used because it is straightforward to \emph{parallelize}. The same undamped Jacobi (with $\omega=1$) with a \emph{few} number of iterations is also used to solve the linear system at the coarsest mesh as prescribed by the multigrid $V$-cycle procedure defined in section \ref{sec-mgProc}. \subsection{Local vectors and stencils} All local vectors (used to represent solutions or right-hand-sides) contain \emph{ghost cells} and are implemented using 2D arrays, for example \[\mbox{\texttt{sol(s(1)-1:e(1)+1,s(2)-1:e(2)+1)}}\] for the solution vector. The partitioned stencils are defined only for the \emph{local} grid points, without the ghost cells. Thus, before each operation on the local vectors, an exchange (or update) of the values on the ghost cells is performed. As a result, all the memory required by the solver is completely partitioned, except for the space used by the ghost cells. \subsection{Numerical Experiments} In this section, all the numerical experiments are conducted on \texttt{helios.iferc-csc.org}, using the Intel compiler version 13.1.3 and bullxpmi-1.2.4.3. The \emph{stopping criteria} for the $V$-cycles is based on the absolute and relative residual norms as well as the discretization error norm as defined in section \ref{sec:NumExp1}. In cases where the analytic solution is not known, the latter can be replaced by some norm of the solution. \subsubsection{Strong scaling} \begin{figure}[htb] \centering \includegraphics[angle=0,width=\hsize]{\RepFigures/strong_256x1024_DDD} \caption{DDDD problem for a $256\times 1024$ size, using multigrid $V(3,3)$ cycles. Different times for a given number of processes are obtained with different combinations of processes in each dimension. The number of grid levels are fixed to 6. Five Jacobi iterations are used at the coarsest grid.} \label{fig:strong_scal_small} \end{figure} \begin{figure}[htb] \centering \includegraphics[angle=0,width=\hsize]{\RepFigures/strong_512x2048_DDD} \caption{DDDD problem for a $512\times 2048$ size using multigrid $V(3,3)$ cycles. The \textcolor{red}{red marker} on the left shows the time for the serial multigrid solver. Different times for a given number of processes are obtained with different combinations of processes in each dimension. The number of grid levels are fixed to 6. Five Jacobi iterations are used at the coarsest grid.} \label{fig:strong_scal} \end{figure} Here 2 \emph{fixed} problem sizes are considered: \begin{itemize} \item A small size with the (fine) grid of $256\times 1024$ shown in Fig.~\ref{fig:strong_scal_small} and \item a larger size of $512\times 2048$ in Fig.~\ref{fig:strong_scal}. \end{itemize} In both cases $\mbox{\tt rtol}=10^{-8}$ and $\mbox{\tt etol}=10^{-3}$. It was checked that the results do not change when more than 5 Jacobi iterations are used at the coarsest mesh. Notice that for the small problem, the parallel efficiency starts to degrade at 32 MPI processes while for the larger case, this happens after 64 MPI processes. This can be explained by the ghost cell exchange communication overhead: denoting $N_1$ and $N_2$, the number of grid points in each direction and $P_1$ and $P_2$ the number of MPI processes in each direction, the ratio $S/V$ between the number of ghost points and interior grid points for each local subdomains can be estimated as \begin{equation} \label{eq:comm_overhead} S/V\simeq\frac{2(N_1/P_1+N_2/P_2)}{N_1N_2/P_1P_2} = 2\left(P_1/N_1+P_2/N_2\right). \end{equation} This ratio increases as the number MPI processes increases while keeping the problem size fixed. On very coarse grids, this communication cost can become prohibitive. For this reason, in all the runs shown here, the number of grid points on each direction for the coarsest grid is limited to 2. \subsubsection{Weak Scaling} \begin{figure}[htb] \centering \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/weak_DDDD} \caption{Weak scaling for a DDDD problem, using multigrid $V(3,3)$ cycles. The number of grid levels are fixed to 7. The solver for the coarsest grid uses 5 Jacobi iterations except for the 2 largest cases which require respectively 20 and 100 iterations to converge. The 2 sets of curves on the right figure show respectively the timings with and without the calculations of the residual norm and discretization error which require both a \emph{global reduction}.} \label{fig:weak_scal_DDDD} \end{figure} \begin{figure}[htb] \centering \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/weak_NNDD} \caption{Weak scaling for a NNDD problem, using multigrid $V(3,3)$ cycles. The number of grid levels are fixed to 7. The solver for the coarsest grid uses 5 Jacobi iterations except for the 2 largest cases which require respectively 20 and 100 iterations to converge. The 2 sets of curves on the right figure show respectively the timings with and without the calculations of the residual norm and discretization error which require both a \emph{global reduction}.} \label{fig:weak_scal_NNDD} \end{figure} According to Eq.~\ref{eq:comm_overhead}, varying the problem size together with the number of MPI processes by keeping $N_1/P_1$ and $N_2/P_2$ constant should yield a \emph{constant scaling}, provided that the convergence rate does not depend on the problem sizes. The results for the \texttt{DDDD} and \texttt{NNDD} problems are shown in Fig.~\ref{fig:weak_scal_DDDD} and Fig.~\ref{fig:weak_scal_NNDD}. The left part of the figures shows that the convergence rate depends only weakly on the problem sizes, which leads indeed to a (almost) constant time obtained for numbers of MPI processes $P$ between 16 and 1024 . The reason for the good timings for smaller $P$ is simply that there are only 2 ghost cell exchanges for $P=2\times 2$ (instead of 4 for $P\ge 16$) and that there is no exchange for $P=0$. \section{Non-homogeneous Boundary Conditions} \subsection{Non-homogeneous Dirichlet Conditions} Non-homogeneous Dirichlet boundary conditions can be imposed on all the Dirichlet faces simply by \emph{clearing}, as for the \emph{homogeneous case}, the matrice rows and columns and setting its diagonal term to 1. Moreover, the corresponding corresponding \emph{right-hand-side} should be set to: \begin{equation} \begin{split} f_{0,j} &= D^W(y_j), \quad f_{N_x,j}=D^E(y_j), \qquad j=0,\ldots,N_y, \\ f_{i,0} &= D^S(x_i), \quad f_{j,N_y}=D^N(x_i), \qquad i=0,\ldots,N_x, \\ \end{split} \end{equation} where $D^W, D^E, D^S, D^N$ are the values of $u$ at the 4 Dirichlet faces. As for the homogeneous Dirichlet BC, the \emph{restriction} operator should be changed as described in section \ref{sec:restrict_bc} while the \emph{prolongation} defined in (\ref{eq:2dprolongation}) remains unchanged. \subsection{Non-homogeneous Neumann Conditions} The non-homogeneous Neumann conditions at the 4 faces $x=0$ can be defined as \begin{equation} \begin{split} \left.\frac{\partial u}{\partial x}\right|_{x=0} &= N^W(y), \quad \left.\frac{\partial u}{\partial x}\right|_{x=L_x} = N^E(y), \\ \left.\frac{\partial u}{\partial y}\right|_{y=0} &= N^S(x), \quad \left.\frac{\partial u}{\partial y}\right|_{y=L_y} = N^N(x). \\ \end{split} \end{equation} Discretization of the BC defined above, using the \emph{central difference} yields on the 4 faces \begin{equation} \begin{split} u_{-1,j} &= u_{1,j} -2h_xN^W(y_j), \quad u_{N_x+1,j} = u_{N_x-1,j} + 2h_xN^E(y_j), \qquad j=0,\ldots,N_y, \\ u_{i,-1} &= u_{i,1} -2h_yN^S(x_i), \quad u_{i,N_y+1} = u_{i,N_y-1} + 2h_yN^N(x_i), \qquad i=0,\ldots,N_x. \\ \end{split} \end{equation} With these relations, the stencil (\ref{eq:new_stencil}) on the 4 boundaries is modified as follow \begin{equation} \begin{split} S^W &= \frac{1}{h_x^2} \begin{bmatrix} 0 &\alpha^2(1+\tau^2/4) & 0 \\ 0 &-2(1+\alpha^2(1+\tau^2/4))-h_x^2a_{0,j} & 2 \\ 0 &\alpha^2(1+\tau^2/4) & 0 \\ \end{bmatrix} ,\quad S^E = \frac{1}{h_x^2} \begin{bmatrix} 0 &\alpha^2(1+\tau^2/4) & 0 \\ 2 &-2(1+\alpha^2(1+\tau^2/4))-h_x^2a_{N_x,j} & 0 \\ 0 &\alpha^2(1+\tau^2/4) & 0 \\ \end{bmatrix} , \\ S^S &= \frac{1}{h_x^2} \begin{bmatrix} 0 & 2\alpha^2(1+\tau^2/4) & 0 \\ 1 &-2(1+\alpha^2(1+\tau^2/4))-h_x^2a_{i,0} & 1 \\ 0 & 0 & 0 \\ \end{bmatrix} ,\quad S^N = \frac{1}{h_x^2} \begin{bmatrix} 0 & 0 & 0 \\ 1 &-2(1+\alpha^2(1+\tau^2/4))-h_x^2a_{i,N_y} & 1 \\ 0 & 2\alpha^2(1+\tau^2/4) & 0 \\ \end{bmatrix} , \\ \end{split} \end{equation} while the right-hand-side should be changed according to \begin{equation} \begin{split} f_{0,j} &\longleftarrow f_{0,j} + \frac{2}{h_x}\left[\frac{\tau\alpha}{4}\,N^W(y_{j-1}) + N^W(y_j) - \frac{\tau\alpha}{4}\,N^W(y_{j+1})\right], \\ f_{N_x,j} &\longleftarrow f_{N_x,j} + \frac{2}{h_x}\left[\frac{\tau\alpha}{4}\,N^E(y_{j-1}) - N^E(y_j) - \frac{\tau\alpha}{4}\,N^E(y_{j+1})\right], \\ f_{i,0} &\longleftarrow f_{i,0} + \frac{2}{h_y}\left[\frac{\tau}{4\alpha}\,N^S(x_{i-1}) + (1+\tau^2/4)N^S(x_i) - \frac{\tau}{4\alpha}\,N^S(x_{i+1})\right], \\ f_{i,N_y} &\longleftarrow f_{i,N_y} + \frac{2}{h_y}\left[\frac{\tau}{4\alpha}\,N^N(x_{i-1}) - (1+\tau^2/4)N^N(x_i) - \frac{\tau}{4\alpha}\,N^N(x_{i+1})\right]. \\ \end{split} \end{equation} \subsection{The NNDD test problem} In order to test the discretization of the non-homogeneous boundary conditions as formulated above, a test problem with the prescribed \emph{exact} solution \begin{equation} u(x,y) = 1 + \sin\frac{2\pi k_xx}{L_x}\sin\frac{2\pi k_yy}{L_y}, \qquad \mbox{where $k_x$, $k_y$ are positive integers} \end{equation} and the following non-homogeneous boundary conditions \begin{equation} \begin{split} \left.\frac{\partial u}{\partial x}\right|_{x=0} = \left.\frac{\partial u}{\partial x}\right|_{x=L_x} &= k_x\sin\frac{2\pi k_yy}{L_y}, \\ u(x,0) = u(x,L_y) &= 1, \\ \end{split} \end{equation} is solved with varying grid spacing. The discretization errors versus the number of grid intervals $N_x$ displayed in Fig(\ref{fig:conv_nh_bc} shows a \emph{quadratic} convergence as expected from the second order finite differences used in both the PDE and the Neumann boundary condition discretization. \begin{figure}[htb] \centering \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/conv_nh_bc} \caption{Convergence of the error $\| u_{calc}- u_{anal}\|_\infty$ wrt the number of intervals in the $x$ direction $N_x$ for the non-homogeneous NNDD problem. Here, $L_x=100$, $L_y=800$, $k_x=k_y=4$, $\tau=1$ and $N_y=4N_x$.} \label{fig:conv_nh_bc} \end{figure} As shown in Fig.(\ref{fig:nndd_nh}), the multigrid $V$-cycles for the \emph{non-homogeneous} problem converge with a slightly smaller efficiency, than the \emph{homogeneous} problem shown in Fig.(\ref{fig:weak_scal_NNDD}). \begin{figure}[htb] \centering \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/nndd_nh} \caption{Performances of the $V(3,3)$-cycle for the non-homogeneous NNDD problem. The same parameters in Fig.(\ref{fig:conv_nh_bc}) are used here.} \label{fig:nndd_nh} \end{figure} \FloatBarrier \subsection{Local relaxation methods} In addition to the damped Jacobi, three methods of relaxations are added in this parallel multigrid solver: \begin{enumerate} \item The 4 color Gauss-Seidel method (RBGS). \item The Gauss-Seidel method (GS). \item The successive over-relaxation method (SOR). \end{enumerate} In order to apply correctly the parallel 4 color Gauss-Seidel, a complicated ghost cell exchange has to be performed for each sweep for each color. Here we simply apply the method \emph{locally} on each subdomain with only one ghost exchange performed at the beginning of each relaxation. The same procedure is also used for the other 2 methods which are inherently \emph{serial}. All these 3 relaxations are thus only correct if there is only one subdomain. As a consequence, while the damped Jacobi does not depend on the partition of the subdomains, results from these 3 methods do depend on how the domain is partitioned. Table~\ref{tab:advrelax} show however that all of the 3 \emph{approximated} relaxation methods produce a much \emph{faster} convergence rate than the damped Jacobi relaxations for the NNDD test problem considered here. The performance of the implemented solver using the 4 relaxation methods on HELIOS is compared in Fig,(\ref{fig:weakhelios}. The bad performance of the 4 color Gauss-Seidel relaxations (RBGS) can be explained by the 4 nested loops required to sweep each of the 4 colors. \begin{table}[hbt] \centering \begin{tabular}{|l||r|r|r|r|r|r|}\hline Grid Sizes & $256\times 1024$ & $512\times 2048$ & $1024\times 4096$ & $2048\times 8192$ & $4096\times 16384$ & $8192\times 32768$ \\ \hline Process topology & \multicolumn{1}{c|} {$1\times 1$} & \multicolumn{1}{c|} {$2\times 2$} & \multicolumn{1}{c|} {$4\times 4$} & \multicolumn{1}{c|} {$8\times 8$} & \multicolumn{1}{c|} {$16\times 16$} & \multicolumn{1}{c|} {$32\times 32$} \\ \hline Jacobi $\omega=0.9$ & 0.22 & 0.24 & 0.24 & 0.24 & 0.24 & 0.25 \\ RBGS & 0.05 & 0.07 & 0.10 & 0.10 & 0.12 & 0.12 \\ GS & 0.07 & 0.08 & 0.10 & 0.11 & 0.11 & 0.12 \\ SOR $\omega=1.2$ & 0.04 & 0.05 & 0.07 & 0.07 & 0.07 & 0.08 \\ \hline \end{tabular} \caption{Reduction factor for the residuals (obtained as the \emph{geometric mean} of all its values except the first 2 values) for the non-homogeneous NNDD test problem. The same parameters as in Fig.(\ref{fig:conv_nh_bc}) are used here.} \label{tab:advrelax} \end{table} \begin{figure}[htb] \centering \includegraphics[angle=0,width=0.9\hsize]{\RepFigures/weak_helios} \caption{Performance of the 4 relaxations on the non-homogeneous NNDD problem. The same parameters in Fig.(\ref{fig:conv_nh_bc}) are used here. The grid sizes used in this \emph{weak scaling} run are shown in Table~\ref{tab:advrelax}.} \label{fig:weakhelios} \end{figure} \FloatBarrier \section{Performance of the Stencil Kernel on different platform} To get a feeling on the performances gained on the different platforms and how well the compilers (with their auto-vectorization capability) support these platforms, the following Fortran \emph{9-point stencil} kernel has been used. The OpenMP directives are used for parallelization on both Xeon and Xeon Phi while offload to GPU card is done via the high level OpenACC directives. \emph{First touch} is applied in the initialization of \texttt{x} and \texttt{mat}. \begin{lstlisting}[language=Fortran,numbers=left,commentstyle=\color{blue},keywordstyle=\color{red},frame=single] !$omp parallel do private(ix,iy) !$acc parallel loop present(mat,x,y) private(ix,iy) DO iy=0,ny DO ix=0,nx y(ix,iy) = mat(ix,iy,1)*x(ix-1,iy-1) & & + mat(ix,iy,2)*x(ix, iy-1) & & + mat(ix,iy,3)*x(ix+1,iy-1) & & + mat(ix,iy,4)*x(ix-1,iy) & & + mat(ix,iy,0)*x(ix,iy) & & + mat(ix,iy,5)*x(ix+1,iy) & & + mat(ix,iy,6)*x(ix-1,iy+1) & & + mat(ix,iy,7)*x(ix, iy+1) & & + mat(ix,iy,8)*x(ix+1,iy+1) END DO END DO !$acc end parallel loop !$omp end parallel do \end{lstlisting} The performances on a Helios dual processor node and its attached Xeon Phi co-processor are shown in Fig.~\ref{fig:cpu_mic} while the performances on a Cray XC30 CPU and its attached NVIDIA graphics card are shown in Fig.~\ref{fig:cpu_gpu}. In these figures, Intel optimization flag \texttt{-O3} and default Cray optimization were applied. In Fig.~\ref{fig:cpu_mic_O1_O3}, the speedup by vectorization is shown by comparing performances obtained with \texttt{-O3} and \texttt{-O1}. Several observations can be drawn from these results. \begin{itemize} \item The parallel scaling, using OpenMP is linear for both Intel and Cray compilers, when the problem sizes fit into the 20MB cache of the Sandybridge processor. For grid sizes smaller than $32\times8$, the overhead of thread creation dominates. When the memory footprint is larger than the cache, 4 threads per socket already saturate the memory bandwidth. \item On the MIC, the parallel speedup scales linearly up to 60 cores with 1 thread per core. Using 2 or 3 threads per core does not help while with 4 threads, the performance even degrades. \item The MIC, using the Intel \emph{mic native} mode, does not perform better than 8 cores of the Sandybridge processor. \item Since the benefit from \emph{vectorization} is quite large for the MIC (see Fig.~\ref{fig:cpu_mic_O1_O3}), the poor parallel scalability may be explained by the low flop intensity per thread coupled with the high overhead of the (many) thread creation and thread synchronization. \item The NIVIDIA card, using the high level OpenACC programming style is more than 3 times faster than 8 Sandybridge cores, for grid sizes larger than $1024\times256$. For smaller sizes, there are not enough flops to keep the GPU threads busy. \end{itemize} \begin{figure}[htb] \centering \includegraphics[angle=0,width=\hsize]{\RepFigures/cpu_mic} \caption{Performance on the Helios dual processor (left) using the \texttt{-O3} compiler option and on the MIC (right), using the native mode \texttt{-mmic}.} \label{fig:cpu_mic} \end{figure} \begin{figure}[htb] \centering \includegraphics[angle=0,width=\hsize]{\RepFigures/cpu_gpu} \caption{Performance on a Cray XC30 single 8 core processor node (left) and the NVIDIA card (right) using OpenACC. Default Cray Fortran compiler optimization has been used on both runs.} \label{fig:cpu_gpu} \end{figure} \begin{figure}[htb] \centering \includegraphics[angle=0,width=\hsize]{\RepFigures/cpu_mic_O1_O3} \caption{Performance comparison between using \texttt{-O3} and \texttt{-O1}, on the Helios dual processor (left) and on the MIC (right).} \label{fig:cpu_mic_O1_O3} \end{figure} \FloatBarrier \section{Hybrid MPI+OpenMP \texttt{PARMG (r599})} In this version, a \emph{straightforward} parallelization is done in the subroutines \texttt{jacobi, residue, prolong, restrict} and \texttt{norm\_vec}, using the OpenMP work sharing directives. The ghost cell exchange is executed by the \emph{master} thread. All the 2D arrays (solutions, RHS, etc.) are allocated and initialized \emph{once} by the \emph{master} thread. Dynamic array allocations during the multigrid $V$-cycles are thus avoided. To help further optimization, timings are introduced for each of the 4 multigrid components \texttt{jacobi, residue, prolong, restrict} and the ghost cell \texttt{exchange} as well as on the \emph{recursive} subroutine \texttt{mg}. Since the timings of the 4 MG components include already calls to \texttt{exchange}, the time obtained for \texttt{mg} should be equal to the sum of the 4 MG components and the \emph{extras} time which includes operations in \texttt{mg} but not in the 4 components: \begin{equation} \label{eq:timings} t_\text{mg} = t_\text{jacobi} + t_\text{residue} + t_\text{prolong} + t_\text{restrict} + t_\text{extras}. \end{equation} We will see in the following sections that, in addition to these 5 contributions to $t_\text{mg}$, there is \emph{overhead} probably due to the \emph{recursive} calls of \texttt{mg}. \subsection{Parallel efficiency on single node} The comparison in Fig.~\ref{fig:single_node} shows that the pure OpenMP version is at most $30\%$ slower than the pure MPI version when all the 16 cores are used but less than $10\%$ when only one socket is used. The degradation of the OpenMP version can be explained by the \emph{numa} effects when 2 sockets are used. It is also observed that the performance level off at 4 cores, due to the saturation of the socket memory bandwidth. \begin{figure}[htb] \centering \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/single_node} \caption{Parallel performance of the 7 level $V(3,3)$-cycle on a dual socket Helios node ($2\times 8$ cores) for pure OpenMP and pure MPI. The non-homogeneous NNDD problem with the same parameters in Fig.~\ref{fig:conv_nh_bc} is considered here. The OpenMP threads and MPI tasks are placed first on the first socket before filling the second socket, using the \texttt{srun} option ``\texttt{--cpu\_bind=cores -m block:block}'' and the environment variable \texttt{OMP\_PROC\_BIND=true}.} \label{fig:single_node} \end{figure} \subsection{Hybrid efficiency on multi-nodes} In the following multi-node experiments, all the 16 cores on each Helios node are utilized. The numbers of OpenMP threads \emph{per} MPI \emph{process} NT, the number of MPI processes \emph{per node} NP, the number of nodes NNODES and the \emph{total} number of MPI processes $\text{NP}_{tot}$ verify thus the following relations: \begin{equation} \begin{gathered} 1\le\text{NT} \le 16, \qquad 1\le\text{NP} \le 16 \\ \text{NT}\times\text{NP} = 16 \\ \text{NP}_{tot} = 16\times\text{NNODES}/\text{NT} \\ \end{gathered} \end{equation} The times of the different MG components and the relative contributions for the \emph{strong scaling} experiments using a $1024\times 4096$ grid size, are shown in Fig.~\ref{fig:hybrid_strong} and Fig.~\ref{fig:hybrid_strong_contrib} respectively. The following observations can be made: \begin{enumerate} \item The \texttt{exchange} time increases strongly with increasing NNODES, due to smaller partitioned subdomains and thus their larger surface/volume ratio. \item The pure MPI (NT=1) \texttt{exchange} time is on the other hand reduced with $\text{NT}>1$ since the local partitioned grid becomes larger. \item The less efficient OpenMP parallelization (numa effects, Amdahl's law) tends to limit however this advantage. \item As a result, there is an optimal NT for a given NNODES: 2 for 4 and 16 nodes, 8 for 64 nodes. \item The \texttt{jacobi} and \texttt{residue} contributions dominate largely with $0.63\le t_\text{jacobi}/t_\text{mg}\le 0.83$ and $0.09\le t_\text{residue}/t_\text{mg}\le 0.18$. \item The \emph{overhead} (see Eq.~\ref{eq:timings}) times increase with NNODES but decrease slightly for increasing NT. \end{enumerate} The times of the different MG components and the relative contributions for the \emph{weak scaling} experiments are shown in Fig.~\ref{fig:hybrid_weak} and Fig.~\ref{fig:hybrid_weak_contrib} respectively. The following observations can be made: \begin{enumerate} \item A steady increase of MG times with the number of nodes can be attributed to the increase of ghost cells \texttt{exchange} time, even though the amount of communications between nodes does not change. \item The MG performance is improved slightly when NT=2 but drop drastically for NT=16. This seems to indicate that \emph{numa} effects are important here, since the array initialization is not done locally on each thread. \item The \emph{overhead} (see Eq.~\ref{eq:timings}) times are much smaller than in the \emph{strong scaling} runs. \end{enumerate} \begin{sidewaysfigure} \centering \includegraphics[angle=0,width=\hsize]{\RepFigures/hybrid_strong} \caption{Detailed timings for strong scaling experiments using the same problem parameters as in Fig.~\ref{fig:single_node}, except that 5 levels are chosen to be able to run the runs with 64 nodes.} \label{fig:hybrid_strong} \end{sidewaysfigure} \begin{sidewaysfigure} %\begin{figure}[htb] \centering \includegraphics[angle=0,width=\hsize]{\RepFigures/hybrid_strong_contrib} \caption{Relative contributions of each of the MG components for the strong scaling experiments using the same problem parameters as in Fig.~\ref{fig:single_node}.} \label{fig:hybrid_strong_contrib} %\end{figure} \end{sidewaysfigure} \begin{sidewaysfigure} %\begin{figure}[htb] \centering \includegraphics[angle=0,width=\hsize]{\RepFigures/hybrid_weak} \caption{Detailed timings for weak scaling experiments using the same problem parameters as in Fig.~\ref{fig:single_node}.} \label{fig:hybrid_weak} %\end{figure} \end{sidewaysfigure} \begin{sidewaysfigure} %\begin{figure}[htb] \centering \includegraphics[angle=0,width=\hsize]{\RepFigures/hybrid_weak_contrib} \caption{Relative contributions of each of the MG components for the weak scaling experiments using the same problem parameters as in Fig.~\ref{fig:single_node}.} \label{fig:hybrid_weak_contrib} %\end{figure} \end{sidewaysfigure} Finally, Table~\ref{tab:memory} shows that using $\text{NT}>1$ decreases the memory needed by the multigrid procedure for both strong and weak scaling runs. \begin{table}[htb] \begin{center} \begin{tabular}{|l|c||r|r|r|r|r|} \hline &\texttt{\textbf{NNODES}}& NT=1 & NT=2 & NT=4 & NT=8 & NT=16 \\ \hline \multirow{4}{*}{\texttt{\textbf{Strong Scaling}}} &1 & 57.01 & 53.70 & 52.04 & 51.06 & 48.57 \\ &4 & 21.87 & 21.49 & 15.75 & 13.87 & 13.11 \\ &16 & 13.82 & 8.52 & 5.75 & 5.69 & 4.00 \\ &64 & 13.93 & 6.80 & 3.73 & 2.32 & 1.48 \\ \hline\hline \multirow{4}{*}{\texttt{\textbf{Weak Scaling}}} &1 & 57.03 & 53.71 & 52.04 & 51.08 & 48.61 \\ &4 & 59.24 & 59.18 & 53.39 & 51.52 & 48.69 \\ &16 & 60.48 & 55.33 & 52.69 & 52.74 & 49.06 \\ &64 & 63.30 & 56.13 & 53.30 & 51.58 & 48.79 \\ \hline \end{tabular} \end{center} \caption{Memory footprint \emph{per core} (MB/core) for the strong scaling and weak scaling experiments.} \label{tab:memory} \end{table} \subsection{Summary and conclusions} The \emph{strong scaling} and \emph{weak scaling} wrt NT and NNODES are summarized in Fig.~\ref{fig:scaling}. The speed up for the strong scaling experiments shows a good efficiency up to 16 nodes for all NT but degrades at 64 nodes (1024 cores) due the partitioned grid becoming too small. A good \emph{weak scaling} is also obtained with an increase in $t_\text{mg}$ of less than $10\%$ when NNODES vary from 4 to 64. However, for NT=16, the efficiency drops significantly, due to the non-local memory access when the OpenMP threads are placed on both sockets (\emph{numa} effect). In order to improve the hybrid MPI+OpenMP multigrid, especially for large number of threads per MPI process NT, the following optimizations should be done: \begin{itemize} \item \emph{First touch} array initialization in order to avoid \emph{numa} effects. \item OpenMP parallelization of some remaining \emph{serial} loops. \item Better vectorization of inner loops. \end{itemize} The outcome of these optimization steps is important in order to run efficiently on upcoming \emph{multicore} processors and \emph{manycore} (MIC) devices. \begin{figure}[htb] \centering \includegraphics[angle=0,width=0.9\hsize]{\RepFigures/scaling} \caption{Strong scaling with a $1024\times 4096$ grid size (left) and weak scaling (right) with grid sizes $1024\times 4096,10248\times 8192, 4096\times 16384$ and $8192\times32768$ respectively for 1, 4, 16 and 64 nodes.} \label{fig:scaling} \end{figure} \FloatBarrier \pagebreak \begin{thebibliography}{99} \bibitem{Briggs} {W.L.~Briggs, V.E.~Henson and S.F.~McCormick, A Multigrid Tutorial, Second Edition, SIAM (2000)}. \bibitem{MUMPS} \url{http://graal.ens-lyon.fr/MUMPS/}. \bibitem{MG1D} {\tt Multigrid Formulation for Finite Elements},\\ \url{https://crppsvn.epfl.ch/repos/bsplines/trunk/multigrid/docs/multigrid.pdf} \bibitem{TEMPL} {R. Barrett, M. Berry, T. F. Chan, J. Demmel, J. Donato, J. Dongarra, V. Eijkhout, R. Pozo, C. Romine and H. Van der Vorst, Templates for the Solution of Linear Systems: Building Blocks for Iterative Methods, 2nd Edition , SIAM, (1994)}. \bibitem{Wesseling} {P.~Wesseling, An Introduction to Multigrid Methods, Edwards, 2004}. \bibitem{salpha} {X. Lapillonne, S. Brunner, T. Dannert, S. Jolliet, A. Marinoni et al., Phys. Plasmas 16, 032308 (2009)}. \end{thebibliography} \end{document} diff --git a/multigrid/docs/multigrid.tex b/multigrid/docs/multigrid.tex index a8f694d..4af4d6b 100644 --- a/multigrid/docs/multigrid.tex +++ b/multigrid/docs/multigrid.tex @@ -1,949 +1,949 @@ % % @file multigrid.tex % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % \documentclass[a4paper]{article} \usepackage{linuxdoc-sgml} \usepackage{graphicx} \usepackage{hyperref} \usepackage{amsmath} %\usepackage{verbatim} %\usepackage[notref]{showkeys} \title{\tt Multigrid Formulation for Finite Elements.} \author{Trach-Minh Tran, Stephan Brunner} \date{v0.2, October 2012} \abstract{A multigrid formulation for finite elements is derived, using variational principles. More specifically the grid transfer operators will be derived and tested in 1D Cartesian, cylindrical and spherical geometry for arbitrary order B-Splines.} \begin{document} \maketitle \tableofcontents \section{The discretized problem} Consider the one-dimensional linear integro-differential problem \begin{equation} \label{eq:oned_prob} \mathcal{L}(u) = f, \qquad 0\le x\le L, \end{equation} with suitable boundary conditions. On an \emph{equidistant} mesh with interval $h=L/N$ and using the \emph{weak form} of Eq.~(\ref{eq:oned_prob}), the linear system to be solved on this grid (which will be referred as the \emph{fine} grid) can be written as (see \cite{SOLVERS}, \cite{BSPLINES}): \begin{equation} \label{eq:fine} \sum_{i'=1}^{N+p}A_{ii'}^hu^h_{i'} = b^h_i, \qquad A^h_{ii'}=\int_{0}^{L}\Lambda^h_i \mathcal{L}(\Lambda^h_{i'})\,x^{\alpha}dx, \qquad b^h_i = \int_{0}^{L}f\Lambda^h_i\,x^{\alpha}dx, \end{equation} where $p$ is the order the Splines $\Lambda^h_i$ and $\alpha=0,1,2$ for Cartesian, cylindrical and spherical coordinates respectively. It should be noted that the unknowns $u^h_i$ of this linear system are the \emph{expansion coefficients} of the discretized solution of the problem $u^h(x)$ \begin{equation} u^h(x) = \sum_{i'=1}^{N+p}u^h_{i'}\Lambda^h_{i'}(x) \end{equation} and the right hand sides $b^h_i$ are defined as the \emph{projection} of $f(x)$ on the same basis functions, in contrast with the Finite Differences (FD) or Finite Volume (FV) formulations where $u^h_i$ and $b^h_i$ are the \emph{nodal values} of $u$ and $f$. On the \emph{coarser} mesh with interval $2h=2L/N$, the discretized linear system can be written as \begin{equation} \label{eq:coarse} \sum_{i'=1}^{N/2+p}A_{ii'}^{2h}u^{2h}_{i'} = b^{2h}_i, \qquad A^{2h}_{ii'}=\int_{0}^{L}\Lambda^{2h}_i \mathcal{L}(\Lambda^{2h}_{i'})\,x^{\alpha}dx, \qquad b^{2h}_i = \int_{0}^{L}f\Lambda^{2h}_i\,x^{\alpha}dx. \end{equation} \section{Transfer operators} \label{sec:twogrid} For simplicity let consider the two-grid procedure \cite{Briggs} which can be summarized as follow: \begin{enumerate} \item Obtain an approximation $\mathbf{u}^h$ on the \emph{fine} grid, using a Gauss-Seidel (GS) or a weighted Jacobi scheme. This procedure is also called \emph{smoothing} or \emph{relaxation}. \item Compute the \emph{residuals}: $\mathbf{r}^h=\mathbf{b}^h-\mathbf{A}^h\mathbf{u}^h$. \item Obtain the residuals on the coarse mesh $\mathbf{r^{2h}}$ by \emph{restriction} of $\mathbf{r}^h$. \item Direct solve $\mathbf{A}^{2h}\mathbf{e}^{2h}=\mathbf{r^{2h}}$ to obtain the error $\mathbf{e}^{2h}=\mathbf{u}-\mathbf{u}^{2h}$ \item Interpolate (\emph{prolong}) the error to obtain $\mathbf{e}^h$. \item Correct the approximation obtained on the fine grid: $\mathbf{u}^h\leftarrow \mathbf{u}^h+\mathbf{e}^h$. \item Relax on $\mathbf{A}^{h}\mathbf{u}^{h}=\mathbf{f^{h}}$, using the previously computed $\mathbf{u}^h$ as a guess. \end{enumerate} Steps 3 and 5 are called \emph{grid transfers} and are detailed in the following. It should be noted that the fine to coarse transfer (restriction) applies to the right hand side $\mathbf{b}^h$ while the prolongation applies to the expansion coefficients $\mathbf{u}^{2h}$. \subsection{Fine to coarse grid transfer (restriction)} The right hand side on the fine and coarse grid can be written as \begin{equation*} \begin{split} b^{h}_i &= \int_0^Lf\Lambda^{h}_i\,x^{\alpha}dx = \sum_{i'=1}^{N+p}f^h_{i'}\underbrace{\int_0^L\Lambda^{h}_{i}\Lambda^{h}_{i'}\,x^{\alpha}dx}_ {M^{h,h}_{ii'}}, \\ b^{2h}_i &= \int_0^Lf\Lambda^{2h}_i\,x^{\alpha}dx = \sum_{i'=1}^{N+p}f^h_{i'} \underbrace{\int_0^L\Lambda^{2h}_{i}\Lambda^{h}_{i'}\,x^{\alpha}dx}_{M^{2h,h}_{ii'}} \\ \end{split} \end{equation*} where the expansion $f(x)=\sum_{i=1}^{N+p}f^h_i\Lambda^h_i(x)$ has been used. Elimination of $\mathbf{f^h}$ leads to the definition of the \emph{restriction} matrix: \begin{equation} \label{eq:restriction} \mathbf{b}^{2h} = \mathbf{R}^{2h}_{h}\mathbf{b}^h, \qquad \boxed{\mathbf{R}^{2h}_{h}=\mathbf{M}^{2h,h}(\mathbf{M}^{h,h})^{-1}}. \end{equation} Note that the computation of the \emph{mass matrices} $\mathbf{M}^{h,h}$ and $\mathbf{M}^{2h,h}$ can be done \emph{exactly} using a Gauss integration with $N_G=\lceil p+(\alpha+1)/2 \rceil$ points. Another way to derive the restriction operator $\mathbf{R}^{2h}_{h}$ is by noting that the basis functions $\Lambda^{2h}_{i}$ are \emph{piecewise} $C^{p-1}_h$ \emph{polynomials} with \emph{breaks} on the fine grid points $x_i=ih$, and thus can be expressed \emph{uniquely} as \begin{equation} \Lambda^{2h}_{i}(x) = \sum_{i'=1}^{N+p}c_{ii'}\Lambda^{h}_{i'}(x), \quad i=1\ldots N/2+p. \end{equation} Projecting this equation on the basis $\Lambda^{h}_j$ then leads to \begin{equation*} \begin{split} \sum_{i'=1}^{N+p}c_{ii'}\int_0^L\Lambda^{h}_{i'}\Lambda^{h}_{j}\,x^{\alpha}dx &= \int_0^L\Lambda^{2h}_{i}\Lambda^{h}_{j}\,x^{\alpha}dx, \qquad i=1\ldots N/2+p, \quad j=1,\ldots N+p \\ \Longrightarrow \mathbf{c}\cdot\mathbf{M}^{h,h} &= \mathbf{M}^{2h,h} \Longrightarrow \mathbf{c} = \mathbf{M}^{2h,h}(\mathbf{M}^{h,h})^{-1} = \mathbf{R}^{2h}_{h} \\ \end{split} \end{equation*} and finally \begin{equation} \label{eq:restrict_gen} \boxed{\Lambda^{2h}_{i}(x) = \sum_{i'=1}^{N+p}\left(\mathbf{R}^{2h}_{h}\right)_{ii'}\Lambda^{h}_{i'}(x), \quad i=1\ldots N/2+p} \end{equation} Because the expansion coefficients $c_{ii'}$ of $\Lambda^{2h}_{i}(x)$ (rows of the restriction matrix $\mathbf{R}^{2h}_{h}$) on the fine mesh basis are \emph{unique}, $\mathbf{R}^{2h}_{h}$ should be independent of the geometry exponent $\alpha$ or more generally, of the definition of the \emph{projection} (or scalar product) used to calculate the restriction matrix. Furthermore, since the supports of both $\Lambda^{h}_i$ and $\Lambda^{2h}_i$ are \emph{compact}, the matrix $\mathbf{R}^{2h}_{h}$ should be \emph{sparse}. One can show that, using (\ref{eq:restrict_gen}), the restriction of the fine mesh FE matrix $\mathbf{A}^h$ is given by \begin{equation} \label{eq_coarse_mat} \mathbf{A}^{2h} = \mathbf{R}^{2h}_h\mathbf{A}^{h}\left(\mathbf{R}^{2h}_h\right)^{T}. \end{equation} \subsection{Coarse to fine grid transfer (prolongation)} Let denote the discretized solution on the coarse mesh of $\mathbf{A}^{2h}\mathbf{u}^{2h}=\mathbf{R}^{2h}_{h}\mathbf{b}^h$ by \begin{equation*} u^{2h}(x) = \sum_{i=1}^{N/2+p}u^{2h}_{i}\Lambda^{2h}_{i}(x), \end{equation*} and seek for an approximated solution on the fine mesh $\mathbf{u}^{h}$ \begin{equation*} u^{h}(x) = \sum_{i=1}^{N+p}u^{h}_{i}\Lambda^{h}_{i}(x). \end{equation*} by \emph{prolongation} of $\mathbf{u}^{2h}$ (instead of solving $\mathbf{A}^{h}\mathbf{u}^{h}=\mathbf{b}^h$). A reasonable solution is to \emph{minimize} the square of the error norm defined as \begin{equation*} \begin{split} \epsilon^2 &= \|u^{h}(x)-u^{2h}(x)\|^2 \equiv \int_0^L [u^{h}(x)-u^{2h}(x)]^2\,x^\alpha dx, \\ \frac{\partial\epsilon^2}{\partial u^h_i} &=0 \Longrightarrow \sum_{i'=1}^{N+p}u^{h}_{i}\int_0^L \Lambda^{h}_{i}\Lambda^{h}_{i'}\,x^\alpha dx = \sum_{i'=1}^{N/2+p}u^{2h}_{i}\int_0^L \Lambda^{h}_{i}\Lambda^{2h}_{i'}\,x^\alpha dx. \\ \end{split} \end{equation*} This yields the prolonged (or interpolated) \emph{coarse grid} solution on the \emph{fine grid} \begin{equation} \label{eq:prolongation} \mathbf{u}^h = \mathbf{P}^h_{2h}\mathbf{u}^{2h}, \qquad \boxed{\mathbf{P}^h_{2h} = (\mathbf{M}^{h,h})^{-1}\mathbf{M}^{h,2h}=(\mathbf{R}^{2h}_{h})^T} \end{equation} and the coarse FE matrix can be finally expressed as\begin{equation} \label{eq:coarse_mat} \boxed{\mathbf{A}^{2h} = \mathbf{R}^{2h}_{h}\mathbf{A}^{h}\mathbf{P}^h_{2h}} \end{equation} \subsection{An alternative derivation of grid transfer operators} Starting from the inter grid transformation of the basis functions Eq.(\ref{eq:restrict_gen}), the restriction of $\mathbf{b}^h$ and the prolongation of $\mathbf{u}^{2h}$ can be derived as follow \begin{gather*} b^{2h}_i = \int_0^L f\Lambda^{2h}_i\,x^{\alpha}dx = \sum_{i'=1}^{N+p} \left(\mathbf{R}^{2h}_{h}\right)_{ii'}\int_0^Lf\Lambda^{h}_{i'}\,x^{\alpha}dx =\sum_{i'=1}^{N+p} \left(\mathbf{R}^{2h}_{h}\right)_{ii'}b^h_{i'}, \\ u^{2h}(x) = \sum_{i=1}^{N/2+p} u^{2h}_{i} \Lambda^{2h}_{i}= \sum_{i'=1}^{N+p}\underbrace{\left[\sum_{i=1}^{N/2+p} \left(\mathbf{R}^{2h}_{h}\right)_{ii'} u^{2h}_{i}\right]}_{u^h_{i'}}\Lambda^{h}_{i'}(x) \Longrightarrow \mathbf{u}^h = \left(\mathbf{R}^{2h}_{h}\right)^T\mathbf{u}^{2h} = \mathbf{P}^h_{2h}\mathbf{u}^{2h}.\\ \end{gather*} \section{Numerical results for the transfer operators} The prolongation matrix as defined in Eq.~(\ref{eq:prolongation}) was calculated using the BSPLINES module. A Gauss integration with $N_G=\lceil p+(\alpha+1)/2 \rceil$ points is used to carry out the numerical integrations. In the following, the results are presented for linear, quadratic and cubic Splines. Since the restriction matrix is just the transpose of the prolongation matrix, only the latter is shown. As expected, all the obtained matrices are found to be \emph{independent} of $\alpha$ and \emph{sparse}. During the calculations, it was checked that \begin{itemize} \item The coarse matrix computed using Eq.~(\ref{eq:coarse_mat}) and the transfer matrix, is identical to the matrix assembled directly on the coarse grid. \item The sum of each row of the prolongation matrix is 1, since a constant function ($\mathbf{u}^{2h}=1$) should remain constant after the grid transfer. \end{itemize} \subsection{Linear Splines} For $N=8$, the prolongation is a $9\times 5$ matrix given by \begin{equation} \mathbf{P}^{h}_{2h} = \left( \begin{matrix} 1 & 0 & 0 & 0 & 0 \\ 1/2 & 1/2 & 0 & 0 & 0 \\ 0 & 1 & 0 & 0 & 0 \\ 0 & 1/2 & 1/2 & 0 & 0 \\ 0 & 0 & 1 & 0 & 0 \\ 0 & 0 & 1/2 & 1/2 & 0 \\ 0 & 0 & 0 & 1 & 0 \\ 0 & 0 & 0 & 1/2 & 1/2\\ 0 & 0 & 0 & 0 & 1 \\ \end{matrix}\right) \end{equation} As expected, the prolongation matrix for linear Splines is identical to the one obtained for first order FD discretization, where a linear interpolation is used. One can easily check that \begin{equation*} \begin{split} \Lambda^{2h}_1(x) &= \Lambda^h_1(x) + \frac{1}{2}\Lambda^h_2(x), \\ \Lambda^{2h}_2(x) &= \frac{1}{2}\Lambda^h_2(x) + \Lambda^h_3(x) + \frac{1}{2}\Lambda^h_4(x), \\ \end{split} \end{equation*} as expected from (\ref{eq:restrict_gen}). \subsection{Quadratic Splines} For $N=8$, the prolongation is a $10\times 6$ matrix given by \begin{equation} \mathbf{P}^{h}_{2h} = \left( \begin{matrix} 1 & 0 & 0 & 0 & 0 & 0 \\ 1/2 & 1/2 & 0 & 0 & 0 & 0 \\ 0 & 3/4 & 1/4 & 0 & 0 & 0 \\ 0 & 1/4 & 3/4 & 0 & 0 & 0 \\ 0 & 0 & 3/4 & 1/4 & 0 & 0 \\ 0 & 0 & 1/4 & 3/4 & 0 & 0 \\ 0 & 0 & 0 & 3/4 & 1/4 & 0 \\ 0 & 0 & 0 & 1/4 & 3/4 & 0 \\ 0 & 0 & 0 & 0 & 1/2 & 1/2\\ 0 & 0 & 0 & 0 & 0 & 1 \\ \end{matrix}\right) \end{equation} \subsection{Cubic Splines} For $N=10$, the prolongation is a $13\times 8$ matrix given by \begin{equation} \mathbf{P}^{h}_{2h} = \left( \begin{matrix} 1 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\ 1/2 & 1/2 & 0 & 0 & 0 & 0 & 0 & 0 \\ 0 & 3/4 & 1/4 & 0 & 0 & 0 & 0 & 0 \\ 0 & 3/16 & 11/16 & 1/8 & 0 & 0 & 0 & 0 \\ 0 & 0 & 1/2 & 1/2 & 0 & 0 & 0 & 0 \\ 0 & 0 & 1/8 & 3/4 & 1/8 & 0 & 0 & 0 \\ 0 & 0 & 0 & 1/2 & 1/2 & 0 & 0 & 0 \\ 0 & 0 & 0 & 1/8 & 3/4 & 1/8 & 0 & 0 \\ 0 & 0 & 0 & 0 & 1/2 & 1/2 & 0 & 0 \\ 0 & 0 & 0 & 0 & 1/8 & 11/16 & 3/16 & 0 \\ 0 & 0 & 0 & 0 & 0 & 1/4 & 3/4 & 0 \\ 0 & 0 & 0 & 0 & 0 & 0 & 1/2 & 1/2\\ 0 & 0 & 0 & 0 & 0 & 0 & 0 & 1 \\ \end{matrix}\right) \end{equation} Note that from the results shown above, it is straightforward to derive the prolongation matrix for other number of intervals $N$. \section{Practical Considerations} \subsection{Boundary conditions} The \emph{essential Dirichlet boundary conditions} are imposed by zeroing the column and row (first column and first row for the left boundary and last column and last row for the right boundary) of the FE matrix $\mathbf{A}^h$ and putting 1 on the diagonal. The same operation should be also performed on the prolongation matrix, preserving thus the relation (\ref{eq:coarse_mat}). For non-homogeneous Dirichlet boundary conditions, the elements of the column should be saved before the zeroing operation (for example $A^h_{21}, A^h_{31}, \ldots$ for the left boundary condition). They will be used later to modify the right hand side: \begin{equation*} b^h_i \leftarrow b^h_i-A^h_{i1}u^h_1, \quad i=2,\ldots \end{equation*} Nothing has to be done for \emph{natural boundary conditions}. \subsection{Residual norm and error} The residual norm is simply defined as the Euclidean norm of the residue: \begin{equation} \label{eq:resid} \|r\|_2 = \|\mathbf{b}-\mathbf{A}\mathbf{u}\|_2 = \sqrt{\sum_i\left(b_i-\sum_{i'}A_{ii'}u_{i'}\right)^2}. \end{equation} When the \emph{exact} solution $u(x)$ is known, the \emph{discretization error} can defined as \begin{equation} \label{eq:discerr} \|e\|_2 = \sqrt{\int x^\alpha dx\left[\sum_{i}u_{i}\Lambda_i(x)-u(x)\right]^2} \end{equation} and computed using a Gauss quadrature. Note that for Splines of order $p$, $\|e\|_2(h)$ converges to zero as $O(h^{p+1})$. \section{The Model Problems} \subsection{Cartesian geometry} The following second-order boundary value problem is considered: \begin{equation} \label{eq:cartesian_problem} \begin{split} -\frac{d^2}{dx^2} u(x) + \sigma u(x) &= \sin (\pi kx), \qquad 0\le x\le 1 \\ u(0)=u(1) &= 0 \\ \Rightarrow u(x) = \frac{\sin(\pi kx)}{\pi^2k^2+\sigma}.& \\ \end{split} \end{equation} Using the weak form, the FE discretized matrix and right hand side can be computed as \begin{equation} A_{ii'} = \int_0^1dx\left[\Lambda'_i(x)\Lambda'_{i'}(x) + \sigma\Lambda_i(x)\Lambda_{i'}(x)\right], \qquad b_i = \int_0^1dx \sin (\pi kx)\Lambda_i(x). \end{equation} For Splines of order $p$, the integration is done with a $\lceil p+1/2\rceil$ point Gauss quadrature which is \emph{exact} for the matrix $\mathbf{A}$ if $\sigma$ is constant. The boundary conditions are simply imposed by setting \begin{equation*} A_{ki}=A_{ik}=\delta_{ik} \qquad\mbox{and} \qquad b_k=0 \end{equation*} for $k=1$ (the first equation) and $N+p$ (the last equation). \subsection{Cylindrical geometry} The following second-order boundary value problem is considered: \begin{equation} \begin{split} -\frac{1}{r}\frac{d}{dr}r\frac{d}{dr}u(r) + \frac{m^2}{r^2}u(r) &= j^2_{ms}J_{m}(j_{ms}r), \qquad 0\le r\le 1, \quad j_{ms} = s^{th}\mbox{ zero of }J_{m}, \\ u(1) &= 0 \\ \Rightarrow u(r) = J_{m}(j_{ms}r).& \\ \end{split} \end{equation} Using the weak form, the FE discretized matrix and right hand side can be computed as \begin{equation} A_{ii'} = \int_0^1rdr\left[\Lambda'_i(r)\Lambda'_{i'}(r) + \frac{m^2}{r^2}\Lambda_i(r)\Lambda_{i'}(r)\right], \qquad b_i = \int_0^1rdr j^2_{ms}\,J_{m}(j_{ms}r)\Lambda_i(r). \end{equation} The boundary condition has only to be imposed on the last equation, using the same procedure described for the Cartesian geometry. It should be noted here that for $m\neq 0$, the matrix elements $A_{1i}$ and $A_{i1}$ \emph{diverge} since $\Lambda_1(r)$ is not equal to zero at $r=0$. However, using a \emph{direct solver}, one can observe that the resulting \emph{discretization errors} as defined by Eq.(\ref{eq:discerr}) converge for number of Gauss points $N_G$ slightly larger than $p+1$, as shown in Table~\ref{tab:gauss_conv}. Then, using $N_G=4$ and $6$ for the linear and cubic splines respectively, the discretization error as a function of the number of grid intervals (Fig~\ref{fig:cyl_conv}) show the expected quadratic and quartic scaling respectively for the linear and cubic Splines. \begin{figure} \centering \includegraphics[angle=0,width=0.8\hsize]{cyl_conv} \caption{Discretization errors $\|e\|_2$ obtained by a \emph{direct solver} versus the number of grid intervals $N$. A linear fit yields a quadratic scaling ($\sim N^{-2.0}$) for the linear Splines and a quartic convergence ($\sim N^{-4.3}$) for the cubic Splines.} \label{fig:cyl_conv} \end{figure} \begin{table} \centering \begin{tabular}{|c|c|c|}\hline Number of Gauss points & $p=1$ & $p=3$ \\ \hline 2 & 8.319E-04 & \\ 4 & 9.277E-04 & 5.799E-07 \\ 6 & 9.276E-04 & 5.936E-07 \\ 8 & 9.276E-04 & 5.936E-07 \\\hline \end{tabular} \caption{Convergence of the \emph{discretization error} with respect to the number of Gauss points for the cylindrical problem with $m=1$, $s=10$ on a $128$ interval grid.} \label{tab:gauss_conv} \end{table} \section{The Multigrid Schemes} The two grid procedure described in section (\ref{sec:twogrid}) can be generalized as follow. Let $\nu_1$, $\nu_2$ and $\mu$ be three iteration parameters. Given a guess $\mathbf{u}^h$ and right hand side $\mathbf{b}^h$ at the \emph{finest} level, a MG cycle represented by \begin{equation*} \boxed{\mathbf{u}^h \leftarrow MG^h(\mathbf{u}^h,\mathbf{b}^h)} \end{equation*} will compute a \emph{new} $\mathbf{u}^h$ and is defined recursively by the following steps: \begin{enumerate} \item If $h$ is the coarsest mesh size, direct solve $\mathbf{A}^h\mathbf{u}^h=\mathbf{b}^h$ and return. \item Else \begin{itemize} \item Relax $\mathbf{u}^h$ $\nu_1$ times. \item $\mathbf{b}^{2h} \leftarrow \mathbf{R}^{2h}_h(\mathbf{b}^h-\mathbf{A}^h\mathbf{u}^h), \quad \mathbf{u}^{2h}\leftarrow 0$. \item $\mathbf{u}^{2h} \leftarrow MG^{2h}(\mathbf{u}^{2h},\mathbf{b}^{2h})$ $\mu$ times. \item $\mathbf{u}^h\leftarrow \mathbf{u}^h+\mathbf{P}^{h}_{2h}\mathbf{u}^{2h}$. \item Relax $\mathbf{u}^h$ $\nu_2$ times. \end{itemize} \end{enumerate} The standard $V$-cycle is obtained for $\mu=1$ while $\mu=2$ results in the $W$-cycle. Usually the number of \emph{pre-smooth} and \emph{post-smooth} sweeps $\nu_1$ and $\nu_2$ is limited to 1 or 2. In the following a $V$-cycle will be denoted by $V(\nu_1,\nu_2)$. Another multigrid algorithm called \emph{Full Multigrid} or FMG does not require an input guess $\mathbf{u}^h$ but solves first the problem on coarser grids and uses one or many MG cycles to obtain the problem solution. It can be represented by \begin{equation*} \boxed{\mathbf{u}^h \leftarrow FMG^h(\mathbf{b}^h)} \end{equation*} and defined recursively by the following steps: \begin{enumerate} \item If $h$ is the coarsest mesh size, direct solve $\mathbf{A}^h\mathbf{u}^h=\mathbf{b}^h$ and return. \item Else \begin{itemize} \item $\mathbf{b}^{2h} \leftarrow \mathbf{R}^{2h}_h(\mathbf{b}^h)$. \item $\mathbf{u}^{2h} \leftarrow FMG^{2h}(\mathbf{b}^{2h})$. \item $\mathbf{u}^h \leftarrow \mathbf{P}^{h}_{2h}\mathbf{u}^{2h}$. \item $\mathbf{u}^h \leftarrow MG^h(\mathbf{u}^h,\mathbf{b}^h)$ $\nu_0$ times. \end{itemize} \end{enumerate} Note that while the MG process is an iterative process (started by setting for example the initial guess $\mathbf{u}^h=0$), the FMG is more like a \emph{direct solver} with appropriate values of $\nu_1$, $\nu_2$ and $\nu_0$ determined experimentally. \section{Numerical Experiments} The residual norm $\|r\|_2$ and error $\|e\|_2$ defined previously are reported after each $V$-cycle in Table~\ref{tab:cartesian1} for the Cartesian model problem and in Table~\ref{tab:cylindrical1} for the cylindrical one . The ratio between successive cycle $\|r\|_2$ and $\|e\|_2$ are shown in columns labeled \emph{ratio} and measure the rate of iteration convergence. The \emph{asymptotic} ratio of $\|r\|_2$ is called the \emph{convergence factor}. In all the cases shown, one can note that $\|e\|_2$ level off quickly to the discretization error obtained by using the \emph{direct solver} on the finest grid, while the residual norms $\|r\|_2$ continue to decrease until the machine zero is eventually reached. One can also verify that the \emph{final} discretization errors scale approximately as $8^2$ and $8^4$ respectively for linear and cubic Splines, as $N$ is increased from $128$ to $1024$. Most interestingly, the \emph{iterative performance} depends very weakly on the problem size $N$, for both the Cartesian and the cylindrical cases. Moreover, the multigrid seems to be less efficient when linear Splines are used for the problem discretization. This iterative performance can be further improved by increasing the \emph{iteration parameters} $\nu_1$, $\nu_2$ and $\mu$, as shown in Table~\ref{tab:improv}. One can also observe in the same table that the Jacobi relaxation is systematically less efficient than Gauss Seidel relaxation. \begin{table} \centering \begin{tabular}{|c|c|c|c|c|c|c|c|c|}\hline \multicolumn{9}{|c|}{ Linear B-Splines $p=1$} \\ \hline & \multicolumn{4}{c|}{ $N=128$} & \multicolumn{4}{c|}{ $N=1024$} \\ \cline{2-9} $V$-cycle & $\|r\|_2$ & ratio & $\|e\|_2$ & ratio & $\|r\|_2$ & ratio & $\|e\|_2$ & ratio \\ \hline 0 & 6.219E-02 & & 7.164E-04 & & 2.210E-02 & & 7.164E-04 & \\ 1 & 2.169E-02 & 0.35 & 5.880E-05 & 0.08 & 9.699E-03 & 0.44 & 3.622E-05 & 0.05 \\ 2 & 3.801E-03 & 0.18 & 7.806E-06 & 0.13 & 1.790E-03 & 0.18 & 1.965E-06 & 0.05 \\ 3 & 5.061E-04 & 0.13 & 3.666E-06 & 0.47 & 2.923E-04 & 0.16 & 1.583E-07 & 0.08 \\ 4 & 6.762E-05 & 0.13 & 3.564E-06 & 0.97 & 4.055E-05 & 0.14 & 6.197E-08 & 0.39 \\ 5 & 8.902E-06 & 0.13 & 3.585E-06 & 1.01 & 5.586E-06 & 0.14 & 5.655E-08 & 0.91 \\ 6 & 1.199E-06 & 0.13 & 3.589E-06 & 1.00 & 7.122E-07 & 0.13 & 5.622E-08 & 0.99 \\ 7 & 1.585E-07 & 0.13 & 3.590E-06 & 1.00 & 9.815E-08 & 0.14 & 5.620E-08 & 1.00 \\ 8 & 2.089E-08 & 0.13 & 3.590E-06 & 1.00 & 1.320E-08 & 0.13 & 5.619E-08 & 1.00 \\ 9 & 2.746E-09 & 0.13 & 3.590E-06 & 1.00 & 1.887E-09 & 0.14 & 5.619E-08 & 1.00 \\ 10 & 3.741E-10 & 0.14 & 3.590E-06 & 1.00 & 2.533E-10 & 0.13 & 5.619E-08 & 1.00 \\ \hline \hline \multicolumn{9}{|c|}{ Cubic B-Splines $p=3$} \\ \hline & \multicolumn{4}{c|}{ $N=128$} & \multicolumn{4}{c|}{ $N=1024$} \\ \cline{2-9} $V$-cycle & $\|r\|_2$ & ratio & $\|e\|_2$ & ratio & $\|r\|_2$ & ratio & $\|e\|_2$ & ratio \\ \hline 0 & 6.187E-02 & & 7.164E-04 & & 2.209E-02 & & 7.164E-04 & \\ 1 & 1.948E-04 & 0.00 & 1.893E-06 & 0.00 & 1.685E-05 & 0.00 & 4.292E-08 & 0.00 \\ 2 & 4.316E-06 & 0.02 & 3.927E-09 & 0.00 & 1.241E-07 & 0.01 & 7.156E-11 & 0.00 \\ 3 & 1.554E-07 & 0.04 & 2.374E-09 & 0.60 & 4.184E-09 & 0.03 & 6.198E-13 & 0.01 \\ 4 & 5.750E-09 & 0.04 & 2.373E-09 & 1.00 & 1.560E-10 & 0.04 & 5.635E-13 & 0.91 \\ 5 & 2.153E-10 & 0.04 & 2.373E-09 & 1.00 & 5.912E-12 & 0.04 & 5.635E-13 & 1.00 \\ 6 & 8.122E-12 & 0.04 & 2.373E-09 & 1.00 & 2.258E-13 & 0.04 & 5.635E-13 & 1.00 \\ 7 & 3.079E-13 & 0.04 & 2.373E-09 & 1.00 & 8.777E-15 & 0.04 & 5.635E-13 & 1.00 \\ 8 & 1.173E-14 & 0.04 & 2.373E-09 & 1.00 & 1.758E-15 & 0.20 & 5.635E-13 & 1.00 \\ 9 & 4.489E-16 & 0.04 & 2.373E-09 & 1.00 & 1.709E-15 & 0.97 & 5.635E-13 & 1.00 \\ 10 & 9.571E-17 & 0.21 & 2.373E-09 & 1.00 & 1.761E-15 & 1.03 & 5.635E-13 & 1.00 \\ \hline \end{tabular} \caption{The multigrid $V(1,1)$ performance with Gauss-Seidel relation for a \emph{Cartesian} problem with $k=10$ and $\sigma=0$, discretized on a grid with $N=128$ and $1024$ intervals, using linear and cubic B-splines. For both grid sizes, a total of 6 grid levels were considered.} \label{tab:cartesian1} \end{table} \begin{table} \centering \begin{tabular}{|c|c|c|c|c|c|c|c|c|}\hline \multicolumn{9}{|c|}{ Linear B-Splines $p=1$} \\ \hline & \multicolumn{4}{c|}{ $N=128$} & \multicolumn{4}{c|}{ $N=1024$} \\ \cline{2-9} $V$-cycle & $\|r\|_2$ & ratio & $\|e\|_2$ & ratio & $\|r\|_2$ & ratio & $\|e\|_2$ & ratio \\ \hline 0 & 1.789E+01 & & 9.354E-02 & & 6.400E+00 & & 9.354E-02 & \\ 1 & 3.373E+00 & 0.19 & 3.068E-03 & 0.03 & 1.826E+00 & 0.29 & 3.036E-03 & 0.03 \\ 2 & 4.895E-01 & 0.15 & 8.064E-04 & 0.26 & 3.133E-01 & 0.17 & 1.624E-04 & 0.05 \\ 3 & 6.160E-02 & 0.13 & 6.704E-04 & 0.83 & 4.581E-02 & 0.15 & 1.411E-05 & 0.09 \\ 4 & 8.013E-03 & 0.13 & 6.811E-04 & 1.02 & 5.959E-03 & 0.13 & 1.062E-05 & 0.75 \\ 5 & 9.871E-04 & 0.12 & 6.844E-04 & 1.00 & 8.098E-04 & 0.14 & 1.069E-05 & 1.01 \\ 6 & 1.283E-04 & 0.13 & 6.847E-04 & 1.00 & 1.048E-04 & 0.13 & 1.070E-05 & 1.00 \\ 7 & 1.613E-05 & 0.13 & 6.847E-04 & 1.00 & 1.504E-05 & 0.14 & 1.070E-05 & 1.00 \\ 8 & 2.097E-06 & 0.13 & 6.847E-04 & 1.00 & 2.050E-06 & 0.14 & 1.070E-05 & 1.00 \\ 9 & 2.639E-07 & 0.13 & 6.847E-04 & 1.00 & 3.008E-07 & 0.15 & 1.070E-05 & 1.00 \\ 10 & 3.500E-08 & 0.13 & 6.847E-04 & 1.00 & 4.074E-08 & 0.14 & 1.070E-05 & 1.00 \\ \hline \hline \multicolumn{9}{|c|}{ Cubic B-Splines $p=3$} \\ \hline & \multicolumn{4}{c|}{ $N=128$} & \multicolumn{4}{c|}{ $N=1024$} \\ \cline{2-9} $V$-cycle & $\|r\|_2$ & ratio & $\|e\|_2$ & ratio & $\|r\|_2$ & ratio & $\|e\|_2$ & ratio \\ \hline 0 & 1.768E+01 & & 9.354E-02 & & 6.399E+00 & & 9.354E-02 & \\ 1 & 4.243E-02 & 0.00 & 4.727E-05 & 0.00 & 4.975E-03 & 0.00 & 6.588E-06 & 0.00 \\ 2 & 1.378E-03 & 0.03 & 1.897E-06 & 0.04 & 7.835E-05 & 0.02 & 6.578E-09 & 0.00 \\ 3 & 4.773E-05 & 0.03 & 1.814E-06 & 0.96 & 2.797E-06 & 0.04 & 4.125E-10 & 0.06 \\ 4 & 2.174E-06 & 0.05 & 1.814E-06 & 1.00 & 1.041E-07 & 0.04 & 4.092E-10 & 0.99 \\ 5 & 4.816E-07 & 0.22 & 1.814E-06 & 1.00 & 3.935E-09 & 0.04 & 4.092E-10 & 1.00 \\ 6 & 1.942E-07 & 0.40 & 1.814E-06 & 1.00 & 1.499E-10 & 0.04 & 4.092E-10 & 1.00 \\ 7 & 8.887E-08 & 0.46 & 1.814E-06 & 1.00 & 5.757E-12 & 0.04 & 4.092E-10 & 1.00 \\ 8 & 4.449E-08 & 0.50 & 1.814E-06 & 1.00 & 2.517E-13 & 0.04 & 4.092E-10 & 1.00 \\ 9 & 2.377E-08 & 0.53 & 1.814E-06 & 1.00 & 1.360E-13 & 0.54 & 4.092E-10 & 1.00 \\ 10 & 1.328E-08 & 0.56 & 1.814E-06 & 1.00 & 1.384E-13 & 1.02 & 4.092E-10 & 1.00 \\ \hline \end{tabular} \caption{The multigrid $V(1,1)$ performance with Gauss-Seidel relation for a one-dimensional \emph{cylindrical} problem with $m=22$ and $s=10$, discretized on a grid with $N=128$ and $1024$ intervals, using linear and cubic B-splines. For both grid sizes, a total of 6 grid levels were considered.} \label{tab:cylindrical1} \end{table} \begin{table} \centering \begin{tabular}{|c|c|c|c|c|}\hline & \multicolumn{2}{c|}{ Cartesian problem} & \multicolumn{2}{c|}{ Cylindrical problem} \\ \cline{2-5} & $N=128$ & $N=1024$ & $N=128$ & $N=1024$\\ \hline $\nu_1=1, \nu_2=1, \mu=1$ & 0.13 & 0.14 & 0.13 & 0.14 \\ $\nu_1=1, \nu_2=2, \mu=1$ & 0.08 & 0.08 (\emph{0.10}) & 0.08 & 0.08 (\emph{0.09}) \\ $\nu_1=2, \nu_2=1, \mu=1$ & 0.08 & 0.08 & 0.08 & 0.08 \\ $\nu_1=2, \nu_2=2, \mu=1$ & 0.04 & 0.04 (\emph{0.08}) & 0.02 & 0.03 (\emph{0.08})\\ $\nu_1=1, \nu_2=1, \mu=2$ & 0.12 & 0.11 & 0.12 & 0.11 \\ \hline \end{tabular} \caption{The \emph{convergence factor} (averaged over the last 5 cycles) for different iteration parameters $\nu_1$, $\nu_2$ and $\mu$, using the linear Splines for both Cartesian ($k=10$, $\sigma=0$) and cylindrical ($m=22$, $s=10$) problems. The last entry is usually called a $W$-cycle while the first four designate a $V(\nu_1,\nu_2)$ cycle. Gauss Seidel relaxation is used except for the results enclosed in parenthesis which are obtained with the Jacobi (weighted with $\omega=2/3$) relaxation.} \label{tab:improv} \end{table} The next experiment is shown on Table~\ref{tab:fmg}, where two FMG$(\nu_1,\nu_2)$ schemes are applied to the $m=22$, $s=10$ cylindrical problem with grid sizes up to $N=2048$. Note that the problem is solved to the level of discretization for $N\ge 128$ with FMG$(2,1)$ but not with FMG$(1,1)$. Solving the same problem with the $V(2,1)$ cycle required 3 iterations for all the values of $N$ shown. Since the cost of one FMG(2,1) is $\sim 2$ the cost of one $V(2,1)$ (see Appendix \ref{sec:cost}), it appears that FMG is more efficient for $N\ge 128$. Finally, in all the cases shown here, the equality (\ref{eq:coarse_mat}) is verified numerically, except for the cylindrical case with $m\neq 0$. This is expected since as noted earlier, the matrix elements $A_{i1}$ and $A_{1i}$ diverge unless $m=0$ in the cylindrical problem. \begin{table} \centering \begin{tabular}{|c|c|c|c|c|}\hline & \multicolumn{2}{c|}{FMG(1,1)} & \multicolumn{2}{c|}{FMG(2,1)} \\ \cline{2-5} $N$ & $\|e\|_2$ & $\|e\|_2/\|e\|_d$ & $\|e\|_2$ & $\|e\|_2/\|e\|_d$ \\ \hline 4 & 1.011E-01 & 0.968 & 1.012E-01 & 0.969 \\ 8 & 7.781E-02 & 1.031 & 7.679E-02 & 1.018 \\ 16 & 3.332E-02 & 1.310 & 2.808E-02 & 1.104 \\ 32 & 1.516E-03 & 1.421 & 1.098E-03 & 1.030 \\ 64 & 5.168E-05 & 1.443 & 3.652E-05 & 1.019 \\ 128 & 2.012E-06 & 1.109 & 1.818E-06 & 1.002 \\ 256 & 1.125E-07 & 1.053 & 1.069E-07 & 1.001 \\ 512 & 6.819E-09 & 1.037 & 6.576E-09 & 1.000 \\ 1024 & 4.224E-10 & 1.032 & 4.093E-10 & 1.000 \\ 2048 & 2.634E-11 & 1.031 & 2.556E-11 & 1.000 \\ \hline \end{tabular} \caption{The discretization errors $\|e\|_2$ obtained from a FMG$(\nu_1,\nu_2)$ sweep with $\nu_0=1$ for different grid sizes $N$. The columns $\|e\|_2/\|e\|_d$ display their ratio with the discretization errors obtained from a \emph{direct} solver. The cylindrical problem with $m=22$ and $s=10$ using cubic Splines is considered here.} \label{tab:fmg} \end{table} \section{Periodic Case} \subsection{Transfer operators} For periodic problems, we use \emph{periodic} Splines \cite{BSPLINES} which satisfy the periodic boundary condition $\Lambda^h_{i+N}(x)=\Lambda^h_i(x-Nh)$. As a result, both the expansion coefficients and the right hand sides are periodic with periodicity $N$ ($u^h_{i+N}=u^h_i$, $b^h_{i+N}=b^h_i$) and he rank of all matrices should be $N$ instead of $N+p$ as in the non-periodic case. The \emph{prolongation} matrix $\mathbf{P}^h_{2h}$ as given by (\ref{eq:prolongation}) are computed numerically and the results for $N=8$ are given below for linear, quadratic and cubic Splines. \begin{itemize} \item Linear Splines \begin{equation} \mathbf{P}^{h}_{2h} = \left( \begin{matrix} 1 & 0 & 0 & 0 \\ 1/2 & 1/2 & 0 & 0 \\ 0 & 1 & 0 & 0 \\ 0 & 1/2 & 1/2 & 0 \\ 0 & 0 & 1 & 0 \\ 0 & 0 & 1/2 & 1/2 \\ 0 & 0 & 0 & 1 \\ 1/2 & 0 & 0 & 1/2 \\ \end{matrix}\right) \end{equation} \item Quadratic Splines \begin{equation} \mathbf{P}^{h}_{2h} = \left( \begin{matrix} 3/4 & 1/4 & 0 & 0 \\ 1/4 & 3/4 & 0 & 0 \\ 0 & 3/4 & 1/4 & 0 \\ 0 & 1/4 & 3/4 & 0 \\ 0 & 0 & 3/4 & 1/4 \\ 0 & 0 & 1/4 & 3/4 \\ 1/4 & 0 & 0 & 3/4 \\ 3/4 & 0 & 0 & 1/4 \\ \end{matrix}\right) \end{equation} \item Cubic Splines \begin{equation} \mathbf{P}^{h}_{2h} = \left( \begin{matrix} 1/2 & 1/2 & 0 & 0 \\ 1/8 & 3/4 & 1/8 & 0 \\ 0 & 1/2 & 1/2 & 0 \\ 0 & 1/8 & 3/4 & 1/8 \\ 0 & 0 & 1/2 & 1/2 \\ 1/8 & 0 & 1/8 & 3/4 \\ 1/2 & 0 & 0 & 1/2 \\ 3/4 & 1/8 & 0 & 1/8 \\ \end{matrix}\right) \end{equation} \end{itemize} The restriction matrix is simply $\mathbf{R}^{2h}_h= (\mathbf{P}^{h}_{2h})^T$. Generalization for any other number of intervals $N$ should be straightforward. \subsection{Numerical Experiments} In order to test the grid transfer operators obtained above, the same second-order problem (\ref{eq:cartesian_problem}) but with the periodic boundary condition $u(x+1)=u(x)$ is considered. It should be noted that in that case, if $\sigma=0$, the problem is singular since the solution is not \emph{unique}! But we have observed that this problem can be avoided for a slightly non zero $\sigma$, With $\sigma=0.01$ and $k=10$ and using linear and cubic Splines, we recover the same multigrid iterative performances shown in Table \ref{tab:cartesian1} obtained previously for non-periodic Dirichlet boundary conditions. Table \ref{tab:quad_splines} also shows similar iterative efficiencies for \emph{quadratic} non-periodic and periodic problems. The identity (\ref{eq:coarse_mat}) is numerically verified in all the cases considered. \begin{table} \centering \begin{tabular}{|c|c|c|c|c|c|c|c|c|}\hline \multicolumn{9}{|c|}{ Cartesian problem with quadratic splines} \\ \hline & \multicolumn{4}{c|}{ $N=128$} & \multicolumn{4}{c|}{ $N=1024$} \\ \cline{2-9} $V$-cycle & $\|r\|_2$ & ratio & $\|e\|_2$ & ratio & $\|r\|_2$ & ratio & $\|e\|_2$ & ratio \\ \hline 0 & 6.203E-02 & & 7.164E-04 & & 2.209E-02 & & 7.164E-04 & \\ 1 & 8.114E-04 & 0.01 & 6.375E-06 & 0.01 & 1.003E-04 & 0.00 & 4.509E-07 & 0.00 \\ 2 & 1.891E-05 & 0.02 & 6.079E-08 & 0.01 & 1.769E-06 & 0.02 & 8.061E-10 & 0.00 \\ 3 & 1.103E-06 & 0.06 & 5.220E-08 & 0.86 & 7.018E-08 & 0.04 & 9.970E-11 & 0.12 \\ 4 & 8.148E-08 & 0.07 & 5.220E-08 & 1.00 & 5.620E-09 & 0.08 & 9.958E-11 & 1.00 \\ 5 & 6.368E-09 & 0.08 & 5.220E-08 & 1.00 & 4.772E-10 & 0.08 & 9.958E-11 & 1.00 \\ 6 & 4.969E-10 & 0.08 & 5.220E-08 & 1.00 & 4.101E-11 & 0.09 & 9.958E-11 & 1.00 \\ 7 & 3.874E-11 & 0.08 & 5.220E-08 & 1.00 & 3.548E-12 & 0.09 & 9.958E-11 & 1.00 \\ 8 & 3.081E-12 & 0.08 & 5.220E-08 & 1.00 & 3.081E-13 & 0.09 & 9.958E-11 & 1.00 \\ 9 & 2.489E-13 & 0.08 & 5.220E-08 & 1.00 & 2.690E-14 & 0.09 & 9.958E-11 & 1.00 \\ 10 & 1.986E-14 & 0.08 & 5.220E-08 & 1.00 & 3.212E-15 & 0.12 & 9.958E-11 & 1.00 \\ \hline \hline \multicolumn{9}{|c|}{ Periodic problem with quadratic splines} \\ \hline & \multicolumn{4}{c|}{ $N=128$} & \multicolumn{4}{c|}{ $N=1024$} \\ \cline{2-9} $V$-cycle & $\|r\|_2$ & ratio & $\|e\|_2$ & ratio & $\|r\|_2$ & ratio & $\|e\|_2$ & ratio \\ \hline 0 & 6.203E-02 & & 7.164E-04 & & 2.209E-02 & & 7.164E-04 & \\ 1 & 1.285E-03 & 0.02 & 1.294E-05 & 0.02 & 3.116E-04 & 0.01 & 5.862E-06 & 0.01 \\ 2 & 7.878E-05 & 0.06 & 6.569E-07 & 0.05 & 2.893E-05 & 0.09 & 1.626E-07 & 0.03 \\ 3 & 6.573E-06 & 0.08 & 6.511E-08 & 0.10 & 2.691E-06 & 0.09 & 5.631E-09 & 0.03 \\ 4 & 5.681E-07 & 0.09 & 5.224E-08 & 0.80 & 2.385E-07 & 0.09 & 2.400E-10 & 0.04 \\ 5 & 4.890E-08 & 0.09 & 5.219E-08 & 1.00 & 2.097E-08 & 0.09 & 9.997E-11 & 0.42 \\ 6 & 4.198E-09 & 0.09 & 5.219E-08 & 1.00 & 1.828E-09 & 0.09 & 9.958E-11 & 1.00 \\ 7 & 3.607E-10 & 0.09 & 5.219E-08 & 1.00 & 1.584E-10 & 0.09 & 9.958E-11 & 1.00 \\ 8 & 3.103E-11 & 0.09 & 5.219E-08 & 1.00 & 1.370E-11 & 0.09 & 9.958E-11 & 1.00 \\ 9 & 2.674E-12 & 0.09 & 5.219E-08 & 1.00 & 1.184E-12 & 0.09 & 9.958E-11 & 1.00 \\ 10 & 2.307E-13 & 0.09 & 5.219E-08 & 1.00 & 1.025E-13 & 0.09 & 9.958E-11 & 1.00 \\\hline \end{tabular} \caption{The multigrid $V(1,1)$ performance with Gauss-Seidel relation for \emph{Cartesian} problem ($k=10$, $\sigma=0$) and \emph{periodic} problem ($k=10$, $\sigma=0.01$), discretized on a grid with $N=128$ and $1024$ intervals, using quadratic B-splines. For both grid sizes, a total of 6 grid levels were considered.} \label{tab:quad_splines} \end{table} \section{Conclusion} Using the variational principle, we have derived the expressions of the grid transfer matrices for Finite Elements using Splines of any order. It is found that: \begin{itemize} \item The grid transfer matrices do not depend of the geometries characterized by the Jacobian as defined in $dV = x^\alpha dx$. \item The standard grid transfer operator used for first order finite difference (FD) discretization for Cartesian geometry is recovered when linear Spline finite elements (FE) are used. \item Applying these transfer matrices, we have solved Cartesian, cylindrical as well as periodic one dimensional problems, and obtained essentially the same multigrid iterative performances as found for standard first order FD Cartesian problems. \item No performance \emph{degradation} is observed when the order of Splines FE is increased from 1 to 3, or when the cylindrical geometry is considered. \end{itemize} For two dimensional problems, notice that for both Cartesian ($dV=dxdy$) and standard curvilinear geometries ($dV=r^\alpha drd\theta$), the Jacobian is \emph{separable}. Using this property, one can show that the two dimensional grid transfer consists of simply applying successively one dimensional grid transfer on each of the $x$ and $y$ (or $r$ and $\theta$) grids. With the solution $\mathbf{u}^h=[u^h_{ij}]$ and right hand side $\mathbf{b}^h=[b^h_{ij}]$ defined by \begin{equation} u(x,y) = \sum_{ij} u^h_{ij}\Lambda^h_i(x)\Lambda^h_j(y), \qquad b^h_{ij} = \int dx\Lambda^h_i(x)\int dy\Lambda^h_j(y)f(x,y), \end{equation} the two dimension grid transfers can be expressed as (see Appendix \ref{sec:twod}) \begin{equation} \begin{split} \mathbf{u}^h &= {_{x}\mathbf{P}^h_{2h}}\; \mathbf{u}^{2h} \left(_{y}\mathbf{P}^h_{2h}\right)^T, \\ \mathbf{b}^{2h} &={_{x}\mathbf{R}^{2h}_{h}}\; \mathbf{b}^{h} \left(_{y}\mathbf{R}^{2h}_{h}\right)^T. \\ \end{split} \end{equation} For more general curvilinear coordinates such as found in tokamak magnetic coordinates defined by $dV=J(s,\theta)dsd\theta$, we will assume that the grid transfer operators derived above are still applicable. The validity of this assumption will be the object of the next task. \appendix \section{Multigrid Cost Estimation} \label{sec:cost} Assuming that the \emph{coarsest} grid is fixed to $2$, the total number of grid levels $L$ is given by $N/2^{L-1}=2$ or $L=\log_2(N)$, where $N$ is the number of intervals in the \emph{finest grid}. Since both \emph{relaxation} and intergrid transfer are proportional to the number of problem unknowns, the cost of the $V$-cycle can be estimated as: \begin{equation} \begin{split} \mbox{MG}(N) &= c\left[ (N+p)+(N/2+p) +\ldots + (N/2^{L-2}+p) \right] \\ &= c \left[ 2N-4 +(L-1)p \right] ,\\ \end{split} \end{equation} where $p$ is the order of Splines used for the discretization. The FMG can then be deduced, assuming $\nu_0=1$ as \begin{equation} \begin{split} \mbox{FMG}(N) &= \mbox{MG}(N) +\mbox{MG}(N/2) + \ldots + \mbox{MG}(N/2^{L-2})\\ &= c\left[ 4N-8 +(L-1)(pL/2-4) \right] .\\ \end{split} \end{equation} As expected a single FMG cycle (with $\nu_0=1$) costs about two $V$-cycles. \section{Two dimensional Grid Transfer} \label{sec:twod} On the fine and the coarse grids, the problem solution $u(x,y)$ can be written as: \begin{equation*} u(x,y) = \sum_{i'j'} u^h_{i'j'}\Lambda^h_{i'}(x)\Lambda^h_{j'}(y) = \sum_{i'j'} u^{2h}_{i'j'}\Lambda^{2h}_{i'}(x)\Lambda^{2h}_{j'}(y). \end{equation*} Projecting these two expansions on the two dimensional basis functions $\Lambda^h_{i}(x)\Lambda^h_{j}(y)$ yields \begin{gather*} \sum_{i'j'} u^h_{i'j'} \underbrace{\int dx\Lambda^h_{i}(x)\Lambda^h_{i'}(x)}_{M^{h,h}_{ii'}} \underbrace{\int dy\Lambda^h_{j}(x)\Lambda^h_{j'}(y)}_{N^{h,h}_{jj'}} = \sum_{i'j'} u^{2h}_{i'j'} \underbrace{\int dx\Lambda^h_{i}(x)\Lambda^{2h}_{i'}(x)}_{M^{h,2h}_{ii'}} \underbrace{\int dy\Lambda^h_{j}(x)\Lambda^{2h}_{j'}(y)}_{N^{h,2h}_{jj'}} \\ \Longrightarrow \quad \mathbf{M}^{h,h}\;\mathbf{u}^h\;\left(\mathbf{N}^{h,h}\right)^T = \mathbf{M}^{h,2h}\mathbf{u}^{2h}\left(\mathbf{N}^{h,2h}\right)^T \\ \Longrightarrow \quad \mathbf{u}^h = \left(\mathbf{M}^{h,h}\right)^{-1} \mathbf{M}^{h,2h}\;\mathbf{u}^{2h}\;\left[\left(\mathbf{N}^{h,h}\right)^{-1}\mathbf{N}^{h,2h}\right]^T. \end{gather*} The right hand side can be written on the fine and coarse grids as \begin{equation*} \begin{split} b^{h}_{ij} = \int dx\Lambda^{h}_i(x)\int dy\Lambda^{h}_j(y)\;f(x,y) = \sum_{i'j'} M^{h,h}_{ii'}f^{h}_{i'j'}N^{h,h}_{jj'} \quad\Longrightarrow\quad \mathbf{b}^h &= \mathbf{M}^{h,h}\;\mathbf{f}^h\left(\mathbf{N}^{h,h}\right)^{T}, \\ b^{2h}_{ij} = \int dx\Lambda^{2h}_i(x)\int dy\Lambda^{2h}_j(y)\;f(x,y) = \sum_{i'j'} M^{2h,h}_{ii'}f^{h}_{i'j'}N^{2h,h}_{jj'} \quad\Longrightarrow\quad \mathbf{b}^{2h} &= \mathbf{M}^{2h,h}\;\mathbf{f}^h\left(\mathbf{N}^{2h,h}\right)^{T}, \\ \end{split} \end{equation*} where the expansion of $f(x,y)$ on the \emph{fine} mesh has been used. Elimination of $\mathbf{f}^h$ then yields \begin{equation*} \mathbf{b}^{2h} = \mathbf{M}^{2h,h}\left(\mathbf{M}^{h,h}\right)^{-1} \; \mathbf{b}^h\; \left[\mathbf{N}^{2h,h}\left(\mathbf{N}^{h,h}\right)^{-1} \right]^T. \end{equation*} \begin{thebibliography}{99} \bibitem{SOLVERS} {\tt The Solvers in BSPLINES}, \url{https://crppsvn.epfl.ch/repos/bsplines/trunk/docs/solvers.pdf} \bibitem{BSPLINES} {\tt BSPLINES} Reference Guide, \url{https://crppsvn.epfl.ch/repos/bsplines/trunk/docs/bsplines.pdf} \bibitem{Briggs} {W.L.~Briggs, V.E.~Henson and S.F.~McCormick, A Multigrid Tutorial, Second Edition, Siam (2000)}. \end{thebibliography} \end{document} diff --git a/multigrid/docs/multigrid_2d.tex b/multigrid/docs/multigrid_2d.tex index a373364..43be13c 100644 --- a/multigrid/docs/multigrid_2d.tex +++ b/multigrid/docs/multigrid_2d.tex @@ -1,425 +1,425 @@ % % @file multigrid_2d.tex % % @brief % % @copyright % Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) % SPC (Swiss Plasma Center) % -% spclibs is free software: you can redistribute it and/or modify it under +% SPClibs is free software: you can redistribute it and/or modify it under % the terms of the GNU Lesser General Public License as published by the Free % Software Foundation, either version 3 of the License, or (at your option) % any later version. % -% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY % WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS % FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. % % You should have received a copy of the GNU Lesser General Public License % along with this program. If not, see . % % @authors % (in alphabetical order) % @author Trach-Minh Tran % \documentclass[a4paper]{article} \usepackage{linuxdoc-sgml} \usepackage{graphicx} \usepackage{hyperref} \usepackage{amsmath} \title{Multigrid for Finite Elements using Splines.} \author{Trach-Minh Tran, Stephan Brunner} \date{v0.1, January 2013} \abstract{A multigrid formulation for finite elements is derived, using variational principles. More specifically the grid transfer operators will be derived and tested in 2D Cartesian and cylindrical geometry for arbitrary order B-Splines.} \begin{document} \maketitle \tableofcontents \section{The Model Problems} \subsection{Cartesian Geometry} The following second-order boundary value problem is considered \begin{equation} \label{eq:cartesian_problem} \begin{split} -\left[\frac{\partial^2}{\partial x^2} +\frac{\partial^2}{\partial y^2} \right] u(x,y) &= f(x,y) \qquad 0\le x\le 1,\quad 0\le y\le 1 \\ u(0,y) = u(1,y) &= u(x,0) = u(x,1) = 0. \end{split} \end{equation} By choosing \begin{equation*} f(x,y) = \sin (\pi k_xx + \pi k_yy), \end{equation*} where $k_x$ and $k_y$ are integers, the solution of the BVP is simply \begin{equation*} u(x,y) = \frac{\sin(\pi k_xx+\pi k_yy)}{\pi^2(k_x^2+k_y^2)}. \end{equation*} Using a weak formulation on Eq.(\ref{eq:cartesian_problem}) and a grid of $N_x\times N_y$ intervals, one obtains the following discretized linear system \begin{equation} \sum_{i'=1}^{N_x+p}\sum_{j'=1}^{N_y+p}A_{iji'j'}u_{i'j'} = b_{ij}, \qquad i=1,\ldots,N_x+p,\quad j=1,\ldots,N_y+p, \end{equation} where the unknowns $u_{ij}$ are the Spline (of order $p$) expansion coefficients of the solution \begin{equation} u(x,y) = \sum_{i=1}^{N_x+p}\sum_{j=1}^{N_y+p}u_{ij}\Lambda_i(x)\Lambda_j(y), \end{equation} and the matrix $A$ and right hand side $b$ are determined from \begin{align} A_{iji'j'} &= \int^1_0\int^1_0 dxdy \left[\Lambda'_{i'}(x)\Lambda_{j'}(y)\Lambda'_i(x)\Lambda_j(y) + \Lambda_{i'}(x)\Lambda'_{j'}(y)\Lambda_i(x)\Lambda'_j(y) \right], \\ b_{ij} &= \int^1_0\int^1_0 dxdy\Lambda_i(x)\Lambda_j(y)f(x,y). \end{align} Note that using a Gauss quadrature with $\lceil(2p+1)/2\rceil$ points per interval to calculate the matrix $A$ would yield an exact integration. \subsection{Cylindrical Geometry} The following second-order boundary value problem is considered: \begin{equation} \label{eq:cylindrical_problem} \begin{split} -\left[\frac{1}{r}\frac{\partial}{\partial r}r\frac{\partial}{\partial r} + \frac{1}{r^2} \frac{\partial^2}{\partial\theta^2} \right]u(r,\theta) &= f(r,\theta) \qquad 0\le r\le 1,\quad 0 \le \theta < 2\pi \\ u(1,\theta) &= 0, \\ \end{split} \end{equation} By choosing \begin{equation*} f(r,\theta) = j^2_{ms} J_{m}(j_{ms}r)\cos(m\theta), \end{equation*} where $m$ is an integer and $j_{ms}$, the $s^{th}$ zero of $J_{m}$, the solution of this BVP is \begin{equation*} u(r,\theta) = J_{m}(j_{ms}r)\cos(m\theta). \end{equation*} Using a weak formulation on Eq.(\ref{eq:cylindrical_problem})and a grid of $N_r\times N_\theta$ intervals, one obtains the following discretized linear system \begin{equation} \sum_{i'=1}^{N_r+p}\sum_{j'=1}^{N_\theta}A_{iji'j'}u_{i'j'} = b_{ij}, \qquad i=1,\ldots,N_r+p,\quad j=1,\ldots,N_\theta, \end{equation} where the unknowns $u_{ij}$ are the Spline (of order $p$) expansion coefficients of the solution \begin{equation} u(r,\theta) = \sum_{i=1}^{N_r+p}\sum_{j=1}^{N_\theta}u_{ij}\Lambda_i(r)\Lambda_j(\theta), \end{equation} and the matrix $A$ and right hand side $b$ are determined from \begin{align} A_{iji'j'} &= \int^1_0\int^{2\pi}_0 rdrd\theta \left[\Lambda'_{i'}(r)\Lambda_{j'}(\theta)\Lambda'_i(r)\Lambda_j(\theta) + \frac{1}{r^2} \Lambda_{i'}(r)\Lambda'_{j'}(\theta)\Lambda_i(r)\Lambda'_j(\theta) \right], \\ b_{ij} &= \int^1_0\int^{2\pi}_0 rdrd\theta\Lambda_i(r)\Lambda_j(\theta)f(r,\theta). \end{align} Note that $A$ has an $1/r$ singularity in the integrand. For $m\neq0$, this should not be problematic since the converged solution behaves as $\sim r^m$ near $r=0$. The case $m=0$ will be investigated numerically latter in this report, together withe the $m\neq 0$ case. \section{Restriction Operator} In the following, let us use the superscripts $h$ and $2h$ to denote quantities defined respectively on a \emph{fine} ($N_x\times N_y$ or $N_r\times N_\theta$) and a \emph{coarser} ($N_x/2\times N_y/2$ or $N_r/2\times N_\theta/2$) grid. The two grid transfers required in the standard \emph{multigrid} \cite{MG1D,Briggs} are: \begin{enumerate} \item the \emph{restriction} of the right hand side: $\mathbf{b}^{h} \longrightarrow \mathbf{b}^{2h}$ and \item the \emph{prolongation} of the solution: $\mathbf{u}^{2h} \longrightarrow \mathbf{u}^{h}$. \end{enumerate} Noting that the basis functions $\Lambda^{2h}_i(x)$, which are \emph{piecewise} $C^{p-1}$ polynomials with \emph{breaks} on the \emph{coarse} grid points $x^{2h}_k=(2h)k$ can be also considered as \emph{piecewise} $C^{p-1}$ polynomials with \emph{breaks} on the \emph{fine} grid $x^h_k=kh$, they can be expressed \emph{uniquely} as a linear combination of the \emph{fine} grid basis functions: \begin{equation} \label{eq:basis_transf} \Lambda^{2h}_i(x) = \sum^{N+p}_{i'=1}c_{ii'}\Lambda^h_{i'}(x), \quad i=1,\ldots,N/2+p. \end{equation} The (rectangular) matrix $c_{ii'}$ can be identified as the one-dimensional \emph{restriction} $\mathbf{R}$ since \begin{equation*} b^{2h}_i = \int_0^1 dx f(x)\Lambda^{2h}_i(x) = \sum^{N+p}_{i'=1}c_{ii'}\;b^h_{i'} = \sum^{N+p}_{i'=1}R_{ii'}\;b^h_{i'}. \end{equation*} It can be computed by simply projecting Eq.(\ref{eq:basis_transf}) on the fine grid basis function $\Lambda^h_{j}(x)$ \cite{MG1D}: \begin{equation} \sum^{N+p}_{i'=1}R_{ii'}\underbrace{\int_0^1 dx \Lambda^h_{i'}(x)\Lambda^h_{j}(x)}_{M^h_{i'j}} = \underbrace{\int_0^1 dx \Lambda^{2h}_i(x)\Lambda^h_{j}(x)}_{M^{2h,h}_{i'j}} \Longrightarrow \mathbf{R}=\mathbf{M}^{2h,h}\cdot(\mathbf{M}^{h})^{-1}. \end{equation} It should be stressed that the representation for $\Lambda^{2h}_i(x)$ in Eq.(\ref{eq:basis_transf}) is \emph {unique}. This is checked by verifying that the same matrix $R_{ii'}$ is obtained using for example the \emph{collocation} methods. One such method, which is used for this check is detailed in Appendix \ref{sec:colloc}. The calculated grid transfer matrices for linear, quadratic and cubic periodic and non-periodic Splines are given in \cite{MG1D}. Denoting the restriction on $x$ and $y$ respectively by $\mathbf{R}^x$ and $\mathbf{R}^y$, the two-dimensional restriction of $b^h_{ij}$ is defined as \begin{equation*} b^{2h}_{ij} = \int_0^1\int_0^1 dxdy f(x,y)\Lambda^{2h}_i(x)\Lambda^{2h}_j(y) = \sum^{N+p}_{i'=1}\sum^{N+p}_{j'=1}R^x_{ii'}R^y_{jj'}b^h_{i'j'}, \end{equation*} and thus \begin{equation} \label{eq:restriction} \boxed{\mathbf{b}^{2h} = \mathbf{R}^x \cdot \mathbf{b}^{h} \cdot (\mathbf{R}^y)^T.} \end{equation} \section{Prolongation Operator} Using Eq.(\ref{eq:basis_transf}) (with $c_{ii'}=R_{ii'}$), the solution at the coarse grid can be expressed as \begin{equation*} u^{2h}(x) = \sum_{i=1}^{N/2+p}u^{2h}_{i}\Lambda^{2h}_{i}(x) = \sum_{i'=1}^{N+p}\left[\sum_{i=1}^{N/2+p} R_{ii'}u^{2h}_{i}\right] \Lambda^h_{i'}(x) = \sum_{i'=1}^{N+p}\underbrace{\left[\sum_{i=1}^{N/2+p} (R)^T_{i'i}u^{2h}_{i}\right]}_{\tilde{u}^h_{i'}} \Lambda^h_{i'}(x), \end{equation*} from which one obvious choice for the \emph{prolongation} operator would be \begin{equation} \mathbf{P} = \mathbf{R}^T = (\mathbf{M}^{h})^{-1}\cdot\mathbf{M}^{h,2h}. \end{equation} Generalization to a two-dimensional prolongation is obtained as follows, where summation over repeated indices is assumed: \begin{equation*} u^{2h}(x,y) = u^{2h}_{ij}\Lambda^{2h}_{i}(x)\Lambda^{2h}_{j}(y) = \left[R^x_{ii'}u^{2h}_{ij}R^y_{jj'}\right]\Lambda^{h}_{i'}(x)\Lambda^{h}_{j'}(y) \end{equation*} which leads to the prolonged solution $\tilde{\mathbf{u}}^{h}$ given by \begin{equation} \label{eq:prolongation} \boxed{\tilde{\mathbf{u}}^{h} = \mathbf{P}^x \cdot \mathbf{u}^{2h} \cdot (\mathbf{P}^y)^T .} \end{equation} It should be noted here that, while the restricted right hand side $\mathbf{b}^{2h}$ as defined in Eq.(\ref{eq:restriction}) is \emph{exactly identical} to the assembled right hand side, the prolonged solution $\tilde{\mathbf{u}}^{h}$ defined in Eq.(\ref{eq:prolongation}) is just a representation of $u^{2h}(x,y)$ on the fine mesh and \emph{not} the solution $u^h(x,y)$ which can only be obtained by solving the problem on the fine mesh! \section{Numerical Experiments} The multigrid performance can be characterized by looking at the convergence of the residual Euclidean norm for the linear system $\mathbf{A}\mathbf{u}=\mathbf{b}$: \begin{equation} \label{eq:resid} \|\mathbf{r}\|_2 = \|\mathbf{b}-\mathbf{A}\mathbf{u}\|_2. \end{equation} When the \emph{exact} solution $u(x,y)$ is known, the \emph{discretization error} can defined as \begin{equation} \label{eq:discerr} \|e\|_2 = \sqrt{\int dV\left[\sum_{ij}u_{ij}\Lambda_{ij}(x,y)-u(x,y)\right]^2} \end{equation} and computed using a Gauss quadrature. Note that for Splines of order $p$, $\|e(x,y)\|_2(h)$ converges to zero as $O(h^{p+1})$. \subsection{Cartesian Geometry} The multigrid performances for varying problem sizes are displayed in Fig.(\ref{fig:linear_mg2d}) for linear Splines and Fig.(\ref{fig:cubic_mg2d}) for cubic Splines. They show that the number of iterations required for convergence (abount 3 for both linear and cubic Splines) is insensitive to the problem sizes. Compared to direct methods, the multigrid should scale much better for large problem sizes, as indicated in Table~\ref{tab:comparison1}. For this model problem, using cubic Splines seems to converge slightly faster than linear Splines! \begin{figure} \centering \includegraphics[angle=0,width=\hsize]{linear_mg2d} \caption{Performance of the multigrid $V(2,1)$ scheme using a Gauss-Seidel relaxation and \emph{linear Splines} for different problem sizes. The size of the \emph{coarsest} grid is $2\times 2$.} \label{fig:linear_mg2d} \end{figure} \begin{figure} \centering \includegraphics[angle=0,width=\hsize]{cubic_mg2d} \caption{Performance of the multigrid $V(2,1)$ scheme using a Gauss-Seidel relaxation and \emph{cubic Splines} for different problem sizes. The size of the \emph{coarsest} grid is $2\times 2$.} \label{fig:cubic_mg2d} \end{figure} \begin{table}[htb] \centering \begin{tabular}{|c|c|c|c|c|c|}\hline & \multicolumn{2}{c|}{Linear Splines} & \multicolumn{2}{c|}{Cubic Splines} \\ \hline $N$ & $V(2,1)$ & Direct & $V(2,1)$ & Direct \\ \hline 16 & 8.844E-04 & 2.051E-03 & 2.653E-03 & 3.970E-03 \\ 32 & 1.661E-03 & 5.345E-03 & 4.983E-03 & 1.540E-02 \\ 64 & 5.766E-03 & 2.054E-02 & 1.730E-02 & 7.492E-02 \\ 128 & 2.347E-02 & 3.288E-01 & 7.042E-02 & 1.060E+00 \\ \hline \end{tabular} \caption{Times (in seconds) used by a the \emph{direct sparse} solver MUMPS-4.10.0 for different problem sizes versus the times used by \emph{three} multigrid $V(2,1)$ cycles. The Intel Fortran-13.0 compiler is used on an Intel i7 platform.} \label{tab:comparison1} \end{table} The effects of the relaxation parameters $\nu_1,\nu_2$ on the multigrid performnace (Fig.(\ref{fig:cubic_mg2d_relax})) indicates that only a few relaxations are sufficient to achieve a good multigrid performance. Further analysis of the computational cost is required however to determine the \emph{optimal} $\nu_1,\nu_2$. Finally, the effects of the number of grid levels are analyzed in Fig.(\ref{fig:cubic_mg2d_levels}). In addition to the computational cost (see Table~\ref{tab:comparison2}), the memory required for the \emph{direct solver} at the coarsest grid level should be taken into account for the choice of the optimal number of grid levels, especially for very large problems. \begin{figure} \centering \includegraphics[angle=0,width=\hsize]{cubic_mg2d_relax} \caption{Effect of the number of the relaxation sweeps $\nu_1,\nu_2$ on the performance of the multigrid $V(\nu_1,\nu_2)$-cycle for \emph{Cubic Splines}. The finest grid has $128\times 128$ intervals.} \label{fig:cubic_mg2d_relax} \end{figure} \begin{figure} \centering \includegraphics[angle=0,width=\hsize]{cubic_mg2d_levels} \caption{Effect of the number grid levels on the performance of the multigrid $V(2,1)$-cycle for \emph{Cubic Splines}. The finest grid has $128\times 128$ intervals.} \label{fig:cubic_mg2d_levels} \end{figure} \begin{table}[htb] \centering \begin{tabular}{|c|c|c|c|}\hline Number of levels & $V(1,0)$ & $V(1,1)$ & $V(2,1)$ \\ \hline 2 & 3.386E-02 & 3.881E-02 & 4.031E-02 \\ 3 & 2.923E-02 & 3.398E-02 & 3.605E-02 \\ 4 & 2.880E-02 & 3.275E-02 & 3.595E-02 \\ 7 & 2.912E-02 & 3.236E-02 & 3.566E-02 \\ \hline \end{tabular} \caption{Effects of the times in seconds used per $V$-cyclefor different number of grid levels and relaxation paramters for a $128\times 128$ problem. The Intel Fortran-13.0 compiler is used on an Intel i7 platform.} \label{tab:comparison2} \end{table} \subsection{Cylindrical Geometry} \newpage \appendix \section{Grid transfer matrix by collocation} \label{sec:colloc} Let first consider the \emph{periodic case}. Denoting $N$ as the number of intervals of the fine grid, the \emph{periodic} Spline basis functions on the \emph{coarse} grid $\Lambda^{2h}_i$ can be expressed as linear combinations of the \emph{fine} grid Spline basis functions as: \begin{equation} \Lambda^{2h}_i(x) = \sum^{N}_{i'=1}R_{ii'}\Lambda^h_{i'}(x), \quad i=1,\ldots,N/2. \end{equation} For any given $i$, the coefficients $R_{ii'}$ can be calculated by expressing the relation above on exactly $N$ points on the $x$-grid. For \emph{odd} Spline order $p$, these \emph{collocation} (or interpolating) points can be chosen as the \emph{break} points of the fine grid $x^h_k,\quad k=0,\ldots,N-1$. For \emph{even} values of $p$, the collocation points should be $x^h_{k+1/2}=(x^h_{k}+x^h_{k+1})/2$ in order to obtain a non-singular linear system of equations \cite{BSPLINES}. The resulting system of equations to solve for $R_{ii'}$ are given below: \begin{equation} \begin{split} p\mbox{ odd}: \qquad &\sum^{N}_{i'=1}\Lambda^h_{i'}(x^h_k)\,R_{ii'} = \Lambda^{2h}_i(x^h_k), \qquad k=0,\ldots,N-1,\quad i=1,\ldots,N/2, \\ p\mbox{ even}: \qquad &\sum^{N}_{i'=1}\Lambda^h_{i'}(x^h_{k+1/2})\,R_{ii'} = \Lambda^{2h}_i(x^h_{k+1/2}), \qquad k=0,\ldots,N-1,\quad i=1,\ldots,N/2.\\ \end{split} \end{equation} For \emph{non-periodic} Splines, there are $N+p$ and $N/2+p$ basis functions respectively on the fine and coarse grid: \begin{equation} \Lambda^{2h}_i(x) = \sum^{N+p}_{i'=1}R_{ii'}\Lambda^h_{i'}(x), \quad i=1,\ldots,N/2+p. \end{equation} This implies that for any given $\Lambda^{2h}_i$, $N+p$ conditions are required to determined the $N+p$ terms of row $i$ of the matrix $R_{ii'}$. For odd $p$, $N+1$ collocation points $x_k,\quad k=0,\ldots,N$ can be used with the missing $p-1$ equations obtained by expressing all the $(p-1)/2$ derivatives of $\Lambda^{2h}_i(x)$ at the end points $x_0$ and $x_N$: \begin{equation} \frac{d^\alpha}{dx^\alpha}\Lambda^{2h}_i(x) = \sum^{N+p}_{i'=1}R_{ii'}\frac{d^\alpha}{dx^\alpha} \Lambda^h_{i'}(x) , \quad \alpha=1,\dots,\frac{p-1}{2} \quad (\mbox{$p$ odd}). \end{equation} For \emph{even} $p$, in addition to the $N$ relations obtained with the collocation points $x_{k+1/2}$ (as in the \emph{periodic} case), the missing $p$ conditions can be obtained by expressing $\Lambda^{2h}_i$ and its derivatives up to $p/2-1$ at the end points $x_0$ and $x_N$: \begin{equation} \frac{d^\alpha}{dx^\alpha}\Lambda^{2h}_i(x) = \sum^{N+p}_{i'=1}R_{ii'}\frac{d^\alpha}{dx^\alpha} \Lambda^h_{i'}(x) , \quad \alpha=0,\dots,\frac{p}{2}-1 \quad (\mbox{$p$ even}). \end{equation} \begin{thebibliography}{99} \bibitem{MG1D} {\tt Multigrid Formulation for Finite Elements},\\ \url{https://crppsvn.epfl.ch/repos/bsplines/trunk/multigrid/docs/multigrid.pdf} \bibitem{Briggs} {W.L.~Briggs, V.E.~Henson and S.F.~McCormick, A Multigrid Tutorial, Second Edition, Siam (2000)}. \bibitem{BSPLINES} {\tt BSPLINES} Reference Guide, \url{https://crppsvn.epfl.ch/repos/bsplines/trunk/docs/bsplines.pdf} \bibitem{SOLVERS} {\tt The Solvers in BSPLINES}, \url{https://crppsvn.epfl.ch/repos/bsplines/trunk/docs/solvers.pdf} \end{thebibliography} \end{document} diff --git a/multigrid/src/CMakeLists.txt b/multigrid/src/CMakeLists.txt index 888ffc3..7530640 100644 --- a/multigrid/src/CMakeLists.txt +++ b/multigrid/src/CMakeLists.txt @@ -1,92 +1,92 @@ # # @file CMakeLists.txt # # @brief # # @copyright # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) # SPC (Swiss Plasma Center) # -# spclibs is free software: you can redistribute it and/or modify it under +# SPClibs is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) # any later version. # -# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . # # @authors # (in alphabetical order) # @author Nicolas Richart # @author Trach-Minh Tran # project(multigrid_tests) if(${CMAKE_Fortran_COMPILER_ID} MATCHES "Intel") # set(CMAKE_Fortran_FLAGS_RELEASE # "${CMAKE_Fortran_FLAGS_RELEASE} -profile-functions -profile-loops=outer" # ) set(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -fpe0" ) endif() set(MG_TESTS transfer1d test_relax test_mg test_mgp test_csr two_grid test_mg2d test_relax2d test_transf2d transfer1d_col test_relax2d_cyl test_transf2d_cyl test_mg2d_cyl ) foreach(test ${MG_TESTS}) add_executable(${test} ${test}.f90) target_link_libraries(${test} bsplines) endforeach() add_executable(poisson_fd poisson_fd.f90 fdmat_mod.f90 stencil_mod.f90 parmg_mod.f90 gvector_mod.f90) target_link_libraries(poisson_fd bsplines) set(TESTS ${TESTS} poisson_mg) add_executable(partition partition.f90 stencil_mod.f90 parmg_mod.f90 gvector_mod.f90) target_link_libraries(partition bsplines) # Fail to compile with crayftn 4.0.46 on ROSA/DAINT add_executable(test_stencil test_stencil.f90 stencil_mod.f90 gvector_mod.f90) target_link_libraries(test_stencil bsplines) add_executable(test_stencilg test_stencilg.f90 stencil_mod.f90 parmg_mod.f90 gvector_mod.f90) target_link_libraries(test_stencilg bsplines) # Fail to compile with crayftn 4.0.46 on ROSA/DAINT add_executable(test_jacobi test_jacobi.f90 stencil_mod.f90 parmg_mod.f90 gvector_mod.f90) target_link_libraries(test_jacobi bsplines) add_executable(test_jacobig test_jacobig.f90 fdmat_mod.f90 stencil_mod.f90 parmg_mod.f90 gvector_mod.f90) target_link_libraries(test_jacobig bsplines) add_executable(ppoisson_fd ppoisson_fd.f90 fdmat_mod.f90 stencil_mod.f90 parmg_mod.f90 gvector_mod.f90) target_link_libraries(ppoisson_fd bsplines) add_executable(test_gvec1d test_gvec1d.f90) target_link_libraries(test_gvec1d bsplines) add_executable(test_intergrid0 test_intergrid0.f90) target_link_libraries(test_intergrid0 bsplines) add_executable(test_intergrid1 test_intergrid1.f90 parmg_mod.f90 gvector_mod.f90 stencil_mod.f90) target_link_libraries(test_intergrid1 bsplines) include_directories(${CMAKE_CURRENT_BINARY_DIR}) diff --git a/multigrid/src/Makefile b/multigrid/src/Makefile index 5748f81..235bb03 100644 --- a/multigrid/src/Makefile +++ b/multigrid/src/Makefile @@ -1,136 +1,136 @@ # # @file Makefile # # @brief # # @copyright # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) # SPC (Swiss Plasma Center) # -# spclibs is free software: you can redistribute it and/or modify it under +# SPClibs is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) # any later version. # -# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . # # @authors # (in alphabetical order) # @author Trach-Minh Tran # MPIF90 = mpif90 LD = $(MPIF90) # F90FLAGS = -I$(HOME)/include/O -I$(PPUTILS2) # LDFLAGS = -L$(HOME)/lib/O -L${HDF5}/lib F90FLAGS = -I$(FUTILS)/include -I$(BSPLINES)/include LDFLAGS = -mkl=cluster -L$(FUTILS)/lib -L$(BSPLINES)/lib -L${HDF5}/lib MODS = gvector_mod.o stencil_mod.o LIBS = $(MODS) -lbsplines -lpppack -lpputils2 -lfutils \ -lhdf5_fortran -lhdf5 -lz ifdef MKL SPBLAS = -DMKL endif ifdef MUMPS F90FLAGS += -I$(MUMPS)/include LDFLAGS += -L$(MUMPS)/lib LIBS += $(MUMPSLIBS) endif all: transfer1d test_relax test_mg test_mgp test_csr two_grid \ test_mg2d test_relax2d test_transf2d transfer1d_col \ test_relax2d_cyl test_transf2d_cyl test_mg2d_cyl poisson_fd .SUFFIXES: .SUFFIXES: .o .f90 .f90.o: $(MPIF90) $(F90FLAGS) -c $< partition: partition.o $(LD) $(LDFLAGS) -o $@ $< parmg_mod.o $(MODS) -lpputils2 -lfutils \ -lhdf5_fortran -lhdf5 -lz transfer1d: transfer1d.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) test_relax: test_relax.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) test_mg: test_mg.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) test_mgp: test_mgp.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) test_csr: test_csr.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) two_grid: two_grid.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) test_mg2d: test_mg2d.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) test_relax2d: test_relax2d.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) test_transf2d: test_transf2d.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) transfer1d_col: transfer1d_col.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) test_relax2d_cyl: test_relax2d_cyl.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) test_transf2d_cyl: test_transf2d_cyl.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) test_mg2d_cyl: test_mg2d_cyl.o $(LD) $(LDFLAGS) -o $@ $< $(LIBS) poisson_fd: poisson_fd.o $(LD) $(LDFLAGS) -o $@ $< fdmat_mod.o $(LIBS) csr_mod.o: csr_mod.f90 $(MPIF90) $(FPP) $(SPBLAS) $(F90FLAGS) -c csr_mod.f90 parmg_mod.o: gvector_mod.o stencil_mod.o partition.o: parmg_mod.o transfer1d.o: $(MODS) test_relax.o: $(MODS) test_mg.o: $(MODS) test_mgp.o: $(MODS) test_csr.o: $(MODS) two_grid.o: $(MODS) test_mg2d.o: $(MODS) test_relax2d.o: $(MODS) test_transf2d.o: $(MODS) transfer1d_col.o: $(MODS) test_relax2d_cyl.o: $(MODS) test_transf2d_cyl.o: $(MODS) test_mg2d_cyl.o: $(MODS) poisson_fd.o: fdmat_mod.o fdmat_mod.o: stencil_mod.o parmg_mod.o stencil_mod.o: gvector_mod.o parmg_mod.o: gvector_mod.o stencil_mod.o clean: rm -f *.o *.mod *~ ../wk/*~ a.out lib distclean: clean rm -f ../wk/*.h5 ../wk/fort.* *.eps \ transfer1d test_relax test_mg test_mgp test_csr two_grid \ test_mg2d test_relax2d test_transf2d transfer1d_col \ test_relax2d_cyl test_transf2d_cyl test_mg2d_cyl #include $(HOST).mk diff --git a/multigrid/src/README_mod.txt b/multigrid/src/README_mod.txt index 224e499..22b365f 100644 --- a/multigrid/src/README_mod.txt +++ b/multigrid/src/README_mod.txt @@ -1,91 +1,91 @@ /** * @file README_mod.txt * * @brief * * @copyright * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) * SPC (Swiss Plasma Center) * - * spclibs is free software: you can redistribute it and/or modify it under + * SPClibs is free software: you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free * Software Foundation, either version 3 of the License, or (at your option) * any later version. * - * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY + * SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see . * * @authors * (in alphabetical order) * @author Trach-Minh Tran */ 1) Module parmg ============ - types: grid2_type INTEGER, DIMENSION(2) :: s, e, s0, e0, npt_loc, npt REAL(rkind), ALLOCATABLE :: x(:), y(:) TYPE(gvector_2d) :: f, v TYPE(stencil_2d) :: fdmat, restrict_mat - module procedures: creat_grid coarse (1d, 2d) exchange (gvector) prolong (gvector) restrict (gvector) jacobi get_resids init_restrict (gvector) disp (0d, 1d array of int) get_lmax - Uses gvector stencil 2) Module stencil ============== - types: LOGICAL :: nluni INTEGER, DIMENSION(2) :: ldim, gdim, s0, e0, s, e INTEGER :: npoints INTEGER, ALLOCATABLE :: id(:,:) REAL(rkind), ALLOCATABLE :: val(:,:,:) - module procedures: init vmx laplacian putmat - operators: *: vmx - Uses gvector 3) Module gvector ============== - types: gvector_2d INTEGER, DIMENSION(2) :: s, e ! vector internal bounds INTEGER, DIMENSION(2) :: g ! ghost cell widths REAL(rkind), ALLOCATABLE :: val(:,:) - module procedures: constructor (gvector_2d) disp norm2 (serial, mpi) - operators: + : add_scal, add_vec - : minus_vec, substract_vec * : scale_left, scale_right - assignment: = : from_scal, from_vec diff --git a/multigrid/src/fdmat_mod.f90 b/multigrid/src/fdmat_mod.f90 index c925169..2df2be5 100644 --- a/multigrid/src/fdmat_mod.f90 +++ b/multigrid/src/fdmat_mod.f90 @@ -1,600 +1,600 @@ !> !> @file fdmat_mod.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> MODULE fdmat_mod ! USE multigrid IMPLICIT NONE ! INTERFACE fdmat MODULE PROCEDURE fdmat_stencil MODULE PROCEDURE fdmat_gen, fdmat_csr, fdmat_cds END INTERFACE fdmat INTERFACE ibc_fdmat MODULE PROCEDURE ibc_fdmat_stencil MODULE PROCEDURE ibc_fdmat_gen, ibc_fdmat_csr, ibc_fdmat_cds END INTERFACE ibc_fdmat INTERFACE ibc_rhs MODULE PROCEDURE ibc_rhs_g END INTERFACE ibc_rhs ! CONTAINS !-------------------------------------------------------------------------------- SUBROUTINE fdmat_stencil(grid, fdense, icrosst, mat) ! ! Construct model GBS FD partitioned matrix ! USE iso_fortran_env, ONLY : rkind => real64 USE stencil, ONLY : stencil_2d USE parmg, ONLY : grid2_type ! TYPE(grid2_type) :: grid REAL(rkind), INTENT(in) :: icrosst TYPE(stencil_2d) :: mat INTERFACE FUNCTION fdense(x) USE iso_fortran_env, ONLY : rkind => real64 REAL(rkind), INTENT(in) :: x(:) REAL(rkind) :: fdense(SIZE(x)) END FUNCTION fdense END INTERFACE ! REAL(rkind) :: dx, dy REAL(rkind),ALLOCATABLE :: dense(:) REAL(rkind) :: stencil_arr(-1:1,-1:1), zdiag(-1:1,-1:1), corr INTEGER :: nx, ny, i, j, k, d(2) ! ! Grid properties ! nx = grid%npt(1)-1 ny = grid%npt(2)-1 dx = grid%x(1)-grid%x(0) ! Assume equidistant grid dy = grid%y(1)-grid%y(0) ALLOCATE(dense(0:nx)) ! electron densoty vary only along x ! ! Stencil array ! stencil_arr = 0.0d0 zdiag = 0.0d0 corr = 1.d0+icrosst**2/4.0d0 stencil_arr(0,0) = -2.0d0/dx/dx-2.0d0/dy/dy*corr stencil_arr(-1,0) = 1.0/dx/dx stencil_arr(1,0) = 1.0/dx/dx stencil_arr(0,-1) = 1.0/dy/dy*corr stencil_arr(0,1) = 1.0/dy/dy*corr stencil_arr(-1,-1) = icrosst*1.0/4.0/dx/dy stencil_arr(1,-1) = icrosst*(-1.0/4.0/dx/dy) stencil_arr(-1,1) = icrosst*(-1.0/4.0/dx/dy) stencil_arr(1,1) = icrosst*(1.0/4.0/dx/dy) zdiag(0,0) = 1.0d0 dense(:) = fdense(grid%x(:)) ! ! Assemble the stencil by scanning local grid points ! DO k=0,mat%npoints-1 d(:) = mat%id(k,:) DO j=mat%s(2),mat%e(2) DO i=mat%s(1),mat%e(1) mat%val(i,j,k) = stencil_arr(d(1),d(2)) + & & zdiag(d(1),d(2))*dense(i) END DO END DO END DO ! DEALLOCATE(dense) END SUBROUTINE fdmat_stencil !-------------------------------------------------------------------------------- SUBROUTINE ibc_rhs_g(f, s0, e0, prb) ! ! Impose BC on rhs ! USE iso_fortran_env, ONLY : rkind => real64 USE gvector, ONLY : gvector_2d ! TYPE(gvector_2d), INTENT(inout) :: f CHARACTER(len=*), INTENT(in) :: prb INTEGER, INTENT(in) :: s0(2), e0(2) INTEGER :: s(2), e(2) ! s = f%s e = f%e ! IF(s(1).EQ.s0(1)) THEN IF(prb(1:1).EQ.'d') THEN ! West face f%val(s(1),s(2):e(2)) = 0.0_rkind ELSE f%val(s(1),s(2):e(2)) = 0.5_rkind*f%val(s(1),s(2):e(2)) END IF END IF IF(e(1).EQ.e0(1)) THEN IF(prb(2:2).EQ.'d') THEN ! East face f%val(e(1),s(2):e(2)) = 0.0_rkind ELSE f%val(e(1),s(2):e(2)) = 0.5_rkind*f%val(e(1),s(2):e(2)) END IF END IF IF(s(2).EQ.s0(2)) THEN IF(prb(3:3).EQ.'d') THEN ! South face f%val(s(1):e(1),s(2)) = 0.0_rkind ELSE f%val(s(1):e(1),s(2)) = 0.5_rkind*f%val(s(1):e(1),s(2)) END IF END IF IF(e(2).EQ.e0(2)) THEN IF(prb(4:4).EQ.'d') THEN ! North face f%val(s(1):e(1),e(2)) = 0.0_rkind ELSE f%val(s(1):e(1),e(2)) = 0.5_rkind*f%val(s(1):e(1),e(2)) END IF END IF END SUBROUTINE ibc_rhs_g !-------------------------------------------------------------------------------- SUBROUTINE ibc_fdmat_stencil(mat, prb) ! ! Impose BC on matrix ! USE iso_fortran_env, ONLY : rkind => real64 USE stencil, ONLY : stencil_2d ! TYPE(stencil_2d), INTENT(inout) :: mat CHARACTER(len=*), INTENT(in) :: prb ! INTEGER :: s0(2), e0(2), s(2), e(2) ! s0 = mat%s0 e0 = mat%e0 s = mat%s e = mat%e ! ! Neumann BC ! WARNING: Divide the stencil by 2 => should do the same for RHS! ! ! N ! 6---7---8 ! | | | ! W 4---0---5 E Numbering of stencil ! | | | ! 1---2---3 ! S ! IF(s(1).EQ.s0(1) .AND. prb(1:1).EQ.'n') THEN ! West face mat%val(s(1),s(2):e(2),1) = 0.0_rkind mat%val(s(1),s(2):e(2),3) = 0.0_rkind mat%val(s(1),s(2):e(2),4) = 0.0_rkind mat%val(s(1),s(2):e(2),5) = 2.0d0*mat%val(s(1),s(2):e(2),5) mat%val(s(1),s(2):e(2),6) = 0.0_rkind mat%val(s(1),s(2):e(2),8) = 0.0_rkind mat%val(s(1),s(2):e(2),:) = 0.5_rkind*mat%val(s(1),s(2):e(2),:) END IF IF(e(1).EQ.e0(1) .AND. prb(2:2).EQ.'n') THEN ! East face mat%val(e(1),s(2):e(2),1) = 0.0_rkind mat%val(e(1),s(2):e(2),3) = 0.0_rkind mat%val(e(1),s(2):e(2),4) = 2.0d0*mat%val(e(1),s(2):e(2),4) mat%val(e(1),s(2):e(2),5) = 0.0_rkind mat%val(e(1),s(2):e(2),6) = 0.0_rkind mat%val(e(1),s(2):e(2),8) = 0.0_rkind mat%val(e(1),s(2):e(2),:) = 0.5_rkind*mat%val(e(1),s(2):e(2),:) END IF IF(s(2).EQ.s0(2) .AND. prb(3:3).EQ.'n') THEN ! South face mat%val(s(1):e(1),s(2),1) = 0.0_rkind mat%val(s(1):e(1),s(2),2) = 0.0_rkind mat%val(s(1):e(1),s(2),3) = 0.0_rkind mat%val(s(1):e(1),s(2),6) = 0.0_rkind mat%val(s(1):e(1),s(2),7) = 2.0d0*mat%val(s(1):e(1),s(2),7) mat%val(s(1):e(1),s(2),8) = 0.0_rkind mat%val(s(1):e(1),s(2),:) = 0.5_rkind*mat%val(s(1):e(1),s(2),:) END IF IF(e(2).EQ.e0(2) .AND. prb(4:4).EQ.'n') THEN ! North face mat%val(s(1):e(1),e(2),1) = 0.0_rkind mat%val(s(1):e(1),e(2),2) = 2.0d0*mat%val(s(1):e(1),e(2),2) mat%val(s(1):e(1),e(2),3) = 0.0_rkind mat%val(s(1):e(1),e(2),6) = 0.0_rkind mat%val(s(1):e(1),e(2),7) = 0.0_rkind mat%val(s(1):e(1),e(2),8) = 0.0_rkind mat%val(s(1):e(1),e(2),:) = 0.5_rkind*mat%val(s(1):e(1),e(2),:) END IF ! ! Dirichlet BC ! IF(s(1).EQ.s0(1) .AND. prb(1:1).EQ.'d') THEN ! West face mat%val(s(1),s(2):e(2),:) = 0.0_rkind mat%val(s(1),s(2):e(2),0) = 1.0_rkind END IF IF(e(1).EQ.e0(1) .AND. prb(2:2).EQ.'d') THEN ! East face mat%val(e(1),s(2):e(2),:) = 0.0_rkind mat%val(e(1),s(2):e(2),0) = 1.0_rkind END IF IF(s(2).EQ.s0(2) .AND. prb(3:3).EQ.'d') THEN ! South face mat%val(s(1):e(1),s(2),:) = 0.0_rkind mat%val(s(1):e(1),s(2),0) = 1.0_rkind END IF IF(e(2).EQ.e0(2) .AND. prb(4:4).EQ.'d') THEN ! North face mat%val(s(1):e(1),e(2),:) = 0.0_rkind mat%val(s(1):e(1),e(2),0) = 1.0_rkind END IF END SUBROUTINE ibc_fdmat_stencil !-------------------------------------------------------------------------------- SUBROUTINE fdmat_gen(grid, fdense, icrosst, noinit) ! ! Generic version ! TYPE(grid2d), INTENT(inout) :: grid DOUBLE PRECISION, INTENT(in) :: icrosst LOGICAL, INTENT(in), OPTIONAL :: noinit INTERFACE FUNCTION fdense(x) DOUBLE PRECISION, INTENT(in) :: x(:) DOUBLE PRECISION :: fdense(SIZE(x)) END FUNCTION fdense END INTERFACE ! IF(ALLOCATED(grid%mata)) THEN CALL fdmat_csr(grid, fdense, icrosst, grid%mata, noinit) ELSE CALL fdmat_cds(grid, fdense, icrosst, grid%mata_cds, noinit) END IF END SUBROUTINE fdmat_gen !-------------------------------------------------------------------------------- SUBROUTINE fdmat_cds(grid, fdense, icrosst, mat, noinit) ! ! Construct FD matrix ! TYPE(grid2d), INTENT(in) :: grid DOUBLE PRECISION, INTENT(in) :: icrosst TYPE(cds_mat), INTENT(inout) :: mat LOGICAL, INTENT(in), OPTIONAL :: noinit INTERFACE FUNCTION fdense(x) DOUBLE PRECISION, INTENT(in) :: x(:) DOUBLE PRECISION :: fdense(SIZE(x)) END FUNCTION fdense END INTERFACE ! INTEGER :: n, nx, ny INTEGER :: kl, ku, k INTEGER :: ix, ix2, jx, iy, jy, iy2, irow DOUBLE PRECISION :: lx, ly, dx, dy, mele DOUBLE PRECISION :: dense(0:grid%n(1)) DOUBLE PRECISION :: stencil(-1:1,-1:1), zdiag(-1:1,-1:1) DOUBLE PRECISION :: corr LOGICAL :: run_init INTEGER, ALLOCATABLE :: dists(:) !-------------------------------------------------------------------------------- run_init = .TRUE. IF(PRESENT(noinit)) run_init = .NOT.noinit ! ! Grid properties ! nx = grid%n(1) ny = grid%n(2) dx = grid%x(1) - grid%x(0) dy = grid%y(1) - grid%y(0) lx = grid%x(nx) ly = grid%y(ny) n = PRODUCT(grid%rank) ! Rank of matrix ! ! Stencil ! stencil = 0.0d0 zdiag = 0.0d0 ! corr = 1.d0+icrosst**2/4.0d0 stencil(0,0) = -2.0d0/dx/dx-2.0d0/dy/dy*corr stencil(-1,0) = 1.0/dx/dx stencil(1,0) = 1.0/dx/dx stencil(0,-1) = 1.0/dy/dy*corr stencil(0,1) = 1.0/dy/dy*corr stencil(-1,-1) = icrosst*1.0/4.0/dx/dy stencil(1,-1) = icrosst*(-1.0/4.0/dx/dy) stencil(-1,1) = icrosst*(-1.0/4.0/dx/dy) stencil(1,1) = icrosst*(1.0/4.0/dx/dy) zdiag(0,0) = 1.0d0 ! ! 9-point stencil "diagonal storage" ! kl=4 ku=4 ALLOCATE(dists(-kl:ku)) DO iy2=-1,1 DO ix2=-1,1 k=3*iy2+ix2 dists(k) = iy2*(nx+1) + ix2 END DO END DO ! IF(run_init) THEN CALL init(n, dists, 1, mat) END IF ! ! Assemble matrix by scanning all grid points ! dense(:) = fdense(grid%x(:)) DO iy=0,ny DO ix=0,nx irow = iy*(nx+1)+ix+1 DO iy2=-1,1 jy=iy+iy2 IF(jy.GE.0 .AND. jy.LE.ny) THEN DO ix2=-1,1 jx=ix+ix2 IF(jx.GE.0 .AND.jx.LE.nx) THEN mele = stencil(ix2,iy2) + zdiag(ix2,iy2)*dense(ix) k=3*iy2+ix2 mat%val(irow,k) = mele END IF END DO END IF END DO END DO END DO ! DEALLOCATE(dists) END SUBROUTINE fdmat_cds !-------------------------------------------------------------------------------- SUBROUTINE fdmat_csr(grid, fdense, icrosst, mat, noinit) ! ! Construct FD matrix ! TYPE(grid2d), INTENT(in) :: grid DOUBLE PRECISION, INTENT(in) :: icrosst TYPE(csr_mat), INTENT(inout) :: mat LOGICAL, INTENT(in), OPTIONAL :: noinit INTERFACE FUNCTION fdense(x) DOUBLE PRECISION, INTENT(in) :: x(:) DOUBLE PRECISION :: fdense(SIZE(x)) END FUNCTION fdense END INTERFACE ! INTEGER :: n, nx, ny INTEGER :: ix, ix2, jx, iy, jy, iy2, irow, icol DOUBLE PRECISION :: lx, ly, dx, dy, mele DOUBLE PRECISION :: dense(0:grid%n(1)) DOUBLE PRECISION :: stencil(-1:1,-1:1), zdiag(-1:1,-1:1) DOUBLE PRECISION :: corr LOGICAL :: run_init !-------------------------------------------------------------------------------- run_init = .TRUE. IF(PRESENT(noinit)) run_init = .NOT.noinit ! ! Grid properties ! nx = grid%n(1) ny = grid%n(2) dx = grid%x(1) - grid%x(0) dy = grid%y(1) - grid%y(0) lx = grid%x(nx) ly = grid%y(ny) n = PRODUCT(grid%rank) ! Rank of matrix ! ! Stencil ! stencil = 0.0d0 zdiag = 0.0d0 ! corr = 1.d0+icrosst**2/4.0d0 stencil(0,0) = -2.0d0/dx/dx-2.0d0/dy/dy*corr stencil(-1,0) = 1.0/dx/dx stencil(1,0) = 1.0/dx/dx stencil(0,-1) = 1.0/dy/dy*corr stencil(0,1) = 1.0/dy/dy*corr stencil(-1,-1) = icrosst*1.0/4.0/dx/dy stencil(1,-1) = icrosst*(-1.0/4.0/dx/dy) stencil(-1,1) = icrosst*(-1.0/4.0/dx/dy) stencil(1,1) = icrosst*(1.0/4.0/dx/dy) zdiag(0,0) = 1.0d0 ! ! Create CSR matrix IF(run_init) THEN CALL init(n, 1, mat) END IF ! ! Assemble matrix by scanning all grid points ! dense(:) = fdense(grid%x(:)) DO iy=0,ny DO ix=0,nx irow = numb(ix,iy) DO iy2=-1,1 jy=iy+iy2 IF(jy.GE.0 .AND. jy.LE.ny) THEN DO ix2=-1,1 jx=ix+ix2 IF(jx.GE.0 .AND.jx.LE.nx) THEN icol=numb(jx,jy) mele = stencil(ix2,iy2) + zdiag(ix2,iy2)*dense(ix) CALL putele(mat, irow, icol,mele) END IF END DO END IF END DO END DO END DO !-------------------------------------------------------------------------------- CONTAINS INTEGER FUNCTION numb(ix,iy) INTEGER, INTENT(in) :: ix, iy INTEGER :: stride stride = grid%rank(1) numb = iy*stride + (ix+1) END FUNCTION numb !-------------------------------------------------------------------------------- END SUBROUTINE fdmat_csr !++ SUBROUTINE ibc_fdmat_gen(grid, prb) ! ! Generic version ! TYPE(grid2d), INTENT(inout) :: grid CHARACTER(len=*), INTENT(in) :: prb ! IF(ALLOCATED(grid%mata)) THEN CALL ibc_fdmat_csr(grid, grid%mata, prb) ELSE CALL ibc_fdmat_cds(grid, grid%mata_cds, prb) END IF ! END SUBROUTINE ibc_fdmat_gen !++ SUBROUTINE ibc_fdmat_csr(grid, mat, prb) ! ! Impose BC ! TYPE(grid2d), INTENT(in) :: grid TYPE(csr_mat), INTENT(inout) :: mat CHARACTER(len=*), INTENT(in) :: prb ! DOUBLE PRECISION :: arow(mat%rank) INTEGER :: nx, ny, nx1, ny1, n, iy, irow, irow1 !-------------------------------------------------------------------------------- nx = grid%n(1) ny = grid%n(2) nx1=nx+1 ny1=ny+1 n = nx1*ny1 ! ! Dirichelt BC on West/East ! IF(prb.EQ.'dddd') THEN DO irow=1,ny*nx1+1,nx1 arow=0.0d0; arow(irow)=1.0d0 CALL putrow(mat, irow, arow) irow1=irow+nx arow=0.0d0; arow(irow1)=1.0d0 CALL putrow(mat, irow1, arow) END DO ! ! Neumann on West/East ! WARNING: Divide the stencil by 2 => should do the same for RHS! ! ELSE IF(prb.EQ.'nndd') THEN DO irow=1,ny*nx1+1,nx1 iy = irow/nx1 CALL getrow(mat, irow, arow) arow(irow+1) = 2.0d0*arow(irow+1) IF(iy.GT.0) arow(irow-nx) = 0.0d0 IF(iy.LT.ny) arow(irow+nx+2) = 0.0d0 arow(:) = 0.5d0*arow(:) CALL putrow(mat, irow, arow) END DO DO irow=nx1,n,nx1 iy = irow/nx1 CALL getrow(mat, irow, arow) arow(irow-1) = 2.0d0*arow(irow-1) IF(iy.GT.0) arow(irow-nx-2) = 0.0d0 IF(iy.LT.ny) arow(irow+nx) = 0.0d0 arow(:) = 0.5d0*arow(:) CALL putrow(mat, irow, arow) END DO ! ELSE WRITE(*,'(a,a4,a)') 'ibc_mat: prb = ', prb, ' NOT IMPLEMENTED!' STOP END IF ! ! Dirichlet BC on South/North sides ! DO irow=1,nx1 arow=0.0d0; arow(irow)=1.0d0 CALL putrow(mat, irow, arow) END DO DO irow1=ny*nx1+1,n arow=0.0d0; arow(irow1)=1.0d0 CALL putrow(mat, irow1, arow) END DO !-------------------------------------------------------------------------------- END SUBROUTINE ibc_fdmat_csr !++ SUBROUTINE ibc_fdmat_cds(grid, mat, prb) ! ! Impose BC ! TYPE(grid2d), INTENT(in) :: grid TYPE(cds_mat), INTENT(inout) :: mat CHARACTER(len=*), INTENT(in) :: prb ! INTEGER :: nx, ny, iy, irow INTEGER :: n, nx1, ny1 !-------------------------------------------------------------------------------- nx = grid%n(1) ny = grid%n(2) nx1=nx+1 ny1=ny+1 n = nx1*ny1 ! ! 2 == 3 == 4 ! | | | ! -1 == 0 == 1 ! | | | ! -4 == -3 == -2 ! ! Dirichelt BC on West/East ! IF(prb.EQ.'dddd') THEN DO irow=1,ny*nx1+1,nx1 mat%val(irow,:) = 0.0d0 mat%val(irow,0) = 1.0d0 mat%val(irow+nx,:) = 0.0d0 mat%val(irow+nx,0) = 1.0d0 END DO ! ! Neumann on West/East ! WARNING: Divide the stencil by 2 => should do the same for RHS! ! ELSE IF(prb.EQ.'nndd') THEN DO irow=1,ny*nx1+1,nx1 iy = irow/nx1 IF(iy.GT.0) mat%val(irow,-2)=0.0d0 IF(iy.LT.ny) mat%val(irow,+4)=0.0d0 mat%val(irow,+1)=2.0d0*mat%val(irow,+1) mat%val(irow,:)=0.5d0*mat%val(irow,:) END DO DO irow=nx1,n,nx1 iy = irow/nx1 IF(iy.GT.0) mat%val(irow,-4)=0.0d0 IF(iy.LT.ny) mat%val(irow,+2)=0.0d0 mat%val(irow,-1)=2.0d0*mat%val(irow,-1) mat%val(irow,:)=0.5d0*mat%val(irow,:) END DO ! ELSE WRITE(*,'(a,a4,a)') 'ibc_mat: prb = ', prb, ' NOT IMPLEMENTED!' STOP END IF ! ! Dirichlet BC on South/North sides ! DO irow=1,nx1 mat%val(irow,:) = 0.0d0 mat%val(irow,0) = 1.0d0 END DO DO irow=ny*nx1+1,n mat%val(irow,:) = 0.0d0 mat%val(irow,0) = 1.0d0 END DO !-------------------------------------------------------------------------------- END SUBROUTINE ibc_fdmat_cds !++ END MODULE fdmat_mod diff --git a/multigrid/src/gvector_mod.f90 b/multigrid/src/gvector_mod.f90 index 2f0162e..5fddf67 100644 --- a/multigrid/src/gvector_mod.f90 +++ b/multigrid/src/gvector_mod.f90 @@ -1,231 +1,231 @@ !> !> @file gvector_mod.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> MODULE gvector ! ! Implementation of 2D vectors with arbitrary ! vector bounds and ghost cell width. ! ! T.M. Tran, CRPP-EPFL ! September 2013 ! USE iso_fortran_env, ONLY : rkind => real64 IMPLICIT NONE PRIVATE PUBLIC :: gvector_2d, disp, norm2, & & OPERATOR(+), OPERATOR(-), OPERATOR(*), & & ASSIGNMENT(=) TYPE gvector_2d INTEGER, DIMENSION(2) :: s, e ! vector internal bounds INTEGER, DIMENSION(2) :: g ! ghost cell widths REAL(rkind), ALLOCATABLE :: val(:,:) END TYPE gvector_2d INTERFACE gvector_2d MODULE PROCEDURE constructor END INTERFACE gvector_2d INTERFACE OPERATOR(+) MODULE PROCEDURE add_scal MODULE PROCEDURE add_vec END INTERFACE OPERATOR(+) INTERFACE OPERATOR(-) MODULE PROCEDURE minus_vec MODULE PROCEDURE substract_vec END INTERFACE OPERATOR(-) INTERFACE OPERATOR(*) MODULE PROCEDURE scale_left MODULE PROCEDURE scale_right END INTERFACE OPERATOR(*) INTERFACE ASSIGNMENT(=) MODULE PROCEDURE from_scal MODULE PROCEDURE from_vec END INTERFACE ASSIGNMENT(=) INTERFACE norm2 MODULE PROCEDURE norm2_gvector_2d MODULE PROCEDURE norm2_root_g_2d MODULE PROCEDURE norm2_all_g_2d END INTERFACE norm2 CONTAINS !======================================================================= FUNCTION constructor(s, e, g) RESULT(res) INTEGER, INTENT(in) :: s(2), e(2) INTEGER, OPTIONAL, INTENT(in) :: g(2) TYPE(gvector_2d) :: res INTEGER :: lb(2), ub(2) res%g= 0 IF(PRESENT(g)) res%g = g res%s = s res%e = e lb = res%s - res%g ub = res%e + res%g ALLOCATE(res%val(lb(1):ub(1),lb(2):ub(2))) ! ! Initialize to 0 on all ghost cells res%val(lb(1):s(1)-1,:) = 0._rkind res%val(e(1)+1:ub(1),:) = 0._rkind res%val(:,lb(2):s(2)-1) = 0._rkind res%val(:,e(2)+1:ub(2)) = 0._rkind END FUNCTION constructor !======================================================================= FUNCTION add_vec(lhs, rhs) RESULT(res) TYPE(gvector_2d), INTENT(in) :: lhs, rhs TYPE(gvector_2d) :: res res = gvector_2d(lhs%s, lhs%e, lhs%g) res%val(res%s(1):res%e(1),res%s(2):res%e(2)) = & & lhs%val(res%s(1):res%e(1),res%s(2):res%e(2)) + & & rhs%val(res%s(1):res%e(1),res%s(2):res%e(2)) END FUNCTION add_vec !======================================================================= FUNCTION add_scal(lhs, rhs) RESULT(res) TYPE(gvector_2d), INTENT(in) :: lhs REAL(rkind), INTENT(in) :: rhs TYPE(gvector_2d) :: res res = gvector_2d(lhs%s, lhs%e, lhs%g) res%val(res%s(1):res%e(1),res%s(2):res%e(2)) = & & lhs%val(res%s(1):res%e(1),res%s(2):res%e(2)) + rhs END FUNCTION add_scal !======================================================================= FUNCTION minus_vec(this) RESULT(res) TYPE(gvector_2d), INTENT(in) :: this TYPE(gvector_2d) :: res res = gvector_2d(this%s, this%e, this%g) res%val(res%s(1):res%e(1),res%s(2):res%e(2)) = & & -this%val(res%s(1):res%e(1),res%s(2):res%e(2)) END FUNCTION minus_vec !======================================================================= FUNCTION substract_vec(lhs, rhs) RESULT(res) TYPE(gvector_2d), INTENT(in) :: lhs, rhs TYPE(gvector_2d) :: res res = gvector_2d(lhs%s, lhs%e, lhs%g) res%val(res%s(1):res%e(1),res%s(2):res%e(2)) = & & lhs%val(res%s(1):res%e(1),res%s(2):res%e(2)) - & & rhs%val(res%s(1):res%e(1),res%s(2):res%e(2)) END FUNCTION substract_vec !======================================================================= FUNCTION scale_left(lhs, rhs) RESULT(res) REAL(rkind), INTENT(in) :: lhs TYPE(gvector_2d), INTENT(in) :: rhs TYPE(gvector_2d) :: res res = gvector_2d(rhs%s, rhs%e, rhs%g) res%val(res%s(1):res%e(1),res%s(2):res%e(2)) = & & lhs * rhs%val(res%s(1):res%e(1),res%s(2):res%e(2)) END FUNCTION scale_left !======================================================================= FUNCTION scale_right(lhs, rhs) RESULT(res) TYPE(gvector_2d), INTENT(in) :: lhs REAL(rkind), INTENT(in) :: rhs TYPE(gvector_2d) :: res res = gvector_2d(lhs%s, lhs%e, lhs%g) res%val(res%s(1):res%e(1),res%s(2):res%e(2)) = & & lhs%val(res%s(1):res%e(1),res%s(2):res%e(2)) * rhs END FUNCTION scale_right !======================================================================= SUBROUTINE from_vec(lhs, rhs) TYPE(gvector_2d), INTENT(inout) :: lhs REAL(rkind), INTENT(in) :: rhs(:,:) INTEGER :: n(2) n = lhs%e - lhs%s + 1 IF(SIZE(rhs,1).NE.n(1) .OR. SIZE(rhs,2).NE.n(2)) THEN PRINT*, 'from_vec: sizes of rhs and lhs not equal!' STOP END IF lhs%val(lhs%s(1):lhs%e(1),lhs%s(2):lhs%e(2)) = rhs(:,:) END SUBROUTINE from_vec !======================================================================= SUBROUTINE from_scal(lhs, rhs) TYPE(gvector_2d), INTENT(inout) :: lhs REAL(rkind), INTENT(in) :: rhs lhs%val(lhs%s(1):lhs%e(1),lhs%s(2):lhs%e(2)) = rhs END SUBROUTINE from_scal !======================================================================= SUBROUTINE disp(str,this) CHARACTER(len=*), INTENT(in) :: str TYPE(gvector_2d), INTENT(in) :: this INTEGER :: i WRITE(*,'(/a,3(" (",i0,",",i0,") "))') str//': s, e, g =',& & this%s, this%e, this%g DO i=LBOUND(this%val,1),UBOUND(this%val,1) WRITE(*,'(10(1pe11.3))') (this%val(i,:)) END DO END SUBROUTINE disp !======================================================================= FUNCTION norm2_gvector_2d(this) RESULT(res) TYPE(gvector_2d), INTENT(in) :: this REAL(rkind) :: res res = NORM2( this%val(this%s(1):this%e(1), & & this%s(2):this%e(2)) ) END FUNCTION norm2_gvector_2d !======================================================================= FUNCTION norm2_root_g_2d(x, comm, root) RESULT(res) ! ! Vector norm of 2d distributed array with ghost cells ! USE mpi TYPE(gvector_2d), INTENT(in) :: x INTEGER, INTENT(in) :: comm INTEGER, INTENT(in) :: root REAL(rkind) :: res INTEGER, PARAMETER :: ndim=2 INTEGER, DIMENSION(ndim) :: s, e REAL(rkind) :: res_loc INTEGER :: me, ierr ! CALL mpi_comm_rank(comm, me, ierr) s = x%s e = x%e res_loc = SUM(x%val(s(1):e(1),s(2):e(2))**2) res = 0.0 CALL mpi_reduce(res_loc, res, 1, MPI_DOUBLE_PRECISION, MPI_SUM,& & root, comm, ierr) IF(me.EQ.root) res = SQRT(res) END FUNCTION norm2_root_g_2d !======================================================================= FUNCTION norm2_all_g_2d(x, comm) RESULT(res) ! ! Vector norm of 2d distributed array with ghost cells ! USE mpi TYPE(gvector_2d), INTENT(in) :: x INTEGER, INTENT(in) :: comm REAL(rkind) :: res INTEGER, PARAMETER :: ndim=2 INTEGER, DIMENSION(ndim) :: s, e REAL(rkind) :: res_loc INTEGER :: ierr ! s = x%s e = x%e res_loc = SUM(x%val(s(1):e(1),s(2):e(2))**2) CALL mpi_allreduce(res_loc, res, 1, MPI_DOUBLE_PRECISION, MPI_SUM, & & comm, ierr) res = SQRT(res) END FUNCTION norm2_all_g_2d !======================================================================= END MODULE gvector diff --git a/multigrid/src/parmg_mod.f90 b/multigrid/src/parmg_mod.f90 index 5de78ca..25ebc52 100644 --- a/multigrid/src/parmg_mod.f90 +++ b/multigrid/src/parmg_mod.f90 @@ -1,722 +1,722 @@ !> !> @file parmg_mod.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> MODULE parmg ! ! parmg: Utilities for parallel multigrid ! ! T.M. Tran, CRPP-EPFL ! December 2013 ! USE mpi USE iso_fortran_env, ONLY : rkind => real64 USE gvector, ONLY : gvector_2d USE stencil, ONLY : stencil_2d IMPLICIT NONE ! PRIVATE PUBLIC :: grid2_type, mg_info, create_grid, mg, get_lmax, coarse, disp, exchange, & & get_resids, jacobi, prolong, init_restrict, restrict, & & norm_vec, norm_mat ! TYPE grid2_type INTEGER, DIMENSION(2) :: s, e, s0, e0, npt_loc, npt REAL(rkind), ALLOCATABLE :: x(:) REAL(rkind), ALLOCATABLE :: y(:) TYPE(gvector_2d) :: f TYPE(gvector_2d) :: v TYPE(stencil_2d) :: fdmat TYPE(stencil_2d) :: restrict_mat END TYPE grid2_type ! TYPE mg_info INTEGER :: comm ! Communicator INTEGER :: nu1 ! Relaxation down sweeps INTEGER :: nu2 ! Relaxation up sweeps INTEGER :: mu ! mu-cycle number INTEGER :: nu0 ! Number of FMG cycles INTEGER :: levels ! Number of mg levels INTEGER :: direct_solve_nits ! Jacobit nits for direct_solve CHARACTER(len=4) :: relax ! Type of relation REAL(rkind) :: omega ! for weighted Jacobi relaxation END TYPE mg_info ! INTERFACE create_grid MODULE PROCEDURE create_grid_2d END INTERFACE create_grid INTERFACE mg MODULE PROCEDURE mg_2d END INTERFACE mg INTERFACE coarse MODULE PROCEDURE coarse_1d, coarse_2d END INTERFACE coarse INTERFACE exchange MODULE PROCEDURE exchange_g_2d, exchange_g_2d_new END INTERFACE exchange INTERFACE prolong MODULE PROCEDURE prolong_g_2d END INTERFACE prolong INTERFACE restrict MODULE PROCEDURE restrict_g_2d END INTERFACE restrict INTERFACE jacobi MODULE PROCEDURE jacobi_stencila_2d MODULE PROCEDURE jacobi_stencilg_2d END INTERFACE jacobi INTERFACE get_resids MODULE PROCEDURE resids_stencila_2d MODULE PROCEDURE resids_stencilg_2d END INTERFACE get_resids INTERFACE disp MODULE PROCEDURE dispi_0, dispi_1 END INTERFACE disp CONTAINS ! !-------------------------------------------------------------------------------- SUBROUTINE create_grid_2d(x, y, s_in, e_in, id, prb, grids, comm) ! ! Create arrays of partitionned grids ! USE stencil, ONLY : init ! REAL(rkind), INTENT(in) :: x(0:), y(0:) ! Global coordinates INTEGER, INTENT(in) :: s_in(2), e_in(2) ! Partition of finest grid INTEGER, INTENT(in) :: id(:,:) ! Structure of stencil CHARACTER(len=*), INTENT(in) :: prb INTEGER, INTENT(in) :: comm TYPE(grid2_type) :: grids(:) ! INTEGER :: levels INTEGER :: s0(2), e0(2), s(2), e(2) INTEGER :: npt_loc(2), npt_loc_min(2), npt_glob(2) INTEGER :: l, ierr ! levels = SIZE(grids) s = s_in e = e_in ! DO l=1,levels IF(l.GT.1) THEN CALL coarse(s,e) END IF npt_loc = e-s+1 CALL mpi_allreduce(s, s0, 2, MPI_INTEGER, MPI_MIN, comm, ierr) CALL mpi_allreduce(e, e0, 2, MPI_INTEGER, MPI_MAX, comm, ierr) CALL mpi_allreduce(npt_loc, npt_loc_min, 2, MPI_INTEGER, MPI_MIN, comm, ierr) IF(MINVAL(npt_loc_min) .LT. 2) THEN PRINT*, 'CREATE_GRID: number intervals too small!' STOP END IF npt_glob = e0+1 grids(l)%s0 = s0 grids(l)%e0 = e0 grids(l)%s = s grids(l)%e = e grids(l)%npt_loc = npt_loc grids(l)%npt = npt_glob grids(l)%f = gvector_2d(s, e, [1,1]) ! Arrays with ghost cell grids(l)%v = gvector_2d(s, e, [1,1]) ALLOCATE(grids(l)%x(s0(1):e0(1))) ! Global coords (x,y) ALLOCATE(grids(l)%y(s0(2):e0(2))) IF(l.EQ.1) THEN grids(1)%x = x grids(1)%y = y ELSE grids(l)%x(:) = grids(l-1)%x(0::2) grids(l)%y(:) = grids(l-1)%y(0::2) END IF END DO ! ! Set up FD matrix ! DO l=1,levels s = grids(l)%s e = grids(l)%e CALL init(s, e, id, .FALSE., grids(l)%fdmat, comm) END DO ! ! Set up restriction stencil ! DO l=2,levels CALL init_restrict(grids(l), prb, comm) END DO ! END SUBROUTINE create_grid_2d !-------------------------------------------------------------------------------- RECURSIVE SUBROUTINE mg_2d(grids, info, l) ! ! Execute a recursive V-cycle ! USE gvector, ONLY : ASSIGNMENT(=), OPERATOR(+) TYPE(grid2_type), INTENT(inout) :: grids(:) TYPE(mg_info), INTENT(in) :: info INTEGER, INTENT(in) :: l ! TYPE(gvector_2d) :: resids, v_prolong INTEGER, DIMENSION(2) :: s0, e0, s, e, g=[1,1] INTEGER :: comm, levels, k ! comm = info%comm levels = info%levels ! s0 = grids(l)%s0; e0 = grids(l)%e0 s = grids(l)%s; e = grids(l)%e resids = gvector_2d(s, e, g) ! IF(l.EQ.levels) THEN CALL direct_solve(grids(l)%fdmat, grids(l)%v, grids(l)%f) ELSE CALL relax(info%nu1) resids = get_resids(comm, grids(l)%fdmat, grids(l)%v, grids(l)%f) CALL exchange(comm, resids) CALL restrict(grids(l+1)%restrict_mat, resids, grids(l+1)%f) grids(l+1)%v = 0.0d0 ! ! Only 1 call to the coarsest level DO k=1,MERGE(info%mu, 1, (l+1).NE.levels) CALL mg(grids, info, l+1) END DO ! v_prolong = gvector_2d(s, e, g) CALL exchange(comm, grids(l+1)%v) CALL prolong(grids(l+1)%v, v_prolong) grids(l)%v = grids(l)%v + v_prolong CALL relax(info%nu2) END IF ! CONTAINS SUBROUTINE relax(nu) INTEGER, INTENT(in) :: nu SELECT CASE (TRIM(info%relax)) CASE ("jac") CALL jacobi(comm, grids(l)%fdmat, info%omega, nu, grids(l)%v, grids(l)%f) CASE default PRINT*, "relax ", info%relax, " NOT IMPLEMENTED!" STOP END SELECT END SUBROUTINE relax SUBROUTINE direct_solve(mat, v, f) TYPE(stencil_2d), INTENT(in) :: mat TYPE(gvector_2d), INTENT(inout) :: v TYPE(gvector_2d), INTENT(in) :: f v = 0.0d0 CALL jacobi(comm, mat, 1.0_rkind, info%direct_solve_nits, v, f) END SUBROUTINE direct_solve END SUBROUTINE mg_2d !-------------------------------------------------------------------------------- FUNCTION get_lmax(s_in, npt_loc, npt_min, comm) RESULT(lmax) ! ! Get max number of levels on all processes ! INTEGER :: lmax INTEGER, INTENT(in) :: s_in, npt_loc, npt_min, comm INTEGER :: me, ierr INTEGER :: s, e, kpt_loc, kpt, kpt_loc_min ! CALL mpi_comm_rank(comm, me, ierr) s = s_in kpt_loc = npt_loc e = s+npt_loc-1 lmax = 1 DO CALL mpi_allreduce(kpt_loc, kpt_loc_min, 1, MPI_INTEGER, MPI_MIN, & & comm, ierr) CALL mpi_allreduce(kpt_loc, kpt, 1, MPI_INTEGER, MPI_SUM, & & comm, ierr) ! ! Stop if npt-1 not even or when minumum local npt is attained IF(MODULO(kpt-1,2).NE.0 .OR. kpt_loc_min .LE. npt_min) EXIT ! lmax = lmax+1 CALL coarse(s, e) kpt_loc = e-s+1 END DO ! END FUNCTION get_lmax !-------------------------------------------------------------------------------- SUBROUTINE coarse_1d(s, e) ! ! Compute (s,e) of next coarse grid ! INTEGER, INTENT(inout) :: s, e INTEGER :: s0, npt, i ! ! Previous odd indices are discarded s0 = s IF( MODULO(s0,2) .NE. 0 ) THEN s0 = s+1 END IF ! ! Count local number of points npt = 0 DO i=s0,e,2 npt = npt+1 END DO ! ! Coarse s, e s = s0/2 e = s + npt - 1 END SUBROUTINE coarse_1d !-------------------------------------------------------------------------------- SUBROUTINE coarse_2d(s, e) ! ! Compute (s,e) of next coarse grid ! INTEGER, INTENT(inout) :: s(2), e(2) ! CALL coarse_1d(s(1), e(1)) CALL coarse_1d(s(2), e(2)) END SUBROUTINE coarse_2d !-------------------------------------------------------------------------------- SUBROUTINE dispi_0(str, a, comm) ! ! Display integer local scalar ! INTEGER, INTENT(in) :: a, comm CHARACTER(len=*), INTENT(in) :: str INTEGER :: npes, me, ierr INTEGER, ALLOCATABLE, DIMENSION(:) :: a_gather(:) ! CALL MPI_COMM_RANK(comm, me, ierr) CALL MPI_COMM_SIZE(comm, npes, ierr) ALLOCATE(a_gather(npes)) CALL MPI_GATHER(a, 1, MPI_INTEGER, a_gather, 1, MPI_INTEGER, & & 0, comm, ierr) IF( me.EQ.0 ) THEN WRITE(*,'(a/(20i6))') str, a_gather END IF DEALLOCATE(a_gather) END SUBROUTINE dispi_0 !-------------------------------------------------------------------------------- SUBROUTINE dispi_1(str, a, comm) ! ! Display integer local array ! INTEGER, INTENT(in) :: a(:), comm CHARACTER(len=*), INTENT(in) :: str INTEGER :: npes, me, ierr, n, i INTEGER, ALLOCATABLE, DIMENSION(:) :: a_gather(:,:) ! n = SIZE(a,1) CALL MPI_COMM_RANK(comm, me, ierr) CALL MPI_COMM_SIZE(comm, npes, ierr) ALLOCATE(a_gather(n,npes)) CALL MPI_GATHER(a, n, MPI_INTEGER, a_gather, n, MPI_INTEGER, & & 0, comm, ierr) IF( me.EQ.0 ) THEN WRITE(*,'(a)') str DO i=1,n WRITE(*,'(20i6)') a_gather(i,:) END DO END IF DEALLOCATE(a_gather) END SUBROUTINE dispi_1 !-------------------------------------------------------------------------------- SUBROUTINE exchange_g_2d_new(comm, u) ! ! Exchange ghost cells with (west,east,south,north) neighbors. ! Assume same ghost cells on each dimension: ! u%g(1) : number of ghost cells on west and east boundaries ! u%g(2) : number of ghost cells on south and north boundaries ! INTEGER, INTENT(in) :: comm TYPE(gvector_2d), INTENT(inout) :: u INTEGER :: neighs(4), ierr ! CALL mpi_cart_shift(comm, 0, 1, neighs(1), neighs(2), ierr) CALL mpi_cart_shift(comm, 1, 1, neighs(3), neighs(4), ierr) CALL exchange_g_2d(comm, neighs, u) END SUBROUTINE exchange_g_2d_new !-------------------------------------------------------------------------------- SUBROUTINE exchange_g_2d(comm, neighs, u) ! ! Exchange ghost cells with (west,east,south,north) neighbors. ! Assume same ghost cells on each dimension: ! u%g(1) : number of ghost cells on west and east boundaries ! u%g(2) : number of ghost cells on south and north boundaries ! INTEGER, INTENT(in) :: comm INTEGER, INTENT(in) :: neighs(4) TYPE(gvector_2d), INTENT(inout) :: u ! INTEGER :: cols, rows INTEGER :: ierr INTEGER, PARAMETER :: ndim=2 INTEGER, DIMENSION(ndim) :: g, lb, ub, s, e, n ! s = u%s e = u%e g = u%g lb = s - g ub = e + g n = ub - lb + 1 ! include ghost cells ! ! g(2) matrix full rows with stride n(1) CALL mpi_type_vector(n(2), g(2), n(1), MPI_DOUBLE_PRECISION, rows, ierr) CALL mpi_type_commit(rows, ierr) ! ! g(1) contiguous matrix full columns CALL mpi_type_contiguous(n(1)*g(1), MPI_DOUBLE_PRECISION, cols, ierr) CALL mpi_type_commit(cols, ierr) ! ! Exchange along first dimension CALL mpi_sendrecv(u%val(s(1), lb(2)), 1, rows, neighs(1), 0, & & u%val(e(1)+1,lb(2)), 1, rows, neighs(2), 0, & & comm, MPI_STATUS_IGNORE, ierr) CALL mpi_sendrecv(u%val(e(1)-g(1)+1,lb(2)), 1, rows, neighs(2), 0, & & u%val(lb(1), lb(2)), 1, rows, neighs(1), 0, & & comm, MPI_STATUS_IGNORE, ierr) ! ! Exchange along second dimension CALL mpi_sendrecv(u%val(lb(1),s(2)), 1, cols, neighs(3), 0, & & u%val(lb(1),e(2)+1), 1, cols, neighs(4), 0, & & comm, MPI_STATUS_IGNORE, ierr) CALL mpi_sendrecv(u%val(lb(1),e(2)-g(2)+1), 1, cols, neighs(4), 0, & & u%val(lb(1),lb(2)), 1, cols, neighs(3), 0, & & comm, MPI_STATUS_IGNORE, ierr) END SUBROUTINE exchange_g_2d !-------------------------------------------------------------------------------- SUBROUTINE prolong_g_2d(vbar, v) ! ! 2D bilinear prolongation ! TYPE(gvector_2d), INTENT(in) :: vbar TYPE(gvector_2d), INTENT(inout) :: v ! INTEGER :: i,j,i1,i2,j1,j2 ! i1 = v%s(1)-MODULO(v%s(1),2); i2 = v%e(1)+MODULO(v%e(1),2) j1 = v%s(2)-MODULO(v%s(2),2); j2 = v%e(2)+MODULO(v%e(2),2) ! ! Even numbered nodes on fine mesh ! DO j=j1,j2,2 DO i=i1,i2,2 v%val(i,j) = vbar%val(i/2,j/2) END DO END DO ! ! Linear interpolation on x ! DO j=j1,j2,2 DO i=i1+1,i2-1,2 v%val(i,j) = 0.5d0*(v%val(i-1,j)+v%val(i+1,j)) END DO END DO ! ! Linear interpolation on y ! DO j=j1+1,j2-1,2 DO i=i1,i2 v%val(i,j) = 0.5d0*(v%val(i,j-1)+v%val(i,j+1)) END DO END DO END SUBROUTINE prolong_g_2d !-------------------------------------------------------------------------------- SUBROUTINE init_restrict(grid, prb, comm) ! ! Set up restriction stencil ! USE stencil, ONLY : init TYPE(grid2_type), INTENT(inout) :: grid CHARACTER(len=*), INTENT(in) :: prb INTEGER, INTENT(in) :: comm ! INTEGER, PARAMETER :: npoints=9, ndim=2 INTEGER :: s(2), e(2), n(2), id(9,2) INTEGER :: i, j ! ! Stencil structure initialization ! s = grid%s e = grid%e n = grid%npt-1 ! ! N ! 6---7---8 ! | | | ! W 4---0---5 E Numbering of stencil ! | | | ! 1---2---3 ! S ! id = RESHAPE([0, -1, 0, 1,-1, 1,-1, 0, 1, & & 0, -1,-1,-1, 0, 0, 1, 1, 1], & & [npoints, ndim]) CALL init(s, e, id, .FALSE., grid%restrict_mat, comm) ! ! Fill in stencil ! DO j=s(2),e(2) DO i=s(1),e(1) grid%restrict_mat%val(i,j,:) = [4._rkind, & & 1._rkind, 2._rkind, 1._rkind, & & 2._rkind, 2._rkind, & & 1._rkind, 2._rkind, 1._rkind ]& & / 16._rkind END DO END DO ! ! Apply Dirichlet BC ! IF(s(1).EQ.0 .AND. prb(1:1).EQ.'d') THEN ! West face grid%restrict_mat%val(s(1),:,3) = 0._rkind grid%restrict_mat%val(s(1),:,5) = 0._rkind grid%restrict_mat%val(s(1),:,8) = 0._rkind END IF IF(e(1).EQ.n(1) .AND. prb(2:2).EQ.'d') THEN ! East face grid%restrict_mat%val(e(1),:,1) = 0._rkind grid%restrict_mat%val(e(1),:,4) = 0._rkind grid%restrict_mat%val(e(1),:,6) = 0._rkind END IF IF(s(2).EQ.0 .AND. prb(3:3).EQ.'d') THEN ! South face grid%restrict_mat%val(:,s(2),6) = 0._rkind grid%restrict_mat%val(:,s(2),7) = 0._rkind grid%restrict_mat%val(:,s(2),8) = 0._rkind END IF IF(e(2).EQ.n(2) .AND. prb(4:4).EQ.'d') THEN ! North face grid%restrict_mat%val(:,e(2),1) = 0._rkind grid%restrict_mat%val(:,e(2),2) = 0._rkind grid%restrict_mat%val(:,e(2),3) = 0._rkind END IF END SUBROUTINE init_restrict !-------------------------------------------------------------------------------- SUBROUTINE jacobi_stencila_2d(mat, omega, nu, v, f) ! ! Weighted Jacobi relaxation ! TYPE(stencil_2d),INTENT(in) :: mat REAL(rkind), INTENT(in) :: omega INTEGER, INTENT(in) :: nu REAL(rkind), ALLOCATABLE, INTENT(inout) :: v(:,:) REAL(rkind), ALLOCATABLE, INTENT(in) :: f(:,:) ! REAL(rkind), ALLOCATABLE :: temp(:,:), inv_diag(:,:) INTEGER, DIMENSION(2) :: smin, emax, s, e, d, lb, ub INTEGER :: it, k, i, j ! s(:) = mat%s(:) e(:) = mat%e(:) smin(:) = mat%s0(:) emax(:) = mat%e0(:) ! ALLOCATE(temp(s(1):e(1),s(2):e(2))) ALLOCATE(inv_diag(s(1):e(1),s(2):e(2))) ! inv_diag(:,:) = omega/mat%val(:,:,0) DO it=1,nu temp(:,:) = f(s(1):e(1),s(2):e(2)) DO k=1,mat%npoints-1 ! exclude the diagonal term, f - (L+U)*v d(:) = mat%id(k,:) lb = MAX(smin, smin-d, mat%s) ub = MIN(emax, emax-d, mat%e) DO j=lb(2),ub(2) DO i=lb(1),ub(1) temp(i,j) = temp(i,j) - mat%val(i,j,k)*v(i+d(1),j+d(2)) END DO END DO END DO temp = temp * inv_diag v(s(1):e(1),s(2):e(2)) = (1.d0-omega)*v(s(1):e(1),s(2):e(2)) + temp END DO ! DEALLOCATE(temp) DEALLOCATE(inv_diag) END SUBROUTINE jacobi_stencila_2d !-------------------------------------------------------------------------------- SUBROUTINE jacobi_stencilg_2d(comm, mat, omega, nu, v, f) ! ! Weighted Jacobi relaxation ! USE gvector, ONLY : ASSIGNMENT(=) INTEGER, INTENT(in) :: comm TYPE(stencil_2d),INTENT(in) :: mat REAL(rkind), INTENT(in) :: omega INTEGER, INTENT(in) :: nu TYPE(gvector_2d), INTENT(inout) :: v TYPE(gvector_2d), INTENT(in) :: f ! REAL(rkind), ALLOCATABLE :: temp(:,:), inv_diag(:,:) INTEGER, DIMENSION(2) :: s, e, d INTEGER :: it, k, i, j ! s(:) = v%s(:) e(:) = v%e(:) ! ALLOCATE(temp(s(1):e(1),s(2):e(2))) ALLOCATE(inv_diag(s(1):e(1),s(2):e(2))) ! inv_diag(:,:) = omega/mat%val(:,:,0) DO it=1,nu CALL exchange(comm, v) temp(:,:) = f%val(s(1):e(1),s(2):e(2)) DO k=1,mat%npoints-1 ! exclude the diagonal term, f - (L+U)*v d(:) = mat%id(k,:) DO j=s(2),e(2) DO i=s(1),e(1) temp(i,j) = temp(i,j) - & & mat%val(i,j,k) * v%val(i+d(1),j+d(2)) END DO END DO END DO temp = temp * inv_diag v%val(s(1):e(1),s(2):e(2)) = (1.d0-omega) * v%val(s(1):e(1),s(2):e(2)) + & & temp END DO END SUBROUTINE jacobi_stencilg_2d !-------------------------------------------------------------------------------- FUNCTION resids_stencila_2d(mat, xarr, farr) RESULT(res) ! ! Return residuals res = mat*x, where x, farr and res are simple arrays ! TYPE(stencil_2d), INTENT(in) :: mat REAL(rkind), ALLOCATABLE, INTENT(in) :: xarr(:,:) REAL(rkind), ALLOCATABLE, INTENT(in) :: farr(:,:) REAL(rkind) :: res(LBOUND(xarr,1):UBOUND(xarr,1), & & LBOUND(xarr,2):UBOUND(xarr,2)) INTEGER :: k, i, j INTEGER, DIMENSION(2) :: smin, emax, d, lb, ub ! smin(:) = mat%s0(:) emax(:) = mat%e0(:) res = farr DO k=0,mat%npoints-1 d(:) = mat%id(k,:) lb = MAX(smin, smin-d, mat%s) ub = MIN(emax, emax-d, mat%e) DO j=lb(2),ub(2) DO i=lb(1),ub(1) res(i,j) = res(i,j) - mat%val(i,j,k)*xarr(i+d(1),j+d(2)) END DO END DO END DO END FUNCTION resids_stencila_2d !-------------------------------------------------------------------------------- FUNCTION resids_stencilg_2d(comm, mat, xarr, farr) RESULT(res) ! ! Return residuals res= f-mat*x, where x, f and res are gvectors ! INTEGER, INTENT(in) :: comm TYPE(stencil_2d), INTENT(in) :: mat TYPE(gvector_2d), INTENT(inout) :: xarr TYPE(gvector_2d), INTENT(in) :: farr TYPE(gvector_2d) :: res INTEGER :: k, i, j INTEGER, DIMENSION(2) :: s, e, d ! s = xarr%s e = xarr%e res = gvector_2d(xarr%s, xarr%e, xarr%g) res%val = farr%val CALL exchange(comm, xarr) DO k=0,mat%npoints-1 d(:) = mat%id(k,:) DO j=s(2),e(2) DO i=s(1),e(1) res%val(i,j) = res%val(i,j) - mat%val(i,j,k)*xarr%val(i+d(1),j+d(2)) END DO END DO END DO END FUNCTION resids_stencilg_2d !-------------------------------------------------------------------------------- SUBROUTINE restrict_g_2d(mat, f, fbar) ! ! 2D full weighting restriction ! TYPE(stencil_2d), INTENT(in) :: mat TYPE(gvector_2d), INTENT(in) :: f TYPE(gvector_2d), INTENT(inout) :: fbar ! INTEGER, DIMENSION(2) :: s, e, d INTEGER :: k, i, j ! s = fbar%s e = fbar%e ! ! Diagonal contributions: d(0) = (0,0) DO j=s(2),e(2) DO i=s(1),e(1) fbar%val(i,j) = mat%val(i,j,0) * f%val(2*i,2*j) END DO END DO ! DO k=1,mat%npoints-1 d(:) = mat%id(k,:) DO j=s(2),e(2) DO i=s(1),e(1) fbar%val(i,j) = fbar%val(i,j) + & & mat%val(i,j,k) * f%val(2*i+d(1),2*j+d(2)) END DO END DO END DO END SUBROUTINE restrict_g_2d !-------------------------------------------------------------------------------- REAL(rkind) FUNCTION norm_vec(x, comm, root) ! ! Infinity vector norm ! TYPE(gvector_2d), INTENT(in) :: x INTEGER, INTENT(in) :: comm INTEGER, OPTIONAL, intent(in) :: root REAL(rkind) :: temp INTEGER :: ierr temp = MAXVAL( ABS(x%val(x%s(1):x%e(1),x%s(2):x%e(2))) ) IF(PRESENT(root)) THEN CALL mpi_reduce(temp, norm_vec, 1, MPI_DOUBLE_PRECISION, MPI_MAX, & & root, comm, ierr) ELSE CALL mpi_allreduce(temp, norm_vec, 1, MPI_DOUBLE_PRECISION, MPI_MAX, & & comm, ierr) END IF END FUNCTION norm_vec !-------------------------------------------------------------------------------- REAL(rkind) FUNCTION norm_mat(mat, comm, root) ! ! Infinity matrix norm ! TYPE(stencil_2d), INTENT(in) :: mat INTEGER, INTENT(in) :: comm INTEGER, OPTIONAL, intent(in) :: root REAL(rkind) :: arr_temp(mat%s(1):mat%e(1),mat%s(2):mat%e(2)) REAL(rkind) :: temp INTEGER :: i, j, s(2), e(2), ierr s = mat%s; e = mat%e DO j=s(2),e(2) DO i=s(1),e(1) arr_temp(i,j) = SUM(ABS(mat%val(i,j,:))) END DO END DO temp = MAXVAL(arr_temp) IF(PRESENT(root)) THEN CALL mpi_reduce(temp, norm_mat, 1, MPI_DOUBLE_PRECISION, MPI_MAX, & & root, comm, ierr) ELSE CALL mpi_allreduce(temp, norm_mat, 1, MPI_DOUBLE_PRECISION, MPI_MAX, & & comm, ierr) END IF END FUNCTION norm_mat !-------------------------------------------------------------------------------- END MODULE parmg diff --git a/multigrid/src/partition.f90 b/multigrid/src/partition.f90 index 1a3b2e6..27cd714 100644 --- a/multigrid/src/partition.f90 +++ b/multigrid/src/partition.f90 @@ -1,77 +1,77 @@ !> !> @file partition.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main USE mpi USE pputils2, ONLY : dist1d USE parmg, ONLY : get_lmax, coarse, disp IMPLICIT NONE ! INTEGER :: me, npes, ierr INTEGER :: n, npt, npt_loc, s, e INTEGER :: l, lmax ! CALL MPI_INIT(ierr) CALL MPI_COMM_RANK(MPI_COMM_WORLD, me, ierr) CALL MPI_COMM_SIZE(MPI_COMM_WORLD, npes, ierr) ! IF(me.EQ.0) THEN WRITE(*,*) 'Enter n' READ(*,*) n END IF CALL mpi_bcast(n,1,MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) ! ! Partition ot finest grid ! npt = n+1 CALL dist1d(MPI_COMM_WORLD, 0, npt, s, npt_loc) e = s+npt_loc-1 CALL disp('start index=', s, MPI_COMM_WORLD) ! ! Max number of levels ! lmax = get_lmax(s, npt_loc, 1, MPI_COMM_WORLD) IF(me.EQ.0) WRITE(*,'(a,i0)') 'Max number of levels ', lmax ! ! Grid coarsening ! DO l=1,lmax IF(l.GT.1) THEN CALL coarse(s, e) npt_loc = e-s+1 CALL mpi_allreduce(npt_loc, npt, 1, MPI_INTEGER, MPI_SUM, & & MPI_COMM_WORLD, ierr) END IF IF(me.EQ.0) WRITE(*, '(a,i3)') 'level', l CALL disp('s ', s, MPI_COMM_WORLD) CALL disp('e ', e, MPI_COMM_WORLD) CALL disp('npt_loc', npt_loc, MPI_COMM_WORLD) CALL disp('npt ', npt, MPI_COMM_WORLD) END DO ! CALL MPI_FINALIZE(ierr) ! !+++++ END PROGRAM main diff --git a/multigrid/src/poisson_fd.f90 b/multigrid/src/poisson_fd.f90 index d44100d..3710133 100644 --- a/multigrid/src/poisson_fd.f90 +++ b/multigrid/src/poisson_fd.f90 @@ -1,485 +1,485 @@ !> !> @file poisson_fd.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Solving the following 2d PDE using finite differences: ! ! d2/dx2 f(x,y) + tau*d2/dxdy f(x,y) + d2/dy2 f(x,y) = w(x,y), x,y in [0:Lx][0:Ly] ! w(x,y) = -4*pi^2 *[(kx^2/Lx^2+ky^2/Ly^2)*cos(2*kx*pi*x/Lx)*sin(2*ky*pi*y/Ly) ! -tau*(kx*ky)/(Lx*Ly)*sin(2*kx*pi*x/Lx)*cos(2*ky*pi*y/Lx)] ! ! West, East boundaries: Neumann ! South, North boundaries: Dirichlet ! ! Analytic solution : f(x,y) = cos(2*kx*pi*x/Lx)*sin(2*kx*pi*y/Ly) ! USE multigrid USE fdmat_mod IMPLICIT NONE INCLUDE 'mpif.h' ! INTEGER, PARAMETER :: nnumx=32 ! INTEGER :: ierr, np, me DOUBLE PRECISION, PARAMETER :: PI=4.0d0*ATAN(1.0d0) DOUBLE PRECISION :: Lx, Ly, icrosst, beta, miome INTEGER :: n, nx,ny,nz,kx,ky CHARACTER(len=4) :: prb INTEGER :: nits DOUBLE PRECISION :: atol, rtol LOGICAL :: nldirect, nldebug ! TYPE(mg_info) :: info ! info for MG INTEGER :: levels, nnu, mu, nu0 ! INTEGER :: inu, nu1(nnumx), nu2(nnumx), niter(nnumx) DOUBLE PRECISION :: titer(nnumx) ! LOGICAL :: nlfixed DOUBLE PRECISION :: omega CHARACTER(len=4) :: mat_type, relax ! DOUBLE PRECISION :: dx, dy INTEGER :: ix, iy, l, its DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:), dense(:) TYPE(grid2d), ALLOCATABLE :: grids(:) ! DOUBLE PRECISION, ALLOCATABLE, TARGET :: sol_ana2d(:,:), sol_direct2d(:,:) DOUBLE PRECISION, POINTER :: sol_ana(:), sol_direct(:) DOUBLE PRECISION :: err_direct, resid_direct DOUBLE PRECISION :: norma, normb DOUBLE PRECISION, ALLOCATABLE :: rresid(:), resid(:), errdisc(:) DOUBLE PRECISION :: t0, tsetup, tmat(2), tdirect, tbsolve DOUBLE PRECISION, EXTERNAL :: mem ! NAMELIST /parameters/ prb, mat_type,nx, ny, nz, kx, ky, Lx, Ly, icrosst, beta, & & miome, nldebug, nlfixed, levels, nnu, nu1, nu2, mu, nu0, & & relax,omega, nldirect, nits, atol, rtol !-------------------------------------------------------------------------------- ! 1.0 Prologue ! CALL MPI_INIT(ierr) CALL MPI_COMM_RANK(MPI_COMM_WORLD,me,ierr) CALL MPI_COMM_SIZE(MPI_COMM_WORLD,np,ierr) ! ! Default inputs ! nx=32 ny=32 nz=1 kx=1 ky=1 icrosst=1.0d0 Lx = 1.0D0 Ly = 1.0D0 beta = 0d0 miome = 200d0 nldebug = .FALSE. prb = 'dddd' mat_type = 'cds' nldirect = .TRUE. ! nlfixed = .FALSE. levels = 2 nnu = 1 nu1 = 1 nu2 = 1 mu = 1 nu0 = 1 nits = 10 atol = 1.e-8; rtol = 1.e-8 relax = 'jac' omega = 0.6667 ! IF(me==0) THEN READ(*,parameters) WRITE(*,parameters) END IF ! ! Send input parameters to other processors ! CALL MPI_BCAST(nx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL MPI_BCAST(ny, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL MPI_BCAST(nz, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL MPI_BCAST(kx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL MPI_BCAST(ky, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL MPI_BCAST(icrosst, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) CALL MPI_BCAST(Lx,1,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD, ierr) CALL MPI_BCAST(Ly,1,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD, ierr) CALL MPI_BCAST(beta,1,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD, ierr) CALL MPI_BCAST(miome, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) ! CALL mpi_bcast(nldebug, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nlfixed, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nldirect, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nnu, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nu1, nnumx, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nu2, nnumx, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nu0, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(mu, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nits, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(relax, 4, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(mat_type, 4, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(omega, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(atol, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(rtol, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(prb, LEN(prb), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) ! IF(nnu.GT.nnumx) THEN IF(me.EQ.0) THEN PRINT*, 'Value of nnu larger than', nnumx END IF CALL mpi_finalize(ierr) STOP END IF ! ! Adjust number of levels and fill mg info. ! levels = MIN(levels, get_lmax(nx), get_lmax(ny)) info%nu1 = nu1(1) info%nu2 = nu2(1) info%mu = mu info%nu0 = nu0 info%levels = levels info%relax = relax info%omega = omega !-------------------------------------------------------------------------------- ! 2.0 Setup grids ! ! Grid on the finest level ! dx = lx/REAL(nx,8) dy = ly/REAL(ny,8) ALLOCATE(x(0:nx), y(0:ny)) DO ix=0,nx x(ix) = ix*dx END DO DO iy=0,ny y(iy) = iy*dy END DO WRITE(*,'(a,3(1pe12.3))') 'dx, dy, dx/dy =', dx, dy, dx/dy ! ALLOCATE(dense(0:nx)) dense = fdense(x) ! ! Set up grids ! t0 = mpi_wtime() ALLOCATE(grids(levels)) CALL create_grid_fd(x, y, grids, info, mat_type=mat_type, debug=nldebug) WRITE(*,'(5a6)') 'l', 'nx', 'ny', 'rx', 'ry' WRITE(*,'(5i6)') (l, grids(l)%n, grids(l)%rank, l=1,levels) IF(nldebug) THEN CALL printmat('** Prolongation matrix in 1st dim.**', grids(2)%transf(1)) CALL printmat('** Prolongation matrix in 2nd dim.**', grids(2)%transf(2)) END IF ! ! Set BC on grid transfer matrices ! IF(prb.EQ.'dddd') CALL ibc_transf(grids,1,3) ! Direction X CALL ibc_transf(grids,2,3) ! Direction Y tsetup = mpi_wtime()-t0 !-------------------------------------------------------------------------------- ! 3.0 Problem discretization ! ! Construct FD matrix and impose BC on all grids ! t0=mpi_wtime() DO l=1,levels CALL fdmat(grids(l), fdense, icrosst) IF(mat_type.EQ.'csr') CALL to_mat(grids(l)%mata) END DO tmat(1) = mpi_wtime()-t0 ! t0=mpi_wtime() DO l=1,levels CALL ibc_fdmat(grids(l), prb) END DO tmat(2) = mpi_wtime()-t0 ! ! Set RHS and impose BC on the fiest grid ! grids(1)%f(:,:) = frhs(x,y) ! IF(prb.EQ.'dddd') THEN grids(1)%f(0,:) = 0.0d0 ! Dirichlet on west and east grids(1)%f(nx,:) = 0.0d0 ELSE IF(prb.EQ.'nndd') THEN ! Neumann on west and east grids(1)%f(0,:) = 0.5d0*grids(1)%f(0,:) grids(1)%f(nx,:) = 0.5d0*grids(1)%f(nx,:) END IF grids(1)%f(:,0) = 0.0d0 ! Dirichlet on south and north grids(1)%f(:,ny) = 0.0d0 ! !-------------------------------------------------------------------------------- ! 4.0 Analytical solutions and RHS at the finest grid (l=1) ! n = (nx+1)*(ny+1) ! Number of unknowns ALLOCATE(sol_ana2d(0:nx,0:ny)) sol_ana(1:n) => sol_ana2d sol_ana2d(:,:) = fsol(x,y) !-------------------------------------------------------------------------------- ! 5.0 Direct solution at the finest grid (l=1) ! IF(nldirect) THEN WRITE(*,'(/a)') 'Direct solution for the finest grid problem ...' ALLOCATE(sol_direct2d(0:nx,0:ny)) sol_direct(1:n) => sol_direct2d ! t0 = mpi_wtime() sol_direct = grids(1)%f1d CALL direct_solve(grids(1), sol_direct, debug=nldebug) tdirect = mpi_wtime()-t0 ! t0 = mpi_wtime() sol_direct = grids(1)%f1d CALL direct_solve(grids(1), sol_direct, debug=nldebug) tbsolve = mpi_wtime()-t0 ! ! Max norm and residual ! err_direct = MAXVAL(ABS(sol_direct-sol_ana)) resid_direct = residue(grids(1), grids(1)%f1d, sol_direct, 'inf') WRITE(*,'(a,2(1pe12.3))') 'Max norm of error and residual norm', & & err_direct, resid_direct END IF !-------------------------------------------------------------------------------- ! 5.0 Iterative solution using MG V-cycle ! WRITE(*,'(/a)') 'Multigrid MG V-cycles ...' ALLOCATE(errdisc(0:nits)) ALLOCATE(resid(0:nits)) ALLOCATE(rresid(0:nits)) ! ! Norm of A and b ! IF(mat_type.EQ.'csr') THEN norma = matnorm(grids(1)%mata, 'inf') ELSE norma = matnorm(grids(1)%mata_cds, 'inf') END IF normb = MAXVAL(ABS(grids(1)%f1d)) WRITE(*,'(a,2(1pe12.3))') 'Norm A and RHS', norma, normb ! ! Initial guess ! DO inu=1,nnu info%nu1 = nu1(inu) info%nu2 = nu2(inu) WRITE(*,'(/2(a5,i3,2x))') 'nu1 =', nu1(inu), 'nu2 =', nu2(inu) IF(nlfixed .AND. nldirect) THEN grids(1)%v = sol_direct2d ELSE grids(1)%v = 0.0d0 END IF ! errdisc(0) = MAXVAL(ABS(grids(1)%v1d-sol_ana)) resid(0) = residue(grids(1), grids(1)%f1d, grids(1)%v1d, 'inf') rresid(0) = resid(0) / ( norma*MAXVAL(ABS(grids(1)%v1d)) + normb ) WRITE(*,'(a4,3(a12,a8))') 'its', 'residue', 'ratio', 'disc. err', 'ratio', & & 'rel. resid', 'ratio' WRITE(*,'(i4,3(1pe12.3,8x))') 0, resid(0), errdisc(0), rresid(0) ! ! Iterations ! t0 = mpi_wtime() DO its=1,nits CALL mg(grids, info, 1) errdisc(its) = MAXVAL(ABS(grids(1)%v1d-sol_ana)) resid(its) = residue(grids(1), grids(1)%f1d, grids(1)%v1d, 'inf') rresid(its) = resid(its) / ( norma*MAXVAL(ABS(grids(1)%v1d)) + normb ) WRITE(*,'((i4,3(1pe12.3,0pf8.2)))') its, & & resid(its), resid(its)/resid(its-1), & & errdisc(its), errdisc(its)/errdisc(its-1), & & rresid(its), rresid(its)/rresid(its-1) IF(resid(its) .LE. atol .or. rresid(its) .le. rtol) EXIT END DO niter(inu) = MIN(nits,its) titer(inu) = mpi_wtime() - t0 END DO !-------------------------------------------------------------------------------- ! 9.0 Epilogue ! ! Display timing ! WRITE(*,'(/a)') 'Timing ...' WRITE(*,'(a,1pe12.3,i5)') 'Setup time (s) ', tsetup WRITE(*,'(a,2(1pe12.3))') 'Matrix construction time(s)', tmat WRITE(*,'(a,2(1pe12.3))') 'Direct and bsolve time (s) ', tdirect, tbsolve WRITE(*,'(/3a6,a15)') 'nu1', 'nu2', 'niter', 'Iter time(s)' DO inu=1,nnu WRITE(*,'(3i6,3x,1pe12.3)') nu1(inu), nu2(inu), niter(inu), titer(inu) END DO ! WRITE(*,'(/a,f12.3)') 'Mem used so far (MB)', mem() ! ! Creata HDF5 file ! IF(me.EQ.0) CALL h5file ! ! Clean up ! DEALLOCATE(x) DEALLOCATE(y) DEALLOCATE(dense) DEALLOCATE(grids) DEALLOCATE(sol_ana2d) IF(nldirect) DEALLOCATE(sol_direct2d) DEALLOCATE(errdisc) DEALLOCATE(resid) DEALLOCATE(rresid) ! CALL MPI_FINALIZE(ierr) !-------------------------------------------------------------------------------- CONTAINS !+++ FUNCTION fdense(x) ! ! Return density ! DOUBLE PRECISION, INTENT(in) :: x(:) DOUBLE PRECISION :: fdense(SIZE(x)) fdense(:) = 0.5d0*beta*miome*EXP( -(x(:)-Lx/3d0)**2d0/(Lx/2d0)**2d0 ); END FUNCTION fdense !+++ FUNCTION frhs(x,y) ! ! Return RHS ! DOUBLE PRECISION, INTENT(in) :: x(:), y(:) DOUBLE PRECISION :: frhs(SIZE(x),SIZE(y)) DOUBLE PRECISION :: c, s, d(SIZE(x)) DOUBLE PRECISION :: corr INTEGER :: j corr = 1.d0+icrosst**2/4.0d0 d(:) = fdense(x(:)) IF(prb.EQ.'dddd') THEN DO j=1,SIZE(y) c = COS(2.0d0*pi*ky*y(j)/Ly) s = SIN(2.0d0*pi*ky*y(j)/Ly) frhs(:,j) = -4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * SIN(2.0d0*pi*kx*x(:)/Lx)*s & & -icrosst*kx*ky/Lx/Ly * COS(2.0d0*pi*kx*x(:)/Lx)*c ) & & + d(:) * SIN(2.0d0*pi*kx*x(:)/Lx)*s END DO ELSE IF (prb.EQ.'nndd') THEN DO j=1,SIZE(y) c = COS(2.0d0*pi*ky*y(j)/Ly) s = SIN(2.0d0*pi*ky*y(j)/Ly) frhs(:,j) = -4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * COS(2.0d0*pi*kx*x(:)/Lx)*s & & +icrosst*kx*ky/Lx/Ly * SIN(2.0d0*pi*kx*x(:)/Lx)*c ) & & + d(:) * COS(2.0d0*pi*kx*x(:)/Lx)*s END DO END IF !!$ frhs = -frhs END FUNCTION frhs !+++ FUNCTION fsol(x,y) ! ! Return analytical solution ! DOUBLE PRECISION, INTENT(in) :: x(:), y(:) DOUBLE PRECISION :: fsol(SIZE(x),SIZE(y)) DOUBLE PRECISION :: c INTEGER :: j IF(prb.EQ.'dddd') THEN DO j=1,SIZE(y) c = SIN(2.0d0*pi*ky*y(j)/Ly) fsol(:,j) = SIN(2.0d0*pi*kx*x(:)/Lx)*c END DO ELSE IF (prb.EQ.'nndd') THEN DO j=1,SIZE(y) c = SIN(2.0d0*pi*ky*y(j)/Ly) fsol(:,j) = COS(2.0d0*pi*kx*x(:)/Lx)*c END DO END IF END FUNCTION fsol !+++ SUBROUTINE h5file USE futils CHARACTER(len=128) :: file='poisson_mg.h5' INTEGER :: fid INTEGER :: l CHARACTER(len=64) :: dsname CALL creatf(file, fid, real_prec='d') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NY', ny) CALL attach(fid, '/', 'KX', kx) CALL attach(fid, '/', 'KY', ky) CALL attach(fid, '/', 'LX', Lx) CALL attach(fid, '/', 'LY', Ly) CALL attach(fid, '/', 'BETA', beta) CALL attach(fid, '/', 'OMEGA', omega) CALL attach(fid, '/', 'RELAX', relax) CALL attach(fid, '/', 'MAT_TYPE', mat_type) CALL attach(fid, '/', 'NITS', nits) CALL attach(fid, '/', 'LEVELS', levels) CALL attach(fid, '/', 'NNU', nnu) CALL attach(fid, '/', 'NU0', nu0) CALL attach(fid, '/', 'MU', mu) ! CALL putarr(fid, '/nu1', nu1(1:nnu)) CALL putarr(fid, '/nu2', nu2(1:nnu)) CALL putarr(fid, '/dense', dense) CALL creatg(fid, '/mglevels') DO l=1,levels WRITE(dsname,'("/mglevels/level.",i2.2)') l CALL creatg(fid, TRIM(dsname)) IF(mat_type.EQ.'csr') THEN CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata) ELSE CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata_cds) END IF IF(l.GT.1) THEN CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1)) CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2)) END IF CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x) CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y) CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f) CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v) CALL putarr(fid, TRIM(dsname)//'/f1d', grids(l)%f1d) CALL putarr(fid, TRIM(dsname)//'/v1d', grids(l)%v1d) END DO ! ! Solutions at finest grid ! CALL creatg(fid, '/solutions') CALL putarr(fid, '/solutions/xg', grids(1)%x) CALL putarr(fid, '/solutions/yg', grids(1)%y) CALL putarr(fid, '/solutions/calc', grids(1)%v) CALL putarr(fid, '/solutions/anal', sol_ana2d) IF(nldirect) CALL putarr(fid, '/solutions/direct', sol_direct2d) ! nits=niter(nnu) CALL creatg(fid, '/Iterations') CALL putarr(fid, '/Iterations/residues', resid(0:nits)) CALL putarr(fid, '/Iterations/disc_errors', errdisc(0:nits)) ! CALL closef(fid) END SUBROUTINE h5file !+++ END PROGRAM main diff --git a/multigrid/src/ppoisson_fd.f90 b/multigrid/src/ppoisson_fd.f90 index 0fbf675..090a006 100644 --- a/multigrid/src/ppoisson_fd.f90 +++ b/multigrid/src/ppoisson_fd.f90 @@ -1,418 +1,418 @@ !> !> @file ppoisson_fd.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> ! ! Test 2D parallel multigrid V-cycle ! MODULE mod USE iso_fortran_env, ONLY : rkind => real64 IMPLICIT NONE ! REAL(rkind), PARAMETER :: PI=4.d0*ATAN(1.0d0) CONTAINS END MODULE mod !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ PROGRAM main USE mpi USE fdmat_mod, ONLY : fdmat, ibc_fdmat, ibc_rhs USE pputils2, ONLY : dist1d, timera, hostlist USE gvector, ONLY : gvector_2d, norm2, ASSIGNMENT(=), OPERATOR(-) USE parmg, ONLY : grid2_type, mg_info, create_grid, mg, exchange, & & get_resids, disp, norm_vec, norm_mat USE stencil, ONLY : stencil_2d, putmat USE mod IMPLICIT NONE ! INTEGER, PARAMETER :: ndims=2 ! INTEGER :: me, neighs(4), npes, ierr INTEGER, DIMENSION(ndims) :: coords, comm1d LOGICAL, DIMENSION(ndims) :: periods=[.FALSE.,.FALSE.] LOGICAL :: reorder =.FALSE. INTEGER :: comm_cart INTEGER, DIMENSION(ndims) :: e0, s0, e, s, npt_glob, npt_loc ! REAL(rkind), ALLOCATABLE :: xgrid(:), ygrid(:) INTEGER, ALLOCATABLE :: id(:,:) REAL(rkind) :: dx, dy INTEGER :: npoints ! Number of points in FD stencil REAL(rkind) :: t_mg, t_mg0, t_mg_min, t_mg_max ! TYPE(gvector_2d) :: v_exact, resids, errs REAL(rkind) :: norma, normb, normv REAL(rkind), ALLOCATABLE :: resid_it(:), err_it(:), rresid(:) REAL(rkind) :: ratio_err, ratio_resid, ratio_rresid INTEGER, DIMENSION(ndims) :: g, npt_loc_min INTEGER :: l, i, it CHARACTER(len=64) :: str ! TYPE(grid2_type), ALLOCATABLE :: grids(:) TYPE(mg_info) :: info ! info for MG ! ! Input quantities ! LOGICAL :: nldebug=.FALSE. CHARACTER(len=64) :: filein = 'ppoisson_fd.in' INTEGER :: dims(2)=[0,0] CHARACTER(len=4) :: prb='dddd' CHARACTER(len=4) :: relax='jac' INTEGER :: nx=4, ny=4 ! Number of intervals INTEGER :: kx=1, ky=1 REAL(rkind) :: Lx=1.0, Ly=1.0 REAL(rkind) :: icrosst=1.0, beta=0.0, miome=200.0 REAL(rkind) :: omega=1.0d0 INTEGER :: nits=100, direct_solve_nits=5 INTEGER :: levels=2, nu1=3, nu2=3, mu=1, nu0=1 REAL(rkind) :: rtol=1.e-8, atol=1.e-8, errtol=1.e-3 ! NAMELIST /in/ nldebug, dims, prb, nx, ny, kx, ky, Lx, Ly, icrosst, beta, & & miome, omega, nits, levels, relax, nu1, nu2, mu, nu0, & & direct_solve_nits, rtol, atol, errtol !================================================================================ ! 1.0 Prologue ! ! 2D process grid ! CALL mpi_init(ierr) CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) ! ! Read input filename from commmand line argument ! IF( command_argument_count() > 0 ) THEN CALL get_command_argument(1, filein) END IF IF(me.EQ.0) WRITE(*,'(a,a)') 'filein = ', TRIM(filein) ! ! Read problem inputs ! OPEN(unit=99, file=filein, form='formatted') READ(99,in) CLOSE(99) ! CALL mpi_dims_create(npes, ndims, dims, ierr) CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, comm_cart,& & ierr) CALL mpi_comm_rank(comm_cart, me, ierr) CALL mpi_cart_coords(comm_cart, me, ndims, coords, ierr) CALL mpi_cart_shift(comm_cart, 0, 1, neighs(1), neighs(2), ierr) CALL mpi_cart_shift(comm_cart, 1, 1, neighs(3), neighs(4), ierr) ! CALL mpi_cart_sub(comm_cart, [.TRUE.,.FALSE.], comm1d(1), ierr) CALL mpi_cart_sub(comm_cart, [.FALSE.,.TRUE.], comm1d(2), ierr) ! info%comm = comm_cart info%nu1 = nu1 info%nu2 = nu2 info%mu = mu info%nu0 = nu0 info%levels = levels info%direct_solve_nits = direct_solve_nits info%relax = relax info%omega = omega ! IF(me.EQ.0) THEN WRITE(*, in) END IF IF(nldebug) THEN CALL hostlist(comm_cart) END IF !================================================================================ ! 2.0 2d Grid construction ! ! Partition 2D grid ! CALL timera(0, 'Grid_construction') npt_glob(1) = nx+1 npt_glob(2) = ny+1 DO i=1,ndims CALL dist1d(comm1d(i), 0, npt_glob(i), s(i), npt_loc(i)) e(i) = s(i) + npt_loc(i) - 1 END DO IF(nldebug) THEN WRITE(*,'(a,i3.3,a,2(3i8,", "))') 'PE', me, ' coords, s,e:', & & (coords(i),s(i),e(i),i=1,ndims) END IF ! ! Global mesh ! dx = Lx/REAL(nx) dy = Ly/REAL(ny) ALLOCATE(xgrid(0:nx)) ALLOCATE(ygrid(0:ny)) xgrid = [ (i*dx, i=0,nx) ] ygrid = [ (i*dy, i=0,ny) ] ! ! Create grid structure ! ALLOCATE(grids(levels)) npoints = 9 ! Size of FD stencil ALLOCATE(id(npoints,2)) id=RESHAPE([ 0, -1, 0, 1, -1, 1, -1, 0, 1, & 0, -1, -1, -1, 0, 0, 1, 1, 1], & [npoints,2]) CALL create_grid(xgrid, ygrid, s, e, id, prb, grids, comm_cart) ! IF(nldebug) THEN DO l=1,levels WRITE(str,'(a,i0)') 'Number of local points at level ', l CALL disp(TRIM(str), grids(l)%npt_loc, comm_cart) END DO END IF CALL mpi_reduce(grids(levels)%npt_loc, npt_loc_min, 2, MPI_INTEGER, MPI_MIN, & & 0, comm_cart, ierr) IF(me.EQ.0) THEN WRITE(*,'(a,2i4)') 'Minimum local npt at coarsest grid:', npt_loc_min END IF ! CALL timera(1, 'Grid_construction') !================================================================================ ! 3.0 FD Operator ! CALL timera(0, 'FD Operator') ! DO l=1,levels CALL fdmat(grids(l), fdense, icrosst, grids(l)%fdmat) CALL ibc_fdmat(grids(l)%fdmat, prb) END DO ! CALL timera(1, 'FD Operator') !================================================================================ ! 4.0 RHS and exact solution at the finest grid (l=1) ! ! Allocate memory ! CALL timera(0, 'RHS and exact sol') ! s0 = grids(1)%s0; e0 = grids(1)%e0 s = grids(1)%s; e = grids(1)%e g = [1,1] v_exact = gvector_2d(s, e, g) ! Exact solutions errs = gvector_2d(s, e, g) ! Disc. errors resids = gvector_2d(s, e, g) ! Residuals ALLOCATE(resid_it(0:nits)) ALLOCATE(rresid(0:nits)) ALLOCATE(err_it(0:nits)) ! ! Set RHS at the finest grid and impose Dirichlet/Neuman BC. ! grids(1)%f = frhs(xgrid(s(1):e(1)),ygrid(s(2):e(2))) CALL ibc_rhs(grids(1)%f, s0, e0, prb) ! ! Exact solutions ! v_exact = fexact(xgrid(s(1):e(1)),ygrid(s(2):e(2))) ! CALL timera(1, 'RHS and exact sol') !================================================================================ ! 5.0 MG V-cycle iteration loop ! ! Norm of A and b ! norma = norm_mat(grids(1)%fdmat, comm_cart) normb = norm_vec(grids(1)%f, comm_cart) IF(me.EQ.0) THEN WRITE(*,'(a,2(1pe12.3))') 'Norm A and RHS', norma, normb END IF ! grids(1)%v = 0.0d0 resids = get_resids(comm_cart, grids(1)%fdmat, grids(1)%v, grids(1)%f) errs = grids(1)%v - v_exact err_it(0) = norm_vec(errs, comm_cart, root=0) resid_it(0) = norm_vec(resids, comm_cart) normv = norm_vec(grids(1)%v, comm_cart) rresid(0) = resid_it(0) / ( norma*normv + normb ) ! IF(me.EQ.0) THEN WRITE(*,'(a4,3(a12,a8))') 'its', 'residue', 'ratio', 'disc. err', 'ratio', & & 'rel. resid', 'ratio' WRITE(*,'(i4,3(1pe12.3,8X))') 0, resid_it(0), err_it(0), rresid(0) END IF ! CALL timera(0, 'MG V-cycle loop') t_mg = 0.0d0 DO it=1,nits t_mg0 = mpi_wtime() CALL mg(grids, info, 1) t_mg = t_mg + (mpi_wtime()-t_mg0) resids = get_resids(comm_cart, grids(1)%fdmat, grids(1)%v, grids(1)%f) errs = grids(1)%v - v_exact err_it(it) = norm_vec(errs, comm_cart) resid_it(it) = norm_vec(resids, comm_cart) normv = norm_vec(grids(1)%v, comm_cart) rresid(it) = resid_it(it) / ( norma*normv + normb ) ratio_err = err_it(it)/err_it(it-1) ratio_resid = resid_it(it)/resid_it(it-1) ratio_rresid= rresid(it)/ rresid(it-1) IF(me.EQ.0) THEN WRITE(*,'(i4,3(1pe12.3,0pf8.2))') it, & & resid_it(it), ratio_resid,& & err_it(it), ratio_err, & & rresid(it), ratio_rresid END IF IF(resid_it(it) .LE. atol .OR. rresid(it) .LE. rtol .OR. & & ABS(ratio_err-1._rkind).LT.errtol) THEN nits = it EXIT END IF END DO ! CALL timera(1, 'MG V-cycle loop') CALL mpi_reduce(t_mg, t_mg_max, 1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm_cart, ierr) CALL mpi_reduce(t_mg, t_mg_min, 1, MPI_DOUBLE_PRECISION, MPI_MIN, 0, comm_cart, ierr) IF(me.EQ.0) THEN WRITE(*,'(a,2(1pe10.3)/)') 'Minmax of MG (only) time (s):', t_mg_min, t_mg_max END IF !================================================================================ ! 9.0 Epilogue ! IF(nldebug) THEN CALL h5file END IF ! CALL timera(9, '') CALL MPI_FINALIZE(ierr) CONTAINS ! !+++ FUNCTION fdense(x) ! ! Return density ! REAL(rkind), INTENT(in) :: x(:) REAL(rkind) :: fdense(SIZE(x)) fdense(:) = 0.5d0*beta*miome*EXP( -(x(:)-Lx/3d0)**2d0/(Lx/2d0)**2d0 ); END FUNCTION fdense !+++ FUNCTION fexact(x,y) ! ! Return analytical solution ! REAL(rkind), INTENT(in) :: x(:), y(:) REAL(rkind) :: fexact(SIZE(x),SIZE(y)) REAL(rkind) :: c INTEGER :: j IF(prb.EQ.'dddd') THEN DO j=1,SIZE(y) c = SIN(2.0d0*pi*ky*y(j)/Ly) fexact(:,j) = SIN(2.0d0*pi*kx*x(:)/Lx)*c END DO ELSE IF (prb.EQ.'nndd') THEN DO j=1,SIZE(y) c = SIN(2.0d0*pi*ky*y(j)/Ly) fexact(:,j) = COS(2.0d0*pi*kx*x(:)/Lx)*c END DO END IF END FUNCTION fexact !+++ FUNCTION frhs(x,y) ! ! Return RHS ! REAL(rkind), INTENT(in) :: x(:), y(:) REAL(rkind) :: frhs(SIZE(x),SIZE(y)) REAL(rkind) :: c, s, d(SIZE(x)) REAL(rkind) :: corr INTEGER :: j corr = 1.d0+icrosst**2/4.0d0 d(:) = fdense(x(:)) IF(prb.EQ.'dddd') THEN DO j=1,SIZE(y) c = COS(2.0d0*pi*ky*y(j)/Ly) s = SIN(2.0d0*pi*ky*y(j)/Ly) frhs(:,j)=-4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * SIN(2.0d0*pi*kx*x(:)/Lx)*s & & -icrosst*kx*ky/Lx/Ly * COS(2.0d0*pi*kx*x(:)/Lx)*c ) & & + d(:) * SIN(2.0d0*pi*kx*x(:)/Lx)*s END DO ELSE IF (prb.EQ.'nndd') THEN DO j=1,SIZE(y) c = COS(2.0d0*pi*ky*y(j)/Ly) s = SIN(2.0d0*pi*ky*y(j)/Ly) frhs(:,j)=-4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * COS(2.0d0*pi*kx*x(:)/Lx)*s & & +icrosst*kx*ky/Lx/Ly * SIN(2.0d0*pi*kx*x(:)/Lx)*c ) & & + d(:) * COS(2.0d0*pi*kx*x(:)/Lx)*s END DO END IF END FUNCTION frhs !+++! FUNCTION outerprod(x, y) RESULT(r) ! ! outer product ! REAL(rkind), INTENT(in) :: x(:), y(:) REAL(rkind) :: r(SIZE(x),SIZE(y)) INTEGER :: i, j DO j=1,SIZE(y) DO i=1,SIZE(x) r(i,j) = x(i)*y(j) END DO END DO END FUNCTION outerprod !+++ SUBROUTINE h5file ! ! Result hdf5 file ! USE futils CHARACTER(len=128) :: file='ppoisson_fd.h5' INTEGER :: fid CALL creatf(file, fid, real_prec='d', mpicomm=comm_cart) CALL attach(fid, '/', 'PRB', prb) CALL attach(fid, '/', 'RELAX', relax) CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NY', ny) CALL attach(fid, '/', 'KX', kx) CALL attach(fid, '/', 'KY', ky) CALL attach(fid, '/', 'LX', Lx) CALL attach(fid, '/', 'LY', Ly) CALL attach(fid, '/', 'ICROSST', icrosst) CALL attach(fid, '/', 'BETA', beta) CALL attach(fid, '/', 'MIOME', miome) CALL attach(fid, '/', 'OMEGA', omega) CALL attach(fid, '/', 'NITS', nits) CALL attach(fid, '/', 'DIRECT_SOLVE_NITS', direct_solve_nits) CALL attach(fid, '/', 'LEVELS', levels) CALL attach(fid, '/', 'NU1', nu1) CALL attach(fid, '/', 'NU2', nu2) CALL attach(fid, '/', 'NU0', nu0) CALL attach(fid, '/', 'MU', mu) ! CALL putarr(fid, '/xgrid', xgrid, ionode=0) ! only rank 0 does IO CALL putarr(fid, '/ygrid', ygrid, ionode=0) ! only rank 0 does IO ! CALL putarrnd(fid, '/f', grids(1)%f%val, (/1,2/), garea=g) CALL putarrnd(fid, '/v', v_exact%val, (/1,2/), garea=g) CALL putarrnd(fid, '/u', grids(1)%v%val, (/1,2/), garea=g) CALL putarrnd(fid, '/errs', errs%val, (/1,2/), garea=(/1,1/)) CALL putarrnd(fid, '/resids', resids%val,(/1,2/), garea=(/1,1/)) ! CALL putarr(fid, '/resid', resid_it(0:nits), ionode=0) CALL putarr(fid, '/error', err_it(0:nits), ionode=0) ! CALL putmat(fid, '/MAT', grids(1)%fdmat) CALL closef(fid) END SUBROUTINE h5file !+++ END PROGRAM main diff --git a/multigrid/src/stencil_mod.f90 b/multigrid/src/stencil_mod.f90 index 04095c5..cd2618a 100644 --- a/multigrid/src/stencil_mod.f90 +++ b/multigrid/src/stencil_mod.f90 @@ -1,243 +1,243 @@ !> !> @file stencil_mod.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> MODULE stencil ! ! stencil_2d: Implement 2D stencil for matrix-less operations ! ! T.M. Tran, CRPP-EPFL ! August 2013 ! USE iso_fortran_env, ONLY : rkind => real64 IMPLICIT NONE ! PRIVATE PUBLIC :: stencil_2d, init, vmx, putmat, laplacian, & & OPERATOR(*) ! TYPE stencil_2d LOGICAL :: nluni INTEGER, DIMENSION(2) :: ldim, gdim INTEGER, DIMENSION(2) :: s0, e0, s, e INTEGER :: npoints INTEGER, ALLOCATABLE :: id(:,:) REAL(rkind), ALLOCATABLE :: val(:,:,:) END TYPE stencil_2d ! INTERFACE init MODULE PROCEDURE init_stencil_2d END INTERFACE init INTERFACE vmx MODULE PROCEDURE vmx_stencila_2d MODULE PROCEDURE vmx_stencilg_2d END INTERFACE vmx INTERFACE putmat module procedure putmat_stencil END INTERFACE putmat ! INTERFACE OPERATOR(*) MODULE PROCEDURE vmx_stencila_2d MODULE PROCEDURE vmx_stencilg_2d END INTERFACE OPERATOR(*) ! CONTAINS !================================================================================ SUBROUTINE init_stencil_2d(s, e, id, nluni, mat, comm) ! ! stencil_2d constructor ! USE mpi INTEGER, INTENT(in) :: s(2), e(2) ! Bounds in each dim. INTEGER, INTENT(in) :: id(:,:) ! Structure of stencil LOGICAL, INTENT(in) :: nluni ! Uniform stencil TYPE(stencil_2d), INTENT(out) :: mat INTEGER, INTENT(in) :: comm INTEGER :: me, ndim=2, ierr INTEGER :: npoints ! Size of the stencil ! CALL mpi_comm_rank(comm, me, ierr) ! IF(id(1,1).NE.0 .AND. id(1,2).NE.0) THEN IF(me.EQ.0) THEN WRITE(*,*) 'INIT_STENCIL: id(1,:) should be (0,0)!' CALL mpi_abort(comm, -1, ierr) END IF END IF ! npoints = SIZE(id,1) mat%npoints = npoints mat%s = s mat%e = e mat%nluni = nluni IF(nluni) THEN ALLOCATE(mat%val(1,1, 0:npoints-1)) ELSE ALLOCATE(mat%val(s(1):e(1), s(2):e(2), 0:npoints-1)) END IF ALLOCATE(mat%id(0:npoints-1, ndim)) mat%id(:,:) = id(:,:) mat%val(:,:,:) = 0.0 ! mat%ldim = e-s+1 CALL mpi_allreduce(mat%s, mat%s0, ndim, MPI_INTEGER, MPI_MIN, comm, ierr) CALL mpi_allreduce(mat%e, mat%e0, ndim, MPI_INTEGER, MPI_MAX, comm, ierr) mat%gdim = mat%e0 - mat%s0 + 1 ! END SUBROUTINE init_stencil_2d !================================================================================ FUNCTION vmx_stencila_2d(mat, xarr) RESULT(res) ! ! Return product res = mat*x, where x and res are simple arrays ! TYPE(stencil_2d), INTENT(in) :: mat REAL(rkind), ALLOCATABLE, INTENT(in) :: xarr(:,:) REAL(rkind) :: res(LBOUND(xarr,1):UBOUND(xarr,1), & & LBOUND(xarr,2):UBOUND(xarr,2)) INTEGER :: k, i, j INTEGER, DIMENSION(2) :: smin, emax, d, lb, ub ! smin(:) = mat%s0(:) emax(:) = mat%e0(:) res = 0.0 DO k=0,mat%npoints-1 d(:) = mat%id(k,:) lb = MAX(smin, smin-d, mat%s) ub = MIN(emax, emax-d, mat%e) DO j=lb(2),ub(2) DO i=lb(1),ub(1) res(i,j) = res(i,j) + mat%val(i,j,k)*xarr(i+d(1),j+d(2)) END DO END DO END DO END FUNCTION vmx_stencila_2d !================================================================================ FUNCTION vmx_stencilg_2d(mat, xarr) RESULT(res) ! ! Return product res= mat*x, where x and res are gvectors ! USE gvector, ONLY : gvector_2d TYPE(stencil_2d), INTENT(in) :: mat TYPE(gvector_2d), INTENT(in) :: xarr TYPE(gvector_2d) :: res INTEGER :: k, i, j INTEGER, DIMENSION(2) :: d, s, e ! s = xarr%s e = xarr%e res = gvector_2d(xarr%s, xarr%e, xarr%g) ! ! Diagonal contributions: d(0) = (0,0) DO j=s(2),e(2) DO i=s(1),e(1) res%val(i,j) = mat%val(i,j,0)*xarr%val(i,j) END DO END DO ! DO k=1,mat%npoints-1 d(:) = mat%id(k,:) DO j=s(2),e(2) DO i=s(1),e(1) res%val(i,j) = res%val(i,j) + mat%val(i,j,k)*xarr%val(i+d(1),j+d(2)) END DO END DO END DO END FUNCTION vmx_stencilg_2d !================================================================================ SUBROUTINE putmat_stencil(fid, label, mat, str) USE futils INTEGER, INTENT(in) :: fid CHARACTER(len=*), INTENT(in) :: label TYPE(stencil_2d), INTENT(in) :: mat CHARACTER(len=*), INTENT(in), OPTIONAL :: str ! IF(PRESENT(str)) THEN CALL creatg(fid, label, str) ELSE CALL creatg(fid, label) END IF ! CALL putarr(fid, TRIM(label)//'/dists', mat%id, ionode=0) CALL putarrnd(fid, TRIM(label)//'/val', mat%val, (/1,2/)) END SUBROUTINE putmat_stencil !======================================================================= SUBROUTINE laplacian(dx, dy, mat) ! ! Construct a Laplacian using 5-point FD discretization ! Assume homegeneous Dirichlet BC on all 4 faces. ! REAL(rkind), INTENT(in) :: dx, dy TYPE(stencil_2d), INTENT(inout) :: mat ! INTEGER :: i, j, k INTEGER :: ieast, iwest, jsouth, jnorth INTEGER, DIMENSION(2) :: d REAL(rkind) :: dx2inv, dy2inv ! ! Assemble the stencil ! dx2inv = 1.0d0/dx**2 dy2inv = 1.0d0/dy**2 ! mat%val(:,:,0) = -2.0d0*(dx2inv+dy2inv) ! Diagonal ! DO k=1,mat%npoints-1 ! Off diagonal d = mat%id(k,:) DO j=mat%s(2),mat%e(2) DO i=mat%s(1),mat%e(1) IF(d(1).EQ.0) THEN ! north and south mat%val(i,j,k) = dy2inv ELSE IF(d(2).EQ.0) THEN ! east and west mat%val(i,j,k) = dx2inv END IF END DO END DO END DO ! ! Impose Dirichlet BC on all 4 boundaries ! ieast = mat%s0(1) IF(ieast .EQ. mat%s(1)) THEN ! East boundary mat%val(ieast, mat%s(2):mat%e(2), :) = 0.0 mat%val(ieast, mat%s(2):mat%e(2), 0) = 1.0 END IF iwest = mat%e0(1) IF(iwest .EQ. mat%e(1)) THEN ! West boundary mat%val(iwest, mat%s(2):mat%e(2), :) = 0.0 mat%val(iwest, mat%s(2):mat%e(2), 0) = 1.0 END IF jsouth = mat%s0(2) IF(jsouth .EQ. mat%s(2)) THEN ! South boundary mat%val(mat%s(1):mat%e(1), jsouth, :) = 0.0 mat%val(mat%s(1):mat%e(1), jsouth, 0) = 1.0 END IF jnorth = mat%e0(2) IF(jnorth .EQ. mat%e(2)) THEN ! North boundary mat%val(mat%s(1):mat%e(1), jnorth, :) = 0.0 mat%val(mat%s(1):mat%e(1), jnorth, 0) = 1.0 END IF ! END SUBROUTINE laplacian !================================================================================ END MODULE stencil diff --git a/multigrid/src/test_csr.f90 b/multigrid/src/test_csr.f90 index 02172b2..bd6b2e1 100644 --- a/multigrid/src/test_csr.f90 +++ b/multigrid/src/test_csr.f90 @@ -1,145 +1,145 @@ !> !> @file test_csr.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Test routines of module csr_mod ! USE multigrid USE csr IMPLICIT NONE ! INTEGER :: nx=8, nidbas=1, alpha=0, modem=10 DOUBLE PRECISION :: sigma=1.0d0, kmode=10.0 LOGICAL :: nlper=.FALSE. INTEGER :: ngauss, nterms INTEGER :: i, j ! TYPE(grid1d) :: gridx(1) TYPE(csr_mat) :: mata ! DOUBLE PRECISION, ALLOCATABLE :: arow(:), sum_row(:) DOUBLE PRECISION, ALLOCATABLE :: acol(:), sum_col(:) DOUBLE PRECISION, ALLOCATABLE :: sol(:), rhs(:) ! NAMELIST /newrun/ nx, nidbas, sigma, kmode, modem, alpha, nlper !---------------------------------------------------------------------------- READ(*,newrun) WRITE(*,newrun) ! ! Set grid ! ngauss = CEILING(REAL(2*nidbas+alpha+1,8)/2.d0) CALL create_grid(nx, nidbas, ngauss, alpha, gridx, nlper) ! ! Create FE matrice and set BC u(0)=u(1)=0 ! nterms = 3 CALL femat(gridx(1)%spl, mata, coefeq, nterms) CALL to_mat(mata) WRITE(*,'(/a,2i6)') 'rank, nnz', mata%rank, mata%nnz WRITE(*,'(a/(12(1pe12.3)))') 'diag', mata%val(mata%idiag) ALLOCATE(arow(mata%rank)) ALLOCATE(acol(mata%rank)) ALLOCATE(sum_row(mata%rank)) ALLOCATE(sum_col(mata%rank)) sum_col = 0.0d0 DO i=1,mata%rank CALL getrow(mata, i, arow) sum_row(i) = SUM(arow) sum_col = sum_col+arow IF(i.EQ.1) WRITE(*,'(/a)') 'Matrix A' WRITE(*,'(12(1pe12.3))') arow END DO WRITE(*,'(a/(12(1pe12.3)))') 'sum of row', sum_row WRITE(*,'(a/(12(1pe12.3)))') 'sum of col', sum_col DO j=1,mata%rank CALL getcol(mata, j, acol) sum_col(j) = SUM(acol) END DO WRITE(*,'(a/(12(1pe12.3)))') 'sum of col', sum_col ! ! Clear and rebuild matrix ! WRITE(*,'(/a)') 'Clear and rebuild matrix ...' CALL clear_mat(mata) CALL femat(gridx(1)%spl, mata, coefeq, nterms) WRITE(*,'(a,2i6)') 'rank, nnz', mata%rank, mata%nnz DO i=1,mata%rank CALL getrow(mata, i, arow) WRITE(*,'(12(1pe12.3))') arow END DO WRITE(*,'(a/(12(1pe12.3)))') 'diag', mata%val(mata%idiag) ! ! Test VMX ! ALLOCATE(sol(mata%rank)) ALLOCATE(rhs(mata%rank)) sol = 1.0d0 ! rhs = vmx(mata, sol) acol = rhs-sum_row WRITE(*,'(/a)') 'Test VMX ...' WRITE(*,'(a/(12(1pe12.3)))') 'amat*sol', rhs WRITE(*,'(a,1pe12.3)') 'Error norm =', SQRT(DOT_PRODUCT(acol,acol)) ! rhs = vmx(mata, sol, 'T') acol = rhs-sum_col WRITE(*,'(a/(12(1pe12.3)))') "amat'*sol", rhs WRITE(*,'(a,1pe12.3)') 'Error norm =', SQRT(DOT_PRODUCT(acol,acol)) ! CALL destroy(mata) CONTAINS SUBROUTINE coefeq(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE PRECISION, INTENT(out) :: c(:) SELECT CASE (alpha) CASE(0) ! Cartesian geometry c(1) = 1.0d0 idt(1) = 1 idw(1) = 1 c(2) = sigma idt(2) = 0 idw(2) = 0 c(3) = 1.0d0 idt(3) = 1 idw(3) = 0 CASE(1) c(1) = x idt(1) = 1 idw(1) = 1 c(2) = modem**2/x idt(2) = 0 idw(2) = 0 c(3) = 1.0d0 idt(3) = 1 idw(3) = 0 CASE default WRITE(*,'(a,i0,a)') 'COEFEQ: alpha ', alpha, ' not defined!' END SELECT END SUBROUTINE coefeq !---------------------------------------------------------------------------- END PROGRAM main diff --git a/multigrid/src/test_gvec1d.f90 b/multigrid/src/test_gvec1d.f90 index 1dd22f5..d67e45a 100644 --- a/multigrid/src/test_gvec1d.f90 +++ b/multigrid/src/test_gvec1d.f90 @@ -1,190 +1,190 @@ !> !> @file test_gvec1d.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> ! ! Test implementation of 1D vectors with arbirary ! vector bounds and ghost cell width. ! ! T.M. Tran (09/2013) ! MODULE gvector USE iso_fortran_env, ONLY : rkind => real64 IMPLICIT NONE PRIVATE PUBLIC :: rkind, gvector_1d, disp, norm2, & & OPERATOR(+), OPERATOR(-), OPERATOR(*), & & ASSIGNMENT(=) TYPE gvector_1d INTEGER :: s, e, g REAL(rkind), ALLOCATABLE :: val(:) END TYPE gvector_1d INTERFACE gvector_1d MODULE PROCEDURE constructor END INTERFACE gvector_1d INTERFACE OPERATOR(+) MODULE PROCEDURE add_scal MODULE PROCEDURE add_vec END INTERFACE OPERATOR(+) INTERFACE OPERATOR(-) MODULE PROCEDURE minus_vec MODULE PROCEDURE substract_vec END INTERFACE OPERATOR(-) INTERFACE OPERATOR(*) MODULE PROCEDURE scale_left MODULE PROCEDURE scale_right END INTERFACE OPERATOR(*) INTERFACE ASSIGNMENT(=) MODULE PROCEDURE from_scal MODULE PROCEDURE from_vec END INTERFACE ASSIGNMENT(=) INTERFACE norm2 module procedure norm2_gvector_1d END INTERFACE norm2 CONTAINS FUNCTION constructor(s, e, g) RESULT(res) INTEGER, INTENT(in) :: s, e INTEGER, OPTIONAL, INTENT(in) :: g TYPE(gvector_1d) :: res INTEGER :: lb, ub res%g= 0 IF(PRESENT(g)) res%g=g res%s=s res%e=e lb = res%s-res%g ub = res%e+res%g ALLOCATE(res%val(lb:ub)) res%val = -9999.0 END FUNCTION constructor FUNCTION add_vec(lhs, rhs) RESULT(res) TYPE(gvector_1d), INTENT(in) :: lhs, rhs TYPE(gvector_1d) :: res res = gvector_1d(lhs%s, lhs%e, lhs%g) res%val(res%s:res%e) = lhs%val(res%s:res%e) + rhs%val(res%s:res%e) END FUNCTION add_vec FUNCTION add_scal(lhs, rhs) RESULT(res) TYPE(gvector_1d), INTENT(in) :: lhs REAL(rkind), INTENT(in) :: rhs TYPE(gvector_1d) :: res res = gvector_1d(lhs%s, lhs%e, lhs%g) res%val(res%s:res%e) = lhs%val(res%s:res%e) + rhs END FUNCTION add_scal FUNCTION minus_vec(this) RESULT(res) TYPE(gvector_1d), INTENT(in) :: this TYPE(gvector_1d) :: res res = gvector_1d(this%s, this%e, this%g) res%val(res%s:res%e) = -this%val(res%s:res%e) END FUNCTION minus_vec FUNCTION substract_vec(lhs, rhs) RESULT(res) TYPE(gvector_1d), INTENT(in) :: lhs, rhs TYPE(gvector_1d) :: res res = gvector_1d(lhs%s, lhs%e, lhs%g) res = lhs + (-rhs) END FUNCTION substract_vec FUNCTION scale_left(lhs, rhs) RESULT(res) REAL(rkind), INTENT(in) :: lhs TYPE(gvector_1d), INTENT(in) :: rhs TYPE(gvector_1d) :: res res = gvector_1d(rhs%s, rhs%e, rhs%g) res%val(res%s:res%e) = lhs * rhs%val(res%s:res%e) END FUNCTION scale_left FUNCTION scale_right(lhs, rhs) RESULT(res) TYPE(gvector_1d), INTENT(in) :: lhs REAL(rkind), INTENT(in) :: rhs TYPE(gvector_1d) :: res res = gvector_1d(lhs%s, lhs%e, lhs%g) res%val(res%s:res%e) = rhs * lhs%val(res%s:res%e) END FUNCTION scale_right SUBROUTINE from_vec(lhs, rhs) TYPE(gvector_1d), INTENT(inout) :: lhs REAL(rkind), INTENT(in) :: rhs(:) INTEGER :: n n = lhs%e - lhs%s + 1 IF(SIZE(rhs) .NE. n) THEN PRINT*, 'from_vec: sizes of rhs and lhs not equal!' STOP END IF lhs%val(lhs%s:lhs%e) = rhs(1:n) END SUBROUTINE from_vec SUBROUTINE from_scal(lhs, rhs) TYPE(gvector_1d), INTENT(inout) :: lhs REAL(rkind), INTENT(in) :: rhs lhs%val(lhs%s:lhs%e) = rhs END SUBROUTINE from_scal SUBROUTINE disp(str,this) CHARACTER(len=*), INTENT(in) :: str TYPE(gvector_1d), INTENT(in) :: this WRITE(*,'(/a,3i6)') str//': s, e, g =', this%s, this%e, this%g WRITE(*,'(10(1pe12.3))') this%val END SUBROUTINE disp FUNCTION norm2_gvector_1d(this) RESULT(res) TYPE(gvector_1d), INTENT(in) :: this REAL(rkind) :: res res = NORM2(this%val(this%s:this%e)) END FUNCTION norm2_gvector_1d END MODULE gvector PROGRAM main USE gvector IMPLICIT NONE INTEGER :: s=0, e=5, g=1 INTEGER :: i, lb, ub REAL(rkind) :: a=0.1 TYPE(gvector_1d) :: v1, v2, v3 ! lb = s-g ub = e+g v1 = gvector_1d(s, e, g) v1%val(s:e) = [ (i, i=s,e) ] CALL disp('v1', v1) ! v2 = v1 + a*v1 CALL disp('v1+a*v1', v2) ! v3 = v1 - v1*a CALL disp('v1-v1*a', v3) ! WRITE(*,'(a,1pe12.3)') 'norm of v1 =', NORM2(v1) WRITE(*,'(a,1pe12.3)') 'norm of v1-a*v1 =', NORM2(v1-a*v1) ! v1 = 0.0d0 CALL disp('Should be all zero', v1) v2 = [ 1.d0, 2.d0, 3.d0, 4.d0, 5.d0, 6.d0 ] CALL disp('Should be (1. 2. 3. 4. 5. 6.)', v2) END PROGRAM main diff --git a/multigrid/src/test_intergrid0.f90 b/multigrid/src/test_intergrid0.f90 index 7a76d32..f08d611 100644 --- a/multigrid/src/test_intergrid0.f90 +++ b/multigrid/src/test_intergrid0.f90 @@ -1,230 +1,230 @@ !> !> @file test_intergrid0.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! intergrid transfer using *serial* multigrid module: ! - restriction of rhs ! - prolongation of sol ! USE multigrid, ONLY : grid2d, mg_info, & & get_lmax, create_grid_fd, ibc_transf, & & prolong, restrict IMPLICIT NONE DOUBLE PRECISION, PARAMETER :: pi=4.0d0*ATAN(1.0d0) DOUBLE PRECISION :: Lx, Ly, kx, ky, icrosst, beta, miome INTEGER :: nx, ny, levels CHARACTER(len=4) :: prb LOGICAL :: nldebug ! DOUBLE PRECISION :: dx, dy DOUBLE PRECISION, ALLOCATABLE :: x(:),y(:) ! TYPE(mg_info) :: info ! info for MG TYPE(grid2d), ALLOCATABLE :: grids(:) ! INTEGER :: i, l ! NAMELIST /parameters/ prb, nx, ny, levels, Lx, Ly, kx, ky, icrosst, beta, & & miome, nldebug !-------------------------------------------------------------------------------- ! ! Default inputs ! nx=32 ny=32 levels = 2 kx=1 ky=1 icrosst=1.0d0 Lx = 1.0D0 Ly = 1.0D0 miome = 200d0 beta = 0d0 prb = 'dddd' nldebug = .FALSE. ! READ(*,parameters) WRITE(*,parameters) ! ! Fine grid ! dx = lx/REAL(nx,8) dy = ly/REAL(ny,8) ALLOCATE(x(0:nx), y(0:ny)) x = dx * [(i,i=0,nx)] y = dy * [(i,i=0,ny)] WRITE(*,'(a/10(1pe12.3))') 'x =', x WRITE(*,'(a/10(1pe12.3))') 'y =', y ! ! Create array of grids ! levels = MIN(levels, get_lmax(nx), get_lmax(ny)) WRITE(*,'(a,i4)') 'Number of levels', levels ALLOCATE(grids(levels)) info%nu1 = 1 info%nu2 = 1 info%mu = 1 info%nu0 = 1 info%levels = levels info%relax = 'jac' info%omega = 1 CALL create_grid_fd(x, y, grids, info, mat_type='cds', debug=nldebug) ! ! Set BC on grid transfer matrices ! IF(prb.EQ.'dddd') CALL ibc_transf(grids,1,3) ! Direction X CALL ibc_transf(grids,2,3) ! Direction Y ! ! Define RHS at l=1, compute RHS at l=2,...,levels by "restriction". ! grids(1)%f(:,:) = frhs(grids(1)%x,grids(1)%y) DO l=2,levels grids(l)%f = restrict(grids(l)%matp, grids(l-1)%f) grids(l)%f = 0.25d0*grids(l)%f ! Scaling for FD END DO ! ! Define SOL at l=levels, compute SOL at l=levels-1,..,1 by "prolongation" ! grids(levels)%v(:,:) = fsol(grids(levels)%x,grids(levels)%y) DO l=levels-1,1,-1 grids(l)%v = prolong(grids(l+1)%matp, grids(l+1)%v) END DO ! IF(nldebug) THEN DO l=1,levels WRITE(*,'(a,i3)') '==== Level', l WRITE(*,'(a)') 'f =' DO i=0,grids(l)%n(1) WRITE(*,'(10f8.3)') grids(l)%f(i,:) END DO WRITE(*,'(a)') 'v =' DO i=0,grids(l)%n(1) WRITE(*,'(10f8.3)') grids(l)%v(i,:) END DO END DO END IF ! ! Epilogue ! CALL h5file !-------------------------------------------------------------------------------- CONTAINS !+++ FUNCTION fdense(x) ! ! Return density ! DOUBLE PRECISION, INTENT(in) :: x(:) DOUBLE PRECISION :: fdense(SIZE(x)) fdense(:) = 0.5d0*beta*miome*EXP( -(x(:)-Lx/3d0)**2d0/(Lx/2d0)**2d0 ); END FUNCTION fdense !+++ FUNCTION frhs(x,y) ! ! Return RHS ! DOUBLE PRECISION, INTENT(in) :: x(:), y(:) DOUBLE PRECISION :: frhs(SIZE(x),SIZE(y)) DOUBLE PRECISION :: c, s, d(SIZE(x)) DOUBLE PRECISION :: corr INTEGER :: j corr = 1.d0+icrosst**2/4.0d0 d(:) = fdense(x(:)) IF(prb.EQ.'dddd') THEN DO j=1,SIZE(y) c = COS(2.0d0*pi*ky*y(j)/Ly) s = SIN(2.0d0*pi*ky*y(j)/Ly) frhs(:,j) = -4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * SIN(2.0d0*pi*kx*x(:)/Lx)*s & & -icrosst*kx*ky/Lx/Ly * COS(2.0d0*pi*kx*x(:)/Lx)*c ) & & + d(:) * SIN(2.0d0*pi*kx*x(:)/Lx)*s END DO ELSE IF (prb.EQ.'nndd') THEN DO j=1,SIZE(y) c = COS(2.0d0*pi*ky*y(j)/Ly) s = SIN(2.0d0*pi*ky*y(j)/Ly) frhs(:,j) = -4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * COS(2.0d0*pi*kx*x(:)/Lx)*s & & +icrosst*kx*ky/Lx/Ly * SIN(2.0d0*pi*kx*x(:)/Lx)*c ) & & + d(:) * COS(2.0d0*pi*kx*x(:)/Lx)*s END DO END IF END FUNCTION frhs !+++ FUNCTION fsol(x,y) ! ! Return analytical solution ! DOUBLE PRECISION, INTENT(in) :: x(:), y(:) DOUBLE PRECISION :: fsol(SIZE(x),SIZE(y)) DOUBLE PRECISION :: c INTEGER :: j IF(prb.EQ.'dddd') THEN DO j=1,SIZE(y) c = SIN(2.0d0*pi*ky*y(j)/Ly) fsol(:,j) = SIN(2.0d0*pi*kx*x(:)/Lx)*c END DO ELSE IF (prb.EQ.'nndd') THEN DO j=1,SIZE(y) c = SIN(2.0d0*pi*ky*y(j)/Ly) fsol(:,j) = COS(2.0d0*pi*kx*x(:)/Lx)*c END DO END IF END FUNCTION fsol !+++ SUBROUTINE h5file USE futils USE csr, ONLY : putmat CHARACTER(len=128) :: file='test_intergrid0.h5' INTEGER :: fid INTEGER :: l CHARACTER(len=64) :: dsname CALL creatf(file, fid, real_prec='d') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NY', ny) CALL attach(fid, '/', 'KX', kx) CALL attach(fid, '/', 'KY', ky) CALL attach(fid, '/', 'LX', Lx) CALL attach(fid, '/', 'LY', Ly) CALL attach(fid, '/', 'BETA', beta) CALL attach(fid, '/', 'LEVELS', levels) CALL attach(fid, '/', 'PRB', prb) CALL attach(fid, '/', 'NLDEBUG', nldebug) CALL creatg(fid, '/mglevels') DO l=1,levels WRITE(dsname,'("/mglevels/level.",i2.2)') l CALL creatg(fid, TRIM(dsname)) IF(l.GT.1) THEN CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1)) CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2)) END IF CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x) CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y) CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f) CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v) END DO CALL closef(fid) END SUBROUTINE h5file !+++ END PROGRAM main diff --git a/multigrid/src/test_intergrid1.f90 b/multigrid/src/test_intergrid1.f90 index 0d0a23b..514ad2f 100644 --- a/multigrid/src/test_intergrid1.f90 +++ b/multigrid/src/test_intergrid1.f90 @@ -1,240 +1,240 @@ !> !> @file test_intergrid1.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Test implementation of (parallel) matrix-free ! USE iso_fortran_env, ONLY : rkind => real64 USE parmg, ONLY : grid2_type, init_restrict, coarse, get_lmax, & & exchange, prolong, restrict, disp, norm_vec USE pputils2, ONLY : dist1d USE gvector, ONLY : gvector_2d,OPERATOR(-) USE futils USE mpi IMPLICIT NONE ! INTEGER, PARAMETER :: ndims=2 INTEGER :: ierr, me, npes INTEGER, DIMENSION(ndims) :: dims=[0,0] INTEGER, DIMENSION(ndims) :: lmax, coords, comm1d LOGICAL, DIMENSION(ndims) :: periods=[.FALSE.,.FALSE.] LOGICAL :: reorder =.FALSE. INTEGER :: comm_cart, comm_futils ! INTEGER :: fin CHARACTER(len=64) :: filein = 'test_intergrid0.h5' CHARACTER(len=64) :: dsname CHARACTER(len=4) :: prb LOGICAL :: nldebug ! INTEGER :: nx, ny, levels TYPE(grid2_type), ALLOCATABLE :: grids(:), new_grids(:) INTEGER, DIMENSION(ndims) :: e, s, npt_glob, npt_loc, npt_loc_min ! CHARACTER(len=64) :: str REAL(rkind) :: err INTEGER :: i, k, l !-------------------------------------------------------------------------------- ! 1.0 Prologue ! ! Init MPI and setup 2D grid topology ! CALL mpi_init(ierr) CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) CALL mpi_dims_create(npes, ndims, dims, ierr) CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, comm_cart,& & ierr) CALL mpi_cart_coords(comm_cart, me, ndims, coords, ierr) CALL mpi_cart_sub(comm_cart, [.TRUE.,.FALSE.], comm1d(1), ierr) CALL mpi_cart_sub(comm_cart, [.FALSE.,.TRUE.], comm1d(2), ierr) ! IF( me .EQ. 0 ) WRITE(*,'(a,i3,i3)') '2d processor grid', dims ! ! Get nx, ny, levels from h5 file created by test_intergrid0 ! IF( command_argument_count() > 0 ) THEN CALL get_command_argument(1, filein) END IF IF(me.EQ.0) WRITE(*,'(a,a)') 'filein = ', TRIM(filein) ! CALL mpi_comm_dup(comm_cart, comm_futils, ierr) CALL openf(filein, fin, mpicomm=comm_futils) CALL getatt(fin, '/', 'NX', nx, ierr) CALL getatt(fin, '/', 'NY', ny, ierr) CALL getatt(fin, '/', 'LEVELS', levels, ierr) CALL getatt(fin, '/', 'PRB', prb, ierr) CALL getatt(fin, '/', 'NLDEBUG', nldebug, ierr) IF(me.EQ.0) WRITE(*,'(a,a,3i5,l3)') 'prb, nx, ny, levels: ', prb, nx, ny, & & levels, nldebug !-------------------------------------------------------------------------------- ! 2.0 Read (f,v) from h5 file ! ALLOCATE(grids(levels)) ! ! Partition on finest grid ! npt_glob(1) = nx+1 npt_glob(2) = ny+1 DO i=1,ndims CALL dist1d(comm1d(i), 0, npt_glob(i), s(i), npt_loc(i)) e(i) = s(i) + npt_loc(i) - 1 lmax(i) = get_lmax(s(i), npt_loc(i), 1, comm1d(i)) END DO npt_loc = e-s+1 IF(me.EQ.0) WRITE(*,'(a,2i4)') 'lmax', lmax ! ! Partition on coaser grids ! DO l=1,levels IF(l.GT.1) THEN CALL coarse(s,e) npt_loc = e-s+1 CALL mpi_allreduce(npt_loc, npt_loc_min, 2, MPI_INTEGER, & & MPI_MIN, comm_cart, ierr) CALL mpi_allreduce(e, npt_glob, 2, MPI_INTEGER, MPI_MAX, & & comm_cart, ierr) npt_glob = npt_glob+1 END IF WRITE(str,'(a,i3,a)') 'Partition at level', l, ': start. index =' CALL disp(TRIM(str), s, comm_cart) IF(me.EQ.0) THEN WRITE(*,'(a,2i6)') 'npt_glob ', npt_glob WRITE(*,'(a,2i6)') 'npt_loc_min', npt_loc_min END IF grids(l)%s = s grids(l)%e = e grids(l)%npt = npt_glob grids(l)%f = gvector_2d(s, e, [1,1]) grids(l)%v = gvector_2d(s, e, [1,1]) ALLOCATE(grids(l)%x(0:npt_glob(1)-1)) ! Global coords (x,y) ALLOCATE(grids(l)%y(0:npt_glob(2)-1)) WRITE(dsname,'("/mglevels/level.",i2.2)') l CALL getarr(fin, TRIM(dsname)//"/x", grids(l)%x) CALL getarr(fin, TRIM(dsname)//"/y", grids(l)%y) CALL getarrnd(fin, TRIM(dsname)//"/v", grids(l)%v%val, [1,2], garea=[1,1]) CALL getarrnd(fin, TRIM(dsname)//"/f", grids(l)%f%val, [1,2], garea=[1,1]) END DO !-------------------------------------------------------------------------------- ! 3.0 Parallel intergrid transfer ! ALLOCATE(new_grids(levels)) CALL copy_grids(grids, new_grids) ! ! Set up restriction stencil ! DO l=2,levels CALL init_restrict(new_grids(l), prb, comm_cart) END DO ! ! Prolongation of v ! DO l=levels-1,1,-1 CALL exchange(comm_cart, grids(l+1)%v) CALL prolong(grids(l+1)%v, new_grids(l)%v) IF(nldebug) THEN IF(me.EQ.0) WRITE(*,'(a)') '=====' DO k=0,npes IF(me.EQ.k) THEN s = grids(l+1)%f%s e = grids(l+1)%f%e WRITE(*,'(a,i2)') 'reference vbar on proc.', me DO i=s(1),e(1) WRITE(*,'(10f8.3)') grids(l+1)%v%val(i,s(2):e(2)) END DO s = grids(l)%f%s e = grids(l)%f%e WRITE(*,'(a,i2)') 'reference v on proc.', me DO i=s(1),e(1) WRITE(*,'(10f8.3)') grids(l)%v%val(i,s(2):e(2)) END DO WRITE(*,'(a,i2)') 'compute v on proc.', me DO i=s(1),e(1) WRITE(*,'(10f8.3)') new_grids(l)%v%val(i,s(2):e(2)) END DO END IF CALL mpi_barrier(comm_cart, ierr) END DO END IF err = norm_vec(new_grids(l)%v-grids(l)%v, comm_cart, 0) IF(me.EQ.0) WRITE(*,'(a,i3,1pe12.3)') 'Error of prolongation: ', l, err END DO ! ! Restriction of f ! DO l=2,levels CALL exchange(comm_cart, grids(l-1)%f) CALL restrict(new_grids(l)%restrict_mat, grids(l-1)%f, new_grids(l)%f) IF(nldebug) THEN IF(me.EQ.0) WRITE(*,'(a)') '=====' DO k=0,npes IF(me.EQ.k) THEN s = grids(l-1)%f%s e = grids(l-1)%f%e WRITE(*,'(a,i2)') 'reference f on proc.', me DO i=s(1),e(1) WRITE(*,'(10f8.3)') grids(l-1)%f%val(i,s(2):e(2)) END DO s = grids(l)%f%s e = grids(l)%f%e WRITE(*,'(a,i2)') 'reference fbar on proc.', me DO i=s(1),e(1) WRITE(*,'(10f8.3)') grids(l)%f%val(i,s(2):e(2)) END DO WRITE(*,'(a,i2)') 'compute fbar on proc.', me DO i=s(1),e(1) WRITE(*,'(10f8.3)') new_grids(l)%f%val(i,s(2):e(2)) END DO END IF CALL mpi_barrier(comm_cart, ierr) END DO END IF err = norm_vec(new_grids(l)%f-grids(l)%f, comm_cart, 0) IF(me.EQ.0) WRITE(*,'(a,i3,1pe12.3)') 'Error of restriction: ', l, err END DO !-------------------------------------------------------------------------------- ! 9.0 Epilogue ! CALL closef(fin) CALL mpi_finalize(ierr) ! CONTAINS SUBROUTINE copy_grids(g1, g2) TYPE(grid2_type), INTENT(in) :: g1(:) TYPE(grid2_type), INTENT(out) :: g2(:) INTEGER :: l DO l=1,SIZE(g1) g2(l)%s = g1(l)%s g2(l)%e = g1(l)%e g2(l)%npt_loc = g1(l)%npt_loc g2(l)%npt = g1(l)%npt ALLOCATE(g2(l)%x(0:g2(l)%npt(1)-1)); g2(l)%x = g1(l)%x ALLOCATE(g2(l)%y(0:g2(l)%npt(2)-1)); g2(l)%y = g1(l)%y g2(l)%v = gvector_2d(g1(l)%v%s, g1(l)%v%e, g1(l)%v%g); g2(l)%v%val = g1(l)%f%val g2(l)%f = gvector_2d(g1(l)%f%s, g1(l)%f%e, g1(l)%f%g); g2(l)%f%val = g1(l)%f%val END DO END SUBROUTINE copy_grids END PROGRAM main diff --git a/multigrid/src/test_jacobi.f90 b/multigrid/src/test_jacobi.f90 index f914535..da791e5 100644 --- a/multigrid/src/test_jacobi.f90 +++ b/multigrid/src/test_jacobi.f90 @@ -1,254 +1,254 @@ !> !> @file test_jacobi.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> ! ! Test 2D parallel Jacobi using STENCIL_2D matrix-free structure. ! MODULE mod USE iso_fortran_env, ONLY : rkind => real64 IMPLICIT NONE ! LOGICAL, PARAMETER :: nldebug=.FALSE. REAL(rkind), PARAMETER :: PI=4.d0*ATAN(1.0d0) CONTAINS END MODULE mod !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ PROGRAM main USE mpi USE pputils2, ONLY : dist1d, exchange, norm2_vec=>ppnorm2, timera, hostlist USE parmg, ONLY : jacobi, get_resids USE stencil, ONLY : stencil_2d, init, laplacian, putmat USE mod IMPLICIT NONE ! INTEGER, PARAMETER :: ndims=2 ! INTEGER :: me, neighs(4), npes, ierr INTEGER, DIMENSION(ndims) :: dims=[0,0] INTEGER, DIMENSION(ndims) :: coords, comm1d LOGICAL, DIMENSION(ndims) :: periods=[.FALSE.,.FALSE.] LOGICAL :: reorder =.FALSE. INTEGER :: comm_cart ! INTEGER :: nx=4, ny=4 ! Number of intervals INTEGER, DIMENSION(ndims) :: e, s, lb, ub, npt_glob, npt_loc ! REAL(rkind), ALLOCATABLE :: xgrid(:), ygrid(:) REAL(rkind) :: dx, dy INTEGER, DIMENSION(5,2) :: id ! 5-point stencil INTEGER :: npoints TYPE(stencil_2d) :: mat INTEGER :: i ! REAL(rkind), ALLOCATABLE :: f(:,:), v(:,:), u(:,:) REAL(rkind), ALLOCATABLE :: resids(:,:), errs(:,:) REAL(rkind), ALLOCATABLE :: resid_it(:), err_it(:) REAL(rkind) :: omega=1.0d0, resid INTEGER :: it, it_skip, nits=100 ! NAMELIST /in/ nx, ny, omega, nits !================================================================================ ! 1.0 Prologue ! ! 2D process grid CALL mpi_init(ierr) CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) CALL mpi_dims_create(npes, ndims, dims, ierr) CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, comm_cart,& & ierr) ! CALL mpi_comm_rank(comm_cart, me, ierr) CALL mpi_cart_coords(comm_cart, me, ndims, coords, ierr) CALL mpi_cart_shift(comm_cart, 0, 1, neighs(1), neighs(2), ierr) CALL mpi_cart_shift(comm_cart, 1, 1, neighs(3), neighs(4), ierr) ! CALL mpi_cart_sub(comm_cart, [.TRUE.,.FALSE.], comm1d(1), ierr) CALL mpi_cart_sub(comm_cart, [.FALSE.,.TRUE.], comm1d(2), ierr) ! CALL hostlist(comm_cart) IF(me.EQ.0) THEN WRITE(*,'(a,i0,a,i0/)') "Process grid: ", dims(1), " X ", dims(2) END IF ! ! Read problem inputs IF(me.EQ.0) THEN READ(*,in) WRITE(*,in) END IF ! CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, comm_cart, ierr) CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, comm_cart, ierr) CALL mpi_bcast(nits, 1, MPI_INTEGER, 0, comm_cart, ierr) CALL mpi_bcast(omega, 1, MPI_DOUBLE_PRECISION, 0, comm_cart, ierr) !================================================================================ ! 2.0 2d Grid construction ! ! Partition 2D grid CALL timera(0, 'Grid_construction') npt_glob(1) = nx+1 npt_glob(2) = ny+1 DO i=1,ndims CALL dist1d(comm1d(i), 0, npt_glob(i), s(i), npt_loc(i)) e(i) = s(i) + npt_loc(i) - 1 END DO WRITE(*,'(a,i3.3,a,2(3i4,", "))') 'PE', me, ' coords, s,e:', & & (coords(i),s(i),e(i),i=1,ndims) ! ! Global mesh dx = 1.0d0/REAL(nx) dy = 1.0d0/REAL(ny) ALLOCATE(xgrid(0:nx)) ALLOCATE(ygrid(0:ny)) xgrid = [ (i*dx, i=0,nx) ] ygrid = [ (i*dy, i=0,ny) ] CALL timera(1, 'Grid_construction') !================================================================================ ! 3.0 FD Laplacian ! CALL timera(0, 'Laplacian') id=RESHAPE([ 0, -1, 0, 1, 0, & 0, 0,-1, 0, 1], & [5,2]) npoints = 5 CALL init(s, e, id, .FALSE., mat, comm_cart) ! CALL laplacian(dx, dy, mat) CALL timera(1, 'Laplacian') !================================================================================ ! 4.0 Test parallel Jacobi with \nabla u(x,y) = f(x,y) ! ! Problem definition ! s = mat%s e = mat%e lb = s-1 ub = e+1 ALLOCATE(f(lb(1):ub(1),lb(2):ub(2))) ! RHS ALLOCATE(v(lb(1):ub(1),lb(2):ub(2))) ! Exact solutions ALLOCATE(u(lb(1):ub(1),lb(2):ub(2))) ! Computed solutions ALLOCATE(resids(lb(1):ub(1),lb(2):ub(2))) ! Residuals ALLOCATE(errs(lb(1):ub(1),lb(2):ub(2))) ! Errors ALLOCATE(resid_it(0:nits)) ALLOCATE(err_it(0:nits)) ! f(s(1):e(1),s(2):e(2)) = rhs(xgrid(s(1):e(1)),ygrid(s(2):e(2))) v(s(1):e(1),s(2):e(2)) = exact(xgrid(s(1):e(1)),ygrid(s(2):e(2))) CALL exchange(comm_cart, f) CALL exchange(comm_cart, v) ! ! Residuals of exact solutions resids = get_resids(mat,v,f) resid = norm2_vec(resids, comm_cart) ! ! Jacobi iteration loop ! IF(me.EQ.0) WRITE(*,'(/a6,t14,a,t34,a)') 'it', 'residual norm', 'discretization error' u = 0.0d0 CALL exchange(comm_cart, u) resids = get_resids(mat,u,f) errs = u-v resid_it(0) = norm2_vec(resids, comm_cart) err_it(0) = norm2_vec(errs, comm_cart) it_skip = MAX(1,nits/10) ! CALL timera(0, 'Jacobi') DO it=1,nits CALL jacobi(mat, omega, 1, u, f) CALL exchange(comm_cart, u) resids = get_resids(mat,u,f) errs = u-v resid_it(it) = norm2_vec(resids, comm_cart) err_it(it) = norm2_vec(errs, comm_cart) IF(me.EQ.0 .AND. MODULO(it,it_skip).EQ.0 ) THEN WRITE(*,'(i6,4(1pe12.3))') it, resid_it(it), resid_it(it)/resid_it(it-1),& & err_it(it), err_it(it)/err_it(it-1) END IF END DO CALL timera(1, 'Jacobi') !================================================================================ ! 9.0 Epilogue CALL h5file ! CALL timera(9, '') CALL MPI_FINALIZE(ierr) CONTAINS SUBROUTINE disp(str, arr) CHARACTER(len=*), INTENT(in) :: str REAL(rkind), INTENT(in) :: arr(:,:) INTEGER :: j WRITE(*,'(/a)') str DO j=1,SIZE(arr,2) WRITE(*,'(10f8.3)') arr(:,j) END DO END SUBROUTINE disp ! SUBROUTINE h5file USE futils CHARACTER(len=128) :: file='test_jacobi.h5' INTEGER :: fid CALL creatf(file, fid, real_prec='d', mpicomm=comm_cart) CALL putarr(fid, '/xgrid', xgrid, ionode=0) ! only rank 0 does IO CALL putarr(fid, '/ygrid', ygrid, ionode=0) ! only rank 0 does IO ! CALL putarrnd(fid, '/f', f, (/1,2/), garea=(/1,1/)) CALL putarrnd(fid, '/v', v, (/1,2/), garea=(/1,1/)) CALL putarrnd(fid, '/u', u, (/1,2/), garea=(/1,1/)) CALL putarrnd(fid, '/errs', errs, (/1,2/), garea=(/1,1/)) CALL putarrnd(fid, '/resids', resids,(/1,2/), garea=(/1,1/)) ! CALL putarr(fid, '/resid', resid_it, ionode=0) CALL putarr(fid, '/error', err_it, ionode=0) ! CALL putmat(fid, '/MAT', mat) CALL closef(fid) END SUBROUTINE h5file ! FUNCTION outerprod(x, y) RESULT(r) ! ! outer product ! REAL(rkind), INTENT(in) :: x(:), y(:) REAL(rkind) :: r(SIZE(x),SIZE(y)) INTEGER :: i, j DO j=1,SIZE(y) DO i=1,SIZE(x) r(i,j) = x(i)*y(j) END DO END DO END FUNCTION outerprod ! FUNCTION rhs(x,y) REAL(rkind), INTENT(in) :: x(:), y(:) REAL(rkind) :: rhs(SIZE(x),SIZE(y)) rhs = -10.d0*pi**2 * outerprod(SIN(pi*x), SIN(3.d0*pi*y)) END FUNCTION rhs ! FUNCTION exact(x,y) REAL(rkind), INTENT(in) :: x(:), y(:) REAL(rkind) :: exact(SIZE(x),SIZE(y)) exact = outerprod(SIN(pi*x), SIN(3.d0*pi*y)) END FUNCTION exact END PROGRAM main diff --git a/multigrid/src/test_jacobig.f90 b/multigrid/src/test_jacobig.f90 index 1fdca83..ff71a3e 100644 --- a/multigrid/src/test_jacobig.f90 +++ b/multigrid/src/test_jacobig.f90 @@ -1,331 +1,331 @@ !> !> @file test_jacobig.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> ! ! Test 2D parallel Jacobi using STENCIL_2D matrix-free structure. ! MODULE mod USE iso_fortran_env, ONLY : rkind => real64 IMPLICIT NONE ! LOGICAL, PARAMETER :: nldebug=.FALSE. REAL(rkind), PARAMETER :: PI=4.d0*ATAN(1.0d0) CONTAINS END MODULE mod !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ PROGRAM main USE mpi USE fdmat_mod, ONLY : fdmat, ibc_fdmat, ibc_rhs USE pputils2, ONLY : dist1d, timera, hostlist USE gvector, ONLY : gvector_2d, ASSIGNMENT(=), OPERATOR(-) USE parmg, ONLY : grid2_type, create_grid, jacobi, exchange, get_resids, norm_vec USE stencil, ONLY : stencil_2d, putmat USE mod IMPLICIT NONE ! INTEGER, PARAMETER :: ndims=2 ! INTEGER :: me, neighs(4), npes, ierr INTEGER, DIMENSION(ndims) :: dims=[0,0] INTEGER, DIMENSION(ndims) :: coords, comm1d LOGICAL, DIMENSION(ndims) :: periods=[.FALSE.,.FALSE.] LOGICAL :: reorder =.FALSE. INTEGER :: comm_cart INTEGER, DIMENSION(ndims) :: e0, s0, e, s, npt_glob, npt_loc ! REAL(rkind), ALLOCATABLE :: xgrid(:), ygrid(:) INTEGER, ALLOCATABLE :: id(:,:) REAL(rkind) :: dx, dy INTEGER :: npoints ! Number of points in FD stencil ! TYPE(gvector_2d) :: v_exact, resids, errs REAL(rkind), ALLOCATABLE :: resid_it(:), err_it(:) INTEGER, DIMENSION(ndims) :: g INTEGER :: i, it, it_skip ! INTEGER :: levels=1 TYPE(grid2_type), ALLOCATABLE :: grids(:) ! ! Input quantities ! CHARACTER(len=4) :: prb='dddd' INTEGER :: nx=4, ny=4 ! Number of intervals INTEGER :: kx=1, ky=1 REAL(rkind) :: Lx=1.0, Ly=1.0 REAL(rkind) :: icrosst=1.0, beta=0.0, miome=200.0 REAL(rkind) :: omega=1.0d0 INTEGER :: nits=100, nu=1 ! NAMELIST /in/ prb, nx, ny, kx, ky, Lx, Ly, icrosst, beta, & & miome, omega, nits, nu !================================================================================ ! 1.0 Prologue ! ! 2D process grid ! CALL mpi_init(ierr) CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) CALL mpi_dims_create(npes, ndims, dims, ierr) CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, comm_cart,& & ierr) ! CALL mpi_comm_rank(comm_cart, me, ierr) CALL mpi_cart_coords(comm_cart, me, ndims, coords, ierr) CALL mpi_cart_shift(comm_cart, 0, 1, neighs(1), neighs(2), ierr) CALL mpi_cart_shift(comm_cart, 1, 1, neighs(3), neighs(4), ierr) ! CALL mpi_cart_sub(comm_cart, [.TRUE.,.FALSE.], comm1d(1), ierr) CALL mpi_cart_sub(comm_cart, [.FALSE.,.TRUE.], comm1d(2), ierr) ! CALL hostlist(comm_cart) IF(me.EQ.0) THEN WRITE(*,'(a,i0,a,i0/)') "Process grid: ", dims(1), " X ", dims(2) END IF ! ! Read problem inputs ! IF(me.EQ.0) THEN READ(*,in) WRITE(*,in) END IF CALL mpi_bcast(prb, LEN(prb), MPI_CHARACTER, 0, comm_cart, ierr) CALL mpi_bcast(kx, 1, MPI_INTEGER, 0, comm_cart, ierr) call mpi_bcast(ky, 1, MPI_INTEGER, 0, comm_cart, ierr) CALL mpi_bcast(icrosst, 1, MPI_DOUBLE_PRECISION, 0, comm_cart, ierr) CALL mpi_bcast(Lx,1,MPI_DOUBLE_PRECISION,0,comm_cart, ierr) CALL mpi_bcast(Ly,1,MPI_DOUBLE_PRECISION,0,comm_cart, ierr) CALL mpi_bcast(beta,1,MPI_DOUBLE_PRECISION,0,comm_cart, ierr) CALL mpi_bcast(miome, 1, MPI_DOUBLE_PRECISION, 0, comm_cart, ierr) CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, comm_cart, ierr) CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, comm_cart, ierr) CALL mpi_bcast(nits, 1, MPI_INTEGER, 0, comm_cart, ierr) CALL mpi_bcast(nu, 1, MPI_INTEGER, 0, comm_cart, ierr) CALL mpi_bcast(omega, 1, MPI_DOUBLE_PRECISION, 0, comm_cart, ierr) !================================================================================ ! 2.0 2d Grid construction ! ! Partition 2D grid ! CALL timera(0, 'Grid_construction') npt_glob(1) = nx+1 npt_glob(2) = ny+1 DO i=1,ndims CALL dist1d(comm1d(i), 0, npt_glob(i), s(i), npt_loc(i)) e(i) = s(i) + npt_loc(i) - 1 END DO WRITE(*,'(a,i3.3,a,2(3i4,", "))') 'PE', me, ' coords, s,e:', & & (coords(i),s(i),e(i),i=1,ndims) ! ! Global mesh ! dx = Lx/REAL(nx) dy = Ly/REAL(ny) ALLOCATE(xgrid(0:nx)) ALLOCATE(ygrid(0:ny)) xgrid = [ (i*dx, i=0,nx) ] ygrid = [ (i*dy, i=0,ny) ] CALL timera(1, 'Grid_construction') ! ! Create grid structure ! ALLOCATE(grids(levels)) npoints = 9 ! Size of FD stencil ALLOCATE(id(npoints,2)) id=RESHAPE([ 0, -1, 0, 1, -1, 1, -1, 0, 1, & 0, -1, -1, -1, 0, 0, 1, 1, 1], & [npoints,2]) CALL create_grid(xgrid, ygrid, s, e, id, prb, grids, comm_cart) !================================================================================ ! 3.0 FD Operator ! CALL timera(0, 'Laplacian') ! CALL fdmat(grids(1), fdense, icrosst, grids(1)%fdmat) CALL ibc_fdmat(grids(1)%fdmat, prb) ! CALL timera(1, 'Laplacian') !================================================================================ ! 4.0 RHS and exact solution ! ! Allocate memory ! s0 = grids(1)%s0; e0 = grids(1)%e0 s = grids(1)%s; e = grids(1)%e g = [1,1] v_exact = gvector_2d(s, e, g) ! Exact solutions errs = gvector_2d(s, e, g) ! Disc. errors resids = gvector_2d(s, e, g) ! Residuals ALLOCATE(resid_it(0:nits)) ALLOCATE(err_it(0:nits)) ! ! Set RHS at the finest grid. Impose Dirichlet BC. ! grids(1)%f = frhs(xgrid(s(1):e(1)),ygrid(s(2):e(2))) CALL ibc_rhs(grids(1)%f, s0, e0, prb) ! ! Exact solutions ! v_exact = fexact(xgrid(s(1):e(1)),ygrid(s(2):e(2))) !================================================================================ ! 5.0 Jacobi iteration loop ! IF(me.EQ.0) WRITE(*,'(/a6,t14,a,t34,a)') 'it', 'residual norm', 'discretization error' grids(1)%v = 0.0d0 resids = get_resids(comm_cart, grids(1)%fdmat, grids(1)%v, grids(1)%f) errs = grids(1)%v - v_exact resid_it(0) = norm_vec(resids, comm_cart, root=0) err_it(0) = norm_vec(errs, comm_cart, root=0) it_skip = MAX(1,nits/10) ! CALL timera(0, 'Jacobi') DO it=1,nits CALL jacobi(comm_cart, grids(1)%fdmat, omega, nu, grids(1)%v, grids(1)%f) resids = get_resids(comm_cart, grids(1)%fdmat, grids(1)%v, grids(1)%f) errs = grids(1)%v - v_exact resid_it(it) = norm_vec(resids, comm_cart, root=0) err_it(it) = norm_vec(errs, comm_cart, root=0) IF(me.EQ.0 .AND. MODULO(it,it_skip).EQ.0 ) THEN WRITE(*,'(i6,4(1pe12.3))') it, resid_it(it), resid_it(it)/resid_it(it-1),& & err_it(it), err_it(it)/err_it(it-1) END IF END DO CALL timera(1, 'Jacobi') !================================================================================ ! 9.0 Epilogue CALL h5file ! CALL timera(9, '') CALL MPI_FINALIZE(ierr) CONTAINS ! !+++ FUNCTION fdense(x) ! ! Return density ! REAL(rkind), INTENT(in) :: x(:) REAL(rkind) :: fdense(SIZE(x)) fdense(:) = 0.5d0*beta*miome*EXP( -(x(:)-Lx/3d0)**2d0/(Lx/2d0)**2d0 ); END FUNCTION fdense !+++ FUNCTION fexact(x,y) ! ! Return analytical solution ! REAL(rkind), INTENT(in) :: x(:), y(:) REAL(rkind) :: fexact(SIZE(x),SIZE(y)) REAL(rkind) :: c INTEGER :: j IF(prb.EQ.'dddd') THEN DO j=1,SIZE(y) c = SIN(2.0d0*pi*ky*y(j)/Ly) fexact(:,j) = SIN(2.0d0*pi*kx*x(:)/Lx)*c END DO ELSE IF (prb.EQ.'nndd') THEN DO j=1,SIZE(y) c = SIN(2.0d0*pi*ky*y(j)/Ly) fexact(:,j) = COS(2.0d0*pi*kx*x(:)/Lx)*c END DO END IF END FUNCTION fexact !+++ SUBROUTINE h5file USE futils CHARACTER(len=128) :: file='test_jacobig.h5' INTEGER :: fid CALL creatf(file, fid, real_prec='d', mpicomm=comm_cart) CALL putarr(fid, '/xgrid', xgrid, ionode=0) ! only rank 0 does IO CALL putarr(fid, '/ygrid', ygrid, ionode=0) ! only rank 0 does IO ! CALL putarrnd(fid, '/f', grids(1)%f%val, (/1,2/), garea=g) CALL putarrnd(fid, '/v', v_exact%val, (/1,2/), garea=g) CALL putarrnd(fid, '/u', grids(1)%v%val, (/1,2/), garea=g) CALL putarrnd(fid, '/errs', errs%val, (/1,2/), garea=(/1,1/)) CALL putarrnd(fid, '/resids', resids%val,(/1,2/), garea=(/1,1/)) ! CALL putarr(fid, '/resid', resid_it, ionode=0) CALL putarr(fid, '/error', err_it, ionode=0) ! CALL putmat(fid, '/MAT', grids(1)%fdmat) CALL closef(fid) END SUBROUTINE h5file !+++ FUNCTION frhs(x,y) ! ! Return RHS ! REAL(rkind), INTENT(in) :: x(:), y(:) REAL(rkind) :: frhs(SIZE(x),SIZE(y)) REAL(rkind) :: c, s, d(SIZE(x)) REAL(rkind) :: corr INTEGER :: j corr = 1.d0+icrosst**2/4.0d0 d(:) = fdense(x(:)) IF(prb.EQ.'dddd') THEN DO j=1,SIZE(y) c = COS(2.0d0*pi*ky*y(j)/Ly) s = SIN(2.0d0*pi*ky*y(j)/Ly) frhs(:,j)=-4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * SIN(2.0d0*pi*kx*x(:)/Lx)*s & & -icrosst*kx*ky/Lx/Ly * COS(2.0d0*pi*kx*x(:)/Lx)*c ) & & + d(:) * SIN(2.0d0*pi*kx*x(:)/Lx)*s END DO ELSE IF (prb.EQ.'nndd') THEN DO j=1,SIZE(y) c = COS(2.0d0*pi*ky*y(j)/Ly) s = SIN(2.0d0*pi*ky*y(j)/Ly) frhs(:,j)=-4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * COS(2.0d0*pi*kx*x(:)/Lx)*s & & +icrosst*kx*ky/Lx/Ly * SIN(2.0d0*pi*kx*x(:)/Lx)*c ) & & + d(:) * COS(2.0d0*pi*kx*x(:)/Lx)*s END DO END IF END FUNCTION frhs !+++! FUNCTION outerprod(x, y) RESULT(r) ! ! outer product ! REAL(rkind), INTENT(in) :: x(:), y(:) REAL(rkind) :: r(SIZE(x),SIZE(y)) INTEGER :: i, j DO j=1,SIZE(y) DO i=1,SIZE(x) r(i,j) = x(i)*y(j) END DO END DO END FUNCTION outerprod ! FUNCTION rhs(x,y) REAL(rkind), INTENT(in) :: x(:), y(:) REAL(rkind) :: rhs(SIZE(x),SIZE(y)) rhs = -10.d0*pi**2 * outerprod(SIN(pi*x), SIN(3.d0*pi*y)) END FUNCTION rhs ! FUNCTION exact(x,y) REAL(rkind), INTENT(in) :: x(:), y(:) REAL(rkind) :: exact(SIZE(x),SIZE(y)) exact = outerprod(SIN(pi*x), SIN(3.d0*pi*y)) END FUNCTION exact END PROGRAM main diff --git a/multigrid/src/test_mg.f90 b/multigrid/src/test_mg.f90 index 8275f7a..7a90f57 100644 --- a/multigrid/src/test_mg.f90 +++ b/multigrid/src/test_mg.f90 @@ -1,279 +1,279 @@ !> !> @file test_mg.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Test multigrid V-cycle ! USE multigrid USE math_util, ONLY : root_bessj IMPLICIT NONE ! INTEGER :: nx=8, nidbas=1, ngauss=2, alpha=0, nits=40 INTEGER :: modem=22, modep=10 INTEGER :: levels=2, nu1=1, nu2=1, mu=1, nu0=1 CHARACTER(len=4) :: relax='jac ' LOGICAL :: nlfixed = .FALSE. DOUBLE PRECISION :: sigma=1.0d0, kmode=10.0, pi=4.0d0*ATAN(1.0d0) DOUBLE PRECISION :: omega=2.0d0/3.0d0 INTEGER :: l, nrank, its DOUBLE PRECISION :: errdisc_dir DOUBLE PRECISION, ALLOCATABLE :: u_direct(:), u_exact(:), u_calc(:) DOUBLE PRECISION, ALLOCATABLE :: sol_direct(:), sol_calc(:) DOUBLE PRECISION, ALLOCATABLE :: err(:), resid(:), errdisc(:) DOUBLE PRECISION, ALLOCATABLE :: errdisc_fmg(:) ! TYPE(grid1d), ALLOCATABLE :: gridx(:) TYPE(mg_info) :: info ! NAMELIST /newrun/ nx, nidbas, ngauss, sigma, kmode, modem, modep, alpha, & & relax, omega, nits, nlfixed, levels, nu1, nu2, mu, nu0 !-------------------------------------------------------------------------------- ! 1. Prologue ! Inputs ! READ(*,newrun) WRITE(*,newrun) ! levels = MIN(levels, get_lmax(nx)) ! info%nu1 = nu1 info%nu2 = nu2 info%mu = mu info%nu0 = nu0 info%levels = levels info%relax = relax info%omega = omega ! ! Create grids ! ALLOCATE(gridx(levels)) CALL create_grid(nx, nidbas, ngauss, alpha, gridx) WRITE(*,'(a/(20i6))') 'Number of intervals in grids', (gridx(l)%n, l=1,levels) ! ! Create FE matrice and set BC u(0)=u(1)=0 ! DO l=1,levels CALL femat(gridx(l)%spl, gridx(l)%mata, coefeq) ! ! Left Dirichlet BC (only for Cartesian geometry) IF(alpha .EQ. 0) THEN CALL ibcmat(1, gridx(l)%mata) END IF ! ! Right Dirichlet BC CALL ibcmat(gridx(l)%mata%rank, gridx(l)%mata) ! ! BC on grid transfer operator IF(l.GT.1) THEN WHERE( ABS(gridx(l)%transf%val) < 1.d-8) gridx(l)%transf%val=0.0d0 IF(alpha .EQ. 0) gridx(l)%transf%val(2:,1)=0.0d0 gridx(l)%transf%val(1:gridx(l-1)%rank-1,gridx(l)%rank)=0.0d0 END IF END DO CALL printdiag_gb('Diagonal of coarsest A', gridx(levels)%mata) ! ! Construct RHS and set BC only on the finest grid ! nrank = gridx(1)%rank CALL disrhs(gridx(1)%spl, gridx(1)%f, rhs) ! ! Left Dirichlet BC (only for Cartesian geometry) IF(alpha .EQ. 0) THEN gridx(1)%f(1) = 0.0d0 END IF ! ! Right Dirichlet BC gridx(1)%f(nrank) = 0.0d0 !-------------------------------------------------------------------------------- ! 2. Direct solution ! WRITE(*,'(//a)') 'Direct solution for the finest grid problem' ALLOCATE(u_direct(0:nx)) ALLOCATE(sol_direct(nrank)) CALL direct_solve(gridx(1), sol_direct) CALL gridval(gridx(1)%spl, gridx(1)%x, u_direct, 0, sol_direct) errdisc_dir = disc_err(gridx(1)%spl, sol_direct, sol) WRITE(*,'(a,1pe12.3)') 'Discretization error', errdisc_dir !-------------------------------------------------------------------------------- ! 3. Solution from MG V-cycles ! WRITE(*,'(//a)') 'Multigrid MG V-cycles' ALLOCATE(sol_calc(nrank)) ALLOCATE(err(0:nits)) ALLOCATE(errdisc(0:nits)) ALLOCATE(resid(0:nits)) ! ! Initial guess ! sol_calc(:) = 0.0d0 IF(nlfixed) THEN sol_calc(:) = sol_direct(:) END IF gridx(1)%v(:) = sol_calc(:) err(0) = normf(gridx(1)%matm, sol_calc-sol_direct) errdisc(0) = disc_err(gridx(1)%spl, sol_calc, sol) resid(0) = residue(gridx(1)%mata, gridx(1)%f, sol_calc) ! ! Iterations ! DO its=1,nits CALL mg(gridx, info, 1) sol_calc(:) = gridx(1)%v(:) err(its) = normf(gridx(1)%matm, sol_calc-sol_direct) errdisc(its) = disc_err(gridx(1)%spl, sol_calc, sol) resid(its) = residue(gridx(1)%mata, gridx(1)%f, sol_calc) END DO ! WRITE(*,'(a4,3(a12,a8))') 'its', 'error', 'ratio', 'residue', 'ratio', & & 'disc. err', 'ratio' WRITE(*,'(i4,3(1pe12.3,8x))') 0, err(0), resid(0), errdisc(0) WRITE(*,'((i4,3(1pe12.3,0pf8.2)))') (its, err(its), err(its)/err(its-1), & & resid(its), resid(its)/resid(its-1), & & errdisc(its), errdisc(its)/errdisc(its-1), its=1,nits) !-------------------------------------------------------------------------------- ! 4. Solution from FMG ! WRITE(*,'(//a)') 'Full Multigrid' ALLOCATE(errdisc_fmg(nits)) DO its=1,nits info%nu0 = its CALL fmg(gridx, info, 1) sol_calc(:) = gridx(1)%v(:) errdisc_fmg(its) = disc_err(gridx(1)%spl, sol_calc, sol) resid(its) = residue(gridx(1)%mata, gridx(1)%f, sol_calc) END DO WRITE(*,'(a4,2(a12,a8))') 'nu0', 'residue', 'ratio','disc. err', 'ratio' WRITE(*,'((i4,2(1pe12.3,0pf8.3)))') (its, resid(its), resid(its)/resid(its-1), & & errdisc_fmg(its),errdisc_fmg(its)/errdisc_dir, its=1,nits) ! ! Grid values at final iteration ! ALLOCATE(u_exact(0:nx)) ALLOCATE(u_calc(0:nx)) u_exact = sol(gridx(1)%x) CALL gridval(gridx(1)%spl, gridx(1)%x, u_calc, 0, sol_calc) !-------------------------------------------------------------------------------- ! 9. Epilogue ! ! Creata HDF5 file ! CALL h5file !-------------------------------------------------------------------------------- CONTAINS FUNCTION rhs(x) DOUBLE PRECISION, INTENT(in) :: x DOUBLE PRECISION :: rhs DOUBLE PRECISION :: nump SELECT CASE (alpha) CASE(0) ! Cartesian geometry rhs = SIN(pi*kmode*x) CASE(1) ! Cylindrical nump = root_bessj(modem, modep) rhs = x * nump**2 * bessel_jn(modem, nump*x) END SELECT END FUNCTION rhs FUNCTION sol(x) DOUBLE PRECISION, INTENT(in) :: x(:) DOUBLE PRECISION :: sol(SIZE(x)) DOUBLE PRECISION :: nump SELECT CASE (alpha) CASE(0) ! Cartesian geometry sol(:) = 1.0d0/((pi*kmode)**2+sigma)*SIN(pi*kmode*x(:)) CASE(1) ! Cylindrical nump = root_bessj(modem, modep) sol(:) = bessel_jn(modem, nump*x(:)) END SELECT END FUNCTION sol SUBROUTINE coefeq(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE PRECISION, INTENT(out) :: c(:) SELECT CASE (alpha) CASE(0) ! Cartesian geometry c(1) = 1.0d0 idt(1) = 1 idw(1) = 1 c(2) = sigma idt(2) = 0 idw(2) = 0 CASE(1) ! Cylindrical c(1) = x idt(1) = 1 idw(1) = 1 c(2) = REAL(modem,8)**2/x idt(2) = 0 idw(2) = 0 END SELECT END SUBROUTINE coefeq SUBROUTINE h5file USE futils CHARACTER(len=128) :: file='test_mg.h5' INTEGER :: fid INTEGER :: l CHARACTER(len=64) :: dsname CALL creatf(file, fid, real_prec='d') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NIDBAS', nidbas) CALL attach(fid, '/', 'SIGMA', sigma) CALL attach(fid, '/', 'KMODE', kmode) CALL attach(fid, '/', 'MODEM', modem) CALL attach(fid, '/', 'MODEP', modep) CALL attach(fid, '/', 'ALPHA', alpha) CALL attach(fid, '/', 'RELAX', relax) CALL attach(fid, '/', 'OMEGA', omega) CALL attach(fid, '/', 'NITS', nits) CALL attach(fid, '/', 'NLFIXED', nlfixed) CALL attach(fid, '/', 'LEVELS', levels) CALL attach(fid, '/', 'NU1', nu1) CALL attach(fid, '/', 'NU2', nu2) CALL attach(fid, '/', 'NU0', nu0) CALL attach(fid, '/', 'MU', mu) CALL creatg(fid, '/mglevels') DO l=1,levels WRITE(dsname,'("/mglevels/level.",i2.2)') l CALL creatg(fid, TRIM(dsname)) CALL putmat(fid, TRIM(dsname)//'/mata', gridx(l)%mata) IF(l.GT.1) THEN CALL putarr(fid, TRIM(dsname)//'/matp', gridx(l)%transf%val) CALL attach(fid, TRIM(dsname)//'/matp', 'M', gridx(l)%transf%mrows) CALL attach(fid, TRIM(dsname)//'/matp', 'N', gridx(l)%transf%ncols) END IF CALL putarr(fid, TRIM(dsname)//'/f', gridx(l)%f) CALL putarr(fid, TRIM(dsname)//'/v', gridx(l)%v) END DO CALL creatg(fid, '/Iterations') CALL putarr(fid, '/Iterations/errors', err) CALL putarr(fid, '/Iterations/residues', resid) CALL putarr(fid, '/Iterations/disc_errors', errdisc) CALL putarr(fid, '/Iterations/disc_errors_fmg', errdisc_fmg) CALL putarr(fid, '/Iterations/xgrid', gridx(1)%x) CALL putarr(fid, '/Iterations/u_direct', u_direct) CALL putarr(fid, '/Iterations/u_exact', u_exact) CALL putarr(fid, '/Iterations/u_calc', u_calc) CALL closef(fid) END SUBROUTINE h5file END PROGRAM main diff --git a/multigrid/src/test_mg2d.f90 b/multigrid/src/test_mg2d.f90 index 4730a65..ffa9e6d 100644 --- a/multigrid/src/test_mg2d.f90 +++ b/multigrid/src/test_mg2d.f90 @@ -1,413 +1,413 @@ !> !> @file test_mg2d.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Test 2d multigrid: Cartesian case ! USE multigrid USE csr IMPLICIT NONE INCLUDE 'mpif.h' ! DOUBLE PRECISION, PARAMETER :: PI=4.0d0*ATAN(1.0d0) INTEGER, DIMENSION(2) :: n, nidbas, ngauss, alpha DOUBLE PRECISION :: kx=4.d0, ky=3.d0, sigma=10.0d0 CHARACTER(len=4) :: prb='poly' INTEGER :: levels=1, nu1=1, nu2=1, mu=1, nu0=1, nits CHARACTER(len=4) :: relax='jac ' DOUBLE PRECISION :: omega=2.0d0/3.0d0, tol LOGICAL :: nlfixed=.FALSE. DOUBLE PRECISION :: t0, tsetup(2), tdirect, tbsolve, titer, titer_per_step DOUBLE PRECISION :: resid_direct, errdisc_direct DOUBLE PRECISION :: norma, normb ! DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:) DOUBLE PRECISION :: dx, dy INTEGER :: ix, iy ! DOUBLE PRECISION, ALLOCATABLE :: sol_anal_grid(:,:) DOUBLE PRECISION, ALLOCATABLE :: sol_calc_grid(:,:) DOUBLE PRECISION, ALLOCATABLE :: rresid(:), resid(:), errdisc(:) ! DOUBLE PRECISION, ALLOCATABLE, TARGET :: sol_direct(:,:) DOUBLE PRECISION, POINTER :: sol_direct_1d(:) DOUBLE PRECISION, ALLOCATABLE :: sol_direct_grid(:,:) ! INTEGER :: ierr, me INTEGER :: l, nterms INTEGER :: its ! TYPE(grid2d), ALLOCATABLE :: grids(:) TYPE(mg_info) :: info ! NAMELIST /newrun/ n, nidbas, ngauss, kx, ky, sigma, alpha, levels, prb, & & nu1, nu2, mu, nu0, relax, nits, tol, nlfixed, omega !-------------------------------------------------------------------------------- ! 1. Prologue ! CALL mpi_init(ierr) CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) ! ! Inputs ! n = (/8, 8/) nidbas=(/3,3/) ngauss=(/2,2/) alpha = (/0,0/) kx=4 ky=3 sigma=10.0d0 levels=2 prb='poly' nu1 = 1 nu2 = 1 mu = 1 nu0 = 1 nits = 10 tol = 1.e-8 relax = 'jac' nlfixed= .FALSE. ! IF(me.EQ.0) THEN READ(*,newrun) WRITE(*,newrun) END IF CALL mpi_bcast(n, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(alpha, 2, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(kx, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(ky, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(sigma, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(levels, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(prb, LEN(prb), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nu1, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nu2, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nu0, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(mu, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nits, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(relax, 4, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nlfixed, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(omega, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(tol, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) ! ! Adjust number of levels and fill mg info. ! levels = MIN(levels, get_lmax(n(1)), get_lmax(n(2))) info%nu1 = nu1 info%nu2 = nu2 info%mu = mu info%nu0 = nu0 info%levels = levels info%relax = relax info%omega = omega ! ! Create grids ! t0 = mpi_wtime() ! dx = 1.0d0/REAL(n(1),8) dy = 1.0d0/REAL(n(2),8) ALLOCATE(x(0:n(1)), y(0:n(2))) DO ix=0,n(1) x(ix) = ix*dx END DO DO iy=0,n(2) y(iy) = iy*dy END DO ! ALLOCATE(grids(levels)) CALL create_grid(x, y, nidbas, ngauss, alpha, grids) WRITE(*,'(5a6)') 'l', 'nx', 'ny', 'rx', 'ry' WRITE(*,'(5i6)') (l, grids(l)%n, grids(l)%rank, l=1,levels) ! ! Construct RHS and set BC only on the finest grid ! CALL disrhs(grids(1)%spl, grids(1)%f, rhs) CALL ibcrhs(grids(1), grids(1)%f) ! ! Build FE matrices and set BC ! nterms = 3 DO l=1,levels CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms) CALL ibcmat(grids(l), grids(l)%mata) CALL to_mat(grids(l)%mata) END DO ! ! Set BC on grid transfer matrices ! CALL ibc_transf(grids,1,3) CALL ibc_transf(grids,2,3) ! tsetup(1) = mpi_wtime()-t0 ! ! Clear and rebuild FE matrices and set BC ! t0 = mpi_wtime() nterms = 2 DO l=1,levels CALL clear_mat(grids(l)%mata) CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms, noinit=.TRUE.) CALL ibcmat(grids(l), grids(l)%mata) END DO tsetup(2) = mpi_wtime()-t0 !-------------------------------------------------------------------------------- ! 1. Analytical solution (at the finest grid, l=1) ! ALLOCATE(sol_anal_grid(0:n(1),0:n(2))) sol_anal_grid = sol(grids(1)%x, grids(1)%y) !-------------------------------------------------------------------------------- ! 2. Direct solution (at the finest grid, l=1) ! WRITE(*,'(//a)') 'Direct solution for the finest grid problem' ! ALLOCATE(sol_direct(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)), & & source=grids(1)%f) ALLOCATE(sol_direct_grid(0:n(1),0:n(2))) ! sol_direct_1d(1:SIZE(grids(1)%v1d)) => sol_direct ! ! PRINT*, 'shape of sol_direct', SHAPE(sol_direct) PRINT*, 'shape of sol_direct_1d', SHAPE(sol_direct_1d) ! t0 = mpi_wtime() sol_direct = grids(1)%f CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.) tdirect = mpi_wtime()-t0 ! t0 = mpi_wtime() sol_direct = grids(1)%f CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.) resid_direct = residue(grids(1)%mata, grids(1)%f1d, sol_direct_1d) errdisc_direct = disc_err(grids(1)%spl, sol_direct, sol) ! tbsolve = mpi_wtime()-t0 ! CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_direct_grid, & & [0,0], sol_direct) ! WRITE(*,'(a,2(1pe12.3))') 'Discretization error and residue =', & & errdisc_direct, resid_direct WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & & NORM2(sol_anal_grid-sol_direct_grid) / NORM2(sol_anal_grid) ! WRITE(*, '(a,1pe12.3)') 'Frobenius norm of A', matnorm(grids(1)%mata) WRITE(*, '(a,1pe12.3)') 'Infinity norm of A ', matnorm(grids(1)%mata, 'inf') WRITE(*, '(a,1pe12.3)') '1 norm of A ', matnorm(grids(1)%mata, '1') !-------------------------------------------------------------------------------- ! 3. Test multigrid V-cycle ! WRITE(*,'(/a)') 'Multigrid MG V-cycles ...' ALLOCATE(sol_calc_grid(0:n(1),0:n(2))) ALLOCATE(errdisc(0:nits)) ALLOCATE(resid(0:nits)) ALLOCATE(rresid(0:nits)) ! ! Norm of A and b ! norma = matnorm(grids(1)%mata) normb = NORM2(grids(1)%f1d) ! ! Initial guess ! IF(nlfixed) THEN grids(1)%v = sol_direct ELSE grids(1)%v = 0.0d0 END IF ! errdisc(0) = disc_err(grids(1)%spl, grids(1)%v, sol) resid(0) = residue(grids(1)%mata, grids(1)%f1d, grids(1)%v1d) rresid(0) = resid(0) / ( norma*NORM2(grids(1)%v1d) + normb ) WRITE(*,'(a4,3(a12,a8))') 'its', 'residue', 'ratio', 'disc. err', 'ratio', & & 'rel. resid', 'ratio' WRITE(*,'(i4,3(1pe12.3,8x))') 0, resid(0), errdisc(0), rresid(0) ! ! Iterations ! t0 = mpi_wtime() DO its=1,nits CALL mg(grids, info, 1) errdisc(its) = disc_err(grids(1)%spl, grids(1)%v, sol) resid(its) = residue(grids(1)%mata, grids(1)%f1d, grids(1)%v1d) rresid(its) = resid(its) / ( norma*NORM2(grids(1)%v1d) + normb ) WRITE(*,'((i4,3(1pe12.3,0pf8.2)))') its, & & resid(its), resid(its)/resid(its-1), & & errdisc(its), errdisc(its)/errdisc(its-1), & & rresid(its), rresid(its)/rresid(its-1) IF(resid(its) .LE. tol) EXIT END DO nits = MIN(nits,its) titer = mpi_wtime() - t0 titer_per_step = titer/REAL(its,8) ! CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_calc_grid, & & [0,0], grids(1)%v) !-------------------------------------------------------------------------------- ! 9. Epilogue ! ! Display timing ! WRITE(*,'(a,2(1pe12.3))') 'Set up time (s) ', tsetup WRITE(*,'(a,2(1pe12.3))') 'Direct and solve time (s) ', tdirect, tbsolve WRITE(*,'(a,1pe12.3,i5)') 'Iter time (s) ', titer, nits ! ! Creata HDF5 file ! IF(me.EQ.0) CALL h5file ! CALL mpi_finalize(ierr) !-------------------------------------------------------------------------------- CONTAINS !+++ FUNCTION rhs(x, y) ! ! Return problem RHS ! DOUBLE PRECISION, INTENT(in) :: x, y DOUBLE PRECISION :: rhs DOUBLE PRECISION :: x2, y2 SELECT CASE(TRIM(prb)) CASE('poly') x2 = x*x; y2 = y*y; rhs = 2.d0 * ( (1.0d0-6.d0*x2)*y2*(1.d0-y2) + & & (1.0d0-6.d0*y2)*x2*(1.d0-x2) ) CASE('trig') rhs = SIN(PI*kx*x)*SIN(PI*ky*y) END SELECT END FUNCTION rhs !+++ FUNCTION sol(x, y) ! ! Return exact problem solution ! DOUBLE PRECISION, INTENT(in) :: x(:), y(:) DOUBLE PRECISION :: sol(SIZE(x),SIZE(y)) DOUBLE PRECISION :: c DOUBLE PRECISION :: x2(SIZE(x)), y2(SIZE(y)) INTEGER :: j SELECT CASE(TRIM(prb)) CASE('poly') x2 = x*x; y2 = y*y; DO j=1,SIZE(y) c = y2(j)*(y2(j)-1.d0) sol(:,j) = c*x2(:)*(1.0d0-x2(:)) END DO CASE('trig') DO j=1,SIZE(y) c = SIN(PI*ky*y(j)) / (PI**2*(kx**2+ky**2) + sigma**2) sol(:,j) = c * SIN(PI*kx*x(:)) END DO END SELECT END FUNCTION sol !+++ SUBROUTINE coefeq(x, y, idt, idw, c) ! ! Weak form = Int( \nabla(w).\nabla(t) + \sigma.t.w) dV) ! DOUBLE PRECISION, INTENT(in) :: x, y INTEGER, INTENT(out) :: idt(:,:), idw(:,:) DOUBLE PRECISION, INTENT(out) :: c(:) ! c(1) = 1.0d0 idt(1,1) = 1 idt(1,2) = 0 idw(1,1) = 1 idw(1,2) = 0 ! c(2) = 1.0d0 idt(2,1) = 0 idt(2,2) = 1 idw(2,1) = 0 idw(2,2) = 1 ! c(3) = sigma idt(3,1) = 0 idt(3,2) = 0 idw(3,1) = 0 idw(3,2) = 0 END SUBROUTINE coefeq !+++ SUBROUTINE h5file USE futils CHARACTER(len=128) :: file='test_mg2d.h5' INTEGER :: fid INTEGER :: l CHARACTER(len=64) :: dsname CALL creatf(file, fid, real_prec='d') CALL attach(fid, '/', 'NX', n(1)) CALL attach(fid, '/', 'NY', n(2)) CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) CALL attach(fid, '/', 'KX', kx) CALL attach(fid, '/', 'KY', ky) CALL attach(fid, '/', 'SIGMA', sigma) CALL attach(fid, '/', 'ALPHA1', alpha(1)) CALL attach(fid, '/', 'ALPHA2', alpha(2)) CALL attach(fid, '/', 'RELAX', relax) CALL attach(fid, '/', 'NITS', nits) CALL attach(fid, '/', 'LEVELS', levels) CALL attach(fid, '/', 'NU1', nu1) CALL attach(fid, '/', 'NU2', nu2) CALL attach(fid, '/', 'NU0', nu0) CALL attach(fid, '/', 'MU', mu) CALL creatg(fid, '/mglevels') DO l=1,levels WRITE(dsname,'("/mglevels/level.",i2.2)') l CALL creatg(fid, TRIM(dsname)) CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata) IF(l.GT.1) THEN CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1)) CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2)) END IF CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x) CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y) CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f) CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v) CALL putarr(fid, TRIM(dsname)//'/f1d', grids(l)%f1d) CALL putarr(fid, TRIM(dsname)//'/v1d', grids(l)%v1d) END DO ! ! Solutions at finest grid ! CALL creatg(fid, '/solutions') CALL putarr(fid, '/solutions/xg', grids(1)%x) CALL putarr(fid, '/solutions/yg', grids(1)%y) CALL putarr(fid, '/solutions/anal', sol_anal_grid) CALL putarr(fid, '/solutions/calc', sol_calc_grid) CALL putarr(fid, '/solutions/direct', sol_direct_grid) ! CALL creatg(fid, '/Iterations') CALL putarr(fid, '/Iterations/residues', resid(0:nits)) CALL putarr(fid, '/Iterations/disc_errors', errdisc(0:nits)) ! CALL closef(fid) END SUBROUTINE h5file !+++ END PROGRAM diff --git a/multigrid/src/test_mg2d_cyl.f90 b/multigrid/src/test_mg2d_cyl.f90 index 73644ec..34967dc 100644 --- a/multigrid/src/test_mg2d_cyl.f90 +++ b/multigrid/src/test_mg2d_cyl.f90 @@ -1,427 +1,427 @@ !> !> @file test_mg2d_cyl.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Test 2d multigrid ! Cylindrical case ! USE multigrid USE csr IMPLICIT NONE INCLUDE 'mpif.h' ! DOUBLE PRECISION, PARAMETER :: PI=4.0d0*ATAN(1.0d0) INTEGER, DIMENSION(2) :: n, nidbas, ngauss INTEGER :: modem=22, modep=10 CHARACTER(len=4) :: prb='poly' INTEGER :: levels=1, nu1=1, nu2=1, mu=1, nu0=1, nits CHARACTER(len=4) :: relax='jac ' DOUBLE PRECISION :: omega=2.0d0/3.0d0, tol, rtol LOGICAL :: nluniq=.TRUE. LOGICAL :: nlfixed=.FALSE. DOUBLE PRECISION :: t0, tsetup(2), tdirect, tbsolve, titer, titer_per_step DOUBLE PRECISION :: resid_direct, errdisc_direct DOUBLE PRECISION :: norma, normb ! DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:) DOUBLE PRECISION :: dx, dy INTEGER :: ix, iy ! DOUBLE PRECISION, ALLOCATABLE :: sol_anal_grid(:,:) DOUBLE PRECISION, ALLOCATABLE :: sol_calc_grid(:,:) DOUBLE PRECISION, ALLOCATABLE :: rresid(:), resid(:), errdisc(:) ! DOUBLE PRECISION, ALLOCATABLE, TARGET :: sol_direct(:,:) DOUBLE PRECISION, POINTER :: sol_direct_1d(:) ! DOUBLE PRECISION, ALLOCATABLE :: sol_direct_orig(:,:) DOUBLE PRECISION, ALLOCATABLE :: sol_direct_grid(:,:) ! DOUBLE PRECISION, ALLOCATABLE :: sol_orig(:,:) ! INTEGER :: ierr, me INTEGER :: l, nterms INTEGER :: its ! TYPE(grid2d), ALLOCATABLE :: grids(:) TYPE(mg_info) :: info ! NAMELIST /newrun/ n, nidbas, ngauss, modem, modep, levels, prb, & & nu1, nu2, mu, nu0, relax, nits, tol, rtol, nlfixed, & & nluniq, omega !-------------------------------------------------------------------------------- ! 1. Prologue ! CALL mpi_init(ierr) CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) ! ! Inputs ! n = (/8, 8/) nidbas=(/3,3/) ngauss=(/2,2/) modem = 22 modep = 10 prb='poly' levels=2 nu1 = 1 nu2 = 1 mu = 1 nu0 = 1 nits = 10 tol = 1.e-8 rtol = 1.e-10 relax = 'jac' nlfixed= .FALSE. nluniq = .TRUE. ! IF(me.EQ.0) THEN READ(*,newrun) WRITE(*,newrun) END IF CALL mpi_bcast(n, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(modem, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(modep, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(levels, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nu1, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nu2, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nu0, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(mu, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nits, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(relax, 4, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(prb, LEN(prb), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nlfixed, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nluniq, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(omega, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(tol, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(rtol, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) ! ! Adjust number of levels and fill mg info. ! levels = MIN(levels, get_lmax(n(1)), get_lmax(n(2))) info%nu1 = nu1 info%nu2 = nu2 info%mu = mu info%nu0 = nu0 info%levels = levels info%relax = relax info%omega = omega ! ! Create grids ! t0 = mpi_wtime() ! dx = 1.0d0/REAL(n(1),8) dy = 2.0d0*pi/REAL(n(2),8) ALLOCATE(x(0:n(1)), y(0:n(2))) DO ix=0,n(1) x(ix) = ix*dx END DO DO iy=0,n(2) y(iy) = iy*dy END DO ! ALLOCATE(grids(levels)) CALL create_grid(x, y, nidbas, ngauss, [1, 0], grids, period=[.FALSE., .TRUE.], & & debug_in=.FALSE.) WRITE(*,'(5a6)') 'l', 'nx', 'ny', 'rx', 'ry' WRITE(*,'(5i6)') (l, grids(l)%n, grids(l)%rank, l=1,levels) !!$ CALL printmat('** Prolongation matrix in 1st dim.**', grids(2)%transf(1)) !!$ CALL printmat('** Prolongation matrix in 2nd dim.**', grids(2)%transf(2)) ! ! Construct RHS and set BC only on the finest grid ! CALL disrhs(grids(1)%spl, grids(1)%f, rhs) !!$ WRITE(*,'(a/(8(1pe12.3)))') 'Orig RHS at the axis', grids(1)%f(1,1:n(2)) CALL ibcrhs(grids(1), grids(1)%f, nluniq_in=nluniq) !!$ WRITE(*,'(a/(8(1pe12.3)))') 'RHS at the axis', grids(1)%f(1,1:n(2)) ! ! Build FE matrices and set BC ! nterms = 2 DO l=1,levels CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms) CALL ibcmat(grids(l), grids(l)%mata, nluniq_in=nluniq) CALL to_mat(grids(l)%mata) END DO ! ! Set BC on grid transfer matrices ! CALL ibc_transf(grids, 1, 2) ! Only right boundary on r (1st dim.) tsetup(1) = mpi_wtime()-t0 ! ! Clear and rebuild FE matrices and set BC ! t0 = mpi_wtime() nterms = 2 DO l=1,levels CALL clear_mat(grids(l)%mata) CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms, noinit=.TRUE.) CALL ibcmat(grids(l), grids(l)%mata, nluniq_in=nluniq) END DO tsetup(2) = mpi_wtime()-t0 !-------------------------------------------------------------------------------- ! 1. Analytical solution (at the finest grid, l=1) ! ALLOCATE(sol_anal_grid(0:n(1),0:n(2))) sol_anal_grid = sol(grids(1)%x, grids(1)%y) !-------------------------------------------------------------------------------- ! 2. Direct solution (at the finest grid, l=1) ! WRITE(*,'(//a)') 'Direct solution for the finest grid problem' ! ALLOCATE(sol_direct(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)), & & source=grids(1)%f) ALLOCATE(sol_direct_orig(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2))) ALLOCATE(sol_direct_grid(0:n(1),0:n(2))) ! sol_direct_1d(1:SIZE(grids(1)%v1d)) => sol_direct ! ! PRINT*, 'shape of sol_direct', SHAPE(sol_direct) PRINT*, 'shape of sol_direct_1d', SHAPE(sol_direct_1d) ! t0 = mpi_wtime() sol_direct = grids(1)%f CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.) tdirect = mpi_wtime()-t0 ! t0 = mpi_wtime() sol_direct = grids(1)%f CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.) resid_direct = residue(grids(1)%mata, grids(1)%f1d, sol_direct_1d) ! sol_direct_orig = sol_direct CALL back_transf(grids(1), sol_direct_orig, nluniq_in=nluniq) errdisc_direct = disc_err(grids(1)%spl, sol_direct_orig, sol) ! tbsolve = mpi_wtime()-t0 ! CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_direct_grid, & & [0,0], sol_direct_orig) ! WRITE(*,'(a,2(1pe12.3))') 'Discretization error and residue =', & & errdisc_direct, resid_direct WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & & NORM2(sol_anal_grid-sol_direct_grid) / NORM2(sol_anal_grid) !-------------------------------------------------------------------------------- ! 3. Test multigrid V-cycle ! WRITE(*,'(/a)') 'Multigrid MG V-cycles ...' ALLOCATE(sol_calc_grid(0:n(1),0:n(2))) ALLOCATE(errdisc(0:nits)) ALLOCATE(resid(0:nits)) ALLOCATE(rresid(0:nits)) ALLOCATE(sol_orig(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2))) ! ! Norm of A and b ! norma = matnorm(grids(1)%mata) normb = NORM2(grids(1)%f1d) ! WRITE(*, '(a,1pe12.3)') 'Frobenius norm of A', norma WRITE(*, '(a,1pe12.3)') 'Infinity norm of A ', matnorm(grids(1)%mata, 'inf') WRITE(*, '(a,1pe12.3)') '1 norm of A ', matnorm(grids(1)%mata, '1') ! ! Initial guess ! IF(nlfixed) THEN grids(1)%v = sol_direct ELSE grids(1)%v = 0.0d0 END IF ! sol_orig(:,:) = grids(1)%v(:,:) CALL back_transf(grids(1), sol_orig, nluniq_in=nluniq) errdisc(0) = disc_err(grids(1)%spl, sol_orig, sol) ! resid(0) = residue(grids(1)%mata, grids(1)%f1d, grids(1)%v1d) rresid(0) = resid(0) / ( norma*NORM2(grids(1)%v1d) + normb ) WRITE(*,'(a4,3(a12,a8),a12)') 'its', 'residue', 'ratio', 'disc. err', & & 'ratio', 'rel. resid', 'ratio', '||v||' WRITE(*,'(i4,3(1pe12.3,8x),1pe12.3)') 0, resid(0), errdisc(0), rresid(0), NORM2(grids(1)%v1d) ! ! Iterations ! t0 = mpi_wtime() DO its=1,nits CALL mg_cyl(grids, info, 1, nluniq_in=nluniq) resid(its) = residue(grids(1)%mata, grids(1)%f1d, grids(1)%v1d) rresid(its) = resid(its) / ( norma*NORM2(grids(1)%v1d) + normb ) ! sol_orig(:,:) = grids(1)%v(:,:) CALL back_transf(grids(1), sol_orig, nluniq_in=nluniq) errdisc(its) = disc_err(grids(1)%spl, sol_orig, sol) ! WRITE(*,'((i4,3(1pe12.3,0pf8.2)),1pe12.3)') its, & & resid(its), resid(its)/resid(its-1), & & errdisc(its), errdisc(its)/errdisc(its-1), & & rresid(its), rresid(its)/rresid(its-1), & & NORM2(grids(1)%v1d) IF(resid(its) .LE. tol .OR. rresid(its).LE. rtol ) EXIT END DO nits = MIN(nits,its) titer = mpi_wtime() - t0 titer_per_step = titer/REAL(its,8) ! CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_calc_grid, & & [0,0], sol_orig) !-------------------------------------------------------------------------------- ! 9. Epilogue ! ! Display timing ! WRITE(*,'(a,2(1pe12.3))') 'Set up time (s) ', tsetup WRITE(*,'(a,2(1pe12.3))') 'Direct and solve time (s) ', tdirect, tbsolve WRITE(*,'(a,1pe12.3,i5)') 'Iter time (s) ', titer, nits ! ! Creata HDF5 file ! IF(me.EQ.0) CALL h5file ! CALL mpi_finalize(ierr) !-------------------------------------------------------------------------------- CONTAINS !+++ FUNCTION rhs(r, theta) ! ! Return problem RHS ! USE math_util, ONLY : root_bessj DOUBLE PRECISION, INTENT(in) :: r, theta DOUBLE PRECISION :: rhs DOUBLE PRECISION :: nump ! SELECT CASE(TRIM(prb)) CASE('poly') rhs = REAL(4*(modem+1),8)*r**(modem+1)*COS(REAL(modem,8)*theta) CASE('bess') nump = root_bessj(modem, modep) rhs = r * nump**2 * BESSEL_JN(modem, nump*r) * COS(modem*theta) END SELECT END FUNCTION rhs !+++ FUNCTION sol(r, theta) ! ! Return exact problem solution ! USE math_util, ONLY : root_bessj DOUBLE PRECISION, INTENT(in) :: r(:), theta(:) DOUBLE PRECISION :: sol(SIZE(r),SIZE(theta)) DOUBLE PRECISION :: nump INTEGER :: j ! SELECT CASE(TRIM(prb)) CASE('poly') DO j=1,SIZE(theta) sol(:,j) = (1-r(:)**2) * r(:)**modem * COS(modem*theta(j)) END DO CASE('bess') nump = root_bessj(modem, modep) DO j=1,SIZE(theta) sol(:,j) = BESSEL_JN(modem, nump*r(:)) * COS(modem*theta(j)) END DO END SELECT END FUNCTION sol !+++ SUBROUTINE coefeq(r, theta, idt, idw, c) ! ! Weak form = Int( \nabla(w).\nabla(t) + \sigma.t.w) dV) ! DOUBLE PRECISION, INTENT(in) :: r, theta INTEGER, INTENT(out) :: idt(:,:), idw(:,:) DOUBLE PRECISION, INTENT(out) :: c(:) ! c(1) = r idt(1,1) = 1 idt(1,2) = 0 idw(1,1) = 1 idw(1,2) = 0 ! c(2) = 1.0d0/r idt(2,1) = 0 idt(2,2) = 1 idw(2,1) = 0 idw(2,2) = 1 END SUBROUTINE coefeq !+++ SUBROUTINE h5file USE futils CHARACTER(len=128) :: file='test_mg2d_cyl.h5' INTEGER :: fid INTEGER :: l CHARACTER(len=64) :: dsname CALL creatf(file, fid, real_prec='d') CALL attach(fid, '/', 'NX', n(1)) CALL attach(fid, '/', 'NY', n(2)) CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) CALL attach(fid, '/', 'MODEM', modem) CALL attach(fid, '/', 'MODEP', modep) CALL attach(fid, '/', 'RELAX', relax) CALL attach(fid, '/', 'NITS', nits) CALL attach(fid, '/', 'LEVELS', levels) CALL attach(fid, '/', 'NU1', nu1) CALL attach(fid, '/', 'NU2', nu2) CALL attach(fid, '/', 'NU0', nu0) CALL attach(fid, '/', 'MU', mu) CALL creatg(fid, '/mglevels') DO l=1,levels WRITE(dsname,'("/mglevels/level.",i2.2)') l CALL creatg(fid, TRIM(dsname)) CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata) IF(l.GT.1) THEN CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1)) CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2)) END IF CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x) CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y) CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f) CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v) CALL putarr(fid, TRIM(dsname)//'/f1d', grids(l)%f1d) CALL putarr(fid, TRIM(dsname)//'/v1d', grids(l)%v1d) END DO ! ! Solutions at finest grid ! CALL creatg(fid, '/solutions') CALL putarr(fid, '/solutions/xg', grids(1)%x) CALL putarr(fid, '/solutions/yg', grids(1)%y) CALL putarr(fid, '/solutions/anal', sol_anal_grid) CALL putarr(fid, '/solutions/calc', sol_calc_grid) CALL putarr(fid, '/solutions/direct', sol_direct_grid) ! CALL creatg(fid, '/Iterations') CALL putarr(fid, '/Iterations/residues', resid(0:nits)) CALL putarr(fid, '/Iterations/rresidues', rresid(0:nits)) CALL putarr(fid, '/Iterations/disc_errors', errdisc(0:nits)) ! CALL closef(fid) END SUBROUTINE h5file !+++ END PROGRAM diff --git a/multigrid/src/test_mgp.f90 b/multigrid/src/test_mgp.f90 index 26f3f24..c09190d 100644 --- a/multigrid/src/test_mgp.f90 +++ b/multigrid/src/test_mgp.f90 @@ -1,242 +1,242 @@ !> !> @file test_mgp.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Test multigrid V-cycle for periodic problems ! USE multigrid IMPLICIT NONE ! INTEGER :: nx=8, nidbas=1, ngauss=2, nits=40 INTEGER :: levels=2, nu1=1, nu2=1, mu=1, nu0=1 CHARACTER(len=4) :: relax='jac ' LOGICAL :: nlfixed = .FALSE. DOUBLE PRECISION :: sigma=1.0d0, kmode=10.0, pi=4.0d0*ATAN(1.0d0) DOUBLE PRECISION :: omega=2.0d0/3.0d0 INTEGER :: l, nrank, dim, its DOUBLE PRECISION :: errdisc_dir DOUBLE PRECISION, ALLOCATABLE :: u_direct(:), u_exact(:), u_calc(:) DOUBLE PRECISION, ALLOCATABLE :: sol_direct(:), sol_calc(:), sol_grid(:) DOUBLE PRECISION, ALLOCATABLE :: err(:), resid(:), errdisc(:) DOUBLE PRECISION, ALLOCATABLE :: errdisc_fmg(:) ! TYPE(grid1d), ALLOCATABLE :: gridx(:) TYPE(mg_info) :: info ! NAMELIST /newrun/ nx, nidbas, ngauss, sigma, kmode, & & relax, nits, nlfixed, levels, nu1, nu2, mu, nu0 !-------------------------------------------------------------------------------- ! 1. Prologue ! Inputs ! READ(*,newrun) WRITE(*,newrun) ! levels = MIN(levels, get_lmax(nx)) ! info%nu1 = nu1 info%nu2 = nu2 info%mu = mu info%nu0 = nu0 info%levels = levels info%relax = relax info%omega = omega ! ! Create grids ! ALLOCATE(gridx(levels)) CALL create_grid(nx, nidbas, ngauss, 0, gridx, period=.TRUE.) WRITE(*,'(a/(20i6))') 'Number of intervals in grids', (gridx(l)%n, l=1,levels) ! ! Create FE matrice and set BC u(0)=u(1)=0 ! DO l=1,levels CALL femat(gridx(l)%spl, gridx(l)%matap, coefeq) END DO ! ! Construct RHS only on the finest grid ! nrank = gridx(1)%rank ! Rank of the system (number of unknowns) dim = nrank+nidbas ! Dimension of Splines space CALL disrhs(gridx(1)%spl, gridx(1)%f, rhs) !-------------------------------------------------------------------------------- ! 2. Direct solution ! WRITE(*,'(//a)') 'Direct solution for the finest grid problem' ALLOCATE(u_direct(0:nx)) ALLOCATE(sol_direct(nrank)) ALLOCATE(sol_grid(dim)) ! Required by GRIDVAL ! CALL direct_solve(gridx(1), sol_direct) sol_grid(1:nrank) = sol_direct(1:nrank) sol_grid(nrank+1:dim) = sol_direct(1:nidbas) CALL gridval(gridx(1)%spl, gridx(1)%x, u_direct, 0, sol_grid) ! errdisc_dir = disc_err(gridx(1)%spl, sol_grid, sol) WRITE(*,'(a,1pe12.3)') 'Discretization error', errdisc_dir !-------------------------------------------------------------------------------- ! 3. Solution from MG V-cycles ! WRITE(*,'(//a)') 'Multigrid MG V-cycles' ALLOCATE(sol_calc(nrank)) ALLOCATE(err(0:nits)) ALLOCATE(errdisc(0:nits)) ALLOCATE(resid(0:nits)) ! ! Initial guess ! sol_calc(:) = 0.0d0 sol_grid(:) = 0.0d0 IF(nlfixed) THEN sol_calc(:) = sol_direct(:) sol_grid(1:nrank) = sol_calc(1:nrank) sol_grid(nrank+1:dim) = sol_calc(1:nidbas) END IF gridx(1)%v(:) = sol_calc(:) err(0) = normf(gridx(1)%matmp, sol_calc-sol_direct) errdisc(0) = disc_err(gridx(1)%spl, sol_grid, sol) resid(0) = residue(gridx(1)%matap, gridx(1)%f, sol_calc) ! ! Iterations ! DO its=1,nits ! CALL mg(gridx, info, 1) sol_calc(:) = gridx(1)%v(:) sol_grid(1:nrank) = sol_calc(1:nrank) sol_grid(nrank+1:dim) = sol_calc(1:nidbas) ! err(its) = normf(gridx(1)%matmp, sol_calc-sol_direct) errdisc(its) = disc_err(gridx(1)%spl, sol_grid, sol) ! will call GRIDVAL resid(its) = residue(gridx(1)%matap, gridx(1)%f, sol_calc) END DO ! WRITE(*,'(a4,3(a12,a8))') 'its', 'error', 'ratio', 'residue', 'ratio', & & 'disc. err', 'ratio' WRITE(*,'(i4,3(1pe12.3,8x))') 0, err(0), resid(0), errdisc(0) WRITE(*,'((i4,3(1pe12.3,0pf8.2)))') (its, err(its), err(its)/err(its-1), & & resid(its), resid(its)/resid(its-1), & & errdisc(its), errdisc(its)/errdisc(its-1), its=1,nits) !-------------------------------------------------------------------------------- ! 4. Solution from FMG ! WRITE(*,'(//a)') 'Full Multigrid' ALLOCATE(errdisc_fmg(nits)) DO its=1,nits info%nu0 = its ! CALL fmg(gridx, info, 1) sol_calc(:) = gridx(1)%v(:) sol_grid(1:nrank) = sol_calc(1:nrank) sol_grid(nrank+1:dim) = sol_calc(1:nidbas) ! errdisc_fmg(its) = disc_err(gridx(1)%spl, sol_grid, sol) ! will call GRIDVAL resid(its) = residue(gridx(1)%matap, gridx(1)%f, sol_calc) END DO WRITE(*,'(a4,2(a12,a8))') 'nu0', 'residue', 'ratio','disc. err', 'ratio' WRITE(*,'((i4,2(1pe12.3,0pf8.3)))') (its, resid(its), resid(its)/resid(its-1), & & errdisc_fmg(its),errdisc_fmg(its)/errdisc_dir, its=1,nits) ! ! Grid values at final iteration ! ALLOCATE(u_exact(0:nx)) ALLOCATE(u_calc(0:nx)) u_exact = sol(gridx(1)%x) CALL gridval(gridx(1)%spl, gridx(1)%x, u_calc, 0, sol_grid) !-------------------------------------------------------------------------------- ! 9. Epilogue ! ! Creata HDF5 file ! CALL h5file !-------------------------------------------------------------------------------- CONTAINS FUNCTION rhs(x) DOUBLE PRECISION, INTENT(in) :: x DOUBLE PRECISION :: rhs rhs = SIN(pi*kmode*x) END FUNCTION rhs FUNCTION sol(x) DOUBLE PRECISION, INTENT(in) :: x(:) DOUBLE PRECISION :: sol(SIZE(x)) sol(:) = 1.0d0/((pi*kmode)**2+sigma)*SIN(pi*kmode*x(:)) END FUNCTION sol SUBROUTINE coefeq(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE PRECISION, INTENT(out) :: c(:) c(1) = 1.0d0 idt(1) = 1 idw(1) = 1 c(2) = sigma idt(2) = 0 idw(2) = 0 END SUBROUTINE coefeq SUBROUTINE h5file USE futils CHARACTER(len=128) :: file='test_mgp.h5' INTEGER :: fid INTEGER :: l CHARACTER(len=64) :: dsname CALL creatf(file, fid, real_prec='d') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NIDBAS', nidbas) CALL attach(fid, '/', 'SIGMA', sigma) CALL attach(fid, '/', 'KMODE', kmode) CALL attach(fid, '/', 'RELAX', relax) CALL attach(fid, '/', 'NITS', nits) CALL attach(fid, '/', 'NLFIXED', nlfixed) CALL attach(fid, '/', 'LEVELS', levels) CALL attach(fid, '/', 'NU1', nu1) CALL attach(fid, '/', 'NU2', nu2) CALL attach(fid, '/', 'NU0', nu0) CALL attach(fid, '/', 'MU', mu) CALL creatg(fid, '/mglevels') DO l=1,levels WRITE(dsname,'("/mglevels/level.",i2.2)') l CALL creatg(fid, TRIM(dsname)) CALL putarr(fid, TRIM(dsname)//'/mata', gridx(l)%matap%val) IF(l.GT.1) THEN CALL putarr(fid, TRIM(dsname)//'/matp', gridx(l)%transf%val) CALL attach(fid, TRIM(dsname)//'/matp', 'M', gridx(l)%transf%mrows) CALL attach(fid, TRIM(dsname)//'/matp', 'N', gridx(l)%transf%ncols) END IF CALL putarr(fid, TRIM(dsname)//'/f', gridx(l)%f) CALL putarr(fid, TRIM(dsname)//'/v', gridx(l)%v) END DO CALL creatg(fid, '/Iterations') CALL putarr(fid, '/Iterations/errors', err) CALL putarr(fid, '/Iterations/residues', resid) CALL putarr(fid, '/Iterations/disc_errors', errdisc) CALL putarr(fid, '/Iterations/disc_errors_fmg', errdisc_fmg) CALL putarr(fid, '/Iterations/xgrid', gridx(1)%x) CALL putarr(fid, '/Iterations/u_direct', u_direct) CALL putarr(fid, '/Iterations/u_exact', u_exact) CALL putarr(fid, '/Iterations/u_calc', u_calc) CALL closef(fid) END SUBROUTINE h5file END PROGRAM main diff --git a/multigrid/src/test_relax.f90 b/multigrid/src/test_relax.f90 index 9d00c66..c9de563 100644 --- a/multigrid/src/test_relax.f90 +++ b/multigrid/src/test_relax.f90 @@ -1,227 +1,227 @@ !> !> @file test_relax.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Test different relaxations ! USE multigrid USE math_util, ONLY : root_bessj IMPLICIT NONE ! INTEGER :: nx=8, nidbas=1, alpha=0, nits=40 INTEGER :: modem=22, modep=10 CHARACTER(len=4) :: relax='jac ' LOGICAL :: nlfixed = .FALSE. DOUBLE PRECISION :: sigma=1.0d0, kmode=10.0, pi=4.0d0*ATAN(1.0d0) DOUBLE PRECISION :: omega=2.0d0/3.0d0 INTEGER :: ngauss, i, nrank, its DOUBLE PRECISION, ALLOCATABLE :: u_exact(:), u_direct(:), u_calc(:) DOUBLE PRECISION, ALLOCATABLE :: sol_direct(:) DOUBLE PRECISION, ALLOCATABLE :: sol_calc(:), err(:), resid(:), errdisc(:) DOUBLE PRECISION :: errdisc_dir ! TYPE(grid1d) :: gridx(1) ! NAMELIST /newrun/ nx, nidbas, ngauss, sigma, kmode, modem, modep, & & alpha, relax, omega, nits, nlfixed !-------------------------------------------------------------------------------- ! 1. Prologue: read input, construct matrix and RHS READ(*,newrun) WRITE(*,newrun) ! ! Set grid ! CALL create_grid(nx, nidbas, ngauss, alpha, gridx) ! ! Create FE matrice and set BC u(0)=u(1)=0 ! CALL femat(gridx(1)%spl, gridx(1)%mata, coefeq) nrank = gridx(1)%rank ! ! Left Dirichlet BC (only for Cartesian geometry) IF(alpha .EQ. 0) THEN CALL ibcmat(1, gridx(1)%mata) END IF ! ! Right Dirichlet BC CALL ibcmat(nrank, gridx(1)%mata) ! ! Construct RHS ! CALL disrhs(gridx(1)%spl, gridx(1)%f, rhs) ! ! Left Dirichlet BC (only for Cartesian geometry) IF(alpha .EQ. 0) THEN gridx(1)%f(1) = 0.0d0 END IF ! ! Right Dirichlet BC gridx(1)%f(nrank) = 0.0d0 !-------------------------------------------------------------------------------- ! 2. Direct solution ! ! Direct solutions ! ALLOCATE(sol_direct(nrank)) CALL direct_solve(gridx(1), sol_direct) ! ! Grid values ! ALLOCATE(u_exact(0:nx)) ALLOCATE(u_direct(0:nx)) ALLOCATE(u_calc(0:nx)) ! u_exact = sol(gridx(1)%x) CALL gridval(gridx(1)%spl, gridx(1)%x, u_direct, 0, sol_direct) errdisc_dir = disc_err(gridx(1)%spl, sol_direct, sol) WRITE(*,'(a,1pe12.3)') 'Discretization error', errdisc_dir !-------------------------------------------------------------------------------- ! 3. Relaxation ! ALLOCATE(sol_calc(nrank)) ALLOCATE(err(0:nits)) ALLOCATE(errdisc(0:nits)) ALLOCATE(resid(0:nits)) ! ! Initial guess sol_calc(:) = 0.0d0 IF(nlfixed) THEN sol_calc(:) = sol_direct(:) END IF err(0) = normf(gridx(1)%matm, sol_calc-sol_direct) errdisc(0) = disc_err(gridx(1)%spl, sol_calc, sol) resid(0) = residue(gridx(1)%mata, gridx(1)%f, sol_calc) ! ! Iterations DO its=1,nits SELECT CASE (TRIM(relax)) CASE('jac') CALL jacobi(gridx(1)%mata, omega, 1, sol_calc, gridx(1)%f) CASE('gs') CALL gs(gridx(1)%mata, 1, sol_calc, gridx(1)%f) END SELECT err(its) = normf(gridx(1)%matm, sol_calc-sol_direct) errdisc(its) = disc_err(gridx(1)%spl, sol_calc, sol) resid(its) = residue(gridx(1)%mata, gridx(1)%f, sol_calc) END DO CALL gridval(gridx(1)%spl, gridx(1)%x, u_calc, 0, sol_calc) ! WRITE(*,'(/a4,3a12)') 'its', 'error', 'residue', 'disc. err' WRITE(*,'(i4,3(1pe12.3))') 0, err(0), resid(0), errdisc(0) WRITE(*,'((i4,6(1pe12.3)))') (its, err(its), resid(its), errdisc(its), & & err(its)/err(its-1), resid(its)/resid(its-1), & & errdisc(its)/errdisc(its-1), its=1,nits,MAX(1,nits/10)) ! CALL h5file !-------------------------------------------------------------------------------- CONTAINS !+++ FUNCTION rhs(x) DOUBLE PRECISION, INTENT(in) :: x DOUBLE PRECISION :: rhs DOUBLE PRECISION :: nump SELECT CASE (alpha) CASE(0) ! Cartesian geometry rhs = SIN(pi*kmode*x) CASE(1) ! Cylindrical nump = root_bessj(modem, modep) rhs = x * nump**2 * bessel_jn(modem, nump*x) END SELECT END FUNCTION rhs !+++ FUNCTION sol(x) DOUBLE PRECISION, INTENT(in) :: x(:) DOUBLE PRECISION :: sol(SIZE(x)) DOUBLE PRECISION :: nump SELECT CASE (alpha) CASE(0) ! Cartesian geometry sol(:) = 1.0d0/((pi*kmode)**2+sigma)*SIN(pi*kmode*x(:)) CASE(1) ! Cylindrical nump = root_bessj(modem, modep) sol(:) = bessel_jn(modem, nump*x(:)) END SELECT END FUNCTION sol !+++ SUBROUTINE coefeq(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE PRECISION, INTENT(out) :: c(:) SELECT CASE (alpha) CASE(0) ! Cartesian geometry c(1) = 1.0d0 idt(1) = 1 idw(1) = 1 c(2) = sigma idt(2) = 0 idw(2) = 0 CASE(1) ! Cylindrical c(1) = x idt(1) = 1 idw(1) = 1 c(2) = REAL(modem,8)**2/x idt(2) = 0 idw(2) = 0 END SELECT END SUBROUTINE coefeq !+++ SUBROUTINE h5file USE futils CHARACTER(len=128) :: file='test_relax.h5' INTEGER :: fid INTEGER :: l CHARACTER(len=64) :: dsname CALL creatf(file, fid, real_prec='d') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NIDBAS', nidbas) CALL attach(fid, '/', 'KX', kmode) CALL attach(fid, '/', 'SIGMA', sigma) CALL attach(fid, '/', 'ALPHA', alpha) CALL attach(fid, '/', 'RELAX', relax) CALL attach(fid, '/', 'OMEGA', omega) CALL attach(fid, '/', 'NITS', nits) CALL attach(fid, '/', 'MODEM', modem) CALL attach(fid, '/', 'MODEP', modep) ! ! Solutions at finest grid ! CALL creatg(fid, '/solutions') CALL putarr(fid, '/solutions/xg', gridx(1)%x) CALL putarr(fid, '/solutions/direct', u_direct) CALL putarr(fid, '/solutions/anal', u_exact) CALL putarr(fid, '/solutions/calc', u_calc) ! CALL creatg(fid, '/relaxation') CALL putarr(fid, '/relaxation/errdisc', errdisc) CALL putarr(fid, '/relaxation/resid', resid) ! ! Store FE matrix ! CALL putmat(fid, '/MATA', gridx(1)%mata) ! CALL closef(fid) END SUBROUTINE h5file !+++ END PROGRAM main diff --git a/multigrid/src/test_relax2d.f90 b/multigrid/src/test_relax2d.f90 index 09583c5..932dfd5 100644 --- a/multigrid/src/test_relax2d.f90 +++ b/multigrid/src/test_relax2d.f90 @@ -1,334 +1,334 @@ !> !> @file test_relax2d.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Test 2d direcxt solve and relaxation methods ! USE multigrid USE csr IMPLICIT NONE INCLUDE 'mpif.h' ! DOUBLE PRECISION, PARAMETER :: PI=4.0d0*ATAN(1.0d0) INTEGER, DIMENSION(2) :: n, nidbas, ngauss, alpha DOUBLE PRECISION :: kx=4.d0, ky=3.d0, sigma=10.0d0 INTEGER :: levels=1, nits=1000 CHARACTER(len=4) :: relax='jac ' DOUBLE PRECISION :: omega=2.0d0/3.0d0 DOUBLE PRECISION :: t0 ! DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:) DOUBLE PRECISION :: dx, dy INTEGER :: ix, iy ! DOUBLE PRECISION, ALLOCATABLE, TARGET :: sol_direct(:,:), sol_relax(:,:) DOUBLE PRECISION, POINTER :: sol_direct_1d(:), sol_relax_1d(:) DOUBLE PRECISION, ALLOCATABLE :: sol_direct_grid(:,:) DOUBLE PRECISION, ALLOCATABLE :: sol_anal_grid(:,:) DOUBLE PRECISION, ALLOCATABLE :: resid(:), errdisc(:) ! INTEGER :: ierr, me INTEGER :: l, nterms INTEGER :: its ! TYPE(grid2d), ALLOCATABLE :: grids(:) ! NAMELIST /newrun/ n, nidbas, ngauss, kx, ky, sigma, alpha, levels, nits, relax !-------------------------------------------------------------------------------- ! 1. Prologue ! CALL mpi_init(ierr) CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) ! ! Inputs ! n = (/8, 8/) nidbas=(/3,3/) ngauss=(/2,2/) alpha = (/0,0/) kx=4 ky=3 sigma=10.0d0 levels=2 relax='jac' nits=100 ! IF(me.EQ.0) THEN READ(*,newrun) WRITE(*,newrun) END IF CALL mpi_bcast(n, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(alpha, 2, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(kx, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(ky, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(sigma, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(levels, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nits, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(relax, LEN(relax), MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) ! ! Adjust number of levels ! levels = MIN(levels, get_lmax(n(1)), get_lmax(n(2))) ! ! Create grids ! dx = 1.0d0/REAL(n(1),8) dy = 1.0d0/REAL(n(2),8) ALLOCATE(x(0:n(1)), y(0:n(2))) DO ix=0,n(1) x(ix) = ix*dx END DO DO iy=0,n(2) y(iy) = iy*dy END DO ! ALLOCATE(grids(levels)) CALL create_grid(x, y, nidbas, ngauss, alpha, grids) WRITE(*,'(5a6)') 'l', 'nx', 'ny', 'rx', 'ry' WRITE(*,'(5i6)') (l, grids(l)%n, grids(l)%rank, l=1,levels) ! ! Construct RHS and set BC only on the finest grid ! CALL disrhs(grids(1)%spl, grids(1)%f, rhs) CALL ibcrhs(grids(1), grids(1)%f) !!$ CALL printmat('** RHS **', grids(1)%f) ! ! Build FE matrices and set BC ! nterms = 3 DO l=1,levels CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms) CALL ibcmat(grids(l), grids(l)%mata) CALL to_mat(grids(l)%mata) END DO !-------------------------------------------------------------------------------- ! 1. Direct solution (at the finest grid, l=1) ! WRITE(*,'(//a)') 'Direct solution for the finest grid problem' ALLOCATE(sol_direct(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)), & & source=grids(1)%f) sol_direct_1d(1:SIZE(grids(1)%v1d)) => sol_direct PRINT*, 'shape of sol_direct', SHAPE(sol_direct) PRINT*, 'shape of sol_direct_1d', SHAPE(sol_direct_1d) ! t0 = mpi_wtime() CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.) WRITE(*,'(a,1pe12.3)') 'Fact. + solve time (s) =', mpi_wtime()-t0 ! sol_direct = grids(1)%f t0 = mpi_wtime() CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.) WRITE(*,'(a,1pe12.3)') 'Solve time (s) =', mpi_wtime()-t0 ! ALLOCATE(sol_direct_grid(0:n(1),0:n(2))) ALLOCATE(sol_anal_grid(0:n(1),0:n(2))) CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_direct_grid, & & [0,0], sol_direct) sol_anal_grid = sol(grids(1)%x, grids(1)%y) WRITE(*,'(a,2(1pe12.3))') 'Discretization error and residue =', & & disc_err(grids(1)%spl, sol_direct, sol), & & residue(grids(1)%mata, grids(1)%f1d, sol_direct_1d) !-------------------------------------------------------------------------------- ! 2. Relaxation (at the finest grid, l=1) ! ALLOCATE(errdisc(0:nits)) ALLOCATE(resid(0:nits)) ALLOCATE(sol_relax(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2))) sol_relax_1d(1:SIZE(grids(1)%v1d)) => sol_relax ! sol_relax_1d = 0.0d0 errdisc(0) = disc_err(grids(1)%spl, sol_relax, sol) resid(0) = residue(grids(1)%mata, grids(1)%f1d, sol_relax_1d) t0 = mpi_wtime() DO its=1,nits SELECT CASE (TRIM(relax)) CASE('jac') CALL jacobi(grids(1)%mata, omega, 1, sol_relax_1d, grids(1)%f1d) CASE('gs') CALL gs(grids(1)%mata, 1, sol_relax_1d, grids(1)%f1d) END SELECT errdisc(its) = disc_err(grids(1)%spl, sol_relax, sol) resid(its) = residue(grids(1)%mata, grids(1)%f1d, sol_relax_1d) END DO WRITE(*,'(a,1pe12.3)') 'Iterative solve time (s/iteration) =', (mpi_wtime()-t0)/REAL(nits,8) ! WRITE(*,'(/a4,3a12)') 'its', 'residue', 'disc. err' WRITE(*,'(i4,3(1pe12.3))') 0, resid(0), errdisc(0) WRITE(*,'((i4,4(1pe12.3)))') (its, resid(its), errdisc(its), & & resid(its)/resid(its-1), & & errdisc(its)/errdisc(its-1), its=1,nits,MAX(1,nits/10)) !-------------------------------------------------------------------------------- ! 9. Epilogue ! ! Creata HDF5 file ! IF(me.EQ.0) CALL h5file ! CALL mpi_finalize(ierr) !-------------------------------------------------------------------------------- CONTAINS !+++ FUNCTION rhs(x, y) ! ! Return problem RHS ! DOUBLE PRECISION, INTENT(in) :: x, y DOUBLE PRECISION :: rhs rhs = SIN(PI*kx*x)*SIN(PI*ky*y) END FUNCTION rhs !+++ FUNCTION sol(x, y) ! ! Return exact problem solution ! DOUBLE PRECISION, INTENT(in) :: x(:), y(:) DOUBLE PRECISION :: sol(SIZE(x),SIZE(y)) DOUBLE PRECISION :: c INTEGER :: j DO j=1,SIZE(y) c = SIN(PI*ky*y(j)) / (PI**2*(kx**2+ky**2) + sigma**2) sol(:,j) = c * SIN(PI*kx*x(:)) END DO END FUNCTION sol !+++ SUBROUTINE coefeq(x, y, idt, idw, c) ! ! Weak form = Int( \nabla(w).\nabla(t) + \sigma.t.w) dV) ! DOUBLE PRECISION, INTENT(in) :: x, y INTEGER, INTENT(out) :: idt(:,:), idw(:,:) DOUBLE PRECISION, INTENT(out) :: c(:) ! c(1) = 1.0d0 idt(1,1) = 1 idt(1,2) = 0 idw(1,1) = 1 idw(1,2) = 0 ! c(2) = 1.0d0 idt(2,1) = 0 idt(2,2) = 1 idw(2,1) = 0 idw(2,2) = 1 ! c(3) = sigma idt(3,1) = 0 idt(3,2) = 0 idw(3,1) = 0 idw(3,2) = 0 END SUBROUTINE coefeq !+++ SUBROUTINE h5file USE futils CHARACTER(len=128) :: file='test_relax2d.h5' INTEGER :: fid INTEGER :: l CHARACTER(len=64) :: dsname CALL creatf(file, fid, real_prec='d') CALL attach(fid, '/', 'NX', n(1)) CALL attach(fid, '/', 'NY', n(2)) CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) CALL attach(fid, '/', 'KX', kx) CALL attach(fid, '/', 'KY', ky) CALL attach(fid, '/', 'SIGMA', sigma) CALL attach(fid, '/', 'ALPHA1', alpha(1)) CALL attach(fid, '/', 'ALPHA2', alpha(2)) CALL attach(fid, '/', 'LEVELS', levels) CALL attach(fid, '/', 'RELAX', relax) CALL attach(fid, '/', 'NITS', nits) CALL creatg(fid, '/mglevels') DO l=1,levels WRITE(dsname,'("/mglevels/level.",i2.2)') l CALL creatg(fid, TRIM(dsname)) CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata) IF(l.GT.1) THEN CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1)) CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2)) END IF CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x) CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y) CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f) CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v) CALL putarr(fid, TRIM(dsname)//'/f1d', grids(l)%f1d) CALL putarr(fid, TRIM(dsname)//'/v1d', grids(l)%v1d) END DO ! ! Solutions at finest grid ! CALL creatg(fid, '/solutions') CALL putarr(fid, '/solutions/xg', grids(1)%x) CALL putarr(fid, '/solutions/yg', grids(1)%y) CALL putarr(fid, '/solutions/direct', sol_direct_grid) CALL putarr(fid, '/solutions/anal', sol_anal_grid) ! CALL creatg(fid, '/relaxation') CALL putarr(fid, '/relaxation/errdisc', errdisc) CALL putarr(fid, '/relaxation/resid', resid) ! IF(ALLOCATED(grids(1)%mata%mumps)) THEN CALL myputmat(fid, '/MUMPS', grids(1)%mata%mumps) END IF ! CALL closef(fid) END SUBROUTINE h5file !+++ SUBROUTINE myputmat(fid, label, mat, str) ! ! Write matrix to hdf5 file ! USE futils ! INTEGER, INTENT(in) :: fid CHARACTER(len=*), INTENT(in) :: label TYPE(mumps_mat) :: mat CHARACTER(len=*), OPTIONAL, INTENT(in) :: str CHARACTER(len=128) :: mumps_grp ! IF(PRESENT(str)) THEN CALL creatg(fid, label, str) ELSE CALL creatg(fid, label) END IF CALL attach(fid, label, 'RANK', mat%rank) CALL attach(fid, label, 'NNZ', mat%nnz) CALL attach(fid, label, 'NLSYM', mat%nlsym) CALL attach(fid, label, 'NLPOS', mat%nlpos) CALL putarr(fid, TRIM(label)//'/irow', mat%irow) CALL putarr(fid, TRIM(label)//'/cols', mat%mumps_par%JCN_loc) CALL putarr(fid, TRIM(label)//'/val', mat%mumps_par%A_loc) ! mumps_grp = TRIM(label)//'/mumps_par' CALL creatg(fid, mumps_grp) CALL attach(fid, mumps_grp, 'PAR', mat%mumps_par%PAR) CALL attach(fid, mumps_grp, 'SYM', mat%mumps_par%SYM) CALL putarr(fid, TRIM(mumps_grp)//'/IRN', mat%mumps_par%IRN_loc) ! END SUBROUTINE myputmat END PROGRAM diff --git a/multigrid/src/test_relax2d_cyl.f90 b/multigrid/src/test_relax2d_cyl.f90 index c02b199..d26d308 100644 --- a/multigrid/src/test_relax2d_cyl.f90 +++ b/multigrid/src/test_relax2d_cyl.f90 @@ -1,369 +1,369 @@ !> !> @file test_relax2d_cyl.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Test 2d direcxt solve and relaxation methods ! Cylindrical case ! USE multigrid USE csr IMPLICIT NONE INCLUDE 'mpif.h' ! DOUBLE PRECISION, PARAMETER :: PI=4.0d0*ATAN(1.0d0) INTEGER, DIMENSION(2) :: n, nidbas, ngauss INTEGER :: modem=22, modep=10 INTEGER :: levels=1, nits=1000 CHARACTER(len=4) :: relax='jac ', prb='poly' DOUBLE PRECISION :: omega=2.0d0/3.0d0 LOGICAL :: nluniq=.TRUE. LOGICAL :: nlfixed=.FALSE. DOUBLE PRECISION :: t0 DOUBLE PRECISION :: resid_direct, errdisc_direct ! DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:) DOUBLE PRECISION :: dx, dy INTEGER :: ix, iy ! DOUBLE PRECISION, ALLOCATABLE, TARGET :: sol_direct(:,:), sol_relax(:,:) DOUBLE PRECISION, POINTER :: sol_direct_1d(:), sol_relax_1d(:) DOUBLE PRECISION, ALLOCATABLE :: sol_direct_orig(:,:), sol_relax_orig(:,:) DOUBLE PRECISION, ALLOCATABLE :: sol_direct_grid(:,:) DOUBLE PRECISION, ALLOCATABLE :: sol_relax_grid(:,:) DOUBLE PRECISION, ALLOCATABLE :: sol_anal_grid(:,:) DOUBLE PRECISION, ALLOCATABLE :: resid(:), errdisc(:) ! INTEGER :: ierr, me INTEGER :: l, nterms, j INTEGER :: its ! TYPE(grid2d), ALLOCATABLE :: grids(:) ! NAMELIST /newrun/ n, nidbas, ngauss, modem, modep, levels, omega, nits, & & relax, prb, nlfixed, nluniq !-------------------------------------------------------------------------------- ! 1. Prologue ! CALL mpi_init(ierr) CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) ! ! Inputs ! n = (/8, 8/) nidbas=(/3,3/) ngauss=(/2,2/) modem = 22 modep = 10 levels=2 relax='jac' prb='poly' nits=100 nluniq = .TRUE. ! IF(me.EQ.0) THEN READ(*,newrun) WRITE(*,newrun) END IF CALL mpi_bcast(n, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(modem, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(modep, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(levels, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nits, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(omega, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nlfixed, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nluniq, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(relax, LEN(relax), MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(prb, LEN(prb), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) ! ! Adjust number of levels ! levels = MIN(levels, get_lmax(n(1)), get_lmax(n(2))) ! ! Create grids ! dx = 1.0d0/REAL(n(1),8) dy = 2.0d0*pi/REAL(n(2),8) ALLOCATE(x(0:n(1)), y(0:n(2))) DO ix=0,n(1) x(ix) = ix*dx END DO DO iy=0,n(2) y(iy) = iy*dy END DO ! ALLOCATE(grids(levels)) CALL create_grid(x, y, nidbas, ngauss, [1, 0], grids, period=[.FALSE., .TRUE.], & & debug_in=.FALSE.) WRITE(*,'(5a6)') 'l', 'nx', 'ny', 'rx', 'ry' WRITE(*,'(5i6)') (l, grids(l)%n, grids(l)%rank, l=1,levels) ! ! Construct RHS and set BC only on the finest grid ! CALL disrhs(grids(1)%spl, grids(1)%f, rhs) CALL ibcrhs(grids(1), grids(1)%f, nluniq_in=nluniq) !!$ CALL printmat('** RHS **', grids(1)%f) ! ! Build FE matrices and set BC ! nterms = 2 DO l=1,levels CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms) CALL ibcmat(grids(l), grids(l)%mata, nluniq_in=nluniq) CALL to_mat(grids(l)%mata) END DO !-------------------------------------------------------------------------------- ! 1. Direct solution (at the finest grid, l=1) ! WRITE(*,'(//a)') 'Direct solution for the finest grid problem' ! ALLOCATE(sol_direct(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)), & & source=grids(1)%f) sol_direct_1d(1:SIZE(grids(1)%v1d)) => sol_direct ! ALLOCATE(sol_direct_orig(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2))) ! PRINT*, 'shape of sol_direct', SHAPE(sol_direct) PRINT*, 'shape of sol_direct_1d', SHAPE(sol_direct_1d) ! t0 = mpi_wtime() CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.) WRITE(*,'(a,1pe12.3)') 'Fact. + solve time (s) =', mpi_wtime()-t0 ! sol_direct = grids(1)%f t0 = mpi_wtime() CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.) resid_direct = residue(grids(1)%mata, grids(1)%f1d, sol_direct_1d) ! sol_direct_orig = sol_direct CALL back_transf(grids(1), sol_direct_orig, nluniq_in=nluniq) errdisc_direct = disc_err(grids(1)%spl, sol_direct_orig, sol) ! WRITE(*,'(a,1pe12.3)') 'Solve time (s) =', mpi_wtime()-t0 ! ALLOCATE(sol_direct_grid(0:n(1),0:n(2))) ALLOCATE(sol_anal_grid(0:n(1),0:n(2))) ! CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_direct_grid, & & [0,0], sol_direct_orig) ! sol_anal_grid = sol(grids(1)%x, grids(1)%y) WRITE(*,'(a,2(1pe12.3))') 'Discretization error and residue =', & & errdisc_direct, resid_direct WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & & NORM2(sol_anal_grid-sol_direct_grid) / NORM2(sol_anal_grid) !-------------------------------------------------------------------------------- ! 2. Relaxation (at the finest grid, l=1) ! ALLOCATE(errdisc(0:nits)) ALLOCATE(resid(0:nits)) ALLOCATE(sol_relax(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2))) ALLOCATE(sol_relax_orig(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2))) sol_relax_1d(1:SIZE(grids(1)%v1d)) => sol_relax ! ! Initial guess ! IF(nlfixed) THEN sol_relax = sol_direct ! Test fixed point\ ELSE sol_relax = 0.0d0 END IF ! sol_relax_orig = sol_relax CALL back_transf(grids(1), sol_relax_orig, nluniq_in=nluniq) errdisc(0) = disc_err(grids(1)%spl, sol_relax_orig, sol) resid(0) = residue(grids(1)%mata, grids(1)%f1d, sol_relax_1d) ! t0 = mpi_wtime() DO its=1,nits SELECT CASE (TRIM(relax)) CASE('jac') CALL jacobi(grids(1)%mata, omega, 1, sol_relax_1d, grids(1)%f1d) CASE('gs') CALL gs(grids(1)%mata, 1, sol_relax_1d, grids(1)%f1d) END SELECT resid(its) = residue(grids(1)%mata, grids(1)%f1d, sol_relax_1d) ! sol_relax_orig = sol_relax CALL back_transf(grids(1), sol_relax_orig, nluniq_in=nluniq) ! errdisc(its) = disc_err(grids(1)%spl, sol_relax_orig, sol) END DO WRITE(*,'(a,1pe12.3)') 'Iterative solve time (s/iteration) =', (mpi_wtime()-t0)/REAL(nits,8) ! ALLOCATE(sol_relax_grid(0:n(1),0:n(2))) CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_relax_grid, & & [0,0], sol_relax_orig) ! WRITE(*,'(/a4,3a12)') 'its', 'residue', 'disc. err' WRITE(*,'(i4,3(1pe12.3))') 0, resid(0), errdisc(0) WRITE(*,'((i4,4(1pe12.3)))') (its, resid(its), errdisc(its), & & resid(its)/resid(its-1), & & errdisc(its)/errdisc(its-1), its=1,nits,MAX(1,nits/10)) !-------------------------------------------------------------------------------- ! 9. Epilogue ! ! Creata HDF5 file ! IF(me.EQ.0) CALL h5file ! CALL mpi_finalize(ierr) !-------------------------------------------------------------------------------- CONTAINS !+++ FUNCTION norm2(x) ! ! Compute the 2-norm of array x ! IMPLICIT NONE DOUBLE PRECISION :: norm2 DOUBLE PRECISION, INTENT(in) :: x(:,:) DOUBLE PRECISION :: sum2 INTEGER :: i, j ! sum2 = 0.0d0 DO i=1,SIZE(x,1) DO j=1,SIZE(x,2) sum2 = sum2 + x(i,j)**2 END DO END DO norm2 = SQRT(sum2) END FUNCTION norm2 !+++ FUNCTION rhs(r, theta) ! ! Return problem RHS ! USE math_util, ONLY : root_bessj DOUBLE PRECISION, INTENT(in) :: r, theta DOUBLE PRECISION :: rhs DOUBLE PRECISION :: nump ! SELECT CASE(TRIM(prb)) CASE('poly') rhs = REAL(4*(modem+1),8)*r**(modem+1)*COS(REAL(modem,8)*theta) CASE('bess') nump = root_bessj(modem, modep) rhs = r * nump**2 * BESSEL_JN(modem, nump*r) * COS(modem*theta) END SELECT END FUNCTION rhs !+++ FUNCTION sol(r, theta) ! ! Return exact problem solution ! USE math_util, ONLY : root_bessj DOUBLE PRECISION, INTENT(in) :: r(:), theta(:) DOUBLE PRECISION :: sol(SIZE(r),SIZE(theta)) DOUBLE PRECISION :: nump INTEGER :: j ! SELECT CASE(TRIM(prb)) CASE('poly') DO j=1,SIZE(theta) sol(:,j) = (1-r(:)**2) * r(:)**modem * COS(modem*theta(j)) END DO CASE('bess') nump = root_bessj(modem, modep) DO j=1,SIZE(theta) sol(:,j) = BESSEL_JN(modem, nump*r(:)) * COS(modem*theta(j)) END DO END SELECT END FUNCTION sol !+++ SUBROUTINE coefeq(r, theta, idt, idw, c) ! ! Weak form = Int( \nabla(w).\nabla(t) + \sigma.t.w) dV) ! DOUBLE PRECISION, INTENT(in) :: r, theta INTEGER, INTENT(out) :: idt(:,:), idw(:,:) DOUBLE PRECISION, INTENT(out) :: c(:) ! c(1) = r idt(1,1) = 1 idt(1,2) = 0 idw(1,1) = 1 idw(1,2) = 0 ! c(2) = 1.0d0/r idt(2,1) = 0 idt(2,2) = 1 idw(2,1) = 0 idw(2,2) = 1 END SUBROUTINE coefeq !+++ SUBROUTINE h5file USE futils CHARACTER(len=128) :: file='test_relax2d_cyl.h5' INTEGER :: fid INTEGER :: l CHARACTER(len=64) :: dsname CALL creatf(file, fid, real_prec='d') CALL attach(fid, '/', 'NX', n(1)) CALL attach(fid, '/', 'NY', n(2)) CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) CALL attach(fid, '/', 'MODEM', modem) CALL attach(fid, '/', 'MODEP', modep) CALL attach(fid, '/', 'LEVELS', levels) CALL attach(fid, '/', 'RELAX', relax) CALL attach(fid, '/', 'NITS', nits) CALL attach(fid, '/', 'OMEGA', omega) CALL creatg(fid, '/mglevels') DO l=1,levels WRITE(dsname,'("/mglevels/level.",i2.2)') l CALL creatg(fid, TRIM(dsname)) CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata) IF(l.GT.1) THEN CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1)) CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2)) END IF CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x) CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y) CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f) CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v) CALL putarr(fid, TRIM(dsname)//'/f1d', grids(l)%f1d) CALL putarr(fid, TRIM(dsname)//'/v1d', grids(l)%v1d) END DO ! ! Solutions at finest grid ! CALL creatg(fid, '/solutions') CALL putarr(fid, '/solutions/xg', grids(1)%x) CALL putarr(fid, '/solutions/yg', grids(1)%y) CALL putarr(fid, '/solutions/direct', sol_direct_grid) CALL putarr(fid, '/solutions/relax', sol_relax_grid) CALL putarr(fid, '/solutions/anal', sol_anal_grid) ! CALL creatg(fid, '/relaxation') CALL putarr(fid, '/relaxation/errdisc', errdisc) CALL putarr(fid, '/relaxation/resid', resid) ! CALL closef(fid) END SUBROUTINE h5file !+++ END PROGRAM diff --git a/multigrid/src/test_stencil.f90 b/multigrid/src/test_stencil.f90 index e1a0dc0..934af01 100644 --- a/multigrid/src/test_stencil.f90 +++ b/multigrid/src/test_stencil.f90 @@ -1,238 +1,238 @@ !> !> @file test_stencil.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> MODULE mod USE iso_fortran_env, ONLY : real64 IMPLICIT NONE ! INTEGER, PARAMETER :: rkind = real64 LOGICAL, PARAMETER :: nldebug=.FALSE. REAL(rkind), PARAMETER :: PI=4.d0*ATAN(1.0d0) CONTAINS END MODULE mod !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ PROGRAM main USE mpi USE pputils2, ONLY : dist1d, exchange, norm2_vec=>ppnorm2 USE stencil, ONLY : stencil_2d, init, laplacian, vmx, putmat USE mod IMPLICIT NONE ! INTEGER, PARAMETER :: ndims=2 ! INTEGER :: me, neighs(4), npes, ierr INTEGER, DIMENSION(ndims) :: dims=[0,0] INTEGER, DIMENSION(ndims) :: coords, comm1d LOGICAL, DIMENSION(ndims) :: periods=[.FALSE.,.FALSE.] LOGICAL :: reorder =.FALSE. INTEGER :: comm_cart ! INTEGER :: nx=4, ny=4 ! Number of intervals INTEGER, DIMENSION(ndims) :: e, s, lb, ub, npt_glob, npt_loc ! REAL(rkind), ALLOCATABLE :: xgrid(:), ygrid(:) REAL(rkind), ALLOCATABLE :: arr(:,:), fexact(:,:) REAL(rkind), ALLOCATABLE :: barr1(:,:), barr2(:,:), barr3(:,:) REAL(rkind) :: dx, dy REAL(rkind) :: err INTEGER, DIMENSION(5,2) :: id ! 5-point stencil INTEGER :: npoints TYPE(stencil_2d) :: mat INTEGER :: i, j ! NAMELIST /in/ nx, ny !================================================================================ ! 1.0 Prologue ! ! 2D process grid CALL mpi_init(ierr) CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) CALL mpi_dims_create(npes, ndims, dims, ierr) CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, comm_cart,& & ierr) ! CALL mpi_comm_rank(comm_cart, me, ierr) CALL mpi_cart_coords(comm_cart, me, ndims, coords, ierr) CALL mpi_cart_shift(comm_cart, 0, 1, neighs(1), neighs(2), ierr) CALL mpi_cart_shift(comm_cart, 1, 1, neighs(3), neighs(4), ierr) ! CALL mpi_cart_sub(comm_cart, [.TRUE.,.FALSE.], comm1d(1), ierr) CALL mpi_cart_sub(comm_cart, [.FALSE.,.TRUE.], comm1d(2), ierr) ! ! Read problem inputs IF(me.EQ.0) THEN READ(*,in) WRITE(*,in) END IF CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, comm_cart, ierr) CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, comm_cart, ierr) !================================================================================ ! 2.0 2d Grid construction ! ! Partition 2D grid npt_glob(1) = nx+1 npt_glob(2) = ny+1 DO i=1,ndims CALL dist1d(comm1d(i), 0, npt_glob(i), s(i), npt_loc(i)) e(i) = s(i) + npt_loc(i) - 1 END DO WRITE(*,'(a,i3.3,a,2(3i4,", "))') 'PE', me, ' coords, s,e:', & & (coords(i),s(i),e(i),i=1,ndims) ! ! Global mesh dx = 1.0d0/REAL(nx) dy = 1.0d0/REAL(ny) ALLOCATE(xgrid(0:nx)) ALLOCATE(ygrid(0:ny)) xgrid = [ (i*dx, i=0,nx) ] ygrid = [ (i*dy, i=0,ny) ] !================================================================================ ! 3.0 FD Laplacian ! id=RESHAPE([ 0, -1, 0, 1, 0, & 0, 0,-1, 0, 1], & [5,2]) npoints = 5 CALL init(s, e, id, .FALSE., mat, comm_cart) ! CALL laplacian(dx, dy, mat) !================================================================================ ! 4.0 Check matrice-vector product ! ! Local arrays with ghost cells lb = mat%s-1 ub = mat%e+1 ALLOCATE(arr(lb(1):ub(1),lb(2):ub(2))) ALLOCATE(fexact(lb(1):ub(1),lb(2):ub(2))) ALLOCATE(barr1(lb(1):ub(1),lb(2):ub(2))) ALLOCATE(barr2(lb(1):ub(1),lb(2):ub(2))) ALLOCATE(barr3(lb(1):ub(1),lb(2):ub(2))) ! ! Constant vector => Laplacian = 0 barr1 = 0 arr = 1.0 barr1 = vmx(mat,arr) IF(mat%s(1).EQ.0) barr1(0,:) = 0.0 ! discard boundary values IF(mat%e(1).EQ.nx) barr1(nx,:) = 0.0 IF(mat%s(2).EQ.0) barr1(:,0) = 0.0 IF(mat%e(2).EQ.ny) barr1(:,ny) = 0.0 err = norm2_vec(barr1,comm_cart,root=0,garea=[1,1]) IF(me.EQ.0) THEN WRITE(*,'(/a,1pe12.3)') 'Constant vector: ||B1|| =', err END IF ! ! Bilinear vector => Laplacian = 0 arr =0.0d0 barr2=0.0d0 DO j=mat%s(2),mat%e(2) DO i=mat%s(1),mat%e(1) arr(i,j) = xgrid(i)*ygrid(j) END DO END DO CALL exchange(comm_cart, arr) barr2 = vmx(mat,arr) IF(mat%s(1).EQ.0) barr2(0,:) = 0.0 ! discard boundary values IF(mat%e(1).EQ.nx) barr2(nx,:) = 0.0 IF(mat%s(2).EQ.0) barr2(:,0) = 0.0 IF(mat%e(2).EQ.ny) barr2(:,ny) = 0.0 err = norm2_vec(barr2, comm_cart,root=0,garea=[1,1]) IF(me.EQ.0) THEN WRITE(*,'(a,1pe12.3)') 'Bilinear vector: ||B2|| =', err END IF ! ! Biquadratic vector => Laplacian = fexact DO j=mat%s(2),mat%e(2) DO i=mat%s(1),mat%e(1) arr(i,j) = (xgrid(i)*ygrid(j))**2/4.0d0 fexact(i,j) = (xgrid(i)**2 + ygrid(j)**2)/2.0d0 END DO END DO CALL exchange(comm_cart, arr) CALL exchange(comm_cart, fexact) barr3 = vmx(mat,arr) - fexact IF(mat%s(1).EQ.0) barr3(0,:) = 0.0 ! discard boundary values IF(mat%e(1).EQ.nx) barr3(nx,:) = 0.0 IF(mat%s(2).EQ.0) barr3(:,0) = 0.0 IF(mat%e(2).EQ.ny) barr3(:,ny) = 0.0 err = norm2_vec(barr3,comm_cart,root=0,garea=[1,1]) IF(me.EQ.0) THEN WRITE(*,'(a,1pe12.3)') 'Biquadratic vector: ||B3|| =', err END IF !================================================================================ ! 9.0 Epilogue CALL h5file CALL MPI_FINALIZE(ierr) CONTAINS SUBROUTINE disp(str, arr) CHARACTER(len=*), INTENT(in) :: str REAL(rkind), INTENT(in) :: arr(:,:) INTEGER :: j WRITE(*,'(/a)') str DO j=1,SIZE(arr,2) WRITE(*,'(10f8.3)') arr(:,j) END DO END SUBROUTINE disp ! SUBROUTINE h5file USE futils CHARACTER(len=128) :: file='test_stencil.h5' INTEGER :: fid CALL creatf(file, fid, real_prec='d', mpicomm=comm_cart) CALL putarr(fid, '/xgrid', xgrid, ionode=0) ! only rank 0 does IO CALL putarr(fid, '/ygrid', ygrid, ionode=0) ! only rank 0 does IO CALL putarrnd(fid, '/barr1', barr1,(/1,2/), garea=(/1,1/)) CALL putarrnd(fid, '/barr2', barr2,(/1,2/), garea=(/1,1/)) CALL putarrnd(fid, '/barr3', barr3,(/1,2/), garea=(/1,1/)) CALL putmat(fid, '/MAT', mat) CALL closef(fid) END SUBROUTINE h5file ! FUNCTION outerprod(x, y) RESULT(r) ! ! outer product ! REAL(rkind), INTENT(in) :: x(:), y(:) REAL(rkind) :: r(SIZE(x),SIZE(y)) INTEGER :: i, j DO j=1,SIZE(y) DO i=1,SIZE(x) r(i,j) = x(i)*y(j) END DO END DO END FUNCTION outerprod ! FUNCTION rhs(x,y) REAL(rkind), INTENT(in) :: x(:), y(:) REAL(rkind) :: rhs(SIZE(x),SIZE(y)) rhs = -10.d0*pi**2 * outerprod(SIN(pi*x), SIN(3.d0*pi*y)) END FUNCTION rhs ! FUNCTION exact(x,y) REAL(rkind), INTENT(in) :: x(:), y(:) REAL(rkind) :: exact(SIZE(x),SIZE(y)) exact = outerprod(SIN(pi*x), SIN(3.d0*pi*y)) END FUNCTION exact END PROGRAM main diff --git a/multigrid/src/test_stencilg.f90 b/multigrid/src/test_stencilg.f90 index 0b920cc..0696dd2 100644 --- a/multigrid/src/test_stencilg.f90 +++ b/multigrid/src/test_stencilg.f90 @@ -1,203 +1,203 @@ !> !> @file test_stencilg.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> MODULE mod USE iso_fortran_env, ONLY : rkind => real64 IMPLICIT NONE ! LOGICAL, PARAMETER :: nldebug=.FALSE. REAL(rkind), PARAMETER :: PI=4.d0*ATAN(1.0d0) CONTAINS END MODULE mod !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ PROGRAM main USE mpi USE pputils2, ONLY : dist1d USE gvector, ONLY : gvector_2d, ASSIGNMENT(=), OPERATOR(-) USE parmg, ONLY : exchange, norm_vec USE stencil, ONLY : stencil_2d, init, laplacian, putmat, OPERATOR(*) USE mod IMPLICIT NONE ! INTEGER, PARAMETER :: ndims=2 ! INTEGER :: me, neighs(4), npes, ierr INTEGER, DIMENSION(ndims) :: dims=[0,0] INTEGER, DIMENSION(ndims) :: coords, comm1d LOGICAL, DIMENSION(ndims) :: periods=[.FALSE.,.FALSE.] LOGICAL :: reorder =.FALSE. INTEGER :: comm_cart ! INTEGER :: nx=4, ny=4 ! Number of intervals INTEGER, DIMENSION(ndims) :: e, s, g, npt_glob, npt_loc ! REAL(rkind), ALLOCATABLE :: xgrid(:), ygrid(:) TYPE(gvector_2d) :: arr, fexact TYPE(gvector_2d) :: barr1, barr2, barr3 REAL(rkind) :: dx, dy REAL(rkind) :: err INTEGER, DIMENSION(5,2) :: id ! 5-point stencil INTEGER :: npoints TYPE(stencil_2d) :: mat INTEGER :: i, j ! NAMELIST /in/ nx, ny !================================================================================ ! 1.0 Prologue ! ! 2D process grid CALL mpi_init(ierr) CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) CALL mpi_dims_create(npes, ndims, dims, ierr) CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, comm_cart,& & ierr) ! CALL mpi_comm_rank(comm_cart, me, ierr) CALL mpi_cart_coords(comm_cart, me, ndims, coords, ierr) CALL mpi_cart_shift(comm_cart, 0, 1, neighs(1), neighs(2), ierr) CALL mpi_cart_shift(comm_cart, 1, 1, neighs(3), neighs(4), ierr) ! CALL mpi_cart_sub(comm_cart, [.TRUE.,.FALSE.], comm1d(1), ierr) CALL mpi_cart_sub(comm_cart, [.FALSE.,.TRUE.], comm1d(2), ierr) ! ! Read problem inputs IF(me.EQ.0) THEN READ(*,in) WRITE(*,in) END IF CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, comm_cart, ierr) CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, comm_cart, ierr) !================================================================================ ! 2.0 2d Grid construction ! ! Partition 2D grid npt_glob(1) = nx+1 npt_glob(2) = ny+1 DO i=1,ndims CALL dist1d(comm1d(i), 0, npt_glob(i), s(i), npt_loc(i)) e(i) = s(i) + npt_loc(i) - 1 END DO WRITE(*,'(a,i3.3,a,2(3i4,", "))') 'PE', me, ' coords, s,e:', & & (coords(i),s(i),e(i),i=1,ndims) ! ! Global mesh dx = 1.0d0/REAL(nx) dy = 1.0d0/REAL(ny) ALLOCATE(xgrid(0:nx)) ALLOCATE(ygrid(0:ny)) xgrid = [ (i*dx, i=0,nx) ] ygrid = [ (i*dy, i=0,ny) ] !================================================================================ ! 3.0 FD Laplacian ! id=RESHAPE([ 0, -1, 0, 1, 0, & 0, 0,-1, 0, 1], & [5,2]) npoints = 5 CALL init(s, e, id, .FALSE., mat, comm_cart) ! CALL laplacian(dx, dy, mat) !================================================================================ ! 4.0 Check matrice-vector product ! ! Local arrays with ghost cells g = [1,1] arr = gvector_2d(s, e, g) barr1 = gvector_2d(s, e, g) barr2 = gvector_2d(s, e, g) barr3 = gvector_2d(s, e, g) fexact = gvector_2d(s, e, g) ! ! Constant vector => Laplacian = 0 arr = 1.0d0 CALL exchange(comm_cart, arr) barr1 = mat*arr IF(s(1).EQ.0) barr1%val(0,:) = 0.0 ! discard boundary values IF(e(1).EQ.nx) barr1%val(nx,:) = 0.0 IF(s(2).EQ.0) barr1%val(:,0) = 0.0 IF(e(2).EQ.ny) barr1%val(:,ny) = 0.0 err = norm_vec(barr1, comm_cart, root=0) IF(me.EQ.0) THEN WRITE(*,'(/a,1pe12.3)') 'Constant vector: ||B1|| =', err END IF ! ! Bilinear vector => Laplacian = 0 DO j=s(2),e(2) DO i=s(1),e(1) arr%val(i,j) = xgrid(i)*ygrid(j) END DO END DO CALL exchange(comm_cart, arr) barr2 = mat*arr IF(s(1).EQ.0) barr2%val(0,:) = 0.0 ! discard boundary values IF(e(1).EQ.nx) barr2%val(nx,:) = 0.0 IF(s(2).EQ.0) barr2%val(:,0) = 0.0 IF(e(2).EQ.ny) barr2%val(:,ny) = 0.0 err = norm_vec(barr2, comm_cart, root=0) IF(me.EQ.0) THEN WRITE(*,'(a,1pe12.3)') 'Bilinear vector: ||B2|| =', err END IF ! ! Biquadratic vector => Laplacian = fexact DO j=s(2),e(2) DO i=s(1),e(1) arr%val(i,j) = (xgrid(i)*ygrid(j))**2/4.0d0 fexact%val(i,j) = (xgrid(i)**2 + ygrid(j)**2)/2.0d0 END DO END DO CALL exchange(comm_cart, arr) CALL exchange(comm_cart, fexact) barr3 = mat*arr - fexact IF(s(1).EQ.0) barr3%val(0,:) = 0.0 ! discard boundary values IF(e(1).EQ.nx) barr3%val(nx,:) = 0.0 IF(s(2).EQ.0) barr3%val(:,0) = 0.0 IF(e(2).EQ.ny) barr3%val(:,ny) = 0.0 err = norm_vec(barr3, comm_cart, root=0) IF(me.EQ.0) THEN WRITE(*,'(a,1pe12.3)') 'Biquadratic vector: ||B3|| =', err END IF !================================================================================ ! 9.0 Epilogue CALL h5file CALL MPI_FINALIZE(ierr) ! CONTAINS SUBROUTINE h5file USE futils CHARACTER(len=128) :: file='test_stencilg.h5' INTEGER :: fid CALL creatf(file, fid, real_prec='d', mpicomm=comm_cart) CALL putarr(fid, '/xgrid', xgrid, ionode=0) ! only rank 0 does IO CALL putarr(fid, '/ygrid', ygrid, ionode=0) ! only rank 0 does IO CALL putarrnd(fid, '/arr', arr%val,(/1,2/), garea=g) CALL putarrnd(fid, '/barr1', barr1%val,(/1,2/), garea=g) CALL putarrnd(fid, '/barr2', barr2%val,(/1,2/), garea=g) CALL putarrnd(fid, '/barr3', barr3%val,(/1,2/), garea=g) CALL putmat(fid, '/MAT', mat) CALL closef(fid) END SUBROUTINE h5file ! END PROGRAM main diff --git a/multigrid/src/test_transf2d.f90 b/multigrid/src/test_transf2d.f90 index a126b7e..82098d7 100644 --- a/multigrid/src/test_transf2d.f90 +++ b/multigrid/src/test_transf2d.f90 @@ -1,301 +1,301 @@ !> !> @file test_transf2d.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Test 2d multigrid ! USE multigrid USE csr IMPLICIT NONE INCLUDE 'mpif.h' ! DOUBLE PRECISION, PARAMETER :: PI=4.0d0*ATAN(1.0d0) INTEGER, DIMENSION(2) :: n, nidbas, ngauss, alpha DOUBLE PRECISION :: kx=4.d0, ky=3.d0, sigma=10.0d0 INTEGER :: levels=1 DOUBLE PRECISION :: omega=2.0d0/3.0d0 ! DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:) DOUBLE PRECISION :: dx, dy INTEGER :: ix, iy ! DOUBLE PRECISION, ALLOCATABLE :: sol_direct_grid(:,:) DOUBLE PRECISION, ALLOCATABLE :: sol_anal_grid(:,:) DOUBLE PRECISION, ALLOCATABLE :: errdisc(:), resid(:) ! DOUBLE PRECISION, ALLOCATABLE, TARGET :: fcoarse(:,:) DOUBLE PRECISION, POINTER :: fcoarse_1d(:) DOUBLE PRECISION, ALLOCATABLE, TARGET :: vfine(:,:) DOUBLE PRECISION, POINTER :: vfine_1d(:) DOUBLE PRECISION, ALLOCATABLE :: vfine_grid(:,:) DOUBLE PRECISION, ALLOCATABLE :: err_restrict(:), err_prolong(:), & & disc_err_prolong(:) ! INTEGER :: ierr, me INTEGER :: l, nterms INTEGER :: its ! TYPE(grid2d), ALLOCATABLE :: grids(:) ! NAMELIST /newrun/ n, nidbas, ngauss, kx, ky, sigma, alpha, levels !-------------------------------------------------------------------------------- ! 1. Prologue ! CALL mpi_init(ierr) CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) ! ! Inputs ! n = (/8, 8/) nidbas=(/3,3/) ngauss=(/2,2/) alpha = (/0,0/) kx=4 ky=3 sigma=10.0d0 levels=2 ! IF(me.EQ.0) THEN READ(*,newrun) WRITE(*,newrun) END IF CALL mpi_bcast(n, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(alpha, 2, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(kx, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(ky, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(sigma, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(levels, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) ! ! Adjust number of levels ! levels = MIN(levels, get_lmax(n(1)), get_lmax(n(2))) ! ! Create grids ! dx = 1.0d0/REAL(n(1),8) dy = 1.0d0/REAL(n(2),8) ALLOCATE(x(0:n(1)), y(0:n(2))) DO ix=0,n(1) x(ix) = ix*dx END DO DO iy=0,n(2) y(iy) = iy*dy END DO ! ALLOCATE(grids(levels)) CALL create_grid(x, y, nidbas, ngauss, alpha, grids) WRITE(*,'(5a6)') 'l', 'nx', 'ny', 'rx', 'ry' WRITE(*,'(5i6)') (l, grids(l)%n, grids(l)%rank, l=1,levels) ! ! Build FE matrices and set BC ! nterms = 3 DO l=1,levels CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms) CALL ibcmat(grids(l), grids(l)%mata) CALL to_mat(grids(l)%mata) END DO ! ! Set BC on grid transfer matrices ! CALL ibc_transf(grids,1,3) CALL ibc_transf(grids,2,3) !-------------------------------------------------------------------------------- ! 1. Direct solutions ! WRITE(*,'(/a)') 'Direct solutions for all levels ...' ALLOCATE(errdisc(levels)) ALLOCATE(resid(levels)) WRITE(*,'(3a5,2a12)') 'l', 'nx', 'ny', 'err', 'resid' DO l=1,levels CALL disrhs(grids(l)%spl, grids(l)%f, rhs) CALL ibcrhs(grids(l), grids(l)%f) grids(l)%v = grids(l)%f CALL direct_solve(grids(l), grids(l)%v1d, debug=.FALSE.) errdisc(l) = disc_err(grids(l)%spl, grids(l)%v, sol) resid(l) = residue(grids(l)%mata, grids(l)%f1d, grids(l)%v1d) WRITE(*,'(3i5,2(1pe12.3))') l, grids(l)%n, Errdisc(l), resid(l) END DO ! ! Grid values of direct solutions at the finest levels ALLOCATE(sol_direct_grid(0:n(1),0:n(2))) ALLOCATE(sol_anal_grid(0:n(1),0:n(2))) CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_direct_grid, & & [0,0], grids(1)%v) sol_anal_grid = sol(grids(1)%x, grids(1)%y) !-------------------------------------------------------------------------------- ! 2. Test restrict and prolong ! WRITE(*,'(/a)') 'Testing restrict and prolong...' WRITE(*,'(3a5,3a12)') 'l', 'nx', 'ny', 'rhs', 'sol', 'disc_err' ALLOCATE(err_restrict(2:levels)) ALLOCATE(err_prolong(2:levels)) ALLOCATE(disc_err_prolong(2:levels)) ALLOCATE(vfine_grid(0:n(1),0:n(2))) DO l=2,levels ALLOCATE(fcoarse(SIZE(grids(l)%f,1),SIZE(grids(l)%f,2))) fcoarse_1d(1:SIZE(grids(l)%f1d)) => fcoarse ALLOCATE(vfine(SIZE(grids(l-1)%v,1),SIZE(grids(l-1)%v,2))) vfine_1d(1:SIZE(grids(l-1)%v1d)) => vfine ! fcoarse = restrict(grids(l)%matp, grids(l-1)%f) err_restrict(l) = MAXVAL(ABS(fcoarse_1d-grids(l)%f1d)) ! CALL direct_solve(grids(l), fcoarse_1d) vfine = prolong(grids(l)%matp, fcoarse) disc_err_prolong(l) = disc_err(grids(l-1)%spl, vfine, sol) err_prolong(l) = MAXVAL(ABS(vfine_1d-grids(l-1)%v1d)) ! IF(l.EQ.2) THEN ! Grid val on finest grid CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, vfine_grid, & & [0,0], vfine) END IF ! WRITE(*,'(3i5,3(1pe12.3))') l, grids(l)%n, err_restrict(l), err_prolong(l), & & disc_err_prolong(l) DEALLOCATE(fcoarse) DEALLOCATE(vfine) END DO !-------------------------------------------------------------------------------- ! 9. Epilogue ! ! Creata HDF5 file ! IF(me.EQ.0) CALL h5file ! CALL mpi_finalize(ierr) !-------------------------------------------------------------------------------- CONTAINS !+++ FUNCTION rhs(x, y) ! ! Return problem RHS ! DOUBLE PRECISION, INTENT(in) :: x, y DOUBLE PRECISION :: rhs rhs = SIN(PI*kx*x)*SIN(PI*ky*y) END FUNCTION rhs !+++ FUNCTION sol(x, y) ! ! Return exact problem solution ! DOUBLE PRECISION, INTENT(in) :: x(:), y(:) DOUBLE PRECISION :: sol(SIZE(x),SIZE(y)) DOUBLE PRECISION :: c INTEGER :: j DO j=1,SIZE(y) c = SIN(PI*ky*y(j)) / (PI**2*(kx**2+ky**2) + sigma**2) sol(:,j) = c * SIN(PI*kx*x(:)) END DO END FUNCTION sol !+++ SUBROUTINE coefeq(x, y, idt, idw, c) ! ! Weak form = Int( \nabla(w).\nabla(t) + \sigma.t.w) dV) ! DOUBLE PRECISION, INTENT(in) :: x, y INTEGER, INTENT(out) :: idt(:,:), idw(:,:) DOUBLE PRECISION, INTENT(out) :: c(:) ! c(1) = 1.0d0 idt(1,1) = 1 idt(1,2) = 0 idw(1,1) = 1 idw(1,2) = 0 ! c(2) = 1.0d0 idt(2,1) = 0 idt(2,2) = 1 idw(2,1) = 0 idw(2,2) = 1 ! c(3) = sigma idt(3,1) = 0 idt(3,2) = 0 idw(3,1) = 0 idw(3,2) = 0 END SUBROUTINE coefeq !+++ SUBROUTINE h5file USE futils CHARACTER(len=128) :: file='test_transf2d.h5' INTEGER :: fid INTEGER :: l CHARACTER(len=64) :: dsname CALL creatf(file, fid, real_prec='d') CALL attach(fid, '/', 'NX', n(1)) CALL attach(fid, '/', 'NY', n(2)) CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) CALL attach(fid, '/', 'KX', kx) CALL attach(fid, '/', 'KY', ky) CALL attach(fid, '/', 'SIGMA', sigma) CALL attach(fid, '/', 'ALPHA1', alpha(1)) CALL attach(fid, '/', 'ALPHA2', alpha(2)) CALL attach(fid, '/', 'LEVELS', levels) CALL creatg(fid, '/mglevels') DO l=1,levels WRITE(dsname,'("/mglevels/level.",i2.2)') l CALL creatg(fid, TRIM(dsname)) CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata) IF(l.GT.1) THEN CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1)) CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2)) END IF CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x) CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y) CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f) CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v) CALL putarr(fid, TRIM(dsname)//'/f1d', grids(l)%f1d) CALL putarr(fid, TRIM(dsname)//'/v1d', grids(l)%v1d) END DO ! ! Solutions at finest grid ! CALL creatg(fid, '/solutions') CALL putarr(fid, '/solutions/xg', grids(1)%x) CALL putarr(fid, '/solutions/yg', grids(1)%y) CALL putarr(fid, '/solutions/direct', sol_direct_grid) CALL putarr(fid, '/solutions/anal', sol_anal_grid) CALL putarr(fid, '/solutions/vfine', vfine_grid) ! ! Some errors ! CALL creatg(fid, '/errors') CALL putarr(fid, '/errors/errdisc', errdisc) CALL putarr(fid, '/errors/resid', resid) CALL putarr(fid, '/errors/restrict', err_restrict) CALL putarr(fid, '/errors/prolong', err_prolong) CALL putarr(fid, '/errors/disc_err_prolong', disc_err_prolong) ! CALL closef(fid) END SUBROUTINE h5file !+++ END PROGRAM diff --git a/multigrid/src/test_transf2d_cyl.f90 b/multigrid/src/test_transf2d_cyl.f90 index 66427e6..1a4ae79 100644 --- a/multigrid/src/test_transf2d_cyl.f90 +++ b/multigrid/src/test_transf2d_cyl.f90 @@ -1,321 +1,321 @@ !> !> @file test_transf2d_cyl.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Test 2d multigrid ! Cylindrical case ! USE multigrid USE csr IMPLICIT NONE INCLUDE 'mpif.h' ! DOUBLE PRECISION, PARAMETER :: PI=4.0d0*ATAN(1.0d0) INTEGER, DIMENSION(2) :: n, nidbas, ngauss INTEGER :: modem=22, modep=10 INTEGER :: levels=1 CHARACTER(len=4) :: prb='poly' LOGICAL :: nluniq=.TRUE. ! DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:) DOUBLE PRECISION :: dx, dy INTEGER :: ix, iy ! DOUBLE PRECISION, ALLOCATABLE :: sol_direct_grid(:,:) DOUBLE PRECISION, ALLOCATABLE :: sol_anal_grid(:,:) DOUBLE PRECISION, ALLOCATABLE :: errdisc(:), resid(:) ! DOUBLE PRECISION, ALLOCATABLE, TARGET :: fcoarse(:,:) DOUBLE PRECISION, POINTER :: fcoarse_1d(:) DOUBLE PRECISION, ALLOCATABLE, TARGET :: vfine(:,:) DOUBLE PRECISION, POINTER :: vfine_1d(:) DOUBLE PRECISION, ALLOCATABLE :: vfine_grid(:,:) DOUBLE PRECISION, ALLOCATABLE :: err_restrict(:), err_prolong(:), & & disc_err_prolong(:) ! INTEGER :: ierr, me INTEGER :: l, nterms INTEGER :: its INTEGER :: n2 ! TYPE(grid2d), ALLOCATABLE :: grids(:) ! NAMELIST /newrun/ n, nidbas, ngauss, modem, modep, levels, prb, nluniq !-------------------------------------------------------------------------------- ! 1. Prologue ! CALL mpi_init(ierr) CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) ! ! Inputs ! n = (/8, 8/) nidbas=(/3,3/) ngauss=(/2,2/) modem = 22 modep = 10 prb='poly' levels=2 nluniq = .TRUE. ! IF(me.EQ.0) THEN READ(*,newrun) WRITE(*,newrun) END IF CALL mpi_bcast(n, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(modem, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(modep, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(levels, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(prb, LEN(prb), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) CALL mpi_bcast(nluniq, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) ! ! Adjust number of levels ! levels = MIN(levels, get_lmax(n(1)), get_lmax(n(2))) ! ! Create grids ! dx = 1.0d0/REAL(n(1),8) dy = 2.0d0*pi/REAL(n(2),8) ALLOCATE(x(0:n(1)), y(0:n(2))) DO ix=0,n(1) x(ix) = ix*dx END DO DO iy=0,n(2) y(iy) = iy*dy END DO ! ALLOCATE(grids(levels)) CALL create_grid(x, y, nidbas, ngauss, [1, 0], grids, period=[.FALSE., .TRUE.], & & debug_in=.FALSE.) WRITE(*,'(5a6,a12)') 'l', 'nx', 'ny', 'rx', 'ry', 'shape of v' WRITE(*,'(7i6)') (l, grids(l)%n, grids(l)%rank, SHAPE(grids(l)%v), l=1,levels) ! ! Build FE matrices and set BC ! nterms = 2 DO l=1,levels CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms) CALL ibcmat(grids(l), grids(l)%mata, nluniq_in=nluniq) CALL to_mat(grids(l)%mata) END DO ! ! Set BC on grid transfer matrices ! CALL ibc_transf(grids, 1, 2) ! Only right boundary on r (1st dim.) !-------------------------------------------------------------------------------- ! 1. Direct solutions ! WRITE(*,'(/a)') 'Direct solutions for all levels ...' WRITE(*,'(3a5,2a12)') 'l', 'nx', 'ny', 'err', 'resid' ! ALLOCATE(errdisc(levels)) ALLOCATE(resid(levels)) ! DO l=1,levels CALL disrhs(grids(l)%spl, grids(l)%f, rhs) CALL ibcrhs(grids(l), grids(l)%f, nluniq_in=nluniq) ! grids(l)%v = grids(l)%f CALL direct_solve(grids(l), grids(l)%v1d, debug=.FALSE.) ! resid(l) = residue(grids(l)%mata, grids(l)%f1d, grids(l)%v1d) CALL back_transf(grids(l), grids(l)%v, nluniq_in=nluniq) errdisc(l) = disc_err(grids(l)%spl, grids(l)%v, sol) WRITE(*,'(3i5,2(1pe12.3))') l, grids(l)%n, Errdisc(l), resid(l) END DO ! ! Grid values of direct solutions at the finest levels ALLOCATE(sol_direct_grid(0:n(1),0:n(2))) ALLOCATE(sol_anal_grid(0:n(1),0:n(2))) CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_direct_grid, & & [0,0], grids(1)%v) sol_anal_grid = sol(grids(1)%x, grids(1)%y) !-------------------------------------------------------------------------------- ! 2. Test restrict and prolong ! WRITE(*,'(/a)') 'Testing restrict and prolong...' WRITE(*,'(3a5,3a12)') 'l', 'nx', 'ny', 'rhs', 'sol', 'disc_err' ALLOCATE(err_restrict(2:levels)) ALLOCATE(err_prolong(2:levels)) ALLOCATE(disc_err_prolong(2:levels)) ALLOCATE(vfine_grid(0:n(1),0:n(2))) DO l=2,levels ALLOCATE(fcoarse(SIZE(grids(l)%f,1),SIZE(grids(l)%f,2))) fcoarse_1d(1:SIZE(grids(l)%f1d)) => fcoarse ALLOCATE(vfine(SIZE(grids(l-1)%v,1),SIZE(grids(l-1)%v,2))) vfine_1d(1:SIZE(grids(l-1)%v1d)) => vfine ! fcoarse(:,:) = restrict_cyl(grids(l), grids(l-1)%f, nluniq) ! err_restrict(l) = MAXVAL(ABS(fcoarse_1d-grids(l)%f1d)) ! CALL direct_solve(grids(l), fcoarse_1d) ! vfine(:,:) = prolong_cyl(grids(l), fcoarse, nluniq) ! CALL back_transf(grids(l-1), vfine, nluniq_in=nluniq) disc_err_prolong(l) = disc_err(grids(l-1)%spl, vfine, sol) err_prolong(l) = MAXVAL(ABS(vfine_1d-grids(l-1)%v1d)) ! IF(l.EQ.2) THEN ! Grid val on finest grid CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, vfine_grid, & & [0,0], vfine) END IF ! WRITE(*,'(3i5,3(1pe12.3))') l, grids(l)%n, err_restrict(l), err_prolong(l), & & disc_err_prolong(l) DEALLOCATE(fcoarse) DEALLOCATE(vfine) END DO !-------------------------------------------------------------------------------- ! 9. Epilogue ! ! Creata HDF5 file ! IF(me.EQ.0) CALL h5file ! CALL mpi_finalize(ierr) !-------------------------------------------------------------------------------- CONTAINS !+++ FUNCTION rhs(r, theta) ! ! Return problem RHS ! USE math_util, ONLY : root_bessj DOUBLE PRECISION, INTENT(in) :: r, theta DOUBLE PRECISION :: rhs DOUBLE PRECISION :: nump ! SELECT CASE(TRIM(prb)) CASE('poly') rhs = REAL(4*(modem+1),8)*r**(modem+1)*COS(REAL(modem,8)*theta) CASE('bess') nump = root_bessj(modem, modep) rhs = r * nump**2 * BESSEL_JN(modem, nump*r) * COS(modem*theta) END SELECT END FUNCTION rhs !+++ FUNCTION sol(r, theta) ! ! Return exact problem solution ! USE math_util, ONLY : root_bessj DOUBLE PRECISION, INTENT(in) :: r(:), theta(:) DOUBLE PRECISION :: sol(SIZE(r),SIZE(theta)) DOUBLE PRECISION :: nump INTEGER :: j ! SELECT CASE(TRIM(prb)) CASE('poly') DO j=1,SIZE(theta) sol(:,j) = (1-r(:)**2) * r(:)**modem * COS(modem*theta(j)) END DO CASE('bess') nump = root_bessj(modem, modep) DO j=1,SIZE(theta) sol(:,j) = BESSEL_JN(modem, nump*r(:)) * COS(modem*theta(j)) END DO END SELECT END FUNCTION sol !+++ SUBROUTINE coefeq(r, theta, idt, idw, c) ! ! Weak form = Int( \nabla(w).\nabla(t) + \sigma.t.w) dV) ! DOUBLE PRECISION, INTENT(in) :: r, theta INTEGER, INTENT(out) :: idt(:,:), idw(:,:) DOUBLE PRECISION, INTENT(out) :: c(:) ! c(1) = r idt(1,1) = 1 idt(1,2) = 0 idw(1,1) = 1 idw(1,2) = 0 ! c(2) = 1.0d0/r idt(2,1) = 0 idt(2,2) = 1 idw(2,1) = 0 idw(2,2) = 1 END SUBROUTINE coefeq !+++ SUBROUTINE h5file USE futils CHARACTER(len=128) :: file='test_transf2d_cyl.h5' INTEGER :: fid INTEGER :: l CHARACTER(len=64) :: dsname CALL creatf(file, fid, real_prec='d') CALL attach(fid, '/', 'NX', n(1)) CALL attach(fid, '/', 'NY', n(2)) CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) CALL attach(fid, '/', 'MODEM', modem) CALL attach(fid, '/', 'MODEP', modep) CALL attach(fid, '/', 'LEVELS', levels) CALL creatg(fid, '/mglevels') DO l=1,levels WRITE(dsname,'("/mglevels/level.",i2.2)') l CALL creatg(fid, TRIM(dsname)) CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata) IF(l.GT.1) THEN CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1)) CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2)) END IF CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x) CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y) CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f) CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v) CALL putarr(fid, TRIM(dsname)//'/f1d', grids(l)%f1d) CALL putarr(fid, TRIM(dsname)//'/v1d', grids(l)%v1d) END DO ! ! Solutions at finest grid ! CALL creatg(fid, '/solutions') CALL putarr(fid, '/solutions/xg', grids(1)%x) CALL putarr(fid, '/solutions/yg', grids(1)%y) CALL putarr(fid, '/solutions/direct', sol_direct_grid) CALL putarr(fid, '/solutions/anal', sol_anal_grid) CALL putarr(fid, '/solutions/vfine', vfine_grid) ! ! Some errors ! CALL creatg(fid, '/errors') CALL putarr(fid, '/errors/errdisc', errdisc) CALL putarr(fid, '/errors/resid', resid) CALL putarr(fid, '/errors/restrict', err_restrict) CALL putarr(fid, '/errors/prolong', err_prolong) CALL putarr(fid, '/errors/disc_err_prolong', disc_err_prolong) ! CALL closef(fid) END SUBROUTINE h5file !+++ END PROGRAM diff --git a/multigrid/src/transfer1d.f90 b/multigrid/src/transfer1d.f90 index c245f09..319883d 100644 --- a/multigrid/src/transfer1d.f90 +++ b/multigrid/src/transfer1d.f90 @@ -1,126 +1,126 @@ !> !> @file transfer1d.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> !-------------------------------------------------------------------------------- !-------------------------------------------------------------------------------- !-------------------------------------------------------------------------------- PROGRAM main USE multigrid IMPLICIT NONE ! INTEGER :: nx=8, nidbas=1, ngauss=4, alpha=0, modem=0 DOUBLE PRECISION :: sigma=1.0d0 LOGICAL :: nlper=.FALSE. INTEGER :: j ! TYPE(grid1d) :: gridx(2) TYPE(gemat) :: prolong_mat, restrict_mat, coarse_mat DOUBLE PRECISION, ALLOCATABLE :: arow(:), temp(:,:) ! NAMELIST /newrun/ nx, nidbas, ngauss, sigma, alpha, modem, nlper !-------------------------------------------------------------------------------- READ(*,newrun) WRITE(*,newrun) ! ! Set up fine and coarse grids ! CALL create_grid(nx, nidbas, ngauss, alpha, gridx, period=nlper) CALL printmat('** Prolongation matrix **', gridx(2)%transf) ! ! Restriction matrix = transpose of prolongation matrix ! CALL mcopy(gridx(2)%transf, prolong_mat) CALL init(prolong_mat%mrows, 1, restrict_mat, mrows=prolong_mat%ncols) restrict_mat%val = TRANSPOSE(prolong_mat%val) ! ! Compute femat on fine and coarse grids ! IF(nlper) THEN CALL femat(gridx(1)%spl, gridx(1)%matap, coefeq) CALL printmat('** FE matrix on fine mesh **', gridx(1)%matap) CALL femat(gridx(2)%spl, gridx(2)%matap, coefeq) CALL printmat('** FE matrix on coarse mesh **', gridx(2)%matap) ELSE CALL femat(gridx(1)%spl, gridx(1)%mata, coefeq) CALL printmat('** FE matrix on fine mesh **', gridx(1)%mata) CALL femat(gridx(2)%spl, gridx(2)%mata, coefeq) CALL printmat('** FE matrix on coarse mesh **', gridx(2)%mata) END IF ! ! Compute coarse FE matrix using transfer matrix ! IF(nlper) THEN CALL init(gridx(2)%matap%rank, 1, coarse_mat) ALLOCATE(temp(gridx(1)%matap%rank,gridx(2)%matap%rank)) DO j=1,gridx(2)%matap%rank temp(:,j) = vmx(gridx(1)%matap,prolong_mat%val(:,j)) END DO coarse_mat%val = vmx(restrict_mat,temp) DEALLOCATE(temp) ELSE CALL init(gridx(2)%mata%rank, 1, coarse_mat) coarse_mat%val = vmx(restrict_mat,vmx(gridx(1)%mata,prolong_mat%val)) END IF CALL printmat('** Coarse FE matrix using transfer operators **', coarse_mat) ! ! Compute the diff of Ac - R*Af*P ! IF(nlper) THEN coarse_mat%val = coarse_mat%val - gridx(2)%matap%val ELSE ALLOCATE(arow(gridx(2)%mata%rank)) DO j=1,gridx(2)%mata%rank CALL getcol(gridx(2)%mata, j, arow) coarse_mat%val(:,j) = coarse_mat%val(:,j)-arow(:) END DO DEALLOCATE(arow) END IF WRITE(*,'(a,1pe12.3)') 'Diff =', MAXVAL(ABS(coarse_mat%val)) CONTAINS SUBROUTINE coefeq(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE PRECISION, INTENT(out) :: c(:) SELECT CASE (alpha) CASE(0) ! Cartesian geometry c(1) = 1.0d0 idt(1) = 1 idw(1) = 1 c(2) = sigma idt(2) = 0 idw(2) = 0 CASE(1) c(1) = x idt(1) = 1 idw(1) = 1 c(2) = modem**2/x idt(2) = 0 idw(2) = 0 CASE default WRITE(*,'(a,i0,a)') 'COEFEQ: alpha ', alpha, ' not defined!' END SELECT END SUBROUTINE coefeq !-------------------------------------------------------------------------------- END PROGRAM main diff --git a/multigrid/src/transfer1d_col.f90 b/multigrid/src/transfer1d_col.f90 index d0724d5..75b9d8c 100644 --- a/multigrid/src/transfer1d_col.f90 +++ b/multigrid/src/transfer1d_col.f90 @@ -1,53 +1,53 @@ !> !> @file transfer1d_col.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Obtain grid transfer by collocation ! USE multigrid IMPLICIT NONE ! INTEGER :: nx=8, nidbas=1 LOGICAL :: nlper=.TRUE. ! TYPE(grid1d) :: gridx(2) TYPE(gemat) :: pmat ! NAMELIST /newrun/ nx, nidbas, nlper !-------------------------------------------------------------------------------- READ(*,newrun) WRITE(*,newrun) ! CALL create_grid(nx, nidbas, 1, 0, gridx, period=nlper) CALL printmat('** Prolongation matrix (using mass matrix) **', gridx(2)%transf) ! CALL calc_pmat(gridx(1), gridx(2), pmat, .TRUE.) CALL printmat('** Prolongation matrix (by collocation) **', pmat) ! WRITE(*,'(/a,1pe12.3)') 'Max diff =', MAXVAL(ABS(pmat%val-gridx(2)%transf%val)) ! END PROGRAM main diff --git a/multigrid/src/two_grid.f90 b/multigrid/src/two_grid.f90 index db513a8..4342a4d 100644 --- a/multigrid/src/two_grid.f90 +++ b/multigrid/src/two_grid.f90 @@ -1,189 +1,189 @@ !> !> @file two_grid.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Check some properties of grid transfer ! USE multigrid USE math_util, ONLY : root_bessj IMPLICIT NONE ! INTEGER :: nx=8, nidbas=1, ngauss=2, alpha=0 INTEGER :: modem=22, modep=10 INTEGER :: levels=2 INTEGER :: l, nrank DOUBLE PRECISION :: sigma=1.0d0, kmode=10.0, pi=4.0d0*ATAN(1.0d0) DOUBLE PRECISION, ALLOCATABLE :: v_prolong(:) ! TYPE(grid1d) :: gridx(2) ! NAMELIST /newrun/ nx, nidbas, ngauss, sigma, kmode, modem, modep, alpha !-------------------------------------------------------------------------------- ! 1. Prologue ! Inputs ! READ(*,newrun) WRITE(*,newrun) ! ! Create grids ! CALL create_grid(nx, nidbas, ngauss, alpha, gridx) WRITE(*,'(a/(20i6))') 'Number of intervals in grids', (gridx(l)%n, l=1,levels) ! ! Create FE matrice and set BC u(0)=u(1)=0 ! DO l=1,levels CALL femat(gridx(l)%spl, gridx(l)%mata, coefeq) ! ! Left Dirichlet BC (only for Cartesian geometry) IF(alpha .EQ. 0) THEN CALL ibcmat(1, gridx(l)%mata) END IF ! ! Right Dirichlet BC CALL ibcmat(gridx(l)%mata%rank, gridx(l)%mata) ! ! BC on grid transfer operator IF(l.GT.1) THEN WHERE( ABS(gridx(l)%transf%val) < 1.d-8) gridx(l)%transf%val=0.0d0 IF(alpha .EQ. 0) gridx(l)%transf%val(2:,1)=0.0d0 gridx(l)%transf%val(1:gridx(l-1)%rank-1,gridx(l)%rank)=0.0d0 END IF END DO ! ! Construct RHS and set BC only on the finest grid ! nrank = gridx(1)%rank CALL disrhs(gridx(1)%spl, gridx(1)%f, rhs) ! ! Left Dirichlet BC (only for Cartesian geometry) IF(alpha .EQ. 0) THEN gridx(1)%f(1) = 0.0d0 END IF ! ! Right Dirichlet BC gridx(1)%f(nrank) = 0.0d0 ! ! RHS on coarse grid by restriction ! gridx(2)%f = restrict(gridx(2)%transf,gridx(1)%f) !-------------------------------------------------------------------------------- ! 2. Direct solutions ! DO l=1,levels CALL direct_solve(gridx(l), gridx(l)%v) WRITE(*,'(a,i3/(10(1pe12.3)))') 'Sol at level', l, gridx(l)%v END DO ! ! Prolongation of coarse solution ! ALLOCATE(v_prolong(SIZE(gridx(1)%v))) ! v_prolong = prolong(gridx(2)%transf, gridx(2)%v) WRITE(*,'(a,i3/(10(1pe12.3)))') 'Prolong. sol.', l, v_prolong WRITE(*,'(a,1pe12.3)') 'Error ||V_prolong-V_fine||', normf(gridx(1)%matm, v_prolong-gridx(1)%v) !-------------------------------------------------------------------------------- ! 9. Epilogue ! ! Creata HDF5 file ! CALL h5file !-------------------------------------------------------------------------------- CONTAINS SUBROUTINE h5file USE futils CHARACTER(len=128) :: file='two_grid.h5' INTEGER :: fid INTEGER :: l CHARACTER(len=64) :: dsname CALL creatf(file, fid, real_prec='d') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NIDBAS', nidbas) CALL attach(fid, '/', 'SIGMA', sigma) CALL attach(fid, '/', 'KMODE', kmode) CALL attach(fid, '/', 'ALPHA', alpha) CALL attach(fid, '/', 'LEVELS', levels) CALL creatg(fid, '/mglevels') DO l=1,levels WRITE(dsname,'("/mglevels/level.",i2.2)') l CALL creatg(fid, TRIM(dsname)) CALL putmat(fid, TRIM(dsname)//'/mata', gridx(l)%mata) IF(l.GT.1) THEN CALL putarr(fid, TRIM(dsname)//'/matp', gridx(l)%transf%val) CALL attach(fid, TRIM(dsname)//'/matp', 'M', gridx(l)%transf%mrows) CALL attach(fid, TRIM(dsname)//'/matp', 'N', gridx(l)%transf%ncols) END IF CALL putarr(fid, TRIM(dsname)//'/f', gridx(l)%f) CALL putarr(fid, TRIM(dsname)//'/v', gridx(l)%v) END DO CALL putarr(fid, '/v_prolong', v_prolong) END SUBROUTINE h5file FUNCTION rhs(x) DOUBLE PRECISION, INTENT(in) :: x DOUBLE PRECISION :: rhs DOUBLE PRECISION :: nump SELECT CASE (alpha) CASE(0) ! Cartesian geometry rhs = SIN(pi*kmode*x) CASE(1) ! Cylindrical nump = root_bessj(modem, modep) rhs = x * nump**2 * bessel_jn(modem, nump*x) END SELECT END FUNCTION rhs FUNCTION sol(x) DOUBLE PRECISION, INTENT(in) :: x(:) DOUBLE PRECISION :: sol(SIZE(x)) DOUBLE PRECISION :: nump SELECT CASE (alpha) CASE(0) ! Cartesian geometry sol(:) = 1.0d0/((pi*kmode)**2+sigma)*SIN(pi*kmode*x(:)) CASE(1) ! Cylindrical nump = root_bessj(modem, modep) sol(:) = bessel_jn(modem, nump*x(:)) END SELECT END FUNCTION sol SUBROUTINE coefeq(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE PRECISION, INTENT(out) :: c(:) SELECT CASE (alpha) CASE(0) ! Cartesian geometry c(1) = 1.0d0 idt(1) = 1 idw(1) = 1 c(2) = sigma idt(2) = 0 idw(2) = 0 CASE(1) ! Cylindrical c(1) = x idt(1) = 1 idw(1) = 1 c(2) = REAL(modem,8)**2/x idt(2) = 0 idw(2) = 0 END SELECT END SUBROUTINE coefeq END PROGRAM main diff --git a/multigrid/wk/CMakeLists.txt b/multigrid/wk/CMakeLists.txt index 98a7017..40db313 100644 --- a/multigrid/wk/CMakeLists.txt +++ b/multigrid/wk/CMakeLists.txt @@ -1,53 +1,53 @@ # # @file CMakeLists.txt # # @brief # # @copyright # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) # SPC (Swiss Plasma Center) # -# spclibs is free software: you can redistribute it and/or modify it under +# SPClibs is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) # any later version. # -# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . # # @authors # (in alphabetical order) # @author Trach-Minh Tran # project(multigrid_wk) set(MG_TESTS transfer1d test_relax test_mg test_mgp test_csr two_grid test_mg2d test_relax2d test_transf2d transfer1d_col test_relax2d_cyl test_transf2d_cyl test_mg2d_cyl poisson_fd ) set(RUNTESTS "${CMAKE_CURRENT_SOURCE_DIR}/runtest.sh") set(BIN_DIR "${multigrid_tests_BINARY_DIR}") set(INPUT_DIR "${CMAKE_CURRENT_SOURCE_DIR}") foreach(prog ${MG_TESTS}) add_test(${prog} ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 1 ${RUNTESTS} ${BIN_DIR}/${prog} ${INPUT_DIR} ) endforeach() diff --git a/multigrid/wk/run.sh b/multigrid/wk/run.sh index 152144a..40ef66a 100644 --- a/multigrid/wk/run.sh +++ b/multigrid/wk/run.sh @@ -1,59 +1,59 @@ # # @file run.sh # # @brief # # @copyright # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) # SPC (Swiss Plasma Center) # -# spclibs is free software: you can redistribute it and/or modify it under +# SPClibs is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) # any later version. # -# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . # # @authors # (in alphabetical order) # @author Trach-Minh Tran # #!/bin/bash EXE=/home/ttran/bsplines/multigrid/src/poisson_mg TMP=/misc/multigrid [ -e $TMP ] || mkdir -p $TMP cat > in0 <. # # @authors # (in alphabetical order) # @author Trach-Minh Tran # #!/bin/sh progname=$1 input_dir=$2 prog=$(basename ${progname}) input_file=${input_dir}/${prog}.in ${progname} < $input_file exit $? diff --git a/pppack/CMakeLists.txt b/pppack/CMakeLists.txt index 948a8ea..1f42ad7 100644 --- a/pppack/CMakeLists.txt +++ b/pppack/CMakeLists.txt @@ -1,37 +1,37 @@ # # @file CMakeLists.txt # # @brief # # @copyright # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) # SPC (Swiss Plasma Center) # -# spclibs is free software: you can redistribute it and/or modify it under +# SPClibs is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) # any later version. # -# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . # # @authors # (in alphabetical order) # @author Nicolas Richart # @author Trach-Minh Tran # set(SRCS bvalue.f90 interv.f90 ) add_library(pppack STATIC ${SRCS}) install(TARGETS pppack EXPORT ${BSPLINES_EXPORT_TARGETS} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} ) diff --git a/pppack/Makefile b/pppack/Makefile index 0a5cbcf..2c83638 100644 --- a/pppack/Makefile +++ b/pppack/Makefile @@ -1,72 +1,72 @@ # # @file Makefile # # @brief # # @copyright # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) # SPC (Swiss Plasma Center) # -# spclibs is free software: you can redistribute it and/or modify it under +# SPClibs is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) # any later version. # -# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . # # @authors # (in alphabetical order) # @author Sébastien Jolliet # @author Trach-Minh Tran # SRCS = banfac.f90 banslv.f90 bchfac.f90 bchslv.f90 bsplpp.f90 bsplvb.f90 \ bsplvd.f90 bspp2d.f90 bvalue.f90 chol1d.f90 colloc.f90 colpnt.f90 \ cspint.f90 cubset.f90 cubslo.f90 cubspl.f90 cwidth.f90 difequ.f90 \ dtblok.f90 eqblok.f90 evnnot.f90 factrb.f90 fcblok.f90 interv.f90 \ knots.f90 l2appr.f90 l2err.f90 l2knts.f90 newnot.f90 ppvalu.f90 \ putit.f90 rvec_print.f90 sbblok.f90 setupq.f90 shiftb.f90 slvblk.f90 \ smooth.f90 spli2d.f90 spline_hermite_set.f90 spline_hermite_val.f90 \ splint.f90 splopt.f90 subbak.f90 subfor.f90 tautsp.f90 titanium.f90 OBJS = banfac.o banslv.o bchfac.o bchslv.o bsplpp.o bsplvb.o bsplvd.o \ bspp2d.o bvalue.o chol1d.o colloc.o colpnt.o cspint.o cubset.o \ cubslo.o cubspl.o cwidth.o difequ.o dtblok.o eqblok.o evnnot.o \ factrb.o fcblok.o interv.o knots.o l2appr.o l2err.o l2knts.o newnot.o \ ppvalu.o putit.o rvec_print.o sbblok.o setupq.o shiftb.o slvblk.o \ smooth.o spli2d.o spline_hermite_set.o spline_hermite_val.o splint.o \ splopt.o subbak.o subfor.o tautsp.o titanium.o OBJS = interv.o bvalue.o LIBS = CC = cc CFLAGS = -g FC = ifort FFLAGS = $(OPT) F90 = $(FC) F90FLAGS = $(FFLAGS) LDFLAGS = lib: libpppack.a libpppack.a: $(OBJS) xiar r $@ $? ranlib $@ clean: rm -f *.o *.mod *~ core distclean: clean rm -f libpppack.a a.out .SUFFIXES: .SUFFIXES: .o .c .f90 .f90.o: $(F90) $(F90FLAGS) -c $< diff --git a/pppack/banfac.f90 b/pppack/banfac.f90 index 50ecb50..3aa668a 100644 --- a/pppack/banfac.f90 +++ b/pppack/banfac.f90 @@ -1,234 +1,234 @@ !> !> @file banfac.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine banfac ( w, nroww, nrow, nbandl, nbandu, iflag ) !************************************************************************* ! !! BANFAC factors a banded matrix without pivoting. ! ! Discussion: ! ! BANFAC returns in W the LU-factorization, without pivoting, of ! the banded matrix A of order NROW with (NBANDL+1+NBANDU) bands ! or diagonals in the work array W. ! ! Gauss elimination without pivoting is used. The routine is ! intended for use with matrices A which do not require row ! interchanges during factorization, especially for the totally ! positive matrices which occur in spline calculations. ! ! The matrix storage mode used is the same one used by LINPACK ! and LAPACK, and results in efficient innermost loops. ! ! Explicitly, A has ! ! NBANDL bands below the diagonal ! 1 main diagonal ! NBANDU bands above the diagonal ! ! and thus, with MIDDLE=NBANDU+1, ! A(I+J,J) is in W(I+MIDDLE,J) for I=-NBANDU,...,NBANDL, J=1,...,NROW. ! ! For example, the interesting entries of a banded matrix ! matrix of order 9, with NBANDL=1, NBANDU=2: ! ! 11 12 13 0 0 0 0 0 0 ! 21 22 23 24 0 0 0 0 0 ! 0 32 33 34 35 0 0 0 0 ! 0 0 43 44 45 46 0 0 0 ! 0 0 0 54 55 56 57 0 0 ! 0 0 0 0 65 66 67 68 0 ! 0 0 0 0 0 76 77 78 79 ! 0 0 0 0 0 0 87 88 89 ! 0 0 0 0 0 0 0 98 99 ! ! would appear in the first 1+1+2=4 rows of W as follows: ! ! 0 0 13 24 35 46 57 68 79 ! 0 12 23 34 45 56 67 78 89 ! 11 22 33 44 55 66 77 88 99 ! 21 32 43 54 65 76 87 98 0 ! ! All other entries of W not identified in this way with an ! entry of A are never referenced. ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! Input/output, real ( kind = 8 ) W(NROWW,NROW). ! On input, W contains the "interesting" part of a banded ! matrix A, with the diagonals or bands of A stored in the ! rows of W, while columns of A correspond to columns of W. ! On output, W contains the LU-factorization of A into a unit ! lower triangular matrix L and an upper triangular matrix U ! (both banded) and stored in customary fashion over the ! corresponding entries of A. ! ! This makes it possible to solve any particular linear system A*X=B ! for X by the call ! ! call banslv ( w, nroww, nrow, nbandl, nbandu, b ) ! ! with the solution X contained in B on return. ! ! If IFLAG=2, then one of NROW-1, NBANDL, NBANDU failed to be nonnegative, ! or else one of the potential pivots was found to be zero ! indicating that A does not have an LU-factorization. This ! implies that A is singular in case it is totally positive. ! ! Input, integer NROWW, the row dimension of the work array W. ! NROWW must be at least NBANDL+1 + NBANDU. ! ! Input, integer NROW, the number of rows in A. ! ! Input, integer NBANDL, the number of bands of A below the main diagonal. ! ! Input, integer NBANDU, the number of bands of A above the main diagonal. ! ! Output, integer IFLAG, error flag. ! 1, success. ! 2, failure, the matrix was not factored. ! implicit none integer nrow integer nroww real ( kind = 8 ) factor integer i integer iflag integer j integer k integer middle integer nbandl integer nbandu real ( kind = 8 ) pivot real ( kind = 8 ) w(nroww,nrow) iflag = 1 if ( nrow < 1 ) then iflag = 2 return end if ! ! W(MIDDLE,*) contains the main diagonal of A. ! middle = nbandu + 1 if ( nrow == 1 ) then if ( w(middle,nrow) == 0.0D+00 ) then iflag = 2 end if return end if ! ! A is upper triangular. Check that the diagonal is nonzero. ! if ( nbandl <= 0 ) then do i = 1, nrow-1 if ( w(middle,i) == 0.0D+00 ) then iflag = 2 return end if end do if ( w(middle,nrow) == 0.0D+00 ) then iflag = 2 end if return ! ! A is lower triangular. Check that the diagonal is nonzero and ! divide each column by its diagonal. ! else if ( nbandu <= 0 ) then do i = 1, nrow-1 pivot = w(middle,i) if ( pivot == 0.0D+00 ) then iflag = 2 return end if do j = 1, min ( nbandl, nrow-i ) w(middle+j,i) = w(middle+j,i) / pivot end do end do return end if ! ! A is not just a triangular matrix. ! Construct the LU factorization. ! do i = 1, nrow-1 ! ! W(MIDDLE,I) is the pivot for the I-th step. ! if ( w(middle,i) == 0.0D+00 ) then iflag = 2 write ( *, '(a)' ) ' ' write ( *, '(a)' ) 'BANFAC - Fatal error!' write ( *, '(a,i6)' ) ' Zero pivot encountered in column ', i stop end if ! ! Divide each entry in column I below the diagonal by PIVOT. ! do j = 1, min ( nbandl, nrow-i ) w(middle+j,i) = w(middle+j,i) / w(middle,i) end do ! ! Subtract A(I,I+K)*(I-th column) from (I+K)-th column (below row I). ! do k = 1, min ( nbandu, nrow-i ) factor = w(middle-k,i+k) do j = 1, min ( nbandl, nrow-i ) w(middle-k+j,i+k) = w(middle-k+j,i+k) - w(middle+j,i) * factor end do end do end do ! ! Check the last diagonal entry. ! if ( w(middle,nrow) == 0.0D+00 ) then iflag = 2 end if return end diff --git a/pppack/banslv.f90 b/pppack/banslv.f90 index a8159de..b0e1891 100644 --- a/pppack/banslv.f90 +++ b/pppack/banslv.f90 @@ -1,112 +1,112 @@ !> !> @file banslv.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine banslv ( w, nroww, nrow, nbandl, nbandu, b ) !************************************************************************* ! !! BANSLV solves a banded linear system X * X = B factored by BANFAC. ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! Input, real ( kind = 8 ) W(NROWW,NROW). W contains the banded matrix, ! after it has been factored by BANFAC. ! ! Input, integer NROWW, the row dimension of the work array W. ! NROWW must be at least NBANDL+1 + NBANDU. ! ! Input, integer NROW, the number of rows in A. ! ! Input, integer NBANDL, the number of bands of A below the ! main diagonal. ! ! Input, integer NBANDU, the number of bands of A above the ! main diagonal. ! ! Input/output, real ( kind = 8 ) B(NROW). ! On input, B contains the right hand side of the system to be solved. ! On output, B contains the solution, X. ! implicit none integer nrow integer nroww real ( kind = 8 ) b(nrow) integer i integer j integer jmax integer middle integer nbandl integer nbandu real ( kind = 8 ) w(nroww,nrow) middle = nbandu + 1 if ( nrow == 1 ) then b(1) = b(1) / w(middle,1) return end if ! ! Forward pass ! ! For I = 1, 2, ..., NROW-1, subtract RHS(I)*(I-th column of L) ! from the right side, below the I-th row. ! if ( 0 < nbandl ) then do i = 1, nrow-1 jmax = min ( nbandl, nrow-i ) do j = 1, jmax b(i+j) = b(i+j) - b(i) * w(middle+j,i) end do end do end if ! ! Backward pass ! ! For I=NROW, NROW-1,...,1, divide RHS(I) by ! the I-th diagonal entry of U, then subtract ! RHS(I)*(I-th column of U) from right side, above the I-th row. ! do i = nrow, 2, -1 b(i) = b(i) / w(middle,i) do j = 1, min ( nbandu, i-1 ) b(i-j) = b(i-j) - b(i) * w(middle-j,i) end do end do b(1) = b(1) / w(middle,1) return end diff --git a/pppack/bchfac.f90 b/pppack/bchfac.f90 index c582ad0..67231c8 100644 --- a/pppack/bchfac.f90 +++ b/pppack/bchfac.f90 @@ -1,168 +1,168 @@ !> !> @file bchfac.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine bchfac ( w, nbands, nrow, diag ) !************************************************************************* ! !! BCHFAC constructs a Cholesky factorization of a matrix. ! ! Discussion: ! ! The factorization has the form ! ! C = L * D * L' ! ! with L unit lower triangular and D diagonal, for a given matrix C of ! order NROW, where C is symmetric positive semidefinite and banded, ! having NBANDS diagonals at and below the main diagonal. ! ! Gauss elimination is used, adapted to the symmetry and bandedness of C. ! ! Near-zero pivots are handled in a special way. The diagonal ! element C(N,N)=W(1,N) is saved initially in DIAG(N), all N. ! ! At the N-th elimination step, the current pivot element, W(1,N), ! is compared with its original value, DIAG(N). If, as the result ! of prior elimination steps, this element has been reduced by about ! a word length, (i.e., if W(1,N)+DIAG(N) <= DIAG(N)), then the pivot ! is declared to be zero, and the entire N-th row is declared to ! be linearly dependent on the preceding rows. This has the effect ! of producing X(N) = 0 when solving C*X = B for X, regardless of B. ! ! Justification for this is as follows. In contemplated applications ! of this program, the given equations are the normal equations for ! some least-squares approximation problem, DIAG(N) = C(N,N) gives ! the norm-square of the N-th basis function, and, at this point, ! W(1,N) contains the norm-square of the error in the least-squares ! approximation to the N-th basis function by linear combinations ! of the first N-1. ! ! Having W(1,N)+DIAG(N) <= DIAG(N) signifies that the N-th function ! is linearly dependent to machine accuracy on the first N-1 ! functions, therefore can safely be left out from the basis of ! approximating functions. ! ! The solution of a linear system C*X=B is effected by the ! succession of the following two calls: ! ! CALL BCHFAC(W,NBANDS,NROW,DIAG) ! ! CALL BCHSLV(W,NBANDS,NROW,B,X) ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! Input/output, real ( kind = 8 ) W(NBANDS,NROW). ! ! On input, W contains the NBANDS diagonals in its rows, ! with the main diagonal in row 1. Precisely, W(I,J) ! contains C(I+J-1,J), I=1,...,NBANDS, J=1,...,NROW. ! ! For example, the interesting entries of a seven diagonal ! symmetric matrix C of order 9 would be stored in W as ! ! 11 22 33 44 55 66 77 88 99 ! 21 32 43 54 65 76 87 98 * ! 31 42 53 64 75 86 97 * * ! 41 52 63 74 85 96 * * * ! ! Entries of the array not associated with an ! entry of C are never referenced. ! ! On output, W contains the Cholesky factorization ! C = L*D*L-transp, with W(1,I) containing 1/D(I,I) and W(I,J) ! containing L(I-1+J,J), I=2,...,NBANDS. ! ! Input, integer NBANDS, indicates the bandwidth of the ! matrix C, i.e., C(I,J) = 0 for NBANDS < ABS(I-J). ! ! Input, integer NROW, is the order of the matrix C. ! ! Work array, real ( kind = 8 ) DIAG(NROW). ! implicit none integer nbands integer nrow real ( kind = 8 ) diag(nrow) integer i integer imax integer j integer jmax integer n real ( kind = 8 ) ratio real ( kind = 8 ) w(nbands,nrow) if ( nrow <= 1 ) then if ( 0.0D+00 < w(1,1) ) then w(1,1) = 1.0D+00 / w(1,1) end if return end if ! ! Store the diagonal. ! diag(1:nrow) = w(1,1:nrow) ! ! Factorization. ! do n = 1, nrow if ( w(1,n) + diag(n) <= diag(n) ) then w(1:nbands,n) = 0.0D+00 else w(1,n) = 1.0D+00 / w(1,n) imax = min ( nbands-1, nrow-n ) jmax = imax do i = 1, imax ratio = w(i+1,n) * w(1,n) do j = 1, jmax w(j,n+i) = w(j,n+i) - w(j+i,n) * ratio end do jmax = jmax-1 w(i+1,n) = ratio end do end if end do return end diff --git a/pppack/bchslv.f90 b/pppack/bchslv.f90 index 503e3e1..93e9745 100644 --- a/pppack/bchslv.f90 +++ b/pppack/bchslv.f90 @@ -1,114 +1,114 @@ !> !> @file bchslv.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine bchslv ( w, nbands, nrow, b ) !************************************************************************* ! !! BCHSLV solves a banded symmetric positive definite system. ! ! Discussion: ! ! The system is of the form: ! ! C * X = B ! ! and the Cholesky factorization of C has been constructed ! by BCHFAC. ! ! With the factorization ! ! C = L * D * L' ! ! available, where L is unit lower triangular and D is diagonal, ! the triangular system ! ! L * Y = B ! ! is solved for Y (forward substitution), Y is stored in B, the ! vector D**(-1)*Y is computed and stored in B, then the ! triangular system L'*X = D**(-1)*Y is solved for X ! (backsubstitution). ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! Input, real ( kind = 8 ) W(NBANDS,NROW), the Cholesky factorization for C, ! as computed by BCHFAC. ! ! Input, integer NBANDS, the bandwidth of C. ! ! Input, integer NROW, the order of the matrix C. ! ! Input/output, real ( kind = 8 ) B(NROW). ! On input, the right hand side. ! On output, the solution. ! implicit none integer nbands integer nrow real ( kind = 8 ) b(nrow) integer j integer n real ( kind = 8 ) w(nbands,nrow) if ( nrow <= 1 ) then b(1) = b(1) * w(1,1) return end if ! ! Forward substitution. ! Solve L*Y=B. ! do n = 1, nrow do j = 1, min(nbands-1,nrow-n) b(j+n) = b(j+n) - w(j+1,n) * b(n) end do end do ! ! Backsubstitution. ! Solve L'*X=D**(-1)*Y. ! do n = nrow, 1, -1 b(n) = b(n)*w(1,n) do j = 1, min(nbands-1,nrow-n) b(n) = b(n) - w(j+1,n) * b(j+n) end do end do return end diff --git a/pppack/bsplpp.f90 b/pppack/bsplpp.f90 index 41b76bd..819f296 100644 --- a/pppack/bsplpp.f90 +++ b/pppack/bsplpp.f90 @@ -1,165 +1,165 @@ !> !> @file bsplpp.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine bsplpp ( t, bcoef, n, k, scrtch, break, coef, l ) !************************************************************************* ! !! BSPLPP converts from B-spline to piecewise polynomial form. ! ! Discussion: ! ! The B-spline representation of a spline is ( T, BCOEF, N, K ), ! while the piecewise polynomial representation is ! ( BREAK, COEF, L, K ). ! ! For each breakpoint interval, the K relevant B-spline coefficients ! of the spline are found and then differenced repeatedly to get the ! B-spline coefficients of all the derivatives of the spline on that ! interval. ! ! The spline and its first K-1 derivatives are then evaluated at the ! left end point of that interval, using BSPLVB repeatedly to obtain ! the values of all B-splines of the appropriate order at that point. ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! Input, real ( kind = 8 ) T(N+K), the knot sequence. ! ! Input, real ( kind = 8 ) BCOEF(N), the B spline coefficient sequence. ! ! Input, integer N, the number of B spline coefficients. ! ! Input, integer K, the order of the spline. ! ! Work array, real ( kind = 8 ) SCRTCH(K,K). ! ! Output, real ( kind = 8 ) BREAK(L+1), the piecewise polynomial breakpoint ! sequence. BREAK contains the distinct points in the ! sequence T(K),...,T(N+1) ! ! Output, real ( kind = 8 ) COEF(K,N), with COEF(I,J) = (I-1)st derivative ! of the spline at BREAK(J) from the right. ! ! Output, integer L, the number of polynomial pieces which ! make up the spline in the interval (T(K),T(N+1)). ! implicit none integer k integer l integer n real ( kind = 8 ) bcoef(n) real ( kind = 8 ) biatx(k) real ( kind = 8 ) break(*) real ( kind = 8 ) coef(k,n) real ( kind = 8 ) diff integer i integer j integer jp1 integer left integer lsofar real ( kind = 8 ) scrtch(k,k) real ( kind = 8 ) sum1 real ( kind = 8 ) t(n+k) lsofar = 0 break(1) = t(k) do left = k, n ! ! Find the next nontrivial knot interval. ! if ( t(left+1) == t(left) ) then cycle end if lsofar = lsofar + 1 break(lsofar+1) = t(left+1) if ( k <= 1 ) then coef(1,lsofar) = bcoef(left) cycle end if ! ! Store the K B-spline coefficients relevant to current knot ! interval in SCRTCH(*,1). ! do i = 1, k scrtch(i,1) = bcoef(left-k+i) end do ! ! For j=1,...,k-1, compute the k-j b-spline coefficients relevant to ! current knot interval for the j-th derivative by differencing ! those for the (j-1)st derivative, and store in scrtch(.,j+1) . ! do jp1 = 2, k j = jp1-1 do i = 1, k-j diff = t(left+i)-t(left+i-(k-j)) if ( 0.0D+00 < diff ) then scrtch(i,jp1)=((scrtch(i+1,j)-scrtch(i,j)) / diff ) & * real ( k - j, kind = 8 ) end if end do end do ! ! For J=0, ..., K-1, find the values at T(left) of the j+1 ! B-splines of order J+1 whose support contains the current ! knot interval from those of order J (in biatx ), then comb- ! ine with the B-spline coefficients (in scrtch(.,k-j) ) found earlier ! to compute the (k-j-1)st derivative at t(left) of the given ! spline. ! call bsplvb ( t, 1, 1, t(left), left, biatx ) coef(k,lsofar) = scrtch(1,k) do jp1 = 2, k call bsplvb ( t, jp1, 2, t(left), left, biatx ) sum1 = 0.0D+00 do i = 1, jp1 sum1 = sum1 + biatx(i) * scrtch(i,k+1-jp1) end do coef(k+1-jp1,lsofar) = sum1 end do end do l = lsofar return end diff --git a/pppack/bsplvb.f90 b/pppack/bsplvb.f90 index 89d9578..54e9460 100644 --- a/pppack/bsplvb.f90 +++ b/pppack/bsplvb.f90 @@ -1,170 +1,170 @@ !> !> @file bsplvb.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine bsplvb ( t, jhigh, index, x, left, biatx ) !************************************************************************* ! !! BSPLVB evaluates B-splines at a point X with a given knot sequence. ! ! Discusion: ! ! BSPLVB evaluates all possibly nonzero B-splines at X of order ! ! JOUT = MAX ( JHIGH, (J+1)*(INDEX-1) ) ! ! with knot sequence T. ! ! The recurrence relation ! ! X - T(I) T(I+J+1) - X ! B(I,J+1)(X) = ----------- * B(I,J)(X) + --------------- * B(I+1,J)(X) ! T(I+J)-T(I) T(I+J+1)-T(I+1) ! ! is used to generate B(LEFT-J:LEFT,J+1)(X) from B(LEFT-J+1:LEFT,J)(X) ! storing the new values in BIATX over the old. ! ! The facts that ! ! B(I,1)(X) = 1 if T(I) <= X < T(I+1) ! ! and that ! ! B(I,J)(X) = 0 unless T(I) <= X < T(I+J) ! ! are used. ! ! The particular organization of the calculations follows ! algorithm 8 in chapter X of the text. ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! Input, real ( kind = 8 ) T(LEFT+JOUT), the knot sequence. T is assumed to ! be nondecreasing, and also, T(LEFT) must be strictly less than ! T(LEFT+1). ! ! Input, integer JHIGH, INDEX, determine the order ! JOUT = MAX(JHIGH,(J+1)*(INDEX-1)) ! of the B-splines whose values at X are to be returned. ! INDEX is used to avoid recalculations when several ! columns of the triangular array of B-spline values are ! needed, for example, in BVALUE or in BSPLVD. ! ! If INDEX = 1, the calculation starts from scratch and the entire ! triangular array of B-spline values of orders ! 1, 2, ...,JHIGH is generated order by order, i.e., ! column by column. ! ! If INDEX = 2, only the B-spline values of order J+1, J+2, ..., JOUT ! are generated, the assumption being that BIATX, J, ! DELTAL, DELTAR are, on entry, as they were on exit ! at the previous call. In particular, if JHIGH = 0, ! then JOUT = J+1, i.e., just the next column of B-spline ! values is generated. ! ! WARNING: the restriction JOUT <= JMAX (= 20) is ! imposed arbitrarily by the dimension statement for DELTAL ! and DELTAR, but is nowhere checked for. ! ! Input, real ( kind = 8 ) X, the point at which the B-splines ! are to be evaluated. ! ! Input, integer LEFT, an integer chosen so that ! T(LEFT) <= X <= T(LEFT+1). ! ! Output, real ( kind = 8 ) BIATX(JOUT), with BIATX(I) containing the ! value at X of the polynomial of order JOUT which agrees ! with the B-spline B(LEFT-JOUT+I,JOUT,T) on the interval ! (T(LEFT),T(LEFT+1)). ! implicit none integer, parameter :: jmax = 20 integer jhigh real ( kind = 8 ) biatx(jhigh) !!$ real ( kind = 8 ), save, dimension ( jmax ) :: deltal !!$ real ( kind = 8 ), save, dimension ( jmax ) :: deltar real ( kind = 8 ), dimension ( jmax ) :: deltal real ( kind = 8 ), dimension ( jmax ) :: deltar integer i integer index !!$ integer, save :: j = 1 integer :: j integer left real ( kind = 8 ) saved real ( kind = 8 ) t(left+jhigh) real ( kind = 8 ) term real ( kind = 8 ) x ! Forces starting always from scratch! !!$ if ( index == 1 ) then j = 1 biatx(1) = 1.0D+00 if ( jhigh <= j ) then return end if !!$ end if if ( t(left+1) <= t(left) ) then write ( *, '(a)' ) ' ' write ( *, '(a)' ) 'BSPLVB - Fatal error!' write ( *, '(a)' ) ' It is required that T(LEFT) < T(LEFT+1).' write ( *, '(a,i6)' ) ' But LEFT = ', left write ( *, '(a,g14.6)' ) ' T(LEFT) = ', t(left) write ( *, '(a,g14.6)' ) ' T(LEFT+1) = ', t(left+1) stop end if do deltar(j) = t(left+j) - x deltal(j) = x - t(left+1-j) saved = 0.0D+00 do i = 1, j term = biatx(i) / ( deltar(i) + deltal(j+1-i) ) biatx(i) = saved + deltar(i) * term saved = deltal(j+1-i) * term end do biatx(j+1) = saved j = j + 1 if ( jhigh <= j ) then exit end if end do return end diff --git a/pppack/bsplvd.f90 b/pppack/bsplvd.f90 index 82d203c..e4b831f 100644 --- a/pppack/bsplvd.f90 +++ b/pppack/bsplvd.f90 @@ -1,189 +1,189 @@ !> !> @file bsplvd.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine bsplvd ( t, k, x, left, a, dbiatx, nderiv ) !************************************************************************* ! !! BSPLVD calculates the nonvanishing B-splines and derivatives at X. ! ! Discussion: ! ! Values at X of all the relevant B-splines of order K, K-1,..., K+1-NDERIV ! are generated via BSPLVB and stored temporarily in DBIATX. ! ! Then, the B-spline coefficients of the required derivatives ! of the B-splines of interest are generated by differencing, ! each from the preceding one of lower order, and combined with ! the values of B-splines of corresponding order in DBIATX ! to produce the desired values. ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! Input, real ( kind = 8 ) T(LEFT+K), the knot sequence. It is assumed that ! T(LEFT) < T(LEFT+1). Also, the output is correct only if ! T(LEFT) <= X <= T(LEFT+1) . ! ! Input, integer K, the order of the B-splines to be evaluated. ! ! Input, real ( kind = 8 ) X, the point at which these values are sought. ! ! Input, integer LEFT, indicates the left endpoint of the interval of ! interest. The K B-splines whose support contains the interval ! (T(LEFT), T(LEFT+1)) are to be considered. ! ! Workspace, real ( kind = 8 ) A(K,K). ! ! Output, real ( kind = 8 ) DBIATX(K,NDERIV). DBIATX(I,M) contains ! the value of the (M-1)st derivative of the (LEFT-K+I)-th B-spline ! of order K for knot sequence T, I=M,...,K, M=1,...,NDERIV. ! ! Input, integer NDERIV, indicates that values of ! B-splines and their derivatives up to but not ! including the NDERIV-th are asked for. ! implicit none integer k integer left integer nderiv real ( kind = 8 ) a(k,k) real ( kind = 8 ) dbiatx(k,nderiv) real ( kind = 8 ) factor real ( kind = 8 ) fkp1mm integer i integer ideriv integer il integer j integer jlow integer jp1mid integer ldummy integer m integer mhigh real ( kind = 8 ) sum1 real ( kind = 8 ) t(left+k) real ( kind = 8 ) x mhigh = max ( min ( nderiv, k ), 1 ) ! ! MHIGH is usually equal to nderiv. ! call bsplvb ( t, k+1-mhigh, 1, x, left, dbiatx ) if ( mhigh == 1 ) then return end if ! ! The first column of DBIATX always contains the B-spline values ! for the current order. These are stored in column K+1-current ! order before BSPLVB is called to put values for the next ! higher order on top of it. ! ideriv = mhigh do m = 2, mhigh jp1mid = 1 do j = ideriv, k dbiatx(j,ideriv) = dbiatx(jp1mid,1) jp1mid = jp1mid+1 end do ideriv = ideriv-1 call bsplvb(t,k+1-ideriv,2,x,left,dbiatx) end do ! ! At this point, b(left-k+i, k+1-j)(x) is in dbiatx(i,j) for ! i=j,...,k and j=1,...,mhigh ('=' nderiv). in particular, the ! first column of dbiatx is already in final form. to obtain cor- ! ??? LOST A LINE ??? ! rate their b-repr. by differencing, then evaluate at x. ! jlow = 1 do i = 1, k do j = jlow,k a(j,i) = 0.0D+00 end do jlow = i a(i,i) = 1.0D+00 end do ! ! At this point, a(.,j) contains the b-coefficients for the J-th of the ! k b-splines of interest here. ! do m = 2, mhigh fkp1mm = real ( k + 1 - m, kind = 8 ) il = left i = k ! ! For j=1,...,k, construct b-coefficients of (m-1)st derivative of ! b-splines from those for preceding derivative by differencing ! and store again in a(.,j) . The fact that a(i,j)=0 for ! i < j is used. ! do ldummy = 1, k+1-m factor = fkp1mm/(t(il+k+1-m)-t(il)) ! ! The assumption that t(left) < t(left+1) makes denominator ! in factor nonzero. ! do j = 1, i a(i,j) = (a(i,j)-a(i-1,j))*factor end do il = il-1 i = i-1 end do ! ! For i=1,...,k, combine b-coefficients a(.,i) with B-spline values ! stored in dbiatx(.,m) to get value of (m-1)st derivative of ! i-th b-spline (of interest here) at x , and store in ! dbiatx(i,m). storage of this value over the value of a b-spline ! of order m there is safe since the remaining b-spline derivat- ! ives of the same order do not use this value due to the fact ! that a(j,i)=0 for j < i. ! do i = 1, k sum1 = 0.0D+00 jlow = max(i,m) do j = jlow,k sum1 = sum1 + a(j,i) * dbiatx(j,m) end do dbiatx(i,m) = sum1 end do end do return end diff --git a/pppack/bspp2d.f90 b/pppack/bspp2d.f90 index 59db260..e50c5da 100644 --- a/pppack/bspp2d.f90 +++ b/pppack/bspp2d.f90 @@ -1,203 +1,203 @@ !> !> @file bspp2d.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine bspp2d ( t, bcoef, n, k, m, scrtch, break, coef, l ) !************************************************************************* ! !! BSPP2D converts from B-spline to piecewise polynomial representation. ! ! Discussion: ! ! The B-spline representation ! ! T, BCOEF(.,J), N, K ! ! is converted to its piecewise polynomial representation ! ! BREAK, COEF(J,.,.), L, K, J=1, ..., M. ! ! This is an extended version of BSPLPP for use with tensor products. ! ! For each breakpoint interval, the K relevant B-spline ! coefficients of the spline are found and then differenced ! repeatedly to get the B-spline coefficients of all the ! derivatives of the spline on that interval. ! ! The spline and its first K-1 derivatives are then evaluated ! at the left endpoint of that interval, using BSPLVB ! repeatedly to obtain the values of all B-splines of the ! appropriate order at that point. ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! Input, real ( kind = 8 ) T(N+K), the knot sequence. ! ! Input, real ( kind = 8 ) BCOEF(N,M). For each J, B(*,J) is the ! B-spline coefficient sequence, of length N. ! ! Input, integer N, the length of BCOEF. ! ! Input, integer K, the order of the spline. ! ! Input, integer M, the number of data sets. ! ! Work array, real ( kind = 8 ) SCRTCH(K,K,M). ! ! Output, real ( kind = 8 ) BREAK(L+1), the breakpoint sequence ! containing the distinct points in the sequence T(K),...,T(N+1) ! ! Output, real ( kind = 8 ) COEF(M,K,N), with COEF(MM,I,J) = the (I-1)st ! derivative of the MM-th spline at BREAK(J) from the right, MM=1, ..., M. ! ! Output, integer L, the number of polynomial pieces which make up the ! spline in the interval (T(K), T(N+1)). ! implicit none integer k integer m integer n real ( kind = 8 ) bcoef(n,m) real ( kind = 8 ) biatx(k) real ( kind = 8 ) break(*) real ( kind = 8 ) coef(m,k,*) real ( kind = 8 ) diff real ( kind = 8 ) fkmj integer i integer j integer jp1 integer kmj integer l integer left integer lsofar integer mm real ( kind = 8 ) scrtch(k,k,m) real ( kind = 8 ) sum1 real ( kind = 8 ) t(n+k) lsofar = 0 break(1) = t(k) do left = k, n ! ! Find the next nontrivial knot interval. ! if ( t(left+1) == t(left) ) then cycle end if lsofar = lsofar+1 break(lsofar+1) = t(left+1) if ( k <= 1 ) then do mm = 1, m coef(mm,1,lsofar) = bcoef(left,mm) end do cycle end if ! ! Store the K b-spline coefficients relevant to current knot interval ! in scrtch(.,1) . ! do i = 1, k do mm = 1, m scrtch(i,1,mm) = bcoef(left-k+i,mm) end do end do ! ! for j=1,...,k-1, compute the k-j b-spline coefficients relevant to ! current knot interval for the j-th derivative by differencing ! those for the (j-1)st derivative, and store in scrtch(.,j+1) . ! do jp1 = 2, k j = jp1-1 kmj = k-j fkmj = real ( k - j, kind = 8 ) do i = 1, k-j diff = (t(left+i)-t(left+i-kmj))/fkmj if ( 0.0D+00 < diff ) then do mm = 1, m scrtch(i,jp1,mm)=(scrtch(i+1,j,mm)-scrtch(i,j,mm))/diff end do end if end do end do ! ! For j=0, ..., k-1, find the values at T(left) of the j+1 ! b-splines of order j+1 whose support contains the current ! knot interval from those of order j (in biatx ), then comb- ! ine with the b-spline coefficients (in scrtch(.,k-j) ) found earlier ! to compute the (k-j-1)st derivative at t(left) of the given ! spline. ! call bsplvb ( t, 1, 1, t(left), left, biatx ) do mm = 1, m coef(mm,k,lsofar) = scrtch(1,k,mm) end do do jp1 = 2, k call bsplvb (t,jp1,2,t(left),left,biatx) kmj = k+1-jp1 do mm = 1, m sum1 = 0.0D+00 do i = 1, jp1 sum1 = sum1 + biatx(i) * scrtch(i,kmj,mm) end do coef(mm,kmj,lsofar) = sum1 end do end do end do l = lsofar return end diff --git a/pppack/bvalue.f90 b/pppack/bvalue.f90 index ead864f..04af9b5 100644 --- a/pppack/bvalue.f90 +++ b/pppack/bvalue.f90 @@ -1,226 +1,226 @@ !> !> @file bvalue.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> function bvalue ( t, bcoef, n, k, x, jderiv ) !************************************************************************* ! !! BVALUE evaluates a derivative of a spline from its B-spline representation. ! ! Discussion: ! ! The spline is taken to be continuous from the right. ! ! The nontrivial knot interval (T(I),T(I+1)) containing X is ! located with the aid of INTERV. The K B-spline coefficients ! of F relevant for this interval are then obtained from BCOEF, ! or are taken to be zero if not explicitly available, and are ! then differenced JDERIV times to obtain the B-spline ! coefficients of (D**JDERIV)F relevant for that interval. ! ! Precisely, with J = JDERIV, we have from X.(12) of the text that: ! ! (D**J)F = sum ( BCOEF(.,J)*B(.,K-J,T) ) ! ! where ! / BCOEF(.), , J == 0 ! / ! BCOEF(.,J) = / BCOEF(.,J-1) - BCOEF(.-1,J-1) ! / -----------------------------, 0 < J ! / (T(.+K-J) - T(.))/(K-J) ! ! Then, we use repeatedly the fact that ! ! sum ( A(.)*B(.,M,T)(X) ) = sum ( A(.,X)*B(.,M-1,T)(X) ) ! ! with ! (X - T(.))*A(.) + (T(.+M-1) - X)*A(.-1) ! A(.,X) = --------------------------------------- ! (X - T(.)) + (T(.+M-1) - X) ! ! to write (D**J)F(X) eventually as a linear combination of ! B-splines of order 1, and the coefficient for B(I,1,T)(X) ! must then be the desired number (D**J)F(X). ! See x.(17)-(19) of text. ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! Input, real ( kind = 8 ) T(N+K), the knot sequence. T is assumed ! to be nondecreasing. ! ! Input, real ( kind = 8 ) BCOEF(N), B-spline coefficient sequence. ! ! Input, integer N, the length of BCOEF. ! ! Input, integer K, the order of the spline. ! ! Input, real ( kind = 8 ) X, the point at which to evaluate. ! ! Input, integer JDERIV, the order of the derivative to ! be evaluated. JDERIV is assumed to be zero or positive. ! ! Output, real ( kind = 8 ) BVALUE, the value of the (JDERIV)-th ! derivative of the spline at X. ! implicit none integer k integer n real ( kind = 8 ) aj(k) real ( kind = 8 ) bcoef(n) real ( kind = 8 ) bvalue real ( kind = 8 ) dl(k) real ( kind = 8 ) dr(k) integer i integer ilo integer j integer jc integer jcmax integer jcmin integer jderiv integer jj integer mflag real ( kind = 8 ) t(n+k) real ( kind = 8 ) x bvalue = 0.0D+00 if ( k <= jderiv ) then return end if ! ! Find I so that 1 <= i < n+k and t(i) < t(i+1) and t(i) <= x < t(i+1). ! ! If no such i can be found, X lies ! outside the support of the spline F and bvalue=0. ! (the asymmetry in this choice of i makes F rightcontinuous) ! call interv ( t, n+k, x, i, mflag ) if ( mflag /= 0 ) then return end if ! ! If K=1 (and jderiv = 0), bvalue = bcoef(i). ! if ( k <= 1 ) then bvalue = bcoef(i) return end if ! ! Store the K b-spline coefficients relevant for the knot interval ! (T(i),T(i+1)) in aj(1),...,aj(k) and compute dl(j)=x-t(i+1-j), ! dr(j)=T(i+j)-x, j=1,...,k-1 . set any of the aj not obtainable ! from input to zero. Set any T's not obtainable equal to T(1) or ! to T(n+k) appropriately. ! jcmin = 1 if ( k <= i ) then do j = 1, k-1 dl(j) = x-t(i+1-j) end do else jcmin = 1-(i-k) do j = 1, i dl(j) = x-t(i+1-j) end do do j = i, k-1 aj(k-j) = 0.0D+00 dl(j) = dl(i) end do end if jcmax = k if ( i <= n ) then go to 90 end if jcmax = k + n - i do j = 1, k+n-i dr(j) = t(i+j)-x end do do j = k+n-i, k-1 aj(j+1) = 0.0D+00 dr(j) = dr(k+n-i) end do go to 110 90 continue do j = 1, k-1 dr(j) = t(i+j)-x end do 110 continue do jc = jcmin, jcmax aj(jc) = bcoef(i-k+jc) end do ! ! Difference the coefficients JDERIV times. ! do j = 1, jderiv ilo = k-j do jj = 1, k-j aj(jj) = ((aj(jj+1)-aj(jj))/(dl(ilo)+dr(jj))) * real ( k - j, kind = 8 ) ilo = ilo-1 end do end do ! ! Compute value at X in (t(i),t(i+1)) of jderiv-th derivative, ! given its relevant b-spline coefficients in aj(1),...,aj(k-jderiv). ! do j = jderiv+1, k-1 ilo = k-j do jj = 1, k-j aj(jj) = ( aj(jj+1) * dl(ilo) + aj(jj) * dr(jj) ) & / ( dl(ilo) + dr(jj) ) ilo = ilo-1 end do end do bvalue = aj(1) return end diff --git a/pppack/chol1d.f90 b/pppack/chol1d.f90 index 8a0e19b..4fdfbb4 100644 --- a/pppack/chol1d.f90 +++ b/pppack/chol1d.f90 @@ -1,146 +1,146 @@ !> !> @file chol1d.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine chol1d ( p, v, qty, npoint, u, qu ) !************************************************************************* ! !! CHOL1D sets up and solves linear systems needed by SMOOTH. ! ! Discussion: ! ! This routine constructs the upper three diagonals of ! ! V(I,J), I=2 to NPOINT-1, J=1,3, ! ! of the matrix ! ! 6*(1-P)*Q-transpose*(D**2)*Q + P*R. ! ! It then computes its L*L' decomposition and stores it also ! in V, then applies forward and backsubstitution to the right side ! ! Q'*Y ! ! in QTY to obtain the solution in U. ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! Input, real ( kind = 8 ) P, ? ! ! ?put, real ( kind = 8 ) V(NPOINT,7), ? ! ! ?put, real ( kind = 8 ) QTY(NPOINT), ? ! ! Input, integer NPOINT, ? ! ! Output, real ( kind = 8 ) U(NPOINT), the solution. ! ! Output, real ( kind = 8 ) QU(NPOINT), the value of Q * U. ! implicit none integer npoint integer i real ( kind = 8 ) p real ( kind = 8 ) qty(npoint) real ( kind = 8 ) qu(npoint) real ( kind = 8 ) u(npoint) real ( kind = 8 ) v(npoint,7) real ( kind = 8 ) prev real ( kind = 8 ) ratio real ( kind = 8 ) six1mp real ( kind = 8 ) twop ! ! Construct 6*(1-p)*q'*(d**2)*q + p*r ! six1mp = 6.0D+00 * ( 1.0D+00 - p ) twop = 2.0D+00 * p do i = 2, npoint-1 v(i,1) = six1mp * v(i,5)+twop*(v(i-1,4)+v(i,4)) v(i,2) = six1mp * v(i,6)+p*v(i,4) v(i,3) = six1mp * v(i,7) end do if ( npoint < 4 ) then u(1) = 0.0D+00 u(2) = qty(2) / v(2,1) u(3) = 0.0D+00 ! ! Factorization ! else do i = 2, npoint-2 ratio = v(i,2)/v(i,1) v(i+1,1) = v(i+1,1)-ratio*v(i,2) v(i+1,2) = v(i+1,2)-ratio*v(i,3) v(i,2) = ratio ratio = v(i,3)/v(i,1) v(i+2,1) = v(i+2,1)-ratio*v(i,3) v(i,3) = ratio end do ! ! Forward substitution ! u(1) = 0.0D+00 v(1,3) = 0.0D+00 u(2) = qty(2) do i = 2, npoint-2 u(i+1) = qty(i+1)-v(i,2)*u(i)-v(i-1,3)*u(i-1) end do ! ! Back substitution. ! u(npoint) = 0.0D+00 u(npoint-1) = u(npoint-1) / v(npoint-1,1) do i = npoint-2, 2, -1 u(i) = u(i)/v(i,1)-u(i+1)*v(i,2)-u(i+2)*v(i,3) end do end if ! ! Construct Q*U. ! prev = 0.0D+00 do i = 2, npoint qu(i) = (u(i)-u(i-1))/v(i-1,4) qu(i-1) = qu(i)-prev prev = qu(i) end do qu(npoint) = -qu(npoint) return end diff --git a/pppack/colloc.f90 b/pppack/colloc.f90 index df60a0e..26b0031 100644 --- a/pppack/colloc.f90 +++ b/pppack/colloc.f90 @@ -1,275 +1,275 @@ !> !> @file colloc.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine colloc ( aleft, aright, lbegin, iorder, ntimes, addbrk, & relerr ) !************************************************************************* ! !! COLLOC solves an ordinary differential equation by collocation. ! ! Method: ! ! The M-th order ordinary differential equation with M side ! conditions, to be specified in subroutine DIFEQU, is solved ! approximately by collocation. ! ! The approximation F to the solution G is piecewise polynomial of order ! k+m with L pieces and M-1 continuous derivatives. F is determined by ! the requirement that it satisfy the differential equation at K points ! per interval (to be specified in COLPNT ) and the M side conditions. ! ! This usually nonlinear system of equations for f is solved by ! Newton's method. the resulting linear system for the b-coefficients of an ! iterate is constructed appropriately in eqblok and then solved ! in slvblk, a program designed to solve almost block ! diagonal linear systems efficiently. ! ! There is an opportunity to attempt improvement of the breakpoint ! sequence (both in number and location) through use of NEWNOT. ! ! Printed output consists of the pp-representation of the approximate ! solution, and of the error at selected points. ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! Input, real ( kind = 8 ) ALEFT, ARIGHT, the endpoints of the interval. ! ! Input, integer LBEGIN, the initial number of polynomial pieces ! in the approximation. A uniform breakpoint sequence will be chosen. ! ! Input, integer IORDER, the order of the polynomial pieces to be ! used in the approximation ! ! Input, integer NTIMES, the number of passes to be made through NEWNOT. ! ! addbrk the number (possibly fractional) of breaks to be added per ! pass through newnot. e.g., if addbrk=.33334, then a break- ! point will be added at every third pass through newnot. ! ! relerr a tolerance. Newton iteration is stopped if the difference ! between the b-coefficients of two successive iterates is no more ! than relerr*(absolute largest b-coefficient). ! implicit none integer, parameter :: npiece = 100 integer, parameter :: ndim = 200 integer, parameter :: ncoef = 2000 integer, parameter :: lenblk = 2000 real ( kind = 8 ) a(ndim) real ( kind = 8 ) addbrk real ( kind = 8 ) aleft real ( kind = 8 ) amax real ( kind = 8 ) aright real ( kind = 8 ) asave(ndim) real ( kind = 8 ) b(ndim) real ( kind = 8 ) bloks(lenblk) real ( kind = 8 ) break real ( kind = 8 ) coef real ( kind = 8 ) dx real ( kind = 8 ) err integer i integer iflag integer ii integer integs(3,npiece) integer iorder integer iside integer itemps(ndim) integer iter integer itermx integer j integer k integer kpm integer l integer lbegin integer lnew integer m integer n integer nbloks integer nt integer ntimes real ( kind = 8 ) relerr real ( kind = 8 ) rho real ( kind = 8 ) t(ndim) real ( kind = 8 ) templ(lenblk) real ( kind = 8 ) temps(ndim) real ( kind = 8 ) xside equivalence (bloks,templ) common /approx/ break(npiece),coef(ncoef),l,kpm common /side/ m,iside,xside(10) common /other/ itermx,k,rho(19) kpm = iorder if ( ncoef < lbegin * kpm ) then go to 120 end if ! ! Set the various parameters concerning the particular dif.equ. ! including a first approximation in case the de is to be solved by ! iteration ( 0 < itermx ). ! call difequ ( 1, temps(1), temps ) ! ! Obtain the K collocation points for the standard interval. ! k = kpm-m call colpnt(k,rho) ! ! The following five statements could be replaced by a read in or- ! der to obtain a specific (nonuniform) spacing of the breakpnts. ! dx = (aright-aleft) / real ( lbegin, kind = 8 ) temps(1) = aleft do i = 2, lbegin temps(i) = temps(i-1)+dx end do temps(lbegin+1) = aright ! ! Generate the required knots t(1),...,t(n+kpm). ! call knots ( temps, lbegin, kpm, t, n ) nt = 1 ! ! Generate the almost block diagonal coefficient matrix bloks and ! right side b from collocation equations and side conditions. ! then solve via slvblk , obtaining the b-representation of the ! approximation in T, A, N, KPM. ! 20 continue call eqblok ( t, n, kpm, temps, a, bloks, lenblk, integs, nbloks, b ) call slvblk ( bloks, integs, nbloks, b, itemps, a, iflag ) iter = 1 if ( itermx <= 1 ) then go to 60 end if ! ! Save b-spline coefficients of current approx. in asave , then get new ! approx. and compare with old. if coefficients are more than relerr ! apart (relatively) or if number of iterations is less than itermx , ! continue iterating. ! 30 continue call bsplpp(t,a,n,kpm,templ,break,coef,l) do i = 1, n asave(i) = a(i) end do call eqblok ( t, n, kpm, temps, a, bloks, lenblk, integs, nbloks, b ) call slvblk(bloks,integs,nbloks,b,itemps,a,iflag) err = 0.0D+00 amax = 0.0D+00 do i = 1, n amax = max ( amax, abs ( a(i) ) ) err = max ( err, abs ( a(i)-asave(i) ) ) end do if ( err <= relerr*amax ) then go to 60 end if iter = iter + 1 if ( iter < itermx ) then go to 30 end if ! ! Iteration (if any) completed. print out approx. based on current ! breakpoint sequence, then try to improve the sequence. ! 60 continue write(*,70)kpm,l,n,(break(i),i=2,l) 70 format (' approximation from a space of splines of order',i3, & ' on ',i3,' intervals,'/' of dimension',i4,'. breakpoints -'/ & (5e20.10)) if ( 0 < itermx ) then write(*,*)' ' write(*,*)'Results on interation ',iter end if call bsplpp(t,a,n,kpm,templ,break,coef,l) write ( *, * ) ' ' write ( *, * ) 'The piecewise polynomial representation of the approximation:' write ( *, * ) ' ' do i = 1, l ii = ( i - 1 ) * kpm write(*,'(f9.3,e13.6,10e11.3)')break(i),(coef(ii+j),j=1,kpm) end do ! ! The following call is provided here for possible further analysis ! of the approximation specific to the problem being solved. ! it is, of course, easily omitted. ! call difequ ( 4, temps(1), temps ) if ( ntimes < nt ) then return end if ! ! From the pp-rep. of the current approx., obtain in NEWNOT a new ! (and possibly better) sequence of breakpoints, adding (on the ! average) ADDBRK breakpoints per pass through NEWNOT. ! lnew = lbegin + int ( real ( nt, kind = 8 ) * addbrk ) if ( ncoef < lnew * kpm ) then go to 120 end if call newnot(break,coef,l,kpm,temps,lnew,templ) call knots ( temps, lnew, kpm, t, n ) nt = nt+1 go to 20 120 continue write(*,*)' ' write(*,*)'COLLOC - Fatal error!' write(*,*)' The assigned dimension for COEF is ',ncoef write(*,*)' but this is too small.' stop end diff --git a/pppack/colpnt.f90 b/pppack/colpnt.f90 index 8bfb5ad..37ef621 100644 --- a/pppack/colpnt.f90 +++ b/pppack/colpnt.f90 @@ -1,117 +1,117 @@ !> !> @file colpnt.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine colpnt ( k, rho ) !************************************************************************* ! !! COLPNT supplies collocation points. ! ! Discussion: ! ! The collocation points are for the standard interval (-1,1) as the ! zeros of the Legendre polynomial of degree K, provided K <= 8. ! ! Otherwise, uniformly spaced points are given. ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! Input, integer K, the number of collocation points desired. ! ! Output, real ( kind = 8 ) RHO(K), the collocation points. ! implicit none integer k integer j real ( kind = 8 ) rho(k) if ( k == 1 ) then rho(1) = 0.0D+00 else if ( k == 2 ) then rho(1) = -0.577350269189626D+00 rho(2) = 0.577350269189626D+00 else if ( k == 3 ) then rho(1) = -0.774596669241483D+00 rho(2) = 0.0 rho(3) = 0.774596669241483D+00 else if ( k == 4 ) then rho(1) = -0.861136311594053D+00 rho(2) = -0.339981043584856D+00 rho(3) = 0.339981043584856D+00 rho(4) = 0.861136311594053D+00 else if ( k == 5 ) then rho(1) = -0.906179845938664D+00 rho(2) = -0.538469310105683D+00 rho(3) = 0.0D+00 rho(4) = 0.538469310105683D+00 rho(5) = 0.906179845938664D+00 else if ( k == 6 ) then rho(1) = -0.932469514203152D+00 rho(2) = -0.661209386466265D+00 rho(3) = -0.238619186083197D+00 rho(4) = 0.238619186083197D+00 rho(5) = 0.661209386466265D+00 rho(6) = 0.932469514203152D+00 else if ( k == 7 ) then rho(5) = 0.405845151377397D+00 rho(3) = -rho(5) rho(6) = 0.741531185599394D+00 rho(2) = -rho(6) rho(7) = 0.949107912342759D+00 rho(1) = -rho(7) rho(4) = 0.0 else if ( k == 8 ) then rho(5) = 0.183434642495650D+00 rho(4) = -rho(5) rho(6) = 0.525532409916329D+00 rho(3) = -rho(6) rho(7) = 0.796666477413627D+00 rho(2) = -rho(7) rho(8) = 0.960289856497536D+00 rho(1) = -rho(8) else write ( *, * )' ' write ( *, * )'ColPnt - Warning!' write ( *, * )' Equispaced collocation points will be used,' write ( *, * )' because K =',k,' which is greater than 8.' do j = 1, k rho(j) = -1.0D+00 + 2.0D+00 * real ( j - 1, kind = 8 ) & / real ( k - 1, kind = 8 ) end do end if return end diff --git a/pppack/cspint.f90 b/pppack/cspint.f90 index fb201fb..0de7b13 100644 --- a/pppack/cspint.f90 +++ b/pppack/cspint.f90 @@ -1,214 +1,214 @@ !> !> @file cspint.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine cspint ( ftab, xtab, ntab, a, b, y, e, work, result, ind ) !************************************************************************* ! !! CSPINT estimates an integral using a spline interpolant. ! ! Discussion: ! ! CSPINT estimates the integral from A to B of F(X) by ! computing the natural spline S(X) that interpolates to F ! and integrating that exactly. ! ! F is supplied to the routine in the form of tabulated data. ! ! Other output from the program includes the definite integral ! from X(1) to X(I) of the spline, and the coefficients ! necessary for the user to evaluate the spline outside of ! this routine. ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! Input, real ( kind = 8 ) FTAB(NTAB), contains the tabulated values ! of the functions, FTAB(I)=F(XTAB(I)). ! ! Input, real ( kind = 8 ) XTAB(NTAB), contains the points at ! which the function was evaluated. The XTAB's must be ! distinct and in ascending order. ! ! Input, integer NTAB, the number of entries in FTAB ! and XTAB. NTAB must be at least 3. ! ! Input, real ( kind = 8 ) A, lower limit of integration. ! ! Input, real ( kind = 8 ) B, upper limit of integration. ! ! Output, real ( kind = 8 ) Y(3,NTAB), will contain the coefficients ! of the interpolating natural spline over each subinterval. ! ! For XTAB(I) < = X <= XTAB(I+1), ! ! S(X) = FTAB(I) + Y(1,I)*(X-XTAB(I)) + Y(2,I)*(X-XTAB(I))**2 ! + Y(3,I)*(X-XTAB(I))**3 ! ! Output, real ( kind = 8 ) E(NTAB), E(I)=the definite integral ! from XTAB(1) to XTAB(I) of S(X). ! ! Workspace, real ( kind = 8 ) WORK(NTAB). ! ! Output, real ( kind = 8 ) RESULT, the estimated value of the integral. ! ! Output, integer IND, error flag. ! IND=0 if NTAB < 3 or the XTAB's are not distinct and in ! ascending order. ! IND=1 otherwise. ! implicit none integer ntab real ( kind = 8 ) a real ( kind = 8 ) b real ( kind = 8 ) e(ntab) real ( kind = 8 ) ftab(ntab) integer i integer ind integer j real ( kind = 8 ) r real ( kind = 8 ) result real ( kind = 8 ) s real ( kind = 8 ) term real ( kind = 8 ) u real ( kind = 8 ) work(ntab) real ( kind = 8 ) xtab(ntab) real ( kind = 8 ) y(3,ntab) ind = 0 if ( ntab < 3 ) then write(*,*)' ' write(*,*)'CSPINT - Fatal error!' write(*,*)' NTAB must be at least 3,' write(*,*)' but your value was NTAB = ',ntab stop end if do i = 1, ntab-1 if ( xtab(i+1) <= xtab(i) ) then write(*,*)' ' write(*,*)'CSPINT - Fatal error!' write(*,*)' Interval ',i,' is illegal.' write(*,*)' XTAB(I) =',xtab(i) write(*,*)' XTAB(I+1)=',xtab(i+1) stop end if end do s = 0.0D+00 do i = 1, ntab-1 r = ( ftab(i+1) - ftab(i) ) / ( xtab(i+1) - xtab(i) ) y(2,i) = r - s s = r end do result = 0.0D+00 s = 0.0D+00 r = 0.0D+00 y(2,1) = 0.0D+00 y(2,ntab) = 0.0D+00 do i = 2, ntab-1 y(2,i) = y(2,i) + r * y(2,i-1) work(i) = 2.0D+00 * ( xtab(i-1) - xtab(i+1) ) - r * s s = xtab(i+1) - xtab(i) r = s / work(i) end do do j = 2, ntab-1 i = ntab+1-j y(2,i) = ((xtab(i+1)-xtab(i))*y(2,i+1)-y(2,i))/work(i) end do do i = 1, ntab-1 s = xtab(i+1) - xtab(i) r = y(2,i+1) - y(2,i) y(3,i) = r / s y(2,i) = 3.0D+00 * y(2,i) y(1,i) = ( ftab(i+1) - ftab(i) ) / s - ( y(2,i) + r ) * s end do e(1) = 0.0D+00 do i = 1, ntab-1 s = xtab(i+1) - xtab(i) term = ( ( ( y(3,i) * 0.25D+00 * s & + y(2,i) / 3.0D+00 ) * s & + y(1,i) * 0.5D+00 ) * s + ftab(i) ) * s e(i+1) = e(i) + term end do ! ! Determine where the endpoints A and B lie in the mesh of XTAB's. ! r = a u = 1.0D+00 do j = 1, 2 if ( r <= xtab(1) ) then result = result-u*((r-xtab(1))*y(1,1)* 0.5D+00 + ftab(1))*(r-xtab(1)) else if ( xtab(ntab) <= r ) then result = result-u*(e(ntab)+(r-xtab(ntab))*(ftab(ntab) + 0.5D+00 * & (ftab(ntab-1)+(xtab(ntab)-xtab(ntab-1))*y(1,ntab-1))*(r- & xtab(ntab)))) else do i = 1, ntab-1 if ( r <= xtab(i+1) ) then r = r - xtab(i) result = result-u*(e(i)+(((y(3,i)*0.25D+00*r+y(2,i)/3.0D+00)*r & +y(1,i) * 0.5D+00 )*r+ftab(i))*r) go to 100 end if end do end if 100 continue u = -1.0D+00 r = b end do ind = 1 return end diff --git a/pppack/cubset.f90 b/pppack/cubset.f90 index 567b9de..9256b62 100644 --- a/pppack/cubset.f90 +++ b/pppack/cubset.f90 @@ -1,106 +1,106 @@ !> !> @file cubset.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine cubset ( tau, c, n, ibcbeg, ibcend ) !******************************************************************************* ! !! CUBSET sets up a simple cubic spline interpolant. ! ! WARNING: IBCBEG and IBCEND are not set up yet. ! ! A tridiagonal linear system for the unknown slopes S(I) of ! F at TAU(I), I=1,..., N, is generated and then solved by Gauss ! elimination, with S(I) ending up in C(2,I), for all I. ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! Input, real ( kind = 8 ) TAU(N), the abscissas or X values of ! the data points. The entries of TAU are assumed to be ! strictly increasing. ! ! Input, integer N, the number of data points. N is ! assumed to be at least 2. ! ! Input/output, real ( kind = 8 ) C(4,N). ! On input, if IBCBEG or IBCBEG is 1 or 2, then C(2,1) ! or C(2,N) should have been set to the desired derivative ! values, as described further under IBCBEG and IBCEND. ! On output, C contains the polynomial coefficients of ! the cubic interpolating spline with interior knots ! TAU(2) through TAU(N-1). ! In the interval interval (TAU(I), TAU(I+1)), the spline ! F is given by F(X) = ! C(1,I) + ! C(2,I) * ( X - TAU(I) ) + ! C(3,I) * ( X - TAU(I) )**2 + ! C(4,I) * ( X - TAU(I) )**3 ! ! IBCBEG, ! IBCEND Input, integer IBCBEG, IBCEND, boundary condition ! indicators. ! ! IBCBEG=0 means no boundary condition at TAU(1) is given. ! In this case, the "not-a-knot condition" is used. That ! is, the jump in the third derivative across TAU(2) is ! forced to zero. Thus the first and the second cubic ! polynomial pieces are made to coincide. ! ! IBCBEG=1 means that the slope at TAU(1) is to equal the ! input value C(2,1). ! ! IBCBEG=2 means that the second derivative at TAU(1) is ! to equal C(2,1). ! ! IBCEND=0, 1, or 2 has analogous meaning concerning the ! boundary condition at TAU(N), with the additional ! information taken from C(2,N). ! implicit none integer n real ( kind = 8 ) c(4,n) integer ibcbeg integer ibcend real ( kind = 8 ) tau(n) ! ! Solve for the slopes at internal nodes. ! call cubslo ( tau, c, n ) ! ! Now compute the quadratic and cubic coefficients used in the ! piecewise polynomial representation. ! call spline_hermite_set ( n, tau, c ) return end diff --git a/pppack/cubslo.f90 b/pppack/cubslo.f90 index 659792d..ffe3736 100644 --- a/pppack/cubslo.f90 +++ b/pppack/cubslo.f90 @@ -1,111 +1,111 @@ !> !> @file cubslo.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine cubslo ( tau, c, n ) !******************************************************************************* ! !! CUBSLO solves for slopes defining a cubic spline. ! ! Discussion: ! ! A tridiagonal linear system for the unknown slopes S(I) of ! F at TAU(I), I=1,..., N, is generated and then solved by Gauss ! elimination, with S(I) ending up in C(2,I), for all I. ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! Input, real ( kind = 8 ) TAU(N), the abscissas or X values of ! the data points. The entries of TAU are assumed to be ! strictly increasing. ! ! Input, integer N, the number of data points. N is ! assumed to be at least 2. ! ! Input/output, real ( kind = 8 ) C(4,N). ! On input, C(1,I) contains the function value at TAU(I), ! for I = 1 to N. ! C(2,1) contains the slope at TAU(1) and C(2,N) contains ! the slope at TAU(N). ! On output, the intermediate slopes at TAU(I) have been ! stored in C(2,I), for I = 2 to N-1. ! implicit none integer n real ( kind = 8 ) c(4,n) real ( kind = 8 ) g integer i integer ibcbeg integer ibcend real ( kind = 8 ) tau(n) ! ! Set up the right hand side of the linear system. ! C(2,1) and C(2,N) are presumably already set. ! do i = 2, n-1 c(2,i) = 3.0D+00 * ( & ( tau(i) - tau(i-1) ) * ( c(1,i+1) - c(1,i) ) / ( tau(i+1) - tau(i) ) + & ( tau(i+1) - tau(i) ) * ( c(1,i) - c(1,i-1) ) / ( tau(i) - tau(i-1) ) ) end do ! ! Set the diagonal coefficients. ! c(4,1) = 1.0D+00 do i = 2, n-1 c(4,i) = 2.0D+00 * ( tau(i+1) - tau(i-1) ) end do c(4,n) = 1.0D+00 ! ! Set the off-diagonal coefficients. ! c(3,1) = 0.0D+00 do i = 2, n c(3,i) = tau(i) - tau(i-1) end do ! ! Forward elimination. ! do i = 2, n-1 g = -c(3,i+1) / c(4,i-1) c(4,i) = c(4,i) + g * c(3,i-1) c(2,i) = c(2,i) + g * c(2,i-1) end do ! ! Back substitution for the interior slopes. ! do i = n-1, 2, -1 c(2,i) = ( c(2,i) - c(3,i) * c(2,i+1) ) / c(4,i) end do return end diff --git a/pppack/cubspl.f90 b/pppack/cubspl.f90 index ffbc652..3973860 100644 --- a/pppack/cubspl.f90 +++ b/pppack/cubspl.f90 @@ -1,282 +1,282 @@ !> !> @file cubspl.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine cubspl ( tau, c, n, ibcbeg, ibcend ) !******************************************************************************* ! !! CUBSPL defines an interpolatory cubic spline. ! ! Discussion: ! ! A tridiagonal linear system for the unknown slopes S(I) of ! F at TAU(I), I=1,..., N, is generated and then solved by Gauss ! elimination, with S(I) ending up in C(2,I), for all I. ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! Input, real ( kind = 8 ) TAU(N), the abscissas or X values of ! the data points. The entries of TAU are assumed to be ! strictly increasing. ! ! Input, integer N, the number of data points. N is ! assumed to be at least 2. ! ! Input/output, real ( kind = 8 ) C(4,N). ! On input, if IBCBEG or IBCBEG is 1 or 2, then C(2,1) ! or C(2,N) should have been set to the desired derivative ! values, as described further under IBCBEG and IBCEND. ! On output, C contains the polynomial coefficients of ! the cubic interpolating spline with interior knots ! TAU(2) through TAU(N-1). ! ! In the interval interval (TAU(I), TAU(I+1)), the spline ! F is given by ! ! F(X) = ! C(1,I) + ! C(2,I) * H + ! C(3,I) * H**2 / 2 + ! C(4,I) * H**3 / 6. ! ! where H=X-TAU(I). The routine PPVALU may be used to ! evaluate F or its derivatives from TAU, C, L=N-1, ! and K=4. ! ! Input, integer IBCBEG, IBCEND, boundary condition indicators. ! ! IBCBEG=0 means no boundary condition at TAU(1) is given. ! In this case, the "not-a-knot condition" is used. That ! is, the jump in the third derivative across TAU(2) is ! forced to zero. Thus the first and the second cubic ! polynomial pieces are made to coincide. ! ! IBCBEG=1 means the slope at TAU(1) is to equal the ! input value C(2,1). ! ! IBCBEG=2 means the second derivative at TAU(1) is ! to equal C(2,1). ! ! IBCEND=0, 1, or 2 has analogous meaning concerning the ! boundary condition at TAU(N), with the additional ! information taken from C(2,N). ! implicit none integer n real ( kind = 8 ) c(4,n) real ( kind = 8 ) divdf1 real ( kind = 8 ) divdf3 real ( kind = 8 ) dtau real ( kind = 8 ) g integer i integer ibcbeg integer ibcend real ( kind = 8 ) tau(n) ! ! C(3,*) and C(4,*) are used initially for temporary storage. ! ! Store first differences of the TAU sequence in C(3,*). ! ! Store first divided difference of data in C(4,*). ! do i = 2, n c(3,i) = tau(i) - tau(i-1) end do do i = 2, n c(4,i) = ( c(1,i) - c(1,i-1) ) / ( tau(i) - tau(i-1) ) end do ! ! Construct the first equation from the boundary condition ! at the left endpoint, of the form: ! ! C(4,1)*S(1) + C(3,1)*S(2) = C(2,1) ! ! IBCBEG = 0: Not-a-knot ! if ( ibcbeg == 0 ) then if ( n <= 2 ) then c(4,1) = 1.0D+00 c(3,1) = 1.0D+00 c(2,1) = 2.0D+00 * c(4,2) go to 120 end if c(4,1) = c(3,3) c(3,1) = c(3,2) + c(3,3) c(2,1) = ( ( c(3,2) + 2.0D+00 * c(3,1) ) * c(4,2) * c(3,3) & + c(3,2)**2 * c(4,3) ) / c(3,1) ! ! IBCBEG = 1: derivative specified. ! else if ( ibcbeg == 1 ) then c(4,1) = 1.0D+00 c(3,1) = 0.0D+00 if ( n == 2 ) then go to 120 end if ! ! Second derivative prescribed at left end. ! else c(4,1) = 2.0D+00 c(3,1) = 1.0D+00 c(2,1) = 3.0D+00 * c(4,2) - c(3,2) / 2.0D+00 * c(2,1) if ( n == 2 ) then go to 120 end if end if ! ! If there are interior knots, generate the corresponding ! equations and carry out the forward pass of Gauss elimination, ! after which the I-th equation reads: ! ! C(4,I) * S(I) + C(3,I) * S(I+1) = C(2,I). ! do i = 2, n-1 g = -c(3,i+1) / c(4,i-1) c(2,i) = g * c(2,i-1) + 3.0D+00 * ( c(3,i) * c(4,i+1) + c(3,i+1) * c(4,i) ) c(4,i) = g * c(3,i-1) + 2.0D+00 * ( c(3,i) + c(3,i+1)) end do ! ! Construct the last equation from the second boundary condition, of ! the form ! ! -G * C(4,N-1) * S(N-1) + C(4,N) * S(N) = C(2,N) ! ! If slope is prescribed at right end, one can go directly to ! back-substitution, since the C array happens to be set up just ! right for it at this point. ! if ( ibcend == 1 ) then go to 160 end if if ( 1 < ibcend ) then go to 110 end if 90 continue ! ! Not-a-knot and 3 <= N, and either 3 < N or also not-a-knot ! at left end point. ! if ( n /= 3 .or. ibcbeg /= 0 ) then g = c(3,n-1) + c(3,n) c(2,n) = ( ( c(3,n) + 2.0D+00 * g ) * c(4,n) * c(3,n-1) + c(3,n)**2 & * ( c(1,n-1) - c(1,n-2) ) / c(3,n-1) ) / g g = - g / c(4,n-1) c(4,n) = c(3,n-1) c(4,n) = c(4,n) + g * c(3,n-1) c(2,n) = ( g * c(2,n-1) + c(2,n) ) / c(4,n) go to 160 end if ! ! N=3 and not-a-knot also at left. ! 100 continue c(2,n) = 2.0D+00 * c(4,n) c(4,n) = 1.0D+00 g = -1.0D+00 / c(4,n-1) c(4,n) = c(4,n) - c(3,n-1) / c(4,n-1) c(2,n) = ( g * c(2,n-1) + c(2,n) ) / c(4,n) go to 160 ! ! IBCEND = 2: Second derivative prescribed at right endpoint. ! 110 continue c(2,n) = 3.0D+00 * c(4,n) + c(3,n) / 2.0D+00 * c(2,n) c(4,n) = 2.0D+00 g = -1.0D+00 / c(4,n-1) c(4,n) = c(4,n) - c(3,n-1) / c(4,n-1) c(2,n) = ( g * c(2,n-1)+c(2,n))/c(4,n) go to 160 ! ! N = 2. ! 120 continue if ( ibcend == 2 ) then c(2,n) = 3.0D+00 * c(4,n) + c(3,n) / 2.0D+00 * c(2,n) c(4,n) = 2.0D+00 g = -1.0D+00 / c(4,n-1) c(4,n) = c(4,n) - c(3,n-1) / c(4,n-1) c(2,n) = (g*c(2,n-1)+c(2,n)) / c(4,n) else if ( ibcend == 0 .and. ibcbeg /= 0 ) then c(2,n) = 2.0D+00 * c(4,n) c(4,n) = 1.0D+00 g = -1.0D+00 / c(4,n-1) c(4,n) = c(4,n) - c(3,n-1) / c(4,n-1) c(2,n) = (g*c(2,n-1)+c(2,n))/c(4,n) else if ( ibcend == 0 .and. ibcbeg == 0 ) then c(2,n) = c(4,n) end if ! ! Back solve the upper triangular system ! C(4,I) * S(I) + C(3,I) * S(I+1) = B(I) ! for the slopes C(2,I), given that S(N) is already known. ! 160 continue do i = n-1, 1, -1 c(2,i) = ( c(2,i) - c(3,i) * c(2,i+1) ) / c(4,i) end do ! ! Generate cubic coefficients in each interval, that is, the ! derivatives at its left endpoint, from value and slope at its ! endpoints. ! do i = 2, n dtau = c(3,i) divdf1 = ( c(1,i) - c(1,i-1) ) / dtau divdf3 = c(2,i-1) + c(2,i) - 2.0D+00 * divdf1 c(3,i-1) = 2.0D+00 * ( divdf1 - c(2,i-1) - divdf3 ) / dtau c(4,i-1) = 6.0D+00 * divdf3 / dtau**2 end do return end diff --git a/pppack/cwidth.f90 b/pppack/cwidth.f90 index cdca694..cd8d383 100644 --- a/pppack/cwidth.f90 +++ b/pppack/cwidth.f90 @@ -1,351 +1,351 @@ !> !> @file cwidth.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine cwidth ( w, b, nequ, ncols, integs, nbloks, d, x, iflag ) !************************************************************************* ! !! CWIDTH solves an almost block diagonal linear system. ! ! Discussion: ! ! This routine is a variation of the theme in the algorithm bandet1 ! by Martin and Wilkinson (numer.math. 9(1976)279-307). It solves ! the linear system ! A*X = B ! of NEQU equations in case A is almost block diagonal with all ! blocks having NCOLS columns using no more storage than it takes to ! store the interesting part of A. Such systems occur in the determ- ! ination of the b-spline coefficients of a spline approximation. ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! w on input, a two-dimensional array of size (nequ,ncols) contain- ! ing the interesting part of the almost block diagonal coeffici- ! ent matrix a (see description and example below). the array ! integs describes the storage scheme. ! on output, w contains the upper triangular factor u of the ! lu factorization of a possibly permuted version of a . in par- ! ticular, the determinant of a could now be found as ! iflag*w(1,1)*w(2,1)* ... * w(nequ,1) . ! ! b on input, the right side of the linear system, of length nequ. ! the contents of b are changed during execution. ! ! Input, integer NEQU, the number of equations. ! ! Input, integer NCOLS, the block width, that is, the number of ! columns in each block. ! ! integs integer array, of size (2,nequ), describing the block ! structure of a . ! integs(1,i)=no. of rows in block i = nrow ! integs(2,i)=no. of elimination steps in block i ! =overhang over next block = last ! nbloks number of blocks ! ! d work array, to contain row sizes . if storage is scarce, the ! array x could be used in the calling sequence for d . ! ! x on output, contains computed solution (if iflag /= 0), of ! length nequ . ! ! iflag on output, integer ! =(-1)**(no.of interchanges during elimination) ! if a is invertible ! = 0 if a is singular ! ! block structure of a ! ! the interesting part of a is taken to consist of nbloks con- ! secutive blocks, with the i-th block made up of nrowi=integs(1,i) ! consecutive rows and ncols consecutive columns of a , and with ! the first lasti=integs(2,i) columns to the left of the next block. ! these blocks are stored consecutively in the workarray w . ! ! for example, here is an 11th order matrix and its arrangement in ! the workarray w . (the interesting entries of a are indicated by ! their row and column index modulo 10.) ! ! --- a --- --- w --- ! ! nrow1=3 ! 11 12 13 14 11 12 13 14 ! 21 22 23 24 21 22 23 24 ! 31 32 33 34 nrow2=2 31 32 33 34 ! last1=2 43 44 45 46 43 44 45 46 ! 53 54 55 56 nrow3=3 53 54 55 56 ! last2=3 66 67 68 69 66 67 68 69 ! 76 77 78 79 76 77 78 79 ! 86 87 88 89 nrow4=1 86 87 88 89 ! last3=1 97 98 99 90 nrow5=2 97 98 99 90 ! last4=1 08 09 00 01 08 09 00 01 ! 18 19 10 11 18 19 10 11 ! last5=4 ! ! for this interpretation of a as an almost block diagonal matrix, ! we have nbloks=5 , and the integs array is ! ! i= 1 2 3 4 5 ! k= ! integs(k,i)= 1 3 2 3 1 2 ! 2 2 3 1 1 4 ! ! ! Method: ! ! gauss elimination with scaled partial pivoting is used, but mult- ! ipliers are n o t s a v e d in order to save storage. rather, the ! right side is operated on during elimination. the two parameters ! i p v t e q and l a s t e q ! are used to keep track of the action. ipvteq is the index of the ! variable to be eliminated next, from equations ipvteq+1,...,lasteq, ! using equation ipvteq (possibly after an interchange) as the pivot ! equation. the entries in the pivot column are a l w a y s in column ! 1 of w . this is accomplished by putting the entries in rows ! ipvteq+1,...,lasteq revised by the elimination of the ipvteq-th ! variable one to the left in w . in this way, the columns of the ! equations in a given block (as stored in w ) will be aligned with ! those of the next block at the moment when these next equations be- ! come involved in the elimination process. ! ! thus, for the above example, the first elimination steps proceed ! as follows. ! ! *11 12 13 14 11 12 13 14 11 12 13 14 11 12 13 14 ! *21 22 23 24 *22 23 24 22 23 24 22 23 24 ! *31 32 33 34 *32 33 34 *33 34 33 34 ! 43 44 45 46 43 44 45 46 *43 44 45 46 *44 45 46 etc. ! 53 54 55 56 53 54 55 56 *53 54 55 56 *54 55 56 ! 66 67 68 69 66 67 68 69 66 67 68 69 66 67 68 69 ! . . . . ! ! In all other respects, the procedure is standard, including the ! scaled partial pivoting. ! implicit none integer nbloks integer ncols integer nequ real ( kind = 8 ) awi1od real ( kind = 8 ) b(nequ) real ( kind = 8 ) colmax real ( kind = 8 ) d(nequ) integer i integer icount integer iflag integer ii integer integs(2,nbloks) integer ipvteq integer ipvtp1 integer istar integer j integer jmax integer lastcl integer lasteq integer lasti integer nexteq integer nrowad real ( kind = 8 ) ratio real ( kind = 8 ) rowmax real ( kind = 8 ) sum1 real ( kind = 8 ) temp real ( kind = 8 ) w(nequ,ncols) real ( kind = 8 ) x(nequ) iflag = 1 ipvteq = 0 lasteq = 0 ! ! The I loop runs over the blocks. ! do i = 1, nbloks ! ! The equations for the current block are added to those current- ! ly involved in the elimination process, by increasing lasteq ! by integs(1,i) after the rowsize of these equations has been ! recorded in the array D. ! nrowad = integs(1,i) do icount = 1, nrowad nexteq = lasteq + icount rowmax = 0.0D+00 do j = 1, ncols rowmax = max ( rowmax, abs ( w(nexteq,j) ) ) end do if ( rowmax == 0.0D+00 ) then go to 150 end if d(nexteq) = rowmax end do lasteq = lasteq + nrowad ! ! There will be lasti=integs(2,i) elimination steps before ! the equations in the next block become involved. further, ! l a s t c l records the number of columns involved in the cur- ! rent elimination step. it starts equal to ncols when a block ! first becomes involved and then drops by one after each elim- ! ination step. ! lastcl = ncols lasti = integs(2,i) do icount = 1, lasti ipvteq = ipvteq+1 if ( ipvteq < lasteq ) then go to 30 end if if ( d(ipvteq) < abs ( w(ipvteq,1)) + d(ipvteq) ) then go to 100 end if go to 150 ! ! Determine the smallest ISTAR in (ipvteq,lasteq) for ! which abs(w(istar,1))/d(istar) is as large as possible, and ! interchange equations ipvteq and istar in case ipvteq ! < istar . ! 30 continue colmax = abs(w(ipvteq,1)) / d(ipvteq) istar = ipvteq ipvtp1 = ipvteq+1 do ii = ipvtp1, lasteq awi1od = abs(w(ii,1)) / d(ii) if ( colmax < awi1od ) then colmax = awi1od istar = ii end if end do if ( abs(w(istar,1))+d(istar) == d(istar) ) then go to 150 end if if ( istar == ipvteq ) then go to 60 end if iflag = -iflag temp = d(istar) d(istar) = d(ipvteq) d(ipvteq) = temp temp = b(istar) b(istar) = b(ipvteq) b(ipvteq) = temp do j = 1, lastcl temp = w(istar,j) w(istar,j) = w(ipvteq,j) w(ipvteq,j) = temp end do ! ! Subtract the appropriate multiple of equation ipvteq from ! equations ipvteq+1,...,lasteq to make the coefficient of the ! ipvteq-th unknown (presently in column 1 of w ) zero, but ! store the new coefficients in w one to the left from the old. ! 60 continue do ii = ipvtp1, lasteq ratio = w(ii,1)/w(ipvteq,1) do j = 2, lastcl w(ii,j-1) = w(ii,j)-ratio*w(ipvteq,j) end do w(ii,lastcl) = 0.0D+00 b(ii) = b(ii)-ratio*b(ipvteq) end do lastcl = lastcl-1 end do 100 continue end do ! ! At this point, W and B contain an upper triangular linear system ! equivalent to the original one, with w(i,j) containing entry ! (i, i-1+j ) of the coefficient matrix. solve this system by backsub- ! stitution, taking into account its block structure. ! ! i-loop over the blocks, in reverse order ! i = nbloks 110 continue lasti = integs(2,i) jmax = ncols-lasti do icount = 1, lasti sum1 = 0.0D+00 do j = 1, jmax sum1 = sum1 + x(ipvteq+j) * w(ipvteq,j+1) end do x(ipvteq) = ( b(ipvteq) - sum1 ) / w(ipvteq,1) jmax = jmax+1 ipvteq = ipvteq-1 end do i = i-1 if ( 0 < i ) then go to 110 end if return 150 continue iflag = 0 return end diff --git a/pppack/difequ.f90 b/pppack/difequ.f90 index 51a61f3..48ef91e 100644 --- a/pppack/difequ.f90 +++ b/pppack/difequ.f90 @@ -1,185 +1,185 @@ !> !> @file difequ.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine difequ ( mode, xx, v ) !************************************************************************* ! !! DIFEQU returns information about a differential equation. ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! Input, integer MODE, an integer indicating the task to be performed. ! 1, initialization ! 2, evaluate de at xx ! 3, specify the next side condition ! 4, analyze the approximation ! ! Input, real ( kind = 8 ) XX, a point at which information is wanted ! ! Output, real ( kind = 8 ) V, depends on the MODE. See comments below ! implicit none integer, parameter :: npiece = 100 integer, parameter :: ncoef = 2000 real ( kind = 8 ) break real ( kind = 8 ) coef real ( kind = 8 ) eps real ( kind = 8 ) ep1 real ( kind = 8 ) ep2 real ( kind = 8 ) error real ( kind = 8 ) factor integer i integer iside integer itermx integer k integer kpm integer l integer m integer mode real ( kind = 8 ) rho real ( kind = 8 ) s2ovep real ( kind = 8 ) solutn real ( kind = 8 ) un real ( kind = 8 ) v(20) real ( kind = 8 ) value real ( kind = 8 ) x real ( kind = 8 ) xside real ( kind = 8 ) xx common /approx/ break(npiece),coef(ncoef),l,kpm common /side/ m,iside,xside(10) common /other/ itermx,k,rho(19) ! ! This sample of DIFEQU is for the example in chapter xv. It is a ! nonlinear second order two point boundary value problem. ! go to (10,50,60,110), mode ! ! Initialize everything, Set the order M of the differential equation, ! the nondecreasing sequence xside(i),i=1,...,m, of points at which side ! conditions are given and anything else necessary. ! 10 continue m = 2 xside(1) = 0.0D+00 xside(2) = 1.0D+00 ! ! Print out heading. ! write ( *, * ) ' ' write ( *, * ) ' Carrier''s nonlinear perturb. problem' write ( *, * ) ' ' eps = 0.005D+00 write(*,*)'EPS = ',eps ! ! Set constants used in formula for solution below. ! factor = ( sqrt ( 2.0D+00 ) + sqrt ( 3.0D+00 ) )**2 s2ovep = sqrt ( 2.0D+00 / eps ) ! ! Initial guess for Newton iteration. un(x)=x*x-1. ! l = 1 break(1) = 0.0D+00 do i = 1, kpm coef(i) = 0.0D+00 end do coef(1) = -1.0D+00 coef(3) = 2.0D+00 itermx = 10 return ! ! Provide value of left side coefficients and right side at xx . ! specifically, at xx the dif.equ. reads: ! ! v(m+1)d**m+v(m)d**(m-1) + ... + v(1)d**0 = v(m+2) ! ! in terms of the quantities v(i),i=1,...,m+2, to be computed here. ! 50 continue v(3) = eps v(2) = 0.0D+00 call ppvalu(break,coef,l,kpm,xx,0,un) v(1) = 2.0D+00 * un v(4) = un**2 + 1.0D+00 return ! ! provide the M side conditions. these conditions are of the form ! v(m+1)d**m+v(m)d**(m-1) + ... + v(1)d**0 = v(m+2) ! in terms of the quantities v(i),i=1,...,m+2, to be specified here. ! note that v(m+1)=0 for customary side conditions. ! 60 continue v(m+1) = 0.0D+00 if ( iside == 1 ) then v(2) = 1.0D+00 v(1) = 0.0D+00 v(4) = 0.0D+00 iside = iside+1 else if ( iside == 2 ) then v(2) = 0.0D+00 v(1) = 1.0D+00 v(4) = 0.0D+00 iside = iside + 1 end if return ! ! Calculate the error near the boundary layer at 1. ! 110 continue write(*,*)' ' write(*,*)' X, G(X) and G(X)-F(X) at selected points:' write(*,*)' ' x = 0.75D+00 do i = 1, 9 ep1 = exp ( s2ovep * ( 1.0D+00 - x ) ) * factor ep2 = exp ( s2ovep * ( 1.0D+00 + x ) ) * factor solutn = 12.0D+00 / ( 1.0D+00 + ep1 )**2 * ep1 & +12.0D+00 / ( 1.0D+00 + ep2 )**2 * ep2 - 1.0D+00 call ppvalu(break,coef,l,kpm,x,0,value) error = solutn-value write ( *, '(1x,3g14.6)' ) x, solutn, error x = x+0.03125 end do return end diff --git a/pppack/dtblok.f90 b/pppack/dtblok.f90 index 03d5727..1327c65 100644 --- a/pppack/dtblok.f90 +++ b/pppack/dtblok.f90 @@ -1,103 +1,103 @@ !> !> @file dtblok.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine dtblok ( bloks, integs, nbloks, ipivot, iflag, detsgn, detlog ) !************************************************************************* ! !! DTBLOK gets the determinant of an almost block diagonal matrix. ! ! Discussion: ! ! The matrix's PLU factorization must have been obtained ! previously by FCBLOK. ! ! The logarithm of the determinant is computed instead of the ! determinant itself to avoid the danger of overflow or underflow ! inherent in this calculation. ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! bloks, integs, nbloks, ipivot, iflag are as on return from fcblok. ! in particular, iflag=(-1)**(number of interchanges dur- ! ing factorization) if successful, otherwise iflag=0. ! ! detsgn on output, contains the sign of the determinant. ! ! detlog on output, contains the natural logarithm of the determi- ! nant if determinant is not zero. otherwise contains 0. ! implicit none integer nbloks real ( kind = 8 ) bloks(1) real ( kind = 8 ) detlog real ( kind = 8 ) detsgn integer i integer iflag integer index integer indexp integer integs(3,nbloks) integer ip integer ipivot(1) integer k integer last integer nrow detsgn = iflag detlog = 0.0D+00 if ( iflag == 0 ) then return end if index = 0 indexp = 0 do i = 1, nbloks nrow = integs(1,i) last = integs(3,i) do k = 1, last ip = index + nrow * (k-1) + ipivot(indexp+k) detlog = detlog + log ( abs ( bloks(ip) ) ) detsgn = detsgn * sign ( 1.0D+00, bloks(ip) ) end do index = nrow*integs(2,i)+index indexp = indexp+nrow end do return end diff --git a/pppack/eqblok.f90 b/pppack/eqblok.f90 index 411f1de..4476458 100644 --- a/pppack/eqblok.f90 +++ b/pppack/eqblok.f90 @@ -1,192 +1,192 @@ !> !> @file eqblok.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine eqblok ( t, n, kpm, work1, work2, bloks, lenblk, integs, & nbloks, b ) !************************************************************************* ! !! EQBLOK is to be called in COLLOC. ! ! Method: ! ! Each breakpoint interval gives rise to a block in the linear system. ! this block is determined by the K collocation equations in the interval ! with the side conditions (if any) in the interval interspersed ap- ! propriately, and involves the kpm b-splines having the interval in ! their support. correspondingly, such a block has nrow=k+isidel ! rows, with isidel=number of side conditions in this and the prev- ! ious intervals, and ncol=kpm columns. ! ! Further, because the interior knots have multiplicity k, we can ! carry out (in slvblk) k elimination steps in a block before pivot- ! ing might involve an equation from the next block. in the last block, ! of course, all kpm elimination steps will be carried out (in slvblk). ! ! see the detailed comments in the solveblok package for further in- ! formation about the almost block diagonal form used here. ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! input ! ! Input, real ( kind = 8 ) T(N+KPM), the knot sequence. ! ! Input, integer N, the dimension of the approximating spline space, ! that is, the order of the linear system to be constructed. ! ! Input, integer KPM, = K + M, the order of the approximating spline. ! ! Input, integer LENBLK, the maximum length of the array BLOKS, ! as allowed by the dimension statement in COLLOC. ! ! work areas ! ! work1 used in putit, of size (kpm,kpm) ! work2 used in putit, of size (kpm,m+1) ! ! output ! ! bloks the coefficient matrix of the linear system, stored in al- ! most block diagonal form, of size ! kpm*sum(integs(1,i) , i=1,...,nbloks) ! ! integs an integer array, of size (3,nbloks), describing the block ! structure. ! integs(1,i) = number of rows in block i ! integs(2,i) = number of columns in block i ! integs(3,i) = number of elimination steps which can be ! carried out in block i before pivoting might ! bring in an equation from the next block. ! ! nbloks number of blocks, equals number of polynomial pieces ! ! b the right side of the linear system, stored corresponding to the ! almost block diagonal form, of size sum(integs(1,i) , i=1,..., ! nbloks). ! implicit none integer kpm integer n real ( kind = 8 ) b(*) real ( kind = 8 ) bloks(*) integer i integer index integer indexb integer integs(3,*) integer iside integer isidel integer itermx integer k integer left integer lenblk integer m integer nbloks integer nrow real ( kind = 8 ) rho real ( kind = 8 ) t(n+kpm) real ( kind = 8 ) work1(kpm,kpm) real ( kind = 8 ) work2(kpm,*) real ( kind = 8 ) xside common /side/ m,iside,xside(10) common /other/ itermx,k,rho(19) index = 1 indexb = 1 i = 0 iside = 1 do left = kpm, n, k i = i + 1 ! ! Determine integs(.,i) ! integs(2,i) = kpm if ( n <= left ) then integs(3,i) = kpm isidel = m go to 30 end if integs(3,i) = k ! ! At this point, iside-1 gives the number of side conditions ! incorporated so far. adding to this the side conditions in the ! current interval gives the number isidel . ! isidel = iside - 1 do if ( isidel == m ) then exit end if if ( t(left+1) <= xside(isidel+1) ) then exit end if isidel = isidel + 1 end do 30 continue nrow = k + isidel integs(1,i) = nrow ! ! The detailed equations for this block are generated and put ! together in PUTIT. ! if ( lenblk < index + nrow * kpm - 1 ) then write ( *, * ) ' ' write ( *, * ) 'EQBLOK - Fatal error!' write ( *, * ) ' The dimension of BLOKS is too small.' write ( *, * ) ' LENBLK = ', lenblk stop end if call putit ( t, kpm, left, work1, work2, bloks(index), nrow, b(indexb) ) index = index + nrow * kpm indexb = indexb + nrow end do nbloks = i return end diff --git a/pppack/evnnot.f90 b/pppack/evnnot.f90 index 518bf00..bd553fc 100644 --- a/pppack/evnnot.f90 +++ b/pppack/evnnot.f90 @@ -1,85 +1,85 @@ !> !> @file evnnot.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine evnnot ( break, coef, l, k, brknew, lnew, coefg ) !************************************************************************* ! !! EVNNOT is a "fake" version of NEWNOT. ! ! Discussion: ! ! EVNNOT returns LNEW+1 knots in BRKNEW which are ! evenly spaced between BREAK(1) and BREAK(L+1). ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! Input, real ( kind = 8 ) BREAK(L+1), coef, l, k.....contains the ! pp-representation of a certain function F of order K. Specifically, ! d**(k-1)f(x)=coef(k,i) for break(i) <= x < break(i+1) ! ! Input, integer LNEW, the number of subintervals into which the interval ! (a,b) is to be sectioned by the new breakpoint sequence brknew . ! ! Output, real ( kind = 8 ) BRKNEW(LNEW+1), the new breakpoints. ! ! Output, coefg the coefficient part of the pp-repr. break, coefg, l, 2 ! for the monotone p.linear function G with respect to which brknew will ! be equidistributed. ! implicit none integer k integer l integer lnew real ( kind = 8 ) break(l+1) real ( kind = 8 ) brknew(lnew+1) real ( kind = 8 ) coef(k,l) real ( kind = 8 ) coefg(2,l) integer i if ( lnew == 0 ) then brknew(1) = 0.5D+00 * ( break(1) + break(l+1) ) else do i = 1, lnew+1 brknew(i) = ( real ( lnew - i + 1, kind = 8 ) * break(1) & + real ( i - 1, kind = 8 ) * break(l+1) ) & / real ( lnew, kind = 8 ) end do end if return end diff --git a/pppack/factrb.f90 b/pppack/factrb.f90 index 298312b..fa0a395 100644 --- a/pppack/factrb.f90 +++ b/pppack/factrb.f90 @@ -1,188 +1,188 @@ !> !> @file factrb.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine factrb ( w, ipivot, d, nrow, ncol, last, iflag ) !************************************************************************* ! !! FACTRB constructs a partial PLU factorization. ! ! Discussion: ! ! This factorization corresponds to steps 1 through LAST in Gauss ! elimination for the matrix W of order ( NROW, NCOL ), using ! pivoting of scaled rows. ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! Input/output, real ( kind = 8 ) W(NROW,NCOL); on input, contains the ! matrix to be partially factored; on output, the partial factorization. ! ! Output, integer IPIVOT(NROW), contains a record of the pivoting ! strategy used; row IPIVOT(I) is used during the I-th elimination step, ! for I = 1, ..., LAST. ! ! Workspace, real ( kind = 8 ) D(NROW), used to store the maximum entry ! in each row. ! ! Input, integer NROW, the number of rows of W. ! ! Input, integer NCOL, the number of columns of W. ! ! Input, integer LAST, the number of elimination steps to be carried out. ! ! Input/output, integer IFLAG. On output, equals the input value ! times (-1)**(number of row interchanges during the factorization ! process), in case no zero pivot was encountered. ! Otherwise, iflag=0 on output. ! implicit none integer ncol integer nrow real ( kind = 8 ) awikdi real ( kind = 8 ) colmax real ( kind = 8 ) d(nrow) integer i integer iflag integer ipivi integer ipivk integer ipivot(nrow) integer j integer k integer kp1 integer last real ( kind = 8 ) ratio real ( kind = 8 ) rowmax real ( kind = 8 ) w(nrow,ncol) ! ! Initialize IPIVOT and D. ! do i = 1, nrow ipivot(i) = i end do do i = 1, nrow rowmax = 0.0D+00 do j = 1, ncol rowmax = max ( rowmax, abs ( w(i,j) ) ) end do if ( rowmax == 0.0D+00 ) then iflag = 0 return end if d(i) = rowmax end do ! ! Gauss elimination with pivoting of scaled rows, loop over k=1,.,last ! k = 1 ! ! As pivot row for k-th step, pick among the rows not yet used, ! i.e., from rows ipivot(k),...,ipivot(nrow), the one whose k-th ! entry (compared to the row size) is largest. then, if this row ! does not turn out to be row ipivot(k), redefine ipivot(k) ap- ! propriately and record this interchange by changing the sign ! of IFLAG. ! 30 continue ipivk = ipivot(k) if ( k == nrow ) then if ( abs(w(ipivk,nrow))+d(ipivk) <= d(ipivk) ) then iflag = 0 end if return end if j = k kp1 = k+1 colmax = abs(w(ipivk,k))/d(ipivk) ! ! Find the largest pivot ! do i = kp1, nrow ipivi = ipivot(i) awikdi = abs(w(ipivi,k)) / d(ipivi) if ( colmax < awikdi ) then colmax = awikdi j = i end if end do if ( j /= k ) then ipivk = ipivot(j) ipivot(j) = ipivot(k) ipivot(k) = ipivk iflag = -iflag end if ! ! If pivot element is too small in absolute value, declare ! matrix to be noninvertible and quit. ! if ( abs(w(ipivk,k))+d(ipivk) <= d(ipivk) ) then iflag = 0 return end if ! ! Otherwise, subtract the appropriate multiple of the pivot ! row from remaining rows, i.e., the rows ipivot(k+1),..., ! ipivot(nrow), to make k-th entry zero. save the multiplier in ! its place. ! do i = kp1, nrow ipivi = ipivot(i) w(ipivi,k) = w(ipivi,k)/w(ipivk,k) ratio = -w(ipivi,k) do j = kp1, ncol w(ipivi,j) = ratio*w(ipivk,j)+w(ipivi,j) end do end do k = kp1 ! ! Check for having reached the next block. ! if ( k <= last ) then go to 30 end if return end diff --git a/pppack/fcblok.f90 b/pppack/fcblok.f90 index 72f11d8..5a0c1d9 100644 --- a/pppack/fcblok.f90 +++ b/pppack/fcblok.f90 @@ -1,126 +1,126 @@ !> !> @file fcblok.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine fcblok ( bloks, integs, nbloks, ipivot, scrtch, iflag ) !************************************************************************* ! !! FCBLOK supervises the PLU factorization with pivoting of ! scaled rows of the almost block diagonal matrix. ! ! The almost block diagonal matrix is stored in the arrays ! BLOKS and INTEGS. ! ! The FACTRB routine carries out steps 1,...,last of gauss ! elimination (with pivoting) for an individual block. ! ! The SHIFTB routine shifts the remaining rows to the top of ! the next block ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! bloks an array that initially contains the almost block diagonal ! matrix a to be factored, and on return contains the com- ! puted factorization of a . ! ! integs an integer array describing the block structure of a . ! ! nbloks the number of blocks in a . ! ! ipivot an integer array of dimension sum (integs(1,n) ; n=1, ! ...,nbloks) which, on return, contains the pivoting stra- ! tegy used. ! ! scrtch work area required, of length max (integs(1,n) ; n=1, ! ...,nbloks). ! ! iflag output parameter; ! =0 in case matrix was found to be singular. ! otherwise, ! =(-1)**(number of row interchanges during factorization) ! implicit none integer nbloks real ( kind = 8 ) bloks(*) integer i integer iflag integer index integer indexb integer indexn integer integs(3,nbloks) integer ipivot(*) integer last integer ncol integer nrow real ( kind = 8 ) scrtch(*) iflag = 1 indexb = 1 indexn = 1 i = 1 ! ! Loop over the blocks. i is loop index ! do index = indexn nrow = integs(1,i) ncol = integs(2,i) last = integs(3,i) ! ! Carry out elimination on the I-th block until next block ! enters, for columns 1 through LAST of I-th block. ! call factrb ( bloks(index), ipivot(indexb), scrtch, nrow, ncol, & last, iflag ) ! ! Check for having reached a singular block or the last block. ! if ( iflag == 0 .or. i == nbloks ) then exit end if i = i + 1 indexn = nrow * ncol + index ! ! Put the rest of the I-th block onto the next block. ! call shiftb ( bloks(index), ipivot(indexb), nrow, ncol, last, & bloks(indexn), integs(1,i), integs(2,i) ) indexb = indexb + nrow end do return end diff --git a/pppack/interv.f90 b/pppack/interv.f90 index 60b9cb1..e2ed195 100644 --- a/pppack/interv.f90 +++ b/pppack/interv.f90 @@ -1,223 +1,223 @@ !> !> @file interv.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine interv ( xt, lxt, x, left, mflag ) !******************************************************************************* ! !! INTERV brackets a real value in an ascending vector of values. ! ! Discussion: ! ! The XT array is a set of increasing values. The goal of the routine ! is to determine the largest index I so that XT(I) <= X. ! ! The routine is designed to be efficient in the common situation ! that it is called repeatedly, with X taken from an increasing ! or decreasing sequence. ! ! This will happen when a piecewise polynomial is to be graphed. ! The first guess for LEFT is therefore taken to be the value ! returned at the previous call and stored in the local variable ILO. ! ! A first check ascertains that ILO < LXT. This is necessary ! since the present call may have nothing to do with the previous ! call. Then, if XT(ILO) < = X < XT(ILO+1), we set LEFT=ILO ! and are done after just three comparisons. ! ! Otherwise, we repeatedly double the difference ISTEP=IHI-ILO ! while also moving ILO and IHI in the direction of X, until ! XT(ILO) < = X < XT(IHI) ! after which we use bisection to get, in addition, ILO+1=IHI. ! LEFT=ILO is then returned. ! ! Modified: ! ! 05 February 2004 ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! Input, real ( kind = 8 ) XT(LXT), a nondecreasing sequence of values. ! ! Input, integer LXT, the dimension of XT. ! ! Input, real ( kind = 8 ) X, the point whose location with ! respect to the sequence XT is to be determined. ! ! Output, integer LEFT, the index of the bracketing value: ! 1 if X < XT(1) ! I if XT(I) <= X < XT(I+1) ! LXT if XT(LXT) <= X ! ! Output, integer MFLAG, indicates whether X lies within the ! range of the data. ! -1: X < XT(1) ! 0: XT(I) <= X < XT(I+1) ! +1: XT(LXT) <= X ! implicit none integer lxt integer left integer mflag integer ihi integer, save :: ilo = 1 integer istep integer middle real ( kind = 8 ) x real ( kind = 8 ) xt(lxt) !$omp threadprivate(ilo) ihi = ilo + 1 if ( lxt <= ihi ) then if ( xt(lxt) <= x ) then go to 110 end if if ( lxt <= 1 ) then mflag = -1 left = 1 return end if ilo = lxt - 1 ihi = lxt end if if ( xt(ihi) <= x ) then go to 40 end if if ( xt(ilo) <= x ) then mflag = 0 left = ilo return end if ! ! Now X < XT(ILO). Decrease ILO to capture X. ! istep = 1 31 continue ihi = ilo ilo = ihi - istep if ( 1 < ilo ) then if ( xt(ilo) <= x ) then go to 50 end if istep = istep * 2 go to 31 end if ilo = 1 if ( x < xt(1) ) then mflag = -1 left = 1 return end if go to 50 ! ! Now XT(IHI) <= X. Increase IHI to capture X. ! 40 continue istep = 1 41 continue ilo = ihi ihi = ilo + istep if ( ihi < lxt ) then if ( x < xt(ihi) ) then go to 50 end if istep = istep * 2 go to 41 end if if ( xt(lxt) <= x ) then go to 110 end if ! ! Now XT(ILO) < = X < XT(IHI). Narrow the interval. ! ihi = lxt 50 continue do middle = ( ilo + ihi ) / 2 if ( middle == ilo ) then mflag = 0 left = ilo return end if ! ! It is assumed that MIDDLE = ILO in case IHI = ILO+1. ! if ( xt(middle) <= x ) then ilo = middle else ihi = middle end if end do ! ! Set output and return. ! 110 continue mflag = 1 if ( x == xt(lxt) ) then mflag = 0 end if do left = lxt, 1, -1 if ( xt(left) < xt(lxt) ) then return end if end do return end diff --git a/pppack/knots.f90 b/pppack/knots.f90 index c7be744..574b793 100644 --- a/pppack/knots.f90 +++ b/pppack/knots.f90 @@ -1,103 +1,103 @@ !> !> @file knots.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine knots ( break, l, kpm, t, n ) !************************************************************************* ! !! KNOTS is to be called in COLLOC. ! ! Discussion: ! ! From the given breakpoint sequence BREAK the routine constructs the ! knot sequence T so that ! ! SPLINE(K+M,T) = PP(K+M,BREAK) ! ! with M-1 continuous derivatives. This means that ! ! t(1),...,t(n+kpm) = break(1) kpm times, then break(2),..., ! break(l) each k times, then, finally, break(l+1) kpm times. ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! Input, real ( kind = 8 ) BREAK(L+1), the breakpoint sequence. ! ! Input, integer L, the number of intervals or pieces. ! ! Input, integer KPM, = K+M, the order of the piecewise polynomial ! function or spline. ! ! Output, real ( kind = 8 ) T(N+KPM), the knot sequence. ! ! Output, integer N, = L*K+M = the dimension of SPLINE(K+M,T). ! implicit none integer kpm integer l integer n real ( kind = 8 ) break(l+1) integer iside integer j integer jj integer jjj integer k integer ll integer m real ( kind = 8 ) t(*) real ( kind = 8 ) xside common /side/ m,iside,xside(10) k = kpm-m n = l*k+m jj = n+kpm jjj = l+1 do ll = 1, kpm t(jj) = break(jjj) jj = jj-1 end do do j = 1, l jjj = jjj-1 do ll = 1, k t(jj) = break(jjj) jj = jj-1 end do end do t(1:kpm) = break(1) return end diff --git a/pppack/l2appr.f90 b/pppack/l2appr.f90 index 861d3bd..58a6c23 100644 --- a/pppack/l2appr.f90 +++ b/pppack/l2appr.f90 @@ -1,196 +1,196 @@ !> !> @file l2appr.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine l2appr ( t, n, k, q, diag, bcoef ) !************************************************************************* ! !! L2APPR constructs a weighted L2 spline approximation to given data. ! ! Discussion: ! ! The routine constructs the weighted discrete L2-approximation by ! splines of order K with knot sequence T(1), ..., T(n+k) to ! given data points ( TAU(1:NTAU), GTAU(1:NTAU) ). ! ! The B-spline coefficients BCOEF of the approximating spline are ! determined from the normal equations using Cholesky's method. ! ! Method: ! ! The B-spline coefficients of the L2-approximation are determined as the ! solution of the normal equations ! ! sum ( (b(i), b(j) ) * bcoef(j) : j=1,...,n) =(b(i),g), ! i=1, ..., n . ! Here, b(i) denotes the i-th B-spline, G denotes the function to ! be approximated, and the inner product of two functions F and G ! is given by ! ! (f,g) := sum ( f(tau(i))*g(tau(i))*weight(i) : i=1,...,ntau) . ! ! The arrays TAU and WEIGHT are given in common block ! DATA, as is the array GTAU containing the sequence ! g(tau(i)), i=1,..., NTAU. ! ! The relevant function values of the B-splines b(i), i=1,...,n, are ! supplied by the subprogram BSPLVB. ! ! The coefficient matrix C, with ! c(i,j) := (b(i), b(j)), i,j=1,...,n, ! of the normal equations is symmetric and (2*k-1)-banded, therefore ! can be specified by giving its K bands at or below the diagonal. ! For i=1,...,n, we store ! (b(i),b(j)) = c(i,j) in q(i-j+1,j), j=i,...,min(i+k-1,n) ! and the right side ! (b(i), g ) in bcoef(i). ! ! Since B-spline values are most efficiently generated by finding sim- ! ultaneously the value of every nonzero B-spline at one point, ! the entries of C (i.e., of Q ), are generated by computing, for ! each ll, all the terms involving tau(ll) simultaneously and adding ! them to all relevant entries. ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! Input, real ( kind = 8 ) T(N+K), the knot sequence. ! ! Input, integer N, the dimension of the space of splines of order K ! with knots t. ! ! Input, integer K, the order. ! ! work arrays ! ! q....a work array of size (at least) k*n. its first k rows are used ! for the k lower diagonals of the gramian matrix c. ! ! diag.....a work array of length n used in bchfac . ! ! input via c o m m o n /data/ ! ! ntau.....number of data points ! (tau(i),gtau(i)), i=1,...,ntau are the ntau data points to be ! fitted . ! weight(i), i=1,...,ntau are the corresponding weights . ! ! output ! bcoef(1), ..., bcoef(n) the b-spline coefficients of the l2-appr. ! implicit none integer k integer n integer, parameter :: ntmax = 200 real ( kind = 8 ) bcoef(n) real ( kind = 8 ) biatx(k) real ( kind = 8 ) diag(n) real ( kind = 8 ) dw real ( kind = 8 ) gtau integer i integer j integer jj integer left integer leftmk integer ll integer mm integer ntau real ( kind = 8 ) q(k,n) real ( kind = 8 ) t(n+k) real ( kind = 8 ) tau real ( kind = 8 ) totalw real ( kind = 8 ) weight COMMON /DATA/ tau(ntmax),gtau(ntmax),weight(ntmax),totalw,ntau bcoef(1:n) = 0.0D+00 q(1:k,1:n) = 0.0D+00 left = k leftmk = 0 do ll = 1, ntau ! ! Locate LEFT such that tau(ll) in (t(left),t(left+1)). ! do if ( left == n ) then exit end if if ( tau(ll) < t(left+1) ) then exit end if left = left + 1 leftmk = leftmk + 1 end do call bsplvb ( t, k, 1, tau(ll), left, biatx ) ! ! biatx(mm) contains the value of b(left-k+mm) at tau(ll). ! hence, with dw := biatx(mm)*weight(ll), the number dw*gtau(ll) ! is a summand in the inner product ! (b(left-k+mm), g) which goes into bcoef(left-k+mm) ! and the number biatx(jj)*dw is a summand in the inner product ! (b(left-k+jj), b(left-k+mm)), into q(jj-mm+1,left-k+mm) ! since (left-k+jj)-(left-k+mm)+1 = jj - mm + 1 . ! do mm = 1, k dw = biatx(mm)*weight(ll) j = leftmk+mm bcoef(j) = dw*gtau(ll)+bcoef(j) i = 1 do jj = mm, k q(i,j) = biatx(jj)*dw+q(i,j) i = i+1 end do end do end do ! ! Construct the Cholesky factorization for C in q , then ! use it to solve the normal equations ! c*x = bcoef ! for X, and store X in BCOEF. ! call bchfac ( q, k, n, diag ) call bchslv ( q, k, n, bcoef ) return end diff --git a/pppack/l2err.f90 b/pppack/l2err.f90 index 7449380..e25b7ab 100644 --- a/pppack/l2err.f90 +++ b/pppack/l2err.f90 @@ -1,146 +1,146 @@ !> !> @file l2err.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine l2err ( iprfun, ftau, error ) !************************************************************************* ! !! L2ERR computes the errors of an L2 approximation. ! ! Discussion: ! ! This routine computes various errors of the current L2-approximation, ! whose piecewise polynomial representation is contained in common ! block APPROX, to the given data contained in common block data. ! ! It prints out the average error errl1, the l2-error errl2, and the ! maximum error errmax. ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! Input, integer IPRFUN. If iprfun= 1, the routine prints out ! the value of the approximation as well as its error at ! every data point. ! ! Output, real ( kind = 8 ) FTAU(NTAU), contains the value of the computed ! approximation at each value TAU(1:NTAU). ! ! Output, error(1), ..., error(ntau), with error(i)=scale*(g-f) ! at tau(i), all i. here, SCALE equals 1. in case ! iprfun /= 1 , or the absolute error is greater than 100 some- ! where. otherwise, SCALE is such that the maximum of ! abs(error)) over all I lies between 10 and 100. This ! makes the printed output more illustrative. ! implicit none integer, parameter :: lpkmax = 100 integer, parameter :: ntmax = 200 integer, parameter :: ltkmax = 2000 integer ntau real ( kind = 8 ) break real ( kind = 8 ) coef real ( kind = 8 ) err real ( kind = 8 ) errl1 real ( kind = 8 ) errl2 real ( kind = 8 ) errmax real ( kind = 8 ) error(ntau) real ( kind = 8 ) ftau(ntau) real ( kind = 8 ) gtau integer ie integer iprfun integer k integer l integer ll real ( kind = 8 ) scale real ( kind = 8 ) tau real ( kind = 8 ) totalw real ( kind = 8 ) weight COMMON /DATA/ tau(ntmax),gtau(ntmax),weight(ntmax),totalw,ntau common /approx/ break(lpkmax),coef(ltkmax),l,k errl1 = 0.0D+00 errl2 = 0.0D+00 errmax = 0.0D+00 do ll = 1, ntau call ppvalu(break,coef,l,k,tau(ll),0,ftau(ll)) error(ll) = gtau(ll)-ftau(ll) err = abs(error(ll)) if ( errmax < err ) then errmax = err end if errl1 = errl1 + err * weight(ll) errl2 = errl2 + err**2 * weight(ll) end do errl1 = errl1 / totalw errl2 = sqrt ( errl2 / totalw ) write ( *, * ) ' ' write ( *, * ) ' Least square error =',errl2 write ( *, * ) ' Average error =',errl1 write ( *, * ) ' Maximum error =',errmax write ( *, * ) ' ' if ( iprfun /= 1 ) then return end if ! ! Scale error curve and print ! ie = 0 scale = 1.0D+00 if ( errmax < 10.0D+00 ) then do ie = 1, 9 scale = scale * 10.0D+00 if ( 10.0D+00 <= errmax * scale ) then exit end if end do end if error(1:ntau) = error(1:ntau) * scale write(*,60) ie, (ll,tau(ll),ftau(ll),error(ll),ll=1,ntau) 60 format (///14x,'approximation and scaled error curve'/ & 7x,'data point',7x,'approximation',3x,'deviation x 10**',i1/ & (i4, f16.8,f16.8,f17.6)) return end diff --git a/pppack/l2knts.f90 b/pppack/l2knts.f90 index 6c064a6..c771c16 100644 --- a/pppack/l2knts.f90 +++ b/pppack/l2knts.f90 @@ -1,83 +1,83 @@ !> !> @file l2knts.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine l2knts ( break, l, k, t, n ) !************************************************************************* ! !! L2KNTS converts breakpoints to knots. ! ! Discussion: ! ! The breakpoint sequence BREAK is converted into a corresponding ! knot sequence T to allow the representation of a piecewise ! polynomial function of order K with K-2 continuous derivatives ! as a spline of order K with knot sequence T. ! ! This means that ! t(1), ..., t(n+k)= break(1) k times, then break(i), i=2,...,l, each ! once, then break(l+1) k times. Therefore, n=k-1+l. ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! Input, integer K, the order. ! ! Input, integer L, the number of polynomial pieces. ! ! Input, real ( kind = 8 ) BREAK(L+1), the breakpoint sequence. ! ! Output, real ( kind = 8 ) T(N+K), the knot sequence. ! ! Output, integer N, the dimension of the corresponding spline space ! of order K. ! implicit none integer k integer l integer n real ( kind = 8 ) break(l+1) integer i real ( kind = 8 ) t(k-1+l+k) t(1:k-1) = break(1) do i = 1, l t(k-1+i) = break(i) end do n = k-1+l t(n+1:n+k) = break(l+1) return end diff --git a/pppack/newnot.f90 b/pppack/newnot.f90 index f3c556a..436356e 100644 --- a/pppack/newnot.f90 +++ b/pppack/newnot.f90 @@ -1,206 +1,206 @@ !> !> @file newnot.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine newnot ( break, coef, l, k, brknew, lnew, coefg ) !************************************************************************* ! !! NEWNOT returns LNEW+1 knots which are equidistributed on (A,B). ! ! Discussion: ! ! (a,b) = (break(1),break(l+1)) with respect to a certain monotone ! function G related to the K-th root of the K-th derivative of the ! piecewise polynomial function F whose piecewise polynomial ! representation is contained in break, coef, l, k . ! ! method ! ! The K-th derivative of the given piecewise polynomial function F does ! not exist, except perhaps as a linear combination of delta functions. ! Nevertheless, we construct a piecewise constant function H with ! breakpoint sequence BREAK which is approximately proportional ! to abs(d**k(f)). ! ! Specifically, on (break(i), break(i+1)), ! ! abs(jump at break(i) of pc) abs(jump at break(i+1) of pc) ! h=-------------- + ---------------------------- ! break(i+1)-break(i-1) break(i+2) - break(i) ! ! with pc the p.constant (k-1)st derivative of f . ! then, the p.linear function g is constructed as ! ! g(x) = integral of h(y)**(1/k) for y from a to x ! ! and its pp coefficients stored in coefg . ! ! then brknew is determined by ! ! brknew(i) = a+g**(-1)((i-1)*step) , i=1,...,lnew+1 ! ! where step=g(b)/lnew and (a,b) = (break(1),break(l+1)). ! ! In the event that pc=d**(k-1)(f) is constant in (a,b) and ! therefore h=0 identically, brknew is chosen uniformly spaced. ! ! optional printed output ! coefg.....the pp coefficients of g are printed out if iprint is set ! positive in data statement below. ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! Input, break, coef, l, k.....contains the pp-representation of a certain ! function f of order k . specifically, ! d**(k-1)f(x)=coef(k,i) for break(i) <= x < break(i+1) ! ! Input, lnew.....number of intervals into which the interval (a,b) is to be ! sectioned by the new breakpoint sequence brknew . ! ! Output, real ( kind = 8 ) BRKNEW(LNEW+1), the new breakpoint sequence. ! ! Output, coefg.....the coefficient part of the pp-repr. break, coefg, l, 2 ! for the monotone p.linear function g with respect to which brknew will ! be equidistributed. ! implicit none integer k integer l integer lnew real ( kind = 8 ) break(l+1) real ( kind = 8 ) brknew(lnew+1) real ( kind = 8 ) coef(k,l) real ( kind = 8 ) coefg(2,l) real ( kind = 8 ) dif real ( kind = 8 ) difprv integer i integer, save :: iprint = 0 integer j real ( kind = 8 ) oneovk real ( kind = 8 ) step real ( kind = 8 ) stepi brknew(1) = break(1) brknew(lnew+1) = break(l+1) ! ! If G is constant, BRKNEW is uniform. ! if ( l <= 1) then step = (break(l+1)-break(1))/ real ( lnew, kind = 8 ) do i = 2, lnew brknew(i) = break(1) + real ( i - 1, kind = 8 ) * step end do return end if ! ! Construct the continuous piecewise linear function G. ! oneovk = 1.0D+00 / real ( k, kind = 8 ) coefg(1,1) = 0.0D+00 difprv = abs(coef(k,2)-coef(k,1))/(break(3)-break(1)) do i = 2, l dif = abs(coef(k,i)-coef(k,i-1))/(break(i+1)-break(i-1)) coefg(2,i-1) = (dif+difprv)**oneovk coefg(1,i) = coefg(1,i-1)+coefg(2,i-1)*(break(i)-break(i-1)) difprv = dif end do coefg(2,l) = ( 2.0D+00 * difprv )**oneovk ! ! step = g(b)/lnew ! step=(coefg(1,l)+coefg(2,l)*(break(l+1)-break(l))) / real ( lnew, kind = 8 ) if ( 0 < iprint ) then write(*,20)step,(i,coefg(1,i),coefg(2,i),i=1,l) end if 20 format (' step =',e16.7/(i5,2e16.5)) ! ! if G is constant, BRKNEW is uniform. ! if ( step <= 0.0D+00 ) then step = (break(l+1)-break(1)) / real ( lnew, kind = 8 ) do i = 2, lnew brknew(i) = break(1) + real ( i - 1, kind = 8 ) * step end do return end if ! ! for i=2,...,lnew, construct brknew(i)=a+g**(-1)(stepi), ! with stepi=(i-1)*step . this requires inversion of the p.lin- ! ear function g . for this, j is found so that ! g(break(j)) <= stepi .le. g(break(j+1)) ! and then ! brknew(i) = break(j)+(stepi-g(break(j)))/dg(break(j)) . ! the midpoint is chosen if dg(break(j))=0 . ! j = 1 do i = 2, lnew stepi = real ( i - 1, kind = 8 ) * step do if ( j == l ) then exit end if if ( stepi <= coefg(1,j+1) ) then exit end if j = j + 1 end do if ( coefg(2,j) /= 0.0D+00 ) then brknew(i) = break(j)+(stepi-coefg(1,j))/coefg(2,j) else brknew(i) = ( break(j) + break(j+1) ) / 2.0D+00 end if end do return end diff --git a/pppack/ppvalu.f90 b/pppack/ppvalu.f90 index b392355..71a60ee 100644 --- a/pppack/ppvalu.f90 +++ b/pppack/ppvalu.f90 @@ -1,134 +1,134 @@ !> !> @file ppvalu.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine ppvalu ( break, coef, l, k, x, jderiv, value ) !******************************************************************************* ! !! PPVALU evaluates a piecewise polynomial function or its derivative. ! ! Discussion: ! ! PPVALU calculates the value at X of the JDERIV-th derivative of ! the piecewise polynomial function F from its piecewise ! polynomial representation. ! ! The interval index I, appropriate for X, is found through a ! call to INTERV. The formula for the JDERIV-th derivative ! of F is then evaluated by nested multiplication. ! ! The J-th derivative of F is given by: ! ! (d**j)f(x) = ! coef(j+1,i) + h * ( ! coef(j+2,i) + h * ( ! ... ! coef(k-1,i) + h * ( ! coef(k,i) / (k-j-1) ) / (k-j-2) ... ) / 2 ) / 1 ! ! with ! ! H=X-BREAK(I) ! ! and ! ! i = max( 1 , max( j , break(j) <= x , 1 <= j <= l ) ). ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! Input, real ( kind = 8 ) BREAK(L+1), real COEF(*), integer L, for ! piecewise polynomial representation of the function F to ! be evaluated. ! ! Input, integer K, the order of the polynomial pieces ! that make up the function F. The usual value for ! K is 4, signifying a piecewise cubic polynomial. ! ! Input, real ( kind = 8 ) X, the point at which to evaluate F or ! of its derivatives. ! ! Input, integer JDERIV, the order of the derivative to be ! evaluated. If JDERIV is 0, then F itself is evaluated, ! which is actually the most common case. It is assumed ! that JDERIV is zero or positive. ! ! Output, real ( kind = 8 ) VALUE, the value of the JDERIV-th ! derivative of F at X. ! implicit none integer k integer l real ( kind = 8 ) break(l+1) real ( kind = 8 ) coef(k,l) real ( kind = 8 ) fmmjdr real ( kind = 8 ) h integer i integer jderiv integer m integer ndummy real ( kind = 8 ) value real ( kind = 8 ) x value = 0.0D+00 fmmjdr = k - jderiv ! ! Derivatives of order K or higher are identically zero. ! if ( k <= jderiv ) then return end if ! ! Find the index I of the largest breakpoint to the left of X. ! call interv ( break, l+1, x, i, ndummy ) ! ! Evaluate the JDERIV-th derivative of the I-th polynomial piece at X. ! h = x - break(i) m = k do value = ( value / fmmjdr ) * h + coef(m,i) m = m - 1 fmmjdr = fmmjdr - 1.0D+00 if ( fmmjdr <= 0.0D+00 ) then exit end if end do return end diff --git a/pppack/putit.f90 b/pppack/putit.f90 index 28886c2..401b35a 100644 --- a/pppack/putit.f90 +++ b/pppack/putit.f90 @@ -1,178 +1,178 @@ !> !> @file putit.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine putit ( t, kpm, left, scrtch, dbiatx, q, nrow, b ) !************************************************************************* ! !! PUTIT puts together one block of the collocation equation system. ! ! Method: ! ! The K collocation equations for the interval (t(left),t(left+1)) ! are constructed with the aid of the subroutine DIFEQU( 2, ., ! . ) and interspersed (in order) with the side conditions (if any) in ! this interval, using DIFEQU ( 3, ., . ) for the information. ! ! The block Q has kpm columns, corresponding to the kpm b- ! splines of order kpm which have the interval (t(left),t(left+1)) ! in their support. the block's diagonal is part of the diagonal of the ! total system. The first equation in this block not overlapped by the ! preceding block is therefore equation LOWROW, with lowrow = ! number of side conditions in preceding intervals (or blocks). ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! Input, real ( kind = 8 ) T(LEFT+KPM), the knot sequence. ! ! Input, integer KPM, the order of the spline. ! ! Input, integer LEFT, indicates the interval of interest, viz the interval ! (t(left), t(left+1)). ! ! Input, integer NROW, number of rows in block to be put together ! ! Workspace, scrtch used in bsplvd, of size (kpm,kpm) ! ! Workspace, real ( kind = 8 ) DBIATX(KPM,M+1), derivatives of b-splines, ! with dbiatx(j,i+1) containing the i-th derivative of the ! j-th b-spline of interest ! ! Output, Q the block, of size (nrow,kpm). ! ! Output, B the corresponding piece of the right side, of size (nrow) ! implicit none integer kpm integer nrow real ( kind = 8 ) b(*) real ( kind = 8 ) dbiatx(kpm,*) real ( kind = 8 ) dx integer i integer irow integer iside integer itermx integer j integer k integer left integer ll integer lowrow integer m integer mode integer mp1 real ( kind = 8 ) q(nrow,kpm) real ( kind = 8 ) rho real ( kind = 8 ) scrtch(*) real ( kind = 8 ) sum1 real ( kind = 8 ) t(*) real ( kind = 8 ) v(20) real ( kind = 8 ) xm real ( kind = 8 ) xside real ( kind = 8 ) xx common /side/ m,iside,xside(10) common /other/ itermx,k,rho(19) mp1 = m + 1 q(1:nrow,1:kpm) = 0.0D+00 xm = ( t(left+1) + t(left) ) / 2.0D+00 dx = ( t(left+1) - t(left) ) / 2.0D+00 ll = 1 lowrow = iside do irow = lowrow, nrow if ( k < ll ) then go to 20 end if mode = 2 ! ! next collocation point is ... ! xx = xm+dx*rho(ll) ll = ll+1 ! ! The corresponding collocation equation is next unless the next side ! condition occurs at a point at, or to the left of, the next ! collocation point. ! if ( m < iside ) then go to 30 end if if ( xx < xside(iside) ) then go to 30 end if ll = ll-1 20 continue mode = 3 xx = xside(iside) 30 continue call difequ(mode,xx,v) ! ! The next equation, a collocation equation (mode=2) or a side ! condition (mode=3), reads ! (*) (v(m+1)*d**m+v(m)*d**(m-1) +...+ v(1)*d**0)f(xx)=v(m+2) ! in terms of the info supplied by difequ . the corresponding ! equation for the b-coefficients of f therefore has the left side of ! (*), evaluated at each of the kpm b-splines having xx in ! their support, as its kpm possibly nonzero coefficients. ! call bsplvd(t,kpm,xx,left,scrtch,dbiatx,mp1) do j = 1, kpm sum1 = 0.0D+00 do i = 1, mp1 sum1 = sum1 + v(i) * dbiatx(j,i) end do q(irow,j) = sum1 end do b(irow) = v(m+2) end do return end diff --git a/pppack/rvec_print.f90 b/pppack/rvec_print.f90 index e482e0a..a25ae9d 100644 --- a/pppack/rvec_print.f90 +++ b/pppack/rvec_print.f90 @@ -1,83 +1,83 @@ !> !> @file rvec_print.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine rvec_print ( n, a, title ) !******************************************************************************* ! !! RVEC_PRINT prints a real vector. ! ! Discussion: ! ! If all the entries are integers, the data is printed ! in integer format. ! ! Modified: ! ! 19 November 2002 ! ! Author: ! ! John Burkardt ! ! Parameters: ! ! Input, integer N, the number of components of the vector. ! ! Input, real ( kind = 8 ) A(N), the vector to be printed. ! ! Input, character ( len = * ) TITLE, a title to be printed. ! implicit none integer n real ( kind = 8 ) a(n) integer i character ( len = * ) title if ( 0 < len_trim ( title ) ) then write ( *, '(a)' ) ' ' write ( *, '(a)' ) trim ( title ) end if write ( *, '(a)' ) ' ' if ( all ( a(1:n) == aint ( a(1:n) ) ) ) then do i = 1, n write ( *, '(i6,i6)' ) i, int ( a(i) ) end do else if ( all ( abs ( a(1:n) ) < 1000000.0D+00 ) ) then do i = 1, n write ( *, '(i6,f14.6)' ) i, a(i) end do else do i = 1, n write ( *, '(i6,g14.6)' ) i, a(i) end do end if return end diff --git a/pppack/sbblok.f90 b/pppack/sbblok.f90 index e490a42..d3db9e6 100644 --- a/pppack/sbblok.f90 +++ b/pppack/sbblok.f90 @@ -1,106 +1,106 @@ !> !> @file sbblok.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine sbblok ( bloks, integs, nbloks, ipivot, b, x ) !************************************************************************* ! !! SBBLOK solves a linear system that was factored by FCBLOK. ! ! Discussion: ! ! The routine supervises the solution, by forward and backward ! substitution, of the linear system ! ! A * x = b ! ! for X, with the PLU factorization of A already generated in FCBLOK. ! Individual blocks of equations are solved via SUBFOR and SUBBAK. ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! bloks, integs, nbloks, ipivot are as on return from fcblok. ! ! b the right side, stored corresponding to the storage of ! the equations. see comments in SLVBLK for details. ! ! Output, real ( kind = 8 ) X(*), the solution vector. ! implicit none integer nbloks real ( kind = 8 ) b(*) real ( kind = 8 ) bloks(*) integer i integer index integer indexb integer indexx integer integs(3,nbloks) integer ipivot(*) integer j integer last integer nbp1 integer ncol integer nrow real ( kind = 8 ) x(*) ! ! Forward substitution pass: ! index = 1 indexb = 1 indexx = 1 do i = 1, nbloks nrow = integs(1,i) last = integs(3,i) call subfor(bloks(index),ipivot(indexb),nrow,last,b(indexb),x(indexx)) index = nrow*integs(2,i)+index indexb = indexb+nrow indexx = indexx+last end do ! ! Back substitution pass. ! nbp1 = nbloks + 1 do j = 1, nbloks i = nbp1 - j nrow = integs(1,i) ncol = integs(2,i) last = integs(3,i) index = index - nrow * ncol indexb = indexb - nrow indexx = indexx - last call subbak ( bloks(index), ipivot(indexb), nrow, ncol, last, x(indexx) ) end do return end diff --git a/pppack/setupq.f90 b/pppack/setupq.f90 index 56aa439..9e771e4 100644 --- a/pppack/setupq.f90 +++ b/pppack/setupq.f90 @@ -1,101 +1,101 @@ !> !> @file setupq.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine setupq ( x, dx, y, npoint, v, qty ) !************************************************************************* ! !! SETUPQ is to be called in SMOOTH. ! ! Discussion: ! ! put delx=x(.+1)-x(.) into v(.,4), ! put the three bands of q-transp*d into v(.,1-3), and ! put the three bands of (d*q)-transp*(d*q) at and above the diagonal ! into v(.,5-7) . ! ! here, q is the tridiagonal matrix of order (npoint-2,npoint) ! with general row 1/delx(i) , -1/delx(i)-1/delx(i+1) , 1/delx(i+1) ! and d is the diagonal matrix with general row dx(i) . ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! implicit none integer npoint real ( kind = 8 ) diff real ( kind = 8 ) dx(npoint) integer i real ( kind = 8 ) prev real ( kind = 8 ) qty(npoint) real ( kind = 8 ) v(npoint,7) real ( kind = 8 ) x(npoint) real ( kind = 8 ) y(npoint) v(1,4) = x(2)-x(1) do i = 2, npoint-1 v(i,4) = x(i+1) - x(i) v(i,1) = dx(i-1) / v(i-1,4) v(i,2) = -dx(i) / v(i,4) - dx(i) / v(i-1,4) v(i,3) = dx(i+1) / v(i,4) end do v(npoint,1) = 0.0D+00 do i = 2, npoint-1 v(i,5) = v(i,1)**2 + v(i,2)**2 + v(i,3)**2 end do do i = 3, npoint-1 v(i-1,6) = v(i-1,2)*v(i,1)+v(i-1,3)*v(i,2) end do v(npoint-1,6) = 0.0D+00 do i = 4, npoint-1 v(i-2,7) = v(i-2,3) * v(i,1) end do v(npoint-2,7) = 0.0D+00 v(npoint-1,7) = 0.0D+00 ! ! Construct q-transp. * y in QTY. ! prev = (y(2)-y(1)) / v(1,4) do i = 2, npoint-1 diff = (y(i+1)-y(i)) / v(i,4) qty(i) = diff-prev prev = diff end do return end diff --git a/pppack/shiftb.f90 b/pppack/shiftb.f90 index 0ddcd79..8e51c0c 100644 --- a/pppack/shiftb.f90 +++ b/pppack/shiftb.f90 @@ -1,107 +1,107 @@ !> !> @file shiftb.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine shiftb ( ai, ipivot, nrowi, ncoli, last, ai1, nrowi1, ncoli1 ) !************************************************************************* ! !! SHIFTB shifts the rows in current block, ai, not used as pivot ! rows, if any, i.e., rows ipivot(last+1),...,ipivot(nrowi), ! onto the first mmax=nrow-last rows of the next block, ai1, ! with column last+j of ai going to column j , ! for j=1,...,jmax=ncoli-last. the remaining columns of these ! rows of ai1 are zeroed out. ! ! picture ! ! original situation after results in a new block i+1 ! last=2 columns have been created and ready to be ! done in factrb (assuming no factored by next factrb call. ! interchanges of rows) ! 1 ! x x 1x x x x x x x x ! 1 ! 0 x 1x x x 0 x x x x ! block i 1 --- ! nrowi=4 0 0 1x x x 0 0 1x x x 0 01 ! ncoli=5 1 1 1 ! last=2 0 0 1x x x 0 0 1x x x 0 01 ! ------------------- 1 1 new ! 1x x x x x 1x x x x x1 block ! 1 1 1 i+1 ! block i+1 1x x x x x 1x x x x x1 ! nrowi1= 5 1 1 1 ! ncoli1= 5 1x x x x x 1x x x x x1 ! ------------------- 1-------------1 ! 1 ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! implicit none integer ncoli integer ncoli1 integer nrowi1 integer nrowi real ( kind = 8 ) ai(nrowi,ncoli) real ( kind = 8 ) ai1(nrowi1,ncoli1) integer ip integer ipivot(nrowi) integer j integer last integer m if ( nrowi-last < 1 ) then return end if if ( ncoli-last < 1 ) then return end if ! ! Put the remainder of block I into AI1. ! do m = 1, nrowi-last ip = ipivot(last+m) do j = 1, ncoli-last ai1(m,j) = ai(ip,last+j) end do end do ! ! Zero out the upper right corner of ai1. ! do j = ncoli+1-last, ncoli1 do m = 1, nrowi-last ai1(m,j) = 0.0D+00 end do end do return end diff --git a/pppack/slvblk.f90 b/pppack/slvblk.f90 index 0ce546c..4ea74a5 100644 --- a/pppack/slvblk.f90 +++ b/pppack/slvblk.f90 @@ -1,180 +1,180 @@ !> !> @file slvblk.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine slvblk ( bloks, integs, nbloks, b, ipivot, x, iflag ) !************************************************************************* ! !! SLVBLK solves the almost block diagonal linear system A*x=b. ! ! Discussion: ! ! Such almost block diagonal matrices arise naturally in piecewise ! polynomial interpolation or approximation and in finite element ! methods for two-point boundary value problems. The PLU factorization ! method is implemented here to take advantage of the special structure ! of such systems for savings in computing time and storage requirements. ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! bloks a one-dimenional array, of length ! sum( integs(1,i)*integs(2,i) ; i=1,nbloks ) ! on input, contains the blocks of the almost block diagonal ! matrix a . the array integs (see below and the example) ! describes the block structure. ! on output, contains correspondingly the plu factorization ! of a (if iflag /= 0). certain of the entries into bloks ! are arbitrary (where the blocks overlap). ! ! integs integer array description of the block structure of a . ! integs(1,i)=no. of rows of block i = nrow ! integs(2,i)=no. of colums of block i = ncol ! integs(3,i)=no. of elim. steps in block i = last ! i =1,2,...,nbloks ! the linear system is of order ! n = sum ( integs(3,i) , i=1,...,nbloks ), ! but the total number of rows in the blocks is ! nbrows=sum( integs(1,i) ; i = 1,...,nbloks) ! ! nbloks number of blocks ! b right side of the linear system, array of length nbrows. ! certain of the entries are arbitrary, corresponding to ! rows of the blocks which overlap (see block structure and ! the example below). ! ipivot on output, integer array containing the pivoting sequence ! used. length is nbrows ! x on output, contains the computed solution (if iflag /= 0) ! length is n. ! iflag on output, integer ! =(-1)**(no. of interchanges during factorization) ! if a is invertible ! =0 if a is singular ! ! auxiliary programs ! fcblok (bloks,integs,nbloks,ipivot,scrtch,iflag) factors the matrix ! a , and is used for this purpose in slvblk. its arguments ! are as in slvblk, except for ! scrtch=a work array of length max(integs(1,i)). ! ! sbblok (bloks,integs,nbloks,ipivot,b,x) solves the system a*x=b ! once a is factored. this is done automatically by slvblk ! for one right side b, but subsequent solutions may be ! obtained for additional b-vectors. the arguments are all ! as in slvblk. ! ! dtblok (bloks,integs,nbloks,ipivot,iflag,detsgn,detlog) computes the ! determinant of a once slvblk or fcblok has done the fact- ! orization.the first five arguments are as in slvblk. ! detsgn =sign of the determinant ! detlog =natural log of the determinant ! ! block structure of a ! the nbloks blocks are stored consecutively in the array bloks . ! the first block has its (1,1)-entry at bloks(1), and, if the i-th ! block has its (1,1)-entry at bloks(index(i)), then ! index(i+1)=index(i) + nrow(i)*ncol(i) . ! the blocks are pieced together to give the interesting part of a ! as follows. for i=1,2,...,nbloks-1, the (1,1)-entry of the next ! block (the (i+1)st block ) corresponds to the (last+1,last+1)-entry ! of the current i-th block. recall last=integs(3,i) and note that ! this means that ! a. every block starts on the diagonal of a . ! b. the blocks overlap (usually). the rows of the (i+1)st block ! which are overlapped by the i-th block may be arbitrarily de- ! fined initially. they are overwritten during elimination. ! the right side for the equations in the i-th block are stored cor- ! respondingly as the last entries of a piece of b of length nrow ! (= integs(1,i)) and following immediately in b the corresponding ! piece for the right side of the preceding block, with the right side ! for the first block starting at b(1) . in this, the right side for ! an equation need only be specified once on input, in the first block ! in which the equation appears. ! ! example and test driver ! the test driver for this package contains an example, a linear ! system of order 11, whose nonzero entries are indicated in the fol- ! lowing schema by their row and column index modulo 10. next to it ! are the contents of the integs arrray when the matrix is taken to ! be almost block diagonal with nbloks=5, and below it are the five ! blocks. ! ! nrow1=3, ncol1 = 4 ! 11 12 13 14 ! 21 22 23 24 nrow2=3, ncol2 = 3 ! 31 32 33 34 ! last1=2 43 44 45 ! 53 54 55 nrow3=3, ncol3 = 4 ! last2=3 66 67 68 69 nrow4 = 3, ncol4 = 4 ! 76 77 78 79 nrow5=4, ncol5 = 4 ! 86 87 88 89 ! last3=1 97 98 99 90 ! last4=1 08 09 00 01 ! 18 19 10 11 ! last5=4 ! ! actual input to bloks shown by rows of blocks of a . ! (the ** items are arbitrary, this storage is used by slvblk) ! ! 11 12 13 14 / ** ** ** / 66 67 68 69 / ** ** ** ** / ** ** ** ** ! 21 22 23 24 / 43 44 45 / 76 77 78 79 / ** ** ** ** / ** ** ** ** ! 31 32 33 34/ 53 54 55/ 86 87 88 89/ 97 98 99 90/ 08 09 00 01 ! 18 19 10 11 ! ! index=1 index = 13 index = 22 index = 34 index = 46 ! ! actual right side values with ** for arbitrary values ! b1 b2 b3 ** b4 b5 b6 b7 b8 ** ** b9 ** ** b10 b11 ! ! (it would have been more efficient to combine block 3 with block 4) ! implicit none integer nbloks real ( kind = 8 ) b(*) real ( kind = 8 ) bloks(*) integer iflag integer integs(3,nbloks) integer ipivot(*) real ( kind = 8 ) x(*) ! ! In the call to FCBLOK, X is used for temporary storage. ! call fcblok ( bloks, integs, nbloks, ipivot, x, iflag ) if ( iflag == 0 ) then return end if call sbblok ( bloks, integs, nbloks, ipivot, b, x ) return end diff --git a/pppack/smooth.f90 b/pppack/smooth.f90 index cab3d0b..d014648 100644 --- a/pppack/smooth.f90 +++ b/pppack/smooth.f90 @@ -1,218 +1,218 @@ !> !> @file smooth.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine smooth ( x, y, dy, npoint, s, v, a, sfp ) !************************************************************************* ! !! SMOOTH constructs the cubic smoothing spline to given data. ! ! Discussion: ! ! The data is of the form ! ! (x(i),y(i)), i=1,...,npoint, ! ! The cubic smoothing spline has as small a second derivative as ! possible while ! ! s(f)=sum( ((y(i)-f(x(i)))/dy(i))**2 , i=1,...,npoint ) <= s . ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! input ! ! Input, real ( kind = 8 ) X(NPOINT), the abscissas, assumed to be strictly ! increasing . ! ! Input, real ( kind = 8 ) Y(NPOINT), the corresponding ordinates. ! ! dy(1),...,dy(npoint) estimate of uncertainty in data, assumed ! to be positive . ! ! npoint.....number of data points, assumed greater than 1 ! ! s.....upper bound on the discrete weighted mean square distance of ! the approximation f from the data . ! ! work arrays: ! ! v of size (npoint,7) ! a of size (npoint,4) ! ! output ! ! a(.,1).....contains the sequence of smoothed ordinates . ! a(i,j)=d**(j-1)f(x(i)), j=2,3,4, i=1,...,npoint-1 , i.e., the ! first three derivatives of the smoothing spline f at the ! left end of each of the data intervals . ! Warning . . . a would have to be transposed before it ! could be used in ppvalu . ! ! Method: ! ! the matrices q-transp*d and q-transp*d**2*q are constructed in ! SETUPQ from x and dy , as is the vector qty=q-transp*y . ! then, for given p , the vector U is determined in CHOL1D as ! the solution of the linear system ! (6(1-p)q-transp*d**2*q+p*r)u =qty . ! from u , the smoothing spline f (for this choice of smoothing par- ! ameter p ) is obtained in the sense that ! f(x(.)) = y-6(1-p)d**2*q*u and ! (d**2)f(x(.)) = 6*p*u . ! ! the smoothing parameter p is found (if possible) so that ! sf(p) = s , ! with sf(p)=s(f) , where f is the smoothing spline as it depends ! on p . if s=0, then p = 1 . if sf(0) <= s , then p = 0 . ! otherwise, the secant method is used to locate an appropriate p in ! the open interval (0,1) . specifically, ! p(0)=0, p(1) = (s-sf(0))/dsf ! with dsf=-24*u-transp*r*u a good approximation to d(sf(0)) = dsf ! +60*(d*q*u)-transp*(d*q*u) , and u as obtained for p=0 . ! after that, for n=1,2,... until sf(p(n)) <= 1.01*s, do.... ! determine p(n+1) as the point at which the secant to sf at the ! points p(n) and p(n-1) takes on the value s . ! if p(n+1) >= 1 , choose instead p(n+1) as the point at which ! the parabola sf(p(n))*((1-.)/(1-p(n)))**2 takes on the value s. ! ! Note that, in exact arithmetic, always p(n+1) < p(n) , hence ! sf(p(n+1)) < sf(p(n)) . therefore, also stop the iteration, ! with final p=1 , in case sf(p(n+1)) >= sf(p(n)) . ! implicit none integer npoint real ( kind = 8 ) a(npoint,4) real ( kind = 8 ) change real ( kind = 8 ) dy(npoint) integer i real ( kind = 8 ) p real ( kind = 8 ) prevp real ( kind = 8 ) prevsf real ( kind = 8 ) s real ( kind = 8 ) sfp real ( kind = 8 ) utru real ( kind = 8 ) v(npoint,7) real ( kind = 8 ) x(npoint) real ( kind = 8 ) y(npoint) call setupq ( x, dy, y, npoint, v, a(1,4) ) if ( 0.0D+00 < s ) then go to 20 end if 10 continue p = 1.0D+00 call chol1d ( p, v, a(1,4), npoint, a(1,3), a(1,1) ) sfp = 0.0D+00 go to 70 20 continue p = 0.0D+00 call chol1d ( p, v, a(1,4), npoint, a(1,3), a(1,1) ) sfp = 0.0D+00 do i = 1, npoint sfp = sfp + ( a(i,1) * dy(i) )**2 end do sfp = sfp * 36.0D+00 if ( sfp <= s ) then go to 70 end if prevp = 0.0D+00 prevsf = sfp utru = 0.0D+00 do i = 2, npoint utru = utru + v(i-1,4) * ( a(i-1,3) * ( a(i-1,3) + a(i,3) ) + a(i,3)**2 ) end do p = ( sfp - s ) / ( 24.0D+00 * utru ) ! ! Secant iteration for the determination of p starts here. ! 50 continue call chol1d ( p, v, a(1,4), npoint, a(1,3), a(1,1) ) sfp = 0.0D+00 do i = 1, npoint sfp = sfp + ( a(i,1) * dy(i) )**2 end do sfp = sfp * 36.0D+00 * ( 1.0D+00 - p )**2 if ( sfp <= 1.01D+00 * s ) then go to 70 end if if ( prevsf <= sfp ) then go to 10 end if change = ( p - prevp ) / ( sfp - prevsf ) * ( sfp - s ) prevp = p p = p - change prevsf = sfp if ( 1.0D+00 <= p ) then p = 1.0D+00 - sqrt ( s / prevsf ) * ( 1.0D+00 - prevp ) end if go to 50 ! ! The correct value of p has been found. ! Compute polynomial coefficients from q*u (in a(.,1)). ! 70 continue do i = 1, npoint a(i,1) = y(i) - 6.0D+00 * ( 1.0D+00 - p ) * dy(i)**2 * a(i,1) end do do i = 1, npoint a(i,3) = 6.0D+00 * a(i,3) * p end do do i = 1, npoint-1 a(i,4) = ( a(i+1,3) - a(i,3) ) / v(i,4) a(i,2) = ( a(i+1,1) - a(i,1) ) / v(i,4) & - ( a(i,3) + a(i,4) / 3.0D+00 * v(i,4) ) / 2.0D+00 * v(i,4) end do return end diff --git a/pppack/spli2d.f90 b/pppack/spli2d.f90 index 348d253..b71a3f4 100644 --- a/pppack/spli2d.f90 +++ b/pppack/spli2d.f90 @@ -1,241 +1,241 @@ !> !> @file spli2d.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine spli2d ( tau, gtau, t, n, k, m, work, q, bcoef, iflag ) !************************************************************************* ! !! SPLI2D produces a interpolatory tensor product spline. ! ! Discussion: ! ! SPLI2D is an extended version of SPLINT. ! ! SPLI2D produces the B-spline coefficients BCOEF(J,.) of the ! spline of order K with knots T(I), I=1,..., N+K, which takes on ! the value GTAU(I,J) at TAU(I), I=1,..., N, J=1,...,M. ! ! The I-th equation of the linear system ! ! A * BCOEF = B ! ! for the B-spline coefficients of the interpolant enforces ! interpolation at TAU(I), I=1,...,N. Hence, B(I)=GTAU(I), ! all I, and A is a band matrix with 2K-1 bands, if it is ! invertible. ! ! The matrix A is generated row by row and stored, diagonal by ! diagonal, in the rows of the array Q, with the main diagonal ! going into row K. ! ! The banded system is then solved by a call to BANFAC, which ! constructs the triangular factorization for A and stores it ! again in Q, followed by a call to BANSLV, which then obtains ! the solution BCOEF by substitution. ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! Input, real ( kind = 8 ) TAU(N), contains the data point abscissas. ! TAU must be strictly increasing ! ! Input, real ( kind = 8 ) GTAU(N), contains the data point ordinates, ! J=1,...,M. ! ! Input, real ( kind = 8 ) T(N+K), the knot sequence. ! ! Input, integer N, the number of data points and the ! dimension of the spline space SPLINE(K,T) ! ! Input, integer K, the order of the spline. ! ! Input, integer M, the number of data sets. ! ! Work space, real ( kind = 8 ) WORK(N). ! ! Output, real ( kind = 8 ) Q(2*K-1)*N, containing the triangular ! factorization of the coefficient matrix of the linear ! system for the B-spline coefficients of the spline interpolant. ! ! The B-spline coefficients for the interpolant of an additional ! data set (TAU(I),HTAU(I)), I=1,...,N with the same data ! abscissae can be obtained without going through all the ! calculations in this routine, simply by loading HTAU into ! BCOEF and then using the statement ! ! CALL BANSLV(Q,2*K-1,N,K-1,K-1,BCOEF) ! ! Output, real ( kind = 8 ) BCOEF(N), the B-spline coefficients of ! the interpolant. ! ! Output, integer IFLAG, error indicator. ! 1, no error. ! 2, an error occurred, which may have been caused by ! singularity of the linear system. ! ! The linear system to be solved is theoretically invertible if ! and only if ! ! T(I) < TAU(I) < TAU(I+K), for all I. ! ! Violation of this condition is certain to lead to IFLAG=2. ! implicit none integer m integer n real ( kind = 8 ) bcoef(m,n) real ( kind = 8 ) gtau(n,m) integer i integer iflag integer ilp1mx integer j integer jj integer k integer left real ( kind = 8 ) q((2*k-1)*n) real ( kind = 8 ) t(n+k) real ( kind = 8 ) tau(n) real ( kind = 8 ) taui real ( kind = 8 ) work(n) left = k do i = 1, (2*k-1)*n q(i) = 0.0 end do ! ! Construct the N interpolation equations. ! do i = 1, n taui = tau(i) ilp1mx = min(i+k,n+1) ! ! Find the index LEFT in the closed interval (I,I+K-1) such ! that: ! ! T(LEFT) < = TAU(I) < T(LEFT+1) ! ! The matrix will be singular if this is not possible. ! left = max(left,i) if ( taui < t(left) ) then iflag = 2 write(*,*)' ' write(*,*)'SPLI2D - Fatal error!' write(*,*)' The TAU array is not strictly increasing.' stop end if 20 continue if ( t(left+1) <= taui ) then left = left+1 if ( left < ilp1mx ) then go to 20 end if left = left-1 if ( t(left+1) < taui ) then iflag = 2 write(*,*)' ' write(*,*)'SPLI2D - Fatal error!' write(*,*)' The TAU array is not strictly increasing.' stop end if end if ! ! The I-th equation enforces interpolation at TAUI, hence ! ! A(I,J)=B(J,K,T)(TAUI), for all J. ! ! Only the K entries with J=LEFT-K+1, ..., LEFT actually might be ! nonzero. These K numbers are returned, in WORK (used for ! temporary storage here), by the following call: ! call bsplvb(t,k,1,taui,left,work) ! ! We therefore want ! ! WORK(J)=B(LEFT-K+J)(TAUI) ! ! to go into ! ! a(i,left-k+j), i.e., into q(i-(left+j)+2*k,(left+j)-k) since ! a(i+j,j) is to go into q(i+k,j), all i,j, if we consider q ! as a two-dim. array , with 2*k-1 rows (see comments in ! banfac). in the present program, we treat q as an equivalent ! one-dimensional array (because of fortran restrictions on ! ?? LOST LINE ?? ! entry ! i -(left+j)+2*k + ((left+j)-k-1)*(2*k-1) ! = i-left+1+(left -k)*(2*k-1) + (2*k-2)*j ! of q . ! jj = i-left+1+(left-k)*(k+k-1) do j = 1, k jj = jj+k+k-2 q(jj) = work(j) end do end do ! ! Factor A, stored again in Q. ! call banfac(q,k+k-1,n,k-1,k-1,iflag) if ( iflag == 2 ) then write(*,*)' ' write(*,*)'SPLI2D - Fatal error!' write(*,*)' BANFAC reports that the matrix is singular.' stop end if ! ! Solve A*BCOEF=GTAU by backsubstitution. ! do j = 1, m work(1:n) = gtau(1:n,j) call banslv ( q, k+k-1, n, k-1, k-1, work ) bcoef(j,1:n) = work(1:n) end do return end diff --git a/pppack/spline_hermite_set.f90 b/pppack/spline_hermite_set.f90 index 3b7e28e..cca087e 100644 --- a/pppack/spline_hermite_set.f90 +++ b/pppack/spline_hermite_set.f90 @@ -1,90 +1,90 @@ !> !> @file spline_hermite_set.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine spline_hermite_set ( ndata, tdata, c ) !************************************************************************* ! !! SPLINE_HERMITE_SET sets up a piecewise cubic Hermite interpolant. ! ! Modified: ! ! 06 April 1999 ! ! Reference: ! ! Conte and de Boor, ! Algorithm CALCCF, ! Elementary Numerical Analysis, ! 1973, page 235. ! ! Parameters: ! ! Input, integer NDATA, the number of data points. ! NDATA must be at least 2. ! ! Input, real ( kind = 8 ) TDATA(NDATA), the abscissas of the data points. ! The entries of TDATA are assumed to be strictly increasing. ! ! Input/output, real ( kind = 8 ) C(4,NDATA). ! On input, C(1,I) and C(2,I) should contain the value of the ! function and its derivative at TDATA(I), for I = 1 to NDATA. ! These values will not be changed by this routine. ! On output, C(3,I) and C(4,I) contain the quadratic ! and cubic coefficients of the Hermite polynomial ! in the interval (TDATA(I), TDATA(I+1)), for I=1 to NDATA-1. ! C(3,NDATA) and C(4,NDATA) are set to 0. ! In the interval (TDATA(I), TDATA(I+1)), the interpolating Hermite ! polynomial is given by ! ! SVAL(TVAL) = C(1,I) ! + ( TVAL - TDATA(I) ) * ( C(2,I) ! + ( TVAL - TDATA(I) ) * ( C(3,I) ! + ( TVAL - TDATA(I) ) * C(4,I) ) ) ! implicit none integer ndata real ( kind = 8 ) c(4,ndata) real ( kind = 8 ) divdif1 real ( kind = 8 ) divdif3 real ( kind = 8 ) dt integer i real ( kind = 8 ) tdata(ndata) do i = 1, ndata-1 dt = tdata(i+1) - tdata(i) divdif1 = ( c(1,i+1) - c(1,i) ) / dt divdif3 = c(2,i) + c(2,i+1) - 2.0D+00 * divdif1 c(3,i) = ( divdif1 - c(2,i) - divdif3 ) / dt c(4,i) = divdif3 / ( dt * dt ) end do c(3,ndata) = 0.0D+00 c(4,ndata) = 0.0D+00 return end diff --git a/pppack/spline_hermite_val.f90 b/pppack/spline_hermite_val.f90 index 12bdcc9..76bb98c 100644 --- a/pppack/spline_hermite_val.f90 +++ b/pppack/spline_hermite_val.f90 @@ -1,97 +1,97 @@ !> !> @file spline_hermite_val.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine spline_hermite_val ( ndata, tdata, c, tval, sval ) !************************************************************************* ! !! SPLINE_HERMITE_VAL evaluates a piecewise cubic Hermite interpolant. ! ! Discussion: ! ! SPLINE_HERMITE_SET must be called first, to set up the ! spline data from the raw function and derivative data. ! ! Modified: ! ! 06 April 1999 ! ! Reference: ! ! Conte and de Boor, ! Algorithm PCUBIC, ! Elementary Numerical Analysis, ! 1973, page 234. ! ! Parameters: ! ! Input, integer NDATA, the number of data points. ! NDATA is assumed to be at least 2. ! ! Input, real ( kind = 8 ) TDATA(NDATA), the abscissas of the data points. ! The entries of TDATA are assumed to be strictly increasing. ! ! Input, real ( kind = 8 ) C(4,NDATA), contains the data computed by ! SPLINE_HERMITE_SET. ! ! Input, real ( kind = 8 ) TVAL, the point where the interpolant is to ! be evaluated. ! ! Output, real ( kind = 8 ) SVAL, the value of the interpolant at TVAL. ! implicit none integer ndata real ( kind = 8 ) c(4,ndata) real ( kind = 8 ) dt integer i integer j real ( kind = 8 ) sval real ( kind = 8 ) tdata(ndata) real ( kind = 8 ) tval ! ! Find the interval J = [ TDATA(J), TDATA(J+1) ] that contains ! or is nearest to TVAL. ! j = ndata - 1 do i = 1, ndata-2 if ( tval < tdata(i+1) ) then j = i exit end if end do ! ! Evaluate the cubic polynomial. ! dt = tval - tdata(j) sval = c(1,j) + dt * ( c(2,j) + dt * ( c(3,j) + dt * c(4,j) ) ) return end diff --git a/pppack/splint.f90 b/pppack/splint.f90 index 10ef1f8..5a23103 100644 --- a/pppack/splint.f90 +++ b/pppack/splint.f90 @@ -1,208 +1,208 @@ !> !> @file splint.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine splint ( tau, gtau, t, n, k, q, bcoef, iflag ) !************************************************************************* ! !! SPLINT produces the B-spline coefficients BCOEF of an interpolating spline. ! ! Discussion: ! ! The spline is of order K with knots T(1:N+K), and takes on the ! value GTAU(I) at TAU(I), for I = 1 to N. ! ! The I-th equation of the linear system ! A * BCOEF = B ! for the b-coefficients of the interpolant enforces interpolation ! at TAU(1:N). ! ! Hence, b(i)=gtau(i), all i, and a is a band matrix with 2k-1 ! bands (if it is invertible). ! ! The matrix A is generated row by row and stored, diagonal by di- ! agonal, in the rows of the array q , with the main diagonal go- ! ing into row K. see comments in the program below. ! ! The banded system is then solved by a call to banfac (which con- ! structs the triangular factorization for a and stores it again in ! q ), followed by a call to banslv (which then obtains the solution ! bcoef by substitution). ! ! BANFAC does no pivoting, since the total positivity of the matrix ! A makes this unnecessary. ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! Input, real ( kind = 8 ) TAU(N), the data point abscissas. The entries in ! TAU should be strictly increasing. ! ! Input, real ( kind = 8 ) GTAU(N), the data ordinates. ! ! Input, real ( kind = 8 ) T(N+K), the knot sequence. ! ! Input, integer N, the number of data points. ! ! Input, integer K, the order of the spline. ! ! output ! ! q, array of size (2*k-1)*n , containing the triangular factoriz- ! ation of the coefficient matrix of the linear system for the b- ! coefficients of the spline interpolant. ! the b-coefficients for the interpolant of an additional data set can ! be obtained without going through all the calculations in this ! routine, simply by loading htau into bcoef and then execut- ! ing the call banslv ( q, 2*k-1, n, k-1, k-1, bcoef ) ! ! bcoef, the b-coefficients of the interpolant, of length n. ! ! iflag, an integer indicating success (= 1) or failure (= 2) ! the linear system to be solved is (theoretically) invertible if ! and only if ! t(i) < tau(i) < tau(i+k), all i. ! violation of this condition is certain to lead to iflag=2 . ! implicit none integer n real ( kind = 8 ) bcoef(n) real ( kind = 8 ) gtau(n) integer i integer iflag integer ilp1mx integer j integer jj integer k integer kpkm2 integer left real ( kind = 8 ) q((2*k-1)*n) real ( kind = 8 ) t(n+k) real ( kind = 8 ) tau(n) real ( kind = 8 ) taui kpkm2 = 2*(k-1) left = k do i = 1, (2*k-1)*n q(i) = 0.0D+00 end do ! ! loop over i to construct the n interpolation equations ! do i = 1, n taui = tau(i) ilp1mx = min(i+k,n+1) ! ! find left in the closed interval (i,i+k-1) such that ! t(left) <= tau(i) < t(left+1) ! matrix is singular if this is not possible ! left = max(left,i) if ( taui < t(left)) then go to 70 end if 20 continue if ( taui < t(left+1)) then go to 30 end if left = left+1 if ( left < ilp1mx) then go to 20 end if left = left-1 if ( t(left+1) < taui ) then go to 70 end if ! ! The i-th equation enforces interpolation at taui, hence ! a(i,j)=b(j,k,t)(taui), all j. only the k entries with j = ! left-k+1,...,left actually might be nonzero. these k numbers ! are returned, in bcoef (used for temp.storage here), by the ! following ! 30 continue call bsplvb(t,k,1,taui,left,bcoef) ! ! We therefore want bcoef(j)=b(left-k+j)(taui) to go into ! a(i,left-k+j), i.e., into q(i-(left+j)+2*k,(left+j)-k) since ! a(i+j,j) is to go into q(i+k,j), all i,j, if we consider q ! as a two-dim. array , with 2*k-1 rows (see comments in ! banfac). in the present program, we treat q as an equivalent ! one-dimensional array (because of fortran restrictions on ! dimension statements) . we therefore want bcoef(j) to go into ! entry ! i -(left+j)+2*k + ((left+j)-k-1)*(2*k-1) ! = i-left+1+(left -k)*(2*k-1) + (2*k-2)*j ! of q . ! jj = i-left+1+(left-k)*(k+k-1) do j = 1,k jj = jj+kpkm2 q(jj) = bcoef(j) end do end do ! ! Obtain factorization of A, stored again in Q. ! call banfac ( q, k+k-1, n, k-1, k-1, iflag ) if ( iflag == 2 ) then write(*,*)' ' write(*,*)'SPLINT - Fatal Error!' write(*,*)' The linear system is not invertible!' return end if ! ! Solve a*bcoef=gtau by backsubstitution ! bcoef(1:n) = gtau(1:n) call banslv ( q, k+k-1, n, k-1, k-1, bcoef ) return 70 iflag=2 write ( *, * ) ' ' write ( *, * ) 'SPLINT - Fatal Error!' write ( *, * ) ' The linear system is not invertible!' return end diff --git a/pppack/splopt.f90 b/pppack/splopt.f90 index e27065d..0171ab2 100644 --- a/pppack/splopt.f90 +++ b/pppack/splopt.f90 @@ -1,371 +1,371 @@ !> !> @file splopt.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine splopt ( tau, n, k, scrtch, t, iflag ) !************************************************************************* ! !! SPLOPT computes the knots for an optimal recovery scheme. ! ! Discussion: ! ! The optimal recovery scheme is of order K for data at TAU(1:N). ! ! The interior knots T(K+1:N) are determined by Newton's method in ! such a way that the signum function which changes sign at ! T(K+1), ..., T(N) and nowhere else in ( TAU(1), TAU(n) ) is ! orthogonal to the spline space SPLINE ( K, TAU ) on that interval. ! ! Let XI(J) be the current guess for T(K+J), j=1,...,n-k. Then ! the next Newton iterate is of the form ! xi(j) + (-)**(n-k-j)*x(j) , j=1,...,n-k, ! with X the solution of the linear system ! C * X = D. ! ! Here, c(i,j)=b(i)(xi(j)), all j, with b(i) the i-th b-spline of ! order K for the knot sequence TAU, all i, and D is the vector ! given by d(i)=sum( -a(j) , j=i,...,n )*(TAU(i+k)-TAU(i))/k, all i, ! with a(i)=sum ( (-)**(n-k-j)*b(i,k+1,tau)(xi(j)) , j=1,...,n-k ) ! for i=1,...,n-1, and a(n)=-.5 . ! ! See chapter XIII of text and references there for a derivation. ! ! The first guess for t(k+j) is (TAU(j+1)+...+TAU(j+k-1))/(k-1) . ! iteration terminates if max(abs(x(j))) < t o l , with ! TOL = t o l r t e *(TAU(n)-TAU(1))/(n-k) , ! or else after NEWTMX iterations , currently, ! newtmx, tolrte / 10, .000001 ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! Input, real ( kind = 8 ) TAU(N), the interpolation points. ! assumed to be nondecreasing, with tau(i) < tau(i+k),all i. ! ! Input, integer N, the number of data points. ! ! Input, integer K, the order of the optimal recovery scheme to be used. ! ! Workspace, real ( kind = 8 ) SCRTCH((N-K)*(2*K+3)+5*K+3). The various ! contents are specified in the text below . ! ! Output, real ( kind = 8 ) T(N+K), the optimal knots ready for ! use in optimal recovery. specifically, t(1)=... = t(k) = ! tau(1) and t(n+1)=... = t(n+k) = tau(n) , while the n-k ! interior knots t(k+1), ..., t(n) are calculated. ! ! Output, integer IFLAG, error indicator. ! = 1, success. T contains the optimal knots. ! = 2, failure. K < 3 or N < K or the linear system was singular. ! implicit none integer k integer n real ( kind = 8 ) del real ( kind = 8 ) delmax real ( kind = 8 ) floatk integer i integer id integer iflag integer index integer j integer kp1 integer kpkm1 integer kpn integer l integer left integer leftmk integer lenw integer ll integer llmax integer llmin integer na integer nb integer nc integer nd integer, parameter :: newtmx = 10 integer newton integer nmk integer nx real ( kind = 8 ) scrtch((n-k)*(2*k+3)+5*k+3) real ( kind = 8 ) t(n+k) real ( kind = 8 ) tau(n) real ( kind = 8 ) sign real ( kind = 8 ) signst real ( kind = 8 ) sum1 real ( kind = 8 ) tol real ( kind = 8 ), parameter :: tolrte = 0.000001D+00 real ( kind = 8 ) xij nmk = n - k if ( n < k ) then write ( *, * ) ' ' write ( *, * ) 'SPLOPT - Fatal error!' write ( *, * ) ' N < K, N = ',n,' K = ',k iflag = 2 return end if if ( n == k ) then do i = 1, k t(i) = tau(1) t(n+i) = tau(n) end do return end if if ( k <= 2 ) then write(*,*)' ' write(*,*)'SPLOPT - Fatal error!' write(*,*)' K < 2, K = ',k iflag = 2 stop end if floatk = k kp1 = k+1 kpkm1 = k+k-1 kpn = k+n signst = -1.0D+00 if ( (nmk/2) * 2 < nmk ) then signst = 1.0D+00 end if ! ! scrtch(i)=tau-extended(i), i=1,...,n+k+k ! nx = n + k + k + 1 ! ! scrtch(i+nx)=xi(i),i=0,...,n-k+1 ! na = nx + nmk + 1 ! ! scrtch(i+na)=-a(i), i=1,...,n ! nd = na + n ! ! scrtch(i+nd)=x(i) or d(i), i=1,...,n-k ! nb = nd+nmk ! ! scrtch(i+nb)=biatx(i),i=1,...,k+1 ! nc = nb+kp1 ! ! scrtch(i+(j-1)*(2k-1)+nc)=w(i,j) = c(i-k+j,j), i=j-k,...,j+k, ! j=1,...,n-k. ! lenw = kpkm1*nmk ! ! Extend TAU to a knot sequence and store in scrtch. ! do j = 1, k scrtch(j) = tau(1) scrtch(kpn+j) = tau(n) end do do j = 1, n scrtch(k+j) = tau(j) end do ! ! First guess for scrtch (.+nx) = xi . ! scrtch(nx) = tau(1) scrtch(nmk+1+nx) = tau(n) do j = 1, nmk sum1 = 0.0D+00 do l = 1, k-1 sum1 = sum1 + tau(j+l) end do scrtch(j+nx) = sum1 / real ( k - 1, kind = 8 ) end do ! ! last entry of scrtch (.+na) =-a is always ... ! scrtch(n+na) = 0.5D+00 ! ! Start the Newton iteration. ! newton = 1 tol = tolrte * ( tau(n) - tau(1) ) / real ( nmk, kind = 8 ) ! ! Start the Newton step. ! compute the 2k-1 bands of the matrix c and store in scrtch(.+nc), ! and compute the vector scrtch(.+na)=-a. ! 100 continue do i = 1, lenw scrtch(i+nc) = 0.0D+00 end do do i = 2, n scrtch(i-1+na) = 0.0D+00 end do sign = signst left = kp1 do j = 1, nmk xij = scrtch(j+nx) 130 continue if ( xij < scrtch(left+1) ) then go to 140 end if left = left+1 if ( left < kpn ) then go to 130 end if left = left-1 140 continue call bsplvb(scrtch,k,1,xij,left,scrtch(1+nb)) ! ! The TAU sequence in scrtch is preceded by k additional knots ! therefore, scrtch(ll+nb) now contains b(left-2k+ll)(xij) ! which is destined for c(left-2k+ll,j), and therefore for ! w(left-k-j+ll,j)= scrtch(left-k-j+ll+(j-1)*kpkm1 + nc) ! since we store the 2k-1 bands of c in the 2k-1 r o w s of ! the work array w, and w in turn is stored in s c r t c h , ! with w(1,1)=scrtch(1+nc). ! ! also, c being of order n-k, we would want ! 1 <= left-2k+ll .le. n-k or ! llmin=2k-left <= ll .le. n-left+k = llmax . ! leftmk = left-k index = leftmk-j+(j-1)*kpkm1+nc llmin = max(1,k-leftmk) llmax = min(k,n-leftmk) do ll = llmin, llmax scrtch(ll+index)=scrtch(ll+nb) end do call bsplvb (scrtch,kp1,2,xij,left,scrtch(1+nb)) id=max(0,leftmk-kp1) llmin=1-min(0,leftmk-kp1) do ll=llmin, kp1 id=id+1 scrtch(id+na)=scrtch(id+na)-sign*scrtch(ll+nb) end do sign=-sign end do call banfac(scrtch(1+nc),kpkm1,nmk,k-1,k-1,iflag) if ( iflag == 2 ) then write ( *, * ) ' ' write ( *, * ) 'SPLOPT - Fatal error!' write ( *, * ) ' Matrix C is not invertible.' stop end if ! ! compute scrtch (.+nd)= d from scrtch (.+na) =-a . ! do i=n,2,-1 scrtch(i-1+na)=scrtch(i-1+na)+scrtch(i+na) end do do i=1,nmk scrtch(i+nd)=scrtch(i+na)*(tau(i+k)-tau(i))/floatk end do ! ! Compute scrtch (.+nd)= x . ! call banslv(scrtch(1+nc),kpkm1,nmk,k-1,k-1,scrtch(1+nd)) ! ! Compute scrtch (.+nd)=change in xi . modify, if necessary, to ! prevent new xi from moving more than 1/3 of the way to its ! neighbors. then add to xi to obtain new xi in scrtch(.+nx). ! delmax = 0.0D+00 sign = signst do i = 1, nmk del = sign * scrtch(i+nd) delmax = max ( delmax, abs ( del ) ) if ( 0.0D+00 < del ) then go to 230 end if del = max ( del, ( scrtch(i-1+nx) - scrtch(i+nx) ) / 3.0D+00 ) go to 240 230 del = min (del,(scrtch(i+1+nx)-scrtch(i+nx))/3.0D+00 ) 240 sign = -sign scrtch(i+nx) = scrtch(i+nx)+del end do ! ! Call it a day in case change in xi was small enough or too many ! steps were taken. ! if ( delmax < tol ) then go to 270 end if newton = newton + 1 if ( newton <= newtmx ) then go to 100 end if write ( *, * ) ' ' write ( *, * ) 'SPLOPT - Warning!' write ( *, * ) ' No convergence. Number of Newton steps was ', newtmx 270 continue do i = 1, nmk t(k+i) = scrtch(i+nx) end do 290 continue do i=1,k t(i)=tau(1) t(n+i)=tau(n) end do return ! 310 iflag=2 ! ! return end diff --git a/pppack/subbak.f90 b/pppack/subbak.f90 index c3df5e9..815b401 100644 --- a/pppack/subbak.f90 +++ b/pppack/subbak.f90 @@ -1,80 +1,80 @@ !> !> @file subbak.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine subbak ( w, ipivot, nrow, ncol, last, x ) !************************************************************************* ! !! SUBBAK carries out backsubstitution for the current block. ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! w, ipivot, nrow, ncol, last are as on return from factrb. ! ! x(1),...,x(ncol) contains, on input, the right side for the ! equations in this block after backsubstitution has been ! carried up to but not including equation ipivot(last). ! means that x(j) contains the right side of equation ipi- ! vot(j) as modified during elimination, j=1,...,last, while ! for j > last, x(j) is already a component of the solut- ! ion vector. ! ! x(1),...,x(ncol) contains, on output, the components of the solut- ! ion corresponding to the present block. ! implicit none integer ncol integer nrow integer ip integer ipivot(nrow) integer j integer k integer last real ( kind = 8 ) s real ( kind = 8 ) w(nrow,ncol) real ( kind = 8 ) x(ncol) do k = last, 1, -1 ip = ipivot(k) s = 0.0D+00 do j = k+1, ncol s = s + w(ip,j) * x(j) end do x(k) = ( x(k) - s ) / w(ip,k) end do end diff --git a/pppack/subfor.f90 b/pppack/subfor.f90 index 58f7afe..a1a2517 100644 --- a/pppack/subfor.f90 +++ b/pppack/subfor.f90 @@ -1,100 +1,100 @@ !> !> @file subfor.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine subfor ( w, ipivot, nrow, last, b, x ) !************************************************************************* ! !! SUBFOR carries out the forward pass of substitution for the current block. ! ! Discussion: ! ! The forward pass is the action on the right side corresponding to the ! elimination carried out in FACTRB for this block. ! ! At the end, x(j) contains the right side of the transformed ! ipivot(j)-th equation in this block, j=1,...,nrow. then, since ! for i=1,...,nrow-last, b(nrow+i) is going to be used as the right ! side of equation I in the next block (shifted over there from ! this block during factorization), it is set equal to x(last+i) here. ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! w, ipivot, nrow, last are as on return from factrb. ! ! b(j) is expected to contain, on input, the right side of j-th ! equation for this block, j=1,...,nrow. ! b(nrow+j) contains, on output, the appropriately modified right ! side for equation j in next block, j=1,...,nrow-last. ! ! x(j) contains, on output, the appropriately modified right ! side of equation ipivot(j) in this block, j=1,...,last (and ! even for j=last+1,...,nrow). ! implicit none integer last integer nrow real ( kind = 8 ) b(nrow+nrow-last) integer ip integer ipivot(nrow) integer j integer k real ( kind = 8 ) s real ( kind = 8 ) w(nrow,last) real ( kind = 8 ) x(nrow) ip = ipivot(1) x(1) = b(ip) do k = 2, nrow ip = ipivot(k) s = 0.0D+00 do j = 1, min ( k-1, last ) s = s + w(ip,j) * x(j) end do x(k) = b(ip) - s end do ! ! Transfer modified right sides of equations ipivot(last+1),..., ! ipivot(nrow) to next block. ! do k = last+1, nrow b(nrow-last+k) = x(k) end do return end diff --git a/pppack/tautsp.f90 b/pppack/tautsp.f90 index d585f07..4cb0bae 100644 --- a/pppack/tautsp.f90 +++ b/pppack/tautsp.f90 @@ -1,530 +1,530 @@ !> !> @file tautsp.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine tautsp ( tau, gtau, ntau, gamma, s, break, coef, l, k, iflag ) !************************************************************************* ! !! TAUTSP constructs a cubic spline interpolant to given data. ! ! Discussion: ! ! If 0 < GAMMA, additional knots are introduced where needed to ! make the interpolant more flexible locally. This avoids extraneous ! inflection points typical of cubic spline interpolation at knots to ! rapidly changing data. ! ! Method: ! ! On the I-th interval, (TAU(I), TAU(I+1)), the interpolant is of the ! form: ! ! (*) f(u(x))=a+b*u + c*h(u,z) + d*h(1-u,1-z) , ! ! with ! ! U = U(X) = ( X - TAU(I) ) / DTAU(I). ! ! Here, ! z=z(i) = addg(i+1)/(addg(i)+addg(i+1)) ! (= .5, in case the denominator vanishes). with ! addg(j)=abs(ddg(j)), ddg(j) = dg(j+1)-dg(j), ! dg(j)=divdif ( j) = (gtau(j+1)-gtau(j))/dtau(j) ! and ! h(u,z)=alpha*u**3+(1-alpha)*(max(((u-zeta)/(1-zeta)),0)**3 ! with ! alpha(z)=(1-gamma/3)/zeta ! zeta(z)=1-gamma*min((1 - z), 1/3) ! thus, for 1/3 <= z .le. 2/3, f is just a cubic polynomial on ! the interval i. otherwise, it has one additional knot, at ! tau(i)+zeta*dtau(i) . ! as z approaches 1, h(.,z) has an increasingly sharp bend near 1, ! thus allowing f to turn rapidly near the additional knot. ! in terms of f(j)=gtau(j) and ! fsecnd(j)= second derivative of f at tau(j), ! the coefficients for (*) are given as ! a=f(i)-d ! b=(f(i+1)-f(i)) - (c - d) ! c=fsecnd(i+1)*dtau(i)**2/hsecnd(1,z) ! d=fsecnd(i)*dtau(i)**2/hsecnd(1,1-z) ! hence can be computed once fsecnd(i),i=1,...,ntau, is fixed. ! ! F is automatically continuous and has a continuous second derivative ! (except when z=0 or 1 for some i). we determine fscnd(.) from ! the requirement that also the first derivative of F be continuous. ! ! In addition, we require that the third derivative be continuous ! across TAU(2) and across TAU(NTAU-1). This leads to a strictly ! diagonally dominant tridiagonal linear system for the fsecnd(i) ! which we solve by Gauss elimination without pivoting. ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! Input, real ( kind = 8 ) TAU(NTAU), the sequence of data points. ! TAU must be strictly increasing. ! ! Input, real ( kind = 8 ) GTAU(NTAU), the corresponding sequence of ! function values. ! ! Input, integer NTAU, the number of data points. NTAU must be at least 4. ! ! Input, gamma indicates whether additional flexibility is desired. ! =0., no additional knots ! in (0.,3.), under certain conditions on the given data at ! points i-1, i, i+1, and i+2, a knot is added in the ! i-th interval, i=2,...,ntau-2. see description of meth- ! od below. the interpolant gets rounded with increasing ! gamma. a value of 2.5 for gamma is typical. ! in (3.,6.), same , except that knots might also be added in ! intervals in which an inflection point would be permit- ! ted. a value of 5.5 for gamma is typical. ! ! Output, break, coef, l, k give the pp-representation of the interpolant. ! specifically, for break(i) <= x .le. break(i+1), the ! interpolant has the form ! f(x)=coef(1,i) +dx(coef(2,i) +(dx/2)(coef(3,i) +(dx/3)coef(4,i))) ! with dx=x-break(i) and i=1,...,l . ! ! Output, iflag =1, ok ! =2, input was incorrect. a printout specifying the mistake ! was made. ! workspace ! ! Output, s is required, of size (ntau,6). the individual columns of this ! array contain the following quantities mentioned in the write- ! up and below. ! s(.,1)=dtau = tau(.+1)-tau ! s(.,2)=diag = diagonal in linear system ! s(.,3)=u = upper diagonal in linear system ! s(.,4)=r = right side for linear system (initially) ! = fsecnd = solution of linear system , namely the second ! derivatives of interpolant at tau ! s(.,5)=z = indicator of additional knots ! s(.,6)=1/hsecnd(1,x) with x = z or = 1-z. see below. ! implicit none integer ntau real ( kind = 8 ) alph real ( kind = 8 ) alpha real ( kind = 8 ) break(*) real ( kind = 8 ) c real ( kind = 8 ) coef(4,*) real ( kind = 8 ) d real ( kind = 8 ) del real ( kind = 8 ) denom real ( kind = 8 ) divdif real ( kind = 8 ) entry real ( kind = 8 ) entry3 real ( kind = 8 ) factor real ( kind = 8 ) factr2 real ( kind = 8 ) gam real ( kind = 8 ) gamma real ( kind = 8 ) gtau(ntau) integer i integer iflag integer k integer l integer method real ( kind = 8 ) onemg3 real ( kind = 8 ) onemzt real ( kind = 8 ) ratio real ( kind = 8 ) s(ntau,6) real ( kind = 8 ) sixth real ( kind = 8 ) tau(ntau) real ( kind = 8 ) temp real ( kind = 8 ) x real ( kind = 8 ) z real ( kind = 8 ) zeta real ( kind = 8 ) zt2 alph(x) = min ( 1.0D+00, onemg3 / x ) ! ! There must be at least 4 interpolation points. ! if ( ntau < 4 ) then write ( *, '(a)' ) ' ' write ( *, '(a)' ) 'TAUTSP - Fatal error!' write ( *, '(a)' ) ' Input NTAU must be at least 4.' write ( *, '(a,i6)' ) ' NTAU = ', ntau iflag = 2 stop end if ! ! Construct delta tau and first and second (divided) differences of data. ! do i = 1, ntau-1 s(i,1) = tau(i+1)-tau(i) if ( s(i,1) <= 0.0D+00 ) then write(*,30)i,tau(i),tau(i+1) 30 format (' point ',i3,' and the next',2e15.6,' are disordered') iflag=2 return end if s(i+1,4) = ( gtau(i+1) - gtau(i) ) / s(i,1) end do do i = 2, ntau-1 s(i,4) = s(i+1,4)-s(i,4) end do ! ! Construct system of equations for second derivatives at tau. at each ! interior data point, there is one continuity equation, at the first ! and the last interior data point there is an additional one for a ! total of NTAU equations in ntau unknowns. ! i = 2 s(2,2) = s(1,1) / 3.0D+00 sixth = 1.0D+00 / 6.0D+00 method = 2 gam = gamma if ( gam <= 0.0D+00 ) then method=1 end if if ( 3.0D+00 < gam ) then method = 3 gam = gam - 3.0D+00 end if onemg3 = 1.0D+00 - gam / 3.0D+00 ! ! loop over i ! 70 continue ! ! Construct z(i) and zeta(i) ! z = 0.5D+00 if ( method == 1) then go to 100 end if if ( method == 3) then go to 90 end if if ( s(i,4)*s(i+1,4) < 0.0D+00 ) then go to 100 end if 90 continue temp = abs ( s(i+1,4) ) denom = abs ( s(i,4) ) +temp if ( denom /= 0.0D+00 ) then z = temp/denom if ( abs ( z - 0.5D+00 ) <= sixth ) then z=0.5D+00 end if end if 100 continue s(i,5) = z ! ! Set up part of the i-th equation which depends on the i-th interval. ! if ( z < 0.5D+00 ) then zeta = gam*z onemzt = 1.0D+00 - zeta zt2 = zeta**2 alpha = alph(onemzt) factor = zeta/(alpha*(zt2-1.0D+00 ) + 1.0D+00 ) s(i,6) = zeta*factor / 6.0D+00 s(i,2) = s(i,2) + s(i,1) & * ( ( 1.0D+00 - alpha * onemzt ) * factor / 2.0D+00-s(i,6)) ! ! If z=0 and the previous z = 1, then d(i) = 0. since then ! also u(i-1)=l(i+1) = 0, its value does not matter. reset ! d(i)=1 to insure nonzero pivot in elimination. ! if ( s(i,2) <= 0.0D+00 ) then s(i,2) = 1.0D+00 end if s(i,3)=s(i,1) / 6.0D+00 else if ( z - 0.5D+00 == 0.0D+00 ) then s(i,2)=s(i,2)+s(i,1) / 3.0D+00 s(i,3)=s(i,1) / 6.0D+00 else if ( 0.0D+00 < z - 0.5D+00 ) then onemzt = gam*(1.0D+00 - z) zeta = 1.0D+00 - onemzt alpha = alph(zeta) factor = onemzt/(1.0D+00 - alpha * zeta * ( 1.0D+00 + onemzt ) ) s(i,6) = onemzt*factor / 6.0D+00 s(i,2) = s(i,2)+s(i,1) / 3.0D+00 s(i,3) = s(i,6)*s(i,1) end if if ( 2 < i ) then go to 190 end if s(1,5) = 0.5D+00 ! ! The first two equations enforce continuity of the first and of ! the third derivative across tau(2). ! s(1,2)=s(1,1) / 6.0D+00 s(1,3)=s(2,2) entry3=s(2,3) if ( z-0.5D+00) 150, 160, 170 150 continue factr2 = zeta * ( alpha * ( zt2 - 1.0D+00 ) + 1.0D+00 ) & / ( alpha * ( zeta * zt2 - 1.0D+00 ) + 1.0D+00 ) ratio=factr2*s(2,1)/s(1,2) s(2,2)=factr2*s(2,1)+s(1,1) s(2,3)=-factr2*s(1,1) go to 180 160 continue ratio=s(2,1)/s(1,2) s(2,2)=s(2,1)+s(1,1) s(2,3)=-s(1,1) go to 180 170 continue ratio=s(2,1)/s(1,2) s(2,2)=s(2,1)+s(1,1) s(2,3)=-s(1,1)*6.0D+00 * alpha * s(2,6) ! ! At this point, the first two equations read ! diag(1)*x1+u(1)*x2 + entry3*x3=r(2) ! -ratio*diag(1)*x1+diag(2)*x2 + u(2)*x3=0.0 ! Eliminate first unknown from second equation ! 180 continue s(2,2)=ratio*s(1,3)+s(2,2) s(2,3)=ratio*entry3+s(2,3) s(1,4)=s(2,4) s(2,4)=ratio*s(1,4) go to 200 190 continue ! ! The i-th equation enforces continuity of the first derivative ! across tau(i). it has been set up in statements 35 up to 40 ! and 21 up to 25 and reads now ! -ratio*diag(i-1)*xi-1+diag(i)*xi + u(i)*xi+1=r(i) . ! eliminate (i-1)st unknown from this equation ! s(i,2)=ratio*s(i-1,3)+s(i,2) s(i,4)=ratio*s(i-1,4)+s(i,4) ! ! Set up the part of the next equation which depends on the ! i-th interval. ! 200 continue if ( z- 0.5D+00 ) 210, 220, 230 210 continue ratio = -s(i,6) * s(i,1) / s(i,2) s(i+1,2)=s(i,1) / 3.0D+00 go to 240 220 continue ratio=-(s(i,1) / 6.0D+00 ) / s(i,2) s(i+1,2)=s(i,1) / 3.0D+00 go to 240 230 continue ratio=-( s(i,1) / 6.0D+00 )/s(i,2) s(i+1,2)=s(i,1)*((1.0D+00-zeta*alpha) * factor / 2.0D+00 - s(i,6) ) ! ! end of i loop ! 240 continue i=i+1 if ( i < ntau-1) then go to 70 end if s(i,5) = 0.5D+00 ! ! The last two equations enforce continuity of third derivative and ! of first derivative across tau(ntau-1). ! entry=ratio*s(i-1,3)+s(i,2)+s(i,1)/3.0D+00 s(i+1,2)=s(i,1)/6.0D+00 s(i+1,4)=ratio*s(i-1,4)+s(i,4) if ( z- 0.5D+00 ) 250, 260, 270 250 continue ratio = s(i,1) * 6.0D+00 * s(i-1,6) * alpha / s(i-1,2) s(i,2)=ratio*s(i-1,3)+s(i,1)+s(i-1,1) s(i,3)=-s(i-1,1) go to 280 260 continue ratio=s(i,1)/s(i-1,2) s(i,2)=ratio*s(i-1,3)+s(i,1)+s(i-1,1) s(i,3)=-s(i-1,1) go to 280 270 continue factr2=onemzt*(alpha*(onemzt**2-1.0D+00)+1.0D+00) & /(alpha*(onemzt**3-1.0D+00)+1.0D+00) ratio = factr2*s(i,1) / s(i-1,2) s(i,2)=ratio*s(i-1,3)+factr2*s(i-1,1)+s(i,1) s(i,3)=-factr2*s(i-1,1) ! ! At this point, the last two equations read: ! ! diag(i)*xi+ u(i)*xi+1=r(i) ! -ratio*diag(i)*xi+diag(i+1)*xi+1=r(i+1) ! ! Eliminate XI from the last equation. ! 280 continue s(i,4)=ratio*s(i-1,4) ratio=-entry/s(i,2) s(i+1,2)=ratio*s(i,3)+s(i+1,2) s(i+1,4)=ratio*s(i,4)+s(i+1,4) ! ! Back substitution. ! s(ntau,4) = s(ntau,4) / s(ntau,2) 290 continue s(i,4)=(s(i,4)-s(i,3)*s(i+1,4))/s(i,2) i=i-1 if ( 1 < i ) then go to 290 end if s(1,4)=(s(1,4)-s(1,3)*s(2,4)-entry3*s(3,4))/s(1,2) ! ! Construct polynomial pieces. ! break(1)=tau(1) l=1 do i=1, ntau-1 coef(1,l)=gtau(i) coef(3,l)=s(i,4) divdif=(gtau(i+1)-gtau(i))/s(i,1) z=s(i,5) if ( z- 0.5D+00 ) 300, 310, 320 300 continue if ( z == 0.0D+00 ) go to 330 zeta=gam*z onemzt=1.0D+00-zeta c=s(i+1,4) / 6.0D+00 d=s(i,4)*s(i,6) l=l+1 del=zeta*s(i,1) break(l)=tau(i)+del zt2=zeta**2 alpha=alph(onemzt) factor=onemzt**2*alpha coef(1,l)=gtau(i)+divdif*del+s(i,1)**2*(d*onemzt*(factor-1.0D+00) & +c*zeta*(zt2-1.0D+00)) coef(2,l)=divdif+s(i,1)*(d*(1.0D+00-3.0D+00*factor)+c*(3.0D+00*zt2-1.0D+00)) coef(3,l)=6.0D+00*(d*alpha*onemzt+c*zeta) coef(4,l)=6.0D+00*(c-d*alpha)/s(i,1) coef(4,l-1)=coef(4,l)-6.0D+00*d*(1.0D+00-alpha)/(del*zt2) coef(2,l-1)=coef(2,l)-del*(coef(3,l)-(del/2.0D+00)*coef(4,l-1)) go to 340 310 continue coef(2,l) = divdif - s(i,1) * ( 2.0D+00 * s(i,4) + s(i+1,4) ) / 6.0D+00 coef(4,l)=(s(i+1,4)-s(i,4))/s(i,1) go to 340 320 continue onemzt=gam*(1.0D+00-z) if ( onemzt == 0.0D+00 ) then go to 330 end if zeta = 1.0D+00 - onemzt alpha=alph(zeta) c=s(i+1,4)*s(i,6) d=s(i,4)/6.0D+00 del=zeta*s(i,1) break(l+1)=tau(i)+del coef(2,l)=divdif-s(i,1)*(2.0D+00*d+c) coef(4,l)=6.0D+00*(c*alpha-d)/s(i,1) l=l+1 coef(4,l)=coef(4,l-1)+6.0D+00*(1.0D+00-alpha)*c/(s(i,1)*onemzt**3) coef(3,l)=coef(3,l-1)+del*coef(4,l-1) coef(2,l)=coef(2,l-1)+del*(coef(3,l-1)+(del/2.0D+00)*coef(4,l-1)) coef(1,l)=coef(1,l-1)+del*(coef(2,l-1)+(del/2.0D+00)*(coef(3,l-1) & +(del/3.0D+00)*coef(4,l-1))) go to 340 330 continue coef(2,l) = divdif coef(3,l) = 0D+00 coef(4,l) = 0.0D+00 340 continue l = l + 1 break(l) = tau(i+1) end do l = l - 1 k = 4 iflag = 1 return end diff --git a/pppack/titanium.f90 b/pppack/titanium.f90 index 9879a5f..162e461 100644 --- a/pppack/titanium.f90 +++ b/pppack/titanium.f90 @@ -1,88 +1,88 @@ !> !> @file titanium.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> subroutine titanium ( n, t, g ) !*********************************************************************** ! !! TITANIUM represents a temperature dependent property of titanium. ! ! Discussion: ! ! The data has been used extensively as an example in spline ! approximation with variable knots. ! ! Modified: ! ! 20 November 2000 ! ! Reference: ! ! Carl DeBoor, ! A Practical Guide to Splines, ! Springer Verlag. ! ! Parameters: ! ! Output, integer N, the number of data points, which is 49. ! ! Output, real ( kind = 8 ) T(N), the location of the data points. ! ! Output, real ( kind = 8 ) G(N), the value associated with the data points. ! implicit none real ( kind = 8 ) g(*) integer n real ( kind = 8 ) t(*) n = 49 t(1:49) = (/ & 595.0D+00, 605.0D+00, 615.0D+00, 625.0D+00, 635.0D+00, & 645.0D+00, 655.0D+00, 665.0D+00, 675.0D+00, 685.0D+00, & 695.0D+00, 705.0D+00, 715.0D+00, 725.0D+00, 735.0D+00, & 745.0D+00, 755.0D+00, 765.0D+00, 775.0D+00, 785.0D+00, & 795.0D+00, 805.0D+00, 815.0D+00, 825.0D+00, 835.0D+00, & 845.0D+00, 855.0D+00, 865.0D+00, 875.0D+00, 885.0D+00, & 895.0D+00, 905.0D+00, 915.0D+00, 925.0D+00, 935.0D+00, & 945.0D+00, 955.0D+00, 965.0D+00, 975.0D+00, 985.0D+00, & 995.0D+00, 1005.0D+00, 1015.0D+00, 1025.0D+00, 1035.0D+00, & 1045.0D+00, 1055.0D+00, 1065.0D+00, 1075.0D+00 /) g(1:49) = (/ & 0.644D+00, 0.622D+00, 0.638D+00, 0.649D+00, 0.652D+00, & 0.639D+00, 0.646D+00, 0.657D+00, 0.652D+00, 0.655D+00, & 0.644D+00, 0.663D+00, 0.663D+00, 0.668D+00, 0.676D+00, & 0.676D+00, 0.686D+00, 0.679D+00, 0.678D+00, 0.683D+00, & 0.694D+00, 0.699D+00, 0.710D+00, 0.730D+00, 0.763D+00, & 0.812D+00, 0.907D+00, 1.044D+00, 1.336D+00, 1.881D+00, & 2.169D+00, 2.075D+00, 1.598D+00, 1.211D+00, 0.916D+00, & 0.746D+00, 0.672D+00, 0.627D+00, 0.615D+00, 0.607D+00, & 0.606D+00, 0.609D+00, 0.603D+00, 0.601D+00, 0.603D+00, & 0.601D+00, 0.611D+00, 0.601D+00, 0.608D+00 /) return end diff --git a/pputils2/CMakeLists.txt b/pputils2/CMakeLists.txt index 1b0c5d6..a0cd0e2 100644 --- a/pputils2/CMakeLists.txt +++ b/pputils2/CMakeLists.txt @@ -1,74 +1,74 @@ # # @file CMakeLists.txt # # @brief # # @copyright # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) # SPC (Swiss Plasma Center) # -# spclibs is free software: you can redistribute it and/or modify it under +# SPClibs is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) # any later version. # -# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . # # @authors # (in alphabetical order) # @author Nicolas Richart # @author Trach-Minh Tran # project(pputils2) set(SRCS pputils2.f90 ) set(CMAKE_Fortran_MODULE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/modules ) add_library(pputils2 STATIC ${SRCS}) target_include_directories(pputils2 PRIVATE $ ${MPI_Fortran_INCLUDE_PATH} INTERFACE $ $ ${MPI_Fortran_INCLUDE_PATH} ) target_compile_options(pputils2 PUBLIC ${MPI_Fortran_COMPILE_FLAGS}) target_link_libraries(pputils2 PUBLIC ${MPI_Fortran_LIBRARIES}) set_property(TARGET pputils2 PROPERTY PUBLIC_HEADER ${CMAKE_CURRENT_BINARY_DIR}/modules/pputils.mod) include(GNUInstallDirs) install(TARGETS pputils2 EXPORT ${BSPLINES_EXPORT_TARGETS} LIBRARY DESTINATION ${CNAKE_INSTALL_LIBDIR} ARCHIVE DESTINATION ${CNAKE_INSTALL_LIBDIR} PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} ) if(BSPLINES_EXAMPLES) set(EXAMPLES ex1 ex2 ex3 ex4 ex5 ex6 ex7) foreach(ex ${EXAMPLES}) add_executable(pputils2_${ex} ${ex}.f90) target_link_libraries(pputils2_${ex} pputils2 futils) endforeach() add_test(ex1 ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 4 ${CMAKE_CURRENT_BINARY_DIR}/pputils2_ex1) add_test(ex2 ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 9 ${CMAKE_CURRENT_BINARY_DIR}/pputils2_ex2) add_test(ex3 ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 5 ${CMAKE_CURRENT_BINARY_DIR}/pputils2_ex3) add_test(ex4 ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 12 ${CMAKE_CURRENT_BINARY_DIR}/pputils2_ex4) add_test(ex5 ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 8 ${CMAKE_CURRENT_BINARY_DIR}/pputils2_ex5) add_test(ex6 ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 12 ${CMAKE_CURRENT_BINARY_DIR}/pputils2_ex6) add_test(ex7 ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 6 ${CMAKE_CURRENT_BINARY_DIR}/pputils2_ex7) endif() diff --git a/pputils2/Makefile b/pputils2/Makefile index b0f3ddd..ad0e537 100644 --- a/pputils2/Makefile +++ b/pputils2/Makefile @@ -1,105 +1,105 @@ # # @file Makefile # # @brief # # @copyright # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) # SPC (Swiss Plasma Center) # -# spclibs is free software: you can redistribute it and/or modify it under +# SPClibs is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) # any later version. # -# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . # # @authors # (in alphabetical order) # @author Trach-Minh Tran # F90 = mpiifort CC = cc debug = -g -traceback -CB optim = -O3 -xSSE4.2 #OPT=$(debug) OPT=$(optim) F90FLAGS = $(OPT) -I. -I$(FUTILS)/include -I${HDF5}/lib CFLAGS = -O2 LDFLAGS = $(OPT) -fPIC -L. -L$(FUTILS)/lib -L${HDF5}/lib LIBS = -lfutils pputils2.o -lhdf5_fortran -lhdf5 -lz .SUFFIXES: .SUFFIXES: .o .c .f90 .f90.o: $(F90) $(F90FLAGS) -c $< all: ex1 ex2 ex3 ex4 ex5 ex6 ex7 lib: libpputils2.a libpputils2.a: pputils2.o xiar r $@ $? ranlib $@ ex1: ex1.o $(F90) $(LDFLAGS) -o $@ $< $(LIBS) ex2: ex2.o $(F90) $(LDFLAGS) -o $@ $< $(LIBS) ex3: ex3.o $(F90) $(LDFLAGS) -o $@ $< $(LIBS) ex4: ex4.o $(F90) $(LDFLAGS) -o $@ $< $(LIBS) ex5: ex5.o $(F90) $(LDFLAGS) -o $@ $< $(LIBS) ex6: ex6.o $(F90) $(LDFLAGS) -o $@ $< $(LIBS) ex7: ex7.o $(F90) $(LDFLAGS) -o $@ $< $(LIBS) tests: ex1 ex2 ex3 ex4 ex5 ex6 ex7 @echo ==== Running ex1 ====== @mpiexec -n 4 ./ex1 @echo ==== Running ex2 ====== @mpiexec -n 9 ./ex2 @echo ==== Running ex3 ====== @mpiexec -n 5 ./ex3 @echo ==== Running ex4 ====== @mpiexec -n 12 ./ex4 @echo ==== Running ex5 ====== @mpiexec -n 8 ./ex5 @echo ==== Running ex6 ====== @mpiexec -n 12 ./ex6 @echo ==== Running ex7 ====== @mpiexec -n 6 ./ex7 ex1.o: pputils2.o ex2.o: pputils2.o ex3.o: pputils2.o ex4.o: pputils2.o ex5.o: pputils2.o ex6.o: pputils2.o ex7.o: pputils2.o tags: etags *.f90 $(FUTILS)/futils.f90 clean: rm -f *.o *~ a.out distclean: clean rm -f ex1 ex2 ex3 ex4 ex5 ex6 ex7 *.h5 *.a *.mod diff --git a/pputils2/ex1.f90 b/pputils2/ex1.f90 index 46dcca6..5d1fb27 100644 --- a/pputils2/ex1.f90 +++ b/pputils2/ex1.f90 @@ -1,113 +1,113 @@ !> !> @file ex1.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Tranpsose of 2d matrix partitionned on a 1d proc grid: ! - A(n1,n2/P1) -> AT(n2,n1/P1) -> B(n1,n2/P1) ! USE pputils2 USE futils IMPLICIT NONE INCLUDE "mpif.h" CHARACTER(len=32) :: file='ex1.h5' INTEGER :: fid ! INTEGER, PARAMETER :: ndims=1 ! N. of dims of proc. grid INTEGER :: ierr, me, npes INTEGER, DIMENSION(ndims) :: dims, coords LOGICAL :: periods(ndims), reorder INTEGER :: cart ! INTEGER :: n1, n2, n1p, n2p DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: a, atr, b DOUBLE PRECISION :: x INTEGER :: i, j, iglob, jglob, kerrors, nerrors !================================================================================ ! ! Init MPI CALL mpi_init(ierr) CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) ! ! Create cartesian topololy dims = npes periods = (/.FALSE./) reorder = .FALSE. IF( PRODUCT(dims) .NE. npes ) THEN IF( me .EQ. 0 ) THEN PRINT*, PRODUCT(dims), " processors required!" CALL mpi_abort(MPI_COMM_WORLD, -1, ierr) END IF END IF CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, cart, ierr) CALL mpi_cart_coords(cart, me, ndims, coords, ierr) ! ! Define local array n1p=2; n1=n1p*dims(1) n2p=2; n2=n2p*dims(1) ALLOCATE( a(n1,n2p), atr(n2,n1p), b(n1,n2p) ) a = 0 atr = 0 b = 0 DO i=1,n1 DO j=1,n2p jglob = coords(1)*n2p + j a(i,j) = 10*i + jglob END DO END DO IF( me.EQ. 0 ) THEN WRITE(*,'(a,3i4)') 'Global dimension of matrix a', n1, n2 END IF ! ! Tranpose A(n1,n2/P1) -> AT(n2,n1/P1) -> B(n1,n2/P1) CALL pptransp(cart, a, atr) CALL pptransp(cart, atr, b) ! ! Check ATR kerrors = 0 DO i=1,n1p iglob = coords(1)*n1p + i DO j=1,n2 x = 10*iglob + j IF( x .NE. atr(j,i) ) kerrors = kerrors+1 END DO END DO CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, & & MPI_COMM_WORLD, ierr) IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking ATR', nerrors ! ! Write to file ! CALL creatf(file, fid, mpicomm=cart) CALL putarrnd(fid, '/arraya', a, (/2/) ) CALL putarrnd(fid, '/arrayat', atr, (/2/) ) CALL putarrnd(fid, '/arrayb', b, (/2/) ) ! ! Clean up and quit DEALLOCATE(a, atr) CALL closef(fid) CALL mpi_finalize(ierr) END PROGRAM main diff --git a/pputils2/ex2.f90 b/pputils2/ex2.f90 index 8b8d1c2..f951468 100644 --- a/pputils2/ex2.f90 +++ b/pputils2/ex2.f90 @@ -1,170 +1,170 @@ !> !> @file ex2.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Tranpsose of matrices partitionned on a 2d proc grid: ! - A(n1,n2/P1,n3/P2) -> AT(n3,n2/P1,n1/P2) ! - B(n1,n2,n3/P1,n4/P2) -> BT(n4,n2,n3/P1,n1/P2) ! USE pputils2 USE futils IMPLICIT NONE INCLUDE "mpif.h" CHARACTER(len=32) :: file='ex2.h5' INTEGER :: fid ! INTEGER, PARAMETER :: ndims=2 ! N. of dims of proc. grid INTEGER :: ierr, me, npes INTEGER, DIMENSION(ndims) :: dims, coords LOGICAL :: periods(ndims), reorder INTEGER :: cart, cartcol, cartrow ! INTEGER :: n1, n2, n3, n4, n1p, n2p, n3p, n4p DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: a3, a3t DOUBLE PRECISION, DIMENSION(:,:,:,:), ALLOCATABLE :: b4, b4t DOUBLE PRECISION :: x INTEGER :: i, j, k, l, iglob, jglob, kglob, lglob, kerrors, nerrors !================================================================================ ! ! Init MPI CALL mpi_init(ierr) CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) ! ! Create cartesian topololy dims = (/3, 3/) periods = (/.FALSE., .TRUE./) reorder = .FALSE. IF( PRODUCT(dims) .NE. npes ) THEN IF( me .EQ. 0 ) THEN PRINT*, PRODUCT(dims), " processors required!" CALL mpi_abort(MPI_COMM_WORLD, -1, ierr) END IF END IF CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, cart, ierr) CALL mpi_cart_coords(cart, me, ndims, coords, ierr) CALL mpi_cart_sub(cart, (/.TRUE., .FALSE. /), cartcol, ierr) CALL mpi_cart_sub(cart, (/.FALSE., .TRUE. /), cartrow, ierr) ! ! Define local array A3 n1p=2; n1=n1p*dims(2) n2p=4; n2=n2p*dims(1) n3p=3; n3=n3p*dims(2) ALLOCATE( a3(n1,n2p,n3p), a3t(n3,n2p,n1p) ) a3 = 0 a3t = 0 DO i=1,n1 DO j=1,n2p jglob = coords(1)*n2p + j DO k=1,n3p kglob = coords(2)*n3p + k a3(i,j,k) = 10000*i + 100*jglob + kglob END DO END DO END DO IF( me .EQ. 0 ) THEN WRITE(*,'(a,3i4)') 'Global dimension of matrix a', n1, n2, n3 END IF ! ! Tranpose A3(n1,n2/P1,n3/P2) -> A3T(n3,n2/P1,n1/P2) CALL pptransp(cartrow, a3, a3t, 1, 3) ! ! Check A3T kerrors = 0 DO i=1,n1p iglob = coords(2)*n1p + i DO j=1,n2p jglob = coords(1)*n2p + j DO k=1,n3 x = 10000*iglob + 100*jglob + k IF( x .NE. a3t(k,j,i) ) kerrors = kerrors+1 END DO END DO END DO CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, & & MPI_COMM_WORLD, ierr) IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking a3', nerrors ! ! Define local array B4 n1p=2; n1=n1p*dims(2) n2p=4; n2=n2p*dims(1) n3p=3; n3=n3p*dims(1) n4p=3; n4=n4p*dims(2) ALLOCATE( b4(n1,n2,n3p,n4p), b4t(n4,n2,n3p,n1p) ) b4 = 0 b4t = 0 DO i=1,n1 DO j=1,n2 DO k=1,n3p kglob = coords(1)*n3p + k DO l=1,n4p lglob = coords(2)*n4p + l b4(i,j,k,l) = 1000000*i + 10000*j + 100*kglob +lglob END DO END DO END DO END DO IF( me.EQ. 0 ) THEN WRITE(*,'(a,4i4)') 'Global dimension of matrix b', n1, n2, n3, n4 END IF ! ! Tranpose B4(n1,n2,n3/P1,n4/P2) -> B4T(n4,n2,n3/P1,n1/P2) CALL pptransp(cartrow, b4, b4t, 1, 4) ! CALL mpi_barrier(MPI_COMM_WORLD, ierr) ! ! Check B4T kerrors = 0 DO i=1,n1p iglob = coords(2)*n1p + i DO j=1,n2 DO k=1,n3p kglob = coords(1)*n3p + k DO l=1,n4 x = 1000000*iglob + 10000*j + 100*kglob + l IF( x .NE. b4t(l,j,k,i) ) kerrors = kerrors+1 END DO END DO END DO END DO CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, & & MPI_COMM_WORLD, ierr) IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking b4', nerrors ! ! Write to file ! CALL creatf(file, fid, mpicomm=cart) CALL putarrnd(fid, '/a3' , a3, (/2,3/) ) CALL putarrnd(fid, '/a3t', a3t,(/2,3/) ) CALL putarrnd(fid, '/b4' , b4, (/3,4/) ) CALL putarrnd(fid, '/b4t', b4t,(/3,4/) ) ! Clean up and quit DEALLOCATE(a3, a3t) DEALLOCATE(b4, b4t) CALL closef(fid) CALL mpi_finalize(ierr) END PROGRAM main diff --git a/pputils2/ex3.f90 b/pputils2/ex3.f90 index dabf46b..04de1bc 100644 --- a/pputils2/ex3.f90 +++ b/pputils2/ex3.f90 @@ -1,111 +1,111 @@ !> !> @file ex3.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Tranpsose of 2d matrix partitionned on a 1d proc grid ! - A(n1,n2/P1) -> AT(n2,n1/P1) ! n1, n2 NOT REQUIRED to be divided evenly by NPES ! USE pputils2 USE futils IMPLICIT NONE INCLUDE "mpif.h" CHARACTER(len=32) :: file='ex3.h5' INTEGER :: fid ! INTEGER, PARAMETER :: ndims=1 ! N. of dims of proc. grid INTEGER :: ierr, me, npes INTEGER, DIMENSION(ndims) :: dims, coords LOGICAL :: periods(ndims), reorder INTEGER :: cart ! INTEGER :: n1=9, n2=8, s1, s2, n1p, n2p DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: a, atr DOUBLE PRECISION :: x INTEGER :: i, j, iglob, jglob, kerrors, nerrors !================================================================================ ! ! Init MPI CALL mpi_init(ierr) CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) ! ! Create cartesian topololy dims = npes periods = (/.FALSE./) reorder = .FALSE. IF( PRODUCT(dims) .NE. npes ) THEN IF( me .EQ. 0 ) THEN PRINT*, PRODUCT(dims), " processors required!" CALL mpi_abort(MPI_COMM_WORLD, -1, ierr) END IF END IF CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, cart, ierr) CALL mpi_cart_coords(cart, me, ndims, coords, ierr) ! ! Partition array CALL dist1d(cart, 0, n1, s1, n1p) CALL dist1d(cart, 0, n2, s2, n2p) ALLOCATE( a(n1,n2p), atr(n2,n1p) ) a = 0 atr = 0 DO i=1,n1 DO j=1,n2p jglob = s2 + j a(i,j) = 10*i + jglob END DO END DO IF( me.EQ. 0 ) THEN WRITE(*,'(a,3i4)') 'Global dimension of matrix a', n1, n2 END IF ! ! Tranpose A(n1,n2/P1) -> ATR(n2,n1/P1) CALL pptransp(cart, a, atr) ! ! Check ATR kerrors = 0 DO i=1,n1p iglob = s1 + i DO j=1,n2 x = 10*iglob + j IF( x .NE. atr(j,i) ) kerrors = kerrors+1 END DO END DO CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, & & MPI_COMM_WORLD, ierr) IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking ATR', nerrors ! ! Write to file ! CALL creatf(file, fid, mpicomm=cart) CALL putarrnd(fid, '/arraya', a, (/2/) ) CALL putarrnd(fid, '/arrayat', atr, (/2/) ) ! ! Clean up and quit DEALLOCATE(a, atr) CALL closef(fid) CALL mpi_finalize(ierr) END PROGRAM main diff --git a/pputils2/ex4.f90 b/pputils2/ex4.f90 index 1f75216..1ab7721 100644 --- a/pputils2/ex4.f90 +++ b/pputils2/ex4.f90 @@ -1,171 +1,171 @@ !> !> @file ex4.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Tranpsose of matrices partitionned on a 2d proc grid: ! - A(n1,n2/P1,n3/P2) -> AT(n3,n2/P1,n1/P2) ! - B(n1,n2,n3/P1,n4/P2) -> BT(n4,n2,n3/P1,n1/P2) ! n1, n2, n3, n4 NOT REQUIRED to be divided evenly by NPES ! USE pputils2 USE futils IMPLICIT NONE INCLUDE "mpif.h" CHARACTER(len=32) :: file='ex4.h5' INTEGER :: fid ! INTEGER, PARAMETER :: ndims=2 ! N. of dims of proc. grid INTEGER :: ierr, me, npes INTEGER, DIMENSION(ndims) :: dims, coords LOGICAL :: periods(ndims), reorder INTEGER :: cart, cartcol, cartrow ! INTEGER :: n1=15, n2=10, n3=9, n4=8, n1p, n2p, n3p, n4p, s1, s2, s3, s4 DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: a3, a3t DOUBLE PRECISION, DIMENSION(:,:,:,:), ALLOCATABLE :: b4, b4t DOUBLE PRECISION :: x INTEGER :: i, j, k, l, iglob, jglob, kglob, lglob, kerrors, nerrors !================================================================================ ! ! Init MPI CALL mpi_init(ierr) CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) ! ! Create cartesian topololy dims = (/4, 3/) periods = (/.FALSE., .TRUE./) reorder = .FALSE. IF( PRODUCT(dims) .NE. npes ) THEN IF( me .EQ. 0 ) THEN PRINT*, PRODUCT(dims), " processors required!" CALL mpi_abort(MPI_COMM_WORLD, -1, ierr) END IF END IF CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, cart, ierr) CALL mpi_cart_coords(cart, me, ndims, coords, ierr) CALL mpi_cart_sub(cart, (/.TRUE., .FALSE. /), cartcol, ierr) CALL mpi_cart_sub(cart, (/.FALSE., .TRUE. /), cartrow, ierr) ! ! Define local array A3 CALL dist1d(cartrow, 0, n1, s1, n1p) CALL dist1d(cartcol, 0, n2, s2, n2p) CALL dist1d(cartrow, 0, n3, s3, n3p) ALLOCATE( a3(n1,n2p,n3p), a3t(n3,n2p,n1p) ) a3 = 0 a3t = 0 DO i=1,n1 DO j=1,n2p jglob = s2 + j DO k=1,n3p kglob = s3 + k a3(i,j,k) = 10000*i + 100*jglob + kglob END DO END DO END DO IF( me.EQ. 0 ) THEN WRITE(*,'(a,3i4)') 'Global dimension of matrix a', n1, n2, n3 END IF ! ! Tranpose A3(n1,n2/P1,n3/P2) -> A3T(n3,n2/P1,n1/P2) CALL pptransp(cartrow, a3, a3t, 1, 3) ! ! Check A3T kerrors = 0 DO i=1,n1p iglob = s1 + i DO j=1,n2p jglob = s2 + j DO k=1,n3 x = 10000*iglob + 100*jglob + k IF( x .NE. a3t(k,j,i) ) kerrors = kerrors+1 END DO END DO END DO CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, & & MPI_COMM_WORLD, ierr) IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking a3', nerrors ! ! Define local array B4 CALL dist1d(cartrow, 0, n1, s1, n1p) CALL dist1d(cartcol, 0, n3, s3, n3p) CALL dist1d(cartrow, 0, n4, s4, n4p) ALLOCATE( b4(n1,n2,n3p,n4p), b4t(n4,n2,n3p,n1p) ) b4 = 0 b4t = 0 DO i=1,n1 DO j=1,n2 DO k=1,n3p kglob = s3 + k DO l=1,n4p lglob = s4 + l b4(i,j,k,l) = 1000000*i + 10000*j + 100*kglob +lglob END DO END DO END DO END DO IF( me.EQ. 0 ) THEN WRITE(*,'(a,4i4)') 'Global dimension of matrix b', n1, n2, n3, n4 END IF ! ! Tranpose B4(n1,n2,n3/P1,n4/P2) -> B4T(n4,n2,n3/P1,n1/P2) !!$ CALL pptransp(cartrow, b4, b4t) CALL pptransp(cartrow, b4, b4t, 1, 4) ! CALL mpi_barrier(MPI_COMM_WORLD, ierr) ! ! Check B4T kerrors = 0 DO i=1,n1p iglob = s1 + i DO j=1,n2 DO k=1,n3p kglob = s3 + k DO l=1,n4 x = 1000000*iglob + 10000*j + 100*kglob + l IF( x .NE. b4t(l,j,k,i) ) kerrors = kerrors+1 END DO END DO END DO END DO CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, & & MPI_COMM_WORLD, ierr) IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking b4', nerrors ! ! Write to file ! CALL creatf(file, fid, mpicomm=cart) CALL putarrnd(fid, '/a3' , a3, (/2,3/) ) CALL putarrnd(fid, '/a3t', a3t,(/2,3/) ) CALL putarrnd(fid, '/b4' , b4, (/3,4/) ) CALL putarrnd(fid, '/b4t', b4t,(/3,4/) ) ! Clean up and quit DEALLOCATE(a3, a3t) DEALLOCATE(b4, b4t) CALL closef(fid) CALL mpi_finalize(ierr) END PROGRAM main diff --git a/pputils2/ex5.f90 b/pputils2/ex5.f90 index 181a2dc..3fc88b1 100644 --- a/pputils2/ex5.f90 +++ b/pputils2/ex5.f90 @@ -1,221 +1,221 @@ !> !> @file ex5.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Tranpsose of matrices partitionned on a 2d proc grid: ! - A(n1,n2/P1,n3/P2) -> AT(n2,n1/P1,n3/P2) ! - B(n1,n2,n3/P1,n4/P2) -> BT(n3,n2,n1/P1,n4/P2) ! - C(n1,n2/P1,n3,n4/P2) -> CT(n2,n1/P1,n3,n4/P2) ! n1, n2, n3, n4 NOT REQUIRED to be divided evenly by NPES ! USE pputils2 USE futils IMPLICIT NONE INCLUDE "mpif.h" CHARACTER(len=32) :: file='ex4.h5' INTEGER :: fid ! INTEGER, PARAMETER :: ndims=2 ! N. of dims of proc. grid INTEGER :: ierr, me, npes INTEGER, DIMENSION(ndims) :: dims, coords LOGICAL :: periods(ndims), reorder INTEGER :: cart, cartcol, cartrow ! INTEGER :: n1=8, n2=10, n3=6, n4=5, n1p, n2p, n3p, n4p, s1, s2, s3, s4 DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: a3, a3t DOUBLE PRECISION, DIMENSION(:,:,:,:), ALLOCATABLE :: b4, b4t, c4, c4t DOUBLE PRECISION :: x INTEGER :: i, j, k, l, iglob, jglob, kglob, lglob, kerrors, nerrors !================================================================================ ! ! Init MPI CALL mpi_init(ierr) CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) ! ! Create cartesian topololy dims = (/4, 2/) periods = (/.FALSE., .TRUE./) reorder = .FALSE. IF( PRODUCT(dims) .NE. npes ) THEN IF( me .EQ. 0 ) THEN PRINT*, PRODUCT(dims), " processors required!" CALL mpi_abort(MPI_COMM_WORLD, -1, ierr) END IF END IF CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, cart, ierr) CALL mpi_cart_coords(cart, me, ndims, coords, ierr) CALL mpi_cart_sub(cart, (/.TRUE., .FALSE. /), cartcol, ierr) CALL mpi_cart_sub(cart, (/.FALSE., .TRUE. /), cartrow, ierr) ! ! Define local array A3 CALL dist1d(cartcol, 0, n1, s1, n1p) CALL dist1d(cartcol, 0, n2, s2, n2p) CALL dist1d(cartrow, 0, n3, s3, n3p) ALLOCATE( a3(n1,n2p,n3p), a3t(n2,n1p,n3p) ) a3 = 0 a3t = 0 DO i=1,n1 DO j=1,n2p jglob = s2 + j DO k=1,n3p kglob = s3 + k a3(i,j,k) = 10000*i + 100*jglob + kglob END DO END DO END DO IF( me .EQ. 0 ) THEN WRITE(*,'(a,4i4)') 'Global dimension of matrix a', n1, n2, n3 END IF ! ! Tranpose A(n1,n2/P1,n3/P2) -> AT(n2,n1/P1,n3/P2) CALL pptransp(cartcol, a3, a3t, 1, 2) ! CALL mpi_barrier(MPI_COMM_WORLD, ierr) ! ! Check A3T kerrors = 0 DO i=1,n1p iglob = s1 + i DO j=1,n2 DO k=1,n3p kglob = s3 + k x = 10000*iglob + 100*j + kglob IF( x .NE. a3t(j,i,k) ) kerrors = kerrors+1 END DO END DO END DO CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, & & MPI_COMM_WORLD, ierr) IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking a3', nerrors ! ! Define local array B4 CALL dist1d(cartcol, 0, n1, s1, n1p) CALL dist1d(cartcol, 0, n3, s3, n3p) CALL dist1d(cartrow, 0, n4, s4, n4p) ALLOCATE( b4(n1,n2,n3p,n4p), b4t(n3,n2,n1p,n4p) ) b4 = 0 b4t = 0 DO i=1,n1 DO j=1,n2 DO k=1,n3p kglob = s3 + k DO l=1,n4p lglob = s4 + l b4(i,j,k,l) = 1000000*i + 10000*j + 100*kglob +lglob END DO END DO END DO END DO IF( me .EQ. 0 ) THEN WRITE(*,'(a,4i4)') 'Global dimension of matrix b', n1, n2, n3, n4 END IF ! ! Tranpose B(n1,n2,n3/P1,n4/P2) -> BT(n3,n2,n1/P1,n4/P2) CALL pptransp(cartcol, b4, b4t, 1, 3) ! CALL mpi_barrier(MPI_COMM_WORLD, ierr) ! ! Check B4T kerrors = 0 DO i=1,n1p iglob = s1 + i DO j=1,n2 DO k=1,n3 DO l=1,n4p lglob = s4 + l x = 1000000*iglob + 10000*j + 100*k + lglob IF( x .NE. b4t(k,j,i,l) ) kerrors = kerrors+1 END DO END DO END DO END DO CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, & & MPI_COMM_WORLD, ierr) IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking b4', nerrors ! ! Define local array C4 CALL dist1d(cartcol, 0, n1, s1, n1p) CALL dist1d(cartcol, 0, n2, s2, n2p) CALL dist1d(cartrow, 0, n4, s4, n4p) ALLOCATE( c4(n1,n2p,n3,n4p), c4t(n2,n1p,n3,n4p) ) c4 = 0 c4t = 0 DO i=1,n1 DO j=1,n2p jglob = s2 + j DO k=1,n3 DO l=1,n4p lglob = s4 + l c4(i,j,k,l) = 1000000*i + 10000*jglob + 100*k +lglob END DO END DO END DO END DO IF( me.EQ. 0 ) THEN WRITE(*,'(a,4i4)') 'Global dimension of matrix c', n1, n2, n3, n4 END IF ! ! Tranpose C(n1,n2/P1,n3,n4/P2) -> CT(n2,n1/P1,n3,n4/P2) CALL pptransp(cartcol, c4, c4t, 1, 2) ! CALL mpi_barrier(MPI_COMM_WORLD, ierr) ! ! Check C4T kerrors = 0 DO i=1,n1p iglob = s1 + i DO j=1,n2 DO k=1,n3 DO l=1,n4p lglob = s4 + l x = 1000000*iglob + 10000*j + 100*k + lglob IF( x .NE. c4t(j,i,k,l) ) kerrors = kerrors+1 END DO END DO END DO END DO CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, & & MPI_COMM_WORLD, ierr) IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking c4', nerrors ! ! Write to file ! CALL creatf(file, fid, mpicomm=cart) CALL putarrnd(fid, '/a3' , a3, (/2,3/) ) CALL putarrnd(fid, '/a3t', a3t,(/2,3/) ) CALL putarrnd(fid, '/b4' , b4, (/3,4/) ) CALL putarrnd(fid, '/b4t', b4t,(/3,4/) ) CALL putarrnd(fid, '/c4' , c4, (/2,4/) ) CALL putarrnd(fid, '/c4t', c4t,(/2,4/) ) ! Clean up and quit DEALLOCATE(a3, a3t) DEALLOCATE(b4, b4t) DEALLOCATE(c4, c4t) CALL closef(fid) CALL mpi_finalize(ierr) END PROGRAM main diff --git a/pputils2/ex6.f90 b/pputils2/ex6.f90 index defba7c..432c083 100644 --- a/pputils2/ex6.f90 +++ b/pputils2/ex6.f90 @@ -1,270 +1,270 @@ !> !> @file ex6.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Tranpsose of matrices partitionned on a 2d proc grid: ! - A(n1/P1,n2,n3/P2) -> AT(n1/P1,n3,n2/P2) ! - B(n1,n2,n3/P1,n4/P2) -> BT(n1,n3,n2/P1,n4/P2) ! - C(n1,n2,n3/P1,n4/P2) -> CT(n1,n4,n3/P1,n2/P2) ! - D(n1,n2/P1,n3,n4/P2) -> DT(n1,n2/P1,n4,n3/P2) ! n1, n2, n3, n4 NOT REQUIRED to be divided evenly by NPES ! USE pputils2 USE futils IMPLICIT NONE INCLUDE "mpif.h" CHARACTER(len=32) :: file='ex4.h5' INTEGER :: fid ! INTEGER, PARAMETER :: ndims=2 ! N. of dims of proc. grid INTEGER :: ierr, me, npes INTEGER, DIMENSION(ndims) :: dims, coords LOGICAL :: periods(ndims), reorder INTEGER :: cart, cartcol, cartrow ! INTEGER :: n1=8, n2=10, n3=6, n4=5, n1p, n2p, n3p, n4p, s1, s2, s3, s4 DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: a3, a3t DOUBLE PRECISION, DIMENSION(:,:,:,:), ALLOCATABLE :: b4, b4t, c4, c4t, d4, d4t DOUBLE PRECISION :: x INTEGER :: i, j, k, l, iglob, jglob, kglob, lglob, kerrors, nerrors !================================================================================ ! ! Init MPI CALL mpi_init(ierr) CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) ! ! Create cartesian topololy dims = (/4, 3/) periods = (/.FALSE., .TRUE./) reorder = .FALSE. IF( PRODUCT(dims) .NE. npes ) THEN IF( me .EQ. 0 ) THEN PRINT*, PRODUCT(dims), " processors required!" CALL mpi_abort(MPI_COMM_WORLD, -1, ierr) END IF END IF CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, cart, ierr) CALL mpi_cart_coords(cart, me, ndims, coords, ierr) CALL mpi_cart_sub(cart, (/.TRUE., .FALSE. /), cartcol, ierr) CALL mpi_cart_sub(cart, (/.FALSE., .TRUE. /), cartrow, ierr) ! ! Define local array A3 CALL dist1d(cartcol, 0, n1, s1, n1p) CALL dist1d(cartrow, 0, n2, s2, n2p) CALL dist1d(cartrow, 0, n3, s3, n3p) ALLOCATE( a3(n1p,n2,n3p), a3t(n1p,n3,n2p) ) a3 = 0 a3t = 0 DO i=1,n1p iglob = s1 + i DO j=1,n2 DO k=1,n3p kglob = s3 + k a3(i,j,k) = 10000*iglob + 100*j + kglob END DO END DO END DO IF( me .EQ. 0 ) THEN WRITE(*,'(a,4i4)') 'Global dimension of matrix a', n1, n2, n3 END IF ! ! Tranpose A(n1/P1,n2,n3/P2) -> AT(n1/P1,n3,n2/P2) CALL pptransp(cartrow, a3, a3t, 2, 3) ! CALL mpi_barrier(MPI_COMM_WORLD, ierr) ! ! Check A3T kerrors = 0 DO i=1,n1p iglob = s1 + i DO j=1,n2p jglob = s2 + j DO k=1,n3 x = 10000*iglob + 100*jglob + k IF( x .NE. a3t(i,k,j) ) kerrors = kerrors+1 END DO END DO END DO CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, & & MPI_COMM_WORLD, ierr) IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking a3', nerrors ! ! Define local array B4 CALL dist1d(cartcol, 0, n2, s2, n2p) CALL dist1d(cartcol, 0, n3, s3, n3p) CALL dist1d(cartrow, 0, n4, s4, n4p) ALLOCATE( b4(n1,n2,n3p,n4p), b4t(n1,n3,n2p,n4p) ) b4 = 0 b4t = 0 DO i=1,n1 DO j=1,n2 DO k=1,n3p kglob = s3 + k DO l=1,n4p lglob = s4 + l b4(i,j,k,l) = 1000000*i + 10000*j + 100*kglob +lglob END DO END DO END DO END DO IF( me .EQ. 0 ) THEN WRITE(*,'(a,4i4)') 'Global dimension of matrix b', n1, n2, n3, n4 END IF ! ! Tranpose B(n1,n2,n3/P1,n4/P2) -> BT(n1,n3,n2/P1,n4/P2) CALL pptransp(cartcol, b4, b4t, 2, 3) ! CALL mpi_barrier(MPI_COMM_WORLD, ierr) ! ! Check B4T kerrors = 0 DO i=1,n1 DO j=1,n2p jglob = s2 + j DO k=1,n3 DO l=1,n4p lglob = s4 + l x = 1000000*i + 10000*jglob + 100*k + lglob IF( x .NE. b4t(i,k,j,l) ) kerrors = kerrors+1 END DO END DO END DO END DO CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, & & MPI_COMM_WORLD, ierr) IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking b4', nerrors ! ! Define local array C4 CALL dist1d(cartrow, 0, n2, s2, n2p) CALL dist1d(cartcol, 0, n3, s3, n3p) CALL dist1d(cartrow, 0, n4, s4, n4p) ALLOCATE( c4(n1,n2,n3p,n4p), c4t(n1,n4,n3p,n2p) ) c4 = 0 c4t = 0 DO i=1,n1 DO j=1,n2 DO k=1,n3p kglob = s3 + k DO l=1,n4p lglob = s4 + l c4(i,j,k,l) = 1000000*i + 10000*j + 100*kglob +lglob END DO END DO END DO END DO IF( me.EQ. 0 ) THEN WRITE(*,'(a,4i4)') 'Global dimension of matrix c', n1, n2, n3, n4 END IF ! ! Tranpose C(n1,n2,n3/P1,n4/P2) -> CT(n1,n4,n3/P1,n2/P2) CALL pptransp(cartrow, c4, c4t, 2, 4) ! CALL mpi_barrier(MPI_COMM_WORLD, ierr) ! ! Check C4T kerrors = 0 DO i=1,n1 DO j=1,n2p jglob = s2 + j DO k=1,n3p kglob = s3 + k DO l=1,n4 x = 1000000*i + 10000*jglob + 100*kglob + l IF( x .NE. c4t(i,l,k,j) ) kerrors = kerrors+1 END DO END DO END DO END DO CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, & & MPI_COMM_WORLD, ierr) IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking c4', nerrors ! ! Define local array D4 CALL dist1d(cartcol, 0, n2, s2, n2p) CALL dist1d(cartrow, 0, n3, s3, n3p) CALL dist1d(cartrow, 0, n4, s4, n4p) ALLOCATE( d4(n1,n2p,n3,n4p), d4t(n1,n2p,n4,n3p) ) d4 = 0 d4t = 0 DO i=1,n1 DO j=1,n2p jglob = s2 + j DO k=1,n3 DO l=1,n4p lglob = s4 + l d4(i,j,k,l) = 1000000*i + 10000*jglob + 100*k +lglob END DO END DO END DO END DO IF( me.EQ. 0 ) THEN WRITE(*,'(a,4i4)') 'Global dimension of matrix d', n1, n2, n3, n4 END IF ! ! Tranpose D(n1,n2/P1,n3,n4/P2) -> DT(n1,n2/P1,n4,n3/P2) CALL pptransp(cartrow, d4, d4t, 3, 4) ! CALL mpi_barrier(MPI_COMM_WORLD, ierr) ! ! Check D4T kerrors = 0 DO i=1,n1 DO j=1,n2p jglob = s2 + j DO k=1,n3p kglob = s3 + k DO l=1,n4 x = 1000000*i + 10000*jglob + 100*kglob + l IF( x .NE. d4t(i,j,l,k) ) kerrors = kerrors+1 END DO END DO END DO END DO CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, & & MPI_COMM_WORLD, ierr) IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking d4', nerrors ! ! Write to file ! CALL creatf(file, fid, mpicomm=cart) CALL putarrnd(fid, '/a3' , a3, (/1,3/) ) CALL putarrnd(fid, '/a3t', a3t,(/1,3/) ) CALL putarrnd(fid, '/b4' , b4, (/3,4/) ) CALL putarrnd(fid, '/b4t', b4t,(/3,4/) ) CALL putarrnd(fid, '/c4' , c4, (/3,4/) ) CALL putarrnd(fid, '/c4t', c4t,(/3,4/) ) CALL putarrnd(fid, '/d4' , d4, (/2,4/) ) CALL putarrnd(fid, '/d4t', d4t,(/2,4/) ) ! ! Clean up and quit DEALLOCATE(a3, a3t) DEALLOCATE(b4, b4t) DEALLOCATE(c4, c4t) DEALLOCATE(d4, d4t) CALL closef(fid) CALL mpi_finalize(ierr) END PROGRAM main diff --git a/pputils2/ex7.f90 b/pputils2/ex7.f90 index 0247599..7a8bb6f 100644 --- a/pputils2/ex7.f90 +++ b/pputils2/ex7.f90 @@ -1,160 +1,160 @@ !> !> @file ex7.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Tranpsose of 3d matrices partitionned in 1 and 2 proc grid: ! - A(n1,n2,n3/P) -> AT(n3,n2,n1/P) ! - B(n1,n2/P1,n3/P2) -> BT(n3,n2/P1,n1/P2) ! n1, n2, n3 NOT REQUIRED to be divided evenly by P ! USE pputils2 IMPLICIT NONE INCLUDE "mpif.h" INTEGER :: ierr, me, npes, comm=MPI_COMM_WORLD INTEGER :: n1=15, n2=10, n3=20, n1p, n2p, n3p, s1, s2,s3 DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: a3, a3t DOUBLE PRECISION :: x INTEGER :: i, j, k, iglob, jglob, kglob, kerrors, nerrors ! INTEGER, PARAMETER :: ndims=2 ! N. of dims of proc. grid INTEGER, DIMENSION(ndims) :: dims, coords LOGICAL :: periods(ndims), reorder INTEGER :: cart, cartcol, cartrow !================================================================================ ! ! Init MPI CALL mpi_init(ierr) CALL mpi_comm_size(comm, npes, ierr) CALL mpi_comm_rank(comm, me, ierr) ! !-------------------------------------------------------------------------------- ! ! 1D partition: ! ! Define local array A3 CALL dist1d(comm, 0, n1, s1, n1p) CALL dist1d(comm, 0, n3, s3, n3p) ALLOCATE( a3(n1,n2,n3p), a3t(n3,n2,n1p) ) a3 = 0 a3t = 0 DO i=1,n1 DO j=1,n2 DO k=1,n3p kglob = s3 + k a3(i,j,k) = 10000*i + 100*j + kglob END DO END DO END DO IF( me.EQ. 0 ) THEN WRITE(*,'(a)') '*** 1D partition ***' WRITE(*,'(a,3i4)') 'Global dimensions of matrix a', n1, n2, n3 END IF ! ! Tranpose A3(n1,n2/P1,n3/P2) -> A3T(n3,n2/P1,n1/P2) CALL pptransp(comm, a3, a3t, 1, 3) ! ! Check A3T kerrors = 0 DO i=1,n1p iglob = s1 + i DO j=1,n2 DO k=1,n3 x = 10000*iglob + 100*j + k IF( x .NE. a3t(k,j,i) ) kerrors = kerrors+1 END DO END DO END DO CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, & & comm, ierr) IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking a3', nerrors DEALLOCATE(a3, a3t) !-------------------------------------------------------------------------------- ! ! 2D partition: ! ! Create cartesian topololy dims = (/2, 3/) periods = (/.FALSE., .FALSE./) reorder = .FALSE. IF( PRODUCT(dims) .NE. npes ) THEN IF( me .EQ. 0 ) THEN PRINT*, PRODUCT(dims), " processors required!" CALL mpi_abort(comm, -1, ierr) END IF END IF CALL mpi_barrier(comm, ierr) ! CALL mpi_cart_create(comm, ndims, dims, periods, reorder, cart, ierr) CALL mpi_cart_coords(cart, me, ndims, coords, ierr) CALL mpi_cart_sub(cart, (/.TRUE., .FALSE. /), cartcol, ierr) CALL mpi_cart_sub(cart, (/.FALSE., .TRUE. /), cartrow, ierr) ! ! Define local array A3 CALL dist1d(cartrow, 0, n1, s1, n1p) CALL dist1d(cartcol, 0, n2, s2, n2p) CALL dist1d(cartrow, 0, n3, s3, n3p) ALLOCATE( a3(n1,n2p,n3p), a3t(n3,n2p,n1p) ) a3 = 0 a3t = 0 DO i=1,n1 DO j=1,n2p jglob = s2 + j DO k=1,n3p kglob = s3 + k a3(i,j,k) = 10000*i + 100*jglob + kglob END DO END DO END DO IF( me.EQ. 0 ) THEN WRITE(*,'(a)') '*** 2D partition ***' WRITE(*,'(a,3i4)') 'Global dimensions of matrix a', n1, n2, n3 END IF ! ! Tranpose A3(n1,n2/P1,n3/P2) -> A3T(n3,n2/P1,n1/P2) CALL pptransp(cartrow, a3, a3t, 1, 3) ! ! Check A3T kerrors = 0 DO i=1,n1p iglob = s1 + i DO j=1,n2p jglob = s2 + j DO k=1,n3 x = 10000*iglob + 100*jglob + k IF( x .NE. a3t(k,j,i) ) kerrors = kerrors+1 END DO END DO END DO CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, & & comm, ierr) IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking a3', nerrors DEALLOCATE(a3, a3t) !-------------------------------------------------------------------------------- ! Epilogue ! CALL mpi_finalize(ierr) END PROGRAM main diff --git a/pputils2/pptransp2.tpl b/pputils2/pptransp2.tpl index ada1f71..bffdc1d 100644 --- a/pputils2/pptransp2.tpl +++ b/pputils2/pptransp2.tpl @@ -1,89 +1,89 @@ !> !> @file pptransp2.tpl !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> ! INTEGER :: me, npes, i, j, istr, iend, ierr INTEGER, DIMENSION(:), ALLOCATABLE :: ids, idr INTEGER, DIMENSION(:,:), ALLOCATABLE :: ndists INTEGER, DIMENSION(:,:), ALLOCATABLE :: offsets INTEGER :: dims(lastdim), np(2), npmx(2) INTEGER :: n1p, nlp, n1pmx, nlpmx, bufsiz, scount INTEGER :: status(MPI_STATUS_SIZE) !---------------------------------------------------------------------- ! 0. Prologue ! CALL mpi_comm_rank(comm, me, ierr) CALL mpi_comm_size(comm, npes, ierr) ! ! Determine send/receive proc. id ALLOCATE(ids(npes), idr(npes)) CALL partners(comm, ids, idr) !---------------------------------------------------------------------- ! 1. Send/receive buffers ! ! Distribution of first and last partitionned dimensions ALLOCATE(ndists(2,npes)) ALLOCATE(offsets(2,0:npes)) np(1) = SIZE(b,lastdim) ! Local first np(2) = SIZE(a,lastdim) ! and last dimension CALL mpi_allgather(np, 2, MPI_INTEGER, ndists, 2, MPI_INTEGER, comm, ierr) offsets = 0 DO i=1,npes offsets(:,i) = offsets(:,i-1) + ndists(:,i) END DO ! ! Allocate send and receive 1d buffers npmx = MAXVAL(ndists,2) bufsiz = npmx(1)*npmx(2) ! Maximum size of send/receive buffers DO i=2,lastdim-1 bufsiz = bufsiz * SIZE(a,i) END DO ALLOCATE(s_buf(bufsiz), r_buf(bufsiz) ) !---------------------------------------------------------------------- ! 2. Exchange blocks ! DO i=1,npes istr = offsets(1,ids(i)) + 1 ! Partition a along first dim iend = offsets(1,ids(i)+1) dims = SHAPE(a) dims(1) = iend-istr+1 scount = PRODUCT(dims) s_buf(1:scount) = RESHAPE(a(istr:iend,:), (/scount/)) !*** dim dependant ***! CALL MPI_SENDRECV(s_buf, scount, mpitype, ids(i), i,& & r_buf, bufsiz, mpitype, idr(i), i,& & comm, status, ierr) istr = offsets(2,idr(i)) + 1 ! Partition b along first dim iend = offsets(2,idr(i)+1) dims = SHAPE(b) dims(1) = iend-istr+1 b(istr:iend,:) = RESHAPE(r_buf, dims, order=(/lastdim, 1/)) !*** dim dependant ***! END DO !---------------------------------------------------------------------- ! 9. Epilogue ! DEALLOCATE(ids, idr) DEALLOCATE(ndists, offsets) DEALLOCATE(s_buf, r_buf) ! diff --git a/pputils2/pptransp3.tpl b/pputils2/pptransp3.tpl index ce817af..320d444 100644 --- a/pputils2/pptransp3.tpl +++ b/pputils2/pptransp3.tpl @@ -1,113 +1,113 @@ !> !> @file pptransp3.tpl !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> INTEGER :: me, npes, i, j, istr, iend, ierr INTEGER, DIMENSION(:), ALLOCATABLE :: ids, idr INTEGER, DIMENSION(:,:), ALLOCATABLE :: ndists INTEGER, DIMENSION(:,:), ALLOCATABLE :: offsets INTEGER :: dims(lastdim), np(2), npmx(2) INTEGER :: n1p, nlp, n1pmx, nlpmx, bufsiz, scount INTEGER :: status(MPI_STATUS_SIZE) !---------------------------------------------------------------------- ! 0. Prologue ! CALL mpi_comm_rank(comm, me, ierr) CALL mpi_comm_size(comm, npes, ierr) ! ! Determine send/receive proc. id ALLOCATE(ids(npes), idr(npes)) CALL partners(comm, ids, idr) !---------------------------------------------------------------------- ! 1. Send/receive buffers ! ! Distribution of dim1 and dim2 partitionned dimensions ALLOCATE(ndists(2,npes)) ALLOCATE(offsets(2,0:npes)) np(1) = SIZE(b, dim2) ! Local first np(2) = SIZE(a, dim2) ! and second dimension CALL mpi_allgather(np, 2, MPI_INTEGER, ndists, 2, MPI_INTEGER, comm, ierr) offsets = 0 DO i=1,npes offsets(:,i) = offsets(:,i-1) + ndists(:,i) END DO ! ! Allocate send and receive 1d buffers npmx = MAXVAL(ndists,2) bufsiz = npmx(1)*npmx(2) ! Maximum size of send/receive buffers DO i=1,lastdim IF ( (i .NE. dim1) .AND. (i .NE. dim2) ) bufsiz = bufsiz * SIZE(a,i) END DO ALLOCATE(s_buf(bufsiz), r_buf(bufsiz) ) !---------------------------------------------------------------------- ! 2. Exchange blocks ! IF ( (dim1 .EQ. 1) .AND. ( dim2 .EQ. 2 ) ) THEN !*** dim dependant ***! recv_order = (/2,1,3/) !*** dim dependant ***! ELSE IF ( (dim1 .EQ. 1) .AND. ( dim2 .eq. 3 ) ) THEN !*** dim dependant ***! recv_order = (/3,2,1/) !*** dim dependant ***! ELSE IF ( (dim1 .EQ. 2) .AND. ( dim2 .eq. 3 ) ) THEN !*** dim dependant ***! recv_order = (/1,3,2/) !*** dim dependant ***! ELSE IF ( me .EQ. 0 ) THEN WRITE(*, '(a,i4,a,i4,a)') 'pptransp3: Cannot handle case dim1 = ', dim1, ', dim2 = ', dim2, '!' CALL mpi_abort(MPI_COMM_WORLD, -1, ierr) END IF END IF ! DO i=1,npes istr = offsets(1,ids(i)) + 1 ! Partition a along dimension dim1 iend = offsets(1,ids(i)+1) dims = SHAPE(a) dims(dim1) = iend-istr+1 scount = PRODUCT(dims) IF (dim1 .EQ. 1) THEN !*** dim dependant ***! s_buf(1:scount) = RESHAPE(a(istr:iend,:,:), (/scount/)) !*** dim dependant ***! ELSE IF (dim1 .EQ. 2) THEN !*** dim dependant ***! s_buf(1:scount) = RESHAPE(a(:,istr:iend,:), (/scount/)) !*** dim dependant ***! END IF !*** dim dependant ***! CALL MPI_SENDRECV(s_buf, scount, mpitype, ids(i), i,& & r_buf, bufsiz, mpitype, idr(i), i,& & comm, status, ierr) istr = offsets(2,idr(i)) + 1 ! Partition b along dimension dim1 iend = offsets(2,idr(i)+1) dims = SHAPE(b) dims(dim1) = iend-istr+1 IF (dim1 .EQ. 1) THEN !*** dim dependant ***! b(istr:iend,:,:) = RESHAPE(r_buf, dims, order=recv_order) !*** dim dependant ***! ELSE IF (dim1 .EQ. 2) THEN !*** dim dependant ***! b(:,istr:iend,:) = RESHAPE(r_buf, dims, order=recv_order) !*** dim dependant ***! END IF !*** dim dependant ***! END DO !---------------------------------------------------------------------- ! 9. Epilogue ! DEALLOCATE(ids, idr) DEALLOCATE(ndists, offsets) DEALLOCATE(s_buf, r_buf) ! diff --git a/pputils2/pptransp4.tpl b/pputils2/pptransp4.tpl index cec2004..780ddf7 100644 --- a/pputils2/pptransp4.tpl +++ b/pputils2/pptransp4.tpl @@ -1,122 +1,122 @@ !> !> @file pptransp4.tpl !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> INTEGER :: me, npes, i, j, istr, iend, ierr INTEGER, DIMENSION(:), ALLOCATABLE :: ids, idr INTEGER, DIMENSION(:,:), ALLOCATABLE :: ndists INTEGER, DIMENSION(:,:), ALLOCATABLE :: offsets INTEGER :: dims(lastdim), np(2), npmx(2) INTEGER :: n1p, nlp, n1pmx, nlpmx, bufsiz, scount INTEGER :: status(MPI_STATUS_SIZE) !---------------------------------------------------------------------- ! 0. Prologue ! CALL mpi_comm_rank(comm, me, ierr) CALL mpi_comm_size(comm, npes, ierr) ! ! Determine send/receive proc. id ALLOCATE(ids(npes), idr(npes)) CALL partners(comm, ids, idr) !---------------------------------------------------------------------- ! 1. Send/receive buffers ! ! Distribution of dim1 and dim2 partitionned dimensions ALLOCATE(ndists(2,npes)) ALLOCATE(offsets(2,0:npes)) np(1) = SIZE(b, dim2) ! Local first np(2) = SIZE(a, dim2) ! and second dimension CALL mpi_allgather(np, 2, MPI_INTEGER, ndists, 2, MPI_INTEGER, comm, ierr) offsets = 0 DO i=1,npes offsets(:,i) = offsets(:,i-1) + ndists(:,i) END DO ! ! Allocate send and receive 1d buffers npmx = MAXVAL(ndists,2) bufsiz = npmx(1)*npmx(2) ! Maximum size of send/receive buffers DO i=1,lastdim IF ( (i .NE. dim1) .AND. (i .NE. dim2) ) bufsiz = bufsiz * SIZE(a,i) END DO ALLOCATE(s_buf(bufsiz), r_buf(bufsiz) ) !---------------------------------------------------------------------- ! 2. Exchange blocks ! IF ( (dim1 .EQ. 1) .AND. (dim2 .EQ. 2) ) THEN !*** dim dependant ***! recv_order = (/2,1,3,4/) !*** dim dependant ***! ELSE IF ( (dim1 .EQ. 1) .AND. ( dim2 .eq. 3 ) ) THEN !*** dim dependant ***! recv_order = (/3,2,1,4/) !*** dim dependant ***! ELSE IF ( (dim1 .EQ. 1) .AND. ( dim2 .eq. 4 ) ) THEN !*** dim dependant ***! recv_order = (/4,2,3,1/) !*** dim dependant ***! ELSE IF ( (dim1 .EQ. 2) .AND. ( dim2 .eq. 3 ) ) THEN !*** dim dependant ***! recv_order = (/1,3,2,4/) !*** dim dependant ***! ELSE IF ( (dim1 .EQ. 2) .AND. ( dim2 .eq. 4 ) ) THEN !*** dim dependant ***! recv_order = (/1,4,3,2/) !*** dim dependant ***! ELSE IF ( (dim1 .EQ. 3) .AND. ( dim2 .eq. 4 ) ) THEN !*** dim dependant ***! recv_order = (/1,2,4,3/) !*** dim dependant ***! ELSE IF ( me .EQ. 0 ) THEN WRITE(*, '(a,i4,a,i4,a)') 'pptransp4: Cannot handle case dim1 = ', dim1, ', dim2 = ', dim2, '!' CALL mpi_abort(MPI_COMM_WORLD, -1, ierr) END IF END IF ! DO i=1,npes istr = offsets(1,ids(i)) + 1 ! Partition a along dimension dim1 iend = offsets(1,ids(i)+1) dims = SHAPE(a) dims(dim1) = iend-istr+1 scount = PRODUCT(dims) IF (dim1 .EQ. 1) THEN !*** dim dependant ***! s_buf(1:scount) = RESHAPE(a(istr:iend,:,:,:), (/scount/)) !*** dim dependant ***! ELSE IF (dim1 .EQ. 2) THEN !*** dim dependant ***! s_buf(1:scount) = RESHAPE(a(:,istr:iend,:,:), (/scount/)) !*** dim dependant ***! ELSE IF (dim1 .EQ. 3) THEN !*** dim dependant ***! s_buf(1:scount) = RESHAPE(a(:,:,istr:iend,:), (/scount/)) !*** dim dependant ***! END IF !*** dim dependant ***! CALL MPI_SENDRECV(s_buf, scount, mpitype, ids(i), i,& & r_buf, bufsiz, mpitype, idr(i), i,& & comm, status, ierr) istr = offsets(2,idr(i)) + 1 ! Partition b along dimension dim1 iend = offsets(2,idr(i)+1) dims = SHAPE(b) dims(dim1) = iend-istr+1 IF (dim1 .EQ. 1) THEN !*** dim dependant ***! b(istr:iend,:,:,:) = RESHAPE(r_buf, dims, order=recv_order) !*** dim dependant ***! ELSE IF (dim1 .EQ. 2) THEN !*** dim dependant ***! b(:,istr:iend,:,:) = RESHAPE(r_buf, dims, order=recv_order) !*** dim dependant ***! ELSE IF (dim1 .EQ. 3) THEN !*** dim dependant ***! b(:,:,istr:iend,:) = RESHAPE(r_buf, dims, order=recv_order) !*** dim dependant ***! END IF !*** dim dependant ***! END DO !---------------------------------------------------------------------- ! 9. Epilogue ! DEALLOCATE(ids, idr) DEALLOCATE(ndists, offsets) DEALLOCATE(s_buf, r_buf) ! diff --git a/pputils2/pputils2.f90 b/pputils2/pputils2.f90 index 348a14a..e29570b 100644 --- a/pputils2/pputils2.f90 +++ b/pputils2/pputils2.f90 @@ -1,456 +1,456 @@ !> !> @file pputils2.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> MODULE pputils2 ! ! PPUTILS2: Some MPI utilities. ! ! T.M. Tran, CRPP-EPFL ! September 2010 ! September 2013: add exchange, norm2 ! November 2013: add timera, hostlist ! USE iso_fortran_env, ONLY : rkind => real64 USE mpi IMPLICIT NONE PRIVATE PUBLIC :: pptransp, dist1d, exchange, ppnorm2, timera, hostlist ! INTERFACE pptransp MODULE PROCEDURE pptransp2_r, pptransp3_r, pptransp4_r MODULE PROCEDURE pptransp2_c, pptransp3_c, pptransp4_c END INTERFACE INTERFACE exchange MODULE PROCEDURE exchange_2d, exchange_2d_new END INTERFACE exchange INTERFACE ppnorm2 MODULE PROCEDURE norm2_para_2d END INTERFACE ppnorm2 ! CONTAINS !======================================================================= SUBROUTINE pptransp2_r(comm, a, b) ! ! Handles double precision-type matrices. ! ! Transpose of rank 2 matrix A: ! A(n1,n2/P) -> B(n2,n1/P) ! INTEGER, INTENT(in) :: comm REAL(rkind), DIMENSION(:,:), INTENT(in) :: a !*** dim dependant ***! REAL(rkind), DIMENSION(:,:), INTENT(out) :: b !*** dim dependant ***! INTEGER, PARAMETER :: lastdim = 2, mpitype=MPI_DOUBLE_PRECISION !*** dim dependant ***! REAL(rkind), DIMENSION(:), ALLOCATABLE :: s_buf, r_buf ! INCLUDE 'pptransp2.tpl' ! END SUBROUTINE pptransp2_r !======================================================================= SUBROUTINE pptransp2_c(comm, a, b) ! ! Same as pptransp2_r, but for double complex-type matrices. ! INTEGER, INTENT(in) :: comm COMPLEX(rkind), DIMENSION(:,:), INTENT(in) :: a !*** dim dependant ***! COMPLEX(rkind), DIMENSION(:,:), INTENT(out) :: b !*** dim dependant ***! INTEGER, PARAMETER :: lastdim = 2, mpitype=MPI_DOUBLE_COMPLEX !*** dim dependant ***! COMPLEX(rkind), DIMENSION(:), ALLOCATABLE :: s_buf, r_buf ! INCLUDE 'pptransp2.tpl' ! END SUBROUTINE pptransp2_c !======================================================================= SUBROUTINE pptransp3_r(comm, a, b, dim1, dim2) ! ! Handles double precision-type matrices. ! ! Transpose dimensions dim1 and dim2 of rank 3 matrix A. ! dim1 and dim2 are such that 1 <= dim1 < dim2 <= 3. ! At input, matrix A is partitioned along dimension dim2 of matrix A. ! At exit, B = transpose(A), and B is partitioned along dimension dim1 of matrix A. ! ! For example: ! dim1 = 1, dim2 = 2 : A(n1,n2/P,n3) -> B(n2,n1/P,n3) ! dim1 = 1, dim2 = 3 : A(n1,n2,n3/P) -> B(n3,n2,n1/P) ! dim1 = 2, dim2 = 3 : A(n1,n2,n3/P) -> B(n1,n3,n2/P) ! INTEGER, INTENT(in) :: comm REAL(rkind), DIMENSION(:,:,:), INTENT(in) :: a !*** dim dependant ***! REAL(rkind), DIMENSION(:,:,:), INTENT(out) :: b !*** dim dependant ***! INTEGER, INTENT(in) :: dim1, dim2 INTEGER, PARAMETER :: lastdim = 3, mpitype=MPI_DOUBLE_PRECISION !*** dim dependant ***! REAL(rkind), DIMENSION(:), ALLOCATABLE :: s_buf, r_buf INTEGER :: recv_order(lastdim) ! INCLUDE 'pptransp3.tpl' ! END SUBROUTINE pptransp3_r !======================================================================= SUBROUTINE pptransp3_c(comm, a, b, dim1, dim2) ! ! Same as pptransp3_r, but for double complex-type matrices. ! INTEGER, INTENT(in) :: comm COMPLEX(rkind), DIMENSION(:,:,:), INTENT(in) :: a !*** dim dependant ***! COMPLEX(rkind), DIMENSION(:,:,:), INTENT(out) :: b !*** dim dependant ***! INTEGER, INTENT(in) :: dim1, dim2 INTEGER, PARAMETER :: lastdim = 3, mpitype=MPI_DOUBLE_COMPLEX !*** dim dependant ***! COMPLEX(rkind), DIMENSION(:), ALLOCATABLE :: s_buf, r_buf INTEGER :: recv_order(lastdim) ! INCLUDE 'pptransp3.tpl' ! END SUBROUTINE pptransp3_c !======================================================================= SUBROUTINE pptransp4_r(comm, a, b, dim1, dim2) ! ! Handles double precision-type matrices. ! ! Transpose dimensions dim1 and dim2 of rank 4 matrix A. ! dim1 and dim2 are such that 1 <= dim1 < dim2 <= 4. ! At input, matrix A is partitioned along dimension dim2 of matrix A. ! At exit, B = transpose(A), and B is partitioned along dimension dim1 of matrix A. ! ! For example: ! dim1 = 1, dim2 = 2 : A(n1,n2/P,n3 ,n4 ) -> B(n2,n1/P,n3 ,n4 ) ! dim1 = 1, dim2 = 3 : A(n1,n2 ,n3/P,n4 ) -> B(n3,n2 ,n1/P,n4 ) ! dim1 = 1, dim2 = 4 : A(n1,n2 ,n3 ,n4/P) -> B(n4,n2 ,n3 ,n1/P) ! dim1 = 2, dim2 = 3 : A(n1,n2 ,n3/P,n4 ) -> B(n1,n3 ,n2/P,n4 ) ! dim1 = 2, dim2 = 4 : A(n1,n2 ,n3 ,n4/P) -> B(n1,n4 ,n3 ,n2/P) ! dim1 = 3, dim2 = 4 : A(n1,n2 ,n3 ,n4/P) -> B(n1,n2 ,n4 ,n3/P) ! INTEGER, INTENT(in) :: comm REAL(rkind), DIMENSION(:,:,:,:), INTENT(in ) :: a !*** dim dependant ***! REAL(rkind), DIMENSION(:,:,:,:), INTENT(out) :: b !*** dim dependant ***! INTEGER, INTENT(in) :: dim1, dim2 INTEGER, PARAMETER :: lastdim = 4, mpitype=MPI_DOUBLE_PRECISION !*** dim dependant ***! REAL(rkind), DIMENSION(:), ALLOCATABLE :: s_buf, r_buf INTEGER :: recv_order(lastdim) ! INCLUDE 'pptransp4.tpl' ! END SUBROUTINE pptransp4_r !======================================================================= SUBROUTINE pptransp4_c(comm, a, b, dim1, dim2) ! ! Same as pptransp4_r, but for double complex-type matrices ! INTEGER, INTENT(in) :: comm COMPLEX(rkind), DIMENSION(:,:,:,:), INTENT(in) :: a !*** dim dependant ***! COMPLEX(rkind), DIMENSION(:,:,:,:), INTENT(out) :: b !*** dim dependant ***! INTEGER, INTENT(in) :: dim1, dim2 INTEGER, PARAMETER :: lastdim = 4, mpitype=MPI_DOUBLE_COMPLEX !*** dim dependant ***! COMPLEX(rkind), DIMENSION(:), ALLOCATABLE :: s_buf, r_buf INTEGER :: recv_order(lastdim) ! INCLUDE 'pptransp4.tpl' ! END SUBROUTINE pptransp4_c !======================================================================= SUBROUTINE dist1d(comm, s0, ntot, s, nloc) ! ! 1d distribute ntot elements, returns offset s and local number of ! elements nloc. ! INTEGER, INTENT(in) :: s0, ntot INTEGER, INTENT(out) :: s, nloc INTEGER :: comm, me, npes, ierr, naver, rem ! CALL MPI_COMM_SIZE(comm, npes, ierr) CALL MPI_COMM_RANK(comm, me, ierr) naver = ntot/npes rem = MODULO(ntot,npes) s = s0 + MIN(rem,me) + me*naver nloc = naver IF( me.LT.rem ) nloc = nloc+1 ! END SUBROUTINE dist1d !======================================================================= SUBROUTINE exchange_2d_new(comm, u, garea) ! ! Exhange ghost cells with (west,east,south,north) neighbors. ! Assume same ghost cells on each dimension: ! garea(1) : number of ghost cells on west and east boundaries ! garea(2) : number of ghost cells on south and north boundaries ! Both are equal to 1 by default. ! INTEGER, INTENT(in) :: comm REAL(rkind), ALLOCATABLE, INTENT(inout) :: u(:,:) INTEGER, OPTIONAL, INTENT(in) :: garea(2) INTEGER :: neighs(4), ierr ! CALL mpi_cart_shift(comm, 0, 1, neighs(1), neighs(2), ierr) CALL mpi_cart_shift(comm, 1, 1, neighs(3), neighs(4), ierr) CALL exchange_2d(comm, neighs, u, garea) END SUBROUTINE exchange_2d_new !======================================================================= SUBROUTINE exchange_2d(comm, neighs, u, garea) ! ! Exhange ghost cells with (west,east,south,north) neighbors. ! Assume same ghost cells on each dimension: ! garea(1) : number of ghost cells on west and east boundaries ! garea(2) : number of ghost cells on south and north boundaries ! Both are equal to 1 by default. ! INTEGER, INTENT(in) :: comm INTEGER, INTENT(in) :: neighs(4) REAL(rkind), ALLOCATABLE, INTENT(inout) :: u(:,:) INTEGER, OPTIONAL, INTENT(in) :: garea(2) ! INTEGER :: cols, rows INTEGER :: ierr INTEGER, PARAMETER :: ndim=2 INTEGER, DIMENSION(ndim) :: g, lb, ub, s, e, n ! g = [1,1] IF(PRESENT(garea)) g = garea lb = LBOUND(u) ub = UBOUND(u) s = lb + g e = ub - g n = ub - lb + 1 ! include ghost cells ! ! g(2) matrix full rows with stride n(1) CALL mpi_type_vector(n(2), g(2), n(1), MPI_DOUBLE_PRECISION, rows, ierr) CALL mpi_type_commit(rows, ierr) ! ! g(1) contiguous matrix full columns CALL mpi_type_contiguous(n(1)*g(1), MPI_DOUBLE_PRECISION, cols, ierr) CALL mpi_type_commit(cols, ierr) ! ! Exchange along first dimension CALL mpi_sendrecv(u(s(1), lb(2)), 1, rows, neighs(1), 0, & & u(e(1)+1,lb(2)), 1, rows, neighs(2), 0, & & comm, MPI_STATUS_IGNORE, ierr) CALL mpi_sendrecv(u(e(1)-g(1)+1,lb(2)), 1, rows, neighs(2), 0, & & u(lb(1), lb(2)), 1, rows, neighs(1), 0, & & comm, MPI_STATUS_IGNORE, ierr) ! ! Exchange along second dimension CALL mpi_sendrecv(u(lb(1),s(2)), 1, cols, neighs(3), 0, & & u(lb(1),e(2)+1), 1, cols, neighs(4), 0, & & comm, MPI_STATUS_IGNORE, ierr) CALL mpi_sendrecv(u(lb(1),e(2)-g(2)+1), 1, cols, neighs(4), 0, & & u(lb(1),lb(2)), 1, cols, neighs(3), 0, & & comm, MPI_STATUS_IGNORE, ierr) END SUBROUTINE exchange_2d !======================================================================= FUNCTION norm2_para_2d(x, comm, root, garea) RESULT(res) ! ! Vector norm of 2d distributed array with ghost cells ! USE mpi REAL(rkind), ALLOCATABLE, INTENT(in) :: x(:,:) INTEGER, INTENT(in) :: comm INTEGER, INTENT(in), OPTIONAL :: root INTEGER, INTENT(in), OPTIONAL :: garea(:) REAL(rkind) :: res INTEGER, PARAMETER :: ndim=2 INTEGER, DIMENSION(ndim) :: g, s, e REAL(rkind) :: res_loc INTEGER :: r, me, ierr ! CALL mpi_comm_rank(comm, me, ierr) g = [1,1] IF(PRESENT(garea)) g = garea r = 0 IF(PRESENT(root)) r = root s = LBOUND(x) + g e = UBOUND(x) - g res_loc = SUM(x(s(1):e(1),s(2):e(2))**2) CALL mpi_reduce(res_loc, res, 1, MPI_DOUBLE_PRECISION, MPI_SUM, r, comm, ierr) if(me.eq.r) res = SQRT(res) END FUNCTION norm2_para_2d !======================================================================= SUBROUTINE timera(cntrl, str, eltime, comm) ! ! Timers (cntrl=0/1 to Init/Update) ! USE mpi INTEGER, INTENT(in) :: cntrl CHARACTER(len=*), INTENT(in) :: str DOUBLE PRECISION, OPTIONAL, INTENT(out) :: eltime INTEGER, OPTIONAL, INTENT(in) :: comm ! INTEGER, PARAMETER :: ncmax=128, maxlen=32 ! INTEGER, SAVE :: icall=0, nc=0 DOUBLE PRECISION, SAVE :: startt0=0.0 DOUBLE PRECISION, DIMENSION(ncmax), SAVE :: startt = 0.0, endt = 0.0 CHARACTER(len=maxlen), SAVE :: which(ncmax) ! DOUBLE PRECISION, DIMENSION(ncmax) :: endtmin, endtmax INTEGER :: comm0, me, lstr, found, i, ierr !________________________________________________________________________________ IF(PRESENT(comm)) THEN comm0 = comm ELSE comm0 = MPI_COMM_WORLD END IF CALL mpi_comm_rank(comm0, me, ierr) CALL mpi_barrier(comm0, ierr) !________________________________________________________________________________ ! IF( icall .EQ. 0 ) THEN icall = icall+1 startt0 = mpi_wtime() END IF lstr = MIN(LEN_TRIM(str),maxlen) IF( lstr .GT. 0 ) found = loc(str) !________________________________________________________________________________ ! SELECT CASE (cntrl) ! CASE(-1) ! Current wall time IF( PRESENT(eltime) ) THEN eltime = mpi_wtime() - startt0 ELSE IF (me .EQ. 0 ) THEN WRITE(*,'(/a,a,1pe10.3/)') "++ ", ' Wall time used so far = ', & & mpi_wtime() - startt0 END IF ! CASE(0) ! Init Timer IF( found .EQ. 0 ) THEN ! Called for the 1st time for 'str' nc = nc+1 which(nc) = str(1:lstr) found = nc END IF startt(found) = mpi_wtime() ! CASE(1) ! Update timer endt(found) = mpi_wtime() - startt(found) IF( PRESENT(eltime) ) THEN eltime = endt(found) ELSE IF (me .EQ. 0 ) THEN WRITE(*,'(/a,a,1pe10.3/)') "++ "//str, ' wall clock time = ', & & endt(found) END IF ! CASE(2) ! Update and reset timer endt(found) = endt(found) + mpi_wtime() - startt(found) startt(found) = mpi_wtime() IF( PRESENT(eltime) ) THEN eltime = endt(found) END IF ! CASE(9) ! Display all timers IF( nc .GT. 0 ) THEN CALL mpi_reduce(endt, endtmin, nc, MPI_DOUBLE_PRECISION, MPI_MIN, 0, comm0, ierr) CALL mpi_reduce(endt, endtmax, nc, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm0, ierr) IF( me .EQ. 0 ) THEN WRITE(*,'(a)') "Minmax Timer Summary" WRITE(*,'(a)') "====================" DO i=1,nc WRITE(*,'(a20,2x,2(1pe12.3))') TRIM(which(i))//":", endtmin(i), endtmax(i) END DO END IF END IF ! END SELECT ! CONTAINS INTEGER FUNCTION loc(str) CHARACTER(len=*), INTENT(in) :: str INTEGER :: i, ind loc = 0 DO i=1,nc ind = INDEX(which(i), str(1:lstr)) IF( ind .GT. 0 .AND. LEN_TRIM(which(i)) .EQ. lstr ) THEN loc = i EXIT END IF END DO END FUNCTION loc END SUBROUTINE timera !======================================================================= SUBROUTINE hostlist(comm) ! ! Print list of hostnames in comm ! USE mpi INTEGER, OPTIONAL, INTENT(in) :: comm ! CHARACTER(len=MPI_MAX_PROCESSOR_NAME) :: procname CHARACTER(len=MPI_MAX_PROCESSOR_NAME), ALLOCATABLE :: procnames(:) INTEGER :: comm0, me, nprocs, ierr, i, l ! IF(PRESENT(comm)) THEN comm0 = comm ELSE comm0 = mpi_comm_world END IF CALL MPI_COMM_RANK(comm0, me, ierr) CALL MPI_COMM_SIZE(comm0, nprocs, ierr) CALL MPI_GET_PROCESSOR_NAME(procname, l, ierr) ALLOCATE(procnames(0:nprocs-1)) CALL mpi_gather(procname,MPI_MAX_PROCESSOR_NAME,mpi_character, & & procnames,MPI_MAX_PROCESSOR_NAME,mpi_character,0, & & comm0,ierr) IF(me.EQ.0) THEN WRITE(*,'(a/(10(1x,a)))') 'Host list:', & & (TRIM(procnames(i)),i=0,nprocs-1) END IF END SUBROUTINE hostlist !======================================================================= ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! Private routines/functions ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! SUBROUTINE partners(comm, ids, idr) ! ! Compute ranks of send and receive procs. ! IMPLICIT NONE INTEGER, INTENT(in) :: comm INTEGER, INTENT(out) :: ids(:), idr(:) INTEGER :: me, npes, ierr, i ! CALL mpi_comm_rank(comm, me, ierr) CALL mpi_comm_size(comm, npes, ierr) IF( ispower2(npes) ) THEN DO i=0,npes-1 ids(i+1) = IEOR(me, i) idr(i+1) = ids(i+1) END DO ELSE DO i=0,npes-1 ids(i+1) = MODULO(me+i, npes) idr(i+1) = MODULO(me-i, npes) END DO END IF END SUBROUTINE partners !======================================================================= LOGICAL FUNCTION ispower2(n) INTEGER, INTENT(in) :: n INTEGER :: l l=2 DO WHILE ( l .LT. n ) l = 2*l END DO ispower2 = l .EQ. n END FUNCTION ispower2 !======================================================================= END MODULE pputils2 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b55fdb1..5d069c5 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,111 +1,111 @@ # # @file CMakeLists.txt # # @brief # # @copyright # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) # SPC (Swiss Plasma Center) # -# spclibs is free software: you can redistribute it and/or modify it under +# SPClibs is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) # any later version. # -# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . # # @authors # (in alphabetical order) # @author Nicolas Richart # @author Trach-Minh Tran # project(bsplines_src) set(SRCS bsplines.f90 matrix.f90 sparse_mod.f90 lapack_extra.f math_util.f90 ) set(SRCS_PP conmat.f90 ) set(PUBLIC_MODULES bsplines.mod matrix.mod math_util.mod conmat_mod.mod sparse.mod ) if(HAS_PARDISO) list(APPEND ${SRCS_PP} pardiso_mod.f90) endif() set_property(SOURCE conmat.f90 APPEND PROPERTY COMPILE_OPTIONS -DWSMP ${MKL_DEFINITIONS}) if(HAS_MUMPS) list(APPEND SRCS multigrid_mod.f90 ) list(APPEND SRCS_PP mumps_mod.f90 csr_mod.f90 cds_mod.f90 ) list(APPEND PUBLIC_MODULES cds.mod csr.mod mumps_bsplines.mod) set_property(SOURCE conmat.f90 APPEND PROPERTY COMPILE_OPTIONS -DMUMPS) endif() set(_public_headers) foreach(_modules ${PUBLIC_MODULES}) list(APPEND _public_headers ${CMAKE_CURRENT_BINARY_DIR}/${_modules}) endforeach() set_property(SOURCE ${SRCS_PP} APPEND PROPERTY COMPILE_OPTIONS -cpp) include(GNUInstallDirs) add_library(bsplines STATIC ${SRCS} ${SRCS_PP}) target_include_directories(bsplines PRIVATE $ ${MUMPS_INCLUDE_DIR} INTERFACE $ $ ) set_property(TARGET bsplines PROPERTY PUBLIC_HEADER ${_public_headers}) target_link_libraries(bsplines PUBLIC futils pppack pputils2 fft ${BLAS_LIBRARIES} ${MUMPS_LIBRARIES} ${LAPACK_LIBRARIES} ) if(MKL_Fortran_FLAGS) separate_arguments(MKL_Fortran_FLAGS) target_compile_options(bsplines PUBLIC ${MKL_Fortran_FLAGS}) target_link_options(bsplines PUBLIC ${MKL_Fortran_FLAGS}) endif() install(TARGETS bsplines EXPORT ${BSPLINES_EXPORT_TARGETS} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} ) diff --git a/src/Makefile b/src/Makefile index 229a18e..edee4a5 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,176 +1,176 @@ # # @file Makefile # # @brief # # @copyright # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) # SPC (Swiss Plasma Center) # -# spclibs is free software: you can redistribute it and/or modify it under +# SPClibs is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) # any later version. # -# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . # # @authors # (in alphabetical order) # @author Stephan Brunner # @author Sébastien Jolliet # @author Trach-Minh Tran # PREFIX=/usr/local/crpp FUTILS=$(PREFIX)/futils PPPACK=../pppack PPUTILS2=../pputils2 MPIF90 = mpif90 F90 = mpif90 LD = $(MPIF90) debug = -g -traceback -check bounds -fpe0 -warn alignments -warn unused debug = -g -traceback -check bounds -fpe0 -warn alignments optim = -O3 -xHOST #OPT=$(debug) OPT=$(optim) F90FLAGS = $(OPT) -fPIC -I. -I$(FUTILS)/include CC = cc CFLAGS = -O2 SPL_OBJS = bsplines.o matrix.o sparse_mod.o pardiso_mod.o \ lapack_extra.o conmat.o math_util.o ifdef MKL SPBLAS = -DMKL endif ifdef MUMPS SPL_OBJS += mumps_mod.o csr_mod.o cds_mod.o multigrid_mod.o F90FLAGS += -I$(MUMPS)/include endif ifdef WSMP SPL_OBJS += wsmp_mod.o pwsmp_mod.o endif ifdef PETSC_DIR SPL_OBJS += petsc_mod.o FCCPFLAGS = -I$(PETSC_DIR)/include -I$(PETSC_DIR)/$(PETSC_ARCH)/include endif .SUFFIXES: .SUFFIXES: .o .c .f90 .f .F90 .f90.o: $(MPIF90) $(F90FLAGS) -c $< .F90.o: $(MPIF90) $(F90FLAGS) $(FCCPFLAGS) -c $< .f.o: $(F90) $(F90FLAGS) -c $< SUBDIRS = pputils2 pppack fft subdirs: $(SUBDIRS) .PHONY: subdirs $(SUBSDIRS) $(PPUTILS2) $(SUBDIRS): $(MAKE) "OPT=$(OPT)" -C ../$@ lib lib: subdirs libbsplines.a cp -p $(PPPACK)/libpppack.a ./ touch lib cp -p lib ../examples libbsplines.a: $(SPL_OBJS) xiar r $@ $? ranlib $@ debug: make clean make "OPT=$(debug)" lib mkdir -p .g cp -p libbsplines.a $(PPPACK)/libpppack.a *.mod .g/ opt: make clean make "OPT=$(optim)" lib mkdir -p $(PREFIX)/{lib,include}/O mkdir -p .O cp -p libbsplines.a $(PPPACK)/libpppack.a *.mod .O/ install: debug opt mkdir -p $(PREFIX)/{lib,include}/g mv .g/*.a $(PREFIX)/lib/g/ mv .g/*.mod $(PREFIX)/include/g/ mkdir -p $(PREFIX)/{lib,include}/O mv .O/*.a $(PREFIX)/lib/O/ mv .O/*.mod $(PREFIX)/include/O/ uninstall: rm -f $(PREFIX)/include/{O,g}/bsplines.mod \ $(PREFIX)/include/{O,g}/cds.mod \ $(PREFIX)/include/{O,g}/conmat_mod.mod \ $(PREFIX)/include/{O,g}/csr.mod \ $(PREFIX)/include/{O,g}/math_util.mod \ $(PREFIX)/include/{O,g}/matrix.mod \ $(PREFIX)/include/{O,g}/multigrid.mod \ $(PREFIX)/include/{O,g}/mumps_bsplines.mod \ $(PREFIX)/include/{O,g}/pardiso_bsplines.mod \ $(PREFIX)/include/{O,g}/petsc_bsplines.mod \ $(PREFIX)/include/{O,g}/sparse.mod \ $(PREFIX)/include/{O,g}/wsmp_bsplines.mod \ $(PREFIX)/lib/{O,g}/libbsplines.a \ $(PREFIX)/lib/{O,g}/libpppack.a matrix.o: matrix.f90 sparse_mod.o: sparse_mod.f90 bsplines.o: bsplines.f90 matrix.o multigrid_mod.o: bsplines.o matrix.o conmat.o csr_mod.o cds_mod.o conmat.o: conmat.f90 conmat.tpl conmat_1d.tpl zconmat.tpl zconmat_1d.tpl conrhs.tpl $(F90) -fpp -DMKL -DWSMP -DMUMPS $(F90FLAGS) -c conmat.f90 cds_mod.o: cds_mod.f90 $(F90) -fpp $(SPBLAS) $(F90FLAGS) -c cds_mod.f90 pardiso_mod.o: pardiso_mod.f90 sparse_mod.o psum_mat.tpl p2p_mat.tpl $(F90) -fpp $(SPBLAS) $(F90FLAGS) -c pardiso_mod.f90 mumps_mod.o:mumps_mod.f90 sparse_mod.o psum_mat.tpl p2p_mat.tpl $(F90) -fpp $(SPBLAS) $(F90FLAGS) -I$(PPUTILS2) -c mumps_mod.f90 wsmp_mod.o: wsmp_mod.f90 sparse_mod.o psum_mat.tpl p2p_mat.tpl $(F90) -fpp $(SPBLAS) $(F90FLAGS) -I$(PPUTILS2) -c wsmp_mod.f90 pwsmp_mod.o: pwsmp_mod.f90 sparse_mod.o wsmp_mod.o psum_mat.tpl p2p_mat.tpl $(F90) -fpp $(SPBLAS) $(F90FLAGS) -I$(PPUTILS2) -c pwsmp_mod.f90 petsc_mod.o: petsc_mod.F90 sparse_mod.o $(F90) -fpp $(FCCPFLAGS) $(SPBLAS) $(F90FLAGS) -I$(PPUTILS2) -c petsc_mod.F90 csr_mod.o: csr_mod.f90 sparse_mod.o mumps_mod.o $(F90) -fpp $(SPBLAS) $(F90FLAGS) -I$(PPUTILS2) -c csr_mod.f90 tags: etags *.f *.f90 $(PPPACK)/*.f90 clean: $(MAKE) -C $(PPPACK) clean $(MAKE) -C ../fft clean rm -f *.o *.mod *~ a.out distclean: clean $(MAKE) -C $(PPPACK) distclean $(MAKE) -C ../fft distclean $(MAKE) -C $(PPUTILS2) distclean rm -f lib *.a *.mod ../bin/* rm -rf .O .g diff --git a/src/bsplines.f90 b/src/bsplines.f90 index 1feff79..0ab9ca8 100644 --- a/src/bsplines.f90 +++ b/src/bsplines.f90 @@ -1,4285 +1,4285 @@ !> !> @file bsplines.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Stephan Brunner !> @author Trach-Minh Tran !> MODULE bsplines ! ! BSPLINES: A module to construct B-Splines of any order on ! non-equidistant mesh. Can be used for interpolation and ! Finite Element discretization. ! ! T.M. Tran, S. Brunner, CRPP-EPFL ! February 2007 ! USE matrix IMPLICIT NONE PRIVATE PUBLIC :: spline1d, set_spline, get_dim, get_gauss, gridval PUBLIC :: spline2d, spline2d1d, def_knots, allsplines PUBLIC :: set_splcoef, get_splcoef PUBLIC :: fintg, calc_integ, destroy_sp PUBLIC :: gauleg, CompMassMatrix PUBLIC :: basfun_recur, basfun, def_basfun, is_equid, locintv_old, locintv PUBLIC :: calc_fftmass, calc_fftmass_old PUBLIC :: init_dft, ft_basfun PUBLIC :: getgrad PUBLIC :: dftmap ! DOUBLE PRECISION, PARAMETER :: pi=3.1415926535897931d0 ! TYPE dftmap INTEGER :: n ! Total number of modes INTEGER :: kmin, kmax ! Define the Foyrier window INTEGER :: dk ! Number of modes in window DOUBLE PRECISION :: dx ! Interval in real space INTEGER, POINTER :: mode_couplings(:) => NULL() ! Table of mode couplings DOUBLE COMPLEX, POINTER :: coefs(:,:) => NULL() ! The restricted Fourier coefs END TYPE dftmap ! TYPE spline1d INTEGER :: order ! Spline order = spline degree + 1 INTEGER :: nints ! Number of knot intervals INTEGER :: nsites ! Number of interpolation sites INTEGER :: dim ! Dimension of spline space INTEGER :: left=0 ! Save value used by routine LOCATE LOGICAL :: period ! is the grid periodic ? LOGICAL :: nlequid ! is the grid equidistant ? LOGICAL :: nlppform ! Construct and build PPFORM in GRIDVAL if .TRUE. DOUBLE PRECISION :: lperiod ! Periodicity DOUBLE PRECISION :: hinv ! Inverse of mesh size, when nlequid=T INTEGER, POINTER ::& & fmap(:) => NULL() ! Mapping of fine to coarse mesh DOUBLE PRECISION, POINTER :: & & knots(:) => NULL(), & ! Spline knots (-p:n+p) & val0(:,:,:) => NULL(), & ! Values and deriv. at left boundary of all splines & valc(:,:) => NULL(), & ! Values and deriv. at left boundary of equid. per. splines & gausx(:,:) => NULL(), & ! Gauss abscissas & gausw(:,:) => NULL(), & ! Gauss weights & intspl(:) => NULL(), & ! Integral of splines & ppform(:,:) => NULL(), & ! PPFORM coefs & bcoefs(:) => NULL() ! Spline coefs DOUBLE COMPLEX, POINTER :: & & ppformz(:,:) => NULL(), & ! PPFORM coefs for complex function & bcoefsc(:) => NULL() ! Spline coefs for complex function TYPE(GBMAT) :: mat ! Interpolation matrix TYPE(periodic_mat) :: matp ! Interpolation matrix (periodic case) TYPE(dftmap) :: dft ! Define DFT mapping END TYPE spline1d ! TYPE spline2d TYPE(spline1d) :: sp1 ! Spline in direction 1 TYPE(spline1d) :: sp2 ! Spline in direction 2 DOUBLE PRECISION, POINTER :: ppform(:,:,:,:) => NULL() ! 2d PPFORM coefs DOUBLE PRECISION, POINTER :: bcoefs(:,:) => NULL() ! Spline coefs DOUBLE COMPLEX, POINTER :: ppformz(:,:,:,:) => NULL() ! PPFORM coefs for complex function DOUBLE COMPLEX, POINTER :: bcoefsc(:,:) => NULL() ! Spline coefs for complex function END TYPE spline2d ! TYPE spline2d1d TYPE(spline2d) :: sp12 ! 2D spline for dir. 1 and 2 TYPE(spline1d) :: sp3 ! 1D spline for dir. 3 DOUBLE PRECISION, POINTER :: ppform(:,:,:,:,:,:) => NULL() ! PPFORM coefs DOUBLE PRECISION, POINTER :: bcoefs(:,:,:) => NULL() ! Spline coefs DOUBLE PRECISION, POINTER :: ppformz(:,:,:,:,:,:) => NULL()! PPFORM coefs for complex function DOUBLE COMPLEX, POINTER :: bcoefsc(:,:,:) => NULL() ! Spline coefs for complex function END TYPE spline2d1d ! INTERFACE set_spline MODULE PROCEDURE set_spline1d, set_spline2d, set_spline2d1d END INTERFACE INTERFACE get_dim MODULE PROCEDURE get_dim1, get_dim2 END INTERFACE INTERFACE gridval MODULE PROCEDURE gridval1d, gridval1dz, & & gridval2d, gridval2dz, & & gridval2d_1d, gridval2d_1dz, & & gridval2d_2d, gridval2d_2dz, & & gridval2d1d_3d, gridval2d1d_1d END INTERFACE INTERFACE set_splcoef MODULE PROCEDURE set_splcoef1d, set_splcoef2d END INTERFACE INTERFACE get_splcoef MODULE PROCEDURE get_splcoef1, get_splcoef1z, get_splcoefn, & & get_splcoef2d, get_splcoef2dz END INTERFACE INTERFACE fintg MODULE PROCEDURE fintg1, fintg2 END INTERFACE INTERFACE destroy_sp MODULE PROCEDURE destroy_sp1d, destroy_sp2d, destroy_sp2d1d END INTERFACE INTERFACE CompMassMatrix MODULE PROCEDURE CompMassMatrix1, CompMassMatrix_gb, CompMassMatrix_zgb END INTERFACE INTERFACE calc_integ MODULE PROCEDURE calc_integ0,calc_integn END INTERFACE INTERFACE locintv MODULE PROCEDURE locintv0, locintv1 END INTERFACE locintv INTERFACE locintv_old MODULE PROCEDURE locintv0_old, locintv1_old END INTERFACE locintv_old INTERFACE ppval MODULE PROCEDURE ppval0, ppval1, ppval2, & & ppval0_n, & & ppval0z, ppval1z, ppval2z, & & ppval0z_n END INTERFACE ppval INTERFACE basfun MODULE PROCEDURE basfun0, basfun1 END INTERFACE basfun INTERFACE ft_basfun MODULE PROCEDURE ft_basfun0, ft_basfun1 END INTERFACE ft_basfun INTERFACE def_basfun MODULE PROCEDURE def_basfun0, def_basfun1 END INTERFACE def_basfun INTERFACE getgrad MODULE PROCEDURE getgradr, getgradz END INTERFACE getgrad ! CONTAINS !=========================================================================== SUBROUTINE set_spline1d(p, ngauss, grid, sp, period, nlppform, nlequid) ! ! Setup a spline ! INTEGER, INTENT(in) :: p, ngauss DOUBLE PRECISION, INTENT(in) :: grid(:) TYPE(spline1d), INTENT(out) :: sp LOGICAL, OPTIONAL, INTENT(in) :: period LOGICAL, OPTIONAL, INTENT(in) :: nlppform LOGICAL, OPTIONAL, INTENT(in) :: nlequid ! DOUBLE COMPLEX :: zc DOUBLE PRECISION :: leng, xp, factinv, h INTEGER :: order, nints, i, k DOUBLE PRECISION :: temp(1:p+1,0:p) ! ! Order of splines order = p+1 sp%order = order ! ! Dimension of spline space nints = SIZE(grid)-1 sp%nints = nints sp%period = .FALSE. IF( PRESENT(period) ) THEN sp%period = period sp%lperiod = grid(nints+1) - grid(1) END IF sp%dim = nints+p ! ! Use or not PPFORM sp%nlppform = .TRUE. IF( PRESENT(nlppform) ) THEN sp%nlppform = nlppform END IF ! ! Determine sequence of knots IF( ASSOCIATED(sp%knots) ) DEALLOCATE(sp%knots) ALLOCATE( sp%knots(-p:nints+p) ) sp%knots(0:nints) = grid(:) ! ! Is the grid equidistant ? IF( PRESENT(nlequid) ) THEN sp%nlequid = nlequid ELSE sp%nlequid = is_equid(grid) END IF ! ! Coarse to fine mesh mapping for non-equidistant mesh IF(sp%nlequid) THEN sp%hinv = 1.0d0/(sp%knots(1)-sp%knots(0)) ELSE IF(ASSOCIATED(sp%fmap)) DEALLOCATE(sp%fmap) CALL create_fine(sp%knots(0:nints), h, sp%fmap) sp%hinv = 1.0d0/h END IF ! ! Extend knots at both sides of given grid points IF( sp%period ) THEN leng = sp%knots(nints) - sp%knots(0) DO i=-1,-p,-1 sp%knots(i) = sp%knots(nints+i) - leng END DO DO i=1,p sp%knots(nints+i) = sp%knots(i) + leng END DO !!$ sp%knots(-p:-1) = sp%knots(nints-p:nints-1) - leng !!$ sp%knots(nints+1:nints+p) = leng + sp%knots(1:p) ELSE sp%knots(-p:-1) = sp%knots(0) sp%knots(nints+1:nints+p) = sp%knots(nints) END IF ! ! Precalculated values of all splines and their derivatives at left boundaries IF( ASSOCIATED(sp%val0) ) DEALLOCATE(sp%val0) ALLOCATE( sp%val0(0:p, p+1, 1:nints) ) sp%val0 = 0.0d0 DO i=1,nints xp = sp%knots(i-1) + EPSILON(1.0d0)*ABS(sp%knots(i-1)) CALL basfun_recur(xp, sp, temp, i) sp%val0(:,:,i) = TRANSPOSE(temp) END DO ! factinv = 1.0d0 DO k=2,p ! Divide by k! for use in PPFORM_ALT factinv = factinv/k sp%val0(k,:,:) = sp%val0(k,:,:)*factinv END DO ! ! Case of periodic equidistant splines (translational invariance) IF(sp%period .AND. sp%nlequid) THEN IF( ASSOCIATED(sp%valc) ) DEALLOCATE(sp%valc) ALLOCATE(sp%valc(0:p, p+1)) sp%valc = sp%val0(:,:,1) END IF ! ! Gauss abscissas and weights IF( ngauss .GT. 0 ) THEN IF( ASSOCIATED(sp%gausx) ) DEALLOCATE(sp%gausx) IF( ASSOCIATED(sp%gausw) ) DEALLOCATE(sp%gausw) ALLOCATE(sp%gausx(ngauss,nints)) ALLOCATE(sp%gausw(ngauss,nints)) DO i=1,nints CALL gauleg(sp%knots(i-1), sp%knots(i), & & sp%gausx(1:ngauss,i), sp%gausw(1:ngauss,i), ngauss) END DO END IF ! ! Compute integral of each splines IF( ASSOCIATED(sp%intspl) ) DEALLOCATE(sp%intspl) ALLOCATE(sp%intspl(0:sp%dim-1)) CALL calc_integ(sp, sp%intspl) ! END SUBROUTINE set_spline1d !=========================================================================== SUBROUTINE init_dft(sp, kmin, kmax, couplings) ! ! Initialize DFT ! TYPE(spline1d) :: sp INTEGER, INTENT(in) :: kmin, kmax INTEGER, INTENT(in), OPTIONAL :: couplings(:) ! INTEGER :: n, p, dk, k, j, nc DOUBLE COMPLEX :: zc ! n = sp%nints p = sp%order-1 dk = kmax-kmin+1 ! ! Check that -N/2 .LE. Kmin .LE. Kmax .LT. N/2 ! IF(kmin.GT.kmax .OR. kmin.LT.-n/2 .OR. kmax.GE.n/2) THEN WRITE(*,'(a,2i6,a)') 'kmin, kmax =', kmin, kmax, ' erroneous!' STOP END IF ! ! The Fourier window ! sp%dft%n = n sp%dft%kmin = kmin sp%dft%kmax = kmax sp%dft%dk = dk sp%dft%dx = sp%knots(1) - sp%knots(0) ! ! Precalculate the DFT coefs exp( i(2*pi/N)jk ), k=kmin,kmax, j=0,p ! IF( ASSOCIATED(sp%dft%coefs) ) DEALLOCATE(sp%dft%coefs) ALLOCATE(sp%dft%coefs(kmin:kmax,0:p)) zc = EXP( CMPLX(0.0d0, 2.0d0*pi/REAL(n,8),8) ) sp%dft%coefs(:,0) = 1.0d0 ! j=0 sp%dft%coefs(kmin,1) = zc**kmin ! j=1 DO k=kmin+1,kmax sp%dft%coefs(k,1) = sp%dft%coefs(k-1,1)*zc END DO DO j=2,p sp%dft%coefs(:,j) = sp%dft%coefs(:,1)*sp%dft%coefs(:,j-1) END DO ! ! Mode couplings: by default use the whole window ! nc = dk IF( PRESENT(couplings)) nc = SIZE(couplings) ! IF(ASSOCIATED(sp%dft%mode_couplings)) DEALLOCATE(sp%dft%mode_couplings) ALLOCATE(sp%dft%mode_couplings(nc)) ! IF(PRESENT(couplings)) THEN sp%dft%mode_couplings = couplings ELSE sp%dft%mode_couplings = (/ (k,k=kmin,kmax) /) END IF END SUBROUTINE init_dft !=========================================================================== SUBROUTINE set_spline2d(p, ngauss, grid1, grid2, sp, period, nlppform,& & nlequid) ! ! Setup a 2d spline ! INTEGER, INTENT(in) :: p(2), ngauss(2) DOUBLE PRECISION, INTENT(in) :: grid1(:) DOUBLE PRECISION, INTENT(in) :: grid2(:) TYPE(spline2d), INTENT(out) :: sp LOGICAL, OPTIONAL, INTENT(in) :: period(2) LOGICAL, OPTIONAL, INTENT(in) :: nlppform LOGICAL, OPTIONAL, INTENT(in) :: nlequid(2) ! IF(PRESENT(period).AND.PRESENT(nlppform).AND.PRESENT(nlequid)) THEN CALL set_spline1d(p(1), ngauss(1), grid1, sp%sp1, period(1), nlppform, & & nlequid(1)) CALL set_spline1d(p(2), ngauss(2), grid2, sp%sp2, period(2), nlppform, & & nlequid(2)) ELSE IF(PRESENT(period).AND.PRESENT(nlppform)) THEN CALL set_spline1d(p(1), ngauss(1), grid1, sp%sp1, period(1), nlppform) CALL set_spline1d(p(2), ngauss(2), grid2, sp%sp2, period(2), nlppform) ELSE IF(PRESENT(period).AND.PRESENT(nlequid)) THEN CALL set_spline1d(p(1), ngauss(1), grid1, sp%sp1, period=period(1), nlequid=nlequid(1)) CALL set_spline1d(p(2), ngauss(2), grid2, sp%sp2, period=period(2), nlequid=nlequid(2)) ELSE IF(PRESENT(nlppform).AND.PRESENT(nlequid)) THEN CALL set_spline1d(p(1), ngauss(1), grid1, sp%sp1, nlppform=nlppform, nlequid=nlequid(1)) CALL set_spline1d(p(2), ngauss(2), grid2, sp%sp2, nlppform=nlppform, nlequid=nlequid(2)) ELSE IF(PRESENT(period)) THEN CALL set_spline1d(p(1), ngauss(1), grid1, sp%sp1, period(1)) CALL set_spline1d(p(2), ngauss(2), grid2, sp%sp2, period(2)) ELSE IF(PRESENT(nlppform)) THEN CALL set_spline1d(p(1), ngauss(1), grid1, sp%sp1, nlppform=nlppform) CALL set_spline1d(p(2), ngauss(2), grid2, sp%sp2, nlppform=nlppform) ELSE IF(PRESENT(nlequid)) THEN CALL set_spline1d(p(1), ngauss(1), grid1, sp%sp1, nlequid=nlequid(1)) CALL set_spline1d(p(2), ngauss(2), grid2, sp%sp2, nlequid=nlequid(2)) ELSE CALL set_spline1d(p(1), ngauss(1), grid1, sp%sp1) CALL set_spline1d(p(2), ngauss(2), grid2, sp%sp2) END IF END SUBROUTINE set_spline2d !=========================================================================== SUBROUTINE set_spline2d1d(p, ngauss, grid1, grid2, grid3, sp, period, & & nlppform, nlequid) ! ! Setup a 2d1d spline (for axisymmetric problems) ! INTEGER, INTENT(in) :: p(3), ngauss(3) DOUBLE PRECISION, INTENT(in) :: grid1(:) DOUBLE PRECISION, INTENT(in) :: grid2(:) DOUBLE PRECISION, INTENT(in) :: grid3(:) TYPE(spline2d1d), INTENT(out) :: sp LOGICAL, OPTIONAL, INTENT(in) :: period(3) LOGICAL, OPTIONAL, INTENT(in) :: nlppform LOGICAL, OPTIONAL, INTENT(in) :: nlequid(3) ! IF(PRESENT(period).AND.PRESENT(nlppform).AND.PRESENT(nlequid)) THEN CALL set_spline2d(p(1:2), ngauss(1:2), grid1, grid2, sp%sp12, period(1:2),& & nlppform, nlequid(1:2)) CALL set_spline1d(p(3), ngauss(3), grid3, sp%sp3, period(3), nlppform, & & nlequid(3)) ELSE IF(PRESENT(period).AND.PRESENT(nlppform)) THEN CALL set_spline2d(p(1:2), ngauss(1:2), grid1, grid2, sp%sp12, period(1:2),& & nlppform) CALL set_spline1d(p(3), ngauss(3), grid3, sp%sp3, period(3), nlppform) ELSE IF(PRESENT(period).AND.PRESENT(nlequid)) THEN CALL set_spline2d(p(1:2), ngauss(1:2), grid1, grid2, sp%sp12, period=period(1:2),& & nlequid=nlequid(1:2)) CALL set_spline1d(p(3), ngauss(3), grid3, sp%sp3, period=period(3), & & nlequid=nlequid(3)) ELSE IF(PRESENT(nlppform).AND.PRESENT(nlequid)) THEN CALL set_spline2d(p(1:2), ngauss(1:2), grid1, grid2, sp%sp12, nlppform=nlppform,& & nlequid=nlequid(1:2)) CALL set_spline1d(p(3), ngauss(3), grid3, sp%sp3, nlppform=nlppform, & & nlequid=nlequid(3)) ELSE IF(PRESENT(period)) THEN CALL set_spline2d(p(1:2), ngauss(1:2), grid1, grid2, sp%sp12, period(1:2)) CALL set_spline1d(p(3), ngauss(3), grid3, sp%sp3, period(3)) ELSE IF(PRESENT(nlppform)) THEN CALL set_spline2d(p(1:2), ngauss(1:2), grid1, grid2, sp%sp12, nlppform=nlppform) CALL set_spline1d(p(3), ngauss(3), grid3, sp%sp3, nlppform=nlppform) ELSE IF(PRESENT(nlequid)) THEN CALL set_spline2d(p(1:2), ngauss(1:2), grid1, grid2, sp%sp12, nlequid=nlequid(1:2)) CALL set_spline1d(p(3), ngauss(3), grid3, sp%sp3, nlequid=nlequid(3)) ELSE CALL set_spline2d(p(1:2), ngauss(1:2), grid1, grid2, sp%sp12) CALL set_spline1d(p(3), ngauss(3), grid3, sp%sp3) END IF END SUBROUTINE set_spline2d1d !=========================================================================== SUBROUTINE get_dim1(sp, dim, nx, nidbas) ! ! Return spline dimension of 1d spline sp and optionally ! number of knot intervals nx and spline degree nidbas ! TYPE(spline1d), INTENT(in) :: sp INTEGER, INTENT(out) :: dim INTEGER, OPTIONAL, INTENT(out) :: nx, nidbas dim = sp%dim IF( PRESENT(nx) ) nx = sp%nints IF( PRESENT(nidbas) ) nidbas = sp%order - 1 END SUBROUTINE get_dim1 !=========================================================================== SUBROUTINE get_dim2(sp, dim, nx, nidbas) ! ! Return spline dimension of 2d spline sp and optionally ! number of knot intervals nx and spline degree nidbas ! TYPE(spline2d), INTENT(in) :: sp INTEGER, INTENT(out) :: dim(2) INTEGER, OPTIONAL, INTENT(out) :: nx(2), nidbas(2) ! dim(1) = sp%sp1%dim IF( PRESENT(nx) ) nx(1) = sp%sp1%nints IF( PRESENT(nidbas) ) nidbas(1) = sp%sp1%order - 1 ! dim(2) = sp%sp2%dim IF( PRESENT(nx) ) nx(2) = sp%sp2%nints IF( PRESENT(nidbas) ) nidbas(2) = sp%sp2%order - 1 END SUBROUTINE get_dim2 !=========================================================================== SUBROUTINE get_gauss(sp, n, i, x, w) ! ! Get Gauss points and weights from spline sp ! TYPE(spline1d), INTENT(in) :: sp INTEGER, INTENT(out) :: n INTEGER, INTENT(in), OPTIONAL :: i DOUBLE PRECISION, DIMENSION(:), OPTIONAL, INTENT(out) :: x, w ! n = SIZE(sp%gausx, 1) IF( PRESENT(i) ) THEN x(:) = sp%gausx(:,i) w(:) = sp%gausw(:,i) END IF END SUBROUTINE get_gauss !=========================================================================== SUBROUTINE def_basfun0(xp, sp, fun, left) ! ! Define the basis function and its derivatives at x ! fun(i,j) = (j-1)th derivative of ith basis function. ! DOUBLE PRECISION, INTENT(in) :: xp TYPE(spline1d), INTENT(in) :: sp DOUBLE PRECISION, INTENT(out) :: fun(:,:) INTEGER, OPTIONAL, INTENT(out) :: left DOUBLE PRECISION :: x INTEGER :: p, n, kleft INTEGER :: ierr, j, k ! CALL locintv(sp, xp, kleft) CALL basfun(xp, sp, fun, kleft+1) IF(PRESENT(left)) THEN left = kleft END IF END SUBROUTINE def_basfun0 !=========================================================================== SUBROUTINE basfun0(xp, sp, f, left) ! ! Define the basis function and its derivatives at x, in interval ! [left,left+1], using PPFORM defined by sp%val0., left=1,..,nints ! f(i,j) = jth derivative of ith basis function. ! DOUBLE PRECISION, INTENT(in) :: xp DOUBLE PRECISION, INTENT(out) :: f(:,0:) INTEGER, INTENT(in) :: left ! =1,2,...,nints TYPE(spline1d) :: sp ! INTEGER :: p, n, jdermx, i, jder DOUBLE PRECISION :: x, h ! p = sp%order - 1 n = sp%nints jdermx = SIZE(f,2)-1 ! h = xp-sp%knots(left-1) ! knots are numbered from 0 ! IF(sp%period .AND. sp%nlequid) THEN DO jder=0,jdermx ! Derivative jder CALL my_ppval(p, h, sp%valc, jder, f(:,jder)) END DO ELSE DO jder=0,jdermx ! Derivative jder CALL my_ppval(p, h, sp%val0(:,:,left), jder, f(:,jder)) END DO END IF CONTAINS SUBROUTINE my_ppval(p, x, ppform, jder, f) INTEGER, INTENT(in) :: p DOUBLE PRECISION, INTENT(in) :: x, ppform(:,:) INTEGER, INTENT(in) :: jder DOUBLE PRECISION, INTENT(out) :: f(:) DOUBLE PRECISION :: fact INTEGER :: j SELECT CASE (jder) CASE(0) ! function value SELECT CASE(p) CASE(1) f(1) = ppform(1,1) + x*ppform(2,1) CASE(2) f(1) = ppform(1,1) + x*(ppform(2,1)+x*ppform(3,1)) f(2) = ppform(1,2) + x*(ppform(2,2)+x*ppform(3,2)) CASE(3) f(1) = ppform(1,1) + x*(ppform(2,1)+x*(ppform(3,1)+x*ppform(4,1))) f(2) = ppform(1,2) + x*(ppform(2,2)+x*(ppform(3,2)+x*ppform(4,2))) f(3) = ppform(1,3) + x*(ppform(2,3)+x*(ppform(3,3)+x*ppform(4,3))) CASE(4:) f(1:p) = ppform(p+1,1:p) DO j=p,1,-1 f(1:p) = f(1:p)*x + ppform(j,1:p) END DO END SELECT f(p+1) = 1.0d0 - SUM(f(1:p)) CASE(1) ! 1st derivative SELECT CASE(p) CASE(1) f(1) = ppform(2,1) CASE(2) f(1) = ppform(2,1) + x*2.d0*ppform(3,1) f(2) = ppform(2,2) + x*2.d0*ppform(3,2) CASE(3) f(1) = ppform(2,1) + x*(2.d0*ppform(3,1)+x*3.0d0*ppform(4,1)) f(2) = ppform(2,2) + x*(2.d0*ppform(3,2)+x*3.0d0*ppform(4,2)) f(3) = ppform(2,3) + x*(2.d0*ppform(3,3)+x*3.0d0*ppform(4,3)) CASE(4:) f(1:p) = p*ppform(p+1,1:p) DO j=p-1,1,-1 f(1:p) = f(1:p)*x + j*ppform(j+1,1:p) END DO END SELECT f(p+1) = -SUM(f(1:p)) CASE default ! 2nd and higher derivatives fact = p-jder f(1:p) = ppform(p+1,1:p) DO j=p,jder+1,-1 f(1:p) = f(1:p)/fact*j*x + ppform(j,1:p) fact = fact-1.0d0 END DO DO j=2,jder f(1:p) = f(1:p)*j END DO f(p+1) = -SUM(f(1:p)) END SELECT END SUBROUTINE my_ppval END SUBROUTINE basfun0 !=========================================================================== SUBROUTINE basfun1(xp, sp, f, left) ! ! Define the basis function and its derivatives at x, in interval i=1,2, ! using PPFORM defined by sp%val0. ! f(i,j,p) = jth derivative of ith basis function at coordinate xp ! DOUBLE precision, INTENT(in) :: xp(:) DOUBLE PRECISION, INTENT(out) :: f(0:,0:,:) INTEGER, INTENT(in) :: left(:) ! =1,2,...,nints TYPE(spline1d) :: sp ! INTEGER :: p, n, kleft, i, j, jder, ierr INTEGER :: npt, jdermx DOUBLE PRECISION :: h(SIZE(xp)), temp(SIZE(xp)) DOUBLE PRECISION :: ppform(SIZE(xp),sp%order) ! p = sp%order - 1 n = sp%nints npt = SIZE(xp) jdermx = SIZE(f,2)-1 ! h = xp - sp%knots(left-1) ! knots are numbered from 0 ! IF( sp%period .AND. sp%nlequid) THEN DO jder=0,jdermx CALL my_ppval_same(p, h, sp%valc, jder, f(:,jder,1:npt)) END DO ELSE DO i=0,p ! Spline i DO j=1,npt ppform(j,:) = sp%val0(:,i+1,left(j)) END DO DO jder=0,jdermx ! Derivative jder CALL my_ppval(p, h, ppform, jder, temp) f(i,jder,1:npt) = temp END DO END DO END IF CONTAINS !+++ SUBROUTINE my_ppval(p, x, ppform, jder, f) ! ! Compute function and derivatives from the PP representation ! for many points x(:) INTEGER, INTENT(in) :: p DOUBLE PRECISION, INTENT(in) :: x(:) DOUBLE PRECISION, INTENT(in) :: ppform(:,:) INTEGER, INTENT(in) :: jder DOUBLE PRECISION, INTENT(out) :: f(:) DOUBLE PRECISION :: fact INTEGER :: j SELECT CASE (jder) CASE(0) ! function value SELECT CASE(p) CASE(1) f(:) = ppform(:,1) + x(:)*ppform(:,2) CASE(2) f(:) = ppform(:,1) + x(:)*(ppform(:,2)+x(:)*ppform(:,3)) CASE(3) f(:) = ppform(:,1) + x(:)*(ppform(:,2)+x(:)*(ppform(:,3)+x(:)*ppform(:,4))) CASE(4:) f(:) = ppform(:,p+1) DO j=p,1,-1 f(:) = ppform(:,j) + f(:)*x(:) END DO END SELECT CASE(1) ! 1st derivative SELECT CASE(p) CASE(1) f(:) = ppform(:,2) CASE(2) f(:) = ppform(:,2) + x(:)*2.d0*ppform(:,3) CASE(3) f(:) = ppform(:,2) + x(:)*(2.d0*ppform(:,3)+x(:)*3.0d0*ppform(:,4)) CASE(4:) f(:) = p*ppform(:,p+1) DO j=p-1,1,-1 f(:) = f(:)*x(:) + j*ppform(:,j+1) END DO END SELECT CASE default ! 2nd and higher derivatives f(:) = ppform(:,p+1) fact = p-jder DO j=p,jder+1,-1 f(:) = f(:)/fact*j*x(:) + ppform(:,j) fact = fact-1.0d0 END DO DO j=2,jder f(:) = f(:)*j END DO END SELECT END SUBROUTINE my_ppval !+++ SUBROUTINE my_ppval_same(p, x, ppform, jder, f) ! ! Compute function and derivatives from the PP representation ! for many points x(:), same ppform (translationnal invariant spline) INTEGER, INTENT(in) :: p DOUBLE PRECISION, INTENT(in) :: x(:) DOUBLE PRECISION, INTENT(in) :: ppform(:,:) INTEGER, INTENT(in) :: jder DOUBLE PRECISION, INTENT(out) :: f(:,:) DOUBLE PRECISION :: fact INTEGER :: j,k SELECT CASE (jder) ! ! function value CASE(0) SELECT CASE(p) CASE(1) f(1,:) = ppform(1,1) + x(:)*ppform(2,1) CASE(2) f(1,:) = ppform(1,1) + x(:)*(ppform(2,1)+x(:)*ppform(3,1)) f(2,:) = ppform(1,2) + x(:)*(ppform(2,2)+x(:)*ppform(3,2)) CASE(3) f(1,:) = ppform(1,1) + x(:)*(ppform(2,1)+x(:)*(ppform(3,1)+x(:)*ppform(4,1))) f(2,:) = ppform(1,2) + x(:)*(ppform(2,2)+x(:)*(ppform(3,2)+x(:)*ppform(4,2))) f(3,:) = ppform(1,3) + x(:)*(ppform(2,3)+x(:)*(ppform(3,3)+x(:)*ppform(4,3))) CASE(4:) DO k=1,p f(k,:) = ppform(p+1,k) DO j=p,1,-1 f(k,:) = ppform(j,k) + f(k,:)*x(:) END DO END DO END SELECT f(p+1,:) = 1.0d0 - SUM(f(1:p,:),DIM=1) ! ! 1st derivative CASE(1) SELECT CASE(p) CASE(1) f(1,:) = ppform(2,1) CASE(2) f(1,:) = ppform(2,1) + x(:)*2.d0*ppform(3,1) f(2,:) = ppform(2,2) + x(:)*2.d0*ppform(3,2) CASE(3) f(1,:) = ppform(2,1) + x(:)*(2.d0*ppform(3,1)+x(:)*3.0d0*ppform(4,1)) f(2,:) = ppform(2,2) + x(:)*(2.d0*ppform(3,2)+x(:)*3.0d0*ppform(4,2)) f(3,:) = ppform(2,3) + x(:)*(2.d0*ppform(3,3)+x(:)*3.0d0*ppform(4,3)) CASE(4:) DO k=1,p f(k,:) = p*ppform(p+1,k) DO j=p-1,1,-1 f(k,:) = f(k,:)*x(:) + j*ppform(j+1,k) END DO END DO END SELECT f(p+1,:) = -SUM(f(1:p,:),DIM=1) ! ! 2nd and higher derivatives CASE(2:) DO k=1,p f(k,:) = ppform(p+1,k) fact = p-jder DO j=p,jder+1,-1 f(k,:) = f(k,:)/fact*j*x(:) + ppform(j,k) fact = fact-1.0d0 END DO DO j=2,jder f(k,:) = f(k,:)*j END DO END DO f(p+1,:) = -SUM(f(1:p,:),DIM=1) END SELECT END SUBROUTINE my_ppval_same !+++ END SUBROUTINE basfun1 !=========================================================================== SUBROUTINE def_basfun1(xp, sp, fun, left) ! ! Define the basis function and its derivatives at x ! fun(i,j) = (j-1)th derivative of ith basis function. ! DOUBLE PRECISION, INTENT(in) :: xp(:) TYPE(spline1d), INTENT(in) :: sp DOUBLE PRECISION, INTENT(out) :: fun(:,:,:) INTEGER, OPTIONAL, INTENT(out) :: left(:) DOUBLE PRECISION :: x(SIZE(xp)) INTEGER :: kleft(SIZE(xp)) INTEGER :: p, n INTEGER :: ierr, j, k ! CALL locintv(sp, xp, kleft) CALL basfun(xp, sp, fun, kleft+1) IF(PRESENT(left)) THEN left = kleft END IF END SUBROUTINE def_basfun1 !=========================================================================== SUBROUTINE ft_basfun0(xp, sp, ft_f, left) ! ! DFT of basis functions: ft_f(k,j), k=sp%dft%kmin, sp$dft%kmax (modes) ! j=0, p-1 (order of derivative)) ! DOUBLE PRECISION, INTENT(in) :: xp DOUBLE COMPLEX, INTENT(out) :: ft_f(:,:) INTEGER, INTENT(in) :: left TYPE(spline1d) :: sp DOUBLE PRECISION :: f(sp%order,SIZE(ft_f,2)) ! ! Construct all splines on interval [left,left+1] at coordinate xp CALL basfun(xp, sp, f, left) ! ! DFT of splines ft_f = MATMUL(sp%dft%coefs, f) END SUBROUTINE ft_basfun0 !=========================================================================== SUBROUTINE ft_basfun1(xp, sp, ft_f, left) ! ! DFT of basis functions: ft_f(k,j), k=sp%dft%kmin, sp$dft%kmax (modes) ! j=0, p-1 (order of derivative)) ! at xp(i) ! DOUBLE PRECISION, INTENT(in) :: xp(:) DOUBLE COMPLEX, INTENT(out) :: ft_f(:,:,:) INTEGER, INTENT(in) :: left(:) TYPE(spline1d) :: sp ! INTEGER :: i, n3 DOUBLE PRECISION :: f(sp%order,SIZE(ft_f,2),SIZE(ft_f,3)) ! ! Construct all splines on interval [left,left+1] at coordinate xp CALL basfun(xp, sp, f, left) ! ! DFT of splines n3 = SIZE(xp) DO i=1,n3 ft_f(:,:,i) = MATMUL(sp%dft%coefs, f(:,:,i)) END DO END SUBROUTINE ft_basfun1 !=========================================================================== SUBROUTINE basfun_recur(xp, sp, fun, left) ! ! Define the basis function and its derivatives at x, in interval i=1,2, ! using recurrence construct in function BVALUE ! fun(i,j) = (j-1)th derivative of ith basis function. ! DOUBLE PRECISION, INTENT(in) :: xp TYPE(spline1d), INTENT(in) :: sp DOUBLE PRECISION, INTENT(out) :: fun(:,:) INTEGER, INTENT(in) :: left DOUBLE PRECISION :: bcoef(1)=1.0d0, bvalue, x INTEGER :: p, n, kleft INTEGER :: ierr, j, k ! p = sp%order - 1 n = sp%nints fun = 0.0d0 ! IF( sp%period ) THEN ! ** Applly periodicity ** x =sp%knots(0) + MODULO(xp-sp%knots(0), sp%lperiod) ELSE x = xp END IF ! kleft = left-1 DO j=kleft-p, kleft DO k=0, SIZE(fun,2)-1 fun(j-kleft+p+1, k+1) = bvalue(sp%knots(j), bcoef, 1, p+1, x, k) END DO END DO END SUBROUTINE basfun_recur !=========================================================================== SUBROUTINE gauleg(x1,x2,x,w,n) ! ! Compute Gauss-Legendre abscissas and weights in interval [x1, x2] ! INTEGER, INTENT(in) :: n DOUBLE PRECISION, INTENT(in) :: x1,x2 DOUBLE PRECISION, INTENT(out) :: x(n),w(n) DOUBLE PRECISION :: EPS INTEGER i,j,m DOUBLE PRECISION p1,p2,p3,pp,xl,xm,z,z1 ! eps=EPSILON(eps) m=(n+1)/2 xm=0.5d0*(x2+x1) xl=0.5d0*(x2-x1) DO i=1,m z=COS(3.141592654d0*(i-.25d0)/(n+.5d0)) DO p1=1.d0; p2=0.d0 DO j=1,n p3=p2; p2=p1 p1=((2.d0*j-1.d0)*z*p2-(j-1.d0)*p3)/j END DO pp=n*(z*p1-p2)/(z*z-1.d0) z1=z z=z1-p1/pp IF( ABS(z-z1) .LE. EPS ) EXIT END DO x(i)=xm-xl*z x(n+1-i)=xm+xl*z w(i)=2.d0*xl/((1.d0-z*z)*pp*pp) w(n+1-i)=w(i) END DO END SUBROUTINE gauleg !=========================================================================== SUBROUTINE gridval1dz(sp, xp, f, jder, c, ppformz) ! ! Compute values or jder-th dervivative of f(x) from ppform ! of spline sp. Recompute the ppform if the optional spline ! coefficients are given. ! TYPE(spline1d) :: sp DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: xp DOUBLE COMPLEX, DIMENSION(:), INTENT(out) :: f INTEGER, INTENT(in) :: jder DOUBLE COMPLEX, DIMENSION(:), OPTIONAL, INTENT(in) :: c DOUBLE COMPLEX, DIMENSION(:,:), OPTIONAL :: ppformz DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) DOUBLE PRECISION :: x(SIZE(xp)), h, fact INTEGER :: order, nints, i, j, nidbas INTEGER :: leftx(SIZE(xp)) ! order = sp%order nints = sp%nints nidbas = order-1 ! ! Compute PPFORM/BCOEFS if spline coefs are passed ! IF (PRESENT(c)) THEN IF (sp%nlppform) THEN IF( PRESENT(ppformz) ) THEN CALL topp0z(sp, c, ppformz) ELSE IF( ASSOCIATED(sp%ppformz) ) DEALLOCATE(sp%ppformz) ALLOCATE(sp%ppformz(order,nints)) CALL topp0z(sp, c, sp%ppformz) END IF ELSE IF( ASSOCIATED(sp%bcoefsc) ) DEALLOCATE(sp%bcoefsc) ALLOCATE(sp%bcoefsc(SIZE(c))) sp%bcoefsc = c END IF END IF ! ! Applly periodicity if required ! IF( sp%period ) THEN x =sp%knots(0) + MODULO(xp-sp%knots(0), sp%lperiod) ELSE x = xp END IF ! ! Locate the intervals containing x ! CALL locintv(sp, x, leftx) ! ! Compute function/derivatives ! IF( sp%nlppform ) THEN ! using PP form DO i=1,SIZE(x) IF( PRESENT(ppformz) ) THEN CALL ppval(sp, x(i), ppformz(:,leftx(i)+1), leftx(i), jder, f(i)) ELSE CALL ppval(sp, x(i), sp%ppformz(:,leftx(i)+1), leftx(i), jder, f(i)) END IF END DO ELSE ! using spline expansion ALLOCATE(fun(0:nidbas,0:jder)) f = 0.0d0 DO i=1,SIZE(x) CALL basfun(x(i), sp, fun, leftx(i)+1) DO j=0,nidbas f(i) = f(i) + sp%bcoefsc(leftx(i)+j+1)*fun(j,jder) END DO END DO DEALLOCATE(fun) END IF ! END SUBROUTINE gridval1dz !=========================================================================== SUBROUTINE gridval1d(sp, xp, f, jder, c, ppform) ! ! Compute values or jder-th dervivative of f(x) from ppform ! of spline sp. Recompute the ppform if the optional spline ! coefficients are given. ! TYPE(spline1d) :: sp DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: xp DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: f INTEGER, INTENT(in) :: jder DOUBLE PRECISION, DIMENSION(:), OPTIONAL, INTENT(in) :: c DOUBLE PRECISION, DIMENSION(:,:), OPTIONAL :: ppform DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) DOUBLE PRECISION :: x(SIZE(xp)), h, fact INTEGER :: order, nints, i, j, nidbas INTEGER :: leftx(SIZE(xp)) ! order = sp%order nints = sp%nints nidbas = order-1 ! ! Compute PPFORM/BCOEFS if spline coefs are passed ! IF (PRESENT(c)) THEN IF (sp%nlppform) THEN IF( PRESENT(ppform) ) THEN CALL topp0(sp, c, ppform) ELSE IF( ASSOCIATED(sp%ppform) ) DEALLOCATE(sp%ppform) ALLOCATE(sp%ppform(order,nints)) CALL topp0(sp, c, sp%ppform) END IF ELSE IF( ASSOCIATED(sp%bcoefs) ) DEALLOCATE(sp%bcoefs) ALLOCATE(sp%bcoefs(SIZE(c))) sp%bcoefs = c END IF END IF ! ! Applly periodicity if required ! IF( sp%period ) THEN x =sp%knots(0) + MODULO(xp-sp%knots(0), sp%lperiod) ELSE x = xp END IF ! ! Locate the intervals containing x ! CALL locintv(sp, x, leftx) ! ! Compute function/derivatives ! IF( sp%nlppform ) THEN ! using PP form DO i=1,SIZE(x) IF( PRESENT(ppform) ) THEN CALL ppval(sp, x(i), ppform(:,leftx(i)+1), leftx(i), jder, f(i)) ELSE CALL ppval(sp, x(i), sp%ppform(:,leftx(i)+1), leftx(i), jder, f(i)) END IF END DO ELSE ! using spline expansion ALLOCATE(fun(0:nidbas,0:jder)) f = 0.0d0 DO i=1,SIZE(x) CALL basfun(x(i), sp, fun, leftx(i)+1) DO j=0,nidbas f(i) = f(i) + sp%bcoefs(leftx(i)+j+1)*fun(j,jder) END DO END DO DEALLOCATE(fun) END IF ! END SUBROUTINE gridval1d !=========================================================================== SUBROUTINE def_knots(p, xg, knots, period, nlskip) ! ! Define spline knots for interpolating at sites given by xg ! INTEGER, INTENT(in) :: p DOUBLE PRECISION, INTENT(in) :: xg(0:) DOUBLE PRECISION, POINTER :: knots(:) LOGICAL, OPTIONAL, INTENT(in) :: period, nlskip LOGICAL :: kperiod, mlskip INTEGER :: npt, dim, nx, i, ii ! kperiod=.FALSE. mlskip = .TRUE. IF( PRESENT(period) ) kperiod=period IF( PRESENT(nlskip) ) mlskip = nlskip ! ! Periodic splines ! IF( kperiod ) THEN nx = SIZE(xg) -1 IF( ASSOCIATED(knots) ) DEALLOCATE(knots) ALLOCATE(knots(0:nx)) IF( MODULO(p,2) .NE. 0 ) THEN ! Odd degree knots(0:nx) = xg(0:nx) ELSE ! Even degree DO i=1,nx knots(i) = 0.5d0*(xg(i-1)+xg(i)) END DO knots(0) = knots(nx) - (xg(nx)-xg(0)) END IF RETURN END IF ! ! Non-periodic splines ! npt = SIZE(xg) dim = npt IF( .NOT. mlskip ) THEN dim = dim + 2*(p/2) ! Add BC on derivatives END IF nx = dim-p IF( ASSOCIATED(knots) ) DEALLOCATE(knots) ALLOCATE(knots(0:nx)) ! knots(0) = xg(0) knots(nx) = xg(npt-1) ! IF( MODULO(p,2) .EQ. 0 ) THEN ii = 0 IF( mlskip ) ii = p/2 ! skip first p/2 intervals DO i=1,nx-1 ii = ii+1 knots(i) = (xg(ii)+xg(ii-1))/2 END DO ELSE ii = 0 IF( mlskip ) ii = (p-1)/2 ! skip (p-1)/2 points after the first point ii=0 DO i=1,nx-1 ii = ii+1 knots(i) = xg(ii) END DO END IF ! END SUBROUTINE def_knots !=========================================================================== SUBROUTINE allsplines(sp, xpt, splines) ! ! Return all splines defined on points xpt ! TYPE(spline1d) :: sp DOUBLE PRECISION, INTENT(in) :: xpt(:) DOUBLE PRECISION, POINTER :: splines(:,:) DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) INTEGER :: i, n, left, dim, p ! p = sp%order - 1 dim = sp%dim n = SIZE(xpt) ! IF( ASSOCIATED(splines) ) DEALLOCATE(splines) ALLOCATE(splines(n,dim), fun(p+1,1)) splines = 0.0d0 DO i=1,n CALL locintv(sp, xpt(i), left) CALL basfun(xpt(i), sp, fun, left+1) splines(i,left+1:left+p+1) = fun(1:p+1,1) END DO DEALLOCATE(fun) END SUBROUTINE allsplines !=========================================================================== SUBROUTINE set_splcoef1d(p, x, sp, period, ibc) ! ! Setup 1d interpolation matrix for spline of degree p ! INTEGER, INTENT(in) :: p DOUBLE PRECISION, INTENT(in) :: x(:) TYPE(spline1d), INTENT(out) :: sp LOGICAL, OPTIONAL, INTENT(in) :: period INTEGER, OPTIONAL :: ibc(:,:) ! LOGICAL :: kperiod ! kperiod = .FALSE. IF( PRESENT(period) ) kperiod = period ! IF( kperiod ) THEN CALL splcoefp_setup(p, x, sp) ELSE IF( PRESENT(ibc) ) THEN CALL splcoef_setup(p, x, sp, ibc) ELSE CALL splcoef_setup(p, x, sp) END IF END IF END SUBROUTINE set_splcoef1d !=========================================================================== SUBROUTINE set_splcoef2d(p, x1, x2, sp, period, ibc1, ibc2) ! ! Setup 2d interpolation matrix for spline of degree p ! INTEGER, INTENT(in) :: p(2) DOUBLE PRECISION, INTENT(in) :: x1(:), x2(:) TYPE(spline2d), INTENT(out) :: sp LOGICAL, OPTIONAL, INTENT(in) :: period(2) INTEGER, OPTIONAL :: ibc1(:,:),ibc2(:,:) ! LOGICAL :: kperiod(2) ! kperiod = .FALSE. IF( PRESENT(period) ) kperiod = period ! ! Direction 1 IF( kperiod(1) ) THEN CALL splcoefp_setup(p(1), x1, sp%sp1) ELSE IF( PRESENT(ibc1) ) THEN CALL splcoef_setup(p(1), x1, sp%sp1, ibc1) ELSE CALL splcoef_setup(p(1), x1, sp%sp1) END IF END IF ! ! Direction 2 IF( kperiod(2) ) THEN CALL splcoefp_setup(p(2), x2, sp%sp2) ELSE IF( PRESENT(ibc2) ) THEN CALL splcoef_setup(p(2), x2, sp%sp2, ibc2) ELSE CALL splcoef_setup(p(2), x2, sp%sp2) END IF END IF END SUBROUTINE set_splcoef2d !=========================================================================== SUBROUTINE get_splcoef1(sp, f, c, fbc) ! ! Compute the spline coefficients c from grid values f ! TYPE(spline1d) :: sp DOUBLE PRECISION, INTENT(in) :: f(:) DOUBLE PRECISION, INTENT(out) :: c(:) DOUBLE PRECISION, INTENT(in), OPTIONAL :: fbc(:,:) ! IF( sp%period ) THEN CALL splcoefp1(sp, f, c) ELSE IF( PRESENT(fbc) ) THEN CALL splcoef1(sp, f, c, fbc) ELSE CALL splcoef1(sp, f, c) END IF END IF END SUBROUTINE get_splcoef1 !=========================================================================== SUBROUTINE get_splcoef2d(sp, f, c, fbc1, fbc2) ! ! Compute the spline coefficients c from 2d grid values f ! TYPE(spline2d) :: sp DOUBLE PRECISION, INTENT(in) :: f(:,:) DOUBLE PRECISION, INTENT(out) :: c(:,:) DOUBLE PRECISION :: ctr(SIZE(c,2), SIZE(f,1)) DOUBLE PRECISION, INTENT(in), OPTIONAL :: fbc1(:,:,:), fbc2(:,:,:) DOUBLE PRECISION, DIMENSION(:, :) , ALLOCATABLE :: c_fbc1_left, c_fbc1_right DOUBLE PRECISION, DIMENSION(:, :, :), ALLOCATABLE :: c_fbc1_all ! ! Along direction 2 ! IF( PRESENT(fbc2) ) THEN CALL get_splcoefn(sp%sp2, TRANSPOSE(f), ctr, fbc2) ELSE CALL get_splcoefn(sp%sp2, TRANSPOSE(f), ctr) END IF ! ! Along direction 1 ! IF( PRESENT(fbc1) ) THEN ALLOCATE( c_fbc1_left(SIZE(c, 2), SIZE(fbc1, 2))) ALLOCATE(c_fbc1_right(SIZE(c, 2), SIZE(fbc1, 2))) ALLOCATE(c_fbc1_all(2, SIZE(fbc1, 2), SIZE(c, 2))) CALL get_splcoefn(sp%sp2, TRANSPOSE(fbc1(1, :, :)), c_fbc1_left ) CALL get_splcoefn(sp%sp2, TRANSPOSE(fbc1(2, :, :)), c_fbc1_right) c_fbc1_all(1, :, :) = TRANSPOSE(c_fbc1_left ) c_fbc1_all(2, :, :) = TRANSPOSE(c_fbc1_right) CALL get_splcoefn(sp%sp1, TRANSPOSE(ctr), c, c_fbc1_all) DEALLOCATE(c_fbc1_left, c_fbc1_right, c_fbc1_all) ELSE CALL get_splcoefn(sp%sp1, TRANSPOSE(ctr), c) END IF ! END SUBROUTINE get_splcoef2d !=========================================================================== SUBROUTINE get_splcoef2dz(sp, f, c) ! ! Compute the spline coefficients c from 2d grid values f ! TYPE(spline2d) :: sp DOUBLE COMPLEX, INTENT(in) :: f(:,:) DOUBLE COMPLEX, INTENT(out) :: c(:,:) DOUBLE PRECISION, DIMENSION(SIZE(c,1), SIZE(c,2),2) :: pc ! CALL get_splcoef2d(sp, REAL(f), pc(:,:,1)) CALL get_splcoef2d(sp, AIMAG(f), pc(:,:,2)) c(:,:) = CMPLX(pc(:,:,1),pc(:,:,2)) END SUBROUTINE get_splcoef2dz !=========================================================================== SUBROUTINE get_splcoef1z(sp, f, c, fbc) ! ! Compute the spline coefficients c from grid values f ! TYPE(spline1d) :: sp DOUBLE COMPLEX, INTENT(in) :: f(:) DOUBLE COMPLEX, INTENT(out) :: c(:) DOUBLE COMPLEX, INTENT(in), OPTIONAL :: fbc(:,:) DOUBLE PRECISION :: pf(SIZE(f),2), pc(SIZE(c),2) DOUBLE PRECISION, ALLOCATABLE :: pfbc(:,:,:) ! pf(:,1) = REAL(f(:)) pf(:,2) = AIMAG(f(:)) IF(PRESENT(fbc)) THEN ALLOCATE(pfbc(SIZE(fbc,1),SIZE(fbc,2),2)) pfbc(:,:,1) = REAL(fbc(:,:)) pfbc(:,:,2) = AIMAG(fbc(:,:)) CALL get_splcoefn(sp, pf, pc, pfbc) DEALLOCATE(pfbc) ELSE CALL get_splcoefn(sp, pf, pc) END IF c(:) = CMPLX(pc(:,1), pc(:,2)) ! END SUBROUTINE get_splcoef1z !=========================================================================== SUBROUTINE get_splcoefn(sp, f, c, fbc) ! ! Compute the spline coefficients c from grid values f ! TYPE(spline1d) :: sp DOUBLE PRECISION, INTENT(in) :: f(:,:) DOUBLE PRECISION, INTENT(out) :: c(:,:) DOUBLE PRECISION, INTENT(in), OPTIONAL :: fbc(:,:,:) ! IF( sp%period ) THEN CALL splcoefpn(sp, f, c) ELSE IF( PRESENT(fbc) ) THEN CALL splcoefn(sp, f, c, fbc) ELSE CALL splcoefn(sp, f, c) END IF END IF END SUBROUTINE get_splcoefn !=========================================================================== SUBROUTINE splcoef_setup(p, x, sp, ibc) ! ! Setup the interpolation matrix ! for spline of degree p ! INTEGER, INTENT(in) :: p DOUBLE PRECISION, INTENT(in) :: x(:) TYPE(spline1d), INTENT(out) :: sp INTEGER, OPTIONAL :: ibc(:,:) DOUBLE PRECISION, POINTER :: knots(:)=>NULL(), arow(:)=>NULL(), & & fun(:,:)=>NULL() INTEGER :: nx, dim, kl, ku, rank INTEGER :: i, left, ishift LOGICAL :: nlskip ! ! Type of Boundary Conditions nlskip = .TRUE. ishift = 0 IF( PRESENT(ibc) ) THEN nlskip = .FALSE. ishift = p/2 END IF ! ! Set up spline nx = SIZE(x) - 1 ! X is the interpolation sites CALL def_knots(p, x, knots, nlskip=nlskip) CALL set_spline(p, 0, knots, sp) sp%nsites = nx + 1 ! Store away the number of interpolation sites DEALLOCATE(knots) ! ! Set up interpolation matrix dim = sp%dim kl = MAX(p-1,0) ku = MAX(p-1,0) rank = dim CALL init(kl, ku, rank, 0, sp%mat) !!$ WRITE(*,'(a,3i6)') 'Interpolation matrix:, kl, ku, rank ', kl, ku, rank ! ! COMPUTE matrix row by row ALLOCATE(arow(dim), fun(p+1,0:p)) DO i=1,SIZE(x) arow = 0.0d0 CALL locintv(sp, x(i), left) CALL basfun(x(i), sp, fun(:,0:0), left+1) arow(left+1:left+p+1) = fun(1:p+1,0) CALL putrow(sp%mat, i+ishift, arow) !!$ WRITE(*,'(i5,13f8.3)') i+ishift, arow END DO ! ! Add BC if specified IF( PRESENT(ibc) ) THEN CALL locintv(sp, x(1), left) CALL basfun(x(1), sp, fun, left+1) ! BC at the left side DO i=1,p/2 arow = 0.0d0 arow(left+1:left+p+1) = fun(1:p+1,ibc(1,i)) CALL putrow(sp%mat, i, arow) !!$ WRITE(*,'(i5,13f8.3)') i, arow END DO CALL locintv(sp, x(SIZE(x)), left) CALL basfun(x(SIZE(x)), sp, fun, left+1) ! BC at the right side DO i=1,p/2 arow = 0.0d0 arow(left+1:left+p+1) = fun(1:p+1,ibc(2,i)) CALL putrow(sp%mat, dim-i+1, arow) !!$ WRITE(*,'(i5,13f8.3)') dim-i+1, arow END DO END IF DEALLOCATE(arow, fun) ! ! Factor the matrix CALL factor(sp%mat) ! END SUBROUTINE splcoef_setup !=========================================================================== SUBROUTINE splcoefp_setup(p, x, sp) ! ! Set up the interpolation matrix ! for periodic case ! USE matrix INTEGER, INTENT(in) :: p DOUBLE PRECISION, INTENT(in) :: x(:) TYPE(spline1d), INTENT(out) :: sp ! TYPE(gemat) :: hmat DOUBLE PRECISION, POINTER :: knots(:)=>NULL(), arow(:)=>NULL(), & & fun(:,:)=>NULL() !!$ DOUBLE PRECISION, POINTER :: arr2d(:,:)=>null() INTEGER :: nx, kl, ku, rank, mr, nc INTEGER :: i, left, j, jj DOUBLE PRECISION, PARAMETER :: zero=0.0d0, one=1.0d0 !________________________________________________________________________________ ! ! Set up spline ! nx = SIZE(x) - 1 ! X is the interpolation sites CALL def_knots(p, x, knots, period=.TRUE.) CALL set_spline(p, 0, knots, sp, period=.TRUE.) sp%nsites = nx + 1 ! Store away the number of interpolation sites DEALLOCATE(knots) !________________________________________________________________________________ ! ! Set up interpolation matrix sp%matp ! kl = MAX(p/2,0) ku = kl rank = nx !!$ WRITE(*,'(a,3i6)') 'Interpolation matrix:, kl, ku, rank ', kl, ku, rank ! CALL init(kl, ku, rank, 0, sp%matp%mat) ! matp%mat is a GB matrix ALLOCATE(sp%matp%matu(rank, kl+ku), sp%matp%matvt(kl+ku,rank)) ! sp%matp%matu = zero sp%matp%matvt = zero ! kl = ku = 2 DO j=1,kl ! [ 1 0 0 . . . . ] sp%matp%matu(j,j) = one ! [ 0 1 0 . . . . ] END DO ! [ . 0 . . . . . ] DO j=1,ku ! [ . . . . . 0 . ] i=rank-ku+j ! [ . . . . 0 1 0 ] sp%matp%matu(i,kl+j) = one ! [ . . . . 0 0 1 ] END DO !________________________________________________________________________________ ! ! COMPUTE matrix row by row ! ALLOCATE(arow(rank), fun(p+1,1)) DO i=1,rank arow = zero CALL locintv(sp, x(i), left) CALL basfun(x(i), sp, fun, left+1) left = left-p/2 DO j=0,p jj = MODULO(left+j, rank) + 1 arow(jj) = fun(j+1,1) END DO CALL putrow(sp%matp%mat, i, arow) IF( i .LE. kl ) THEN sp%matp%matvt(i,rank-kl+1:rank) = arow(rank-kl+1:rank) ELSE IF ( i .GE. rank-ku+1 ) THEN j = i-(rank-ku+1) + 1 sp%matp%matvt(kl+j,1:ku) = arow(1:ku) END IF !!$ WRITE(8, '(i5, 12(1pe12.3))') left, x(i), fun !!$ WRITE(*,'(i5, 12(1pe12.3))') i, arow END DO DEALLOCATE(arow, fun) ! !!$ PRINT*, 'Matrix U, V' !!$ DO i=1,rank !!$ WRITE(*, '(i5,12(1pe12.3))') i, sp%matp%matu(i,:), sp%matp%matvt(:,i) !!$ END DO !!$ ALLOCATE(arr2d(rank,rank)) !!$ arr2d = MATMUL(sp%matp%matu, sp%matp%matvt) !!$ PRINT*, 'Product U*V^T' !!$ DO i=1,rank !!$ WRITE(*, '(i5,12(1pe12.3))') i, arr2d(i,:) !!$ END DO !!$ DEALLOCATE(arr2d) !________________________________________________________________________________ ! ! Factorisation ! ! Factor A CALL factor(sp%matp%mat) ! ! For constant and linear splines, A is diagnonal! ! Should skip the rest ! IF( kl.EQ.0 .OR. ku.EQ.0 ) THEN RETURN END IF ! ! U <-- A^(-1) * U CALL bsolve(sp%matp%mat, sp%matp%matu) ! ! H <-- 1 + V^T * U mr = SIZE(sp%matp%matvt, 1) nc = SIZE(sp%matp%matvt, 2) CALL init(mr, 0, hmat) ! hmat is initialized to 0! DO i=1,mr hmat%val(i,i) = one END DO CALL dgemm('N', 'N', mr, mr, nc, one, sp%matp%matvt, mr, & & sp%matp%matu, nc, one, hmat%val, mr) ! !!$ hmat%val = MATMUL(sp%matp%matvt, sp%matp%matu) !!$ DO i=1,kl+ku !!$ hmat%val(i,i) = 1.0d0 + hmat%val(i,i) !!$ END DO ! ! V^T <-- H^(-1) V^T CALL factor(hmat) CALL bsolve(hmat, sp%matp%matvt) CALL destroy(hmat) ! END SUBROUTINE splcoefp_setup !=========================================================================== SUBROUTINE splcoef1(sp, f, c, fbc) ! ! Compute the spline coefficients c from grid values f and BC fbc ! TYPE(spline1d) :: sp DOUBLE PRECISION, INTENT(in) :: f(:) DOUBLE PRECISION, INTENT(out) :: c(:) DOUBLE PRECISION, INTENT(in), OPTIONAL :: fbc(:,:) INTEGER :: p, dim, i, ishift ! p = sp%order-1 dim = sp%dim ! ! BC at left and right boundary ishift = 0 IF( PRESENT(fbc) ) THEN DO i=1,p/2 c(i) = fbc(1,i) ! Left boundary c(dim-i+1) = fbc(2,i) ! Right boundary END DO ishift = p/2 END IF ! ! Interior points DO i=1,sp%nsites c(i+ishift) = f(i) END DO ! WRITE(*,'(a/(13f8.3))') 'RHS', c ! ! Solve for the interpolation coefs. using the factored sp%mat CALL bsolve(sp%mat, c) ! END SUBROUTINE splcoef1 !=========================================================================== SUBROUTINE splcoefn(sp, f, c, fbc) ! ! Compute the spline coefficients c from grid values f and BC fbc ! TYPE(spline1d) :: sp DOUBLE PRECISION, INTENT(in) :: f(:,:) DOUBLE PRECISION, INTENT(out) :: c(:,:) DOUBLE PRECISION, INTENT(in), OPTIONAL :: fbc(:,:,:) INTEGER :: p, dim, i, ishift ! p = sp%order-1 dim = sp%dim ! ! BC at left and right boundary ishift = 0 IF( PRESENT(fbc) ) THEN DO i=1,p/2 c(i,:) = fbc(1,i,:) ! Left boundary c(dim-i+1,:) = fbc(2,i,:) ! Right boundary END DO ishift = p/2 END IF ! ! Interior points ! c(:,j) for j>SIZE(f,2) could be anything DO i=1,sp%nsites ! (periodicity in the 2nd dimension)! c(i+ishift,1:SIZE(f,2)) = f(i,:) END DO !!$ WRITE(*,'(a/(13f8.3))') 'RHS', c ! ! Solve for the interpolation coefs. using the factored sp%mat CALL bsolve(sp%mat, c) ! END SUBROUTINE splcoefn !=========================================================================== SUBROUTINE splcoefp1(sp, f, c) ! ! Compute the spline coefficient c from grid values f ! f(x) is periodic ! USE matrix ! TYPE(spline1d) :: sp DOUBLE PRECISION, INTENT(in) :: f(:) DOUBLE PRECISION, INTENT(out) :: c(:) ! DOUBLE PRECISION, POINTER :: arow(:), brow(:) DOUBLE PRECISION, PARAMETER :: zero=0.0d0, one=1.0d0, minus1=-1.0d0 INTEGER :: dim, p, rank, bandw INTEGER :: i, j !________________________________________________________________________________ ! p = sp%order-1 rank = sp%nints bandw = SIZE(sp%matp%matvt,1) ! ! Solve the interpolation system ! ! Solve Ay = f ALLOCATE(arow(rank), brow(bandw)) ! arow(1:rank) = f(1:rank) CALL bsolve(sp%matp%mat, arow) ! ! For constant and linear splines, A is diagnonal! ! Should skip the rest ! IF( p.LE.1 ) GOTO 100 ! ! ! t = V^T*y CALL dgemv('N', bandw, rank, one, sp%matp%matvt, bandw, arow, 1, zero, & & brow, 1) ! ! y = y - Ut CALL dgemv('N', rank, bandw, minus1, sp%matp%matu, rank, brow, 1, one, & & arow, 1) ! 100 CONTINUE ! ! Interpolation coefficients dim = sp%dim DO i=1,dim j = MODULO(i-1-p/2, rank) + 1 c(i) = arow(j) END DO ! DEALLOCATE(arow,brow) ! END SUBROUTINE splcoefp1 !=========================================================================== SUBROUTINE splcoefpn(sp, f, c) ! ! Compute the spline coefficient c from grid values f ! f(x) is periodic ! USE matrix ! TYPE(spline1d) :: sp DOUBLE PRECISION, INTENT(in) :: f(:,:) DOUBLE PRECISION, INTENT(out) :: c(:,:) ! DOUBLE PRECISION, POINTER :: arow(:,:), brow(:,:) DOUBLE PRECISION, PARAMETER :: zero=0.0d0, one=1.0d0, minus1=-1.0d0 INTEGER :: p, dim, rank, bandw, nrhs INTEGER :: i, j, k !________________________________________________________________________________ ! p = sp%order-1 rank = sp%nints bandw = SIZE(sp%matp%matvt,1) nrhs = SIZE(f,2) ! ! ! Solve the interpolation system ! ! Solve Ay = f ALLOCATE(arow(rank,nrhs), brow(bandw,nrhs)) ! arow(1:rank,1:nrhs) = f(1:rank,1:nrhs) CALL bsolve(sp%matp%mat, arow) ! ! For constant and linear splines, A is diagnonal! ! Should skip the rest ! IF( p.LE.1 ) GOTO 100 ! ! ! t = V^T*y CALL dgemm('N', 'N', bandw, nrhs, rank, one, sp%matp%matvt, bandw, arow, & & rank, zero, brow, bandw) ! ! y = y - Ut CALL dgemm('N', 'N', rank, nrhs, bandw, minus1, sp%matp%matu, rank, brow, & & bandw, one, arow, rank) ! 100 CONTINUE ! ! Interpolation coefficients dim = sp%dim DO k=1,nrhs DO i=1,dim j = MODULO(i-1-p/2, rank) + 1 c(i,k) = arow(j,k) END DO END DO ! DEALLOCATE(arow,brow) ! END SUBROUTINE splcoefpn ! !=========================================================================== SUBROUTINE topp0(sp, c, ppform) ! ! Compute PPFORM of a fuction defined by the spline SP ! and spline coefficients C(1:d) ! TYPE(spline1d), INTENT(in) :: sp DOUBLE PRECISION, INTENT(in) :: c(:) DOUBLE PRECISION, INTENT(out) :: ppform(0:,:) INTEGER :: p, nints, i, j, k ! p = sp%order - 1 nints = sp%nints ! ppform = 0.0d0 DO i=1,nints ! on each knot interval DO j=1,p+1 ! all spline in interval i DO k=0,p ! k_th derivatives ppform(k,i) = ppform(k,i) + sp%val0(k,j,i)*c(j+i-1) END DO END DO END DO ! END SUBROUTINE topp0 !=========================================================================== SUBROUTINE topp0z(sp, c, ppform) ! ! Compute PPFORM of a fuction defined by the spline SP ! and spline coefficients C(1:d) ! TYPE(spline1d), INTENT(in) :: sp DOUBLE COMPLEX, INTENT(in) :: c(:) DOUBLE COMPLEX, INTENT(out) :: ppform(0:,:) INTEGER :: p, nints, i, j, k ! p = sp%order - 1 nints = sp%nints ! ppform = (0.0d0, 0.0d0) DO i=1,nints ! on each knot interval DO j=1,p+1 ! all spline in interval i DO k=0,p ! k_th derivatives ppform(k,i) = ppform(k,i) + sp%val0(k,j,i)*c(j+i-1) END DO END DO END DO ! END SUBROUTINE topp0z !=========================================================================== SUBROUTINE topp1(sp, c, ppform) ! ! Compute PPFORM of a fuction defined by the spline SP ! and spline coefficients C(1:d,:) ! TYPE(spline1d), INTENT(in) :: sp DOUBLE PRECISION, INTENT(in) :: c(:,:) DOUBLE PRECISION, INTENT(out) :: ppform(:,:,:) INTEGER :: m ! DO m=1,SIZE(c,2) CALL topp0(sp, c(:,m), ppform(m,:,:)) END DO ! END SUBROUTINE topp1 !=========================================================================== SUBROUTINE topp1z(sp, c, ppform) ! ! Compute PPFORM of a fuction defined by the spline SP ! and spline coefficients C(1:d,:) ! TYPE(spline1d), INTENT(in) :: sp DOUBLE COMPLEX, INTENT(in) :: c(:,:) DOUBLE COMPLEX, INTENT(out) :: ppform(:,:,:) INTEGER :: m ! DO m=1,SIZE(c,2) CALL topp0z(sp, c(:,m), ppform(m,:,:)) END DO ! END SUBROUTINE topp1z !=========================================================================== SUBROUTINE topp2(sp, c, ppform) ! ! Compute PPFORM of a fuction defined by the spline SP ! and spline coefficients C(1:d,:,:) ! TYPE(spline1d), INTENT(in) :: sp DOUBLE PRECISION, INTENT(in) :: c(:,:,:) DOUBLE PRECISION, INTENT(out) :: ppform(:,:,:,:) INTEGER :: m, mm ! DO mm=1,SIZE(c,3) DO m=1,SIZE(c,2) CALL topp0(sp, c(:,m,mm), ppform(m,mm,:,:)) END DO END DO ! END SUBROUTINE topp2 !=========================================================================== SUBROUTINE topp2z(sp, c, ppform) ! ! Compute PPFORM of a fuction defined by the spline SP ! and spline coefficients C(1:d,:,:) ! TYPE(spline1d), INTENT(in) :: sp DOUBLE COMPLEX, INTENT(in) :: c(:,:,:) DOUBLE COMPLEX, INTENT(out) :: ppform(:,:,:,:) INTEGER :: m, mm ! DO mm=1,SIZE(c,3) DO m=1,SIZE(c,2) CALL topp0z(sp, c(:,m,mm), ppform(m,mm,:,:)) END DO END DO ! END SUBROUTINE topp2z !=========================================================================== SUBROUTINE topp3(sp, c, ppform) ! ! Compute PPFORM of a fuction defined by the spline SP ! and spline coefficients C(1:d,:,:,:) ! TYPE(spline1d), INTENT(in) :: sp DOUBLE PRECISION, INTENT(in) :: c(:,:,:,:) DOUBLE PRECISION, INTENT(out) :: ppform(:,:,:,:,:) INTEGER :: m, mm, mmm ! DO mmm=1,SIZE(c,4) DO mm=1,SIZE(c,3) DO m=1,SIZE(c,2) CALL topp0(sp, c(:,m,mm,mmm), ppform(m,mm,mmm,:,:)) END DO END DO END DO ! END SUBROUTINE topp3 !=========================================================================== SUBROUTINE topp3z(sp, c, ppform) ! ! Compute PPFORM of a fuction defined by the spline SP ! and spline coefficients C(1:d,:,:,:) ! TYPE(spline1d), INTENT(in) :: sp DOUBLE COMPLEX, INTENT(in) :: c(:,:,:,:) DOUBLE COMPLEX, INTENT(out) :: ppform(:,:,:,:,:) INTEGER :: m, mm, mmm ! DO mmm=1,SIZE(c,4) DO mm=1,SIZE(c,3) DO m=1,SIZE(c,2) CALL topp0z(sp, c(:,m,mm,mmm), ppform(m,mm,mmm,:,:)) END DO END DO END DO ! END SUBROUTINE topp3z !=========================================================================== SUBROUTINE topp4(sp, c, ppform) ! ! Compute PPFORM of a fuction defined by the spline SP ! and spline coefficients C(1:d,:,:) ! TYPE(spline1d), INTENT(in) :: sp DOUBLE PRECISION, INTENT(in) :: c(:,:,:,:,:) DOUBLE PRECISION, INTENT(out) :: ppform(:,:,:,:,:,:) INTEGER :: m, mm, mmm, mmmm ! DO mmmm=1,SIZE(c,5) DO mmm=1,SIZE(c,4) DO mm=1,SIZE(c,3) DO m=1,SIZE(c,2) CALL topp0(sp, c(:,m,mm,mmm,mmmm), ppform(m,mm,mmm,mmmm,:,:)) END DO END DO END DO END DO ! END SUBROUTINE topp4 !=========================================================================== SUBROUTINE topp4z(sp, c, ppform) ! ! Compute PPFORM of a fuction defined by the spline SP ! and spline coefficients C(1:d,:,:) ! TYPE(spline1d), INTENT(in) :: sp DOUBLE COMPLEX, INTENT(in) :: c(:,:,:,:,:) DOUBLE COMPLEX, INTENT(out) :: ppform(:,:,:,:,:,:) INTEGER :: m, mm, mmm, mmmm ! DO mmmm=1,SIZE(c,5) DO mmm=1,SIZE(c,4) DO mm=1,SIZE(c,3) DO m=1,SIZE(c,2) CALL topp0z(sp, c(:,m,mm,mmm,mmmm), ppform(m,mm,mmm,mmmm,:,:)) END DO END DO END DO END DO ! END SUBROUTINE topp4z !=========================================================================== SUBROUTINE ppval0(sp, x, ppform, left, jder, f) ! ! Compute function and derivatives from the PP representation ! f(x) = \sum_{k=0}^p ppform(k)*(x-t_{left})^k ! TYPE(spline1d), INTENT(in) :: sp DOUBLE PRECISION, INTENT(in) :: x, ppform(:) INTEGER, INTENT(in) :: left, jder DOUBLE PRECISION, INTENT(out) :: f DOUBLE PRECISION :: h, fact INTEGER :: j, order ! order = sp%order ! Polynomial degree p + 1 ! h = x-sp%knots(left) f = 0.0d0 IF( jder .LT. 0 .OR. jder .GE. order ) RETURN ! SELECT CASE (jder) CASE(0) ! function value DO j=order,1,-1 f = f*h + ppform(j) END DO CASE(1) ! 1st derivative DO j=order-1,1,-1 f = f*h + j*ppform(j+1) END DO CASE default ! 2nd and higher derivatives fact = order-jder DO j=order,jder+1,-1 f = f/fact*j*h + ppform(j) fact = fact-1.0d0 END DO DO j=2,jder f = f*j END DO END SELECT END SUBROUTINE ppval0 !=========================================================================== SUBROUTINE ppval0z(sp, x, ppform, left, jder, f) ! ! Compute function and derivatives from the PP representation ! f(x) = \sum_{k=0}^p ppform(k)*(x-t_{left})^k ! TYPE(spline1d), INTENT(in) :: sp DOUBLE PRECISION, INTENT(in) :: x DOUBLE COMPLEX, INTENT(in) :: ppform(:) INTEGER, INTENT(in) :: left, jder DOUBLE COMPLEX, INTENT(out) :: f DOUBLE PRECISION :: h, fact INTEGER :: j, order ! order = sp%order ! Polynomial degree p + 1 ! h = x-sp%knots(left) f = (0.0d0,0.0d0) IF( jder .LT. 0 .OR. jder .GE. order ) RETURN ! SELECT CASE (jder) CASE(0) ! function value DO j=order,1,-1 f = f*h + ppform(j) END DO CASE(1) ! 1st derivative DO j=order-1,1,-1 f = f*h + j*ppform(j+1) END DO CASE default ! 2nd and higher derivatives fact = order-jder DO j=order,jder+1,-1 f = f/fact*j*h + ppform(j) fact = fact-1.0d0 END DO DO j=2,jder f = f*j END DO END SELECT END SUBROUTINE ppval0z !=========================================================================== SUBROUTINE ppval0z_n(sp, x, ppform, left, jder, f) ! ! PPVAL0Z for many points x(:) ! TYPE(spline1d), INTENT(in) :: sp DOUBLE PRECISION, INTENT(in) :: x(:) DOUBLE COMPLEX, INTENT(in) :: ppform(:,:) INTEGER, INTENT(in) :: left(:), jder DOUBLE COMPLEX, INTENT(out) :: f(:) DOUBLE PRECISION :: h(SIZE(x)), fact INTEGER :: j, order ! order = sp%order ! Polynomial degree p + 1 ! h(:) = x(:)-sp%knots(left(:)) f(:) = 0.0d0 IF( jder .LT. 0 .OR. jder .GE. order ) RETURN ! SELECT CASE (jder) CASE(0) ! function value DO j=order,1,-1 f(:) = f(:)*h(:) + ppform(:,j) END DO CASE(1) ! 1st derivative DO j=order-1,1,-1 f(:) = f(:)*h(:) + j*ppform(:,j+1) END DO CASE default ! 2nd and higher derivatives fact = order-jder DO j=order,jder+1,-1 f(:) = f(:)/fact*j*h(:) + ppform(:,j) fact = fact-1.0d0 END DO DO j=2,jder f(:) = f(:)*j END DO END SELECT END SUBROUTINE ppval0z_n !=========================================================================== SUBROUTINE ppval0_n(sp, x, ppform, left, jder, f) ! ! PPVAL0 for many points x(:) ! TYPE(spline1d), INTENT(in) :: sp DOUBLE PRECISION, INTENT(in) :: x(:), ppform(:,:) INTEGER, INTENT(in) :: left(:), jder DOUBLE PRECISION, INTENT(out) :: f(:) DOUBLE PRECISION :: h(SIZE(x)), fact INTEGER :: j, order ! order = sp%order ! Polynomial degree p + 1 ! h(:) = x(:)-sp%knots(left(:)) f(:) = 0.0d0 IF( jder .LT. 0 .OR. jder .GE. order ) RETURN ! SELECT CASE (jder) CASE(0) ! function value DO j=order,1,-1 f(:) = f(:)*h(:) + ppform(:,j) END DO CASE(1) ! 1st derivative DO j=order-1,1,-1 f(:) = f(:)*h(:) + j*ppform(:,j+1) END DO CASE default ! 2nd and higher derivatives fact = order-jder DO j=order,jder+1,-1 f(:) = f(:)/fact*j*h(:) + ppform(:,j) fact = fact-1.0d0 END DO DO j=2,jder f(:) = f(:)*j END DO END SELECT END SUBROUTINE ppval0_n !=========================================================================== SUBROUTINE ppval1(sp, x, ppform, left, jder, f) ! ! Compute function and derivatives from the PP representation ! f(x) = \sum_{k=0}^p ppform(k)*(x-t_{left})^k ! TYPE(spline1d), INTENT(in) :: sp DOUBLE PRECISION, INTENT(in) :: x, ppform(:,:) INTEGER, INTENT(in) :: left, jder DOUBLE PRECISION, INTENT(out) :: f(:) DOUBLE PRECISION :: h, fact INTEGER :: j, order ! order = sp%order ! Polynomial degree p + 1 ! h = x-sp%knots(left) f = 0.0d0 IF( jder .LT. 0 .OR. jder .GE. order ) RETURN ! SELECT CASE (jder) CASE(0) ! function value DO j=order,1,-1 f(:) = f(:)*h + ppform(j,:) END DO CASE(1) ! 1st derivative DO j=order-1,1,-1 f(:) = f(:)*h + j*ppform(j+1,:) END DO CASE default ! 2nd and higher derivatives fact = order-jder DO j=order,jder+1,-1 f(:) = f(:)/fact*j*h + ppform(j,:) fact = fact-1.0d0 END DO DO j=2,jder f(:) = f(:)*j END DO END SELECT END SUBROUTINE ppval1 !=========================================================================== SUBROUTINE ppval1z(sp, x, ppform, left, jder, f) ! ! Compute function and derivatives from the PP representation ! f(x) = \sum_{k=0}^p ppform(k)*(x-t_{left})^k ! TYPE(spline1d), INTENT(in) :: sp DOUBLE PRECISION, INTENT(in) :: x DOUBLE COMPLEX, INTENT(in) :: ppform(:,:) INTEGER, INTENT(in) :: left, jder DOUBLE COMPLEX, INTENT(out) :: f(:) DOUBLE PRECISION :: h, fact INTEGER :: j, order ! order = sp%order ! Polynomial degree p + 1 ! h = x-sp%knots(left) f = (0.0d0,0.0d0) IF( jder .LT. 0 .OR. jder .GE. order ) RETURN ! SELECT CASE (jder) CASE(0) ! function value DO j=order,1,-1 f(:) = f(:)*h + ppform(j,:) END DO CASE(1) ! 1st derivative DO j=order-1,1,-1 f(:) = f(:)*h + j*ppform(j+1,:) END DO CASE default ! 2nd and higher derivatives fact = order-jder DO j=order,jder+1,-1 f(:) = f(:)/fact*j*h + ppform(j,:) fact = fact-1.0d0 END DO DO j=2,jder f(:) = f(:)*j END DO END SELECT END SUBROUTINE ppval1z !=========================================================================== SUBROUTINE ppval2(sp, x, ppform, left, jder, f) ! ! Compute function and derivatives from the PP representation ! f(x) = \sum_{k=0}^p ppform(k)*(x-t_{left})^k ! TYPE(spline1d), INTENT(in) :: sp DOUBLE PRECISION, INTENT(in) :: x, ppform(:,:,:) INTEGER, INTENT(in) :: left, jder DOUBLE PRECISION, INTENT(out) :: f(:,:) DOUBLE PRECISION :: h, fact INTEGER :: j, order ! order = sp%order ! Polynomial degree p + 1 ! h = x-sp%knots(left) f = 0.0d0 IF( jder .LT. 0 .OR. jder .GE. order ) RETURN ! SELECT CASE (jder) CASE(0) ! function value DO j=order,1,-1 f(:,:) = f(:,:)*h + ppform(j,:,:) END DO CASE(1) ! 1st derivative DO j=order-1,1,-1 f(:,:) = f(:,:)*h + j*ppform(j+1,:,:) END DO CASE default ! 2nd and higher derivatives fact = order-jder DO j=order,jder+1,-1 f(:,:) = f(:,:)/fact*j*h + ppform(j,:,:) fact = fact-1.0d0 END DO DO j=2,jder f(:,:) = f(:,:)*j END DO END SELECT END SUBROUTINE ppval2 !=========================================================================== SUBROUTINE ppval2z(sp, x, ppform, left, jder, f) ! ! Compute function and derivatives from the PP representation ! f(x) = \sum_{k=0}^p ppform(k)*(x-t_{left})^k ! TYPE(spline1d), INTENT(in) :: sp DOUBLE PRECISION, INTENT(in) :: x DOUBLE COMPLEX, INTENT(in) :: ppform(:,:,:) INTEGER, INTENT(in) :: left, jder DOUBLE COMPLEX, INTENT(out) :: f(:,:) DOUBLE PRECISION :: h, fact INTEGER :: j, order ! order = sp%order ! Polynomial degree p + 1 ! h = x-sp%knots(left) f = (0.0d0,0.0d0) IF( jder .LT. 0 .OR. jder .GE. order ) RETURN ! SELECT CASE (jder) CASE(0) ! function value DO j=order,1,-1 f(:,:) = f(:,:)*h + ppform(j,:,:) END DO CASE(1) ! 1st derivative DO j=order-1,1,-1 f(:,:) = f(:,:)*h + j*ppform(j+1,:,:) END DO CASE default ! 2nd and higher derivatives fact = order-jder DO j=order,jder+1,-1 f(:,:) = f(:,:)/fact*j*h + ppform(j,:,:) fact = fact-1.0d0 END DO DO j=2,jder f(:,:) = f(:,:)*j END DO END SELECT END SUBROUTINE ppval2z !=========================================================================== SUBROUTINE locintv0_old(sp, x, left) ! ! Locate the interval containing x ! Should be in [0, nints-1] ! TYPE(spline1d) :: sp DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: left DOUBLE PRECISION :: hinv INTEGER :: nints ! nints = sp%nints ! ! Case of equidistant mesh IF( sp%nlequid) THEN hinv = sp%hinv left = MAX(0,MIN(FLOOR((x-sp%knots(0))*hinv),nints-1)) RETURN END IF ! ! Non-equistant mesh left = sp%left DO IF( left .EQ. nints ) THEN left = nints-1 EXIT END IF IF( left .LT. 0 ) THEN left = 0 EXIT END IF IF( x .LT. sp%knots(left+1) ) THEN IF( x .GE. sp%knots(left) ) THEN EXIT ELSE left = left-1 END IF ELSE left = left+1 END IF END DO IF(left .GT. 0 .AND. left .LT. nints) THEN sp%left = left END IF END SUBROUTINE locintv0_old ! !=========================================================================== SUBROUTINE locintv1_old(sp, x, left) ! ! Locate the intervals left(:) containing x(:) ! Should be in [0, nints-1] ! TYPE(spline1d) :: sp DOUBLE PRECISION, INTENT(in) :: x(:) INTEGER, INTENT(out) :: left(:) DOUBLE PRECISION :: hinv INTEGER :: nints, i ! ! Case of equidistant mesh nints = sp%nints IF( sp%nlequid) THEN hinv = sp%hinv left(:) = MAX(0,MIN(FLOOR((x(:)-sp%knots(0))*hinv),nints-1)) RETURN END IF ! ! Non-equistant mesh DO i=1,SIZE(x) CALL locintv0_old(sp, x(i), left(i)) END DO END SUBROUTINE locintv1_old ! !=========================================================================== SUBROUTINE locintv0(sp, x, left) ! ! Locate the interval containing x ! Should be in [0, nints-1] ! TYPE(spline1d) :: sp DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: left DOUBLE PRECISION :: hinv INTEGER :: l, nf, nints ! nints = sp%nints ! ! Case of equidistant mesh IF( sp%nlequid) THEN hinv = sp%hinv left = MAX(0,MIN(FLOOR((x-sp%knots(0))*hinv),nints-1)) RETURN END IF ! ! Non-equistant mesh hinv = sp%hinv nf = SIZE(sp%fmap) - 1 l = MAX(0,MIN(FLOOR((x-sp%knots(0))*hinv),nf-1)) ! left on fine mesh left = sp%fmap(l) IF( x.GE.sp%knots(left+1) ) left = MIN(left+1,nints-1) END SUBROUTINE locintv0 ! !=========================================================================== SUBROUTINE locintv1(sp, x, left) ! ! Locate the intervals left(:) containing x(:) ! Should be in [0, nints-1] ! TYPE(spline1d) :: sp DOUBLE PRECISION, INTENT(in) :: x(:) INTEGER, INTENT(out) :: left(:) INTEGER :: l(SIZE(x)) DOUBLE PRECISION :: hinv INTEGER :: npt, nf, nints, i ! npt = SIZE(x) ! ! Case of equidistant mesh nints = sp%nints IF( sp%nlequid) THEN hinv = sp%hinv left(1:npt) = MAX(0,MIN(FLOOR((x(1:npt)-sp%knots(0))*hinv),nints-1)) RETURN END IF ! ! Non-equistant mesh hinv = sp%hinv nf = SIZE(sp%fmap) - 1 l(:) = MAX(0,MIN(FLOOR((x(:)-sp%knots(0))*hinv),nf-1)) ! left on fine mesh left(1:npt) = sp%fmap(l(1:npt)) WHERE( x.GE.sp%knots(left+1) ) left = MIN(left+1,nints-1) END SUBROUTINE locintv1 ! !=========================================================================== SUBROUTINE destroy_sp1d(sp) ! ! Clean up 1d spline object ! TYPE(spline1d) :: sp ! IF( ASSOCIATED(sp%knots) ) DEALLOCATE (sp%knots) IF( ASSOCIATED(sp%val0) ) DEALLOCATE (sp%val0) IF( ASSOCIATED(sp%valc) ) DEALLOCATE (sp%valc) IF( ASSOCIATED(sp%gausx) ) DEALLOCATE (sp%gausx) IF( ASSOCIATED(sp%gausw) ) DEALLOCATE (sp%gausw) IF( ASSOCIATED(sp%intspl) ) DEALLOCATE (sp%intspl) IF( ASSOCIATED(sp%ppform) ) DEALLOCATE (sp%ppform) IF( ASSOCIATED(sp%ppformz) ) DEALLOCATE (sp%ppformz) IF( ASSOCIATED(sp%bcoefs) ) DEALLOCATE(sp%bcoefs) IF( ASSOCIATED(sp%bcoefsc) ) DEALLOCATE(sp%bcoefsc) IF( ASSOCIATED(sp%fmap) ) DEALLOCATE(sp%fmap) ! CALL destroy(sp%mat) CALL destroy(sp%matp) CALL destroy_dftmap(sp%dft) END SUBROUTINE destroy_sp1d ! !=========================================================================== SUBROUTINE destroy_dftmap(m) ! ! Clean up DFTMAP ! TYPE(dftmap) :: m ! IF(ASSOCIATED(m%coefs)) DEALLOCATE(m%coefs) END SUBROUTINE destroy_dftmap !=========================================================================== SUBROUTINE destroy_sp2d(sp) ! ! Clean up 2d spline object ! TYPE(spline2d) :: sp ! IF( ASSOCIATED(sp%ppform) ) DEALLOCATE(sp%ppform) IF( ASSOCIATED(sp%ppformz) ) DEALLOCATE(sp%ppformz) IF( ASSOCIATED(sp%bcoefs) ) DEALLOCATE(sp%bcoefs) IF( ASSOCIATED(sp%bcoefsc) ) DEALLOCATE(sp%bcoefsc) CALL destroy_sp1d(sp%sp1) CALL destroy_sp1d(sp%sp2) END SUBROUTINE destroy_sp2d !=========================================================================== SUBROUTINE destroy_sp2d1d(sp) ! ! Clean up 2d1d spline object ! TYPE(spline2d1d) :: sp ! IF( ASSOCIATED(sp%ppform) ) DEALLOCATE(sp%ppform) IF( ASSOCIATED(sp%ppformz) ) DEALLOCATE (sp%ppformz) IF( ASSOCIATED(sp%bcoefs) ) DEALLOCATE(sp%bcoefs) IF( ASSOCIATED(sp%bcoefsc) ) DEALLOCATE(sp%bcoefsc) CALL destroy_sp2d(sp%sp12) CALL destroy_sp1d(sp%sp3) END SUBROUTINE destroy_sp2d1d ! !=========================================================================== SUBROUTINE calc_integ0(sp, finteg) ! ! Compute integral of splines ! TYPE(spline1d) :: sp DOUBLE PRECISION, INTENT(out) :: finteg(0:) DOUBLE PRECISION, ALLOCATABLE :: xg(:), wg(:), fun(:,:) DOUBLE PRECISION :: x1, x2 INTEGER :: dim, nx, nidbas, ng, i, ig, j, jj, left ! CALL get_dim(sp, dim, nx, nidbas) ng = MAX(2, (nidbas+2)/2) ALLOCATE(xg(ng), wg(ng), fun(0:nidbas,1)) fun = 0.0d0 finteg = 0.0d0 DO i=1,nx ! Loop thru the intervals left = i x1 = sp%knots(i-1) x2 = sp%knots(i) CALL gauleg(x1, x2, xg, wg, ng) DO ig=1,ng ! Loop thru Gauss points CALL basfun(xg(ig), sp, fun, i) left = i-1 DO j=0,nidbas ! Loop thru the splines [left:left+nidbas] jj = left+j ! in this interval IF( sp%period ) jj = MODULO(left+j, nx) finteg(jj) = finteg(jj) + wg(ig)*fun(j,1) END DO END DO END DO DEALLOCATE(xg, wg, fun) END SUBROUTINE calc_integ0 !=========================================================================== SUBROUTINE calc_integn(sp, finteg) ! ! Compute integrals = Int( x^a \Lambda_j(x) ), a=0,1,... n ! TYPE(spline1d) :: sp DOUBLE PRECISION, INTENT(out) :: finteg(0:,0:) DOUBLE PRECISION, ALLOCATABLE :: xg(:), wg(:), fun(:,:) DOUBLE PRECISION :: x1, x2, xpow INTEGER :: dim, nx, nidbas, ng, i, ig, j, k, jj, left INTEGER :: nord ! nord = SIZE(finteg,2)-1 CALL get_dim(sp, dim, nx, nidbas) ng = MAX(2, (nidbas+nord+2)/2) ALLOCATE(xg(ng), wg(ng), fun(0:nidbas,1)) fun = 0.0d0 finteg = 0.0d0 DO i=1,nx ! Loop thru the intervals left = i x1 = sp%knots(i-1) x2 = sp%knots(i) CALL gauleg(x1, x2, xg, wg, ng) DO ig=1,ng ! Loop thru Gauss points CALL basfun(xg(ig), sp, fun, i) left=i-1 DO j=0,nidbas ! Loop thru the splines [left:left+nidbas] jj = left+j ! in this interval IF( sp%period ) jj = MODULO(left+j, nx) xpow = wg(ig)*fun(j,1) DO k=0,nord finteg(jj,k) = finteg(jj,k) + xpow xpow = xpow*xg(ig) END DO END DO END DO END DO DEALLOCATE(xg, wg, fun) END SUBROUTINE calc_integn !=========================================================================== ! DOUBLE PRECISION FUNCTION fintg1(sp, c) ! ! Integral of 1d function from its spline coefs c. ! TYPE(spline1d) :: sp DOUBLE PRECISION, INTENT(in) :: c(0:) INTEGER :: dim dim = sp%dim fintg1 = DOT_PRODUCT(sp%intspl(0:dim-1), c(0:dim-1)) END FUNCTION fintg1 !=========================================================================== DOUBLE PRECISION FUNCTION fintg2(sp, c) ! ! Integral of 2d function from its spline coefs c. ! TYPE(spline2d) :: sp DOUBLE PRECISION, INTENT(in) :: c(0:,0:) INTEGER :: dim1, dim2, i, j dim1 = sp%sp1%dim dim2 = sp%sp2%dim fintg2 = 0.0d0 DO j=0,dim2-1 DO i=0,dim1-1 fintg2 = fintg2 + c(i,j)*sp%sp1%intspl(i)*sp%sp2%intspl(j) END DO END DO END FUNCTION fintg2 !=========================================================================== SUBROUTINE gridval2d_2d(sp, xp, yp, fp, jder, c, ppform) ! ! Compute values or jder-th dervivative of f(x,y) from ppform ! of spline sp. Recompute the ppform if the optional spline ! coefficients c are given. ! ! F(I,J) = F(X(I), Y(J)) ! TYPE(spline2d), INTENT(inout) :: sp DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: xp, yp DOUBLE PRECISION, DIMENSION(:,:), INTENT(out) :: fp INTEGER, INTENT(in) :: jder(2) DOUBLE PRECISION, DIMENSION(:,:), OPTIONAL, INTENT(in) :: c DOUBLE PRECISION, DIMENSION(:,:,:,:), OPTIONAL :: ppform ! INTEGER :: d1, d2, k1, k2, n1, n2 DOUBLE PRECISION, ALLOCATABLE :: work(:,:,:), temp(:) DOUBLE PRECISION, ALLOCATABLE :: funx(:,:), funy(:,:) DOUBLE PRECISION :: x(SIZE(xp)), y(SIZE(yp)) INTEGER :: leftx(SIZE(xp)), lefty(SIZE(yp)) INTEGER :: i, j, k, ii, jj LOGICAL :: nlppform ! d1 = sp%sp1%dim d2 = sp%sp2%dim k1 = sp%sp1%order k2 = sp%sp2%order n1 = sp%sp1%nints n2 = sp%sp2%nints nlppform = sp%sp1%nlppform .OR. sp%sp2%nlppform ! ! Compute PPFORMM/BCOEFS if spline coefs are passed ! IF( PRESENT(c) ) THEN IF( nlppform ) THEN ALLOCATE(work(d2,k1,n1)) CALL topp1(sp%sp1, c , work) IF(PRESENT(ppform)) THEN CALL topp2(sp%sp2, work, ppform) ELSE IF( ASSOCIATED(sp%ppform) ) DEALLOCATE(sp%ppform) ALLOCATE(sp%ppform(k1,n1,k2,n2)) CALL topp2(sp%sp2, work, sp%ppform) END IF DEALLOCATE(work) ELSE IF( ASSOCIATED(sp%bcoefs) ) DEALLOCATE(sp%bcoefs) ALLOCATE(sp%bcoefs(SIZE(c,1),SIZE(c,2))) sp%bcoefs = c END IF END IF ! ! Applly periodicity if required ! IF( sp%sp1%period ) THEN ! ** Applly periodicity ** x = sp%sp1%knots(0) + MODULO(xp-sp%sp1%knots(0), sp%sp1%lperiod) ELSE x = xp END IF IF( sp%sp2%period ) THEN ! ** Applly periodicity ** y = sp%sp2%knots(0) + MODULO(yp-sp%sp2%knots(0), sp%sp2%lperiod) ELSE y = yp END IF ! ! Locate interval containing (x,y) ! CALL locintv(sp%sp1, x, leftx) CALL locintv(sp%sp2, y, lefty) ! ! Compute function/derivatives ! IF( nlppform ) THEN ! using PP form ALLOCATE(temp(k2)) DO j=1,SIZE(y) DO i=1,SIZE(x) IF(PRESENT(ppform)) THEN CALL ppval(sp%sp1, x(i), ppform(:,leftx(i)+1,:,lefty(j)+1),& & leftx(i), jder(1), temp) ELSE CALL ppval(sp%sp1, x(i), sp%ppform(:,leftx(i)+1,:,lefty(j)+1),& & leftx(i), jder(1), temp) END IF CALL ppval(sp%sp2, y(j), temp, lefty(j), jder(2), fp(i,j)) END DO END DO DEALLOCATE(temp) ELSE ! using spline expansion ALLOCATE(funx(1:k1,0:jder(1)), funy(1:k2,0:jder(2))) fp = 0.0d0 DO j=1,SIZE(y) CALL basfun(y(j), sp%sp2, funy, lefty(j)+1) DO i=1,SIZE(x) CALL basfun(x(i), sp%sp1, funx, leftx(i)+1) DO jj=1,k2 DO ii=1,k1 fp(i,j) = fp(i,j) + sp%bcoefs(leftx(i)+ii,lefty(j)+jj) * & & funx(ii,jder(1))*funy(jj,jder(2)) END DO END DO END DO END DO DEALLOCATE(funx, funy) END IF END SUBROUTINE gridval2d_2d !=========================================================================== SUBROUTINE gridval2d1d_3d(sp, xp, yp, zp, fp, jder, c, ppform) ! ! Compute values or jder-th dervivative of f(x,y,z) from spline ! coefficients (nlppform=.false.) or ppform (nlppform=.true.) ! ! F(I,J,K) = F(X(I), Y(J), Z(K)) ! TYPE(spline2d1d), TARGET :: sp DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: xp, yp, zp DOUBLE PRECISION, DIMENSION(:,:,:), INTENT(out) :: fp INTEGER, INTENT(in) :: jder(3) DOUBLE PRECISION, DIMENSION(:,:,:), OPTIONAL, INTENT(in) :: c DOUBLE PRECISION, DIMENSION(:,:,:,:,:,:), OPTIONAL :: ppform ! TYPE(spline2d), POINTER :: sp2 DOUBLE PRECISION, ALLOCATABLE :: work1(:,:,:,:), work2(:,:,:,:,:) DOUBLE PRECISION, ALLOCATABLE :: temp1(:,:), temp2(:) DOUBLE PRECISION, ALLOCATABLE :: funx(:,:), funy(:,:), funz(:,:) DOUBLE PRECISION :: x(SIZE(xp)), y(SIZE(yp)), z(SIZE(zp)) INTEGER :: leftx(SIZE(xp)), lefty(SIZE(yp)), leftz(SIZE(zp)) INTEGER :: d1, d2, d3, k1, k2, k3, n1, n2, n3 INTEGER :: ipx, ipy, ipz, k, ii, jj, kk LOGICAL :: nlppform !-------------------------------------------------------------------------------- ! 1. Prologue ! sp2 => sp%sp12 d1 = sp2%sp1%dim d2 = sp2%sp2%dim d3 = sp%sp3%dim k1 = sp2%sp1%order k2 = sp2%sp2%order k3 = sp%sp3%order n1 = sp2%sp1%nints n2 = sp2%sp2%nints n3 = sp%sp3%nints nlppform = sp2%sp1%nlppform .OR. sp2%sp2%nlppform .OR. sp%sp3%nlppform ! ! Applly periodicity if required IF( sp2%sp1%period ) THEN x = sp2%sp1%knots(0) + MODULO(xp-sp2%sp1%knots(0), sp2%sp1%lperiod) ELSE x = xp END IF IF( sp2%sp2%period ) THEN y = sp2%sp2%knots(0) + MODULO(yp-sp2%sp2%knots(0), sp2%sp2%lperiod) ELSE y = yp END IF IF( sp%sp3%period ) THEN z = sp%sp3%knots(0) + MODULO(zp-sp%sp3%knots(0), sp%sp3%lperiod) ELSE z = zp END IF ! ! Locate interval containing (x,y,z) CALL locintv(sp2%sp1, x, leftx) CALL locintv(sp2%sp2, y, lefty) CALL locintv(sp%sp3, z, leftz) !-------------------------------------------------------------------------------- ! 2. Using PPFORM ! IF( nlppform ) THEN ! ! Compute PPFORM from BCOEF IF( PRESENT(c) ) THEN ALLOCATE(work2(d3,k1,n1,k2,n2)) ALLOCATE(work1(d2,d3,k1,n1)) CALL topp2(sp2%sp1, c, work1) CALL topp3(sp2%sp2, work1, work2) DEALLOCATE(work1) IF( PRESENT(ppform) )THEN CALL topp4(sp%sp3, work2, ppform) ELSE IF( ASSOCIATED(sp%ppform) ) DEALLOCATE(sp%ppform) ALLOCATE(sp%ppform(k1,n1,k2,n2,k3,n3)) CALL topp4(sp%sp3, work2, sp%ppform) END IF DEALLOCATE(work2) END IF ! ! Compute function/derivatives ALLOCATE(temp1(k2,k3)) ALLOCATE(temp2(k3)) DO ipz=1,SIZE(z) DO ipy=1,SIZE(y) DO ipx=1,SIZE(x) IF(PRESENT(ppform)) THEN CALL ppval(sp2%sp1, x(ipx), & & ppform(:,leftx(ipx)+1,:,lefty(ipy)+1,:,leftz(ipz)+1),& & leftx(ipx), jder(1), temp1) ELSE CALL ppval(sp2%sp1, x(ipx), & & sp%ppform(:,leftx(ipx)+1,:,lefty(ipy)+1,:,leftz(ipz)+1),& & leftx(ipx), jder(1), temp1) END IF CALL ppval(sp2%sp2, y(ipy), temp1, lefty(ipy), jder(2), & & temp2) CALL ppval(sp%sp3, z(ipz), temp2, leftz(ipz), jder(3), & & fp(ipx,ipy,ipz)) END DO END DO END DO DEALLOCATE(temp1) DEALLOCATE(temp2) !-------------------------------------------------------------------------------- ! 3. Using spline expansion ! ELSE IF( PRESENT(c) ) THEN IF( ASSOCIATED(sp%bcoefs) ) DEALLOCATE(sp%bcoefs) ALLOCATE(sp%bcoefs(SIZE(c,1),SIZE(c,2),SIZE(c,3))) sp%bcoefs = c END IF ! ! Compute function/derivatives ALLOCATE(funx(1:k1,0:jder(1))) ALLOCATE(funy(1:k2,0:jder(2))) ALLOCATE(funz(1:k3,0:jder(3))) fp = 0.0d0 DO ipz=1,SIZE(z) CALL basfun(z(ipz), sp%sp3, funz, leftz(ipz)+1) DO ipy=1,SIZE(y) CALL basfun(y(ipy), sp2%sp2, funy, lefty(ipy)+1) DO ipx=1,SIZE(x) CALL basfun(x(ipx), sp2%sp1, funx, leftx(ipx)+1) DO kk=1,k3 DO jj=1,k2 DO ii=1,k1 fp(ipx,ipy,ipz) = fp(ipx,ipy,ipz) + & & sp%bcoefs(leftx(ipx)+ii,lefty(ipy)+jj,leftz(ipz)+kk) * & & funx(ii,jder(1)) * funy(jj,jder(2)) * funz(kk,jder(3)) END DO END DO END DO END DO END DO END DO DEALLOCATE(funx, funy, funz) END IF END SUBROUTINE gridval2d1d_3d !=========================================================================== SUBROUTINE gridval2d1d_1d(sp, xp, yp, zp, fp, jder, c, ppform) ! ! Compute values or jder-th dervivative of f(x,y,z) from spline ! coefficients (nlppform=.false.) or ppform (nlppform=.true.) ! ! F(I) = F(X(I),Y(I),Z(I)) ! TYPE(spline2d1d), TARGET :: sp DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: xp, yp, zp DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: fp INTEGER, INTENT(in) :: jder(3) DOUBLE PRECISION, DIMENSION(:,:,:), OPTIONAL, INTENT(in) :: c DOUBLE PRECISION, DIMENSION(:,:,:,:,:,:), OPTIONAL :: ppform ! TYPE(spline2d), POINTER :: sp2 DOUBLE PRECISION, ALLOCATABLE :: work1(:,:,:,:), work2(:,:,:,:,:) DOUBLE PRECISION, ALLOCATABLE :: temp1(:,:), temp2(:) DOUBLE PRECISION, ALLOCATABLE :: funx(:,:), funy(:,:), funz(:,:) DOUBLE PRECISION :: x(SIZE(xp)), y(SIZE(yp)), z(SIZE(zp)) INTEGER :: leftx(SIZE(xp)), lefty(SIZE(yp)), leftz(SIZE(zp)) INTEGER :: d1, d2, d3, k1, k2, k3, n1, n2, n3 INTEGER :: np, ip, ii, jj, kk LOGICAL :: nlppform !-------------------------------------------------------------------------------- ! 1. Prologue ! sp2 => sp%sp12 d1 = sp2%sp1%dim d2 = sp2%sp2%dim d3 = sp%sp3%dim k1 = sp2%sp1%order k2 = sp2%sp2%order k3 = sp%sp3%order n1 = sp2%sp1%nints n2 = sp2%sp2%nints n3 = sp%sp3%nints np = SIZE(xp) nlppform = sp2%sp1%nlppform .OR. sp2%sp2%nlppform .OR. sp%sp3%nlppform ! ! Applly periodicity if required IF( sp2%sp1%period ) THEN x = sp2%sp1%knots(0) + MODULO(xp-sp2%sp1%knots(0), sp2%sp1%lperiod) ELSE x = xp END IF IF( sp2%sp2%period ) THEN y = sp2%sp2%knots(0) + MODULO(yp-sp2%sp2%knots(0), sp2%sp2%lperiod) ELSE y = yp END IF IF( sp%sp3%period ) THEN z = sp%sp3%knots(0) + MODULO(zp-sp%sp3%knots(0), sp%sp3%lperiod) ELSE z = zp END IF ! ! Locate interval containing (x,y,z) CALL locintv(sp2%sp1, x, leftx) CALL locintv(sp2%sp2, y, lefty) CALL locintv(sp%sp3, z, leftz) !-------------------------------------------------------------------------------- ! 2. Using PPFORM ! IF( nlppform ) THEN ! ! Compute PPFORM from BCOEF IF( PRESENT(c) ) THEN ALLOCATE(work2(d3,k1,n1,k2,n2)) ALLOCATE(work1(d2,d3,k1,n1)) CALL topp2(sp2%sp1, c, work1) CALL topp3(sp2%sp2, work1, work2) DEALLOCATE(work1) IF( PRESENT(ppform) )THEN CALL topp4(sp%sp3, work2, ppform) ELSE IF( ASSOCIATED(sp%ppform) ) DEALLOCATE(sp%ppform) ALLOCATE(sp%ppform(k1,n1,k2,n2,k3,n3)) CALL topp4(sp%sp3, work2, sp%ppform) END IF DEALLOCATE(work2) END IF ! ! Compute function/derivatives ALLOCATE(temp1(k2,k3)) ALLOCATE(temp2(k3)) DO ip=1,np IF(PRESENT(ppform)) THEN CALL ppval(sp2%sp1, x(ip), & & ppform(:,leftx(ip)+1,:,lefty(ip)+1,:,leftz(ip)+1),& & leftx(ip), jder(1), temp1) ELSE CALL ppval(sp2%sp1, x(ip), & & sp%ppform(:,leftx(ip)+1,:,lefty(ip)+1,:,leftz(ip)+1),& & leftx(ip), jder(1), temp1) END IF CALL ppval(sp2%sp2, y(ip), temp1, lefty(ip), jder(2), temp2) CALL ppval(sp%sp3, z(ip), temp2, leftz(ip), jder(3), fp(ip)) END DO DEALLOCATE(temp1) DEALLOCATE(temp2) !-------------------------------------------------------------------------------- ! 3. Using spline expansion ! ELSE IF( PRESENT(c) ) THEN IF( ASSOCIATED(sp%bcoefs) ) DEALLOCATE(sp%bcoefs) ALLOCATE(sp%bcoefs(SIZE(c,1),SIZE(c,2),SIZE(c,3))) sp%bcoefs = c END IF ! ! Compute function/derivatives ALLOCATE(funx(1:k1,0:jder(1))) ALLOCATE(funy(1:k2,0:jder(2))) ALLOCATE(funz(1:k3,0:jder(3))) fp = 0.0d0 DO ip=1,np CALL basfun(x(ip), sp2%sp1, funx, leftx(ip)+1) CALL basfun(y(ip), sp2%sp2, funy, lefty(ip)+1) CALL basfun(z(ip), sp%sp3, funz, leftz(ip)+1) DO kk=1,k3 DO jj=1,k2 DO ii=1,k1 fp(ip) = fp(ip) + & & sp%bcoefs(leftx(ip)+ii,lefty(ip)+jj,leftz(ip)+kk) * & & funx(ii,jder(1))*funy(jj,jder(2))*funz(kk,jder(3)) END DO END DO END DO END DO DEALLOCATE(funx, funy, funz) END IF END SUBROUTINE gridval2d1d_1d !=========================================================================== SUBROUTINE gridval2d(sp, xp, yp, fp, jder, c, ppform) ! ! Compute values or jder-th dervivative of f(x,y) from ppform ! of spline sp. Recompute the ppform if the optional spline ! coefficients c are given. ! ! F = F(X, Y) ! TYPE(spline2d), INTENT(inout) :: sp DOUBLE PRECISION, INTENT(in) :: xp, yp DOUBLE PRECISION, INTENT(out) :: fp INTEGER, INTENT(in) :: jder(2) DOUBLE PRECISION, DIMENSION(:,:), OPTIONAL, INTENT(in) :: c DOUBLE PRECISION, DIMENSION(:,:,:,:), OPTIONAL :: ppform ! INTEGER :: d1, d2, k1, k2, n1, n2 DOUBLE PRECISION, ALLOCATABLE :: work(:,:,:), temp(:) DOUBLE PRECISION, ALLOCATABLE :: funx(:,:), funy(:,:) DOUBLE PRECISION :: x, y INTEGER :: leftx, lefty INTEGER :: i, j, k, ii, jj LOGICAL :: nlppform ! d1 = sp%sp1%dim d2 = sp%sp2%dim k1 = sp%sp1%order k2 = sp%sp2%order n1 = sp%sp1%nints n2 = sp%sp2%nints nlppform = sp%sp1%nlppform .OR. sp%sp2%nlppform ! ! Compute PPFORM/BCOEFS if spline coefs are passed ! IF( PRESENT(c)) THEN IF( nlppform ) THEN ALLOCATE(work(d2,k1,n1)) CALL topp1(sp%sp1, c , work) IF(PRESENT(ppform)) THEN CALL topp2(sp%sp2, work, ppform) ELSE IF( ASSOCIATED(sp%ppform) ) DEALLOCATE(sp%ppform) ALLOCATE(sp%ppform(k1,n1,k2,n2)) CALL topp2(sp%sp2, work, sp%ppform) END IF DEALLOCATE(work) ELSE IF( ASSOCIATED(sp%bcoefs) ) DEALLOCATE(sp%bcoefs) ALLOCATE(sp%bcoefs(SIZE(c,1),SIZE(c,2))) sp%bcoefs = c END IF END IF ! ! Applly periodicity if required ! IF( sp%sp1%period ) THEN ! ** Applly periodicity ** x = sp%sp1%knots(0) + MODULO(xp-sp%sp1%knots(0), sp%sp1%lperiod) ELSE x = xp END IF IF( sp%sp2%period ) THEN ! ** Applly periodicity ** y = sp%sp2%knots(0) + MODULO(yp-sp%sp2%knots(0), sp%sp2%lperiod) ELSE y = yp END IF ! ! Locate the interval containing x, y ! CALL locintv(sp%sp1, x, leftx) CALL locintv(sp%sp2, y, lefty) ! ! Compute function/derivatives ! IF( nlppform ) THEN ! using PP form ALLOCATE(temp(k2)) IF(PRESENT(ppform)) THEN CALL ppval(sp%sp1, x, ppform(:,leftx+1,:,lefty+1),& & leftx, jder(1), temp) ELSE CALL ppval(sp%sp1, x, sp%ppform(:,leftx+1,:,lefty+1),& & leftx, jder(1), temp) END IF CALL ppval(sp%sp2, y, temp, lefty, jder(2), fp) DEALLOCATE(temp) ELSE ! using spline expansion ALLOCATE(funx(1:k1,0:jder(1)), funy(1:k2,0:jder(2))) fp = 0.0d0 CALL basfun(x, sp%sp1, funx, leftx+1) CALL basfun(y, sp%sp2, funy, lefty+1) DO jj=1,k2 DO ii=1,k1 fp = fp + & & funy(jj,jder(2))*sp%bcoefs(leftx+ii,lefty+jj)* & & funx(ii,jder(1)) END DO END DO DEALLOCATE(funx, funy) END IF END SUBROUTINE gridval2d !=========================================================================== SUBROUTINE gridval2d_1d(sp, xp, yp, fp, jder, c, ppform) ! ! Compute values or jder-th dervivative of f(x,y) from ppform ! of spline sp. Recompute the ppform if the optional spline ! coefficients c are given. ! ! F(I) = F(X(I), Y(I)) ! TYPE(spline2d), INTENT(inout) :: sp DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: xp, yp DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: fp INTEGER, INTENT(in) :: jder(2) DOUBLE PRECISION, DIMENSION(:,:), OPTIONAL, INTENT(in) :: c DOUBLE PRECISION, DIMENSION(:,:,:,:), OPTIONAL :: ppform ! INTEGER :: d1, d2, k1, k2, n1, n2, np DOUBLE PRECISION, ALLOCATABLE :: work(:,:,:), temp(:) DOUBLE PRECISION, ALLOCATABLE :: funx(:,:), funy(:,:) DOUBLE PRECISION :: x(SIZE(xp)), y(SIZE(yp)) INTEGER :: leftx(SIZE(xp)), lefty(SIZE(yp)) INTEGER :: i, j, k, ii, jj LOGICAL :: nlppform ! d1 = sp%sp1%dim d2 = sp%sp2%dim k1 = sp%sp1%order k2 = sp%sp2%order n1 = sp%sp1%nints n2 = sp%sp2%nints nlppform = sp%sp1%nlppform .OR. sp%sp2%nlppform ! ! Compute PPFORM/BCOEFS if spline coefs are passed ! IF( PRESENT(c)) THEN IF( nlppform ) THEN ALLOCATE(work(d2,k1,n1)) CALL topp1(sp%sp1, c , work) IF(PRESENT(ppform)) THEN CALL topp2(sp%sp2, work, ppform) ELSE IF( ASSOCIATED(sp%ppform) ) DEALLOCATE(sp%ppform) ALLOCATE(sp%ppform(k1,n1,k2,n2)) CALL topp2(sp%sp2, work, sp%ppform) END IF DEALLOCATE(work) ELSE IF( ASSOCIATED(sp%bcoefs) ) DEALLOCATE(sp%bcoefs) ALLOCATE(sp%bcoefs(SIZE(c,1),SIZE(c,2))) sp%bcoefs = c END IF END IF ! ! Applly periodicity if required ! np = SIZE(xp) IF( sp%sp1%period ) THEN ! ** Applly periodicity ** x = sp%sp1%knots(0) + MODULO(xp-sp%sp1%knots(0), sp%sp1%lperiod) ELSE x = xp END IF IF( sp%sp2%period ) THEN ! ** Applly periodicity ** y = sp%sp2%knots(0) + MODULO(yp-sp%sp2%knots(0), sp%sp2%lperiod) ELSE y = yp END IF ! ! Locate the interval containing x, y ! CALL locintv(sp%sp1, x, leftx) CALL locintv(sp%sp2, y, lefty) ! ! Compute function/derivatives ! IF( nlppform ) THEN ! using PP form ALLOCATE(temp(k2)) DO i=1,np IF(PRESENT(ppform)) THEN CALL ppval(sp%sp1, x(i), ppform(:,leftx(i)+1,:,lefty(i)+1),& & leftx(i), jder(1), temp) ELSE CALL ppval(sp%sp1, x(i), sp%ppform(:,leftx(i)+1,:,lefty(i)+1),& & leftx(i), jder(1), temp) END IF CALL ppval(sp%sp2, y(i), temp, lefty(i), jder(2), fp(i)) END DO DEALLOCATE(temp) ELSE ! using spline expansion ALLOCATE(funx(1:k1,0:jder(1)), funy(1:k2,0:jder(2))) fp = 0.0d0 DO i=1,np CALL basfun(x(i), sp%sp1, funx, leftx(i)+1) CALL basfun(y(i), sp%sp2, funy, lefty(i)+1) DO jj=1,k2 DO ii=1,k1 fp(i) = fp(i) + & & funy(jj,jder(2))*sp%bcoefs(leftx(i)+ii,lefty(i)+jj)* & & funx(ii,jder(1)) END DO END DO END DO DEALLOCATE(funx, funy) END IF END SUBROUTINE gridval2d_1d !=========================================================================== SUBROUTINE gridval2dz(sp, xp, yp, fp, jder, c, ppformz) ! ! Compute values or jder-th dervivative of f(x,y) from ppform ! of spline sp. Recompute the ppform if the optional spline ! coefficients c are given. ! ! F = F(X, Y) ! TYPE(spline2d), INTENT(inout) :: sp DOUBLE PRECISION, INTENT(in) :: xp, yp DOUBLE COMPLEX, INTENT(out) :: fp INTEGER, INTENT(in) :: jder(2) DOUBLE COMPLEX, DIMENSION(:,:), OPTIONAL, INTENT(in) :: c DOUBLE COMPLEX, DIMENSION(:,:,:,:), OPTIONAL :: ppformz ! INTEGER :: d1, d2, k1, k2, n1, n2 DOUBLE COMPLEX, ALLOCATABLE :: work(:,:,:), temp(:) DOUBLE PRECISION, ALLOCATABLE :: funx(:,:), funy(:,:) DOUBLE PRECISION :: x, y INTEGER :: leftx, lefty INTEGER :: i, j, k, ii, jj LOGICAL :: nlppform ! d1 = sp%sp1%dim d2 = sp%sp2%dim k1 = sp%sp1%order k2 = sp%sp2%order n1 = sp%sp1%nints n2 = sp%sp2%nints nlppform = sp%sp1%nlppform .OR. sp%sp2%nlppform ! ! Compute PPFORM/BCOEFS if spline coefs are passed ! IF( PRESENT(c)) THEN IF( nlppform ) THEN ALLOCATE(work(d2,k1,n1)) CALL topp1z(sp%sp1, c , work) IF(PRESENT(ppformz)) THEN CALL topp2z(sp%sp2, work, ppformz) ELSE IF( ASSOCIATED(sp%ppformz) ) DEALLOCATE(sp%ppformz) ALLOCATE(sp%ppformz(k1,n1,k2,n2)) CALL topp2z(sp%sp2, work, sp%ppformz) END IF DEALLOCATE(work) ELSE IF( ASSOCIATED(sp%bcoefsc) ) DEALLOCATE(sp%bcoefsc) ALLOCATE(sp%bcoefsc(SIZE(c,1),SIZE(c,2))) sp%bcoefsc = c END IF END IF ! ! Applly periodicity if required ! IF( sp%sp1%period ) THEN ! ** Applly periodicity ** x = sp%sp1%knots(0) + MODULO(xp-sp%sp1%knots(0), sp%sp1%lperiod) ELSE x = xp END IF IF( sp%sp2%period ) THEN ! ** Applly periodicity ** y = sp%sp2%knots(0) + MODULO(yp-sp%sp2%knots(0), sp%sp2%lperiod) ELSE y = yp END IF ! ! Locate the interval containing x, y ! CALL locintv(sp%sp1, x, leftx) CALL locintv(sp%sp2, y, lefty) ! ! Compute function/derivatives ! IF( nlppform ) THEN ! using PP form ALLOCATE(temp(k2)) IF(PRESENT(ppformz)) THEN CALL ppval(sp%sp1, x, ppformz(:,leftx+1,:,lefty+1),& & leftx, jder(1), temp) ELSE CALL ppval(sp%sp1, x, sp%ppformz(:,leftx+1,:,lefty+1),& & leftx, jder(1), temp) END IF CALL ppval(sp%sp2, y, temp, lefty, jder(2), fp) DEALLOCATE(temp) ELSE ! using spline expansion ALLOCATE(funx(1:k1,0:jder(1)), funy(1:k2,0:jder(2))) fp = (0.0d0,0.0d0) CALL basfun(x, sp%sp1, funx, leftx+1) CALL basfun(y, sp%sp2, funy, lefty+1) DO jj=1,k2 DO ii=1,k1 fp = fp + & & funy(jj,jder(2))*sp%bcoefsc(leftx+ii,lefty+jj)* & & funx(ii,jder(1)) END DO END DO DEALLOCATE(funx, funy) END IF END SUBROUTINE gridval2dz !=========================================================================== SUBROUTINE gridval2d_1dz(sp, xp, yp, fp, jder, c, ppformz) ! ! Compute values or jder-th dervivative of f(x,y) from ppform ! of spline sp. Recompute the ppform if the optional spline ! coefficients c are given. ! ! F(I) = F(X(I), Y(I)) ! TYPE(spline2d), INTENT(inout) :: sp DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: xp, yp DOUBLE COMPLEX, DIMENSION(:), INTENT(out) :: fp INTEGER, INTENT(in) :: jder(2) DOUBLE COMPLEX, DIMENSION(:,:), OPTIONAL, INTENT(in) :: c DOUBLE COMPLEX, DIMENSION(:,:,:,:), OPTIONAL :: ppformz ! INTEGER :: d1, d2, k1, k2, n1, n2, np DOUBLE COMPLEX, ALLOCATABLE :: work(:,:,:), temp(:) DOUBLE PRECISION, ALLOCATABLE :: funx(:,:), funy(:,:) DOUBLE PRECISION :: x(SIZE(xp)), y(SIZE(yp)) INTEGER :: leftx(SIZE(xp)), lefty(SIZE(yp)) INTEGER :: i, j, k, ii, jj LOGICAL :: nlppform ! d1 = sp%sp1%dim d2 = sp%sp2%dim k1 = sp%sp1%order k2 = sp%sp2%order n1 = sp%sp1%nints n2 = sp%sp2%nints nlppform = sp%sp1%nlppform .OR. sp%sp2%nlppform ! ! Compute PPFORM/BCOEFS if spline coefs are passed ! IF( PRESENT(c)) THEN IF( nlppform ) THEN ALLOCATE(work(d2,k1,n1)) CALL topp1z(sp%sp1, c , work) IF(PRESENT(ppformz)) THEN CALL topp2z(sp%sp2, work, ppformz) ELSE IF( ASSOCIATED(sp%ppformz) ) DEALLOCATE(sp%ppformz) ALLOCATE(sp%ppformz(k1,n1,k2,n2)) CALL topp2z(sp%sp2, work, sp%ppformz) END IF DEALLOCATE(work) ELSE IF( ASSOCIATED(sp%bcoefsc) ) DEALLOCATE(sp%bcoefsc) ALLOCATE(sp%bcoefsc(SIZE(c,1),SIZE(c,2))) sp%bcoefsc = c END IF END IF ! ! Applly periodicity if required ! np = SIZE(xp) IF( sp%sp1%period ) THEN ! ** Applly periodicity ** x = sp%sp1%knots(0) + MODULO(xp-sp%sp1%knots(0), sp%sp1%lperiod) ELSE x = xp END IF IF( sp%sp2%period ) THEN ! ** Applly periodicity ** y = sp%sp2%knots(0) + MODULO(yp-sp%sp2%knots(0), sp%sp2%lperiod) ELSE y = yp END IF ! ! Locate the interval containing x, y ! CALL locintv(sp%sp1, x, leftx) CALL locintv(sp%sp2, y, lefty) ! ! Compute function/derivatives ! IF( nlppform ) THEN ! using PP form ALLOCATE(temp(k2)) DO i=1,np IF(PRESENT(ppformz)) THEN CALL ppval(sp%sp1, x(i), ppformz(:,leftx(i)+1,:,lefty(i)+1),& & leftx(i), jder(1), temp) ELSE CALL ppval(sp%sp1, x(i), sp%ppformz(:,leftx(i)+1,:,lefty(i)+1),& & leftx(i), jder(1), temp) END IF CALL ppval(sp%sp2, y(i), temp, lefty(i), jder(2), fp(i)) END DO DEALLOCATE(temp) ELSE ! using spline expansion ALLOCATE(funx(1:k1,0:jder(1)), funy(1:k2,0:jder(2))) fp = (0.0d0,0.0d0) DO i=1,np CALL basfun(x(i), sp%sp1, funx, leftx(i)+1) CALL basfun(y(i), sp%sp2, funy, lefty(i)+1) DO jj=1,k2 DO ii=1,k1 fp(i) = fp(i) + & & funy(jj,jder(2))*sp%bcoefsc(leftx(i)+ii,lefty(i)+jj)* & & funx(ii,jder(1)) END DO END DO END DO DEALLOCATE(funx, funy) END IF END SUBROUTINE gridval2d_1dz !=========================================================================== SUBROUTINE gridval2d_2dz(sp, xp, yp, fp, jder, c, ppformz) ! ! Compute values or jder-th dervivative of f(x,y) from ppform ! of spline sp. Recompute the ppform if the optional spline ! coefficients c are given. ! ! F(I,J) = F(X(I), Y(J)) ! TYPE(spline2d), INTENT(inout) :: sp DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: xp, yp DOUBLE COMPLEX, DIMENSION(:,:), INTENT(out) :: fp INTEGER, INTENT(in) :: jder(2) DOUBLE COMPLEX, DIMENSION(:,:), OPTIONAL, INTENT(in) :: c DOUBLE COMPLEX, DIMENSION(:,:,:,:), OPTIONAL :: ppformz ! INTEGER :: d1, d2, k1, k2, n1, n2 DOUBLE COMPLEX, ALLOCATABLE :: work(:,:,:), temp(:) DOUBLE PRECISION, ALLOCATABLE :: funx(:,:), funy(:,:) DOUBLE PRECISION :: x(SIZE(xp)), y(SIZE(yp)) INTEGER :: leftx(SIZE(xp)), lefty(SIZE(yp)) INTEGER :: i, j, k, ii, jj LOGICAL :: nlppform ! d1 = sp%sp1%dim d2 = sp%sp2%dim k1 = sp%sp1%order k2 = sp%sp2%order n1 = sp%sp1%nints n2 = sp%sp2%nints nlppform = sp%sp1%nlppform .OR. sp%sp2%nlppform ! ! Compute PPFORMM/BCOEFS if spline coefs are passed ! IF( PRESENT(c) ) THEN IF( nlppform ) THEN ALLOCATE(work(d2,k1,n1)) CALL topp1z(sp%sp1, c , work) IF(PRESENT(ppformz)) THEN CALL topp2z(sp%sp2, work, ppformz) ELSE IF( ASSOCIATED(sp%ppformz) ) DEALLOCATE(sp%ppformz) ALLOCATE(sp%ppformz(k1,n1,k2,n2)) CALL topp2z(sp%sp2, work, sp%ppformz) END IF DEALLOCATE(work) ELSE IF( ASSOCIATED(sp%bcoefsc) ) DEALLOCATE(sp%bcoefsc) ALLOCATE(sp%bcoefsc(SIZE(c,1),SIZE(c,2))) sp%bcoefsc = c END IF END IF ! ! Applly periodicity if required ! IF( sp%sp1%period ) THEN ! ** Applly periodicity ** x = sp%sp1%knots(0) + MODULO(xp-sp%sp1%knots(0), sp%sp1%lperiod) ELSE x = xp END IF IF( sp%sp2%period ) THEN ! ** Applly periodicity ** y = sp%sp2%knots(0) + MODULO(yp-sp%sp2%knots(0), sp%sp2%lperiod) ELSE y = yp END IF ! ! Locate interval containing (x,y) ! CALL locintv(sp%sp1, x, leftx) CALL locintv(sp%sp2, y, lefty) ! ! Compute function/derivatives ! IF( nlppform ) THEN ! using PP form ALLOCATE(temp(k2)) DO j=1,SIZE(y) DO i=1,SIZE(x) IF(PRESENT(ppformz)) THEN CALL ppval(sp%sp1, x(i), ppformz(:,leftx(i)+1,:,lefty(j)+1),& & leftx(i), jder(1), temp) ELSE CALL ppval(sp%sp1, x(i), sp%ppformz(:,leftx(i)+1,:,lefty(j)+1),& & leftx(i), jder(1), temp) END IF CALL ppval(sp%sp2, y(j), temp, lefty(j), jder(2), fp(i,j)) END DO END DO DEALLOCATE(temp) ELSE ! using spline expansion ALLOCATE(funx(1:k1,0:jder(1)), funy(1:k2,0:jder(2))) fp = 0.0d0 DO j=1,SIZE(y) CALL basfun(y(j), sp%sp2, funy, lefty(j)+1) DO i=1,SIZE(x) CALL basfun(x(i), sp%sp1, funx, leftx(i)+1) DO jj=1,k2 DO ii=1,k1 fp(i,j) = fp(i,j) + sp%bcoefsc(leftx(i)+ii,lefty(j)+jj) * & & funx(ii,jder(1))*funy(jj,jder(2)) END DO END DO END DO END DO DEALLOCATE(funx, funy) END IF END SUBROUTINE gridval2d_2dz !=========================================================================== SUBROUTINE calc_fftmass(spl, fftmat) ! ! Compute FT of mass matrix for periodic spline on equidistant mesh ! TYPE(spline1d) :: spl DOUBLE PRECISION, INTENT(out) :: fftmat(0:) ! INTEGER :: dim, nx, nidbas, ngauss DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) DOUBLE COMPLEX, ALLOCATABLE :: ft_fun(:,:) INTEGER :: igauss, intv ! CALL get_dim(spl, dim, nx, nidbas) CALL get_gauss(spl, ngauss) ALLOCATE(xgauss(ngauss), wgauss(ngauss)) ALLOCATE(ft_fun(0:nx-1,1)) ! ! Integrate on first interval intv = 1 CALL get_gauss(spl, ngauss, intv, xgauss, wgauss) fftmat = 0.0d0 DO igauss=1,ngauss CALL ft_basfun(xgauss(igauss), spl, ft_fun, intv) fftmat(:) = fftmat(:) + wgauss(igauss)*ft_fun(:,1)*CONJG(ft_fun(:,1)) END DO ! DEALLOCATE(ft_fun) DEALLOCATE(xgauss, wgauss) END SUBROUTINE calc_fftmass !=========================================================================== SUBROUTINE calc_fftmass_old(spl, fftmat) ! ! Compute FT of mass matrix for periodic spline on equidistant mesh ! TYPE(spline1d) :: spl DOUBLE PRECISION, INTENT(out) :: fftmat(0:) INTEGER :: dim, nx, nidbas, ngauss DOUBLE PRECISION, ALLOCATABLE :: fun(:,:), xgauss(:), wgauss(:), arow(:) INTEGER :: i, j, k, igauss, intv DOUBLE PRECISION :: pi, arg0, arg ! CALL get_dim(spl, dim, nx, nidbas) CALL get_gauss(spl, ngauss) ALLOCATE(fun(0:nidbas,1)) ! Spline ALLOCATE(xgauss(ngauss), wgauss(ngauss)) ALLOCATE(arow(0:nidbas)) ! ! Assemble the first row of the upper mass matrix arow = 0.0d0 intv = 1 ! Get splines on Gauss points in first interval CALL get_gauss(spl, ngauss, intv, xgauss, wgauss) DO igauss=1,ngauss CALL basfun(xgauss(igauss), spl, fun, intv) DO i=0,nidbas DO j=0,nidbas-i arow(i)=arow(i)+fun(j,1)*fun(i+j,1)*wgauss(igauss) END DO END DO END DO ! ! Fourier transform pi = 4.0d0*ATAN(1.0d0) arg0 = 2.0d0*pi/REAL(nx,8) DO k=0,nx-1 fftmat(k) = arow(0) arg = k*arg0 DO i=1,nidbas fftmat(k) = fftmat(k) + 2.0d0*arow(i)*COS(i*arg) END DO END DO ! DEALLOCATE(arow) DEALLOCATE(fun) DEALLOCATE(xgauss, wgauss) END SUBROUTINE calc_fftmass_old !=========================================================================== SUBROUTINE CompMassMatrix1(sp1, sp2, a, b, MassMatrix) ! Compute cross mass matrix MassMatrix between splines sp1 and sp2 over ! interval [a, b] IMPLICIT NONE TYPE(spline1d), INTENT(IN) :: sp1, sp2 DOUBLE PRECISION, INTENT(IN) :: a, b DOUBLE PRECISION, DIMENSION(:, :), POINTER :: MassMatrix INTEGER :: ndim1, n1, nidbas1 INTEGER :: ndim2, n2, nidbas2 INTEGER :: nint, int, ngauss, ig, k1, k2, i1, j2, left1, left2 DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: allknots, xg, wg DOUBLE PRECISION, DIMENSION(:, :), ALLOCATABLE :: fun1, fun2 !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(sp1, ndim1, n1, nidbas1) CALL get_dim(sp2, ndim2, n2, nidbas2) ! PRINT "('In CompMassMatrix1')" ! PRINT "('sp1: dim, #intervals, degree', I, I, I)", ndim1, n1, nidbas1 ! PRINT "('sp2: dim, #intervals, degree', I, I, I)", ndim2, n2, nidbas2 ALLOCATE(fun1(0:nidbas1, 1)) ! needs only basis functions (no derivatives) ALLOCATE(fun2(0:nidbas2, 1)) ! needs only basis functions (no derivatives) IF (sp1%period) THEN IF (sp2%period) THEN ALLOCATE(MassMatrix(n1, n2)) ELSE ALLOCATE(MassMatrix(n1, ndim2)) END IF ELSE IF (sp2%period) THEN ALLOCATE(MassMatrix(ndim1, n2)) ELSE ALLOCATE(MassMatrix(ndim1, ndim2)) END IF END IF ! ! Gauss quadature ! ALLOCATE(allknots(0:n1+n2+3)) CALL sorted_merge(sp1%knots(0:n1), n1+1, sp2%knots(0:n2), n2+1, a, b, allknots, nint) nint = nint-1 ngauss = CEILING(REAL(nidbas1 + nidbas2 + 1, 8)/2.D0) ALLOCATE(xg(ngauss), wg(ngauss)) !=========================================================================== ! 2.0 Assembly loop ! MassMatrix = 0.d0 DO int = 1, nint ! Get gauss abscissas and weights for current interval CALL gauleg(allknots(int-1), allknots(int), xg, wg, ngauss) DO ig = 1, ngauss CALL locintv(sp1, xg(ig), left1) CALL locintv(sp2, xg(ig), left2) CALL basfun(xg(ig), sp1, fun1, left1+1) CALL basfun(xg(ig), sp2, fun2, left2+1) DO k1 = 0, nidbas1 IF (sp1%period) THEN i1 = modulo(left1+1 + k1 -1, n1) +1 ELSE i1 = left1+1 + k1 END IF DO k2 = 0, nidbas2 IF (sp2%period) THEN j2 = modulo(left2+1 + k2 -1, n2) +1 ELSE j2 = left2+1 + k2 END IF MassMatrix(i1, j2) = MassMatrix(i1, j2) + wg(ig)*fun1(k1, 1)*fun2(k2, 1) END DO END DO END DO END DO !=========================================================================== ! 3.0 Epilogue ! DEALLOCATE(xg, wg) DEALLOCATE(fun1, fun2) DEALLOCATE(allknots) END SUBROUTINE CompMassMatrix1 !=========================================================================== SUBROUTINE CompMassMatrix_gb(sp1, sp2, a, b, MassMatrix) ! Compute cross mass matrix MassMatrix between splines sp1 and sp2 over ! interval [a, b] IMPLICIT NONE TYPE(spline1d), INTENT(IN) :: sp1, sp2 DOUBLE PRECISION, INTENT(IN) :: a, b TYPE(gbmat) :: MassMatrix INTEGER :: ndim1, n1, nidbas1 INTEGER :: ndim2, n2, nidbas2 INTEGER :: nint, int, ngauss, ig, k1, k2, i1, j2, left1, left2 DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: allknots, xg, wg DOUBLE PRECISION, DIMENSION(:, :), ALLOCATABLE :: fun1, fun2 DOUBLE PRECISION :: val !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(sp1, ndim1, n1, nidbas1) CALL get_dim(sp2, ndim2, n2, nidbas2) ALLOCATE(fun1(0:nidbas1, 1)) ! needs only basis functions (no derivatives) ALLOCATE(fun2(0:nidbas2, 1)) ! needs only basis functions (no derivatives) ! ! Gauss quadature ! ALLOCATE(allknots(0:n1+n2+3)) CALL sorted_merge(sp1%knots(0:n1), n1+1, sp2%knots(0:n2), n2+1, a, b, allknots, nint) nint = nint-1 ngauss = CEILING(REAL(nidbas1 + nidbas2 + 1, 8)/2.D0) ALLOCATE(xg(ngauss), wg(ngauss)) !=========================================================================== ! 2.0 Assembly loop ! DO int = 1, nint ! Get gauss abscissas and weights for current interval CALL gauleg(allknots(int-1), allknots(int), xg, wg, ngauss) DO ig = 1, ngauss CALL locintv(sp1, xg(ig), left1) CALL locintv(sp2, xg(ig), left2) CALL basfun(xg(ig), sp1, fun1, left1+1) CALL basfun(xg(ig), sp2, fun2, left2+1) DO k1 = 0, nidbas1 IF (sp1%period) THEN i1 = modulo(left1+1 + k1 -1, n1) +1 ELSE i1 = left1+1 + k1 END IF DO k2 = 0, nidbas2 IF (sp2%period) THEN j2 = modulo(left2+1 + k2 -1, n2) +1 ELSE j2 = left2+1 + k2 END IF val = wg(ig)*fun1(k1, 1)*fun2(k2, 1) CALL updtmat(MassMatrix, i1, j2, val) END DO END DO END DO END DO !=========================================================================== ! 3.0 Epilogue ! DEALLOCATE(xg, wg) DEALLOCATE(fun1, fun2) DEALLOCATE(allknots) END SUBROUTINE CompMassMatrix_gb !=========================================================================== SUBROUTINE CompMassMatrix_zgb(sp1, sp2, a, b, MassMatrix) ! Compute cross mass matrix MassMatrix between splines sp1 and sp2 over ! interval [a, b] IMPLICIT NONE TYPE(spline1d), INTENT(IN) :: sp1, sp2 DOUBLE PRECISION, INTENT(IN) :: a, b TYPE(zgbmat) :: MassMatrix INTEGER :: ndim1, n1, nidbas1 INTEGER :: ndim2, n2, nidbas2 INTEGER :: nint, int, ngauss, ig, k1, k2, i1, j2, left1, left2 DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: allknots, xg, wg DOUBLE PRECISION, DIMENSION(:, :), ALLOCATABLE :: fun1, fun2 DOUBLE COMPLEX :: val !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(sp1, ndim1, n1, nidbas1) CALL get_dim(sp2, ndim2, n2, nidbas2) ALLOCATE(fun1(0:nidbas1, 1)) ! needs only basis functions (no derivatives) ALLOCATE(fun2(0:nidbas2, 1)) ! needs only basis functions (no derivatives) ! ! Gauss quadature ! ALLOCATE(allknots(0:n1+n2+3)) CALL sorted_merge(sp1%knots(0:n1), n1+1, sp2%knots(0:n2), n2+1, a, b, allknots, nint) nint = nint-1 ngauss = CEILING(REAL(nidbas1 + nidbas2 + 1, 8)/2.D0) ALLOCATE(xg(ngauss), wg(ngauss)) !=========================================================================== ! 2.0 Assembly loop ! DO int = 1, nint ! Get gauss abscissas and weights for current interval CALL gauleg(allknots(int-1), allknots(int), xg, wg, ngauss) DO ig = 1, ngauss CALL locintv(sp1, xg(ig), left1) CALL locintv(sp2, xg(ig), left2) CALL basfun(xg(ig), sp1, fun1, left1+1) CALL basfun(xg(ig), sp2, fun2, left2+1) DO k1 = 0, nidbas1 IF (sp1%period) THEN i1 = modulo(left1+1 + k1 -1, n1) +1 ELSE i1 = left1+1 + k1 END IF DO k2 = 0, nidbas2 IF (sp2%period) THEN j2 = modulo(left2+1 + k2 -1, n2) +1 ELSE j2 = left2+1 + k2 END IF val = wg(ig)*fun1(k1, 1)*fun2(k2, 1) CALL updtmat(MassMatrix, i1, j2, val) END DO END DO END DO END DO !=========================================================================== ! 3.0 Epilogue ! DEALLOCATE(xg, wg) DEALLOCATE(fun1, fun2) DEALLOCATE(allknots) END SUBROUTINE CompMassMatrix_zgb !=========================================================================== SUBROUTINE sorted_merge(arr1, n1, arr2, n2, a, b, arrm, nm) IMPLICIT NONE ! Peforms: ! 1) Merge of arrays arr1 & arr2 including boundary values a & b ! 2) Sorts the merged arrays keeping only values in [a, b] ! 3) Removes duplicates INTEGER, INTENT(IN) :: n1, n2 INTEGER, INTENT(OUT) :: nm DOUBLE PRECISION, INTENT(IN) :: a, b DOUBLE PRECISION, INTENT(IN) :: arr1(n1), arr2(n2) DOUBLE PRECISION, DIMENSION(*), INTENT(OUT) :: arrm INTEGER :: i, j ! Merge the two arrays including a & b nm = n1 + n2 + 2 arrm(1:nm) = (/ a, arr1(1:n1), b, arr2(1:n2) /) ! Sort CALL sort(arrm, nm) ! Remove duplicates j = 1 DO i = 2, nm IF(arrm(i) .GT. arrm(j)) THEN j = j + 1 arrm(j) = arrm(i) END IF END DO nm = j ! Remove values outside [a, b] j = 0 DO i = 1, nm IF((arrm(i) .GE. a) .AND. (arrm(i) .LE. b)) THEN j = j + 1 arrm(j) = arrm(i) END IF END DO nm = j END SUBROUTINE sorted_merge !=========================================================================== SUBROUTINE sort(arr, n) ! Sorts array ARR of length N into ascending numerical order by the ! Shell-Mezgar algorithm. ! See Sec. 8.1 of Numerical Recipes IMPLICIT NONE INTEGER, INTENT(IN) :: n DOUBLE PRECISION, DIMENSION(n), INTENT(INOUT) :: arr INTEGER :: nsort, is, i, j, l, m DOUBLE PRECISION :: tmp DOUBLE PRECISION, PARAMETER :: tiny = 1D-5 nsort = INT(LOG(REAL(n, 8))/LOG(2.D0) + tiny) m = n DO is = 1, nsort m = m/2 DO j = 1, n-m i = j DO l = i+m IF (arr(l) .LT. arr(i)) THEN tmp = arr(i) arr(i) = arr(l) arr(l) = tmp i = i-m IF (i .LT. 1) EXIT ELSE EXIT END IF END DO END DO END DO END SUBROUTINE sort !=========================================================================== LOGICAL FUNCTION is_equid(x, dev) ! ! Check whether mesh is euidistant or not ! DOUBLE PRECISION, INTENT(in) :: x(0:) DOUBLE PRECISION, INTENT(out), OPTIONAL :: dev ! DOUBLE PRECISION :: dx(SIZE(x)-1), dxmin, dxmax, dxaver, e DOUBLE PRECISION, PARAMETER :: tol=1.d-6 INTEGER :: n, i n=SIZE(x)-1 dx = (/ (x(i)-x(i-1),i=1,n) /) dxmin = MINVAL(dx) dxmax = MAXVAL(dx) dxaver = (x(n)-x(0))/REAL(n,8) e = (dxmax-dxmin)/dxaver !!$ e = (dxmax-dxmin)/(SUM(x)/REAL(n+1)) is_equid = e.LT.tol IF(PRESENT(dev)) dev = e END FUNCTION is_equid !=========================================================================== SUBROUTINE create_fine(cmesh, h, fmap) ! ! Create a fine mesh from a coarse mesh and returns its mapping ! DOUBLE PRECISION, INTENT(in) :: cmesh(0:) DOUBLE PRECISION, INTENT(out) :: h INTEGER, POINTER, INTENT(out) :: fmap(:) ! DOUBLE PRECISION, ALLOCATABLE :: fmesh(:) DOUBLE PRECISION :: xlen, hmin INTEGER :: n, nfine, i, ic ! n = SIZE(cmesh)-1 xlen = cmesh(n)-cmesh(0) ! ! Minimum interval size hmin = xlen DO i=1,n hmin = MIN(hmin, cmesh(i)-cmesh(i-1)) END DO ! ! Create the fine mesh nfine = CEILING(xlen/hmin) h = xlen / REAL(nfine,8) ALLOCATE(fmap(0:nfine)) ALLOCATE(fmesh(0:nfine)) fmesh = cmesh(0) + (/ (i*h, i=0,nfine) /) fmesh(nfine) = cmesh(n) ! ! Map fine to coarse mesh ic = 0 fmap(0) = ic DO i=1,nfine-1 DO IF(fmesh(i).GE.cmesh(ic+1)) THEN ic = ic+1 ELSE EXIT END IF END DO fmap(i) = ic END DO fmap(nfine) = n-1 ! DEALLOCATE(fmesh) END SUBROUTINE create_fine !=========================================================================== SUBROUTINE getgradr(sp, xp, yp, f00, f10, f01) ! ! Compute the function f00 and its derivatives ! f10 = d/dx f ! f01 = d/dy f ! assuming that its PPFORM/BCOEFSC was already computed! ! TYPE(spline2d), INTENT(inout) :: sp DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: xp, yp DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: f00, f10, f01 ! INTEGER :: np DOUBLE PRECISION :: x(SIZE(xp)), y(SIZE(yp)) INTEGER :: leftx(SIZE(xp)), lefty(SIZE(yp)) INTEGER :: i, ip, ii, jj, nidbas(2) DOUBLE PRECISION :: temp0(SIZE(xp),sp%sp2%order), temp1(SIZE(xp),sp%sp2%order) DOUBLE PRECISION, ALLOCATABLE, SAVE :: funx(:,:), funy(:,:) DOUBLE PRECISION, ALLOCATABLE, SAVE :: ftemp0(:), ftemp1(:) LOGICAL :: nlppform ! ! Apply periodicity if required ! np = SIZE(xp) nidbas(1) = sp%sp1%order-1 nidbas(2) = sp%sp2%order-1 nlppform = sp%sp1%nlppform .OR. sp%sp2%nlppform ! ! Locate the interval containing x, y ! CALL locintv(sp%sp1, xp, leftx) CALL locintv(sp%sp2, yp, lefty) x(:) = xp(:) - sp%sp1%knots(leftx(:)) y(:) = yp(:) - sp%sp2%knots(lefty(:)) ! ! Compute function/derivatives ! IF(nlppform) THEN ! ! Using PPFORM !---------- DO i=1,np CALL my_ppval1(nidbas(1), x(i), sp%ppform(:,leftx(i)+1,:,lefty(i)+1), & & temp0(i,:), temp1(i,:)) END DO ! CALL my_ppval0(nidbas(2), y, temp0, 0, f00) CALL my_ppval0(nidbas(2), y, temp0, 1, f01) CALL my_ppval0(nidbas(2), y, temp1, 0, f10) ELSE ! ! Using spline expansion with sp%bcoefsc !---------- IF(.NOT.ALLOCATED(funx)) THEN ALLOCATE(funx(0:nidbas(1),0:1)) ! Spline and its first derivative ALLOCATE(funy(0:nidbas(2),0:1)) ALLOCATE(ftemp0(0:nidbas(1))) ALLOCATE(ftemp1(0:nidbas(1))) END IF ! DO ip=1,np CALL my_splines(nidbas(1), x(ip), sp%sp1%val0(:,:,leftx(ip)+1), funx) CALL my_splines(nidbas(2), y(ip), sp%sp2%val0(:,:,lefty(ip)+1), funy) DO ii=0,nidbas(1) ftemp0(ii) = (0.d0,0.d0) ftemp1(ii) = (0.d0,0.d0) DO jj=0,nidbas(2) ftemp0(ii) = ftemp0(ii) + sp%bcoefs(leftx(ip)+1+ii,lefty(ip)+1+jj)*funy(jj,0) ftemp1(ii) = ftemp1(ii) + sp%bcoefs(leftx(ip)+1+ii,lefty(ip)+1+jj)*funy(jj,1) END DO END DO f00(ip) = SUM(funx(:,0)*ftemp0(:)) f01(ip) = SUM(funx(:,0)*ftemp1(:)) f10(ip) = SUM(funx(:,1)*ftemp0(:)) END DO !----------- END IF CONTAINS !+++ SUBROUTINE my_ppval0(p, x, ppform, jder, f) ! ! Compute function and derivatives from the PP representation ! for many points x(:) INTEGER, INTENT(in) :: p DOUBLE PRECISION, INTENT(in) :: x(:) DOUBLE PRECISION, INTENT(in) :: ppform(:,:) INTEGER, INTENT(in) :: jder DOUBLE PRECISION, INTENT(out) :: f(:) DOUBLE PRECISION :: fact INTEGER :: j SELECT CASE (jder) CASE(0) ! function value SELECT CASE(p) CASE(1) f(:) = ppform(:,1) + x(:)*ppform(:,2) CASE(2) f(:) = ppform(:,1) + x(:)*(ppform(:,2)+x(:)*ppform(:,3)) !!$ CASE(3) !!$ f(:) = ppform(:,1) + x(:)*(ppform(:,2)+x(:)*(ppform(:,3)+x(:)*ppform(:,4))) CASE(3:) f(:) = ppform(:,p+1) DO j=p,1,-1 f(:) = f(:)*x(:) + ppform(:,j) END DO END SELECT CASE(1) ! 1st derivative SELECT CASE(p) CASE(1) f(:) = ppform(:,2) CASE(2) f(:) = ppform(:,2) + x(:)*2.d0*ppform(:,3) !!$ CASE(3) !!$ f(:) = ppform(:,2) + x(:)*(2.d0*ppform(:,3)+x(:)*3.0d0*ppform(:,4)) CASE(3:) f(:) = p*ppform(:,p+1) DO j=p-1,1,-1 f(:) = f(:)*x(:) + j*ppform(:,j+1) END DO END SELECT CASE default ! 2nd and higher derivatives f(:) = ppform(:,p+1) fact = p-jder DO j=p,jder+1,-1 f(:) = f(:)/fact*j*x(:) + ppform(:,j) fact = fact-1.0d0 END DO DO j=2,jder f(:) = f(:)*j END DO END SELECT END SUBROUTINE my_ppval0 !+++ SUBROUTINE my_ppval1(p, x, ppform, f0, f1) ! ! Compute function and first derivative from the PP representation INTEGER, INTENT(in) :: p DOUBLE PRECISION, INTENT(in) :: x DOUBLE PRECISION, INTENT(in) :: ppform(:,:) DOUBLE PRECISION, INTENT(out) :: f0(:) DOUBLE PRECISION, INTENT(out) :: f1(:) DOUBLE PRECISION :: fact INTEGER :: j SELECT CASE(p) CASE(1) f0(:) = ppform(1,:) + x*ppform(2,:) f1(:) = ppform(2,:) CASE(2) f0(:) = ppform(1,:) + x*(ppform(2,:)+x*ppform(3,:)) f1(:) = ppform(2,:) + x*2.d0*ppform(3,:) CASE(3) f0(:) = ppform(1,:) + x*(ppform(2,:)+x*(ppform(3,:)+x*ppform(4,:))) f1(:) = ppform(2,:) + x*(2.d0*ppform(3,:)+x*3.0d0*ppform(4,:)) CASE(4:) f0 = ppform(p+1,:) f1 = f0 DO j=p,2,-1 f0(:) = ppform(j,:) + x*f0(:) f1(:) = f0(:) + x*f1(:) END DO f0(:) = ppform(1,:) + x*f0(:) END SELECT END SUBROUTINE my_ppval1 !+++ SUBROUTINE my_splines(p, x, ppform, f) INTEGER, INTENT(in) :: p DOUBLE PRECISION, INTENT(in) :: x DOUBLE PRECISION, INTENT(in) :: ppform(0:p,0:p) DOUBLE PRECISION, INTENT(out) :: f(0:p,0:1) INTEGER :: i DOUBLE PRECISION :: powerx(0:p) SELECT CASE(p) CASE(1) f(0,0) = ppform(0,0) + x*ppform(1,0) f(0,1) = ppform(1,0) f(1,0) = 1.0-f(0,0) f(1,1) = -f(0,1) CASE(2) f(0,0) = ppform(0,0) + x*(ppform(1,0)+x*ppform(2,0)) f(0,1) = ppform(1,0) + 2.d0*x*ppform(2,0) f(1,0) = ppform(0,1) + x*(ppform(1,1)+x*ppform(2,1)) f(1,1) = ppform(1,1) + 2.d0*x*ppform(2,1) f(2,0) = 1.0 - f(0,0) - f(1,0) f(2,1) = - f(0,1) - f(1,1) CASE(3) f(0,0) = ppform(0,0) + x*(ppform(1,0)+x*(ppform(2,0)+x*ppform(3,0))) f(0,1) = ppform(1,0) + x*(2.d0*ppform(2,0)+3.d0*x*ppform(3,0)) f(1,0) = ppform(0,1) + x*(ppform(1,1)+x*(ppform(2,1)+x*ppform(3,1))) f(1,1) = ppform(1,1) + x*(2.d0*ppform(2,1)+3.d0*x*ppform(3,1)) f(2,0) = ppform(0,2) + x*(ppform(1,2)+x*(ppform(2,2)+x*ppform(3,2))) f(2,1) = ppform(1,2) + x*(2.d0*ppform(2,2)+3.d0*x*ppform(3,2)) f(3,0) = 1.0 - f(0,0) - f(1,0) - f(2,0) f(3,1) = - f(0,1) - f(1,1) - f(2,1) CASE(4:) powerx(0) = 1.d0 DO i=1,p powerx(i) = powerx(i-1)*x END DO DO i=0,p-1 f(i,0) = DOT_PRODUCT(ppform(:,i),powerx(:)) END DO f(p,0) = 1.d0 - SUM(f(0:p-1,0)) f(p,1) = - SUM(f(0:p-1,1)) END SELECT END SUBROUTINE my_splines !+++ END SUBROUTINE getgradr !=========================================================================== SUBROUTINE getgradz(sp, xp, yp, f00, f10, f01) ! ! Compute the function f00 and its derivatives ! f10 = d/dx f ! f01 = d/dy f ! assuming that its PPFORM/BCOEFSC was already computed! ! TYPE(spline2d), INTENT(inout) :: sp DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: xp, yp DOUBLE COMPLEX, DIMENSION(:), INTENT(out) :: f00, f10, f01 ! INTEGER :: np DOUBLE PRECISION :: x(SIZE(xp)), y(SIZE(yp)) INTEGER :: leftx(SIZE(xp)), lefty(SIZE(yp)) INTEGER :: i, ip, ii, jj, nidbas(2) DOUBLE COMPLEX :: temp0(SIZE(xp),sp%sp2%order), temp1(SIZE(xp),sp%sp2%order) DOUBLE PRECISION, ALLOCATABLE, SAVE :: funx(:,:), funy(:,:) DOUBLE COMPLEX, ALLOCATABLE, SAVE :: ftemp0(:), ftemp1(:) LOGICAL :: nlppform ! ! Apply periodicity if required ! np = SIZE(xp) nidbas(1) = sp%sp1%order-1 nidbas(2) = sp%sp2%order-1 nlppform = sp%sp1%nlppform .OR. sp%sp2%nlppform ! ! Locate the interval containing x, y ! CALL locintv(sp%sp1, xp, leftx) CALL locintv(sp%sp2, yp, lefty) x(:) = xp(:) - sp%sp1%knots(leftx(:)) y(:) = yp(:) - sp%sp2%knots(lefty(:)) ! ! Compute function/derivatives ! IF(nlppform) THEN ! ! Using PPFORM !---------- DO i=1,np CALL my_ppval1(nidbas(1), x(i), sp%ppformz(:,leftx(i)+1,:,lefty(i)+1), & & temp0(i,:), temp1(i,:)) END DO ! CALL my_ppval0(nidbas(2), y, temp0, 0, f00) CALL my_ppval0(nidbas(2), y, temp0, 1, f01) CALL my_ppval0(nidbas(2), y, temp1, 0, f10) ELSE ! ! Using spline expansion with sp%bcoefsc !---------- IF(.NOT.ALLOCATED(funx)) THEN ALLOCATE(funx(0:nidbas(1),0:1)) ! Spline and its first derivative ALLOCATE(funy(0:nidbas(2),0:1)) ALLOCATE(ftemp0(0:nidbas(1))) ALLOCATE(ftemp1(0:nidbas(1))) END IF ! DO ip=1,np CALL my_splines(nidbas(1), x(ip), sp%sp1%val0(:,:,leftx(ip)+1), funx) CALL my_splines(nidbas(2), y(ip), sp%sp2%val0(:,:,lefty(ip)+1), funy) DO ii=0,nidbas(1) ftemp0(ii) = (0.d0,0.d0) ftemp1(ii) = (0.d0,0.d0) DO jj=0,nidbas(2) ftemp0(ii) = ftemp0(ii) + sp%bcoefsc(leftx(ip)+1+ii,lefty(ip)+1+jj)*funy(jj,0) ftemp1(ii) = ftemp1(ii) + sp%bcoefsc(leftx(ip)+1+ii,lefty(ip)+1+jj)*funy(jj,1) END DO END DO f00(ip) = SUM(funx(:,0)*ftemp0(:)) f01(ip) = SUM(funx(:,0)*ftemp1(:)) f10(ip) = SUM(funx(:,1)*ftemp0(:)) END DO !----------- END IF CONTAINS !+++ SUBROUTINE my_ppval0(p, x, ppform, jder, f) ! ! Compute function and derivatives from the PP representation ! for many points x(:) INTEGER, INTENT(in) :: p DOUBLE PRECISION, INTENT(in) :: x(:) DOUBLE COMPLEX, INTENT(in) :: ppform(:,:) INTEGER, INTENT(in) :: jder DOUBLE COMPLEX, INTENT(out) :: f(:) DOUBLE PRECISION :: fact INTEGER :: j SELECT CASE (jder) CASE(0) ! function value SELECT CASE(p) CASE(1) f(:) = ppform(:,1) + x(:)*ppform(:,2) CASE(2) f(:) = ppform(:,1) + x(:)*(ppform(:,2)+x(:)*ppform(:,3)) CASE(3) f(:) = ppform(:,1) + x(:)*(ppform(:,2)+x(:)*(ppform(:,3)+x(:)*ppform(:,4))) CASE(4:) DO j=p+1,1,-1 f(:) = f(:)*x(:) + ppform(:,j) END DO END SELECT CASE(1) ! 1st derivative SELECT CASE(p) CASE(1) f(:) = ppform(:,2) CASE(2) f(:) = ppform(:,2) + x(:)*2.d0*ppform(:,3) CASE(3) f(:) = ppform(:,2) + x(:)*(2.d0*ppform(:,3)+x(:)*3.0d0*ppform(:,4)) CASE(4:) DO j=p,1,-1 f(:) = f(:)*x(:) + j*ppform(:,j+1) END DO END SELECT CASE default ! 2nd and higher derivatives f(:) = ppform(:,p+1) fact = p-jder DO j=p,jder+1,-1 f(:) = f(:)/fact*j*x(:) + ppform(:,j) fact = fact-1.0d0 END DO DO j=2,jder f(:) = f(:)*j END DO END SELECT END SUBROUTINE my_ppval0 !+++ SUBROUTINE my_ppval1(p, x, ppform, f0, f1) ! ! Compute function and first derivative from the PP representation INTEGER, INTENT(in) :: p DOUBLE PRECISION, INTENT(in) :: x DOUBLE COMPLEX, INTENT(in) :: ppform(:,:) DOUBLE COMPLEX, INTENT(out) :: f0(:) DOUBLE COMPLEX, INTENT(out) :: f1(:) DOUBLE PRECISION :: fact INTEGER :: j SELECT CASE(p) CASE(1) f0(:) = ppform(1,:) + x*ppform(2,:) f1(:) = ppform(2,:) CASE(2) f0(:) = ppform(1,:) + x*(ppform(2,:)+x*ppform(3,:)) f1(:) = ppform(2,:) + x*2.d0*ppform(3,:) CASE(3) f0(:) = ppform(1,:) + x*(ppform(2,:)+x*(ppform(3,:)+x*ppform(4,:))) f1(:) = ppform(2,:) + x*(2.d0*ppform(3,:)+x*3.0d0*ppform(4,:)) CASE(4:) f0 = ppform(p+1,:) f1 = f0 DO j=p,2,-1 f0(:) = ppform(j,:) + x*f0(:) f1(:) = f0(:) + x*f1(:) END DO f0(:) = ppform(1,:) + x*f0(:) END SELECT END SUBROUTINE my_ppval1 !+++ SUBROUTINE my_splines(p, x, ppform, f) INTEGER, INTENT(in) :: p DOUBLE PRECISION, INTENT(in) :: x DOUBLE PRECISION, INTENT(in) :: ppform(0:p,0:p) DOUBLE PRECISION, INTENT(out) :: f(0:p,0:1) INTEGER :: i DOUBLE PRECISION :: powerx(0:p) SELECT CASE(p) CASE(1) f(0,0) = ppform(0,0) + x*ppform(1,0) f(0,1) = ppform(1,0) f(1,0) = 1.0-f(0,0) f(1,1) = -f(0,1) CASE(2) f(0,0) = ppform(0,0) + x*(ppform(1,0)+x*ppform(2,0)) f(0,1) = ppform(1,0) + 2.d0*x*ppform(2,0) f(1,0) = ppform(0,1) + x*(ppform(1,1)+x*ppform(2,1)) f(1,1) = ppform(1,1) + 2.d0*x*ppform(2,1) f(2,0) = 1.0 - f(0,0) - f(1,0) f(2,1) = - f(0,1) - f(1,1) CASE(3) f(0,0) = ppform(0,0) + x*(ppform(1,0)+x*(ppform(2,0)+x*ppform(3,0))) f(0,1) = ppform(1,0) + x*(2.d0*ppform(2,0)+3.d0*x*ppform(3,0)) f(1,0) = ppform(0,1) + x*(ppform(1,1)+x*(ppform(2,1)+x*ppform(3,1))) f(1,1) = ppform(1,1) + x*(2.d0*ppform(2,1)+3.d0*x*ppform(3,1)) f(2,0) = ppform(0,2) + x*(ppform(1,2)+x*(ppform(2,2)+x*ppform(3,2))) f(2,1) = ppform(1,2) + x*(2.d0*ppform(2,2)+3.d0*x*ppform(3,2)) f(3,0) = 1.0 - f(0,0) - f(1,0) - f(2,0) f(3,1) = - f(0,1) - f(1,1) - f(2,1) CASE(4:) powerx(0) = 1.d0 DO i=1,p powerx(i) = powerx(i-1)*x END DO DO i=0,p-1 f(i,0) = DOT_PRODUCT(ppform(:,i),powerx(:)) END DO f(p,0) = 1.d0 - SUM(f(0:p-1,0)) f(p,1) = - SUM(f(0:p-1,1)) END SELECT END SUBROUTINE my_splines !+++ END SUBROUTINE getgradz END MODULE bsplines diff --git a/src/cds_mod.f90 b/src/cds_mod.f90 index b0ca9d6..79ab611 100644 --- a/src/cds_mod.f90 +++ b/src/cds_mod.f90 @@ -1,626 +1,626 @@ !> !> @file cds_mod.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> MODULE cds ! ! CDSMAT: Implement sparse matrix using Compressed ! Diagonal Storage. ! ! T.M. Tran, CRPP-EPFL ! November 2010 ! USE mumps_bsplines IMPLICIT NONE ! TYPE cds_mat ! Compressed Diagonal Storage INTEGER :: rank INTEGER :: kl, ku, ndiags INTEGER :: nterms, kmat INTEGER :: ny INTEGER, DIMENSION(:), POINTER :: dists => NULL() DOUBLE PRECISION, DIMENSION(:), POINTER :: rowv => NULL() DOUBLE PRECISION, DIMENSION(:), POINTER :: colh => NULL() DOUBLE PRECISION, DIMENSION(:), POINTER :: bal => NULL() DOUBLE PRECISION, DIMENSION(:,:), POINTER :: val => NULL() TYPE(mumps_mat), ALLOCATABLE :: mumps END TYPE cds_mat ! !-------------------------------------------------------------------------------- INTERFACE init MODULE PROCEDURE init_cds_mat END INTERFACE init INTERFACE clear_mat MODULE PROCEDURE clear_cds_mat END INTERFACE clear_mat INTERFACE destroy MODULE PROCEDURE destroy_cds_mat END INTERFACE destroy INTERFACE updtmat MODULE PROCEDURE updt_cds END INTERFACE updtmat INTERFACE getele MODULE PROCEDURE getele_cds END INTERFACE getele INTERFACE putele MODULE PROCEDURE putele_cds END INTERFACE putele INTERFACE getcol MODULE PROCEDURE getcol_cds END INTERFACE getcol INTERFACE getrow MODULE PROCEDURE getrow_cds END INTERFACE getrow INTERFACE putcol MODULE PROCEDURE putcol_cds END INTERFACE putcol INTERFACE putrow MODULE PROCEDURE putrow_cds END INTERFACE putrow INTERFACE getdiag MODULE PROCEDURE getdiag_cds END INTERFACE getdiag INTERFACE vmx MODULE PROCEDURE vmx_cds, vmxn_cds END INTERFACE INTERFACE putmat MODULE PROCEDURE putmat_cds END INTERFACE INTERFACE getmat MODULE PROCEDURE getmat_cds END INTERFACE INTERFACE flops MODULE PROCEDURE flops_cds END INTERFACE flops INTERFACE matnorm MODULE PROCEDURE matnorm_cds END INTERFACE matnorm ! CONTAINS !=========================================================================== SUBROUTINE init_cds_mat(rank, dists, nterms, mat, bw0, kmat) ! ! Initialize a CDS matrix obtained for a 2d FE discretization ! using Splines of orders p(1) and p(2). ! Number first the 2nd (periodic) dimension. ! INTEGER, INTENT(in) :: rank INTEGER, ALLOCATABLE, INTENT(in) :: dists(:) INTEGER, INTENT(in) :: nterms TYPE(cds_mat) :: mat INTEGER, OPTIONAL, INTENT(in) :: bw0, kmat ! INTEGER :: kl, ku ! mat%rank = rank mat%nterms = nterms mat%ny = 0 ! Used for unicity condition in cyl. geometry. IF(PRESENT(kmat)) mat%kmat = kmat ! kl = -LBOUND(dists,1) ku = UBOUND(dists,1) ! mat%kl = kl mat%ku = ku mat%ndiags = ku + kl + 1 IF(ASSOCIATED(mat%dists)) DEALLOCATE(mat%dists) ALLOCATE(mat%dists(-kl:ku)) mat%dists = dists ! IF(ASSOCIATED(mat%rowv)) DEALLOCATE(mat%rowv) IF(ASSOCIATED(mat%colh)) DEALLOCATE(mat%colh) IF(PRESENT(bw0)) THEN ALLOCATE(mat%rowv(bw0), mat%colh(bw0)) mat%rowv = 0.0d0 mat%colh = 0.0d0 ELSE ALLOCATE(mat%rowv(0)) ALLOCATE(mat%colh(0)) END IF ! IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val) ALLOCATE(mat%val(rank, -kl:ku)) mat%val = 0.0d0 ! IF(ASSOCIATED(mat%bal)) DEALLOCATE(mat%bal) ALLOCATE(mat%bal(rank)) mat%bal = 0.0d0 END SUBROUTINE init_cds_mat !=========================================================================== SUBROUTINE clear_cds_mat(mat) ! ! Clear matrix, keeping its sparse structure unchanged ! TYPE(cds_mat) :: mat ! mat%val = 0.0d0 END SUBROUTINE clear_cds_mat !=========================================================================== SUBROUTINE destroy_cds_mat(mat) ! ! Deallocate pointers in mat ! TYPE(cds_mat) :: mat IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val) IF( ASSOCIATED(mat%dists)) DEALLOCATE(mat%dists) IF( ASSOCIATED(mat%rowv)) DEALLOCATE(mat%rowv) IF( ASSOCIATED(mat%colh)) DEALLOCATE(mat%colh) END SUBROUTINE destroy_cds_mat !=========================================================================== SUBROUTINE updt_cds(mat, i, j, val) ! ! Update element Aij into sparse CDS matrix ! TYPE(cds_mat), INTENT(inout) :: mat INTEGER, INTENT(in) :: i, j DOUBLE PRECISION, INTENT(in) :: val INTEGER :: d, k ! d = j-i DO k = -mat%kl, mat%ku IF( d .EQ. mat%dists(k) ) THEN mat%val(i,k) = mat%val(i,k)+val RETURN END IF END DO WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j WRITE(*,'(a/(10i6))') 'Valid distances', mat%dists STOP '*** Abnormal EXIT in MODULE matrix ***' END SUBROUTINE updt_cds !=========================================================================== SUBROUTINE getele_cds(mat, i, j, val) ! ! Get element Aij of sparse CDS matrix ! TYPE(cds_mat), INTENT(in) :: mat INTEGER, INTENT(in) :: i, j DOUBLE PRECISION, INTENT(out) :: val INTEGER :: d, k ! d = j-i DO k = -mat%kl, mat%ku IF( d .EQ. mat%dists(k) ) THEN val = mat%val(i,k) RETURN END IF END DO WRITE(*,'(a,2i6)') 'GETELE: i, j out of range ', i, j WRITE(*,'(a/(10i6))') 'Valid distances', mat%dists STOP '*** Abnormal EXIT in MODULE matrix ***' END SUBROUTINE getele_cds !=========================================================================== SUBROUTINE putele_cds(mat, i, j, val) ! ! Update element Aij into sparse CDS matrix ! TYPE(cds_mat), INTENT(inout) :: mat INTEGER, INTENT(in) :: i, j DOUBLE PRECISION, INTENT(in) :: val INTEGER :: d, k ! d = j-i DO k = -mat%kl, mat%ku IF( d .EQ. mat%dists(k) ) THEN mat%val(i,k) = val RETURN END IF END DO WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j WRITE(*,'(a/(10i6))') 'Valid distances', mat%dists STOP '*** Abnormal EXIT in MODULE matrix ***' END SUBROUTINE putele_cds !=========================================================================== SUBROUTINE getcol_cds(mat, j, arr) ! ! Get a column from matrix ! TYPE(cds_mat), INTENT(in) :: mat INTEGER, INTENT(in) :: j DOUBLE PRECISION, INTENT(out) :: arr(:) INTEGER :: n,i, k ! n = mat%rank IF( SIZE(arr) .LT. n ) THEN WRITE(*,*) 'GETCOL: size of arr too small' STOP '*** Abnormal EXIT in MODULE matrix ***' END IF arr(1:n) = 0.0d0 DO k=-mat%kl, mat%ku i = j-mat%dists(k) IF( i.GE.1 .AND. i.LE.n ) arr(i) = mat%val(i,k) END DO END SUBROUTINE getcol_cds !=========================================================================== SUBROUTINE getrow_cds(mat, i, arr) ! ! Get a row from matrix ! TYPE(cds_mat), INTENT(in) :: mat INTEGER, INTENT(in) :: i DOUBLE PRECISION, INTENT(out) :: arr(:) INTEGER :: n, j, k ! n = mat%rank IF( SIZE(arr) .LT. n ) THEN WRITE(*,*) 'GETROW: size of arr too small' STOP '*** Abnormal EXIT in MODULE matrix ***' END IF arr(1:n) = 0.0d0 DO k=-mat%kl, mat%ku j = i+mat%dists(k) IF( j.GE.1 .AND. j.LE.n ) arr(j) = mat%val(i,k) END DO END SUBROUTINE getrow_cds !=========================================================================== SUBROUTINE putcol_cds(mat, j, arr) ! ! Put a column to matrix ! TYPE(cds_mat) :: mat INTEGER, INTENT(in) :: j DOUBLE PRECISION, INTENT(in) :: arr(:) INTEGER :: n,i, k ! n = mat%rank IF( SIZE(arr) .LT. n ) THEN WRITE(*,*) 'PUTCOL: size of arr too small' STOP '*** Abnormal EXIT in MODULE matrix ***' END IF DO k=-mat%kl, mat%ku i = j-mat%dists(k) IF( i.GE. 1 .AND. i.LE.n ) mat%val(i,k) = arr(i) END DO END SUBROUTINE putcol_cds !=========================================================================== SUBROUTINE putrow_cds(mat, i, arr) ! ! Put a row from matrix ! TYPE(cds_mat) :: mat INTEGER, INTENT(in) :: i DOUBLE PRECISION, INTENT(in) :: arr(:) INTEGER :: n, j, k ! n = mat%rank IF( SIZE(arr) .LT. n ) THEN WRITE(*,*) 'PUTROW: size of arr too small' STOP '*** Abnormal EXIT in MODULE matrix ***' END IF DO k=-mat%kl, mat%ku j = i+mat%dists(k) IF( j.GE.1 .AND. j.LE.n ) mat%val(i,k) = arr(j) END DO END SUBROUTINE putrow_cds !=========================================================================== SUBROUTINE getdiag_cds(mat, d) ! ! Returns diagonal of matrix ! TYPE(cds_mat) :: mat DOUBLE PRECISION :: d(:) INTEGER :: ny ! d(:) = mat%val(:,0) ! ! The extra row and column implied by periodic BC !!$ ny = mat%ny !!$ IF( ny .NE. 0 ) THEN !!$ d(ny) = mat%rowv(ny) + mat%colh(ny) !!$ END IF !!$ WRITE(*,'(a/(8(1pe12.3)))') 'd', d END SUBROUTINE getdiag_cds !=========================================================================== FUNCTION vmx_cds(mat, xarr) ! ! Return product mat*x ! TYPE(cds_mat) :: mat DOUBLE PRECISION, INTENT(in) :: xarr(:) DOUBLE PRECISION :: vmx_cds(SIZE(xarr)) ! DOUBLE PRECISION :: alpha=1.0d0, beta=1.0d0 INTEGER :: m, bw0, ny, k, d, i, i1, i2 ! m = mat%rank bw0 = SIZE(mat%rowv) ny = mat%ny vmx_cds = 0.0d0 ! IF( ny .NE. 0 ) THEN ! Contributions from unicity BC vmx_cds(ny:bw0) = mat%colh(ny:bw0)*xarr(ny) vmx_cds(ny) = vmx_cds(ny) + DOT_PRODUCT(mat%rowv(ny:bw0), xarr(ny:bw0)) END IF ! #ifdef MKL CALL mkl_ddiamv('n', m, m, alpha, 'g', mat%val, m, mat%dists, & & mat%ndiags, xarr, beta, vmx_cds) #else DO k=-mat%kl,mat%ku d = mat%dists(k) i1 = MAX(1,1-d) i2 = MIN(mat%rank,mat%rank-d) DO i=i1,i2 vmx_cds(i) = vmx_cds(i) + mat%val(i,k)*xarr(i+d) END DO END DO #endif END FUNCTION vmx_cds !=========================================================================== FUNCTION vmxn_cds(mat, xarr) ! ! Return product mat*x ! TYPE(cds_mat) :: mat DOUBLE PRECISION, INTENT(in) :: xarr(:,:) DOUBLE PRECISION :: vmxn_cds(SIZE(xarr,1),SIZE(xarr,2)) ! DOUBLE PRECISION :: alpha=1.0d0, beta=1.0d0 INTEGER :: m, nrhs, bw0, ny, k, d, i, j, i1, i2 ! m = mat%rank nrhs = SIZE(xarr,2) bw0 = SIZE(mat%rowv) ny = mat%ny vmxn_cds = 0.0d0 ! IF( ny .NE. 0 ) THEN ! Contributions from unicity BC DO j=1,nrhs vmxn_cds(ny:bw0,j) = mat%colh(ny:bw0)*xarr(ny,j) vmxn_cds(ny,j) = vmxn_cds(ny,j) + & & DOT_PRODUCT(mat%rowv(ny:bw0), xarr(ny:bw0,j)) END DO END IF ! #ifdef MKL CALL mkl_ddiamm('n', m, nrhs, m, alpha, 'g', mat%val, m, & & mat%dists, mat%ndiags, xarr, m, beta, vmxn_cds, m) #else DO k=-mat%kl,mat%ku d = mat%dists(k) i1 = MAX(1,1-d) i2 = MIN(mat%rank,mat%rank-d) DO j=1,nrhs DO i=i1,i2 vmxn_cds(i,j) = vmxn_cds(i,j) + mat%val(i,k)*xarr(i+d,j) END DO END DO END DO #endif END FUNCTION vmxn_cds !=========================================================================== SUBROUTINE getmat_cds(fid, label, mat) ! ! Read in CDS matrix from hdf5 file ! USE futils ! INTEGER, INTENT(in) :: fid CHARACTER(len=*), INTENT(in) :: label TYPE(cds_mat) :: mat ! CALL getatt(fid, label, 'RANK', mat%rank) CALL getatt(fid, label, 'KL', mat%kl) CALL getatt(fid, label, 'KU', mat%ku) CALL getatt(fid, label, 'NDIAGS', mat%ndiags) CALL getatt(fid, label, 'NTERMS', mat%nterms) CALL getatt(fid, label, 'KMAT', mat%kmat) CALL getatt(fid, label, 'NY', mat%ny) IF( ASSOCIATED(mat%dists) ) THEN CALL getarr(fid, TRIM(label)//'/dists', mat%dists) END IF IF(ASSOCIATED(mat%bal)) THEN CALL getarr(fid, TRIM(label)//'/bal', mat%bal) END IF CALL getarr(fid, TRIM(label)//'/vals', mat%val) IF(ASSOCIATED(mat%rowv)) THEN CALL getarr(fid, TRIM(label)//'/rowv', mat%rowv) CALL getarr(fid, TRIM(label)//'/colh', mat%colh) END IF END SUBROUTINE getmat_cds !=========================================================================== SUBROUTINE putmat_cds(fid, label, mat, str) ! ! Write CDS matrix in hdf5 file ! USE futils ! INTEGER, INTENT(in) :: fid CHARACTER(len=*), INTENT(in) :: label TYPE(cds_mat) :: mat CHARACTER(len=*), OPTIONAL, INTENT(in) :: str ! IF(PRESENT(str)) THEN CALL creatg(fid, label, str) ELSE CALL creatg(fid, label) END IF CALL attach(fid, label, 'RANK', mat%rank) CALL attach(fid, label, 'KL', mat%kl) CALL attach(fid, label, 'KU', mat%ku) CALL attach(fid, label, 'NDIAGS', mat%ndiags) CALL attach(fid, label, 'NTERMS', mat%nterms) CALL attach(fid, label, 'KMAT', mat%kmat) CALL attach(fid, label, 'NY', mat%ny) IF( ASSOCIATED(mat%dists) ) THEN CALL putarr(fid, TRIM(label)//'/dists', mat%dists) END IF IF(ASSOCIATED(mat%bal)) THEN CALL putarr(fid, TRIM(label)//'/bal', mat%bal) END IF CALL putarr(fid, TRIM(label)//'/vals', mat%val) IF(ASSOCIATED(mat%rowv)) THEN CALL putarr(fid, TRIM(label)//'/rowv', mat%rowv) CALL putarr(fid, TRIM(label)//'/colh', mat%colh) END IF END SUBROUTINE putmat_cds !=========================================================================== FUNCTION flops_cds(mat, xarr, ny) ! ! Return FLOPS in product mat*x ! TYPE(cds_mat) :: mat DOUBLE PRECISION, INTENT(in) :: xarr(:) DOUBLE PRECISION :: flops_cds INTEGER, OPTIONAL, INTENT(in) :: ny ! INTEGER :: k, d, i, i1, i2 ! flops_cds = 0.0d0 IF( PRESENT(ny) ) THEN ! Contributions from unicity BC flops_cds = 4.0d0*(SIZE(mat%rowv)-ny+1) END IF DO k=-mat%kl,mat%ku d = mat%dists(k) i1 = MAX(1,1-d) i2 = MIN(mat%rank,mat%rank-d) flops_cds = flops_cds + 2.0d0*(i2-i1+1) END DO END FUNCTION flops_cds !=========================================================================== SUBROUTINE cds2mumps(mat, mat_mumps) ! ! Fill mumps structure (based on routine to_mumps_mat) ! INCLUDE 'mpif.h' TYPE(cds_mat) :: mat TYPE(mumps_mat) :: mat_mumps ! INTEGER :: i, ii, i1, i2, j, k, rank, d, bw0, s, e INTEGER :: comm, ierr, nnz_loc ! CALL init(mat%rank, mat%nterms, mat_mumps) ! comm = mat_mumps%mumps_par%COMM mat_mumps%mumps_par%ICNTL(18) = 3 ! Distributed assembled matrix ! ! Compute nnz_loc ! rank = mat_mumps%rank s = mat_mumps%istart e = mat_mumps%iend ! nnz_loc=0 DO k=-mat%kl,mat%ku d = mat%dists(k) i1 = MAX(s,1-d) i2 = MIN(e,rank-d) nnz_loc = nnz_loc + (i2-i1+1) END DO ! ! Extra col and row from unicity conditions ! bw0 = SIZE(mat%rowv) IF(bw0.GT.0) THEN IF(mat%ny.GE.s .AND. mat%ny.LE.e) THEN nnz_loc = nnz_loc + bw0-mat%ny ! rowh(ny+1:bw0) END IF nnz_loc = nnz_loc + (MIN(bw0,e)-MAX(mat%ny,s)) ! colh(ny+1:bw0) END IF ! mat_mumps%nnz_start = 0 CALL mpi_exscan(nnz_loc, mat_mumps%nnz_start, 1, MPI_INTEGER, MPI_SUM, comm, ierr) mat_mumps%nnz_start = mat_mumps%nnz_start + 1 mat_mumps%nnz_end = mat_mumps%nnz_start + nnz_loc - 1 mat_mumps%nnz_loc = nnz_loc CALL mpi_allreduce(nnz_loc, mat_mumps%nnz, 1, MPI_INTEGER, MPI_SUM, comm, ierr) ! mat_mumps%mumps_par%N = rank mat_mumps%mumps_par%NZ_loc = nnz_loc ! ! Construct MUMPS (IRN, JCN, A) ! ALLOCATE(mat_mumps%mumps_par%IRN_loc(nnz_loc)) ALLOCATE(mat_mumps%mumps_par%JCN_loc(nnz_loc)) ALLOCATE(mat_mumps%mumps_par%A_loc(nnz_loc)) ! ii=0 DO k=-mat%kl,mat%ku d = mat%dists(k) i1 = MAX(s,1-d) i2 = MIN(e,rank-d) DO i=i1,i2 ii = ii+1 mat_mumps%mumps_par%IRN_loc(ii) = i mat_mumps%mumps_par%JCN_loc(ii) = i+d mat_mumps%mumps_par%A_loc(ii) = mat%val(i,k) END DO END DO ! IF(bw0.GT.0) THEN IF(mat%ny.GE.s .AND. mat%ny.LE.e) THEN DO j=mat%ny+1,bw0 ! rowh(ny+1:bw0) ii = ii+1 mat_mumps%mumps_par%IRN_loc(ii) = mat%ny mat_mumps%mumps_par%JCN_loc(ii) = j mat_mumps%mumps_par%A_loc(ii) = mat%rowv(j) END DO END IF DO i=MAX(mat%ny,s)+1,MIN(bw0,e) ! colh(ny+1:bw0) ii = ii+1 mat_mumps%mumps_par%IRN_loc(ii) = i mat_mumps%mumps_par%JCN_loc(ii) = mat%ny mat_mumps%mumps_par%A_loc(ii) = mat%colh(i) END DO END IF ! CALL destroy(mat_mumps%mat) NULLIFY(mat_mumps%mat) END SUBROUTINE cds2mumps !=========================================================================== DOUBLE PRECISION FUNCTION matnorm_cds(mat, p) ! ! Compute matrix norm ! TYPE(cds_mat), INTENT(in) :: mat CHARACTER(len=*), OPTIONAL, INTENT(in) :: p ! CHARACTER(len=4) :: norm_type INTEGER :: i, j, k, d DOUBLE PRECISION :: temp(mat%rank) ! norm_type = 'fro' IF(PRESENT(p)) norm_type = p ! SELECT CASE (norm_type) CASE ('inf') DO i=1,mat%rank temp(i) = SUM(ABS(mat%val(i,:))) END DO matnorm_cds = MAXVAL(temp) CASE ('1') temp = 0.0d0 DO k=-mat%kl,mat%ku d = mat%dists(k) DO i=MAX(1,1-d),MIN(mat%rank,mat%rank-d) temp(i+d) = temp(i+d) + ABS(mat%val(i,k)) END DO END DO matnorm_cds = MAXVAL(temp) CASE('fro') matnorm_cds = SQRT(SUM(mat%val**2)) END SELECT END FUNCTION matnorm_cds !=========================================================================== ! END MODULE cds diff --git a/src/conmat.f90 b/src/conmat.f90 index 3a489e5..887994c 100644 --- a/src/conmat.f90 +++ b/src/conmat.f90 @@ -1,257 +1,257 @@ !> !> @file conmat.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> MODULE conmat_mod ! ! CONMAT: Matrix construction for FE discretization. ! ! T.M. Tran, CRPP-EPFL ! November 2011 ! USE bsplines USE matrix #ifdef MKL USE pardiso_bsplines #endif IMPLICIT NONE ! INTERFACE conrhs MODULE PROCEDURE conrhs_r, conrhs_z END INTERFACE conrhs INTERFACE conmat MODULE PROCEDURE conmat_1d_gb, conmat_1d_ge, conmat_1d_pb, conmat_1d_periodic, & & conmat_1d_zgb, conmat_1d_zpb, conmat_1d_zperiodic, & & conmat_gb, conmat_pb, & & conmat_zgb, conmat_zpb END INTERFACE conmat #ifdef MKL INTERFACE conmat MODULE PROCEDURE conmat_1d_pardiso, conmat_1d_zpardiso, & & conmat_pardiso, conmat_zpardiso END INTERFACE conmat #endif ! CONTAINS !=========================================================================== SUBROUTINE conmat_1d_gb(spl, mat, coefeq, maxder) ! ! Construction of FE matrix mat for 1D differential operator ! using spline spl ! TYPE(gbmat) :: mat TYPE(spline1d), INTENT(in) :: spl ! INCLUDE 'conmat_1d.tpl' END SUBROUTINE conmat_1d_gb !=========================================================================== SUBROUTINE conmat_1d_ge(spl, mat, coefeq, maxder) ! ! Construction of FE matrix mat for 1D differential operator ! using spline spl ! TYPE(gemat) :: mat TYPE(spline1d), INTENT(in) :: spl ! INCLUDE 'conmat_1d.tpl' END SUBROUTINE conmat_1d_ge !=========================================================================== SUBROUTINE conmat_1d_pb(spl, mat, coefeq, maxder) ! ! Construction of FE matrix mat for 1D differential operator ! using spline spl ! TYPE(pbmat) :: mat TYPE(spline1d), INTENT(in) :: spl ! INCLUDE 'conmat_1d.tpl' END SUBROUTINE conmat_1d_pb !=========================================================================== SUBROUTINE conmat_1d_periodic(spl, mat, coefeq, maxder) ! ! Construction of FE matrix mat for 1D differential operator ! using spline spl ! TYPE(periodic_mat) :: mat TYPE(spline1d), INTENT(in) :: spl ! INCLUDE 'conmat_1d.tpl' END SUBROUTINE conmat_1d_periodic !=========================================================================== SUBROUTINE conmat_1d_zgb(spl, mat, coefeq, maxder) ! ! Construction of FE matrix mat for 1D differential operator ! using spline spl ! TYPE(zgbmat) :: mat TYPE(spline1d), INTENT(in) :: spl ! INCLUDE 'zconmat_1d.tpl' END SUBROUTINE conmat_1d_zgb !=========================================================================== SUBROUTINE conmat_1d_zpb(spl, mat, coefeq, maxder) ! ! Construction of FE matrix mat for 1D differential operator ! using spline spl ! TYPE(zpbmat) :: mat TYPE(spline1d), INTENT(in) :: spl ! INCLUDE 'zconmat_1d.tpl' END SUBROUTINE conmat_1d_zpb !=========================================================================== SUBROUTINE conmat_1d_zperiodic(spl, mat, coefeq, maxder) ! ! Construction of FE matrix mat for 1D differential operator ! using spline spl ! TYPE(zperiodic_mat) :: mat TYPE(spline1d), INTENT(in) :: spl ! INCLUDE 'zconmat_1d.tpl' END SUBROUTINE conmat_1d_zperiodic !=========================================================================== SUBROUTINE conmat_gb(spl, mat, coefeq, maxder, nat_order) ! ! Construction of FE matrix mat for 2D differential operator ! using spline spl ! TYPE(gbmat) :: mat TYPE(spline2d), INTENT(in) :: spl ! INCLUDE 'conmat.tpl' END SUBROUTINE conmat_gb !=========================================================================== SUBROUTINE conmat_pb(spl, mat, coefeq, maxder, nat_order) ! ! Construction of FE matrix mat for 2D differential operator ! using spline spl ! TYPE(pbmat) :: mat TYPE(spline2d), INTENT(in) :: spl ! INCLUDE 'conmat.tpl' END SUBROUTINE conmat_pb !=========================================================================== SUBROUTINE conmat_zgb(spl, mat, coefeq, maxder, nat_order) ! ! Construction of FE matrix mat for 2D differential operator ! using spline spl ! TYPE(zgbmat) :: mat TYPE(spline2d), INTENT(in) :: spl ! INCLUDE 'zconmat.tpl' END SUBROUTINE conmat_zgb !=========================================================================== SUBROUTINE conmat_zpb(spl, mat, coefeq, maxder, nat_order) ! ! Construction of FE matrix mat for 2D differential operator ! using spline spl ! TYPE(zpbmat) :: mat TYPE(spline2d), INTENT(in) :: spl ! INCLUDE 'zconmat.tpl' END SUBROUTINE conmat_zpb !=========================================================================== SUBROUTINE conrhs_r(spl, farr, frhs) ! ! Projection of RHS on spline basis functions ! TYPE(spline1d) :: spl DOUBLE PRECISION, INTENT(out) :: farr(:) INTERFACE DOUBLE PRECISION FUNCTION frhs(x) DOUBLE PRECISION, INTENT(in) :: x END FUNCTION frhs END INTERFACE DOUBLE PRECISION :: contrib ! INCLUDE 'conrhs.tpl' END SUBROUTINE conrhs_r !=========================================================================== SUBROUTINE conrhs_z(spl, farr, frhs) ! ! Projection of RHS on spline basis functions ! TYPE(spline1d) :: spl DOUBLE COMPLEX, INTENT(out) :: farr(:) INTERFACE DOUBLE COMPLEX FUNCTION frhs(x) DOUBLE PRECISION, INTENT(in) :: x END FUNCTION frhs END INTERFACE DOUBLE COMPLEX :: contrib ! INCLUDE 'conrhs.tpl' END SUBROUTINE conrhs_z !=========================================================================== #ifdef MKL SUBROUTINE conmat_1d_pardiso(spl, mat, coefeq, maxder) ! ! Construction of FE matrix mat for 1D differential operator ! using spline spl ! TYPE(pardiso_mat) :: mat TYPE(spline1d), INTENT(in) :: spl ! INCLUDE 'conmat_1d.tpl' END SUBROUTINE conmat_1d_pardiso !=========================================================================== SUBROUTINE conmat_1d_zpardiso(spl, mat, coefeq, maxder) ! ! Construction of FE matrix mat for 1D differential operator ! using spline spl ! TYPE(zpardiso_mat) :: mat TYPE(spline1d), INTENT(in) :: spl ! INCLUDE 'zconmat_1d.tpl' END SUBROUTINE conmat_1d_zpardiso !=========================================================================== SUBROUTINE conmat_pardiso(spl, mat, coefeq, maxder, nat_order) ! ! Construction of FE matrix mat for 2D differential operator ! using spline spl ! TYPE(pardiso_mat) :: mat TYPE(spline2d), INTENT(in) :: spl ! INCLUDE 'conmat.tpl' END SUBROUTINE conmat_pardiso !=========================================================================== SUBROUTINE conmat_zpardiso(spl, mat, coefeq, maxder, nat_order) ! ! Construction of FE matrix mat for 2D differential operator ! using spline spl ! TYPE(zpardiso_mat) :: mat TYPE(spline2d), INTENT(in) :: spl ! INCLUDE 'zconmat.tpl' END SUBROUTINE conmat_zpardiso !=========================================================================== #endif END MODULE conmat_mod diff --git a/src/conmat.tpl b/src/conmat.tpl index 09f96c6..f3d7ccb 100644 --- a/src/conmat.tpl +++ b/src/conmat.tpl @@ -1,213 +1,213 @@ !> !> @file conmat.tpl !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> ! ! In this version s[lines are precalculted ! (on all n1/n2 intervals ! INTERFACE SUBROUTINE coefeq(x, y, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x, y INTEGER, INTENT(out) :: idt(:,:), idw(:,:) DOUBLE PRECISION, INTENT(out) :: c(:) END SUBROUTINE coefeq END INTERFACE INTEGER, OPTIONAL :: maxder(2) ! maximum oder of derivatives LOGICAL, OPTIONAL :: nat_order ! Natural ordering for 2d-1d mapping ! INTEGER :: n1, nidbas1, ndim1, n1e INTEGER :: n2, nidbas2, ndim2, n2e INTEGER :: ng1, ng2 INTEGER :: i1, i2, ig1, ig2 INTEGER :: igt1, igt2, igw1, igw2, irow, jcol INTEGER, ALLOCATABLE :: left1(:), left2(:) ! LOGICAL :: nlper1, nlper2, nlnat ! INTEGER :: kterms ! Number of terms in weak form INTEGER :: k, kmaxder, it1, iw1, it2, iw2 INTEGER, ALLOCATABLE :: idert(:,:), iderw(:,:) ! Derivative order DOUBLE PRECISION, ALLOCATABLE :: coefs(:,:,:) ! Terms in weak form ! DOUBLE PRECISION, POINTER :: xg1(:,:), xg2(:,:), wg1(:,:), wg2(:,:) DOUBLE PRECISION, ALLOCATABLE :: fun1(:,:,:,:), fun2(:,:,:,:) DOUBLE PRECISION, ALLOCATABLE :: mata(:,:,:,:), matc(:,:) DOUBLE PRECISION, ALLOCATABLE :: matg(:,:,:), matf(:,:,:), matcg(:,:,:) !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) nlper1 = spl%sp1%period nlper2 = spl%sp2%period ! n1e = n1+nidbas1 ! Number of elements in 1st coordinate n2e = n2+nidbas2 ! Number of elements in 2nd coordinate iF(nlper2) n2e = n2 ! ! Gauss points and weights on all intervals ! xg1 => spl%sp1%gausx ! xg1(ng1,n1) wg1 => spl%sp1%gausw ! wg1(ng1,n1) ng1 = SIZE(xg1,1) xg2 => spl%sp2%gausx wg2 => spl%sp2%gausw ng2 = SIZE(xg2,1) ! ! Splines on all intervals ! kmaxder = 1 IF(PRESENT(maxder)) kmaxder = maxder(1) ALLOCATE(fun1(0:nidbas1,0:kmaxder,ng1,n1)) ALLOCATE(left1(ng1)) DO i1=1,n1 left1 = i1 CALL basfun(xg1(:,i1), spl%sp1, fun1(:,:,:,i1), left1) END DO DEALLOCATE(left1) ! kmaxder = 1 IF(PRESENT(maxder)) kmaxder = maxder(2) ALLOCATE(fun2(0:nidbas2,0:kmaxder,ng2,n2)) ALLOCATE(left2(ng2)) DO i2=1,n2 left2 = i2 CALL basfun(xg2(:,i2), spl%sp2, fun2(:,:,:,i2), left2) END DO DEALLOCATE(left2) ! ! Ordering in local to global matrix mapping ! nlnat = .FALSE. IF(PRESENT(nat_order)) nlnat = nat_order !=========================================================================== ! 2.0 Assembly loop ! ! Weak form ! kterms = mat%nterms ALLOCATE(idert(kterms,2)) ALLOCATE(iderw(kterms,2)) ALLOCATE(coefs(kterms,ng1,ng2)) ! ! Allocate local matrices ! ALLOCATE(mata(0:nidbas1,0:nidbas1,0:nidbas2,0:nidbas2)) ALLOCATE(matc(ng1,ng2)) ALLOCATE(matg(0:nidbas2,0:nidbas2,ng2)) ALLOCATE(matf(0:nidbas1,0:nidbas1,ng1)) ALLOCATE(matcg(ng1,0:nidbas2,0:nidbas2)) ! DO i1=1,n1 DO i2=1,n2 ! ! Coefficients of the weak form ! DO ig1=1,ng1 DO ig2=1,ng2 CALL coefeq(xg1(ig1,i1), xg2(ig2,i2), & & idert, iderw, coefs(:,ig1,ig2)) END DO END DO ! ! Compute local matrix: A <- E*(C*D^T) + A ! mata = 0.0d0 DO k=1,kterms ! matc(1:ng1,1:ng2) = coefs(k,1:ng1,1:ng2) ! DO it1=0,nidbas1 DO iw1=0,nidbas1 DO ig1=1,ng1 matf(it1,iw1,ig1) = wg1(ig1,i1) * & & fun1(it1,idert(k,1),ig1,i1) * & & fun1(iw1,iderw(k,1),ig1,i1) END DO END DO END DO ! DO it2=0,nidbas2 DO iw2=0,nidbas2 DO ig2=1,ng2 matg(it2,iw2,ig2) = wg2(ig2,i2) * & & fun2(it2,idert(k,2),ig2,i2) * & & fun2(iw2,iderw(k,2),ig2,i2) END DO END DO END DO ! CALL dgemm('N', 'T', ng1, (nidbas2+1)*(nidbas2+1), ng2, 1.0d0, & & matc, ng1, matg, (nidbas2+1)*(nidbas2+1), 0.0d0, & & matcg, ng1) CALL dgemm('N', 'N', (nidbas1+1)*(nidbas1+1), (nidbas2+1)*(nidbas2+1), & & ng1, 1.0d0, matf, (nidbas1+1)*(nidbas1+1), matcg, ng1, 1.0d0, & & mata, (nidbas1+1)*(nidbas1+1)) ! END DO ! ! Map local matrix A to global matrix ! DO it1=0,nidbas1 igt1 = i1+it1; IF(nlper1) igt1 = MODULO(igt1-1,n1) + 1 DO it2=0,nidbas2 igt2 = i2+it2; IF(nlper2) igt2 = MODULO(igt2-1, n2) + 1 irow = glmap(igt1, igt2, n1e, n2e) DO iw1=0,nidbas1 igw1 = i1+iw1; IF(nlper1) igw1 = MODULO(igw1-1,n1) + 1 DO iw2=0,nidbas2 igw2 = i2+iw2; IF(nlper2) igw2 = MODULO(igw2-1, n2) + 1 jcol = glmap(igw1, igw2, n1e, n2e) CALL updtmat(mat, irow, jcol, mata(it1,iw1,it2,iw2)) END DO END DO END DO END DO ! END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(fun1) DEALLOCATE(fun2) DEALLOCATE(idert, iderw, coefs) DEALLOCATE(mata) DEALLOCATE(matc) DEALLOCATE(matg) DEALLOCATE(matcg) DEALLOCATE(matf) ! CONTAINS INTEGER FUNCTION glmap(i,j,n1,n2) INTEGER, INTENT(in) :: i,j,n1,n2 IF(nlnat) THEN glmap = (j-1)*n1 + i ELSE glmap = (i-1)*n2 + j END IF END FUNCTION glmap diff --git a/src/conmat2.tpl b/src/conmat2.tpl index 27f7e4d..e5b0f64 100644 --- a/src/conmat2.tpl +++ b/src/conmat2.tpl @@ -1,202 +1,202 @@ !> !> @file conmat2.tpl !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> ! ! In this version local matrices E and D are precalculted ! (on all n1/n2 intervals and nterms weak-form terms ! INTERFACE SUBROUTINE coefeq(x, y, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x, y INTEGER, INTENT(out) :: idt(:,:), idw(:,:) DOUBLE PRECISION, INTENT(out) :: c(:) END SUBROUTINE coefeq END INTERFACE INTEGER, OPTIONAL :: maxder(2) ! maximum oder of derivatives ! INTEGER :: n1, nidbas1, ndim1 INTEGER :: n2, nidbas2, ndim2 INTEGER :: ng1, ng2 INTEGER :: i1, i2, ig1, ig2 INTEGER :: igt1, igt2, igw1, igw2, irow, jcol INTEGER, ALLOCATABLE :: left1(:), left2(:) ! LOGICAL :: nlper1, nlper2 ! INTEGER :: kterms ! Number of terms in weak form INTEGER :: k, kmaxder, it1, iw1, it2, iw2 INTEGER, ALLOCATABLE :: idert(:,:), iderw(:,:) ! Derivative order DOUBLE PRECISION, ALLOCATABLE :: coefs(:,:,:) ! Terms in weak form DOUBLE PRECISION :: dummy(mat%nterms) ! DOUBLE PRECISION, POINTER :: xg1(:,:), xg2(:,:), wg1(:,:), wg2(:,:) DOUBLE PRECISION, ALLOCATABLE :: fun1(:,:,:), fun2(:,:,:) DOUBLE PRECISION, ALLOCATABLE :: mata(:,:,:,:), matc(:,:), matcd(:,:,:) DOUBLE PRECISION, ALLOCATABLE :: matd(:,:,:,:,:), mate(:,:,:,:,:) !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) nlper1 = spl%sp1%period nlper2 = spl%sp2%period ! ! Gauss points and weights on all intervals ! xg1 => spl%sp1%gausx ! xg1(ng1,n1) wg1 => spl%sp1%gausw ! wg1(ng1,n1) ng1 = SIZE(xg1,1) xg2 => spl%sp2%gausx wg2 => spl%sp2%gausw ng2 = SIZE(xg2,1) ! ! Derivative orders in the weak form ! kterms = mat%nterms ALLOCATE(idert(kterms,2)) ALLOCATE(iderw(kterms,2)) CALL coefeq(xg1(1,1), xg2(1,1), idert, iderw, dummy) ! ! Precalc matrix E in dimension 1 ! kmaxder = 1 IF(PRESENT(maxder)) kmaxder = maxder(1) ALLOCATE(fun1(0:nidbas1,0:kmaxder,ng1)) ALLOCATE(left1(ng1)) ALLOCATE(mate(0:nidbas1,0:nidbas1,ng1,kterms,n1)) DO i1=1,n1 left1 = i1 CALL basfun(xg1(:,i1), spl%sp1, fun1, left1) DO k=1,kterms DO ig1=1,ng1 DO iw1=0,nidbas1 DO it1=0,nidbas1 mate(it1,iw1,ig1,k,i1) = wg1(ig1,i1) * & & fun1(it1,idert(k,1),ig1) * & & fun1(iw1,iderw(k,1),ig1) END DO END DO END DO END DO END DO DEALLOCATE(fun1) DEALLOCATE(left1) ! ! Precalc matrix D in dimension 2 ! kmaxder = 1 IF(PRESENT(maxder)) kmaxder = maxder(2) ALLOCATE(fun2(0:nidbas2,0:kmaxder,ng2)) ALLOCATE(left2(ng2)) ALLOCATE(matd(0:nidbas2,0:nidbas2,ng2,kterms,n2)) DO i2=1,n2 left2 = i2 CALL basfun(xg2(:,i2), spl%sp2, fun2, left2) DO k=1,kterms DO ig2=1,ng2 DO iw2=0,nidbas2 DO it2=0,nidbas2 matd(it2,iw2,ig2,k,i2) = wg2(ig2,i2) * & & fun2(it2,idert(k,2),ig2) * & & fun2(iw2,iderw(k,2),ig2) END DO END DO END DO END DO END DO DEALLOCATE(fun2) DEALLOCATE(left2) !=========================================================================== ! 2.0 Assembly loop ! ! Physical coefficients in Weak form ! ALLOCATE(coefs(kterms,ng1,ng2)) ALLOCATE(matc(ng1,ng2)) ! ! Allocate local matrix A ! ALLOCATE(mata(0:nidbas1,0:nidbas1,0:nidbas2,0:nidbas2)) ALLOCATE(matcd(ng1,0:nidbas2,0:nidbas2)) ! DO i1=1,n1 DO i2=1,n2 ! ! Coefficients of the weak form ! DO ig1=1,ng1 DO ig2=1,ng2 CALL coefeq(xg1(ig1,i1), xg2(ig2,i2), & & idert, iderw, coefs(:,ig1,ig2)) END DO END DO ! ! Compute local matrix: A <- E*(C*D^T) + A ! mata = 0.0d0 DO k=1,kterms ! matc(1:ng1,1:ng2) = coefs(k,1:ng1,1:ng2) ! CALL dgemm('N', 'T', ng1, (nidbas2+1)*(nidbas2+1), ng2, 1.0d0, & & matc, ng1, matd(0,0,1,k,i2), (nidbas2+1)*(nidbas2+1), 0.0d0, & & matcd, ng1) CALL dgemm('N', 'N', (nidbas1+1)*(nidbas1+1), (nidbas2+1)*(nidbas2+1), & & ng1, 1.0d0, mate(0,0,1,k,i1), (nidbas1+1)*(nidbas1+1), matcd, ng1, 1.0d0, & & mata, (nidbas1+1)*(nidbas1+1)) ! END DO ! ! Map local matrix A to global matrix ! DO it1=0,nidbas1 igt1 = i1+it1; IF(nlper1) igt1 = MODULO(igt1-1,n1) + 1 DO it2=0,nidbas2 igt2 = i2+it2; IF(nlper2) igt2 = MODULO(igt2-1, n2) + 1 irow = igt2 + (igt1-1)*n2 DO iw1=0,nidbas1 igw1 = i1+iw1; IF(nlper1) igw1 = MODULO(igw1-1,n1) + 1 DO iw2=0,nidbas2 igw2 = i2+iw2; IF(nlper2) igw2 = MODULO(igw2-1, n2) + 1 jcol = igw2 + (igw1-1)*n2 CALL updtmat(mat, irow, jcol, mata(it1,iw1,it2,iw2)) END DO END DO END DO END DO ! END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(idert, iderw, coefs) DEALLOCATE(mata) DEALLOCATE(matc) DEALLOCATE(matd) DEALLOCATE(matcd) DEALLOCATE(mate) diff --git a/src/conmat_1d.tpl b/src/conmat_1d.tpl index ce06d1d..282c9a8 100644 --- a/src/conmat_1d.tpl +++ b/src/conmat_1d.tpl @@ -1,156 +1,156 @@ !> !> @file conmat_1d.tpl !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> ! ! In this version s[lines are precalculted ! (on all n1/n2 intervals ! INTERFACE SUBROUTINE coefeq(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE PRECISION, INTENT(out) :: c(:) END SUBROUTINE coefeq END INTERFACE INTEGER, OPTIONAL :: maxder ! maximum oder of derivatives ! INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: i1, ig1 INTEGER :: irow, jcol INTEGER, ALLOCATABLE :: left1(:) ! LOGICAL :: nlper1 ! INTEGER :: kterms ! Number of terms in weak form INTEGER :: k, kmaxder, it1, iw1 INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order DOUBLE PRECISION, ALLOCATABLE :: coefs(:,:) ! Terms in weak form ! DOUBLE PRECISION, POINTER :: xg1(:,:), wg1(:,:) DOUBLE PRECISION, ALLOCATABLE :: fun1(:,:,:,:), fun2(:,:,:,:) DOUBLE PRECISION, ALLOCATABLE :: mata(:,:), matc(:) DOUBLE PRECISION, ALLOCATABLE :: matf(:,:,:) !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl, ndim1, n1, nidbas1) nlper1 = spl%period ! ! Gauss points and weights on all intervals ! xg1 => spl%gausx ! xg1(ng1,n1) wg1 => spl%gausw ! wg1(ng1,n1) ng1 = SIZE(xg1,1) ! ! Splines on all intervals ! kmaxder = 1 IF(PRESENT(maxder)) kmaxder = maxder ALLOCATE(fun1(0:nidbas1,0:kmaxder,ng1,n1)) ALLOCATE(left1(ng1)) DO i1=1,n1 left1 = i1 !!$ DO ig1=1,ng1 !!$ CALL basfun(xg1(ig1,i1), spl, fun1(:,:,ig1,i1), left1(ig1)) !!$ END DO CALL basfun(xg1(:,i1), spl, fun1(:,:,:,i1), left1) END DO DEALLOCATE(left1) !=========================================================================== ! 2.0 Assembly loop ! ! Weak form ! kterms = mat%nterms ALLOCATE(idert(kterms)) ALLOCATE(iderw(kterms)) ALLOCATE(coefs(kterms,ng1)) ! ! Allocate local matrices ! ALLOCATE(mata(0:nidbas1,0:nidbas1)) ALLOCATE(matc(ng1)) ALLOCATE(matf(0:nidbas1,0:nidbas1,ng1)) ! DO i1=1,n1 ! ! Coefficients of the weak form ! DO ig1=1,ng1 CALL coefeq(xg1(ig1,i1), idert, iderw, coefs(:,ig1)) END DO ! ! Compute local matrix: A <- F*c + A ! mata = 0.0d0 DO k=1,kterms ! matc(1:ng1) = coefs(k,1:ng1) ! DO it1=0,nidbas1 DO iw1=0,nidbas1 DO ig1=1,ng1 matf(it1,iw1,ig1) = wg1(ig1,i1) * & & fun1(it1,idert(k),ig1,i1) * & & fun1(iw1,iderw(k),ig1,i1) END DO END DO END DO ! CALL dgemv('N', (nidbas1+1)*(nidbas1+1), ng1, 1.0d0, matf, & & (nidbas1+1)*(nidbas1+1), matc, 1, 1.0d0, mata, 1) END DO ! ! Map local matrix A to global matrix ! !!$ WRITE(*,'(/a,i3)') "Lambda, i =", i1 !!$ DO ig1=1,ng1 !!$ WRITE(*,'(10(1pe12.3))') fun1(:,0,ig1,i1) !!$ END DO !!$ WRITE(*,'(a,i3)') "Lambda', i =", i1 !!$ DO ig1=1,ng1 !!$ WRITE(*,'(10(1pe12.3))') fun1(:,1,ig1,i1) !!$ END DO !!$ WRITE(*,'(/a)') 'local matrix' DO it1=0,nidbas1 irow = i1+it1; IF(nlper1) irow = MODULO(irow-1,n1) + 1 DO iw1=0,nidbas1 jcol = i1+iw1; IF(nlper1) jcol = MODULO(jcol-1,n1) + 1 CALL updtmat(mat, irow, jcol, mata(it1,iw1)) END DO !!$ WRITE(*,'(10(1pe12.3))') mata(it1,:) END DO ! END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(fun1) DEALLOCATE(idert, iderw, coefs) DEALLOCATE(mata) DEALLOCATE(matc) DEALLOCATE(matf) diff --git a/src/conrhs.tpl b/src/conrhs.tpl index 5278381..66b05e5 100644 --- a/src/conrhs.tpl +++ b/src/conrhs.tpl @@ -1,52 +1,52 @@ !> !> @file conrhs.tpl !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> DOUBLE PRECISION, POINTER :: xg(:,:), wg(:,:) DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) INTEGER :: ndim, n, nidbas, ng INTEGER :: i, ig, it, irow LOGICAL :: nlper ! CALL get_dim(spl, ndim, n, nidbas) nlper = spl%period xg => spl%gausx ! xg(ng,n) wg => spl%gausw ! wg(ng,n) ng = SIZE(xg,1) ALLOCATE(fun(0:nidbas,1)) ! farr = 0.0d0 DO i=1,n DO ig=1,ng CALL basfun(xg(ig,i), spl, fun, i) contrib = wg(ig,i)*frhs(xg(ig,i)) DO it=0,nidbas irow = i+it IF(nlper) irow = MODULO(irow-1,n) +1 farr(irow) = farr(irow)+contrib*fun(it,1) END DO END DO END DO ! DEALLOCATE(fun) diff --git a/src/csr_mod.f90 b/src/csr_mod.f90 index ca67f83..23d0a36 100644 --- a/src/csr_mod.f90 +++ b/src/csr_mod.f90 @@ -1,1255 +1,1255 @@ !> !> @file csr_mod.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> MODULE csr ! ! CSR: Implement CSR (Compressed Sparse Row) matrice ! ! T.M. Tran, CRPP-EPFL ! October 2012 ! USE sparse USE mumps_bsplines IMPLICIT NONE ! TYPE, EXTENDS(spmat) :: csr_mat INTEGER :: mrows, ncols INTEGER :: nnz = 0 ! Number of non-zeros INTEGER :: nterms ! Number of terms in weak form LOGICAL :: nlforce_zero ! Keep exixting nodes with zero value if .true. INTEGER, POINTER :: irow(:) => NULL() ! points to start of row INTEGER, POINTER :: idiag(:) => NULL() ! points to diagonal element INTEGER, POINTER :: cols(:) => NULL() ! Column indices DOUBLE PRECISION, POINTER :: val(:) => NULL() ! Elelement values TYPE(mumps_mat), ALLOCATABLE :: mumps END TYPE csr_mat ! TYPE, EXTENDS(zspmat) :: zcsr_mat INTEGER :: mrows, ncols INTEGER :: nnz = 0 ! Number of non-zeros INTEGER :: nterms ! Number of terms in weak form LOGICAL :: nlforce_zero ! Keep exixting nodes with zero value if .true. INTEGER, POINTER :: irow(:) => NULL() ! points to start of row INTEGER, POINTER :: idiag(:) => NULL() ! points to diagonal element INTEGER, POINTER :: cols(:) => NULL() ! Column indices DOUBLE COMPLEX, POINTER :: val(:) => NULL() ! Elelement values ! TYPE(zmumps_mat), ALLOCATABLE :: mumps END TYPE zcsr_mat ! INTERFACE init MODULE PROCEDURE init_csr_mat, init_zcsr_mat END INTERFACE init INTERFACE clear_mat MODULE PROCEDURE clear_csr_mat, clear_zcsr_mat END INTERFACE clear_mat INTERFACE updtmat MODULE PROCEDURE updt_csr_mat, updt_zcsr_mat END INTERFACE updtmat INTERFACE putele MODULE PROCEDURE putele_csr_mat, putele_zcsr_mat END INTERFACE putele INTERFACE getele MODULE PROCEDURE getele_csr_mat, getele_zcsr_mat END INTERFACE getele INTERFACE putrow MODULE PROCEDURE putrow_csr_mat, putrow_zcsr_mat END INTERFACE putrow INTERFACE getrow MODULE PROCEDURE getrow_csr_mat, getrow_zcsr_mat END INTERFACE getrow INTERFACE getdiag MODULE PROCEDURE getdiag_csr_mat, getdiag_zcsr_mat END INTERFACE getdiag INTERFACE putcol MODULE PROCEDURE putcol_csr_mat, putcol_zcsr_mat END INTERFACE putcol INTERFACE getcol MODULE PROCEDURE getcol_csr_mat, getcol_zcsr_mat END INTERFACE getcol INTERFACE to_mat MODULE PROCEDURE to_csr_mat, to_zcsr_mat END INTERFACE to_mat INTERFACE vmx MODULE PROCEDURE vmx_csr_mat, vmx_csr_matn, vmx_zcsr_mat, vmx_zcsr_matn END INTERFACE vmx INTERFACE destroy MODULE PROCEDURE destroy_csr_mat, destroy_zcsr_mat END INTERFACE destroy INTERFACE putmat MODULE PROCEDURE put_csr_mat, put_zcsr_mat END INTERFACE putmat !>>>>> !>>>>> CONMAT !>>>> INTERFACE conmat MODULE PROCEDURE conmat_1d_csr, conmat_2d_csr, conmat_1d_zcsr, conmat_2d_zcsr END INTERFACE conmat !>>>> !>>>> MULTIGRID_MOD !>>>> INTERFACE femat MODULE PROCEDURE femat_csr END INTERFACE femat INTERFACE matnorm MODULE PROCEDURE matnorm_csr END INTERFACE matnorm INTERFACE kron MODULE PROCEDURE kron_csr END INTERFACE kron !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! CONTAINS !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE init_csr_mat(n, nterms, mat, nlforce_zero, ncols) ! ! Initialize an empty CSR matrix ! INTEGER, INTENT(in) :: n, nterms TYPE(csr_mat) :: mat LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero INTEGER, OPTIONAL, intent(in) :: ncols ! CALL init(n, mat%spmat) mat%mrows = n mat%ncols = n IF(PRESENT(ncols)) mat%ncols = ncols mat%nterms = nterms mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero ! END SUBROUTINE init_csr_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE init_zcsr_mat(n, nterms, mat, nlforce_zero, ncols) ! ! Initialize an empty CSR matrix ! INTEGER, INTENT(in) :: n, nterms TYPE(zcsr_mat) :: mat LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero INTEGER, OPTIONAL, intent(in) :: ncols ! CALL init(n, mat%zspmat) mat%mrows = n mat%ncols = n IF(PRESENT(ncols)) mat%ncols = ncols mat%nterms = nterms mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero ! END SUBROUTINE init_zcsr_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE clear_zcsr_mat(mat) ! ! Clear matrix, keeping its sparse structure unchanged ! TYPE(zcsr_mat) :: mat ! mat%val = (0.0d0,0.0d0) END SUBROUTINE clear_zcsr_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE clear_csr_mat(mat) ! ! Clear matrix, keeping its sparse structure unchanged ! TYPE(csr_mat) :: mat ! mat%val = 0.0d0 END SUBROUTINE clear_csr_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE updt_csr_mat(mat, i, j, val) ! ! Update element Aij of csr matrix ! TYPE(csr_mat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE PRECISION, INTENT(in) :: val INTEGER :: k, s, e ! IF(mat%nnz.EQ.0) THEN ! Still using linked lists CALL updtmat(mat%spmat, i, j, val) ELSE s = mat%irow(i) e = mat%irow(i+1)-1 k = isearch(mat%cols(s:e), j) IF( k.GE.0 ) THEN mat%val(s+k) = mat%val(s+k)+val ELSE WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j STOP '*** Abnormal EXIT in MODULE csr_mod ***' END IF END IF END SUBROUTINE updt_csr_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE updt_zcsr_mat(mat, i, j, val) ! ! Update element Aij of csr matrix ! TYPE(zcsr_mat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE COMPLEX, INTENT(in) :: val INTEGER :: k, s, e ! IF(mat%nnz.EQ.0) THEN ! Still using linked lists CALL updtmat(mat%zspmat, i, j, val) ELSE s = mat%irow(i) e = mat%irow(i+1)-1 k = isearch(mat%cols(s:e), j) IF( k.GE.0 ) THEN mat%val(s+k) = mat%val(s+k)+val ELSE WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j STOP '*** Abnormal EXIT in MODULE csr_mod ***' END IF END IF END SUBROUTINE updt_zcsr_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putele_csr_mat(mat, i, j, val) ! ! Put element (i,j) of matrix ! TYPE(csr_mat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE PRECISION, INTENT(in) :: val INTEGER :: iput, jput INTEGER :: k, s, e ! iput = i jput = j ! IF(mat%nnz.EQ.0) THEN ! Still using linked lists CALL putele(mat%spmat, iput, jput, val, & & nlforce_zero=mat%nlforce_zero) ELSE s = mat%irow(iput) e = mat%irow(iput+1)-1 k = isearch(mat%cols(s:e), jput) IF( k.GE.0 ) THEN mat%val(s+k) = val ELSE IF(ABS(val) .GT. EPSILON(0.0d0)) THEN ! Ok for zero val WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j PRINT*, 'val', val PRINT*, 'matrix m, n', mat%mrows, mat%ncols STOP '*** Abnormal EXIT in MODULE csr_mod ***' END IF END IF END IF END SUBROUTINE putele_csr_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putele_zcsr_mat(mat, i, j, val) ! ! Put element (i,j) of matrix ! TYPE(zcsr_mat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE COMPLEX, INTENT(in) :: val INTEGER :: iput, jput INTEGER :: k, s, e ! iput = i jput = j ! IF(mat%nnz.EQ.0) THEN ! Still using linked lists CALL putele(mat%zspmat, iput, jput, val, & & nlforce_zero=mat%nlforce_zero) ELSE s = mat%irow(iput) e = mat%irow(iput+1)-1 k = isearch(mat%cols(s:e), jput) IF( k.GE.0 ) THEN mat%val(s+k) = val ELSE IF(ABS(val) .GT. EPSILON(0.0d0)) THEN ! Ok for zero val WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j PRINT*, 'val', val PRINT*, 'matrix m, n', mat%mrows, mat%ncols STOP '*** Abnormal EXIT in MODULE csr_mod ***' END IF END IF END IF END SUBROUTINE putele_zcsr_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getele_csr_mat(mat, i, j, val) ! ! Get element (i,j) of sparse matrix ! TYPE(csr_mat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE PRECISION, INTENT(out) :: val INTEGER :: iget, jget INTEGER :: k, s, e ! iget = i jget = j IF(mat%nnz.EQ.0) THEN ! Still using linked lists CALL getele(mat%spmat, iget, jget, val) ELSE s = mat%irow(iget) e = mat%irow(iget+1)-1 k = isearch(mat%cols(s:e), jget) IF( k.GE.0 ) THEN val =mat%val(s+k) ELSE val = 0.0d0 ! Assume zero val if not found END IF END IF END SUBROUTINE getele_csr_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getele_zcsr_mat(mat, i, j, val) ! ! Get element (i,j) of sparse matrix ! TYPE(zcsr_mat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE COMPLEX, INTENT(out) :: val INTEGER :: iget, jget INTEGER :: k, s, e ! iget = i jget = j IF(mat%nnz.EQ.0) THEN ! Still using linked lists CALL getele(mat%zspmat, iget, jget, val) ELSE s = mat%irow(iget) e = mat%irow(iget+1)-1 k = isearch(mat%cols(s:e), jget) IF( k.GE.0 ) THEN val =mat%val(s+k) ELSE val = 0.0d0 ! Assume zero val if not found END IF END IF END SUBROUTINE getele_zcsr_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putrow_csr_mat(amat, i, arr) ! ! Put a row into sparse matrix ! TYPE(csr_mat), INTENT(inout) :: amat INTEGER, INTENT(in) :: i DOUBLE PRECISION, INTENT(in) :: arr(:) INTEGER :: s, e, j ! IF(amat%nnz.EQ.0) THEN ! Still using linked lists DO j=1,amat%ncols CALL putele(amat, i, j, arr(j)) END DO ELSE s = amat%irow(i) e = amat%irow(i+1)-1 DO j=s,e amat%val(j) = arr(amat%cols(j)) END DO END IF END SUBROUTINE putrow_csr_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putrow_zcsr_mat(amat, i, arr) ! ! Put a row into sparse matrix ! TYPE(zcsr_mat), INTENT(inout) :: amat INTEGER, INTENT(in) :: i DOUBLE COMPLEX, INTENT(in) :: arr(:) INTEGER :: s, e, j ! IF(amat%nnz.EQ.0) THEN ! Still using linked lists DO j=1,amat%ncols CALL putele(amat, i, j, arr(j)) END DO ELSE s = amat%irow(i) e = amat%irow(i+1)-1 DO j=s,e amat%val(j) = arr(amat%cols(j)) END DO END IF END SUBROUTINE putrow_zcsr_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getrow_csr_mat(amat, i, arr) ! ! Get a row from sparse matrix ! TYPE(csr_mat), INTENT(in) :: amat INTEGER, INTENT(in) :: i DOUBLE PRECISION, INTENT(out) :: arr(:) INTEGER :: s, e, j ! arr = 0.0d0 IF(amat%nnz.EQ.0) THEN ! Still using linked lists DO j=1,amat%ncols CALL getele(amat, i, j, arr(j)) END DO ELSE s = amat%irow(i) e = amat%irow(i+1)-1 DO j=s,e arr(amat%cols(j)) = amat%val(j) END DO END IF END SUBROUTINE getrow_csr_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getrow_zcsr_mat(amat, i, arr) ! ! Get a row from sparse matrix ! TYPE(zcsr_mat), INTENT(in) :: amat INTEGER, INTENT(in) :: i DOUBLE COMPLEX, INTENT(out) :: arr(:) INTEGER :: s, e, j ! arr = 0.0d0 IF(amat%nnz.EQ.0) THEN ! Still using linked lists DO j=1,amat%ncols CALL getele(amat, i, j, arr(j)) END DO ELSE s = amat%irow(i) e = amat%irow(i+1)-1 DO j=s,e arr(amat%cols(j)) = amat%val(j) END DO END IF END SUBROUTINE getrow_zcsr_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getdiag_csr_mat(amat, arr) ! ! Get the diagonal from matrix ! TYPE(csr_mat), INTENT(in) :: amat DOUBLE PRECISION, INTENT(out) :: arr(:) ! ! WARNING: assume that CSR matrix has been converted from linked lists ! arr(:) = amat%val(amat%idiag(:)) END SUBROUTINE getdiag_csr_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getdiag_zcsr_mat(amat, arr) ! ! Get the diagonal from matrix ! TYPE(zcsr_mat), INTENT(in) :: amat DOUBLE COMPLEX, INTENT(out) :: arr(:) ! ! WARNING: assume that CSR matrix has been converted from linked lists ! arr(:) = amat%val(amat%idiag(:)) END SUBROUTINE getdiag_zcsr_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putcol_csr_mat(amat, j, arr) ! ! Put a column into sparse matrix ! TYPE(csr_mat), INTENT(inout) :: amat INTEGER, INTENT(in) :: j DOUBLE PRECISION, INTENT(in) :: arr(:) INTEGER :: i ! DO i=1,amat%mrows CALL putele(amat, i, j, arr(i)) END DO END SUBROUTINE putcol_csr_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putcol_zcsr_mat(amat, j, arr) ! ! Put a column into sparse matrix ! TYPE(zcsr_mat), INTENT(inout) :: amat INTEGER, INTENT(in) :: j DOUBLE COMPLEX, INTENT(in) :: arr(:) INTEGER :: i ! DO i=1,amat%mrows CALL putele(amat, i, j, arr(i)) END DO END SUBROUTINE putcol_zcsr_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getcol_csr_mat(amat, j, arr) ! ! Get a column from sparse matrix ! TYPE(csr_mat), INTENT(in) :: amat INTEGER, INTENT(in) :: j DOUBLE PRECISION, INTENT(out) :: arr(:) INTEGER :: i ! DO i=1,amat%mrows CALL getele(amat, i, j, arr(i)) END DO END SUBROUTINE getcol_csr_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getcol_zcsr_mat(amat, j, arr) ! ! Get a column from sparse matrix ! TYPE(zcsr_mat), INTENT(in) :: amat INTEGER, INTENT(in) :: j DOUBLE COMPLEX, INTENT(out) :: arr(:) INTEGER :: i ! DO i=1,amat%mrows CALL getele(amat, i, j, arr(i)) END DO END SUBROUTINE getcol_zcsr_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE to_csr_mat(mat, nlkeep) ! ! Convert linked list spmat to csr matrice structure ! TYPE(csr_mat) :: mat LOGICAL, INTENT(in), OPTIONAL :: nlkeep INTEGER :: nnz_arr(mat%rank) INTEGER :: i, nnz, rank, s, e LOGICAL :: nlclean ! nlclean = .TRUE. IF(PRESENT(nlkeep)) THEN nlclean = .NOT. nlkeep END IF ! ! Allocate the csr matrix structure ! nnz = get_count(mat%spmat, nnz_arr) rank = mat%rank mat%nnz = nnz IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) IF(ASSOCIATED(mat%idiag)) DEALLOCATE(mat%idiag) IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) ALLOCATE(mat%irow(rank+1)) ALLOCATE(mat%idiag(rank)) ALLOCATE(mat%cols(nnz)) ALLOCATE(mat%val(nnz)) ! ! Fill csr structure and optionally deallocate the sparse rows ! mat%irow = 1 DO i=1,rank mat%irow(i+1) = mat%irow(i) + nnz_arr(i) s = mat%irow(i) e = mat%irow(i+1)-1 CALL getrow(mat%spmat%row(i), mat%val(s:e), mat%cols(s:e)) mat%idiag(i) = isearch(mat%cols(s:e), i) + s IF(nlclean) CALL destroy(mat%spmat%row(i)) END DO !!$! !!$! MUMPS mat for direct solver !!$! !!$ ALLOCATE(mat%mumps) !!$ CALL csr2mumps(mat, mat%mumps) END SUBROUTINE to_csr_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE to_zcsr_mat(mat, nlkeep) ! ! Convert linked list spmat to csr matrice structure ! TYPE(zcsr_mat) :: mat LOGICAL, INTENT(in), OPTIONAL :: nlkeep INTEGER :: nnz_arr(mat%rank) INTEGER :: i, nnz, rank, s, e LOGICAL :: nlclean ! nlclean = .TRUE. IF(PRESENT(nlkeep)) THEN nlclean = .NOT. nlkeep END IF ! ! Allocate the csr matrix structure ! nnz = get_count(mat%zspmat, nnz_arr) rank = mat%rank mat%nnz = nnz IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) IF(ASSOCIATED(mat%idiag)) DEALLOCATE(mat%idiag) IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) ALLOCATE(mat%irow(rank+1)) ALLOCATE(mat%idiag(rank)) ALLOCATE(mat%cols(nnz)) ALLOCATE(mat%val(nnz)) ! ! Fill csr structure and optionally deallocate the sparse rows ! mat%irow = 1 DO i=1,rank mat%irow(i+1) = mat%irow(i) + nnz_arr(i) s = mat%irow(i) e = mat%irow(i+1)-1 CALL getrow(mat%zspmat%row(i), mat%val(s:e), mat%cols(s:e)) mat%idiag(i) = isearch(mat%cols(s:e), i) + s IF(nlclean) CALL destroy(mat%zspmat%row(i)) END DO !!$! !!$! MUMPS mat for direct solver !!$! !!$ ALLOCATE(mat%mumps) !!$ CALL csr2mumps(mat, mat%mumps) END SUBROUTINE to_zcsr_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE csr2mumps(mat, mat_mumps) ! ! Fill mumps structure (based on routine to_mumps_mat) ! INCLUDE 'mpif.h' TYPE(csr_mat) :: mat TYPE(mumps_mat) :: mat_mumps ! INTEGER :: i, rank, s, e INTEGER :: comm, ierr, nnz_loc ! CALL init(mat%rank, mat%nterms, mat_mumps) ! comm = mat_mumps%mumps_par%COMM mat_mumps%mumps_par%ICNTL(18) = 3 ! Distributed assembled matrix ! ! Allocate the Mumps matrix structure ! CSR format: (cols, irow, val) or (JCN, irow, A) ! COO format: (IRN, JCN, A) or (IRN, cols, val) ! rank = mat_mumps%rank nnz_loc = mat%nnz mat_mumps%nnz_start = 0 CALL mpi_exscan(nnz_loc, mat_mumps%nnz_start, 1, MPI_INTEGER, MPI_SUM, comm, ierr) mat_mumps%nnz_start = mat_mumps%nnz_start + 1 mat_mumps%nnz_end = mat_mumps%nnz_start + nnz_loc - 1 mat_mumps%nnz_loc = nnz_loc CALL mpi_allreduce(nnz_loc, mat_mumps%nnz, 1, MPI_INTEGER, MPI_SUM, comm, ierr) ! mat_mumps%mumps_par%N = rank mat_mumps%mumps_par%NZ_loc = nnz_loc ! mat_mumps%cols => mat%cols mat_mumps%irow => mat%irow mat_mumps%val => mat%val ! ! (A,JCN) picked from CSR mat mat_mumps%mumps_par%A_loc => mat_mumps%val mat_mumps%mumps_par%JCN_loc => mat_mumps%cols ! ! Determine IRN array IF(ASSOCIATED(mat_mumps%mumps_par%IRN_loc)) DEALLOCATE(mat_mumps%mumps_par%IRN_loc) ALLOCATE(mat_mumps%mumps_par%IRN_loc(nnz_loc)) DO i=mat_mumps%istart,mat_mumps%iend s = mat_mumps%irow(i) - mat_mumps%nnz_start + 1 e = mat_mumps%irow(i+1) - mat_mumps%nnz_start mat_mumps%mumps_par%IRN_loc(s:e) = i END DO CALL destroy(mat_mumps%mat) NULLIFY(mat_mumps%mat) ! END SUBROUTINE csr2mumps !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE zcsr2mumps(mat, mat_mumps) ! ! Fill mumps structure (based on routine to_mumps_mat) ! INCLUDE 'mpif.h' TYPE(zcsr_mat) :: mat TYPE(zmumps_mat) :: mat_mumps ! INTEGER :: i, rank, s, e INTEGER :: comm, ierr, nnz_loc ! CALL init(mat%rank, mat%nterms, mat_mumps) ! comm = mat_mumps%mumps_par%COMM mat_mumps%mumps_par%ICNTL(18) = 3 ! Distributed assembled matrix ! ! Allocate the Mumps matrix structure ! CSR format: (cols, irow, val) or (JCN, irow, A) ! COO format: (IRN, JCN, A) or (IRN, cols, val) ! rank = mat_mumps%rank nnz_loc = mat%nnz mat_mumps%nnz_start = 0 CALL mpi_exscan(nnz_loc, mat_mumps%nnz_start, 1, MPI_INTEGER, MPI_SUM, comm, ierr) mat_mumps%nnz_start = mat_mumps%nnz_start + 1 mat_mumps%nnz_end = mat_mumps%nnz_start + nnz_loc - 1 mat_mumps%nnz_loc = nnz_loc CALL mpi_allreduce(nnz_loc, mat_mumps%nnz, 1, MPI_INTEGER, MPI_SUM, comm, ierr) ! mat_mumps%mumps_par%N = rank mat_mumps%mumps_par%NZ_loc = nnz_loc ! mat_mumps%cols => mat%cols mat_mumps%irow => mat%irow mat_mumps%val => mat%val ! ! (A,JCN) picked from CSR mat mat_mumps%mumps_par%A_loc => mat_mumps%val mat_mumps%mumps_par%JCN_loc => mat_mumps%cols ! ! Determine IRN array IF(ASSOCIATED(mat_mumps%mumps_par%IRN_loc)) DEALLOCATE(mat_mumps%mumps_par%IRN_loc) ALLOCATE(mat_mumps%mumps_par%IRN_loc(nnz_loc)) DO i=mat_mumps%istart,mat_mumps%iend s = mat_mumps%irow(i) - mat_mumps%nnz_start + 1 e = mat_mumps%irow(i+1) - mat_mumps%nnz_start mat_mumps%mumps_par%IRN_loc(s:e) = i END DO CALL destroy(mat_mumps%mat) NULLIFY(mat_mumps%mat) ! END SUBROUTINE zcsr2mumps !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE destroy_csr_mat(mat) ! ! Deallocate csr mat ! TYPE(csr_mat) :: mat ! CALL destroy(mat%spmat) IF(mat%nnz.GT.0) THEN DEALLOCATE(mat%irow) DEALLOCATE(mat%idiag) DEALLOCATE(mat%cols) DEALLOCATE(mat%val) END IF mat%nnz = 0 END SUBROUTINE destroy_csr_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE destroy_zcsr_mat(mat) ! ! Deallocate csr mat ! TYPE(zcsr_mat) :: mat ! CALL destroy(mat%zspmat) IF(mat%nnz.GT.0) THEN DEALLOCATE(mat%irow) DEALLOCATE(mat%idiag) DEALLOCATE(mat%cols) DEALLOCATE(mat%val) END IF mat%nnz = 0 END SUBROUTINE destroy_zcsr_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ FUNCTION vmx_csr_mat(mat, xarr, transa_in) RESULT(yarr) ! ! Return product mat*x ! TYPE(csr_mat) :: mat DOUBLE PRECISION, INTENT(in) :: xarr(:) CHARACTER(len=1), OPTIONAL, INTENT(in) :: transa_in DOUBLE PRECISION :: yarr(SIZE(xarr)) ! DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0 CHARACTER(len=1) :: transa CHARACTER(len=6) :: matdescra INTEGER :: n, i, j ! n = mat%rank transa = 'N' IF(PRESENT(transa_in)) transa = transa_in ! #ifdef MKL matdescra = 'g' CALL mkl_dcsrmv(transa, n, n, alpha, matdescra, mat%val, & & mat%cols, mat%irow(1), mat%irow(2), xarr, & & beta, yarr) #else yarr = 0.0d0 DO i=1,mat%rank DO j=mat%irow(i), mat%irow(i+1)-1 IF(transa .EQ. 'N') THEN yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j)) ELSE yarr(mat%cols(j)) = yarr(mat%cols(j)) + mat%val(j)*xarr(i) END IF END DO END DO #endif ! END FUNCTION vmx_csr_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ FUNCTION vmx_zcsr_mat(mat, xarr, transa_in) RESULT(yarr) ! ! Return product mat*x ! TYPE(zcsr_mat) :: mat DOUBLE COMPLEX, INTENT(in) :: xarr(:) CHARACTER(len=1), OPTIONAL, INTENT(in) :: transa_in DOUBLE COMPLEX :: yarr(SIZE(xarr)) ! DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0) CHARACTER(len=1) :: transa CHARACTER(len=6) :: matdescra INTEGER :: n, i, j ! n = mat%rank transa = 'N' IF(PRESENT(transa_in)) transa = transa_in ! #ifdef MKL matdescra = 'g' CALL mkl_zcsrmv(transa, n, n, alpha, matdescra, mat%val, & & mat%cols, mat%irow(1), mat%irow(2), xarr, & & beta, yarr) #else yarr = 0.0d0 DO i=1,mat%rank DO j=mat%irow(i), mat%irow(i+1)-1 IF(transa .EQ. 'N') THEN yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j)) ELSE yarr(mat%cols(j)) = yarr(mat%cols(j)) + mat%val(j)*xarr(i) END IF END DO END DO #endif ! END FUNCTION vmx_zcsr_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ FUNCTION vmx_csr_matn(mat, xarr, transa_in) RESULT(yarr) ! ! Return product mat*x ! TYPE(csr_mat) :: mat DOUBLE PRECISION, INTENT(in) :: xarr(:,:) CHARACTER(len=1), OPTIONAL, INTENT(in) :: transa_in DOUBLE PRECISION :: yarr(SIZE(xarr,1),SIZE(xarr,2)) ! DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0 CHARACTER(len=1) :: transa INTEGER :: n, nrhs, i, j CHARACTER(len=6) :: matdescra ! n = mat%rank nrhs = SIZE(xarr,2) transa = 'N' IF(PRESENT(transa_in)) transa = transa_in ! #ifdef MKL matdescra = 'g' CALL mkl_dcsrmm(transa, n, nrhs, n, alpha, matdescra, mat%val,& & mat%cols, mat%irow(1), mat%irow(2), xarr, & & n, beta, yarr, n) #else yarr = 0.0d0 DO i=1,n DO j=mat%irow(i), mat%irow(i+1)-1 IF(transa .EQ. 'N') THEN yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:) ELSE yarr(mat%cols(j),:) = yarr(mat%cols(j),:) + mat%val(j)*xarr(i,:) END IF END DO END DO #endif ! END FUNCTION vmx_csr_matn !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ FUNCTION vmx_zcsr_matn(mat, xarr, transa_in) RESULT(yarr) ! ! Return product mat*x ! TYPE(zcsr_mat) :: mat DOUBLE COMPLEX, INTENT(in) :: xarr(:,:) CHARACTER(len=1), OPTIONAL, INTENT(in) :: transa_in DOUBLE COMPLEX :: yarr(SIZE(xarr,1),SIZE(xarr,2)) ! DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0) CHARACTER(len=1) :: transa INTEGER :: n, nrhs, i, j CHARACTER(len=6) :: matdescra ! n = mat%rank nrhs = SIZE(xarr,2) transa = 'N' IF(PRESENT(transa_in)) transa = transa_in ! #ifdef MKL matdescra = 'g' CALL mkl_zcsrmm(transa, n, nrhs, n, alpha, matdescra, mat%val,& & mat%cols, mat%irow(1), mat%irow(2), xarr, & & n, beta, yarr, n) #else yarr = (0.0d0,0.0d0) DO i=1,n DO j=mat%irow(i), mat%irow(i+1)-1 IF(transa .EQ. 'N') THEN yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:) ELSE yarr(mat%cols(j),:) = yarr(mat%cols(j),:) + mat%val(j)*xarr(i,:) END IF END DO END DO #endif ! END FUNCTION vmx_zcsr_matn !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE conmat_1d_csr(spl, mat, coefeq, maxder) ! ! Construction of FE matrix mat for 1D differential operator ! using spline spl ! USE bsplines TYPE(csr_mat) :: mat TYPE(spline1d), INTENT(in) :: spl ! INCLUDE '../../bsplines/src/conmat_1d.tpl' END SUBROUTINE conmat_1d_csr !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE conmat_1d_zcsr(spl, mat, coefeq, maxder) ! ! Construction of FE matrix mat for 1D differential operator ! using spline spl ! USE bsplines TYPE(zcsr_mat) :: mat TYPE(spline1d), INTENT(in) :: spl ! INCLUDE '../../bsplines/src/zconmat_1d.tpl' END SUBROUTINE conmat_1d_zcsr !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE conmat_2d_csr(spl, mat, coefeq, maxder, nat_order) ! ! Construction of FE matrix mat for 2D differential operator ! using spline spl ! USE bsplines TYPE(spline2d), INTENT(in) :: spl TYPE(csr_mat) :: mat ! INCLUDE '../../bsplines/src/conmat.tpl' END SUBROUTINE conmat_2d_csr !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE conmat_2d_zcsr(spl, mat, coefeq, maxder, nat_order) ! ! Construction of FE matrix mat for 2D differential operator ! using spline spl ! USE bsplines TYPE(spline2d), INTENT(in) :: spl TYPE(zcsr_mat) :: mat ! INCLUDE '../../bsplines/src/zconmat.tpl' END SUBROUTINE conmat_2d_zcsr !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE femat_csr(spl, mat, coefeq, nterms) ! ! Compute fe matrix ! USE bsplines TYPE(spline1d), INTENT(in) :: spl TYPE(csr_mat), INTENT(inout) :: mat INTEGER, INTENT(in) :: nterms INTERFACE SUBROUTINE coefeq(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE PRECISION, INTENT(out) :: c(:) END SUBROUTINE coefeq END INTERFACE ! INTEGER :: nrank, nx, nidbas ! CALL get_dim(spl, nrank, nx, nidbas) IF(spl%period) nrank = nx IF(mat%nnz.EQ.0) THEN WRITE(*,'(a,i0,a)') 'FEMAT: Initialize mat with ', & & nterms, ' terms ...' CALL init(nrank, nterms, mat) END IF CALL conmat(spl, mat, coefeq) END SUBROUTINE femat_csr !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE put_csr_mat(fid, label, mat, str) ! ! Write matrix to hdf5 file ! USE futils ! INTEGER, INTENT(in) :: fid CHARACTER(len=*), INTENT(in) :: label TYPE(csr_mat) :: mat CHARACTER(len=*), OPTIONAL, INTENT(in) :: str ! IF(PRESENT(str)) THEN CALL creatg(fid, label, str) ELSE CALL creatg(fid, label) END IF CALL attach(fid, label, 'RANK', mat%rank) CALL attach(fid, label, 'NNZ', mat%nnz) CALL putarr(fid, TRIM(label)//'/irow', mat%irow) CALL putarr(fid, TRIM(label)//'/cols', mat%cols) CALL putarr(fid, TRIM(label)//'/idiag', mat%idiag) CALL putarr(fid, TRIM(label)//'/val', mat%val) END SUBROUTINE put_csr_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE put_zcsr_mat(fid, label, mat, str) ! ! Write matrix to hdf5 file ! USE futils ! INTEGER, INTENT(in) :: fid CHARACTER(len=*), INTENT(in) :: label TYPE(zcsr_mat) :: mat CHARACTER(len=*), OPTIONAL, INTENT(in) :: str ! IF(PRESENT(str)) THEN CALL creatg(fid, label, str) ELSE CALL creatg(fid, label) END IF CALL attach(fid, label, 'RANK', mat%rank) CALL attach(fid, label, 'NNZ', mat%nnz) CALL putarr(fid, TRIM(label)//'/irow', mat%irow) CALL putarr(fid, TRIM(label)//'/cols', mat%cols) CALL putarr(fid, TRIM(label)//'/idiag', mat%idiag) CALL putarr(fid, TRIM(label)//'/val', mat%val) END SUBROUTINE put_zcsr_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ DOUBLE PRECISION FUNCTION matnorm_csr(mat, p) ! ! Compute matrix norm ! TYPE(csr_mat), INTENT(in) :: mat CHARACTER(len=*), OPTIONAL, INTENT(in) :: p ! CHARACTER(len=4) :: norm_type INTEGER :: i, j DOUBLE PRECISION :: temp(mat%rank) ! norm_type = 'fro' IF(PRESENT(p)) norm_type = p ! SELECT CASE (norm_type) CASE ('inf') DO i=1,mat%rank temp(i) = SUM(ABS(mat%val(mat%irow(i):mat%irow(i+1)-1))) END DO matnorm_csr = MAXVAL(temp) CASE ('1') temp = 0.0d0 DO i=1,mat%rank DO j=mat%irow(i), mat%irow(i+1)-1 temp(mat%cols(j)) = temp(mat%cols(j)) + ABS(mat%val(j)) END DO END DO matnorm_csr = MAXVAL(temp) CASE('fro') matnorm_csr = SQRT(DOT_PRODUCT(mat%val, mat%val)) END SELECT END FUNCTION matnorm_csr !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE full_to_csr(fullmat, mat) ! ! Convert full rectangular matrix to csr mat ! DOUBLE PRECISION, INTENT(inout) :: fullmat(:,:) TYPE(csr_mat), INTENT(out) :: mat ! INTEGER :: m, n, nnz INTEGER :: i, j, k ! m = SIZE(fullmat,1) n = SIZE(fullmat,2) CALL init(m, 0, mat, ncols=n) ! ! Determine nnz of matrix ! IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) IF(ASSOCIATED(mat%idiag)) DEALLOCATE(mat%idiag) IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) ! ALLOCATE(mat%irow(m+1)) ALLOCATE(mat%idiag(m)) ! ! Clear matrix small elements of fullmat WHERE( ABS(fullmat) < 1.d-8) fullmat=0.0d0 ! mat%irow(1) = 1 nnz = 0 DO i=1,m DO j=1,n IF(ABS(fullmat(i,j)).GT.EPSILON(0.0d0)) THEN nnz = nnz+1 IF(m.EQ.n .AND. i.EQ.j) THEN ! Only for square matrix mat%idiag(i) = nnz END IF END IF END DO mat%irow(i+1) = nnz+1 END DO ! ! Allocate and fill the csr matrix structure ! mat%nnz = nnz ALLOCATE(mat%cols(nnz)) ALLOCATE(mat%val(nnz)) k=0 DO i=1,m DO j=1,n IF(ABS(fullmat(i,j)).GT.EPSILON(0.0d0)) THEN k=k+1 mat%cols(k) = j mat%val(k) = fullmat(i,j) END IF END DO END DO END SUBROUTINE full_to_csr !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE full_to_zcsr(fullmat, mat) ! ! Convert full rectangular matrix to csr mat ! DOUBLE COMPLEX, INTENT(inout) :: fullmat(:,:) TYPE(zcsr_mat), INTENT(out) :: mat ! INTEGER :: m, n, nnz INTEGER :: i, j, k ! m = SIZE(fullmat,1) n = SIZE(fullmat,2) CALL init(m, 0, mat, ncols=n) ! ! Determine nnz of matrix ! IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) IF(ASSOCIATED(mat%idiag)) DEALLOCATE(mat%idiag) IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) ! ALLOCATE(mat%irow(m+1)) ALLOCATE(mat%idiag(m)) ! ! Clear matrix small elements of fullmat WHERE( ABS(fullmat) < 1.d-8) fullmat=(0.0d0,0.0d0) ! mat%irow(1) = 1 nnz = 0 DO i=1,m DO j=1,n IF(ABS(fullmat(i,j)).GT.EPSILON(0.0d0)) THEN nnz = nnz+1 IF(m.EQ.n .AND. i.EQ.j) THEN ! Only for square matrix mat%idiag(i) = nnz END IF END IF END DO mat%irow(i+1) = nnz+1 END DO ! ! Allocate and fill the csr matrix structure ! mat%nnz = nnz ALLOCATE(mat%cols(nnz)) ALLOCATE(mat%val(nnz)) k=0 DO i=1,m DO j=1,n IF(ABS(fullmat(i,j)).GT.EPSILON(0.0d0)) THEN k=k+1 mat%cols(k) = j mat%val(k) = fullmat(i,j) END IF END DO END DO END SUBROUTINE full_to_zcsr !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE check_dom(mat) ! ! Check whether mat is strict diagonal dominabce. ! TYPE(csr_mat), INTENT(in) :: mat DOUBLE PRECISION :: arow(mat%rank), asum(mat%rank) INTEGER :: n, i, j1, j2, jdiag ! n = mat%rank DO i=1,n j1 = mat%irow(i) jdiag = mat%idiag(i) j2 = mat%irow(i+1)-1 asum(i) = SUM(ABS(mat%val(j1:j2))) / ABS(mat%val(jdiag)) - 1.0d0 END DO WRITE(*,'(/a,1pe12.3)') 'Max of sum of off-diag', MAXVAL(asum) ! END SUBROUTINE check_dom !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE kron_csr(mata, matb, matc) ! ! Kronecker product of 2 CSR matrices ! USE sparse, ONLY : isearch TYPE(csr_mat), INTENT(in) :: mata, matb TYPE(csr_mat), INTENT(out) :: matc ! INTEGER :: m1, n1, nnz1, m2, n2, nnz2, m, n, nnz INTEGER :: i,i1,i2,j1,s,s1,s2,e,e1,e2,k,nc2 ! m1 = mata%mrows n1 = mata%ncols nnz1 = mata%nnz m2 = matb%mrows n2 = matb%ncols nnz2 = matb%nnz m = m1*m2 n = n1*n2 nnz = nnz1*nnz2 ! CALL init(m, 0, matc, ncols=n) matc%nnz = nnz IF(ASSOCIATED(matc%irow)) DEALLOCATE(matc%irow) IF(ASSOCIATED(matc%idiag)) DEALLOCATE(matc%idiag) IF(ASSOCIATED(matc%cols)) DEALLOCATE(matc%cols) IF(ASSOCIATED(matc%val)) DEALLOCATE(matc%val) ALLOCATE(matc%irow(m+1)) IF(m.EQ.n) THEN ALLOCATE(matc%idiag(m)) ! Only for square matrices END IF ALLOCATE(matc%cols(nnz)) ALLOCATE(matc%val(nnz)) ! k = 0 matc%irow(1) = 1 DO i1=1,m1 s1=mata%irow(i1) e1=mata%irow(i1+1)-1 DO i2=1,m2 s2=matb%irow(i2) e2=matb%irow(i2+1)-1 nc2=e2-s2+1 DO j1=s1,e1 matc%val(k+1:k+nc2) = mata%val(j1)*matb%val(s2:e2) matc%cols(k+1:k+nc2) = (mata%cols(j1)-1)*n2 + matb%cols(s2:e2) k = k+nc2 matc%irow((i1-1)*m2+i2+1) = k+1 ! Points to next row END DO END DO END DO ! ! Search the diagonals DO i=1,matc%mrows s = matc%irow(i) e = matc%irow(i+1)-1 matc%idiag(i) = isearch(matc%cols(s:e),i) + s END DO END SUBROUTINE kron_csr !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ END MODULE csr diff --git a/src/lapack_extra.f b/src/lapack_extra.f index 8f5248f..d96b286 100644 --- a/src/lapack_extra.f +++ b/src/lapack_extra.f @@ -1,718 +1,718 @@ !> !> @file lapack_extra.f !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> DOUBLE PRECISION FUNCTION DOPGB( SUBNAM, M, N, KL, KU, IPIV ) * * -- LAPACK timing routine (version 3.0) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * June 30, 1999 * * .. Scalar Arguments .. CHARACTER*6 SUBNAM INTEGER KL, KU, M, N * .. * .. Array Arguments .. INTEGER IPIV( * ) * .. * * Purpose * ======= * * DOPGB counts operations for the LU factorization of a band matrix * xGBTRF. * * Arguments * ========= * * SUBNAM (input) CHARACTER*6 * The name of the subroutine. * * M (input) INTEGER * The number of rows of the coefficient matrix. M >= 0. * * N (input) INTEGER * The number of columns of the coefficient matrix. N >= 0. * * KL (input) INTEGER * The number of subdiagonals of the matrix. KL >= 0. * * KU (input) INTEGER * The number of superdiagonals of the matrix. KU >= 0. * * IPIV (input) INTEGER array, dimension (min(M,N)) * The vector of pivot indices from DGBTRF or ZGBTRF. * * ===================================================================== * * .. Local Scalars .. LOGICAL CORZ, SORD CHARACTER C1 CHARACTER*2 C2 CHARACTER*3 C3 INTEGER I, J, JP, JU, KM DOUBLE PRECISION ADDFAC, ADDS, MULFAC, MULTS * .. * .. External Functions .. LOGICAL LSAME, LSAMEN EXTERNAL LSAME, LSAMEN * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * DOPGB = 0 MULTS = 0 ADDS = 0 C1 = SUBNAM( 1: 1 ) C2 = SUBNAM( 2: 3 ) C3 = SUBNAM( 4: 6 ) SORD = LSAME( C1, 'S' ) .OR. LSAME( C1, 'D' ) CORZ = LSAME( C1, 'C' ) .OR. LSAME( C1, 'Z' ) IF( .NOT.( SORD .OR. CORZ ) ) $ RETURN IF( LSAME( C1, 'S' ) .OR. LSAME( C1, 'D' ) ) THEN ADDFAC = 1 MULFAC = 1 ELSE ADDFAC = 2 MULFAC = 6 END IF * * -------------------------- * GB: General Band matrices * -------------------------- * IF( LSAMEN( 2, C2, 'GB' ) ) THEN * * xGBTRF: M, N, KL, KU => M, N, KL, KU * IF( LSAMEN( 3, C3, 'TRF' ) ) THEN JU = 1 DO 10 J = 1, MIN( M, N ) KM = MIN( KL, M-J ) JP = IPIV( J ) JU = MAX( JU, MIN( JP+KU, N ) ) IF( KM.GT.0 ) THEN MULTS = MULTS + KM*( 1+JU-J ) ADDS = ADDS + KM*( JU-J ) END IF 10 CONTINUE END IF * * --------------------------------- * GT: General Tridiagonal matrices * --------------------------------- * ELSE IF( LSAMEN( 2, C2, 'GT' ) ) THEN * * xGTTRF: N => M * IF( LSAMEN( 3, C3, 'TRF' ) ) THEN MULTS = 2*( M-1 ) ADDS = M - 1 DO 20 I = 1, M - 2 IF( IPIV( I ).NE.I ) $ MULTS = MULTS + 1 20 CONTINUE * * xGTTRS: N, NRHS => M, N * ELSE IF( LSAMEN( 3, C3, 'TRS' ) ) THEN MULTS = 4*N*( M-1 ) ADDS = 3*N*( M-1 ) * * xGTSV: N, NRHS => M, N * ELSE IF( LSAMEN( 3, C3, 'SV ' ) ) THEN MULTS = ( 4*N+2 )*( M-1 ) ADDS = ( 3*N+1 )*( M-1 ) DO 30 I = 1, M - 2 IF( IPIV( I ).NE.I ) $ MULTS = MULTS + 1 30 CONTINUE END IF END IF * DOPGB = MULFAC*MULTS + ADDFAC*ADDS RETURN * * End of DOPGB * END DOUBLE PRECISION FUNCTION DOPLA( SUBNAM, M, N, KL, KU, NB ) * * -- LAPACK timing routine (version 3.0) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * June 30, 1999 * * .. Scalar Arguments .. CHARACTER*6 SUBNAM INTEGER KL, KU, M, N, NB * .. * * Purpose * ======= * * DOPLA computes an approximation of the number of floating point * operations used by the subroutine SUBNAM with the given values * of the parameters M, N, KL, KU, and NB. * * This version counts operations for the LAPACK subroutines. * * Arguments * ========= * * SUBNAM (input) CHARACTER*6 * The name of the subroutine. * * M (input) INTEGER * The number of rows of the coefficient matrix. M >= 0. * * N (input) INTEGER * The number of columns of the coefficient matrix. * For solve routine when the matrix is square, * N is the number of right hand sides. N >= 0. * * KL (input) INTEGER * The lower band width of the coefficient matrix. * If needed, 0 <= KL <= M-1. * For xGEQRS, KL is the number of right hand sides. * * KU (input) INTEGER * The upper band width of the coefficient matrix. * If needed, 0 <= KU <= N-1. * * NB (input) INTEGER * The block size. If needed, NB >= 1. * * Notes * ===== * * In the comments below, the association is given between arguments * in the requested subroutine and local arguments. For example, * * xGETRS: N, NRHS => M, N * * means that arguments N and NRHS in DGETRS are passed to arguments * M and N in this procedure. * * ===================================================================== * * .. Local Scalars .. LOGICAL CORZ, SORD CHARACTER C1 CHARACTER*2 C2 CHARACTER*3 C3 INTEGER I DOUBLE PRECISION ADDFAC, ADDS, EK, EM, EMN, EN, MULFAC, MULTS, $ WL, WU * .. * .. External Functions .. LOGICAL LSAME, LSAMEN EXTERNAL LSAME, LSAMEN * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. * .. Executable Statements .. * * -------------------------------------------------------- * Initialize DOPLA to 0 and do a quick return if possible. * -------------------------------------------------------- * DOPLA = 0 MULTS = 0 ADDS = 0 C1 = SUBNAM( 1: 1 ) C2 = SUBNAM( 2: 3 ) C3 = SUBNAM( 4: 6 ) SORD = LSAME( C1, 'S' ) .OR. LSAME( C1, 'D' ) CORZ = LSAME( C1, 'C' ) .OR. LSAME( C1, 'Z' ) IF( M.LE.0 .OR. .NOT.( SORD .OR. CORZ ) ) $ RETURN * * --------------------------------------------------------- * If the coefficient matrix is real, count each add as 1 * operation and each multiply as 1 operation. * If the coefficient matrix is complex, count each add as 2 * operations and each multiply as 6 operations. * --------------------------------------------------------- * IF( LSAME( C1, 'S' ) .OR. LSAME( C1, 'D' ) ) THEN ADDFAC = 1 MULFAC = 1 ELSE ADDFAC = 2 MULFAC = 6 END IF EM = M EN = N EK = KL * * --------------------------------- * GE: GEneral rectangular matrices * --------------------------------- * IF( LSAMEN( 2, C2, 'GE' ) ) THEN * * xGETRF: M, N => M, N * IF( LSAMEN( 3, C3, 'TRF' ) ) THEN EMN = MIN( M, N ) ADDS = EMN*( EM*EN-( EM+EN )*( EMN+1.D0 ) / 2.D0+ $ ( EMN+1.D0 )*( 2.D0*EMN+1.D0 ) / 6.D0 ) MULTS = ADDS + EMN*( EM-( EMN+1.D0 ) / 2.D0 ) * * xGETRS: N, NRHS => M, N * ELSE IF( LSAMEN( 3, C3, 'TRS' ) ) THEN MULTS = EN*EM*EM ADDS = EN*( EM*( EM-1.D0 ) ) * * xGETRI: N => M * ELSE IF( LSAMEN( 3, C3, 'TRI' ) ) THEN MULTS = EM*( 5.D0 / 6.D0+EM*( 1.D0 / 2.D0+EM*( 2.D0 / $ 3.D0 ) ) ) ADDS = EM*( 5.D0 / 6.D0+EM*( -3.D0 / 2.D0+EM*( 2.D0 / $ 3.D0 ) ) ) * * xGEQRF or xGEQLF: M, N => M, N * ELSE IF( LSAMEN( 3, C3, 'QRF' ) .OR. $ LSAMEN( 3, C3, 'QR2' ) .OR. $ LSAMEN( 3, C3, 'QLF' ) .OR. LSAMEN( 3, C3, 'QL2' ) ) $ THEN IF( M.GE.N ) THEN MULTS = EN*( ( ( 23.D0 / 6.D0 )+EM+EN / 2.D0 )+EN* $ ( EM-EN / 3.D0 ) ) ADDS = EN*( ( 5.D0 / 6.D0 )+EN* $ ( 1.D0 / 2.D0+( EM-EN / 3.D0 ) ) ) ELSE MULTS = EM*( ( ( 23.D0 / 6.D0 )+2.D0*EN-EM / 2.D0 )+EM* $ ( EN-EM / 3.D0 ) ) ADDS = EM*( ( 5.D0 / 6.D0 )+EN-EM / 2.D0+EM* $ ( EN-EM / 3.D0 ) ) END IF * * xGERQF or xGELQF: M, N => M, N * ELSE IF( LSAMEN( 3, C3, 'RQF' ) .OR. $ LSAMEN( 3, C3, 'RQ2' ) .OR. $ LSAMEN( 3, C3, 'LQF' ) .OR. LSAMEN( 3, C3, 'LQ2' ) ) $ THEN IF( M.GE.N ) THEN MULTS = EN*( ( ( 29.D0 / 6.D0 )+EM+EN / 2.D0 )+EN* $ ( EM-EN / 3.D0 ) ) ADDS = EN*( ( 5.D0 / 6.D0 )+EM+EN* $ ( -1.D0 / 2.D0+( EM-EN / 3.D0 ) ) ) ELSE MULTS = EM*( ( ( 29.D0 / 6.D0 )+2.D0*EN-EM / 2.D0 )+EM* $ ( EN-EM / 3.D0 ) ) ADDS = EM*( ( 5.D0 / 6.D0 )+EM / 2.D0+EM* $ ( EN-EM / 3.D0 ) ) END IF * * xGEQPF: M, N => M, N * ELSE IF( LSAMEN( 3, C3, 'QPF' ) ) THEN EMN = MIN( M, N ) MULTS = 2*EN*EN + EMN*( 3*EM+5*EN+2*EM*EN-( EMN+1 )* $ ( 4+EN+EM-( 2*EMN+1 ) / 3 ) ) ADDS = EN*EN + EMN*( 2*EM+EN+2*EM*EN-( EMN+1 )* $ ( 2+EN+EM-( 2*EMN+1 ) / 3 ) ) * * xGEQRS or xGERQS: M, N, NRHS => M, N, KL * ELSE IF( LSAMEN( 3, C3, 'QRS' ) .OR. LSAMEN( 3, C3, 'RQS' ) ) $ THEN MULTS = EK*( EN*( 2.D0-EK )+EM* $ ( 2.D0*EN+( EM+1.D0 ) / 2.D0 ) ) ADDS = EK*( EN*( 1.D0-EK )+EM* $ ( 2.D0*EN+( EM-1.D0 ) / 2.D0 ) ) * * xGELQS or xGEQLS: M, N, NRHS => M, N, KL * ELSE IF( LSAMEN( 3, C3, 'LQS' ) .OR. LSAMEN( 3, C3, 'QLS' ) ) $ THEN MULTS = EK*( EM*( 2.D0-EK )+EN* $ ( 2.D0*EM+( EN+1.D0 ) / 2.D0 ) ) ADDS = EK*( EM*( 1.D0-EK )+EN* $ ( 2.D0*EM+( EN-1.D0 ) / 2.D0 ) ) * * xGEBRD: M, N => M, N * ELSE IF( LSAMEN( 3, C3, 'BRD' ) ) THEN IF( M.GE.N ) THEN MULTS = EN*( 20.D0 / 3.D0+EN* $ ( 2.D0+( 2.D0*EM-( 2.D0 / 3.D0 )*EN ) ) ) ADDS = EN*( 5.D0 / 3.D0+( EN-EM )+EN* $ ( 2.D0*EM-( 2.D0 / 3.D0 )*EN ) ) ELSE MULTS = EM*( 20.D0 / 3.D0+EM* $ ( 2.D0+( 2.D0*EN-( 2.D0 / 3.D0 )*EM ) ) ) ADDS = EM*( 5.D0 / 3.D0+( EM-EN )+EM* $ ( 2.D0*EN-( 2.D0 / 3.D0 )*EM ) ) END IF * * xGEHRD: N => M * ELSE IF( LSAMEN( 3, C3, 'HRD' ) ) THEN IF( M.EQ.1 ) THEN MULTS = 0.D0 ADDS = 0.D0 ELSE MULTS = -13.D0 + EM*( -7.D0 / 6.D0+EM* $ ( 0.5D0+EM*( 5.D0 / 3.D0 ) ) ) ADDS = -8.D0 + EM*( -2.D0 / 3.D0+EM* $ ( -1.D0+EM*( 5.D0 / 3.D0 ) ) ) END IF * END IF * * ---------------------------- * GB: General Banded matrices * ---------------------------- * Note: The operation count is overestimated because * it is assumed that the factor U fills in to the maximum * extent, i.e., that its bandwidth goes from KU to KL + KU. * ELSE IF( LSAMEN( 2, C2, 'GB' ) ) THEN * * xGBTRF: M, N, KL, KU => M, N, KL, KU * IF( LSAMEN( 3, C3, 'TRF' ) ) THEN DO 10 I = MIN( M, N ), 1, -1 WL = MAX( 0, MIN( KL, M-I ) ) WU = MAX( 0, MIN( KL+KU, N-I ) ) MULTS = MULTS + WL*( 1.D0+WU ) ADDS = ADDS + WL*WU 10 CONTINUE * * xGBTRS: N, NRHS, KL, KU => M, N, KL, KU * ELSE IF( LSAMEN( 3, C3, 'TRS' ) ) THEN WL = MAX( 0, MIN( KL, M-1 ) ) WU = MAX( 0, MIN( KL+KU, M-1 ) ) MULTS = EN*( EM*( WL+1.D0+WU )-0.5D0* $ ( WL*( WL+1.D0 )+WU*( WU+1.D0 ) ) ) ADDS = EN*( EM*( WL+WU )-0.5D0* $ ( WL*( WL+1.D0 )+WU*( WU+1.D0 ) ) ) * END IF * * -------------------------------------- * PO: POsitive definite matrices * PP: Positive definite Packed matrices * -------------------------------------- * ELSE IF( LSAMEN( 2, C2, 'PO' ) .OR. LSAMEN( 2, C2, 'PP' ) ) THEN * * xPOTRF: N => M * IF( LSAMEN( 3, C3, 'TRF' ) ) THEN MULTS = EM*( 1.D0 / 3.D0+EM*( 1.D0 / 2.D0+EM*( 1.D0 / $ 6.D0 ) ) ) ADDS = ( 1.D0 / 6.D0 )*EM*( -1.D0+EM*EM ) * * xPOTRS: N, NRHS => M, N * ELSE IF( LSAMEN( 3, C3, 'TRS' ) ) THEN MULTS = EN*( EM*( EM+1.D0 ) ) ADDS = EN*( EM*( EM-1.D0 ) ) * * xPOTRI: N => M * ELSE IF( LSAMEN( 3, C3, 'TRI' ) ) THEN MULTS = EM*( 2.D0 / 3.D0+EM*( 1.D0+EM*( 1.D0 / 3.D0 ) ) ) ADDS = EM*( 1.D0 / 6.D0+EM*( -1.D0 / 2.D0+EM*( 1.D0 / $ 3.D0 ) ) ) * END IF * * ------------------------------------ * PB: Positive definite Band matrices * ------------------------------------ * ELSE IF( LSAMEN( 2, C2, 'PB' ) ) THEN * * xPBTRF: N, K => M, KL * IF( LSAMEN( 3, C3, 'TRF' ) ) THEN MULTS = EK*( -2.D0 / 3.D0+EK*( -1.D0+EK*( -1.D0 / 3.D0 ) ) ) $ + EM*( 1.D0+EK*( 3.D0 / 2.D0+EK*( 1.D0 / 2.D0 ) ) ) ADDS = EK*( -1.D0 / 6.D0+EK*( -1.D0 / 2.D0+EK*( -1.D0 / $ 3.D0 ) ) ) + EM*( EK / 2.D0*( 1.D0+EK ) ) * * xPBTRS: N, NRHS, K => M, N, KL * ELSE IF( LSAMEN( 3, C3, 'TRS' ) ) THEN MULTS = EN*( ( 2*EM-EK )*( EK+1.D0 ) ) ADDS = EN*( EK*( 2*EM-( EK+1.D0 ) ) ) * END IF * * ---------------------------------- * PT: Positive definite Tridiagonal * ---------------------------------- * ELSE IF( LSAMEN( 2, C2, 'PT' ) ) THEN * * xPTTRF: N => M * IF( LSAMEN( 3, C3, 'TRF' ) ) THEN MULTS = 2*( EM-1 ) ADDS = EM - 1 * * xPTTRS: N, NRHS => M, N * ELSE IF( LSAMEN( 3, C3, 'TRS' ) ) THEN MULTS = EN*( 3*EM-2 ) ADDS = EN*( 2*( EM-1 ) ) * * xPTSV: N, NRHS => M, N * ELSE IF( LSAMEN( 3, C3, 'SV ' ) ) THEN MULTS = 2*( EM-1 ) + EN*( 3*EM-2 ) ADDS = EM - 1 + EN*( 2*( EM-1 ) ) END IF * * -------------------------------------------------------- * SY: SYmmetric indefinite matrices * SP: Symmetric indefinite Packed matrices * HE: HErmitian indefinite matrices (complex only) * HP: Hermitian indefinite Packed matrices (complex only) * -------------------------------------------------------- * ELSE IF( LSAMEN( 2, C2, 'SY' ) .OR. LSAMEN( 2, C2, 'SP' ) .OR. $ LSAMEN( 3, SUBNAM, 'CHE' ) .OR. $ LSAMEN( 3, SUBNAM, 'ZHE' ) .OR. $ LSAMEN( 3, SUBNAM, 'CHP' ) .OR. $ LSAMEN( 3, SUBNAM, 'ZHP' ) ) THEN * * xSYTRF: N => M * IF( LSAMEN( 3, C3, 'TRF' ) ) THEN MULTS = EM*( 10.D0 / 3.D0+EM* $ ( 1.D0 / 2.D0+EM*( 1.D0 / 6.D0 ) ) ) ADDS = EM / 6.D0*( -1.D0+EM*EM ) * * xSYTRS: N, NRHS => M, N * ELSE IF( LSAMEN( 3, C3, 'TRS' ) ) THEN MULTS = EN*EM*EM ADDS = EN*( EM*( EM-1.D0 ) ) * * xSYTRI: N => M * ELSE IF( LSAMEN( 3, C3, 'TRI' ) ) THEN MULTS = EM*( 2.D0 / 3.D0+EM*EM*( 1.D0 / 3.D0 ) ) ADDS = EM*( -1.D0 / 3.D0+EM*EM*( 1.D0 / 3.D0 ) ) * * xSYTRD, xSYTD2: N => M * ELSE IF( LSAMEN( 3, C3, 'TRD' ) .OR. LSAMEN( 3, C3, 'TD2' ) ) $ THEN IF( M.EQ.1 ) THEN MULTS = 0.D0 ADDS = 0.D0 ELSE MULTS = -15.D0 + EM*( -1.D0 / 6.D0+EM* $ ( 5.D0 / 2.D0+EM*( 2.D0 / 3.D0 ) ) ) ADDS = -4.D0 + EM*( -8.D0 / 3.D0+EM* $ ( 1.D0+EM*( 2.D0 / 3.D0 ) ) ) END IF END IF * * ------------------- * Triangular matrices * ------------------- * ELSE IF( LSAMEN( 2, C2, 'TR' ) .OR. LSAMEN( 2, C2, 'TP' ) ) THEN * * xTRTRS: N, NRHS => M, N * IF( LSAMEN( 3, C3, 'TRS' ) ) THEN MULTS = EN*EM*( EM+1.D0 ) / 2.D0 ADDS = EN*EM*( EM-1.D0 ) / 2.D0 * * xTRTRI: N => M * ELSE IF( LSAMEN( 3, C3, 'TRI' ) ) THEN MULTS = EM*( 1.D0 / 3.D0+EM*( 1.D0 / 2.D0+EM*( 1.D0 / $ 6.D0 ) ) ) ADDS = EM*( 1.D0 / 3.D0+EM*( -1.D0 / 2.D0+EM*( 1.D0 / $ 6.D0 ) ) ) * END IF * ELSE IF( LSAMEN( 2, C2, 'TB' ) ) THEN * * xTBTRS: N, NRHS, K => M, N, KL * IF( LSAMEN( 3, C3, 'TRS' ) ) THEN MULTS = EN*( EM*( EM+1.D0 ) / 2.D0-( EM-EK-1.D0 )* $ ( EM-EK ) / 2.D0 ) ADDS = EN*( EM*( EM-1.D0 ) / 2.D0-( EM-EK-1.D0 )*( EM-EK ) / $ 2.D0 ) END IF * * -------------------- * Trapezoidal matrices * -------------------- * ELSE IF( LSAMEN( 2, C2, 'TZ' ) ) THEN * * xTZRQF: M, N => M, N * IF( LSAMEN( 3, C3, 'RQF' ) ) THEN EMN = MIN( M, N ) MULTS = 3*EM*( EN-EM+1 ) + ( 2*EN-2*EM+3 )* $ ( EM*EM-EMN*( EMN+1 ) / 2 ) ADDS = ( EN-EM+1 )*( EM+2*EM*EM-EMN*( EMN+1 ) ) END IF * * ------------------- * Orthogonal matrices * ------------------- * ELSE IF( ( SORD .AND. LSAMEN( 2, C2, 'OR' ) ) .OR. $ ( CORZ .AND. LSAMEN( 2, C2, 'UN' ) ) ) THEN * * -MQR, -MLQ, -MQL, or -MRQ: M, N, K, SIDE => M, N, KL, KU * where KU<= 0 indicates SIDE = 'L' * and KU> 0 indicates SIDE = 'R' * IF( LSAMEN( 3, C3, 'MQR' ) .OR. LSAMEN( 3, C3, 'MLQ' ) .OR. $ LSAMEN( 3, C3, 'MQL' ) .OR. LSAMEN( 3, C3, 'MRQ' ) ) THEN IF( KU.LE.0 ) THEN MULTS = EK*EN*( 2.D0*EM+2.D0-EK ) ADDS = EK*EN*( 2.D0*EM+1.D0-EK ) ELSE MULTS = EK*( EM*( 2.D0*EN-EK )+ $ ( EM+EN+( 1.D0-EK ) / 2.D0 ) ) ADDS = EK*EM*( 2.D0*EN+1.D0-EK ) END IF * * -GQR or -GQL: M, N, K => M, N, KL * ELSE IF( LSAMEN( 3, C3, 'GQR' ) .OR. LSAMEN( 3, C3, 'GQL' ) ) $ THEN MULTS = EK*( -5.D0 / 3.D0+( 2.D0*EN-EK )+ $ ( 2.D0*EM*EN+EK*( ( 2.D0 / 3.D0 )*EK-EM-EN ) ) ) ADDS = EK*( 1.D0 / 3.D0+( EN-EM )+ $ ( 2.D0*EM*EN+EK*( ( 2.D0 / 3.D0 )*EK-EM-EN ) ) ) * * -GLQ or -GRQ: M, N, K => M, N, KL * ELSE IF( LSAMEN( 3, C3, 'GLQ' ) .OR. LSAMEN( 3, C3, 'GRQ' ) ) $ THEN MULTS = EK*( -2.D0 / 3.D0+( EM+EN-EK )+ $ ( 2.D0*EM*EN+EK*( ( 2.D0 / 3.D0 )*EK-EM-EN ) ) ) ADDS = EK*( 1.D0 / 3.D0+( EM-EN )+ $ ( 2.D0*EM*EN+EK*( ( 2.D0 / 3.D0 )*EK-EM-EN ) ) ) * END IF * END IF * DOPLA = MULFAC*MULTS + ADDFAC*ADDS * RETURN * * End of DOPLA * END LOGICAL FUNCTION LSAMEN( N, CA, CB ) * * -- LAPACK auxiliary routine (version 2.0) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * September 30, 1994 * * .. Scalar Arguments .. CHARACTER*( * ) CA, CB INTEGER N * .. * * Purpose * ======= * * LSAMEN tests if the first N letters of CA are the same as the * first N letters of CB, regardless of case. * LSAMEN returns .TRUE. if CA and CB are equivalent except for case * and .FALSE. otherwise. LSAMEN also returns .FALSE. if LEN( CA ) * or LEN( CB ) is less than N. * * Arguments * ========= * * N (input) INTEGER * The number of characters in CA and CB to be compared. * * CA (input) CHARACTER*(*) * CB (input) CHARACTER*(*) * CA and CB specify two character strings of length at least N. * Only the first N characters of each string will be accessed. * * ===================================================================== * * .. Local Scalars .. INTEGER I * .. * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. Intrinsic Functions .. INTRINSIC LEN * .. * .. Executable Statements .. * LSAMEN = .FALSE. IF( LEN( CA ).LT.N .OR. LEN( CB ).LT.N ) $ GO TO 20 * * Do for each character in the two strings. * DO 10 I = 1, N * * Test if the characters are equal using LSAME. * IF( .NOT.LSAME( CA( I: I ), CB( I: I ) ) ) $ GO TO 20 * 10 CONTINUE LSAMEN = .TRUE. * 20 CONTINUE RETURN * * End of LSAMEN * END diff --git a/src/math_util.f90 b/src/math_util.f90 index 60817ff..6f4a4e5 100644 --- a/src/math_util.f90 +++ b/src/math_util.f90 @@ -1,291 +1,291 @@ !> !> @file math_util.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> MODULE math_util ! ! MATH_UTIL: Some math utilities. ! ! T.M. Tran, CRPP-EPFL ! December 2012 ! ! ! Notes: ! - Assume the Fortran 2008 intrinsic BESSEL_JN(n,x) exists! ! IMPLICIT NONE DOUBLE PRECISION, PARAMETER :: pi=4.0d0*ATAN(1.0d0) ! CONTAINS ELEMENTAL FUNCTION bessjp(n,x) ! ! Derivative of J_n ! DOUBLE PRECISION :: bessjp INTEGER, INTENT(in) :: n DOUBLE PRECISION, INTENT(in) :: x ! IF(n.EQ.0) THEN bessjp = -bessel_jn(1,x) ELSE bessjp = 0.5d0*(bessel_jn(n-1,x)-bessel_jn(n+1,x)) END IF END FUNCTION bessjp !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ FUNCTION root_bessj(n, s, info) ! ! s^th root of j_n ! DOUBLE PRECISION :: root_bessj INTEGER, INTENT(in) :: n INTEGER, INTENT(in) :: s INTEGER, OPTIONAL, INTENT(out) :: info ! DOUBLE PRECISION :: b0,b1,b2,b3,b5,b7,t0,t1,t3,t5,t7,fn,fk,f1,f2,f3 DOUBLE PRECISION :: c1=1.8557571d0, c2=1.033150d0, c3=.00397d0, c4=.0908d0,& & c5=.043d0 DOUBLE PRECISION :: zero INTEGER :: iter ! fn = REAL(ABS(n),8) IF(s.EQ.1) THEN ! first zero IF(n.EQ.0) THEN zero = c1+c2-c3-c4+c5 ELSE f1 = fn**(1.d0/3.d0) f2 = f1*f1*fn f3 = f1*fn*fn zero = fn+c1*f1+(c2/f1)-(c3/fn)-(c4/f2)+(c5/f3) END IF ELSE ! Other zeros t0 = 4.d0*fn*fn t1 = t0-1.d0 t3 = 4.d0*t1*(7.d0*t0-31.d0) t5 = 32.d0*t1*((83.d0*t0-982.d0)*t0+3779.d0) t7 = 64.d0*t1*(((6949.d0*t0-153855.d0)*t0+1585743.d0)*t0 & -6277237.d0) fk = REAL(s,8) ! b0 = (fk+.5d0*fn-.25d0)*pi! mac mahon's series for k>>n b1 = 8.d0*b0 b2 = b1*b1 b3 = 3.d0*b1*b2 b5 = 5.d0*b3*b2 b7 = 7.d0*b5*b2 zero = b0-(t1/b1)-(t3/b3)-(t5/b5)-(t7/b7) END IF CALL newton(iter) IF(PRESENT(info)) info = iter root_bessj = zero CONTAINS SUBROUTINE newton(iter) INTEGER, INTENT(out) :: iter INTEGER :: itermx = 20 DOUBLE PRECISION :: dx, tol tol = EPSILON(1.0d0)*zero iter = 0 DO iter = iter+1 dx = -bessel_jn(n,zero)/bessjp(n,zero) zero = zero+dx IF(iter.GE.itermx .OR. ABS(dx).LT.tol) EXIT END DO END SUBROUTINE newton END FUNCTION root_bessj !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ FUNCTION root_bessjp(n, s, info) ! ! s^th root of derivative of j_n ! DOUBLE PRECISION :: root_bessjp INTEGER, INTENT(in) :: n INTEGER, INTENT(in) :: s INTEGER, OPTIONAL, INTENT(out) :: info ! DOUBLE PRECISION :: c1=0.8086165D0, c2=0.072490D0, c3=.05097D0, c4=.0094D0 DOUBLE PRECISION :: b0,b1,b2,b3,b5,b7,t0,t1,t3,t5,t7,fn,fk,f1,f2 INTEGER :: iter DOUBLE PRECISION :: zero ! IF(n.EQ.0 .AND. s.EQ.1) THEN root_bessjp = 0.0d0 IF(PRESENT(info)) info = 0 RETURN END IF ! fn = REAL(ABS(n),8) fk = REAL(s,8) ! IF(s.GT.1) THEN ! ! McMahon's series for s >> n b0 = (fk+.5d0*fn-.75d0)*pi b1 = 8.d0*b0 b2 = b1*b1 b3 = 3.d0*b1*b2 b5 = 5.d0*b3*b2 b7 = 7.d0*b5*b2 t0 = 4.d0*fn*fn t1 = t0+3.d0 t3 = 4.d0*((7.d0*t0+82.d0)*t0-9.d0) t5 = 32.d0*(((83.d0*t0+2075.d0)*t0-3039.d0)*t0+3537.d0) t7 = 64.d0*((((6949.d0*t0+296492.d0)*t0-1248002.d0)*t0 & +7414380.d0)*t0-5853627.d0) zero = b0-(t1/b1)-(t3/b3)-(t5/b5)-(t7/b7) ELSE ! ! Tchebychev's series for s <= n f1 = fn**(1.d0/3.d0) f2 = f1*f1*fn zero = fn+c1*f1+(c2/f1)-(c3/fn)+(c4/f2) END IF ! CALL newton(iter) root_bessjp = zero IF(PRESENT(info)) info = iter CONTAINS SUBROUTINE newton(iter) INTEGER, INTENT(out) :: iter INTEGER :: itermx = 20 DOUBLE PRECISION :: dx, tol tol = EPSILON(1.0d0)*zero iter = 0 DO iter = iter+1 dx = -bessel_jn(n,zero)/bessjp(n,zero) dx = -2.0d0 * (bessel_jn(n-1,zero)-bessel_jn(n+1,zero)) / & & (bessel_jn(n-2,zero)-2.d0*bessel_jn(n,zero)+& & bessel_jn(n+2,zero)) zero = zero+dx IF(iter.GE.itermx .OR. ABS(dx).LT.tol) EXIT END DO END SUBROUTINE newton END FUNCTION root_bessjp !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ !!$ PURE FUNCTION BESSEL_JN(n,x) RESULT(bessj) !!$ DOUBLE PRECISION, INTENT(in) :: x !!$ DOUBLE PRECISION :: bessj !!$ DOUBLE PRECISION BIGNO,BIGNI !!$ INTEGER n,IACC !!$ PARAMETER (IACC=40,BIGNO=1.d10,BIGNI=1.d-10) !!$ INTEGER j,jsum,m !!$ DOUBLE PRECISION ax,bj,bjm,bjp,sum,tox,bessj0,bessj1 !!$ IF( n.EQ.0 ) THEN !!$ bessj = bessj0(x) !!$ RETURN !!$ ELSE IF( n.EQ.1 ) THEN !!$ bessj = bessj1(x) !!$ RETURN !!$ ENDIF !!$ ax=ABS(x) !!$ IF(ax.EQ.0.d0)THEN !!$ bessj=0.d0 !!$ ELSE IF(ax.GT.float(n))THEN !!$ tox=2./ax !!$ bjm=bessj0(ax) !!$ bj=bessj1(ax) !!$ DO j=1,n-1 !!$ bjp=j*tox*bj-bjm !!$ bjm=bj !!$ bj=bjp !!$ END DO !!$ bessj=bj !!$ ELSE !!$ tox=2./ax !!$ m=2*((n+INT(SQRT(float(IACC*n))))/2) !!$ bessj=0.d0 !!$ jsum=0 !!$ sum=0.d0 !!$ bjp=0.d0 !!$ bj=1. !!$ DO j=m,1,-1 !!$ bjm=j*tox*bj-bjp !!$ bjp=bj !!$ bj=bjm !!$ IF(ABS(bj).GT.BIGNO)THEN !!$ bj=bj*BIGNI !!$ bjp=bjp*BIGNI !!$ bessj=bessj*BIGNI !!$ sum=sum*BIGNI !!$ ENDIF !!$ IF(jsum.NE.0)sum=sum+bj !!$ jsum=1-jsum !!$ IF(j.EQ.n)bessj=bjp !!$ END DO !!$ sum=2.*sum-bj !!$ bessj=bessj/sum !!$ ENDIF !!$ IF(x.LT.0.d0.AND.MOD(n,2).EQ.1)bessj=-bessj !!$ RETURN !!$ END FUNCTION bessel_jn !!$ !!$ PURE FUNCTION bessj0(x) !!$ DOUBLE PRECISION, INTENT(in) :: x !!$ DOUBLE PRECISION bessj0 !!$ DOUBLE PRECISION ax,xx,z !!$ DOUBLE PRECISION p1,p2,p3,p4,p5,q1,q2,q3,q4,q5,r1,r2,r3,r4,r5,r6,s1,s2,s3,s4,s5,s6,y !!$ SAVE p1,p2,p3,p4,p5,q1,q2,q3,q4,q5,r1,r2,r3,r4,r5,r6,s1,s2,s3,s4,s5,s6 !!$ DATA p1,p2,p3,p4,p5/1.d0,-.1098628627d-2,.2734510407d-4,-.2073370639d-5,.2093887211d-6/ !!$ DATA q1,q2,q3,q4,q5/-.1562499995d-1,.1430488765d-3,-.6911147651d-5,.7621095161d-6,-.934945152d-7/ !!$ DATA r1,r2,r3,r4,r5,r6/57568490574.d0,-13362590354.d0,651619640.7d0,-11214424.18d0,& !!$ & 77392.33017d0,-184.9052456d0/ !!$ DATA s1,s2,s3,s4,s5,s6/57568490411.d0,1029532985.d0,9494680.718d0,59272.64853d0,267.8532712d0,1.d0/ !!$ IF(ABS(x).LT.8.)THEN !!$ y=x**2 !!$ bessj0=(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6)))))/(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6))))) !!$ ELSE !!$ ax=ABS(x) !!$ z=8./ax !!$ y=z**2 !!$ xx=ax-.785398164 !!$ bessj0=SQRT(.636619772/ax)*(COS(xx)*(p1+y*(p2+y*(p3+y*(p4+y*p5))))-z*SIN(xx)*(q1+y*(q2+y*(q3+y*(q4+y*q5))))) !!$ ENDIF !!$ RETURN !!$ END FUNCTION bessj0 !!$ !!$ PURE FUNCTION bessj1(x) !!$ DOUBLE PRECISION, INTENT(in) :: x !!$ DOUBLE PRECISION bessj1 !!$ DOUBLE PRECISION ax,xx,z !!$ DOUBLE PRECISION p1,p2,p3,p4,p5,q1,q2,q3,q4,q5,r1,r2,r3,r4,r5,r6,s1,s2,s3,s4,s5,s6,y !!$ SAVE p1,p2,p3,p4,p5,q1,q2,q3,q4,q5,r1,r2,r3,r4,r5,r6,s1,s2,s3,s4,s5,s6 !!$ DATA r1,r2,r3,r4,r5,r6/72362614232.d0,-7895059235.d0,242396853.1d0,-2972611.439d0,15704.48260d0,-30.16036606d0/ !!$ DATA s1,s2,s3,s4,s5,s6/144725228442.d0,2300535178.d0,18583304.74d0,99447.43394d0,376.9991397d0,1.d0/ !!$ DATA p1,p2,p3,p4,p5/1.d0,.183105d-2,-.3516396496d-4,.2457520174d-5,-.240337019d-6/ !!$ DATA q1,q2,q3,q4,q5/.04687499995d0,-.2002690873d-3,.8449199096d-5,-.88228987d-6,.105787412d-6/ !!$ IF(ABS(x).LT.8.)THEN !!$ y=x**2 !!$ bessj1=x*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6)))))/(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6))))) !!$ ELSE !!$ ax=ABS(x) !!$ z=8.d0/ax !!$ y=z**2 !!$ xx=ax-2.356194491d0 !!$ bessj1=SQRT(.636619772d0/ax)*(COS(xx)*(p1+y*(p2+y*(p3+y*(p4+y*p5))))- & !!$ & z*SIN(xx)*(q1+y*(q2+y*(q3+y*(q4+y*q5)))))*SIGN(1.d0,x) !!$ ENDIF !!$ RETURN !!$ END FUNCTION bessj1 END MODULE math_util diff --git a/src/matrix.f90 b/src/matrix.f90 index 22ea822..fc8372e 100644 --- a/src/matrix.f90 +++ b/src/matrix.f90 @@ -1,3295 +1,3295 @@ !> !> @file matrix.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> MODULE matrix ! ! MATRIX: Simple interface to the direct solver LAPACK. ! ! T.M. Tran, CRPP-EPFL ! February 2007 ! IMPLICIT NONE ! TYPE gbmat ! Lapack General Band matrix storage INTEGER :: kl, ku, rank INTEGER :: mrows, ncols INTEGER :: nterms, kmat INTEGER, DIMENSION(:), POINTER :: piv => null() DOUBLE PRECISION, DIMENSION(:,:), POINTER :: val => null() END TYPE gbmat ! TYPE gemat ! Lapack General DENSE matrix storage INTEGER :: rank INTEGER :: mrows, ncols INTEGER :: nterms, kmat INTEGER, DIMENSION(:), POINTER :: piv => null() DOUBLE PRECISION, DIMENSION(:,:), POINTER :: val => null() END TYPE gemat ! TYPE pbmat ! Lapack Pack Band matrix storage (super-diagonals) INTEGER :: ku, rank INTEGER :: nterms, kmat DOUBLE PRECISION, DIMENSION(:,:), POINTER :: val => null() END TYPE pbmat ! TYPE zgbmat ! Lapack General Band matrix storage INTEGER :: kl, ku, rank INTEGER :: mrows, ncols INTEGER :: nterms, kmat INTEGER, DIMENSION(:), POINTER :: piv => null() DOUBLE COMPLEX, DIMENSION(:,:), POINTER :: val => NULL() END TYPE zgbmat ! TYPE zgemat ! Lapack General DENSE matrix storage INTEGER :: rank INTEGER :: mrows, ncols INTEGER :: nterms, kmat INTEGER, DIMENSION(:), POINTER :: piv => null() DOUBLE COMPLEX, DIMENSION(:,:), POINTER :: val => null() END TYPE zgemat ! TYPE zpbmat ! Lapack Pack Band matrix storage (super-diagonals) INTEGER :: ku, rank INTEGER :: nterms, kmat DOUBLE COMPLEX, DIMENSION(:,:), POINTER :: val => null() END TYPE zpbmat ! TYPE periodic_mat TYPE(gbmat) :: mat INTEGER :: nterms DOUBLE PRECISION, DIMENSION(:,:), POINTER :: & & matu => null(), & & matvt => null() END TYPE periodic_mat ! TYPE zperiodic_mat TYPE(zgbmat) :: mat INTEGER :: nterms DOUBLE COMPLEX, DIMENSION(:,:), POINTER :: & & matu => null(), & & matvt => null() END TYPE zperiodic_mat ! !-------------------------------------------------------------------------------- INTERFACE init MODULE PROCEDURE init_gb, init_ge, init_pb, & & init_zgb, init_zge, init_zpb, & & init_periodic, init_zperiodic END INTERFACE INTERFACE getvalp MODULE PROCEDURE getvalp_gb, getvalp_ge, getvalp_pb, & & getvalp_zgb, getvalp_zge, getvalp_zpb END INTERFACE INTERFACE mcopy MODULE PROCEDURE mcopy_gb, mcopy_ge, mcopy_pb, & & mcopy_zgb, mcopy_zge, mcopy_zpb, & & mcopy_periodic, mcopy_zperiodic END INTERFACE INTERFACE maddto MODULE PROCEDURE maddto_gb, maddto_ge, maddto_pb, & & maddto_zgb, maddto_zge, maddto_zpb, & & maddto_periodic, maddto_zperiodic END INTERFACE INTERFACE destroy MODULE PROCEDURE destroy_gb, destroy_ge, destroy_pb, & & destroy_zgb, destroy_zge, destroy_zpb, & & destroy_periodic, destroy_zperiodic END INTERFACE INTERFACE updtmat MODULE PROCEDURE updt_gb, updt_ge, updt_pb, & & updt_zgb, updt_zpb, & & updt_periodic, updt_zperiodic END INTERFACE INTERFACE getele MODULE PROCEDURE getele_gb, getele_pb, & & getele_zgb, getele_zpb, & & getele_periodic, getele_zperiodic END INTERFACE INTERFACE putele MODULE PROCEDURE putele_gb, putele_pb, & & putele_zgb, putele_zpb, & & putele_periodic, putele_zperiodic END INTERFACE INTERFACE getcol MODULE PROCEDURE getcol_gb, getcol_pb, & & getcol_zgb, getcol_zpb, & & getcol_periodic, getcol_zperiodic END INTERFACE INTERFACE getrow MODULE PROCEDURE getrow_gb, getrow_ge, getrow_pb, & & getrow_zgb, getrow_zpb, & & getrow_periodic, getrow_zperiodic END INTERFACE INTERFACE putcol MODULE PROCEDURE putcol_gb, putcol_ge, putcol_pb, & & putcol_zgb, putcol_zpb, & & putcol_periodic, putcol_zperiodic END INTERFACE INTERFACE putrow MODULE PROCEDURE putrow_gb, putrow_ge, putrow_pb, & & putrow_zgb, putrow_zpb, & & putrow_periodic, putrow_zperiodic END INTERFACE INTERFACE factor MODULE PROCEDURE factor_gb, factor_ge, factor_pb, & & factor_zgb, factor_zge, factor_zpb, & & factor_periodic, factor_zperiodic END INTERFACE INTERFACE bsolve MODULE PROCEDURE bsolve_gb1, bsolve_gbn, bsolve_ge1, bsolve_gen, & & bsolve_pb1, bsolve_pbn, & & bsolve_periodic1, bsolve_periodicn, & & bsolve_zperiodic1, bsolve_zperiodicn, & & bsolve_zgb1, bsolve_zgbn, bsolve_zge1, bsolve_zgen, & & bsolve_zpb1, bsolve_zpbn END INTERFACE INTERFACE vmx MODULE PROCEDURE vmx_gb, vmx_gbn, vmx_pb, vmx_pbn, & & vmx_zgb, vmx_zgbn, vmx_zpb, vmx_zpbn, & & vmx_ge, vmx_gen, vmx_zge, vmx_zgen, & & vmx_periodic, vmx_zperiodic END INTERFACE INTERFACE determinant MODULE PROCEDURE determinant_ge, determinant_gb, determinant_pb, & & determinant_zge, determinant_zgb, determinant_zpb END INTERFACE INTERFACE putmat MODULE PROCEDURE putmat_gb END INTERFACE INTERFACE getmat MODULE PROCEDURE getmat_gb END INTERFACE INTERFACE kron MODULE PROCEDURE kron_ge END INTERFACE kron ! CONTAINS !=========================================================================== SUBROUTINE init_ge(n, nterms, mat, kmat, mrows) ! ! Initialize Lapack General Dense matrice ! INTEGER, INTENT(in) :: n, nterms INTEGER, OPTIONAL :: kmat INTEGER, OPTIONAL :: mrows TYPE(gemat) :: mat ! mat%ncols = n mat%mrows = n IF(PRESENT(mrows)) THEN mat%mrows = mrows END IF mat%rank = n ! Warning: ok if square matrix mat%nterms = nterms IF(PRESENT(kmat)) mat%kmat = kmat IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val) IF( ASSOCIATED(mat%piv)) DEALLOCATE(mat%piv) ALLOCATE(mat%val(mat%mrows,mat%ncols)) ALLOCATE(mat%piv(MIN(mat%mrows,mat%ncols))) mat%val = 0.0d0 mat%piv = 0 END SUBROUTINE init_ge !=========================================================================== SUBROUTINE init_gb(kl, ku, n, nterms, mat, kmat, mrows) ! ! Initialize Lapack General Banded matrice ! INTEGER, INTENT(in) :: kl, ku, n, nterms INTEGER, OPTIONAL :: kmat INTEGER, OPTIONAL :: mrows TYPE(gbmat) :: mat INTEGER :: lda ! mat%kl = kl mat%ku = ku mat%ncols = n mat%mrows = n IF(PRESENT(mrows)) THEN mat%mrows = mrows END IF mat%rank = n ! Warning: ok if square matrix mat%nterms = nterms IF(PRESENT(kmat)) mat%kmat = kmat lda = 2*kl + ku + 1 IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val) IF( ASSOCIATED(mat%piv)) DEALLOCATE(mat%piv) ALLOCATE(mat%val(lda,n)) ALLOCATE(mat%piv(n)) mat%val = 0.0d0 mat%piv = 0 END SUBROUTINE init_gb !=========================================================================== SUBROUTINE init_periodic(kl, ku, n, nterms, mat, kmat) ! ! Initialize Lapack Periodic General Banded matrice ! INTEGER, INTENT(in) :: kl, ku, n, nterms INTEGER, OPTIONAL :: kmat TYPE(periodic_mat) :: mat INTEGER :: i,j DOUBLE PRECISION, PARAMETER :: zero=0.0d0, one=1.0d0 ! ! In band matrix matp%mat is a GB matrix IF( PRESENT(kmat)) THEN CALL init(kl, ku, n, nterms, mat%mat, kmat) ELSE CALL init(kl, ku, n, nterms, mat%mat) END IF mat%nterms = nterms ! ! Off band matrices IF( ASSOCIATED(mat%matu)) DEALLOCATE(mat%matu) IF( ASSOCIATED(mat%matvt)) DEALLOCATE(mat%matvt) ALLOCATE(mat%matu(n, kl+ku)) ALLOCATE(mat%matvt(kl+ku,n)) ! mat%matu = zero mat%matvt = zero ! kl=3, ku=2 DO j=1,kl ! [ 1 0 0 . . ] mat%matu(j,j) = one ! [ 0 1 0 . . ] END DO ! [ 0 0 1 . . ] ! ! [ 0 . . . . ] DO j=1,ku ! [ . . . . . ] i=n-ku+j ! [ . . . 1 0 ] mat%matu(i,ku+j) = one ! [ . . . 0 1 ] END DO END SUBROUTINE init_periodic !=========================================================================== SUBROUTINE init_zperiodic(kl, ku, n, nterms, mat, kmat) ! ! Initialize Lapack Periodic General Banded matrice ! INTEGER, INTENT(in) :: kl, ku, n, nterms INTEGER, OPTIONAL :: kmat TYPE(zperiodic_mat) :: mat INTEGER :: i,j DOUBLE PRECISION, PARAMETER :: zero=0.0d0, one=1.0d0 ! ! In band matrix matp%mat is a GB matrix IF( PRESENT(kmat)) THEN CALL init(kl, ku, n, nterms, mat%mat, kmat) ELSE CALL init(kl, ku, n, nterms, mat%mat) END IF mat%nterms = nterms ! ! Off band matrices IF( ASSOCIATED(mat%matu)) DEALLOCATE(mat%matu) IF( ASSOCIATED(mat%matvt)) DEALLOCATE(mat%matvt) ALLOCATE(mat%matu(n, kl+ku)) ALLOCATE(mat%matvt(kl+ku,n)) ! mat%matu = zero mat%matvt = zero ! kl=3, ku=2 DO j=1,kl ! [ 1 0 0 . . ] mat%matu(j,j) = one ! [ 0 1 0 . . ] END DO ! [ 0 0 1 . . ] ! ! [ 0 . . . . ] DO j=1,ku ! [ . . . . . ] i=n-ku+j ! [ . . . 1 0 ] mat%matu(i,ku+j) = one ! [ . . . 0 1 ] END DO END SUBROUTINE init_zperiodic !=========================================================================== SUBROUTINE init_pb(ku, n, nterms, mat, kmat) ! ! Initialize Lapack Packed Banded matrice ! INTEGER, INTENT(in) :: ku, n, nterms INTEGER, OPTIONAL :: kmat TYPE(pbmat) :: mat INTEGER :: lda ! mat%ku = ku mat%rank = n mat%nterms = nterms IF(PRESENT(kmat)) mat%kmat = kmat lda = ku + 1 IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val) ALLOCATE(mat%val(lda,n)) mat%val = 0.0d0 END SUBROUTINE init_pb !=========================================================================== SUBROUTINE init_zge(n, nterms, mat, kmat, mrows) ! ! Initialize Lapack General Dense matrice ! INTEGER, INTENT(in) :: n, nterms INTEGER, OPTIONAL :: kmat INTEGER, OPTIONAL :: mrows TYPE(zgemat) :: mat ! mat%ncols = n mat%mrows = n IF(PRESENT(mrows)) THEN mat%mrows = mrows END IF mat%rank = n mat%nterms = nterms IF(PRESENT(kmat)) mat%kmat = kmat IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val) IF( ASSOCIATED(mat%piv)) DEALLOCATE(mat%piv) ALLOCATE(mat%val(mat%mrows,mat%ncols)) ALLOCATE(mat%piv(MIN(mat%mrows,mat%ncols))) mat%val = 0.0d0 mat%piv = 0 END SUBROUTINE init_zge !=========================================================================== SUBROUTINE init_zgb(kl, ku, n, nterms, mat, kmat, mrows) ! ! Initialize Lapack General Banded matrice ! INTEGER, INTENT(in) :: kl, ku, n, nterms INTEGER, OPTIONAL :: kmat INTEGER, OPTIONAL :: mrows TYPE(zgbmat) :: mat INTEGER :: lda ! mat%kl = kl mat%ku = ku mat%ncols = n mat%mrows = n IF(PRESENT(mrows)) THEN mat%mrows = mrows END IF mat%rank = n ! Warning: ok if square matrix mat%nterms = nterms IF(PRESENT(kmat)) mat%kmat = kmat lda = 2*kl + ku + 1 IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val) IF( ASSOCIATED(mat%piv)) DEALLOCATE(mat%piv) ALLOCATE(mat%val(lda,n)) ALLOCATE(mat%piv(n)) mat%val = 0.0d0 mat%piv = 0 END SUBROUTINE init_zgb !=========================================================================== SUBROUTINE init_zpb(ku, n, nterms, mat, kmat) ! ! Initialize Lapack Packed Banded matrice ! INTEGER, INTENT(in) :: ku, n, nterms INTEGER, OPTIONAL :: kmat TYPE(zpbmat) :: mat INTEGER :: lda ! mat%ku = ku mat%rank = n mat%nterms = nterms IF(PRESENT(kmat)) mat%kmat = kmat lda = ku + 1 IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val) ALLOCATE(mat%val(lda,n)) mat%val = 0.0d0 END SUBROUTINE init_zpb !=========================================================================== SUBROUTINE mcopy_ge(mata, matb) ! ! Matrix copy: B = A ! TYPE(gemat) :: mata, matb ! matb%rank = mata%rank matb%mrows = mata%mrows matb%ncols = mata%ncols matb%nterms = mata%nterms matb%kmat = mata%kmat IF( ASSOCIATED(matb%val)) DEALLOCATE(matb%val) IF( ASSOCIATED(matb%piv)) DEALLOCATE(matb%piv) ALLOCATE(matb%val(matb%mrows,matb%ncols)) ALLOCATE(matb%piv(MIN(matb%mrows,matb%ncols))) matb%val = mata%val matb%piv = mata%piv END SUBROUTINE mcopy_ge !=========================================================================== SUBROUTINE mcopy_gb(mata, matb) ! ! Matrix copy: B = A ! TYPE(gbmat) :: mata, matb INTEGER :: n, lda ! n = mata%rank matb%kl = mata%kl matb%ku = mata%ku matb%rank = mata%rank matb%mrows = mata%mrows matb%ncols = mata%ncols matb%nterms = mata%nterms matb%kmat = mata%kmat lda = 2*mata%kl + mata%ku + 1 IF( ASSOCIATED(matb%val)) DEALLOCATE(matb%val) IF( ASSOCIATED(matb%piv)) DEALLOCATE(matb%piv) ALLOCATE(matb%val(lda,n)) ALLOCATE(matb%piv(n)) matb%val = mata%val matb%piv = mata%piv END SUBROUTINE mcopy_gb !=========================================================================== SUBROUTINE mcopy_periodic(mata, matb) ! ! Matrix copy: B = A ! TYPE(periodic_mat) :: mata, matb INTEGER :: n, kl, ku ! kl = mata%mat%kl ku = mata%mat%ku n = mata%mat%rank ! CALL mcopy(mata%mat, matb%mat) IF( ASSOCIATED(matb%matu)) DEALLOCATE(matb%matu) IF( ASSOCIATED(matb%matvt)) DEALLOCATE(matb%matvt) ALLOCATE(matb%matu(n,kl+ku)) ALLOCATE(matb%matvt(kl+ku,n)) matb%matu = mata%matu matb%matvt = mata%matvt END SUBROUTINE mcopy_periodic !=========================================================================== SUBROUTINE mcopy_zperiodic(mata, matb) ! ! Matrix copy: B = A ! TYPE(zperiodic_mat) :: mata, matb INTEGER :: n, kl, ku ! kl = mata%mat%kl ku = mata%mat%ku n = mata%mat%rank ! CALL mcopy(mata%mat, matb%mat) IF( ASSOCIATED(matb%matu)) DEALLOCATE(matb%matu) IF( ASSOCIATED(matb%matvt)) DEALLOCATE(matb%matvt) ALLOCATE(matb%matu(n,kl+ku)) ALLOCATE(matb%matvt(kl+ku,n)) matb%matu = mata%matu matb%matvt = mata%matvt END SUBROUTINE mcopy_zperiodic !=========================================================================== SUBROUTINE mcopy_pb(mata, matb) ! ! Matrix copy: B = A ! TYPE(pbmat) :: mata, matb INTEGER :: n, lda ! n = mata%rank matb%ku = mata%ku matb%rank = mata%rank matb%nterms = mata%nterms lda = mata%ku + 1 IF( ASSOCIATED(matb%val)) DEALLOCATE(matb%val) ALLOCATE(matb%val(lda,n)) matb%val = mata%val END SUBROUTINE mcopy_pb !=========================================================================== SUBROUTINE mcopy_zge(mata, matb) ! ! Matrix copy: B = A ! TYPE(zgemat) :: mata, matb INTEGER :: n ! n = mata%rank matb%rank = n IF( ASSOCIATED(matb%val)) DEALLOCATE(matb%val) IF( ASSOCIATED(matb%piv)) DEALLOCATE(matb%piv) ALLOCATE(matb%val(n,n)) ALLOCATE(matb%piv(n)) matb%val = mata%val matb%piv = mata%piv END SUBROUTINE mcopy_zge !=========================================================================== SUBROUTINE mcopy_zgb(mata, matb) ! ! Matrix copy: B = A ! TYPE(zgbmat) :: mata, matb INTEGER :: n, lda ! n = mata%rank matb%kl = mata%kl matb%ku = mata%ku matb%rank = mata%rank matb%nterms = mata%nterms lda = 2*mata%kl + mata%ku + 1 IF( ASSOCIATED(matb%val)) DEALLOCATE(matb%val) IF( ASSOCIATED(matb%piv)) DEALLOCATE(matb%piv) ALLOCATE(matb%val(lda,n)) ALLOCATE(matb%piv(n)) matb%val = mata%val matb%piv = mata%piv END SUBROUTINE mcopy_zgb !=========================================================================== SUBROUTINE mcopy_zpb(mata, matb) ! ! Matrix copy: B = A ! TYPE(zpbmat) :: mata, matb INTEGER :: n, lda ! n = mata%rank matb%ku = mata%ku matb%rank = mata%rank matb%nterms = mata%nterms lda = mata%ku + 1 IF( ASSOCIATED(matb%val)) DEALLOCATE(matb%val) ALLOCATE(matb%val(lda,n)) matb%val = mata%val END SUBROUTINE mcopy_zpb !=========================================================================== SUBROUTINE maddto_ge(mata, alpha, matb) ! ! A <- A + alpha*B ! TYPE(gemat) :: mata, matb DOUBLE PRECISION :: alpha ! mata%val = mata%val + alpha*matb%val END SUBROUTINE maddto_ge !=========================================================================== SUBROUTINE maddto_gb(mata, alpha, matb) ! ! A <- A + alpha*B ! TYPE(gbmat) :: mata, matb DOUBLE PRECISION :: alpha ! mata%val = mata%val + alpha*matb%val END SUBROUTINE maddto_gb !=========================================================================== SUBROUTINE maddto_periodic(mata, alpha, matb) ! ! A <- A + alpha*B ! TYPE(periodic_mat) :: mata, matb DOUBLE PRECISION :: alpha ! mata%mat%val = mata%mat%val + alpha*matb%mat%val mata%matvt = mata%matvt + alpha*matb%matvt END SUBROUTINE maddto_periodic !=========================================================================== SUBROUTINE maddto_zperiodic(mata, alpha, matb) ! ! A <- A + alpha*B ! TYPE(zperiodic_mat) :: mata, matb DOUBLE COMPLEX :: alpha ! mata%mat%val = mata%mat%val + alpha*matb%mat%val mata%matvt = mata%matvt + alpha*matb%matvt END SUBROUTINE maddto_zperiodic !=========================================================================== SUBROUTINE maddto_pb(mata, alpha, matb) ! ! A <- A + alpha*B ! TYPE(pbmat) :: mata, matb DOUBLE PRECISION :: alpha ! mata%val = mata%val + alpha*matb%val END SUBROUTINE maddto_pb !=========================================================================== SUBROUTINE maddto_zge(mata, alpha, matb) ! ! A <- A + alpha*B ! TYPE(zgemat) :: mata, matb DOUBLE COMPLEX :: alpha ! mata%val = mata%val + alpha*matb%val END SUBROUTINE maddto_zge !=========================================================================== SUBROUTINE maddto_zgb(mata, alpha, matb) ! ! A <- A + alpha*B ! TYPE(zgbmat) :: mata, matb DOUBLE COMPLEX :: alpha ! mata%val = mata%val + alpha*matb%val END SUBROUTINE maddto_zgb !=========================================================================== SUBROUTINE maddto_zpb(mata, alpha, matb) ! ! A <- A + alpha*B ! TYPE(zpbmat) :: mata, matb DOUBLE COMPLEX :: alpha ! mata%val = mata%val + alpha*matb%val END SUBROUTINE maddto_zpb !=========================================================================== SUBROUTINE getvalp_ge(mat, p) ! ! Get pointer to matrix coefficients ! TYPE(gemat) :: mat DOUBLE PRECISION, DIMENSION(:,:), POINTER :: p ! p => mat%val END SUBROUTINE getvalp_ge !=========================================================================== SUBROUTINE getvalp_gb(mat, p) ! ! Get pointer to matrix coefficients ! TYPE(gbmat) :: mat DOUBLE PRECISION, DIMENSION(:,:), POINTER :: p ! p => mat%val END SUBROUTINE getvalp_gb !=========================================================================== SUBROUTINE getvalp_pb(mat, p) ! ! Get pointer to matrix coefficients ! TYPE(pbmat) :: mat DOUBLE PRECISION, DIMENSION(:,:), POINTER :: p ! p => mat%val END SUBROUTINE getvalp_pb !=========================================================================== SUBROUTINE getvalp_zge(mat, p) ! ! Get pointer to matrix coefficients ! TYPE(zgemat) :: mat DOUBLE COMPLEX, DIMENSION(:,:), POINTER :: p ! p => mat%val END SUBROUTINE getvalp_zge !=========================================================================== SUBROUTINE getvalp_zgb(mat, p) ! ! Get pointer to matrix coefficients ! TYPE(zgbmat) :: mat DOUBLE COMPLEX, DIMENSION(:,:), POINTER :: p ! p => mat%val END SUBROUTINE getvalp_zgb !=========================================================================== SUBROUTINE getvalp_zpb(mat, p) ! ! Get pointer to matrix coefficients ! TYPE(zpbmat) :: mat DOUBLE COMPLEX, DIMENSION(:,:), POINTER :: p ! p => mat%val END SUBROUTINE getvalp_zpb !=========================================================================== SUBROUTINE destroy_gb(mat) ! ! Deallocate pointers in mat ! TYPE(gbmat) :: mat IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val) IF( ASSOCIATED(mat%piv)) DEALLOCATE(mat%piv) END SUBROUTINE destroy_gb !=========================================================================== SUBROUTINE destroy_ge(mat) ! ! Deallocate pointers in mat ! TYPE(gemat) :: mat IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val) IF( ASSOCIATED(mat%piv)) DEALLOCATE(mat%piv) END SUBROUTINE destroy_ge !=========================================================================== SUBROUTINE destroy_pb(mat) ! ! Deallocate pointers in mat ! TYPE(pbmat) :: mat IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val) END SUBROUTINE destroy_pb !=========================================================================== SUBROUTINE destroy_zgb(mat) ! ! Deallocate pointers in mat ! TYPE(zgbmat) :: mat IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val) IF( ASSOCIATED(mat%piv)) DEALLOCATE(mat%piv) END SUBROUTINE destroy_zgb !=========================================================================== SUBROUTINE destroy_zge(mat) ! ! Deallocate pointers in mat ! TYPE(zgemat) :: mat IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val) IF( ASSOCIATED(mat%piv)) DEALLOCATE(mat%piv) END SUBROUTINE destroy_zge !=========================================================================== SUBROUTINE destroy_zpb(mat) ! ! Deallocate pointers in mat ! TYPE(zpbmat) :: mat IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val) END SUBROUTINE destroy_zpb !=========================================================================== SUBROUTINE destroy_periodic(mat) ! ! Deallocate pointers in mat ! TYPE(periodic_mat) :: mat IF( ASSOCIATED(mat%matu)) DEALLOCATE(mat%matu) IF( ASSOCIATED(mat%matvt)) DEALLOCATE(mat%matvt) CALL destroy(mat%mat) END SUBROUTINE destroy_periodic !=========================================================================== SUBROUTINE destroy_zperiodic(mat) ! ! Deallocate pointers in mat ! TYPE(zperiodic_mat) :: mat IF( ASSOCIATED(mat%matu)) DEALLOCATE(mat%matu) IF( ASSOCIATED(mat%matvt)) DEALLOCATE(mat%matvt) CALL destroy(mat%mat) END SUBROUTINE destroy_zperiodic !=========================================================================== SUBROUTINE updt_gb(mat, i, j, val) ! ! Update element Aij into banded matrix ! TYPE(gbmat), INTENT(inout) :: mat INTEGER, INTENT(in) :: i, j DOUBLE PRECISION, INTENT(in) :: val INTEGER :: lda, n, ib ! lda = SIZE(mat%val, 1) n = mat%rank ib = mat%kl + mat%ku + i - j + 1 IF( (ib .GT. lda) .OR. (j .GT. n) .OR. (j .LT. 1)) THEN WRITE(*,*) 'UPDT: i, j out of range ', i, j, lda, n STOP '*** Abnormal EXIT in MODULE matrix ***' END IF mat%val(ib,j) = mat%val(ib,j) + val END SUBROUTINE updt_gb !=========================================================================== SUBROUTINE updt_ge(mat, i, j, val) ! ! Update element Aij into banded matrix ! TYPE(gemat), INTENT(inout) :: mat INTEGER, INTENT(in) :: i, j DOUBLE PRECISION, INTENT(in) :: val ! IF( (i .GT. mat%mrows) .OR. (j .GT. mat%ncols) .OR. (j .LT. 1) .OR. (i.LT.1)) THEN WRITE(*,*) 'UPDT: i, j out of range ', i, j, mat%mrows, mat%ncols STOP '*** Abnormal EXIT in MODULE matrix ***' END IF mat%val(i,j) = mat%val(i,j) + val END SUBROUTINE updt_ge !=========================================================================== SUBROUTINE updt_periodic(mat, i, j, val) ! ! Update element Aij into periodic banded matrix ! TYPE(periodic_mat), INTENT(inout) :: mat INTEGER, INTENT(in) :: i, j DOUBLE PRECISION, INTENT(in) :: val INTEGER :: n, kl, ku ! n = mat%mat%rank kl = mat%mat%kl ku = mat%mat%ku ! IF( i.LE.kl .AND. j.GE.n-kl+1 ) THEN ! ! Put into C(1:kl, 1:kl) = V^T(1:kl, n-kl+1:n) mat%matvt(i,j) = mat%matvt(i,j) + val ! ELSE IF( i.GE.n-ku+1 .AND. j.LE.ku ) THEN ! ! Put into D(1:ku,1:ku) = V^T(kl+1:kl+ku, 1:ku) mat%matvt(i-n+kl+ku,j) = mat%matvt(i-n+kl+ku,j) + val ! ELSE ! ! Put into the banded matrix CALL updtmat(mat%mat, i, j, val) ! END IF END SUBROUTINE updt_periodic !=========================================================================== SUBROUTINE updt_zperiodic(mat, i, j, val) ! ! Update element Aij into periodic banded matrix ! TYPE(zperiodic_mat), INTENT(inout) :: mat INTEGER, INTENT(in) :: i, j DOUBLE COMPLEX, INTENT(in) :: val INTEGER :: n, kl, ku ! n = mat%mat%rank kl = mat%mat%kl ku = mat%mat%ku ! IF( i.LE.kl .AND. j.GE.n-kl+1 ) THEN ! ! Put into C(1:kl, 1:kl) = V^T(1:kl, n-kl+1:n) mat%matvt(i,j) = mat%matvt(i,j) + val ! ELSE IF( i.GE.n-ku+1 .AND. j.LE.ku ) THEN ! ! Put into D(1:ku,1:ku) = V^T(kl+1:kl+ku, 1:ku) mat%matvt(i-n+kl+ku,j) = mat%matvt(i-n+kl+ku,j) + val ! ELSE ! ! Put into the banded matrix CALL updtmat(mat%mat, i, j, val) ! END IF END SUBROUTINE updt_zperiodic !=========================================================================== SUBROUTINE updt_pb(mat, i, j, val) ! ! Update element Aij into banded matrix ! TYPE(pbmat), INTENT(inout) :: mat INTEGER, INTENT(in) :: i, j DOUBLE PRECISION, INTENT(in) :: val INTEGER :: lda, n, ib ! lda = SIZE(mat%val, 1) n = mat%rank IF( i .LE. j ) THEN ib = mat%ku + i - j + 1 IF( (ib .GT. lda) .OR. (j .GT. n) .OR. (j .LT. 1)) THEN WRITE(*,*) 'UPDT: i, j out of range ', i, j, lda, n STOP '*** Abnormal EXIT in MODULE matrix ***' END IF mat%val(ib,j) = mat%val(ib,j) + val END IF END SUBROUTINE updt_pb !=========================================================================== SUBROUTINE updt_zgb(mat, i, j, val) ! ! Update element Aij into banded matrix ! TYPE(zgbmat), INTENT(inout) :: mat INTEGER, INTENT(in) :: i, j DOUBLE COMPLEX, INTENT(in) :: val INTEGER :: lda, n, ib ! lda = SIZE(mat%val, 1) n = mat%rank ib = mat%kl + mat%ku + i - j + 1 IF( (ib .GT. lda) .OR. (j .GT. n) .OR. (j .LT. 1)) THEN WRITE(*,*) 'UPDT: i, j out of range ', i, j, lda, n STOP '*** Abnormal EXIT in MODULE matrix ***' END IF mat%val(ib,j) = mat%val(ib,j) + val END SUBROUTINE updt_zgb !=========================================================================== SUBROUTINE updt_zpb(mat, i, j, val) ! ! Update element Aij into banded matrix ! TYPE(zpbmat), INTENT(inout) :: mat INTEGER, INTENT(in) :: i, j DOUBLE COMPLEX, INTENT(in) :: val INTEGER :: lda, n, ib ! lda = SIZE(mat%val, 1) n = mat%rank IF( i .LE. j ) THEN ib = mat%ku + i - j + 1 IF( (ib .GT. lda) .OR. (j .GT. n) .OR. (j .LT. 1)) THEN WRITE(*,*) 'UPDT: i, j out of range ', i, j, lda, n STOP '*** Abnormal EXIT in MODULE matrix ***' END IF mat%val(ib,j) = mat%val(ib,j) + val END IF END SUBROUTINE updt_zpb !=========================================================================== SUBROUTINE getele_gb(mat, i, j, val) ! ! Get element (i,j) of matrix ! TYPE(gbmat), INTENT(in) :: mat DOUBLE PRECISION, INTENT (OUT) :: val INTEGER, INTENT (IN) :: i, j INTEGER :: lda, n, ib ! lda = SIZE(mat%val, 1) n = mat%rank ib = mat%kl + mat%ku + i - j + 1 IF( (ib .GT. lda) .OR. (j .GT. n)) THEN WRITE(*,*) 'GETELE: i, j out of range ', i, j STOP '*** Abnormal EXIT in MODULE matrix***' END IF val = mat%val(ib,j) END SUBROUTINE getele_gb !=========================================================================== SUBROUTINE getele_periodic(mat, i, j, val) ! ! Get element Aij of periodic banded matrix ! TYPE(periodic_mat), INTENT(in) :: mat INTEGER, INTENT(in) :: i, j DOUBLE PRECISION, INTENT(out) :: val INTEGER :: n, kl, ku ! n = mat%mat%rank kl = mat%mat%kl ku = mat%mat%ku ! IF( i.LE.kl .AND. j.GE.n-kl+1 ) THEN ! ! Put into C(1:kl, 1:kl) = V^T(1:kl, n-kl+1:n) val =mat%matvt(i,j) ! ELSE IF( i.GE.n-ku+1 .AND. j.LE.ku ) THEN ! ! Put into D(1:ku,1:ku) = V^T(kl+1:kl+ku, 1:ku) val = mat%matvt(i-n+kl+ku,j) ! ELSE ! ! Put into the banded matrix CALL getele(mat%mat, i, j, val) ! END IF END SUBROUTINE getele_periodic !=========================================================================== SUBROUTINE getele_zperiodic(mat, i, j, val) ! ! Get element Aij of periodic banded matrix ! TYPE(zperiodic_mat), INTENT(in) :: mat INTEGER, INTENT(in) :: i, j DOUBLE COMPLEX, INTENT(out) :: val INTEGER :: n, kl, ku ! n = mat%mat%rank kl = mat%mat%kl ku = mat%mat%ku ! IF( i.LE.kl .AND. j.GE.n-kl+1 ) THEN ! ! Put into C(1:kl, 1:kl) = V^T(1:kl, n-kl+1:n) val =mat%matvt(i,j) ! ELSE IF( i.GE.n-ku+1 .AND. j.LE.ku ) THEN ! ! Put into D(1:ku,1:ku) = V^T(kl+1:kl+ku, 1:ku) val = mat%matvt(i-n+kl+ku,j) ! ELSE ! ! Put into the banded matrix CALL getele(mat%mat, i, j, val) ! END IF END SUBROUTINE getele_zperiodic !=========================================================================== SUBROUTINE getele_pb(mat, i, j, val) ! ! Get element (i,j) of matrix ! TYPE(pbmat), INTENT(in) :: mat DOUBLE PRECISION, INTENT (OUT) :: val INTEGER, INTENT (IN) :: i, j INTEGER :: lda, n, ib, irow, jcol ! lda = SIZE(mat%val, 1) n = mat%rank IF( i .LE. j ) THEN ! Upper triangular matrix irow = i; jcol = j ELSE ! Lower triangular matrix irow = j; jcol = i END IF ib = mat%ku + irow - jcol + 1 IF( (ib .GT. lda) .OR. (jcol .GT. n)) THEN WRITE(*,*) 'GETELE: i, j out of range ', i, j STOP '*** Abnormal EXIT in MODULE matrix***' END IF val = mat%val(ib,jcol) END SUBROUTINE getele_pb !=========================================================================== SUBROUTINE getele_zgb(mat, i, j, val) ! ! Get element (i,j) of matrix ! TYPE(zgbmat), INTENT(in) :: mat DOUBLE COMPLEX, INTENT (OUT) :: val INTEGER, INTENT (IN) :: i, j INTEGER :: lda, n, ib ! lda = SIZE(mat%val, 1) n = mat%rank ib = mat%kl + mat%ku + i - j + 1 IF( (ib .GT. lda) .OR. (j .GT. n)) THEN WRITE(*,*) 'GETELE: i, j out of range ', i, j STOP '*** Abnormal EXIT in MODULE matrix***' END IF val = mat%val(ib,j) END SUBROUTINE getele_zgb !=========================================================================== SUBROUTINE getele_zpb(mat, i, j, val) ! ! Get element (i,j) of matrix ! TYPE(zpbmat), INTENT(in) :: mat DOUBLE COMPLEX, INTENT (OUT) :: val INTEGER, INTENT (IN) :: i, j INTEGER :: lda, n, ib, irow, jcol ! lda = SIZE(mat%val, 1) n = mat%rank ! IF( i .LE. j ) THEN ! Upper triangular matrix irow = i; jcol = j ib = mat%ku + irow - jcol + 1 IF( (ib .GT. lda) .OR. (jcol .GT. n)) THEN WRITE(*,*) 'GETELE: i, j out of range ', i, j STOP '*** Abnormal EXIT in MODULE matrix***' END IF val = mat%val(ib,jcol) RETURN ELSE ! Lower triangular matrix irow = j; jcol = i ib = mat%ku + irow - jcol + 1 IF( (ib .GT. lda) .OR. (jcol .GT. n)) THEN WRITE(*,*) 'GETELE: i, j out of range ', i, j STOP '*** Abnormal EXIT in MODULE matrix***' END IF val = CONJG(mat%val(ib,jcol)) END IF END SUBROUTINE getele_zpb !=========================================================================== SUBROUTINE putele_gb(mat, i, j, val) ! ! Put element (i,j) of matrix ! TYPE(gbmat), INTENT(inout) :: mat DOUBLE PRECISION, INTENT (in) :: val INTEGER, INTENT (in) :: i, j INTEGER :: lda, n, ib ! lda = SIZE(mat%val, 1) n = mat%rank ib = mat%kl + mat%ku + i - j + 1 IF( (ib .GT. lda) .OR. (j .GT. n)) THEN WRITE(*,*) 'GETELE: i, j out of range ', i, j STOP '*** Abnormal EXIT in MODULE matrix ***' END IF mat%val(ib,j) = val END SUBROUTINE putele_gb !=========================================================================== SUBROUTINE putele_periodic(mat, i, j, val) ! ! Put element Aij into periodic banded matrix ! TYPE(periodic_mat), INTENT(inout) :: mat INTEGER, INTENT(in) :: i, j DOUBLE PRECISION, INTENT(in) :: val INTEGER :: n, kl, ku ! n = mat%mat%rank kl = mat%mat%kl ku = mat%mat%ku ! IF( i.LE.kl .AND. j.GE.n-kl+1 ) THEN ! ! Put into C(1:kl, 1:kl) = V^T(1:kl, n-kl+1:n) mat%matvt(i,j) = val ! ELSE IF( i.GE.n-ku+1 .AND. j.LE.ku ) THEN ! ! Put into D(1:ku,1:ku) = V^T(kl+1:kl+ku, 1:ku) mat%matvt(i-n+kl+ku,j) = val ! ELSE ! ! Put into the banded matrix CALL putele(mat%mat, i, j, val) ! END IF END SUBROUTINE putele_periodic !=========================================================================== SUBROUTINE putele_zperiodic(mat, i, j, val) ! ! Put element Aij into periodic banded matrix ! TYPE(zperiodic_mat), INTENT(inout) :: mat INTEGER, INTENT(in) :: i, j DOUBLE COMPLEX, INTENT(in) :: val INTEGER :: n, kl, ku ! n = mat%mat%rank kl = mat%mat%kl ku = mat%mat%ku ! IF( i.LE.kl .AND. j.GE.n-kl+1 ) THEN ! ! Put into C(1:kl, 1:kl) = V^T(1:kl, n-kl+1:n) mat%matvt(i,j) = val ! ELSE IF( i.GE.n-ku+1 .AND. j.LE.ku ) THEN ! ! Put into D(1:ku,1:ku) = V^T(kl+1:kl+ku, 1:ku) mat%matvt(i-n+kl+ku,j) = val ! ELSE ! ! Put into the banded matrix CALL putele(mat%mat, i, j, val) ! END IF END SUBROUTINE putele_zperiodic !=========================================================================== SUBROUTINE putele_pb(mat, i, j, val) ! ! Put element (i,j) of matrix ! TYPE(pbmat), INTENT(inout) :: mat DOUBLE PRECISION, INTENT (in) :: val INTEGER, INTENT (IN) :: i, j INTEGER :: lda, n, ib, irow, jcol ! lda = SIZE(mat%val, 1) n = mat%rank IF( i .LE. j ) THEN ! Upper triangular matrix irow = i; jcol = j ELSE ! Lower triangular matrix irow = j; jcol = i END IF ib = mat%ku + irow - jcol + 1 IF( (ib .GT. lda) .OR. (jcol .GT. n)) THEN WRITE(*,*) 'PUTELE: i, j out of range ', i, j STOP '*** Abnormal EXIT in MODULE matrix***' END IF mat%val(ib,jcol) = val END SUBROUTINE putele_pb !=========================================================================== SUBROUTINE putele_zgb(mat, i, j, val) ! ! Put element (i,j) of matrix ! TYPE(zgbmat), INTENT(inout) :: mat DOUBLE COMPLEX, INTENT (in) :: val INTEGER, INTENT (in) :: i, j INTEGER :: lda, n, ib ! lda = SIZE(mat%val, 1) n = mat%rank ib = mat%kl + mat%ku + i - j + 1 IF( (ib .GT. lda) .OR. (j .GT. n)) THEN WRITE(*,*) 'GETELE: i, j out of range ', i, j STOP '*** Abnormal EXIT in MODULE matrix ***' END IF mat%val(ib,j) = val END SUBROUTINE putele_zgb !=========================================================================== SUBROUTINE putele_zpb(mat, i, j, val) ! ! Put element (i,j) of matrix ! TYPE(zpbmat), INTENT(inout) :: mat DOUBLE COMPLEX, INTENT (in) :: val INTEGER, INTENT (IN) :: i, j INTEGER :: lda, n, ib, irow, jcol ! lda = SIZE(mat%val, 1) n = mat%rank IF( i .LE. j ) THEN ! Upper triangular matrix irow = i; jcol = j ib = mat%ku + irow - jcol + 1 IF( (ib .GT. lda) .OR. (jcol .GT. n)) THEN WRITE(*,*) 'GETELE: i, j out of range ', i, j STOP '*** Abnormal EXIT in MODULE matrix***' END IF mat%val(ib,jcol) = val ELSE ! Lower triangular matrix irow = j; jcol = i ib = mat%ku + irow - jcol + 1 IF( (ib .GT. lda) .OR. (jcol .GT. n)) THEN WRITE(*,*) 'PUTELE: i, j out of range ', i, j STOP '*** Abnormal EXIT in MODULE matrix***' END IF mat%val(ib,jcol) = CONJG(val) END IF END SUBROUTINE putele_zpb !=========================================================================== SUBROUTINE getcol_gb(mat, j, arr) ! ! Get a column from matrix ! TYPE(gbmat), INTENT(in) :: mat INTEGER, INTENT(in) :: j DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: arr INTEGER :: m, kl, ku INTEGER :: ibmin, ibmax, imin, imax ! kl = mat%kl ku = mat%ku m = mat%mrows IF( SIZE(arr) .LT. m ) THEN WRITE(*,*) 'GETCOL: size of arr too small' STOP '*** Abnormal EXIT in MODULE matrix ***' END IF arr(1:m) = 0.0d0 imin = MAX(1,j-ku) imax = MIN(m, j+kl) ibmin = kl+ku+imin-j+1 ibmax = kl+ku+imax-j+1 arr(imin:imax) = mat%val(ibmin:ibmax,j) END SUBROUTINE getcol_gb !=========================================================================== SUBROUTINE getcol_periodic(mat, j, arr) ! ! Get a column from matrix ! TYPE(periodic_mat), INTENT(in) :: mat INTEGER, INTENT(in) :: j DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: arr INTEGER :: n, kl, ku ! kl = mat%mat%kl ku = mat%mat%ku n = mat%mat%rank ! CALL getcol(mat%mat, j, arr) ! IF( j.GE.n-kl+1 ) THEN arr(1:kl) = mat%matvt(1:kl,j) ELSE IF( j.LE.ku ) THEN arr(n-ku+1:n) = mat%matvt(kl+1:kl+ku,j) END IF END SUBROUTINE getcol_periodic !=========================================================================== SUBROUTINE getcol_zperiodic(mat, j, arr) ! ! Get a column from matrix ! TYPE(zperiodic_mat), INTENT(in) :: mat INTEGER, INTENT(in) :: j DOUBLE COMPLEX, DIMENSION(:), INTENT(out) :: arr INTEGER :: n, kl, ku ! kl = mat%mat%kl ku = mat%mat%ku n = mat%mat%rank ! CALL getcol(mat%mat, j, arr) ! IF( j.GE.n-kl+1 ) THEN arr(1:kl) = mat%matvt(1:kl,j) ELSE IF( j.LE.ku ) THEN arr(n-ku+1:n) = mat%matvt(kl+1:kl+ku,j) END IF END SUBROUTINE getcol_zperiodic !=========================================================================== SUBROUTINE getcol_pb(mat, j, arr) ! ! Get a column from matrix ! TYPE(pbmat), INTENT(in) :: mat INTEGER, INTENT(in) :: j DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: arr INTEGER :: n, ku INTEGER :: i, ib, ibmin, ibmax, imin, imax ! ku = mat%ku n = mat%rank IF( SIZE(arr) .LT. n ) THEN WRITE(*,*) 'GETCOL: size of arr too small' STOP '*** Abnormal EXIT in MODULE matrix ***' END IF arr(1:n) = 0.0d0 ! imin=MAX(1,j-ku); imax=j ! The column in the upper diagonal part ibmin=ku+1+imin-j ; ibmax=ku+1+imax-j arr(imin:imax) = mat%val(ibmin:ibmax,j) ! imin=j+1; imax = MIN(n,j+ku) ! The column in the lower diagonal part DO i=imin,imax ib = ku+1+j-i arr(i) = mat%val(ib,i) END DO END SUBROUTINE getcol_pb !=========================================================================== SUBROUTINE getcol_zgb(mat, j, arr) ! ! Get a column from matrix ! TYPE(zgbmat), INTENT(in) :: mat INTEGER, INTENT(in) :: j DOUBLE COMPLEX, DIMENSION(:), INTENT(out) :: arr INTEGER :: m, kl, ku INTEGER :: ibmin, ibmax, imin, imax ! kl = mat%kl ku = mat%ku m = mat%mrows IF( SIZE(arr) .LT. m ) THEN WRITE(*,*) 'GETCOL: size of arr too small' STOP '*** Abnormal EXIT in MODULE matrix ***' END IF arr(1:m) = 0.0d0 imin = MAX(1,j-ku) imax = MIN(m, j+kl) ibmin = kl+ku+imin-j+1 ibmax = kl+ku+imax-j+1 arr(imin:imax) = mat%val(ibmin:ibmax,j) END SUBROUTINE getcol_zgb !=========================================================================== SUBROUTINE getcol_zpb(mat, j, arr) ! ! Get a column from matrix ! TYPE(zpbmat), INTENT(in) :: mat INTEGER, INTENT(in) :: j DOUBLE COMPLEX, DIMENSION(:), INTENT(out) :: arr INTEGER :: n, ku INTEGER :: i, ib, ibmin, ibmax, imin, imax ! ku = mat%ku n = mat%rank IF( SIZE(arr) .LT. n ) THEN WRITE(*,*) 'GETCOL: size of arr too small' STOP '*** Abnormal EXIT in MODULE matrix ***' END IF arr(1:n) = 0.0d0 ! imin=MAX(1,j-ku); imax=j ! The column in the upper diagonal part ibmin=ku+1+imin-j ; ibmax=ku+1+imax-j arr(imin:imax) = mat%val(ibmin:ibmax,j) ! imin=j+1; imax = MIN(n,j+ku) ! The column in the lower diagonal part DO i=imin,imax ib = ku+1+j-i arr(i) = CONJG(mat%val(ib,i)) END DO END SUBROUTINE getcol_zpb !=========================================================================== SUBROUTINE getrow_gb(mat, i, arr) ! ! Get a row from matrix ! TYPE(gbmat), INTENT(in) :: mat INTEGER, INTENT(in) :: i DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: arr INTEGER :: n, kl, ku INTEGER :: j, ib, jmin, jmax ! kl = mat%kl ku = mat%ku n = mat%rank IF( SIZE(arr) .LT. n ) THEN WRITE(*,*) 'GETROW: size of arr too small' STOP '*** Abnormal EXIT in MODULE matrix ***' END IF arr(1:n) = 0.0d0 jmin = MAX(1,i-kl) jmax = MIN(n, i+ku) DO j=jmin,jmax ib = kl+ku+i-j+1 arr(j) = mat%val(ib,j) END DO END SUBROUTINE getrow_gb !=========================================================================== SUBROUTINE getrow_ge(mat, i, arr) ! ! Get a row from matrix ! TYPE(gemat), INTENT(in) :: mat INTEGER, INTENT(in) :: i DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: arr INTEGER :: n ! n = mat%ncols IF( SIZE(arr) .LT. n ) THEN WRITE(*,*) 'GETROW: size of arr too small' STOP '*** Abnormal EXIT in MODULE matrix ***' END IF arr(1:n) = mat%val(i,1:n) END SUBROUTINE getrow_ge !=========================================================================== SUBROUTINE getrow_periodic(mat, i, arr) ! ! Get a row from matrix ! TYPE(periodic_mat), INTENT(in) :: mat INTEGER, INTENT(in) :: i DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: arr INTEGER :: n, kl, ku ! kl = mat%mat%kl ku = mat%mat%ku n = mat%mat%rank ! CALL getrow(mat%mat, i, arr) ! IF( i.LE.kl ) THEN arr(n-kl+1:n) = mat%matvt(i,n-kl+1:n) ELSE IF( i.GE.n-ku+1 ) THEN arr(1:ku) = mat%matvt(i-n+kl+ku,1:ku) END IF END SUBROUTINE getrow_periodic !=========================================================================== SUBROUTINE getrow_zperiodic(mat, i, arr) ! ! Get a row from matrix ! TYPE(zperiodic_mat), INTENT(in) :: mat INTEGER, INTENT(in) :: i DOUBLE COMPLEX, DIMENSION(:), INTENT(out) :: arr INTEGER :: n, kl, ku ! kl = mat%mat%kl ku = mat%mat%ku n = mat%mat%rank ! CALL getrow(mat%mat, i, arr) ! IF( i.LE.kl ) THEN arr(n-kl+1:n) = mat%matvt(i,n-kl+1:n) ELSE IF( i.GE.n-ku+1 ) THEN arr(1:ku) = mat%matvt(i-n+kl+ku,1:ku) END IF END SUBROUTINE getrow_zperiodic !=========================================================================== SUBROUTINE getrow_pb(mat, i, arr) ! ! Get a row from matrix ! TYPE(pbmat), INTENT(in) :: mat INTEGER, INTENT(in) :: i DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: arr INTEGER :: n, ku INTEGER :: j, ib, ibmin, ibmax, jmin, jmax ! ku = mat%ku n = mat%rank IF( SIZE(arr) .LT. n ) THEN WRITE(*,*) 'GETROW: size of arr too small' STOP '*** Abnormal EXIT in MODULE matrix ***' END IF arr(1:n) = 0.0d0 ! jmin=i; jmax=MIN(n,i+ku) DO j=jmin,jmax ib=ku+1+i-j arr(j) = mat%val(ib,j) END DO ! jmin=MAX(1,i-ku); jmax=i-1 DO j=jmin,jmax ib=ku+1+j-i arr(j) = mat%val(ib,i) END DO END SUBROUTINE getrow_pb !=========================================================================== SUBROUTINE getrow_zgb(mat, i, arr) ! ! Get a row from matrix ! TYPE(zgbmat), INTENT(in) :: mat INTEGER, INTENT(in) :: i DOUBLE COMPLEX, DIMENSION(:), INTENT(out) :: arr INTEGER :: n, kl, ku INTEGER :: j, ib, jmin, jmax ! kl = mat%kl ku = mat%ku n = mat%rank IF( SIZE(arr) .LT. n ) THEN WRITE(*,*) 'GETROW: size of arr too small' STOP '*** Abnormal EXIT in MODULE matrix ***' END IF arr(1:n) = 0.0d0 jmin = MAX(1,i-kl) jmax = MIN(n, i+ku) DO j=jmin,jmax ib = kl+ku+i-j+1 arr(j) = mat%val(ib,j) END DO END SUBROUTINE getrow_zgb !=========================================================================== SUBROUTINE getrow_zpb(mat, i, arr) ! ! Get a row from matrix ! TYPE(zpbmat), INTENT(in) :: mat INTEGER, INTENT(in) :: i DOUBLE COMPLEX, DIMENSION(:), INTENT(out) :: arr INTEGER :: n, ku INTEGER :: j, ib, ibmin, ibmax, jmin, jmax ! ku = mat%ku n = mat%rank IF( SIZE(arr) .LT. n ) THEN WRITE(*,*) 'GETROW: size of arr too small' STOP '*** Abnormal EXIT in MODULE matrix ***' END IF arr(1:n) = 0.0d0 ! jmin=i; jmax=MIN(n,i+ku) DO j=jmin,jmax ib=ku+1+i-j arr(j) = mat%val(ib,j) END DO ! jmin=MAX(1,i-ku); jmax=i-1 DO j=jmin,jmax ib=ku+1+j-i arr(j) = CONJG(mat%val(ib,i)) END DO END SUBROUTINE getrow_zpb !=========================================================================== SUBROUTINE putcol_gb(mat, j, arr) ! ! Put a column from matrix ! TYPE(gbmat), INTENT(inout) :: mat INTEGER, INTENT(in) :: j DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: arr INTEGER :: m, kl, ku INTEGER :: ibmin, ibmax, imin, imax ! kl = mat%kl ku = mat%ku m = mat%mrows IF( SIZE(arr) .LT. m ) THEN WRITE(*,*) 'PUTCOL: size of arr too small' STOP '*** Abnormal EXIT in MODULE matrix ***' END IF imin = MAX(1,j-ku) imax = MIN(m, j+kl) ibmin = kl+ku+imin-j+1 ibmax = kl+ku+imax-j+1 mat%val(ibmin:ibmax,j) = arr(imin:imax) END SUBROUTINE putcol_gb !=========================================================================== SUBROUTINE putcol_ge(mat, j, arr) ! ! Put a column from matrix ! TYPE(gemat), INTENT(inout) :: mat INTEGER, INTENT(in) :: j DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: arr INTEGER :: m ! m = mat%mrows IF( SIZE(arr) .LT. m ) THEN WRITE(*,*) 'PUTCOL: size of arr too small' STOP '*** Abnormal EXIT in MODULE matrix ***' END IF mat%val(:,j) = arr(:) END SUBROUTINE putcol_ge !=========================================================================== SUBROUTINE putrow_periodic(mat, i, arr) ! ! Put a row to matrix ! TYPE(periodic_mat), INTENT(inout) :: mat INTEGER, INTENT(in) :: i DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: arr INTEGER :: n, kl, ku ! kl = mat%mat%kl ku = mat%mat%ku n = mat%mat%rank ! CALL putrow(mat%mat, i, arr) ! IF( i.LE.kl ) THEN mat%matvt(i,n-kl+1:n) = arr(n-kl+1:n) ELSE IF( i.GE.n-ku+1 ) THEN mat%matvt(i-n+kl+ku,1:ku) = arr(1:ku) END IF END SUBROUTINE putrow_periodic !=========================================================================== SUBROUTINE putrow_zperiodic(mat, i, arr) ! ! Put a row to matrix ! TYPE(zperiodic_mat), INTENT(inout) :: mat INTEGER, INTENT(in) :: i DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: arr INTEGER :: n, kl, ku ! kl = mat%mat%kl ku = mat%mat%ku n = mat%mat%rank ! CALL putrow(mat%mat, i, arr) ! IF( i.LE.kl ) THEN mat%matvt(i,n-kl+1:n) = arr(n-kl+1:n) ELSE IF( i.GE.n-ku+1 ) THEN mat%matvt(i-n+kl+ku,1:ku) = arr(1:ku) END IF END SUBROUTINE putrow_zperiodic !=========================================================================== SUBROUTINE putcol_periodic(mat, j, arr) ! ! Put a column into matrix ! TYPE(periodic_mat), INTENT(inout) :: mat INTEGER, INTENT(in) :: j DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: arr INTEGER :: n, kl, ku ! kl = mat%mat%kl ku = mat%mat%ku n = mat%mat%rank ! CALL putcol(mat%mat, j, arr) ! IF( j.GE.n-kl+1 ) THEN mat%matvt(1:kl,j) = arr(1:kl) ELSE IF( j.LE.ku ) THEN mat%matvt(kl+1:kl+ku,j) = arr(n-ku+1:n) END IF END SUBROUTINE putcol_periodic !=========================================================================== SUBROUTINE putcol_zperiodic(mat, j, arr) ! ! Put a column into matrix ! TYPE(zperiodic_mat), INTENT(inout) :: mat INTEGER, INTENT(in) :: j DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: arr INTEGER :: n, kl, ku ! kl = mat%mat%kl ku = mat%mat%ku n = mat%mat%rank ! CALL putcol(mat%mat, j, arr) ! IF( j.GE.n-kl+1 ) THEN mat%matvt(1:kl,j) = arr(1:kl) ELSE IF( j.LE.ku ) THEN mat%matvt(kl+1:kl+ku,j) = arr(n-ku+1:n) END IF END SUBROUTINE putcol_zperiodic !=========================================================================== SUBROUTINE putcol_pb(mat, j, arr) ! ! Put a column from matrix ! TYPE(pbmat), INTENT(inout) :: mat INTEGER, INTENT(in) :: j DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: arr INTEGER :: n, ku INTEGER :: i, ib, ibmin, ibmax, imin, imax ! ku = mat%ku n = mat%rank IF( SIZE(arr) .LT. n ) THEN WRITE(*,*) 'PUTCOL: size of arr too small' STOP '*** Abnormal EXIT in MODULE matrix ***' END IF imin = MAX(1,j-ku); imax = j ! The column in the upper diagonal part ibmin = ku+imin-j+1; ibmax = ku+imax-j+1 mat%val(ibmin:ibmax,j) = arr(imin:imax) ! imin=j+1; imax = MIN(n,j+ku) ! The column in the lower diagonal part DO i=imin,imax ib = ku+1+j-i mat%val(ib,i) = arr(i) END DO END SUBROUTINE putcol_pb !=========================================================================== SUBROUTINE putcol_zgb(mat, j, arr) ! ! Put a column from matrix ! TYPE(zgbmat), INTENT(inout) :: mat INTEGER, INTENT(in) :: j DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: arr INTEGER :: m, kl, ku INTEGER :: ibmin, ibmax, imin, imax ! kl = mat%kl ku = mat%ku m = mat%mrows IF( SIZE(arr) .LT. m ) THEN WRITE(*,*) 'PUTCOL: size of arr too small' STOP '*** Abnormal EXIT in MODULE matrix ***' END IF imin = MAX(1,j-ku) imax = MIN(m, j+kl) ibmin = kl+ku+imin-j+1 ibmax = kl+ku+imax-j+1 mat%val(ibmin:ibmax,j) = arr(imin:imax) END SUBROUTINE putcol_zgb !=========================================================================== SUBROUTINE putcol_zpb(mat, j, arr) ! ! Put a column from matrix ! TYPE(zpbmat), INTENT(inout) :: mat INTEGER, INTENT(in) :: j DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: arr INTEGER :: n, ku, i, ib INTEGER :: ibmin, ibmax, imin, imax ! ku = mat%ku n = mat%rank IF( SIZE(arr) .LT. n ) THEN WRITE(*,*) 'PUTCOL: size of arr too small' STOP '*** Abnormal EXIT in MODULE matrix ***' END IF ! imin=MAX(1,j-ku); imax=j ! The column in the upper diagonal part ibmin=ku+1+imin-j ; ibmax=ku+1+imax-j mat%val(ibmin:ibmax,j) = arr(imin:imax) ! imin=j+1; imax = MIN(n,j+ku) ! The column in the lower diagonal part DO i=imin,imax ib = ku+1+j-i mat%val(ib,i) = CONJG(arr(i)) END DO END SUBROUTINE putcol_zpb !=========================================================================== SUBROUTINE putrow_gb(mat, i, arr) ! ! Put a row from matrix ! TYPE(gbmat), INTENT(inout) :: mat INTEGER, INTENT(in) :: i DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: arr INTEGER :: n, kl, ku INTEGER :: j, ib, jmin, jmax ! kl = mat%kl ku = mat%ku n = mat%rank IF( SIZE(arr) .LT. n ) THEN WRITE(*,*) 'GETCOL: size of arr too small' STOP '*** Abnormal EXIT in MODULE matrix ***' END IF jmin = MAX(1,i-kl) jmax = MIN(n, i+ku) DO j=jmin,jmax ib = kl+ku+i-j+1 mat%val(ib,j) = arr(j) END DO END SUBROUTINE putrow_gb !=========================================================================== SUBROUTINE putrow_ge(mat, i, arr) ! ! Put a row from matrix ! TYPE(gemat), INTENT(inout) :: mat INTEGER, INTENT(in) :: i DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: arr INTEGER :: n, kl, ku INTEGER :: j, ib, jmin, jmax ! n = mat%rank IF( SIZE(arr) .LT. n ) THEN WRITE(*,*) 'PUTROW: size of arr too small' STOP '*** Abnormal EXIT in MODULE matrix ***' END IF mat%val(i,:) = arr(:) END SUBROUTINE putrow_ge !=========================================================================== SUBROUTINE putrow_pb(mat, i, arr) ! ! Put a row from matrix ! TYPE(pbmat), INTENT(inout) :: mat INTEGER, INTENT(in) :: i DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: arr INTEGER :: n, ku INTEGER :: j, ib, jmin, jmax ! ku = mat%ku n = mat%rank IF( SIZE(arr) .LT. n ) THEN WRITE(*,*) 'PUTROW: size of arr too small' STOP '*** Abnormal EXIT in MODULE matrix ***' END IF jmin = i jmax = MIN(n, i+ku) DO j=jmin,jmax ib = ku+i-j+1 mat%val(ib,j) = arr(j) END DO ! jmin=MAX(1,i-ku); jmax=i-1 DO j=jmin,jmax ib=ku+1+j-i mat%val(ib,i) = arr(j) END DO END SUBROUTINE putrow_pb !=========================================================================== SUBROUTINE putrow_zgb(mat, i, arr) ! ! Put a row from matrix ! TYPE(zgbmat), INTENT(inout) :: mat INTEGER, INTENT(in) :: i DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: arr INTEGER :: n, kl, ku INTEGER :: j, ib, jmin, jmax ! kl = mat%kl ku = mat%ku n = mat%rank IF( SIZE(arr) .LT. n ) THEN WRITE(*,*) 'PUTROW: size of arr too small' STOP '*** Abnormal EXIT in MODULE matrix ***' END IF jmin = MAX(1,i-kl) jmax = MIN(n, i+ku) DO j=jmin,jmax ib = kl+ku+i-j+1 mat%val(ib,j) = arr(j) END DO END SUBROUTINE putrow_zgb !=========================================================================== SUBROUTINE putrow_zpb(mat, i, arr) ! ! Put a row from matrix ! TYPE(zpbmat), INTENT(inout) :: mat INTEGER, INTENT(in) :: i DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: arr INTEGER :: n, ku INTEGER :: j, ib, jmin, jmax ! ku = mat%ku n = mat%rank IF( SIZE(arr) .LT. n ) THEN WRITE(*,*) 'PUTROW: size of arr too small' STOP '*** Abnormal EXIT in MODULE matrix ***' END IF jmin = i jmax = MIN(n, i+ku) DO j=jmin,jmax ib = ku+i-j+1 mat%val(ib,j) = arr(j) END DO ! jmin=MAX(1,i-ku); jmax=i-1 DO j=jmin,jmax ib=ku+1+j-i mat%val(ib,i) = CONJG(arr(j)) END DO END SUBROUTINE putrow_zpb !=========================================================================== SUBROUTINE factor_gb(mat,flops) ! ! Factor the matrix, using Lapack ! TYPE(gbmat), INTENT(inout) :: mat DOUBLE PRECISION, OPTIONAL, INTENT (OUT) :: flops INTEGER :: lda, n, m, kl, ku INTEGER :: info DOUBLE PRECISION :: dopgb EXTERNAL dopgb ! lda = SIZE(mat%val, 1) kl = mat%kl ku = mat%ku m = mat%mrows n = mat%ncols CALL dgbtrf(m, n, kl, ku, mat%val, lda, mat%piv, info) IF( info .NE. 0) THEN WRITE(*,*) 'FACTOR: info from GBTRF ', info STOP '*** Abnormal EXIT in MODULE matrix ***' END IF IF( PRESENT(flops) ) THEN flops = dopgb('DGBTRF',m, n, kl, ku, mat%piv) END IF END SUBROUTINE factor_gb !=========================================================================== SUBROUTINE factor_periodic(mat) ! ! Factor the periodic GB matrix, using the ! Sherman-Morrisson-Woodburry formula ! TYPE(periodic_mat), INTENT(inout) :: mat TYPE(gemat) :: hmat DOUBLE PRECISION :: one=1.0d0 INTEGER :: bandw, mr, nc, i ! bandw = SIZE(mat%matvt,1) ! ! Factor A CALL factor(mat%mat) ! IF(bandw .EQ. 0 ) RETURN ! No off band terms ! ! U <-- A^(-1) * U CALL bsolve(mat%mat, mat%matu) ! ! H <-- 1 + V^T * U mr = SIZE(mat%matvt, 1) nc = SIZE(mat%matvt, 2) CALL init(mr, 0, hmat) ! hmat is initialized to 0! DO i=1,mr hmat%val(i,i) = one END DO CALL dgemm('N', 'N', mr, mr, nc, one, mat%matvt, mr, & & mat%matu, nc, one, hmat%val, mr) ! ! V^T <-- H^(-1) V^T CALL factor(hmat) CALL bsolve(hmat, mat%matvt) CALL destroy(hmat) ! END SUBROUTINE factor_periodic !=========================================================================== SUBROUTINE factor_zperiodic(mat) ! ! Factor the periodic GB matrix, using the ! Sherman-Morrisson-Woodburry formula ! TYPE(zperiodic_mat), INTENT(inout) :: mat TYPE(zgemat) :: hmat DOUBLE COMPLEX :: one=1.0d0 INTEGER :: bandw, mr, nc, i ! bandw = SIZE(mat%matvt,1) ! ! Factor A CALL factor(mat%mat) ! IF(bandw .EQ. 0 ) RETURN ! No off band terms ! ! U <-- A^(-1) * U CALL bsolve(mat%mat, mat%matu) ! ! H <-- 1 + V^T * U mr = SIZE(mat%matvt, 1) nc = SIZE(mat%matvt, 2) CALL init(mr, 0, hmat) ! hmat is initialized to 0! DO i=1,mr hmat%val(i,i) = one END DO CALL zgemm('N', 'N', mr, mr, nc, one, mat%matvt, mr, & & mat%matu, nc, one, hmat%val, mr) ! ! V^T <-- H^(-1) V^T CALL factor(hmat) CALL bsolve(hmat, mat%matvt) CALL destroy(hmat) ! END SUBROUTINE factor_zperiodic !=========================================================================== SUBROUTINE factor_pb(mat,flops) ! ! Factor the matrix, using Lapack ! TYPE(pbmat), INTENT(inout) :: mat DOUBLE PRECISION, OPTIONAL, INTENT (OUT) :: flops INTEGER :: lda, n, ku INTEGER :: info DOUBLE PRECISION :: dopla EXTERNAL dopla ! lda = SIZE(mat%val, 1) ku = mat%ku n = mat%rank CALL dpbtrf('U', n, ku, mat%val, lda, info) IF( info .NE. 0) THEN WRITE(*,*) 'FACTOR: info from PBTRF ', info STOP '*** Abnormal EXIT in MODULE matrix ***' END IF IF( PRESENT(flops) ) THEN flops = dopla('DPBTRF', n, n, ku, ku, 1) END IF END SUBROUTINE factor_pb !=========================================================================== SUBROUTINE factor_ge(mat,flops) ! ! Factor the matrix, using Lapack ! TYPE(gemat), INTENT(inout) :: mat DOUBLE PRECISION, OPTIONAL, INTENT (OUT) :: flops INTEGER :: n, m INTEGER :: info DOUBLE PRECISION :: dopla EXTERNAL dopla ! m = mat%mrows n = mat%ncols CALL dgetrf(m, n, mat%val, m, mat%piv, info) IF( info .NE. 0) THEN WRITE(*,*) 'FACTOR: info from GETRF ', info STOP '*** Abnormal EXIT in MODULE matrix ***' END IF IF( PRESENT(flops) ) THEN flops = dopla('DGETRF',m, n, 0, 0, 0) END IF END SUBROUTINE factor_ge !=========================================================================== SUBROUTINE factor_zgb(mat,flops) ! ! Factor the matrix, using Lapack ! TYPE(zgbmat), INTENT(inout) :: mat DOUBLE PRECISION, OPTIONAL, INTENT (OUT) :: flops INTEGER :: lda, n, m, kl, ku INTEGER :: info DOUBLE PRECISION :: dopgb EXTERNAL dopgb ! lda = SIZE(mat%val, 1) kl = mat%kl ku = mat%ku m = mat%mrows n = mat%ncols CALL zgbtrf(m, n, kl, ku, mat%val, lda, mat%piv, info) IF( info .NE. 0) THEN WRITE(*,*) 'FACTOR: info from GBTRF ', info STOP '*** Abnormal EXIT in MODULE matrix ***' END IF IF( PRESENT(flops) ) THEN flops = dopgb('ZGBTRF',m, n, kl, ku, mat%piv) END IF END SUBROUTINE factor_zgb !=========================================================================== SUBROUTINE factor_zpb(mat,flops) ! ! Factor the matrix, using Lapack ! TYPE(zpbmat), INTENT(inout) :: mat DOUBLE PRECISION, OPTIONAL, INTENT (OUT) :: flops INTEGER :: lda, n, ku INTEGER :: info DOUBLE PRECISION :: dopla EXTERNAL dopla ! lda = SIZE(mat%val, 1) ku = mat%ku n = mat%rank CALL zpbtrf('U', n, ku, mat%val, lda, info) IF( info .NE. 0) THEN WRITE(*,*) 'FACTOR: info from PBTRF ', info STOP '*** Abnormal EXIT in MODULE matrix ***' END IF IF( PRESENT(flops) ) THEN flops = dopla('ZPBTRF', n, n, ku, ku, 1) END IF END SUBROUTINE factor_zpb !=========================================================================== SUBROUTINE factor_zge(mat,flops) ! ! Factor the matrix, using Lapack ! TYPE(zgemat), INTENT(inout) :: mat DOUBLE PRECISION, OPTIONAL, INTENT (OUT) :: flops INTEGER :: n, m INTEGER :: info DOUBLE PRECISION :: dopla EXTERNAL dopla ! m = mat%mrows n = mat%ncols CALL zgetrf(m, n, mat%val, m, mat%piv, info) IF( info .NE. 0) THEN WRITE(*,*) 'FACTOR: info from GETRF ', info STOP '*** Abnormal EXIT in MODULE matrix ***' END IF IF( PRESENT(flops) ) THEN flops = dopla('ZGETRF',m, n, 0, 0, 0) END IF END SUBROUTINE factor_zge !=========================================================================== SUBROUTINE bsolve_gb1(mat, rhs, sol) ! ! Backsolve, using Lapack ! TYPE(gbmat), INTENT(inout) :: mat DOUBLE PRECISION, DIMENSION (:) :: rhs DOUBLE PRECISION, DIMENSION (:), OPTIONAL, INTENT (out) :: sol INTEGER :: lda, n, kl, ku INTEGER :: info !---------------------------------------------------------------------- lda = SIZE(mat%val, 1) kl = mat%kl ku = mat%ku n = mat%rank ! IF( PRESENT(sol) ) THEN sol = rhs CALL dgbtrs('N', n, kl, ku, 1, mat%val, lda, mat%piv, sol, n, info) ELSE CALL dgbtrs('N', n, kl, ku, 1, mat%val, lda, mat%piv, rhs, n, info) END IF IF( info .NE. 0) THEN WRITE(*,*) 'FACTOR: info from GBTRS ', info STOP '*** Abnormal EXIT in MODULE matrix ***' END IF END SUBROUTINE bsolve_gb1 !=========================================================================== SUBROUTINE bsolve_periodic1(mat, rhs, sol) ! ! Backsolve, using the Sherman-Morrison-Woodburry formula ! TYPE(periodic_mat), INTENT(inout) :: mat DOUBLE PRECISION, DIMENSION (:) :: rhs DOUBLE PRECISION, DIMENSION (:), OPTIONAL, INTENT (out) :: sol DOUBLE PRECISION :: one=1.0d0, zero=0.0d0, minus1=-1.0d0 DOUBLE PRECISION, ALLOCATABLE :: tarr(:,:) INTEGER :: rank, bandw, nrhs INTEGER :: info !---------------------------------------------------------------------- rank = mat%mat%rank bandw = SIZE(mat%matvt,1) nrhs = 1 ! ! Solve Ay = f IF( PRESENT(sol) ) THEN CALL bsolve(mat%mat, rhs, sol) ELSE CALL bsolve(mat%mat, rhs) END IF ! IF(bandw .EQ. 0 ) RETURN ! No off band terms ! ! t = V^T*y ( = W^T*y ) ALLOCATE(tarr(bandw,nrhs)) IF( PRESENT(sol) ) THEN CALL dgemm('N', 'N', bandw, nrhs, rank, one, mat%matvt, bandw, sol, & & rank, zero, tarr, bandw) ELSE CALL dgemm('N', 'N', bandw, nrhs, rank, one, mat%matvt, bandw, rhs, & & rank, zero, tarr, bandw) END IF ! ! y = y - U*t ( = y-Z*t) IF( PRESENT(sol) ) THEN CALL dgemm('N', 'N', rank, nrhs, bandw, minus1, mat%matu, rank, tarr, & & bandw, one, sol, rank) ELSE CALL dgemm('N', 'N', rank, nrhs, bandw, minus1, mat%matu, rank, tarr, & & bandw, one, rhs, rank) END IF ! DEALLOCATE(tarr) END SUBROUTINE bsolve_periodic1 !=========================================================================== SUBROUTINE bsolve_zperiodic1(mat, rhs, sol) ! ! Backsolve, using the Sherman-Morrison-Woodburry formula ! TYPE(zperiodic_mat), INTENT(inout) :: mat DOUBLE COMPLEX, DIMENSION (:) :: rhs DOUBLE COMPLEX, DIMENSION (:), OPTIONAL, INTENT (out) :: sol DOUBLE COMPLEX :: one=1.0d0, zero=0.0d0, minus1=-1.0d0 DOUBLE COMPLEX, ALLOCATABLE :: tarr(:,:) INTEGER :: rank, bandw, nrhs INTEGER :: info !---------------------------------------------------------------------- rank = mat%mat%rank bandw = SIZE(mat%matvt,1) nrhs = 1 ! ! Solve Ay = f IF( PRESENT(sol) ) THEN CALL bsolve(mat%mat, rhs, sol) ELSE CALL bsolve(mat%mat, rhs) END IF ! IF(bandw .EQ. 0 ) RETURN ! No off band terms ! ! t = V^T*y ( = W^T*y ) ALLOCATE(tarr(bandw,nrhs)) IF( PRESENT(sol) ) THEN CALL zgemm('N', 'N', bandw, nrhs, rank, one, mat%matvt, bandw, sol, & & rank, zero, tarr, bandw) ELSE CALL zgemm('N', 'N', bandw, nrhs, rank, one, mat%matvt, bandw, rhs, & & rank, zero, tarr, bandw) END IF ! ! y = y - U*t ( = y-Z*t) IF( PRESENT(sol) ) THEN CALL zgemm('N', 'N', rank, nrhs, bandw, minus1, mat%matu, rank, tarr, & & bandw, one, sol, rank) ELSE CALL zgemm('N', 'N', rank, nrhs, bandw, minus1, mat%matu, rank, tarr, & & bandw, one, rhs, rank) END IF ! DEALLOCATE(tarr) END SUBROUTINE bsolve_zperiodic1 !=========================================================================== SUBROUTINE bsolve_pb1(mat, rhs, sol) ! ! Backsolve, using Lapack ! TYPE(pbmat), INTENT(inout) :: mat DOUBLE PRECISION, DIMENSION (:) :: rhs DOUBLE PRECISION, DIMENSION (:), OPTIONAL, INTENT (out) :: sol INTEGER :: lda, n, ku INTEGER :: info !---------------------------------------------------------------------- lda = SIZE(mat%val, 1) ku = mat%ku n = mat%rank ! IF( PRESENT(sol) ) THEN sol = rhs CALL dpbtrs('U', n, ku, 1, mat%val, lda, sol, n, info) ELSE CALL dpbtrs('U', n, ku, 1, mat%val, lda, rhs, n, info) END IF IF( info .NE. 0) THEN WRITE(*,*) 'FACTOR: info from PBTRS ', info STOP '*** Abnormal EXIT in MODULE matrix ***' END IF END SUBROUTINE bsolve_pb1 !=========================================================================== SUBROUTINE bsolve_ge1(mat, rhs, sol) ! ! Backsolve, using Lapack ! TYPE(gemat), INTENT(inout) :: mat DOUBLE PRECISION, DIMENSION (:) :: rhs DOUBLE PRECISION, DIMENSION (:), OPTIONAL, INTENT (out) :: sol INTEGER :: n INTEGER :: info !---------------------------------------------------------------------- n = mat%rank ! IF( PRESENT(sol) ) THEN sol = rhs CALL dgetrs('N', n, 1, mat%val, n, mat%piv, sol, n, info) ELSE CALL dgetrs('N', n, 1, mat%val, n, mat%piv, rhs, n, info) END IF IF( info .NE. 0) THEN WRITE(*,*) 'FACTOR: info from GETRS ', info STOP '*** Abnormal EXIT in MODULE matrix ***' END IF END SUBROUTINE bsolve_ge1 !=========================================================================== SUBROUTINE bsolve_gbn(mat, rhs, sol) ! ! Backsolve, using Lapack ! TYPE(gbmat), INTENT(inout) :: mat DOUBLE PRECISION, DIMENSION (:,:) :: rhs DOUBLE PRECISION, DIMENSION (:,:), OPTIONAL, INTENT (out) :: sol INTEGER :: lda, n, nrhs, kl, ku INTEGER :: info !---------------------------------------------------------------------- lda = SIZE(mat%val, 1) kl = mat%kl ku = mat%ku n = mat%rank nrhs = SIZE(rhs,2) ! IF( PRESENT(sol) ) THEN sol(:,1:nrhs) = rhs(:,1:nrhs) CALL dgbtrs('N', n, kl, ku, nrhs, mat%val, lda, mat%piv, sol, n, info) ELSE CALL dgbtrs('N', n, kl, ku, nrhs, mat%val, lda, mat%piv, rhs, n, info) END IF IF( info .NE. 0) THEN WRITE(*,*) 'FACTOR: info from GBTRS ', info STOP '*** Abnormal EXIT in MODULE matrix ***' END IF END SUBROUTINE bsolve_gbn !=========================================================================== SUBROUTINE bsolve_periodicn(mat, rhs, sol) ! ! Backsolve, using the Sherman-Morrison-Woodburry formula ! TYPE(periodic_mat), INTENT(inout) :: mat DOUBLE PRECISION, DIMENSION (:,:) :: rhs DOUBLE PRECISION, DIMENSION (:,:), OPTIONAL, INTENT (out) :: sol DOUBLE PRECISION :: one=1.0d0, zero=0.0d0, minus1=-1.0d0 DOUBLE PRECISION, ALLOCATABLE :: tarr(:,:) INTEGER :: rank, bandw, nrhs INTEGER :: info !---------------------------------------------------------------------- rank = mat%mat%rank bandw = SIZE(mat%matvt,1) nrhs = SIZE(rhs,2) ! ! Solve Ay = f IF( PRESENT(sol) ) THEN CALL bsolve(mat%mat, rhs, sol) ELSE CALL bsolve(mat%mat, rhs) END IF ! IF(bandw .EQ. 0 ) RETURN ! No off band terms ! ! t = V^T*y ( = W^T*y ) ALLOCATE(tarr(bandw,nrhs)) IF( PRESENT(sol) ) THEN CALL dgemm('N', 'N', bandw, nrhs, rank, one, mat%matvt, bandw, sol, & & rank, zero, tarr, bandw) ELSE CALL dgemm('N', 'N', bandw, nrhs, rank, one, mat%matvt, bandw, rhs, & & rank, zero, tarr, bandw) END IF ! ! y = y - U*t ( = y-Z*t) IF( PRESENT(sol) ) THEN CALL dgemm('N', 'N', rank, nrhs, bandw, minus1, mat%matu, rank, tarr, & & bandw, one, sol, rank) ELSE CALL dgemm('N', 'N', rank, nrhs, bandw, minus1, mat%matu, rank, tarr, & & bandw, one, rhs, rank) END IF ! DEALLOCATE(tarr) END SUBROUTINE bsolve_periodicn !=========================================================================== SUBROUTINE bsolve_zperiodicn(mat, rhs, sol) ! ! Backsolve, using the Sherman-Morrison-Woodburry formula ! TYPE(zperiodic_mat), INTENT(inout) :: mat DOUBLE COMPLEX, DIMENSION (:,:) :: rhs DOUBLE COMPLEX, DIMENSION (:,:), OPTIONAL, INTENT (out) :: sol DOUBLE COMPLEX :: one=1.0d0, zero=0.0d0, minus1=-1.0d0 DOUBLE COMPLEX, ALLOCATABLE :: tarr(:,:) INTEGER :: rank, bandw, nrhs INTEGER :: info !---------------------------------------------------------------------- rank = mat%mat%rank bandw = SIZE(mat%matvt,1) nrhs = SIZE(rhs,2) ! ! Solve Ay = f IF( PRESENT(sol) ) THEN CALL bsolve(mat%mat, rhs, sol) ELSE CALL bsolve(mat%mat, rhs) END IF ! IF(bandw .EQ. 0 ) RETURN ! No off band terms ! ! t = V^T*y ( = W^T*y ) ALLOCATE(tarr(bandw,nrhs)) IF( PRESENT(sol) ) THEN CALL zgemm('N', 'N', bandw, nrhs, rank, one, mat%matvt, bandw, sol, & & rank, zero, tarr, bandw) ELSE CALL zgemm('N', 'N', bandw, nrhs, rank, one, mat%matvt, bandw, rhs, & & rank, zero, tarr, bandw) END IF ! ! y = y - U*t ( = y-Z*t) IF( PRESENT(sol) ) THEN CALL zgemm('N', 'N', rank, nrhs, bandw, minus1, mat%matu, rank, tarr, & & bandw, one, sol, rank) ELSE CALL zgemm('N', 'N', rank, nrhs, bandw, minus1, mat%matu, rank, tarr, & & bandw, one, rhs, rank) END IF ! DEALLOCATE(tarr) END SUBROUTINE bsolve_zperiodicn !=========================================================================== SUBROUTINE bsolve_pbn(mat, rhs, sol) ! ! Backsolve, using Lapack ! TYPE(pbmat), INTENT(inout) :: mat DOUBLE PRECISION, DIMENSION (:,:) :: rhs DOUBLE PRECISION, DIMENSION (:,:), OPTIONAL, INTENT (out) :: sol INTEGER :: lda, n, nrhs, ku INTEGER :: info !---------------------------------------------------------------------- lda = SIZE(mat%val, 1) ku = mat%ku n = mat%rank nrhs = SIZE(rhs,2) ! IF( PRESENT(sol) ) THEN sol(:,1:nrhs) = rhs(:,1:nrhs) CALL dpbtrs('U', n, ku, nrhs, mat%val, lda, sol, n, info) ELSE CALL dpbtrs('U', n, ku, nrhs, mat%val, lda, rhs, n, info) END IF IF( info .NE. 0) THEN WRITE(*,*) 'FACTOR: info from GBTRS ', info STOP '*** Abnormal EXIT in MODULE matrix ***' END IF END SUBROUTINE bsolve_pbn !=========================================================================== SUBROUTINE bsolve_gen(mat, rhs, sol) ! ! Backsolve, using Lapack ! TYPE(gemat), INTENT(inout) :: mat DOUBLE PRECISION, DIMENSION (:,:) :: rhs DOUBLE PRECISION, DIMENSION (:,:), OPTIONAL, INTENT (out) :: sol INTEGER :: n, nrhs INTEGER :: info !---------------------------------------------------------------------- n = mat%rank nrhs = SIZE(rhs,2) ! IF( PRESENT(sol) ) THEN sol(:,1:nrhs) = rhs CALL dgetrs('N', n, nrhs, mat%val, n, mat%piv, sol, n, info) ELSE CALL dgetrs('N', n, nrhs, mat%val, n, mat%piv, rhs, n, info) END IF IF( info .NE. 0) THEN WRITE(*,*) 'FACTOR: info from GETRS ', info STOP '*** Abnormal EXIT in MODULE matrix ***' END IF END SUBROUTINE bsolve_gen !=========================================================================== SUBROUTINE bsolve_zgb1(mat, rhs, sol) ! ! Backsolve, using Lapack ! TYPE(zgbmat), INTENT(inout) :: mat DOUBLE COMPLEX, DIMENSION (:) :: rhs DOUBLE COMPLEX, DIMENSION (:), OPTIONAL, INTENT (out) :: sol INTEGER :: lda, n, kl, ku INTEGER :: info !---------------------------------------------------------------------- lda = SIZE(mat%val, 1) kl = mat%kl ku = mat%ku n = mat%rank ! IF( PRESENT(sol) ) THEN sol = rhs CALL zgbtrs('N', n, kl, ku, 1, mat%val, lda, mat%piv, sol, n, info) ELSE CALL zgbtrs('N', n, kl, ku, 1, mat%val, lda, mat%piv, rhs, n, info) END IF IF( info .NE. 0) THEN WRITE(*,*) 'FACTOR: info from GBTRS ', info STOP '*** Abnormal EXIT in MODULE matrix ***' END IF END SUBROUTINE bsolve_zgb1 !=========================================================================== SUBROUTINE bsolve_zpb1(mat, rhs, sol) ! ! Backsolve, using Lapack ! TYPE(zpbmat), INTENT(inout) :: mat DOUBLE COMPLEX, DIMENSION (:) :: rhs DOUBLE COMPLEX, DIMENSION (:), OPTIONAL, INTENT (out) :: sol INTEGER :: lda, n, ku INTEGER :: info !---------------------------------------------------------------------- lda = SIZE(mat%val, 1) ku = mat%ku n = mat%rank ! IF( PRESENT(sol) ) THEN sol = rhs CALL zpbtrs('U', n, ku, 1, mat%val, lda, sol, n, info) ELSE CALL zpbtrs('U', n, ku, 1, mat%val, lda, rhs, n, info) END IF IF( info .NE. 0) THEN WRITE(*,*) 'FACTOR: info from PBTRS ', info STOP '*** Abnormal EXIT in MODULE matrix ***' END IF END SUBROUTINE bsolve_zpb1 !=========================================================================== SUBROUTINE bsolve_zge1(mat, rhs, sol) ! ! Backsolve, using Lapack ! TYPE(zgemat), INTENT(inout) :: mat DOUBLE COMPLEX, DIMENSION (:) :: rhs DOUBLE COMPLEX, DIMENSION (:), OPTIONAL, INTENT (out) :: sol INTEGER :: n INTEGER :: info !---------------------------------------------------------------------- n = mat%rank ! IF( PRESENT(sol) ) THEN sol = rhs CALL zgetrs('N', n, 1, mat%val, n, mat%piv, sol, n, info) ELSE CALL zgetrs('N', n, 1, mat%val, n, mat%piv, rhs, n, info) END IF IF( info .NE. 0) THEN WRITE(*,*) 'FACTOR: info from GETRS ', info STOP '*** Abnormal EXIT in MODULE matrix ***' END IF END SUBROUTINE bsolve_zge1 !=========================================================================== SUBROUTINE bsolve_zgbn(mat, rhs, sol) ! ! Backsolve, using Lapack ! TYPE(zgbmat), INTENT(inout) :: mat DOUBLE COMPLEX, DIMENSION (:,:) :: rhs DOUBLE COMPLEX, DIMENSION (:,:), OPTIONAL, INTENT (out) :: sol INTEGER :: lda, n, nrhs, kl, ku INTEGER :: info !---------------------------------------------------------------------- lda = SIZE(mat%val, 1) kl = mat%kl ku = mat%ku n = mat%rank nrhs = SIZE(rhs,2) ! IF( PRESENT(sol) ) THEN sol(:,1:nrhs) = rhs(:,1:nrhs) CALL zgbtrs('N', n, kl, ku, nrhs, mat%val, lda, mat%piv, sol, n, info) ELSE CALL zgbtrs('N', n, kl, ku, nrhs, mat%val, lda, mat%piv, rhs, n, info) END IF IF( info .NE. 0) THEN WRITE(*,*) 'FACTOR: info from GBTRS ', info STOP '*** Abnormal EXIT in MODULE matrix ***' END IF END SUBROUTINE bsolve_zgbn !=========================================================================== SUBROUTINE bsolve_zpbn(mat, rhs, sol) ! ! Backsolve, using Lapack ! TYPE(zpbmat), INTENT(inout) :: mat DOUBLE COMPLEX, DIMENSION (:,:) :: rhs DOUBLE COMPLEX, DIMENSION (:,:), OPTIONAL, INTENT (out) :: sol INTEGER :: lda, n, nrhs, ku INTEGER :: info !---------------------------------------------------------------------- lda = SIZE(mat%val, 1) ku = mat%ku n = mat%rank nrhs = SIZE(rhs,2) ! IF( PRESENT(sol) ) THEN sol(:,1:nrhs) = rhs(:,1:nrhs) CALL zpbtrs('U', n, ku, nrhs, mat%val, lda, sol, n, info) ELSE CALL zpbtrs('U', n, ku, nrhs, mat%val, lda, rhs, n, info) END IF IF( info .NE. 0) THEN WRITE(*,*) 'FACTOR: info from GBTRS ', info STOP '*** Abnormal EXIT in MODULE matrix ***' END IF END SUBROUTINE bsolve_zpbn !=========================================================================== SUBROUTINE bsolve_zgen(mat, rhs, sol) ! ! Backsolve, using Lapack ! TYPE(zgemat), INTENT(inout) :: mat DOUBLE COMPLEX, DIMENSION (:,:) :: rhs DOUBLE COMPLEX, DIMENSION (:,:), OPTIONAL, INTENT (out) :: sol INTEGER :: n, nrhs INTEGER :: info !---------------------------------------------------------------------- n = mat%rank nrhs = SIZE(rhs,2) ! IF( PRESENT(sol) ) THEN sol(:,1:nrhs) = rhs CALL zgetrs('N', n, nrhs, mat%val, n, mat%piv, sol, n, info) ELSE CALL zgetrs('N', n, nrhs, mat%val, n, mat%piv, rhs, n, info) END IF IF( info .NE. 0) THEN WRITE(*,*) 'FACTOR: info from GETRS ', info STOP '*** Abnormal EXIT in MODULE matrix ***' END IF END SUBROUTINE bsolve_zgen !=========================================================================== FUNCTION vmx_gb(mat, x, trans) RESULT(vmx) ! ! Return product mat*x ! TYPE(gbmat), INTENT(in) :: mat DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: x CHARACTER(len=1), OPTIONAL :: trans DOUBLE PRECISION, ALLOCATABLE :: vmx(:) DOUBLE PRECISION :: one=1.0d0, zero=0.0d0 INTEGER :: lda, kl, ku, m, n, j, imin, imax, ibmin, ibmax CHARACTER(len=1) :: trans_loc ! lda = SIZE(mat%val, 1) kl = mat%kl ku = mat%ku m = mat%mrows n = mat%ncols trans_loc = 'N' IF(PRESENT(trans)) trans_loc = trans ! IF(trans_loc.EQ.'N') THEN ALLOCATE(vmx(m)) ELSE ALLOCATE(vmx(n)) END IF ! CALL dgbmv(trans_loc, m, n, kl, ku, one, mat%val(kl+1,1), lda, x, 1, zero,& & vmx, 1) END FUNCTION vmx_gb !=========================================================================== FUNCTION vmx_ge(mat, x, trans) RESULT(vmx) ! ! Return product mat*x ! TYPE(gemat), INTENT(in) :: mat DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: x CHARACTER(len=1), OPTIONAL :: trans DOUBLE PRECISION, ALLOCATABLE :: vmx(:) DOUBLE PRECISION :: one=1.0d0, zero=0.0d0 INTEGER :: lda, m, n CHARACTER(len=1) :: trans_loc ! lda = SIZE(mat%val, 1) m = mat%mrows n = mat%ncols trans_loc = 'N' IF(PRESENT(trans)) trans_loc = trans ! IF(trans_loc.EQ.'N') THEN ALLOCATE(vmx(m)) ELSE ALLOCATE(vmx(n)) END IF ! CALL dgemv(trans_loc, m, n, one, mat%val, lda, x, 1, zero, vmx, 1) END FUNCTION vmx_ge !=========================================================================== FUNCTION vmx_gen(mat, x, trans) RESULT(vmx) ! ! Return product mat*x ! TYPE(gemat), INTENT(in) :: mat DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: x CHARACTER(len=1), OPTIONAL :: trans DOUBLE PRECISION, ALLOCATABLE :: vmx(:,:) DOUBLE PRECISION :: one=1.0d0, zero=0.0d0 INTEGER :: lda, ldb, m, n, k CHARACTER(len=1) :: trans_loc ! lda = SIZE(mat%val, 1) ldb = SIZE(x,1) trans_loc = 'N' IF(PRESENT(trans)) trans_loc = trans ! IF(trans_loc.EQ.'N') THEN m = mat%mrows n = SIZE(x,2) k = mat%ncols ALLOCATE(vmx(m,n)) ELSE m = mat%ncols n = SIZE(x,2) k = mat%mrows ALLOCATE(vmx(m,n)) END IF ! CALL dgemm(trans_loc, 'N', m, n, k, one, mat%val, lda, x, ldb, zero, vmx, & & lda) ! END FUNCTION vmx_gen !=========================================================================== FUNCTION vmx_zge(mat, x, trans) RESULT(vmx) ! ! Return product mat*x ! TYPE(zgemat), INTENT(in) :: mat DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: x CHARACTER(len=1), OPTIONAL :: trans DOUBLE COMPLEX, ALLOCATABLE :: vmx(:) DOUBLE COMPLEX :: one=1.0d0, zero=0.0d0 INTEGER :: lda, m, n CHARACTER(len=1) :: trans_loc ! lda = SIZE(mat%val, 1) m = mat%mrows n = mat%ncols trans_loc = 'N' IF(PRESENT(trans)) trans_loc = trans ! IF(trans_loc.EQ.'N') THEN ALLOCATE(vmx(m)) ELSE ALLOCATE(vmx(n)) END IF ! CALL zgemv(trans_loc, m, n, one, mat%val, lda, x, 1, zero, vmx, 1) END FUNCTION vmx_zge !=========================================================================== FUNCTION vmx_zgen(mat, x, trans) RESULT(vmx) ! ! Return product mat*x ! TYPE(zgemat), INTENT(in) :: mat DOUBLE COMPLEX, DIMENSION(:,:), INTENT(in) :: x CHARACTER(len=1), OPTIONAL :: trans DOUBLE COMPLEX, ALLOCATABLE :: vmx(:,:) DOUBLE COMPLEX :: one=1.0d0, zero=0.0d0 INTEGER :: lda, ldb, m, n, k CHARACTER(len=1) :: trans_loc ! lda = SIZE(mat%val, 1) ldb = SIZE(x,1) trans_loc = 'N' IF(PRESENT(trans)) trans_loc = trans ! IF(trans_loc.EQ.'N') THEN m = mat%mrows n = SIZE(x,2) k = mat%ncols ALLOCATE(vmx(m,n)) ELSE m = mat%ncols n = SIZE(x,2) k = mat%mrows ALLOCATE(vmx(m,n)) END IF ! CALL zgemm(trans_loc, 'N', m, n, k, one, mat%val, lda, x, ldb, zero, vmx, & & lda) ! END FUNCTION vmx_zgen !=========================================================================== FUNCTION vmx_periodic(mat, x) ! ! Return product mat*x ! TYPE(periodic_mat), INTENT(in) :: mat DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: x DOUBLE PRECISION, DIMENSION(SIZE(x)) :: vmx_periodic INTEGER :: kl, ku, n, i, ii ! kl = mat%mat%kl ku = mat%mat%ku n = mat%mat%rank ! vmx_periodic = vmx(mat%mat, x) ! DO i=1,kl vmx_periodic(i) = vmx_periodic(i) + & & DOT_PRODUCT(mat%matvt(i,n-kl+1:n), x(n-kl+1:n)) END DO ! DO i=n-ku+1,n ii = i-n+ku+kl vmx_periodic(i) = vmx_periodic(i) + & & DOT_PRODUCT(mat%matvt(ii,1:ku), x(1:ku)) END DO END FUNCTION vmx_periodic !=========================================================================== FUNCTION vmx_zperiodic(mat, x) ! ! Return product mat*x ! TYPE(zperiodic_mat), INTENT(in) :: mat DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: x DOUBLE COMPLEX, DIMENSION(SIZE(x)) :: vmx_zperiodic INTEGER :: kl, ku, n, i, ii ! kl = mat%mat%kl ku = mat%mat%ku n = mat%mat%rank ! vmx_zperiodic = vmx(mat%mat, x) ! DO i=1,kl vmx_zperiodic(i) = vmx_zperiodic(i) + & & DOT_PRODUCT(mat%matvt(i,n-kl+1:n), x(n-kl+1:n)) END DO ! DO i=n-ku+1,n ii = i-n+ku+kl vmx_zperiodic(i) = vmx_zperiodic(i) + & & DOT_PRODUCT(mat%matvt(ii,1:ku), x(1:ku)) END DO END FUNCTION vmx_zperiodic !=========================================================================== FUNCTION vmx_gbn(mat, x, trans) RESULT(vmx) ! ! Return product mat*x ! TYPE(gbmat), INTENT(in) :: mat DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: x CHARACTER(len=1), OPTIONAL :: trans DOUBLE PRECISION, ALLOCATABLE :: vmx(:,:) DOUBLE PRECISION :: one=1.0d0, zero=0.0d0 INTEGER :: lda, kl, ku, m, n, j, k, imin, imax, ibmin, ibmax CHARACTER(len=1) :: trans_loc ! lda = SIZE(mat%val, 1) kl = mat%kl ku = mat%ku m = mat%mrows n = mat%ncols trans_loc = 'N' IF(PRESENT(trans)) trans_loc = trans ! IF(trans_loc.EQ.'N') THEN ALLOCATE(vmx(m,SIZE(x,2))) ELSE ALLOCATE(vmx(n,SIZE(x,2))) END IF ! DO k=1,SIZE(x,2) CALL dgbmv(trans_loc, m, n, kl, ku, one, mat%val(kl+1,1), lda, & & x(1,k), 1, zero, vmx(1,k), 1) END DO END FUNCTION vmx_gbn !=========================================================================== FUNCTION vmx_pb(mat, x) ! ! Return product mat*x ! TYPE(pbmat), INTENT(in) :: mat DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: x DOUBLE PRECISION, DIMENSION(SIZE(x)) :: vmx_pb INTEGER :: lda, ku, n, i, j, imin, imax, ib, ibmin, ibmax ! lda = SIZE(mat%val, 1) ku = mat%ku n = mat%rank ! vmx_pb = 0.0d0 DO j=1,n imin = MAX(1,j-ku); imax = j ! The column in the upper diagonal part ibmin = ku+imin-j+1; ibmax = ku+imax-j+1 vmx_pb(imin:imax) = vmx_pb(imin:imax) + mat%val(ibmin:ibmax,j)*x(j) ! imin=j+1; imax = MIN(n,j+ku) ! The column in the lower diagonal part DO i=imin,imax ib = ku+1+j-i vmx_pb(i) = vmx_pb(i) + mat%val(ib,i)*x(j) END DO END DO END FUNCTION vmx_pb !=========================================================================== FUNCTION vmx_pbn(mat, x) ! ! Return product mat*x ! TYPE(pbmat), INTENT(in) :: mat DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: x DOUBLE PRECISION, DIMENSION(SIZE(x,1),SIZE(x,2)) :: vmx_pbn INTEGER :: lda, ku, n, i, j, k, imin, imax, ib, ibmin, ibmax ! lda = SIZE(mat%val, 1) ku = mat%ku n = mat%rank ! vmx_pbn = 0.0d0 DO j=1,n imin = MAX(1,j-ku); imax = j ! The column in the upper diagonal part ibmin = ku+imin-j+1; ibmax = ku+imax-j+1 DO k=1,SIZE(x,2) vmx_pbn(imin:imax,k) = vmx_pbn(imin:imax,k) + & & mat%val(ibmin:ibmax,j)*x(j,k) END DO ! imin=j+1; imax = MIN(n,j+ku) ! The column in the lower diagonal part DO i=imin,imax ib = ku+1+j-i vmx_pbn(i,:) = vmx_pbn(i,:) + mat%val(ib,i)*x(j,:) END DO END DO END FUNCTION vmx_pbn !=========================================================================== FUNCTION vmx_zgb(mat, x, trans) RESULT(vmx) ! ! Return product mat*x ! TYPE(zgbmat), INTENT(in) :: mat DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: x CHARACTER(len=1), OPTIONAL :: trans DOUBLE COMPLEX, ALLOCATABLE :: vmx(:) DOUBLE COMPLEX :: one=1.0d0, zero=0.0d0 INTEGER :: lda, kl, ku, m, n, j, imin, imax, ibmin, ibmax CHARACTER(len=1) :: trans_loc ! lda = SIZE(mat%val, 1) kl = mat%kl ku = mat%ku m = mat%mrows n = mat%ncols trans_loc = 'N' IF(PRESENT(trans)) trans_loc = trans ! IF(trans_loc.EQ.'N') THEN ALLOCATE(vmx(m)) ELSE ALLOCATE(vmx(n)) END IF ! CALL zgbmv(trans_loc, m, n, kl, ku, one, mat%val(kl+1,1), lda, x, 1, zero,& & vmx, 1) END FUNCTION vmx_zgb !=========================================================================== FUNCTION vmx_zgbn(mat, x, trans) RESULT(vmx) ! ! Return product mat*x ! TYPE(zgbmat), INTENT(in) :: mat DOUBLE COMPLEX, DIMENSION(:,:), INTENT(in) :: x CHARACTER(len=1), OPTIONAL :: trans DOUBLE COMPLEX, ALLOCATABLE :: vmx(:,:) DOUBLE COMPLEX :: one=1.0d0, zero=0.0d0 INTEGER :: lda, kl, ku, m, n, j, k, imin, imax, ibmin, ibmax CHARACTER(len=1) :: trans_loc ! lda = SIZE(mat%val, 1) kl = mat%kl ku = mat%ku m = mat%mrows n = mat%ncols trans_loc = 'N' IF(PRESENT(trans)) trans_loc = trans ! IF(trans_loc.EQ.'N') THEN ALLOCATE(vmx(m,SIZE(x,2))) ELSE ALLOCATE(vmx(n,SIZE(x,2))) END IF ! DO k=1,SIZE(x,2) CALL zgbmv(trans_loc, m, n, kl, ku, one, mat%val(kl+1,1), lda, & & x(1,k), 1, zero, vmx(1,k), 1) END DO END FUNCTION vmx_zgbn !=========================================================================== FUNCTION vmx_zpb(mat, x) ! ! Return product mat*x ! TYPE(zpbmat), INTENT(in) :: mat DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: x DOUBLE COMPLEX, DIMENSION(SIZE(x)) :: vmx_zpb INTEGER :: lda, ku, n, i, j, imin, imax, ib, ibmin, ibmax ! lda = SIZE(mat%val, 1) ku = mat%ku n = mat%rank ! vmx_zpb = 0.0d0 DO j=1,n imin = MAX(1,j-ku); imax = j ! The column in the upper diagonal part ibmin = ku+imin-j+1; ibmax = ku+imax-j+1 vmx_zpb(imin:imax) = vmx_zpb(imin:imax) + mat%val(ibmin:ibmax,j)*x(j) ! imin=j+1; imax = MIN(n,j+ku) ! The column in the lower diagonal part DO i=imin,imax ib = ku+1+j-i vmx_zpb(i) = vmx_zpb(i) + CONJG(mat%val(ib,i))*x(j) END DO END DO END FUNCTION vmx_zpb !=========================================================================== FUNCTION vmx_zpbn(mat, x) ! ! Return product mat*x ! TYPE(zpbmat), INTENT(in) :: mat DOUBLE COMPLEX, DIMENSION(:,:), INTENT(in) :: x DOUBLE COMPLEX, DIMENSION(SIZE(x,1),SIZE(x,2)) :: vmx_zpbn INTEGER :: lda, ku, n, i, j, k, imin, imax, ib, ibmin, ibmax ! lda = SIZE(mat%val, 1) ku = mat%ku n = mat%rank ! vmx_zpbn = 0.0d0 DO j=1,n imin = MAX(1,j-ku); imax = j ! The column in the upper diagonal part ibmin = ku+imin-j+1; ibmax = ku+imax-j+1 DO k=1,SIZE(x,2) vmx_zpbn(imin:imax,k) = vmx_zpbn(imin:imax,k) + & & mat%val(ibmin:ibmax,j)*x(j,k) END DO ! imin=j+1; imax = MIN(n,j+ku) ! The column in the lower diagonal part DO i=imin,imax ib = ku+1+j-i vmx_zpbn(i,:) = vmx_zpbn(i,:) + CONJG(mat%val(ib,i))*x(j,:) END DO END DO END FUNCTION vmx_zpbn !=========================================================================== SUBROUTINE determinant_ge(mat, base, pow) ! ! Return the determinant of mat ! TYPE(gemat) :: mat INTEGER :: pow, i DOUBLE PRECISION :: base ! CALL factor(mat) base = 1.0d0 pow = 0 DO i=1,mat%rank IF( mat%piv(i) .NE. i) base = -base base = mat%val(i,i)*base IF( base .EQ. 0.0d0 ) THEN WRITE(*,*) 'DETERMINANT_GE: matrix is singular' STOP '*** Abnormal EXIT in MODULE matrix ***' END IF DO IF( ABS(base) .GE. 1.0d0 ) EXIT base = 10.0d0*base pow = pow - 1 END DO DO IF( ABS(base) .LT. 10.0d0 ) EXIT base = base/10.0d0 pow = pow + 1 END DO END DO END SUBROUTINE determinant_ge !=========================================================================== SUBROUTINE determinant_gb(mat, base, pow) ! ! Return the determinant of mat ! TYPE(gbmat) :: mat INTEGER :: pow, i, ib DOUBLE PRECISION :: base ! CALL factor(mat) base = 1.0d0 pow = 0 ib=mat%kl + mat%ku + 1 DO i=1,mat%rank IF( mat%piv(i) .NE. i) base = -base base = mat%val(ib,i)*base IF( base .EQ. 0.0d0 ) THEN WRITE(*,*) 'DETERMINANT_GB: matrix is singular' STOP '*** Abnormal EXIT in MODULE matrix ***' END IF DO IF( ABS(base) .GE. 1.0d0 ) EXIT base = 10.0d0*base pow = pow - 1 END DO DO IF( ABS(base) .LT. 10.0d0 ) EXIT base = base/10.0d0 pow = pow + 1 END DO END DO END SUBROUTINE determinant_gb !=========================================================================== SUBROUTINE determinant_pb(mat, base, pow) ! ! Return the determinant of mat ! TYPE(pbmat) :: mat INTEGER :: pow, i, ib DOUBLE PRECISION :: base ! CALL factor(mat) base = 1.0d0 pow = 0 ib = mat%ku + 1 DO i=1,mat%rank base = mat%val(ib,i)*base IF( base .EQ. 0.0d0 ) THEN WRITE(*,*) 'DETERMINANT_PB: matrix is singular' STOP '*** Abnormal EXIT in MODULE matrix ***' END IF DO IF( ABS(base) .GE. 1.0d0 ) EXIT base = 10.0d0*base pow = pow - 1 END DO DO IF( ABS(base) .LT. 10.0d0 ) EXIT base = base/10.0d0 pow = pow + 1 END DO END DO base=base**2 pow=pow*2 END SUBROUTINE determinant_pb !=========================================================================== SUBROUTINE determinant_zge(mat, base, pow) ! ! Return the determinant of mat ! TYPE(zgemat) :: mat INTEGER :: pow, i DOUBLE COMPLEX :: base ! CALL factor(mat) base = 1.0d0 pow = 0 DO i=1,mat%rank IF( mat%piv(i) .NE. i) base = -base base = mat%val(i,i)*base IF( base .EQ. 0.0d0 ) THEN WRITE(*,*) 'DETERMINANT_ZGE: matrix is singular' STOP '*** Abnormal EXIT in MODULE matrix ***' END IF DO IF( ABS(base) .GE. 1.0d0 ) EXIT base = 10.0d0*base pow = pow - 1 END DO DO IF( ABS(base) .LT. 10.0d0 ) EXIT base = base/10.0d0 pow = pow + 1 END DO END DO END SUBROUTINE determinant_zge !=========================================================================== SUBROUTINE determinant_zgb(mat, base, pow) ! ! Return the determinant of mat ! TYPE(zgbmat) :: mat INTEGER :: pow, i, ib DOUBLE COMPLEX :: base ! CALL factor(mat) base = 1.0d0 pow = 0 ib=mat%kl + mat%ku + 1 DO i=1,mat%rank IF( mat%piv(i) .NE. i) base = -base base = mat%val(ib,i)*base IF( base .EQ. 0.0d0 ) THEN WRITE(*,*) 'DETERMINANT_GB: matrix is singular' STOP '*** Abnormal EXIT in MODULE matrix ***' END IF DO IF( ABS(base) .GE. 1.0d0 ) EXIT base = 10.0d0*base pow = pow - 1 END DO DO IF( ABS(base) .LT. 10.0d0 ) EXIT base = base/10.0d0 pow = pow + 1 END DO END DO END SUBROUTINE determinant_zgb !=========================================================================== SUBROUTINE determinant_zpb(mat, base, pow) ! ! Return the determinant of mat ! TYPE(zpbmat) :: mat INTEGER :: pow, i, ib DOUBLE COMPLEX :: base ! CALL factor(mat) base = 1.0d0 pow = 0 ib = mat%ku + 1 DO i=1,mat%rank base = mat%val(ib,i)*base IF( base .EQ. 0.0d0 ) THEN WRITE(*,*) 'DETERMINANT_PB: matrix is singular' STOP '*** Abnormal EXIT in MODULE matrix ***' END IF DO IF( ABS(base) .GE. 1.0d0 ) EXIT base = 10.0d0*base pow = pow - 1 END DO DO IF( ABS(base) .LT. 10.0d0 ) EXIT base = base/10.0d0 pow = pow + 1 END DO END DO base=base**2 pow=pow*2 END SUBROUTINE determinant_zpb !=========================================================================== SUBROUTINE putmat_gb(fid, label, mat, str) ! ! Write GB matrix in hdf5 file ! USE futils INTEGER, INTENT(in) :: fid CHARACTER(len=*), INTENT(in) :: label TYPE(gbmat) :: mat CHARACTER(len=*), OPTIONAL, INTENT(in) :: str IF(PRESENT(str)) THEN CALL putarr(fid, label, mat%val, str) ELSE CALL putarr(fid, label, mat%val) END IF CALL attach(fid, label, 'KL', mat%kl) CALL attach(fid, label, 'KU', mat%ku) CALL attach(fid, label, 'RANK', mat%rank) END SUBROUTINE putmat_gb !=========================================================================== SUBROUTINE getmat_gb(fid, label, mat, str) ! ! Read in GB matrix from hdf5 file ! USE futils INTEGER, INTENT(in) :: fid CHARACTER(len=*), INTENT(in) :: label TYPE(gbmat) :: mat CHARACTER(len=*), OPTIONAL, INTENT(in) :: str CALL getatt(fid, label, 'KL', mat%kl) CALL getatt(fid, label, 'KU', mat%ku) CALL getatt(fid, label, 'RANK', mat%rank) CALL getarr(fid, label, mat%val) END SUBROUTINE getmat_gb !=========================================================================== SUBROUTINE kron_ge(mata, matb, matc) ! ! Krocnecker product of 2 dense matrices ! TYPE(gemat), INTENT(in) :: mata, matb TYPE(gemat), INTENT(out) :: matc ! INTEGER :: i1, j1, i3, j3, m1, n1, m2, n2, m3, n3 ! m1 = mata%mrows n1 = mata%ncols m2 = matb%mrows n2 = matb%ncols m3 = m1*m2 n3 = n1*n2 ! CALL init(n3, 0, matc, mrows=m3) DO i1=1,m1 i3 = (i1-1)*m2 DO j1=1,n1 j3 = (j1-1)*n2 matc%val(i3+1:i3+m2,j3+1:j3+n2) = mata%val(i1,j1)*matb%val(1:m2,1:n2) END DO END DO END SUBROUTINE kron_ge !=========================================================================== END MODULE matrix diff --git a/src/multigrid_mod.f90 b/src/multigrid_mod.f90 index 192c76c..58173b8 100644 --- a/src/multigrid_mod.f90 +++ b/src/multigrid_mod.f90 @@ -1,2373 +1,2373 @@ !> !> @file multigrid_mod.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> MODULE multigrid ! ! MULTIGRID: Implement Multigrid solver for Finite Elements ! and Fiinite Differences. ! ! T.M. Tran, CRPP-EPFL ! September 2012 ! USE bsplines USE matrix USE conmat_mod USE csr USE cds IMPLICIT NONE ! TYPE grid1d INTEGER :: n ! Number of intervals INTEGER :: rank ! Dimension of FE space DOUBLE PRECISION :: h DOUBLE PRECISION, ALLOCATABLE :: x(:) DOUBLE PRECISION, ALLOCATABLE :: v(:) DOUBLE PRECISION, ALLOCATABLE :: f(:) TYPE(spline1d) :: spl TYPE(gemat) :: transf ! Coarse to fine transfer matrix TYPE(gbmat), ALLOCATABLE :: mata ! FE matrix TYPE(gbmat), ALLOCATABLE :: matm ! mass matrix TYPE(gbmat), ALLOCATABLE :: mata_copy ! Used for direct_solve TYPE(gemat), ALLOCATABLE :: matap ! FE matrix TYPE(gemat), ALLOCATABLE :: matmp ! mass matrix TYPE(gemat), ALLOCATABLE :: matap_copy! Used for direct_solve END TYPE grid1d ! TYPE grid2d INTEGER :: n(2) ! Number of intervals INTEGER :: rank(2) ! Dimension of FE space DOUBLE PRECISION :: h(2) DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:) ! DOUBLE PRECISION, ALLOCATABLE :: v(:,:) ! sol DOUBLE PRECISION, ALLOCATABLE :: f(:,:) ! rhs DOUBLE PRECISION, POINTER :: v1d(:) ! flatten sol DOUBLE PRECISION, POINTER :: f1d(:) ! flatten rhs ! TYPE(csr_mat),ALLOCATABLE :: mata TYPE(cds_mat),ALLOCATABLE :: mata_cds TYPE(spline2d) :: spl TYPE(gemat) :: transf(2) ! TYPE(csr_mat) :: matp(2) END TYPE grid2d ! TYPE mg_info INTEGER :: nu1 ! Relaxation down sweeps INTEGER :: nu2 ! Relaxation up sweeps INTEGER :: mu ! mu-cycle number INTEGER :: nu0 ! Number of FMG cycles INTEGER :: levels ! Number of mg levels CHARACTER(len=4) :: relax ! Type of relation DOUBLE PRECISION :: omega ! for weighted Jacobi relaxation LOGICAL :: nlscale=.FALSE. ! Scale restriction if .TRUE. END TYPE mg_info ! INTERFACE create_grid MODULE PROCEDURE create_grid_1d, create_grid_2d END INTERFACE create_grid INTERFACE disrhs MODULE PROCEDURE disrhs_1d, disrhs_2d END INTERFACE disrhs INTERFACE direct_solve MODULE PROCEDURE direct_solve_1d, direct_solve_2d END INTERFACE direct_solve INTERFACE mg MODULE PROCEDURE mg_1d, mg_2d END INTERFACE mg INTERFACE disc_err MODULE PROCEDURE disc_err_1d, disc_err_2d END INTERFACE disc_err INTERFACE jacobi MODULE PROCEDURE jacobi_cds, jacobi_csr, jacobi_gb, jacobi_ge END INTERFACE jacobi INTERFACE gs MODULE PROCEDURE gs_cds, gs_csr, gs_gb, gs_ge END INTERFACE gs INTERFACE restrict MODULE PROCEDURE restrict_1d, restrict_2d, restrict_2d_csr END INTERFACE restrict INTERFACE prolong MODULE PROCEDURE prolong_1d, prolong_2d, prolong_2d_csr END INTERFACE prolong INTERFACE printmat MODULE PROCEDURE printmat_mat, printmat_ge, printmat_gb, printmat_periodic END INTERFACE printmat INTERFACE massmat MODULE PROCEDURE massmat_ge, massmat_gb, massmat_periodic END INTERFACE massmat INTERFACE femat MODULE PROCEDURE femat_2d_csr, femat_ge, femat_gb, femat_periodic END INTERFACE femat INTERFACE ibcmat MODULE PROCEDURE ibcmat_1d, ibcmat_2d END INTERFACE ibcmat INTERFACE mod_transf MODULE PROCEDURE mod_transf_full, mod_transf_csr END INTERFACE mod_transf INTERFACE normf MODULE PROCEDURE normf_gb, normf_ge END INTERFACE normf INTERFACE residue MODULE PROCEDURE residue_gen, residue_csr, residue_cds, residue_gb, residue_ge END INTERFACE residue ! CONTAINS !-------------------------------------------------------------------------------- SUBROUTINE create_grid_1d(n, nidbas, ng_in, alpha, grids, period) ! ! Create an array of levels grids ! Compute mass matrix and prolongation matrices. ! INTEGER, INTENT(in) :: n ! Number of intervals in the finest grid INTEGER, INTENT(in) :: nidbas ! Order of splines INTEGER, INTENT(in) :: ng_in ! Number of proposed Gauss points INTEGER, INTENT(in) :: alpha ! geometric exponent TYPE(grid1d), INTENT(out) :: grids(:) LOGICAL, INTENT(in), OPTIONAL :: period ! LOGICAL :: nlper INTEGER :: n_current, nrank, ngauss INTEGER :: levels, l, i DOUBLE PRECISION :: h_current TYPE(gbmat) :: matm TYPE(gemat) :: matmp ! levels = SIZE(grids) nlper = .FALSE. IF(PRESENT(period)) nlper = period ! ngauss = CEILING(REAL(2*nidbas+alpha+1,8)/2.d0) ngauss = MAX(ng_in, ngauss) WRITE(*,'(a,i0)') 'ngauss = ', ngauss ! ! Allocate some matrices ! DO l=1,levels IF(nlper) THEN ALLOCATE(grids(l)%matmp) ALLOCATE(grids(l)%matap) ELSE ALLOCATE(grids(l)%matm) ALLOCATE(grids(l)%mata) END IF END DO ! n_current = n h_current = 1.0d0/REAL(n_current,8) DO l=1,levels IF(n_current .LT. 2 ) THEN PRINT*, 'CREATE_GRID: number intervals too small!' STOP END IF grids(l)%n = n_current grids(l)%h = h_current ALLOCATE(grids(l)%x(0:n_current)) grids(l)%x(0:n_current) = (/ (REAL(i,8)*h_current, i=0,n_current) /) CALL set_spline(nidbas, ngauss, grids(l)%x, grids(l)%spl, period=nlper) CALL get_dim(grids(l)%spl, nrank) IF(nlper) nrank = n_current grids(l)%rank = nrank ALLOCATE(grids(l)%v(nrank)) ALLOCATE(grids(l)%f(nrank)) IF(nlper) THEN CALL massmat(grids(l)%spl, alpha, grids(l)%matmp) ELSE CALL massmat(grids(l)%spl, alpha, grids(l)%matm) END IF IF(l.GT.1) THEN CALL ctof_massmat(grids(l-1)%spl, grids(l)%spl, alpha, grids(l)%transf) IF(nlper) THEN CALL mcopy(grids(l-1)%matmp, matmp) CALL factor(matmp) CALL bsolve(matmp, grids(l)%transf%val) CALL destroy(matmp) ELSE CALL mcopy(grids(l-1)%matm, matm) CALL factor(matm) CALL bsolve(matm, grids(l)%transf%val) CALL destroy(matm) END IF END IF n_current = n_current/2 h_current = 2.0d0*h_current END DO END SUBROUTINE create_grid_1d !-------------------------------------------------------------------------------- SUBROUTINE create_grid_2d(x, y, nidbas, ng_in, alpha, grids, mat_type, period, & & debug_in) ! ! Create an array of levels grids ! Compute mass matrix and prolongation matrices. ! DOUBLE PRECISION, INTENT(in) :: x(0:), y(0:) ! Finest grid points INTEGER, INTENT(in) :: nidbas(2) ! Order of splines INTEGER, INTENT(in) :: alpha(2) ! geometric exponent INTEGER, INTENT(in) :: ng_in(2) ! Number of proposed Gauss points TYPE(grid2d), INTENT(out), TARGET :: grids(:) CHARACTER(*), INTENT(in), OPTIONAL :: mat_type ! csr (default) or cds LOGICAL, INTENT(in), OPTIONAL :: period(2) LOGICAL, INTENT(in), OPTIONAL :: debug_in ! LOGICAL, DIMENSION(2) :: nlper INTEGER, DIMENSION(2) :: n, sp_dim, ngauss LOGICAL :: nlcds LOGICAL :: debug INTEGER :: levels, l, rank2d TYPE(gemat) :: matm ! DOUBLE PRECISION, PARAMETER :: pi = 4.0d0*ATAN(1.0d0) ! ! Process input args ! n(1) = SIZE(x)-1 n(2) = SIZE(y)-1 levels = SIZE(grids) nlper = .FALSE. IF(PRESENT(period)) THEN nlper = period END IF IF(PRESENT(debug_in)) THEN debug = debug_in ELSE debug = .FALSE. END IF IF(PRESENT(mat_type)) THEN ! CSR matrix by default nlcds = mat_type.EQ.'cds' ELSE nlcds = .FALSE. END IF ! ! WARNING: Assume that only 2nd dim can be periodic!!! IF(nlper(1)) THEN WRITE(*,'(A)') 'CREATE_GRID: First dimension could not be periodic!' STOP END IF ! ngauss = CEILING(REAL(2*nidbas+1,8)/2.d0) ngauss = MAX(ng_in, ngauss) WRITE(*,'(a,2i4)') 'ngauss = ', ngauss ! DO l=1,levels ! ! Create mesh from finest grid mesh IF(MINVAL(n) .LT. 2 ) THEN PRINT*, 'CREATE_GRID: number intervals too small!' STOP END IF grids(l)%n = n ALLOCATE(grids(l)%x(0:n(1))) ALLOCATE(grids(l)%y(0:n(2))) IF(l.EQ.1) THEN grids(1)%x = x grids(1)%y = y ELSE grids(l)%x(:) = grids(l-1)%x(0::2) grids(l)%y(:) = grids(l-1)%y(0::2) END IF IF(debug) THEN WRITE(*,'(/a,i4,a,2l2)') 'l =', l, ' nlper =', nlper WRITE(*,'(a/(10(1pe12.3)))') 'x', grids(l)%x WRITE(*,'(a/(10(1pe12.3)))') 'y', grids(l)%y END IF ! ! Allocate mem for solution v and RHS f CALL set_spline(nidbas, ngauss, grids(l)%x, grids(l)%y, grids(l)%spl, period=nlper) CALL get_dim(grids(l)%spl%sp1, sp_dim(1)) CALL get_dim(grids(l)%spl%sp2, sp_dim(2)) ALLOCATE(grids(l)%v(sp_dim(1), sp_dim(2))) ALLOCATE(grids(l)%f(sp_dim(1), sp_dim(2))) ! ! WARNING: Assume that only 2nd dim can be periodic!!! grids(l)%rank = sp_dim IF(nlper(2)) THEN grids(l)%rank(2) = n(2) END IF IF(debug) THEN WRITE(*,'(a,2i6)') 'Grid ranks', grids(l)%rank END IF ! ! Flatten version of sol and rhs rank2d = PRODUCT(grids(l)%rank) grids(l)%v1d(1:rank2d) => grids(l)%v grids(l)%f1d(1:rank2d) => grids(l)%f ! ! Matrix format for FE matrix IF(nlcds) THEN ALLOCATE(grids(l)%mata_cds) ELSE ALLOCATE(grids(l)%mata) END IF ! ! Coarse to fine mesh transfers for l>1 IF(l.GT.1) THEN CALL ctof_massmat(grids(l-1)%spl%sp1, grids(l)%spl%sp1, alpha(1), grids(l)%transf(1)) CALL ctof_massmat(grids(l-1)%spl%sp2, grids(l)%spl%sp2, alpha(2), grids(l)%transf(2)) ! CALL massmat(grids(l-1)%spl%sp1, alpha(1), matm) CALL factor(matm) CALL bsolve(matm, grids(l)%transf(1)%val) CALL full_to_csr(grids(l)%transf(1)%val, grids(l)%matp(1)) CALL destroy(matm) CALL destroy(grids(l)%transf(1)) ! CALL massmat(grids(l-1)%spl%sp2, alpha(2), matm) CALL factor(matm) CALL bsolve(matm, grids(l)%transf(2)%val) CALL full_to_csr(grids(l)%transf(2)%val, grids(l)%matp(2)) CALL destroy(matm) CALL destroy(grids(l)%transf(2)) END IF ! ! Next coarse grid n = n/2 END DO END SUBROUTINE create_grid_2d !-------------------------------------------------------------------------------- SUBROUTINE create_grid_fd(x, y, grids, info, mat_type, period, debug) ! ! FD version of create_grid ! DOUBLE PRECISION, INTENT(in) :: x(0:), y(0:) TYPE(grid2d), INTENT(out), TARGET :: grids(:) TYPE(mg_info), INTENT(inout) :: info ! info for MG CHARACTER(*), INTENT(in), OPTIONAL :: mat_type ! csr (default) or cds LOGICAL, INTENT(in), OPTIONAL :: period(2) LOGICAL, INTENT(in), OPTIONAL :: debug ! INTEGER :: nidbas(2)=1, ngauss(2)=4 ! Linear Splines \equiv 1st FD INTEGER :: alpha(2) = 1 ! Cartesian coordinate LOGICAL, DIMENSION(2) :: nlper LOGICAL :: nldebug LOGICAL :: nlcds INTEGER :: levels, n(2), sp_dim(2) INTEGER :: l, rank2d TYPE(gemat) :: matm !-------------------------------------------------------------------------------- ! ! Process input args ! n(1) = SIZE(x)-1 n(2) = SIZE(y)-1 levels = SIZE(grids) info%nlscale = .TRUE. ! Restriction should be scaled for FD nlper = .FALSE. IF(PRESENT(period)) nlper = period nldebug = .FALSE. IF(PRESENT(debug)) nldebug = debug IF(PRESENT(mat_type)) THEN ! CSR matrix by default nlcds = mat_type.EQ.'cds' ELSE nlcds = .FALSE. END IF ! DO l=1,levels ! ! Create mesh from finest grid mesh IF(MINVAL(n) .LT. 2 ) THEN PRINT*, 'CREATE_GRID: number intervals too small!' STOP END IF grids(l)%n = n ALLOCATE(grids(l)%x(0:n(1))) ALLOCATE(grids(l)%y(0:n(2))) IF(l.EQ.1) THEN grids(1)%x = x grids(1)%y = y ELSE grids(l)%x(:) = grids(l-1)%x(0::2) grids(l)%y(:) = grids(l-1)%y(0::2) END IF IF(nldebug) THEN WRITE(*,'(/a,i4,a,2l2)') 'l =', l, ' nlper =', nlper WRITE(*,'(a/(10(1pe12.3)))') 'x', grids(l)%x WRITE(*,'(a/(10(1pe12.3)))') 'y', grids(l)%y END IF ! ! Allocate mem for solution v and RHS f CALL set_spline(nidbas, ngauss, grids(l)%x, grids(l)%y, grids(l)%spl, period=nlper) CALL get_dim(grids(l)%spl%sp1, sp_dim(1)) CALL get_dim(grids(l)%spl%sp2, sp_dim(2)) ALLOCATE(grids(l)%v(0:sp_dim(1)-1, 0:sp_dim(2)-1)) ALLOCATE(grids(l)%f(0:sp_dim(1)-1, 0:sp_dim(2)-1)) ! ! WARNING: Assume that only 2nd dim can be periodic!!! grids(l)%rank = sp_dim IF(nlper(2)) THEN grids(l)%rank(2) = n(2) END IF IF(nldebug) THEN WRITE(*,'(a,2i6)') 'Grid ranks', grids(l)%rank END IF ! ! Flatten version of sol and rhs rank2d = PRODUCT(grids(l)%rank) grids(l)%v1d(1:rank2d) => grids(l)%v grids(l)%f1d(1:rank2d) => grids(l)%f ! ! Matrix format for FD matrix IF(nlcds) THEN ALLOCATE(grids(l)%mata_cds) ELSE ALLOCATE(grids(l)%mata) END IF ! ! Coarse to fine mesh transfers for l>1 IF(l.GT.1) THEN CALL ctof_massmat(grids(l-1)%spl%sp1, grids(l)%spl%sp1, alpha(1), grids(l)%transf(1)) CALL ctof_massmat(grids(l-1)%spl%sp2, grids(l)%spl%sp2, alpha(2), grids(l)%transf(2)) ! CALL massmat(grids(l-1)%spl%sp1, alpha(1), matm) CALL factor(matm) CALL bsolve(matm, grids(l)%transf(1)%val) CALL full_to_csr(grids(l)%transf(1)%val, grids(l)%matp(1)) CALL destroy(matm) CALL destroy(grids(l)%transf(1)) ! CALL massmat(grids(l-1)%spl%sp2, alpha(2), matm) CALL factor(matm) CALL bsolve(matm, grids(l)%transf(2)%val) CALL full_to_csr(grids(l)%transf(2)%val, grids(l)%matp(2)) CALL destroy(matm) CALL destroy(grids(l)%transf(2)) END IF ! ! Next coarse grid n = n/2 END DO END SUBROUTINE create_grid_fd !-------------------------------------------------------------------------------- RECURSIVE SUBROUTINE fmg(grids, info, l) ! ! Execute a full multigrid V-cycle ! TYPE(grid1d), INTENT(inout) :: grids(:) TYPE(mg_info), INTENT(in) :: info INTEGER, INTENT(in) :: l INTEGER :: levels, k levels = info%levels ! IF(l.EQ.levels) THEN CALL direct_solve(grids(levels), grids(levels)%v) ELSE grids(l+1)%f = restrict(grids(l+1)%transf,grids(l)%f) CALL fmg(grids, info, l+1) grids(l)%v = prolong(grids(l+1)%transf,grids(l+1)%v) DO k=1,info%nu0 CALL mg(grids, info, l) END DO END IF END SUBROUTINE fmg !-------------------------------------------------------------------------------- RECURSIVE SUBROUTINE mg_1d(grids, info, l) ! ! Execute a recursive V-cycle ! TYPE(grid1d), INTENT(inout) :: grids(:) TYPE(mg_info), INTENT(in) :: info INTEGER, INTENT(in) :: l INTEGER :: levels, k LOGICAL :: nlper ! levels = info%levels nlper = grids(1)%spl%period ! IF(l.EQ.levels) THEN CALL direct_solve(grids(levels), grids(levels)%v) ELSE CALL relax(info%nu1) IF(nlper) THEN grids(l+1)%f = restrict(grids(l+1)%transf, & & grids(l)%f-vmx(grids(l)%matap, grids(l)%v)) ELSE grids(l+1)%f = restrict(grids(l+1)%transf, & & grids(l)%f-vmx(grids(l)%mata, grids(l)%v)) END IF grids(l+1)%v = 0.0d0 ! ! Only 1 call to the coarsest level DO k=1,MERGE(info%mu, 1, (l+1).NE.levels) CALL mg(grids, info, l+1) END DO ! grids(l)%v = grids(l)%v + prolong(grids(l+1)%transf,grids(l+1)%v) CALL relax(info%nu2) END IF ! CONTAINS SUBROUTINE relax(nu) INTEGER, INTENT(in) :: nu SELECT CASE (TRIM(info%relax)) CASE ("jac") IF(nlper) THEN CALL jacobi(grids(l)%matap, info%omega, nu, grids(l)%v, grids(l)%f) ELSE CALL jacobi(grids(l)%mata, info%omega, nu, grids(l)%v, grids(l)%f) END IF CASE ("gs") IF(nlper) THEN CALL gs(grids(l)%matap, nu, grids(l)%v, grids(l)%f) ELSE CALL gs(grids(l)%mata, nu, grids(l)%v, grids(l)%f) END IF CASE default PRINT*, "relax ", info%relax, " NOT IMPLEMENTED!" STOP END SELECT END SUBROUTINE relax END SUBROUTINE mg_1d !-------------------------------------------------------------------------------- RECURSIVE SUBROUTINE mg_2d(grids, info, l) ! ! Execute a recursive V-cycle ! TYPE(grid2d), INTENT(inout) :: grids(:) TYPE(mg_info), INTENT(in) :: info INTEGER, INTENT(in) :: l ! DOUBLE PRECISION, ALLOCATABLE, TARGET :: resid(:,:) DOUBLE PRECISION, POINTER :: resid1d(:) INTEGER :: levels, k, m1, m2 ! levels = info%levels m1 = SIZE(grids(l)%v,1) m2 = SIZE(grids(l)%v,2) ! IF(l.EQ.levels) THEN grids(levels)%v = grids(levels)%f CALL direct_solve(grids(levels), grids(levels)%v1d) ELSE CALL relax(info%nu1) ALLOCATE(resid(m1,m2)); resid1d(1:m1*m2) => resid IF(ALLOCATED(grids(l)%mata)) THEN resid1d = grids(l)%f1d - vmx(grids(l)%mata, grids(l)%v1d) ELSE resid1d = grids(l)%f1d - vmx(grids(l)%mata_cds, grids(l)%v1d) END IF grids(l+1)%f = restrict(grids(l+1)%matp, resid) IF(info%nlscale) grids(l+1)%f = 0.25d0*grids(l+1)%f DEALLOCATE(resid) grids(l+1)%v = 0.0d0 ! ! Only 1 call to the coarsest level DO k=1,MERGE(info%mu, 1, (l+1).NE.levels) CALL mg(grids, info, l+1) END DO ! grids(l)%v = grids(l)%v + prolong(grids(l+1)%matp,grids(l+1)%v) CALL relax(info%nu2) END IF ! CONTAINS SUBROUTINE relax(nu) INTEGER, INTENT(in) :: nu SELECT CASE (TRIM(info%relax)) CASE ("jac") IF(ALLOCATED(grids(l)%mata)) THEN CALL jacobi(grids(l)%mata, info%omega, nu, grids(l)%v1d, grids(l)%f1d) ELSE CALL jacobi(grids(l)%mata_cds, info%omega, nu, grids(l)%v1d, grids(l)%f1d) END IF CASE ("gs") IF(ALLOCATED(grids(l)%mata)) THEN CALL gs(grids(l)%mata, nu, grids(l)%v1d, grids(l)%f1d) ELSE CALL gs(grids(l)%mata_cds, nu, grids(l)%v1d, grids(l)%f1d) END IF CASE default PRINT*, "relax ", info%relax, " NOT IMPLEMENTED!" STOP END SELECT END SUBROUTINE relax END SUBROUTINE mg_2d !-------------------------------------------------------------------------------- RECURSIVE SUBROUTINE mg_cyl(grids, info, l, nluniq_in) ! ! Execute a recursive V-cycle ! TYPE(grid2d), INTENT(inout) :: grids(:) TYPE(mg_info), INTENT(in) :: info INTEGER, INTENT(in) :: l LOGICAL, INTENT(in), OPTIONAL :: nluniq_in ! DOUBLE PRECISION, ALLOCATABLE, TARGET :: resid(:,:) DOUBLE PRECISION, POINTER :: resid1d(:) INTEGER :: levels, k, m1, m2, r1, r2 LOGICAL :: nluniq ! levels = info%levels m1 = SIZE(grids(l)%v,1) m2 = SIZE(grids(l)%v,2) r1 = grids(l)%rank(1) ! r1 = m1 r2 = grids(l)%rank(2) ! r2 = m2-1 IF(PRESENT(nluniq_in)) THEN nluniq = nluniq_in ELSE nluniq = .TRUE. END IF ! IF(l.EQ.levels) THEN grids(levels)%v1d = grids(levels)%f1d CALL direct_solve(grids(levels), grids(levels)%v1d, debug=.FALSE.) ELSE CALL relax(info%nu1) ALLOCATE(resid(m1,m2)); resid1d(1:r1*r2) => resid IF(ALLOCATED(grids(l)%mata)) THEN resid1d(:) = grids(l)%f1d(:) - vmx(grids(l)%mata, grids(l)%v1d) ELSE resid1d(:) = grids(l)%f1d(:) - vmx(grids(l)%mata_cds, grids(l)%v1d) END IF ! grids(l+1)%f(:,:) = restrict_cyl(grids(l+1), resid, nluniq) ! DEALLOCATE(resid) grids(l+1)%v1d = 0.0d0 ! ! Only 1 call to the coarsest level DO k=1,MERGE(info%mu, 1, (l+1).NE.levels) CALL mg_cyl(grids, info, l+1, nluniq) END DO ! grids(l)%v(:,1:r2) = grids(l)%v(:,1:r2) + & & prolong_cyl(grids(l+1),grids(l+1)%v, nluniq) ! CALL relax(info%nu2) END IF ! CONTAINS SUBROUTINE relax(nu) INTEGER, INTENT(in) :: nu SELECT CASE (TRIM(info%relax)) CASE ("jac") IF(ALLOCATED(grids(l)%mata)) THEN CALL jacobi(grids(l)%mata, info%omega, nu, grids(l)%v1d, grids(l)%f1d) ELSE CALL jacobi(grids(l)%mata_cds, info%omega, nu, grids(l)%v1d, grids(l)%f1d) END IF CASE ("gs") IF(ALLOCATED(grids(l)%mata)) THEN CALL gs(grids(l)%mata, nu, grids(l)%v1d, grids(l)%f1d) ELSE CALL gs(grids(l)%mata_cds, nu, grids(l)%v1d, grids(l)%f1d) END IF CASE default PRINT*, "relax ", info%relax, " NOT IMPLEMENTED!" STOP END SELECT END SUBROUTINE relax END SUBROUTINE mg_cyl !-------------------------------------------------------------------------------- FUNCTION prolong_1d(matp,vcoarse) RESULT(vfine) ! ! Prolongation ! TYPE(gemat), INTENT(in) :: matp DOUBLE PRECISION, INTENT(in) :: vcoarse(:) DOUBLE PRECISION :: vfine(matp%mrows) ! vfine = vmx(matp,vcoarse) END FUNCTION prolong_1d !-------------------------------------------------------------------------------- FUNCTION restrict_1d(matp,vfine) RESULT(vcoarse) ! ! Restriction ! TYPE(gemat), INTENT(in) :: matp DOUBLE PRECISION, INTENT(in) :: vfine(:) DOUBLE PRECISION :: vcoarse(matp%ncols) ! vcoarse = vmx(matp,vfine,'T') END FUNCTION restrict_1d !-------------------------------------------------------------------------------- FUNCTION prolong_2d(matp,vcoarse) RESULT(vfine) ! ! Prolongation ! TYPE(gemat), INTENT(in) :: matp(2) DOUBLE PRECISION, INTENT(in) :: vcoarse(:,:) DOUBLE PRECISION, ALLOCATABLE :: vfine(:,:) ! DOUBLE PRECISION, POINTER :: pmat1(:,:), pmat2(:,:) DOUBLE PRECISION, ALLOCATABLE :: temp(:,:) DOUBLE PRECISION :: one=1.0d0, zero=0.0d0 INTEGER :: m1, m1p, m2, m2p ! pmat1 => matp(1)%val pmat2 => matp(2)%val m1 = SIZE(pmat1,1); m1p = SIZE(pmat1,2) m2 = SIZE(pmat2,1); m2p = SIZE(pmat2,2) ALLOCATE(vfine(m1,m2)) ALLOCATE(temp(m1,m2p)) ! ! Compute (P1) * V ! CALL dgemm('N', 'N', m1, m2p, m1p, one, pmat1, m1, vcoarse, m1p, zero, & & temp, m1) ! ! Compute (P1) * V * (P2)^T ! CALL dgemm('N', 'T', m1, m2, m2p, one, temp, m1, pmat2, m2, zero, & & vfine, m1) ! DEALLOCATE(temp) END FUNCTION prolong_2d !-------------------------------------------------------------------------------- FUNCTION prolong_2d_csr(matp,vcoarse) RESULT(vfine) ! ! Prolongation using CSR prolongation matrix ! TYPE(csr_mat), INTENT(in) :: matp(2) DOUBLE PRECISION, INTENT(in) :: vcoarse(:,:) DOUBLE PRECISION, ALLOCATABLE :: vfine(:,:) ! DOUBLE PRECISION, ALLOCATABLE :: temp(:,:) INTEGER :: m1, m1p, m2, m2p INTEGER :: i, j, k, kk ! m1 = matp(1)%mrows; m1p = matp(1)%ncols m2 = matp(2)%mrows; m2p = matp(2)%ncols ALLOCATE(vfine(m1,m2)) ALLOCATE(temp(m1p,m2)) temp = 0.0d0 vfine = 0.0d0 ! ! Compute temp = V * (P2)^T ! t_ij = sum_{k=1}^{m2p} V_ik (P2)_jk, i=1:m1p, j=1:m2 ! DO j=1,m2 DO kk=matp(2)%irow(j),matp(2)%irow(j+1)-1 k = matp(2)%cols(kk) temp(:,j) = temp(:,j) + vcoarse(:,k)*matp(2)%val(kk) END DO END DO ! ! Compute (P1) * V * (P2)^T ! V_ij = sum_{k=1}^{m1p} (P1)_ik t_kj, i=1:m1, j=1:m2 ! DO i=1,m1 DO kk=matp(1)%irow(i),matp(1)%irow(i+1)-1 k = matp(1)%cols(kk) vfine(i,:) = vfine(i,:) + matp(1)%val(kk)*temp(k,:) END DO END DO ! DEALLOCATE(temp) END FUNCTION prolong_2d_csr !-------------------------------------------------------------------------------- FUNCTION restrict_2d(matp,vfine) RESULT(vcoarse) ! ! Restriction ! TYPE(gemat), INTENT(in) :: matp(2) DOUBLE PRECISION, INTENT(in) :: vfine(:,:) DOUBLE PRECISION, ALLOCATABLE :: vcoarse(:,:) ! DOUBLE PRECISION, POINTER :: pmat1(:,:), pmat2(:,:) DOUBLE PRECISION, ALLOCATABLE :: temp(:,:) DOUBLE PRECISION :: one=1.0d0, zero=0.0d0 INTEGER :: m1, m1p, m2, m2p ! pmat1 => matp(1)%val pmat2 => matp(2)%val m1 = SIZE(pmat1,1); m1p = SIZE(pmat1,2) m2 = SIZE(pmat2,1); m2p = SIZE(pmat2,2) ALLOCATE(vcoarse(m1p,m2p)) ALLOCATE(temp(m1p,m2)) ! ! Compute (P1)^T * V ! CALL dgemm('T', 'N', m1p, m2, m1, one, pmat1, m1, vfine, m1, zero, & & temp, m1p) ! ! Compute (P1)^T * V * P2 ! CALL dgemm('N', 'N', m1p, m2p, m2, one, temp, m1p, pmat2, m2, zero, & & vcoarse, m1p) ! DEALLOCATE(temp) END FUNCTION restrict_2d !-------------------------------------------------------------------------------- FUNCTION restrict_2d_csr(matp,vfine) RESULT(vcoarse) ! ! Restriction using CSR prolongation matrix ! TYPE(csr_mat), INTENT(in) :: matp(2) DOUBLE PRECISION, INTENT(in) :: vfine(:,:) DOUBLE PRECISION, ALLOCATABLE :: vcoarse(:,:) ! DOUBLE PRECISION, ALLOCATABLE :: temp(:,:) INTEGER :: m1, m1p, m2, m2p INTEGER :: i, ii, j, jj, k ! m1 = matp(1)%mrows; m1p = matp(1)%ncols m2 = matp(2)%mrows; m2p = matp(2)%ncols ALLOCATE(vcoarse(m1p,m2p)) ALLOCATE(temp(m1,m2p)) temp = 0.0d0 vcoarse = 0.0d0 ! ! Compute temp = V * (R2)^T = V * (P2) ! t_ij = sum_{k=1}^{m2} V_ik (P2)_kj, i=1:m1, j=1:m2p ! DO k=1,m2 DO jj=matp(2)%irow(k),matp(2)%irow(k+1)-1 j = matp(2)%cols(jj) temp(:,j) = temp(:,j) + vfine(:,k)*matp(2)%val(jj) END DO END DO ! ! Compute (R1) * V * (R2)^T = (P1)^T) * V * (P2) ! V_ij = sum_{k=1}^{m1p} (P1)_ki t_kj, i=1:m1p, j=1:m2p ! DO k=1,m1 DO ii=matp(1)%irow(k),matp(1)%irow(k+1)-1 i = matp(1)%cols(ii) vcoarse(i,:) = vcoarse(i,:) + matp(1)%val(ii)*temp(k,:) END DO END DO ! DEALLOCATE(temp) END FUNCTION restrict_2d_csr !-------------------------------------------------------------------------------- FUNCTION prolong_cyl(grid, vcoarse, nluniq) RESULT(vfine) ! ! Prolongation (cylindrical case) ! TYPE(grid2d) :: grid DOUBLE PRECISION, INTENT(inout) :: vcoarse(:,:) DOUBLE PRECISION, ALLOCATABLE :: vfine(:,:) LOGICAL, INTENT(in) :: nluniq ! DOUBLE PRECISION, ALLOCATABLE :: temp(:,:) INTEGER :: m1, m1p, m2, m2p INTEGER :: i, j, k, kk ! m1 = grid%matp(1)%mrows; m1p = grid%matp(1)%ncols m2 = grid%matp(2)%mrows; m2p = grid%matp(2)%ncols ALLOCATE(vfine(m1,m2)) ALLOCATE(temp(m1p,m2)) temp = 0.0d0 vfine = 0.0d0 ! IF(nluniq) vcoarse(1,1:m2p-1) = vcoarse(1,m2p) ! ! Compute temp = V * (P2)^T ! t_ij = sum_{k=1}^{m2p} V_ik (P2)_jk, i=1:m1p, j=1:m2 ! DO j=1,m2 DO kk=grid%matp(2)%irow(j),grid%matp(2)%irow(j+1)-1 k = grid%matp(2)%cols(kk) temp(:,j) = temp(:,j) + vcoarse(:,k)*grid%matp(2)%val(kk) END DO END DO ! ! Compute (P1) * V * (P2)^T ! V_ij = sum_{k=1}^{m1p} (P1)_ik t_kj, i=1:m1, j=1:m2 ! DO i=1,m1 DO kk=grid%matp(1)%irow(i),grid%matp(1)%irow(i+1)-1 k = grid%matp(1)%cols(kk) vfine(i,:) = vfine(i,:) + grid%matp(1)%val(kk)*temp(k,:) END DO END DO !!$! !!$! Compute (P1) * V !!$! !!$ CALL dgemm('N', 'N', m1, m2p, m1p, one, pmat1, m1, vcoarse, m1p, zero, & !!$ & temp, m1) !!$! !!$! Compute (P1) * V * (P2)^T !!$! !!$ CALL dgemm('N', 'T', m1, m2, m2p, one, temp, m1, pmat2, m2, zero, & !!$ & vfine, m1) ! IF(nluniq) THEN vcoarse(1,1:m2p-1) = vcoarse(1,1:m2p-1) - vcoarse(1,m2p) vfine(1,1:m2-1) = vfine(1,1:m2-1) - vfine(1,m2) END IF ! DEALLOCATE(temp) END FUNCTION prolong_cyl !-------------------------------------------------------------------------------- FUNCTION restrict_cyl(grid, vfine, nluniq) RESULT(vcoarse) ! ! Restriction (cylindrical case) ! TYPE(grid2d) :: grid DOUBLE PRECISION, INTENT(inout) :: vfine(:,:) DOUBLE PRECISION, ALLOCATABLE :: vcoarse(:,:) LOGICAL, INTENT(in) :: nluniq ! DOUBLE PRECISION, ALLOCATABLE :: temp(:,:) INTEGER :: m1, m1p, m2, m2p INTEGER :: i, ii, j, jj, k ! m1 = grid%matp(1)%mrows; m1p = grid%matp(1)%ncols m2 = grid%matp(2)%mrows; m2p = grid%matp(2)%ncols ALLOCATE(vcoarse(m1p,m2p)) ALLOCATE(temp(m1,m2p)) temp = 0.0d0 vcoarse = 0.0d0 ! IF(nluniq) vfine(1,1:m2) = vfine(1,m2)/REAL(m2,8) ! ! Compute temp = V * (R2)^T = V * (P2) ! t_ij = sum_{k=1}^{m2} V_ik (P2)_kj, i=1:m1, j=1:m2p ! DO k=1,m2 DO jj=grid%matp(2)%irow(k),grid%matp(2)%irow(k+1)-1 j = grid%matp(2)%cols(jj) temp(:,j) = temp(:,j) + vfine(:,k)*grid%matp(2)%val(jj) END DO END DO ! ! Compute (R1) * V * (R2)^T = (P1)^T) * V * (P2) ! V_ij = sum_{k=1}^{m1p} (P1)_ki t_kj, i=1:m1p, j=1:m2p ! DO k=1,m1 DO ii=grid%matp(1)%irow(k),grid%matp(1)%irow(k+1)-1 i = grid%matp(1)%cols(ii) vcoarse(i,:) = vcoarse(i,:) + grid%matp(1)%val(ii)*temp(k,:) END DO END DO ! !!$! Compute (P1)^T * V !!$! !!$ CALL dgemm('T', 'N', m1p, m2, m1, one, pmat1, m1, vfine, m1, zero, & !!$ & temp, m1p) !!$! !!$! Compute (P1)^T * V * P2 !!$! !!$ CALL dgemm('N', 'N', m1p, m2p, m2, one, temp, m1p, pmat2, m2, zero, & !!$ & vcoarse, m1p) ! IF(nluniq) THEN vfine(1,m2) = SUM(vfine(1,1:m2)); vfine(1,1:m2-1) = 0.0d0 vcoarse(1,m2p) = SUM(vcoarse(1,1:m2p)); vcoarse(1,m2p-1) = 0.0d0 END IF ! DEALLOCATE(temp) ! END FUNCTION restrict_cyl !-------------------------------------------------------------------------------- SUBROUTINE massmat_ge(spl, alpha, matm) ! ! Compute mass matrix ! TYPE(spline1d), INTENT(in) :: spl INTEGER, INTENT(in) :: alpha TYPE(gemat), INTENT(out) :: matm ! INTEGER :: nrank, nx, nidbas, kl, ku ! CALL get_dim(spl, nrank, nx, nidbas) kl=nidbas; ku=kl IF(spl%period) nrank = nx CALL init(nrank, 1, matm) CALL conmat(spl, matm, coefeq) CONTAINS SUBROUTINE coefeq(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE PRECISION, INTENT(out) :: c(:) c(1) = x**alpha idt(1) = 0 idw(1) = 0 END SUBROUTINE coefeq END SUBROUTINE massmat_ge !-------------------------------------------------------------------------------- SUBROUTINE massmat_gb(spl, alpha, matm) ! ! Compute mass matrix ! TYPE(spline1d), INTENT(in) :: spl INTEGER, INTENT(in) :: alpha TYPE(gbmat), INTENT(out) :: matm ! INTEGER :: nrank, nx, nidbas, kl, ku ! CALL get_dim(spl, nrank, nx, nidbas) kl=nidbas; ku=kl CALL init(kl, ku, nrank, 1, matm) CALL conmat(spl, matm, coefeq) CONTAINS SUBROUTINE coefeq(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE PRECISION, INTENT(out) :: c(:) c(1) = x**alpha idt(1) = 0 idw(1) = 0 END SUBROUTINE coefeq END SUBROUTINE massmat_gb !-------------------------------------------------------------------------------- SUBROUTINE massmat_periodic(spl, alpha, matm) ! ! Compute mass matrix (periodic case) ! TYPE(spline1d), INTENT(in) :: spl INTEGER, INTENT(in) :: alpha TYPE(periodic_mat), INTENT(out) :: matm ! INTEGER :: dim, nrank, nx, nidbas, kl, ku ! CALL get_dim(spl, dim, nx, nidbas) kl=nidbas; ku=kl nrank = nx CALL init(kl, ku, nrank, 1, matm) CALL conmat(spl, matm, coefeq) CONTAINS SUBROUTINE coefeq(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE PRECISION, INTENT(out) :: c(:) c(1) = x**alpha idt(1) = 0 idw(1) = 0 END SUBROUTINE coefeq END SUBROUTINE massmat_periodic !-------------------------------------------------------------------------------- SUBROUTINE femat_2d_csr(spl, mat, coefeq, nterms, maxder_in, nat_order_in, & & noinit) ! ! Compute 2d fe CSR matrix ! TYPE(spline2d), INTENT(in) :: spl TYPE(csr_mat), INTENT(inout) :: mat INTEGER, INTENT(in) :: nterms INTEGER, INTENT(in), OPTIONAL :: maxder_in(2) LOGICAL, INTENT(in), OPTIONAL :: nat_order_in LOGICAL, INTENT(in), OPTIONAL :: noinit INTERFACE SUBROUTINE coefeq(x, y, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x, y INTEGER, INTENT(out) :: idt(:,:), idw(:,:) DOUBLE PRECISION, INTENT(out) :: c(:) END SUBROUTINE coefeq END INTERFACE ! INTEGER :: nrank, ndim(2), nints(2), maxder(2) LOGICAL :: nat_order, run_init ! CALL get_dim(spl, ndim, nints) IF(spl%sp2%period) THEN nrank = ndim(1)*nints(2) ELSE nrank = PRODUCT(ndim) END IF ! maxder = 1; IF(PRESENT(maxder_in)) maxder = maxder_in nat_order = .TRUE.; IF(PRESENT(nat_order_in)) nat_order = nat_order_in ! run_init = .TRUE. IF(PRESENT(noinit)) run_init = .NOT.noinit IF(run_init) CALL init(nrank, nterms, mat) ! CALL conmat(spl, mat, coefeq, maxder, nat_order) END SUBROUTINE femat_2d_csr !-------------------------------------------------------------------------------- SUBROUTINE femat_ge(spl, mat, coefeq) ! ! Compute fe matrix ! TYPE(spline1d), INTENT(in) :: spl TYPE(gemat), INTENT(out) :: mat INTERFACE SUBROUTINE coefeq(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE PRECISION, INTENT(out) :: c(:) END SUBROUTINE coefeq END INTERFACE ! INTEGER :: nrank, nx, nidbas, kl, ku ! CALL get_dim(spl, nrank, nx, nidbas) kl=nidbas; ku=kl IF(spl%period) nrank = nx CALL init(nrank, 2, mat) CALL conmat(spl, mat, coefeq) END SUBROUTINE femat_ge !-------------------------------------------------------------------------------- SUBROUTINE femat_gb(spl, mat, coefeq) ! ! Compute fe matrix ! TYPE(spline1d), INTENT(in) :: spl TYPE(gbmat), INTENT(out) :: mat INTERFACE SUBROUTINE coefeq(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE PRECISION, INTENT(out) :: c(:) END SUBROUTINE coefeq END INTERFACE ! INTEGER :: nrank, nx, nidbas, kl, ku ! CALL get_dim(spl, nrank, nx, nidbas) kl=nidbas; ku=kl CALL init(kl, ku, nrank, 2, mat) CALL conmat(spl, mat, coefeq) END SUBROUTINE femat_gb !-------------------------------------------------------------------------------- SUBROUTINE femat_periodic(spl, mat, coefeq) ! ! Compute fe matrix ! TYPE(spline1d), INTENT(in) :: spl TYPE(periodic_mat), INTENT(out) :: mat INTERFACE SUBROUTINE coefeq(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE PRECISION, INTENT(out) :: c(:) END SUBROUTINE coefeq END INTERFACE ! INTEGER :: nrank, dim, nx, nidbas, kl, ku ! CALL get_dim(spl, dim, nx, nidbas) kl=nidbas; ku=kl nrank = nx CALL init(kl, ku, nrank, 2, mat) CALL conmat(spl, mat, coefeq) END SUBROUTINE femat_periodic !-------------------------------------------------------------------------------- SUBROUTINE ibcmat_1d(irow, mat) ! ! Set BC at row irow to 0 ! INTEGER, INTENT(in) :: irow TYPE(gbmat), INTENT(inout) :: mat ! DOUBLE PRECISION :: a(mat%rank) ! a(:)=0.0d0; a(irow)=1.0d0 CALL putrow(mat, irow, a) CALL putcol(mat, irow, a) END SUBROUTINE ibcmat_1d !-------------------------------------------------------------------------------- SUBROUTINE ibcmat_2d(grid, mat, nluniq_in) ! ! Impose BC on matrix (asume natural ordering) ! I = (j-1)*N1 + i, i=1:N1, j=1:N2 ! TYPE(grid2d), INTENT(in) :: grid TYPE(csr_mat), INTENT(inout) :: mat LOGICAL, INTENT(in), OPTIONAL :: nluniq_in ! DOUBLE PRECISION :: temp(mat%rank) INTEGER :: n1e, n2e, nrank, i, j, irow, jcol LOGICAL :: nlper1, nlper2, nlcart, nluniq ! n1e = grid%rank(1) n2e = grid%rank(2) nrank = mat%rank nlper1 = grid%spl%sp1%period nlper2 = grid%spl%sp2%period nlcart = (.NOT.nlper1) .AND. (.NOT.nlper2) IF(PRESENT(nluniq_in)) THEN nluniq = nluniq_in ELSE nluniq = .TRUE. END IF ! ! BC at x=0 ! Dirichlet for Cartesian, unicity for cylindrical problem IF(nlcart) THEN i=1 DO j=1,n2e irow = (j-1)*n1e + i temp = 0.0d0; temp(irow) = 1.0d0 CALL putrow(mat, irow, temp) CALL putcol(mat, irow, temp) END DO ELSE i=1 IF(nluniq) THEN CALL unicity END IF END IF ! ! BC at x=1 ! For both Cartesian and cylindrical i=n1e DO j=1,n2e irow = (j-1)*n1e + i temp = 0.0d0; temp(irow) = 1.0d0 CALL putrow(mat, irow, temp) CALL putcol(mat, irow, temp) END DO ! ! BC at y=0 ! Only for Cartesian problem IF(nlcart) THEN j=1 DO i=1,n1e irow = (j-1)*n1e + i temp = 0.0d0; temp(irow) = 1.0d0 CALL putrow(mat, irow, temp) CALL putcol(mat, irow, temp) END DO END IF ! ! BC at y=1 ! Only for Cartesian problem IF(nlcart) THEN j=n2e DO i=1,n1e irow = (j-1)*n1e + i temp = 0.0d0; temp(irow) = 1.0d0 CALL putrow(mat, irow, temp) CALL putcol(mat, irow, temp) END DO END IF ! CONTAINS SUBROUTINE unicity INTEGER :: irow0, jcol0 DOUBLE PRECISION :: temp_sum(mat%rank) ! irow0 = (n2e-1)*n1e + i jcol0 = irow0 ! ! Vertical sum temp_sum(:) = 0.0d0 DO j=1,n2e irow = (j-1)*n1e + i temp = 0.0d0 CALL getrow(mat, irow, temp) temp_sum(:) = temp_sum(:) + temp(:) END DO CALL putrow(mat, irow0, temp_sum) ! ! Horizontal sum temp_sum(:) = 0.0d0 DO j=1,n2e jcol = (j-1)*n1e + i temp = 0.0d0 CALL getcol(mat, jcol, temp) temp_sum(:) = temp_sum(:) + temp(:) END DO CALL putcol(mat, jcol0, temp_sum) ! ! The away operator DO j=1,n2e-1 irow = (j-1)*n1e + i temp = 0.0d0; temp(irow) = 1.0d0 CALL putrow(mat, irow, temp) CALL putcol(mat, irow, temp) END DO END SUBROUTINE unicity END SUBROUTINE ibcmat_2d !-------------------------------------------------------------------------------- SUBROUTINE mod_transf_full(mat,k) ! ! Modify grid transfer matrix. ! DOUBLE PRECISION, INTENT(inout) :: mat(:,:) INTEGER, INTENT(in) :: k INTEGER :: m, n ! m=SIZE(mat,1) n=SIZE(mat,2) ! ! Clear matrix small elements. WHERE( ABS(mat) < 1.d-8) mat=0.0d0 ! ! Left boundary IF(k.EQ.1 .OR. k.EQ.3) THEN mat(2:m,1) = 0.0d0 END IF ! ! Right boundary IF(k.EQ.2 .OR. k.EQ.3) THEN mat(1:m-1,n) = 0.0d0 END IF END SUBROUTINE mod_transf_full !-------------------------------------------------------------------------------- SUBROUTINE mod_transf_csr(mat,k) ! ! Modify grid transfer matrix. ! TYPE(csr_mat), INTENT(inout) :: mat INTEGER, INTENT(in) :: k ! DOUBLE PRECISION :: acol(mat%mrows) INTEGER :: m, n ! m=mat%mrows n=mat%ncols ! ! Left boundary acol = 0.0d0 IF(k.EQ.1 .OR. k.EQ.3) THEN CALL getele(mat, 1, 1, acol(1)) CALL putcol(mat, 1, acol) END IF ! ! Right boundary acol = 0.0d0 IF(k.EQ.2 .OR. k.EQ.3) THEN CALL getele(mat, m, n, acol(m)) CALL putcol(mat, n, acol) END IF END SUBROUTINE mod_transf_csr !-------------------------------------------------------------------------------- SUBROUTINE calc_pmat(grid1, grid2, pmat, debug_in) ! ! Compute prolongation matrix by collocation ! TYPE(grid1d), INTENT(in) :: grid1, grid2 TYPE(gemat), INTENT(out) :: pmat LOGICAL, OPTIONAL, INTENT(in) :: debug_in ! TYPE(gemat) :: mat_interp DOUBLE PRECISION, ALLOCATABLE :: fun(:,:), fun2(:,:) DOUBLE PRECISION :: xinter INTEGER, ALLOCATABLE :: jcol(:) INTEGER :: nidbas, nfine, ncoarse, nderv LOGICAL :: nlper, debug INTEGER :: i, i0, ii, k, irow !================================================================================ ! 0. Prologue ! debug = .FALSE. IF(PRESENT(debug_in)) debug = debug_in ! nfine = grid1%n ncoarse = grid2%n nidbas = grid1%spl%order - 1 nlper = grid1%spl%period ! IF(nlper) THEN IF(ncoarse .LT. nidbas+1) THEN WRITE(*,'(/a)') '** NX/2 should be larger or equal to NIDBAS+1 **' STOP END IF END IF ! IF(debug) THEN WRITE(*,'(/2(a,i0))') 'nfine = ', nfine, ', ncoarse = ', ncoarse IF(nlper) WRITE(*,'(a)') 'Grids are periodic!' END IF ! ALLOCATE(jcol(0:nidbas)) ALLOCATE(fun(0:nidbas,1)) IF(nlper) THEN CALL init(ncoarse, 1, pmat, mrows=nfine) CALL init(nfine, 1, mat_interp) ELSE CALL init(ncoarse+nidbas, 1, pmat, mrows=nfine+nidbas) CALL init(nfine+nidbas, 1, mat_interp) END IF !================================================================================ ! 1. Interpolation matrix ! irow = 0 i0 = 1 ! ! Left bound IF(.NOT.nlper) THEN IF(MODULO(nidbas,2).EQ.1) THEN nderv = (nidbas-1)/2 ! ndidbas = 1, 3, 5, ... ALLOCATE(fun2(0:nidbas,nderv+1)) CALL basfun(grid1%x(0), grid1%spl, fun2, 1) jcol = 1 + (/ (i, i=0,nidbas) /) DO k=1,nderv+1 irow = irow+1 mat_interp%val(irow,jcol) = fun2(0:nidbas,k) END DO i0 = 2 ! Skip the first grid point ELSE nderv = nidbas/2-1 ! ndidbas = 2, 4, ... ALLOCATE(fun2(0:nidbas,nderv+1)) CALL basfun(grid1%x(0), grid1%spl, fun2, 1) jcol = 1 + (/ (i, i=0,nidbas) /) DO k=1,nderv+1 irow = irow+1 mat_interp%val(irow,jcol) = fun2(0:nidbas,k) END DO END IF END IF DO i=i0,nfine IF(MODULO(nidbas,2).EQ.0) THEN xinter = (grid1%x(i-1)+grid1%x(i))/2.0d0 ! Left bound of interval ELSE xinter = grid1%x(i-1) ! Left bound of interval END IF CALL basfun(xinter, grid1%spl, fun, i) irow = irow+1 DO k=0,nidbas jcol(k) = i+k END DO IF(nlper) jcol = MODULO(jcol-1,nfine)+1 mat_interp%val(irow,jcol) = fun(0:nidbas,1) END DO ! ! Right bound IF(.NOT.nlper) THEN CALL basfun(grid1%x(nfine), grid1%spl, fun2, nfine) jcol = nfine + (/ (i, i=0,nidbas) /) DO k=nderv+1,1,-1 irow = irow+1 mat_interp%val(irow,jcol) = fun2(0:nidbas,k) END DO END IF IF(debug) CALL printmat('** Interpolation matrix **', mat_interp) !================================================================================ ! 2. RHS matrix ! irow = 0 i0 = 1 DO i=1,ncoarse ii = 2*i-1 CALL comp_rhs(ii) CALL comp_rhs(ii+1) END DO IF(debug) CALL printmat('** RHS matrix **', pmat) !================================================================================ ! 3. Compute prolongation matrix ! CALL factor(mat_interp) CALL bsolve(mat_interp, pmat%val) !================================================================================ ! 9. Epilogue ! CALL destroy(mat_interp) DEALLOCATE(jcol) DEALLOCATE(fun) IF(ALLOCATED(fun2)) DEALLOCATE(fun2) ! CONTAINS SUBROUTINE comp_rhs(ii) INTEGER, INTENT(in) :: ii INTEGER :: k ! ! Left bounds for non-periodic cases IF(.NOT.nlper .AND. ii.EQ.1) THEN CALL basfun(grid1%x(0), grid2%spl, fun2, 1) jcol = 1 + (/ (k, k=0,nidbas) /) DO k=1,nderv+1 irow = irow+1 pmat%val(irow,jcol) = fun2(0:nidbas,k) END DO IF(MODULO(nidbas,2).EQ.1) RETURN ! Skip END IF ! IF(MODULO(nidbas,2).EQ.0) THEN xinter = (grid1%x(ii-1)+grid1%x(ii))/2.0d0 ! Left bound of interval ELSE xinter = grid1%x(ii-1) ! Left bound of interval END IF CALL basfun(xinter, grid2%spl, fun, i) irow = irow+1 DO k=0,nidbas jcol(k) = i+k END DO IF(nlper) jcol = MODULO(jcol-1,ncoarse)+1 pmat%val(irow,jcol) = fun(0:nidbas,1) ! ! Right bounds for non-periodic cases IF(.NOT.nlper .AND. ii.EQ.nfine) THEN CALL basfun(grid1%x(nfine), grid2%spl, fun2, ncoarse) jcol = ncoarse + (/ (k, k=0,nidbas) /) DO k=nderv+1,1,-1 irow = irow+1 pmat%val(irow,jcol) = fun2(0:nidbas,k) END DO END IF END SUBROUTINE comp_rhs END SUBROUTINE calc_pmat !-------------------------------------------------------------------------------- SUBROUTINE disrhs_1d(spl, farr, frhs) ! ! Projection of RHS on spline basis functions ! TYPE(spline1d) :: spl DOUBLE PRECISION, INTENT(out) :: farr(:) INTERFACE DOUBLE PRECISION FUNCTION frhs(x) DOUBLE PRECISION, INTENT(in) :: x END FUNCTION frhs END INTERFACE DOUBLE PRECISION :: contrib ! DOUBLE PRECISION, POINTER :: xg(:,:), wg(:,:) DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) INTEGER :: ndim, n, nidbas, ng INTEGER :: i, ig, it, irow LOGICAL :: nlper ! CALL get_dim(spl, ndim, n, nidbas) nlper = spl%period xg => spl%gausx ! xg(ng,n) wg => spl%gausw ! wg(ng,n) ng = SIZE(xg,1) ALLOCATE(fun(0:nidbas,1)) ! farr = 0.0d0 DO i=1,n DO ig=1,ng CALL basfun(xg(ig,i), spl, fun, i) contrib = wg(ig,i)*frhs(xg(ig,i)) DO it=0,nidbas irow = i+it IF(nlper) irow = MODULO(irow-1,n) +1 farr(irow) = farr(irow)+contrib*fun(it,1) END DO END DO END DO ! DEALLOCATE(fun) END SUBROUTINE disrhs_1d !-------------------------------------------------------------------------------- SUBROUTINE disrhs_2d(spl, farr, frhs) ! ! Projection of RHS on spline basis functions ! TYPE(spline2d) :: spl DOUBLE PRECISION, INTENT(out) :: farr(:,:) INTERFACE DOUBLE PRECISION FUNCTION frhs(x,y) DOUBLE PRECISION, INTENT(in) :: x,y END FUNCTION frhs END INTERFACE DOUBLE PRECISION :: contrib ! DOUBLE PRECISION, POINTER :: xg1(:,:), wg1(:,:) DOUBLE PRECISION, POINTER :: xg2(:,:), wg2(:,:) DOUBLE PRECISION, ALLOCATABLE :: fun1(:,:) DOUBLE PRECISION, ALLOCATABLE :: fun2(:,:) INTEGER :: ndim1, n1, nidbas1, ng1 INTEGER :: ndim2, n2, nidbas2, ng2 INTEGER :: i1, ig1, it1, irow INTEGER :: i2, ig2, it2, jcol ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) ! xg1 => spl%sp1%gausx ! xg(ng,n) wg1 => spl%sp1%gausw ! wg(ng,n) ng1 = SIZE(xg1,1) xg2 => spl%sp2%gausx ! xg(ng,n) wg2 => spl%sp2%gausw ! wg(ng,n) ng2 = SIZE(xg2,1) ! ALLOCATE(fun1(0:nidbas1,1)) ALLOCATE(fun2(0:nidbas2,1)) ! farr = 0.0d0 DO i1=1,n1 DO ig1=1,ng1 CALL basfun(xg1(ig1,i1), spl%sp1, fun1, i1) DO i2=1,n2 DO ig2=1,ng2 CALL basfun(xg2(ig2,i2), spl%sp2, fun2, i2) contrib = wg1(ig1,i1)*wg2(ig2,i2)* & & frhs(xg1(ig1,i1), xg2(ig2,i2)) DO it1=0,nidbas1 irow = i1+it1 DO it2=0,nidbas2 jcol = i2+it2 farr(irow,jcol) = farr(irow,jcol) + & & contrib*fun1(it1,1)*fun2(it2,1) END DO END DO END DO END DO END DO END DO ! ! Case of periodic BC (only in 2nd dimension!) ! IF(spl%sp2%period) THEN DO jcol=1,nidbas2 farr(:,jcol) = farr(:,jcol)+farr(:,jcol+n2) farr(:,jcol+n2) = 0.0d0 END DO END IF DEALLOCATE(fun1) DEALLOCATE(fun2) END SUBROUTINE disrhs_2d !-------------------------------------------------------------------------------- SUBROUTINE ibcrhs(grid, f, nluniq_in) ! ! Impose BC on RHS ! TYPE(grid2d) :: grid DOUBLE PRECISION, INTENT(inout) :: f(:,:) LOGICAL, INTENT(in), OPTIONAL :: nluniq_in ! DOUBLE PRECISION :: temp INTEGER :: n1, n2 LOGICAL :: nlper1, nlper2, nlcyl, nluniq ! n1 = grid%rank(1) n2 = grid%rank(2) nlper1 = grid%spl%sp1%period nlper2 = grid%spl%sp2%period nlcyl = (.NOT.nlper1) .AND. (nlper2) IF(PRESENT(nluniq_in)) THEN nluniq = nluniq_in ELSE nluniq=.TRUE. END IF ! ! Cylindrical case, unicity at the axis, 0 at the right side IF(nlcyl) THEN ! IF(nluniq) THEN temp = SUM(f(1,1:n2)) f(1,n2) = temp f(1,1:n2-1) = 0.0d0 END IF f(n1,1:n2) = 0.0d0 ! ! Cartesian case: 0 on all 4 boundaries ELSE f(1,:) = 0.0d0; f(n1,:) = 0.0d0 f(:,1) = 0.0d0; f(:,n2) = 0.0d0 END IF END SUBROUTINE ibcrhs !-------------------------------------------------------------------------------- FUNCTION disc_err_1d(spl, f, fexact) RESULT(disc_err) ! ! L2 norm of discretization error ! TYPE(spline1d) :: spl DOUBLE PRECISION, INTENT(in) :: f(:) DOUBLE PRECISION :: disc_err INTERFACE FUNCTION fexact(x) DOUBLE PRECISION, INTENT(in) :: x(:) DOUBLE PRECISION :: fexact(SIZE(x)) END FUNCTION fexact END INTERFACE ! DOUBLE PRECISION, ALLOCATABLE :: err(:,:) DOUBLE PRECISION, POINTER :: xg(:,:), wg(:,:) INTEGER :: ndim, n, nidbas, ng INTEGER :: ig ! CALL get_dim(spl, ndim, n, nidbas) xg => spl%gausx ! xg(ng,n) wg => spl%gausw ! wg(ng,n) ng = SIZE(xg,1) ! ALLOCATE(err(ng,n)) CALL gridval(spl, xg(1,:), err(1,:), 0, f) err(1,:) = (err(1,:) - fexact(xg(1,:)))**2 DO ig=2,ng CALL gridval(spl, xg(ig,:), err(ig,:), 0) err(ig,:) = (err(ig,:) - fexact(xg(ig,:)))**2 END DO ! disc_err = SQRT(SUM(wg*err)) ! DEALLOCATE(err) END FUNCTION disc_err_1d !-------------------------------------------------------------------------------- FUNCTION disc_err_2d(spl, f, fexact) RESULT(disc_err) ! ! L2 norm of discretization error ! TYPE(spline2d) :: spl DOUBLE PRECISION, INTENT(in) :: f(:,:) DOUBLE PRECISION :: disc_err INTERFACE FUNCTION fexact(x,y) DOUBLE PRECISION, INTENT(in) :: x(:), y(:) DOUBLE PRECISION :: fexact(SIZE(x),SIZE(y)) END FUNCTION fexact END INTERFACE ! DOUBLE PRECISION, ALLOCATABLE :: err(:,:) DOUBLE PRECISION, POINTER :: xg(:,:), wg1(:,:), yg(:,:), wg2(:,:) INTEGER, DIMENSION(2) :: ndim, n, ng INTEGER :: i, j, ig, jg LOGICAL :: nlper1, nlper2, nlcyl ! CALL get_dim(spl, ndim, n) xg => spl%sp1%gausx ! xg(ng,n) wg1 => spl%sp1%gausw ! wg(ng,n) ng(1) = SIZE(xg,1) yg => spl%sp2%gausx ! xg(ng,n) wg2 => spl%sp2%gausw ! wg(ng,n) ng(2) = SIZE(yg,1) ! nlper1 = spl%sp1%period nlper2 = spl%sp2%period nlcyl = (.NOT.nlper1) .AND. (nlper2) ! disc_err = 0.0d0 ALLOCATE(err(n(1),n(2))) DO ig=1,ng(1) DO jg=1,ng(2) IF(ig.EQ.1.AND.jg.EQ.1) THEN CALL gridval(spl, xg(ig,:), yg(jg,:), err, [0,0], f) ELSE CALL gridval(spl, xg(ig,:), yg(jg,:), err, [0,0]) END IF err = (err - fexact(xg(ig,:), yg(jg,:)))**2 DO i=1,n(1) DO j=1,n(2) IF(nlcyl) THEN disc_err = disc_err + xg(ig,i)*wg1(ig,i)*wg2(jg,j)*err(i,j) ELSE disc_err = disc_err + wg1(ig,i)*wg2(jg,j)*err(i,j) END IF END DO END DO END DO END DO disc_err = SQRT(disc_err) ! DEALLOCATE(err) END FUNCTION disc_err_2d !-------------------------------------------------------------------------------- SUBROUTINE back_transf(grid, u, nluniq_in) ! ! Back transform solution and use periodicity (cylindrical problem) ! TYPE(grid2d), INTENT(in) :: grid DOUBLE PRECISION, INTENT(inout) :: u(:,:) LOGICAL, INTENT(in), OPTIONAL :: nluniq_in ! LOGICAL :: nluniq INTEGER :: n, nidbas, j ! n = grid%n(2) nidbas = grid%spl%sp2%order-1 IF(PRESENT(nluniq_in)) THEN nluniq = nluniq_in ELSE nluniq = .TRUE. END IF ! ! Back transform IF(nluniq) THEN u(1,1:n-1) = u(1,n) END IF ! ! Periodicity DO j=1,nidbas u(:,j+n) = u(:,j) END DO END SUBROUTINE back_transf !-------------------------------------------------------------------------------- DOUBLE PRECISION FUNCTION normf_gb(matm, f) ! ! L2 norm of f represented by its expansion coefficients. ! TYPE(gbmat), INTENT(in) :: matm DOUBLE PRECISION, INTENT(in) :: f(:) normf_gb = SQRT(DOT_PRODUCT(f, vmx(matm,f))) END FUNCTION normf_gb !-------------------------------------------------------------------------------- DOUBLE PRECISION FUNCTION normf_ge(matm, f) ! ! L2 norm of f represented by its expansion coefficients. ! TYPE(gemat), INTENT(in) :: matm DOUBLE PRECISION, INTENT(in) :: f(:) normf_ge = SQRT(DOT_PRODUCT(f, vmx(matm,f))) END FUNCTION normf_ge !-------------------------------------------------------------------------------- DOUBLE PRECISION FUNCTION residue_gen(grid, f, u, p) ! ! Generic version of residue ! TYPE(grid2d) :: grid DOUBLE PRECISION, INTENT(in) :: f(:), u(:) DOUBLE PRECISION :: r(SIZE(f)) CHARACTER(len=*), OPTIONAL, INTENT(in) :: p ! IF(ALLOCATED(grid%mata)) THEN residue_gen = residue_csr(grid%mata, f, u, p) ELSE residue_gen = residue_cds(grid%mata_cds, f, u, p) END IF END FUNCTION residue_gen !-------------------------------------------------------------------------------- DOUBLE PRECISION FUNCTION residue_csr(mat, f, u, p) ! ! L2 norm of residue ||f-Av|| ! TYPE(csr_mat), INTENT(in) :: mat DOUBLE PRECISION, INTENT(in) :: f(:), u(:) DOUBLE PRECISION :: r(SIZE(f)) CHARACTER(len=*), OPTIONAL, INTENT(in) :: p ! CHARACTER(len=4) :: norm_type norm_type = '2' IF(PRESENT(p)) norm_type = p ! r = f-vmx(mat,u) SELECT CASE (norm_type) CASE('1') residue_csr = SUM(ABS(r)) CASE ('2') residue_csr = SQRT(DOT_PRODUCT(r,r)) CASE ('inf') residue_csr = MAXVAL(ABS(r)) END SELECT END FUNCTION residue_csr !-------------------------------------------------------------------------------- DOUBLE PRECISION FUNCTION residue_cds(mat, f, u, p) ! ! L2 norm of residue ||f-Av|| ! TYPE(cds_mat), INTENT(in) :: mat DOUBLE PRECISION, INTENT(in) :: f(:), u(:) DOUBLE PRECISION :: r(SIZE(f)) CHARACTER(len=*), OPTIONAL, INTENT(in) :: p ! CHARACTER(len=4) :: norm_type norm_type = '2' IF(PRESENT(p)) norm_type = p ! r = f-vmx(mat,u) SELECT CASE (norm_type) CASE('1') residue_cds = SUM(ABS(r)) CASE ('2') residue_cds = SQRT(DOT_PRODUCT(r,r)) CASE ('inf') residue_cds = MAXVAL(ABS(r)) END SELECT END FUNCTION residue_cds !-------------------------------------------------------------------------------- DOUBLE PRECISION FUNCTION residue_ge(mat, f, u) ! ! L2 norm of residue ||f-Av|| ! TYPE(gemat), INTENT(in) :: mat DOUBLE PRECISION, INTENT(in) :: f(:), u(:) DOUBLE PRECISION :: r(SIZE(f)) r = f-vmx(mat,u) residue_ge = SQRT(DOT_PRODUCT(r,r)) END FUNCTION residue_ge !-------------------------------------------------------------------------------- DOUBLE PRECISION FUNCTION residue_gb(mat, f, u) ! ! L2 norm of residue ||f-Av|| ! TYPE(gbmat), INTENT(in) :: mat DOUBLE PRECISION, INTENT(in) :: f(:), u(:) DOUBLE PRECISION :: r(SIZE(f)) r = f-vmx(mat,u) residue_gb = SQRT(DOT_PRODUCT(r,r)) END FUNCTION residue_gb !-------------------------------------------------------------------------------- SUBROUTINE ctof_massmat(splf, splc, alpha, matm) ! ! Compute coarse to fine mass matrix M(h,2h) ! TYPE(spline1d), INTENT(in) :: splf ! Spline on fine mesh TYPE(spline1d), INTENT(in) :: splc ! Spline on coarse mesh INTEGER, INTENT(in) :: alpha TYPE(gemat), INTENT(out) :: matm ! LOGICAL :: nlper INTEGER :: nf, nxf, nc, nxc, nidbas, kl, ku INTEGER :: ig, ngauss INTEGER :: i, ic, it, jw, irow, jcol ! DOUBLE PRECISION :: contrib DOUBLE PRECISION, ALLOCATABLE :: xg(:), wg(:) DOUBLE PRECISION, ALLOCATABLE :: funf(:,:), func(:,:) ! nlper = splf%period .OR. splc%period CALL get_dim(splf, nf, nxf, nidbas) CALL get_gauss(splf, ngauss) CALL get_dim(splc, nc, nxc, nidbas) kl=nidbas; ku=kl IF(nlper) THEN nf = nxf nc = nxc END IF CALL init(nc, 1, matm, mrows=nf) ! Defne nf x nc matrix ! ALLOCATE(xg(ngauss),wg(ngauss)) ALLOCATE(funf(0:nidbas,1)) ALLOCATE(func(0:nidbas,1)) DO i=1,nxf ic = (i-1)/2+1 CALL get_gauss(splf, ngauss, i, xg, wg) DO ig=1,ngauss CALL basfun(xg(ig), splf, funf, i) CALL basfun(xg(ig), splc, func, ic) DO it=0,nidbas DO jw=0,nidbas contrib = wg(ig)*funf(it,1)*func(jw,1)*xg(ig)**alpha irow = i+it; IF(nlper) irow=MODULO(irow-1,nxf)+1 jcol = ic+jw; IF(nlper) jcol=MODULO(jcol-1,nxc)+1 CALL updtmat(matm, irow, jcol, contrib) END DO END DO END DO END DO ! DEALLOCATE(xg,wg) DEALLOCATE(funf,func) END SUBROUTINE ctof_massmat !-------------------------------------------------------------------------------- SUBROUTINE direct_solve_1d(grid, v) ! ! 1D direct solver ! TYPE(grid1d), INTENT(inout) :: grid DOUBLE PRECISION, INTENT(out) :: v(:) LOGICAL :: nlper ! nlper = grid%spl%period IF(nlper) THEN IF(.NOT.ALLOCATED(grid%matap_copy)) THEN ALLOCATE(grid%matap_copy) CALL mcopy(grid%matap, grid%matap_copy) CALL factor(grid%matap_copy) END IF CALL bsolve(grid%matap_copy, grid%f, v) ELSE IF(.NOT.ALLOCATED(grid%mata_copy)) THEN ALLOCATE(grid%mata_copy) CALL mcopy(grid%mata, grid%mata_copy) CALL factor(grid%mata_copy) END IF CALL bsolve(grid%mata_copy, grid%f, v) END IF END SUBROUTINE direct_solve_1d !-------------------------------------------------------------------------------- SUBROUTINE direct_solve_2d(grid, v, debug) ! ! 2D direct solver ! TYPE(grid2d), INTENT(inout) :: grid DOUBLE PRECISION, INTENT(inout) :: v(:) LOGICAL, INTENT(in), OPTIONAL :: debug LOGICAL :: dbg ! dbg = .FALSE. IF(PRESENT(debug)) dbg=debug ! IF(ALLOCATED(grid%mata)) THEN IF(.NOT.ALLOCATED(grid%mata%mumps)) THEN ALLOCATE(grid%mata%mumps) CALL csr2mumps(grid%mata, grid%mata%mumps) CALL factor(grid%mata%mumps, debug=dbg) END IF CALL bsolve(grid%mata%mumps, v, debug=dbg) ELSE IF(.NOT.ALLOCATED(grid%mata_cds%mumps)) THEN ALLOCATE(grid%mata_cds%mumps) CALL cds2mumps(grid%mata_cds, grid%mata_cds%mumps) CALL factor(grid%mata_cds%mumps, debug=dbg) END IF CALL bsolve(grid%mata_cds%mumps, v, debug=dbg) END IF ! ! Only cylindrical case IF(.NOT.grid%spl%sp1%period .AND. grid%spl%sp2%period) THEN END IF END SUBROUTINE direct_solve_2d !-------------------------------------------------------------------------------- SUBROUTINE jacobi_gb(mat, omega, nu, v, f) ! ! Weighted Jacobi relaxation ! TYPE(gbmat),INTENT(in) :: mat DOUBLE PRECISION, INTENT(in) :: omega INTEGER, INTENT(in) :: nu DOUBLE PRECISION, INTENT(inout) :: v(:) DOUBLE PRECISION, INTENT(in) :: f(:) ! DOUBLE PRECISION :: temp(SIZE(v)) DOUBLE PRECISION :: inv_diag(SIZE(v)) INTEGER :: k, kl, ku, n, i, j, jmin, jmax ! kl = mat%kl ku = mat%ku n = mat%rank ! inv_diag(:) = omega/mat%val(kl+ku+1,:) DO k=1,nu DO i=1,n jmin = MAX(1,i-kl) jmax = MIN(n, i+ku) temp(i) = f(i) DO j=jmin,i-1 temp(i) = temp(i) - mat%val(kl+ku+i-j+1,j)*v(j) END DO DO j=i+1,jmax temp(i) = temp(i) - mat%val(kl+ku+i-j+1,j)*v(j) END DO temp(i) = temp(i)*inv_diag(i) END DO v(:) = (1.d0-omega)*v(:) + temp(:) END DO END SUBROUTINE jacobi_gb !-------------------------------------------------------------------------------- SUBROUTINE jacobi_ge(mat, omega, nu, v, f) ! ! Weighted Jacobi relaxation ! TYPE(gemat),INTENT(in) :: mat DOUBLE PRECISION, INTENT(in) :: omega INTEGER, INTENT(in) :: nu DOUBLE PRECISION, INTENT(inout) :: v(:) DOUBLE PRECISION, INTENT(in) :: f(:) ! DOUBLE PRECISION :: temp(SIZE(v)) DOUBLE PRECISION :: inv_diag(SIZE(v)) INTEGER :: k, n, i, j ! n = mat%rank ! DO i=1,n inv_diag(i) = omega/mat%val(i,i) END DO ! DO k=1,nu DO i=1,n temp(i) = f(i) DO j=1,i-1 temp(i) = temp(i) - mat%val(i,j)*v(j) END DO DO j=i+1,n temp(i) = temp(i) - mat%val(i-j+1,j)*v(j) END DO temp(i) = temp(i)*inv_diag(i) END DO v(:) = (1.d0-omega)*v(:) + temp(:) END DO END SUBROUTINE jacobi_ge !-------------------------------------------------------------------------------- SUBROUTINE jacobi_csr(mat, omega, nu, v, f) ! ! Weighted Jacobi relaxation ! TYPE(csr_mat),INTENT(in) :: mat DOUBLE PRECISION, INTENT(in) :: omega INTEGER, INTENT(in) :: nu DOUBLE PRECISION, INTENT(inout) :: v(:) DOUBLE PRECISION, INTENT(in) :: f(:) ! DOUBLE PRECISION :: temp(SIZE(v)) DOUBLE PRECISION :: inv_diag(SIZE(v)) INTEGER :: k, n, i, j, jcol ! n = mat%rank ! inv_diag(:) = omega/mat%val(mat%idiag) DO k=1,nu temp(:) = f(:) DO i=1,n DO j = mat%irow(i), mat%irow(i+1)-1 jcol = mat%cols(j) IF(jcol.NE.i) THEN ! The diagonal temp(i) = temp(i) - mat%val(j)*v(jcol) END IF END DO END DO temp(:) = temp(:)*inv_diag(:) v(:) = (1.d0-omega)*v(:) + temp(:) END DO END SUBROUTINE jacobi_csr !-------------------------------------------------------------------------------- SUBROUTINE jacobi_cds(mat, omega, nu, v, f) ! ! Weighted Jacobi relaxation ! TYPE(cds_mat),INTENT(in) :: mat DOUBLE PRECISION, INTENT(in) :: omega INTEGER, INTENT(in) :: nu DOUBLE PRECISION, INTENT(inout) :: v(:) DOUBLE PRECISION, INTENT(in) :: f(:) ! DOUBLE PRECISION :: temp(SIZE(v)) DOUBLE PRECISION :: inv_diag(SIZE(v)) INTEGER :: k, n, i, id, d ! n = mat%rank ! inv_diag(:) = omega/mat%val(:,mat%dists(0)) DO k=1,nu temp(:) = f(:) DO id=-mat%kl,mat%ku ! f - (L+U)*v IF(id.EQ.0) CYCLE d = mat%dists(id) DO i=MAX(1,1-d), MIN(n,mat%rank-d) temp(i) = temp(i) - mat%val(i,id)*v(i+d) END DO END DO temp(:) = temp(:)*inv_diag(:) v(:) = (1.d0-omega)*v(:) + temp(:) END DO END SUBROUTINE jacobi_cds !-------------------------------------------------------------------------------- SUBROUTINE gs_gb(mat, nu, v, f) ! ! Gauss-Seidel relaxation ! TYPE(gbmat),INTENT(in) :: mat INTEGER, INTENT(in) :: nu DOUBLE PRECISION, INTENT(inout) :: v(:) DOUBLE PRECISION, INTENT(in) :: f(:) ! INTEGER :: k, kl, ku, n, i, j, jmin, jmax DOUBLE PRECISION :: inv_diag(SIZE(v)) ! kl = mat%kl ku = mat%ku n = mat%rank ! inv_diag(:) = 1.d0/mat%val(kl+ku+1,:) DO k=1,nu DO i=1,n jmin = MAX(1,i-kl) jmax = MIN(n, i+ku) v(i) = f(i) DO j=jmin,i-1 v(i) = v(i) - mat%val(kl+ku+i-j+1,j)*v(j) END DO DO j=i+1,jmax v(i) = v(i) - mat%val(kl+ku+i-j+1,j)*v(j) END DO v(i) = inv_diag(i)*v(i) END DO END DO END SUBROUTINE gs_gb !-------------------------------------------------------------------------------- SUBROUTINE gs_ge(mat, nu, v, f) ! ! Gauss-Seidel relaxation ! TYPE(gemat),INTENT(in) :: mat INTEGER, INTENT(in) :: nu DOUBLE PRECISION, INTENT(inout) :: v(:) DOUBLE PRECISION, INTENT(in) :: f(:) ! INTEGER :: k, n, i, j DOUBLE PRECISION :: inv_diag(SIZE(v)) ! n = mat%rank ! DO i=1,n inv_diag(i) = 1.d0/mat%val(i,i) END DO DO k=1,nu DO i=1,n v(i) = f(i) DO j=1,i-1 v(i) = v(i) - mat%val(i,j)*v(j) END DO DO j=i+1,n v(i) = v(i) - mat%val(i,j)*v(j) END DO v(i) = inv_diag(i)*v(i) END DO END DO END SUBROUTINE gs_ge !-------------------------------------------------------------------------------- SUBROUTINE gs_csr(mat, nu, v, f) ! ! Gauss-Seidel relaxation ! TYPE(csr_mat),INTENT(in) :: mat INTEGER, INTENT(in) :: nu DOUBLE PRECISION, INTENT(inout) :: v(:) DOUBLE PRECISION, INTENT(in) :: f(:) ! DOUBLE PRECISION :: inv_diag(SIZE(v)) INTEGER :: k, n, i, j, jcol ! n = mat%rank ! inv_diag(:) = 1.0d0/mat%val(mat%idiag) DO k=1,nu DO i=1,n v(i) = f(i) DO j = mat%irow(i), mat%irow(i+1)-1 jcol = mat%cols(j) IF(jcol.NE.i) THEN ! The diagonal v(i) = v(i) - mat%val(j)*v(jcol) END IF END DO v(i) = v(i)*inv_diag(i) END DO END DO END SUBROUTINE gs_csr !-------------------------------------------------------------------------------- SUBROUTINE gs_cds(mat, nu, v, f) ! ! Gauss-Seidel relaxation ! TYPE(cds_mat),INTENT(in) :: mat INTEGER, INTENT(in) :: nu DOUBLE PRECISION, INTENT(inout) :: v(:) DOUBLE PRECISION, INTENT(in) :: f(:) ! DOUBLE PRECISION :: temp(SIZE(v)) DOUBLE PRECISION :: inv_diag(SIZE(v)) INTEGER :: k, n, i, id, d ! n = mat%rank ! inv_diag(:) = 1.0d0/mat%val(:,mat%dists(0)) DO k=1,nu ! temp(:) = f(:) DO id=1,mat%ku ! t <- f - U*v d = mat%dists(id) DO i=MAX(1,1-d), MIN(n,n-d) temp(i) = temp(i) - mat%val(i,id)*v(i+d) END DO END DO ! DO i=1,n ! Solve (L+D)v=t v(i) = temp(i) DO id=-1,-mat%kl,-1 d = mat%dists(id) IF(i+d.LT.1) EXIT v(i) = v(i) - mat%val(i,id)*v(i+d) END DO v(i) = v(i)*inv_diag(i) END DO END DO END SUBROUTINE gs_cds !-------------------------------------------------------------------------------- SUBROUTINE printmat_mat(str, val) ! CHARACTER(len=*), INTENT(in) :: str DOUBLE PRECISION, INTENT(in) :: val(:,:) INTEGER :: mrows, ncols,i mrows=SIZE(val,1) ncols=SIZE(val,2) WRITE(*,'(/a)') TRIM(str) WRITE(*,'(2(a,i6))') 'M =', mrows, ', N =', ncols DO i=1,mrows WRITE(*,'(12(1pe12.3))') val(i,:) END DO WRITE(*,'(a/(12(1pe12.3)))') 'Sum or rows', SUM(val,2) WRITE(*,'(a/(12(1pe12.3)))') 'Sum or cols', SUM(val,1) END SUBROUTINE printmat_mat !-------------------------------------------------------------------------------- SUBROUTINE printmat_ge(str, mat) ! CHARACTER(len=*), INTENT(in) :: str TYPE(gemat), INTENT(in) :: mat INTEGER :: i DOUBLE PRECISION :: arow(mat%ncols) DOUBLE PRECISION :: sum_cols(mat%ncols), sum_rows(mat%mrows) sum_cols = 0.0d0 arow = 0.0d0 WRITE(*,'(/a)') TRIM(str) WRITE(*,'(2(a,i6))') 'M =', mat%mrows, ', N =', mat%ncols DO i=1,mat%mrows CALL getrow(mat,i,arow) sum_rows(i) = SUM(arow) sum_cols(:) = sum_cols(:) + arow(:) WRITE(*,'(12(1pe12.3))') arow END DO WRITE(*,'(a/(12(1pe12.3)))') 'Sum or rows', sum_rows WRITE(*,'(a/(12(1pe12.3)))') 'Sum or cols', sum_cols END SUBROUTINE printmat_ge !-------------------------------------------------------------------------------- SUBROUTINE printmat_gb(str, mat) ! CHARACTER(len=*), INTENT(in) :: str TYPE(gbmat), INTENT(in) :: mat INTEGER :: i DOUBLE PRECISION :: arow(mat%ncols) DOUBLE PRECISION :: sum_cols(mat%ncols), sum_rows(mat%mrows) sum_cols = 0.0d0 WRITE(*,'(/a)') TRIM(str) WRITE(*,'(2(a,i6))') 'M =', mat%mrows, ', N =', mat%ncols DO i=1,mat%mrows CALL getrow(mat,i,arow) sum_rows(i) = SUM(arow) sum_cols(:) = sum_cols(:) + arow(:) WRITE(*,'(8(1pe12.3))') arow END DO WRITE(*,'(a/(12(1pe12.3)))') 'Sum or rows', sum_rows WRITE(*,'(a/(12(1pe12.3)))') 'Sum or cols', sum_cols END SUBROUTINE printmat_gb !-------------------------------------------------------------------------------- SUBROUTINE printmat_periodic(str, mat) ! CHARACTER(len=*), INTENT(in) :: str TYPE(periodic_mat), INTENT(in) :: mat INTEGER :: i DOUBLE PRECISION :: arow(mat%mat%ncols) DOUBLE PRECISION :: sum_cols(mat%mat%ncols), sum_rows(mat%mat%mrows) sum_cols = 0.0d0 WRITE(*,'(/a)') TRIM(str) WRITE(*,'(2(a,i6))') 'M =', mat%mat%mrows, ', N =', mat%mat%ncols DO i=1,mat%mat%mrows CALL getrow(mat,i,arow) sum_rows(i) = SUM(arow) sum_cols(:) = sum_cols(:) + arow(:) WRITE(*,'(8(1pe12.3))') arow END DO WRITE(*,'(a/(8(1pe12.3)))') 'Sum or rows', sum_rows WRITE(*,'(a/(8(1pe12.3)))') 'Sum or cols', sum_cols END SUBROUTINE printmat_periodic !-------------------------------------------------------------------------------- SUBROUTINE printdiag_gb(str, mat) ! CHARACTER(len=*), INTENT(in) :: str TYPE(gbmat), INTENT(in) :: mat INTEGER :: ku, kl kl = mat%kl ku = mat%ku WRITE(*,'(a/(8(1pe12.3)))') str, mat%val(kl+ku+1,:) END SUBROUTINE printdiag_gb !-------------------------------------------------------------------------------- INTEGER FUNCTION get_lmax(n) INTEGER, INTENT(in) :: n INTEGER :: l, ncur l=1 ncur = n DO IF(ncur.EQ.2 .OR. MODULO(ncur,2).NE.0) EXIT ! Minimum N is 2 or odd. l=l+1 ncur = ncur/2 END DO get_lmax = l END FUNCTION get_lmax !-------------------------------------------------------------------------------- SUBROUTINE ibc_transf(grids, dir, k) ! ! Impose BC on transfer matrix ! TYPE(grid2d), INTENT(inout) :: grids(:) INTEGER, INTENT(in) :: dir INTEGER, INTENT(in) :: k ! INTEGER :: levels, l levels = SIZE(grids) DO l=2,levels CALL mod_transf(grids(l)%matp(dir),k) END DO END SUBROUTINE ibc_transf !-------------------------------------------------------------------------------- END MODULE multigrid diff --git a/src/mumps_mod.f90 b/src/mumps_mod.f90 index fe23546..d13a8cc 100644 --- a/src/mumps_mod.f90 +++ b/src/mumps_mod.f90 @@ -1,1728 +1,1728 @@ !> !> @file mumps_mod.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> MODULE mumps_bsplines ! ! MUMPS_BSPLINES: Simple interface to the sparse direct solver MUMPS ! ! T.M. Tran, CRPP-EPFL ! June 2011 ! USE sparse IMPLICIT NONE INCLUDE 'dmumps_struc.h' INCLUDE 'zmumps_struc.h' ! TYPE mumps_mat INTEGER :: rank, nnz INTEGER :: nterms, kmat INTEGER :: istart, iend INTEGER :: nnz_start, nnz_end, nnz_loc LOGICAL :: nlsym LOGICAL :: nlpos LOGICAL :: nlforce_zero TYPE(spmat), POINTER :: mat => NULL() INTEGER, POINTER :: cols(:) => NULL() INTEGER, POINTER :: irow(:) => NULL() INTEGER, POINTER :: perm(:) => NULL() DOUBLE PRECISION, POINTER :: val(:) => NULL() TYPE(dmumps_struc) :: mumps_par END TYPE mumps_mat ! TYPE zmumps_mat INTEGER :: rank, nnz INTEGER :: nterms, kmat INTEGER :: istart, iend INTEGER :: nnz_start, nnz_end, nnz_loc LOGICAL :: nlsym LOGICAL :: nlherm LOGICAL :: nlpos LOGICAL :: nlforce_zero TYPE(zspmat), POINTER :: mat => NULL() INTEGER, POINTER :: cols(:) => NULL() INTEGER, POINTER :: irow(:) => NULL() INTEGER, POINTER :: perm(:) => NULL() DOUBLE COMPLEX, POINTER :: val(:) => NULL() TYPE(zmumps_struc) :: mumps_par END TYPE zmumps_mat ! INTERFACE init MODULE PROCEDURE init_mumps_mat, init_zmumps_mat END INTERFACE init ! INTERFACE clear_mat MODULE PROCEDURE clear_mumps_mat, clear_zmumps_mat END INTERFACE clear_mat ! INTERFACE updtmat MODULE PROCEDURE updt_mumps_mat, updt_zmumps_mat END INTERFACE updtmat ! INTERFACE putele MODULE PROCEDURE putele_mumps_mat, putele_zmumps_mat END INTERFACE putele ! INTERFACE getele MODULE PROCEDURE getele_mumps_mat, getele_zmumps_mat END INTERFACE getele ! INTERFACE putrow MODULE PROCEDURE putrow_mumps_mat, putrow_zmumps_mat END INTERFACE putrow ! INTERFACE getrow MODULE PROCEDURE getrow_mumps_mat, getrow_zmumps_mat END INTERFACE getrow ! INTERFACE putcol MODULE PROCEDURE putcol_mumps_mat, putcol_zmumps_mat END INTERFACE putcol ! INTERFACE getcol MODULE PROCEDURE getcol_mumps_mat, getcol_zmumps_mat END INTERFACE getcol ! INTERFACE get_count MODULE PROCEDURE get_count_mumps_mat, get_count_zmumps_mat END INTERFACE get_count ! INTERFACE to_mat MODULE PROCEDURE to_mumps_mat, to_zmumps_mat END INTERFACE to_mat ! INTERFACE reord_mat MODULE PROCEDURE reord_mumps_mat, reord_zmumps_mat END INTERFACE reord_mat ! INTERFACE numfact MODULE PROCEDURE numfact_mumps_mat, numfact_zmumps_mat END INTERFACE numfact ! INTERFACE factor MODULE PROCEDURE factor_mumps_mat, factor_zmumps_mat END INTERFACE factor ! INTERFACE bsolve MODULE PROCEDURE bsolve_mumps_mat1, bsolve_mumps_matn, & & bsolve_zmumps_mat1, bsolve_zmumps_matn END INTERFACE bsolve ! INTERFACE vmx MODULE PROCEDURE vmx_mumps_mat, vmx_mumps_matn, & & vmx_zmumps_mat, vmx_zmumps_matn END INTERFACE vmx ! INTERFACE destroy MODULE PROCEDURE destroy_mumps_mat, destroy_zmumps_mat END INTERFACE destroy ! INTERFACE putmat MODULE PROCEDURE put_mumps_mat, put_zmumps_mat END INTERFACE putmat ! INTERFACE getmat MODULE PROCEDURE get_mumps_mat, get_zmumps_mat END INTERFACE getmat ! INTERFACE mcopy MODULE PROCEDURE mcopy_mumps_mat, mcopy_zmumps_mat END INTERFACE mcopy ! INTERFACE maddto MODULE PROCEDURE maddto_mumps_mat, maddto_zmumps_mat END INTERFACE maddto ! INTERFACE psum_mat MODULE PROCEDURE psum_mumps_mat, psum_zmumps_mat END INTERFACE psum_mat ! INTERFACE p2p_mat MODULE PROCEDURE p2p_mumps_mat, p2p_zmumps_mat END INTERFACE p2p_mat ! CONTAINS !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE init_mumps_mat(n, nterms, mat, kmat, nlsym, nlpos, & & nlforce_zero, comm_in) ! ! Initialize an empty sparse mumps matrix ! USE pputils2 INCLUDE 'mpif.h' INTEGER, INTENT(in) :: n, nterms TYPE(mumps_mat) :: mat INTEGER, OPTIONAL, INTENT(in) :: kmat LOGICAL, OPTIONAL, INTENT(in) :: nlsym LOGICAL, OPTIONAL, INTENT(in) :: nlpos LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero INTEGER, OPTIONAL, INTENT(in) :: comm_in ! INTEGER :: comm, nloc ! comm = MPI_COMM_SELF ! Default is serial! IF(PRESENT(comm_in)) comm = comm_in ! mat%rank = n mat%nterms = nterms mat%nnz = 0 mat%nlsym = .FALSE. mat%nlpos = .TRUE. mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val IF(PRESENT(kmat)) mat%kmat = kmat IF(PRESENT(nlsym)) mat%nlsym = nlsym IF(PRESENT(nlpos)) mat%nlpos = nlpos IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero ! ! Matrix partition ! CALL dist1d(comm, 1, n, mat%istart, nloc) mat%iend = mat%istart + nloc - 1 ! IF(ASSOCIATED(mat%mat)) DEALLOCATE(mat%mat) ALLOCATE(mat%mat) CALL init(n, mat%mat, mat%istart, mat%iend) ! ! Initialize a MUMPS instance ! mat%mumps_par%N = n mat%mumps_par%NZ = 0 mat%mumps_par%COMM = comm mat%mumps_par%PAR = 1 ! Host involved in calculations IF(mat%nlsym) THEN IF(mat%nlpos) THEN mat%mumps_par%SYM = 1 ! symmetric, positive definite ELSE mat%mumps_par%SYM = 2 ! symmetric, indefinite END IF ELSE mat%mumps_par%SYM = 0 ! unsymmetric END IF ! mat%mumps_par%JOB = -1 ! Init phase CALL dmumps(mat%mumps_par) ! ! Nullify MUMPS pointers for distributed matrix ! NULLIFY(mat%mumps_par%A_loc) NULLIFY(mat%mumps_par%IRN_loc) NULLIFY(mat%mumps_par%JCN_loc) NULLIFY(mat%mumps_par%RHS) ! END SUBROUTINE init_mumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE init_zmumps_mat(n, nterms, mat, kmat, nlsym, nlherm, & & nlpos, nlforce_zero, comm_in) ! ! Initialize an empty sparse mumps matrix ! USE pputils2 INCLUDE 'mpif.h' INTEGER, INTENT(in) :: n, nterms TYPE(zmumps_mat) :: mat INTEGER, OPTIONAL, INTENT(in) :: kmat LOGICAL, OPTIONAL, INTENT(in) :: nlsym LOGICAL, OPTIONAL, INTENT(in) :: nlherm LOGICAL, OPTIONAL, INTENT(in) :: nlpos LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero INTEGER, OPTIONAL, INTENT(in) :: comm_in ! INTEGER :: comm, nloc ! comm = MPI_COMM_SELF ! Default is serial! IF(PRESENT(comm_in)) comm = comm_in ! mat%rank = n mat%nterms = nterms mat%nnz = 0 mat%nlsym = .FALSE. mat%nlherm = .FALSE. mat%nlpos = .TRUE. mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val IF(PRESENT(kmat)) mat%kmat = kmat IF(PRESENT(nlsym)) mat%nlsym = nlsym IF(PRESENT(nlpos)) mat%nlpos = nlpos IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero ! ! Matrix partition ! CALL dist1d(comm, 1, n, mat%istart, nloc) mat%iend = mat%istart + nloc - 1 ! IF(ASSOCIATED(mat%mat)) DEALLOCATE(mat%mat) ALLOCATE(mat%mat) CALL init(n, mat%mat, mat%istart, mat%iend) ! ! Initialize a MUMPS instance ! mat%mumps_par%N = n mat%mumps_par%NZ = 0 mat%mumps_par%COMM = comm mat%mumps_par%PAR = 1 ! Host involved in calculations mat%mumps_par%SYM = 0 ! General unsymmetric IF(mat%nlsym) THEN IF(mat%nlpos) THEN mat%mumps_par%SYM = 1 ! symmetric, positive definite ELSE mat%mumps_par%SYM = 2 ! symmetric, indefinite END IF END IF ! mat%mumps_par%JOB = -1 ! Init phase CALL zmumps(mat%mumps_par) ! ! WARNING: SYM=1 is currently (version 4.10.0) is treated as SYM=2. ! The Hermitian case is not implemented yet! ! ! Nullify MUMPS pointers for distributed matrix ! NULLIFY(mat%mumps_par%A_loc) NULLIFY(mat%mumps_par%IRN_loc) NULLIFY(mat%mumps_par%JCN_loc) NULLIFY(mat%mumps_par%RHS) ! END SUBROUTINE init_zmumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE clear_mumps_mat(mat) ! ! Clear matrix, keeping its sparse structure unchanged ! TYPE(mumps_mat) :: mat ! mat%val = 0.0d0 END SUBROUTINE clear_mumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE clear_zmumps_mat(mat) ! ! Clear matrix, keeping its sparse structure unchanged ! TYPE(zmumps_mat) :: mat ! mat%val = (0.0d0, 0.0d0) END SUBROUTINE clear_zmumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE updt_mumps_mat(mat, i, j, val) ! ! Update element Aij of mumps matrix ! TYPE(mumps_mat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE PRECISION, INTENT(in) :: val INTEGER :: k, s, e ! IF(mat%nlsym) THEN ! Store only upper part for symmetric matrices IF(i.GT.j) RETURN END IF IF(i.LT.mat%istart .OR. i.GT.mat%iend) THEN WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j WRITE(*,'(a,2i6)') ' istart, iend ', mat%istart, mat%iend STOP '*** Abnormal EXIT in MODULE mumps_mod ***' END IF ! IF(ASSOCIATED(mat%mat)) THEN CALL updtmat(mat%mat, i, j, val) ELSE s = mat%irow(i) - mat%nnz_start + 1 e = mat%irow(i+1) - mat%nnz_start k = isearch(mat%cols(s:e), j) IF( k.GE.0 ) THEN mat%val(s+k) = mat%val(s+k)+val ELSE WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j STOP '*** Abnormal EXIT in MODULE mumps_mod ***' END IF END IF END SUBROUTINE updt_mumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE updt_zmumps_mat(mat, i, j, val) ! ! Update element Aij of mumps matrix ! TYPE(zmumps_mat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE COMPLEX, INTENT(in) :: val INTEGER :: k, s, e ! IF(mat%nlherm .OR. mat%nlsym) THEN ! Store only upper part IF(i.GT.j) RETURN END IF IF(i.LT.mat%istart .OR. i.GT.mat%iend) THEN WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j WRITE(*,'(a,2i6)') ' istart, iend ', mat%istart, mat%iend STOP '*** Abnormal EXIT in MODULE mumps_mod ***' END IF ! IF(ASSOCIATED(mat%mat)) THEN CALL updtmat(mat%mat, i, j, val) ELSE s = mat%irow(i) - mat%nnz_start + 1 e = mat%irow(i+1) - mat%nnz_start k = isearch(mat%cols(s:e), j) IF( k.GE.0 ) THEN mat%val(s+k) = mat%val(s+k)+val ELSE WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j STOP '*** Abnormal EXIT in MODULE mumps_mod ***' END IF END IF END SUBROUTINE updt_zmumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putele_mumps_mat(mat, i, j, val) ! ! Put element (i,j) of matrix ! TYPE(mumps_mat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE PRECISION, INTENT(in) :: val INTEGER :: iput, jput INTEGER :: k, s, e ! iput = i jput = j IF(mat%nlsym) THEN IF( i.GT.j ) THEN ! Lower triangular part iput = j jput = i END IF END IF ! ! Do nothing if outside IF(iput.LT.mat%istart .OR. iput.GT.mat%iend) RETURN ! IF(ASSOCIATED(mat%mat)) THEN CALL putele(mat%mat, iput, jput, val, & & nlforce_zero=mat%nlforce_zero) ELSE s = mat%irow(iput) - mat%nnz_start + 1 e = mat%irow(iput+1) - mat%nnz_start k = isearch(mat%cols(s:e), jput) IF( k.GE.0 ) THEN mat%val(s+k) = val ELSE IF(ABS(val) .GT. EPSILON(0.0d0)) THEN ! Ok for zero val WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j STOP '*** Abnormal EXIT in MODULE mumps_mod ***' END IF END IF END IF END SUBROUTINE putele_mumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putele_zmumps_mat(mat, i, j, val) ! ! Put element (i,j) of matrix ! TYPE(zmumps_mat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE COMPLEX, INTENT(in) :: val DOUBLE COMPLEX :: valput INTEGER :: iput, jput INTEGER :: k, s, e ! iput = i jput = j valput = val IF(mat%nlherm .OR. mat%nlsym) THEN IF( i.GT.j ) THEN ! Lower triangular part iput = j jput = i IF(mat%nlherm) THEN valput = CONJG(val) ELSE valput = val END IF END IF END IF ! ! Do nothing if outside IF(iput.LT.mat%istart .OR. iput.GT.mat%iend) RETURN ! IF(ASSOCIATED(mat%mat)) THEN CALL putele(mat%mat, iput, jput, valput, & & nlforce_zero=mat%nlforce_zero) ELSE s = mat%irow(iput) - mat%nnz_start + 1 e = mat%irow(iput+1) - mat%nnz_start k = isearch(mat%cols(s:e), jput) IF( k.GE.0 ) THEN mat%val(s+k) = valput ELSE IF(ABS(val) .GT. EPSILON(0.0d0)) THEN ! Ok for zero val WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j STOP '*** Abnormal EXIT in MODULE mumps_mod ***' END IF END IF END IF END SUBROUTINE putele_zmumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getele_mumps_mat(mat, i, j, val) ! ! Get element (i,j) of sparse matrix ! TYPE(mumps_mat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE PRECISION, INTENT(out) :: val INTEGER :: iget, jget INTEGER :: k, s, e ! iget = i jget = j IF(mat%nlsym) THEN IF( i.GT.j ) THEN ! Lower triangular part iget = j jget = i END IF END IF ! val = 0.0d0 ! Assume zero val if outside IF( iget.LT.mat%istart .OR. iget.GT.mat%iend ) RETURN ! IF(ASSOCIATED(mat%mat)) THEN CALL getele(mat%mat, iget, jget, val) ELSE s = mat%irow(iget) - mat%nnz_start + 1 e = mat%irow(iget+1) - mat%nnz_start k = isearch(mat%cols(s:e), jget) IF( k.GE.0 ) THEN val =mat%val(s+k) ELSE val = 0.0d0 ! Assume zero val if not found END IF END IF END SUBROUTINE getele_mumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getele_zmumps_mat(mat, i, j, val) ! ! Get element (i,j) of sparse matrix ! TYPE(zmumps_mat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE COMPLEX, INTENT(out) :: val DOUBLE COMPLEX :: valget INTEGER :: iget, jget INTEGER :: k, s, e ! iget = i jget = j IF(mat%nlherm .OR. mat%nlsym) THEN IF( i.GT.j ) THEN ! Lower triangular part iget = j jget = i END IF END IF ! val = (0.0d0, 0.0d0) ! Assume zero val if outside IF( iget.LT.mat%istart .OR. iget.GT.mat%iend ) RETURN ! IF(ASSOCIATED(mat%mat)) THEN CALL getele(mat%mat, iget, jget, valget) ELSE s = mat%irow(iget) - mat%nnz_start + 1 e = mat%irow(iget+1) - mat%nnz_start k = isearch(mat%cols(s:e), jget) IF( k.GE.0 ) THEN valget =mat%val(s+k) ELSE valget = (0.0d0,0.0d0) ! Assume zero val if not found END IF END IF val = valget IF( i.GT.j ) THEN IF(mat%nlherm) THEN val = CONJG(valget) END IF END IF END SUBROUTINE getele_zmumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putrow_mumps_mat(amat, i, arr) ! ! Put a row into sparse matrix ! TYPE(mumps_mat), INTENT(inout) :: amat INTEGER, INTENT(in) :: i DOUBLE PRECISION, INTENT(in) :: arr(:) INTEGER :: j ! DO j=1,amat%rank CALL putele(amat, i, j, arr(j)) END DO END SUBROUTINE putrow_mumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putrow_zmumps_mat(amat, i, arr) ! ! Put a row into sparse matrix ! TYPE(zmumps_mat), INTENT(inout) :: amat INTEGER, INTENT(in) :: i DOUBLE COMPLEX, INTENT(in) :: arr(:) INTEGER :: j ! DO j=1,amat%rank CALL putele(amat, i, j, arr(j)) END DO END SUBROUTINE putrow_zmumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getrow_mumps_mat(amat, i, arr) ! ! Get a row from sparse matrix ! TYPE(mumps_mat), INTENT(in) :: amat INTEGER, INTENT(in) :: i DOUBLE PRECISION, INTENT(out) :: arr(:) INTEGER :: j ! DO j=1,amat%rank CALL getele(amat, i, j, arr(j)) END DO END SUBROUTINE getrow_mumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getrow_zmumps_mat(amat, i, arr) ! ! Get a row from sparse matrix ! TYPE(zmumps_mat), INTENT(in) :: amat INTEGER, INTENT(in) :: i DOUBLE COMPLEX, INTENT(out) :: arr(:) INTEGER :: j ! DO j=1,amat%rank CALL getele(amat, i, j, arr(j)) END DO END SUBROUTINE getrow_zmumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putcol_mumps_mat(amat, j, arr) ! ! Put a column into sparse matrix ! TYPE(mumps_mat), INTENT(inout) :: amat INTEGER, INTENT(in) :: j DOUBLE PRECISION, INTENT(in) :: arr(:) INTEGER :: i ! DO i=amat%istart,amat%iend CALL putele(amat, i, j, arr(i)) END DO END SUBROUTINE putcol_mumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putcol_zmumps_mat(amat, j, arr) ! ! Put a column into sparse matrix ! TYPE(zmumps_mat), INTENT(inout) :: amat INTEGER, INTENT(in) :: j DOUBLE COMPLEX, INTENT(in) :: arr(:) INTEGER :: i ! DO i=amat%istart,amat%iend CALL putele(amat, i, j, arr(i)) END DO END SUBROUTINE putcol_zmumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getcol_mumps_mat(amat, j, arr) ! ! Get a column from sparse matrix ! TYPE(mumps_mat), INTENT(in) :: amat INTEGER, INTENT(in) :: j DOUBLE PRECISION, INTENT(out) :: arr(:) INTEGER :: i ! DO i=amat%istart,amat%iend CALL getele(amat, i, j, arr(i)) END DO END SUBROUTINE getcol_mumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getcol_zmumps_mat(amat, j, arr) ! ! Get a column from sparse matrix ! TYPE(zmumps_mat), INTENT(in) :: amat INTEGER, INTENT(in) :: j DOUBLE COMPLEX, INTENT(out) :: arr(:) INTEGER :: i ! DO i=amat%istart,amat%iend CALL getele(amat, i, j, arr(i)) END DO END SUBROUTINE getcol_zmumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ INTEGER FUNCTION get_count_mumps_mat(mat, nnz) ! ! Number of non-zeros in sparse matrix ! TYPE(mumps_mat) :: mat INTEGER, INTENT(out), OPTIONAL :: nnz(:) INTEGER :: i ! IF(ASSOCIATED(mat%mat)) THEN get_count_mumps_mat = get_count(mat%mat, nnz) ELSE get_count_mumps_mat = mat%nnz IF(PRESENT(nnz)) THEN DO i=mat%istart,mat%iend nnz(i) = mat%irow(i+1)-mat%irow(i) END DO END IF END IF END FUNCTION get_count_mumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ INTEGER FUNCTION get_count_zmumps_mat(mat, nnz) ! ! Number of non-zeros in sparse matrix ! TYPE(zmumps_mat) :: mat INTEGER, INTENT(out), OPTIONAL :: nnz(:) INTEGER :: i ! IF(ASSOCIATED(mat%mat)) THEN get_count_zmumps_mat = get_count(mat%mat, nnz) ELSE get_count_zmumps_mat = mat%nnz IF(PRESENT(nnz)) THEN DO i=mat%istart,mat%iend nnz(i) = mat%irow(i+1)-mat%irow(i) END DO END IF END IF END FUNCTION get_count_zmumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE to_mumps_mat(mat, nlkeep) ! ! Convert linked list spmat to mumps matrice structure ! INCLUDE 'mpif.h' TYPE(mumps_mat) :: mat LOGICAL, INTENT(in), OPTIONAL :: nlkeep INTEGER :: i, nnz, rank, s, e INTEGER :: comm, ierr, nnz_loc LOGICAL :: nlclean ! nlclean = .TRUE. IF(PRESENT(nlkeep)) THEN nlclean = .NOT. nlkeep END IF ! comm = mat%mumps_par%COMM mat%mumps_par%ICNTL(18) = 3 ! Distributed assembled matrix ! ! Allocate the Mumps matrix structure ! CSR format: (cols, irow, val) or (JCN, irow, A) ! COO format: (IRN, JCN, A) or (IRN, cols, val) ! rank = mat%rank nnz_loc = get_count(mat) mat%nnz_start = 0 CALL mpi_exscan(nnz_loc, mat%nnz_start, 1, MPI_INTEGER, MPI_SUM, comm, ierr) mat%nnz_start = mat%nnz_start + 1 mat%nnz_end = mat%nnz_start + nnz_loc - 1 mat%nnz_loc = nnz_loc CALL mpi_allreduce(nnz_loc, mat%nnz, 1, MPI_INTEGER, MPI_SUM, comm, ierr) ! mat%mumps_par%N = rank mat%mumps_par%NZ_loc = nnz_loc ! IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm) IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) ALLOCATE(mat%val(nnz_loc)) ALLOCATE(mat%cols(nnz_loc)) ALLOCATE(mat%irow(mat%istart:mat%iend+1)) ! IF(ASSOCIATED(mat%mumps_par%IRN_loc)) DEALLOCATE(mat%mumps_par%IRN_loc) ALLOCATE(mat%mumps_par%IRN_loc(nnz_loc)) mat%mumps_par%A_loc => mat%val mat%mumps_par%JCN_loc => mat%cols ! ! Fill Mumps structure and deallocate the sparse rows ! mat%irow(mat%istart) = mat%nnz_start DO i=mat%istart,mat%iend mat%irow(i+1) = mat%irow(i) + mat%mat%row(i)%nnz s = mat%irow(i) - mat%nnz_start + 1 e = mat%irow(i+1) - mat%nnz_start CALL getrow(mat%mat%row(i), mat%val(s:e), mat%cols(s:e)) mat%mumps_par%IRN_loc(s:e) = i IF(nlclean) CALL destroy(mat%mat%row(i)) END DO IF(nlclean) DEALLOCATE(mat%mat) END SUBROUTINE to_mumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE to_zmumps_mat(mat, nlkeep) ! ! Convert linked list spmat to mumps matrice structure ! INCLUDE 'mpif.h' TYPE(zmumps_mat) :: mat LOGICAL, INTENT(in), OPTIONAL :: nlkeep INTEGER :: i, nnz, rank, s, e INTEGER :: comm, ierr, nnz_loc LOGICAL :: nlclean ! nlclean = .TRUE. IF(PRESENT(nlkeep)) THEN nlclean = .NOT. nlkeep END IF ! comm = mat%mumps_par%COMM mat%mumps_par%ICNTL(18) = 3 ! Distributed assembled matrix ! ! Allocate the Mumps matrix structure ! CSR format: (cols, irow, val) or (JCN, irow, A) ! COO format: (IRN, JCN, A) or (IRN, cols, val) ! rank = mat%rank nnz_loc = get_count(mat) mat%nnz_start = 0 CALL mpi_exscan(nnz_loc, mat%nnz_start, 1, MPI_INTEGER, MPI_SUM, comm, ierr) mat%nnz_start = mat%nnz_start + 1 mat%nnz_end = mat%nnz_start + nnz_loc - 1 mat%nnz_loc = nnz_loc CALL mpi_allreduce(nnz_loc, mat%nnz, 1, MPI_INTEGER, MPI_SUM, comm, ierr) ! mat%mumps_par%N = rank mat%mumps_par%NZ_loc = nnz_loc ! IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm) IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) ALLOCATE(mat%val(nnz_loc)) ALLOCATE(mat%cols(nnz_loc)) ALLOCATE(mat%irow(mat%istart:mat%iend+1)) ! IF(ASSOCIATED(mat%mumps_par%IRN_loc)) DEALLOCATE(mat%mumps_par%IRN_loc) ALLOCATE(mat%mumps_par%IRN_loc(nnz_loc)) mat%mumps_par%A_loc => mat%val mat%mumps_par%JCN_loc => mat%cols ! ! Fill Mumps structure and deallocate the sparse rows ! mat%irow(mat%istart) = mat%nnz_start DO i=mat%istart,mat%iend mat%irow(i+1) = mat%irow(i) + mat%mat%row(i)%nnz s = mat%irow(i) - mat%nnz_start + 1 e = mat%irow(i+1) - mat%nnz_start CALL getrow(mat%mat%row(i), mat%val(s:e), mat%cols(s:e)) mat%mumps_par%IRN_loc(s:e) = i IF(nlclean) CALL destroy(mat%mat%row(i)) END DO IF(nlclean) DEALLOCATE(mat%mat) END SUBROUTINE to_zmumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE reord_mumps_mat(mat, nlmetis, debug) ! ! Reordering and symbolic factorization ! TYPE(mumps_mat) :: mat LOGICAL, OPTIONAL, INTENT(in) :: nlmetis LOGICAL, OPTIONAL, INTENT(in) :: debug ! ! Verbose messages ! mat%mumps_par%ICNTL(3) = 0 IF(PRESENT(debug)) THEN IF(debug) mat%mumps_par%ICNTL(3) = 6 END IF ! ! Ordering ! mat%mumps_par%ICNTL(7) = 7 ! Automatic choice IF(PRESENT(nlmetis)) THEN IF(nlmetis) mat%mumps_par%ICNTL(7) = 5 ! use METIS nested dissection END IF ! mat%mumps_par%JOB = 1 CALL dmumps(mat%mumps_par) mat%perm => mat%mumps_par%SYM_PERM IF(mat%mumps_par%INFOG(1).NE.0) THEN WRITE(*,'(a,2i12)') 'REORD: Reordering failed with error', & & mat%mumps_par%INFOG(1:2) STOP END IF END SUBROUTINE reord_mumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE reord_zmumps_mat(mat, nlmetis, debug) ! ! Reordering and symbolic factorization ! TYPE(zmumps_mat) :: mat LOGICAL, OPTIONAL, INTENT(in) :: nlmetis LOGICAL, OPTIONAL, INTENT(in) :: debug ! ! Verbose messages ! mat%mumps_par%ICNTL(3) = 0 IF(PRESENT(debug)) THEN IF(debug) mat%mumps_par%ICNTL(3) = 6 END IF ! ! Ordering ! mat%mumps_par%ICNTL(7) = 7 ! Automatic choice IF(PRESENT(nlmetis)) THEN IF(nlmetis) mat%mumps_par%ICNTL(7) = 5 ! use METIS nested dissection END IF ! mat%mumps_par%JOB = 1 CALL zmumps(mat%mumps_par) IF(mat%mumps_par%INFOG(1).NE.0) THEN WRITE(*,'(a,2i12)') 'REORD: Reordering failed with error', & & mat%mumps_par%INFOG(1:2) STOP END IF END SUBROUTINE reord_zmumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE numfact_mumps_mat(mat, debug) ! ! Numerical factorization ! TYPE(mumps_mat) :: mat LOGICAL, OPTIONAL, INTENT(in) :: debug ! ! Verbose messages ! mat%mumps_par%ICNTL(3) = 0 IF(PRESENT(debug)) THEN IF(debug) mat%mumps_par%ICNTL(3) = 6 END IF ! mat%mumps_par%JOB = 2 CALL dmumps(mat%mumps_par) IF(mat%mumps_par%INFOG(1).NE.0) THEN WRITE(*,'(a,2i12)') 'FACTOR: Reordering failed with error', & & mat%mumps_par%INFOG(1:2) STOP END IF ! END SUBROUTINE numfact_mumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE numfact_zmumps_mat(mat, debug) ! ! Numerical factorization ! TYPE(zmumps_mat) :: mat LOGICAL, OPTIONAL, INTENT(in) :: debug ! ! Verbose messages ! mat%mumps_par%ICNTL(3) = 0 IF(PRESENT(debug)) THEN IF(debug) mat%mumps_par%ICNTL(3) = 6 END IF ! mat%mumps_par%JOB = 2 CALL zmumps(mat%mumps_par) IF(mat%mumps_par%INFOG(1).NE.0) THEN WRITE(*,'(a,2i12)') 'FACTOR: Reordering failed with error', & & mat%mumps_par%INFOG(1:2) STOP END IF ! END SUBROUTINE numfact_zmumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE factor_mumps_mat(mat, nlreord, nlmetis, debug) ! ! Factor (create +reorder + factor) a mumps_mat matrix ! TYPE(mumps_mat) :: mat LOGICAL, OPTIONAL, INTENT(in) :: nlreord LOGICAL, OPTIONAL, INTENT(in) :: nlmetis LOGICAL, OPTIONAL, INTENT(in) :: debug LOGICAL :: mlreord !---------------------------------------------------------------------- ! 1.0 Creation from the sparse rows ! IF(ASSOCIATED(mat%mat)) THEN CALL to_mat(mat) END IF !---------------------------------------------------------------------- ! 2.0 Reordering and symbolic factorization phase ! mlreord = .TRUE. IF(PRESENT(nlreord)) mlreord = nlreord IF(mlreord) THEN CALL reord_mat(mat, nlmetis, debug) END IF !---------------------------------------------------------------------- ! 3.0 Numerical factorization ! CALL numfact(mat, debug) END SUBROUTINE factor_mumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE factor_zmumps_mat(mat, nlreord, nlmetis, debug) ! ! Factor (create +reorder + factor) a mumps_mat matrix ! TYPE(zmumps_mat) :: mat LOGICAL, OPTIONAL, INTENT(in) :: nlreord LOGICAL, OPTIONAL, INTENT(in) :: nlmetis LOGICAL, OPTIONAL, INTENT(in) :: debug LOGICAL :: mlreord !---------------------------------------------------------------------- ! 1.0 Creation from the sparse rows ! IF(ASSOCIATED(mat%mat)) THEN CALL to_mat(mat) END IF !---------------------------------------------------------------------- ! 2.0 Reordering and symbolic factorization phase ! mlreord = .TRUE. IF(PRESENT(nlreord)) mlreord = nlreord IF(mlreord) THEN CALL reord_mat(mat, nlmetis, debug) END IF !---------------------------------------------------------------------- ! 3.0 Numerical factorization ! CALL numfact(mat, debug) END SUBROUTINE factor_zmumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE bsolve_mumps_mat1(mat, rhs, sol, nref, debug) ! ! Backsolve, using Mumps ! INCLUDE 'mpif.h' TYPE(mumps_mat) :: mat DOUBLE PRECISION, INTENT(inout) :: rhs(:) DOUBLE PRECISION, OPTIONAL, INTENT(inout) :: sol(:) INTEGER, OPTIONAL, INTENT(in) :: nref LOGICAL, OPTIONAL, INTENT(in) :: debug ! INTEGER :: nrank, ierr ! nrank = SIZE(rhs,1) ! ! Verbose messages ! mat%mumps_par%ICNTL(3) = 0 IF(PRESENT(debug)) THEN IF(debug) mat%mumps_par%ICNTL(3) = 6 END IF ! IF(mat%mumps_par%MYID .EQ. 0) THEN mat%mumps_par%NRHS = 1 mat%mumps_par%LRHS = nrank mat%mumps_par%ICNTL(10) = 0 ! Max numbers of iterative refinement steps IF(PRESENT(nref)) mat%mumps_par%ICNTL(10) = nref ! ALLOCATE(mat%mumps_par%RHS(nrank)) mat%mumps_par%RHS = rhs END IF ! mat%mumps_par%JOB = 3 CALL dmumps(mat%mumps_par) ! ! The solution will be broadcasted to everyone ! IF(PRESENT(sol)) THEN IF(mat%mumps_par%MYID .EQ. 0) sol=mat%mumps_par%RHS CALL mpi_bcast(sol, nrank, MPI_DOUBLE_PRECISION, & & 0, mat%mumps_par%COMM, ierr) ELSE IF(mat%mumps_par%MYID .EQ. 0) rhs=mat%mumps_par%RHS CALL mpi_bcast(rhs, nrank, MPI_DOUBLE_PRECISION, & & 0, mat%mumps_par%COMM, ierr) END IF ! IF(mat%mumps_par%MYID .EQ. 0) THEN DEALLOCATE(mat%mumps_par%RHS) IF(mat%mumps_par%INFOG(1).NE.0) THEN WRITE(*,'(a,2i12)') 'BSOLVE: Reordering failed with error', & & mat%mumps_par%INFOG(1:2) STOP END IF END IF END SUBROUTINE bsolve_mumps_mat1 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE bsolve_zmumps_mat1(mat, rhs, sol, nref, debug) ! ! Backsolve, using Mumps ! INCLUDE 'mpif.h' TYPE(zmumps_mat) :: mat DOUBLE COMPLEX, INTENT(inout) :: rhs(:) DOUBLE COMPLEX, OPTIONAL, INTENT(inout) :: sol(:) INTEGER, OPTIONAL, INTENT(in) :: nref LOGICAL, OPTIONAL, INTENT(in) :: debug ! INTEGER :: nrank, ierr ! nrank = SIZE(rhs,1) ! ! Verbose messages ! mat%mumps_par%ICNTL(3) = 0 IF(PRESENT(debug)) THEN IF(debug) mat%mumps_par%ICNTL(3) = 6 END IF ! IF(mat%mumps_par%MYID .EQ. 0) THEN mat%mumps_par%NRHS = 1 mat%mumps_par%LRHS = nrank mat%mumps_par%ICNTL(10) = 0 ! Max numbers of iterative refinement steps IF(PRESENT(nref)) mat%mumps_par%ICNTL(10) = nref ! ALLOCATE(mat%mumps_par%RHS(nrank)) mat%mumps_par%RHS = rhs END IF ! mat%mumps_par%JOB = 3 CALL zmumps(mat%mumps_par) ! ! The solution will be broadcasted to everyone ! IF(PRESENT(sol)) THEN IF(mat%mumps_par%MYID .EQ. 0) sol=mat%mumps_par%RHS CALL mpi_bcast(sol, SIZE(rhs), MPI_DOUBLE_COMPLEX, & & 0, mat%mumps_par%COMM, ierr) ELSE IF(mat%mumps_par%MYID .EQ. 0) rhs=mat%mumps_par%RHS CALL mpi_bcast(rhs, SIZE(rhs), MPI_DOUBLE_COMPLEX, & & 0, mat%mumps_par%COMM, ierr) END IF ! IF(mat%mumps_par%MYID .EQ. 0) THEN DEALLOCATE(mat%mumps_par%RHS) IF(mat%mumps_par%INFOG(1).NE.0) THEN WRITE(*,'(a,2i12)') 'BSOLVE: Reordering failed with error', & & mat%mumps_par%INFOG(1:2) STOP END IF END IF END SUBROUTINE bsolve_zmumps_mat1 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE bsolve_mumps_matn(mat, rhs, sol, nref, debug) ! ! Backsolve, using Mumps ! INCLUDE 'mpif.h' TYPE(mumps_mat) :: mat DOUBLE PRECISION, INTENT(inout) :: rhs(:,:) DOUBLE PRECISION, OPTIONAL, INTENT(inout) :: sol(:,:) INTEGER, OPTIONAL, INTENT(in) :: nref LOGICAL, OPTIONAL, INTENT(in) :: debug ! INTEGER :: nrank, nrhs, ierr ! nrank = SIZE(rhs,1) nrhs = SIZE(rhs,2) ! ! Verbose messages ! mat%mumps_par%ICNTL(3) = 0 IF(PRESENT(debug)) THEN IF(debug) mat%mumps_par%ICNTL(3) = 6 END IF ! IF(mat%mumps_par%MYID .EQ. 0) THEN mat%mumps_par%NRHS = nrhs mat%mumps_par%LRHS = nrank mat%mumps_par%ICNTL(10) = 0 ! Max numbers of iterative refinement steps IF(PRESENT(nref)) mat%mumps_par%ICNTL(10) = nref ! ALLOCATE(mat%mumps_par%RHS(nrhs*nrank)) mat%mumps_par%RHS = RESHAPE(rhs, SHAPE(mat%mumps_par%RHS)) END IF ! mat%mumps_par%JOB = 3 CALL dmumps(mat%mumps_par) ! ! The solution will be broadcasted to everyone ! IF(PRESENT(sol)) THEN IF(mat%mumps_par%MYID .EQ. 0) sol=RESHAPE(mat%mumps_par%RHS, SHAPE(sol)) CALL mpi_bcast(sol, PRODUCT(SHAPE(rhs)), MPI_DOUBLE_PRECISION, & & 0, mat%mumps_par%COMM, ierr) ELSE IF(mat%mumps_par%MYID .EQ. 0) rhs=RESHAPE(mat%mumps_par%RHS, SHAPE(rhs)) CALL mpi_bcast(rhs, PRODUCT(SHAPE(rhs)), MPI_DOUBLE_PRECISION, & & 0, mat%mumps_par%COMM, ierr) END IF ! IF(mat%mumps_par%MYID .EQ. 0) THEN DEALLOCATE(mat%mumps_par%RHS) IF(mat%mumps_par%INFOG(1).NE.0) THEN WRITE(*,'(a,2i12)') 'BSOLVE: Reordering failed with error', & & mat%mumps_par%INFOG(1:2) STOP END IF END IF END SUBROUTINE bsolve_mumps_matn !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE bsolve_zmumps_matn(mat, rhs, sol, nref, debug) ! ! Backsolve, using Mumps ! INCLUDE 'mpif.h' TYPE(zmumps_mat) :: mat DOUBLE COMPLEX, INTENT(inout) :: rhs(:,:) DOUBLE COMPLEX, OPTIONAL, INTENT(inout) :: sol(:,:) INTEGER, OPTIONAL, INTENT(in) :: nref LOGICAL, OPTIONAL, INTENT(in) :: debug ! INTEGER :: nrank, nrhs, ierr ! nrank = SIZE(rhs,1) nrhs = SIZE(rhs,2) ! ! Verbose messages ! mat%mumps_par%ICNTL(3) = 0 IF(PRESENT(debug)) THEN IF(debug) mat%mumps_par%ICNTL(3) = 6 END IF ! IF(mat%mumps_par%MYID .EQ. 0) THEN mat%mumps_par%NRHS = nrhs mat%mumps_par%LRHS = nrank mat%mumps_par%ICNTL(10) = 0 ! Max numbers of iterative refinement steps IF(PRESENT(nref)) mat%mumps_par%ICNTL(10) = nref ! ALLOCATE(mat%mumps_par%RHS(nrhs*nrank)) mat%mumps_par%RHS = RESHAPE(rhs, SHAPE(mat%mumps_par%RHS)) END IF ! mat%mumps_par%JOB = 3 CALL zmumps(mat%mumps_par) ! ! The solution will be broadcasted to everyone ! IF(PRESENT(sol)) THEN IF(mat%mumps_par%MYID .EQ. 0) sol=RESHAPE(mat%mumps_par%RHS, SHAPE(sol)) CALL mpi_bcast(sol, PRODUCT(SHAPE(rhs)), MPI_DOUBLE_COMPLEX, & & 0, mat%mumps_par%COMM, ierr) ELSE IF(mat%mumps_par%MYID .EQ. 0) rhs=RESHAPE(mat%mumps_par%RHS, SHAPE(rhs)) CALL mpi_bcast(rhs, PRODUCT(SHAPE(rhs)), MPI_DOUBLE_COMPLEX, & & 0, mat%mumps_par%COMM, ierr) END IF ! IF(mat%mumps_par%MYID .EQ. 0) THEN DEALLOCATE(mat%mumps_par%RHS) IF(mat%mumps_par%INFOG(1).NE.0) THEN WRITE(*,'(a,2i12)') 'BSOLVE: Reordering failed with error', & & mat%mumps_par%INFOG(1:2) STOP END IF END IF END SUBROUTINE bsolve_zmumps_matn !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ FUNCTION vmx_mumps_mat(mat, xarr) RESULT(yarr) ! ! Return product mat*x ! TYPE(mumps_mat) :: mat DOUBLE PRECISION, INTENT(in) :: xarr(:) DOUBLE PRECISION :: yarr(SIZE(xarr)) DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0 CHARACTER(len=6) :: matdescra INTEGER :: n, i, j ! n = mat%rank ! #ifdef MKL IF(mat%nlsym) THEN matdescra = 'sun' ELSE matdescra = 'g' END IF ! CALL mkl_dcsrmv('N', n, n, alpha, matdescra, mat%val, & & mat%cols, mat%irow(1), mat%irow(2), xarr, & & beta, yarr) #else yarr = 0.0d0 DO i=1,n DO j=mat%irow(i), mat%irow(i+1)-1 yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j)) END DO IF(mat%nlsym) THEN DO j=mat%irow(i)+1, mat%irow(i+1)-1 yarr(mat%cols(j)) = yarr(mat%cols(j)) & & + mat%val(j)*xarr(i) END DO END IF END DO #endif ! END FUNCTION vmx_mumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ FUNCTION vmx_zmumps_mat(mat, xarr) RESULT(yarr) ! ! Return product mat*x ! TYPE(zmumps_mat) :: mat DOUBLE COMPLEX, INTENT(in) :: xarr(:) DOUBLE COMPLEX :: yarr(SIZE(xarr)) DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0) INTEGER :: n, i, j CHARACTER(len=6) :: matdescra ! n = mat%rank ! #ifdef MKL IF(mat%nlsym) THEN matdescra = 'sun' ELSE IF(mat%nlherm) THEN matdescra = 'hun' ELSE matdescra = 'g' END IF CALL mkl_zcsrmv('N', n, n, alpha, matdescra, mat%val, & & mat%cols, mat%irow(1), mat%irow(2), xarr, & & beta, yarr) #else yarr = (0.0d0,0.0d0) DO i=1,n DO j=mat%irow(i), mat%irow(i+1)-1 yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j)) END DO IF(mat%nlsym) THEN DO j=mat%irow(i)+1, mat%irow(i+1)-1 yarr(mat%cols(j)) = yarr(mat%cols(j)) & & + mat%val(j)*xarr(i) END DO ELSE IF(mat%nlherm) THEN DO j=mat%irow(i)+1, mat%irow(i+1)-1 yarr(mat%cols(j)) = yarr(mat%cols(j)) & & + CONJG(mat%val(j))*xarr(i) END DO END IF END DO #endif ! END FUNCTION vmx_zmumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ FUNCTION vmx_mumps_matn(mat, xarr) RESULT(yarr) ! ! Return product mat*x ! TYPE(mumps_mat) :: mat DOUBLE PRECISION, INTENT(in) :: xarr(:,:) DOUBLE PRECISION :: yarr(SIZE(xarr,1),SIZE(xarr,2)) ! DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0 INTEGER :: n, nrhs, i, j CHARACTER(len=6) :: matdescra ! n = mat%rank nrhs = SIZE(xarr,2) ! #ifdef MKL IF(mat%nlsym) THEN matdescra = 'sun' ELSE matdescra = 'g' END IF ! CALL mkl_dcsrmm('N', n, nrhs, n, alpha, matdescra, mat%val,& & mat%cols, mat%irow(1), mat%irow(2), xarr, & & n, beta, yarr, n) #else yarr = 0.0d0 DO i=1,n DO j=mat%irow(i), mat%irow(i+1)-1 yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:) END DO IF(mat%nlsym) THEN DO j=mat%irow(i)+1, mat%irow(i+1)-1 yarr(mat%cols(j),:) = yarr(mat%cols(j),:) & & + mat%val(j)*xarr(i,:) END DO END IF END DO #endif ! END FUNCTION vmx_mumps_matn !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ FUNCTION vmx_zmumps_matn(mat, xarr) RESULT(yarr) ! ! Return product mat*x ! TYPE(zmumps_mat) :: mat DOUBLE COMPLEX, INTENT(in) :: xarr(:,:) DOUBLE COMPLEX :: yarr(SIZE(xarr,1),SIZE(xarr,2)) ! DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0) INTEGER :: n, nrhs, i, j CHARACTER(len=6) :: matdescra ! n = mat%rank nrhs = SIZE(xarr,2) ! #ifdef MKL IF(mat%nlsym) THEN matdescra = 'sun' ELSE IF(mat%nlherm) THEN matdescra = 'hun' ELSE matdescra = 'g' END IF ! CALL mkl_zcsrmm('N', n, nrhs, n, alpha, matdescra, mat%val, & & mat%cols, mat%irow(1), mat%irow(2), xarr, n, & & beta, yarr, n) #else yarr = (0.0d0,0.0d0) DO i=1,n DO j=mat%irow(i), mat%irow(i+1)-1 yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:) END DO IF(mat%nlsym) THEN DO j=mat%irow(i)+1, mat%irow(i+1)-1 yarr(mat%cols(j),:) = yarr(mat%cols(j),:) & & + mat%val(j)*xarr(i,:) END DO ELSE IF(mat%nlherm) THEN DO j=mat%irow(i)+1, mat%irow(i+1)-1 yarr(mat%cols(j),:) = yarr(mat%cols(j),:) & & + CONJG(mat%val(j))*xarr(i,:) END DO END IF END DO #endif ! END FUNCTION vmx_zmumps_matn !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE destroy_mumps_mat(mat) ! ! Deallocate the sparse matrix mat ! TYPE(mumps_mat) :: mat ! IF(ASSOCIATED(mat%mat)) THEN CALL destroy(mat%mat) DEALLOCATE(mat%mat) END IF ! IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) ! IF(ASSOCIATED(mat%mumps_par%IRN_loc)) DEALLOCATE(mat%mumps_par%IRN_loc) mat%mumps_par%JOB = -2 CALL dmumps(mat%mumps_par) IF(mat%mumps_par%INFOG(1).NE.0) THEN WRITE(*,'(a,2i12)') 'DESTROY: Reordering failed with error', & & mat%mumps_par%INFOG(1:2) STOP END IF END SUBROUTINE destroy_mumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE destroy_zmumps_mat(mat) ! ! Deallocate the sparse matrix mat ! TYPE(zmumps_mat) :: mat ! IF(ASSOCIATED(mat%mat)) THEN CALL destroy(mat%mat) DEALLOCATE(mat%mat) END IF ! IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) ! IF(ASSOCIATED(mat%mumps_par%IRN_loc)) DEALLOCATE(mat%mumps_par%IRN_loc) mat%mumps_par%JOB = -2 CALL zmumps(mat%mumps_par) IF(mat%mumps_par%INFOG(1).NE.0) THEN WRITE(*,'(a,2i12)') 'DESTROY: Reordering failed with error', & & mat%mumps_par%INFOG(1:2) STOP END IF END SUBROUTINE destroy_zmumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE put_mumps_mat(fid, label, mat, str) ! ! Write matrix to hdf5 file ! USE futils ! INTEGER, INTENT(in) :: fid CHARACTER(len=*), INTENT(in) :: label TYPE(mumps_mat) :: mat CHARACTER(len=*), OPTIONAL, INTENT(in) :: str CHARACTER(len=128) :: mumps_grp ! IF(PRESENT(str)) THEN CALL creatg(fid, label, str) ELSE CALL creatg(fid, label) END IF IF(ASSOCIATED(mat%mat)) THEN CALL to_mat(mat) END IF CALL attach(fid, label, 'RANK', mat%rank) CALL attach(fid, label, 'NNZ', mat%nnz) CALL attach(fid, label, 'NLSYM', mat%nlsym) CALL attach(fid, label, 'NLPOS', mat%nlpos) CALL putarr(fid, TRIM(label)//'/irow', mat%irow) CALL putarr(fid, TRIM(label)//'/cols', mat%cols) CALL putarr(fid, TRIM(label)//'/perm', mat%perm) CALL putarr(fid, TRIM(label)//'/val', mat%val) ! mumps_grp = TRIM(label)//'/mumps_par' CALL creatg(fid, mumps_grp) CALL attach(fid, mumps_grp, 'PAR', mat%mumps_par%PAR) CALL attach(fid, mumps_grp, 'SYM', mat%mumps_par%SYM) CALL putarr(fid, TRIM(mumps_grp)//'/IRN', mat%mumps_par%IRN) ! END SUBROUTINE put_mumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE put_zmumps_mat(fid, label, mat, str) ! ! Write matrix to hdf5 file ! USE futils ! INTEGER, INTENT(in) :: fid CHARACTER(len=*), INTENT(in) :: label TYPE(zmumps_mat) :: mat CHARACTER(len=*), OPTIONAL, INTENT(in) :: str CHARACTER(len=128) :: mumps_grp ! IF(PRESENT(str)) THEN CALL creatg(fid, label, str) ELSE CALL creatg(fid, label) END IF IF(ASSOCIATED(mat%mat)) THEN CALL to_mat(mat) END IF CALL attach(fid, label, 'RANK', mat%rank) CALL attach(fid, label, 'NNZ', mat%nnz) CALL attach(fid, label, 'NLSYM', mat%nlsym) CALL attach(fid, label, 'NLPOS', mat%nlpos) CALL putarr(fid, TRIM(label)//'/irow', mat%irow) CALL putarr(fid, TRIM(label)//'/cols', mat%cols) CALL putarr(fid, TRIM(label)//'/val', mat%val) ! mumps_grp = TRIM(label)//'/mumps_par' CALL creatg(fid, mumps_grp) CALL attach(fid, mumps_grp, 'PAR', mat%mumps_par%PAR) CALL attach(fid, mumps_grp, 'SYM', mat%mumps_par%SYM) CALL putarr(fid, TRIM(mumps_grp)//'/IRN', mat%mumps_par%IRN) ! END SUBROUTINE put_zmumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE get_mumps_mat(fid, label, mat) ! ! Read matrix from hdf5 file ! USE futils ! INTEGER, INTENT(in) :: fid CHARACTER(len=*), INTENT(in) :: label TYPE(mumps_mat) :: mat CHARACTER(len=128) :: mumps_grp ! CALL getatt(fid, label, 'RANK', mat%rank) CALL getatt(fid, label, 'NNZ', mat%nnz) CALL getatt(fid, label, 'NLSYM', mat%nlsym) CALL getarr(fid, TRIM(label)//'/irow', mat%irow) CALL getarr(fid, TRIM(label)//'/cols', mat%cols) CALL getarr(fid, TRIM(label)//'/perm', mat%perm) CALL getarr(fid, TRIM(label)//'/val', mat%val) ! mumps_grp = TRIM(label)//'/mumps_par' CALL getatt(fid, mumps_grp, 'PAR', mat%mumps_par%PAR) CALL getatt(fid, mumps_grp, 'SYM', mat%mumps_par%SYM) CALL getarr(fid, TRIM(mumps_grp)//'/IRN', mat%mumps_par%IRN) ! END SUBROUTINE get_mumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE get_zmumps_mat(fid, label, mat) ! ! Read matrix from hdf5 file ! USE futils ! INTEGER, INTENT(in) :: fid CHARACTER(len=*), INTENT(in) :: label TYPE(zmumps_mat) :: mat CHARACTER(len=128) :: mumps_grp ! CALL getatt(fid, label, 'RANK', mat%rank) CALL getatt(fid, label, 'NNZ', mat%nnz) CALL getatt(fid, label, 'NLSYM', mat%nlsym) CALL getarr(fid, TRIM(label)//'/irow', mat%irow) CALL getarr(fid, TRIM(label)//'/cols', mat%cols) CALL getarr(fid, TRIM(label)//'/perm', mat%perm) CALL getarr(fid, TRIM(label)//'/val', mat%val) ! mumps_grp = TRIM(label)//'/mumps_par' CALL getatt(fid, mumps_grp, 'PAR', mat%mumps_par%PAR) CALL getatt(fid, mumps_grp, 'SYM', mat%mumps_par%SYM) CALL getarr(fid, TRIM(mumps_grp)//'/IRN', mat%mumps_par%IRN) ! END SUBROUTINE get_zmumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE mcopy_mumps_mat(mata, matb) ! ! Matrix copy: B = A (assume that B is already initialize) ! TYPE(mumps_mat) :: mata, matb INTEGER :: n, nnz, nnz_loc ! IF(ASSOCIATED(matb%mat)) THEN ! Sparse linled list not needed CALL destroy(matb%mat) DEALLOCATE(matb%mat) END IF ! n = mata%rank nnz = mata%nnz nnz_loc = mata%nnz_loc matb%nnz = nnz matb%nnz_loc = nnz_loc matb%nnz_start = mata%nnz_start matb%nnz_end = mata%nnz_end matb%istart = mata%istart matb%iend = mata%iend ! matb%mumps_par%NZ_loc = mata%mumps_par%NZ_loc ! IF(ASSOCIATED(matb%val)) DEALLOCATE(matb%val) IF(ASSOCIATED(matb%cols)) DEALLOCATE(matb%cols) IF(ASSOCIATED(matb%irow)) DEALLOCATE(matb%irow) ALLOCATE(matb%val(nnz_loc)); matb%val = mata%val ALLOCATE(matb%cols(nnz_loc)); matb%cols = mata%cols ALLOCATE(matb%irow(matb%istart:matb%iend+1)); matb%irow = mata%irow ! ALLOCATE(matb%mumps_par%IRN_loc(nnz_loc)) matb%mumps_par%IRN_loc = mata%mumps_par%IRN_loc matb%mumps_par%A_loc => matb%val matb%mumps_par%JCN_loc => matb%cols END SUBROUTINE mcopy_mumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE mcopy_zmumps_mat(mata, matb) ! ! Matrix copy: B = A (assume that B is already initialize) ! TYPE(zmumps_mat) :: mata, matb INTEGER :: n, nnz, nnz_loc ! IF(ASSOCIATED(matb%mat)) THEN ! Sparse linled list not needed CALL destroy(matb%mat) DEALLOCATE(matb%mat) END IF ! n = mata%rank nnz = mata%nnz nnz_loc = mata%nnz_loc matb%nnz = nnz matb%nnz_loc = nnz_loc matb%nnz_start = mata%nnz_start matb%nnz_end = mata%nnz_end matb%istart = mata%istart matb%iend = mata%iend ! matb%mumps_par%NZ_loc = mata%mumps_par%NZ_loc ! IF(ASSOCIATED(matb%val)) DEALLOCATE(matb%val) IF(ASSOCIATED(matb%cols)) DEALLOCATE(matb%cols) IF(ASSOCIATED(matb%irow)) DEALLOCATE(matb%irow) ALLOCATE(matb%val(nnz_loc)); matb%val = mata%val ALLOCATE(matb%cols(nnz_loc)); matb%cols = mata%cols ALLOCATE(matb%irow(matb%istart:matb%iend+1)); matb%irow = mata%irow ! ALLOCATE(matb%mumps_par%IRN_loc(nnz_loc)) matb%mumps_par%IRN_loc = mata%mumps_par%IRN_loc matb%mumps_par%A_loc => matb%val matb%mumps_par%JCN_loc => matb%cols END SUBROUTINE mcopy_zmumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE maddto_mumps_mat(mata, alpha, matb) ! ! A <- A + alpha*B ! TYPE(mumps_mat) :: mata, matb DOUBLE PRECISION :: alpha ! mata%val = mata%val + alpha*matb%val END SUBROUTINE maddto_mumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE maddto_zmumps_mat(mata, alpha, matb) ! ! A <- A + alpha*B ! TYPE(zmumps_mat) :: mata, matb DOUBLE COMPLEX :: alpha ! mata%val = mata%val + alpha*matb%val END SUBROUTINE maddto_zmumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE psum_mumps_mat(mat, comm) ! ! Parallel sum of sparse matrices ! INCLUDE "mpif.h" ! TYPE(mumps_mat) :: mat INCLUDE 'psum_mat.tpl' END SUBROUTINE psum_mumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE psum_zmumps_mat(mat, comm) ! ! Parallel sum of sparse matrices ! INCLUDE "mpif.h" ! TYPE(zmumps_mat) :: mat INCLUDE 'psum_mat.tpl' END SUBROUTINE psum_zmumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE p2p_mumps_mat(mat, dest, extyp, op, comm) ! ! Point-to-point combine sparse matrix between 2 processes ! INCLUDE "mpif.h" ! TYPE(mumps_mat) :: mat DOUBLE PRECISION, ALLOCATABLE :: val(:) INTEGER :: mpi_type=MPI_DOUBLE_PRECISION ! INCLUDE "p2p_mat.tpl" END SUBROUTINE p2p_mumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE p2p_zmumps_mat(mat, dest, extyp, op, comm) ! ! Point-to-point combine sparse matrix between 2 processes ! INCLUDE "mpif.h" ! TYPE(zmumps_mat) :: mat DOUBLE COMPLEX, ALLOCATABLE :: val(:) INTEGER :: mpi_type=MPI_DOUBLE_COMPLEX ! INCLUDE "p2p_mat.tpl" END SUBROUTINE p2p_zmumps_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ END MODULE mumps_bsplines diff --git a/src/p2p_mat.tpl b/src/p2p_mat.tpl index c39e729..7d9e10e 100644 --- a/src/p2p_mat.tpl +++ b/src/p2p_mat.tpl @@ -1,119 +1,119 @@ !> !> @file p2p_mat.tpl !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> INTEGER, INTENT(in) :: dest CHARACTER(len=*), INTENT(in) :: extyp ! ('send', 'recv', 'sendrecv') CHARACTER(len=*), INTENT(in) :: op ! ('put', 'updt') INTEGER, INTENT(in) :: comm ! INTEGER :: ierr INTEGER :: nrank, nnz, nnz_rem INTEGER :: i, s, idx, bufsize, position CHARACTER(len=1), ALLOCATABLE :: sbuf(:), rbuf(:) INTEGER, ALLOCATABLE :: irow(:), cols(:) !-------------------------------------------------------------------------- ! 1.0 Prologue ! nrank = mat%rank nnz = get_count(mat) CALL mpi_sendrecv(nnz, 1, MPI_INTEGER, dest, 0, & & nnz_rem, 1, MPI_INTEGER, dest, 0, & & comm, MPI_STATUS_IGNORE, ierr) !-------------------------------------------------------------------------- ! 2.0 Send or sendrecv ! IF(extyp.EQ.'send' .OR. extyp.EQ.'sendrecv') THEN ! ! Allocate packed send buffer bufsize = 0 CALL mpi_pack_size(nrank+1, MPI_INTEGER, comm, s, ierr); bufsize=bufsize+s CALL mpi_pack_size(nnz, MPI_INTEGER, comm, s, ierr); bufsize=bufsize+s CALL mpi_pack_size(nnz, mpi_type, comm, s, ierr); bufsize=bufsize+s ALLOCATE(sbuf(bufsize)) ! ! Obtain matrix CSR arrays and pack CALL to_mat(mat, nlkeep=.TRUE.) position = 0 CALL mpi_pack(mat%irow, nrank+1, MPI_INTEGER, sbuf, bufsize, position, comm, ierr) CALL mpi_pack(mat%cols, nnz, MPI_INTEGER, sbuf, bufsize, position, comm, ierr) CALL mpi_pack(mat%val, nnz, mpi_type, sbuf, bufsize, position, comm, ierr) DEALLOCATE(mat%irow) DEALLOCATE(mat%cols) DEALLOCATE(mat%val) ! ! Communicate packed buffer IF(extyp.EQ.'send') THEN CALL mpi_send(sbuf, position, MPI_PACKED, dest, 0, comm, ierr) DEALLOCATE(sbuf) END IF END IF !-------------------------------------------------------------------------- ! 3.0 Sendrecv or recv ! IF(extyp.EQ.'recv' .OR. extyp.EQ.'sendrecv') THEN ! ! Allocate unpacked received buffer bufsize = 0 CALL mpi_pack_size(nrank+1, MPI_INTEGER, comm, s, ierr); bufsize=bufsize+s CALL mpi_pack_size(nnz_rem, MPI_INTEGER, comm, s, ierr); bufsize=bufsize+s CALL mpi_pack_size(nnz_rem, mpi_type, comm, s, ierr); bufsize=bufsize+s ALLOCATE(rbuf(bufsize)) ! ! Communicate packed buffer IF(extyp.EQ.'recv') THEN CALL mpi_recv(rbuf, bufsize, MPI_PACKED, dest, 0, comm, MPI_STATUS_IGNORE, ierr) ELSE IF(extyp.EQ.'sendrecv') THEN CALL mpi_sendrecv(sbuf, position, MPI_PACKED, dest, 0, & & rbuf, bufsize, MPI_PACKED, dest, 0, & & comm, MPI_STATUS_IGNORE, ierr) DEALLOCATE(sbuf) END IF ! ! Unpacked rbuf ALLOCATE(irow(nrank+1)) ALLOCATE(cols(nnz_rem)) ALLOCATE(val(nnz_rem)) position = 0 CALL mpi_unpack(rbuf, bufsize, position, irow, nrank+1, MPI_INTEGER, comm, ierr) CALL mpi_unpack(rbuf, bufsize, position, cols, nnz_rem, MPI_INTEGER, comm, ierr) CALL mpi_unpack(rbuf, bufsize, position, val, nnz_rem, mpi_type, comm, ierr) DEALLOCATE(rbuf) ! ! Update/replace sparse matrix DO i=1,nrank DO idx=irow(i),irow(i+1)-1 IF(op.EQ.'updt') THEN CALL updtmat(mat, i, cols(idx), val(idx)) ELSE IF(op.EQ.'put') THEN CALL putele(mat, i, cols(idx), val(idx)) END IF END DO END DO DEALLOCATE(irow) DEALLOCATE(cols) DEALLOCATE(val) ! END IF !-------------------------------------------------------------------------- diff --git a/src/pardiso_mod.f90 b/src/pardiso_mod.f90 index 4491b9c..b7fe14b 100644 --- a/src/pardiso_mod.f90 +++ b/src/pardiso_mod.f90 @@ -1,1605 +1,1605 @@ !> !> @file pardiso_mod.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> MODULE pardiso_bsplines ! ! PARDISO_BSPLINES: Simple interface to the sparse direct solver PARDISO ! (MKL version). ! ! T.M. Tran, CRPP-EPFL ! November 2010 ! USE sparse IMPLICIT NONE ! TYPE pardiso_param INTEGER :: error, mtype, msglvl, phase, maxfct, mnum, nrhs INTEGER :: iparm(64) INTEGER*8 :: pt(64) END TYPE pardiso_param ! TYPE pardiso_mat INTEGER :: rank, nnz INTEGER :: nterms, kmat LOGICAL :: nlsym LOGICAL :: nlpos LOGICAL :: nlforce_zero TYPE(spmat), POINTER :: mat => NULL() INTEGER, POINTER :: cols(:) => NULL() INTEGER, POINTER :: irow(:) => NULL() INTEGER, POINTER :: perm(:) => NULL() DOUBLE PRECISION, POINTER :: val(:) => NULL() TYPE(pardiso_param) :: p END TYPE pardiso_mat ! TYPE zpardiso_mat INTEGER :: rank, nnz INTEGER :: nterms, kmat LOGICAL :: nlsym LOGICAL :: nlherm LOGICAL :: nlpos LOGICAL :: nlforce_zero TYPE(zspmat), POINTER :: mat => NULL() INTEGER, POINTER :: cols(:) => NULL() INTEGER, POINTER :: irow(:) => NULL() INTEGER, POINTER :: perm(:) => NULL() DOUBLE COMPLEX, POINTER :: val(:) => NULL() TYPE(pardiso_param) :: p END TYPE zpardiso_mat ! INTERFACE init MODULE PROCEDURE init_pardiso_mat, init_zpardiso_mat END INTERFACE init ! INTERFACE clear_mat MODULE PROCEDURE clear_pardiso_mat, clear_zpardiso_mat END INTERFACE clear_mat ! INTERFACE updtmat MODULE PROCEDURE updt_pardiso_mat, updt_zpardiso_mat END INTERFACE updtmat ! INTERFACE putele MODULE PROCEDURE putele_pardiso_mat, putele_zpardiso_mat END INTERFACE putele ! INTERFACE getele MODULE PROCEDURE getele_pardiso_mat, getele_zpardiso_mat END INTERFACE getele ! INTERFACE putrow MODULE PROCEDURE putrow_pardiso_mat, putrow_zpardiso_mat END INTERFACE putrow ! INTERFACE getrow MODULE PROCEDURE getrow_pardiso_mat, getrow_zpardiso_mat END INTERFACE getrow ! INTERFACE putcol MODULE PROCEDURE putcol_pardiso_mat, putcol_zpardiso_mat END INTERFACE putcol ! INTERFACE getcol MODULE PROCEDURE getcol_pardiso_mat, getcol_zpardiso_mat END INTERFACE getcol ! INTERFACE get_count MODULE PROCEDURE get_count_pardiso_mat, get_count_zpardiso_mat END INTERFACE get_count ! INTERFACE to_mat MODULE PROCEDURE to_pardiso_mat, to_zpardiso_mat END INTERFACE to_mat ! INTERFACE reord_mat MODULE PROCEDURE reord_pardiso_mat, reord_zpardiso_mat END INTERFACE reord_mat ! INTERFACE numfact MODULE PROCEDURE numfact_pardiso_mat, numfact_zpardiso_mat END INTERFACE numfact ! INTERFACE factor MODULE PROCEDURE factor_pardiso_mat, factor_zpardiso_mat END INTERFACE factor ! INTERFACE bsolve MODULE PROCEDURE bsolve_pardiso_mat1, bsolve_pardiso_matn, & & bsolve_zpardiso_mat1, bsolve_zpardiso_matn END INTERFACE bsolve ! INTERFACE vmx MODULE PROCEDURE vmx_pardiso_mat, vmx_pardiso_matn, & & vmx_zpardiso_mat, vmx_zpardiso_matn END INTERFACE vmx ! INTERFACE destroy MODULE PROCEDURE destroy_pardiso_mat, destroy_zpardiso_mat END INTERFACE destroy ! INTERFACE putmat MODULE PROCEDURE put_pardiso_mat, put_zpardiso_mat END INTERFACE putmat ! INTERFACE getmat MODULE PROCEDURE get_pardiso_mat, get_zpardiso_mat END INTERFACE getmat ! INTERFACE mcopy MODULE PROCEDURE mcopy_pardiso_mat, mcopy_zpardiso_mat END INTERFACE mcopy ! INTERFACE maddto MODULE PROCEDURE maddto_pardiso_mat, maddto_zpardiso_mat END INTERFACE maddto ! INTERFACE psum_mat MODULE PROCEDURE psum_pardiso_mat, psum_zpardiso_mat END INTERFACE psum_mat ! INTERFACE p2p_mat MODULE PROCEDURE p2p_pardiso_mat, p2p_zpardiso_mat END INTERFACE p2p_mat ! CONTAINS !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE init_pardiso_mat(n, nterms, mat, kmat, nlsym, nlpos, & & nlforce_zero) ! ! Initialize an empty sparse pardiso matrix ! INTEGER, INTENT(in) :: n, nterms TYPE(pardiso_mat) :: mat INTEGER, OPTIONAL, INTENT(in) :: kmat LOGICAL, OPTIONAL, INTENT(in) :: nlsym LOGICAL, OPTIONAL, INTENT(in) :: nlpos LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero ! mat%rank = n mat%nterms = nterms mat%nnz = 0 mat%nlsym = .FALSE. mat%nlpos = .TRUE. mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val IF(PRESENT(kmat)) mat%kmat = kmat IF(PRESENT(nlsym)) mat%nlsym = nlsym IF(PRESENT(nlpos)) mat%nlpos = nlpos IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero ! IF(ASSOCIATED(mat%mat)) DEALLOCATE(mat%mat) ALLOCATE(mat%mat) CALL init(n, mat%mat) ! mat%p%iparm = 0 CALL setup_pardiso(mat%p%iparm) mat%p%maxfct = 1 ! Max number of factorizations mat%p%mnum = 1 ! Actual matrix, shoild be 1<= num <= maxfct mat%p%error = 0 ! initialize error flag mat%p%msglvl = 1 ! print statistical information (0: no stat) IF(mat%nlsym) THEN IF(mat%nlpos) THEN mat%p%mtype = 2 ! symmetric, positive definite ELSE mat%p%mtype = -2 ! symmetric, indefinite END IF ELSE mat%p%mtype = 11 ! unsymmetric END IF mat%p%nrhs = 1 ! number of RHSs mat%p%pt(1:64) = 0 ! Initialize Pardiso address pointer (handle) ! CONTAINS SUBROUTINE setup_pardiso(iparm) INTEGER :: iparm(:) iparm(1) = 1 ! no solver default !!$ iparm(2) = 2 ! fill-in reordering from METIS iparm(2) = 0 ! Minimum degree fill-in reordering iparm(3) = 1 ! numbers of processors iparm(4) = 0 ! no iterative-direct algorithm iparm(5) = 0 ! no user fill-in reducing permutation iparm(6) = 0 ! =0 solution on the first n compoments of x iparm(7) = 0 ! not in use iparm(8) = 9 ! numbers of iterative refinement steps iparm(9) = 0 ! not in use iparm(10) = 13 ! perturbe the pivot elements with 1E-13 iparm(11) = 1 ! use nonsymmetric permutation and scaling MPS iparm(12) = 0 ! not in use iparm(13) = 0 ! not in use iparm(14) = 0 ! Output: number of perturbed pivots iparm(15) = 0 ! not in use iparm(16) = 0 ! not in use iparm(17) = 0 ! not in use iparm(18) = -1 ! Output: number of nonzeros in the factor LU iparm(19) = -1 ! Output: Mflops for LU factorization iparm(20) = 0 ! Output: Numbers of CG Iterations END SUBROUTINE setup_pardiso END SUBROUTINE init_pardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE init_zpardiso_mat(n, nterms, mat, kmat, nlsym, nlherm, & & nlpos, nlforce_zero) ! ! Initialize an empty sparse pardiso matrix ! INTEGER, INTENT(in) :: n, nterms TYPE(zpardiso_mat) :: mat INTEGER, OPTIONAL, INTENT(in) :: kmat LOGICAL, OPTIONAL, INTENT(in) :: nlsym LOGICAL, OPTIONAL, INTENT(in) :: nlherm LOGICAL, OPTIONAL, INTENT(in) :: nlpos LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero ! mat%rank = n mat%nterms = nterms mat%nnz = 0 mat%nlsym = .FALSE. mat%nlherm = .FALSE. mat%nlpos = .TRUE. mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val IF(PRESENT(kmat)) mat%kmat = kmat IF(PRESENT(nlsym)) mat%nlsym = nlsym IF(PRESENT(nlherm)) mat%nlherm = nlherm IF(PRESENT(nlpos)) mat%nlpos = nlpos IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero ! IF(ASSOCIATED(mat%mat)) DEALLOCATE(mat%mat) ALLOCATE(mat%mat) CALL init(n, mat%mat) ! mat%p%iparm = 0 CALL setup_pardiso(mat%p%iparm) mat%p%maxfct = 1 ! Max number of factorizations mat%p%mnum = 1 ! Actual matrix, shoild be 1<= num <= maxfct mat%p%error = 0 ! initialize error flag mat%p%msglvl = 1 ! print statistical information (0: no stat) IF(mat%nlherm) THEN IF(mat%nlpos) THEN mat%p%mtype = 4 ! hermitian, positive definite ELSE mat%p%mtype = -4 ! hermitian, indefinite END IF ELSE IF(mat%nlsym) THEN mat%p%mtype = 6 ! symmetric ELSE mat%p%mtype = 13 ! unsymmetric END IF mat%p%nrhs = 1 ! number of RHSs mat%p%pt(1:64) = 0 ! Initialize Pardiso address pointer (handle) ! CONTAINS SUBROUTINE setup_pardiso(iparm) INTEGER :: iparm(:) iparm(1) = 1 ! no solver default !!$ iparm(2) = 2 ! fill-in reordering from METIS iparm(2) = 0 ! Minimum degree fill-in reordering iparm(3) = 1 ! numbers of processors iparm(4) = 0 ! no iterative-direct algorithm iparm(5) = 0 ! no user fill-in reducing permutation iparm(6) = 0 ! =0 solution on the first n compoments of x iparm(7) = 0 ! not in use iparm(8) = 9 ! numbers of iterative refinement steps iparm(9) = 0 ! not in use iparm(10) = 13 ! perturbe the pivot elements with 1E-13 iparm(11) = 1 ! use nonsymmetric permutation and scaling MPS iparm(12) = 0 ! not in use iparm(13) = 0 ! not in use iparm(14) = 0 ! Output: number of perturbed pivots iparm(15) = 0 ! not in use iparm(16) = 0 ! not in use iparm(17) = 0 ! not in use iparm(18) = -1 ! Output: number of nonzeros in the factor LU iparm(19) = -1 ! Output: Mflops for LU factorization iparm(20) = 0 ! Output: Numbers of CG Iterations END SUBROUTINE setup_pardiso END SUBROUTINE init_zpardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE clear_pardiso_mat(mat) ! ! Clear matrix, keeping its sparse structure unchanged ! TYPE(pardiso_mat) :: mat ! mat%val = 0.0d0 mat%perm = 0 END SUBROUTINE clear_pardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE clear_zpardiso_mat(mat) ! ! Clear matrix, keeping its sparse structure unchanged ! TYPE(zpardiso_mat) :: mat ! mat%val = (0.0d0, 0.0d0) mat%perm = 0 END SUBROUTINE clear_zpardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE updt_pardiso_mat(mat, i, j, val) ! ! Update element Aij of pardiso matrix ! TYPE(pardiso_mat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE PRECISION, INTENT(in) :: val INTEGER :: k, s, e ! IF(mat%nlsym) THEN ! Store only upper part for symmetric matrices IF(i.GT.j) RETURN END IF ! IF(ASSOCIATED(mat%mat)) THEN CALL updtmat(mat%mat, i, j, val) ELSE s = mat%irow(i) e = mat%irow(i+1)-1 k = isearch(mat%cols(s:e), j) IF( k.GE.0 ) THEN mat%val(s+k) = mat%val(s+k)+val ELSE WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j STOP '*** Abnormal EXIT in MODULE pardiso_mod ***' END IF END IF END SUBROUTINE updt_pardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE updt_zpardiso_mat(mat, i, j, val) ! ! Update element Aij of pardiso matrix ! TYPE(zpardiso_mat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE COMPLEX, INTENT(in) :: val INTEGER :: k, s, e ! IF(mat%nlherm .OR. mat%nlsym) THEN ! Store only upper part IF(i.GT.j) RETURN END IF ! IF(ASSOCIATED(mat%mat)) THEN CALL updtmat(mat%mat, i, j, val) ELSE s = mat%irow(i) e = mat%irow(i+1)-1 k = isearch(mat%cols(s:e), j) IF( k.GE.0 ) THEN mat%val(s+k) = mat%val(s+k)+val ELSE WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j STOP '*** Abnormal EXIT in MODULE pardiso_mod ***' END IF END IF END SUBROUTINE updt_zpardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putele_pardiso_mat(mat, i, j, val) ! ! Put element (i,j) of matrix ! TYPE(pardiso_mat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE PRECISION, INTENT(in) :: val INTEGER :: iput, jput INTEGER :: k, s, e ! iput = i jput = j IF(mat%nlsym) THEN IF( i.GT.j ) THEN ! Lower triangular part iput = j jput = i END IF END IF ! IF(ASSOCIATED(mat%mat)) THEN CALL putele(mat%mat, iput, jput, val, & & nlforce_zero=mat%nlforce_zero) ELSE s = mat%irow(iput) e = mat%irow(iput+1)-1 k = isearch(mat%cols(s:e), jput) IF( k.GE.0 ) THEN mat%val(s+k) = val ELSE IF(ABS(val) .GT. EPSILON(0.0d0)) THEN ! Ok for zero val WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j STOP '*** Abnormal EXIT in MODULE pardiso_mod ***' END IF END IF END IF END SUBROUTINE putele_pardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putele_zpardiso_mat(mat, i, j, val) ! ! Put element (i,j) of matrix ! TYPE(zpardiso_mat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE COMPLEX, INTENT(in) :: val DOUBLE COMPLEX :: valput INTEGER :: iput, jput INTEGER :: k, s, e ! iput = i jput = j valput = val IF(mat%nlherm .OR. mat%nlsym) THEN IF( i.GT.j ) THEN ! Lower triangular part iput = j jput = i IF(mat%nlherm) THEN valput = CONJG(val) ELSE valput = val END IF END IF END IF ! IF(ASSOCIATED(mat%mat)) THEN CALL putele(mat%mat, iput, jput, valput, & & nlforce_zero=mat%nlforce_zero) ELSE s = mat%irow(iput) e = mat%irow(iput+1)-1 k = isearch(mat%cols(s:e), jput) IF( k.GE.0 ) THEN mat%val(s+k) = valput ELSE IF(ABS(val) .GT. EPSILON(0.0d0)) THEN ! Ok for zero val WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j STOP '*** Abnormal EXIT in MODULE pardiso_mod ***' END IF END IF END IF END SUBROUTINE putele_zpardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getele_pardiso_mat(mat, i, j, val) ! ! Get element (i,j) of sparse matrix ! TYPE(pardiso_mat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE PRECISION, INTENT(out) :: val INTEGER :: iget, jget INTEGER :: k, s, e ! iget = i jget = j IF(mat%nlsym) THEN IF( i.GT.j ) THEN ! Lower triangular part iget = j jget = i END IF END IF ! IF(ASSOCIATED(mat%mat)) THEN CALL getele(mat%mat, iget, jget, val) ELSE s = mat%irow(iget) e = mat%irow(iget+1)-1 k = isearch(mat%cols(s:e), jget) IF( k.GE.0 ) THEN val =mat%val(s+k) ELSE val = 0.0d0 ! Assume zero val if not found END IF END IF END SUBROUTINE getele_pardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getele_zpardiso_mat(mat, i, j, val) ! ! Get element (i,j) of sparse matrix ! TYPE(zpardiso_mat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE COMPLEX, INTENT(out) :: val DOUBLE COMPLEX :: valget INTEGER :: iget, jget INTEGER :: k, s, e ! iget = i jget = j IF(mat%nlherm .OR. mat%nlsym) THEN IF( i.GT.j ) THEN ! Lower triangular part iget = j jget = i END IF END IF ! IF(ASSOCIATED(mat%mat)) THEN CALL getele(mat%mat, iget, jget, valget) ELSE s = mat%irow(iget) e = mat%irow(iget+1)-1 k = isearch(mat%cols(s:e), jget) IF( k.GE.0 ) THEN valget =mat%val(s+k) ELSE valget = (0.0d0,0.0d0) ! Assume zero val if not found END IF END IF val = valget IF( i.GT.j ) THEN IF(mat%nlherm) THEN val = CONJG(valget) END IF END IF END SUBROUTINE getele_zpardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putrow_pardiso_mat(amat, i, arr) ! ! Put a row into sparse matrix ! TYPE(pardiso_mat), INTENT(inout) :: amat INTEGER, INTENT(in) :: i DOUBLE PRECISION, INTENT(in) :: arr(:) INTEGER :: j ! DO j=1,amat%rank CALL putele(amat, i, j, arr(j)) END DO END SUBROUTINE putrow_pardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putrow_zpardiso_mat(amat, i, arr) ! ! Put a row into sparse matrix ! TYPE(zpardiso_mat), INTENT(inout) :: amat INTEGER, INTENT(in) :: i DOUBLE COMPLEX, INTENT(in) :: arr(:) INTEGER :: j ! DO j=1,amat%rank CALL putele(amat, i, j, arr(j)) END DO END SUBROUTINE putrow_zpardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getrow_pardiso_mat(amat, i, arr) ! ! Get a row from sparse matrix ! TYPE(pardiso_mat), INTENT(in) :: amat INTEGER, INTENT(in) :: i DOUBLE PRECISION, INTENT(out) :: arr(:) INTEGER :: j ! DO j=1,amat%rank CALL getele(amat, i, j, arr(j)) END DO END SUBROUTINE getrow_pardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getrow_zpardiso_mat(amat, i, arr) ! ! Get a row from sparse matrix ! TYPE(zpardiso_mat), INTENT(in) :: amat INTEGER, INTENT(in) :: i DOUBLE COMPLEX, INTENT(out) :: arr(:) INTEGER :: j ! DO j=1,amat%rank CALL getele(amat, i, j, arr(j)) END DO END SUBROUTINE getrow_zpardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putcol_pardiso_mat(amat, j, arr) ! ! Put a column into sparse matrix ! TYPE(pardiso_mat), INTENT(inout) :: amat INTEGER, INTENT(in) :: j DOUBLE PRECISION, INTENT(in) :: arr(:) INTEGER :: i ! DO i=1,amat%rank CALL putele(amat, i, j, arr(i)) END DO END SUBROUTINE putcol_pardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putcol_zpardiso_mat(amat, j, arr) ! ! Put a column into sparse matrix ! TYPE(zpardiso_mat), INTENT(inout) :: amat INTEGER, INTENT(in) :: j DOUBLE COMPLEX, INTENT(in) :: arr(:) INTEGER :: i ! DO i=1,amat%rank CALL putele(amat, i, j, arr(i)) END DO END SUBROUTINE putcol_zpardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getcol_pardiso_mat(amat, j, arr) ! ! Get a column from sparse matrix ! TYPE(pardiso_mat), INTENT(in) :: amat INTEGER, INTENT(in) :: j DOUBLE PRECISION, INTENT(out) :: arr(:) INTEGER :: i ! DO i=1,amat%rank CALL getele(amat, i, j, arr(i)) END DO END SUBROUTINE getcol_pardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getcol_zpardiso_mat(amat, j, arr) ! ! Get a column from sparse matrix ! TYPE(zpardiso_mat), INTENT(in) :: amat INTEGER, INTENT(in) :: j DOUBLE COMPLEX, INTENT(out) :: arr(:) INTEGER :: i ! DO i=1,amat%rank CALL getele(amat, i, j, arr(i)) END DO END SUBROUTINE getcol_zpardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ INTEGER FUNCTION get_count_pardiso_mat(mat, nnz) ! ! Number of non-zeros in sparse matrix ! TYPE(pardiso_mat) :: mat INTEGER, INTENT(out), OPTIONAL :: nnz(:) INTEGER :: i ! IF(ASSOCIATED(mat%mat)) THEN get_count_pardiso_mat = get_count(mat%mat, nnz) ELSE get_count_pardiso_mat = mat%nnz IF(PRESENT(nnz)) THEN DO i=1,mat%rank nnz(i) = mat%irow(i+1)-mat%irow(i) END DO END IF END IF END FUNCTION get_count_pardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ INTEGER FUNCTION get_count_zpardiso_mat(mat, nnz) ! ! Number of non-zeros in sparse matrix ! TYPE(zpardiso_mat) :: mat INTEGER, INTENT(out), OPTIONAL :: nnz(:) INTEGER :: i ! IF(ASSOCIATED(mat%mat)) THEN get_count_zpardiso_mat = get_count(mat%mat, nnz) ELSE get_count_zpardiso_mat = mat%nnz IF(PRESENT(nnz)) THEN DO i=1,mat%rank nnz(i) = mat%irow(i+1)-mat%irow(i) END DO END IF END IF END FUNCTION get_count_zpardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE to_pardiso_mat(mat, nlkeep) ! ! Convert linked list spmat to pardiso matrice structure ! TYPE(pardiso_mat) :: mat LOGICAL, INTENT(in), OPTIONAL :: nlkeep INTEGER :: i, nnz, rank, s, e LOGICAL :: nlclean ! nlclean = .TRUE. IF(PRESENT(nlkeep)) THEN nlclean = .NOT. nlkeep END IF ! ! Allocate the Pardiso matrix structure ! nnz = get_count(mat) rank = mat%rank mat%nnz = nnz IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm) IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) ALLOCATE(mat%val(nnz)) ALLOCATE(mat%perm(rank)) ALLOCATE(mat%irow(rank+1)) ALLOCATE(mat%cols(nnz)) ! ! Fill Pardiso structure and optionnaly deallocate the sparse rows ! mat%irow = 1 DO i=1,rank mat%irow(i+1) = mat%irow(i) + mat%mat%row(i)%nnz s = mat%irow(i) e = mat%irow(i+1)-1 CALL getrow(mat%mat%row(i), mat%val(s:e), mat%cols(s:e)) IF(nlclean) CALL destroy(mat%mat%row(i)) END DO IF(nlclean) DEALLOCATE(mat%mat) END SUBROUTINE to_pardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE to_zpardiso_mat(mat, nlkeep) ! ! Convert linked list spmat to pardiso matrice structure ! TYPE(zpardiso_mat) :: mat LOGICAL, INTENT(in), OPTIONAL :: nlkeep INTEGER :: i, nnz, rank, s, e LOGICAL :: nlclean ! nlclean = .TRUE. IF(PRESENT(nlkeep)) THEN nlclean = .NOT. nlkeep END IF ! ! Allocate the Pardiso matrix structure ! nnz = get_count(mat) rank = mat%rank mat%nnz = nnz IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm) IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) ALLOCATE(mat%val(nnz)) ALLOCATE(mat%perm(rank)) ALLOCATE(mat%irow(rank+1)) ALLOCATE(mat%cols(nnz)) ! ! Fill Pardiso structure and deallocate the sparse rows ! mat%irow = 1 DO i=1,rank mat%irow(i+1) = mat%irow(i) + mat%mat%row(i)%nnz s = mat%irow(i) e = mat%irow(i+1)-1 CALL getrow(mat%mat%row(i), mat%val(s:e), mat%cols(s:e)) IF(nlclean) CALL destroy(mat%mat%row(i)) END DO IF(nlclean) DEALLOCATE(mat%mat) END SUBROUTINE to_zpardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE reord_pardiso_mat(mat, nlmetis, debug) ! ! Reordering and symbolic factorization ! TYPE(pardiso_mat) :: mat LOGICAL, OPTIONAL, INTENT(in) :: nlmetis LOGICAL, OPTIONAL, INTENT(in) :: debug DOUBLE PRECISION :: dummy ! mat%p%iparm(2) = 0 ! use minimum degree algorithm IF(PRESENT(nlmetis)) THEN IF(nlmetis) mat%p%iparm(2) = 2 ! use METIS nested dissection END IF mat%p%iparm(5)= 2 ! return the permutation vector in mat%perm mat%p%phase = 11 ! Reordering and symbolic factorization mat%p%msglvl = 0 IF(PRESENT(debug)) THEN IF(debug) mat%p%msglvl = 1 END IF CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, & & mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, & & mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, & & dummy, dummy, mat%p%error) IF(mat%p%error.NE.0) THEN WRITE(*,'(a,i4)') 'REORD: Reordering failed with error', mat%p%error END IF END SUBROUTINE reord_pardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE reord_zpardiso_mat(mat, nlmetis, debug) ! ! Reordering and symbolic factorization ! TYPE(zpardiso_mat) :: mat LOGICAL, OPTIONAL, INTENT(in) :: nlmetis LOGICAL, OPTIONAL, INTENT(in) :: debug DOUBLE COMPLEX :: dummy ! mat%p%iparm(2) = 0 ! use minimum degree algorithm IF(PRESENT(nlmetis)) THEN IF(nlmetis) mat%p%iparm(2) = 2 ! use METIS nested dissection END IF mat%p%iparm(5)= 2 ! return the permutation vector in mat%perm mat%p%phase = 11 ! Reordering and symbolic factorization mat%p%msglvl = 0 IF(PRESENT(debug)) THEN IF(debug) mat%p%msglvl = 1 END IF CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, & & mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, & & mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, & & dummy, dummy, mat%p%error) IF(mat%p%error.NE.0) THEN WRITE(*,'(a,i4)') 'REORD: Reordering failed with error', mat%p%error END IF END SUBROUTINE reord_zpardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE numfact_pardiso_mat(mat, debug) ! ! Numerical factorization ! TYPE(pardiso_mat) :: mat LOGICAL, OPTIONAL, INTENT(in) :: debug DOUBLE PRECISION :: dummy ! mat%p%phase = 22 ! Numerical factorization mat%p%msglvl = 0 IF(PRESENT(debug)) THEN IF(debug) mat%p%msglvl = 1 END IF CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, & & mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, & & mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, & & dummy, dummy, mat%p%error) IF(mat%p%error.NE.0) THEN WRITE(*,'(a,i4)') 'FACTOR: Factorization failed with error', mat%p%error END IF END SUBROUTINE numfact_pardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE numfact_zpardiso_mat(mat, debug) ! ! Numerical factorization ! TYPE(zpardiso_mat) :: mat LOGICAL, OPTIONAL, INTENT(in) :: debug DOUBLE COMPLEX :: dummy ! mat%p%phase = 22 ! Numerical factorization mat%p%msglvl = 0 IF(PRESENT(debug)) THEN IF(debug) mat%p%msglvl = 1 END IF CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, & & mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, & & mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, & & dummy, dummy, mat%p%error) IF(mat%p%error.NE.0) THEN WRITE(*,'(a,i4)') 'FACTOR: Factorization failed with error', mat%p%error END IF END SUBROUTINE numfact_zpardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE factor_pardiso_mat(mat, nlreord, nlmetis, debug) ! ! Factor (create +reorder + factor) a pardiso_mat matrix ! TYPE(pardiso_mat) :: mat LOGICAL, OPTIONAL, INTENT(in) :: nlreord LOGICAL, OPTIONAL, INTENT(in) :: nlmetis LOGICAL, OPTIONAL, INTENT(in) :: debug LOGICAL :: mlreord !---------------------------------------------------------------------- ! 1.0 Creation from the sparse rows ! IF(ASSOCIATED(mat%mat)) THEN CALL to_mat(mat) END IF !---------------------------------------------------------------------- ! 2.0 Reordering and symbolic factorization phase ! mlreord = .TRUE. IF(PRESENT(nlreord)) mlreord = nlreord IF(mlreord) THEN CALL reord_mat(mat, nlmetis, debug) END IF !---------------------------------------------------------------------- ! 3.0 Numerical factorization ! CALL numfact(mat, debug) END SUBROUTINE factor_pardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE factor_zpardiso_mat(mat, nlreord, nlmetis, debug) ! ! Factor (create +reorder + factor) a pardiso_mat matrix ! TYPE(zpardiso_mat) :: mat LOGICAL, OPTIONAL, INTENT(in) :: nlreord LOGICAL, OPTIONAL, INTENT(in) :: nlmetis LOGICAL, OPTIONAL, INTENT(in) :: debug LOGICAL :: mlreord !---------------------------------------------------------------------- ! 1.0 Creation from the sparse rows ! IF(ASSOCIATED(mat%mat)) THEN CALL to_mat(mat) END IF !---------------------------------------------------------------------- ! 2.0 Reordering and symbolic factorization phase ! mlreord = .TRUE. IF(PRESENT(nlreord)) mlreord = nlreord IF(mlreord) THEN CALL reord_mat(mat, nlmetis, debug) END IF !---------------------------------------------------------------------- ! 3.0 Numerical factorization ! CALL numfact(mat, debug) END SUBROUTINE factor_zpardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE bsolve_pardiso_mat1(mat, rhs, sol, nref, debug) ! ! Backsolve, using Pardiso ! TYPE(pardiso_mat) :: mat DOUBLE PRECISION :: rhs(:) DOUBLE PRECISION, OPTIONAL :: sol(:) INTEGER, OPTIONAL :: nref LOGICAL, OPTIONAL, INTENT(in) :: debug DOUBLE PRECISION :: dummy(SIZE(rhs)) ! mat%p%iparm(8) = 0 ! Max numbers of iterative refinement steps IF(PRESENT(nref)) mat%p%iparm(8) = nref mat%p%phase = 33 ! Backsolve mat%p%nrhs = 1 mat%p%msglvl = 0 IF(PRESENT(debug)) THEN IF(debug) THEN mat%p%msglvl = 1 END IF END IF IF(PRESENT(sol)) THEN mat%p%iparm(6) = 0 CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, & & mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, & & mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, & & rhs, sol, mat%p%error) ELSE mat%p%iparm(6) = 1 CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, & & mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, & & mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, & & rhs, dummy, mat%p%error) END IF IF(mat%p%error.NE.0) THEN WRITE(*,'(a,i4)') 'BSOLVE: Failed with error', mat%p%error END IF END SUBROUTINE bsolve_pardiso_mat1 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE bsolve_zpardiso_mat1(mat, rhs, sol, nref, debug) ! ! Backsolve, using Pardiso ! TYPE(zpardiso_mat) :: mat DOUBLE COMPLEX :: rhs(:) DOUBLE COMPLEX, OPTIONAL :: sol(:) INTEGER, OPTIONAL :: nref LOGICAL, OPTIONAL, INTENT(in) :: debug DOUBLE COMPLEX :: dummy(SIZE(rhs)) ! mat%p%iparm(8) = 0 ! Max numbers of iterative refinement steps IF(PRESENT(nref)) mat%p%iparm(8) = nref mat%p%phase = 33 ! Backsolve mat%p%nrhs = 1 mat%p%msglvl = 0 IF(PRESENT(debug)) THEN IF(debug) THEN mat%p%msglvl = 1 END IF END IF IF(PRESENT(sol)) THEN mat%p%iparm(6) = 0 CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, & & mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, & & mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, & & rhs, sol, mat%p%error) ELSE mat%p%iparm(6) = 1 CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, & & mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, & & mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, & & rhs, dummy, mat%p%error) END IF IF(mat%p%error.NE.0) THEN WRITE(*,'(a,i4)') 'BSOLVE: Failed with error', mat%p%error END IF END SUBROUTINE bsolve_zpardiso_mat1 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE bsolve_pardiso_matn(mat, rhs, sol, nref, debug) ! ! Backsolve, using Pardiso, multiple RHS ! TYPE(pardiso_mat) :: mat DOUBLE PRECISION :: rhs(:,:) DOUBLE PRECISION, OPTIONAL :: sol(:,:) INTEGER, OPTIONAL :: nref LOGICAL, OPTIONAL, INTENT(in) :: debug DOUBLE PRECISION :: dummy(SIZE(rhs,1),SIZE(rhs,2)) ! mat%p%iparm(8) = 0 ! Max numbers of iterative refinement steps IF(PRESENT(nref)) mat%p%iparm(8) = nref mat%p%phase = 33 ! Backsolve mat%p%nrhs = SIZE(rhs,2) mat%p%msglvl = 0 IF(PRESENT(debug)) THEN IF(debug) THEN mat%p%msglvl = 1 END IF END IF IF(PRESENT(sol)) THEN mat%p%iparm(6) = 0 CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, & & mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, & & mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, & & rhs, sol, mat%p%error) ELSE mat%p%iparm(6) = 1 CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, & & mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, & & mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, & & rhs, dummy, mat%p%error) END IF IF(mat%p%error.NE.0) THEN WRITE(*,'(a,i4)') 'BSOLVE: Failed with error', mat%p%error END IF END SUBROUTINE bsolve_pardiso_matn !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE bsolve_zpardiso_matn(mat, rhs, sol, nref, debug) ! ! Backsolve, using Pardiso, multiple RHS ! TYPE(zpardiso_mat) :: mat DOUBLE COMPLEX :: rhs(:,:) DOUBLE COMPLEX, OPTIONAL :: sol(:,:) INTEGER, OPTIONAL :: nref LOGICAL, OPTIONAL, INTENT(in) :: debug DOUBLE COMPLEX :: dummy(SIZE(rhs,1),SIZE(rhs,2)) ! mat%p%iparm(8) = 0 ! Max numbers of iterative refinement steps IF(PRESENT(nref)) mat%p%iparm(8) = nref mat%p%phase = 33 ! Backsolve mat%p%nrhs = SIZE(rhs,2) mat%p%msglvl = 0 IF(PRESENT(debug)) THEN IF(debug) THEN mat%p%msglvl = 1 END IF END IF IF(PRESENT(sol)) THEN mat%p%iparm(6) = 0 CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, & & mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, & & mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, & & rhs, sol, mat%p%error) ELSE mat%p%iparm(6) = 1 CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, & & mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, & & mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, & & rhs, dummy, mat%p%error) END IF IF(mat%p%error.NE.0) THEN WRITE(*,'(a,i4)') 'BSOLVE: Failed with error', mat%p%error END IF END SUBROUTINE bsolve_zpardiso_matn !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ FUNCTION vmx_pardiso_mat(mat, xarr) RESULT(yarr) ! ! Return product mat*x ! TYPE(pardiso_mat) :: mat DOUBLE PRECISION, INTENT(in) :: xarr(:) DOUBLE PRECISION :: yarr(SIZE(xarr)) DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0 CHARACTER(len=6) :: matdescra INTEGER :: n, i, j ! n = mat%rank ! #ifdef MKL IF(mat%nlsym) THEN matdescra = 'sun' ELSE matdescra = 'g' END IF ! CALL mkl_dcsrmv('N', n, n, alpha, matdescra, mat%val, & & mat%cols, mat%irow(1), mat%irow(2), xarr, & & beta, yarr) #else yarr = 0.0d0 DO i=1,n DO j=mat%irow(i), mat%irow(i+1)-1 yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j)) END DO IF(mat%nlsym) THEN DO j=mat%irow(i)+1, mat%irow(i+1)-1 yarr(mat%cols(j)) = yarr(mat%cols(j)) & & + mat%val(j)*xarr(i) END DO END IF END DO #endif ! END FUNCTION vmx_pardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ FUNCTION vmx_zpardiso_mat(mat, xarr) RESULT(yarr) ! ! Return product mat*x ! TYPE(zpardiso_mat) :: mat DOUBLE COMPLEX, INTENT(in) :: xarr(:) DOUBLE COMPLEX :: yarr(SIZE(xarr)) DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0) INTEGER :: n, i, j CHARACTER(len=6) :: matdescra ! n = mat%rank ! #ifdef MKL IF(mat%nlsym) THEN matdescra = 'sun' ELSE IF(mat%nlherm) THEN matdescra = 'hun' ELSE matdescra = 'g' END IF CALL mkl_zcsrmv('N', n, n, alpha, matdescra, mat%val, & & mat%cols, mat%irow(1), mat%irow(2), xarr, & & beta, yarr) #else yarr = (0.0d0,0.0d0) DO i=1,n DO j=mat%irow(i), mat%irow(i+1)-1 yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j)) END DO IF(mat%nlsym) THEN DO j=mat%irow(i)+1, mat%irow(i+1)-1 yarr(mat%cols(j)) = yarr(mat%cols(j)) & & + mat%val(j)*xarr(i) END DO ELSE IF(mat%nlherm) THEN DO j=mat%irow(i)+1, mat%irow(i+1)-1 yarr(mat%cols(j)) = yarr(mat%cols(j)) & & + CONJG(mat%val(j))*xarr(i) END DO END IF END DO #endif ! END FUNCTION vmx_zpardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ FUNCTION vmx_pardiso_matn(mat, xarr) RESULT(yarr) ! ! Return product mat*x ! TYPE(pardiso_mat) :: mat DOUBLE PRECISION, INTENT(in) :: xarr(:,:) DOUBLE PRECISION :: yarr(SIZE(xarr,1),SIZE(xarr,2)) ! DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0 INTEGER :: n, nrhs, i, j CHARACTER(len=6) :: matdescra ! n = mat%rank nrhs = SIZE(xarr,2) ! #ifdef MKL IF(mat%nlsym) THEN matdescra = 'sun' ELSE matdescra = 'g' END IF ! CALL mkl_dcsrmm('N', n, nrhs, n, alpha, matdescra, mat%val,& & mat%cols, mat%irow(1), mat%irow(2), xarr, & & n, beta, yarr, n) #else yarr = 0.0d0 DO i=1,n DO j=mat%irow(i), mat%irow(i+1)-1 yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:) END DO IF(mat%nlsym) THEN DO j=mat%irow(i)+1, mat%irow(i+1)-1 yarr(mat%cols(j),:) = yarr(mat%cols(j),:) & & + mat%val(j)*xarr(i,:) END DO END IF END DO #endif ! END FUNCTION vmx_pardiso_matn !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ FUNCTION vmx_zpardiso_matn(mat, xarr) RESULT(yarr) ! ! Return product mat*x ! TYPE(zpardiso_mat) :: mat DOUBLE COMPLEX, INTENT(in) :: xarr(:,:) DOUBLE COMPLEX :: yarr(SIZE(xarr,1),SIZE(xarr,2)) ! DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0) INTEGER :: n, nrhs, i, j CHARACTER(len=6) :: matdescra ! n = mat%rank nrhs = SIZE(xarr,2) ! #ifdef MKL IF(mat%nlsym) THEN matdescra = 'sun' ELSE IF(mat%nlherm) THEN matdescra = 'hun' ELSE matdescra = 'g' END IF ! CALL mkl_zcsrmm('N', n, nrhs, n, alpha, matdescra, mat%val, & & mat%cols, mat%irow(1), mat%irow(2), xarr, n, & & beta, yarr, n) #else yarr = (0.0d0,0.0d0) DO i=1,n DO j=mat%irow(i), mat%irow(i+1)-1 yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:) END DO IF(mat%nlsym) THEN DO j=mat%irow(i)+1, mat%irow(i+1)-1 yarr(mat%cols(j),:) = yarr(mat%cols(j),:) & & + mat%val(j)*xarr(i,:) END DO ELSE IF(mat%nlherm) THEN DO j=mat%irow(i)+1, mat%irow(i+1)-1 yarr(mat%cols(j),:) = yarr(mat%cols(j),:) & & + CONJG(mat%val(j))*xarr(i,:) END DO END IF END DO #endif ! END FUNCTION vmx_zpardiso_matn !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE destroy_pardiso_mat(mat) ! ! Deallocate the sparse matrix mat ! TYPE(pardiso_mat) :: mat DOUBLE PRECISION :: dummy ! IF(ASSOCIATED(mat%mat)) THEN CALL destroy(mat%mat) DEALLOCATE(mat%mat) END IF ! IF(mat%p%phase .GT. 0) THEN mat%p%phase = 0 ! Release memory for factors CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, & & mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, & & mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, & & dummy, dummy, mat%p%error) IF(mat%p%error.NE.0) THEN WRITE(*,'(a,i4)') 'DESTROY: Mem release failed with error', mat%p%error END IF END IF ! IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm) IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) END SUBROUTINE destroy_pardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE destroy_zpardiso_mat(mat) ! ! Deallocate the sparse matrix mat ! TYPE(zpardiso_mat) :: mat DOUBLE COMPLEX :: dummy ! IF(ASSOCIATED(mat%mat)) THEN CALL destroy(mat%mat) DEALLOCATE(mat%mat) END IF ! IF(mat%p%phase .GT. 0) THEN mat%p%phase = 0 ! Release memory for factors CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, & & mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, & & mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, & & dummy, dummy, mat%p%error) IF(mat%p%error.NE.0) THEN WRITE(*,'(a,i4)') 'DESTROY: Mem release failed with error', mat%p%error END IF END IF ! IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm) IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) END SUBROUTINE destroy_zpardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE put_pardiso_mat(fid, label, mat, str) ! ! Write matrix to hdf5 file ! USE futils ! INTEGER, INTENT(in) :: fid CHARACTER(len=*), INTENT(in) :: label TYPE(pardiso_mat) :: mat CHARACTER(len=*), OPTIONAL, INTENT(in) :: str ! IF(PRESENT(str)) THEN CALL creatg(fid, label, str) ELSE CALL creatg(fid, label) END IF IF(ASSOCIATED(mat%mat)) THEN CALL to_mat(mat) END IF CALL attach(fid, label, 'RANK', mat%rank) CALL attach(fid, label, 'NNZ', mat%nnz) CALL attach(fid, label, 'NLSYM', mat%nlsym) CALL attach(fid, label, 'NLPOS', mat%nlpos) CALL putarr(fid, TRIM(label)//'/irow', mat%irow) CALL putarr(fid, TRIM(label)//'/cols', mat%cols) CALL putarr(fid, TRIM(label)//'/perm', mat%perm) CALL putarr(fid, TRIM(label)//'/val', mat%val) ! CALL creatg(fid, TRIM(label)//'/p') CALL attach(fid, TRIM(label)//'/p', 'error', mat%p%error) CALL attach(fid, TRIM(label)//'/p', 'mtype', mat%p%mtype) CALL attach(fid, TRIM(label)//'/p', 'phase', mat%p%phase) CALL attach(fid, TRIM(label)//'/p', 'msglv', mat%p%msglvl) CALL attach(fid, TRIM(label)//'/p', 'maxfct', mat%p%maxfct) CALL attach(fid, TRIM(label)//'/p', 'mnum', mat%p%mnum) CALL attach(fid, TRIM(label)//'/p', 'nrhs', mat%p%nrhs) CALL putarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm) END SUBROUTINE put_pardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE put_zpardiso_mat(fid, label, mat, str) ! ! Write matrix to hdf5 file ! USE futils ! INTEGER, INTENT(in) :: fid CHARACTER(len=*), INTENT(in) :: label TYPE(zpardiso_mat) :: mat CHARACTER(len=*), OPTIONAL, INTENT(in) :: str ! IF(PRESENT(str)) THEN CALL creatg(fid, label, str) ELSE CALL creatg(fid, label) END IF IF(ASSOCIATED(mat%mat)) THEN CALL to_mat(mat) END IF CALL attach(fid, label, 'RANK', mat%rank) CALL attach(fid, label, 'NNZ', mat%nnz) CALL attach(fid, label, 'NLSYM', mat%nlsym) CALL attach(fid, label, 'NLPOS', mat%nlpos) CALL attach(fid, label, 'NLHERM', mat%nlherm) CALL putarr(fid, TRIM(label)//'/irow', mat%irow) CALL putarr(fid, TRIM(label)//'/cols', mat%cols) CALL putarr(fid, TRIM(label)//'/perm', mat%perm) CALL putarr(fid, TRIM(label)//'/val', mat%val) ! CALL creatg(fid, TRIM(label)//'/p') CALL attach(fid, TRIM(label)//'/p', 'error', mat%p%error) CALL attach(fid, TRIM(label)//'/p', 'mtype', mat%p%mtype) CALL attach(fid, TRIM(label)//'/p', 'phase', mat%p%phase) CALL attach(fid, TRIM(label)//'/p', 'msglv', mat%p%msglvl) CALL attach(fid, TRIM(label)//'/p', 'maxfct', mat%p%maxfct) CALL attach(fid, TRIM(label)//'/p', 'mnum', mat%p%mnum) CALL attach(fid, TRIM(label)//'/p', 'nrhs', mat%p%nrhs) CALL putarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm) END SUBROUTINE put_zpardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE get_pardiso_mat(fid, label, mat) ! ! Read matrix from hdf5 file ! USE futils ! INTEGER, INTENT(in) :: fid CHARACTER(len=*), INTENT(in) :: label TYPE(pardiso_mat) :: mat ! CALL getatt(fid, label, 'RANK', mat%rank) CALL getatt(fid, label, 'NNZ', mat%nnz) CALL getatt(fid, label, 'NLSYM', mat%nlsym) CALL getarr(fid, TRIM(label)//'/irow', mat%irow) CALL getarr(fid, TRIM(label)//'/cols', mat%cols) CALL getarr(fid, TRIM(label)//'/perm', mat%perm) CALL getarr(fid, TRIM(label)//'/val', mat%val) ! CALL getatt(fid, TRIM(label)//'/p', 'error', mat%p%error) CALL getatt(fid, TRIM(label)//'/p', 'mtype', mat%p%mtype) CALL getatt(fid, TRIM(label)//'/p', 'phase', mat%p%phase) CALL getatt(fid, TRIM(label)//'/p', 'msglv', mat%p%msglvl) CALL getatt(fid, TRIM(label)//'/p', 'maxfct', mat%p%maxfct) CALL getatt(fid, TRIM(label)//'/p', 'mnum', mat%p%mnum) CALL getatt(fid, TRIM(label)//'/p', 'nrhs', mat%p%nrhs) CALL getarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm) END SUBROUTINE get_pardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE get_zpardiso_mat(fid, label, mat) ! ! Read matrix from hdf5 file ! USE futils ! INTEGER, INTENT(in) :: fid CHARACTER(len=*), INTENT(in) :: label TYPE(zpardiso_mat) :: mat ! CALL getatt(fid, label, 'RANK', mat%rank) CALL getatt(fid, label, 'NNZ', mat%nnz) CALL getatt(fid, label, 'SYM', mat%nlsym) CALL getatt(fid, label, 'HERM', mat%nlherm) CALL getarr(fid, TRIM(label)//'/irow', mat%irow) CALL getarr(fid, TRIM(label)//'/cols', mat%cols) CALL getarr(fid, TRIM(label)//'/perm', mat%perm) CALL getarr(fid, TRIM(label)//'/val', mat%val) ! CALL getatt(fid, TRIM(label)//'/p', 'error', mat%p%error) CALL getatt(fid, TRIM(label)//'/p', 'mtype', mat%p%mtype) CALL getatt(fid, TRIM(label)//'/p', 'phase', mat%p%phase) CALL getatt(fid, TRIM(label)//'/p', 'msglv', mat%p%msglvl) CALL getatt(fid, TRIM(label)//'/p', 'maxfct', mat%p%maxfct) CALL getatt(fid, TRIM(label)//'/p', 'mnum', mat%p%mnum) CALL getatt(fid, TRIM(label)//'/p', 'nrhs', mat%p%nrhs) CALL getarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm) END SUBROUTINE get_zpardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE mcopy_pardiso_mat(mata, matb) ! ! Matrix copy: B = A ! TYPE(pardiso_mat) :: mata, matb INTEGER :: n, nnz ! ! Assume that matb was already initialized by init_wsmp_mat. IF(matb%rank.LE.0) THEN WRITE(*,'(a)') 'MCOPY: Mat B is not initialized with INIT' STOP '*** Abnormal EXIT in MODULE pardiso_mod ***' END IF ! IF(ASSOCIATED(matb%mat)) THEN CALL destroy(matb%mat) DEALLOCATE(matb%mat) END IF ! n = mata%rank nnz = mata%nnz matb%rank = n matb%nnz = nnz matb%nlsym = mata%nlsym matb%nlpos = mata%nlpos matb%nlforce_zero = mata%nlforce_zero ! IF(ASSOCIATED(matb%val)) DEALLOCATE(matb%val) IF(ASSOCIATED(matb%cols)) DEALLOCATE(matb%cols) IF(ASSOCIATED(matb%irow)) DEALLOCATE(matb%irow) IF(ASSOCIATED(matb%perm)) DEALLOCATE(matb%perm) ALLOCATE(matb%val(nnz)); matb%val = mata%val ALLOCATE(matb%cols(nnz)); matb%cols = mata%cols ALLOCATE(matb%irow(n+1)); matb%irow = mata%irow ALLOCATE(matb%perm(n)) END SUBROUTINE mcopy_pardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE mcopy_zpardiso_mat(mata, matb) ! ! Matrix copy: B = A ! TYPE(zpardiso_mat) :: mata, matb INTEGER :: n, nnz ! ! Assume that matb was already initialized by init_wsmp_mat. IF(matb%rank.LE.0) THEN WRITE(*,'(a)') 'MCOPY: Mat B is not initialized with INIT' STOP '*** Abnormal EXIT in MODULE pardiso_mod ***' END IF ! IF(ASSOCIATED(matb%mat)) THEN CALL destroy(matb%mat) DEALLOCATE(matb%mat) END IF ! n = mata%rank nnz = mata%nnz matb%rank = n matb%nnz = nnz matb%nlsym = mata%nlsym matb%nlherm = mata%nlherm matb%nlpos = mata%nlpos matb%nlforce_zero = mata%nlforce_zero ! IF(ASSOCIATED(matb%val)) DEALLOCATE(matb%val) IF(ASSOCIATED(matb%cols)) DEALLOCATE(matb%cols) IF(ASSOCIATED(matb%irow)) DEALLOCATE(matb%irow) IF(ASSOCIATED(matb%perm)) DEALLOCATE(matb%perm) ALLOCATE(matb%val(nnz)); matb%val = mata%val ALLOCATE(matb%cols(nnz)); matb%cols = mata%cols ALLOCATE(matb%irow(n+1)); matb%irow = mata%irow ALLOCATE(matb%perm(n)) END SUBROUTINE mcopy_zpardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE maddto_pardiso_mat(mata, alpha, matb) ! ! A <- A + alpha*B ! TYPE(pardiso_mat) :: mata, matb DOUBLE PRECISION :: alpha ! mata%val = mata%val + alpha*matb%val END SUBROUTINE maddto_pardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE maddto_zpardiso_mat(mata, alpha, matb) ! ! A <- A + alpha*B ! TYPE(zpardiso_mat) :: mata, matb DOUBLE COMPLEX :: alpha ! mata%val = mata%val + alpha*matb%val END SUBROUTINE maddto_zpardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE psum_pardiso_mat(mat, comm) ! ! Parallel sum of sparse matrices ! INCLUDE "mpif.h" ! TYPE(pardiso_mat) :: mat INCLUDE 'psum_mat.tpl' END SUBROUTINE psum_pardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE psum_zpardiso_mat(mat, comm) ! ! Parallel sum of sparse matrices ! INCLUDE "mpif.h" ! TYPE(zpardiso_mat) :: mat INCLUDE 'psum_mat.tpl' END SUBROUTINE psum_zpardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE p2p_pardiso_mat(mat, dest, extyp, op, comm) ! ! Point-to-point combine sparse matrix between 2 processes ! INCLUDE "mpif.h" ! TYPE(pardiso_mat) :: mat DOUBLE PRECISION, ALLOCATABLE :: val(:) INTEGER :: mpi_type=MPI_DOUBLE_PRECISION ! INCLUDE "p2p_mat.tpl" END SUBROUTINE p2p_pardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE p2p_zpardiso_mat(mat, dest, extyp, op, comm) ! ! Point-to-point combine sparse matrix between 2 processes ! INCLUDE "mpif.h" ! TYPE(zpardiso_mat) :: mat DOUBLE COMPLEX, ALLOCATABLE :: val(:) INTEGER :: mpi_type=MPI_DOUBLE_COMPLEX ! INCLUDE "p2p_mat.tpl" END SUBROUTINE p2p_zpardiso_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ END MODULE pardiso_bsplines diff --git a/src/petsc_mod.F90 b/src/petsc_mod.F90 index 9909f17..acd8d8f 100644 --- a/src/petsc_mod.F90 +++ b/src/petsc_mod.F90 @@ -1,873 +1,873 @@ !> !> @file petsc_mod.F90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> MODULE petsc_bsplines ! ! PETSC_BSPLINES: Simple interface to the parallel iterative ! solver PETSC ! ! T.M. Tran, CRPP-EPFL ! June 2011 ! USE sparse IMPLICIT NONE #include "finclude/petsc.h90" ! TYPE petsc_mat INTEGER :: rank INTEGER(8) :: nnz, nnz_loc INTEGER :: nterms, kmat INTEGER :: istart, iend INTEGER, POINTER :: rcounts(:) => NULL() INTEGER, POINTER :: rdispls(:) => NULL() INTEGER :: comm LOGICAL :: nlsym LOGICAL :: nlforce_zero TYPE(spmat), POINTER :: mat => NULL() INTEGER, POINTER :: cols(:) => NULL() INTEGER, POINTER :: irow(:) => NULL() DOUBLE PRECISION, POINTER :: val(:) => NULL() ! Mat :: AMAT KSP :: SOLVER END TYPE petsc_mat ! INTERFACE init MODULE PROCEDURE init_petsc_mat END INTERFACE init ! INTERFACE clear_mat MODULE PROCEDURE clear_petsc_mat END INTERFACE clear_mat ! INTERFACE updtmat MODULE PROCEDURE updt_petsc_mat END INTERFACE updtmat ! INTERFACE putele MODULE PROCEDURE putele_petsc_mat END INTERFACE putele ! INTERFACE getele MODULE PROCEDURE getele_petsc_mat END INTERFACE getele ! INTERFACE putrow MODULE PROCEDURE putrow_petsc_mat END INTERFACE putrow ! INTERFACE getrow MODULE PROCEDURE getrow_petsc_mat END INTERFACE getrow ! INTERFACE putcol MODULE PROCEDURE putcol_petsc_mat END INTERFACE putcol ! INTERFACE getcol MODULE PROCEDURE getcol_petsc_mat END INTERFACE getcol ! INTERFACE get_count MODULE PROCEDURE get_count_petsc_mat END INTERFACE get_count ! INTERFACE to_mat MODULE PROCEDURE to_petsc_mat END INTERFACE to_mat ! INTERFACE save_mat MODULE PROCEDURE save_petsc_mat END INTERFACE save_mat ! INTERFACE load_mat MODULE PROCEDURE load_petsc_mat END INTERFACE load_mat ! INTERFACE bsolve MODULE PROCEDURE bsolve_petsc_mat1, bsolve_petsc_matn END INTERFACE bsolve ! INTERFACE vmx MODULE PROCEDURE vmx_petsc_mat, vmx_petsc_matn END INTERFACE vmx ! INTERFACE destroy MODULE PROCEDURE destroy_petsc_mat END INTERFACE destroy ! INTERFACE mcopy MODULE PROCEDURE mcopy_petsc_mat END INTERFACE mcopy ! INTERFACE maddto MODULE PROCEDURE maddto_petsc_mat END INTERFACE maddto ! CONTAINS !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE init_petsc_mat(n, nterms, mat, kmat, nlsym, & & nlforce_zero, comm) ! ! Initialize an empty sparse petsc matrix ! USE pputils2 INCLUDE 'mpif.h' ! INTEGER, INTENT(in) :: n, nterms TYPE(petsc_mat) :: mat INTEGER, OPTIONAL, INTENT(in) :: kmat LOGICAL, OPTIONAL, INTENT(in) :: nlsym LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero INTEGER, OPTIONAL, INTENT(in) :: comm ! INTEGER :: me, npes INTEGER :: i, ierr, nloc PetscBool :: flg !!$ PetscTruth :: flg ! Petsc version before 3.2 ! ! Prologue ! CALL mpi_comm_size(comm, npes, ierr) CALL mpi_comm_rank(comm, me, ierr) ! mat%rank = n mat%nterms = nterms mat%nnz = 0 mat%nlsym = .FALSE. mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val IF(PRESENT(kmat)) mat%kmat = kmat IF(PRESENT(nlsym)) mat%nlsym = nlsym IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero ! ! Inititialize the PETSC environment ! IF(PRESENT(comm)) THEN PETSC_COMM_WORLD = comm ELSE ! Single process Petsc PETSC_COMM_WORLD = MPI_COMM_SELF END IF CALL PetscInitialize(PETSC_NULL_CHARACTER, ierr) mat%comm = PETSC_COMM_WORLD ! ! Matrix partition ! CALL dist1d(mat%comm, 1, n, mat%istart, nloc) mat%iend = mat%istart + nloc - 1 ! IF(ASSOCIATED(mat%rcounts)) DEALLOCATE(mat%rcounts) IF(ASSOCIATED(mat%rdispls)) DEALLOCATE(mat%rdispls) ALLOCATE(mat%rcounts(0:npes-1)) ALLOCATE(mat%rdispls(0:npes-1)) CALL mpi_allgather(nloc, 1, MPI_INTEGER, mat%rcounts, 1, MPI_INTEGER, & & mat%comm, ierr) mat%rdispls(0) = 0 DO i=1,npes-1 mat%rdispls(i) = mat%rdispls(i-1)+mat%rcounts(i-1) END DO ! ! Initialize linked list for sparse matrix ! IF(ASSOCIATED(mat%mat)) DEALLOCATE(mat%mat) ALLOCATE(mat%mat) CALL init(n, mat%mat, mat%istart, mat%iend) ! ! Create PETSC matrix ! CALL MatCreate(mat%comm, mat%AMAT, ierr) CALL MatSetSizes(mat%AMAT, nloc, nloc, n, n, ierr) CALL MatSetFromOptions(mat%AMAT, ierr) ! ! Create PETSC SOLVER ! CALL KSPCreate(mat%comm, mat%SOLVER, ierr) ! END SUBROUTINE init_petsc_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE clear_petsc_mat(mat) ! ! Clear matrix, keeping its sparse structure unchanged ! TYPE(petsc_mat) :: mat ! IF(ASSOCIATED(mat%val)) THEN mat%val = 0.0d0 ELSE CALL MatZeroEntries(mat%AMAT) END IF END SUBROUTINE clear_petsc_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE updt_petsc_mat(mat, i, j, val) ! ! Update element Aij of petsc matrix ! TYPE(petsc_mat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE PRECISION, INTENT(in) :: val INTEGER :: ierr ! IF(mat%nlsym) THEN ! Store only upper part for symmetric matrices IF(i.GT.j) RETURN END IF IF(i.LT.mat%istart .OR. i.GT.mat%iend) THEN WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j WRITE(*,'(a,2i6)') ' istart, iend ', mat%istart, mat%iend STOP '*** Abnormal EXIT in MODULE mumps_mod ***' END IF ! IF(ASSOCIATED(mat%mat)) THEN CALL updtmat(mat%mat, i, j, val) ELSE CALL MatSetValue(mat%AMAT, i-1, j-1, ADD_VALUES, ierr) END IF END SUBROUTINE updt_petsc_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putele_petsc_mat(mat, i, j, val) ! ! Put element (i,j) of matrix ! TYPE(petsc_mat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE PRECISION, INTENT(in) :: val INTEGER :: iput, jput INTEGER :: ierr ! iput = i jput = j IF(mat%nlsym) THEN IF( i.GT.j ) THEN ! Lower triangular part iput = j jput = i END IF END IF ! ! Do nothing if outside IF(iput.LT.mat%istart .OR. iput.GT.mat%iend) RETURN ! IF(ASSOCIATED(mat%mat)) THEN CALL putele(mat%mat, iput, jput, val, & & nlforce_zero=mat%nlforce_zero) ELSE CALL MatSetValue(mat%AMAT, iput-1, jput-1, val, INSERT_VALUES, ierr) END IF END SUBROUTINE putele_petsc_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getele_petsc_mat(mat, i, j, val) ! ! Get element (i,j) of sparse matrix ! TYPE(petsc_mat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE PRECISION, INTENT(out) :: val INTEGER :: iget, jget INTEGER :: ierr ! iget = i jget = j IF(mat%nlsym) THEN IF( i.GT.j ) THEN ! Lower triangular part iget = j jget = i END IF END IF ! val = 0.0d0 ! Assume zero val if outside IF( iget.LT.mat%istart .OR. iget.GT.mat%iend ) RETURN ! IF(ASSOCIATED(mat%mat)) THEN CALL getele(mat%mat, iget, jget, val) ELSE CALL MatGetValues(mat%AMAT, 1, iget-1, 1, jget-1, val, ierr) END IF END SUBROUTINE getele_petsc_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putrow_petsc_mat(amat, i, arr, cols) ! ! Put a row into sparse matrix ! TYPE(petsc_mat), INTENT(inout) :: amat INTEGER, INTENT(in) :: i DOUBLE PRECISION, INTENT(in) :: arr(:) INTEGER, INTENT(in), OPTIONAL :: cols(:) INTEGER :: j ! IF(i.GT.amat%iend .OR. i.LT.amat%istart) RETURN ! Do nothing ! IF(PRESENT(cols)) THEN DO j=1,SIZE(cols) CALL putele(amat, i, cols(j), arr(j)) END DO ELSE DO j=1,amat%rank CALL putele(amat, i, j, arr(j)) END DO END IF END SUBROUTINE putrow_petsc_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getrow_petsc_mat(amat, i, arr) ! ! Get a row from sparse matrix ! TYPE(petsc_mat), INTENT(in) :: amat INTEGER, INTENT(in) :: i DOUBLE PRECISION, INTENT(out) :: arr(:) INTEGER :: j, ierr INTEGER :: ncols, cols(amat%rank) DOUBLE PRECISION :: vals(amat%rank) ! arr = 0.0d0 IF(i.GT.amat%iend .OR. i.LT.amat%istart) RETURN ! return 0 if outside IF(ASSOCIATED(amat%mat)) THEN DO j=1,amat%rank CALL getele(amat%mat, i, j, arr(j)) END DO ELSE CALL MatGetRow(amat%AMAT, i-1, ncols, cols, vals, ierr) ! 0-based array DO j=1,ncols arr(cols(j)+1) = vals(j) END DO CALL MatRestoreRow(amat%AMAT, i-1, ncols, cols, vals, ierr) END IF END SUBROUTINE getrow_petsc_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putcol_petsc_mat(amat, j, arr) ! ! Put a column into sparse matrix ! TYPE(petsc_mat), INTENT(inout) :: amat INTEGER, INTENT(in) :: j DOUBLE PRECISION, INTENT(in) :: arr(:) INTEGER :: i ! DO i=amat%istart,amat%iend CALL putele(amat, i, j, arr(i)) END DO END SUBROUTINE putcol_petsc_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getcol_petsc_mat(amat, j, arr) ! ! Get a column from sparse matrix ! TYPE(petsc_mat), INTENT(in) :: amat INTEGER, INTENT(in) :: j DOUBLE PRECISION, INTENT(out) :: arr(:) INTEGER :: i ! arr = 0.0d0 DO i=amat%istart,amat%iend CALL getele(amat, i, j, arr(i)) END DO END SUBROUTINE getcol_petsc_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ INTEGER FUNCTION get_count_petsc_mat(mat, nnz) ! ! Number of non-zeros in sparse matrix ! TYPE(petsc_mat) :: mat INTEGER, INTENT(out), OPTIONAL :: nnz(:) INTEGER :: i, ierr DOUBLE PRECISION :: info(MAT_INFO_SIZE) ! IF(ASSOCIATED(mat%mat)) THEN get_count_petsc_mat = get_count(mat%mat, nnz) ELSE CALL MatGetInfo(mat%AMAT, MAT_LOCAL, info, ierr) get_count_petsc_mat = info(MAT_INFO_NZ_ALLOCATED) !!$ IF(PRESENT(nnz)) THEN !!$ DO i=1,mat%rank !!$ nnz(i) = mat%irow(i+1)-mat%irow(i) !!$ END DO !!$ END IF END IF END FUNCTION get_count_petsc_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE to_petsc_mat(mat, nlkeep) ! ! Convert linked list spmat to petsc matrice structure ! TYPE(petsc_mat) :: mat LOGICAL, INTENT(in), OPTIONAL :: nlkeep ! INTEGER :: me, i, j, jj, nnz, rank, s, e INTEGER :: istart, iend INTEGER :: iloc, k1, k2, ncol INTEGER :: d_nz, d_nnz(mat%istart:mat%iend) INTEGER :: o_nz, o_nnz(mat%istart:mat%iend) INTEGER :: comm, ierr LOGICAL :: nlclean ! nlclean = .TRUE. IF(PRESENT(nlkeep)) THEN nlclean = .NOT. nlkeep END IF ! comm = mat%comm CALL mpi_comm_rank(comm, me, ierr) ! ! Allocate the Petsc matrix structure ! rank = mat%rank mat%nnz_loc = get_count(mat) istart = mat%istart iend = mat%iend CALL mpi_allreduce(mat%nnz_loc, mat%nnz, 1, MPI_INTEGER8, MPI_SUM, comm, ierr) ! IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) ALLOCATE(mat%val(mat%nnz_loc)) ALLOCATE(mat%cols(mat%nnz_loc)) ALLOCATE(mat%irow(mat%istart:mat%iend+1)) ! ! Get Sparse structure from linked list ! d_nnz(:) = 0 o_nnz(:) = 0 mat%irow(istart) = 1 DO i=istart,iend mat%irow(i+1) = mat%irow(i) + mat%mat%row(i)%nnz s = mat%irow(i) e = mat%irow(i+1)-1 CALL getrow(mat%mat%row(i), mat%val(s:e), mat%cols(s:e)) d_nnz(i) = COUNT(mat%cols(s:e) .GE. istart .AND. & & mat%cols(s:e) .LE. iend) o_nnz(i) = mat%mat%row(i)%nnz - d_nnz(i) IF(nlclean) CALL destroy(mat%mat%row(i)) END DO IF(nlclean) DEALLOCATE(mat%mat) ! ! Petsc matrix preallocation ! CALL MatMPIAIJSetPreallocation(mat%AMAT, PETSC_NULL_INTEGER, & & d_nnz, PETSC_NULL_INTEGER, o_nnz, ierr) CALL MatSeqAIJSetPreallocation(mat%AMAT, PETSC_NULL_INTEGER, & & d_nnz, ierr) ! ! Petsc matrix assembly ! mat%cols = mat%cols-1 ! Start column index = 0 DO i=istart,iend iloc = i-istart+1 k1 = mat%irow(i) k2 = mat%irow(i+1) ncol = k2-k1 CALL MatSetValues(mat%AMAT, 1, i-1, ncol, mat%cols(k1:k2-1), & & mat%val(k1:k2-1), INSERT_VALUES, ierr) END DO ! CALL MatAssemblyBegin(mat%AMAT, MAT_FINAL_ASSEMBLY ,ierr) CALL MatAssemblyEnd(mat%AMAT, MAT_FINAL_ASSEMBLY, ierr) ! DEALLOCATE(mat%irow) DEALLOCATE(mat%cols) DEALLOCATE(mat%val) ! END SUBROUTINE to_petsc_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE save_petsc_mat(mat, file) ! ! Save matrix in PETSC binary format ! TYPE(petsc_mat) :: mat CHARACTER(len=*), INTENT(in) :: file ! INTEGER :: ierr PetscViewer :: viewer ! CALL PetscViewerBinaryOpen(mat%comm, TRIM(file), FILE_MODE_WRITE,& & viewer, ierr) CALL MatView(mat%AMAT, viewer, ierr) CALL PetscViewerDestroy(viewer, ierr) ! END SUBROUTINE save_petsc_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE load_petsc_mat(mat, file) ! ! Load matrix in PETSC binary format ! TYPE(petsc_mat) :: mat CHARACTER(len=*), INTENT(in) :: file ! INTEGER :: nloc, i, npes, ierr PetscViewer :: viewer ! CALL mpi_comm_size(mat%comm, npes, ierr) ! ! Clean up unneeded sparse matrix ! IF(ASSOCIATED(mat%mat)) THEN CALL destroy(mat%mat) DEALLOCATE(mat%mat) END IF ! ! Load matrix from file ! CALL PetscViewerBinaryOpen(mat%comm, TRIM(file), FILE_MODE_READ,& & viewer, ierr) CALL MatLoad(mat%AMAT, viewer, ierr) CALL PetscViewerDestroy(viewer, ierr) ! ! Some mat info ! CALL MatGetSize(mat%AMAT, mat%rank, PETSC_NULL_INTEGER, ierr) mat%nnz_loc = get_count(mat) CALL mpi_allreduce(mat%nnz_loc, mat%nnz, 1, MPI_INTEGER8, MPI_SUM, & & mat%comm, ierr) ! ! ! Recompute matrix partition from loaded matrix ! CALL MatGetOwnershipRange(mat%AMAT, mat%istart, mat%iend, ierr) mat%istart = mat%istart+1 ! Convert from Petsc definition nloc = mat%iend - mat%istart + 1 CALL mpi_allgather(nloc, 1, MPI_INTEGER, mat%rcounts, 1, MPI_INTEGER, & & mat%comm, ierr) mat%rdispls(0) = 0 DO i=1,npes-1 mat%rdispls(i) = mat%rdispls(i-1)+mat%rcounts(i-1) END DO ! END SUBROUTINE load_petsc_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE bsolve_petsc_mat1(mat, rhs, sol, rtol_in, nitmax_in, nits) ! ! Backsolve, using Petsc ! TYPE(petsc_mat) :: mat DOUBLE PRECISION, INTENT(inout) :: rhs(:) DOUBLE PRECISION, INTENT(out), OPTIONAL :: sol(:) DOUBLE PRECISION, INTENT(in), OPTIONAL :: rtol_in INTEGER, INTENT(in), OPTIONAL :: nitmax_in INTEGER, INTENT(out), OPTIONAL :: nits ! DOUBLE PRECISION :: rtol=PETSC_DEFAULT_DOUBLE_PRECISION INTEGER :: nitmax=PETSC_DEFAULT_INTEGER INTEGER :: i, istart, iend, nrank_loc, nrank INTEGER :: npes, me, ierr INTEGER :: idx(mat%istart:mat%iend) ! Vec :: vec_rhs, vec_sol PetscScalar :: scal PetscScalar, POINTER :: psol_loc(:) KSPConvergedReason :: reason ! CALL mpi_comm_size(mat%comm, npes, ierr) CALL mpi_comm_rank(mat%comm, me, ierr) ! istart = mat%istart iend = mat%iend nrank_loc = iend-istart+1 nrank = mat%rank idx = (/ (i, i=istart,iend) /) - 1 ! 0-based petsc vector ! ! Create Vectors ! CALL VecCreate(mat%comm, vec_rhs, ierr) CALL VecSetSizes(vec_rhs, nrank_loc, nrank, ierr) CALL VecSetFromOptions(vec_rhs, ierr) CALL VecDuplicate(vec_rhs, vec_sol, ierr) ! ! Set solver ! IF(PRESENT(rtol_in)) rtol = rtol_in IF(PRESENT(nitmax_in)) nitmax = nitmax_in ! CALL KSPSetOperators(mat%SOLVER, mat%AMAT, mat%AMAT, SAME_PRECONDITIONER, ierr) CALL KSPSetTolerances(mat%SOLVER, rtol, PETSC_DEFAULT_DOUBLE_PRECISION,& & PETSC_DEFAULT_DOUBLE_PRECISION, nitmax, ierr) CALL KSPSetFromOptions(mat%SOLVER, ierr) ! ! Set RHS ! CALL VecSetValues(vec_rhs, nrank_loc, idx, rhs(istart), INSERT_VALUES, ierr) CALL VecAssemblyBegin(vec_rhs, ierr) CALL VecAssemblyEnd(vec_rhs, ierr) ! CALL KSPSolve(mat%SOLVER, vec_rhs, vec_sol, ierr) CALL KSPGetConvergedReason(mat%SOLVER, reason, ierr) IF(reason .LT. 0) THEN IF(me.EQ.0) WRITE(*,'(a,i4)') 'BSOLVE: diverges with reason', reason END IF IF(PRESENT(nits)) THEN CALL KSPGetIterationNumber(mat%SOLVER, nits, ierr) END IF ! ! Get global solutions on all MPI processes ! CALL VecGetArrayF90(vec_sol, psol_loc, ierr) ! IF(PRESENT(sol)) THEN CALL mpi_allgatherv(psol_loc, nrank_loc, MPI_DOUBLE_PRECISION, & & sol, mat%rcounts, mat%rdispls, MPI_DOUBLE_PRECISION, & & mat%comm, ierr) ELSE CALL mpi_allgatherv(psol_loc, nrank_loc, MPI_DOUBLE_PRECISION, & & rhs, mat%rcounts, mat%rdispls, MPI_DOUBLE_PRECISION, & & mat%comm, ierr) END IF ! CALL VecRestoreArrayF90(vec_sol, psol_loc, ierr) END SUBROUTINE bsolve_petsc_mat1 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE bsolve_petsc_matn(mat, rhs, sol, rtol_in, nitmax_in, nits) ! ! Backsolve, using Petsc, multiple RHS ! TYPE(petsc_mat) :: mat DOUBLE PRECISION :: rhs(:,:) DOUBLE PRECISION, OPTIONAL :: sol(:,:) DOUBLE PRECISION, INTENT(in), OPTIONAL :: rtol_in INTEGER, INTENT(in), OPTIONAL :: nitmax_in INTEGER, INTENT(out), OPTIONAL :: nits(:) ! DOUBLE PRECISION :: rtol=PETSC_DEFAULT_DOUBLE_PRECISION INTEGER :: nitmax=PETSC_DEFAULT_INTEGER INTEGER :: j, nrhs INTEGER :: i, istart, iend, nrank_loc, nrank INTEGER :: npes, me, ierr INTEGER :: idx(mat%istart:mat%iend) ! Vec :: vec_rhs, vec_sol PetscScalar :: scal PetscScalar, POINTER :: psol_loc(:) KSPConvergedReason :: reason ! CALL mpi_comm_size(mat%comm, npes, ierr) CALL mpi_comm_rank(mat%comm, me, ierr) ! nrhs = SIZE(rhs,2) istart = mat%istart iend = mat%iend nrank_loc = iend-istart+1 nrank = mat%rank idx = (/ (i, i=istart,iend) /) - 1 ! 0-based petsc vector ! ! Create Vectors ! CALL VecCreate(mat%comm, vec_rhs, ierr) CALL VecSetSizes(vec_rhs, nrank_loc, nrank, ierr) CALL VecSetFromOptions(vec_rhs, ierr) CALL VecDuplicate(vec_rhs, vec_sol, ierr) ! ! Set solver ! IF(PRESENT(rtol_in)) rtol = rtol_in IF(PRESENT(nitmax_in)) nitmax = nitmax_in ! CALL KSPSetOperators(mat%SOLVER, mat%AMAT, mat%AMAT, SAME_PRECONDITIONER, ierr) CALL KSPSetTolerances(mat%SOLVER, rtol, PETSC_DEFAULT_DOUBLE_PRECISION,& & PETSC_DEFAULT_DOUBLE_PRECISION, nitmax, ierr) CALL KSPSetFromOptions(mat%SOLVER, ierr) ! ! Set RHS ! DO j=1,nrhs CALL VecSetValues(vec_rhs, nrank_loc, idx, rhs(istart,j), INSERT_VALUES, ierr) CALL VecAssemblyBegin(vec_rhs, ierr) CALL VecAssemblyEnd(vec_rhs, ierr) ! CALL KSPSolve(mat%SOLVER, vec_rhs, vec_sol, ierr) CALL KSPGetConvergedReason(mat%SOLVER, reason, ierr) IF(reason .LT. 0) THEN IF(me.EQ.0) THEN WRITE(*,'(a,i4,a,i8)') 'BSOLVE: diverges with reason', reason, & & ' for j =', j END IF END IF IF(PRESENT(nits)) THEN CALL KSPGetIterationNumber(mat%SOLVER, nits(j), ierr) END IF ! ! Get global solutions on all MPI processes ! CALL VecGetArrayF90(vec_sol, psol_loc, ierr) ! IF(PRESENT(sol)) THEN CALL mpi_allgatherv(psol_loc, nrank_loc, MPI_DOUBLE_PRECISION, & & sol(1,j), mat%rcounts, mat%rdispls, MPI_DOUBLE_PRECISION, & & mat%comm, ierr) ELSE CALL mpi_allgatherv(psol_loc, nrank_loc, MPI_DOUBLE_PRECISION, & & rhs(1,j), mat%rcounts, mat%rdispls, MPI_DOUBLE_PRECISION, & & mat%comm, ierr) END IF ! CALL VecRestoreArrayF90(vec_sol, psol_loc, ierr) END DO ! END SUBROUTINE bsolve_petsc_matn !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ FUNCTION vmx_petsc_mat(mat, xarr) RESULT(yarr) ! ! Return product mat*x ! TYPE(petsc_mat) :: mat DOUBLE PRECISION, INTENT(in) :: xarr(:) DOUBLE PRECISION :: yarr(SIZE(xarr)) DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0 CHARACTER(len=6) :: matdescra INTEGER :: n, i, j ! n = mat%rank ! #ifdef MKL IF(mat%nlsym) THEN matdescra = 'sun' ELSE matdescra = 'g' END IF ! CALL mkl_dcsrmv('N', n, n, alpha, matdescra, mat%val, & & mat%cols, mat%irow(1), mat%irow(2), xarr, & & beta, yarr) #else yarr = 0.0d0 DO i=1,n DO j=mat%irow(i), mat%irow(i+1)-1 yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j)) END DO IF(mat%nlsym) THEN DO j=mat%irow(i)+1, mat%irow(i+1)-1 yarr(mat%cols(j)) = yarr(mat%cols(j)) & & + mat%val(j)*xarr(i) END DO END IF END DO #endif ! END FUNCTION vmx_petsc_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ FUNCTION vmx_petsc_matn(mat, xarr) RESULT(yarr) ! ! Return product mat*x ! TYPE(petsc_mat) :: mat DOUBLE PRECISION, INTENT(in) :: xarr(:,:) DOUBLE PRECISION :: yarr(SIZE(xarr,1),SIZE(xarr,2)) ! DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0 INTEGER :: n, nrhs, i, j CHARACTER(len=6) :: matdescra ! n = mat%rank nrhs = SIZE(xarr,2) ! #ifdef MKL IF(mat%nlsym) THEN matdescra = 'sun' ELSE matdescra = 'g' END IF ! CALL mkl_dcsrmm('N', n, nrhs, n, alpha, matdescra, mat%val,& & mat%cols, mat%irow(1), mat%irow(2), xarr, & & n, beta, yarr, n) #else yarr = 0.0d0 DO i=1,n DO j=mat%irow(i), mat%irow(i+1)-1 yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:) END DO IF(mat%nlsym) THEN DO j=mat%irow(i)+1, mat%irow(i+1)-1 yarr(mat%cols(j),:) = yarr(mat%cols(j),:) & & + mat%val(j)*xarr(i,:) END DO END IF END DO #endif ! END FUNCTION vmx_petsc_matn !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE destroy_petsc_mat(mat) ! ! Deallocate the sparse matrix mat ! TYPE(petsc_mat) :: mat INTEGER :: ierr ! IF(ASSOCIATED(mat%mat)) THEN CALL destroy(mat%mat) DEALLOCATE(mat%mat) END IF ! IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) ! CALL MatDestroy(mat%AMAT,ierr) END SUBROUTINE destroy_petsc_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE mcopy_petsc_mat(mata, matb) ! ! Matrix copy: B = A ! TYPE(petsc_mat) :: mata, matb INTEGER :: ierr ! ! Assume that matb was already initialized by init_petsc_mat. IF(matb%rank.LE.0) THEN WRITE(*,'(a)') 'MCOPY: Mat B is not initialized with INIT' STOP '*** Abnormal EXIT in MODULE petsc_mod ***' END IF ! IF(ASSOCIATED(matb%mat)) THEN CALL destroy(matb%mat) DEALLOCATE(matb%mat) END IF ! matb%rank = mata%rank matb%nnz = mata%nnz matb%nnz_loc = mata%nnz_loc matb%istart = mata%istart matb%iend = mata%iend matb%nlsym = mata%nlsym matb%nlforce_zero = mata%nlforce_zero ! IF(ASSOCIATED(matb%mat)) THEN CALL destroy(matb%mat) DEALLOCATE(matb%mat) END IF ! ! Destroy existing PETSC matrix and recreate a new one from scratch ! CALL MatDestroy(matb%AMAT, ierr) CALL MatConvert(mata%AMAT, MATSAME, MAT_INITIAL_MATRIX, matb%AMAT, ierr) END SUBROUTINE mcopy_petsc_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE maddto_petsc_mat(mata, alpha, matb) ! ! A <- A + alpha*B ! TYPE(petsc_mat) :: mata, matb DOUBLE PRECISION :: alpha ! mata%val = mata%val + alpha*matb%val END SUBROUTINE maddto_petsc_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ! END MODULE petsc_bsplines diff --git a/src/psum_mat.tpl b/src/psum_mat.tpl index bf44e63..c13fe33 100644 --- a/src/psum_mat.tpl +++ b/src/psum_mat.tpl @@ -1,101 +1,101 @@ !> !> @file psum_mat.tpl !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> INTEGER, INTENT(in) :: comm ! INTEGER :: me, npes, ierr INTEGER :: n, r, i, base INTEGER :: newrank !--------------------------------------------------------------------- ! 1.0 Prologue ! CALL mpi_comm_size(comm, npes, ierr) CALL mpi_comm_rank(comm, me, ierr) ! ! Compute n and r defined by npes = 2**n+r i=1 n=0 DO WHILE (2*i.LE.npes) n=n+1 i=2*i END DO ! i=2**n r = npes-i !--------------------------------------------------------------------- ! 2.0 Node partition ! ! I: nodes with ranks < 2*r ! . nodes with even ranks receive from rank+1 and sum ! . odd ranks sends to rank-1 ! II: nodes with ranks >= 2*r ! . do nothing ! IF( me .LT. 2*r ) THEN IF( MODULO(me,2) .EQ. 0 ) THEN CALL p2p_mat(mat, me+1, 'recv', 'updt', comm) ELSE CALL p2p_mat(mat, me-1, 'send', 'updt', comm) END IF END IF !--------------------------------------------------------------------- ! 3.0 Binary tree reduction using new ranks ! ! Define new ranks IF( MODULO(me,2).EQ.0 .AND. me.LT.2*r ) THEN ! new rank in I newrank = me/2 ELSE IF( me.GE.2*r ) THEN ! new ranks in II newrank = me-r ELSE ! inactive ranks in I newrank = -1 END IF ! ! Reduction with 2**n (positive) newranks IF( newrank .GE. 0 ) THEN ! only for nodes with new rank > 0 base = 1 DO i=1,n CALL p2p_mat(mat, oldrank(IEOR(newrank,base)), & & 'sendrecv', 'updt', comm) base = base*2 END DO END IF !--------------------------------------------------------------------- ! 4.0 Final exchanche in I ! IF( me .LT. 2*r ) THEN IF( MODULO(me,2).EQ.0 ) THEN CALL p2p_mat(mat, me+1, 'send', 'put', comm) ELSE CALL p2p_mat(mat, me-1, 'recv', 'put', comm) END IF END IF !--------------------------------------------------------------------- CONTAINS INTEGER FUNCTION oldrank(rank) INTEGER, INTENT(in) :: rank IF(rank.LT.r) THEN oldrank = 2*rank ELSE oldrank = rank+r END IF END FUNCTION oldrank diff --git a/src/pwsmp_mod.f90 b/src/pwsmp_mod.f90 index b981cde..ee22c57 100644 --- a/src/pwsmp_mod.f90 +++ b/src/pwsmp_mod.f90 @@ -1,2032 +1,2032 @@ !> !> @file pwsmp_mod.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> MODULE pwsmp_bsplines ! ! PWSMP_BSPLINES: Simple interface to the parallel sparse direct ! solver PWSMP. ! ! T.M. Tran, CRPP-EPFL ! December 2011 ! USE sparse IMPLICIT NONE ! INTEGER, SAVE :: current_matid = -1 INTEGER, SAVE :: last_matid = -1 ! TYPE wsmp_param INTEGER :: iparm(64) DOUBLE PRECISION :: dparm(64) END TYPE wsmp_param ! TYPE wsmp_mat INTEGER :: matid=-1 INTEGER :: rank=0, nnz INTEGER :: nterms, kmat, nrhs INTEGER :: comm INTEGER :: istart, iend, rank_loc INTEGER :: nnz_start, nnz_end, nnz_loc LOGICAL :: nlsym LOGICAL :: nlpos LOGICAL :: nlforce_zero TYPE(spmat), POINTER :: mat => NULL() INTEGER, POINTER :: cols(:) => NULL() INTEGER, POINTER :: irow(:) => NULL() INTEGER, POINTER :: perm(:) => NULL() INTEGER, POINTER :: invp(:) => NULL() INTEGER, POINTER :: mrp(:) => NULL() DOUBLE PRECISION, POINTER :: diag(:) => NULL() DOUBLE PRECISION, POINTER :: val(:) => NULL() DOUBLE PRECISION, POINTER :: aux(:) => NULL() TYPE(wsmp_param) :: p END TYPE wsmp_mat ! TYPE zwsmp_mat INTEGER :: matid=-1 INTEGER :: rank=0, nnz INTEGER :: nterms, kmat, nrhs INTEGER :: comm INTEGER :: istart, iend, rank_loc INTEGER :: nnz_start, nnz_end, nnz_loc LOGICAL :: nlherm LOGICAL :: nlsym LOGICAL :: nlpos LOGICAL :: nlforce_zero TYPE(zspmat), POINTER :: mat => NULL() INTEGER, POINTER :: cols(:) => NULL() INTEGER, POINTER :: irow(:) => NULL() INTEGER, POINTER :: perm(:) => NULL() INTEGER, POINTER :: invp(:) => NULL() INTEGER, POINTER :: mrp(:) => NULL() DOUBLE COMPLEX, POINTER :: diag(:) => NULL() DOUBLE COMPLEX, POINTER :: val(:) => NULL() DOUBLE COMPLEX, POINTER :: aux(:) => NULL() TYPE(wsmp_param) :: p END TYPE zwsmp_mat ! INTERFACE init MODULE PROCEDURE init_wsmp_mat, init_zwsmp_mat END INTERFACE init ! INTERFACE check_mat MODULE PROCEDURE check_wsmp_mat, check_zwsmp_mat END INTERFACE check_mat ! INTERFACE clear_mat MODULE PROCEDURE clear_wsmp_mat, clear_zwsmp_mat END INTERFACE clear_mat ! INTERFACE updtmat MODULE PROCEDURE updt_wsmp_mat, updt_zwsmp_mat END INTERFACE updtmat ! INTERFACE putele MODULE PROCEDURE putele_wsmp_mat, putele_zwsmp_mat END INTERFACE putele ! INTERFACE getele MODULE PROCEDURE getele_wsmp_mat, getele_zwsmp_mat END INTERFACE getele ! INTERFACE putrow MODULE PROCEDURE putrow_wsmp_mat, putrow_zwsmp_mat END INTERFACE putrow ! INTERFACE getrow MODULE PROCEDURE getrow_wsmp_mat, getrow_zwsmp_mat END INTERFACE getrow ! INTERFACE putcol MODULE PROCEDURE putcol_wsmp_mat, putcol_zwsmp_mat END INTERFACE putcol ! INTERFACE getcol MODULE PROCEDURE getcol_wsmp_mat, getcol_zwsmp_mat END INTERFACE getcol ! INTERFACE get_count MODULE PROCEDURE get_count_wsmp_mat, get_count_zwsmp_mat END INTERFACE get_count ! INTERFACE to_mat MODULE PROCEDURE to_wsmp_mat, to_zwsmp_mat END INTERFACE to_mat ! INTERFACE reord_mat MODULE PROCEDURE reord_wsmp_mat, reord_zwsmp_mat END INTERFACE reord_mat ! INTERFACE numfact MODULE PROCEDURE numfact_wsmp_mat, numfact_zwsmp_mat END INTERFACE numfact ! INTERFACE factor MODULE PROCEDURE factor_wsmp_mat, factor_zwsmp_mat END INTERFACE factor ! INTERFACE bsolve MODULE PROCEDURE bsolve_wsmp_mat1, bsolve_wsmp_matn, & & bsolve_zwsmp_mat1, bsolve_zwsmp_matn END INTERFACE bsolve ! INTERFACE vmx MODULE PROCEDURE vmx_wsmp_mat, vmx_wsmp_matn, & & vmx_zwsmp_mat, vmx_zwsmp_matn END INTERFACE vmx ! INTERFACE destroy MODULE PROCEDURE destroy_wsmp_mat, destroy_zwsmp_mat END INTERFACE destroy ! INTERFACE putmat MODULE PROCEDURE put_wsmp_mat, put_zwsmp_mat END INTERFACE putmat ! INTERFACE getmat MODULE PROCEDURE get_wsmp_mat, get_zwsmp_mat END INTERFACE getmat ! INTERFACE mcopy MODULE PROCEDURE mcopy_wsmp_mat, mcopy_zwsmp_mat END INTERFACE mcopy ! INTERFACE maddto MODULE PROCEDURE maddto_wsmp_mat, maddto_zwsmp_mat END INTERFACE maddto ! INTERFACE psum_mat MODULE PROCEDURE psum_wsmp_mat, psum_zwsmp_mat END INTERFACE psum_mat ! INTERFACE p2p_mat MODULE PROCEDURE p2p_wsmp_mat, p2p_zwsmp_mat END INTERFACE p2p_mat ! CONTAINS !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE init_wsmp_mat(n, nterms, mat, kmat, nlsym, nlpos, & & nlforce_zero, comm_in) ! ! Initialize an empty sparse wsmp matrix ! USE pputils2 INCLUDE 'mpif.h' INTEGER, INTENT(in) :: n, nterms TYPE(wsmp_mat) :: mat INTEGER, OPTIONAL, INTENT(in) :: kmat LOGICAL, OPTIONAL, INTENT(in) :: nlsym LOGICAL, OPTIONAL, INTENT(in) :: nlpos LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero INTEGER, OPTIONAL, INTENT(in) :: comm_in ! INTEGER :: comm, nloc INTEGER :: info INTEGER :: idummy = 0 DOUBLE PRECISION :: dummy = 0.0d0 ! comm = MPI_COMM_WORLD IF(PRESENT(comm_in)) comm = comm_in mat%comm = comm ! ! Store away (valid) current matrix id ! IF(current_matid .GE. 0) THEN CALL wstoremat(current_matid, info) IF(info.NE.0) THEN WRITE(*,'(a,i12)') 'INIT: WSTOREMAT failed WITH error', info STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF END IF last_matid = last_matid+1 mat%matid = last_matid current_matid = mat%matid ! ! Initialize sparse matrice structure ! mat%rank = n mat%nterms = nterms mat%nnz = 0 mat%nlsym = .FALSE. mat%nlpos = .TRUE. mat%nrhs = 1 mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val IF(PRESENT(kmat)) mat%kmat = kmat IF(PRESENT(nlsym)) mat%nlsym = nlsym IF(PRESENT(nlpos)) mat%nlpos = nlpos IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero ! ! Matrix partition ! CALL dist1d(comm, 1, n, mat%istart, nloc) mat%iend = mat%istart + nloc - 1 mat%rank_loc = nloc ! IF(ASSOCIATED(mat%mat)) DEALLOCATE(mat%mat) ALLOCATE(mat%mat) CALL init(n, mat%mat, mat%istart, mat%iend) ! ! Fill 'iparm' and 'dparm' with default values ! mat%p%iparm(1:3) = 0 IF(mat%nlsym) THEN CALL pwssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, & & mat%perm, mat%invp, dummy, mat%rank_loc, mat%nrhs, & & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & & mat%p%dparm) IF(mat%nlpos) THEN mat%p%iparm(31) = 0 ELSE !!$ mat%p%iparm(31) = 1 ! LDL^T without pivoting mat%p%iparm(31) = 2 ! LDL^T with pivoting END IF CALL pwgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, dummy, mat%rank, & & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) END IF IF(mat%p%iparm(64).NE.0) THEN WRITE(*,'(a,i12)') 'INIT: Initialization failed with error', & & mat%p%iparm(64) STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF ! CALL setup_wsmp(mat%p%iparm, mat%p%dparm) ! CONTAINS SUBROUTINE setup_wsmp(iparm, dparm) INTEGER :: iparm(:) DOUBLE PRECISION :: dparm(:) END SUBROUTINE setup_wsmp END SUBROUTINE init_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE init_zwsmp_mat(n, nterms, mat, kmat, nlsym, nlherm, & & nlpos, nlforce_zero, comm_in) ! ! Initialize an empty sparse wsmp matrix ! USE pputils2 INCLUDE 'mpif.h' INTEGER, INTENT(in) :: n, nterms TYPE(zwsmp_mat) :: mat INTEGER, OPTIONAL, INTENT(in) :: kmat LOGICAL, OPTIONAL, INTENT(in) :: nlsym LOGICAL, OPTIONAL, INTENT(in) :: nlherm LOGICAL, OPTIONAL, INTENT(in) :: nlpos LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero INTEGER, OPTIONAL, INTENT(in) :: comm_in ! INTEGER :: comm, nloc INTEGER :: info INTEGER :: idummy = 0 DOUBLE COMPLEX :: dummy = 0.0d0 ! comm = MPI_COMM_WORLD IF(PRESENT(comm_in)) comm = comm_in mat%comm = comm ! ! Store away (valid) current matrix id ! IF(current_matid .GE. 0) THEN CALL wstoremat(current_matid, info) IF(info.NE.0) THEN WRITE(*,'(a,i12)') 'INIT: WSTOREMAT failed WITH error', info STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF END IF last_matid = last_matid+1 mat%matid = last_matid current_matid = mat%matid ! ! Initialize sparse matrice structure ! mat%rank = n mat%nterms = nterms mat%nnz = 0 mat%nlsym = .FALSE. mat%nlherm = .FALSE. mat%nlpos = .TRUE. mat%nrhs = 1 mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val IF(PRESENT(kmat)) mat%kmat = kmat IF(PRESENT(nlsym)) mat%nlsym = nlsym IF(PRESENT(nlherm)) mat%nlherm = nlherm IF(PRESENT(nlpos)) mat%nlpos = nlpos IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero ! ! Matrix partition ! CALL dist1d(comm, 1, n, mat%istart, nloc) mat%iend = mat%istart + nloc - 1 mat%rank_loc = nloc ! IF(ASSOCIATED(mat%mat)) DEALLOCATE(mat%mat) ALLOCATE(mat%mat) CALL init(n, mat%mat, mat%istart, mat%iend) ! ! Fill 'iparm' and 'dparm' with default values ! mat%p%iparm(1:3) = 0 IF(mat%nlherm .OR. mat%nlsym) THEN CALL pzssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, & & mat%perm, mat%invp, dummy, mat%rank_loc, mat%nrhs, & & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & & mat%p%dparm) IF(mat%nlherm) THEN IF(mat%nlpos) THEN mat%p%iparm(31) = 0 ! hermitian, positive definite ELSE mat%p%iparm(31) = 2 ! hermitian, no-definite, LDL^T with pivoting END IF ELSE mat%p%iparm(31) = 3 ! non-hermitian, symmetric END IF ELSE CALL pzgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, dummy, mat%rank, & & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) END IF IF(mat%p%iparm(64).NE.0) THEN WRITE(*,'(a,i12)') 'INIT: Initialization failed with error', & & mat%p%iparm(64) STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF !!$ WRITE(*,'(/a/(10i8))') 'iparm', mat%p%iparm !!$ WRITE(*,'(/a/(10(1pe8.1)))') 'dparm', mat%p%dparm ! CALL setup_wsmp(mat%p%iparm, mat%p%dparm) ! CONTAINS SUBROUTINE setup_wsmp(iparm, dparm) INTEGER :: iparm(:) DOUBLE PRECISION :: dparm(:) END SUBROUTINE setup_wsmp END SUBROUTINE init_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE check_wsmp_mat(mat) ! ! Check matrice id and recall the matrice if not current ! TYPE(wsmp_mat) :: mat INTEGER :: info ! IF(.NOT.mat%nlsym) THEN IF( mat%matid.NE.current_matid ) THEN WRITE(*,'(a)') "Processing multi matrices is not possible "// & & "for non-symetric matrices." STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' ELSE RETURN END IF END IF ! IF( mat%matid.NE.current_matid ) THEN IF(current_matid .GE. 0) THEN CALL wstoremat(current_matid, info) IF(info.NE.0) THEN WRITE(*,'(a,i3,a,i12)') 'Store matrix', current_matid, & & ' failed with error', info STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF END IF CALL wrecallmat(mat%matid, info) IF(info.NE.0) THEN WRITE(*,'(a,i3,a,i12)') 'Recall matrix', mat%matid, & & ' failed with error', info STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF current_matid = mat%matid END IF END SUBROUTINE check_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE check_zwsmp_mat(mat) ! ! Check matrice id and recall the matrice if not current ! TYPE(zwsmp_mat) :: mat INTEGER :: info ! IF(.NOT.mat%nlsym .AND. .NOT.mat%nlherm ) THEN IF( mat%matid.NE.current_matid ) THEN WRITE(*,'(a)') "Processing multi matrices is not possible "// & & "for non-symetric/non-hermitian matrices." STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' ELSE RETURN END IF END IF ! IF( mat%matid.NE.current_matid ) THEN IF(current_matid .GE. 0) THEN CALL wstoremat(current_matid, info) IF(info.NE.0) THEN WRITE(*,'(a,i3,a,i12)') 'Store matrix', current_matid, & & ' failed with error', info STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF END IF CALL wrecallmat(mat%matid, info) IF(info.NE.0) THEN WRITE(*,'(a,i3,a,i12)') 'Recall matrix', mat%matid, & & ' failed with error', info STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF current_matid = mat%matid END IF END SUBROUTINE check_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE clear_wsmp_mat(mat) ! ! Clear matrix, keeping its sparse structure unchanged ! TYPE(wsmp_mat) :: mat ! mat%val = 0.0d0 END SUBROUTINE clear_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE clear_zwsmp_mat(mat) ! ! Clear matrix, keeping its sparse structure unchanged ! TYPE(zwsmp_mat) :: mat ! mat%val = (0.0d0, 0.0d0) END SUBROUTINE clear_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE updt_wsmp_mat(mat, i, j, val) ! ! Update element Aij of wsmp matrix ! TYPE(wsmp_mat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE PRECISION, INTENT(in) :: val INTEGER :: k, s, e ! IF(mat%nlsym) THEN ! Store only upper part for symmetric matrices IF(i.GT.j) RETURN END IF IF(i.LT.mat%istart .OR. i.GT.mat%iend) THEN WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j WRITE(*,'(a,2i6)') ' istart, iend ', mat%istart, mat%iend STOP '*** Abnormal EXIT in MODULE mumps_mod ***' END IF ! IF(ASSOCIATED(mat%mat)) THEN CALL updtmat(mat%mat, i, j, val) ELSE s = mat%irow(i) e = mat%irow(i+1)-1 k = isearch(mat%cols(s:e), j) IF( k.GE.0 ) THEN mat%val(s+k) = mat%val(s+k)+val ELSE WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF END IF END SUBROUTINE updt_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE updt_zwsmp_mat(mat, i, j, val) ! ! Update element Aij of wsmp matrix ! TYPE(zwsmp_mat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE COMPLEX, INTENT(in) :: val INTEGER :: k, s, e ! IF(mat%nlherm .OR. mat%nlsym) THEN ! Store only upper part IF(i.GT.j) RETURN END IF ! IF(ASSOCIATED(mat%mat)) THEN CALL updtmat(mat%mat, i, j, val) ELSE s = mat%irow(i) e = mat%irow(i+1)-1 k = isearch(mat%cols(s:e), j) IF( k.GE.0 ) THEN IF(mat%nlherm) THEN mat%val(s+k) = mat%val(s+k)+CONJG(val) ! CSR-UT* = CSC-LT ELSE mat%val(s+k) = mat%val(s+k)+val END IF ELSE WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF END IF END SUBROUTINE updt_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putele_wsmp_mat(mat, i, j, val) ! ! Put element (i,j) of matrix ! TYPE(wsmp_mat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE PRECISION, INTENT(in) :: val INTEGER :: iput, jput INTEGER :: k, s, e ! iput = i jput = j IF(mat%nlsym) THEN IF( i.GT.j ) THEN ! Lower triangular part iput = j jput = i END IF END IF ! ! Do nothing if outside IF(iput.LT.mat%istart .OR. iput.GT.mat%iend) RETURN ! IF(ASSOCIATED(mat%mat)) THEN CALL putele(mat%mat, iput, jput, val, & & nlforce_zero=mat%nlforce_zero) ELSE s = mat%irow(iput) e = mat%irow(iput+1) - 1 k = isearch(mat%cols(s:e), jput) IF( k.GE.0 ) THEN mat%val(s+k) = val ELSE IF(ABS(val) .GT. EPSILON(0.0d0)) THEN ! Ok for zero val WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF END IF END IF END SUBROUTINE putele_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putele_zwsmp_mat(mat, i, j, val) ! ! Put element (i,j) of matrix ! TYPE(zwsmp_mat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE COMPLEX, INTENT(in) :: val DOUBLE COMPLEX :: valput INTEGER :: iput, jput INTEGER :: k, s, e ! iput = i jput = j valput = val IF(mat%nlsym .OR. mat%nlherm) THEN IF( i.GT.j ) THEN ! Lower triangular part iput = j jput = i IF(mat%nlherm) THEN valput = CONJG(val) ELSE valput = val END IF END IF END IF ! ! Do nothing if outside IF(iput.LT.mat%istart .OR. iput.GT.mat%iend) RETURN ! IF(ASSOCIATED(mat%mat)) THEN CALL putele(mat%mat, iput, jput, valput, & & nlforce_zero=mat%nlforce_zero) ELSE s = mat%irow(iput) e = mat%irow(iput+1) - 1 k = isearch(mat%cols(s:e), jput) IF( k.GE.0 ) THEN IF(mat%nlherm) THEN mat%val(s+k) = CONJG(valput) ! CSR-UT* = CSC-LT ELSE mat%val(s+k) = valput END IF ELSE IF(ABS(val) .GT. EPSILON(0.0d0)) THEN ! Ok for zero val WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF END IF END IF END SUBROUTINE putele_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getele_wsmp_mat(mat, i, j, val) ! ! Get element (i,j) of sparse matrix ! TYPE(wsmp_mat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE PRECISION, INTENT(out) :: val INTEGER :: iget, jget INTEGER :: k, s, e ! iget = i jget = j IF(mat%nlsym) THEN IF( i.GT.j ) THEN ! Lower triangular part iget = j jget = i END IF END IF ! val = 0.0d0 ! Assume zero val if outside IF( iget.LT.mat%istart .OR. iget.GT.mat%iend ) RETURN ! IF(ASSOCIATED(mat%mat)) THEN CALL getele(mat%mat, iget, jget, val) ELSE s = mat%irow(iget) e = mat%irow(iget+1) - 1 k = isearch(mat%cols(s:e), jget) IF( k.GE.0 ) THEN val =mat%val(s+k) ELSE val = 0.0d0 ! Assume zero val if not found END IF END IF END SUBROUTINE getele_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getele_zwsmp_mat(mat, i, j, val) ! ! Get element (i,j) of sparse matrix ! TYPE(zwsmp_mat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE COMPLEX, INTENT(out) :: val DOUBLE COMPLEX :: valget INTEGER :: iget, jget INTEGER :: k, s, e ! iget = i jget = j IF(mat%nlherm .OR. mat%nlsym) THEN IF( i.GT.j ) THEN ! Lower triangular part iget = j jget = i END IF END IF ! val = (0.0d0, 0.0d0) ! Assume zero val if outside IF( iget.LT.mat%istart .OR. iget.GT.mat%iend ) RETURN ! IF(ASSOCIATED(mat%mat)) THEN CALL getele(mat%mat, iget, jget, valget) ELSE s = mat%irow(iget) e = mat%irow(iget+1) - 1 k = isearch(mat%cols(s:e), jget) IF( k.GE.0 ) THEN IF(mat%nlherm) THEN valget = CONJG(mat%val(s+k)) ! CSR-UT* = CSC-LT ELSE valget = mat%val(s+k) END IF ELSE valget = (0.0d0,0.0d0) ! Assume zero val if not found END IF END IF val = valget IF( i.GT.j ) THEN IF(mat%nlherm) THEN val = CONJG(valget) END IF END IF END SUBROUTINE getele_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putrow_wsmp_mat(amat, i, arr) ! ! Put a row into sparse matrix ! TYPE(wsmp_mat), INTENT(inout) :: amat INTEGER, INTENT(in) :: i DOUBLE PRECISION, INTENT(in) :: arr(:) INTEGER :: j ! DO j=1,amat%rank CALL putele(amat, i, j, arr(j)) END DO END SUBROUTINE putrow_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putrow_zwsmp_mat(amat, i, arr) ! ! Put a row into sparse matrix ! TYPE(zwsmp_mat), INTENT(inout) :: amat INTEGER, INTENT(in) :: i DOUBLE COMPLEX, INTENT(in) :: arr(:) INTEGER :: j ! DO j=1,amat%rank CALL putele(amat, i, j, arr(j)) END DO END SUBROUTINE putrow_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getrow_wsmp_mat(amat, i, arr) ! ! Get a row from sparse matrix ! TYPE(wsmp_mat), INTENT(in) :: amat INTEGER, INTENT(in) :: i DOUBLE PRECISION, INTENT(out) :: arr(:) INTEGER :: j ! DO j=1,amat%rank CALL getele(amat, i, j, arr(j)) END DO END SUBROUTINE getrow_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getrow_zwsmp_mat(amat, i, arr) ! ! Get a row from sparse matrix ! TYPE(zwsmp_mat), INTENT(in) :: amat INTEGER, INTENT(in) :: i DOUBLE COMPLEX, INTENT(out) :: arr(:) INTEGER :: j ! DO j=1,amat%rank CALL getele(amat, i, j, arr(j)) END DO END SUBROUTINE getrow_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putcol_wsmp_mat(amat, j, arr) ! ! Put a column into sparse matrix ! TYPE(wsmp_mat), INTENT(inout) :: amat INTEGER, INTENT(in) :: j DOUBLE PRECISION, INTENT(in) :: arr(:) INTEGER :: i ! DO i=amat%istart,amat%iend CALL putele(amat, i, j, arr(i)) END DO END SUBROUTINE putcol_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putcol_zwsmp_mat(amat, j, arr) ! ! Put a column into sparse matrix ! TYPE(zwsmp_mat), INTENT(inout) :: amat INTEGER, INTENT(in) :: j DOUBLE COMPLEX, INTENT(in) :: arr(:) INTEGER :: i ! DO i=amat%istart,amat%iend CALL putele(amat, i, j, arr(i)) END DO END SUBROUTINE putcol_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getcol_wsmp_mat(amat, j, arr) ! ! Get a column from sparse matrix ! TYPE(wsmp_mat), INTENT(in) :: amat INTEGER, INTENT(in) :: j DOUBLE PRECISION, INTENT(out) :: arr(:) INTEGER :: i ! DO i=amat%istart,amat%iend CALL getele(amat, i, j, arr(i)) END DO END SUBROUTINE getcol_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getcol_zwsmp_mat(amat, j, arr) ! ! Get a column from sparse matrix ! TYPE(zwsmp_mat), INTENT(in) :: amat INTEGER, INTENT(in) :: j DOUBLE COMPLEX, INTENT(out) :: arr(:) INTEGER :: i ! DO i=amat%istart,amat%iend CALL getele(amat, i, j, arr(i)) END DO END SUBROUTINE getcol_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ INTEGER FUNCTION get_count_wsmp_mat(mat, nnz) ! ! Number of non-zeros in sparse matrix ! TYPE(wsmp_mat) :: mat INTEGER, INTENT(out), OPTIONAL :: nnz(:) INTEGER :: i ! IF(ASSOCIATED(mat%mat)) THEN get_count_wsmp_mat = get_count(mat%mat, nnz) ELSE get_count_wsmp_mat = mat%nnz IF(PRESENT(nnz)) THEN DO i=mat%istart,mat%iend nnz(i) = mat%irow(i+1)-mat%irow(i) END DO END IF END IF END FUNCTION get_count_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ INTEGER FUNCTION get_count_zwsmp_mat(mat, nnz) ! ! Number of non-zeros in sparse matrix ! TYPE(zwsmp_mat) :: mat INTEGER, INTENT(out), OPTIONAL :: nnz(:) INTEGER :: i ! IF(ASSOCIATED(mat%mat)) THEN get_count_zwsmp_mat = get_count(mat%mat, nnz) ELSE get_count_zwsmp_mat = mat%nnz IF(PRESENT(nnz)) THEN DO i=mat%istart,mat%iend nnz(i) = mat%irow(i+1)-mat%irow(i) END DO END IF END IF END FUNCTION get_count_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE to_wsmp_mat(mat, nlkeep) ! ! Convert linked list spmat to wsmp matrice structure ! INCLUDE 'mpif.h' TYPE(wsmp_mat) :: mat LOGICAL, INTENT(in), OPTIONAL :: nlkeep INTEGER :: i, nnz, rank, s, e INTEGER :: comm, ierr, nnz_loc, rank_loc LOGICAL :: nlclean ! nlclean = .TRUE. IF(PRESENT(nlkeep)) THEN nlclean = .NOT. nlkeep END IF ! comm = mat%comm ! ! Allocate the WSMP matrix structure ! rank = mat%rank rank_loc = mat%rank ! nnz_loc = get_count(mat) mat%nnz_start = 0 CALL mpi_exscan(nnz_loc, mat%nnz_start, 1, MPI_INTEGER, MPI_SUM, comm, ierr) mat%nnz_start = mat%nnz_start + 1 mat%nnz_end = mat%nnz_start + nnz_loc - 1 mat%nnz_loc = nnz_loc CALL mpi_allreduce(nnz_loc, mat%nnz, 1, MPI_INTEGER, MPI_SUM, comm, ierr) ! IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm) IF(ASSOCIATED(mat%invp)) DEALLOCATE(mat%invp) IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) ! ! Allocate LOCAL irow, cols and val IF(mat%nlsym) THEN ALLOCATE(mat%perm(rank)) ALLOCATE(mat%invp(rank)) END IF ALLOCATE(mat%val(nnz_loc)) ALLOCATE(mat%cols(nnz_loc)) ALLOCATE(mat%irow(mat%istart:mat%iend+1)) ! ! Fill WSMP structure and deallocate the sparse rows ! mat%irow(mat%istart) = 1 DO i=mat%istart,mat%iend mat%irow(i+1) = mat%irow(i) + mat%mat%row(i)%nnz s = mat%irow(i) e = mat%irow(i+1)+1 CALL getrow(mat%mat%row(i), mat%val(s:e), mat%cols(s:e)) IF(nlclean) CALL destroy(mat%mat%row(i)) END DO IF(nlclean) DEALLOCATE(mat%mat) END SUBROUTINE to_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE to_zwsmp_mat(mat, nlkeep) ! ! Convert linked list spmat to wsmp matrice structure ! INCLUDE 'mpif.h' TYPE(zwsmp_mat) :: mat LOGICAL, INTENT(in), OPTIONAL :: nlkeep INTEGER :: i, nnz, rank, s, e INTEGER :: comm, ierr, nnz_loc, rank_loc LOGICAL :: nlclean ! nlclean = .TRUE. IF(PRESENT(nlkeep)) THEN nlclean = .NOT. nlkeep END IF ! comm = mat%comm ! ! Allocate the WSMP matrix structure ! rank = mat%rank rank_loc = mat%rank ! nnz_loc = get_count(mat) mat%nnz_start = 0 CALL mpi_exscan(nnz_loc, mat%nnz_start, 1, MPI_INTEGER, MPI_SUM, comm, ierr) mat%nnz_start = mat%nnz_start + 1 mat%nnz_end = mat%nnz_start + nnz_loc - 1 mat%nnz_loc = nnz_loc CALL mpi_allreduce(nnz_loc, mat%nnz, 1, MPI_INTEGER, MPI_SUM, comm, ierr) ! IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm) IF(ASSOCIATED(mat%invp)) DEALLOCATE(mat%invp) IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) ! ! Allocate LOCAL irow, cols and val IF(mat%nlsym) THEN ALLOCATE(mat%perm(rank)) ALLOCATE(mat%invp(rank)) END IF ALLOCATE(mat%val(nnz_loc)) ALLOCATE(mat%cols(nnz_loc)) ALLOCATE(mat%irow(mat%istart:mat%iend+1)) ! ! Fill WSMP structure and deallocate the sparse rows ! mat%irow(mat%istart) = 1 DO i=mat%istart,mat%iend mat%irow(i+1) = mat%irow(i) + mat%mat%row(i)%nnz s = mat%irow(i) e = mat%irow(i+1)+1 CALL getrow(mat%mat%row(i), mat%val(s:e), mat%cols(s:e)) IF(nlclean) CALL destroy(mat%mat%row(i)) END DO IF(mat%nlherm) THEN mat%val(:) = CONJG(mat%val(:)) ! CSR-UT* = CSC-LT END IF IF(nlclean) DEALLOCATE(mat%mat) END SUBROUTINE to_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE reord_wsmp_mat(mat) ! ! Reordering and symbolic factorization ! TYPE(wsmp_mat) :: mat DOUBLE PRECISION :: dummy ! ! Recall the matrice if not current ! CALL check_mat(mat) ! IF(mat%nlsym) THEN mat%p%iparm(2) = 1 ! Ordering mat%p%iparm(3) = 2 ! Symbolic factorization CALL pwssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, & & mat%perm, mat%invp, dummy, mat%rank_loc, mat%nrhs, & & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & & mat%p%dparm) ELSE mat%p%iparm(2) = 1 ! Analysis and reordering mat%p%iparm(3) = 1 CALL pwgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, dummy, mat%rank_loc, & & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) END IF IF(mat%p%iparm(64).NE.0) THEN WRITE(*,'(a,i12)') 'REORD: Reordering failed with error', mat%p%iparm(64) STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF END SUBROUTINE reord_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE reord_zwsmp_mat(mat) ! ! Reordering and symbolic factorization ! TYPE(zwsmp_mat) :: mat DOUBLE COMPLEX :: dummy ! ! Recall the matrice if not current ! CALL check_mat(mat) ! IF(mat%nlsym .OR. mat%nlherm) THEN mat%p%iparm(2) = 1 ! Ordering mat%p%iparm(3) = 2 ! Symbolic factorization CALL zssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, & & mat%perm, mat%invp, dummy, mat%rank_loc, mat%nrhs, & & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & & mat%p%dparm) !!$ WRITE(*,'(a,i3/(10i8))') 'REORD: matrice', mat%matid, mat%perm ELSE mat%p%iparm(2) = 1 ! Analysis and reordering mat%p%iparm(3) = 1 CALL zgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, dummy, mat%rank_loc, & & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) END IF IF(mat%p%iparm(64).NE.0) THEN WRITE(*,'(a,i12)') 'REORD: Reordering failed with error', mat%p%iparm(64) STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF END SUBROUTINE reord_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE numfact_wsmp_mat(mat) ! ! Numerical factorization ! TYPE(wsmp_mat) :: mat DOUBLE PRECISION :: dummy ! ! Recall the matrice if not current ! CALL check_mat(mat) ! IF(mat%nlsym) THEN mat%p%iparm(2) = 3 ! Numerical factorization mat%p%iparm(3) = 3 CALL pwssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, & & mat%perm, mat%invp, dummy, mat%rank_loc, mat%nrhs, & & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & & mat%p%dparm) ELSE mat%p%iparm(2) = 2 ! Factorization mat%p%iparm(3) = 2 CALL pwgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, dummy, mat%rank_loc, & & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) END IF IF(mat%p%iparm(64).NE.0) THEN WRITE(*,'(a,i12)') 'NUMFACT: Num. Factor failed with error', mat%p%iparm(64) STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF END SUBROUTINE numfact_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE numfact_zwsmp_mat(mat) ! ! Numerical factorization ! TYPE(zwsmp_mat) :: mat DOUBLE COMPLEX :: dummy ! ! Recall the matrice if not current ! CALL check_mat(mat) ! IF(mat%nlsym .OR. mat%nlherm) THEN mat%p%iparm(2) = 3 ! Numerical factorization mat%p%iparm(3) = 3 CALL zssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, & & mat%perm, mat%invp, dummy, mat%rank_loc, mat%nrhs, & & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & & mat%p%dparm) ELSE mat%p%iparm(2) = 2 ! Factorization mat%p%iparm(3) = 2 CALL zgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, dummy, mat%rank_loc, & & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) END IF IF(mat%p%iparm(64).NE.0) THEN WRITE(*,'(a,i12)') 'NUMFACT: Num. Factor failed with error', mat%p%iparm(64) STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF END SUBROUTINE numfact_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE factor_wsmp_mat(mat, nlreord) ! ! Factor (create +reorder + factor) a wsmp_mat matrix ! TYPE(wsmp_mat) :: mat LOGICAL, OPTIONAL, INTENT(in) :: nlreord LOGICAL :: mlreord !---------------------------------------------------------------------- ! 1.0 Creation from the sparse rows ! IF(ASSOCIATED(mat%mat)) THEN CALL to_mat(mat) END IF !---------------------------------------------------------------------- ! 2.0 Reordering and symbolic factorization phase ! mlreord = .TRUE. IF(PRESENT(nlreord)) mlreord = nlreord IF(mlreord) THEN CALL reord_mat(mat) END IF !---------------------------------------------------------------------- ! 3.0 Numerical factorization ! CALL numfact(mat) END SUBROUTINE factor_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE factor_zwsmp_mat(mat, nlreord) ! ! Factor (create +reorder + factor) a wsmp_mat matrix ! TYPE(zwsmp_mat) :: mat LOGICAL, OPTIONAL, INTENT(in) :: nlreord LOGICAL :: mlreord !---------------------------------------------------------------------- ! 1.0 Creation from the sparse rows ! IF(ASSOCIATED(mat%mat)) THEN CALL to_mat(mat) END IF !---------------------------------------------------------------------- ! 2.0 Reordering and symbolic factorization phase ! mlreord = .TRUE. IF(PRESENT(nlreord)) mlreord = nlreord IF(mlreord) THEN CALL reord_mat(mat) END IF !---------------------------------------------------------------------- ! 3.0 Numerical factorization ! CALL numfact(mat) END SUBROUTINE factor_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE bsolve_wsmp_mat1(mat, rhs, sol, nref) ! ! Backsolve, using Wsmp ! INCLUDE 'mpif.h' TYPE(wsmp_mat) :: mat DOUBLE PRECISION :: rhs(:) DOUBLE PRECISION, OPTIONAL :: sol(:) INTEGER, OPTIONAL :: nref ! DOUBLE PRECISION :: sol_loc(mat%rank_loc) INTEGER :: nloc, me, nprocs, ierr, i INTEGER, ALLOCATABLE :: nlocs(:), displs(:) DOUBLE PRECISION :: dummy ! ! Recall the matrice if not current ! CALL check_mat(mat) ! IF(mat%nlsym) THEN mat%p%iparm(2) = 4 ! Back substitution mat%p%iparm(3) = 4 ELSE mat%p%iparm(2) = 3 ! Back substitution mat%p%iparm(3) = 3 END IF mat%p%iparm(6) = 0 ! Max numbers of iterative refinement steps IF(PRESENT(nref)) THEN IF(mat%nlsym) THEN mat%p%iparm(3) = 5 ELSE mat%p%iparm(3) = 4 END IF mat%p%iparm(6) = nref END IF mat%nrhs = 1 ! ! Extract local rhs from global rhs ! sol_loc = rhs(mat%istart:mat%iend) ! IF(mat%nlsym) THEN CALL pwssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, & & mat%perm, mat%invp, sol_loc, mat%rank_loc, mat%nrhs, & & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & & mat%p%dparm) ELSE CALL pwgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, sol_loc, & & mat%rank_loc, mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) END IF ! IF(mat%p%iparm(64).NE.0) THEN WRITE(*,'(a,i12)') 'BSOLVE: Failed with error', mat%p%iparm(64) STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF ! ! Allgatherv local sol ! CALL mpi_comm_rank(mat%comm, me, ierr) CALL mpi_comm_size(mat%comm, nprocs, ierr) ! ALLOCATE(displs(0:nprocs)) ALLOCATE(nlocs(0:nprocs-1)) ! nloc = mat%rank_loc CALL mpi_allgather(nloc, 1, MPI_INTEGER, nlocs, 1, MPI_INTEGER, & & mat%comm, ierr) ! displs(0) = 0 DO i=0,nprocs-1 displs(i+1) = displs(i)+nlocs(i) END DO ! IF(PRESENT(sol)) THEN CALL mpi_allgatherv(sol_loc, nloc, MPI_DOUBLE_PRECISION, & & sol, nlocs, displs, MPI_DOUBLE_PRECISION, & & mat%comm, ierr) ELSE CALL mpi_allgatherv(sol_loc, nloc, MPI_DOUBLE_PRECISION, & & rhs, nlocs, displs, MPI_DOUBLE_PRECISION, & & mat%comm, ierr) END IF ! DEALLOCATE(nlocs) DEALLOCATE(displs) ! END SUBROUTINE bsolve_wsmp_mat1 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE bsolve_zwsmp_mat1(mat, rhs, sol, nref) ! ! Backsolve, using Wsmp ! INCLUDE 'mpif.h' TYPE(zwsmp_mat) :: mat DOUBLE COMPLEX :: rhs(:) DOUBLE COMPLEX, OPTIONAL :: sol(:) INTEGER, OPTIONAL :: nref ! DOUBLE COMPLEX :: sol_loc(mat%rank_loc) INTEGER :: nloc, me, nprocs, ierr, i INTEGER, ALLOCATABLE :: nlocs(:), displs(:) DOUBLE COMPLEX :: dummy ! ! Recall the matrice if not current ! CALL check_mat(mat) ! IF(mat%nlsym .OR. mat%nlherm) THEN mat%p%iparm(2) = 4 ! Back substitution mat%p%iparm(3) = 4 ELSE mat%p%iparm(2) = 3 ! Back substitution mat%p%iparm(3) = 3 END IF mat%p%iparm(6) = 0 ! Max numbers of iterative refinement steps IF(PRESENT(nref)) THEN IF(mat%nlsym .OR. mat%nlherm) THEN mat%p%iparm(3) = 5 ELSE mat%p%iparm(3) = 4 END IF mat%p%iparm(6) = nref END IF mat%nrhs = 1 ! ! Extract local rhs from global rhs ! sol_loc = rhs(mat%istart:mat%iend) ! IF(mat%nlsym .OR. mat%nlherm) THEN CALL pzssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, & & mat%perm, mat%invp, sol_loc, mat%rank_loc, mat%nrhs, & & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & & mat%p%dparm) ELSE CALL pzgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, sol_loc, & & mat%rank_loc, mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) END IF ! IF(mat%p%iparm(64).NE.0) THEN WRITE(*,'(a,i12)') 'BSOLVE: Failed with error', mat%p%iparm(64) STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF ! ! Allgatherv local sol ! CALL mpi_comm_rank(mat%comm, me, ierr) CALL mpi_comm_size(mat%comm, nprocs, ierr) ! ALLOCATE(displs(0:nprocs)) ALLOCATE(nlocs(0:nprocs-1)) ! nloc = mat%rank_loc CALL mpi_allgather(nloc, 1, MPI_INTEGER, nlocs, 1, MPI_INTEGER, & & mat%comm, ierr) ! displs(0) = 0 DO i=0,nprocs-1 displs(i+1) = displs(i)+nlocs(i) END DO ! IF(PRESENT(sol)) THEN CALL mpi_allgatherv(sol_loc, nloc, MPI_DOUBLE_COMPLEX, & & sol, nlocs, displs, MPI_DOUBLE_COMPLEX, & & mat%comm, ierr) ELSE CALL mpi_allgatherv(sol_loc, nloc, MPI_DOUBLE_COMPLEX, & & rhs, nlocs, displs, MPI_DOUBLE_COMPLEX, & & mat%comm, ierr) END IF ! DEALLOCATE(nlocs) DEALLOCATE(displs) ! END SUBROUTINE bsolve_zwsmp_mat1 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE bsolve_wsmp_matn(mat, rhs, sol, nref) ! ! Backsolve, using Wsmp, multiple RHS ! INCLUDE 'mpif.h' TYPE(wsmp_mat) :: mat DOUBLE PRECISION :: rhs(:,:) DOUBLE PRECISION, OPTIONAL :: sol(:,:) INTEGER, OPTIONAL :: nref ! DOUBLE PRECISION :: sol_loc(mat%rank_loc,SIZE(rhs,2)) INTEGER :: nloc, me, nprocs, ierr, i INTEGER, ALLOCATABLE :: nlocs(:), displs(:) DOUBLE PRECISION :: dummy ! ! Recall the matrice if not current ! CALL check_mat(mat) ! IF(mat%nlsym) THEN mat%p%iparm(2) = 4 ! Back substitution mat%p%iparm(3) = 4 ELSE mat%p%iparm(2) = 3 ! Back substitution mat%p%iparm(3) = 3 END IF mat%p%iparm(6) = 0 ! Max numbers of iterative refinement steps IF(PRESENT(nref)) THEN IF(mat%nlsym) THEN mat%p%iparm(3) = 5 ELSE mat%p%iparm(3) = 4 END IF mat%p%iparm(6) = nref END IF mat%nrhs = SIZE(rhs,2) ! ! Extract local rhs from global rhs ! sol_loc(:,:) = rhs(mat%istart:mat%iend,:) ! IF(mat%nlsym) THEN CALL pwssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, & & mat%perm, mat%invp, sol_loc, mat%rank_loc, mat%nrhs, & & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & & mat%p%dparm) ELSE CALL pwgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, sol_loc, & & mat%rank_loc, mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) END IF ! IF(mat%p%iparm(64).NE.0) THEN WRITE(*,'(a,i12)') 'BSOLVE: Failed with error', mat%p%iparm(64) STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF ! ! Allgatherv local sol ! CALL mpi_comm_rank(mat%comm, me, ierr) CALL mpi_comm_size(mat%comm, nprocs, ierr) ! ALLOCATE(displs(0:nprocs)) ALLOCATE(nlocs(0:nprocs-1)) ! nloc = mat%rank_loc CALL mpi_allgather(nloc, 1, MPI_INTEGER, nlocs, 1, MPI_INTEGER, & & mat%comm, ierr) ! displs(0) = 0 DO i=0,nprocs-1 displs(i+1) = displs(i)+nlocs(i) END DO ! DO i=1,mat%nrhs IF(PRESENT(sol)) THEN CALL mpi_allgatherv(sol_loc(1,i), nloc, MPI_DOUBLE_PRECISION, & & sol(1,i), nlocs, displs, MPI_DOUBLE_PRECISION, & & mat%comm, ierr) ELSE CALL mpi_allgatherv(sol_loc(1,i), nloc, MPI_DOUBLE_PRECISION, & & rhs(1,i), nlocs, displs, MPI_DOUBLE_PRECISION, & & mat%comm, ierr) END IF END DO ! DEALLOCATE(nlocs) DEALLOCATE(displs) ! END SUBROUTINE bsolve_wsmp_matn !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE bsolve_zwsmp_matn(mat, rhs, sol, nref) ! ! Backsolve, using Wsmp, multiple RHS ! INCLUDE 'mpif.h' TYPE(zwsmp_mat) :: mat DOUBLE COMPLEX :: rhs(:,:) DOUBLE COMPLEX, OPTIONAL :: sol(:,:) INTEGER, OPTIONAL :: nref ! DOUBLE COMPLEX :: sol_loc(mat%rank_loc,SIZE(rhs,2)) INTEGER :: nloc, me, nprocs, ierr, i INTEGER, ALLOCATABLE :: nlocs(:), displs(:) DOUBLE COMPLEX :: dummy ! ! Recall the matrice if not current ! CALL check_mat(mat) ! IF(mat%nlsym .or. mat%nlherm) THEN mat%p%iparm(2) = 4 ! Back substitution mat%p%iparm(3) = 4 ELSE mat%p%iparm(2) = 3 ! Back substitution mat%p%iparm(3) = 3 END IF mat%p%iparm(6) = 0 ! Max numbers of iterative refinement steps IF(PRESENT(nref)) THEN IF(mat%nlsym .OR. mat%nlherm) THEN mat%p%iparm(3) = 5 ELSE mat%p%iparm(3) = 4 END IF mat%p%iparm(6) = nref END IF mat%nrhs = SIZE(rhs,2) ! ! Extract local rhs from global rhs ! sol_loc(:,:) = rhs(mat%istart:mat%iend,:) ! IF(mat%nlsym) THEN CALL pzssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, & & mat%perm, mat%invp, sol_loc, mat%rank_loc, mat%nrhs, & & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & & mat%p%dparm) ELSE CALL pzgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, sol_loc, & & mat%rank_loc, mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) END IF ! IF(mat%p%iparm(64).NE.0) THEN WRITE(*,'(a,i12)') 'BSOLVE: Failed with error', mat%p%iparm(64) STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF ! ! Allgatherv local sol ! CALL mpi_comm_rank(mat%comm, me, ierr) CALL mpi_comm_size(mat%comm, nprocs, ierr) ! ALLOCATE(displs(0:nprocs)) ALLOCATE(nlocs(0:nprocs-1)) ! nloc = mat%rank_loc CALL mpi_allgather(nloc, 1, MPI_INTEGER, nlocs, 1, MPI_INTEGER, & & mat%comm, ierr) ! displs(0) = 0 DO i=0,nprocs-1 displs(i+1) = displs(i)+nlocs(i) END DO ! DO i=1,mat%nrhs IF(PRESENT(sol)) THEN CALL mpi_allgatherv(sol_loc(1,i), nloc, MPI_DOUBLE_COMPLEX, & & sol(1,i), nlocs, displs, MPI_DOUBLE_COMPLEX, & & mat%comm, ierr) ELSE CALL mpi_allgatherv(sol_loc(1,i), nloc, MPI_DOUBLE_COMPLEX, & & rhs(1,i), nlocs, displs, MPI_DOUBLE_COMPLEX, & & mat%comm, ierr) END IF END DO ! DEALLOCATE(nlocs) DEALLOCATE(displs) ! END SUBROUTINE bsolve_zwsmp_matn !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ FUNCTION vmx_wsmp_mat(mat, xarr) RESULT(yarr) ! ! Return product mat*x ! TYPE(wsmp_mat) :: mat DOUBLE PRECISION, INTENT(in) :: xarr(:) DOUBLE PRECISION :: yarr(SIZE(xarr)) DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0 CHARACTER(len=6) :: matdescra INTEGER :: n, i, j ! n = mat%rank ! #ifdef MKL IF(mat%nlsym) THEN matdescra = 'sun' ELSE matdescra = 'g' END IF ! CALL mkl_dcsrmv('N', n, n, alpha, matdescra, mat%val, & & mat%cols, mat%irow(1), mat%irow(2), xarr, & & beta, yarr) #else yarr = 0.0d0 DO i=1,n DO j=mat%irow(i), mat%irow(i+1)-1 yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j)) END DO IF(mat%nlsym) THEN DO j=mat%irow(i)+1, mat%irow(i+1)-1 yarr(mat%cols(j)) = yarr(mat%cols(j)) & & + mat%val(j)*xarr(i) END DO END IF END DO #endif ! END FUNCTION vmx_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ FUNCTION vmx_zwsmp_mat(mat, xarr) RESULT(yarr) ! ! Return product mat*x ! TYPE(zwsmp_mat) :: mat DOUBLE COMPLEX, INTENT(in) :: xarr(:) DOUBLE COMPLEX :: yarr(SIZE(xarr)) DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0) INTEGER :: n, i, j CHARACTER(len=6) :: matdescra CHARACTER(len=1) :: transa ! n = mat%rank ! #ifdef MKL IF(mat%nlsym) THEN matdescra = 'sun' ELSE IF(mat%nlherm) THEN matdescra = 'hun' ELSE matdescra = 'g' END IF transa='N' IF(mat%nlherm) THEN transa='T' ! Transpose(CSR-UT*) = CSC-LT* = CSR-UT END IF CALL mkl_zcsrmv(transa, n, n, alpha, matdescra, mat%val, & & mat%cols, mat%irow(1), mat%irow(2), xarr, & & beta, yarr) #else yarr = (0.0d0,0.0d0) DO i=1,n IF(mat%nlherm) THEN ! CSR-UT = (CSR-UT*)* DO j=mat%irow(i), mat%irow(i+1)-1 yarr(i) = yarr(i) + CONJG(mat%val(j))*xarr(mat%cols(j)) END DO ELSE DO j=mat%irow(i), mat%irow(i+1)-1 yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j)) END DO END IF IF(mat%nlsym) THEN DO j=mat%irow(i)+1, mat%irow(i+1)-1 yarr(mat%cols(j)) = yarr(mat%cols(j)) & & + mat%val(j)*xarr(i) END DO ELSE IF(mat%nlherm) THEN ! CSR-UT = (CSR-UT*)* DO j=mat%irow(i)+1, mat%irow(i+1)-1 yarr(mat%cols(j)) = yarr(mat%cols(j)) & & + mat%val(j)*xarr(i) END DO END IF END DO #endif ! END FUNCTION vmx_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ FUNCTION vmx_wsmp_matn(mat, xarr) RESULT(yarr) ! ! Return product mat*x ! TYPE(wsmp_mat) :: mat DOUBLE PRECISION, INTENT(in) :: xarr(:,:) DOUBLE PRECISION :: yarr(SIZE(xarr,1),SIZE(xarr,2)) ! DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0 INTEGER :: n, nrhs, i, j CHARACTER(len=6) :: matdescra ! n = mat%rank nrhs = SIZE(xarr,2) ! #ifdef MKL IF(mat%nlsym) THEN matdescra = 'sun' ELSE matdescra = 'g' END IF ! CALL mkl_dcsrmm('N', n, nrhs, n, alpha, matdescra, mat%val,& & mat%cols, mat%irow(1), mat%irow(2), xarr, & & n, beta, yarr, n) #else yarr = 0.0d0 DO i=1,n DO j=mat%irow(i), mat%irow(i+1)-1 yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:) END DO IF(mat%nlsym) THEN DO j=mat%irow(i)+1, mat%irow(i+1)-1 yarr(mat%cols(j),:) = yarr(mat%cols(j),:) & & + mat%val(j)*xarr(i,:) END DO END IF END DO #endif ! END FUNCTION vmx_wsmp_matn !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ FUNCTION vmx_zwsmp_matn(mat, xarr) RESULT(yarr) ! ! Return product mat*x ! TYPE(zwsmp_mat) :: mat DOUBLE COMPLEX, INTENT(in) :: xarr(:,:) DOUBLE COMPLEX :: yarr(SIZE(xarr,1),SIZE(xarr,2)) ! DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0) INTEGER :: n, nrhs, i, j CHARACTER(len=6) :: matdescra CHARACTER(len=1) :: transa ! n = mat%rank nrhs = SIZE(xarr,2) ! #ifdef MKL IF(mat%nlsym) THEN matdescra = 'sun' ELSE IF(mat%nlherm) THEN matdescra = 'hun' ELSE matdescra = 'g' END IF transa='N' IF(mat%nlherm) THEN transa='T' ! Transpose(CSR-UT*) = CSC-LT* = CSR-UT END IF ! CALL mkl_zcsrmm(transa, n, nrhs, n, alpha, matdescra, mat%val, & & mat%cols, mat%irow(1), mat%irow(2), xarr, n, & & beta, yarr, n) #else yarr = (0.0d0,0.0d0) DO i=1,n IF(mat%nlherm) THEN ! CSR-UT = (CSR-UT*)* DO j=mat%irow(i), mat%irow(i+1)-1 yarr(i,:) = yarr(i,:) + CONJG(mat%val(j))*xarr(mat%cols(j),:) END DO ELSE DO j=mat%irow(i), mat%irow(i+1)-1 yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:) END DO END IF IF(mat%nlsym) THEN DO j=mat%irow(i)+1, mat%irow(i+1)-1 yarr(mat%cols(j),:) = yarr(mat%cols(j),:) & & + mat%val(j)*xarr(i,:) END DO ELSE IF(mat%nlherm) THEN ! CSR-UT = (CSR-UT*)* DO j=mat%irow(i)+1, mat%irow(i+1)-1 yarr(mat%cols(j),:) = yarr(mat%cols(j),:) & & + mat%val(j)*xarr(i,:) END DO END IF END DO #endif ! END FUNCTION vmx_zwsmp_matn !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE destroy_wsmp_mat(mat) ! ! Deallocate the sparse matrix mat ! TYPE(wsmp_mat) :: mat ! IF(ASSOCIATED(mat%mat)) THEN CALL destroy(mat%mat) DEALLOCATE(mat%mat) END IF ! ! Release memory for factors for symmetric matrix IF(mat%nlsym) THEN CALL check_mat(mat) CALL wsffree END IF ! IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm) IF(ASSOCIATED(mat%invp)) DEALLOCATE(mat%invp) IF(ASSOCIATED(mat%mrp)) DEALLOCATE(mat%mrp) IF(ASSOCIATED(mat%diag)) DEALLOCATE(mat%diag) IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) IF(ASSOCIATED(mat%aux)) DEALLOCATE(mat%aux) END SUBROUTINE destroy_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE destroy_zwsmp_mat(mat) ! ! Deallocate the sparse matrix mat ! TYPE(zwsmp_mat) :: mat ! IF(ASSOCIATED(mat%mat)) THEN CALL destroy(mat%mat) DEALLOCATE(mat%mat) END IF ! ! Release memory for factors for symmetric/hermitian matrix IF(mat%nlsym .OR. mat%nlherm) THEN CALL check_mat(mat) CALL wsffree END IF ! IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm) IF(ASSOCIATED(mat%invp)) DEALLOCATE(mat%invp) IF(ASSOCIATED(mat%mrp)) DEALLOCATE(mat%mrp) IF(ASSOCIATED(mat%diag)) DEALLOCATE(mat%diag) IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) IF(ASSOCIATED(mat%aux)) DEALLOCATE(mat%aux) END SUBROUTINE destroy_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE put_wsmp_mat(fid, label, mat, str) ! ! Write matrix to hdf5 file ! USE futils ! INTEGER, INTENT(in) :: fid CHARACTER(len=*), INTENT(in) :: label TYPE(wsmp_mat) :: mat CHARACTER(len=*), OPTIONAL, INTENT(in) :: str ! IF(PRESENT(str)) THEN CALL creatg(fid, label, str) ELSE CALL creatg(fid, label) END IF CALL attach(fid, label, 'RANK', mat%rank) CALL attach(fid, label, 'NNZ', mat%nnz) CALL attach(fid, label, 'NLSYM', mat%nlsym) CALL attach(fid, label, 'NLPOS', mat%nlpos) CALL putarr(fid, TRIM(label)//'/irow', mat%irow) CALL putarr(fid, TRIM(label)//'/cols', mat%cols) CALL putarr(fid, TRIM(label)//'/val', mat%val) ! CALL creatg(fid, TRIM(label)//'/p') CALL putarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm) CALL putarr(fid, TRIM(label)//'/p/dparm', mat%p%dparm) END SUBROUTINE put_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE put_zwsmp_mat(fid, label, mat, str) ! ! Write matrix to hdf5 file ! USE futils ! INTEGER, INTENT(in) :: fid CHARACTER(len=*), INTENT(in) :: label TYPE(zwsmp_mat) :: mat CHARACTER(len=*), OPTIONAL, INTENT(in) :: str ! IF(PRESENT(str)) THEN CALL creatg(fid, label, str) ELSE CALL creatg(fid, label) END IF CALL attach(fid, label, 'RANK', mat%rank) CALL attach(fid, label, 'NNZ', mat%nnz) CALL attach(fid, label, 'NLSYM', mat%nlsym) CALL attach(fid, label, 'NLPOS', mat%nlpos) CALL attach(fid, label, 'NLHERM', mat%nlherm) CALL putarr(fid, TRIM(label)//'/irow', mat%irow) CALL putarr(fid, TRIM(label)//'/cols', mat%cols) CALL putarr(fid, TRIM(label)//'/val', mat%val) ! CALL creatg(fid, TRIM(label)//'/p') CALL putarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm) CALL putarr(fid, TRIM(label)//'/p/dparm', mat%p%dparm) END SUBROUTINE put_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE get_wsmp_mat(fid, label, mat) ! ! Read matrix from hdf5 file ! USE futils ! INTEGER, INTENT(in) :: fid CHARACTER(len=*), INTENT(in) :: label TYPE(wsmp_mat) :: mat ! CALL getatt(fid, label, 'RANK', mat%rank) CALL getatt(fid, label, 'NNZ', mat%nnz) CALL getatt(fid, label, 'NLSYM', mat%nlsym) CALL getatt(fid, label, 'NLPOS', mat%nlpos) CALL getarr(fid, TRIM(label)//'/irow', mat%irow) CALL getarr(fid, TRIM(label)//'/cols', mat%cols) IF(mat%nlsym) THEN CALL getarr(fid, TRIM(label)//'/perm', mat%perm) CALL getarr(fid, TRIM(label)//'/invp', mat%invp) END IF CALL getarr(fid, TRIM(label)//'/val', mat%val) ! CALL getarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm) CALL getarr(fid, TRIM(label)//'/p/dparm', mat%p%dparm) END SUBROUTINE get_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE get_zwsmp_mat(fid, label, mat) ! ! Read matrix from hdf5 file ! USE futils ! INTEGER, INTENT(in) :: fid CHARACTER(len=*), INTENT(in) :: label TYPE(zwsmp_mat) :: mat ! CALL getatt(fid, label, 'RANK', mat%rank) CALL getatt(fid, label, 'NNZ', mat%nnz) CALL getatt(fid, label, 'NLSYM', mat%nlsym) CALL getatt(fid, label, 'NLPOS', mat%nlpos) CALL getatt(fid, label, 'NLHERM', mat%nlherm) CALL getarr(fid, TRIM(label)//'/irow', mat%irow) CALL getarr(fid, TRIM(label)//'/cols', mat%cols) IF(mat%nlsym) THEN CALL getarr(fid, TRIM(label)//'/perm', mat%perm) CALL getarr(fid, TRIM(label)//'/invp', mat%invp) END IF CALL getarr(fid, TRIM(label)//'/val', mat%val) ! CALL getarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm) CALL getarr(fid, TRIM(label)//'/p/dparm', mat%p%dparm) END SUBROUTINE get_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE mcopy_wsmp_mat(mata, matb) ! ! Matrix copy: B = A ! TYPE(wsmp_mat) :: mata, matb INTEGER :: n, nnz ! ! Assume that matb was already initialized by init_wsmp_mat. IF(matb%rank.LE.0) THEN WRITE(*,'(a)') 'MCOPY: Mat B is not initialized with INIT' STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF ! IF(ASSOCIATED(matb%mat)) THEN CALL destroy(matb%mat) DEALLOCATE(matb%mat) END IF ! n = mata%rank nnz = mata%nnz matb%rank = n matb%nnz = nnz matb%nlsym = mata%nlsym matb%nlpos = mata%nlpos matb%nlforce_zero = mata%nlforce_zero ! IF(ASSOCIATED(matb%val)) DEALLOCATE(matb%val) IF(ASSOCIATED(matb%cols)) DEALLOCATE(matb%cols) IF(ASSOCIATED(matb%irow)) DEALLOCATE(matb%irow) IF(ASSOCIATED(matb%perm)) DEALLOCATE(matb%perm) IF(ASSOCIATED(matb%invp)) DEALLOCATE(matb%invp) ALLOCATE(matb%val(nnz)); matb%val = mata%val ALLOCATE(matb%cols(nnz)); matb%cols = mata%cols ALLOCATE(matb%irow(n+1)); matb%irow = mata%irow ALLOCATE(matb%perm(n)) IF(matb%nlsym) THEN ALLOCATE(matb%perm(n)) ALLOCATE(matb%invp(n)) END IF END SUBROUTINE mcopy_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE mcopy_zwsmp_mat(mata, matb) ! ! Matrix copy: B = A ! TYPE(zwsmp_mat) :: mata, matb INTEGER :: n, nnz ! ! Assume that matb was already initialized by init_wsmp_mat. IF(matb%rank.LE.0) THEN WRITE(*,'(a)') 'MCOPY: Mat B is not initialized with INIT' STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF ! IF(ASSOCIATED(matb%mat)) THEN CALL destroy(matb%mat) DEALLOCATE(matb%mat) END IF ! n = mata%rank nnz = mata%nnz matb%rank = n matb%nnz = nnz matb%nlsym = mata%nlsym matb%nlherm = mata%nlherm matb%nlpos = mata%nlpos matb%nlforce_zero = mata%nlforce_zero ! IF(ASSOCIATED(matb%val)) DEALLOCATE(matb%val) IF(ASSOCIATED(matb%cols)) DEALLOCATE(matb%cols) IF(ASSOCIATED(matb%irow)) DEALLOCATE(matb%irow) IF(ASSOCIATED(matb%perm)) DEALLOCATE(matb%perm) IF(ASSOCIATED(matb%invp)) DEALLOCATE(matb%invp) ALLOCATE(matb%val(nnz)); matb%val = mata%val ALLOCATE(matb%cols(nnz)); matb%cols = mata%cols ALLOCATE(matb%irow(n+1)); matb%irow = mata%irow ALLOCATE(matb%perm(n)) IF(matb%nlsym) THEN ALLOCATE(matb%perm(n)) ALLOCATE(matb%invp(n)) END IF END SUBROUTINE mcopy_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE maddto_wsmp_mat(mata, alpha, matb) ! ! A <- A + alpha*B ! TYPE(wsmp_mat) :: mata, matb DOUBLE PRECISION :: alpha ! mata%val = mata%val + alpha*matb%val END SUBROUTINE maddto_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE maddto_zwsmp_mat(mata, alpha, matb) ! ! A <- A + alpha*B ! TYPE(zwsmp_mat) :: mata, matb DOUBLE COMPLEX :: alpha ! mata%val = mata%val + alpha*matb%val END SUBROUTINE maddto_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE psum_wsmp_mat(mat, comm) ! ! Parallel sum of sparse matrices ! INCLUDE "mpif.h" ! TYPE(wsmp_mat) :: mat INCLUDE 'psum_mat.tpl' END SUBROUTINE psum_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE psum_zwsmp_mat(mat, comm) ! ! Parallel sum of sparse matrices ! INCLUDE "mpif.h" ! TYPE(zwsmp_mat) :: mat INCLUDE 'psum_mat.tpl' END SUBROUTINE psum_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE p2p_wsmp_mat(mat, dest, extyp, op, comm) ! ! Point-to-point combine sparse matrix between 2 processes ! INCLUDE "mpif.h" ! TYPE(wsmp_mat) :: mat DOUBLE PRECISION, ALLOCATABLE :: val(:) INTEGER :: mpi_type=MPI_DOUBLE_PRECISION ! INCLUDE "p2p_mat.tpl" END SUBROUTINE p2p_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE p2p_zwsmp_mat(mat, dest, extyp, op, comm) ! ! Point-to-point combine sparse matrix between 2 processes ! INCLUDE "mpif.h" ! TYPE(zwsmp_mat) :: mat DOUBLE COMPLEX, ALLOCATABLE :: val(:) INTEGER :: mpi_type=MPI_DOUBLE_COMPLEX ! INCLUDE "p2p_mat.tpl" END SUBROUTINE p2p_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ END MODULE pwsmp_bsplines diff --git a/src/sparse_mod.f90 b/src/sparse_mod.f90 index 5bc289e..2b0dd78 100644 --- a/src/sparse_mod.f90 +++ b/src/sparse_mod.f90 @@ -1,899 +1,899 @@ !> !> @file sparse_mod.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> MODULE sparse ! ! SPARSE: Implement sparse matrix using dynamic linked lists ! as matrix rows. ! ! T.M. Tran, CRPP-EPFL ! October 2010 ! IMPLICIT NONE ! TYPE elt INTEGER :: index=0 DOUBLE PRECISION :: val=0.0d0 TYPE(elt), POINTER :: next => NULL() END TYPE elt ! TYPE zelt INTEGER :: index=0 DOUBLE COMPLEX :: val=(0.0d0, 0.0d0) TYPE(zelt), POINTER :: next => NULL() END TYPE zelt ! TYPE sprow INTEGER :: nnz=0 ! Number of non zeros in a row TYPE(elt), POINTER :: row0 => NULL() ! Points to head of a (sparse) row END TYPE sprow ! TYPE zsprow INTEGER :: nnz=0 ! Number of non zeros in a row TYPE(zelt), POINTER :: row0 => NULL() ! Points to head of a (sparse) row END TYPE zsprow ! TYPE spmat INTEGER :: rank TYPE(sprow), POINTER :: row(:) => NULL() END TYPE spmat ! TYPE zspmat INTEGER :: rank TYPE(zsprow), POINTER :: row(:) => NULL() END TYPE zspmat ! INTERFACE init MODULE PROCEDURE init_spmat, init_zspmat END INTERFACE init ! INTERFACE updtmat MODULE PROCEDURE updt_sp, updt_zsp, updt_spmat, updt_zspmat END INTERFACE updtmat ! INTERFACE putele MODULE PROCEDURE putele_sp, putele_zsp, putele_spmat, putele_zspmat END INTERFACE putele ! INTERFACE getele MODULE PROCEDURE getele_sp, getele_zsp, getele_spmat, getele_zspmat END INTERFACE getele ! INTERFACE putrow MODULE PROCEDURE putrow_csr, putrow_full, putrow_spmat, & & putrow_zcsr, putrow_zfull, putrow_zspmat END INTERFACE putrow ! INTERFACE getrow MODULE PROCEDURE getrow_csr, getrow_full, getrow_spmat, & & getrow_zcsr, getrow_zfull, getrow_zspmat END INTERFACE getrow ! INTERFACE putcol MODULE PROCEDURE putcol_spmat, putcol_zspmat END INTERFACE putcol ! INTERFACE getcol MODULE PROCEDURE getcol_spmat, getcol_zspmat END INTERFACE getcol ! INTERFACE get_count MODULE PROCEDURE get_count_sp, get_count_spmat, & & get_count_zsp, get_count_zspmat END INTERFACE get_count ! INTERFACE destroy MODULE PROCEDURE destroy_spmat, destroy_row, destroy_node, & & destroy_zspmat, destroy_zrow, destroy_znode END INTERFACE destroy ! CONTAINS !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE init_spmat(n, mat, istart, iend) ! ! Initial an empty sparse matrix ! INTEGER, INTENT(in) :: n INTEGER, INTENT(in), OPTIONAL :: istart, iend TYPE(spmat) :: mat ! mat%rank = n IF(ASSOCIATED(mat%row)) DEALLOCATE(mat%row) IF(PRESENT(istart)) THEN ALLOCATE(mat%row(istart:iend)) ELSE ALLOCATE(mat%row(n)) END IF ! END SUBROUTINE init_spmat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE init_zspmat(n, mat, istart, iend) ! ! Initial an empty sparse matrix ! INTEGER, INTENT(in) :: n INTEGER, INTENT(in), OPTIONAL :: istart, iend TYPE(zspmat) :: mat ! mat%rank = n IF(ASSOCIATED(mat%row)) DEALLOCATE(mat%row) IF(PRESENT(istart)) THEN ALLOCATE(mat%row(istart:iend)) ELSE ALLOCATE(mat%row(n)) END IF ! END SUBROUTINE init_zspmat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE updt_sp(arow, j, val) ! ! Update element j of row arow or insert it in an increasing "index" ! TYPE(sprow), TARGET :: arow INTEGER, INTENT(in) :: j DOUBLE PRECISION, INTENT(in) :: val ! TYPE(elt), TARGET :: pre_root TYPE(elt), POINTER :: t, p ! pre_root%next => arow%row0 ! pre_root is linked to the head of the list. t => pre_root DO WHILE( ASSOCIATED(t%next) ) p => t%next IF( p%index .EQ. j ) THEN p%val = p%val+val RETURN END IF IF( p%index .GT. j ) EXIT t => t%next END DO ALLOCATE(p) p = elt(j, val, t%next) t%next => p ! arow%nnz = arow%nnz+1 arow%row0 => pre_root%next ! In case the head is altered END SUBROUTINE updt_sp !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE updt_zsp(arow, j, val) ! ! Update element j of row arow or insert it in an increasing "index" ! TYPE(zsprow), TARGET :: arow INTEGER, INTENT(in) :: j DOUBLE COMPLEX, INTENT(in) :: val ! TYPE(zelt), TARGET :: pre_root TYPE(zelt), POINTER :: t, p ! pre_root%next => arow%row0 ! pre_root is linked to the head of the list. t => pre_root DO WHILE( ASSOCIATED(t%next) ) p => t%next IF( p%index .EQ. j ) THEN p%val = p%val+val RETURN END IF IF( p%index .GT. j ) EXIT t => t%next END DO ALLOCATE(p) p = zelt(j, val, t%next) t%next => p ! arow%nnz = arow%nnz+1 arow%row0 => pre_root%next ! In case the head is altered END SUBROUTINE updt_zsp !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE updt_spmat(mat, i, j, val) ! ! Update element Aij of sparse matrix ! TYPE(spmat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE PRECISION, INTENT(in) :: val ! CALL updt_sp(mat%row(i), j, val) END SUBROUTINE updt_spmat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE updt_zspmat(mat, i, j, val) ! ! Update element Aij of sparse matrix ! TYPE(zspmat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE COMPLEX, INTENT(in) :: val ! CALL updt_zsp(mat%row(i), j, val) END SUBROUTINE updt_zspmat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getele_sp(arow, j, val, found) ! ! Get element j from row arow ! TYPE(sprow), TARGET :: arow INTEGER, INTENT(in) :: j DOUBLE PRECISION, INTENT(out) :: val LOGICAL, INTENT(out), OPTIONAL :: found ! TYPE(elt), POINTER :: t INTEGER :: i ! val = 0.0d0 t => arow%row0 ! Start of a row DO WHILE( ASSOCIATED(t) ) IF(t%index .EQ. j) THEN val = t%val IF(PRESENT(found)) found = .TRUE. RETURN END IF t => t%next END DO IF(PRESENT(found)) found = .FALSE. END SUBROUTINE getele_sp !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getele_zsp(arow, j, val, found) ! ! Get element j from row arow ! TYPE(zsprow), TARGET :: arow INTEGER, INTENT(in) :: j DOUBLE COMPLEX, INTENT(out) :: val LOGICAL, INTENT(out), OPTIONAL :: found ! TYPE(zelt), POINTER :: t INTEGER :: i ! val = 0.0d0 t => arow%row0 ! Start of a row DO WHILE( ASSOCIATED(t) ) IF(t%index .EQ. j) THEN val = t%val IF(PRESENT(found)) found = .TRUE. RETURN END IF t => t%next END DO IF(PRESENT(found)) found = .FALSE. END SUBROUTINE getele_zsp !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getele_spmat(mat, i, j, val) ! ! Get element (i,j) of sparse matrix ! TYPE(spmat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE PRECISION, INTENT(out) :: val ! CALL getele(mat%row(i), j, val) END SUBROUTINE getele_spmat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getele_zspmat(mat, i, j, val) ! ! Get element (i,j) of sparse matrix ! TYPE(zspmat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE COMPLEX, INTENT(out) :: val ! CALL getele(mat%row(i), j, val) END SUBROUTINE getele_zspmat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putele_sp(arow, j, val, nlforce_zero) ! ! Put (overwrite) element j of row arow or insert it in an increasing "index" ! TYPE(sprow), TARGET :: arow INTEGER, INTENT(in) :: j DOUBLE PRECISION, INTENT(in) :: val LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero ! TYPE(elt), TARGET :: pre_root TYPE(elt), POINTER :: t, p LOGICAL :: rmnode ! pre_root%next => arow%row0 ! pre_root is linked to the head of the list. t => pre_root ! ! Remove node which has zero val or not? ! But never create new node with zero val ! rmnode = .TRUE. IF(PRESENT(nlforce_zero)) rmnode = .NOT.nlforce_zero ! DO WHILE( ASSOCIATED(t%next) ) p => t%next IF( p%index .EQ. j ) THEN IF(ABS(val).LE.EPSILON(0.0d0) .AND. rmnode) THEN ! Remove the node for zero val! t%next => p%next arow%nnz = arow%nnz-1 arow%row0 => pre_root%next ! In case the head is altered DEALLOCATE(p) ELSE p%val = val END IF RETURN END IF IF( p%index .GT. j ) EXIT t => t%next END DO ! ! Never create new node with zero val ! IF(ABS(val).GT.EPSILON(0.0d0)) THEN ALLOCATE(p) p = elt(j, val, t%next) t%next => p arow%nnz = arow%nnz+1 arow%row0 => pre_root%next END IF END SUBROUTINE putele_sp !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putele_zsp(arow, j, val, nlforce_zero) ! ! Put (overwrite) element j of row arow or insert it in an increasing "index" ! TYPE(zsprow), TARGET :: arow INTEGER, INTENT(in) :: j DOUBLE COMPLEX, INTENT(in) :: val LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero ! TYPE(zelt), TARGET :: pre_root TYPE(zelt), POINTER :: t, p LOGICAL :: rmnode ! pre_root%next => arow%row0 ! pre_root is linked to the head of the list. t => pre_root ! ! Remove node which has zero val or not? ! But never create new node with zero val ! rmnode = .TRUE. IF(PRESENT(nlforce_zero)) rmnode = .NOT.nlforce_zero ! DO WHILE( ASSOCIATED(t%next) ) p => t%next IF( p%index .EQ. j ) THEN IF(ABS(val).LE.EPSILON(0.0d0) .AND. rmnode) THEN ! Remove the node for zero val! t%next => p%next arow%nnz = arow%nnz-1 arow%row0 => pre_root%next ! In case the head is altered DEALLOCATE(p) ELSE p%val = val END IF RETURN END IF IF( p%index .GT. j ) EXIT t => t%next END DO ! ! Never create new node with zero val ! IF(ABS(val).GT.EPSILON(0.0d0)) THEN ALLOCATE(p) p = zelt(j, val, t%next) t%next => p arow%nnz = arow%nnz+1 arow%row0 => pre_root%next END IF END SUBROUTINE putele_zsp !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putele_spmat(mat, i, j, val, nlforce_zero) ! ! Put element (i,j) of matrix ! TYPE(spmat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE PRECISION, INTENT(in) :: val LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero ! CALL putele(mat%row(i), j, val, nlforce_zero) END SUBROUTINE putele_spmat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putele_zspmat(mat, i, j, val, nlforce_zero) ! ! Put element (i,j) of matrix ! TYPE(zspmat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE COMPLEX, INTENT(in) :: val LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero ! CALL putele(mat%row(i), j, val, nlforce_zero) END SUBROUTINE putele_zspmat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ INTEGER FUNCTION get_count_sp(arow) ! ! Number of elements in arow ! TYPE(sprow), INTENT(in) :: arow TYPE(elt), POINTER :: t INTEGER :: i ! t => arow%row0 ! Start of a row i = 0 DO WHILE( ASSOCIATED(t) ) i=i+1 t => t%next END DO get_count_sp = i END FUNCTION get_count_sp !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ INTEGER FUNCTION get_count_zsp(arow) ! ! Number of elements in arow ! TYPE(zsprow), INTENT(in) :: arow TYPE(zelt), POINTER :: t INTEGER :: i ! t => arow%row0 ! Start of a row i = 0 DO WHILE( ASSOCIATED(t) ) i=i+1 t => t%next END DO get_count_zsp = i END FUNCTION get_count_zsp !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ INTEGER FUNCTION get_count_spmat(mat, nnz) ! ! Number of non-zeros in sparse matrix ! TYPE(spmat) :: mat INTEGER, INTENT(out), OPTIONAL :: nnz(:) ! INTEGER :: i, c(LBOUND(mat%row,1):UBOUND(mat%row,1)) DO i=LBOUND(mat%row,1),UBOUND(mat%row,1) c(i) = get_count_sp(mat%row(i)) END DO IF(PRESENT(nnz)) nnz = c get_count_spmat = SUM(c) END FUNCTION get_count_spmat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ INTEGER FUNCTION get_count_zspmat(mat, nnz) ! ! Number of non-zeros in sparse matrix ! TYPE(zspmat) :: mat INTEGER, INTENT(out), OPTIONAL :: nnz(:) ! INTEGER :: i, c(LBOUND(mat%row,1):UBOUND(mat%row,1)) DO i=LBOUND(mat%row,1),UBOUND(mat%row,1) c(i) = get_count_zsp(mat%row(i)) END DO IF(PRESENT(nnz)) nnz = c get_count_zspmat = SUM(c) END FUNCTION get_count_zspmat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getrow_csr(arow, arr, col, count) ! ! Get a row from sparse row arow and put it in a CSR format ! TYPE(sprow), INTENT(in) :: arow DOUBLE PRECISION, INTENT(out) :: arr(:) INTEGER, INTENT(out) :: col(:) INTEGER, OPTIONAL, INTENT(out) :: count ! TYPE(elt), POINTER :: t INTEGER :: i ! t => arow%row0 ! Start of a row i = 0 DO WHILE( ASSOCIATED(t) ) i=i+1 col(i) = t%index arr(i) = t%val t => t%next END DO IF(PRESENT(count)) count = i END SUBROUTINE getrow_csr !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getrow_zcsr(arow, arr, col, count) ! ! Get a row from sparse row arow and put it in a CSR format ! TYPE(zsprow), INTENT(in) :: arow DOUBLE COMPLEX, INTENT(out) :: arr(:) INTEGER, INTENT(out) :: col(:) INTEGER, OPTIONAL, INTENT(out) :: count ! TYPE(zelt), POINTER :: t INTEGER :: i ! t => arow%row0 ! Start of a row i = 0 DO WHILE( ASSOCIATED(t) ) i=i+1 col(i) = t%index arr(i) = t%val t => t%next END DO IF(PRESENT(count)) count = i END SUBROUTINE getrow_zcsr !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getrow_full(arow, arr, count) ! ! Get a row from sparse row arow and put it in an full row ! TYPE(sprow), INTENT(in) :: arow DOUBLE PRECISION, INTENT(out) :: arr(:) INTEGER, OPTIONAL, INTENT(out) :: count ! TYPE(elt), POINTER :: t INTEGER :: n, i, j ! n = SIZE(arr) arr = 0.0d0 t => arow%row0 ! Start of a row i = 0 DO WHILE( ASSOCIATED(t) ) i=i+1 j = t%index IF(j.LE.n) THEN arr(j) = t%val t => t%next ELSE WRITE(*,'(a)') 'GETROW_FULL: size of input ARR too small!' STOP END IF END DO IF(PRESENT(count)) count = i END SUBROUTINE getrow_full !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getrow_zfull(arow, arr, count) ! ! Get a row from sparse row arow and put it in an full row ! TYPE(zsprow), INTENT(in) :: arow DOUBLE COMPLEX, INTENT(out) :: arr(:) INTEGER, OPTIONAL, INTENT(out) :: count ! TYPE(zelt), POINTER :: t INTEGER :: n, i, j ! n = SIZE(arr) arr = 0.0d0 t => arow%row0 ! Start of a row i = 0 DO WHILE( ASSOCIATED(t) ) i=i+1 j = t%index IF(j.LE.n) THEN arr(j) = t%val t => t%next ELSE WRITE(*,'(a)') 'GETROW_FULL: size of input ARR too small!' STOP END IF END DO IF(PRESENT(count)) count = i END SUBROUTINE getrow_zfull !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getrow_spmat(mat, i, arr, col) ! ! Get a row from sparse matrix ! TYPE(spmat), INTENT(in) :: mat INTEGER, INTENT(in) :: i DOUBLE PRECISION, INTENT(out) :: arr(:) INTEGER, INTENT(out), OPTIONAL :: col(:) ! IF(PRESENT(col)) THEN ! The output row is defined by (col, arr) CALL getrow_csr(mat%row(i), arr, col) ELSE CALL getrow_full(mat%row(i), arr) END IF END SUBROUTINE getrow_spmat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getrow_zspmat(mat, i, arr, col) ! ! Get a row from sparse matrix ! TYPE(zspmat), INTENT(in) :: mat INTEGER, INTENT(in) :: i DOUBLE COMPLEX, INTENT(out) :: arr(:) INTEGER, INTENT(out), OPTIONAL :: col(:) ! IF(PRESENT(col)) THEN ! The output row is defined by (col, arr) CALL getrow_zcsr(mat%row(i), arr, col) ELSE CALL getrow_zfull(mat%row(i), arr) END IF END SUBROUTINE getrow_zspmat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putrow_csr(arow, arr, col, nlforce_zero) ! ! Put a row from sparse row arow and put it in a CSR format ! TYPE(sprow), INTENT(inout) :: arow DOUBLE PRECISION, INTENT(in) :: arr(:) INTEGER, INTENT(in) :: col(:) LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero ! INTEGER :: n, i ! n=SIZE(arr) DO i=1,n CALL putele(arow, col(i), arr(i), nlforce_zero) END DO END SUBROUTINE putrow_csr !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putrow_zcsr(arow, arr, col, nlforce_zero) ! ! Put a row from sparse row arow and put it in a CSR format ! TYPE(zsprow), INTENT(inout) :: arow DOUBLE COMPLEX, INTENT(in) :: arr(:) INTEGER, INTENT(in) :: col(:) LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero ! INTEGER :: n, i ! n=SIZE(arr) DO i=1,n CALL putele(arow, col(i), arr(i), nlforce_zero) END DO END SUBROUTINE putrow_zcsr !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putrow_full(arow, arr, nlforce_zero) ! ! Put a row from sparse row arow and put it in a full row ! TYPE(sprow), INTENT(inout) :: arow DOUBLE PRECISION, INTENT(in) :: arr(:) LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero ! INTEGER :: n, i ! n=SIZE(arr) DO i=1,n CALL putele(arow, i, arr(i), nlforce_zero) END DO END SUBROUTINE putrow_full !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putrow_zfull(arow, arr, nlforce_zero) ! ! Put a row from sparse row arow and put it in a full row ! TYPE(zsprow), INTENT(inout) :: arow DOUBLE COMPLEX, INTENT(in) :: arr(:) LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero ! INTEGER :: n, i ! n=SIZE(arr) DO i=1,n CALL putele(arow, i, arr(i), nlforce_zero) END DO END SUBROUTINE putrow_zfull !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putrow_spmat(mat, i, arr, col, nlforce_zero) ! ! Put a row to matrix ! TYPE(spmat) :: mat INTEGER, intent(in) :: i DOUBLE PRECISION, INTENT(in) :: arr(:) INTEGER, INTENT(in), OPTIONAL :: col(:) LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero ! IF(PRESENT(col)) THEN ! The input row is defined by (col, arr) CALL putrow_csr(mat%row(i), arr, col, nlforce_zero) ELSE CALL putrow_full(mat%row(i), arr, nlforce_zero) END IF END SUBROUTINE putrow_spmat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putrow_zspmat(mat, i, arr, col, nlforce_zero) ! ! Put a row to matrix ! TYPE(zspmat) :: mat INTEGER, intent(in) :: i DOUBLE COMPLEX, INTENT(in) :: arr(:) INTEGER, INTENT(in), OPTIONAL :: col(:) LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero ! IF(PRESENT(col)) THEN ! The input row is defined by (col, arr) CALL putrow_zcsr(mat%row(i), arr, col, nlforce_zero) ELSE CALL putrow_zfull(mat%row(i), arr, nlforce_zero) END IF END SUBROUTINE putrow_zspmat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putcol_spmat(mat, j, arr, nlforce_zero) ! ! Put a column to mtarix ! TYPE(spmat) :: mat INTEGER, INTENT(in) :: j DOUBLE PRECISION, INTENT(in) :: arr(:) LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero ! INTEGER :: i DO i=1,mat%rank CALL putele(mat, i, j, arr(i), nlforce_zero) END DO END SUBROUTINE putcol_spmat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putcol_zspmat(mat, j, arr, nlforce_zero) ! ! Put a column to mtarix ! TYPE(zspmat) :: mat INTEGER, INTENT(in) :: j DOUBLE COMPLEX, INTENT(in) :: arr(:) LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero ! INTEGER :: i DO i=1,mat%rank CALL putele(mat, i, j, arr(i), nlforce_zero) END DO END SUBROUTINE putcol_zspmat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getcol_spmat(mat, j, arr) ! ! Get column j of matrix ! TYPE(spmat) :: mat INTEGER, INTENT(in) :: j DOUBLE PRECISION, INTENT(out) :: arr(:) INTEGER :: i DO i=1,mat%rank CALL getele(mat, i, j, arr(i)) END DO END SUBROUTINE getcol_spmat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getcol_zspmat(mat, j, arr) ! ! Get column j of matrix ! TYPE(zspmat) :: mat INTEGER, INTENT(in) :: j DOUBLE COMPLEX, INTENT(out) :: arr(:) INTEGER :: i DO i=1,mat%rank CALL getele(mat, i, j, arr(i)) END DO END SUBROUTINE getcol_zspmat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE destroy_row(arow) ! ! Deallocate the sparse row ! TYPE(sprow), INTENT(inout) :: arow ! IF(ASSOCIATED(arow%row0)) CALL destroy_node(arow%row0) arow%nnz = get_count(arow) END SUBROUTINE destroy_row !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE destroy_zrow(arow) ! ! Deallocate the sparse row ! TYPE(zsprow), INTENT(inout) :: arow ! IF(ASSOCIATED(arow%row0)) CALL destroy_znode(arow%row0) arow%nnz = get_count(arow) END SUBROUTINE destroy_zrow !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ RECURSIVE SUBROUTINE destroy_node(p) ! ! Deallocate recursively the linked list ! TYPE(elt), POINTER :: p ! IF(ASSOCIATED(p%next)) CALL destroy_node(p%next) DEALLOCATE(p) END SUBROUTINE destroy_node !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ RECURSIVE SUBROUTINE destroy_znode(p) ! ! Deallocate recursively the linked list ! TYPE(zelt), POINTER :: p ! IF(ASSOCIATED(p%next)) CALL destroy_znode(p%next) DEALLOCATE(p) END SUBROUTINE destroy_znode !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE destroy_spmat(mat) ! ! Deallocate the sparse matrix ! TYPE(spmat) :: mat INTEGER :: n, i ! n = mat%rank DO i=LBOUND(mat%row,1),UBOUND(mat%row,1) CALL destroy(mat%row(i)) END DO DEALLOCATE(mat%row) END SUBROUTINE destroy_spmat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE destroy_zspmat(mat) ! ! Deallocate the sparse matrix ! TYPE(zspmat) :: mat INTEGER :: n, i ! n = mat%rank DO i=LBOUND(mat%row,1),UBOUND(mat%row,1) CALL destroy(mat%row(i)) END DO DEALLOCATE(mat%row) END SUBROUTINE destroy_zspmat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ INTEGER FUNCTION isearch(karr, k) ! ! Sequential search an ordered table of integers ! INTEGER, INTENT(in) :: karr(0:) INTEGER, INTENT(in) :: k INTEGER :: n ! n=SIZE(karr) isearch = -1 ! Failure IF( k.GT.karr(n-1)) RETURN ! isearch=0 DO IF( k.LE.karr(isearch)) EXIT isearch = isearch+1 END DO IF( k.NE.karr(isearch)) isearch = -1 ! Failure END FUNCTION isearch !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ INTEGER FUNCTION isearch_bin(karr, k) ! ! Binary search an ordered table of integers ! INTEGER, INTENT(in) :: karr(0:) INTEGER, INTENT(in) :: k INTEGER :: n INTEGER :: l, u ! n=SIZE(karr) isearch_bin = -1 ! Failure IF( k.LT.karr(0) .OR. k.GT.karr(n-1)) RETURN ! l=0; u=n-1 DO WHILE(l.LE.u) isearch_bin = (l+u)/2 IF(k.EQ.karr(isearch_bin)) THEN RETURN ELSE IF(k.LT.karr(isearch_bin)) THEN u = isearch_bin-1 ELSE l = isearch_bin+1 END IF END DO isearch_bin = -1 ! Failure END FUNCTION isearch_bin ! END MODULE sparse diff --git a/src/tsparse3.f90 b/src/tsparse3.f90 index 0e75703..1f23792 100644 --- a/src/tsparse3.f90 +++ b/src/tsparse3.f90 @@ -1,705 +1,705 @@ !> !> @file tsparse3.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> ! ! Solving the 2d PDE using splines and WSMP non-symmetric matrix ! ! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0 ! exact solution: f(x,y) = (1-x^2) x^m cos(my) ! MODULE pde2d_mod USE bsplines USE wsmp_bsplines IMPLICIT NONE CONTAINS !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE dismat(spl, mat) ! ! Assembly of FE matrix mat using spline spl ! TYPE(spline2d), INTENT(in) :: spl TYPE(wsmp_mat), INTENT(inout) :: mat ! INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: n2, nidbas2, ndim2, ng2 INTEGER :: i, j, ig1, ig2 INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:) DOUBLE PRECISION:: contrib ! INTEGER :: kterms ! Number of terms in weak form INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:) ! Derivative order DOUBLE PRECISION, ALLOCATABLE :: coefs(:,:,:) ! Terms in weak form INTEGER, ALLOCATABLE :: left1(:), left2(:) !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1 WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2 ! ! Gauss quadature ! CALL get_gauss(spl%sp1, ng1) CALL get_gauss(spl%sp2, ng2) ALLOCATE(xg1(ng1), wg1(ng1)) ALLOCATE(xg2(ng1), wg2(ng1)) ! ! Weak form ! kterms = mat%nterms ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2)) ALLOCATE(coefs(kterms,ng1,ng2)) ! ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! !=========================================================================== ! 2.0 Assembly loop ! ALLOCATE(left1(ng1)) ALLOCATE(left2(ng2)) DO i=1,n1 CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) left1 = i CALL basfun(xg1, spl%sp1, fun1, left1) DO j=1,n2 CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) left2 = j CALL basfun(xg2, spl%sp2, fun2, left2) ! DO ig1=1,ng1 DO ig2=1,ng2 CALL coefeq(xg1(ig1), xg2(ig2), & & idert(:,:,ig1,ig2), & & iderw(:,:,ig1,ig2), & & coefs(:,ig1,ig2)) END DO END DO ! DO iw1=0,nidbas1 ! Weight function in dir 1 igw1 = i+iw1 DO iw2=0,nidbas2 ! Weight function in dir 2 igw2 = MODULO(j+iw2-1, n2) + 1 irow = igw2 + (igw1-1)*n2 DO it1=0,nidbas1 ! Test function in dir 1 igt1 = i+it1 DO it2=0,nidbas2 ! Test function in dir 2 igt2 = MODULO(j+it2-1, n2) + 1 jcol = igt2 + (igt1-1)*n2 !------------- contrib = 0.0d0 DO ig1=1,ng1 DO ig2=1,ng2 DO iterm=1,kterms contrib = contrib + & & fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * & & fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * & & coefs(iterm,ig1,ig2) * & & fun2(it2,idert(iterm,2,ig1,ig2),ig2) * & & fun1(it1,idert(iterm,1,ig1,ig2),ig1) * & & wg1(ig1) * wg2(ig2) END DO END DO END DO CALL updtmat(mat, irow, jcol, contrib) !------------- END DO END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xg1, wg1, fun1) DEALLOCATE(xg2, wg2, fun2) DEALLOCATE(idert, iderw, coefs) DEALLOCATE(left1,left2) ! CONTAINS SUBROUTINE coefeq(x, y, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x, y INTEGER, INTENT(out) :: idt(:,:), idw(:,:) DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1)) ! ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy ! c(1) = x ! idt(1,1) = 1 idt(1,2) = 0 idw(1,1) = 1 idw(1,2) = 0 ! c(2) = 1.d0/x idt(2,1) = 0 idt(2,2) = 1 idw(2,1) = 0 idw(2,2) = 1 END SUBROUTINE coefeq END SUBROUTINE dismat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE disrhs(mbess, spl, rhs) ! ! Assembly the RHS using 2d spline spl ! INTEGER, INTENT(in) :: mbess TYPE(spline2d), INTENT(in) :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:) INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: n2, nidbas2, ndim2, ng2 INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) DOUBLE PRECISION:: contrib !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) ! ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) ! ! Gauss quadature ! CALL get_gauss(spl%sp1, ng1) CALL get_gauss(spl%sp2, ng2) ALLOCATE(xg1(ng1), wg1(ng1)) ALLOCATE(xg2(ng1), wg2(ng1)) !=========================================================================== ! 2.0 Assembly loop ! nrank = SIZE(rhs) rhs(1:nrank) = 0.0d0 ! DO i=1,n1 CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) DO ig1=1,ng1 CALL basfun(xg1(ig1), spl%sp1, fun1, i) DO j=1,n2 CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) DO ig2=1,ng2 CALL basfun(xg2(ig2), spl%sp2, fun2, j) contrib = wg1(ig1)*wg2(ig2) * & & rhseq(xg1(ig1),xg2(ig2), mbess) DO k1=0,nidbas1 i1 = i+k1 DO k2=0,nidbas2 j2 = MODULO(j+k2-1,n2) + 1 ij = j2 + (i1-1)*n2 rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1) END DO END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xg1, wg1, fun1) DEALLOCATE(xg2, wg2, fun2) ! CONTAINS DOUBLE PRECISION FUNCTION rhseq(x1, x2, m) DOUBLE PRECISION, INTENT(in) :: x1, x2 INTEGER, INTENT(in) :: m rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2) END FUNCTION rhseq END SUBROUTINE disrhs ! !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE ibcmat(mat, ny) ! ! Apply BC on matrix ! TYPE(wsmp_mat), INTENT(inout) :: mat INTEGER, INTENT(in) :: ny INTEGER :: nrank, i, j DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:) !=========================================================================== ! 1.0 Prologue ! nrank = mat%rank ALLOCATE(zsum(nrank), arr(nrank)) !=========================================================================== ! 2.0 Unicity at the axis ! ! The vertical sum on the NY-th row ! zsum = 0.0d0 DO i=1,ny arr = 0.0d0 CALL getrow(mat, i, arr) zsum(:) = zsum(:) + arr(:) END DO CALL putrow(mat, ny, zsum) ! ! The horizontal sum on the NY-th column ! zsum = 0.0d0 DO j=1,ny arr = 0.0d0 CALL getcol(mat, j, arr) zsum(ny:) = zsum(ny:) + arr(ny:) END DO CALL putcol(mat, ny, zsum) ! ! The away operator ! DO j = 1,ny-1 arr = 0.0d0; arr(j) = 1.0d0 CALL putcol(mat, j, arr) END DO ! DO i = 1,ny-1 arr = 0.0d0; arr(i) = 1.0d0 CALL putrow(mat, i, arr) END DO ! !=========================================================================== ! 3.0 Dirichlet on right boundary ! DO j = nrank, nrank-ny+1, -1 arr = 0.0d0; arr(j) = 1.0d0 CALL putcol(mat, j, arr) END DO ! DO i = nrank, nrank-ny+1, -1 arr = 0.0d0; arr(i) = 1.0d0 CALL putrow(mat, i, arr) END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(zsum, arr) ! END SUBROUTINE ibcmat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE ibcrhs(rhs, ny) ! ! Apply BC on RHS ! DOUBLE PRECISION, INTENT(inout) :: rhs(:) INTEGER, INTENT(in) :: ny INTEGER :: nrank DOUBLE PRECISION :: zsum !=========================================================================== ! 1.0 Prologue ! nrank = SIZE(rhs,1) !=========================================================================== ! 2.0 Unicity at the axis ! ! The vertical sum ! zsum = SUM(rhs(1:ny)) rhs(ny) = zsum rhs(1:ny-1) = 0.0d0 !=========================================================================== ! 3.0 Dirichlet on right boundary ! rhs(nrank-ny+1:nrank) = 0.0d0 END SUBROUTINE ibcrhs !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ END MODULE pde2d_mod PROGRAM main USE pde2d_mod USE futils ! IMPLICIT NONE INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms LOGICAL :: nlppform INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it DOUBLE PRECISION :: pi, coefx(5), coefy(5) DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol TYPE(spline2d) :: splxy TYPE(wsmp_mat) :: mat ! CHARACTER(len=128) :: file='pde2d_wsmp.h5' INTEGER :: fid DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol DOUBLE PRECISION :: seconds, mem, dopla DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2 DOUBLE PRECISION :: tconv, treord INTEGER :: nits=100 LOGICAL :: nlmetis, nlforce_zero ! NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlmetis, & & nlforce_zero, coefx, coefy !=========================================================================== ! 1.0 Prologue ! ! Read in data specific to run ! nx = 8 ! Number of intervals in x ny = 8 ! Number of intervals in y nidbas = (/3,3/) ! Degree of splines ngauss = (/4,4/) ! Number of Gauss points/interval mbess = 2 ! Exponent of differential problem nterms = 2 ! Number of terms in weak form nlppform = .TRUE. ! Use PPFORM for gridval or not nlmetis = .FALSE. ! Use metis ordering or minimum degree nlforce_zero = .FALSE. ! Remove existing nodes with zero val in putele/row/ele coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function ! READ(*,newrun) WRITE(*,newrun) ! ! Define grid on x (=radial) & y (=poloidal) axis ! pi = 4.0d0*ATAN(1.0d0) ALLOCATE(xgrid(0:nx), ygrid(0:ny)) xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 CALL meshdist(coefx, xgrid, nx) ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi CALL meshdist(coefy, ygrid, ny) ! ! Create hdf5 file ! CALL creatf(file, fid, 'PDE2D Result File', real_prec='d') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NY', ny) CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) CALL attach(fid, '/', 'NGAUSS1', ngauss(1)) CALL attach(fid, '/', 'NGAUSS2', ngauss(2)) CALL attach(fid, '/', 'MBESS', mbess) !=========================================================================== ! 2.0 Discretize the PDE ! ! Set up spline ! t0 = seconds() CALL set_spline(nidbas, ngauss, & & xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform) ! ! FE matrix assembly ! nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix WRITE(*,'(a,i8)') 'nrank', nrank ! CALL init(nrank, nterms, mat, nlforce_zero=nlforce_zero) CALL dismat(splxy, mat) ALLOCATE(arr(nrank)) IF(nrank.LT.100) THEN DO i=1,nrank CALL getele(mat, i, i, arr(i)) END DO WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', arr END IF ! ! BC on Matrix ! WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat) CALL ibcmat(mat, ny) IF(nrank.LT.100) THEN DO i=1,nrank CALL getele(mat, i, i, arr(i)) END DO WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', arr WRITE(*,'(a)') 'Last rows' DO i=nrank-ny,nrank CALL getrow(mat, i, arr) WRITE(*,'(10(1pe12.3))') arr END DO END IF tmat = seconds() - t0 ! ! RHS assembly ! ALLOCATE(rhs(nrank), sol(nrank)) CALL disrhs(mbess, splxy, rhs) ! ! BC on RHS ! CALL ibcrhs(rhs, ny) ! CALL putarr(fid, '/RHS', rhs, 'RHS of linear system') WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat) WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem() !=========================================================================== ! 3.0 Solve the dicretized system ! t0 = seconds() CALL to_mat(mat) tconv = seconds() -t0 WRITE(*,'(/a,l4)') 'associated(mat%mat)', ASSOCIATED(mat%mat) WRITE(*,'(a,1pe12.3)') 'Mem used (MB) after TO_MAT', mem() ! t0 = seconds() CALL reord_mat(mat, nlmetis=nlmetis, debug=.FALSE.) CALL putmat(fid, '/MAT', mat) treord = seconds() - t0 ! t0 = seconds() CALL numfact(mat, debug=.FALSE.) tfact = seconds() - t0 WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem() ! CALL bsolve(mat, rhs, sol, debug=.FALSE.) t0 = seconds() DO it=1,nits ! nits iterations for timing CALL bsolve(mat, rhs, sol) sol(1:ny-1) = sol(ny) END DO WRITE(*,'(/a,i6)') 'Number of refinement steps = ',mat%p%iparm(26) WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem() tsolv = (seconds() - t0)/REAL(nits) ! ! Spline coefficients, taking into account of periodicity in y ! Note: in SOL, y was numbered first. ! dimx = splxy%sp1%dim dimy = splxy%sp2%dim ALLOCATE(bcoef(0:dimx-1, 0:dimy-1)) DO j=0,dimy-1 DO i=0,dimx-1 ij = MODULO(j,ny) + i*ny + 1 bcoef(i,j) = sol(ij) END DO END DO WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef) CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution') !=========================================================================== ! 4.0 Check the solution ! ! Check function values computed with various method ! ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny)) DO i=0,nx DO j=0,ny solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j)) END DO END DO jder = (/0,0/) ! ! Compute PPFORM/BCOEFS at first call to gridval CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef) ! WRITE(*,'(/a)') '*** Checking solutions' t0 = seconds() DO it=1,nits ! nits iterations for timing CALL gridval(splxy, xgrid, ygrid, solcal, jder) END DO tgrid = (seconds() - t0)/REAL(nits) errsol = solana - solcal IF( SIZE(bcoef,2) .LE. 10 ) THEN CALL prnmat('BCOEF', bcoef) CALL prnmat('SOLANA', solana) CALL prnmat('SOLCAL', solcal) CALL prnmat('ERRSOL', errsol) END IF WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & & norm2(errsol) / norm2(solana) WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid ! CALL putarr(fid, '/xgrid', xgrid, 'r') CALL putarr(fid, '/ygrid', ygrid, '\theta') CALL putarr(fid, '/sol', solcal, 'Solutions') CALL putarr(fid, '/solana', solana,'Exact solutions') CALL putarr(fid, '/errors', errsol, 'Errors') ! ! Check derivatives d/dx and d/dy ! WRITE(*,'(/a)') '*** Checking gradient' DO i=0,nx DO j=0,ny IF( mbess .EQ. 0 ) THEN solana(i,j) = -2.0d0 * xgrid(i) ELSE solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * & & xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) END IF END DO END DO ! jder = (/1,0/) CALL gridval(splxy, xgrid, ygrid, solcal, jder) errsol = solana - solcal CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions') WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol) ! DO i=0,nx DO j=0,ny solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j)) END DO END DO ! jder = (/0,1/) CALL gridval(splxy, xgrid, ygrid, solcal, jder) CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions') errsol = solana - solcal WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol) ! WRITE(*,'(/a)') '---' WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s) ', tconv WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s) ', treord WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s) ', tsolv WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1 !=========================================================================== ! 5.0 Clear the matrix and recompute ! WRITE(*,'(/a)') 'Recompute the solver ...' t0 = seconds() CALL clear_mat(mat) CALL dismat(splxy, mat) CALL ibcmat(mat, ny) tmat = seconds()-t0 ! t0 = seconds() CALL numfact(mat, debug=.FALSE.) tfact = seconds()-t0 ! t0 = seconds() ALLOCATE(newsol(nrank)) CALL bsolve(mat, rhs, newsol) newsol(1:ny-1) = newsol(ny) tsolv = seconds()-t0 ! WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol)) WRITE(*,'(/a)') '---' WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact WRITE(*,'(a,1pe12.3)') 'Backsolve(2) time (s) ', tsolv WRITE(*,'(a,1pe12.3)') 'Total (s) ', tmat+tfact+tsolv ! DEALLOCATE(newsol) !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xgrid, rhs, sol) DEALLOCATE(solcal, solana, errsol) DEALLOCATE(bcoef) DEALLOCATE(arr) CALL destroy_sp(splxy) CALL destroy(mat) ! CALL closef(fid) !=========================================================================== ! CONTAINS FUNCTION norm2(x) ! ! Compute the 2-norm of array x ! IMPLICIT NONE DOUBLE PRECISION :: norm2 DOUBLE PRECISION, INTENT(in) :: x(:,:) DOUBLE PRECISION :: sum2 INTEGER :: i, j ! sum2 = 0.0d0 DO i=1,SIZE(x,1) DO j=1,SIZE(x,2) sum2 = sum2 + x(i,j)**2 END DO END DO norm2 = SQRT(sum2) END FUNCTION norm2 SUBROUTINE prnmat(label, mat) CHARACTER(len=*) :: label DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat INTEGER :: i WRITE(*,'(/a)') TRIM(label) DO i=1,SIZE(mat,1) WRITE(*,'(10(1pe12.3))') mat(i,:) END DO END SUBROUTINE prnmat END PROGRAM main ! !+++ SUBROUTINE meshdist(c, x, nx) ! ! Construct an 1d non-equidistant mesh given a ! mesh distribution function. ! IMPLICIT NONE DOUBLE PRECISION, INTENT(in) :: c(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) INTEGER :: nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal !!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) END FUNCTION fdist END SUBROUTINE meshdist !+++ diff --git a/src/tsparse4.f90 b/src/tsparse4.f90 index 40b900f..b328bbe 100644 --- a/src/tsparse4.f90 +++ b/src/tsparse4.f90 @@ -1,722 +1,722 @@ !> !> @file tsparse4.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> ! ! Solving the 2d PDE using splines and PARDISO non-symmetric matrix ! ! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0 ! exact solution: f(x,y) = (1-x^2) x^m cos(my) ! MODULE pde2d_mod USE bsplines USE pardiso_bsplines IMPLICIT NONE CONTAINS !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE dismat(spl, mat) ! ! Assembly of FE matrix mat using spline spl ! TYPE(spline2d), INTENT(in) :: spl TYPE(pardiso_mat), INTENT(inout) :: mat ! INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: n2, nidbas2, ndim2, ng2 INTEGER :: i, j, ig1, ig2 INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:) DOUBLE PRECISION:: contrib ! INTEGER :: kterms ! Number of terms in weak form INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:) ! Derivative order DOUBLE PRECISION, ALLOCATABLE :: coefs(:,:,:) ! Terms in weak form INTEGER, ALLOCATABLE :: left1(:), left2(:) !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1 WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2 ! ! Gauss quadature ! CALL get_gauss(spl%sp1, ng1) CALL get_gauss(spl%sp2, ng2) ALLOCATE(xg1(ng1), wg1(ng1)) ALLOCATE(xg2(ng1), wg2(ng1)) ! ! Weak form ! kterms = mat%nterms ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2)) ALLOCATE(coefs(kterms,ng1,ng2)) ! ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! !=========================================================================== ! 2.0 Assembly loop ! ALLOCATE(left1(ng1)) ALLOCATE(left2(ng2)) DO i=1,n1 CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) left1 = i CALL basfun(xg1, spl%sp1, fun1, left1) DO j=1,n2 CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) left2 = j CALL basfun(xg2, spl%sp2, fun2, left2) ! DO ig1=1,ng1 DO ig2=1,ng2 CALL coefeq(xg1(ig1), xg2(ig2), & & idert(:,:,ig1,ig2), & & iderw(:,:,ig1,ig2), & & coefs(:,ig1,ig2)) END DO END DO ! DO iw1=0,nidbas1 ! Weight function in dir 1 igw1 = i+iw1 DO iw2=0,nidbas2 ! Weight function in dir 2 igw2 = MODULO(j+iw2-1, n2) + 1 irow = igw2 + (igw1-1)*n2 DO it1=0,nidbas1 ! Test function in dir 1 igt1 = i+it1 DO it2=0,nidbas2 ! Test function in dir 2 igt2 = MODULO(j+it2-1, n2) + 1 jcol = igt2 + (igt1-1)*n2 !------------- contrib = 0.0d0 DO ig1=1,ng1 DO ig2=1,ng2 DO iterm=1,kterms contrib = contrib + & & fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * & & fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * & & coefs(iterm,ig1,ig2) * & & fun2(it2,idert(iterm,2,ig1,ig2),ig2) * & & fun1(it1,idert(iterm,1,ig1,ig2),ig1) * & & wg1(ig1) * wg2(ig2) END DO END DO END DO CALL updtmat(mat, irow, jcol, contrib) !------------- END DO END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xg1, wg1, fun1) DEALLOCATE(xg2, wg2, fun2) DEALLOCATE(idert, iderw, coefs) DEALLOCATE(left1,left2) ! CONTAINS SUBROUTINE coefeq(x, y, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x, y INTEGER, INTENT(out) :: idt(:,:), idw(:,:) DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1)) ! ! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy ! c(1) = x ! idt(1,1) = 1 idt(1,2) = 0 idw(1,1) = 1 idw(1,2) = 0 ! c(2) = 1.d0/x idt(2,1) = 0 idt(2,2) = 1 idw(2,1) = 0 idw(2,2) = 1 END SUBROUTINE coefeq END SUBROUTINE dismat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE disrhs(mbess, spl, rhs) ! ! Assembly the RHS using 2d spline spl ! INTEGER, INTENT(in) :: mbess TYPE(spline2d), INTENT(in) :: spl DOUBLE PRECISION, INTENT(out) :: rhs(:) INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: n2, nidbas2, ndim2, ng2 INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) DOUBLE PRECISION:: contrib !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) ! ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) ! ! Gauss quadature ! CALL get_gauss(spl%sp1, ng1) CALL get_gauss(spl%sp2, ng2) ALLOCATE(xg1(ng1), wg1(ng1)) ALLOCATE(xg2(ng1), wg2(ng1)) !=========================================================================== ! 2.0 Assembly loop ! nrank = SIZE(rhs) rhs(1:nrank) = 0.0d0 ! DO i=1,n1 CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) DO ig1=1,ng1 CALL basfun(xg1(ig1), spl%sp1, fun1, i) DO j=1,n2 CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) DO ig2=1,ng2 CALL basfun(xg2(ig2), spl%sp2, fun2, j) contrib = wg1(ig1)*wg2(ig2) * & & rhseq(xg1(ig1),xg2(ig2), mbess) DO k1=0,nidbas1 i1 = i+k1 DO k2=0,nidbas2 j2 = MODULO(j+k2-1,n2) + 1 ij = j2 + (i1-1)*n2 rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1) END DO END DO END DO END DO END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xg1, wg1, fun1) DEALLOCATE(xg2, wg2, fun2) ! CONTAINS DOUBLE PRECISION FUNCTION rhseq(x1, x2, m) DOUBLE PRECISION, INTENT(in) :: x1, x2 INTEGER, INTENT(in) :: m rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2) END FUNCTION rhseq END SUBROUTINE disrhs ! !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE ibcmat(mat, ny) ! ! Apply BC on matrix ! TYPE(pardiso_mat), INTENT(inout) :: mat INTEGER, INTENT(in) :: ny INTEGER :: nrank, i, j DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:) !=========================================================================== ! 1.0 Prologue ! nrank = mat%rank ALLOCATE(zsum(nrank), arr(nrank)) !=========================================================================== ! 2.0 Unicity at the axis ! ! The vertical sum on the NY-th row ! zsum = 0.0d0 DO i=1,ny arr = 0.0d0 CALL getrow(mat, i, arr) zsum(:) = zsum(:) + arr(:) END DO zsum(ny) = SUM(zsum(1:ny)) ! using symmetry CALL putrow(mat, ny, zsum) ! ! The away operator ! DO i = 1,ny-1 arr = 0.0d0; arr(i) = 1.0d0 CALL putrow(mat, i, arr) END DO ! !=========================================================================== ! 3.0 Dirichlet on right boundary ! DO i = nrank, nrank-ny+1, -1 arr = 0.0d0; arr(i) = 1.0d0 CALL putrow(mat, i, arr) END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(zsum, arr) ! END SUBROUTINE ibcmat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE ibcrhs(rhs, ny) ! ! Apply BC on RHS ! DOUBLE PRECISION, INTENT(inout) :: rhs(:) INTEGER, INTENT(in) :: ny INTEGER :: nrank DOUBLE PRECISION :: zsum !=========================================================================== ! 1.0 Prologue ! nrank = SIZE(rhs,1) !=========================================================================== ! 2.0 Unicity at the axis ! ! The vertical sum ! zsum = SUM(rhs(1:ny)) rhs(ny) = zsum rhs(1:ny-1) = 0.0d0 !=========================================================================== ! 3.0 Dirichlet on right boundary ! rhs(nrank-ny+1:nrank) = 0.0d0 END SUBROUTINE ibcrhs !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ END MODULE pde2d_mod PROGRAM main USE pde2d_mod USE futils ! IMPLICIT NONE INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms LOGICAL :: nlppform INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it DOUBLE PRECISION :: pi, coefx(5), coefy(5) DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol TYPE(spline2d) :: splxy TYPE(pardiso_mat) :: mat, newmat ! CHARACTER(len=128) :: file='pde2d_sym_pardiso.h5' INTEGER :: fid DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol DOUBLE PRECISION :: seconds, mem, dopla DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2 DOUBLE PRECISION :: tconv, treord INTEGER :: nits=100 LOGICAL :: nlmetis, nlforce_zero, nlpos ! NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlmetis, & & nlpos, nlforce_zero, coefx, coefy !=========================================================================== ! 1.0 Prologue ! ! Read in data specific to run ! nx = 8 ! Number of intervals in x ny = 8 ! Number of intervals in y nidbas = (/3,3/) ! Degree of splines ngauss = (/4,4/) ! Number of Gauss points/interval mbess = 2 ! Exponent of differential problem nterms = 2 ! Number of terms in weak form nlppform = .TRUE. ! Use PPFORM for gridval or not nlmetis = .FALSE. ! Use metis ordering or minimum degree nlforce_zero = .TRUE. ! Remove existing nodes with zero val in putele/row/ele nlpos = .TRUE. ! Matrix is positive definite coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function ! READ(*,newrun) WRITE(*,newrun) ! ! Define grid on x (=radial) & y (=poloidal) axis ! pi = 4.0d0*ATAN(1.0d0) ALLOCATE(xgrid(0:nx), ygrid(0:ny)) xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 CALL meshdist(coefx, xgrid, nx) ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi CALL meshdist(coefy, ygrid, ny) ! ! Create hdf5 file ! CALL creatf(file, fid, 'PDE2D Result File', real_prec='d') CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NY', ny) CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) CALL attach(fid, '/', 'NGAUSS1', ngauss(1)) CALL attach(fid, '/', 'NGAUSS2', ngauss(2)) CALL attach(fid, '/', 'MBESS', mbess) !=========================================================================== ! 2.0 Discretize the PDE ! ! Set up spline ! t0 = seconds() CALL set_spline(nidbas, ngauss, & & xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform) ! ! FE matrix assembly ! nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix WRITE(*,'(a,i8)') 'nrank', nrank ! CALL init(nrank, nterms, mat, nlsym=.FALSE., nlpos=nlpos, & & nlforce_zero=nlforce_zero) CALL dismat(splxy, mat) ALLOCATE(arr(nrank)) IF(nrank.LT.100) THEN DO i=1,nrank CALL getele(mat, i, i, arr(i)) END DO WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', arr END IF ! ! BC on Matrix ! WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat) CALL ibcmat(mat, ny) IF(nrank.LT.100) THEN DO i=1,nrank CALL getele(mat, i, i, arr(i)) END DO WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', arr WRITE(*,'(a)') 'Last rows' DO i=nrank-ny,nrank CALL getrow(mat, i, arr) WRITE(*,'(10(1pe12.3))') arr END DO END IF tmat = seconds() - t0 ! ! RHS assembly ! ALLOCATE(rhs(nrank), sol(nrank)) CALL disrhs(mbess, splxy, rhs) ! ! BC on RHS ! CALL ibcrhs(rhs, ny) ! CALL putarr(fid, '/RHS', rhs, 'RHS of linear system') WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat) WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem() !=========================================================================== ! 3.0 Solve the dicretized system ! t0 = seconds() CALL to_mat(mat) tconv = seconds() -t0 WRITE(*,'(/a,l4)') 'associated(mat%mat)', ASSOCIATED(mat%mat) WRITE(*,'(a,1pe12.3)') 'Mem used (MB) after TO_MAT', mem() ! t0 = seconds() CALL reord_mat(mat) CALL putmat(fid, '/MAT', mat) treord = seconds() - t0 ! t0 = seconds() CALL numfact(mat) tfact = seconds() - t0 WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem() WRITE(*,'(/a,i12)') 'Number of nonzeros in factors of A = ',mat%p%iparm(18) WRITE(*,'(a,i12)') 'Number of factorization MFLOPS = ',mat%p%iparm(19) gflops1 = mat%p%iparm(19) / tfact / 1.d3 ! CALL bsolve(mat, rhs, sol) WRITE(*,'(/a, 1pe16.8)') 'Norm of sol =', SQRT(DOT_PRODUCT(sol,sol)) t0 = seconds() DO it=1,nits ! nits iterations for timing CALL bsolve(mat, rhs, sol) sol(1:ny-1) = sol(ny) END DO WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem() tsolv = (seconds() - t0)/REAL(nits) ! ! Spline coefficients, taking into account of periodicity in y ! Note: in SOL, y was numbered first. ! dimx = splxy%sp1%dim dimy = splxy%sp2%dim ALLOCATE(bcoef(0:dimx-1, 0:dimy-1)) DO j=0,dimy-1 DO i=0,dimx-1 ij = MODULO(j,ny) + i*ny + 1 bcoef(i,j) = sol(ij) END DO END DO WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef) CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution') !=========================================================================== ! 4.0 Check the solution ! ! Check function values computed with various method ! ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny)) DO i=0,nx DO j=0,ny solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j)) END DO END DO jder = (/0,0/) ! ! Compute PPFORM/BCOEFS at first call to gridval CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef) ! WRITE(*,'(/a)') '*** Checking solutions' t0 = seconds() DO it=1,nits ! nits iterations for timing CALL gridval(splxy, xgrid, ygrid, solcal, jder) END DO tgrid = (seconds() - t0)/REAL(nits) errsol = solana - solcal IF( SIZE(bcoef,2) .LE. 10 ) THEN CALL prnmat('BCOEF', bcoef) CALL prnmat('SOLANA', solana) CALL prnmat('SOLCAL', solcal) CALL prnmat('ERRSOL', errsol) END IF WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & & norm2(errsol) / norm2(solana) WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid ! CALL putarr(fid, '/xgrid', xgrid, 'r') CALL putarr(fid, '/ygrid', ygrid, '\theta') CALL putarr(fid, '/sol', solcal, 'Solutions') CALL putarr(fid, '/solana', solana,'Exact solutions') CALL putarr(fid, '/errors', errsol, 'Errors') ! ! Check derivatives d/dx and d/dy ! WRITE(*,'(/a)') '*** Checking gradient' DO i=0,nx DO j=0,ny IF( mbess .EQ. 0 ) THEN solana(i,j) = -2.0d0 * xgrid(i) ELSE solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * & & xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) END IF END DO END DO ! jder = (/1,0/) CALL gridval(splxy, xgrid, ygrid, solcal, jder) errsol = solana - solcal CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions') WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol) ! DO i=0,nx DO j=0,ny solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j)) END DO END DO ! jder = (/0,1/) CALL gridval(splxy, xgrid, ygrid, solcal, jder) CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions') errsol = solana - solcal WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol) ! WRITE(*,'(/a)') '---' WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s) ', tconv WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s) ', treord WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s) ', tsolv WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1 !=========================================================================== ! 5.0 Clear the matrix and recompute ! WRITE(*,'(/a)') 'Recompute the solver ...' t0 = seconds() CALL clear_mat(mat) CALL dismat(splxy, mat) CALL ibcmat(mat, ny) tmat = seconds()-t0 ! t0 = seconds() CALL numfact(mat) tfact = seconds()-t0 gflops1 = mat%p%iparm(19) / tfact / 1.d3 ! t0 = seconds() ALLOCATE(newsol(nrank)) CALL bsolve(mat, rhs, newsol) newsol(1:ny-1) = newsol(ny) tsolv = seconds()-t0 ! WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol)) WRITE(*,'(/a)') '---' WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact WRITE(*,'(a,1pe12.3)') 'Backsolve(2) time (s) ', tsolv WRITE(*,'(a,1pe12.3)') 'Total (s) ', tmat+tfact+tsolv WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1 ! !=========================================================================== ! 6.0 Another matrix to solve ! WRITE(*,'(/a, 1pe16.8)') 'Norm of sol =', SQRT(DOT_PRODUCT(sol,sol)) !!$ PRINT*, 'current/last matid, matid', current_matid, last_matid, mat%matid ! WRITE(*,'(/a)') ' Another solver ...' ! CALL init(nrank, nterms, newmat, nlsym=.FALSE., nlpos=nlpos, & & nlforce_zero=nlforce_zero) CALL mcopy(mat, newmat) !!$ CALL clear_mat(newmat) !!$ CALL maddto(newmat, 1000.0d0, mat) CALL factor(newmat) !!$ CALL dismat(splxy, newmat) !!$ CALL ibcmat(newmat, ny) !!$ CALL to_mat(newmat) !!$ CALL reord_mat(newmat) !!$ CALL numfact(newmat) CALL bsolve(newmat, rhs, newsol) WRITE(*,'(/a, 1pe16.8)') 'Norm of newsol =', SQRT(DOT_PRODUCT(newsol,newsol)) !!$ PRINT*, 'current/last matid, matid', current_matid, last_matid, newmat%matid ! CALL bsolve(mat, rhs, sol) WRITE(*,'(/a, 1pe16.8)') 'Norm of sol =', SQRT(DOT_PRODUCT(sol,sol)) !!$ PRINT*, 'current/last matid, matid', current_matid, last_matid, mat%matid ! !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(xgrid, rhs, sol) DEALLOCATE(solcal, solana, errsol) DEALLOCATE(bcoef) DEALLOCATE(arr) DEALLOCATE(newsol) CALL destroy_sp(splxy) CALL destroy(mat) CALL destroy(newmat) ! CALL closef(fid) !=========================================================================== ! CONTAINS FUNCTION norm2(x) ! ! Compute the 2-norm of array x ! IMPLICIT NONE DOUBLE PRECISION :: norm2 DOUBLE PRECISION, INTENT(in) :: x(:,:) DOUBLE PRECISION :: sum2 INTEGER :: i, j ! sum2 = 0.0d0 DO i=1,SIZE(x,1) DO j=1,SIZE(x,2) sum2 = sum2 + x(i,j)**2 END DO END DO norm2 = SQRT(sum2) END FUNCTION norm2 SUBROUTINE prnmat(label, mat) CHARACTER(len=*) :: label DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat INTEGER :: i WRITE(*,'(/a)') TRIM(label) DO i=1,SIZE(mat,1) WRITE(*,'(10(1pe12.3))') mat(i,:) END DO END SUBROUTINE prnmat END PROGRAM main ! !+++ SUBROUTINE meshdist(c, x, nx) ! ! Construct an 1d non-equidistant mesh given a ! mesh distribution function. ! IMPLICIT NONE DOUBLE PRECISION, INTENT(in) :: c(5) INTEGER, INTENT(iN) :: nx DOUBLE PRECISION, INTENT(inout) :: x(0:nx) INTEGER :: nintg DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint DOUBLE PRECISION :: a, b, dx, f0, f1, scal INTEGER :: i, k ! a=x(0) b=x(nx) nintg = 10*nx ALLOCATE(xint(0:nintg), fint(0:nintg)) ! ! Mesh distribution ! dx = (b-a)/REAL(nintg) xint(0) = a fint(0) = 0.0d0 f1 = fdist(xint(0)) DO i=1,nintg f0 = f1 xint(i) = xint(i-1) + dx f1 = fdist(xint(i)) fint(i) = fint(i-1) + 0.5*(f0+f1) END DO ! ! Normalization ! scal = REAL(nx) / fint(nintg) fint(0:nintg) = fint(0:nintg) * scal !!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint ! ! Obtain mesh point by (inverse) interpolation ! k = 1 DO i=1,nintg-1 IF( fint(i) .GE. REAL(k) ) THEN x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & & (k-fint(i)) k = k+1 END IF END DO ! DEALLOCATE(xint, fint) CONTAINS DOUBLE PRECISION FUNCTION fdist(x) DOUBLE PRECISION, INTENT(in) :: x fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) END FUNCTION fdist END SUBROUTINE meshdist !+++ diff --git a/src/wsmp_mod.f90 b/src/wsmp_mod.f90 index 8026640..ff71b7f 100644 --- a/src/wsmp_mod.f90 +++ b/src/wsmp_mod.f90 @@ -1,1835 +1,1835 @@ !> !> @file wsmp_mod.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> MODULE wsmp_bsplines ! ! WSMP_BSPLINES: Simple interface to the sparse direct solver WSMP. ! ! T.M. Tran, CRPP-EPFL ! November 2011 ! USE sparse IMPLICIT NONE ! INTEGER, SAVE :: current_matid = -1 INTEGER, SAVE :: last_matid = -1 ! TYPE wsmp_param INTEGER :: iparm(64) DOUBLE PRECISION :: dparm(64) END TYPE wsmp_param ! TYPE wsmp_mat INTEGER :: matid=-1 INTEGER :: rank=0, nnz INTEGER :: nterms, kmat, nrhs LOGICAL :: nlsym LOGICAL :: nlpos LOGICAL :: nlforce_zero TYPE(spmat), POINTER :: mat => NULL() INTEGER, POINTER :: cols(:) => NULL() INTEGER, POINTER :: irow(:) => NULL() INTEGER, POINTER :: perm(:) => NULL() INTEGER, POINTER :: invp(:) => NULL() INTEGER, POINTER :: mrp(:) => NULL() DOUBLE PRECISION, POINTER :: diag(:) => NULL() DOUBLE PRECISION, POINTER :: val(:) => NULL() DOUBLE PRECISION, POINTER :: aux(:) => NULL() TYPE(wsmp_param) :: p END TYPE wsmp_mat ! TYPE zwsmp_mat INTEGER :: matid=-1 INTEGER :: rank=0, nnz INTEGER :: nterms, kmat, nrhs LOGICAL :: nlherm LOGICAL :: nlsym LOGICAL :: nlpos LOGICAL :: nlforce_zero TYPE(zspmat), POINTER :: mat => NULL() INTEGER, POINTER :: cols(:) => NULL() INTEGER, POINTER :: irow(:) => NULL() INTEGER, POINTER :: perm(:) => NULL() INTEGER, POINTER :: invp(:) => NULL() INTEGER, POINTER :: mrp(:) => NULL() DOUBLE COMPLEX, POINTER :: diag(:) => NULL() DOUBLE COMPLEX, POINTER :: val(:) => NULL() DOUBLE COMPLEX, POINTER :: aux(:) => NULL() TYPE(wsmp_param) :: p END TYPE zwsmp_mat ! INTERFACE init MODULE PROCEDURE init_wsmp_mat, init_zwsmp_mat END INTERFACE init ! INTERFACE check_mat MODULE PROCEDURE check_wsmp_mat, check_zwsmp_mat END INTERFACE check_mat ! INTERFACE clear_mat MODULE PROCEDURE clear_wsmp_mat, clear_zwsmp_mat END INTERFACE clear_mat ! INTERFACE updtmat MODULE PROCEDURE updt_wsmp_mat, updt_zwsmp_mat END INTERFACE updtmat ! INTERFACE putele MODULE PROCEDURE putele_wsmp_mat, putele_zwsmp_mat END INTERFACE putele ! INTERFACE getele MODULE PROCEDURE getele_wsmp_mat, getele_zwsmp_mat END INTERFACE getele ! INTERFACE putrow MODULE PROCEDURE putrow_wsmp_mat, putrow_zwsmp_mat END INTERFACE putrow ! INTERFACE getrow MODULE PROCEDURE getrow_wsmp_mat, getrow_zwsmp_mat END INTERFACE getrow ! INTERFACE putcol MODULE PROCEDURE putcol_wsmp_mat, putcol_zwsmp_mat END INTERFACE putcol ! INTERFACE getcol MODULE PROCEDURE getcol_wsmp_mat, getcol_zwsmp_mat END INTERFACE getcol ! INTERFACE get_count MODULE PROCEDURE get_count_wsmp_mat, get_count_zwsmp_mat END INTERFACE get_count ! INTERFACE to_mat MODULE PROCEDURE to_wsmp_mat, to_zwsmp_mat END INTERFACE to_mat ! INTERFACE reord_mat MODULE PROCEDURE reord_wsmp_mat, reord_zwsmp_mat END INTERFACE reord_mat ! INTERFACE numfact MODULE PROCEDURE numfact_wsmp_mat, numfact_zwsmp_mat END INTERFACE numfact ! INTERFACE factor MODULE PROCEDURE factor_wsmp_mat, factor_zwsmp_mat END INTERFACE factor ! INTERFACE bsolve MODULE PROCEDURE bsolve_wsmp_mat1, bsolve_wsmp_matn, & & bsolve_zwsmp_mat1, bsolve_zwsmp_matn END INTERFACE bsolve ! INTERFACE vmx MODULE PROCEDURE vmx_wsmp_mat, vmx_wsmp_matn, & & vmx_zwsmp_mat, vmx_zwsmp_matn END INTERFACE vmx ! INTERFACE destroy MODULE PROCEDURE destroy_wsmp_mat, destroy_zwsmp_mat END INTERFACE destroy ! INTERFACE putmat MODULE PROCEDURE put_wsmp_mat, put_zwsmp_mat END INTERFACE putmat ! INTERFACE getmat MODULE PROCEDURE get_wsmp_mat, get_zwsmp_mat END INTERFACE getmat ! INTERFACE mcopy MODULE PROCEDURE mcopy_wsmp_mat, mcopy_zwsmp_mat END INTERFACE mcopy ! INTERFACE maddto MODULE PROCEDURE maddto_wsmp_mat, maddto_zwsmp_mat END INTERFACE maddto ! INTERFACE psum_mat MODULE PROCEDURE psum_wsmp_mat, psum_zwsmp_mat END INTERFACE psum_mat ! INTERFACE p2p_mat MODULE PROCEDURE p2p_wsmp_mat, p2p_zwsmp_mat END INTERFACE p2p_mat ! CONTAINS !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE init_wsmp_mat(n, nterms, mat, kmat, nlsym, nlpos, & & nlforce_zero) ! ! Initialize an empty sparse wsmp matrix ! INTEGER, INTENT(in) :: n, nterms TYPE(wsmp_mat) :: mat INTEGER, OPTIONAL, INTENT(in) :: kmat LOGICAL, OPTIONAL, INTENT(in) :: nlsym LOGICAL, OPTIONAL, INTENT(in) :: nlpos LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero INTEGER :: info INTEGER :: idummy = 0 DOUBLE PRECISION :: dummy = 0.0d0 ! ! Store away (valid) current matrix id ! IF(current_matid .GE. 0) THEN CALL wstoremat(current_matid, info) IF(info.NE.0) THEN WRITE(*,'(a,i4)') 'INIT: WSTOREMAT failed WITH error', info STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF END IF last_matid = last_matid+1 mat%matid = last_matid current_matid = mat%matid ! ! Initialize sparse matrice structure ! mat%rank = n mat%nterms = nterms mat%nnz = 0 mat%nlsym = .FALSE. mat%nlpos = .TRUE. mat%nrhs = 1 mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val IF(PRESENT(kmat)) mat%kmat = kmat IF(PRESENT(nlsym)) mat%nlsym = nlsym IF(PRESENT(nlpos)) mat%nlpos = nlpos IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero IF(ASSOCIATED(mat%mat)) DEALLOCATE(mat%mat) ALLOCATE(mat%mat) CALL init(n, mat%mat) ! ! Fill 'iparm' and 'dparm' with default values ! mat%p%iparm(1:3) = 0 IF(mat%nlsym) THEN CALL wssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, & & mat%perm, mat%invp, dummy, mat%rank, mat%nrhs, & & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & & mat%p%dparm) IF(mat%nlpos) THEN mat%p%iparm(31) = 0 ELSE !!$ mat%p%iparm(31) = 1 ! LDL^T without pivoting mat%p%iparm(31) = 2 ! LDL^T with pivoting END IF ELSE CALL wgsmp(mat%rank, mat%irow, mat%cols, mat%val, dummy, mat%rank, & & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) END IF IF(mat%p%iparm(64).NE.0) THEN WRITE(*,'(a,i4)') 'INIT: Initialization failed with error', & & mat%p%iparm(64) STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF !!$ WRITE(*,'(/a/(10i8))') 'iparm', mat%p%iparm !!$ WRITE(*,'(/a/(10(1pe8.1)))') 'dparm', mat%p%dparm ! CALL setup_wsmp(mat%p%iparm, mat%p%dparm) ! CONTAINS SUBROUTINE setup_wsmp(iparm, dparm) INTEGER :: iparm(:) DOUBLE PRECISION :: dparm(:) END SUBROUTINE setup_wsmp END SUBROUTINE init_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE init_zwsmp_mat(n, nterms, mat, kmat, nlsym, nlherm, & & nlpos, nlforce_zero) ! ! Initialize an empty sparse wsmp matrix ! INTEGER, INTENT(in) :: n, nterms TYPE(zwsmp_mat) :: mat INTEGER, OPTIONAL, INTENT(in) :: kmat LOGICAL, OPTIONAL, INTENT(in) :: nlsym LOGICAL, OPTIONAL, INTENT(in) :: nlherm LOGICAL, OPTIONAL, INTENT(in) :: nlpos LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero INTEGER :: info INTEGER :: idummy = 0 DOUBLE COMPLEX :: dummy = 0.0d0 ! ! Store away (valid) current matrix id ! IF(current_matid .GE. 0) THEN CALL wstoremat(current_matid, info) IF(info.NE.0) THEN WRITE(*,'(a,i4)') 'INIT: WSTOREMAT failed WITH error', info STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF END IF last_matid = last_matid+1 mat%matid = last_matid current_matid = mat%matid ! ! Initialize sparse matrice structure ! mat%rank = n mat%nterms = nterms mat%nnz = 0 mat%nlsym = .FALSE. mat%nlherm = .FALSE. mat%nlpos = .TRUE. mat%nrhs = 1 mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val IF(PRESENT(kmat)) mat%kmat = kmat IF(PRESENT(nlsym)) mat%nlsym = nlsym IF(PRESENT(nlherm)) mat%nlherm = nlherm IF(PRESENT(nlpos)) mat%nlpos = nlpos IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero IF(ASSOCIATED(mat%mat)) DEALLOCATE(mat%mat) ALLOCATE(mat%mat) CALL init(n, mat%mat) ! ! Fill 'iparm' and 'dparm' with default values ! mat%p%iparm(1:3) = 0 IF(mat%nlherm .OR. mat%nlsym) THEN CALL zssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, & & mat%perm, mat%invp, dummy, mat%rank, mat%nrhs, & & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & & mat%p%dparm) IF(mat%nlherm) THEN IF(mat%nlpos) THEN mat%p%iparm(31) = 0 ! hermitian, positive definite ELSE mat%p%iparm(31) = 2 ! hermitian, no-definite, LDL^T with pivoting END IF ELSE mat%p%iparm(31) = 3 ! non-hermitian, symmetric END IF ELSE CALL zgsmp(mat%rank, mat%irow, mat%cols, mat%val, dummy, mat%rank, & & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) END IF IF(mat%p%iparm(64).NE.0) THEN WRITE(*,'(a,i4)') 'INIT: Initialization failed with error', & & mat%p%iparm(64) STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF !!$ WRITE(*,'(/a/(10i8))') 'iparm', mat%p%iparm !!$ WRITE(*,'(/a/(10(1pe8.1)))') 'dparm', mat%p%dparm ! CALL setup_wsmp(mat%p%iparm, mat%p%dparm) ! CONTAINS SUBROUTINE setup_wsmp(iparm, dparm) INTEGER :: iparm(:) DOUBLE PRECISION :: dparm(:) END SUBROUTINE setup_wsmp END SUBROUTINE init_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE check_wsmp_mat(mat) ! ! Check matrice id and recall the matrice if not current ! TYPE(wsmp_mat) :: mat INTEGER :: info ! IF(.NOT.mat%nlsym) THEN IF( mat%matid.NE.current_matid ) THEN WRITE(*,'(a)') "Processing multi matrices is not possible "// & & "for non-symetric matrices." STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' ELSE RETURN END IF END IF ! IF( mat%matid.NE.current_matid ) THEN IF(current_matid .GE. 0) THEN CALL wstoremat(current_matid, info) IF(info.NE.0) THEN WRITE(*,'(a,i3,a,i4)') 'Store matrix', current_matid, & & ' failed with error', info STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF END IF CALL wrecallmat(mat%matid, info) IF(info.NE.0) THEN WRITE(*,'(a,i3,a,i4)') 'Recall matrix', mat%matid, & & ' failed with error', info STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF current_matid = mat%matid END IF END SUBROUTINE check_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE check_zwsmp_mat(mat) ! ! Check matrice id and recall the matrice if not current ! TYPE(zwsmp_mat) :: mat INTEGER :: info ! IF(.NOT.mat%nlsym .AND. .NOT.mat%nlherm ) THEN IF( mat%matid.NE.current_matid ) THEN WRITE(*,'(a)') "Processing multi matrices is not possible "// & & "for non-symetric/non-hermitian matrices." STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' ELSE RETURN END IF END IF ! IF( mat%matid.NE.current_matid ) THEN IF(current_matid .GE. 0) THEN CALL wstoremat(current_matid, info) IF(info.NE.0) THEN WRITE(*,'(a,i3,a,i4)') 'Store matrix', current_matid, & & ' failed with error', info STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF END IF CALL wrecallmat(mat%matid, info) IF(info.NE.0) THEN WRITE(*,'(a,i3,a,i4)') 'Recall matrix', mat%matid, & & ' failed with error', info STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF current_matid = mat%matid END IF END SUBROUTINE check_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE clear_wsmp_mat(mat) ! ! Clear matrix, keeping its sparse structure unchanged ! TYPE(wsmp_mat) :: mat ! mat%val = 0.0d0 END SUBROUTINE clear_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE clear_zwsmp_mat(mat) ! ! Clear matrix, keeping its sparse structure unchanged ! TYPE(zwsmp_mat) :: mat ! mat%val = (0.0d0, 0.0d0) END SUBROUTINE clear_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE updt_wsmp_mat(mat, i, j, val) ! ! Update element Aij of wsmp matrix ! TYPE(wsmp_mat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE PRECISION, INTENT(in) :: val INTEGER :: k, s, e ! IF(mat%nlsym) THEN ! Store only upper part for symmetric matrices IF(i.GT.j) RETURN END IF ! IF(ASSOCIATED(mat%mat)) THEN CALL updtmat(mat%mat, i, j, val) ELSE s = mat%irow(i) e = mat%irow(i+1)-1 k = isearch(mat%cols(s:e), j) IF( k.GE.0 ) THEN mat%val(s+k) = mat%val(s+k)+val ELSE WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF END IF END SUBROUTINE updt_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE updt_zwsmp_mat(mat, i, j, val) ! ! Update element Aij of wsmp matrix ! TYPE(zwsmp_mat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE COMPLEX, INTENT(in) :: val INTEGER :: k, s, e ! IF(mat%nlherm .OR. mat%nlsym) THEN ! Store only upper part IF(i.GT.j) RETURN END IF ! IF(ASSOCIATED(mat%mat)) THEN CALL updtmat(mat%mat, i, j, val) ELSE s = mat%irow(i) e = mat%irow(i+1)-1 k = isearch(mat%cols(s:e), j) IF( k.GE.0 ) THEN IF(mat%nlherm) THEN mat%val(s+k) = mat%val(s+k)+CONJG(val) ! CSR-UT* = CSC-LT ELSE mat%val(s+k) = mat%val(s+k)+val END IF ELSE WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF END IF END SUBROUTINE updt_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putele_wsmp_mat(mat, i, j, val) ! ! Put element (i,j) of matrix ! TYPE(wsmp_mat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE PRECISION, INTENT(in) :: val INTEGER :: iput, jput INTEGER :: k, s, e ! iput = i jput = j IF(mat%nlsym) THEN IF( i.GT.j ) THEN ! Lower triangular part iput = j jput = i END IF END IF ! IF(ASSOCIATED(mat%mat)) THEN CALL putele(mat%mat, iput, jput, val, & & nlforce_zero=mat%nlforce_zero) ELSE s = mat%irow(iput) e = mat%irow(iput+1)-1 k = isearch(mat%cols(s:e), jput) IF( k.GE.0 ) THEN mat%val(s+k) = val ELSE IF(ABS(val) .GT. EPSILON(0.0d0)) THEN ! Ok for zero val WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF END IF END IF END SUBROUTINE putele_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putele_zwsmp_mat(mat, i, j, val) ! ! Put element (i,j) of matrix ! TYPE(zwsmp_mat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE COMPLEX, INTENT(in) :: val DOUBLE COMPLEX :: valput INTEGER :: iput, jput INTEGER :: k, s, e ! iput = i jput = j valput = val IF(mat%nlsym .OR. mat%nlherm) THEN IF( i.GT.j ) THEN ! Lower triangular part iput = j jput = i IF(mat%nlherm) THEN valput = CONJG(val) ELSE valput = val END IF END IF END IF ! IF(ASSOCIATED(mat%mat)) THEN CALL putele(mat%mat, iput, jput, valput, & & nlforce_zero=mat%nlforce_zero) ELSE s = mat%irow(iput) e = mat%irow(iput+1)-1 k = isearch(mat%cols(s:e), jput) IF( k.GE.0 ) THEN IF(mat%nlherm) THEN mat%val(s+k) = CONJG(valput) ! CSR-UT* = CSC-LT ELSE mat%val(s+k) = valput END IF ELSE IF(ABS(val) .GT. EPSILON(0.0d0)) THEN ! Ok for zero val WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF END IF END IF END SUBROUTINE putele_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getele_wsmp_mat(mat, i, j, val) ! ! Get element (i,j) of sparse matrix ! TYPE(wsmp_mat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE PRECISION, INTENT(out) :: val INTEGER :: iget, jget INTEGER :: k, s, e ! iget = i jget = j IF(mat%nlsym) THEN IF( i.GT.j ) THEN ! Lower triangular part iget = j jget = i END IF END IF ! IF(ASSOCIATED(mat%mat)) THEN CALL getele(mat%mat, iget, jget, val) ELSE s = mat%irow(iget) e = mat%irow(iget+1)-1 k = isearch(mat%cols(s:e), jget) IF( k.GE.0 ) THEN val =mat%val(s+k) ELSE val = 0.0d0 ! Assume zero val if not found END IF END IF END SUBROUTINE getele_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getele_zwsmp_mat(mat, i, j, val) ! ! Get element (i,j) of sparse matrix ! TYPE(zwsmp_mat) :: mat INTEGER, INTENT(in) :: i, j DOUBLE COMPLEX, INTENT(out) :: val DOUBLE COMPLEX :: valget INTEGER :: iget, jget INTEGER :: k, s, e ! iget = i jget = j IF(mat%nlherm .OR. mat%nlsym) THEN IF( i.GT.j ) THEN ! Lower triangular part iget = j jget = i END IF END IF ! IF(ASSOCIATED(mat%mat)) THEN CALL getele(mat%mat, iget, jget, valget) ELSE s = mat%irow(iget) e = mat%irow(iget+1)-1 k = isearch(mat%cols(s:e), jget) IF( k.GE.0 ) THEN IF(mat%nlherm) THEN valget = CONJG(mat%val(s+k)) ! CSR-UT* = CSC-LT ELSE valget = mat%val(s+k) END IF ELSE valget = (0.0d0,0.0d0) ! Assume zero val if not found END IF END IF val = valget IF( i.GT.j ) THEN IF(mat%nlherm) THEN val = CONJG(valget) END IF END IF END SUBROUTINE getele_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putrow_wsmp_mat(amat, i, arr) ! ! Put a row into sparse matrix ! TYPE(wsmp_mat), INTENT(inout) :: amat INTEGER, INTENT(in) :: i DOUBLE PRECISION, INTENT(in) :: arr(:) INTEGER :: j ! DO j=1,amat%rank CALL putele(amat, i, j, arr(j)) END DO END SUBROUTINE putrow_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putrow_zwsmp_mat(amat, i, arr) ! ! Put a row into sparse matrix ! TYPE(zwsmp_mat), INTENT(inout) :: amat INTEGER, INTENT(in) :: i DOUBLE COMPLEX, INTENT(in) :: arr(:) INTEGER :: j ! DO j=1,amat%rank CALL putele(amat, i, j, arr(j)) END DO END SUBROUTINE putrow_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getrow_wsmp_mat(amat, i, arr) ! ! Get a row from sparse matrix ! TYPE(wsmp_mat), INTENT(in) :: amat INTEGER, INTENT(in) :: i DOUBLE PRECISION, INTENT(out) :: arr(:) INTEGER :: j ! DO j=1,amat%rank CALL getele(amat, i, j, arr(j)) END DO END SUBROUTINE getrow_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getrow_zwsmp_mat(amat, i, arr) ! ! Get a row from sparse matrix ! TYPE(zwsmp_mat), INTENT(in) :: amat INTEGER, INTENT(in) :: i DOUBLE COMPLEX, INTENT(out) :: arr(:) INTEGER :: j ! DO j=1,amat%rank CALL getele(amat, i, j, arr(j)) END DO END SUBROUTINE getrow_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putcol_wsmp_mat(amat, j, arr) ! ! Put a column into sparse matrix ! TYPE(wsmp_mat), INTENT(inout) :: amat INTEGER, INTENT(in) :: j DOUBLE PRECISION, INTENT(in) :: arr(:) INTEGER :: i ! DO i=1,amat%rank CALL putele(amat, i, j, arr(i)) END DO END SUBROUTINE putcol_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE putcol_zwsmp_mat(amat, j, arr) ! ! Put a column into sparse matrix ! TYPE(zwsmp_mat), INTENT(inout) :: amat INTEGER, INTENT(in) :: j DOUBLE COMPLEX, INTENT(in) :: arr(:) INTEGER :: i ! DO i=1,amat%rank CALL putele(amat, i, j, arr(i)) END DO END SUBROUTINE putcol_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getcol_wsmp_mat(amat, j, arr) ! ! Get a column from sparse matrix ! TYPE(wsmp_mat), INTENT(in) :: amat INTEGER, INTENT(in) :: j DOUBLE PRECISION, INTENT(out) :: arr(:) INTEGER :: i ! DO i=1,amat%rank CALL getele(amat, i, j, arr(i)) END DO END SUBROUTINE getcol_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE getcol_zwsmp_mat(amat, j, arr) ! ! Get a column from sparse matrix ! TYPE(zwsmp_mat), INTENT(in) :: amat INTEGER, INTENT(in) :: j DOUBLE COMPLEX, INTENT(out) :: arr(:) INTEGER :: i ! DO i=1,amat%rank CALL getele(amat, i, j, arr(i)) END DO END SUBROUTINE getcol_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ INTEGER FUNCTION get_count_wsmp_mat(mat, nnz) ! ! Number of non-zeros in sparse matrix ! TYPE(wsmp_mat) :: mat INTEGER, INTENT(out), OPTIONAL :: nnz(:) INTEGER :: i ! IF(ASSOCIATED(mat%mat)) THEN get_count_wsmp_mat = get_count(mat%mat, nnz) ELSE get_count_wsmp_mat = mat%nnz IF(PRESENT(nnz)) THEN DO i=1,mat%rank nnz(i) = mat%irow(i+1)-mat%irow(i) END DO END IF END IF END FUNCTION get_count_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ INTEGER FUNCTION get_count_zwsmp_mat(mat, nnz) ! ! Number of non-zeros in sparse matrix ! TYPE(zwsmp_mat) :: mat INTEGER, INTENT(out), OPTIONAL :: nnz(:) INTEGER :: i ! IF(ASSOCIATED(mat%mat)) THEN get_count_zwsmp_mat = get_count(mat%mat, nnz) ELSE get_count_zwsmp_mat = mat%nnz IF(PRESENT(nnz)) THEN DO i=1,mat%rank nnz(i) = mat%irow(i+1)-mat%irow(i) END DO END IF END IF END FUNCTION get_count_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE to_wsmp_mat(mat, nlkeep) ! ! Convert linked list spmat to wsmp matrice structure ! TYPE(wsmp_mat) :: mat LOGICAL, INTENT(in), OPTIONAL :: nlkeep INTEGER :: i, nnz, rank, s, e LOGICAL :: nlclean ! nlclean = .TRUE. IF(PRESENT(nlkeep)) THEN nlclean = .NOT. nlkeep END IF ! ! Allocate the WSMP matrix structure ! nnz = get_count(mat) rank = mat%rank mat%nnz = nnz IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm) IF(ASSOCIATED(mat%invp)) DEALLOCATE(mat%invp) IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) ALLOCATE(mat%val(nnz)) IF(mat%nlsym) THEN ALLOCATE(mat%perm(rank)) ALLOCATE(mat%invp(rank)) END IF ALLOCATE(mat%irow(rank+1)) ALLOCATE(mat%cols(nnz)) ! ! Fill WSMP structure and deallocate the sparse rows ! mat%irow = 1 DO i=1,rank mat%irow(i+1) = mat%irow(i) + mat%mat%row(i)%nnz s = mat%irow(i) e = mat%irow(i+1)-1 CALL getrow(mat%mat%row(i), mat%val(s:e), mat%cols(s:e)) IF(nlclean) CALL destroy(mat%mat%row(i)) END DO IF(nlclean) DEALLOCATE(mat%mat) END SUBROUTINE to_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE to_zwsmp_mat(mat, nlkeep) ! ! Convert linked list spmat to wsmp matrice structure ! TYPE(zwsmp_mat) :: mat LOGICAL, INTENT(in), OPTIONAL :: nlkeep INTEGER :: i, nnz, rank, s, e LOGICAL :: nlclean ! nlclean = .TRUE. IF(PRESENT(nlkeep)) THEN nlclean = .NOT. nlkeep END IF ! ! Allocate the WSMP matrix structure ! nnz = get_count(mat) rank = mat%rank mat%nnz = nnz IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm) IF(ASSOCIATED(mat%invp)) DEALLOCATE(mat%invp) IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) ALLOCATE(mat%val(nnz)) IF(mat%nlsym) THEN ALLOCATE(mat%perm(rank)) ALLOCATE(mat%invp(rank)) END IF ALLOCATE(mat%irow(rank+1)) ALLOCATE(mat%cols(nnz)) ! ! Fill WSMP structure and deallocate the sparse rows ! mat%irow = 1 DO i=1,rank mat%irow(i+1) = mat%irow(i) + mat%mat%row(i)%nnz s = mat%irow(i) e = mat%irow(i+1)-1 CALL getrow(mat%mat%row(i), mat%val(s:e), mat%cols(s:e)) IF(nlclean) CALL destroy(mat%mat%row(i)) END DO IF(mat%nlherm) THEN mat%val(:) = CONJG(mat%val(:)) ! CSR-UT* = CSC-LT END IF IF(nlclean) DEALLOCATE(mat%mat) END SUBROUTINE to_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE reord_wsmp_mat(mat) ! ! Reordering and symbolic factorization ! TYPE(wsmp_mat) :: mat DOUBLE PRECISION :: dummy ! ! Recall the matrice if not current ! CALL check_mat(mat) ! IF(mat%nlsym) THEN mat%p%iparm(2) = 1 ! Ordering mat%p%iparm(3) = 2 ! Symbolic factorization CALL wssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, & & mat%perm, mat%invp, dummy, mat%rank, mat%nrhs, & & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & & mat%p%dparm) ELSE mat%p%iparm(2) = 1 ! Analysis and reordering mat%p%iparm(3) = 1 CALL wgsmp(mat%rank, mat%irow, mat%cols, mat%val, dummy, mat%rank, & & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) END IF IF(mat%p%iparm(64).NE.0) THEN WRITE(*,'(a,i4)') 'REORD: Reordering failed with error', mat%p%iparm(64) STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF END SUBROUTINE reord_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE reord_zwsmp_mat(mat) ! ! Reordering and symbolic factorization ! TYPE(zwsmp_mat) :: mat DOUBLE COMPLEX :: dummy ! ! Recall the matrice if not current ! CALL check_mat(mat) ! IF(mat%nlsym .OR. mat%nlherm) THEN mat%p%iparm(2) = 1 ! Ordering mat%p%iparm(3) = 2 ! Symbolic factorization CALL zssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, & & mat%perm, mat%invp, dummy, mat%rank, mat%nrhs, & & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & & mat%p%dparm) !!$ WRITE(*,'(a,i3/(10i8))') 'REORD: matrice', mat%matid, mat%perm ELSE mat%p%iparm(2) = 1 ! Analysis and reordering mat%p%iparm(3) = 1 CALL zgsmp(mat%rank, mat%irow, mat%cols, mat%val, dummy, mat%rank, & & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) END IF IF(mat%p%iparm(64).NE.0) THEN WRITE(*,'(a,i4)') 'REORD: Reordering failed with error', mat%p%iparm(64) STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF END SUBROUTINE reord_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE numfact_wsmp_mat(mat) ! ! Numerical factorization ! TYPE(wsmp_mat) :: mat DOUBLE PRECISION :: dummy ! ! Recall the matrice if not current ! CALL check_mat(mat) ! IF(mat%nlsym) THEN mat%p%iparm(2) = 3 ! Numerical factorization mat%p%iparm(3) = 3 CALL wssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, & & mat%perm, mat%invp, dummy, mat%rank, mat%nrhs, & & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & & mat%p%dparm) ELSE mat%p%iparm(2) = 2 ! Factorization mat%p%iparm(3) = 2 CALL wgsmp(mat%rank, mat%irow, mat%cols, mat%val, dummy, mat%rank, & & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) END IF IF(mat%p%iparm(64).NE.0) THEN WRITE(*,'(a,i4)') 'NUMFACT: Num. Factor failed with error', mat%p%iparm(64) STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF END SUBROUTINE numfact_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE numfact_zwsmp_mat(mat) ! ! Numerical factorization ! TYPE(zwsmp_mat) :: mat DOUBLE COMPLEX :: dummy ! ! Recall the matrice if not current ! CALL check_mat(mat) ! IF(mat%nlsym .OR. mat%nlherm) THEN mat%p%iparm(2) = 3 ! Numerical factorization mat%p%iparm(3) = 3 CALL zssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, & & mat%perm, mat%invp, dummy, mat%rank, mat%nrhs, & & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & & mat%p%dparm) ELSE mat%p%iparm(2) = 2 ! Factorization mat%p%iparm(3) = 2 CALL zgsmp(mat%rank, mat%irow, mat%cols, mat%val, dummy, mat%rank, & & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) END IF IF(mat%p%iparm(64).NE.0) THEN WRITE(*,'(a,i4)') 'NUMFACT: Num. Factor failed with error', mat%p%iparm(64) STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF END SUBROUTINE numfact_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE factor_wsmp_mat(mat, nlreord) ! ! Factor (create +reorder + factor) a wsmp_mat matrix ! TYPE(wsmp_mat) :: mat LOGICAL, OPTIONAL, INTENT(in) :: nlreord LOGICAL :: mlreord !---------------------------------------------------------------------- ! 1.0 Creation from the sparse rows ! IF(ASSOCIATED(mat%mat)) THEN CALL to_mat(mat) END IF !---------------------------------------------------------------------- ! 2.0 Reordering and symbolic factorization phase ! mlreord = .TRUE. IF(PRESENT(nlreord)) mlreord = nlreord IF(mlreord) THEN CALL reord_mat(mat) END IF !---------------------------------------------------------------------- ! 3.0 Numerical factorization ! CALL numfact(mat) END SUBROUTINE factor_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE factor_zwsmp_mat(mat, nlreord) ! ! Factor (create +reorder + factor) a wsmp_mat matrix ! TYPE(zwsmp_mat) :: mat LOGICAL, OPTIONAL, INTENT(in) :: nlreord LOGICAL :: mlreord !---------------------------------------------------------------------- ! 1.0 Creation from the sparse rows ! IF(ASSOCIATED(mat%mat)) THEN CALL to_mat(mat) END IF !---------------------------------------------------------------------- ! 2.0 Reordering and symbolic factorization phase ! mlreord = .TRUE. IF(PRESENT(nlreord)) mlreord = nlreord IF(mlreord) THEN CALL reord_mat(mat) END IF !---------------------------------------------------------------------- ! 3.0 Numerical factorization ! CALL numfact(mat) END SUBROUTINE factor_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE bsolve_wsmp_mat1(mat, rhs, sol, nref) ! ! Backsolve, using Wsmp ! TYPE(wsmp_mat) :: mat DOUBLE PRECISION :: rhs(:) DOUBLE PRECISION, OPTIONAL :: sol(:) INTEGER, OPTIONAL :: nref DOUBLE PRECISION :: dummy ! ! Recall the matrice if not current ! CALL check_mat(mat) ! IF(mat%nlsym) THEN mat%p%iparm(2) = 4 ! Back substitution mat%p%iparm(3) = 4 ELSE mat%p%iparm(2) = 3 ! Back substitution mat%p%iparm(3) = 3 END IF mat%p%iparm(6) = 0 ! Max numbers of iterative refinement steps IF(PRESENT(nref)) THEN IF(mat%nlsym) THEN mat%p%iparm(3) = 5 ELSE mat%p%iparm(3) = 4 END IF mat%p%iparm(6) = nref END IF mat%nrhs = 1 IF(PRESENT(sol)) THEN sol = rhs IF(mat%nlsym) THEN CALL wssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, & & mat%perm, mat%invp, sol, mat%rank, mat%nrhs, & & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & & mat%p%dparm) ELSE CALL wgsmp(mat%rank, mat%irow, mat%cols, mat%val, sol, mat%rank, & & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) END IF ELSE IF(mat%nlsym) THEN CALL wssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, & & mat%perm, mat%invp, rhs, mat%rank, mat%nrhs, & & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & & mat%p%dparm) ELSE CALL wgsmp(mat%rank, mat%irow, mat%cols, mat%val, rhs, mat%rank, & & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) END IF END IF IF(mat%p%iparm(64).NE.0) THEN WRITE(*,'(a,i4)') 'BSOLVE: Failed with error', mat%p%iparm(64) STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF ! END SUBROUTINE bsolve_wsmp_mat1 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE bsolve_zwsmp_mat1(mat, rhs, sol, nref) ! ! Backsolve, using Wsmp ! TYPE(zwsmp_mat) :: mat DOUBLE COMPLEX :: rhs(:) DOUBLE COMPLEX, OPTIONAL :: sol(:) INTEGER, OPTIONAL :: nref DOUBLE COMPLEX :: dummy ! ! Recall the matrice if not current ! CALL check_mat(mat) ! IF(mat%nlsym .OR. mat%nlherm) THEN mat%p%iparm(2) = 4 ! Back substitution mat%p%iparm(3) = 4 ELSE mat%p%iparm(2) = 3 ! Back substitution mat%p%iparm(3) = 3 END IF mat%p%iparm(6) = 0 ! Max numbers of iterative refinement steps IF(PRESENT(nref)) THEN IF(mat%nlsym .OR. mat%nlherm) THEN mat%p%iparm(3) = 5 ELSE mat%p%iparm(3) = 4 END IF mat%p%iparm(6) = nref END IF mat%nrhs = 1 IF(PRESENT(sol)) THEN sol = rhs IF(mat%nlsym .OR. mat%nlherm) THEN CALL zssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, & & mat%perm, mat%invp, sol, mat%rank, mat%nrhs, & & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & & mat%p%dparm) ELSE CALL zgsmp(mat%rank, mat%irow, mat%cols, mat%val, sol, mat%rank, & & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) END IF ELSE IF(mat%nlsym .OR. mat%nlherm) THEN CALL zssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, & & mat%perm, mat%invp, rhs, mat%rank, mat%nrhs, & & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & & mat%p%dparm) ELSE CALL zgsmp(mat%rank, mat%irow, mat%cols, mat%val, rhs, mat%rank, & & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) END IF END IF IF(mat%p%iparm(64).NE.0) THEN WRITE(*,'(a,i4)') 'BSOLVE: Failed with error', mat%p%iparm(64) STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF ! END SUBROUTINE bsolve_zwsmp_mat1 !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE bsolve_wsmp_matn(mat, rhs, sol, nref) ! ! Backsolve, using Wsmp, multiple RHS ! TYPE(wsmp_mat) :: mat DOUBLE PRECISION :: rhs(:,:) DOUBLE PRECISION, OPTIONAL :: sol(:,:) INTEGER, OPTIONAL :: nref DOUBLE PRECISION :: dummy ! ! Recall the matrice if not current ! CALL check_mat(mat) ! IF(mat%nlsym) THEN mat%p%iparm(2) = 4 ! Back substitution mat%p%iparm(3) = 4 ELSE mat%p%iparm(2) = 3 ! Back substitution mat%p%iparm(3) = 3 END IF mat%p%iparm(6) = 0 ! Max numbers of iterative refinement steps IF(PRESENT(nref)) THEN IF(mat%nlsym) THEN mat%p%iparm(3) = 5 ELSE mat%p%iparm(3) = 4 END IF mat%p%iparm(6) = nref END IF mat%nrhs = SIZE(rhs,2) IF(PRESENT(sol)) THEN sol = rhs IF(mat%nlsym) THEN CALL wssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, & & mat%perm, mat%invp, sol, mat%rank, mat%nrhs, & & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & & mat%p%dparm) ELSE CALL wgsmp(mat%rank, mat%irow, mat%cols, mat%val, sol, mat%rank, & & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) END IF ELSE IF(mat%nlsym) THEN CALL wssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, & & mat%perm, mat%invp, rhs, mat%rank, mat%nrhs, & & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & & mat%p%dparm) ELSE CALL wgsmp(mat%rank, mat%irow, mat%cols, mat%val, rhs, mat%rank, & & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) END IF END IF IF(mat%p%iparm(64).NE.0) THEN WRITE(*,'(a,i4)') 'BSOLVE: Failed with error', mat%p%iparm(64) STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF ! END SUBROUTINE bsolve_wsmp_matn !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE bsolve_zwsmp_matn(mat, rhs, sol, nref) ! ! Backsolve, using Wsmp, multiple RHS ! TYPE(zwsmp_mat) :: mat DOUBLE COMPLEX :: rhs(:,:) DOUBLE COMPLEX, OPTIONAL :: sol(:,:) INTEGER, OPTIONAL :: nref DOUBLE COMPLEX :: dummy ! ! Recall the matrice if not current ! CALL check_mat(mat) ! IF(mat%nlsym .or. mat%nlherm) THEN mat%p%iparm(2) = 4 ! Back substitution mat%p%iparm(3) = 4 ELSE mat%p%iparm(2) = 3 ! Back substitution mat%p%iparm(3) = 3 END IF mat%p%iparm(6) = 0 ! Max numbers of iterative refinement steps IF(PRESENT(nref)) THEN IF(mat%nlsym .OR. mat%nlherm) THEN mat%p%iparm(3) = 5 ELSE mat%p%iparm(3) = 4 END IF mat%p%iparm(6) = nref END IF mat%nrhs = SIZE(rhs,2) IF(PRESENT(sol)) THEN sol = rhs IF(mat%nlsym .OR. mat%nlherm) THEN CALL zssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, & & mat%perm, mat%invp, sol, mat%rank, mat%nrhs, & & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & & mat%p%dparm) ELSE CALL zgsmp(mat%rank, mat%irow, mat%cols, mat%val, sol, mat%rank, & & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) END IF ELSE IF(mat%nlsym .OR. mat%nlherm) THEN CALL zssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, & & mat%perm, mat%invp, rhs, mat%rank, mat%nrhs, & & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & & mat%p%dparm) ELSE CALL zgsmp(mat%rank, mat%irow, mat%cols, mat%val, rhs, mat%rank, & & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) END IF END IF IF(mat%p%iparm(64).NE.0) THEN WRITE(*,'(a,i4)') 'BSOLVE: Failed with error', mat%p%iparm(64) STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF ! END SUBROUTINE bsolve_zwsmp_matn !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ FUNCTION vmx_wsmp_mat(mat, xarr) RESULT(yarr) ! ! Return product mat*x ! TYPE(wsmp_mat) :: mat DOUBLE PRECISION, INTENT(in) :: xarr(:) DOUBLE PRECISION :: yarr(SIZE(xarr)) DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0 CHARACTER(len=6) :: matdescra INTEGER :: n, i, j ! n = mat%rank ! #ifdef MKL IF(mat%nlsym) THEN matdescra = 'sun' ELSE matdescra = 'g' END IF ! CALL mkl_dcsrmv('N', n, n, alpha, matdescra, mat%val, & & mat%cols, mat%irow(1), mat%irow(2), xarr, & & beta, yarr) #else yarr = 0.0d0 DO i=1,n DO j=mat%irow(i), mat%irow(i+1)-1 yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j)) END DO IF(mat%nlsym) THEN DO j=mat%irow(i)+1, mat%irow(i+1)-1 yarr(mat%cols(j)) = yarr(mat%cols(j)) & & + mat%val(j)*xarr(i) END DO END IF END DO #endif ! END FUNCTION vmx_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ FUNCTION vmx_zwsmp_mat(mat, xarr) RESULT(yarr) ! ! Return product mat*x ! TYPE(zwsmp_mat) :: mat DOUBLE COMPLEX, INTENT(in) :: xarr(:) DOUBLE COMPLEX :: yarr(SIZE(xarr)) DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0) INTEGER :: n, i, j CHARACTER(len=6) :: matdescra CHARACTER(len=1) :: transa ! n = mat%rank ! #ifdef MKL IF(mat%nlsym) THEN matdescra = 'sun' ELSE IF(mat%nlherm) THEN matdescra = 'hun' ELSE matdescra = 'g' END IF transa='N' IF(mat%nlherm) THEN transa='T' ! Transpose(CSR-UT*) = CSC-LT* = CSR-UT END IF CALL mkl_zcsrmv(transa, n, n, alpha, matdescra, mat%val, & & mat%cols, mat%irow(1), mat%irow(2), xarr, & & beta, yarr) #else yarr = (0.0d0,0.0d0) DO i=1,n IF(mat%nlherm) THEN ! CSR-UT = (CSR-UT*)* DO j=mat%irow(i), mat%irow(i+1)-1 yarr(i) = yarr(i) + CONJG(mat%val(j))*xarr(mat%cols(j)) END DO ELSE DO j=mat%irow(i), mat%irow(i+1)-1 yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j)) END DO END IF IF(mat%nlsym) THEN DO j=mat%irow(i)+1, mat%irow(i+1)-1 yarr(mat%cols(j)) = yarr(mat%cols(j)) & & + mat%val(j)*xarr(i) END DO ELSE IF(mat%nlherm) THEN ! CSR-UT = (CSR-UT*)* DO j=mat%irow(i)+1, mat%irow(i+1)-1 yarr(mat%cols(j)) = yarr(mat%cols(j)) & & + mat%val(j)*xarr(i) END DO END IF END DO #endif ! END FUNCTION vmx_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ FUNCTION vmx_wsmp_matn(mat, xarr) RESULT(yarr) ! ! Return product mat*x ! TYPE(wsmp_mat) :: mat DOUBLE PRECISION, INTENT(in) :: xarr(:,:) DOUBLE PRECISION :: yarr(SIZE(xarr,1),SIZE(xarr,2)) ! DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0 INTEGER :: n, nrhs, i, j CHARACTER(len=6) :: matdescra ! n = mat%rank nrhs = SIZE(xarr,2) ! #ifdef MKL IF(mat%nlsym) THEN matdescra = 'sun' ELSE matdescra = 'g' END IF ! CALL mkl_dcsrmm('N', n, nrhs, n, alpha, matdescra, mat%val,& & mat%cols, mat%irow(1), mat%irow(2), xarr, & & n, beta, yarr, n) #else yarr = 0.0d0 DO i=1,n DO j=mat%irow(i), mat%irow(i+1)-1 yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:) END DO IF(mat%nlsym) THEN DO j=mat%irow(i)+1, mat%irow(i+1)-1 yarr(mat%cols(j),:) = yarr(mat%cols(j),:) & & + mat%val(j)*xarr(i,:) END DO END IF END DO #endif ! END FUNCTION vmx_wsmp_matn !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ FUNCTION vmx_zwsmp_matn(mat, xarr) RESULT(yarr) ! ! Return product mat*x ! TYPE(zwsmp_mat) :: mat DOUBLE COMPLEX, INTENT(in) :: xarr(:,:) DOUBLE COMPLEX :: yarr(SIZE(xarr,1),SIZE(xarr,2)) ! DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0) INTEGER :: n, nrhs, i, j CHARACTER(len=6) :: matdescra CHARACTER(len=1) :: transa ! n = mat%rank nrhs = SIZE(xarr,2) ! #ifdef MKL IF(mat%nlsym) THEN matdescra = 'sun' ELSE IF(mat%nlherm) THEN matdescra = 'hun' ELSE matdescra = 'g' END IF transa='N' IF(mat%nlherm) THEN transa='T' ! Transpose(CSR-UT*) = CSC-LT* = CSR-UT END IF ! CALL mkl_zcsrmm(transa, n, nrhs, n, alpha, matdescra, mat%val, & & mat%cols, mat%irow(1), mat%irow(2), xarr, n, & & beta, yarr, n) #else yarr = (0.0d0,0.0d0) DO i=1,n IF(mat%nlherm) THEN ! CSR-UT = (CSR-UT*)* DO j=mat%irow(i), mat%irow(i+1)-1 yarr(i,:) = yarr(i,:) + CONJG(mat%val(j))*xarr(mat%cols(j),:) END DO ELSE DO j=mat%irow(i), mat%irow(i+1)-1 yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:) END DO END IF IF(mat%nlsym) THEN DO j=mat%irow(i)+1, mat%irow(i+1)-1 yarr(mat%cols(j),:) = yarr(mat%cols(j),:) & & + mat%val(j)*xarr(i,:) END DO ELSE IF(mat%nlherm) THEN ! CSR-UT = (CSR-UT*)* DO j=mat%irow(i)+1, mat%irow(i+1)-1 yarr(mat%cols(j),:) = yarr(mat%cols(j),:) & & + mat%val(j)*xarr(i,:) END DO END IF END DO #endif ! END FUNCTION vmx_zwsmp_matn !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE destroy_wsmp_mat(mat) ! ! Deallocate the sparse matrix mat ! TYPE(wsmp_mat) :: mat ! IF(ASSOCIATED(mat%mat)) THEN CALL destroy(mat%mat) DEALLOCATE(mat%mat) END IF ! ! Release memory for factors for symmetric matrix IF(mat%nlsym) THEN CALL check_mat(mat) CALL wsffree END IF ! IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm) IF(ASSOCIATED(mat%invp)) DEALLOCATE(mat%invp) IF(ASSOCIATED(mat%mrp)) DEALLOCATE(mat%mrp) IF(ASSOCIATED(mat%diag)) DEALLOCATE(mat%diag) IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) IF(ASSOCIATED(mat%aux)) DEALLOCATE(mat%aux) END SUBROUTINE destroy_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE destroy_zwsmp_mat(mat) ! ! Deallocate the sparse matrix mat ! TYPE(zwsmp_mat) :: mat ! IF(ASSOCIATED(mat%mat)) THEN CALL destroy(mat%mat) DEALLOCATE(mat%mat) END IF ! ! Release memory for factors for symmetric/hermitian matrix IF(mat%nlsym .OR. mat%nlherm) THEN CALL check_mat(mat) CALL wsffree END IF ! IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm) IF(ASSOCIATED(mat%invp)) DEALLOCATE(mat%invp) IF(ASSOCIATED(mat%mrp)) DEALLOCATE(mat%mrp) IF(ASSOCIATED(mat%diag)) DEALLOCATE(mat%diag) IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) IF(ASSOCIATED(mat%aux)) DEALLOCATE(mat%aux) END SUBROUTINE destroy_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE put_wsmp_mat(fid, label, mat, str) ! ! Write matrix to hdf5 file ! USE futils ! INTEGER, INTENT(in) :: fid CHARACTER(len=*), INTENT(in) :: label TYPE(wsmp_mat) :: mat CHARACTER(len=*), OPTIONAL, INTENT(in) :: str ! IF(PRESENT(str)) THEN CALL creatg(fid, label, str) ELSE CALL creatg(fid, label) END IF CALL attach(fid, label, 'RANK', mat%rank) CALL attach(fid, label, 'NNZ', mat%nnz) CALL attach(fid, label, 'NLSYM', mat%nlsym) CALL attach(fid, label, 'NLPOS', mat%nlpos) CALL putarr(fid, TRIM(label)//'/irow', mat%irow) CALL putarr(fid, TRIM(label)//'/cols', mat%cols) CALL putarr(fid, TRIM(label)//'/val', mat%val) ! CALL creatg(fid, TRIM(label)//'/p') CALL putarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm) CALL putarr(fid, TRIM(label)//'/p/dparm', mat%p%dparm) END SUBROUTINE put_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE put_zwsmp_mat(fid, label, mat, str) ! ! Write matrix to hdf5 file ! USE futils ! INTEGER, INTENT(in) :: fid CHARACTER(len=*), INTENT(in) :: label TYPE(zwsmp_mat) :: mat CHARACTER(len=*), OPTIONAL, INTENT(in) :: str ! IF(PRESENT(str)) THEN CALL creatg(fid, label, str) ELSE CALL creatg(fid, label) END IF CALL attach(fid, label, 'RANK', mat%rank) CALL attach(fid, label, 'NNZ', mat%nnz) CALL attach(fid, label, 'NLSYM', mat%nlsym) CALL attach(fid, label, 'NLPOS', mat%nlpos) CALL attach(fid, label, 'NLHERM', mat%nlherm) CALL putarr(fid, TRIM(label)//'/irow', mat%irow) CALL putarr(fid, TRIM(label)//'/cols', mat%cols) CALL putarr(fid, TRIM(label)//'/val', mat%val) ! CALL creatg(fid, TRIM(label)//'/p') CALL putarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm) CALL putarr(fid, TRIM(label)//'/p/dparm', mat%p%dparm) END SUBROUTINE put_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE get_wsmp_mat(fid, label, mat) ! ! Read matrix from hdf5 file ! USE futils ! INTEGER, INTENT(in) :: fid CHARACTER(len=*), INTENT(in) :: label TYPE(wsmp_mat) :: mat ! CALL getatt(fid, label, 'RANK', mat%rank) CALL getatt(fid, label, 'NNZ', mat%nnz) CALL getatt(fid, label, 'NLSYM', mat%nlsym) CALL getatt(fid, label, 'NLPOS', mat%nlpos) CALL getarr(fid, TRIM(label)//'/irow', mat%irow) CALL getarr(fid, TRIM(label)//'/cols', mat%cols) IF(mat%nlsym) THEN CALL getarr(fid, TRIM(label)//'/perm', mat%perm) CALL getarr(fid, TRIM(label)//'/invp', mat%invp) END IF CALL getarr(fid, TRIM(label)//'/val', mat%val) ! CALL getarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm) CALL getarr(fid, TRIM(label)//'/p/dparm', mat%p%dparm) END SUBROUTINE get_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE get_zwsmp_mat(fid, label, mat) ! ! Read matrix from hdf5 file ! USE futils ! INTEGER, INTENT(in) :: fid CHARACTER(len=*), INTENT(in) :: label TYPE(zwsmp_mat) :: mat ! CALL getatt(fid, label, 'RANK', mat%rank) CALL getatt(fid, label, 'NNZ', mat%nnz) CALL getatt(fid, label, 'NLSYM', mat%nlsym) CALL getatt(fid, label, 'NLPOS', mat%nlpos) CALL getatt(fid, label, 'NLHERM', mat%nlherm) CALL getarr(fid, TRIM(label)//'/irow', mat%irow) CALL getarr(fid, TRIM(label)//'/cols', mat%cols) IF(mat%nlsym) THEN CALL getarr(fid, TRIM(label)//'/perm', mat%perm) CALL getarr(fid, TRIM(label)//'/invp', mat%invp) END IF CALL getarr(fid, TRIM(label)//'/val', mat%val) ! CALL getarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm) CALL getarr(fid, TRIM(label)//'/p/dparm', mat%p%dparm) END SUBROUTINE get_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE mcopy_wsmp_mat(mata, matb) ! ! Matrix copy: B = A ! TYPE(wsmp_mat) :: mata, matb INTEGER :: n, nnz ! ! Assume that matb was already initialized by init_wsmp_mat. IF(matb%rank.LE.0) THEN WRITE(*,'(a)') 'MCOPY: Mat B is not initialized with INIT' STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF ! IF(ASSOCIATED(matb%mat)) THEN CALL destroy(matb%mat) DEALLOCATE(matb%mat) END IF ! n = mata%rank nnz = mata%nnz matb%rank = n matb%nnz = nnz matb%nlsym = mata%nlsym matb%nlpos = mata%nlpos matb%nlforce_zero = mata%nlforce_zero ! IF(ASSOCIATED(matb%val)) DEALLOCATE(matb%val) IF(ASSOCIATED(matb%cols)) DEALLOCATE(matb%cols) IF(ASSOCIATED(matb%irow)) DEALLOCATE(matb%irow) IF(ASSOCIATED(matb%perm)) DEALLOCATE(matb%perm) IF(ASSOCIATED(matb%invp)) DEALLOCATE(matb%invp) ALLOCATE(matb%val(nnz)); matb%val = mata%val ALLOCATE(matb%cols(nnz)); matb%cols = mata%cols ALLOCATE(matb%irow(n+1)); matb%irow = mata%irow ALLOCATE(matb%perm(n)) IF(matb%nlsym) THEN ALLOCATE(matb%perm(n)) ALLOCATE(matb%invp(n)) END IF END SUBROUTINE mcopy_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE mcopy_zwsmp_mat(mata, matb) ! ! Matrix copy: B = A ! TYPE(zwsmp_mat) :: mata, matb INTEGER :: n, nnz ! ! Assume that matb was already initialized by init_wsmp_mat. IF(matb%rank.LE.0) THEN WRITE(*,'(a)') 'MCOPY: Mat B is not initialized with INIT' STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' END IF ! IF(ASSOCIATED(matb%mat)) THEN CALL destroy(matb%mat) DEALLOCATE(matb%mat) END IF ! n = mata%rank nnz = mata%nnz matb%rank = n matb%nnz = nnz matb%nlsym = mata%nlsym matb%nlherm = mata%nlherm matb%nlpos = mata%nlpos matb%nlforce_zero = mata%nlforce_zero ! IF(ASSOCIATED(matb%val)) DEALLOCATE(matb%val) IF(ASSOCIATED(matb%cols)) DEALLOCATE(matb%cols) IF(ASSOCIATED(matb%irow)) DEALLOCATE(matb%irow) IF(ASSOCIATED(matb%perm)) DEALLOCATE(matb%perm) IF(ASSOCIATED(matb%invp)) DEALLOCATE(matb%invp) ALLOCATE(matb%val(nnz)); matb%val = mata%val ALLOCATE(matb%cols(nnz)); matb%cols = mata%cols ALLOCATE(matb%irow(n+1)); matb%irow = mata%irow ALLOCATE(matb%perm(n)) IF(matb%nlsym) THEN ALLOCATE(matb%perm(n)) ALLOCATE(matb%invp(n)) END IF END SUBROUTINE mcopy_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE maddto_wsmp_mat(mata, alpha, matb) ! ! A <- A + alpha*B ! TYPE(wsmp_mat) :: mata, matb DOUBLE PRECISION :: alpha ! mata%val = mata%val + alpha*matb%val END SUBROUTINE maddto_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE maddto_zwsmp_mat(mata, alpha, matb) ! ! A <- A + alpha*B ! TYPE(zwsmp_mat) :: mata, matb DOUBLE COMPLEX :: alpha ! mata%val = mata%val + alpha*matb%val END SUBROUTINE maddto_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE psum_wsmp_mat(mat, comm) ! ! Parallel sum of sparse matrices ! INCLUDE "mpif.h" ! TYPE(wsmp_mat) :: mat INCLUDE 'psum_mat.tpl' END SUBROUTINE psum_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE psum_zwsmp_mat(mat, comm) ! ! Parallel sum of sparse matrices ! INCLUDE "mpif.h" ! TYPE(zwsmp_mat) :: mat INCLUDE 'psum_mat.tpl' END SUBROUTINE psum_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE p2p_wsmp_mat(mat, dest, extyp, op, comm) ! ! Point-to-point combine sparse matrix between 2 processes ! INCLUDE "mpif.h" ! TYPE(wsmp_mat) :: mat DOUBLE PRECISION, ALLOCATABLE :: val(:) INTEGER :: mpi_type=MPI_DOUBLE_PRECISION ! INCLUDE "p2p_mat.tpl" END SUBROUTINE p2p_wsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ SUBROUTINE p2p_zwsmp_mat(mat, dest, extyp, op, comm) ! ! Point-to-point combine sparse matrix between 2 processes ! INCLUDE "mpif.h" ! TYPE(zwsmp_mat) :: mat DOUBLE COMPLEX, ALLOCATABLE :: val(:) INTEGER :: mpi_type=MPI_DOUBLE_COMPLEX ! INCLUDE "p2p_mat.tpl" END SUBROUTINE p2p_zwsmp_mat !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ END MODULE wsmp_bsplines diff --git a/src/zconmat.tpl b/src/zconmat.tpl index 2d40218..8662a8f 100644 --- a/src/zconmat.tpl +++ b/src/zconmat.tpl @@ -1,214 +1,214 @@ !> !> @file zconmat.tpl !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> ! ! In this version s[lines are precalculted ! (on all n1/n2 intervals ! INTERFACE SUBROUTINE coefeq(x, y, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x, y INTEGER, INTENT(out) :: idt(:,:), idw(:,:) DOUBLE COMPLEX, INTENT(out) :: c(:) END SUBROUTINE coefeq END INTERFACE INTEGER, OPTIONAL :: maxder(2) ! maximum oder of derivatives LOGICAL, OPTIONAL :: nat_order ! Natural ordering for 2d-1d mapping ! INTEGER :: n1, nidbas1, ndim1, n1e INTEGER :: n2, nidbas2, ndim2, n2e INTEGER :: ng1, ng2 INTEGER :: i1, i2, ig1, ig2 INTEGER :: igt1, igt2, igw1, igw2, irow, jcol INTEGER, ALLOCATABLE :: left1(:), left2(:) ! LOGICAL :: nlper1, nlper2, nlnat ! INTEGER :: kterms ! Number of terms in weak form INTEGER :: k, kmaxder, it1, iw1, it2, iw2 INTEGER, ALLOCATABLE :: idert(:,:), iderw(:,:) ! Derivative order DOUBLE COMPLEX :: one=(1.0d0,0.0d0), zero=(0.0d0,0.0d0) DOUBLE COMPLEX, ALLOCATABLE :: coefs(:,:,:) ! Terms in weak form ! DOUBLE PRECISION, POINTER :: xg1(:,:), xg2(:,:), wg1(:,:), wg2(:,:) DOUBLE PRECISION, ALLOCATABLE :: fun1(:,:,:,:), fun2(:,:,:,:) DOUBLE COMPLEX, ALLOCATABLE :: mata(:,:,:,:), matc(:,:) DOUBLE COMPLEX, ALLOCATABLE :: matg(:,:,:), matf(:,:,:), matcg(:,:,:) !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl%sp1, ndim1, n1, nidbas1) CALL get_dim(spl%sp2, ndim2, n2, nidbas2) nlper1 = spl%sp1%period nlper2 = spl%sp2%period ! n1e = n1+nidbas1 ! Number of elements in 1st coordinate n2e = n2+nidbas2 ! Number of elements in 2nd coordinate IF(nlper2) n2e = n2 ! ! Gauss points and weights on all intervals ! xg1 => spl%sp1%gausx ! xg1(ng1,n1) wg1 => spl%sp1%gausw ! wg1(ng1,n1) ng1 = SIZE(xg1,1) xg2 => spl%sp2%gausx wg2 => spl%sp2%gausw ng2 = SIZE(xg2,1) ! ! Splines on all intervals ! kmaxder = 1 IF(PRESENT(maxder)) kmaxder = maxder(1) ALLOCATE(fun1(0:nidbas1,0:kmaxder,ng1,n1)) ALLOCATE(left1(ng1)) DO i1=1,n1 left1 = i1 CALL basfun(xg1(:,i1), spl%sp1, fun1(:,:,:,i1), left1) END DO DEALLOCATE(left1) ! kmaxder = 1 IF(PRESENT(maxder)) kmaxder = maxder(2) ALLOCATE(fun2(0:nidbas2,0:kmaxder,ng2,n2)) ALLOCATE(left2(ng2)) DO i2=1,n2 left2 = i2 CALL basfun(xg2(:,i2), spl%sp2, fun2(:,:,:,i2), left2) END DO DEALLOCATE(left2) ! ! Ordering in local to global matrix mapping ! nlnat = .FALSE. IF(PRESENT(nat_order)) nlnat = nat_order !=========================================================================== ! 2.0 Assembly loop ! ! Weak form ! kterms = mat%nterms ALLOCATE(idert(kterms,2)) ALLOCATE(iderw(kterms,2)) ALLOCATE(coefs(kterms,ng1,ng2)) ! ! Allocate local matrices ! ALLOCATE(mata(0:nidbas1,0:nidbas1,0:nidbas2,0:nidbas2)) ALLOCATE(matc(ng1,ng2)) ALLOCATE(matg(0:nidbas2,0:nidbas2,ng2)) ALLOCATE(matf(0:nidbas1,0:nidbas1,ng1)) ALLOCATE(matcg(ng1,0:nidbas2,0:nidbas2)) ! DO i1=1,n1 DO i2=1,n2 ! ! Coefficients of the weak form ! DO ig1=1,ng1 DO ig2=1,ng2 CALL coefeq(xg1(ig1,i1), xg2(ig2,i2), & & idert, iderw, coefs(:,ig1,ig2)) END DO END DO ! ! Compute local matrix: A <- E*(C*D^T) + A ! mata = 0.0d0 DO k=1,kterms ! matc(1:ng1,1:ng2) = coefs(k,1:ng1,1:ng2) ! DO it1=0,nidbas1 DO iw1=0,nidbas1 DO ig1=1,ng1 matf(it1,iw1,ig1) = wg1(ig1,i1) * & & fun1(it1,idert(k,1),ig1,i1) * & & fun1(iw1,iderw(k,1),ig1,i1) END DO END DO END DO ! DO it2=0,nidbas2 DO iw2=0,nidbas2 DO ig2=1,ng2 matg(it2,iw2,ig2) = wg2(ig2,i2) * & & fun2(it2,idert(k,2),ig2,i2) * & & fun2(iw2,iderw(k,2),ig2,i2) END DO END DO END DO ! CALL zgemm('N', 'T', ng1, (nidbas2+1)*(nidbas2+1), ng2, one, & & matc, ng1, matg, (nidbas2+1)*(nidbas2+1), zero, & & matcg, ng1) CALL zgemm('N', 'N', (nidbas1+1)*(nidbas1+1), (nidbas2+1)*(nidbas2+1), & & ng1, one, matf, (nidbas1+1)*(nidbas1+1), matcg, ng1, one, & & mata, (nidbas1+1)*(nidbas1+1)) ! END DO ! ! Map local matrix A to global matrix ! DO it1=0,nidbas1 igt1 = i1+it1; IF(nlper1) igt1 = MODULO(igt1-1,n1) + 1 DO it2=0,nidbas2 igt2 = i2+it2; IF(nlper2) igt2 = MODULO(igt2-1, n2) + 1 irow = glmap(igt1, igt2, n1e, n2e) DO iw1=0,nidbas1 igw1 = i1+iw1; IF(nlper1) igw1 = MODULO(igw1-1,n1) + 1 DO iw2=0,nidbas2 igw2 = i2+iw2; IF(nlper2) igw2 = MODULO(igw2-1, n2) + 1 jcol = glmap(igw1, igw2, n1e, n2e) CALL updtmat(mat, irow, jcol, mata(it1,iw1,it2,iw2)) END DO END DO END DO END DO ! END DO END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(fun1) DEALLOCATE(fun2) DEALLOCATE(idert, iderw, coefs) DEALLOCATE(mata) DEALLOCATE(matc) DEALLOCATE(matg) DEALLOCATE(matcg) DEALLOCATE(matf) ! CONTAINS INTEGER FUNCTION glmap(i,j,n1,n2) INTEGER, INTENT(in) :: i,j,n1,n2 IF(nlnat) THEN glmap = (j-1)*n1 + i ELSE glmap = (i-1)*n2 + j END IF END FUNCTION glmap diff --git a/src/zconmat_1d.tpl b/src/zconmat_1d.tpl index d066822..2de4621 100644 --- a/src/zconmat_1d.tpl +++ b/src/zconmat_1d.tpl @@ -1,144 +1,144 @@ !> !> @file zconmat_1d.tpl !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> -!> spclibs is free software: you can redistribute it and/or modify it under +!> SPClibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> -!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> ! ! In this version s[lines are precalculted ! (on all n1/n2 intervals ! INTERFACE SUBROUTINE coefeq(x, idt, idw, c) DOUBLE PRECISION, INTENT(in) :: x INTEGER, INTENT(out) :: idt(:), idw(:) DOUBLE COMPLEX, INTENT(out) :: c(:) END SUBROUTINE coefeq END INTERFACE INTEGER, OPTIONAL :: maxder ! maximum oder of derivatives ! INTEGER :: n1, nidbas1, ndim1, ng1 INTEGER :: i1, ig1 INTEGER :: irow, jcol INTEGER, ALLOCATABLE :: left1(:) ! LOGICAL :: nlper1 ! INTEGER :: kterms ! Number of terms in weak form INTEGER :: k, kmaxder, it1, iw1 INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order DOUBLE COMPLEX :: one=(1.0d0,0.0d0), zero=(0.0d0,0.0d0) DOUBLE COMPLEX, ALLOCATABLE :: coefs(:,:) ! Terms in weak form ! DOUBLE PRECISION, POINTER :: xg1(:,:), wg1(:,:) DOUBLE PRECISION, ALLOCATABLE :: fun1(:,:,:,:), fun2(:,:,:,:) DOUBLE COMPLEX, ALLOCATABLE :: mata(:,:), matc(:) DOUBLE COMPLEX, ALLOCATABLE :: matf(:,:,:) !=========================================================================== ! 1.0 Prologue ! ! Properties of spline space ! CALL get_dim(spl, ndim1, n1, nidbas1) nlper1 = spl%period ! ! Gauss points and weights on all intervals ! xg1 => spl%gausx ! xg1(ng1,n1) wg1 => spl%gausw ! wg1(ng1,n1) ng1 = SIZE(xg1,1) ! ! Splines on all intervals ! kmaxder = 1 IF(PRESENT(maxder)) kmaxder = maxder ALLOCATE(fun1(0:nidbas1,0:kmaxder,ng1,n1)) ALLOCATE(left1(ng1)) DO i1=1,n1 left1 = i1 CALL basfun(xg1(:,i1), spl, fun1(:,:,:,i1), left1) END DO DEALLOCATE(left1) !=========================================================================== ! 2.0 Assembly loop ! ! Weak form ! kterms = mat%nterms ALLOCATE(idert(kterms)) ALLOCATE(iderw(kterms)) ALLOCATE(coefs(kterms,ng1)) ! ! Allocate local matrices ! ALLOCATE(mata(0:nidbas1,0:nidbas1)) ALLOCATE(matc(ng1)) ALLOCATE(matf(0:nidbas1,0:nidbas1,ng1)) ! DO i1=1,n1 ! ! Coefficients of the weak form ! DO ig1=1,ng1 CALL coefeq(xg1(ig1,i1), idert, iderw, coefs(:,ig1)) END DO ! ! Compute local matrix: A <- F*c + A ! mata = 0.0d0 DO k=1,kterms ! matc(1:ng1) = coefs(k,1:ng1) ! DO it1=0,nidbas1 DO iw1=0,nidbas1 DO ig1=1,ng1 matf(it1,iw1,ig1) = wg1(ig1,i1) * & & fun1(it1,idert(k),ig1,i1) * & & fun1(iw1,iderw(k),ig1,i1) END DO END DO END DO ! CALL zgemv('N', (nidbas1+1)*(nidbas1+1), ng1, one, matf, & & (nidbas1+1)*(nidbas1+1), matc, 1, one, mata, 1) END DO ! ! Map local matrix A to global matrix ! DO it1=0,nidbas1 irow = i1+it1; IF(nlper1) irow = MODULO(irow-1,n1) + 1 DO iw1=0,nidbas1 jcol = i1+iw1; IF(nlper1) jcol = MODULO(jcol-1,n1) + 1 CALL updtmat(mat, irow, jcol, mata(it1,iw1)) END DO END DO ! END DO !=========================================================================== ! 9.0 Epilogue ! DEALLOCATE(fun1) DEALLOCATE(idert, iderw, coefs) DEALLOCATE(mata) DEALLOCATE(matc) DEALLOCATE(matf) diff --git a/wk/CMakeLists.txt b/wk/CMakeLists.txt index 64a12ba..546792b 100644 --- a/wk/CMakeLists.txt +++ b/wk/CMakeLists.txt @@ -1,78 +1,78 @@ # # @file CMakeLists.txt # # @brief # # @copyright # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) # SPC (Swiss Plasma Center) # -# spclibs is free software: you can redistribute it and/or modify it under +# SPClibs is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) # any later version. # -# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . # # @authors # (in alphabetical order) # @author Nicolas Richart # @author Trach-Minh Tran # set(BS_TESTS driv1 driv2 driv3 driv4 pde1d pde1dp pde1dp_cmpl pde2d pde2d_pb pde1dp_cmpl_dft pde3d fit1d fit1dbc fit1dp fit2d fit2d1d fit2d_cmpl fit2dbc fit2dbc_x fit2dbc_y moments optim1 optim2 optim3 tcdsmat tmassmat tbasfun tsparse1 test_kron ) if(HAS_PARDISO) set(BS_TESTS ${BS_TESTS} pde1dp_cmpl_pardiso pde2d_pardiso pde2d_sym_pardiso pde2d_sym_pardiso_dft ) endif() if(HAS_MUMPS) set(BS_TESTS ${BS_TESTS} pde2d_mumps pde1dp_cmpl_mumps ) endif() set(RUNTESTS "${CMAKE_CURRENT_SOURCE_DIR}/runtest.sh") set(BIN_DIR "${bsplines_tests_BINARY_DIR}") set(INPUT_DIR "${CMAKE_CURRENT_SOURCE_DIR}") foreach(prog ${BS_TESTS}) add_test(${prog} ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 1 ${RUNTESTS} ${BIN_DIR}/${prog} ${INPUT_DIR} ) endforeach() # Special cases! if(HAS_PARDISO) add_test(tsparse2 ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 1 ${BIN_DIR}/tsparse2 ) endif() add_test(ppde3d ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 1 ${BIN_DIR}/ppde3d ${INPUT_DIR}/ppde3d.in ) add_test(ppde3d_pb ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 1 ${BIN_DIR}/ppde3d ${INPUT_DIR}/ppde3d_pb.in ) diff --git a/wk/runtest.sh b/wk/runtest.sh index 810d2bb..d4cc61d 100644 --- a/wk/runtest.sh +++ b/wk/runtest.sh @@ -1,37 +1,37 @@ # # @file runtest.sh # # @brief # # @copyright # Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) # SPC (Swiss Plasma Center) # -# spclibs is free software: you can redistribute it and/or modify it under +# SPClibs is free software: you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) # any later version. # -# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . # # @authors # (in alphabetical order) # @author Trach-Minh Tran # #!/bin/sh progname=$1 input_dir=$2 prog=$(basename ${progname}) input_file=${input_dir}/${prog}.in ${progname} < $input_file exit $?