diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..13c63f7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +*.[oa] +*.mod +*~ +html +auto diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..03300f2 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,151 @@ +/** + * @file CMakeLists.txt + * + * @brief + * + * @copyright + * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) + * SPC (Swiss Plasma Center) + * + * spclibs is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * + * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + * + * @authors + * (in alphabetical order) + * @author Nicolas Richart + * @author Trach-Minh Tran + */ +cmake_minimum_required (VERSION 2.8.8) +#cmake_policy(SET CMP0053 NEW) + +project(bsplines_root Fortran C) + +#enable_language(Fortran) +enable_testing() + +option(BSPLINES_USE_MUMPS "Activate the mumps interface" ON) +if(NOT DEFINED BSPLINES_EXAMPLES) + option(BSPLINES_EXAMPLES "Compiles the examples" ON) +endif() + +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PROJECT_SOURCE_DIR}/cmake") + +# Assume we are on CRAY if ftn is the Fortran compiler +if (${CMAKE_Fortran_COMPILER} MATCHES "ftn$") + set(CRAY TRUE) + if(${CMAKE_Fortran_COMPILER_ID} MATCHES "Cray") + set(cray_suffix cray) + elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "PGI") + set(cray_suffix pgi) + elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "Intel") + set(cray_suffix intel) + elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "GNU") + set(cray_suffix gnu) + endif() +else() + set(CRAY FALSE) +endif() + +if(POLICY CMP0074) + cmake_policy(SET CMP0074 NEW) +endif() + + +include(CMakeFlagsHandling) +# Compiler flags for debug/optimization +if(${CMAKE_Fortran_COMPILER_ID} MATCHES "Cray") +elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "Intel") + set(CMAKE_AR ${XIAR}) + add_flags(LANG Fortran TYPE DEBUG -traceback "-check bounds" "-warn unused") + add_flags(LANG Fortran TYPE RELEASE -xHost) +elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "GNU") + add_flags(LANG Fortran TYPE DEBUG -fbounds-check -fbacktrace) +endif() + +if(NOT MUMPS) + set(MUMPS $ENV{MUMPS_ROOT}) +endif() + +# Installation root directory +if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) + set(PREFIX $ENV{PREFIX}) + if(PREFIX) + set(${CMAKE_INSTALL_PREFIX} ${PREFIX}) + else() + set(CMAKE_INSTALL_PREFIX ${CMAKE_CURRENT_SOURCE_DIR} CACHE PATH "..." FORCE) + endif() + message(STATUS "CMAKE_INSTALL_PREFIX is " ${CMAKE_INSTALL_PREFIX}) +endif() + +# Search and load the FUTILS configuration file +if(NOT TARGET futils) + find_package(futils PATHS ${FUTILS}/lib/cmake REQUIRED) +endif() + +if(BSPLINES_USE_MUMPS) + find_package(Mumps REQUIRED) + set(HAS_MUMPS ${MUMPS_FOUND}) +else() + set(HAS_MUMPS FALSE) +endif() + +# Find lapack/blas. Skip it if on CRAY! +if(CRAY) + set(BSPLINES_USE_PARDISO OFF) +endif() +include(blas) + +if(NOT BSPLINES_EXPORT_TARGETS) + set(BSPLINES_EXPORT_TARGETS bsplines-targets) +endif() + +find_package(MPI COMPONENTS Fortran REQUIRED) + +include(GNUInstallDirs) + +add_subdirectory(pppack) +add_subdirectory(pputils2) +add_subdirectory(fft) +add_subdirectory(src) + +if(HAS_MUMPS AND BSPLINES_EXAMPLES) + add_subdirectory(multigrid) +endif() + +if(BSPLINES_EXAMPLES) + add_subdirectory(examples) + add_subdirectory(wk) +endif() + +export(TARGETS pppack pputils2 bsplines fft + FILE "${CMAKE_BINARY_DIR}/bsplinesLibraryDepends.cmake") +export(PACKAGE bsplines) + +# install configuration files +if(BSPLINES_EXPORT_TARGETS MATCHES "bsplines-targets") + install(EXPORT bsplines-targets + DESTINATION lib/cmake + ) + + configure_file( + ${CMAKE_CURRENT_SOURCE_DIR}/cmake/bsplines-config.cmake.in + ${CMAKE_CURRENT_BINARY_DIR}/cmake/bsplines-config.cmake @ONLY + ) + install(FILES + ${CMAKE_CURRENT_BINARY_DIR}/cmake/bsplines-config.cmake + DESTINATION lib/cmake + ) +endif() + +# enable packaging with CPack +include(CPack) + diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..f288702 --- /dev/null +++ b/COPYING @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + Copyright (C) + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +. diff --git a/COPYING.lesser b/COPYING.lesser new file mode 100644 index 0000000..0a04128 --- /dev/null +++ b/COPYING.lesser @@ -0,0 +1,165 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. diff --git a/cmake/CMakeFlagsHandling.cmake b/cmake/CMakeFlagsHandling.cmake new file mode 100644 index 0000000..56b5fae --- /dev/null +++ b/cmake/CMakeFlagsHandling.cmake @@ -0,0 +1,100 @@ +/** + * @file CMakeFlagsHandling.cmake + * + * @brief + * + * @copyright + * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) + * SPC (Swiss Plasma Center) + * + * spclibs is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * + * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + * + * @authors + * (in alphabetical order) + * @author Nicolas Richart + */ +if(_CMAKE_FLAGS_HANDLING) + return() +endif() +set(_CMAKE_FLAGS_HANDLING TRUE) + +#=============================================================================== +# Compilation options handling +#=============================================================================== +macro(_get_flags_message lang type desc) + if(${lang} MATCHES "C.." OR ${lang} MATCHES "Fortran") + set(${desc} "Flags used by the compiler") + elseif(${lang} MATCHES ".*_LINKER") + set(${desc} "Flags used by the linker") + endif() + + if(${lang} MATCHES "SHARED_LINKER") + set(${desc} "${desc} during the creation of shared libraries") + elseif(${lang} MATCHES "MODULE_LINKER") + set(${desc} "${desc} during the creation of modules") + elseif(${lang} MATCHES "STATIC_LINKER") + set(${desc} "${desc} linker during the creation of static libraries") + endif() + + if(${type} MATCHES "ALL") + set(${desc} "${desc} during all build types") + else() + set(${desc} "${desc} during ${type} builds") + endif() +endmacro() + +#=============================================================================== + +function(handle_flags) + include(CMakeParseArguments) + cmake_parse_arguments(_flags + "ADD;REMOVE" "LANG;TYPE" "" + ${ARGN} + ) + + if(NOT _flags_LANG) + set(_flags_LANG ${FLAGS_HANDLING_DEFAULT_LANGUAGE}) + endif() + + set(_variable CMAKE_${_flags_LANG}_FLAGS) + + if (_flags_TYPE) + set(_variable ${_variable}_${_flags_TYPE}) + else() + set(_flags_TYPE "ALL") + endif() + + _get_flags_message(${_flags_LANG} ${_flags_TYPE} _desc) + foreach(flag ${_flags_UNPARSED_ARGUMENTS}) + if (_flags_ADD) + string(REPLACE "${flag}" "match" _temp_var "${${_variable}}") + if(NOT _temp_var MATCHES "match") + set(${_variable} "${flag} ${${_variable}}" CACHE STRING ${_desc} FORCE) + endif() + elseif(_flags_REMOVE) + string(REPLACE "${flag} " "" ${_variable} "${${_variable}}") + set(${_variable} "${${_variable}}" CACHE STRING ${_desc} FORCE) + endif() + endforeach() +endfunction() + +#=============================================================================== +function(add_flags) + handle_flags(ADD ${ARGN}) +endfunction() + +#=============================================================================== +function(remove_flags) + handle_flags(REMOVE ${ARGN}) +endfunction() +#=============================================================================== diff --git a/cmake/CheckFindMumps.c b/cmake/CheckFindMumps.c new file mode 100644 index 0000000..3b006f7 --- /dev/null +++ b/cmake/CheckFindMumps.c @@ -0,0 +1,105 @@ +/** + * @file CheckFindMumps.c + * + * @brief + * + * @copyright + * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) + * SPC (Swiss Plasma Center) + * + * spclibs is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * + * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + * + * @authors + * (in alphabetical order) + * @author Nicolas Richart + */ +#include + +#if !defined(MUMPS_SEQ) +# include +#endif + +#define JOB_INIT -1 +#define JOB_END -2 +#define JOB_COMPLETE 6 +#define USE_COMM_WORLD -987654 + +#define icntl(n) id.icntl[n - 1] + +int main(int argc, char **argv) { + int n = 2; + int nz = 2; + + int irn[2] = {1, 2}; + int jcn[2] = {1, 2}; + Real a[2]; + Real rhs[2]; + +#if !defined(MUMPS_SEQ) + MPI_Init(&argc, &argv); +#endif + + rhs[0] = 1.0; rhs[1]=4.0; + a[0] = 1.0; a[1] = 2.0; + + id.job = JOB_INIT; + id.par = 1; + id.sym = 0; + +#if !defined(MUMPS_SEQ) + id.comm_fortran = USE_COMM_WORLD; +#endif + + mumps_c(&id); + + // Default Scaling + icntl(8) = 77; + + // Assembled matrix + icntl(5) = 0; + + /// Default centralized dense second member + icntl(20) = 0; + icntl(21) = 0; + + // automatic choice for analysis analysis + icntl(28) = 0; + + // fully distributed + icntl(18) = 3; + + id.n = n; + + id.nz_loc = nz; + id.irn_loc = irn; + id.jcn_loc = jcn; + + id.a_loc = a; + id.rhs = rhs; + + icntl(1) = -1; + icntl(2) = -1; + icntl(3) = -1; + icntl(4) = 0; + + + id.job = JOB_COMPLETE; + mumps_c(&id); + + id.job=JOB_END; + mumps_c(&id); + + printf("Solution is : (%8.2f %8.2f)\n", rhs[0], rhs[1]); + + return 0; +} diff --git a/cmake/FindFFTW.cmake b/cmake/FindFFTW.cmake new file mode 100644 index 0000000..1fba7ca --- /dev/null +++ b/cmake/FindFFTW.cmake @@ -0,0 +1,46 @@ +/** + * @file FindFFTW.cmake + * + * @brief + * + * @copyright + * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) + * SPC (Swiss Plasma Center) + * + * spclibs is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * + * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + * + * @authors + * (in alphabetical order) + * @author Nicolas Richart + */ +if(CMAKE_C_COMPILER_ID MATCHES "Cray") + set(_cray TRUE) +endif() + +# Find FFTW2 +if (${_cray}) +# set(FFTW_LIBRARY "-ldfftw") + set(FFTW_LIBRARY "${FFTW_DIR}/libdfftw.a") +else() + find_library(FFTW_LIBRARY NAMES fftw PATHS ${FFTW}/lib) + find_library(FFTW_LIBRARY NAMES fftw3 PATHS ${FFTW}/lib) + find_path(FFTW_INCLUDES fftw_f77.h ${FFTW}/include) + find_path(FFTW_INCLUDES fftw_f77.i ${FFTW}/include) + find_path(FFTW_INCLUDES fftw.h ${FFTW}/include) + find_path(FFTW_INCLUDES fftw3.h ${FFTW}/include) +endif() + +mark_as_advanced(FFTW_LIBRARY FFTW_INCLUDES) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(FFTW DEFAULT_MSG FFTW_LIBRARY) diff --git a/cmake/FindMETIS.cmake b/cmake/FindMETIS.cmake new file mode 100644 index 0000000..2207580 --- /dev/null +++ b/cmake/FindMETIS.cmake @@ -0,0 +1,62 @@ +/** + * @file FindMETIS.cmake + * + * @brief + * + * @copyright + * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) + * SPC (Swiss Plasma Center) + * + * spclibs is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * + * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + * + * @authors + * (in alphabetical order) + * @author Nicolas Richart + */ +find_path(METIS_INCLUDE_DIR metis.h + PATHS "${METIS_DIR}" + ENV METIS_DIR + PATH_SUFFIXES include + ) + +find_library(METIS_LIBRARY NAMES metis + PATHS "${METIS_DIR}" + ENV METIS_DIR + PATH_SUFFIXES lib + ) + +mark_as_advanced(METIS_LIBRARY METIS_INCLUDE_DIR) + +#=============================================================================== +include(FindPackageHandleStandardArgs) +if(CMAKE_VERSION VERSION_GREATER 2.8.12) + if(METIS_INCLUDE_DIR) + file(STRINGS ${METIS_INCLUDE_DIR}/metis.h _versions + REGEX "^#define\ +METIS_VER_(MAJOR|MINOR|SUBMINOR) .*") + foreach(_ver ${_versions}) + string(REGEX MATCH "METIS_VER_(MAJOR|MINOR|SUBMINOR) *([0-9.]+)" _tmp "${_ver}") + set(_metis_${CMAKE_MATCH_1} ${CMAKE_MATCH_2}) + endforeach() + set(METIS_VERSION "${_metis_MAJOR}.${_metis_MINOR}" CACHE INTERNAL "") + endif() + + find_package_handle_standard_args(METIS + REQUIRED_VARS + METIS_LIBRARY + METIS_INCLUDE_DIR + VERSION_VAR + METIS_VERSION) +else() + find_package_handle_standard_args(METIS DEFAULT_MSG + METIS_LIBRARY METIS_INCLUDE_DIR) +endif() diff --git a/cmake/FindMumps.cmake b/cmake/FindMumps.cmake new file mode 100644 index 0000000..8d8e76d --- /dev/null +++ b/cmake/FindMumps.cmake @@ -0,0 +1,314 @@ +/** + * @file FindMumps.cmake + * + * @brief + * + * @copyright + * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) + * SPC (Swiss Plasma Center) + * + * spclibs is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * + * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + * + * @authors + * (in alphabetical order) + * @author Nicolas Richart + */ +#=============================================================================== +# @file FindMumps.cmake +# +# @author Nicolas Richart +# +# @date creation: Fri Oct 24 2014 +# @date last modification: Wed Jan 13 2016 +# +# @brief The find_package file for the Mumps solver +# +# @section LICENSE +# +# Copyright (©) 2015 EPFL (Ecole Polytechnique Fédérale de Lausanne) Laboratory +# (LSMS - Laboratoire de Simulation en Mécanique des Solides) +# +# Akantu is free software: you can redistribute it and/or modify it under the +# terms of the GNU Lesser General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) any +# later version. +# +# Akantu is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +# details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with Akantu. If not, see . +# +#=============================================================================== +set(_MUMPS_COMPONENTS "sequential" "parallel" "double" "float" "complex_double" "complex_float") + +if(NOT Mumps_FIND_COMPONENTS) + set(Mumps_FIND_COMPONENTS "parallel" "double" "float" "complex_double" "complex_float") +endif() +#=============================================================================== +enable_language(Fortran) + +set(MUMPS_PRECISIONS) +set(MUMPS_PLAT) +foreach(_comp ${Mumps_FIND_COMPONENTS}) + if("${_comp}" STREQUAL "sequential") + set(MUMPS_PLAT _seq) #default plat on debian based distribution + endif() + + if("${_comp}" STREQUAL "float") + list(APPEND MUMPS_PRECISIONS s) + endif() + if("${_comp}" STREQUAL "double") + list(APPEND MUMPS_PRECISIONS d) + endif() + if("${_comp}" STREQUAL "complex_float") + list(APPEND MUMPS_PRECISIONS c) + endif() + if("${_comp}" STREQUAL "complex_double") + list(APPEND MUMPS_PRECISIONS z) + endif() +endforeach() + +if(NOT MUMPS_PRECISIONS) + set(MUMPS_PRECISIONS s d c z) +endif() + +list(GET MUMPS_PRECISIONS 0 _first_precision) + +string(TOUPPER "${_first_precision}" _u_first_precision) + +find_path(MUMPS_INCLUDE_DIR ${_first_precision}mumps_c.h + PATHS "${MUMPS_DIR}" + ENV MUMPS_DIR + PATH_SUFFIXES include + ) +mark_as_advanced(MUMPS_INCLUDE_DIR) + +set(_mumps_required_vars) +foreach(_precision ${MUMPS_PRECISIONS}) + string(TOUPPER "${_precision}" _u_precision) + find_library(MUMPS_LIBRARY_${_u_precision}MUMPS NAMES ${_precision}mumps${MUMPS_PREFIX} + PATHS "${MUMPS_DIR}" + ENV MUMPS_DIR + PATH_SUFFIXES lib + ) + mark_as_advanced(MUMPS_LIBRARY_${_u_precision}MUMPS) + list(APPEND _mumps_required_vars MUMPS_LIBRARY_${_u_precision}MUMPS) + + list(APPEND MUMPS_LIBRARIES_ALL ${MUMPS_LIBRARY_${_u_precision}MUMPS}) +endforeach() + + +if(MUMPS_LIBRARY_${_u_first_precision}MUMPS MATCHES ".*${_first_precision}mumps.*${CMAKE_STATIC_LIBRARY_SUFFIX}") + # Assuming mumps was compiled as a static library + set(MUMPS_LIBRARY_TYPE STATIC CACHE INTERNAL "" FORCE) + + if (CMAKE_Fortran_COMPILER MATCHES ".*gfortran") + set(_compiler_specific gfortran) + elseif (CMAKE_Fortran_COMPILER MATCHES ".*ifort") + set(_compiler_specific ifcore) + else() + message("Compiler ${CMAKE_Fortran_COMPILER} is not known, you will probably " + "have to add semething instead of this message to be able to test mumps " + "install") + endif() +else() + set(MUMPS_LIBRARY_TYPE SHARED CACHE INTERNAL "" FORCE) +endif() + + +function(mumps_add_dependency _pdep _libs) + string(TOUPPER ${_pdep} _u_pdep) + if(_pdep STREQUAL "mumps_common") + find_library(MUMPS_LIBRARY_COMMON mumps_common${MUMPS_PREFIX} + PATHS "${MUMPS_DIR}" + ENV MUMPS_DIR + PATH_SUFFIXES lib + ) + set(${_libs} ${MUMPS_LIBRARY_COMMON} PARENT_SCOPE) + mark_as_advanced(MUMPS_LIBRARY_COMMON) + elseif(_pdep STREQUAL "pord") + find_library(MUMPS_LIBRARY_PORD pord${MUMPS_PREFIX} + PATHS "${MUMPS_DIR}" + ENV MUMPS_DIR + PATH_SUFFIXES lib + ) + set(${_libs} ${MUMPS_LIBRARY_PORD} PARENT_SCOPE) + mark_as_advanced(MUMPS_LIBRARY_PORD) + elseif(_pdep MATCHES "Scotch") + find_package(Scotch REQUIRED ${ARGN} QUIET) + if(ARGN) + list(GET ARGN 1 _comp) + string(TOUPPER ${_comp} _u_comp) + set(${_libs} ${SCOTCH_LIBRARY_${_u_comp}} PARENT_SCOPE) + else() + set(${_libs} ${${_u_pdep}_LIBRARIES} PARENT_SCOPE) + endif() + elseif(_pdep MATCHES "MPI") + if(MUMPS_PLAT STREQUAL "_seq") + find_library(MUMPS_LIBRARY_MPISEQ mpiseq${MUMPS_PREFIX} + PATHS "${MUMPS_DIR}" + ENV MUMPS_DIR + PATH_SUFFIXES lib + ) + set(${_libs} ${MUMPS_LIBRARY_MPISEQ} PARENT_SCOPE) + mark_as_advanced(MUMPS_LIBRARY_MPISEQ) + else() + find_package(MPI REQUIRED C Fortran QUIET) + set(${_libs} ${MPI_C_LIBRARIES} ${MPI_Fortran_LIBRARIES} PARENT_SCOPE) + endif() + else() + find_package(${_pdep} REQUIRED QUIET) + set(${_libs} ${${_u_pdep}_LIBRARIES} ${${_u_pdep}_LIBRARY} PARENT_SCOPE) + endif() +endfunction() + +function(mumps_find_dependencies) + set(_libraries_all ${MUMPS_LIBRARIES_ALL}) + set(_include_dirs ${MUMPS_INCLUDE_DIR}) + + set(_mumps_test_dir "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}") + file(READ ${CMAKE_CURRENT_LIST_DIR}/CheckFindMumps.c _output) + file(WRITE "${_mumps_test_dir}/mumps_test_code.c" + "#include <${_first_precision}mumps_c.h> +${_u_first_precision}MUMPS_STRUC_C id; + +#define mumps_c ${_first_precision}mumps_c +#define Real ${_u_first_precision}MUMPS_REAL +") + + if(MUMPS_PLAT STREQUAL _seq) + file(APPEND "${_mumps_test_dir}/mumps_test_code.c" + "#define MUMPS_SEQ +") + else() + file(APPEND "${_mumps_test_dir}/mumps_test_code.c" + "// #undef MUMPS_SEQ +") + find_package(MPI REQUIRED) + list(APPEND _compiler_specific ${MPI_C_LIBRARIES}) + list(APPEND _include_dirs ${MPI_C_INCLUDE_PATH} ${MPI_INCLUDE_DIR}) + endif() + + file(APPEND "${_mumps_test_dir}/mumps_test_code.c" "${_output}") + + #=============================================================================== + set(_mumps_dep_symbol_BLAS ${_first_precision}gemm) + set(_mumps_dep_symbol_ScaLAPACK numroc) + set(_mumps_dep_symbol_MPI mpi_send) + set(_mumps_dep_symbol_Scotch SCOTCH_graphInit) + set(_mumps_dep_symbol_Scotch_ptscotch scotchfdgraphexit) + set(_mumps_dep_symbol_Scotch_esmumps esmumps) + set(_mumps_dep_symbol_mumps_common mumps_abort) + set(_mumps_dep_symbol_pord SPACE_ordering) + set(_mumps_dep_symbol_METIS metis_nodend) + set(_mumps_dep_symbol_ParMETIS ParMETIS_V3_NodeND) + + # added for fucking macosx that cannot fail at link + set(_mumps_run_dep_symbol_mumps_common mumps_fac_descband) + set(_mumps_run_dep_symbol_MPI mpi_bcast) + set(_mumps_run_dep_symbol_ScaLAPACK idamax) + + set(_mumps_dep_comp_Scotch_ptscotch COMPONENTS ptscotch) + set(_mumps_dep_comp_Scotch_esmumps COMPONENTS esmumps) + + set(_mumps_potential_dependencies mumps_common pord BLAS ScaLAPACK MPI + Scotch Scotch_ptscotch Scotch_esmumps METIS ParMETIS) + #=============================================================================== + + set(_retry_try_run TRUE) + set(_retry_count 0) + + # trying only as long as we add dependencies to avoid inifinte loop in case of an unkown dependency + while (_retry_try_run AND _retry_count LESS 100) + try_run(_mumps_run _mumps_compiles "${_mumps_test_dir}" "${_mumps_test_dir}/mumps_test_code.c" + CMAKE_FLAGS "-DINCLUDE_DIRECTORIES:STRING=${_include_dirs}" + LINK_LIBRARIES ${_libraries_all} ${_libraries_all} ${_compiler_specific} + RUN_OUTPUT_VARIABLE _run + COMPILE_OUTPUT_VARIABLE _out) + + set(_retry_compile FALSE) + #message("COMPILATION outputs: \n${_out} \n RUN OUTPUT \n${_run}") + if(_mumps_compiles AND NOT (_mumps_run STREQUAL "FAILED_TO_RUN")) + break() + endif() + + foreach(_pdep ${_mumps_potential_dependencies}) + #message("CHECKING ${_pdep}") + set(_add_pdep FALSE) + if (NOT _mumps_compiles AND + _out MATCHES "undefined reference.*${_mumps_dep_symbol_${_pdep}}") + set(_add_pdep TRUE) + #message("NEED COMPILE ${_pdep}") + elseif(_mumps_run STREQUAL "FAILED_TO_RUN" AND + DEFINED _mumps_run_dep_symbol_${_pdep} AND + _run MATCHES "${_mumps_run_dep_symbol_${_pdep}}") + set(_add_pdep TRUE) + #message("NEED RUN ${_pdep}") + endif() + + if(_add_pdep) + mumps_add_dependency(${_pdep} _libs ${_mumps_dep_comp_${_pdep}}) + #message("ADDING ${_libs}") + if(NOT _libs) + message(FATAL_ERROR "MUMPS depends on ${_pdep} but no libraries where found") + endif() + list(APPEND _libraries_all ${_libs}) + set(_retry_try_run TRUE) + endif() + endforeach() + + math(EXPR _retry_count "${_retry_count} + 1") + endwhile() + + if(_retry_count GREATER 10) + message(FATAL_ERROR "Do not know what to do to link with mumps on your system, I give up!") + endif() + + if(APPLE) + # in doubt add some stuff because mumps was perhaps badly compiled + mumps_add_dependency(pord _libs) + list(APPEND _libraries_all ${_libs}) + endif() + + set(MUMPS_LIBRARIES_ALL ${_libraries_all} PARENT_SCOPE) +endfunction() + +mumps_find_dependencies() + +set(MUMPS_LIBRARIES ${MUMPS_LIBRARIES_ALL} CACHE INTERNAL "" FORCE) + +#=============================================================================== +include(FindPackageHandleStandardArgs) +if(CMAKE_VERSION VERSION_GREATER 2.8.12) + if(MUMPS_INCLUDE_DIR) + file(STRINGS ${MUMPS_INCLUDE_DIR}/dmumps_c.h _versions + REGEX "^#define MUMPS_VERSION .*") + foreach(_ver ${_versions}) + string(REGEX MATCH "MUMPS_VERSION *\"([0-9.]+)\"" _tmp "${_ver}") + set(_mumps_VERSION ${CMAKE_MATCH_1}) + endforeach() + set(MUMPS_VERSION "${_mumps_VERSION}" CACHE INTERNAL "") + endif() + + find_package_handle_standard_args(Mumps + REQUIRED_VARS ${_mumps_required_vars} + MUMPS_INCLUDE_DIR + VERSION_VAR MUMPS_VERSION + ) +else() + find_package_handle_standard_args(Mumps DEFAULT_MSG + ${_mumps_required_vars} MUMPS_INCLUDE_DIR) +endif() diff --git a/cmake/FindPETSc.cmake b/cmake/FindPETSc.cmake new file mode 100644 index 0000000..04ad8d1 --- /dev/null +++ b/cmake/FindPETSc.cmake @@ -0,0 +1,92 @@ +/** + * @file FindPETSc.cmake + * + * @brief + * + * @copyright + * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) + * SPC (Swiss Plasma Center) + * + * spclibs is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * + * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + * + * @authors + * (in alphabetical order) + * @author Nicolas Richart + */ +# - Try to find PETSc +# PETSC_FOUND - system has PETSc +# PETSC_INCLUDE_DIRS - the PETSc include directories +# PETSC_LIBRARIES - Link these to use PETSc +# PETSC_VERSION - Version string (MAJOR.MINOR.SUBMINOR) + +if(PETSc_FIND_REQUIRED) + find_package(PkgConfig REQUIRED) +else() + find_package(PkgConfig QUIET) + if(NOT PKG_CONFIG_FOUND) + return() + endif() +endif() + +pkg_search_module(_petsc PETSc) + +# Some debug code +#get_property(_vars DIRECTORY PROPERTY VARIABLES) +#foreach(_var ${_vars}) +# if ("${_var}" MATCHES "^_petsc") +# message("${_var} -> ${${_var}}") +# endif() +#endforeach() + +if(_petsc_FOUND AND _petsc_VERSION) + set(PETSC_VERSION ${_petsc_VERSION}) +endif() + +if(_petsc_FOUND) + set(_petsc_libs) + foreach(_lib ${_petsc_LIBRARIES}) + string(TOUPPER "${_lib}" _u_lib) + find_library(PETSC_LIBRARY_${_u_lib} ${_lib} PATHS ${_petsc_LIBRARY_DIRS}) + list(APPEND _petsc_libs ${PETSC_LIBRARY_${_u_lib}}) + mark_as_advanced(PETSC_LIBRARY_${_u_lib}) + endforeach() + + if (NOT _petsc_INCLUDE_DIRS) + pkg_get_variable(_petsc_INCLUDE_DIRS ${_petsc_MODULE_NAME} includedir) + #message(${_petsc_INCLUDE_DIRS}) + endif() + + find_path(PETSC_Fortran_INCLUDE_DIRS "finclude/petsc.h" + PATHS ${_petsc_INCLUDE_DIRS}/petsc + NO_CMAKE_PATH + NO_DEFAULT_PATH + ) + + set(PETSC_LIBRARIES ${_petsc_libs} CACHE FILEPATH "") + set(PETSC_INCLUDE_DIRS ${_petsc_INCLUDE_DIRS} CACHE PATH "") + + + add_library(petsc::petsc INTERFACE IMPORTED) + set_property(TARGET petsc::petsc PROPERTY INTERFACE_LINK_LIBRARIES ${PETSC_LIBRARIES}) + set_property(TARGET petsc::petsc PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${PETSC_INCLUDE_DIRS}) + + add_library(petsc::petscf INTERFACE IMPORTED) + target_link_libraries(petsc::petscf INTERFACE petsc::petsc) + set_property(TARGET petsc::petscf PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${PETSC_Fortran_INCLUDE_DIRS}) + +endif() + +include (FindPackageHandleStandardArgs) +find_package_handle_standard_args(PETSc + REQUIRED_VARS PETSC_LIBRARIES PETSC_INCLUDE_DIRS + VERSION_VAR PETSC_VERSION) diff --git a/cmake/FindParMETIS.cmake b/cmake/FindParMETIS.cmake new file mode 100644 index 0000000..6a927e1 --- /dev/null +++ b/cmake/FindParMETIS.cmake @@ -0,0 +1,62 @@ +/** + * @file FindParMETIS.cmake + * + * @brief + * + * @copyright + * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) + * SPC (Swiss Plasma Center) + * + * spclibs is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * + * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + * + * @authors + * (in alphabetical order) + * @author Nicolas Richart + */ +find_path(PARMETIS_INCLUDE_DIR parmetis.h + PATHS "${PARMETIS_DIR}" + ENV PARMETIS_DIR + PATH_SUFFIXES include + ) + +find_library(PARMETIS_LIBRARY NAMES parmetis + PATHS "${PARMETIS_DIR}" + ENV PARMETIS_DIR + PATH_SUFFIXES lib + ) + +mark_as_advanced(PARMETIS_LIBRARY PARMETIS_INCLUDE_DIR) + +#=============================================================================== +include(FindPackageHandleStandardArgs) +if(CMAKE_VERSION VERSION_GREATER 2.8.12) + if(PARMETIS_INCLUDE_DIR) + file(STRINGS ${PARMETIS_INCLUDE_DIR}/parmetis.h _versions + REGEX "^#define\ +PARMETIS_(MAJOR|MINOR|SUBMINOR)_VERSION .*") + foreach(_ver ${_versions}) + string(REGEX MATCH "PARMETIS_(MAJOR|MINOR|SUBMINOR)_VERSION *([0-9.]+)" _tmp "${_ver}") + set(_parmetis_${CMAKE_MATCH_1} ${CMAKE_MATCH_2}) + endforeach() + set(PARMETIS_VERSION "${_parmetis_MAJOR}.${_parmetis_MINOR}" CACHE INTERNAL "") + endif() + + find_package_handle_standard_args(ParMETIS + REQUIRED_VARS + PARMETIS_LIBRARY + PARMETIS_INCLUDE_DIR + VERSION_VAR + PARMETIS_VERSION) +else() + find_package_handle_standard_args(ParMETIS DEFAULT_MSG + PARMETIS_LIBRARY PARMETIS_INCLUDE_DIR) +endif() diff --git a/cmake/FindScaLAPACK.cmake b/cmake/FindScaLAPACK.cmake new file mode 100644 index 0000000..b3ef5ad --- /dev/null +++ b/cmake/FindScaLAPACK.cmake @@ -0,0 +1,181 @@ +/** + * @file FindScaLAPACK.cmake + * + * @brief + * + * @copyright + * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) + * SPC (Swiss Plasma Center) + * + * spclibs is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * + * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + * + * @authors + * (in alphabetical order) + * @author Nicolas Richart + */ +#=============================================================================== +# @file FindScaLAPACK.cmake +# +# @author Nicolas Richart +# +# @date creation: Tue Mar 31 2015 +# @date last modification: Wed Jan 13 2016 +# +# @brief The find_package file for the Mumps solver +# +# @section LICENSE +# +# Copyright (©) 2015 EPFL (Ecole Polytechnique Fédérale de Lausanne) Laboratory +# (LSMS - Laboratoire de Simulation en Mécanique des Solides) +# +# Akantu is free software: you can redistribute it and/or modify it under the +# terms of the GNU Lesser General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) any +# later version. +# +# Akantu is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +# details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with Akantu. If not, see . +# +#=============================================================================== +set(SCALAPACK_VENDOR "Auto" CACHE + STRING "Vendor for scalapack (Auto, Netlib, Intel_(i?)lp64_(openmpi|intelmpi|sgimpt)") +mark_as_advanced(SCALAPACK_VENDOR) +set_property(CACHE SCALAPACK_VENDOR PROPERTY STRINGS + Auto Netlib + Intel_lp64_openmpi Intel_lp64_intelmpi Intel_lp64_sgimpt + Intel_ilp64_openmpi Intel_ilp64_intelmpi Intel_ilp64_sgimpt) + + +macro(scalapack_find_library prefix target name list_libraries list_headers) + foreach(_lib ${list_libraries}) + find_library(${prefix}_${_lib}_LIBRARY NAMES ${_lib} + PATHS ${prefix}_DIR + ) + mark_as_advanced(${prefix}_${_lib}_LIBRARY) + + if(${prefix}_${_lib}_LIBRARY) + list(APPEND ${prefix}_libraries ${${prefix}_${_lib}_LIBRARY}) + + get_filename_component(_ext ${${prefix}_${_lib}_LIBRARY} EXT) + if(NOT TARGET ${target}) + if("${_ext}" STREQUAL "${CMAKE_SHARED_LIBRARY_SUFFIX}") + add_library(${target} SHARED IMPORTED) + get_filename_component(_soname ${${prefix}_${_lib}_LIBRARY} NAME) + set_property(TARGET ${target} PROPERTY IMPORTED_SONAME ${_soname}) + else() + add_library(${target}_${name} STATIC IMPORTED) + endif() + set_property(TARGET ${target} PROPERTY + IMPORTED_LOCATION ${${prefix}_${_lib}_LIBRARY}) + else() + if("${_ext}" STREQUAL "${CMAKE_SHARED_LIBRARY_SUFFIX}") + set_property(TARGET ${target} APPEND PROPERTY + IMPORTED_LINK_DEPENDENT_LIBRARIES ${${prefix}_${_lib}_LIBRARY} + ) + else() + set_property(TARGET ${target} APPEND PROPERTY + IMPORTED_LINK_INTERFACE_LIBRARIES ${${prefix}_${_lib}_LIBRARY} + ) + endif() + endif() + else() + unset(${prefix}_${_lib}_LIBRARY CACHE) + endif() + endforeach() + + if(${prefix}_libraries) + foreach(_hdr ${list_headers}) + get_filename_component(_hdr_name ${_hdr} NAME_WE) + find_path(${prefix}_${_hdr_name}_INCLUDE_DIR NAMES ${_hdr} + PATHS ${prefix}_DIR) + mark_as_advanced(${prefix}_${_hdr_name}_INCLUDE_DIR) + + if(${prefix}_${_hdr_name}_INCLUDE_DIR) + list(APPEND ${prefix}_include_dir ${${prefix}_${_hdr_name}_INCLUDE_DIR}) + set_property(TARGET ${target} APPEND PROPERTY + INTERFACE_INCLUDE_DIRECTORIES ${${prefix}_${_lib}_INCLUDE_DIR} + ) + else() + unset(${prefix}_${_lib}_INCLUDE_DIR CACHE) + endif() + endforeach() + endif() +endmacro() + +set(SCALAPACK_libraries) +set(SCALAPACK_INCLUDE_DIR) + +if(SCALAPACK_VENDOR STREQUAL "Auto" OR SCALAPACK_VENDOR STREQUAL "Netlib") + if(NOT SCALAPACK_libraries) + scalapack_find_library( + SCALAPACK + ScaLAPACK + "netlib" + "scalapack;blacsC;blacsF77;blacs" + "" + ) + endif() +endif() + +foreach(_precision lp64 ilp64) + foreach(_mpi intelmpi openmpi sgimpt) + if(NOT SCALAPACK_libraries) + if(SCALAPACK_VENDOR STREQUAL "Auto" OR SCALAPACK_VENDOR STREQUAL "Intel_${_precision}_${_mpi}") + if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") + set(_mkl_common "mkl_intel_${_precision}") + + else() + set(_mkl_common "mkl_gf_${_precision}") + endif() + scalapack_find_library( + SCALAPACK + ScaLAPACK + "intel_${_precision}_${_mpi}" + "mkl_scalapack_${_precision};${_mkl_common};mkl_sequential;mkl_core;mkl_blacs_${_mpi}_${_precision}" + "mkl_scalapack.h" + ) + + if(SCALAPACK_libraries AND _precision STREQUAL "ilp64") + set_property(TARGET ${target} APPEND PROPERTY + INTERFACE_COMPILE_DEFINITIONS MKL_ILP64} + ) + endif() + + if(EXISTS ${SCALAPACK_include_dir}/mkl_version.h) + file(STRINGS ${SCALAPACK_include_dir}/mkl_version.h _versions + REGEX "^#define\ +__INTEL_MKL(_MINOR|_UPDATE)?__ .*") + foreach(_ver ${_versions}) + string(REGEX MATCH "__INTEL_MKL(_MINOR|_UPDATE)?__ *([0-9.]+)" _tmp "${_ver}") + set(_mkl${CMAKE_MATCH_1} ${CMAKE_MATCH_2}) + endforeach() + set(SCALAPACK_VERSION "mkl:${_mkl}.${_mkl_MINOR}.${_mkl_UPDATE}" CACHE INTERNAL "") + endif() + + endif() + endif() + endforeach() +endforeach() + +set(SCALAPACK_LIBRARIES ${SCALAPACK_libraries} CACHE INTERNAL "") +set(SCALAPACK_INCLUDE_DIR ${SCALAPACK_include_dir} CACHE INTERNAL "") + +#=============================================================================== +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(ScaLAPACK + REQUIRED_VARS SCALAPACK_LIBRARIES + VERSION_VAR SCALAPACK_VERSION) diff --git a/cmake/FindScotch.cmake b/cmake/FindScotch.cmake new file mode 100644 index 0000000..bc7829e --- /dev/null +++ b/cmake/FindScotch.cmake @@ -0,0 +1,270 @@ +/** + * @file FindScotch.cmake + * + * @brief + * + * @copyright + * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) + * SPC (Swiss Plasma Center) + * + * spclibs is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * + * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + * + * @authors + * (in alphabetical order) + * @author Nicolas Richart + */ +#=============================================================================== +# @file FindScotch.cmake +# +# @author Nicolas Richart +# +# @date creation: Fri Oct 24 2014 +# @date last modification: Wed Jan 13 2016 +# +# @brief The find_package file for Scotch +# +# @section LICENSE +# +# Copyright (©) 2015 EPFL (Ecole Polytechnique Fédérale de Lausanne) Laboratory +# (LSMS - Laboratoire de Simulation en Mécanique des Solides) +# +# Akantu is free software: you can redistribute it and/or modify it under the +# terms of the GNU Lesser General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) any +# later version. +# +# Akantu is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +# details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with Akantu. If not, see . +# +#=============================================================================== + +set(_SCOTCH_COMPONENTS "metis" "parmetis" "esmumps" "ptscotch") + +if(NOT Scotch_FIND_COMPONENTS) + set(Scotch_FIND_COMPONENTS) +endif() + +find_path(SCOTCH_INCLUDE_DIR scotch.h PATHS "${SCOTCH_DIR}" ENV SCOTCH_DIR + PATH_SUFFIXES include include/scotch + ) + + +find_library(SCOTCH_LIBRARY scotch PATHS "${SCOTCH_DIR}" ENV SCOTCH_DIR PATH_SUFFIXES lib) + +set(_scotch_test_dir "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}") +file(WRITE "${_scotch_test_dir}/scotch_test_code.c" + "#include +#include +#include + +int main() { + SCOTCH_Graph graph; + SCOTCH_graphInit(&graph); + return 0; +} +") + +#=============================================================================== +include(FindPackageHandleStandardArgs) +if(CMAKE_VERSION VERSION_GREATER 2.8.12) + if(SCOTCH_INCLUDE_DIR) + file(STRINGS ${SCOTCH_INCLUDE_DIR}/scotch.h _versions + REGEX "^#define\ +SCOTCH_(VERSION|RELEASE|PATCHLEVEL) .*") + foreach(_ver ${_versions}) + string(REGEX MATCH "SCOTCH_(VERSION|RELEASE|PATCHLEVEL) *([0-9.]+)" _tmp "${_ver}") + set(_scotch_${CMAKE_MATCH_1} ${CMAKE_MATCH_2}) + endforeach() + set(SCOTCH_VERSION "${_scotch_VERSION}.${_scotch_RELEASE}.${_scotch_PATCHLEVEL}" CACHE INTERNAL "") + endif() + find_package_handle_standard_args(Scotch + REQUIRED_VARS SCOTCH_LIBRARY SCOTCH_INCLUDE_DIR + VERSION_VAR SCOTCH_VERSION) +else() + find_package_handle_standard_args(Scotch DEFAULT_MSG + SCOTCH_LIBRARY SCOTCH_INCLUDE_DIR) +endif() + +set(SCOTCH_LIBRARIES_ALL ${SCOTCH_LIBRARY}) + +try_compile(_scotch_compiles "${_scotch_test_dir}" SOURCES "${_scotch_test_dir}/scotch_test_code.c" + CMAKE_FLAGS "-DINCLUDE_DIRECTORIES:STRING=${SCOTCH_INCLUDE_DIR}" + LINK_LIBRARIES ${SCOTCH_LIBRARY} + OUTPUT_VARIABLE _out) + +get_filename_component(_scotch_hint "${SCOTCH_LIBRARY}" DIRECTORY) + +if(SCOTCH_LIBRARY MATCHES ".*scotch.*${CMAKE_STATIC_LIBRARY_SUFFIX}") + # Assuming scotch was compiled as a static library + set(SCOTCH_LIBRARY_TYPE STATIC CACHE INTERNAL "" FORCE) +else() + set(SCOTCH_LIBRARY_TYPE SHARED CACHE INTERNAL "" FORCE) +endif() + +if(NOT _scotch_compiles) + if(_out MATCHES "SCOTCH_errorPrint") + find_library(SCOTCH_LIBRARY_ERR scotcherr + HINTS ${_scotch_hint}) + find_library(SCOTCH_LIBRARY_ERREXIT scotcherrexit + HINTS ${_scotch_hint}) + + if(NOT TARGET Scotch::err) + add_library(Scotch::err ${SCOTCH_LIBRARY_TYPE} IMPORTED GLOBAL) + endif() + if(NOT TARGET Scotch::errexit) + add_library(Scotch::errexit ${SCOTCH_LIBRARY_TYPE} IMPORTED GLOBAL) + endif() + + set_target_properties(Scotch::errexit PROPERTIES + IMPORTED_LOCATION "${SCOTCH_LIBRARY_ERREXIT}" + INTERFACE_INCLUDE_DIRECTORIES "${SCOTCH_INCLUDE_DIR}" + IMPORTED_LINK_INTERFACE_LANGUAGES "C") + + set_target_properties(Scotch::err PROPERTIES + IMPORTED_LOCATION "${SCOTCH_LIBRARY_ERR}" + INTERFACE_INCLUDE_DIRECTORIES "${SCOTCH_INCLUDE_DIR}" + IMPORTED_LINK_INTERFACE_LANGUAGES "C" + INTERFACE_LINK_LIBRARIES "Scotch::errexit") + + mark_as_advanced(SCOTCH_LIBRARY_ERR + SCOTCH_LIBRARY_ERREXIT) + + list(APPEND SCOTCH_LIBRARIES_ALL ${SCOTCH_LIBRARY_ERR} ${SCOTCH_LIBRARY_ERREXIT}) + + set(_scotch_link_lib INTERFACE_LINK_LIBRARIES "Scotch::err") + endif() +endif() + +if(NOT TARGET Scotch::scotch) + add_library(Scotch::scotch ${SCOTCH_LIBRARY_TYPE} IMPORTED GLOBAL) +endif() +set_target_properties(Scotch::scotch PROPERTIES + IMPORTED_LOCATION "${SCOTCH_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${SCOTCH_INCLUDE_DIR}" + IMPORTED_LINK_INTERFACE_LANGUAGES "C" + ${_scotch_link_lib}) + +set(SCOTCH_LIBRARIES ${SCOTCH_LIBRARIES_ALL} CACHE INTERNAL "Libraries for Scotch" FORCE) + +mark_as_advanced(SCOTCH_LIBRARY + SCOTCH_INCLUDE_DIR + SCOTCH_LIBRARIES) + + +if("${Scotch_FIND_COMPONENTS}" MATCHES "esmumps") + find_library(SCOTCH_LIBRARY_ESMUMPS esmumps HINTS ${_scotch_hint}) + + if(NOT TARGET Scotch::esmumps) + add_library(Scotch::esmumps ${SCOTCH_LIBRARY_TYPE} IMPORTED GLOBAL) + endif() + set_target_properties(Scotch::esmumps PROPERTIES + IMPORTED_LOCATION "${SCOTCH_LIBRARY_ESMUMPS}" + INTERFACE_INCLUDE_DIRECTORIES "${SCOTCH_INCLUDE_DIR}" + IMPORTED_LINK_INTERFACE_LANGUAGES "C") + + + mark_as_advanced(SCOTCH_LIBRARY_ESMUMPS) +endif() + +if("${Scotch_FIND_COMPONENTS}" MATCHES "metis") + find_library(SCOTCH_LIBRARY_METIS scotchmetis HINTS ${_scotch_hint}) + + if(NOT TARGET Scotch::metis) + add_library(Scotch::metis ${SCOTCH_LIBRARY_TYPE} IMPORTED GLOBAL) + endif() + set_target_properties(Scotch::metis PROPERTIES + IMPORTED_LOCATION "${SCOTCH_LIBRARY_METIS}" + INTERFACE_INCLUDE_DIRECTORIES "${SCOTCH_INCLUDE_DIR}" + IMPORTED_LINK_INTERFACE_LANGUAGES "C") + + mark_as_advanced(SCOTCH_LIBRARY_METIS) +endif() + +if("${Scotch_FIND_COMPONENTS}" MATCHES "parmetis") + find_library(SCOTCH_LIBRARY_PARMETIS scotchparmetis HINTS ${_scotch_hint}) + + if(NOT TARGET Scotch::parmetis) + add_library(Scotch::parmetis ${SCOTCH_LIBRARY_TYPE} IMPORTED GLOBAL) + endif() + set_target_properties(Scotch::parmetis PROPERTIES + IMPORTED_LOCATION "${SCOTCH_LIBRARY_PARMETIS}" + INTERFACE_INCLUDE_DIRECTORIES "${SCOTCH_INCLUDE_DIR}" + IMPORTED_LINK_INTERFACE_LANGUAGES "C" + INTERFACE_INCLUDE_DIRECTORIES "Scotch::metis") + mark_as_advanced(SCOTCH_LIBRARY_PARMETIS) +endif() + +# +##=============================================================================== +if("${Scotch_FIND_COMPONENTS}" MATCHES "ptscotch") + file(WRITE "${_scotch_test_dir}/ptscotch_test_code.c" + "#include +#include +#include +#include + +int main() { + SCOTCH_Dgraph graph; + SCOTCH_dgraphInit(&graph, MPI_COMM_WORLD); + return 0; +} +") + + find_package(MPI REQUIRED) + + find_library(SCOTCH_LIBRARY_PTSCOTCH ptscotch HINTS ${_scotch_hint}) + + try_compile(_scotch_compiles "${_scotch_test_dir}" SOURCES "${_scotch_test_dir}/ptscotch_test_code.c" + CMAKE_FLAGS "-DINCLUDE_DIRECTORIES:STRING=${SCOTCH_INCLUDE_DIR};${MPI_C_INCLUDE_PATH}" + LINK_LIBRARIES ${SCOTCH_LIBRARY_PTSCOTCH} ${MPI_C_LIBRARIES} + OUTPUT_VARIABLE _out) + + if(NOT _scotch_compiles) + if(_out MATCHES "SCOTCH_archExit") + set(_scotch_link_lib INTERFACE_LINK_LIBRARIES "Scotch::scotch") + endif() + endif() + + if(NOT TARGET Scotch::ptscotch) + add_library(Scotch::ptscotch ${SCOTCH_LIBRARY_TYPE} IMPORTED GLOBAL) + endif() + set_target_properties(Scotch::ptscotch PROPERTIES + IMPORTED_LOCATION "${SCOTCH_LIBRARY_PTSCOTCH}" + INTERFACE_INCLUDE_DIRECTORIES "${SCOTCH_INCLUDE_DIR}" + IMPORTED_LINK_INTERFACE_LANGUAGES "C" + ${_scotch_link_lib}) + + set(PTSCOTCH_LIBRARIES ${SCOTCH_LIBRARY_PTSCOTCH} ${SCOTCH_LIBRARIES} CACHE INTERNAL "Libraries for PT-Scotch" FORCE) + + mark_as_advanced(SCOTCH_LIBRARY_PTSCOTCH + PTSCOTCH_LIBRARIES) + + if("${Scotch_FIND_COMPONENTS}" MATCHES "esmumps") + find_library(SCOTCH_LIBRARY_PTESMUMPS ptesmumps + HINTS ${_scotch_hint} PATH_SUFFIXES lib .) + + if(NOT TARGET Scotch::ptesmumps) + add_library(Scotch::ptesmumps ${SCOTCH_LIBRARY_TYPE} IMPORTED GLOBAL) + endif() + set_target_properties(Scotch::ptesmumps PROPERTIES + IMPORTED_LOCATION "${SCOTCH_LIBRARY_ESMUMPS}" + INTERFACE_INCLUDE_DIRECTORIES "${SCOTCH_INCLUDE_DIR}" + IMPORTED_LINK_INTERFACE_LANGUAGES "C") + + mark_as_advanced(SCOTCH_LIBRARY_PTESMUMPS) + endif() +endif() diff --git a/cmake/blas.cmake b/cmake/blas.cmake new file mode 100644 index 0000000..9648874 --- /dev/null +++ b/cmake/blas.cmake @@ -0,0 +1,63 @@ +/** + * @file blas.cmake + * + * @brief + * + * @copyright + * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) + * SPC (Swiss Plasma Center) + * + * spclibs is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * + * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + * + * @authors + * (in alphabetical order) + * @author Nicolas Richart + */ +set(_default_blas $ENV{BLA_VENDOR}) +if(NOT _default_blas) + set(_default_blas All) +endif() +set(BSPLINES_USE_BLAS_VENDOR "${_default_blas}" CACHE STRING "Version of blas to use") +mark_as_advanced(BSPLINES_USE_BLAS_VENDOR) +set_property(CACHE BSPLINES_USE_BLAS_VENDOR PROPERTY STRINGS + All + ACML + ACML_GPU + ACML_MP + ATLAS + Apple + CXML + DXML + Generic + Goto + IBMESSL + Intel + Intel10_32 + Intel10_64lp + Intel10_64lp_seq + NAS + OpenBLAS + PhiPACK + SCSL + SGIMATH + SunPerf + ) + +if(BSPLINES_USE_PARDISO) + set(BSPLINES_USE_BLAS_VENDOR Intel10_64lp CACHE STRING "" INTERNAL) +endif() + +set(ENV{BLA_VENDOR} ${BSPLINES_USE_BLAS_VENDOR}) + +find_package(BLAS REQUIRED) +find_package(LAPACK REQUIRED) diff --git a/cmake/bsplines-config.cmake.in b/cmake/bsplines-config.cmake.in new file mode 100644 index 0000000..bd85d31 --- /dev/null +++ b/cmake/bsplines-config.cmake.in @@ -0,0 +1,54 @@ +/** + * @file bsplines-config.cmake.in + * + * @brief + * + * @copyright + * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) + * SPC (Swiss Plasma Center) + * + * spclibs is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * + * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + * + * @authors + * (in alphabetical order) + * @author Trach-Minh Tran + */ +# - Config file for the BSPLINES package +# It defines the target the following variables: +# FFLAGS - Fortran compile flags +# BSPLINES_MODS - include directories for bsplines modules +# BSPLINES_LIBS - bsplines library +# BSPLINES_EXTRA_INCS - additional include directories +# BSPLINES_EXTRA_LIBS - additional libraries +# HAS_PARDISO - BSPLINES built with PARDISO +# HAS_MUMPS - BSPLINES built with MUMPS +# MPIEXEC - MPI launcher +# MPIEXEC_NUMPROC_FLAG - Number of MPI processes flag + +# Compute paths +get_filename_component(_dir "${CMAKE_CURRENT_LIST_FILE}" PATH) +get_filename_component(_prefix "${_dir}/../.." ABSOLUTE) + +# Import the targets +include("${_prefix}/lib/cmake/bsplines-targets.cmake") + +# Report other information +set(FFLAGS "@CMAKE_Fortran_FLAGS@") +set(BSPLINES_MODS "${_prefix}/include") +set(BSPLINES_LIBS fft bsplines pppack pputils2) +set(BSPLINES_EXTRA_INCS "@EXTRA_INCS@") +set(BSPLINES_EXTRA_LIBS "@EXTRA_LIBS@") +set(HAS_PARDISO "@HAS_PARDISO@") +set(HAS_MUMPS "@HAS_MUMPS@") +set(MPIEXEC "@MPIEXEC@") +set(MPIEXEC_NUMPROC_FLAG "@MPIEXEC_NUMPROC_FLAG@") diff --git a/docs/doxygen/Doxyfile b/docs/doxygen/Doxyfile new file mode 100644 index 0000000..bc32a3b --- /dev/null +++ b/docs/doxygen/Doxyfile @@ -0,0 +1,75 @@ +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +PROJECT_NAME = "SPClibs" +PROJECT_BRIEF = "" +PROJECT_LOGO = img/epfl_logo.png +OUTPUT_DIRECTORY = ./ +OPTIMIZE_FOR_FORTRAN = YES +ENABLE_PREPROCESSING = YES +MACRO_EXPANSION = YES + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +EXTRACT_ALL = YES +EXTRACT_PRIVATE = YES + +#--------------------------------------------------------------------------- +# Configuration options related to the input files +#--------------------------------------------------------------------------- + +INPUT = ../../src/ +INPUT += ../../pppack/ +INPUT += ../../pputils2/ +INPUT += ../../fft/ +FILE_PATTERNS = *.f90 *.F90 *.tpl *.c +RECURSIVE = YES + +#--------------------------------------------------------------------------- +# Configuration options related to source browsing +#--------------------------------------------------------------------------- + +SOURCE_BROWSER = NO + +#--------------------------------------------------------------------------- +# Configuration options related to the HTML output +#--------------------------------------------------------------------------- + +GENERATE_HTML = YES +HTML_OUTPUT = html +HTML_DYNAMIC_SECTIONS = YES +USE_MATHJAX = YES +MATHJAX_RELPATH = https://cdn.mathjax.org/mathjax/latest +GENERATE_TREEVIEW = YES +HTML_FOOTER = customfooter.html +HTML_EXTRA_STYLESHEET = doxygen-awesome-css/doxygen-awesome.css custom.css +HTML_COLORSTYLE_HUE = 209 +HTML_COLORSTYLE_SAT = 255 +HTML_COLORSTYLE_GAMMA = 113 + +#--------------------------------------------------------------------------- +# Configuration options related to the LaTeX output +#--------------------------------------------------------------------------- + +GENERATE_LATEX = NO + +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- + +CLASS_DIAGRAMS = YES +HAVE_DOT = YES +CALL_GRAPH = YES +CALLER_GRAPH = YES +DOT_GRAPH_MAX_NODES = 60 +DOT_IMAGE_FORMAT = svg + +#--------------------------------------------------------------------------- +# List of user-defined commands +#--------------------------------------------------------------------------- +ALIASES += "merge=\xrefitem merge \"Merge comments\" \"Merge comments\"" + +DISTRIBUTE_GROUP_DOC = YES diff --git a/docs/doxygen/custom.css b/docs/doxygen/custom.css new file mode 100644 index 0000000..815563f --- /dev/null +++ b/docs/doxygen/custom.css @@ -0,0 +1,3 @@ +:root { + --content-maxwidth: 1200px; +} diff --git a/docs/doxygen/customfooter.html b/docs/doxygen/customfooter.html new file mode 100644 index 0000000..cde195a --- /dev/null +++ b/docs/doxygen/customfooter.html @@ -0,0 +1,21 @@ + + + + + + + + + + diff --git a/docs/doxygen/doxygen-awesome-css/doxygen-awesome.css b/docs/doxygen/doxygen-awesome-css/doxygen-awesome.css new file mode 100644 index 0000000..5256f64 --- /dev/null +++ b/docs/doxygen/doxygen-awesome-css/doxygen-awesome.css @@ -0,0 +1,1364 @@ +/** + +Doxygen Awesome +https://github.com/jothepro/doxygen-awesome-css + +MIT License + +Copyright (c) 2021 jothepro + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +*/ + +:root { + /* primary theme color. This will affect the entire websites color scheme: links, arrows, labels, ... */ + --primary-color: #1982d2; + --primary-dark-color: #00559f; + --primary-light-color: #7aabd6; + --primary-lighter-color: #cae1f1; + --primary-lightest-color: #e9f1f8; + + /* page base colors */ + --page-background-color: white; + --page-foreground-color: #2c3e50; + --page-secondary-foreground-color: #67727e; + + /* color for all separators on the website: hr, borders, ... */ + --separator-color: #dedede; + + /* border radius for all rounded components. Will affect many components, like dropdowns, memitems, codeblocks, ... */ + --border-radius-large: 8px; + --border-radius-small: 4px; + --border-radius-medium: 6px; + + /* default spacings. Most compontest reference these values for spacing, to provide uniform spacing on the page. */ + --spacing-small: 5px; + --spacing-medium: 10px; + --spacing-large: 16px; + + /* default box shadow used for raising an element above the normal content. Used in dropdowns, Searchresult, ... */ + --box-shadow: 0 2px 10px 0 rgba(0,0,0,.1); + + --odd-color: rgba(0,0,0,.03); + + /* font-families. will affect all text on the website + * font-family: the normal font for text, headlines, menus + * font-family-monospace: used for preformatted text in memtitle, code, fragments + */ + --font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Oxygen,Ubuntu,Cantarell,Fira Sans,Droid Sans,Helvetica Neue,sans-serif; + --font-family-monospace: source-code-pro,Menlo,Monaco,Consolas,Courier New,monospace; + + /* font sizes */ + --page-font-size: 15.6px; + --navigation-font-size: 14.4px; + --code-font-size: 14.4px; /* affects code, fragment */ + --title-font-size: 22px; + + /* content text properties. These only affect the page content, not the navigation or any other ui elements */ + --content-line-height: 27px; + /* The content is centered and constraint in it's width. To make the content fill the whole page, set the variable to auto.*/ + --content-maxwidth: 900px; + + /* colors for various content boxes: @warning, @note, @deprecated @bug */ + --warning-color: #fca49b; + --warning-color-dark: #b61825; + --warning-color-darker: #75070f; + --note-color: rgba(255,229,100,.3); + --note-color-dark: #c39900; + --note-color-darker: #8d7400; + --deprecated-color: rgb(214, 216, 224); + --deprecated-color-dark: #5b6269; + --deprecated-color-darker: #43454a; + --bug-color: rgb(246, 208, 178); + --bug-color-dark: #a53a00; + --bug-color-darker: #5b1d00; + --invariant-color: #b7f8d0; + --invariant-color-dark: #00ba44; + --invariant-color-darker: #008622; + + /* blockquote colors */ + --blockquote-background: #f5f5f5; + --blockquote-foreground: #727272; + + /* table colors */ + --tablehead-background: #f1f1f1; + --tablehead-foreground: var(--page-foreground-color); + + /* menu-display: block | none + * Visibility of the top navigation on screens >= 768px. On smaller screen the menu is always visible. + * `GENERATE_TREEVIEW` MUST be enabled! + */ + --menu-display: block; + + --menu-focus-foreground: var(--page-background-color); + --menu-focus-background: var(--primary-color); + --menu-selected-background: rgba(0,0,0,.05); + + + --header-background: var(--page-background-color); + --header-foreground: var(--page-foreground-color); + + /* searchbar colors */ + --searchbar-background: var(--side-nav-background); + --searchbar-foreground: var(--page-foreground-color); + + /* searchbar size + * (`searchbar-width` is only applied on screens >= 768px. + * on smaller screens the searchbar will always fill the entire screen width) */ + --searchbar-height: 33px; + --searchbar-width: 210px; + + /* code block colors */ + --code-background: #f5f5f5; + --code-foreground: var(--page-foreground-color); + + /* fragment colors */ + --fragment-background: #282c34; + --fragment-foreground: #ffffff; + --fragment-keyword: #cc99cd; + --fragment-keywordtype: #ab99cd; + --fragment-keywordflow: #e08000; + --fragment-token: #7ec699; + --fragment-comment: #999999; + --fragment-link: #98c0e3; + --fragment-preprocessor: #65cabe; + --fragment-linenumber-color: #cccccc; + --fragment-linenumber-background: #35393c; + --fragment-linenumber-border: #1f1f1f; + --fragment-lineheight: 20px; + + /* sidebar navigation (treeview) colors */ + --side-nav-background: #fbfbfb; + --side-nav-foreground: var(--page-foreground-color); + --side-nav-arrow-color: var(--page-background-color); + + /* height of an item in any tree / collapsable table */ + --tree-item-height: 30px; +} + +@media screen and (max-width: 767px) { + :root { + --page-font-size: 16px; + --navigation-font-size: 16px; + --code-font-size: 15px; /* affects code, fragment */ + --title-font-size: 22px; + } +} + +@media (prefers-color-scheme: dark) { + :root { + --primary-color: #00559f; + --primary-dark-color: #1982d2; + --primary-light-color: #4779ac; + --primary-lighter-color: #191e21; + --primary-lightest-color: #191a1c; + + --box-shadow: 0 2px 10px 0 rgba(0,0,0,.35); + + --odd-color: rgba(0,0,0,.1); + + --menu-selected-background: rgba(0,0,0,.4); + + --page-background-color: #1C1D1F; + --page-foreground-color: #d2dbde; + --page-secondary-foreground-color: #859399; + --separator-color: #000000; + --side-nav-background: #252628; + + --code-background: #2a2c2f; + + --tablehead-background: #2a2c2f; + + --blockquote-background: #1f2022; + --blockquote-foreground: #77848a; + + --warning-color: #b61825; + --warning-color-dark: #510a02; + --warning-color-darker: #f5b1aa; + --note-color: rgb(255, 183, 0); + --note-color-dark: #9f7300; + --note-color-darker: #fff6df; + --deprecated-color: rgb(88, 90, 96); + --deprecated-color-dark: #262e37; + --deprecated-color-darker: #a0a5b0; + --bug-color: rgb(248, 113, 0); + --bug-color-dark: #812a00; + --bug-color-darker: #ffd3be; + } +} + +body { + color: var(--page-foreground-color); + background-color: var(--page-background-color); + font-size: var(--page-font-size); +} + +body, table, div, p, dl, #nav-tree .label, .title, .sm-dox a, .sm-dox a:hover, .sm-dox a:focus, #projectname, .SelectItem, #MSearchField, .navpath li.navelem a, .navpath li.navelem a:hover { + font-family: var(--font-family); +} + +h1, h2, h3, h4, h5 { + margin-top: .9em; + font-weight: 600; + line-height: initial; +} + +p, div, table, dl { + font-size: var(--page-font-size); +} + +a, a.el:visited, a.el:hover, a.el:focus, a.el:active { + color: var(--primary-dark-color); +} + +/* + Title and top navigation + */ + +#top { + background: var(--header-background); + border-bottom: 1px solid var(--separator-color); +} + +@media screen and (min-width: 768px) { + #top { + display: flex; + flex-wrap: wrap; + justify-content: space-between; + align-items: center; + } +} + +#main-nav { + flex-grow: 5; + padding: var(--spacing-small) var(--spacing-medium); +} + +#titlearea { + width: auto; + padding: var(--spacing-medium) var(--spacing-large); + background: none; + color: var(--header-foreground); + border-bottom: none; +} + +@media screen and (max-width: 767px) { + #titlearea { + padding-bottom: var(--spacing-small); + } +} + +#titlearea table tbody tr { + height: auto !important; +} + +#projectname { + font-size: var(--title-font-size); + font-weight: 600; +} + +#projectnumber { + font-family: inherit; + font-size: 60%; +} + +#projectbrief { + font-family: inherit; + font-size: 80%; +} + +#projectlogo { + vertical-align: middle; +} + +#projectlogo img { + max-height: calc(var(--title-font-size) * 2); + margin-right: var(--spacing-small); +} + +.sm-dox, .tabs, .tabs2, .tabs3 { + background: none; + padding: 0; +} + +.tabs, .tabs2, .tabs3 { + border-bottom: 1px solid var(--separator-color); + margin-bottom: -1px; +} + +@media screen and (max-width: 767px) { + .sm-dox a span.sub-arrow { + background: var(--code-background); + } +} + +@media screen and (min-width: 768px) { + .sm-dox li, .tablist li { + display: var(--menu-display); + } + + .sm-dox a span.sub-arrow { + border-color: var(--header-foreground) transparent transparent transparent; + } + + .sm-dox a:hover span.sub-arrow { + border-color: var(--menu-focus-foreground) transparent transparent transparent; + } + + .sm-dox ul a span.sub-arrow { + border-color: transparent transparent transparent var(--header-foreground); + } + + .sm-dox ul a:hover span.sub-arrow { + border-color: transparent transparent transparent var(--menu-focus-foreground); + } +} + +.sm-dox ul { + background: var(--page-background-color); + box-shadow: var(--box-shadow); + border: 1px solid var(--separator-color); + border-radius: var(--border-radius-medium) !important; + padding: var(--spacing-small); + animation: ease-out 150ms slideInMenu; +} + +@keyframes slideInMenu { + from { + opacity: 0; + transform: translate(0px, -2px); + } + + to { + opacity: 1; + transform: translate(0px, 0px); + } +} + +.sm-dox ul a { + color: var(--page-foreground-color); + background: var(--page-background-color); + font-size: var(--navigation-font-size); +} + +.sm-dox>li>ul:after { + border-bottom-color: var(--page-background-color) !important; +} + +.sm-dox>li>ul:before { + border-bottom-color: var(--separator-color) !important; +} + +.sm-dox ul a:hover, .sm-dox ul a:active, .sm-dox ul a:focus { + font-size: var(--navigation-font-size); + color: var(--menu-focus-foreground); + text-shadow: none; + background-color: var(--menu-focus-background); + border-radius: var(--border-radius-small) !important; +} + +.sm-dox a, .sm-dox a:focus, .tablist li, .tablist li a, .tablist li.current a { + text-shadow: none; + background: transparent; + background-image: none !important; + color: var(--header-foreground); + font-weight: normal; + font-size: var(--navigation-font-size); +} + +.sm-dox a:focus { + outline: auto; +} + +.sm-dox a:hover, .sm-dox a:active, .tablist li a:hover { + text-shadow: none; + font-weight: normal; + background: var(--menu-focus-background); + color: var(--menu-focus-foreground); + border-radius: var(--border-radius-small) !important; + font-size: var(--navigation-font-size); +} + +.tablist li.current { + border-radius: var(--border-radius-small); + background: var(--menu-selected-background); +} + +.tablist li { + margin: var(--spacing-small) 0 var(--spacing-small) var(--spacing-small); +} + +.tablist a { + padding: 0 var(--spacing-large); +} + + +/* + Search box + */ + +#MSearchBox { + height: var(--searchbar-height); + background: var(--searchbar-background); + border-radius: var(--searchbar-height); + border: 1px solid var(--separator-color); + overflow: hidden; + width: var(--searchbar-width); + position: relative; + box-shadow: none; + display: block; + margin-top: 0; +} + +.left #MSearchSelect { + left: 0; +} + +.tabs .left #MSearchSelect { + padding-left: 0; +} + +.tabs #MSearchBox { + position: absolute; + right: var(--spacing-medium); +} + +@media screen and (max-width: 767px) { + .tabs #MSearchBox { + position: relative; + right: 0; + margin-left: var(--spacing-medium); + margin-top: 0; + } +} + +#MSearchSelectWindow, #MSearchResultsWindow { + z-index: 9999; +} + +#MSearchBox.MSearchBoxActive { + border-color: var(--primary-color); + box-shadow: inset 0 0 0 1px var(--primary-color); +} + +#main-menu > li:last-child { + margin-right: 0; +} + +@media screen and (max-width: 767px) { + #main-menu > li:last-child { + height: 50px; + } +} + +#MSearchField { + font-size: var(--navigation-font-size); + height: calc(var(--searchbar-height) - 2px); + background: transparent; + width: calc(var(--searchbar-width) - 64px); +} + +.MSearchBoxActive #MSearchField { + color: var(--searchbar-foreground); +} + +#MSearchSelect { + top: calc(calc(var(--searchbar-height) / 2) - 11px); +} + +.left #MSearchSelect { + padding-left: 8px; +} + +#MSearchBox span.left, #MSearchBox span.right { + background: none; +} + +#MSearchBox span.right { + padding-top: calc(calc(var(--searchbar-height) / 2) - 12px); +} + +.tabs #MSearchBox span.right { + top: calc(calc(var(--searchbar-height) / 2) - 12px); +} + +@keyframes slideInSearchResults { + from { + opacity: 0; + transform: translate(0, 15px); + } + + to { + opacity: 1; + transform: translate(0, 20px); + } +} + +#MSearchResultsWindow { + left: auto !important; + right: var(--spacing-medium); + border-radius: var(--border-radius-large); + border: 1px solid var(--separator-color); + transform: translate(0, 20px); + box-shadow: var(--box-shadow); + animation: ease-out 280ms slideInSearchResults; + background: var(--page-background-color); +} + +iframe#MSearchResults { + background: var(--page-background-color); + margin: 4px; +} + +#MSearchSelectWindow { + border: 1px solid var(--separator-color); + border-radius: var(--border-radius-medium); + box-shadow: var(--box-shadow); + background: var(--page-background-color); +} + +#MSearchSelectWindow a.SelectItem { + font-size: var(--navigation-font-size); + line-height: var(--content-line-height); + margin: 0 var(--spacing-small); + border-radius: var(--border-radius-small); + color: var(--page-foreground-color); +} + +#MSearchSelectWindow a.SelectItem:hover { + background: var(--menu-focus-background); + color: var(--menu-focus-foreground); +} + +@media screen and (max-width: 767px) { + #MSearchBox { + margin-top: var(--spacing-medium); + margin-bottom: var(--spacing-medium); + width: calc(100vw - 30px); + } + + #main-menu > li:last-child { + float: none !important; + } + + #MSearchField { + width: calc(100vw - 95px); + } + + @keyframes slideInSearchResultsMobile { + from { + opacity: 0; + transform: translate(0, 15px); + } + + to { + opacity: 1; + transform: translate(0, 20px); + } + } + + #MSearchResultsWindow { + left: var(--spacing-medium) !important; + right: var(--spacing-medium); + overflow: auto; + transform: translate(0, 20px); + animation: ease-out 280ms slideInSearchResultsMobile; + } +} + +/* + Tree view + */ + +#side-nav { + padding: 0 !important; + background: var(--side-nav-background); +} + +@media screen and (max-width: 767px) { + #side-nav { + display: none; + } + + #doc-content { + margin-left: 0 !important; + height: auto !important; + padding-bottom: calc(2 * var(--spacing-large)); + } +} + +#nav-tree { + background: transparent; +} + +#nav-tree .label { + font-size: var(--navigation-font-size); +} + +#nav-tree .item { + height: var(--tree-item-height); + line-height: var(--tree-item-height); +} + +#nav-sync { + top: 12px !important; + right: 12px; +} + +#nav-tree .selected { + text-shadow: none; + background-image: none; + background-color: transparent; + box-shadow: inset 4px 0 0 0 var(--primary-dark-color); +} + +#nav-tree a { + color: var(--side-nav-foreground); +} + +#nav-tree a:focus { + outline-style: auto; +} + +.arrow { + color: var(--primary-light-color); + font-family: serif; + height: auto; + text-align: right; +} + +#nav-tree .arrow { + opacity: 0; +} + +#nav-tree div.item:hover .arrow, #nav-tree a:focus .arrow { + opacity: 1; +} + +#nav-tree .selected a { + color: var(--primary-dark-color); + font-weight: bolder; +} + +.ui-resizable-e { + background: var(--separator-color); + width: 1px; +} + +/* + Contents + */ + +div.header { + border-bottom: 1px solid var(--separator-color); + background-color: var(--page-background-color); + background-image: none; +} + +div.contents, div.header .title, div.header .summary { + max-width: var(--content-maxwidth); +} + +div.contents, div.header .title { + line-height: initial; + margin: calc(var(--spacing-medium) + .2em) auto var(--spacing-medium) auto; +} + +div.header .summary { + margin: var(--spacing-medium) auto 0 auto; +} + +div.headertitle { + padding: 0; +} + +div.header .title { + font-weight: 600; + font-size: 210%; + padding: var(--spacing-medium) var(--spacing-large); + word-break: break-word; +} + +div.header .summary { + width: auto; + display: block; + float: none; + padding: 0 var(--spacing-large); +} + +td.memSeparator { + border-color: var(--separator-color); +} + +.mdescLeft, .mdescRight, .memItemLeft, .memItemRight, .memTemplItemLeft, .memTemplItemRight, .memTemplParams { + background: var(--code-background); +} + +.mdescRight { + color: var(--page-secondary-foreground-color); +} + +span.mlabel { + background: var(--primary-color); + border: none; + padding: 4px 9px; + border-radius: 12px; + margin-right: var(--spacing-medium); +} + +span.mlabel:last-of-type { + margin-right: 2px; +} + +div.contents { + padding: 0 var(--spacing-large); +} + +div.contents p, div.contents li { + line-height: var(--content-line-height); +} + +div.contents div.dyncontent { + margin: var(--spacing-medium) 0; +} + +@media (prefers-color-scheme: dark) { + div.contents div.dyncontent img { + filter: hue-rotate(180deg) invert(); + } +} + +h2.groupheader { + border-bottom: 1px solid var(--separator-color); + color: var(--page-foreground-color); +} + +blockquote { + padding: var(--spacing-small) var(--spacing-medium); + background: var(--blockquote-background); + color: var(--blockquote-foreground); + border-left: 2px solid var(--blockquote-foreground); + margin: 0; +} + +blockquote p { + margin: var(--spacing-small) 0 var(--spacing-medium) 0; +} +.paramname { + color: var(--primary-dark-color); +} + +.glow { + text-shadow: 0 0 15px var(--primary-light-color) !important; +} + +.alphachar a { + color: var(--page-foreground-color); +} + +/* + Table of Contents + */ + +div.toc { + background-color: var(--side-nav-background); + border: 1px solid var(--separator-color); + border-radius: var(--border-radius-medium); + box-shadow: var(--box-shadow); + padding: 0 var(--spacing-large); + margin: 0 0 var(--spacing-medium) var(--spacing-medium); +} + +div.toc h3 { + color: var(--side-nav-foreground); + font-size: var(--navigation-font-size); + margin: var(--spacing-large) 0; +} + +div.toc li { + font-size: var(--navigation-font-size); + padding: 0; + background: none; +} + +div.toc li:before { + content: '↓'; + font-weight: 800; + font-family: var(--font-family); + margin-right: var(--spacing-small); + color: var(--side-nav-foreground); + opacity: .4; +} + +div.toc ul li.level1 { + margin: 0; +} + +div.toc ul li.level2, div.toc ul li.level3 { + margin-top: 0; +} + + +@media screen and (max-width: 767px) { + div.toc { + float: none; + width: auto; + margin: 0 0 var(--spacing-medium) 0; + } +} + +/* + Code & Fragments + */ + +code, div.fragment, pre.fragment { + border-radius: var(--border-radius-small); + border: none; + overflow: hidden; +} + +code { + display: inline; + background: var(--code-background); + color: var(--code-foreground); + padding: 2px 6px; + word-break: break-word; +} + +div.fragment, pre.fragment { + margin: var(--spacing-medium) 0; + padding: 14px 16px; + background: var(--fragment-background); + color: var(--fragment-foreground); + overflow-x: auto; +} + +@media screen and (max-width: 767px) { + div.fragment, pre.fragment { + border-top-right-radius: 0; + border-bottom-right-radius: 0; + } + + .contents > div.fragment, .textblock > div.fragment, .textblock > pre.fragment { + margin: var(--spacing-medium) calc(0px - var(--spacing-large)); + border-radius: 0; + } + + .textblock li > .fragment { + margin: var(--spacing-medium) calc(0px - var(--spacing-large)); + } + + .memdoc li > .fragment { + margin: var(--spacing-medium) calc(0px - var(--spacing-medium)); + } + + .memdoc > div.fragment, .memdoc > pre.fragment, dl dd > div.fragment, dl dd pre.fragment { + margin: var(--spacing-medium) calc(0px - var(--spacing-medium)); + border-radius: 0; + } +} + +code, code a, pre.fragment, div.fragment, div.fragment .line, div.fragment span, div.fragment .line a, div.fragment .line span { + font-family: var(--font-family-monospace); + font-size: var(--code-font-size) !important; +} + +div.line:after { + margin-right: var(--spacing-medium); +} + +div.fragment .line, pre.fragment { + white-space: pre; + word-wrap: initial; + line-height: var(--fragment-lineheight); +} + +div.fragment span.keyword { + color: var(--fragment-keyword); +} + +div.fragment span.keywordtype { + color: var(--fragment-keywordtype); +} + +div.fragment span.keywordflow { + color: var(--fragment-keywordflow); +} + +div.fragment span.stringliteral { + color: var(--fragment-token) +} + +div.fragment span.comment { + color: var(--fragment-comment); +} + +div.fragment a.code { + color: var(--fragment-link); +} + +div.fragment span.preprocessor { + color: var(--fragment-preprocessor); +} + +div.fragment span.lineno { + display: inline-block; + width: 27px; + border-right: none; + background: var(--fragment-linenumber-background); + color: var(--fragment-linenumber-color); +} + +div.fragment span.lineno a { + background: none; + color: var(--fragment-link); +} + +div.fragment .line:first-child .lineno { + box-shadow: -999999px 0px 0 999999px var(--fragment-linenumber-background), -999998px 0px 0 999999px var(--fragment-linenumber-border); +} + +/* + dl warning, attention, note, deprecated, bug, ... + */ + +dl.warning, dl.attention, dl.note, dl.deprecated, dl.bug, dl.invariant, dl.pre { + padding: var(--spacing-medium); + margin: var(--spacing-medium) 0; + color: var(--page-background-color); + overflow: hidden; + margin-left: 0; + border-radius: var(--border-radius-small); +} + +dl.section dd { + margin-bottom: 2px; +} + +dl.warning, dl.attention { + background: var(--warning-color); + border-left: 8px solid var(--warning-color-dark); + color: var(--warning-color-darker); +} + +dl.warning dt, dl.attention dt { + color: var(--warning-color-dark); +} + +dl.note { + background: var(--note-color); + border-left: 8px solid var(--note-color-dark); + color: var(--note-color-darker); +} + +dl.note dt { + color: var(--note-color-dark); +} + +dl.bug { + background: var(--bug-color); + border-left: 8px solid var(--bug-color-dark); + color: var(--bug-color-darker); +} + +dl.bug dt a { + color: var(--bug-color-dark) !important; +} + +dl.deprecated { + background: var(--deprecated-color); + border-left: 8px solid var(--deprecated-color-dark); + color: var(--deprecated-color-darker); +} + +dl.deprecated dt a { + color: var(--deprecated-color-dark) !important; +} + +dl.section dd, dl.bug dd, dl.deprecated dd { + margin-inline-start: 0px; +} + +dl.invariant, dl.pre { + background: var(--invariant-color); + border-left: 8px solid var(--invariant-color-dark); + color: var(--invariant-color-darker); +} + +/* + memitem + */ + +div.memdoc, div.memproto, h2.memtitle { + box-shadow: none; + background-image: none; + border: none; +} + +div.memdoc { + padding: 0 var(--spacing-medium); + background: var(--page-background-color); +} + +h2.memtitle, div.memitem { + border: 1px solid var(--separator-color); +} + +div.memproto, h2.memtitle { + background: var(--code-background); + text-shadow: none; +} + +h2.memtitle { + font-weight: 500; + font-family: monospace, fixed; + border-bottom: none; + border-top-left-radius: var(--border-radius-medium); + border-top-right-radius: var(--border-radius-medium); + word-break: break-all; +} + +a:target + h2.memtitle, a:target + h2.memtitle + div.memitem { + border-color: var(--primary-light-color); +} + +a:target + h2.memtitle { + box-shadow: -3px -3px 3px 0 var(--primary-lightest-color), 3px -3px 3px 0 var(--primary-lightest-color); +} + +a:target + h2.memtitle + div.memitem { + box-shadow: 0 0 10px 0 var(--primary-lighter-color); +} + +div.memitem { + border-top-right-radius: var(--border-radius-medium); + border-bottom-right-radius: var(--border-radius-medium); + border-bottom-left-radius: var(--border-radius-medium); + overflow: hidden; + display: block !important; +} + +div.memdoc { + border-radius: 0; +} + +div.memproto { + border-radius: 0 var(--border-radius-small) 0 0; + overflow: auto; + border-bottom: 1px solid var(--separator-color); + padding: var(--spacing-medium); + margin-bottom: -1px; +} + +div.memtitle { + border-top-right-radius: var(--border-radius-medium); + border-top-left-radius: var(--border-radius-medium); +} + +div.memproto table.memname { + font-family: monospace, fixed; + color: var(--page-foreground-color); +} + +table.mlabels, table.mlabels > tbody { + display: block; +} + +td.mlabels-left { + width: auto; +} + +table.mlabels > tbody > tr:first-child { + display: flex; + justify-content: space-between; + flex-wrap: wrap; +} + +.memname, .memitem span.mlabels { + margin: 0 +} + +/* + reflist + */ + +dl.reflist { + border-radius: var(--border-radius-medium); + border: 1px solid var(--separator-color); + overflow: hidden; + padding: 0; +} + + +dl.reflist dt, dl.reflist dd { + box-shadow: none; + text-shadow: none; + background-image: none; + border: none; + padding: 12px; +} + + +dl.reflist dt { + border-radius: 0; + background: var(--code-background); + border-bottom: 1px solid var(--separator-color); + color: var(--page-foreground-color) +} + + +dl.reflist dd { + background: none; +} + +/* + Table + */ + +table.markdownTable, table.fieldtable { + width: 100%; + border: 1px solid var(--separator-color); + margin: var(--spacing-medium) 0; +} + +table.fieldtable { + box-shadow: none; + border-radius: var(--border-radius-small); +} + +th.markdownTableHeadLeft, th.markdownTableHeadRight, th.markdownTableHeadCenter, th.markdownTableHeadNone { + background: var(--tablehead-background); + color: var(--tablehead-foreground); + font-weight: 600; +} + +table.markdownTable td, table.markdownTable th, table.fieldtable dt { + border: 1px solid var(--separator-color); + padding: var(--spacing-small) var(--spacing-medium); +} + +table.fieldtable th { + font-size: var(--page-font-size); + font-weight: 600; + background-image: none; + background-color: var(--tablehead-background); + color: var(--tablehead-foreground); + border-bottom: 1px solid var(--separator-color); +} + +.fieldtable td.fieldtype, .fieldtable td.fieldname { + border-bottom: 1px solid var(--separator-color); + border-right: 1px solid var(--separator-color); +} + +.fieldtable td.fielddoc { + border-bottom: 1px solid var(--separator-color); +} + +.memberdecls td.glow, .fieldtable tr.glow { + background-color: var(--primary-light-color); + box-shadow: 0 0 15px var(--primary-lighter-color); +} + +table.memberdecls { + display: block; + overflow-x: auto; + overflow-y: hidden; +} + + +/* + Horizontal Rule + */ + +hr { + margin-top: var(--spacing-large); + margin-bottom: var(--spacing-large); + border-top:1px solid var(--separator-color); +} + +.contents hr { + box-shadow: var(--content-maxwidth) 0 0 0 var(--separator-color), calc(0px - var(--content-maxwidth)) 0 0 0 var(--separator-color); +} + +.contents img { + max-width: 100%; +} + +/* + Directories + */ +div.directory { + border-top: 1px solid var(--separator-color); + border-bottom: 1px solid var(--separator-color); + width: auto; +} + +table.directory { + font-family: var(--font-family); + font-size: var(--page-font-size); + font-weight: normal; +} + +.directory td.entry { + padding: var(--spacing-small); + display: flex; + align-items: center; +} + +.directory tr.even { + background-color: var(--odd-color); +} + +.icona { + width: auto; + height: auto; + margin: 0 var(--spacing-small); +} + +.icon { + background: var(--primary-dark-color); + width: 18px; + height: 18px; + line-height: 18px; +} + +.iconfopen, .icondoc, .iconfclosed { + background-position: center; + margin-bottom: 0; +} + +.icondoc { + filter: saturate(0.2); +} + +@media screen and (max-width: 767px) { + div.directory { + margin-left: calc(0px - var(--spacing-medium)); + margin-right: calc(0px - var(--spacing-medium)); + } +} + +@media (prefers-color-scheme: dark) { + .iconfopen, .iconfclosed { + filter: hue-rotate(180deg) invert(); + } +} + +/* + Class list + */ + +.classindex dl.odd { + background: var(--odd-color); + border-radius: var(--border-radius-small); +} + +@media screen and (max-width: 767px) { + .classindex { + margin: 0 calc(0px - var(--spacing-small)); + } +} + +/* + Footer and nav-path + */ + +#nav-path { + margin-bottom: -1px; + width: 100%; +} + +#nav-path ul { + background-image: none; + background: var(--page-background-color); + border: none; + border-top: 1px solid var(--separator-color); + border-bottom: 1px solid var(--separator-color); + font-size: var(--navigation-font-size); +} + +img.footer { + width: 60px; +} + +.navpath li.footer { + color: var(--page-secondary-foreground-color); +} + +address.footer { + margin-bottom: var(--spacing-large); +} + +#nav-path li.navelem { + background-image: none; + display: flex; + align-items: center; +} + +.navpath li.navelem a { + text-shadow: none; + display: inline-block; + color: var(--primary-dark-color) +} + +li.navelem { + padding: 0; + margin-left: -8px; +} + +li.navelem:first-child { + margin-left: var(--spacing-large); +} + +li.navelem:first-child:before { + display: none; +} + +#nav-path li.navelem:after { + content: ''; + border: 5px solid var(--page-background-color); + border-bottom-color: transparent; + border-right-color: transparent; + border-top-color: transparent; + transform: scaleY(4.2); + z-index: 10; + margin-left: 6px; +} + +#nav-path li.navelem:before { + content: ''; + border: 5px solid var(--separator-color); + border-bottom-color: transparent; + border-right-color: transparent; + border-top-color: transparent; + transform: scaleY(3.2); + margin-right: var(--spacing-small); +} + +@media (prefers-color-scheme: dark) { + #nav-path li.navelem:after { + text-shadow: 3px 0 0 var(--separator-color), 8px 0 6px rgba(0,0,0,0.4); + } +} + +.navpath li.navelem a:hover { + color: var(--primary-color); +} diff --git a/docs/doxygen/img/epfl_logo.png b/docs/doxygen/img/epfl_logo.png new file mode 100644 index 0000000..f888916 Binary files /dev/null and b/docs/doxygen/img/epfl_logo.png differ diff --git a/docs/manual/Makefile b/docs/manual/Makefile new file mode 100644 index 0000000..206a081 --- /dev/null +++ b/docs/manual/Makefile @@ -0,0 +1,59 @@ +# +# @file bsplines.tex +# +# @brief +# +# @copyright +# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +# SPC (Swiss Plasma Center) +# +# spclibs is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . +# +# @authors +# (in alphabetical order) +# @author Trach-Minh Tran +# +dvi: bsplines.dvi +pdf: bsplines.pdf +ps: bsplines.ps + +.SUFFIXES: +.SUFFIXES: .sgml .html .tex .dvi .pdf .ps .txt + +.tex.dvi: + latex $< + @while ( grep "Rerun to get cross-references" \ + ${<:tex=log} > /dev/null ); do \ + latex $<; \ + done + latex $< + +.dvi.pdf: + dvipdf $< + +.dvi.ps: + dvips $< + +bsplines.dvi: bsplines.tex driv1.eps fit.eps + +solvers.dvi: solvers.tex + +dirichlet_2d.dvi: dirichlet_2d.tex + +using_bsplines.dvi: using_bsplines.tex + +clean: + rm -f *~ *.dvi *.log *.aux *.out *~ *.toc *.flc *.bbl *.blg + +distclean: clean + rm -f bsplines.ps diff --git a/docs/manual/basfun_perf_helios.eps b/docs/manual/basfun_perf_helios.eps new file mode 100644 index 0000000..85152cb --- /dev/null +++ b/docs/manual/basfun_perf_helios.eps @@ -0,0 +1,1281 @@ +%!PS-Adobe-3.0 EPSF-3.0 +%%Creator: MATLAB, The MathWorks, Inc. Version 7.14.0.739 (R2012a). Operating System: Linux 2.6.27.56-0.1-default #1 SMP 2010-12-01 16:57:58 +0100 x86_64. +%%Title: /home/ttran/bsplines_hlst-dev/opt_wk/basfun_perf_helios.eps +%%CreationDate: 03/27/2012 13:25:30 +%%DocumentNeededFonts: Helvetica +%%DocumentProcessColors: Cyan Magenta Yellow Black +%%LanguageLevel: 2 +%%Pages: 1 +%%BoundingBox: 23 77 571 764 +%%EndComments + +%%BeginProlog +% MathWorks dictionary +/MathWorks 160 dict begin +% definition operators +/bdef {bind def} bind def +/ldef {load def} bind def +/xdef {exch def} bdef +/xstore {exch store} bdef +% operator abbreviations +/c /clip ldef +/cc /concat ldef +/cp /closepath ldef +/gr /grestore ldef +/gs /gsave ldef +/mt /moveto ldef +/np /newpath ldef +/cm /currentmatrix ldef +/sm /setmatrix ldef +/rm /rmoveto ldef +/rl /rlineto ldef +/s {show newpath} bdef +/sc {setcmykcolor} bdef +/sr /setrgbcolor ldef +/sg /setgray ldef +/w /setlinewidth ldef +/j /setlinejoin ldef +/cap /setlinecap ldef +/rc {rectclip} bdef +/rf {rectfill} bdef +% page state control +/pgsv () def +/bpage {/pgsv save def} bdef +/epage {pgsv restore} bdef +/bplot /gsave ldef +/eplot {stroke grestore} bdef +% orientation switch +/portraitMode 0 def /landscapeMode 1 def /rotateMode 2 def +% coordinate system mappings +/dpi2point 0 def +% font control +/FontSize 0 def +/FMS {/FontSize xstore findfont [FontSize 0 0 FontSize neg 0 0] + makefont setfont} bdef +/reencode {exch dup where {pop load} {pop StandardEncoding} ifelse + exch dup 3 1 roll findfont dup length dict begin + { 1 index /FID ne {def}{pop pop} ifelse } forall + /Encoding exch def currentdict end definefont pop} bdef +/isroman {findfont /CharStrings get /Agrave known} bdef +/FMSR {3 1 roll 1 index dup isroman {reencode} {pop pop} ifelse + exch FMS} bdef +/csm {1 dpi2point div -1 dpi2point div scale neg translate + dup landscapeMode eq {pop -90 rotate} + {rotateMode eq {90 rotate} if} ifelse} bdef +% line types: solid, dotted, dashed, dotdash +/SO { [] 0 setdash } bdef +/DO { [.5 dpi2point mul 4 dpi2point mul] 0 setdash } bdef +/DA { [6 dpi2point mul] 0 setdash } bdef +/DD { [.5 dpi2point mul 4 dpi2point mul 6 dpi2point mul 4 + dpi2point mul] 0 setdash } bdef +% macros for lines and objects +/L {lineto stroke} bdef +/MP {3 1 roll moveto 1 sub {rlineto} repeat} bdef +/AP {{rlineto} repeat} bdef +/PDlw -1 def +/W {/PDlw currentlinewidth def setlinewidth} def +/PP {closepath eofill} bdef +/DP {closepath stroke} bdef +/MR {4 -2 roll moveto dup 0 exch rlineto exch 0 rlineto + neg 0 exch rlineto closepath} bdef +/FR {MR stroke} bdef +/PR {MR fill} bdef +/L1i {{currentfile picstr readhexstring pop} image} bdef +/tMatrix matrix def +/MakeOval {newpath tMatrix currentmatrix pop translate scale +0 0 1 0 360 arc tMatrix setmatrix} bdef +/FO {MakeOval stroke} bdef +/PO {MakeOval fill} bdef +/PD {currentlinewidth 2 div 0 360 arc fill + PDlw -1 eq not {PDlw w /PDlw -1 def} if} def +/FA {newpath tMatrix currentmatrix pop translate scale + 0 0 1 5 -2 roll arc tMatrix setmatrix stroke} bdef +/PA {newpath tMatrix currentmatrix pop translate 0 0 moveto scale + 0 0 1 5 -2 roll arc closepath tMatrix setmatrix fill} bdef +/FAn {newpath tMatrix currentmatrix pop translate scale + 0 0 1 5 -2 roll arcn tMatrix setmatrix stroke} bdef +/PAn {newpath tMatrix currentmatrix pop translate 0 0 moveto scale + 0 0 1 5 -2 roll arcn closepath tMatrix setmatrix fill} bdef +/vradius 0 def /hradius 0 def /lry 0 def +/lrx 0 def /uly 0 def /ulx 0 def /rad 0 def +/MRR {/vradius xdef /hradius xdef /lry xdef /lrx xdef /uly xdef + /ulx xdef newpath tMatrix currentmatrix pop ulx hradius add uly + vradius add translate hradius vradius scale 0 0 1 180 270 arc + tMatrix setmatrix lrx hradius sub uly vradius add translate + hradius vradius scale 0 0 1 270 360 arc tMatrix setmatrix + lrx hradius sub lry vradius sub translate hradius vradius scale + 0 0 1 0 90 arc tMatrix setmatrix ulx hradius add lry vradius sub + translate hradius vradius scale 0 0 1 90 180 arc tMatrix setmatrix + closepath} bdef +/FRR {MRR stroke } bdef +/PRR {MRR fill } bdef +/MlrRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lry uly sub 2 div def + newpath tMatrix currentmatrix pop ulx rad add uly rad add translate + rad rad scale 0 0 1 90 270 arc tMatrix setmatrix lrx rad sub lry rad + sub translate rad rad scale 0 0 1 270 90 arc tMatrix setmatrix + closepath} bdef +/FlrRR {MlrRR stroke } bdef +/PlrRR {MlrRR fill } bdef +/MtbRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lrx ulx sub 2 div def + newpath tMatrix currentmatrix pop ulx rad add uly rad add translate + rad rad scale 0 0 1 180 360 arc tMatrix setmatrix lrx rad sub lry rad + sub translate rad rad scale 0 0 1 0 180 arc tMatrix setmatrix + closepath} bdef +/FtbRR {MtbRR stroke } bdef +/PtbRR {MtbRR fill } bdef +/stri 6 array def /dtri 6 array def +/smat 6 array def /dmat 6 array def +/tmat1 6 array def /tmat2 6 array def /dif 3 array def +/asub {/ind2 exch def /ind1 exch def dup dup + ind1 get exch ind2 get sub exch } bdef +/tri_to_matrix { + 2 0 asub 3 1 asub 4 0 asub 5 1 asub + dup 0 get exch 1 get 7 -1 roll astore } bdef +/compute_transform { + dmat dtri tri_to_matrix tmat1 invertmatrix + smat stri tri_to_matrix tmat2 concatmatrix } bdef +/ds {stri astore pop} bdef +/dt {dtri astore pop} bdef +/db {2 copy /cols xdef /rows xdef mul dup 3 mul string + currentfile + 3 index 0 eq {/ASCIIHexDecode filter} + {/ASCII85Decode filter 3 index 2 eq {/RunLengthDecode filter} if } + ifelse exch readstring pop + dup 0 3 index getinterval /rbmap xdef + dup 2 index dup getinterval /gbmap xdef + 1 index dup 2 mul exch getinterval /bbmap xdef pop pop}bdef +/it {gs np dtri aload pop moveto lineto lineto cp c + cols rows 8 compute_transform + rbmap gbmap bbmap true 3 colorimage gr}bdef +/il {newpath moveto lineto stroke}bdef +currentdict end def +%%EndProlog + +%%BeginSetup +MathWorks begin + +0 cap + +end +%%EndSetup + +%%Page: 1 1 +%%BeginPageSetup +%%PageBoundingBox: 23 77 571 764 +MathWorks begin +bpage +%%EndPageSetup + +%%BeginObject: obj1 +bplot + +/dpi2point 12 def +portraitMode 0276 9168 csm + + 0 0 6586 8240 rc +85 dict begin %Colortable dictionary +/c0 { 0.000000 0.000000 0.000000 sr} bdef +/c1 { 1.000000 1.000000 1.000000 sr} bdef +/c2 { 0.900000 0.000000 0.000000 sr} bdef +/c3 { 0.000000 0.820000 0.000000 sr} bdef +/c4 { 0.000000 0.000000 0.800000 sr} bdef +/c5 { 0.910000 0.820000 0.320000 sr} bdef +/c6 { 1.000000 0.260000 0.820000 sr} bdef +/c7 { 0.000000 0.820000 0.820000 sr} bdef +c0 +1 j +1 sg + 0 0 6587 8241 rf +6 w +0 2811 5104 0 0 -2811 856 3429 4 MP +PP +-5104 0 0 2811 5104 0 0 -2811 856 3429 5 MP stroke +4 w +DO +0 sg + 856 3429 mt 856 618 L + 856 618 mt 856 618 L +1876 3429 mt 1876 618 L +1876 618 mt 1876 618 L +2897 3429 mt 2897 618 L +2897 618 mt 2897 618 L +3918 3429 mt 3918 618 L +3918 618 mt 3918 618 L +4939 3429 mt 4939 618 L +4939 618 mt 4939 618 L +5960 3429 mt 5960 618 L +5960 618 mt 5960 618 L + 856 3429 mt 5960 3429 L +5960 3429 mt 5960 3429 L + 856 2866 mt 5960 2866 L +5960 2866 mt 5960 2866 L + 856 2304 mt 5960 2304 L +5960 2304 mt 5960 2304 L + 856 1742 mt 5960 1742 L +5960 1742 mt 5960 1742 L + 856 1180 mt 5960 1180 L +5960 1180 mt 5960 1180 L + 856 618 mt 5960 618 L +5960 618 mt 5960 618 L +SO +6 w + 856 3429 mt 5960 3429 L + 856 618 mt 5960 618 L + 856 3429 mt 856 618 L +5960 3429 mt 5960 618 L + 856 3429 mt 5960 3429 L + 856 3429 mt 856 618 L + 856 3429 mt 856 3403 L + 856 618 mt 856 643 L +DO + 856 3429 mt 856 618 L + 856 618 mt 856 618 L +SO + 856 3429 mt 856 3377 L + 856 618 mt 856 669 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + + 768 3611 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 80 FMSR + + 901 3537 mt +(0) s +1163 3429 mt 1163 3403 L +1163 618 mt 1163 643 L +DO +1163 3429 mt 1163 618 L +1163 618 mt 1163 618 L +SO +1343 3429 mt 1343 3403 L +1343 618 mt 1343 643 L +DO +1343 3429 mt 1343 618 L +1343 618 mt 1343 618 L +SO +1470 3429 mt 1470 3403 L +1470 618 mt 1470 643 L +DO +1470 3429 mt 1470 618 L +1470 618 mt 1470 618 L +SO +1569 3429 mt 1569 3403 L +1569 618 mt 1569 643 L +DO +1569 3429 mt 1569 618 L +1569 618 mt 1569 618 L +SO +1650 3429 mt 1650 3403 L +1650 618 mt 1650 643 L +DO +1650 3429 mt 1650 618 L +1650 618 mt 1650 618 L +SO +1718 3429 mt 1718 3403 L +1718 618 mt 1718 643 L +DO +1718 3429 mt 1718 618 L +1718 618 mt 1718 618 L +SO +1777 3429 mt 1777 3403 L +1777 618 mt 1777 643 L +DO +1777 3429 mt 1777 618 L +1777 618 mt 1777 618 L +SO +1830 3429 mt 1830 3403 L +1830 618 mt 1830 643 L +DO +1830 3429 mt 1830 618 L +1830 618 mt 1830 618 L +SO +1876 3429 mt 1876 3403 L +1876 618 mt 1876 643 L +DO +1876 3429 mt 1876 618 L +1876 618 mt 1876 618 L +SO +1876 3429 mt 1876 3377 L +1876 618 mt 1876 669 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + +1788 3611 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 80 FMSR + +1921 3537 mt +(1) s +2184 3429 mt 2184 3403 L +2184 618 mt 2184 643 L +DO +2184 3429 mt 2184 618 L +2184 618 mt 2184 618 L +SO +2363 3429 mt 2363 3403 L +2363 618 mt 2363 643 L +DO +2363 3429 mt 2363 618 L +2363 618 mt 2363 618 L +SO +2491 3429 mt 2491 3403 L +2491 618 mt 2491 643 L +DO +2491 3429 mt 2491 618 L +2491 618 mt 2491 618 L +SO +2590 3429 mt 2590 3403 L +2590 618 mt 2590 643 L +DO +2590 3429 mt 2590 618 L +2590 618 mt 2590 618 L +SO +2671 3429 mt 2671 3403 L +2671 618 mt 2671 643 L +DO +2671 3429 mt 2671 618 L +2671 618 mt 2671 618 L +SO +2739 3429 mt 2739 3403 L +2739 618 mt 2739 643 L +DO +2739 3429 mt 2739 618 L +2739 618 mt 2739 618 L +SO +2798 3429 mt 2798 3403 L +2798 618 mt 2798 643 L +DO +2798 3429 mt 2798 618 L +2798 618 mt 2798 618 L +SO +2850 3429 mt 2850 3403 L +2850 618 mt 2850 643 L +DO +2850 3429 mt 2850 618 L +2850 618 mt 2850 618 L +SO +2897 3429 mt 2897 3403 L +2897 618 mt 2897 643 L +DO +2897 3429 mt 2897 618 L +2897 618 mt 2897 618 L +SO +2897 3429 mt 2897 3377 L +2897 618 mt 2897 669 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + +2809 3611 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 80 FMSR + +2942 3537 mt +(2) s +3204 3429 mt 3204 3403 L +3204 618 mt 3204 643 L +DO +3204 3429 mt 3204 618 L +3204 618 mt 3204 618 L +SO +3384 3429 mt 3384 3403 L +3384 618 mt 3384 643 L +DO +3384 3429 mt 3384 618 L +3384 618 mt 3384 618 L +SO +3512 3429 mt 3512 3403 L +3512 618 mt 3512 643 L +DO +3512 3429 mt 3512 618 L +3512 618 mt 3512 618 L +SO +3611 3429 mt 3611 3403 L +3611 618 mt 3611 643 L +DO +3611 3429 mt 3611 618 L +3611 618 mt 3611 618 L +SO +3691 3429 mt 3691 3403 L +3691 618 mt 3691 643 L +DO +3691 3429 mt 3691 618 L +3691 618 mt 3691 618 L +SO +3760 3429 mt 3760 3403 L +3760 618 mt 3760 643 L +DO +3760 3429 mt 3760 618 L +3760 618 mt 3760 618 L +SO +3819 3429 mt 3819 3403 L +3819 618 mt 3819 643 L +DO +3819 3429 mt 3819 618 L +3819 618 mt 3819 618 L +SO +3871 3429 mt 3871 3403 L +3871 618 mt 3871 643 L +DO +3871 3429 mt 3871 618 L +3871 618 mt 3871 618 L +SO +3918 3429 mt 3918 3403 L +3918 618 mt 3918 643 L +DO +3918 3429 mt 3918 618 L +3918 618 mt 3918 618 L +SO +3918 3429 mt 3918 3377 L +3918 618 mt 3918 669 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + +3830 3611 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 80 FMSR + +3963 3537 mt +(3) s +4225 3429 mt 4225 3403 L +4225 618 mt 4225 643 L +DO +4225 3429 mt 4225 618 L +4225 618 mt 4225 618 L +SO +4405 3429 mt 4405 3403 L +4405 618 mt 4405 643 L +DO +4405 3429 mt 4405 618 L +4405 618 mt 4405 618 L +SO +4532 3429 mt 4532 3403 L +4532 618 mt 4532 643 L +DO +4532 3429 mt 4532 618 L +4532 618 mt 4532 618 L +SO +4631 3429 mt 4631 3403 L +4631 618 mt 4631 643 L +DO +4631 3429 mt 4631 618 L +4631 618 mt 4631 618 L +SO +4712 3429 mt 4712 3403 L +4712 618 mt 4712 643 L +DO +4712 3429 mt 4712 618 L +4712 618 mt 4712 618 L +SO +4781 3429 mt 4781 3403 L +4781 618 mt 4781 643 L +DO +4781 3429 mt 4781 618 L +4781 618 mt 4781 618 L +SO +4840 3429 mt 4840 3403 L +4840 618 mt 4840 643 L +DO +4840 3429 mt 4840 618 L +4840 618 mt 4840 618 L +SO +4892 3429 mt 4892 3403 L +4892 618 mt 4892 643 L +DO +4892 3429 mt 4892 618 L +4892 618 mt 4892 618 L +SO +4939 3429 mt 4939 3403 L +4939 618 mt 4939 643 L +DO +4939 3429 mt 4939 618 L +4939 618 mt 4939 618 L +SO +4939 3429 mt 4939 3377 L +4939 618 mt 4939 669 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + +4851 3611 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 80 FMSR + +4984 3537 mt +(4) s +5246 3429 mt 5246 3403 L +5246 618 mt 5246 643 L +DO +5246 3429 mt 5246 618 L +5246 618 mt 5246 618 L +SO +5426 3429 mt 5426 3403 L +5426 618 mt 5426 643 L +DO +5426 3429 mt 5426 618 L +5426 618 mt 5426 618 L +SO +5553 3429 mt 5553 3403 L +5553 618 mt 5553 643 L +DO +5553 3429 mt 5553 618 L +5553 618 mt 5553 618 L +SO +5652 3429 mt 5652 3403 L +5652 618 mt 5652 643 L +DO +5652 3429 mt 5652 618 L +5652 618 mt 5652 618 L +SO +5733 3429 mt 5733 3403 L +5733 618 mt 5733 643 L +DO +5733 3429 mt 5733 618 L +5733 618 mt 5733 618 L +SO +5801 3429 mt 5801 3403 L +5801 618 mt 5801 643 L +DO +5801 3429 mt 5801 618 L +5801 618 mt 5801 618 L +SO +5861 3429 mt 5861 3403 L +5861 618 mt 5861 643 L +DO +5861 3429 mt 5861 618 L +5861 618 mt 5861 618 L +SO +5913 3429 mt 5913 3403 L +5913 618 mt 5913 643 L +DO +5913 3429 mt 5913 618 L +5913 618 mt 5913 618 L +SO +5960 3429 mt 5960 3403 L +5960 618 mt 5960 643 L +DO +5960 3429 mt 5960 618 L +5960 618 mt 5960 618 L +SO +5960 3429 mt 5960 3377 L +5960 618 mt 5960 669 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + +5872 3611 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 80 FMSR + +6005 3537 mt +(5) s + 856 3429 mt 907 3429 L +5960 3429 mt 5908 3429 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + + 755 3473 mt +(0) s + 856 2866 mt 907 2866 L +5960 2866 mt 5908 2866 L + 755 2910 mt +(1) s + 856 2304 mt 907 2304 L +5960 2304 mt 5908 2304 L + 755 2348 mt +(2) s + 856 1742 mt 907 1742 L +5960 1742 mt 5908 1742 L + 755 1786 mt +(3) s + 856 1180 mt 907 1180 L +5960 1180 mt 5908 1180 L + 755 1224 mt +(4) s + 856 618 mt 907 618 L +5960 618 mt 5908 618 L + 755 662 mt +(5) s + 856 3429 mt 5960 3429 L + 856 618 mt 5960 618 L + 856 3429 mt 856 618 L +5960 3429 mt 5960 618 L +gs 856 618 5105 2812 rc +/c8 { 0.000000 0.000000 1.000000 sr} bdef +c8 +307 32 308 108 307 266 307 73 308 91 307 176 307 -58 307 -473 +308 175 307 -360 307 -393 308 -335 307 -274 307 -228 307 -135 856 3247 16 MP stroke +gr + +c8 + 36 36 856 3247 FO + 36 36 1163 3112 FO + 36 36 1470 2884 FO + 36 36 1777 2610 FO + 36 36 2085 2275 FO + 36 36 2392 1882 FO + 36 36 2699 1522 FO + 36 36 3007 1697 FO + 36 36 3314 1224 FO + 36 36 3621 1166 FO + 36 36 3928 1342 FO + 36 36 4236 1433 FO + 36 36 4543 1506 FO + 36 36 4850 1772 FO + 36 36 5158 1880 FO + 36 36 5465 1912 FO +gs 856 618 5105 2812 rc +/c9 { 0.000000 0.500000 0.000000 sr} bdef +c9 +307 8 308 26 307 65 307 38 308 86 307 179 307 170 307 -57 +308 -42 307 -156 307 -142 308 -238 307 -248 307 -150 307 -90 856 3267 16 MP stroke +gr + +c9 +0 j +0 -58 -58 0 0 58 58 0 827 3238 5 MP +DP +0 -58 -58 0 0 58 58 0 1134 3148 5 MP +DP +0 -58 -58 0 0 58 58 0 1441 2998 5 MP +DP +0 -58 -58 0 0 58 58 0 1748 2750 5 MP +DP +0 -58 -58 0 0 58 58 0 2056 2512 5 MP +DP +0 -58 -58 0 0 58 58 0 2363 2370 5 MP +DP +0 -58 -58 0 0 58 58 0 2670 2214 5 MP +DP +0 -58 -58 0 0 58 58 0 2978 2172 5 MP +DP +0 -58 -58 0 0 58 58 0 3285 2115 5 MP +DP +0 -58 -58 0 0 58 58 0 3592 2285 5 MP +DP +0 -58 -58 0 0 58 58 0 3899 2464 5 MP +DP +0 -58 -58 0 0 58 58 0 4207 2550 5 MP +DP +0 -58 -58 0 0 58 58 0 4514 2588 5 MP +DP +0 -58 -58 0 0 58 58 0 4821 2653 5 MP +DP +0 -58 -58 0 0 58 58 0 5129 2679 5 MP +DP +0 -58 -58 0 0 58 58 0 5436 2687 5 MP +DP +gs 856 618 5105 2812 rc +gr + +0 sg +2714 3754 mt +(Number of grouped points) s + 673 2907 mt -90 rotate +(BASFUN Vectorization Speed Up) s +90 rotate +2682 523 mt +(HELIOS, Quadratic Splines) s + 839 3472 mt +( ) s +5944 660 mt +( ) s +1 sg +0 334 1616 0 0 -334 4284 1012 4 MP +PP +-1616 0 0 334 1616 0 0 -334 4284 1012 5 MP stroke +4 w +DO +SO +6 w +0 sg +4284 1012 mt 5900 1012 L +4284 678 mt 5900 678 L +4284 1012 mt 4284 678 L +5900 1012 mt 5900 678 L +4284 1012 mt 5900 1012 L +4284 1012 mt 4284 678 L +4284 1012 mt 5900 1012 L +4284 678 mt 5900 678 L +4284 1012 mt 4284 678 L +5900 1012 mt 5900 678 L +4749 812 mt +(Periodic Splines) s +gs 4284 678 1617 335 rc +c8 +358 0 4355 769 2 MP stroke +gs 4461 696 147 147 rc + 36 36 4534 769 FO +gr + +gr + +c8 +0 sg +4749 963 mt +(Non-perodic Splines) s +gs 4284 678 1617 335 rc +c9 +358 0 4355 920 2 MP stroke +gs 4461 847 147 147 rc +0 -58 -58 0 0 58 58 0 4505 891 5 MP +DP +gr + +gr + +c9 +1 sg +0 2812 5104 0 0 -2812 856 7334 4 MP +PP +-5104 0 0 2812 5104 0 0 -2812 856 7334 5 MP stroke +4 w +DO +0 sg + 856 7334 mt 856 4522 L + 856 4522 mt 856 4522 L +1876 7334 mt 1876 4522 L +1876 4522 mt 1876 4522 L +2897 7334 mt 2897 4522 L +2897 4522 mt 2897 4522 L +3918 7334 mt 3918 4522 L +3918 4522 mt 3918 4522 L +4939 7334 mt 4939 4522 L +4939 4522 mt 4939 4522 L +5960 7334 mt 5960 4522 L +5960 4522 mt 5960 4522 L + 856 7334 mt 5960 7334 L +5960 7334 mt 5960 7334 L + 856 6771 mt 5960 6771 L +5960 6771 mt 5960 6771 L + 856 6209 mt 5960 6209 L +5960 6209 mt 5960 6209 L + 856 5646 mt 5960 5646 L +5960 5646 mt 5960 5646 L + 856 5084 mt 5960 5084 L +5960 5084 mt 5960 5084 L + 856 4522 mt 5960 4522 L +5960 4522 mt 5960 4522 L +SO +6 w + 856 7334 mt 5960 7334 L + 856 4522 mt 5960 4522 L + 856 7334 mt 856 4522 L +5960 7334 mt 5960 4522 L + 856 7334 mt 5960 7334 L + 856 7334 mt 856 4522 L + 856 7334 mt 856 7308 L + 856 4522 mt 856 4547 L +DO + 856 7334 mt 856 4522 L + 856 4522 mt 856 4522 L +SO + 856 7334 mt 856 7282 L + 856 4522 mt 856 4573 L + 768 7516 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 80 FMSR + + 901 7442 mt +(0) s +1163 7334 mt 1163 7308 L +1163 4522 mt 1163 4547 L +DO +1163 7334 mt 1163 4522 L +1163 4522 mt 1163 4522 L +SO +1343 7334 mt 1343 7308 L +1343 4522 mt 1343 4547 L +DO +1343 7334 mt 1343 4522 L +1343 4522 mt 1343 4522 L +SO +1470 7334 mt 1470 7308 L +1470 4522 mt 1470 4547 L +DO +1470 7334 mt 1470 4522 L +1470 4522 mt 1470 4522 L +SO +1569 7334 mt 1569 7308 L +1569 4522 mt 1569 4547 L +DO +1569 7334 mt 1569 4522 L +1569 4522 mt 1569 4522 L +SO +1650 7334 mt 1650 7308 L +1650 4522 mt 1650 4547 L +DO +1650 7334 mt 1650 4522 L +1650 4522 mt 1650 4522 L +SO +1718 7334 mt 1718 7308 L +1718 4522 mt 1718 4547 L +DO +1718 7334 mt 1718 4522 L +1718 4522 mt 1718 4522 L +SO +1777 7334 mt 1777 7308 L +1777 4522 mt 1777 4547 L +DO +1777 7334 mt 1777 4522 L +1777 4522 mt 1777 4522 L +SO +1830 7334 mt 1830 7308 L +1830 4522 mt 1830 4547 L +DO +1830 7334 mt 1830 4522 L +1830 4522 mt 1830 4522 L +SO +1876 7334 mt 1876 7308 L +1876 4522 mt 1876 4547 L +DO +1876 7334 mt 1876 4522 L +1876 4522 mt 1876 4522 L +SO +1876 7334 mt 1876 7282 L +1876 4522 mt 1876 4573 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + +1788 7516 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 80 FMSR + +1921 7442 mt +(1) s +2184 7334 mt 2184 7308 L +2184 4522 mt 2184 4547 L +DO +2184 7334 mt 2184 4522 L +2184 4522 mt 2184 4522 L +SO +2363 7334 mt 2363 7308 L +2363 4522 mt 2363 4547 L +DO +2363 7334 mt 2363 4522 L +2363 4522 mt 2363 4522 L +SO +2491 7334 mt 2491 7308 L +2491 4522 mt 2491 4547 L +DO +2491 7334 mt 2491 4522 L +2491 4522 mt 2491 4522 L +SO +2590 7334 mt 2590 7308 L +2590 4522 mt 2590 4547 L +DO +2590 7334 mt 2590 4522 L +2590 4522 mt 2590 4522 L +SO +2671 7334 mt 2671 7308 L +2671 4522 mt 2671 4547 L +DO +2671 7334 mt 2671 4522 L +2671 4522 mt 2671 4522 L +SO +2739 7334 mt 2739 7308 L +2739 4522 mt 2739 4547 L +DO +2739 7334 mt 2739 4522 L +2739 4522 mt 2739 4522 L +SO +2798 7334 mt 2798 7308 L +2798 4522 mt 2798 4547 L +DO +2798 7334 mt 2798 4522 L +2798 4522 mt 2798 4522 L +SO +2850 7334 mt 2850 7308 L +2850 4522 mt 2850 4547 L +DO +2850 7334 mt 2850 4522 L +2850 4522 mt 2850 4522 L +SO +2897 7334 mt 2897 7308 L +2897 4522 mt 2897 4547 L +DO +2897 7334 mt 2897 4522 L +2897 4522 mt 2897 4522 L +SO +2897 7334 mt 2897 7282 L +2897 4522 mt 2897 4573 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + +2809 7516 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 80 FMSR + +2942 7442 mt +(2) s +3204 7334 mt 3204 7308 L +3204 4522 mt 3204 4547 L +DO +3204 7334 mt 3204 4522 L +3204 4522 mt 3204 4522 L +SO +3384 7334 mt 3384 7308 L +3384 4522 mt 3384 4547 L +DO +3384 7334 mt 3384 4522 L +3384 4522 mt 3384 4522 L +SO +3512 7334 mt 3512 7308 L +3512 4522 mt 3512 4547 L +DO +3512 7334 mt 3512 4522 L +3512 4522 mt 3512 4522 L +SO +3611 7334 mt 3611 7308 L +3611 4522 mt 3611 4547 L +DO +3611 7334 mt 3611 4522 L +3611 4522 mt 3611 4522 L +SO +3691 7334 mt 3691 7308 L +3691 4522 mt 3691 4547 L +DO +3691 7334 mt 3691 4522 L +3691 4522 mt 3691 4522 L +SO +3760 7334 mt 3760 7308 L +3760 4522 mt 3760 4547 L +DO +3760 7334 mt 3760 4522 L +3760 4522 mt 3760 4522 L +SO +3819 7334 mt 3819 7308 L +3819 4522 mt 3819 4547 L +DO +3819 7334 mt 3819 4522 L +3819 4522 mt 3819 4522 L +SO +3871 7334 mt 3871 7308 L +3871 4522 mt 3871 4547 L +DO +3871 7334 mt 3871 4522 L +3871 4522 mt 3871 4522 L +SO +3918 7334 mt 3918 7308 L +3918 4522 mt 3918 4547 L +DO +3918 7334 mt 3918 4522 L +3918 4522 mt 3918 4522 L +SO +3918 7334 mt 3918 7282 L +3918 4522 mt 3918 4573 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + +3830 7516 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 80 FMSR + +3963 7442 mt +(3) s +4225 7334 mt 4225 7308 L +4225 4522 mt 4225 4547 L +DO +4225 7334 mt 4225 4522 L +4225 4522 mt 4225 4522 L +SO +4405 7334 mt 4405 7308 L +4405 4522 mt 4405 4547 L +DO +4405 7334 mt 4405 4522 L +4405 4522 mt 4405 4522 L +SO +4532 7334 mt 4532 7308 L +4532 4522 mt 4532 4547 L +DO +4532 7334 mt 4532 4522 L +4532 4522 mt 4532 4522 L +SO +4631 7334 mt 4631 7308 L +4631 4522 mt 4631 4547 L +DO +4631 7334 mt 4631 4522 L +4631 4522 mt 4631 4522 L +SO +4712 7334 mt 4712 7308 L +4712 4522 mt 4712 4547 L +DO +4712 7334 mt 4712 4522 L +4712 4522 mt 4712 4522 L +SO +4781 7334 mt 4781 7308 L +4781 4522 mt 4781 4547 L +DO +4781 7334 mt 4781 4522 L +4781 4522 mt 4781 4522 L +SO +4840 7334 mt 4840 7308 L +4840 4522 mt 4840 4547 L +DO +4840 7334 mt 4840 4522 L +4840 4522 mt 4840 4522 L +SO +4892 7334 mt 4892 7308 L +4892 4522 mt 4892 4547 L +DO +4892 7334 mt 4892 4522 L +4892 4522 mt 4892 4522 L +SO +4939 7334 mt 4939 7308 L +4939 4522 mt 4939 4547 L +DO +4939 7334 mt 4939 4522 L +4939 4522 mt 4939 4522 L +SO +4939 7334 mt 4939 7282 L +4939 4522 mt 4939 4573 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + +4851 7516 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 80 FMSR + +4984 7442 mt +(4) s +5246 7334 mt 5246 7308 L +5246 4522 mt 5246 4547 L +DO +5246 7334 mt 5246 4522 L +5246 4522 mt 5246 4522 L +SO +5426 7334 mt 5426 7308 L +5426 4522 mt 5426 4547 L +DO +5426 7334 mt 5426 4522 L +5426 4522 mt 5426 4522 L +SO +5553 7334 mt 5553 7308 L +5553 4522 mt 5553 4547 L +DO +5553 7334 mt 5553 4522 L +5553 4522 mt 5553 4522 L +SO +5652 7334 mt 5652 7308 L +5652 4522 mt 5652 4547 L +DO +5652 7334 mt 5652 4522 L +5652 4522 mt 5652 4522 L +SO +5733 7334 mt 5733 7308 L +5733 4522 mt 5733 4547 L +DO +5733 7334 mt 5733 4522 L +5733 4522 mt 5733 4522 L +SO +5801 7334 mt 5801 7308 L +5801 4522 mt 5801 4547 L +DO +5801 7334 mt 5801 4522 L +5801 4522 mt 5801 4522 L +SO +5861 7334 mt 5861 7308 L +5861 4522 mt 5861 4547 L +DO +5861 7334 mt 5861 4522 L +5861 4522 mt 5861 4522 L +SO +5913 7334 mt 5913 7308 L +5913 4522 mt 5913 4547 L +DO +5913 7334 mt 5913 4522 L +5913 4522 mt 5913 4522 L +SO +5960 7334 mt 5960 7308 L +5960 4522 mt 5960 4547 L +DO +5960 7334 mt 5960 4522 L +5960 4522 mt 5960 4522 L +SO +5960 7334 mt 5960 7282 L +5960 4522 mt 5960 4573 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + +5872 7516 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 80 FMSR + +6005 7442 mt +(5) s + 856 7334 mt 907 7334 L +5960 7334 mt 5908 7334 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + + 755 7378 mt +(0) s + 856 6771 mt 907 6771 L +5960 6771 mt 5908 6771 L + 755 6815 mt +(1) s + 856 6209 mt 907 6209 L +5960 6209 mt 5908 6209 L + 755 6253 mt +(2) s + 856 5646 mt 907 5646 L +5960 5646 mt 5908 5646 L + 755 5690 mt +(3) s + 856 5084 mt 907 5084 L +5960 5084 mt 5908 5084 L + 755 5128 mt +(4) s + 856 4522 mt 907 4522 L +5960 4522 mt 5908 4522 L + 755 4566 mt +(5) s + 856 7334 mt 5960 7334 L + 856 4522 mt 5960 4522 L + 856 7334 mt 856 4522 L +5960 7334 mt 5960 4522 L +gs 856 4522 5105 2813 rc +c8 +307 35 308 61 307 276 307 107 308 53 307 111 307 -429 307 -115 +308 223 307 -345 307 -360 308 -367 307 -260 307 -220 307 -151 856 7134 16 MP stroke +gr + +c8 + 36 36 856 7134 FO + 36 36 1163 6983 FO + 36 36 1470 6763 FO + 36 36 1777 6503 FO + 36 36 2085 6136 FO + 36 36 2392 5776 FO + 36 36 2699 5431 FO + 36 36 3007 5654 FO + 36 36 3314 5539 FO + 36 36 3621 5110 FO + 36 36 3928 5221 FO + 36 36 4236 5274 FO + 36 36 4543 5381 FO + 36 36 4850 5657 FO + 36 36 5158 5718 FO + 36 36 5465 5753 FO +gs 856 4522 5105 2813 rc +c9 +307 3 308 8 307 45 307 49 308 46 307 81 307 267 307 -19 +308 -26 307 -61 307 -140 308 -173 307 -209 307 -137 307 -84 856 7174 16 MP stroke +gr + +c9 +0 -58 -58 0 0 58 58 0 827 7145 5 MP +DP +0 -58 -58 0 0 58 58 0 1134 7061 5 MP +DP +0 -58 -58 0 0 58 58 0 1441 6924 5 MP +DP +0 -58 -58 0 0 58 58 0 1748 6715 5 MP +DP +0 -58 -58 0 0 58 58 0 2056 6542 5 MP +DP +0 -58 -58 0 0 58 58 0 2363 6402 5 MP +DP +0 -58 -58 0 0 58 58 0 2670 6341 5 MP +DP +0 -58 -58 0 0 58 58 0 2978 6315 5 MP +DP +0 -58 -58 0 0 58 58 0 3285 6296 5 MP +DP +0 -58 -58 0 0 58 58 0 3592 6563 5 MP +DP +0 -58 -58 0 0 58 58 0 3899 6644 5 MP +DP +0 -58 -58 0 0 58 58 0 4207 6690 5 MP +DP +0 -58 -58 0 0 58 58 0 4514 6739 5 MP +DP +0 -58 -58 0 0 58 58 0 4821 6784 5 MP +DP +0 -58 -58 0 0 58 58 0 5129 6792 5 MP +DP +0 -58 -58 0 0 58 58 0 5436 6795 5 MP +DP +gs 856 4522 5105 2813 rc +gr + +0 sg +2714 7659 mt +(Number of grouped points) s + 673 6812 mt -90 rotate +(BASFUN Vectorization Speed Up) s +90 rotate +2788 4427 mt +(HELIOS, Cubic Splines) s + 839 7377 mt +( ) s +5944 4564 mt +( ) s +1 sg +0 334 1616 0 0 -334 4284 4916 4 MP +PP +-1616 0 0 334 1616 0 0 -334 4284 4916 5 MP stroke +4 w +DO +SO +6 w +0 sg +4284 4916 mt 5900 4916 L +4284 4582 mt 5900 4582 L +4284 4916 mt 4284 4582 L +5900 4916 mt 5900 4582 L +4284 4916 mt 5900 4916 L +4284 4916 mt 4284 4582 L +4284 4916 mt 5900 4916 L +4284 4582 mt 5900 4582 L +4284 4916 mt 4284 4582 L +5900 4916 mt 5900 4582 L +4749 4716 mt +(Periodic Splines) s +gs 4284 4582 1617 335 rc +c8 +358 0 4355 4673 2 MP stroke +gs 4461 4600 147 147 rc + 36 36 4534 4673 FO +gr + +gr + +c8 +0 sg +4749 4867 mt +(Non-perodic Splines) s +gs 4284 4582 1617 335 rc +c9 +358 0 4355 4824 2 MP stroke +gs 4461 4751 147 147 rc +0 -58 -58 0 0 58 58 0 4505 4795 5 MP +DP +gr + +gr + +c9 + +end %%Color Dict + +eplot +%%EndObject + +epage +end + +showpage + +%%Trailer +%%EOF diff --git a/docs/manual/basfun_perf_hpcff.eps b/docs/manual/basfun_perf_hpcff.eps new file mode 100644 index 0000000..ccd1bd6 --- /dev/null +++ b/docs/manual/basfun_perf_hpcff.eps @@ -0,0 +1,1293 @@ +%!PS-Adobe-3.0 EPSF-3.0 +%%Creator: MATLAB, The MathWorks, Inc. Version 7.14.0.739 (R2012a). Operating System: Linux 2.6.27.56-0.1-default #1 SMP 2010-12-01 16:57:58 +0100 x86_64. +%%Title: /home/ttran/bsplines_hlst-dev/opt_wk/basfun_perf_hpcff.eps +%%CreationDate: 03/27/2012 13:00:06 +%%DocumentNeededFonts: Helvetica +%%DocumentProcessColors: Cyan Magenta Yellow Black +%%LanguageLevel: 2 +%%Pages: 1 +%%BoundingBox: 23 78 571 763 +%%EndComments + +%%BeginProlog +% MathWorks dictionary +/MathWorks 160 dict begin +% definition operators +/bdef {bind def} bind def +/ldef {load def} bind def +/xdef {exch def} bdef +/xstore {exch store} bdef +% operator abbreviations +/c /clip ldef +/cc /concat ldef +/cp /closepath ldef +/gr /grestore ldef +/gs /gsave ldef +/mt /moveto ldef +/np /newpath ldef +/cm /currentmatrix ldef +/sm /setmatrix ldef +/rm /rmoveto ldef +/rl /rlineto ldef +/s {show newpath} bdef +/sc {setcmykcolor} bdef +/sr /setrgbcolor ldef +/sg /setgray ldef +/w /setlinewidth ldef +/j /setlinejoin ldef +/cap /setlinecap ldef +/rc {rectclip} bdef +/rf {rectfill} bdef +% page state control +/pgsv () def +/bpage {/pgsv save def} bdef +/epage {pgsv restore} bdef +/bplot /gsave ldef +/eplot {stroke grestore} bdef +% orientation switch +/portraitMode 0 def /landscapeMode 1 def /rotateMode 2 def +% coordinate system mappings +/dpi2point 0 def +% font control +/FontSize 0 def +/FMS {/FontSize xstore findfont [FontSize 0 0 FontSize neg 0 0] + makefont setfont} bdef +/reencode {exch dup where {pop load} {pop StandardEncoding} ifelse + exch dup 3 1 roll findfont dup length dict begin + { 1 index /FID ne {def}{pop pop} ifelse } forall + /Encoding exch def currentdict end definefont pop} bdef +/isroman {findfont /CharStrings get /Agrave known} bdef +/FMSR {3 1 roll 1 index dup isroman {reencode} {pop pop} ifelse + exch FMS} bdef +/csm {1 dpi2point div -1 dpi2point div scale neg translate + dup landscapeMode eq {pop -90 rotate} + {rotateMode eq {90 rotate} if} ifelse} bdef +% line types: solid, dotted, dashed, dotdash +/SO { [] 0 setdash } bdef +/DO { [.5 dpi2point mul 4 dpi2point mul] 0 setdash } bdef +/DA { [6 dpi2point mul] 0 setdash } bdef +/DD { [.5 dpi2point mul 4 dpi2point mul 6 dpi2point mul 4 + dpi2point mul] 0 setdash } bdef +% macros for lines and objects +/L {lineto stroke} bdef +/MP {3 1 roll moveto 1 sub {rlineto} repeat} bdef +/AP {{rlineto} repeat} bdef +/PDlw -1 def +/W {/PDlw currentlinewidth def setlinewidth} def +/PP {closepath eofill} bdef +/DP {closepath stroke} bdef +/MR {4 -2 roll moveto dup 0 exch rlineto exch 0 rlineto + neg 0 exch rlineto closepath} bdef +/FR {MR stroke} bdef +/PR {MR fill} bdef +/L1i {{currentfile picstr readhexstring pop} image} bdef +/tMatrix matrix def +/MakeOval {newpath tMatrix currentmatrix pop translate scale +0 0 1 0 360 arc tMatrix setmatrix} bdef +/FO {MakeOval stroke} bdef +/PO {MakeOval fill} bdef +/PD {currentlinewidth 2 div 0 360 arc fill + PDlw -1 eq not {PDlw w /PDlw -1 def} if} def +/FA {newpath tMatrix currentmatrix pop translate scale + 0 0 1 5 -2 roll arc tMatrix setmatrix stroke} bdef +/PA {newpath tMatrix currentmatrix pop translate 0 0 moveto scale + 0 0 1 5 -2 roll arc closepath tMatrix setmatrix fill} bdef +/FAn {newpath tMatrix currentmatrix pop translate scale + 0 0 1 5 -2 roll arcn tMatrix setmatrix stroke} bdef +/PAn {newpath tMatrix currentmatrix pop translate 0 0 moveto scale + 0 0 1 5 -2 roll arcn closepath tMatrix setmatrix fill} bdef +/vradius 0 def /hradius 0 def /lry 0 def +/lrx 0 def /uly 0 def /ulx 0 def /rad 0 def +/MRR {/vradius xdef /hradius xdef /lry xdef /lrx xdef /uly xdef + /ulx xdef newpath tMatrix currentmatrix pop ulx hradius add uly + vradius add translate hradius vradius scale 0 0 1 180 270 arc + tMatrix setmatrix lrx hradius sub uly vradius add translate + hradius vradius scale 0 0 1 270 360 arc tMatrix setmatrix + lrx hradius sub lry vradius sub translate hradius vradius scale + 0 0 1 0 90 arc tMatrix setmatrix ulx hradius add lry vradius sub + translate hradius vradius scale 0 0 1 90 180 arc tMatrix setmatrix + closepath} bdef +/FRR {MRR stroke } bdef +/PRR {MRR fill } bdef +/MlrRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lry uly sub 2 div def + newpath tMatrix currentmatrix pop ulx rad add uly rad add translate + rad rad scale 0 0 1 90 270 arc tMatrix setmatrix lrx rad sub lry rad + sub translate rad rad scale 0 0 1 270 90 arc tMatrix setmatrix + closepath} bdef +/FlrRR {MlrRR stroke } bdef +/PlrRR {MlrRR fill } bdef +/MtbRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lrx ulx sub 2 div def + newpath tMatrix currentmatrix pop ulx rad add uly rad add translate + rad rad scale 0 0 1 180 360 arc tMatrix setmatrix lrx rad sub lry rad + sub translate rad rad scale 0 0 1 0 180 arc tMatrix setmatrix + closepath} bdef +/FtbRR {MtbRR stroke } bdef +/PtbRR {MtbRR fill } bdef +/stri 6 array def /dtri 6 array def +/smat 6 array def /dmat 6 array def +/tmat1 6 array def /tmat2 6 array def /dif 3 array def +/asub {/ind2 exch def /ind1 exch def dup dup + ind1 get exch ind2 get sub exch } bdef +/tri_to_matrix { + 2 0 asub 3 1 asub 4 0 asub 5 1 asub + dup 0 get exch 1 get 7 -1 roll astore } bdef +/compute_transform { + dmat dtri tri_to_matrix tmat1 invertmatrix + smat stri tri_to_matrix tmat2 concatmatrix } bdef +/ds {stri astore pop} bdef +/dt {dtri astore pop} bdef +/db {2 copy /cols xdef /rows xdef mul dup 3 mul string + currentfile + 3 index 0 eq {/ASCIIHexDecode filter} + {/ASCII85Decode filter 3 index 2 eq {/RunLengthDecode filter} if } + ifelse exch readstring pop + dup 0 3 index getinterval /rbmap xdef + dup 2 index dup getinterval /gbmap xdef + 1 index dup 2 mul exch getinterval /bbmap xdef pop pop}bdef +/it {gs np dtri aload pop moveto lineto lineto cp c + cols rows 8 compute_transform + rbmap gbmap bbmap true 3 colorimage gr}bdef +/il {newpath moveto lineto stroke}bdef +currentdict end def +%%EndProlog + +%%BeginSetup +MathWorks begin + +0 cap + +end +%%EndSetup + +%%Page: 1 1 +%%BeginPageSetup +%%PageBoundingBox: 23 78 571 763 +MathWorks begin +bpage +%%EndPageSetup + +%%BeginObject: obj1 +bplot + +/dpi2point 12 def +portraitMode 0276 9156 csm + + 0 0 6586 8220 rc +85 dict begin %Colortable dictionary +/c0 { 0.000000 0.000000 0.000000 sr} bdef +/c1 { 1.000000 1.000000 1.000000 sr} bdef +/c2 { 0.900000 0.000000 0.000000 sr} bdef +/c3 { 0.000000 0.820000 0.000000 sr} bdef +/c4 { 0.000000 0.000000 0.800000 sr} bdef +/c5 { 0.910000 0.820000 0.320000 sr} bdef +/c6 { 1.000000 0.260000 0.820000 sr} bdef +/c7 { 0.000000 0.820000 0.820000 sr} bdef +c0 +1 j +1 sg + 0 0 6587 8221 rf +6 w +0 2805 5104 0 0 -2805 856 3421 4 MP +PP +-5104 0 0 2805 5104 0 0 -2805 856 3421 5 MP stroke +4 w +DO +0 sg + 856 3421 mt 856 616 L + 856 616 mt 856 616 L +1876 3421 mt 1876 616 L +1876 616 mt 1876 616 L +2897 3421 mt 2897 616 L +2897 616 mt 2897 616 L +3918 3421 mt 3918 616 L +3918 616 mt 3918 616 L +4939 3421 mt 4939 616 L +4939 616 mt 4939 616 L +5960 3421 mt 5960 616 L +5960 616 mt 5960 616 L + 856 3421 mt 5960 3421 L +5960 3421 mt 5960 3421 L + 856 2953 mt 5960 2953 L +5960 2953 mt 5960 2953 L + 856 2486 mt 5960 2486 L +5960 2486 mt 5960 2486 L + 856 2018 mt 5960 2018 L +5960 2018 mt 5960 2018 L + 856 1551 mt 5960 1551 L +5960 1551 mt 5960 1551 L + 856 1083 mt 5960 1083 L +5960 1083 mt 5960 1083 L + 856 616 mt 5960 616 L +5960 616 mt 5960 616 L +SO +6 w + 856 3421 mt 5960 3421 L + 856 616 mt 5960 616 L + 856 3421 mt 856 616 L +5960 3421 mt 5960 616 L + 856 3421 mt 5960 3421 L + 856 3421 mt 856 616 L + 856 3421 mt 856 3395 L + 856 616 mt 856 641 L +DO + 856 3421 mt 856 616 L + 856 616 mt 856 616 L +SO + 856 3421 mt 856 3369 L + 856 616 mt 856 667 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + + 768 3603 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 80 FMSR + + 901 3529 mt +(0) s +1163 3421 mt 1163 3395 L +1163 616 mt 1163 641 L +DO +1163 3421 mt 1163 616 L +1163 616 mt 1163 616 L +SO +1343 3421 mt 1343 3395 L +1343 616 mt 1343 641 L +DO +1343 3421 mt 1343 616 L +1343 616 mt 1343 616 L +SO +1470 3421 mt 1470 3395 L +1470 616 mt 1470 641 L +DO +1470 3421 mt 1470 616 L +1470 616 mt 1470 616 L +SO +1569 3421 mt 1569 3395 L +1569 616 mt 1569 641 L +DO +1569 3421 mt 1569 616 L +1569 616 mt 1569 616 L +SO +1650 3421 mt 1650 3395 L +1650 616 mt 1650 641 L +DO +1650 3421 mt 1650 616 L +1650 616 mt 1650 616 L +SO +1718 3421 mt 1718 3395 L +1718 616 mt 1718 641 L +DO +1718 3421 mt 1718 616 L +1718 616 mt 1718 616 L +SO +1777 3421 mt 1777 3395 L +1777 616 mt 1777 641 L +DO +1777 3421 mt 1777 616 L +1777 616 mt 1777 616 L +SO +1830 3421 mt 1830 3395 L +1830 616 mt 1830 641 L +DO +1830 3421 mt 1830 616 L +1830 616 mt 1830 616 L +SO +1876 3421 mt 1876 3395 L +1876 616 mt 1876 641 L +DO +1876 3421 mt 1876 616 L +1876 616 mt 1876 616 L +SO +1876 3421 mt 1876 3369 L +1876 616 mt 1876 667 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + +1788 3603 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 80 FMSR + +1921 3529 mt +(1) s +2184 3421 mt 2184 3395 L +2184 616 mt 2184 641 L +DO +2184 3421 mt 2184 616 L +2184 616 mt 2184 616 L +SO +2363 3421 mt 2363 3395 L +2363 616 mt 2363 641 L +DO +2363 3421 mt 2363 616 L +2363 616 mt 2363 616 L +SO +2491 3421 mt 2491 3395 L +2491 616 mt 2491 641 L +DO +2491 3421 mt 2491 616 L +2491 616 mt 2491 616 L +SO +2590 3421 mt 2590 3395 L +2590 616 mt 2590 641 L +DO +2590 3421 mt 2590 616 L +2590 616 mt 2590 616 L +SO +2671 3421 mt 2671 3395 L +2671 616 mt 2671 641 L +DO +2671 3421 mt 2671 616 L +2671 616 mt 2671 616 L +SO +2739 3421 mt 2739 3395 L +2739 616 mt 2739 641 L +DO +2739 3421 mt 2739 616 L +2739 616 mt 2739 616 L +SO +2798 3421 mt 2798 3395 L +2798 616 mt 2798 641 L +DO +2798 3421 mt 2798 616 L +2798 616 mt 2798 616 L +SO +2850 3421 mt 2850 3395 L +2850 616 mt 2850 641 L +DO +2850 3421 mt 2850 616 L +2850 616 mt 2850 616 L +SO +2897 3421 mt 2897 3395 L +2897 616 mt 2897 641 L +DO +2897 3421 mt 2897 616 L +2897 616 mt 2897 616 L +SO +2897 3421 mt 2897 3369 L +2897 616 mt 2897 667 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + +2809 3603 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 80 FMSR + +2942 3529 mt +(2) s +3204 3421 mt 3204 3395 L +3204 616 mt 3204 641 L +DO +3204 3421 mt 3204 616 L +3204 616 mt 3204 616 L +SO +3384 3421 mt 3384 3395 L +3384 616 mt 3384 641 L +DO +3384 3421 mt 3384 616 L +3384 616 mt 3384 616 L +SO +3512 3421 mt 3512 3395 L +3512 616 mt 3512 641 L +DO +3512 3421 mt 3512 616 L +3512 616 mt 3512 616 L +SO +3611 3421 mt 3611 3395 L +3611 616 mt 3611 641 L +DO +3611 3421 mt 3611 616 L +3611 616 mt 3611 616 L +SO +3691 3421 mt 3691 3395 L +3691 616 mt 3691 641 L +DO +3691 3421 mt 3691 616 L +3691 616 mt 3691 616 L +SO +3760 3421 mt 3760 3395 L +3760 616 mt 3760 641 L +DO +3760 3421 mt 3760 616 L +3760 616 mt 3760 616 L +SO +3819 3421 mt 3819 3395 L +3819 616 mt 3819 641 L +DO +3819 3421 mt 3819 616 L +3819 616 mt 3819 616 L +SO +3871 3421 mt 3871 3395 L +3871 616 mt 3871 641 L +DO +3871 3421 mt 3871 616 L +3871 616 mt 3871 616 L +SO +3918 3421 mt 3918 3395 L +3918 616 mt 3918 641 L +DO +3918 3421 mt 3918 616 L +3918 616 mt 3918 616 L +SO +3918 3421 mt 3918 3369 L +3918 616 mt 3918 667 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + +3830 3603 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 80 FMSR + +3963 3529 mt +(3) s +4225 3421 mt 4225 3395 L +4225 616 mt 4225 641 L +DO +4225 3421 mt 4225 616 L +4225 616 mt 4225 616 L +SO +4405 3421 mt 4405 3395 L +4405 616 mt 4405 641 L +DO +4405 3421 mt 4405 616 L +4405 616 mt 4405 616 L +SO +4532 3421 mt 4532 3395 L +4532 616 mt 4532 641 L +DO +4532 3421 mt 4532 616 L +4532 616 mt 4532 616 L +SO +4631 3421 mt 4631 3395 L +4631 616 mt 4631 641 L +DO +4631 3421 mt 4631 616 L +4631 616 mt 4631 616 L +SO +4712 3421 mt 4712 3395 L +4712 616 mt 4712 641 L +DO +4712 3421 mt 4712 616 L +4712 616 mt 4712 616 L +SO +4781 3421 mt 4781 3395 L +4781 616 mt 4781 641 L +DO +4781 3421 mt 4781 616 L +4781 616 mt 4781 616 L +SO +4840 3421 mt 4840 3395 L +4840 616 mt 4840 641 L +DO +4840 3421 mt 4840 616 L +4840 616 mt 4840 616 L +SO +4892 3421 mt 4892 3395 L +4892 616 mt 4892 641 L +DO +4892 3421 mt 4892 616 L +4892 616 mt 4892 616 L +SO +4939 3421 mt 4939 3395 L +4939 616 mt 4939 641 L +DO +4939 3421 mt 4939 616 L +4939 616 mt 4939 616 L +SO +4939 3421 mt 4939 3369 L +4939 616 mt 4939 667 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + +4851 3603 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 80 FMSR + +4984 3529 mt +(4) s +5246 3421 mt 5246 3395 L +5246 616 mt 5246 641 L +DO +5246 3421 mt 5246 616 L +5246 616 mt 5246 616 L +SO +5426 3421 mt 5426 3395 L +5426 616 mt 5426 641 L +DO +5426 3421 mt 5426 616 L +5426 616 mt 5426 616 L +SO +5553 3421 mt 5553 3395 L +5553 616 mt 5553 641 L +DO +5553 3421 mt 5553 616 L +5553 616 mt 5553 616 L +SO +5652 3421 mt 5652 3395 L +5652 616 mt 5652 641 L +DO +5652 3421 mt 5652 616 L +5652 616 mt 5652 616 L +SO +5733 3421 mt 5733 3395 L +5733 616 mt 5733 641 L +DO +5733 3421 mt 5733 616 L +5733 616 mt 5733 616 L +SO +5801 3421 mt 5801 3395 L +5801 616 mt 5801 641 L +DO +5801 3421 mt 5801 616 L +5801 616 mt 5801 616 L +SO +5861 3421 mt 5861 3395 L +5861 616 mt 5861 641 L +DO +5861 3421 mt 5861 616 L +5861 616 mt 5861 616 L +SO +5913 3421 mt 5913 3395 L +5913 616 mt 5913 641 L +DO +5913 3421 mt 5913 616 L +5913 616 mt 5913 616 L +SO +5960 3421 mt 5960 3395 L +5960 616 mt 5960 641 L +DO +5960 3421 mt 5960 616 L +5960 616 mt 5960 616 L +SO +5960 3421 mt 5960 3369 L +5960 616 mt 5960 667 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + +5872 3603 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 80 FMSR + +6005 3529 mt +(5) s + 856 3421 mt 907 3421 L +5960 3421 mt 5908 3421 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + + 755 3465 mt +(0) s + 856 2953 mt 907 2953 L +5960 2953 mt 5908 2953 L + 655 2997 mt +(0.5) s + 856 2486 mt 907 2486 L +5960 2486 mt 5908 2486 L + 755 2530 mt +(1) s + 856 2018 mt 907 2018 L +5960 2018 mt 5908 2018 L + 655 2062 mt +(1.5) s + 856 1551 mt 907 1551 L +5960 1551 mt 5908 1551 L + 755 1595 mt +(2) s + 856 1083 mt 907 1083 L +5960 1083 mt 5908 1083 L + 655 1127 mt +(2.5) s + 856 616 mt 907 616 L +5960 616 mt 5908 616 L + 755 660 mt +(3) s + 856 3421 mt 5960 3421 L + 856 616 mt 5960 616 L + 856 3421 mt 856 616 L +5960 3421 mt 5960 616 L +gs 856 616 5105 2806 rc +/c8 { 0.000000 0.000000 1.000000 sr} bdef +c8 +307 45 308 100 307 122 307 76 308 48 307 8 307 -38 307 -46 +308 -318 307 -160 307 -278 308 -287 307 -433 307 -379 307 -227 856 3059 16 MP stroke +gr + +c8 + 36 36 856 3059 FO + 36 36 1163 2832 FO + 36 36 1470 2453 FO + 36 36 1777 2020 FO + 36 36 2085 1733 FO + 36 36 2392 1455 FO + 36 36 2699 1295 FO + 36 36 3007 977 FO + 36 36 3314 931 FO + 36 36 3621 893 FO + 36 36 3928 901 FO + 36 36 4236 949 FO + 36 36 4543 1025 FO + 36 36 4850 1147 FO + 36 36 5158 1247 FO + 36 36 5465 1292 FO +gs 856 616 5105 2806 rc +/c9 { 0.000000 0.500000 0.000000 sr} bdef +c9 +307 11 308 37 307 138 307 52 308 97 307 145 307 99 307 10 +308 -87 307 -133 307 -232 308 -312 307 -376 307 -262 307 -116 856 3127 16 MP stroke +gr + +c9 +0 j +0 -58 -58 0 0 58 58 0 827 3098 5 MP +DP +0 -58 -58 0 0 58 58 0 1134 2982 5 MP +DP +0 -58 -58 0 0 58 58 0 1441 2720 5 MP +DP +0 -58 -58 0 0 58 58 0 1748 2344 5 MP +DP +0 -58 -58 0 0 58 58 0 2056 2032 5 MP +DP +0 -58 -58 0 0 58 58 0 2363 1800 5 MP +DP +0 -58 -58 0 0 58 58 0 2670 1667 5 MP +DP +0 -58 -58 0 0 58 58 0 2978 1580 5 MP +DP +0 -58 -58 0 0 58 58 0 3285 1590 5 MP +DP +0 -58 -58 0 0 58 58 0 3592 1689 5 MP +DP +0 -58 -58 0 0 58 58 0 3899 1834 5 MP +DP +0 -58 -58 0 0 58 58 0 4207 1931 5 MP +DP +0 -58 -58 0 0 58 58 0 4514 1983 5 MP +DP +0 -58 -58 0 0 58 58 0 4821 2121 5 MP +DP +0 -58 -58 0 0 58 58 0 5129 2158 5 MP +DP +0 -58 -58 0 0 58 58 0 5436 2169 5 MP +DP +gs 856 616 5105 2806 rc +gr + +0 sg +2714 3746 mt +(Number of grouped points) s + 573 2902 mt -90 rotate +(BASFUN Vectorization Speed Up) s +90 rotate +2710 521 mt +(HPCFF, Quadratic Splines) s + 839 3464 mt +( ) s +5944 658 mt +( ) s +1 sg +0 438 1627 0 0 -438 2531 2901 4 MP +PP +-1627 0 0 438 1627 0 0 -438 2531 2901 5 MP stroke +4 w +DO +SO +6 w +0 sg +2531 2901 mt 4158 2901 L +2531 2463 mt 4158 2463 L +2531 2901 mt 2531 2463 L +4158 2901 mt 4158 2463 L +2531 2901 mt 4158 2901 L +2531 2901 mt 2531 2463 L +2531 2901 mt 4158 2901 L +2531 2463 mt 4158 2463 L +2531 2901 mt 2531 2463 L +4158 2901 mt 4158 2463 L +2999 2625 mt +(Periodic Splines) s +gs 2531 2463 1628 439 rc +c8 +360 0 2602 2583 2 MP stroke +gr + +c8 + 36 36 2782 2583 FO +gs 2531 2463 1628 439 rc +gr + +0 sg +2999 2824 mt +(Non-perodic Splines) s +gs 2531 2463 1628 439 rc +c9 +360 0 2602 2781 2 MP stroke +gr + +c9 +0 -58 -58 0 0 58 58 0 2753 2752 5 MP +DP +gs 2531 2463 1628 439 rc +gr + +1 sg +0 2805 5104 0 0 -2805 856 7316 4 MP +PP +-5104 0 0 2805 5104 0 0 -2805 856 7316 5 MP stroke +4 w +DO +0 sg + 856 7316 mt 856 4511 L + 856 4511 mt 856 4511 L +1876 7316 mt 1876 4511 L +1876 4511 mt 1876 4511 L +2897 7316 mt 2897 4511 L +2897 4511 mt 2897 4511 L +3918 7316 mt 3918 4511 L +3918 4511 mt 3918 4511 L +4939 7316 mt 4939 4511 L +4939 4511 mt 4939 4511 L +5960 7316 mt 5960 4511 L +5960 4511 mt 5960 4511 L + 856 7316 mt 5960 7316 L +5960 7316 mt 5960 7316 L + 856 6848 mt 5960 6848 L +5960 6848 mt 5960 6848 L + 856 6381 mt 5960 6381 L +5960 6381 mt 5960 6381 L + 856 5913 mt 5960 5913 L +5960 5913 mt 5960 5913 L + 856 5446 mt 5960 5446 L +5960 5446 mt 5960 5446 L + 856 4978 mt 5960 4978 L +5960 4978 mt 5960 4978 L + 856 4511 mt 5960 4511 L +5960 4511 mt 5960 4511 L +SO +6 w + 856 7316 mt 5960 7316 L + 856 4511 mt 5960 4511 L + 856 7316 mt 856 4511 L +5960 7316 mt 5960 4511 L + 856 7316 mt 5960 7316 L + 856 7316 mt 856 4511 L + 856 7316 mt 856 7290 L + 856 4511 mt 856 4536 L +DO + 856 7316 mt 856 4511 L + 856 4511 mt 856 4511 L +SO + 856 7316 mt 856 7264 L + 856 4511 mt 856 4562 L + 768 7498 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 80 FMSR + + 901 7424 mt +(0) s +1163 7316 mt 1163 7290 L +1163 4511 mt 1163 4536 L +DO +1163 7316 mt 1163 4511 L +1163 4511 mt 1163 4511 L +SO +1343 7316 mt 1343 7290 L +1343 4511 mt 1343 4536 L +DO +1343 7316 mt 1343 4511 L +1343 4511 mt 1343 4511 L +SO +1470 7316 mt 1470 7290 L +1470 4511 mt 1470 4536 L +DO +1470 7316 mt 1470 4511 L +1470 4511 mt 1470 4511 L +SO +1569 7316 mt 1569 7290 L +1569 4511 mt 1569 4536 L +DO +1569 7316 mt 1569 4511 L +1569 4511 mt 1569 4511 L +SO +1650 7316 mt 1650 7290 L +1650 4511 mt 1650 4536 L +DO +1650 7316 mt 1650 4511 L +1650 4511 mt 1650 4511 L +SO +1718 7316 mt 1718 7290 L +1718 4511 mt 1718 4536 L +DO +1718 7316 mt 1718 4511 L +1718 4511 mt 1718 4511 L +SO +1777 7316 mt 1777 7290 L +1777 4511 mt 1777 4536 L +DO +1777 7316 mt 1777 4511 L +1777 4511 mt 1777 4511 L +SO +1830 7316 mt 1830 7290 L +1830 4511 mt 1830 4536 L +DO +1830 7316 mt 1830 4511 L +1830 4511 mt 1830 4511 L +SO +1876 7316 mt 1876 7290 L +1876 4511 mt 1876 4536 L +DO +1876 7316 mt 1876 4511 L +1876 4511 mt 1876 4511 L +SO +1876 7316 mt 1876 7264 L +1876 4511 mt 1876 4562 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + +1788 7498 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 80 FMSR + +1921 7424 mt +(1) s +2184 7316 mt 2184 7290 L +2184 4511 mt 2184 4536 L +DO +2184 7316 mt 2184 4511 L +2184 4511 mt 2184 4511 L +SO +2363 7316 mt 2363 7290 L +2363 4511 mt 2363 4536 L +DO +2363 7316 mt 2363 4511 L +2363 4511 mt 2363 4511 L +SO +2491 7316 mt 2491 7290 L +2491 4511 mt 2491 4536 L +DO +2491 7316 mt 2491 4511 L +2491 4511 mt 2491 4511 L +SO +2590 7316 mt 2590 7290 L +2590 4511 mt 2590 4536 L +DO +2590 7316 mt 2590 4511 L +2590 4511 mt 2590 4511 L +SO +2671 7316 mt 2671 7290 L +2671 4511 mt 2671 4536 L +DO +2671 7316 mt 2671 4511 L +2671 4511 mt 2671 4511 L +SO +2739 7316 mt 2739 7290 L +2739 4511 mt 2739 4536 L +DO +2739 7316 mt 2739 4511 L +2739 4511 mt 2739 4511 L +SO +2798 7316 mt 2798 7290 L +2798 4511 mt 2798 4536 L +DO +2798 7316 mt 2798 4511 L +2798 4511 mt 2798 4511 L +SO +2850 7316 mt 2850 7290 L +2850 4511 mt 2850 4536 L +DO +2850 7316 mt 2850 4511 L +2850 4511 mt 2850 4511 L +SO +2897 7316 mt 2897 7290 L +2897 4511 mt 2897 4536 L +DO +2897 7316 mt 2897 4511 L +2897 4511 mt 2897 4511 L +SO +2897 7316 mt 2897 7264 L +2897 4511 mt 2897 4562 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + +2809 7498 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 80 FMSR + +2942 7424 mt +(2) s +3204 7316 mt 3204 7290 L +3204 4511 mt 3204 4536 L +DO +3204 7316 mt 3204 4511 L +3204 4511 mt 3204 4511 L +SO +3384 7316 mt 3384 7290 L +3384 4511 mt 3384 4536 L +DO +3384 7316 mt 3384 4511 L +3384 4511 mt 3384 4511 L +SO +3512 7316 mt 3512 7290 L +3512 4511 mt 3512 4536 L +DO +3512 7316 mt 3512 4511 L +3512 4511 mt 3512 4511 L +SO +3611 7316 mt 3611 7290 L +3611 4511 mt 3611 4536 L +DO +3611 7316 mt 3611 4511 L +3611 4511 mt 3611 4511 L +SO +3691 7316 mt 3691 7290 L +3691 4511 mt 3691 4536 L +DO +3691 7316 mt 3691 4511 L +3691 4511 mt 3691 4511 L +SO +3760 7316 mt 3760 7290 L +3760 4511 mt 3760 4536 L +DO +3760 7316 mt 3760 4511 L +3760 4511 mt 3760 4511 L +SO +3819 7316 mt 3819 7290 L +3819 4511 mt 3819 4536 L +DO +3819 7316 mt 3819 4511 L +3819 4511 mt 3819 4511 L +SO +3871 7316 mt 3871 7290 L +3871 4511 mt 3871 4536 L +DO +3871 7316 mt 3871 4511 L +3871 4511 mt 3871 4511 L +SO +3918 7316 mt 3918 7290 L +3918 4511 mt 3918 4536 L +DO +3918 7316 mt 3918 4511 L +3918 4511 mt 3918 4511 L +SO +3918 7316 mt 3918 7264 L +3918 4511 mt 3918 4562 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + +3830 7498 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 80 FMSR + +3963 7424 mt +(3) s +4225 7316 mt 4225 7290 L +4225 4511 mt 4225 4536 L +DO +4225 7316 mt 4225 4511 L +4225 4511 mt 4225 4511 L +SO +4405 7316 mt 4405 7290 L +4405 4511 mt 4405 4536 L +DO +4405 7316 mt 4405 4511 L +4405 4511 mt 4405 4511 L +SO +4532 7316 mt 4532 7290 L +4532 4511 mt 4532 4536 L +DO +4532 7316 mt 4532 4511 L +4532 4511 mt 4532 4511 L +SO +4631 7316 mt 4631 7290 L +4631 4511 mt 4631 4536 L +DO +4631 7316 mt 4631 4511 L +4631 4511 mt 4631 4511 L +SO +4712 7316 mt 4712 7290 L +4712 4511 mt 4712 4536 L +DO +4712 7316 mt 4712 4511 L +4712 4511 mt 4712 4511 L +SO +4781 7316 mt 4781 7290 L +4781 4511 mt 4781 4536 L +DO +4781 7316 mt 4781 4511 L +4781 4511 mt 4781 4511 L +SO +4840 7316 mt 4840 7290 L +4840 4511 mt 4840 4536 L +DO +4840 7316 mt 4840 4511 L +4840 4511 mt 4840 4511 L +SO +4892 7316 mt 4892 7290 L +4892 4511 mt 4892 4536 L +DO +4892 7316 mt 4892 4511 L +4892 4511 mt 4892 4511 L +SO +4939 7316 mt 4939 7290 L +4939 4511 mt 4939 4536 L +DO +4939 7316 mt 4939 4511 L +4939 4511 mt 4939 4511 L +SO +4939 7316 mt 4939 7264 L +4939 4511 mt 4939 4562 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + +4851 7498 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 80 FMSR + +4984 7424 mt +(4) s +5246 7316 mt 5246 7290 L +5246 4511 mt 5246 4536 L +DO +5246 7316 mt 5246 4511 L +5246 4511 mt 5246 4511 L +SO +5426 7316 mt 5426 7290 L +5426 4511 mt 5426 4536 L +DO +5426 7316 mt 5426 4511 L +5426 4511 mt 5426 4511 L +SO +5553 7316 mt 5553 7290 L +5553 4511 mt 5553 4536 L +DO +5553 7316 mt 5553 4511 L +5553 4511 mt 5553 4511 L +SO +5652 7316 mt 5652 7290 L +5652 4511 mt 5652 4536 L +DO +5652 7316 mt 5652 4511 L +5652 4511 mt 5652 4511 L +SO +5733 7316 mt 5733 7290 L +5733 4511 mt 5733 4536 L +DO +5733 7316 mt 5733 4511 L +5733 4511 mt 5733 4511 L +SO +5801 7316 mt 5801 7290 L +5801 4511 mt 5801 4536 L +DO +5801 7316 mt 5801 4511 L +5801 4511 mt 5801 4511 L +SO +5861 7316 mt 5861 7290 L +5861 4511 mt 5861 4536 L +DO +5861 7316 mt 5861 4511 L +5861 4511 mt 5861 4511 L +SO +5913 7316 mt 5913 7290 L +5913 4511 mt 5913 4536 L +DO +5913 7316 mt 5913 4511 L +5913 4511 mt 5913 4511 L +SO +5960 7316 mt 5960 7290 L +5960 4511 mt 5960 4536 L +DO +5960 7316 mt 5960 4511 L +5960 4511 mt 5960 4511 L +SO +5960 7316 mt 5960 7264 L +5960 4511 mt 5960 4562 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + +5872 7498 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 80 FMSR + +6005 7424 mt +(5) s + 856 7316 mt 907 7316 L +5960 7316 mt 5908 7316 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + + 755 7360 mt +(0) s + 856 6848 mt 907 6848 L +5960 6848 mt 5908 6848 L + 655 6892 mt +(0.5) s + 856 6381 mt 907 6381 L +5960 6381 mt 5908 6381 L + 755 6425 mt +(1) s + 856 5913 mt 907 5913 L +5960 5913 mt 5908 5913 L + 655 5957 mt +(1.5) s + 856 5446 mt 907 5446 L +5960 5446 mt 5908 5446 L + 755 5490 mt +(2) s + 856 4978 mt 907 4978 L +5960 4978 mt 5908 4978 L + 655 5022 mt +(2.5) s + 856 4511 mt 907 4511 L +5960 4511 mt 5908 4511 L + 755 4555 mt +(3) s + 856 7316 mt 5960 7316 L + 856 4511 mt 5960 4511 L + 856 7316 mt 856 4511 L +5960 7316 mt 5960 4511 L +gs 856 4511 5105 2806 rc +c8 +307 29 308 56 307 52 307 17 308 13 307 50 307 -16 307 -14 +308 -71 307 -202 307 -265 308 -268 307 -442 307 -365 307 -240 856 6910 16 MP stroke +gr + +c8 + 36 36 856 6910 FO + 36 36 1163 6670 FO + 36 36 1470 6305 FO + 36 36 1777 5863 FO + 36 36 2085 5595 FO + 36 36 2392 5330 FO + 36 36 2699 5128 FO + 36 36 3007 5057 FO + 36 36 3314 5043 FO + 36 36 3621 5027 FO + 36 36 3928 5077 FO + 36 36 4236 5090 FO + 36 36 4543 5107 FO + 36 36 4850 5159 FO + 36 36 5158 5215 FO + 36 36 5465 5244 FO +gs 856 4511 5105 2806 rc +c9 +307 6 308 15 307 82 307 99 308 45 307 -43 307 402 307 -56 +308 49 307 -134 307 -144 308 -267 307 -321 307 -220 307 -94 856 7034 16 MP stroke +gr + +c9 +0 -58 -58 0 0 58 58 0 827 7005 5 MP +DP +0 -58 -58 0 0 58 58 0 1134 6911 5 MP +DP +0 -58 -58 0 0 58 58 0 1441 6691 5 MP +DP +0 -58 -58 0 0 58 58 0 1748 6370 5 MP +DP +0 -58 -58 0 0 58 58 0 2056 6103 5 MP +DP +0 -58 -58 0 0 58 58 0 2363 5959 5 MP +DP +0 -58 -58 0 0 58 58 0 2670 5825 5 MP +DP +0 -58 -58 0 0 58 58 0 2978 5874 5 MP +DP +0 -58 -58 0 0 58 58 0 3285 5818 5 MP +DP +0 -58 -58 0 0 58 58 0 3592 6220 5 MP +DP +0 -58 -58 0 0 58 58 0 3899 6177 5 MP +DP +0 -58 -58 0 0 58 58 0 4207 6222 5 MP +DP +0 -58 -58 0 0 58 58 0 4514 6321 5 MP +DP +0 -58 -58 0 0 58 58 0 4821 6403 5 MP +DP +0 -58 -58 0 0 58 58 0 5129 6418 5 MP +DP +0 -58 -58 0 0 58 58 0 5436 6424 5 MP +DP +gs 856 4511 5105 2806 rc +gr + +0 sg +2714 7641 mt +(Number of grouped points) s + 573 6797 mt -90 rotate +(BASFUN Vectorization Speed Up) s +90 rotate +2816 4416 mt +(HPCFF, Cubic Splines) s + 839 7359 mt +( ) s +5944 4553 mt +( ) s +1 sg +0 438 1627 0 0 -438 2307 6924 4 MP +PP +-1627 0 0 438 1627 0 0 -438 2307 6924 5 MP stroke +4 w +DO +SO +6 w +0 sg +2307 6924 mt 3934 6924 L +2307 6486 mt 3934 6486 L +2307 6924 mt 2307 6486 L +3934 6924 mt 3934 6486 L +2307 6924 mt 3934 6924 L +2307 6924 mt 2307 6486 L +2307 6924 mt 3934 6924 L +2307 6486 mt 3934 6486 L +2307 6924 mt 2307 6486 L +3934 6924 mt 3934 6486 L +2775 6648 mt +(Periodic Splines) s +gs 2307 6486 1628 439 rc +c8 +360 0 2378 6606 2 MP stroke +gs 2485 6533 147 147 rc + 36 36 2558 6606 FO +gr + +gr + +c8 +0 sg +2775 6847 mt +(Non-perodic Splines) s +gs 2307 6486 1628 439 rc +c9 +360 0 2378 6804 2 MP stroke +gs 2485 6731 147 147 rc +0 -58 -58 0 0 58 58 0 2529 6775 5 MP +DP +gr + +gr + +c9 + +end %%Color Dict + +eplot +%%EndObject + +epage +end + +showpage + +%%Trailer +%%EOF diff --git a/docs/manual/bsplines.pdf b/docs/manual/bsplines.pdf new file mode 100644 index 0000000..1ae347a Binary files /dev/null and b/docs/manual/bsplines.pdf differ diff --git a/docs/manual/bsplines.tex b/docs/manual/bsplines.tex new file mode 100644 index 0000000..ecc06a9 --- /dev/null +++ b/docs/manual/bsplines.tex @@ -0,0 +1,1297 @@ +% +% @file bsplines.tex +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% @author Stephan Brunner +% +\documentclass[a4paper]{article} +\usepackage{linuxdoc-sgml} +%\usepackage{a4wide} +\usepackage{graphicx} +\usepackage{hyperref} +\usepackage{amsmath} +%\usepackage{fancybox} +%\usepackage[notref]{showkeys} + +\title{\tt BSPLINES Reference Guide} +\author{Trach-Minh Tran, Stephan Brunner, Kurt Appert} +\date{v0.3, February 2012} +\abstract{Generalized splines of any order on irregular grids for + interpolation and solving PDEs with FEM.} + +\begin{document} +\maketitle +\tableofcontents + +\section{Properties of Splines} +In this section, several properties of splines will be shown in +a more or less rigorous way. The aim is mainly to provide a +minimum mathematical background for using the module \texttt{BSPLINES} for the +\emph{interpolation} problem as well as the \emph{Finite Element Method} +to solve PDEs. More rigorous mathematical proofs can be found +in the book by de Boor~\cite{deBoor}. + +\subsection{Recurrence Relation} +We start by defining a finite interval $[a,b]$ subdivided into $N_x$ +intervals: +\begin{equation} + a=t_0 \le t_1 \le \ldots \le t_{N_x}=b. +\end{equation} +The sequence $t_i, i=0,\ldots,N_x$ can be irregularly spaced. The $j^{th}$ +spline of degree $p$ defined on this sequence of grid points (also called +\textbf{knots}), is denoted by $\Lambda_{j}^{p}$ and can be constructed using +the following recurrence relation. Starting with the \emph{constant} spline + +\begin{equation} + \Lambda_i^0(x) = + \begin{cases} + 1& \text{if $t_i \le x < t_{i+1}$}, \\ + 0& \text{otherwise}. + \end{cases} +\end{equation} +the splines of degree $p>0$ for $t_i \le x < t_{i+1}$ can be constructed from + +\begin{eqnarray} + \Lambda_i^p &=& w_i^p\Lambda_i^{p-1} + (1- + w_{i+1}^p)\Lambda_{i+1}^{p-1}, \label{eq:recRel}\\ + w_{i}^{p} &=& \frac{x-t_i}{t_{i+p}-t_i}. +\end{eqnarray} +Thus the values of all \emph{non-zero} +splines up to degree $p$ in the interval $[t_i,t_{i+1}]$ fit into +the triangular array as shown in Fig.~\ref{fig:allSpl}. Starting from the +first column with $\Lambda_i^0=1$, one can compute each of the $p+1$ entries +in a subsequent column with Eq.~(\ref{eq:recRel}). Applying this procedure +to generate splines on every intervals $[t_i,t_{i+1}], i=0,\ldots,N_{x}-1$ +would produce the sequence of $N_{x}+p$ splines of degree $p$: +\( \Lambda_{-p}^{p},\ldots, \Lambda_{N_x-1}^{p}\). + +\subsection{Support and positivity} + The linear spline + \begin{equation*} +\Lambda_i^1 = w_i^1\Lambda_i^0 + (1-w_{i+1}^1)\Lambda_{i+1}^{0} + =\frac{x-t_i}{t_{i+1}-t_i}\Lambda_i^0 + + \frac{t_{i+2}-x}{t_{i+2}-t_{i+1}}\Lambda_{i+1}^0 + \end{equation*} +consists of 2 \emph{linear pieces} on $[t_i,t_{i+2}]$, forming a $C^0$ +function which breaks at $t_{i+1}$ and vanishes outside of this +interval. Likewise, the quadratic spline +\begin{eqnarray*} +\Lambda_i^2 &=& w_i^2\Lambda_i^1 + (1-w_{i+1}^2)\Lambda_{i+1}^{1} \\ +&=& w^2_i w^1_i \Lambda_i^0 + [w^2_i(1-w^1_{i+1})+w^1_{i+1}(1-w^2_{i+1})] +\Lambda_{i+1}^0 + (1-w^2_{i+1})(1-w^1_{i+2})\Lambda_{i+2}^0 +\end{eqnarray*} +consists of 3 \emph{parabolic pieces} on $[t_i,t_{i+3}]$ that +join to form a $C^1$ function which breaks at $t_{i+1}$ and $t_{i+2}$ +and vanishes outside of this interval. In general the spline of +degree $p$ can be expressed as: +\begin{equation} +\Lambda^p_i = \sum_{r=0}^{p}\, b_{i+r}^p\Lambda_{i+r}^0 +\end{equation} +where $b_{i+r}^p$ is a sum of products of $p$ linear functions, resulting +in $p+1$ polynomials of degree $p$, joining to form +a $C^{p-1}$ function which breaks at $t_i,\ldots,t_{i+p+1}$ +and vanishes outside of the \emph{support} $[t_i,t_{i+p+1}]$. From its construction, +$\Lambda^p_i$ is clearly \emph{strictly positive} on the interior of $[t_i,t_{i+p+1}]$. +\begin{equation} +\Lambda^p_i (x) > 0, \qquad t_i1$: +\[ \sum_{j=i-p+1}^{i}\,\Lambda_j^{p-1} = 1, \] +or that the sum of the next to last column in Fig.~\ref{fig:allSpl} is $1$, +we have, using the recurrence relation (\ref{eq:recRel}) +\begin{eqnarray*} +\sum_{j=i-p}^{i}\,\Lambda_j^{p} &=& \sum_{j=i-p}^{i}\, +\left( w^p_j\Lambda_j^{p-1} +(1-w^p_{j+1})\Lambda_{j+1}^{p-1} \right) \\ +&=& \sum_{j=i-p+1}^{i}\,w^p_j\Lambda_j^{p-1} + + \sum_{j=i-p+1}^{i}\,(1-w^p_j)\Lambda_j^{p-1} \\ +&=& \sum_{j=i-p+1}^{i}\,\Lambda_j^{p-1} = 1. +\end{eqnarray*} + +\subsection{Derivative of Splines} +The derivative of the splines of degree $p$ can be expressed in terms of the +splines of degree $p-1$ by the following relation: +\begin{equation} + \label{derivative of splines} + \frac{d}{dx}\Lambda_i^p = + p\left( + \frac{\Lambda_i^{p-1}}{t_{i+p}-t_i} + - \frac{\Lambda_{i+1}^{p-1}}{t_{i+p+1}-t_{i+1}} + \right). +\end{equation} +A straightforward consequence of this relation is that the splines of order +$p$ are $C^{p-1}$ continuous. The demonstration of Eq.(\ref{derivative of +splines}) is done by induction. One starts with the case $p=1$: +\begin{eqnarray*} +\frac{d}{dx}\Lambda_i^1 +& = & +\frac{d}{dx}\left[ +w_i^1\Lambda_i^0 + (1-w^1_{i+1})\Lambda_{i+1}^0 +\right] +\\ +& = & +\frac{d\,w_i^1}{dx}\Lambda_i^0 + \frac{d\,(1-w^1_{i+1})}{dx}\Lambda_{i+1}^0 ++ w_i^1\frac{d\,\Lambda_i^0}{dx} + (1-w^1_{i+1})\frac{d\,\Lambda_{i+1}^0}{dx} +\\ +& = & +\frac{1}{t_{i+1}-t_i}\Lambda_i^0 - \frac{1}{t_{i+2}-t_{i+1}}\Lambda_{i+1}^0, +\end{eqnarray*} +having used Eq.(\ref{eq:recRel}) and $d\,\Lambda_i^0/dx = 0$. One then assumes +Eq.(\ref{derivative of splines}) true for $p-1$ and demonstrates that it +remains true for $p$. This is done as follows: +\begin{eqnarray} +\label{demo deriv. 1} +\frac{d}{dx}\Lambda_i^p +& = & +\frac{d}{dx}\left[ +w_i^p\Lambda_i^{p-1} + (1-w^p_{i+1})\Lambda_{i+1}^{p-1} +\right] +\\ +\nonumber +& = & +\frac{d\,w_i^p}{dx}\Lambda_i^{p-1} ++ \frac{d\,(1-w^p_{i+1})}{dx}\Lambda_{i+1}^{p-1} ++ w_i^p\frac{d\,\Lambda_i^{p-1}}{dx} ++ (1-w^p_{i+1})\frac{d\,\Lambda_{i+1}^{p-1}}{dx} +\\ +\nonumber +& = & +\frac{\Lambda_i^{p-1}}{t_{i+p}-t_i} +-\frac{\Lambda_{i+1}^{p-1}}{t_{i+p+1}-t_{i+1}} +\\ +\label{demo deriv. 2} +&& ++ w_i^p (p-1)\left( + \frac{\Lambda_i^{p-2}}{t_{i+p-1}-t_i} + - \frac{\Lambda_{i+1}^{p-2}}{t_{i+p}-t_{i+1}} +\right) ++ (1-w_{i+1}^p) (p-1)\left( + \frac{\Lambda_{i+1}^{p-2}}{t_{i+p}-t_{i+1}} + - \frac{\Lambda_{i+2}^{p-2}}{t_{i+p+1}-t_{i+2}} + \right) +\end{eqnarray} +having used Eq.(\ref{eq:recRel}) to obtain (\ref{demo deriv. 1}), and the +induction hypothesis to obtain Eq.(\ref{demo deriv. 2}). Now, rearranging the +last two terms of Eq.(\ref{demo deriv. 2}), one easily obtains: +\begin{eqnarray} +\nonumber +\frac{d}{dx}\Lambda_i^p +& = & +\frac{\Lambda_i^{p-1}}{t_{i+p}-t_i} +-\frac{\Lambda_{i+1}^{p-1}}{t_{i+p+1}-t_{i+1}} +\\ +\nonumber +&& ++(p-1)\left[ +\frac{1}{t_{i+p}-t_i} +\left( +\frac{x-t_i}{t_{i+p-1}-t_i}\Lambda_i^{p-2} ++\frac{t_{i+p}-x}{t_{i+p}-t_{i+1}}\Lambda_{i+1}^{p-2} +\right) +\right. +\\ +\nonumber +&& +\hspace{2.cm} +\left. +-\frac{1}{t_{i+p+1}-t_{i+1}} +\left( +\frac{x-t_{i+1}}{t_{i+p}-t_{i+1}}\Lambda_{i+1}^{p-2} ++\frac{t_{i+p+1}-x}{t_{i+p+1}-t_{i+2}}\Lambda_{i+2}^{p-2} +\right) +\right] +\\ +\label{demo deriv. 3} +& = & +\frac{\Lambda_i^{p-1}}{t_{i+p}-t_i} +-\frac{\Lambda_{i+1}^{p-1}}{t_{i+p+1}-t_{i+1}} ++ (p-1)\left( +\frac{\Lambda_i^{p-1}}{t_{i+p}-t_i} +-\frac{\Lambda_{i+1}^{p-1}}{t_{i+p+1}-t_{i+1}} +\right) +\\ +\nonumber +& = & +p\left( +\frac{\Lambda_i^{p-1} }{t_{i+p}-t_i} +-\frac{\Lambda_{i+1}^{p-1}}{t_{i+p+1}-t_{i+1}} +\right) +\end{eqnarray} +having again used Eq.(\ref{eq:recRel}) to obtain (\ref{demo deriv. 3}). This +completes the demonstration of relation (\ref{derivative of splines}). + + +\subsection{Integrals of Splines} + +With the proper normalization all splines of all degrees have unitary surface: +\begin{equation} + \label{integrals of splines} + \frac{p+1}{t_{i+p+1}-t_i}\int \Lambda_i^p(x)dx = 1. +\end{equation} +This relation holds trivially for $p=0$ and $p=1$. A recursive proof of the +general statement (\ref{integrals of splines}) starts assuming +\begin{equation} + \label{previousInt} + \frac{p}{t_{i+p}-t_i}\int \Lambda_i^{p-1}(x)dx = 1 +\end{equation} +to be true. Then using Eq.(\ref{derivative of splines}) multiplied by $x$ and integrating one obtains: +\begin{equation} +\nonumber +\int x \frac{d}{dx}\Lambda_i^p dx = -\int \Lambda_i^p dx = + p\int\left( \frac{x\Lambda_i^{p-1}}{t_{i+p}-t_i} + -\frac{x\Lambda_{i+1}^{p-1}}{t_{i+p+1}-t_{i+1}} + \right)dx. +\end{equation} +Completing the fractions in the big parentheses in view of using +Eq.(\ref{eq:recRel}) one has +\begin{eqnarray} +\nonumber +\int \Lambda_i^p dx + & = & + -\, p\int \frac{x-t_i}{t_{i+p}-t_i}\Lambda_i^{p-1} dx + - p\int \frac{t_i}{t_{i+p}-t_i}\Lambda_i^{p-1} dx \\ +\nonumber + & & + -\, p\int \frac{t_{i+p+1}-x}{t_{i+p+1}-t_{i+1}}\Lambda_{i+1}^{p-1} dx + + p\int \frac{t_{i+p+1}}{t_{i+p+1}-t_{i+1}}\Lambda_{i+1}^{p-1} dx, +\end{eqnarray} +where the first and the third terms on the right correspond to $-p\int\Lambda_i^p dx $, +Eq.(\ref{eq:recRel}), and can be combined with the left side to yield +\begin{equation} +\label{proofIntegrals} +(1+p)\int\Lambda_i^p dx = t_{i+p+1}-t_i, +\end{equation} +where relation (\ref{previousInt}) has been used for the rest on the right hand side. +This concludes the proof of Eq.(\ref{integrals of splines}). + + +\subsection{Boundary Conditions} +Applying the recurrence relation to generate \emph{all} the splines on +the finite domain $[t_0,t_{N_x}]$ yields the $N_x+p$ splines +of degree $p$: +\begin{equation} + \Lambda^p_{-p},\Lambda^p_{-p+1},\ldots, \Lambda^p_{N_x-1}. \label{eq:splSeq} +\end{equation} +Note that \emph{additional} knots beyond both ends of $[t_0,t_{N_x}]$ have to +be defined to generate all these splines. + +\subsubsection{Periodic splines} +The extra knots are simply defined through periodicity.: +\begin{eqnarray} + t_{-\nu} &=& t_{N_x-\nu}-(b-a), \\ + t_{N_{x}+\nu} &=& t_{\nu}+(b-a), \qquad \nu=0,\ldots,p. +\end{eqnarray} +The $p+1$ leftmost splines in (\ref{eq:splSeq}) are thus identical to the rightmost splines: +\begin{equation} +\Lambda^p_{-\nu} = \Lambda^p_{N_x-\nu}, \qquad \nu=0,\ldots,p. +\end{equation} + +\subsubsection{Non-periodic splines} +The choice made in \texttt{BSPLINES} is simply: +\begin{equation} + t_{-p} = \cdots = t_{0} = a, \qquad b=t_{N_x}=\cdots=t_{N_x+p}. +\end{equation} +Thus in the first interval $[t_0,t_1]$, the first spline $\Lambda^p_{-p}$ +is constructed (refer to the first entry on each of the column of +Fig.~\ref{fig:allSpl}, with $i=0$) as follow: +\begin{eqnarray*} + \Lambda^1_{-1} &=& (1-w^1_{0})\Lambda^0_{0} = \frac{t_{1}-x}{t_{1}-t_{0}}\Lambda^0_{0}\\ + \Lambda^2_{-2} &=& (1-w^2_{-1})\Lambda^1_{-1} =\frac{t_{1}-x}{t_{1}-t_{-1}}\Lambda^1_{-1} + =\left(\frac{t_{1}-x}{t_{1}-t_{0}}\right)^{2}\Lambda^0_{0}\\ +\cdot & & \qquad\cdot\qquad\qquad\qquad\qquad \cdot \\ + \Lambda^p_{-p} &=& (1-w^p_{-p+1})\Lambda^p_{-p+1} =\frac{t_{1}-x}{t_{1}-t_{-p+1}}\Lambda^{p-1}_{-p+1} + =\left(\frac{t_{1}-x}{t_{1}-t_{0}}\right)^{p}\Lambda^0_{0} +\end{eqnarray*} + + +In the same manner, the generation of the \emph{last} spline $\Lambda^p_{N_x-1}$ (last entry +on each of the column of Fig.~\ref{fig:allSpl}, with $i=N_{x}-1$) yields: +\begin{eqnarray*} +\Lambda^1_{N_x-1} &=& w^1_{N_x-1}\Lambda^0_{N_x-1} = + \frac{x-t_{N_x-1}}{t_{N_x}-t_{N_x-1}}\Lambda^0_{N_x-1} \\ +\Lambda^2_{N_x-1} &=& w^2_{N_x-1}\Lambda^1_{N_x-1} = + \frac{x-t_{N_x-1}}{t_{N_x+1}-t_{N_x-1}}\Lambda^1_{N_x-1} + = \left(\frac{x-t_{N_x-1}}{t_{N_x}-t_{N_x-1}}\right)^{2}\Lambda^0_{N_x-1}\\ +\cdot & & \qquad\cdot\qquad\qquad\qquad\qquad \cdot \\ +\Lambda^p_{N_x-1} &=& w^p_{N_x-1}\Lambda^p_{N_x-1} = + \frac{x-t_{N_x-1}}{t_{N_x+p-1}-t_{N_x-1}}\Lambda^1_{N_x-1} + = \left(\frac{x-t_{N_x-1}}{t_{N_x}-t_{N_x-1}}\right)^{p}\Lambda^0_{N_x-1} +\end{eqnarray*} + +Since the sum of all splines is 1 and using the positivity of splines, all the +non-periodic splines, except the first (last) spline should vanish at $x=a$ ($x=b$): +\begin{equation} + \Lambda^p_{r}(a) = \delta_{r,-p}, \qquad\Lambda^p_{r}(b) = \delta_{r,N_x-1} +\end{equation} + +The spline derivatives at the boundaries $x=a$ and $x=b$ can be +derived using Eq.(\ref{derivative of splines}) as follow. At $x=a$ +(interval $[t_0,t_1]$), by noting that only the spline +$\Lambda^{p-1}_{-p+1}$ is non-zero at $x=a$ (see next to last column +of Fig.{\ref{fig:allSpl}, with $i=0$), it is easy to see that +there are only 2 non-zero derivatives +given by +\begin{equation} + \begin{split} + \frac{d}{dx}\Lambda^p_{-p}(a) =& + -\frac{p\,\Lambda^{p-1}_{-p+1}(a)}{t_1-t_{-p}} = -\frac{p}{t_1-t_0}, \\ + \frac{d}{dx}\Lambda^p_{-p+1}(a) =& + \frac{p\,\Lambda^{p-1}_{-p+1}(a)}{t_1-t_{-p+1}} = \frac{p}{t_1-t_0}, + \end{split} +\end{equation} +where we have used $t_0=t_{-1}=\ldots=t_{-p}=a$. Likewise, the 2 +non-zero derivatives of spline at the other boundary $x=b$ are +\begin{equation} + \begin{split} + \frac{d}{dx}\Lambda^p_{N_x-2}(b) =& + -\frac{p\,\Lambda^{p-1}_{N_x-1}(b)}{t_{N_x+p-1}-t_{N_x-1}} = -\frac{p}{t_{N_x}-t_{N_x-1}}, \\ + \frac{d}{dx}\Lambda^p_{N_x-1}(b) =& + \frac{p\,\Lambda^{p-1}_{N_x-1}(b)}{t_{N_x+p-1}-t_{N_x-1}} = \frac{p}{t_{N_x}-t_{N_x-1}}, + \end{split} +\end{equation} +where we have used $t_{N_x}=t_{N_x+1}=\ldots=t_{N_x+p}=b$. + +\subsubsection{Spline expansion} +In summary, the approximation of a function $f$ defined +in the interval $[a,b]$ using a basis (\textbf{Is this obvious?) }of +splines of degree $p$ associated with the sequence of knots +$t_i, i=-p,\ldots,N_{x}+p$ +can be written as +\begin{equation} + f(x) = \sum_{j=-p}^{N_x-1}\, c_j\Lambda^p_j(x), \qquad + \begin{array}{l} + \mbox{support of $\Lambda^p_j$:}\quad [t_{j},t_{j+p+1}],\\ + t_i \leq x < t_{i+1} \Longrightarrow \Lambda^p_{i-p}(x),\ldots, + \Lambda^p_{i}(x) \ge 0. + \end{array} +\end{equation} + +Note that the \emph{last} spline in the interval $[t_i,t_{i+1}]$, which can be written as +\[ \Lambda^p_{i}(x)=w^p_i(x) \Lambda^{p-1}_{i}(x)=\ldots=w^p_i(x) w^{p-1}_i(x) \ldots +w^{1}_i(x)\Lambda^{0}_{i}(x) \] +\emph{vanishes at the knot} $x=t_i$. Thus at any position $x$, the sum +involves $p+1$ terms except at the knots $t_i$ where there are only $p$ terms. + +It is sometimes more convenient to renumber the spline index $j$ so that it +starts from $0$. With this new numbering, the spline expansion becomes +\begin{equation} + f(x) = \sum_{j=0}^{N_x+p-1}\, c_j\Lambda^p_j(x), \qquad + \begin{array}{l} + \mbox{support of $\Lambda^p_j$:}\quad [t_{j-p},t_{j+1}], \\ + t_i \leq x < t_{i+1} \Longrightarrow \Lambda^p_{i}(x),\ldots, + \Lambda^p_{i+p}(x) \ge 0. + \end{array} + \label{eq:splExp} +\end{equation} + +In the \emph{periodic} case, there are $N_{x}$ \emph{independent} +spline coefficients since +\begin{equation} + c_{N_{x}+\nu} = c_{\nu}, \qquad \nu=0,\ldots,p-1. \label{eq:perSp} +\end{equation} + +In the \emph{non-periodic} case, +the first and the last spline coefficients $c_{0},\,c_{N_x+p-1}$ are +respectively the values of $f$ at the end points $a$ and $b$. + +The basis functions for both non-periodic and periodic cubic splines +($p=3$) are shown in Fig~.\ref{fig:cubic_splines} where this new numbering is +used. + + +\begin{figure}[htbp] + \centering + \includegraphics[angle=0,width=\hsize]{driv1} + \caption{The basis of non-periodic and periodic cubic splines. The periodic + splines $\Lambda_{10}$, $\Lambda_{11}$, $\Lambda_{12}$ denote the same + splines as $\Lambda_{0}$, $\Lambda_{1}$, $\Lambda_{2}$ respectively. } + \label{fig:cubic_splines} +\end{figure} + +\subsection{Spline Initialization with \texttt{SET\_SPLINE}} +The initialization of a spline is performed by calling the routine +\texttt{SET\_SPLINE}, passing the desired degree $p$ and the +sequence of grid points (or knots) $t_j, j=0,\ldots,N_x$. If +Gauss points on each of the intervals $[t_j,t_{j+1}]$ are needed, a non-zero +value of \texttt{NGAUSS} should be specified. The other input +argument is the \emph{optional} \texttt{LOGICAL} argument \texttt{PERIOD} +to define the periodicity of the splines. By default it is \texttt{.FALSE.}. +The routine returns the 1d spline \texttt{SP} which is of type +\texttt{TYPE(spline1d)}: +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE set_spline(p, ngauss, grid, sp, period) + INTEGER, INTENT(in) :: p, ngauss + DOUBLE PRECISION, INTENT(in) :: grid(:) + LOGICAL, OPTIONAL, INTENT(in) :: period + TYPE(spline1d), INTENT(out) :: sp + LOGICAL, OPTIONAL, INTENT(in) :: period +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} + +Besides the main characteristics of the spline (degree $p$ of splines, +number of grid intervals, dimension of splines $N_x+p$, etc.) +the following quantities will be determined and stored in \texttt{SP}: +\begin{itemize} + \item values and all the $p$ derivatives of the $p+1$ non-vanishing splines + on each knots $t_j$. These quantities will be used to speed up computation + of the spline expansion (\ref{eq:splExp}). + \item integrals of splines \(I_i=\int\Lambda_i(x)\,dx\). +\end{itemize} + +For a 2d spline +\begin{equation} + \Lambda^{p+q}_{ij}(x,y) = \Lambda^p_i(x)\Lambda^q_j(y), +\end{equation} +on a 2d structured mesh defined by the grid points +\texttt{grid1(0:N1), grid2(0:N2)}, the same call as in the 1d case can be +used, except that the scalars \texttt{p, ngauss, period} become 2 element arrays and +the output \texttt{SP} is now of type \texttt{TYPE(spline2d)}: +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + INTEGER :: p(2), ngauss(2) + LOGICAL, OPTIONAL :: period(2) + DOUBLE PRECISION, dimension(:) :: grid1, grid2 + TYPE(spline2d) :: sp2d +... + CALL set_spline(p, ngauss, grid1, grid2, sp2d, period) +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} + +The derived type \texttt{spline2d} is a \emph{wrapper} of 2 \texttt{spline1d} +objects which can be accessed through \texttt{sp2d\%sp1} and \texttt{sp2d\%sp2}. + +Once \texttt{SET\_SPLINE} is called, the routine \texttt{GET\_DIM} can be +called to inquire the spline's essential characteristics such as dimension, +number of intervals and degree, for both 1d and 2d splines: \par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE get_dim(sp, dim, nx, nidbas) + TYPE(spline1d), INTENT(in) :: sp + INTEGER, INTENT(out) :: dim + INTEGER, OPTIONAL, INTENT(out) :: nx, nidbas +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} + +Integral of function $\int^b_a\,f(x)dx$ is computed from its spline +\texttt{sp} and splines coefficients in: +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + DOUBLE PRECISION FUNCTION fintg(sp, c) + TYPE(spline1d), INTENT(in) :: sp + DOUBLE PRECISION, INTENT(in) :: c(:) +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} + +For a 2d functions, the same function should be called with a 2d spline +\texttt{sp} and 2d array $c$. + +Finally \texttt{DESTROY\_SP(sp)} should be called when a spline +\texttt{sp} is not needed anymore to clean up memory space. + + +\subsection{Generating Splines with \texttt{DEF\_BASFUN}} + +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE def_basfun(xp, sp, fun, left) + DOUBLE PRECISION, INTENT(in) :: xp + TYPE(spline1d), INTENT(in) :: sp + DOUBLE PRECISION, INTENT(out) :: fun(:,:) + INTEGER, OPTIONAL, INTENT(out) :: left +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} + +This routine computes, for a given point $\mbox{xp}\in [t_0,t_{N_x}]$, the value +and optionally the $m$ derivatives of the $p+1$ splines \texttt{sp} which were previously +defined and returns them in \texttt{fun(1:p+1,1:m+1)} with $m\leq p$. The maximum +number of computed derivatives $m$ is determined by the size of the second dimension +of the array \texttt{fun}. The subroutine will return the optional integer +\texttt{left} defined such that: +\[ +t_{\mbox{left}} \leq xp < t_{\mbox{left+1}}, \qquad 0\leq \mbox{left} \leq N_{x.-1}. +\] + +\subsection{Example 1: Values and derivatives of all splines} +In this example, we first initialize a cubic spline with +the knot sequence $t_0,\ldots,t_{N_x}$ with \texttt{SET\_SPLINE} +and then call \texttt{DEF\_BASFUN} to compute its values, first and second +derivatives on the mesh points \texttt{xp(1:npts)}. + +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + USE BSPLINES + INTEGER, PARAMETER :: nx=10, npts=100 + DOUBLE PRECISION :: t(0:nx), xp(npts) + DOUBLE PRECISION, ALLOCATABLE :: fxp0(:,:), fxp1(:,:), fxp2(:,:) + DOUBLE PRECISION :: fun(4,3) ! 4 cubic splines at a given xp + ! plus first and second derivatives. + INTEGER :: i, dim, left + TYPE(spline1d) :: sp +! +! Define t(0:nx), xp(npts) +! + CALL set_spline(3, 0, t, sp, period=.FALSE.) + CALL get_dim(sp, dim) + ALLOCATE(fxp0(npts,0:dim-1), fxp1(npts,0:dim-1), fxp2(npts,0:dim-1) + fxp0 = 0.0 + fxp1 = 0.0 + fxp2 = 0.0 + DO i=1,npts + CALL def_basfun(xp(i), sp, fun, left=left) + fxp0(i, left:left+3) = fun(1:4, 1) ! Value + fxp1(i, left:left+3) = fun(1:4, 2) ! 1st derivative + fxp2(i, left:left+3) = fun(1:4, 3) ! 2nd derivative + END DO + DEALLOCATE(fxp0, fxp1, fxp2) + CALL destroy_sp(sp) +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} + +This code fragment will store \texttt{dim=nx+3=13} splines +and theirs first 2 derivatives in \texttt{fxp0}, \texttt{fxp1} and +\texttt{fxp2}. Change the \texttt{period} to \texttt{.TRUE.} to obtain +\emph{periodic} splines. + +\section{Spline Interpolation} +Given the interval $[a,b]$ discretized into $\{x_k,\,k=0,\ldots,N_g\}$ with +$x_0=a$ and $x_{N_g}=b$, the problem of interpolating $f(x), x\in [a,b]$ with +splines of degree $p$ is to solve the following equations +for the spline coefficients $c_i$: +\begin{equation} + \sum_{i=0}^{N_x+p-1}\,c_i\Lambda^p_{i}(x_k) = f(x_k),\quad k=0,\ldots,N_g. + \label{eq:intEq} +\end{equation} + +The sequence of knots $t_0,\ldots,t_{N_x}$ defines completely the splines +$\Lambda^p_i$ and its choice will be described in the following section. + +\subsection{Choice of knots} +If Eqs.~(\ref{eq:intEq}) are the only conditions for our interpolation +problem, the number of equations should match the number +of unknowns $c_i$. The number of knot intervals $N_x$ hence has to verify +\begin{equation} +N_x = N_g-p+1.\label{eq:knotNum} +\end{equation} + +For the \emph{periodic} case, taking into account the $p$ periodic spline conditions +(\ref{eq:perSp}) on +$c_i$ and $f(a)=f(b)$, this condition reduces to: +\begin{equation} +N_x = N_g. +\end{equation} + +For \emph{odd} values of the spline degree $p$, the knots $t_i$ could be placed on +the \emph{interpolation sites} $x_k$ while when $p$ is even, $t_i$ should not +be on $x_k$ to avoid a \emph{badly conditioned} linear system when solving +Eq.~(\ref{eq:intEq}). This leads to the following choice for $t_i$ in +\texttt{BSPLINES}: + +\subsubsection{Periodic splines} +The number of knots $N_x+1$ is \emph{equal} to the number of interpolation points +$N_g+1$ with +\begin{equation} + t_i = + \begin{cases} + x_i & \text{$p$ odd} \\ + (x_{i-1}+x_i)/2 & \text{$p$ even} + \end{cases} + ,\qquad i=0,\ldots,N_x +\end{equation} + +\subsubsection{Non-Periodic splines} +In order to satisfy the equality (\ref{eq:knotNum}), first, the 2 end points +are retained as knots: +\begin{equation} +t_0=x_0, \qquad t_{N_x} = x_{N_g}. +\end{equation} + +For even $p$, the first $p/2$ interpolation intervals are \emph{skipped}: +\begin{equation} +t_i = (x_{i+p/2-1} + x_{i+p/2})/2, \qquad i=1,\ldots,N_x-1, \label{eq:evenKnots} +\end{equation} + +while for odd $p$, $(p-1)/2$ interpolation points are \emph{skipped}: +\begin{equation} + t_i = x_{i+(p-1)/2 }, \qquad i=1,\ldots,N_x-1. \label{eq:oddKnots} +\end{equation} + +Instead of skipping grid points, an alternative would be to supplement the +system of equations (\ref{eq:intEq}) with conditions on derivatives of $f(x)$ +at one or both ends of $[a,b]$. This type of boundary conditions is not +implemented in the present version of the \texttt{BSPLINES} module. + +\subsection{The collocation matrix} +The \emph{collocation matrix} $\Lambda^p_i(x_k)$ of the interpolation problem +(\ref{eq:intEq}) is a square matrix. Each row has at most $p+1$ non-zero +terms. Let us consider separately the non-periodic and the periodic cases. + +\subsubsection{The non-periodic case} +\paragraph{Even spline degree} + +From (\ref{eq:evenKnots}), there are $p/2+1$ interpolation points \(x_{0}, \ldots, x_{p/2}\) +in the first knot interval $[t_0,t_1)$. Since there are at most $p+1$ non-zero +splines for any points in each interval (except for $x_0$ where $\Lambda_i(x_0)=\delta_{i,0}$, +the collocation matrix starts as: + +\begin{equation} + \left(\begin{array}{llllll} + \Lambda_0(x_{0}) & 0 &\cdots & \cdots & \cdots &\cdots \\ + \Lambda_0(x_{1}) &\Lambda_1(x_{1}) &\cdots &\Lambda_p(x_{1}) & 0 &\cdots \\ + \vdots &\vdots &\cdots &\vdots & 0 &\cdots \\ + \Lambda_0(x_{p/2})&\Lambda_1(x_{p/2}) &\cdots &\Lambda_p(x_{p/2}) & 0 &\cdots \\ + 0 &\Lambda_1(x_{p/2+1})&\cdots &\Lambda_p(x_{p/2+1})& \Lambda_{p+1}(x_{p/2+1})&0 \\ + 0 &\ddots &\ddots &\ddots & \ddots &\ddots + \end{array}\right) +\end{equation} + +The number of \emph{upper-diagonals} (non including the diagonal) is +obviously determined by the second row of the matrix above, which yields $p-1$. +Since the knot placement is identical for both ends of the interpolation mesh, +the matrix +$\Lambda_i(x_k)$ is \emph{banded} with half-bandwidths +\begin{equation} +kl=ku=p-1 +\end{equation} + +\paragraph{Odd spline degree} +Applying the same procedure, it is straightforward to show for $p$ odd and +from (\ref{eq:oddKnots}), that $x_0,\ldots.x_{(p-1)/2}$ are located in the +first knot interval $[t_0,t_1)$ and that the matrix has again the same +half-bandwidths as in the even $p$ case. + +The resulting interpolation problem can then be solved with +the usual \emph{banded matrix factorization} followed by a \emph{back-solve} +phase. + +\subsubsection{The periodic case} +Let consider the matrix for $p=3$ and $N_x=10$ (see lower figure of +Fig.~(\ref{fig:cubic_splines}): +\begin{equation} + \left(\begin{array}{llllll} + \Lambda_0(x_{0}) & \Lambda_1(x_{0}) & \Lambda_2(x_{0}) & 0 & \cdots & \\ + 0 & \Lambda_1(x_{1}) & \Lambda_2(x_{1}) & \Lambda_3(x_{1}) & 0 &\cdots \\ + \vdots & \ddots & \ddots & \ddots & \ddots &\ddots \\ + 0 & \cdots & 0 & \Lambda_7(x_{7}) & \Lambda_8(x_{7}) & \Lambda_9(x_{7}) \\ + \Lambda_0(x_{8}) & 0 & 0 & \cdots &\Lambda_8(x_{8}) & \Lambda_9(x_{8}) \\ + \Lambda_0(x_{9}) &\Lambda_1(x_{9}) & 0 & 0 & \cdots & \Lambda_9(x_{9}) + \end{array}\right) \label{eq:perMat} +\end{equation} +The matrix is ``almost triangular'' (except for the last 2 rows) and is not +\emph{diagonally dominant}! A more satisfactory (and symmetric in shape) matrix is +however obtained by simply renumbering the splines such that the sequence +starts with $-\lfloor p/2 \rfloor$ instead of $0$. This renumbered splines +are shown in Fig.~\ref{fig:fitSpl} for the cubic +and quadratic periodic splines. With this renumbering, the matrix +(\ref{eq:perMat}) has a more symmetric shape and is diagonally dominant: +\begin{equation} + \left(\begin{array}{lllll} + \Lambda_0(x_{0}) & \Lambda_1(x_{0}) & 0 & \cdots & \Lambda_9(x_{0}) \\ + \Lambda_0(x_{1}) & \Lambda_1(x_{1}) & \Lambda_2(x_{1}) & 0 &\cdots \\ + \vdots & \ddots & \ddots & \ddots &\ddots \\ + 0 & \cdots & \Lambda_7(x_{8}) & \Lambda_8(x_{8}) & \Lambda_9(x_{8}) \\ + \Lambda_0(x_{9}) & 0 & 0 & \Lambda_8(x_{9}) & \Lambda_9(x_{9}) + \end{array}\right) \label{eq:perMatnew} +\end{equation} + +In general, for arbitrary $p$ (even and odd values), the collocation matrix +$A=\Lambda_j(x_i)$ can be written as + +\begin{equation} + A = B + UV^T +\end{equation} + +where $B$ is a banded matrix with half-bandwidths $kl=ku=b=\lfloor p/2\rfloor$ and +rank $N_x$. $U$ and $V$ are $N_x\times 2b$ sparse matrices: + +\begin{equation} + U = \left( + \begin{matrix} + I & 0 \\ + 0 & 0 \\ + 0 & I + \end{matrix}\right), \qquad + V = \left( + \begin{matrix} + 0 & D^T \\ + 0 & 0 \\ + C^T & 0 + \end{matrix}\right), \qquad + V^T = \left( + \begin{matrix} + 0 & 0 & C \\ + D & 0 & 0 + \end{matrix}\right), \qquad +\end{equation} +where $C$, $D$ are the $b\times b$ \emph{off-band} sub-matrices and $I$, the +identity matrix. In the cubic spline example considered above, the +\emph{off-band} matrices are simply $1\times 1$ matrices with +$C=\Lambda_9(x_0)$ and $D=\Lambda_0(x_9)$. + +The inverse of $A$ can be deduced from the \emph{Sherman-Morrison-Woodbury formula} \cite{Golub}: + +\begin{eqnarray*} + A^{-1} &=& B^{-1} - B^{-1}U(1+V^{T}B^{-1}U)^{-1}V^{T}B^{-1} \\ + &=& B^{-1} - ZW^{T}B^{-1}, +\end{eqnarray*} +where +\begin{eqnarray*} + Z &=& B^{-1}U, \\ + H &=& 1+V^{T}B^{-1}U \\ + W^T &=& H^{-1}V^{T}. +\end{eqnarray*} + +The solution of the interpolation problem $Ax=f$ can then be reduced to a +\emph{factorization} and a \emph{back-solve} phase: + +\begin{enumerate} +\item Factorization + \begin{enumerate} + \item Factor: \( B \longleftarrow L_BU_B \) + \item Solve: \( (L_BU_B)Z = U, \quad U\longleftarrow Z \) + \item Compute: \( H = 1+V^{T}Z \) + \item Factor: \( H=L_HU_H \) + \item Solve: \( (L_HU_H)W^{T} = V^{T}, \quad V^{T}\longleftarrow W^{T} \) + \end{enumerate} +\item Back-solve + \begin{enumerate} + \item Solve: \( (L_BU_B)y = f \) + \item Compute: \( t = W^{T}y \) + \item Compute: \( x = y - Zt \) + \end{enumerate} +\end{enumerate} + +At the end of the factorization, only the (updated) matrices $B$, $U$ and +$V^{T}$, required in the back-solve phase, need to be saved. +Note that we avoid to store the product +$ZW^T$ because it is a \emph{big} $N_x\times N_x$ matrix. + +After the \emph{back-solve} step, the solution $x$ is \emph{shifted back} (by +$\lfloor p/2\rfloor$) and the appropriate periodicity condition is applied to +obtain the spline coefficients $c_j,\, j=0,\ldots,N_x+p-1$, as defined in +(\ref{eq:splExp}). + +\begin{figure}[htbp] + \centering + \includegraphics[angle=0,width=\hsize]{fit} + \caption{The periodic cubic and quadratic splines used + for interpolation. The spline knots are indicated by \emph{blue full circles} and + the interpolation points, by \emph{dashed vertical lines} } + \label{fig:fitSpl} +\end{figure} + +\subsection{\texttt{PP} representation} +The computation of $f(x)$ using directly the spline expansion Eq.~(\ref{eq:splExp}) can +be costly, because of the evaluation of the splines $\Lambda^p_j(x)$, +especially when interpolating on large number of points. Expanding $f(x)$, +using truncated Taylor series in each interval $[t_\mu,t_{\mu+1}]$, we obtain +the following \emph{Piecewise Polynomial Function} representation (or +\texttt{ppform}) of $f(x)$: +\begin{equation} + f(x) = \sum^p_{k=0}\, \Pi_{k\mu}(x-t_\mu)^k, \quad t_\mu\leq x. +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +\documentclass[a4paper]{article} +\usepackage{amsmath} +\title{\tt Some Notes on Boundary Conditions} +\author{Trach-Minh Tran} +\date{March 2012} + +\begin{document} +\maketitle + +\section{Neumann BC as an {essential} BC} +The original equation: +\begin{equation} + \mathbf{A \cdot u} = \mathbf{b} +\end{equation} +with the Neumann BC (1D case): +\begin{equation} + \alpha u_1 + \beta u_2 = c. +\end{equation} +From Eq.(20) of \cite{BSPLINES}: +\begin{equation} + \beta = -\alpha =\frac{p}{\Delta_1} +\end{equation} +where $p$ is the degree of spline and $\Delta_1$ is the lenght of the +first insterval. + +Transformation $(u_1, \ldots, u_n) \Rightarrow (\hat u_1, \ldots, \hat +u_n)$ defined by +\begin{equation} + \begin{array}{ccc} + \alpha u_1 + \beta u_2 = \hat u_1 & & u_1 = \frac{1}{\alpha}\hat u_1 - \frac{\beta}{\alpha}\hat u_2 \\ + u_2 = \hat u_2 & & u_2 = \hat u_2 \\ + \vdots & \Longrightarrow & \vdots \\ + u_N = \hat u_N & & u_N = \hat u_N. + \end{array} +\end{equation} +The original Neumann BC becomes now a \emph{inhomogeneous Dirichlet} +BC on $\mathbf{\hat u}$: +\begin{equation} + \hat u_1 = c. +\end{equation} +The transformed linear system can be written as: +\begin{equation} + \mathbf{(U^T\cdot A \cdot U)\cdot \hat u} = \mathbf{U^T\cdot b}, +\end{equation} +where $\mathbf{U}$ is given by + +\begin{equation} + \mathbf{U} = + \left(\begin{matrix} + \frac{1}{\alpha} & -\frac{\beta}{\alpha} & \dots & 0 \\ + 0 & 1 & \dots & 0 \\ + & & \ddots& \vdots \\ + 0 & 0 & \dots & 1 + \end{matrix}\right) +\end{equation} + +Thus, all the symmetry, hermiticity or positivity properties of the +original matrix are preserved with this matrix transformation! + +\section{Neumann BC as a \emph{natural} BC} +Multiplying the 1D Sturm-Liouville equation (see section 1.1.1 of +\cite{SOLVERS}) by spline $\Lambda_j(x)$ and integrating by parts, we +obtain the following boundary terms: + +\begin{equation} +-\Lambda_j(L) C_1(L) \phi'(L) + \Lambda_j(0) C_1(0) \phi'(0) +\end{equation} +To impose $\phi'(0) = a$ and noting that $\Lambda_j(0)=\delta_{j1}$, +you only need to add $[-aC_1(0)]$ to the first element of the +RHS. Likewise, for the BC $\phi'(L) = b$ you only need to add +$[bC_1(L)]$ to the last element of the RHS. No matrix manipulation (as +for the \emph{essential} BC) is required! Notice that if $a$ or $b$ is +zero, nothing needs to be done to impose these BC. That's the reason why +it is called \emph{natural} BC! + +A subtle point to be noted here is that using \emph{natural} BC, +$\phi'(0)$ \emph{is not} exaclty equal to $a$, althought it should +converge to $a$ as $(\Delta x)^p$ where $p$ is the spline degree, +while using the \emph{essential} BC, $\phi'(0)=a$ is \emph{exact}! + +\section{Diffusion Equation using second order time implicit method} +Let rewrite Eq.(74) of your notes in vector form and replace the +unkowns $n$ by $f$: +\begin{equation} + \mathbf{B} \frac{d \mathbf{f}}{dt} = \mathbf{M\cdot f}. +\end{equation} +Using a \emph{second order time centered} discretization, + +\begin{equation} + \begin{split} + \mathbf{B} \left(\frac{\mathbf{f}^{n+1}-\mathbf{f}^{n}}{\Delta t}\right) &= + \mathbf{M} \left(\frac{\mathbf{f}^{n+1}+\mathbf{f}^{n}}{2}\right) \\ +\Rightarrow & + \left(\mathbf{B} -\frac{\Delta t}{2} + \mathbf{M}\right)\mathbf{f}^{n+1} = + \left(\mathbf{B} +\frac{\Delta t}{2} + \mathbf{M}\right)\mathbf{f}^{n} + \end{split} +\end{equation} +\emph{Essential} BC has to be imposed on the matrix +\begin{equation} + \mathbf{B} -\frac{\Delta t}{2} \mathbf{M} +\end{equation} +while \emph{natural} BC is introduced while deriving the weak form +leading to the matrix $M$. +This method is \emph{unconditionnaly stable} and second order in +time. When linear splines are used for the space discretization, this +scheme is similar to the well-known \emph{Cranck-Nicolson} (see for +example Wikipedia) discretization for parabolic PDE. + +\begin{thebibliography}{99} +\bibitem{BSPLINES} {\tt BSPLINES} Reference Guide. +\bibitem{SOLVERS} {\tt The SOLVERS in BSPLINES} Reference Guide. +\end{thebibliography} + +\end{document} diff --git a/docs/manual/solvers.pdf b/docs/manual/solvers.pdf new file mode 100644 index 0000000..72a28d1 Binary files /dev/null and b/docs/manual/solvers.pdf differ diff --git a/docs/manual/solvers.tex b/docs/manual/solvers.tex new file mode 100644 index 0000000..8abdf13 --- /dev/null +++ b/docs/manual/solvers.tex @@ -0,0 +1,2818 @@ +% +% @file solvers.tex +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +\documentclass[a4paper]{article} +\usepackage{linuxdoc-sgml} +\usepackage{graphicx} +\usepackage{hyperref} +\usepackage{amsmath} +%\usepackage{verbatim} +%\usepackage[notref]{showkeys} + +\title{\tt The Solvers in BSPLINES} +\author{Trach-Minh Tran} +\date{v0.6, December 2011} +\abstract{Implementation of a common simple interface to popular +solver packages (LAPACK, PARDISO, WSMP, PETSc, etc.). The main goal is to +provide an easy access to these packages in order to solve elliptic and +parabolic as well as some types of integro-differential +equations.} + +\begin{document} +\maketitle +\tableofcontents + +\section{Matrix form of the problem} +\subsection{Getting started} +\subsubsection{A one-dimensional problem} + +Let us start with the one-dimensional Sturm-Liouville differential +equation: + +\begin{equation*} + -\frac{d}{dx} \left[C_1(x) \frac{d\phi}{dx}\right] + C_2(x)\phi = \rho, +\end{equation*} +on the domain $0\leq x \leq L$ with suitable boundary conditions. +On a grid with $N$ intervals, the discretized solution $\phi$, using +the splines $\Lambda_i(x)$ of order $p$ can be written as +\begin{equation} +\label{sol1d} + \phi(x) = \sum_{i=0}^{d-1} \phi_i\Lambda_i(x), +\end{equation} +where \cite{BSPLINES} +\begin{equation*} + d = + \begin{cases} + N & \text{if $\phi$ is periodic}, \\ + N+p & \text{otherwise}, + \end{cases} +\end{equation*} +and $\phi_i$ are the unknowns of the following matrix equation: +\begin{equation} +\label{matEq1d} + \sum_{i'=0}^{d-1} A_{ii'}\phi_{i'} = \rho_i, \qquad i=0,\ldots, d-1. +\end{equation} +Here the matrix $A_{ii'}$ and the right-hand-side $\rho_i$ are +respectively given by: +\begin{equation} + \begin{split} + \label{matCoef1d} + A_{ii'} =& \int_{0}^{L}\!dx\,C_1\Lambda_{i}^{'}\Lambda_{i'}^{'} + + \int_{0}^{L}\!dx\,C_2\Lambda_{i}\Lambda_{i'}, \\ + \rho_i =& \int_{0}^{L}dx \rho\Lambda_i.\\ + \end{split} +\end{equation} +For more general differential operators, the matrix coefficients $A_{ii'}$ can be +written as a sum of contributing matrices of the form +\begin{equation} + \label{mat1d} + A_{ii'} = \int_{0}^{L}\!dx\,C\Lambda_{i}^{\alpha}\Lambda_{i'}^{\alpha'}, +\end{equation} +where $\Lambda_{i}^{\alpha}$ denotes the $\alpha^\text{th}$ +derivative of $\Lambda_{i}$. +As the splines $\Lambda_i$ have a support of $p+1$ intervals, the +matrix is sparse and its +coefficients $A_{ii'}$ are non-zero only for $|i-i'| \leq p$: hence the +matrix has a band structure of bandwidth equal to $2p+1$ +if the operator is purely differential. For an +integral equation such as +\begin{equation*} + \int_{0}^{L}\!dx' K(x,x')\phi(x') = \rho(x), +\end{equation*} +the discretization results in a \emph{dense} matrix of the form +\begin{equation} + \label{matIntg1d} + A_{ii'} = \int_{0}^{L}\!dx \Lambda_{i}(x) \int_{0}^{L}\!dx' + K(x,x')\Lambda_{i'}(x'). +\end{equation} +Note that when the kernel is separable $K(x,x') = U(x)V(x')$, the +matrix $A_{ii'}$ is a \emph{dyadic}: +\begin{equation} + A_{ii'} = \int_{0}^{L}\!dx U(x) \Lambda_{i}(x) \int_{0}^{L}\!dx + V(x)\Lambda_{i'}(x) = U_iV_{i'}. +\end{equation} + +\subsubsection{Periodic boundary conditions} +The splines $\Lambda_i$ are $N$-periodic ($\Lambda_{i+N}(x)=\Lambda_i(x-L)$). This +property can be easily enforced while constructing both $\rho_i$ +and the matrix $A_{ii'}$. This results in a solution +$\phi_i$ which is also $N$-periodic. + +\subsubsection{Non-periodic boundary conditions} +In {\tt BSPLINES} \cite{BSPLINES}, the constructed non-periodic splines are such that at the +boundaries $x=0$ and $x=L$: +\begin{equation} + \Lambda_i(0) = \delta_{i,0}, \qquad \Lambda_i(L) = \delta_{i,N+p-1}, +\end{equation} +which imply that, using (\ref{sol1d}) +\begin{equation} + \phi(0) = \phi_0, \qquad \phi(L) = \phi_{N+p-1}. +\end{equation} +It is thus possible to impose the Dirichlet boundary conditions +by a simple modification of the matrix $A_{ii'}$ as shown in +Appendix~\ref{DirichletCond}. + + +\subsection{Problems in more dimensions} +\subsubsection{Two-dimensional equations} +The results obtained above can be extended in a +straightforward manner. Assuming, for example a +\emph{polar like} $(r,\theta)$ coordinate system, +with the discretized solution and the right-hand side +written as: +\begin{equation} +\label{discreteEq2d} + \begin{split} + \phi(r,\theta) &= \sum_{i=0}^{N_r+p_r-1}\sum_{j=0}^{N_\theta-1} + \phi_{ij}\Lambda_i(r) \Lambda_j(\theta) \\ + \rho_{ij} &= \int_{0}^{R}\!dr \int_{0}^{2\pi} \!d\theta J(r,\theta) \rho(r,\theta) + \Lambda_i(r) \Lambda_j(\theta),\\ + \end{split} +\end{equation} +where $J(r,\theta)$ is the Jacobian, the matrix equation to solve is +\begin{equation} + \sum_{i'=0}^{N_r+p_r-1}\sum_{j'=0}^{N_\theta-1} + A_{iji'j'}\phi_{i'j'} = \rho_{ij}, +\end{equation} +with the matrix $A_{iji'j'}$ expressed as a sum of matrices of the +form: +\begin{equation} + \label{mat2d} + A_{iji'j'} = \int_{0}^{R}\!dr \int_{0}^{2\pi}\!d\theta\, + C(r,\theta)\,\Lambda_{i}^{\alpha}(r)\Lambda_{i'}^{\alpha'}(r)\, + \Lambda_{j}^{\beta}(\theta)\Lambda_{j'}^{\beta'}(\theta). +\end{equation} + +\subsubsection{Three-dimensional equations} +Likewise, for the three-dimension case, assuming for example a +\emph{toroidal like} $(r,\theta, \varphi)$ coordinate system, +with the discretized solution and the right-hand side +written as: +\begin{equation} +\label{discreteEq3d} + \begin{split} + \phi(r,\theta, \varphi) &= + \sum_{i=0}^{N_r+p_r-1}\sum_{j=0}^{N_\theta-1} + \sum_{k=0}^{N_\varphi-1} + \phi_{ijk}\Lambda_i(r) \Lambda_j(\theta) \Lambda_k(\varphi) \\ + \rho_{ijk} &= \int_{0}^{R}\!dr \int_{0}^{2\pi} \!d\theta + \int_{0}^{2\pi} \!d\varphi J(r,\theta,\varphi) \rho(r,\theta,\varphi) + \Lambda_i(r) \Lambda_j(\theta) \Lambda_k(\varphi),\\ + \end{split} +\end{equation} +where $J(r,\theta,\varphi)$ is the Jacobian, the matrix equation to solve is +\begin{equation} + \sum_{i'=0}^{N_r+p_r-1}\sum_{j'=0}^{N_\theta-1}\sum_{k'=0}^{N_\varphi-1} + A_{ijki'j'k'}\phi_{i'j'k'} = \rho_{ijk}, +\end{equation} +with the matrix $A_{ijki'j'k'}$ expressed as a sum of matrices of the +form: +\begin{equation} + \label{mat3d} + A_{ijki'j'k'} = \int_{0}^{R}\!dr + \int_{0}^{2\pi}\!d\theta\ \int_{0}^{2\pi}\!d\varphi\, + C(r,\theta,\varphi)\,\Lambda_{i}^{\alpha}(r)\Lambda_{i'}^{\alpha'}(r)\, + \Lambda_{j}^{\beta}(\theta)\Lambda_{j'}^{\beta'}(\theta) + \Lambda_{k}^{\gamma}(\varphi)\Lambda_{k'}^{\gamma'}(\varphi). +\end{equation} + +\subsubsection{Unicity condition} +In the case of the polar coordinates $(r,\theta)$ considered above, +the unicity condition on the axis $r=0$ should be imposed. It can be +enforced by modifications of the matrix $A$ as described in +Appendix~\ref{unicityCond}. + +\subsubsection{One-dimensional numbering} +For two-dimensional (three-dimensional) problems, the solution +$\phi_{ij}$ ($\phi_{ijk}$) as well as the right-hand-side $\rho_{ij}$ +($\rho_{ijk}$) can be conveniently casted into one-dimensional +arrays. As an example, by numbering first the last index, we obtain the +following mappings: +\begin{equation} +\label{map1d} + \mu = + \begin{cases} + j + iN_\theta & \text{two-dimensional case} \\ + k + (j + iN_\theta)N_\varphi & \text{three-dimensional case} \\ + \end{cases} +\end{equation} +Using such a one-dimensional numbering, the matrix equation for the two and +three dimensional cases takes a more conventional form: +\begin{equation} + \sum_{\mu'=0}^{r-1} A_{\mu\mu'} \phi_{\mu'} = \rho_\mu, +\end{equation} +with the respective matrix ranks $r=(N_r+p_r)N_\theta$ and +$r=(N_r+p_r)N_\theta N_\varphi$. For a pure differential operator, the +matrix $A_{\mu\mu'}$ has a band structure of bandwidth +$b=2(p_r+1)N_\theta-1$ and $b=2(p_r+1)N_\theta N_\varphi-1$ respectively. It is +important to note that, except for the one-dimensional problem, there +are \emph{many} zeros inside the matrix band! + +\section{The module {\tt MATRIX}} +\subsection{Interface} + +The Fortran module {\tt MATRIX} contains easy-to-use routines to solve the +matrix equation formulated in the previous section, using the direct +solvers of LAPACK. The different matrix storage formats are defined, +using the Fortran derived datatypes. The different types in the +present version are listed in Table~\ref{matTypes}. + +\begin{table}[h] + \centering + \begin{tabular}{|l|l|}\hline + {\tt gemat} & General dense matrix \\ + {\tt gbmat} & General band matrix \\ + {\tt pbmat} & Symmetric positive-definite band matrix \\ + {\tt periodic\_mat} & Matrix obtained for example in \\ + & one-dimensional periodic problems\\ + \hline + \end{tabular} + \caption{The matrix types} + \label{matTypes} +\end{table} + +These types define {\tt DOUBLE PRECISION} real matrices. +{\tt DOUBLE COMPLEX} matrices are declared by prefixing +these types with the letter ``{\tt z}'', for example {\tt zgbmat}. +Note that {\tt zpbmat} defines a \emph{hermitian} positive-definite +complex matrix. +The \emph{generic} routines are defined for each of these types in Table~\ref{matRoutines}. +Note that the routine {\tt updtmat} is mainly used for the matrix assembly + while {\tt getxxx} and {\tt putxxx} are rather used to modify the + matrix, for example to impose boundary conditions. + +\begin{table}[h] +\centering +\begin{tabular}{|l|l|} + \hline + {\tt init} & Initializes the data structure \\ + {\tt destroy} & Free the data structure memory \\ + \hline + {\tt updtmat} & Accumulates a value to the element $A_{ij}$ \\ + {\tt putele, putrow, putcol} & Overwrites the matrix element $(i,j)$, + row $i$, column $j$ \\ + {\tt getele, getrow, getcol} & Returns the matrix element $(i,j)$, + row $i$, column $j$ \\ + {\tt vmx} & Returns the matrix-vector product \\ + {\tt mcopy} & Copy a matrix to another matrix \\ + {\tt maddto} & Adds 2 matrices: $A \leftarrow A+\alpha B$ \\ + {\tt determinant} & Returns the matrix determinant \\ + \hline + {\tt factor} & Computes the LU (Cholesky for symmetric/hermitian \\ + & matrix) factorization \\ + {\tt bsolve} & Solves the linear system using the factorized matrix \\ + \hline +\end{tabular} + \caption{The generic routines in the {\tt MATRIX} module} + \label{matRoutines} +\end{table} +The complete description of each routine is given in +Appendix~\ref{matRef}. More information on how to use it can be obtained by +{\tt greping} its name on the examples found in the {\tt examples/} directory. + +\subsection{A two-dimensional example} +\label{twodEx} +Let's consider the Poisson equation using the cylindrical coordinates +$(r,\theta)$: +\begin{equation} + -\frac{1}{r}\frac{\partial}{\partial r} + \left[r\frac{\partial\phi}{\partial r}\right] + -\frac{1}{r^2}\frac{\partial^2\phi}{\partial\theta^2} = \rho, + \qquad \phi(r=1,\theta) = 0. +\end{equation} + +Assuming the exact solution +\begin{equation*} + \phi(r,\theta) = (1-r^2)r^m\cos m\theta, +\end{equation*} +the right-hand-side becomes +\begin{equation*} + \rho=4(m+1)r^{m}\cos m\theta. +\end{equation*} +The matrix and the right hand-side of the discretized problem are computed as + +\begin{equation} + \begin{split} + A_{iji'j'} &= \int_{0}^1\!dr \int_{0}^{2\pi}\!d\theta\,\left[ + r\,\Lambda'_{i}(r)\Lambda'_{i'}(r)\, + \Lambda_{j}(\theta)\Lambda_{j'}(\theta) + + \frac{1}{r}\,\Lambda_{i}(r)\Lambda_{i'}(r)\, + \Lambda'_{j}(\theta)\Lambda'_{j'}(\theta) \right] \\ + \rho_{ij} &= \int_{0}^1\!dr \int_{0}^{2\pi} \!d\theta\,\,r \rho(r,\theta) + \,\,\Lambda_i(r) \Lambda_j(\theta). + \end{split} +\end{equation} +In the example {\tt pde2d.f90} this problem is treated in detail. +In the following, only the calls to the {\tt MATRIX} routines +are reviewed to show how the matrix problem is solved using the {\tt + MATRIX} module. +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} +! +! Declare a General Band matrix +! + USE matrix + USE conmat_mod + TYPE(gbmat) :: mat +! +! Rank and bandwidth. nidbas(1) is the spline order in +! the first dimension r. +! + nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix + kl = (nidbas(1)+1)*ny -1 ! Number of sub-diagnonals + ku = kl ! Number of super-diagnonals + nterms = 2 ! Number of terms in the weak form +! +! Initialize the matrix data structure +! + CALL init(kl, ku, nrank, nterms, mat) +! +! Construct the matrix, using 2D spline splxy +! and impose boundary conditions +! + CALL conmat(splxy, mat, coefeq_poisson) + CALL ibcmat(mat, ny) +! +! Compute the RHS, using the 2D spline splxy +! and impose boundary conditions +! + CALL disrhs(mbess, splxy, rhs) + CALL ibcrhs(rhs, ny) +! +! Factor the matrix and solve +! + CALL factor(mat) + CALL bsolve(mat, rhs, sol) +! +... +CONTAINS + SUBROUTINE coefeq_poisson(x, y, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x, y + INTEGER, INTENT(out) :: idt(:,:), idw(:,:) + DOUBLE PRECISION, INTENT(out) :: c(:) +! + c(1) = x + idt(1,1) = 1; idt(1,2) = 0 + idw(1,1) = 1; idw(1,2) = 0 +! + c(2) = 1.d0/x + idt(2,1) = 0; idt(2,2) = 1 + idw(2,1) = 0; idw(2,2) = 1 + END SUBROUTINE coefeq_poisson +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} + +Some explanations and remarks: +\begin{itemize} +\item The matrix construction is performed by {\tt conmat} which will + be described later in section \ref{secCONMAT}. The + \emph{weak form} is defined in the \emph{internal} + procedure {\tt coefeq\_poisson} and passed as an argument to + {\tt conmat}. See section \ref{secCONMAT} for a detailed + description of the variables {\tt c, idt, idw} returned by + {\tt coefeq\_poisson}. +\item Boundary conditions are imposed by modifications + of the matrix in subroutine {\tt ibcmat} (see file {\tt the + ibcmat.f90}), using the {\tt MATRIX} + routines {\tt getrow, putrow, getcol, putcol}. +\item The construction of the right-hand-side in {\tt disrhs} (see the + file {\tt disrhs.f90}) is computed using a Gauss quadrature.. +\item Using the {\tt pbmat} type instead of {\tt gbmat} (the matrix in + this example is symmetric and positive-definite!) requires only a few + modifications of the program (see the complete example {\tt + pde2d\_pb.f90}): + \begin{itemize} + \item Change the type {\tt gbmat} to {\tt pbmat} in all matrix declarations + \item Change the list of arguments in the routine {\tt init} to + ({\tt ku, nrank, nterms, mat}) + \item Small changes in the boundary conditions ({\tt ibcmat} and {\tt + ibcrhs}) to take into account the symmetry. + \end{itemize} +\item The module {\tt MATRIX} can be used independently of {\tt + BSPLINES} (which is used here only to compute the matrix and + right-hand-side), for example in a problem discretized using + Finite Differences. +\end{itemize} + +\section{Sparse matrix storage} +Using the \emph{band matrix format} for a pure differential operator +requires to store a full bandwidth $b=2(p_r+1)N_\theta-1$ for the +two-dimensional problem as shown in section 1, while there are only +$(2p_r+1)^2$ non-zero elements per matrix row. In three-dimensional +problem, it is much worse since $b=2(p_r+1)N_\theta N\varphi-1$ for +$(2p_r+1)^3$ non-zero elements. + +In order to reduce the matrix storage, a solution consists of just +storing the matrix non-zero elements and use the \emph{Sparse Direct +Solvers}. With an optimal \emph{renumbering} strategy (or +\emph{fill-in reducing ordering}), the +size of the factored matrix can be expected to be smaller than the +corresponding band matrix. + +An alternative is to use \emph{iterative} methods which usually need +less memory. + +Such a sparse matrix is implemented in the {\tt SPARSE} module where +each matrix row is represented by a \emph{\tt linked} list of elements +with sorted column index. The data structure of this sparse matrix is +wrapped up in a the Fortran data type {\tt spmat} for a real matrix and +{\tt zspmat} for a complex matrix. Most of the generic routines which +are already defined in the {\tt MATRIX} module are overloaded for these +matrix types. They are listed in Table~\ref{spmatRoutines}. The complete +documentation of these routines can be found in Appendix~\ref{spmatRef}. + +\begin{table}[h] +\centering +\begin{tabular}{|l|l|} + \hline + \emph{Matrix types} & {\tt spmat, zspmat} \\ + \hline\hline + {\tt init} & Initializes the data structure \\ + {\tt destroy} & Free the data structure memory \\ + \hline + {\tt updtmat} & Accumulates a value to the element $A_{ij}$ \\ + {\tt putele, putrow, putcol} & Overwrites the matrix element $(i,j)$, + row $i$, column $j$ \\ + {\tt getele, getrow, getcol} & Returns the matrix element $(i,j)$, + row $i$, column $j$ \\ + \hline + {\tt get\_count} & Get the number of non-zero elements in matrix \\ + \hline +\end{tabular} + \caption{The generic routines in the {\tt SPARSE} module} + \label{spmatRoutines} +\end{table} + +It should be noted that this module is \emph{not} used +directly in solver problems. One usually uses instead modules which +are specific to a type of (direct or iterative) solver. As will be +shown in the next section, it is the routines in this solver module which +directly calls the routines defined in the {\tt SPARSE} module during the matrix +assembly. + +\section{Solvers using the module {\tt SPARSE}} +All the solvers discussed in this section use initially the module +{\tt SPARSE} to construct the sparse matrix. Once this construction +procedure is complete, this matrix is converted to the (usually more +efficient) format used by the solver. +In a time-dependent simulation where the problem matrix +changes but not the sparsity pattern, the subsequent matrix assembly +will be performed directly on this solver's format. + +Thus for example, the first time {\tt updtmat} is called +on a new matrix, it is the version from {\tt SPARSE}. Next, if +{\tt updtmat} is called again to modify the matrix, it will be the +solver's version, unless the matrix is re-initialized by a call to +{\tt destroy} followed by {\tt init}. This switch is completely +transparent for the user as shown through an example in the next +section. + +\subsection{The PARDISO direct solver} +The interface to PARDISO~\cite{PARDISO} is implemented in the +module {\tt PARDISO\_BSPLINES}. +The matrix type (symmetric, hermitian, positive-definite) is set +by the arguments {\tt nlsym, nlherm} and {\tt nlpos} passed to the +generic routine {\tt init}. All the other generic routines defined in +the module {MATRIX}, plus routines specific to the sparse solver, +are listed in Table~\ref{pardisoRoutines}. The complete documentation +of these routines is given in Appendix~\ref{pardisoRef}. + + +\begin{table}[h] +\centering +\begin{tabular}{|l|l|} + \hline + \emph{Matrix types} & {\tt pardiso\_mat, zpardiso\_mat} \\ + \hline\hline + {\tt init} & Initializes the data structure \\ + {\tt destroy} & Free the data structure memory \\ + \hline + {\tt updtmat} & Accumulates a value to the element $A_{ij}$ \\ + {\tt putele, putrow, putcol} & Overwrites the matrix element $(i,j)$, + row $i$, column $j$ \\ + {\tt getele, getrow, getcol} & Returns the matrix element $(i,j)$, + row $i$, column $j$ \\ + {\tt vmx} & Returns the matrix-vector product \\ + {\tt mcopy} & Copy a matrix to another matrix \\ + {\tt maddto} & Adds 2 matrices: $A \leftarrow A+\alpha B$ \\ + {\tt clear\_mat}& Set the matrix elements to zero.\\ + {\tt psum\_mat} & Parallel sum of matrices \\ + {\tt p2p\_mat} & Point-to-point combine sparse matrix between 2 processes\\ + {\tt get\_count}& Get the number of non-zero elements in matrix \\ + \hline + {\tt factor} & Factorization \\ + {\tt bsolve} & Solves the linear system using the factorized matrix \\ + {\tt to\_mat} & Convert to PARDISO CSR matrix format \\ + {\tt reord\_mat}& Reordering and symbolic factorization \\ + {\tt numfact} & Numerical factorization \\ + \hline +\end{tabular} + \caption{The generic routines in the {\tt PARDISO\_BSPLINES} module} + \label{pardisoRoutines} +\end{table} + +Below, a complete example solving a simple +two-dimensional Poisson discretized by the 5 point Finite Difference +method illustrates how to use the {\tt PARDISO\_BSPLINES} module. +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} +PROGRAM main + USE pardiso_bsplines + IMPLICIT NONE + TYPE(pardiso_mat) :: amat + DOUBLE PRECISION, ALLOCATABLE :: rhs(:), sol(:), arow(:) + INTEGER :: nx=5, ny=4 + INTEGER :: n, nnz + INTEGER :: i, j, irow, jcol +! + WRITE(*,'(a)', advance='no') 'Enter nx, ny: ' + READ(*,*) nx, ny + n = nx*ny ! Rank of the matrix + ALLOCATE(rhs(n)) + ALLOCATE(sol(n)) + ALLOCATE(arow(n)) +! + CALL init(n, 1, amat, nlsym=.TRUE.) ! Pardiso mat, symmetric case +! +! Construct the matrix and RHS +! + DO j=1,ny + DO i=1,nx + arow = 0.0d0 + irow = numb(i,j) + arow(irow) = 4.0d0 + IF(i.GT.1) arow(numb(i-1,j)) = -1.0d0 + IF(i.LT.nx) arow(numb(i+1,j)) = -1.0d0 + IF(j.GT.1) arow(numb(i,j-1)) = -1.0d0 + IF(j.LT.ny) arow(numb(i,j+1)) = -1.0d0 + CALL putrow(amat, irow, arow) + rhs(irow) = SUM(arow) ! => the exact solution is 1 + END DO + END DO +! + WRITE(*,'(a,i6)') 'Number of non-zeros of matrix', get_count(amat) +! +! Factor the amat matrix (Reordering, symbolic and numerical factorization) +! + CALL factor(amat) +! +! Back solve +! + CALL bsolve(amat, rhs, sol) +! +! Check solutions +! + WRITE(*,'(/a/(10f8.4))') 'Computed sol', sol + WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(sol-1.0d0)) +! +! Clean up +! + DEALLOCATE(rhs) + DEALLOCATE(sol) + DEALLOCATE(arow) + CALL destroy(amat) +CONTAINS + INTEGER FUNCTION numb(i,j) +! +! One-dimensional numbering +! Number first x then y +! + INTEGER, INTENT(in) :: i, j + numb = (j-1)*nx + i + END FUNCTION numb +END PROGRAM main +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} + +It should be noted that +\begin{itemize} +\item The routine {\tt putrow} in the matrix construction loop uses the + version from the {\tt SPARSE} module to create dynamically the + matrix row using the linked list. +\item The routine {\tt factor} calls successively the matrix conversion + {\tt to\_mat}, the reordering and symbolic factorization routine + {\tt reord\_mat} and finally the numerical factorization {\tt + numfact}. One could indeed call these three routines separately + instead of the single call to {\tt factor}, +\item After solving the linear system, if the matrix is modified by + calling for example {\tt putrow} again, it will modify + directly the converted matrix and not on the {\tt spmat} matrix + which is anyway \emph{destroyed} at the end of {\tt to\_mat}. +\item If the matrix sparsity changes, the matrix should be + re-initialized by calling the {\tt destroy} and {\tt init} routines. +\end{itemize} + +Other examples can be found by running ``{\tt grep pardiso\_mat}'' +on the F90 files in the directory {\tt examples/}. + +\subsection{The WSMP direct solver} +The interface to WSMP~\cite{WSMP} is implemented in the +module {\tt WSMP\_BSPLINES}. +The matrix type (symmetric, hermitian, positive-definite) is set +by the arguments {\tt nlsym, nlherm} and {\tt nlpos} passed to the +generic routine {\tt init}. All the other generic routines defined in +the module {MATRIX}, plus routines specific to the sparse solver, +are listed in Table~\ref{wsmpRoutines}. The complete documentation +of these routines is given in Appendix~\ref{wsmpRef}. + +\begin{table}[h] +\centering +\begin{tabular}{|l|l|} + \hline + \emph{Matrix types} & {\tt wsmp\_mat, zwsmp\_mat} \\ + \hline\hline + {\tt init} & Initializes the data structure \\ + {\tt destroy} & Free the data structure memory \\ + \hline + {\tt updtmat} & Accumulates a value to the element $A_{ij}$ \\ + {\tt putele, putrow, putcol} & Overwrites the matrix element $(i,j)$, + row $i$, column $j$ \\ + {\tt getele, getrow, getcol} & Returns the matrix element $(i,j)$, + row $i$, column $j$ \\ + {\tt vmx} & Returns the matrix-vector product \\ + {\tt mcopy} & Copy a matrix to another matrix \\ + {\tt maddto} & Adds 2 matrices: $A \leftarrow A+\alpha B$ \\ + {\tt clear\_mat}& Set the matrix elements to zero.\\ + {\tt psum\_mat} & Parallel sum of matrices \\ + {\tt p2p\_mat} & Point-to-point combine sparse matrix between 2 processes\\ + {\tt get\_count}& Get the number of non-zero elements in matrix \\ + \hline + {\tt factor} & Factorization \\ + {\tt bsolve} & Solves the linear system using the factorized matrix \\ + {\tt to\_mat} & Convert to WSMP CSR matrix format \\ + {\tt reord\_mat}& Reordering and symbolic factorization \\ + {\tt numfact} & Numerical factorization \\ + \hline +\end{tabular} + \caption{The generic routines in the {\tt WSMP\_BSPLINES} module} + \label{wsmpRoutines} +\end{table} + +The simple Poisson example using the {\tt PARDISO\_BSPLINES} module +shown in the previous section can be easily adapted to the WSMP +interface since there are only two lines to change: the {\tt USE} and +the matrix {\tt TYPE} lines. + +Other examples of how to use this interface can be found by running +``{\tt grep wsmp\_mat}'' on the F90 files the directory {\tt examples/}. + +The same solver functionality can be found in both the PARDISO and +WSMP solvers as one can verify by comparing Table~\ref{pardisoRoutines} +and Table~\ref{wsmpRoutines} or the description of routines in +Appendix~\ref{pardisoRef} and Appendix~\ref{wsmpRef}. +However, there +is an important difference. While in PARDISO (and indeed also in LAPACK), +it is possible to define several matrices to solve simultaneously, it +appears that in WSMP, this is possible \emph{only} for symmetric and +hermitian matrices: in the present 10.9 version, the +routines to store and recall the solver context +{\tt WSTOREMAT/WRECALLMAT} which are present in the symmetric version of +the library are missing in the general version! + +A separate module named {\tt PWSMP\_BSPLINES} added the MPI +\emph{parallelization} capability provided by WSMP. This parallel version +implements the same user interface as shown in Table~\ref{wsmpRoutines}. +The following considerations should be however taken in to account: +\begin{enumerate} +\item The coefficient matrix {\tt amat} is partitioned into blocks of +\emph{contiguous} rows, with their indices defined in +the interval [{\tt amat\%istart,amat\%iend}] which is defined after the call to +{\tt init}. +\item Calls to the routine {\tt updtmat} to update the matrix coefficients +should not specify a row index \emph{outside} this interval. +On the other hand, {\tt getxxx} will return 0 and {\tt putxxx} will +ignore it if a row index \emph{not} in the range [{\tt amat\%istart,amat\%iend}] +is passed to them. +\item An \emph{optional} MPI communicator can be given to {\tt init} +using the keyword {\tt comm\_in}. By default, the communicator +{\tt MPI\_COMM\_WORLD} is used. +\end{enumerate} + +A complete example using {\tt PWSMP\_BSPLINES} can be found in +{\tt examples/pde2d\_pwsmp.f90}. + +\subsection{The MUMPS direct solver} +{\tt MUMPS}~\cite{MUMPS} is a \emph{parallel sparse direct solver} +using {\tt MPI} and is implemented in the module +{\tt MUMPS\_BSPLINES}. User program using this module +should be compiled and linked with {\tt MPI} even if only the +serial version of the solver is needed, in which case the +{\tt MPI\_COMM\_SELF} is passed to the initialization routine +{\tt init} as an \emph{optional} argument with the keyword +{\tt comm\_in}. Otherwise a valid {\tt MPI} communicator should be +passed. By default {\tt comm\_in=MPI\_COMM\_SELF}. Note that it +is possible to use both serial and parallel solvers in the same +program to solve different matrix problems. + +As for {\tt PARDISO} and {\tt WSMP}, the same user interface to the +{\tt MUMPS} solver is used and summarized in +Table~\ref{mumpsRoutines}. The complete documentation +of these routines is given in Appendix~\ref{mumpsRef}. + +\begin{table}[h] +\centering +\begin{tabular}{|l|l|} + \hline + \emph{Matrix types} & {\tt mumps\_mat, zmumps\_mat} \\ + \hline\hline + {\tt init} & Initializes the data structure \\ + {\tt destroy} & Free the data structure memory \\ + \hline + {\tt updtmat} & Accumulates a value to the element $A_{ij}$ \\ + {\tt putele, putrow, putcol} & Overwrites the matrix element $(i,j)$, + row $i$, column $j$ \\ + {\tt getele, getrow, getcol} & Returns the matrix element $(i,j)$, + row $i$, column $j$ \\ + {\tt vmx} & Returns the matrix-vector product \\ + {\tt mcopy} & Copy a matrix to another matrix \\ + {\tt maddto} & Adds 2 matrices: $A \leftarrow A+\alpha B$ \\ + {\tt clear\_mat}& Set the matrix elements to zero.\\ + {\tt psum\_mat} & Parallel sum of matrices \\ + {\tt p2p\_mat} & Point-to-point combine sparse matrix between 2 processes\\ + {\tt get\_count}& Get the number of non-zero elements in matrix \\ + \hline + {\tt factor} & Factorization \\ + {\tt bsolve} & Solves the linear system using the factorized matrix \\ + {\tt to\_mat} & Convert to WSMP CSR matrix format \\ + {\tt reord\_mat}& Reordering and symbolic factorization \\ + {\tt numfact} & Numerical factorization \\ + \hline +\end{tabular} + \caption{The generic routines in the {\tt MUMPS\_BSPLINES} module} + \label{mumpsRoutines} +\end{table} + + +\section{Fourier solver \cite{McMillan}} + +\subsection{The matrix equation in Fourier space} +For a periodic one-dimensional problem, the solution $\phi_i$ and +the right-hand-side $\rho_i$ in (\ref{matEq1d}) are +$N$-periodic. Their Discrete Fourier Transform (DFT) can be defined as +\begin{equation} + \begin{split} + \hat{\phi}_k = \sum_{j=0}^{N-1} \phi_j e^{i\frac{2\pi}{N}kj}, &\qquad + \hat{\rho}_k = \sum_{j=0}^{N-1} \rho_j e^{i\frac{2\pi}{N}kj}, \\ + \phi_j = \frac{1}{N}\sum_{k=0}^{N-1} \hat{\phi}_k e^{-i\frac{2\pi}{N}kj}, &\qquad + \rho_j = \frac{1}{N}\sum_{k=0}^{N-1} \hat{\rho}_k e^{-i\frac{2\pi}{N}kj}. + \end{split} +\end{equation} +Taking the DFT of Eq.~(\ref{matEq1d}), we obtain the following matrix equation +in Fourier space: +\begin{equation} +\label{Fourier1d} + \sum_{k'=0}^{N-1} \hat{A}_{kk'}\hat{\phi}_{k'} = \hat{\rho}_{k}, +\end{equation} +where $\hat{A}_{kk'}$ is the DFT of the original matrix. Following the +notations in Eq.~(\ref{mat1d}) and assuming an \emph{equidistant} mesh +with the interval $\Delta=L/N$, each of the DFT matrices of the +weak form can be written as +\begin{equation} + \begin{split} + \hat{A}_{kk'} + &= \frac{1}{N}\sum_{j=0}^{N-1} e^{i\frac{2\pi}{N}kj} + \sum_{j'=0}^{N-1} A_{jj'}e^{-i\frac{2\pi}{N}k'j'} \\ + &= \frac{1}{N}\int_{0}^L\!\!dx\,C(x) \, + \sum_{j=0}^{N-1} e^{i\frac{2\pi}{N}kj} \Lambda_j^\alpha (x)\, + \sum_{j'=0}^{N-1} e^{-i\frac{2\pi}{N}k'j'} + \Lambda_{j'}^{\alpha'} (x) \\ + &= \frac{1}{N}\sum_{J=0}^{N-1}\int_{J\Delta}^{(J+1)\Delta}\!\!dx\,C(x) \, + \sum_{j=0}^{N-1} e^{i\frac{2\pi}{N}kj} \Lambda_j^\alpha (x)\, + \sum_{j'=0}^{N-1} e^{-i\frac{2\pi}{N}k'j'} + \Lambda_{j'}^{\alpha'} (x) + \end{split} +\end{equation} +Note that each of the last two sums is over the splines which are non-zero +at a given $x$. Using the translational symmetry of the periodic splines: +\begin{equation*} + \sum_j e^{i\frac{2\pi}{N}kj} \Lambda_j^\alpha (x) = + \sum_j e^{i\frac{2\pi}{N}kj} + \Lambda_{j-J}^\alpha(x-J\Delta) = + e^{i\frac{2\pi}{N}kJ}\, \hat{\Lambda}_{k}^\alpha(x-J\Delta), +\end{equation*} +where we have defined the DFT of splines $\hat{\Lambda}_{k}(x)$ as +\begin{equation} + \hat{\Lambda}_{k}^\alpha(x) = \sum_j\Lambda_j^\alpha(x) e^{i\frac{2\pi}{N}kj}, +\end{equation} +which are computed by the routine {\tt ft\_basfun} in the module +{\tt BSPLINES} for any spline order $p$ and derivative $\alpha \le +p$. The DFT matrices can now be written as: +\begin{equation*} + \hat{A}_{kk'} = + \frac{1}{N}\sum_{J=0}^{N-1}\int_{J\Delta}^{(J+1)\Delta}\!\!dx\,C(x) \, + e^{i\frac{2\pi}{N}J(k-k')} \hat{\Lambda}_k^\alpha (x-J\Delta)\, + \left[\hat{\Lambda}_{k'}^{\alpha'} (x-J\Delta)\right]^{*}. +\end{equation*} + +Finally, making the variable transform $x\rightarrow x+J\Delta$ and +defining the DFT of the weak-form coefficient $C$ as +\begin{equation} + \hat{C}_{k}(x) = \sum_{J=0}^{N-1}C(x+J\Delta)\,e^{i\frac{2\pi}{N}Jk}, +\end{equation} +the DFT of the matrix $\hat{A}_{kk'}$ can be calculated as an +integration over the first interval: +\begin{equation} + \hat{A}_{kk'} = \frac{1}{N}\int_0^\Delta\!\!dx\, + \hat{C}_{k-k'}(x) \,\hat{\Lambda}_{k}^\alpha(x) + \left[\hat{\Lambda}_{k'}^{\alpha'}(x)\right]^{*}, +\end{equation} +which can be computed using again the same Gauss formula as before. +For uniform $C$, $\hat A_{kk'}$ is diagonal and the matrix equation +(\ref{Fourier1d}) reduces to a system of equations for the +uncoupled Fourier modes. + +When $C$ is non-uniform, $\hat A_{kk'}$ is \emph{dense}. +However in problems where the solution is expected to be +``smooth'', one can keep only a small number of Fourier modes, reducing +thus the rank of $\hat A_{kk'}$. Furthermore, if the coefficients +$C(x)$ of the differential equations are very smooth, peaked at a few (low +order) modes, the DFT matrix can become \emph{sparse}! + +The generalization to the two-dimensional problem (\ref{mat2d}) is +straightforward: + +\begin{gather} + \hat{A}_{im,i'm'} = + \frac{1}{N_\theta}\int_0^R\!\!dr\left\{\int_0^{\Delta\theta}\!\!d\theta \, + \hat{C}_{m-m'}(r,\theta) \,\hat{\Lambda}_{m}^\beta(\theta) + \left[\hat{\Lambda}_{m'}^{\beta'}(\theta)\right]^{*}\right\} + \Lambda_{i}^\alpha(r)\Lambda_{i'}^{\alpha'}(r) \\ + \hat{C}_{m}(r,\theta) = + \sum_{j=0}^{N_\theta-1}C(r,\theta+j\Delta\theta)\, e^{i\frac{2\pi}{N_\theta}jm}. +\end{gather} + +Likewise, for the three-dimensional problem (\ref{mat3d}), we obtain + +\begin{gather} + \hat{A}_{imn,i'm'n'} = + \frac{1}{N_\theta N_\varphi}\int_0^R\!\!dr\left\{\int_0^{\Delta\theta}\!\!d\theta + \int_0^{\Delta\varphi}\!\!d\varphi \, + \hat{C}_{m-m',n-n'}(r,\theta,\varphi) \,\hat{\Lambda}_{m}^\beta(\theta) + \left[\hat{\Lambda}_{m'}^{\beta'}(\theta)\right]^{*}\,\hat{\Lambda}_{n}^\gamma(\varphi) + \left[\hat{\Lambda}_{n'}^{\gamma'}(\varphi)\right]^{*}\right\} + \Lambda_{i}^\alpha(r)\Lambda_{i'}^{\alpha}(r) \\ + \hat{C}_{mn}(r,\theta,\varphi) = + \sum_{j=0}^{N_\theta-1}\sum_{k=0}^{N_\varphi-1}C(r,\theta+j\Delta\theta,\varphi+ + k\Delta\varphi)\, e^{i\frac{2\pi}{N_\theta}jm}\,e^{i\frac{2\pi}{N_\varphi}kn}. +\end{gather} + +Note that for axi-symmetric systems where the coefficients $C$ do not +depend on $\varphi$ +\begin{equation} + \hat{C}_{mn} = \hat{C}_{mn}\delta_{n,0} +\end{equation} +and thus the three-dimensional problem reduces to a set of independent +two-dimensional problems with +\begin{equation} + \begin{split} + \hat{A}^n_{im.i'm'} &= M_n \hat{A}_{im.i'm'} \\ + M_n &= \int_0^{\Delta\varphi}\!\!d\varphi \left|\hat{\Lambda}_n(\varphi)\right|^2. + \end{split} +\end{equation} + +\subsection{Integral equation} +The DFT matrices for differential operators derived above can be +extended to an integral operator of the following form: +\begin{equation} + \int_{0}^{L}\!dx' K(x,x')\,\phi(x') = \rho(x), +\end{equation} +where $\phi(x)$ is $L$-periodic. Using the same FE discretization as +above results in the following matrix in \emph{real} space: +\begin{equation} + A_{jj'} = \int_{0}^{L}\!dx\,\Lambda_j(x)\,\int_{0}^{L}\!dx'\, + K(x,x')\,\Lambda_{j'}(x'), +\end{equation} +and its DFT +\begin{equation} + \hat{A}_{kk'} = \frac{1}{N} + \sum_{J=0}^{N-1}\int_{J\Delta}^{(J+1)\Delta}\!dx\, + \sum_{J'=0}^{N-1}\int_{J'\Delta}^{(J'+1)\Delta}\!dx'\,K(x,x')\, + e^{i\frac{2\pi}{N}kJ}\hat{\Lambda}_k(x-J\Delta)\, + e^{-i\frac{2\pi}{N}k'J'}\left[\hat{\Lambda}_{k'}(x-J'\Delta)\right]^{*}, +\end{equation} +Now, defining the DFT of the kernel as +\begin{equation} + \hat{K}_{kk'}(x,x') = + \sum_{J=0}^{N-1}\sum_{J'=0}^{N-1}K(x+J\Delta,x'+J'\Delta)\, + e^{i\frac{2\pi}{N}kJ}\,e^{-i\frac{2\pi}{N}k'J'}, +\end{equation} +the final expression for the DFT of the matrix $\hat{A}_{kk'}$ reduces to +\begin{equation} + \hat{A}_{kk'} = \frac{1}{N} + \int_0^\Delta\!dx\int_0^\Delta\!dx'\,\hat{K}_{kk'}(x,x')\, + \hat{\Lambda}_k (x)\, + \left[\hat{\Lambda}_{k'}(x')\right]^{*}. +\end{equation} +Again, notice that the dense matrix $\hat{A}$ can become \emph{sparse} +if only a limited number of Fourier modes are retained in the DFT of +the kernel $\hat{K}$. + +\subsection{A two-dimensional example with a non-uniform coefficient} +As a check, we considered here a two-dimensional example similar to the example in +section \ref{twodEx} but with a non-uniform coefficient: + +\begin{equation} + -\frac{1}{r}\frac{\partial}{\partial r} + \left[rC\frac{\partial\phi}{\partial r}\right] + -\frac{1}{r^2}\frac{\partial}{\partial \theta} + \left[C\frac{\partial\phi}{\partial \theta}\right] = \rho. +\end{equation} + +With $C(r,\theta) = 1+\epsilon r\cos\theta$, assuming the same exact solution as in +section (\ref{twodEx}) +\begin{equation} + \phi(r,\theta) = (1-r^2)r^m\cos m\theta, +\end{equation} +the right-hand side becomes +\begin{equation} + \begin{split} + \rho(r,\theta) = 4(m+1)r^m\cos m\theta & + + \frac{\epsilon r^m}{2}(4+5m-m/r^2)\cos(m-1)\theta \\ + &+ \frac{\epsilon r^m}{2}(4+3m+m/r^2)\cos(m+1)\theta. + \end{split} +\end{equation} + +This problem is solved in real space and Fourier space respectively in example +{\tt pde2d\_sym\_pardiso.f90} and example {\tt pde2d\_sym\_pardiso\_dft.f90}. +Both use the {\tt PARDISO\_BSPLINES} module to solve the sparse +matrix equation. It should be noted that the Fourier method should yield the +\emph{same solution} as found with the solver in real space if \emph{all} the +$N_\theta$ Fourier modes are kept. + +For the problem defined above with {\tt m=3}, by keeping only the seven Fourier +modes in $[-3,3]$ and the three mode couplings $[-1,0,1]$ in the +Fourier solver, we found that both methods yield the same (up to 5 +digits) \emph{relative discretization error}. Furthermore, increasing +the number of Fourier modes to $[-4,4]$ (note that the $m=\pm 4$ +Fourier components of the right hand side $\rho$ are not null) does +not increase the accuracy of the computed solution! + +In this example, the matrix in Fourier space has a rank which is +$N_\theta/7$ times smaller than in the solver in real space. The number of +non-zeros is also reduced by a factor of $(2p+1)/3$ since only 3 +Fourier mode coupling terms are considered. + +In general, the efficiency of such a \emph{matrix filter} is expected +to be problem-dependent. The Fourier solver should be tested in real simulations. + +\section{The matrix construction module {\tt CONMAT\_MOD}} +\label{secCONMAT} +The module implements the generic matrix construction subroutine +{\tt conmat}, using the algorithm detailed in Appendix~\ref{matAssembly}, +for 1D and 2D differential equations. The computed matrix is returned in the +argument {\tt mat} which can be a Lapack band matrix as well as a +PARDISO, WSMP or MUMPS sparse matrix. +The complete interface of the subroutine is given below. + +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE conmat(spl, mat, coefeq, maxder) + TYPE(spline1d|spline2d) :: spl + TYPE([z]gbmat|[z]pbmat|[z]periodic_mat|[z]pardiso|...) :: mat + INTEGER, INTENT(in), OPTIONAL :: maxder[(2)] + INTERFACE + SUBROUTINE coefeq(x, [y], idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x, [y] + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE PRECISION, INTENT(out) :: c(:) + END SUBROUTINE coefeq + END INTERFACE +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Construct the FE matrix for 1D or 2D differential operator. +\item[Arguments:] \mbox{} +\begin{verbatim} + spl : 1D or 2D spline + mat : matrix object + coefeq : user provided subroutine (see below) + maxder : Maximum order of the derivatives in the weak form. + Equal to 1 (first derivative) by default. +\end{verbatim} +\end{description} + +The subroutine {\tt conmat} includes, in addition to the arguments +{\tt spl, mat} and {\tt maxder} described above, +an user provided subroutine as the third +argument {\tt coefeq} which computes all the weak +form coefficients defined in Eq.(\ref{locMat1d}) and Eq.(\ref{locMat2d}) +for a given point ($x$ for 1D case or $(x,y)$ for 2D case). The output +array {\tt c} will contain all the computed $C$ with its corresponding +derivative orders $(d,d')$ returned in {\tt idt, idw} respectively. +Other quantities required to calculate the coefficients $C$ could be +communicated to {\tt coefeq}, using for example a {\tt COMMON} block or +a {\tt MODULE}. + +An example of using this module can be found in section \ref{twodEx}. + +\appendix + +\section{Matrix assembly for differential operators} +\label{matAssembly} +\subsection{1D case} +\subsubsection{Local matrix} +The contribution to the discretized weak-form from the interval +$[x_i, x_{i+1}]$ where $i=1,\ldots,N$, is a sum of the \emph{local matrices} +\begin{equation} +\label{locMat1d} + \begin{split} + A^i_{\alpha\alpha'} &= \int_{x_i}^{x_{i+1}}\!\!dx \;C(x) + \Lambda^d_{\alpha}(x)\Lambda^{d'}_{\alpha'}(x) \\ + &\simeq \sum_{g=1}^{G}\, + \underbrace{w_g\Lambda^d_{\alpha}(x_g)\Lambda^{d'}_{\alpha'}(x_g)}_{F_{\alpha\alpha'g}} + \,\underbrace{C(x_g)}_{c_g}, + \end{split} +\end{equation} +where a $G$ point Gauss quadrature over the interval $[x_i, x_{i+1}]$ +is used to approximate the integral and +$\Lambda^d_{\alpha}$ denotes the $d^{th}$ derivative of splines +which are non zero in the interval $[x_i, x_{i+1}]$. For splines of +degree $p$, $\alpha=0,\ldots,p$. Note that the matrix can be written as a +\emph{matrix-vector product}: +\begin{equation} + \mathbf{A}= \mathbf{F} \cdot \mathbf{c}. +\end{equation} + +\subsubsection{Mapping to global matrix} +For $N$ intervals, the number of spline elements of degree $p$, is +$N_e=N+p$, or $N_e=N$ if the system is \emph{periodic}. +Once the local matrix $A_{\alpha\alpha'}$ is formed, it can be added +to the \emph{global} matrix using the mapping: +\begin{equation} + \begin{split} + A^g_{II'} &\leftarrow A^g_{II'} + A^i_{\alpha\alpha'} \\ + I = i+\alpha, & \qquad I' = i+\alpha' + \end{split} +\end{equation} +For periodic problems, the indices $I,I'$ are further transformed +by taking into account the periodicity $N$, using for example the +following {\tt FORTRAN} statement + +\begin{center} \tt + I = MODULO(I-1,N) + 1 \\ +\end{center} + +\subsection{2D case} +\subsubsection{Local matrix} +In this case, the local matrix obtained for the grid cell +$[x_i, x_{i+1}]\times[y_j, y_{j+1}]$ takes the form: +\begin{equation} +\label{locMat2d} + \begin{split} + A_{\alpha\alpha'\beta\beta'} &= \int_{x_i}^{x_{i+1}}\!\!dx + \int_{y_j}^{y_{j+1}}\!\!dy\,\Lambda^{d_1}_{\alpha}(x)\Lambda^{d_1'}_{\alpha'}(x)\;C(x,y) + \,\Lambda^{d_2}_{\beta}(y)\Lambda^{d_2'}_{\beta'}(y) \\ + &\simeq \sum_{g_1=1}^{G_1}\, + \underbrace{w_{g_1}\Lambda^{d_1}_{\alpha}(x_{g_1})\Lambda^{{d_1}'}_{\alpha'}(x_{g_1})}_{F_{\alpha\alpha'g_1}} + \;\sum_{g_2=1}^{G_2}\,\underbrace{C(x_{g_1},y_{g_2})}_{C_{g_1g_2}} + \underbrace{w_{g_2}\Lambda^{d_2}_{\beta}(y_{g_2})\Lambda^{{d_2}'}_{\beta'}(y_{g_2})}_{G_{\beta\beta'g_2}}, + \end{split} +\end{equation} +which can be computed efficiently as \emph{matrix-matrix products} +\begin{equation} + \mathbf{A} = \mathbf{F}\cdot\mathbf{C}\cdot\mathbf{G^{T}} +\end{equation} +\subsubsection{Mapping to global matrix} +The local to global element indices mapping on each of the two +dimensions can be defined as previously as +\begin{equation} + \begin{split} + I = i+\alpha, & \qquad I' = i+\alpha' \\ + J = j+\beta, & \qquad J' = j+\beta' + \end{split} +\end{equation} +If any of the 2 dimensions is periodic, the periodic condition have +to be applied to the corresponding global element index as explained +above. + +Furthermore, in order to reduce the 4 dimension array $A^g_{II'JJ'}$ +to the standard 2 dimension matrix, we number first the elements in +$y$ coordinate and obtain the following index transformation: +\begin{equation} + \mu = J + N^y_e(I-1), \qquad \mu' = J' + N^y_e(I'-1), +\end{equation} +where $N^y_e$ is the number of elements along the $y$ coordinate. The +\emph{global} matrix is then constructed from +\begin{equation} + A^g_{\mu\mu'} \leftarrow A^g_{\mu\mu'} + A_{\alpha\alpha'\beta\beta'} +\end{equation} + +\section{The boundary conditions} +\subsection{Dirichlet condition} +\label{DirichletCond} +\subsubsection{1D case} +Let us consider the boundary condition +$u(0)=c$. Since all the splines are 0 +at $x=0$, except for the first spline which is equal to 1, +$\Lambda_i(0)=\delta_{i,1}$, we have simply +\begin{equation} + c=u(0) = \sum_{i=1}^N u_i \Lambda_i(0) \quad \Longrightarrow + \quad u_{1} = c. +\end{equation} +The discretized linear system of equations, taking into account of +this BC, can thus be written as +\begin{equation} + \begin{split} + u_1 &= c\\ + \sum_{j=2}^N A_{ij}u_j &= f_i - A_{i1}c, \quad i=2,\ldots, N + \end{split} +\end{equation} +or in the following matrix form: +\begin{equation} + \left(\begin{matrix} + 1 & 0 & \cdots \\ + 0 & A_{22} & \cdots \\ + \vdots & \vdots & \ddots \\ + \end{matrix}\right) + \left(\begin{matrix} + u_{1} \\ + u_{2} \\ + \vdots\\ + u_{N} \\ + \end{matrix}\right) = + \left(\begin{matrix} + c \\ + f_{2} -cA_{21}\\ + \vdots\\ + f_{N} -cA_{N1}\\ + \end{matrix}\right) +\end{equation} +Note that (1) the transformed matrix preserves any symmetry or +positivity of the original matrix, (2) the first column of the +original matrix has to be saved in order to modify the RHS $f_i$ +but only for non zero $c$ and (3) in that case, one needs to save +only $[A_{i1}]_{i=2}^{p+1}$, where $p$ is the spline order. + +In summary, the procedure for imposing the Dirichlet BC $u_1=c$ can be +summarized as follows: +\begin{enumerate} +\item Matrix transformation + \begin{enumerate} + \item Clear the matrix row $i=1$ and set its diagonal term + $A_{11}$ to 1. + \item Get the matrix column $A_{j1}, \quad j=2,\ldots,p+1$ and save it. + \item Clear the matrix column $j=1$ and set its diagonal term + $A_{11}$ to 1. + \end{enumerate} +\item RHS transformation + \begin{enumerate} + \item Set $f_1\leftarrow c$. + \item Modify the RHS: $f_i\leftarrow f_i-A_{i1}c, \quad i=2,\ldots,p+1$. + \end{enumerate} +\end{enumerate} +If the original matrix \emph{is not symmetric}, only the steps (1a) +and (2a) are required, since the other steps are only necessary to +preserve the symmetry of the original matrix. + +\subsubsection{2D case} +In that case, let us write the solution $u(x,y)$ as +\begin{equation} + u(x,y) = \sum_{i=1}^{N_1}\sum_{j=1}^{N_2} u_{ij} \Lambda_i(x)\Lambda_j(y), +\end{equation} +where $N_1, N_2$ are the number of elements in each +dimension. Assuming the BC $u(0,y) = g(y)$, and since +$\Lambda_i(0)=\delta_{i1}$, the solutions $u_{ij}$ +should satisfy +\begin{equation} +\label{dirich_2d} + \sum_{j=1}^{N_2} u_{1j} \Lambda_j(y) = g(y). +\end{equation} +If $g(y)$ is constant $g(y)=c$, we obtain the trivial solution +$u_{1,j}=c$ since $\sum_{j=1}^{N_2} \Lambda_j(y)=1$ \cite{BSPLINES}. +For non-uniform $g$, +at least 2 methods can be used to obtain the $N_2$ unknowns $u_{1j}$ +satisfying the equation above: +\begin{enumerate} +\item By \emph{collocating} Eq.(\ref{dirich_2d}) on a \emph{suitable} + set of points $[y_k]_{ k=1}^{N_2}$, the problem is reduced to an + \emph{interpolation} one (see section ``Spline + Interpolation'' in \cite{BSPLINES}). +\item By \emph{minimizing} the residual norm of Eq.(\ref{dirich_2d}) + defined as follows: + \begin{gather} + R = \left\|\sum_{j=1}^{N_2} c_{j} \Lambda_j(y)-g(y)\right\|^2 = + \int\!\!dy\left\{\left[\sum_{j=1}^{N_2} c_{j} \Lambda_j(y)\right]^2 + - 2g(y)\sum_{j=1}^{N_2} c_{j} \Lambda_j(y) +g^2(y)\right\}\\ + \frac{\partial R}{\partial c_k} = 2 \int\!\!dy\left[ + \sum_{j=1}^{N_2} c_{j} \Lambda_j(y)\Lambda_k(y) + -g(y)\Lambda_k(y)\right] = 0, \quad k=1,\ldots,N_2, + \end{gather} + the boundary solutions $c_j$ can be calculated by solving the following + \emph{weak-form} of Eq.(\ref{dirich_2d}): + \begin{equation} + \sum_{j=1}^{N_2} c_{j} \int\!\!dy\Lambda_j(y)\Lambda_k(y) = + \int\!\!dy\Lambda_k(y) g(y), \qquad k=1,\ldots,N_2. + \end{equation} +\end{enumerate} +Once the values of $c_j$ known, the procedure described for the 1D case +above can be applied to satisfy each of the $N_2$ conditions $u_{1j}=c_j$. + +A full example for solving the cylindrical Laplace equation in +cylindrical coordinates: +\begin{equation} + \begin{split} + \frac{1}{r}\frac{\partial}{\partial r} + \left(r\frac{\partial\phi}{\partial r}\right) &+\frac{1}{r^2} + \frac{\partial^2\phi}{\partial \theta^2} = 0 \\ + \phi(r=1,\theta) &= \cos m\theta. + \end{split} +\end{equation} +is given in {\tt bpslines/examples/dirichlet/poisson.f90}. + +\subsection{Unicity on the axis} +\label{unicityCond} +Denoting the $N$ solutions at the axis by $(u_1, \ldots, u_N)$ , and +their transforms by $(\hat u_1, \ldots, \hat u_N)$ defined by + +\begin{equation} \begin{array}{ccc} + u_1-u_N = \hat u_1 & & u_1 = \hat u_1 + \hat u_N \\ + u_2-u_N = \hat u_2 & & u_2 = \hat u_2 + \hat u_N \\ + \vdots & \Longrightarrow & \vdots \\ + u_{N-1}-u_N = \hat u_{N-1} & & u_{N-1} = \hat u_{N-1} + \hat u_N \\ + u_N = \hat u_N & & u_N = \hat u_N, + \end{array} \label{eq:unicity1} \end{equation} +the unicity condition can be specified by simply imposing + +\begin{equation} + \hat u_1=\hat u_2=\ldots=\hat u_{N-1}=0. \label{eq:unicity2} +\end{equation} +From (\ref{eq:unicity1}), the \emph{transformation matrix} \(\mathbf U\) is defined +as + +\begin{equation} + \mathbf{u} = \mathbf{ U \cdot\hat u}, \qquad \mathbf{U} = + \left(\begin{matrix} + 1 & 0 & \dots & 0 & 1 \\ + 0 & 1 & \dots & 0 & 1 \\ + & & \ddots& & \vdots \\ + 0 & 0 & \dots & 1 & 1 \\ + 0 & 0 & \dots & 0 & 1 + \end{matrix}\right), \quad \mathbf{U^{T}} = + \left(\begin{matrix} + 1 & 0 & \dots & 0 & 0 \\ + 0 & 1 & \dots & 0 & 0 \\ + & & \ddots& & \vdots \\ + 0 & 0 & \dots & 1 & 0 \\ + 1 & 1 & \dots & 1 & 1 + \end{matrix}\right). +\end{equation} + + +\paragraph{Matrix product \( \mathbf{A\cdot U}\)} +\begin{equation} +\mathbf{ A\cdot U} = + \left(\begin{array}{lllll} + A_{1,1} & A_{1,2} & \dots & A_{1,N-1} & \sum_{j} A_{1,j} \\ + A_{2,1} & A_{2,2} & \dots & A_{2,N-1} & \sum_{j} A_{2,j} \\ + & & \ddots& & \vdots \\ + A_{N-1,1} & A_{N-1,2} & \dots & A_{N-1,N-1} & \sum_{j}A_{N-1,j} \\ + A_{N,1} & A_{N,2} & \dots & A_{N,N-1} & \sum_{j}A_{N,j} + \end{array}\right). +\end{equation} +Thus \emph{right multiply by \(\mathbf{U}\)} is equivalent to put the +\emph{the sum of each row on the last column}. + +\paragraph{Matrix product \( \mathbf{ U^T \cdot A}\)} +\begin{equation} +\mathbf{ U^T \cdot A} = + \left(\begin{array}{lllll} + A_{1,1} & A_{1,2} & \dots & A_{1,N-1} & A_{1,N} \\ + A_{2,1} & A_{2,2} & \dots & A_{2,N-1} & A_{2,N} \\ + & & \ddots& & \vdots \\ + A_{N-1,1} & A_{N-1,2} & \dots & A_{N-1,N-1} & A_{N-1,N} \\ + \sum_{i}A_{i,1} & \sum_{i}A_{i,2} & \dots & \sum_{i}A_{i,N-1} & + \sum_{i}A_{i,N} + \end{array}\right). +\end{equation} +Thus \emph{left multiply by \(\mathbf{\hat U}\)} is equivalent to put the +\emph{the sum of each column on the last row}. + +\paragraph{Product \( \mathbf{\hat U \cdot b}\)} +\begin{equation} +\mathbf{\hat b} = \mathbf{U^T\cdot b} = + \left(\begin{array}{l} + b_1 \\ + b_2 \\ + \vdots \\ + b_{N-1} \\ + \sum_{i} b_{i} + \end{array}\right), +\end{equation} + +\paragraph{Transformation of the original matrix equation} +The full original linear system, obtained from the discretization of the +2D \(r,\theta\) polar coordinates can be written as: + +\begin{equation} + \left(\begin{array}{ll} + \mathbf{A} & \mathbf{B} \\ + \mathbf{C} & \mathbf{D} + \end{array}\right) + \left(\begin{array}{l} + \mathbf{u} \\ + \mathbf{v} + \end{array}\right) = + \left(\begin{array}{l} + \mathbf{b} \\ + \mathbf{c} + \end{array}\right), \label{eq:orig_matrix_eq} +\end{equation} +where the solution array is split into the solutions \(\mathbf{u}\) at \(r=0\) and +the solutions \(\mathbf{v}\) on the remaining domain. The transformed system can +thus be written as + +\begin{equation*} + \left(\begin{array}{ll} + \mathbf{U^T} & 0 \\ + 0 & \mathbf{I} + \end{array}\right) + \left(\begin{array}{ll} + \mathbf{A} & \mathbf{B} \\ + \mathbf{C} & \mathbf{D} + \end{array}\right) + \left(\begin{array}{ll} + \mathbf{U} & 0 \\ + 0 & \mathbf{I} + \end{array}\right) + \left(\begin{array}{l} + \mathbf{\hat u} \\ + \mathbf{v} + \end{array}\right) = + \left(\begin{array}{ll} + \mathbf{U^T} &0 \\ + 0 & \mathbf{I} + \end{array}\right) + \left(\begin{array}{l} + \mathbf{b} \\ + \mathbf{c} + \end{array}\right), +\end{equation*} + +\begin{equation} + \Longrightarrow + \left(\begin{array}{cc} + \mathbf{U^TAU} & \mathbf{U^TB} \\ + \mathbf{CU} & \mathbf{D} + \end{array}\right) + \left(\begin{array}{l} + \mathbf{\hat u} \\ + \mathbf{v} + \end{array}\right) = + \left(\begin{array}{c} + \mathbf{U^Tb} \\ + \mathbf{c} + \end{array}\right), +\end{equation} +Notice that the transformation preserves any symmetry existing in the original system +(\ref{eq:orig_matrix_eq}). The transformed matrix is finally given in the following where +only the modified elements are shown and the sum is only over the first \(N\) +points in \(\theta\) direction. The \(\times\) symbol denotes unmodified elements. + +\begin{equation} + \left(\begin{array}{lllllll} + \times & \times & \times & \times & \sum_{j} A_{1,j} & \times & \times \\ + \times & \times & \times & \times & \sum_{j} A_{2,j} & \times & \times \\ + \times & \times & \times & \times & \vdots & \times & \times \\ + \times & \times & \times & \times & \sum_{j} A_{N-1,j} & \times & \times \\ + \sum_{i}A_{i,1} & \sum_{i}A_{i,2} & \dots & \sum_{i}A_{i,N-1} & + \sum_{i,j}A_{i,j} & \sum_{i}A_{i,N+1} & \dots \\ + \times & \times & \times & \times & \sum_{j} A_{N+1,j} & \times & \times \\ + \times & \times & \times & \times & \vdots & \times & \times + \end{array}\right) +\end{equation} +Only the \(N^{th}\) column and the \(N^{th}\) row are affected by the transformation. +Applying now the unicity condition (\ref{eq:unicity2}) the final transformed system +reads: + +\begin{equation} + \left(\begin{array}{lllllll} + 1 & 0 & \dots & 0 & 0 & 0 & 0 \\ + 0 & 1 & \dots & 0 & 0 & 0 & 0 \\ + 0 & 0 & \ddots & 0 & \vdots & 0 & 0 \\ + 0 & 0 & \dots & 1 & 0 & 0 & 0 \\ + 0 & 0 & \dots & 0 & \sum_{i,j}A_{i,j} & \sum_{i}A_{i,N+1} & \dots \\ + 0 & 0 & \dots & 0 & \sum_{j} A_{N+1,j} & \times & \times \\ + 0 & 0 & \dots & 0 & \vdots & \times & \times + \end{array}\right) + \left(\begin{array}{l} + \hat u_1 \\ + \hat u_2 \\ + \vdots\\ + \hat u_{N-1}\\ + \hat u_{N} \\ + u_{N+1} \\ + \vdots + \end{array}\right) = + \left(\begin{array}{l} + 0 \\ + 0 \\ + \vdots\\ + 0 \\ + \sum_{i} b_{i} \\ + b_{N+1} \\ + \vdots + \end{array}\right). +\end{equation} + + +\section{{\tt MATRIX} Reference} +\label{matRef} + +The following conventions are adopted in the routine descriptions: +\begin{itemize} +\item {\tt [z]} means optional: for example {\tt TYPE([z]gemat)} + declares a variable which can be of type {\tt gemat} or {\tt zgemat}. +\item The symbol ``$|$'' is the logical {\tt OR} operator. Thus +\begin{verbatim} + TYPE([z]gemat|[z]gbmat) :: mat +\end{verbatim} +declares that {\tt mat} can be either of type {\tt gemat}, {\tt + zgemat}, {\tt pbmat} or {\tt zpbmat}. +\item In a same declaration block, if a scalar or array of type {\tt + DOUBLE PRECISION|COMPLEX} is declared together with a matrix object + which can be also complex, both should be either real + or complex. For example in the routine {\tt updtmat}, if {\tt mat} + of type {\tt zgbmat}, {\tt val} should be complex. +\end{itemize} + +\subsection{init} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} +TYPE([z]gemat) :: mat + SUBROUTINE init(n, nterms, mat ,kmat) +TYPE([z]gbmat) :: mat + SUBROUTINE init(kl, ku, n, nterms, mat, kmat) +TYPE([z]pbmat) :: mat + SUBROUTINE init(ku, n, nterms, mat, kmat) +TYPE([z]periodic_mat) :: mat + SUBROUTINE init(kl, ku, n, nterms, mat, kmat) + INTEGER, INTENT(in) :: kl, ku, n, nterms + INTEGER, OPTIONAL :: kmat +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Initialize data structure for matrix +\item[Arguments:] \mbox{} +\begin{verbatim} + n : rank of matrix + kl, ku : number of sub and super diagonals + nterms : number of terms in weak form + kmat : matrix id + mat : matrix object +\end{verbatim} +\end{description} + +\subsection{destroy} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE destroy(mat) + TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Free matrix memory +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object +\end{verbatim} +\end{description} + +\subsection{updmat} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE updtmat(mat, i, j, val) + TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat + INTEGER, INTENT(IN) :: i, j + DOUBLE PRECISION|COMPLEX, INTENT(IN) :: val +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Update (accumulate) element $A_{ij}$ +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object + i : row index + j : column index + val : input value +\end{verbatim} +\end{description} + +\subsection{putele} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE putele(mat, i, j, val) + TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat + INTEGER, INTENT(IN) :: i, j + DOUBLE PRECISION|COMPLEX, INTENT(IN) :: val +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Overwrite element $A_{ij}$ +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object + i : row index + j : column index + val : input value +\end{verbatim} +\end{description} + +\subsection{putrow} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE putrow(mat, i, arr) + TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat + INTEGER, INTENT(IN) :: i + DOUBLE PRECISION|COMPLEX, INTENT(IN) :: arr(:) +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Overwrite a matrix row +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object + i : row index + arr : input array +\end{verbatim} +\end{description} + +\subsection{putcol} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE putcol(mat, j, arr) + TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat + INTEGER, INTENT(IN) :: j + DOUBLE PRECISION|COMPLEX, INTENT(IN) :: arr(:) +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Overwrite a matrix row +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object + j : column index + arr : input array +\end{verbatim} +\end{description} + +\subsection{getele} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE getele(mat, i, j, val) + TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat + INTEGER, INTENT(IN) :: i, j + DOUBLE PRECISION|COMPLEX, INTENT(OUT) :: val +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Get element $A_{ij}$ +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object + i : row index + j : column index + val : output value +\end{verbatim} +\end{description} + +\subsection{getrow} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE getrow(mat, i, arr) + TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat + INTEGER, INTENT(IN) :: i + DOUBLE PRECISION|COMPLEX, INTENT(OUT) :: arr(:) +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Get a matrix row +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object + i : row index + arr : output array +\end{verbatim} +\end{description} + +\subsection{getcol} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE getcol(mat, j, arr) + TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat + INTEGER, INTENT(IN) :: j + DOUBLE PRECISION|COMPLEX, INTENT(OUT) :: arr(:) +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Get a matrix column +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object + i : column index + arr : output array +\end{verbatim} +\end{description} + +\subsection{vmx} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + FUNCTION vmx(mat, x) + TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat + DOUBLE PRECISION|COMPLEX, DIMENSION(:), INTENT(in) :: x + DOUBLE PRECISION|COMPLEX, DIMENSION(SIZE(x)) :: vmx +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Matrix-vector product $Ax$ +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object + x : input array + vmx : output array +\end{verbatim} +\end{description} + +\subsection{mcopy} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE mcopy(mata, matb) + TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mata, matb +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Matrix copy: $B = A$ +\item[Arguments:] \mbox{} +\begin{verbatim} + mata : input matrix object + matb : output matrix object +\end{verbatim} +\end{description} + +\subsection{maddto} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE maddto(mata, alpha, matb) + TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mata, matb + DOUBLE PRECISION|COMPLEX : alpha +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Matrix addition: $A \leftarrow A+\alpha B$ +\item[Arguments:] \mbox{} +\begin{verbatim} + mata : input matrix object + matb : output matrix object + alpha : input scalar +\end{verbatim} +\end{description} + +\subsection{determinant} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE determinant(mat, base, pow) + TYPE([z]gemat|[z]gbmat|[z]pbmat) :: mat + INTEGER, INTENT(out) :: pow + DOUBLE PRECISION|COMPLEX : base +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Returns the determinant of matrix as $D = \text{base}\times 10^{\text{pow}}$ +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : input matrix object + base : mantissa of determinant + pow : exponent of determinant +\end{verbatim} +\end{description} + +\subsection{factor} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE factor(mat) + TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +LU (Cholesky for symmetric/hermitian matrix) factorization +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : inout matrix object +\end{verbatim} +\end{description} + +\subsection{bsolve} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE bsolve(mat) + TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat + DOUBLE PRECISION|COMPLEX, DIMENSION [(:)] :: rhs + DOUBLE PRECISION|COMPLEX, DIMENSION [(:),] OPTIONAL, INTENT (out) :: sol +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Solve the linear system using the factored matrix, for a single or +multiple right-hand-side +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : input factored matrix object + rhs : input right-hand-side, overwriten by the solution if sol is not present + sol : contains solution +\end{verbatim} +\end{description} + +\section{{\tt SPMAT} Reference} +\label{spmatRef} + +The following conventions are adopted in the routine descriptions: +\begin{itemize} +\item {\tt [z]} means optional: for example {\tt TYPE([z]gemat)} + declares a variable which can be of type {\tt gemat} or {\tt zgemat}. +\item The symbol ``$|$'' is the logical {\tt OR} operator. Thus +\begin{verbatim} + TYPE([z]gemat|[z]gbmat) :: mat +\end{verbatim} +declares that {\tt mat} can be either of type {\tt gemat}, {\tt + zgemat}, {\tt pbmat} or {\tt zpbmat}. +\item In a same declaration block, if a scalar or array of type {\tt + DOUBLE PRECISION|COMPLEX} is declared together with a matrix object + which can be also complex, both should be either real + or complex. For example in the routine {\tt updtmat}, if {\tt mat} + of type {\tt zgbmat}, {\tt val} should be complex. +\end{itemize} + +\subsection{init} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE init(n, mat, istart, iend) + TYPE([z]spmat) :: mat + INTEGER, INTENT(in), OPTIONAL :: istart, iend + INTEGER, INTENT(in) :: n +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Initialize an empty sparse matrix of $n$ rows. +\item[Arguments:] \mbox{} +\begin{verbatim} + n : rank of matrix + mat : matrix object + istart, iend : range of row indices. By default istart=1, iend=n +\end{verbatim} +\end{description} + +\subsection{destroy} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE destroy(mat) + TYPE([z]spmat) :: mat +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Free matrix memory +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object +\end{verbatim} +\end{description} + +\subsection{updmat} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE updtmat(mat, i, j, val) + TYPE([z]spmat) :: mat + INTEGER, INTENT(IN) :: i, j + DOUBLE PRECISION|COMPLEX, INTENT(IN) :: val +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Update (accumulate) an existing element $A_{ij}$ or insert it in the +linked list in an increasing order in the column index j. +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object + i : row index + j : column index + val : input value +\end{verbatim} +\end{description} + +\subsection{putele} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE putele(mat, i, j, val, nlforce_zero) + TYPE([z]pbmat) :: mat + INTEGER, INTENT(IN) :: i, j + DOUBLE PRECISION|COMPLEX, INTENT(IN) :: val + LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Overwrite an existing element $A_{ij}$ or insert it in the +linked list in an increasing order in the column index j. +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object + i : row index + j : column index + val : input value + nlforce_zero : Never remove an existing element when input is zero if TRUE + FALSE by default +\end{verbatim} +\end{description} + +\subsection{putrow} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE putrow(mat, i, arr, col, nlforce_zero) + TYPE([z]spmat) :: mat + INTEGER, INTENT(IN) :: i + DOUBLE PRECISION|COMPLEX, INTENT(IN) :: arr(:) + INTEGER, INTENT(in), OPTIONAL :: col(:) + LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Overwrite a matrix row +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object + i : row index + arr : input array + col : input array containing column indices + nlforce_zero : Never remove an existing element when input is zero if TRUE + FALSE by default +\end{verbatim} +\end{description} + +\subsection{putcol} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE putcol(mat, j, arr, nlforce_zero) + TYPE([z]spmat) :: mat + INTEGER, INTENT(IN) :: j + DOUBLE PRECISION|COMPLEX, INTENT(IN) :: arr(:) +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Overwrite a matrix row +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object + j : column index + arr : input array + nlforce_zero : Never remove an existing non-zero element if .TRUE. + .FALSE. by default +\end{verbatim} +\end{description} + +\subsection{getele} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE getele(mat, i, j, val) + TYPE([z]spmat) :: mat + INTEGER, INTENT(IN) :: i, j + DOUBLE PRECISION|COMPLEX, INTENT(OUT) :: val +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Get element $A_{ij}$ +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object + i : row index + j : column index + val : output value +\end{verbatim} +\end{description} + +\subsection{getrow} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE getrow(mat, i, arr, col) + TYPE([z]spmat) :: mat + INTEGER, INTENT(IN) :: i + DOUBLE PRECISION|COMPLEX, INTENT(OUT) :: arr(:) + INTEGER, INTENT(out), OPTIONAL :: col(:) +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Get a matrix row and optionally the column indices +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object + i : row index + arr : output array + col : output array containing column indices +\end{verbatim} +\end{description} + +\subsection{getcol} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE getcol(mat, j, arr) + TYPE([z]spmat) :: mat + INTEGER, INTENT(IN) :: j + DOUBLE PRECISION|COMPLEX, INTENT(OUT) :: arr(:) +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Get a matrix column +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object + i : column index + arr : output array +\end{verbatim} +\end{description} + +\subsection{get\_count} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + INTEGER FUNCTION get_count(mat, nnz) + TYPE([z]spmat) :: mat + INTEGER, INTENT(out), OPTIONAL :: nnz(:) +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Returns the number of non-zeros and optionally an array of numbers +of non-zeros on each row +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object + nnz : array containing numbers of non-zeros on each row. +\end{verbatim} +\end{description} + +\section{{\tt PARDISO\_BSPLINES} Reference} +\label{pardisoRef} +The subroutines {\tt updmat, putele, putrow, putcol, getele, getrow, +getcol, vmx, mcopy, maddto} and {\tt destroy} +have \emph{exactly} the same list of arguments as +those from the {\tt MATRIX} module (as documented in +Appendix~\ref{matRef}), except for the matrix types. Below, we show only the routines +that have different arguments. The same conventions as before are used +for the routine description. + +\subsection{init} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE init(n, nterms, mat, kmat, nlsym, [nlherm,] nlpos, & + & nlforce_zero) + INTEGER, INTENT(in) :: n, nterms + TYPE([z]pardiso_mat) :: mat + INTEGER, OPTIONAL, INTENT(in) :: kmat + LOGICAL, OPTIONAL, INTENT(in) :: nlsym + LOGICAL, OPTIONAL, INTENT(in) :: nlpos + LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Initialize the PARDISO solver. A SPMAT matrix of $n$ empty rows is initialized. +\item[Arguments:] \mbox{} +\begin{verbatim} + n : rank of matrix + nterms : number of terms in weak form + kmat : matrix id + mat : matrix object + nlsym : symmetric or not. Default is .FALSE. + nlherm : Hermitian or not for complex matrix . Default is .FALSE. + nlpos : Positive-definite or not. Default is .TRUE. + nlforce_zero : Never remove an existing non-zero element if .TRUE. + .TRUE. by default +\end{verbatim} +\end{description} + +\subsection{clear\_mat} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE clear_mat(mat) + TYPE([z]pardiso_mat) :: mat +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} + Clear matrix, keeping its sparse structure unchanged +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object +\end{verbatim} +\end{description} + +\subsection{psum\_mat} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE sum_mat(mat, comm) + TYPE([z]pardiso_mat) :: mat + INTEGER, INTENT(in) :: comm +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} + Parallel sum of matrices. Result matrix is placed in the sparse + matrix mat\%mat on all processes of comm. +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object + comm : communicator +\end{verbatim} +\end{description} + +\subsection{p2p\_mat} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE p2p_mat(mat, dest, extyp, op, comm) + TYPE([z]pardiso_mat) :: mat + INTEGER, INTENT(in) :: dest + CHARACTER(len=*), INTENT(in) :: extyp ! ('send', 'recv', 'sendrecv') + CHARACTER(len=*), INTENT(in) :: op ! ('put', 'updt') + INTEGER, INTENT(in) :: comm +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} + Point-to-point combine sparse matrix between 2 processes. +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object + dest : rank of remote process + extyp : exchange type ('send', 'recv', 'sendrecv') + op : operation type ('put', 'updt') + comm : communicator +\end{verbatim} +\end{description} + +\subsection{get\_count} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + INTEGER FUNCTION get_count(mat, nnz) + TYPE([z]pardiso_mat) :: mat + INTEGER, INTENT(out), OPTIONAL :: nnz(:) +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Returns the number of non-zeros and optionally an array of numbers +of non-zeros on each row +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object + nnz : array containing numbers of non-zeros on each row. +\end{verbatim} +\end{description} + +\subsection{factor} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE factor(mat, nlreord, nlmetis, debug) + TYPE([z]pardiso_mat) :: mat + LOGICAL, OPTIONAL, INTENT(in) :: nlreord + LOGICAL, OPTIONAL, INTENT(in) :: nlmetis + LOGICAL, OPTIONAL, INTENT(in) :: debug +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Wrapper of to\_mat, reord\_mat and numfact +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object + nlreord : call reord_mat if .TRUE. (default is .TRUE.) + nlmetis : use METIS nested dissection for reoredering. Default + is minimum degree alogorithm. + debug : verbose output from PARDISO if .TRUE. Default is .FALSE. +\end{verbatim} +\end{description} + +\subsection{bsolve} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE bsolve_pardiso_mat1(mat, rhs, sol, nref, debug) + TYPE([z]pardiso_mat) :: mat + DOUBLE PRECISION|COMPLEX :: rhs(:) + DOUBLE PRECISION|COMPLEX, OPTIONAL :: sol(:) + INTEGER, OPTIONAL :: nref + LOGICAL, OPTIONAL, INTENT(in) :: debug +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Wrapper of to\_mat, reord\_mat and numfact +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object + rhs : input right-hand-side, overwriten by the solution if sol is not present + sol : contains solution + ref : maximum number of refinement steps. Default is 0 (no refinement). + debug : verbose output from PARDISO if .TRUE. Default is .FALSE. +\end{verbatim} +\end{description} + +\subsection{to\_mat} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE to_mat(mat) + TYPE([z]pardiso_mat) :: mat +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Convert linked list spmat to pardiso matrix structure +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object +\end{verbatim} +\end{description} + +\subsection{reord\_mat} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE reord_mat(mat, nlmetis, debug) + TYPE([z]pardiso_mat) :: mat + LOGICAL, OPTIONAL, INTENT(in) :: nlmetis + LOGICAL, OPTIONAL, INTENT(in) :: debug +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Reordering and symbolic factorization +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object + nlmetis : use METIS nested dissection for reoredering. Default + is minimum degree alogorithm. + debug : verbose output from PARDISO if .TRUE. Default is .FALSE. +\end{verbatim} +\end{description} + +\subsection{numfact} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE numfact(mat, debug) + TYPE([z]pardiso_mat) :: mat + LOGICAL, OPTIONAL, INTENT(in) :: debug +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Numerical factorization +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object + debug : verbose output from PARDISO if .TRUE. Default is .FALSE. +\end{verbatim} +\end{description} + +\section{{\tt [P]WSMP\_BSPLINES} Reference} +\label{wsmpRef} +The subroutines {\tt updmat, putele, putrow, putcol, getele, getrow, +getcol, vmx, mcopy, maddto} and {\tt destroy} +have \emph{exactly} the same list of arguments as +those from the {\tt MATRIX} module (as documented in +Appendix~\ref{matRef}), except for the matrix types. Below, we show only the routines +that have different arguments. The same conventions as before are used +for the routine description. + +\subsection{init} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE init(n, nterms, mat, kmat, nlsym, [nlherm,] nlpos, & + & nlforce_zero, [comm_in]) + INTEGER, INTENT(in) :: n, nterms + TYPE([z]wsmp_mat) :: mat + INTEGER, OPTIONAL, INTENT(in) :: kmat + LOGICAL, OPTIONAL, INTENT(in) :: nlsym + LOGICAL, OPTIONAL, INTENT(in) :: nlpos + LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero + INTEGER, OPTIONAL, INTENT(in) :: comm_in +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Initialize the WSMP solver. A SPMAT matrix of $n$ empty rows is initialized. +\item[Arguments:] \mbox{} +\begin{verbatim} + n : rank of matrix + nterms : number of terms in weak form + kmat : matrix id + mat : matrix object + nlsym : symmetric or not. Default is .FALSE. + nlherm : Hermitian or not for complex matrix . Default is .FALSE. + nlpos : Positive-definite or not. Default is .TRUE. + nlforce_zero : Never remove an existing non-zero element if .TRUE. + .TRUE. by default + comm_in : MPI communicator. By default MPI_COMM_WORLD (only in PWSMP_BSPLINES) +\end{verbatim} +\end{description} + +\subsection{clear\_mat} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE clear_mat(mat) + TYPE([z]wsmp_mat) :: mat +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} + Clear matrix, keeping its sparse structure unchanged +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object +\end{verbatim} +\end{description} + +\subsection{psum\_mat} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE sum_mat(mat, comm) + TYPE([z]wsmp_mat) :: mat + INTEGER, INTENT(in) :: comm +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} + Parallel sum of matrices. Result matrix is placed in the sparse + matrix mat\%mat on all processes of comm. +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object + comm : communicator +\end{verbatim} +\end{description} + +\subsection{p2p\_mat} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE p2p_mat(mat, dest, extyp, op, comm) + TYPE([z]wsmp_mat) :: mat + INTEGER, INTENT(in) :: dest + CHARACTER(len=*), INTENT(in) :: extyp ! ('send', 'recv', 'sendrecv') + CHARACTER(len=*), INTENT(in) :: op ! ('put', 'updt') + INTEGER, INTENT(in) :: comm +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} + Point-to-point combine sparse matrix between 2 processes. +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object + dest : rank of remote process + extyp : exchange type ('send', 'recv', 'sendrecv') + op : operation type ('put', 'updt') + comm : communicator +\end{verbatim} +\end{description} + +\subsection{get\_count} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + INTEGER FUNCTION get_count(mat, nnz) + TYPE([z]wsmp_mat) :: mat + INTEGER, INTENT(out), OPTIONAL :: nnz(:) +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Returns the number of non-zeros and optionally an array of numbers +of non-zeros on each row +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object + nnz : array containing numbers of non-zeros on each row. +\end{verbatim} +\end{description} + +\subsection{factor} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE factor(mat, nlreord) + TYPE([z]wsmp_mat) :: mat + LOGICAL, OPTIONAL, INTENT(in) :: nlreord +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Wrapper of to\_mat, reord\_mat and numfact +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object + nlreord : call reord_mat if .TRUE. (default is .TRUE.) +\end{verbatim} +\end{description} + +\subsection{bsolve} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE bsolve_wsmp_mat1(mat, rhs, sol, nref) + TYPE([z]wsmp_mat) :: mat + DOUBLE PRECISION|COMPLEX :: rhs(:) + DOUBLE PRECISION|COMPLEX, OPTIONAL :: sol(:) + INTEGER, OPTIONAL :: nref +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Wrapper of to\_mat, reord\_mat and numfact +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object + rhs : input right-hand-side, overwriten by the solution if sol is not present + sol : contains solution + ref : maximum number of refinement steps. Default is 0 (no refinement). + debug : verbose output from WSMP if .TRUE. Default is .FALSE. +\end{verbatim} +\end{description} + +\subsection{to\_mat} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE to_mat(mat) + TYPE([z]wsmp_mat) :: mat +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Convert linked list spmat to wsmp matrix structure +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object +\end{verbatim} +\end{description} + +\subsection{reord\_mat} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE reord_mat(mat) + TYPE([z]wsmp_mat) :: mat +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Reordering and symbolic factorization +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object +\end{verbatim} +\end{description} + +\subsection{numfact} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE numfact(mat) + TYPE([z]wsmp_mat) :: mat +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Numerical factorization +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object +\end{verbatim} +\end{description} + +\section{{\tt MUMPS\_BSPLINES} Reference} +\label{mumpsRef} +The subroutines {\tt updmat, putele, putrow, putcol, getele, getrow, +getcol, vmx, mcopy, maddto} and {\tt destroy} +have \emph{exactly} the same list of arguments as +those from the {\tt MATRIX} module (as documented in +Appendix~\ref{matRef}), except for the matrix types. Below, we show only the routines +that have different arguments. The same conventions as before are used +for the routine description. + +\subsection{init} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE init(n, nterms, mat, kmat, nlsym, [nlherm,] nlpos, & + & nlforce_zero, comm_in) + INTEGER, INTENT(in) :: n, nterms + TYPE([z]mumps_mat) :: mat + INTEGER, OPTIONAL, INTENT(in) :: kmat + LOGICAL, OPTIONAL, INTENT(in) :: nlsym + LOGICAL, OPTIONAL, INTENT(in) :: nlpos + LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero + INTEGER, OPTIONAL, INTENT(in) :: comm_in +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Initialize the MUMPS solver. A SPMAT matrix of $n$ empty rows is initialized. +\item[Arguments:] \mbox{} +\begin{verbatim} + n : rank of matrix + nterms : number of terms in weak form + kmat : matrix id + mat : matrix object + nlsym : symmetric or not. Default is .FALSE. + nlherm : Hermitian or not for complex matrix . Default is .FALSE. + nlpos : Positive-definite or not. Default is .TRUE. + nlforce_zero : Never remove an existing non-zero element if .TRUE. + .TRUE. by default + comm_in : MPI communicator. By default MPI_COMM_SELF (serial mode). +\end{verbatim} +\end{description} + +\subsection{clear\_mat} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE clear_mat(mat) + TYPE([z]mumps_mat) :: mat +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} + Clear matrix, keeping its sparse structure unchanged +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object +\end{verbatim} +\end{description} + +\subsection{psum\_mat} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE sum_mat(mat, comm) + TYPE([z]mumps_mat) :: mat + INTEGER, INTENT(in) :: comm +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} + Parallel sum of matrices. Result matrix is placed in the sparse + matrix mat\%mat on all processes of comm. +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object + comm : communicator +\end{verbatim} +\end{description} + +\subsection{p2p\_mat} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE p2p_mat(mat, dest, extyp, op, comm) + TYPE([z]mumps_mat) :: mat + INTEGER, INTENT(in) :: dest + CHARACTER(len=*), INTENT(in) :: extyp ! ('send', 'recv', 'sendrecv') + CHARACTER(len=*), INTENT(in) :: op ! ('put', 'updt') + INTEGER, INTENT(in) :: comm +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} + Point-to-point combine sparse matrix between 2 processes. +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object + dest : rank of remote process + extyp : exchange type ('send', 'recv', 'sendrecv') + op : operation type ('put', 'updt') + comm : communicator +\end{verbatim} +\end{description} + +\subsection{get\_count} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + INTEGER FUNCTION get_count(mat, nnz) + TYPE([z]mumps_mat) :: mat + INTEGER, INTENT(out), OPTIONAL :: nnz(:) +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Returns the number of non-zeros and optionally an array of numbers +of non-zeros on each row +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object + nnz : array containing numbers of non-zeros on each row. +\end{verbatim} +\end{description} + +\subsection{factor} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE factor(mat, nlreord) + TYPE([z]mumps_mat) :: mat + LOGICAL, OPTIONAL, INTENT(in) :: nlreord +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Wrapper of to\_mat, reord\_mat and numfact +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object + nlreord : call reord_mat if .TRUE. (default is .TRUE.) +\end{verbatim} +\end{description} + +\subsection{bsolve} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE bsolve_mumps_mat1(mat, rhs, sol, nref) + TYPE([z]mumps_mat) :: mat + DOUBLE PRECISION|COMPLEX :: rhs(:) + DOUBLE PRECISION|COMPLEX, OPTIONAL :: sol(:) + INTEGER, OPTIONAL :: nref +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Wrapper of to\_mat, reord\_mat and numfact +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object + rhs : input right-hand-side, overwriten by the solution if sol is not present + sol : contains solution + ref : maximum number of refinement steps. Default is 0 (no refinement). + debug : verbose output from MUMPS if .TRUE. Default is .FALSE. +\end{verbatim} +\end{description} + +\subsection{to\_mat} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE to_mat(mat) + TYPE([z]mumps_mat) :: mat +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Convert linked list spmat to mumps matrix structure +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object +\end{verbatim} +\end{description} + +\subsection{reord\_mat} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE reord_mat(mat) + TYPE([z]mumps_mat) :: mat +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Reordering and symbolic factorization +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object +\end{verbatim} +\end{description} + +\subsection{numfact} +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + SUBROUTINE numfact(mat) + TYPE([z]mumps_mat) :: mat +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +\begin{description} +\item[Purpose:] \mbox{} +Numerical factorization +\item[Arguments:] \mbox{} +\begin{verbatim} + mat : matrix object +\end{verbatim} +\end{description} + + +\begin{thebibliography}{99} +\bibitem{BSPLINES} {\tt BSPLINES} Reference Guide. +\bibitem{PARDISO} \url{http://www.pardiso-project.org/} +\bibitem{WSMP} \url{http://www-users.cs.umn.edu/~agupta/wsmp.html} +\bibitem{MUMPS} \url{http://graal.ens-lyon.fr/MUMPS/} +\bibitem{McMillan} B. F. McMillan, et. al. \emph{Rapid Fourier space + solution of linear partial integro-differential equations in + toroidal magnetic confinement geometries}, Computer Physics + Communications 181(4), + 715-719 (2010) +\end{thebibliography} + +\end{document} diff --git a/docs/manual/using_bsplines.pdf b/docs/manual/using_bsplines.pdf new file mode 100644 index 0000000..6ab9c4e Binary files /dev/null and b/docs/manual/using_bsplines.pdf differ diff --git a/docs/manual/using_bsplines.tex b/docs/manual/using_bsplines.tex new file mode 100644 index 0000000..fec665a --- /dev/null +++ b/docs/manual/using_bsplines.tex @@ -0,0 +1,366 @@ +% +% @file using_bsplines.tex +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +\documentclass[a4paper]{article} +\usepackage{linuxdoc-sgml} +\usepackage{graphicx} +\usepackage{hyperref} +\usepackage{amsmath} +%\usepackage{verbatim} +%\usepackage[notref]{showkeys} + +\title{\tt Using BSPLINES in Particle Codes} +\author{Trach-Minh Tran} +\date{v0.1, March 2012} +\abstract{These notes present some practical considerations on using + BSPLINES in particle codes, in particular for the charge or current + assignment as well as the field interpolation. Performance + measurements are done on an Intel Xeon X5570 and the more recent + Xeon E5-2680.} + +\begin{document} +\maketitle +%\tableofcontents + +\section{Introduction} +For simplicity, we assume in these notes that we are dealing with a +2D electrostatic particle code and the 2D Poisson equation is to be solved +using the Finite Element Method. Starting from the \emph{weak form} +and using the \emph{splines} for both \emph{basis} and \emph{test} +functions, the electrostatic field potential together with its +gradient and the right hand side can be computed from +\begin{equation} + \begin{split} + \phi(x,y) &= + \sum_{ij}\,c_{ij}\,\Lambda_i(x)\Lambda_j(y) \\ + \frac{\partial\phi}{\partial x} &= + \sum_{ij}\,c_{ij}\,\Lambda'_i(x)\Lambda_j(y) \\ + \frac{\partial\phi}{\partial y} &= + \sum_{ij}\,c_{ij}\,\Lambda_i(x)\Lambda'_j(y) \\ + S_{ij} &= \sum_{\mu=1}^{N_p}\, q_\mu\Lambda_i(x_\mu)\Lambda_j(y_\mu), + \end{split} +\end{equation} +where $c_{ij}$ are the solutions of the discretized Poisson equation +and $\{x_\mu,y_\mu\}$ are the coordinates of the $N_p$ simulation +particles. At each time step, the calculation of both the field $\phi$ +and its gradient (\emph{field interpolation}) for the particle +pusher and the construction of the RHS $S_{i}$ (\emph{charge + assignment}) involve thus the computation of a large number of +splines $\Lambda$ and its derivatives $\Lambda'$. + +Notice that the construction of the solver matrix requires also +the calculations of the splines. This operation is however performed only +once at the initial timestep in the (most common) case where the +matrix is time independent and thus will not be considered in further +these notes. + +\section{Computation of splines} +Let consider the grid defined by $x_i$, $i=1,\ldots,N+1$. Inside the +interval $[x_i, x_{i+1}]$, the $p+1$ non-zero splines of degree $p$ +can be computed efficiently using its polynomial representation given +by +\begin{equation} + \begin{split} + \Lambda_{i+\alpha}(x) &= \sum_{k=0}^{p}\, V^{i}_{k\alpha} + (x-x_i)^k, \qquad \alpha=1,\ldots,p+1, \\ + V^i_{k\alpha} &= + \left.\frac{1}{k!}\frac{d^k}{dx^k}\Lambda_{i+\alpha}(x)\right|_{x=x_i}. + \end{split} +\end{equation} +The $(p+1)^2N$ coefficients $V^i_{k\alpha}$ are precalculated and stored +during the spline initialization (in routine {\tt SET\_SPLINE}) by +using the \emph{recurrence relation} \cite{BSPLINES} to compute the spline and all its +$p$ derivatives. Note that for periodic splines on an equidistant +mesh, only $(p+1)^2$ coefficients $V_{k\alpha}$ are required since +the splines have \emph{translational invariance}. + +For a polynomial $P(x)=a_0+a_1x+\ldots +a_px^p$, its value can be +calculated together with it first derivative, using Horner's rule as: + +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + f = a(p) + fp = f + DO i=p-1,1,-1 + f = a(i) + x*f + fp = f + x*fp + END DO + f = a(0) + x*f +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} +showing that exactly $4p-2$ floating operations (flops) per point are +required. If only the value of the polynomial is needed, only $2p$ flops +per point are required. + +\section{Field interpolation} +\subsection{1D case} +Let considered first the 1D case. The spline expansion of $\phi$ for +$x_i\le x < x_{i+1}$ are expressed as +\begin{equation} + \phi(x) = \sum_{\alpha=0}^pc_{i+\alpha}\Lambda_{i+\alpha}(x). +\end{equation} +To calculate the field using this spline expansion, $p+1$ splines +have to be first calculated followed by the sum above, which +yields a total cost of $2(p+1)^2\sim 2p^2$ flops per point. This cost +can be reduced by observing that +$\phi(x)$ is a \emph{piecewise polynomial} (PP) of degree $p$ in +each interval. Its PP coefficients can be obtained from +\begin{equation} + \begin{split} + \phi(x) &= \sum_{\alpha=0}^pc_{i+\alpha}\sum_{k=0}^{p}\, V^{i}_{k\alpha} + (x-x_i)^k \\ + &= \sum_{k=0}^{p}\, \Pi^{i}_{k}(x-x_i)^k, \qquad + \Pi^{i}_{k}= \sum_{\alpha=0}^pc_{i+\alpha} V^{i}_{k\alpha} + \end{split} +\end{equation} +Once the $N(p+1)$ PP coefficients $\Pi^{i}_{k}$ have been calculated +from the spline expansion coefficients $c_{i+\alpha}$, only $2p$ flops per +point are required to obtain the field value, using the Horner's rule +described previously. + +\subsection{2D case} +Extension for the spline expansion and the PP representation for +$\phi(x,y)$ is straightforwards and yields, for $x_i\le x < x_{i+1}$, +$y_j\le y < y_{j+1}$: +\begin{equation} + \begin{split} + \phi(x,y) &= \sum_{\alpha=0}^{p1}\sum_{\beta=0}^{p2}c_{i+\alpha,j+\beta} + \Lambda_{i+\alpha}(x)\Lambda_{j+\beta}(y) \\ + \phi(x,y) &= \sum_{k=0}^{p1}\sum_{l=0}^{p2}\,\Pi^{ij}_{kl}(x-x_i)^k(y-y_j)^l, \qquad + \Pi^{ij}_{kl}= \sum_{\alpha=0}^{p1}\sum_{\beta=0}^{p2}c_{i+\alpha,j+\beta} V^{i}_{k\alpha}V^{j}_{l\beta}, + \end{split} +\end{equation} +where $ V^{i}_{k\alpha}$ and $V^{j}_{l\beta}$ are the PP +coefficients of the splines $\Lambda_{i+\alpha}(x)$ and +$\Lambda_{j+\beta}(y)$ respectively. Assuming the same spline order +$p$ in +both $x$ and $y$, the flop counts per point for the 2 representations are +respectively $2(3p+2)(p+1)\sim 6p^2$ and $2p(p+2)\sim 2p^2$, while the +storages required for the spline coefficients $c$ and the PP +coefficients $\Pi$ are $(N+p)^2\sim N^2$ and $N^2(p+1)^2$ respectively. + +\subsection{Implementation in BSPLINES} +The PP representation is selected by default in BSPLINES, +\emph{unless} the logical keyword {\tt NLPPFORM} is set to +{\tt .FALSE.} when calling the spline initialization routine {\tt + SET\_SPLINE}. The flop counts per point for both methods are +summarized in the table below +\begin{center} +\begin{tabular}{|l|c|c|} + \hline + & 1D & 2D \\\hline +Spline expansion & $2(p+1)^{2}$ & $2(3p+2)(p+1)$ \\ +PP representation & $2p$ & $2p(p+2)$ \\\hline +\end{tabular} +\end{center} +The routine {\tt GRIDVAL} computes the value of the +field or one of its derivatives. The first call to this routine +computes the PP coefficients $\Pi$ if {\tt NLPPFORM=.TRUE.} is +selected or just store the spline coefficients $c$ in the spline +internal data otherwise. In the following calls to {\tt GRIDVAL}, $c$ +should not be passed to the routines. + +Notice that the PP representation requires to store the $N^2(p+1)^2$ +PP coefficients in the 2D case, which is still acceptable. In the 3D +case, this storage requirement becomes $N^3(p+1)^3$ which can be +prohibitive! In this case the less efficient \emph{Spline expansion} +formulation should be selected. + +In the \emph{particle loop}, the routine {\tt GETGRAD} which computes +the function and all its first partial derivatives at once should be +called instead of {\tt GRIDVAL}. + +\section{Particle localization({\tt locintv})} +In both charge assignment and field interpolation, finding in which +interval of the spatial grid the particle is localized should be first +performed. This operation is trivial for the case of an equidistant +mesh. For non-equidistant mesh, an \emph{equidistant fine} mesh and its mapping to the +actual mesh are first constructed in the spline initialization routine +{\tt SET\_SPLINE} and used to localize the particles in the routine +{\tt LOCINTV}. + +\section{Performances} +From the considerations above, using BSPLINES to perform the charge +assignment and field interpolation in 2D and 3D particle codes might +result in large overheads because of the large number of calls to the +routines {\tt BASFUN} to compute the splines or {\tt GETGRAD} to perform the field +interpolation at a \emph{single} particle position. In the following, +the performances the 2D linearized gyrokinetic code GYGLES which has +been adapted to use BSPLINES are analyzed. Vectorization by grouping +the particles for both charge assignment and field interpolation is then +proposed as a way to speed up these two operations when using +BSPLINES. + +\subsection{Scalar performances} +Optimization of the scalar versions of {\tt BASFUN} and {\tt GETGRAD} +(when these routines are called with a \emph{single} particle) is +performed essentially by +\begin{itemize} +\item Minimizing the flop counts and reducing redundant operations. +\item Unrolling small loops, for example the loop over the $p+1$ + splines that are non-zero at a given position, for small $p$. +\item Define all routines called by {\tt BASFUN} and {\tt GETGRAD} as + \emph{internal procedures}. +\item Rearranging the memory layout of the multi-dimension array + containing the PP coefficients of the spline. +\end{itemize} + +The timings of the charge and current assignment (assign), the particle +pusher (push) and the main time loop for a 5 time step run of GYGLES, +on an Intel Xeon X5570 (hpcff.fz-juelich.de), using 4 MPI +processes and Intel Fortran 12.1.2 are summarized in the following +table + +\begin{center} +\begin{tabular}{lrrrrr} +\hline + & $T_0$(s) & $T_1$(s) & $T_2$(s) & $T_1/T_0$ & $T_2/T_1$ \\ +\hline + assign & 1.454E+01 & 2.126E+01 & 2.259E+01 & 1.46 & 1.06 \\ + push & 2.536E+01 & 3.080E+01 & 3.144E+01 & 1.21 & 1.02 \\ + mainloop & 4.197E+01 & 5.955E+01 & 6.149E+01 & 1.42 & 1.03 \\ +\hline +\end{tabular} +\end{center} + +where $T_0$ is the time in seconds obtained with the original code +while $T_1$ and $T_2$ are the times obtained with BSPLINES, +respectively using an \emph{equidistant} and \emph{non-equidistant} +radial mesh. In all the 3 runs, a quadratic splines were used. +The small difference between \emph{equidistant} and +\emph{non-equidistant} mesh comes mainly from the particle localization. + +The same run on an Intel Xeon E5-2680 (helios.iferc-csc.org), using +the same Intel compiler (with AVX instructions) yields + +\begin{center} +\begin{tabular}{lrrrrr} +\hline + & $T_0$(s) & $T_1$(s) & $T_2$(s) & $T_1/T_0$ & $T_2/T_1$ \\ +\hline + assign & 1.093E+01 & 1.987E+01 & 2.086E+01 & 1.82 & 1.05 \\ + push & 2.385E+01 & 2.868E+01 & 2.994E+01 & 1.20 & 1.04 \\ + mainloop & 3.656E+01 & 5.411E+01 & 5.598E+01 & 1.48 & 1.03 \\ +\hline +\end{tabular} +\end{center} + + +\subsection{Speed up by vectorization} +As found in the last section, using external routines from BSPLINES +instead of \emph{hard coding} the spline computations +results in a slowing down of 40--50\% for the main time +loop. As will shown later, this problem could be solved by \emph{grouping} the +particles and using the vectorized {\tt BASFUN} and {\tt GETGRAD} +routines. Such particle grouping can be done for example, by replacing the usual +particle loop by the following Fortran code fragment +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{verbatim} + nset = npart/ngroup + IF(MODULO(npt, ngroup).NE.0) nset = nset+1 + i2 = 0 + DO is=1,nset + i1 = i2+1 + i2 = MIN(i2+ngroup,npart) + CALL basfun(x(i1:i2), ...) + END DO +\end{verbatim} +\nopagebreak\hrule +\addvspace{\medskipamount} + +where {\tt npart} particles are partitioned into {\tt nset} groups, +each containing at most {\tt ngroup} particles. Vectorization of the +routines {\tt BASFUN} and {\tt GETGRAD} is achieved by moving whenever +is possible the loop over the {\tt ngroup} particles into the +innermost loop. + +The vectorization performances shown in Fig~.\ref{fig:basfun_hpcff} and +Fig~.\ref{fig:getgrad_hpcff}, respectively for {\tt BASFUN} and {\tt +GETGRAD} are obtained using version $12.1.2$ of Intel compiler on +an Intel Xeon X5570 (hpcff.fz-juelich.de). With a speedup of at least +2 for quadratic splines, the slowing down found previously in the +scalar version could be likely compensated. The new AVX instructions +present in the recent Intel Xeon E5-2680 (helios.iferc-csc.org) seems +to improve somewhat the vectorization performance as shown in +Fig~.\ref{fig:basfun_helios} and Fig~.\ref{fig:getgrad_helios}. + +\begin{figure} + \centering + \includegraphics[angle=0,width=\hsize]{basfun_perf_hpcff} + \caption{In this test, $10^5$ particles are distributed randomly on + an equidistant mesh of 64 intervals. On each point, all the $p+1$ + splines are computed. The particle localization routine {\tt + locintv} is included in the timing. In order to have a good + statistics in the measurements, $1'000$ iterations of the particle loop are considered.} + \label{fig:basfun_hpcff} +\end{figure} + +\begin{figure} + \centering + \includegraphics[angle=0,width=\hsize]{getgrad_perf_hpcff} + \caption{In this test, $10^5$ particles are distributed randomly on + an equidistant 2D $(x,y)$ mesh of $64\times 64$ intervals, where + the coordinate $y$ is periodic. On each point, the function + together with its gradient are computed, using the PP + representation. The particle localization routine {\tt + locintv} is included in the timing. In order to have a good + statistics in the measurements, $100$ iterations of the particle loop are considered.} + \label{fig:getgrad_hpcff} +\end{figure} + +\begin{figure} + \centering + \includegraphics[angle=0,width=\hsize]{basfun_perf_helios} + \caption{In this test, $10^5$ particles are distributed randomly on + an equidistant mesh of 64 intervals. On each point, all the $p+1$ + splines are computed. The particle localization routine {\tt + locintv} is included in the timing. In order to have a good + statistics in the measurements, $1'000$ iterations of the particle loop are considered.} + \label{fig:basfun_helios} +\end{figure} + +\begin{figure} + \centering + \includegraphics[angle=0,width=\hsize]{getgrad_perf_helios} + \caption{In this test, $10^5$ particles are distributed randomly on + an equidistant 2D $(x,y)$ mesh of $64\times 64$ intervals, where + the coordinate $y$ is periodic. On each point, the function + together with its gradient are computed, using the PP + representation. The particle localization routine {\tt + locintv} is included in the timing. In order to have a good + statistics in the measurements, $100$ iterations of the particle loop are considered.} + \label{fig:getgrad_helios} +\end{figure} + +\begin{thebibliography}{99} +\bibitem{BSPLINES} {\tt BSPLINES} Reference Guide. +\end{thebibliography} + +\end{document} diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt new file mode 100644 index 0000000..050fdcb --- /dev/null +++ b/examples/CMakeLists.txt @@ -0,0 +1,77 @@ +/** + * @file CMakeLists.txt + * + * @brief + * + * @copyright + * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) + * SPC (Swiss Plasma Center) + * + * spclibs is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * + * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + * + * @authors + * (in alphabetical order) + * @author Nicolas Richart + * @author Trach-Minh Tran + */ +project(bsplines_tests) + +add_library(local_util STATIC + pde1dp_mod.f90 + pde3d_mod.f90 + ppde3d_mod.f90 + ppde3d_pb_mod.f90 + tcdsmat_mod.f90 + meshdist.f90 + dismat.f90 + ibcmat.f90 + disrhs.f90 +) + +target_link_libraries(local_util PUBLIC bsplines) + +set(BS_TESTS + driv1 driv2 driv3 driv4 + pde1d pde1dp pde1dp_cmpl + pde2d pde2d_pb + pde1dp_cmpl_dft + pde3d ppde3d ppde3d_pb + fit1d fit1dbc fit1dp + fit2d fit2d1d fit2d_cmpl fit2dbc fit2dbc_x fit2dbc_y + moments optim1 optim2 optim3 + tcdsmat tmassmat tbasfun tsparse1 + basfun_perf getgrad_perf gridval_perf + test_kron + ) + +if(HAS_PARDISO) + set(BS_TESTS ${BS_TESTS} + pde1dp_cmpl_pardiso + pde2d_pardiso + pde2d_sym_pardiso + pde2d_sym_pardiso_dft + tsparse2 + ) +endif() + +if(HAS_MUMPS) + set(BS_TESTS ${BS_TESTS} + pde2d_mumps + pde1dp_cmpl_mumps + ) +endif() + +foreach(test ${BS_TESTS}) + add_executable(${test} ${test}.f90) + target_link_libraries(${test} local_util ${LIBS} ${EXTRA_LIBS}) +endforeach() diff --git a/examples/Makefile b/examples/Makefile new file mode 100644 index 0000000..552d03f --- /dev/null +++ b/examples/Makefile @@ -0,0 +1,506 @@ +# +# @file Makefile +# +# @brief +# +# @copyright +# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +# SPC (Swiss Plasma Center) +# +# spclibs is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . +# +# @authors +# (in alphabetical order) +# @author Emmanuel Lanti +# @author Trach-Minh Tran +# +PREFIX=$(HOME) +XGRAFIX=/usr/local/xgrafix_1.2/src-double +# FUTILS=/usr/local/crpp/futils +# BSPLINES=/usr/local/crpp/bsplines +# PPUTILS2=../pputils2 +# PPPACK=../pppack +# SLATEC=/usr/local/slatec +# FFTW=/usr/local/fftw-2.1.5-opt +ARPACK=/usr/local/ARPACK +LAPACK95=$(MKL)/../../../mkl/include/intel64/lp64 + +MPIF90 = mpif90 +F90 = ifort +LD = $(MPIF90) + +debug = -g -traceback -check bounds -warn alignments -warn unused +optim = -O3 -xHOST + +OPT=$(debug) +#OPT=$(optim) + +F90FLAGS = $(OPT) -fPIC -I../fft -I$(BSPLINES)/include -I$(FUTILS)/include \ + -I$(FFTW)/include -I$(MKL)/../../include/intel64/lp64 +LDFLAGS = $(OPT) -fPIC -L$(FUTILS)/lib -L$(BSPLINES)/lib -L${HDF5}/lib -L$(FFTW)/lib64 \ + -L$(SLATEC)/lib -L$(ARPACK) + +CC = cc +CFLAGS = -O2 + +LIBS = -mkl=cluster -lbsplines -lpppack -lpppack -lpputils2 -lfutils -lfft -larpack \ + -lfftw -lhdf5_fortran -lhdf5 -lz -lpthread + +LIBS1 = -mkl=cluster -lbsplines1 -lpppack -lfutils \ + -lhdf5_fortran -lhdf5 -lz -lsz -lpthread + +ifdef WSMP +LDFLAGS += -L$(WSMP) +LIBS += -lwsmp64 +LIBS1 += -lpwsmp64 +endif + +ifdef MUMPS +F90FLAGS += -I$(MUMPS)/include -I$(LAPACK95) +LDFLAGS += -L$(MUMPS)/lib -L$(PARMETIS)/lib +LIBS += $(MUMPSLIBS) +endif + +ifdef PETSC_DIR +include ${PETSC_DIR}/conf/variables +F90FLAGS += -I$(PETSC_DIR)/include -I$(PETSC_DIR)/$(PETSC_ARCH)/include \ + -I$(MKL)/../../include +LIBS += ${PETSC_FORTRAN_LIB} ${PETSC_KSP_LIB} +endif + +PDE1DOBJS = pde1d.o +PDE2DOBJS = pde2d.o dismat.o disrhs.o ibcmat.o +FIT1DOBJJS = fit1d.o + +.SUFFIXES: +.SUFFIXES: .o .c .f90 .f + +.f90.o: + $(MPIF90) $(F90FLAGS) -c $< +.f.o: + $(F90) $(F90FLAGS) -c $< + +all: examples tmat + +EX_FILES = driv1 driv2 driv3 driv4 pde1d pde1dp pde1dp_cmpl pde3d ppde3d ppde3d_pb \ + pde2d pde2d_pb fit1d fit1dbc \ + fit1dp fit2d fit2d1d fit2d_cmpl fit2dbc fit2dbc_x fit2dbc_y \ + moments optim1 optim2 optim3 tmassmat tbasfun tcdsmat tsparse1 tsparse2 \ + pde2d_pardiso pde2d_sym_pardiso pde1dp_cmpl_pardiso pde1dp_cmpl_dft \ + pde2d_sym_pardiso_dft \ + pde1d_eig_csr pde1d_eig_gb pde1d_eig_ge + +ifdef WSMP +EX_FILES += pde2d_wsmp pde2d_pwsmp pde2d_sym_wsmp pde1dp_cmpl_wsmp +endif + +ifdef MUMPS +EX_FILES += pde2d_mumps pde1dp_cmpl_mumps pde1d_eig_zmumps +endif + +ifdef PETSC_DIR +EX_FILES += pde2d_petsc +endif + +examples: $(EX_FILES) + +tmat: tmatrix_gb tmatrix_pb tmatrix_zpb + +adv: adv.o extra.o + $(LD) $(LDFLAGS) -L$(XGRAFIX) -o $@ $< extra.o $(LIBS) \ + -lfftw -lXGF -lXGC -lX11 + cp -p $@ ../bin/ + +driv1: driv1.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) + cp -p $@ ../bin/ + +driv2: driv2.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) + cp -p $@ ../bin/ + +driv3: driv3.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) + cp -p $@ ../bin/ + +driv4: driv4.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) + cp -p $@ ../bin/ + +pde1d: $(PDE1DOBJS) + $(LD) $(LDFLAGS) -o $@ $(PDE1DOBJS) $(LIBS) + cp -p $@ ../bin/ + +pde1d_eig_csr: pde1d_eig_csr.o + $(LD) $(LDFLAGS) -o $@ pde1d_eig_csr.o $(LIBS) -lpputils2 + cp -p $@ ../bin/ + +pde1d_eig_zcsr: pde1d_eig_zcsr.o + $(LD) $(LDFLAGS) -o $@ pde1d_eig_zcsr.o $(LIBS) + cp -p $@ ../bin/ + +pde1d_eig_zmumps: pde1d_eig_zmumps.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) -lpputils2 + cp -p $@ ../bin/ + +pde1d_eig_zcsr: pde1d_eig_zcsr.o + $(LD) $(LDFLAGS) -o $@ pde1d_eig_zcsr.o $(LIBS) + cp -p $@ ../bin/ + +pde1d_eig_gb: pde1d_eig_gb.o + $(LD) $(LDFLAGS) -o $@ pde1d_eig_gb.o $(LIBS) + cp -p $@ ../bin/ + +pde1d_eig_ge: pde1d_eig_ge.o + $(LD) $(LDFLAGS) -o $@ pde1d_eig_ge.o $(LIBS) + cp -p $@ ../bin/ + +pde1dp: pde1dp.o pde1dp_mod.o + $(LD) $(LDFLAGS) -o $@ pde1dp.o pde1dp_mod.o $(LIBS) + cp -p $@ ../bin/ + +pde1dp_cmpl: pde1dp_cmpl.o + $(LD) $(LDFLAGS) -o $@ pde1dp_cmpl.o $(LIBS) + cp -p $@ ../bin/ + +pde1dp_KA: pde1dp_KA.o pde1dp_mod_KA.o + $(LD) $(LDFLAGS) -o $@ pde1dp_KA.o pde1dp_mod_KA.o $(LIBS) + cp -p $@ ../bin/ + +pde2d: $(PDE2DOBJS) + $(LD) $(LDFLAGS) -o $@ $(PDE2DOBJS) $(LIBS) + cp -p $@ ../bin/ + +pde3d: pde3d.o pde3d_mod.o + $(LD) $(LDFLAGS) -o $@ pde3d.o pde3d_mod.o $(LIBS) + cp -p $@ ../bin/ + +ppde3d: ppde3d.o ppde3d_mod.o + $(LD) $(LDFLAGS) -o $@ ppde3d.o ppde3d_mod.o $(LIBS) + cp -p $@ ../bin/ + +ppde3d_pb: ppde3d_pb.o ppde3d_pb_mod.o + $(LD) $(LDFLAGS) -o $@ ppde3d_pb.o ppde3d_pb_mod.o $(LIBS) + cp -p $@ ../bin/ + +pde2d_pb: pde2d_pb.o + $(LD) $(LDFLAGS) -o $@ pde2d_pb.o $(LIBS) + cp -p $@ ../bin/ + +pde2d_nh: pde2d_nh.o + $(LD) $(LDFLAGS) -o $@ pde2d_nh.o $(LIBS) + cp -p $@ ../bin/ + +tcdsmat: tcdsmat.o tcdsmat_mod.o meshdist.o + $(LD) $(LDFLAGS) -o $@ tcdsmat.o tcdsmat_mod.o meshdist.o $(LIBS) + cp -p $@ ../bin/ + +tmatrix_pb: tmatrix_pb.o + $(LD) $(LDFLAGS) -o $@ tmatrix_pb.o $(LIBS) + cp -p $@ ../bin/ + +tmatrix_zpb: tmatrix_zpb.o + $(LD) $(LDFLAGS) -o $@ tmatrix_zpb.o $(LIBS) + cp -p $@ ../bin/ + +tmatrix_gb: tmatrix_gb.o + $(LD) $(LDFLAGS) -o $@ tmatrix_gb.o $(LIBS) + cp -p $@ ../bin/ + +fit1d: $(FIT1DOBJJS) + $(LD) $(LDFLAGS) -o $@ $(FIT1DOBJJS) $(LIBS) + cp -p $@ ../bin/ + +fit1d_cmpl: fit1d_cmpl.o + $(LD) $(LDFLAGS) -o $@ fit1d_cmpl.o $(LIBS) + cp -p $@ ../bin/ + +gyro: gyro.o + $(LD) $(LDFLAGS) -o $@ gyro.o -lslatec $(LIBS) + cp -p $@ ../bin/ + +fit1dbc: fit1dbc.o + $(LD) $(LDFLAGS) -o $@ fit1dbc.o $(LIBS) + cp -p $@ ../bin/ + +fit1dp: fit1dp.o + $(LD) $(LDFLAGS) -o $@ fit1dp.o $(LIBS) + cp -p $@ ../bin/ + +fit2d: fit2d.o meshdist.o + $(LD) $(LDFLAGS) -o $@ fit2d.o meshdist.o $(LIBS) + cp -p $@ ../bin/ + +fit2d1d: fit2d1d.o meshdist.o + $(LD) $(LDFLAGS) -o $@ fit2d1d.o meshdist.o $(LIBS) + cp -p $@ ../bin/ + +fit2d_cmpl: fit2d_cmpl.o meshdist.o + $(LD) $(LDFLAGS) -o $@ fit2d_cmpl.o meshdist.o $(LIBS) + cp -p $@ ../bin/ + +fit2dbc: fit2dbc.o meshdist.o + $(LD) $(LDFLAGS) -o $@ fit2dbc.o meshdist.o $(LIBS) + cp -p $@ ../bin/ + +fit2dbc_x: fit2dbc_x.o meshdist.o + $(LD) $(LDFLAGS) -o $@ fit2dbc_x.o meshdist.o $(LIBS) + cp -p $@ ../bin/ + +fit2dbc_y: fit2dbc_y.o meshdist.o + $(LD) $(LDFLAGS) -o $@ fit2dbc_y.o meshdist.o $(LIBS) + cp -p $@ ../bin/ + +moments: moments.o + $(LD) $(LDFLAGS) -o $@ moments.o $(LIBS) + cp -p $@ ../bin/ + +mesh: mesh.o meshdist.o + $(LD) $(LDFLAGS) -o $@ mesh.o meshdist.o $(LIBS) + cp -p $@ ../bin/ + +optim1: optim1.o + $(LD) $(LDFLAGS) -o $@ optim1.o $(LIBS) + cp -p $@ ../bin/ + +optim2: optim2.o + $(LD) $(LDFLAGS) -o $@ optim2.o $(LIBS) + cp -p $@ ../bin/ + +optim3: optim3.o + $(LD) $(LDFLAGS) -o $@ optim3.o $(LIBS) + cp -p $@ ../bin/ + +tmassmat: tmassmat.o + $(LD) $(LDFLAGS) -o $@ tmassmat.o $(LIBS) + cp -p $@ ../bin/ + +tbasfun: tbasfun.o + $(LD) $(LDFLAGS) -o $@ tbasfun.o $(LIBS) + cp -p $@ ../bin/ + +basfun_perf: basfun_perf.o + $(LD) $(LDFLAGS) -o $@ basfun_perf.o $(LIBS) + cp -p $@ ../bin/ + +gridval_perf: gridval_perf.o + $(LD) $(LDFLAGS) -o $@ gridval_perf.o $(LIBS) + cp -p $@ ../bin/ + +getgrad_perf: getgrad_perf.o + $(LD) $(LDFLAGS) -o $@ getgrad_perf.o $(LIBS) + cp -p $@ ../bin/ + +basfun_perf1: basfun_perf1.o + $(LD) $(LDFLAGS) -o $@ basfun_perf1.o $(LIBS) + cp -p $@ ../bin/ + +tlocintv: tlocintv.o meshdist.o + $(LD) $(LDFLAGS) -o $@ tlocintv.o meshdist.o $(LIBS) + cp -p $@ ../bin/ + +tgausleg: tgausleg.o meshdist.o + $(LD) $(LDFLAGS) -o $@ tgausleg.o meshdist.o $(LIBS) + cp -p $@ ../bin/ + +poisson: poisson.o + $(LD) $(LDFLAGS) -o $@ poisson.o $(LIBS) + cp -p $@ ../bin/ + +poisson_mumps: poisson_mumps.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) + cp -p $@ ../bin/ + +poisson_petsc: poisson_petsc.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) + cp -p $@ ../bin/ + +tsparse1: tsparse1.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) + cp -p $@ ../bin/ + +tsparse2: tsparse2.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) + cp -p $@ ../bin/ + +pde2d_pardiso: pde2d_pardiso.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) + cp -p $@ ../bin/ + +pde2d_mumps: pde2d_mumps.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) + cp -p $@ ../bin/ + +pde2d_petsc: pde2d_petsc.o $(PPUTILS2)/pputils2.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) + cp -p $@ ../bin/ + +pde2d_sym_pardiso: pde2d_sym_pardiso.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) + cp -p $@ ../bin/ + +pde2d_sym_pardiso_dft: pde2d_sym_pardiso_dft.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) + cp -p $@ ../bin/ + +pde2d_sym_wsmp_dft: pde2d_sym_wsmp_dft.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) + cp -p $@ ../bin/ + +zssmp_ex1: zssmp_ex1.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) + cp -p $@ ../bin/ + +zpardiso_ex1: zpardiso_ex1.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) + cp -p $@ ../bin/ + +pde2d_sym_wsmp: pde2d_sym_wsmp.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) + cp -p $@ ../bin/ + +pde2d_wsmp: pde2d_wsmp.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) + cp -p $@ ../bin/ + +pde2d_pwsmp: pde2d_pwsmp.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS1) + cp -p $@ ../bin/ + +pde1dp_cmpl_pardiso: pde1dp_cmpl_pardiso.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) + cp -p $@ ../bin/ + +pde1dp_cmpl_mumps: pde1dp_cmpl_mumps.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) + cp -p $@ ../bin/ + +pde1dp_cmpl_dft: pde1dp_cmpl_dft.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) + cp -p $@ ../bin/ + +pde1dp_cmpl_wsmp: pde1dp_cmpl_wsmp.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) + cp -p $@ ../bin/ + +tspline: tspline.o + $(LD) $(LDFLAGS) -o $@ tspline.o $(LIBS) + cp -p $@ ../bin/ + +tpsum_mat: tpsum_mat.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) + cp -p $@ ../bin/ + +tp2p_mat: tp2p_mat.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) + cp -p $@ ../bin/ + +test_pwsmp: test_pwsmp.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS1) + cp -p $@ ../bin/ + +driv1.o: +driv2.o: +driv3.o: +driv4.o: +pde1d.o: +pde1dp.o: pde1dp_mod.o +pde1dp_cmpl.o: +pde1dp_mod.o: +pde1dp_KA.o: pde1dp_mod_KA.o +pde1dp_mod_KA.o: +pde2d.o: +pde3d.o: pde3d_mod.o +pde3d_mod.o: +ppde3d.o: ppde3d_mod.o +ppde3d_mod.o: +ppde3d_pb.o: ppde3d_pb_mod.o +ppde3d_pb_mod.o: +pde2d_pb.o: +pde2d_nh.o: +tcdsmat.o: tcdsmat_mod.o +tcdsmat_mod.o: +fit1d.o: +fit1d_cmpl.o: +gyro.o: +fit1dbc.o: +fit1dp.o: +fit2d.o: +fit2d_cmpl.o: +fit2dbc.o: +dismat.o: +disrhs.o: +ibcmat.o: +adv.o: +tmatrix_pb.o: +tmatrix_zpb.o: +tmatrix_gb.o: +moments.o: +mesh.o: +optim1.o: +optim2.o: +optim3.o: +tmassmat.o: +tbasfun.o: +basfun_perf.o: +basfun_perf1.o: +tlocintv.o: +tgausleg.o: +poisson.o: +poisson_mumps.o: +tsparse1.o: +tsparse2.o: +pde2d_pardiso.o: +pde2d_mumps.o: +pde2d_petsc.o: +pde2d_sym_pardiso.o: +pde2d_sym_pardiso_dft.o: +pde2d_sym_wsmp_dft.o: +pde2d_wsmp.o: +pde2d_pwsmp.o: +pde2d_sym_wsmp.o: +pde1dp_cmpl_pardiso.o: +pde1dp_cmpl_mumps.o: +pde1dp_cmpl_dft.o: +pde1dp_cmpl_wsmp.o: +tpsum_mat.o: +tp2p_mat.o: +poisson_petsc.o: + +tags: + etags *.f *.f90 ../src/*.f90 $(PPPACK)/*.f90 + +clean: + rm -f *.o *.mod *~ a.out + +distclean: clean +# $(MAKE) -C ../src distclean +# $(MAKE) -C ../fft distclean +# $(MAKE) -C $(PPUTILS2) distclean + rm -f *.a *.mod pde1d pde1dp pde1dp_cmpl pde1dp_KA driv1 driv2 driv3 driv4 \ + tmatrix_pb tmatrix_gb tmatrix_zpb \ + pde2d pde2d_pb pde2d_nh pde3d ppde3d ppde3d_pb\ + fit1d fit1d_cmpl gyro fit1dbc fit1dp \ + fit2d fit2d1d fit2d_cmpl fit2dbc \ + fit2dbc_x fit2dbc_y adv moments tcdsmat poisson poisson_mumps\ + mesh optim1 optim2 optim3 tmassmat tbasfun \ + basfun_perf gridval_perf getgrad_perf tlocintv \ + tsparse1 tsparse2 \ + pde2d_pardiso pde2d_sym_pardiso pde2d_wsmp pde2d_sym_wsmp \ + pde1dp_cmpl_dft pde1dp_cmpl_wsmp pde1d_eig_csr pde1d_eig_pb pde1d_eig_ge \ + pde2d_sym_pardiso_dft pde1dp_cmpl_pardiso \ + pde2d_mumps pde1dp_cmpl_mumps tpsum_mat tp2p_mat pde2d_sym_wsmp_dft \ + poisson_petsc pde2d_petsc \ + pde1d_eig_csr pde1d_eig_zcsr pde1d_eig_zmumps pde1d_eig_gb pde1d_eig.ge \ + ../bin/* + diff --git a/examples/adv.f90 b/examples/adv.f90 new file mode 100644 index 0000000..8c40a29 --- /dev/null +++ b/examples/adv.f90 @@ -0,0 +1,320 @@ +!> +!> @file adv.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! 1d Periodic Advection: F(x,t) = F(x-u*dt,t-dt) +! using module bsplines +! + USE bsplines +! + IMPLICIT NONE + INCLUDE 'fftw_f77.h' + TYPE(spline1d) :: spl + INTEGER, PARAMETER :: nhistmx=1000, ncomb=4 + INTEGER :: nx, nidbas, dim + INTEGER :: nstep, nskipt, nhist, mhist + DOUBLE PRECISION :: a, b, dt, u, w, coefx(5) + DOUBLE PRECISION, DIMENSION(0:nhistmx) :: thist, tmass, tfmin, tfmax, ermass + DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), fgrid(:), xshft(:) + DOUBLE PRECISION, ALLOCATABLE :: coefs(:), ferr(:), kx(:), ampl(:) + DOUBLE COMPLEX, ALLOCATABLE :: cfgrid(:), ffft(:) + INTEGER(8) :: forw + DOUBLE PRECISION :: time + INTEGER :: i + NAMELIST /newrun/ nx, nidbas, a, b, dt, u, w, coefx +!=========================================================================== +! 1.0 Prologue +! +! Read in data specific to run +! + nx = 100 ! Number of intevals in x + a = 0.0 ! Left boundary of interval + b = 100.0 ! Right boundary of interval + dt = 0.1 ! Time step + u = 1.0 ! Velocity + w = 2.0 ! Shape of initial function + coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function +! + READ(*,newrun) + WRITE(*,newrun) +!=========================================================================== +! 2.0 Define initial conditions +! +! Set up mesh +! + ALLOCATE(xgrid(0:nx), xshft(0:nx), fgrid(0:nx), ferr(0:nx)) + xgrid(0) = a + xgrid(nx) = b + CALL meshdist(coefx, xgrid, nx) +! +! Set up the spline interpolation +! + CALL set_splcoef(nidbas, xgrid, spl, period=.TRUE.) + CALL get_dim(spl, dim) + WRITE(*,'(a,i6)') 'dimension of splines', dim + ALLOCATE(coefs(dim)) +! +! Initial conditions +! + time = 0.0d0 + nstep = 0 + nskipt = 1 + DO i=0,nx-1 + fgrid(i) = finit(xgrid(i)) + END DO + fgrid(nx) = fgrid(0) + ferr = 0.0 + CALL get_splcoef(spl, fgrid, coefs) + WRITE(*,'(a/(10f8.3))') 'knots', spl%knots + WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid + WRITE(*,'(a/(10f8.3))') 'fgrid', fgrid + WRITE(*,'(a/(10f8.3))') 'coefs', coefs +! +! Set up FFT +! + ALLOCATE(kx(-nx/2+1:nx/2), ampl(-nx/2+1:nx/2)) + ALLOCATE(cfgrid(0:nx-1), ffft(0:nx-1)) + DO i=-nx/2+1,nx/2 + kx(i) = i + END DO + cfgrid(0:nx-1) = fgrid(0:nx-1) + CALL fftw_f77_create_plan(forw, nx, FFTW_FORWARD, FFTW_ESTIMATE) + CALL fftw_f77_one(forw, cfgrid, ffft) + ampl(0:nx/2) = ABS(ffft(0:nx/2)) + ampl(-nx/2+1:-1) = ABS(ffft(nx/2+1:nx-1)) +! +! Set up history arrays +! + nhist = 0 + thist(nhist) = 0.0 +!!$ tmass(nhist) = SUM(coefs(1:nx)) + tmass(nhist) = SUM(fgrid(1:nx)) + tfmin(nhist) = MINVAL(fgrid) + tfmax(nhist) = MAXVAL(fgrid) + ermass(nhist) = tmass(nhist)-tmass(0) + WRITE(*,'(a,(10f8.3))') 'Initial mass', tmass(nhist) +! +! Initialize Xgrafix +! + CALL xginit(3,'ADV','adv',' ',' ',' ',' ',time) +! + CALL xgset2d('linlin', 'X', 'F', 'open', 220, 60, 1.d0, 1.d0, & + & .FALSE., .FALSE., xgrid(0), xgrid(nx), -0.2d0, 1.2d0) + CALL xgcurve(xgrid, fgrid, nx+1, 1) +! + CALL xgset2d('linlin', 'X', 'FERR', 'open', 620, 60, 1.d0, 1.d0, & + & .FALSE., .FALSE., xgrid(0), xgrid(nx), -1.d0, 1.d0) + CALL xgcurve(xgrid, ferr, nx+1, 1) +! + CALL xgset2d('linlin', 'Time', 'Error Mass', 'open', 820, 400, 1.d0, 1.d0, & + & .TRUE., .TRUE., 0.d0, 1.d0, 0.d0, 1.d0) + CALL xgcurve(thist, ermass, nhist, 1) +! + CALL xgset2d('linlin', 'Time', 'Min/Max', 'open', 420, 400, 1.d0, 1.d0, & + & .TRUE., .TRUE., 0.d0, 1.d0, 0.d0, 1.d0) + CALL xgcurve(thist, tfmin, nhist, 1) + CALL xgcurve(thist, tfmax, nhist, 2) +! + CALL xgset2d('linlin', 'kx', 'Amplitude of F', 'open', 20, 400, 1.d0, 1.d0, & + & .FALSE., .FALSE., kx(-nx/2+1), kx(nx/2), & + & 0.0d0, MAXVAL(ampl)) + CALL xgcurve(kx, ampl, nx, 1) +! + CALL xgupdate +!=========================================================================== +! 3.0 Time loop +! + nskipt = 1 + DO + nstep = nstep+1 + time = time+dt + CALL xgevent +! +! Shift x +! + CALL get_splcoef(spl, fgrid, coefs) + xshft(0:nx) = xgrid(0:nx) - u*dt + CALL gridval(spl, xshft, fgrid, 0, coefs) +! + xshft(0:nx) = xgrid(0:nx) - u*time + DO i =0,nx + ferr(i) = fgrid(i) - finit(xshft(i)) + END DO +! + cfgrid(0:nx-1) = fgrid(0:nx-1) + CALL fftw_f77_one(forw, cfgrid, ffft) + ampl(0:nx/2) = ABS(ffft(0:nx/2)) + ampl(-nx/2+1:-1) = ABS(ffft(nx/2+1:nx-1)) +! +! Diagnostics +! + IF( MOD(nstep,nskipt) .EQ. 0 ) THEN + nhist = nhist+1 + IF( nhist .GT. nhistmx ) THEN + nskipt = ncomb*nskipt + mhist = nhist-1 + CALL packarr(mhist, thist, ncomb, nhist) + CALL packarr(mhist, tmass, ncomb, nhist) + CALL packarr(mhist, tfmin, ncomb, nhist) + CALL packarr(mhist, tfmax, ncomb, nhist) + CALL packarr(mhist, ermass, ncomb, nhist) + END IF + thist(nhist) = time + tmass(nhist) = SUM(fgrid(1:nx)) +!!$ tmass(nhist) = SUM(coefs(1:nx)) + tfmin(nhist) = MINVAL(fgrid) + tfmax(nhist) = MAXVAL(fgrid) + ermass(nhist) = (tmass(nhist)-tmass(0))/tmass(0) + END IF +! + CALL xgupdate + END DO +!=========================================================================== +! 9.0 Prologue +! + CALL fftw_f77_destroy_plan(forw) + DEALLOCATE(xgrid, fgrid, xshft, coefs) +CONTAINS + DOUBLE PRECISION FUNCTION finit(xx) +! +! A "box" function +! + DOUBLE PRECISION, INTENT(in) :: xx + DOUBLE PRECISION :: xl, xr, xl0, xr0, h, x, xlen + INTEGER :: kl, kr, klflag, krflag +! + xlen = b-a + x = a + MODULO(xx-a+xlen, xlen) +! + xl = 0.375*(b-a) + xr = 0.624*(b-a) + CALL interv(xgrid, nx+1, xl, kl, klflag) + CALL interv(xgrid, nx+1, xr, kr, krflag) + xl0 = xl + w*(xgrid(kl)-xgrid(kl-1)) + xr0 = xr - w*(xgrid(kr)-xgrid(kr-1)) + CALL interv(xgrid, nx+1, xl0, kl, klflag) + CALL interv(xgrid, nx+1, xr0, kr, krflag) + IF( x .LT. xl0 ) THEN + h = xgrid(kl)-xgrid(kl-1) + finit = EXP(-((x-xl0)/(w*h))**2) + ELSE IF( x .GT. xr0) THEN + h = xgrid(kr)-xgrid(kr-1) + finit = EXP(-((x-xr0)/(w*h))**2) + ELSE + finit = 1.0d0 + END IF + END FUNCTION finit + +END PROGRAM main +!+++ +SUBROUTINE meshdist(c, x, nx) +! +! Construct an 1d non-equidistant mesh given a +! mesh distribution function. +! + IMPLICIT NONE + DOUBLE PRECISION, INTENT(in) :: c(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + INTEGER :: nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) +CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) + END FUNCTION fdist +END SUBROUTINE meshdist +!+++ + +SUBROUTINE packarr(n, arr, skip, nhist) + IMPLICIT NONE + INTEGER :: n, skip, i, ii, nhist + DOUBLE PRECISION :: arr(0:n) + ii = 0 + DO i=0,n,skip + arr(ii) = arr(i) + ii=ii+1 + END DO + nhist = ii +END SUBROUTINE packarr +!+++ + +SUBROUTINE dump(filename, l) +! +! Is invoked when button "Dump" is pressed. +! + IMPLICIT NONE + CHARACTER(len=*) :: filename + INTEGER :: l + WRITE(*,'(a,a,a1)') 'Dumpfile = "', filename(1:l),'"' +END SUBROUTINE dump + +SUBROUTINE quit() +! +! Is invoked when button "Quit" is pressed +! + IMPLICIT NONE + PRINT*, 'Program terminated ...' +END SUBROUTINE quit diff --git a/examples/basfun_perf.f90 b/examples/basfun_perf.f90 new file mode 100644 index 0000000..7cab71f --- /dev/null +++ b/examples/basfun_perf.f90 @@ -0,0 +1,170 @@ +!> +!> @file basfun_perf.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Performance of scalar and vector versions of def_basfun +! + USE bsplines + IMPLICIT NONE + INTEGER :: nx, nidbas, nrank, npt=10, jdermx + DOUBLE PRECISION :: dx + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid + DOUBLE PRECISION, ALLOCATABLE :: xpt(:), fun(:, :) + INTEGER :: left, i, i1, i2 + INTEGER :: ngroup, nset, nremain + TYPE(spline1d) :: splx + DOUBLE PRECISION :: t0, t1, seconds + DOUBLE PRECISION :: t_loop, t_locintv1, t_basfun1, t_locintv, t_basfun + INTEGER :: its, nits + INTEGER, ALLOCATABLE :: vleft(:) + DOUBLE PRECISION, ALLOCATABLE :: vfun(:,:,:) + LOGICAL :: nlperiod +! + NAMELIST /newrun/ nx, nidbas, npt, nits, ngroup, jdermx, nlperiod +! +!=============================================================================== +! +! 1D grid +! + nx = 10 + nidbas = 3 + npt = 1000000 + nits = 100 + ngroup = 10 + jdermx = 0 + nlperiod = .FALSE. + READ(*,newrun) + WRITE(*,newrun) + + ALLOCATE(xgrid(0:nx)) + dx = 1.0d0/REAL(nx) + xgrid = (/ (i*dx,i=0,nx) /) + WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid +! +! Set up spline +! + CALL set_spline(nidbas, 4, xgrid, splx, period=nlperiod) + nrank = splx%dim + WRITE(*,'(a, i5)') 'nrank =', nrank + WRITE(*,'(a/(10f8.3))') 'knots', splx%knots +! + ALLOCATE(xpt(npt)) + ALLOCATE(fun(0:nidbas,0:jdermx)) ! Values and first derivatives of all Splines + CALL RANDOM_NUMBER(xpt) +!=============================================================================== +! 1.0 Scalar version +! +! loop + t0 = seconds() + DO its=1,nits + DO i=1,npt + END DO + END DO + t_loop = (seconds()-t0)/REAL(nits*npt,8) +! +! locintv + t0 = seconds() + DO its=1,nits + DO i=1,npt + CALL locintv(splx, xpt(i), left) + END DO + END DO + t_locintv1 = (seconds()-t0)/REAL(nits*npt,8) +! +! def_basfun + t0 = seconds() + DO its=1,nits + DO i=1,npt + CALL locintv(splx, xpt(i), left) + CALL basfun(xpt(i), splx, fun, left+1) + END DO + END DO + t_basfun1 = (seconds()-t0)/REAL(nits*npt,8) +! + WRITE(*,'(6x,3a12)') 'loop', 'locintv', 'basfun' + WRITE(*,'(6x,8(1pe12.3))') t_loop, t_locintv1, t_basfun1 +!=============================================================================== +! 2.0 Vector version +! + ngroup = 1 + DO WHILE (ngroup .LT. npt/2) + ALLOCATE(vleft(ngroup)) + ALLOCATE(vfun(0:nidbas, 0:jdermx, ngroup)) + nset = npt/ngroup + nremain = MODULO(npt, ngroup) + IF(nremain.NE.0) nset = nset+1 +! +! loop + t0 = seconds() + DO its=1,nits + i2 = 0 + DO i=1,nset + i1 = i2+1 + i2 = MIN(i2+ngroup,npt) + END DO + END DO + t_loop = (seconds()-t0)/REAL(nits*nset,8) +! +! locintv + t0 = seconds() + DO its=1,nits + i2 = 0 + DO i=1,nset + i1 = i2+1 + i2 = MIN(i2+ngroup,npt) + CALL locintv(splx, xpt(i1:i2), vleft) + END DO + END DO + t_locintv = (seconds()-t0)/REAL(nits*npt,8) +! +! basfun + t0 = seconds() + DO its=1,nits + i2 = 0 + DO i=1,nset + i1 = i2+1 + i2 = MIN(i2+ngroup,npt) + CALL locintv(splx, xpt(i1:i2), vleft) + CALL basfun(xpt(i1:i2), splx, vfun, vleft+1) + END DO + END DO + t_basfun = (seconds()-t0)/REAL(nits*npt,8) +! + WRITE(*,'(i6,8(1pe12.3))') ngroup, t_loop, t_locintv, t_basfun, & + & t_locintv1/t_locintv, t_basfun1/t_basfun + DEALLOCATE(vleft) + DEALLOCATE(vfun) + ngroup = ngroup*2 + END DO +!=============================================================================== +! +! Clean up +! + CALL destroy_sp(splx) + DEALLOCATE(xgrid) + DEALLOCATE(xpt) + DEALLOCATE(fun) +END PROGRAM main diff --git a/examples/dirichlet/Makefile b/examples/dirichlet/Makefile new file mode 100644 index 0000000..4f0edc9 --- /dev/null +++ b/examples/dirichlet/Makefile @@ -0,0 +1,62 @@ +# +# @file Makefile +# +# @brief +# +# @copyright +# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +# SPC (Swiss Plasma Center) +# +# spclibs is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . +# +# @authors +# (in alphabetical order) +# @author Emmanuel Lanti +# @author Trach-Minh Tran +# +#BSPLINES = $(HOME)/bsplines +#FUTILS = $(HOME)/futils + +F90 = mpif90 +LD = $(F90) + +debug = -g -traceback -check bounds -warn alignments -warn nounused +optim = -O3 -xHOST + +F90FLAGS = $(OPT) -I$(BSPLINES)/include -I$(FUTILS)/include +LDFLAGS = $(OPT) -L$(BSPLINES)/lib -L$(FUTILS)/lib -L$(HDF5)/lib +LIBS = -lbsplines -lpppack -lfutils -lhdf5_fortran -lhdf5 -lz + +LDFLAGS += -g -L$(MKL) +LIBS += -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread + +OPT=$(debug) +#OPT=$(optim) + +.SUFFIXES: +.SUFFIXES: .o .c .f90 +.f90.o: + $(F90) $(F90FLAGS) -c $< + +all: poisson + +poisson: poisson.o poisson_mod.o + $(LD) $(LDFLAGS) -o $@ $^ $(LIBS) + +poisson.o: poisson_mod.o + +clean: + rm -f *.o *.mod + +distclean: clean + rm -f poisson a.out *~ *.h5 *.fig *.eps *.pdf diff --git a/examples/dirichlet/poisson.f90 b/examples/dirichlet/poisson.f90 new file mode 100644 index 0000000..fa96f4a --- /dev/null +++ b/examples/dirichlet/poisson.f90 @@ -0,0 +1,383 @@ +!> +!> @file poisson.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Solving the following 2d Poisson in cylibdrical coordinates, using splines: +! +! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 0, with f(x=1,y) = cos(my) +! exact solution: f(x,y) = r^m cos(my) +! + USE bsplines + USE matrix + USE conmat_mod + USE poisson_mod + USE futils +! + IMPLICIT NONE + INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, dirmeth, nterms + LOGICAL :: nlppform + INTEGER :: i, j, ij, dimx, dimy, nrank, kl, ku, jder(2), it + DOUBLE PRECISION :: pi, coefx(5), coefy(5) + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol + TYPE(spline2d) :: splxy + TYPE(pbmat) :: mat +! + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr + DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol + DOUBLE PRECISION :: seconds, mem, dopla + DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2, shifty + DOUBLE PRECISION :: err00, err10, err01 + INTEGER :: nits=500 +! + CHARACTER(len=128) :: file='poisson.h5' + INTEGER :: fid +! +! Dirichlet BC properties encapsulated in a derived datatype +! + TYPE(dirich) :: right_bc +! + NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, dirmeth, & + & coefx, coefy +!=========================================================================== +! 1.0 Prologue +! +! Read in data specific to run +! + nx = 8 ! Number of intervals in x + ny = 8 ! Number of intervals in y + nidbas = (/3,3/) ! Degree of splines + ngauss = (/4,4/) ! Number of Gauss points/interval + mbess = 2 ! Exponent of differential problem + nterms = 2 ! Number of terms in weak form + nlppform = .TRUE. ! Use PPFORM for gridval or not + dirmeth = 1 ! 1: use spline interpolation in Dirichlet BC + ! 2: residual minimization in Dirichlet BC + coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function +! + READ(*,newrun) + WRITE(*,newrun) +! +! Define grid on x (=radial) & y (=poloidal) axis +! + pi = 4.0d0*ATAN(1.0d0) + ALLOCATE(xgrid(0:nx), ygrid(0:ny)) + xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 + CALL meshdist(coefx, xgrid, nx) + ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi + CALL meshdist(coefy, ygrid, ny) +! + WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) + WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny) +! +! Create hdf5 file +! + CALL creatf(file, fid, 'PDE2D Result File', real_prec='d') +!=========================================================================== +! 2.0 Discretize the PDE +! +! Set up spline +! + CALL set_spline(nidbas, ngauss, & + & xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform) +! +! FE matrix assembly +! + nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix + kl = (nidbas(1)+1)*ny -1 ! Number of sub-diagnonals + ku = kl ! Number of super-diagnonals + WRITE(*,'(a,5i6)') 'nrank, kl, ku', nrank, kl, ku +! + CALL init(ku, nrank, nterms, mat) + t0 = seconds() + CALL conmat(splxy, mat, coefeq) + tmat = seconds() - t0 + ALLOCATE(arr(nrank)) +! +! RHS assembly +! + ALLOCATE(rhs(nrank), sol(nrank)) + CALL disrhs(mbess, splxy, rhs) +! +! Store some usefull parameters in right_bc +! + right_bc%meth = dirmeth + right_bc%mbess = mbess + right_bc%n1 = nx + right_bc%n2 = ny + right_bc%nidbas1 = nidbas(1) + right_bc%nidbas2 = nidbas(2) +! +! BC on Matrix and RHS +! + CALL ibcmat(mat, right_bc) + CALL ibcrhs(rhs, ygrid, right_bc) + + WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem() +!=========================================================================== +! 3.0 Solve the dicretized system +! + t0 = seconds() + CALL factor(mat) + tfact = seconds() - t0 + gflops1 = dopla('DPBTRF',nrank,nrank,kl,ku,0) / tfact / 1.d9 + + t0 = seconds() + CALL bsolve(mat, rhs, sol) +! +! Backtransform of solution +! + sol(1:ny-1) = sol(ny) +! +! Spline coefficients, taking into account of periodicity in y +! Note: in SOL, y was numbered first. +! + dimx = splxy%sp1%dim + dimy = splxy%sp2%dim + ALLOCATE(bcoef(0:dimx-1, 0:dimy-1)) + DO j=0,dimy-1 + DO i=0,dimx-1 + ij = MODULO(j,ny) + i*ny + 1 + bcoef(i,j) = sol(ij) + END DO + END DO +! + tsolv = seconds() - t0 + gflops2 = dopla('DPBTRS',nrank,1,kl,ku,0) / tsolv / 1.d9 +!=========================================================================== +! 4.0 Check the solution +! +! +! Check function values computed with various method +! + ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny)) + DO i=0,nx + DO j=0,ny + solana(i,j) = f_exact(mbess, xgrid(i), ygrid(j)) + END DO + END DO + jder = (/0,0/) +! +! Compute PPFORM at first call to gridval + IF(nlppform) THEN + CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef) + END IF +! + WRITE(*,'(/a)') '*** Checking solutions' + t0 = seconds() + DO it=1,nits ! nits iterations for timing + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + END DO + tgrid = (seconds() - t0)/REAL(nits) +! + errsol = solana - solcal + WRITE(*,'(a/(8(1pe12.3)))') 'Error at the boundary r = 1', errsol(nx,:) + err00 = err2_norm(splxy, jder, mbess, f_exact) +! + CALL putarr(fid, '/xgrid', xgrid, 'r') + CALL putarr(fid, '/ygrid', ygrid, '\theta') + CALL putarr(fid, '/sol', solcal, 'Solutions') + CALL putarr(fid, '/solana', solana,'Exact solutions') + CALL putarr(fid, '/errors', errsol, 'Errors') +! +! Check derivatives d/dx and d/dy +! + WRITE(*,'(/a)') '*** Checking gradient' + DO i=0,nx + DO j=0,ny + IF( mbess .EQ. 0 ) THEN + solana(i,j) = 0.0d0 + ELSE + solana(i,j) = fx_exact(mbess,xgrid(i),ygrid(j)) + END IF + END DO + END DO +! + jder = (/1,0/) + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions') +! + errsol = solana - solcal + err10 = err2_norm(splxy, jder, mbess, fx_exact) +! + DO i=0,nx + DO j=0,ny + IF( mbess .EQ. 0 ) THEN + solana(i,j) = 0.0d0 + ELSE + solana(i,j) = fy_exact(mbess, xgrid(i), ygrid(j)) + END IF + END DO + END DO +! + jder = (/0,1/) + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions') +! + errsol = solana - solcal + err01 = err2_norm(splxy, jder, mbess, fy_exact) +! + WRITE(*,'(/a,3(1pe12.3))') 'Discretization errors', err00, err10, err01 +!=========================================================================== +! 9.0 Epilogue +! + WRITE(*,'(/a)') '---' + WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat + WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact + WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv + WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid + WRITE(*,'(a,2f10.3)') 'Factor/solve Gflop/s', gflops1, gflops2 +! + DEALLOCATE(xgrid, rhs, sol) + DEALLOCATE(solcal, solana, errsol) + DEALLOCATE(bcoef) + DEALLOCATE(arr) + CALL destroy_sp(splxy) + CALL destroy(mat) + CALL closef(fid) +! +!=========================================================================== +! +CONTAINS +!-- + DOUBLE PRECISION FUNCTION f_exact(m,x,y) + INTEGER,INTENT(in) :: m + DOUBLE PRECISION, INTENT(in) :: x, y + f_exact = (x**m)*COS(m*y) + END FUNCTION f_exact +!-- + DOUBLE PRECISION FUNCTION fx_exact(m,x,y) + INTEGER,INTENT(in) :: m + DOUBLE PRECISION, INTENT(in) :: x, y + fx_exact = m*(x**(m-1))*COS(m*y) + END FUNCTION fx_exact +!-- + DOUBLE PRECISION FUNCTION fy_exact(m,x,y) + INTEGER,INTENT(in) :: m + DOUBLE PRECISION, INTENT(in) :: x, y + fy_exact = -m*(x**m)*SIN(m*y) + END FUNCTION fy_exact +!-- + SUBROUTINE prntmat(str, a) + DOUBLE PRECISION, DIMENSION(:,:) :: a + CHARACTER(len=*) :: str + INTEGER :: i + WRITE(*,'(a)') TRIM(str) + DO i=1,SIZE(a,1) + WRITE(*,'(10f8.1)') a(i,:) + END DO + END SUBROUTINE prntmat +!-- + FUNCTION norm2(x) +! +! Compute the 2-norm of array x +! + IMPLICIT NONE + DOUBLE PRECISION :: norm2 + DOUBLE PRECISION, INTENT(in) :: x(:,:) + DOUBLE PRECISION :: sum2 + INTEGER :: i, j +! + sum2 = 0.0d0 + DO i=1,SIZE(x,1) + DO j=1,SIZE(x,2) + sum2 = sum2 + x(i,j)**2 + END DO + END DO + norm2 = SQRT(sum2) + END FUNCTION norm2 +!-- + SUBROUTINE prnmat(label, mat) + CHARACTER(len=*) :: label + DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat + INTEGER :: i + WRITE(*,'(/a)') TRIM(label) + DO i=1,SIZE(mat,1) + WRITE(*,'(10(1pe12.3))') mat(i,:) + END DO + END SUBROUTINE prnmat +END PROGRAM main +! +!+++ +SUBROUTINE meshdist(c, x, nx) +! +! Construct an 1d non-equidistant mesh given a +! mesh distribution function. +! + IMPLICIT NONE + DOUBLE PRECISION, INTENT(in) :: c(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + INTEGER :: nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) +CONTAINS +!-- + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) + END FUNCTION fdist +END SUBROUTINE meshdist +!+++ + diff --git a/examples/dirichlet/poisson.in b/examples/dirichlet/poisson.in new file mode 100644 index 0000000..8655f50 --- /dev/null +++ b/examples/dirichlet/poisson.in @@ -0,0 +1,10 @@ +&newrun + nx = 16, ny = 16, + nidbas = 3,3, + ngauss = 4,4, + mbess = 3, + nlppform = t, + dirmeth = 2, + coefx = 1., 0., 0., 0., 1. ! Equidistant mesh + coefy = 1., 0., 0., 0., 1. ! Equidistant mesh +/ diff --git a/examples/dirichlet/poisson.m b/examples/dirichlet/poisson.m new file mode 100644 index 0000000..3a56ac8 --- /dev/null +++ b/examples/dirichlet/poisson.m @@ -0,0 +1,88 @@ +% +% @file poisson.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +file='poisson.h5'; +m=3; +% +% Get data from data sets +% +r=hdf5read(file,'/xgrid'); +t=hdf5read(file,'/ygrid'); +sol=hdf5read(file,'/sol')'; +solexact=hdf5read(file,'/solana')'; +err=hdf5read(file,'/errors')'; +solr=hdf5read(file,'/derivx')'; +solt=hdf5read(file,'/derivy')'; + + +[R,T]=meshgrid(r,t); +x = R.*cos(T); y= R.*sin(T); +solx = cos(T).*solr - sin(T)./R.*solt; +soly = sin(T).*solr + cos(T)./R.*solt; + +figure +subplot(221) +pcolor(double(r),double(t),double(sol)); +shading interp +hold on, quiver(r,t,solr,solt) +xlabel('r'); ylabel('\theta') +title('R-THETA plane') +colorbar + +subplot(222) +pcolor(double(x),double(y),double(sol)) +shading interp +hold on, quiver(x,y,solx,soly) +hold off, axis image +xlabel('X'); ylabel('Y') +title('X-Y plane') +colorbar + +subplot(223) +surfc(double(x),double(y),double(sol)) +xlabel('X'); ylabel('Y'); +title('Solutions') + +subplot(224) +surfc(double(x),double(y),double(err)) +xlabel('X'); ylabel('Y'); +title('Errors') + +figure +subplot(211) +plot(r,sol(1,:),'o',r,solexact(1,:)) +xlabel('r') +ylabel('Solutions at \theta=0') +grid on +subplot(212) +tt=0:0.01:2*pi; +plot(t,sol(:,end),'o',tt,cos(m.*tt)) +xlabel('\theta') +ylabel('Solutions at r=1') +grid on + + + + diff --git a/examples/dirichlet/poisson_mod.f90 b/examples/dirichlet/poisson_mod.f90 new file mode 100644 index 0000000..ae234c5 --- /dev/null +++ b/examples/dirichlet/poisson_mod.f90 @@ -0,0 +1,354 @@ +!> +!> @file poisson_mod.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +MODULE poisson_mod + IMPLICIT NONE +! +! Dirichlet BC encapsulated in a derived datatype +! + TYPE dirich + INTEGER :: meth, mbess, n1, n2, nidbas1, nidbas2 + INTEGER :: i0, i1 + DOUBLE PRECISION, POINTER :: amat(:,:) => NULL() + DOUBLE PRECISION, POINTER :: g(:) => NULL() + END TYPE dirich +! +CONTAINS + SUBROUTINE disrhs(mbess, spl, rhs) +! +! Assembly the RHS using 2d spline spl +! + USE bsplines + INTEGER, INTENT(in) :: mbess + TYPE(spline2d), INTENT(in) :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:) +! +! The RHS is 0 +! + rhs(:) = 0.0d0 + END SUBROUTINE disrhs +! + SUBROUTINE ibcmat(mat, bc) +! +! Apply BC on matrix +! + USE matrix + TYPE(pbmat), INTENT(inout) :: mat + TYPE(dirich) :: bc + INTEGER :: ny + INTEGER :: kl, ku, nrank, i, j, k + DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:) + INTEGER :: i0, i1, ii +!=========================================================================== +! 1.0 Prologue +! + ku = mat%ku + kl = ku + nrank = mat%rank + ny = bc%n2 +!=========================================================================== +! 2.0 Unicity at the axis +! +! The vertical sum on the NY-th row +! + ALLOCATE(zsum(nrank)) + ALLOCATE(arr(nrank)) + zsum = 0.0d0 + DO i=1,ny + arr = 0.0d0 + CALL getrow(mat, i, arr) + DO j=1,ny+ku + zsum(j) = zsum(j) + arr(j) + END DO + END DO +! + zsum(ny) = SUM(zsum(1:ny)) ! using symmetry + CALL putrow(mat, ny, zsum) + DEALLOCATE(zsum) +! +! The away operator +! + DO i = 1,ny-1 + arr = 0.0d0; arr(i) = 1.0d0 + CALL putrow(mat, i, arr) + END DO + DEALLOCATE(arr) +!=========================================================================== +! 3.0 Dirichlet on right boundary +! +!!$ i0 = nrank - ku +!!$ i1 = nrank - ny + i0 = (bc%n1-1)*bc%n2 + 1 + i1 = nrank - bc%n2 + bc%i0 = i0 + bc%i1 = i1 +! + IF(ASSOCIATED(bc%amat)) DEALLOCATE(bc%amat) + IF(ASSOCIATED(bc%g)) DEALLOCATE(bc%g) + ALLOCATE(bc%amat(i0:i1,ny)) + ALLOCATE(bc%g(ny)) +! + WRITE(*,'(/a,2i6)') 'IBCMAT: i0, i1 =', i0, i1 +! +! Extract and save the last ny columns of matrix +! + ALLOCATE(arr(nrank)) + DO k=1,ny + j = nrank-ny+k + CALL getcol(mat, j, arr) + bc%amat(i0:i1,k) = arr(i0:i1) + IF( ANY(arr(1:i0-1) .NE. 0.0d0) ) THEN + WRITE(*,'(a,i4)') 'i0 is underestimated for j =', j + END IF + END DO +! +! The away operator +! + DO k=1,ny + j = nrank-ny+k + arr = 0.0d0; arr(j) = 1.0d0 + CALL putrow(mat, j, arr) + END DO +! + DEALLOCATE(arr) +! + END SUBROUTINE ibcmat +!+++ + SUBROUTINE ibcrhs(rhs, ygrid, bc) +! +! Apply BC on RHS +! + DOUBLE PRECISION, INTENT(inout) :: rhs(:) + DOUBLE PRECISION, INTENT(in) :: ygrid(:) + TYPE(dirich) :: bc +! + INTEGER :: nrank, ny, m, i0, i1 + DOUBLE PRECISION :: zsum +!=========================================================================== +! 1.0 Prologue +! + nrank = SIZE(rhs,1) + ny = bc%n2 + m = bc%mbess +!=========================================================================== +! 2.0 Unicity at the axis +! +! The vertical sum +! + zsum = SUM(rhs(1:ny)) + rhs(ny) = zsum + rhs(1:ny-1) = 0.0d0 +!=========================================================================== +! 3.0 Dirichlet on right boundary +! +! Get spline coefs at boundary r=1 +! + SELECT CASE (bc%meth) + CASE(1) + CALL dirich_interp(ygrid, bc, frhs) + CASE(2) + CALL dirich_minres(ygrid, bc, frhs) + END SELECT +! +! Modify RHS +! + i0 = bc%i0 + i1 = bc%i1 + rhs(i0:i1) = rhs(i0:i1) - MATMUL(bc%amat, bc%g) + rhs(i1+1:nrank) = bc%g(1:ny) + CONTAINS + DOUBLE PRECISION FUNCTION frhs(x) + DOUBLE PRECISION, INTENT(in) :: x + frhs = COS(m*x) + END FUNCTION frhs + END SUBROUTINE ibcrhs +!++++ + SUBROUTINE coefeq(x, y, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x, y + INTEGER, INTENT(out) :: idt(:,:), idw(:,:) + DOUBLE PRECISION, INTENT(out) :: c(:) +! +! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy +! + c(1) = x ! + idt(1,1) = 1 + idt(1,2) = 0 + idw(1,1) = 1 + idw(1,2) = 0 +! + c(2) = 1.d0/x + idt(2,1) = 0 + idt(2,2) = 1 + idw(2,1) = 0 + idw(2,2) = 1 + END SUBROUTINE coefeq +!++++ + SUBROUTINE dirich_interp(ygrid, bc, frhs) +! +! Dirichlet BC by interpolation +! + USE bsplines + DOUBLE PRECISION, INTENT(in) :: ygrid(:) + TYPE(dirich) :: bc + INTERFACE + DOUBLE PRECISION FUNCTION frhs(x) + DOUBLE PRECISION, INTENT(in) :: x + END FUNCTION frhs + END INTERFACE +! + INTEGER :: nidbas, dim, n2, i + DOUBLE PRECISION :: shifty + DOUBLE PRECISION :: gval(SIZE(ygrid)) + DOUBLE PRECISION, ALLOCATABLE :: coefs(:) + DOUBLE PRECISION :: ygrid_interp(SIZE(ygrid)) + TYPE(spline1d) :: spl_interp +! + nidbas = bc%nidbas2 + n2 = bc%n2 +! + IF(MODULO(nidbas,2) .EQ. 0 ) THEN + shifty = 0.5d0*(ygrid(2)-ygrid(1)) + ygrid_interp(:) = ygrid(:) + shifty + ELSE + ygrid_interp(:) = ygrid(:) + END IF + CALL set_splcoef(nidbas, ygrid_interp, spl_interp, period=.TRUE.) + CALL get_dim(spl_interp, dim) + ALLOCATE(coefs(dim)) +! + DO i=1,SIZE(ygrid) + gval(i) = frhs(ygrid_interp(i)) + END DO + CALL get_splcoef(spl_interp, gval, coefs) +! +! Store spline coefs in bc +! + bc%g(1:n2) = coefs(1:n2) +! + DEALLOCATE(coefs) + CALL destroy_sp(spl_interp) + END SUBROUTINE dirich_interp +!++++ + SUBROUTINE dirich_minres(xgrid, bc, frhs) +! +! Dirichlet BC by minimization of residual +! + USE bsplines + USE matrix + USE conmat_mod + DOUBLE PRECISION, INTENT(in) :: xgrid(:) + TYPE(dirich) :: bc + INTERFACE + DOUBLE PRECISION FUNCTION frhs(x) + DOUBLE PRECISION, INTENT(in) :: x + END FUNCTION frhs + END INTERFACE +! + INTEGER :: nx, nidbas, ngauss, kl, ku + TYPE(periodic_mat) :: mass_mat + TYPE(spline1d) :: spl +! + nidbas = bc%nidbas2 + ngauss = nidbas+1 + nx = bc%n2 + kl = nidbas + ku = kl +! + CALL set_spline(nidbas, ngauss, xgrid, spl, period=.TRUE.) + CALL init(kl, ku, nx, 1, mass_mat) + CALL conmat(spl, mass_mat, coefeq_mass) + CALL conrhs(spl, bc%g, frhs) + CALL factor(mass_mat) + CALL bsolve(mass_mat, bc%g) +! + CALL destroy(mass_mat) + CALL destroy_sp(spl) +! + CONTAINS + SUBROUTINE coefeq_mass(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE PRECISION, INTENT(out) :: c(:) + c(1) = 1.0d0 + idt(1) = 0 + idw(1) = 0 + END SUBROUTINE coefeq_mass + END SUBROUTINE dirich_minres +!++++ + DOUBLE PRECISION FUNCTION err2_norm(spl, jder, mbess, fexact) +! +! Compute error L2 norm unsing Gauss points +! + USE bsplines + TYPE(spline2d) :: spl + INTEGER, INTENT(in) :: jder(:) + INTEGER, INTENT(in) :: mbess + INTERFACE + DOUBLE PRECISION FUNCTION fexact(m,x,y) + INTEGER, INTENT(in) :: m + DOUBLE PRECISION, INTENT(in) :: x, y + END FUNCTION fexact + END INTERFACE +! + DOUBLE PRECISION, POINTER :: xg1(:,:), xg2(:,:), wg1(:,:), wg2(:,:) + INTEGER :: i1, ig1, n1, nidbas1, ndim1, ng1 + INTEGER :: i2, ig2, n2, nidbas2, ndim2, ng2 + DOUBLE PRECISION :: contrib + DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:), sol(:,:) +! +! Gauss points and weights on all intervals +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) + xg1 => spl%sp1%gausx ! xg1(ng1,n1) + wg1 => spl%sp1%gausw ! wg1(ng1,n1) + ng1 = SIZE(xg1,1) + xg2 => spl%sp2%gausx + wg2 => spl%sp2%gausw + ng2 = SIZE(xg2,1) +! + err2_norm = 0.0d0 + ALLOCATE(x(ng1), y(ng2)) + ALLOCATE(sol(ng1,ng2)) + DO i1=1,n1 + x=xg1(:,i1) + DO i2=1,n2 + y=xg2(:,i2) + CALL gridval(spl, x, y, sol, jder) + DO ig1=1,ng1 + DO ig2=1,ng2 + contrib = wg1(ig1,i1)*wg2(ig2,i2)*(sol(ig1,ig2) - & + & fexact(mbess,x(ig1),y(ig2)))**2 + err2_norm = err2_norm + x(ig1)*contrib !use same inner-product in weak-form + END DO + END DO + END DO + END DO + DEALLOCATE(x) + DEALLOCATE(y) + DEALLOCATE(sol) + err2_norm = SQRT(err2_norm) + END FUNCTION err2_norm +END MODULE poisson_mod diff --git a/examples/dirichlet/run_poisson.sh b/examples/dirichlet/run_poisson.sh new file mode 100644 index 0000000..dafc434 --- /dev/null +++ b/examples/dirichlet/run_poisson.sh @@ -0,0 +1,46 @@ +# +# @file run_poisson.sh +# +# @brief +# +# @copyright +# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +# SPC (Swiss Plasma Center) +# +# spclibs is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . +# +# @authors +# (in alphabetical order) +# @author Trach-Minh Tran +# +#!/bin/sh +EXEC=./poisson + +cat > in0 < in1 + $EXEC < in1 | grep 'Discretization errors ' +done +rm -f in? diff --git a/examples/dismat.f90 b/examples/dismat.f90 new file mode 100644 index 0000000..a15e8df --- /dev/null +++ b/examples/dismat.f90 @@ -0,0 +1,157 @@ +!> +!> @file dismat.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +SUBROUTINE dismat(spl, mat) +! +! Assembly of FE matrix mat using spline spl +! + USE bsplines + USE matrix + IMPLICIT NONE + TYPE(spline2d), INTENT(in) :: spl + TYPE(gbmat), INTENT(inout) :: mat +! + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: n2, nidbas2, ndim2, ng2 + INTEGER :: i, j, ig1, ig2 + INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol + DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:) + DOUBLE PRECISION:: contrib +! + INTEGER :: kterms ! Number of terms in weak form + INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:) ! Derivative order + DOUBLE PRECISION, ALLOCATABLE :: coefs(:,:,:) ! Terms in weak form + INTEGER, ALLOCATABLE :: left1(:), left2(:) +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) + WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1 + WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2 +! +! Gauss quadature +! + CALL get_gauss(spl%sp1, ng1) + CALL get_gauss(spl%sp2, ng2) + ALLOCATE(xg1(ng1), wg1(ng1)) + ALLOCATE(xg2(ng1), wg2(ng1)) +! +! Weak form +! + kterms = mat%nterms + ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2)) + ALLOCATE(coefs(kterms,ng1,ng2)) +! + ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative + ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! +!=========================================================================== +! 2.0 Assembly loop +! + ALLOCATE(left1(ng1)) + ALLOCATE(left2(ng2)) + DO i=1,n1 + CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) + left1 = i + CALL basfun(xg1, spl%sp1, fun1, left1) + DO j=1,n2 + CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) + left2 = j + CALL basfun(xg2, spl%sp2, fun2, left2) +! + DO ig1=1,ng1 + DO ig2=1,ng2 + CALL coefeq(xg1(ig1), xg2(ig2), & + & idert(:,:,ig1,ig2), & + & iderw(:,:,ig1,ig2), & + & coefs(:,ig1,ig2)) + END DO + END DO +! + DO iw1=0,nidbas1 ! Weight function in dir 1 + igw1 = i+iw1 + DO iw2=0,nidbas2 ! Weight function in dir 2 + igw2 = MODULO(j+iw2-1, n2) + 1 + irow = igw2 + (igw1-1)*n2 + DO it1=0,nidbas1 ! Test function in dir 1 + igt1 = i+it1 + DO it2=0,nidbas2 ! Test function in dir 2 + igt2 = MODULO(j+it2-1, n2) + 1 + jcol = igt2 + (igt1-1)*n2 +!------------- + contrib = 0.0d0 + DO ig1=1,ng1 + DO ig2=1,ng2 + DO iterm=1,kterms + contrib = contrib + & + & fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * & + & fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * & + & coefs(iterm,ig1,ig2) * & + & fun2(it2,idert(iterm,2,ig1,ig2),ig2) * & + & fun1(it1,idert(iterm,1,ig1,ig2),ig1) * & + & wg1(ig1) * wg2(ig2) + END DO + END DO + END DO + CALL updtmat(mat, irow, jcol, contrib) +!------------- + END DO + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xg1, wg1, fun1) + DEALLOCATE(xg2, wg2, fun2) + DEALLOCATE(idert, iderw, coefs) + DEALLOCATE(left1,left2) +! +CONTAINS + SUBROUTINE coefeq(x, y, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x, y + INTEGER, INTENT(out) :: idt(:,:), idw(:,:) + DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1)) +! +! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy +! + c(1) = x ! + idt(1,1) = 1 + idt(1,2) = 0 + idw(1,1) = 1 + idw(1,2) = 0 +! + c(2) = 1.d0/x + idt(2,1) = 0 + idt(2,2) = 1 + idw(2,1) = 0 + idw(2,2) = 1 + END SUBROUTINE coefeq +END SUBROUTINE dismat diff --git a/examples/disrhs.f90 b/examples/disrhs.f90 new file mode 100644 index 0000000..24cbb84 --- /dev/null +++ b/examples/disrhs.f90 @@ -0,0 +1,198 @@ +!> +!> @file disrhs.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +SUBROUTINE disrhs(mbess, spl, rhs) +! +! Assembly the RHS using 2d spline spl +! + USE bsplines + IMPLICIT NONE + INTEGER, INTENT(in) :: mbess + TYPE(spline2d), INTENT(in) :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:) + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: n2, nidbas2, ndim2, ng2 + INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank + DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) + DOUBLE PRECISION:: contrib +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) +! + ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) + ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) +! +! Gauss quadature +! + CALL get_gauss(spl%sp1, ng1) + CALL get_gauss(spl%sp2, ng2) + WRITE(*,'(/a, 2i3)') 'Gauss points and weights, ngauss =', ng1, ng2 + ALLOCATE(xg1(ng1), wg1(ng1)) + ALLOCATE(xg2(ng1), wg2(ng1)) +!=========================================================================== +! 2.0 Assembly loop +! + nrank = SIZE(rhs) + rhs(1:nrank) = 0.0d0 +! + DO i=1,n1 + CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) + DO ig1=1,ng1 + CALL basfun(xg1(ig1), spl%sp1, fun1, i) + DO j=1,n2 + CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) + DO ig2=1,ng2 + CALL basfun(xg2(ig2), spl%sp2, fun2, j) + contrib = wg1(ig1)*wg2(ig2) * & + & rhseq(xg1(ig1),xg2(ig2), mbess) + DO k1=0,nidbas1 + i1 = i+k1 + DO k2=0,nidbas2 + j2 = MODULO(j+k2-1,n2) + 1 + ij = j2 + (i1-1)*n2 + rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1) + END DO + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xg1, wg1, fun1) + DEALLOCATE(xg2, wg2, fun2) +! +CONTAINS + DOUBLE PRECISION FUNCTION rhseq(x1, x2, m) + DOUBLE PRECISION, INTENT(in) :: x1, x2 + INTEGER, INTENT(in) :: m + rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2) + END FUNCTION rhseq +END SUBROUTINE disrhs +! +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +SUBROUTINE disrhs3(mbess, npow, spl, rhs) +! +! Assembly the RHS using 3d spline spl +! + USE bsplines + IMPLICIT NONE + INTEGER, INTENT(in) :: mbess, npow + TYPE(spline2d1d), TARGET :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:,:) +! + TYPE(spline1d), POINTER :: sp1, sp2, sp3 + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: n2, nidbas2, ndim2, ng2 + INTEGER :: n3, nidbas3, ndim3, ng3 + INTEGER :: i, j, k, ig1, ig2, ig3, k1, k2, k3, i1, j2, ij, kk, nrank + DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg3(:), wg3(:), fun3(:,:) + DOUBLE PRECISION:: contrib +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + sp1 => spl%sp12%sp1 + sp2 => spl%sp12%sp2 + sp3 => spl%sp3 +! + CALL get_dim(sp1, ndim1, n1, nidbas1) + CALL get_dim(sp2, ndim2, n2, nidbas2) + CALL get_dim(sp3, ndim3, n3, nidbas3) +! + ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) + ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) + ALLOCATE(fun3(0:nidbas3,1)) ! needs only basis functions (no derivatives) +! +! Gauss quadature +! + CALL get_gauss(sp1, ng1) + CALL get_gauss(sp2, ng2) + CALL get_gauss(sp3, ng3) + WRITE(*,'(/a, 3i3)') 'Gauss points and weights, ngauss =', ng1, ng2, ng3 + ALLOCATE(xg1(ng1), wg1(ng1)) + ALLOCATE(xg2(ng2), wg2(ng2)) + ALLOCATE(xg3(ng3), wg3(ng3)) +!=========================================================================== +! 2.0 Assembly loop +! + nrank = SIZE(rhs,1) + rhs(1:nrank,1:n3) = 0.0d0 +! + DO i=1,n1 + CALL get_gauss(sp1, ng1, i, xg1, wg1) + DO ig1=1,ng1 + CALL basfun(xg1(ig1), sp1, fun1, i) + DO j=1,n2 + CALL get_gauss(sp2, ng2, j, xg2, wg2) + DO ig2=1,ng2 + CALL basfun(xg2(ig2), sp2, fun2, j) + DO k=1,n3 + CALL get_gauss(sp3, ng3, k, xg3, wg3) + DO ig3=1,ng3 + CALL basfun(xg3(ig3), sp3, fun3, k) + contrib = wg1(ig1)*wg2(ig2)*wg3(ig3) * & + & rhseq(xg1(ig1), xg2(ig2), xg3(ig3), mbess, npow) + DO k1=0,nidbas1 + i1 = i+k1 + DO k2=0,nidbas2 + j2 = MODULO(j+k2-1,n2) + 1 + ij = j2 + (i1-1)*n2 + DO k3=0,nidbas3 + kk = MODULO(k+k3-1,n3) + 1 + rhs(ij,kk) = rhs(ij, kk) + & + & contrib*fun1(k1,1)*fun2(k2,1)*fun3(k3,1) + END DO + END DO + END DO + END DO + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xg1, wg1, fun1) + DEALLOCATE(xg2, wg2, fun2) + DEALLOCATE(xg3, wg3, fun3) +! +CONTAINS + DOUBLE PRECISION FUNCTION rhseq(x1, x2, x3, m, n) + DOUBLE PRECISION, INTENT(in) :: x1, x2, x3 + INTEGER, INTENT(in) :: m, n + rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)*COS(x3)**n + END FUNCTION rhseq +END SUBROUTINE disrhs3 diff --git a/examples/driv1.f90 b/examples/driv1.f90 new file mode 100644 index 0000000..03a9329 --- /dev/null +++ b/examples/driv1.f90 @@ -0,0 +1,189 @@ +!> +!> @file driv1.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Basis splines on a 2d grid. +! + USE bsplines + USE futils + IMPLICIT NONE + TYPE(spline1d) :: spx, spy + INTEGER :: nx=10, ny=8, nidbas=2, ngauss=4, npts=1000 + DOUBLE PRECISION :: a, b, coefx(5), coefy(5) + DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), ygrid(:), fun(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xp(:), funxp(:,:), yp(:), funyp(:,:) + DOUBLE PRECISION :: dx, dy + INTEGER :: i, j, left + CHARACTER(len=256) :: title + INTEGER :: fid + NAMELIST /newrun/ nx, ny, nidbas, ngauss, a, b, coefx, coefy +!=========================================================================== + nidbas = 3 + ngauss = 4 + nx = 10 ! Number of intevals in x + ny = 8 ! Number of intevals in y + a = 0.0d0 ! Left boundary of interval + b = 1.0d0 ! Right boundary of interval + coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + READ(*,newrun) + WRITE(*,newrun) +!=========================================================================== +! 1.0 Set up grids +! + ALLOCATE( xgrid(0:nx) ) + xgrid(0) = a + xgrid(nx) = b + CALL meshdist(coefx, xgrid, nx) +! +!!$ dy = 2.d0*pi/REAL(ny) + dy = 1.0d0 + ALLOCATE( ygrid(0:ny) ) + ygrid(0) = a + ygrid(ny) = b + CALL meshdist(coefy, ygrid, ny) +!=========================================================================== +! 2.0 Set up splines on (x,y) +! + CALL set_spline(nidbas, ngauss, xgrid, spx) + CALL set_spline(nidbas, ngauss, ygrid, spy, period=.TRUE.) + WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of x-splines', LBOUND(spx%knots), & + & ':',UBOUND(spx%knots), spx%knots + WRITE(*,'(2(a,i5, 2x))') 'NX =', nx, 'DIM =', spx%dim + WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of y-splines', LBOUND(spy%knots), & + & ':',UBOUND(spy%knots), spy%knots + WRITE(*,'(2(a,i5, 2x))') 'NY =', ny, 'DIM =', spy%dim +!=========================================================================== +! 3.0 Graph the splines on (x,y) +! + ALLOCATE( fun(nidbas+1,1) ) ! Only 0-th derivative +!!$ ALLOCATE( fun(nidbas+1,0:1) ) ! Only 0-th derivative + ALLOCATE( xp(npts), funxp(npts,0:spx%dim-1) ) + ALLOCATE( yp(npts-1), funyp(npts-1,0:spy%dim-1) ) +! +! Splines in X (non-peridic) +! + WRITE(*,'(a)') 'Splines in x' + dx = (xgrid(nx)-xgrid(0)) / REAL(NPTS-1) + DO i=1,npts + xp(i) = xgrid(0) + (i-1)*dx + CALL locintv(spx, xp(i), left) + CALL basfun(xp(i), spx, fun, left+1) + funxp(i,left:left+nidbas) = fun(:,1) + END DO +! +! Splines in Y (periodic) +! + WRITE(*,'(a)') 'Splines in y' + dy = (ygrid(ny)-ygrid(0)) / REAL(NPTS-1) + DO i=1,npts-1 + yp(i) = ygrid(0) + (i-1)*dy + CALL locintv(spy, yp(i), left) + CALL basfun(yp(i), spy, fun, left+1) + funyp(i,left:left+nidbas) = fun(:,1) + END DO +! +! Create hdf5 file +! + CALL creatf('driv1.h5', fid, real_prec='d') +! + WRITE(title,'(a,i3,5x,a,i6)') 'Splines of degree =', nidbas, 'NX =', nx + CALL putarr(fid, 'X', xp) + CALL putarr(fid, 'KNOTSX', spx%knots) + CALL putarr(fid, 'splinesx', funxp, TRIM(title)) + CALL putarr(fid, 'KNOTSY', spy%knots) +! + WRITE(title,'(a,i3,5x,a,i6)') 'Periodic splines of degree =', nidbas, 'NY =', ny + CALL putarr(fid, 'Y', yp) + CALL putarr(fid, 'splinesy', funyp, TRIM(title)) + CALL closef(fid) +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(fun) + DEALLOCATE(xgrid, ygrid) + DEALLOCATE(xp, funxp) + CALL destroy_sp(spx) + CALL destroy_sp(spy) +END PROGRAM main +!+++ +SUBROUTINE meshdist(c, x, nx) +! +! Construct an 1d non-equidistant mesh given a +! mesh distribution function. +! + IMPLICIT NONE + DOUBLE PRECISION, INTENT(in) :: c(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + INTEGER :: nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) +CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) + END FUNCTION fdist +END SUBROUTINE meshdist +!+++ diff --git a/examples/driv2.f90 b/examples/driv2.f90 new file mode 100644 index 0000000..b67b578 --- /dev/null +++ b/examples/driv2.f90 @@ -0,0 +1,180 @@ +!> +!> @file driv2.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Integration of splines +! + USE bsplines +! + IMPLICIT NONE + TYPE(spline1d) :: spx + INTEGER :: nx, nidbas, ngauss + DOUBLE PRECISION :: a, b, coefx(5) + DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), fun(:,:), finteg(:) + DOUBLE PRECISION, ALLOCATABLE :: xg(:), wg(:) + DOUBLE PRECISION :: support, res, err + INTEGER :: i, ig, j, jj, left + INTEGER :: dim, ng + LOGICAL :: periodic + NAMELIST /newrun/ periodic, nx, nidbas, ngauss, a, b, coefx +!=========================================================================== +! 1.0 Set up grids +! +! Read in data specific to run +! + periodic = .FALSE. + nidbas = 3 + ngauss = 4 + nx = 10 ! Number of intevals in x + a = 0.0d0 ! Left boundary of interval + b = 1.0d0 ! Right boundary of interval + coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + READ(*,newrun) + WRITE(*,newrun) +! +! Define grid/knots +! + ALLOCATE(xgrid(0:nx)) + xgrid(0) = a + xgrid(nx) = b + CALL meshdist(coefx, xgrid, nx) + WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) + WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx) + WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx) +!=========================================================================== +! 2.0 Set up splines +! + CALL set_spline(nidbas, ngauss, xgrid, spx, periodic) + WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of x-splines', LBOUND(spx%knots), & + & ':',UBOUND(spx%knots), spx%knots + WRITE(*,'(2(a,i5, 2x))') 'NX =', nx, 'DIM =', spx%dim +!=========================================================================== +! 3.0 Integrate all splines +! + CALL get_dim(spx, dim) + ALLOCATE( finteg(0:dim-1), xg(ngauss), wg(ngauss), fun(0:nidbas,1) ) + finteg = 0.0 + fun = 0.0 + DO i=1,nx ! Loop thru the intervals + CALL get_gauss(spx, ng, i, xg, wg) + DO ig=1,ng ! Loop thru Gauss points + CALL basfun(xg(ig), spx, fun, i) + left = i-1 + DO j=0,nidbas ! Loop thru the splines in this interval + jj = left+j + IF( periodic ) jj = MODULO(left+j, nx) + finteg(jj) = finteg(jj) + wg(ig)*fun(j,1) + END DO + END DO + END DO +!!$ IF( periodic ) THEN +!!$ DO i=nx,dim-1 +!!$ finteg(i) = finteg(i-nx) +!!$ END DO +!!$ END IF +! + WRITE(*,'(a/(10f10.5))') 'Integrals of splines', finteg + PRINT*, 'Sum of finteg', SUM(finteg) +!!$ IF( periodic ) THEN +!!$ PRINT*, 'Sum of finteg', SUM(finteg(0:nx-1)) +!!$ ELSE +!!$ PRINT*, 'Sum of finteg', SUM(finteg) +!!$ END IF +! + WRITE(*,'(a/(10f10.5))') 'Integrals of splines from module', spx%intspl + PRINT*, 'Sum of finteg', SUM(spx%intspl) + WRITE(*,'(a5,4a12)') '#', 'I', 'S', '(p+1)I/S', '(p+1)I/S-1' + DO i=0,spx%dim-1 + support = spx%knots(i+1)-spx%knots(i-nidbas) + res = spx%intspl(i)/support*(nidbas+1) + err = res - 1.0d0 + WRITE(*,'(i5,4(1pe12.4))') i, spx%intspl(i), support, res, err + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE( finteg, xg, wg, fun ) + DEALLOCATE(xgrid) + CALL destroy_sp(spx) +END PROGRAM main +!+++ +SUBROUTINE meshdist(c, x, nx) +! +! Construct an 1d non-equidistant mesh given a +! mesh distribution function. +! + IMPLICIT NONE + DOUBLE PRECISION, INTENT(in) :: c(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + INTEGER :: nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) +CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) + END FUNCTION fdist +END SUBROUTINE meshdist +!+++ diff --git a/examples/driv3.f90 b/examples/driv3.f90 new file mode 100644 index 0000000..8df6e99 --- /dev/null +++ b/examples/driv3.f90 @@ -0,0 +1,159 @@ +!> +!> @file driv3.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Computation of croos mass matrix between two splines sp1 & sp2 +! sp1 and sp2 can be splines of any type (i.e. either set up with set_spline or +! set_splcoef) and of any order. + +! + USE bsplines +! + IMPLICIT NONE + TYPE(spline1d) :: sp1, sp2 + INTEGER :: nx, nidbas1, nidbas2, ngauss + INTEGER :: i, j + DOUBLE PRECISION :: a, b, coefx(5) + DOUBLE PRECISION, ALLOCATABLE :: xgrid(:) + DOUBLE PRECISION, DIMENSION(:, :), POINTER :: MassMat + LOGICAL :: periodic1, periodic2 + NAMELIST /newrun/ nx, a, b, coefx, nidbas1, nidbas2, periodic1, periodic2 +!=========================================================================== +! 1.0 Set up grids +! +! Read in data specific to run +! + nx = 8 ! Number of intevals in x + a = 0.0d0 ! Left boundary of interval + b = 1.0d0 ! Right boundary of interval + coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + periodic1 = .FALSE. + periodic2 = .FALSE. + nidbas1 = 3 + nidbas2 = 2 + READ(*,newrun) + WRITE(*,newrun) +! +! Define grid/knots +! + ALLOCATE(xgrid(0:nx)) + xgrid(0 ) = a + xgrid(nx) = b + CALL meshdist(coefx, xgrid, nx) + WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) +!=========================================================================== +! 2.0 Set up splines +! + ngauss = 1 ! Gauss points initialized with set_spline are in fact not used + ! for computing cross mass matrix + ! First spline set up as for solving a PDE with FEMs + CALL set_spline(nidbas1, ngauss, xgrid, sp1, periodic1) + + ! Second spline set up as for interpolation + CALL set_splcoef(nidbas2, xgrid, sp2, periodic2) + + WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of spline sp1', LBOUND(sp1%knots), & + & ':',UBOUND(sp1%knots), sp1%knots + WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of spline sp2', LBOUND(sp2%knots), & + & ':',UBOUND(sp2%knots), sp2%knots + WRITE(*,'(3(a,i5, 2x))') 'NX =', nx, 'DIM sp1 =', sp1%dim, 'DIM sp2 =', sp2%dim +!=========================================================================== +! 3.0 Compute cross mass matrix +! + CALL CompMassMatrix(sp1, sp2, a, b, MassMat) + + WRITE(*, "(a)") "Cross-mass matrix between splines sp1 & sp2:" + DO i = 1, SIZE(MassMat, 1) + WRITE(*, "(15f13.5)") (MassMat(i, j), j = 1, MIN(SIZE(MassMat, 2), 15)) + END DO + +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(MassMat) + DEALLOCATE(xgrid) + CALL destroy_sp(sp1) + CALL destroy_sp(sp2) + +END PROGRAM main +!+++ +SUBROUTINE meshdist(c, x, nx) +! +! Construct an 1d non-equidistant mesh given a +! mesh distribution function. +! + IMPLICIT NONE + DOUBLE PRECISION, INTENT(in) :: c(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + INTEGER :: nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) +CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) + END FUNCTION fdist +END SUBROUTINE meshdist +!+++ diff --git a/examples/driv4.f90 b/examples/driv4.f90 new file mode 100644 index 0000000..63711b7 --- /dev/null +++ b/examples/driv4.f90 @@ -0,0 +1,225 @@ +!> +!> @file driv4.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Computation of croos mass matrix between two splines sp1 & sp2 +! sp1 and sp2 can be splines of any type (i.e. either set up with set_spline or +! set_splcoef) and of any order. + +! + USE bsplines + USE matrix +! + IMPLICIT NONE + TYPE(gbmat) :: matm + INTEGER :: mrows, ncols, kl, ku + DOUBLE PRECISION, ALLOCATABLE :: avec(:,:), bvec(:,:), matfull(:,:) +! + TYPE(zgbmat) :: zmatm + DOUBLE COMPLEX, ALLOCATABLE :: zavec(:,:), zbvec(:,:) + DOUBLE PRECISION :: dznrm2 +! + TYPE(spline1d) :: sp1, sp2 + INTEGER :: nx, nidbas1, nidbas2, ngauss + INTEGER :: i, j + DOUBLE PRECISION :: a, b, coefx(5) + DOUBLE PRECISION, ALLOCATABLE :: xgrid(:) + DOUBLE PRECISION, DIMENSION(:, :), POINTER :: MassMat + LOGICAL :: periodic1, periodic2 + NAMELIST /newrun/ nx, a, b, coefx, nidbas1, nidbas2, periodic1, periodic2 +!=========================================================================== +! 1.0 Set up grids +! +! Read in data specific to run +! + nx = 8 ! Number of intevals in x + a = 0.0d0 ! Left boundary of interval + b = 1.0d0 ! Right boundary of interval + coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + periodic1 = .FALSE. + periodic2 = .FALSE. + nidbas1 = 3 + nidbas2 = 2 + READ(*,newrun) + WRITE(*,newrun) +! +! Define grid/knots +! + ALLOCATE(xgrid(0:nx)) + xgrid(0 ) = a + xgrid(nx) = b + CALL meshdist(coefx, xgrid, nx) + WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) +!=========================================================================== +! 2.0 Set up splines +! + ngauss = 1 ! Gauss points initialized with set_spline are in fact not used + ! for computing cross mass matrix + ! First spline set up as for solving a PDE with FEMs + CALL set_spline(nidbas1, ngauss, xgrid, sp1, periodic1) + + ! Second spline set up as for interpolation +!!$ CALL set_splcoef(nidbas2, xgrid, sp2, periodic2) + CALL set_spline(nidbas2, ngauss, xgrid, sp2, periodic2) + + WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of spline sp1', LBOUND(sp1%knots), & + & ':',UBOUND(sp1%knots), sp1%knots + WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of spline sp2', LBOUND(sp2%knots), & + & ':',UBOUND(sp2%knots), sp2%knots + WRITE(*,'(3(a,i5, 2x))') 'NX =', nx, 'DIM sp1 =', sp1%dim, 'DIM sp2 =', sp2%dim +!=========================================================================== +! 3.0 Compute cross mass matrix +! + CALL CompMassMatrix(sp1, sp2, a, b, MassMat) + + WRITE(*, "(a)") "Cross-mass matrix between splines sp1 & sp2:" + DO i = 1, SIZE(MassMat, 1) + WRITE(*, "(15f10.5)") (MassMat(i, j), j = 1, MIN(SIZE(MassMat, 2), 15)) + END DO +! +! Should equal to 1 for splines i "not close to the boundaries": +! p1 .LT. i .LE. N +! + WRITE(*,'(/a/(15f8.5))') 'Sum of cols * NX', SUM(MassMat,dim=2)*REAL(nx,8) +!=========================================================================== +! 3.0 Use DGB matrice +! +!!$ mrows = nx+nidbas1 +!!$ ncols = nx+nidbas2 + CALL get_dim(sp1, mrows) + CALL get_dim(sp2, ncols) + kl = nidbas1 + ku = nidbas2 + CALL init(kl, ku, ncols, 1, matm, mrows=mrows) + WRITE(*,'(/a, 2i3)') 'Band matrix:, kl, ku =', kl, ku +! + CALL CompMassMatrix(sp1, sp2, a, b, matm) +! + DO i=1,SIZE(matm%val,1) + WRITE(*,'(15f10.5)') matm%val(i,:) + END DO +! + WRITE(*,'(/a)') 'Full matrix' + ALLOCATE(matfull(mrows,ncols)) + matfull = 0.0d0 + DO i=1,mrows + CALL getrow(matm, i, matfull(i,:)) + WRITE(*,'(15f10.5)') matfull(i,:) + END DO + WRITE(*,'(a,1pe12.4)') 'error =', MAXVAL(ABS(matfull-MassMat)) +! +! Check VMX + ALLOCATE(avec(ncols,2)) + ALLOCATE(bvec(mrows,2)) + avec = 1.0d0 + bvec = vmx(matm,avec)*REAL(nx,8) + WRITE(*,'(a)') 'M*a, with a=1' + DO j=1,2 + WRITE(*,'(15f8.5)') bvec(:,j) + END DO +!=========================================================================== +! 4.0 Test complex version +! + CALL init(kl, ku, ncols, 1, zmatm, mrows=mrows) + CALL CompMassMatrix(sp1, sp2, a, b, zmatm) + ALLOCATE(zavec(ncols,2)) + ALLOCATE(zbvec(mrows,2)) + zavec = (1.0d0,0.0d0) + zbvec = vmx(zmatm,zavec)*REAL(nx,8) + zbvec = zbvec-bvec + WRITE(*,'(/a)') 'Check complex version' + WRITE(*,'(a,2(1pe12.4))') 'Norm of errors =', & + & (dznrm2(mrows, zbvec(1,j), 1), j=1,2) + zmatm%val = zmatm%val-matm%val + WRITE(*,'(a,1pe12.4)') 'Diff of matrix elements =', MAXVAL(ABS(zmatm%val)) +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(MassMat) + DEALLOCATE(xgrid) + CALL destroy_sp(sp1) + CALL destroy_sp(sp2) + call destroy(matm) + +END PROGRAM main +!+++ +SUBROUTINE meshdist(c, x, nx) +! +! Construct an 1d non-equidistant mesh given a +! mesh distribution function. +! + IMPLICIT NONE + DOUBLE PRECISION, INTENT(in) :: c(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + INTEGER :: nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) +CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) + END FUNCTION fdist +END SUBROUTINE meshdist +!+++ diff --git a/examples/extra.c b/examples/extra.c new file mode 100644 index 0000000..572cecd --- /dev/null +++ b/examples/extra.c @@ -0,0 +1,49 @@ +/** + * @file extra.c + * + * @brief + * + * @copyright + * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) + * SPC (Swiss Plasma Center) + * + * spclibs is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * + * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + * + * @authors + * (in alphabetical order) + * @author Trach-Minh Tran + */ +/**********************************************/ +#include + +void quit_(); +void dump_(char *filename, int *l); + +void Dump(filename) +char *filename; +{ + /* The user's dump routine should go here. */ + int l = strlen(filename); + dump_(filename, &l); + +} /* End DUMP */ + +/**********************************************/ + +void Quit() +{ + /* The user's quit routine should go here. */ + + quit_(); + +} /* End QUIT */ diff --git a/examples/fit1d.f90 b/examples/fit1d.f90 new file mode 100644 index 0000000..db3b786 --- /dev/null +++ b/examples/fit1d.f90 @@ -0,0 +1,251 @@ +!> +!> @file fit1d.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Fit grid value function to a spline of any order +! + USE bsplines + USE futils +! + IMPLICIT NONE + INTEGER :: nx, nidbas + DOUBLE PRECISION :: a, b, coefx(5) +!!$ DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), fgrid(:), coefs(:) + DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), fgrid(:,:), coefs(:,:) + INTEGER :: i, dim, left + TYPE(spline1d) :: spl + DOUBLE PRECISION :: dx + INTEGER :: npts + DOUBLE PRECISION, ALLOCATABLE :: xpt(:), fcalc(:), fexact(:), err(:) + DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) + DOUBLE PRECISION, POINTER :: splines(:,:) => null() +! + CHARACTER(len=128) :: file='fit1d.h5' + INTEGER :: fid +! + NAMELIST /newrun/ nx, nidbas, a, b, coefx +!=========================================================================== +! 1.0 Prologue +! +! Read in data specific to run +! + nx = 10 ! Number of intevals in x + a = 0.0d0 ! Left boundary of interval + b = 1.0d0 ! Right boundary of interval + coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function +! + READ(*,newrun) + WRITE(*,newrun) +! +! Define grid and function values +! + ALLOCATE(xgrid(0:nx), fgrid(0:nx,1)) + xgrid(0) = a + xgrid(nx) = b + CALL meshdist(coefx, xgrid, nx) + fgrid(:,1) = func(xgrid) + WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid + WRITE(*,'(a/(10f8.3))') 'fgrid', fgrid +! +! Create hdf5 file +! + CALL creatf(file, fid, 'FIT1D Result File') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NIDBAS', nidbas) +!=========================================================================== +! 2.0 Spline interpolation +! +! Set up the spline interpolation +!!$ CALL splcoef_setup(nidbas, xgrid, spl) + CALL set_splcoef(nidbas, xgrid, spl) + PRINT*, 'nlequid =', spl%nlequid +! +! Compute spline values and derivatives at Boundaries + ALLOCATE(fun(nidbas+1,0:nidbas)) + WRITE(*,'(/a)') 'spline at the left boundary' + CALL locintv(spl, a, left) + CALL basfun(a, spl, fun, left+1) + DO i=0,nidbas + WRITE(*,'(8(1pe12.4))') fun(:,i) + END DO +! + WRITE(*,'(/a)') 'spline at the right boundary' + CALL locintv(spl, b, left) + CALL basfun(b, spl, fun, left+1) + DO i=0,nidbas + WRITE(*,'(8(1pe12.4))') fun(:,i) + END DO + DEALLOCATE(fun) +! + CALL get_dim(spl, dim) + ALLOCATE(coefs(dim,1)) + WRITE(*,'(2(a,i5, 2x))') 'NX =', nx, 'DIM =', dim + WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of splines', LBOUND(spl%knots), & + & ':',UBOUND(spl%knots), spl%knots +! +! From given grid values fgrid, compute the spline coefs + CALL get_splcoef(spl, fgrid, coefs) + WRITE(*,'(a/(10f8.3))') 'coefs', coefs +! +! Plot all splines + npts = 100 + ALLOCATE(xpt(npts)) + dx = (b-a)/REAL(npts-1) + DO i=1,npts + xpt(i) = a + (i-1)*dx + END DO + CALL allsplines(spl, xpt, splines) + CALL putarr(fid, '/X', xpt) + CALL putarr(fid,'/SPLINES', splines) +! +! Check interpolation + ALLOCATE(fcalc(npts), fexact(npts), err(npts)) + fexact = func(xpt) +! +! Function values + CALL gridval(spl, xpt, fcalc, 0, coefs(:,1)) + err = fexact - fcalc + CALL putarr(fid, '/FEXACT', fexact, 'Exact values') + CALL putarr(fid, '/FCALC', fcalc, 'Interpolated values') + CALL putarr(fid, '/ERROR', err, 'Interpolation errors') + WRITE(*,'(/a,1pe12.3)') 'Norm of error', norm2(err)/norm2(fexact) +! +! Derivatives values + CALL gridval(spl, xpt, fcalc, 1) + fexact = func1(xpt) + err = fexact - fcalc + CALL putarr(fid, '/FEXACT1', fexact, 'Exact values of first derivative') + CALL putarr(fid, '/FCALC1', fcalc, 'Interpolated first derivative') + CALL putarr(fid, '/ERROR1', err, 'Interpolation errors on first derivative') + WRITE(*,'(/a,1pe12.3)') 'Norm of error', norm2(err)/norm2(fexact) +! + +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xpt, splines, fexact, fcalc, err) + DEALLOCATE(xgrid, fgrid) + CALL destroy_sp(spl) + CALL closef(fid) +!=========================================================================== +! +CONTAINS + FUNCTION func(x) + DOUBLE PRECISION, INTENT(in) :: x(:) + DOUBLE PRECISION :: func(SIZE(x)) +!!$ INTEGER :: n +!!$ n = SIZE(x) +!!$ func = 1.d0+x*(1.d0+x*(1.d0+x)) +!!$ func(1:n/2) = 1.0d0 +!!$ func(n/2+1:n) = 0.5d0 + func = EXP(-8.*x*x) + END FUNCTION func +! + FUNCTION func1(x) + DOUBLE PRECISION, INTENT(in) :: x(:) + DOUBLE PRECISION :: func1(SIZE(x)) +!!$ INTEGER :: n +!!$ n = SIZE(x) +!!$ func = 1.d0+x*(1.d0+x*(1.d0+x)) +!!$ func(1:n/2) = 1.0d0 +!!$ func(n/2+1:n) = 0.5d0 + func1 = -16.d0*x*EXP(-8.*x*x) + END FUNCTION func1 +! + FUNCTION norm2(x) +! +! Compute the 2-norm of array x +! + DOUBLE PRECISION :: norm2 + DOUBLE PRECISION, INTENT(in) :: x(:) + DOUBLE PRECISION :: sum2 + INTEGER :: i +! + sum2 = 0.0d0 + DO i=1,SIZE(x,1) + sum2 = sum2 + x(i)**2 + END DO + norm2 = SQRT(sum2) + END FUNCTION norm2 +END PROGRAM main +!+++ +SUBROUTINE meshdist(c, x, nx) +! +! Construct an 1d non-equidistant mesh given a +! mesh distribution function. +! + IMPLICIT NONE + DOUBLE PRECISION, INTENT(in) :: c(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + INTEGER :: nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) +CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) + END FUNCTION fdist +END SUBROUTINE meshdist +!+++ diff --git a/examples/fit1d_cmpl.f90 b/examples/fit1d_cmpl.f90 new file mode 100644 index 0000000..ba3c7db --- /dev/null +++ b/examples/fit1d_cmpl.f90 @@ -0,0 +1,106 @@ +!> +!> @file fit1d_cmpl.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Fit a 1d complex function +! + USE bsplines + IMPLICIT NONE + INTEGER,PARAMETER :: NX=10, NIDBAS=3, NPTS=40 + DOUBLE PRECISION :: pi, dx, xgrid(0:NX), xpt(NPTS), err + DOUBLE COMPLEX, ALLOCATABLE :: coefs(:) + DOUBLE COMPLEX :: fgrid(0:NX), fexact(NPTS), fcalc(NPTS) + INTEGER :: dim, i + TYPE(spline1d) :: spl +!================================================================================ +! +! Define grid and function values on grid +! + pi = 4.0d0*ATAN(1.0d0) + xgrid(0) = 0.0d0 + dx = 2.0d0*pi/NX + DO i=1,NX + xgrid(i) = xgrid(0) + i*dx + END DO +! + fgrid = func(xgrid) +! + WRITE(*,'(2a10)') 'x', 'f' + DO i=0,NX + WRITE(*,'(3f10.4)') xgrid(i), fgrid(i) + END DO +! +! Set up spline +! + CALL set_splcoef(NIDBAS, xgrid, spl, period=.TRUE.) + CALL get_dim(spl, dim) + ALLOCATE(coefs(dim)) +! +! Get Spline coefficients +! + CALL get_splcoef(spl, fgrid, coefs) + WRITE(*,'(a)') 'Interpolation coefs' + DO i=1,dim + WRITE(*,'(2(1pe12.3))') coefs(i) + END DO +! +! Check interpolation +! + CALL RANDOM_NUMBER(xpt) + xpt = (2.0d0*pi) * xpt + fexact = func(xpt) +! + CALL gridval(spl, xpt, fcalc, 0, coefs) +! + WRITE(*,'(a10,2a20)') 'x', 'fexact', 'fcacl' + DO i=1,NPTS + WRITE(*,'(5f10.4)') xpt(i), fexact(i), fcalc(i) + END DO + err = norm2(fcalc-fexact) + WRITE(*,'(a,1pe12.3)') 'error', err +! +! Clean up +! + DEALLOCATE(coefs) + CALL destroy_sp(spl) +!================================================================================ +CONTAINS + FUNCTION func(x) + DOUBLE PRECISION, INTENT(in) :: x(:) + DOUBLE COMPLEX :: func(size(x)) + func = EXP( CMPLX(0.0d0, x)) + END FUNCTION func +! + FUNCTION norm2(x) +! +! Compute the 2-norm of vector x +! + DOUBLE PRECISION :: norm2 + DOUBLE COMPLEX, INTENT(in) :: x(:) +! + norm2 = SQRT(DOT_PRODUCT(x,x)) + END FUNCTION norm2 +END PROGRAM main diff --git a/examples/fit1dbc.f90 b/examples/fit1dbc.f90 new file mode 100644 index 0000000..503a469 --- /dev/null +++ b/examples/fit1dbc.f90 @@ -0,0 +1,254 @@ +!> +!> @file fit1dbc.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Fit grid value function to a spline of any order +! BC using derivatives at both ends. +! + USE bsplines + USE futils +! + IMPLICIT NONE + INTEGER :: nx, nidbas + DOUBLE PRECISION :: a, b, coefx(5) +!!$ DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), fgrid(:), coefs(:) + DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), fgrid(:,:), coefs(:,:) + INTEGER :: i, dim + TYPE(spline1d) :: spl + DOUBLE PRECISION :: dx + INTEGER :: npts + DOUBLE PRECISION, ALLOCATABLE :: xpt(:), fcalc(:), fexact(:), err(:) + DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) + DOUBLE PRECISION, POINTER :: splines(:,:) => null() + INTEGER :: ibc(2,10) +!!$ DOUBLE PRECISION :: fbc(2,10) + DOUBLE PRECISION :: fbc(2,10,1) +! + CHARACTER(len=128) :: file='fit1d.h5' + INTEGER :: fid +! + NAMELIST /newrun/ nx, nidbas, a, b, coefx, ibc, fbc +!=========================================================================== +! 1.0 Prologue +! +! Read in data specific to run +! + nx = 10 ! Number of intevals in x + a = 0.0d0 ! Left boundary of interval + b = 1.0d0 ! Right boundary of interval + coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + ibc(1,1:10) = (/2,3,4,5,6,7,8,9,10,11/) + ibc(2,1:10) = ibc(1,1:10) + fbc = 0.0 +! + READ(*,newrun) + WRITE(*,newrun) +! +! Define grid and function values +! + ALLOCATE(xgrid(0:nx), fgrid(0:nx,1)) + xgrid(0) = a + xgrid(nx) = b + CALL meshdist(coefx, xgrid, nx) + fgrid(:,1) = func(xgrid) + WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid + WRITE(*,'(a/(10f8.3))') 'fgrid', fgrid +! +! Create hdf5 file +! + CALL creatf(file, fid, 'FIT1D Result File') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NIDBAS', nidbas) +!=========================================================================== +! 2.0 Spline interpolation +! +! Set up the spline interpolation + CALL set_splcoef(nidbas, xgrid, spl, ibc=ibc) +! +! Compute spline values and derivatives at Boundaries + ALLOCATE(fun(nidbas+1,0:nidbas)) + WRITE(*,'(/a)') 'spline at the left boundary' + CALL basfun(a, spl, fun, 1) + DO i=0,nidbas + WRITE(*,'(8(1pe12.4))') fun(:,i) + END DO +! + WRITE(*,'(/a)') 'spline at the right boundary' + CALL basfun(b, spl, fun, nx) + DO i=0,nidbas + WRITE(*,'(8(1pe12.4))') fun(:,i) + END DO + DEALLOCATE(fun) +! + CALL get_dim(spl, dim) + ALLOCATE(coefs(dim,1)) + WRITE(*,'(2(a,i5, 2x))') 'NX =', nx, 'DIM =', dim + WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of splines', LBOUND(spl%knots), & + & ':',UBOUND(spl%knots), spl%knots +! +! From given grid values fgrid, compute the spline coefs + CALL get_splcoef(spl, fgrid, coefs, fbc) + WRITE(*,'(a/(10f8.3))') 'coefs', coefs +! +! Plot all splines + npts = 100 + ALLOCATE(xpt(npts)) + dx = (b-a)/REAL(npts-1) + DO i=1,npts + xpt(i) = a + (i-1)*dx + END DO + CALL allsplines(spl, xpt, splines) + CALL putarr(fid, '/X', xpt) + CALL putarr(fid,'/SPLINES', splines) +! +! Check interpolation + ALLOCATE(fcalc(npts), fexact(npts), err(npts)) + fexact = func(xpt) +! +! Function values + CALL gridval(spl, xpt, fcalc, 0, coefs(:,1)) + err = fexact - fcalc + CALL putarr(fid, '/FEXACT', fexact, 'Exact values') + CALL putarr(fid, '/FCALC', fcalc, 'Interpolated values') + CALL putarr(fid, '/ERROR', err, 'Interpolation errors') + WRITE(*,'(/a,1pe12.3)') 'Norm of error', norm2(err)/norm2(fexact) +! +! Derivatives values + CALL gridval(spl, xpt, fcalc, 1) + fexact = func1(xpt) + err = fexact - fcalc + CALL putarr(fid, '/FEXACT1', fexact, 'Exact values of first derivative') + CALL putarr(fid, '/FCALC1', fcalc, 'Interpolated first derivative') + CALL putarr(fid, '/ERROR1', err, 'Interpolation errors on first derivative') + WRITE(*,'(/a,1pe12.3)') 'Norm of error', norm2(err)/norm2(fexact) +! + +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xpt, splines, fexact, fcalc, err) + DEALLOCATE(xgrid, fgrid) + CALL destroy_sp(spl) + CALL closef(fid) +!=========================================================================== +! +CONTAINS + FUNCTION func(x) + DOUBLE PRECISION, INTENT(in) :: x(:) + DOUBLE PRECISION :: func(SIZE(x)) +!!$ INTEGER :: n +!!$ n = SIZE(x) +!!$ func = 1.d0+x*(1.d0+x*(1.d0+x)) +!!$ func(1:n/2) = 1.0d0 +!!$ func(n/2+1:n) = 0.5d0 + func = EXP(-8.*x*x) + END FUNCTION func +! + FUNCTION func1(x) + DOUBLE PRECISION, INTENT(in) :: x(:) + DOUBLE PRECISION :: func1(SIZE(x)) +!!$ INTEGER :: n +!!$ n = SIZE(x) +!!$ func = 1.d0+x*(1.d0+x*(1.d0+x)) +!!$ func(1:n/2) = 1.0d0 +!!$ func(n/2+1:n) = 0.5d0 + func1 = -16.d0*x*EXP(-8.*x*x) + END FUNCTION func1 +! + FUNCTION norm2(x) +! +! Compute the 2-norm of array x +! + DOUBLE PRECISION :: norm2 + DOUBLE PRECISION, INTENT(in) :: x(:) + DOUBLE PRECISION :: sum2 + INTEGER :: i +! + sum2 = 0.0d0 + DO i=1,SIZE(x,1) + sum2 = sum2 + x(i)**2 + END DO + norm2 = SQRT(sum2) + END FUNCTION norm2 +END PROGRAM main +!+++ +SUBROUTINE meshdist(c, x, nx) +! +! Construct an 1d non-equidistant mesh given a +! mesh distribution function. +! + IMPLICIT NONE + DOUBLE PRECISION, INTENT(in) :: c(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + INTEGER :: nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) +CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) + END FUNCTION fdist +END SUBROUTINE meshdist +!+++ diff --git a/examples/fit1dp.f90 b/examples/fit1dp.f90 new file mode 100644 index 0000000..97ca6be --- /dev/null +++ b/examples/fit1dp.f90 @@ -0,0 +1,227 @@ +!> +!> @file fit1dp.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Fit grid value function to a spline of any order +! Periodic case. +! + USE bsplines + USE futils +! + IMPLICIT NONE + INTEGER :: nx, nidbas + DOUBLE PRECISION :: a, b, coefx(5) + DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), fgrid(:,:), coefs(:,:) + INTEGER :: i, dim + TYPE(spline1d) :: spl + DOUBLE PRECISION :: dx, x0, x1 + INTEGER :: npts + DOUBLE PRECISION, ALLOCATABLE :: xpt(:), fcalc(:), fexact(:), err(:) + DOUBLE PRECISION, POINTER :: splines(:,:) => null() +! + CHARACTER(len=128) :: file='fit1d.h5' + INTEGER :: fid +! + NAMELIST /newrun/ nx, nidbas, a, b, coefx +!=========================================================================== +! 1.0 Prologue +! +! Read in data specific to run +! + nx = 10 ! Number of intevals in x + a = 0.0d0 ! Left boundary of interval + b = 1.0d0 ! Right boundary of interval + coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function +! + READ(*,newrun) + WRITE(*,newrun) +! +! Define grid and function values +! + ALLOCATE(xgrid(0:nx), fgrid(0:nx,1)) + xgrid(0) = a + xgrid(nx) = b + CALL meshdist(coefx, xgrid, nx) + fgrid(:,1) = func(xgrid) + WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid + WRITE(*,'(a/(10f8.3))') 'fgrid', fgrid +! +! Create hdf5 file +! + CALL creatf(file, fid, 'FIT1D Result File') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NIDBAS', nidbas) +!=========================================================================== +! 2.0 Spline interpolation +! +! Set up the spline interpolation + CALL set_splcoef(nidbas, xgrid, spl, period=.TRUE.) + CALL get_dim(spl, dim) + WRITE(*,'(2(a,i5, 2x))') 'NX =', nx, 'DIM =', dim + WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of splines', LBOUND(spl%knots), & + & ':',UBOUND(spl%knots), spl%knots +! + ALLOCATE(coefs(dim,1)) +! +! From given grid values fgrid, compute the spline coefs + CALL get_splcoef(spl, fgrid, coefs) + WRITE(*,'(a/(10f8.3))') 'coefs', coefs +! +! Plot all splines +! + npts = 100 + ALLOCATE(xpt(npts)) +!!$ x0 = a +!!$ x1 = b + x0 = spl%knots(0) + x1 = spl%knots(nx) + dx = (x1 -x0)/REAL(npts) ! Last point b not inluded + DO i=1,npts + xpt(i) = x0 + (i-1)*dx + END DO + CALL allsplines(spl, xpt, splines) + CALL putarr(fid, '/X', xpt) + CALL putarr(fid,'/SPLINES', splines) +! +! Check interpolation +! + ALLOCATE(fcalc(npts), fexact(npts), err(npts)) + fexact = func(xpt) +! + CALL gridval(spl, xpt, fcalc, 0, coefs(:,1)) + err = fexact - fcalc + CALL putarr(fid, '/FEXACT', fexact, 'Exact values') + CALL putarr(fid, '/FCALC', fcalc, 'Interpolated values') + CALL putarr(fid, '/ERROR', err, 'Interpolation errors') + WRITE(*,'(/a,1pe12.3)') 'Norm of error', norm2(err)/norm2(fexact) +! +! Derivatives values + CALL gridval(spl, xpt, fcalc, 1) + fexact = func1(xpt) + err = fexact - fcalc + CALL putarr(fid, '/FEXACT1', fexact, 'Exact values of first derivative') + CALL putarr(fid, '/FCALC1', fcalc, 'Interpolated first derivative') + CALL putarr(fid, '/ERROR1', err, 'Interpolation errors on first derivative') + WRITE(*,'(/a,1pe12.3)') 'Norm of error', norm2(err)/norm2(fexact) +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xpt, splines, fexact, fcalc, err) + DEALLOCATE(xgrid, fgrid) + CALL destroy_sp(spl) + CALL closef(fid) +!=========================================================================== +! +CONTAINS + FUNCTION func(x) + DOUBLE PRECISION, INTENT(in) :: x(:) + DOUBLE PRECISION :: func(SIZE(x)) + DOUBLE PRECISION :: pi + pi = 4.0*ATAN(1.0d0) + func = SIN(2.d0*pi*x) + 2.0d0*COS(8.d0*pi*x) + END FUNCTION func + FUNCTION func1(x) + DOUBLE PRECISION, INTENT(in) :: x(:) + DOUBLE PRECISION :: func1(SIZE(x)) + DOUBLE PRECISION :: pi + pi = 4.0*ATAN(1.0d0) + func1 = 2.d0*pi*COS(2.d0*pi*x) - 16.0d0*pi*SIN(8.d0*pi*x) + END FUNCTION func1 +! + FUNCTION norm2(x) +! +! Compute the 2-norm of array x +! + DOUBLE PRECISION :: norm2 + DOUBLE PRECISION, INTENT(in) :: x(:) + DOUBLE PRECISION :: sum2 + INTEGER :: i +! + sum2 = 0.0d0 + DO i=1,SIZE(x,1) + sum2 = sum2 + x(i)**2 + END DO + norm2 = SQRT(sum2) + END FUNCTION norm2 +END PROGRAM main +!+++ +SUBROUTINE meshdist(c, x, nx) +! +! Construct an 1d non-equidistant mesh given a +! mesh distribution function. +! + IMPLICIT NONE + DOUBLE PRECISION, INTENT(in) :: c(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + INTEGER :: nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) +CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) + END FUNCTION fdist +END SUBROUTINE meshdist +!+++ diff --git a/examples/fit2d.f90 b/examples/fit2d.f90 new file mode 100644 index 0000000..ab598de --- /dev/null +++ b/examples/fit2d.f90 @@ -0,0 +1,159 @@ +!> +!> @file fit2d.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Fit 2d grid value function to a 2d spline of any order +! + USE bsplines + USE futils +! + IMPLICIT NONE + CHARACTER(len=128) :: file='fit2d.h5' + INTEGER :: fid + INTEGER :: nx, ny, nidbas(2), mbes, dims(2) + INTEGER, PARAMETER :: nptx=100, npty=100 + DOUBLE PRECISION :: pi, coefx(5), coefy(5) + DOUBLE PRECISION, ALLOCATABLE :: xgrid(:),ygrid(:),fgrid(:,:),bcoefs(:,:) + DOUBLE PRECISION :: dx, dy, xpt(nptx), ypt(npty) + DOUBLE PRECISION, DIMENSION(nptx,npty) :: fcalc, fexact, errs + TYPE(spline2d) :: splxy + DOUBLE PRECISION :: mem + INTEGER :: i, j +! + NAMELIST /newrun/ nx, ny, nidbas, mbes, coefx, coefy +!=========================================================================== +! 1.0 Prologue +! +! Read in data specific to run +! + nx = 8 ! Number of intevals in x + ny = 8 ! Number of intevals in y + nidbas = (/3,3/) ! Degree of splines + mbes = 2 + coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function +! + READ(*,newrun) + WRITE(*,newrun) +! +! Define grid on x axis +! + pi = 4.0d0*ATAN(1.0d0) + ALLOCATE(xgrid(0:nx), ygrid(0:ny), fgrid(0:nx,0:ny)) + xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 + CALL meshdist(coefx, xgrid, nx) + ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi + CALL meshdist(coefy, ygrid, ny) + fgrid = func(xgrid,ygrid) +! + WRITE(*,'(a/(12f8.3))') 'XGRID', xgrid(0:nx) + WRITE(*,'(a/(12f8.3))') 'YGRID', ygrid(0:ny) + IF( nx.LE.10 .AND. ny.LE.10 ) THEN + WRITE(*,'(a)') 'FGRID' + DO j=0,ny + WRITE(*,'(12f8.3)') fgrid(:,j) + END DO + WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem() + END IF +! +! Create hdf5 file +! + CALL creatf(file, fid, 'PDE2D Result File') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NY', ny) + CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) + CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) + CALL attach(fid, '/', 'MBESS', mbes) +!=========================================================================== +! 2.0 Spline interpolation +! +! Setup the spline interpolation + CALL set_splcoef(nidbas, xgrid, ygrid, splxy, (/.FALSE., .TRUE./)) + WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem() +! +! Compute spline interpolation coefficients + CALL get_dim(splxy, dims) + ALLOCATE(bcoefs(dims(1),dims(2))) + WRITE(*,'(a,2i4)') 'Dims of spline', dims +! + CALL get_splcoef(splxy, fgrid, bcoefs) +!=========================================================================== +! 2.0 Check interpolation +! + dx=(xgrid(nx)-xgrid(0))/REAL(nptx-1) + dy=(ygrid(ny)-ygrid(0))/REAL(npty-1) + DO i=1,nptx + xpt(i) = xgrid(0) + (i-1)*dx + END DO + DO i=1,npty + ypt(i) = ygrid(0) + (i-1)*dy + END DO + fexact = func(xpt,ypt) + CALL gridval(splxy, xpt, ypt, fcalc, (/0,0/), bcoefs) + errs = fcalc-fexact + WRITE(*,'(a,2(1pe12.3))') 'Min max or errors', MINVAL(errs), MAXVAL(errs) +! + CALL putarr(fid, '/xpt', xpt, 'r') + CALL putarr(fid, '/ypt', ypt, '\theta') + CALL putarr(fid, '/fcalc', fcalc, 'Interpolated') + CALL putarr(fid, '/fexact', fexact, 'Exact') + CALL putarr(fid, '/errs', errs, 'Errors') +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(bcoefs) + DEALLOCATE(xgrid, ygrid, fgrid) + CALL destroy_sp(splxy) + CALL closef(fid) +! +CONTAINS + FUNCTION func(x,y) + DOUBLE PRECISION, INTENT(in) :: x(:), y(:) + DOUBLE PRECISION :: func(SIZE(x), SIZE(y)) + DOUBLE PRECISION :: zy + INTEGER :: j + DO j=1,SIZE(y) + zy = -mbes * SIN(mbes*y(j)) + func(:,j) =(1-x(:)**2) * x(:)**mbes * zy + END DO + END FUNCTION func +! + FUNCTION norm2(x) +! +! Compute the 2-norm of array x +! + DOUBLE PRECISION :: norm2 + DOUBLE PRECISION, INTENT(in) :: x(:) + DOUBLE PRECISION :: sum2 + INTEGER :: i +! + sum2 = 0.0d0 + DO i=1,SIZE(x,1) + sum2 = sum2 + x(i)**2 + END DO + norm2 = SQRT(sum2) + END FUNCTION norm2 +END PROGRAM main diff --git a/examples/fit2d1d.f90 b/examples/fit2d1d.f90 new file mode 100644 index 0000000..e0ffec3 --- /dev/null +++ b/examples/fit2d1d.f90 @@ -0,0 +1,154 @@ +!> +!> @file fit2d1d.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Fit 2d grid value function to a 2d spline of any order +! Interpolating on an grid (x_i,y_j) or a set of particle +! positions (x_p,y_p). +! + USE bsplines +! + IMPLICIT NONE + INTEGER :: nx, ny, nidbas(2), mbes, dims(2) + INTEGER, PARAMETER :: nptx=100, npty=100 + DOUBLE PRECISION :: pi, coefx(5), coefy(5) + DOUBLE PRECISION, ALLOCATABLE :: xgrid(:),ygrid(:),fgrid(:,:),bcoefs(:,:) + DOUBLE PRECISION :: dx, dy, xpt(nptx), ypt(npty) + DOUBLE PRECISION, DIMENSION(nptx,npty) :: fcalc, fexact, errs + DOUBLE PRECISION, DIMENSION(nptx*npty) :: xp, yp, fcalcp, fexactp, errsp + TYPE(spline2d) :: splxy + DOUBLE PRECISION :: mem + INTEGER :: i, j +! + NAMELIST /newrun/ nx, ny, nidbas, mbes, coefx, coefy +!=========================================================================== +! 1.0 Prologue +! +! Read in data specific to run +! + nx = 8 ! Number of intevals in x + ny = 8 ! Number of intevals in y + nidbas = (/3,3/) ! Degree of splines + mbes = 2 + coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function +! + READ(*,newrun) + WRITE(*,newrun) +! +! Define grid on x axis +! + pi = 4.0d0*ATAN(1.0d0) + ALLOCATE(xgrid(0:nx), ygrid(0:ny), fgrid(0:nx,0:ny)) + xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 + CALL meshdist(coefx, xgrid, nx) + ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi + CALL meshdist(coefy, ygrid, ny) + fgrid = func(xgrid,ygrid) +!=========================================================================== +! 2.0 Spline interpolation +! +! Setup the spline interpolation + CALL set_splcoef(nidbas, xgrid, ygrid, splxy, (/.FALSE., .TRUE./)) +! +! Compute spline interpolation coefficients + CALL get_dim(splxy, dims) + ALLOCATE(bcoefs(dims(1),dims(2))) + WRITE(*,'(a,2i4)') 'Dims of spline', dims +! + CALL get_splcoef(splxy, fgrid, bcoefs) +!=========================================================================== +! 2.0 Check interpolation +! + dx=(xgrid(nx)-xgrid(0))/REAL(nptx-1) + dy=(ygrid(ny)-ygrid(0))/REAL(npty-1) + DO i=1,nptx + xpt(i) = xgrid(0) + (i-1)*dx + END DO + DO i=1,npty + ypt(i) = ygrid(0) + (i-1)*dy + END DO + fexact = func(xpt,ypt) + CALL gridval(splxy, xpt, ypt, fcalc, (/0,0/), bcoefs) + errs = fcalc-fexact + WRITE(*,*) 'Using the GRIDVAL2D2D' + WRITE(*,'(a,2(1pe12.3))') 'Min max or errors', MINVAL(errs), MAXVAL(errs) +! +! The 2d1d version + WRITE(*,*) 'Using the GRIDVAL2D1D' + CALL RANDOM_NUMBER(xp) + CALL RANDOM_NUMBER(yp) + yp=2.0*pi*yp + fexactp = func1(xp,yp) +!!$ CALL gridval(splxy, xp, yp, fcalcp, (/0,0/), bcoefs) + CALL gridval(splxy, xp, yp, fcalcp, (/0,0/)) + errsp = fcalcp-fexactp + WRITE(*,'(a,2(1pe12.3))') 'Min max or errors', MINVAL(errsp), MAXVAL(errsp) +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(bcoefs) + DEALLOCATE(xgrid, ygrid, fgrid) + CALL destroy_sp(splxy) +! +CONTAINS + FUNCTION func(x,y) + DOUBLE PRECISION, INTENT(in) :: x(:), y(:) + DOUBLE PRECISION :: func(SIZE(x), SIZE(y)) + DOUBLE PRECISION :: zy + INTEGER :: j + DO j=1,SIZE(y) + zy = -mbes * SIN(mbes*y(j)) + func(:,j) =(1-x(:)**2) * x(:)**mbes * zy + END DO + END FUNCTION func + FUNCTION func1(x,y) + DOUBLE PRECISION, INTENT(in) :: x(:), y(:) + DOUBLE PRECISION :: func1(SIZE(x)) + DOUBLE PRECISION :: zy + INTEGER :: j + DO j=1,SIZE(x) + zy = -mbes * SIN(mbes*y(j)) + func1(j) =(1-x(j)**2) * x(j)**mbes * zy + END DO + END FUNCTION func1 +! + FUNCTION norm2(x) +! +! Compute the 2-norm of array x +! + DOUBLE PRECISION :: norm2 + DOUBLE PRECISION, INTENT(in) :: x(:) + DOUBLE PRECISION :: sum2 + INTEGER :: i +! + sum2 = 0.0d0 + DO i=1,SIZE(x,1) + sum2 = sum2 + x(i)**2 + END DO + norm2 = SQRT(sum2) + END FUNCTION norm2 +END PROGRAM main diff --git a/examples/fit2d_cmpl.f90 b/examples/fit2d_cmpl.f90 new file mode 100644 index 0000000..6e6a4ee --- /dev/null +++ b/examples/fit2d_cmpl.f90 @@ -0,0 +1,132 @@ +!> +!> @file fit2d_cmpl.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Fit 2d grid value function to a 2d spline of any order +! + USE bsplines +! + IMPLICIT NONE + INTEGER :: nx, ny, nidbas(2), mbes, dims(2) + INTEGER, PARAMETER :: npt=10000 + DOUBLE PRECISION :: pi, coefx(5), coefy(5) + DOUBLE PRECISION, ALLOCATABLE :: xgrid(:),ygrid(:) + DOUBLE COMPLEX, ALLOCATABLE :: fgrid(:,:), fgrid_calc(:,:), bcoefs(:,:) + DOUBLE PRECISION :: dx, dy, xpt(npt), ypt(npt), errs(npt) + DOUBLE COMPLEX, DIMENSION(npt) :: fcalc, fexact + TYPE(spline2d) :: splxy + INTEGER :: i, j +! + NAMELIST /newrun/ nx, ny, nidbas, mbes, coefx, coefy +!=========================================================================== +! 1.0 Prologue +! +! Read in data specific to run +! + nx = 8 ! Number of intevals in x + ny = 8 ! Number of intevals in y + nidbas = (/3,3/) ! Degree of splines + mbes = 2 + coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function +! + READ(*,newrun) + WRITE(*,newrun) +! +! Define grid on x axis +! + pi = 4.0d0*ATAN(1.0d0) + ALLOCATE(xgrid(0:nx), ygrid(0:ny)) + ALLOCATE(fgrid(0:nx,0:ny), fgrid_calc(0:nx,0:ny)) + xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 + CALL meshdist(coefx, xgrid, nx) + ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi + CALL meshdist(coefy, ygrid, ny) + fgrid = func2(xgrid,ygrid) +!=========================================================================== +! 2.0 Spline interpolation +! +! Setup the spline interpolation + CALL set_splcoef(nidbas, xgrid, ygrid, splxy, (/.FALSE., .TRUE./)) +! +! Compute spline interpolation coefficients + CALL get_dim(splxy, dims) + ALLOCATE(bcoefs(dims(1),dims(2))) + WRITE(*,'(a,2i4)') 'Dims of spline', dims +! + CALL get_splcoef(splxy, fgrid, bcoefs) +!=========================================================================== +! 2.0 Check interpolation +! + CALL RANDOM_NUMBER(xpt) + CALL RANDOM_NUMBER(ypt) + ypt(:) = ypt(:)*2.0*pi + fexact = func1(xpt,ypt) +! + CALL gridval(splxy, xpt, ypt, fcalc, (/0,0/), bcoefs) + errs = ABS(fcalc-fexact) + WRITE(*,'(a,2(1pe12.3))') 'Max errors (on random points)', MAXVAL(errs) +! + CALL gridval(splxy, xgrid, ygrid, fgrid_calc, (/0,0/)) + WRITE(*,'(a,2(1pe12.3))') 'Max errors (on grid points)', & + & MAXVAL(ABS(fgrid_calc-fgrid)) +! + fgrid_calc = 0.0 + DO j=0,ny + ypt(1:nx+1) = ygrid(j) + CALL gridval(splxy, xgrid, ypt(1:nx+1), fgrid_calc(:,j), (/0,0/)) + END DO + WRITE(*,'(a,2(1pe12.3))') 'Max errors (on grid points)', & + & MAXVAL(ABS(fgrid_calc-fgrid)) +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(bcoefs) + DEALLOCATE(xgrid, ygrid, fgrid, fgrid_calc) + CALL destroy_sp(splxy) +! +CONTAINS + FUNCTION func2(x,y) + DOUBLE PRECISION, INTENT(in) :: x(:), y(:) + DOUBLE COMPLEX :: func2(SIZE(x), SIZE(y)) + DOUBLE COMPLEX :: zy + INTEGER :: j + DO j=1,SIZE(y) + zy = -mbes * CMPLX(SIN(mbes*y(j)), COS(mbes*y(j))) + func2(:,j) =(1-x(:)**2) * x(:)**mbes * zy + END DO + END FUNCTION func2 + FUNCTION func1(x,y) + DOUBLE PRECISION, INTENT(in) :: x(:), y(:) + DOUBLE COMPLEX :: func1(SIZE(x)) + DOUBLE COMPLEX :: zy + INTEGER :: j + DO j=1,SIZE(x) + zy = -mbes * CMPLX(SIN(mbes*y(j)), COS(mbes*y(j))) + func1(j) =(1-x(j)**2) * x(j)**mbes * zy + END DO + END FUNCTION func1 +END PROGRAM main diff --git a/examples/fit2dbc.f90 b/examples/fit2dbc.f90 new file mode 100644 index 0000000..75862c0 --- /dev/null +++ b/examples/fit2dbc.f90 @@ -0,0 +1,183 @@ +!> +!> @file fit2dbc.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Fit 2d grid value function to a 2d spline of any order +! BC using derivatives at both ends, in the non-periodic direction. +! + USE bsplines + USE futils +! + IMPLICIT NONE + CHARACTER(len=128) :: file='fit2d.h5' + INTEGER :: fid + INTEGER :: nx, ny, nidbas(2), mbes, dims(2) + INTEGER, PARAMETER :: nptx=100, npty=100 + DOUBLE PRECISION :: pi, coefx(5), coefy(5) + DOUBLE PRECISION, ALLOCATABLE :: xgrid(:),ygrid(:),fgrid(:,:),bcoefs(:,:) + DOUBLE PRECISION :: dx, dy, xpt(nptx), ypt(npty) + DOUBLE PRECISION, DIMENSION(nptx,npty) :: fcalc, fexact, errs + TYPE(spline2d) :: splxy + DOUBLE PRECISION :: mem + INTEGER :: i, j, ii + INTEGER :: ibc1(2,10), ibc2(2,10) + DOUBLE PRECISION, ALLOCATABLE:: fbc(:,:,:) +! + NAMELIST /newrun/ nx, ny, nidbas, mbes, coefx, coefy + +!=========================================================================== +! 1.0 Prologue +! +! Read in data specific to run +! + nx = 8 ! Number of intevals in x + ny = 8 ! Number of intevals in y + nidbas = (/3,3/) ! Degree of splines + mbes = 2 + coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function +! + READ(*,newrun) + WRITE(*,newrun) +! +! Define grid on x axis +! + pi = 4.0d0*ATAN(1.0d0) + ALLOCATE(xgrid(0:nx), ygrid(0:ny), fgrid(0:nx,0:ny)) + xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 + CALL meshdist(coefx, xgrid, nx) + ygrid(0) = 0.0d0; ygrid(ny) = 1.0d0 + CALL meshdist(coefy, ygrid, ny) + fgrid = func(xgrid,ygrid) +! + WRITE(*,'(a/(12f8.3))') 'XGRID', xgrid(0:nx) + WRITE(*,'(a/(12f8.3))') 'YGRID', ygrid(0:ny) + IF( nx.LE.10 .AND. ny.LE.10 ) THEN + WRITE(*,'(a)') 'FGRID' + DO j=0,ny + WRITE(*,'(12f8.3)') fgrid(:,j) + END DO + WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem() + END IF +! +! Create hdf5 file +! + CALL creatf(file, fid, 'PDE2D Result File') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NY', ny) + CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) + CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) + CALL attach(fid, '/', 'MBESS', mbes) +!=========================================================================== +! 2.0 Spline interpolation +! +! Setup the spline interpolation + ii=1 ! Start with first derivative + DO i = 1, nidbas(1)/2 + ibc1(1,i) = ii+i-1 + ibc1(2,i) = ii+i-1 + END DO + ibc2 = ibc1 + CALL set_splcoef(nidbas, xgrid, ygrid, splxy, (/.FALSE., .FALSE./),& + & ibc1=ibc1, ibc2=ibc2) + WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem() +! +! Compute spline interpolation coefficients + CALL get_dim(splxy, dims) + ALLOCATE(bcoefs(dims(1),dims(2))) + WRITE(*,'(a,2i4)') 'Dims of spline', dims +! + ALLOCATE(fbc(2, nidbas(1)/2, 0:ny)) + fbc=0.0d0 +! + WRITE(*,'(a/(10f8.3))') 'fbc(1)', fbc(1,1,:) + WRITE(*,'(a/(10f8.3))') 'fbc(2)', fbc(2,1,:) +! + CALL get_splcoef(splxy, fgrid, bcoefs, fbc1=fbc, fbc2=fbc) +! + DEALLOCATE(fbc) +!=========================================================================== +! 2.0 Check interpolation +! + dx=(xgrid(nx)-xgrid(0))/REAL(nptx-1) + dy=(ygrid(ny)-ygrid(0))/REAL(npty-1) + DO i=1,nptx + xpt(i) = xgrid(0) + (i-1)*dx + END DO + DO i=1,npty + ypt(i) = ygrid(0) + (i-1)*dy + END DO + fexact = func(xpt,ypt) + CALL gridval(splxy, xpt, ypt, fcalc, (/0,0/), bcoefs) + errs = fcalc-fexact + WRITE(*,'(a,2(1pe12.3))') 'Min max or errors', MINVAL(errs), MAXVAL(errs) +! + CALL putarr(fid, '/xpt', xpt, 'r') + CALL putarr(fid, '/ypt', ypt, '\theta') + CALL putarr(fid, '/bcoefs', bcoefs, 'bcoefs') + CALL putarr(fid, '/fcalc', fcalc, 'Interpolated') + CALL putarr(fid, '/fexact', fexact, 'Exact') + CALL putarr(fid, '/errs', errs, 'Errors') +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(bcoefs) + DEALLOCATE(xgrid, ygrid, fgrid) + CALL destroy_sp(splxy) + CALL closef(fid) +! +!=========================================================================== +CONTAINS + FUNCTION func(x,y) +! +! A function with zeo derivatives at both ends +! + DOUBLE PRECISION, INTENT(in) :: x(:), y(:) + DOUBLE PRECISION :: func(SIZE(x), SIZE(y)) + DOUBLE PRECISION :: zy + INTEGER :: j + DO j=1,SIZE(y) + zy = y(j)*y(j)*(y(j)-1.5d0) + func(:,j) = x(:)*x(:)*(x(:)-1.5d0) + zy + END DO + END FUNCTION func +! + FUNCTION norm2(x) +! +! Compute the 2-norm of array x +! + DOUBLE PRECISION :: norm2 + DOUBLE PRECISION, INTENT(in) :: x(:) + DOUBLE PRECISION :: sum2 + INTEGER :: i +! + sum2 = 0.0d0 + DO i=1,SIZE(x,1) + sum2 = sum2 + x(i)**2 + END DO + norm2 = SQRT(sum2) + END FUNCTION norm2 +END PROGRAM main diff --git a/examples/fit2dbc_x.f90 b/examples/fit2dbc_x.f90 new file mode 100644 index 0000000..8795668 --- /dev/null +++ b/examples/fit2dbc_x.f90 @@ -0,0 +1,202 @@ +!> +!> @file fit2dbc_x.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Fit 2d grid value function to a 2d spline of any order +! BC using derivatives at both ends, in the non-periodic direction. +! +! Testing BC on derivative along first direction +! + USE bsplines + USE futils +! + IMPLICIT NONE + CHARACTER(len=128) :: file='fit2d.h5' + INTEGER :: fid + INTEGER :: nx, ny, nidbas(2), mbes, dims(2) + INTEGER, PARAMETER :: nptx=100, npty=100 + DOUBLE PRECISION :: pi, coefx(5), coefy(5) + DOUBLE PRECISION, ALLOCATABLE :: xgrid(:),ygrid(:),fgrid(:,:),bcoefs(:,:) + DOUBLE PRECISION :: dx, dy, xpt(nptx), ypt(npty) + DOUBLE PRECISION, DIMENSION(nptx,npty) :: fcalc, fexact, errs + TYPE(spline2d) :: splxy + DOUBLE PRECISION :: mem + INTEGER :: i, j, ii + INTEGER :: ibc(2,10) + DOUBLE PRECISION, ALLOCATABLE:: fbc(:,:,:) +! + NAMELIST /newrun/ nx, ny, nidbas, mbes, coefx, coefy + +!=========================================================================== +! 1.0 Prologue +! +! Read in data specific to run +! + nx = 8 ! Number of intevals in x + ny = 8 ! Number of intevals in y + nidbas = (/3,3/) ! Degree of splines + mbes = 2 + coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function +! + READ(*,newrun) + WRITE(*,newrun) +! +! Define grid on x axis +! + pi = 4.0d0*ATAN(1.0d0) + ALLOCATE(xgrid(0:nx), ygrid(0:ny), fgrid(0:nx,0:ny)) + xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 + CALL meshdist(coefx, xgrid, nx) + ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi + CALL meshdist(coefy, ygrid, ny) + fgrid = func(xgrid,ygrid) +! + WRITE(*,'(a/(12f8.3))') 'XGRID', xgrid(0:nx) + WRITE(*,'(a/(12f8.3))') 'YGRID', ygrid(0:ny) + IF( nx.LE.10 .AND. ny.LE.10 ) THEN + WRITE(*,'(a)') 'FGRID(x, y)' + DO j=0,ny + WRITE(*,'(12f8.3)') fgrid(:,j) + END DO + WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem() + END IF +! +! Create hdf5 file +! + CALL creatf(file, fid, 'PDE2D Result File') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NY', ny) + CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) + CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) + CALL attach(fid, '/', 'MBESS', mbes) +!=========================================================================== +! 2.0 Spline interpolation +! +! Setup the spline interpolation + ii=1 ! Start with first derivative + DO i = 1, nidbas(1)/2 + ibc(1,i) = ii+i-1 + ibc(2,i) = ii+i-1 + END DO + CALL set_splcoef(nidbas, xgrid, ygrid, splxy, (/.FALSE., .TRUE./),& + & ibc1=ibc) + WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem() +! +! Compute spline interpolation coefficients + CALL get_dim(splxy, dims) + ALLOCATE(bcoefs(dims(1),dims(2))) + WRITE(*,'(a,2i4)') 'Dims of spline', dims +! + ALLOCATE(fbc(2, nidbas(1)/2, 0:ny)) + fbc=0.0d0 +! +!!$! Exact first derivatives at boundaries +!!$ fbc(1,1:1,:) = func1(xgrid(0:0), ygrid(0:ny)) +!!$ fbc(2,1:1,:) = func1(xgrid(nx:nx), ygrid(0:ny)) +! +!!$! Derivatives at boundaries approximated with FD +!!$ DO j=0,ny +!!$ fbc(1,1,j+1) = fgrid(1,j)-fgrid(0,j) +!!$ fbc(2,1,j+1) = fgrid(nx,j)-fgrid(nx-1,j) +!!$ END DO +!!$ fbc(1,1,:) = fbc(1,1,:)/(xgrid(1)-xgrid(0)) +!!$ fbc(2,1,:) = fbc(2,1,:)/(xgrid(nx)-xgrid(nx-1)) +! + WRITE(*,'(a/(10f8.3))') 'fbc(1)', fbc(1,1,:) + WRITE(*,'(a/(10f8.3))') 'fbc(2)', fbc(2,1,:) +! + CALL get_splcoef(splxy, fgrid, bcoefs, fbc1=fbc) +! + DEALLOCATE(fbc) +!=========================================================================== +! 2.0 Check interpolation +! + dx=(xgrid(nx)-xgrid(0))/REAL(nptx-1) + dy=(ygrid(ny)-ygrid(0))/REAL(npty-1) + DO i=1,nptx + xpt(i) = xgrid(0) + (i-1)*dx + END DO + DO i=1,npty + ypt(i) = ygrid(0) + (i-1)*dy + END DO + fexact = func(xpt,ypt) + CALL gridval(splxy, xpt, ypt, fcalc, (/0,0/), bcoefs) + errs = fcalc-fexact + WRITE(*,'(a,2(1pe12.3))') 'Min max or errors', MINVAL(errs), MAXVAL(errs) +! + CALL putarr(fid, '/xpt', xpt, 'r') + CALL putarr(fid, '/ypt', ypt, '\theta') + CALL putarr(fid, '/fcalc', fcalc, 'Interpolated') + CALL putarr(fid, '/fexact', fexact, 'Exact') + CALL putarr(fid, '/errs', errs, 'Errors') +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(bcoefs) + DEALLOCATE(xgrid, ygrid, fgrid) + CALL destroy_sp(splxy) + CALL closef(fid) +! +!=========================================================================== +CONTAINS + FUNCTION func(x,y) + DOUBLE PRECISION, INTENT(in) :: x(:), y(:) + DOUBLE PRECISION :: func(SIZE(x), SIZE(y)) + DOUBLE PRECISION :: zy + INTEGER :: j + DO j=1,SIZE(y) + zy = -mbes * SIN(mbes*y(j)) + func(:,j) =(1-x(:)**2) * x(:)**mbes * zy + END DO + END FUNCTION func + FUNCTION func1(x,y) + DOUBLE PRECISION, INTENT(in) :: x(:), y(:) + DOUBLE PRECISION :: func1(SIZE(x), SIZE(y)) + DOUBLE PRECISION :: zy + INTEGER :: j + DO j=1,SIZE(y) + zy = -mbes * SIN(mbes*y(j)) + func1(:,j) = (mbes - (mbes+2.0d0)*x(:)**2) * x(:)**(mbes-1) * zy + END DO + END FUNCTION func1 +! + FUNCTION norm2(x) +! +! Compute the 2-norm of array x +! + DOUBLE PRECISION :: norm2 + DOUBLE PRECISION, INTENT(in) :: x(:) + DOUBLE PRECISION :: sum2 + INTEGER :: i +! + sum2 = 0.0d0 + DO i=1,SIZE(x,1) + sum2 = sum2 + x(i)**2 + END DO + norm2 = SQRT(sum2) + END FUNCTION norm2 +END PROGRAM main diff --git a/examples/fit2dbc_y.f90 b/examples/fit2dbc_y.f90 new file mode 100644 index 0000000..ff73d65 --- /dev/null +++ b/examples/fit2dbc_y.f90 @@ -0,0 +1,202 @@ +!> +!> @file fit2dbc_y.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Fit 2d grid value function to a 2d spline of any order +! BC using derivatives at both ends, in the non-periodic direction. +! +! Testing BC on derivative along second direction +! + USE bsplines + USE futils +! + IMPLICIT NONE + CHARACTER(len=128) :: file='fit2d.h5' + INTEGER :: fid + INTEGER :: nx, ny, nidbas(2), mbes, dims(2) + INTEGER, PARAMETER :: nptx=100, npty=100 + DOUBLE PRECISION :: pi, coefx(5), coefy(5) + DOUBLE PRECISION, ALLOCATABLE :: xgrid(:),ygrid(:),fgrid(:,:),bcoefs(:,:) + DOUBLE PRECISION :: dx, dy, xpt(nptx), ypt(npty) + DOUBLE PRECISION, DIMENSION(npty,nptx) :: fcalc, fexact, errs + TYPE(spline2d) :: splxy + DOUBLE PRECISION :: mem + INTEGER :: i, j, ii + INTEGER :: ibc(2,10) + DOUBLE PRECISION, ALLOCATABLE:: fbc(:,:,:) +! + NAMELIST /newrun/ nx, ny, nidbas, mbes, coefx, coefy + +!=========================================================================== +! 1.0 Prologue +! +! Read in data specific to run +! + nx = 8 ! Number of intevals in x + ny = 8 ! Number of intevals in y + nidbas = (/3,3/) ! Degree of splines + mbes = 2 + coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function +! + READ(*,newrun) + WRITE(*,newrun) +! +! Define grid on x axis +! + pi = 4.0d0*ATAN(1.0d0) + ALLOCATE(xgrid(0:nx), ygrid(0:ny), fgrid(0:ny,0:nx)) + xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 + CALL meshdist(coefx, xgrid, nx) + ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi + CALL meshdist(coefy, ygrid, ny) + fgrid = TRANSPOSE(func(xgrid,ygrid)) +! + WRITE(*,'(a/(12f8.3))') 'XGRID', xgrid(0:nx) + WRITE(*,'(a/(12f8.3))') 'YGRID', ygrid(0:ny) + IF( nx.LE.10 .AND. ny.LE.10 ) THEN + WRITE(*,'(a)') 'FGRID(y, x)' + DO j=0,ny + WRITE(*,'(12f8.3)') fgrid(j,:) + END DO + WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem() + END IF +! +! Create hdf5 file +! + CALL creatf(file, fid, 'PDE2D Result File') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NY', ny) + CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) + CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) + CALL attach(fid, '/', 'MBESS', mbes) +!=========================================================================== +! 2.0 Spline interpolation +! +! Setup the spline interpolation + ii=1 ! Start with first derivative + DO i = 1, nidbas(1)/2 + ibc(1,i) = ii+i-1 + ibc(2,i) = ii+i-1 + END DO + CALL set_splcoef((/nidbas(2), nidbas(1)/), ygrid, xgrid, splxy, (/.TRUE., .FALSE./),& + & ibc2=ibc) + WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem() +! +! Compute spline interpolation coefficients + CALL get_dim(splxy, dims) + ALLOCATE(bcoefs(dims(1),dims(2))) + WRITE(*,'(a,2i4)') 'Dims of spline', dims +! + ALLOCATE(fbc(2, nidbas(1)/2, 0:ny)) + fbc=0.0d0 +! +!!$! Exact first derivatives at boundaries +!!$ fbc(1,1:1,:) = func1(xgrid(0:0), ygrid(0:ny)) +!!$ fbc(2,1:1,:) = func1(xgrid(nx:nx), ygrid(0:ny)) +! +!!$! Derivatives at boundaries approximated with FD +!!$ DO j=0,ny +!!$ fbc(1,1,j+1) = fgrid(1,j)-fgrid(0,j) +!!$ fbc(2,1,j+1) = fgrid(nx,j)-fgrid(nx-1,j) +!!$ END DO +!!$ fbc(1,1,:) = fbc(1,1,:)/(xgrid(1)-xgrid(0)) +!!$ fbc(2,1,:) = fbc(2,1,:)/(xgrid(nx)-xgrid(nx-1)) +! + WRITE(*,'(a/(10f8.3))') 'fbc(1)', fbc(1,1,:) + WRITE(*,'(a/(10f8.3))') 'fbc(2)', fbc(2,1,:) +! + CALL get_splcoef(splxy, fgrid, bcoefs, fbc2=fbc) +! + DEALLOCATE(fbc) +!=========================================================================== +! 2.0 Check interpolation +! + dx=(xgrid(nx)-xgrid(0))/REAL(nptx-1) + dy=(ygrid(ny)-ygrid(0))/REAL(npty-1) + DO i=1,nptx + xpt(i) = xgrid(0) + (i-1)*dx + END DO + DO i=1,npty + ypt(i) = ygrid(0) + (i-1)*dy + END DO + fexact = TRANSPOSE(func(xpt,ypt)) + CALL gridval(splxy, ypt, xpt, fcalc, (/0,0/), bcoefs) + errs = fcalc-fexact + WRITE(*,'(a,2(1pe12.3))') 'Min max or errors', MINVAL(errs), MAXVAL(errs) +! + CALL putarr(fid, '/xpt', xpt, 'r') + CALL putarr(fid, '/ypt', ypt, '\theta') + CALL putarr(fid, '/fcalc', fcalc, 'Interpolated') + CALL putarr(fid, '/fexact', fexact, 'Exact') + CALL putarr(fid, '/errs', errs, 'Errors') +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(bcoefs) + DEALLOCATE(xgrid, ygrid, fgrid) + CALL destroy_sp(splxy) + CALL closef(fid) +! +!=========================================================================== +CONTAINS + FUNCTION func(x,y) + DOUBLE PRECISION, INTENT(in) :: x(:), y(:) + DOUBLE PRECISION :: func(SIZE(x), SIZE(y)) + DOUBLE PRECISION :: zy + INTEGER :: j + DO j=1,SIZE(y) + zy = -mbes * SIN(mbes*y(j)) + func(:,j) =(1-x(:)**2) * x(:)**mbes * zy + END DO + END FUNCTION func + FUNCTION func1(x,y) + DOUBLE PRECISION, INTENT(in) :: x(:), y(:) + DOUBLE PRECISION :: func1(SIZE(x), SIZE(y)) + DOUBLE PRECISION :: zy + INTEGER :: j + DO j=1,SIZE(y) + zy = -mbes * SIN(mbes*y(j)) + func1(:,j) = (mbes - (mbes+2.0d0)*x(:)**2) * x(:)**(mbes-1) * zy + END DO + END FUNCTION func1 +! + FUNCTION norm2(x) +! +! Compute the 2-norm of array x +! + DOUBLE PRECISION :: norm2 + DOUBLE PRECISION, INTENT(in) :: x(:) + DOUBLE PRECISION :: sum2 + INTEGER :: i +! + sum2 = 0.0d0 + DO i=1,SIZE(x,1) + sum2 = sum2 + x(i)**2 + END DO + norm2 = SQRT(sum2) + END FUNCTION norm2 +END PROGRAM main diff --git a/examples/getgrad_perf.f90 b/examples/getgrad_perf.f90 new file mode 100644 index 0000000..0a168c9 --- /dev/null +++ b/examples/getgrad_perf.f90 @@ -0,0 +1,221 @@ +!> +!> @file getgrad_perf.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Test and compare performance of using "spline" and +! "pp" forms. 2D case +! + USE bsplines +! + IMPLICIT NONE + INTEGER :: nx, ny, ngauss(2), nidbas(2), nits + INTEGER :: npt, d1, d2 + INTEGER :: i, j, its, ngroup=4 + INTEGER :: i1, i2, nset, nremain + DOUBLE PRECISION :: pi, dx, dy + DOUBLE PRECISION :: seconds, t0, t1, tscal, tvec + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid + DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: coefs + DOUBLE PRECISION, ALLOCATABLE :: xpt(:), ypt(:), fgrid00(:), fgrid01(:), fgrid10(:) + DOUBLE PRECISION, ALLOCATABLE :: fgrad00(:), fgrad01(:), fgrad10(:) + TYPE(spline2d) :: splxy +! + NAMELIST /newrun/ nx, ny, nidbas, npt, nits +!=============================================================================== +! 0.0 Prologue +! +! 2D grid +! + nx = 8 + ny = 8 + nidbas = (/ 3, 3 /) + npt = 100000 + nits =100 + READ(*,newrun) + WRITE(*,newrun) +! + pi = 4.0d0*ATAN(1.0d0) + ALLOCATE(xgrid(0:nx), ygrid(0:ny)) + dx = 1.0d0/REAL(nx) + xgrid = (/ (i*dx,i=0,nx) /) + dy = 2.0d0*pi/REAL(ny) + ygrid = (/ (j*dy,j=0,ny) /) +! + WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) + WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny) +! +! Set up spline +! + ngauss = 4 + CALL set_spline(nidbas, ngauss, xgrid, ygrid, splxy, (/.FALSE., .TRUE./)) + d1 = splxy%sp1%dim + d2 = splxy%sp2%dim + WRITE(*,'(a,3i4)') 'd1, d2 =', d1, d2 + WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots + WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots +! + ALLOCATE(xpt(npt), ypt(npt)) + CALL RANDOM_NUMBER(xpt) + CALL RANDOM_NUMBER(ypt); ypt = 2.0d0*pi*ypt +! + ALLOCATE(coefs(d1,d2)) + ALLOCATE(fgrad00(npt), fgrad01(npt), fgrad10(npt)) + ALLOCATE(fgrid00(npt), fgrid01(npt), fgrid10(npt)) +! +!=============================================================================== +! 1.0 PPFORM +! + coefs = 1.0d0 ! => f=1, all derivatives = 0! +! + splxy%sp1%nlppform = .TRUE. + splxy%sp2%nlppform = .TRUE. + CALL gridval(splxy, xpt, ypt, fgrid00, (/0,0/), coefs) +! +! Vector GRIDVAL + WRITE(*,'(/a/a5,2a12)') 'Vector ppform', 'N', 't(s)', 'SpeedUp' + ngroup = 1 + DO WHILE (ngroup.LT.npt/2) + nset = npt/ngroup + nremain = MODULO(npt, ngroup) + IF(nremain.NE.0) nset = nset+1 + t0 = seconds() + i2 = 0 + DO i=1,nset + i1 = i2+1 + i2 = MIN(i2+ngroup,npt) + DO its=1,nits + CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fgrid00(i1:i2), (/0,0/)) + CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fgrid01(i1:i2), (/0,1/)) + CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fgrid10(i1:i2), (/1,0/)) + END DO + END DO + t1 = seconds()-t0 + tvec = t1/REAL(npt*nits) + IF(ngroup.EQ.1) tscal=tvec + WRITE(*,'(i5,3(1pe12.3))') ngroup, tvec, tscal/tvec + ngroup = 2*ngroup + END DO + WRITE(*,'(/a,3(1pe12.3))') 'GRIDVAL PPFORM: Max errors', & + & MAXVAL(ABS(fgrid00-1.0d0)), MAXVAL(ABS(fgrid01)), MAXVAL(ABS(fgrid10)) +! +! Vector GETGRAD + WRITE(*,'(/a/a5,2a12)') 'Vector ppform', 'N', 't(s)', 'SpeedUp' + ngroup = 1 + DO WHILE (ngroup.LT.npt/2) + nset = npt/ngroup + nremain = MODULO(npt, ngroup) + IF(nremain.NE.0) nset = nset+1 + t0 = seconds() + i2 = 0 + DO i=1,nset + i1 = i2+1 + i2 = MIN(i2+ngroup,npt) + DO its=1,nits + CALL getgrad(splxy, xpt(i1:i2), ypt(i1:i2), & + & fgrad00(i1:i2), fgrad10(i1:i2), fgrad01(i1:i2)) + END DO + END DO + t1 = seconds()-t0 + tvec = t1/REAL(npt*nits) + IF(ngroup.EQ.1) tscal=tvec + WRITE(*,'(i5,3(1pe12.3))') ngroup, tvec, tscal/tvec + ngroup = 2*ngroup + END DO + WRITE(*,'(/a,3(1pe12.3))') 'GETGRAD PPFORM: Max errors', & + & MAXVAL(ABS(fgrad00-fgrid00)), MAXVAL(ABS(fgrad01-fgrid01)), & + & MAXVAL(ABS(fgrad10-fgrid10)) +!=============================================================================== +! 2.0 Spline expansion +! + coefs = 1.0d0 ! => f=1, all derivatives = 0! +! + splxy%sp1%nlppform = .FALSE. + splxy%sp2%nlppform = .FALSE. + CALL gridval(splxy, xpt, ypt, fgrid00, (/0,0/), coefs) +! +! Vector GRIDVAL + WRITE(*,'(/a/a5,2a12)') 'Vector spline', 'N', 't(s)', 'SpeedUp' + ngroup = 1 + DO WHILE (ngroup.LT.npt/2) + nset = npt/ngroup + nremain = MODULO(npt, ngroup) + IF(nremain.NE.0) nset = nset+1 + t0 = seconds() + i2 = 0 + DO i=1,nset + i1 = i2+1 + i2 = MIN(i2+ngroup,npt) + DO its=1,nits + CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fgrid00(i1:i2), (/0,0/)) + CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fgrid01(i1:i2), (/0,1/)) + CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fgrid10(i1:i2), (/1,0/)) + END DO + END DO + t1 = seconds()-t0 + tvec = t1/REAL(npt*nits) + IF(ngroup.EQ.1) tscal=tvec + WRITE(*,'(i5,3(1pe12.3))') ngroup, tvec, tscal/tvec + ngroup = 2*ngroup + END DO + WRITE(*,'(/a,3(1pe12.3))') 'GRIDVAL SPLINE: Max errors', & + & MAXVAL(ABS(fgrid00-1.0d0)), MAXVAL(ABS(fgrid01)), MAXVAL(ABS(fgrid10)) +! +! Vector GETGRAD + WRITE(*,'(/a/a5,2a12)') 'Vector spline', 'N', 't(s)', 'SpeedUp' + ngroup = 1 + DO WHILE (ngroup.LT.npt/2) + nset = npt/ngroup + nremain = MODULO(npt, ngroup) + IF(nremain.NE.0) nset = nset+1 + t0 = seconds() + i2 = 0 + DO i=1,nset + i1 = i2+1 + i2 = MIN(i2+ngroup,npt) + DO its=1,nits + CALL getgrad(splxy, xpt(i1:i2), ypt(i1:i2), & + & fgrad00(i1:i2), fgrad10(i1:i2), fgrad01(i1:i2)) + END DO + END DO + t1 = seconds()-t0 + tvec = t1/REAL(npt*nits) + IF(ngroup.EQ.1) tscal=tvec + WRITE(*,'(i5,3(1pe12.3))') ngroup, tvec, tscal/tvec + ngroup = 2*ngroup + END DO + WRITE(*,'(/a,3(1pe12.3))') 'GETGRAD SPLINE: Max errors', & + & MAXVAL(ABS(fgrad00-fgrid00)), MAXVAL(ABS(fgrad01-fgrid01)), & + & MAXVAL(ABS(fgrad10-fgrid10)) +!=============================================================================== + +! +! Clean up +! + CALL destroy_sp(splxy) + DEALLOCATE(xgrid, ygrid, coefs) + DEALLOCATE(xpt, ypt, fgrid00, fgrid01, fgrid10) + DEALLOCATE(fgrad00, fgrad01, fgrad10) +END PROGRAM main diff --git a/examples/gridval_perf.f90 b/examples/gridval_perf.f90 new file mode 100644 index 0000000..d098531 --- /dev/null +++ b/examples/gridval_perf.f90 @@ -0,0 +1,194 @@ +!> +!> @file gridval_perf.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Test and compare performance of using "spline" and +! "pp" forms. 2D case +! + USE bsplines +! + IMPLICIT NONE + INTEGER :: nx, ny, ngauss(2), nidbas(2), nits + INTEGER :: npt, d1, d2 + INTEGER :: i, j, its, ngroup=4 + INTEGER :: i1, i2, nset, nremain + DOUBLE PRECISION :: pi, dx, dy + DOUBLE PRECISION :: seconds, t0, t1, tscal, tvec + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid + DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: coefs + DOUBLE PRECISION, ALLOCATABLE :: xpt(:), ypt(:), fpt00(:), fpt01(:), fpt10(:) + DOUBLE PRECISION, ALLOCATABLE :: fscal00(:), fscal01(:), fscal10(:) + TYPE(spline2d) :: splxy +! + NAMELIST /newrun/ nx, ny, nidbas, npt, nits +!=============================================================================== +! 0.0 Prologue +! +! 2D grid +! + nx = 8 + ny = 8 + nidbas = (/ 3, 3 /) + npt = 100000 + nits =100 + READ(*,newrun) + WRITE(*,newrun) +! + pi = 4.0d0*ATAN(1.0d0) + ALLOCATE(xgrid(0:nx), ygrid(0:ny)) + dx = 1.0d0/REAL(nx) + xgrid = (/ (i*dx,i=0,nx) /) + dy = 2.0d0*pi/REAL(ny) + ygrid = (/ (j*dy,j=0,ny) /) +! + WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) + WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny) +! +! Set up spline +! + ngauss = 4 + CALL set_spline(nidbas, ngauss, xgrid, ygrid, splxy, (/.FALSE., .TRUE./)) + d1 = splxy%sp1%dim + d2 = splxy%sp2%dim + WRITE(*,'(a,3i4)') 'd1, d2 =', d1, d2 + WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots + WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots +!=============================================================================== +! 1.0 PPFORM +! + ALLOCATE(xpt(npt), ypt(npt)) + CALL RANDOM_NUMBER(xpt) + CALL RANDOM_NUMBER(ypt); ypt = 2.0d0*pi*ypt +! + ALLOCATE(coefs(d1,d2)) + ALLOCATE(fscal00(npt), fscal01(npt), fscal10(npt)) + ALLOCATE(fpt00(npt), fpt01(npt), fpt10(npt)) +! + coefs = 1.0d0 ! => f=1, all derivatives = 0! +! + splxy%sp1%nlppform = .TRUE. + splxy%sp2%nlppform = .TRUE. + CALL gridval(splxy, xpt, ypt, fscal00, (/0,0/), coefs) +! +! Scalar PPFORM + t0 = seconds() + DO i=1,npt + DO its=1,nits + CALL gridval(splxy, xpt(i), ypt(i), fscal00(i), (/0,0/)) + CALL gridval(splxy, xpt(i), ypt(i), fscal01(i), (/0,1/)) + CALL gridval(splxy, xpt(i), ypt(i), fscal10(i), (/1,0/)) + END DO + END DO + t1 = seconds()-t0 + tscal = t1/REAL(npt*nits,8) + WRITE(*,'(/a,3(1pe12.3))') 'Scalar PPFORM: Max errors', & + & MAXVAL(ABS(fscal00-1.0d0)), MAXVAL(ABS(fscal01)), MAXVAL(ABS(fscal10)) + WRITE(*,'(a,3(1pe12.3))') 'time(s)', tscal +! +! Vector PPFORM + WRITE(*,'(/a/a5,2a12)') 'Vector ppform', 'N', 't(s)', 'SpeedUp' + ngroup = 1 + DO WHILE (ngroup.LT.npt/2) + nset = npt/ngroup + nremain = MODULO(npt, ngroup) + IF(nremain.NE.0) nset = nset+1 + t0 = seconds() + i2 = 0 + DO i=1,nset + i1 = i2+1 + i2 = MIN(i2+ngroup,npt) + DO its=1,nits + CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fpt00(i1:i2), (/0,0/)) + CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fpt01(i1:i2), (/0,1/)) + CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fpt10(i1:i2), (/1,0/)) + END DO + END DO + t1 = seconds()-t0 + tvec = t1/REAL(npt*nits) + WRITE(*,'(i5,3(1pe12.3))') ngroup, tvec, tscal/tvec + ngroup = 2*ngroup + END DO + WRITE(*,'(/a,3(1pe12.3))') 'Vector PPFORM: Max errors', & + & MAXVAL(ABS(fpt00-fscal00)), MAXVAL(ABS(fpt01-fscal01)), MAXVAL(ABS(fpt10-fscal10)) +!=============================================================================== +! 2.0 Sline expansion +! + coefs = 1.0d0 ! => f=1, all derivatives = 0! +! + splxy%sp1%nlppform = .FALSE. + splxy%sp2%nlppform = .FALSE. + CALL gridval(splxy, xpt, ypt, fscal00, (/0,0/), coefs) +! +! Scalar SPLINE + t0 = seconds() + DO i=1,npt + DO its=1,nits + CALL gridval(splxy, xpt(i), ypt(i), fscal00(i), (/0,0/)) + CALL gridval(splxy, xpt(i), ypt(i), fscal01(i), (/0,1/)) + CALL gridval(splxy, xpt(i), ypt(i), fscal10(i), (/1,0/)) + END DO + END DO + t1 = seconds()-t0 + tscal = t1/REAL(npt*nits,8) + WRITE(*,'(/a,3(1pe12.3))') 'Scalar SPLINE: Max errors', & + & MAXVAL(ABS(fscal00-1.0d0)), MAXVAL(ABS(fscal01)), MAXVAL(ABS(fscal10)) + WRITE(*,'(a,3(1pe12.3))') 'time(s)', tscal +! +! Vector SPLINE + WRITE(*,'(/a/a5,2a12)') 'Vector spline', 'N', 't(s)', 'SpeedUp' + ngroup = 1 + DO WHILE (ngroup.LT.npt/2) + nset = npt/ngroup + nremain = MODULO(npt, ngroup) + IF(nremain.NE.0) nset = nset+1 + t0 = seconds() + i2 = 0 + DO i=1,nset + i1 = i2+1 + i2 = MIN(i2+ngroup,npt) + DO its=1,nits + CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fpt00(i1:i2), (/0,0/)) + CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fpt01(i1:i2), (/0,1/)) + CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fpt10(i1:i2), (/1,0/)) + END DO + END DO + t1 = seconds()-t0 + tvec = t1/REAL(npt*nits) + WRITE(*,'(i5,3(1pe12.3))') ngroup, tvec, tscal/tvec + ngroup = 2*ngroup + END DO + WRITE(*,'(/a,3(1pe12.3))') 'Vector SPLINE: Max errors', & + & MAXVAL(ABS(fpt00-fscal00)), MAXVAL(ABS(fpt01-fscal01)), MAXVAL(ABS(fpt10-fscal10)) +!=============================================================================== + +! +! Clean up +! + CALL destroy_sp(splxy) + DEALLOCATE(xgrid, ygrid, coefs) + DEALLOCATE(xpt, ypt, fpt00, fpt01, fpt10) + DEALLOCATE(fscal00, fscal01, fscal10) +END PROGRAM main diff --git a/examples/gyro.f90 b/examples/gyro.f90 new file mode 100644 index 0000000..d5a79e8 --- /dev/null +++ b/examples/gyro.f90 @@ -0,0 +1,179 @@ +!> +!> @file gyro.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Gyro-average using splines. +! F(x)=cos(x) => \bar F(x,rho) = J_0(rho) cos(x) +! + USE bsplines + USE futils +! + IMPLICIT NONE + INTEGER, PARAMETER :: nx=10, nidbas=3, dim=nx+nidbas, npt=100, & + & nrho=21, nnq=5 + DOUBLE PRECISION :: xgrid(0:nx), fgrid(0:nx), coefs(dim) + DOUBLE PRECISION :: xpt(npt), fcalc(npt), fexact(npt) + DOUBLE PRECISION :: averf(0:nx), averfexact(0:nx) + DOUBLE PRECISION, POINTER :: splines(:,:) + TYPE(spline1d) :: spl + DOUBLE PRECISION :: pi, twopi, dx, lperiod, dth + DOUBLE PRECISION :: drho, rho(nrho), erraver(nrho,nnq) + INTEGER :: i, j, nq(nnq) + DOUBLE PRECISION :: dbesj0 +! + CHARACTER(len=128) :: file='gyro.h5' + INTEGER :: fid +! + INTERFACE + SUBROUTINE gyro(spl, xgrid, coefs, rho, nq, averf) + USE bsplines + TYPE(spline1d) :: spl + DOUBLE PRECISION, INTENT(in) :: xgrid(0:), coefs(:), rho + INTEGER, INTENT(in) :: nq + DOUBLE PRECISION, INTENT(out) :: averf(0:) + END SUBROUTINE gyro + END INTERFACE +! + pi = 4.0d0*ATAN(1.0d0) + twopi = 2.0d0*pi +! +! Create hdf5 file + CALL creatf(file, fid, 'gyro Result File') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NIDBAS', nidbas) +! +! Grid and function values + dx = twopi/nx + xgrid(0) = 0.0d0 + DO i=1,nx + xgrid(i) = xgrid(0) + i*dx + END DO + lperiod = xgrid(nx)-xgrid(0) + fgrid = func(xgrid) +! +! Spline interpolation + CALL set_splcoef(nidbas, xgrid, spl, period=.TRUE.) + CALL get_splcoef(spl, fgrid, coefs) + WRITE(*,'(a)') 'Spline coefficients' + WRITE(*,'(i5,f12.4)') (i-1, coefs(i), i=1,dim) +! +! Error of interpolation + CALL RANDOM_NUMBER(xpt) + xpt = twopi*xpt + CALL gridval(spl, xpt, fcalc, 0, coefs) + fexact = func(xpt) +!!$ WRITE(*,'(a)') 'Interpolated and exact f' +!!$ WRITE(*,'(3(1pe12.3))') (xpt(i), fcalc(i), fexact(i), i=1,npt) + WRITE(*,'(a,1pe12.3)') 'Interpolation error', norm2(fcalc-fexact) + CALL putarr(fid, '/X', xpt) + CALL putarr(fid, '/FEXACT', fexact, 'Exact values') + CALL putarr(fid, '/FCALC', fcalc, 'Interpolated values') +! +! Gyro-averaged of F at grid points xgrid(0:nx-1) + drho = 5.0d0/nrho + DO i=1,nrho + rho(i) = i*drho + DO j=1,nnq + nq(j) = 2**(j+1) + CALL gyro(spl, xgrid, coefs, rho(i), nq(j), averf) + averfexact = dbesj0(rho(i))*COS(xgrid) + erraver(i,j) = norm2(averfexact-averf) + END DO + END DO +! + WRITE(*,'(a,f8.3,i5)') 'averaged f at rho, nq =', rho(nrho), nq(nnq) + WRITE(*,'(3(1pe12.3))') (xgrid(i),averf(i),averfexact(i), i=0,nx) + CALL putarr(fid, '/XGRID', xgrid) + CALL putarr(fid, '/AVERF', averf, 'Averaged F') + CALL putarr(fid, '/AVERFEXACT', averfexact, 'Averaged F exact') +! + CALL putarr(fid, '/RHO', rho) + CALL putarr(fid, '/NQ', nq) + CALL putarr(fid, '/ERRAVER', erraver) +! +! Clean up + CALL destroy_sp(spl) + CALL closef(fid) +CONTAINS + FUNCTION func(x) + DOUBLE PRECISION, INTENT(in) :: x(:) + DOUBLE PRECISION :: func(SIZE(x)) + func = COS(x) + END FUNCTION func + FUNCTION norm2(x) + DOUBLE PRECISION :: norm2 + DOUBLE PRECISION, INTENT(in) :: x(:) + norm2 = SQRT(DOT_PRODUCT(x,x)) + END FUNCTION norm2 +END PROGRAM main +! +SUBROUTINE gyro(spl, xgrid, coefs, rho, nq, averf) +! +! Gyro-average using spline SPL and NQ point quadratire for +! theta-integration. +! + USE bsplines + IMPLICIT NONE + TYPE(spline1d) :: spl + DOUBLE PRECISION, INTENT(in) :: xgrid(0:), coefs(:), rho + INTEGER, INTENT(in) :: nq + DOUBLE PRECISION, INTENT(out) :: averf(0:) +! + DOUBLE PRECISION :: th(nq), wth(nq), xq(nq) + DOUBLE PRECISION, ALLOCATABLE :: avermat(:,:) + DOUBLE PRECISION :: pi, twopi, lperiod, dth + DOUBLE PRECISION, POINTER :: splines(:,:) + INTEGER :: i, j, iq, nx, dim +! + pi = 4.0d0*ATAN(1.0d0) + twopi = 2.0d0*pi +! +! Quadrature in theta + dth = twopi/nq + th(1) = -pi + dth/2.0d0 + DO iq=2,nq + th(iq) = th(iq-1)+dth + END DO + wth = dth +! +! Gyro-averaging matrix + CALL get_dim(spl, dim, nx) + lperiod = xgrid(nx)-xgrid(0) + ALLOCATE(avermat(0:nx,dim)) + DO i=0,nx + xq = xgrid(i) + rho*COS(th) + xq = xgrid(0) + MODULO(xq-xgrid(0), lperiod) + CALL allsplines(spl, xq, splines) + DO j=1,dim + avermat(i,j) = DOT_PRODUCT(wth, splines(:,j))/twopi + END DO + END DO +! +! Gyro-averaged of F at grid points xgrid(0:nx) + averf = MATMUL(avermat, coefs) +! + DEALLOCATE(avermat) +END SUBROUTINE gyro diff --git a/examples/ibcmat.f90 b/examples/ibcmat.f90 new file mode 100644 index 0000000..d48ea56 --- /dev/null +++ b/examples/ibcmat.f90 @@ -0,0 +1,176 @@ +!> +!> @file ibcmat.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +SUBROUTINE ibcmat(mat, ny) +! +! Apply BC on matrix +! + USE matrix + IMPLICIT NONE + TYPE(gbmat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: ny + INTEGER :: kl, ku, nrank, i, j + DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:) + INTEGER :: i0, ii + INTEGER :: i0_arr(ny), col(ny) +!=========================================================================== +! 1.0 Prologue +! + + kl = mat%kl + ku = mat%ku + nrank = mat%rank + ALLOCATE(zsum(nrank), arr(nrank)) +! + i0 = nrank - ku + WRITE(*,'(a,i6)') 'Estimated i0', i0 + DO i=1,ny + arr = 0.0d0 + col(i) = nrank-ny+i + CALL getcol(mat, nrank-ny+i, arr) + DO ii=1,nrank + i0_arr(i)=ii + IF(arr(ii) .NE. 0.0d0) EXIT + END DO + END DO +!!$ WRITE(*,'(a/(10i6))') 'col', col +!!$ WRITE(*,'(a/(10i6))') 'i0_arr', i0_arr +! +!=========================================================================== +! 2.0 Unicity at the axis +! +! The vertical sum on the NY-th row +! + zsum = 0.0d0 + DO i=1,ny + arr = 0.0d0 + CALL getrow(mat, i, arr) + DO j=1,ny+ku + zsum(j) = zsum(j) + arr(j) + END DO + END DO + CALL putrow(mat, ny, zsum) +! +! The horizontal sum on the NY-th column +! + zsum = 0.0d0 + DO j=1,ny + arr = 0.0d0 + CALL getcol(mat, j, arr) + DO i=ny,ny+kl + zsum(i) = zsum(i) + arr(i) + END DO + END DO + CALL putcol(mat, ny, zsum) +! +! The away operator +! + DO j = 1,ny-1 + arr = 0.0d0; arr(j) = 1.0d0 + CALL putcol(mat, j, arr) + END DO +! + DO i = 1,ny-1 + arr = 0.0d0; arr(i) = 1.0d0 + CALL putrow(mat, i, arr) + END DO +! +!=========================================================================== +! 3.0 Dirichlet on right boundary +! + DO j = nrank, nrank-ny+1, -1 + arr = 0.0d0; arr(j) = 1.0d0 + CALL putcol(mat, j, arr) + END DO +! + DO i = nrank, nrank-ny+1, -1 + arr = 0.0d0; arr(i) = 1.0d0 + CALL putrow(mat, i, arr) + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(zsum, arr) +! +END SUBROUTINE ibcmat +!+++ +SUBROUTINE ibcrhs(rhs, ny) +! +! Apply BC on RHS +! + IMPLICIT NONE + DOUBLE PRECISION, INTENT(inout) :: rhs(:) + INTEGER, INTENT(in) :: ny + INTEGER :: nrank + DOUBLE PRECISION :: zsum +!=========================================================================== +! 1.0 Prologue +! + nrank = SIZE(rhs,1) +!=========================================================================== +! 2.0 Unicity at the axis +! +! The vertical sum +! + zsum = SUM(rhs(1:ny)) + rhs(ny) = zsum + rhs(1:ny-1) = 0.0d0 +!=========================================================================== +! 3.0 Dirichlet on right boundary +! + rhs(nrank-ny+1:nrank) = 0.0d0 +END SUBROUTINE ibcrhs +!+++ +SUBROUTINE ibcrhs3(rhs, ny) +! +! Apply BC on RHS +! + IMPLICIT NONE + DOUBLE PRECISION, INTENT(inout) :: rhs(:,:) + INTEGER, INTENT(in) :: ny + INTEGER :: nrank, nz, k + DOUBLE PRECISION :: zsum +!=========================================================================== +! 1.0 Prologue +! + nrank = SIZE(rhs,1) + nz = SIZE(rhs,2) +!=========================================================================== +! 2.0 Unicity at the axis +! +! The vertical sum +! + DO k=1,nz + zsum = SUM(rhs(1:ny,k)) + rhs(ny,k) = zsum + rhs(1:ny-1,k) = 0.0d0 + END DO +!=========================================================================== +! 3.0 Dirichlet on right boundary +! + DO k=1,nz + rhs(nrank-ny+1:nrank,k) = 0.0d0 + END DO +END SUBROUTINE ibcrhs3 diff --git a/examples/mesh.f90 b/examples/mesh.f90 new file mode 100644 index 0000000..aa102a9 --- /dev/null +++ b/examples/mesh.f90 @@ -0,0 +1,66 @@ +!> +!> @file mesh.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Equidistant and non-equidistant mesh +! + USE bsplines + IMPLICIT NONE + INTEGER :: i, nx + DOUBLE PRECISION :: coefs(5), dev, dx + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid +! + INTERFACE + SUBROUTINE meshdist(coefs, x, nx) + DOUBLE PRECISION, INTENT(in) :: coefs(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + END SUBROUTINE meshdist + END INTERFACE +! + NAMELIST /newrun/ nx, coefs +! + nx = 8 ! Number oh intevals in x + coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + ! see function FDIST in MESHDIST +! + READ(*,newrun) + WRITE(*,newrun) +! + ALLOCATE(xgrid(0:nx)) + xgrid(0) = 0.0d0 + xgrid(nx) = 1.0d0 + CALL meshdist(coefs, xgrid, nx) + PRINT*, 'Mesh is equidistant?', is_equid(xgrid, dev) + PRINT*, 'dev =', dev +! + dx = 1.0d0/REAL(nx,8) + xgrid = (/ (i*dx, i=0,nx) /) + PRINT*, 'Mesh is equidistant?', is_equid(xgrid, dev) + PRINT*, 'dev =', dev +! + DEALLOCATE(xgrid) +END PROGRAM main diff --git a/examples/meshdist.f90 b/examples/meshdist.f90 new file mode 100644 index 0000000..a20c2fd --- /dev/null +++ b/examples/meshdist.f90 @@ -0,0 +1,82 @@ +!> +!> @file meshdist.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +SUBROUTINE meshdist(c, x, nx) +! +! Construct a 1d non-equidistant mesh given a +! mesh distribution function. +! + IMPLICIT NONE + DOUBLE PRECISION, INTENT(in) :: c(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + INTEGER :: nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) +CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) + END FUNCTION fdist +END SUBROUTINE meshdist + diff --git a/examples/moments.f90 b/examples/moments.f90 new file mode 100644 index 0000000..0cb925b --- /dev/null +++ b/examples/moments.f90 @@ -0,0 +1,228 @@ +!> +!> @file moments.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +! +! Compute moments of f(x), using its Spline representation +! +MODULE globals + USE bsplines + USE matrix + IMPLICIT NONE + DOUBLE PRECISION, PARAMETER :: pi = 3.14159265359d0 + DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), rhs(:), sol(:) + DOUBLE PRECISION, ALLOCATABLE :: finteg(:,:), moms(:) + TYPE(spline1d), SAVE :: splx + TYPE(gbmat), SAVE :: mat +CONTAINS +!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE dismat(spl, mat) +! +! Assembly FE matrix mat using spline spl +! + TYPE(spline1d), INTENT(in) :: spl + TYPE(gbmat), INTENT(inout) :: mat + INTEGER :: nrank, nx, nidbas, ngauss + INTEGER :: i, igauss, iterm, iw, jt, irow, jcol + DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) + DOUBLE PRECISION :: contrib +! + INTEGER :: kterms ! Number of terms in weak form + INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order + DOUBLE PRECISION, ALLOCATABLE :: coefs(:) +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl, nrank, nx, nidbas) + ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative +! +! Weak form +! + kterms = mat%nterms + ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms)) +! +! Gauss quadature +! + CALL get_gauss(spl, ngauss) + ALLOCATE(xgauss(ngauss), wgauss(ngauss)) +!=========================================================================== +! 2.0 Assembly loop +! + DO i=1,nx + CALL get_gauss(spl, ngauss, i, xgauss, wgauss) + DO igauss=1,ngauss + CALL basfun(xgauss(igauss), spl, fun, i) + CALL coefeq(xgauss(igauss), idert, iderw, coefs) + DO iterm=1,kterms + DO jt=0,nidbas + DO iw=0,nidbas + contrib = fun(jt,idert(iterm)) * coefs(iterm) * & + & fun(iw,iderw(iterm)) * wgauss(igauss) + irow=i+iw; jcol=i+jt + CALL updtmat(mat, irow, jcol, contrib) + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(fun) + DEALLOCATE(xgauss, wgauss) + DEALLOCATE(iderw, idert, coefs) +! + CONTAINS + SUBROUTINE coefeq(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt)) +! +! Mass matrix +! + c(1) = 1.d0 + idt(1) = 0 + idw(1) = 0 + END SUBROUTINE coefeq + END SUBROUTINE dismat +!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE disrhs(spl, rhs) +! +! Assenbly the RHS using spline spl +! + TYPE(spline1d), INTENT(in) :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:) + INTEGER :: nrank, nx, nidbas, ngauss + INTEGER :: i, igauss + DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) + DOUBLE PRECISION:: contrib +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl, nrank, nx, nidbas) +! + ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives) +! +! Gauss quadature +! + CALL get_gauss(spl, ngauss) + ALLOCATE(xgauss(ngauss), wgauss(ngauss)) +!=========================================================================== +! 2.0 Assembly loop +! + rhs(1:nrank) = 0.0d0 +! + DO i=1,nx + CALL get_gauss(spl, ngauss, i, xgauss, wgauss) + DO igauss=1,ngauss + CALL basfun(xgauss(igauss), spl, fun, i) + contrib = wgauss(igauss)*rhseq(xgauss(igauss)) + rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1) + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(fun) + DEALLOCATE(xgauss, wgauss) +! + CONTAINS + DOUBLE PRECISION FUNCTION rhseq(v) + DOUBLE PRECISION, INTENT(in) :: v + rhseq = SQRT(1.0d0/(2.0d0*pi)) * EXP(-0.5d0*v**2) + END FUNCTION rhseq + END SUBROUTINE disrhs +END MODULE globals +! +!================================================================================ +PROGRAM main + USE bsplines + USE globals + IMPLICIT NONE + INTEGER :: nidbas, nx, nmoms, ngauss, rank, kl, ku + INTEGER :: i + DOUBLE PRECISION :: a, b, dx +! +! Input +! + WRITE(*,'(a)') 'Enter, nidbas, a, b, nx, nmoms' + READ(*,*) nidbas, a, b, nx, nmoms +! +! Equidistant mesh +! + ALLOCATE(xgrid(0:nx)) + dx = (b-a)/REAL(nx) + DO i=0, nx + xgrid(i) = a + i*dx + END DO + WRITE(*,'(a/(8(1pe12.4)))') 'XGRID', xgrid +! +! Set up spline +! + ngauss = nidbas+1 + CALL set_spline(nidbas, ngauss, xgrid, splx) +! +! Mass matrix +! + CALL get_dim(splx, rank) ! Rank of the FE Mass matrix + kl = nidbas + ku = kl + CALL init(kl, ku, rank, 1, mat) + WRITE(*,'(a,3i6)') 'kl, ku, rank', kl, ku, rank + CALL dismat(splx, mat) +! +! RHS +! + ALLOCATE(rhs(rank), sol(rank)) + CALL disrhs(splx, rhs) +! +! Solve for Spline coefs +! + CALL factor(mat) + CALL bsolve(mat, rhs, sol) + WRITE(*,'(a/(8(1pe12.4)))') 'SOL', sol + WRITE(*,'(a,1pe20.12)') ' Integral of sol using FINTG', fintg(splx, sol) +! +! Moments +! + ALLOCATE(finteg(rank,0:nmoms), moms(0:nmoms)) + CALL calc_integ(splx, finteg) + DO i=0,nmoms + moms(i) = DOT_PRODUCT(sol(:), finteg(:,i)) + END DO + WRITE(*,'(a,i3)') 'Moments of orders from 0 to', nmoms + WRITE(*,'(8(1pe20.12))') moms +! + DEALLOCATE(finteg, moms) + DEALLOCATE(rhs, sol) + DEALLOCATE(xgrid) + CALL destroy(mat) + CALL destroy_sp(splx) +END PROGRAM main diff --git a/examples/optim1.f90 b/examples/optim1.f90 new file mode 100644 index 0000000..8db612e --- /dev/null +++ b/examples/optim1.f90 @@ -0,0 +1,138 @@ +!> +!> @file optim1.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Test and compare performance of using "spline" and +! "pp" forms. 1D case +! + USE bsplines +! + IMPLICIT NONE + INTEGER :: nx, nidbas, nrank, npt=1000000 + INTEGER :: i + DOUBLE PRECISION :: dx + DOUBLE PRECISION :: seconds, t0, t1 + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, coefs + DOUBLE PRECISION, ALLOCATABLE :: xpt(:), fpt0(:), fpt1(:), fun(:, :) + INTEGER, ALLOCATABLE :: left(:) + TYPE(spline1d) :: splx +! + NAMELIST /newrun/ nx, nidbas, npt +!=============================================================================== +! +! 1D grid +! + nx = 10 + nidbas = 3 + npt = 1000000 + READ(*,newrun) + WRITE(*,newrun) + + ALLOCATE(xgrid(0:nx)) + dx = 1.0d0/REAL(nx) + xgrid = (/ (i*dx,i=0,nx) /) + WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid +! +! Set up spline +! + CALL set_spline(nidbas, 4, xgrid, splx) + nrank = splx%dim + WRITE(*,'(a, i5)') 'nrank =', nrank + WRITE(*,'(a/(10f8.3))') 'knots', splx%knots +! + ALLOCATE(xpt(npt)) + ALLOCATE(left(npt)) + ALLOCATE(fun(0:nidbas,0:1)) ! Values and first derivatives of all Splines + CALL RANDOM_NUMBER(xpt) + CALL locintv(splx, xpt, left) +!=============================================================================== +! +! Check def_basfun_opt +! + CALL basfun_recur(xpt(101), splx, fun, left(101)+1) + WRITE(*,'(/a,f20.15,i4/(2f20.15))') 'BASFUN_RECUR at X=', xpt(101), left(101),& + & (fun(:,i), i=0,1) +! +!!$ CALL def_basfun(xpt(101), splx, fun) + CALL basfun(xpt(101), splx, fun, left(101)+1) + WRITE(*,'(/a,f20.15/(2f20.15))') 'DEF_BASFUN at X=', xpt(101), & + & (fun(:,i), i=0,1) +! +! Performance of basis function computations +! + t0 = seconds() + DO i=1,npt + CALL basfun_recur(xpt(i), splx, fun, left(i)+1) + END DO + WRITE(*,'(/a,1pe12.3)') 'BASFUN_RECUR time (s)', (seconds()-t0)/REAL(npt) +! + t0 = seconds() + DO i=1,npt +!!$ CALL def_basfun(xpt(i), splx, fun) + CALL basfun(xpt(i), splx, fun, left(i)+1) + END DO + WRITE(*,'(/a,1pe12.3)') 'DEF_BASFUN time (s)', (seconds()-t0)/REAL(npt) +!=============================================================================== +! +! Check and performance of GRIDVAL using PPFORM and SPLINE expansion +! + ALLOCATE(coefs(nrank)) + DEALLOCATE(xpt) + ALLOCATE(xpt(npt), fpt0(npt), fpt1(npt)) + CALL RANDOM_NUMBER(xpt) +! + splx%nlppform = .TRUE. + coefs = 1.0d0 +! + CALL gridval(splx, xpt(1:1), fpt0(1:1), 0, coefs) +! + t0 = seconds() + CALL gridval(splx, xpt, fpt1, 1) + CALL gridval(splx, xpt, fpt0, 0) + t1 = seconds()-t0 + WRITE(*,'(/a,2(1pe12.3))') 'PPFORM: Max errors', & + & MAXVAL(ABS(fpt0-1.0d0)) ,MAXVAL(ABS(fpt1)) + WRITE(*,'(a,3(1pe12.3))') 'time(s)', t1/REAL(npt) +! + splx%nlppform = .FALSE. + coefs = 1.0d0 + CALL gridval(splx, xpt(1:1), fpt0(1:1), 0, coefs) + t0 = seconds() + CALL gridval(splx, xpt, fpt1, 1) + CALL gridval(splx, xpt, fpt0, 0) + t1 = seconds()-t0 + WRITE(*,'(/a,2(1pe12.3))') 'SPLINES: Max errors', & + & MAXVAL(ABS(fpt0-1.0d0)) ,MAXVAL(ABS(fpt1)) + WRITE(*,'(a,3(1pe12.3))') 'time(s)', t1/REAL(npt) +!=============================================================================== +! +! Clean up +! + CALL destroy_sp(splx) + DEALLOCATE(xgrid, coefs) + DEALLOCATE(xpt, fpt0, fpt1) + DEALLOCATE(fun) +END PROGRAM main diff --git a/examples/optim2.f90 b/examples/optim2.f90 new file mode 100644 index 0000000..ffb9773 --- /dev/null +++ b/examples/optim2.f90 @@ -0,0 +1,119 @@ +!> +!> @file optim2.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Test and compare performance of using "spline" and +! "pp" forms. 2D case +! + USE bsplines +! + IMPLICIT NONE + INTEGER :: nx, ny, ngauss(2), nidbas(2) + INTEGER :: npt, d1, d2 + INTEGER :: i, j + DOUBLE PRECISION :: pi, dx, dy + DOUBLE PRECISION :: seconds, t0, t1 + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid + DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: coefs + DOUBLE PRECISION, ALLOCATABLE :: xpt(:), ypt(:), fpt00(:), fpt01(:), fpt10(:) + TYPE(spline2d) :: splxy +! + NAMELIST /newrun/ nx, ny, nidbas, npt +!=============================================================================== +! +! 2D grid +! + nx = 8 + ny = 8 + nidbas = (/ 3, 3 /) + npt = 1000000 + READ(*,newrun) + WRITE(*,newrun) +! + pi = 4.0d0*ATAN(1.0d0) + ALLOCATE(xgrid(0:nx), ygrid(0:ny)) + dx = 1.0d0/REAL(nx) + xgrid = (/ (i*dx,i=0,nx) /) + dy = 2.0d0*pi/REAL(ny) + ygrid = (/ (j*dy,j=0,ny) /) +! + WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) + WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny) +! +! Set up spline +! + ngauss = 4 + CALL set_spline(nidbas, ngauss, xgrid, ygrid, splxy, (/.FALSE., .TRUE./)) + d1 = splxy%sp1%dim + d2 = splxy%sp2%dim + WRITE(*,'(a,3i4)') 'd1, d2 =', d1, d2 + WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots + WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots +!=============================================================================== +! +! Check and performance of GRIDVAL using PPFORM and SPLINE expansion +! + ALLOCATE(xpt(npt), ypt(npt)) + CALL RANDOM_NUMBER(xpt) + CALL RANDOM_NUMBER(ypt); ypt = 2.0d0*pi*ypt +! + ALLOCATE(coefs(d1,d2)) + ALLOCATE(fpt00(npt), fpt01(npt), fpt10(npt)) +! + coefs = 1.0d0 +! + splxy%sp1%nlppform = .TRUE. + splxy%sp2%nlppform = .TRUE. + CALL gridval(splxy, xpt, ypt, fpt00, (/0,0/), coefs) +! + t0 = seconds() + CALL gridval(splxy, xpt, ypt, fpt00, (/0,0/)) + CALL gridval(splxy, xpt, ypt, fpt01, (/0,1/)) + CALL gridval(splxy, xpt, ypt, fpt10, (/1,0/)) + t1 = seconds()-t0 + WRITE(*,'(/a,3(1pe12.3))') 'PPFORM: Max errors', & + & MAXVAL(ABS(fpt00-1.0d0)), MAXVAL(ABS(fpt01)), MAXVAL(ABS(fpt10)) + WRITE(*,'(a,3(1pe12.3))') 'time(s)', t1/REAL(npt) +! + splxy%sp1%nlppform = .FALSE. + splxy%sp2%nlppform = .FALSE. + CALL gridval(splxy, xpt, ypt, fpt00, (/0,0/), coefs) + t0 = seconds() + CALL gridval(splxy, xpt, ypt, fpt00, (/0,0/)) + CALL gridval(splxy, xpt, ypt, fpt01, (/0,1/)) + CALL gridval(splxy, xpt, ypt, fpt10, (/1,0/)) + t1 = seconds()-t0 + WRITE(*,'(/a,3(1pe12.3))') 'SPLINES: Max errors', & + & MAXVAL(ABS(fpt00-1.0d0)), MAXVAL(ABS(fpt01)), MAXVAL(ABS(fpt10)) + WRITE(*,'(a,3(1pe12.3))') 'time(s)', t1/REAL(npt) +!=============================================================================== +! +! Clean up +! + CALL destroy_sp(splxy) + DEALLOCATE(xgrid, ygrid, coefs) + DEALLOCATE(xpt, ypt, fpt00, fpt01, fpt10) +END PROGRAM main diff --git a/examples/optim3.f90 b/examples/optim3.f90 new file mode 100644 index 0000000..9a617a0 --- /dev/null +++ b/examples/optim3.f90 @@ -0,0 +1,137 @@ +!> +!> @file optim3.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Test and compare performance of using "spline" and +! "pp" forms. 2D1D case +! + USE bsplines +! + IMPLICIT NONE + INTEGER :: nx, ny, nz, ngauss(3), nidbas(3) + INTEGER :: npt, d1, d2, d3 + INTEGER :: i, j, k + DOUBLE PRECISION :: pi, dx, dy, dz + DOUBLE PRECISION :: seconds, t0, t1 + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, zgrid + DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: coefs + DOUBLE PRECISION, ALLOCATABLE :: xpt(:), ypt(:), zpt(:) + DOUBLE PRECISION, ALLOCATABLE :: fpt000(:), fpt100(:), fpt010(:), fpt001(:) + TYPE(spline2d1d) :: splxyz +! + NAMELIST /newrun/ nx, ny, nz, nidbas, npt +!=============================================================================== +! +! 2D grid +! + nx = 8 + ny = 8 + nz = 8 + nidbas = (/ 3, 3, 3 /) + npt = 1000000 + READ(*,newrun) + WRITE(*,newrun) +! + pi = 4.0d0*ATAN(1.0d0) + ALLOCATE(xgrid(0:nx), ygrid(0:ny), zgrid(0:nz)) + dx = 1.0d0/REAL(nx) + xgrid = (/ (i*dx,i=0,nx) /) + dy = 2.0d0*pi/REAL(ny) + ygrid = (/ (j*dy,j=0,ny) /) + dz = 2.0d0*pi/REAL(nz) + zgrid = (/ (k*dz,k=0,nz) /) +! + WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) + WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny) + WRITE(*,'(a/(10f8.3))') 'ZGRID', zgrid(0:nz) +! +! Set up spline +! + ngauss = 4 + CALL set_spline(nidbas, ngauss, xgrid, ygrid, zgrid, splxyz, & + & (/.FALSE., .TRUE., .TRUE./)) + d1 = splxyz%sp12%sp1%dim + d2 = splxyz%sp12%sp2%dim + d3 = splxyz%sp3%dim + WRITE(*,'(a,3i4)') 'd1, d2, d3 =', d1, d2, d3 + WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxyz%sp12%sp1%knots + WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxyz%sp12%sp2%knots + WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Z', splxyz%sp3%knots +!=============================================================================== +! +! Check and performance of GRIDVAL using PPFORM and SPLINE expansion +! + ALLOCATE(xpt(npt), ypt(npt), zpt(npt)) + CALL RANDOM_NUMBER(xpt) + CALL RANDOM_NUMBER(ypt); ypt = 2.0d0*pi*ypt + CALL RANDOM_NUMBER(zpt); zpt = 2.0d0*pi*zpt +! + ALLOCATE(coefs(d1,d2,d3)) + ALLOCATE(fpt000(npt), fpt100(npt), fpt010(npt), fpt001(npt)) +! + coefs = 1.0d0 +! + splxyz%sp12%sp1%nlppform = .TRUE. + splxyz%sp12%sp2%nlppform = .TRUE. + splxyz%sp3%nlppform = .TRUE. + CALL gridval(splxyz, xpt, ypt, zpt, fpt000, (/0,0,0/), coefs) + t0 = seconds() + CALL gridval(splxyz, xpt, ypt, zpt, fpt000, (/0,0,0/)) + CALL gridval(splxyz, xpt, ypt, zpt, fpt100, (/1,0,0/)) + CALL gridval(splxyz, xpt, ypt, zpt, fpt010, (/0,1,0/)) + CALL gridval(splxyz, xpt, ypt, zpt, fpt001, (/0,0,1/)) + t1 = seconds()-t0 + WRITE(*,'(/a,4(1pe12.3))') 'PPFORM: Max errors', & + & MAXVAL(ABS(fpt000-1.0d0)), & + & MAXVAL(ABS(fpt100)), & + & MAXVAL(ABS(fpt010)), & + & MAXVAL(ABS(fpt001)) + WRITE(*,'(a,3(1pe12.3))') 'time(s)', t1/REAL(npt) +! + splxyz%sp12%sp1%nlppform = .FALSE. + splxyz%sp12%sp2%nlppform = .FALSE. + splxyz%sp3%nlppform = .FALSE. + CALL gridval(splxyz, xpt, ypt, zpt, fpt000, (/0,0,0/), coefs) + t0 = seconds() + CALL gridval(splxyz, xpt, ypt, zpt, fpt000, (/0,0,0/)) + CALL gridval(splxyz, xpt, ypt, zpt, fpt100, (/1,0,0/)) + CALL gridval(splxyz, xpt, ypt, zpt, fpt010, (/0,1,0/)) + CALL gridval(splxyz, xpt, ypt, zpt, fpt001, (/0,0,1/)) + t1 = seconds()-t0 + WRITE(*,'(/a,4(1pe12.3))') 'SPLINES: Max errors', & + & MAXVAL(ABS(fpt000-1.0d0)), & + & MAXVAL(ABS(fpt100)), & + & MAXVAL(ABS(fpt010)), & + & MAXVAL(ABS(fpt001)) + WRITE(*,'(a,3(1pe12.3))') 'time(s)', t1/REAL(npt) +!=============================================================================== +! +! Clean up +! + CALL destroy_sp(splxyz) + DEALLOCATE(xgrid, ygrid, zgrid, coefs) + DEALLOCATE(xpt, ypt, fpt000,fpt100, fpt010, fpt001) +END PROGRAM main diff --git a/examples/pde1d.f90 b/examples/pde1d.f90 new file mode 100644 index 0000000..86584b8 --- /dev/null +++ b/examples/pde1d.f90 @@ -0,0 +1,419 @@ +!> +!> @file pde1d.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Solving the following 1d differential eqation using splines: +! +! -d/dr[r d/dr] f = k^2 r^(k-1), with f(r=1) = 0 +! exact solution: f(r) = 1 - r^k +! + USE bsplines + USE matrix + USE futils + USE conmat_mod + IMPLICIT NONE + INTEGER :: nx, nidbas, ngauss, kdiff + INTEGER :: i, nrank, kl, ku + LOGICAL :: nlppform + DOUBLE PRECISION :: coefs(5) + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, rhs, sol + TYPE(spline1d) :: splx + TYPE(gbmat) :: mat +! + CHARACTER(len=128) :: file='pde1d.h5' + INTEGER :: fid + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr + DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: solcal, solana, errsol + DOUBLE PRECISION :: seconds, t0, tmat, tfact, tsolv +! + INTERFACE + SUBROUTINE dismat(spl, mat) + USE bsplines + USE matrix + TYPE(spline1d), INTENT(in) :: spl + TYPE(gbmat), INTENT(inout) :: mat + END SUBROUTINE dismat + SUBROUTINE disrhs(kdiff, spl, rhs) + USE bsplines + INTEGER, INTENT(in) :: kdiff + TYPE(spline1d), INTENT(in) :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:) + END SUBROUTINE disrhs + SUBROUTINE meshdist(coefs, x, nx) + DOUBLE PRECISION, INTENT(in) :: coefs(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + END SUBROUTINE meshdist + END INTERFACE +! + NAMELIST /newrun/ nx, nidbas, ngauss, kdiff, nlppform, coefs +!=========================================================================== +! 1.0 Prologue +! +! Read in data specific to run +! + nx = 8 ! Number oh intevals in x + nidbas = 3 ! Degree of splines + ngauss = 4 ! Number of Gauss points/interval + kdiff = 2 ! Exponent of differential problem + nlppform = .TRUE. ! Use PPFORM for gridval or not + coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + ! see function FDIST in MESHDIST +! + READ(*,newrun) + WRITE(*,newrun) +! +! Define grid on x axis +! + ALLOCATE(xgrid(0:nx)) + xgrid(0) = 0.0d0 + xgrid(nx) = 1.0d0 + CALL meshdist(coefs, xgrid, nx) + WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) + WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx) + WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx) +! +! Create hdf5 file +! + CALL creatf(file, fid, 'PDE1D Result File') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NIDBAS', nidbas) + CALL attach(fid, '/', 'NGAUSS', ngauss) + CALL attach(fid, '/', 'KDIFF', kdiff) +!=========================================================================== +! 2.0 Discretize the PDE +! +! Set up spline +! + t0 = seconds() + CALL set_spline(nidbas, ngauss, xgrid, splx, nlppform=nlppform) + CALL get_dim(splx, nrank) ! Rank of the FE matrix + WRITE(*,'(a,l6)') 'splx%nlequid', splx%nlequid +! +! FE matrix assembly +! + kl = nidbas + ku = kl + CALL init(kl, ku, nrank, 1, mat) + WRITE(*,'(a,3i6)') 'kl, ku, nrank', kl, ku, nrank +!!$ CALL dismat(splx, mat) + CALL conmat(splx, mat, coefeq) +! + ALLOCATE(arr(nrank)) +!!$ WRITE(*,'(/a)') 'Matrice before BC' +!!$ DO i=1,nrank +!!$ CALL getrow(mat, i, arr) +!!$ WRITE(*,'(12f8.3)') arr, SUM(arr) +!!$ END DO +! +! RHS assembly +! + ALLOCATE(rhs(nrank), sol(nrank)) + CALL disrhs(kdiff, splx, rhs) +!!$ WRITE(*,'(/a,/(12(f8.3)))') 'RHS before BC', rhs +! +! Set BC f(r=1) = 0 on matrix +! + arr(1:nrank-1) = 0.0d0 + arr(nrank) = 1.0d0 + CALL putrow(mat, nrank, arr) + CALL putcol(mat, nrank, arr) + tmat = seconds() - t0 +!!$ WRITE(*,'(/a)') 'Matrice after BC' +!!$ DO i=1,nrank +!!$ CALL getrow(mat, i, arr) +!!$ WRITE(*,'(12f8.3)') arr +!!$ END DO +! +! Set BC f(r=1) = 0 on RHS +! + rhs(nrank) = 0.0d0 +!!$ WRITE(*,'(/a,/(12(f8.3)))') 'RHS after BC', rhs + CALL putarr(fid, '/RHS', rhs, 'RHS of linear system') + CALL putarr(fid, '/MAT', mat%val, 'GB matrice with BC') + CALL attach(fid, '/MAT', 'KL', mat%kl) + CALL attach(fid, '/MAT', 'KU', mat%ku) + CALL attach(fid, '/MAT', 'RANK', mat%rank) +!=========================================================================== +! 3.0 Solve the dicretized system +! + t0 = seconds() + CALL factor(mat) + tfact = seconds() - t0 + + t0 = seconds() + CALL bsolve(mat, rhs, sol) + tsolv = seconds() - t0 +!!$ WRITE(*,'(/a,/(12(f8.3)))') 'SOL', sol + CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution') +! + WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splx, sol), & + & fintg(splx, sol)-REAL(kdiff,8)/REAL(kdiff+1,8) +!=========================================================================== +! 4.0 Check the solution and its 1st derivative +! + ALLOCATE(solcal(0:nx,0:2), solana(0:nx,0:2), errsol(0:nx,0:2)) + DO i =0,nx + solana(i,0) = 1.0d0-xgrid(i)**kdiff + solana(i,1) = -kdiff*xgrid(i)**(kdiff-1) + solana(i,2) = -kdiff*(kdiff-1)*xgrid(i)**(kdiff-2) + END DO + CALL gridval(splx, xgrid, solcal(:,0), 0, sol) ! Compute PPFORM and grid values + CALL gridval(splx, xgrid, solcal(:,1), 1) ! 1st derivative + CALL gridval(splx, xgrid, solcal(:,2), 2) ! 2nd derivative + errsol = solana - solcal +! + CALL putarr(fid, '/XGRID', xgrid) + CALL putarr(fid, '/SOLCAL', solcal) + CALL putarr(fid, '/SOLANA', solana) + CALL putarr(fid, '/ERR', errsol) +! + CALL creatg(fid, '/spline') + CALL attach(fid, '/spline', 'order', splx%order) + CALL putarr(fid, '/spline/knots', splx%knots, 'Spline knots') + WRITE(*,'(a,3(1pe12.3))') 'Rel. discretization errors (solution and derivatives).', & + & (SQRT( DOT_PRODUCT(errsol(:,i),errsol(:,i)) / & + & DOT_PRODUCT(solana(:,i),solana(:,i)) ), i=0,2) +! + WRITE(*,'(a)') '---' + WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat + WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact + WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xgrid, rhs, sol) + DEALLOCATE(solcal, solana, errsol) + DEALLOCATE(arr) + CALL destroy(mat) + CALL destroy_sp(splx) + CALL closef(fid) +CONTAINS + SUBROUTINE coefeq(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE PRECISION, INTENT(out) :: c(:) +! +! Weak form = Int(x*dw/dx*dt/dx)dx +! + c(1) = x + idt(1) = 1 + idw(1) = 1 + END SUBROUTINE coefeq +END PROGRAM main +! +!+++ +SUBROUTINE meshdist(c, x, nx) +! +! Construct an 1d non-equidistant mesh given a +! mesh distribution function defined in FDIST +! + IMPLICIT NONE + DOUBLE PRECISION, INTENT(in) :: c(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + INTEGER :: nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) +CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) + END FUNCTION fdist +END SUBROUTINE meshdist +!+++ +SUBROUTINE dismat(spl, mat) +! +! Assembly FE matrix mat using spline spl +! + USE bsplines + USE matrix + IMPLICIT NONE + TYPE(spline1d), INTENT(in) :: spl + TYPE(gbmat), INTENT(inout) :: mat + INTEGER :: nrank, nx, nidbas, ngauss + INTEGER :: i, igauss, iterm, iw, jt, irow, jcol + DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) + DOUBLE PRECISION :: contrib +! + INTEGER :: kterms ! Number of terms in weak form + INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order + DOUBLE PRECISION, ALLOCATABLE :: coefs(:) +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl, nrank, nx, nidbas) + ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative +! +! Weak form +! + kterms = mat%nterms + ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms)) +! +! Gauss quadature +! + CALL get_gauss(spl, ngauss) + ALLOCATE(xgauss(ngauss), wgauss(ngauss)) +!=========================================================================== +! 2.0 Assembly loop +! + DO i=1,nx + CALL get_gauss(spl, ngauss, i, xgauss, wgauss) + DO igauss=1,ngauss + CALL basfun(xgauss(igauss), spl, fun, i) + CALL coefeq(xgauss(igauss), idert, iderw, coefs) + DO iterm=1,kterms + DO jt=0,nidbas + DO iw=0,nidbas + contrib = fun(jt,idert(iterm)) * coefs(iterm) * & + & fun(iw,iderw(iterm)) * wgauss(igauss) + irow=i+iw; jcol=i+jt + CALL updtmat(mat, irow, jcol, contrib) + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(fun) + DEALLOCATE(xgauss, wgauss) + DEALLOCATE(iderw, idert, coefs) +! +CONTAINS + SUBROUTINE coefeq(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt)) +! +! Weak form = Int(x*dw/dx*dt/dx)dx +! + c(1) = x + idt(1) = 1 + idw(1) = 1 + END SUBROUTINE coefeq +END SUBROUTINE dismat +!+++ +SUBROUTINE disrhs(kdiff, spl, rhs) +! +! Assenbly the RHS using spline spl +! + USE bsplines + IMPLICIT NONE + INTEGER, INTENT(in) :: kdiff + TYPE(spline1d), INTENT(in) :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:) + INTEGER :: nrank, nx, nidbas, ngauss + INTEGER :: i, igauss, left + DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) + DOUBLE PRECISION:: contrib +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl, nrank, nx, nidbas) +!!$ WRITE(*,'(/a, 5i3)') 'nrank, nx, nidbas =', nrank, nx, nidbas +! + ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives) +! +! Gauss quadature +! + CALL get_gauss(spl, ngauss) +!!$ WRITE(*,'(/a, i3)') 'Gauss points and weights, ngauss =', ngauss + ALLOCATE(xgauss(ngauss), wgauss(ngauss)) +!=========================================================================== +! 2.0 Assembly loop +! + rhs(1:nrank) = 0.0d0 +! + DO i=1,nx + CALL get_gauss(spl, ngauss, i, xgauss, wgauss) +!!$ WRITE(*,'(a,i3,(8f10.4))') 'i=',i, xgauss, wgauss + DO igauss=1,ngauss + CALL basfun(xgauss(igauss), spl, fun, i) + left = i-1 +!!$ WRITE(*,'(a,5i4)') '>>>igauss, left', igauss, left + contrib = wgauss(igauss)*rhseq(xgauss(igauss), kdiff) + rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1) + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(fun) + DEALLOCATE(xgauss, wgauss) +! +CONTAINS + DOUBLE PRECISION FUNCTION rhseq(x,k) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(in) :: k + rhseq = k*k*x**(k-1) + END FUNCTION rhseq +END SUBROUTINE disrhs diff --git a/examples/pde1d_eig.f90 b/examples/pde1d_eig.f90 new file mode 100644 index 0000000..698efea --- /dev/null +++ b/examples/pde1d_eig.f90 @@ -0,0 +1,459 @@ +!> +!> @file pde1d_eig.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Solving the following 1d differential eqation using splines: +! +! -d/dr[r d/dr] f = k^2 r^(k-1), with f(r=1) = 0 +! exact solution: f(r) = 1 - r^k +! + USE bsplines + USE matrix + USE futils + USE conmat_mod + IMPLICIT NONE + INTEGER :: nx, nidbas, ngauss, kdiff + INTEGER :: i, nrank, kl, ku + LOGICAL :: nlppform + DOUBLE PRECISION :: coefs(5) + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, rhs, sol + TYPE(spline1d) :: splx + TYPE(gemat) :: mat +!!$ TYPE(gbmat) :: mat +! + CHARACTER(len=128) :: file='pde1d.h5' + INTEGER :: fid + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr + DOUBLE PRECISION :: seconds, t0, tmat, teig, tarpack +! + DOUBLE PRECISION, ALLOCATABLE :: mata(:,:), eigvals(:), work(:) + INTEGER :: j, info +! + INTEGER, PARAMETER :: maxn=256, maxnev=maxn, maxncv=25, & + & lworkl=maxncv*(maxncv+8), zero=0.0d0 + DOUBLE PRECISION :: v(maxn,maxncv), workl(lworkl), workd(3*maxn), & + & d(maxncv,2), resid(maxn), w(maxn), & + & tol=0.0d0, sigma + DOUBLE PRECISION, EXTERNAL :: dnrm2 + INTEGER :: nev=10, ncv=20, ishfts=1, maxitr=300, nconv, & + & mode1=1, ierr + INTEGER :: ido, ipntr(11), iparam(11) + + CHARACTER(len=1) :: bmat='I' + CHARACTER(len=2) :: which='SA' + LOGICAL :: rvec, select(maxncv) + +! + INTERFACE + SUBROUTINE dismat(spl, mat) + USE bsplines + USE matrix + TYPE(spline1d), INTENT(in) :: spl + TYPE(gbmat), INTENT(inout) :: mat + END SUBROUTINE dismat + SUBROUTINE disrhs(kdiff, spl, rhs) + USE bsplines + INTEGER, INTENT(in) :: kdiff + TYPE(spline1d), INTENT(in) :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:) + END SUBROUTINE disrhs + SUBROUTINE meshdist(coefs, x, nx) + DOUBLE PRECISION, INTENT(in) :: coefs(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + END SUBROUTINE meshdist + END INTERFACE +! + NAMELIST /newrun/ nx, nidbas, ngauss, kdiff, nlppform, coefs, nev, ncv, which +!=========================================================================== +! 1.0 Prologue +! +! Read in data specific to run +! + nx = 8 ! Number oh intevals in x + nidbas = 3 ! Degree of splines + ngauss = 4 ! Number of Gauss points/interval + kdiff = 2 ! Exponent of differential problem + nlppform = .TRUE. ! Use PPFORM for gridval or not + coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + ! see function FDIST in MESHDIST +! + READ(*,newrun) + WRITE(*,newrun) +! +! Define grid on x axis +! + ALLOCATE(xgrid(0:nx)) + xgrid(0) = 0.0d0 + xgrid(nx) = 1.0d0 + CALL meshdist(coefs, xgrid, nx) + WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) + WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx) + WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx) +! +! Create hdf5 file +! + CALL creatf(file, fid, 'PDE1D Result File') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NIDBAS', nidbas) + CALL attach(fid, '/', 'NGAUSS', ngauss) + CALL attach(fid, '/', 'KDIFF', kdiff) +!=========================================================================== +! 2.0 Discretize the PDE +! +! Set up spline +! + t0 = seconds() + CALL set_spline(nidbas, ngauss, xgrid, splx, nlppform=nlppform) + CALL get_dim(splx, nrank) ! Rank of the FE matrix + WRITE(*,'(a,l6)') 'splx%nlequid', splx%nlequid +! +! FE matrix assembly +! + kl = nidbas + ku = kl +!!$ CALL init(kl, ku, nrank, 1, mat) + WRITE(*,'(a,3i6)') 'kl, ku, nrank', kl, ku, nrank + CALL init(nrank, 1, mat) +!!$ CALL dismat(splx, mat) + CALL conmat(splx, mat, coefeq) +! + ALLOCATE(arr(nrank)) +!!$ WRITE(*,'(/a)') 'Matrice before BC' +!!$ DO i=1,nrank +!!$ CALL getrow(mat, i, arr) +!!$ WRITE(*,'(12f8.3)') arr, SUM(arr) +!!$ END DO +! +! RHS assembly +! + ALLOCATE(rhs(nrank), sol(nrank)) + CALL disrhs(kdiff, splx, rhs) +!!$ WRITE(*,'(/a,/(12(f8.3)))') 'RHS before BC', rhs +! +! Set BC f(r=1) = 0 on matrix +! + arr(1:nrank-1) = 0.0d0 + arr(nrank) = 1.0d0 + CALL putrow(mat, nrank, arr) + CALL putcol(mat, nrank, arr) + tmat = seconds() - t0 +! +!=========================================================================== +! 3.0 Eigevalue problem +! +! Using Lapack dsyev +! + t0 = seconds() + ALLOCATE(mata(nrank,nrank)) + ALLOCATE(eigvals(nrank)) + ALLOCATE(work(3*nrank)) + mata=0.0d0 + DO j=1,nrank + mata(j:nrank,j) = mat%val(j:nrank,j) + END DO + CALL putarr(fid, '/MAT', mata, 'matrix A') + CALL dsyev('V', 'L', nrank, mata, nrank, eigvals, work, SIZE(work), info) + teig = seconds() - t0 + PRINT*,'Info from DSYEV', info, arr(1) + IF(info.EQ.0) THEN + CALL putarr(fid, '/EIGVS', eigvals, 'eigenvalues of A') + CALL putarr(fid, '/EIGVECS', mata, 'eigenvectors of A') + WRITE(*,'(a/(10f10.4))') 'eigval', eigvals + END IF +! +! Using Arpack +! + ido = 0 + iparam(1) = ishfts + iparam(3) = maxitr + iparam(7) = mode1 +! + IF(nev.GT.0) THEN + t0 = seconds() + DO ! ARPACK reverse communication loop + CALL dsaupd(ido, bmat, nrank, which, nev, tol, resid, & + & ncv, v, maxn, iparam, ipntr, workd, workl,& + & lworkl, info) + IF(ido.EQ.-1 .OR. ido.EQ.1) THEN +!!$ WRITE(*,'(a/(10i4))') 'ipntr', ipntr + CALL av(nrank,workd(ipntr(1)), workd(ipntr(2))) + CYCLE + END IF + IF(info .LT. 0) THEN + PRINT*, 'Error in _saupd with info =', info + ELSE + rvec = .TRUE. + CALL dseupd(rvec, 'All', SELECT, d, v, maxn, sigma, & + & bmat, nrank, which, nev, tol, resid, ncv, v, maxn, & + & iparam, ipntr, workd, workl, lworkl, ierr ) + IF( ierr .NE. 0 ) THEN + PRINT*,'Error in _seupd with ierr =', ierr + ELSE + nconv = iparam(5) + PRINT*, '--- eigenvalues and error ---' + DO j=1,nconv ! Residual norms +!!$ CALL av(nrank,v(1,j), w) ! d(1,j) is the j^th eigenvalue +!!$ CALL daxpy(nrank, -d(j,1), v(1,j), 1, w, 1) +!!$ d(j,2) = dnrm2(nrank, w, 1) +!!$ d(j,2) = d(j,2)/abs(d(j,1)) + WRITE(*,'(2(1pe12.4))') d(j,1), eigvals(j)-d(j,1) +!!$ CALL putarr(fid, '/EIGVS', d(:,1), 'ARPACK eigenvalues of A') +!!$ CALL putarr(fid, '/EIGVECS', v, 'ARPACK eigenvectors of A') + END DO + EXIT + END IF + END IF + END DO ! End of ARPACK reverse communication loop + tarpack = seconds()-t0 + END IF +!=========================================================================== +! 9.0 Epilogue +! + WRITE(*,'(a)') '---' + WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat + WRITE(*,'(a,1pe12.3)') 'Lapack: Matrice eigvalue time (s)', teig + WRITE(*,'(a,1pe12.3)') 'Arpack: Matrice eigvalue time (s)', tarpack +! + DEALLOCATE(xgrid, rhs, sol) + DEALLOCATE(arr) + CALL destroy(mat) + CALL destroy_sp(splx) + CALL closef(fid) +CONTAINS + SUBROUTINE av(n,v,w) +! +! Matrix vector product: w <- Av + INTEGER, INTENT(in) :: n + DOUBLE PRECISION, INTENT(in) :: v(*) + DOUBLE PRECISION, INTENT(out) :: w(*) + w(1:n) = vmx(mat,v(1:n)) + END SUBROUTINE av + SUBROUTINE coefeq(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE PRECISION, INTENT(out) :: c(:) +! +! Weak form = Int(x*dw/dx*dt/dx)dx +! + c(1) = x + idt(1) = 1 + idw(1) = 1 + END SUBROUTINE coefeq +END PROGRAM main +! +!+++ +SUBROUTINE meshdist(c, x, nx) +! +! Construct an 1d non-equidistant mesh given a +! mesh distribution function defined in FDIST +! + IMPLICIT NONE + DOUBLE PRECISION, INTENT(in) :: c(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + INTEGER :: nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) +CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) + END FUNCTION fdist +END SUBROUTINE meshdist +!+++ +SUBROUTINE dismat(spl, mat) +! +! Assembly FE matrix mat using spline spl +! + USE bsplines + USE matrix + IMPLICIT NONE + TYPE(spline1d), INTENT(in) :: spl + TYPE(gbmat), INTENT(inout) :: mat + INTEGER :: nrank, nx, nidbas, ngauss + INTEGER :: i, igauss, iterm, iw, jt, irow, jcol + DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) + DOUBLE PRECISION :: contrib +! + INTEGER :: kterms ! Number of terms in weak form + INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order + DOUBLE PRECISION, ALLOCATABLE :: coefs(:) +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl, nrank, nx, nidbas) + ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative +! +! Weak form +! + kterms = mat%nterms + ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms)) +! +! Gauss quadature +! + CALL get_gauss(spl, ngauss) + ALLOCATE(xgauss(ngauss), wgauss(ngauss)) +!=========================================================================== +! 2.0 Assembly loop +! + DO i=1,nx + CALL get_gauss(spl, ngauss, i, xgauss, wgauss) + DO igauss=1,ngauss + CALL basfun(xgauss(igauss), spl, fun, i) + CALL coefeq(xgauss(igauss), idert, iderw, coefs) + DO iterm=1,kterms + DO jt=0,nidbas + DO iw=0,nidbas + contrib = fun(jt,idert(iterm)) * coefs(iterm) * & + & fun(iw,iderw(iterm)) * wgauss(igauss) + irow=i+iw; jcol=i+jt + CALL updtmat(mat, irow, jcol, contrib) + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(fun) + DEALLOCATE(xgauss, wgauss) + DEALLOCATE(iderw, idert, coefs) +! +CONTAINS + SUBROUTINE coefeq(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt)) +! +! Weak form = Int(x*dw/dx*dt/dx)dx +! + c(1) = x + idt(1) = 1 + idw(1) = 1 + END SUBROUTINE coefeq +END SUBROUTINE dismat +!+++ +SUBROUTINE disrhs(kdiff, spl, rhs) +! +! Assenbly the RHS using spline spl +! + USE bsplines + IMPLICIT NONE + INTEGER, INTENT(in) :: kdiff + TYPE(spline1d), INTENT(in) :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:) + INTEGER :: nrank, nx, nidbas, ngauss + INTEGER :: i, igauss, left + DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) + DOUBLE PRECISION:: contrib +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl, nrank, nx, nidbas) +!!$ WRITE(*,'(/a, 5i3)') 'nrank, nx, nidbas =', nrank, nx, nidbas +! + ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives) +! +! Gauss quadature +! + CALL get_gauss(spl, ngauss) +!!$ WRITE(*,'(/a, i3)') 'Gauss points and weights, ngauss =', ngauss + ALLOCATE(xgauss(ngauss), wgauss(ngauss)) +!=========================================================================== +! 2.0 Assembly loop +! + rhs(1:nrank) = 0.0d0 +! + DO i=1,nx + CALL get_gauss(spl, ngauss, i, xgauss, wgauss) +!!$ WRITE(*,'(a,i3,(8f10.4))') 'i=',i, xgauss, wgauss + DO igauss=1,ngauss + CALL basfun(xgauss(igauss), spl, fun, i) + left = i-1 +!!$ WRITE(*,'(a,5i4)') '>>>igauss, left', igauss, left + contrib = wgauss(igauss)*rhseq(xgauss(igauss), kdiff) + rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1) + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(fun) + DEALLOCATE(xgauss, wgauss) +! +CONTAINS + DOUBLE PRECISION FUNCTION rhseq(x,k) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(in) :: k + rhseq = k*k*x**(k-1) + END FUNCTION rhseq +END SUBROUTINE disrhs diff --git a/examples/pde1d_eig_csr.f90 b/examples/pde1d_eig_csr.f90 new file mode 100644 index 0000000..02996ca --- /dev/null +++ b/examples/pde1d_eig_csr.f90 @@ -0,0 +1,469 @@ +!> +!> @file pde1d_eig_csr.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Solving the following 1d differential eqation using splines: +! +! -d/dr[r d/dr] f = k^2 r^(k-1), with f(r=1) = 0 +! exact solution: f(r) = 1 - r^k +! + USE bsplines + USE csr + USE futils + USE conmat_mod + IMPLICIT NONE + INTEGER :: nx, nidbas, ngauss, kdiff + INTEGER :: i, nrank, kl, ku + LOGICAL :: nlppform + DOUBLE PRECISION :: coefs(5) + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, rhs, sol + TYPE(spline1d) :: splx + TYPE(csr_mat) :: mat +!!$ TYPE(gemat) :: mat +!!$ TYPE(gbmat) :: mat +! + CHARACTER(len=128) :: file='pde1d.h5' + INTEGER :: fid, ffid + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr + DOUBLE PRECISION :: seconds, t0, tmat, teig, tarpack +! + DOUBLE PRECISION, ALLOCATABLE :: mata(:,:), eigvals(:), work(:) + INTEGER :: j, info +! + INTEGER, PARAMETER :: maxn=256, maxnev=maxn, maxncv=25, & + & lworkl=maxncv*(maxncv+8), zero=0.0d0 + DOUBLE PRECISION :: v(maxn,maxncv), workl(lworkl), workd(3*maxn), & + & d(maxncv,2), resid(maxn), w(maxn), & + & tol=0.0d0, sigma + DOUBLE PRECISION, EXTERNAL :: dnrm2 + INTEGER :: nev=10, ncv=20, ishfts=1, maxitr=300, nconv, & + & mode1=1, ierr + INTEGER :: ido, ipntr(11), iparam(11) + + CHARACTER(len=1) :: bmat='I' + CHARACTER(len=2) :: which='SA' + LOGICAL :: rvec, select(maxncv) + +! + INTERFACE + SUBROUTINE dismat(spl, mat) + USE bsplines + USE matrix + TYPE(spline1d), INTENT(in) :: spl + TYPE(gbmat), INTENT(inout) :: mat + END SUBROUTINE dismat + SUBROUTINE disrhs(kdiff, spl, rhs) + USE bsplines + INTEGER, INTENT(in) :: kdiff + TYPE(spline1d), INTENT(in) :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:) + END SUBROUTINE disrhs + SUBROUTINE meshdist(coefs, x, nx) + DOUBLE PRECISION, INTENT(in) :: coefs(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + END SUBROUTINE meshdist + END INTERFACE +! + NAMELIST /newrun/ nx, nidbas, ngauss, kdiff, nlppform, coefs, nev, ncv, which +!=========================================================================== +! 1.0 Prologue +! +! Read in data specific to run +! + nx = 8 ! Number oh intevals in x + nidbas = 3 ! Degree of splines + ngauss = 4 ! Number of Gauss points/interval + kdiff = 2 ! Exponent of differential problem + nlppform = .TRUE. ! Use PPFORM for gridval or not + coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + ! see function FDIST in MESHDIST +! + READ(*,newrun) + WRITE(*,newrun) +! +! Define grid on x axis +! + ALLOCATE(xgrid(0:nx)) + xgrid(0) = 0.0d0 + xgrid(nx) = 1.0d0 + CALL meshdist(coefs, xgrid, nx) + WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) + WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx) + WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx) +! +! Create hdf5 file +! + CALL creatf(file, fid, 'PDE1D Result File') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NIDBAS', nidbas) + CALL attach(fid, '/', 'NGAUSS', ngauss) + CALL attach(fid, '/', 'KDIFF', kdiff) +!=========================================================================== +! 2.0 Discretize the PDE +! +! Set up spline +! + t0 = seconds() + CALL set_spline(nidbas, ngauss, xgrid, splx, nlppform=nlppform) + CALL get_dim(splx, nrank) ! Rank of the FE matrix + WRITE(*,'(a,l6)') 'splx%nlequid', splx%nlequid +! +! FE matrix assembly +! + kl = nidbas + ku = kl + WRITE(*,'(a,3i6)') 'kl, ku, nrank', kl, ku, nrank + CALL init(nrank, 1, mat) +!!$ CALL dismat(splx, mat) + CALL conmat(splx, mat, coefeq) + CALL to_mat(mat) + CALL creatf('pde1d_eig.h5', ffid, 'PDE1D Result File') + PRINT*, 'rank', mat%rank + PRINT*, 'nnz', mat%nnz + PRINT*, 'irow', mat%irow + PRINT*, 'cols', mat%cols + CALL putmat(ffid,'/MAT',mat,'FE matrix') + CALL closef(ffid) +! +! + ALLOCATE(arr(nrank)) +!!$ WRITE(*,'(/a)') 'Matrice before BC' +!!$ DO i=1,nrank +!!$ CALL getrow(mat, i, arr) +!!$ WRITE(*,'(12f8.3)') arr, SUM(arr) +!!$ END DO +! +! RHS assembly +! + ALLOCATE(rhs(nrank), sol(nrank)) + CALL disrhs(kdiff, splx, rhs) +!!$ WRITE(*,'(/a,/(12(f8.3)))') 'RHS before BC', rhs +!!$! +!!$! Set BC f(r=1) = 0 on matrix +!!$! +!!$ arr(1:nrank-1) = 0.0d0 +!!$ arr(nrank) = 1.0d0 +!!$ CALL putrow(mat, nrank, arr) +!!$ CALL putcol(mat, nrank, arr) + CALL putmat(fid,'/MATA', mat, 'FE matrix') + tmat = seconds() - t0 +! +!=========================================================================== +! 3.0 Eigevalue problem +! +! Using Lapack dsyev +! + t0 = seconds() + ALLOCATE(mata(nrank,nrank)) + ALLOCATE(eigvals(nrank)) + ALLOCATE(work(3*nrank)) + mata=0.0d0 + DO j=1,nrank + CALL getcol(mat, j, mata(:,j)) ! mata is a dense matrix + END DO + CALL putarr(fid, '/MAT', mata, 'matrix A') + CALL dsyev('V', 'L', nrank, mata, nrank, eigvals, work, SIZE(work), info) + teig = seconds() - t0 + PRINT*,'Info from DSYEV', info, arr(1) + IF(info.EQ.0) THEN + CALL putarr(fid, '/EIGVS', eigvals, 'eigenvalues of A') + CALL putarr(fid, '/EIGVECS', mata, 'eigenvectors of A') + WRITE(*,'(a/(10f10.4))') 'eigval', eigvals + END IF +! +! Using Arpack +! + ido = 0 + iparam(1) = ishfts + iparam(3) = maxitr + iparam(7) = mode1 +! + IF(nev.GT.0) THEN + t0 = seconds() + DO ! ARPACK reverse communication loop + CALL dsaupd(ido, bmat, nrank, which, nev, tol, resid, & + & ncv, v, maxn, iparam, ipntr, workd, workl,& + & lworkl, info) + IF(ido.EQ.-1 .OR. ido.EQ.1) THEN +!!$ WRITE(*,'(a/(10i4))') 'ipntr', ipntr + CALL av(nrank,workd(ipntr(1)), workd(ipntr(2))) + CYCLE + END IF + IF(info .LT. 0) THEN + PRINT*, 'Error in _saupd with info =', info + ELSE + rvec = .TRUE. + CALL dseupd(rvec, 'All', SELECT, d, v, maxn, sigma, & + & bmat, nrank, which, nev, tol, resid, ncv, v, maxn, & + & iparam, ipntr, workd, workl, lworkl, ierr ) + IF( ierr .NE. 0 ) THEN + PRINT*,'Error in _seupd with ierr =', ierr + ELSE + nconv = iparam(5) + PRINT*, '--- eigenvalues and error ---' + DO j=1,nconv ! Residual norms +!!$ CALL av(nrank,v(1,j), w) ! d(1,j) is the j^th eigenvalue +!!$ CALL daxpy(nrank, -d(j,1), v(1,j), 1, w, 1) +!!$ d(j,2) = dnrm2(nrank, w, 1) +!!$ d(j,2) = d(j,2)/abs(d(j,1)) + WRITE(*,'(2(1pe12.4))') d(j,1), eigvals(j)-d(j,1) +!!$ CALL putarr(fid, '/EIGVS', d(:,1), 'ARPACK eigenvalues of A') +!!$ CALL putarr(fid, '/EIGVECS', v, 'ARPACK eigenvectors of A') + END DO + EXIT + END IF + END IF + END DO ! End of ARPACK reverse communication loop + tarpack = seconds()-t0 + END IF +!=========================================================================== +! 9.0 Epilogue +! + WRITE(*,'(a)') '---' + WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat + WRITE(*,'(a,1pe12.3)') 'Lapack: Matrice eigvalue time (s)', teig + WRITE(*,'(a,1pe12.3)') 'Arpack: Matrice eigvalue time (s)', tarpack +! + DEALLOCATE(xgrid, rhs, sol) + DEALLOCATE(arr) + CALL destroy(mat) + CALL destroy_sp(splx) + CALL closef(fid) +CONTAINS + SUBROUTINE av(n,v,w) +! +! Matrix vector product: w <- Av + INTEGER, INTENT(in) :: n + DOUBLE PRECISION, INTENT(in) :: v(*) + DOUBLE PRECISION, INTENT(out) :: w(*) + w(1:n) = vmx(mat,v(1:n)) + END SUBROUTINE av + SUBROUTINE coefeq(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE PRECISION, INTENT(out) :: c(:) +! +! Weak form = Int(x*dw/dx*dt/dx)dx +! + c(1) = x + idt(1) = 1 + idw(1) = 1 + END SUBROUTINE coefeq +END PROGRAM main +! +!+++ +SUBROUTINE meshdist(c, x, nx) +! +! Construct an 1d non-equidistant mesh given a +! mesh distribution function defined in FDIST +! + IMPLICIT NONE + DOUBLE PRECISION, INTENT(in) :: c(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + INTEGER :: nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) +CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) + END FUNCTION fdist +END SUBROUTINE meshdist +!+++ +SUBROUTINE dismat(spl, mat) +! +! Assembly FE matrix mat using spline spl +! + USE bsplines + USE matrix + IMPLICIT NONE + TYPE(spline1d), INTENT(in) :: spl + TYPE(gbmat), INTENT(inout) :: mat + INTEGER :: nrank, nx, nidbas, ngauss + INTEGER :: i, igauss, iterm, iw, jt, irow, jcol + DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) + DOUBLE PRECISION :: contrib +! + INTEGER :: kterms ! Number of terms in weak form + INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order + DOUBLE PRECISION, ALLOCATABLE :: coefs(:) +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl, nrank, nx, nidbas) + ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative +! +! Weak form +! + kterms = mat%nterms + ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms)) +! +! Gauss quadature +! + CALL get_gauss(spl, ngauss) + ALLOCATE(xgauss(ngauss), wgauss(ngauss)) +!=========================================================================== +! 2.0 Assembly loop +! + DO i=1,nx + CALL get_gauss(spl, ngauss, i, xgauss, wgauss) + DO igauss=1,ngauss + CALL basfun(xgauss(igauss), spl, fun, i) + CALL coefeq(xgauss(igauss), idert, iderw, coefs) + DO iterm=1,kterms + DO jt=0,nidbas + DO iw=0,nidbas + contrib = fun(jt,idert(iterm)) * coefs(iterm) * & + & fun(iw,iderw(iterm)) * wgauss(igauss) + irow=i+iw; jcol=i+jt + CALL updtmat(mat, irow, jcol, contrib) + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(fun) + DEALLOCATE(xgauss, wgauss) + DEALLOCATE(iderw, idert, coefs) +! +CONTAINS + SUBROUTINE coefeq(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt)) +! +! Weak form = Int(x*dw/dx*dt/dx)dx +! + c(1) = x + idt(1) = 1 + idw(1) = 1 + END SUBROUTINE coefeq +END SUBROUTINE dismat +!+++ +SUBROUTINE disrhs(kdiff, spl, rhs) +! +! Assenbly the RHS using spline spl +! + USE bsplines + IMPLICIT NONE + INTEGER, INTENT(in) :: kdiff + TYPE(spline1d), INTENT(in) :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:) + INTEGER :: nrank, nx, nidbas, ngauss + INTEGER :: i, igauss, left + DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) + DOUBLE PRECISION:: contrib +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl, nrank, nx, nidbas) +!!$ WRITE(*,'(/a, 5i3)') 'nrank, nx, nidbas =', nrank, nx, nidbas +! + ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives) +! +! Gauss quadature +! + CALL get_gauss(spl, ngauss) +!!$ WRITE(*,'(/a, i3)') 'Gauss points and weights, ngauss =', ngauss + ALLOCATE(xgauss(ngauss), wgauss(ngauss)) +!=========================================================================== +! 2.0 Assembly loop +! + rhs(1:nrank) = 0.0d0 +! + DO i=1,nx + CALL get_gauss(spl, ngauss, i, xgauss, wgauss) +!!$ WRITE(*,'(a,i3,(8f10.4))') 'i=',i, xgauss, wgauss + DO igauss=1,ngauss + CALL basfun(xgauss(igauss), spl, fun, i) + left = i-1 +!!$ WRITE(*,'(a,5i4)') '>>>igauss, left', igauss, left + contrib = wgauss(igauss)*rhseq(xgauss(igauss), kdiff) + rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1) + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(fun) + DEALLOCATE(xgauss, wgauss) +! +CONTAINS + DOUBLE PRECISION FUNCTION rhseq(x,k) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(in) :: k + rhseq = k*k*x**(k-1) + END FUNCTION rhseq +END SUBROUTINE disrhs diff --git a/examples/pde1d_eig_gb.f90 b/examples/pde1d_eig_gb.f90 new file mode 100644 index 0000000..c80f22d --- /dev/null +++ b/examples/pde1d_eig_gb.f90 @@ -0,0 +1,460 @@ +!> +!> @file pde1d_eig_gb.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Solving the following 1d differential eqation using splines: +! +! -d/dr[r d/dr] f = k^2 r^(k-1), with f(r=1) = 0 +! exact solution: f(r) = 1 - r^k +! + USE bsplines + USE matrix + USE futils + USE conmat_mod + IMPLICIT NONE + INTEGER :: nx, nidbas, ngauss, kdiff + INTEGER :: i, nrank, kl, ku + LOGICAL :: nlppform + DOUBLE PRECISION :: coefs(5) + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, rhs, sol + TYPE(spline1d) :: splx + TYPE(gemat) :: mat +!!$ TYPE(gbmat) :: mat +! + CHARACTER(len=128) :: file='pde1d.h5' + INTEGER :: fid + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr + DOUBLE PRECISION :: seconds, t0, tmat, teig, tarpack +! + DOUBLE PRECISION, ALLOCATABLE :: mata(:,:), eigvals(:), work(:) + INTEGER :: j, info +! + INTEGER, PARAMETER :: maxn=256, maxnev=maxn, maxncv=25, & + & lworkl=maxncv*(maxncv+8), zero=0.0d0 + DOUBLE PRECISION :: v(maxn,maxncv), workl(lworkl), workd(3*maxn), & + & d(maxncv,2), resid(maxn), w(maxn), & + & tol=0.0d0, sigma + DOUBLE PRECISION, EXTERNAL :: dnrm2 + INTEGER :: nev=10, ncv=20, ishfts=1, maxitr=300, nconv, & + & mode1=1, ierr + INTEGER :: ido, ipntr(11), iparam(11) + + CHARACTER(len=1) :: bmat='I' + CHARACTER(len=2) :: which='SA' + LOGICAL :: rvec, select(maxncv) + +! + INTERFACE + SUBROUTINE dismat(spl, mat) + USE bsplines + USE matrix + TYPE(spline1d), INTENT(in) :: spl + TYPE(gbmat), INTENT(inout) :: mat + END SUBROUTINE dismat + SUBROUTINE disrhs(kdiff, spl, rhs) + USE bsplines + INTEGER, INTENT(in) :: kdiff + TYPE(spline1d), INTENT(in) :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:) + END SUBROUTINE disrhs + SUBROUTINE meshdist(coefs, x, nx) + DOUBLE PRECISION, INTENT(in) :: coefs(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + END SUBROUTINE meshdist + END INTERFACE +! + NAMELIST /newrun/ nx, nidbas, ngauss, kdiff, nlppform, coefs, nev, ncv, which +!=========================================================================== +! 1.0 Prologue +! +! Read in data specific to run +! + nx = 8 ! Number oh intevals in x + nidbas = 3 ! Degree of splines + ngauss = 4 ! Number of Gauss points/interval + kdiff = 2 ! Exponent of differential problem + nlppform = .TRUE. ! Use PPFORM for gridval or not + coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + ! see function FDIST in MESHDIST +! + READ(*,newrun) + WRITE(*,newrun) +! +! Define grid on x axis +! + ALLOCATE(xgrid(0:nx)) + xgrid(0) = 0.0d0 + xgrid(nx) = 1.0d0 + CALL meshdist(coefs, xgrid, nx) + WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) + WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx) + WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx) +! +! Create hdf5 file +! + CALL creatf(file, fid, 'PDE1D Result File') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NIDBAS', nidbas) + CALL attach(fid, '/', 'NGAUSS', ngauss) + CALL attach(fid, '/', 'KDIFF', kdiff) +!=========================================================================== +! 2.0 Discretize the PDE +! +! Set up spline +! + t0 = seconds() + CALL set_spline(nidbas, ngauss, xgrid, splx, nlppform=nlppform) + CALL get_dim(splx, nrank) ! Rank of the FE matrix + WRITE(*,'(a,l6)') 'splx%nlequid', splx%nlequid +! +! FE matrix assembly +! + kl = nidbas + ku = kl +!!$ CALL init(kl, ku, nrank, 1, mat) +!!$ WRITE(*,'(a,3i6)') 'kl, ku, nrank', kl, ku, nrank + CALL init(nrank, 1, mat) +!!$ CALL dismat(splx, mat) + CALL conmat(splx, mat, coefeq) +! + ALLOCATE(arr(nrank)) +!!$ WRITE(*,'(/a)') 'Matrice before BC' +!!$ DO i=1,nrank +!!$ CALL getrow(mat, i, arr) +!!$ WRITE(*,'(12f8.3)') arr, SUM(arr) +!!$ END DO +! +! RHS assembly +! + ALLOCATE(rhs(nrank), sol(nrank)) + CALL disrhs(kdiff, splx, rhs) +!!$ WRITE(*,'(/a,/(12(f8.3)))') 'RHS before BC', rhs +! +! Set BC f(r=1) = 0 on matrix +! + arr(1:nrank-1) = 0.0d0 + arr(nrank) = 1.0d0 + CALL putrow(mat, nrank, arr) + CALL putcol(mat, nrank, arr) + tmat = seconds() - t0 +! +!=========================================================================== +! 3.0 Eigevalue problem +! +! Using Lapack dsyev +! + t0 = seconds() + ALLOCATE(mata(nrank,nrank)) + ALLOCATE(eigvals(nrank)) + ALLOCATE(work(3*nrank)) + mata=0.0d0 + DO j=1,nrank + mata(j:nrank,j) = mat%val(j:nrank,j) + END DO + CALL putarr(fid, '/MAT', mata, 'matrix A') + CALL dsyev('V', 'L', nrank, mata, nrank, eigvals, work, SIZE(work), info) + teig = seconds() - t0 + PRINT*,'Info from DSYEV', info, arr(1) + IF(info.EQ.0) THEN + CALL putarr(fid, '/EIGVS', eigvals, 'eigenvalues of A') + CALL putarr(fid, '/EIGVECS', mata, 'eigenvectors of A') + WRITE(*,'(a/(10f10.4))') 'eigval', eigvals + END IF +! +! Using Arpack +! + ido = 0 + iparam(1) = ishfts + iparam(3) = maxitr + iparam(7) = mode1 +! + IF(nev.GT.0) THEN + t0 = seconds() + DO ! ARPACK reverse communication loop + CALL dsaupd(ido, bmat, nrank, which, nev, tol, resid, & + & ncv, v, maxn, iparam, ipntr, workd, workl,& + & lworkl, info) + IF(ido.EQ.-1 .OR. ido.EQ.1) THEN + PRINT*, 'Error in _saupd with info =', info + WRITE(*,'(a/(10i4))') 'ipntr', ipntr + CALL av(nrank,workd(ipntr(1)), workd(ipntr(2))) + CYCLE + END IF + IF(info .LT. 0) THEN + PRINT*, 'Error in _saupd with info =', info + ELSE + rvec = .TRUE. + CALL dseupd(rvec, 'All', SELECT, d, v, maxn, sigma, & + & bmat, nrank, which, nev, tol, resid, ncv, v, maxn, & + & iparam, ipntr, workd, workl, lworkl, ierr ) + IF( ierr .NE. 0 ) THEN + PRINT*,'Error in _seupd with ierr =', ierr + ELSE + nconv = iparam(5) + PRINT*, '--- eigenvalues and error ---' + DO j=1,nconv ! Residual norms +!!$ CALL av(nrank,v(1,j), w) ! d(1,j) is the j^th eigenvalue +!!$ CALL daxpy(nrank, -d(j,1), v(1,j), 1, w, 1) +!!$ d(j,2) = dnrm2(nrank, w, 1) +!!$ d(j,2) = d(j,2)/abs(d(j,1)) + WRITE(*,'(i3,2(1pe12.4))') j, d(j,1), eigvals(j)-d(j,1) +!!$ CALL putarr(fid, '/EIGVS', d(:,1), 'ARPACK eigenvalues of A') +!!$ CALL putarr(fid, '/EIGVECS', v, 'ARPACK eigenvectors of A') + END DO + EXIT + END IF + END IF + END DO ! End of ARPACK reverse communication loop + tarpack = seconds()-t0 + END IF +!=========================================================================== +! 9.0 Epilogue +! + WRITE(*,'(a)') '---' + WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat + WRITE(*,'(a,1pe12.3)') 'Lapack: Matrice eigvalue time (s)', teig + WRITE(*,'(a,1pe12.3)') 'Arpack: Matrice eigvalue time (s)', tarpack +! + DEALLOCATE(xgrid, rhs, sol) + DEALLOCATE(arr) + CALL destroy(mat) + CALL destroy_sp(splx) + CALL closef(fid) +CONTAINS + SUBROUTINE av(n,v,w) +! +! Matrix vector product: w <- Av + INTEGER, INTENT(in) :: n + DOUBLE PRECISION, INTENT(in) :: v(*) + DOUBLE PRECISION, INTENT(out) :: w(*) + w(1:n) = vmx(mat,v(1:n)) + END SUBROUTINE av + SUBROUTINE coefeq(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE PRECISION, INTENT(out) :: c(:) +! +! Weak form = Int(x*dw/dx*dt/dx)dx +! + c(1) = x + idt(1) = 1 + idw(1) = 1 + END SUBROUTINE coefeq +END PROGRAM main +! +!+++ +SUBROUTINE meshdist(c, x, nx) +! +! Construct an 1d non-equidistant mesh given a +! mesh distribution function defined in FDIST +! + IMPLICIT NONE + DOUBLE PRECISION, INTENT(in) :: c(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + INTEGER :: nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) +CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) + END FUNCTION fdist +END SUBROUTINE meshdist +!+++ +SUBROUTINE dismat(spl, mat) +! +! Assembly FE matrix mat using spline spl +! + USE bsplines + USE matrix + IMPLICIT NONE + TYPE(spline1d), INTENT(in) :: spl + TYPE(gbmat), INTENT(inout) :: mat + INTEGER :: nrank, nx, nidbas, ngauss + INTEGER :: i, igauss, iterm, iw, jt, irow, jcol + DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) + DOUBLE PRECISION :: contrib +! + INTEGER :: kterms ! Number of terms in weak form + INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order + DOUBLE PRECISION, ALLOCATABLE :: coefs(:) +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl, nrank, nx, nidbas) + ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative +! +! Weak form +! + kterms = mat%nterms + ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms)) +! +! Gauss quadature +! + CALL get_gauss(spl, ngauss) + ALLOCATE(xgauss(ngauss), wgauss(ngauss)) +!=========================================================================== +! 2.0 Assembly loop +! + DO i=1,nx + CALL get_gauss(spl, ngauss, i, xgauss, wgauss) + DO igauss=1,ngauss + CALL basfun(xgauss(igauss), spl, fun, i) + CALL coefeq(xgauss(igauss), idert, iderw, coefs) + DO iterm=1,kterms + DO jt=0,nidbas + DO iw=0,nidbas + contrib = fun(jt,idert(iterm)) * coefs(iterm) * & + & fun(iw,iderw(iterm)) * wgauss(igauss) + irow=i+iw; jcol=i+jt + CALL updtmat(mat, irow, jcol, contrib) + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(fun) + DEALLOCATE(xgauss, wgauss) + DEALLOCATE(iderw, idert, coefs) +! +CONTAINS + SUBROUTINE coefeq(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt)) +! +! Weak form = Int(x*dw/dx*dt/dx)dx +! + c(1) = x + idt(1) = 1 + idw(1) = 1 + END SUBROUTINE coefeq +END SUBROUTINE dismat +!+++ +SUBROUTINE disrhs(kdiff, spl, rhs) +! +! Assenbly the RHS using spline spl +! + USE bsplines + IMPLICIT NONE + INTEGER, INTENT(in) :: kdiff + TYPE(spline1d), INTENT(in) :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:) + INTEGER :: nrank, nx, nidbas, ngauss + INTEGER :: i, igauss, left + DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) + DOUBLE PRECISION:: contrib +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl, nrank, nx, nidbas) +!!$ WRITE(*,'(/a, 5i3)') 'nrank, nx, nidbas =', nrank, nx, nidbas +! + ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives) +! +! Gauss quadature +! + CALL get_gauss(spl, ngauss) +!!$ WRITE(*,'(/a, i3)') 'Gauss points and weights, ngauss =', ngauss + ALLOCATE(xgauss(ngauss), wgauss(ngauss)) +!=========================================================================== +! 2.0 Assembly loop +! + rhs(1:nrank) = 0.0d0 +! + DO i=1,nx + CALL get_gauss(spl, ngauss, i, xgauss, wgauss) +!!$ WRITE(*,'(a,i3,(8f10.4))') 'i=',i, xgauss, wgauss + DO igauss=1,ngauss + CALL basfun(xgauss(igauss), spl, fun, i) + left = i-1 +!!$ WRITE(*,'(a,5i4)') '>>>igauss, left', igauss, left + contrib = wgauss(igauss)*rhseq(xgauss(igauss), kdiff) + rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1) + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(fun) + DEALLOCATE(xgauss, wgauss) +! +CONTAINS + DOUBLE PRECISION FUNCTION rhseq(x,k) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(in) :: k + rhseq = k*k*x**(k-1) + END FUNCTION rhseq +END SUBROUTINE disrhs diff --git a/examples/pde1d_eig_ge.f90 b/examples/pde1d_eig_ge.f90 new file mode 100644 index 0000000..713a66e --- /dev/null +++ b/examples/pde1d_eig_ge.f90 @@ -0,0 +1,474 @@ +!> +!> @file pde1d_eig_ge.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Solving the following 1d differential eqation using splines: +! +! -d/dr[r d/dr] f = k^2 r^(k-1), with f(r=1) = 0 +! exact solution: f(r) = 1 - r^k +! + USE bsplines + USE matrix + USE futils + USE conmat_mod + IMPLICIT NONE + INTEGER :: nx, nidbas, ngauss, kdiff + INTEGER :: i, nrank, kl, ku + LOGICAL :: nlppform + DOUBLE PRECISION :: coefs(5) + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, rhs, sol + TYPE(spline1d) :: splx + TYPE(gemat) :: mat +! + CHARACTER(len=128) :: file='pde1d.h5' + INTEGER :: fid + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr + DOUBLE PRECISION :: seconds, t0, tmat, teig, tarpack +! + DOUBLE PRECISION, ALLOCATABLE :: mata(:,:), eigvecsr(:,:), eigvecsl(:,:), & + & wr(:), wi(:), work(:) + INTEGER :: j, info +! + INTEGER, PARAMETER :: maxn=256, maxnev=maxn, maxncv=maxn + DOUBLE PRECISION :: v(maxn,maxncv),workd(3*maxn), workev(3*maxncv), & + & d(maxncv,2), resid(maxn), w(maxncv,maxn), & + & zero=0.0d0, tol=0.0d0, sigmar, sigmai + DOUBLE PRECISION, ALLOCATABLE :: workl(:) + DOUBLE PRECISION, EXTERNAL :: dnrm2 + INTEGER :: nev=10, ncv=30, ishfts=1, maxitr=300, nconv, & + & mode1=1, ierr, lworkl + INTEGER :: ido, ipntr(11), iparam(11) + + CHARACTER(len=1) :: bmat='I' + CHARACTER(len=2) :: which='SA' + LOGICAL :: rvec, select(maxncv) + +! + INTERFACE + SUBROUTINE dismat(spl, mat) + USE bsplines + USE matrix + TYPE(spline1d), INTENT(in) :: spl + TYPE(gbmat), INTENT(inout) :: mat + END SUBROUTINE dismat + SUBROUTINE disrhs(kdiff, spl, rhs) + USE bsplines + INTEGER, INTENT(in) :: kdiff + TYPE(spline1d), INTENT(in) :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:) + END SUBROUTINE disrhs + SUBROUTINE meshdist(coefs, x, nx) + DOUBLE PRECISION, INTENT(in) :: coefs(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + END SUBROUTINE meshdist + END INTERFACE +! + NAMELIST /newrun/ nx, nidbas, ngauss, kdiff, nlppform, coefs, & + & nev, ncv, which +!=========================================================================== +! 1.0 Prologue +! +! Read in data specific to run +! + nx = 8 ! Number oh intevals in x + nidbas = 3 ! Degree of splines + ngauss = 4 ! Number of Gauss points/interval + kdiff = 2 ! Exponent of differential problem + nlppform = .TRUE. ! Use PPFORM for gridval or not + coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + ! see function FDIST in MESHDIST +! + READ(*,newrun) + WRITE(*,newrun) +! +! Define grid on x axis +! + ALLOCATE(xgrid(0:nx)) + xgrid(0) = 0.0d0 + xgrid(nx) = 1.0d0 + CALL meshdist(coefs, xgrid, nx) + WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) + WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx) + WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx) +! +! Create hdf5 file +! + CALL creatf(file, fid, 'PDE1D Result File') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NIDBAS', nidbas) + CALL attach(fid, '/', 'NGAUSS', ngauss) + CALL attach(fid, '/', 'KDIFF', kdiff) +!=========================================================================== +! 2.0 Discretize the PDE +! +! Set up spline +! + t0 = seconds() + CALL set_spline(nidbas, ngauss, xgrid, splx, nlppform=nlppform) + CALL get_dim(splx, nrank) ! Rank of the FE matrix + WRITE(*,'(a,l6)') 'splx%nlequid', splx%nlequid +! +! FE matrix assembly +! + kl = nidbas + ku = kl +!!$ CALL init(kl, ku, nrank, 1, mat) + WRITE(*,'(a,3i6)') 'kl, ku, nrank', kl, ku, nrank + CALL init(nrank, 1, mat) +!!$ CALL dismat(splx, mat) + CALL conmat(splx, mat, coefeq) +! + ALLOCATE(arr(nrank)) +!!$ WRITE(*,'(/a)') 'Matrice before BC' +!!$ DO i=1,nrank +!!$ CALL getrow(mat, i, arr) +!!$ WRITE(*,'(12f8.3)') arr, SUM(arr) +!!$ END DO +! +! RHS assembly +! + ALLOCATE(rhs(nrank), sol(nrank)) + CALL disrhs(kdiff, splx, rhs) +!!$ WRITE(*,'(/a,/(12(f8.3)))') 'RHS before BC', rhs +! +! Set BC f(r=1) = 0 on matrix +! + arr(1:nrank-1) = 0.0d0 + arr(nrank) = 1.0d0 + CALL putrow(mat, nrank, arr) + CALL putcol(mat, nrank, arr) + tmat = seconds() - t0 +! +!=========================================================================== +! 3.0 Eigevalue problem +! +! Using Lapack dgeev +! + t0 = seconds() + ALLOCATE(mata(nrank,nrank)) + ALLOCATE(eigvecsr(nrank,nrank)) + ALLOCATE(eigvecsl(nrank,nrank)) + ALLOCATE(work(4*nrank)) + ALLOCATE(wr(nrank), wi(nrank)) + mata(:,:) = mat%val(:,:) + CALL putarr(fid, '/MAT', mata, 'matrix A') +!!$ CALL dsyev('V', 'L', nrank, mata, nrank, eigvals, work, SIZE(work), info) + CALL dgeev('N', 'V', nrank, mata, nrank, wr, wi, eigvecsl, SIZE(eigvecsl,1), & + & eigvecsr, SIZE(eigvecsr,1), work, SIZE(work), info) + teig = seconds() - t0 + PRINT*,'Info from DGEEV', info, arr(1) + IF(info.EQ.0) THEN + CALL putarr(fid, '/REIGVS', wr, 'Real of eigenvalues of A') + CALL putarr(fid, '/IEIGVS', wi, 'Imag of eigenvalues of A') + CALL putarr(fid, '/EIGVECL', eigvecsl, 'left eigenvalues of A') + CALL putarr(fid, '/EIGVECR', eigvecsr, 'right eigenvalues of A') + WRITE(*,'(a/(10f10.4))') 'Real eigval', wr + WRITE(*,'(a/(10f10.4))') 'Imag eigval', wi + END IF +! +! Using Arpack +! + ido = 0 + iparam(1) = ishfts + iparam(3) = maxitr + iparam(7) = mode1 +! + lworkl = 3*ncv**2+6*ncv + ALLOCATE(workl(lworkl)) + +! + t0 = seconds() + DO ! ARPACK reverse communication loop + CALL dnaupd(ido, bmat, nrank, which, nev, tol, resid, & + & ncv, v, maxn, iparam, ipntr, workd, workl,& + & lworkl, info) + IF(ido.EQ.-1 .OR. ido.EQ.1) THEN + CALL av(nrank,workd(ipntr(1)), workd(ipntr(2))) + CYCLE + END IF + PRINT*, 'INFO =', info + IF(info .LT. 0) THEN + PRINT*, 'Error in dnaupd with info =', info + ELSE + rvec = .TRUE. + CALL dneupd(rvec, 'A', select, d, d(1,2), v, size(v,1), & + & sigmar, sigmai, workev, bmat, nrank, which, nev, tol, & + & resid, ncv, v, size(v,1), iparam, ipntr, workd, workl, & + & lworkl, ierr ) + IF( ierr .NE. 0 ) THEN + PRINT*,'Error in dneupd with ierr =', ierr + ELSE + nconv = iparam(5) + PRINT*, '--- Real eigenvalues and comprison with Lapack results ---' +! eiegvalues and diff with Lapack results + DO j=1,nconv + WRITE(*,'(i3,3(1pe12.4))') j, d(j,1), wr(j), wr(j)-d(j,1) + END DO + PRINT*, '--- Imag eigenvalues and comprison with Lapack results ---' + DO j=1,nconv + WRITE(*,'(i3,3(1pe12.4))') j, d(j,2), wi(j), wi(j)-d(j,2) + END DO +!!$ CALL av(nrank,v(1,j), w) ! d(1,j) is the j^th eigenvalue +!!$ CALL daxpy(nrank, -d(j,1), v(1,j), 1, w, 1) +!!$ d(j,2) = dnrm2(nrank, w, 1) +!!$ d(j,2) = d(j,2)/abs(d(j,1)) + CALL putarr(fid, '/EIGVS', d(1:nconv,:), 'ARPACK eigenvalues of A') +!!$ CALL putarr(fid, '/EIGVECS', v, 'ARPACK eigenvectors of A') +!!$ END DO + END IF + EXIT + END IF + END DO ! End of ARPACK reverse communication loop + tarpack = seconds()-t0 + +!=========================================================================== +! 9.0 Epilogue +! + WRITE(*,'(a)') '---' + WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat + WRITE(*,'(a,1pe12.3)') 'Lapack: Matrice eigvalue time (s)', teig + WRITE(*,'(a,1pe12.3)') 'Arpack: Matrice eigvalue time (s)', tarpack +! + DEALLOCATE(xgrid, rhs, sol) + DEALLOCATE(arr) + CALL destroy(mat) + CALL destroy_sp(splx) + CALL closef(fid) +CONTAINS + SUBROUTINE av(n,v,w) +! +! Matrix vector product: w <- Av + INTEGER, INTENT(in) :: n + DOUBLE PRECISION, INTENT(in) :: v(*) + DOUBLE PRECISION, INTENT(out) :: w(*) + w(1:n) = vmx(mat,v(1:n)) + END SUBROUTINE av + SUBROUTINE coefeq(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE PRECISION, INTENT(out) :: c(:) +! +! Weak form = Int(x*dw/dx*dt/dx)dx +! + c(1) = x + idt(1) = 1 + idw(1) = 1 + END SUBROUTINE coefeq +END PROGRAM main +! +!+++ +SUBROUTINE meshdist(c, x, nx) +! +! Construct an 1d non-equidistant mesh given a +! mesh distribution function defined in FDIST +! + IMPLICIT NONE + DOUBLE PRECISION, INTENT(in) :: c(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + INTEGER :: nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) +CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) + END FUNCTION fdist +END SUBROUTINE meshdist +!+++ +SUBROUTINE dismat(spl, mat) +! +! Assembly FE matrix mat using spline spl +! + USE bsplines + USE matrix + IMPLICIT NONE + TYPE(spline1d), INTENT(in) :: spl + TYPE(gbmat), INTENT(inout) :: mat + INTEGER :: nrank, nx, nidbas, ngauss + INTEGER :: i, igauss, iterm, iw, jt, irow, jcol + DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) + DOUBLE PRECISION :: contrib +! + INTEGER :: kterms ! Number of terms in weak form + INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order + DOUBLE PRECISION, ALLOCATABLE :: coefs(:) +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl, nrank, nx, nidbas) + ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative +! +! Weak form +! + kterms = mat%nterms + ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms)) +! +! Gauss quadature +! + CALL get_gauss(spl, ngauss) + ALLOCATE(xgauss(ngauss), wgauss(ngauss)) +!=========================================================================== +! 2.0 Assembly loop +! + DO i=1,nx + CALL get_gauss(spl, ngauss, i, xgauss, wgauss) + DO igauss=1,ngauss + CALL basfun(xgauss(igauss), spl, fun, i) + CALL coefeq(xgauss(igauss), idert, iderw, coefs) + DO iterm=1,kterms + DO jt=0,nidbas + DO iw=0,nidbas + contrib = fun(jt,idert(iterm)) * coefs(iterm) * & + & fun(iw,iderw(iterm)) * wgauss(igauss) + irow=i+iw; jcol=i+jt + CALL updtmat(mat, irow, jcol, contrib) + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(fun) + DEALLOCATE(xgauss, wgauss) + DEALLOCATE(iderw, idert, coefs) +! +CONTAINS + SUBROUTINE coefeq(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt)) +! +! Weak form = Int(x*dw/dx*dt/dx)dx +! + c(1) = x + idt(1) = 1 + idw(1) = 1 + END SUBROUTINE coefeq +END SUBROUTINE dismat +!+++ +SUBROUTINE disrhs(kdiff, spl, rhs) +! +! Assenbly the RHS using spline spl +! + USE bsplines + IMPLICIT NONE + INTEGER, INTENT(in) :: kdiff + TYPE(spline1d), INTENT(in) :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:) + INTEGER :: nrank, nx, nidbas, ngauss + INTEGER :: i, igauss, left + DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) + DOUBLE PRECISION:: contrib +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl, nrank, nx, nidbas) +!!$ WRITE(*,'(/a, 5i3)') 'nrank, nx, nidbas =', nrank, nx, nidbas +! + ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives) +! +! Gauss quadature +! + CALL get_gauss(spl, ngauss) +!!$ WRITE(*,'(/a, i3)') 'Gauss points and weights, ngauss =', ngauss + ALLOCATE(xgauss(ngauss), wgauss(ngauss)) +!=========================================================================== +! 2.0 Assembly loop +! + rhs(1:nrank) = 0.0d0 +! + DO i=1,nx + CALL get_gauss(spl, ngauss, i, xgauss, wgauss) +!!$ WRITE(*,'(a,i3,(8f10.4))') 'i=',i, xgauss, wgauss + DO igauss=1,ngauss + CALL basfun(xgauss(igauss), spl, fun, i) + left = i-1 +!!$ WRITE(*,'(a,5i4)') '>>>igauss, left', igauss, left + contrib = wgauss(igauss)*rhseq(xgauss(igauss), kdiff) + rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1) + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(fun) + DEALLOCATE(xgauss, wgauss) +! +CONTAINS + DOUBLE PRECISION FUNCTION rhseq(x,k) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(in) :: k + rhseq = k*k*x**(k-1) + END FUNCTION rhseq +END SUBROUTINE disrhs diff --git a/examples/pde1d_eig_zcsr.f90 b/examples/pde1d_eig_zcsr.f90 new file mode 100644 index 0000000..d467456 --- /dev/null +++ b/examples/pde1d_eig_zcsr.f90 @@ -0,0 +1,481 @@ +!> +!> @file pde1d_eig_zcsr.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Solving the following 1d differential eqation using splines: +! +! -d/dr[r d/dr] f = k^2 r^(k-1), with f(r=1) = 0 +! exact solution: f(r) = 1 - r^k +! + USE bsplines + USE csr + USE futils + USE f95_precision, ONLY: WP => DP + USE lapack95, ONLY: geev + IMPLICIT NONE + INTEGER :: nx, nidbas, ngauss, kdiff + INTEGER :: i, nrank, kl, ku + LOGICAL :: nlppform + DOUBLE PRECISION :: coefs(5) + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, rhs, sol + TYPE(spline1d) :: splx + TYPE(zcsr_mat) :: mat +! + CHARACTER(len=128) :: file='pde1d.h5' + INTEGER :: fid, ffid + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr + DOUBLE PRECISION :: seconds, t0, tmat, teig, tarpack +! +! Lapack95 GEEV arguments + COMPLEX(WP), ALLOCATABLE :: mata(:,:), w(:) + REAL(WP), ALLOCATABLE :: WR(:), WI(:) + INTEGER :: j, info +! + INTEGER, PARAMETER :: maxn=256, maxnev=maxn, maxncv=25 + INTEGER :: lworkl, zero=0.0d0 + DOUBLE PRECISION :: d(maxncv,2), tol=0.0d0, rwork(maxncv) + DOUBLE COMPLEX :: v(maxn,maxncv), resid(maxn), sigma + DOUBLE COMPLEX :: workd(3*maxncv), lwork(2*maxn) + DOUBLE COMPLEX, ALLOCATABLE :: workl(:), vl(:,:), vr(:,:) + DOUBLE PRECISION, EXTERNAL :: dnrm2 + INTEGER :: nev=10, ncv=20, ishfts=1, maxitr=300, nconv, & + & mode1=1, ierr + INTEGER :: ido, ipntr(14), iparam(11) + + CHARACTER(len=1) :: bmat='I' + CHARACTER(len=2) :: which='SA' + LOGICAL :: rvec, select(maxncv) + +! + INTERFACE + SUBROUTINE dismat(spl, mat) + USE bsplines + USE csr + TYPE(spline1d), INTENT(in) :: spl + TYPE(zcsr_mat), INTENT(inout) :: mat + END SUBROUTINE dismat + SUBROUTINE disrhs(kdiff, spl, rhs) + USE bsplines + INTEGER, INTENT(in) :: kdiff + TYPE(spline1d), INTENT(in) :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:) + END SUBROUTINE disrhs + SUBROUTINE meshdist(coefs, x, nx) + DOUBLE PRECISION, INTENT(in) :: coefs(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + END SUBROUTINE meshdist + END INTERFACE +! + NAMELIST /newrun/ nx, nidbas, ngauss, kdiff, nlppform, coefs, nev, ncv, which +!=========================================================================== +! 1.0 Prologue +! +! Read in data specific to run +! + nx = 8 ! Number oh intevals in x + nidbas = 3 ! Degree of splines + ngauss = 4 ! Number of Gauss points/interval + kdiff = 2 ! Exponent of differential problem + nlppform = .TRUE. ! Use PPFORM for gridval or not + coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + ! see function FDIST in MESHDIST +! + READ(*,newrun) + WRITE(*,newrun) +! +! Define grid on x axis +! + ALLOCATE(xgrid(0:nx)) + xgrid(0) = 0.0d0 + xgrid(nx) = 1.0d0 + CALL meshdist(coefs, xgrid, nx) + WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) + WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx) + WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx) +! +! Create hdf5 file +! + CALL creatf(file, fid, 'PDE1D Result File') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NIDBAS', nidbas) + CALL attach(fid, '/', 'NGAUSS', ngauss) + CALL attach(fid, '/', 'KDIFF', kdiff) +!=========================================================================== +! 2.0 Discretize the PDE +! +! Set up spline +! + t0 = seconds() + CALL set_spline(nidbas, ngauss, xgrid, splx, nlppform=nlppform) + CALL get_dim(splx, nrank) ! Rank of the FE matrix + WRITE(*,'(a,l6)') 'splx%nlequid', splx%nlequid +! +! FE matrix assembly +! + kl = nidbas + ku = kl + WRITE(*,'(a,3i6)') 'kl, ku, nrank', kl, ku, nrank + CALL init(nrank, 1, mat) + CALL dismat(splx, mat) +!!$ CALL conmat(splx, mat, coefeq) + CALL to_mat(mat) + PRINT*,'MAT after to_mat', mat%val + CALL creatf('pde1d_eig.h5', ffid, 'PDE1D Result File', real_prec='D') + PRINT*, 'rank', mat%rank + PRINT*, 'nnz', mat%nnz + PRINT*, 'irow', mat%irow + PRINT*, 'cols', mat%cols + CALL putmat(ffid,'/MAT',mat,'FE matrix') + PRINT*, 'MAT',mat%val + CALL closef(ffid) + STOP +! +! + ALLOCATE(arr(nrank)) +!!$ WRITE(*,'(/a)') 'Matrice before BC' +!!$ DO i=1,nrank +!!$ CALL getrow(mat, i, arr) +!!$ WRITE(*,'(12f8.3)') arr, SUM(arr) +!!$ END DO +! +! RHS assembly +! + ALLOCATE(rhs(nrank), sol(nrank)) + CALL disrhs(kdiff, splx, rhs) +!!$ WRITE(*,'(/a,/(12(f8.3)))') 'RHS before BC', rhs +!!$! +!!$! Set BC f(r=1) = 0 on matrix +!!$! +!!$ arr(1:nrank-1) = 0.0d0 +!!$ arr(nrank) = 1.0d0 +!!$ CALL putrow(mat, nrank, arr) +!!$ CALL putcol(mat, nrank, arr) + CALL putmat(fid,'/MATA', mat, 'FE matrix') + tmat = seconds() - t0 +! +!=========================================================================== +! 3.0 Eigevalue problem +! +! Using Lapack dsyev +! + t0 = seconds() + ALLOCATE(mata(nrank,nrank)) + ALLOCATE(w(nrank)) + ALLOCATE(wr(nrank), wi(nrank)) + ALLOCATE(vl(nrank,nrank), vr(nrank,nrank)) + mata=0.0d0 + DO j=1,nrank + CALL getcol(mat, j, mata(:,j)) ! convert to dense matrix mata + END DO + CALL putarr(fid, '/MAT', mata, 'matrix A') + CALL geev(mata, w) + wr(:) = REAL(w(:)) + wi(:) = AIMAG(w(:)) + teig = seconds() - t0 + PRINT*,'Info from ZGEEV', info, arr(1) + IF(info.EQ.0) THEN + CALL putarr(fid, '/REIGVS', wr, 'eigenvalues of A') + CALL putarr(fid, '/IEIGVS', wi, 'eigenvectors of A') + WRITE(*,'(a/(10f10.4))') 'Real eigval', wr + WRITE(*,'(a/(10f10.4))') 'Imag eigval', wi + END IF +! +! Using Arpack +! + ido = 0 + iparam(1) = ishfts + iparam(3) = maxitr + iparam(7) = mode1 +! + lworkl = 3*ncv**2+5*ncv + ALLOCATE(workl(lworkl)) +! + IF(nev.GT.0) THEN + t0 = seconds() + DO ! ARPACK reverse communication loop + CALL dsaupd(ido, bmat, nrank, which, nev, tol, resid, ncv, & + & v, SIZE(v,1), iparam, ipntr, workd, workl, lworkl, & + & rwork, info) + IF(ido.EQ.-1 .OR. ido.EQ.1) THEN +!!$ WRITE(*,'(a/(10i4))') 'ipntr', ipntr + CALL av(nrank,workd(ipntr(1)), workd(ipntr(2))) + CYCLE + END IF + IF(info .LT. 0) THEN + PRINT*, 'Error in _saupd with info =', info + ELSE + rvec = .TRUE. + CALL dseupd(rvec, 'All', SELECT, d, v, maxn, sigma, & + & bmat, nrank, which, nev, tol, resid, ncv, v, maxn, & + & iparam, ipntr, workd, workl, lworkl, ierr ) + IF( ierr .NE. 0 ) THEN + PRINT*,'Error in _seupd with ierr =', ierr + ELSE + nconv = iparam(5) + PRINT*, '--- eigenvalues and error ---' + DO j=1,nconv ! Residual norms +!!$ CALL av(nrank,v(1,j), w) ! d(1,j) is the j^th eigenvalue +!!$ CALL daxpy(nrank, -d(j,1), v(1,j), 1, w, 1) +!!$ d(j,2) = dnrm2(nrank, w, 1) +!!$ d(j,2) = d(j,2)/abs(d(j,1)) + WRITE(*,'(2(1pe12.4))') d(j,1), eigvals(j)-d(j,1) +!!$ CALL putarr(fid, '/EIGVS', d(:,1), 'ARPACK eigenvalues of A') +!!$ CALL putarr(fid, '/EIGVECS', v, 'ARPACK eigenvectors of A') + END DO + EXIT + END IF + END IF + END DO ! End of ARPACK reverse communication loop + tarpack = seconds()-t0 + END IF +!=========================================================================== +! 9.0 Epilogue +! + WRITE(*,'(a)') '---' + WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat + WRITE(*,'(a,1pe12.3)') 'Lapack: Matrice eigvalue time (s)', teig + WRITE(*,'(a,1pe12.3)') 'Arpack: Matrice eigvalue time (s)', tarpack +! + DEALLOCATE(xgrid, rhs, sol) + DEALLOCATE(arr) + CALL destroy(mat) + CALL destroy_sp(splx) + CALL closef(fid) +CONTAINS + SUBROUTINE av(n,v,w) +! +! Matrix vector product: w <- Av + INTEGER, INTENT(in) :: n + DOUBLE COMPLEX, INTENT(in) :: v(*) + DOUBLE COMPLEX, INTENT(out) :: w(*) + w(1:n) = vmx(mat,v(1:n)) + END SUBROUTINE av + SUBROUTINE coefeq(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE PRECISION, INTENT(out) :: c(:) +! +! Weak form = Int(x*dw/dx*dt/dx)dx +! + c(1) = x + idt(1) = 1 + idw(1) = 1 + END SUBROUTINE coefeq +END PROGRAM main +! +!+++ +SUBROUTINE meshdist(c, x, nx) +! +! Construct an 1d non-equidistant mesh given a +! mesh distribution function defined in FDIST +! + IMPLICIT NONE + DOUBLE PRECISION, INTENT(in) :: c(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + INTEGER :: nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) +CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) + END FUNCTION fdist +END SUBROUTINE meshdist +!+++ +SUBROUTINE dismat(spl, mat) +! +! Assembly FE matrix mat using spline spl +! + USE bsplines + USE csr + IMPLICIT NONE + TYPE(spline1d), INTENT(in) :: spl + TYPE(zcsr_mat), INTENT(inout) :: mat + INTEGER :: nrank, nx, nidbas, ngauss + INTEGER :: i, igauss, iterm, iw, jt, irow, jcol + DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) + DOUBLE COMPLEX :: contrib +! + INTEGER :: kterms ! Number of terms in weak form + INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order + DOUBLE PRECISION, ALLOCATABLE :: coefs(:) +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl, nrank, nx, nidbas) + ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative +! +! Weak form +! + kterms = mat%nterms + ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms)) +! +! Gauss quadature +! + CALL get_gauss(spl, ngauss) + ALLOCATE(xgauss(ngauss), wgauss(ngauss)) +!=========================================================================== +! 2.0 Assembly loop +! + DO i=1,nx + CALL get_gauss(spl, ngauss, i, xgauss, wgauss) + DO igauss=1,ngauss + CALL basfun(xgauss(igauss), spl, fun, i) + CALL coefeq(xgauss(igauss), idert, iderw, coefs) + DO iterm=1,kterms + DO jt=0,nidbas + DO iw=0,nidbas + contrib = fun(jt,idert(iterm)) * coefs(iterm) * & + & fun(iw,iderw(iterm)) * wgauss(igauss) + irow=i+iw; jcol=i+jt + CALL updtmat(mat, irow, jcol, contrib) + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(fun) + DEALLOCATE(xgauss, wgauss) + DEALLOCATE(iderw, idert, coefs) +! +CONTAINS + SUBROUTINE coefeq(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt)) +! +! Weak form = Int(x*dw/dx*dt/dx)dx +! + c(1) = x + idt(1) = 1 + idw(1) = 1 + END SUBROUTINE coefeq +END SUBROUTINE dismat +!+++ +SUBROUTINE disrhs(kdiff, spl, rhs) +! +! Assenbly the RHS using spline spl +! + USE bsplines + IMPLICIT NONE + INTEGER, INTENT(in) :: kdiff + TYPE(spline1d), INTENT(in) :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:) + INTEGER :: nrank, nx, nidbas, ngauss + INTEGER :: i, igauss, left + DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) + DOUBLE PRECISION:: contrib +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl, nrank, nx, nidbas) +!!$ WRITE(*,'(/a, 5i3)') 'nrank, nx, nidbas =', nrank, nx, nidbas +! + ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives) +! +! Gauss quadature +! + CALL get_gauss(spl, ngauss) +!!$ WRITE(*,'(/a, i3)') 'Gauss points and weights, ngauss =', ngauss + ALLOCATE(xgauss(ngauss), wgauss(ngauss)) +!=========================================================================== +! 2.0 Assembly loop +! + rhs(1:nrank) = 0.0d0 +! + DO i=1,nx + CALL get_gauss(spl, ngauss, i, xgauss, wgauss) +!!$ WRITE(*,'(a,i3,(8f10.4))') 'i=',i, xgauss, wgauss + DO igauss=1,ngauss + CALL basfun(xgauss(igauss), spl, fun, i) + left = i-1 +!!$ WRITE(*,'(a,5i4)') '>>>igauss, left', igauss, left + contrib = wgauss(igauss)*rhseq(xgauss(igauss), kdiff) + rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1) + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(fun) + DEALLOCATE(xgauss, wgauss) +! +CONTAINS + DOUBLE PRECISION FUNCTION rhseq(x,k) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(in) :: k + rhseq = k*k*x**(k-1) + END FUNCTION rhseq +END SUBROUTINE disrhs diff --git a/examples/pde1d_eig_zmumps.f90 b/examples/pde1d_eig_zmumps.f90 new file mode 100644 index 0000000..ec17841 --- /dev/null +++ b/examples/pde1d_eig_zmumps.f90 @@ -0,0 +1,460 @@ +!> +!> @file pde1d_eig_zmumps.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Solving the following 1d differential eqation using splines: +! +! Solve the standard eigenvalue: +! A*x = \lambda *x or inv(A)*x = 1/\lambda * x using Arpack and MUMPS. +! where A is obtained from discretozation of +! -d/dr[r d/dr] f = k^2 r^(k-1) +! + USE bsplines + USE mumps_bsplines + USE futils + IMPLICIT NONE + INTEGER :: nx, nidbas, ngauss, kdiff + INTEGER :: i, nrank + LOGICAL :: nlppform + DOUBLE PRECISION :: coefs(5) + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid + TYPE(spline1d) :: splx + TYPE(zmumps_mat) :: mat +! + INTEGER :: ierr + INTEGER :: fid + CHARACTER(len=32) :: str + DOUBLE PRECISION :: seconds, t0, tmat, tfac, tarpack +! +! Arpack: Solve the standard eigenvalue problem +! + INTEGER :: nev = 10, ncv = 10 + LOGICAL :: nlinv = .FALSE. ! Solve inv(A) = 1/\lambda * x if nlinv=.TRUE. + CHARACTER(len=2) :: which='SM' + INTEGER :: info=0 ! Use random vector to start the Arnoldi iterations + INTEGER :: ido=0 ! Reverse communications + LOGICAL :: rvec + LOGICAL, ALLOCATABLE :: select(:) + INTEGER :: iparam(11), ipntr(14), nconv + DOUBLE PRECISION :: tol=0.0d0 + CHARACTER(len=1) :: bmat='I' +! + INTEGER :: lworkl + DOUBLE COMPLEX, ALLOCATABLE :: workl(:), workd(:), workev(:) + DOUBLE COMPLEX, ALLOCATABLE :: eig_vals(:), eig_vecs(:,:), resid(:) + DOUBLE COMPLEX :: sigma + DOUBLE PRECISION, ALLOCATABLE :: rwork(:) +! + INTERFACE + SUBROUTINE dismat(spl, mat) + USE bsplines + USE mumps_bsplines + TYPE(spline1d), INTENT(in) :: spl + TYPE(zmumps_mat), INTENT(inout) :: mat + END SUBROUTINE dismat + SUBROUTINE disrhs(kdiff, spl, rhs) + USE bsplines + INTEGER, INTENT(in) :: kdiff + TYPE(spline1d), INTENT(in) :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:) + END SUBROUTINE disrhs + SUBROUTINE meshdist(coefs, x, nx) + DOUBLE PRECISION, INTENT(in) :: coefs(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + END SUBROUTINE meshdist + END INTERFACE +! + NAMELIST /newrun/ nx, nidbas, ngauss, kdiff, nlppform, coefs, & + & nev, ncv, nlinv, which, tol +!=========================================================================== +! 1.0 Prologue +! + CALL mpi_init(ierr) +! +! Read in data specific to run +! + nx = 8 ! Number oh intevals in x + nidbas = 3 ! Degree of splines + ngauss = 4 ! Number of Gauss points/interval + kdiff = 2 ! Exponent of differential problem + nlppform = .TRUE. ! Use PPFORM for gridval or not + coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + ! see function FDIST in MESHDIST +! + READ(*,newrun) + WRITE(*,newrun) +! +! Define grid on x axis +! + ALLOCATE(xgrid(0:nx)) + xgrid(0) = 0.0d0 + xgrid(nx) = 1.0d0 + CALL meshdist(coefs, xgrid, nx) + WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) + WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx) + WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx) +!=========================================================================== +! 2.0 Discretize the PDE +! +! Set up spline +! + t0 = seconds() + CALL set_spline(nidbas, ngauss, xgrid, splx, nlppform=nlppform) + CALL get_dim(splx, nrank) ! Rank of the FE matrix + WRITE(*,'(a,l6)') 'splx%nlequid', splx%nlequid +! +! FE matrix assembly +! + WRITE(*,'(a,i6)') ' nrank', nrank + CALL init(nrank, 1, mat) + CALL dismat(splx, mat) + CALL to_mat(mat) + tmat = seconds() - t0 +! + mat%mumps_par%IRN => mat%mumps_par%IRN_loc ! Work around for single proc. + PRINT*, 'nnz_loc', mat%nnz_loc + PRINT*, 'mat%mumps_par%N', mat%mumps_par%N + PRINT*, 'mat%mumps_par%NZ_loc', mat%mumps_par%NZ_loc + PRINT*, 'size of mat%mumps_par%IRN', SIZE(mat%mumps_par%IRN) + PRINT*, 'mat%istart,mat%iend', mat%istart,mat%iend +! + CALL creatf('pde1d_eig_zmumps.h5', fid, 'PDE1D Result File', real_prec='d') + PRINT*, 'rank', mat%rank + PRINT*, 'nnz', mat%nnz +! + CALL putmat(fid,'/MAT',mat,'FE matrix') +! + IF(nlinv) THEN + t0 = seconds() + CALL factor(mat) + tfac = seconds()-t0 + END IF +!=========================================================================== +! 3.0 Solve the standard eigenvalue problem +! + lworkl = 3*ncv**2 + 5*ncv + ALLOCATE(workl(lworkl)) + ALLOCATE(workd(3*nrank)) + ALLOCATE(workev(2*ncv)) + ALLOCATE(eig_vals(ncv), eig_vecs(nrank,ncv)) + ALLOCATE(resid(nrank)) + ALLOCATE(rwork(ncv)) +! + iparam(1) = 1 ! shfts + iparam(3) = 300 ! Max. number of iterations + iparam(7) = 1 ! Regular mode +! +! The reverse communication loop +! + t0 = seconds() + DO + CALL znaupd (ido, bmat, nrank, which, nev, tol, resid, ncv, & + & eig_vecs, nrank, iparam, ipntr, workd, workl, lworkl, & + & rwork, info ) +! + IF(ido .EQ. -1 .OR. ido .EQ. 1) THEN ! Compute A*v + CALL av(nrank, workd(ipntr(1)), workd(ipntr(2))) + CYCLE + END IF +! + IF(info .LT. 0) THEN ! Error + PRINT*, 'Error in _naupd with info =', info + EXIT + ELSE + rvec = .TRUE. + ALLOCATE(select(ncv)) + CALL zneupd (rvec, 'A', select, eig_vals, eig_vecs, nrank, & + & sigma, workev, bmat, nrank, which, nev, tol, resid, & + & ncv, eig_vecs, nrank, iparam, ipntr, workd, workl, lworkl,& + & rwork, ierr) + IF(ierr .NE. 0) THEN + PRINT*, 'Error in _neupd with ierr =', ierr + EXIT + ELSE + nconv = iparam(5) + PRINT*,'Number of converged eigenvalues', nconv + IF(nlinv) THEN + eig_vals(1:nconv) = (1.d0,0.0d0) / eig_vals(1:nconv) + END IF + WRITE(*,'(2(1pe12.3))') eig_vals(1:nconv) + CALL putarr(fid, '/eig_vals', eig_vals(1:nconv)) + CALL putarr(fid, '/eig_vecs', eig_vecs(1:nrank,1:nconv)) + DO i=1,nconv +!!$ WRITE(*,'(/a,2(pe20.6))') '*** eigen value =', eig_vals(i) +!!$ WRITE(*,'(a/(10(1pe12.4)))') 'Real of eigen vector', & +!!$ & REAL(eig_vecs(1:nrank,i)) +!!$ WRITE(*,'(a/(10(1pe12.4)))') 'Imag of eigen vector', & +!!$ & aimag(eig_vecs(1:nrank,i)) + WRITE(str,'(a,i3.3)') '/eig_vecs_',i + CALL putarr(fid, TRIM(str), eig_vecs(1:nrank,i)) + END DO + EXIT + END IF + END IF + END DO ! End of reverse commuinication loop + IF(info .EQ. 1) THEN + PRINT*, 'Maximum number of iterations reached!' + PRINT*, 'IPARAM(5) =', iparam(5) + END IF + PRINT*, 'Number of Arnoldi iterations', iparam(3) + tarpack = seconds() - t0 +!=========================================================================== +! 9.0 Epilogue +! + WRITE(*,'(a)') '---' + WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat + IF(nlinv) THEN + WRITE(*,'(a,1pe12.3)') 'Matrice factorization time (s) ', tfac + END IF + WRITE(*,'(a,1pe12.3)') 'Arpack time (s) ', tarpack +! + DEALLOCATE(xgrid) + CALL destroy(mat) + CALL destroy_sp(splx) + CALL closef(fid) + CALL mpi_finalize(ierr) +! +CONTAINS + SUBROUTINE av(n,v,w) +! + INTEGER, INTENT(in) :: n + DOUBLE COMPLEX, INTENT(in) :: v(*) + DOUBLE COMPLEX, INTENT(out) :: w(*) +! + IF(nlinv) THEN + w(1:n) = v(1:n) + CALL bsolve(mat,w(1:n)) ! Solve A*w = v or w=inv(A)*v + ELSE + w(1:n) = vmx(mat,v(1:n)) ! A*v matrix product + END IF + END SUBROUTINE av +! + SUBROUTINE coefeq(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE PRECISION, INTENT(out) :: c(:) +! +! Weak form = Int(x*dw/dx*dt/dx)dx +! + c(1) = x + idt(1) = 1 + idw(1) = 1 + END SUBROUTINE coefeq +END PROGRAM main +! +!+++ +SUBROUTINE meshdist(c, x, nx) +! +! Construct an 1d non-equidistant mesh given a +! mesh distribution function defined in FDIST +! + IMPLICIT NONE + DOUBLE PRECISION, INTENT(in) :: c(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + INTEGER :: nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) +CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) + END FUNCTION fdist +END SUBROUTINE meshdist +!+++ +SUBROUTINE dismat(spl, mat) +! +! Assembly FE matrix mat using spline spl +! + USE bsplines + USE mumps_bsplines + IMPLICIT NONE + TYPE(spline1d), INTENT(in) :: spl + TYPE(zmumps_mat), INTENT(inout) :: mat + INTEGER :: nrank, nx, nidbas, ngauss + INTEGER :: i, igauss, iterm, iw, jt, irow, jcol + DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) + DOUBLE COMPLEX :: contrib +! + INTEGER :: kterms ! Number of terms in weak form + INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order + DOUBLE PRECISION, ALLOCATABLE :: coefs(:) +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl, nrank, nx, nidbas) + ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative +! +! Weak form +! + kterms = mat%nterms + ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms)) +! +! Gauss quadature +! + CALL get_gauss(spl, ngauss) + ALLOCATE(xgauss(ngauss), wgauss(ngauss)) +!=========================================================================== +! 2.0 Assembly loop +! + DO i=1,nx + CALL get_gauss(spl, ngauss, i, xgauss, wgauss) + DO igauss=1,ngauss + CALL basfun(xgauss(igauss), spl, fun, i) + CALL coefeq(xgauss(igauss), idert, iderw, coefs) + DO iterm=1,kterms + DO jt=0,nidbas + DO iw=0,nidbas + contrib = fun(jt,idert(iterm)) * coefs(iterm) * & + & fun(iw,iderw(iterm)) * wgauss(igauss) + irow=i+iw; jcol=i+jt + CALL updtmat(mat, irow, jcol, contrib) + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(fun) + DEALLOCATE(xgauss, wgauss) + DEALLOCATE(iderw, idert, coefs) +! +CONTAINS + SUBROUTINE coefeq(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt)) +! +! Weak form = Int(x*dw/dx*dt/dx)dx +! + c(1) = x + idt(1) = 1 + idw(1) = 1 + END SUBROUTINE coefeq +END SUBROUTINE dismat +!+++ +SUBROUTINE disrhs(kdiff, spl, rhs) +! +! Assenbly the RHS using spline spl +! + USE bsplines + IMPLICIT NONE + INTEGER, INTENT(in) :: kdiff + TYPE(spline1d), INTENT(in) :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:) + INTEGER :: nrank, nx, nidbas, ngauss + INTEGER :: i, igauss, left + DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) + DOUBLE PRECISION:: contrib +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl, nrank, nx, nidbas) +!!$ WRITE(*,'(/a, 5i3)') 'nrank, nx, nidbas =', nrank, nx, nidbas +! + ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives) +! +! Gauss quadature +! + CALL get_gauss(spl, ngauss) +!!$ WRITE(*,'(/a, i3)') 'Gauss points and weights, ngauss =', ngauss + ALLOCATE(xgauss(ngauss), wgauss(ngauss)) +!=========================================================================== +! 2.0 Assembly loop +! + rhs(1:nrank) = 0.0d0 +! + DO i=1,nx + CALL get_gauss(spl, ngauss, i, xgauss, wgauss) +!!$ WRITE(*,'(a,i3,(8f10.4))') 'i=',i, xgauss, wgauss + DO igauss=1,ngauss + CALL basfun(xgauss(igauss), spl, fun, i) + left = i-1 +!!$ WRITE(*,'(a,5i4)') '>>>igauss, left', igauss, left + contrib = wgauss(igauss)*rhseq(xgauss(igauss), kdiff) + rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1) + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(fun) + DEALLOCATE(xgauss, wgauss) +! +CONTAINS + DOUBLE PRECISION FUNCTION rhseq(x,k) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(in) :: k + rhseq = k*k*x**(k-1) + END FUNCTION rhseq +END SUBROUTINE disrhs diff --git a/examples/pde1dp.f90 b/examples/pde1dp.f90 new file mode 100644 index 0000000..7eb32bd --- /dev/null +++ b/examples/pde1dp.f90 @@ -0,0 +1,170 @@ +!> +!> @file pde1dp.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! 1D PDE with priodic BC +! + USE pde1dp_mod + USE bsplines + USE matrix + USE futils + USE conmat_mod +! + IMPLICIT NONE + CHARACTER(len=128) :: file='pde1dp.h5' + INTEGER :: fid + INTEGER :: nx, nidbas, ngauss, ibcoef + INTEGER :: nrank, kl, ku, dim + DOUBLE PRECISION :: coefs(5) + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, sol, rhs + INTEGER, PARAMETER :: npts=100 + DOUBLE PRECISION, DIMENSION(0:npts-1) :: xpts, frhs + DOUBLE PRECISION :: dx, errmx + DOUBLE PRECISION, ALLOCATABLE :: arr(:,:) + TYPE(periodic_mat) :: mat + INTEGER :: i, j +! + NAMELIST /newrun/ nx, nidbas, ngauss, ibcoef, coefs +!=========================================================================== +! 1.0 Prologue +! +! Read in data specific to run +! + nx = 8 ! Number oh intevals in x + nidbas = 3 ! Degree of splines + ngauss = 4 ! Number of Gauss points/interval + ibcoef = 1 ! Index of non-zero spline coef for RHS + coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + ! see function FDIST in MESHDIST +! + READ(*,newrun) + WRITE(*,newrun) +! +! Define grid on x axis +! + ALLOCATE(xgrid(0:nx)) + xgrid(0) = 0.0d0 + xgrid(nx) = 1.0d0 + CALL meshdist(coefs, xgrid, nx) + WRITE(*,'(/a/(10f8.3))') 'XGRID', xgrid(0:nx) +! +! Create hdf5 file +! + CALL creatf(file, fid, 'PDE1DP Result File') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NIDBAS', nidbas) + CALL attach(fid, '/', 'NGAUSS', ngauss) +!=========================================================================== +! 2.0 Discretize the PDE +! +! Set up periodic spline +! + CALL set_spline(nidbas, ngauss, xgrid, splx, period=.TRUE.) + nrank = nx ! Rank of the FE matrix +! +! Mass matrix assembly +! + kl = nidbas + ku = kl + CALL init(kl, ku, nrank, 1, mat) + CALL get_dim(splx, dim) + WRITE(*,'(/a,4i6)') 'kl, ku, nrank, dim', kl, ku, nrank, dim +!!$ CALL dismat(splx, mat) + CALL conmat(splx, mat, coefeq_mass) +! +! Store matrix in hdf5 file +! + ALLOCATE(arr(nrank,nrank)) + DO j=1,nrank + CALL getcol(mat, j, arr(:,j)) + END DO + CALL putarr(fid, '/mata', arr) + DEALLOCATE(arr) +! +! Check RHS constructed using input spline coefs. +! + ALLOCATE(bcoef(0:dim-1)) + bcoef = 0.0d0; bcoef(ibcoef-1) = 1.0d0 +! + DO i=nrank,dim-1 ! Periodicity to fill array of spline coefs + bcoef(i) = bcoef(MODULO(i,nrank)) + END DO + WRITE(*,'(/a/(10f8.3))') 'bcoef from input', bcoef + dx = (xgrid(nx)-xgrid(0))/npts + DO i=0,npts-1 + xpts(i) = xgrid(0) + i*dx + frhs(i) = rhseq(xpts(i)) + END DO + CALL creatg(fid, '/rhs') + CALL putarr(fid,'/rhs/x', xpts) + CALL putarr(fid,'/rhs/f', frhs) +! +! Assembly RHS and check A*x = f, using method vmx +! + ALLOCATE(rhs(nrank), sol(nrank)) + CALL disrhs(splx, rhs) + sol = vmx(mat, bcoef(0:nrank-1)) + WRITE(*,'(/6x,3a12)') 'A*x', 'rhs', 'Err' + errmx = 0.0d0 + DO i=1,nrank + WRITE(*,'(i6,3(1pe12.3))') i, sol(i), rhs(i), sol(i)-rhs(i) + errmx=MAX(errmx,ABS(sol(i)-rhs(i))) + END DO + WRITE(*,'(a,1pe12.3)') 'Max. error =', errmx +! +! Factor and solve +! + CALL factor(mat) + CALL bsolve(mat, rhs, sol) + WRITE(*,'(/6x,3a12)') 'Computed', 'Exact', 'Err' + errmx = 0.0d0 + DO i=1,nrank + WRITE(*,'(i6,3(1pe12.3))') i, sol(i), bcoef(i-1), sol(i)-bcoef(i-1) + errmx=MAX(errmx,ABS(sol(i)-bcoef(i-1))) + END DO + WRITE(*,'(a,1pe12.3)') 'Max. error =', errmx +!=========================================================================== +! 9.0 Clean up +! + DEALLOCATE(xgrid) + DEALLOCATE(bcoef) + DEALLOCATE(rhs, sol) + CALL destroy(mat) + CALL destroy_sp(splx) + CALL closef(fid) +CONTAINS + SUBROUTINE coefeq_mass(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE PRECISION, INTENT(out) :: c(:) +! +! Mass matrix +! + c(1) = 1.0d0 + idt(1) = 0 + idw(1) = 0 + END SUBROUTINE coefeq_mass +END PROGRAM main diff --git a/examples/pde1dp_cmpl.f90 b/examples/pde1dp_cmpl.f90 new file mode 100644 index 0000000..48b2d1e --- /dev/null +++ b/examples/pde1dp_cmpl.f90 @@ -0,0 +1,403 @@ +!> +!> @file pde1dp_cmpl.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +MODULE pde1dp_cmpl_mod + USE bsplines + USE matrix + IMPLICIT NONE +! +CONTAINS + SUBROUTINE disrhs(spl, rhs, mmode, alpha, beta) +! + TYPE(spline1d) :: spl + INTEGER, INTENT(in) :: mmode + DOUBLE COMPLEX, INTENT(in) :: alpha, beta + DOUBLE COMPLEX, INTENT(out) :: rhs(:) +! + INTEGER :: dim, nrank, nx, nidbas, ngauss + INTEGER :: i, igauss, it, irow + DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) + DOUBLE COMPLEX :: contrib +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + nrank = SIZE(rhs) + CALL get_dim(spl, dim, nx, nidbas) +! + ALLOCATE(fun(0:nidbas,1)) ! needs only basis functions (no derivatives) +! +! Gauss quadature +! + CALL get_gauss(spl, ngauss) + ALLOCATE(xgauss(ngauss), wgauss(ngauss)) +!=========================================================================== +! 2.0 Assembly loop +! + rhs(:) = 0.0d0 +! + DO i=1,nx + CALL get_gauss(spl, ngauss, i, xgauss, wgauss) + DO igauss=1,ngauss + CALL basfun(xgauss(igauss), spl, fun, i) + contrib = wgauss(igauss) * rhseq(xgauss(igauss)) + DO it=0,nidbas + irow=MODULO(i+it-1,nx) + 1 ! Periodic BC + rhs(irow) = rhs(irow) + contrib*fun(it,1) + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(fun) + DEALLOCATE(xgauss, wgauss) +CONTAINS + DOUBLE COMPLEX FUNCTION rhseq(x) + DOUBLE PRECISION, INTENT(in) :: x + DOUBLE PRECISION :: arg + arg = mmode*x + rhseq = (mmode**2*alpha-beta)*COS(arg) + END FUNCTION rhseq +END SUBROUTINE disrhs +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE dismat(spl, mat, alpha, beta) +! + TYPE(spline1d) :: spl + TYPE(zperiodic_mat) :: mat + DOUBLE COMPLEX, INTENT(in) :: alpha, beta +! + INTEGER :: dim, nx, nidbas, ngauss + INTEGER :: i, igauss, iterm, iw, jt, irow, jcol + DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) + DOUBLE COMPLEX :: contrib +! + INTEGER :: kterms ! Number of terms in weak form + INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order + DOUBLE COMPLEX, ALLOCATABLE :: coefs(:) +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl, dim, nx, nidbas) + ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative +! +! Weak form +! + kterms = mat%mat%nterms + ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms)) +! +! Gauss quadature +! + CALL get_gauss(spl, ngauss) + ALLOCATE(xgauss(ngauss), wgauss(ngauss)) +!=========================================================================== +! 2.0 Assembly loop +! + DO i=1,nx + CALL get_gauss(spl, ngauss, i, xgauss, wgauss) + DO igauss=1,ngauss + CALL basfun(xgauss(igauss), spl, fun, i) + CALL coefeq(xgauss(igauss), idert, iderw, coefs) + DO iterm=1,kterms + DO jt=0,nidbas + DO iw=0,nidbas + contrib = fun(jt,idert(iterm)) * coefs(iterm) * & + & fun(iw,iderw(iterm)) * wgauss(igauss) + irow=MODULO(i+iw-1,nx) + 1 ! Periodic BC + jcol=MODULO(i+jt-1,nx) + 1 + CALL updtmat(mat, irow, jcol, contrib) + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(fun) + DEALLOCATE(xgauss, wgauss) + DEALLOCATE(iderw, idert, coefs) +! +CONTAINS + SUBROUTINE coefeq(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE COMPLEX, INTENT(out) :: c(SIZE(idt)) +! + c(1) = alpha + idt(1) = 1 + idw(1) = 1 +! + c(2) = -beta + idt(2) = 0 + idw(2) = 0 + END SUBROUTINE coefeq +END SUBROUTINE dismat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE save_mat(fid, label, mat) +! +! Save zperiodic_mat in dense format +! + USE futils +! + INTEGER, INTENT(in) :: fid + CHARACTER(len=*), INTENT(in) :: label + TYPE(zperiodic_mat) :: mat + INTEGER :: j, n + DOUBLE COMPLEX, ALLOCATABLE :: fullmat(:,:) +! + n=mat%mat%rank + ALLOCATE(fullmat(n,n)) + DO j=1,n + CALL getcol(mat, j, fullmat(:,j)) + END DO + CALL putarr(fid, label, fullmat) + DEALLOCATE(fullmat) + END SUBROUTINE save_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + FUNCTION norm2(x) +! +! Compute the 2-norm of complex array x +! + IMPLICIT NONE + DOUBLE PRECISION :: norm2 + DOUBLE COMPLEX, INTENT(in) :: x(:) + DOUBLE PRECISION :: sum2 +! + sum2 = DOT_PRODUCT(x,x) + norm2 = SQRT(sum2) + END FUNCTION norm2 +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE meshdist(mmode, x) +! +! Construct a 1d non-equidistant mesh given a +! mesh distribution function. +! + INTEGER, INTENT(in) :: mmode + DOUBLE PRECISION, INTENT(inout) :: x(0:) + INTEGER :: nx, nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + nx = SIZE(x)-1 + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) + CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = 2.0 + COS(mmode*x) + END FUNCTION fdist + END SUBROUTINE meshdist +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +END MODULE pde1dp_cmpl_mod +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +! +PROGRAM main +! +! 1D complex PDE with periodic BC +! + USE pde1dp_cmpl_mod + USE bsplines + USE matrix + USE futils +! + IMPLICIT NONE + TYPE(spline1d) :: splx + TYPE(zperiodic_mat) :: mat + INTEGER :: kl, ku, nrank +! + CHARACTER(len=128) :: file='pde1dp_cmpl.h5' + INTEGER :: fid + INTEGER :: nx, nidbas, ngauss, mmode, npt, dim + LOGICAL :: nlequid + DOUBLE PRECISION, PARAMETER :: pi=3.1415926535897931d0 + DOUBLE PRECISION :: dx + DOUBLE COMPLEX :: alpha, beta + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, x, err + DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: sol, rhs, bcoef + DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: solcal, solana + DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) + DOUBLE PRECISION :: err_norm + INTEGER :: i +! + NAMELIST /newrun/ nx, nidbas, ngauss, nlequid, alpha, beta, mmode, npt +!=========================================================================== +! 1.0 Prologue +! +! Read in data specific to run +! + nx = 8 ! Number oh intevals in x + nidbas = 3 ! Degree of splines + ngauss = 4 ! Number of Gauss points/interval + nlequid = .TRUE. ! Use exact sol. as mesh dist. function if .FALSE. + mmode = 1 ! Fourier mode + alpha = (1.0, 1.0) ! Complex "diffusion" + beta = 1.0 + npt = 100 +! + READ(*,newrun) + WRITE(*,newrun) +! +! Define grid on x axis +! + ALLOCATE(xgrid(0:nx)) + dx = 2.d0*pi/REAL(nx,8) + xgrid = (/ (i*dx,i=0,nx) /) + IF( .NOT. nlequid ) THEN + CALL meshdist(mmode, xgrid) + END IF + WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid +! +! Create hdf5 file +! + CALL creatf(file, fid, 'PDE1DP Result File', real_prec='d') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NIDBAS', nidbas) + CALL putarr(fid, '/xgrid', xgrid) +!=========================================================================== +! 2.0 Discretize the PDE +! +! Set up periodic spline +! + CALL set_spline(nidbas, ngauss, xgrid, splx, period=.TRUE.) + WRITE(*,'(a,l6)') 'nlequid =', nlequid + nrank = nx ! Rank of the FE matrix +! +! FE matrix assembly +! + kl = nidbas + ku = kl + CALL init(kl, ku, nrank, 2, mat) + CALL get_dim(splx, dim) + WRITE(*,'(/a,4i6)') 'kl, ku, nrank, dim', kl, ku, nrank, dim + CALL dismat(splx, mat, alpha, beta) +! +! RHS assembly +! + ALLOCATE(rhs(nrank), sol(nrank)) + CALL disrhs(splx, rhs, mmode, alpha, beta) +! + CALL save_mat(fid, '/mat', mat) + CALL putarr(fid, '/rhs', rhs) +! +! Factor and solve +! + CALL factor(mat) + CALL bsolve(mat, rhs, sol) + CALL putarr(fid, '/sol', sol) +!=========================================================================== +! 3.0 Check solution +! +! Exact solution + ALLOCATE(x(0:npt), solcal(0:npt), solana(0:npt), err(0:npt)) + dx=2.0d0*pi/REAL(npt,8) + x = (/ (i*dx, i=0,npt) /) + solana = COS(mmode*x) +! +! Prolongate solution using periodicity +! + ALLOCATE(bcoef(dim)) + bcoef(1:nrank) = sol(1:nrank) + DO i=nrank+1,dim + bcoef(i) = bcoef(MODULO(i-1,nrank)+1) + END DO +! +! Interpolate field +! + CALL gridval(splx, x, solcal, 0, bcoef) +! + err = ABS(solcal-solana) + CALL putarr(fid, '/x', x) + CALL putarr(fid, '/solana', solana) + CALL putarr(fid, '/solcal', solcal) + CALL putarr(fid, '/err', err) +! +! Compute discretization error norm by Gauss integration +! + err_norm=0.0 + ALLOCATE(xgauss(ngauss), wgauss(ngauss)) + DO i=1,nx + CALL get_gauss(splx, ngauss, i, xgauss, wgauss) + CALL gridval(splx, xgauss, solcal(1:ngauss), 0) + solana(1:ngauss) = COS(mmode*xgauss) + err(1:ngauss) = DOT_PRODUCT(solana(1:ngauss)-solcal(1:ngauss), & + & solana(1:ngauss)-solcal(1:ngauss)) + err_norm = err_norm + SUM(wgauss*err(1:ngauss)) + END DO + err_norm = SQRT(err_norm) + WRITE(*,'(a,1pe12.3)') 'Discretization error ', err_norm +! + DEALLOCATE(x, solcal, solana, err) + DEALLOCATE(xgauss, wgauss) + DEALLOCATE(bcoef) +!=========================================================================== +! 9.0 Clean up +! + DEALLOCATE(xgrid) + DEALLOCATE(rhs, sol) + CALL destroy(mat) + CALL destroy_sp(splx) + CALL closef(fid) +END PROGRAM main diff --git a/examples/pde1dp_cmpl_dft.f90 b/examples/pde1dp_cmpl_dft.f90 new file mode 100644 index 0000000..d866885 --- /dev/null +++ b/examples/pde1dp_cmpl_dft.f90 @@ -0,0 +1,290 @@ +!> +!> @file pde1dp_cmpl_dft.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +MODULE pde1dp_cmpl_dft_mod + USE bsplines + IMPLICIT NONE +! +CONTAINS +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE dismat(spl, mat, alpha, beta) +! + USE bsplines + TYPE(spline1d) :: spl + DOUBLE COMPLEX :: mat(:) + DOUBLE COMPLEX, INTENT(in) :: alpha, beta +! + INTEGER :: dim, nx, nidbas, ngauss, intv, igauss + DOUBLE COMPLEX, ALLOCATABLE :: ft_fun(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl, dim, nx, nidbas) + CALL get_gauss(spl, ngauss) + ALLOCATE(xgauss(ngauss), wgauss(ngauss)) + ALLOCATE(ft_fun(0:nx-1,2)) ! Up to first derivative +! +! Weak form: integration on first interval +! + intv = 1 + CALL get_gauss(spl, ngauss, intv, xgauss, wgauss) + mat = 0.0d0 + DO igauss=1,ngauss + CALL ft_basfun(xgauss(igauss), spl, ft_fun, intv) + mat(:) = mat(:) + wgauss(igauss) * ( & + & alpha*ft_fun(:,2)*CONJG(ft_fun(:,2)) & + & - beta*ft_fun(:,1)*CONJG(ft_fun(:,1)) & + & ) + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(ft_fun) + DEALLOCATE(xgauss, wgauss) +! + END SUBROUTINE dismat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE disrhs(spl, rhs, mmode, alpha, beta) +! + TYPE(spline1d) :: spl + INTEGER, INTENT(in) :: mmode + DOUBLE COMPLEX, INTENT(in) :: alpha, beta + DOUBLE COMPLEX, INTENT(out) :: rhs(:) +! + INTEGER :: dim, nrank, nx, nidbas, ngauss + INTEGER :: i, igauss, it, irow + DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) + DOUBLE COMPLEX :: contrib +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + nrank = SIZE(rhs) + CALL get_dim(spl, dim, nx, nidbas) +! + ALLOCATE(fun(0:nidbas,1)) ! needs only basis functions (no derivatives) +! +! Gauss quadature +! + CALL get_gauss(spl, ngauss) + ALLOCATE(xgauss(ngauss), wgauss(ngauss)) +!=========================================================================== +! 2.0 Assembly loop +! + rhs(:) = 0.0d0 +! + DO i=1,nx + CALL get_gauss(spl, ngauss, i, xgauss, wgauss) + DO igauss=1,ngauss + CALL basfun(xgauss(igauss), spl, fun, i) + contrib = wgauss(igauss) * rhseq(xgauss(igauss)) + DO it=0,nidbas + irow=MODULO(i+it-1,nx) + 1 ! Periodic BC + rhs(irow) = rhs(irow) + contrib*fun(it,1) + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(fun) + DEALLOCATE(xgauss, wgauss) + CONTAINS + DOUBLE COMPLEX FUNCTION rhseq(x) + DOUBLE PRECISION, INTENT(in) :: x + DOUBLE PRECISION :: arg + arg = mmode*x + rhseq = (mmode**2*alpha-beta)*COS(arg) + END FUNCTION rhseq + END SUBROUTINE disrhs +END MODULE pde1dp_cmpl_dft_mod +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +! +PROGRAM main +! +! 1D complex PDE with periodic BC, using DFT +! + USE pde1dp_cmpl_dft_mod + USE bsplines + USE matrix + USE futils + USE fft +! + IMPLICIT NONE + TYPE(spline1d) :: splx + DOUBLE COMPLEX, ALLOCATABLE :: mat(:) + INTEGER ::nrank +! + CHARACTER(len=128) :: file='pde1dp_cmpl_dft.h5' + INTEGER :: fid + INTEGER :: nx, nidbas, ngauss, mmode, npt, dim + DOUBLE PRECISION, PARAMETER :: pi=3.1415926535897931d0 + DOUBLE PRECISION :: dx + DOUBLE COMPLEX :: alpha, beta + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, x, err + DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: sol, rhs, bcoef + DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: sol_shifted, rhs_shifted + DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: solcal, solana + DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) + DOUBLE PRECISION :: err_norm + INTEGER :: i + INTEGER :: k, kmin, kmax +! + NAMELIST /newrun/ nx, nidbas, ngauss, alpha, beta, mmode, npt +!=========================================================================== +! 1.0 Prologue +! +! Read in data specific to run +! + nx = 8 ! Number oh intevals in x + nidbas = 3 ! Degree of splines + ngauss = 4 ! Number of Gauss points/interval + mmode = 1 ! Fourier mode + alpha = (1.0, 1.0) ! Complex "diffusion" + beta = 1.0 + npt = 100 +! + READ(*,newrun) + WRITE(*,newrun) +! +! Define grid on x axis +! + ALLOCATE(xgrid(0:nx)) + dx = 2.d0*pi/REAL(nx,8) + xgrid = (/ (i*dx,i=0,nx) /) + WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid +! +! Create hdf5 file +! + CALL creatf(file, fid, 'PDE1DP Result File', real_prec='d') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NIDBAS', nidbas) + CALL putarr(fid, '/xgrid', xgrid) +!=========================================================================== +! 2.0 Discretize the PDE +! +! Set up periodic spline +! + CALL set_spline(nidbas, ngauss, xgrid, splx, period=.TRUE.) + nrank = nx ! Rank of the FE matrix + CALL get_dim(splx, dim) + WRITE(*,'(/a,4i6)') 'nrank, dim', nrank, dim +! +! Init DFT + kmin = -nx/2 + kmax = nx/2-1 + CALL init_dft(splx, kmin, kmax) +! +! FE matrix assembly in Fourier space +! + ALLOCATE(mat(0:nx-1)) + CALL dismat(splx, mat, alpha, beta) + CALL putarr(fid, '/mat', mat) +! +! RHS assembly in real space +! + ALLOCATE(rhs(nrank), sol(nrank)) + CALL disrhs(splx, rhs, mmode, alpha, beta) +!=========================================================================== +! 3.0 Solve the dicretized system +! +! Fourier solve +! + CALL putarr(fid, '/rhs', rhs) +! + CALL fourcol(rhs, 1) +! + ALLOCATE(rhs_shifted(kmin:kmax)) + ALLOCATE(sol_shifted(kmin:kmax)) + DO k=kmin,kmax + rhs_shifted(k) = rhs(MODULO(k+nx,nx)+1)/REAL(nx,8) + END DO + sol_shifted = rhs_shifted / mat + DO k=kmin,kmax + sol(MODULO(k+nx,nx)+1) = sol_shifted(k) + END DO +! + CALL putarr(fid, '/rhs_fft', rhs) + CALL putarr(fid, '/sol_fft', sol) +! +! Solution in real space +! + CALL fourcol(sol,-1) + CALL putarr(fid, '/sol', sol) +!=========================================================================== +! 4.0 Check solution +! +! Exact solution + ALLOCATE(x(0:npt), solcal(0:npt), solana(0:npt), err(0:npt)) + dx=2.0d0*pi/REAL(npt,8) + x = (/ (i*dx, i=0,npt) /) + solana = COS(mmode*x) +! +! Prolongate solution using periodicity +! + ALLOCATE(bcoef(dim)) + bcoef(1:nrank) = sol(1:nrank) + DO i=nrank+1,dim + bcoef(i) = bcoef(MODULO(i-1,nrank)+1) + END DO +! +! Interpolate field +! + CALL gridval(splx, x, solcal, 0, bcoef) +! + err = ABS(solcal-solana) + CALL putarr(fid, '/x', x) + CALL putarr(fid, '/solana', solana) + CALL putarr(fid, '/solcal', solcal) + CALL putarr(fid, '/err', err) +! +! Compute discretization error norm by Gauss integration +! + err_norm=0.0 + ALLOCATE(xgauss(ngauss), wgauss(ngauss)) + DO i=1,nx + CALL get_gauss(splx, ngauss, i, xgauss, wgauss) + CALL gridval(splx, xgauss, solcal(1:ngauss), 0) + solana(1:ngauss) = COS(mmode*xgauss) + err(1:ngauss) = DOT_PRODUCT(solana(1:ngauss)-solcal(1:ngauss), & + & solana(1:ngauss)-solcal(1:ngauss)) + err_norm = err_norm + SUM(wgauss*err(1:ngauss)) + END DO + err_norm = SQRT(err_norm) + WRITE(*,'(a,1pe12.3)') 'Discretization error ', err_norm +!! +!=========================================================================== +! 9.0 Clean up +! + DEALLOCATE(xgrid) + DEALLOCATE(mat) + CALL destroy_sp(splx) + CALL closef(fid) +END PROGRAM main diff --git a/examples/pde1dp_cmpl_mumps.f90 b/examples/pde1dp_cmpl_mumps.f90 new file mode 100644 index 0000000..ab14f2a --- /dev/null +++ b/examples/pde1dp_cmpl_mumps.f90 @@ -0,0 +1,478 @@ +!> +!> @file pde1dp_cmpl_mumps.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +MODULE pde1dp_cmpl_mumps_mod + USE bsplines + USE mumps_bsplines + IMPLICIT NONE +! +CONTAINS + SUBROUTINE disrhs(spl, rhs, mmode, alpha, beta) +! + TYPE(spline1d) :: spl + INTEGER, INTENT(in) :: mmode + DOUBLE COMPLEX, INTENT(in) :: alpha, beta + DOUBLE COMPLEX, INTENT(out) :: rhs(:) +! + INTEGER :: dim, nrank, nx, nidbas, ngauss + INTEGER :: i, igauss, it, irow + DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) + DOUBLE COMPLEX :: contrib +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + nrank = SIZE(rhs) + CALL get_dim(spl, dim, nx, nidbas) +! + ALLOCATE(fun(0:nidbas,1)) ! needs only basis functions (no derivatives) +! +! Gauss quadature +! + CALL get_gauss(spl, ngauss) + ALLOCATE(xgauss(ngauss), wgauss(ngauss)) +!=========================================================================== +! 2.0 Assembly loop +! + rhs(:) = 0.0d0 +! + DO i=1,nx + CALL get_gauss(spl, ngauss, i, xgauss, wgauss) + DO igauss=1,ngauss + CALL basfun(xgauss(igauss), spl, fun, i) + contrib = wgauss(igauss) * rhseq(xgauss(igauss)) + DO it=0,nidbas + irow=MODULO(i+it-1,nx) + 1 ! Periodic BC + rhs(irow) = rhs(irow) + contrib*fun(it,1) + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(fun) + DEALLOCATE(xgauss, wgauss) +CONTAINS + DOUBLE COMPLEX FUNCTION rhseq(x) + DOUBLE PRECISION, INTENT(in) :: x + DOUBLE PRECISION :: arg + arg = mmode*x + rhseq = (mmode**2*alpha-beta)*COS(arg) + END FUNCTION rhseq +END SUBROUTINE disrhs +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE dismat(spl, mat, alpha, beta) +! + TYPE(spline1d) :: spl + TYPE(zmumps_mat) :: mat + DOUBLE COMPLEX, INTENT(in) :: alpha, beta +! + INTEGER :: dim, nx, nidbas, ngauss + INTEGER :: i, igauss, iterm, iw, jt, irow, jcol + DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) + DOUBLE COMPLEX :: contrib +! + INTEGER :: kterms ! Number of terms in weak form + INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order + DOUBLE COMPLEX, ALLOCATABLE :: coefs(:) +! + INTEGER :: istart, iend +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl, dim, nx, nidbas) + ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative +! +! Weak form +! + kterms = mat%nterms + ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms)) +! +! Gauss quadature +! + CALL get_gauss(spl, ngauss) + ALLOCATE(xgauss(ngauss), wgauss(ngauss)) +!=========================================================================== +! 2.0 Assembly loop +! + istart = mat%istart + iend = mat%iend + DO i=1,nx + CALL get_gauss(spl, ngauss, i, xgauss, wgauss) + DO igauss=1,ngauss + CALL basfun(xgauss(igauss), spl, fun, i) + CALL coefeq(xgauss(igauss), idert, iderw, coefs) + DO iterm=1,kterms + DO iw=0,nidbas + irow=MODULO(i+iw-1,nx) + 1 ! Periodic BC + IF( irow.GE.istart .AND. irow.LE.iend) THEN + DO jt=0,nidbas + contrib = fun(jt,idert(iterm)) * coefs(iterm) * & + & fun(iw,iderw(iterm)) * wgauss(igauss) + jcol=MODULO(i+jt-1,nx) + 1 + CALL updtmat(mat, irow, jcol, contrib) + END DO + END IF + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(fun) + DEALLOCATE(xgauss, wgauss) + DEALLOCATE(iderw, idert, coefs) +! +CONTAINS + SUBROUTINE coefeq(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE COMPLEX, INTENT(out) :: c(SIZE(idt)) +! + c(1) = alpha + idt(1) = 1 + idw(1) = 1 +! + c(2) = -beta + idt(2) = 0 + idw(2) = 0 + END SUBROUTINE coefeq +END SUBROUTINE dismat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + FUNCTION norm2(x) +! +! Compute the 2-norm of complex array x +! + IMPLICIT NONE + DOUBLE PRECISION :: norm2 + DOUBLE COMPLEX, INTENT(in) :: x(:) + DOUBLE PRECISION :: sum2 +! + sum2 = DOT_PRODUCT(x,x) + norm2 = SQRT(sum2) + END FUNCTION norm2 +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE meshdist(mmode, x) +! +! Construct a 1d non-equidistant mesh given a +! mesh distribution function. +! + INTEGER, INTENT(in) :: mmode + DOUBLE PRECISION, INTENT(inout) :: x(0:) + INTEGER :: nx, nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + nx = SIZE(x)-1 + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) + CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = 2.0 + COS(mmode*x) + END FUNCTION fdist + END SUBROUTINE meshdist +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +END MODULE pde1dp_cmpl_mumps_mod +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +! +PROGRAM main +! +! 1D complex PDE with periodic BC +! + USE pde1dp_cmpl_mumps_mod + USE futils +! + IMPLICIT NONE + INCLUDE 'mpif.h' + TYPE(spline1d) :: splx + TYPE(zmumps_mat) :: mat + TYPE(zmumps_mat) :: newmat + INTEGER :: kl, ku, nrank +! + CHARACTER(len=128) :: file='pde1dp_cmpl_mumps.h5' + INTEGER :: fid + INTEGER :: nx, nidbas, ngauss, mmode, npt, dim + LOGICAL :: nlequid + LOGICAL :: nlsym, nlherm, nlpos + DOUBLE PRECISION, PARAMETER :: pi=3.1415926535897931d0 + DOUBLE PRECISION :: dx + DOUBLE COMPLEX :: alpha, beta + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, x, err + DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: sol, rhs, bcoef + DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: newsol, arow + DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: solcal, solana + DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) + DOUBLE PRECISION :: err_norm + INTEGER :: i + INTEGER :: ierr, me + INTEGER :: nzfact +! + NAMELIST /newrun/ nx, nidbas, ngauss, nlequid, alpha, beta, mmode, npt, & + & nlsym, nlherm, nlpos +!=========================================================================== +! 1.0 Prologue +! + CALL mpi_init(ierr) + CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) +! +! Read in data specific to run +! + nx = 8 ! Number oh intevals in x + nidbas = 3 ! Degree of splines + ngauss = 4 ! Number of Gauss points/interval + nlequid = .TRUE. ! Use exact sol. as mesh dist. function if .FALSE. + mmode = 1 ! Fourier mode + alpha = (1.0, 1.0) ! Complex "diffusion" + beta = 1.0 + npt = 100 + nlsym = .TRUE. ! Is matrice symmetric + nlherm = .FALSE. ! Is matrice hermitian + nlpos = .TRUE. ! and positive definite ? +! + IF(me.EQ.0) THEN + READ(*,newrun) + WRITE(*,newrun) + END IF + CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nidbas, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(ngauss, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nlequid, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(mmode, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(alpha, 1, MPI_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(beta, 1, MPI_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(npt, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nlsym, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nlherm, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nlpos, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) +! +! Define grid on x axis +! + ALLOCATE(xgrid(0:nx)) + dx = 2.d0*pi/REAL(nx,8) + xgrid = (/ (i*dx,i=0,nx) /) + IF( .NOT. nlequid ) THEN + CALL meshdist(mmode, xgrid) + END IF + IF(me.EQ.0) WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid +! +! Create hdf5 file +! + IF(me.EQ.0) THEN + CALL creatf(file, fid, 'PDE1DP Result File', real_prec='d') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NIDBAS', nidbas) + CALL putarr(fid, '/xgrid', xgrid) + END IF +!=========================================================================== +! 2.0 Discretize the PDE +! +! Set up periodic spline +! + CALL set_spline(nidbas, ngauss, xgrid, splx, period=.TRUE.) + CALL get_dim(splx, dim) + nrank = nx ! Rank of the FE matrix +! +! FE matrix assembly +! + CALL init(nrank, 2, mat, nlsym=nlsym, nlherm=nlherm, nlpos=nlpos) + WRITE(*,'(a,i4.4,a,3i6)') 'PE', me, ' istart, iend, nloc', mat%istart, mat%iend, & + & mat%iend-mat%istart+1 +! + CALL dismat(splx, mat, alpha, beta) +! +! RHS assembly +! + ALLOCATE(rhs(nrank), sol(nrank)) + CALL disrhs(splx, rhs, mmode, alpha, beta) +! + IF(me.EQ.0) CALL putarr(fid, '/rhs', rhs) +! +! Factor and solve +! + CALL factor(mat, debug=.FALSE.) + CALL bsolve(mat, rhs, sol, debug=.FALSE.) +! + nzfact = mat%mumps_par%INFOG(29) + IF(nzfact<0) THEN + nzfact = -nzfact*1000000 + END IF + IF(me.EQ.0) THEN + CALL putarr(fid, '/sol', sol) + WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ',get_count(mat) + WRITE(*,'(a,i8)') 'Number of nonzeros in factors of A = ',nzfact +! +! Compute residue +! + WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(mat,sol)-rhs) + END IF +!=========================================================================== +! 3.0 Check solution +! + IF(me.EQ.0) THEN +! +! Exact solution + ALLOCATE(x(0:npt), solcal(0:npt), solana(0:npt), err(0:npt)) + dx=2.0d0*pi/REAL(npt,8) + x = (/ (i*dx, i=0,npt) /) + solana = COS(mmode*x) +! +! Prolongate solution using periodicity +! + ALLOCATE(bcoef(dim)) + bcoef(1:nrank) = sol(1:nrank) + DO i=nrank+1,dim + bcoef(i) = bcoef(MODULO(i-1,nrank)+1) + END DO +! +! Interpolate field +! + CALL gridval(splx, x, solcal, 0, bcoef) +! + err = ABS(solcal-solana) + CALL putarr(fid, '/x', x) + CALL putarr(fid, '/solana', solana) + CALL putarr(fid, '/solcal', solcal) + CALL putarr(fid, '/err', err) +! +! Compute discretization error norm by Gauss integration +! + err_norm=0.0 + ALLOCATE(xgauss(ngauss), wgauss(ngauss)) + DO i=1,nx + CALL get_gauss(splx, ngauss, i, xgauss, wgauss) + CALL gridval(splx, xgauss, solcal(1:ngauss), 0) + solana(1:ngauss) = COS(mmode*xgauss) + err(1:ngauss) = DOT_PRODUCT(solana(1:ngauss)-solcal(1:ngauss), & + & solana(1:ngauss)-solcal(1:ngauss)) + err_norm = err_norm + SUM(wgauss*err(1:ngauss)) + END DO + err_norm = SQRT(err_norm) + WRITE(*,'(a,1pe12.3)') 'Discretization error ', err_norm + END IF +! +!=========================================================================== +! 4.0 Test of getrow/putrow, getcol/putcol and mcopy +! + CALL init(nrank, 2, newmat, nlsym=nlsym, nlherm=nlherm, nlpos=nlpos) + ALLOCATE(arow(nrank), newsol(nrank)) +! + DO i=1,nrank + CALL getrow(mat, i, arow) + CALL putrow(newmat, i, arow) + END DO + CALL factor(newmat) + CALL bsolve(newmat, rhs, newsol) + WRITE(*,'(/a)') 'putrow/getrow ...' + WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(newmat,newsol)-rhs) + WRITE(*,'(a,1pe12.3)') 'Error ', norm2(sol-newsol) +! + DO i=1,nrank + CALL getcol(mat, i, arow) + CALL putcol(newmat, i, arow) + END DO + CALL factor(newmat) + CALL bsolve(newmat, rhs, newsol) + WRITE(*,'(/a)') 'putcol/getcol ...' + WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(newmat,newsol)-rhs) + WRITE(*,'(a,1pe12.3)') 'Error ', norm2(sol-newsol) +! + CALL clear_mat(newmat) + CALL mcopy(mat, newmat) + WRITE(*,'(/a)') 'mcopy ...' + newmat%val = (1000.0d0,0.0d0)*newmat%val + CALL factor(newmat) + CALL bsolve(newmat, rhs, newsol) + WRITE(*,'(a)') 'Backsolve the new system' + WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(newsol) +! + WRITE(*,'(a)') 'Destroy NEWMAT ...' + CALL destroy(newmat) +! + CALL bsolve(mat, rhs, sol) + WRITE(*,'(/a)') 'Backsolve the old system' + WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(sol) +! + WRITE(*,'(a)') 'Destroy MAT ...' + CALL destroy(mat) +! +!!$ WRITE(*,'(/a)') 'Should crash since NEWMAT is gone!' +!!$ CALL bsolve(newmat, rhs, newsol) +!=========================================================================== +! 9.0 Clean up +! + IF(me.EQ.0) THEN + DEALLOCATE(x, solcal, solana, err) + DEALLOCATE(bcoef) + DEALLOCATE(xgauss, wgauss) + END IF + DEALLOCATE(xgrid) + DEALLOCATE(rhs, sol) + DEALLOCATE(arow, newsol) + CALL destroy_sp(splx) + IF(me.EQ.0) CALL closef(fid) + CALL mpi_finalize(ierr) +END PROGRAM main diff --git a/examples/pde1dp_cmpl_pardiso.f90 b/examples/pde1dp_cmpl_pardiso.f90 new file mode 100644 index 0000000..e3cf4a5 --- /dev/null +++ b/examples/pde1dp_cmpl_pardiso.f90 @@ -0,0 +1,457 @@ +!> +!> @file pde1dp_cmpl_pardiso.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +MODULE pde1dp_cmpl_pardiso_mod + USE bsplines + USE pardiso_bsplines + IMPLICIT NONE +! +CONTAINS + SUBROUTINE disrhs(spl, rhs, mmode, alpha, beta) +! + TYPE(spline1d) :: spl + INTEGER, INTENT(in) :: mmode + DOUBLE COMPLEX, INTENT(in) :: alpha, beta + DOUBLE COMPLEX, INTENT(out) :: rhs(:) +! + INTEGER :: dim, nrank, nx, nidbas, ngauss + INTEGER :: i, igauss, it, irow + DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) + DOUBLE COMPLEX :: contrib +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + nrank = SIZE(rhs) + CALL get_dim(spl, dim, nx, nidbas) +! + ALLOCATE(fun(0:nidbas,1)) ! needs only basis functions (no derivatives) +! +! Gauss quadature +! + CALL get_gauss(spl, ngauss) + ALLOCATE(xgauss(ngauss), wgauss(ngauss)) +!=========================================================================== +! 2.0 Assembly loop +! + rhs(:) = 0.0d0 +! + DO i=1,nx + CALL get_gauss(spl, ngauss, i, xgauss, wgauss) + DO igauss=1,ngauss + CALL basfun(xgauss(igauss), spl, fun, i) + contrib = wgauss(igauss) * rhseq(xgauss(igauss)) + DO it=0,nidbas + irow=MODULO(i+it-1,nx) + 1 ! Periodic BC + rhs(irow) = rhs(irow) + contrib*fun(it,1) + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(fun) + DEALLOCATE(xgauss, wgauss) +CONTAINS + DOUBLE COMPLEX FUNCTION rhseq(x) + DOUBLE PRECISION, INTENT(in) :: x + DOUBLE PRECISION :: arg + arg = mmode*x + rhseq = (mmode**2*alpha-beta)*COS(arg) + END FUNCTION rhseq +END SUBROUTINE disrhs +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE dismat(spl, mat, alpha, beta) +! + TYPE(spline1d) :: spl + TYPE(zpardiso_mat) :: mat + DOUBLE COMPLEX, INTENT(in) :: alpha, beta +! + INTEGER :: dim, nx, nidbas, ngauss + INTEGER :: i, igauss, iterm, iw, jt, irow, jcol + DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) + DOUBLE COMPLEX :: contrib +! + INTEGER :: kterms ! Number of terms in weak form + INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order + DOUBLE COMPLEX, ALLOCATABLE :: coefs(:) +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl, dim, nx, nidbas) + ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative +! +! Weak form +! + kterms = mat%nterms + ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms)) +! +! Gauss quadature +! + CALL get_gauss(spl, ngauss) + ALLOCATE(xgauss(ngauss), wgauss(ngauss)) +!=========================================================================== +! 2.0 Assembly loop +! + DO i=1,nx + CALL get_gauss(spl, ngauss, i, xgauss, wgauss) + DO igauss=1,ngauss + CALL basfun(xgauss(igauss), spl, fun, i) + CALL coefeq(xgauss(igauss), idert, iderw, coefs) + DO iterm=1,kterms + DO jt=0,nidbas + DO iw=0,nidbas + contrib = fun(jt,idert(iterm)) * coefs(iterm) * & + & fun(iw,iderw(iterm)) * wgauss(igauss) + irow=MODULO(i+iw-1,nx) + 1 ! Periodic BC + jcol=MODULO(i+jt-1,nx) + 1 + CALL updtmat(mat, irow, jcol, contrib) + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(fun) + DEALLOCATE(xgauss, wgauss) + DEALLOCATE(iderw, idert, coefs) +! +CONTAINS + SUBROUTINE coefeq(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE COMPLEX, INTENT(out) :: c(SIZE(idt)) +! + c(1) = alpha + idt(1) = 1 + idw(1) = 1 +! + c(2) = -beta + idt(2) = 0 + idw(2) = 0 + END SUBROUTINE coefeq +END SUBROUTINE dismat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + FUNCTION norm2(x) +! +! Compute the 2-norm of complex array x +! + IMPLICIT NONE + DOUBLE PRECISION :: norm2 + DOUBLE COMPLEX, INTENT(in) :: x(:) + DOUBLE PRECISION :: sum2 +! + sum2 = DOT_PRODUCT(x,x) + norm2 = SQRT(sum2) + END FUNCTION norm2 +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE meshdist(mmode, x) +! +! Construct a 1d non-equidistant mesh given a +! mesh distribution function. +! + INTEGER, INTENT(in) :: mmode + DOUBLE PRECISION, INTENT(inout) :: x(0:) + INTEGER :: nx, nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + nx = SIZE(x)-1 + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) + CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = 2.0 + COS(mmode*x) + END FUNCTION fdist + END SUBROUTINE meshdist +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +END MODULE pde1dp_cmpl_pardiso_mod +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +! +PROGRAM main +! +! 1D complex PDE with periodic BC +! + USE pde1dp_cmpl_pardiso_mod + USE futils + USE conmat_mod +! + IMPLICIT NONE + TYPE(spline1d) :: splx + TYPE(zpardiso_mat) :: mat + TYPE(zpardiso_mat) :: newmat + INTEGER :: kl, ku, nrank +! + CHARACTER(len=128) :: file='pde1dp_cmpl_pardiso.h5' + INTEGER :: fid + INTEGER :: nx, nidbas, ngauss, mmode, npt, dim + LOGICAL :: nlequid + LOGICAL :: nlsym, nlherm, nlpos + DOUBLE PRECISION, PARAMETER :: pi=3.1415926535897931d0 + DOUBLE PRECISION :: dx + DOUBLE COMPLEX :: alpha, beta + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, x, err + DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: sol, rhs, bcoef + DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: newsol, arow + DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: solcal, solana + DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) + DOUBLE PRECISION :: err_norm + INTEGER :: i +! + NAMELIST /newrun/ nx, nidbas, ngauss, nlequid, alpha, beta, mmode, npt, & + & nlsym, nlherm, nlpos +!=========================================================================== +! 1.0 Prologue +! +! Read in data specific to run +! + nx = 8 ! Number oh intevals in x + nidbas = 3 ! Degree of splines + ngauss = 4 ! Number of Gauss points/interval + nlequid = .TRUE. ! Use exact sol. as mesh dist. function if .FALSE. + mmode = 1 ! Fourier mode + alpha = (1.0, 1.0) ! Complex "diffusion" + beta = 1.0 + npt = 100 + nlsym = .TRUE. ! Is matrice symmetric + nlherm = .FALSE. ! Is matrice hermitian + nlpos = .TRUE. ! and positive definite ? +! + READ(*,newrun) + WRITE(*,newrun) +! +! Define grid on x axis +! + ALLOCATE(xgrid(0:nx)) + dx = 2.d0*pi/REAL(nx,8) + xgrid = (/ (i*dx,i=0,nx) /) + IF( .NOT. nlequid ) THEN + CALL meshdist(mmode, xgrid) + END IF + WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid +! +! Create hdf5 file +! + CALL creatf(file, fid, 'PDE1DP Result File', real_prec='d') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NIDBAS', nidbas) + CALL putarr(fid, '/xgrid', xgrid) +!=========================================================================== +! 2.0 Discretize the PDE +! +! Set up periodic spline +! + CALL set_spline(nidbas, ngauss, xgrid, splx, period=.TRUE.) + WRITE(*,'(a,l6)') 'nlequid =', nlequid + nrank = nx ! Rank of the FE matrix +! +! FE matrix assembly +! + CALL init(nrank, 2, mat, nlsym=nlsym, nlherm=nlherm, nlpos=nlpos) + CALL get_dim(splx, dim) + WRITE(*,'(/a,4i6)') 'nrank, dim', nrank, dim +!!$ CALL dismat(splx, mat, alpha, beta) + CALL conmat(splx, mat, coefeq) +! +! RHS assembly +! + ALLOCATE(rhs(nrank), sol(nrank)) + CALL disrhs(splx, rhs, mmode, alpha, beta) +! + CALL putarr(fid, '/rhs', rhs) +! +! Factor and solve +! + WRITE(*,'(a/(10i6))') 'iparm', mat%p%iparm + CALL factor(mat) + CALL putmat(fid,'/MAT', mat) + CALL bsolve(mat, rhs, sol) + CALL putarr(fid, '/sol', sol) + WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ',get_count(mat) + WRITE(*,'(a,i8)') 'Number of nonzeros in factors of A = ',mat%p%iparm(18) +! +! Compute residue +! + WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(mat,sol)-rhs) +!=========================================================================== +! 3.0 Check solution +! +! Exact solution + ALLOCATE(x(0:npt), solcal(0:npt), solana(0:npt), err(0:npt)) + dx=2.0d0*pi/REAL(npt,8) + x = (/ (i*dx, i=0,npt) /) + solana = COS(mmode*x) +! +! Prolongate solution using periodicity +! + ALLOCATE(bcoef(dim)) + bcoef(1:nrank) = sol(1:nrank) + DO i=nrank+1,dim + bcoef(i) = bcoef(MODULO(i-1,nrank)+1) + END DO +! +! Interpolate field +! + CALL gridval(splx, x, solcal, 0, bcoef) +! + err = ABS(solcal-solana) + CALL putarr(fid, '/x', x) + CALL putarr(fid, '/solana', solana) + CALL putarr(fid, '/solcal', solcal) + CALL putarr(fid, '/err', err) +! +! Compute discretization error norm by Gauss integration +! + err_norm=0.0 + ALLOCATE(xgauss(ngauss), wgauss(ngauss)) + DO i=1,nx + CALL get_gauss(splx, ngauss, i, xgauss, wgauss) + CALL gridval(splx, xgauss, solcal(1:ngauss), 0) + solana(1:ngauss) = COS(mmode*xgauss) + err(1:ngauss) = DOT_PRODUCT(solana(1:ngauss)-solcal(1:ngauss), & + & solana(1:ngauss)-solcal(1:ngauss)) + err_norm = err_norm + SUM(wgauss*err(1:ngauss)) + END DO + err_norm = SQRT(err_norm) + WRITE(*,'(a,1pe12.3)') 'Discretization error ', err_norm +! +!=========================================================================== +! 4.0 Test of getrow/putrow, getcol/putcol and mcopy +! + CALL init(nrank, 2, newmat, nlsym=nlsym, nlherm=nlherm, nlpos=nlpos) + ALLOCATE(arow(nrank), newsol(nrank)) +! + DO i=1,nrank + CALL getrow(mat, i, arow) + CALL putrow(newmat, i, arow) + END DO + CALL factor(newmat) + CALL bsolve(newmat, rhs, newsol) + WRITE(*,'(/a)') 'putrow/getrow ...' + WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(newmat,newsol)-rhs) + WRITE(*,'(a,1pe12.3)') 'Error ', norm2(sol-newsol) +! + DO i=1,nrank + CALL getcol(mat, i, arow) + CALL putcol(newmat, i, arow) + END DO + CALL factor(newmat) + CALL bsolve(newmat, rhs, newsol) + WRITE(*,'(/a)') 'putcol/getcol ...' + WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(newmat,newsol)-rhs) + WRITE(*,'(a,1pe12.3)') 'Error ', norm2(sol-newsol) +! + CALL clear_mat(newmat) + CALL mcopy(mat, newmat) + WRITE(*,'(/a)') 'mcopy ...' + newmat%val = (1000.0d0,0.0d0)*newmat%val + CALL factor(newmat) + CALL bsolve(newmat, rhs, newsol) + WRITE(*,'(a)') 'Backsolve the new system' + WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(newsol) +! + WRITE(*,'(a)') 'Destroy NEWMAT ...' + CALL destroy(newmat) +! + CALL bsolve(mat, rhs, sol) + WRITE(*,'(/a)') 'Backsolve the old system' + WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(sol) +! + WRITE(*,'(a)') 'Destroy MAT ...' + CALL destroy(mat) +!!$! +!!$ WRITE(*,'(/a)') 'Should crash since NEWMAT is gone!' +!!$ CALL bsolve(newmat, rhs, newsol) +!!$ WRITE(*,'(a)') 'Backsolve the new system' +!!$ WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(newsol) +!=========================================================================== +! 9.0 Clean up +! + DEALLOCATE(x, solcal, solana, err) + DEALLOCATE(xgauss, wgauss) + DEALLOCATE(bcoef) + DEALLOCATE(xgrid) + DEALLOCATE(rhs, sol) + DEALLOCATE(arow, newsol) + CALL destroy_sp(splx) + CALL closef(fid) +CONTAINS + SUBROUTINE coefeq(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE COMPLEX, INTENT(out) :: c(:) +! + c(1) = alpha + idt(1) = 1 + idw(1) = 1 +! + c(2) = -beta + idt(2) = 0 + idw(2) = 0 + END SUBROUTINE coefeq +END PROGRAM main diff --git a/examples/pde1dp_cmpl_wsmp.f90 b/examples/pde1dp_cmpl_wsmp.f90 new file mode 100644 index 0000000..1760563 --- /dev/null +++ b/examples/pde1dp_cmpl_wsmp.f90 @@ -0,0 +1,436 @@ +!> +!> @file pde1dp_cmpl_wsmp.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +MODULE pde1dp_cmpl_wsmp_mod + USE bsplines + USE wsmp_bsplines + IMPLICIT NONE +! +CONTAINS + SUBROUTINE disrhs(spl, rhs, mmode, alpha, beta) +! + TYPE(spline1d) :: spl + INTEGER, INTENT(in) :: mmode + DOUBLE COMPLEX, INTENT(in) :: alpha, beta + DOUBLE COMPLEX, INTENT(out) :: rhs(:) +! + INTEGER :: dim, nrank, nx, nidbas, ngauss + INTEGER :: i, igauss, it, irow + DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) + DOUBLE COMPLEX :: contrib +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + nrank = SIZE(rhs) + CALL get_dim(spl, dim, nx, nidbas) +! + ALLOCATE(fun(0:nidbas,1)) ! needs only basis functions (no derivatives) +! +! Gauss quadature +! + CALL get_gauss(spl, ngauss) + ALLOCATE(xgauss(ngauss), wgauss(ngauss)) +!=========================================================================== +! 2.0 Assembly loop +! + rhs(:) = 0.0d0 +! + DO i=1,nx + CALL get_gauss(spl, ngauss, i, xgauss, wgauss) + DO igauss=1,ngauss + CALL basfun(xgauss(igauss), spl, fun, i) + contrib = wgauss(igauss) * rhseq(xgauss(igauss)) + DO it=0,nidbas + irow=MODULO(i+it-1,nx) + 1 ! Periodic BC + rhs(irow) = rhs(irow) + contrib*fun(it,1) + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(fun) + DEALLOCATE(xgauss, wgauss) +CONTAINS + DOUBLE COMPLEX FUNCTION rhseq(x) + DOUBLE PRECISION, INTENT(in) :: x + DOUBLE PRECISION :: arg + arg = mmode*x + rhseq = (mmode**2*alpha-beta)*COS(arg) + END FUNCTION rhseq +END SUBROUTINE disrhs +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE dismat(spl, mat, alpha, beta) +! + TYPE(spline1d) :: spl + TYPE(zwsmp_mat) :: mat + DOUBLE COMPLEX, INTENT(in) :: alpha, beta +! + INTEGER :: dim, nx, nidbas, ngauss + INTEGER :: i, igauss, iterm, iw, jt, irow, jcol + DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) + DOUBLE COMPLEX :: contrib +! + INTEGER :: kterms ! Number of terms in weak form + INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order + DOUBLE COMPLEX, ALLOCATABLE :: coefs(:) +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl, dim, nx, nidbas) + ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative +! +! Weak form +! + kterms = mat%nterms + ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms)) +! +! Gauss quadature +! + CALL get_gauss(spl, ngauss) + ALLOCATE(xgauss(ngauss), wgauss(ngauss)) +!=========================================================================== +! 2.0 Assembly loop +! + DO i=1,nx + CALL get_gauss(spl, ngauss, i, xgauss, wgauss) + DO igauss=1,ngauss + CALL basfun(xgauss(igauss), spl, fun, i) + CALL coefeq(xgauss(igauss), idert, iderw, coefs) + DO iterm=1,kterms + DO jt=0,nidbas + DO iw=0,nidbas + contrib = fun(jt,idert(iterm)) * coefs(iterm) * & + & fun(iw,iderw(iterm)) * wgauss(igauss) + irow=MODULO(i+iw-1,nx) + 1 ! Periodic BC + jcol=MODULO(i+jt-1,nx) + 1 + CALL updtmat(mat, irow, jcol, contrib) + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(fun) + DEALLOCATE(xgauss, wgauss) + DEALLOCATE(iderw, idert, coefs) +! +CONTAINS + SUBROUTINE coefeq(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE COMPLEX, INTENT(out) :: c(SIZE(idt)) +! + c(1) = alpha + idt(1) = 1 + idw(1) = 1 +! + c(2) = -beta + idt(2) = 0 + idw(2) = 0 + END SUBROUTINE coefeq +END SUBROUTINE dismat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + FUNCTION norm2(x) +! +! Compute the 2-norm of complex array x +! + IMPLICIT NONE + DOUBLE PRECISION :: norm2 + DOUBLE COMPLEX, INTENT(in) :: x(:) + DOUBLE PRECISION :: sum2 +! + sum2 = DOT_PRODUCT(x,x) + norm2 = SQRT(sum2) + END FUNCTION norm2 +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE meshdist(mmode, x) +! +! Construct a 1d non-equidistant mesh given a +! mesh distribution function. +! + INTEGER, INTENT(in) :: mmode + DOUBLE PRECISION, INTENT(inout) :: x(0:) + INTEGER :: nx, nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + nx = SIZE(x)-1 + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) + CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = 2.0 + COS(mmode*x) + END FUNCTION fdist + END SUBROUTINE meshdist +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +END MODULE pde1dp_cmpl_wsmp_mod +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +! +PROGRAM main +! +! 1D complex PDE with periodic BC +! + USE pde1dp_cmpl_wsmp_mod + USE futils +! + IMPLICIT NONE + TYPE(spline1d) :: splx + TYPE(zwsmp_mat) :: mat + TYPE(zwsmp_mat) :: newmat + INTEGER :: kl, ku, nrank +! + CHARACTER(len=128) :: file='pde1dp_cmpl_wsmp.h5' + INTEGER :: fid + INTEGER :: nx, nidbas, ngauss, mmode, npt, dim + LOGICAL :: nlequid + LOGICAL :: nlsym, nlherm, nlpos + DOUBLE PRECISION, PARAMETER :: pi=3.1415926535897931d0 + DOUBLE PRECISION :: dx + DOUBLE COMPLEX :: alpha, beta + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, x, err + DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: sol, rhs, bcoef + DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: newsol, arow + DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: solcal, solana + DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) + DOUBLE PRECISION :: err_norm + INTEGER :: i +! + NAMELIST /newrun/ nx, nidbas, ngauss, nlequid, alpha, beta, mmode, npt, & + & nlsym, nlherm, nlpos +!=========================================================================== +! 1.0 Prologue +! +! Read in data specific to run +! + nx = 8 ! Number oh intevals in x + nidbas = 3 ! Degree of splines + ngauss = 4 ! Number of Gauss points/interval + nlequid = .TRUE. ! Use exact sol. as mesh dist. function if .FALSE. + mmode = 1 ! Fourier mode + alpha = (1.0, 1.0) ! Complex "diffusion" + beta = 1.0 + npt = 100 + nlsym = .TRUE. ! Is matrice symmetric + nlherm = .FALSE. ! Is matrice hermitian + nlpos = .TRUE. ! and positive definite ? +! + READ(*,newrun) + WRITE(*,newrun) +! +! Define grid on x axis +! + ALLOCATE(xgrid(0:nx)) + dx = 2.d0*pi/REAL(nx,8) + xgrid = (/ (i*dx,i=0,nx) /) + IF( .NOT. nlequid ) THEN + CALL meshdist(mmode, xgrid) + END IF + WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid +! +! Create hdf5 file +! + CALL creatf(file, fid, 'PDE1DP Result File', real_prec='d') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NIDBAS', nidbas) + CALL putarr(fid, '/xgrid', xgrid) +!=========================================================================== +! 2.0 Discretize the PDE +! +! Set up periodic spline +! + CALL set_spline(nidbas, ngauss, xgrid, splx, period=.TRUE.) + WRITE(*,'(a,l6)') 'nlequid =', nlequid + nrank = nx ! Rank of the FE matrix +! +! FE matrix assembly +! + CALL init(nrank, 2, mat, nlsym=nlsym, nlherm=nlherm, nlpos=nlpos) + CALL get_dim(splx, dim) + WRITE(*,'(/a,4i6)') 'nrank, dim', nrank, dim + CALL dismat(splx, mat, alpha, beta) +! +! RHS assembly +! + ALLOCATE(rhs(nrank), sol(nrank)) + CALL disrhs(splx, rhs, mmode, alpha, beta) +! + CALL putarr(fid, '/rhs', rhs) +! +! Factor and solve +! +!!$ CALL factor(mat, nlmetis=.TRUE.) + CALL factor(mat) + CALL putmat(fid,'/MAT', mat) + CALL bsolve(mat, rhs, sol) + CALL putarr(fid, '/sol', sol) + WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ',get_count(mat) + WRITE(*,'(a,i8)') 'Number of nonzeros in factors of A = ',mat%p%iparm(18) +! +! Compute residue +! + WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(mat,sol)-rhs) +!=========================================================================== +! 3.0 Check solution +! +! Exact solution + ALLOCATE(x(0:npt), solcal(0:npt), solana(0:npt), err(0:npt)) + dx=2.0d0*pi/REAL(npt,8) + x = (/ (i*dx, i=0,npt) /) + solana = COS(mmode*x) +! +! Prolongate solution using periodicity +! + ALLOCATE(bcoef(dim)) + bcoef(1:nrank) = sol(1:nrank) + DO i=nrank+1,dim + bcoef(i) = bcoef(MODULO(i-1,nrank)+1) + END DO +! +! Interpolate field +! + CALL gridval(splx, x, solcal, 0, bcoef) +! + err = ABS(solcal-solana) + CALL putarr(fid, '/x', x) + CALL putarr(fid, '/solana', solana) + CALL putarr(fid, '/solcal', solcal) + CALL putarr(fid, '/err', err) +! +! Compute discretization error norm by Gauss integration +! + err_norm=0.0 + ALLOCATE(xgauss(ngauss), wgauss(ngauss)) + DO i=1,nx + CALL get_gauss(splx, ngauss, i, xgauss, wgauss) + CALL gridval(splx, xgauss, solcal(1:ngauss), 0) + solana(1:ngauss) = COS(mmode*xgauss) + err(1:ngauss) = DOT_PRODUCT(solana(1:ngauss)-solcal(1:ngauss), & + & solana(1:ngauss)-solcal(1:ngauss)) + err_norm = err_norm + SUM(wgauss*err(1:ngauss)) + END DO + err_norm = SQRT(err_norm) + WRITE(*,'(a,1pe12.3)') 'Discretization error ', err_norm +! +!=========================================================================== +! 4.0 Test of getrow/putrow, getcol/putcol and mcopy +! + CALL init(nrank, 2, newmat, nlsym=nlsym, nlherm=nlherm, nlpos=nlpos) + ALLOCATE(arow(nrank), newsol(nrank)) +! + DO i=1,nrank + CALL getrow(mat, i, arow) + CALL putrow(newmat, i, arow) + END DO + CALL factor(newmat) + CALL bsolve(newmat, rhs, newsol) + WRITE(*,'(/a)') 'putrow/getrow ...' + WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(newmat,newsol)-rhs) + WRITE(*,'(a,1pe12.3)') 'Error ', norm2(sol-newsol) +! + DO i=1,nrank + CALL getcol(mat, i, arow) + CALL putcol(newmat, i, arow) + END DO + CALL factor(newmat) + CALL bsolve(newmat, rhs, newsol) + WRITE(*,'(/a)') 'putcol/getcol ...' + WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(newmat,newsol)-rhs) + WRITE(*,'(a,1pe12.3)') 'Error ', norm2(sol-newsol) +! + CALL clear_mat(newmat) + CALL mcopy(mat, newmat) + WRITE(*,'(/a)') 'mcopy ...' + newmat%val = (1000.0d0,0.0d0)*newmat%val + CALL factor(newmat) + CALL bsolve(newmat, rhs, newsol) + WRITE(*,'(a, i3)') 'Backsolve the system', newmat%matid + WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(newsol) +! + CALL bsolve(mat, rhs, sol) + WRITE(*,'(a, i3)') 'Backsolve the system', mat%matid + WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(sol) +! + CALL bsolve(newmat, rhs, newsol) + WRITE(*,'(a, i3)') 'Backsolve the system', newmat%matid + WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(newsol) +!=========================================================================== +! 9.0 Clean up +! + DEALLOCATE(x, solcal, solana, err) + DEALLOCATE(xgauss, wgauss) + DEALLOCATE(bcoef) + DEALLOCATE(xgrid) + DEALLOCATE(rhs, sol) + DEALLOCATE(arow, newsol) + CALL destroy(mat) + CALL destroy(newmat) + CALL destroy_sp(splx) + CALL closef(fid) +END PROGRAM main diff --git a/examples/pde1dp_mod.f90 b/examples/pde1dp_mod.f90 new file mode 100644 index 0000000..0a508ac --- /dev/null +++ b/examples/pde1dp_mod.f90 @@ -0,0 +1,225 @@ +!> +!> @file pde1dp_mod.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +MODULE pde1dp_mod +! + USE bsplines + USE matrix + IMPLICIT NONE + DOUBLE PRECISION, ALLOCATABLE :: bcoef(:) + TYPE(spline1d), SAVE :: splx +! +CONTAINS + SUBROUTINE meshdist(c, x, nx) +! +! Construct an 1d non-equidistant mesh given a +! mesh distribution function defined in FDIST +! + DOUBLE PRECISION, INTENT(in) :: c(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + INTEGER :: nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) + CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) + END FUNCTION fdist + END SUBROUTINE meshdist +!+++ + SUBROUTINE dismat(spl, mat) +! +! Assembly FE matrix (with periodic BC) mat using spline spl +! + TYPE(spline1d), INTENT(in) :: spl + TYPE(periodic_mat), INTENT(inout) :: mat + INTEGER :: dim, nx, nidbas, ngauss + INTEGER :: i, igauss, iterm, iw, jt, irow, jcol + DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) + DOUBLE PRECISION :: contrib +! + INTEGER :: kterms ! Number of terms in weak form + INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order + DOUBLE PRECISION, ALLOCATABLE :: coefs(:) +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl, dim, nx, nidbas) + ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative +! +! Weak form +! + kterms = mat%mat%nterms + ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms)) +! +! Gauss quadature +! + CALL get_gauss(spl, ngauss) + ALLOCATE(xgauss(ngauss), wgauss(ngauss)) +!=========================================================================== +! 2.0 Assembly loop +! + DO i=1,nx + CALL get_gauss(spl, ngauss, i, xgauss, wgauss) + DO igauss=1,ngauss + CALL basfun(xgauss(igauss), spl, fun, i) + CALL coefeq(xgauss(igauss), idert, iderw, coefs) + DO iterm=1,kterms + DO jt=0,nidbas + DO iw=0,nidbas + contrib = fun(jt,idert(iterm)) * coefs(iterm) * & + & fun(iw,iderw(iterm)) * wgauss(igauss) + irow=MODULO(i+iw-1,nx) + 1 ! Periodic BC + jcol=MODULO(i+jt-1,nx) + 1 + CALL updtmat(mat, irow, jcol, contrib) + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(fun) + DEALLOCATE(xgauss, wgauss) + DEALLOCATE(iderw, idert, coefs) + END SUBROUTINE dismat +! + SUBROUTINE coefeq(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt)) +! +! Mass matrix +! + c(1) = 1.0d0 + idt(1) = 0 + idw(1) = 0 + END SUBROUTINE coefeq +!+++ + SUBROUTINE disrhs(spl, rhs) +! +! Assenbly the RHS using spline spl +! + TYPE(spline1d), INTENT(in) :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:) + INTEGER :: dim, nrank, nx, nidbas, ngauss + INTEGER :: i, igauss, it, irow + DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) + DOUBLE PRECISION:: contrib +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + nrank = SIZE(rhs) + CALL get_dim(spl, dim, nx, nidbas) +! + ALLOCATE(fun(0:nidbas,1)) ! needs only basis functions (no derivatives) +! +! Gauss quadature +! + CALL get_gauss(spl, ngauss) + ALLOCATE(xgauss(ngauss), wgauss(ngauss)) +!=========================================================================== +! 2.0 Assembly loop +! + rhs(:) = 0.0d0 +! + DO i=1,nx + CALL get_gauss(spl, ngauss, i, xgauss, wgauss) + DO igauss=1,ngauss + CALL basfun(xgauss(igauss), spl, fun, i) + contrib = wgauss(igauss) * rhseq(xgauss(igauss)) + DO it=0,nidbas + irow=MODULO(i+it-1,nx) + 1 ! Periodic BC + rhs(irow) = rhs(irow) + contrib*fun(it,1) + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(fun) + DEALLOCATE(xgauss, wgauss) + END SUBROUTINE disrhs +! +DOUBLE PRECISION FUNCTION rhseq(x) + DOUBLE PRECISION, INTENT(in) :: x + DOUBLE PRECISION :: xarr(1), farr(1) + INTEGER, SAVE :: icall =0 + xarr(1) = x + IF( icall.EQ.0 ) THEN + icall = icall+1 + CALL gridval(splx, xarr, farr, 0, bcoef) + ELSE + CALL gridval(splx, xarr, farr, 0) + END IF + rhseq = farr(1) +END FUNCTION rhseq + +END MODULE pde1dp_mod diff --git a/examples/pde2d.f90 b/examples/pde2d.f90 new file mode 100644 index 0000000..b881711 --- /dev/null +++ b/examples/pde2d.f90 @@ -0,0 +1,409 @@ +!> +!> @file pde2d.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Solving the following 2d PDE using splines: +! +! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0 +! exact solution: f(x,y) = (1-x^2) x^m cos(my) +! + USE bsplines + USE matrix + USE futils + USE conmat_mod +! + IMPLICIT NONE + INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms + LOGICAL :: nlppform, nlconmat + INTEGER :: i, j, ij, dimx, dimy, nrank, kl, ku, jder(2), it + DOUBLE PRECISION :: pi, coefx(5), coefy(5) + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol + TYPE(spline2d) :: splxy + TYPE(gbmat) :: mat +! + CHARACTER(len=128) :: file='pde2d.h5' + INTEGER :: fid + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr + DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol + DOUBLE PRECISION :: seconds, mem, dopla + DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2 + INTEGER :: nits=500 +! + INTERFACE + SUBROUTINE dismat(spl, mat) + USE bsplines + USE matrix + TYPE(spline2d), INTENT(in) :: spl + TYPE(gbmat), INTENT(inout) :: mat + END SUBROUTINE dismat + SUBROUTINE disrhs(mbess, spl, rhs) + USE bsplines + INTEGER, INTENT(in) :: mbess + TYPE(spline2d), INTENT(in) :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:) + END SUBROUTINE disrhs + SUBROUTINE meshdist(coefs, x, nx) + DOUBLE PRECISION, INTENT(in) :: coefs(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + END SUBROUTINE meshdist + SUBROUTINE ibcmat(mat, ny) + USE matrix + TYPE(gbmat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: ny + END SUBROUTINE ibcmat + SUBROUTINE ibcrhs(rhs, ny) + DOUBLE PRECISION, INTENT(inout) :: rhs(:) + INTEGER, INTENT(in) :: ny + END SUBROUTINE ibcrhs +!!$ SUBROUTINE coefeq_poisson(x, y, idt, idw, c) +!!$ DOUBLE PRECISION, INTENT(in) :: x, y +!!$ INTEGER, INTENT(out) :: idt(:,:), idw(:,:) +!!$ DOUBLE PRECISION, INTENT(out) :: c(:) +!!$ END SUBROUTINE coefeq_poisson + END INTERFACE +! + NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlconmat, & + & coefx, coefy +!=========================================================================== +! 1.0 Prologue +! +! Read in data specific to run +! + nx = 8 ! Number of intervals in x + ny = 8 ! Number of intervals in y + nidbas = (/3,3/) ! Degree of splines + ngauss = (/4,4/) ! Number of Gauss points/interval + mbess = 2 ! Exponent of differential problem + nterms = 2 ! Number of terms in weak form + nlppform = .TRUE. ! Use PPFORM for gridval or not + nlconmat = .TRUE. ! Use CONMAT instead of DISMAT + coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function +! + READ(*,newrun) + WRITE(*,newrun) +! +! Define grid on x (=radial) & y (=poloidal) axis +! + pi = 4.0d0*ATAN(1.0d0) + ALLOCATE(xgrid(0:nx), ygrid(0:ny)) + xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 + CALL meshdist(coefx, xgrid, nx) + ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi + CALL meshdist(coefy, ygrid, ny) +! + WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) + WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny) +! +! Create hdf5 file +! + CALL creatf(file, fid, 'PDE2D Result File', real_prec='d') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NY', ny) + CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) + CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) + CALL attach(fid, '/', 'NGAUSS1', ngauss(1)) + CALL attach(fid, '/', 'NGAUSS2', ngauss(2)) + CALL attach(fid, '/', 'MBESS', mbess) +!=========================================================================== +! 2.0 Discretize the PDE +! +! Set up spline +! + CALL set_spline(nidbas, ngauss, & + & xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform) +!!$ WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots +!!$ WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots +! +! FE matrix assembly +! + nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix + kl = (nidbas(1)+1)*ny -1 ! Number of sub-diagnonals + ku = kl ! Number of super-diagnonals + WRITE(*,'(a,5i6)') 'nrank, kl, ku', nrank, kl, ku +! + CALL init(kl, ku, nrank, nterms, mat) + t0 = seconds() + IF(nlconmat) THEN + CALL conmat(splxy, mat, coefeq_poisson) + ELSE + CALL dismat(splxy, mat) + END IF + tmat = seconds() - t0 + CALL putmat(fid, '/MAT0', mat, 'Assembled GB matrice') + ALLOCATE(arr(nrank)) +! +! BC on Matrix +! + IF(nrank.LT.100) & + & WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', mat%val(kl+ku+1,:) + CALL ibcmat(mat, ny) + IF(nrank.LT.100) & + & WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', mat%val(kl+ku+1,:) +! +! RHS assembly +! + ALLOCATE(rhs(nrank), sol(nrank)) + CALL disrhs(mbess, splxy, rhs) +! +! BC on RHS +! + CALL ibcrhs(rhs, ny) + + CALL putarr(fid, '/RHS', rhs, 'RHS of linear system') + CALL putmat(fid, '/MAT1', mat, 'GB matrice with BC') + WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem() +!=========================================================================== +! 3.0 Solve the dicretized system +! + t0 = seconds() + CALL factor(mat) + tfact = seconds() - t0 + gflops1 = dopla('DGBTRF',nrank,nrank,kl,ku,0) / tfact / 1.d9 + + t0 = seconds() + CALL bsolve(mat, rhs, sol) +! +! Backtransform of solution +! + sol(1:ny-1) = sol(ny) +! +! Spline coefficients, taking into account of periodicity in y +! Note: in SOL, y was numbered first. +! + dimx = splxy%sp1%dim + dimy = splxy%sp2%dim + ALLOCATE(bcoef(0:dimx-1, 0:dimy-1)) + DO j=0,dimy-1 + DO i=0,dimx-1 + ij = MODULO(j,ny) + i*ny + 1 + bcoef(i,j) = sol(ij) + END DO + END DO + WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef) +! + tsolv = seconds() - t0 + gflops2 = dopla('DGBTRS',nrank,1,kl,ku,0) / tsolv / 1.d9 + CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution') +!=========================================================================== +! 4.0 Check the solution +! +! Check function values computed with various method +! + ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny)) + DO i=0,nx + DO j=0,ny + solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j)) + END DO + END DO + jder = (/0,0/) +! +! Compute PPFORM/BCOEFS at first call to gridval + CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef) +! + WRITE(*,'(/a)') '*** Checking solutions' + t0 = seconds() + DO it=1,nits ! nits iterations for timing + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + END DO + tgrid = (seconds() - t0)/REAL(nits) + errsol = solana - solcal + IF( SIZE(bcoef,2) .LE. 10 ) THEN + CALL prnmat('BCOEF', bcoef) + CALL prnmat('SOLANA', solana) + CALL prnmat('SOLCAL', solcal) + CALL prnmat('ERRSOL', errsol) + END IF + WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & + & norm2(errsol) / norm2(solana) + WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid +! + CALL putarr(fid, '/xgrid', xgrid, 'r') + CALL putarr(fid, '/ygrid', ygrid, '\theta') + CALL putarr(fid, '/sol', solcal, 'Solutions') + CALL putarr(fid, '/solana', solana,'Exact solutions') + CALL putarr(fid, '/errors', errsol, 'Errors') +! +! Check derivatives d/dx and d/dy +! + WRITE(*,'(/a)') '*** Checking gradient' + DO i=0,nx + DO j=0,ny + IF( mbess .EQ. 0 ) THEN + solana(i,j) = -2.0d0 * xgrid(i) + ELSE + solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * & + & xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) + END IF + END DO + END DO +! + jder = (/1,0/) + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + errsol = solana - solcal + CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions') + WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol) +! + DO i=0,nx + DO j=0,ny + solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j)) + END DO + END DO +! + jder = (/0,1/) + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions') + errsol = solana - solcal + WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol) +!=========================================================================== +! 9.0 Epilogue +! + WRITE(*,'(/a)') '---' + WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat + WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact + WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv + WRITE(*,'(a,2f10.3)') 'Factor/solve Gflop/s', gflops1, gflops2 +! + DEALLOCATE(xgrid, rhs, sol) + DEALLOCATE(solcal, solana, errsol) + DEALLOCATE(bcoef) + DEALLOCATE(arr) + CALL destroy_sp(splxy) + CALL destroy(mat) +! + CALL closef(fid) +!=========================================================================== +! +CONTAINS + SUBROUTINE coefeq_poisson(x, y, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x, y + INTEGER, INTENT(out) :: idt(:,:), idw(:,:) + DOUBLE PRECISION, INTENT(out) :: c(:) +! +! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy +! + c(1) = x ! + idt(1,1) = 1 + idt(1,2) = 0 + idw(1,1) = 1 + idw(1,2) = 0 +! + c(2) = 1.d0/x + idt(2,1) = 0 + idt(2,2) = 1 + idw(2,1) = 0 + idw(2,2) = 1 + END SUBROUTINE coefeq_poisson +! +!+++ + FUNCTION norm2(x) +! +! Compute the 2-norm of array x +! + IMPLICIT NONE + DOUBLE PRECISION :: norm2 + DOUBLE PRECISION, INTENT(in) :: x(:,:) + DOUBLE PRECISION :: sum2 + INTEGER :: i, j +! + sum2 = 0.0d0 + DO i=1,SIZE(x,1) + DO j=1,SIZE(x,2) + sum2 = sum2 + x(i,j)**2 + END DO + END DO + norm2 = SQRT(sum2) + END FUNCTION norm2 + SUBROUTINE prnmat(label, mat) + CHARACTER(len=*) :: label + DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat + INTEGER :: i + WRITE(*,'(/a)') TRIM(label) + DO i=1,SIZE(mat,1) + WRITE(*,'(10(1pe12.3))') mat(i,:) + END DO + END SUBROUTINE prnmat +END PROGRAM main +! +!+++ +SUBROUTINE meshdist(c, x, nx) +! +! Construct an 1d non-equidistant mesh given a +! mesh distribution function. +! + IMPLICIT NONE + DOUBLE PRECISION, INTENT(in) :: c(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + INTEGER :: nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) +CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) + END FUNCTION fdist +END SUBROUTINE meshdist +!+++ diff --git a/examples/pde2d_mumps.f90 b/examples/pde2d_mumps.f90 new file mode 100644 index 0000000..772fce2 --- /dev/null +++ b/examples/pde2d_mumps.f90 @@ -0,0 +1,937 @@ +!> +!> @file pde2d_mumps.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +! +! Solving the 2d PDE using splines and MUMPS non-symmetric and symmetric +! matrix +! +! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0 +! exact solution: f(x,y) = (1-x^2) x^m cos(my) +! +MODULE pde2d_mumps_mod + USE bsplines + USE mumps_bsplines + IMPLICIT NONE +CONTAINS +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE dismat(spl, mat) +! +! Assembly of FE matrix mat using spline spl +! + TYPE(spline2d), INTENT(in) :: spl + TYPE(mumps_mat), INTENT(inout) :: mat +! + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: n2, nidbas2, ndim2, ng2 + INTEGER :: i, j, ig1, ig2 + INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol + DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:) + DOUBLE PRECISION:: contrib +! + INTEGER :: kterms ! Number of terms in weak form + INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:) ! Derivative order + DOUBLE PRECISION, ALLOCATABLE :: coefs(:,:,:) ! Terms in weak form + INTEGER, ALLOCATABLE :: left1(:), left2(:) +! + INTEGER :: istart, iend +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) +!!$ WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1 +!!$ WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2 +! +! Gauss quadature +! + CALL get_gauss(spl%sp1, ng1) + CALL get_gauss(spl%sp2, ng2) + ALLOCATE(xg1(ng1), wg1(ng1)) + ALLOCATE(xg2(ng1), wg2(ng1)) +! +! Weak form +! + kterms = mat%nterms + ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2)) + ALLOCATE(coefs(kterms,ng1,ng2)) +! + ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative + ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! +! +! Matrix partition +! + istart = mat%istart + iend = mat%iend +!=========================================================================== +! 2.0 Assembly loop +! + ALLOCATE(left1(ng1)) + ALLOCATE(left2(ng2)) + DO i=1,n1 + CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) + left1 = i + CALL basfun(xg1, spl%sp1, fun1, left1) + DO j=1,n2 + CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) + left2 = j + CALL basfun(xg2, spl%sp2, fun2, left2) +! + DO ig1=1,ng1 + DO ig2=1,ng2 + CALL coefeq(xg1(ig1), xg2(ig2), & + & idert(:,:,ig1,ig2), & + & iderw(:,:,ig1,ig2), & + & coefs(:,ig1,ig2)) + END DO + END DO +! + DO iw1=0,nidbas1 ! Weight function in dir 1 + igw1 = i+iw1 + DO iw2=0,nidbas2 ! Weight function in dir 2 + igw2 = MODULO(j+iw2-1, n2) + 1 + irow = igw2 + (igw1-1)*n2 + IF( irow.GE.istart .AND. irow.LE.iend) THEN + DO it1=0,nidbas1 ! Test function in dir 1 + igt1 = i+it1 + DO it2=0,nidbas2 ! Test function in dir 2 + igt2 = MODULO(j+it2-1, n2) + 1 + jcol = igt2 + (igt1-1)*n2 +!------------- + contrib = 0.0d0 + DO ig1=1,ng1 + DO ig2=1,ng2 + DO iterm=1,kterms + contrib = contrib + & + & fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * & + & fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * & + & coefs(iterm,ig1,ig2) * & + & fun2(it2,idert(iterm,2,ig1,ig2),ig2) * & + & fun1(it1,idert(iterm,1,ig1,ig2),ig1) * & + & wg1(ig1) * wg2(ig2) + END DO + END DO + END DO + CALL updtmat(mat, irow, jcol, contrib) +!------------- + END DO + END DO + END IF + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xg1, wg1, fun1) + DEALLOCATE(xg2, wg2, fun2) + DEALLOCATE(idert, iderw, coefs) + DEALLOCATE(left1,left2) +! + CONTAINS + SUBROUTINE coefeq(x, y, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x, y + INTEGER, INTENT(out) :: idt(:,:), idw(:,:) + DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1)) +! +! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy +! + c(1) = x ! + idt(1,1) = 1 + idt(1,2) = 0 + idw(1,1) = 1 + idw(1,2) = 0 + ! + c(2) = 1.d0/x + idt(2,1) = 0 + idt(2,2) = 1 + idw(2,1) = 0 + idw(2,2) = 1 + END SUBROUTINE coefeq + END SUBROUTINE dismat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE disrhs(mbess, spl, rhs) +! +! Assembly the RHS using 2d spline spl +! + INTEGER, INTENT(in) :: mbess + TYPE(spline2d), INTENT(in) :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:) + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: n2, nidbas2, ndim2, ng2 + INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank + DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) + DOUBLE PRECISION:: contrib +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) +! + ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) + ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) +! +! Gauss quadature +! + CALL get_gauss(spl%sp1, ng1) + CALL get_gauss(spl%sp2, ng2) + ALLOCATE(xg1(ng1), wg1(ng1)) + ALLOCATE(xg2(ng1), wg2(ng1)) +!=========================================================================== +! 2.0 Assembly loop +! + nrank = SIZE(rhs) + rhs(1:nrank) = 0.0d0 +! + DO i=1,n1 + CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) + DO ig1=1,ng1 + CALL basfun(xg1(ig1), spl%sp1, fun1, i) + DO j=1,n2 + CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) + DO ig2=1,ng2 + CALL basfun(xg2(ig2), spl%sp2, fun2, j) + contrib = wg1(ig1)*wg2(ig2) * & + & rhseq(xg1(ig1),xg2(ig2), mbess) + DO k1=0,nidbas1 + i1 = i+k1 + DO k2=0,nidbas2 + j2 = MODULO(j+k2-1,n2) + 1 + ij = j2 + (i1-1)*n2 + rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1) + END DO + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xg1, wg1, fun1) + DEALLOCATE(xg2, wg2, fun2) +! + CONTAINS + DOUBLE PRECISION FUNCTION rhseq(x1, x2, m) + DOUBLE PRECISION, INTENT(in) :: x1, x2 + INTEGER, INTENT(in) :: m + rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2) + END FUNCTION rhseq + END SUBROUTINE disrhs +! +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE ibcmat(mat, ny) +! +! Apply BC on matrix +! + TYPE(mumps_mat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: ny + INTEGER :: nrank, i, j + DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:) +!=========================================================================== +! 1.0 Prologue +! + nrank = mat%rank +!=========================================================================== +! 2.0 Unicity at the axis +! +! The vertical sum on the NY-th row +! + ALLOCATE(zsum(nrank), arr(nrank)) + zsum = 0.0d0 + DO i=1,ny + arr = 0.0d0 + CALL getrow(mat, i, arr) + zsum(:) = zsum(:) + arr(:) + END DO + IF(mat%nlsym) THEN + zsum(ny) = SUM(zsum(1:ny)) ! using symmetry + END IF + CALL putrow(mat, ny, zsum) +! +! The horizontal sum on the NY-th column +! + IF( .NOT.mat%nlsym) THEN + zsum = 0.0d0 + DO j=1,ny + arr = 0.0d0 + CALL getcol(mat, j, arr) + zsum(ny:) = zsum(ny:) + arr(ny:) + END DO + CALL putcol(mat, ny, zsum) + END IF +! +! The away operator +! + IF( .NOT.mat%nlsym) THEN + DO j = 1,ny-1 + arr = 0.0d0; arr(j) = 1.0d0 + CALL putcol(mat, j, arr) + END DO + END IF +! + DO i = 1,ny-1 + arr = 0.0d0; arr(i) = 1.0d0 + CALL putrow(mat, i, arr) + END DO + DEALLOCATE(zsum) + DEALLOCATE(arr) +! +!=========================================================================== +! 3.0 Dirichlet on right boundary +! + ALLOCATE(arr(nrank)) + IF( .NOT.mat%nlsym) THEN + DO j = nrank, nrank-ny+1, -1 + arr = 0.0d0; arr(j) = 1.0d0 + CALL putcol(mat, j, arr) + END DO + END IF +! + DO i = nrank, nrank-ny+1, -1 + arr = 0.0d0; arr(i) = 1.0d0 + CALL putrow(mat, i, arr) + END DO + DEALLOCATE(arr) +!=========================================================================== +! 9.0 Epilogue +! + END SUBROUTINE ibcmat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE ibcrhs(rhs, ny) +! +! Apply BC on RHS +! + DOUBLE PRECISION, INTENT(inout) :: rhs(:) + INTEGER, INTENT(in) :: ny + INTEGER :: nrank + DOUBLE PRECISION :: zsum +!=========================================================================== +! 1.0 Prologue +! + nrank = SIZE(rhs,1) +!=========================================================================== +! 2.0 Unicity at the axis +! +! The vertical sum +! + zsum = SUM(rhs(1:ny)) + rhs(ny) = zsum + rhs(1:ny-1) = 0.0d0 +!=========================================================================== +! 3.0 Dirichlet on right boundary +! + rhs(nrank-ny+1:nrank) = 0.0d0 + END SUBROUTINE ibcrhs +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE write_matrix(lun, mat, comm) +! +! Write the distribute matrix to (single) file +! + INCLUDE 'mpif.h' +! + INTEGER :: lun + TYPE(mumps_mat) :: mat + INTEGER, INTENT(in) :: comm +! + INTEGER :: nprocs, me, ierr + INTEGER :: nrank, nnz, nnz_loc, istart, iend, nloc + INTEGER :: i + INTEGER, ALLOCATABLE :: displs(:), nlocs(:), cols(:), irow(:) + DOUBLE PRECISION, ALLOCATABLE :: val(:) +! + CALL mpi_comm_size(comm, nprocs, ierr) + CALL mpi_comm_rank(comm, me, ierr) +! + IF(.NOT.ASSOCIATED(mat%val)) THEN + WRITE(*,'(a)') 'WRITE_MATRIX: MUMPS matrix does not exist!' + STOP + END IF +! +! Info on matrix +! +!!$ IF(me.EQ.0) THEN +!!$ s0 = mat%nnz_start-1 +!!$ DO i=mat%istart,mat%iend +!!$ s=mat%irow(i)-s0 +!!$ e=mat%irow(i+1)-1-s0 +!!$ WRITE(*,'(a,i6,1pe12.3)') 'nnz, Sum(val)', e-s+1, SUM(mat%val(s:e)) +!!$ END DO +!!$ END IF +! + nrank = mat%rank + nnz_loc = mat%nnz_loc + nnz = mat%nnz + istart = mat%istart + iend = mat%iend +! + IF(me.EQ.0) THEN + WRITE(lun) nrank, nnz + END IF +! +! Write irow +! + nloc = iend-istart+1 + IF (me.EQ.0) THEN + ALLOCATE(displs(0:nprocs)) + ALLOCATE(nlocs(0:nprocs-1)) + ALLOCATE(irow(nrank+1)) + END IF + CALL mpi_gather(nloc, 1, MPI_INTEGER, nlocs, 1, MPI_INTEGER, 0, comm, ierr) + IF(me.EQ.0) THEN + displs(0) = 0 + DO i=0,nprocs-1 + displs(i+1) = displs(i)+nlocs(i) + END DO + END IF + CALL mpi_gatherv(mat%irow, nloc, MPI_INTEGER, & + & irow, nlocs, displs, MPI_INTEGER, 0, comm, ierr) + IF(me.EQ.0) THEN + irow(nrank+1) = nnz+1 + WRITE(lun) irow + DEALLOCATE(irow) + END IF +! +! Write cols +! + nloc = mat%nnz_loc + IF(me.EQ.0) THEN + ALLOCATE(cols(nnz)) + END IF + CALL mpi_gather(nloc, 1, MPI_INTEGER, nlocs, 1, MPI_INTEGER, 0, comm, ierr) + IF(me.EQ.0) THEN + displs(0) = 0 + DO i=0,nprocs-1 + displs(i+1) = displs(i)+nlocs(i) + END DO + END IF + CALL mpi_gatherv(mat%cols, nloc, MPI_INTEGER, & + & cols, nlocs, displs, MPI_INTEGER, 0, comm, ierr) + IF(me.EQ.0) THEN + WRITE(lun) cols + DEALLOCATE(cols) + END IF +! +! Write val (Same data partition as "cols" +! + IF(me.EQ.0) THEN + ALLOCATE(val(nnz)) + END IF + CALL mpi_gatherv(mat%val, nloc, MPI_DOUBLE_PRECISION, & + & val, nlocs, displs, MPI_DOUBLE_PRECISION, 0, comm, ierr) + IF(me.EQ.0) THEN + WRITE(lun) val + DEALLOCATE(val) + END IF +! +! Epilogue +! + IF(me.EQ.0) THEN + DEALLOCATE(displs, nlocs) + END IF + END SUBROUTINE write_matrix +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +END MODULE pde2d_mumps_mod +PROGRAM main + USE pde2d_mumps_mod + USE futils +! + IMPLICIT NONE + INCLUDE 'mpif.h' + INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms + LOGICAL :: debug_mumps=.FALSE. + LOGICAL :: nlppform + INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it + DOUBLE PRECISION :: pi, coefx(5), coefy(5) + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol + TYPE(spline2d) :: splxy + TYPE(mumps_mat) :: mat +! + CHARACTER(len=128) :: file='pde2d_mumps.h5' + INTEGER :: fid + DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol + DOUBLE PRECISION :: seconds, mem + DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1 + DOUBLE PRECISION :: tconv, treord + INTEGER :: nits=100 + LOGICAL :: nlsym, nlpos + LOGICAL :: nlmetis, nlforce_zero + LOGICAL :: nlserial +! + INTEGER :: ierr, me + INTEGER(kind=8) :: nzfact + DOUBLE PRECISION :: mem_loc +! + CHARACTER(len=128) :: matfile='' +! + NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlsym, nlpos,& + & nlmetis, nlforce_zero, nlserial, coefx, coefy, matfile, & + & debug_mumps +!=========================================================================== +! 1.0 Prologue +! + CALL mpi_init(ierr) + CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) +! +! Read in data specific to run +! + nx = 8 ! Number of intervals in x + ny = 8 ! Number of intervals in y + nidbas = (/3,3/) ! Degree of splines + ngauss = (/4,4/) ! Number of Gauss points/interval + mbess = 2 ! Exponent of differential problem + nterms = 2 ! Number of terms in weak form + nlppform = .TRUE. ! Use PPFORM for gridval or not + nlsym = .FALSE. ! Symmetric or unsymmetric matrix + nlpos = .TRUE. ! Positive definite matrix + nlmetis = .FALSE. ! Use metis ordering or minimum degree + nlserial = .TRUE. ! Serial. The solver is duplicated on each process. Otherwise + ! the solver matrix is partionned among the processes. + nlforce_zero = .FALSE. ! Remove existing nodes with zero val in putele/row/ele + coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + matfile = '' ! Save matrix file to matfile if not empty +! + IF(me.EQ.0) THEN + READ(*,newrun) + WRITE(*,newrun) + END IF + CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(mbess, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nterms, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nlppform, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nlsym, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nlpos, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nlmetis, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nlforce_zero, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(debug_mumps, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nlserial, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(coefx, 5, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(coefy, 5, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(matfile, 128, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) +! +! Define grid on x (=radial) & y (=poloidal) axis +! + pi = 4.0d0*ATAN(1.0d0) + ALLOCATE(xgrid(0:nx), ygrid(0:ny)) + xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 + CALL meshdist(coefx, xgrid, nx) + ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi + CALL meshdist(coefy, ygrid, ny) +! +! Create hdf5 file +! + IF(me.EQ.0) THEN + CALL creatf(file, fid, 'PDE2D Result File', real_prec='d') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NY', ny) + CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) + CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) + CALL attach(fid, '/', 'NGAUSS1', ngauss(1)) + CALL attach(fid, '/', 'NGAUSS2', ngauss(2)) + CALL attach(fid, '/', 'MBESS', mbess) + END IF +!=========================================================================== +! 2.0 Discretize the PDE +! +! Set up spline +! + t0 = seconds() + CALL set_spline(nidbas, ngauss, & + & xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform) +! +! FE matrix assembly +! + nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix + IF(me.EQ.0) WRITE(*,'(a,i8)') 'nrank', nrank +! + IF(nlserial) THEN ! The solver is duplicated + CALL init(nrank, nterms, mat, nlforce_zero=nlforce_zero,& + & nlsym=nlsym, nlpos=nlpos) + ELSE ! The solver is distributed + CALL init(nrank, nterms, mat, nlforce_zero=nlforce_zero,& + & nlsym=nlsym, nlpos=nlpos, comm_in=MPI_COMM_WORLD) + END IF + mat%mumps_par%ICNTL(23) = 400 + IF(me.EQ.0) THEN + WRITE(*,'(a/(20i6))') 'ICNTL =', mat%mumps_par%ICNTL + END IF + WRITE(*,'(a,i4.4,a,3i16)') 'PE', me, ' istart, iend, nloc', mat%istart, mat%iend, & + & mat%iend-mat%istart+1 +! + CALL dismat(splxy, mat) +! +! BC on Matrix +! + CALL ibcmat(mat, ny) + tmat = seconds() - t0 +! +! RHS assembly +! + ALLOCATE(rhs(nrank), sol(nrank)) + CALL disrhs(mbess, splxy, rhs) +! +! BC on RHS +! + CALL ibcrhs(rhs, ny) +! + mem_loc = mem() + CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after DISMAT') + IF(me.EQ.0) THEN + CALL putarr(fid, '/RHS', rhs, 'RHS of linear system') + END IF +!=========================================================================== +! 3.0 Solve the dicretized system +! + + t0 = seconds() + CALL to_mat(mat) + WRITE(*,'(a/(10i6))') 'MInmax IRN_loc', MINVAL(mat%mumps_par%IRN_loc), MAXVAL(mat%mumps_par%IRN_loc) + WRITE(*,'(a/(10i6))') 'JCN_loc', MINVAL(mat%mumps_par%JCN_loc), MAXVAL(mat%mumps_par%JCN_loc) + tconv = seconds() -t0 + CALL minmax_i(mat%nnz_loc, MPI_COMM_WORLD, 'local nnz') + IF(me.EQ.0) THEN + WRITE(*,'(a,i16)') 'Number of non-zeros of matrix = ', mat%nnz + END IF +! +! Write Matrix and RHS to file +! + IF(LEN_TRIM(matfile).GT.0) THEN + IF(me.EQ.0) THEN + OPEN(99, file=matfile, form='unformatted') + END IF + CALL write_matrix(99, mat, MPI_COMM_WORLD) + END IF +! + t0 = seconds() + CALL reord_mat(mat, nlmetis=nlmetis, debug=debug_mumps) + treord = seconds() - t0 +! + t0 = seconds() + CALL numfact(mat, debug=debug_mumps) + tfact = seconds() - t0 +! +! + mem_loc = mem() + CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after FACTOR') + IF(me.EQ.0) THEN + nzfact = mat%mumps_par%INFOG(29) + IF(nzfact<0) THEN + nzfact = -nzfact*1000000 + END IF + WRITE(*,'(/a,i12)') 'Number of nonzeros in factors of A = ',nzfact + WRITE(*,'(a,f12.2)') 'Number of factorization MFLOPS = ',& + & mat%mumps_par%RINFOG(3)/1.e6 + END IF + gflops1 = mat%mumps_par%RINFOG(3) / tfact / 1.d9 +! + CALL bsolve(mat, rhs, sol, debug=debug_mumps) +! + IF(LEN_TRIM(matfile).GT.0) THEN + IF(me.EQ.0) THEN + WRITE(99) rhs + WRITE(99) sol + CLOSE(99) + END IF + END IF +! + t0 = seconds() + DO it=1,nits ! nits iterations for timing + CALL bsolve(mat, rhs, sol) + sol(1:ny-1) = sol(ny) + END DO + tsolv = (seconds() - t0)/REAL(nits) +! + mem_loc = mem() + CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after BSOLVE') +! +! Spline coefficients, taking into account of periodicity in y +! Note: in SOL, y was numbered first. +! + dimx = splxy%sp1%dim + dimy = splxy%sp2%dim + ALLOCATE(bcoef(0:dimx-1, 0:dimy-1)) + DO j=0,dimy-1 + DO i=0,dimx-1 + ij = MODULO(j,ny) + i*ny + 1 + bcoef(i,j) = sol(ij) + END DO + END DO + IF(me.EQ.0) THEN + WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef) + CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution') + END IF +!=========================================================================== +! 4.0 Check the solution +! +! Check function values computed with various method +! + IF(me.EQ.0) THEN + ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny)) + DO i=0,nx + DO j=0,ny + solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j)) + END DO + END DO + jder = (/0,0/) +! +! Compute PPFORM/BCOEFS at first call to gridval + CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef) +! + WRITE(*,'(/a)') '*** Checking solutions' + t0 = seconds() + DO it=1,nits ! nits iterations for timing + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + END DO + tgrid = (seconds() - t0)/REAL(nits) + errsol = solana - solcal + IF( SIZE(bcoef,2) .LE. 10 ) THEN + CALL prnmat('BCOEF', bcoef) + CALL prnmat('SOLANA', solana) + CALL prnmat('SOLCAL', solcal) + CALL prnmat('ERRSOL', errsol) + END IF + WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & + & norm2(errsol) / norm2(solana) + WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid +! + CALL putarr(fid, '/xgrid', xgrid, 'r') + CALL putarr(fid, '/ygrid', ygrid, '\theta') + CALL putarr(fid, '/sol', solcal, 'Solutions') + CALL putarr(fid, '/solana', solana,'Exact solutions') + CALL putarr(fid, '/errors', errsol, 'Errors') +! +! Check derivatives d/dx and d/dy +! + WRITE(*,'(/a)') '*** Checking gradient' + DO i=0,nx + DO j=0,ny + IF( mbess .EQ. 0 ) THEN + solana(i,j) = -2.0d0 * xgrid(i) + ELSE + solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * & + & xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) + END IF + END DO + END DO +! + jder = (/1,0/) + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + errsol = solana - solcal + CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions') + WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol) +! + DO i=0,nx + DO j=0,ny + solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j)) + END DO + END DO +! + jder = (/0,1/) + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions') + errsol = solana - solcal + WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol) +! + WRITE(*,'(/a)') '---' + WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat + WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s) ', tconv + WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s) ', treord + WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact + WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv + WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv + WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1 + DEALLOCATE(solcal, solana, errsol) + END IF +!=========================================================================== +! 5.0 Clear the matrix and recompute +! + IF(me.EQ.0) WRITE(*,'(/a)') 'Recompute the solver ...' + t0 = seconds() + CALL clear_mat(mat) + CALL dismat(splxy, mat) + CALL ibcmat(mat, ny) + tmat = seconds()-t0 +! + t0 = seconds() + CALL numfact(mat, debug=debug_mumps) + tfact = seconds()-t0 + gflops1 = mat%mumps_par%RINFOG(3) / tfact / 1.d9 +! + t0 = seconds() + ALLOCATE(newsol(nrank)) + CALL bsolve(mat, rhs, newsol) + newsol(1:ny-1) = newsol(ny) + tsolv = seconds()-t0 +! + IF(me.EQ.0) THEN + WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol)) + WRITE(*,'(/a)') '---' + WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat + WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact + WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv + WRITE(*,'(a,1pe12.3)') 'Total (s) ', tmat+tfact+tsolv + WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1 + END IF +! + DEALLOCATE(newsol) +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xgrid, rhs, sol) + DEALLOCATE(bcoef) + CALL destroy_sp(splxy) + CALL destroy(mat) +! + IF(me.EQ.0) CALL closef(fid) + CALL mpi_finalize(ierr) +!=========================================================================== +! +CONTAINS + FUNCTION norm2(x) +! +! Compute the 2-norm of array x +! + IMPLICIT NONE + DOUBLE PRECISION :: norm2 + DOUBLE PRECISION, INTENT(in) :: x(:,:) + DOUBLE PRECISION :: sum2 + INTEGER :: i, j +! + sum2 = 0.0d0 + DO i=1,SIZE(x,1) + DO j=1,SIZE(x,2) + sum2 = sum2 + x(i,j)**2 + END DO + END DO + norm2 = SQRT(sum2) + END FUNCTION norm2 +! + SUBROUTINE prnmat(label, mat) + CHARACTER(len=*) :: label + DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat + INTEGER :: i + WRITE(*,'(/a)') TRIM(label) + DO i=1,SIZE(mat,1) + WRITE(*,'(10(1pe12.3))') mat(i,:) + END DO + END SUBROUTINE prnmat +! + SUBROUTINE minmax_i(k, comm, str) + CHARACTER(len=*), INTENT(in) :: str + INTEGER, INTENT(in) :: k + INTEGER, INTENT(in) :: comm + INTEGER :: me, ierr, kmin, kmax + CALL mpi_comm_rank(comm, me, ierr) + CALL mpi_reduce(k, kmin, 1, MPI_INTEGER, MPI_MIN, 0, comm, ierr) + CALL mpi_reduce(k, kmax, 1, MPI_INTEGER, MPI_MAX, 0, comm, ierr) + IF( me.EQ.0 ) THEN + WRITE(*,'(a,2i16)') 'Minmax of ' // TRIM(str), kmin, kmax + END IF + END SUBROUTINE minmax_i +! + SUBROUTINE minmax_r(x, comm, str) + CHARACTER(len=*), INTENT(in) :: str + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(in) :: comm + INTEGER :: me, ierr + DOUBLE PRECISION :: xmin, xmax + CALL mpi_comm_rank(comm, me, ierr) + CALL mpi_reduce(x, xmin, 1, MPI_DOUBLE_PRECISION, MPI_MIN, 0, comm, ierr) + CALL mpi_reduce(x, xmax, 1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierr) + IF( me.EQ.0 ) THEN + WRITE(*,'(a,2(1pe12.4))') 'Minmax of ' // TRIM(str), xmin, xmax + END IF + END SUBROUTINE minmax_r +END PROGRAM main +! +!+++ +SUBROUTINE meshdist(c, x, nx) +! +! Construct an 1d non-equidistant mesh given a +! mesh distribution function. +! + IMPLICIT NONE + DOUBLE PRECISION, INTENT(in) :: c(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + INTEGER :: nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) +CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) + END FUNCTION fdist +END SUBROUTINE meshdist +!+++ diff --git a/examples/pde2d_nh.f90 b/examples/pde2d_nh.f90 new file mode 100644 index 0000000..ee5a008 --- /dev/null +++ b/examples/pde2d_nh.f90 @@ -0,0 +1,684 @@ +!> +!> @file pde2d_nh.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +! +! Solving the following 2d PDE using splines: +! +! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), +! with BC: f(x=1,y) = cos(y) +! +! Exact solution: f(x,y) = (1-x^2) x^m cos(my) + x*cos(y) +! +MODULE pde2d_nh_mod + USE bsplines + USE matrix + IMPLICIT NONE +! + LOGICAL :: nlfix +CONTAINS + SUBROUTINE dismat(spl, mat) + ! + ! Assembly of FE matrix mat using spline spl + ! + TYPE(spline2d), INTENT(in) :: spl + TYPE(gbmat), INTENT(inout) :: mat + ! + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: n2, nidbas2, ndim2, ng2 + INTEGER :: i, j, ig1, ig2 + INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol + DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) + DOUBLE PRECISION:: contrib + ! + INTEGER :: kterms ! Number of terms in weak form + INTEGER, DIMENSION(:,:), ALLOCATABLE :: idert, iderw ! Derivative order + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: coefs ! Terms in weak form + !=========================================================================== + ! 1.0 Prologue + ! + ! Properties of spline space + ! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) + WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1 + WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2 + ! + ! Weak form + ! + kterms = mat%nterms + ALLOCATE(idert(kterms,2), iderw(kterms,2), coefs(kterms)) + ! + ALLOCATE(fun1(0:nidbas1,0:1)) ! Spline and 1st derivative + ALLOCATE(fun2(0:nidbas2,0:1)) ! + ! + ! Gauss quadature + ! + CALL get_gauss(spl%sp1, ng1) + CALL get_gauss(spl%sp2, ng2) + ALLOCATE(xg1(ng1), wg1(ng1)) + ALLOCATE(xg2(ng2), wg2(ng2)) + !=========================================================================== + ! 2.0 Assembly loop + ! + DO i=1,n1 + CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) + DO ig1=1,ng1 + CALL basfun(xg1(ig1), spl%sp1, fun1, i) + DO j=1,n2 + CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) + DO ig2=1,ng2 + CALL basfun(xg2(ig2), spl%sp2, fun2, j) + CALL coefeq(xg1(ig1), xg2(ig2), idert, iderw, coefs) + DO iterm=1,kterms + DO iw1=0,nidbas1 ! Weight function in dir 1 + igw1 = i+iw1 + DO iw2=0,nidbas2 ! Weight function in dir 2 + igw2 = MODULO(j+iw2-1, n2) + 1 + irow = igw2 + (igw1-1)*n2 + DO it1=0,nidbas1 ! Test function in dir 1 + igt1 = i+it1 + DO it2=0,nidbas2 ! Test function in dir 2 + igt2 = MODULO(j+it2-1, n2) + 1 + jcol = igt2 + (igt1-1)*n2 + contrib = fun1(iw1,iderw(iterm,1)) * & + & fun2(iw2,iderw(iterm,2)) * & + & coefs(iterm) * & + & fun2(it2,idert(iterm,2)) * & + & fun1(it1,idert(iterm,1)) * & + & wg1(ig1) * wg2(ig2) + CALL updtmat(mat, irow, jcol, contrib) + END DO + END DO + END DO + END DO + END DO + END DO + END DO + END DO + END DO + !=========================================================================== + ! 9.0 Epilogue + ! + DEALLOCATE(xg1, wg1, fun1) + DEALLOCATE(xg2, wg2, fun2) + DEALLOCATE(idert, iderw, coefs) + ! + CONTAINS + SUBROUTINE coefeq(x, y, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x, y + INTEGER, INTENT(out) :: idt(:,:), idw(:,:) + DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1)) +! +! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy +! + c(1) = x ! + idt(1,1) = 1 + idt(1,2) = 0 + idw(1,1) = 1 + idw(1,2) = 0 +! + c(2) = 1.d0/x + idt(2,1) = 0 + idt(2,2) = 1 + idw(2,1) = 0 + idw(2,2) = 1 + END SUBROUTINE coefeq + END SUBROUTINE dismat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE disrhs(mbess, spl, rhs) +! +! Assembly the RHS using 2d spline spl +! + IMPLICIT NONE + INTEGER, INTENT(in) :: mbess + TYPE(spline2d), INTENT(in) :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:) + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: n2, nidbas2, ndim2, ng2 + INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank + DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) + DOUBLE PRECISION:: contrib +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) +! + ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) + ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) +! +! Gauss quadature +! + CALL get_gauss(spl%sp1, ng1) + CALL get_gauss(spl%sp2, ng2) + WRITE(*,'(/a, 2i3)') 'Gauss points and weights, ngauss =', ng1, ng2 + ALLOCATE(xg1(ng1), wg1(ng1)) + ALLOCATE(xg2(ng2), wg2(ng2)) +!=========================================================================== +! 2.0 Assembly loop +! + nrank = SIZE(rhs) + rhs(1:nrank) = 0.0d0 +! + DO i=1,n1 + CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) + DO ig1=1,ng1 + CALL basfun(xg1(ig1), spl%sp1, fun1, i) + DO j=1,n2 + CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) + DO ig2=1,ng2 + CALL basfun(xg2(ig2), spl%sp2, fun2, j) + contrib = wg1(ig1)*wg2(ig2) * & + & rhseq(xg1(ig1),xg2(ig2), mbess) + DO k1=0,nidbas1 + i1 = i+k1 + DO k2=0,nidbas2 + j2 = MODULO(j+k2-1,n2) + 1 + ij = j2 + (i1-1)*n2 + rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1) + END DO + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xg1, wg1, fun1) + DEALLOCATE(xg2, wg2, fun2) +! + CONTAINS + DOUBLE PRECISION FUNCTION rhseq(x1, x2, m) + DOUBLE PRECISION, INTENT(in) :: x1, x2 + INTEGER, INTENT(in) :: m + rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2) + END FUNCTION rhseq + END SUBROUTINE disrhs +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE ibcmat(mat, spl) +! +! Apply BC on matrix +! + IMPLICIT NONE + TYPE(gbmat), INTENT(inout) :: mat + TYPE(spline2d) :: spl + INTEGER :: nx, ndim1, nidbas1 + INTEGER :: ny, ndim2, nidbas2 + INTEGER :: kl, ku, nrank, i, j + INTEGER :: krow, kcol, jf + DOUBLE PRECISION :: yg + DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:), fun(:,:) +!=========================================================================== +! 1.0 Prologue +! + CALL get_dim(spl%sp1, ndim1, nx, nidbas1) + CALL get_dim(spl%sp2, ndim2, ny, nidbas2) +! + kl = mat%kl + ku = mat%ku + nrank = mat%rank + ALLOCATE(zsum(nrank), arr(nrank)) + ALLOCATE(fun(0:nidbas2,1)) +!=========================================================================== +! 2.0 Unicity at the axis +! +! The vertical sum on the NY-th row +! + zsum = 0.0d0 + DO i=1,ny + arr = 0.0d0 + CALL getrow(mat, i, arr) + DO j=1,ny+ku + zsum(j) = zsum(j) + arr(j) + END DO + END DO + CALL putrow(mat, ny, zsum) +! +! The horizontal sum on the NY-th column +! + zsum = 0.0d0 + DO j=1,ny + arr = 0.0d0 + CALL getcol(mat, j, arr) + DO i=ny,ny+kl + zsum(i) = zsum(i) + arr(i) + END DO + END DO + CALL putcol(mat, ny, zsum) +! +! The away operator +! + DO j = 1,ny-1 + arr = 0.0d0; arr(j) = 1.0d0 + CALL putcol(mat, j, arr) + END DO +! + DO i = 1,ny-1 + arr = 0.0d0; arr(i) = 1.0d0 + CALL putrow(mat, i, arr) + END DO +! +!=========================================================================== +! 3.0 Dirichlet on right boundary +! + i=nx+nidbas1 ! The last spline in X + DO j=1,ny + krow=(i-1)*ny+j + IF(MODULO(nidbas2,2) .EQ. 0 .AND. nlfix) THEN + yg = (spl%sp2%knots(j-1)+spl%sp2%knots(j))/2.0d0 + ELSE + yg = spl%sp2%knots(j-1) + END IF + CALL basfun(yg, spl%sp2, fun, j) + arr = 0.0d0 + DO jf=0,nidbas2 + kcol=(i-1)*ny + MODULO(jf+j-1,ny)+1 + arr(kcol) = arr(kcol)+fun(jf,1) + END DO + CALL putrow(mat, krow, arr) + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(zsum, arr) + DEALLOCATE(fun) +! + END SUBROUTINE ibcmat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE ibcrhs(rhs, spl) +! +! Apply BC on RHS +! + IMPLICIT NONE + DOUBLE PRECISION, INTENT(inout) :: rhs(:) + TYPE(spline2d) :: spl + INTEGER :: nx, ndim1, nidbas1 + INTEGER :: ny, ndim2, nidbas2 + INTEGER :: nrank + INTEGER :: i, j, k + DOUBLE PRECISION :: xg, yg, zsum +!=========================================================================== +! 1.0 Prologue +! + CALL get_dim(spl%sp1, ndim1, nx, nidbas1) + CALL get_dim(spl%sp2, ndim2, ny, nidbas2) + nrank = SIZE(rhs,1) +!=========================================================================== +! 2.0 Unicity at the axis +! +! The vertical sum +! + zsum = SUM(rhs(1:ny)) + rhs(ny) = zsum + rhs(1:ny-1) = 0.0d0 +!=========================================================================== +! 3.0 Dirichlet on right boundary +! + i = nx+nidbas1 ! The last spline index on x + xg = spl%sp1%knots(nx) ! Right boundary radial coordinate + DO j=1,ny + k = (i-1)*ny + j + IF(MODULO(nidbas2,2) .EQ. 0 .AND. nlfix) THEN + yg = (spl%sp2%knots(j-1)+spl%sp2%knots(j))/2.0d0 + ELSE + yg = spl%sp2%knots(j-1) + END IF + rhs(k) = xg*COS(yg) + END DO + END SUBROUTINE ibcrhs +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE meshdist(c, x, nx) +! +! Construct an 1d non-equidistant mesh given a +! mesh distribution function. +! + IMPLICIT NONE + DOUBLE PRECISION, INTENT(in) :: c(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + INTEGER :: nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO + DEALLOCATE(xint, fint) + CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) + END FUNCTION fdist + END SUBROUTINE meshdist +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +END MODULE pde2d_nh_mod +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +PROGRAM main +! + USE pde2d_nh_mod + USE bsplines + USE matrix + USE futils +! + IMPLICIT NONE + INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms + LOGICAL :: nlppform + INTEGER :: i, j, ij, dimx, dimy, nrank, kl, ku, jder(2), it + DOUBLE PRECISION :: pi, coefx(5), coefy(5) + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol + TYPE(spline2d) :: splxy + TYPE(gbmat) :: mat +! + CHARACTER(len=128) :: file='pde2d_nh.h5' + INTEGER :: fid + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr + DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol + DOUBLE PRECISION :: seconds, mem, dopla + DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2 + INTEGER :: nits=500 +! + NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, coefx, coefy, nlfix +!=========================================================================== +! 1.0 Prologue +! +! Read in data specific to run +! + nx = 8 ! Number of intervals in x + ny = 8 ! Number of intervals in y + nidbas = (/3,3/) ! Degree of splines + ngauss = (/4,4/) ! Number of Gauss points/interval + mbess = 2 ! Exponent of differential problem + nterms = 2 ! Number of terms in weak form + nlppform = .TRUE. ! Use PPFORM for gridval or not + coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + nlfix = .TRUE. ! Fix or not for even nidbas2 +! + READ(*,newrun) + WRITE(*,newrun) +! +! Define grid on x (=radial) & y (=poloidal) axis +! + pi = 4.0d0*ATAN(1.0d0) + ALLOCATE(xgrid(0:nx), ygrid(0:ny)) + xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 + CALL meshdist(coefx, xgrid, nx) + ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi + CALL meshdist(coefy, ygrid, ny) +! + WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) + WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny) +! +! Create hdf5 file +! + CALL creatf(file, fid, 'PDE2D Result File', real_prec='d') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NY', ny) + CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) + CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) + CALL attach(fid, '/', 'NGAUSS1', ngauss(1)) + CALL attach(fid, '/', 'NGAUSS2', ngauss(2)) + CALL attach(fid, '/', 'MBESS', mbess) +!=========================================================================== +! 2.0 Discretize the PDE +! +! Set up spline +! + t0 = seconds() + CALL set_spline(nidbas, ngauss, & + & xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform) + WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots + WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots +! +! FE matrix assembly +! + nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix + kl = (nidbas(1)+1)*ny -1 ! Number of sub-diagnonals + ku = kl ! Number of super-diagnonals + WRITE(*,'(a,5i6)') 'nrank, kl, ku', nrank, kl, ku +! + CALL init(kl, ku, nrank, nterms, mat) + CALL dismat(splxy, mat) + CALL putmat(fid, '/MAT0', mat, 'Assembled GB matrice') + ALLOCATE(arr(nrank)) +! +! BC on Matrix +! + IF(nrank.LT.100) & + & WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', mat%val(kl+ku+1,:) + CALL ibcmat(mat, splxy) + tmat = seconds() - t0 + IF(nrank.LT.100) & + & WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', mat%val(kl+ku+1,:) +! +! RHS assembly +! + ALLOCATE(rhs(nrank), sol(nrank)) + CALL disrhs(mbess, splxy, rhs) +! +! BC on RHS +! + CALL ibcrhs(rhs, splxy) + + CALL putarr(fid, '/RHS', rhs, 'RHS of linear system') + CALL putmat(fid, '/MAT1', mat, 'GB matrice with BC') + WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem() +!=========================================================================== +! 3.0 Solve the dicretized system +! + t0 = seconds() + CALL factor(mat) + tfact = seconds() - t0 + gflops1 = dopla('DGBTRF',nrank,nrank,kl,ku,0) / tfact / 1.d9 + + t0 = seconds() + CALL bsolve(mat, rhs, sol) +! +! Backtransform of solution +! + sol(1:ny-1) = sol(ny) +! +! Spline coefficients, taking into account of periodicity in y +! Note: in SOL, y was numbered first. +! + dimx = splxy%sp1%dim + dimy = splxy%sp2%dim + ALLOCATE(bcoef(0:dimx-1, 0:dimy-1)) + DO j=0,dimy-1 + DO i=0,dimx-1 + ij = MODULO(j,ny) + i*ny + 1 + bcoef(i,j) = sol(ij) + END DO + END DO + WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef) +! + tsolv = seconds() - t0 + gflops2 = dopla('DGBTRS',nrank,1,kl,ku,0) / tsolv / 1.d9 + CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution') +!=========================================================================== +! 4.0 Check the solution +! +! Check function values computed with various method +! + ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny)) + DO i=0,nx + DO j=0,ny + solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j)) & + & + xgrid(i)*COS(ygrid(j)) + END DO + END DO + jder = (/0,0/) +! +! Compute PPFORM/BCOEFS at first call to gridval + CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef) +! + WRITE(*,'(/a)') '*** Checking solutions' + t0 = seconds() + DO it=1,nits ! nits iterations for timing + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + END DO + tgrid = (seconds() - t0)/REAL(nits) + errsol = solana - solcal + IF( SIZE(bcoef,2) .LE. 10 ) THEN + CALL prnmat('BCOEF', bcoef) + CALL prnmat('SOLANA', solana) + CALL prnmat('SOLCAL', solcal) + CALL prnmat('ERRSOL', errsol) + END IF + WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & + & norm2(errsol) / norm2(solana) + WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid +! + CALL putarr(fid, '/xgrid', xgrid, 'r') + CALL putarr(fid, '/ygrid', ygrid, '\theta') + CALL putarr(fid, '/sol', solcal, 'Solutions') + CALL putarr(fid, '/solana', solana,'Exact solutions') + CALL putarr(fid, '/errors', errsol, 'Errors') +! +! Check derivatives d/dx and d/dy +! + WRITE(*,'(/a)') '*** Checking gradient' + DO i=0,nx + DO j=0,ny + IF( mbess .EQ. 0 ) THEN + solana(i,j) = -2.0d0 * xgrid(i) + ELSE + solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * & + & xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) & + & + COS(ygrid(j)) + END IF + END DO + END DO +! + jder = (/1,0/) + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + errsol = solana - solcal + CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions') + CALL putarr(fid, '/errors_x', errsol, 'Errors in d/dx') + WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol) +! + DO i=0,nx + DO j=0,ny + solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * & + & SIN(mbess*ygrid(j)) & + & -xgrid(i)*SIN(ygrid(j)) + END DO + END DO +! + jder = (/0,1/) + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions') + errsol = solana - solcal + CALL putarr(fid, '/errors_y', errsol, 'Errors in d/dy') + WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol) +!=========================================================================== +! 9.0 Epilogue +! + WRITE(*,'(/a)') '---' + WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat + WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact + WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv + WRITE(*,'(a,2f10.3)') 'Factor/solve Gflop/s', gflops1, gflops2 +! + DEALLOCATE(xgrid, rhs, sol) + DEALLOCATE(solcal, solana, errsol) + DEALLOCATE(bcoef) + DEALLOCATE(arr) + CALL destroy_sp(splxy) + CALL destroy(mat) +! + CALL closef(fid) +!=========================================================================== +! +CONTAINS + FUNCTION norm2(x) +! +! Compute the 2-norm of array x +! + IMPLICIT NONE + DOUBLE PRECISION :: norm2 + DOUBLE PRECISION, INTENT(in) :: x(:,:) + DOUBLE PRECISION :: sum2 + INTEGER :: i, j +! + sum2 = 0.0d0 + DO i=1,SIZE(x,1) + DO j=1,SIZE(x,2) + sum2 = sum2 + x(i,j)**2 + END DO + END DO + norm2 = SQRT(sum2) + END FUNCTION norm2 + SUBROUTINE prnmat(label, mat) + CHARACTER(len=*) :: label + DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat + INTEGER :: i + WRITE(*,'(/a)') TRIM(label) + DO i=1,SIZE(mat,1) + WRITE(*,'(10(1pe12.3))') mat(i,:) + END DO + END SUBROUTINE prnmat +END PROGRAM main +! +!+++ +!+++ diff --git a/examples/pde2d_pardiso.f90 b/examples/pde2d_pardiso.f90 new file mode 100644 index 0000000..276f727 --- /dev/null +++ b/examples/pde2d_pardiso.f90 @@ -0,0 +1,741 @@ +!> +!> @file pde2d_pardiso.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +! +! Solving the 2d PDE using splines and PARDISO non-symmetric matrix +! +! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0 +! exact solution: f(x,y) = (1-x^2) x^m cos(my) +! +MODULE pde2d_pardiso_mod + USE bsplines + USE pardiso_bsplines + IMPLICIT NONE +CONTAINS +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE dismat(spl, mat) +! +! Assembly of FE matrix mat using spline spl +! + TYPE(spline2d), INTENT(in) :: spl + TYPE(pardiso_mat), INTENT(inout) :: mat +! + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: n2, nidbas2, ndim2, ng2 + INTEGER :: i, j, ig1, ig2 + INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol + DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:) + DOUBLE PRECISION:: contrib +! + INTEGER :: kterms ! Number of terms in weak form + INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:) ! Derivative order + DOUBLE PRECISION, ALLOCATABLE :: coefs(:,:,:) ! Terms in weak form + INTEGER, ALLOCATABLE :: left1(:), left2(:) +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) + WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1 + WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2 +! +! Gauss quadature +! + CALL get_gauss(spl%sp1, ng1) + CALL get_gauss(spl%sp2, ng2) + ALLOCATE(xg1(ng1), wg1(ng1)) + ALLOCATE(xg2(ng1), wg2(ng1)) +! +! Weak form +! + kterms = mat%nterms + ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2)) + ALLOCATE(coefs(kterms,ng1,ng2)) +! + ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative + ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! +!=========================================================================== +! 2.0 Assembly loop +! + ALLOCATE(left1(ng1)) + ALLOCATE(left2(ng2)) + DO i=1,n1 + CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) + left1 = i + CALL basfun(xg1, spl%sp1, fun1, left1) + DO j=1,n2 + CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) + left2 = j + CALL basfun(xg2, spl%sp2, fun2, left2) +! + DO ig1=1,ng1 + DO ig2=1,ng2 + CALL coefeq(xg1(ig1), xg2(ig2), & + & idert(:,:,ig1,ig2), & + & iderw(:,:,ig1,ig2), & + & coefs(:,ig1,ig2)) + END DO + END DO +! + DO iw1=0,nidbas1 ! Weight function in dir 1 + igw1 = i+iw1 + DO iw2=0,nidbas2 ! Weight function in dir 2 + igw2 = MODULO(j+iw2-1, n2) + 1 + irow = igw2 + (igw1-1)*n2 + DO it1=0,nidbas1 ! Test function in dir 1 + igt1 = i+it1 + DO it2=0,nidbas2 ! Test function in dir 2 + igt2 = MODULO(j+it2-1, n2) + 1 + jcol = igt2 + (igt1-1)*n2 +!------------- + contrib = 0.0d0 + DO ig1=1,ng1 + DO ig2=1,ng2 + DO iterm=1,kterms + contrib = contrib + & + & fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * & + & fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * & + & coefs(iterm,ig1,ig2) * & + & fun2(it2,idert(iterm,2,ig1,ig2),ig2) * & + & fun1(it1,idert(iterm,1,ig1,ig2),ig1) * & + & wg1(ig1) * wg2(ig2) + END DO + END DO + END DO + CALL updtmat(mat, irow, jcol, contrib) +!------------- + END DO + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xg1, wg1, fun1) + DEALLOCATE(xg2, wg2, fun2) + DEALLOCATE(idert, iderw, coefs) + DEALLOCATE(left1,left2) +! + CONTAINS + SUBROUTINE coefeq(x, y, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x, y + INTEGER, INTENT(out) :: idt(:,:), idw(:,:) + DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1)) +! +! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy +! + c(1) = x ! + idt(1,1) = 1 + idt(1,2) = 0 + idw(1,1) = 1 + idw(1,2) = 0 + ! + c(2) = 1.d0/x + idt(2,1) = 0 + idt(2,2) = 1 + idw(2,1) = 0 + idw(2,2) = 1 + END SUBROUTINE coefeq + END SUBROUTINE dismat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE disrhs(mbess, spl, rhs) +! +! Assembly the RHS using 2d spline spl +! + INTEGER, INTENT(in) :: mbess + TYPE(spline2d), INTENT(in) :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:) + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: n2, nidbas2, ndim2, ng2 + INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank + DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) + DOUBLE PRECISION:: contrib +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) +! + ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) + ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) +! +! Gauss quadature +! + CALL get_gauss(spl%sp1, ng1) + CALL get_gauss(spl%sp2, ng2) + ALLOCATE(xg1(ng1), wg1(ng1)) + ALLOCATE(xg2(ng1), wg2(ng1)) +!=========================================================================== +! 2.0 Assembly loop +! + nrank = SIZE(rhs) + rhs(1:nrank) = 0.0d0 +! + DO i=1,n1 + CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) + DO ig1=1,ng1 + CALL basfun(xg1(ig1), spl%sp1, fun1, i) + DO j=1,n2 + CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) + DO ig2=1,ng2 + CALL basfun(xg2(ig2), spl%sp2, fun2, j) + contrib = wg1(ig1)*wg2(ig2) * & + & rhseq(xg1(ig1),xg2(ig2), mbess) + DO k1=0,nidbas1 + i1 = i+k1 + DO k2=0,nidbas2 + j2 = MODULO(j+k2-1,n2) + 1 + ij = j2 + (i1-1)*n2 + rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1) + END DO + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xg1, wg1, fun1) + DEALLOCATE(xg2, wg2, fun2) +! + CONTAINS + DOUBLE PRECISION FUNCTION rhseq(x1, x2, m) + DOUBLE PRECISION, INTENT(in) :: x1, x2 + INTEGER, INTENT(in) :: m + rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2) + END FUNCTION rhseq + END SUBROUTINE disrhs +! +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE ibcmat(mat, ny) +! +! Apply BC on matrix +! + TYPE(pardiso_mat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: ny + INTEGER :: nrank, i, j + DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:) +!=========================================================================== +! 1.0 Prologue +! + nrank = mat%rank + ALLOCATE(zsum(nrank), arr(nrank)) +!=========================================================================== +! 2.0 Unicity at the axis +! +! The vertical sum on the NY-th row +! + zsum = 0.0d0 + DO i=1,ny + arr = 0.0d0 + CALL getrow(mat, i, arr) + zsum(:) = zsum(:) + arr(:) + END DO + CALL putrow(mat, ny, zsum) +! +! The horizontal sum on the NY-th column +! + zsum = 0.0d0 + DO j=1,ny + arr = 0.0d0 + CALL getcol(mat, j, arr) + zsum(ny:) = zsum(ny:) + arr(ny:) + END DO + CALL putcol(mat, ny, zsum) +! +! The away operator +! + DO j = 1,ny-1 + arr = 0.0d0; arr(j) = 1.0d0 + CALL putcol(mat, j, arr) + END DO +! + DO i = 1,ny-1 + arr = 0.0d0; arr(i) = 1.0d0 + CALL putrow(mat, i, arr) + END DO +! +!=========================================================================== +! 3.0 Dirichlet on right boundary +! + DO j = nrank, nrank-ny+1, -1 + arr = 0.0d0; arr(j) = 1.0d0 + CALL putcol(mat, j, arr) + END DO +! + DO i = nrank, nrank-ny+1, -1 + arr = 0.0d0; arr(i) = 1.0d0 + CALL putrow(mat, i, arr) + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(zsum, arr) +! + END SUBROUTINE ibcmat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE ibcrhs(rhs, ny) +! +! Apply BC on RHS +! + DOUBLE PRECISION, INTENT(inout) :: rhs(:) + INTEGER, INTENT(in) :: ny + INTEGER :: nrank + DOUBLE PRECISION :: zsum +!=========================================================================== +! 1.0 Prologue +! + nrank = SIZE(rhs,1) +!=========================================================================== +! 2.0 Unicity at the axis +! +! The vertical sum +! + zsum = SUM(rhs(1:ny)) + rhs(ny) = zsum + rhs(1:ny-1) = 0.0d0 +!=========================================================================== +! 3.0 Dirichlet on right boundary +! + rhs(nrank-ny+1:nrank) = 0.0d0 + END SUBROUTINE ibcrhs +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE coefeq_poisson(x, y, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x, y + INTEGER, INTENT(out) :: idt(:,:), idw(:,:) + DOUBLE PRECISION, INTENT(out) :: c(:) +! +! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy +! + c(1) = x ! + idt(1,1) = 1 + idt(1,2) = 0 + idw(1,1) = 1 + idw(1,2) = 0 +! + c(2) = 1.d0/x + idt(2,1) = 0 + idt(2,2) = 1 + idw(2,1) = 0 + idw(2,2) = 1 + END SUBROUTINE coefeq_poisson +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +END MODULE pde2d_pardiso_mod +PROGRAM main + USE pde2d_pardiso_mod + USE futils + USE conmat_mod +! + IMPLICIT NONE + INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms + LOGICAL :: nlppform, nlconmat + INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it + DOUBLE PRECISION :: pi, coefx(5), coefy(5) + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol + TYPE(spline2d) :: splxy + TYPE(pardiso_mat) :: mat +! + CHARACTER(len=128) :: file='pde2d_pardiso.h5' + INTEGER :: fid + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr + DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol + DOUBLE PRECISION :: seconds, mem, dopla + DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2 + DOUBLE PRECISION :: tconv, treord + INTEGER :: nits=100 + LOGICAL :: nlmetis, nlforce_zero +! + NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlmetis, & + & nlforce_zero, nlconmat, coefx, coefy +!=========================================================================== +! 1.0 Prologue +! +! Read in data specific to run +! + nx = 8 ! Number of intervals in x + ny = 8 ! Number of intervals in y + nidbas = (/3,3/) ! Degree of splines + ngauss = (/4,4/) ! Number of Gauss points/interval + mbess = 2 ! Exponent of differential problem + nterms = 2 ! Number of terms in weak form + nlppform = .TRUE. ! Use PPFORM for gridval or not + nlmetis = .FALSE. ! Use metis ordering or minimum degree + nlforce_zero = .FALSE. ! Remove existing nodes with zero val in putele/row/ele + nlconmat = .TRUE. ! Use CONMAT instead of DISMAT + coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function +! + READ(*,newrun) + WRITE(*,newrun) +! +! Define grid on x (=radial) & y (=poloidal) axis +! + pi = 4.0d0*ATAN(1.0d0) + ALLOCATE(xgrid(0:nx), ygrid(0:ny)) + xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 + CALL meshdist(coefx, xgrid, nx) + ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi + CALL meshdist(coefy, ygrid, ny) +! +! Create hdf5 file +! + CALL creatf(file, fid, 'PDE2D Result File', real_prec='d') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NY', ny) + CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) + CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) + CALL attach(fid, '/', 'NGAUSS1', ngauss(1)) + CALL attach(fid, '/', 'NGAUSS2', ngauss(2)) + CALL attach(fid, '/', 'MBESS', mbess) +!=========================================================================== +! 2.0 Discretize the PDE +! +! Set up spline +! + CALL set_spline(nidbas, ngauss, & + & xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform) +!!$ WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots +!!$ WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots +! +! FE matrix assembly +! + nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix + WRITE(*,'(a,i8)') 'nrank', nrank +! + CALL init(nrank, nterms, mat, nlforce_zero=nlforce_zero) + t0 = seconds() + IF(nlconmat) THEN + CALL conmat(splxy, mat, coefeq_poisson) + ELSE + CALL dismat(splxy, mat) + END IF + tmat = seconds() - t0 + ALLOCATE(arr(nrank)) + IF(nrank.LT.100) THEN + DO i=1,nrank + CALL getele(mat, i, i, arr(i)) + END DO + WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', arr + END IF +! +! BC on Matrix +! + WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero + WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat) + CALL ibcmat(mat, ny) + IF(nrank.LT.100) THEN + DO i=1,nrank + CALL getele(mat, i, i, arr(i)) + END DO + WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', arr + WRITE(*,'(a)') 'Last rows' + DO i=nrank-ny,nrank + CALL getrow(mat, i, arr) + WRITE(*,'(10(1pe12.3))') arr + END DO + END IF +! +! RHS assembly +! + ALLOCATE(rhs(nrank), sol(nrank)) + CALL disrhs(mbess, splxy, rhs) +! +! BC on RHS +! + CALL ibcrhs(rhs, ny) +! + CALL putarr(fid, '/RHS', rhs, 'RHS of linear system') + WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat) + WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem() +!=========================================================================== +! 3.0 Solve the dicretized system +! + t0 = seconds() + CALL to_mat(mat) + tconv = seconds() -t0 + WRITE(*,'(/a,l4)') 'associated(mat%mat)', ASSOCIATED(mat%mat) + WRITE(*,'(a,1pe12.3)') 'Mem used (MB) after TO_MAT', mem() +! + t0 = seconds() + CALL reord_mat(mat, nlmetis=nlmetis, debug=.FALSE.) + CALL putmat(fid, '/MAT', mat) + treord = seconds() - t0 +! + t0 = seconds() + CALL numfact(mat, debug=.FALSE.) + tfact = seconds() - t0 + + WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem() + WRITE(*,'(/a,i12)') 'Number of nonzeros in factors of A = ',mat%p%iparm(18) + WRITE(*,'(a,i12)') 'Number of factorization MFLOPS = ',mat%p%iparm(19) + gflops1 = mat%p%iparm(19) / tfact / 1.d3 +! + CALL bsolve(mat, rhs, sol, debug=.FALSE.) + t0 = seconds() + DO it=1,nits ! nits iterations for timing + CALL bsolve(mat, rhs, sol) + sol(1:ny-1) = sol(ny) + END DO + WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem() + tsolv = (seconds() - t0)/REAL(nits) +! +! Spline coefficients, taking into account of periodicity in y +! Note: in SOL, y was numbered first. +! + dimx = splxy%sp1%dim + dimy = splxy%sp2%dim + ALLOCATE(bcoef(0:dimx-1, 0:dimy-1)) + DO j=0,dimy-1 + DO i=0,dimx-1 + ij = MODULO(j,ny) + i*ny + 1 + bcoef(i,j) = sol(ij) + END DO + END DO + WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef) + CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution') +!=========================================================================== +! 4.0 Check the solution +! +! Check function values computed with various method +! + ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny)) + DO i=0,nx + DO j=0,ny + solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j)) + END DO + END DO + jder = (/0,0/) +! +! Compute PPFORM/BCOEFS at first call to gridval + CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef) +! + WRITE(*,'(/a)') '*** Checking solutions' + t0 = seconds() + DO it=1,nits ! nits iterations for timing + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + END DO + tgrid = (seconds() - t0)/REAL(nits) + errsol = solana - solcal + IF( SIZE(bcoef,2) .LE. 10 ) THEN + CALL prnmat('BCOEF', bcoef) + CALL prnmat('SOLANA', solana) + CALL prnmat('SOLCAL', solcal) + CALL prnmat('ERRSOL', errsol) + END IF + WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & + & norm2(errsol) / norm2(solana) + WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid +! + CALL putarr(fid, '/xgrid', xgrid, 'r') + CALL putarr(fid, '/ygrid', ygrid, '\theta') + CALL putarr(fid, '/sol', solcal, 'Solutions') + CALL putarr(fid, '/solana', solana,'Exact solutions') + CALL putarr(fid, '/errors', errsol, 'Errors') +! +! Check derivatives d/dx and d/dy +! + WRITE(*,'(/a)') '*** Checking gradient' + DO i=0,nx + DO j=0,ny + IF( mbess .EQ. 0 ) THEN + solana(i,j) = -2.0d0 * xgrid(i) + ELSE + solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * & + & xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) + END IF + END DO + END DO +! + jder = (/1,0/) + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + errsol = solana - solcal + CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions') + WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol) +! + DO i=0,nx + DO j=0,ny + solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j)) + END DO + END DO +! + jder = (/0,1/) + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions') + errsol = solana - solcal + WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol) +! + WRITE(*,'(/a)') '---' + WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat + WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s) ', tconv + WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s) ', treord + WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact + WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv + WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv + WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1 +!=========================================================================== +! 5.0 Clear the matrix and recompute +! + WRITE(*,'(/a)') 'Recompute the solver ...' + CALL clear_mat(mat) + t0 = seconds() + IF(nlconmat) THEN + CALL conmat(splxy, mat, coefeq_poisson) + ELSE + CALL dismat(splxy, mat) + END IF + tmat = seconds()-t0 + CALL ibcmat(mat, ny) +! + t0 = seconds() + CALL numfact(mat, debug=.FALSE.) + tfact = seconds()-t0 + gflops1 = mat%p%iparm(19) / tfact / 1.d3 +! + t0 = seconds() + ALLOCATE(newsol(nrank)) + CALL bsolve(mat, rhs, newsol) + newsol(1:ny-1) = newsol(ny) + tsolv = seconds()-t0 +! + WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol)) + WRITE(*,'(/a)') '---' + WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat + WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact + WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv + WRITE(*,'(a,1pe12.3)') 'Total (s) ', tmat+tfact+tsolv + WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1 +! + DEALLOCATE(newsol) +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xgrid, rhs, sol) + DEALLOCATE(solcal, solana, errsol) + DEALLOCATE(bcoef) + DEALLOCATE(arr) + CALL destroy_sp(splxy) + CALL destroy(mat) +! + CALL closef(fid) +!=========================================================================== +! +CONTAINS + FUNCTION norm2(x) +! +! Compute the 2-norm of array x +! + IMPLICIT NONE + DOUBLE PRECISION :: norm2 + DOUBLE PRECISION, INTENT(in) :: x(:,:) + DOUBLE PRECISION :: sum2 + INTEGER :: i, j +! + sum2 = 0.0d0 + DO i=1,SIZE(x,1) + DO j=1,SIZE(x,2) + sum2 = sum2 + x(i,j)**2 + END DO + END DO + norm2 = SQRT(sum2) + END FUNCTION norm2 + SUBROUTINE prnmat(label, mat) + CHARACTER(len=*) :: label + DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat + INTEGER :: i + WRITE(*,'(/a)') TRIM(label) + DO i=1,SIZE(mat,1) + WRITE(*,'(10(1pe12.3))') mat(i,:) + END DO + END SUBROUTINE prnmat +END PROGRAM main +! +!+++ +SUBROUTINE meshdist(c, x, nx) +! +! Construct an 1d non-equidistant mesh given a +! mesh distribution function. +! + IMPLICIT NONE + DOUBLE PRECISION, INTENT(in) :: c(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + INTEGER :: nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) +CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) + END FUNCTION fdist +END SUBROUTINE meshdist +!+++ diff --git a/examples/pde2d_pb.f90 b/examples/pde2d_pb.f90 new file mode 100644 index 0000000..e764273 --- /dev/null +++ b/examples/pde2d_pb.f90 @@ -0,0 +1,696 @@ +!> +!> @file pde2d_pb.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Solving the following 2d PDE using splines: +! +! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0 +! exact solution: f(x,y) = (1-x^2) x^m cos(my) +! + USE bsplines + USE matrix + USE conmat_mod +! + IMPLICIT NONE + INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms + LOGICAL :: nlppform, nlconmat + INTEGER :: i, j, ij, dimx, dimy, nrank, kl, ku, jder(2), it + DOUBLE PRECISION :: pi, coefx(5), coefy(5) + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol + TYPE(spline2d) :: splxy + TYPE(pbmat) :: mat +! + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr + DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol + DOUBLE PRECISION :: seconds, mem, dopla + DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2 + INTEGER :: nits=500 +! + INTERFACE + SUBROUTINE dismat(spl, mat) + USE bsplines + USE matrix + TYPE(spline2d), INTENT(in) :: spl + TYPE(pbmat), INTENT(inout) :: mat + END SUBROUTINE dismat + SUBROUTINE disrhs(mbess, spl, rhs) + USE bsplines + INTEGER, INTENT(in) :: mbess + TYPE(spline2d), INTENT(in) :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:) + END SUBROUTINE disrhs + SUBROUTINE meshdist(coefs, x, nx) + DOUBLE PRECISION, INTENT(in) :: coefs(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + END SUBROUTINE meshdist + SUBROUTINE ibcmat(mat, ny) + USE matrix + TYPE(pbmat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: ny + END SUBROUTINE ibcmat + SUBROUTINE ibcrhs(rhs, ny) + DOUBLE PRECISION, INTENT(inout) :: rhs(:) + INTEGER, INTENT(in) :: ny + END SUBROUTINE ibcrhs + SUBROUTINE coefeq(x, y, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x, y + INTEGER, INTENT(out) :: idt(:,:), idw(:,:) + DOUBLE PRECISION, INTENT(out) :: c(:) + END SUBROUTINE coefeq + END INTERFACE +! + NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlconmat, & + & coefx, coefy +!=========================================================================== +! 1.0 Prologue +! +! Read in data specific to run +! + nx = 8 ! Number of intervals in x + ny = 8 ! Number of intervals in y + nidbas = (/3,3/) ! Degree of splines + ngauss = (/4,4/) ! Number of Gauss points/interval + mbess = 2 ! Exponent of differential problem + nterms = 2 ! Number of terms in weak form + nlppform = .TRUE. ! Use PPFORM for gridval or not + nlconmat = .TRUE. ! Use CONMAT instead of DISMAT + coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function +! + READ(*,newrun) + WRITE(*,newrun) +! +! Define grid on x (=radial) & y (=poloidal) axis +! + pi = 4.0d0*ATAN(1.0d0) + ALLOCATE(xgrid(0:nx), ygrid(0:ny)) + xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 + CALL meshdist(coefx, xgrid, nx) + ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi + CALL meshdist(coefy, ygrid, ny) +! + WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) + WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny) +!=========================================================================== +! 2.0 Discretize the PDE +! +! Set up spline +! + CALL set_spline(nidbas, ngauss, & + & xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform) +!!$ WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots +!!$ WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots +! +! FE matrix assembly +! + nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix + kl = (nidbas(1)+1)*ny -1 ! Number of sub-diagnonals + ku = kl ! Number of super-diagnonals + WRITE(*,'(a,5i6)') 'nrank, kl, ku', nrank, kl, ku +! + CALL init(ku, nrank, nterms, mat) + t0 = seconds() + IF(nlconmat) THEN + CALL conmat(splxy, mat, coefeq) + ELSE + CALL dismat(splxy, mat) + END IF + tmat = seconds() - t0 + ALLOCATE(arr(nrank)) +! +! BC on Matrix +! + IF(nrank.LT.100) & + & WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', mat%val(ku+1,:) + CALL ibcmat(mat, ny) + IF(nrank.LT.100) & + & WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', mat%val(ku+1,:) +! +! RHS assembly +! + ALLOCATE(rhs(nrank), sol(nrank)) + CALL disrhs(mbess, splxy, rhs) +! +! BC on RHS +! + CALL ibcrhs(rhs, ny) + + WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem() +!=========================================================================== +! 3.0 Solve the dicretized system +! + t0 = seconds() + CALL factor(mat) + tfact = seconds() - t0 + gflops1 = dopla('DPBTRF',nrank,nrank,kl,ku,0) / tfact / 1.d9 + + t0 = seconds() + CALL bsolve(mat, rhs, sol) +! +! Backtransform of solution +! + sol(1:ny-1) = sol(ny) +! +! Spline coefficients, taking into account of periodicity in y +! Note: in SOL, y was numbered first. +! + dimx = splxy%sp1%dim + dimy = splxy%sp2%dim + ALLOCATE(bcoef(0:dimx-1, 0:dimy-1)) + DO j=0,dimy-1 + DO i=0,dimx-1 + ij = MODULO(j,ny) + i*ny + 1 + bcoef(i,j) = sol(ij) + END DO + END DO + WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef) +! + tsolv = seconds() - t0 + gflops2 = dopla('DPBTRS',nrank,1,kl,ku,0) / tsolv / 1.d9 +!=========================================================================== +! 4.0 Check the solution +! +! Check function values computed with various method +! + ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny)) + DO i=0,nx + DO j=0,ny + solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j)) + END DO + END DO + jder = (/0,0/) +! +! Compute PPFORM at first call to gridval + IF(nlppform) THEN + CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef) + END IF +! + WRITE(*,'(/a)') '*** Checking solutions' + t0 = seconds() + DO it=1,nits ! nits iterations for timing + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + END DO + tgrid = (seconds() - t0)/REAL(nits) + errsol = solana - solcal + IF( SIZE(bcoef,2) .LE. 10 ) THEN + CALL prnmat('BCOEF', bcoef) + CALL prnmat('SOLANA', solana) + CALL prnmat('SOLCAL', solcal) + CALL prnmat('ERRSOL', errsol) + END IF + WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & + & norm2(errsol) / norm2(solana) + WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid +! +! Check derivatives d/dx and d/dy +! + WRITE(*,'(/a)') '*** Checking gradient' + DO i=0,nx + DO j=0,ny + IF( mbess .EQ. 0 ) THEN + solana(i,j) = -2.0d0 * xgrid(i) + ELSE + solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * & + & xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) + END IF + END DO + END DO +! + jder = (/1,0/) + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + errsol = solana - solcal + WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol) +! + DO i=0,nx + DO j=0,ny + solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j)) + END DO + END DO +! + jder = (/0,1/) + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + errsol = solana - solcal + WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol) +!=========================================================================== +! 9.0 Epilogue +! + WRITE(*,'(/a)') '---' + WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat + WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact + WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv + WRITE(*,'(a,2f10.3)') 'Factor/solve Gflop/s', gflops1, gflops2 +! + DEALLOCATE(xgrid, rhs, sol) + DEALLOCATE(solcal, solana, errsol) + DEALLOCATE(bcoef) + DEALLOCATE(arr) + CALL destroy_sp(splxy) + CALL destroy(mat) +! +!=========================================================================== +! +CONTAINS + SUBROUTINE prntmat(str, a) + DOUBLE PRECISION, DIMENSION(:,:) :: a + CHARACTER(len=*) :: str + INTEGER :: i + WRITE(*,'(a)') TRIM(str) + DO i=1,SIZE(a,1) + WRITE(*,'(10f8.1)') a(i,:) + END DO + END SUBROUTINE prntmat + FUNCTION norm2(x) +! +! Compute the 2-norm of array x +! + IMPLICIT NONE + DOUBLE PRECISION :: norm2 + DOUBLE PRECISION, INTENT(in) :: x(:,:) + DOUBLE PRECISION :: sum2 + INTEGER :: i, j +! + sum2 = 0.0d0 + DO i=1,SIZE(x,1) + DO j=1,SIZE(x,2) + sum2 = sum2 + x(i,j)**2 + END DO + END DO + norm2 = SQRT(sum2) + END FUNCTION norm2 + SUBROUTINE prnmat(label, mat) + CHARACTER(len=*) :: label + DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat + INTEGER :: i + WRITE(*,'(/a)') TRIM(label) + DO i=1,SIZE(mat,1) + WRITE(*,'(10(1pe12.3))') mat(i,:) + END DO + END SUBROUTINE prnmat +END PROGRAM main +! +!+++ +SUBROUTINE meshdist(c, x, nx) +! +! Construct an 1d non-equidistant mesh given a +! mesh distribution function. +! + IMPLICIT NONE + DOUBLE PRECISION, INTENT(in) :: c(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + INTEGER :: nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) +CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) + END FUNCTION fdist +END SUBROUTINE meshdist +!+++ +SUBROUTINE dismat(spl, mat) +! +! Assembly of FE matrix mat using spline spl +! + USE bsplines + USE matrix + IMPLICIT NONE + TYPE(spline2d), INTENT(in) :: spl + TYPE(pbmat), INTENT(inout) :: mat +! + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: n2, nidbas2, ndim2, ng2 + INTEGER :: i, j, ig1, ig2 + INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol + DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:) + DOUBLE PRECISION:: contrib +! + INTEGER :: kterms ! Number of terms in weak form + INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:) ! Derivative order + DOUBLE PRECISION, ALLOCATABLE :: coefs(:,:,:) ! Terms in weak form + INTEGER, ALLOCATABLE :: left1(:), left2(:) +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) + WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1 + WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2 +! +! Gauss quadature +! + CALL get_gauss(spl%sp1, ng1) + CALL get_gauss(spl%sp2, ng2) + ALLOCATE(xg1(ng1), wg1(ng1)) + ALLOCATE(xg2(ng1), wg2(ng1)) +! +! Weak form +! + kterms = mat%nterms + ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2)) + ALLOCATE(coefs(kterms,ng1,ng2)) +! + ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative + ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! +!=========================================================================== +! 2.0 Assembly loop +! + ALLOCATE(left1(ng1)) + ALLOCATE(left2(ng2)) + DO i=1,n1 + CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) + left1 = i + CALL basfun(xg1, spl%sp1, fun1, left1) + DO j=1,n2 + CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) + left2 = j + CALL basfun(xg2, spl%sp2, fun2, left2) +! + DO ig1=1,ng1 + DO ig2=1,ng2 + CALL coefeq(xg1(ig1), xg2(ig2), & + & idert(:,:,ig1,ig2), & + & iderw(:,:,ig1,ig2), & + & coefs(:,ig1,ig2)) + END DO + END DO +! + DO iw1=0,nidbas1 ! Weight function in dir 1 + igw1 = i+iw1 + DO iw2=0,nidbas2 ! Weight function in dir 2 + igw2 = MODULO(j+iw2-1, n2) + 1 + irow = igw2 + (igw1-1)*n2 + DO it1=0,nidbas1 ! Test function in dir 1 + igt1 = i+it1 + DO it2=0,nidbas2 ! Test function in dir 2 + igt2 = MODULO(j+it2-1, n2) + 1 + jcol = igt2 + (igt1-1)*n2 +!------------- + contrib = 0.0d0 + DO ig1=1,ng1 + DO ig2=1,ng2 + DO iterm=1,kterms + contrib = contrib + & + & fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * & + & fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * & + & coefs(iterm,ig1,ig2) * & + & fun2(it2,idert(iterm,2,ig1,ig2),ig2) * & + & fun1(it1,idert(iterm,1,ig1,ig2),ig1) * & + & wg1(ig1) * wg2(ig2) + END DO + END DO + END DO + CALL updtmat(mat, irow, jcol, contrib) +!------------- + END DO + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xg1, wg1, fun1) + DEALLOCATE(xg2, wg2, fun2) + DEALLOCATE(idert, iderw, coefs) + DEALLOCATE(left1,left2) +! +CONTAINS + SUBROUTINE coefeq(x, y, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x, y + INTEGER, INTENT(out) :: idt(:,:), idw(:,:) + DOUBLE PRECISION, INTENT(out) :: c(:) +! +! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy +! + c(1) = x ! + idt(1,1) = 1 + idt(1,2) = 0 + idw(1,1) = 1 + idw(1,2) = 0 +! + c(2) = 1.d0/x + idt(2,1) = 0 + idt(2,2) = 1 + idw(2,1) = 0 + idw(2,2) = 1 + END SUBROUTINE coefeq +END SUBROUTINE dismat + +SUBROUTINE disrhs(mbess, spl, rhs) +! +! Assembly the RHS using 2d spline spl +! + USE bsplines + IMPLICIT NONE + INTEGER, INTENT(in) :: mbess + TYPE(spline2d), INTENT(in) :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:) + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: n2, nidbas2, ndim2, ng2 + INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank + DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) + DOUBLE PRECISION:: contrib +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) +! + ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) + ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) +! +! Gauss quadature +! + CALL get_gauss(spl%sp1, ng1) + CALL get_gauss(spl%sp2, ng2) + WRITE(*,'(/a, 2i3)') 'Gauss points and weights, ngauss =', ng1, ng2 + ALLOCATE(xg1(ng1), wg1(ng1)) + ALLOCATE(xg2(ng1), wg2(ng1)) +!=========================================================================== +! 2.0 Assembly loop +! + nrank = SIZE(rhs) + rhs(1:nrank) = 0.0d0 +! + DO i=1,n1 + CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) + DO ig1=1,ng1 + CALL basfun(xg1(ig1), spl%sp1, fun1, i) + DO j=1,n2 + CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) + DO ig2=1,ng2 + CALL basfun(xg2(ig2), spl%sp2, fun2, j) + contrib = wg1(ig1)*wg2(ig2) * & + & rhseq(xg1(ig1),xg2(ig2), mbess) + DO k1=0,nidbas1 + i1 = i+k1 + DO k2=0,nidbas2 + j2 = MODULO(j+k2-1,n2) + 1 + ij = j2 + (i1-1)*n2 + rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1) + END DO + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xg1, wg1, fun1) + DEALLOCATE(xg2, wg2, fun2) +! +CONTAINS + DOUBLE PRECISION FUNCTION rhseq(x1, x2, m) + DOUBLE PRECISION, INTENT(in) :: x1, x2 + INTEGER, INTENT(in) :: m + rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2) + END FUNCTION rhseq +END SUBROUTINE disrhs + +SUBROUTINE ibcmat(mat, ny) +! +! Apply BC on matrix +! + USE matrix + IMPLICIT NONE + TYPE(pbmat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: ny + INTEGER :: kl, ku, nrank, i, j + DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:) + INTEGER :: i0, ii + INTEGER :: i0_arr(ny) +!=========================================================================== +! 1.0 Prologue +! + + ku = mat%ku + kl = ku + nrank = mat%rank + ALLOCATE(zsum(nrank), arr(nrank)) +! + i0 = nrank - ku + WRITE(*,'(a,i6)') 'Estimated i0', i0 + DO i=1,ny + CALL getcol(mat, nrank-ny+i, arr) + DO ii=1,nrank + i0_arr(i)=ii + IF(arr(ii) .NE. 0.0d0) EXIT + END DO + END DO +!!$ WRITE(*,'(a/(10i6))') 'i0_arr', i0_arr +! +!=========================================================================== +! 2.0 Unicity at the axis +! +! The vertical sum on the NY-th row +! + zsum = 0.0d0 + DO i=1,ny + arr = 0.0d0 + CALL getrow(mat, i, arr) + DO j=1,ny+ku + zsum(j) = zsum(j) + arr(j) + END DO + END DO +! + zsum(ny) = SUM(zsum(1:ny)) ! using symmetry + CALL putrow(mat, ny, zsum) +! +! The away operator +! + DO i = 1,ny-1 + arr = 0.0d0; arr(i) = 1.0d0 + CALL putrow(mat, i, arr) + END DO +!=========================================================================== +! 3.0 Dirichlet on right boundary +! + DO i = nrank, nrank-ny+1, -1 + CALL getcol(mat, i, arr) + arr = 0.0d0; arr(i) = 1.0d0 + CALL putrow(mat, i, arr) + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(zsum, arr) +! +END SUBROUTINE ibcmat +!+++ +SUBROUTINE ibcrhs(rhs, ny) +! +! Apply BC on RHS +! + IMPLICIT NONE + DOUBLE PRECISION, INTENT(inout) :: rhs(:) + INTEGER, INTENT(in) :: ny + INTEGER :: nrank + DOUBLE PRECISION :: zsum +!=========================================================================== +! 1.0 Prologue +! + nrank = SIZE(rhs,1) +!=========================================================================== +! 2.0 Unicity at the axis +! +! The vertical sum +! + zsum = SUM(rhs(1:ny)) + rhs(ny) = zsum + rhs(1:ny-1) = 0.0d0 +!=========================================================================== +! 3.0 Dirichlet on right boundary +! + rhs(nrank-ny+1:nrank) = 0.0d0 +END SUBROUTINE ibcrhs +!++++ +SUBROUTINE coefeq(x, y, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x, y + INTEGER, INTENT(out) :: idt(:,:), idw(:,:) + DOUBLE PRECISION, INTENT(out) :: c(:) +! +! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy +! + c(1) = x ! + idt(1,1) = 1 + idt(1,2) = 0 + idw(1,1) = 1 + idw(1,2) = 0 +! + c(2) = 1.d0/x + idt(2,1) = 0 + idt(2,2) = 1 + idw(2,1) = 0 + idw(2,2) = 1 +END SUBROUTINE coefeq diff --git a/examples/pde2d_petsc.f90 b/examples/pde2d_petsc.f90 new file mode 100644 index 0000000..4378b3f --- /dev/null +++ b/examples/pde2d_petsc.f90 @@ -0,0 +1,795 @@ +!> +!> @file pde2d_petsc.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +! +! Solving the 2d PDE using splines and PETSC matrix +! +! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0 +! exact solution: f(x,y) = (1-x^2) x^m cos(my) +! +MODULE pde2d_petsc_mod + USE bsplines + USE petsc_bsplines + IMPLICIT NONE +CONTAINS +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE dismat(spl, mat) +! +! Assembly of FE matrix mat using spline spl +! + TYPE(spline2d), INTENT(in) :: spl + TYPE(petsc_mat), INTENT(inout) :: mat +! + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: n2, nidbas2, ndim2, ng2 + INTEGER :: i, j, ig1, ig2 + INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol + DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:) + DOUBLE PRECISION:: contrib +! + INTEGER :: kterms ! Number of terms in weak form + INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:) ! Derivative order + DOUBLE PRECISION, ALLOCATABLE :: coefs(:,:,:) ! Terms in weak form + INTEGER, ALLOCATABLE :: left1(:), left2(:) +! + INTEGER :: istart, iend +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) +!!$ WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1 +!!$ WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2 +! +! Gauss quadature +! + CALL get_gauss(spl%sp1, ng1) + CALL get_gauss(spl%sp2, ng2) + ALLOCATE(xg1(ng1), wg1(ng1)) + ALLOCATE(xg2(ng1), wg2(ng1)) +! +! Weak form +! + kterms = mat%nterms + ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2)) + ALLOCATE(coefs(kterms,ng1,ng2)) +! + ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative + ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! +! +! Matrix partition +! + istart = mat%istart + iend = mat%iend +!=========================================================================== +! 2.0 Assembly loop +! + ALLOCATE(left1(ng1)) + ALLOCATE(left2(ng2)) + DO i=1,n1 + CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) + left1 = i + CALL basfun(xg1, spl%sp1, fun1, left1) + DO j=1,n2 + CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) + left2 = j + CALL basfun(xg2, spl%sp2, fun2, left2) +! + DO ig1=1,ng1 + DO ig2=1,ng2 + CALL coefeq(xg1(ig1), xg2(ig2), & + & idert(:,:,ig1,ig2), & + & iderw(:,:,ig1,ig2), & + & coefs(:,ig1,ig2)) + END DO + END DO +! + DO iw1=0,nidbas1 ! Weight function in dir 1 + igw1 = i+iw1 + DO iw2=0,nidbas2 ! Weight function in dir 2 + igw2 = MODULO(j+iw2-1, n2) + 1 + irow = igw2 + (igw1-1)*n2 + IF( irow.GE.istart .AND. irow.LE.iend) THEN + DO it1=0,nidbas1 ! Test function in dir 1 + igt1 = i+it1 + DO it2=0,nidbas2 ! Test function in dir 2 + igt2 = MODULO(j+it2-1, n2) + 1 + jcol = igt2 + (igt1-1)*n2 +!------------- + contrib = 0.0d0 + DO ig1=1,ng1 + DO ig2=1,ng2 + DO iterm=1,kterms + contrib = contrib + & + & fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * & + & fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * & + & coefs(iterm,ig1,ig2) * & + & fun2(it2,idert(iterm,2,ig1,ig2),ig2) * & + & fun1(it1,idert(iterm,1,ig1,ig2),ig1) * & + & wg1(ig1) * wg2(ig2) + END DO + END DO + END DO + CALL updtmat(mat, irow, jcol, contrib) +!------------- + END DO + END DO + END IF + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xg1, wg1, fun1) + DEALLOCATE(xg2, wg2, fun2) + DEALLOCATE(idert, iderw, coefs) + DEALLOCATE(left1,left2) +! + CONTAINS + SUBROUTINE coefeq(x, y, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x, y + INTEGER, INTENT(out) :: idt(:,:), idw(:,:) + DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1)) +! +! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy +! + c(1) = x ! + idt(1,1) = 1 + idt(1,2) = 0 + idw(1,1) = 1 + idw(1,2) = 0 + ! + c(2) = 1.d0/x + idt(2,1) = 0 + idt(2,2) = 1 + idw(2,1) = 0 + idw(2,2) = 1 + END SUBROUTINE coefeq + END SUBROUTINE dismat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE disrhs(mbess, spl, rhs) +! +! Assembly the RHS using 2d spline spl +! + INTEGER, INTENT(in) :: mbess + TYPE(spline2d), INTENT(in) :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:) + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: n2, nidbas2, ndim2, ng2 + INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank + DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) + DOUBLE PRECISION:: contrib +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) +! + ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) + ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) +! +! Gauss quadature +! + CALL get_gauss(spl%sp1, ng1) + CALL get_gauss(spl%sp2, ng2) + ALLOCATE(xg1(ng1), wg1(ng1)) + ALLOCATE(xg2(ng1), wg2(ng1)) +!=========================================================================== +! 2.0 Assembly loop +! + nrank = SIZE(rhs) + rhs(1:nrank) = 0.0d0 +! + DO i=1,n1 + CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) + DO ig1=1,ng1 + CALL basfun(xg1(ig1), spl%sp1, fun1, i) + DO j=1,n2 + CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) + DO ig2=1,ng2 + CALL basfun(xg2(ig2), spl%sp2, fun2, j) + contrib = wg1(ig1)*wg2(ig2) * & + & rhseq(xg1(ig1),xg2(ig2), mbess) + DO k1=0,nidbas1 + i1 = i+k1 + DO k2=0,nidbas2 + j2 = MODULO(j+k2-1,n2) + 1 + ij = j2 + (i1-1)*n2 + rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1) + END DO + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xg1, wg1, fun1) + DEALLOCATE(xg2, wg2, fun2) +! + CONTAINS + DOUBLE PRECISION FUNCTION rhseq(x1, x2, m) + DOUBLE PRECISION, INTENT(in) :: x1, x2 + INTEGER, INTENT(in) :: m + rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2) + END FUNCTION rhseq + END SUBROUTINE disrhs +! +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE ibcmat(mat, ny) +! +! Apply BC on matrix +! + TYPE(petsc_mat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: ny + INTEGER :: nrank, i, j + DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:) +!=========================================================================== +! 1.0 Prologue +! + nrank = mat%rank +!=========================================================================== +! 2.0 Unicity at the axis +! +! The vertical sum on the NY-th row +! + ALLOCATE(zsum(nrank), arr(nrank)) + zsum = 0.0d0 + DO i=1,ny + arr = 0.0d0 + CALL getrow(mat, i, arr) + zsum(:) = zsum(:) + arr(:) + END DO + IF(mat%nlsym) THEN + zsum(ny) = SUM(zsum(1:ny)) ! using symmetry + END IF + CALL putrow(mat, ny, zsum) +! +! The horizontal sum on the NY-th column +! + IF( .NOT.mat%nlsym) THEN + zsum = 0.0d0 + DO j=1,ny + arr = 0.0d0 + CALL getcol(mat, j, arr) + zsum(ny:) = zsum(ny:) + arr(ny:) + END DO + CALL putcol(mat, ny, zsum) + END IF +! +! The away operator +! + IF( .NOT.mat%nlsym) THEN + DO j = 1,ny-1 + arr = 0.0d0; arr(j) = 1.0d0 + CALL putcol(mat, j, arr) + END DO + END IF +! + DO i = 1,ny-1 + arr = 0.0d0; arr(i) = 1.0d0 + CALL putrow(mat, i, arr) + END DO + DEALLOCATE(zsum) + DEALLOCATE(arr) +! +!=========================================================================== +! 3.0 Dirichlet on right boundary +! + ALLOCATE(arr(nrank)) + IF( .NOT.mat%nlsym) THEN + DO j = nrank, nrank-ny+1, -1 + arr = 0.0d0; arr(j) = 1.0d0 + CALL putcol(mat, j, arr) + END DO + END IF +! + DO i = nrank, nrank-ny+1, -1 + arr = 0.0d0; arr(i) = 1.0d0 + CALL putrow(mat, i, arr) + END DO + DEALLOCATE(arr) +!=========================================================================== +! 9.0 Epilogue +! + END SUBROUTINE ibcmat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE ibcrhs(rhs, ny) +! +! Apply BC on RHS +! + DOUBLE PRECISION, INTENT(inout) :: rhs(:) + INTEGER, INTENT(in) :: ny + INTEGER :: nrank + DOUBLE PRECISION :: zsum +!=========================================================================== +! 1.0 Prologue +! + nrank = SIZE(rhs,1) +!=========================================================================== +! 2.0 Unicity at the axis +! +! The vertical sum +! + zsum = SUM(rhs(1:ny)) + rhs(ny) = zsum + rhs(1:ny-1) = 0.0d0 +!=========================================================================== +! 3.0 Dirichlet on right boundary +! + rhs(nrank-ny+1:nrank) = 0.0d0 + END SUBROUTINE ibcrhs +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +END MODULE pde2d_petsc_mod +PROGRAM main + USE pde2d_petsc_mod + USE futils +! + IMPLICIT NONE + INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms + INTEGER :: nitmax=10000, nits, nits0, ntrials=0 + DOUBLE PRECISION :: rtol=1.e-9 + LOGICAL :: nlppform + INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it + DOUBLE PRECISION :: pi, coefx(5), coefy(5) + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol + DOUBLE PRECISION, ALLOCATABLE :: row_sum(:), row(:) + TYPE(spline2d) :: splxy + TYPE(petsc_mat) :: mat +! + CHARACTER(len=128) :: file='pde2d_petsc.h5' + INTEGER :: fid + DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol + DOUBLE PRECISION :: mem, dopla + DOUBLE PRECISION :: t0, tmat, tsolv, tsolv0, tgrid, gflops1, gflops2 + LOGICAL :: nlsym + LOGICAL :: nlforce_zero +! + INTEGER :: ierr, me + INTEGER(kind=8) :: nzfact + INTEGER :: nnz_loc, nnz + DOUBLE PRECISION :: mem_loc, mem_min, mem_max +! + CHARACTER(len=128) :: matfile='mat.dat' + logical :: file_exist +! + NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlsym,& + & nlforce_zero, coefx, coefy, nitmax, rtol, ntrials, & + & matfile +!=========================================================================== +! 1.0 Prologue +! + CALL mpi_init(ierr) + CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) +! +! Read in data specific to run +! + nx = 8 ! Number of intervals in x + ny = 8 ! Number of intervals in y + nidbas = (/3,3/) ! Degree of splines + ngauss = (/4,4/) ! Number of Gauss points/interval + mbess = 2 ! Exponent of differential problem + nterms = 2 ! Number of terms in weak form + nlppform = .TRUE. ! Use PPFORM for gridval or not + nlsym = .FALSE. ! Symmetric or unsymmetric matrix + nlforce_zero = .FALSE. ! Remove existing nodes with zero val in putele/row/ele + coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + nitmax = 10000 ! Max number ofviterations + rtol = 1.e-9 ! Relative tolerance + ntrials = 0 ! Run ntrials solution steps after setup + matfile = '' ! Save matrix file to matfile if not empty +! + IF(me.EQ.0) THEN + READ(*,newrun) + WRITE(*,newrun) + END IF + CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(mbess, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nterms, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nlppform, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nlsym, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nlforce_zero, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(coefx, 5, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(coefy, 5, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nitmax, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(rtol, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(ntrials, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(matfile, 128, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) +! +! Define grid on x (=radial) & y (=poloidal) axis +! + pi = 4.0d0*ATAN(1.0d0) + ALLOCATE(xgrid(0:nx), ygrid(0:ny)) + xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 + CALL meshdist(coefx, xgrid, nx) + ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi + CALL meshdist(coefy, ygrid, ny) +! +! Create hdf5 file +! + IF(me.EQ.0) THEN + CALL creatf(file, fid, 'PDE2D Result File', real_prec='d') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NY', ny) + CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) + CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) + CALL attach(fid, '/', 'NGAUSS1', ngauss(1)) + CALL attach(fid, '/', 'NGAUSS2', ngauss(2)) + CALL attach(fid, '/', 'MBESS', mbess) + END IF +!=========================================================================== +! 2.0 Discretize the PDE +! +! Set up spline +! + t0 = mpi_wtime() + CALL set_spline(nidbas, ngauss, & + & xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform) +! +! FE matrix assembly +! + nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix + IF(me.EQ.0) WRITE(*,'(a,i12 )') 'nrank', nrank +! + CALL init(nrank, nterms, mat, comm=MPI_COMM_WORLD) +! + INQUIRE(file=TRIM(matfile), exist=file_exist) + IF( file_exist ) THEN + t0 = mpi_wtime() + CALL load_mat(mat, matfile) + tmat = mpi_wtime()-t0 + if(me.eq.0) WRITE(*,'(a,1pe12.3)') 'Mat read time (s) ', tmat + ELSE + t0 = mpi_wtime() + CALL dismat(splxy, mat) + CALL ibcmat(mat, ny) +! +!!$ ALLOCATE(row_sum(mat%istart:mat%iend)) +!!$ ALLOCATE(row(mat%rank)) +!!$ DO i=mat%istart,mat%iend +!!$ row = 0.0d0 +!!$ CALL getrow(mat, i, row) +!!$ row_sum(i) = SUM(row) +!!$ END DO +!!$ WRITE(*,'(a,i3.3,a,(10(1pe12.3)))') 'PE', me, ': row_sum', row_sum +! + CALL to_mat(mat) +! +!!$ DO i=mat%istart,mat%iend +!!$ row = 0.0d0 +!!$ CALL getrow(mat, i, row) +!!$ row_sum(i) = SUM(row) +!!$ END DO +!!$ WRITE(*,'(a,i3.3,a,(10(1pe12.3)))') 'PE', me, ': row_sum(after)', row_sum + CALL save_mat(mat, matfile) + tmat = mpi_wtime() - t0 + IF(me.EQ.0) WRITE(*,'(a,1pe12.3)') 'Mat construction time (s) ', tmat + END IF +! + IF(me.EQ.0) THEN + WRITE(*,'(a,2i16)') 'Mat rank, nnz', mat%rank, mat%nnz + END IF +! +! RHS assembly +! + t0=mpi_wtime() + ALLOCATE(rhs(nrank), sol(nrank)) + CALL disrhs(mbess, splxy, rhs) +! +! BC on RHS +! + CALL ibcrhs(rhs, ny) +! + mem_loc = mem() + CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after DISMAT') + IF(me.EQ.0) THEN + CALL putarr(fid, '/RHS', rhs, 'RHS of linear system') + END IF + IF(me.EQ.0) WRITE(*,'(a,1pe12.3)') 'RHS construction time (s) ', mpi_wtime()-t0 +!=========================================================================== +! 3.0 Solve the dicretized system +! + + CALL minmax_i8(mat%nnz_loc, MPI_COMM_WORLD, 'local nnz') + IF(me.EQ.0) THEN + WRITE(*,'(a,i16)') 'Number of non-zeros of matrix = ', mat%nnz + END IF +! + t0 = mpi_wtime() + CALL bsolve(mat, rhs, sol, rtol, nitmax, nits0) + tsolv0 = mpi_wtime() - t0 + IF(me.EQ.0) WRITE(*,'(a,1pe12.3,i8)') 'Solve+setup time(s) and nits ', tsolv0, nits0 +! + IF(ntrials .GT. 0) THEN + t0 = mpi_wtime() + DO it=1,ntrials ! ntrials iterations for timing + sol = 0.0d0 + CALL bsolve(mat, rhs, sol, rtol, nitmax, nits) + sol(1:ny-1) = sol(ny) + END DO + tsolv = (mpi_wtime() - t0)/REAL(ntrials) + IF(me.EQ.0) WRITE(*,'(a,1pe12.3,i8)') 'Solve time(s) and nits ', tsolv, nits + END IF +! + mem_loc = mem() + CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after BSOLVE') +! + CALL destroy(mat) +! +! Spline coefficients, taking into account of periodicity in y +! Note: in SOL, y was numbered first. +! + dimx = splxy%sp1%dim + dimy = splxy%sp2%dim + ALLOCATE(bcoef(0:dimx-1, 0:dimy-1)) + DO j=0,dimy-1 + DO i=0,dimx-1 + ij = MODULO(j,ny) + i*ny + 1 + bcoef(i,j) = sol(ij) + END DO + END DO +! + mem_loc = mem() + CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after setting bcoef') +!=========================================================================== +! 4.0 Check the solution +! +! Check function values computed with various method +! + IF(me.EQ.0) THEN + ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny)) + DO i=0,nx + DO j=0,ny + solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j)) + END DO + END DO + jder = (/0,0/) +! +! Compute PPFORM/BCOEFS at first call to gridval + CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef) +! + WRITE(*,'(/a)') '*** Checking solutions' + errsol = solana - solcal + WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & + & norm2(errsol) / norm2(solana) +! + CALL putarr(fid, '/xgrid', xgrid, 'r') + CALL putarr(fid, '/ygrid', ygrid, '\theta') + CALL putarr(fid, '/sol', solcal, 'Solutions') + CALL putarr(fid, '/solana', solana,'Exact solutions') + CALL putarr(fid, '/errors', errsol, 'Errors') +! +! Check derivatives d/dx and d/dy +! + WRITE(*,'(/a)') '*** Checking gradient' + DO i=0,nx + DO j=0,ny + IF( mbess .EQ. 0 ) THEN + solana(i,j) = -2.0d0 * xgrid(i) + ELSE + solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * & + & xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) + END IF + END DO + END DO +! + jder = (/1,0/) + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + errsol = solana - solcal + CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions') + WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol) +! + DO i=0,nx + DO j=0,ny + solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j)) + END DO + END DO +! + jder = (/0,1/) + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions') + errsol = solana - solcal + WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol) +! + DEALLOCATE(solcal, solana, errsol) + END IF +!!$!=========================================================================== +!!$! 5.0 Clear the matrix and recompute +!!$! +!!$ IF(me.EQ.0) WRITE(*,'(/a)') 'Recompute the solver ...' +!!$ t0 = mpi_wtime()() +!!$ CALL clear_mat(mat) +!!$ CALL dismat(splxy, mat) +!!$ CALL ibcmat(mat, ny) +!!$ tmat = mpi_wtime()()-t0 +!!$! +!!$ t0 = mpi_wtime()() +!!$ CALL numfact(mat, debug=.FALSE.) +!!$ tfact = mpi_wtime()()-t0 +!!$ gflops1 = mat%petsc_par%RINFOG(3) / tfact / 1.d9 +!!$! +!!$ t0 = mpi_wtime()() +!!$ ALLOCATE(newsol(nrank)) +!!$ CALL bsolve(mat, rhs, newsol) +!!$ newsol(1:ny-1) = newsol(ny) +!!$ tsolv = mpi_wtime()()-t0 +!!$! +!!$ IF(me.EQ.0) THEN +!!$ WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol)) +!!$ WRITE(*,'(/a)') '---' +!!$ WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat +!!$ WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact +!!$ WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv +!!$ WRITE(*,'(a,1pe12.3)') 'Total (s) ', tmat+tfact+tsolv +!!$ WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1 +!!$ END IF +!!$! +!!$ DEALLOCATE(newsol) +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xgrid, rhs, sol) + DEALLOCATE(bcoef) +!!$ CALL destroy(mat) + CALL destroy_sp(splxy) +! + IF(me.EQ.0) CALL closef(fid) + CALL mpi_finalize(ierr) +!=========================================================================== +! +CONTAINS + FUNCTION norm2(x) +! +! Compute the 2-norm of array x +! + IMPLICIT NONE + DOUBLE PRECISION :: norm2 + DOUBLE PRECISION, INTENT(in) :: x(:,:) + DOUBLE PRECISION :: sum2 + INTEGER :: i, j +! + sum2 = 0.0d0 + DO i=1,SIZE(x,1) + DO j=1,SIZE(x,2) + sum2 = sum2 + x(i,j)**2 + END DO + END DO + norm2 = SQRT(sum2) + END FUNCTION norm2 +! + SUBROUTINE minmax_i(k, comm, str) + CHARACTER(len=*), INTENT(in) :: str + INTEGER, INTENT(in) :: k + INTEGER, INTENT(in) :: comm + INTEGER :: me, ierr, kmin, kmax + CALL mpi_comm_rank(comm, me, ierr) + CALL mpi_reduce(k, kmin, 1, MPI_INTEGER, MPI_MIN, 0, comm, ierr) + CALL mpi_reduce(k, kmax, 1, MPI_INTEGER, MPI_MAX, 0, comm, ierr) + IF( me.EQ.0 ) THEN + WRITE(*,'(a,2i16)') 'Minmax of ' // TRIM(str), kmin, kmax + END IF + END SUBROUTINE minmax_i +! + SUBROUTINE minmax_i8(k, comm, str) + CHARACTER(len=*), INTENT(in) :: str + INTEGER(8), INTENT(in) :: k + INTEGER, INTENT(in) :: comm + INTEGER :: me, ierr + INTEGER(8) :: kmin, kmax + CALL mpi_comm_rank(comm, me, ierr) + CALL mpi_reduce(k, kmin, 1, MPI_INTEGER8, MPI_MIN, 0, comm, ierr) + CALL mpi_reduce(k, kmax, 1, MPI_INTEGER8, MPI_MAX, 0, comm, ierr) + IF( me.EQ.0 ) THEN + WRITE(*,'(a,2i16)') 'Minmax of ' // TRIM(str), kmin, kmax + END IF + END SUBROUTINE minmax_i8 +!! + SUBROUTINE minmax_r(x, comm, str) + CHARACTER(len=*), INTENT(in) :: str + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(in) :: comm + INTEGER :: me, ierr + DOUBLE PRECISION :: xmin, xmax + CALL mpi_comm_rank(comm, me, ierr) + CALL mpi_reduce(x, xmin, 1, MPI_DOUBLE_PRECISION, MPI_MIN, 0, comm, ierr) + CALL mpi_reduce(x, xmax, 1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierr) + IF( me.EQ.0 ) THEN + WRITE(*,'(a,2(1pe12.4))') 'Minmax of ' // TRIM(str), xmin, xmax + END IF + END SUBROUTINE minmax_r +END PROGRAM main +! +!+++ +SUBROUTINE meshdist(c, x, nx) +! +! Construct an 1d non-equidistant mesh given a +! mesh distribution function. +! + IMPLICIT NONE + DOUBLE PRECISION, INTENT(in) :: c(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + INTEGER :: nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) +CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) + END FUNCTION fdist +END SUBROUTINE meshdist +!+++ diff --git a/examples/pde2d_pwsmp.f90 b/examples/pde2d_pwsmp.f90 new file mode 100644 index 0000000..1cd7592 --- /dev/null +++ b/examples/pde2d_pwsmp.f90 @@ -0,0 +1,776 @@ +!> +!> @file pde2d_pwsmp.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +! +! Solving the 2d PDE using splines and WSMP non-symmetric matrix +! +! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0 +! exact solution: f(x,y) = (1-x^2) x^m cos(my) +! +MODULE pde2d_pwsmp_mod + USE bsplines + USE pwsmp_bsplines + IMPLICIT NONE +CONTAINS +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE dismat(spl, mat) +! +! Assembly of FE matrix mat using spline spl +! + TYPE(spline2d), INTENT(in) :: spl + TYPE(wsmp_mat), INTENT(inout) :: mat +! + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: n2, nidbas2, ndim2, ng2 + INTEGER :: i, j, ig1, ig2 + INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol + DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:) + DOUBLE PRECISION:: contrib +! + INTEGER :: kterms ! Number of terms in weak form + INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:) ! Derivative order + DOUBLE PRECISION, ALLOCATABLE :: coefs(:,:,:) ! Terms in weak form + INTEGER, ALLOCATABLE :: left1(:), left2(:) +! + INTEGER :: istart, iend +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) +! +! Gauss quadature +! + CALL get_gauss(spl%sp1, ng1) + CALL get_gauss(spl%sp2, ng2) + ALLOCATE(xg1(ng1), wg1(ng1)) + ALLOCATE(xg2(ng1), wg2(ng1)) +! +! Weak form +! + kterms = mat%nterms + ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2)) + ALLOCATE(coefs(kterms,ng1,ng2)) +! + ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative + ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! +! +! Matrix partition +! + istart = mat%istart + iend = mat%iend +!=========================================================================== +! 2.0 Assembly loop +! + ALLOCATE(left1(ng1)) + ALLOCATE(left2(ng2)) + DO i=1,n1 + CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) + left1 = i + CALL basfun(xg1, spl%sp1, fun1, left1) + DO j=1,n2 + CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) + left2 = j + CALL basfun(xg2, spl%sp2, fun2, left2) +! + DO ig1=1,ng1 + DO ig2=1,ng2 + CALL coefeq(xg1(ig1), xg2(ig2), & + & idert(:,:,ig1,ig2), & + & iderw(:,:,ig1,ig2), & + & coefs(:,ig1,ig2)) + END DO + END DO +! + DO iw1=0,nidbas1 ! Weight function in dir 1 + igw1 = i+iw1 + DO iw2=0,nidbas2 ! Weight function in dir 2 + igw2 = MODULO(j+iw2-1, n2) + 1 + irow = igw2 + (igw1-1)*n2 + IF( irow.GE.istart .AND. irow.LE.iend) THEN + DO it1=0,nidbas1 ! Test function in dir 1 + igt1 = i+it1 + DO it2=0,nidbas2 ! Test function in dir 2 + igt2 = MODULO(j+it2-1, n2) + 1 + jcol = igt2 + (igt1-1)*n2 +!------------- + contrib = 0.0d0 + DO ig1=1,ng1 + DO ig2=1,ng2 + DO iterm=1,kterms + contrib = contrib + & + & fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * & + & fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * & + & coefs(iterm,ig1,ig2) * & + & fun2(it2,idert(iterm,2,ig1,ig2),ig2) * & + & fun1(it1,idert(iterm,1,ig1,ig2),ig1) * & + & wg1(ig1) * wg2(ig2) + END DO + END DO + END DO + CALL updtmat(mat, irow, jcol, contrib) +!------------- + END DO + END DO + END IF + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xg1, wg1, fun1) + DEALLOCATE(xg2, wg2, fun2) + DEALLOCATE(idert, iderw, coefs) + DEALLOCATE(left1,left2) +! + CONTAINS + SUBROUTINE coefeq(x, y, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x, y + INTEGER, INTENT(out) :: idt(:,:), idw(:,:) + DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1)) +! +! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy +! + c(1) = x ! + idt(1,1) = 1 + idt(1,2) = 0 + idw(1,1) = 1 + idw(1,2) = 0 + ! + c(2) = 1.d0/x + idt(2,1) = 0 + idt(2,2) = 1 + idw(2,1) = 0 + idw(2,2) = 1 + END SUBROUTINE coefeq + END SUBROUTINE dismat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE disrhs(mbess, spl, rhs) +! +! Assembly the RHS using 2d spline spl +! + INTEGER, INTENT(in) :: mbess + TYPE(spline2d), INTENT(in) :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:) + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: n2, nidbas2, ndim2, ng2 + INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank + DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) + DOUBLE PRECISION:: contrib +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) +! + ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) + ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) +! +! Gauss quadature +! + CALL get_gauss(spl%sp1, ng1) + CALL get_gauss(spl%sp2, ng2) + ALLOCATE(xg1(ng1), wg1(ng1)) + ALLOCATE(xg2(ng1), wg2(ng1)) +!=========================================================================== +! 2.0 Assembly loop +! + nrank = SIZE(rhs) + rhs(1:nrank) = 0.0d0 +! + DO i=1,n1 + CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) + DO ig1=1,ng1 + CALL basfun(xg1(ig1), spl%sp1, fun1, i) + DO j=1,n2 + CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) + DO ig2=1,ng2 + CALL basfun(xg2(ig2), spl%sp2, fun2, j) + contrib = wg1(ig1)*wg2(ig2) * & + & rhseq(xg1(ig1),xg2(ig2), mbess) + DO k1=0,nidbas1 + i1 = i+k1 + DO k2=0,nidbas2 + j2 = MODULO(j+k2-1,n2) + 1 + ij = j2 + (i1-1)*n2 + rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1) + END DO + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xg1, wg1, fun1) + DEALLOCATE(xg2, wg2, fun2) +! + CONTAINS + DOUBLE PRECISION FUNCTION rhseq(x1, x2, m) + DOUBLE PRECISION, INTENT(in) :: x1, x2 + INTEGER, INTENT(in) :: m + rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2) + END FUNCTION rhseq + END SUBROUTINE disrhs +! +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE ibcmat(mat, ny) +! +! Apply BC on matrix +! + TYPE(wsmp_mat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: ny + INTEGER :: nrank, i, j + DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:) +!=========================================================================== +! 1.0 Prologue +! + nrank = mat%rank + ALLOCATE(zsum(nrank), arr(nrank)) +!=========================================================================== +! 2.0 Unicity at the axis +! +! The vertical sum on the NY-th row +! + zsum = 0.0d0 + DO i=1,ny + arr = 0.0d0 + CALL getrow(mat, i, arr) + zsum(:) = zsum(:) + arr(:) + END DO + CALL putrow(mat, ny, zsum) +! +! The horizontal sum on the NY-th column +! + IF( .NOT.mat%nlsym) THEN + zsum = 0.0d0 + DO j=1,ny + arr = 0.0d0 + CALL getcol(mat, j, arr) + zsum(ny:) = zsum(ny:) + arr(ny:) + END DO + CALL putcol(mat, ny, zsum) + END IF +! +! The away operator +! + IF( .NOT.mat%nlsym) THEN + DO j = 1,ny-1 + arr = 0.0d0; arr(j) = 1.0d0 + CALL putcol(mat, j, arr) + END DO + END IF +! + DO i = 1,ny-1 + arr = 0.0d0; arr(i) = 1.0d0 + CALL putrow(mat, i, arr) + END DO +! +!=========================================================================== +! 3.0 Dirichlet on right boundary +! + DO j = nrank, nrank-ny+1, -1 + arr = 0.0d0; arr(j) = 1.0d0 + CALL putcol(mat, j, arr) + END DO +! + DO i = nrank, nrank-ny+1, -1 + arr = 0.0d0; arr(i) = 1.0d0 + CALL putrow(mat, i, arr) + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(zsum, arr) +! + END SUBROUTINE ibcmat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE ibcrhs(rhs, ny) +! +! Apply BC on RHS +! + DOUBLE PRECISION, INTENT(inout) :: rhs(:) + INTEGER, INTENT(in) :: ny + INTEGER :: nrank + DOUBLE PRECISION :: zsum +!=========================================================================== +! 1.0 Prologue +! + nrank = SIZE(rhs,1) +!=========================================================================== +! 2.0 Unicity at the axis +! +! The vertical sum +! + zsum = SUM(rhs(1:ny)) + rhs(ny) = zsum + rhs(1:ny-1) = 0.0d0 +!=========================================================================== +! 3.0 Dirichlet on right boundary +! + rhs(nrank-ny+1:nrank) = 0.0d0 + END SUBROUTINE ibcrhs +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +END MODULE pde2d_pwsmp_mod +PROGRAM main + USE pde2d_pwsmp_mod + USE futils +! + IMPLICIT NONE + INCLUDE 'mpif.h' + INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms + LOGICAL :: nlppform + INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it + DOUBLE PRECISION :: pi, coefx(5), coefy(5) + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol + TYPE(spline2d) :: splxy + TYPE(wsmp_mat) :: mat +! + CHARACTER(len=128) :: file='pde2d_wsmp.h5' + INTEGER :: fid + DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol + DOUBLE PRECISION :: seconds, mem, dopla + DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2 + DOUBLE PRECISION :: tconv, treord + INTEGER :: nits=100 + LOGICAL :: nlsym, nlforce_zero +! + INTEGER :: ierr, me, nprocs + DOUBLE PRECISION :: mem_loc, mem_min, mem_max +! + NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, & + & nlsym, nlforce_zero, coefx, coefy +!=========================================================================== +! 1.0 Prologue +! + CALL mpi_init(ierr) + CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) + CALL mpi_comm_size(MPI_COMM_WORLD, nprocs, ierr) +! +! Read in data specific to run +! + nx = 8 ! Number of intervals in x + ny = 8 ! Number of intervals in y + nidbas = (/3,3/) ! Degree of splines + ngauss = (/4,4/) ! Number of Gauss points/interval + mbess = 2 ! Exponent of differential problem + nterms = 2 ! Number of terms in weak form + nlppform = .TRUE. ! Use PPFORM for gridval or not + nlsym = .TRUE. ! Symmetric matrix or not + nlforce_zero = .FALSE. ! Remove existing nodes with zero val in putele/row/ele + coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function +! + IF(me.EQ.0) THEN + READ(*,newrun) + WRITE(*,newrun) + END IF + CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(mbess, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nterms, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nlppform, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nlforce_zero, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nlsym, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(coefx, 5, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(coefy, 5, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) +! +! Define grid on x (=radial) & y (=poloidal) axis +! + pi = 4.0d0*ATAN(1.0d0) + ALLOCATE(xgrid(0:nx), ygrid(0:ny)) + xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 + CALL meshdist(coefx, xgrid, nx) + ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi + CALL meshdist(coefy, ygrid, ny) +! +! Create hdf5 file +! + if(me.eq.0) then + CALL creatf(file, fid, 'PDE2D Result File', real_prec='d') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NY', ny) + CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) + CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) + CALL attach(fid, '/', 'NGAUSS1', ngauss(1)) + CALL attach(fid, '/', 'NGAUSS2', ngauss(2)) + CALL attach(fid, '/', 'MBESS', mbess) + end if +!=========================================================================== +! 2.0 Discretize the PDE +! +! Set up spline +! + t0 = seconds() + CALL set_spline(nidbas, ngauss, & + & xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform) +! +! FE matrix assembly +! + nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix + IF(me.EQ.0) THEN + WRITE(*,'(a,i8,a,i4)') 'nrank =', nrank, ' nprocs =', nprocs + END IF +! + CALL init(nrank, nterms, mat, nlforce_zero=nlforce_zero, nlsym=nlsym, & + & comm_in=MPI_COMM_WORLD) + WRITE(*,'(a,i4.4,a,3i16)') 'PE', me, ' istart, iend, nloc', mat%istart, mat%iend, & + & mat%iend-mat%istart+1 +! + CALL dismat(splxy, mat) +! +! BC on Matrix +! + CALL ibcmat(mat, ny) + tmat = seconds() - t0 +! +! RHS assembly +! + ALLOCATE(rhs(nrank), sol(nrank)) + CALL disrhs(mbess, splxy, rhs) +! +! BC on RHS +! + CALL ibcrhs(rhs, ny) +! + mem_loc = mem() + CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after DISMAT') + IF(me.EQ.0) THEN + CALL putarr(fid, '/RHS', rhs, 'RHS of linear system') + END IF +!=========================================================================== +! 3.0 Solve the dicretized system +! + t0 = seconds() + CALL to_mat(mat) + tconv = seconds() -t0 + CALL minmax_i(mat%nnz_loc, MPI_COMM_WORLD, 'local nnz') + IF(me.EQ.0) THEN + WRITE(*,'(a,i16)') 'Number of non-zeros of matrix = ', mat%nnz + END IF +! + t0 = seconds() + CALL reord_mat(mat) + treord = seconds() - t0 +! + t0 = seconds() + CALL numfact(mat) + tfact = seconds() - t0 +! + mem_loc = mem() + CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after FACTOR') +! + IF(me.EQ.0) THEN + WRITE(*,'(/a,i6)') 'Number of nonzeros in factor (/1000) = ',mat%p%iparm(24) + WRITE(*,'(a,1pe12.3)') 'Number of factorization GFLOPS = ',mat%p%dparm(23)/1.d9 + END IF + gflops1 = mat%p%dparm(23) / tfact / 1.d9 +! + CALL bsolve(mat, rhs, sol) +! + t0 = seconds() + DO it=1,nits ! nits iterations for timing + CALL bsolve(mat, rhs, sol) + sol(1:ny-1) = sol(ny) + END DO + tsolv = (seconds() - t0)/REAL(nits) +! + mem_loc = mem() + CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after BSOLVE') +! +! Spline coefficients, taking into account of periodicity in y +! Note: in SOL, y was numbered first. +! + dimx = splxy%sp1%dim + dimy = splxy%sp2%dim + ALLOCATE(bcoef(0:dimx-1, 0:dimy-1)) + DO j=0,dimy-1 + DO i=0,dimx-1 + ij = MODULO(j,ny) + i*ny + 1 + bcoef(i,j) = sol(ij) + END DO + END DO + IF(me.EQ.0) THEN + WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef) + CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution') + END IF +!=========================================================================== +! 4.0 Check the solution +! +! Check function values computed with various method (only on proc 0) +! + IF(me.EQ.0) THEN + ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny)) + DO i=0,nx + DO j=0,ny + solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j)) + END DO + END DO + jder = (/0,0/) +! +! Compute PPFORM/BCOEFS at first call to gridval + CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef) +! + WRITE(*,'(/a)') '*** Checking solutions' + t0 = seconds() + DO it=1,nits ! nits iterations for timing + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + END DO + tgrid = (seconds() - t0)/REAL(nits) + errsol = solana - solcal + IF( SIZE(bcoef,2) .LE. 10 ) THEN + CALL prnmat('BCOEF', bcoef) + CALL prnmat('SOLANA', solana) + CALL prnmat('SOLCAL', solcal) + CALL prnmat('ERRSOL', errsol) + END IF + WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & + & norm2(errsol) / norm2(solana) + WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid +! + CALL putarr(fid, '/xgrid', xgrid, 'r') + CALL putarr(fid, '/ygrid', ygrid, '\theta') + CALL putarr(fid, '/sol', solcal, 'Solutions') + CALL putarr(fid, '/solana', solana,'Exact solutions') + CALL putarr(fid, '/errors', errsol, 'Errors') +! +! Check derivatives d/dx and d/dy +! + WRITE(*,'(/a)') '*** Checking gradient' + DO i=0,nx + DO j=0,ny + IF( mbess .EQ. 0 ) THEN + solana(i,j) = -2.0d0 * xgrid(i) + ELSE + solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * & + & xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) + END IF + END DO + END DO +! + jder = (/1,0/) + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + errsol = solana - solcal + CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions') + WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol) +! + DO i=0,nx + DO j=0,ny + solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j)) + END DO + END DO +! + jder = (/0,1/) + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions') + errsol = solana - solcal + WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol) +! + WRITE(*,'(/a)') '---' + WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat + WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s) ', tconv + WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s) ', treord + WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact + WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv + WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s) ', tsolv + WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1 + DEALLOCATE(solcal) + DEALLOCATE(solana) + DEALLOCATE(errsol) + END IF +!=========================================================================== +! 5.0 Clear the matrix and recompute +! + IF(me.EQ.0) WRITE(*,'(/a)') 'Recompute the solver ...' + t0 = seconds() + CALL clear_mat(mat) + CALL dismat(splxy, mat) + CALL ibcmat(mat, ny) + tmat = seconds()-t0 +! + t0 = seconds() +!!$ CALL numfact(mat) + CALL factor(mat, nlreord=.FALSE.) + tfact = seconds()-t0 + gflops1 = mat%p%dparm(23) / tfact / 1.d9 +! + t0 = seconds() + ALLOCATE(newsol(nrank)) + CALL bsolve(mat, rhs, newsol) + newsol(1:ny-1) = newsol(ny) + tsolv = seconds()-t0 +! + IF(me.EQ.0) THEN + WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol)) + WRITE(*,'(/a)') '---' + WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat + WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact + WRITE(*,'(a,1pe12.3)') 'Backsolve(2) time (s) ', tsolv + WRITE(*,'(a,1pe12.3)') 'Total (s) ', tmat+tfact+tsolv + WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1 + END IF +! + DEALLOCATE(newsol) +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(bcoef) + DEALLOCATE(xgrid, rhs, sol) +9999 CONTINUE + CALL mpi_barrier(MPI_COMM_WORLD, ierr) + CALL destroy_sp(splxy) + CALL destroy(mat) +! + IF(me.EQ.0) CALL closef(fid) + CALL mpi_finalize(ierr) +!=========================================================================== +! +CONTAINS + FUNCTION norm2(x) +! +! Compute the 2-norm of array x +! + IMPLICIT NONE + DOUBLE PRECISION :: norm2 + DOUBLE PRECISION, INTENT(in) :: x(:,:) + DOUBLE PRECISION :: sum2 + INTEGER :: i, j +! + sum2 = 0.0d0 + DO i=1,SIZE(x,1) + DO j=1,SIZE(x,2) + sum2 = sum2 + x(i,j)**2 + END DO + END DO + norm2 = SQRT(sum2) + END FUNCTION norm2 + SUBROUTINE prnmat(label, mat) + CHARACTER(len=*) :: label + DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat + INTEGER :: i + WRITE(*,'(/a)') TRIM(label) + DO i=1,SIZE(mat,1) + WRITE(*,'(10(1pe12.3))') mat(i,:) + END DO + END SUBROUTINE prnmat +! + SUBROUTINE minmax_i(k, comm, str) + CHARACTER(len=*), INTENT(in) :: str + INTEGER, INTENT(in) :: k + INTEGER, INTENT(in) :: comm + INTEGER :: me, ierr, kmin, kmax + CALL mpi_comm_rank(comm, me, ierr) + CALL mpi_reduce(k, kmin, 1, MPI_INTEGER, MPI_MIN, 0, comm, ierr) + CALL mpi_reduce(k, kmax, 1, MPI_INTEGER, MPI_MAX, 0, comm, ierr) + IF( me.EQ.0 ) THEN + WRITE(*,'(a,2i16)') 'Minmax of ' // TRIM(str), kmin, kmax + END IF + END SUBROUTINE minmax_i +! + SUBROUTINE minmax_r(x, comm, str) + CHARACTER(len=*), INTENT(in) :: str + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(in) :: comm + INTEGER :: me, ierr + DOUBLE PRECISION :: xmin, xmax + CALL mpi_comm_rank(comm, me, ierr) + CALL mpi_reduce(x, xmin, 1, MPI_DOUBLE_PRECISION, MPI_MIN, 0, comm, ierr) + CALL mpi_reduce(x, xmax, 1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierr) + IF( me.EQ.0 ) THEN + WRITE(*,'(a,2(1pe12.4))') 'Minmax of ' // TRIM(str), xmin, xmax + END IF + END SUBROUTINE minmax_r +END PROGRAM main +! +!+++ +SUBROUTINE meshdist(c, x, nx) +! +! Construct an 1d non-equidistant mesh given a +! mesh distribution function. +! + IMPLICIT NONE + DOUBLE PRECISION, INTENT(in) :: c(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + INTEGER :: nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) +CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) + END FUNCTION fdist +END SUBROUTINE meshdist +!+++ diff --git a/examples/pde2d_sym_pardiso.f90 b/examples/pde2d_sym_pardiso.f90 new file mode 100644 index 0000000..47c7cda --- /dev/null +++ b/examples/pde2d_sym_pardiso.f90 @@ -0,0 +1,715 @@ +!> +!> @file pde2d_sym_pardiso.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +! +! Solving the 2d PDE using splines and PARDISO symmetric matrix +! +! -d/dx[x C d/dx]f - 1x/d/dy[Cd/dy] f = \rho, with f(x=1,y) = 0 +! C(x,y) = 1 + \epsilon x cos(y) +! exact solution: f(x,y) = (1-x^2) x^m cos(my) +! +MODULE pde2d_sym_pardiso_mod + USE bsplines + USE pardiso_bsplines + IMPLICIT NONE +CONTAINS +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE dismat(spl, epsi, mat) +! +! Assembly of FE matrix mat using spline spl +! + TYPE(spline2d), INTENT(in) :: spl + DOUBLE PRECISION, INTENT(in) :: epsi + TYPE(pardiso_mat), INTENT(inout) :: mat +! + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: n2, nidbas2, ndim2, ng2 + INTEGER :: i, j, ig1, ig2 + INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol + DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:) + DOUBLE PRECISION:: contrib +! + INTEGER :: kterms ! Number of terms in weak form + INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:) ! Derivative order + DOUBLE PRECISION, ALLOCATABLE :: coefs(:,:,:) ! Terms in weak form + INTEGER, ALLOCATABLE :: left1(:), left2(:) +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) + WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1 + WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2 +! +! Gauss quadature +! + CALL get_gauss(spl%sp1, ng1) + CALL get_gauss(spl%sp2, ng2) + ALLOCATE(xg1(ng1), wg1(ng1)) + ALLOCATE(xg2(ng1), wg2(ng1)) +! +! Weak form +! + kterms = mat%nterms + ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2)) + ALLOCATE(coefs(kterms,ng1,ng2)) +! + ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative + ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! +!=========================================================================== +! 2.0 Assembly loop +! + ALLOCATE(left1(ng1)) + ALLOCATE(left2(ng2)) + DO i=1,n1 + CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) + left1 = i + CALL basfun(xg1, spl%sp1, fun1, left1) + DO j=1,n2 + CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) + left2 = j + CALL basfun(xg2, spl%sp2, fun2, left2) +! + DO ig1=1,ng1 + DO ig2=1,ng2 + CALL coefeq(xg1(ig1), xg2(ig2), & + & idert(:,:,ig1,ig2), & + & iderw(:,:,ig1,ig2), & + & coefs(:,ig1,ig2)) + END DO + END DO +! + DO iw1=0,nidbas1 ! Weight function in dir 1 + igw1 = i+iw1 + DO iw2=0,nidbas2 ! Weight function in dir 2 + igw2 = MODULO(j+iw2-1, n2) + 1 + irow = igw2 + (igw1-1)*n2 + DO it1=0,nidbas1 ! Test function in dir 1 + igt1 = i+it1 + DO it2=0,nidbas2 ! Test function in dir 2 + igt2 = MODULO(j+it2-1, n2) + 1 + jcol = igt2 + (igt1-1)*n2 +!------------- + contrib = 0.0d0 + DO ig1=1,ng1 + DO ig2=1,ng2 + DO iterm=1,kterms + contrib = contrib + & + & fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * & + & fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * & + & coefs(iterm,ig1,ig2) * & + & fun2(it2,idert(iterm,2,ig1,ig2),ig2) * & + & fun1(it1,idert(iterm,1,ig1,ig2),ig1) * & + & wg1(ig1) * wg2(ig2) + END DO + END DO + END DO + CALL updtmat(mat, irow, jcol, contrib) +!------------- + END DO + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xg1, wg1, fun1) + DEALLOCATE(xg2, wg2, fun2) + DEALLOCATE(idert, iderw, coefs) + DEALLOCATE(left1,left2) +! + CONTAINS + SUBROUTINE coefeq(x, y, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x, y + INTEGER, INTENT(out) :: idt(:,:), idw(:,:) + DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1)) +! + DOUBLE PRECISION :: zcoef +! +! Weak form = Int(x*C*dw/dx*dt/dx + C/x*dw/dy*dt/dy)dxdy +! C(x,y) = 1 + epsilon*x*cos(y) +! + zcoef = 1.0d0 + epsi*x*COS(y) +! + c(1) = x*zcoef ! + idt(1,1) = 1 + idt(1,2) = 0 + idw(1,1) = 1 + idw(1,2) = 0 +! + c(2) = zcoef/x + idt(2,1) = 0 + idt(2,2) = 1 + idw(2,1) = 0 + idw(2,2) = 1 + END SUBROUTINE coefeq + END SUBROUTINE dismat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE disrhs(mbess, epsi, spl, rhs) +! +! Assembly the RHS using 2d spline spl +! + INTEGER, INTENT(in) :: mbess + DOUBLE PRECISION, INTENT(in) :: epsi + TYPE(spline2d), INTENT(in) :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:) +! + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: n2, nidbas2, ndim2, ng2 + INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank + DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) + DOUBLE PRECISION:: contrib +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) +! + ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) + ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) +! +! Gauss quadature +! + CALL get_gauss(spl%sp1, ng1) + CALL get_gauss(spl%sp2, ng2) + ALLOCATE(xg1(ng1), wg1(ng1)) + ALLOCATE(xg2(ng1), wg2(ng1)) +!=========================================================================== +! 2.0 Assembly loop +! + nrank = SIZE(rhs) + rhs(1:nrank) = 0.0d0 +! + DO i=1,n1 + CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) + DO ig1=1,ng1 + CALL basfun(xg1(ig1), spl%sp1, fun1, i) + DO j=1,n2 + CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) + DO ig2=1,ng2 + CALL basfun(xg2(ig2), spl%sp2, fun2, j) + contrib = wg1(ig1)*wg2(ig2) * & + & rhseq(xg1(ig1),xg2(ig2), mbess) + DO k1=0,nidbas1 + i1 = i+k1 + DO k2=0,nidbas2 + j2 = MODULO(j+k2-1,n2) + 1 + ij = j2 + (i1-1)*n2 + rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1) + END DO + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xg1, wg1, fun1) + DEALLOCATE(xg2, wg2, fun2) +! + CONTAINS + DOUBLE PRECISION FUNCTION rhseq(x, y, m) + DOUBLE PRECISION, INTENT(in) :: x, y + INTEGER, INTENT(in) :: m + DOUBLE PRECISION :: xm +! + xm = REAL(m,8) + rhseq = x**(m+1) * ( 4.0d0*(xm+1.0d0)*COS(xm*y) + & + & epsi*x*( & + & ( (3.0d0*(xm+1.0d0) - xm/x**2)*COS((xm-1.0d0)*y) + & + & (3.0d0+2.0d0*xm)*COS((xm+1.0d0)*y) ) & + & )) + END FUNCTION rhseq + END SUBROUTINE disrhs +! +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE ibcmat(mat, ny) +! +! Apply BC on matrix +! + TYPE(pardiso_mat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: ny + INTEGER :: nrank, i, j + DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:) +!=========================================================================== +! 1.0 Prologue +! + nrank = mat%rank + ALLOCATE(zsum(nrank), arr(nrank)) +!=========================================================================== +! 2.0 Unicity at the axis +! +! The vertical sum on the NY-th row +! + zsum = 0.0d0 + DO i=1,ny + arr = 0.0d0 + CALL getrow(mat, i, arr) + zsum(:) = zsum(:) + arr(:) + END DO + zsum(ny) = SUM(zsum(1:ny)) ! using symmetry + CALL putrow(mat, ny, zsum) +! +! The away operator +! + DO i = 1,ny-1 + arr = 0.0d0; arr(i) = 1.0d0 + CALL putrow(mat, i, arr) + END DO +! +!=========================================================================== +! 3.0 Dirichlet on right boundary +! + DO i = nrank, nrank-ny+1, -1 + arr = 0.0d0; arr(i) = 1.0d0 + CALL putrow(mat, i, arr) + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(zsum, arr) +! + END SUBROUTINE ibcmat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE ibcrhs(rhs, ny) +! +! Apply BC on RHS +! + DOUBLE PRECISION, INTENT(inout) :: rhs(:) + INTEGER, INTENT(in) :: ny + INTEGER :: nrank + DOUBLE PRECISION :: zsum +!=========================================================================== +! 1.0 Prologue +! + nrank = SIZE(rhs,1) +!=========================================================================== +! 2.0 Unicity at the axis +! +! The vertical sum +! + zsum = SUM(rhs(1:ny)) + rhs(ny) = zsum + rhs(1:ny-1) = 0.0d0 +!=========================================================================== +! 3.0 Dirichlet on right boundary +! + rhs(nrank-ny+1:nrank) = 0.0d0 + END SUBROUTINE ibcrhs +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +END MODULE pde2d_sym_pardiso_mod +PROGRAM main + USE pde2d_sym_pardiso_mod + USE futils +! + IMPLICIT NONE + INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms + LOGICAL :: nlppform + INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it + DOUBLE PRECISION :: pi, epsi, coefx(5), coefy(5) + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol + TYPE(spline2d) :: splxy + TYPE(pardiso_mat) :: mat +! + CHARACTER(len=128) :: file='pde2d_sym_pardiso.h5' + INTEGER :: fid + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr + DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol + DOUBLE PRECISION :: seconds, mem, dopla + DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2 + DOUBLE PRECISION :: tconv, treord + INTEGER :: nits=100 + LOGICAL :: nlmetis, nlforce_zero, nlpos +! + NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, epsi, nlppform, nlmetis, & + & nlforce_zero, nlpos, coefx, coefy +!=========================================================================== +! 1.0 Prologue +! +! Read in data specific to run +! + nx = 8 ! Number of intervals in x + ny = 8 ! Number of intervals in y + nidbas = (/3,3/) ! Degree of splines + ngauss = (/4,4/) ! Number of Gauss points/interval + mbess = 2 ! Exponent of differential problem + epsi = 0.5 ! Non-uniformity in the Laplacian coefficicient + nterms = 2 ! Number of terms in weak form + nlppform = .TRUE. ! Use PPFORM for gridval or not + nlmetis = .FALSE. ! Use metis ordering or minimum degree + nlforce_zero = .FALSE. ! Remove existing nodes with zero val in putele/row/ele + nlpos = .TRUE. ! Matrix is positive definite + coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function +! + READ(*,newrun) + WRITE(*,newrun) +! +! Define grid on x (=radial) & y (=poloidal) axis +! + pi = 4.0d0*ATAN(1.0d0) + ALLOCATE(xgrid(0:nx), ygrid(0:ny)) + xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 + CALL meshdist(coefx, xgrid, nx) + ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi + CALL meshdist(coefy, ygrid, ny) +! +! Create hdf5 file +! + CALL creatf(file, fid, 'PDE2D Result File', real_prec='d') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NY', ny) + CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) + CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) + CALL attach(fid, '/', 'NGAUSS1', ngauss(1)) + CALL attach(fid, '/', 'NGAUSS2', ngauss(2)) + CALL attach(fid, '/', 'MBESS', mbess) + CALL attach(fid, '/', 'EPSI', epsi) +!=========================================================================== +! 2.0 Discretize the PDE +! +! Set up spline +! + CALL set_spline(nidbas, ngauss, & + & xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform) +! +! FE matrix assembly +! + nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix + WRITE(*,'(a,i8)') 'nrank', nrank +! + t0 = seconds() + CALL init(nrank, nterms, mat, nlsym=.TRUE.) + CALL dismat(splxy, epsi, mat) + ALLOCATE(arr(nrank)) + IF(nrank.LT.100) THEN + DO i=1,nrank + CALL getele(mat, i, i, arr(i)) + END DO + WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', arr + END IF +! +! BC on Matrix +! + WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero + WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat) + CALL ibcmat(mat, ny) + IF(nrank.LT.100) THEN + DO i=1,nrank + CALL getele(mat, i, i, arr(i)) + END DO + WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', arr + WRITE(*,'(a)') 'Last rows' + DO i=nrank-ny,nrank + CALL getrow(mat, i, arr) + WRITE(*,'(10(1pe12.3))') arr + END DO + END IF + tmat = seconds() - t0 +! +! RHS assembly +! + ALLOCATE(rhs(nrank), sol(nrank)) + CALL disrhs(mbess, epsi, splxy, rhs) +! +! BC on RHS +! + CALL ibcrhs(rhs, ny) +! + CALL putarr(fid, '/RHS', rhs, 'RHS of linear system') + WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat) + WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem() +!=========================================================================== +! 3.0 Solve the dicretized system +! + t0 = seconds() +!!$ CALL factor(mat) +! +! The call to "factor" could be split into the +! 3 following calls +! + CALL to_mat(mat) + tconv = seconds() -t0 + WRITE(*,'(/a,l4)') 'associated(mat%mat)', ASSOCIATED(mat%mat) + WRITE(*,'(a,1pe12.3)') 'Mem used (MB) after TO_MAT', mem() +! + t0 = seconds() + CALL reord_mat(mat, nlmetis=nlmetis, debug=.FALSE.) + CALL putmat(fid, '/MAT', mat) + treord = seconds() - t0 +! + t0 = seconds() + CALL numfact(mat, debug=.FALSE.) + tfact = seconds() - t0 + + WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem() + WRITE(*,'(/a,i12)') 'Number of nonzeros in factors of A = ',mat%p%iparm(18) + WRITE(*,'(a,i12)') 'Number of factorization MFLOPS = ',mat%p%iparm(19) + gflops1 = mat%p%iparm(19) / tfact / 1.d3 +! + CALL bsolve(mat, rhs, sol, debug=.FALSE.) + t0 = seconds() + DO it=1,nits ! nits iterations for timing + CALL bsolve(mat, rhs, sol) + sol(1:ny-1) = sol(ny) + END DO + WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem() + tsolv = (seconds() - t0)/REAL(nits) +! +! Spline coefficients, taking into account of periodicity in y +! Note: in SOL, y was numbered first. +! + dimx = splxy%sp1%dim + dimy = splxy%sp2%dim + ALLOCATE(bcoef(0:dimx-1, 0:dimy-1)) + DO j=0,dimy-1 + DO i=0,dimx-1 + ij = MODULO(j,ny) + i*ny + 1 + bcoef(i,j) = sol(ij) + END DO + END DO + WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef) + CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution') +!=========================================================================== +! 4.0 Check the solution +! +! Check function values computed with various method +! + ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny)) + DO i=0,nx + DO j=0,ny + solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j)) + END DO + END DO + jder = (/0,0/) +! +! Compute PPFORM/BCOEFS at first call to gridval + CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef) +! + WRITE(*,'(/a)') '*** Checking solutions' + t0 = seconds() + DO it=1,nits ! nits iterations for timing + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + END DO + tgrid = (seconds() - t0)/REAL(nits) + errsol = solana - solcal + IF( SIZE(bcoef,2) .LE. 10 ) THEN + CALL prnmat('BCOEF', bcoef) + CALL prnmat('SOLANA', solana) + CALL prnmat('SOLCAL', solcal) + CALL prnmat('ERRSOL', errsol) + END IF + WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & + & norm2(errsol) / norm2(solana) + WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid +! + CALL putarr(fid, '/xgrid', xgrid, 'r') + CALL putarr(fid, '/ygrid', ygrid, '\theta') + CALL putarr(fid, '/sol', solcal, 'Solutions') + CALL putarr(fid, '/solana', solana,'Exact solutions') + CALL putarr(fid, '/errors', errsol, 'Errors') +! +! Check derivatives d/dx and d/dy +! + WRITE(*,'(/a)') '*** Checking gradient' + DO i=0,nx + DO j=0,ny + IF( mbess .EQ. 0 ) THEN + solana(i,j) = -2.0d0 * xgrid(i) + ELSE + solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * & + & xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) + END IF + END DO + END DO +! + jder = (/1,0/) + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + errsol = solana - solcal + CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions') + WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol) +! + DO i=0,nx + DO j=0,ny + solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j)) + END DO + END DO +! + jder = (/0,1/) + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions') + errsol = solana - solcal + WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol) +! + WRITE(*,'(/a)') '---' + WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat + WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s) ', tconv + WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s) ', treord + WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact + WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv + WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s) ', tsolv + WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1 +!=========================================================================== +! 5.0 Clear the matrix and recompute +! + WRITE(*,'(/a)') 'Recompute the solver ...' + t0 = seconds() + CALL clear_mat(mat) + CALL dismat(splxy, epsi, mat) + CALL ibcmat(mat, ny) + tmat = seconds()-t0 +! + t0 = seconds() +!!$ CALL numfact(mat, debug=.FALSE.) + CALL factor(mat, nlreord=.FALSE., debug=.FALSE.) + tfact = seconds()-t0 + gflops1 = mat%p%iparm(19) / tfact / 1.d3 +! + t0 = seconds() + ALLOCATE(newsol(nrank)) + CALL bsolve(mat, rhs, newsol) + newsol(1:ny-1) = newsol(ny) + tsolv = seconds()-t0 +! + WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol)) + WRITE(*,'(/a)') '---' + WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat + WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact + WRITE(*,'(a,1pe12.3)') 'Backsolve(2) time (s) ', tsolv + WRITE(*,'(a,1pe12.3)') 'Total (s) ', tmat+tfact+tsolv + WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1 +! + DEALLOCATE(newsol) +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xgrid, rhs, sol) + DEALLOCATE(solcal, solana, errsol) + DEALLOCATE(bcoef) + DEALLOCATE(arr) + CALL destroy_sp(splxy) + CALL destroy(mat) +! + CALL closef(fid) +!=========================================================================== +! +CONTAINS + FUNCTION norm2(x) +! +! Compute the 2-norm of array x +! + IMPLICIT NONE + DOUBLE PRECISION :: norm2 + DOUBLE PRECISION, INTENT(in) :: x(:,:) + DOUBLE PRECISION :: sum2 + INTEGER :: i, j +! + sum2 = 0.0d0 + DO i=1,SIZE(x,1) + DO j=1,SIZE(x,2) + sum2 = sum2 + x(i,j)**2 + END DO + END DO + norm2 = SQRT(sum2) + END FUNCTION norm2 + SUBROUTINE prnmat(label, mat) + CHARACTER(len=*) :: label + DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat + INTEGER :: i + WRITE(*,'(/a)') TRIM(label) + DO i=1,SIZE(mat,1) + WRITE(*,'(10(1pe12.3))') mat(i,:) + END DO + END SUBROUTINE prnmat +END PROGRAM main +! +!+++ +SUBROUTINE meshdist(c, x, nx) +! +! Construct an 1d non-equidistant mesh given a +! mesh distribution function. +! + IMPLICIT NONE + DOUBLE PRECISION, INTENT(in) :: c(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + INTEGER :: nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) +CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) + END FUNCTION fdist +END SUBROUTINE meshdist +!+++ diff --git a/examples/pde2d_sym_pardiso_dft.f90 b/examples/pde2d_sym_pardiso_dft.f90 new file mode 100644 index 0000000..42426e6 --- /dev/null +++ b/examples/pde2d_sym_pardiso_dft.f90 @@ -0,0 +1,1034 @@ +!> +!> @file pde2d_sym_pardiso_dft.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +! +! Solving the 2d PDE using splines and PARDISO symmetric matrix. +! The periodic coordinate y is discrete Fourier transformed. +! +! -d/dx[x C d/dx]f - 1x/d/dy[Cd/dy] f = \rho, with f(x=1,y) = 0 +! C(x,y) = 1 + \epsilon x cos(y) +! exact solution: f(x,y) = (1-x^2) x^m cos(my) +! +MODULE pde2d_sym_pardiso_dft_mod + USE bsplines + USE pardiso_bsplines + IMPLICIT NONE +! +CONTAINS +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE dismat(spl, epsi, mat) +! +! Assembly of FE matrix mat using spline spl +! + TYPE(spline2d), INTENT(in) :: spl + DOUBLE PRECISION, INTENT(in) :: epsi + TYPE(zpardiso_mat), INTENT(inout) :: mat +! + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: n2, nidbas2, ndim2, ng2 + INTEGER :: kmin, kmax, dk + INTEGER :: i, j, ig1, ig2, kc + INTEGER :: iterm, iw1, mw, igw1, it1, mt, igt1, irow, jcol + DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:) + DOUBLE COMPLEX, ALLOCATABLE :: ft_fun2(:,:,:), fft_temp(:) + DOUBLE COMPLEX :: contrib +! + INTEGER :: kterms ! Number of terms in weak form + INTEGER :: kcoupl ! Number of mode couplings + INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:) ! Derivative order + DOUBLE COMPLEX, ALLOCATABLE :: coefs(:,:,:,:) ! Terms in weak form + INTEGER, ALLOCATABLE :: left1(:), left2(:) +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) + kmin = spl%sp2%dft%kmin + kmax = spl%sp2%dft%kmax + dk = spl%sp2%dft%dk + WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1 + WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2 + WRITE(*,'(a, 5i6)') 'kmin, kmax, dk =', kmin, kmax, dk +! +! Gauss quadature +! + CALL get_gauss(spl%sp1, ng1) + CALL get_gauss(spl%sp2, ng2) + ALLOCATE(xg1(ng1), wg1(ng1)) + ALLOCATE(xg2(ng2), wg2(ng2)) +! +! Weak form +! + kterms = mat%nterms + kcoupl = SIZE(spl%sp2%dft%mode_couplings) + ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2)) + ALLOCATE(coefs(kterms,kcoupl,ng1,ng2)) +! +! Splines and derivatives at all Gauss points +! + ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative + ALLOCATE(ft_fun2(kmin:kmax,0:1,ng2)) ! DFT of splines and 1st derivative + ALLOCATE(fft_temp(0:n2-1)) ! Used in coefeq +!=========================================================================== +! 2.0 Assembly loop +! + ALLOCATE(left1(ng1)) + ALLOCATE(left2(ng2)) +! +! First interval in 2nd (periodic) coordinate +! + j = 1 + CALL get_gauss(spl%sp2, ng2, 1, xg2, wg2) + left2 = j + CALL ft_basfun(xg2, spl%sp2, ft_fun2, left2) + + DO i=1,n1 + CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) + left1 = i + CALL basfun(xg1, spl%sp1, fun1, left1) + DO ig1=1,ng1 + DO ig2=1,ng2 + CALL coefeq(xg1(ig1), xg2(ig2), & + & idert(:,:,ig1,ig2), & + & iderw(:,:,ig1,ig2), & + & coefs(:,:,ig1,ig2)) + END DO + END DO +! + DO iw1=0,nidbas1 ! Weight function in dir 1 + igw1 = i+iw1 + DO it1=0,nidbas1 ! Test function in dir 1 + igt1 = i+it1 + DO mt=kmin,kmax ! Test Fourier mode + DO kc=1,kcoupl + mw = mt + spl%sp2%dft%mode_couplings(kc) + IF(mw.LT.kmin .OR. mw.GT.kmax) CYCLE +!------------- + contrib = (0.0d0, 0.0d0) + DO ig1=1,ng1 + DO ig2=1,ng2 + DO iterm=1,kterms + contrib = contrib + & + & fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * & + & ft_fun2(mw,iderw(iterm,2,ig1,ig2),ig2) * & + & coefs(iterm,kc,ig1,ig2) * & + & CONJG(ft_fun2(mt,idert(iterm,2,ig1,ig2),ig2)) * & + & fun1(it1,idert(iterm,1,ig1,ig2),ig1) * & + & wg1(ig1) * wg2(ig2) /REAL(n2,8) + END DO + END DO + END DO + irow = (igw1-1)*dk + (mw-kmin)+1 ! Number first mode m then radial coord. + jcol = (igt1-1)*dk + (mt-kmin)+1 + CALL updtmat(mat, irow, jcol, contrib) +!------------- + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xg1, wg1, fun1) + DEALLOCATE(xg2, wg2, ft_fun2) + DEALLOCATE(idert, iderw, coefs) + DEALLOCATE(left1,left2) + DEALLOCATE(fft_temp) +! + CONTAINS + SUBROUTINE coefeq(x, y, idt, idw, c) + USE fft + DOUBLE PRECISION, INTENT(in) :: x, y + INTEGER, INTENT(out) :: idt(:,:), idw(:,:) + DOUBLE COMPLEX, INTENT(out) :: c(:,:) +! + DOUBLE PRECISION :: zcoef, dy + INTEGER :: j, k, kc, kp +! +! Weak form = Int(x*C*dw/dx*dt/dx + C/x*dw/dy*dt/dy)dxdy +! C(x,y) = 1 + epsilon*x*cos(y) +! + dy = spl%sp2%dft%dx + kc = SIZE(spl%sp2%dft%mode_couplings) + DO j=0,n2-1 + fft_temp(j) = 1.0d0+epsi*x*COS(y+j*dy) + END DO + CALL fourcol(fft_temp,1) + DO k=1,kc + kp = spl%sp2%dft%mode_couplings(k) + IF(kp.LT.0) kp=kp+n2 + c(1,k) = x*fft_temp(kp) + c(2,k) = fft_temp(kp)/x + END DO +!!$ WRITE(*,'(a/(10(1pe12.4)))') 'fft_temp', ABS(fft_temp) +!!$ WRITE(*,'(a/(10(1pe12.4)))') 'c1', ABS(c(1,:)) +!!$ WRITE(*,'(a/(10(1pe12.4)))') 'c2', ABS(c(2,:)) +! + idt(1,1) = 1 + idt(1,2) = 0 + idw(1,1) = 1 + idw(1,2) = 0 +! + idt(2,1) = 0 + idt(2,2) = 1 + idw(2,1) = 0 + idw(2,2) = 1 + END SUBROUTINE coefeq + END SUBROUTINE dismat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE disrhs(mbess, epsi, spl, rhs) +! +! Assembly the RHS using 2d spline spl +! + INTEGER, INTENT(in) :: mbess + DOUBLE PRECISION, INTENT(in) :: epsi + TYPE(spline2d), INTENT(in) :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:) +! + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: n2, nidbas2, ndim2, ng2 + INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank + DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) + DOUBLE PRECISION:: contrib +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) +! + ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) + ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) +! +! Gauss quadature +! + CALL get_gauss(spl%sp1, ng1) + CALL get_gauss(spl%sp2, ng2) + ALLOCATE(xg1(ng1), wg1(ng1)) + ALLOCATE(xg2(ng1), wg2(ng1)) +!=========================================================================== +! 2.0 Assembly loop +! + nrank = SIZE(rhs) + rhs(1:nrank) = 0.0d0 +! + DO i=1,n1 + CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) + DO ig1=1,ng1 + CALL basfun(xg1(ig1), spl%sp1, fun1, i) + DO j=1,n2 + CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) + DO ig2=1,ng2 + CALL basfun(xg2(ig2), spl%sp2, fun2, j) + contrib = wg1(ig1)*wg2(ig2) * & + & rhseq(xg1(ig1),xg2(ig2), mbess) + DO k1=0,nidbas1 + i1 = i+k1 + DO k2=0,nidbas2 + j2 = MODULO(j+k2-1,n2) + 1 + ij = j2 + (i1-1)*n2 + rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1) + END DO + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xg1, wg1, fun1) + DEALLOCATE(xg2, wg2, fun2) +! + CONTAINS + DOUBLE PRECISION FUNCTION rhseq(x, y, m) + DOUBLE PRECISION, INTENT(in) :: x, y + INTEGER, INTENT(in) :: m +! + DOUBLE PRECISION :: xm + xm = REAL(m,8) + rhseq = x**(m+1) * ( 4.0d0*(xm+1.0d0)*COS(xm*y) + & + & epsi*x*( & + & ( (3.0d0*(xm+1.0d0) - xm/x**2)*COS((xm-1.0d0)*y) + & + & (3.0d0+2.0d0*xm)*COS((xm+1.0d0)*y) ) & + & )) + END FUNCTION rhseq + END SUBROUTINE disrhs +! +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE ibcmat(mat, dft) +! +! Apply BC on matrix +! + TYPE(zpardiso_mat), INTENT(inout) :: mat + TYPE(dftmap), INTENT(in) :: dft + INTEGER :: nrank, k, kmin, kmax, dk, i + DOUBLE COMPLEX :: arr(mat%rank) +!=========================================================================== +! 1.0 Prologue +! + nrank = mat%rank + kmin = dft%kmin + kmax = dft%kmax + dk = dft%dk +!=========================================================================== +! 2.0 BC at the axis +! +! zero for non-zero modes +! + DO k=kmin,kmax + IF(k.NE.0) THEN + i = k-kmin+1 + arr = 0.0d0; arr(i) = 1.0d0 + CALL putrow(mat, i, arr) + END IF + END DO +!=========================================================================== +! 3.0 Dirichlet on right boundary +! + DO i = nrank, nrank-dk+1, -1 + arr = 0.0d0; arr(i) = 1.0d0 + CALL putrow(mat, i, arr) + END DO +! + END SUBROUTINE ibcmat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE ibcrhs(rhs, dft) +! +! Apply BC on RHS +! + DOUBLE COMPLEX, INTENT(inout) :: rhs(:) + TYPE(dftmap), INTENT(in) :: dft + INTEGER :: nrank, kmin, kmax, dk, k +!=========================================================================== +! 1.0 Prologue +! + nrank = SIZE(rhs,1) + kmin = dft%kmin + kmax = dft%kmax + dk = dft%dk +!=========================================================================== +! 2.0 BC at the axis +! +! zero for non-zero modes +! + DO k=kmin,kmax + IF(k.NE.0) rhs(k-kmin+1) = 0.0 + END DO +!=========================================================================== +! 3.0 Dirichlet on right boundary +! + rhs(nrank-dk+1:nrank) = 0.0d0 + END SUBROUTINE ibcrhs +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE spectrum0(spl, carr, xpt, cspec) +! +! DFT modes at xpt (integration on the first interval) +! + DOUBLE COMPLEX, PARAMETER :: ci = (0.0d0,1.0d0) + DOUBLE PRECISION, PARAMETER :: pi = 3.141592653589793d0 + TYPE(spline2d), INTENT(in) :: spl + DOUBLE COMPLEX, INTENT(in) :: carr(:) + DOUBLE PRECISION, INTENT(in) :: xpt + DOUBLE COMPLEX, INTENT(out) :: cspec(:) +! + INTEGER :: ndim1, n1, nidbas1, ndim2, n2, nidbas2 + INTEGER :: k, kmin, kmax, dk, kk + INTEGER :: ng2, ig2 + INTEGER, ALLOCATABLE :: left2(:) + DOUBLE PRECISION :: temp(1) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:) + DOUBLE COMPLEX, ALLOCATABLE :: ft_fun2(:,:,:), psi(:), coefs(:,:) +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) + kmin = spl%sp2%dft%kmin + kmax = spl%sp2%dft%kmax + dk = spl%sp2%dft%dk +! + CALL get_gauss(spl%sp2, ng2) + ALLOCATE(left2(ng2)) + ALLOCATE(xg2(ng2), wg2(ng2)) + ALLOCATE(ft_fun2(kmin:kmax,1,ng2)) ! DFT of splines + ALLOCATE(psi(kmin:kmax)) +! +! Integration over first interval +! + left2 = 1 + CALL get_gauss(spl%sp2, ng2, 1, xg2, wg2) + CALL ft_basfun(xg2, spl%sp2, ft_fun2, left2) + psi = (0.0d0,0.0d0) + DO k=kmin,kmax + DO ig2=1,ng2 + psi(k) = psi(k) + wg2(ig2)*EXP(k*ci*xg2(ig2))*CONJG(ft_fun2(k,1,ig2)) + END DO + END DO +! + ALLOCATE(coefs(dk,ndim1)) + coefs = RESHAPE(carr, SHAPE(coefs)) + temp = xpt + DO kk=kmin,kmax + k=kk-kmin+1 + coefs(k,:) = psi(kk)*coefs(k,:) + CALL gridval(spl%sp1, temp, cspec(k:k), 0, coefs(k,:)) + END DO + cspec = cspec/(2.0d0*pi) +! + DEALLOCATE(left2) + DEALLOCATE(xg2, wg2) + DEALLOCATE(ft_fun2) + DEALLOCATE(psi) + DEALLOCATE(coefs) + END SUBROUTINE spectrum0 +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE spectrum1(spl, carr, xpt, ypt0, cspec) +! +! DFT modes at xpt (at the initial ypt0) +! + DOUBLE COMPLEX, PARAMETER :: ci = (0.0d0,1.0d0) +! + TYPE(spline2d), INTENT(in) :: spl + DOUBLE COMPLEX, INTENT(in) :: carr(:) + DOUBLE PRECISION, INTENT(in) :: xpt, ypt0 + DOUBLE COMPLEX, INTENT(out) :: cspec(:) +! + INTEGER :: ndim1, n1, nidbas1, ndim2, n2, nidbas2 + INTEGER :: k, kmin, kmax, dk + DOUBLE PRECISION :: temp(1) + DOUBLE COMPLEX :: ctemp(1) + DOUBLE COMPLEX, ALLOCATABLE :: ft_fun2(:,:), coefs(:,:) +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) + kmin = spl%sp2%dft%kmin + kmax = spl%sp2%dft%kmax + dk = spl%sp2%dft%dk +! +! DFT of splines at ypt0 + ALLOCATE(ft_fun2(kmin:kmax,1)) + ALLOCATE(coefs(kmin:kmax,ndim1)) + CALL ft_basfun(ypt0, spl%sp2, ft_fun2, 1) + coefs = RESHAPE(carr, SHAPE(coefs)) +! + temp = xpt + DO k=kmin,kmax + CALL gridval(spl%sp1, temp, ctemp, 0, coefs(k,:)) + cspec(k-kmin+1) = CONJG(ft_fun2(k,1))*ctemp(1)*EXP(k*ci*ypt0) + END DO + cspec = cspec/REAL(n2,8) +! + DEALLOCATE(ft_fun2) + DEALLOCATE(coefs) + END SUBROUTINE spectrum1 +!!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE spectrum2(spl, xpt, ypt0, cspec) +! +! DFT modes at xpt (at the initial ypt0) +! + USE fft + USE bsplines +! + DOUBLE COMPLEX, PARAMETER :: ci = (0.0d0,1.0d0) +! + TYPE(spline2d) :: spl + DOUBLE PRECISION, INTENT(in) :: xpt, ypt0 + DOUBLE COMPLEX, INTENT(out) :: cspec(:) +! + INTEGER :: ndim1, n1, nidbas1, ndim2, n2, nidbas2 + INTEGER :: k, kmin, kmax, dk + DOUBLE PRECISION, ALLOCATABLE :: ypt(:), fun(:,:) + DOUBLE COMPLEX, ALLOCATABLE :: ft_fun(:) + DOUBLE PRECISION :: temp(1) +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) + kmin = spl%sp2%dft%kmin + kmax = spl%sp2%dft%kmax + dk = spl%sp2%dft%dk +! + ALLOCATE(ypt(0:n2-1)) + ALLOCATE(fun(1, 0:n2-1)) + ALLOCATE(ft_fun(0:n2-1)) +! +! Function values at points ypt +! + ypt(0:n2-1) = ypt0 + spl%sp2%knots(0:n2-1) + temp = xpt + CALL gridval(spl, temp, ypt, fun, (/0,0/)) + ft_fun = fun(1,:) +! +! Discrete Fourier Transform +! + CALL fourcol(ft_fun, 1) + DO k=kmin,kmax + IF(k.LT.0) THEN + cspec(k-kmin+1) = ft_fun(k+n2)*EXP(k*ci*ypt0) + ELSE + cspec(k-kmin+1) = ft_fun(k)*EXP(k*ci*ypt0) + END IF + END DO + cspec = cspec/REAL(n2,8) +! + DEALLOCATE(ypt) + DEALLOCATE(fun) + DEALLOCATE(ft_fun) + END SUBROUTINE spectrum2 +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +END MODULE pde2d_sym_pardiso_dft_mod +PROGRAM main + USE pde2d_sym_pardiso_dft_mod + USE futils + USE fft +! + IMPLICIT NONE + INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms, kmin, kmax, dk + INTEGER :: n_mode_couplings + INTEGER, ALLOCATABLE :: mode_couplings(:) + LOGICAL :: nlppform + INTEGER :: i, j, ij, dimx, dimy, nrank, nrank_full, jder(2), it, i0, i0_r + INTEGER :: k, kp, ik + DOUBLE PRECISION :: pi, epsi, coefx(5), coefy(5) + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol + DOUBLE COMPLEX, ALLOCATABLE :: crhs(:), crhs_r(:), csol(:), csol_r(:) + TYPE(spline2d) :: splxy + TYPE(zpardiso_mat) :: mat +! + CHARACTER(len=128) :: file='pde2d_sym_pardiso_dft.h5' + INTEGER :: fid + DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: arr, srow + DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol + DOUBLE PRECISION :: seconds, mem, dopla + DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tfour, tfour0, tgrid, gflops1 + INTEGER :: nits=100 + LOGICAL :: nlmetis, nlforce_zero, nlpos +! + DOUBLE PRECISION :: xpt, ypt0 + DOUBLE COMPLEX, ALLOCATABLE :: cspec0(:), cspec(:), energy_k(:) + DOUBLE COMPLEX :: energy, energy_exact +! + NAMELIST /newrun/ nx, ny, nidbas, ngauss, kmin, kmax, mbess, epsi, & + & nlppform, nlmetis, nlforce_zero, nlpos, coefx, coefy +!=========================================================================== +! 1.0 Prologue +! +! Read in data specific to run +! + nx = 8 ! Number of intervals in x + ny = 8 ! Number of intervals in y + nidbas = (/3,3/) ! Degree of splines + ngauss = (/4,4/) ! Number of Gauss points/interval + kmin = -3 ! Minimum Fourier mode number + kmax = 3 ! Maximum Fourier mode number + mbess = 2 ! Exponent of differential problem + epsi = 0.5 ! Non-uniformity in the Laplacian coefficicient + nterms = 2 ! Number of terms in weak form + nlppform = .TRUE. ! Use PPFORM for gridval or not + nlmetis = .FALSE. ! Use metis ordering or minimum degree + nlforce_zero = .FALSE. ! Remove existing nodes with zero val in putele/row/ele + nlpos = .TRUE. ! Matrix is positive definite + coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function +! + READ(*,newrun) + WRITE(*,newrun) +!! +! Read table of mode couplings +! + READ(*,*) n_mode_couplings + ALLOCATE(mode_couplings(n_mode_couplings)) + READ(*,*) mode_couplings + WRITE(*,'(/a/(20i4))') 'Mode couplings', mode_couplings +! +! Define grid on x (=radial) & y (=poloidal) axis +! + pi = 4.0d0*ATAN(1.0d0) + ALLOCATE(xgrid(0:nx), ygrid(0:ny)) + xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 + CALL meshdist(coefx, xgrid, nx) + ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi + CALL meshdist(coefy, ygrid, ny) +! +! Exact energy +! + energy_exact = 2.0d0*pi/REAL(2+mbess,8) +! +! Create hdf5 file +! + CALL creatf(file, fid, 'PDE2D Result File', real_prec='d') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NY', ny) + CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) + CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) + CALL attach(fid, '/', 'NGAUSS1', ngauss(1)) + CALL attach(fid, '/', 'NGAUSS2', ngauss(2)) + CALL attach(fid, '/', 'MBESS', mbess) + CALL attach(fid, '/', 'EPSI', epsi) + CALL attach(fid, '/', 'KMIN', kmin) + CALL attach(fid, '/', 'KMAX', kmax) +!=========================================================================== +! 2.0 Discretize the PDE +! +! Set up spline +! + CALL set_spline(nidbas, ngauss, xgrid, ygrid, splxy, & + & (/.FALSE., .TRUE./)) +! +! Init DFT for spline in 2nd direction +! + CALL init_dft(splxy%sp2, kmin, kmax, mode_couplings) + dk = splxy%sp2%dft%dk +! +! FE matrix assembly +! + dimx = splxy%sp1%dim + dimy = splxy%sp2%dim + nrank = (nx+nidbas(1))*dk ! Rank of restricted matrix + nrank_full = (nx+nidbas(1))*ny ! Rank of full matrix +! + ALLOCATE(rhs(nrank_full), sol(nrank_full)) + ALLOCATE(crhs(nrank_full), csol(nrank_full)) + ALLOCATE(crhs_r(nrank), csol_r(nrank)) +! + WRITE(*,'(a,i8)') 'nrank_full', nrank_full + WRITE(*,'(a,i8)') 'nrank ', nrank +! + t0 = seconds() + CALL init(nrank, nterms, mat, nlherm=.TRUE.) + CALL dismat(splxy, epsi, mat) + ALLOCATE(arr(nrank)) + ALLOCATE(srow(nrank)) + DO i=1,nrank + CALL getrow(mat, i, arr) + srow(i) = SUM(arr) + END DO +!!$ WRITE(*,'(a/(10(1pe12.3)))') 'Real of Sum of matrix rows before BC', REAL(srow) +!!$ WRITE(*,'(a/(10(1pe12.3)))') 'Imag of Sum of matrix rows before BC', AIMAG(srow) + PRINT*, 'Sum of mat before BC', SUM(srow) +! +! BC on Matrix +! + WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero + WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat) + CALL ibcmat(mat, splxy%sp2%dft) + tmat = seconds() - t0 + DO i=1,nrank + CALL getrow(mat, i, arr) + srow(i) = SUM(arr) + END DO +!!$ WRITE(*,'(a/(10(1pe12.3)))') 'Real of Sum of matrix rows after BC', REAL(srow) +!!$ WRITE(*,'(a/(10(1pe12.3)))') 'Imag of Sum of matrix rows after BC', AIMAG(srow) + PRINT*, 'Sum of mat after BC', SUM(srow) +! + WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat) + WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem() +! +! RHS assembly +! + CALL disrhs(mbess, epsi, splxy, rhs) +! +! Init FFT +! + t0 = seconds() + CALL fourcol(crhs(1:ny),1) + CALL fourcol(crhs(1:ny),-1) + tfour0 = seconds()-t0 + crhs = crhs/REAL(ny,8) +! +! DFT of RHS +! + t0 = seconds() + crhs = rhs + DO i=1,nx+nidbas(1) + i0 = (i-1)*ny + CALL fourcol(crhs(i0+1:i0+ny), 1) + END DO + tfour = seconds()-t0 +! +! Restriction in Fourier space +! k = kmin:kmax (restricted) +! kp = 0:ny-1 (full) +! + DO i=1,nx+nidbas(1) + i0 = (i-1)*ny + i0_r = (i-1)*dk + DO k=kmin,kmax + kp = k + IF(kp.LT.0) kp = kp+ny + crhs_r(i0_r+k-kmin+1) = crhs(i0+kp+1) + END DO + END DO +! +! BC on RHS +! + CALL ibcrhs(crhs_r, splxy%sp2%dft) +! + IF(nrank.LT.100) THEN + WRITE(*,'(a/(10(1pe12.3)))') 'Real of crhs', REAL(crhs) + WRITE(*,'(a/(10(1pe12.3)))') 'Imag of crhs', AIMAG(crhs) + END IF +!=========================================================================== +! 3.0 Solve the dicretized system +! +! Matrix factorization +! + t0 = seconds() +!!$ CALL factor(mat, nlmetis=nlmetis) + CALL to_mat(mat) + CALL reord_mat(mat, nlmetis=nlmetis); CALL putmat(fid, '/MAT1', mat) + CALL numfact(mat) + tfact = seconds() - t0 + DO i=1,nrank + CALL getrow(mat, i, arr) + srow(i) = SUM(arr) + END DO + PRINT*, 'Sum of mat after factor', SUM(srow) + + WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem() + WRITE(*,'(/a,i12)') 'Number of nonzeros in factors of A = ',mat%p%iparm(18) + WRITE(*,'(a,i12)') 'Number of factorization MFLOPS = ',mat%p%iparm(19) + gflops1 = mat%p%iparm(19) / tfact / 1.d3 +! +! Backsolve +! + t0 = seconds() + PRINT*, 'SUM of crhs_r', SUM(crhs_r) + CALL bsolve(mat, crhs_r, csol_r, debug=.FALSE.) + WRITE(*,'(a,1pe12.4)') 'Residue =', cnorm2(vmx(mat,csol_r)-crhs_r) + tsolv = seconds() - t0 + PRINT*, 'SUM of csol_r', SUM(csol_r) +! + CALL putarr(fid, '/FT_RHS', crhs_r, 'DFT of RHS') + CALL putarr(fid, '/FT_SOL', csol_r, 'DFT of Spline coefficients') +!=========================================================================== +! 4.0 Perform some diagnostics in Fourier space +! +! Fourier spectrum at xpt +! + xpt = SQRT(REAL(mbess,8)/REAL(mbess+2,8)) + ALLOCATE(cspec0(dk)) + CALL spectrum0(splxy, csol_r, xpt, cspec0) + WRITE(*,'(/a,f10.5)') 'DFT spectrum (by integration) at x = ', xpt + DO k=kmin,kmax + WRITE(*,'(i5,3(1pe15.3))') k, cspec0(k-kmin+1), ABS(cspec0(k-kmin+1)) + END DO +! + ypt0 = 0.0d0 + WRITE(*,'(/a,2f10.5)') 'DFT spectrum1 at x, y0 = ', xpt, ypt0 + CALL spectrum1(splxy, csol_r, xpt, ypt0, cspec0) + DO k=kmin,kmax + WRITE(*,'(i5,3(1pe15.3))') k, cspec0(k-kmin+1), ABS(cspec0(k-kmin+1)) + END DO + ypt0 = splxy%sp2%dft%dx/2.0d0 ! Center of first interval + WRITE(*,'(/a,2f10.5)') 'DFT spectrum1 at x, y0 = ', xpt, ypt0 + CALL spectrum1(splxy, csol_r, xpt, ypt0, cspec0) + DO k=kmin,kmax + WRITE(*,'(i5,3(1pe15.3))') k, cspec0(k-kmin+1), ABS(cspec0(k-kmin+1)) + END DO +! +! Spectral energy +! + WRITE(*,'(/a)') 'Spectral energies' + ALLOCATE(energy_k(kmin:kmax)) + energy_k = (0.0d0,0.0d0) + DO i=1,dimx + i0_r = (i-1)*dk + DO k=kmin,kmax + ik = i0_r+k-kmin+1 + energy_k(k) = energy_k(k) + csol_r(ik)*CONJG(crhs_r(ik)) + END DO + END DO + energy_k = energy_k/REAL(ny,8) + energy = SUM(energy_k) + DO k=kmin,kmax + WRITE(*,'(i5,3(1pe15.3))') k, energy_k(k), ABS(energy_k(k)) + END DO + WRITE(*,'(a5,4(1pe15.3))') 'Sum', energy, ABS(energy), REAL(energy-energy_exact) +! + CALL putarr(fid, '/ENERGY_K', energy_k, 'Spectral energies') +!=========================================================================== +! 5.0 Transform back to real space +! +! Expand to full Fourier space +! k = kmin:kmax (restricted) +! kp = 0:ny-1 (full) +! + crhs = (0.0d0,0.0d0) + DO i=1,nx+nidbas(1) + i0 = (i-1)*ny + i0_r = (i-1)*dk + DO k=kmin,kmax + kp = k + IF(kp.LT.0) kp = kp+ny + csol(i0+kp+1) = csol_r(i0_r+k-kmin+1) + END DO + END DO +! +! Fourier transform back to real space +! + t0 = seconds() + DO i=1,nx+nidbas(1) + i0 = (i-1)*ny + CALL fourcol(csol(i0+1:i0+ny),-1) + END DO + sol = REAL(csol)/REAL(ny,8) + tfour = tfour + seconds()-t0 +! +! +! Spline coefficients, taking into account of periodicity in y +! Note: in SOL, y was numbered first. +! + ALLOCATE(bcoef(0:dimx-1, 0:dimy-1)) + DO j=0,dimy-1 + DO i=0,dimx-1 + ij = MODULO(j,ny) + i*ny + 1 + bcoef(i,j) = sol(ij) + END DO + END DO + CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution') +! +! Total energy +! + WRITE(*,'(/a, 2(1pe15.3))') 'Total energy and error(real space)', & + & DOT_PRODUCT(rhs,sol), & + & DOT_PRODUCT(rhs,sol)-REAL(energy_exact) + WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem() +!=========================================================================== +! 6.0 Check the solution +! +! Check function values computed with various method +! + ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny)) + DO i=0,nx + DO j=0,ny + solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j)) + END DO + END DO + jder = (/0,0/) +! +! Compute PPFORM/BCOEFS at first call to gridval +! + CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef) +! +! Fourier spectrum at xpt +! + xpt = SQRT(REAL(mbess,8)/REAL(mbess+2,8)) + ALLOCATE(cspec(dk)) +! + ypt0 = 0.0d0 + WRITE(*,'(/a,2f10.5)') 'DFT spectrum2 at x, y0 = ', xpt, ypt0 + CALL spectrum2(splxy, xpt, ypt0, cspec) + DO k=kmin,kmax + WRITE(*,'(i5,3(1pe15.3))') k, cspec(k-kmin+1), ABS(cspec(k-kmin+1)) + END DO +! + ypt0 = splxy%sp2%dft%dx/2.0d0 ! Center of first interval + WRITE(*,'(/a,2f10.5)') 'DFT spectrum2 at x, y0 = ', xpt, ypt0 + CALL spectrum2(splxy, xpt, ypt0, cspec) + DO k=kmin,kmax + WRITE(*,'(i5,3(1pe15.3))') k, cspec(k-kmin+1), ABS(cspec(k-kmin+1)) + END DO +! + WRITE(*,'(/a)') '*** Checking solutions' + t0 = seconds() + DO it=1,nits ! nits iterations for timing + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + END DO + tgrid = (seconds() - t0)/REAL(nits) + errsol = solana - solcal + IF( SIZE(bcoef,2) .LE. 10 ) THEN + CALL prnmat('BCOEF', bcoef) + CALL prnmat('SOLANA', solana) + CALL prnmat('SOLCAL', solcal) + CALL prnmat('ERRSOL', errsol) + END IF + WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & + & norm2(errsol) / norm2(solana) + WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid +! + CALL putarr(fid, '/xgrid', xgrid, 'r') + CALL putarr(fid, '/ygrid', ygrid, '\theta') + CALL putarr(fid, '/sol', solcal, 'Solutions') + CALL putarr(fid, '/solana', solana,'Exact solutions') + CALL putarr(fid, '/errors', errsol, 'Errors') +! +! Check derivatives d/dx and d/dy +! + WRITE(*,'(/a)') '*** Checking gradient' + DO i=0,nx + DO j=0,ny + IF( mbess .EQ. 0 ) THEN + solana(i,j) = -2.0d0 * xgrid(i) + ELSE + solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * & + & xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) + END IF + END DO + END DO +! + jder = (/1,0/) + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + errsol = solana - solcal + CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions') + WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol) +! + DO i=0,nx + DO j=0,ny + solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j)) + END DO + END DO +! + jder = (/0,1/) + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions') + errsol = solana - solcal + WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol) +! + WRITE(*,'(/a)') '---' + WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat + WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact + WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s) ', tsolv + WRITE(*,'(a,1pe12.3)') 'Init FFT time (s) ', tfour0 + WRITE(*,'(a,1pe12.3)') 'FFT time (s) ', tfour + WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1 +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(cspec0, cspec) + DEALLOCATE(mode_couplings) + DEALLOCATE(xgrid, ygrid, rhs, sol) + DEALLOCATE(crhs, csol) + DEALLOCATE(crhs_r, csol_r) + DEALLOCATE(solcal, solana, errsol) + DEALLOCATE(bcoef) + DEALLOCATE(arr) + DEALLOCATE(srow) + DEALLOCATE(energy_k) + CALL destroy_sp(splxy) + CALL destroy(mat) +! + CALL closef(fid) +!=========================================================================== +! +CONTAINS + FUNCTION norm2(x) +! +! Compute the 2-norm of array x +! + IMPLICIT NONE + DOUBLE PRECISION :: norm2 + DOUBLE PRECISION, INTENT(in) :: x(:,:) + DOUBLE PRECISION :: sum2 + INTEGER :: i, j +! + sum2 = 0.0d0 + DO i=1,SIZE(x,1) + DO j=1,SIZE(x,2) + sum2 = sum2 + x(i,j)**2 + END DO + END DO + norm2 = SQRT(sum2) + END FUNCTION norm2 +! + FUNCTION cnorm2(x) + DOUBLE COMPLEX, INTENT(in) :: x(:) + DOUBLE PRECISION :: cnorm2 + cnorm2 = SQRT(DOT_PRODUCT(x,x)) + END FUNCTION cnorm2 +! + SUBROUTINE prnmat(label, mat) + CHARACTER(len=*) :: label + DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat + INTEGER :: i + WRITE(*,'(/a)') TRIM(label) + DO i=1,SIZE(mat,1) + WRITE(*,'(10(1pe12.3))') mat(i,:) + END DO + END SUBROUTINE prnmat +END PROGRAM main +! +!+++ +SUBROUTINE meshdist(c, x, nx) +! +! Construct an 1d non-equidistant mesh given a +! mesh distribution function. +! + IMPLICIT NONE + DOUBLE PRECISION, INTENT(in) :: c(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + INTEGER :: nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) +CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) + END FUNCTION fdist +END SUBROUTINE meshdist +!+++ diff --git a/examples/pde2d_sym_wsmp.f90 b/examples/pde2d_sym_wsmp.f90 new file mode 100644 index 0000000..2d0c56e --- /dev/null +++ b/examples/pde2d_sym_wsmp.f90 @@ -0,0 +1,696 @@ +!> +!> @file pde2d_sym_wsmp.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +! +! Solving the 2d PDE using splines and WSMP symmetric matrix +! +! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0 +! exact solution: f(x,y) = (1-x^2) x^m cos(my) +! +MODULE pde2d_sym_wsmp_mod + USE bsplines + USE wsmp_bsplines + IMPLICIT NONE +CONTAINS +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE dismat(spl, mat) +! +! Assembly of FE matrix mat using spline spl +! + TYPE(spline2d), INTENT(in) :: spl + TYPE(wsmp_mat), INTENT(inout) :: mat +! + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: n2, nidbas2, ndim2, ng2 + INTEGER :: i, j, ig1, ig2 + INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol + DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:) + DOUBLE PRECISION:: contrib +! + INTEGER :: kterms ! Number of terms in weak form + INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:) ! Derivative order + DOUBLE PRECISION, ALLOCATABLE :: coefs(:,:,:) ! Terms in weak form + INTEGER, ALLOCATABLE :: left1(:), left2(:) +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) + WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1 + WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2 +! +! Gauss quadature +! + CALL get_gauss(spl%sp1, ng1) + CALL get_gauss(spl%sp2, ng2) + ALLOCATE(xg1(ng1), wg1(ng1)) + ALLOCATE(xg2(ng1), wg2(ng1)) +! +! Weak form +! + kterms = mat%nterms + ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2)) + ALLOCATE(coefs(kterms,ng1,ng2)) +! + ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative + ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! +!=========================================================================== +! 2.0 Assembly loop +! + ALLOCATE(left1(ng1)) + ALLOCATE(left2(ng2)) + DO i=1,n1 + CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) + left1 = i + CALL basfun(xg1, spl%sp1, fun1, left1) + DO j=1,n2 + CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) + left2 = j + CALL basfun(xg2, spl%sp2, fun2, left2) +! + DO ig1=1,ng1 + DO ig2=1,ng2 + CALL coefeq(xg1(ig1), xg2(ig2), & + & idert(:,:,ig1,ig2), & + & iderw(:,:,ig1,ig2), & + & coefs(:,ig1,ig2)) + END DO + END DO +! + DO iw1=0,nidbas1 ! Weight function in dir 1 + igw1 = i+iw1 + DO iw2=0,nidbas2 ! Weight function in dir 2 + igw2 = MODULO(j+iw2-1, n2) + 1 + irow = igw2 + (igw1-1)*n2 + DO it1=0,nidbas1 ! Test function in dir 1 + igt1 = i+it1 + DO it2=0,nidbas2 ! Test function in dir 2 + igt2 = MODULO(j+it2-1, n2) + 1 + jcol = igt2 + (igt1-1)*n2 +!------------- + contrib = 0.0d0 + DO ig1=1,ng1 + DO ig2=1,ng2 + DO iterm=1,kterms + contrib = contrib + & + & fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * & + & fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * & + & coefs(iterm,ig1,ig2) * & + & fun2(it2,idert(iterm,2,ig1,ig2),ig2) * & + & fun1(it1,idert(iterm,1,ig1,ig2),ig1) * & + & wg1(ig1) * wg2(ig2) + END DO + END DO + END DO + CALL updtmat(mat, irow, jcol, contrib) +!------------- + END DO + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xg1, wg1, fun1) + DEALLOCATE(xg2, wg2, fun2) + DEALLOCATE(idert, iderw, coefs) + DEALLOCATE(left1,left2) +! + CONTAINS + SUBROUTINE coefeq(x, y, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x, y + INTEGER, INTENT(out) :: idt(:,:), idw(:,:) + DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1)) +! +! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy +! + c(1) = x ! + idt(1,1) = 1 + idt(1,2) = 0 + idw(1,1) = 1 + idw(1,2) = 0 + ! + c(2) = 1.d0/x + idt(2,1) = 0 + idt(2,2) = 1 + idw(2,1) = 0 + idw(2,2) = 1 + END SUBROUTINE coefeq + END SUBROUTINE dismat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE disrhs(mbess, spl, rhs) +! +! Assembly the RHS using 2d spline spl +! + INTEGER, INTENT(in) :: mbess + TYPE(spline2d), INTENT(in) :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:) + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: n2, nidbas2, ndim2, ng2 + INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank + DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) + DOUBLE PRECISION:: contrib +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) +! + ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) + ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) +! +! Gauss quadature +! + CALL get_gauss(spl%sp1, ng1) + CALL get_gauss(spl%sp2, ng2) + ALLOCATE(xg1(ng1), wg1(ng1)) + ALLOCATE(xg2(ng1), wg2(ng1)) +!=========================================================================== +! 2.0 Assembly loop +! + nrank = SIZE(rhs) + rhs(1:nrank) = 0.0d0 +! + DO i=1,n1 + CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) + DO ig1=1,ng1 + CALL basfun(xg1(ig1), spl%sp1, fun1, i) + DO j=1,n2 + CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) + DO ig2=1,ng2 + CALL basfun(xg2(ig2), spl%sp2, fun2, j) + contrib = wg1(ig1)*wg2(ig2) * & + & rhseq(xg1(ig1),xg2(ig2), mbess) + DO k1=0,nidbas1 + i1 = i+k1 + DO k2=0,nidbas2 + j2 = MODULO(j+k2-1,n2) + 1 + ij = j2 + (i1-1)*n2 + rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1) + END DO + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xg1, wg1, fun1) + DEALLOCATE(xg2, wg2, fun2) +! + CONTAINS + DOUBLE PRECISION FUNCTION rhseq(x1, x2, m) + DOUBLE PRECISION, INTENT(in) :: x1, x2 + INTEGER, INTENT(in) :: m + rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2) + END FUNCTION rhseq + END SUBROUTINE disrhs +! +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE ibcmat(mat, ny) +! +! Apply BC on matrix +! + TYPE(wsmp_mat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: ny + INTEGER :: nrank, i, j + DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:) +!=========================================================================== +! 1.0 Prologue +! + nrank = mat%rank + ALLOCATE(zsum(nrank), arr(nrank)) +!=========================================================================== +! 2.0 Unicity at the axis +! +! The vertical sum on the NY-th row +! + zsum = 0.0d0 + DO i=1,ny + arr = 0.0d0 + CALL getrow(mat, i, arr) + zsum(:) = zsum(:) + arr(:) + END DO + zsum(ny) = SUM(zsum(1:ny)) ! using symmetry + CALL putrow(mat, ny, zsum) +! +! The away operator +! + DO i = 1,ny-1 + arr = 0.0d0; arr(i) = 1.0d0 + CALL putrow(mat, i, arr) + END DO +! +!=========================================================================== +! 3.0 Dirichlet on right boundary +! + DO i = nrank, nrank-ny+1, -1 + arr = 0.0d0; arr(i) = 1.0d0 + CALL putrow(mat, i, arr) + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(zsum, arr) +! + END SUBROUTINE ibcmat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE ibcrhs(rhs, ny) +! +! Apply BC on RHS +! + DOUBLE PRECISION, INTENT(inout) :: rhs(:) + INTEGER, INTENT(in) :: ny + INTEGER :: nrank + DOUBLE PRECISION :: zsum +!=========================================================================== +! 1.0 Prologue +! + nrank = SIZE(rhs,1) +!=========================================================================== +! 2.0 Unicity at the axis +! +! The vertical sum +! + zsum = SUM(rhs(1:ny)) + rhs(ny) = zsum + rhs(1:ny-1) = 0.0d0 +!=========================================================================== +! 3.0 Dirichlet on right boundary +! + rhs(nrank-ny+1:nrank) = 0.0d0 + END SUBROUTINE ibcrhs +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +END MODULE pde2d_sym_wsmp_mod +PROGRAM main + USE pde2d_sym_wsmp_mod + USE futils +! + IMPLICIT NONE + INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms + LOGICAL :: nlppform + INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it + DOUBLE PRECISION :: pi, coefx(5), coefy(5) + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol + TYPE(spline2d) :: splxy + TYPE(wsmp_mat) :: mat +! + CHARACTER(len=128) :: file='pde2d_sym_wsmp.h5' + INTEGER :: fid + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr + DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol + DOUBLE PRECISION :: seconds, mem, dopla + DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2 + DOUBLE PRECISION :: tconv, treord + INTEGER :: nits=100 + LOGICAL :: nlforce_zero, nlpos +! + NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlpos, & + & nlforce_zero, coefx, coefy +!=========================================================================== +! 1.0 Prologue +! +! Read in data specific to run +! + nx = 8 ! Number of intervals in x + ny = 8 ! Number of intervals in y + nidbas = (/3,3/) ! Degree of splines + ngauss = (/4,4/) ! Number of Gauss points/interval + mbess = 2 ! Exponent of differential problem + nterms = 2 ! Number of terms in weak form + nlppform = .TRUE. ! Use PPFORM for gridval or not + nlforce_zero = .FALSE. ! Remove existing nodes with zero val in putele/row/ele + nlpos = .TRUE. ! Matrix is positive definite + coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function +! + READ(*,newrun) + WRITE(*,newrun) +! +! Define grid on x (=radial) & y (=poloidal) axis +! + pi = 4.0d0*ATAN(1.0d0) + ALLOCATE(xgrid(0:nx), ygrid(0:ny)) + xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 + CALL meshdist(coefx, xgrid, nx) + ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi + CALL meshdist(coefy, ygrid, ny) +! +! Create hdf5 file +! + CALL creatf(file, fid, 'PDE2D Result File', real_prec='d') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NY', ny) + CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) + CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) + CALL attach(fid, '/', 'NGAUSS1', ngauss(1)) + CALL attach(fid, '/', 'NGAUSS2', ngauss(2)) + CALL attach(fid, '/', 'MBESS', mbess) +!=========================================================================== +! 2.0 Discretize the PDE +! +! Set up spline +! + t0 = seconds() + CALL set_spline(nidbas, ngauss, & + & xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform) +! +! FE matrix assembly +! + nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix + WRITE(*,'(a,i8)') 'nrank', nrank +! + CALL init(nrank, nterms, mat, nlsym=.TRUE., nlpos=nlpos) + CALL dismat(splxy, mat) + ALLOCATE(arr(nrank)) + IF(nrank.LT.100) THEN + DO i=1,nrank + CALL getele(mat, i, i, arr(i)) + END DO + WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', arr + END IF +! +! BC on Matrix +! + WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero + WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat) + CALL ibcmat(mat, ny) + IF(nrank.LT.100) THEN + DO i=1,nrank + CALL getele(mat, i, i, arr(i)) + END DO + WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', arr + WRITE(*,'(a)') 'Last rows' + DO i=nrank-ny,nrank + CALL getrow(mat, i, arr) + WRITE(*,'(10(1pe12.3))') arr + END DO + END IF + tmat = seconds() - t0 +! +! RHS assembly +! + ALLOCATE(rhs(nrank), sol(nrank)) + CALL disrhs(mbess, splxy, rhs) +! +! BC on RHS +! + CALL ibcrhs(rhs, ny) + + CALL putarr(fid, '/RHS', rhs, 'RHS of linear system') + WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat) + WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem() +!=========================================================================== +! 3.0 Solve the dicretized system +! + t0 = seconds() + CALL factor(mat) +! +! The call to "factor" could be split into the +! 3 following calls +! +!!$ CALL to_mat(mat) +!!$ tconv = seconds() -t0 +!!$ WRITE(*,'(/a,l4)') 'associated(mat%mat)', ASSOCIATED(mat%mat) +!!$ WRITE(*,'(a,1pe12.3)') 'Mem used (MB) after TO_MAT', mem() +!!$! +!!$ t0 = seconds() +!!$ CALL reord_mat(mat) +!!$ CALL putmat(fid, '/MAT', mat) +!!$ treord = seconds() - t0 +!!$! +!!$ t0 = seconds() +!!$ CALL numfact(mat) + tfact = seconds() - t0 + + WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem() + WRITE(*,'(/a,i6)') 'Number of nonzeros in factor (/1000) = ',mat%p%iparm(24) + WRITE(*,'(a,1pe12.3)') 'Number of factorization GFLOPS = ',mat%p%dparm(23)/1.d9 + gflops1 = mat%p%dparm(23) / tfact / 1.d9 +! + CALL bsolve(mat, rhs, sol) + t0 = seconds() + DO it=1,nits ! nits iterations for timing + CALL bsolve(mat, rhs, sol) + sol(1:ny-1) = sol(ny) + END DO + WRITE(*,'(/a,i6)') 'Number of refinement steps = ',mat%p%iparm(6) + WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem() + tsolv = (seconds() - t0)/REAL(nits) +! +! Spline coefficients, taking into account of periodicity in y +! Note: in SOL, y was numbered first. +! + dimx = splxy%sp1%dim + dimy = splxy%sp2%dim + ALLOCATE(bcoef(0:dimx-1, 0:dimy-1)) + DO j=0,dimy-1 + DO i=0,dimx-1 + ij = MODULO(j,ny) + i*ny + 1 + bcoef(i,j) = sol(ij) + END DO + END DO + WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef) + CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution') +!=========================================================================== +! 4.0 Check the solution +! +! Check function values computed with various method +! + ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny)) + DO i=0,nx + DO j=0,ny + solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j)) + END DO + END DO + jder = (/0,0/) +! +! Compute PPFORM/BCOEFS at first call to gridval + CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef) +! + WRITE(*,'(/a)') '*** Checking solutions' + t0 = seconds() + DO it=1,nits ! nits iterations for timing + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + END DO + tgrid = (seconds() - t0)/REAL(nits) + errsol = solana - solcal + IF( SIZE(bcoef,2) .LE. 10 ) THEN + CALL prnmat('BCOEF', bcoef) + CALL prnmat('SOLANA', solana) + CALL prnmat('SOLCAL', solcal) + CALL prnmat('ERRSOL', errsol) + END IF + WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & + & norm2(errsol) / norm2(solana) + WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid +! + CALL putarr(fid, '/xgrid', xgrid, 'r') + CALL putarr(fid, '/ygrid', ygrid, '\theta') + CALL putarr(fid, '/sol', solcal, 'Solutions') + CALL putarr(fid, '/solana', solana,'Exact solutions') + CALL putarr(fid, '/errors', errsol, 'Errors') +! +! Check derivatives d/dx and d/dy +! + WRITE(*,'(/a)') '*** Checking gradient' + DO i=0,nx + DO j=0,ny + IF( mbess .EQ. 0 ) THEN + solana(i,j) = -2.0d0 * xgrid(i) + ELSE + solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * & + & xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) + END IF + END DO + END DO +! + jder = (/1,0/) + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + errsol = solana - solcal + CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions') + WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol) +! + DO i=0,nx + DO j=0,ny + solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j)) + END DO + END DO +! + jder = (/0,1/) + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions') + errsol = solana - solcal + WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol) +! + WRITE(*,'(/a)') '---' + WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat + WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s) ', tconv + WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s) ', treord + WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact + WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv + WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s) ', tsolv + WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1 +!=========================================================================== +! 5.0 Clear the matrix and recompute +! + WRITE(*,'(/a)') 'Recompute the solver ...' + t0 = seconds() + CALL clear_mat(mat) + CALL dismat(splxy, mat) + CALL ibcmat(mat, ny) + tmat = seconds()-t0 +! + t0 = seconds() + CALL numfact(mat) + tfact = seconds()-t0 + gflops1 = mat%p%dparm(23) / tfact / 1.d9 +! + t0 = seconds() + ALLOCATE(newsol(nrank)) + CALL bsolve(mat, rhs, newsol) + newsol(1:ny-1) = newsol(ny) + tsolv = seconds()-t0 +! + WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol)) + WRITE(*,'(/a)') '---' + WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat + WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact + WRITE(*,'(a,1pe12.3)') 'Backsolve(2) time (s) ', tsolv + WRITE(*,'(a,1pe12.3)') 'Total (s) ', tmat+tfact+tsolv + WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1 +! + DEALLOCATE(newsol) +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xgrid, rhs, sol) + DEALLOCATE(solcal, solana, errsol) + DEALLOCATE(bcoef) + DEALLOCATE(arr) + CALL destroy_sp(splxy) + CALL destroy(mat) +! + CALL closef(fid) +!=========================================================================== +! +CONTAINS + FUNCTION norm2(x) +! +! Compute the 2-norm of array x +! + IMPLICIT NONE + DOUBLE PRECISION :: norm2 + DOUBLE PRECISION, INTENT(in) :: x(:,:) + DOUBLE PRECISION :: sum2 + INTEGER :: i, j +! + sum2 = 0.0d0 + DO i=1,SIZE(x,1) + DO j=1,SIZE(x,2) + sum2 = sum2 + x(i,j)**2 + END DO + END DO + norm2 = SQRT(sum2) + END FUNCTION norm2 + SUBROUTINE prnmat(label, mat) + CHARACTER(len=*) :: label + DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat + INTEGER :: i + WRITE(*,'(/a)') TRIM(label) + DO i=1,SIZE(mat,1) + WRITE(*,'(10(1pe12.3))') mat(i,:) + END DO + END SUBROUTINE prnmat +END PROGRAM main +! +!+++ +SUBROUTINE meshdist(c, x, nx) +! +! Construct an 1d non-equidistant mesh given a +! mesh distribution function. +! + IMPLICIT NONE + DOUBLE PRECISION, INTENT(in) :: c(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + INTEGER :: nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) +CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) + END FUNCTION fdist +END SUBROUTINE meshdist +!+++ diff --git a/examples/pde2d_sym_wsmp_dft.f90 b/examples/pde2d_sym_wsmp_dft.f90 new file mode 100644 index 0000000..f3226e9 --- /dev/null +++ b/examples/pde2d_sym_wsmp_dft.f90 @@ -0,0 +1,1039 @@ +!> +!> @file pde2d_sym_wsmp_dft.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Ben McMillan +!> @author Trach-Minh Tran +!> +! +! Solving the 2d PDE using splines and WSMP symmetric matrix. +! The periodic coordinate y is discrete Fourier transformed. +! +! -d/dx[x C d/dx]f - 1x/d/dy[Cd/dy] f = \rho, with f(x=1,y) = 0 +! C(x,y) = 1 + \epsilon x cos(y) +! exact solution: f(x,y) = (1-x^2) x^m cos(my) +! +MODULE pde2d_sym_wsmp_dft_mod + USE bsplines + USE wsmp_bsplines + IMPLICIT NONE +! +CONTAINS +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE dismat(spl, epsi, mat) +! +! Assembly of FE matrix mat using spline spl +! + TYPE(spline2d), INTENT(in) :: spl + DOUBLE PRECISION, INTENT(in) :: epsi + TYPE(zwsmp_mat), INTENT(inout) :: mat +! + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: n2, nidbas2, ndim2, ng2 + INTEGER :: kmin, kmax, dk + INTEGER :: i, j, ig1, ig2, kc + INTEGER :: iterm, iw1, mw, igw1, it1, mt, igt1, irow, jcol + DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:) + DOUBLE COMPLEX, ALLOCATABLE :: ft_fun2(:,:,:), fft_temp(:) + DOUBLE COMPLEX :: contrib +! + INTEGER :: kterms ! Number of terms in weak form + INTEGER :: kcoupl ! Number of mode couplings + INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:) ! Derivative order + DOUBLE COMPLEX, ALLOCATABLE :: coefs(:,:,:,:) ! Terms in weak form + INTEGER, ALLOCATABLE :: left1(:), left2(:) +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) + kmin = spl%sp2%dft%kmin + kmax = spl%sp2%dft%kmax + dk = spl%sp2%dft%dk + WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1 + WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2 + WRITE(*,'(a, 5i6)') 'kmin, kmax, dk =', kmin, kmax, dk +! +! Gauss quadature +! + CALL get_gauss(spl%sp1, ng1) + CALL get_gauss(spl%sp2, ng2) + ALLOCATE(xg1(ng1), wg1(ng1)) + ALLOCATE(xg2(ng2), wg2(ng2)) +! +! Weak form +! + kterms = mat%nterms + kcoupl = SIZE(spl%sp2%dft%mode_couplings) + ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2)) + ALLOCATE(coefs(kterms,kcoupl,ng1,ng2)) +! +! Splines and derivatives at all Gauss points +! + ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative + ALLOCATE(ft_fun2(kmin:kmax,0:1,ng2)) ! DFT of splines and 1st derivative + ALLOCATE(fft_temp(0:n2-1)) ! Used in coefeq +!=========================================================================== +! 2.0 Assembly loop +! + ALLOCATE(left1(ng1)) + ALLOCATE(left2(ng2)) +! +! First interval in 2nd (periodic) coordinate +! + j = 1 + CALL get_gauss(spl%sp2, ng2, 1, xg2, wg2) + left2 = j + CALL ft_basfun(xg2, spl%sp2, ft_fun2, left2) + + DO i=1,n1 + CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) + left1 = i + CALL basfun(xg1, spl%sp1, fun1, left1) + DO ig1=1,ng1 + DO ig2=1,ng2 + CALL coefeq(xg1(ig1), xg2(ig2), & + & idert(:,:,ig1,ig2), & + & iderw(:,:,ig1,ig2), & + & coefs(:,:,ig1,ig2)) + END DO + END DO +! + DO iw1=0,nidbas1 ! Weight function in dir 1 + igw1 = i+iw1 + DO it1=0,nidbas1 ! Test function in dir 1 + igt1 = i+it1 + DO mt=kmin,kmax ! Test Fourier mode + DO kc=1,kcoupl + mw = mt + spl%sp2%dft%mode_couplings(kc) + IF(mw.LT.kmin .OR. mw.GT.kmax) CYCLE +!------------- + contrib = (0.0d0, 0.0d0) + DO ig1=1,ng1 + DO ig2=1,ng2 + DO iterm=1,kterms + contrib = contrib + & + & fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * & + & ft_fun2(mw,iderw(iterm,2,ig1,ig2),ig2) * & + & coefs(iterm,kc,ig1,ig2) * & + & CONJG(ft_fun2(mt,idert(iterm,2,ig1,ig2),ig2)) * & + & fun1(it1,idert(iterm,1,ig1,ig2),ig1) * & + & wg1(ig1) * wg2(ig2) /REAL(n2,8) + END DO + END DO + END DO + irow = (igw1-1)*dk + (mw-kmin)+1 ! Number first mode m then radial coord. + jcol = (igt1-1)*dk + (mt-kmin)+1 + CALL updtmat(mat, irow, jcol, contrib) +!------------- + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xg1, wg1, fun1) + DEALLOCATE(xg2, wg2, ft_fun2) + DEALLOCATE(idert, iderw, coefs) + DEALLOCATE(left1,left2) + DEALLOCATE(fft_temp) +! + CONTAINS + SUBROUTINE coefeq(x, y, idt, idw, c) + USE fft + DOUBLE PRECISION, INTENT(in) :: x, y + INTEGER, INTENT(out) :: idt(:,:), idw(:,:) + DOUBLE COMPLEX, INTENT(out) :: c(:,:) +! + DOUBLE PRECISION :: zcoef, dy + INTEGER :: j, k, kc, kp +! +! Weak form = Int(x*C*dw/dx*dt/dx + C/x*dw/dy*dt/dy)dxdy +! C(x,y) = 1 + epsilon*x*cos(y) +! + dy = spl%sp2%dft%dx + kc = SIZE(spl%sp2%dft%mode_couplings) + DO j=0,n2-1 + fft_temp(j) = 1.0d0+epsi*x*COS(y+j*dy) + END DO + CALL fourcol(fft_temp,1) + DO k=1,kc + kp = spl%sp2%dft%mode_couplings(k) + IF(kp.LT.0) kp=kp+n2 + c(1,k) = x*fft_temp(kp) + c(2,k) = fft_temp(kp)/x + END DO +!!$ WRITE(*,'(a/(10(1pe12.4)))') 'fft_temp', ABS(fft_temp) +!!$ WRITE(*,'(a/(10(1pe12.4)))') 'c1', ABS(c(1,:)) +!!$ WRITE(*,'(a/(10(1pe12.4)))') 'c2', ABS(c(2,:)) +! + idt(1,1) = 1 + idt(1,2) = 0 + idw(1,1) = 1 + idw(1,2) = 0 +! + idt(2,1) = 0 + idt(2,2) = 1 + idw(2,1) = 0 + idw(2,2) = 1 + END SUBROUTINE coefeq + END SUBROUTINE dismat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE disrhs(mbess, epsi, spl, rhs) +! +! Assembly the RHS using 2d spline spl +! + INTEGER, INTENT(in) :: mbess + DOUBLE PRECISION, INTENT(in) :: epsi + TYPE(spline2d), INTENT(in) :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:) +! + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: n2, nidbas2, ndim2, ng2 + INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank + DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) + DOUBLE PRECISION:: contrib +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) +! + ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) + ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) +! +! Gauss quadature +! + CALL get_gauss(spl%sp1, ng1) + CALL get_gauss(spl%sp2, ng2) + ALLOCATE(xg1(ng1), wg1(ng1)) + ALLOCATE(xg2(ng1), wg2(ng1)) +!=========================================================================== +! 2.0 Assembly loop +! + nrank = SIZE(rhs) + rhs(1:nrank) = 0.0d0 +! + DO i=1,n1 + CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) + DO ig1=1,ng1 + CALL basfun(xg1(ig1), spl%sp1, fun1, i) + DO j=1,n2 + CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) + DO ig2=1,ng2 + CALL basfun(xg2(ig2), spl%sp2, fun2, j) + contrib = wg1(ig1)*wg2(ig2) * & + & rhseq(xg1(ig1),xg2(ig2), mbess) + DO k1=0,nidbas1 + i1 = i+k1 + DO k2=0,nidbas2 + j2 = MODULO(j+k2-1,n2) + 1 + ij = j2 + (i1-1)*n2 + rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1) + END DO + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xg1, wg1, fun1) + DEALLOCATE(xg2, wg2, fun2) +! + CONTAINS + DOUBLE PRECISION FUNCTION rhseq(x, y, m) + DOUBLE PRECISION, INTENT(in) :: x, y + INTEGER, INTENT(in) :: m +! + DOUBLE PRECISION :: xm + xm = REAL(m,8) + rhseq = x**(m+1) * ( 4.0d0*(xm+1.0d0)*COS(xm*y) + & + & epsi*x*( & + & ( (3.0d0*(xm+1.0d0) - xm/x**2)*COS((xm-1.0d0)*y) + & + & (3.0d0+2.0d0*xm)*COS((xm+1.0d0)*y) ) & + & )) + END FUNCTION rhseq + END SUBROUTINE disrhs +! +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE ibcmat(mat, dft) +! +! Apply BC on matrix +! + TYPE(zwsmp_mat), INTENT(inout) :: mat + TYPE(dftmap), INTENT(in) :: dft + INTEGER :: nrank, k, kmin, kmax, dk, i + DOUBLE COMPLEX :: arr(mat%rank) +!=========================================================================== +! 1.0 Prologue +! + nrank = mat%rank + kmin = dft%kmin + kmax = dft%kmax + dk = dft%dk +!=========================================================================== +! 2.0 BC at the axis +! +! zero for non-zero modes +! + DO k=kmin,kmax + IF(k.NE.0) THEN + i = k-kmin+1 + arr = 0.0d0; arr(i) = 1.0d0 + CALL putrow(mat, i, arr) + END IF + END DO +!=========================================================================== +! 3.0 Dirichlet on right boundary +! + DO i = nrank, nrank-dk+1, -1 + arr = 0.0d0; arr(i) = 1.0d0 + CALL putrow(mat, i, arr) + END DO +! + END SUBROUTINE ibcmat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE ibcrhs(rhs, dft) +! +! Apply BC on RHS +! + DOUBLE COMPLEX, INTENT(inout) :: rhs(:) + TYPE(dftmap), INTENT(in) :: dft + INTEGER :: nrank, kmin, kmax, dk, k +!=========================================================================== +! 1.0 Prologue +! + nrank = SIZE(rhs,1) + kmin = dft%kmin + kmax = dft%kmax + dk = dft%dk +!=========================================================================== +! 2.0 BC at the axis +! +! zero for non-zero modes +! + DO k=kmin,kmax + IF(k.NE.0) rhs(k-kmin+1) = 0.0 + END DO +!=========================================================================== +! 3.0 Dirichlet on right boundary +! + rhs(nrank-dk+1:nrank) = 0.0d0 + END SUBROUTINE ibcrhs +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE spectrum0(spl, carr, xpt, cspec) +! +! DFT modes at xpt (integration on the first interval) +! + DOUBLE COMPLEX, PARAMETER :: ci = (0.0d0,1.0d0) + DOUBLE PRECISION, PARAMETER :: pi = 3.141592653589793d0 + TYPE(spline2d), INTENT(in) :: spl + DOUBLE COMPLEX, INTENT(in) :: carr(:) + DOUBLE PRECISION, INTENT(in) :: xpt + DOUBLE COMPLEX, INTENT(out) :: cspec(:) +! + INTEGER :: ndim1, n1, nidbas1, ndim2, n2, nidbas2 + INTEGER :: k, kmin, kmax, dk, kk + INTEGER :: ng2, ig2 + INTEGER, ALLOCATABLE :: left2(:) + DOUBLE PRECISION :: temp(1) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:) + DOUBLE COMPLEX, ALLOCATABLE :: ft_fun2(:,:,:), psi(:), coefs(:,:) +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) + kmin = spl%sp2%dft%kmin + kmax = spl%sp2%dft%kmax + dk = spl%sp2%dft%dk +! + CALL get_gauss(spl%sp2, ng2) + ALLOCATE(left2(ng2)) + ALLOCATE(xg2(ng2), wg2(ng2)) + ALLOCATE(ft_fun2(kmin:kmax,1,ng2)) ! DFT of splines + ALLOCATE(psi(kmin:kmax)) +! +! Integration over first interval +! + left2 = 1 + CALL get_gauss(spl%sp2, ng2, 1, xg2, wg2) + CALL ft_basfun(xg2, spl%sp2, ft_fun2, left2) + psi = (0.0d0,0.0d0) + DO k=kmin,kmax + DO ig2=1,ng2 + psi(k) = psi(k) + wg2(ig2)*EXP(k*ci*xg2(ig2))*CONJG(ft_fun2(k,1,ig2)) + END DO + END DO +! + ALLOCATE(coefs(dk,ndim1)) + coefs = RESHAPE(carr, SHAPE(coefs)) + temp = xpt + DO kk=kmin,kmax + k=kk-kmin+1 + coefs(k,:) = psi(kk)*coefs(k,:) + CALL gridval(spl%sp1, temp, cspec(k:k), 0, coefs(k,:)) + END DO + cspec = cspec/(2.0d0*pi) +! + DEALLOCATE(left2) + DEALLOCATE(xg2, wg2) + DEALLOCATE(ft_fun2) + DEALLOCATE(psi) + DEALLOCATE(coefs) + END SUBROUTINE spectrum0 +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE spectrum1(spl, carr, xpt, ypt0, cspec) +! +! DFT modes at xpt (at the initial ypt0) +! + DOUBLE COMPLEX, PARAMETER :: ci = (0.0d0,1.0d0) +! + TYPE(spline2d), INTENT(in) :: spl + DOUBLE COMPLEX, INTENT(in) :: carr(:) + DOUBLE PRECISION, INTENT(in) :: xpt, ypt0 + DOUBLE COMPLEX, INTENT(out) :: cspec(:) +! + INTEGER :: ndim1, n1, nidbas1, ndim2, n2, nidbas2 + INTEGER :: k, kmin, kmax, dk + DOUBLE PRECISION :: temp(1) + DOUBLE COMPLEX :: ctemp(1) + DOUBLE COMPLEX, ALLOCATABLE :: ft_fun2(:,:), coefs(:,:) +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) + kmin = spl%sp2%dft%kmin + kmax = spl%sp2%dft%kmax + dk = spl%sp2%dft%dk +! +! DFT of splines at ypt0 + ALLOCATE(ft_fun2(kmin:kmax,1)) + ALLOCATE(coefs(kmin:kmax,ndim1)) + CALL ft_basfun(ypt0, spl%sp2, ft_fun2, 1) + coefs = RESHAPE(carr, SHAPE(coefs)) +! + temp = xpt + DO k=kmin,kmax + CALL gridval(spl%sp1, temp, ctemp, 0, coefs(k,:)) + cspec(k-kmin+1) = CONJG(ft_fun2(k,1))*ctemp(1)*EXP(k*ci*ypt0) + END DO + cspec = cspec/REAL(n2,8) +! + DEALLOCATE(ft_fun2) + DEALLOCATE(coefs) + END SUBROUTINE spectrum1 +!!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE spectrum2(spl, xpt, ypt0, cspec) +! +! DFT modes at xpt (at the initial ypt0) +! + USE fft + USE bsplines +! + DOUBLE COMPLEX, PARAMETER :: ci = (0.0d0,1.0d0) +! + TYPE(spline2d) :: spl + DOUBLE PRECISION, INTENT(in) :: xpt, ypt0 + DOUBLE COMPLEX, INTENT(out) :: cspec(:) +! + INTEGER :: ndim1, n1, nidbas1, ndim2, n2, nidbas2 + INTEGER :: k, kmin, kmax, dk + DOUBLE PRECISION, ALLOCATABLE :: ypt(:), fun(:,:) + DOUBLE COMPLEX, ALLOCATABLE :: ft_fun(:) + DOUBLE PRECISION :: temp(1) +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) + kmin = spl%sp2%dft%kmin + kmax = spl%sp2%dft%kmax + dk = spl%sp2%dft%dk +! + ALLOCATE(ypt(0:n2-1)) + ALLOCATE(fun(1, 0:n2-1)) + ALLOCATE(ft_fun(0:n2-1)) +! +! Function values at points ypt +! + ypt(0:n2-1) = ypt0 + spl%sp2%knots(0:n2-1) + temp = xpt + CALL gridval(spl, temp, ypt, fun, (/0,0/)) + ft_fun = fun(1,:) +! +! Discrete Fourier Transform +! + CALL fourcol(ft_fun, 1) + DO k=kmin,kmax + IF(k.LT.0) THEN + cspec(k-kmin+1) = ft_fun(k+n2)*EXP(k*ci*ypt0) + ELSE + cspec(k-kmin+1) = ft_fun(k)*EXP(k*ci*ypt0) + END IF + END DO + cspec = cspec/REAL(n2,8) +! + DEALLOCATE(ypt) + DEALLOCATE(fun) + DEALLOCATE(ft_fun) + END SUBROUTINE spectrum2 +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +END MODULE pde2d_sym_wsmp_dft_mod +PROGRAM main + USE pde2d_sym_wsmp_dft_mod + USE futils + USE fft +! + IMPLICIT NONE + INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms, kmin, kmax, dk + INTEGER :: n_mode_couplings + INTEGER, ALLOCATABLE :: mode_couplings(:) + LOGICAL :: nlppform + INTEGER :: i, j, ij, dimx, dimy, nrank, nrank_full, jder(2), it, i0, i0_r + INTEGER :: k, kp, ik + DOUBLE PRECISION :: pi, epsi, coefx(5), coefy(5) + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol + DOUBLE COMPLEX, ALLOCATABLE :: crhs(:), crhs_r(:), csol(:), csol_r(:) + TYPE(spline2d) :: splxy + TYPE(zwsmp_mat) :: mat +! + CHARACTER(len=128) :: file='pde2d_sym_wsmp_dft.h5' + INTEGER :: fid + DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: arr, srow + DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol + DOUBLE PRECISION :: seconds, mem, dopla + DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tfour, tfour0, tgrid, gflops1 + INTEGER :: nits=100 + LOGICAL :: nlmetis, nlforce_zero, nlpos +! + DOUBLE PRECISION :: xpt, ypt0 + DOUBLE COMPLEX, ALLOCATABLE :: cspec0(:), cspec(:), energy_k(:) + DOUBLE COMPLEX :: energy, energy_exact +! + NAMELIST /newrun/ nx, ny, nidbas, ngauss, kmin, kmax, mbess, epsi, & + & nlppform, nlmetis, nlforce_zero, nlpos, coefx, coefy +!=========================================================================== +! 1.0 Prologue +! +! Read in data specific to run +! + nx = 8 ! Number of intervals in x + ny = 8 ! Number of intervals in y + nidbas = (/3,3/) ! Degree of splines + ngauss = (/4,4/) ! Number of Gauss points/interval + kmin = -3 ! Minimum Fourier mode number + kmax = 3 ! Maximum Fourier mode number + mbess = 2 ! Exponent of differential problem + epsi = 0.5 ! Non-uniformity in the Laplacian coefficicient + nterms = 2 ! Number of terms in weak form + nlppform = .TRUE. ! Use PPFORM for gridval or not + nlmetis = .FALSE. ! Use metis ordering or minimum degree + nlforce_zero = .FALSE. ! Remove existing nodes with zero val in putele/row/ele + nlpos = .TRUE. ! Matrix is positive definite + coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function +! + READ(*,newrun) + WRITE(*,newrun) +!! +! Read table of mode couplings +! + READ(*,*) n_mode_couplings + ALLOCATE(mode_couplings(n_mode_couplings)) + READ(*,*) mode_couplings + WRITE(*,'(/a/(20i4))') 'Mode couplings', mode_couplings +! +! Define grid on x (=radial) & y (=poloidal) axis +! + pi = 4.0d0*ATAN(1.0d0) + ALLOCATE(xgrid(0:nx), ygrid(0:ny)) + xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 + CALL meshdist(coefx, xgrid, nx) + ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi + CALL meshdist(coefy, ygrid, ny) +! +! Exact energy +! + energy_exact = 2.0d0*pi/REAL(2+mbess,8) +! +! Create hdf5 file +! + CALL creatf(file, fid, 'PDE2D Result File', real_prec='d') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NY', ny) + CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) + CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) + CALL attach(fid, '/', 'NGAUSS1', ngauss(1)) + CALL attach(fid, '/', 'NGAUSS2', ngauss(2)) + CALL attach(fid, '/', 'MBESS', mbess) + CALL attach(fid, '/', 'EPSI', epsi) + CALL attach(fid, '/', 'KMIN', kmin) + CALL attach(fid, '/', 'KMAX', kmax) +!=========================================================================== +! 2.0 Discretize the PDE +! +! Set up spline +! + CALL set_spline(nidbas, ngauss, xgrid, ygrid, splxy, & + & (/.FALSE., .TRUE./)) +! +! Init DFT for spline in 2nd direction +! + CALL init_dft(splxy%sp2, kmin, kmax, mode_couplings) + dk = splxy%sp2%dft%dk +! +! FE matrix assembly +! + dimx = splxy%sp1%dim + dimy = splxy%sp2%dim + nrank = (nx+nidbas(1))*dk ! Rank of restricted matrix + nrank_full = (nx+nidbas(1))*ny ! Rank of full matrix +! + ALLOCATE(rhs(nrank_full), sol(nrank_full)) + ALLOCATE(crhs(nrank_full), csol(nrank_full)) + ALLOCATE(crhs_r(nrank), csol_r(nrank)) +! + WRITE(*,'(a,i8)') 'nrank_full', nrank_full + WRITE(*,'(a,i8)') 'nrank ', nrank +! + t0 = seconds() + CALL init(nrank, nterms, mat, nlherm=.TRUE., nlpos=nlpos) + CALL dismat(splxy, epsi, mat) + ALLOCATE(arr(nrank)) + ALLOCATE(srow(nrank)) + DO i=1,nrank + CALL getrow(mat, i, arr) + srow(i) = SUM(arr) + END DO +!!$ WRITE(*,'(a/(10(1pe12.3)))') 'Real of Sum of matrix rows before BC', REAL(srow) +!!$ WRITE(*,'(a/(10(1pe12.3)))') 'Imag of Sum of matrix rows before BC', AIMAG(srow) + PRINT*, 'Sum of mat before BC', SUM(srow) +! +! BC on Matrix +! + WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero + WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat) + CALL ibcmat(mat, splxy%sp2%dft) + tmat = seconds() - t0 + DO i=1,nrank + CALL getrow(mat, i, arr) + srow(i) = SUM(arr) + END DO +!!$ WRITE(*,'(a/(10(1pe12.3)))') 'Real of Sum of matrix rows after BC', REAL(srow) +!!$ WRITE(*,'(a/(10(1pe12.3)))') 'Imag of Sum of matrix rows after BC', AIMAG(srow) + PRINT*, 'Sum of mat after BC', SUM(srow) +! + WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat) + WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem() +! +! RHS assembly +! + CALL disrhs(mbess, epsi, splxy, rhs) +! +! Init FFT +! + t0 = seconds() + CALL fourcol(crhs(1:ny),1) + CALL fourcol(crhs(1:ny),-1) + tfour0 = seconds()-t0 + crhs = crhs/REAL(ny,8) +! +! DFT of RHS +! + t0 = seconds() + crhs = rhs + DO i=1,nx+nidbas(1) + i0 = (i-1)*ny + CALL fourcol(crhs(i0+1:i0+ny), 1) + END DO + tfour = seconds()-t0 +! +! Restriction in Fourier space +! k = kmin:kmax (restricted) +! kp = 0:ny-1 (full) +! + DO i=1,nx+nidbas(1) + i0 = (i-1)*ny + i0_r = (i-1)*dk + DO k=kmin,kmax + kp = k + IF(kp.LT.0) kp = kp+ny + crhs_r(i0_r+k-kmin+1) = crhs(i0+kp+1) + END DO + END DO +! +! BC on RHS +! + CALL ibcrhs(crhs_r, splxy%sp2%dft) +! + IF(nrank.LT.100) THEN + WRITE(*,'(a/(10(1pe12.3)))') 'Real of crhs', REAL(crhs) + WRITE(*,'(a/(10(1pe12.3)))') 'Imag of crhs', AIMAG(crhs) + END IF +!=========================================================================== +! 3.0 Solve the dicretized system +! +! Matrix factorization +! + t0 = seconds() +!!$ CALL factor(mat) + CALL to_mat(mat) + CALL reord_mat(mat); CALL putmat(fid, '/MAT1', mat) + CALL numfact(mat) + tfact = seconds() - t0 + DO i=1,nrank + CALL getrow(mat, i, arr) + srow(i) = SUM(arr) + END DO + PRINT*, 'Sum of mat after factor', SUM(srow) + PRINT*, 'iparm(64) after factor', mat%p%iparm(64) + + WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem() + WRITE(*,'(/a,i6)') 'Number of nonzeros in factor (/1000) = ',mat%p%iparm(24) + WRITE(*,'(a,1pe12.3)') 'Number of factorization GFLOPS = ',mat%p%dparm(23)/1.d9 + gflops1 = mat%p%dparm(23) / tfact / 1.d9 +! +! Backsolve +! + t0 = seconds() + PRINT*, 'SUM of crhs_r', SUM(crhs_r) + CALL bsolve(mat, crhs_r, csol_r) + tsolv = seconds() - t0 + WRITE(*,'(a,1pe12.4)') 'Residue =', cnorm2(vmx(mat,csol_r)-crhs_r) + PRINT*, 'SUM of csol_r', SUM(csol_r) + PRINT*, 'iparm(64) after bsolve', mat%p%iparm(64) + PRINT*, 'Residue from WSMP', mat%p%dparm(7) + WRITE(*,'(a/(20i4))') 'iparm', mat%p%iparm +! + CALL putarr(fid, '/FT_RHS', crhs_r, 'DFT of RHS') + CALL putarr(fid, '/FT_SOL', csol_r, 'DFT of Spline coefficients') +!=========================================================================== +! 4.0 Perform some diagnostics in Fourier space +! +! Fourier spectrum at xpt +! + xpt = SQRT(REAL(mbess,8)/REAL(mbess+2,8)) + ALLOCATE(cspec0(dk)) + CALL spectrum0(splxy, csol_r, xpt, cspec0) + WRITE(*,'(/a,f10.5)') 'DFT spectrum (by integration) at x = ', xpt + DO k=kmin,kmax + WRITE(*,'(i5,3(1pe15.3))') k, cspec0(k-kmin+1), ABS(cspec0(k-kmin+1)) + END DO +! + ypt0 = 0.0d0 + WRITE(*,'(/a,2f10.5)') 'DFT spectrum1 at x, y0 = ', xpt, ypt0 + CALL spectrum1(splxy, csol_r, xpt, ypt0, cspec0) + DO k=kmin,kmax + WRITE(*,'(i5,3(1pe15.3))') k, cspec0(k-kmin+1), ABS(cspec0(k-kmin+1)) + END DO + ypt0 = splxy%sp2%dft%dx/2.0d0 ! Center of first interval + WRITE(*,'(/a,2f10.5)') 'DFT spectrum1 at x, y0 = ', xpt, ypt0 + CALL spectrum1(splxy, csol_r, xpt, ypt0, cspec0) + DO k=kmin,kmax + WRITE(*,'(i5,3(1pe15.3))') k, cspec0(k-kmin+1), ABS(cspec0(k-kmin+1)) + END DO +! +! Spectral energy +! + WRITE(*,'(/a)') 'Spectral energies' + ALLOCATE(energy_k(kmin:kmax)) + energy_k = (0.0d0,0.0d0) + DO i=1,dimx + i0_r = (i-1)*dk + DO k=kmin,kmax + ik = i0_r+k-kmin+1 + energy_k(k) = energy_k(k) + csol_r(ik)*CONJG(crhs_r(ik)) + END DO + END DO + energy_k = energy_k/REAL(ny,8) + energy = SUM(energy_k) + DO k=kmin,kmax + WRITE(*,'(i5,3(1pe15.3))') k, energy_k(k), ABS(energy_k(k)) + END DO + WRITE(*,'(a5,4(1pe15.3))') 'Sum', energy, ABS(energy), REAL(energy-energy_exact) +! + CALL putarr(fid, '/ENERGY_K', energy_k, 'Spectral energies') +!=========================================================================== +! 5.0 Transform back to real space +! +! Expand to full Fourier space +! k = kmin:kmax (restricted) +! kp = 0:ny-1 (full) +! + crhs = (0.0d0,0.0d0) + DO i=1,nx+nidbas(1) + i0 = (i-1)*ny + i0_r = (i-1)*dk + DO k=kmin,kmax + kp = k + IF(kp.LT.0) kp = kp+ny + csol(i0+kp+1) = csol_r(i0_r+k-kmin+1) + END DO + END DO +! +! Fourier transform back to real space +! + t0 = seconds() + DO i=1,nx+nidbas(1) + i0 = (i-1)*ny + CALL fourcol(csol(i0+1:i0+ny),-1) + END DO + sol = REAL(csol)/REAL(ny,8) + tfour = tfour + seconds()-t0 +! +! +! Spline coefficients, taking into account of periodicity in y +! Note: in SOL, y was numbered first. +! + ALLOCATE(bcoef(0:dimx-1, 0:dimy-1)) + DO j=0,dimy-1 + DO i=0,dimx-1 + ij = MODULO(j,ny) + i*ny + 1 + bcoef(i,j) = sol(ij) + END DO + END DO + CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution') +! +! Total energy +! + WRITE(*,'(/a, 2(1pe15.3))') 'Total energy and error(real space)', & + & DOT_PRODUCT(rhs,sol), & + & DOT_PRODUCT(rhs,sol)-REAL(energy_exact) + WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem() +!=========================================================================== +! 6.0 Check the solution +! +! Check function values computed with various method +! + ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny)) + DO i=0,nx + DO j=0,ny + solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j)) + END DO + END DO + jder = (/0,0/) +! +! Compute PPFORM/BCOEFS at first call to gridval +! + CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef) +! +! Fourier spectrum at xpt +! + xpt = SQRT(REAL(mbess,8)/REAL(mbess+2,8)) + ALLOCATE(cspec(dk)) +! + ypt0 = 0.0d0 + WRITE(*,'(/a,2f10.5)') 'DFT spectrum2 at x, y0 = ', xpt, ypt0 + CALL spectrum2(splxy, xpt, ypt0, cspec) + DO k=kmin,kmax + WRITE(*,'(i5,3(1pe15.3))') k, cspec(k-kmin+1), ABS(cspec(k-kmin+1)) + END DO +! + ypt0 = splxy%sp2%dft%dx/2.0d0 ! Center of first interval + WRITE(*,'(/a,2f10.5)') 'DFT spectrum2 at x, y0 = ', xpt, ypt0 + CALL spectrum2(splxy, xpt, ypt0, cspec) + DO k=kmin,kmax + WRITE(*,'(i5,3(1pe15.3))') k, cspec(k-kmin+1), ABS(cspec(k-kmin+1)) + END DO +! + WRITE(*,'(/a)') '*** Checking solutions' + t0 = seconds() + DO it=1,nits ! nits iterations for timing + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + END DO + tgrid = (seconds() - t0)/REAL(nits) + errsol = solana - solcal + IF( SIZE(bcoef,2) .LE. 10 ) THEN + CALL prnmat('BCOEF', bcoef) + CALL prnmat('SOLANA', solana) + CALL prnmat('SOLCAL', solcal) + CALL prnmat('ERRSOL', errsol) + END IF + WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & + & norm2(errsol) / norm2(solana) + WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid +! + CALL putarr(fid, '/xgrid', xgrid, 'r') + CALL putarr(fid, '/ygrid', ygrid, '\theta') + CALL putarr(fid, '/sol', solcal, 'Solutions') + CALL putarr(fid, '/solana', solana,'Exact solutions') + CALL putarr(fid, '/errors', errsol, 'Errors') +! +! Check derivatives d/dx and d/dy +! + WRITE(*,'(/a)') '*** Checking gradient' + DO i=0,nx + DO j=0,ny + IF( mbess .EQ. 0 ) THEN + solana(i,j) = -2.0d0 * xgrid(i) + ELSE + solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * & + & xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) + END IF + END DO + END DO +! + jder = (/1,0/) + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + errsol = solana - solcal + CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions') + WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol) +! + DO i=0,nx + DO j=0,ny + solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j)) + END DO + END DO +! + jder = (/0,1/) + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions') + errsol = solana - solcal + WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol) +! + WRITE(*,'(/a)') '---' + WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat + WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact + WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s) ', tsolv + WRITE(*,'(a,1pe12.3)') 'Init FFT time (s) ', tfour0 + WRITE(*,'(a,1pe12.3)') 'FFT time (s) ', tfour + WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1 +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(cspec0, cspec) + DEALLOCATE(mode_couplings) + DEALLOCATE(xgrid, ygrid, rhs, sol) + DEALLOCATE(crhs, csol) + DEALLOCATE(crhs_r, csol_r) + DEALLOCATE(solcal, solana, errsol) + DEALLOCATE(bcoef) + DEALLOCATE(arr) + DEALLOCATE(srow) + DEALLOCATE(energy_k) + CALL destroy_sp(splxy) + CALL destroy(mat) +! + CALL closef(fid) +!=========================================================================== +! +CONTAINS + FUNCTION norm2(x) +! +! Compute the 2-norm of array x +! + IMPLICIT NONE + DOUBLE PRECISION :: norm2 + DOUBLE PRECISION, INTENT(in) :: x(:,:) + DOUBLE PRECISION :: sum2 + INTEGER :: i, j +! + sum2 = 0.0d0 + DO i=1,SIZE(x,1) + DO j=1,SIZE(x,2) + sum2 = sum2 + x(i,j)**2 + END DO + END DO + norm2 = SQRT(sum2) + END FUNCTION norm2 +! + FUNCTION cnorm2(x) + DOUBLE COMPLEX, INTENT(in) :: x(:) + DOUBLE PRECISION :: cnorm2 + cnorm2 = SQRT(DOT_PRODUCT(x,x)) + END FUNCTION cnorm2 +! + SUBROUTINE prnmat(label, mat) + CHARACTER(len=*) :: label + DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat + INTEGER :: i + WRITE(*,'(/a)') TRIM(label) + DO i=1,SIZE(mat,1) + WRITE(*,'(10(1pe12.3))') mat(i,:) + END DO + END SUBROUTINE prnmat +END PROGRAM main +! +!+++ +SUBROUTINE meshdist(c, x, nx) +! +! Construct an 1d non-equidistant mesh given a +! mesh distribution function. +! + IMPLICIT NONE + DOUBLE PRECISION, INTENT(in) :: c(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + INTEGER :: nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) +CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) + END FUNCTION fdist +END SUBROUTINE meshdist +!+++ diff --git a/examples/pde2d_wsmp.f90 b/examples/pde2d_wsmp.f90 new file mode 100644 index 0000000..6a20630 --- /dev/null +++ b/examples/pde2d_wsmp.f90 @@ -0,0 +1,711 @@ +!> +!> @file pde2d_wsmp.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +! +! Solving the 2d PDE using splines and WSMP non-symmetric matrix +! +! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0 +! exact solution: f(x,y) = (1-x^2) x^m cos(my) +! +MODULE pde2d_wsmp_mod + USE bsplines + USE wsmp_bsplines + IMPLICIT NONE +CONTAINS +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE dismat(spl, mat) +! +! Assembly of FE matrix mat using spline spl +! + TYPE(spline2d), INTENT(in) :: spl + TYPE(wsmp_mat), INTENT(inout) :: mat +! + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: n2, nidbas2, ndim2, ng2 + INTEGER :: i, j, ig1, ig2 + INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol + DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:) + DOUBLE PRECISION:: contrib +! + INTEGER :: kterms ! Number of terms in weak form + INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:) ! Derivative order + DOUBLE PRECISION, ALLOCATABLE :: coefs(:,:,:) ! Terms in weak form + INTEGER, ALLOCATABLE :: left1(:), left2(:) +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) + WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1 + WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2 +! +! Gauss quadature +! + CALL get_gauss(spl%sp1, ng1) + CALL get_gauss(spl%sp2, ng2) + ALLOCATE(xg1(ng1), wg1(ng1)) + ALLOCATE(xg2(ng1), wg2(ng1)) +! +! Weak form +! + kterms = mat%nterms + ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2)) + ALLOCATE(coefs(kterms,ng1,ng2)) +! + ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative + ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! +!=========================================================================== +! 2.0 Assembly loop +! + ALLOCATE(left1(ng1)) + ALLOCATE(left2(ng2)) + DO i=1,n1 + CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) + left1 = i + CALL basfun(xg1, spl%sp1, fun1, left1) + DO j=1,n2 + CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) + left2 = j + CALL basfun(xg2, spl%sp2, fun2, left2) +! + DO ig1=1,ng1 + DO ig2=1,ng2 + CALL coefeq(xg1(ig1), xg2(ig2), & + & idert(:,:,ig1,ig2), & + & iderw(:,:,ig1,ig2), & + & coefs(:,ig1,ig2)) + END DO + END DO +! + DO iw1=0,nidbas1 ! Weight function in dir 1 + igw1 = i+iw1 + DO iw2=0,nidbas2 ! Weight function in dir 2 + igw2 = MODULO(j+iw2-1, n2) + 1 + irow = igw2 + (igw1-1)*n2 + DO it1=0,nidbas1 ! Test function in dir 1 + igt1 = i+it1 + DO it2=0,nidbas2 ! Test function in dir 2 + igt2 = MODULO(j+it2-1, n2) + 1 + jcol = igt2 + (igt1-1)*n2 +!------------- + contrib = 0.0d0 + DO ig1=1,ng1 + DO ig2=1,ng2 + DO iterm=1,kterms + contrib = contrib + & + & fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * & + & fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * & + & coefs(iterm,ig1,ig2) * & + & fun2(it2,idert(iterm,2,ig1,ig2),ig2) * & + & fun1(it1,idert(iterm,1,ig1,ig2),ig1) * & + & wg1(ig1) * wg2(ig2) + END DO + END DO + END DO + CALL updtmat(mat, irow, jcol, contrib) +!------------- + END DO + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xg1, wg1, fun1) + DEALLOCATE(xg2, wg2, fun2) + DEALLOCATE(idert, iderw, coefs) + DEALLOCATE(left1,left2) +! + CONTAINS + SUBROUTINE coefeq(x, y, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x, y + INTEGER, INTENT(out) :: idt(:,:), idw(:,:) + DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1)) +! +! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy +! + c(1) = x ! + idt(1,1) = 1 + idt(1,2) = 0 + idw(1,1) = 1 + idw(1,2) = 0 + ! + c(2) = 1.d0/x + idt(2,1) = 0 + idt(2,2) = 1 + idw(2,1) = 0 + idw(2,2) = 1 + END SUBROUTINE coefeq + END SUBROUTINE dismat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE disrhs(mbess, spl, rhs) +! +! Assembly the RHS using 2d spline spl +! + INTEGER, INTENT(in) :: mbess + TYPE(spline2d), INTENT(in) :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:) + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: n2, nidbas2, ndim2, ng2 + INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank + DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) + DOUBLE PRECISION:: contrib +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) +! + ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) + ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) +! +! Gauss quadature +! + CALL get_gauss(spl%sp1, ng1) + CALL get_gauss(spl%sp2, ng2) + ALLOCATE(xg1(ng1), wg1(ng1)) + ALLOCATE(xg2(ng1), wg2(ng1)) +!=========================================================================== +! 2.0 Assembly loop +! + nrank = SIZE(rhs) + rhs(1:nrank) = 0.0d0 +! + DO i=1,n1 + CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) + DO ig1=1,ng1 + CALL basfun(xg1(ig1), spl%sp1, fun1, i) + DO j=1,n2 + CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) + DO ig2=1,ng2 + CALL basfun(xg2(ig2), spl%sp2, fun2, j) + contrib = wg1(ig1)*wg2(ig2) * & + & rhseq(xg1(ig1),xg2(ig2), mbess) + DO k1=0,nidbas1 + i1 = i+k1 + DO k2=0,nidbas2 + j2 = MODULO(j+k2-1,n2) + 1 + ij = j2 + (i1-1)*n2 + rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1) + END DO + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xg1, wg1, fun1) + DEALLOCATE(xg2, wg2, fun2) +! + CONTAINS + DOUBLE PRECISION FUNCTION rhseq(x1, x2, m) + DOUBLE PRECISION, INTENT(in) :: x1, x2 + INTEGER, INTENT(in) :: m + rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2) + END FUNCTION rhseq + END SUBROUTINE disrhs +! +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE ibcmat(mat, ny) +! +! Apply BC on matrix +! + TYPE(wsmp_mat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: ny + INTEGER :: nrank, i, j + DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:) +!=========================================================================== +! 1.0 Prologue +! + nrank = mat%rank + ALLOCATE(zsum(nrank), arr(nrank)) +!=========================================================================== +! 2.0 Unicity at the axis +! +! The vertical sum on the NY-th row +! + zsum = 0.0d0 + DO i=1,ny + arr = 0.0d0 + CALL getrow(mat, i, arr) + zsum(:) = zsum(:) + arr(:) + END DO + CALL putrow(mat, ny, zsum) +! +! The horizontal sum on the NY-th column +! + zsum = 0.0d0 + DO j=1,ny + arr = 0.0d0 + CALL getcol(mat, j, arr) + zsum(ny:) = zsum(ny:) + arr(ny:) + END DO + CALL putcol(mat, ny, zsum) +! +! The away operator +! + DO j = 1,ny-1 + arr = 0.0d0; arr(j) = 1.0d0 + CALL putcol(mat, j, arr) + END DO +! + DO i = 1,ny-1 + arr = 0.0d0; arr(i) = 1.0d0 + CALL putrow(mat, i, arr) + END DO +! +!=========================================================================== +! 3.0 Dirichlet on right boundary +! + DO j = nrank, nrank-ny+1, -1 + arr = 0.0d0; arr(j) = 1.0d0 + CALL putcol(mat, j, arr) + END DO +! + DO i = nrank, nrank-ny+1, -1 + arr = 0.0d0; arr(i) = 1.0d0 + CALL putrow(mat, i, arr) + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(zsum, arr) +! + END SUBROUTINE ibcmat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE ibcrhs(rhs, ny) +! +! Apply BC on RHS +! + DOUBLE PRECISION, INTENT(inout) :: rhs(:) + INTEGER, INTENT(in) :: ny + INTEGER :: nrank + DOUBLE PRECISION :: zsum +!=========================================================================== +! 1.0 Prologue +! + nrank = SIZE(rhs,1) +!=========================================================================== +! 2.0 Unicity at the axis +! +! The vertical sum +! + zsum = SUM(rhs(1:ny)) + rhs(ny) = zsum + rhs(1:ny-1) = 0.0d0 +!=========================================================================== +! 3.0 Dirichlet on right boundary +! + rhs(nrank-ny+1:nrank) = 0.0d0 + END SUBROUTINE ibcrhs +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +END MODULE pde2d_wsmp_mod +PROGRAM main + USE pde2d_wsmp_mod + USE futils +! + IMPLICIT NONE + INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms + LOGICAL :: nlppform + INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it + DOUBLE PRECISION :: pi, coefx(5), coefy(5) + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol + TYPE(spline2d) :: splxy + TYPE(wsmp_mat) :: mat +! + CHARACTER(len=128) :: file='pde2d_wsmp.h5' + INTEGER :: fid + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr + DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol + DOUBLE PRECISION :: seconds, mem, dopla + DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2 + DOUBLE PRECISION :: tconv, treord + INTEGER :: nits=100 + LOGICAL :: nlforce_zero + LOGICAL :: nlserial +! + NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, & + & nlforce_zero, nlserial, coefx, coefy +!=========================================================================== +! 1.0 Prologue +! +! Read in data specific to run +! + nx = 8 ! Number of intervals in x + ny = 8 ! Number of intervals in y + nidbas = (/3,3/) ! Degree of splines + ngauss = (/4,4/) ! Number of Gauss points/interval + mbess = 2 ! Exponent of differential problem + nterms = 2 ! Number of terms in weak form + nlppform = .TRUE. ! Use PPFORM for gridval or not + nlforce_zero = .FALSE. ! Remove existing nodes with zero val in putele/row/ele + coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function +! + READ(*,newrun) + WRITE(*,newrun) +! +! Define grid on x (=radial) & y (=poloidal) axis +! + pi = 4.0d0*ATAN(1.0d0) + ALLOCATE(xgrid(0:nx), ygrid(0:ny)) + xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 + CALL meshdist(coefx, xgrid, nx) + ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi + CALL meshdist(coefy, ygrid, ny) +! +! Create hdf5 file +! + CALL creatf(file, fid, 'PDE2D Result File', real_prec='d') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NY', ny) + CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) + CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) + CALL attach(fid, '/', 'NGAUSS1', ngauss(1)) + CALL attach(fid, '/', 'NGAUSS2', ngauss(2)) + CALL attach(fid, '/', 'MBESS', mbess) +!=========================================================================== +! 2.0 Discretize the PDE +! +! Set up spline +! + t0 = seconds() + CALL set_spline(nidbas, ngauss, & + & xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform) +! +! FE matrix assembly +! + nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix + WRITE(*,'(a,i8)') 'nrank', nrank +! + CALL init(nrank, nterms, mat, nlforce_zero=nlforce_zero) + CALL dismat(splxy, mat) + ALLOCATE(arr(nrank)) + IF(nrank.LT.100) THEN + DO i=1,nrank + CALL getele(mat, i, i, arr(i)) + END DO + WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', arr + END IF +! +! BC on Matrix +! + WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero + WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat) + CALL ibcmat(mat, ny) + IF(nrank.LT.100) THEN + DO i=1,nrank + CALL getele(mat, i, i, arr(i)) + END DO + WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', arr + WRITE(*,'(a)') 'Last rows' + DO i=nrank-ny,nrank + CALL getrow(mat, i, arr) + WRITE(*,'(10(1pe12.3))') arr + END DO + END IF + tmat = seconds() - t0 +! +! RHS assembly +! + ALLOCATE(rhs(nrank), sol(nrank)) + CALL disrhs(mbess, splxy, rhs) +! +! BC on RHS +! + CALL ibcrhs(rhs, ny) +! + CALL putarr(fid, '/RHS', rhs, 'RHS of linear system') + WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat) + WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem() +!=========================================================================== +! 3.0 Solve the dicretized system +! + t0 = seconds() + CALL to_mat(mat) + tconv = seconds() -t0 + WRITE(*,'(/a,l4)') 'associated(mat%mat)', ASSOCIATED(mat%mat) + WRITE(*,'(a,1pe12.3)') 'Mem used (MB) after TO_MAT', mem() +! + t0 = seconds() + CALL reord_mat(mat) + CALL putmat(fid, '/MAT', mat) + treord = seconds() - t0 +! + t0 = seconds() + CALL numfact(mat) + tfact = seconds() - t0 + + WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem() + WRITE(*,'(/a,i6)') 'Number of nonzeros in factor (/1000) = ',mat%p%iparm(24) + WRITE(*,'(a,1pe12.3)') 'Number of factorization GFLOPS = ',mat%p%dparm(23)/1.d9 + gflops1 = mat%p%dparm(23) / tfact / 1.d9 +! + CALL bsolve(mat, rhs, sol) + t0 = seconds() + DO it=1,nits ! nits iterations for timing + CALL bsolve(mat, rhs, sol) + sol(1:ny-1) = sol(ny) + END DO + WRITE(*,'(/a,i6)') 'Number of refinement steps = ',mat%p%iparm(26) + WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem() + tsolv = (seconds() - t0)/REAL(nits) +! +! Spline coefficients, taking into account of periodicity in y +! Note: in SOL, y was numbered first. +! + dimx = splxy%sp1%dim + dimy = splxy%sp2%dim + ALLOCATE(bcoef(0:dimx-1, 0:dimy-1)) + DO j=0,dimy-1 + DO i=0,dimx-1 + ij = MODULO(j,ny) + i*ny + 1 + bcoef(i,j) = sol(ij) + END DO + END DO + WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef) + CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution') +!=========================================================================== +! 4.0 Check the solution +! +! Check function values computed with various method +! + ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny)) + DO i=0,nx + DO j=0,ny + solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j)) + END DO + END DO + jder = (/0,0/) +! +! Compute PPFORM/BCOEFS at first call to gridval + CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef) +! + WRITE(*,'(/a)') '*** Checking solutions' + t0 = seconds() + DO it=1,nits ! nits iterations for timing + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + END DO + tgrid = (seconds() - t0)/REAL(nits) + errsol = solana - solcal + IF( SIZE(bcoef,2) .LE. 10 ) THEN + CALL prnmat('BCOEF', bcoef) + CALL prnmat('SOLANA', solana) + CALL prnmat('SOLCAL', solcal) + CALL prnmat('ERRSOL', errsol) + END IF + WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & + & norm2(errsol) / norm2(solana) + WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid +! + CALL putarr(fid, '/xgrid', xgrid, 'r') + CALL putarr(fid, '/ygrid', ygrid, '\theta') + CALL putarr(fid, '/sol', solcal, 'Solutions') + CALL putarr(fid, '/solana', solana,'Exact solutions') + CALL putarr(fid, '/errors', errsol, 'Errors') +! +! Check derivatives d/dx and d/dy +! + WRITE(*,'(/a)') '*** Checking gradient' + DO i=0,nx + DO j=0,ny + IF( mbess .EQ. 0 ) THEN + solana(i,j) = -2.0d0 * xgrid(i) + ELSE + solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * & + & xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) + END IF + END DO + END DO +! + jder = (/1,0/) + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + errsol = solana - solcal + CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions') + WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol) +! + DO i=0,nx + DO j=0,ny + solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j)) + END DO + END DO +! + jder = (/0,1/) + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions') + errsol = solana - solcal + WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol) +! + WRITE(*,'(/a)') '---' + WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat + WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s) ', tconv + WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s) ', treord + WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact + WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv + WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s) ', tsolv + WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1 +!=========================================================================== +! 5.0 Clear the matrix and recompute +! + WRITE(*,'(/a)') 'Recompute the solver ...' + t0 = seconds() + CALL clear_mat(mat) + CALL dismat(splxy, mat) + CALL ibcmat(mat, ny) + tmat = seconds()-t0 +! + t0 = seconds() +!!$ CALL numfact(mat) + CALL factor(mat, nlreord=.FALSE.) + tfact = seconds()-t0 + gflops1 = mat%p%dparm(23) / tfact / 1.d9 +! + t0 = seconds() + ALLOCATE(newsol(nrank)) + CALL bsolve(mat, rhs, newsol) + newsol(1:ny-1) = newsol(ny) + tsolv = seconds()-t0 +! + WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol)) + WRITE(*,'(/a)') '---' + WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat + WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact + WRITE(*,'(a,1pe12.3)') 'Backsolve(2) time (s) ', tsolv + WRITE(*,'(a,1pe12.3)') 'Total (s) ', tmat+tfact+tsolv + WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1 +! + DEALLOCATE(newsol) +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xgrid, rhs, sol) + DEALLOCATE(solcal, solana, errsol) + DEALLOCATE(bcoef) + DEALLOCATE(arr) + CALL destroy_sp(splxy) + CALL destroy(mat) +! + CALL closef(fid) +!=========================================================================== +! +CONTAINS + FUNCTION norm2(x) +! +! Compute the 2-norm of array x +! + IMPLICIT NONE + DOUBLE PRECISION :: norm2 + DOUBLE PRECISION, INTENT(in) :: x(:,:) + DOUBLE PRECISION :: sum2 + INTEGER :: i, j +! + sum2 = 0.0d0 + DO i=1,SIZE(x,1) + DO j=1,SIZE(x,2) + sum2 = sum2 + x(i,j)**2 + END DO + END DO + norm2 = SQRT(sum2) + END FUNCTION norm2 + SUBROUTINE prnmat(label, mat) + CHARACTER(len=*) :: label + DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat + INTEGER :: i + WRITE(*,'(/a)') TRIM(label) + DO i=1,SIZE(mat,1) + WRITE(*,'(10(1pe12.3))') mat(i,:) + END DO + END SUBROUTINE prnmat +END PROGRAM main +! +!+++ +SUBROUTINE meshdist(c, x, nx) +! +! Construct an 1d non-equidistant mesh given a +! mesh distribution function. +! + IMPLICIT NONE + DOUBLE PRECISION, INTENT(in) :: c(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + INTEGER :: nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) +CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) + END FUNCTION fdist +END SUBROUTINE meshdist +!+++ diff --git a/examples/pde3d.f90 b/examples/pde3d.f90 new file mode 100644 index 0000000..9fed6e9 --- /dev/null +++ b/examples/pde3d.f90 @@ -0,0 +1,396 @@ +!> +!> @file pde3d.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Solving the following 3d PDE using splines: +! +! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my)cos(z)^n, with f(x=1,y) = 0 +! exact solution: f(x,y) = (1-x^2) x^m cos(my)cos(z)^n +! + USE futils + USE fft + USE pde3d_mod +! + IMPLICIT NONE + INTEGER :: nx, ny, nz, nidbas(3), ngauss(3), mbess, npow, nterms + LOGICAL :: nlppform + INTEGER :: i, j, k, kk, ij, dimx, dimy, dimz, nrank, kl, ku + INTEGER :: jder(3), it + DOUBLE PRECISION :: pi, coefx(5) + DOUBLE PRECISION :: dy, dz + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, zgrid + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: fftmass + DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: rhs, sol + DOUBLE COMPLEX, DIMENSION(:,:), ALLOCATABLE :: crhs +! + TYPE(spline2d1d), TARGET :: splxyz + TYPE(spline2d), POINTER :: splxy + TYPE(gbmat) :: mat +! + CHARACTER(len=128) :: file='pde3d.h5' + INTEGER :: fid + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr + DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol + DOUBLE PRECISION :: seconds, mem, dopla + DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2 + INTEGER :: nits=500 +! + INTEGER, PARAMETER :: npart=10 + DOUBLE PRECISION, DIMENSION(npart) :: xp, yp, zp, fp_calc, fp_anal +! + INTEGER :: kmin, kmax + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: fftmass_shifted +! + NAMELIST /newrun/ nx, ny, nz, nidbas, ngauss, mbess, npow, nlppform, coefx +!=========================================================================== +! 1.0 Prologue +! +! Read in data specific to run +! + nx = 8 ! Number of intervals in x + ny = 8 ! Number of intervals in y + nz = 8 ! Number of intervals in z + nidbas = (/3,3,3/) ! Degree of splines + ngauss = (/4,4, 4/) ! Number of Gauss points/interval + mbess = 2 ! Exponent of differential problem + npow = 2 ! Exponent of differential problem + nterms = 2 ! Number of terms in weak form + nlppform = .TRUE. ! Use PPFORM for gridval or not + coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function +! + READ(*,newrun) + WRITE(*,newrun) +! +! Define grid on x (=radial) & y (=poloidal) axis +! + pi = 4.0d0*ATAN(1.0d0) + ALLOCATE(xgrid(0:nx), ygrid(0:ny), zgrid(0:nz)) +! + xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 + CALL meshdist(coefx, xgrid, nx) +! + dy = 2.d0*pi/REAL(ny,8) ! Equidistant in y + ygrid = (/ (j*dy, j=0,ny) /) +! + dz = 2.0d0*pi/REAL(nz,8) ! Equidistant in z + zgrid = (/ (k*dz, k=0,nz) /) +! + WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) + WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny) + WRITE(*,'(a/(10f8.3))') 'ZGRID', zgrid(0:nz) +! +! Create hdf5 file +! + CALL creatf(file, fid, 'PDE2D Result File', real_prec='d') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NY', ny) + CALL attach(fid, '/', 'NZ', nz) + CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) + CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) + CALL attach(fid, '/', 'NIDBAS3', nidbas(3)) + CALL attach(fid, '/', 'NGAUSS1', ngauss(1)) + CALL attach(fid, '/', 'NGAUSS2', ngauss(2)) + CALL attach(fid, '/', 'NGAUSS2', ngauss(3)) + CALL attach(fid, '/', 'MBESS', mbess) +!=========================================================================== +! 2.0 Discretize the PDE +! +! Set up spline +! + t0 = seconds() + CALL set_spline(nidbas, ngauss, xgrid, ygrid, zgrid, splxyz, & + & (/.FALSE., .TRUE., .TRUE./), nlppform=nlppform) + splxy => splxyz%sp12 +! + WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots + WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots + WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Z', splxyz%sp3%knots +! +! 2D FE matrix assembly (in plane x-y) +! + nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix + kl = (nidbas(1)+1)*ny -1 ! Number of sub-diagnonals + ku = kl ! Number of super-diagnonals + WRITE(*,'(a,5i6)') 'nrank, kl, ku', nrank, kl, ku +! + CALL init(kl, ku, nrank, nterms, mat) + CALL dismat(splxy, mat) +!!$ CALL putmat(fid, '/MAT0', mat, 'Assembled GB matrice') + ALLOCATE(arr(nrank)) +! +! BC on Matrix +! + CALL ibcmat(mat, ny) + tmat = seconds() - t0 +! +! 3D RHS assembly +! + ALLOCATE(rhs(nrank,0:nz-1), sol(nrank,0:nz-1)) + CALL disrhs3(mbess, npow, splxyz, rhs) +! +! FFT in z of RHS +! + ALLOCATE(crhs(nrank,0:nz-1)) + crhs = rhs + CALL fourrow(crhs, 1) + crhs = crhs/REAL(nz,8) +! +! Apply Mass matrix to crhs +! + kmin =-nz/2 + kmax = nz/2-1 + CALL init_dft(splxyz%sp3, kmin, kmax) + ALLOCATE(fftmass_shifted(kmin:kmax)) + ALLOCATE(fftmass(0:nz-1)) + CALL calc_fftmass(splxyz%sp3, fftmass_shifted) + DO k=kmin,kmax + fftmass(MODULO(k+nz,nz)) = fftmass_shifted(k) + END DO + DO k=0,nz-1 + crhs(:,k) = crhs(:,k)/fftmass(k) + END DO +! +! Fourier transform back crhs to real space in z +! + CALL fourrow(crhs, -1) + rhs(:,:) = REAL(crhs(:,:),8) +! +! BC on RHS +! + CALL ibcrhs3(rhs, ny) +! + CALL putarr(fid, '/RHS', rhs, 'RHS of linear system') + WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem() +!=========================================================================== +! 3.0 Solve the dicretized system +! + t0 = seconds() + CALL factor(mat) + tfact = seconds() - t0 + gflops1 = dopla('DGBTRF',nrank,nrank,kl,ku,0) / tfact / 1.d9 + + t0 = seconds() + CALL bsolve(mat, rhs, sol) +! +! Backtransform of solution +! + DO k=0,nz-1 + sol(1:ny-1,k) = sol(ny,k) + END DO + tsolv = seconds() - t0 + gflops2 = dopla('DGBTRS',nrank,1,kl,ku,0) / tsolv / 1.d9 + CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution') +! +! Spline coefficients, taking into account of periodicity in y and z +! Note: in SOL, y was numbered first. +! + dimx = splxy%sp1%dim + dimy = splxy%sp2%dim + dimz = splxyz%sp3%dim + WRITE(*,'(/a,3i6)') 'dimx, dimy, dimz =', dimx, dimy, dimz + ALLOCATE(bcoef(0:dimx-1, 0:dimy-1, 0:dimz-1)) + DO j=0,dimy-1 + DO i=0,dimx-1 + ij = MODULO(j,ny) + i*ny + 1 + DO k=0,dimz-1 + kk = MODULO(k,nz) + bcoef(i,j,k) = sol(ij,kk) + END DO + END DO + END DO + CALL putarr(fid, '/BCOEF', bcoef, 'Spline coefficients of solution') +!=========================================================================== +! 4.0 Check the solution +! +! Check function values computed with various method +! + WRITE(*,'(/a)') 'Check with analytical solutions ...' + CALL RANDOM_NUMBER(xp) + yp=0.0d0 + zp=0.0d0 + jder = (/0,0,0/) + CALL gridval(splxyz, xp, yp, zp, fp_calc, jder, bcoef) +!!$ WRITE(*,'(4a12)') 'X', 'CALC', 'ANAL', 'ERROR' +!!$ DO i=1,npart +!!$ fp_anal(i) = (1-xp(i)**2) * xp(i)**mbess & +!!$ & * COS(mbess*yp(i)) * COS(zp(i))**npow +!!$ WRITE(*,'(4(1pe12.3))') xp(i), fp_calc(i), fp_anal(i), fp_calc(i)-fp_anal(i) +!!$ END DO +! + ALLOCATE(solcal(0:nx,0:ny,0:nz)) + ALLOCATE(solana(0:nx,0:ny,0:nz)) + ALLOCATE(errsol(0:nx,0:ny,0:nz)) + DO i=0,nx + DO j=0,ny + DO k=0,nz + solana(i,j,k) = (1-xgrid(i)**2) * xgrid(i)**mbess & + & * COS(mbess*ygrid(j)) * COS(zgrid(k))**npow + END DO + END DO + END DO +! + jder = (/0,0,0/) + CALL gridval(splxyz, xgrid, ygrid, zgrid, solcal, jder, bcoef) + t0 = seconds() + CALL gridval(splxyz, xgrid, ygrid, zgrid, solcal, jder) + tgrid = seconds()-t0 + errsol = solana - solcal + WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & + & norm2(errsol) / norm2(solana) +! + CALL putarr(fid, '/xgrid', xgrid, 'r') + CALL putarr(fid, '/ygrid', ygrid, '\theta') + CALL putarr(fid, '/zgrid', zgrid, '\phi') + CALL putarr(fid, '/sol', solcal, 'Solutions') + CALL putarr(fid, '/solana', solana,'Exact solutions') +! +! Check derivative d/dx +! + DO i=0,nx + DO j=0,ny + DO k=0,nz + IF( mbess .EQ. 0 ) THEN + solana(i,j,k) = -2.0d0 * xgrid(i) * COS(zgrid(k))**npow + ELSE + solana(i,j,k) = (-(mbess+2)*xgrid(i)**2 + mbess) * & + & xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) * & + & COS(zgrid(k))**npow + END IF + END DO + END DO + END DO +! + jder = (/1,0,0/) + t0 = seconds() + CALL gridval(splxyz, xgrid, ygrid, zgrid, solcal, jder) + tgrid = tgrid + seconds()-t0 + errsol = solana - solcal + CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions') + CALL putarr(fid, '/derivx_exact', solana) + WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dx', norm2(errsol)/norm2(solana) +! +! Check derivative d/dy +! + DO i=0,nx + DO j=0,ny + DO k=0,nz + solana(i,j,k) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * & + & SIN(mbess*ygrid(j))* COS(zgrid(k))**npow + END DO + END DO + END DO +! + jder = (/0,1,0/) + t0 = seconds() + CALL gridval(splxyz, xgrid, ygrid, zgrid, solcal, jder) + tgrid = tgrid + seconds()-t0 + CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions') + CALL putarr(fid, '/derivy_exact', solana) + errsol = solana - solcal + WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dy', norm2(errsol)/norm2(solana) +! +! Check derivative d/dz +! + DO i=0,nx + DO j=0,ny + DO k=0,nz + solana(i,j,k) = -npow*(1-xgrid(i)**2) * xgrid(i)**mbess & + & * COS(mbess*ygrid(j)) * COS(zgrid(k))**(npow-1) & + & * SIN(zgrid(k)) + END DO + END DO + END DO +! + jder = (/0,0,1/) + t0 = seconds() + IF(nlppform) THEN + CALL gridval(splxyz, xgrid, ygrid, zgrid, solcal, jder) + ELSE + CALL gridval(splxyz, xgrid, ygrid, zgrid, solcal, jder, bcoef) + END IF + tgrid = tgrid + seconds()-t0 + CALL putarr(fid, '/derivz', solcal, 'd/dz of solutions') + CALL putarr(fid, '/derivz_exact', solana) + errsol = solana - solcal + WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dz', norm2(errsol)/norm2(solana) +!=========================================================================== +! 9.0 Epilogue +! + WRITE(*,'(/a)') '---' + WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat + WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact + WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv + WRITE(*,'(a,1pe12.3)') 'gridval time (s) ', tgrid + WRITE(*,'(a,2f10.3)') 'Factor/solve Gflop/s', gflops1, gflops2 + WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem() +! + DEALLOCATE(xgrid, ygrid, zgrid) + DEALLOCATE(rhs, sol) + DEALLOCATE(crhs) + DEALLOCATE(fftmass) + DEALLOCATE(fftmass_shifted) + DEALLOCATE(bcoef) + DEALLOCATE(solcal, solana, errsol) + DEALLOCATE(arr) + CALL destroy_sp(splxyz) + CALL destroy(mat) +! + CALL closef(fid) +!=========================================================================== +! +CONTAINS + FUNCTION norm2(x) +! +! Compute the 2-norm of array x +! + IMPLICIT NONE + DOUBLE PRECISION :: norm2 + DOUBLE PRECISION, INTENT(in) :: x(:,:,:) + DOUBLE PRECISION :: sum2 + INTEGER :: i, j +! + sum2 = 0.0d0 + DO i=1,SIZE(x,1) + DO j=1,SIZE(x,2) + DO k=1,SIZE(x,3) + sum2 = sum2 + x(i,j,k)**2 + END DO + END DO + END DO + norm2 = SQRT(sum2) + END FUNCTION norm2 + SUBROUTINE prnmat(label, mat) + CHARACTER(len=*) :: label + DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat + INTEGER :: i + WRITE(*,'(/a)') TRIM(label) + DO i=1,SIZE(mat,1) + WRITE(*,'(10(1pe12.3))') mat(i,:) + END DO + END SUBROUTINE prnmat +END PROGRAM main +! +!+++ diff --git a/examples/pde3d_mod.f90 b/examples/pde3d_mod.f90 new file mode 100644 index 0000000..79c3edf --- /dev/null +++ b/examples/pde3d_mod.f90 @@ -0,0 +1,397 @@ +!> +!> @file pde3d_mod.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +MODULE pde3d_mod + USE bsplines + USE matrix + IMPLICIT NONE +! +CONTAINS + SUBROUTINE dismat(spl, mat) +! +! Assembly of FE matrix mat using spline spl +! + TYPE(spline2d), INTENT(in) :: spl + TYPE(gbmat), INTENT(inout) :: mat +! + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: n2, nidbas2, ndim2, ng2 + INTEGER :: i, j, ig1, ig2 + INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol + DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) + DOUBLE PRECISION:: contrib +! + INTEGER :: kterms ! Number of terms in weak form + INTEGER, DIMENSION(:,:), ALLOCATABLE :: idert, iderw ! Derivative order + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: coefs ! Terms in weak form +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) + WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1 + WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2 +! +! Weak form +! + kterms = mat%nterms + ALLOCATE(idert(kterms,2), iderw(kterms,2), coefs(kterms)) +! + ALLOCATE(fun1(0:nidbas1,0:1)) ! Spline and 1st derivative + ALLOCATE(fun2(0:nidbas2,0:1)) ! +! +! Gauss quadature +! + CALL get_gauss(spl%sp1, ng1) + CALL get_gauss(spl%sp2, ng2) + ALLOCATE(xg1(ng1), wg1(ng1)) + ALLOCATE(xg2(ng1), wg2(ng1)) +!=========================================================================== +! 2.0 Assembly loop +! + DO i=1,n1 + CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) + DO ig1=1,ng1 + CALL basfun(xg1(ig1), spl%sp1, fun1, i) + DO j=1,n2 + CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) + DO ig2=1,ng2 + CALL basfun(xg2(ig2), spl%sp2, fun2, j) + CALL coefeq(xg1(ig1), xg2(ig2), idert, iderw, coefs) + DO iterm=1,kterms + DO iw1=0,nidbas1 ! Weight function in dir 1 + igw1 = i+iw1 + DO iw2=0,nidbas2 ! Weight function in dir 2 + igw2 = MODULO(j+iw2-1, n2) + 1 + irow = igw2 + (igw1-1)*n2 + DO it1=0,nidbas1 ! Test function in dir 1 + igt1 = i+it1 + DO it2=0,nidbas2 ! Test function in dir 2 + igt2 = MODULO(j+it2-1, n2) + 1 + jcol = igt2 + (igt1-1)*n2 + contrib = fun1(iw1,iderw(iterm,1)) * & + & fun2(iw2,iderw(iterm,2)) * & + & coefs(iterm) * & + & fun2(it2,idert(iterm,2)) * & + & fun1(it1,idert(iterm,1)) * & + & wg1(ig1) * wg2(ig2) + CALL updtmat(mat, irow, jcol, contrib) + END DO + END DO + END DO + END DO + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xg1, wg1, fun1) + DEALLOCATE(xg2, wg2, fun2) + DEALLOCATE(idert, iderw, coefs) +! + CONTAINS + SUBROUTINE coefeq(x, y, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x, y + INTEGER, INTENT(out) :: idt(:,:), idw(:,:) + DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1)) +! +! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy +! + c(1) = x ! + idt(1,1) = 1 + idt(1,2) = 0 + idw(1,1) = 1 + idw(1,2) = 0 +! + c(2) = 1.d0/x + idt(2,1) = 0 + idt(2,2) = 1 + idw(2,1) = 0 + idw(2,2) = 1 + END SUBROUTINE coefeq + END SUBROUTINE dismat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE disrhs3(mbess, npow, spl, rhs) +! +! Assembly the RHS using 3d spline spl +! + INTEGER, INTENT(in) :: mbess, npow + TYPE(spline2d1d), TARGET :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:,:) +! + TYPE(spline1d), POINTER :: sp1, sp2, sp3 + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: n2, nidbas2, ndim2, ng2 + INTEGER :: n3, nidbas3, ndim3, ng3 + INTEGER :: i, j, k, ig1, ig2, ig3, k1, k2, k3, i1, j2, ij, kk, nrank + DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg3(:), wg3(:), fun3(:,:) + DOUBLE PRECISION:: contrib +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + sp1 => spl%sp12%sp1 + sp2 => spl%sp12%sp2 + sp3 => spl%sp3 +! + CALL get_dim(sp1, ndim1, n1, nidbas1) + CALL get_dim(sp2, ndim2, n2, nidbas2) + CALL get_dim(sp3, ndim3, n3, nidbas3) +! + ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) + ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) + ALLOCATE(fun3(0:nidbas3,1)) ! needs only basis functions (no derivatives) +! +! Gauss quadature +! + CALL get_gauss(sp1, ng1) + CALL get_gauss(sp2, ng2) + CALL get_gauss(sp3, ng3) + WRITE(*,'(/a, 3i3)') 'Gauss points and weights, ngauss =', ng1, ng2, ng3 + ALLOCATE(xg1(ng1), wg1(ng1)) + ALLOCATE(xg2(ng2), wg2(ng2)) + ALLOCATE(xg3(ng3), wg3(ng3)) +!=========================================================================== +! 2.0 Assembly loop +! + nrank = SIZE(rhs,1) + rhs(1:nrank,1:n3) = 0.0d0 +! + DO i=1,n1 + CALL get_gauss(sp1, ng1, i, xg1, wg1) + DO ig1=1,ng1 + CALL basfun(xg1(ig1), sp1, fun1, i) + DO j=1,n2 + CALL get_gauss(sp2, ng2, j, xg2, wg2) + DO ig2=1,ng2 + CALL basfun(xg2(ig2), sp2, fun2, j) + DO k=1,n3 + CALL get_gauss(sp3, ng3, k, xg3, wg3) + DO ig3=1,ng3 + CALL basfun(xg3(ig3), sp3, fun3, k) + contrib = wg1(ig1)*wg2(ig2)*wg3(ig3) * & + & rhseq(xg1(ig1), xg2(ig2), xg3(ig3), mbess, npow) + DO k1=0,nidbas1 + i1 = i+k1 + DO k2=0,nidbas2 + j2 = MODULO(j+k2-1,n2) + 1 + ij = j2 + (i1-1)*n2 + DO k3=0,nidbas3 + kk = MODULO(k+k3-1,n3) + 1 + rhs(ij,kk) = rhs(ij, kk) + & + & contrib*fun1(k1,1)*fun2(k2,1)*fun3(k3,1) + END DO + END DO + END DO + END DO + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xg1, wg1, fun1) + DEALLOCATE(xg2, wg2, fun2) + DEALLOCATE(xg3, wg3, fun3) +! + CONTAINS + DOUBLE PRECISION FUNCTION rhseq(x1, x2, x3, m, n) + DOUBLE PRECISION, INTENT(in) :: x1, x2, x3 + INTEGER, INTENT(in) :: m, n + rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)*COS(x3)**n + END FUNCTION rhseq + END SUBROUTINE disrhs3 +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE meshdist(c, x, nx) +! +! Construct an 1d non-equidistant mesh given a +! mesh distribution function. +! + DOUBLE PRECISION, INTENT(in) :: c(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + INTEGER :: nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) + CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) + END FUNCTION fdist + END SUBROUTINE meshdist +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE ibcmat(mat, ny) +! +! Apply BC on matrix +! + TYPE(gbmat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: ny + INTEGER :: kl, ku, nrank, i, j + DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:) +!=========================================================================== +! 1.0 Prologue +! + kl = mat%kl + ku = mat%ku + nrank = mat%rank + ALLOCATE(zsum(nrank), arr(nrank)) +!=========================================================================== +! 2.0 Unicity at the axis +! +! The vertical sum on the NY-th row +! + zsum = 0.0d0 + DO i=1,ny + arr = 0.0d0 + CALL getrow(mat, i, arr) + DO j=1,ny+ku + zsum(j) = zsum(j) + arr(j) + END DO + END DO + CALL putrow(mat, ny, zsum) +! +! The horizontal sum on the NY-th column +! + zsum = 0.0d0 + DO j=1,ny + arr = 0.0d0 + CALL getcol(mat, j, arr) + DO i=ny,ny+kl + zsum(i) = zsum(i) + arr(i) + END DO + END DO + CALL putcol(mat, ny, zsum) +! +! The away operator +! + DO j = 1,ny-1 + arr = 0.0d0; arr(j) = 1.0d0 + CALL putcol(mat, j, arr) + END DO +! + DO i = 1,ny-1 + arr = 0.0d0; arr(i) = 1.0d0 + CALL putrow(mat, i, arr) + END DO +! +!=========================================================================== +! 3.0 Dirichlet on right boundary +! + DO j = nrank, nrank-ny+1, -1 + arr = 0.0d0; arr(j) = 1.0d0 + CALL putcol(mat, j, arr) + END DO + ! + DO i = nrank, nrank-ny+1, -1 + arr = 0.0d0; arr(i) = 1.0d0 + CALL putrow(mat, i, arr) + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(zsum, arr) +! + END SUBROUTINE ibcmat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE ibcrhs3(rhs, ny) +! +! Apply BC on RHS +! + DOUBLE PRECISION, INTENT(inout) :: rhs(:,:) + INTEGER, INTENT(in) :: ny + INTEGER :: nrank, nz, k + DOUBLE PRECISION :: zsum +!=========================================================================== +! 1.0 Prologue +! + nrank = SIZE(rhs,1) + nz = SIZE(rhs,2) +!=========================================================================== +! 2.0 Unicity at the axis +! +! The vertical sum +! + DO k=1,nz + zsum = SUM(rhs(1:ny,k)) + rhs(ny,k) = zsum + rhs(1:ny-1,k) = 0.0d0 + END DO +!=========================================================================== +! 3.0 Dirichlet on right boundary +! + DO k=1,nz + rhs(nrank-ny+1:nrank,k) = 0.0d0 + END DO + END SUBROUTINE ibcrhs3 +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +END MODULE pde3d_mod diff --git a/examples/poisson_mumps.f90 b/examples/poisson_mumps.f90 new file mode 100644 index 0000000..941a715 --- /dev/null +++ b/examples/poisson_mumps.f90 @@ -0,0 +1,169 @@ +!> +!> @file poisson_mumps.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main + USE mumps_bsplines + USE cds +! + IMPLICIT NONE + INCLUDE 'mpif.h' + TYPE(mumps_mat) :: amat + TYPE(cds_mat) :: amat_cds + DOUBLE PRECISION, ALLOCATABLE :: rhs(:), sol(:), arow(:) + INTEGER :: nx=5, ny=4 + INTEGER :: n + INTEGER :: i, j, irow + INTEGER :: ierr, me + INTEGER, ALLOCATABLE :: dists(:) + DOUBLE PRECISION :: t0 +! + CALL mpi_init(ierr) + CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) +! + IF(me.EQ.0) THEN + WRITE(*,'(a)', advance='no') 'Enter nx, ny: ' + READ(*,*) nx, ny + END IF + CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + n = nx*ny ! Rank of the matrix + ALLOCATE(rhs(n)) + ALLOCATE(sol(n)) + ALLOCATE(arow(n)) +! + WRITE(*,'(/a)') 'Mumps using CSR mat ...' + CALL init(n, 1, amat) +! +! Construct the matrix and RHS +! + t0 = mpi_wtime(0) + DO j=1,ny + DO i=1,nx + arow = 0.0d0 + irow = numb(i,j) + arow(irow) = 4.0d0 + IF(i.GT.1) arow(numb(i-1,j)) = -1.0d0 + IF(i.LT.nx) arow(numb(i+1,j)) = -1.0d0 + IF(j.GT.1) arow(numb(i,j-1)) = -1.0d0 + IF(j.LT.ny) arow(numb(i,j+1)) = -1.0d0 + CALL putrow(amat, irow, arow) + rhs(irow) = SUM(arow) ! => the exact solution is 1 + END DO + END DO +! + WRITE(*,'(a,i6)') 'Rank of matrix', n + WRITE(*,'(a,i6)') 'Number of non-zeros of matrix', get_count(amat) + WRITE(*,'(a,1pe12.3)') 'Matrix construction time (s)', mpi_wtime()-t0 +! +! Factor the amat matrix (Reordering, symbolic and numerical factorization) +! + t0 = mpi_wtime(0) + CALL factor(amat, nlmetis=.TRUE.) + sol=rhs + CALL bsolve(amat, sol) + WRITE(*,'(a,1pe12.3)') 'Direct solve time (s)', mpi_wtime()-t0 +! + IF(me.EQ.0) THEN + WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(sol-1.0d0)) + END IF + CALL destroy(amat) +! +! CDS matrix +! + WRITE(*,'(/a)') 'Mumps using CDS mat ...' + IF(ALLOCATED(dists)) DEALLOCATE(dists) + ALLOCATE(dists(-2:2)) + dists = [-nx, -1, 0, 1, nx] + WRITE(*,'(a/(20i4))') 'dists used in INIT =', dists + CALL init(n, dists, 1, amat_cds) +! + t0 = mpi_wtime(0) + DO j=1,ny + DO i=1,nx + arow = 0.0d0 + irow = numb(i,j) + amat_cds%val(irow,0) = 4.0d0 + IF(i.GT.1) amat_cds%val(irow,-1) = -1.0d0 + IF(i.LT.nx) amat_cds%val(irow,+1) = -1.0d0 + IF(j.GT.1) amat_cds%val(irow,-2) = -1.0d0 + IF(j.LT.ny) amat_cds%val(irow,+2) = -1.0d0 + END DO + END DO + WRITE(*,'(a,1pe12.3)') 'Matrix construction time (s)', mpi_wtime()-t0 +! +! Compute dists of amat + PRINT*, 'stat of mata%mat', ASSOCIATED(amat%mat) + PRINT*, 'rank of mata', amat%mat%rank + CALL mstruct(amat%mat, dists) + WRITE(*,'(A/(20i4))') 'dists from MSTRUCT=', dists +! + t0 = mpi_wtime(0) + CALL cds2mumps(amat_cds, amat) + CALL factor(amat, debug=.FALSE.) + sol = rhs + CALL bsolve(amat, sol) + WRITE(*,'(a,1pe12.3)') 'Direct solve time (s)', mpi_wtime()-t0 +! + IF(me.EQ.0) THEN + WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(sol-1.0d0)) + END IF +! +! Clean up +! + DEALLOCATE(rhs) + DEALLOCATE(sol) + DEALLOCATE(arow) + CALL destroy(amat) + CALL mpi_finalize(ierr) +CONTAINS + SUBROUTINE mstruct(mat, dists) + TYPE(spmat), INTENT(in) :: mat + INTEGER, ALLOCATABLE, INTENT(inout) :: dists(:) + TYPE(elt), POINTER :: t + INTEGER :: n, i, j0 + j0 = LBOUND(dists,1) + n = mat%rank + PRINT*, 'rank of mat', n + DO i=1,n ! scan the matrix rows + t => mat%row(i)%row0 + DO WHILE(ASSOCIATED(t)) ! walk thru the linked list row(i) + j = t%index + IF(ABS(t%val) .LE. EPSILON(0.0d0)) THEN + dists(j0) = t%index-i ! distance from main diag + j0 = j0+1 + END IF + t => t%next + END DO + END DO + END SUBROUTINE mstruct + INTEGER FUNCTION numb(i,j) +! +! One-dimensional numbering +! Number first x then y +! + INTEGER, INTENT(in) :: i, j + numb = (j-1)*nx + i + END FUNCTION numb +END PROGRAM main diff --git a/examples/poisson_petsc.f90 b/examples/poisson_petsc.f90 new file mode 100644 index 0000000..341afba --- /dev/null +++ b/examples/poisson_petsc.f90 @@ -0,0 +1,218 @@ +!> +!> @file poisson_petsc.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main + USE petsc_bsplines + IMPLICIT NONE + TYPE(petsc_mat) :: amat + DOUBLE PRECISION, ALLOCATABLE :: rhs(:), sol(:), arow(:) + INTEGER :: nx=5, ny=4, ntrials=10 + INTEGER :: nitmax=10000, nits + DOUBLE PRECISION :: rtol=1.e-9 + INTEGER :: n, nnz, nnz_loc + INTEGER :: i, j, irow, jcol + INTEGER :: ierr, me, npes, istart, iend + DOUBLE PRECISION :: t0 + INTEGER :: ncols, cols(5) ! Max nnz by row .LE. 5 +! + CHARACTER(len=128) :: matfile='mat.dat', rhsfile='rhs.dat' + LOGICAL :: file_exist +! + NAMELIST /newrun/ nx, ny, nitmax, rtol, matfile, rhsfile +! + CALL mpi_init(ierr) + CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) + CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) +! + IF(me.EQ.0) THEN + READ(*, newrun) + WRITE(*, newrun) + WRITE(*,'(a,i6)') 'npes =', npes + END IF + CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nitmax, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(rtol, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(matfile, 128, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(rhsfile, 128, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) +! + n = nx*ny ! Rank of the matrix +! +! Initialize matrix +! + CALL init(n, 1, amat, comm=MPI_COMM_WORLD) + istart = amat%istart + iend = amat%iend +!!$ WRITE(*,'(a,i3.3,a,3i6)') 'PE', me, ': istart, iend', istart, iend +! +! + INQUIRE(file=TRIM(matfile), exist=file_exist) +! + IF( file_exist ) THEN + CALL mpi_barrier(MPI_COMM_WORLD,ierr) + t0 = mpi_wtime() + CALL load_mat(amat, matfile) + CALL mpi_barrier(MPI_COMM_WORLD,ierr) + IF(me.EQ.0) WRITE(*,'(a,1pe12.3)') 'Mat. loading time (s)', mpi_wtime()-t0 + ELSE +! +! Construct the matrix +! + CALL mpi_barrier(MPI_COMM_WORLD,ierr) + t0 = mpi_wtime() + ALLOCATE(arow(5)) ! Max nnz per row .LE. 5 + DO j=1,ny + DO i=1,nx + irow = numb(i,j) + IF( irow.GE.istart .AND. irow.LE.iend) THEN + ncols = 1; cols(ncols) = irow; arow(ncols) = 4.0d0 + IF(i.GT.1) THEN + ncols = ncols+1 + cols(ncols) = numb(i-1,j); arow(ncols) = -1.0d0 + END IF + IF(i.LT.nx) THEN + ncols = ncols+1 + cols(ncols) = numb(i+1,j); arow(ncols) = -1.0d0 + END IF + IF(j.GT.1) THEN + ncols = ncols+1 + cols(ncols) = numb(i,j-1); arow(ncols) = -1.0d0 + END IF + IF(j.LT.ny) THEN + ncols = ncols+1 + cols(ncols) = numb(i,j+1); arow(ncols) = -1.0d0 + END IF + CALL putrow(amat, irow, arow(1:ncols), cols(1:ncols)) + END IF + END DO + END DO + DEALLOCATE(arow) + CALL mpi_barrier(MPI_COMM_WORLD,ierr) + IF(me.EQ.0) WRITE(*,'(a,1pe12.3)') 'Mat. construction time (s)', mpi_wtime()-t0 +! +! Convert to PETSC mat +! + CALL mpi_barrier(MPI_COMM_WORLD,ierr) + t0=mpi_wtime() + CALL to_mat(amat) + CALL mpi_barrier(MPI_COMM_WORLD,ierr) + IF(me.EQ.0) WRITE(*,'(a,1pe12.3)') 'Mat. conversion time (s)', mpi_wtime()-t0 +! + CALL save_mat(amat, matfile) + END IF +! +! Matrix size and partition could have changed after loading from file! +! + n = amat%rank + istart = amat%istart + iend = amat%iend +! + nnz_loc = get_count(amat) + CALL mpi_reduce(nnz_loc, nnz, 1, MPI_INTEGER, mpi_sum, 0, MPI_COMM_WORLD, ierr) + IF(npes.LE.4) THEN + WRITE(*,'(a,i3.3,a,3i6)') 'PE', me, ': istart, iend (after), nloc, nnz_loc', & + & istart, iend, iend-istart+1, nnz_loc + END IF +! +! Construct or read RHS +! + CALL mpi_barrier(MPI_COMM_WORLD,ierr) + t0 = mpi_wtime() + ALLOCATE(rhs(n)) + INQUIRE(file=TRIM(rhsfile), exist=file_exist) + IF( file_exist ) THEN + OPEN(unit=99, file=TRIM(rhsfile), status='old', form='unformatted') + READ(99) rhs(1:n) + CLOSE(99) + CALL mpi_barrier(MPI_COMM_WORLD,ierr) + IF(me.EQ.0) THEN + WRITE(*,'(a,1pe12.3)') 'RHS read time (s)', mpi_wtime()-t0 + END IF + ELSE + rhs = 0.0d0 + ALLOCATE(arow(n)) + DO i=istart, iend + arow = 0.0d0 + CALL getrow(amat, i, arow) + rhs(i) = SUM(arow) ! => the exact solution is 1 + END DO + arow = rhs + CALL mpi_allreduce(arow, rhs, n, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, ierr) + DEALLOCATE(arow) + IF( me.EQ.0 ) THEN ! All processes have the gobla rhs + OPEN(unit=99, file=TRIM(rhsfile), status='new', form='unformatted') + WRITE(99) rhs(1:n) + CLOSE(99) + END IF + CALL mpi_barrier(MPI_COMM_WORLD,ierr) + IF(me.EQ.0) THEN + WRITE(*,'(a,1pe12.3)') 'RHS construction time (s)', mpi_wtime()-t0 + END IF + END IF + CLOSE(99) + CALL mpi_barrier(MPI_COMM_WORLD,ierr) +! +! Back solve +! + ALLOCATE(sol(n)) +! + CALL mpi_barrier(MPI_COMM_WORLD,ierr) + sol = 0.0d0 + t0=mpi_wtime() + CALL bsolve(amat, rhs, sol, rtol, nitmax, nits) + CALL mpi_barrier(MPI_COMM_WORLD,ierr) + IF(me.EQ.0) THEN + WRITE(*,'(a,1pe12.3,i8,1pe12.3)') 'Error, nits, solve time(s)', & + & MAXVAL(ABS(sol-1.0d0)), nits, mpi_wtime()-t0 + END IF + CALL mpi_barrier(MPI_COMM_WORLD,ierr) + t0=mpi_wtime() + DO i=1,ntrials + sol = 0.0d0 + CALL bsolve(amat, rhs, sol, rtol, nitmax, nits) + END DO + CALL mpi_barrier(MPI_COMM_WORLD,ierr) + IF(me.EQ.0) THEN + WRITE(*,'(a,1pe12.3,i8,1pe12.3)') 'Error, nits, solve time(s)', & + & MAXVAL(ABS(sol-1.0d0)), nits, (mpi_wtime()-t0)/REAL(ntrials) + END IF +! +! Clean up +! + DEALLOCATE(rhs) + DEALLOCATE(sol) + CALL destroy(amat) + CALL PetscFinalize(ierr) + CALL mpi_finalize(ierr) +CONTAINS + INTEGER FUNCTION numb(i,j) +! +! One-dimensional numbering +! Number first x then y +! + INTEGER, INTENT(in) :: i, j + numb = (j-1)*nx + i + END FUNCTION numb +END PROGRAM main diff --git a/examples/ppde3d.f90 b/examples/ppde3d.f90 new file mode 100644 index 0000000..00e4f06 --- /dev/null +++ b/examples/ppde3d.f90 @@ -0,0 +1,510 @@ +!> +!> @file ppde3d.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Solving the following 3d PDE using splines: +! +! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my)cos(z)^n, with f(x=1,y) = 0 +! exact solution: f(x,y) = (1-x^2) x^m cos(my)cos(z)^n +! + USE futils + USE fft + USE pputils2, ONLY : pptransp + USE ppde3d_mod +! + IMPLICIT NONE +! + CHARACTER(len=128) :: infile="ppde3d.in" + INTEGER :: l + INTEGER :: nx, ny, nz, nidbas(3), ngauss(3), mbess, npow, nterms + INTEGER :: startz, endz, nzloc + INTEGER :: start_rank, end_rank, nrank_loc + LOGICAL :: nlppform + INTEGER :: i, j, k, kk, ij, dimx, dimy, dimz, nrank, kl, ku + INTEGER :: jder(3), it + DOUBLE PRECISION :: pi, coefx(5) + DOUBLE PRECISION :: dy, dz + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, zgrid + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: fftmass + DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: rhs, sol, rhs_t + DOUBLE COMPLEX, DIMENSION(:,:), ALLOCATABLE :: crhs_t +! + TYPE(spline2d1d), TARGET :: splxyz + TYPE(spline2d), POINTER :: splxy + TYPE(spline1d) :: splz + TYPE(gbmat) :: mat +! + CHARACTER(len=128) :: file='ppde3d.h5' + INTEGER :: fid + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr + DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol + DOUBLE PRECISION :: seconds, mem, dopla + DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2 + INTEGER :: nits=500 +! + INTEGER, PARAMETER :: npart=10000000 + INTEGER :: nploc + DOUBLE PRECISION, DIMENSION(npart) :: xp, yp, zp, fp_calc, fp_anal + DOUBLE PRECISION zsuml, zsumg, errnorm2 +! + NAMELIST /newrun/ nx, ny, nz, nidbas, ngauss, mbess, npow, nlppform, coefx +!=========================================================================== +! 1.0 Prologue +! +! Init MPI +! + CALL mpi_init(ierr) + CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) + CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) +! +! Get input file name from command argument +! + IF( COMMAND_ARGUMENT_COUNT() .EQ. 1 ) THEN + CALL GET_COMMAND_ARGUMENT(1, infile, l, ierr) + END IF +! +! Read in data specific to run +! + nx = 8 ! Number of intervals in x + ny = 8 ! Number of intervals in y + nz = 8 ! Number of intervals in z + nidbas = (/3,3,3/) ! Degree of splines + ngauss = (/4,4, 4/) ! Number of Gauss points/interval + mbess = 2 ! Exponent of differential problem + npow = 2 ! Exponent of differential problem + nterms = 2 ! Number of terms in weak form + nlppform = .TRUE. ! Use PPFORM for gridval or not + coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function +! + OPEN(unit=99, file=TRIM(infile), status='old', action='read') + READ(99,newrun) + IF( me.EQ.0) THEN + WRITE(*,newrun) + END IF + CLOSE(99) +! +! Define grid on x (=radial) & y (=poloidal) axis +! + pi = 4.0d0*ATAN(1.0d0) + ALLOCATE(xgrid(0:nx), ygrid(0:ny), zgrid(0:nz)) +! + xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 + CALL meshdist(coefx, xgrid, nx) +! + dy = 2.d0*pi/REAL(ny,8) ! Equidistant in y + ygrid = (/ (j*dy, j=0,ny) /) +! +! Partitionned toroidal grid z +! + dz = 2.0d0*pi/REAL(nz,8) ! Equidistant in z + zgrid = (/ (k*dz, k=0,nz) /) + CALL dist1d(0, nz, startz, nzloc) + endz = startz+nzloc +!!$ PRINT*, 'PE', me, ' startz, endz, nzloc', startz, endz, nzloc +! + IF( me.EQ.0) THEN + WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) + WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny) + WRITE(*,'(a/(10f8.3))') 'ZGRID', zgrid(0:nz) + END IF +! +! Create hdf5 file +! + CALL creatf(file, fid, 'PDE2D Result File', real_prec='d', & + & mpicomm=MPI_COMM_WORLD) + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NY', ny) + CALL attach(fid, '/', 'NZ', nz) + CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) + CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) + CALL attach(fid, '/', 'NIDBAS3', nidbas(3)) + CALL attach(fid, '/', 'NGAUSS1', ngauss(1)) + CALL attach(fid, '/', 'NGAUSS2', ngauss(2)) + CALL attach(fid, '/', 'NGAUSS2', ngauss(3)) + CALL attach(fid, '/', 'MBESS', mbess) + CALL putarr(fid, '/xgrid', xgrid, 'r') + CALL putarr(fid, '/ygrid', ygrid, '\theta') + CALL putarr(fid, '/zgrid', zgrid(0:nz-1), '\phi') +!=========================================================================== +! 2.0 Discretize the PDE +! +! Set up spline +! + t0 = seconds() + CALL set_spline(nidbas, ngauss, xgrid, ygrid, zgrid(startz:endz), & + & splxyz, (/.FALSE., .TRUE., .TRUE./), nlppform=nlppform) + splxy => splxyz%sp12 +! + IF( me.EQ.0) THEN + WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots + WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots + END IF + CALL disp(splxyz%sp3%knots, 'KNOTS in Z', MPI_COMM_WORLD) +! +! 2D FE matrix assembly (in plane x-y) +! + nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix + kl = (nidbas(1)+1)*ny -1 ! Number of sub-diagnonals + ku = kl ! Number of super-diagnonals + IF(me.EQ.0) THEN + WRITE(*,'(a,5i6)') 'nrank, kl, ku', nrank, kl, ku + END IF +! + CALL init(kl, ku, nrank, nterms, mat) + CALL dismat(splxy, mat) + ALLOCATE(arr(nrank)) +! +! BC on Matrix +! + CALL ibcmat(mat, ny) + tmat = seconds() - t0 +! +! 3D RHS assembly +! + ALLOCATE(rhs(nrank,0:nzloc+nidbas(3)-1)) ! With right guard cells nzloc:nzloc+nidbas3-1 + ALLOCATE(sol(nrank,0:nzloc-1)) + CALL disrhs3(mbess, npow, splxyz, rhs) +! + zsuml = SUM(ABS(rhs(:,0:nzloc-1))) + CALL mpi_reduce(zsuml, zsumg, 1, MPI_DOUBLE_PRECISION, MPI_SUM , 0, & + & MPI_COMM_WORLD, ierr) + IF(me.EQ.0) PRINT*, 'sum of rhs after DISRHS3', zsumg +! +! FFT in z of RHS +! + CALL dist1d(1, nrank, start_rank, nrank_loc) + end_rank = start_rank+nrank_loc-1 + ALLOCATE(rhs_t(0:nz-1,nrank_loc), crhs_t(0:nz-1,nrank_loc)) +! + CALL pptransp(MPI_COMM_WORLD, rhs(:,0:nzloc-1), rhs_t) + crhs_t = rhs_t + CALL fourcol(crhs_t,1) + crhs_t = crhs_t/REAL(nz,8) +! +! Apply Mass matrix to crhs +! + PRINT*, 4 + CALL set_spline(nidbas(3), ngauss(3), zgrid, splz, .TRUE.) + ALLOCATE(fftmass(0:nz-1)) + PRINT*, 5 +!!$ CALL calc_fftmass(splz, fftmass) + CALL calc_fftmass_old(splz, fftmass) + IF(me.EQ.0) THEN + WRITE(*,'(/a/(10(1pe12.3)))') 'Mass matrix', fftmass + END IF + DO k=0,nz-1 + crhs_t(k,:) = crhs_t(k,:)/fftmass(k) + END DO +! +! Fourier transform back crhs to real space in z +! + CALL fourcol(crhs_t, -1) + rhs_t(:,:) = REAL(crhs_t(:,:),8) + CALL pptransp(MPI_COMM_WORLD, rhs_t, sol) ! Put the final RHS in SOL +! +! BC on RHS +! + CALL ibcrhs3(sol, ny) +! + IF(me.EQ.0) THEN + WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem() + END IF +!=========================================================================== +! 3.0 Solve the dicretized system +! + t0 = seconds() + CALL factor(mat) + tfact = seconds() - t0 + gflops1 = dopla('DGBTRF',nrank,nrank,kl,ku,0) / tfact / 1.d9 + + t0 = seconds() + CALL bsolve(mat, sol) +! +! Backtransform of solution +! + DO k=0,nzloc-1 + sol(1:ny-1,k) = sol(ny,k) + END DO +! + zsuml = SUM(ABS(sol)) + CALL mpi_reduce(zsuml, zsumg, 1, MPI_DOUBLE_PRECISION, MPI_SUM , 0, & + & MPI_COMM_WORLD, ierr) + IF(me.EQ.0) PRINT*, 'sum of sol', zsumg +! + tsolv = seconds() - t0 + gflops2 = dopla('DGBTRS',nrank,1,kl,ku,0) / tsolv / 1.d9 +! +! Spline coefficients, taking into account of periodicity in y and z +! Note: in SOL, y was numbered first. +! + dimx = splxy%sp1%dim + dimy = splxy%sp2%dim + dimz = splxyz%sp3%dim + ALLOCATE(bcoef(0:dimx-1, 0:dimy-1, 0:dimz-1)) +! +! Get 3D array of spline coefs. +! + DO j=0,dimy-1 + DO i=0,dimx-1 + ij = MODULO(j,ny) + i*ny + 1 + DO k=0,nzloc-1 + bcoef(i,j,k) = sol(ij,k) + END DO + END DO + END DO +! +! Get missing coefs from remote guard cells +! + prev = MODULO(me-1,npes) + next = MODULO(me+1,npes) + count = dimx*dimy + DO i=0,nidbas(3)-1 + CALL mpi_sendrecv(bcoef(0,0,i), count, MPI_DOUBLE_PRECISION, prev, 0, & + & bcoef(0,0,nzloc+i), count, MPI_DOUBLE_PRECISION, next, 0, & + & MPI_COMM_WORLD, status, ierr) + END DO +! + IF(me.EQ.0) THEN + WRITE(*,'(/a,3i6)') 'dimx, dimy, dimz =', dimx, dimy, dimz + END IF +!=========================================================================== +! 4.0 Check the solution +! +! Check function values computed with various method +! + CALL RANDOM_NUMBER(xp) + CALL RANDOM_NUMBER(yp); yp = 2.d0*pi*yp + CALL RANDOM_NUMBER(zp); zp = 2.d0*pi*zp + nploc = 0 + DO i=1,npart + IF(zp(i).GE.zgrid(startz) .AND. zp(i).LT.zgrid(endz)) THEN + nploc = nploc+1 + xp(nploc) = xp(i) + yp(nploc) = yp(i) + zp(nploc) = zp(i) + END IF + END DO + jder = (/0,0,0/) + CALL gridval(splxyz, xp(1:nploc), yp(1:nploc), zp(1:nploc), fp_calc(1:nploc), jder, bcoef) + DO i=1,nploc + fp_anal(i) = (1-xp(i)**2) * xp(i)**mbess & + & * COS(mbess*yp(i)) * COS(zp(i))**npow + END DO + errnorm2 = norm21(fp_calc(1:nploc)-fp_anal(1:nploc))/norm21(fp_calc(1:nploc)) + IF(me.EQ.0) THEN + WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors, using random points', & + & errnorm2 + END IF +! + ALLOCATE(solcal(0:nx,0:ny,0:nzloc-1)) + ALLOCATE(solana(0:nx,0:ny,0:nzloc-1)) + ALLOCATE(errsol(0:nx,0:ny,0:nzloc-1)) +! + DO i=0,nx + DO j=0,ny + DO k=0,nzloc-1 + kk=startz+k + solana(i,j,k) = (1-xgrid(i)**2) * xgrid(i)**mbess & + & * COS(mbess*ygrid(j)) * COS(zgrid(kk))**npow + END DO + END DO + END DO +! + jder = (/0,0,0/) + CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder, bcoef) + t0 = seconds() + CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder) + tgrid = seconds()-t0 + errsol = solana - solcal +! + errnorm2 = norm2(errsol) / norm2(solana) + IF(me.EQ.0) THEN + WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & + & errnorm2 + END IF + CALL putarr(fid, '/sol', solcal,pardim=3) + CALL putarr(fid, '/solana', solana,pardim=3) +! +! Check derivative d/dx +! + DO i=0,nx + DO j=0,ny + DO k=0,nzloc-1 + IF( mbess .EQ. 0 ) THEN + solana(i,j,k) = -2.0d0 * xgrid(i) * COS(zgrid(k+startz))**npow + ELSE + solana(i,j,k) = (-(mbess+2)*xgrid(i)**2 + mbess) * & + & xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) * & + & COS(zgrid(k+startz))**npow + END IF + END DO + END DO + END DO +! + jder = (/1,0,0/) + t0 = seconds() + CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder) + tgrid = tgrid + seconds()-t0 + errsol = solana - solcal + errnorm2 = norm2(errsol) / norm2(solana) + IF(me.EQ.0) THEN + WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dx', errnorm2 + END IF + CALL putarr(fid, '/derivx', solcal, pardim=3) + CALL putarr(fid, '/derivx_exact', solana,pardim=3) +! +! Check derivative d/dy +! + DO i=0,nx + DO j=0,ny + DO k=0,nzloc-1 + solana(i,j,k) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * & + & SIN(mbess*ygrid(j))* COS(zgrid(k+startz))**npow + END DO + END DO + END DO +! + jder = (/0,1,0/) + t0 = seconds() + CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder) + tgrid = tgrid + seconds()-t0 + errsol = solana - solcal + errnorm2 = norm2(errsol) / norm2(solana) + IF(me.EQ.0) THEN + WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dy', errnorm2 + END IF + CALL putarr(fid, '/derivy', solcal, pardim=3) + CALL putarr(fid, '/derivy_exact', solana,pardim=3) +! +! Check derivative d/dz +! + DO i=0,nx + DO j=0,ny + DO k=0,nzloc-1 + solana(i,j,k) = -npow*(1-xgrid(i)**2) * xgrid(i)**mbess & + & * COS(mbess*ygrid(j)) * COS(zgrid(k+startz))**(npow-1) & + & * SIN(zgrid(k+startz)) + END DO + END DO + END DO +! + jder = (/0,0,1/) + t0 = seconds() + IF(nlppform) THEN + CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder) + ELSE + CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder, bcoef) + END IF + tgrid = tgrid + seconds()-t0 + errsol = solana - solcal + errnorm2 = norm2(errsol) / norm2(solana) + IF(me.EQ.0) THEN + WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dz', errnorm2 + END IF + CALL putarr(fid, '/derivz', solcal, pardim=3) + CALL putarr(fid, '/derivz_exact', solana,pardim=3) +!=========================================================================== +! 9.0 Epilogue +! + IF(me.EQ.0) THEN + WRITE(*,'(/a)') '---' + WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat + WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact + WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv + WRITE(*,'(a,1pe12.3)') 'gridval time (s) ', tgrid + WRITE(*,'(a,2f10.3)') 'Factor/solve Gflop/s', gflops1, gflops2 + WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem() + END IF +! + DEALLOCATE(xgrid, ygrid, zgrid) + DEALLOCATE(fftmass) + DEALLOCATE(rhs) + DEALLOCATE(sol) + DEALLOCATE(rhs_t) + DEALLOCATE(crhs_t) + DEALLOCATE(bcoef) + DEALLOCATE(solcal, solana, errsol) + DEALLOCATE(arr) + CALL destroy_sp(splxyz) + CALL destroy_sp(splz) + CALL destroy(mat) +! + CALL closef(fid) +! + CALL mpi_finalize(ierr) +!=========================================================================== +! +CONTAINS + FUNCTION norm2(x) +! +! Compute the 2-norm of array x +! + IMPLICIT NONE + DOUBLE PRECISION :: norm2 + DOUBLE PRECISION, INTENT(in) :: x(:,:,:) + DOUBLE PRECISION :: sum2, sum2g + INTEGER :: i, j +! + sum2 = 0.0d0 + DO i=1,SIZE(x,1) + DO j=1,SIZE(x,2) + DO k=1,SIZE(x,3) + sum2 = sum2 + x(i,j,k)**2 + END DO + END DO + END DO + CALL mpi_allreduce(sum2, sum2g, 1, MPI_DOUBLE_PRECISION, MPI_SUM, & + & MPI_COMM_WORLD, ierr) + norm2 = SQRT(sum2g) + END FUNCTION norm2 +! + FUNCTION norm21(x) +! +! Compute the 2-norm of array x +! + IMPLICIT NONE + DOUBLE PRECISION :: norm21 + DOUBLE PRECISION, INTENT(in) :: x(:) + DOUBLE PRECISION :: sum2, sum2g + INTEGER :: i, j +! + sum2 = DOT_PRODUCT(x,x) + CALL mpi_allreduce(sum2, sum2g, 1, MPI_DOUBLE_PRECISION, MPI_SUM, & + & MPI_COMM_WORLD, ierr) + norm21 = SQRT(sum2g) + END FUNCTION norm21 + SUBROUTINE prnmat(label, mat) + CHARACTER(len=*) :: label + DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat + INTEGER :: i + WRITE(*,'(/a)') TRIM(label) + DO i=1,SIZE(mat,1) + WRITE(*,'(10(1pe12.3))') mat(i,:) + END DO + END SUBROUTINE prnmat +END PROGRAM main +! +!+++ diff --git a/examples/ppde3d_mod.f90 b/examples/ppde3d_mod.f90 new file mode 100644 index 0000000..c04c788 --- /dev/null +++ b/examples/ppde3d_mod.f90 @@ -0,0 +1,473 @@ +!> +!> @file ppde3d_mod.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +MODULE ppde3d_mod + USE bsplines + USE matrix + IMPLICIT NONE + INCLUDE "mpif.h" +! + INTEGER :: me, npes + INTEGER :: prev, next + INTEGER :: count, status(MPI_STATUS_SIZE), ierr +! +CONTAINS + SUBROUTINE dismat(spl, mat) +! +! Assembly of FE matrix mat using spline spl +! + TYPE(spline2d), INTENT(in) :: spl + TYPE(gbmat), INTENT(inout) :: mat +! + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: n2, nidbas2, ndim2, ng2 + INTEGER :: i, j, ig1, ig2 + INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol + DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) + DOUBLE PRECISION:: contrib +! + INTEGER :: kterms ! Number of terms in weak form + INTEGER, DIMENSION(:,:), ALLOCATABLE :: idert, iderw ! Derivative order + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: coefs ! Terms in weak form +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) +! +! Weak form +! + kterms = mat%nterms + ALLOCATE(idert(kterms,2), iderw(kterms,2), coefs(kterms)) +! + ALLOCATE(fun1(0:nidbas1,0:1)) ! Spline and 1st derivative + ALLOCATE(fun2(0:nidbas2,0:1)) ! +! +! Gauss quadature +! + CALL get_gauss(spl%sp1, ng1) + CALL get_gauss(spl%sp2, ng2) + ALLOCATE(xg1(ng1), wg1(ng1)) + ALLOCATE(xg2(ng1), wg2(ng1)) +!=========================================================================== +! 2.0 Assembly loop +! + DO i=1,n1 + CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) + DO ig1=1,ng1 + CALL basfun(xg1(ig1), spl%sp1, fun1, i) + DO j=1,n2 + CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) + DO ig2=1,ng2 + CALL basfun(xg2(ig2), spl%sp2, fun2, j) + CALL coefeq(xg1(ig1), xg2(ig2), idert, iderw, coefs) + DO iterm=1,kterms + DO iw1=0,nidbas1 ! Weight function in dir 1 + igw1 = i+iw1 + DO iw2=0,nidbas2 ! Weight function in dir 2 + igw2 = MODULO(j+iw2-1, n2) + 1 + irow = igw2 + (igw1-1)*n2 + DO it1=0,nidbas1 ! Test function in dir 1 + igt1 = i+it1 + DO it2=0,nidbas2 ! Test function in dir 2 + igt2 = MODULO(j+it2-1, n2) + 1 + jcol = igt2 + (igt1-1)*n2 + contrib = fun1(iw1,iderw(iterm,1)) * & + & fun2(iw2,iderw(iterm,2)) * & + & coefs(iterm) * & + & fun2(it2,idert(iterm,2)) * & + & fun1(it1,idert(iterm,1)) * & + & wg1(ig1) * wg2(ig2) + CALL updtmat(mat, irow, jcol, contrib) + END DO + END DO + END DO + END DO + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xg1, wg1, fun1) + DEALLOCATE(xg2, wg2, fun2) + DEALLOCATE(idert, iderw, coefs) +! + CONTAINS + SUBROUTINE coefeq(x, y, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x, y + INTEGER, INTENT(out) :: idt(:,:), idw(:,:) + DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1)) +! +! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy +! + c(1) = x ! + idt(1,1) = 1 + idt(1,2) = 0 + idw(1,1) = 1 + idw(1,2) = 0 +! + c(2) = 1.d0/x + idt(2,1) = 0 + idt(2,2) = 1 + idw(2,1) = 0 + idw(2,2) = 1 + END SUBROUTINE coefeq + END SUBROUTINE dismat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE disrhs3(mbess, npow, spl, rhs) +! +! Assembly the RHS using 3d spline spl +! + INTEGER, INTENT(in) :: mbess, npow + TYPE(spline2d1d), TARGET :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:,:) +! + TYPE(spline1d), POINTER :: sp1, sp2, sp3 + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: n2, nidbas2, ndim2, ng2 + INTEGER :: n3, nidbas3, ndim3, ng3 + INTEGER :: i, j, k, ig1, ig2, ig3, k1, k2, k3, i1, j2, ij, kk, nrank + DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg3(:), wg3(:), fun3(:,:) + DOUBLE PRECISION, ALLOCATABLE :: buf(:) + DOUBLE PRECISION:: contrib +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + sp1 => spl%sp12%sp1 + sp2 => spl%sp12%sp2 + sp3 => spl%sp3 +! + CALL get_dim(sp1, ndim1, n1, nidbas1) + CALL get_dim(sp2, ndim2, n2, nidbas2) + CALL get_dim(sp3, ndim3, n3, nidbas3) +! + ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) + ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) + ALLOCATE(fun3(0:nidbas3,1)) ! needs only basis functions (no derivatives) +! +! Gauss quadature +! + CALL get_gauss(sp1, ng1) + CALL get_gauss(sp2, ng2) + CALL get_gauss(sp3, ng3) + ALLOCATE(xg1(ng1), wg1(ng1)) + ALLOCATE(xg2(ng2), wg2(ng2)) + ALLOCATE(xg3(ng3), wg3(ng3)) +!=========================================================================== +! 2.0 Assembly loop +! + nrank = SIZE(rhs,1) + rhs = 0.0d0 +! + DO i=1,n1 + CALL get_gauss(sp1, ng1, i, xg1, wg1) + DO ig1=1,ng1 + CALL basfun(xg1(ig1), sp1, fun1, i) + DO j=1,n2 + CALL get_gauss(sp2, ng2, j, xg2, wg2) + DO ig2=1,ng2 + CALL basfun(xg2(ig2), sp2, fun2, j) + DO k=1,n3 + CALL get_gauss(sp3, ng3, k, xg3, wg3) + DO ig3=1,ng3 + CALL basfun(xg3(ig3), sp3, fun3, k) + contrib = wg1(ig1)*wg2(ig2)*wg3(ig3) * & + & rhseq(xg1(ig1), xg2(ig2), xg3(ig3), mbess, npow) + DO k1=0,nidbas1 + i1 = i+k1 + DO k2=0,nidbas2 + j2 = MODULO(j+k2-1,n2) + 1 + ij = j2 + (i1-1)*n2 + DO k3=0,nidbas3 + kk = k+k3 + rhs(ij,kk) = rhs(ij, kk) + & + & contrib*fun1(k1,1)*fun2(k2,1)*fun3(k3,1) + END DO + END DO + END DO + END DO + END DO + END DO + END DO + END DO + END DO +! +! Update from remote guard cells +! + CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) + CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) + next = MODULO(me+1,npes) + prev = MODULO(me-1,npes) + count = nrank + ALLOCATE(buf(nrank)) + DO i=nidbas3,1,-1 + CALL mpi_sendrecv(rhs(1,n3+i), count, MPI_DOUBLE_PRECISION, next, 0, & + & buf, count, MPI_DOUBLE_PRECISION, prev, 0, & + & MPI_COMM_WORLD, status, ierr) + rhs(:,i) = rhs(:,i) + buf(:) + END DO + DEALLOCATE(buf) +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xg1, wg1, fun1) + DEALLOCATE(xg2, wg2, fun2) + DEALLOCATE(xg3, wg3, fun3) +! + CONTAINS + DOUBLE PRECISION FUNCTION rhseq(x1, x2, x3, m, n) + DOUBLE PRECISION, INTENT(in) :: x1, x2, x3 + INTEGER, INTENT(in) :: m, n + rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)*COS(x3)**n + END FUNCTION rhseq + END SUBROUTINE disrhs3 +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE meshdist(c, x, nx) +! +! Construct an 1d non-equidistant mesh given a +! mesh distribution function. +! + DOUBLE PRECISION, INTENT(in) :: c(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + INTEGER :: nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) + CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) + END FUNCTION fdist + END SUBROUTINE meshdist +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE ibcmat(mat, ny) +! +! Apply BC on matrix +! + TYPE(gbmat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: ny + INTEGER :: kl, ku, nrank, i, j + DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:) +!=========================================================================== +! 1.0 Prologue +! + kl = mat%kl + ku = mat%ku + nrank = mat%rank + ALLOCATE(zsum(nrank), arr(nrank)) +!=========================================================================== +! 2.0 Unicity at the axis +! +! The vertical sum on the NY-th row +! + zsum = 0.0d0 + DO i=1,ny + arr = 0.0d0 + CALL getrow(mat, i, arr) + DO j=1,ny+ku + zsum(j) = zsum(j) + arr(j) + END DO + END DO + CALL putrow(mat, ny, zsum) +! +! The horizontal sum on the NY-th column +! + zsum = 0.0d0 + DO j=1,ny + arr = 0.0d0 + CALL getcol(mat, j, arr) + DO i=ny,ny+kl + zsum(i) = zsum(i) + arr(i) + END DO + END DO + CALL putcol(mat, ny, zsum) +! +! The away operator +! + DO j = 1,ny-1 + arr = 0.0d0; arr(j) = 1.0d0 + CALL putcol(mat, j, arr) + END DO +! + DO i = 1,ny-1 + arr = 0.0d0; arr(i) = 1.0d0 + CALL putrow(mat, i, arr) + END DO +! +!=========================================================================== +! 3.0 Dirichlet on right boundary +! + DO j = nrank, nrank-ny+1, -1 + arr = 0.0d0; arr(j) = 1.0d0 + CALL putcol(mat, j, arr) + END DO + ! + DO i = nrank, nrank-ny+1, -1 + arr = 0.0d0; arr(i) = 1.0d0 + CALL putrow(mat, i, arr) + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(zsum, arr) +! + END SUBROUTINE ibcmat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE ibcrhs3(rhs, ny) +! +! Apply BC on RHS +! + DOUBLE PRECISION, INTENT(inout) :: rhs(:,:) + INTEGER, INTENT(in) :: ny + INTEGER :: nrank, nz, k + DOUBLE PRECISION :: zsum +!=========================================================================== +! 1.0 Prologue +! + nrank = SIZE(rhs,1) + nz = SIZE(rhs,2) +!=========================================================================== +! 2.0 Unicity at the axis +! +! The vertical sum +! + DO k=1,nz + zsum = SUM(rhs(1:ny,k)) + rhs(ny,k) = zsum + rhs(1:ny-1,k) = 0.0d0 + END DO +!=========================================================================== +! 3.0 Dirichlet on right boundary +! + DO k=1,nz + rhs(nrank-ny+1:nrank,k) = 0.0d0 + END DO + END SUBROUTINE ibcrhs3 +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE dist1d(s0, ntot, s, nloc) + INCLUDE 'mpif.h' + INTEGER, INTENT(in) :: s0, ntot + INTEGER, INTENT(out) :: s, nloc + INTEGER :: me, npes, ierr, naver, rem +! + CALL MPI_COMM_SIZE(MPI_COMM_WORLD, npes, ierr) + CALL MPI_COMM_RANK(MPI_COMM_WORLD, me, ierr) + naver = ntot/npes + rem = MODULO(ntot,npes) + s = s0 + MIN(rem,me) + me*naver + nloc = naver + IF( me.LT.rem ) nloc = nloc+1 + END SUBROUTINE dist1d +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE disp(a, str, comm) +! +! Gather partitionned 1d array to 0 and print it +! + INCLUDE 'mpif.h' + DOUBLE PRECISION, INTENT(in) :: a(:) + INTEGER, INTENT(in) :: comm + CHARACTER(len=*), INTENT(in) :: str + INTEGER :: n, ntot, npes, me, ierr, i + DOUBLE PRECISION, ALLOCATABLE, DIMENSION(:) :: c + INTEGER, ALLOCATABLE, DIMENSION(:) :: counts, displs +! + CALL mpi_comm_rank(comm, me, ierr) + CALL mpi_comm_size(comm, npes, ierr) + n = SIZE(a) + IF(me.EQ.0) THEN + ALLOCATE(counts(npes), displs(npes+1)) + END IF + CALL mpi_gather(n, 1, MPI_INTEGER, counts, 1, MPI_INTEGER, 0, comm, ierr) + IF(me.EQ.0) THEN + displs(1) = 0 + DO i=2,npes+1 + displs(i) = displs(i-1)+counts(i-1) + END DO + ntot = displs(npes+1) + ALLOCATE(c(ntot)) + c = 0.0d0 + END IF + CALL mpi_gatherv(a, n, MPI_DOUBLE_PRECISION, c, counts, displs, & + & MPI_DOUBLE_PRECISION, 0, comm, ierr) + IF( me.EQ.0 ) THEN + WRITE(*,'(/a)') TRIM(str) + DO i=1,npes + WRITE(*,'(a,i3.3/(10(1pe12.3)))') 'PE', i-1, & + & c(displs(i)+1:displs(i+1)) + END DO + DEALLOCATE(c) + DEALLOCATE(counts) + DEALLOCATE(displs) + END IF + END SUBROUTINE disp +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +END MODULE ppde3d_mod diff --git a/examples/ppde3d_pb.f90 b/examples/ppde3d_pb.f90 new file mode 100644 index 0000000..850056b --- /dev/null +++ b/examples/ppde3d_pb.f90 @@ -0,0 +1,518 @@ +!> +!> @file ppde3d_pb.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Solving the following 3d PDE using splines: +! +! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my)cos(z)^n, with f(x=1,y) = 0 +! exact solution: f(x,y) = (1-x^2) x^m cos(my)cos(z)^n +! + USE futils + USE fft + USE pputils2, ONLY : pptransp + USE ppde3d_pb_mod +! + IMPLICIT NONE +! + CHARACTER(len=128) :: infile="ppde3d_pb.in" + INTEGER :: l + INTEGER :: nx, ny, nz, nidbas(3), ngauss(3), mbess, npow, nterms + INTEGER :: startz, endz, nzloc + INTEGER :: start_rank, end_rank, nrank_loc + LOGICAL :: nlppform + INTEGER :: i, j, k, kk, ij, dimx, dimy, dimz, nrank, kl, ku + INTEGER :: jder(3), it + DOUBLE PRECISION :: pi, coefx(5) + DOUBLE PRECISION :: dy, dz + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, zgrid + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: fftmass + DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: rhs, sol, rhs_t + DOUBLE COMPLEX, DIMENSION(:,:), ALLOCATABLE :: crhs_t +! + TYPE(spline2d1d), TARGET :: splxyz + TYPE(spline2d), POINTER :: splxy + TYPE(spline1d) :: splz + TYPE(pbmat) :: mat +! + CHARACTER(len=128) :: file='ppde3d_pb.h5' + INTEGER :: fid + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr + DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol + DOUBLE PRECISION :: seconds, mem, dopla + DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2 + INTEGER :: nits=500 +! + INTEGER, PARAMETER :: npart=10000000 + INTEGER :: nploc + DOUBLE PRECISION, DIMENSION(npart) :: xp, yp, zp, fp_calc, fp_anal + DOUBLE PRECISION zsuml, zsumg, errnorm2 +! + INTEGER :: kmin, kmax + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: fftmass_shifted +! + NAMELIST /newrun/ nx, ny, nz, nidbas, ngauss, mbess, npow, nlppform, coefx +!=========================================================================== +! 1.0 Prologue +! +! Init MPI +! + CALL mpi_init(ierr) + CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) + CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) +! +! Get input file name from command argument +! + IF( COMMAND_ARGUMENT_COUNT() .EQ. 1 ) THEN + CALL GET_COMMAND_ARGUMENT(1, infile, l, ierr) + END IF +! +! Read in data specific to run +! + nx = 8 ! Number of intervals in x + ny = 8 ! Number of intervals in y + nz = 8 ! Number of intervals in z + nidbas = (/3,3,3/) ! Degree of splines + ngauss = (/4,4, 4/) ! Number of Gauss points/interval + mbess = 2 ! Exponent of differential problem + npow = 2 ! Exponent of differential problem + nterms = 2 ! Number of terms in weak form + nlppform = .TRUE. ! Use PPFORM for gridval or not + coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function +! + OPEN(unit=99, file=TRIM(infile), status='old', action='read') + READ(99,newrun) + IF( me.EQ.0) THEN + WRITE(*,newrun) + END IF + CLOSE(99) +! +! Define grid on x (=radial) & y (=poloidal) axis +! + pi = 4.0d0*ATAN(1.0d0) + ALLOCATE(xgrid(0:nx), ygrid(0:ny), zgrid(0:nz)) +! + xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 + CALL meshdist(coefx, xgrid, nx) +! + dy = 2.d0*pi/REAL(ny,8) ! Equidistant in y + ygrid = (/ (j*dy, j=0,ny) /) +! +! Partitionned toroidal grid z +! + dz = 2.0d0*pi/REAL(nz,8) ! Equidistant in z + zgrid = (/ (k*dz, k=0,nz) /) + CALL dist1d(0, nz, startz, nzloc) + endz = startz+nzloc +!!$ PRINT*, 'PE', me, ' startz, endz, nzloc', startz, endz, nzloc +! + IF( me.EQ.0) THEN + WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) + WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny) + WRITE(*,'(a/(10f8.3))') 'ZGRID', zgrid(0:nz) + END IF +! +! Create hdf5 file +! + CALL creatf(file, fid, 'PDE2D Result File', real_prec='d', & + & mpicomm=MPI_COMM_WORLD) + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NY', ny) + CALL attach(fid, '/', 'NZ', nz) + CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) + CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) + CALL attach(fid, '/', 'NIDBAS3', nidbas(3)) + CALL attach(fid, '/', 'NGAUSS1', ngauss(1)) + CALL attach(fid, '/', 'NGAUSS2', ngauss(2)) + CALL attach(fid, '/', 'NGAUSS2', ngauss(3)) + CALL attach(fid, '/', 'MBESS', mbess) + CALL putarr(fid, '/xgrid', xgrid, 'r') + CALL putarr(fid, '/ygrid', ygrid, '\theta') + CALL putarr(fid, '/zgrid', zgrid(0:nz-1), '\phi') +!=========================================================================== +! 2.0 Discretize the PDE +! +! Set up spline +! + t0 = seconds() + CALL set_spline(nidbas, ngauss, xgrid, ygrid, zgrid(startz:endz), & + & splxyz, (/.FALSE., .TRUE., .TRUE./), nlppform=nlppform) + splxy => splxyz%sp12 +! + IF( me.EQ.0) THEN + WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots + WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots + END IF + CALL disp(splxyz%sp3%knots, 'KNOTS in Z', MPI_COMM_WORLD) +! +! 2D FE matrix assembly (in plane x-y) +! + nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix + kl = (nidbas(1)+1)*ny -1 ! Number of sub-diagnonals + ku = kl ! Number of super-diagnonals + IF(me.EQ.0) THEN + WRITE(*,'(a,5i6)') 'nrank, kl, ku', nrank, kl, ku + END IF +! + CALL init(ku, nrank, nterms, mat) + CALL dismat(splxy, mat) + ALLOCATE(arr(nrank)) +! +! BC on Matrix +! + CALL ibcmat(mat, ny) + tmat = seconds() - t0 +! +! 3D RHS assembly +! + ALLOCATE(rhs(nrank,0:nzloc+nidbas(3)-1)) ! With right guard cells nzloc:nzloc+nidbas3-1 + ALLOCATE(sol(nrank,0:nzloc-1)) + CALL disrhs3(mbess, npow, splxyz, rhs) +! + zsuml = SUM(ABS(rhs(:,0:nzloc-1))) + CALL mpi_reduce(zsuml, zsumg, 1, MPI_DOUBLE_PRECISION, MPI_SUM , 0, & + & MPI_COMM_WORLD, ierr) + IF(me.EQ.0) PRINT*, 'sum of rhs after DISRHS3', zsumg +! +! FFT in z of RHS +! + CALL dist1d(1, nrank, start_rank, nrank_loc) + end_rank = start_rank+nrank_loc-1 + ALLOCATE(rhs_t(0:nz-1,nrank_loc), crhs_t(0:nz-1,nrank_loc)) +! + CALL pptransp(MPI_COMM_WORLD, rhs(:,0:nzloc-1), rhs_t) + crhs_t = rhs_t + CALL fourcol(crhs_t,1) + crhs_t = crhs_t/REAL(nz,8) +! +! Apply Mass matrix to crhs +! + CALL set_spline(nidbas(3), ngauss(3), zgrid, splz, .TRUE.) + kmin =-nz/2 + kmax = nz/2-1 + CALL init_dft(splz, kmin, kmax) + ALLOCATE(fftmass_shifted(kmin:kmax)) + ALLOCATE(fftmass(0:nz-1)) + CALL calc_fftmass_old(splz, fftmass_shifted) + DO k=kmin,kmax + fftmass(MODULO(k+nz,nz)) = fftmass_shifted(k) + END DO + IF(me.EQ.0) THEN + WRITE(*,'(/a/(10(1pe12.3)))') 'Mass matrix', fftmass + END IF + DO k=0,nz-1 + crhs_t(k,:) = crhs_t(k,:)/fftmass(k) + END DO +! +! Fourier transform back crhs to real space in z +! + CALL fourcol(crhs_t, -1) + rhs_t(:,:) = REAL(crhs_t(:,:),8) + CALL pptransp(MPI_COMM_WORLD, rhs_t, sol) ! Put the final RHS in SOL +! +! BC on RHS +! + CALL ibcrhs3(sol, ny) +! + IF(me.EQ.0) THEN + WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem() + END IF +!=========================================================================== +! 3.0 Solve the dicretized system +! + t0 = seconds() + CALL factor(mat) + tfact = seconds() - t0 + gflops1 = dopla('DPBTRF',nrank,nrank,kl,ku,0) / tfact / 1.d9 + + t0 = seconds() + CALL bsolve(mat, sol) +! +! Backtransform of solution +! + DO k=0,nzloc-1 + sol(1:ny-1,k) = sol(ny,k) + END DO +! + zsuml = SUM(ABS(sol)) + CALL mpi_reduce(zsuml, zsumg, 1, MPI_DOUBLE_PRECISION, MPI_SUM , 0, & + & MPI_COMM_WORLD, ierr) + IF(me.EQ.0) PRINT*, 'sum of sol', zsumg +! + tsolv = seconds() - t0 + gflops2 = dopla('DPBTRS',nrank,1,kl,ku,0) / tsolv / 1.d9 +! +! Spline coefficients, taking into account of periodicity in y and z +! Note: in SOL, y was numbered first. +! + dimx = splxy%sp1%dim + dimy = splxy%sp2%dim + dimz = splxyz%sp3%dim + ALLOCATE(bcoef(0:dimx-1, 0:dimy-1, 0:dimz-1)) +! +! Get 3D array of spline coefs. +! + DO j=0,dimy-1 + DO i=0,dimx-1 + ij = MODULO(j,ny) + i*ny + 1 + DO k=0,nzloc-1 + bcoef(i,j,k) = sol(ij,k) + END DO + END DO + END DO +! +! Get missing coefs from remote guard cells +! + prev = MODULO(me-1,npes) + next = MODULO(me+1,npes) + count = dimx*dimy + DO i=0,nidbas(3)-1 + CALL mpi_sendrecv(bcoef(0,0,i), count, MPI_DOUBLE_PRECISION, prev, 0, & + & bcoef(0,0,nzloc+i), count, MPI_DOUBLE_PRECISION, next, 0, & + & MPI_COMM_WORLD, status, ierr) + END DO +! + IF(me.EQ.0) THEN + WRITE(*,'(/a,3i6)') 'dimx, dimy, dimz =', dimx, dimy, dimz + END IF +!=========================================================================== +! 4.0 Check the solution +! +! Check function values computed with various method +! + CALL RANDOM_NUMBER(xp) + CALL RANDOM_NUMBER(yp); yp = 2.d0*pi*yp + CALL RANDOM_NUMBER(zp); zp = 2.d0*pi*zp + nploc = 0 + DO i=1,npart + IF(zp(i).GE.zgrid(startz) .AND. zp(i).LT.zgrid(endz)) THEN + nploc = nploc+1 + xp(nploc) = xp(i) + yp(nploc) = yp(i) + zp(nploc) = zp(i) + END IF + END DO + jder = (/0,0,0/) + CALL gridval(splxyz, xp(1:nploc), yp(1:nploc), zp(1:nploc), fp_calc(1:nploc), jder, bcoef) + DO i=1,nploc + fp_anal(i) = (1-xp(i)**2) * xp(i)**mbess & + & * COS(mbess*yp(i)) * COS(zp(i))**npow + END DO + errnorm2 = norm21(fp_calc(1:nploc)-fp_anal(1:nploc))/norm21(fp_calc(1:nploc)) + IF(me.EQ.0) THEN + WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors, using random points', & + & errnorm2 + END IF +! + ALLOCATE(solcal(0:nx,0:ny,0:nzloc-1)) + ALLOCATE(solana(0:nx,0:ny,0:nzloc-1)) + ALLOCATE(errsol(0:nx,0:ny,0:nzloc-1)) +! + DO i=0,nx + DO j=0,ny + DO k=0,nzloc-1 + kk=startz+k + solana(i,j,k) = (1-xgrid(i)**2) * xgrid(i)**mbess & + & * COS(mbess*ygrid(j)) * COS(zgrid(kk))**npow + END DO + END DO + END DO +! + jder = (/0,0,0/) + CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder, bcoef) + t0 = seconds() + CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder) + tgrid = seconds()-t0 + errsol = solana - solcal +! + errnorm2 = norm2(errsol) / norm2(solana) + IF(me.EQ.0) THEN + WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & + & errnorm2 + END IF + CALL putarr(fid, '/sol', solcal,pardim=3) + CALL putarr(fid, '/solana', solana,pardim=3) +! +! Check derivative d/dx +! + DO i=0,nx + DO j=0,ny + DO k=0,nzloc-1 + IF( mbess .EQ. 0 ) THEN + solana(i,j,k) = -2.0d0 * xgrid(i) * COS(zgrid(k+startz))**npow + ELSE + solana(i,j,k) = (-(mbess+2)*xgrid(i)**2 + mbess) * & + & xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) * & + & COS(zgrid(k+startz))**npow + END IF + END DO + END DO + END DO +! + jder = (/1,0,0/) + t0 = seconds() + CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder) + tgrid = tgrid + seconds()-t0 + errsol = solana - solcal + errnorm2 = norm2(errsol) / norm2(solana) + IF(me.EQ.0) THEN + WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dx', errnorm2 + END IF + CALL putarr(fid, '/derivx', solcal, pardim=3) + CALL putarr(fid, '/derivx_exact', solana,pardim=3) +! +! Check derivative d/dy +! + DO i=0,nx + DO j=0,ny + DO k=0,nzloc-1 + solana(i,j,k) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * & + & SIN(mbess*ygrid(j))* COS(zgrid(k+startz))**npow + END DO + END DO + END DO +! + jder = (/0,1,0/) + t0 = seconds() + CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder) + tgrid = tgrid + seconds()-t0 + errsol = solana - solcal + errnorm2 = norm2(errsol) / norm2(solana) + IF(me.EQ.0) THEN + WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dy', errnorm2 + END IF + CALL putarr(fid, '/derivy', solcal, pardim=3) + CALL putarr(fid, '/derivy_exact', solana,pardim=3) +! +! Check derivative d/dz +! + DO i=0,nx + DO j=0,ny + DO k=0,nzloc-1 + solana(i,j,k) = -npow*(1-xgrid(i)**2) * xgrid(i)**mbess & + & * COS(mbess*ygrid(j)) * COS(zgrid(k+startz))**(npow-1) & + & * SIN(zgrid(k+startz)) + END DO + END DO + END DO +! + jder = (/0,0,1/) + t0 = seconds() + IF(nlppform) THEN + CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder) + ELSE + CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder, bcoef) + END IF + tgrid = tgrid + seconds()-t0 + errsol = solana - solcal + errnorm2 = norm2(errsol) / norm2(solana) + IF(me.EQ.0) THEN + WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dz', errnorm2 + END IF + CALL putarr(fid, '/derivz', solcal, pardim=3) + CALL putarr(fid, '/derivz_exact', solana,pardim=3) +!=========================================================================== +! 9.0 Epilogue +! + IF(me.EQ.0) THEN + WRITE(*,'(/a)') '---' + WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat + WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact + WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv + WRITE(*,'(a,1pe12.3)') 'gridval time (s) ', tgrid + WRITE(*,'(a,2f10.3)') 'Factor/solve Gflop/s', gflops1, gflops2 + WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem() + END IF +! + DEALLOCATE(xgrid, ygrid, zgrid) + DEALLOCATE(fftmass) + DEALLOCATE(fftmass_shifted) + DEALLOCATE(rhs) + DEALLOCATE(sol) + DEALLOCATE(rhs_t) + DEALLOCATE(crhs_t) + DEALLOCATE(bcoef) + DEALLOCATE(solcal, solana, errsol) + DEALLOCATE(arr) + CALL destroy_sp(splxyz) + CALL destroy_sp(splz) + CALL destroy(mat) +! + CALL closef(fid) +! + CALL mpi_finalize(ierr) +!=========================================================================== +! +CONTAINS + FUNCTION norm2(x) +! +! Compute the 2-norm of array x +! + IMPLICIT NONE + DOUBLE PRECISION :: norm2 + DOUBLE PRECISION, INTENT(in) :: x(:,:,:) + DOUBLE PRECISION :: sum2, sum2g + INTEGER :: i, j +! + sum2 = 0.0d0 + DO i=1,SIZE(x,1) + DO j=1,SIZE(x,2) + DO k=1,SIZE(x,3) + sum2 = sum2 + x(i,j,k)**2 + END DO + END DO + END DO + CALL mpi_allreduce(sum2, sum2g, 1, MPI_DOUBLE_PRECISION, MPI_SUM, & + & MPI_COMM_WORLD, ierr) + norm2 = SQRT(sum2g) + END FUNCTION norm2 +! + FUNCTION norm21(x) +! +! Compute the 2-norm of array x +! + IMPLICIT NONE + DOUBLE PRECISION :: norm21 + DOUBLE PRECISION, INTENT(in) :: x(:) + DOUBLE PRECISION :: sum2, sum2g + INTEGER :: i, j +! + sum2 = DOT_PRODUCT(x,x) + CALL mpi_allreduce(sum2, sum2g, 1, MPI_DOUBLE_PRECISION, MPI_SUM, & + & MPI_COMM_WORLD, ierr) + norm21 = SQRT(sum2g) + END FUNCTION norm21 + SUBROUTINE prnmat(label, mat) + CHARACTER(len=*) :: label + DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat + INTEGER :: i + WRITE(*,'(/a)') TRIM(label) + DO i=1,SIZE(mat,1) + WRITE(*,'(10(1pe12.3))') mat(i,:) + END DO + END SUBROUTINE prnmat +END PROGRAM main +! +!+++ diff --git a/examples/ppde3d_pb_mod.f90 b/examples/ppde3d_pb_mod.f90 new file mode 100644 index 0000000..72c4917 --- /dev/null +++ b/examples/ppde3d_pb_mod.f90 @@ -0,0 +1,452 @@ +!> +!> @file ppde3d_pb_mod.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +MODULE ppde3d_pb_mod + USE bsplines + USE matrix + IMPLICIT NONE + INCLUDE "mpif.h" +! + INTEGER :: me, npes + INTEGER :: prev, next + INTEGER :: count, status(MPI_STATUS_SIZE), ierr +! +CONTAINS + SUBROUTINE dismat(spl, mat) +! +! Assembly of FE matrix mat using spline spl +! + TYPE(spline2d), INTENT(in) :: spl + TYPE(pbmat), INTENT(inout) :: mat +! + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: n2, nidbas2, ndim2, ng2 + INTEGER :: i, j, ig1, ig2 + INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol + DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) + DOUBLE PRECISION:: contrib +! + INTEGER :: kterms ! Number of terms in weak form + INTEGER, DIMENSION(:,:), ALLOCATABLE :: idert, iderw ! Derivative order + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: coefs ! Terms in weak form +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) +! +! Weak form +! + kterms = mat%nterms + ALLOCATE(idert(kterms,2), iderw(kterms,2), coefs(kterms)) +! + ALLOCATE(fun1(0:nidbas1,0:1)) ! Spline and 1st derivative + ALLOCATE(fun2(0:nidbas2,0:1)) ! +! +! Gauss quadature +! + CALL get_gauss(spl%sp1, ng1) + CALL get_gauss(spl%sp2, ng2) + ALLOCATE(xg1(ng1), wg1(ng1)) + ALLOCATE(xg2(ng1), wg2(ng1)) +!=========================================================================== +! 2.0 Assembly loop +! + DO i=1,n1 + CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) + DO ig1=1,ng1 + CALL basfun(xg1(ig1), spl%sp1, fun1, i) + DO j=1,n2 + CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) + DO ig2=1,ng2 + CALL basfun(xg2(ig2), spl%sp2, fun2, j) + CALL coefeq(xg1(ig1), xg2(ig2), idert, iderw, coefs) + DO iterm=1,kterms + DO iw1=0,nidbas1 ! Weight function in dir 1 + igw1 = i+iw1 + DO iw2=0,nidbas2 ! Weight function in dir 2 + igw2 = MODULO(j+iw2-1, n2) + 1 + irow = igw2 + (igw1-1)*n2 + DO it1=0,nidbas1 ! Test function in dir 1 + igt1 = i+it1 + DO it2=0,nidbas2 ! Test function in dir 2 + igt2 = MODULO(j+it2-1, n2) + 1 + jcol = igt2 + (igt1-1)*n2 + contrib = fun1(iw1,iderw(iterm,1)) * & + & fun2(iw2,iderw(iterm,2)) * & + & coefs(iterm) * & + & fun2(it2,idert(iterm,2)) * & + & fun1(it1,idert(iterm,1)) * & + & wg1(ig1) * wg2(ig2) + CALL updtmat(mat, irow, jcol, contrib) + END DO + END DO + END DO + END DO + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xg1, wg1, fun1) + DEALLOCATE(xg2, wg2, fun2) + DEALLOCATE(idert, iderw, coefs) +! + CONTAINS + SUBROUTINE coefeq(x, y, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x, y + INTEGER, INTENT(out) :: idt(:,:), idw(:,:) + DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1)) +! +! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy +! + c(1) = x ! + idt(1,1) = 1 + idt(1,2) = 0 + idw(1,1) = 1 + idw(1,2) = 0 +! + c(2) = 1.d0/x + idt(2,1) = 0 + idt(2,2) = 1 + idw(2,1) = 0 + idw(2,2) = 1 + END SUBROUTINE coefeq + END SUBROUTINE dismat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE disrhs3(mbess, npow, spl, rhs) +! +! Assembly the RHS using 3d spline spl +! + INTEGER, INTENT(in) :: mbess, npow + TYPE(spline2d1d), TARGET :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:,:) +! + TYPE(spline1d), POINTER :: sp1, sp2, sp3 + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: n2, nidbas2, ndim2, ng2 + INTEGER :: n3, nidbas3, ndim3, ng3 + INTEGER :: i, j, k, ig1, ig2, ig3, k1, k2, k3, i1, j2, ij, kk, nrank + DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg3(:), wg3(:), fun3(:,:) + DOUBLE PRECISION, ALLOCATABLE :: buf(:) + DOUBLE PRECISION:: contrib +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + sp1 => spl%sp12%sp1 + sp2 => spl%sp12%sp2 + sp3 => spl%sp3 +! + CALL get_dim(sp1, ndim1, n1, nidbas1) + CALL get_dim(sp2, ndim2, n2, nidbas2) + CALL get_dim(sp3, ndim3, n3, nidbas3) +! + ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) + ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) + ALLOCATE(fun3(0:nidbas3,1)) ! needs only basis functions (no derivatives) +! +! Gauss quadature +! + CALL get_gauss(sp1, ng1) + CALL get_gauss(sp2, ng2) + CALL get_gauss(sp3, ng3) + ALLOCATE(xg1(ng1), wg1(ng1)) + ALLOCATE(xg2(ng2), wg2(ng2)) + ALLOCATE(xg3(ng3), wg3(ng3)) +!=========================================================================== +! 2.0 Assembly loop +! + nrank = SIZE(rhs,1) + rhs = 0.0d0 +! + DO i=1,n1 + CALL get_gauss(sp1, ng1, i, xg1, wg1) + DO ig1=1,ng1 + CALL basfun(xg1(ig1), sp1, fun1, i) + DO j=1,n2 + CALL get_gauss(sp2, ng2, j, xg2, wg2) + DO ig2=1,ng2 + CALL basfun(xg2(ig2), sp2, fun2, j) + DO k=1,n3 + CALL get_gauss(sp3, ng3, k, xg3, wg3) + DO ig3=1,ng3 + CALL basfun(xg3(ig3), sp3, fun3, k) + contrib = wg1(ig1)*wg2(ig2)*wg3(ig3) * & + & rhseq(xg1(ig1), xg2(ig2), xg3(ig3), mbess, npow) + DO k1=0,nidbas1 + i1 = i+k1 + DO k2=0,nidbas2 + j2 = MODULO(j+k2-1,n2) + 1 + ij = j2 + (i1-1)*n2 + DO k3=0,nidbas3 + kk = k+k3 + rhs(ij,kk) = rhs(ij, kk) + & + & contrib*fun1(k1,1)*fun2(k2,1)*fun3(k3,1) + END DO + END DO + END DO + END DO + END DO + END DO + END DO + END DO + END DO +! +! Update from remote guard cells +! + CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) + CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) + next = MODULO(me+1,npes) + prev = MODULO(me-1,npes) + count = nrank + ALLOCATE(buf(nrank)) + DO i=nidbas3,1,-1 + CALL mpi_sendrecv(rhs(1,n3+i), count, MPI_DOUBLE_PRECISION, next, 0, & + & buf, count, MPI_DOUBLE_PRECISION, prev, 0, & + & MPI_COMM_WORLD, status, ierr) + rhs(:,i) = rhs(:,i) + buf(:) + END DO + DEALLOCATE(buf) +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xg1, wg1, fun1) + DEALLOCATE(xg2, wg2, fun2) + DEALLOCATE(xg3, wg3, fun3) +! + CONTAINS + DOUBLE PRECISION FUNCTION rhseq(x1, x2, x3, m, n) + DOUBLE PRECISION, INTENT(in) :: x1, x2, x3 + INTEGER, INTENT(in) :: m, n + rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)*COS(x3)**n + END FUNCTION rhseq + END SUBROUTINE disrhs3 +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE meshdist(c, x, nx) +! +! Construct an 1d non-equidistant mesh given a +! mesh distribution function. +! + DOUBLE PRECISION, INTENT(in) :: c(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + INTEGER :: nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) + CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) + END FUNCTION fdist + END SUBROUTINE meshdist +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE ibcmat(mat, ny) +! +! Apply BC on matrix +! + TYPE(pbmat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: ny + INTEGER :: kl, ku, nrank, i, j + DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:) +!=========================================================================== +! 1.0 Prologue +! + ku = mat%ku + kl = ku + nrank = mat%rank + ALLOCATE(zsum(nrank), arr(nrank)) +!=========================================================================== +! 2.0 Unicity at the axis +! +! The vertical sum on the NY-th row +! + zsum = 0.0d0 + DO i=1,ny + arr = 0.0d0 + CALL getrow(mat, i, arr) + DO j=1,ny+ku + zsum(j) = zsum(j) + arr(j) + END DO + END DO +! + zsum(ny) = SUM(zsum(1:ny)) ! using symmetry + CALL putrow(mat, ny, zsum) +! +! The away operator +! + DO i = 1,ny-1 + arr = 0.0d0; arr(i) = 1.0d0 + CALL putrow(mat, i, arr) + END DO +!=========================================================================== +! 3.0 Dirichlet on right boundary +! + DO i = nrank, nrank-ny+1, -1 + arr = 0.0d0; arr(i) = 1.0d0 + CALL putrow(mat, i, arr) + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(zsum, arr) +! + END SUBROUTINE ibcmat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE ibcrhs3(rhs, ny) +! +! Apply BC on RHS +! + DOUBLE PRECISION, INTENT(inout) :: rhs(:,:) + INTEGER, INTENT(in) :: ny + INTEGER :: nrank, nz, k + DOUBLE PRECISION :: zsum +!=========================================================================== +! 1.0 Prologue +! + nrank = SIZE(rhs,1) + nz = SIZE(rhs,2) +!=========================================================================== +! 2.0 Unicity at the axis +! +! The vertical sum +! + DO k=1,nz + zsum = SUM(rhs(1:ny,k)) + rhs(ny,k) = zsum + rhs(1:ny-1,k) = 0.0d0 + END DO +!=========================================================================== +! 3.0 Dirichlet on right boundary +! + DO k=1,nz + rhs(nrank-ny+1:nrank,k) = 0.0d0 + END DO + END SUBROUTINE ibcrhs3 +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE dist1d(s0, ntot, s, nloc) + INCLUDE 'mpif.h' + INTEGER, INTENT(in) :: s0, ntot + INTEGER, INTENT(out) :: s, nloc + INTEGER :: me, npes, ierr, naver, rem +! + CALL MPI_COMM_SIZE(MPI_COMM_WORLD, npes, ierr) + CALL MPI_COMM_RANK(MPI_COMM_WORLD, me, ierr) + naver = ntot/npes + rem = MODULO(ntot,npes) + s = s0 + MIN(rem,me) + me*naver + nloc = naver + IF( me.LT.rem ) nloc = nloc+1 + END SUBROUTINE dist1d +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE disp(a, str, comm) +! +! Gather partitionned 1d array to 0 and print it +! + INCLUDE 'mpif.h' + DOUBLE PRECISION, INTENT(in) :: a(:) + INTEGER, INTENT(in) :: comm + CHARACTER(len=*), INTENT(in) :: str + INTEGER :: n, ntot, npes, me, ierr, i + DOUBLE PRECISION, ALLOCATABLE, DIMENSION(:) :: c + INTEGER, ALLOCATABLE, DIMENSION(:) :: counts, displs +! + CALL mpi_comm_rank(comm, me, ierr) + CALL mpi_comm_size(comm, npes, ierr) + n = SIZE(a) + IF(me.EQ.0) THEN + ALLOCATE(counts(npes), displs(npes+1)) + END IF + CALL mpi_gather(n, 1, MPI_INTEGER, counts, 1, MPI_INTEGER, 0, comm, ierr) + IF(me.EQ.0) THEN + displs(1) = 0 + DO i=2,npes+1 + displs(i) = displs(i-1)+counts(i-1) + END DO + ntot = displs(npes+1) + ALLOCATE(c(ntot)) + c = 0.0d0 + END IF + CALL mpi_gatherv(a, n, MPI_DOUBLE_PRECISION, c, counts, displs, & + & MPI_DOUBLE_PRECISION, 0, comm, ierr) + IF( me.EQ.0 ) THEN + WRITE(*,'(/a)') TRIM(str) + DO i=1,npes + WRITE(*,'(a,i3.3/(10(1pe12.3)))') 'PE', i-1, & + & c(displs(i)+1:displs(i+1)) + END DO + DEALLOCATE(c) + DEALLOCATE(counts) + DEALLOCATE(displs) + END IF + END SUBROUTINE disp +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +END MODULE ppde3d_pb_mod diff --git a/examples/tbasfun.f90 b/examples/tbasfun.f90 new file mode 100644 index 0000000..26bf500 --- /dev/null +++ b/examples/tbasfun.f90 @@ -0,0 +1,137 @@ +!> +!> @file tbasfun.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Test scalar and vector versions of def_basfun +! + USE bsplines + IMPLICIT NONE + INTEGER :: nx, nidbas, nrank, npt, jdermx + DOUBLE PRECISION :: dx + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid + DOUBLE PRECISION, ALLOCATABLE :: xpt(:), fun(:, :), vfun(:,:,:) + DOUBLE PRECISION, ALLOCATABLE :: fun1(:, :), vfun1(:,:,:) + DOUBLE PRECISION :: errfun + INTEGER :: left, i, nerrs, k + INTEGER, ALLOCATABLE :: vleft(:) + LOGICAL :: nlper=.FALSE. + TYPE(spline1d) :: splx +! + NAMELIST /newrun/ nx, nidbas, npt, jdermx, nlper +! +!=============================================================================== +! +! 1D grid +! + nx = 10 + nidbas = 3 + npt = 1000000 + jdermx = 0 + READ(*,newrun) + WRITE(*,newrun) + + ALLOCATE(xgrid(0:nx)) + dx = 1.0d0/REAL(nx) + xgrid = (/ (i*dx,i=0,nx) /) + WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid +! +! Set up spline +! + CALL set_spline(nidbas, 4, xgrid, splx, period=nlper) + nrank = splx%dim + WRITE(*,'(a, i5)') 'nrank =', nrank + WRITE(*,'(a/(10f8.3))') 'knots', splx%knots +! + IF(nx.LE.2) THEN + WRITE(*,'(/a)') 'VAL0' + DO i=1,nx + WRITE(*,'(a,i3)') 'Interval', i + DO k=1,nidbas+1 ! Spline number + WRITE(*,'(10f12.4)') splx%val0(:,k,i) + END DO + END DO + IF(nlper) THEN + WRITE(*,'(/a)') 'VALC' + DO k=1,nidbas+1 ! Spline number + WRITE(*,'(10f12.4)') splx%valc(:,k) + END DO + END IF + END IF +! + ALLOCATE(xpt(npt)) + ALLOCATE(vleft(npt)) + ALLOCATE(fun(0:nidbas,0:jdermx)) ! Values and derivatives of all Splines + ALLOCATE(vfun(0:nidbas,0:jdermx,npt)) + ALLOCATE(fun1(0:nidbas,0:jdermx)) ! Values and derivatives of all Splines + ALLOCATE(vfun1(0:nidbas,0:jdermx,npt)) + CALL RANDOM_NUMBER(xpt) +!=============================================================================== +! +! Check def_basfun +! + CALL def_basfun(xpt, splx, vfun, vleft) +! + WRITE(*,'(/a)') 'vector def_basfun versus scalar def_basfun' + WRITE(*,'(a6,a12, 2a6, a12)') 'i', 'x', 'left', 'vleft', 'Max. err' + DO i=1,npt + CALL def_basfun(xpt(i), splx, fun, left) + errfun= MAXVAL(ABS(fun(:,:)-vfun(:,:,i))) + WRITE(*,'(i6,1pe12.4,2i6,1pe12.4)') i, xpt(i), left, vleft(i), errfun + END DO +! + IF(npt.LE.10) THEN + WRITE(*,'(/a)') 'Scalar/vector basfun' + DO i=1,npt + CALL basfun(xpt(i), splx, fun, vleft(i)+1) + WRITE(*,'(a,1pe12.4/10(1pe12.4))') 'x = ', xpt(i), fun(:,:) + WRITE(*,'(10(1pe12.4))') vfun(:,:,i) + END DO + END IF +!=============================================================================== +! +! Check basfun +! + CALL basfun(xpt, splx, vfun1, vleft+1) + WRITE(*,'(/a,1pe12.4)') 'vector basfun versus vector def_basun: Max err', & + & MAXVAL(ABS(vfun-vfun1)) +! + WRITE(*,'(/a)') 'vector basfun versus scalar basfun' + WRITE(*,'(a6,a12,a12)') 'i', 'x', 'Max. err' + DO i=1,npt + CALL basfun(xpt(i), splx, fun1, vleft(i)+1) + errfun= MAXVAL(ABS(fun1(:,:)-vfun1(:,:,i))) + WRITE(*,'(i6,1pe12.4,1pe12.4)') i, xpt(i), errfun + END DO +!!=============================================================================== +! +! Clean up +! + CALL destroy_sp(splx) + DEALLOCATE(xgrid) + DEALLOCATE(xpt) + DEALLOCATE(vleft) + DEALLOCATE(fun) +END PROGRAM main diff --git a/examples/tcdsmat.f90 b/examples/tcdsmat.f90 new file mode 100644 index 0000000..f832abf --- /dev/null +++ b/examples/tcdsmat.f90 @@ -0,0 +1,283 @@ +!> +!> @file tcdsmat.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Solving the following 2d PDE using splines and iterative method +! +! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0 +! exact solution: f(x,y) = (1-x^2) x^m cos(my) +! + USE tcdsmat_mod + USE bsplines + USE cds + USE futils +! + IMPLICIT NONE + INCLUDE 'mpif.h' +! + INTEGER :: nints(2), nidbas(2), ngauss(2), mbess, nterms + DOUBLE PRECISION :: pi, coefx(5), coefy(5) + DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), ygrid(:) + DOUBLE PRECISION, ALLOCATABLE :: rhs(:), sol(:) + + INTEGER :: mrank, bw0 + TYPE(spline2d) :: splxy + TYPE(cds_mat) :: mat + LOGICAL :: readmat, verbose + CHARACTER(len=128) :: file='tcdsmat.h5' + CHARACTER(len=128) :: filein + INTEGER :: fid, fidin + DOUBLE PRECISION :: mem, seconds + DOUBLE PRECISION :: t0, tmat, tbal, tsolv, tgrid, tmumps(2) + INTEGER :: nitmx=100, niter, nssor + DOUBLE PRECISION :: rtolmx=1.0d-6, omega=0.0d0, resid +! + INTEGER :: i, j, ij, dimx, dimy + DOUBLE PRECISION, ALLOCATABLE :: bcoef(:,:) + DOUBLE PRECISION, ALLOCATABLE :: solcal(:,:), solana(:,:), errsol(:,:) + INTEGER, ALLOCATABLE :: dists(:) +! + INTEGER :: ierr, me + TYPE(mumps_mat) :: mat_mumps +! + NAMELIST /newrun/ nints, nidbas, ngauss, mbess, coefx, coefy, & + & nitmx, rtolmx, omega, nssor, readmat, verbose, filein +!=========================================================================== +! 1.0 Prologue +! + CALL mpi_init(ierr) + CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) +! +! Read in data specific to run +! + WRITE(*,'(/a)') 'Prologue ...' + readmat = .FALSE. ! Read matrix and rhs from file + filein = 'mat.h5' + nints = (/8,8/) ! Number of intervals in x, y + nidbas = (/3,3/) ! Degree of splines + ngauss = (/4,4/) ! Number of Gauss points/interval + mbess = 2 ! Exponent of differential problem + nterms = 2 ! Number of terms in weak form + nitmx = 1000 ! Max number of iterations + rtolmx = 1.e-12 ! Max relative tolerance + nssor = 1 ! Number of SSOR precond steps + verbose = .FALSE. ! Output residue at each iteration + coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function +! + READ(*,newrun) +! +! Overwrite some input by reading in "filein" if required +! + IF(readmat) THEN + CALL openf(filein, fidin, mode='r') + CALL getatt(fidin, '/', 'NX', nints(1)) + CALL getatt(fidin, '/', 'NY', nints(2)) + CALL getatt(fidin, '/', 'NIDBAS1', nidbas(1)) + CALL getatt(fidin, '/', 'NIDBAS2', nidbas(2)) + CALL getatt(fidin, '/', 'NGAUSS1', ngauss(1)) + CALL getatt(fidin, '/', 'NGAUSS2', ngauss(2)) + CALL getatt(fidin, '/', 'MBESS', mbess) + END IF + WRITE(*,newrun) +! +! Define grid on x (=radial) & y (=poloidal) axis +! + pi = 4.0d0*ATAN(1.0d0) + ALLOCATE(xgrid(0:nints(1)), ygrid(0:nints(2))) + xgrid(0) = 0.0d0; xgrid(nints(1)) = 1.0d0 + CALL meshdist(coefx, xgrid, nints(1)) + ygrid(0) = 0.0d0; ygrid(nints(2)) = 2.d0*pi + CALL meshdist(coefy, ygrid, nints(2)) +! +! Create hdf5 file +! + CALL creatf(file, fid, 'TCDSMAT Result File', real_prec='d') + CALL attach(fid, '/', 'NX', nints(1)) + CALL attach(fid, '/', 'NY', nints(2)) + CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) + CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) + CALL attach(fid, '/', 'NGAUSS1', ngauss(1)) + CALL attach(fid, '/', 'NGAUSS2', ngauss(2)) + CALL attach(fid, '/', 'MBESS', mbess) +!=========================================================================== +! 2.0 Discretize the PDE +! +! Set up spline +! + WRITE(*,'(/a)') 'Discretize the PDE ...' + t0 = seconds() + CALL set_spline(nidbas, ngauss, & + & xgrid, ygrid, splxy, (/.FALSE., .TRUE./) ) +! +! Set up CDS matrix for solver +! + CALL mstruct(nidbas, nints, mrank, dists) + bw0=(nidbas(1)+1)*nints(2) ! Half band including all zero diagonals + CALL init(mrank, dists, nterms, mat, bw0=bw0) + WRITE(*,'(a,4i8)') 'rank, kl, ku, bw0 = ', mat%rank, mat%kl, mat%ku, bw0 + WRITE(*,'(i4,a/(10i8))') mat%ndiags, ' diagonals:', mat%dists +! +! FE matrix assembly and apply BC +! + IF(readmat) THEN + CALL getmat(fidin, '/MAT1', mat) + ELSE + CALL dismat(splxy, mat) + CALL ibcmat(mat, nints(2), nidbas(1)) + END IF +! + WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem() +! +! Assembly RHS and apply BC +! + ALLOCATE(rhs(mrank), sol(mrank)) +! + IF(readmat) THEN + CALL getarr(fidin, '/RHS', rhs) + ELSE + CALL disrhs(mbess, splxy, rhs) + CALL ibcrhs(rhs, nints(2)) + CALL putarr(fid, '/RHS', rhs, 'RHS of linear system') + END IF + tmat = seconds() - t0 +!=========================================================================== +! 3.0 Diagonal balance of matrix +! + WRITE(*,'(/a)') 'Diagonal balance of matrix ...' + tbal = seconds() + IF( .NOT. readmat ) THEN + CALL diagbal(mat) + CALL putmat(fid,'/MAT1', mat, 'CDS matrix with BC') + END IF + rhs = mat%bal * rhs + tbal = seconds()-tbal +!=========================================================================== +! 4.0 Analytical solutions +! + ALLOCATE(solcal(0:nints(1),0:nints(2))) + ALLOCATE(solana(0:nints(1),0:nints(2))) + ALLOCATE(errsol(0:nints(1),0:nints(2))) + DO i=0,nints(1) + DO j=0,nints(2) + solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j)) + END DO + END DO +!=========================================================================== +! 5.0 Direct solve with MUMPS +! +! + WRITE(*,'(/a)') 'Solve the linear system using MUMPS ...' +! + tmumps(1) = seconds() + CALL cds2mumps(mat, mat_mumps) + CALL factor(mat_mumps, debug=.FALSE.) + tmumps(2) = seconds() + sol = rhs + CALL bsolve(mat_mumps, sol, debug=.FALSE.) + sol = mat%bal * sol + sol(1:nints(2)-1) = sol(nints(2)) ! Unicity + tmumps(1:2) = seconds()-tmumps(1:2) + +! + tgrid = seconds() + dimx = splxy%sp1%dim + dimy = splxy%sp2%dim + ALLOCATE(bcoef(0:dimx-1, 0:dimy-1)) +! + DO j=0,dimy-1 + DO i=0,dimx-1 + ij = MODULO(j,nints(2)) + i*nints(2) + 1 + bcoef(i,j) = sol(ij) + END DO + END DO + CALL gridval(splxy, xgrid, ygrid, solcal, (/0,0/), bcoef) + errsol = solana - solcal + tgrid = seconds() - tgrid +! + PRINT*, 'Relative discretization errors', norm2(errsol) / norm2(solana) +!!$ WRITE(*,'(a,2(1pe15.6))') 'Relative discretization errors', & +!!$ & norm2(errsol) / norm2(solana) +!=========================================================================== +! 5.0 Solve the linear system using CG +! + WRITE(*,'(/a)') 'Solve the linear system using CG ...' +! + tsolv = seconds() + sol(:) = 0.0d0 ! Initial guest for solution + IF( nssor .EQ. 0 ) THEN + CALL cg(mat, rhs, omega, nitmx, rtolmx, sol, resid, niter, & + & verbose=verbose) + ELSE + CALL cg(mat, rhs, omega, nitmx, rtolmx, sol, resid, niter, & + & verbose=verbose, nssor=nssor) + END IF + sol = mat%bal * sol + sol(1:nints(2)-1) = sol(nints(2)) ! Unicity + tsolv = seconds()-tsolv +! + DO j=0,dimy-1 + DO i=0,dimx-1 + ij = MODULO(j,nints(2)) + i*nints(2) + 1 + bcoef(i,j) = sol(ij) + END DO + END DO + CALL gridval(splxy, xgrid, ygrid, solcal, (/0,0/), bcoef) + errsol = solana - solcal +! + PRINT*, 'Relative discretization errors', norm2(errsol) / norm2(solana) +!=========================================================================== +! 9.0 Epilogue +! + CALL putarr(fid, '/SOL', sol) + CALL putarr(fid, '/xgrid', xgrid, 'r') + CALL putarr(fid, '/ygrid', ygrid, '\theta') + CALL putarr(fid, '/sol', solcal, 'Solutions') + CALL putarr(fid, '/solana', solana,'Exact solutions') + CALL putarr(fid, '/errors', errsol, 'Errors') +! + WRITE(*,'(/a)') '---' + WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat + WRITE(*,'(a,1pe12.3)') 'Matrice balancing time (s) ', tbal + WRITE(*,'(a,2(1pe12.3))') 'MUMPS solver time (s) ', tmumps + WRITE(*,'(a,1pe12.3)') 'Solution at grid time (s) ', tgrid + WRITE(*,'(a,i8,2(1pe12.3))') 'nits, resid, t(s)', niter, resid, tsolv +! + DEALLOCATE(xgrid,ygrid) + DEALLOCATE(rhs, sol) + DEALLOCATE(dists) + DEALLOCATE(bcoef) + DEALLOCATE(solcal,solana,errsol) + CALL destroy_sp(splxy) + CALL destroy(mat) + CALL closef(fid) + IF(readmat) THEN + CALL closef(fidin) + END IF +! + CALL mpi_finalize(ierr) +!=========================================================================== +END PROGRAM main diff --git a/examples/tcdsmat_mod.f90 b/examples/tcdsmat_mod.f90 new file mode 100644 index 0000000..776e422 --- /dev/null +++ b/examples/tcdsmat_mod.f90 @@ -0,0 +1,635 @@ +!> +!> @file tcdsmat_mod.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +MODULE tcdsmat_mod + IMPLICIT NONE +! + INTERFACE + SUBROUTINE meshdist(coefs, x, nx) + DOUBLE PRECISION, INTENT(in) :: coefs(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + END SUBROUTINE meshdist + END INTERFACE +! + INTERFACE norm2 + MODULE PROCEDURE norm2_1d, norm2_2d + END INTERFACE +CONTAINS +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE mstruct(p, n, rank, dist) +! +! It is assumed that: +! . 2nd dimension is number first +! . 2nd dimension is periodic +! + INTEGER, INTENT(in) :: p(2), n(2) + INTEGER, INTENT(out) :: rank + INTEGER, ALLOCATABLE :: dist(:) +! + INTEGER, ALLOCATABLE :: pvect(:,:) + INTEGER :: kl, ku, i +! + rank = (n(1)+p(1))*n(2) ! Rank of the FE matrix + ku = (p(1)+1)*(2*p(2)+1)-1 + kl = ku + ALLOCATE(pvect(0:2*p(2),0:p(1))) + IF( ALLOCATED(dist)) DEALLOCATE(dist) + ALLOCATE(dist(-kl:ku)) +! +! Upper (North) points + pvect(0:p(2),0) = (/(i,i=0,p(2))/) +! +! Lower (South) points and periodicity of 2nd dim. + DO i=1,p(2) + pvect(p(2)+i,0) = n(2)-pvect(p(2)-i+1,0) + END DO +! +! Shift by N2 for points on the right (West) side + DO i=1,p(1) + pvect(:,i) = pvect(:,i-1)+n(2) + END DO +! +! Super-diagonals including the diagonal + dist(0:ku) = RESHAPE(pvect, (/ku+1/)) +! +! Sub-diagonals + DO i=-1,-kl,-1 + dist(i) = -dist(-i) + END DO +! + DEALLOCATE(pvect) + END SUBROUTINE mstruct +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE dismat(spl, mat) +! +! Assembly of FE matrix mat using spline spl +! + USE bsplines + USE cds +! + TYPE(spline2d), INTENT(in) :: spl + TYPE(cds_mat), INTENT(inout) :: mat +! + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: n2, nidbas2, ndim2, ng2 + INTEGER :: i, j, ig1, ig2 + INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol + DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) + DOUBLE PRECISION:: contrib +! + INTEGER :: kterms ! Number of terms in weak form + INTEGER, DIMENSION(:,:), ALLOCATABLE :: idert, iderw ! Derivative order + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: coefs ! Terms in weak form +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) + WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1 + WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2 +! +! Weak form +! + kterms = mat%nterms + ALLOCATE(idert(kterms,2), iderw(kterms,2), coefs(kterms)) +! + ALLOCATE(fun1(0:nidbas1,0:1)) ! Spline and 1st derivative + ALLOCATE(fun2(0:nidbas2,0:1)) ! +! +! Gauss quadature +! + CALL get_gauss(spl%sp1, ng1) + CALL get_gauss(spl%sp2, ng2) + ALLOCATE(xg1(ng1), wg1(ng1)) + ALLOCATE(xg2(ng1), wg2(ng1)) + !=========================================================================== +! 2.0 Assembly loop +! + DO i=1,n1 + CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) + DO ig1=1,ng1 + CALL basfun(xg1(ig1), spl%sp1, fun1, i) + DO j=1,n2 + CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) + DO ig2=1,ng2 + CALL basfun(xg2(ig2), spl%sp2, fun2, j) + CALL coefeq(xg1(ig1), xg2(ig2), idert, iderw, coefs) + DO iterm=1,kterms + DO iw1=0,nidbas1 ! Weight function in dir 1 + igw1 = i+iw1 + DO iw2=0,nidbas2 ! Weight function in dir 2 + igw2 = MODULO(j+iw2-1, n2) + 1 + irow = igw2 + (igw1-1)*n2 + DO it1=0,nidbas1 ! Test function in dir 1 + igt1 = i+it1 + DO it2=0,nidbas2 ! Test function in dir 2 + igt2 = MODULO(j+it2-1, n2) + 1 + jcol = igt2 + (igt1-1)*n2 + contrib = fun1(iw1,iderw(iterm,1)) * & + & fun2(iw2,iderw(iterm,2)) * & + & coefs(iterm) * & + & fun2(it2,idert(iterm,2)) * & + & fun1(it1,idert(iterm,1)) * & + & wg1(ig1) * wg2(ig2) + CALL updtmat(mat, irow, jcol, contrib) + END DO + END DO + END DO + END DO + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xg1, wg1, fun1) + DEALLOCATE(xg2, wg2, fun2) + DEALLOCATE(idert, iderw, coefs) +! + CONTAINS + SUBROUTINE coefeq(x, y, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x, y + INTEGER, INTENT(out) :: idt(:,:), idw(:,:) + DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1)) +! +! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy +! + c(1) = x ! + idt(1,1) = 1 + idt(1,2) = 0 + idw(1,1) = 1 + idw(1,2) = 0 +! + c(2) = 1.d0/x + idt(2,1) = 0 + idt(2,2) = 1 + idw(2,1) = 0 + idw(2,2) = 1 + END SUBROUTINE coefeq + END SUBROUTINE dismat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE ibcmat(mat, ny, px) +! +! Apply BC on matrix +! + USE cds + IMPLICIT NONE + TYPE(cds_mat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: ny, px +! + INTEGER :: kl, ku, n, bw0, i, j + DOUBLE PRECISION :: zsum(mat%rank), arr(mat%rank) +!=========================================================================== +! 1.0 Prologue +! + kl = mat%kl + ku = mat%ku + n = mat%rank + mat%ny = ny +! +! Size of row ny and column ny +! + bw0 = SIZE(mat%rowv) +!=========================================================================== +! 2.0 Unicity at the axis +! +! The vertical sum on the NY-th row +! Store the sums at row ny in MAT%ROWV +! + zsum(1:n) = 0.0d0 + DO i=1,ny + CALL getrow(mat, i, arr) + zsum(1:bw0) = zsum(1:bw0) + arr(1:bw0) + IF( i .LE. ny ) THEN ! Clear rows 1:(ny-1) + arr = 0.0d0; arr(i) = 1.0d0 + CALL putrow(mat, i, arr) + END IF + END DO + mat%rowv(1:bw0) = zsum(1:bw0) ! row ny +!!$ WRITE(*,'(/a,/(10(f8.3)))') 'rowv', mat%rowv(1:130) +! +! The horizontal sum on the NY-th column +! The NY-th row of matrix was stored in mat%rowv +! Store the sums ar column ny at MAT%COLH +! + zsum(1:n) = 0.0d0 + DO j=1,ny + CALL getcol(mat, j, arr) + zsum(ny) = zsum(ny) + mat%rowv(j) + zsum(ny+1:bw0) = zsum(ny+1:bw0) + arr(ny+1:bw0) + IF( j .NE. ny ) THEN ! Clear columns 1:(ny-1) + mat%rowv(j) = 0.0d0 + arr = 0.0d0; arr(j) = 1.0d0 + CALL putcol(mat, j, arr) + END IF + END DO + mat%rowv(ny) = 0.0d0 ! Its value is now in mat%colh(ny) + arr = 0.0d0 + CALL putcol(mat, ny, arr) + CALL putrow(mat, ny, arr) + mat%colh(1:bw0) = zsum(1:bw0) ! column ny +! +! Move the diagonal term from mat%colh back to main diagonal +! + CALL putele(mat, ny, ny, mat%colh(ny)) + mat%colh(ny) = 0.0d0 +!!$ WRITE(*,'(/a,/(10(f8.3)))') 'rowv', mat%rowv(1:130) +!!$ WRITE(*,'(/a,/(10(f8.3)))') 'colh', mat%colh(1:130) +!!$ WRITE(*,'(/a,/(10(f8.3)))') 'colh', mat%val(1:ny,0) +!=========================================================================== +! 3.0 Dirichlet on right boundary +! + DO j = n, n-ny+1, -1 + arr = 0.0d0; arr(j) = 1.0d0 + CALL putcol(mat, j, arr) + END DO +! + DO i = n, n-ny+1, -1 + arr = 0.0d0; arr(i) = 1.0d0 + CALL putrow(mat, i, arr) + END DO +!!$ WRITE(*,'(/a,/(10(f8.3)))') 'diag 1', mat%val(n-ny-1:n,1) +!!$ WRITE(*,'(/a,/(10(f8.3)))') 'diag 0', mat%val(n-ny:n,0) +!!$ WRITE(*,'(/a,/(10(f8.3)))') 'diag -1', mat%val(n-ny-1:n,-1) +!=========================================================================== +! 9.0 Epilogue +! + END SUBROUTINE ibcmat +!=========================================================================== + SUBROUTINE disrhs(mbess, spl, rhs) +! +! Assembly the RHS using 2d spline spl +! + USE bsplines + INTEGER, INTENT(in) :: mbess + TYPE(spline2d), INTENT(in) :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:) +! + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: n2, nidbas2, ndim2, ng2 + INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank + DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) + DOUBLE PRECISION:: contrib +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) +! + ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) + ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) +! +! Gauss quadature +! + CALL get_gauss(spl%sp1, ng1) + CALL get_gauss(spl%sp2, ng2) + WRITE(*,'(/a, 2i3)') 'Gauss points and weights, ngauss =', ng1, ng2 + ALLOCATE(xg1(ng1), wg1(ng1)) + ALLOCATE(xg2(ng1), wg2(ng1)) +!=========================================================================== +! 2.0 Assembly loop +! + nrank = SIZE(rhs) + rhs(1:nrank) = 0.0d0 +! + DO i=1,n1 + CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) + DO ig1=1,ng1 + CALL basfun(xg1(ig1), spl%sp1, fun1, i) + DO j=1,n2 + CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) + DO ig2=1,ng2 + CALL basfun(xg2(ig2), spl%sp2, fun2, j) + contrib = wg1(ig1)*wg2(ig2) * & + & rhseq(xg1(ig1),xg2(ig2), mbess) + DO k1=0,nidbas1 + i1 = i+k1 + DO k2=0,nidbas2 + j2 = MODULO(j+k2-1,n2) + 1 + ij = j2 + (i1-1)*n2 + rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1) + END DO + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xg1, wg1, fun1) + DEALLOCATE(xg2, wg2, fun2) +! + CONTAINS + DOUBLE PRECISION FUNCTION rhseq(x1, x2, m) + DOUBLE PRECISION, INTENT(in) :: x1, x2 + INTEGER, INTENT(in) :: m + rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2) + END FUNCTION rhseq + END SUBROUTINE disrhs +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE ibcrhs(rhs, ny) +! +! Apply BC on RHS +! + DOUBLE PRECISION, INTENT(inout) :: rhs(:) + INTEGER, INTENT(in) :: ny +! + INTEGER :: nrank + DOUBLE PRECISION :: zsum +!=========================================================================== +! 1.0 Prologue +! + nrank = SIZE(rhs,1) +!=========================================================================== +! 2.0 Unicity at the axis +! +! The vertical sum +! + zsum = SUM(rhs(1:ny)) + rhs(ny) = zsum + rhs(1:ny-1) = 0.0d0 +!=========================================================================== +! 3.0 Dirichlet on right boundary +! + rhs(nrank-ny+1:nrank) = 0.0d0 + END SUBROUTINE ibcrhs +!=========================================================================== + SUBROUTINE cg(mat, rhs, omega, nitmx, rtolmx, sol, resid, nit, verbose, & + & nssor) +! +! Preconditionned Conjugate Gradient solver +! + USE cds + TYPE(cds_mat) :: mat + DOUBLE PRECISION, INTENT(in) :: rhs(:), omega, rtolmx + INTEGER, INTENT(in) :: nitmx + DOUBLE PRECISION, INTENT(inout) :: sol(:) + DOUBLE PRECISION, OPTIONAL, INTENT(out) :: resid + INTEGER, OPTIONAL, INTENT(out) :: nit + INTEGER, OPTIONAL, INTENT(in) :: nssor + LOGICAL, OPTIONAL, INTENT(in) :: verbose +! + DOUBLE PRECISION, DIMENSION(SIZE(rhs,1)) :: wr, wz, wp, wq + DOUBLE PRECISION :: bnrm2, residue, rho0, rho1, alpha, beta + INTEGER :: it +! + bnrm2 = SQRT(DOT_PRODUCT(rhs,rhs)) ! Euclidian norm of RHS + it = 0 + wr = rhs-vmx(mat,sol) +! +!... Iteration loop (see fig. 2.5, p.15 of "Templates...") + DO + it = it+1 + IF( PRESENT(nssor) ) THEN + CALL psolve(mat, wz, wr, omega, nssor) + ELSE + wz = wr + END IF + rho1 = DOT_PRODUCT(wr,wz) + IF( it .EQ. 1 ) THEN + wp = wz + ELSE + beta = rho1/rho0 + wp = wz + beta*wp + END IF + wq = vmx(mat,wp) + alpha = rho1 / DOT_PRODUCT(wp,wq) + sol = sol + alpha*wp + wr = wr - alpha*wq + residue = SQRT(DOT_PRODUCT(wr,wr)) / bnrm2 + IF( PRESENT(verbose) ) THEN + IF(verbose) WRITE(*,'(a,i8,1pe12.3)') 'it, resid', it, residue + END IF + IF( residue .LE. rtolmx .OR. it .GE. nitmx) EXIT + rho0 = rho1 + END DO + IF(PRESENT(resid)) resid = residue + IF(PRESENT(nit)) nit = it + END SUBROUTINE cg +!=========================================================================== + SUBROUTINE psolve(mat, x, b, omega, niter_in) +! +! Preconditionners +! + USE cds + TYPE(cds_mat) :: mat + DOUBLE PRECISION, INTENT(out) :: x(:) + DOUBLE PRECISION, INTENT(in) :: b(:) + DOUBLE PRECISION, INTENT(in) :: omega + INTEGER, OPTIONAL, INTENT(in) :: niter_in +! + INTEGER :: niter + DOUBLE PRECISION :: rtolmx +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +! 1. No-preconditionning +! + IF( omega .LT. 0.0d0 ) THEN + x = b + RETURN + END IF +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +! 2. SSOR Preconditionner +! + niter = 1 + rtolmx = 1.d-6 + IF(PRESENT(niter_in)) THEN + niter = niter_in + END IF + CALL ssor(mat, b, omega, niter, rtolmx, x) +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + END SUBROUTINE psolve +! +!=========================================================================== + SUBROUTINE ssor(mat, b, omega, nitmx, rtolmx, x, resid, nit, verbose) +! +! Solve Ax = b using SSOR method +! + USE cds + TYPE(cds_mat) :: mat + DOUBLE PRECISION, INTENT(out) :: x(:) + DOUBLE PRECISION, INTENT(in) :: b(:) + DOUBLE PRECISION, INTENT(in) :: omega, rtolmx + INTEGER, INTENT(in) :: nitmx + DOUBLE PRECISION, OPTIONAL, INTENT(out) :: resid + INTEGER, OPTIONAL, INTENT(out) :: nit + LOGICAL, OPTIONAL, INTENT(in) :: verbose +! + INTEGER :: n, iter + INTEGER :: k, i, j, d, bw0, ny + DOUBLE PRECISION :: omega1, bnrm2, residue + DOUBLE PRECISION :: rhs(SIZE(x)) +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +! 1. Initialization + n = SIZE(x) + bw0 = SIZE(mat%rowv) + ny = mat%ny + bnrm2 = norm2(b) ! Euclidian norm of RHS + omega1 = 1.0d0-omega + iter = 0 + DO + iter = iter+1 +! +! 2. Forward SOR +! Set RHS + rhs = omega*b ! rhs <- omega*b + IF( iter .GT. 1 ) THEN + rhs = rhs + omega1*x ! rhs <- rhs + (1-omega)*x0 + DO k=1,mat%ku ! rhs <- rhs - omega*U*x0 + d = mat%dists(k) + DO i=MAX(1,1-d),MIN(n,n-d) + rhs(i) = rhs(i) - omega*mat%val(i,k)*x(i+d) + END DO + END DO + IF( ny .NE. 0 ) THEN ! Contributions from unicity BC + rhs(ny) = rhs(ny) - omega*DOT_PRODUCT(mat%rowv(ny+1:bw0),x(ny+1:bw0)) + END IF + END IF +! +! Solve (1+omega*L) x = rhs + x = rhs + IF( ny .NE. 0 ) THEN ! Contributions from unicity BC + rhs(ny+1:bw0) = rhs(ny+1:bw0) - omega*mat%colh(ny:bw0)*x(ny) + END IF + DO i=ny+1,n + DO k=-1,-mat%kl,-1 + d = mat%dists(k) + j=i+d + IF( j.LE.0 ) EXIT + x(i) = x(i) - omega*mat%val(i,k)*x(j) + END DO + END DO +! +! 3. Backward SOR +! Set RHS + rhs = omega*b + omega1*x ! rhs <- omega*b + (1-omega)*x0 + IF( ny .NE. 0 ) THEN ! Contributions from unicity BC + rhs(ny+1:bw0) = rhs(ny+1:bw0) - omega*mat%colh(ny:bw0)*x(ny) + END IF + DO k=-mat%kl,-1 ! rhs <- rhs - omega*L*x0 + d = mat%dists(k) + DO i=MAX(1,1-d),MIN(n,n-d) + rhs(i) = rhs(i) - omega*mat%val(i,k)*x(i+d) + END DO + END DO +! +! Solve (1+omega*U) x = rhs + x = rhs + DO i=n-1,ny+1,-1 + DO k=1,mat%ku + d = mat%dists(k) + j = i+d + IF( j.GT.n ) EXIT + x(i) = x(i) - omega*mat%val(i,k)*x(j) + END DO + END DO + IF( ny .NE. 0 ) THEN ! Contributions from unicity BC + x(ny) = x(ny) - omega*DOT_PRODUCT(mat%rowv(ny+1:bw0),x(ny+1:bw0)) + END IF +! +! 4. Compute residue +! + IF( PRESENT(resid) ) THEN + residue = norm2(b-vmx(mat,x)) / bnrm2 + IF(PRESENT(verbose)) THEN + IF(verbose) WRITE(*,'(a,i8,1pe12.3)') 'it, resid', iter, residue + END IF + IF( residue .LT. rtolmx ) EXIT + END IF +! + IF( iter .GE. nitmx ) EXIT + END DO ! End of SSOR iterations + IF(PRESENT(nit)) nit = iter + IF(PRESENT(resid)) resid = residue + END SUBROUTINE ssor +!=========================================================================== + SUBROUTINE diagbal(mat) +! +! Diagonal matrix balancing: store D^(-1/2) in mat%bal +! + USE cds + TYPE(cds_mat) :: mat + INTEGER :: n, bw0, ny, d, i, k + DOUBLE PRECISION :: diag(mat%rank) +! + n = mat%rank + ny = mat%ny + bw0 = SIZE(mat%colh) + diag(1:n) = mat%val(1:n,0) + IF( MINVAL(diag) .LE. 0.0d0 ) THEN + WRITE(*,'(a)') 'Diagonal elements of matrix are not stricly positive!' + STOP + END IF + diag(1:n) = 1.0d0/SQRT(diag(1:n)) +! +! Scale the matrix + +!$OMP parallel do private (k,d,i) + DO k=-mat%kl,mat%ku + d = mat%dists(k) + DO i=MAX(1,1-d),MIN(n,n-d) + mat%val(i,k) = diag(i)*diag(i+d)*mat%val(i,k) + END DO + END DO +!$OMP end parallel do +! +! The ny^th column and row + IF( ny.NE.0 ) THEN + mat%rowv(1:bw0) = diag(1:bw0)*diag(ny)*mat%rowv(1:bw0) + mat%colh(1:bw0) = diag(ny)*diag(1:bw0)*mat%colh(1:bw0) + END IF +! +! Save D^(-1/2) + mat%bal(:) = diag(:) + END SUBROUTINE diagbal +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + FUNCTION norm2_1d(x) +! +! Compute the 2-norm of 1d array +! + DOUBLE PRECISION :: x(:) + DOUBLE PRECISION :: norm2_1d + norm2_1d = SQRT(SUM(x*x)) + END FUNCTION norm2_1d +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + FUNCTION norm2_2d(x) +! +! Compute the 2-norm of 2d array +! + DOUBLE PRECISION :: x(:,:) + DOUBLE PRECISION :: norm2_2d + norm2_2d = SQRT(SUM(x*x)) + END FUNCTION norm2_2d +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +END MODULE tcdsmat_mod diff --git a/examples/test_kron.f90 b/examples/test_kron.f90 new file mode 100644 index 0000000..ee9d5fe --- /dev/null +++ b/examples/test_kron.f90 @@ -0,0 +1,125 @@ +!> +!> @file test_kron.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Test Kronecker product for both GE and CSR versions. +! + USE iso_fortran_env, ONLY : rkind => real64 + USE matrix, ONLY : gemat, init, kron, vmx + USE csr, ONLY : csr_mat, init, full_to_csr, kron, vmx + IMPLICIT NONE +! + TYPE(gemat) :: mata, matb, matc + TYPE(gemat) :: matu1 + TYPE(csr_mat) :: mata_csr, matb_csr, matc_csr + INTEGER, PARAMETER :: m=3, n=2 + REAL(rkind) :: a(m*m)=[ 1.1, 0.0, 0.0, 0.2, 1.5, 1.0, 0.5, 0.0, 1.0 ] + REAL(rkind) :: b(n*n)=[ 2.0, 1.0, 0.0, 3.0 ] + REAL(rkind), TARGET :: u(m,n), uu(m,n), v(m,n) + REAL(rkind), POINTER :: u1d(:), uu1d(:) + INTEGER :: i, s, e +! + CALL init(m, 0, mata) + CALL init(n, 0, matb) + mata%val(1:m,1:m) = RESHAPE(a, [m,m]) + matb%val(1:n,1:n) = RESHAPE(b, [n,n]) +! + CALL printmat_ge('Matrix A', mata) + CALL printmat_ge('Matrix B', matb) +! + u1d(1:m*n) => u ! u1d = vec(u) + u1d = [ (REAL(i,rkind), i=1,m*n) ] + CALL printmat('Array U', u) +! +! Compute (A.U).B^T +! + CALL init(n, 0, matu1, mrows=m) + v = TRANSPOSE(vmx(matb, TRANSPOSE(vmx(mata, u)))) + CALL printmat('(A.U).B^T', v) +! +! Compute (BxA).vec(U) +! + CALL kron(matb, mata, matc) + uu1d(1:m*n) => uu + uu1d = vmx(matc, u1d) + CALL printmat_ge('Matrix C', matc) + CALL printmat('(BxA).vec(U)', uu) +!---------------------------------------------------------------------- +! +! Using CSR matrices +! + CALL full_to_csr(mata%val, mata_csr) + CALL full_to_csr(matb%val, matb_csr) + CALL printmat_csr('Matrix A', mata_csr) + CALL printmat_csr('Matrix B', matb_csr) +! + CALL kron(matb_csr, mata_csr, matc_csr) + uu1d = vmx(matc_csr, u1d) +! + CALL printmat_csr('Matrix C', matc_csr) + CALL printmat('(BxA).vec(U)', uu) +! +CONTAINS + SUBROUTINE printmat(str, a) + CHARACTER(len=*) :: str + REAL(rkind), INTENT(in) :: a(:,:) + INTEGER :: i,m,n + WRITE(*,'(/a)') TRIM(str) + m=SIZE(a,1) + n=SIZE(a,2) + DO i=1,m + WRITE(*,'(12f8.3)') a(i,:) + END DO + END SUBROUTINE printmat + SUBROUTINE printmat_ge(str, a) + CHARACTER(len=*) :: str + TYPE(gemat) :: a + INTEGER :: i + WRITE(*,'(/a)') TRIM(str) + DO i=1,a%mrows + WRITE(*,'(12f8.3)') a%val(i,:) + END DO + END SUBROUTINE printmat_ge + SUBROUTINE printmat_csr(str, a) + CHARACTER(len=*) :: str + TYPE(csr_mat) :: a + INTEGER :: i, s, e + REAL(rkind) :: arow(a%ncols) + WRITE(*,'(/a,a,3i4)') TRIM(str), ': m, n, nnz', a%mrows, a%ncols, a%nnz + DO i=1,a%mrows + arow = 0.0_rkind + s = a%irow(i) + e = a%irow(i+1)-1 + arow(a%cols(s:e)) = a%val(s:e) + WRITE(*,'(12f8.3)') arow + END DO + IF(SIZE(a%idiag) .GT. 0) THEN + WRITE(*,'(a,(20i4))') 'idiag =', a%idiag + WRITE(*,'(a,12f8.3)') 'diag = ', a%val(a%idiag) + END IF +! + END SUBROUTINE printmat_csr +END PROGRAM main diff --git a/examples/test_pwsmp.f90 b/examples/test_pwsmp.f90 new file mode 100644 index 0000000..0537296 --- /dev/null +++ b/examples/test_pwsmp.f90 @@ -0,0 +1,287 @@ +!> +!> @file test_pwsmp.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +!!$ USE futils + IMPLICIT NONE + INCLUDE 'mpif.h' +! + INTEGER :: npes, me, ierr, comm=MPI_COMM_WORLD + INTEGER :: l, i, lun=99 + INTEGER :: nrank, nnz, s, e, nrank_loc, nnz_loc, nnz_sum + INTEGER :: istart, iend + INTEGER, ALLOCATABLE :: irow(:), cols(:) + INTEGER, ALLOCATABLE :: irow_loc(:), cols_loc(:) + DOUBLE PRECISION, ALLOCATABLE :: val(:), val_loc(:) + DOUBLE PRECISION, ALLOCATABLE :: rhs(:), rhs_loc(:) + DOUBLE PRECISION, ALLOCATABLE :: sol(:), sol_loc(:) + DOUBLE PRECISION :: mem + CHARACTER(len=128) :: fname = "mat.dat" + DOUBLE PRECISION :: mem_loc, mem_min, mem_max + DOUBLE PRECISION :: err, err_max, err_norm + DOUBLE PRECISION :: t0, tfact, tsolv + INTEGER :: it, nits=100 +! +! PWSMP vars +! + DOUBLE PRECISION :: dparm(64) + INTEGER :: iparm(64) + INTEGER, ALLOCATABLE :: perm(:), invp(:) +! + INTEGER :: mrp ! just a placeholder in this program + DOUBLE PRECISION :: aux, diag ! just placeholders in this program + INTEGER :: naux=0, nrhs=1 +!=========================================================================== +! 1.0 Prologue +! + CALL mpi_init(ierr) + CALL mpi_comm_size(comm, npes, ierr) + CALL mpi_comm_rank(comm, me, ierr) +!=========================================================================== +! 2.0 Read matrix +! +! File header + IF( command_argument_count() > 0 ) THEN + CALL get_command_argument(1, fname, l, ierr) + END IF + OPEN(unit=lun, file=fname, form="unformatted") + READ(lun) nrank, nnz + IF(me.EQ.0) WRITE(*,'(a,3i16)') 'npes, nrank, nnz', npes, nrank, nnz +! +! Matrix partition + CALL dist1d(comm, 1, nrank, istart, nrank_loc) + iend = istart+nrank_loc-1 + WRITE(*,'(a,i3.3,a,2i12)') 'PE', me, ':istart, iend', istart, iend + ALLOCATE(irow_loc(nrank_loc+1)) +! +! Read irow + ALLOCATE(irow(nrank+1)) + READ(lun) irow + nnz_loc = irow(iend+1)-irow(istart) + CALL mpi_reduce(nnz_loc, nnz_sum, 1, MPI_INTEGER, MPI_SUM, 0, comm, ierr) + IF(me.EQ.0) THEN + PRINT*, 'nnz_sum', nnz_sum + END IF + irow_loc(:) = irow(istart:iend+1) ! Still unshifted + DEALLOCATE(irow) +! + ALLOCATE(cols_loc(nnz_loc)) + ALLOCATE(val_loc(nnz_loc)) + ALLOCATE(rhs_loc(nrank_loc)) + ALLOCATE(sol_loc(nrank_loc)) +! + s = irow_loc(1) + e = irow_loc(nrank_loc+1)-1 + irow_loc(:) = irow_loc(:)-s+1 ! Shifted relative irow + WRITE(*,'(a,i3.3,a,3i12)') 'PE', me, ':s, e, nnz_loc', s, e, nnz_loc +! +! Read cols + ALLOCATE(cols(nnz)) + READ(lun) cols + cols_loc(:) = cols(s:e) + DEALLOCATE(cols) +! +! Read vals + ALLOCATE(val(nnz)) + READ(lun) val + val_loc(:) = val(s:e) + DEALLOCATE(val) +! +! Read RHS + ALLOCATE(rhs(nrank)) + READ(lun) rhs + rhs_loc(:) = rhs(istart:iend) + DEALLOCATE(rhs) +! +!!$ mem_loc = mem() +!!$ CALL minmax_r(mem_loc, comm, 'mem used (MB) after matrix read') +!=========================================================================== +! 3.0 Call PWSMP +! +! Initializing of PWSMP. +! +!!$ CALL pwsmp_initialize + ALLOCATE(invp(nrank), perm(nrank)) +! +! Fill 'iparm' and 'dparm' arrays with default values. + iparm(1:3) = 0 + CALL pwssmp (nrank_loc, irow_loc, cols_loc, val_loc, diag, perm, invp, & + & rhs_loc, nrank_loc, nrhs, & + & aux, naux, mrp, iparm, dparm) + IF(iparm(64).NE.0) THEN + PRINT*, 'WSMP init failed with iparm(64) =', iparm(64) + CALL mpi_abort(comm, iparm(64), ierr) + ELSE + IF(me.EQ.0) PRINT*, 'WSMP init ok' + END IF +! +! Ordering + iparm(2) = 1 + iparm(3) = 1 + CALL pwssmp (nrank_loc, irow_loc, cols_loc, val_loc, diag, perm, invp, & + & rhs_loc, nrank_loc, nrhs, & + & aux, naux, mrp, iparm, dparm) + IF(iparm(64).NE.0) THEN + PRINT*, 'WSMP ordering failed with iparm(64) =', iparm(64) + CALL mpi_abort(comm, iparm(64), ierr) + ELSE + IF(me.EQ.0) PRINT*, 'WSMP ordering ok' + END IF +! +! Symbolic factorization + iparm(2) = 2 + iparm(3) = 2 + CALL pwssmp (nrank_loc, irow_loc, cols_loc, val_loc, diag, perm, invp, & + & rhs_loc, nrank_loc, nrhs, & + & aux, naux, mrp, iparm, dparm) + IF(iparm(64).NE.0) THEN + PRINT*, 'WSMP symbolic failed with iparm(64) =', iparm(64) + CALL mpi_abort(comm, iparm(64), ierr) + ELSE + IF(me.EQ.0) PRINT*, 'WSMP symbolic ok' + END IF + IF(me.EQ.0) THEN + PRINT *,'Number of nonzeros in factor L = 1000 X ',iparm(24) + PRINT *,'Number of FLOPS in factorization = ',dparm(23) + PRINT *,'Double words needed to factor on 0 = 1000 X ',iparm(23) + END IF +! +! Cholesky factorizarion + iparm(2) = 3 + iparm(3) = 3 + t0 = mpi_wtime() + CALL pwssmp (nrank_loc, irow_loc, cols_loc, val_loc, diag, perm, invp, & + & rhs_loc, nrank_loc, nrhs, & + & aux, naux, mrp, iparm, dparm) + tfact = mpi_wtime()-t0 + IF(iparm(64).NE.0) THEN + PRINT*, 'WSMP Choleski failed with iparm(64) =', iparm(64) + CALL mpi_abort(comm, iparm(64), ierr) + ELSE + IF(me.EQ.0) PRINT*, 'WSMP Choleski ok' + END IF +! +! Backsolve + t0 = mpi_wtime() + DO it=1,nits + sol_loc=rhs_loc + iparm(2) = 4 + iparm(3) = 4 + CALL pwssmp (nrank_loc, irow_loc, cols_loc, val_loc, diag, perm, invp, & + & sol_loc, nrank_loc, nrhs, & + & aux, naux, mrp, iparm, dparm) + END DO + rhs_loc=sol_loc + tsolv = (mpi_wtime()-t0)/REAL(nits,8) + IF(iparm(64).NE.0) THEN + PRINT*, 'WSMP backsolve failed with iparm(64) =', iparm(64) + CALL mpi_abort(comm, iparm(64), ierr) + ELSE + IF(me.EQ.0) PRINT*, 'WSMP backsolve ok' + END IF +! +! Iterative refinement + iparm(2) = 5 + iparm(3) = 5 + CALL pwssmp (nrank_loc, irow_loc, cols_loc, val_loc, diag, perm, invp, & + & rhs_loc, nrank_loc, nrhs, & + & aux, naux, mrp, iparm, dparm) + IF(iparm(64).NE.0) THEN + PRINT*, 'WSMP refinement failed with iparm(64) =', iparm(64) + CALL mpi_abort(comm, iparm(64), ierr) + ELSE + IF(me.EQ.0) PRINT*, 'WSMP refinement ok' + END IF +! +!!$ mem_loc = mem() +!!$ CALL minmax_r(mem_loc, comm, 'mem used (MB) after PWSMP') +!=========================================================================== +! 4.0 Check SOL +! +! Read SOL + ALLOCATE(sol(nrank)) + READ(lun) sol + sol_loc(:) = sol(istart:iend) + DEALLOCATE(sol) + PRINT*, 'Comp. sol', SUM(rhs_loc) + PRINT*, 'Exact sol', SUM(sol_loc) +! + err=MAXVAL(ABS(sol_loc-rhs_loc)) + CALL mpi_reduce(err, err_max, 1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierr) + IF(me.EQ.0) THEN + PRINT*, 'Max. error', err_max + END IF + rhs_loc = rhs_loc-sol_loc + err = DOT_PRODUCT(rhs_loc,rhs_loc) + CALL mpi_reduce(err, err_norm, 1, MPI_DOUBLE_PRECISION, MPI_SUM, 0, comm, ierr) + IF(me.EQ.0) THEN + PRINT*, 'Norm of error', SQRT(err_norm) + END IF +! +!!$ mem_loc = mem() +!!$ CALL minmax_r(mem_loc, comm, 'mem used (MB)') +!=========================================================================== +! 9.0 Epilogue +! + CALL minmax_r(tfact, comm, 'Factorisation time(s)') + CALL minmax_r(tsolv, comm, ' Backsolve time(s)') + CALL mpi_finalize(ierr) +! +CONTAINS + SUBROUTINE dist1d(comm, s0, ntot, s, nloc) +! +! 1d distribute ntot elements, returns offset s and local number of +! elements nloc. +! + IMPLICIT NONE + INCLUDE 'mpif.h' + INTEGER, INTENT(in) :: s0, ntot + INTEGER, INTENT(out) :: s, nloc + INTEGER :: comm, me, npes, ierr, naver, rem +! + CALL MPI_COMM_SIZE(comm, npes, ierr) + CALL MPI_COMM_RANK(comm, me, ierr) + naver = ntot/npes + rem = MODULO(ntot,npes) + s = s0 + MIN(rem,me) + me*naver + nloc = naver + IF( me.LT.rem ) nloc = nloc+1 + END SUBROUTINE dist1d +! + SUBROUTINE minmax_r(x, comm, str) + CHARACTER(len=*), INTENT(in) :: str + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(in) :: comm + INTEGER :: me, ierr + DOUBLE PRECISION :: xmin, xmax + CALL mpi_comm_rank(comm, me, ierr) + CALL mpi_reduce(x, xmin, 1, MPI_DOUBLE_PRECISION, MPI_MIN, 0, comm, ierr) + CALL mpi_reduce(x, xmax, 1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierr) + IF( me.EQ.0 ) THEN + WRITE(*,'(a,2(1pe12.4))') 'Minmax of ' // TRIM(str), xmin, xmax + END IF + END SUBROUTINE minmax_r +! +END PROGRAM main diff --git a/examples/tlocintv.f90 b/examples/tlocintv.f90 new file mode 100644 index 0000000..0208d29 --- /dev/null +++ b/examples/tlocintv.f90 @@ -0,0 +1,140 @@ +!> +!> @file tlocintv.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Optimization of locintv +! + USE bsplines + IMPLICIT NONE + INTEGER :: nx, nidbas, ngauss, np, nits + DOUBLE PRECISION :: a, b, coefs(5) + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid + TYPE(spline1d) :: splx +! + INTEGER :: i, nerrs, it + DOUBLE PRECISION :: t0, t1, seconds, tscal, tscal_new, tvec, tvec_new + DOUBLE PRECISION, ALLOCATABLE :: xp(:) + INTEGER, ALLOCATABLE :: left(:) +! + INTERFACE + SUBROUTINE meshdist(coefs, x, nx) + DOUBLE PRECISION, INTENT(in) :: coefs(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + END SUBROUTINE meshdist + END INTERFACE +! + NAMELIST /newrun/ nx, nidbas, ngauss, np, nits, a, b, coefs +!=========================================================================== +! Read in data +! + nx = 8 ! Number oh intevals in x + nidbas = 3 ! Degree of splines + ngauss = 4 ! Number of Gauss points/interval + np = 10 ! Number of random points in [a,b] + nits = 1000000 + a = 0.0 + b = 1.0 + coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + ! see function FDIST in MESHDIST +! + READ(*,newrun) + WRITE(*,newrun) +! +! Define grid on x axis +! + ALLOCATE(xgrid(0:nx)) + xgrid(0) = a + xgrid(nx) = b + CALL meshdist(coefs, xgrid, nx) + WRITE(*,'(/a/(10f8.3))') 'XGRID', xgrid(0:nx) + WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx) +! +! Set up spline +! + CALL set_spline(nidbas, ngauss, xgrid, splx) + WRITE(*,'(a,l1)') 'Is mesh equidistant? ', splx%nlequid +! +! Test locintv +! + ALLOCATE(xp(np)) + ALLOCATE(left(np)) + xp(:) = (b-a)*xp(:) + a + tscal = 0.0d0 + tscal_new = 0.0d0 + tvec = 0.0d0 + tvec_new = 0.0d0 +! + nerrs = 0 + DO it=1,nits + CALL RANDOM_NUMBER(xp) + t0 = seconds() + DO i=1,np + CALL locintv_old(splx, xp(i), left(i)) + END DO + tscal = tscal + seconds()-t0 + nerrs = nerrs + COUNT(.NOT.in_interv(xp, left)) +! + t0 = seconds() + DO i=1,np + CALL locintv(splx, xp(i), left(i)) + END DO + tscal_new = tscal_new + seconds()-t0 + nerrs = nerrs + COUNT(.NOT.in_interv(xp, left)) +! + t0 = seconds() + CALL locintv_old(splx, xp, left) + tvec = tvec + seconds()-t0 + nerrs = nerrs + COUNT(.NOT.in_interv(xp, left)) +! + t0 = seconds() + CALL locintv(splx, xp, left) + tvec_new = tvec_new + seconds()-t0 + nerrs = nerrs + COUNT(.NOT.in_interv(xp, left)) + END DO + PRINT*, 'nerrs =', nerrs +! + tscal = tscal/(REAL(nits*np,8)) + tscal_new = tscal_new/(REAL(nits*np,8)) + tvec = tvec/(REAL(nits*np,8)) + tvec_new = tvec_new/(REAL(nits*np,8)) + WRITE(*,'(4a12)') 'scalar', 'scalar new', 'vector', 'vector new' + WRITE(*,'(4(1pe12.3))') tscal, tscal_new, tvec, tvec_new +! +! Clean up +! + DEALLOCATE(xp) + DEALLOCATE(left) + DEALLOCATE(xgrid) + CALL destroy_sp(splx) +! +CONTAINS + LOGICAL ELEMENTAL FUNCTION in_interv(x, l) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(in) :: l + in_interv = x.GE.xgrid(l) .AND. x.LT.xgrid(l+1) + END FUNCTION in_interv +END PROGRAM main diff --git a/examples/tmassmat.f90 b/examples/tmassmat.f90 new file mode 100644 index 0000000..679f965 --- /dev/null +++ b/examples/tmassmat.f90 @@ -0,0 +1,189 @@ +!> +!> @file tmassmat.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! FT of mass matrix +! + USE bsplines + USE matrix +! + IMPLICIT NONE + INTEGER :: nx, nidbas + INTEGER :: ngauss, nrank, kl, ku + INTEGER :: i, k, kmin, kmax + DOUBLE PRECISION :: pi, xlenght, dx, arg0, arg + DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), arow(:) + DOUBLE PRECISION, ALLOCATABLE :: fftmassm1(:), fftmassm2(:), fftmassm3(:) + DOUBLE PRECISION, ALLOCATABLE :: fftmass_shifted(:) + TYPE(spline1d) :: splx + TYPE(periodic_mat) :: massm +! + NAMELIST /newrun/ nx, nidbas, xlenght +!================================================================================ +! 1.0 Prologue +! + pi = 4.0d0*ATAN(1.0d0) +! + nx = 8 + nidbas = 3 + xlenght = 2.0d0*pi +! + READ(*,newrun) + WRITE(*,newrun) +! + ngauss = nidbas+1 ! Exact integration for polynomials of degree 2*nidbas +! + ALLOCATE(xgrid(0:nx)) + dx = xlenght/REAL(nx) + xgrid = (/ (i*dx,i=0,nx) /) + WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid +! + CALL set_spline(nidbas, ngauss, xgrid, splx, .TRUE.) +!=========================================================================== +! 2.0 Mass matrix + nrank = nx + kl = nidbas + ku = kl + CALL init(kl, ku, nrank, 1, massm) + CALL dismat(splx, massm) +! + ALLOCATE(arow(nrank)) +!!$ WRITE(*,'(/a)') 'Mass matrix' +!!$ DO i=1,nrank +!!$ CALL getrow(massm, i, arow) +!!$ WRITE(*,'(10(1pe12.4))') arow +!!$ END DO +!=========================================================================== +! 3.0 Fourier transform of Mass matrix +! + ALLOCATE(fftmassm1(0:nx-1)) + ALLOCATE(fftmassm2(0:nx-1)) + ALLOCATE(fftmassm3(0:nx-1)) + IF(nidbas.LE.3) THEN + CALL analytic(nidbas, fftmassm1) + fftmassm1 = dx*fftmassm1 + WRITE(*,'(/a/(10(1pe12.4)))') 'Analytic', fftmassm1 + END IF +! + CALL calc_fftmass_old(splx, fftmassm2) + WRITE(*,'(/a/(10(1pe12.4)))') 'Old version', fftmassm2 + IF(nidbas.LE.3) THEN + WRITE(*,'(a,1pe12.4)') 'error =', MAXVAL(ABS(fftmassm2-fftmassm1)) + END IF +! +! Init DFT + kmin = -nx/2 + kmax = nx/2-1 + CALL init_dft(splx, kmin, kmax) +! + ALLOCATE(fftmass_shifted(kmin:kmax)) + CALL calc_fftmass(splx, fftmass_shifted) + DO k=kmin, kmax + fftmassm3(MODULO(k+nx,nx))=fftmass_shifted(k) + END DO + WRITE(*,'(/a/(10(1pe12.4)))') 'New version', fftmassm3 + WRITE(*,'(a,1pe12.4)') 'error =', MAXVAL(ABS(fftmassm3-fftmassm2)) +! +! Check dftcoefs + WRITE(*,'(/a)') 'Check DFT of splines' + PRINT*, 'dims of dftcoefs', SHAPE(splx%dft%coefs) + DO i=0,nidbas + WRITE(*,'(a,i3,2(1pe12.4))') 'Sum of coefs for spline', i, & + & SUM(splx%dft%coefs(:,i)) + END DO +!=========================================================================== +! 9.0 Clean up +! + DEALLOCATE(xgrid) + DEALLOCATE(fftmassm1) + DEALLOCATE(fftmassm2) + DEALLOCATE(fftmassm3) + DEALLOCATE(fftmass_shifted) + DEALLOCATE(arow) + CALL destroy(massm) + CALL destroy_sp(splx) +CONTAINS +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE dismat(spl, mat) + TYPE(spline1d) :: spl + TYPE(periodic_mat) :: mat + INTEGER :: dim, nx, nidbas, ngauss + DOUBLE PRECISION, ALLOCATABLE :: fun(:,:), xgauss(:), wgauss(:) + INTEGER :: i, igauss, iw, jt, irow, jcol + DOUBLE PRECISION :: contrib +! + CALL get_dim(spl, dim, nx, nidbas) + ALLOCATE(fun(0:nidbas,1)) ! Spline + CALL get_gauss(spl, ngauss) + ALLOCATE(xgauss(ngauss), wgauss(ngauss)) +! + DO i=1,nx + CALL get_gauss(spl, ngauss, i, xgauss, wgauss) + DO igauss=1,ngauss + CALL basfun(xgauss(igauss), spl, fun, i) + DO jt=0,nidbas + DO iw=0,nidbas + contrib = fun(jt,1) * fun(iw,1) * wgauss(igauss) + irow=MODULO(i+iw-1,nx) + 1 ! Periodic BC + jcol=MODULO(i+jt-1,nx) + 1 + CALL updtmat(mat, irow, jcol, contrib) + END DO + END DO + END DO + END DO +! + DEALLOCATE(fun) + DEALLOCATE(xgauss, wgauss) + END SUBROUTINE dismat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE analytic(nidbas, mat) +! +! Analytic form for nidbas .le. 3 +! + INTEGER, INTENT(in) :: nidbas + DOUBLE PRECISION, INTENT(out) :: mat(0:) + DOUBLE PRECISION :: arg0, arg, cosk + INTEGER :: n, k +! + n = SIZE(mat) + arg0 = 2.0d0*pi/REAL(n,8) + DO k=0,n-1 + arg = k*arg0 + cosk = COS(arg) + SELECT CASE (nidbas) + CASE (1) + mat(k) = (2.0d0 + cosk)/3.0d0 + CASE (2) + mat(k) = (16.0d0 + cosk*(13.0d0+cosk))/30.0d0 + CASE (3) + mat(k) = (272.0d0 + cosk*(297.0d0+cosk*(60.0d0+cosk)))/630.0d0 + CASE default + WRITE(*,'(a,i4,a)') 'ANALYTIC: nidbas =', nidbas, ' is not implemented!' + STOP + END SELECT + END DO + END SUBROUTINE analytic +END PROGRAM main diff --git a/examples/tmatrix_gb.f90 b/examples/tmatrix_gb.f90 new file mode 100644 index 0000000..6183335 --- /dev/null +++ b/examples/tmatrix_gb.f90 @@ -0,0 +1,114 @@ +!> +!> @file tmatrix_gb.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Test some routines of module matrix +! + USE matrix + IMPLICIT NONE + TYPE(gbmat) :: mata + INTEGER, PARAMETER :: n=5, ku=3, kl=3 + DOUBLE PRECISION :: arr(n), fulla(n,n), base + DOUBLE PRECISION, DIMENSION(:,:), POINTER :: p + INTEGER :: i, j, iaway, pow + CHARACTER(len=32) :: str +! + CALL init(ku, ku, n, 0, mata) + CALL getvalp(mata, p) + PRINT*, 'shape of A: ', SHAPE(p) +! +! Test updtmat + p = 0.0d0 + DO j=1,n + DO i=1,n + arr(i) = 10*i + j + IF( ABS(j-i) .LT. ku+1 ) CALL updtmat(mata, i, j, arr(i)) + END DO + END DO + CALL prntmat('Test of UPDTMAT', p) +! +! Test PUTCOL + p = 0.0d0 + DO j=1,n + DO i=1,n + arr(i) = 10*i + j + END DO + CALL putcol(mata, j, arr) + END DO + CALL prntmat('Test of PUTCOL', p) +! +! +! Test PUTROW + p = 0.0d0 + DO i=1,n + DO j=1,n + arr(j) = 10*i + j + END DO + CALL putrow(mata, i, arr) + END DO + CALL prntmat('Test of PUTROW', p) +! +! Test GETCOL + fulla = 0.0 + DO j=1,n + CALL getcol(mata, j, fulla(:,j)) + END DO + CALL prntmat('Test of GETCOL', fulla) +! + iaway=4 + arr = 0.0d0 + arr(iaway) =1.0 + CALL putrow(mata, iaway, arr) + CALL putcol(mata, iaway, arr) + WRITE(str,'(a,i3)') 'Away on i = j =',iaway + CALL prntmat(TRIM(str), p) +! +! Test GETCOL + fulla = 0.0 + DO j=1,n + CALL getcol(mata, j, fulla(:,j)) + END DO + CALL prntmat('Matrix full', fulla) +! +! Test of determinant + CALL determinant(mata, base, pow) + CALL prntmat('Factored A (gb)', p) + PRINT*, 'Prod. of factored A diagnonals', PRODUCT(p(kl+ku+1,:)) + WRITE(*,'(a,1pe15.6,i6)') 'Determinant(bas,power) =', base, pow + PRINT*, 'Pivots ', mata%piv +! + call destroy(mata) +CONTAINS + SUBROUTINE prntmat(str, a) + DOUBLE PRECISION, DIMENSION(:,:) :: a + CHARACTER(len=*) :: str + INTEGER :: i + WRITE(*,'(a)') TRIM(str) + DO i=1,SIZE(a,1) + WRITE(*,'(10f8.1)') a(i,:) + END DO + END SUBROUTINE prntmat +END PROGRAM main diff --git a/examples/tmatrix_pb.f90 b/examples/tmatrix_pb.f90 new file mode 100644 index 0000000..eae0fd4 --- /dev/null +++ b/examples/tmatrix_pb.f90 @@ -0,0 +1,143 @@ +!> +!> @file tmatrix_pb.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Test some routines of module matrix +! + USE matrix + IMPLICIT NONE + TYPE(pbmat) :: mata, matb + INTEGER, PARAMETER :: n=5, ku=3 + DOUBLE PRECISION :: arr(n), fulla(n,n), fullb(n,n), base + DOUBLE PRECISION, DIMENSION(:,:), POINTER :: p, pb + INTEGER :: i, j, info, pow +! + CALL init(ku, n, 0, mata) + CALL init(1, n, 0, matb) + CALL getvalp(mata, p) + CALL getvalp(matb, pb) + PRINT*, 'shape of A: ', SHAPE(p) +! +! Test updtmat + p = 0.0d0 + DO i=1,n + DO j=i,n + arr(j) = 10*i + j + IF( ABS(j-i) .LT. ku+1 ) CALL updtmat(mata, i, j, arr(j)) + END DO + END DO + CALL prntmat('Test of UPDTMAT', p) +! +! Test GETCOL + fulla = 0.0 + DO j=1,n + CALL getcol(mata, j, fulla(:,j)) + END DO + CALL prntmat('Full matrix from GETCOL', fulla) +! +! Test GETROW + fulla = 0.0 + DO i=1,n + CALL getrow(mata, i, fulla(i,:)) + END DO + CALL prntmat('Full matrix from GETROW', fulla) +! +! Test PUTCOL + p = 0.0d0 + DO j=1,n + DO i=1,n + arr(i) = 10*i + j + END DO + CALL putcol(mata, j, arr) + END DO + CALL prntmat('Test of PUTCOL', p) +! +! Test PUTROW + p = 0.0d0 + DO i=1,n + DO j=1,n + arr(j) = 10*i + j + END DO + CALL putrow(mata, i, arr) + END DO + CALL prntmat('Test of PUTROW', p) +! + arr = 0.0d0 + arr(2) =1.0 + CALL putrow(mata, 2, arr) + CALL prntmat('Away on i=2, j=2', p) +! +! Test GETCOL + fulla = 0.0 + DO j=1,n + CALL getcol(mata, j, fulla(:,j)) + END DO + CALL prntmat('Full matrix from GETCOL', fulla) +! +! Test GETROW + fulla = 0.0 + DO i=1,n + CALL getrow(mata, i, fulla(i,:)) + END DO + CALL prntmat('Full matrix from GETROW', fulla) +! +! Test GETELE + fulla = 0.0 + DO i=1,n + DO j=1,n + IF(ABS(j-i).LT.ku+1) CALL getele(mata,i,j,fulla(i,j)) + END DO + END DO + CALL prntmat('Full matrix from GETELE', fulla) +! +! Test of determinant + fullb = 0.0 + DO i=1,n + fullb(i,i) = 2.0d0 + IF(i.LT.n) fullb(i,i+1)=-1.0d0 + IF(i.GT.1) fullb(i,i-1)=-1.0d0 + END DO + DO j=1,n + CALL putcol(matb, j, fullb(:,j)) + END DO + CALL prntmat('Mat. A (full)', fullb) + CALL prntmat('Mat. A (pb)', pb) + CALL determinant(matb, base, pow) + WRITE(*,'(a,1pe15.6,i6)') 'Determinant(bas,power) =', base, pow +! + CALL destroy(mata) + CALL destroy(matb) +CONTAINS + SUBROUTINE prntmat(str, a) + DOUBLE PRECISION, DIMENSION(:,:) :: a + CHARACTER(len=*) :: str + INTEGER :: i + WRITE(*,'(a)') TRIM(str) + DO i=1,SIZE(a,1) + WRITE(*,'(10f8.1)') a(i,:) + END DO + END SUBROUTINE prntmat +END PROGRAM main diff --git a/examples/tmatrix_zpb.f90 b/examples/tmatrix_zpb.f90 new file mode 100644 index 0000000..ba7c30d --- /dev/null +++ b/examples/tmatrix_zpb.f90 @@ -0,0 +1,137 @@ +!> +!> @file tmatrix_zpb.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Test some routines of module matrix +! + USE matrix + IMPLICIT NONE + TYPE(zpbmat) :: mata, matb + INTEGER, PARAMETER :: n=5, ku=3 + DOUBLE COMPLEX :: arr(n), fulla(n,n), fullb(n,n), base + DOUBLE COMPLEX, DIMENSION(:,:), POINTER :: p, pb + INTEGER :: i, j, pow +! + CALL init(ku, n, 0, mata) + CALL init(1, n, 0, matb) + CALL getvalp(mata, p) + CALL getvalp(matb, pb) + PRINT*, 'shape of A: ', SHAPE(p) +! +! Test updtmat + p = 0.0d0 + DO i=1,n + DO j=i,n + arr(j) = CMPLX(10*i + j, j-i) + IF( ABS(j-i) .LT. ku+1 ) CALL updtmat(mata, i, j, arr(j)) + END DO + END DO + CALL prntmat('Test of UPDTMAT', p) +! +! Test GETCOL + fulla = 0.0 + DO j=1,n + CALL getcol(mata, j, fulla(:,j)) + END DO + CALL prntmat('Full matrix from GETCOL', fulla) +! +! Test GETROW + fulla = 0.0 + DO i=1,n + CALL getrow(mata, i, fulla(i,:)) + END DO + CALL prntmat('Full matrix from GETROW', fulla) +! +! Test PUTCOL + p = 0.0d0 + DO j=1,n + CALL putcol(mata, j, fulla(:,j)) + END DO + CALL prntmat('Test of PUTCOL', p) +! +! Test PUTROW + p = 0.0d0 + DO i=1,n + CALL putrow(mata, i, fulla(i,:)) + END DO + CALL prntmat('Test of PUTROW', p) +! + arr = 0.0d0 + arr(2) =1.0 + CALL putrow(mata, 2, arr) + CALL prntmat('Away on i=2, j=2', p) +! +! Test GETCOL + fulla = 0.0 + DO j=1,n + CALL getcol(mata, j, fulla(:,j)) + END DO + CALL prntmat('Full matrix from GETCOL', fulla) +! +! Test GETROW + fulla = 0.0 + DO i=1,n + CALL getrow(mata, i, fulla(i,:)) + END DO + CALL prntmat('Full matrix from GETROW', fulla) +! +! Test GETELE + fulla = 0.0 + DO i=1,n + DO j=1,n + IF(ABS(j-i).LT.ku+1) CALL getele(mata,i,j,fulla(i,j)) + END DO + END DO + CALL prntmat('Full matrix from GETELE', fulla) +! +! Test of determinant + fullb = 0.0 + DO i=1,n + fullb(i,i) = 2.0d0 + IF(i.LT.n) fullb(i,i+1)=-1.0d0 + IF(i.GT.1) fullb(i,i-1)=-1.0d0 + END DO + DO j=1,n + CALL putcol(matb, j, fullb(:,j)) + END DO + CALL prntmat('Mat. A (full)', fullb) + CALL prntmat('Mat. A (pb)', pb) + CALL determinant(matb, base, pow) + WRITE(*,'(a,2f8.5,i3)') 'Determinant(base,power) = ', base, pow +! + CALL destroy(mata) + CALL destroy(matb) +CONTAINS + SUBROUTINE prntmat(str, a) + DOUBLE COMPLEX, DIMENSION(:,:) :: a + CHARACTER(len=*) :: str + INTEGER :: i + WRITE(*,'(a)') TRIM(str) + DO i=1,SIZE(a,1) + WRITE(*,'(5(5x,"(",f5.1,",",f5.1,")"))') a(i,:) + END DO + END SUBROUTINE prntmat +END PROGRAM main diff --git a/examples/tp2p_mat.f90 b/examples/tp2p_mat.f90 new file mode 100644 index 0000000..d5084fb --- /dev/null +++ b/examples/tp2p_mat.f90 @@ -0,0 +1,108 @@ +!> +!> @file tp2p_mat.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main + USE pardiso_bsplines + IMPLICIT NONE + INCLUDE 'mpif.h' +! + INTEGER :: me, npes, ierr + INTEGER :: next + INTEGER :: i, j, rank + DOUBLE PRECISION :: val + DOUBLE PRECISION, ALLOCATABLE :: arow(:) + TYPE(pardiso_mat) :: mat +! + CALL mpi_init(ierr) + CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) + CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) + IF(npes.NE.2) THEN + PRINT*, 'Should run with 2 procs!' + CALL mpi_abort(MPI_COMM_WORLD, -1, ierr) + END IF +! +! Define local matrix +! + rank = npes + CALL init(npes, 0, mat) + DO i=1,rank ! Fill row me+1 + val = me+1 + j = me+1 + CALL updtmat(mat, i, me+1, val) + END DO +! +! Exchange matrix +! + CALL disp_mat('Original matrix') + next=MODULO(me+1,2) +! + IF(me.EQ.0) THEN + CALL p2p_mat(mat, 1, 'send', 'put', MPI_COMM_WORLD) + ELSE + CALL p2p_mat(mat, 0, 'recv', 'put', MPI_COMM_WORLD) + END IF + CALL disp_mat('Matrix after 0->1/put') +! + CALL p2p_mat(mat, next, 'sendrecv', 'put', MPI_COMM_WORLD) + CALL disp_mat('Matrix after sendrev/put') +! + CALL p2p_mat(mat, next, 'sendrecv', 'updt', MPI_COMM_WORLD) + CALL disp_mat('Matrix after sendrev/updt') +! + IF(me.EQ.1) THEN + CALL p2p_mat(mat, 0, 'send', 'updt', MPI_COMM_WORLD) + ELSE + CALL p2p_mat(mat, 1, 'recv', 'updt', MPI_COMM_WORLD) + END IF + CALL disp_mat('Matrix after 1->0/updt') +! + IF(me.EQ.1) THEN + CALL p2p_mat(mat, 0, 'send', 'put', MPI_COMM_WORLD) + ELSE + CALL p2p_mat(mat, 1, 'recv', 'put', MPI_COMM_WORLD) + END IF + CALL disp_mat('Matrix after 1->0/put') +! + CALL mpi_finalize(ierr) +CONTAINS + SUBROUTINE disp_mat(str) + CHARACTER(len=*), INTENT(in) :: str + INTEGER :: p + DO p=0,npes-1 + IF(me.EQ.p) THEN + WRITE(*,'(a,i3.3)') str//' on PE', me + CALL to_mat(mat, nlkeep=.TRUE.) + ALLOCATE(arow(mat%rank)) + DO i=1,mat%rank + CALL getrow(mat, i, arow) + WRITE(*,'(10f8.2)') arow + END DO + DEALLOCATE(arow) + CALL FLUSH(6) + END IF + CALL mpi_barrier(MPI_COMM_WORLD, ierr) + END DO + END SUBROUTINE disp_mat +END PROGRAM main diff --git a/examples/tpsum_mat.f90 b/examples/tpsum_mat.f90 new file mode 100644 index 0000000..5f01441 --- /dev/null +++ b/examples/tpsum_mat.f90 @@ -0,0 +1,77 @@ +!> +!> @file tpsum_mat.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +!!$ USE pardiso_bsplines +!!$ USE wsmp_bsplines + USE mumps_bsplines + IMPLICIT NONE + INCLUDE 'mpif.h' +! + INTEGER :: me, npes, ierr + INTEGER :: i, j, rank + DOUBLE PRECISION :: val + DOUBLE PRECISION, ALLOCATABLE :: arow(:) +!!$ TYPE(pardiso_mat) :: mat +!!$ TYPE(wsmp_mat) :: mat + TYPE(mumps_mat) :: mat +! + CALL mpi_init(ierr) + CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) + CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) +! + rank = npes + CALL init(npes, 1, mat) + DO i=1,rank ! Fill row me+1 + val = me+1 + j = me+1 + CALL updtmat(mat, i, me+1, val) + END DO +! +!!$ CALL disp_mat('Original matrix') + CALL psum_mat(mat, MPI_COMM_WORLD) + CALL disp_mat('Global sum of matrix') +! + CALL mpi_finalize(ierr) +CONTAINS + SUBROUTINE disp_mat(str) + CHARACTER(len=*), INTENT(in) :: str + INTEGER :: p + DO p=0,npes-1 + IF(me.EQ.p) THEN + CALL to_mat(mat, nlkeep=.TRUE.) + WRITE(*,'(a,i3.3,a,2i6)') str//' on PE', me, ': rank, nnz', mat%rank, mat%nnz + ALLOCATE(arow(mat%rank)) + DO i=1,mat%rank + CALL getrow(mat, i, arow) + WRITE(*,'(10f8.2)') arow + END DO + DEALLOCATE(arow) + CALL FLUSH(6) + END IF + CALL mpi_barrier(MPI_COMM_WORLD, ierr) + END DO + END SUBROUTINE disp_mat +END PROGRAM main diff --git a/examples/tsparse1.f90 b/examples/tsparse1.f90 new file mode 100644 index 0000000..c22a07f --- /dev/null +++ b/examples/tsparse1.f90 @@ -0,0 +1,117 @@ +!> +!> @file tsparse1.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! MAIN !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +PROGRAM main + USE sparse + IMPLICIT none +! + TYPE(zspmat) :: amat + TYPE(zsprow) :: arow +! + LOGICAL :: found + INTEGER :: n=10 + INTEGER :: jcol, nnz, i + DOUBLE PRECISION :: valr, vali + DOUBLE COMPLEX :: val, zero=(0.,0.) + DOUBLE COMPLEX, ALLOCATABLE :: arr(:), farr(:) + INTEGER, ALLOCATABLE :: col(:), newcol(:) +! +! Initialize the sparse matrix amat +! + CALL init(n, amat) +! +! Use UPDT_SP to update a sparse row +! + WRITE(*,*) 'Enter a list of positive indices, terminate with a zero' + DO + READ(*,*) jcol + IF(jcol .LE. 0) EXIT + CALL RANDOM_NUMBER(valr) + vali = jcol + val = CMPLX(valr, vali) + CALL updtmat(arow, jcol, val) + END DO +! +! Convert a sparse row to a sequential row +! + nnz = arow%nnz + WRITE(*,'(a,i5)') 'nnz =', nnz +! + ALLOCATE(arr(nnz), col(nnz), newcol(nnz)) + CALL getrow(arow, arr, col) + WRITE(*, '(a/(10i8))') 'col', col + WRITE(*, '(a/(10f8.4))') 'arr', arr +! + ALLOCATE(farr(MAXVAL(col))) + CALL getrow(arow, farr) + WRITE(*, '(/a/(10f8.4))') 'farr', farr +! +! Clear element by element of row +! + DO i=1,nnz + CALL putele(arow, col(i), zero) + CALL getrow(arow, arr, newcol) + WRITE(*, '(/a,i6/(10i8))') 'col', arow%nnz, newcol(1:arow%nnz) + END DO +! +! Re-create row using PUTROW and full row +! + CALL putrow(arow, farr) + CALL getrow(arow, arr, col) + WRITE(*,'(/a,i5)') 'nnz =', arow%nnz + WRITE(*, '(a/(10i8))') 'col', col + WRITE(*, '(a/(10f8.4))') 'arr', arr +! +! Clear row using DESTROY +! + CALL destroy(arow) + CALL getrow(arow, arr, newcol) + nnz = arow%nnz + WRITE(*, '(/a,i6/(10i8))') 'col', nnz, newcol(1:nnz) +! +! Re-create row using PUTROW and sparse row +! + CALL putrow(arow, arr, col) + CALL getrow(arow, arr, newcol, nnz) + WRITE(*, '(/a,i6/(10i8))') 'col', nnz, newcol(1:nnz) + WRITE(*, '(a/(10f8.4))') 'arr', arr +! +! Test GETELE +! + i=111;val=0 + CALL getele(arow, i, val, found) + WRITE(*,'(/i8,2f8.4,l3)') i, val, found + DO i=1,nnz + CALL getele(arow, col(i), val, found) + WRITE(*,'(i8,2f8.4,l3)') col(i), val, found + END DO +! +! Test destroy_spmat +! + CALL destroy(amat) +END PROGRAM main diff --git a/examples/tsparse2.f90 b/examples/tsparse2.f90 new file mode 100644 index 0000000..23d3fcf --- /dev/null +++ b/examples/tsparse2.f90 @@ -0,0 +1,151 @@ +!> +!> @file tsparse2.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Simple 2D Poisson using 5 points FD +! + USE pardiso_bsplines + IMPLICIT NONE +! + TYPE(pardiso_mat) :: amat, bmat + DOUBLE PRECISION, ALLOCATABLE :: arow(:), rhs(:), sol(:) + DOUBLE PRECISION, ALLOCATABLE :: arown(:,:), rhsn(:,:), soln(:,:) + INTEGER :: nx=5, ny=4, n + INTEGER :: i, j, irow, jcol +! + DOUBLE PRECISION :: mem, seconds +! + n = nx*ny + CALL init(n, 1, amat) ! Non-symmetric matrix + CALL init(n, 1, bmat, nlsym=.TRUE.) ! Symmetric matrix + ALLOCATE(arow(n), arown(n,2)) + ALLOCATE(rhs(n), rhsn(n,2)) + ALLOCATE(sol(n), soln(n,2)) +! +! Construct the FD matrix amat, using sparse rows (linked lists) +! + DO j=1,ny + DO i=1,nx + arow = 0.0d0 + irow = numb(i,j) + arow(irow) = 4.0d0 + IF(i.GT.1) arow(numb(i-1,j)) = -1.0d0 + IF(i.LT.nx) arow(numb(i+1,j)) = -1.0d0 + IF(j.GT.1) arow(numb(i,j-1)) = -1.0d0 + IF(j.LT.ny) arow(numb(i,j+1)) = -1.0d0 + rhs(irow) = SUM(arow) + CALL putrow(amat, irow, arow) ! General matrix + CALL putrow(bmat, irow, arow) ! Symmetric matrix + END DO + END DO +! +! Print the matrices +! + WRITE(*,'(/a)') 'Matrix A' + DO i=1,n + CALL getrow(amat, i, arow) + WRITE(*,'(30f4.0)') arow + END DO + PRINT*, 'nnz from get_count', get_count(amat) +! + WRITE(*,'(/a)') 'Matrix B' + DO i=1,n + CALL getrow(bmat, i, arow) + WRITE(*,'(30f4.0)') arow + END DO + PRINT*, 'nnz from get_count', get_count(bmat) +! +! Factor the matrix using Pardiso +! + CALL factor(amat, nlmetis=.TRUE.) + WRITE(*,'(/a,i5)') 'Number of nonzeros in factors of A = ',amat%p%iparm(18) + WRITE(*,'(a,i5)') 'Number of factorization MFLOPS = ',amat%p%iparm(19) +! + CALL factor(bmat, nlmetis=.TRUE.) + WRITE(*,'(/a,i5)') 'Number of nonzeros in factors of B = ',bmat%p%iparm(18) + WRITE(*,'(a,i5)') 'Number of factorization MFLOPS = ',bmat%p%iparm(19) +! + WRITE(*,'(/a/(10f8.4))') 'rhs', rhs +! +! Backsolve Ax = b, using Pardiso +! + sol = rhs + CALL bsolve(amat, sol, debug=.FALSE.) + WRITE(*,'(/a/(10f8.4))') 'sol (non-sym)', sol + WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(sol-1.0d0)) +! +! Backsolve Bx = b, using Pardiso +! + sol = rhs + CALL bsolve(bmat, sol, debug=.FALSE.) + WRITE(*,'(/a/(10f8.4))') 'sol (sym)', sol + WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(sol-1.0d0)) +! + arow = vmx(amat, sol) + WRITE(*,'(/a/(10f8.4))') 'A*x (non-sym)', arow + WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(arow-rhs)) +! + arow = vmx(bmat, sol) + WRITE(*,'(/a/(10f8.4))') 'B*x (sym)', arow + WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(arow-rhs)) +! +! Multiple RHS +! + rhsn(:,1) = -rhs(:) + rhsn(:,2) = 2.d0*rhs(:) + CALL bsolve(amat, rhsn, soln) + WRITE(*,'(/a/(10f8.4))') 'soln (non-sym)', soln + WRITE(*,'(a,2(1pe12.3))') 'Error', MAXVAL(ABS(soln(:,1)+1.0d0)), & + & MAXVAL(ABS(soln(:,2)-2.0d0)) +! + CALL bsolve(bmat, rhsn, soln) + WRITE(*,'(/a/(10f8.4))') 'soln (sym)', soln + WRITE(*,'(a,2(1pe12.3))') 'Error', MAXVAL(ABS(soln(:,1)+1.0d0)), & + & MAXVAL(ABS(soln(:,2)-2.0d0)) +! + arown = vmx(amat, soln) + WRITE(*,'(/a/(10f8.4))') 'A*x (non-sym)', arown + WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(arown-rhsn)) +! + arown = vmx(bmat, soln) + WRITE(*,'(/a/(10f8.4))') 'A*x (sym)', arown + WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(arown-rhsn)) +! +! Clean up +! + DEALLOCATE(arow,arown) + DEALLOCATE(rhs,rhsn) + DEALLOCATE(sol,soln) + CALL destroy(amat) + CALL destroy(bmat) +CONTAINS + INTEGER FUNCTION numb(i,j) + INTEGER, INTENT(in) :: i, j + numb = (j-1)*nx + i + END FUNCTION numb +! +END PROGRAM main + diff --git a/examples/zpardiso_ex1.f b/examples/zpardiso_ex1.f new file mode 100644 index 0000000..7979d88 --- /dev/null +++ b/examples/zpardiso_ex1.f @@ -0,0 +1,96 @@ +!> +!> @file zpardiso_ex1.f +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> + PROGRAM main + USE wsmp_bsplines + USE pardiso_bsplines + IMPLICIT NONE +c + INTEGER :: n=9 + INTEGER ia(10) + INTEGER ja(29) + COMPLEX*16 avals(29) + COMPLEX*16 b(9), sol(9), arow(9) +c +c$$$ type(zwsmp_mat) :: mat + type(zpardiso_mat) :: mat + integer :: i, k +c + DATA ia /1,5,9,13,17,21,25,27,29,30/ + data ja + 1 /1, 3, 7, 8, + 2 2, 3, 8, 9, + 3 3, 7, 8, 9, + 4 4, 6, 7, 8, + 5 5, 6, 8, 9, + 6 6, 7, 8, 9, + 7 7, 8, + 8 8, 9, + 9 9/ + data avals + 1 /(14.d0,0.d0), (-1.d0,-5.d-2), (-1.d0,-5.0d-2), (-3.d0,-1.5d-1), + 2 (14.d0,0.d0), (-1.d0,-5.d-2), (-3.d0,-1.5d-1), (-1.d0,-5.0d-2), + 3 (16.d0,0.d0), (-2.d0,-1.d-1), (-4.d0,-2.0d-1), (-2.d0,-1.0d-1), + 4 (14.d0,0.d0), (-1.d0,-5.d-2), (-1.d0,-5.0d-2), (-3.d0,-1.5d-1), + 5 (14.d0,0.d0), (-1.d0,-5.d-2), (-3.d0,-1.5d-1), (-1.d0,-5.0d-2), + 6 (16.d0,0.d0), (-2.d0,-1.d-1), (-4.d0,-2.0d-1), (-2.d0,-1.0d-1), + 7 (16.d0,0.d0), (-4.d0,-2.d-1), + 8 (71.d0,0.d0), (-4.d0,-2.d-1), + 9 (16.d0,0.d0)/ +c +c$$$ call init(n, 1, mat, nlherm=.false., nlsym=.true., nlpos=.true.) + call init(n, 1, mat, nlherm=.true., nlpos=.true.) + do i=1,n + do k=ia(i),ia(i+1)-1 + call putele(mat, i, ja(k), avals(k)) + end do + end do +c + call factor(mat) +c + print*, 'diff of val', cnorm2(avals-mat%val) + print*, 'diff of ia', ia-mat%irow + print*,' diff ja', ja-mat%cols +c + print*, 'The RHS:' + do i = 1, n + call getrow(mat,i, arow) + b(i) = sum(arow) + print *,i,' : ',b(i) + end do + call bsolve(mat,b,sol) + print *,'The solution of the system is as follows:' + do i = 1, n + print *,i,' : ',sol(i) + end do + print*, 'Residue =', cnorm2(vmx(mat,sol)-b) + contains + FUNCTION cnorm2(x) + DOUBLE COMPLEX, INTENT(in) :: x(:) + DOUBLE PRECISION :: cnorm2 + cnorm2 = SQRT(DOT_PRODUCT(x,x)) + END FUNCTION cnorm2 +c + END PROGRAM main diff --git a/examples/zssmp_ex1.f b/examples/zssmp_ex1.f new file mode 100644 index 0000000..cae1c08 --- /dev/null +++ b/examples/zssmp_ex1.f @@ -0,0 +1,111 @@ +!> +!> @file zssmp_ex1.f +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> + PROGRAM main + USE wsmp_bsplines + USE pardiso_bsplines + IMPLICIT NONE +c + INTEGER :: n=9 + INTEGER ia(10) + INTEGER ja(29) + COMPLEX*16 avals(29) + COMPLEX*16 b(9), sol(9), arow(9) +c + type(zwsmp_mat) :: mat + integer :: i, k +c + DATA ia /1,5,9,13,17,21,25,27,29,30/ + data ja + 1 /1, 3, 7, 8, + 2 2, 3, 8, 9, + 3 3, 7, 8, 9, + 4 4, 6, 7, 8, + 5 5, 6, 8, 9, + 6 6, 7, 8, 9, + 7 7, 8, + 8 8, 9, + 9 9/ + data avals + 1 /(14.d0,0.d0), (-1.d0,-5.d-2), (-1.d0,-5.0d-2), (-3.d0,-1.5d-1), + 2 (14.d0,0.d0), (-1.d0,-5.d-2), (-3.d0,-1.5d-1), (-1.d0,-5.0d-2), + 3 (16.d0,0.d0), (-2.d0,-1.d-1), (-4.d0,-2.0d-1), (-2.d0,-1.0d-1), + 4 (14.d0,0.d0), (-1.d0,-5.d-2), (-1.d0,-5.0d-2), (-3.d0,-1.5d-1), + 5 (14.d0,0.d0), (-1.d0,-5.d-2), (-3.d0,-1.5d-1), (-1.d0,-5.0d-2), + 6 (16.d0,0.d0), (-2.d0,-1.d-1), (-4.d0,-2.0d-1), (-2.d0,-1.0d-1), + 7 (16.d0,0.d0), (-4.d0,-2.d-1), + 8 (71.d0,0.d0), (-4.d0,-2.d-1), + 9 (16.d0,0.d0)/ +c + call init(n, 1, mat, nlherm=.true., nlpos=.true.) + do i=1,n + do k=ia(i),ia(i+1)-1 + call putele(mat, i, ja(k), avals(k)) + end do + end do +c + print*, 'The RHS before tomat:' + do i = 1, n + call getrow(mat,i, arow) + b(i) = sum(arow) + print *,i,' : ',b(i) + end do +c + call factor(mat) +c + write(*,'(a/(20f6.2))') 'avals', avals + write(*,'(a/(20f6.2))') 'mat%val', mat%val + print*, 'diff of val', cnorm2(avals-mat%val) + print*, 'diff of ia', ia-mat%irow + print*,' diff ja', ja-mat%cols +c + print*, 'Check getrow' + do i = 1, n + call getrow(mat,i, arow) + write(*,'(i3,": ",(20f6.2))') i, arow(i:n) + end do +c + print*, 'The RHS:' + do i = 1, n + call getrow(mat,i, arow) + b(i) = sum(arow) + print *,i,' : ',b(i) + end do + call bsolve(mat,b,sol) + print *,'Norm of Residual = ',mat%p%dparm(7) + print *,'The solution of the system is as follows:' + do i = 1, n + print *,i,' : ',sol(i) + end do + print*, 'Residue =', cnorm2(vmx(mat,sol)-b) +c + contains + FUNCTION cnorm2(x) + DOUBLE COMPLEX, INTENT(in) :: x(:) + DOUBLE PRECISION :: cnorm2 + cnorm2 = SQRT(DOT_PRODUCT(x,x)) + END FUNCTION cnorm2 +c + END PROGRAM main diff --git a/fft/CMakeLists.txt b/fft/CMakeLists.txt new file mode 100644 index 0000000..ca0d58b --- /dev/null +++ b/fft/CMakeLists.txt @@ -0,0 +1,69 @@ +/** + * @file CMakeLists.txt + * + * @brief Principal CMake configuration file for the fft library + * + * @copyright + * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) + * SPC (Swiss Plasma Center) + * + * spclibs is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * + * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + * + * @authors + * (in alphabetical order) + * @author Nicolas Richart + * @author Trach-Minh Tran + */ +project(fft) + +find_package(FFTW REQUIRED) +set(fft_w $ENV{fft_w}) + +set(SRCS + fft_fftw.F90 +) + +set(EXAMPLES tfft.f90) + +add_library(fft STATIC ${SRCS}) + +target_include_directories(fft + PRIVATE $ + ${FFTW_INCLUDES} + INTERFACE $ + $ + ${FFTW_INCLUDES} + ) + +if (${fft_w} MATCHES "fft_w2") + target_compile_options(fft PRIVATE "-Dfft_w2") +else() + target_compile_options(fft PRIVATE "-Dfft_w3") +endif() + +target_link_libraries(fft PUBLIC ${FFTW_LIBRARY} ${MPI_Fortran_LIBRARIES}) +# +set_property(TARGET fft + PROPERTY PUBLIC_HEADER ${CMAKE_CURRENT_BINARY_DIR}/modules/fft.mod) + +add_executable(tfft tfft.f90) +target_link_libraries(tfft fft ${MPI_Fortran_LIBRARIES}) + +#add_test(tfft ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 1 +# ${CMAKE_CURRENT_BINARY_DIR}/tfft < ${fft_SOURCE_DIR}/in) + +install(TARGETS fft + EXPORT ${BSPLINES_EXPORT_TARGETS} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} +) diff --git a/fft/Makefile b/fft/Makefile new file mode 100644 index 0000000..bb7a818 --- /dev/null +++ b/fft/Makefile @@ -0,0 +1,68 @@ +# +# @file Makefile +# +# @brief Makefile for the fft library +# +# @copyright +# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +# SPC (Swiss Plasma Center) +# +# spclibs is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . +# +# @authors +# (in alphabetical order) +# @author Trach-Minh Tran +# +FFTW=$(FFTW_HOME) +F90 = ifort +LD = ifort + +debug = -g -traceback -CB +optim = -O3 + +OPT=$(debug) +#OPT=$(optim) + +F90FLAGS = $(OPT) -Dfft_w3 -I$(FFTW)/include +LDFLAGS = $(OPT) -L. -L$(FFTW)/lib + +LIBS = -lfft -lfftw3 + +.SUFFIXES: +.SUFFIXES: .o .c .F90 .f90 .f + +.f90.o: + $(F90) $(F90FLAGS) -c $< +.F90.o: + $(F90) $(F90FLAGS) -c $< +.f.o: + $(F90) $(F90FLAGS) -c $< + +all: tfft + +lib: libfft.a + +libfft.a: fft_fftw.o + xiar r $@ $? + ranlib $@ + +tfft: tfft.o + $(LD) $(LDFLAGS) -o $@ tfft.o fft_fftw.o $(LIBS) + +tfft.o: lib + +clean: + rm -f *.o *.mod *~ a.out + +distclean: clean + rm -f tfft diff --git a/fft/fft_fftw.F90 b/fft/fft_fftw.F90 new file mode 100644 index 0000000..ff9e7ca --- /dev/null +++ b/fft/fft_fftw.F90 @@ -0,0 +1,1376 @@ +!> +!> @file fft_fftw.F90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +#if defined(fft_w2) +MODULE fft +! + IMPLICIT NONE +! + PRIVATE +! + PUBLIC four1, fourcol, fourrow +! +! Global parameters +! + INTEGER, PARAMETER :: MXPLAN=8 +! +! Global variables +! + INTEGER ::n1d_saved=0 + INTEGER*8, DIMENSION(MXPLAN) :: plan1d + INTEGER, DIMENSION(MXPLAN) :: n1d + REAL, DIMENSION(:), ALLOCATABLE :: scr1_real + DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: scr1 + + INTERFACE fourcol + MODULE PROCEDURE four1, fourcol_ra, fourcol_raa + END INTERFACE + +CONTAINS +! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! + SUBROUTINE four1(arr, isign) +! +! A single 1D complex FFT +! + INCLUDE 'fftw_f77.h' +! +! Dummy arguments +! + DOUBLE COMPLEX, DIMENSION(:), INTENT(INOUT) :: arr + INTEGER, INTENT(IN) :: isign +! +! Local variables +! + INTEGER :: n, id +! + n = SIZE(arr) + IF( .NOT. ALLOCATED(scr1) ) THEN + ALLOCATE(scr1(n)) + ELSE + IF ( SIZE(scr1) < n ) THEN + DEALLOCATE(scr1) + ALLOCATE(scr1(n)) + END IF + END IF +! + CALL getplan(n, isign, id, 1) + CALL fftw_f77_one(plan1d(id), arr, scr1) + END SUBROUTINE four1 +! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! + SUBROUTINE fourcol_ra(arr, isign) +! +! 1D complex FFT of columns of arr(1:N,1:howmany) +! + INCLUDE 'fftw_f77.h' +! +! Dummy arguments +! + DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr + INTEGER, INTENT(IN) :: isign +! +! Local variables +! + INTEGER :: n, howmany, id +! + n = SIZE(arr,1) + howmany = SIZE(arr,2) +! + IF( .NOT. ALLOCATED(scr1) ) THEN + ALLOCATE(scr1(n)) + ELSE + IF ( SIZE(scr1) < n ) THEN + DEALLOCATE(scr1) + ALLOCATE(scr1(n)) + END IF + END IF +! + CALL getplan(n, isign, id,1) + CALL fftw_f77(plan1d(id), howmany, arr, 1, n, scr1, 1, n) + END SUBROUTINE fourcol_ra +! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! + SUBROUTINE fourcol_raa(arr, isign) +! +! 1D complex FFT of columns of arr(1:N,1:howmany) +! + INCLUDE 'fftw_f77.h' +! +! Dummy arguments +! + DOUBLE COMPLEX, DIMENSION(:,:,:), INTENT(INOUT) :: arr + INTEGER, INTENT(IN) :: isign +! +! Local variables +! + INTEGER :: n, howmany, id +! + n = SIZE(arr,1) + howmany = SIZE(arr,2)*SIZE(arr,3) +! + IF( .NOT. ALLOCATED(scr1) ) THEN + ALLOCATE(scr1(n)) + ELSE + IF ( SIZE(scr1) < n ) THEN + DEALLOCATE(scr1) + ALLOCATE(scr1(n)) + END IF + END IF +! + CALL getplan(n, isign, id, 1) + CALL fftw_f77(plan1d(id), howmany, arr, 1, n, scr1, 1, n) + END SUBROUTINE fourcol_raa +! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! + SUBROUTINE fourrow(arr, isign) +! +! 1D complex FFT of rows of arr(1:howmany,1:N) +! + INCLUDE 'fftw_f77.h' +! +! Dummy arguments +! + DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr + INTEGER, INTENT(IN) :: isign +! +! Local variables +! + INTEGER :: n, howmany, id +! + n = SIZE(arr,2) + howmany = SIZE(arr,1) +! + IF( .NOT. ALLOCATED(scr1) ) THEN + ALLOCATE(scr1(n)) + ELSE + IF ( SIZE(scr1) < n ) THEN + DEALLOCATE(scr1) + ALLOCATE(scr1(n)) + END IF + END IF +! + CALL getplan(n, isign, id, 1) + CALL fftw_f77(plan1d(id), howmany, arr, howmany, 1, & + & scr1, howmany, 1) + END SUBROUTINE fourrow +! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! + SUBROUTINE getplan(n, sign, id, complex_fftw) +! +! Create or get an already created FFT plan (depends only on N.) +! + INCLUDE 'fftw_f77.h' +! +! Dummy arguments +! + INTEGER, INTENT(IN) :: n ! size of transform + INTEGER, INTENT(IN) :: sign ! dir. of transform -1=>FORWARD, +1=>BACKWARD + INTEGER, INTENT(OUT) :: id ! id of FFT plan + INTEGER, INTENT(IN) :: complex_fftw ! Create complex<->complex transform if =1 +! +! Local variables +! + INTEGER :: k, i, dir +! + k = sign*(2*n+complex_fftw) + DO i = 1,n1d_saved + IF( k == n1d(i)) THEN + id = i + RETURN + END IF + END DO + IF( n1d_saved == MXPLAN) THEN + PRINT*, 'Module fft: MXPLAN too small! Increase it and recompile' + STOP + END IF + n1d_saved = n1d_saved+1 + n1d(n1d_saved) = k + id = n1d_saved + dir = FFTW_FORWARD + IF( sign == +1 ) dir = FFTW_BACKWARD + IF (complex_fftw == 1) THEN + CALL fftw_f77_create_plan(plan1d(id), n, dir, FFTW_ESTIMATE + FFTW_IN_PLACE) +!!$ ELSE +!!$ CALL rfftw_f77_create_plan(plan1d(id), n, dir, FFTW_ESTIMATE + FFTW_IN_PLACE) + END IF + END SUBROUTINE getplan +END MODULE fft +#endif +! +#if defined(fft_w3) +MODULE fft +! + IMPLICIT NONE +! + PRIVATE + PUBLIC :: four1, fourcol, fourrow +! + INCLUDE 'fftw3.f' +! + TYPE int_para + INTEGER, DIMENSION(2) :: par ! size of transform + END TYPE int_para +! +! Global parameters +! + INTEGER, PARAMETER :: MXPLAN=16 ! define the maximum number of plans. +! +! Global variables +! + INTEGER*8, DIMENSION(MXPLAN,4), SAVE :: plan1d ! plans for 1-dim FFT + TYPE(int_para), DIMENSION(MXPLAN,4), SAVE :: n1d_par + INTEGER, DIMENSION(4), SAVE :: n1d_saved=0 ! number of plans saved +! + INTERFACE fourcol + MODULE PROCEDURE four1, fourcol_ra, fourcol_raa + END INTERFACE +! +CONTAINS +! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! + SUBROUTINE four1(vec, isign) +! +! Dummy arguments +! + DOUBLE COMPLEX, DIMENSION(:), INTENT(INOUT) :: vec + INTEGER, INTENT(IN) :: isign +! +! Local parameters +! + INTEGER, PARAMETER :: NUM=1 +! +! Local variables +! + INTEGER :: k + INTEGER :: dim1, i, id, istat, n + DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: vec_tmp +! +! + n = SIZE(vec) +! +! test if a plan that fits is already created. +! + id = -1 + k = isign*n + DO i = 1,n1d_saved(NUM) + IF (k == n1d_par(i,NUM)%par(1)) THEN + id = i + EXIT + END IF + END DO +! + SELECT CASE (id) + CASE (-1) +! +! test if the maximal number of plans is alredy reached. +! + IF (n1d_saved(NUM) == MXPLAN) THEN + WRITE(*,*) 'FOUR1: MXPLAN too small! Increase it and recompile' + STOP + END IF +! + dim1 = SIZE(vec) + ALLOCATE(vec_tmp(dim1), stat=istat) + IF (istat /= 0) THEN + WRITE(*,*) 'FOUR1: Allocation of vec_tmp failed!' + STOP + END IF +! + n1d_saved(NUM) = n1d_saved(NUM)+1 + n1d_par(n1d_saved(NUM),NUM)%par(1) = k + id = n1d_saved(NUM) +! + SELECT CASE (isign) + CASE (-1) + CALL dfftw_plan_dft_1d(plan1d(id,NUM), n, vec_tmp(1), vec_tmp(1), & + FFTW_FORWARD, FFTW_ESTIMATE) + CASE (1) + CALL dfftw_plan_dft_1d(plan1d(id,NUM), n, vec_tmp(1), vec_tmp(1), & + FFTW_BACKWARD, FFTW_ESTIMATE) + END SELECT +! + DEALLOCATE(vec_tmp, stat=istat) + IF (istat /= 0) THEN + WRITE(*,*) 'FOUR1: Dellocation of vec_tmp failed!' + STOP + END IF +! + END SELECT +! + CALL dfftw_execute_dft(plan1d(id,NUM), vec(1), vec(1)) +! + END SUBROUTINE four1 +! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! + SUBROUTINE fourcol_ra(arr, isign) +! +! Dummy arguments +! + DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr + INTEGER, INTENT(IN) :: isign +! +! Local parameters +! + INTEGER, PARAMETER :: NUM=2, RANK=1 +! +! Local variables +! + INTEGER :: k + INTEGER :: dim1, dim2, howmany, i, id, istat, n + INTEGER, DIMENSION(RANK) :: n_arr, nembed + DOUBLE COMPLEX, DIMENSION(:,:), ALLOCATABLE :: arr_tmp +! + dim1 = SIZE(arr,1) + dim2 = SIZE(arr,2) + howmany = SIZE(arr,2) +! +! test if a plan that fits is already created. +! + id = -1 + k = isign*dim1 + DO i = 1,n1d_saved(NUM) + IF (k == n1d_par(i,NUM)%par(1) .AND. & + howmany == n1d_par(i,NUM)%par(2)) THEN + id = i + EXIT + END IF + END DO +! + SELECT CASE (id) + CASE (-1) +! +! test if the maximal number of plans is alredy reached. +! + IF (n1d_saved(NUM) == MXPLAN) THEN + WRITE(*,*) 'FOURCOL_RA: MXPLAN too small! Increase it and recompile' + STOP + END IF +! + ALLOCATE(arr_tmp(dim1, dim2), stat=istat) + IF (istat /= 0) THEN + WRITE(*,*) 'FOURCOL_RA: Allocation of arr_tmp failed!' + STOP + END IF +! + nembed(1) = SIZE(arr) + n_arr(1) = dim1 + n = n_arr(1) +! + n1d_saved(NUM) = n1d_saved(NUM)+1 + n1d_par(n1d_saved(NUM),NUM)%par(1) = k + n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany + id = n1d_saved(NUM) +! + SELECT CASE (isign) + CASE (-1) + CALL dfftw_plan_many_dft(plan1d(id,NUM), RANK, n_arr, howmany, & + arr_tmp(1,1), nembed, 1, n, arr_tmp(1,1), nembed, 1, n, & + FFTW_FORWARD, FFTW_ESTIMATE) + CASE (1) + CALL dfftw_plan_many_dft(plan1d(id,NUM), RANK, n_arr, howmany, & + arr_tmp(1,1), nembed, 1, n, arr_tmp(1,1), nembed, 1, n, & + FFTW_BACKWARD, FFTW_ESTIMATE) + END SELECT +! + DEALLOCATE(arr_tmp, stat=istat) + IF (istat /= 0) THEN + WRITE(*,*) 'FOURCOL_RA: Dellocation of arr_tmp failed!' + STOP + END IF +! + END SELECT +! + CALL dfftw_execute_dft(plan1d(id,NUM), arr(1,1), arr(1,1)) +! + END SUBROUTINE fourcol_ra +! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! + SUBROUTINE fourcol_raa(arr, isign) +! +! Dummy arguments +! + DOUBLE COMPLEX, DIMENSION(:,:,:), INTENT(INOUT) :: arr + INTEGER, INTENT(IN) :: isign +! +! Local parameters +! + INTEGER, PARAMETER :: NUM=3, RANK=1 +! +! Local variables +! + INTEGER :: k + INTEGER :: dim1, dim2, dim3, howmany, i, id, istat, n + INTEGER, DIMENSION(RANK) :: n_arr, nembed + DOUBLE COMPLEX, DIMENSION(:,:,:), ALLOCATABLE :: arr_tmp +! + dim1 = SIZE(arr,1) + dim2 = SIZE(arr,2) + dim3 = SIZE(arr,3) + howmany = dim2*dim3 +! +! test if a plan that fits is already created. +! + id = -1 + k = isign*dim1 + DO i = 1,n1d_saved(NUM) + IF (k == n1d_par(i,NUM)%par(1) .AND. & + howmany == n1d_par(i,NUM)%par(2)) THEN + id = i + EXIT + END IF + END DO +! + SELECT CASE (id) + CASE (-1) +! +! test if the maximal number of plans is alredy reached. +! + IF (n1d_saved(NUM) == MXPLAN) THEN + WRITE(*,*) 'FOURCOL_RAA: MXPLAN too small! Increase it and recompile' + STOP + END IF +! + ALLOCATE(arr_tmp(dim1, dim2, dim3), stat=istat) + IF (istat /= 0) THEN + WRITE(*,*) 'FOURCOL_RAA: Allocation of arr_tmp failed!' + STOP + END IF +! + nembed(1) = SIZE(arr) + n_arr(1) = dim1 + n = n_arr(1) +! + n1d_saved(NUM) = n1d_saved(NUM)+1 + n1d_par(n1d_saved(NUM),NUM)%par(1) = k + n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany + id = n1d_saved(NUM) +! + SELECT CASE (isign) + CASE (-1) + CALL dfftw_plan_many_dft(plan1d(id,NUM), RANK, n_arr, howmany, & + arr_tmp(1,1,1), nembed, 1, n, arr_tmp(1,1,1), nembed, 1, n, & + FFTW_FORWARD, FFTW_ESTIMATE) + CASE (1) + CALL dfftw_plan_many_dft(plan1d(id,NUM), RANK, n_arr, howmany, & + arr_tmp(1,1,1), nembed, 1, n, arr_tmp(1,1,1), nembed, 1, n, & + FFTW_BACKWARD, FFTW_ESTIMATE) + END SELECT +! + DEALLOCATE(arr_tmp, stat=istat) + IF (istat /= 0) THEN + WRITE(*,*) 'FOURCOL_RAA: Dellocation of arr_tmp failed!' + STOP + END IF +! + END SELECT +! + CALL dfftw_execute_dft(plan1d(id,NUM), arr(1,1,1), arr(1,1,1)) +! + END SUBROUTINE fourcol_raa +! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! + SUBROUTINE fourrow(arr, isign) +! +! Dummy arguments +! + DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr + INTEGER, INTENT(IN) :: isign +! +! Local parameters +! + INTEGER, PARAMETER :: NUM=4, RANK=1 +! +! Local variables +! + INTEGER :: k + INTEGER :: dim1, dim2, howmany, i, id, istat, n + INTEGER, DIMENSION(RANK) :: n_arr, nembed + DOUBLE COMPLEX, DIMENSION(:,:), ALLOCATABLE :: arr_tmp +! + dim1 = SIZE(arr,1) + dim2 = SIZE(arr,2) + howmany = dim1 +! +! test if a plan that fits is already created. +! + id = -1 + k = isign*dim2 + DO i = 1,n1d_saved(NUM) + IF (k == n1d_par(i,NUM)%par(1) .AND. & + howmany == n1d_par(i,NUM)%par(2)) THEN + id = i + EXIT + END IF + END DO +! + SELECT CASE (id) + CASE (-1) +! +! test if the maximal number of plans is alredy reached. +! + IF (n1d_saved(NUM) == MXPLAN) THEN + WRITE(*,*) 'FOURROW: MXPLAN too small! Increase it and recompile' + STOP + END IF +! + ALLOCATE(arr_tmp(dim1, dim2), stat=istat) + IF (istat /= 0) THEN + WRITE(*,*) 'FOURROW: Allocation of arr_tmp failed!' + STOP + END IF +! + nembed(1) = SIZE(arr) + n_arr(1) = SIZE(arr,2) + n = n_arr(1) + howmany = SIZE(arr,1) +! + n1d_saved(NUM) = n1d_saved(NUM)+1 + n1d_par(n1d_saved(NUM),NUM)%par(1) = k + n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany + id = n1d_saved(NUM) +! + SELECT CASE (isign) + CASE (-1) + CALL dfftw_plan_many_dft(plan1d(id,NUM), RANK, n_arr, howmany, & + arr_tmp(1,1), nembed, howmany, 1, arr_tmp(1,1), nembed, howmany, 1, & + FFTW_FORWARD, FFTW_ESTIMATE) + CASE (1) + CALL dfftw_plan_many_dft(plan1d(id,NUM), RANK, n_arr, howmany, & + arr_tmp(1,1), nembed, howmany, 1, arr_tmp(1,1), nembed, howmany, 1, & + FFTW_BACKWARD, FFTW_ESTIMATE) + END SELECT +! + DEALLOCATE(arr_tmp, stat=istat) + IF (istat /= 0) THEN + WRITE(*,*) 'FOUR1: Dellocation of arr_tmp failed!' + STOP + END IF +! + END SELECT +! + CALL dfftw_execute_dft(plan1d(id,NUM), arr(1,1), arr(1,1)) +! + END SUBROUTINE fourrow +END MODULE fft +#endif +! +#if defined(fft_essl) +MODULE fft +! + IMPLICIT NONE +! + PRIVATE +! + PUBLIC :: four1, fourcol, fourrow +! + TYPE pointer_ra + REAL, DIMENSION(:), POINTER :: poi_ra + END TYPE pointer_ra +! + TYPE int_para + INTEGER, DIMENSION(2) :: par + END TYPE int_para +! +! Global parameters +! + INTEGER, PARAMETER :: MXPLAN=16 ! define the maximum number of work arrays. +! +! Global variables +! + EXTERNAL :: ENOTRM + LOGICAL, SAVE :: initflag=.TRUE. ! initialization of the module + CHARACTER (len=8), SAVE :: S2015 ! string to copy error list entry + REAL, DIMENSION(8) :: aux1 ! auxilary array + REAL, DIMENSION(1) :: aux2 ! auxilary array + TYPE(pointer_ra), DIMENSION(:,:), ALLOCATABLE, SAVE :: aux1_poi, aux2_poi ! work arrays for the ESSL routine + TYPE(int_para), DIMENSION(MXPLAN,4), SAVE :: n1d_par ! size of transform + INTEGER, DIMENSION(4), SAVE :: n1d_saved=0 ! number of plans saved +! + INTERFACE fourcol + MODULE PROCEDURE fourcol_ra, fourcol_raa + END INTERFACE +! +CONTAINS +! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! + SUBROUTINE four1(vec, isign) +! +! Dummy arguments +! + DOUBLE COMPLEX, DIMENSION(:), INTENT(INOUT) :: vec + INTEGER, INTENT(IN) :: isign +! +! Local parameters +! + INTEGER, PARAMETER :: NUM=1 +! +! Local variables +! + INTEGER :: i, id, istat, k, n, naux1, naux2 +! + IF (initflag) THEN + initflag = .FALSE. +! + CALL EINFO(0) + CALL ERRSAV(2015,S2015) +! + ALLOCATE(aux1_poi(MXPLAN,4), stat=istat) + IF (istat /= 0) THEN + WRITE(*,*) 'FOUR1: 1. Allocation of aux1_poi failed!' + STOP + END IF +! + ALLOCATE(aux2_poi(MXPLAN,4), stat=istat) + IF (istat /= 0) THEN + WRITE(*,*) 'FOUR1: 1. Allocation of aux2_poi failed!' + STOP + END IF +! + END IF +! + n = SIZE(vec) +! +! test if a plan that fits is already created. +! + id = -1 + k = isign*n + DO i = 1,n1d_saved(NUM) + IF (k == n1d_par(i,NUM)%par(1)) THEN + id = i + EXIT + END IF + END DO +! + SELECT CASE (id) + CASE (-1) +! + n1d_saved(NUM) = n1d_saved(NUM)+1 + n1d_par(n1d_saved(NUM),NUM)%par(1) = k + id = n1d_saved(NUM) +! + CALL ERRSET(2015,0,-1,1,ENOTRM,0) +! + naux1 = SIZE(aux1) + naux2 = SIZE(aux2) +! + CALL dcft(1, vec(1), 1, n, vec(1), 1, n, & + n, 1, -isign, 1.0, aux1, naux1, aux2, naux2) +! + CALL ERRSTR(2015,S2015) +! +! dynamic allocation of the work arrays. +! + ALLOCATE(aux1_poi(id,NUM)%poi_ra(naux1), stat=istat) + IF (istat /= 0) THEN + WRITE(*,*) 'FOUR1: 2. Allocation of aux1_poi failed!' + STOP + ENDIF +! + ALLOCATE(aux2_poi(id,NUM)%poi_ra(naux2), stat=istat) + IF (istat /= 0) THEN + WRITE(*,*) 'FOUR1: 2. Allocation of aux2_poi failed!' + STOP + ENDIF +! + CALL dcft(1, vec(1), 1, n, vec(1), 1, n, & + n, 1, -isign, 1.0, & + aux1_poi(id,NUM)%poi_ra(1), naux1, & + aux2_poi(id,NUM)%poi_ra(1), naux2) +! + END SELECT +! + CALL dcft(0, vec(1), 1, n, vec(1), 1, n, & + n, 1, -isign, 1.0, & + aux1_poi(id,NUM)%poi_ra(1), SIZE(aux1_poi(id,NUM)%poi_ra), & + aux2_poi(id,NUM)%poi_ra(1), SIZE(aux2_poi(id,NUM)%poi_ra)) +! + END SUBROUTINE four1 +! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! + SUBROUTINE fourcol_ra(arr, isign) +! +! Dummy arguments +! + DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr + INTEGER, INTENT(IN) :: isign +! +! Local parameters +! + INTEGER, PARAMETER :: NUM=2 +! +! Local variables +! + INTEGER :: dim1, howmany, i, id, istat, k, naux1, naux2 +! + IF (initflag) THEN + initflag = .FALSE. +! + CALL EINFO(0) + CALL ERRSAV(2015,S2015) +! + ALLOCATE(aux1_poi(MXPLAN,4), stat=istat) + IF (istat /= 0) THEN + WRITE(*,*) 'FOURCOL_RA: 1. Allocation of aux1_poi failed!' + STOP + END IF +! + ALLOCATE(aux2_poi(MXPLAN,4), stat=istat) + IF (istat /= 0) THEN + WRITE(*,*) 'FOURCOL_RA: 1. Allocation of aux2_poi failed!' + STOP + END IF +! + END IF +! + dim1 = SIZE(arr,1) + howmany = SIZE(arr,2) +! +! test if a plan that fits is already created. +! + id = -1 + k = isign*dim1 + DO i = 1,n1d_saved(NUM) + IF (k == n1d_par(i,NUM)%par(1) .AND. & + howmany == n1d_par(i,NUM)%par(2)) THEN + id = i + EXIT + END IF + END DO +! + SELECT CASE (id) + CASE (-1) +! + n1d_saved(NUM) = n1d_saved(NUM)+1 + n1d_par(n1d_saved(NUM),NUM)%par(1) = k + n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany + id = n1d_saved(NUM) +! + CALL ERRSET(2015,0,-1,1,ENOTRM,0) +! + naux1 = SIZE(aux1) + naux2 = SIZE(aux2) +! + CALL dcft(1, arr(1,1), 1, dim1, arr(1,1), 1, dim1, & + dim1, howmany, -isign, 1.0, aux1, naux1, aux2, naux2) +! + CALL ERRSTR(2015,S2015) +! +! dynamic allocation of the work arrays. +! + ALLOCATE(aux1_poi(id,NUM)%poi_ra(naux1), stat=istat) + IF (istat /= 0) THEN + WRITE(*,*) 'FOURCOL_RA: 2. Allocation of aux1_poi failed!' + STOP + ENDIF +! + ALLOCATE(aux2_poi(id,NUM)%poi_ra(naux2), stat=istat) + IF (istat /= 0) THEN + WRITE(*,*) 'FOURCOL_RA: 2. Allocation of aux2_poi failed!' + STOP + ENDIF +! + CALL dcft(1, arr(1,1), 1, dim1, arr(1,1), 1, dim1, & + dim1, howmany, -isign, 1.0, & + aux1_poi(id,NUM)%poi_ra(1), naux1, & + aux2_poi(id,NUM)%poi_ra(1), naux2) +! + END SELECT +! + CALL dcft(0, arr(1,1), 1, dim1, arr(1,1), 1, dim1, & + dim1, howmany, -isign, 1.0, & + aux1_poi(id,NUM)%poi_ra(1), SIZE(aux1_poi(id,NUM)%poi_ra), & + aux2_poi(id,NUM)%poi_ra(1), SIZE(aux2_poi(id,NUM)%poi_ra)) +! + END SUBROUTINE fourcol_ra +! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! + SUBROUTINE fourcol_raa(arr, isign) +! +! Dummy arguments +! + DOUBLE COMPLEX, DIMENSION(:,:,:), INTENT(INOUT) :: arr + INTEGER, INTENT(IN) :: isign +! +! Local parameters +! + INTEGER, PARAMETER :: NUM=3 +! +! Local variables +! + INTEGER :: dim1, howmany, i, id, istat, k, naux1, naux2 +! + IF (initflag) THEN + initflag = .FALSE. +! + CALL EINFO(0) + CALL ERRSAV(2015,S2015) +! + ALLOCATE(aux1_poi(MXPLAN,NUM), stat=istat) + IF (istat /= 0) THEN + WRITE(*,*) 'FOURCOL_RAA: 1. Allocation of aux1_poi failed!' + STOP + END IF +! + ALLOCATE(aux2_poi(MXPLAN,NUM), stat=istat) + IF (istat /= 0) THEN + WRITE(*,*) 'FOURCOL_RAA: 1. Allocation of aux2_poi failed!' + STOP + END IF +! + END IF +! + dim1 = SIZE(arr,1) + howmany = SIZE(arr,2)*SIZE(arr,3) +! +! test if a plan that fits is already created. +! + id = -1 + k = isign*dim1 + DO i = 1,n1d_saved(NUM) + IF (k == n1d_par(i,NUM)%par(1) .AND. & + howmany == n1d_par(i,NUM)%par(2)) THEN + id = i + EXIT + END IF + END DO +! + SELECT CASE (id) + CASE (-1) +! + n1d_saved(NUM) = n1d_saved(NUM)+1 + n1d_par(n1d_saved(NUM),NUM)%par(1) = k + n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany + id = n1d_saved(NUM) +! + CALL ERRSET(2015,0,-1,1,ENOTRM,0) +! + naux1 = SIZE(aux1) + naux2 = SIZE(aux2) +! + CALL dcft(1, arr(1,1,1), 1, dim1, arr(1,1,1), 1, dim1, & + dim1, howmany, -isign, 1.0, aux1, naux1, aux2, naux2) +! + CALL ERRSTR(2015,S2015) +! +! dynamic allocation of the work arrays. +! + ALLOCATE(aux1_poi(id,NUM)%poi_ra(naux1), stat=istat) + IF (istat /= 0) THEN + WRITE(*,*) 'FOURCOL_RAA: 2. Allocation of aux1_poi failed!' + STOP + ENDIF +! + ALLOCATE(aux2_poi(id,NUM)%poi_ra(naux2), stat=istat) + IF (istat /= 0) THEN + WRITE(*,*) 'FOURCOL_RAA: 2. Allocation of aux2_poi failed!' + STOP + ENDIF +! + CALL dcft(1, arr(1,1,1), 1, dim1, arr(1,1,1), 1, dim1, & + dim1, howmany, -isign, 1.0, & + aux1_poi(id,NUM)%poi_ra(1), naux1, & + aux2_poi(id,NUM)%poi_ra(1), naux2) +! + END SELECT +! + CALL dcft(0, arr(1,1,1), 1, dim1, arr(1,1,1), 1, dim1, & + dim1, howmany, -isign, 1.0, & + aux1_poi(id,NUM)%poi_ra(1), SIZE(aux1_poi(id,NUM)%poi_ra), & + aux2_poi(id,NUM)%poi_ra(1), SIZE(aux2_poi(id,NUM)%poi_ra)) +! + END SUBROUTINE fourcol_raa +! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! + SUBROUTINE fourrow(arr, isign) +! +! Dummy arguments +! + DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr + INTEGER, INTENT(IN) :: isign +! +! Local parameters +! + INTEGER, PARAMETER :: NUM=4 +! +! Local variables +! + INTEGER :: dim1, dim2, i, id, istat, k, naux1, naux2 +! + IF (initflag) THEN + initflag = .FALSE. +! + CALL EINFO(0) + CALL ERRSAV(2015,S2015) +! + ALLOCATE(aux1_poi(MXPLAN,NUM), stat=istat) + IF (istat /= 0) THEN + WRITE(*,*) 'FOURROW: 1. Allocation of aux1_poi failed!' + STOP + END IF +! + ALLOCATE(aux2_poi(MXPLAN,NUM), stat=istat) + IF (istat /= 0) THEN + WRITE(*,*) 'FOURROW: 1. Allocation of aux2_poi failed!' + STOP + END IF +! + END IF +! + dim1 = SIZE(arr,1) + dim2 = SIZE(arr,2) +! +! test if a plan that fits is already created. +! + id = -1 + k = isign*dim1 + DO i = 1,n1d_saved(NUM) + IF (k == n1d_par(i,NUM)%par(1) .AND. & + dim2 == n1d_par(i,NUM)%par(2)) THEN + id = i + EXIT + END IF + END DO +! + SELECT CASE (id) + CASE (-1) +! + n1d_saved(NUM) = n1d_saved(NUM)+1 + n1d_par(n1d_saved(NUM),NUM)%par(1) = k + n1d_par(n1d_saved(NUM),NUM)%par(2) = dim2 + id = n1d_saved(NUM) +! + CALL ERRSET(2015,0,-1,1,ENOTRM,0) +! + naux1 = SIZE(aux1) + naux2 = SIZE(aux2) +! + CALL dcft(1, arr(1,1), dim2, 1, arr(1,1), dim2, 1, & + dim1, dim2, -isign, 1.0, aux1, naux1, aux2, naux2) +! + CALL ERRSTR(2015,S2015) +! +! dynamic allocation of the work arrays. +! + ALLOCATE(aux1_poi(id,NUM)%poi_ra(naux1), stat=istat) + IF (istat /= 0) THEN + WRITE(*,*) 'FOURROW: 2. Allocation of aux1_poi failed!' + STOP + ENDIF +! + ALLOCATE(aux2_poi(id,NUM)%poi_ra(naux2), stat=istat) + IF (istat /= 0) THEN + WRITE(*,*) 'FOURROW: 2. Allocation of aux2_poi failed!' + STOP + ENDIF +! + CALL dcft(1, arr(1,1), dim1, 1, arr(1,1), dim1, 1, & + dim1, dim2, -isign, 1.0, & + aux1_poi(id,NUM)%poi_ra(1), naux1, & + aux2_poi(id,NUM)%poi_ra(1), naux2) +! + END SELECT +! + CALL dcft(0, arr(1,1), dim1, 1, arr(1,1), dim1, 1, & + dim1, dim2, -isign, 1.0, & + aux1_poi(id,NUM)%poi_ra(1), SIZE(aux1_poi(id,NUM)%poi_ra), & + aux2_poi(id,NUM)%poi_ra(1), SIZE(aux2_poi(id,NUM)%poi_ra)) +! + END SUBROUTINE fourrow +END MODULE fft +#endif +! +#if defined(fft_mkl) +MODULE fft +! + USE mkl_dfti +! + IMPLICIT NONE +! + PRIVATE +! + PUBLIC :: pointer_r, handle1d + PUBLIC :: four1, fourcol, fourrow +! + TYPE pointer_r + TYPE(DFTI_DESCRIPTOR), POINTER :: desc_handle + END TYPE pointer_r +! + TYPE int_para + INTEGER, DIMENSION(2) :: par + END TYPE int_para +! +! Global parameters +! + INTEGER, PARAMETER :: MXPLAN=16 ! define the maximum number of plans. +! +! Global variables +! + TYPE(DFTI_DESCRIPTOR), POINTER :: desc_handle + TYPE(pointer_r), DIMENSION(MXPLAN,4), SAVE :: handle1d ! descriptor handles for 1-dim FFT + TYPE(int_para), DIMENSION(MXPLAN,4), SAVE :: n1d_par ! size of transform + INTEGER, DIMENSION(4), SAVE :: n1d_saved=0 ! number of descriptor handles saved +! + INTERFACE fourcol + MODULE PROCEDURE fourcol_ra, fourcol_raa + END INTERFACE +! +CONTAINS +! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! + SUBROUTINE four1(vec, isign) +! +! Dummy arguments +! + INTEGER, INTENT(IN) :: isign + DOUBLE COMPLEX, DIMENSION(:), INTENT(INOUT) :: vec +! +! Local parameters +! + INTEGER, PARAMETER :: NUM=1 +! +! Local variables +! + INTEGER :: dim1, id, i, k, status + LOGICAL :: init_flag +! +! test if a plan that fits is already created. +! + id = -1 + k = isign*SIZE(vec) + DO i = 1,n1d_saved(NUM) + IF (k == n1d_par(i,NUM)%par(1)) THEN + id = i + EXIT + END IF + END DO +! + SELECT CASE (id) + CASE (-1) + init_flag = .TRUE. + n1d_saved(NUM) = n1d_saved(NUM)+1 + n1d_par(n1d_saved(NUM),NUM)%par(1) = k + id = n1d_saved(NUM) +! + CASE default + init_flag = .FALSE. + END SELECT +! + dim1 = SIZE(vec,1) +! + CALL fourcol_mkl(vec(1), dim1, 1, isign, init_flag, id, NUM) +! + END SUBROUTINE four1 +! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! + SUBROUTINE fourcol_ra(arr, isign) +! +! Dummy arguments +! + INTEGER, INTENT(IN) :: isign + DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr +! +! Local parameters +! + INTEGER, PARAMETER :: NUM=2 +! +! Local variables +! + INTEGER :: dim1, howmany, id, i, k, status + LOGICAL :: init_flag +! + dim1 = SIZE(arr,1) + howmany = SIZE(arr,2) +! +! test if a plan that fits is already created. +! + id = -1 + k = isign*SIZE(arr) + DO i = 1,n1d_saved(NUM) + IF (k == n1d_par(i,NUM)%par(1) .AND. & + howmany == n1d_par(i,NUM)%par(2)) THEN + id = i + EXIT + END IF + END DO +! + SELECT CASE (id) + CASE (-1) + init_flag = .TRUE. + n1d_saved(NUM) = n1d_saved(NUM)+1 + n1d_par(n1d_saved(NUM),NUM)%par(1) = k + n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany + id = n1d_saved(NUM) +! + CASE default + init_flag = .FALSE. + END SELECT +! + CALL fourcol_mkl(arr(1,1), dim1, howmany, isign, init_flag, id, NUM) +! + END SUBROUTINE fourcol_ra +! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! + SUBROUTINE fourcol_raa(arr, isign) +! +! Dummy arguments +! + INTEGER, INTENT(IN) :: isign + DOUBLE COMPLEX, DIMENSION(:,:,:), INTENT(INOUT) :: arr +! +! Local parameters +! + INTEGER, PARAMETER :: NUM=3 +! +! Local variables +! + INTEGER :: dim1, howmany, id, i, k, status + LOGICAL :: init_flag +! + dim1 = SIZE(arr,1) + howmany = SIZE(arr,2)*SIZE(arr,3) +! +! test if a plan that fits is already created. +! + id = -1 + k = isign*SIZE(arr) + DO i = 1,n1d_saved(NUM) + IF (k == n1d_par(i,NUM)%par(1) .AND. & + howmany == n1d_par(i,NUM)%par(2)) THEN + id = i + EXIT + END IF + END DO +! + SELECT CASE (id) + CASE (-1) + init_flag = .TRUE. + n1d_saved(NUM) = n1d_saved(NUM)+1 + n1d_par(n1d_saved(NUM),NUM)%par(1) = k + n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany + id = n1d_saved(NUM) +! + CASE default + init_flag = .FALSE. + END SELECT +! + CALL fourcol_mkl(arr(1,1,1), dim1, howmany, isign, init_flag, id, NUM) +! + END SUBROUTINE fourcol_raa +! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! + SUBROUTINE fourrow(arr, isign) +! +! Dummy arguments +! + INTEGER, INTENT(IN) :: isign + DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr +! +! Local parameters +! + INTEGER, PARAMETER :: NUM=4 +! +! Local variables +! + INTEGER :: dim2, howmany, id, i, k, status + LOGICAL :: init_flag +! + howmany = SIZE(arr,1) + dim2 = SIZE(arr,2) +! +! test if a plan that fits is already created. +! + id = -1 + k = isign*SIZE(arr) + DO i = 1,n1d_saved(NUM) + IF (k == n1d_par(i,NUM)%par(1) .AND. & + howmany == n1d_par(i,NUM)%par(2)) THEN + id = i + EXIT + END IF + END DO +! + SELECT CASE (id) + CASE (-1) + init_flag = .TRUE. + n1d_saved(NUM) = n1d_saved(NUM)+1 + n1d_par(n1d_saved(NUM),NUM)%par(1) = k + n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany + id = n1d_saved(NUM) +! + CASE default + init_flag = .FALSE. + END SELECT +! + CALL fourrow_mkl(arr(1,1), howmany, dim2, isign, init_flag, id, NUM) +! + END SUBROUTINE fourrow +END MODULE fft +! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! +SUBROUTINE fourcol_mkl(arr, dim1, howmany, isign, init_flag, id, num) +! +! COMMENT: This subroutine is necessary to prevent the Lahey/Fujitsu +! compiler from making a copy of array arr when passing +! arguments. + USE fft, ONLY: handle1d +! + USE mkl_dfti +! + IMPLICIT NONE +! +! Dummy arguments +! + DOUBLE COMPLEX, DIMENSION(*), INTENT(INOUT) :: arr + INTEGER, INTENT(IN) :: dim1 + INTEGER, INTENT(IN) :: howmany + INTEGER, INTENT(IN) :: isign + LOGICAL, INTENT(IN) :: init_flag + INTEGER, INTENT(IN) :: id + INTEGER, INTENT(IN) :: num +! +! Local variables +! + INTEGER :: i, status +! + IF (init_flag) THEN +! + status = DftiCreateDescriptor(handle1d(id,num)%desc_handle, & + DFTI_DOUBLE, DFTI_COMPLEX, 1, dim1) + IF (status /= 0) WRITE(*,*) 'FOURCOL_MKL 0:', DftiErrorMessage(status) + + status = DftiSetValue(handle1d(id,num)%desc_handle, & + DFTI_NUMBER_OF_TRANSFORMS, howmany) + IF (status /= 0) WRITE(*,*) 'FOURCOL_MKL 1:', DftiErrorMessage(status) + status = DftiSetValue(handle1d(id,num)%desc_handle, & + DFTI_INPUT_DISTANCE, dim1) + IF (status /= 0) WRITE(*,*) 'FOURCOL_MKL 2:', DftiErrorMessage(status) + status = DftiSetValue(handle1d(id,num)%desc_handle, & + DFTI_OUTPUT_DISTANCE, dim1) + IF (status /= 0) WRITE(*,*) 'FOURCOL_MKL 3:', DftiErrorMessage(status) +! + status = DftiCommitDescriptor(handle1d(id,num)%desc_handle) + IF (status /= 0) WRITE(*,*) 'FOURCOL_MKL 4:', DftiErrorMessage(status) +! + END IF +! + SELECT CASE (isign) + CASE (-1) + status = DftiComputeForward(handle1d(id,num)%desc_handle, arr) + CASE (1) + status = DftiComputeBackward(handle1d(id,num)%desc_handle, arr) + END SELECT +! +END SUBROUTINE fourcol_mkl +! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! +SUBROUTINE fourrow_mkl(arr, howmany, dim2, isign, init_flag, id, num) +! +! COMMENT: This subroutine is necessary to prevent the Lahey/Fujitsu +! compiler from making a copy of array arr when passing +! arguments. +! + USE fft, ONLY: handle1d +! + USE mkl_dfti +! + IMPLICIT NONE +! +! Dummy arguments +! + DOUBLE COMPLEX, DIMENSION(*), INTENT(INOUT) :: arr + INTEGER, INTENT(IN) :: howmany + INTEGER, INTENT(IN) :: dim2 + INTEGER, INTENT(IN) :: isign + LOGICAL, INTENT(IN) :: init_flag + INTEGER, INTENT(IN) :: id + INTEGER, INTENT(IN) :: num +! +! Local variables +! + INTEGER :: i, status + INTEGER, DIMENSION(2) :: stride +! + IF (init_flag) THEN +! + stride(1) = 0 + stride(2) = howmany +! + status = DftiCreateDescriptor(handle1d(id,num)%desc_handle, & + DFTI_DOUBLE, DFTI_COMPLEX, 1, dim2) + IF (status /= 0) WRITE(*,*) 'FOURROW_MKL 0:', DftiErrorMessage(status) + + status = DftiSetValue(handle1d(id,num)%desc_handle, & + DFTI_NUMBER_OF_TRANSFORMS, dim2) + IF (status /= 0) WRITE(*,*) 'FOURROW_MKL 1:', DftiErrorMessage(status) +! + status = DftiSetValue(handle1d(id,num)%desc_handle, & + DFTI_INPUT_DISTANCE, 1) + IF (status /= 0) WRITE(*,*) 'FOURROW_MKL 2:', DftiErrorMessage(status) + status = DftiSetValue(handle1d(id,num)%desc_handle, & + DFTI_INPUT_STRIDES, stride) + IF (status /= 0) WRITE(*,*) 'FOURROW_MKL 3:', DftiErrorMessage(status) +! + status = DftiSetValue(handle1d(id,num)%desc_handle, & + DFTI_OUTPUT_DISTANCE, 1) + IF (status /= 0) WRITE(*,*) 'FOURROW_MKL 4:', DftiErrorMessage(status) + status = DftiSetValue(handle1d(id,num)%desc_handle, & + DFTI_OUTPUT_STRIDES, stride) + IF (status /= 0) WRITE(*,*) 'FOURROW_MKL 5:', DftiErrorMessage(status) +! + status = DftiCommitDescriptor(handle1d(id,num)%desc_handle) + IF (status /= 0) WRITE(*,*) 'FOURROW_MKL 6:', DftiErrorMessage(status) +! + END IF +! + SELECT CASE (isign) + CASE (-1) + status = DftiComputeForward(handle1d(id,num)%desc_handle, arr) + CASE (1) + status = DftiComputeBackward(handle1d(id,num)%desc_handle, arr) + END SELECT +! +END SUBROUTINE fourrow_mkl +#endif +! diff --git a/fft/fftw_f77.h b/fft/fftw_f77.h new file mode 100644 index 0000000..b825f23 --- /dev/null +++ b/fft/fftw_f77.h @@ -0,0 +1,53 @@ +!> +!> @file fftw_f77.h +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +! This file contains PARAMETER statements for various constants +! that can be passed to FFTW routines. You should include +! this file in any FORTRAN program that calls the fftw_f77 +! routines (either directly or with an #include statement +! if you use the C preprocessor). + + integer FFTW_FORWARD,FFTW_BACKWARD + parameter (FFTW_FORWARD=-1,FFTW_BACKWARD=1) + + integer FFTW_REAL_TO_COMPLEX,FFTW_COMPLEX_TO_REAL + parameter (FFTW_REAL_TO_COMPLEX=-1,FFTW_COMPLEX_TO_REAL=1) + + integer FFTW_ESTIMATE,FFTW_MEASURE + parameter (FFTW_ESTIMATE=0,FFTW_MEASURE=1) + + integer FFTW_OUT_OF_PLACE,FFTW_IN_PLACE,FFTW_USE_WISDOM + parameter (FFTW_OUT_OF_PLACE=0) + parameter (FFTW_IN_PLACE=8,FFTW_USE_WISDOM=16) + + integer FFTW_THREADSAFE + parameter (FFTW_THREADSAFE=128) + +! Constants for the MPI wrappers: + integer FFTW_TRANSPOSED_ORDER, FFTW_NORMAL_ORDER + integer FFTW_SCRAMBLED_INPUT, FFTW_SCRAMBLED_OUTPUT + parameter(FFTW_TRANSPOSED_ORDER=1, FFTW_NORMAL_ORDER=0) + parameter(FFTW_SCRAMBLED_INPUT=8192) + parameter(FFTW_SCRAMBLED_OUTPUT=16384) diff --git a/fft/tfft.f90 b/fft/tfft.f90 new file mode 100644 index 0000000..866a66b --- /dev/null +++ b/fft/tfft.f90 @@ -0,0 +1,141 @@ +!> +!> @file tfft.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Test the FFT routines exported by module fft +! + USE fft + IMPLICIT NONE + INTEGER :: nx=8 + DOUBLE COMPLEX, DIMENSION(:,:), ALLOCATABLE :: a,b,c + DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: rn1, rn2 + DOUBLE PRECISION :: pi, argx + INTEGER :: ix, nx0=2 +! + PRINT*, 'Enter array dim. nx' + READ(*,*) nx + ALLOCATE(a(nx,4), b(nx,4), c(4,nx)) + ALLOCATE(rn1(nx,4), rn2(nx,4)) +! +! Create initial array + pi = 4.0d0*ATAN(1.0d0) + WRITE(*,*) 'Enter mode nx0' + READ(*,*) nx0 + DO ix=0,nx-1 + argx = 2.0d0*pi/nx*nx0*ix + a(ix+1,:) = COS(argx) + END DO +! + WRITE(*,*) '-----------------------' + WRITE(*,*) 'Reals of original array' + WRITE(*,*) '-----------------------' + WRITE(*,'(10f10.4)') REAL(a) +!________________________________________________________________________________ +! + WRITE(*,*) '-----------------' + WRITE(*,*) 'testing four1 ...' + WRITE(*,*) '-----------------' +! +! Forward transform + b=a + CALL four1(b(:,1), -1) +! +! Check + WRITE(*,'(a/(10f10.4))') 'Reals of FFT', REAL(b(:,1)) + WRITE(*,'(a/(10f10.4))') 'Imag of FFT', AIMAG(b(:,1)) +! +! Backward transform + CALL four1(b(:,1), 1) + b = b/REAL(nx) +! +! Check + WRITE(*,'(a/(10f10.4))') 'Reals of FFT', REAL(b(:,1)) + WRITE(*,'(a/(10f10.4))') 'Imag of FFT', AIMAG(b(:,1)) +! + b(:,1) = b(:,1)-a(:,1) + rn1(:,1) = REAL(b(:,1),8) + rn2(:,1) = AIMAG(b(:,1)) + PRINT *, 'Min. max err of real', MINVAL(rn1(:,1)), MAXVAL(rn1(:,1)) + PRINT *, 'Min. max err of imag', MINVAL(rn2(:,1)), MAXVAL(rn2(:,1)) +!________________________________________________________________________________ +! + WRITE(*,*) '-------------------' + WRITE(*,*) 'testing fourcol ...' + WRITE(*,*) '-------------------' +! +! Forward transform + b=a + CALL fourcol(b(:,:), -1) +! +! Check + WRITE(*,'(a/(10f10.4))') 'Reals of FFT', REAL(b(:,1)) + WRITE(*,'(a/(10f10.4))') 'Imag of FFT', AIMAG(b(:,1)) +! +! Backward transform + CALL fourcol(b(:,:), 1) + b = b/REAL(nx) +! +! Check + WRITE(*,'(a/(10f10.4))') 'Reals of FFT', REAL(b(:,1)) + WRITE(*,'(a/(10f10.4))') 'Imag of FFT', AIMAG(b(:,1)) +! + b = b-a + rn1 = REAL(b,8) + rn2 = AIMAG(b) + PRINT *, 'Min. max err of real', MINVAL(rn1), MAXVAL(rn1) + PRINT *, 'Min. max err of imag', MINVAL(rn2), MAXVAL(rn2) +!________________________________________________________________________________ +! + WRITE(*,*) '-------------------' + WRITE(*,*) 'testing fourrow ...' + WRITE(*,*) '-------------------' +! +! Forward transform + b=a + c = TRANSPOSE(b) + CALL fourrow(c, -1) +! +! Check + WRITE(*,'(a/(10f10.4))') 'Reals of FFT', REAL(c(1,:)) + WRITE(*,'(a/(10f10.4))') 'Imag of FFT', AIMAG(c(1,:)) +! +! Backward transform + CALL fourrow(c, 1) + c = c/REAL(nx) +! +! Check + WRITE(*,'(a/(10f10.4))') 'Reals of FFT', REAL(c(1,:)) + WRITE(*,'(a/(10f10.4))') 'Imag of FFT', AIMAG(c(1,:)) +! + b = TRANSPOSE(c)-a + rn1 = REAL(b,8) + rn2 = AIMAG(b) + PRINT *, 'Min. max err of real', MINVAL(rn1), MAXVAL(rn1) + PRINT *, 'Min. max err of imag', MINVAL(rn2), MAXVAL(rn2) +! +! Clean up + DEALLOCATE(a,b,c, rn1,rn2) +END PROGRAM main diff --git a/matlab/cds_mat.m b/matlab/cds_mat.m new file mode 100644 index 0000000..e2a79d4 --- /dev/null +++ b/matlab/cds_mat.m @@ -0,0 +1,44 @@ +% +% @file cds_mat.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +function [mata, diag] = cds_mat(file, dset) + n=double(hdf5read(file,dset, 'RANK')); + dists=double(hdf5read(file, strcat(dset,'/dists'))); + val=hdf5read(file, strcat(dset,'/vals')); + + %% Shift the off-diagonals %% + for k=1:length(dists) + d=dists(k); + if d < 0 + val(1:n+d,k) = val(1-d:n,k); + elseif d > 0 + val(n:-1:d+1,k) = val(n-d:-1:1,k); + end + end + mata = spdiags(val, dists, n,n); + if nargout == 2 + idiag = find(dists==0); + diag = val(:,idiag); + end diff --git a/matlab/csr_mat.m b/matlab/csr_mat.m new file mode 100644 index 0000000..ddfa679 --- /dev/null +++ b/matlab/csr_mat.m @@ -0,0 +1,45 @@ +% +% @file csr_mat.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +function [mata, diag] = csr_mat(file, dset) + n=hdf5read(file,dset, 'RANK'); + nnz=hdf5read(file,dset, 'NNZ'); + cols=hdf5read(file, strcat(dset,'/cols')); + irow=hdf5read(file, strcat(dset,'/irow')); + val=hdf5read(file, strcat(dset,'/val')); + idiag=hdf5read(file, strcat(dset,'/idiag')); + for i=1:n + s = irow(i); + e = irow(i+1)-1; + rows(s:e) = i; + end + cols=double(cols); + rows=double(rows); + vals = double(val); + mata = sparse(rows,cols,vals); + if nargout == 2 + diag = val(idiag); + end + diff --git a/matlab/driv1.m b/matlab/driv1.m new file mode 100644 index 0000000..2d28ea7 --- /dev/null +++ b/matlab/driv1.m @@ -0,0 +1,68 @@ +% +% @file driv1.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +file='driv1.h5'; + +x = hdf5read(file, '/X'); +knotsx = hdf5read(file, 'KNOTSX'); +splinesx = hdf5read(file, '/splinesx','V71Dimensions', true); +y = hdf5read(file, '/Y'); +knotsy = hdf5read(file, 'KNOTSY'); +splinesy = hdf5read(file, '/splinesy','V71Dimensions', true); + +c=['b','g','r','c','m','y','k']; +nc=size(c,2); + +figure +%subplot(211) +hold on +ns = size(splinesx,1); +attr=hdf5read(file,'/splinesx/title'); title_ann=attr.Data; +for i = 1:ns + cc = mod(i-1,nc)+1; + plot(x,splinesx(i,:),c(cc)) +end +yk=zeros(size(knotsx)); +plot(knotsx,yk,'ro'); +grid on +title(title_ann) +ylabel('Splines') + +figure +%subplot(212) +hold on +ns = size(splinesy,1); +attr=hdf5read(file,'/splinesy/title'); title_ann=attr.Data; +for i = 1:ns + cc = mod(i-1,nc)+1; + plot(y,splinesy(i,:),c(cc)) +end +yk=zeros(size(knotsy)); +i1=find(knotsy==y(1)); +i2=find(knotsy>=y(size(y,1)),1); +plot(knotsy(i1:i2),yk(i1:i2),'ro'); +grid on +title(title_ann) +ylabel('Splines') diff --git a/matlab/fit.m b/matlab/fit.m new file mode 100644 index 0000000..64b90e8 --- /dev/null +++ b/matlab/fit.m @@ -0,0 +1,38 @@ +% +% @file fit.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +P1=polyfit(log(n),log(err1),1) +P2=polyfit(log(n),log(err2),1) +P3=polyfit(log(n),log(err3),1) + +figure +loglog(n,err1,'o', n, exp(P1(2)).*n.^P1(1), 'b') +hold on +loglog(n,err2,'rh', n, exp(P2(2)).*n.^P2(1),'r') +loglog(n,err3,'*k', n, exp(P3(2)).*n.^P3(1), 'k') +grid on +xlabel('Number of intervals N'); +ylabel('Discretization Error') +title('2D Cylindrical problem with m=1, s=10') diff --git a/matlab/fit1d.m b/matlab/fit1d.m new file mode 100644 index 0000000..6475e77 --- /dev/null +++ b/matlab/fit1d.m @@ -0,0 +1,94 @@ +% +% @file fit1d.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +file='fit1d.h5'; + +x = hdf5read(file, '/X'); +f = hdf5read(file, '/FCALC'); +fexact = hdf5read(file, '/FEXACT'); +error = hdf5read(file, '/ERROR'); + +f1 = hdf5read(file, '/FCALC1'); +fexact1 = hdf5read(file, '/FEXACT1'); +error1 = hdf5read(file, '/ERROR1'); + +splines = hdf5read(file, '/SPLINES'); splines = splines'; +% +% Attributes +% +nidbas = hdf5read(file,'/NIDBAS'); +nx = hdf5read(file,'/NX'); +attr=hdf5read(file,'/FEXACT/title'); fexact_ann=attr.Data; +attr=hdf5read(file,'/FCALC/title'); f_ann=attr.Data; +attr=hdf5read(file,'/ERROR/title'); error_ann=attr.Data; + +attr=hdf5read(file,'/FEXACT1/title'); fexact1_ann=attr.Data; +attr=hdf5read(file,'/FCALC1/title'); f1_ann=attr.Data; +attr=hdf5read(file,'/ERROR1/title'); error1_ann=attr.Data; + +label=sprintf('Splines of degree %d, NX =%d', nidbas, nx); + +ns = size(splines,1); + + +c=['b','g','r','c','m','y','k']; +nc=size(c,2); + +figure +subplot(511) +hold on +for i = 1:ns + cc = mod(i-1,nc)+1; + plot(x,splines(i,:),c(cc)) +end +grid on +ylabel('Splines') +xlabel('X') +title(label); +hold off + +subplot(512) +plot(x, f, 'o', x, fexact) +legend(f_ann, fexact_ann) +xlabel('X') +grid on + +subplot(513) +plot(x, error) +ylabel(error_ann) +xlabel('X') +grid on + +subplot(514) +plot(x, f1, 'h', x, fexact1) +legend(f1_ann, fexact1_ann) +xlabel('X') +grid on + +subplot(515) +plot(x, error1) +ylabel(error1_ann) +xlabel('X') +grid on diff --git a/matlab/fit2d.m b/matlab/fit2d.m new file mode 100644 index 0000000..f225459 --- /dev/null +++ b/matlab/fit2d.m @@ -0,0 +1,80 @@ +% +% @file fit2d.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +file='fit2d.h5'; +% +% Get data from data sets +% +r=hdf5read(file,'/xpt'); +t=hdf5read(file,'/ypt'); + +fcalc=hdf5read(file,'/fcalc'); +fexact=hdf5read(file,'/fexact'); +errs=hdf5read(file,'/errs'); +% +% Attributes +% +NR=hdf5read(file,'/NX'); NTH=hdf5read(file,'/NY'); +NIDBAS1=hdf5read(file,'/NIDBAS1'); +NIDBAS2=hdf5read(file,'/NIDBAS2'); +MBESS=hdf5read(file,'/MBESS'); +LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), mbess = %d', NR, NTH, ... + NIDBAS1, NIDBAS2, MBESS); + +attr=hdf5read(file,'/xpt/title'); x_ann=attr.Data; +attr=hdf5read(file,'/ypt/title'); y_ann=attr.Data; +attr=hdf5read(file,'/fcalc/title'); fcalc_ann=attr.Data; +attr=hdf5read(file,'/fexact/title'); fexact_ann=attr.Data; +attr=hdf5read(file,'/errs/title');errs_ann=attr.Data; + +[R,T]=meshgrid(r,t); +x = R.*cos(T); y= R.*sin(T); + +figure +subplot(221) +pcolor(double(x),double(y),double(fcalc)); +shading interp +xlabel('X'); ylabel('Y') +title(LABEL) +colorbar + +subplot(222) +pcolor(double(x),double(y),double(fexact)) +shading interp +axis image +xlabel('X'); ylabel('Y') +title('X-Y plane') +colorbar + +subplot(223) +surfc(double(x),double(y),double(errs)) +xlabel('X'); ylabel('Y'); +title(errs_ann) + +%% Plot error at theta ~ pi/4 +k = max(find(t. +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +function [yfit, P] = fitlog(x,y) +P=polyfit(log(x),log(y),1); +yfit = exp(P(2)).*x.^P(1); diff --git a/matlab/fourier_gs.m b/matlab/fourier_gs.m new file mode 100644 index 0000000..09bbb05 --- /dev/null +++ b/matlab/fourier_gs.m @@ -0,0 +1,69 @@ +% +% @file fourier_gs.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +clear all +tau=0; +alpha=0.5; + +theta=-pi:0.02:pi; +[x,y] = meshgrid(theta,theta); +n1=length(theta); +n2=n1; + +%%%% +%%%% Gauss-Seidel relaxations +%%%% +str_title=sprintf('tau = %.1f, alpha = %.2f',tau, alpha); +ee = exp(i.*theta); +eep= conj(ee); +csin= alpha.*complex(alpha, (tau/2).*imag(ee)); +G=zeros(n1,n2); +for ii=1:n1 + for jj=1:n2 + num = ee(ii) + csin(ii)*ee(jj); + G(ii,jj) = num / (2*(1+alpha^2) - conj(num)); + end +end + + +figure +hold off +G0=(ee+csin)./(2*(1+alpha^2)-(eep+conj(csin))); +plot(theta, abs(G(:,1)), 'r', 'LineWidth', 2) +hold on +plot(theta, abs(G0), 'g', 'LineWidth', 2) +for jj=1:20:n2 + plot(theta, abs(G(:,jj)), 'b') +end +xlabel('\theta_1'); ylabel('Amplification Factor for Gauss-Seidel') +title(str_title) + +% $$$ figure +% $$$ mesh(x,y,abs(G)) +% $$$ xlabel('\theta_1'); ylabel('\theta_2') +% $$$ title(str_title); +% $$$ view(-120,25) + +max(max(abs(G))) diff --git a/matlab/fourier_jac.m b/matlab/fourier_jac.m new file mode 100644 index 0000000..4e9da71 --- /dev/null +++ b/matlab/fourier_jac.m @@ -0,0 +1,69 @@ +% +% @file fourier_jac.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +clear all +omega=0.8; +tau=-2; +alpha=1; + +c=2*omega/(1+alpha^2); +str_title=sprintf('omega = %.1f, tau = %.1f, alpha = %.2f', omega, ... + tau, alpha) + +theta1=-pi:0.01:pi; +theta2=-pi:0.01:pi; +[x,y] = meshgrid(theta1,theta2); + +n1=length(theta1); +n2=length(theta2); + +%%%% +%%%% Damped Jacobi relaxations +%%%% +G=zeros(n1,n2); +for ii=1:n1 + for jj=1:n2 + G(ii,jj) = 1-c.*( sin(theta1(ii)/2)^2 + alpha^2*sin(theta2(jj)/2)^2 ... + + 0.25*alpha*tau*sin(theta1(ii))*sin(theta2(jj)) ); + end +end + +figure +hold off +G0 = 1-c.*sin(theta1./2).^2; +plot(theta1, G(:,1), 'r', 'LineWidth', 2) +hold on +plot(theta1, G0, 'g', 'LineWidth', 2) +for jj=1:20:n2 + plot(theta1, G(:,jj), 'b') +end +xlabel('\theta_1'); ylabel('Amplification Factor for Jacobi') +title(str_title) +% $$$ figure +% $$$ mesh(x,y,G) +% $$$ xlabel('\theta_1'); ylabel('\theta_2') +% $$$ title(str_title); + +max(max(abs(G))) diff --git a/matlab/fourier_smooth.m b/matlab/fourier_smooth.m new file mode 100644 index 0000000..45e8a7d --- /dev/null +++ b/matlab/fourier_smooth.m @@ -0,0 +1,106 @@ +% +% @file fourier_smooth.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +% Brute force computation of \mu = MAX |G| for +% damped Jacobi and Gauss-Seidel relaxation +% +% Find optimal damping factor \omega for Jacobi relaxations +% +if ~exist('tau','var'), tau=0; end +if ~exist('alpha','var'), alpha=1.0; end +dth=0.01; + +theta1=-pi+dth:dth:pi; +theta2=-pi+dth:dth:pi; +nth1=length(theta1); +nth2=length(theta2); + +sint12=sin(theta1./2).^2; +sint22=sin(theta2./2).^2; + +sint1=sin(theta1); +sint2=sin(theta2); +[S1,S2]=meshgrid(sint1,sint2); +ctau=0.25*alpha*tau; + +omega=0.5:0.002:1; +n=length(omega); +G = zeros(nth1,nth2); +for i=1:n + c=2*omega(i)/(1+alpha^2); + for i1=1:nth1 + for i2=1:nth2 + if or(abs(theta1(i1))>= pi/2, abs(theta2(i2)) >= pi/2); + G(i1,i2) = abs(1-c*(sint12(i1)+alpha^2*sint22(i2) + ... + ctau*sint1(i1)*sint2(i2))); + end + end + end + [gmax,imax]=max(G); + [mu(i),jmax]=max(gmax); + theta1_opt(i) = theta1(imax(jmax)); + theta2_opt(i) = theta2(jmax); +end +[mu_min,i_min]=min(mu); +omega_opt=omega(i_min); +str_title=sprintf(['omega = %.3f, mu = %.3f, alpha = %.2f, tau = ' ... +'%.1f'], omega_opt, mu_min, alpha, tau); + +figure +subplot(211) +plot(omega,mu,'LineWidth',2); +xlabel('\omega'); ylabel('\mu') +grid on +title(str_title); +subplot(212) +plot(omega, theta1_opt, omega, theta2_opt,'LineWidth',2); +legend('\theta_{1opt}', '\theta_{2opt}') +xlabel('\omega'); ylabel('optimum \theta') +grid on +% +% \mu for Gauss-Seidel relaxation +% +Ggs = zeros(nth1,nth2); +exp1=complex(cos(theta1),sin(theta1)); +exp2=complex(cos(theta2),sin(theta2)); +c=2*(1+alpha^2); +ctau=complex(alpha^2, (0.5*alpha*tau).*sint1); +for i1=1:nth1 + for i2=1:nth2 + if or(abs(theta1(i1))>= pi/2, abs(theta2(i2)) >= pi/2); + Num = exp1(i1) + ctau(i1)*exp2(i2); + Ggs(i1,i2) = abs( Num/(c-conj(Num)) ); + end + end +end +[gmax,imax]=max(Ggs); +[mugs,jmax]=max(gmax); +theta1_gs_opt = theta1(imax(jmax)); +theta2_gs_opt = theta2(jmax); +fprintf('alpha = %.2f, tau = %.1f, Ggs = %.4f, theta1 = %.4f, theta2 = %.4f\n', alpha, tau, ... + mugs, theta1_gs_opt, theta2_gs_opt); +subplot(211) +hold on +plot(omega,repmat(mugs,1,length(omega)),'r--','LineWidth',2) diff --git a/matlab/gb_mat.m b/matlab/gb_mat.m new file mode 100644 index 0000000..a15b31d --- /dev/null +++ b/matlab/gb_mat.m @@ -0,0 +1,43 @@ +% +% @file gb_mat.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +function [mata] = gb_mat(file, dset) +rank=h5readatt(file, dset, 'RANK'); +ku=h5readatt(file, dset, 'KU'); +kl=h5readatt(file, dset, 'KL'); +gbmat=h5read(file, dset); +m=rank; n=rank; +mata = zeros(m,n); +for i=1:m + jmin = max(1,i-kl); + jmax = min(n,i+ku); + for j=jmin:jmax + ib = kl+ku+i-j+1; + mata(i,j)=gbmat(ib,j); + end +end +clear gbmat; + + \ No newline at end of file diff --git a/matlab/gbmat.m b/matlab/gbmat.m new file mode 100644 index 0000000..3bb7c5e --- /dev/null +++ b/matlab/gbmat.m @@ -0,0 +1,60 @@ +% +% @file gbmat.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +file='pde2d.h5'; +% +% Attributes of GB matrix +% +NR=hdf5read(file,'/NX'); NTH=hdf5read(file,'/NY'); +NIDBAS1=hdf5read(file,'/NIDBAS1'); +NIDBAS2=hdf5read(file,'/NIDBAS2'); + +%mat='/MAT1'; +rank=hdf5read(file,strcat(mat,'/RANK')); +ku=hdf5read(file,strcat(mat,'/KU')); +kl=hdf5read(file,strcat(mat,'/KL')); +gb_mat=hdf5read(file,mat); +rhs0=hdf5read(file,'/RHS'); +sol0=hdf5read(file,'/SOL'); +% +m=rank; n=rank; +a = zeros(m,n); +for i=1:m + jmin = max(1,i-kl); + jmax = min(n,i+ku); + for j=jmin:jmax + ib = kl+ku+i-j+1; + a(i,j)=gb_mat(ib,j); + end +end +%clear gb_mat; +S = sparse(a); +%clear a; +figure +spy(S); +LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), rank = %d', NR, NTH, ... + NIDBAS1, NIDBAS2, rank); +title(LABEL) + diff --git a/matlab/gs_fd.m b/matlab/gs_fd.m new file mode 100644 index 0000000..8e8c1b6 --- /dev/null +++ b/matlab/gs_fd.m @@ -0,0 +1,66 @@ +% +% @file gs_fd.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +clear all + +N=1024; +L=100; +n0=1; +Narr=[128 256 512 1024]; + +for i=1:length(Narr); + N=Narr(i); + h=L/N; h2=h^2; + title_str=sprintf(['N=%d, L=%.1f, n0=%.1f'], N, L, n0); + v0=-2.+n0*h2; + u0=1.; + dom(i)= 2*abs(u0)/abs(v0); + v=v0.*ones(N,1); + u=u0*ones(N-1,1); + mata=diag(u,1) + diag(u,-1) + diag(v); + matl= tril(mata,0); % D+L + lambda = eig(-triu(mata,1),matl); + rho(i)=max(abs(lambda)); + +% $$$ figure +% $$$ plot(lambda,'o') +% $$$ xlabel('Real eigenvalue'), ylabel('Imag eigenvalue') +% $$$ grid on +% $$$ axis equal +% $$$ title_str=sprintf(['N=%d, L=%.1f, n0=%.1f, Spec. Radius=%.4f, dom=%.4f'], N, L, ... +% $$$ n0, rho(i),dom(i)); +% $$$ title(title_str); + fprintf(1, 'Spectral Radius of GS relaxation matrix = %.4f, dom=%.4f\n', ... + rho(i),dom(i)) +end + +figure +plot(Narr, rho,'o-') +xlabel('N'); ylabel('GS spectral radius') +title_str=sprintf(['FD scheme, L=%.1f, n0=%.1f'], L, n0); +title(title_str); +grid on + + diff --git a/matlab/gs_fe.m b/matlab/gs_fe.m new file mode 100644 index 0000000..7dfaa33 --- /dev/null +++ b/matlab/gs_fe.m @@ -0,0 +1,66 @@ +% +% @file gs_fe.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +clear all + +N=1024; +L=100; +n0=1; +Narr=[128 256 512 1024]; + +for i=1:length(Narr); + N=Narr(i); + h=L/N; h2=h^2; + title_str=sprintf(['N=%d, L=%.1f, n0=%.1f'], N, L, n0); + v0=-2.+2.0*n0*h2/6.0; + u0=1.+n0*h2/6.0; + dom(i)= 2*abs(u0)/abs(v0); + v=v0.*ones(N,1); + u=u0*ones(N-1,1); + mata=diag(u,1) + diag(u,-1) + diag(v); + matl= tril(mata,0); % D+L + lambda = eig(-triu(mata,1),matl); + rho(i)=max(abs(lambda)); + +% $$$ figure +% $$$ plot(lambda,'o') +% $$$ xlabel('Real eigenvalue'), ylabel('Imag eigenvalue') +% $$$ grid on +% $$$ axis equal +% $$$ title_str=sprintf(['N=%d, L=%.1f, n0=%.1f, Spec. Radius=%.4f, dom=%.4f'], N, L, ... +% $$$ n0, rho(i),dom(i)); +% $$$ title(title_str); + fprintf(1, 'Spectral Radius of GS relaxation matrix = %.4f, dom=%.4f\n', ... + rho(i),dom(i)) +end + +figure +plot(Narr, rho,'o-') +xlabel('N'); ylabel('GS spectral radius') +title_str=sprintf(['FE scheme, L=%.1f, n0=%.1f'], L, n0); +title(title_str); +grid on + + diff --git a/matlab/h5Complex.m b/matlab/h5Complex.m new file mode 100644 index 0000000..ab338f9 --- /dev/null +++ b/matlab/h5Complex.m @@ -0,0 +1,50 @@ +% +% @file h5Complex.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +function z = h5Complex(file, dset) + data = hdf5read(file, dset); + dims = size(data); + rank = size(dims,2); + switch rank + case {1} + for i=1:dims(1) + z(i)=complex(cell2mat(data(i,1).Data(1)), cell2mat(data(i,1).Data(2))); + end + case {2} + for i=1:dims(1) + for j=1:dims(2) + z(i,j)=complex(cell2mat(data(i,j).Data(1)), cell2mat(data(i,j).Data(2))); + end + end + case {3} + for i=1:dims(1) + for j=1:dims(2) + for k=1:dims(3) + z(i,j,k)=complex(cell2mat(data(i,j,k).Data(1)), cell2mat(data(i,j,k).Data(2))); + end + end + + end + end diff --git a/matlab/h5Complex_ll.m b/matlab/h5Complex_ll.m new file mode 100644 index 0000000..ad43007 --- /dev/null +++ b/matlab/h5Complex_ll.m @@ -0,0 +1,33 @@ +% +% @file h5Complex_ll.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +function z = h5Complex_ll(file, dset) + fid=H5F.open(file, 'H5F_ACC_RDONLY', 'H5P_DEFAULT'); + dset_id=H5D.open(fid, dset); + dxpl = 'H5P_DEFAULT'; + data = H5D.read(dset_id,'H5ML_DEFAULT','H5S_ALL','H5S_ALL', dxpl); + z = complex(data.real, data.imaginary); + H5D.close(dset_id); + H5F.close(fid); diff --git a/matlab/jac_opt.m b/matlab/jac_opt.m new file mode 100644 index 0000000..82d6f70 --- /dev/null +++ b/matlab/jac_opt.m @@ -0,0 +1,78 @@ +% +% @file jac_opt.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +% +% Find optimal damping factor \omega for Jacobi relaxations +% +clear all +alpha=0.5; + +theta2=pi/2:0.01:pi; +theta1=0:0.01:pi; +[x,y]=meshgrid(theta1,theta1); + +sint12=sin(theta1./2).^2; +sint22=sin(theta2./2).^2; +[ksi,eta]=meshgrid(sint12,sint22); + +nth1=length(theta1); +nth2=length(theta2); + +omega=0.1:0.01:1; +n=length(omega); +for i=1:n + c=2*omega(i)/(1+alpha^2); + G=abs(1 - c.*(ksi+(alpha^2).*eta )); + [gmax,imax]=max(G); + [mu(i),jmax]=max(gmax); + eta_opt(i)=eta(imax(jmax),jmax); + ksi_opt(i)=ksi(imax(jmax),jmax); +end +[mu_min,i_min]=min(mu); +omega_opt=omega(i_min); +str_title=sprintf('omega = %.3f, mu = %.3f, alpha = %.2f', omega_opt, ... + mu_min, alpha); + +figure +plot(omega,mu,'o-', omega, ksi_opt, '*-', omega, eta_opt, '^-'); +legend('\mu', '\xi_{opt}', '\eta_{opt}') +xlabel('\omega'); ylabel('\mu') +grid on +title(str_title); + +% $$$ +% $$$ c=2*omega_opt/(1+alpha^2); +% $$$ G=1-c.*(ksi+(alpha^2).*eta); +% $$$ figure +% $$$ hold off +% $$$ plot(theta1, G(1,:), 'r', 'LineWidth', 2) +% $$$ hold on +% $$$ plot(theta1, G(nth2,:), 'g', 'LineWidth', 2) +% $$$ for jj=1:20:nth2 +% $$$ plot(theta1, G(jj,:), 'b') +% $$$ end +% $$$ xlabel('\theta_1'); ylabel('Amplification Factor for Jacobi') +% $$$ title(str_title) +% $$$ grid on diff --git a/matlab/modes.m b/matlab/modes.m new file mode 100644 index 0000000..ccb5c06 --- /dev/null +++ b/matlab/modes.m @@ -0,0 +1,84 @@ +% +% @file modes.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +N=8; +kmodes=N-1; + +%% modes on fine grid %% +xh=(1/N).*(0:1:N); +wh=zeros(N+1,kmodes); +for k=1:kmodes + wh(:,k) = sin((k*pi).*xh); +end + +%% Restriction %% +R=[ +1 0 0 0 0 0 0 0 0 +0 0.5 1 0.5 0 0 0 0 0 +0 0 0 0.5 1 0.5 0 0 0 +0 0 0 0 0 0.5 1 0.5 0 +0 0 0 0 0 0 0 0 1 +]; + +%% Null space of Restriction %% +ns = [ + 0 0 0 0 + 2 0 0 0 + -1 -1 0 0 + 0 2 0 0 + 0 -1 -1 0 + 0 0 2 0 + 0 0 -1 -1 + 0 0 0 2 + 0 0 0 0]; + +figure +subplot(211) +plot(ns,'o-'); +title('Basis of Null space of Restriction') +subplot(212) +plot(R','o-'); +title('Basis of Range of Prolongation') + + +%% modes on coarse grid %% +N2h = N/2; +x2h=(1/N2h).*(0:1:N2h); +w2h = R*wh; + +x=0:0.01:1.; +figure +for k=1:kmodes + subplot(3,3,k) + plot(xh,wh(:,k),'o', x, sin((k*pi).*x),'b-', x2h, w2h(:,k), 'ro-'); + grid on +end + +figure +for k=1:N/2 + subplot(2,2,k) + plot(xh,wh(:,k),'o-', xh,wh(:,N-k),'r*-') + grid on +end diff --git a/matlab/pde1d.m b/matlab/pde1d.m new file mode 100644 index 0000000..86b6642 --- /dev/null +++ b/matlab/pde1d.m @@ -0,0 +1,72 @@ +% +% @file pde1d.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +file='pde1d.h5'; +fprintf(1,'NX = %d\n', hdf5read(file,'/NX')); +fprintf(1,'NIDBAS = %d\n', hdf5read(file,'/NIDBAS')); +fprintf(1,'NGAUSS = %d\n', hdf5read(file,'/NGAUSS')); +fprintf(1,'KDIFF = %d\n', hdf5read(file,'/KDIFF')); + +x = hdf5read(file, '/XGRID'); +f= hdf5read(file, '/SOLCAL'); +fexact= hdf5read(file, '/SOLANA'); +err=hdf5read(file, '/ERR'); +f=f'; +fexact=fexact'; +err=err'; + +figure +subplot(311) +plot(x,f(1,:),'o',x,fexact(1,:)) +xlabel('X') +ylabel('Function') +grid on +subplot(312) +plot(x,f(2,:),'o',x,fexact(2,:)) +xlabel('X') +ylabel('1st Derivative') +grid on +subplot(313) +plot(x,f(3,:),'o',x,fexact(3,:)) +xlabel('X') +ylabel('2nd Derivative') +grid on + +figure +subplot(311) +plot(x,err(1,:),'o-') +xlabel('X'); +ylabel('Errors on function'); +grid on +subplot(312) +plot(x,err(2,:),'o-') +xlabel('X'); +ylabel('Errors on 1st derivative'); +grid on +subplot(313) +plot(x,err(3,:),'o-') +xlabel('X'); +ylabel('Errors on 2nd derivative'); +grid on diff --git a/matlab/pde1d_eig.m b/matlab/pde1d_eig.m new file mode 100644 index 0000000..8b39b43 --- /dev/null +++ b/matlab/pde1d_eig.m @@ -0,0 +1,30 @@ +% +% @file pde1d_eig.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +file='pde1d_eig.h5'; + +%%% get sparse matrix and its diagonal elelments in dig %%% +[mata,diag]=zcsr_mat(file,'/MAT'); +spy(mata,12); diff --git a/matlab/pde1d_eig_zcsr.m b/matlab/pde1d_eig_zcsr.m new file mode 100644 index 0000000..c6fa3e7 --- /dev/null +++ b/matlab/pde1d_eig_zcsr.m @@ -0,0 +1,30 @@ +% +% @file pde1d_eig_zcsr.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +file='pde1d_eig.h5'; + +%%% get sparse matrix and its diagonal elelments in dig %%% +[mata,diag]=zcsr_mat(file,'/MAT'); +spy(mata,12); diff --git a/matlab/pde1d_eig_zmumps.m b/matlab/pde1d_eig_zmumps.m new file mode 100644 index 0000000..2a431bd --- /dev/null +++ b/matlab/pde1d_eig_zmumps.m @@ -0,0 +1,55 @@ +% +% @file pde1d_eig_zmumps.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +file='pde1d_eig_zmumps.h5'; + +%%% Read mumps matrix and convert to Matlab sparse format +[mata,diag_ele]=zmumps_mat(file,'/MAT'); +n=size(mata,1); + +%spy(mata,12); +arpack_vals=h5Complex_ll(file,'/eig_vals'); +nev=size(arpack_vals,1); +arpack_vecs = h5Complex_ll(file, '/eig_vecs'); + +%%% Compute eigen values and vectors, using EIGS +[V,D,FLAG]=eigs(mata,nev,'SM'); +[eigs_vals,perm]=sort(diag(D)); +eigs_vecs=V(:,perm); + +fprintf('Eigenvalues from Arpack and Matlab eigs\n'); +for i=1:nev + fprintf('%i (%.5e %.5e), %.5e\n',i,real(arpack_vals(i)),imag(arpack_vals(i)),eigs_vals(i)); +end +fprintf('Norm of difference %.3e\n', norm(arpack_vals-eigs_vals,Inf)); + +%%% Renormalize EIGS %%% +fprintf('\n\nDiff of Eigenvectors from Arpack and Matlab eigs\n'); +for i=1:nev + nrm=arpack_vecs(1,i); + eigs_vecs(:,i) = (eigs_vecs(:,i)./eigs_vecs(1,i)).*nrm; + diff_vecs = norm(arpack_vecs(:,i)-eigs_vecs(:,i),Inf); + fprintf('%i %10.3e\n', i, diff_vecs); +end diff --git a/matlab/pde1dp.m b/matlab/pde1dp.m new file mode 100644 index 0000000..97b7a6e --- /dev/null +++ b/matlab/pde1dp.m @@ -0,0 +1,41 @@ +% +% @file pde1dp.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +file='pde1dp.h5'; +fprintf(1,'NX = %d\n', hdf5read(file,'/NX')); +fprintf(1,'NIDBAS = %d\n', hdf5read(file,'/NIDBAS')); +fprintf(1,'NGAUSS = %d\n', hdf5read(file,'/NGAUSS')); + +mata=hdf5read(file,'/mata'); + +xpts = hdf5read(file, '/rhs/x'); +frhs = hdf5read(file, '/rhs/f'); + +figure +plot(xpts,frhs); +xlabel('X') +ylabel('RHS') +grid on + diff --git a/matlab/pde1dp_cmpl.m b/matlab/pde1dp_cmpl.m new file mode 100644 index 0000000..e07fc31 --- /dev/null +++ b/matlab/pde1dp_cmpl.m @@ -0,0 +1,49 @@ +% +% @file pde1dp_cmpl.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +file='pde1dp_cmpl.h5'; + +xgrid=hdf5read(file,'/xgrid'); +nx = size(xgrid)-1; + +rhs=h5Complex(file, '/rhs'); +sol=h5Complex(file, '/sol'); +%mat=h5Complex(file, '/mat'); + +x=hdf5read(file,'/x'); +solana=h5Complex(file,'/solana'); +solcal=h5Complex(file,'/solcal'); +err=hdf5read(file, '/err'); + +figure +subplot(211) +plot(x, real(solana),x,imag(solana),x,real(solcal),'o',x, ... + imag(solcal), '*') +legend('Exact Real', 'Exact Imag', 'Calc. Real', 'Calc. Imag') +xlabel('X'); ylabel('SOL'); +subplot(212) +plot(x, err, 'o-'); +xlabel('X'); ylabel('|Error|') +grid on diff --git a/matlab/pde1dp_cmpl_dft.m b/matlab/pde1dp_cmpl_dft.m new file mode 100644 index 0000000..10aa61a --- /dev/null +++ b/matlab/pde1dp_cmpl_dft.m @@ -0,0 +1,70 @@ +% +% @file pde1dp_cmpl_dft.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +file='pde1dp_cmpl_dft.h5'; + +xgrid=hdf5read(file,'/xgrid'); +nx = size(xgrid)-1; +mode=0:nx-1; + +rhs=h5Complex(file, '/rhs'); +sol=h5Complex(file, '/sol'); +rhs_fft=h5Complex(file, '/rhs_fft'); +sol_fft=h5Complex(file, '/sol_fft'); +mat=h5Complex(file, '/mat'); + +x=hdf5read(file,'/x'); +solana=h5Complex(file,'/solana'); +solcal=h5Complex(file,'/solcal'); +err=hdf5read(file, '/err'); + +figure +subplot(211) +plot(x, real(solana),x,imag(solana),x,real(solcal),'o',x, ... + imag(solcal), '*') +legend('Exact Real', 'Exact Imag', 'Calc. Real', 'Calc. Imag') +xlabel('X'); ylabel('SOL'); +subplot(212) +plot(x, err, 'o-'); +xlabel('X'); ylabel('|Error|') +grid on + +figure +subplot(311) +plot(mode, real(mat), 'o', mode, imag(mat), '*') +xlabel('mode'); ylabel('DFT of MAT') +legend('Real', 'Imag') +grid on + +subplot(312) +plot(mode, real(rhs_fft), 'o', mode, imag(rhs_fft), '*') +xlabel('mode'); ylabel('DFT of RHS') +legend('Real', 'Imag') +grid on +subplot(313) +plot(mode, real(sol_fft), 'o', mode, imag(sol_fft), '*') +xlabel('mode'); ylabel('DFT of SOL') +legend('Real', 'Imag') +grid on diff --git a/matlab/pde1dp_cmpl_pardiso.m b/matlab/pde1dp_cmpl_pardiso.m new file mode 100644 index 0000000..1878e8e --- /dev/null +++ b/matlab/pde1dp_cmpl_pardiso.m @@ -0,0 +1,72 @@ +% +% @file pde1dp_cmpl_pardiso.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +file='pde1dp_cmpl_pardiso.h5'; + +xgrid=hdf5read(file,'/xgrid'); +nx = size(xgrid)-1; + +rhs=h5Complex(file, '/rhs'); +sol=h5Complex(file, '/sol'); + +cols=hdf5read(file, '/MAT/cols'); +irow=hdf5read(file, '/MAT/irow'); +val=h5Complex(file, '/MAT/val'); +perm=hdf5read(file, '/MAT/perm'); + +cols=double(cols); +irow=double(irow); +perm=double(perm); +n = size(perm,1); +nnz=size(val,1); + +rows = zeros(nnz,1); +for i=1:n + s = irow(i); + e = irow(i+1)-1; + rows(s:e) = i; +end +mat = sparse(rows,cols,val); +figure +subplot(121); +spy(mat); +subplot(122); +spy(mat(perm,perm)); + +x=hdf5read(file,'/x'); +solana=h5Complex(file,'/solana'); +solcal=h5Complex(file,'/solcal'); +err=hdf5read(file, '/err'); + +figure +subplot(211) +plot(x, real(solana),x,imag(solana),x,real(solcal),'o',x, ... + imag(solcal), '*') +legend('Exact Real', 'Exact Imag', 'Calc. Real', 'Calc. Imag') +xlabel('X'); ylabel('SOL'); +subplot(212) +plot(x, err, 'o-'); +xlabel('X'); ylabel('|Error|') +grid on diff --git a/matlab/pde2d.m b/matlab/pde2d.m new file mode 100644 index 0000000..c375cef --- /dev/null +++ b/matlab/pde2d.m @@ -0,0 +1,85 @@ +% +% @file pde2d.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +file='pde2d.h5'; +% +% Get data from data sets +% +r=hdf5read(file,'/xgrid'); +t=hdf5read(file,'/ygrid'); +sol=hdf5read(file,'/sol')'; +solexact=hdf5read(file,'/solana')'; +err=hdf5read(file,'/errors')'; +solr=hdf5read(file,'/derivx')'; +solt=hdf5read(file,'/derivy')'; +% +% Attributes +% +NR=hdf5read(file,'/NX'); NTH=hdf5read(file,'/NY'); +NIDBAS1=hdf5read(file,'/NIDBAS1'); +NIDBAS2=hdf5read(file,'/NIDBAS2'); +MBESS=hdf5read(file,'/MBESS'); +LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), mbess = %d', NR, NTH, ... + NIDBAS1, NIDBAS2, MBESS); + +attr=hdf5read(file,'/xgrid/title'); x_ann=attr.Data; +attr=hdf5read(file,'/ygrid/title'); y_ann=attr.Data; +attr=hdf5read(file,'/sol/title'); sol_ann=attr.Data; +attr=hdf5read(file,'/solana/title'); solexact_ann=attr.Data; +attr=hdf5read(file,'/errors/title');err_ann=attr.Data; + +[R,T]=meshgrid(r,t); +x = R.*cos(T); y= R.*sin(T); +solx = cos(T).*solr - sin(T)./R.*solt; +soly = sin(T).*solr + cos(T)./R.*solt; + +figure +subplot(221) +pcolor(double(r),double(t),double(sol)); +shading interp +hold on, quiver(r,t,solr,solt) +xlabel(x_ann); ylabel(y_ann) +title(LABEL) +colorbar + +subplot(222) +pcolor(double(x),double(y),double(sol)) +shading interp +hold on, quiver(x,y,solx,soly) +hold off, axis image +xlabel('X'); ylabel('Y') +title('X-Y plane') +colorbar + +subplot(223) +surfc(double(x),double(y),double(sol)) +xlabel('X'); ylabel('Y'); +title(sol_ann) + +subplot(224) +surfc(double(x),double(y),double(err)) +xlabel('X'); ylabel('Y'); +title(err_ann) + diff --git a/matlab/pde2d_mumps.m b/matlab/pde2d_mumps.m new file mode 100644 index 0000000..268f2ed --- /dev/null +++ b/matlab/pde2d_mumps.m @@ -0,0 +1,97 @@ +% +% @file pde2d_mumps.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +file='pde2d_mumps.h5'; + +nr=hdf5read(file,'/', 'NX'); +nth=hdf5read(file,'/', 'NY'); +NIDBAS1=hdf5read(file,'/','NIDBAS1'); +NIDBAS2=hdf5read(file,'/','NIDBAS2'); +MBESS=hdf5read(file,'/','MBESS'); + +r=hdf5read(file,'/xgrid'); +t=hdf5read(file,'/ygrid'); +sol=hdf5read(file,'/sol')'; +solexact=hdf5read(file,'/solana')'; +err=hdf5read(file,'/errors')'; +solr=hdf5read(file,'/derivx')'; +solt=hdf5read(file,'/derivy')'; +LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), mbess = %d', nr, nth, ... + NIDBAS1, NIDBAS2, MBESS); + +figure +subplot(211) +plot(r, sol(nth/2,:), 'o', r, solexact(nth/2,:)) +xlabel('r') +grid on +title(LABEL) +subplot(212) +if MBESS == 0 + plot(t, sol(:,1), 'o', t, solexact(:,1)) +else + plot(t, sol(:,nr/2), 'o', t, solexact(:,nr/2)) +end +xlabel('\theta') +grid on + + + + +% $$$ if verLessThan('matlab', '7.9'); +% $$$ n = hdf5read(file,'/MAT/RANK'); +% $$$ nnz = hdf5read(file,'/MAT/NNZ'); +% $$$ nlsym = hdf5read(file,'/MAT/NLSYM'); +% $$$ else +% $$$ n = hdf5read(file,'/MAT/', 'RANK'); +% $$$ nnz = hdf5read(file,'/MAT/', 'NNZ'); +% $$$ nlsym = hdf5read(file,'/MAT/', 'NLSYM'); +% $$$ end +% $$$ +% $$$ cols=hdf5read(file, '/MAT/cols'); +% $$$ irow=hdf5read(file, '/MAT/irow'); +% $$$ val=hdf5read(file, '/MAT/val'); +% $$$ perm=hdf5read(file, '/MAT/perm'); +% $$$ +% $$$ rows = zeros(nnz,1); +% $$$ cols=double(cols); +% $$$ irow=double(irow); +% $$$ perm=double(perm); +% $$$ +% $$$ for i=1:n +% $$$ s = irow(i); +% $$$ e = irow(i+1)-1; +% $$$ rows(s:e) = i; +% $$$ end +% $$$ +% $$$ mat = sparse(rows,cols,val); +% $$$ figure +% $$$ subplot(121); +% $$$ spy(mat(perm,perm)); +% $$$ title('Matrix structure') +% $$$ subplot(122); +% $$$ spy(chol(mat(perm,perm))); +% $$$ title('Factor L^T') + + diff --git a/matlab/pde2d_nh.m b/matlab/pde2d_nh.m new file mode 100644 index 0000000..cd12593 --- /dev/null +++ b/matlab/pde2d_nh.m @@ -0,0 +1,101 @@ +% +% @file pde2d_nh.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +file='pde2d_nh.h5'; +% +% Get data from data sets +% +r=hdf5read(file,'/xgrid'); +t=hdf5read(file,'/ygrid'); +sol=hdf5read(file,'/sol')'; +solexact=hdf5read(file,'/solana')'; +err=hdf5read(file,'/errors')'; +errx=hdf5read(file,'/errors_x')'; +erry=hdf5read(file,'/errors_y')'; +solr=hdf5read(file,'/derivx')'; +solt=hdf5read(file,'/derivy')'; +% +% Attributes +% +NR=hdf5read(file,'/NX'); NTH=hdf5read(file,'/NY'); +NIDBAS1=hdf5read(file,'/NIDBAS1'); +NIDBAS2=hdf5read(file,'/NIDBAS2'); +MBESS=hdf5read(file,'/MBESS'); +LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), mbess = %d', NR, NTH, ... + NIDBAS1, NIDBAS2, MBESS); + +attr=hdf5read(file,'/xgrid/title'); x_ann=attr.Data; +attr=hdf5read(file,'/ygrid/title'); y_ann=attr.Data; +attr=hdf5read(file,'/sol/title'); sol_ann=attr.Data; +attr=hdf5read(file,'/solana/title'); solexact_ann=attr.Data; +attr=hdf5read(file,'/errors/title');err_ann=attr.Data; + +[R,T]=meshgrid(r,t); +x = R.*cos(T); y= R.*sin(T); +solx = cos(T).*solr - sin(T)./R.*solt; +soly = sin(T).*solr + cos(T)./R.*solt; + +figure +subplot(221) +pcolor(double(r),double(t),double(sol)); +shading interp +hold on, quiver(r,t,solr,solt) +xlabel(x_ann); ylabel(y_ann) +title(LABEL) +colorbar + +subplot(222) +pcolor(double(x),double(y),double(sol)) +shading interp +hold on, quiver(x,y,solx,soly) +hold off, axis image +xlabel('X'); ylabel('Y') +title('X-Y plane') +colorbar + +subplot(223) +surfc(double(x),double(y),double(sol)) +xlabel('X'); ylabel('Y'); +title(sol_ann) + +subplot(224) +surfc(double(x),double(y),double(err)) +xlabel('X'); ylabel('Y'); +title(err_ann) + +figure +subplot(311) +plot(t,err(:,NR+1),'o-') +xlabel('\theta'); ylabel('Error on solution') +grid on +title('Error on Boundary r=1'); +subplot(312) +plot(t,errx(:,NR+1),'o-') +xlabel('\theta'); ylabel('Error on x-derivative') +grid on +subplot(313) +plot(t,erry(:,NR+1),'o-') +xlabel('\theta'); ylabel('Error on y-derivative') +grid on diff --git a/matlab/pde2d_pardiso.m b/matlab/pde2d_pardiso.m new file mode 100644 index 0000000..1ec3ae3 --- /dev/null +++ b/matlab/pde2d_pardiso.m @@ -0,0 +1,63 @@ +% +% @file pde2d_pardiso.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +file='pde2d_pardiso.h5'; + +if verLessThan('matlab', '7.9'); + n = hdf5read(file,'/MAT/RANK'); + nnz = hdf5read(file,'/MAT/NNZ'); + nlsym = hdf5read(file,'/MAT/NLSYM'); +else + n = hdf5read(file,'/MAT/', 'RANK'); + nnz = hdf5read(file,'/MAT/', 'NNZ'); + nlsym = hdf5read(file,'/MAT/', 'NLSYM'); +end + +cols=hdf5read(file, '/MAT/cols'); +irow=hdf5read(file, '/MAT/irow'); +val=hdf5read(file, '/MAT/val'); +perm=hdf5read(file, '/MAT/perm'); + +rows = zeros(nnz,1); +cols=double(cols); +irow=double(irow); +perm=double(perm); + +for i=1:n + s = irow(i); + e = irow(i+1)-1; + rows(s:e) = i; +end + +mat = sparse(rows,cols,val); +figure +subplot(121); +spy(mat(perm,perm)); +title('Matrix structure') +subplot(122); +spy(chol(mat(perm,perm))); +title('Factor L^T') + + diff --git a/matlab/pde2d_sym_pardiso.m b/matlab/pde2d_sym_pardiso.m new file mode 100644 index 0000000..7eb2c8f --- /dev/null +++ b/matlab/pde2d_sym_pardiso.m @@ -0,0 +1,136 @@ +% +% @file pde2d_sym_pardiso.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +file='pde2d_sym_pardiso.h5'; +% +% Get data from data sets +% +r=hdf5read(file,'/xgrid'); +t=hdf5read(file,'/ygrid'); +sol=hdf5read(file,'/sol')'; +solexact=hdf5read(file,'/solana')'; +err=hdf5read(file,'/errors')'; +solr=hdf5read(file,'/derivx')'; +solt=hdf5read(file,'/derivy')'; +% +% Attributes +% +if verLessThan('matlab', '7.9'); + NR=hdf5read(file,'/NX'); NTH=hdf5read(file,'/NY'); + NIDBAS1=hdf5read(file,'/NIDBAS1'); + NIDBAS2=hdf5read(file,'/NIDBAS2'); + MBESS=hdf5read(file,'/MBESS'); + EPSI=hdf5read(file,'/EPSI'); + attr=hdf5read(file,'/xgrid/title'); x_ann=attr.Data; + attr=hdf5read(file,'/ygrid/title'); y_ann=attr.Data; + attr=hdf5read(file,'/sol/title'); sol_ann=attr.Data; + attr=hdf5read(file,'/solana/title'); solexact_ann=attr.Data; + attr=hdf5read(file,'/errors/title');err_ann=attr.Data; +else + NR=hdf5read(file,'/','NX'); NTH=hdf5read(file,'/','NY'); + NIDBAS1=hdf5read(file,'/','NIDBAS1'); + NIDBAS2=hdf5read(file,'/','NIDBAS2'); + MBESS=hdf5read(file,'/','MBESS'); + EPSI=hdf5read(file,'/','EPSI'); + attr=hdf5read(file,'/xgrid/','title'); x_ann=attr.Data; + attr=hdf5read(file,'/ygrid/','title'); y_ann=attr.Data; + attr=hdf5read(file,'/sol/','title'); sol_ann=attr.Data; + attr=hdf5read(file,'/solana/','title'); solexact_ann=attr.Data; + attr=hdf5read(file,'/errors/','title');err_ann=attr.Data; +end +LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), mbess = %d, epsi = %3.2f', ... + NR, NTH, NIDBAS1, NIDBAS2, MBESS, EPSI); + + +[R,T]=meshgrid(r,t); +x = R.*cos(T); y= R.*sin(T); +solx = cos(T).*solr - sin(T)./R.*solt; +soly = sin(T).*solr + cos(T)./R.*solt; + +figure +subplot(221) +pcolor(double(r),double(t),double(sol)); +shading interp +hold on, quiver(r,t,solr,solt) +xlabel(x_ann); ylabel(y_ann) +title(LABEL) +colorbar + +subplot(222) +pcolor(double(x),double(y),double(sol)) +shading interp +hold on, quiver(x,y,solx,soly) +hold off, axis image +xlabel('X'); ylabel('Y') +title('X-Y plane') +colorbar + +subplot(223) +surfc(double(x),double(y),double(sol)) +xlabel('X'); ylabel('Y'); +title(sol_ann) + +subplot(224) +surfc(double(x),double(y),double(err)) +xlabel('X'); ylabel('Y'); +title(err_ann) + + + +if verLessThan('matlab', '7.9'); + n = hdf5read(file,'/MAT/RANK'); + nnz = hdf5read(file,'/MAT/NNZ'); + nlsym = hdf5read(file,'/MAT/NLSYM'); +else + n = hdf5read(file,'/MAT/', 'RANK'); + nnz = hdf5read(file,'/MAT/', 'NNZ'); + nlsym = hdf5read(file,'/MAT/', 'NLSYM'); +end + +cols=hdf5read(file, '/MAT/cols'); +irow=hdf5read(file, '/MAT/irow'); +val=hdf5read(file, '/MAT/val'); +perm=hdf5read(file, '/MAT/perm'); + +rows = zeros(nnz,1); +cols=double(cols); +irow=double(irow); +perm=double(perm); + +for i=1:n + s = irow(i); + e = irow(i+1)-1; + rows(s:e) = i; +end + +mat = sparse(rows,cols,val); +figure + subplot(121) + spy(mat) + title('Original Matrix structure') + subplot(122) + spy(mat(perm,perm)) + title('Permuted Matrix structure') + diff --git a/matlab/pde2d_sym_pardiso_dft.m b/matlab/pde2d_sym_pardiso_dft.m new file mode 100644 index 0000000..5b0f948 --- /dev/null +++ b/matlab/pde2d_sym_pardiso_dft.m @@ -0,0 +1,171 @@ +% +% @file pde2d_sym_pardiso_dft.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +mat_disp=1; + +file='pde2d_sym_pardiso_dft.h5'; + +if verLessThan('matlab', '7.9'); + n = hdf5read(file,'/MAT1/RANK'); + nnz = hdf5read(file,'/MAT1/NNZ'); + nlsym = hdf5read(file,'/MAT1/NLSYM'); + NR=hdf5read(file,'/NX'); NTH=hdf5read(file,'/NY'); + NIDBAS1=hdf5read(file,'/NIDBAS1'); + NIDBAS2=hdf5read(file,'/NIDBAS2'); + MBESS=hdf5read(file,'/MBESS'); + EPSI=hdf5read(file,'/EPSI'); + KMIN=hdf5read(file,'/KMIN'); + KMAX=hdf5read(file,'/KMAX'); +else + n = hdf5read(file,'/MAT1/', 'RANK'); + nnz = hdf5read(file,'/MAT1/', 'NNZ'); + nlsym = hdf5read(file,'/MAT1/', 'NLSYM'); + NR=hdf5read(file,'/', 'NX'); NTH=hdf5read(file,'/', 'NY'); + NIDBAS1=hdf5read(file,'/', 'NIDBAS1'); + NIDBAS2=hdf5read(file,'/', 'NIDBAS2'); + MBESS=hdf5read(file,'/', 'MBESS'); + EPSI=hdf5read(file,'/','EPSI'); + KMIN=hdf5read(file,'/','KMIN'); + KMAX=hdf5read(file,'/','KMAX'); +end +LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), mbess = %d, epsi = %3.2f', NR, NTH, ... + NIDBAS1, NIDBAS2, MBESS, EPSI); +DK = KMAX-KMIN+1; + +attr=hdf5read(file,'/xgrid/','title'); x_ann=attr.Data; +attr=hdf5read(file,'/ygrid/','title'); y_ann=attr.Data; +attr=hdf5read(file,'/sol/','title'); sol_ann=attr.Data; +attr=hdf5read(file,'/solana/','title'); solexact_ann=attr.Data; +attr=hdf5read(file,'/errors/','title');err_ann=attr.Data; + +if mat_disp == 1 + cols=hdf5read(file, '/MAT1/cols'); + irow=hdf5read(file, '/MAT1/irow'); + val=h5Complex(file, '/MAT1/val'); + perm=hdf5read(file, '/MAT1/perm'); + + rows = zeros(nnz,1); + cols=double(cols); + irow=double(irow); + perm=double(perm); + + for i=1:n + s = irow(i); + e = irow(i+1)-1; + rows(s:e) = i; + end + + valr=real(val); vali=imag(val); + mat = sparse(rows,cols,valr); + figure + subplot(121) + spy(mat,8) + title('Original Matrix structure') + subplot(122) + spy(mat(perm,perm),8) + title('Permuted Matrix structure') +end + +r=hdf5read(file,'/xgrid'); +t=hdf5read(file,'/ygrid'); +sol=hdf5read(file,'/sol')'; +solexact=hdf5read(file,'/solana')'; +err=hdf5read(file,'/errors')'; +solr=hdf5read(file,'/derivx')'; +solt=hdf5read(file,'/derivy')'; + +[R,T]=meshgrid(r,t); +x = R.*cos(T); y= R.*sin(T); +solx = cos(T).*solr - sin(T)./R.*solt; +soly = sin(T).*solr + cos(T)./R.*solt; + +figure +subplot(221) +pcolor(r,t,sol); +shading interp +hold on, quiver(r,t,solr,solt) +xlabel(x_ann); ylabel(y_ann) +title(LABEL) +colorbar + +subplot(222) +pcolor(x,y,sol) +shading interp +hold on, quiver(x,y,solx,soly) +hold off, axis image +xlabel('X'); ylabel('Y') +title('X-Y plane') +colorbar + +subplot(223) +surfc(x,y,sol) +xlabel('X'); ylabel('Y'); +title(sol_ann) + +subplot(224) +surfc(x,y,err) +xlabel('X'); ylabel('Y'); +title(err_ann) + +figure +ft_sol=h5Complex(file,'/FT_SOL'); +ft_sol=reshape(ft_sol,DK,[]); +m=[KMIN:KMAX]; sp=1:NR+NIDBAS1; +subplot(121) + stem3(sp,m, real(ft_sol), 'filled') + shading interp + xlabel('Radial spline number'); ylabel('m') + title('Real(\phi)') +subplot(122) + stem3(sp,m, imag(ft_sol),'filled') + shading interp + xlabel('Radial spline number'); ylabel('m') + title('Imag(\phi)') + +figure +ft_rhs=h5Complex(file,'/FT_RHS'); +ft_rhs=reshape(ft_rhs,DK,[]); +m=[KMIN:KMAX]; sp=1:NR+NIDBAS1; +subplot(121) + stem3(sp,m, real(ft_rhs), 'filled') + shading interp + xlabel('Radial spline number'); ylabel('m') + title('Real(\rho)') +subplot(122) + stem3(sp,m, imag(ft_rhs),'filled') + shading interp + xlabel('Radial spline number'); ylabel('m') + title('Imag(\rho)') + +figure +energy_k = h5Complex(file,'/ENERGY_K'); +subplot(211) +stem(m, real(energy_k)); +xlabel('m'); ylabel('Real(\phi)'); title('Spectral energy') +subplot(212) +stem(m, imag(energy_k)); +xlabel('m'); ylabel('Imag(\phi)'); title('Spectral energy') + + diff --git a/matlab/pde3d.m b/matlab/pde3d.m new file mode 100644 index 0000000..f8cb948 --- /dev/null +++ b/matlab/pde3d.m @@ -0,0 +1,124 @@ +% +% @file pde3d.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +file='pde3d.h5'; +% +% Get data from data sets +% +x=hdf5read(file,'/xgrid'); +y=hdf5read(file,'/ygrid'); +z=hdf5read(file,'/zgrid'); +% +nx=size(x); +ny=size(y); +nz=size(z); +% +rhs=hdf5read(file,'/RHS'); +coefs=hdf5read(file,'/SOL'); +bcoef=hdf5read(file,'/BCOEF'); +sol=hdf5read(file,'/sol'); +solexact=hdf5read(file,'/solana'); +solx=hdf5read(file,'/derivx'); +soly=hdf5read(file,'/derivy'); +solz=hdf5read(file,'/derivz'); +solx_exact=hdf5read(file,'/derivx_exact'); +soly_exact=hdf5read(file,'/derivy_exact'); +solz_exact=hdf5read(file,'/derivz_exact'); + +figure +k=ceil(nz(1)/2); +subplot(211); +pcolor(x,y,sol(:,:,k)'); +shading interp +xlabel('x'); ylabel('y') +title('Caculated solution') +colorbar + +subplot(212); +pcolor(x,y,solexact(:,:,k)'); +shading interp +xlabel('x'); ylabel('y') +title('Analytical solution') +colorbar + +figure +err=sol-solexact; +pcolor(x,y,err(:,:,k)'); +shading interp +xlabel('x'); ylabel('y') +title('Discretization error') +colorbar + +figure +subplot(211); +pcolor(x,y,solx(:,:,k)'); +shading interp +xlabel('x'); ylabel('y') +title('Caculated d/dx') +colorbar + +subplot(212); +pcolor(x,y,solx_exact(:,:,k)'); +shading interp +xlabel('x'); ylabel('y') +title('Analytical d/dx') +colorbar + +figure +k=ceil(nz(1)/2); +subplot(211); +pcolor(x,y,soly(:,:,k)'); +shading interp +xlabel('x'); ylabel('y') +title('Caculated d/dy') +colorbar + +subplot(212); +pcolor(x,y,soly_exact(:,:,k)'); +shading interp +xlabel('x'); ylabel('y') +title('Analytical d/dy') +colorbar + +figure +k=ceil(nz(1)/6); +subplot(211); +pcolor(x,y,solz(:,:,k)'); +shading interp +xlabel('x'); ylabel('y') +title('Caculated d/dz') +colorbar + +subplot(212); +pcolor(x,y,solz_exact(:,:,k)'); +shading interp +xlabel('x'); ylabel('y') +title('Analytical d/dz') +colorbar + +figure +i=ceil(nx(1)/2); +plot(z, squeeze(sol(i,1,:)), 'o', z, squeeze(solexact(i,1,:)),'r') +xlabel('z'); diff --git a/matlab/poisson_fe.m b/matlab/poisson_fe.m new file mode 100644 index 0000000..92012f6 --- /dev/null +++ b/matlab/poisson_fe.m @@ -0,0 +1,123 @@ +% +% @file poisson_fe.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +file='poisson_fe.h5'; +% +nx=h5readatt(file,'/','NX'); +ny=h5readatt(file,'/','NY'); +kx=h5readatt(file,'/','KX'); +ky=h5readatt(file,'/','KY'); +nidbas1=h5readatt(file,'/','NIDBAS1'); +nidbas2=h5readatt(file,'/','NIDBAS2'); +relax=h5readatt(file,'/','RELAX'); +nlevels=h5readatt(file,'/','LEVELS'); +nu1=h5readatt(file,'/','NU1'); +nu2=h5readatt(file,'/','NU2'); +mu=h5readatt(file,'/','MU'); +title_str=sprintf(['N=(%d,%d), NIDBAS=(%d,%d), relax=%s, nu1=%d, ' ... + 'nu2=%d, mu=%d, LEVELS=%d, KX=%d, KY=%d'], ... + nx,ny,nidbas1,nidbas2,relax,nu1,nu2,mu,nlevels,kx,ky); + + +% +% Prolongation matrices at the coarsest grid +% +levels=nlevels; +mglevel=sprintf('/mglevels/level.%.2d', levels); +dset=strcat(mglevel,'/matpx'); +matpx=h5read(file,dset); +dset=strcat(mglevel,'/matpy'); +matpy=h5read(file,dset); + +% +% FE matrix at the finest grid +% +levels=1; +mglevel=sprintf('/mglevels/level.%.2d', levels); +dset=strcat(mglevel,'/mata'); +[mata,diag]=csr_mat(file,dset); + +f=h5read(file,strcat(mglevel,'/f')); +v=h5read(file,strcat(mglevel,'/v')); +f1d=h5read(file,strcat(mglevel,'/f1d')); +v1d=h5read(file,strcat(mglevel,'/v1d')); +% $$$ figure +% $$$ spy(mata) + +% +% Solutions at the finest grid +% +x=h5read(file,'/solutions/xg'); +y=h5read(file,'/solutions/yg'); +dense=h5read(file,'/solutions/dense'); +sol_anal=h5read(file,'/solutions/anal'); +sol_calc=h5read(file,'/solutions/calc'); +sol_direct=h5read(file,'/solutions/direct'); +nx=int32(size(x,1)); +ny=int32(size(y,1)); + +figure +surf(x,y,sol_direct'-sol_anal') +xlabel('X'); ylabel('Y'); +title('Error on the finest grid') + +figure +subplot(211) +[yy,iy] = max(abs(sol_anal),[],2); +[xx,ix] = max(yy); +iy0=iy(ix); +str=sprintf('Solution at y = %.4f', y(iy0)); +plot(x, sol_anal(:,iy0),x, sol_direct(:,iy0),'o') +xlabel('x'); ylabel(str); +grid on +legend('Analytic Solution', 'Direct Solution') +title(title_str) +subplot(212) +[xx,ix] = max(abs(sol_anal)); +[yy,iy] = max(xx); +ix0=ix(iy); +str=sprintf('Solution at x = %.4f', x(ix0)); +plot(y, sol_anal(ix0,:),y, sol_direct(ix0,:),'o') +xlabel('y'); ylabel(str); +grid on +title(title_str) + +% +% Iterations +% +dset='/Iterations/'; +disc_err=h5read(file, strcat(dset,'disc_errors')); +resid=h5read(file, strcat(dset,'residues')); +its=0:1:size(resid,1)-1; +figure +subplot(211) +semilogy(its,resid,'o-') +grid on +xlabel('Iterations'); ylabel('Norm of residue'); +title(title_str); +subplot(212) +semilogy(its,disc_err,'h-') +grid on +xlabel('Iterations'); ylabel('Norm of error'); diff --git a/matlab/poisson_mg.m b/matlab/poisson_mg.m new file mode 100644 index 0000000..e407dd6 --- /dev/null +++ b/matlab/poisson_mg.m @@ -0,0 +1,140 @@ +% +% @file poisson_mg.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +if ~exist('file'), file='poisson_mg.h5'; end +% +nx=h5readatt(file,'/','NX'); +ny=h5readatt(file,'/','NY'); +lx=h5readatt(file,'/','LX'); +ly=h5readatt(file,'/','LY'); +kx=h5readatt(file,'/','KX'); +ky=h5readatt(file,'/','KY'); +beta=h5readatt(file,'/','BETA'); +omega=h5readatt(file,'/','OMEGA'); +relax=h5readatt(file,'/','RELAX'); +mat_type=h5readatt(file,'/','MAT_TYPE'); +nlevels=h5readatt(file,'/','LEVELS'); +mu=h5readatt(file,'/','MU'); +nnu=h5readatt(file,'/','NNU'); +nu1=h5read(file,'/nu1'); +nu2=h5read(file,'/nu2'); +title_str=sprintf(['N=(%d,%d), Lx=%d, Ly=%d, beta=%.4f, relax=%s, V(%d,%d), ' ... + 'LEVELS=%d, KX=%d, KY=%d'], ... + nx,ny,lx,ly,beta,relax,nu1(nnu),nu2(nnu),nlevels,kx,ky); + + +% +% Prolongation matrices at the coarsest grid +% +levels=nlevels; +mglevel=sprintf('/mglevels/level.%.2d', levels); +dset=strcat(mglevel,'/matpx'); +matpx=csr_mat(file,dset); +dset=strcat(mglevel,'/matpy'); +matpy=csr_mat(file,dset); + +% +% FE matrix at the finest grid +% +levels=1; +mglevel=sprintf('/mglevels/level.%.2d', levels); +dset=strcat(mglevel,'/mata'); +if mat_type == 'csr' + [mata,diag]=csr_mat(file,dset); +else + [mata,diag]=cds_mat(file,dset); +end +n=size(diag,1); + +% $$$ figure +% $$$ spy(mata) +% +% Spectral radius of GS Iteration Matrix +% Rg = -(D+L)^(-1) * U +% +% $$$ matl= tril(mata,0); % D+L +% $$$ lambda = eigs(-triu(mata,1),matl) +% $$$ fprintf(1, 'Spectral Radius of GS relaxation matrix = %g\n', max(abs(lambda))) +% $$$ figure +% $$$ plot(lambda, 'o') +% $$$ axis equal +% $$$ grid on +% $$$ xlabel('Real of eigenvalues'); ylabel('Imag of eigenvalues') +% $$$ title(title_str) +% +% Solutions at the finest grid +% +dense=h5read(file,'/dense'); +x=h5read(file,'/solutions/xg'); +y=h5read(file,'/solutions/yg'); +sol_anal=h5read(file,'/solutions/anal'); +%sol_direct=h5read(file,'/solutions/direct'); +sol_calc=h5read(file,'/solutions/calc'); + +figure +% $$$ surf(x,y,sol_calc'-sol_anal') +pcolor(x,y,sol_calc'-sol_anal') +shading interp +colorbar +xlabel('X'); ylabel('Y'); zlabel('Error'); +title(title_str) + +figure +subplot(211) +[yy,iy] = max(abs(sol_anal),[],2); +[xx,ix] = max(yy); +iy0=iy(ix); +str=sprintf('Solution at y = %.4f', y(iy0)); +plot(x, sol_anal(:,iy0),x, sol_calc(:,iy0),'o') +xlabel('x'); ylabel(str); +grid on +legend('Analytic Solution', 'MG Solution') +title(title_str) +subplot(212) +[xx,ix] = max(abs(sol_anal)); +[yy,iy] = max(xx); +ix0=ix(iy); +str=sprintf('Solution at x = %.4f', x(ix0)); +plot(y, sol_anal(ix0,:),y, sol_calc(ix0,:),'o') +xlabel('y'); ylabel(str); +grid on +title(title_str) +% +% Iterations +% +dset='/Iterations/'; +disc_err=h5read(file, strcat(dset,'disc_errors')); +resid=h5read(file, strcat(dset,'residues')); +its=0:1:size(resid,1)-1; +figure +subplot(211) +semilogy(its,resid,'o-') +grid on +xlabel('Iterations'); ylabel('Norm of residue'); +title(title_str); +subplot(212) +semilogy(its,disc_err,'h-') +grid on +xlabel('Iterations'); ylabel('Norm of error'); diff --git a/matlab/ppde3d.m b/matlab/ppde3d.m new file mode 100644 index 0000000..ed333f2 --- /dev/null +++ b/matlab/ppde3d.m @@ -0,0 +1,145 @@ +% +% @file ppde3d.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +file='ppde3d.h5'; +% +% Get data from data sets +% +x=hdf5read(file,'/xgrid'); +y=hdf5read(file,'/ygrid'); +z=hdf5read(file,'/zgrid'); +% +nx=size(x); +ny=size(y); +nz=size(z); +% +sol=hdf5read(file,'/sol'); +solexact=hdf5read(file,'/solana'); +solx=hdf5read(file,'/derivx'); +soly=hdf5read(file,'/derivy'); +solz=hdf5read(file,'/derivz'); +solx_exact=hdf5read(file,'/derivx_exact'); +soly_exact=hdf5read(file,'/derivy_exact'); +solz_exact=hdf5read(file,'/derivz_exact'); + +figure +k=ceil(nz(1)/2); +subplot(311); +pcolor(x,y,sol(:,:,k)'); +shading interp +xlabel('x'); ylabel('y') +title('Caculated solution') +colorbar + +subplot(312); +pcolor(x,y,solexact(:,:,k)'); +shading interp +xlabel('x'); ylabel('y') +title('Analytical solution') +colorbar + +subplot(313); +err=sol-solexact; +pcolor(x,y,err(:,:,k)'); +shading interp +xlabel('x'); ylabel('y') +title('Discretization error') +colorbar + +figure +subplot(311); +pcolor(x,y,solx(:,:,k)'); +shading interp +xlabel('x'); ylabel('y') +title('Caculated d/dx') +colorbar + +subplot(312); +pcolor(x,y,solx_exact(:,:,k)'); +shading interp +xlabel('x'); ylabel('y') +title('Analytical d/dx') +colorbar + +subplot(313); +err=solx-solx_exact; +pcolor(x,y,err(:,:,k)'); +shading interp +xlabel('x'); ylabel('y') +title('Discretization error') +colorbar + +figure +k=ceil(nz(1)/2); +subplot(311); +pcolor(x,y,soly(:,:,k)'); +shading interp +xlabel('x'); ylabel('y') +title('Caculated d/dy') +colorbar + +subplot(312); +pcolor(x,y,soly_exact(:,:,k)'); +shading interp +xlabel('x'); ylabel('y') +title('Analytical d/dy') +colorbar + +subplot(313); +err=soly-soly_exact; +pcolor(x,y,err(:,:,k)'); +shading interp +xlabel('x'); ylabel('y') +title('Discretization error') +colorbar + +figure +k=ceil(nz(1)/6); +subplot(311); +pcolor(x,y,solz(:,:,k)'); +shading interp +xlabel('x'); ylabel('y') +title('Caculated d/dz') +colorbar + +subplot(312); +pcolor(x,y,solz_exact(:,:,k)'); +shading interp +xlabel('x'); ylabel('y') +title('Analytical d/dz') +colorbar + +subplot(313); +err=solz-solz_exact; +pcolor(x,y,err(:,:,k)'); +shading interp +xlabel('x'); ylabel('y') +title('Discretization error') +colorbar + +figure +i=ceil(nx(1)/2); +plot(z, squeeze(sol(i,1,:)), 'o', z, squeeze(solexact(i,1,:)),'r') +xlabel('z'); diff --git a/matlab/ppde3d_pb.m b/matlab/ppde3d_pb.m new file mode 100644 index 0000000..0702db8 --- /dev/null +++ b/matlab/ppde3d_pb.m @@ -0,0 +1,145 @@ +% +% @file ppde3d_pb.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +file='ppde3d_pb.h5'; +% +% Get data from data sets +% +x=hdf5read(file,'/xgrid'); +y=hdf5read(file,'/ygrid'); +z=hdf5read(file,'/zgrid'); +% +nx=size(x); +ny=size(y); +nz=size(z); +% +sol=hdf5read(file,'/sol'); +solexact=hdf5read(file,'/solana'); +solx=hdf5read(file,'/derivx'); +soly=hdf5read(file,'/derivy'); +solz=hdf5read(file,'/derivz'); +solx_exact=hdf5read(file,'/derivx_exact'); +soly_exact=hdf5read(file,'/derivy_exact'); +solz_exact=hdf5read(file,'/derivz_exact'); + +figure +k=ceil(nz(1)/2); +subplot(311); +pcolor(x,y,sol(:,:,k)'); +shading interp +xlabel('x'); ylabel('y') +title('Caculated solution') +colorbar + +subplot(312); +pcolor(x,y,solexact(:,:,k)'); +shading interp +xlabel('x'); ylabel('y') +title('Analytical solution') +colorbar + +subplot(313); +err=sol-solexact; +pcolor(x,y,err(:,:,k)'); +shading interp +xlabel('x'); ylabel('y') +title('Discretization error') +colorbar + +figure +subplot(311); +pcolor(x,y,solx(:,:,k)'); +shading interp +xlabel('x'); ylabel('y') +title('Caculated d/dx') +colorbar + +subplot(312); +pcolor(x,y,solx_exact(:,:,k)'); +shading interp +xlabel('x'); ylabel('y') +title('Analytical d/dx') +colorbar + +subplot(313); +err=solx-solx_exact; +pcolor(x,y,err(:,:,k)'); +shading interp +xlabel('x'); ylabel('y') +title('Discretization error') +colorbar + +figure +k=ceil(nz(1)/2); +subplot(311); +pcolor(x,y,soly(:,:,k)'); +shading interp +xlabel('x'); ylabel('y') +title('Caculated d/dy') +colorbar + +subplot(312); +pcolor(x,y,soly_exact(:,:,k)'); +shading interp +xlabel('x'); ylabel('y') +title('Analytical d/dy') +colorbar + +subplot(313); +err=soly-soly_exact; +pcolor(x,y,err(:,:,k)'); +shading interp +xlabel('x'); ylabel('y') +title('Discretization error') +colorbar + +figure +k=ceil(nz(1)/6); +subplot(311); +pcolor(x,y,solz(:,:,k)'); +shading interp +xlabel('x'); ylabel('y') +title('Caculated d/dz') +colorbar + +subplot(312); +pcolor(x,y,solz_exact(:,:,k)'); +shading interp +xlabel('x'); ylabel('y') +title('Analytical d/dz') +colorbar + +subplot(313); +err=solz-solz_exact; +pcolor(x,y,err(:,:,k)'); +shading interp +xlabel('x'); ylabel('y') +title('Discretization error') +colorbar + +figure +i=ceil(nx(1)/2); +plot(z, squeeze(sol(i,1,:)), 'o', z, squeeze(solexact(i,1,:)),'r') +xlabel('z'); diff --git a/matlab/ppoisson_fd.m b/matlab/ppoisson_fd.m new file mode 100644 index 0000000..a573749 --- /dev/null +++ b/matlab/ppoisson_fd.m @@ -0,0 +1,110 @@ +% +% @file ppoisson_fd.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +if ~exist('file'), file='ppoisson_fd.h5'; end + +prb=h5readatt(file,'/','PRB'); +nx=h5readatt(file,'/','NX'); +ny=h5readatt(file,'/','NY'); +lx=h5readatt(file,'/','LX'); +ly=h5readatt(file,'/','LY'); +kx=h5readatt(file,'/','KX'); +ky=h5readatt(file,'/','KY'); +beta=h5readatt(file,'/','BETA'); +omega=h5readatt(file,'/','OMEGA'); +relax=h5readatt(file,'/','RELAX'); +nlevels=h5readatt(file,'/','LEVELS'); +mu=h5readatt(file,'/','MU'); +nu1=h5readatt(file,'/','NU1'); +nu2=h5readatt(file,'/','NU2'); +direct_solve_nits=h5readatt(file,'/','DIRECT_SOLVE_NITS'); + +title_str=sprintf(['PRB=%s, N=(%d,%d), relax=%s, V(%d,%d), LEVELS=%d, DIRECT SOLVE=%d'], ... + prb, nx, ny, relax, nu1, nu2, nlevels, direct_solve_nits); + +x = h5read(file, '/xgrid'); +y = h5read(file, '/ygrid'); +[X,Y]=meshgrid(y,x); +n1=size(x,1); +n2=size(y,1); +n=n1*n2 + +mat = stencil_mat(file, '/MAT'); +% $$$ figure +% $$$ spy(mat) + +f = h5read(file,'/f'); f1d=reshape(f,n,1); +v = h5read(file,'/v'); v1d=reshape(v,n,1); +u = h5read(file,'/u'); u1d=reshape(u,n,1); + +% $$$ udirect1d = mat\f1d; +% $$$ udirect=reshape(udirect1d,n1,n2); +% $$$ fprintf('Residual of direct solution = %.3e\n', norm(mat*udirect1d-f1d)); +% $$$ fprintf('Error of direct solution = %.3e\n', norm(udirect1d- ... +% $$$ v1d)); +figure +subplot(221) +pcolor(x,y,v'-u') +xlabel('X'); ylabel('Y'); +shading interp +colorbar +title('Error on the finest grid') + +subplot(222) +[yy,iy] = max(abs(v),[],2); +[xx,ix] = max(yy); +iy0=iy(ix); +str=sprintf('Solution at y = %.4f', y(iy0)); +plot(x, v(:,iy0),x, u(:,iy0),'o') +xlabel('x'); ylabel(str); +grid on +legend('Analytic Solution', 'Computed Solution') +title(title_str) + +subplot(223) +[xx,ix] = max(abs(v)); +[yy,iy] = max(xx); +ix0=ix(iy); +str=sprintf('Solution at x = %.4f', x(ix0)); +plot(y, v(ix0,:),y, u(ix0,:),'o') +xlabel('y'); ylabel(str); +grid on + +figure +resid_it=h5read(file, '/resid'); +err_it=h5read(file, '/error'); +nits = size(resid_it,1)-1; +it=0:nits; +subplot(211) +semilogy(it,resid_it,'o-') +xlabel('Iterations') +ylabel('Residual norm') +grid on +title(title_str) +subplot(212) +semilogy(it, err_it, 'o-') +xlabel('Iterations') +ylabel('Norm of Discretization Error') +grid on diff --git a/matlab/relax.m b/matlab/relax.m new file mode 100644 index 0000000..37d3fca --- /dev/null +++ b/matlab/relax.m @@ -0,0 +1,55 @@ +% +% @file relax.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +load relax.mat + +figure +subplot(221) +semilogy(jac_1(:,1),jac_1(:,2),gs_1(:,1),gs_1(:,2)) +grid on +legend('Jacobi', 'GS') +xlabel('Iterations'); ylabel('Error') +title('NX=32, P=1') + +subplot(222) +semilogy(jac_3(:,1),jac_3(:,2),gs_3(:,1),gs_3(:,2)) +grid on +legend('Jacobi', 'GS') +xlabel('Iterations'); ylabel('Error') +title('NX=32, P=3') + +subplot(223) +semilogy(jac_1(:,1),jac_1(:,4),gs_1(:,1),gs_1(:,4)) +grid on +legend('Jacobi', 'GS') +xlabel('Iterations'); ylabel('Discretization error') +title('NX=32, P=1') + +subplot(224) +semilogy(jac_3(:,1),jac_3(:,4),gs_3(:,1),gs_3(:,4)) +grid on +legend('Jacobi', 'GS') +xlabel('Iterations'); ylabel('Discretization error') +title('NX=32, P=3') diff --git a/matlab/stencil_mat.m b/matlab/stencil_mat.m new file mode 100644 index 0000000..4283768 --- /dev/null +++ b/matlab/stencil_mat.m @@ -0,0 +1,49 @@ +% +% @file stencil_mat.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +function [mata, diag] = stencil_mat(file, dset) + id = double(h5read(file, strcat(dset,'/dists'))); + val = h5read(file, strcat(dset,'/val')); + n1 = size(val,1); + n2 = size(val,2); + n = n1*n2; + ndiag = size(val,3); + dists = id(:,1) + n1*id(:,2); + val = reshape(val,n,ndiag); + + %% Shift the off-diagonals %% + for k=1:length(dists) + d=dists(k); + if d < 0 + val(1:n+d,k) = val(1-d:n,k); + elseif d > 0 + val(n:-1:d+1,k) = val(n-d:-1:1,k); + end + end + mata = spdiags(val, dists, n,n); + if nargout == 2 + idiag = find(dists==0); + diag = val(:,idiag); + end diff --git a/matlab/tcdsmat.m b/matlab/tcdsmat.m new file mode 100644 index 0000000..5331df7 --- /dev/null +++ b/matlab/tcdsmat.m @@ -0,0 +1,91 @@ +% +% @file tcdsmat.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +mat='/MAT1'; +gbmat; +clear S gb_mat; + +file='tcdsmat.h5'; +nx=hdf5read(file,'/NX'); +ny=hdf5read(file,'/NY'); +dists=hdf5read(file,strcat(mat,'/dists')); +vals=hdf5read(file,strcat(mat,'/vals')); +rowv=hdf5read(file,strcat(mat,'/rowv')); +colh=hdf5read(file,strcat(mat,'/colh')); +n=hdf5read(file,strcat(mat,'/RANK')); +nd=hdf5read(file,strcat(mat,'/NDIAGS')); + +err=zeros(n,nd); + +% Diagonal balancing of matrix +dbal = 1./sqrt(diag(a)); +a = diag(dbal)*a*diag(dbal); + +% Check CDS mat except row ny and column ny +for k=1:nd + d=dists(k); + i1=max(1,1-d); i2=min(n,n-d); + fprintf(1,'%8d %8d %8d\n',d,i1,i2); + for i=i1:i2 + j=i+d; + if (i~=ny && j~=ny) + err(i,k) = a(i,j)-vals(i,k); + end + end +end +fprintf(1,'min/max of err: %8.4e, %8.4e\n',min(min(err)), max(max(err))); + +% Check row ny and j .ne. ny +i=ny; +bw0=size(rowv,1); +for k=1:nd + d=dists(k); + j=i+d; + if ((j >= ny+1) && (j <= bw0)) + err(i,k)=a(i,j)-rowv(j); + end +end +fprintf(1,'min/max of err: %8.4e, %8.4e\n',min(min(err)), max(max(err))); + + +% Check column ny +j=ny; +for k=1:nd + d=dists(k); + i=j-d; + if ((i >= ny+1) && (i <= bw0)) + err(i,k)=a(i,j)-colh(i); + end +end +fprintf(1,'min/max of err: %8.4e, %8.4e\n',min(min(err)), max(max(err))); + +% Check RHS +rhs=hdf5read(file,'/RHS'); +fprintf('Err in RHS: %8.3e\n', max(max(abs(rhs-rhs0)))) + +% Check SOL +sol=hdf5read(file,'/SOL'); +err= sol-sol0; +fprintf('Err SOL: %8.3e\n', max(max(abs(err)))); diff --git a/matlab/tcdsmat_plot_sol.m b/matlab/tcdsmat_plot_sol.m new file mode 100644 index 0000000..0754929 --- /dev/null +++ b/matlab/tcdsmat_plot_sol.m @@ -0,0 +1,79 @@ +% +% @file tcdsmat_plot_sol.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +file='tcdsmat.h5' +% +% Get data from data sets +% +r=hdf5read(file,'/xgrid'); +t=hdf5read(file,'/ygrid'); +sol=hdf5read(file,'/sol')'; +solexact=hdf5read(file,'/solana')'; +err=hdf5read(file,'/errors')'; +% +% Attributes +% +NR=hdf5read(file,'/NX'); NTH=hdf5read(file,'/NY'); +NIDBAS1=hdf5read(file,'/NIDBAS1'); +NIDBAS2=hdf5read(file,'/NIDBAS2'); +MBESS=hdf5read(file,'/MBESS'); +LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), mbess = %d', NR, NTH, ... + NIDBAS1, NIDBAS2, MBESS); + +attr=hdf5read(file,'/xgrid/title'); x_ann=attr.Data; +attr=hdf5read(file,'/ygrid/title'); y_ann=attr.Data; +attr=hdf5read(file,'/sol/title'); sol_ann=attr.Data; +attr=hdf5read(file,'/solana/title'); solexact_ann=attr.Data; +attr=hdf5read(file,'/errors/title');err_ann=attr.Data; + +[R,T]=meshgrid(r,t); +x = R.*cos(T); y= R.*sin(T); + +figure +subplot(221) +pcolor(double(r),double(t),double(sol)); +shading interp +xlabel(x_ann); ylabel(y_ann) +title(LABEL) +colorbar + +subplot(222) +pcolor(double(x),double(y),double(sol)) +shading interp +hold off, axis image +xlabel('X'); ylabel('Y') +title('X-Y plane') +colorbar + +subplot(223) +surfc(double(x),double(y),double(sol)) +xlabel('X'); ylabel('Y'); +title(sol_ann) + +subplot(224) +surfc(double(x),double(y),double(err)) +xlabel('X'); ylabel('Y'); +title(err_ann) + diff --git a/matlab/test_intergrid.m b/matlab/test_intergrid.m new file mode 100644 index 0000000..4d89561 --- /dev/null +++ b/matlab/test_intergrid.m @@ -0,0 +1,102 @@ +% +% @file test_intergrid.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +if ~exist('file'), file='test_intergrid0.h5'; end +% +nx=h5readatt(file,'/','NX'); +ny=h5readatt(file,'/','NY'); +lx=h5readatt(file,'/','LX'); +ly=h5readatt(file,'/','LY'); +kx=h5readatt(file,'/','KX'); +ky=h5readatt(file,'/','KY'); +nlevels=h5readatt(file,'/','LEVELS'); +title_str=sprintf(['N=(%d,%d), Lx=%d, Ly=%d, LEVELS=%d, KX=%d, KY=%d'], ... + nx,ny,lx,ly,nlevels,kx,ky); + +if nlevels ~= 2 + disp 'levels should be 2!' + return +end + +% +% Prolongation matrices at the coarsest grid +% +levels=nlevels; +mglevel=sprintf('/mglevels/level.%.2d', levels); +dset=strcat(mglevel,'/matpx'); +matpx=csr_mat(file,dset); +dset=strcat(mglevel,'/matpy'); +matpy=csr_mat(file,dset); + +for l=1:2 + mglevel=sprintf('/mglevels/level.%.2d', l); + x=h5read(file,strcat(mglevel,'/x')); + y=h5read(file,strcat(mglevel,'/y')); + f=h5read(file,strcat(mglevel,'/f')); + v=h5read(file,strcat(mglevel,'/v')); + figure + subplot(221) + [yy,iy] = max(abs(f),[],2); [xx,ix] = max(yy); iy0=iy(ix); + str=sprintf('f at y = %.4f', y(iy0)); + plot(x, f(:,iy0),'o-') + xlabel('x'); ylabel(str); + grid on + title(title_str) + subplot(222) + [xx,ix] = max(abs(f)); [yy,iy] = max(xx); ix0=ix(iy); + str=sprintf('f at x = %.4f', x(ix0)); + plot(y, f(ix0,:),'o-') + xlabel('y'); ylabel(str); + grid on + title(title_str) + subplot(223) + [yy,iy] = max(abs(v),[],2); [xx,ix] = max(yy); iy0=iy(ix); + str=sprintf('v at y = %.4f', y(iy0)); + plot(x, v(:,iy0),'ro-') + xlabel('x'); ylabel(str); + grid on + title(title_str) + subplot(224) + [xx,ix] = max(abs(v)); [yy,iy] = max(xx); ix0=ix(iy); + str=sprintf('v at x = %.4f', x(ix0)); + plot(y, v(ix0,:),'ro-') + xlabel('y'); ylabel(str); + grid on + title(title_str) + if l==1 + ffine=f; vfine=v; + else + fcoarse=f; vcoarse=v; + end +end + +%% Check +err_restriction = matpx'*ffine*matpy./4 - fcoarse; +err_prolong = matpx*vcoarse*matpy' - vfine; + + + +max(max(abs(err_restriction))) +max(max(abs(err_prolong))) diff --git a/matlab/test_jacobi.m b/matlab/test_jacobi.m new file mode 100644 index 0000000..2a71117 --- /dev/null +++ b/matlab/test_jacobi.m @@ -0,0 +1,96 @@ +% +% @file test_jacobi.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +if ~exist('file'), file='test_jacobi.h5'; end + +x = h5read(file, '/xgrid'); +y = h5read(file, '/ygrid'); +[X,Y]=meshgrid(y,x); +n1=size(x,1); +n2=size(y,1); +n=n1*n2 + +mat = stencil_mat(file, '/MAT'); +% $$$ figure +% $$$ spy(mat) + +f = h5read(file,'/f'); f1d=reshape(f,n,1); +v = h5read(file,'/v'); v1d=reshape(v,n,1); +u = h5read(file,'/u'); u1d=reshape(u,n,1); +resids = h5read(file,'/resids'); +errs = h5read(file,'/errs'); + +udirect1d = mat\f1d; +udirect=reshape(udirect1d,n1,n2); +fprintf('Residual of direct solution = %.3e\n', norm(mat*udirect1d-f1d)); +fprintf('Error of direct solution = %.3e\n', norm(udirect1d-v1d)); + +figure +subplot(211) +[yy,iy] = max(abs(v),[],2); +[xx,ix] = max(yy); +iy0=iy(ix); +str=sprintf('Solution at y = %.4f', y(iy0)); +plot(x, v(:,iy0),x, u(:,iy0),'o') +xlabel('x'); ylabel(str); +grid on +legend('Analytic Solution', 'Computed Solution') +subplot(212) +[xx,ix] = max(abs(v)); +[yy,iy] = max(xx); +ix0=ix(iy); +str=sprintf('Solution at x = %.4f', x(ix0)); +plot(y, v(ix0,:),y, u(ix0,:),'o') +xlabel('y'); ylabel(str); +grid on + +% $$$ figure +% $$$ subplot(321) +% $$$ surf(x,y,v'); title('Exact solution') +% $$$ xlabel('X'); ylabel('Y') +% $$$ subplot(322) +% $$$ surf(x,y,f'); title('RHS') +% $$$ xlabel('X'); ylabel('Y') +% $$$ subplot(323) +% $$$ surf(x,y,u'); title('Num. solution') +% $$$ subplot(324) +% $$$ surf(x,y,udirect'); title('Direct. solution') +% $$$ xlabel('X'); ylabel('Y') +% $$$ subplot(325) +% $$$ surf(x,y,resids'); title('Residuals') +% $$$ xlabel('X'); ylabel('Y') +% $$$ subplot(326) +% $$$ surf(x,y,errs'); title('Errors') +% $$$ xlabel('X'); ylabel('Y') + +resid_it=h5read(file, '/resid'); +err_it=h5read(file, '/error'); +nits = size(resid_it,1)-1; +it=0:nits; +figure +semilogy(it,resid_it, it, err_it) +legend('Residual norm', 'Discretization error') +xlabel('Iterations') +grid on diff --git a/matlab/test_jacobig.m b/matlab/test_jacobig.m new file mode 100644 index 0000000..a0e31d2 --- /dev/null +++ b/matlab/test_jacobig.m @@ -0,0 +1,84 @@ +% +% @file test_jacobig.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +if ~exist('file'), file='test_jacobig.h5'; end + +x = h5read(file, '/xgrid'); +y = h5read(file, '/ygrid'); +[X,Y]=meshgrid(y,x); +n1=size(x,1); +n2=size(y,1); +n=n1*n2 + +mat = stencil_mat(file, '/MAT'); +% $$$ figure +% $$$ spy(mat) + +f = h5read(file,'/f'); f1d=reshape(f,n,1); +v = h5read(file,'/v'); v1d=reshape(v,n,1); +u = h5read(file,'/u'); u1d=reshape(u,n,1); + +udirect1d = mat\f1d; +udirect=reshape(udirect1d,n1,n2); +fprintf('Residual of direct solution = %.3e\n', norm(mat*udirect1d-f1d)); +fprintf('Error of direct solution = %.3e\n', norm(udirect1d- ... + v1d)); +figure +subplot(221) +pcolor(x,y,v'-u') +xlabel('X'); ylabel('Y'); +shading interp +colorbar +title('Error') + +subplot(223) +[yy,iy] = max(abs(v),[],2); +[xx,ix] = max(yy); +iy0=iy(ix); +str=sprintf('Solution at y = %.4f', y(iy0)); +plot(x, v(:,iy0),x, u(:,iy0),'o') +xlabel('x'); ylabel(str); +grid on +legend('Analytic Solution', 'Computed Solution') + +subplot(224) +[xx,ix] = max(abs(v)); +[yy,iy] = max(xx); +ix0=ix(iy); +str=sprintf('Solution at x = %.4f', x(ix0)); +plot(y, v(ix0,:),y, u(ix0,:),'o') +xlabel('y'); ylabel(str); +grid on + +resid_it=h5read(file, '/resid'); +err_it=h5read(file, '/error'); +nits = size(resid_it,1)-1; +it=0:nits; + +subplot(222) +semilogy(it,resid_it, it, err_it) +legend('Residual norm', 'Discretization error') +xlabel('Iterations') +grid on diff --git a/matlab/test_mg.m b/matlab/test_mg.m new file mode 100644 index 0000000..d0ed10b --- /dev/null +++ b/matlab/test_mg.m @@ -0,0 +1,164 @@ +% +% @file test_mg.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +file='test_mg.h5'; +% +nx=h5readatt(file,'/','NX'); +nidbas=h5readatt(file,'/','NIDBAS'); +relax=h5readatt(file,'/','RELAX'); +levels=h5readatt(file,'/','LEVELS'); +nu1=h5readatt(file,'/','NU1'); +nu2=h5readatt(file,'/','NU2'); +alpha=h5readatt(file,'/','ALPHA'); +omega=h5readatt(file,'/','OMEGA'); + +if alpha == 0 + kx=h5readatt(file,'/','KMODE'); + title_str=sprintf('N=%d, NIDBAS=%d, KX=%d, relax=%s, omega=%.3f, levels = %d, nu1 = %d, nu2 = %d', ... + nx,nidbas,kx,relax, omega, levels, nu1, nu2); +else + modem=h5readatt(file,'/','MODEM'); + modep=h5readatt(file,'/','MODEP'); + title_str=sprintf('N=%d, NIDBAS=%d, modem=%d, modep=%d, relax=%s, omega=%.3f, levels = %d, nu1 = %d, nu2 = %d', ... + nx,nidbas,modem,modep,relax, omega, levels, nu1, nu2); +end + + +% +% Read matrices at coarset grid +% +for lev=2:levels + % + % FE mat at fine grid + mglevel=sprintf('/mglevels/level.%.2d', lev-1); + dset=strcat(mglevel,'/mata'); + mata_f = gb_mat(file, dset); + + % + % FE mat at coarse grid + mglevel=sprintf('/mglevels/level.%.2d', lev); + dset=strcat(mglevel,'/mata'); + mata_c = gb_mat(file, dset); + + % + % Prolong mat + dset=strcat(mglevel,'/matp'); + matp=h5read(file,dset); + % + % Check + fprintf(1,'Level %d: ||A_coarse - P''*A_fine*P|| = %g\n', lev, norm(matp'*mata_f*matp ... + - mata_c)) +end +% +% Iterations +dset='/Iterations/'; +err=h5read(file, strcat(dset,'errors')); +disc_err=h5read(file, strcat(dset,'disc_errors')); +resid=h5read(file, strcat(dset,'residues')); +its=0:1:size(err,1)-1; + +figure +subplot(212) +semilogy(its,resid,'o-', its, disc_err,'h-') +legend('Residue', 'Error') +grid on +xlabel('Iterations'); ylabel('Norm od residue and error'); +title(title_str); + +% +% Plot grid values at the last iteration +xgrid=h5read(file, '/Iterations/xgrid'); +u_calc=h5read(file, '/Iterations/u_calc'); +u_exact=h5read(file, '/Iterations/u_exact'); +u_direct=h5read(file, '/Iterations/u_direct'); +subplot(211) +plot(xgrid, u_exact, xgrid,u_calc,'o') +legend('Analytic', 'Calculated') +xlabel('X');ylabel('Grid values of solution') +grid on +title(title_str); + +% +% +% $$$ mglevel=sprintf('/mglevels/level.%.2d', 1); +% $$$ dset=strcat(mglevel,'/mata'); +% $$$ A = gb_mat(file, dset); +% $$$ D = diag(diag(A),0); +% $$$ n=rank(A); +% $$$ k=1:1:n; +% $$$ if relax(1:2) == 'ja' +% $$$ % +% $$$ % Compute eigenvalues of Rj = D^(-1)*A +% $$$ % +% $$$ [V, l] = eig(A,D); +% $$$ [lambda, iss] = sort(diag(l)); +% $$$ V = V(1:end,iss); +% $$$ % +% $$$ % Spectral radius of Jacobi iteration matrix +% $$$ % R(omega) = max |1-omega*lambda| +% $$$ % +% $$$ om=0:0.01:1; +% $$$ for i=1:size(om,2) +% $$$ rho(i) = max(abs(1-om(i).*lambda)); +% $$$ end +% $$$ fprintf(1, 'Spectral Radius of relaxation matrix = %g\n', max(abs(1-omega*lambda))) +% $$$ +% $$$ figure +% $$$ subplot(211) +% $$$ plot(k, 1-omega*lambda, 'o-') +% $$$ xlabel('mode k'); ylabel('Eigen value of inv(D)*A') +% $$$ grid on +% $$$ title(title_str) +% $$$ subplot(212) +% $$$ plot(om, rho) +% $$$ xlabel('\omega'); ylabel('Spectral Radius') +% $$$ grid on +% $$$ +% $$$ for i=1:n +% $$$ k=mod(i-1,4*5)+1; +% $$$ if k==1 +% $$$ figure +% $$$ title(title_str) +% $$$ end +% $$$ subplot(4,5,k) +% $$$ str = sprintf('Mode = %d, ||R|| = %.3f', i, 1-omega*lambda(i)); +% $$$ plot(V(:,i)); grid on +% $$$ title(str) +% $$$ end +% $$$ elseif relax(1:2) == 'gs' +% $$$ % +% $$$ % Spectral radius of GS Iteration Matrix +% $$$ % Rg = (D-L)^(-1) * U +% $$$ % +% $$$ B = tril(A,0); % D-L +% $$$ lambda = eig(-triu(A,1),B); +% $$$ fprintf(1, 'Spectral Radius of relaxation matrix = %g\n', max(abs(lambda))) +% $$$ figure +% $$$ plot(lambda, 'o', 'MarkerSize', 6) +% $$$ axis equal +% $$$ grid on +% $$$ xlabel('Real of eigenvalues'); ylabel('Imag of eigenvalues') +% $$$ title(title_str) +% $$$ end diff --git a/matlab/test_mg2d.m b/matlab/test_mg2d.m new file mode 100644 index 0000000..cac6b9d --- /dev/null +++ b/matlab/test_mg2d.m @@ -0,0 +1,102 @@ +% +% @file test_mg2d.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +file='test_mg2d.h5'; +% +nx=h5readatt(file,'/','NX'); +ny=h5readatt(file,'/','NY'); +kx=h5readatt(file,'/','KX'); +ky=h5readatt(file,'/','KY'); +nidbas1=h5readatt(file,'/','NIDBAS1'); +nidbas2=h5readatt(file,'/','NIDBAS2'); +relax=h5readatt(file,'/','RELAX'); +nlevels=h5readatt(file,'/','LEVELS'); +nu1=h5readatt(file,'/','NU1'); +nu2=h5readatt(file,'/','NU2'); +mu=h5readatt(file,'/','MU'); +title_str=sprintf(['N=(%d,%d), NIDBAS=(%d,%d), relax=%s, nu1=%d, ' ... + 'nu2=%d, mu=%d, LEVELS=%d, KX=%d, KY=%d'], ... + nx,ny,nidbas1,nidbas2,relax,nu1,nu2,mu,nlevels,kx,ky); + + +% +% Prolongation matrices at the coarsest grid +% +levels=nlevels; +mglevel=sprintf('/mglevels/level.%.2d', levels); +dset=strcat(mglevel,'/matpx'); +matpx=csr_mat(file,dset); +dset=strcat(mglevel,'/matpy'); +matpy=csr_mat(file,dset); + +% +% FE matrix at the finest grid +% +levels=1; +mglevel=sprintf('/mglevels/level.%.2d', levels); +dset=strcat(mglevel,'/mata'); +[mata,diag]=csr_mat(file,dset); + +f=h5read(file,strcat(mglevel,'/f')); +v=h5read(file,strcat(mglevel,'/v')); +f1d=h5read(file,strcat(mglevel,'/f1d')); +v1d=h5read(file,strcat(mglevel,'/v1d')); +% $$$ figure +% $$$ spy(mata) + +% +% Solutions at the finest grid +% +x=h5read(file,'/solutions/xg'); +y=h5read(file,'/solutions/yg'); +sol_anal=h5read(file,'/solutions/anal'); +sol_calc=h5read(file,'/solutions/calc'); +% $$$ figure +% $$$ subplot(211) +% $$$ surf(x,y,sol_anal') +% $$$ xlabel('X'); ylabel('Y'); +% $$$ title('Analytical solution on the finest grid') +% $$$ subplot(212) +% $$$ surf(x,y,sol_calc') +% $$$ xlabel('X'); ylabel('Y'); +% $$$ title('Calculated solution on the finest grid') + +% +% Iterations +% +dset='/Iterations/'; +disc_err=h5read(file, strcat(dset,'disc_errors')); +resid=h5read(file, strcat(dset,'residues')); +its=0:1:size(resid,1)-1; +figure +subplot(211) +semilogy(its,resid,'o-') +grid on +xlabel('Iterations'); ylabel('Norm of residue'); +title(title_str); +subplot(212) +semilogy(its,disc_err,'h-') +grid on +xlabel('Iterations'); ylabel('Norm of error'); diff --git a/matlab/test_mg2d_cyl.m b/matlab/test_mg2d_cyl.m new file mode 100644 index 0000000..3258da6 --- /dev/null +++ b/matlab/test_mg2d_cyl.m @@ -0,0 +1,141 @@ +% +% @file test_mg2d_cyl.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +file='test_mg2d_cyl.h5'; +% +nx=h5readatt(file,'/','NX'); +ny=h5readatt(file,'/','NY'); +nidbas1=h5readatt(file,'/','NIDBAS1'); +nidbas2=h5readatt(file,'/','NIDBAS2'); +modem=h5readatt(file,'/','MODEM'); +modep=h5readatt(file,'/','MODEP'); +relax=h5readatt(file,'/','RELAX'); +nlevels=h5readatt(file,'/','LEVELS'); +nu1=h5readatt(file,'/','NU1'); +nu2=h5readatt(file,'/','NU2'); +mu=h5readatt(file,'/','MU'); +title_str=sprintf(['N=(%d,%d), NIDBAS=(%d,%d), relax=%s, nu1=%d, ' ... + 'nu2=%d, mu=%d, LEVELS=%d, MODEM=%d, MODEP=%d'], ... + nx,ny,nidbas1,nidbas2,relax,nu1,nu2,mu,nlevels,modem,modep); + + +% +% Prolongation matrices at the coarsest grid +% +levels=nlevels; +mglevel=sprintf('/mglevels/level.%.2d', levels); +dset=strcat(mglevel,'/matpx'); +matpx=csr_mat(file,dset); +dset=strcat(mglevel,'/matpy'); +matpy=csr_mat(file,dset); + +% +% FE matrix at the finest grid +% +levels=1; +mglevel=sprintf('/mglevels/level.%.2d', levels); +dset=strcat(mglevel,'/mata'); +[mata,diag]=csr_mat(file,dset); + +f=h5read(file,strcat(mglevel,'/f')); +v=h5read(file,strcat(mglevel,'/v')); +f1d=h5read(file,strcat(mglevel,'/f1d')); +v1d=h5read(file,strcat(mglevel,'/v1d')); +% $$$ figure +% $$$ spy(mata) + +% +% Solutions at the finest grid +% +x=h5read(file,'/solutions/xg'); +y=h5read(file,'/solutions/yg'); +sol_anal=h5read(file,'/solutions/anal'); +sol_calc=h5read(file,'/solutions/calc'); +sol_direct=h5read(file,'/solutions/direct'); +nx=int32(size(x,1)); +ny=int32(size(y,1)); + +figure +subplot(211) +surf(x,y,sol_anal') +xlabel('X'); ylabel('Y'); +title('Analytical solution on the finest grid') +subplot(212) +surf(x,y,sol_calc') +% $$$ surf(x,y, (abs(sol_calc'-sol_anal'))) +xlabel('X'); ylabel('Y'); +title('Calculated solution on the finest grid') + +figure +subplot(211) +plot(x, sol_anal(:,ny/2),x, sol_calc(:,ny/2),'o') +xlabel('r'); +grid on +legend('Analytic Solution', 'MG Solution') +title(title_str) +subplot(212) +if modem == 0 + plot(y, sol_anal(1,:),y, sol_calc(1,:),'o') +else + plot(y, sol_anal(nx/2,:),y, sol_calc(nx/2,:),'o') +end +xlabel('\theta'); +grid on +title(title_str) + +% $$$ figure +% $$$ subplot(211) +% $$$ semilogy(x, abs(sol_anal(:,ny/2)-sol_calc(:,ny/2)),'o') +% $$$ xlabel('r'); ylabel('Error') +% $$$ grid on +% $$$ title(title_str) +% $$$ subplot(212) +% $$$ if modem == 0 +% $$$ semilogy(y, abs(sol_anal(1,:)-sol_calc(1,:)),'o') +% $$$ else +% $$$ semilogy(y, abs(sol_anal(nx/2,:)-sol_calc(nx/2,:)),'o') +% $$$ end +% $$$ xlabel('\theta'); ylabel('Error'); +% $$$ grid on +% $$$ title(title_str) +% $$$ + +% +% Iterations +% +dset='/Iterations/'; +disc_err=h5read(file, strcat(dset,'disc_errors')); +resid=h5read(file, strcat(dset,'residues')); +its=0:1:size(resid,1)-1; +figure +subplot(211) +semilogy(its,resid,'o-') +grid on +xlabel('Iterations'); ylabel('Norm of residue'); +title(title_str); +subplot(212) +semilogy(its,disc_err,'h-') +grid on +xlabel('Iterations'); ylabel('Norm of error'); diff --git a/matlab/test_mgp.m b/matlab/test_mgp.m new file mode 100644 index 0000000..4f5fb1b --- /dev/null +++ b/matlab/test_mgp.m @@ -0,0 +1,98 @@ +% +% @file test_mgp.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +file='test_mgp.h5'; +% +nx=h5readatt(file,'/','NX'); +nidbas=h5readatt(file,'/','NIDBAS'); +relax=h5readatt(file,'/','RELAX'); +levels=h5readatt(file,'/','LEVELS'); +nu1=h5readatt(file,'/','NU1'); +nu2=h5readatt(file,'/','NU2'); +title_str=sprintf('NX = %d, NIDBAS = %d, levels = %d, nu1 = %d, nu2 = %d', nx, nidbas, levels, nu1, nu2); +% +% Read matrices at coarset grid +% +for lev=2:levels +% +% FE mat at fine grid +mglevel=sprintf('/mglevels/level.%.2d', lev-1); +dset=strcat(mglevel,'/mata'); +mata_f=h5read(file,dset); +n=size(mata_f,1); +% +% FE mat at coarse grid +mglevel=sprintf('/mglevels/level.%.2d', lev); +dset=strcat(mglevel,'/mata'); +mata_c=h5read(file,dset); +n=size(mata_c,1); +% +% Prolong mat +dset=strcat(mglevel,'/matp'); +matp=h5read(file,dset); +% +% Check +fprintf(1,'Level %d: ||A_coarse - P''*A_fine*P|| = %g\n', lev, norm(matp'*mata_f*matp ... + - mata_c)) +end +% +% Iterations +dset='/Iterations/'; +err=h5read(file, strcat(dset,'errors')); +disc_err=h5read(file, strcat(dset,'disc_errors')); +resid=h5read(file, strcat(dset,'residues')); +its=0:1:size(err,1)-1; + +figure +subplot(221) +semilogy(its,resid,'o-', its, disc_err,'h-') +legend('Residue', 'Error') +grid on +xlabel('Iterations'); ylabel('Norm od residue and error'); +title(title_str); + +% +% Plot grid values at the last iteration +xgrid=h5read(file, '/Iterations/xgrid'); +u_calc=h5read(file, '/Iterations/u_calc'); +u_exact=h5read(file, '/Iterations/u_exact'); +u_direct=h5read(file, '/Iterations/u_direct'); +subplot(222) +plot(xgrid,u_exact, xgrid,u_calc,'o') +xlabel('X');ylabel('Grid values of solution') +grid on +title(title_str); + +subplot(223) +semilogy(xgrid,abs(u_calc-u_direct)) +xlabel('X');ylabel('Diff with direct solution') +grid on +title(title_str); + +subplot(224) +semilogy(xgrid,abs(u_calc-u_exact)) +xlabel('X');ylabel('Diff with exact solution') +grid on +title(title_str); diff --git a/matlab/test_relax.m b/matlab/test_relax.m new file mode 100644 index 0000000..938b605 --- /dev/null +++ b/matlab/test_relax.m @@ -0,0 +1,157 @@ +% +% @file test_relax.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +file='test_relax.h5'; +% +nx=h5readatt(file,'/','NX'); +alpha=h5readatt(file,'/','ALPHA'); +nidbas=h5readatt(file,'/','NIDBAS'); +relax=h5readatt(file,'/','RELAX'); +omega=h5readatt(file,'/','OMEGA'); +if alpha == 0 + kx=h5readatt(file,'/','KX'); + title_str=sprintf('N=%d, NIDBAS=%d, KX=%d, relax=%s, omega=%.3f', ... + nx,nidbas,kx,relax, omega); +else + modem=h5readatt(file,'/','MODEM'); + modep=h5readatt(file,'/','MODEP'); + title_str=sprintf('N=%d, NIDBAS=%d, modem=%d, modep=%d, relax=%s, omega=%.3f', ... + nx,nidbas,modem,modep,relax, omega); +end + + +% +% Solutions at the finest grid +% +x=h5read(file,'/solutions/xg'); +sol_direct=h5read(file,'/solutions/direct'); +sol_anal=h5read(file,'/solutions/anal'); +sol_calc=h5read(file,'/solutions/calc'); +figure +subplot(211) +plot(x, sol_anal, x, sol_calc, 'o') +legend('Analytic', 'Calculated') +xlabel('X') +grid on +title(title_str); + +% +% Relaxations +% +errdisc=h5read(file,'/relaxation/errdisc'); +resid=h5read(file,'/relaxation/resid'); +its=0:1:size(errdisc)-1; +subplot(212) +semilogy(its,errdisc,its,resid) +legend('Discretisation Error', 'Residual Norm') +xlabel('Iterations') +grid on +title(title_str) + +% +% FE Matrix +% +dset = '/MATA/'; +A = gb_mat(file, dset); +D = diag(diag(A),0); +n=rank(A); +k=1:1:n; +if relax(1:2) == 'ja' +% +% Compute eigenvalues of Rj = D^(-1)*A +% + [V, l] = eig(A,D); + [lambda, iss] = sort(diag(l)); + V = V(1:end,iss); +% +% Spectral radius of Jacobi iteration matrix +% R(omega) = max |1-omega*lambda| +% + om=0:0.01:1; + for i=1:size(om,2) + rho(i) = max(abs(1-om(i).*lambda)); + end + fprintf(1, 'Spectral Radius of relaxation matrix = %g\n', max(abs(1-omega*lambda))) + + figure + subplot(211) + plot(k, 1-omega*lambda, 'o-') + xlabel('mode k'); ylabel('Eigen value of inv(D)*A') + grid on + title(title_str) + subplot(212) + plot(om, rho) + omega_c = 2.0/max(lambda); + str = sprintf('Critical omega = %.3f', omega_c) + title(str) + xlabel('\omega'); ylabel('Spectral Radius') + grid on + +elseif relax(1:2) == 'gs' +% +% Spectral radius of GS Iteration Matrix +% Rg = -(D+L)^(-1) * U +% + B = tril(A,0); % D+L +% $$$ [V, l] = eig(-triu(A,1),B); lambda=diag(l); +% $$$ [V, l] = eig(B,A); lambda = 1 - 1./diag(l); + [V, l] = eig(A,B); lambda = 1 - diag(l); + [lambda, iss] = sort(lambda, 'descend'); + V = V(1:end,iss); + + fprintf(1, 'Spectral Radius of relaxation matrix = %g\n', max(abs(lambda))) + figure + subplot(211) + plot(real(lambda), imag(lambda), 'o') + xlabel('Real of eigenvalues'); ylabel(['Imag of ' ... + 'eigenvalues']) + axis equal + title(title_str) + grid on + subplot(212) + plot(k, abs(lambda), 'o-') + xlabel('Mode'); ylabel('eigenvalues') + grid on +end +% +% Plot eigenvectors +neig=size(lambda,1); +for i=1:neig + k=mod(i-1,4*5)+1; + if k==1 + figure + title(title_str) + end + subplot(4,5,k) + if relax(1:2) == 'ja' + str = sprintf('Mode = %d, ||R|| = %.4f', i, 1-omega* ... + lambda(i)); + else + str = sprintf('Mode = %d, ||R|| = %.4f', i, ... + lambda(i)); + end + plot(V(:,i)); grid on + title(str) +end diff --git a/matlab/test_relax2d.m b/matlab/test_relax2d.m new file mode 100644 index 0000000..3e5f778 --- /dev/null +++ b/matlab/test_relax2d.m @@ -0,0 +1,86 @@ +% +% @file test_relax2d.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +file='test_relax2d.h5'; +% +nx=h5readatt(file,'/','NX'); +ny=h5readatt(file,'/','NY'); +kx=h5readatt(file,'/','KX'); +ky=h5readatt(file,'/','KY'); +nidbas1=h5readatt(file,'/','NIDBAS1'); +nidbas2=h5readatt(file,'/','NIDBAS2'); +levels=h5readatt(file,'/','LEVELS'); +relax=h5readatt(file,'/','RELAX'); +title_str=sprintf('N=(%d,%d), NIDBAS=(%d,%d), KX=%d, KY=%d, relax=%s', ... + nx,ny,nidbas1,nidbas2,kx,ky,relax); + + +% +% Prolongation matrices at the coarsest grid +% +mglevel=sprintf('/mglevels/level.%.2d', levels); +dset=strcat(mglevel,'/matpx'); +matpx=csr_mat(file,dset); +dset=strcat(mglevel,'/matpy'); +matpy=csr_mat(file,dset); + +% +% FE matrix at the finest grid +% +levels=1; +mglevel=sprintf('/mglevels/level.%.2d', levels); +dset=strcat(mglevel,'/mata'); +[mata,diag]=csr_mat(file,dset); + +x=h5read(file,strcat(mglevel,'/x')); +y=h5read(file,strcat(mglevel,'/y')); +f=h5read(file,strcat(mglevel,'/f')); +v=h5read(file,strcat(mglevel,'/v')); +f1d=h5read(file,strcat(mglevel,'/f1d')); +v1d=h5read(file,strcat(mglevel,'/v1d')); +figure +spy(mata) + +% +% Solutions at te finest grid +% +sol_direct=h5read(file,'/solutions/direct'); +sol_anal=h5read(file,'/solutions/anal'); +figure +surf(x,y,sol_direct') +xlabel('X'); ylabel('Y'); + +% +% Relaxations +% +errdisc=h5read(file,'/relaxation/errdisc'); +resid=h5read(file,'/relaxation/resid'); +its=0:1:size(errdisc)-1; +figure +semilogy(its,errdisc,its,resid) +legend('Discretisation Error', 'Residual Norm') +xlabel('Iterations') +grid on +title(title_str) diff --git a/matlab/test_relax2d_cyl.m b/matlab/test_relax2d_cyl.m new file mode 100644 index 0000000..e01c146 --- /dev/null +++ b/matlab/test_relax2d_cyl.m @@ -0,0 +1,183 @@ +% +% @file test_relax2d_cyl.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +file='test_relax2d_cyl.h5'; +% +nx=h5readatt(file,'/','NX'); +ny=h5readatt(file,'/','NY'); +modem=h5readatt(file,'/','MODEM'); +modep=h5readatt(file,'/','MODEP'); +nidbas1=h5readatt(file,'/','NIDBAS1'); +nidbas2=h5readatt(file,'/','NIDBAS2'); +levels=h5readatt(file,'/','LEVELS'); +omega=h5readatt(file,'/','OMEGA'); +relax=h5readatt(file,'/','RELAX'); +if relax(1:2) == 'ja' + title_str=sprintf('N=(%d,%d), NIDBAS=(%d,%d), MODEM=%d, MODEP=%d, relax=%s, omega=%.3f', ... + nx,ny,nidbas1,nidbas2,modem,modep,relax, omega); +else + title_str=sprintf('N=(%d,%d), NIDBAS=(%d,%d), MODEM=%d, MODEP=%d, relax=%s,', ... + nx,ny,nidbas1,nidbas2,modem,modep,relax); +end + +% +% Prolongation matrices at the coarsest grid +% +mglevel=sprintf('/mglevels/level.%.2d', levels); +dset=strcat(mglevel,'/matpx'); +matpx=csr_mat(file,dset); +dset=strcat(mglevel,'/matpy'); +matpy=csr_mat(file,dset); + +% +% Solutions at the finest grid +% +mglevel=sprintf('/mglevels/level.%.2d', 1); +x=h5read(file,strcat(mglevel,'/x')); +y=h5read(file,strcat(mglevel,'/y')); +f=h5read(file,strcat(mglevel,'/f')); +v=h5read(file,strcat(mglevel,'/v')); +f1d=h5read(file,strcat(mglevel,'/f1d')); +v1d=h5read(file,strcat(mglevel,'/v1d')); +sol_direct=h5read(file,'/solutions/direct'); +sol_relax=h5read(file,'/solutions/relax'); +sol_anal=h5read(file,'/solutions/anal'); + +% $$$ figure +% $$$ surf(x,y,sol_direct') +% $$$ xlabel('r'); ylabel('\theta'); +% $$$ title(title_str) + +% $$$ figure +% $$$ subplot(211) +% $$$ plot(x, sol_anal(:,ny/2),x, sol_direct(:,ny/2),'o') +% $$$ xlabel('r'); ylabel('Direct solution') +% $$$ grid on +% $$$ title(title_str) +% $$$ subplot(212) +% $$$ if modem == 0 +% $$$ plot(x, sol_anal(1,:),x, sol_direct(1,:),'o') +% $$$ else +% $$$ plot(x, sol_anal(nx/2,:),x, sol_direct(nx/2,:),'o') +% $$$ end +% $$$ xlabel('\theta'); ylabel('Direct solution') +% $$$ grid on +% $$$ title(title_str) + +% +% Relaxations +% +errdisc=h5read(file,'/relaxation/errdisc'); +resid=h5read(file,'/relaxation/resid'); +its=0:1:size(errdisc)-1; +figure +semilogy(its,errdisc,its,resid) +legend('Discretisation Error', 'Residual Norm') +xlabel('Iterations') +grid on +title(title_str) + +% $$$ figure +% $$$ subplot(211) +% $$$ plot(x, sol_anal(:,ny/2),x, sol_relax(:,ny/2),'o') +% $$$ xlabel('r'); ; ylabel('Relaxed solution') +% $$$ grid on +% $$$ title(title_str) +% $$$ subplot(212) +% $$$ if modem == 0 +% $$$ plot(x, sol_anal(1,:),x, sol_relax(1,:),'o') +% $$$ else +% $$$ plot(x, sol_anal(nx/2,:),x, sol_relax(nx/2,:),'o') +% $$$ end +% $$$ xlabel('\theta'); ylabel('Relaxed solution') +% $$$ grid on +% $$$ title(title_str) + +figure +subplot(211) +plot(x, sol_anal(:,ny/2),x, sol_direct(:,ny/2),'o') +xlabel('r'); ; ylabel('Direct solution') +grid on +title(title_str) +subplot(212) +plot(x, sol_anal(1,:),x, sol_direct(1,:),'o') +xlabel('\theta'); ylabel('Direct solution at the axis') +grid on +title(title_str) + +% +% FE matrix at the finest grid +% +levels=1; +mglevel=sprintf('/mglevels/level.%.2d', levels); +dset=strcat(mglevel,'/mata'); +[mata,diag]=csr_mat(file,dset); +n=size(diag,1); +% $$$ figure +% $$$ spy(mata) +% $$$ title(title_str) + +if relax(1:2) == 'ja' + % + % Eigenvalues of inv(D)*A + % + matd=spdiags(diag,0,n,n); + lambda = eigs(mata, matd, n); + om=0:0.01:0.8; + clear rho; + for i=1:size(om,2) + rho(i) = max(abs(1-om(i).*lambda)); + end + figure + subplot(211) + plot(lambda,'o') + ylabel('Eigenvalue of inv(D)*A') + grid on + title(title_str) + subplot(212) + plot(om,rho) + xlabel('\omega'); ylabel('\rho') + grid on + omega_c = 2.0/max(lambda); + lambda_min = eigs(mata, matd, 1, 'SM'); + fprintf(1, 'Spectral Radius of relaxation matrix = %g\n', abs(1-omega*lambda_min)) + str = sprintf('Critical omega = %.3f', omega_c) + title(str) +else +% +% Spectral radius of GS Iteration Matrix +% Rg = -(D+L)^(-1) * U +% + matl= tril(mata,0); % D+L + lambda = eigs(-triu(mata,1),matl); + fprintf(1, 'Spectral Radius of relaxation matrix = %g\n', max(abs(lambda))) + figure + plot(lambda, 'o') + axis equal + grid on + xlabel('Real of eigenvalues'); ylabel('Imag of eigenvalues') + title(title_str) +end + diff --git a/matlab/test_stencil.m b/matlab/test_stencil.m new file mode 100644 index 0000000..db11cc9 --- /dev/null +++ b/matlab/test_stencil.m @@ -0,0 +1,62 @@ +% +% @file test_stencil.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +if ~exist('file'), file='test_stencil.h5'; end + +x = h5read(file, '/xgrid'); +y = h5read(file, '/ygrid'); +b1 = h5read(file, '/barr1'); +b2 = h5read(file, '/barr2'); +b3 = h5read(file, '/barr3'); +n1=size(x,1); +n2=size(y,1); +n=n1*n2 + +mat = stencil_mat(file, '/MAT'); +% $$$ figure +% $$$ spy(mat) + +fprintf('||B1|| = %e\n', norm(reshape(b1,n,1))); +fprintf('||B2|| = %e\n', norm(reshape(b2,n,1))); +fprintf('||B3|| = %e\n', norm(reshape(b3,n,1))); + +% $$$ figure +% $$$ subplot(321) +% $$$ surf(x,y,v'); title('Exact solution') +% $$$ xlabel('X'); ylabel('Y') +% $$$ subplot(322) +% $$$ surf(x,y,f'); title('RHS') +% $$$ xlabel('X'); ylabel('Y') +% $$$ subplot(323) +% $$$ surf(x,y,u'); title('Num. solution') +% $$$ subplot(324) +% $$$ surf(x,y,udirect'); title('Direct. solution') +% $$$ xlabel('X'); ylabel('Y') +% $$$ subplot(325) +% $$$ surf(x,y,resids'); title('Residuals') +% $$$ xlabel('X'); ylabel('Y') +% $$$ subplot(326) +% $$$ surf(x,y,errs'); title('Errors') +% $$$ xlabel('X'); ylabel('Y') diff --git a/matlab/test_stencilg.m b/matlab/test_stencilg.m new file mode 100644 index 0000000..fb7dccd --- /dev/null +++ b/matlab/test_stencilg.m @@ -0,0 +1,63 @@ +% +% @file test_stencilg.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +if ~exist('file'), file='test_stencilg.h5'; end + +x = h5read(file, '/xgrid'); +y = h5read(file, '/ygrid'); +a = h5read(file, '/arr'); +b1 = h5read(file, '/barr1'); +b2 = h5read(file, '/barr2'); +b3 = h5read(file, '/barr3'); +n1=size(x,1); +n2=size(y,1); +n=n1*n2 + +mat = stencil_mat(file, '/MAT'); +% $$$ figure +% $$$ spy(mat) + +fprintf('||B1|| = %e\n', norm(reshape(b1,n,1))); +fprintf('||B2|| = %e\n', norm(reshape(b2,n,1))); +fprintf('||B3|| = %e\n', norm(reshape(b3,n,1))); + +% $$$ figure +% $$$ subplot(321) +% $$$ surf(x,y,v'); title('Exact solution') +% $$$ xlabel('X'); ylabel('Y') +% $$$ subplot(322) +% $$$ surf(x,y,f'); title('RHS') +% $$$ xlabel('X'); ylabel('Y') +% $$$ subplot(323) +% $$$ surf(x,y,u'); title('Num. solution') +% $$$ subplot(324) +% $$$ surf(x,y,udirect'); title('Direct. solution') +% $$$ xlabel('X'); ylabel('Y') +% $$$ subplot(325) +% $$$ surf(x,y,resids'); title('Residuals') +% $$$ xlabel('X'); ylabel('Y') +% $$$ subplot(326) +% $$$ surf(x,y,errs'); title('Errors') +% $$$ xlabel('X'); ylabel('Y') diff --git a/matlab/test_transf2d.m b/matlab/test_transf2d.m new file mode 100644 index 0000000..506c5ad --- /dev/null +++ b/matlab/test_transf2d.m @@ -0,0 +1,105 @@ +% +% @file test_transf2d.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +file='test_transf2d.h5'; +% +nx(1)=h5readatt(file,'/','NX'); +ny(1)=h5readatt(file,'/','NY'); +kx=h5readatt(file,'/','KX'); +ky=h5readatt(file,'/','KY'); +nidbas1=h5readatt(file,'/','NIDBAS1'); +nidbas2=h5readatt(file,'/','NIDBAS2'); +nlevels=h5readatt(file,'/','LEVELS'); +title_str=sprintf('N=(%d,%d), NIDBAS=(%d,%d), KX=%d, KY=%d', ... + nx(1),ny(1),nidbas1,nidbas2,kx,ky); +% +% Grid sizes on each levels +% +for l=2:nlevels + nx(l) = nx(l-1)/2; + ny(l) = ny(l-1)/2; +end +% +% Prolongation matrices at the coarsest grid +% +levels=nlevels; +mglevel=sprintf('/mglevels/level.%.2d', levels); +dset=strcat(mglevel,'/matpx'); +matpx=csr_mat(file,dset); +dset=strcat(mglevel,'/matpy'); +matpy=csr_mat(file,dset); + +% +% FE matrix at the finest grid +% +levels=1; +mglevel=sprintf('/mglevels/level.%.2d', levels); +dset=strcat(mglevel,'/mata'); +[mata,diag]=csr_mat(file,dset); + +x=h5read(file,strcat(mglevel,'/x')); +y=h5read(file,strcat(mglevel,'/y')); +f=h5read(file,strcat(mglevel,'/f')); +v=h5read(file,strcat(mglevel,'/v')); +f1d=h5read(file,strcat(mglevel,'/f1d')); +v1d=h5read(file,strcat(mglevel,'/v1d')); +figure +spy(mata) + +% +% Solutions at the finest grid +% +sol_direct=h5read(file,'/solutions/direct'); +sol_anal=h5read(file,'/solutions/anal'); +vfine=h5read(file,'/solutions/vfine'); +figure +subplot(211) +surf(x,y,sol_direct') +xlabel('X'); ylabel('Y'); +title('Direct Solve on the finest grid') +subplot(212) +surf(x,y,vfine'-sol_direct') +xlabel('X'); ylabel('Y'); +title('Prolongation solution on the finest grid') +% +% Errors +% +errdisc = h5read(file,'/errors/errdisc'); +resid = h5read(file,'/errors/resid'); +restrict = h5read(file,'/errors/restrict'); +prolong = h5read(file,'/errors/prolong'); +errdisc_prolong = h5read(file,'/errors/disc_err_prolong'); +figure +subplot(211) +loglog(nx, errdisc, 'o-', nx(1:end-1), errdisc_prolong, 'h-') +grid on; +legend('Direct Solution', 'Prolonged Solution') +xlabel('N'); ylabel('Discretization Errors') +title(title_str); +subplot(212) +loglog(nx(2:end), restrict, 'o-', nx(1:end-1), prolong, 'h-') +grid on; +legend('Restricted RHS', 'Prolonged Solution') +xlabel('N'); ylabel('Discretization Errors') diff --git a/matlab/test_transf2d_cyl.m b/matlab/test_transf2d_cyl.m new file mode 100644 index 0000000..1466f02 --- /dev/null +++ b/matlab/test_transf2d_cyl.m @@ -0,0 +1,117 @@ +% +% @file test_transf2d_cyl.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +file='test_transf2d_cyl.h5'; +% +nx(1)=h5readatt(file,'/','NX'); +ny(1)=h5readatt(file,'/','NY'); +modem=h5readatt(file,'/','MODEM'); +modep=h5readatt(file,'/','MODEP'); +nidbas1=h5readatt(file,'/','NIDBAS1'); +nidbas2=h5readatt(file,'/','NIDBAS2'); +nlevels=h5readatt(file,'/','LEVELS'); +title_str=sprintf('N=(%d,%d), NIDBAS=(%d,%d), MODEM=%d, MODEP=%d', ... + nx(1),ny(1),nidbas1,nidbas2,modem,modep); +% +% Grid sizes on each levels +% +for l=2:nlevels + nx(l) = nx(l-1)/2; + ny(l) = ny(l-1)/2; +end +% +% Prolongation matrices at the coarsest grid +% +levels=nlevels; +mglevel=sprintf('/mglevels/level.%.2d', levels); +dset=strcat(mglevel,'/matpx'); +matpx=csr_mat(file,dset); +dset=strcat(mglevel,'/matpy'); +matpy=csr_mat(file,dset); + +% +% FE matrix at the finest grid +% +levels=1; +mglevel=sprintf('/mglevels/level.%.2d', levels); +dset=strcat(mglevel,'/mata'); +[mata,diag]=csr_mat(file,dset); + +x=h5read(file,strcat(mglevel,'/x')); +y=h5read(file,strcat(mglevel,'/y')); +f=h5read(file,strcat(mglevel,'/f')); +v=h5read(file,strcat(mglevel,'/v')); +f1d=h5read(file,strcat(mglevel,'/f1d')); +v1d=h5read(file,strcat(mglevel,'/v1d')); +% $$$ figure +% $$$ spy(mata) + +% +% Solutions at the finest grid +% +sol_direct=h5read(file,'/solutions/direct'); +sol_anal=h5read(file,'/solutions/anal'); +vfine=h5read(file,'/solutions/vfine'); +figure +subplot(211) +surf(x,y,sol_direct') +xlabel('X'); ylabel('Y'); +title('Direct Solve on the finest grid') +subplot(212) +surf(x,y,vfine') +xlabel('X'); ylabel('Y'); +title('Prolongation solution on the finest grid') + +figure +subplot(311) +plot(x, sol_direct(:,ny(1)/2),x, vfine(:,ny(1)/2),'o') +xlabel('r'); +grid on +legend('Direct Solution', 'Prolonged Solution') +title(title_str) +subplot(313) +plot(y, sol_direct(1,:),y, vfine(1,:),'o') +xlabel('\theta'); ylabel('On axis') +grid on +title(title_str) +subplot(312) +plot(y, sol_direct(nx(1)/2,:),y, vfine(nx(1)/2,:),'o') +xlabel('\theta'); ylabel('Off axis') +grid on +title(title_str) +% +% Errors +% +errdisc = h5read(file,'/errors/errdisc'); +resid = h5read(file,'/errors/resid'); +restrict = h5read(file,'/errors/restrict'); +prolong = h5read(file,'/errors/prolong'); +errdisc_prolong = h5read(file,'/errors/disc_err_prolong'); +figure +loglog(nx, errdisc, 'o-', nx(2:end), errdisc_prolong, 'h-') +grid on; +legend('Direct Solution', 'Prolonged Solution') +xlabel('N'); ylabel('Discretization Errors') +title(title_str); diff --git a/matlab/tpardiso.m b/matlab/tpardiso.m new file mode 100644 index 0000000..5efb0a1 --- /dev/null +++ b/matlab/tpardiso.m @@ -0,0 +1,72 @@ +% +% @file tpardiso.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +mat='/MAT1'; +gbmat; +clear S gb_mat; + +% Diagonal balancing of matrix +dbal = 1./sqrt(diag(a)); +a = diag(dbal)*a*diag(dbal); + +file='tpardiso.h5'; +mat='/MAT'; +n=hdf5read(file,strcat(mat,'/N')); +nz=hdf5read(file,strcat(mat,'/NZ')); + +irow=hdf5read(file,strcat(mat,'/irow')); +cols=hdf5read(file,strcat(mat,'/cols')); +val=hdf5read(file,strcat(mat,'/val')); +perm=hdf5read(file,strcat(mat,'/perm')); + +amat=zeros(n,n); + +% Check PARDISO mat +for i=1:n + for k=irow(i):irow(i+1)-1 + j=cols(k); + amat(i,j) = val(k); + amat(j,i) = val(k); + end +end +err = a-amat; +errmx = max(max(abs(err))); +fprintf(1,'Max. error = %e\n', errmx); + +figure +spy(sparse(amat(perm,perm)),'r.'); +LABEL=sprintf('n = %d, nz =%d', n, nz); +title(LABEL) + +% $$$ pmat=zeros(n); +% $$$ for i=1:n +% $$$ pmat(i,perm(i))=1; +% $$$ end +% $$$ amod=pmat*amat*pmat'; +% $$$ S=sparse(amod); +% $$$ figure +% $$$ spy(S,'r.'); +% $$$ LABEL=sprintf('n = %d', n); +% $$$ title(LABEL) diff --git a/matlab/two_grid.m b/matlab/two_grid.m new file mode 100644 index 0000000..3be72de --- /dev/null +++ b/matlab/two_grid.m @@ -0,0 +1,92 @@ +% +% @file two_grid.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +file='two_grid.h5'; +% +nx=h5readatt(file,'/','NX'); +nidbas=h5readatt(file,'/','NIDBAS'); +levels=h5readatt(file,'/','LEVELS'); +title_str=sprintf('NX = %d, NIDBAS = %d, levels = %d', nx, nidbas, levels); +% +% Read matrices at coarset grid +% +for lev=2:levels +% +% FE mat at fine grid +mglevel=sprintf('/mglevels/level.%.2d', lev-1); +dset=strcat(mglevel,'/mata'); +ku=h5readatt(file,dset,'KU'); kl=ku; +n=h5readatt(file,dset,'RANK'); +gbmat=h5read(file,dset); +mata_f=zeros(n,n); +for i=1:n + jmin = max(1,i-kl); + jmax = min(n,i+ku); + for j=jmin:jmax + ib = kl+ku+i-j+1; + mata_f(i,j)=gbmat(ib,j); + end +end +dset=strcat(mglevel,'/f'); +f_fine = h5read(file,dset); +dset=strcat(mglevel,'/v'); +v_fine = h5read(file,dset); +% +% FE mat at coarse grid +mglevel=sprintf('/mglevels/level.%.2d', lev); +dset=strcat(mglevel,'/mata'); +ku=h5readatt(file,dset,'KU'); kl=ku; +n=h5readatt(file,dset,'RANK'); +gbmat=h5read(file,dset); +mata_c=zeros(n,n); +for i=1:n + jmin = max(1,i-kl); + jmax = min(n,i+ku); + for j=jmin:jmax + ib = kl+ku+i-j+1; + mata_c(i,j)=gbmat(ib,j); + end +end +dset=strcat(mglevel,'/f'); +f_coarse = h5read(file,dset); +dset=strcat(mglevel,'/v'); +v_coarse = h5read(file,dset); +% +% Prolong mat +dset=strcat(mglevel,'/matp'); +matp=h5read(file,dset); +% +% Check +fprintf(1,'Level %d: ||A_coarse - P''*A_fine*P|| = %g\n', lev, norm(matp'*mata_f*matp ... + - mata_c)) +end +% + +v_prolong = h5read(file,'/v_prolong'); + +figure +plot(v_fine) +hold on +plot(v_prolong, 'r') diff --git a/matlab/zcsr_mat.m b/matlab/zcsr_mat.m new file mode 100644 index 0000000..0b1b491 --- /dev/null +++ b/matlab/zcsr_mat.m @@ -0,0 +1,44 @@ +% +% @file zcsr_mat.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +function [mata, diag] = zcsr_mat(file, dset) + n=hdf5read(file,dset, 'RANK'); + nnz=hdf5read(file,dset, 'NNZ'); + cols=hdf5read(file, strcat(dset,'/cols')); + irow=hdf5read(file, strcat(dset,'/irow')); + val=h5Complex_ll(file, strcat(dset,'/val')); + idiag=hdf5read(file, strcat(dset,'/idiag')); + for i=1:n + s = irow(i); + e = irow(i+1)-1; + rows(s:e) = i; + end + cols=double(cols); + rows=double(rows); + mata = sparse(rows,cols,val); + if nargout == 2 + diag = val(idiag); + end + diff --git a/matlab/zmumps_mat.m b/matlab/zmumps_mat.m new file mode 100644 index 0000000..2db15b7 --- /dev/null +++ b/matlab/zmumps_mat.m @@ -0,0 +1,44 @@ +% +% @file zmumps_mat.m +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +function [mata, diag] = zmumps_mat(file, dset) + n=hdf5read(file,dset, 'RANK'); + Nnz=hdf5read(file,dset, 'NNZ'); + cols=hdf5read(file, strcat(dset,'/cols')); + irow=hdf5read(file, strcat(dset,'/irow')); + irn=hdf5read(file, strcat(dset,'/mumps_par/IRN')); + val=h5Complex_ll(file, strcat(dset,'/val')); + idiag=int32(find((irn-cols)==0)); + for i=1:n + s = irow(i); + e = irow(i+1)-1; + rows(s:e) = i; + end + cols=double(cols); + rows=double(rows); + mata = sparse(rows,cols,val); + if nargout == 2 + diag = val(idiag); + end diff --git a/multigrid/CMakeLists.txt b/multigrid/CMakeLists.txt new file mode 100644 index 0000000..cec61e0 --- /dev/null +++ b/multigrid/CMakeLists.txt @@ -0,0 +1,32 @@ +/** + * @file CMakeLists.txt + * + * @brief + * + * @copyright + * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) + * SPC (Swiss Plasma Center) + * + * spclibs is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * + * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + * + * @authors + * (in alphabetical order) + * @author Nicolas Richart + * @author Trach-Minh Tran + */ +add_subdirectory(src) +if(BSPLINES_EXAMPLES) + add_subdirectory(wk) +endif() + +#add_subdirectory(halpern) diff --git a/multigrid/docs/Makefile b/multigrid/docs/Makefile new file mode 100644 index 0000000..e144969 --- /dev/null +++ b/multigrid/docs/Makefile @@ -0,0 +1,52 @@ +# +# @file Makefile +# +# @brief +# +# @copyright +# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +# SPC (Swiss Plasma Center) +# +# spclibs is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . +# +# @authors +# (in alphabetical order) +# @author Trach-Minh Tran +# +all: multigrid.pdf multigrid_2d.pdf mg_gbs.pdf + +grid.eps: grid.tex + tex grid.tex + dvips -o grid.eps grid.dvi + +mg_gbs.dvi: grid.eps + +clean: + rm -f grid.eps *.dvi *.log *.aux *~ *.toc *.flc *.bbl *.blg *.out *~ + +distclean: clean + +.SUFFIXES: +.SUFFIXES: .tex .dvi .pdf + +%.dvi: %.tex + latex $< + @while ( grep "Rerun to get cross-references" \ + ${<:tex=log} > /dev/null ); do \ + latex $<; \ + done + latex $< + +%.pdf: %.dvi + dvipdf $< + diff --git a/multigrid/docs/cubic_mg2d.eps b/multigrid/docs/cubic_mg2d.eps new file mode 100644 index 0000000..34ee918 --- /dev/null +++ b/multigrid/docs/cubic_mg2d.eps @@ -0,0 +1,1625 @@ +%!PS-Adobe-3.0 EPSF-3.0 +%%Creator: MATLAB, The MathWorks, Inc. Version 7.14.0.739 (R2012a). Operating System: Linux 3.4.6-2.10-desktop #1 SMP PREEMPT Thu Jul 26 09:36:26 UTC 2012 (641c197) x86_64. +%%Title: /home/ttran/HLST/2012/report-4/cubic_mg2d.eps +%%CreationDate: 12/21/2012 09:47:59 +%%DocumentNeededFonts: Helvetica +%%DocumentProcessColors: Cyan Magenta Yellow Black +%%LanguageLevel: 2 +%%Pages: 1 +%%BoundingBox: 16 73 576 769 +%%EndComments + +%%BeginProlog +% MathWorks dictionary +/MathWorks 160 dict begin +% definition operators +/bdef {bind def} bind def +/ldef {load def} bind def +/xdef {exch def} bdef +/xstore {exch store} bdef +% operator abbreviations +/c /clip ldef +/cc /concat ldef +/cp /closepath ldef +/gr /grestore ldef +/gs /gsave ldef +/mt /moveto ldef +/np /newpath ldef +/cm /currentmatrix ldef +/sm /setmatrix ldef +/rm /rmoveto ldef +/rl /rlineto ldef +/s {show newpath} bdef +/sc {setcmykcolor} bdef +/sr /setrgbcolor ldef +/sg /setgray ldef +/w /setlinewidth ldef +/j /setlinejoin ldef +/cap /setlinecap ldef +/rc {rectclip} bdef +/rf {rectfill} bdef +% page state control +/pgsv () def +/bpage {/pgsv save def} bdef +/epage {pgsv restore} bdef +/bplot /gsave ldef +/eplot {stroke grestore} bdef +% orientation switch +/portraitMode 0 def /landscapeMode 1 def /rotateMode 2 def +% coordinate system mappings +/dpi2point 0 def +% font control +/FontSize 0 def +/FMS {/FontSize xstore findfont [FontSize 0 0 FontSize neg 0 0] + makefont setfont} bdef +/reencode {exch dup where {pop load} {pop StandardEncoding} ifelse + exch dup 3 1 roll findfont dup length dict begin + { 1 index /FID ne {def}{pop pop} ifelse } forall + /Encoding exch def currentdict end definefont pop} bdef +/isroman {findfont /CharStrings get /Agrave known} bdef +/FMSR {3 1 roll 1 index dup isroman {reencode} {pop pop} ifelse + exch FMS} bdef +/csm {1 dpi2point div -1 dpi2point div scale neg translate + dup landscapeMode eq {pop -90 rotate} + {rotateMode eq {90 rotate} if} ifelse} bdef +% line types: solid, dotted, dashed, dotdash +/SO { [] 0 setdash } bdef +/DO { [.5 dpi2point mul 4 dpi2point mul] 0 setdash } bdef +/DA { [6 dpi2point mul] 0 setdash } bdef +/DD { [.5 dpi2point mul 4 dpi2point mul 6 dpi2point mul 4 + dpi2point mul] 0 setdash } bdef +% macros for lines and objects +/L {lineto stroke} bdef +/MP {3 1 roll moveto 1 sub {rlineto} repeat} bdef +/AP {{rlineto} repeat} bdef +/PDlw -1 def +/W {/PDlw currentlinewidth def setlinewidth} def +/PP {closepath eofill} bdef +/DP {closepath stroke} bdef +/MR {4 -2 roll moveto dup 0 exch rlineto exch 0 rlineto + neg 0 exch rlineto closepath} bdef +/FR {MR stroke} bdef +/PR {MR fill} bdef +/L1i {{currentfile picstr readhexstring pop} image} bdef +/tMatrix matrix def +/MakeOval {newpath tMatrix currentmatrix pop translate scale +0 0 1 0 360 arc tMatrix setmatrix} bdef +/FO {MakeOval stroke} bdef +/PO {MakeOval fill} bdef +/PD {currentlinewidth 2 div 0 360 arc fill + PDlw -1 eq not {PDlw w /PDlw -1 def} if} def +/FA {newpath tMatrix currentmatrix pop translate scale + 0 0 1 5 -2 roll arc tMatrix setmatrix stroke} bdef +/PA {newpath tMatrix currentmatrix pop translate 0 0 moveto scale + 0 0 1 5 -2 roll arc closepath tMatrix setmatrix fill} bdef +/FAn {newpath tMatrix currentmatrix pop translate scale + 0 0 1 5 -2 roll arcn tMatrix setmatrix stroke} bdef +/PAn {newpath tMatrix currentmatrix pop translate 0 0 moveto scale + 0 0 1 5 -2 roll arcn closepath tMatrix setmatrix fill} bdef +/vradius 0 def /hradius 0 def /lry 0 def +/lrx 0 def /uly 0 def /ulx 0 def /rad 0 def +/MRR {/vradius xdef /hradius xdef /lry xdef /lrx xdef /uly xdef + /ulx xdef newpath tMatrix currentmatrix pop ulx hradius add uly + vradius add translate hradius vradius scale 0 0 1 180 270 arc + tMatrix setmatrix lrx hradius sub uly vradius add translate + hradius vradius scale 0 0 1 270 360 arc tMatrix setmatrix + lrx hradius sub lry vradius sub translate hradius vradius scale + 0 0 1 0 90 arc tMatrix setmatrix ulx hradius add lry vradius sub + translate hradius vradius scale 0 0 1 90 180 arc tMatrix setmatrix + closepath} bdef +/FRR {MRR stroke } bdef +/PRR {MRR fill } bdef +/MlrRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lry uly sub 2 div def + newpath tMatrix currentmatrix pop ulx rad add uly rad add translate + rad rad scale 0 0 1 90 270 arc tMatrix setmatrix lrx rad sub lry rad + sub translate rad rad scale 0 0 1 270 90 arc tMatrix setmatrix + closepath} bdef +/FlrRR {MlrRR stroke } bdef +/PlrRR {MlrRR fill } bdef +/MtbRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lrx ulx sub 2 div def + newpath tMatrix currentmatrix pop ulx rad add uly rad add translate + rad rad scale 0 0 1 180 360 arc tMatrix setmatrix lrx rad sub lry rad + sub translate rad rad scale 0 0 1 0 180 arc tMatrix setmatrix + closepath} bdef +/FtbRR {MtbRR stroke } bdef +/PtbRR {MtbRR fill } bdef +/stri 6 array def /dtri 6 array def +/smat 6 array def /dmat 6 array def +/tmat1 6 array def /tmat2 6 array def /dif 3 array def +/asub {/ind2 exch def /ind1 exch def dup dup + ind1 get exch ind2 get sub exch } bdef +/tri_to_matrix { + 2 0 asub 3 1 asub 4 0 asub 5 1 asub + dup 0 get exch 1 get 7 -1 roll astore } bdef +/compute_transform { + dmat dtri tri_to_matrix tmat1 invertmatrix + smat stri tri_to_matrix tmat2 concatmatrix } bdef +/ds {stri astore pop} bdef +/dt {dtri astore pop} bdef +/db {2 copy /cols xdef /rows xdef mul dup 3 mul string + currentfile + 3 index 0 eq {/ASCIIHexDecode filter} + {/ASCII85Decode filter 3 index 2 eq {/RunLengthDecode filter} if } + ifelse exch readstring pop + dup 0 3 index getinterval /rbmap xdef + dup 2 index dup getinterval /gbmap xdef + 1 index dup 2 mul exch getinterval /bbmap xdef pop pop}bdef +/it {gs np dtri aload pop moveto lineto lineto cp c + cols rows 8 compute_transform + rbmap gbmap bbmap true 3 colorimage gr}bdef +/il {newpath moveto lineto stroke}bdef +currentdict end def +%%EndProlog + +%%BeginSetup +MathWorks begin + +0 cap + +end +%%EndSetup + +%%Page: 1 1 +%%BeginPageSetup +%%PageBoundingBox: 16 73 576 769 +MathWorks begin +bpage +%%EndPageSetup + +%%BeginObject: obj1 +bplot + +/dpi2point 12 def +portraitMode 0192 9228 csm + + 0 0 6731 8346 rc +87 dict begin %Colortable dictionary +/c0 { 0.000000 0.000000 0.000000 sr} bdef +/c1 { 1.000000 1.000000 1.000000 sr} bdef +/c2 { 0.900000 0.000000 0.000000 sr} bdef +/c3 { 0.000000 0.820000 0.000000 sr} bdef +/c4 { 0.000000 0.000000 0.800000 sr} bdef +/c5 { 0.910000 0.820000 0.320000 sr} bdef +/c6 { 1.000000 0.260000 0.820000 sr} bdef +/c7 { 0.000000 0.820000 0.820000 sr} bdef +c0 +1 j +1 sg + 0 0 6732 8347 rf +6 w +0 2847 5217 0 0 -2847 875 3473 4 MP +PP +-5217 0 0 2847 5217 0 0 -2847 875 3473 5 MP stroke +4 w +DO +0 sg + 875 3473 mt 875 626 L + 875 626 mt 875 626 L +1396 3473 mt 1396 626 L +1396 626 mt 1396 626 L +1918 3473 mt 1918 626 L +1918 626 mt 1918 626 L +2440 3473 mt 2440 626 L +2440 626 mt 2440 626 L +2961 3473 mt 2961 626 L +2961 626 mt 2961 626 L +3483 3473 mt 3483 626 L +3483 626 mt 3483 626 L +4005 3473 mt 4005 626 L +4005 626 mt 4005 626 L +4526 3473 mt 4526 626 L +4526 626 mt 4526 626 L +5048 3473 mt 5048 626 L +5048 626 mt 5048 626 L +5570 3473 mt 5570 626 L +5570 626 mt 5570 626 L +6092 3473 mt 6092 626 L +6092 626 mt 6092 626 L + 875 3473 mt 6092 3473 L +6092 3473 mt 6092 3473 L + 875 2998 mt 6092 2998 L +6092 2998 mt 6092 2998 L + 875 2524 mt 6092 2524 L +6092 2524 mt 6092 2524 L + 875 2049 mt 6092 2049 L +6092 2049 mt 6092 2049 L + 875 1575 mt 6092 1575 L +6092 1575 mt 6092 1575 L + 875 1100 mt 6092 1100 L +6092 1100 mt 6092 1100 L + 875 626 mt 6092 626 L +6092 626 mt 6092 626 L +SO +6 w + 875 3473 mt 6092 3473 L + 875 626 mt 6092 626 L + 875 3473 mt 875 626 L +6092 3473 mt 6092 626 L + 875 3473 mt 6092 3473 L + 875 3473 mt 875 626 L + 875 3473 mt 875 3420 L + 875 626 mt 875 678 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 829 3663 mt +(0) s +1396 3473 mt 1396 3420 L +1396 626 mt 1396 678 L +1350 3663 mt +(1) s +1918 3473 mt 1918 3420 L +1918 626 mt 1918 678 L +1872 3663 mt +(2) s +2440 3473 mt 2440 3420 L +2440 626 mt 2440 678 L +2394 3663 mt +(3) s +2961 3473 mt 2961 3420 L +2961 626 mt 2961 678 L +2915 3663 mt +(4) s +3483 3473 mt 3483 3420 L +3483 626 mt 3483 678 L +3437 3663 mt +(5) s +4005 3473 mt 4005 3420 L +4005 626 mt 4005 678 L +3959 3663 mt +(6) s +4526 3473 mt 4526 3420 L +4526 626 mt 4526 678 L +4480 3663 mt +(7) s +5048 3473 mt 5048 3420 L +5048 626 mt 5048 678 L +5002 3663 mt +(8) s +5570 3473 mt 5570 3420 L +5570 626 mt 5570 678 L +5524 3663 mt +(9) s +6092 3473 mt 6092 3420 L +6092 626 mt 6092 678 L +5999 3663 mt +(10) s + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 927 3473 L +6092 3473 mt 6039 3473 L + 465 3535 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 651 3431 mt +(-12) s + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 2998 mt 927 2998 L +6092 2998 mt 6039 2998 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 465 3060 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 651 2956 mt +(-10) s + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 2524 mt 927 2524 L +6092 2524 mt 6039 2524 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 465 2586 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 651 2482 mt +(-8) s + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 2049 mt 927 2049 L +6092 2049 mt 6039 2049 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 465 2111 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 651 2007 mt +(-6) s + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 1575 mt 927 1575 L +6092 1575 mt 6039 1575 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 465 1637 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 651 1533 mt +(-4) s + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 1100 mt 927 1100 L +6092 1100 mt 6039 1100 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 465 1162 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 651 1058 mt +(-2) s + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 626 mt 927 626 L +6092 626 mt 6039 626 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 465 688 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 651 584 mt +(0) s + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +DO + 875 3473 mt 875 3473 L + 875 3473 mt 875 3473 L +SO + 875 3473 mt 6092 3473 L + 875 626 mt 6092 626 L + 875 3473 mt 875 626 L +6092 3473 mt 6092 626 L +gs 875 626 5218 2848 rc +/c8 { 0.000000 0.000000 1.000000 sr} bdef +c8 +522 65 522 67 522 67 521 69 522 71 522 74 521 83 522 285 +522 474 521 804 875 1197 11 MP stroke +gr + +c8 +gs 802 1124 5364 2206 rc + 36 36 875 1197 FO + 36 36 1396 2001 FO + 36 36 1918 2475 FO + 36 36 2440 2760 FO + 36 36 2961 2843 FO + 36 36 3483 2917 FO + 36 36 4005 2988 FO + 36 36 4526 3057 FO + 36 36 5048 3124 FO + 36 36 5570 3191 FO + 36 36 6092 3256 FO +gr + +gs 875 626 5218 2848 rc +gr + +0 sg +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + +3137 3841 mt +(Iterations) s + 373 2668 mt -90 rotate +(Norm of residual) s +90 rotate +1825 494 mt +(Cubic Splines, V\(2,1\), relax=gs, KX=3, KY=3) s +gs 875 626 5218 2848 rc +/c9 { 0.000000 0.498039 0.000000 sr} bdef +c9 +522 66 522 66 522 68 521 69 522 72 522 75 521 81 522 195 +522 442 521 731 875 1126 11 MP stroke +gr + +c9 +gs 802 1053 5364 2012 rc + 36 36 875 1126 FO + 36 36 1396 1857 FO + 36 36 1918 2299 FO + 36 36 2440 2494 FO + 36 36 2961 2575 FO + 36 36 3483 2650 FO + 36 36 4005 2722 FO + 36 36 4526 2791 FO + 36 36 5048 2859 FO + 36 36 5570 2925 FO + 36 36 6092 2991 FO +gr + +gs 875 626 5218 2848 rc +/c10 { 0.847059 0.160784 0.000000 sr} bdef +c10 +522 65 522 67 522 68 521 70 522 73 522 77 521 84 522 119 +522 425 521 660 875 1057 11 MP stroke +gr + +c10 +gs 802 984 5364 1855 rc + 36 36 875 1057 FO + 36 36 1396 1717 FO + 36 36 1918 2142 FO + 36 36 2440 2261 FO + 36 36 2961 2345 FO + 36 36 3483 2422 FO + 36 36 4005 2495 FO + 36 36 4526 2565 FO + 36 36 5048 2633 FO + 36 36 5570 2700 FO + 36 36 6092 2765 FO +gr + +gs 875 626 5218 2848 rc +/c11 { 0.000000 0.749020 0.749020 sr} bdef +c11 +522 65 522 67 522 69 521 70 522 74 522 78 521 86 522 115 +522 350 521 627 875 995 11 MP stroke +gr + +c11 +gs 802 922 5364 1748 rc + 36 36 875 995 FO + 36 36 1396 1622 FO + 36 36 1918 1972 FO + 36 36 2440 2087 FO + 36 36 2961 2173 FO + 36 36 3483 2251 FO + 36 36 4005 2325 FO + 36 36 4526 2395 FO + 36 36 5048 2464 FO + 36 36 5570 2531 FO + 36 36 6092 2596 FO +gr + +gs 875 626 5218 2848 rc +gr + +1 sg +0 2847 5217 0 0 -2847 875 7428 4 MP +PP +-5217 0 0 2847 5217 0 0 -2847 875 7428 5 MP stroke +4 w +DO +0 sg + 875 7428 mt 875 4581 L + 875 4581 mt 875 4581 L +1396 7428 mt 1396 4581 L +1396 4581 mt 1396 4581 L +1918 7428 mt 1918 4581 L +1918 4581 mt 1918 4581 L +2440 7428 mt 2440 4581 L +2440 4581 mt 2440 4581 L +2961 7428 mt 2961 4581 L +2961 4581 mt 2961 4581 L +3483 7428 mt 3483 4581 L +3483 4581 mt 3483 4581 L +4005 7428 mt 4005 4581 L +4005 4581 mt 4005 4581 L +4526 7428 mt 4526 4581 L +4526 4581 mt 4526 4581 L +5048 7428 mt 5048 4581 L +5048 4581 mt 5048 4581 L +5570 7428 mt 5570 4581 L +5570 4581 mt 5570 4581 L +6092 7428 mt 6092 4581 L +6092 4581 mt 6092 4581 L + 875 7428 mt 6092 7428 L +6092 7428 mt 6092 7428 L + 875 6716 mt 6092 6716 L +6092 6716 mt 6092 6716 L + 875 6004 mt 6092 6004 L +6092 6004 mt 6092 6004 L + 875 5292 mt 6092 5292 L +6092 5292 mt 6092 5292 L + 875 4581 mt 6092 4581 L +6092 4581 mt 6092 4581 L +SO +6 w + 875 7428 mt 6092 7428 L + 875 4581 mt 6092 4581 L + 875 7428 mt 875 4581 L +6092 7428 mt 6092 4581 L + 875 7428 mt 6092 7428 L + 875 7428 mt 875 4581 L + 875 7428 mt 875 7375 L + 875 4581 mt 875 4633 L + 829 7618 mt +(0) s +1396 7428 mt 1396 7375 L +1396 4581 mt 1396 4633 L +1350 7618 mt +(1) s +1918 7428 mt 1918 7375 L +1918 4581 mt 1918 4633 L +1872 7618 mt +(2) s +2440 7428 mt 2440 7375 L +2440 4581 mt 2440 4633 L +2394 7618 mt +(3) s +2961 7428 mt 2961 7375 L +2961 4581 mt 2961 4633 L +2915 7618 mt +(4) s +3483 7428 mt 3483 7375 L +3483 4581 mt 3483 4633 L +3437 7618 mt +(5) s +4005 7428 mt 4005 7375 L +4005 4581 mt 4005 4633 L +3959 7618 mt +(6) s +4526 7428 mt 4526 7375 L +4526 4581 mt 4526 4633 L +4480 7618 mt +(7) s +5048 7428 mt 5048 7375 L +5048 4581 mt 5048 4633 L +5002 7618 mt +(8) s +5570 7428 mt 5570 7375 L +5570 4581 mt 5570 4633 L +5524 7618 mt +(9) s +6092 7428 mt 6092 7375 L +6092 4581 mt 6092 4633 L +5999 7618 mt +(10) s + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 927 7428 L +6092 7428 mt 6039 7428 L + 465 7490 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 651 7386 mt +(-10) s + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 6716 mt 927 6716 L +6092 6716 mt 6039 6716 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 465 6778 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 651 6674 mt +(-8) s + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 6004 mt 927 6004 L +6092 6004 mt 6039 6004 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 465 6066 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 651 5962 mt +(-6) s + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 5292 mt 927 5292 L +6092 5292 mt 6039 5292 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 465 5354 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 651 5250 mt +(-4) s + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 4581 mt 927 4581 L +6092 4581 mt 6039 4581 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 465 4643 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 651 4539 mt +(-2) s + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +DO + 875 7428 mt 875 7428 L + 875 7428 mt 875 7428 L +SO + 875 7428 mt 6092 7428 L + 875 4581 mt 6092 4581 L + 875 7428 mt 875 4581 L +6092 7428 mt 6092 4581 L +gs 875 4581 5218 2848 rc +c8 +522 0 522 0 522 0 521 0 522 0 522 0 521 0 522 22 +522 937 521 1686 875 4776 11 MP stroke +gr + +c8 +gs 802 4703 5364 2792 rc +0 j +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 844 4758 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 1365 6444 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 1887 7381 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 2409 7403 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 2930 7403 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 3452 7403 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 3974 7403 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 4495 7403 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 5017 7403 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 5539 7403 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 6061 7403 13 MP +DP +gr + +gs 875 4581 5218 2848 rc +gr + +0 sg +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + +3137 7796 mt +(Iterations) s + 373 6506 mt -90 rotate +(Norm of error) s +90 rotate +gs 875 4581 5218 2848 rc +c9 +522 0 522 0 522 0 521 0 522 0 522 0 521 1 522 10 +522 594 521 1611 875 4776 11 MP stroke +gr + +c9 +gs 802 4703 5364 2363 rc +0 j +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 844 4758 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 1365 6369 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 1887 6963 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 2409 6973 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 2930 6974 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 3452 6974 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 3974 6974 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 4495 6974 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 5017 6974 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 5539 6974 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 6061 6974 13 MP +DP +gr + +gs 875 4581 5218 2848 rc +c10 +522 0 522 0 522 0 521 0 522 0 522 0 521 0 522 4 +522 390 521 1388 875 4776 11 MP stroke +gr + +c10 +gs 802 4703 5364 1929 rc +0 j +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 844 4758 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 1365 6146 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 1887 6536 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 2409 6540 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 2930 6540 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 3452 6540 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 3974 6540 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 4495 6540 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 5017 6540 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 5539 6540 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 6061 6540 13 MP +DP +gr + +gs 875 4581 5218 2848 rc +c11 +522 0 522 0 522 0 521 0 522 1 522 0 521 0 522 1 +522 200 521 1135 875 4776 11 MP stroke +gr + +c11 +gs 802 4703 5364 1484 rc +0 j +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 844 4758 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 1365 5893 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 1887 6093 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 2409 6094 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 2930 6094 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 3452 6094 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 3974 6095 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 4495 6095 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 5017 6095 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 5539 6095 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 6061 6095 13 MP +DP +gr + +gs 875 4581 5218 2848 rc +gr + +0 sg +-225 213 3020 5810 2 MP stroke +57 -53 -119 49 2857 6027 3 MP +PP +62 4 57 -53 -119 49 2857 6027 4 MP stroke +0 62 57 -115 2738 6076 3 MP +PP +-57 53 0 62 57 -115 2738 6076 4 MP stroke +3022 5773 mt +(16 X 16) s +-151 154 3312 6300 2 MP stroke +55 -56 -116 55 3222 6455 3 MP +PP +61 1 55 -56 -116 55 3222 6455 4 MP stroke +3 62 52 -118 3106 6510 3 MP +PP +-55 56 3 62 52 -118 3106 6510 4 MP stroke +3314 6263 mt +(32 X 32) s +-161 158 3777 6730 2 MP stroke +55 -55 -117 52 3678 6891 3 MP +PP +62 3 55 -55 -117 52 3678 6891 4 MP stroke +1 61 54 -116 3561 6943 3 MP +PP +-55 55 1 61 54 -116 3561 6943 4 MP stroke +3779 6693 mt +(64 X 64) s +-171 163 4244 7151 2 MP stroke +57 -54 -118 50 4134 7318 3 MP +PP +61 4 57 -54 -118 50 4134 7318 4 MP stroke +1 62 56 -116 4016 7368 3 MP +PP +-57 54 1 62 56 -116 4016 7368 4 MP stroke +4246 7114 mt +(128 X 128) s + +end %%Color Dict + +eplot +%%EndObject + +epage +end + +showpage + +%%Trailer +%%EOF diff --git a/multigrid/docs/cubic_mg2d.fig b/multigrid/docs/cubic_mg2d.fig new file mode 100644 index 0000000..df7519e Binary files /dev/null and b/multigrid/docs/cubic_mg2d.fig differ diff --git a/multigrid/docs/cubic_mg2d_levels.eps b/multigrid/docs/cubic_mg2d_levels.eps new file mode 100644 index 0000000..65352b0 --- /dev/null +++ b/multigrid/docs/cubic_mg2d_levels.eps @@ -0,0 +1,1559 @@ +%!PS-Adobe-3.0 EPSF-3.0 +%%Creator: MATLAB, The MathWorks, Inc. Version 7.14.0.739 (R2012a). Operating System: Linux 3.4.6-2.10-desktop #1 SMP PREEMPT Thu Jul 26 09:36:26 UTC 2012 (641c197) x86_64. +%%Title: /home/ttran/HLST/2012/report-4/cubic_mg2d_levels.eps +%%CreationDate: 12/21/2012 09:48:21 +%%DocumentNeededFonts: Helvetica +%%DocumentProcessColors: Cyan Magenta Yellow Black +%%LanguageLevel: 2 +%%Pages: 1 +%%BoundingBox: 17 88 576 753 +%%EndComments + +%%BeginProlog +% MathWorks dictionary +/MathWorks 160 dict begin +% definition operators +/bdef {bind def} bind def +/ldef {load def} bind def +/xdef {exch def} bdef +/xstore {exch store} bdef +% operator abbreviations +/c /clip ldef +/cc /concat ldef +/cp /closepath ldef +/gr /grestore ldef +/gs /gsave ldef +/mt /moveto ldef +/np /newpath ldef +/cm /currentmatrix ldef +/sm /setmatrix ldef +/rm /rmoveto ldef +/rl /rlineto ldef +/s {show newpath} bdef +/sc {setcmykcolor} bdef +/sr /setrgbcolor ldef +/sg /setgray ldef +/w /setlinewidth ldef +/j /setlinejoin ldef +/cap /setlinecap ldef +/rc {rectclip} bdef +/rf {rectfill} bdef +% page state control +/pgsv () def +/bpage {/pgsv save def} bdef +/epage {pgsv restore} bdef +/bplot /gsave ldef +/eplot {stroke grestore} bdef +% orientation switch +/portraitMode 0 def /landscapeMode 1 def /rotateMode 2 def +% coordinate system mappings +/dpi2point 0 def +% font control +/FontSize 0 def +/FMS {/FontSize xstore findfont [FontSize 0 0 FontSize neg 0 0] + makefont setfont} bdef +/reencode {exch dup where {pop load} {pop StandardEncoding} ifelse + exch dup 3 1 roll findfont dup length dict begin + { 1 index /FID ne {def}{pop pop} ifelse } forall + /Encoding exch def currentdict end definefont pop} bdef +/isroman {findfont /CharStrings get /Agrave known} bdef +/FMSR {3 1 roll 1 index dup isroman {reencode} {pop pop} ifelse + exch FMS} bdef +/csm {1 dpi2point div -1 dpi2point div scale neg translate + dup landscapeMode eq {pop -90 rotate} + {rotateMode eq {90 rotate} if} ifelse} bdef +% line types: solid, dotted, dashed, dotdash +/SO { [] 0 setdash } bdef +/DO { [.5 dpi2point mul 4 dpi2point mul] 0 setdash } bdef +/DA { [6 dpi2point mul] 0 setdash } bdef +/DD { [.5 dpi2point mul 4 dpi2point mul 6 dpi2point mul 4 + dpi2point mul] 0 setdash } bdef +% macros for lines and objects +/L {lineto stroke} bdef +/MP {3 1 roll moveto 1 sub {rlineto} repeat} bdef +/AP {{rlineto} repeat} bdef +/PDlw -1 def +/W {/PDlw currentlinewidth def setlinewidth} def +/PP {closepath eofill} bdef +/DP {closepath stroke} bdef +/MR {4 -2 roll moveto dup 0 exch rlineto exch 0 rlineto + neg 0 exch rlineto closepath} bdef +/FR {MR stroke} bdef +/PR {MR fill} bdef +/L1i {{currentfile picstr readhexstring pop} image} bdef +/tMatrix matrix def +/MakeOval {newpath tMatrix currentmatrix pop translate scale +0 0 1 0 360 arc tMatrix setmatrix} bdef +/FO {MakeOval stroke} bdef +/PO {MakeOval fill} bdef +/PD {currentlinewidth 2 div 0 360 arc fill + PDlw -1 eq not {PDlw w /PDlw -1 def} if} def +/FA {newpath tMatrix currentmatrix pop translate scale + 0 0 1 5 -2 roll arc tMatrix setmatrix stroke} bdef +/PA {newpath tMatrix currentmatrix pop translate 0 0 moveto scale + 0 0 1 5 -2 roll arc closepath tMatrix setmatrix fill} bdef +/FAn {newpath tMatrix currentmatrix pop translate scale + 0 0 1 5 -2 roll arcn tMatrix setmatrix stroke} bdef +/PAn {newpath tMatrix currentmatrix pop translate 0 0 moveto scale + 0 0 1 5 -2 roll arcn closepath tMatrix setmatrix fill} bdef +/vradius 0 def /hradius 0 def /lry 0 def +/lrx 0 def /uly 0 def /ulx 0 def /rad 0 def +/MRR {/vradius xdef /hradius xdef /lry xdef /lrx xdef /uly xdef + /ulx xdef newpath tMatrix currentmatrix pop ulx hradius add uly + vradius add translate hradius vradius scale 0 0 1 180 270 arc + tMatrix setmatrix lrx hradius sub uly vradius add translate + hradius vradius scale 0 0 1 270 360 arc tMatrix setmatrix + lrx hradius sub lry vradius sub translate hradius vradius scale + 0 0 1 0 90 arc tMatrix setmatrix ulx hradius add lry vradius sub + translate hradius vradius scale 0 0 1 90 180 arc tMatrix setmatrix + closepath} bdef +/FRR {MRR stroke } bdef +/PRR {MRR fill } bdef +/MlrRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lry uly sub 2 div def + newpath tMatrix currentmatrix pop ulx rad add uly rad add translate + rad rad scale 0 0 1 90 270 arc tMatrix setmatrix lrx rad sub lry rad + sub translate rad rad scale 0 0 1 270 90 arc tMatrix setmatrix + closepath} bdef +/FlrRR {MlrRR stroke } bdef +/PlrRR {MlrRR fill } bdef +/MtbRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lrx ulx sub 2 div def + newpath tMatrix currentmatrix pop ulx rad add uly rad add translate + rad rad scale 0 0 1 180 360 arc tMatrix setmatrix lrx rad sub lry rad + sub translate rad rad scale 0 0 1 0 180 arc tMatrix setmatrix + closepath} bdef +/FtbRR {MtbRR stroke } bdef +/PtbRR {MtbRR fill } bdef +/stri 6 array def /dtri 6 array def +/smat 6 array def /dmat 6 array def +/tmat1 6 array def /tmat2 6 array def /dif 3 array def +/asub {/ind2 exch def /ind1 exch def dup dup + ind1 get exch ind2 get sub exch } bdef +/tri_to_matrix { + 2 0 asub 3 1 asub 4 0 asub 5 1 asub + dup 0 get exch 1 get 7 -1 roll astore } bdef +/compute_transform { + dmat dtri tri_to_matrix tmat1 invertmatrix + smat stri tri_to_matrix tmat2 concatmatrix } bdef +/ds {stri astore pop} bdef +/dt {dtri astore pop} bdef +/db {2 copy /cols xdef /rows xdef mul dup 3 mul string + currentfile + 3 index 0 eq {/ASCIIHexDecode filter} + {/ASCII85Decode filter 3 index 2 eq {/RunLengthDecode filter} if } + ifelse exch readstring pop + dup 0 3 index getinterval /rbmap xdef + dup 2 index dup getinterval /gbmap xdef + 1 index dup 2 mul exch getinterval /bbmap xdef pop pop}bdef +/it {gs np dtri aload pop moveto lineto lineto cp c + cols rows 8 compute_transform + rbmap gbmap bbmap true 3 colorimage gr}bdef +/il {newpath moveto lineto stroke}bdef +currentdict end def +%%EndProlog + +%%BeginSetup +MathWorks begin + +0 cap + +end +%%EndSetup + +%%Page: 1 1 +%%BeginPageSetup +%%PageBoundingBox: 17 88 576 753 +MathWorks begin +bpage +%%EndPageSetup + +%%BeginObject: obj1 +bplot + +/dpi2point 12 def +portraitMode 0204 9036 csm + + 0 0 6711 7980 rc +87 dict begin %Colortable dictionary +/c0 { 0.000000 0.000000 0.000000 sr} bdef +/c1 { 1.000000 1.000000 1.000000 sr} bdef +/c2 { 0.900000 0.000000 0.000000 sr} bdef +/c3 { 0.000000 0.820000 0.000000 sr} bdef +/c4 { 0.000000 0.000000 0.800000 sr} bdef +/c5 { 0.910000 0.820000 0.320000 sr} bdef +/c6 { 1.000000 0.260000 0.820000 sr} bdef +/c7 { 0.000000 0.820000 0.820000 sr} bdef +c0 +1 j +1 sg + 0 0 6712 7981 rf +6 w +0 2722 5201 0 0 -2722 872 3321 4 MP +PP +-5201 0 0 2722 5201 0 0 -2722 872 3321 5 MP stroke +4 w +DO +0 sg + 872 3321 mt 872 599 L + 872 599 mt 872 599 L +1392 3321 mt 1392 599 L +1392 599 mt 1392 599 L +1912 3321 mt 1912 599 L +1912 599 mt 1912 599 L +2432 3321 mt 2432 599 L +2432 599 mt 2432 599 L +2952 3321 mt 2952 599 L +2952 599 mt 2952 599 L +3472 3321 mt 3472 599 L +3472 599 mt 3472 599 L +3992 3321 mt 3992 599 L +3992 599 mt 3992 599 L +4512 3321 mt 4512 599 L +4512 599 mt 4512 599 L +5032 3321 mt 5032 599 L +5032 599 mt 5032 599 L +5552 3321 mt 5552 599 L +5552 599 mt 5552 599 L +6073 3321 mt 6073 599 L +6073 599 mt 6073 599 L + 872 3321 mt 6073 3321 L +6073 3321 mt 6073 3321 L + 872 2776 mt 6073 2776 L +6073 2776 mt 6073 2776 L + 872 2232 mt 6073 2232 L +6073 2232 mt 6073 2232 L + 872 1687 mt 6073 1687 L +6073 1687 mt 6073 1687 L + 872 1143 mt 6073 1143 L +6073 1143 mt 6073 1143 L + 872 599 mt 6073 599 L +6073 599 mt 6073 599 L +SO +6 w + 872 3321 mt 6073 3321 L + 872 599 mt 6073 599 L + 872 3321 mt 872 599 L +6073 3321 mt 6073 599 L + 872 3321 mt 6073 3321 L + 872 3321 mt 872 599 L + 872 3321 mt 872 3268 L + 872 599 mt 872 651 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 826 3511 mt +(0) s +1392 3321 mt 1392 3268 L +1392 599 mt 1392 651 L +1346 3511 mt +(1) s +1912 3321 mt 1912 3268 L +1912 599 mt 1912 651 L +1866 3511 mt +(2) s +2432 3321 mt 2432 3268 L +2432 599 mt 2432 651 L +2386 3511 mt +(3) s +2952 3321 mt 2952 3268 L +2952 599 mt 2952 651 L +2906 3511 mt +(4) s +3472 3321 mt 3472 3268 L +3472 599 mt 3472 651 L +3426 3511 mt +(5) s +3992 3321 mt 3992 3268 L +3992 599 mt 3992 651 L +3946 3511 mt +(6) s +4512 3321 mt 4512 3268 L +4512 599 mt 4512 651 L +4466 3511 mt +(7) s +5032 3321 mt 5032 3268 L +5032 599 mt 5032 651 L +4986 3511 mt +(8) s +5552 3321 mt 5552 3268 L +5552 599 mt 5552 651 L +5506 3511 mt +(9) s +6073 3321 mt 6073 3268 L +6073 599 mt 6073 651 L +5980 3511 mt +(10) s + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 924 3321 L +6073 3321 mt 6020 3321 L + 462 3383 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 648 3279 mt +(-12) s + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 2776 mt 924 2776 L +6073 2776 mt 6020 2776 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 462 2838 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 648 2734 mt +(-10) s + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 2232 mt 924 2232 L +6073 2232 mt 6020 2232 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 462 2294 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 648 2190 mt +(-8) s + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 1687 mt 924 1687 L +6073 1687 mt 6020 1687 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 462 1749 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 648 1645 mt +(-6) s + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 1143 mt 924 1143 L +6073 1143 mt 6020 1143 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 462 1205 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 648 1101 mt +(-4) s + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 599 mt 924 599 L +6073 599 mt 6020 599 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 462 661 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 648 557 mt +(-2) s + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +DO + 872 3321 mt 872 3321 L + 872 3321 mt 872 3321 L +SO + 872 3321 mt 6073 3321 L + 872 599 mt 6073 599 L + 872 3321 mt 872 599 L +6073 3321 mt 6073 599 L +gs 872 599 5202 2723 rc +/c8 { 0.000000 0.000000 1.000000 sr} bdef +c8 +1 0 521 75 520 76 520 77 520 78 520 81 520 85 520 92 +520 254 520 558 520 988 872 710 12 MP stroke +gr + +c8 +gs 799 637 5349 2511 rc + 36 36 872 710 FO + 36 36 1392 1698 FO + 36 36 1912 2256 FO + 36 36 2432 2510 FO + 36 36 2952 2602 FO + 36 36 3472 2687 FO + 36 36 3992 2768 FO + 36 36 4512 2846 FO + 36 36 5032 2923 FO + 36 36 5552 2999 FO + 36 36 6073 3074 FO +gr + +gs 872 599 5202 2723 rc +gr + +0 sg +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + +3126 3689 mt +(Iterations) s + 370 2579 mt -90 rotate +(Norm of residual) s +90 rotate +gs 872 599 5202 2723 rc +/c9 { 0.847059 0.160784 0.000000 sr} bdef +c9 +1 0 521 75 520 76 520 77 520 79 520 82 520 85 520 95 +520 366 520 511 520 917 872 710 12 MP stroke +gr + +c9 +gs 799 637 5349 2510 rc + 36 36 872 710 FO + 36 36 1392 1627 FO + 36 36 1912 2138 FO + 36 36 2432 2504 FO + 36 36 2952 2599 FO + 36 36 3472 2684 FO + 36 36 3992 2766 FO + 36 36 4512 2845 FO + 36 36 5032 2922 FO + 36 36 5552 2998 FO + 36 36 6073 3073 FO +gr + +gs 872 599 5202 2723 rc +/c10 { 0.000000 0.498039 0.000000 sr} bdef +c10 +1 1 521 75 520 75 520 78 520 79 520 81 520 86 520 95 +520 331 520 538 520 923 872 710 12 MP stroke +gr + +c10 +gs 799 637 5349 2509 rc + 36 36 872 710 FO + 36 36 1392 1633 FO + 36 36 1912 2171 FO + 36 36 2432 2502 FO + 36 36 2952 2597 FO + 36 36 3472 2683 FO + 36 36 3992 2764 FO + 36 36 4512 2843 FO + 36 36 5032 2921 FO + 36 36 5552 2996 FO + 36 36 6073 3071 FO +gr + +gs 872 599 5202 2723 rc +/c11 { 0.000000 0.749020 0.749020 sr} bdef +c11 +1 0 521 75 520 76 520 77 520 79 520 81 520 86 520 95 +520 327 520 543 520 923 872 710 12 MP stroke +gr + +c11 +gs 799 637 5349 2509 rc + 36 36 872 710 FO + 36 36 1392 1633 FO + 36 36 1912 2176 FO + 36 36 2432 2503 FO + 36 36 2952 2598 FO + 36 36 3472 2684 FO + 36 36 3992 2765 FO + 36 36 4512 2844 FO + 36 36 5032 2921 FO + 36 36 5552 2997 FO + 36 36 6073 3072 FO +gr + +gs 872 599 5202 2723 rc +gr + +0 sg +1041 467 mt +(Cubic Splines, 128 x 128 Problem, V\(2,1\), relax=gs, KX=3, \ +KY=3) s +1 sg +0 2722 5201 0 0 -2722 872 7102 4 MP +PP +-5201 0 0 2722 5201 0 0 -2722 872 7102 5 MP stroke +4 w +DO +0 sg + 872 7102 mt 872 4380 L + 872 4380 mt 872 4380 L +1392 7102 mt 1392 4380 L +1392 4380 mt 1392 4380 L +1912 7102 mt 1912 4380 L +1912 4380 mt 1912 4380 L +2432 7102 mt 2432 4380 L +2432 4380 mt 2432 4380 L +2952 7102 mt 2952 4380 L +2952 4380 mt 2952 4380 L +3472 7102 mt 3472 4380 L +3472 4380 mt 3472 4380 L +3992 7102 mt 3992 4380 L +3992 4380 mt 3992 4380 L +4512 7102 mt 4512 4380 L +4512 4380 mt 4512 4380 L +5032 7102 mt 5032 4380 L +5032 4380 mt 5032 4380 L +5552 7102 mt 5552 4380 L +5552 4380 mt 5552 4380 L +6073 7102 mt 6073 4380 L +6073 4380 mt 6073 4380 L + 872 7102 mt 6073 7102 L +6073 7102 mt 6073 7102 L + 872 6421 mt 6073 6421 L +6073 6421 mt 6073 6421 L + 872 5741 mt 6073 5741 L +6073 5741 mt 6073 5741 L + 872 5060 mt 6073 5060 L +6073 5060 mt 6073 5060 L + 872 4380 mt 6073 4380 L +6073 4380 mt 6073 4380 L +SO +6 w + 872 7102 mt 6073 7102 L + 872 4380 mt 6073 4380 L + 872 7102 mt 872 4380 L +6073 7102 mt 6073 4380 L + 872 7102 mt 6073 7102 L + 872 7102 mt 872 4380 L + 872 7102 mt 872 7049 L + 872 4380 mt 872 4432 L + 826 7292 mt +(0) s +1392 7102 mt 1392 7049 L +1392 4380 mt 1392 4432 L +1346 7292 mt +(1) s +1912 7102 mt 1912 7049 L +1912 4380 mt 1912 4432 L +1866 7292 mt +(2) s +2432 7102 mt 2432 7049 L +2432 4380 mt 2432 4432 L +2386 7292 mt +(3) s +2952 7102 mt 2952 7049 L +2952 4380 mt 2952 4432 L +2906 7292 mt +(4) s +3472 7102 mt 3472 7049 L +3472 4380 mt 3472 4432 L +3426 7292 mt +(5) s +3992 7102 mt 3992 7049 L +3992 4380 mt 3992 4432 L +3946 7292 mt +(6) s +4512 7102 mt 4512 7049 L +4512 4380 mt 4512 4432 L +4466 7292 mt +(7) s +5032 7102 mt 5032 7049 L +5032 4380 mt 5032 4432 L +4986 7292 mt +(8) s +5552 7102 mt 5552 7049 L +5552 4380 mt 5552 4432 L +5506 7292 mt +(9) s +6073 7102 mt 6073 7049 L +6073 4380 mt 6073 4432 L +5980 7292 mt +(10) s + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 924 7102 L +6073 7102 mt 6020 7102 L + 462 7164 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 648 7060 mt +(-10) s + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 6421 mt 924 6421 L +6073 6421 mt 6020 6421 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 462 6483 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 648 6379 mt +(-8) s + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 5741 mt 924 5741 L +6073 5741 mt 6020 5741 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 462 5803 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 648 5699 mt +(-6) s + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 5060 mt 924 5060 L +6073 5060 mt 6020 5060 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 462 5122 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 648 5018 mt +(-4) s + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 4380 mt 924 4380 L +6073 4380 mt 6020 4380 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 462 4442 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 648 4338 mt +(-2) s + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +DO + 872 7102 mt 872 7102 L + 872 7102 mt 872 7102 L +SO + 872 7102 mt 6073 7102 L + 872 4380 mt 6073 4380 L + 872 7102 mt 872 4380 L +6073 7102 mt 6073 4380 L +gs 872 4380 5202 2723 rc +c8 +1 0 521 0 520 0 520 0 520 0 520 0 520 0 520 1 +520 6 520 495 520 2027 872 4567 12 MP stroke +gr + +c8 +gs 799 4494 5349 2676 rc +0 j +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 841 4549 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 1361 6576 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 1881 7071 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 2401 7077 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 2921 7078 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 3441 7078 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 3961 7078 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 4481 7078 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 5001 7078 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 5521 7078 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 6042 7078 13 MP +DP +gr + +gs 872 4380 5202 2723 rc +gr + +0 sg +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + +3126 7470 mt +(Iterations) s + 370 6243 mt -90 rotate +(Norm of error) s +90 rotate +gs 872 4380 5202 2723 rc +c9 +1 0 521 0 520 0 520 0 520 0 520 0 520 0 520 1 +520 33 520 624 520 1871 872 4567 12 MP stroke +gr + +c9 +gs 799 4494 5349 2676 rc +0 j +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 841 4549 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 1361 6420 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 1881 7044 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 2401 7077 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 2921 7078 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 3441 7078 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 3961 7078 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 4481 7078 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 5001 7078 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 5521 7078 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 6042 7078 13 MP +DP +gr + +gs 872 4380 5202 2723 rc +c10 +1 0 521 0 520 0 520 0 520 0 520 0 520 0 520 1 +520 22 520 814 520 1692 872 4567 12 MP stroke +gr + +c10 +gs 799 4494 5349 2676 rc +0 j +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 841 4549 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 1361 6241 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 1881 7055 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 2401 7077 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 2921 7078 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 3441 7078 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 3961 7078 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 4481 7078 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 5001 7078 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 5521 7078 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 6042 7078 13 MP +DP +gr + +gs 872 4380 5202 2723 rc +c11 +1 0 521 0 520 0 520 0 520 0 520 0 520 0 520 1 +520 20 520 897 520 1611 872 4567 12 MP stroke +gr + +c11 +gs 799 4494 5349 2676 rc +0 j +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 841 4549 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 1361 6160 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 1881 7057 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 2401 7077 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 2921 7078 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 3441 7078 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 3961 7078 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 4481 7078 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 5001 7078 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 5521 7078 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 6042 7078 13 MP +DP +gr + +gs 872 4380 5202 2723 rc +gr + +0 sg +-290 198 1737 5827 2 MP stroke +65 -44 -125 31 1507 6038 3 MP +PP +60 13 65 -44 -125 31 1507 6038 4 MP stroke +-9 61 74 -105 1382 6069 3 MP +PP +-65 44 -9 61 74 -105 1382 6069 4 MP stroke +1739 5790 mt +(levels=7) s +-290 197 1871 6300 2 MP stroke +65 -44 -125 30 1641 6511 3 MP +PP +60 14 65 -44 -125 30 1641 6511 4 MP stroke +-9 60 74 -104 1516 6541 3 MP +PP +-65 44 -9 60 74 -104 1516 6541 4 MP stroke +1873 6263 mt +(levels=3) s +-290 198 1938 6536 2 MP stroke +65 -44 -125 31 1708 6747 3 MP +PP +60 13 65 -44 -125 31 1708 6747 4 MP stroke +-9 61 74 -105 1583 6778 3 MP +PP +-65 44 -9 61 74 -105 1583 6778 4 MP stroke +1940 6499 mt +(levels=2) s +-291 198 1795 6111 2 MP stroke +65 -44 -125 30 1564 6323 3 MP +PP +60 14 65 -44 -125 30 1564 6323 4 MP stroke +-9 61 74 -105 1439 6353 3 MP +PP +-65 44 -9 61 74 -105 1439 6353 4 MP stroke +1797 6074 mt +(levels=4) s + +end %%Color Dict + +eplot +%%EndObject + +epage +end + +showpage + +%%Trailer +%%EOF diff --git a/multigrid/docs/cubic_mg2d_levels.fig b/multigrid/docs/cubic_mg2d_levels.fig new file mode 100644 index 0000000..8c823c7 Binary files /dev/null and b/multigrid/docs/cubic_mg2d_levels.fig differ diff --git a/multigrid/docs/cubic_mg2d_relax.eps b/multigrid/docs/cubic_mg2d_relax.eps new file mode 100644 index 0000000..55c592f --- /dev/null +++ b/multigrid/docs/cubic_mg2d_relax.eps @@ -0,0 +1,1482 @@ +%!PS-Adobe-3.0 EPSF-3.0 +%%Creator: MATLAB, The MathWorks, Inc. Version 7.14.0.739 (R2012a). Operating System: Linux 3.4.6-2.10-desktop #1 SMP PREEMPT Thu Jul 26 09:36:26 UTC 2012 (641c197) x86_64. +%%Title: /home/ttran/HLST/2012/report-4/cubic_mg2d_relax.eps +%%CreationDate: 12/21/2012 09:48:11 +%%DocumentNeededFonts: Helvetica +%%DocumentProcessColors: Cyan Magenta Yellow Black +%%LanguageLevel: 2 +%%Pages: 1 +%%BoundingBox: 4 72 589 769 +%%EndComments + +%%BeginProlog +% MathWorks dictionary +/MathWorks 160 dict begin +% definition operators +/bdef {bind def} bind def +/ldef {load def} bind def +/xdef {exch def} bdef +/xstore {exch store} bdef +% operator abbreviations +/c /clip ldef +/cc /concat ldef +/cp /closepath ldef +/gr /grestore ldef +/gs /gsave ldef +/mt /moveto ldef +/np /newpath ldef +/cm /currentmatrix ldef +/sm /setmatrix ldef +/rm /rmoveto ldef +/rl /rlineto ldef +/s {show newpath} bdef +/sc {setcmykcolor} bdef +/sr /setrgbcolor ldef +/sg /setgray ldef +/w /setlinewidth ldef +/j /setlinejoin ldef +/cap /setlinecap ldef +/rc {rectclip} bdef +/rf {rectfill} bdef +% page state control +/pgsv () def +/bpage {/pgsv save def} bdef +/epage {pgsv restore} bdef +/bplot /gsave ldef +/eplot {stroke grestore} bdef +% orientation switch +/portraitMode 0 def /landscapeMode 1 def /rotateMode 2 def +% coordinate system mappings +/dpi2point 0 def +% font control +/FontSize 0 def +/FMS {/FontSize xstore findfont [FontSize 0 0 FontSize neg 0 0] + makefont setfont} bdef +/reencode {exch dup where {pop load} {pop StandardEncoding} ifelse + exch dup 3 1 roll findfont dup length dict begin + { 1 index /FID ne {def}{pop pop} ifelse } forall + /Encoding exch def currentdict end definefont pop} bdef +/isroman {findfont /CharStrings get /Agrave known} bdef +/FMSR {3 1 roll 1 index dup isroman {reencode} {pop pop} ifelse + exch FMS} bdef +/csm {1 dpi2point div -1 dpi2point div scale neg translate + dup landscapeMode eq {pop -90 rotate} + {rotateMode eq {90 rotate} if} ifelse} bdef +% line types: solid, dotted, dashed, dotdash +/SO { [] 0 setdash } bdef +/DO { [.5 dpi2point mul 4 dpi2point mul] 0 setdash } bdef +/DA { [6 dpi2point mul] 0 setdash } bdef +/DD { [.5 dpi2point mul 4 dpi2point mul 6 dpi2point mul 4 + dpi2point mul] 0 setdash } bdef +% macros for lines and objects +/L {lineto stroke} bdef +/MP {3 1 roll moveto 1 sub {rlineto} repeat} bdef +/AP {{rlineto} repeat} bdef +/PDlw -1 def +/W {/PDlw currentlinewidth def setlinewidth} def +/PP {closepath eofill} bdef +/DP {closepath stroke} bdef +/MR {4 -2 roll moveto dup 0 exch rlineto exch 0 rlineto + neg 0 exch rlineto closepath} bdef +/FR {MR stroke} bdef +/PR {MR fill} bdef +/L1i {{currentfile picstr readhexstring pop} image} bdef +/tMatrix matrix def +/MakeOval {newpath tMatrix currentmatrix pop translate scale +0 0 1 0 360 arc tMatrix setmatrix} bdef +/FO {MakeOval stroke} bdef +/PO {MakeOval fill} bdef +/PD {currentlinewidth 2 div 0 360 arc fill + PDlw -1 eq not {PDlw w /PDlw -1 def} if} def +/FA {newpath tMatrix currentmatrix pop translate scale + 0 0 1 5 -2 roll arc tMatrix setmatrix stroke} bdef +/PA {newpath tMatrix currentmatrix pop translate 0 0 moveto scale + 0 0 1 5 -2 roll arc closepath tMatrix setmatrix fill} bdef +/FAn {newpath tMatrix currentmatrix pop translate scale + 0 0 1 5 -2 roll arcn tMatrix setmatrix stroke} bdef +/PAn {newpath tMatrix currentmatrix pop translate 0 0 moveto scale + 0 0 1 5 -2 roll arcn closepath tMatrix setmatrix fill} bdef +/vradius 0 def /hradius 0 def /lry 0 def +/lrx 0 def /uly 0 def /ulx 0 def /rad 0 def +/MRR {/vradius xdef /hradius xdef /lry xdef /lrx xdef /uly xdef + /ulx xdef newpath tMatrix currentmatrix pop ulx hradius add uly + vradius add translate hradius vradius scale 0 0 1 180 270 arc + tMatrix setmatrix lrx hradius sub uly vradius add translate + hradius vradius scale 0 0 1 270 360 arc tMatrix setmatrix + lrx hradius sub lry vradius sub translate hradius vradius scale + 0 0 1 0 90 arc tMatrix setmatrix ulx hradius add lry vradius sub + translate hradius vradius scale 0 0 1 90 180 arc tMatrix setmatrix + closepath} bdef +/FRR {MRR stroke } bdef +/PRR {MRR fill } bdef +/MlrRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lry uly sub 2 div def + newpath tMatrix currentmatrix pop ulx rad add uly rad add translate + rad rad scale 0 0 1 90 270 arc tMatrix setmatrix lrx rad sub lry rad + sub translate rad rad scale 0 0 1 270 90 arc tMatrix setmatrix + closepath} bdef +/FlrRR {MlrRR stroke } bdef +/PlrRR {MlrRR fill } bdef +/MtbRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lrx ulx sub 2 div def + newpath tMatrix currentmatrix pop ulx rad add uly rad add translate + rad rad scale 0 0 1 180 360 arc tMatrix setmatrix lrx rad sub lry rad + sub translate rad rad scale 0 0 1 0 180 arc tMatrix setmatrix + closepath} bdef +/FtbRR {MtbRR stroke } bdef +/PtbRR {MtbRR fill } bdef +/stri 6 array def /dtri 6 array def +/smat 6 array def /dmat 6 array def +/tmat1 6 array def /tmat2 6 array def /dif 3 array def +/asub {/ind2 exch def /ind1 exch def dup dup + ind1 get exch ind2 get sub exch } bdef +/tri_to_matrix { + 2 0 asub 3 1 asub 4 0 asub 5 1 asub + dup 0 get exch 1 get 7 -1 roll astore } bdef +/compute_transform { + dmat dtri tri_to_matrix tmat1 invertmatrix + smat stri tri_to_matrix tmat2 concatmatrix } bdef +/ds {stri astore pop} bdef +/dt {dtri astore pop} bdef +/db {2 copy /cols xdef /rows xdef mul dup 3 mul string + currentfile + 3 index 0 eq {/ASCIIHexDecode filter} + {/ASCII85Decode filter 3 index 2 eq {/RunLengthDecode filter} if } + ifelse exch readstring pop + dup 0 3 index getinterval /rbmap xdef + dup 2 index dup getinterval /gbmap xdef + 1 index dup 2 mul exch getinterval /bbmap xdef pop pop}bdef +/it {gs np dtri aload pop moveto lineto lineto cp c + cols rows 8 compute_transform + rbmap gbmap bbmap true 3 colorimage gr}bdef +/il {newpath moveto lineto stroke}bdef +currentdict end def +%%EndProlog + +%%BeginSetup +MathWorks begin + +0 cap + +end +%%EndSetup + +%%Page: 1 1 +%%BeginPageSetup +%%PageBoundingBox: 4 72 589 769 +MathWorks begin +bpage +%%EndPageSetup + +%%BeginObject: obj1 +bplot + +/dpi2point 12 def +portraitMode 0048 9228 csm + + 0 0 7028 8364 rc +87 dict begin %Colortable dictionary +/c0 { 0.000000 0.000000 0.000000 sr} bdef +/c1 { 1.000000 1.000000 1.000000 sr} bdef +/c2 { 0.900000 0.000000 0.000000 sr} bdef +/c3 { 0.000000 0.820000 0.000000 sr} bdef +/c4 { 0.000000 0.000000 0.800000 sr} bdef +/c5 { 0.910000 0.820000 0.320000 sr} bdef +/c6 { 1.000000 0.260000 0.820000 sr} bdef +/c7 { 0.000000 0.820000 0.820000 sr} bdef +c0 +1 j +1 sg + 0 0 7029 8365 rf +6 w +0 2855 5446 0 0 -2855 914 3481 4 MP +PP +-5446 0 0 2855 5446 0 0 -2855 914 3481 5 MP stroke +4 w +DO +0 sg + 914 3481 mt 914 627 L + 914 627 mt 914 627 L +1458 3481 mt 1458 627 L +1458 627 mt 1458 627 L +2003 3481 mt 2003 627 L +2003 627 mt 2003 627 L +2547 3481 mt 2547 627 L +2547 627 mt 2547 627 L +3092 3481 mt 3092 627 L +3092 627 mt 3092 627 L +3637 3481 mt 3637 627 L +3637 627 mt 3637 627 L +4181 3481 mt 4181 627 L +4181 627 mt 4181 627 L +4726 3481 mt 4726 627 L +4726 627 mt 4726 627 L +5270 3481 mt 5270 627 L +5270 627 mt 5270 627 L +5815 3481 mt 5815 627 L +5815 627 mt 5815 627 L +6360 3481 mt 6360 627 L +6360 627 mt 6360 627 L + 914 3481 mt 6360 3481 L +6360 3481 mt 6360 3481 L + 914 2910 mt 6360 2910 L +6360 2910 mt 6360 2910 L + 914 2339 mt 6360 2339 L +6360 2339 mt 6360 2339 L + 914 1768 mt 6360 1768 L +6360 1768 mt 6360 1768 L + 914 1197 mt 6360 1197 L +6360 1197 mt 6360 1197 L + 914 627 mt 6360 627 L +6360 627 mt 6360 627 L +SO +6 w + 914 3481 mt 6360 3481 L + 914 626 mt 6360 626 L + 914 3481 mt 914 626 L +6360 3481 mt 6360 626 L + 914 3481 mt 6360 3481 L + 914 3481 mt 914 626 L + 914 3481 mt 914 3426 L + 914 627 mt 914 681 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 868 3671 mt +(0) s +1458 3481 mt 1458 3426 L +1458 627 mt 1458 681 L +1412 3671 mt +(1) s +2003 3481 mt 2003 3426 L +2003 627 mt 2003 681 L +1957 3671 mt +(2) s +2547 3481 mt 2547 3426 L +2547 627 mt 2547 681 L +2501 3671 mt +(3) s +3092 3481 mt 3092 3426 L +3092 627 mt 3092 681 L +3046 3671 mt +(4) s +3637 3481 mt 3637 3426 L +3637 627 mt 3637 681 L +3591 3671 mt +(5) s +4181 3481 mt 4181 3426 L +4181 627 mt 4181 681 L +4135 3671 mt +(6) s +4726 3481 mt 4726 3426 L +4726 627 mt 4726 681 L +4680 3671 mt +(7) s +5270 3481 mt 5270 3426 L +5270 627 mt 5270 681 L +5224 3671 mt +(8) s +5815 3481 mt 5815 3426 L +5815 627 mt 5815 681 L +5769 3671 mt +(9) s +6360 3481 mt 6360 3426 L +6360 627 mt 6360 681 L +6267 3671 mt +(10) s + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 968 3481 L +6360 3481 mt 6305 3481 L + 504 3543 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 690 3439 mt +(-12) s + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 2910 mt 968 2910 L +6360 2910 mt 6305 2910 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 504 2972 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 690 2868 mt +(-10) s + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 2339 mt 968 2339 L +6360 2339 mt 6305 2339 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 504 2401 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 690 2297 mt +(-8) s + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 1768 mt 968 1768 L +6360 1768 mt 6305 1768 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 504 1830 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 690 1726 mt +(-6) s + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 1197 mt 968 1197 L +6360 1197 mt 6305 1197 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 504 1259 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 690 1155 mt +(-4) s + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 627 mt 968 627 L +6360 627 mt 6305 627 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 504 689 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 690 585 mt +(-2) s + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +DO + 914 3481 mt 914 3481 L + 914 3481 mt 914 3481 L +SO + 914 3481 mt 6360 3481 L + 914 626 mt 6360 626 L + 914 3481 mt 914 626 L +6360 3481 mt 6360 626 L +gs 914 627 5447 2855 rc +/c8 { 0.000000 0.000000 1.000000 sr} bdef +c8 +545 79 545 79 544 81 545 83 544 85 545 90 545 100 544 342 +545 570 544 968 914 743 11 MP stroke +gr + +c8 +gs 841 670 5593 2624 rc + 36 36 914 743 FO + 36 36 1458 1711 FO + 36 36 2003 2281 FO + 36 36 2547 2623 FO + 36 36 3092 2723 FO + 36 36 3637 2813 FO + 36 36 4181 2898 FO + 36 36 4726 2981 FO + 36 36 5270 3062 FO + 36 36 5815 3141 FO + 36 36 6360 3220 FO +gr + +gs 914 627 5447 2855 rc +gr + +0 sg +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + +3291 3849 mt +(Iterations) s + 412 2673 mt -90 rotate +(Norm of residual) s +90 rotate +1509 495 mt +(Cubic Splines, 128 x 128 problem, relax=gs, KX=3, KY=3) s +gs 914 627 5447 2855 rc +/c9 { 0.847059 0.160784 0.000000 sr} bdef +c9 +545 54 545 55 544 55 545 58 544 61 545 83 545 275 544 390 +545 456 544 831 914 743 11 MP stroke +gr + +c9 +gs 841 670 5593 2465 rc + 36 36 914 743 FO + 36 36 1458 1574 FO + 36 36 2003 2030 FO + 36 36 2547 2420 FO + 36 36 3092 2695 FO + 36 36 3637 2778 FO + 36 36 4181 2839 FO + 36 36 4726 2897 FO + 36 36 5270 2952 FO + 36 36 5815 3007 FO + 36 36 6360 3061 FO +gr + +gs 914 627 5447 2855 rc +/c10 { 0.000000 0.498039 0.000000 sr} bdef +c10 +545 47 545 56 544 82 545 158 544 225 545 236 545 229 544 262 +545 338 544 275 914 743 11 MP stroke +gr + +c10 +gs 841 670 5593 2055 rc + 36 36 914 743 FO + 36 36 1458 1018 FO + 36 36 2003 1356 FO + 36 36 2547 1618 FO + 36 36 3092 1847 FO + 36 36 3637 2083 FO + 36 36 4181 2308 FO + 36 36 4726 2466 FO + 36 36 5270 2548 FO + 36 36 5815 2604 FO + 36 36 6360 2651 FO +gr + +gs 914 627 5447 2855 rc +gr + +1 sg +0 2854 5446 0 0 -2854 914 7444 4 MP +PP +-5446 0 0 2854 5446 0 0 -2854 914 7444 5 MP stroke +4 w +DO +0 sg + 914 7444 mt 914 4590 L + 914 4590 mt 914 4590 L +1458 7444 mt 1458 4590 L +1458 4590 mt 1458 4590 L +2003 7444 mt 2003 4590 L +2003 4590 mt 2003 4590 L +2547 7444 mt 2547 4590 L +2547 4590 mt 2547 4590 L +3092 7444 mt 3092 4590 L +3092 4590 mt 3092 4590 L +3637 7444 mt 3637 4590 L +3637 4590 mt 3637 4590 L +4181 7444 mt 4181 4590 L +4181 4590 mt 4181 4590 L +4726 7444 mt 4726 4590 L +4726 4590 mt 4726 4590 L +5270 7444 mt 5270 4590 L +5270 4590 mt 5270 4590 L +5815 7444 mt 5815 4590 L +5815 4590 mt 5815 4590 L +6360 7444 mt 6360 4590 L +6360 4590 mt 6360 4590 L + 914 7444 mt 6360 7444 L +6360 7444 mt 6360 7444 L + 914 6730 mt 6360 6730 L +6360 6730 mt 6360 6730 L + 914 6017 mt 6360 6017 L +6360 6017 mt 6360 6017 L + 914 5303 mt 6360 5303 L +6360 5303 mt 6360 5303 L + 914 4590 mt 6360 4590 L +6360 4590 mt 6360 4590 L +SO +6 w + 914 7444 mt 6360 7444 L + 914 4590 mt 6360 4590 L + 914 7444 mt 914 4590 L +6360 7444 mt 6360 4590 L + 914 7444 mt 6360 7444 L + 914 7444 mt 914 4590 L + 914 7444 mt 914 7389 L + 914 4590 mt 914 4644 L + 868 7634 mt +(0) s +1458 7444 mt 1458 7389 L +1458 4590 mt 1458 4644 L +1412 7634 mt +(1) s +2003 7444 mt 2003 7389 L +2003 4590 mt 2003 4644 L +1957 7634 mt +(2) s +2547 7444 mt 2547 7389 L +2547 4590 mt 2547 4644 L +2501 7634 mt +(3) s +3092 7444 mt 3092 7389 L +3092 4590 mt 3092 4644 L +3046 7634 mt +(4) s +3637 7444 mt 3637 7389 L +3637 4590 mt 3637 4644 L +3591 7634 mt +(5) s +4181 7444 mt 4181 7389 L +4181 4590 mt 4181 4644 L +4135 7634 mt +(6) s +4726 7444 mt 4726 7389 L +4726 4590 mt 4726 4644 L +4680 7634 mt +(7) s +5270 7444 mt 5270 7389 L +5270 4590 mt 5270 4644 L +5224 7634 mt +(8) s +5815 7444 mt 5815 7389 L +5815 4590 mt 5815 4644 L +5769 7634 mt +(9) s +6360 7444 mt 6360 7389 L +6360 4590 mt 6360 4644 L +6267 7634 mt +(10) s + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 968 7444 L +6360 7444 mt 6305 7444 L + 504 7506 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 690 7402 mt +(-10) s + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 6730 mt 968 6730 L +6360 6730 mt 6305 6730 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 504 6792 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 690 6688 mt +(-8) s + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 6017 mt 968 6017 L +6360 6017 mt 6305 6017 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 504 6079 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 690 5975 mt +(-6) s + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 5303 mt 968 5303 L +6360 5303 mt 6305 5303 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 504 5365 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 690 5261 mt +(-4) s + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 4590 mt 968 4590 L +6360 4590 mt 6305 4590 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 504 4652 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 690 4548 mt +(-2) s + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +DO + 914 7444 mt 914 7444 L + 914 7444 mt 914 7444 L +SO + 914 7444 mt 6360 7444 L + 914 4590 mt 6360 4590 L + 914 7444 mt 914 4590 L +6360 7444 mt 6360 4590 L +gs 914 4590 5447 2855 rc +c8 +545 0 545 0 544 0 545 0 544 0 545 0 545 0 544 22 +545 939 544 1690 914 4786 11 MP stroke +gr + +c8 +gs 841 4713 5593 2798 rc +0 j +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 883 4768 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 1427 6458 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 1972 7397 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 2516 7419 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 3061 7419 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 3606 7419 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 4150 7419 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 4695 7419 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 5239 7419 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 5784 7419 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 6329 7419 13 MP +DP +gr + +gs 914 4590 5447 2855 rc +gr + +0 sg +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + +3291 7812 mt +(Iterations) s + 412 6519 mt -90 rotate +(Norm of error) s +90 rotate +gs 914 4590 5447 2855 rc +c9 +545 0 545 0 544 0 545 0 544 0 545 0 545 3 544 305 +545 1174 544 1169 914 4786 11 MP stroke +gr + +c9 +gs 841 4713 5593 2798 rc +0 j +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 883 4768 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 1427 5937 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 1972 7111 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 2516 7416 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 3061 7419 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 3606 7419 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 4150 7419 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 4695 7419 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 5239 7419 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 5784 7419 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 6329 7419 13 MP +DP +gr + +gs 914 4590 5447 2855 rc +c10 +545 0 545 0 544 1 545 11 544 124 545 290 545 388 544 604 +545 608 544 625 914 4786 11 MP stroke +gr + +c10 +gs 841 4713 5593 2798 rc +0 j +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 883 4768 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 1427 5393 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 1972 6001 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 2516 6605 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 3061 6993 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 3606 7283 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 4150 7407 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 4695 7418 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 5239 7419 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 5784 7419 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 6329 7419 13 MP +DP +gr + +gs 914 4590 5447 2855 rc +gr + +0 sg +-199 130 2409 5908 2 MP stroke +65 -43 -125 28 2270 6053 3 MP +PP +60 15 65 -43 -125 28 2270 6053 4 MP stroke +-11 61 76 -104 2145 6081 3 MP +PP +-65 43 -11 61 76 -104 2145 6081 4 MP stroke +2411 5871 mt +(V\(1,0\)) s +-216 174 2419 6894 2 MP stroke +61 -49 -122 40 2264 7077 3 MP +PP +61 9 61 -49 -122 40 2264 7077 4 MP stroke +-5 61 66 -110 2142 7117 3 MP +PP +-61 49 -5 61 66 -110 2142 7117 4 MP stroke +2421 6857 mt +(V\(1,1\)) s +167 -222 1372 7134 2 MP stroke +-47 62 108 -69 1478 6919 3 MP +PP +-61 7 -47 62 108 -69 1478 6919 4 MP stroke +-11 -61 -36 123 1586 6850 3 MP +PP +47 -62 -11 -61 -36 123 1586 6850 4 MP stroke + 915 7290 mt +(V\(2,1\)) s + +end %%Color Dict + +eplot +%%EndObject + +epage +end + +showpage + +%%Trailer +%%EOF diff --git a/multigrid/docs/cubic_mg2d_relax.fig b/multigrid/docs/cubic_mg2d_relax.fig new file mode 100644 index 0000000..8252dd5 Binary files /dev/null and b/multigrid/docs/cubic_mg2d_relax.fig differ diff --git a/multigrid/docs/cyl_conv.eps b/multigrid/docs/cyl_conv.eps new file mode 100644 index 0000000..f0326c7 --- /dev/null +++ b/multigrid/docs/cyl_conv.eps @@ -0,0 +1,991 @@ +%!PS-Adobe-3.0 EPSF-3.0 +%%Creator: MATLAB, The MathWorks, Inc. Version 7.14.0.739 (R2012a). Operating System: Linux 2.6.27.56-0.1-default #1 SMP 2010-12-01 16:57:58 +0100 x86_64. +%%Title: /home/ttran/multigrid/bsplines/cyl_conv.eps +%%CreationDate: 09/03/2012 14:52:17 +%%DocumentNeededFonts: Helvetica +%%DocumentProcessColors: Cyan Magenta Yellow Black +%%LanguageLevel: 2 +%%Pages: 1 +%%BoundingBox: 73 252 521 589 +%%EndComments + +%%BeginProlog +% MathWorks dictionary +/MathWorks 160 dict begin +% definition operators +/bdef {bind def} bind def +/ldef {load def} bind def +/xdef {exch def} bdef +/xstore {exch store} bdef +% operator abbreviations +/c /clip ldef +/cc /concat ldef +/cp /closepath ldef +/gr /grestore ldef +/gs /gsave ldef +/mt /moveto ldef +/np /newpath ldef +/cm /currentmatrix ldef +/sm /setmatrix ldef +/rm /rmoveto ldef +/rl /rlineto ldef +/s {show newpath} bdef +/sc {setcmykcolor} bdef +/sr /setrgbcolor ldef +/sg /setgray ldef +/w /setlinewidth ldef +/j /setlinejoin ldef +/cap /setlinecap ldef +/rc {rectclip} bdef +/rf {rectfill} bdef +% page state control +/pgsv () def +/bpage {/pgsv save def} bdef +/epage {pgsv restore} bdef +/bplot /gsave ldef +/eplot {stroke grestore} bdef +% orientation switch +/portraitMode 0 def /landscapeMode 1 def /rotateMode 2 def +% coordinate system mappings +/dpi2point 0 def +% font control +/FontSize 0 def +/FMS {/FontSize xstore findfont [FontSize 0 0 FontSize neg 0 0] + makefont setfont} bdef +/reencode {exch dup where {pop load} {pop StandardEncoding} ifelse + exch dup 3 1 roll findfont dup length dict begin + { 1 index /FID ne {def}{pop pop} ifelse } forall + /Encoding exch def currentdict end definefont pop} bdef +/isroman {findfont /CharStrings get /Agrave known} bdef +/FMSR {3 1 roll 1 index dup isroman {reencode} {pop pop} ifelse + exch FMS} bdef +/csm {1 dpi2point div -1 dpi2point div scale neg translate + dup landscapeMode eq {pop -90 rotate} + {rotateMode eq {90 rotate} if} ifelse} bdef +% line types: solid, dotted, dashed, dotdash +/SO { [] 0 setdash } bdef +/DO { [.5 dpi2point mul 4 dpi2point mul] 0 setdash } bdef +/DA { [6 dpi2point mul] 0 setdash } bdef +/DD { [.5 dpi2point mul 4 dpi2point mul 6 dpi2point mul 4 + dpi2point mul] 0 setdash } bdef +% macros for lines and objects +/L {lineto stroke} bdef +/MP {3 1 roll moveto 1 sub {rlineto} repeat} bdef +/AP {{rlineto} repeat} bdef +/PDlw -1 def +/W {/PDlw currentlinewidth def setlinewidth} def +/PP {closepath eofill} bdef +/DP {closepath stroke} bdef +/MR {4 -2 roll moveto dup 0 exch rlineto exch 0 rlineto + neg 0 exch rlineto closepath} bdef +/FR {MR stroke} bdef +/PR {MR fill} bdef +/L1i {{currentfile picstr readhexstring pop} image} bdef +/tMatrix matrix def +/MakeOval {newpath tMatrix currentmatrix pop translate scale +0 0 1 0 360 arc tMatrix setmatrix} bdef +/FO {MakeOval stroke} bdef +/PO {MakeOval fill} bdef +/PD {currentlinewidth 2 div 0 360 arc fill + PDlw -1 eq not {PDlw w /PDlw -1 def} if} def +/FA {newpath tMatrix currentmatrix pop translate scale + 0 0 1 5 -2 roll arc tMatrix setmatrix stroke} bdef +/PA {newpath tMatrix currentmatrix pop translate 0 0 moveto scale + 0 0 1 5 -2 roll arc closepath tMatrix setmatrix fill} bdef +/FAn {newpath tMatrix currentmatrix pop translate scale + 0 0 1 5 -2 roll arcn tMatrix setmatrix stroke} bdef +/PAn {newpath tMatrix currentmatrix pop translate 0 0 moveto scale + 0 0 1 5 -2 roll arcn closepath tMatrix setmatrix fill} bdef +/vradius 0 def /hradius 0 def /lry 0 def +/lrx 0 def /uly 0 def /ulx 0 def /rad 0 def +/MRR {/vradius xdef /hradius xdef /lry xdef /lrx xdef /uly xdef + /ulx xdef newpath tMatrix currentmatrix pop ulx hradius add uly + vradius add translate hradius vradius scale 0 0 1 180 270 arc + tMatrix setmatrix lrx hradius sub uly vradius add translate + hradius vradius scale 0 0 1 270 360 arc tMatrix setmatrix + lrx hradius sub lry vradius sub translate hradius vradius scale + 0 0 1 0 90 arc tMatrix setmatrix ulx hradius add lry vradius sub + translate hradius vradius scale 0 0 1 90 180 arc tMatrix setmatrix + closepath} bdef +/FRR {MRR stroke } bdef +/PRR {MRR fill } bdef +/MlrRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lry uly sub 2 div def + newpath tMatrix currentmatrix pop ulx rad add uly rad add translate + rad rad scale 0 0 1 90 270 arc tMatrix setmatrix lrx rad sub lry rad + sub translate rad rad scale 0 0 1 270 90 arc tMatrix setmatrix + closepath} bdef +/FlrRR {MlrRR stroke } bdef +/PlrRR {MlrRR fill } bdef +/MtbRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lrx ulx sub 2 div def + newpath tMatrix currentmatrix pop ulx rad add uly rad add translate + rad rad scale 0 0 1 180 360 arc tMatrix setmatrix lrx rad sub lry rad + sub translate rad rad scale 0 0 1 0 180 arc tMatrix setmatrix + closepath} bdef +/FtbRR {MtbRR stroke } bdef +/PtbRR {MtbRR fill } bdef +/stri 6 array def /dtri 6 array def +/smat 6 array def /dmat 6 array def +/tmat1 6 array def /tmat2 6 array def /dif 3 array def +/asub {/ind2 exch def /ind1 exch def dup dup + ind1 get exch ind2 get sub exch } bdef +/tri_to_matrix { + 2 0 asub 3 1 asub 4 0 asub 5 1 asub + dup 0 get exch 1 get 7 -1 roll astore } bdef +/compute_transform { + dmat dtri tri_to_matrix tmat1 invertmatrix + smat stri tri_to_matrix tmat2 concatmatrix } bdef +/ds {stri astore pop} bdef +/dt {dtri astore pop} bdef +/db {2 copy /cols xdef /rows xdef mul dup 3 mul string + currentfile + 3 index 0 eq {/ASCIIHexDecode filter} + {/ASCII85Decode filter 3 index 2 eq {/RunLengthDecode filter} if } + ifelse exch readstring pop + dup 0 3 index getinterval /rbmap xdef + dup 2 index dup getinterval /gbmap xdef + 1 index dup 2 mul exch getinterval /bbmap xdef pop pop}bdef +/it {gs np dtri aload pop moveto lineto lineto cp c + cols rows 8 compute_transform + rbmap gbmap bbmap true 3 colorimage gr}bdef +/il {newpath moveto lineto stroke}bdef +currentdict end def +%%EndProlog + +%%BeginSetup +MathWorks begin + +0 cap + +end +%%EndSetup + +%%Page: 1 1 +%%BeginPageSetup +%%PageBoundingBox: 73 252 521 589 +MathWorks begin +bpage +%%EndPageSetup + +%%BeginObject: obj1 +bplot + +/dpi2point 12 def +portraitMode 0876 7068 csm + + 0 0 5376 4034 rc +85 dict begin %Colortable dictionary +/c0 { 0.000000 0.000000 0.000000 sr} bdef +/c1 { 1.000000 1.000000 1.000000 sr} bdef +/c2 { 0.900000 0.000000 0.000000 sr} bdef +/c3 { 0.000000 0.820000 0.000000 sr} bdef +/c4 { 0.000000 0.000000 0.800000 sr} bdef +/c5 { 0.910000 0.820000 0.320000 sr} bdef +/c6 { 1.000000 0.260000 0.820000 sr} bdef +/c7 { 0.000000 0.820000 0.820000 sr} bdef +c0 +1 j +1 sg + 0 0 5377 4035 rf +6 w +0 3231 4166 0 0 -3231 699 3575 4 MP +PP +-4166 0 0 3231 4166 0 0 -3231 699 3575 5 MP stroke +4 w +DO +0 sg + 699 3575 mt 699 344 L + 699 344 mt 699 344 L +2087 3575 mt 2087 344 L +2087 344 mt 2087 344 L +3476 3575 mt 3476 344 L +3476 344 mt 3476 344 L +4865 3575 mt 4865 344 L +4865 344 mt 4865 344 L + 699 3575 mt 4865 3575 L +4865 3575 mt 4865 3575 L + 699 2928 mt 4865 2928 L +4865 2928 mt 4865 2928 L + 699 2282 mt 4865 2282 L +4865 2282 mt 4865 2282 L + 699 1636 mt 4865 1636 L +4865 1636 mt 4865 1636 L + 699 990 mt 4865 990 L +4865 990 mt 4865 990 L + 699 344 mt 4865 344 L +4865 344 mt 4865 344 L +SO +6 w + 699 3575 mt 4865 3575 L + 699 344 mt 4865 344 L + 699 3575 mt 699 344 L +4865 3575 mt 4865 344 L + 699 3575 mt 4865 3575 L + 699 3575 mt 699 344 L + 699 3575 mt 699 3554 L + 699 344 mt 699 364 L +DO + 699 3575 mt 699 344 L + 699 344 mt 699 344 L +SO + 699 3575 mt 699 3533 L + 699 344 mt 699 385 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 575 3817 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 761 3713 mt +(1) s +1117 3575 mt 1117 3554 L +1117 344 mt 1117 364 L +DO +1117 3575 mt 1117 344 L +1117 344 mt 1117 344 L +SO +1361 3575 mt 1361 3554 L +1361 344 mt 1361 364 L +DO +1361 3575 mt 1361 344 L +1361 344 mt 1361 344 L +SO +1535 3575 mt 1535 3554 L +1535 344 mt 1535 364 L +DO +1535 3575 mt 1535 344 L +1535 344 mt 1535 344 L +SO +1669 3575 mt 1669 3554 L +1669 344 mt 1669 364 L +DO +1669 3575 mt 1669 344 L +1669 344 mt 1669 344 L +SO +1779 3575 mt 1779 3554 L +1779 344 mt 1779 364 L +DO +1779 3575 mt 1779 344 L +1779 344 mt 1779 344 L +SO +1872 3575 mt 1872 3554 L +1872 344 mt 1872 364 L +DO +1872 3575 mt 1872 344 L +1872 344 mt 1872 344 L +SO +1953 3575 mt 1953 3554 L +1953 344 mt 1953 364 L +DO +1953 3575 mt 1953 344 L +1953 344 mt 1953 344 L +SO +2024 3575 mt 2024 3554 L +2024 344 mt 2024 364 L +DO +2024 3575 mt 2024 344 L +2024 344 mt 2024 344 L +SO +2087 3575 mt 2087 3554 L +2087 344 mt 2087 364 L +DO +2087 3575 mt 2087 344 L +2087 344 mt 2087 344 L +SO +2087 3575 mt 2087 3533 L +2087 344 mt 2087 385 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + +1963 3817 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + +2149 3713 mt +(2) s +2505 3575 mt 2505 3554 L +2505 344 mt 2505 364 L +DO +2505 3575 mt 2505 344 L +2505 344 mt 2505 344 L +SO +2750 3575 mt 2750 3554 L +2750 344 mt 2750 364 L +DO +2750 3575 mt 2750 344 L +2750 344 mt 2750 344 L +SO +2923 3575 mt 2923 3554 L +2923 344 mt 2923 364 L +DO +2923 3575 mt 2923 344 L +2923 344 mt 2923 344 L +SO +3058 3575 mt 3058 3554 L +3058 344 mt 3058 364 L +DO +3058 3575 mt 3058 344 L +3058 344 mt 3058 344 L +SO +3168 3575 mt 3168 3554 L +3168 344 mt 3168 364 L +DO +3168 3575 mt 3168 344 L +3168 344 mt 3168 344 L +SO +3261 3575 mt 3261 3554 L +3261 344 mt 3261 364 L +DO +3261 3575 mt 3261 344 L +3261 344 mt 3261 344 L +SO +3341 3575 mt 3341 3554 L +3341 344 mt 3341 364 L +DO +3341 3575 mt 3341 344 L +3341 344 mt 3341 344 L +SO +3412 3575 mt 3412 3554 L +3412 344 mt 3412 364 L +DO +3412 3575 mt 3412 344 L +3412 344 mt 3412 344 L +SO +3476 3575 mt 3476 3554 L +3476 344 mt 3476 364 L +DO +3476 3575 mt 3476 344 L +3476 344 mt 3476 344 L +SO +3476 3575 mt 3476 3533 L +3476 344 mt 3476 385 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + +3352 3817 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + +3538 3713 mt +(3) s +3894 3575 mt 3894 3554 L +3894 344 mt 3894 364 L +DO +3894 3575 mt 3894 344 L +3894 344 mt 3894 344 L +SO +4138 3575 mt 4138 3554 L +4138 344 mt 4138 364 L +DO +4138 3575 mt 4138 344 L +4138 344 mt 4138 344 L +SO +4312 3575 mt 4312 3554 L +4312 344 mt 4312 364 L +DO +4312 3575 mt 4312 344 L +4312 344 mt 4312 344 L +SO +4446 3575 mt 4446 3554 L +4446 344 mt 4446 364 L +DO +4446 3575 mt 4446 344 L +4446 344 mt 4446 344 L +SO +4556 3575 mt 4556 3554 L +4556 344 mt 4556 364 L +DO +4556 3575 mt 4556 344 L +4556 344 mt 4556 344 L +SO +4649 3575 mt 4649 3554 L +4649 344 mt 4649 364 L +DO +4649 3575 mt 4649 344 L +4649 344 mt 4649 344 L +SO +4730 3575 mt 4730 3554 L +4730 344 mt 4730 364 L +DO +4730 3575 mt 4730 344 L +4730 344 mt 4730 344 L +SO +4801 3575 mt 4801 3554 L +4801 344 mt 4801 364 L +DO +4801 3575 mt 4801 344 L +4801 344 mt 4801 344 L +SO +4865 3575 mt 4865 3554 L +4865 344 mt 4865 364 L +DO +4865 3575 mt 4865 344 L +4865 344 mt 4865 344 L +SO +4865 3575 mt 4865 3533 L +4865 344 mt 4865 385 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + +4741 3817 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + +4927 3713 mt +(4) s + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 740 3575 L +4865 3575 mt 4823 3575 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 289 3637 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 475 3533 mt +(-10) s + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 2928 mt 740 2928 L +4865 2928 mt 4823 2928 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 289 2990 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 475 2886 mt +(-8) s + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 2282 mt 740 2282 L +4865 2282 mt 4823 2282 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 289 2344 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 475 2240 mt +(-6) s + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 1636 mt 740 1636 L +4865 1636 mt 4823 1636 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 289 1698 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 475 1594 mt +(-4) s + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 990 mt 740 990 L +4865 990 mt 4823 990 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 289 1052 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 475 948 mt +(-2) s + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 344 mt 740 344 L +4865 344 mt 4823 344 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 289 406 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 475 302 mt +(0) s + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +DO + 699 3575 mt 699 3575 L + 699 3575 mt 699 3575 L +SO + 699 3575 mt 4865 3575 L + 699 344 mt 4865 344 L + 699 3575 mt 699 344 L +4865 3575 mt 4865 344 L +gs 699 344 4167 3232 rc +gr + +/c8 { 0.000000 0.000000 1.000000 sr} bdef +c8 + 36 36 1400 1303 FO + 36 36 1818 1778 FO + 36 36 2236 2196 FO + 36 36 2654 2593 FO + 36 36 3072 2984 FO + 36 36 3490 3373 FO +gs 699 344 4167 3232 rc +/c9 { 0.000000 0.500000 0.000000 sr} bdef +c9 +418 410 418 411 418 410 418 411 418 410 1400 1345 6 MP stroke +gr + +c9 +/c10 { 1.000000 0.000000 0.000000 sr} bdef +c10 + 36 36 1400 951 FO + 36 36 1818 1148 FO + 36 36 2236 1343 FO + 36 36 2654 1538 FO + 36 36 3072 1732 FO + 36 36 3490 1927 FO +gs 699 344 4167 3232 rc +418 195 418 195 418 195 418 195 418 195 1400 952 6 MP stroke +gr + +0 sg +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + +1968 3995 mt +(Number of intervals N) s + 197 2674 mt -90 rotate +(Discretization Error) s +90 rotate +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 216 FMSR + +1251 201 mt +(m=22, s=10 Cylindrical Problem) s +-279 260 2841 1060 2 MP stroke +57 -53 -119 49 2624 1324 3 MP +PP +62 4 57 -53 -119 49 2624 1324 4 MP stroke +0 62 57 -115 2505 1373 3 MP +PP +-57 53 0 62 57 -115 2505 1373 4 MP stroke +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + +2843 947 mt +(p=1, N) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 132 FMSR + +3342 1031 mt +(G) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + +3444 947 mt +(=4) s +466 -203 1872 2654 2 MP stroke +-71 31 128 -7 2281 2427 3 MP +PP +-57 -24 -71 31 128 -7 2281 2427 4 MP stroke +21 -58 -92 89 2409 2420 3 MP +PP +71 -31 21 -58 -92 89 2409 2420 4 MP stroke +1082 2642 mt +(p=3, N) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 132 FMSR + +1581 2726 mt +(G) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + +1683 2642 mt +(=6) s + +end %%Color Dict + +eplot +%%EndObject + +epage +end + +showpage + +%%Trailer +%%EOF diff --git a/multigrid/docs/grid.tex b/multigrid/docs/grid.tex new file mode 100644 index 0000000..13b90a0 --- /dev/null +++ b/multigrid/docs/grid.tex @@ -0,0 +1,70 @@ +% +% @file grid.tex +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% + +\hoffset=-2truecm +\voffset=-2truecm +\special{papersize=9cm,5cm} +\hsize=9truecm +\vsize=5truecm + +\parindent=0pt +\nopagenumbers + +\input pstricks + +\pspicture(4,4) + \psline[linestyle=solid](0,0)(8,0) + \psline[linestyle=dotted](0,1)(8,1) + \psline[linestyle=solid](0,2)(8,2) + \psline[linestyle=dotted](0,3)(8,3) + \psline[linestyle=solid](0,4)(8,4) + + \psline[linestyle=solid](0,0)(0,4) + \psline[linestyle=dotted](1,0)(1,4) + \psline[linestyle=solid](2,0)(2,4) + \psline[linestyle=dotted](3,0)(3,4) + \psline[linestyle=solid](4,0)(4,4) + \psline[linestyle=dotted](5,0)(5,4) + \psline[linestyle=solid](6,0)(6,4) + \psline[linestyle=dotted](7,0)(7,4) + \psline[linestyle=solid](8,0)(8,4) + + \psdots[dotstyle=square,dotscale=2](0,0)(2,0)(4,0)(6,0)(8,0) + \psdots[dotstyle=square,dotscale=2](0,2)(2,2)(4,2)(6,2)(8,2) + \psdots[dotstyle=square,dotscale=2](0,4)(2,4)(4,4)(6,4)(8,4) + + \psdots[dotstyle=*](0,0)(1,0)(2,0)(3,0)(4,0)(5,0)(6,0)(7,0)(8,0) + \psdots[dotstyle=*](0,1)(1,1)(2,1)(3,1)(4,1)(5,1)(6,1)(7,1)(8,1) + \psdots[dotstyle=*](0,2)(1,2)(2,2)(3,2)(4,2)(5,2)(6,2)(7,2)(8,2) + \psdots[dotstyle=*](0,3)(1,3)(2,3)(3,3)(4,3)(5,3)(6,3)(7,3)(8,3) + \psdots[dotstyle=*](0,4)(1,4)(2,4)(3,4)(4,4)(5,4)(6,4)(7,4)(8,4) + +\endpspicture + + +\bye + + diff --git a/multigrid/docs/linear_mg2d.eps b/multigrid/docs/linear_mg2d.eps new file mode 100644 index 0000000..43e69dd --- /dev/null +++ b/multigrid/docs/linear_mg2d.eps @@ -0,0 +1,1387 @@ +%!PS-Adobe-3.0 EPSF-3.0 +%%Creator: MATLAB, The MathWorks, Inc. Version 7.14.0.739 (R2012a). Operating System: Linux 3.4.6-2.10-desktop #1 SMP PREEMPT Thu Jul 26 09:36:26 UTC 2012 (641c197) x86_64. +%%Title: /home/ttran/HLST/2012/report-4/linear_mg2d.eps +%%CreationDate: 12/21/2012 09:47:45 +%%DocumentNeededFonts: Helvetica +%%DocumentProcessColors: Cyan Magenta Yellow Black +%%LanguageLevel: 2 +%%Pages: 1 +%%BoundingBox: 24 70 568 771 +%%EndComments + +%%BeginProlog +% MathWorks dictionary +/MathWorks 160 dict begin +% definition operators +/bdef {bind def} bind def +/ldef {load def} bind def +/xdef {exch def} bdef +/xstore {exch store} bdef +% operator abbreviations +/c /clip ldef +/cc /concat ldef +/cp /closepath ldef +/gr /grestore ldef +/gs /gsave ldef +/mt /moveto ldef +/np /newpath ldef +/cm /currentmatrix ldef +/sm /setmatrix ldef +/rm /rmoveto ldef +/rl /rlineto ldef +/s {show newpath} bdef +/sc {setcmykcolor} bdef +/sr /setrgbcolor ldef +/sg /setgray ldef +/w /setlinewidth ldef +/j /setlinejoin ldef +/cap /setlinecap ldef +/rc {rectclip} bdef +/rf {rectfill} bdef +% page state control +/pgsv () def +/bpage {/pgsv save def} bdef +/epage {pgsv restore} bdef +/bplot /gsave ldef +/eplot {stroke grestore} bdef +% orientation switch +/portraitMode 0 def /landscapeMode 1 def /rotateMode 2 def +% coordinate system mappings +/dpi2point 0 def +% font control +/FontSize 0 def +/FMS {/FontSize xstore findfont [FontSize 0 0 FontSize neg 0 0] + makefont setfont} bdef +/reencode {exch dup where {pop load} {pop StandardEncoding} ifelse + exch dup 3 1 roll findfont dup length dict begin + { 1 index /FID ne {def}{pop pop} ifelse } forall + /Encoding exch def currentdict end definefont pop} bdef +/isroman {findfont /CharStrings get /Agrave known} bdef +/FMSR {3 1 roll 1 index dup isroman {reencode} {pop pop} ifelse + exch FMS} bdef +/csm {1 dpi2point div -1 dpi2point div scale neg translate + dup landscapeMode eq {pop -90 rotate} + {rotateMode eq {90 rotate} if} ifelse} bdef +% line types: solid, dotted, dashed, dotdash +/SO { [] 0 setdash } bdef +/DO { [.5 dpi2point mul 4 dpi2point mul] 0 setdash } bdef +/DA { [6 dpi2point mul] 0 setdash } bdef +/DD { [.5 dpi2point mul 4 dpi2point mul 6 dpi2point mul 4 + dpi2point mul] 0 setdash } bdef +% macros for lines and objects +/L {lineto stroke} bdef +/MP {3 1 roll moveto 1 sub {rlineto} repeat} bdef +/AP {{rlineto} repeat} bdef +/PDlw -1 def +/W {/PDlw currentlinewidth def setlinewidth} def +/PP {closepath eofill} bdef +/DP {closepath stroke} bdef +/MR {4 -2 roll moveto dup 0 exch rlineto exch 0 rlineto + neg 0 exch rlineto closepath} bdef +/FR {MR stroke} bdef +/PR {MR fill} bdef +/L1i {{currentfile picstr readhexstring pop} image} bdef +/tMatrix matrix def +/MakeOval {newpath tMatrix currentmatrix pop translate scale +0 0 1 0 360 arc tMatrix setmatrix} bdef +/FO {MakeOval stroke} bdef +/PO {MakeOval fill} bdef +/PD {currentlinewidth 2 div 0 360 arc fill + PDlw -1 eq not {PDlw w /PDlw -1 def} if} def +/FA {newpath tMatrix currentmatrix pop translate scale + 0 0 1 5 -2 roll arc tMatrix setmatrix stroke} bdef +/PA {newpath tMatrix currentmatrix pop translate 0 0 moveto scale + 0 0 1 5 -2 roll arc closepath tMatrix setmatrix fill} bdef +/FAn {newpath tMatrix currentmatrix pop translate scale + 0 0 1 5 -2 roll arcn tMatrix setmatrix stroke} bdef +/PAn {newpath tMatrix currentmatrix pop translate 0 0 moveto scale + 0 0 1 5 -2 roll arcn closepath tMatrix setmatrix fill} bdef +/vradius 0 def /hradius 0 def /lry 0 def +/lrx 0 def /uly 0 def /ulx 0 def /rad 0 def +/MRR {/vradius xdef /hradius xdef /lry xdef /lrx xdef /uly xdef + /ulx xdef newpath tMatrix currentmatrix pop ulx hradius add uly + vradius add translate hradius vradius scale 0 0 1 180 270 arc + tMatrix setmatrix lrx hradius sub uly vradius add translate + hradius vradius scale 0 0 1 270 360 arc tMatrix setmatrix + lrx hradius sub lry vradius sub translate hradius vradius scale + 0 0 1 0 90 arc tMatrix setmatrix ulx hradius add lry vradius sub + translate hradius vradius scale 0 0 1 90 180 arc tMatrix setmatrix + closepath} bdef +/FRR {MRR stroke } bdef +/PRR {MRR fill } bdef +/MlrRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lry uly sub 2 div def + newpath tMatrix currentmatrix pop ulx rad add uly rad add translate + rad rad scale 0 0 1 90 270 arc tMatrix setmatrix lrx rad sub lry rad + sub translate rad rad scale 0 0 1 270 90 arc tMatrix setmatrix + closepath} bdef +/FlrRR {MlrRR stroke } bdef +/PlrRR {MlrRR fill } bdef +/MtbRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lrx ulx sub 2 div def + newpath tMatrix currentmatrix pop ulx rad add uly rad add translate + rad rad scale 0 0 1 180 360 arc tMatrix setmatrix lrx rad sub lry rad + sub translate rad rad scale 0 0 1 0 180 arc tMatrix setmatrix + closepath} bdef +/FtbRR {MtbRR stroke } bdef +/PtbRR {MtbRR fill } bdef +/stri 6 array def /dtri 6 array def +/smat 6 array def /dmat 6 array def +/tmat1 6 array def /tmat2 6 array def /dif 3 array def +/asub {/ind2 exch def /ind1 exch def dup dup + ind1 get exch ind2 get sub exch } bdef +/tri_to_matrix { + 2 0 asub 3 1 asub 4 0 asub 5 1 asub + dup 0 get exch 1 get 7 -1 roll astore } bdef +/compute_transform { + dmat dtri tri_to_matrix tmat1 invertmatrix + smat stri tri_to_matrix tmat2 concatmatrix } bdef +/ds {stri astore pop} bdef +/dt {dtri astore pop} bdef +/db {2 copy /cols xdef /rows xdef mul dup 3 mul string + currentfile + 3 index 0 eq {/ASCIIHexDecode filter} + {/ASCII85Decode filter 3 index 2 eq {/RunLengthDecode filter} if } + ifelse exch readstring pop + dup 0 3 index getinterval /rbmap xdef + dup 2 index dup getinterval /gbmap xdef + 1 index dup 2 mul exch getinterval /bbmap xdef pop pop}bdef +/it {gs np dtri aload pop moveto lineto lineto cp c + cols rows 8 compute_transform + rbmap gbmap bbmap true 3 colorimage gr}bdef +/il {newpath moveto lineto stroke}bdef +currentdict end def +%%EndProlog + +%%BeginSetup +MathWorks begin + +0 cap + +end +%%EndSetup + +%%Page: 1 1 +%%BeginPageSetup +%%PageBoundingBox: 24 70 568 771 +MathWorks begin +bpage +%%EndPageSetup + +%%BeginObject: obj1 +bplot + +/dpi2point 12 def +portraitMode 0288 9252 csm + + 0 0 6539 8402 rc +87 dict begin %Colortable dictionary +/c0 { 0.000000 0.000000 0.000000 sr} bdef +/c1 { 1.000000 1.000000 1.000000 sr} bdef +/c2 { 0.900000 0.000000 0.000000 sr} bdef +/c3 { 0.000000 0.820000 0.000000 sr} bdef +/c4 { 0.000000 0.000000 0.800000 sr} bdef +/c5 { 0.910000 0.820000 0.320000 sr} bdef +/c6 { 1.000000 0.260000 0.820000 sr} bdef +/c7 { 0.000000 0.820000 0.820000 sr} bdef +c0 +1 j +1 sg + 0 0 6540 8403 rf +6 w +0 2867 5068 0 0 -2867 850 3497 4 MP +PP +-5068 0 0 2867 5068 0 0 -2867 850 3497 5 MP stroke +4 w +DO +0 sg + 850 3497 mt 850 630 L + 850 630 mt 850 630 L +1356 3497 mt 1356 630 L +1356 630 mt 1356 630 L +1863 3497 mt 1863 630 L +1863 630 mt 1863 630 L +2370 3497 mt 2370 630 L +2370 630 mt 2370 630 L +2877 3497 mt 2877 630 L +2877 630 mt 2877 630 L +3384 3497 mt 3384 630 L +3384 630 mt 3384 630 L +3890 3497 mt 3890 630 L +3890 630 mt 3890 630 L +4397 3497 mt 4397 630 L +4397 630 mt 4397 630 L +4904 3497 mt 4904 630 L +4904 630 mt 4904 630 L +5411 3497 mt 5411 630 L +5411 630 mt 5411 630 L +5918 3497 mt 5918 630 L +5918 630 mt 5918 630 L + 850 3497 mt 5918 3497 L +5918 3497 mt 5918 3497 L + 850 2780 mt 5918 2780 L +5918 2780 mt 5918 2780 L + 850 2063 mt 5918 2063 L +5918 2063 mt 5918 2063 L + 850 1346 mt 5918 1346 L +5918 1346 mt 5918 1346 L + 850 630 mt 5918 630 L +5918 630 mt 5918 630 L +SO +6 w + 850 3497 mt 5918 3497 L + 850 630 mt 5918 630 L + 850 3497 mt 850 630 L +5918 3497 mt 5918 630 L + 850 3497 mt 5918 3497 L + 850 3497 mt 850 630 L + 850 3497 mt 850 3446 L + 850 630 mt 850 680 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 804 3687 mt +(0) s +1356 3497 mt 1356 3446 L +1356 630 mt 1356 680 L +1310 3687 mt +(1) s +1863 3497 mt 1863 3446 L +1863 630 mt 1863 680 L +1817 3687 mt +(2) s +2370 3497 mt 2370 3446 L +2370 630 mt 2370 680 L +2324 3687 mt +(3) s +2877 3497 mt 2877 3446 L +2877 630 mt 2877 680 L +2831 3687 mt +(4) s +3384 3497 mt 3384 3446 L +3384 630 mt 3384 680 L +3338 3687 mt +(5) s +3890 3497 mt 3890 3446 L +3890 630 mt 3890 680 L +3844 3687 mt +(6) s +4397 3497 mt 4397 3446 L +4397 630 mt 4397 680 L +4351 3687 mt +(7) s +4904 3497 mt 4904 3446 L +4904 630 mt 4904 680 L +4858 3687 mt +(8) s +5411 3497 mt 5411 3446 L +5411 630 mt 5411 680 L +5365 3687 mt +(9) s +5918 3497 mt 5918 3446 L +5918 630 mt 5918 680 L +5825 3687 mt +(10) s + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 900 3497 L +5918 3497 mt 5867 3497 L + 440 3559 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 626 3455 mt +(-20) s + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 2780 mt 900 2780 L +5918 2780 mt 5867 2780 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 440 2842 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 626 2738 mt +(-15) s + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 2063 mt 900 2063 L +5918 2063 mt 5867 2063 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 440 2125 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 626 2021 mt +(-10) s + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 1346 mt 900 1346 L +5918 1346 mt 5867 1346 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 440 1408 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 626 1304 mt +(-5) s + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 630 mt 900 630 L +5918 630 mt 5867 630 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 440 692 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 626 588 mt +(0) s + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +DO + 850 3497 mt 850 3497 L + 850 3497 mt 850 3497 L +SO + 850 3497 mt 5918 3497 L + 850 630 mt 5918 630 L + 850 3497 mt 850 630 L +5918 3497 mt 5918 630 L +gs 850 630 5069 2868 rc +/c8 { 0.000000 0.000000 1.000000 sr} bdef +c8 +507 178 507 177 507 171 507 176 506 181 507 182 507 175 507 174 +507 198 506 116 850 975 11 MP stroke +gr + +c8 +gs 777 902 5215 1875 rc + 36 36 850 975 FO + 36 36 1356 1091 FO + 36 36 1863 1289 FO + 36 36 2370 1463 FO + 36 36 2877 1638 FO + 36 36 3384 1820 FO + 36 36 3890 2001 FO + 36 36 4397 2177 FO + 36 36 4904 2348 FO + 36 36 5411 2525 FO + 36 36 5918 2703 FO +gr + +gs 850 630 5069 2868 rc +gr + +0 sg +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + +3038 3865 mt +(Iterations) s + 348 2682 mt -90 rotate +(Norm of residual) s +90 rotate +1707 498 mt +(Linear Splines, V\(2,1\), relax=gs, KX=3, KY=3) s +gs 850 630 5069 2868 rc +/c9 { 0.000000 0.498039 0.000000 sr} bdef +c9 +507 178 507 182 507 175 507 177 506 183 507 191 507 185 507 198 +507 215 506 122 850 932 11 MP stroke +gr + +c9 +gs 777 859 5215 1953 rc + 36 36 850 932 FO + 36 36 1356 1054 FO + 36 36 1863 1269 FO + 36 36 2370 1467 FO + 36 36 2877 1652 FO + 36 36 3384 1843 FO + 36 36 3890 2026 FO + 36 36 4397 2203 FO + 36 36 4904 2378 FO + 36 36 5411 2560 FO + 36 36 5918 2738 FO +gr + +gs 850 630 5069 2868 rc +/c10 { 0.847059 0.160784 0.000000 sr} bdef +c10 +507 186 507 185 507 189 507 185 506 185 507 193 507 205 507 214 +507 222 506 137 850 889 11 MP stroke +gr + +c10 +gs 777 816 5215 2048 rc + 36 36 850 889 FO + 36 36 1356 1026 FO + 36 36 1863 1248 FO + 36 36 2370 1462 FO + 36 36 2877 1667 FO + 36 36 3384 1860 FO + 36 36 3890 2045 FO + 36 36 4397 2230 FO + 36 36 4904 2419 FO + 36 36 5411 2604 FO + 36 36 5918 2790 FO +gr + +gs 850 630 5069 2868 rc +/c11 { 0.000000 0.749020 0.749020 sr} bdef +c11 +507 216 507 214 507 231 507 224 506 253 507 210 507 205 507 215 +507 223 506 179 850 849 11 MP stroke +gr + +c11 +gs 777 776 5215 2317 rc + 36 36 850 849 FO + 36 36 1356 1028 FO + 36 36 1863 1251 FO + 36 36 2370 1466 FO + 36 36 2877 1671 FO + 36 36 3384 1881 FO + 36 36 3890 2134 FO + 36 36 4397 2358 FO + 36 36 4904 2589 FO + 36 36 5411 2803 FO + 36 36 5918 3019 FO +gr + +gs 850 630 5069 2868 rc +gr + +1 sg +0 2867 5068 0 0 -2867 850 7478 4 MP +PP +-5068 0 0 2867 5068 0 0 -2867 850 7478 5 MP stroke +4 w +DO +0 sg + 850 7478 mt 850 4611 L + 850 4611 mt 850 4611 L +1356 7478 mt 1356 4611 L +1356 4611 mt 1356 4611 L +1863 7478 mt 1863 4611 L +1863 4611 mt 1863 4611 L +2370 7478 mt 2370 4611 L +2370 4611 mt 2370 4611 L +2877 7478 mt 2877 4611 L +2877 4611 mt 2877 4611 L +3384 7478 mt 3384 4611 L +3384 4611 mt 3384 4611 L +3890 7478 mt 3890 4611 L +3890 4611 mt 3890 4611 L +4397 7478 mt 4397 4611 L +4397 4611 mt 4397 4611 L +4904 7478 mt 4904 4611 L +4904 4611 mt 4904 4611 L +5411 7478 mt 5411 4611 L +5411 4611 mt 5411 4611 L +5918 7478 mt 5918 4611 L +5918 4611 mt 5918 4611 L + 850 7478 mt 5918 7478 L +5918 7478 mt 5918 7478 L + 850 6761 mt 5918 6761 L +5918 6761 mt 5918 6761 L + 850 6044 mt 5918 6044 L +5918 6044 mt 5918 6044 L + 850 5327 mt 5918 5327 L +5918 5327 mt 5918 5327 L + 850 4611 mt 5918 4611 L +5918 4611 mt 5918 4611 L +SO +6 w + 850 7478 mt 5918 7478 L + 850 4611 mt 5918 4611 L + 850 7478 mt 850 4611 L +5918 7478 mt 5918 4611 L + 850 7478 mt 5918 7478 L + 850 7478 mt 850 4611 L + 850 7478 mt 850 7427 L + 850 4611 mt 850 4661 L + 804 7668 mt +(0) s +1356 7478 mt 1356 7427 L +1356 4611 mt 1356 4661 L +1310 7668 mt +(1) s +1863 7478 mt 1863 7427 L +1863 4611 mt 1863 4661 L +1817 7668 mt +(2) s +2370 7478 mt 2370 7427 L +2370 4611 mt 2370 4661 L +2324 7668 mt +(3) s +2877 7478 mt 2877 7427 L +2877 4611 mt 2877 4661 L +2831 7668 mt +(4) s +3384 7478 mt 3384 7427 L +3384 4611 mt 3384 4661 L +3338 7668 mt +(5) s +3890 7478 mt 3890 7427 L +3890 4611 mt 3890 4661 L +3844 7668 mt +(6) s +4397 7478 mt 4397 7427 L +4397 4611 mt 4397 4661 L +4351 7668 mt +(7) s +4904 7478 mt 4904 7427 L +4904 4611 mt 4904 4661 L +4858 7668 mt +(8) s +5411 7478 mt 5411 7427 L +5411 4611 mt 5411 4661 L +5365 7668 mt +(9) s +5918 7478 mt 5918 7427 L +5918 4611 mt 5918 4661 L +5825 7668 mt +(10) s + 850 7478 mt 875 7478 L +5918 7478 mt 5892 7478 L +DO + 850 7478 mt 5918 7478 L +5918 7478 mt 5918 7478 L +SO + 850 7478 mt 900 7478 L +5918 7478 mt 5867 7478 L + 502 7540 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 688 7436 mt +(-6) s + 850 7262 mt 875 7262 L +5918 7262 mt 5892 7262 L +DO + 850 7262 mt 5918 7262 L +5918 7262 mt 5918 7262 L +SO + 850 7136 mt 875 7136 L +5918 7136 mt 5892 7136 L +DO + 850 7136 mt 5918 7136 L +5918 7136 mt 5918 7136 L +SO + 850 7046 mt 875 7046 L +5918 7046 mt 5892 7046 L +DO + 850 7046 mt 5918 7046 L +5918 7046 mt 5918 7046 L +SO + 850 6977 mt 875 6977 L +5918 6977 mt 5892 6977 L +DO + 850 6977 mt 5918 6977 L +5918 6977 mt 5918 6977 L +SO + 850 6920 mt 875 6920 L +5918 6920 mt 5892 6920 L +DO + 850 6920 mt 5918 6920 L +5918 6920 mt 5918 6920 L +SO + 850 6872 mt 875 6872 L +5918 6872 mt 5892 6872 L +DO + 850 6872 mt 5918 6872 L +5918 6872 mt 5918 6872 L +SO + 850 6830 mt 875 6830 L +5918 6830 mt 5892 6830 L +DO + 850 6830 mt 5918 6830 L +5918 6830 mt 5918 6830 L +SO + 850 6794 mt 875 6794 L +5918 6794 mt 5892 6794 L +DO + 850 6794 mt 5918 6794 L +5918 6794 mt 5918 6794 L +SO + 850 6761 mt 875 6761 L +5918 6761 mt 5892 6761 L +DO + 850 6761 mt 5918 6761 L +5918 6761 mt 5918 6761 L +SO + 850 6761 mt 900 6761 L +5918 6761 mt 5867 6761 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 502 6823 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 688 6719 mt +(-5) s + 850 6545 mt 875 6545 L +5918 6545 mt 5892 6545 L +DO + 850 6545 mt 5918 6545 L +5918 6545 mt 5918 6545 L +SO + 850 6419 mt 875 6419 L +5918 6419 mt 5892 6419 L +DO + 850 6419 mt 5918 6419 L +5918 6419 mt 5918 6419 L +SO + 850 6329 mt 875 6329 L +5918 6329 mt 5892 6329 L +DO + 850 6329 mt 5918 6329 L +5918 6329 mt 5918 6329 L +SO + 850 6260 mt 875 6260 L +5918 6260 mt 5892 6260 L +DO + 850 6260 mt 5918 6260 L +5918 6260 mt 5918 6260 L +SO + 850 6203 mt 875 6203 L +5918 6203 mt 5892 6203 L +DO + 850 6203 mt 5918 6203 L +5918 6203 mt 5918 6203 L +SO + 850 6155 mt 875 6155 L +5918 6155 mt 5892 6155 L +DO + 850 6155 mt 5918 6155 L +5918 6155 mt 5918 6155 L +SO + 850 6113 mt 875 6113 L +5918 6113 mt 5892 6113 L +DO + 850 6113 mt 5918 6113 L +5918 6113 mt 5918 6113 L +SO + 850 6077 mt 875 6077 L +5918 6077 mt 5892 6077 L +DO + 850 6077 mt 5918 6077 L +5918 6077 mt 5918 6077 L +SO + 850 6044 mt 875 6044 L +5918 6044 mt 5892 6044 L +DO + 850 6044 mt 5918 6044 L +5918 6044 mt 5918 6044 L +SO + 850 6044 mt 900 6044 L +5918 6044 mt 5867 6044 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 502 6106 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 688 6002 mt +(-4) s + 850 5828 mt 875 5828 L +5918 5828 mt 5892 5828 L +DO + 850 5828 mt 5918 5828 L +5918 5828 mt 5918 5828 L +SO + 850 5702 mt 875 5702 L +5918 5702 mt 5892 5702 L +DO + 850 5702 mt 5918 5702 L +5918 5702 mt 5918 5702 L +SO + 850 5612 mt 875 5612 L +5918 5612 mt 5892 5612 L +DO + 850 5612 mt 5918 5612 L +5918 5612 mt 5918 5612 L +SO + 850 5543 mt 875 5543 L +5918 5543 mt 5892 5543 L +DO + 850 5543 mt 5918 5543 L +5918 5543 mt 5918 5543 L +SO + 850 5486 mt 875 5486 L +5918 5486 mt 5892 5486 L +DO + 850 5486 mt 5918 5486 L +5918 5486 mt 5918 5486 L +SO + 850 5438 mt 875 5438 L +5918 5438 mt 5892 5438 L +DO + 850 5438 mt 5918 5438 L +5918 5438 mt 5918 5438 L +SO + 850 5397 mt 875 5397 L +5918 5397 mt 5892 5397 L +DO + 850 5397 mt 5918 5397 L +5918 5397 mt 5918 5397 L +SO + 850 5360 mt 875 5360 L +5918 5360 mt 5892 5360 L +DO + 850 5360 mt 5918 5360 L +5918 5360 mt 5918 5360 L +SO + 850 5327 mt 875 5327 L +5918 5327 mt 5892 5327 L +DO + 850 5327 mt 5918 5327 L +5918 5327 mt 5918 5327 L +SO + 850 5327 mt 900 5327 L +5918 5327 mt 5867 5327 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 502 5389 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 688 5285 mt +(-3) s + 850 5111 mt 875 5111 L +5918 5111 mt 5892 5111 L +DO + 850 5111 mt 5918 5111 L +5918 5111 mt 5918 5111 L +SO + 850 4985 mt 875 4985 L +5918 4985 mt 5892 4985 L +DO + 850 4985 mt 5918 4985 L +5918 4985 mt 5918 4985 L +SO + 850 4896 mt 875 4896 L +5918 4896 mt 5892 4896 L +DO + 850 4896 mt 5918 4896 L +5918 4896 mt 5918 4896 L +SO + 850 4826 mt 875 4826 L +5918 4826 mt 5892 4826 L +DO + 850 4826 mt 5918 4826 L +5918 4826 mt 5918 4826 L +SO + 850 4770 mt 875 4770 L +5918 4770 mt 5892 4770 L +DO + 850 4770 mt 5918 4770 L +5918 4770 mt 5918 4770 L +SO + 850 4722 mt 875 4722 L +5918 4722 mt 5892 4722 L +DO + 850 4722 mt 5918 4722 L +5918 4722 mt 5918 4722 L +SO + 850 4680 mt 875 4680 L +5918 4680 mt 5892 4680 L +DO + 850 4680 mt 5918 4680 L +5918 4680 mt 5918 4680 L +SO + 850 4643 mt 875 4643 L +5918 4643 mt 5892 4643 L +DO + 850 4643 mt 5918 4643 L +5918 4643 mt 5918 4643 L +SO + 850 4611 mt 875 4611 L +5918 4611 mt 5892 4611 L +DO + 850 4611 mt 5918 4611 L +5918 4611 mt 5918 4611 L +SO + 850 4611 mt 900 4611 L +5918 4611 mt 5867 4611 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + + 502 4673 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 112 FMSR + + 688 4569 mt +(-2) s + 850 7478 mt 5918 7478 L + 850 4611 mt 5918 4611 L + 850 7478 mt 850 4611 L +5918 7478 mt 5918 4611 L +gs 850 4611 5069 2868 rc +c8 +507 0 507 0 507 0 507 0 506 0 507 0 507 11 507 327 +507 978 506 1082 850 5005 11 MP stroke +gr + +c8 +gs 777 4932 5215 2545 rc +0 j +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 819 4987 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 1325 6069 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 1832 7047 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 2339 7374 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 2846 7385 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 3353 7385 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 3859 7385 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 4366 7385 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 4873 7385 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 5380 7385 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 5887 7385 13 MP +DP +gr + +gs 850 4611 5069 2868 rc +gr + +0 sg +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + +3038 7846 mt +(Iterations) s + 410 6546 mt -90 rotate +(Norm of error) s +90 rotate +gs 850 4611 5069 2868 rc +c9 +507 0 507 0 507 0 507 0 506 0 507 0 507 2 507 105 +507 785 506 1074 850 5005 11 MP stroke +gr + +c9 +gs 777 4932 5215 2113 rc +0 j +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 819 4987 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 1325 6061 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 1832 6846 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 2339 6951 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 2846 6953 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 3353 6953 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 3859 6953 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 4366 6953 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 4873 6953 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 5380 6953 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 5887 6953 13 MP +DP +gr + +gs 850 4611 5069 2868 rc +c10 +507 0 507 0 507 0 507 0 506 0 507 0 507 0 507 23 +507 467 506 1045 850 5005 11 MP stroke +gr + +c10 +gs 777 4932 5215 1682 rc +0 j +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 819 4987 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 1325 6032 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 1832 6499 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 2339 6522 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 2846 6522 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 3353 6522 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 3859 6522 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 4366 6522 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 4873 6522 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 5380 6522 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 5887 6522 13 MP +DP +gr + +gs 850 4611 5069 2868 rc +c11 +507 0 507 0 507 0 507 0 506 0 507 0 507 0 507 2 +507 155 506 947 850 5005 11 MP stroke +gr + +c11 +gs 777 4932 5215 1251 rc +0 j +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 819 4987 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 1325 5934 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 1832 6089 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 2339 6091 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 2846 6091 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 3353 6091 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 3859 6091 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 4366 6091 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 4873 6091 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 5380 6091 13 MP +DP +-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 +21 0 10 18 10 -18 21 0 5887 6091 13 MP +DP +gr + +gs 850 4611 5069 2868 rc +gr + +0 sg +-217 214 2943 5814 2 MP stroke +56 -55 -117 52 2787 6031 3 MP +PP +61 3 56 -55 -117 52 2787 6031 4 MP stroke +2 62 54 -117 2670 6083 3 MP +PP +-56 55 2 62 54 -117 2670 6083 4 MP stroke +2945 5777 mt +(16 X 16) s +-146 155 3217 6316 2 MP stroke +53 -56 -115 56 3133 6471 3 MP +PP +62 0 53 -56 -115 56 3133 6471 4 MP stroke +4 62 49 -118 3018 6527 3 MP +PP +-53 56 4 62 49 -118 3018 6527 4 MP stroke +3219 6279 mt +(32 X 32) s +-156 159 3906 6740 2 MP stroke +54 -56 -116 54 3812 6901 3 MP +PP +62 2 54 -56 -116 54 3812 6901 4 MP stroke +3 61 51 -117 3696 6955 3 MP +PP +-54 56 3 61 51 -117 3696 6955 4 MP stroke +3908 6703 mt +(64 X 64) s +-166 164 4208 7181 2 MP stroke +55 -55 -117 52 4104 7348 3 MP +PP +62 3 55 -55 -117 52 4104 7348 4 MP stroke +1 61 54 -116 3987 7400 3 MP +PP +-55 55 1 61 54 -116 3987 7400 4 MP stroke +4210 7144 mt +(128 X 128) s + +end %%Color Dict + +eplot +%%EndObject + +epage +end + +showpage + +%%Trailer +%%EOF diff --git a/multigrid/docs/linear_mg2d.fig b/multigrid/docs/linear_mg2d.fig new file mode 100644 index 0000000..f613e20 Binary files /dev/null and b/multigrid/docs/linear_mg2d.fig differ diff --git a/multigrid/docs/mg_gbs.pdf b/multigrid/docs/mg_gbs.pdf new file mode 100644 index 0000000..8b77358 Binary files /dev/null and b/multigrid/docs/mg_gbs.pdf differ diff --git a/multigrid/docs/mg_gbs.tex b/multigrid/docs/mg_gbs.tex new file mode 100644 index 0000000..4c1eec0 --- /dev/null +++ b/multigrid/docs/mg_gbs.tex @@ -0,0 +1,1623 @@ +% +% @file mg_gbs.tex +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +\documentclass[a4paper]{article} +\usepackage{linuxdoc-sgml} +\usepackage{graphicx} +\usepackage{hyperref} +%\usepackage{amsmath} +\usepackage{amssymb} +\usepackage{mathtools} +\usepackage{placeins} +\usepackage{multirow} +\usepackage{latexsym} +\usepackage{listings} +\usepackage{xcolor} +\usepackage{rotating} +\def\RepFigures{FIGURES_mg_gbs} + + +\title{\tt Multigrid Solver for GBS} +\author{Trach-Minh Tran, Federico Halpern\\CRPP/EPFL} +\date{v1.0, June 2015} + +\begin{document} +\maketitle +\tableofcontents + +\section{The PDE} +The PDE considered is +\begin{equation} +\label{eq:pde} + \left[\frac{\partial^2}{\partial x^2} + \tau\frac{\partial^2}{\partial + x\partial y} + \frac{\partial^2}{\partial y^2} - a(x,y)\right] u(x,y) = + f(x,y), \qquad 0\le x \le L_x, \; 0\le y \le L_y. +\end{equation} +On the four boundaries, homogeneous Dirichlet boundary condition +$u=0$ as well as Neumann boundary condition +$\partial u/\partial n=0$ can be applied. + +\section{Discretization} +The grid points $(x_i,y_j)$ are defined by +\begin{equation} + \begin{split} + x_i &= ih_x = i\frac{L_x}{N_x}, \quad i=0,\ldots, N_x \\ + y_j &= jh_y = j\frac{L_y}{N_y}, \quad j=0,\ldots, N_y \\ + \end{split} +\end{equation} + +Second order Finite Difference discretization of Eq.\ref{eq:pde} leads +to the following 9-point stencil + +\begin{equation} +\label{eq:stencil} + S_{ij} = \frac{1}{h_x^2} + \begin{bmatrix} + -\tau\alpha/4 & \alpha^2 & \tau\alpha/4 \\ + 1 & -2(1+\alpha^2)-h_x^2a_{ij} & 1 \\ + \tau\alpha/4 & \alpha^2 &-\tau\alpha/4 \\ + \end{bmatrix} +, \qquad \mbox{where $\alpha=h_x/h_y$}. +\end{equation} + +Note that the mesh aspect ratio $\alpha$ results in the same stencil +for the \emph{anisotropic} Poisson equation with $h_x=h_y$: +\begin{equation} + \frac{\partial^2 u}{\partial x^2} + \alpha^2 + \frac{\partial^2 u}{\partial y^2} = f. +\end{equation} + +It is shown in \cite[p.~119]{Briggs} that this anisotropy can degrade +the performance of multigrid using standard relaxations such as +Gauss-Seidel or damped Jacobi can be strongly degraded. + +With the \emph{lexicographic} numbering +\begin{equation} + I = j(N_x+1) + i+1, +\end{equation} +for the $(N_x+1)(N_y+1)$ nodes, the discretized problem can be +expressed as a matrix problem +\begin{equation} +\label{eq:matrix} + \mathbf{Au} = \mathbf{f}, +\end{equation} +where $\mathbf{A}$ is a 9-diagonal matrix, assembled using the stencil defined +above. Homogeneous Dirichlet boundary condition can be imposed, for example, on +the face $j=0$ simply by \emph{clearing} the matrix rows and columns +$1,2,\ldots, N_x+1$, and setting the diagonal terms to 1. + +Neumann boundary condition $\partial u/\partial x=0$ at +the face $i=0$, can be simply implemented by imposing +$u_{-1j}=u_{1j}$. The stencil for the boundary nodes $(0,j)$ +can thus be modified as + +\begin{equation} + S_{0j} = \frac{1}{h_x^2} + \begin{bmatrix} + 0 &\alpha^2 & 0 \\ + 0 &-2(1+\alpha^2)-h_x^2a_{0j} & 2 \\ + 0 &\alpha^2 & 0 \\ + \end{bmatrix} +. +\end{equation} + +Two model problems are considered in this report: +\begin{description} +\item[\texttt{\textbf{DDDD}} problem:] Homogeneous Dirichlet BC + at all the 4 boundaries. The \emph{analytic solution} is + \begin{equation} + u(x,y) = \sin\frac{2\pi k_xx}{L_x}\sin\frac{2\pi k_yy}{L_y}, + \qquad \mbox{where $k_x$, $k_y$ are positive integers}. + \end{equation} +\item[\texttt{\textbf{NNDD}} problem:] Neumann boundary conditions + $\partial u/\partial x=0$ at $x=0$ and $x=L_x$, homogeneous + Dirichlet BC at $y=0$ and $y=L_y$. The \emph{analytic solution} is + \begin{equation} + u(x,y) = \cos\frac{2\pi k_xx}{L_x}\sin\frac{2\pi k_yy}{L_y}, + \qquad \mbox{where $k_x$, $k_y$ are positive integers}. + \end{equation} +\end{description} +In both problems, $a$ depends only on $x$: +\begin{equation} + \label{eq:density} + a(x,y)= \exp\left[-\frac{(x-L_x/3)^2}{(L_x/2)^2}\right]. +\end{equation} + +The sparse direct solver MUMPS \cite{MUMPS} is used to solve (\ref{eq:matrix}) in +order to check the convergence of the schema described +above. Fig.\ref{fig:convergence} shows the expected \emph{quadratic} +convergence of the error with respect to $h_x$ with fixed $\alpha=h_x/h_y=0.5$ for +both problems, when the grid size is varied from $16\times 64$ to $512\times 2048$. + +\begin{figure}[hbt] + \centering + \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/convergence} + \caption{Convergence of the error + $\| u_{calc}- u_{anal}\|_\infty$ wrt the number of intervals + in the $x$ direction $N_x$ for $L_x=100$, $L_y=800$, $k_x=k_y=4$, $\tau=1$ and $N_y=4N_x$.} + \label{fig:convergence} +\end{figure} + +\section{Multigrid $V$-cycle} +\label{sec-mgProc} +Given an approximate $\mathbf{u}^h$ and right hand side $\mathbf{f}^h$ +defined at some grid level represented by the grid spacing $h$, the +following MG $V$-cycle procedure +\begin{equation*} + \boxed{\mathbf{u}^h \leftarrow MG^h(\mathbf{u}^h,\mathbf{f}^h)} +\end{equation*} +will compute a \emph{new} $\mathbf{u}^h$. It is defined recursively by the +following steps: + +\begin{enumerate} +\item If $h$ is the coarsest mesh size, + \begin{itemize} + \item Direct solve $\mathbf{A}^h\mathbf{u}^h=\mathbf{f}^h$ + \item Goto 3. + \end{itemize} +\item Else + \begin{itemize} + \item Relax $\mathbf{u}^h$ $\nu_1$ times. + \item $\mathbf{f}^{2h} \leftarrow {\mathbf{R}}(\mathbf{f}^h-\mathbf{A}^h\mathbf{u}^h)$. + \item $\mathbf{u}^{2h} \leftarrow MG^{2h}(\mathbf{u}^{2h},\mathbf{f}^{2h})$ $\mu$ times. + \item $\mathbf{u}^h\leftarrow + \mathbf{u}^h+{\mathbf{P}}\mathbf{u}^{2h}$. + \item Relax $\mathbf{u}^h$ $\nu_2$ times. + \end{itemize} +\item Return +\end{enumerate} + +In the procedure above, the operators $\mathbf{R}$ and $\mathbf{P}$ +denote respectively the \emph{restriction} (from \emph{fine} to +\emph{coarse} grid) and the \emph{prolongation} (from \emph{coarse} to +\emph{fine} grid). Notice that in this multigrid procedure, +$\mathbf{R}$ applies only to the \emph{right hand side} while $\mathbf{P}$ +applies only to the \emph{solution}. The standard $V(\nu_1,\nu_2)$ cycle is obtained by +calling this $MG^h$ procedure with $\mathbf{f}^h$ defined at the +\emph{finest} grid level, a guess $\mathbf{u}^h=0$ and $\mu=1$, while +$\mu=2$ results in the $W(\nu_1,\nu_2)$ cycle. + +Details on the grid coarsening, the inter-grid transfers and methods +of relaxation are given in the following. + +\subsection{Grid coarsening} +Let start with the one-dimensional \emph{fine} grid defined +by $x_i,\, i=0,\ldots,N$, assuming that $N$ is even. The next coarse grid +(with $N/2$ intervals) is obtained by simply discarding the grid +points with \emph{odd} indices. + +In order to get a \emph{smallest coarsest} grid (so that it is possible +to solve \emph{cheaply} the problem with a \emph{direct} method), $N$ should be +$N=N_c2^{L-1}$ where $L$ the total number of grid levels and $N_c$ is either +2 or a \emph{small odd} integer. As an example, the fine grid with $N=768$ can have +up to 9 grid levels, and a coarsest grid with 3 intervals, see +Table~\ref{tab:level}. + +\begin{table}[hbt] +\centering +\begin{tabular}{|l||r|r|r|r|r|}\hline +$L$ & \multicolumn{5}{c|}{$N$} \\ \hline + 1 & 2 & 3 & 5 & 7 & 9\\ + 2 & 4 & 6 & 10 & 14 & 18\\ + 3 & 8 & 12 & 20 & 28 & 36\\ + 4 & 16 & 24 & 40 & 56 & 72\\ + 5 & 32 & 48 & 80 & 112 & 144\\ + 6 & 64 & 96 & 160 & 224 & 288\\ + 7 & 128 & 192 & 320 & 448 & 576\\ + 8 & 256 & 384 & 640 & 896 & 1152\\ + 9 & 512 & 768 & 1280 & 1792 & 2304\\ + 10 & 1024 & 1536 & 2560 & 3584 & 4608\\ +\hline +\end{tabular} +\caption{Set of values of the \emph{fine} grid number of intervals $N$ + to obtain a \emph{coarsest} grid size at most equal to $9$ with + at most $10$ grid levels.} +\label{tab:level} +\end{table} + +For a two-dimensional grid, the same procedure is applied to both +dimensions. The result of such procedure is illustrated in +Fig.~\ref{fig:2d_coarsening}, for a $8\times4$ fine grid. + +\begin{figure}[htb] + \centering + \includegraphics[angle=0,width=0.6\hsize]{grid} + \caption{A \emph{coarse} $4\times 2$ grid ($\Box$) obtained from a + $8\times4$ fine grid ($\bullet$).} + \label{fig:2d_coarsening} +\end{figure} + + +\subsection{Inter-grid transfers} +The one-dimensional \emph{prolongation} operator for the second-order FD +discretization is chosen the same as the one obtained with the +\emph{linear Finite Elements} \cite{MG1D}. For a $N=8$ grid, it can be +represented as a $9\times 5$ matrix given by +\begin{equation} + \label{eq:1dprolongation} + \mathbf{P} = + \left( + \begin{matrix} + 1 & 0 & 0 & 0 & 0 \\ + 1/2 & 1/2 & 0 & 0 & 0 \\ + 0 & 1 & 0 & 0 & 0 \\ + 0 & 1/2 & 1/2 & 0 & 0 \\ + 0 & 0 & 1 & 0 & 0 \\ + 0 & 0 & 1/2 & 1/2 & 0 \\ + 0 & 0 & 0 & 1 & 0 \\ + 0 & 0 & 0 & 1/2 & 1/2\\ + 0 & 0 & 0 & 0 & 1 \\ + \end{matrix}\right) +\end{equation} +The \emph{restriction} matrix $\mathbf{R}$ is simply related to $\mathbf{P}$ by +\begin{equation} + \label{eq:1drestriction} + \mathbf{R} = \frac{1}{2}\mathbf{P}^{T}=\frac{1}{2}\left( + \begin{matrix} + 1&1/2&0&0&0&0&0&0&0\\ + 0&1/2&1&1/2&0&0&0&0&0\\ + 0&0&0&1/2&1&1/2&0&0&0\\ + 0&0&0&0&0&1/2&1&1/2&0\\ + 0&0&0&0&0&0&0&1/2&1\\ + \end{matrix} + \right). +\end{equation} +For Dirichlet BC imposed on the \emph{left} boundary one has to set +${P}_{21}=R_{12}=0$, while for Dirichlet BC imposed on the \emph{right} +boundary, ${P}_{N,N/2+1}=R_{N/2+1,N}=0$. Notice that these inter-grid operators are +identical to the standard \emph{linear interpolation} and \emph{full + weighting} operators. + +For a two-dimensional problem, using the property that the grid is a +\emph{tensor product} of two one-dimensional grids, the +restriction of the right hand side $f^{h}_{ij}$ and the prolongation of +the solution $u^{2h}_{ij}$ can be computed as +\begin{equation} + \label{eq:2dintergrid} + \begin{split} + \mathbf{f}^{2h} &= \mathbf{R}^x \cdot \mathbf{f}^{h}\cdot (\mathbf{R}^y)^T \\ + \mathbf{u}^{h} &= \mathbf{\mathbf{P}}^x \cdot \mathbf{u}^{2h}\cdot + (\mathbf{\mathbf{P}}^y)^T \\ + \end{split} +\end{equation} + +\subsection{Relaxations} +Gauss-Seidel and damped Jacobi iterations are used as relaxation +methods in the multigrid $V$ cycle. In general, Gauss-Seidel +method is more efficient but much more difficult to +\emph{parallelize} than the Jacobi method. + +It should be noted that if $a(x,y)$ in Eq.~\ref{eq:pde} is +non-positive, both relaxations diverge! This can be seen by +considering the following one-dimensional FD equation with uniform +$a$: + +\begin{equation} + u_{j-1} -(2+ah^2)u_j + u_{j+1} = h^2f_j. +\end{equation} + +Using the damped Jacobi relaxation, the error +$\epsilon^{(m)}_j\equiv u_{anal}(x_j)-u_j^{(m)}$ at iteration $m+1$ is given by + +\begin{equation} + \epsilon^{(m+1)}_j = + \frac{\omega}{2+h^2a}(\epsilon^{(m)}_{j-1}+\epsilon^{(m)}_{j+1}) + +(1-\omega)\epsilon^{(m)}_j . +\end{equation} +Performing a \emph{local mode analysis} (or Fourier analysis) (see +\cite[p.~48]{Briggs}), assuming that +$\epsilon^{(m)}_j=A(m)e^{ij\theta}$, where $\theta$ is related to the +mode number $k$ by $\theta=2\pi k/N$, the +\emph{amplification factor} $G(\theta)$ is obtained as +\begin{equation} + \begin{split} + G(\theta) &= \frac{A(m+1)}{A(m)} = + \frac{2\omega}{2+h^2a}\cos\theta + (1-\omega) \\ + &= G_0(\theta) -\frac{ \omega h^2a}{2+h^2a}\cos\theta + \simeq G_0(\theta) -\frac{ \omega h^2a}{2}\cos\theta, \\ + G_0(\theta) &=1-2\omega\sin^2\frac{\theta}{2}, \\ + \end{split} +\end{equation} +where $G_0(\theta)$ is the amplification factor for +$a=0$. Note that $|G_0(\theta)|< 1$ for \emph{all} $\theta$ and $0<\omega< 1$ +but $\displaystyle{\max_{|\theta|<\pi}|G(\theta)|>1}$ if $a<0$. + +In Gauss-Seidel relaxation method, the errors evolve as: +\begin{equation} + \epsilon^{(m+1)}_j = \frac{\epsilon^{(m+1)}_{j-1}+\epsilon^{(m)}_{j+1}}{2+h^2a}. +\end{equation} +Applying again the same Fourier analysis yields the +following complex amplification factor: +\begin{equation} + \begin{split} + G(\theta) &\simeq G_0(\theta)\left(1-\frac{h^2a}{2-e^{-i\theta}}\right) \\ + G_0(\theta) &=\frac{e^{i\theta}}{2-e^{-i\theta}}, \quad + |G_0(\theta)| < 1 + \end{split} +\end{equation} +which show that the Gauss-Seidel relaxations \emph{diverge} if $a<0$. + +Notice that when $a>0$, the effect of $a$ on the amplification is +negligible and is thus ignored in the following two-dimensional +analysis. Applying the damped Jacobi scheme on the +stencil~(\ref{eq:stencil}), the error at the iteration $m+1$ is given +by: +\begin{equation} + \begin{split} + \epsilon^{(m+1)}_{ij} = & \frac{\omega}{2(1+\alpha^2)} \left[ + \epsilon^{(m)}_{i-1,j} + \epsilon^{(m)}_{i+1,j} + \alpha^2( + \epsilon^{(m)}_{i,j-1} + \epsilon^{(m)}_{i,j+1}) + \frac{\tau\alpha}{4}( + \epsilon^{(m)}_{i+1,j+1}+\epsilon^{(m)}_{i-1,j-1} - + \epsilon^{(m)}_{i-1,j+1}-\epsilon^{(m)}_{i+1,j-1}) + \right] \\ + & + (1-\omega)\epsilon^{(m)}_{ij}. \\ + \end{split} +\end{equation} +Using the two-dimensional Fourier mode expression +\begin{equation} + \epsilon^{(m)}_{ij} = A(m)e^{i(\theta_1+\theta_2)}, \quad -\pi <\theta_1, + \theta_2 \le \pi, +\end{equation} +the amplification factor $G=A(m+1)/A(m)$ is given +by +\begin{equation} +\label{eq:amp_jac} + G(\theta_1,\theta_2;\omega,\alpha,\tau)=1 - \frac{2\omega}{1+\alpha^2} \left( + \sin^2\frac{\theta_1}{2} + \alpha^2\sin^2\frac{\theta_2}{2} + + \frac{\tau\alpha}{4}\sin\theta_1\,\sin\theta_2 + \right). +\end{equation} + +The errors in Gauss-Seidel method, assuming a \emph{lexicographic} ordering +for the unknowns (increasing first $i$ then $j$), are updated according +to +\begin{equation} + \epsilon^{(m+1)}_{ij} = \frac{1}{2(1+\alpha^2)} \left[ + \epsilon^{(m+1)}_{i-1,j} + \epsilon^{(m)}_{i+1,j} + \alpha^2( + \epsilon^{(m+1)}_{i,j-1} + \epsilon^{(m)}_{i,j+1}) + \frac{\tau\alpha}{4}( + \epsilon^{(m)}_{i+1,j+1}+\epsilon^{(m+1)}_{i-1,j-1} - + \epsilon^{(m)}_{i-1,j+1}-\epsilon^{(m+1)}_{i+1,j-1}) + \right]. +\end{equation} +The Fourier mode analysis then leads to the following complex +amplification factor +\begin{equation} +\label{eq:amp_gs} + G(\theta_1,\theta_2;\alpha,\tau) = + \frac{e^{i\theta_1} + + \left(\alpha^2+i\dfrac{\tau\alpha}{2}\sin\theta_1\right)e^{i\theta_2}} + {2(1+\alpha^2) - \left[e^{-i\theta_1}+\left(\alpha^2 - + i\dfrac{\tau\alpha}{2}\sin\theta_1\right)e^{-i\theta_2}\right]}. +\end{equation} + +Curves of $G$ for \emph{fixed} $\theta_2$ are plotted in +Fig.~\ref{fig:fourier_jac} showing \emph{convergence} ($\max|G|<1$) for +$\tau=-1,0,1,2$, using the damped Jacobi method. The +same conclusions are obtained for Gauss-Seidel relaxations as shown +in Fig.~\ref{fig:fourier_gs} where the absolute values +of the complex amplification factor $G$ are plotted. However, for larger +$|\tau|>2$, both methods diverge as can be seen in +Fig.~\ref{fig:relax_diverge}.Notice however that the PDE +(\ref{eq:pde}) is \emph{elliptic} only when $|\tau|<2$ is satisfied! + +\begin{figure}[htb] + \centering + \includegraphics[angle=0,width=0.85\hsize]{\RepFigures/fourier_jac} + \caption{Amplification factor for damped Jacobi relaxations with + $\omega=0.8$ and $\alpha=h_x/h_y=1$ and $\tau=-1,0,1,2$ displayed as curves of + constant $\theta_2$. $\theta_2=0$ on the \emph{green} curve and $\pi$ on + the \emph{red} curve.} + \label{fig:fourier_jac} +\end{figure} + +\begin{figure}[hbt] + \centering + \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/fourier_gs} + \caption{Absolute value of the amplification factor for Gauss-Seidel + relaxations with $\alpha=h_x/h_y=1$ and $\tau=-1,0,1,2$, displayed as curves of + constant $\theta_2$. $\theta_2=0$ on the \emph{green} curve and $\pi$ on + the \emph{red} curve.} + \label{fig:fourier_gs} +\end{figure} + +\begin{figure}[htb] + \centering + \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/relax_diverge} + \caption{Amplification factor for Jacobi (left) and Gauss-Seidel (right) + relaxations for $|\tau|=3,5$, $\alpha=h_x/h_y=1$, displayed as curves of + constant $\theta_2$. $\theta_2=0$ on the \emph{green} curve and $\pi$ on + the \emph{red} curve.} + \label{fig:relax_diverge} +\end{figure} + +In summary, the local mode analysis predicts that +\begin{itemize} +\item Negative values of the coefficient $a$ and large + mixed derivative ($|\tau| > 2$) can make both damped Jacobi and + Gauss-Seidel relaxations diverge. +\item Positive values of $a$ can decrease the amplification factor + (improving thus the convergence rate) but + its contributions $h^2a$ decrease for increasing grid resolution. +\end{itemize} +These predictions will be checked against numerical experiments in the +next section. + +\FloatBarrier +\section{Numerical Experiments} +\label{sec:NumExp1} +In the following numerical experiments, we look at the convergence +rate of the residual norm and the error norm which are defined at the +iteration $m$ by +\begin{equation} + \begin{split} + r^{(m)} &= \|\mathbf{f}-\mathbf{A}\mathbf{u}^{(m)}\|_\infty, \\ + e^{(m)} &= \|\mathbf{u}^{(m)}-\mathbf{u}_{anal}\|_\infty.\\ + \end{split} +\end{equation} + +The iterations are stopped when the number of iterations reach an +user supplied \emph{maximum} of iterations or when the residual norm +is smaller than either a given \emph{relative} tolerance {\tt rtol} +\cite[p.~51]{TEMPL} or \emph{absolute} tolerance {\tt atol}: +\begin{equation} + \begin{split} + r^{(m)} &< \mbox{\tt rtol} + \cdot(\|\mathbf{A}\|_\infty\cdot\|\mathbf{u}^{(m)}\|_\infty + + \|\mathbf{f}\|_\infty), \\ + r^{(m)} &< \mbox{\tt atol}.\\ + \end{split} +\end{equation} + +An additional stopping criteria consists of stopping the +iterations when the change of the discretization error between +successive iteration is small enough: +\begin{equation} + \frac{e^{(m)}-e^{(m-1)}}{e^{(m-1)}}< \mbox{\tt etol}. +\end{equation} + +\subsection{$V$-cycle performances} +Table~\ref{tab:iternum} shows the numbers of $V$-cycles required to +reach the \emph{relative tolerance} +$\mbox{\tt rtol}=10^{-8}$. In these runs where $\alpha=0.5$, $\tau=1$ +and $a(x,y)$ given by Eq.~\ref{eq:density}, we observe that the +biggest improvement is obtained at $\nu_1=\nu_2=2$. For larger +$\nu_1,\nu_2$, the number of required iterations is relatively insensitive +to the grid sizes. As can be seen in Fig.~\ref{fig:mg_iterations} +which plots the evolution of the error $e^{(m)}$, it +is clear that the level of discretization error has been largely +reached. Finally the times used by these runs are shown in +Fig.~\ref{fig:dddd_pc220} and Fig.~\ref{fig:nndd_pc220} where the +times spent in the direct solver using MUMPS \cite{MUMPS} are included +for comparison. For the $512\times 2048$ grid (the largest +case using the direct solver), the multigrid $V(3,3)$ is about $30$ +times faster! + +\begin{table}[htb] +\centering +\begin{tabular}{|l||c|c|c|c||c|c|c|c|}\hline + & \multicolumn{4}{c||}{\texttt{\textbf{DDDD}} problem} + & \multicolumn{4}{c|}{\texttt{\textbf{NNDD}} problem} \\ \cline{2-9} +Grid size & $V(1,1)$ & $V(2,2)$ & $V(3,3)$ & $V(4,4)$ + & $V(1,1)$ & $V(2,2)$ & $V(3,3)$ & $V(4,4)$ \\ \hline + $16\times 64$ & 3 & 2 & 2 & 1 & 4 & 2 & 2 & 1\\ + $32\times 128$ & 5 & 3 & 2 & 2 & 5 & 3 & 2 & 2\\ + $64\times 256$ & 7 & 4 & 3 & 3 & 7 & 4 & 3 & 3\\ + $128\times 512$ & 10 & 6 & 4 & 4 & 10 & 6 & 5 & 4\\ + $256\times 1024$ & 11 & 6 & 5 & 4 & 11 & 6 & 5 & 4\\ + $512\times 2048$ & 11 & 6 & 5 & 4 & 11 & 6 & 5 & 4\\ + $1024\times 4096$ & 10 & 6 & 4 & 4 & 10 & 6 & 4 & 4\\ + $1536\times 6144$ & 9 & 6 & 4 & 4 & 9 & 5 & 4 & 3\\ +\hline +\end{tabular} +\caption{Multigrid $V$-cycle results for the {\tt DDDD} and {\tt NNDD} + model problems with $k_x=k_y=4$, $L_x=100$, $L_y=800$, $\tau=1$ and + $a(x,y)$ given by Eq.~\ref{eq:density}. Shown are the numbers of + multigrid $V$-cycles required to reduce the \emph{relative} + residual norm to less than $10^{-8}$ for different + grid sizes and numbers of pre and post relaxation + sweeps. Gauss-Seidel relaxation is used. The coarsest grid size of the + $1536\times 6144$ case is $3\times 12$ while all the others have a coarsest + grid of size $2\times 8$.} +\label{tab:iternum} +\end{table} + +\begin{figure}[htb!] + \centering + \includegraphics[width=\textwidth]{\RepFigures/mg_iterations} + \caption{Performance of the $V(2,2)$-cycle using the Gauss-Seidel + relaxation scheme for the {\tt DDDD} (upper curve) and {\tt NNDD} + (lower curve) problem. The relative tolerance {\tt rtol} is + set to $10^{-8}$ the coarsest grid size for all the problem size + is fixed to $2\times 8$.} + \label{fig:mg_iterations} +\end{figure} + +The fittings of the obtained data show that the multigrid +$V$ cycle cost scales almost \emph{linearly} with the number of +unknowns $N=(N_x+1)(N_y+1)$ (as does the backsolve stage of MUMPS) +while the \emph{total} direct solve time scales as $N^{1.4}$. + +\begin{figure}[htb] + \centering + \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/dddd_pc220} + \caption{Times used by the multigrid $V$ cycles for the runs reported + in Table~\ref{tab:iternum} for the \texttt{\textbf{DDDD}} + problem. The last 6 $V(3,3)$ data points are used for the multigrid fit. The + MUMPS direct solver's cost is included for comparison. All the + timing results are obtained on an Intel Nehalem i7 processor, using the + Intel compiler version 13.0.1 and MUMPS-4.10.0. } + \label{fig:dddd_pc220} +\end{figure} + + +\begin{figure}[htb] + \centering + \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/nndd_pc220} + \caption{As in Fig.~\ref{fig:dddd_pc220} for the + \texttt{\textbf{NNDD}} problem.} + \label{fig:nndd_pc220} +\end{figure} + +\FloatBarrier +\subsection{Effects of the mesh aspect ratio $\alpha$} +From Table~\ref{tab:anisotropy}, one can observe that the required number of +$V(2,2)$ cycles increase quickly when $\alpha<0.5$ and $\alpha>2$. Advanced +\emph{relaxation} methods and \emph{coarsening} strategies +\cite[chap. 7]{Wesseling} can solve this performance degradation +but are generally more difficult to parallelize. + +\begin{table}[htb] +\centering +\begin{tabular}{|l||c|c|c|}\hline + $\alpha$ & \texttt{\textbf{DDDD}} & \texttt{\textbf{NNDD}} \\ \hline + 0.125 & 19 & 22 \\ + 0.25 & 12 & 12 \\ + 0.5 & 6 & 6 \\ + 1.0 & 5 & 5 \\ + 2.0 & 7 & 7 \\ + 4.0 & 20 & 19 \\ +\hline +\end{tabular} +\caption{Effects of the \emph{mesh aspect ratio} $\alpha=h_x/h_y$ on the + number of $V(2,2)$ cycles required to reach $\mbox{\tt + rtol}=10^{-8}$ for {\tt DDDD} and {\tt NNDD} + model problems. The listed $\alpha$'s are obtained by fixing + $N_x=256$, $N_y=1024$, $L_x=100$ and varying $L_y$.} +\label{tab:anisotropy} +\end{table} + +\subsection{Effects of the mixed partial derivative} +When $|\tau|>2$, Table~\ref{tab:mixedterm} shows that the multigrid +$V$-cycle diverge, as predicted from the local mode analysis based on +the amplification factor given in Eq.~\ref{eq:amp_gs}. Although, a +non-negative coefficient $a$ has a \emph{stabilizing} effect, the +latter disappears already for a $256\times 1024$ grid. + +\begin{table}[htb] +\centering +\begin{tabular}{|l|l|c|c|c|c|c|c|c|c|}\hline + &Grid size & $\tau=-3$ & $\tau=-2$ & $\tau=-1$ & $\tau=0$ & $\tau=1$ + & $\tau=2$ & $\tau=3$ \\ \hline +\multirow{4}{*}{\texttt{\textbf{DDDD}}} +&$128\times 512(a=0)$ & - & 39 & 7 & 5 & 7 & 38 & - \\ +& $128\times 512$ &16 & 6 & 5 & 4 & 4 & 6 & 17 \\ +& $256\times 1024$ &- & 8 & 5 & 4 & 5 & 7 & - \\ +& $512\times 2048$ &- & 9 & 5 & 4 & 5 & 9 & - \\ +\hline\hline +\multirow{4}{*}{\texttt{\textbf{NNDD}}} +&$128\times 512(a=0)$ & - & 42 & 7 & 5 & 7 & 41 & - \\ +& $128\times 512$ &13 & 6 & 5 & 4 & 5 & 5 & 13 \\ +& $256\times 1024$ &- & 7 & 5 & 4 & 5 & 7 & - \\ +& $512\times 2048$ &- & 7 & 5 & 4 & 5 & 7 & - \\ +\hline +\end{tabular} +\caption{Effects of the mixed derivative term $\tau$ on the + performances of the $V(3,3)$ cycle. The dashes + indicate that the $V$-cycle diverges. In theses runs, $a(x,y)$ is given by + Eq.~\ref{eq:density} except for the cases where it is set to + 0. Notice the \emph{stabilizing} effect of $a\ne 0$ for the + $128\times 512$ grid at $\tau = \pm 3$.} +\label{tab:mixedterm} +\end{table} + +\subsection{Using the damped Jacobi relaxation} +The optimum Jacobi damping factor $\omega$ can be determined by minimizing +the \emph{smoothing factor} defined as the maximum amplification +coefficient (\ref{eq:amp_jac}) restricted to the \emph{oscillatory modes}: +\begin{equation} + \label{eq:mu_jac} + \mu(\omega,\alpha,\tau) = \max_{(\theta_1,\theta_2)\in\Omega} + |G(\theta_1,\theta_2,\omega,\alpha,\tau)|, \qquad \Omega = + [|\theta_1|>\pi/2]\,\bigcup\,[|\theta_2|>\pi/2]. +\end{equation} +Results from numerical computation of (\ref{eq:mu_jac} are shown +in Fig.~\ref{fig:jac_opt}. An analytic expression for $\tau=0$ +assuming $\alpha\le 1$ is derived in \cite[p.~119]{salpha}: +\begin{gather} + \mu(\omega,\alpha,\tau=0) = + \max\left(\left|1-2\omega\right|,\;\left|1-\frac{\alpha^2}{1+\alpha^2}\omega\right|\right), + \nonumber\\ +\mu_{\mbox{opt}} = \frac{2+\alpha^2}{2+3\alpha^2} \quad +\mbox{at} \quad \omega_{\mbox{opt}} = \frac{2+2\alpha^2}{2+3\alpha^2}. +\end{gather} +Notice that the smoothing factor increases as $\alpha$ departs from 1 +and for increasing $\tau$. + +For Gauss-Seidel relaxation, the same numerical procedure applied to +(\ref{eq:amp_gs}) yields a smoothing factor $\mu$ equal to +respectively $0.5$, $ 0.68$ and $0.70$ for the three cases shown in +Fig.~\ref{fig:jac_opt}, which result in a better smoothing property +than the damped Jacobi relaxation. + +\begin{figure}[htb] + \centering + \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/jac_opt} + \caption{The smoothing factor for damped Jacobi relaxation + for different values of $\alpha$ and $\tau$.} + \label{fig:jac_opt} +\end{figure} + +Numerical experiments with the reference case ($\alpha=0.5$, $\tau=1$, +$a(x,y)$ given by Eq.~\ref{eq:density}) and the $128\times 512$ grid +using damped Jacobi relaxation, are shown in Table~\ref{tab:jac_opt} +and confirm that $\omega=0.9$ is the optimum damping factor and that +it is less efficient than the Gauss-Seidel relaxation, in agreement +with the Fourier analysis. + +\begin{table}[htb] +\centering +\begin{tabular}{l c c c c c c}\hline +&$\omega=0.5$ &$\omega=0.6$ &$\omega=0.7$ &$\omega=0.8$ &$\omega=0.9$ + &$\omega=1.0$ \\ \cline{2-7} +\texttt{\textbf{DDDD}}& 12 & 10 & 9 & 8 & 7 & 15 \\ +\texttt{\textbf{NNDD}}& 12 & 11 & 9 & 8 & 7 & 18 \\ +\hline +\end{tabular} +\caption{The number of $V(3,3)$ cycles required to obtain + $\mbox{\texttt{rtol}}=10^{-8}$ versus the Jacobi \emph{damped + factor} $\omega$. The grid size is $128\times 512$ with + $\alpha=0.5$, $\tau=1$ and $a(x,y)$ given by Eq.~\ref{eq:density}.} +\label{tab:jac_opt} +\end{table} + +\subsection{Matrix storage} +Initially the \emph{Compressed Sparse Row} storage format (CSR or CRS) (see +\cite[p.~58--59]{TEMPL}) was used to store the discretized finite +difference matrix. With this choice, the CPU time used by the matrix +construction (and boundary condition setting) is found to be always larger +than the multigrid solver time as shown in +Fig.~\ref{fig:matcon_time}. Fortunately, switching to the \emph{Compressed + Diagonal Storage} (CDS), where the 9 diagonal structure of the +matrix is fully exploited, the matrix construction time is considerably +reduced as shown in the same figure. On the other hand, no +difference in the multigrid solver performance is noticeable +between the two matrix storage. + +\begin{figure}[htb] + \centering + \includegraphics[angle=0,width=\hsize]{\RepFigures/matcon_time} + \caption{CPU time used by the matrix construction for CSR and CDS + matrix storage compared to the multigrid $V(3,3)$ cycle time for + the \textbf{DDDD} and \textbf{NNDD} model problems. The timing is + obtained using the same conditions as in Fig.~\ref{fig:dddd_pc220}.} + \label{fig:matcon_time} +\end{figure} + +\FloatBarrier + +\section{Modified PDE} +Here, the following modified PDE is considered: +\begin{equation} +\label{eq:new_pde} + \left[\frac{\partial^2}{\partial x^2} + \tau\frac{\partial^2}{\partial + x\partial y} + (1+\tau^2/4)\frac{\partial^2}{\partial y^2} - a(x,y)\right] u(x,y) = + f(x,y), \qquad 0\le x \le L_x, \; 0\le y \le L_y. +\end{equation} +This PDE which is obtained from the $\hat s-\alpha$ model +\cite[Eq.10]{salpha} is +\emph{elliptic} for any value of $\tau$. The resulting stencil is +changed from the previous stencil \ref{eq:stencil} to +\begin{equation} +\label{eq:new_stencil} + S_{ij} = \frac{1}{h_x^2} + \begin{bmatrix} + -\tau\alpha/4 & \alpha^2(1+\tau^2/4) & \tau\alpha/4 \\ + 1 & -2\left[1+\alpha^2(1+\tau^2/4)\right]-h_x^2a_{ij} & 1 \\ + \tau\alpha/4 & \alpha^2(1+\tau^2/4) &-\tau\alpha/4 \\ + \end{bmatrix} +. +\end{equation} + +Note that the +\emph{anisotropy} of the resulting Finite Difference discretization +is now $\alpha^2(1+\tau^2/4)$ and could be controlled by adjusting both +the mesh aspect ratio $\alpha$ and the \emph{shear} term $\tau$. + +Numerical calculations show that the multigrid $V$-cycles do always +converge, as shown in Table~\ref{tab:new_mixedterm}. + +\begin{table}[htb] +\centering +\begin{tabular}{|l|l|c|c|c|c|c|c|c|}\hline + &Grid size & $\tau=0$ & $\tau=1$ & $\tau=2$ & $\tau=4$ & $\tau=8$ + & $\tau=16$ \\ \hline +\multirow{3}{*}{\texttt{\textbf{DDDD}}} +& $128\times 512$ &4 & 4 & 5 & 6 & 9 & 20 (6) \\ +& $256\times 1024$ &4 & 4 & 5 & 6 & 11 & 25 (8)\\ +& $512\times 2048$ &4 & 4 & 5 & 7 & 12 & 29 (8)\\ +\hline\hline +\multirow{3}{*}{\texttt{\textbf{NNDD}}} +& $128\times 512$ &4 & 4 & 4 & 5 & 8 & 17 (5) \\ +& $256\times 1024$ &4 & 5 & 5 & 5 & 8 & 19 (6) \\ +& $512\times 2048$ &4 & 4 & 4 & 5 & 7 & 18 (6) \\ +\hline +\end{tabular} +\caption{Effects of the mixed derivative term $\tau$ on the + performances of the $V(3,3)$ cycle. In theses runs, $a(x,y)$ is + given by Eq.~\ref{eq:density}. The mesh aspect ratio $\alpha=0.5$ + was used. On the last column and shown in parenthesis are the + numbers of $V(3,3)$ cycles when $\alpha$ is reduced to 0.125 by + increasing the length $L_y$ while keeping all the other values fixed.} +\label{tab:new_mixedterm} +\end{table} + +\section{Parallel Multigrid} +In order to maximize the parallel efficiency and the flexibility of +utilization, a two-dimensional domain partition scheme is chosen to +parallelize the multigrid solver. As shown below, generalization of this +procedure for higher dimensions is straightforward. + +\subsection{Distributed grid coarsening} +The coarsening algorithm can be summarized as follow: +\begin{itemize} +\item Partition the grid points on each dimension at the + \emph{finest} grid level, as evenly as possible. +\item The range for each sub-grid, using \emph{global indexing} is + thus specified by $[s,e]$, with $s=0$ for the first sub-grid and + $e=N$ for the last sub-grid, $N$ being the number of grid intervals. +\item The next coarse sub-grid is thus obtained by discarding all the + \emph{odd} indexed grid points, as in the serial case. +\item This process can continue (as long as the total of number of + intervals is even) until there exists a prescribed \emph{minimum} + number of grid points on any sub-grid is reached. +\end{itemize} + +\subsection{Matrix-free formulation} +Using standard \emph{matrix} to represent the discretized 2D (or higher +dimensional) operators imply an \emph{one-dimensional numbering} of +the grid nodes. For example on a 2D $N_x\times N_y$ grid, the 1D numbering +of the node $(x_{i_1},y_{i_2})$ could be defined as +\[ + k=i_1+i_2\times N_x, \quad i_1=0:N_x,\;i_2=0:N_y. +\] +However, using 2D domain partition defined by +\begin{equation} + \label{eq:2dnumber} + i_1=s_1:e_1,\;i_2=s_2:e_2, +\end{equation} +with $s=(s_1,s_2)$ and $e=(e_1,e_2)$ denoting respectively the +\emph{starting} and \emph{ending} indices of a rectangular sub-domain, +result in a \emph{non-contiguous} set of the indices ${k}$ and in a +complicate structure of the partitioned matrix for the linear +operator. + +On the other hand, using the \emph{stencil notation} +introduced in \cite[chap. 5.2]{Wesseling} based on the +\emph{multidimensional} node labeling as defined by +(\ref{eq:2dnumber}) for a 2D problem, one can define a simple data +structure for the partitioned operator, $A(i,\delta)$, +where the $d$-tuple $i=(i_1,\ldots,i_d)$ represents a node of the +$d$-dimensional grid and the $d$-tuple +$\delta=(\delta_1,\ldots,\delta_d)$, the +\emph{distance} between the connected nodes. The result of +$\mathbf{A}u$ can thus be defined as +\begin{equation} + \label{eq:vmx} + (\mathbf{A}u)_i = \sum_{\delta\in\mathbb{Z}^d} + A(i,\delta)u_{i+\delta}, \quad i=s:e. +\end{equation} +In (\ref{eq:vmx}), the sum is performed over all +indices $\delta$ such that $A(i,\delta)$ is non-zero. For the 2D +nine-point stencil defined in (\ref{eq:stencil}), the 2-tuple +$\delta$ can be specified as the 9 columns of the following +\emph{structure} matrix +\begin{equation} + \label{5points} + S_\delta = \left( + \begin{array}{rrrrrrrrr} + 0 & -1 & 0 & 1 & -1 & 1 & -1& 0 & 1 \\ + 0 & -1 & -1&-1 & 0 & 0 & 1 & 1 & 1 \\ + \end{array} \right). +\end{equation} +In the general case of a $d$-dimensional grid and $\mathcal{N}$ point stencil, +$S_\delta$ is a $d\times\mathcal{N}$ +matrix. By noting that the subscript $i+\delta$ of $u$ on the right hand side of +(\ref{eq:vmx}) should be in the range $[0,N]$ \emph{only} for sub-domains which are +\emph{adjacent} to the boundary, one can deduce that for +a \emph{fixed} $\delta$, the lower and upper bounds of the indices $i$ +should be +\begin{equation} + \begin{split} + i_{\mbox{min}} &= \max (0, -\delta, s), \\ + i_{\mbox{max}} &= \min (N, N-\delta, e) \\ + \end{split} +\end{equation} +where $N=(N_1,N_2,\ldots,N_d)$ specify the number of intervals, +since, for sub-domains \emph{not adjacent} to the boundary, $u$ should +include values at the \emph{ghost} cells $s-g$ and $e+g$ where $g$ is +given by +\begin{equation} + g = \max|S_\delta| +\end{equation} +with the operator max taken along the \emph{rows} of the matrix. +The formula defined in (\ref{eq:vmx}) can then be implemented as in +the \emph{pseudo} Fortran code + +\par +\addvspace{\medskipamount} +\nopagebreak\hrule +\begin{lstlisting}[mathescape] +do k=1,SIZE($S_{\delta}$,2) ! loop over the stencil points + $\delta$ = $S_{\delta}$(:,k) + lb = MAX(0,-$\delta$,$s$) + ub = MIN($N$,$N-\delta$,e) + do i=lb,ub + Au(i) = Au(i) + A(i,$\delta$)*u(i+$\delta$) + enddo +enddo +\end{lstlisting} +\nopagebreak\hrule +\addvspace{\medskipamount} + +On the other hand, if the values of $u$ at the ghost cells of the +sub-domains \emph{adjacent} to the boundary are set to 0 +\begin{equation*} + u_{-g} = u_{N+g} = 0, +\end{equation*} +the lower and upper bounds of the +inner loop can be simply set to $lb=s$ and $ub=e$. Note that the inner +loop should be interpreted as $d$ nested loops over the $d$-tuple +$i=(i_1,\ldots,i_d)$ for a $d$-dimensional problem. + +\subsection{Inter-grid transfers} + +\subsubsection{Restriction} +Using the definition in the first equation of (\ref{eq:2dintergrid}) +together with (\ref{eq:1drestriction}), the 2D restriction operator +can be represented by the following 9-point stencil: +\begin{equation} + \label{eq:2drestriction} + \mathbf{R}_i = \frac{1}{16} + \begin{pmatrix} + 1 & 2 & 1 \\ + 2 & 4 & 2 \\ + 1 & 2 & 1 \\ + \end{pmatrix}, +\end{equation} +and the restriction of $f$ can be computed as +\begin{equation} + \bar{f}_i = (\mathbf{R}f)_i = \sum_{\delta\in\mathbb{Z}^2} + R(i,\delta)f_{2i+\delta}, \quad i=\bar{s}:\bar{e}, +\end{equation} +where $\bar{s},\bar{e}$ denote the partitioned domain boundary indices on the +\emph{coarse} grid, using the same algorithm described previously. + +\subsubsection{BC for the restriction operator} +\label{sec:restrict_bc} +Dirichlel boundary conditions can be imposed by modifying the +\emph{restriction stencil} on each of the four boundaries as follow: +\begin{equation} + \mathbf{R}_{0,.} = \frac{1}{16}\begin{pmatrix} + 1 & 2 & 0 \\ + 2 & 4 & 0 \\ + 1 & 2 & 0 \\ + \end{pmatrix},\quad + \mathbf{R}_{N_x,.} = \frac{1}{16}\begin{pmatrix} + 0 & 2 & 1 \\ + 0 & 4 & 2 \\ + 0 & 2 & 1 \\ + \end{pmatrix},\quad + \mathbf{R}_{.,0} = \frac{1}{16}\begin{pmatrix} + 0 & 0 & 0 \\ + 2 & 4 & 2 \\ + 1 & 2 & 1 \\ + \end{pmatrix},\quad + \mathbf{R}_{.,N_y} = \frac{1}{16}\begin{pmatrix} + 1 & 2 & 1 \\ + 2 & 4 & 2 \\ + 0 & 0 & 0 \\ + \end{pmatrix}. +\end{equation} +With the natural Neumann BC, no change of the restriction operator is needed. + +\subsubsection{Prolongation} +Stencil notation for \emph{prolongation} operators is less obvious to +formulate, see \cite[chap. 5.2]{Wesseling}. A more straightforward +implementation is obtained in the 2D case, by simply applying +\emph{bilinear interpolation} on the \emph{coarse grid}: +\begin{equation} + \label{eq:2dprolongation} + \begin{split} + (\mathbf{P}\bar{u})_{2i} &= \bar{u}_{i}, \\ + (\mathbf{P}\bar{u})_{2i+e_1} &= (\bar{u}_{i} + \bar{u}_{i+e_1})/2, \quad + (\mathbf{P}\bar{u})_{2i+e_2} = (\bar{u}_{i} + \bar{u}_{i+e_2})/2, \\ + (\mathbf{P}\bar{u})_{2i+e_1+e_2} &= (\bar{u}_{i} + + \bar{u}_{i+e_1} + \bar{u}_{i+e_2} + \bar{u}_{i+e_1+e_2})/4, \\ + \end{split} +\end{equation} + +\subsection{Relaxations} +While the Gauss-Seidel proves to be more efficient, the damped Jacobi +method, at least for a first version of the parallel multigrid solver, +is used because it is straightforward to \emph{parallelize}. The same +undamped Jacobi (with $\omega=1$) with a \emph{few} number of +iterations is also used to solve the linear system at the coarsest +mesh as prescribed by the multigrid $V$-cycle procedure defined in +section \ref{sec-mgProc}. + +\subsection{Local vectors and stencils} +All local vectors (used to represent solutions or +right-hand-sides) contain \emph{ghost cells} and are implemented +using 2D arrays, for example +\[\mbox{\texttt{sol(s(1)-1:e(1)+1,s(2)-1:e(2)+1)}}\] +for the solution vector. + +The partitioned stencils are defined +only for the \emph{local} grid points, without the ghost cells. +Thus, before each operation on the local vectors, an exchange (or +update) of the values on the ghost cells is performed. + +As a result, all the memory required by the solver is completely +partitioned, except for the space used by the ghost cells. + +\subsection{Numerical Experiments} +In this section, all the numerical experiments are conducted on +\texttt{helios.iferc-csc.org}, using the Intel compiler version 13.1.3 +and bullxpmi-1.2.4.3. The \emph{stopping criteria} for the $V$-cycles +is based on the absolute and relative residual norms as well as the +discretization error norm as defined in section \ref{sec:NumExp1}. In +cases where the analytic solution is not known, the latter can be +replaced by some norm of the solution. + +\subsubsection{Strong scaling} +\begin{figure}[htb] + \centering + \includegraphics[angle=0,width=\hsize]{\RepFigures/strong_256x1024_DDD} + \caption{DDDD problem for a $256\times 1024$ size, using multigrid + $V(3,3)$ cycles. Different times for a given number of + processes are obtained with different combinations of processes in + each dimension. The number of grid levels are fixed to 6. Five Jacobi + iterations are used at the coarsest grid.} + \label{fig:strong_scal_small} +\end{figure} + +\begin{figure}[htb] + \centering + \includegraphics[angle=0,width=\hsize]{\RepFigures/strong_512x2048_DDD} + \caption{DDDD problem for a $512\times 2048$ size using multigrid + $V(3,3)$ cycles. The \textcolor{red}{red marker} on the left shows + the time for the serial multigrid solver. Different times for a + given number of processes are obtained with different combinations + of processes in each dimension. The number of grid levels are fixed + to 6. Five Jacobi iterations are used at the coarsest grid.} + \label{fig:strong_scal} +\end{figure} + +Here 2 \emph{fixed} problem sizes are considered: +\begin{itemize} + \item A small size with the (fine) grid of $256\times 1024$ + shown in Fig.~\ref{fig:strong_scal_small} and + \item a larger size of $512\times 2048$ in Fig.~\ref{fig:strong_scal}. +\end{itemize} + +In both cases $\mbox{\tt rtol}=10^{-8}$ and $\mbox{\tt + etol}=10^{-3}$. It was checked that the results do not change when more +than 5 Jacobi iterations are used at the coarsest mesh. Notice that +for the small problem, the parallel efficiency starts to degrade at 32 +MPI processes while for the larger case, this happens after 64 MPI +processes. This can be explained by +the ghost cell exchange communication overhead: denoting $N_1$ and +$N_2$, the number of grid points in each direction and $P_1$ and $P_2$ +the number of MPI processes in each direction, the ratio $S/V$ between the +number of ghost points and interior grid points for each local +subdomains can be estimated as +\begin{equation} +\label{eq:comm_overhead} + S/V\simeq\frac{2(N_1/P_1+N_2/P_2)}{N_1N_2/P_1P_2} = 2\left(P_1/N_1+P_2/N_2\right). +\end{equation} +This ratio increases as the number MPI processes increases while +keeping the problem size fixed. On very coarse grids, this communication +cost can become prohibitive. For this reason, in all the runs shown +here, the number of grid points on each direction for the coarsest +grid is limited to 2. + +\subsubsection{Weak Scaling} +\begin{figure}[htb] + \centering + \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/weak_DDDD} + \caption{Weak scaling for a DDDD problem, using multigrid $V(3,3)$ + cycles. The number of grid levels are fixed to 7. The solver for the + coarsest grid uses 5 Jacobi iterations except for the 2 largest cases + which require respectively 20 and 100 iterations to converge. The 2 + sets of curves on the right figure show respectively the timings with + and without the calculations of the residual norm and discretization + error which require both a \emph{global reduction}.} + \label{fig:weak_scal_DDDD} +\end{figure} + +\begin{figure}[htb] + \centering + \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/weak_NNDD} + \caption{Weak scaling for a NNDD problem, using multigrid $V(3,3)$ + cycles. The number of grid levels are fixed to 7. The solver for the + coarsest grid uses 5 Jacobi iterations except for the 2 largest cases + which require respectively 20 and 100 iterations to converge. The 2 + sets of curves on the right figure show respectively the timings with + and without the calculations of the residual norm and discretization + error which require both a \emph{global reduction}.} + \label{fig:weak_scal_NNDD} +\end{figure} + +According to Eq.~\ref{eq:comm_overhead}, varying the problem size +together with the number of MPI processes by keeping $N_1/P_1$ and +$N_2/P_2$ constant should yield a \emph{constant scaling}, +provided that the convergence rate does not depend on the problem sizes. +The results for the \texttt{DDDD} and \texttt{NNDD} problems are shown +in Fig.~\ref{fig:weak_scal_DDDD} and +Fig.~\ref{fig:weak_scal_NNDD}. The left part of the figures shows that +the convergence rate depends only weakly on the problem sizes, which +leads indeed to a (almost) constant time obtained for numbers of MPI +processes $P$ between 16 and 1024 . The reason for the good timings +for smaller $P$ is simply that there are only 2 ghost cell exchanges +for $P=2\times 2$ (instead of 4 for $P\ge 16$) and that there is no +exchange for $P=0$. + +\section{Non-homogeneous Boundary Conditions} + +\subsection{Non-homogeneous Dirichlet Conditions} +Non-homogeneous Dirichlet boundary conditions can be imposed on all the +Dirichlet faces simply by \emph{clearing}, as for the +\emph{homogeneous case}, the matrice rows and columns and setting its +diagonal term to 1. Moreover, the corresponding corresponding +\emph{right-hand-side} should be set to: +\begin{equation} + \begin{split} + f_{0,j} &= D^W(y_j), \quad f_{N_x,j}=D^E(y_j), \qquad j=0,\ldots,N_y, \\ + f_{i,0} &= D^S(x_i), \quad f_{j,N_y}=D^N(x_i), \qquad i=0,\ldots,N_x, \\ + \end{split} +\end{equation} +where $D^W, D^E, D^S, D^N$ are the values of $u$ at the 4 Dirichlet +faces. As for the homogeneous Dirichlet BC, the \emph{restriction} +operator should be changed as described in section +\ref{sec:restrict_bc} while the \emph{prolongation} defined +in (\ref{eq:2dprolongation}) remains unchanged. + +\subsection{Non-homogeneous Neumann Conditions} +The non-homogeneous Neumann conditions at the 4 faces $x=0$ can be +defined as +\begin{equation} + \begin{split} + \left.\frac{\partial u}{\partial x}\right|_{x=0} &= N^W(y), \quad + \left.\frac{\partial u}{\partial x}\right|_{x=L_x} = N^E(y), \\ + \left.\frac{\partial u}{\partial y}\right|_{y=0} &= N^S(x), \quad + \left.\frac{\partial u}{\partial y}\right|_{y=L_y} = N^N(x). \\ + \end{split} +\end{equation} +Discretization of the BC defined above, using the \emph{central +difference} yields on the 4 faces +\begin{equation} + \begin{split} + u_{-1,j} &= u_{1,j} -2h_xN^W(y_j), \quad + u_{N_x+1,j} = u_{N_x-1,j} + 2h_xN^E(y_j), \qquad j=0,\ldots,N_y, \\ + u_{i,-1} &= u_{i,1} -2h_yN^S(x_i), \quad + u_{i,N_y+1} = u_{i,N_y-1} + 2h_yN^N(x_i), \qquad i=0,\ldots,N_x. \\ + \end{split} +\end{equation} + +With these relations, the stencil (\ref{eq:new_stencil}) on the 4 +boundaries is modified as follow + +\begin{equation} + \begin{split} + S^W &= \frac{1}{h_x^2} + \begin{bmatrix} + 0 &\alpha^2(1+\tau^2/4) & 0 \\ + 0 &-2(1+\alpha^2(1+\tau^2/4))-h_x^2a_{0,j} & 2 \\ + 0 &\alpha^2(1+\tau^2/4) & 0 \\ + \end{bmatrix} +,\quad + S^E = \frac{1}{h_x^2} + \begin{bmatrix} + 0 &\alpha^2(1+\tau^2/4) & 0 \\ + 2 &-2(1+\alpha^2(1+\tau^2/4))-h_x^2a_{N_x,j} & 0 \\ + 0 &\alpha^2(1+\tau^2/4) & 0 \\ + \end{bmatrix} +, \\ + S^S &= \frac{1}{h_x^2} + \begin{bmatrix} + 0 & 2\alpha^2(1+\tau^2/4) & 0 \\ + 1 &-2(1+\alpha^2(1+\tau^2/4))-h_x^2a_{i,0} & 1 \\ + 0 & 0 & 0 \\ + \end{bmatrix} +,\quad + S^N = \frac{1}{h_x^2} + \begin{bmatrix} + 0 & 0 & 0 \\ + 1 &-2(1+\alpha^2(1+\tau^2/4))-h_x^2a_{i,N_y} & 1 \\ + 0 & 2\alpha^2(1+\tau^2/4) & 0 \\ + \end{bmatrix} +, \\ + \end{split} +\end{equation} +while the right-hand-side should be changed according to +\begin{equation} + \begin{split} + f_{0,j} &\longleftarrow f_{0,j} + \frac{2}{h_x}\left[\frac{\tau\alpha}{4}\,N^W(y_{j-1}) + + N^W(y_j) - \frac{\tau\alpha}{4}\,N^W(y_{j+1})\right], \\ + f_{N_x,j} &\longleftarrow f_{N_x,j} + \frac{2}{h_x}\left[\frac{\tau\alpha}{4}\,N^E(y_{j-1}) + - N^E(y_j) - \frac{\tau\alpha}{4}\,N^E(y_{j+1})\right], \\ + f_{i,0} &\longleftarrow f_{i,0} + \frac{2}{h_y}\left[\frac{\tau}{4\alpha}\,N^S(x_{i-1}) + + (1+\tau^2/4)N^S(x_i) - \frac{\tau}{4\alpha}\,N^S(x_{i+1})\right], \\ + f_{i,N_y} &\longleftarrow f_{i,N_y} + \frac{2}{h_y}\left[\frac{\tau}{4\alpha}\,N^N(x_{i-1}) + - (1+\tau^2/4)N^N(x_i) - \frac{\tau}{4\alpha}\,N^N(x_{i+1})\right]. \\ + \end{split} +\end{equation} + +\subsection{The NNDD test problem} +In order to test the discretization of the non-homogeneous boundary +conditions as formulated above, a test problem with the prescribed +\emph{exact} solution +\begin{equation} + u(x,y) = 1 + \sin\frac{2\pi k_xx}{L_x}\sin\frac{2\pi k_yy}{L_y}, + \qquad \mbox{where $k_x$, $k_y$ are positive integers} +\end{equation} +and the following non-homogeneous boundary conditions + +\begin{equation} + \begin{split} + \left.\frac{\partial u}{\partial x}\right|_{x=0} = + \left.\frac{\partial u}{\partial x}\right|_{x=L_x} &= + k_x\sin\frac{2\pi k_yy}{L_y}, \\ + u(x,0) = u(x,L_y) &= 1, \\ + \end{split} +\end{equation} + +is solved with varying grid spacing. The discretization errors versus +the number of grid intervals $N_x$ displayed in Fig(\ref{fig:conv_nh_bc} shows +a \emph{quadratic} convergence as expected from the second order +finite differences used in both the PDE and the Neumann boundary condition +discretization. + +\begin{figure}[htb] + \centering + \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/conv_nh_bc} + \caption{Convergence of the error + $\| u_{calc}- u_{anal}\|_\infty$ wrt the number of intervals in + the $x$ direction $N_x$ for the non-homogeneous NNDD + problem. Here, $L_x=100$, $L_y=800$, $k_x=k_y=4$, $\tau=1$ and $N_y=4N_x$.} + \label{fig:conv_nh_bc} +\end{figure} + +As shown in Fig.(\ref{fig:nndd_nh}), the multigrid $V$-cycles for the +\emph{non-homogeneous} problem converge with a slightly smaller efficiency, +than the \emph{homogeneous} problem shown in Fig.(\ref{fig:weak_scal_NNDD}). + +\begin{figure}[htb] + \centering + \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/nndd_nh} + \caption{Performances of the $V(3,3)$-cycle for the non-homogeneous + NNDD problem. The same parameters in Fig.(\ref{fig:conv_nh_bc}) + are used here.} + \label{fig:nndd_nh} +\end{figure} + +\FloatBarrier + +\subsection{Local relaxation methods} +In addition to the damped Jacobi, three methods of relaxations are +added in this parallel multigrid solver: +\begin{enumerate} +\item The 4 color Gauss-Seidel method (RBGS). +\item The Gauss-Seidel method (GS). +\item The successive over-relaxation method (SOR). +\end{enumerate} +In order to apply correctly the parallel 4 color Gauss-Seidel, a +complicated ghost cell exchange has to be performed for each sweep +for each color. Here we simply apply the method \emph{locally} on each +subdomain with only one ghost exchange performed at the beginning of +each relaxation. + +The same procedure is also used for the other 2 methods which are +inherently \emph{serial}. All these 3 relaxations are +thus only correct if there is only one subdomain. As a +consequence, while the damped Jacobi does not depend on the +partition of the subdomains, results from these 3 methods do depend on +how the domain is partitioned. + +Table~\ref{tab:advrelax} show +however that all of the 3 \emph{approximated} relaxation methods produce +a much \emph{faster} convergence rate than the damped Jacobi relaxations for the NNDD test +problem considered here. The performance of the implemented solver +using the 4 relaxation methods on HELIOS is compared in +Fig,(\ref{fig:weakhelios}. The bad performance of the 4 color +Gauss-Seidel relaxations (RBGS) can be explained by the 4 nested loops +required to sweep each of the 4 colors. + +\begin{table}[hbt] +\centering +\begin{tabular}{|l||r|r|r|r|r|r|}\hline + Grid Sizes & $256\times 1024$ & $512\times 2048$ & $1024\times 4096$ & $2048\times 8192$ & $4096\times 16384$ & $8192\times 32768$ \\ +\hline + Process topology & + \multicolumn{1}{c|} {$1\times 1$} & + \multicolumn{1}{c|} {$2\times 2$} & + \multicolumn{1}{c|} {$4\times 4$} & + \multicolumn{1}{c|} {$8\times 8$} & + \multicolumn{1}{c|} {$16\times 16$} & + \multicolumn{1}{c|} {$32\times 32$} \\ +\hline + Jacobi $\omega=0.9$ & 0.22 & 0.24 & 0.24 & 0.24 & 0.24 & 0.25 \\ + RBGS & 0.05 & 0.07 & 0.10 & 0.10 & 0.12 & 0.12 \\ + GS & 0.07 & 0.08 & 0.10 & 0.11 & 0.11 & 0.12 \\ + SOR $\omega=1.2$ & 0.04 & 0.05 & 0.07 & 0.07 & 0.07 & 0.08 \\ +\hline +\end{tabular} +\caption{Reduction factor for the residuals (obtained as the \emph{geometric mean} of + all its values except the first 2 values) for the non-homogeneous + NNDD test problem. The same parameters as in Fig.(\ref{fig:conv_nh_bc}) are used here.} +\label{tab:advrelax} +\end{table} + +\begin{figure}[htb] + \centering + \includegraphics[angle=0,width=0.9\hsize]{\RepFigures/weak_helios} + \caption{Performance of the 4 relaxations on the non-homogeneous + NNDD problem. The same parameters in Fig.(\ref{fig:conv_nh_bc}) + are used here. The grid sizes used in this \emph{weak scaling} + run are shown in Table~\ref{tab:advrelax}.} + \label{fig:weakhelios} +\end{figure} + +\FloatBarrier + +\section{Performance of the Stencil Kernel on different platform} +To get a feeling on the performances gained on the different +platforms and how well the compilers (with their +auto-vectorization capability) support these platforms, +the following Fortran \emph{9-point stencil} kernel has been used. The +OpenMP directives are used for parallelization on both Xeon and Xeon +Phi while offload to GPU card is done via the high level OpenACC +directives. \emph{First touch} is applied in the initialization of +\texttt{x} and \texttt{mat}. + +\begin{lstlisting}[language=Fortran,numbers=left,commentstyle=\color{blue},keywordstyle=\color{red},frame=single] +!$omp parallel do private(ix,iy) +!$acc parallel loop present(mat,x,y) private(ix,iy) + DO iy=0,ny + DO ix=0,nx + y(ix,iy) = mat(ix,iy,1)*x(ix-1,iy-1) & + & + mat(ix,iy,2)*x(ix, iy-1) & + & + mat(ix,iy,3)*x(ix+1,iy-1) & + & + mat(ix,iy,4)*x(ix-1,iy) & + & + mat(ix,iy,0)*x(ix,iy) & + & + mat(ix,iy,5)*x(ix+1,iy) & + & + mat(ix,iy,6)*x(ix-1,iy+1) & + & + mat(ix,iy,7)*x(ix, iy+1) & + & + mat(ix,iy,8)*x(ix+1,iy+1) + END DO + END DO +!$acc end parallel loop +!$omp end parallel do +\end{lstlisting} + +The performances on a Helios dual processor node and its attached Xeon +Phi co-processor are shown in Fig.~\ref{fig:cpu_mic} while the +performances on a Cray XC30 CPU and its attached NVIDIA graphics card are +shown in Fig.~\ref{fig:cpu_gpu}. In these figures, Intel optimization +flag \texttt{-O3} and default Cray optimization were applied. In +Fig.~\ref{fig:cpu_mic_O1_O3}, the speedup by vectorization is shown by +comparing performances obtained with \texttt{-O3} and \texttt{-O1}. +Several observations can be drawn from these results. +\begin{itemize} +\item The parallel scaling, using OpenMP is linear for both Intel and + Cray compilers, when the problem sizes fit into the 20MB cache of + the Sandybridge processor. For grid sizes smaller than $32\times8$, the + overhead of thread creation dominates. When the memory footprint is + larger than the cache, 4 threads per socket already saturate the memory + bandwidth. +\item On the MIC, the parallel speedup scales linearly up to 60 + cores with 1 thread per core. Using 2 or 3 threads per core does not help + while with 4 threads, the performance even degrades. +\item The MIC, using the Intel \emph{mic native} mode, does not + perform better than 8 cores of the Sandybridge processor. +\item Since the benefit from \emph{vectorization} is quite large for + the MIC (see Fig.~\ref{fig:cpu_mic_O1_O3}), the poor parallel + scalability may be explained by the low flop intensity per thread + coupled with the high overhead of the (many) thread creation and + thread synchronization. +\item The NIVIDIA card, using the high level OpenACC programming + style is more than 3 times faster than 8 Sandybridge cores, for grid + sizes larger than $1024\times256$. For smaller sizes, there are not + enough flops to keep the GPU threads busy. +\end{itemize} + + +\begin{figure}[htb] + \centering + \includegraphics[angle=0,width=\hsize]{\RepFigures/cpu_mic} + \caption{Performance on the Helios dual processor (left) using the + \texttt{-O3} compiler option and on the MIC (right), + using the native mode \texttt{-mmic}.} + \label{fig:cpu_mic} +\end{figure} + +\begin{figure}[htb] + \centering + \includegraphics[angle=0,width=\hsize]{\RepFigures/cpu_gpu} + \caption{Performance on a Cray XC30 single 8 core processor node + (left) and the NVIDIA card (right) using OpenACC. Default Cray + Fortran compiler optimization has been used on both runs.} + \label{fig:cpu_gpu} +\end{figure} + +\begin{figure}[htb] + \centering + \includegraphics[angle=0,width=\hsize]{\RepFigures/cpu_mic_O1_O3} + \caption{Performance comparison between using \texttt{-O3} and + \texttt{-O1}, on the Helios dual processor (left) and on the MIC (right).} + \label{fig:cpu_mic_O1_O3} +\end{figure} + +\FloatBarrier + +\section{Hybrid MPI+OpenMP \texttt{PARMG (r599})} +In this version, a \emph{straightforward} parallelization is +done in the subroutines \texttt{jacobi, residue, prolong, restrict} and +\texttt{norm\_vec}, using the OpenMP work sharing directives. The +ghost cell exchange is executed by the \emph{master} thread. +All the 2D arrays (solutions, RHS, etc.) are allocated and initialized +\emph{once} by the \emph{master} thread. Dynamic array allocations +during the multigrid $V$-cycles are thus avoided. + +To help further optimization, timings are introduced for each of the 4 +multigrid components \texttt{jacobi, residue, prolong, restrict} and +the ghost cell \texttt{exchange} as well as on the \emph{recursive} +subroutine \texttt{mg}. Since the timings of the 4 MG components +include already calls to \texttt{exchange}, the time obtained for +\texttt{mg} should be equal to the sum of the 4 MG components and +the \emph{extras} time which includes operations in \texttt{mg} but +not in the 4 components: +\begin{equation} + \label{eq:timings} + t_\text{mg} = t_\text{jacobi} + t_\text{residue} + t_\text{prolong} + + t_\text{restrict} + t_\text{extras}. +\end{equation} +We will see in the following sections that, in addition to these 5 +contributions to $t_\text{mg}$, +there is \emph{overhead} probably due to the \emph{recursive} +calls of \texttt{mg}. + +\subsection{Parallel efficiency on single node} +The comparison in Fig.~\ref{fig:single_node} shows that the pure +OpenMP version is at most $30\%$ slower than the pure MPI version when all the +16 cores are used but less than $10\%$ when only one socket is +used. The degradation of the OpenMP version can be explained by the +\emph{numa} effects when 2 sockets are used. It is also observed +that the performance level off at 4 cores, due to the +saturation of the socket memory bandwidth. +\begin{figure}[htb] + \centering + \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/single_node} + \caption{Parallel performance of the 7 level $V(3,3)$-cycle on a + dual socket Helios node ($2\times 8$ cores) for pure OpenMP and pure + MPI. The non-homogeneous NNDD problem with the same parameters in + Fig.~\ref{fig:conv_nh_bc} is considered here. The OpenMP threads and MPI + tasks are placed first on the first socket before filling the + second socket, using the \texttt{srun} option + ``\texttt{--cpu\_bind=cores -m block:block}'' and the environment + variable \texttt{OMP\_PROC\_BIND=true}.} + \label{fig:single_node} +\end{figure} + +\subsection{Hybrid efficiency on multi-nodes} +In the following multi-node experiments, all the 16 cores on each +Helios node are +utilized. The numbers of OpenMP threads \emph{per} MPI \emph{process} NT, the +number of MPI processes \emph{per node} NP, the number of nodes NNODES +and the \emph{total} number of MPI processes $\text{NP}_{tot}$ verify +thus the following relations: +\begin{equation} + \begin{gathered} + 1\le\text{NT} \le 16, \qquad 1\le\text{NP} \le 16 \\ + \text{NT}\times\text{NP} = 16 \\ + \text{NP}_{tot} = 16\times\text{NNODES}/\text{NT} \\ + \end{gathered} +\end{equation} + +The times of the different MG components and the relative +contributions for the \emph{strong scaling} +experiments using a $1024\times 4096$ grid size, are shown in +Fig.~\ref{fig:hybrid_strong} and Fig.~\ref{fig:hybrid_strong_contrib} +respectively. The following observations can be made: +\begin{enumerate} + \item The \texttt{exchange} time increases strongly with increasing + NNODES, due to smaller partitioned subdomains and thus their larger + surface/volume ratio. + \item The pure MPI (NT=1) \texttt{exchange} time is on the other + hand reduced with + $\text{NT}>1$ since the local partitioned grid becomes larger. + \item The less efficient OpenMP parallelization (numa effects, + Amdahl's law) tends to limit however this advantage. + \item As a result, there is an optimal NT for a given NNODES: + 2 for 4 and 16 nodes, 8 for 64 nodes. + \item The \texttt{jacobi} and \texttt{residue} contributions dominate + largely with $0.63\le t_\text{jacobi}/t_\text{mg}\le 0.83$ and + $0.09\le t_\text{residue}/t_\text{mg}\le 0.18$. + \item The \emph{overhead} (see Eq.~\ref{eq:timings}) times increase + with NNODES but decrease slightly for increasing NT. +\end{enumerate} + +The times of the different MG components and the relative +contributions for the \emph{weak scaling} +experiments are shown in Fig.~\ref{fig:hybrid_weak} and +Fig.~\ref{fig:hybrid_weak_contrib} respectively. The following +observations can be made: +\begin{enumerate} +\item A steady increase of MG times with the number of nodes can + be attributed to the increase of ghost cells \texttt{exchange} time, + even though the amount of communications between nodes does not + change. +\item The MG performance is improved slightly when NT=2 but drop + drastically for NT=16. This seems to indicate that + \emph{numa} effects are important here, since the array initialization + is not done locally on each thread. +\item The \emph{overhead} (see Eq.~\ref{eq:timings}) times are much + smaller than in the \emph{strong scaling} runs. +\end{enumerate} + +\begin{sidewaysfigure} + \centering + \includegraphics[angle=0,width=\hsize]{\RepFigures/hybrid_strong} + \caption{Detailed timings for strong scaling experiments using the same + problem parameters as in Fig.~\ref{fig:single_node}, except that 5 + levels are chosen to be able to run the runs with 64 nodes.} + \label{fig:hybrid_strong} +\end{sidewaysfigure} + +\begin{sidewaysfigure} +%\begin{figure}[htb] + \centering + \includegraphics[angle=0,width=\hsize]{\RepFigures/hybrid_strong_contrib} + \caption{Relative contributions of each of the MG components for the + strong scaling experiments using the same + problem parameters as in Fig.~\ref{fig:single_node}.} + \label{fig:hybrid_strong_contrib} +%\end{figure} +\end{sidewaysfigure} + +\begin{sidewaysfigure} +%\begin{figure}[htb] + \centering + \includegraphics[angle=0,width=\hsize]{\RepFigures/hybrid_weak} + \caption{Detailed timings for weak scaling experiments using the same + problem parameters as in Fig.~\ref{fig:single_node}.} + \label{fig:hybrid_weak} +%\end{figure} +\end{sidewaysfigure} + +\begin{sidewaysfigure} +%\begin{figure}[htb] + \centering + \includegraphics[angle=0,width=\hsize]{\RepFigures/hybrid_weak_contrib} + \caption{Relative contributions of each of the MG components for the + weak scaling experiments using the same + problem parameters as in Fig.~\ref{fig:single_node}.} + \label{fig:hybrid_weak_contrib} +%\end{figure} +\end{sidewaysfigure} + +Finally, Table~\ref{tab:memory} shows that using $\text{NT}>1$ +decreases the memory needed by the multigrid procedure for both strong +and weak scaling runs. + +\begin{table}[htb] +\begin{center} +\begin{tabular}{|l|c||r|r|r|r|r|} +\hline + &\texttt{\textbf{NNODES}}& NT=1 & NT=2 & NT=4 & NT=8 & NT=16 \\ +\hline +\multirow{4}{*}{\texttt{\textbf{Strong Scaling}}} +&1 & 57.01 & 53.70 & 52.04 & 51.06 & 48.57 \\ +&4 & 21.87 & 21.49 & 15.75 & 13.87 & 13.11 \\ +&16 & 13.82 & 8.52 & 5.75 & 5.69 & 4.00 \\ +&64 & 13.93 & 6.80 & 3.73 & 2.32 & 1.48 \\ +\hline\hline +\multirow{4}{*}{\texttt{\textbf{Weak Scaling}}} +&1 & 57.03 & 53.71 & 52.04 & 51.08 & 48.61 \\ +&4 & 59.24 & 59.18 & 53.39 & 51.52 & 48.69 \\ +&16 & 60.48 & 55.33 & 52.69 & 52.74 & 49.06 \\ +&64 & 63.30 & 56.13 & 53.30 & 51.58 & 48.79 \\ +\hline +\end{tabular} +\end{center} + \caption{Memory footprint \emph{per core} (MB/core) for the strong scaling and weak + scaling experiments.} + \label{tab:memory} +\end{table} + +\subsection{Summary and conclusions} +The \emph{strong scaling} and \emph{weak scaling} wrt NT and NNODES +are summarized in Fig.~\ref{fig:scaling}. The speed up for the strong +scaling experiments shows a good efficiency up to 16 nodes +for all NT but degrades at 64 nodes (1024 cores) due the partitioned +grid becoming too small. A good \emph{weak scaling} is also +obtained with an increase in $t_\text{mg}$ of +less than $10\%$ when NNODES vary from 4 to 64. However, for NT=16, the +efficiency drops significantly, due to the non-local memory access +when the OpenMP threads are placed on both sockets (\emph{numa} effect). + +In order to improve the hybrid MPI+OpenMP multigrid, especially for large number +of threads per MPI process NT, the following optimizations should be done: +\begin{itemize} + \item \emph{First touch} array initialization in order to avoid + \emph{numa} effects. + \item OpenMP parallelization of some remaining \emph{serial} loops. + \item Better vectorization of inner loops. +\end{itemize} +The outcome of these optimization steps is important in order to run +efficiently on upcoming \emph{multicore} processors and +\emph{manycore} (MIC) devices. + +\begin{figure}[htb] + \centering + \includegraphics[angle=0,width=0.9\hsize]{\RepFigures/scaling} + \caption{Strong scaling with a $1024\times 4096$ grid size (left) and + weak scaling (right) with grid sizes $1024\times 4096,10248\times 8192, + 4096\times 16384$ and $8192\times32768$ respectively for 1, 4, 16 + and 64 nodes.} + \label{fig:scaling} +\end{figure} + +\FloatBarrier + +\pagebreak +\begin{thebibliography}{99} + \bibitem{Briggs} {W.L.~Briggs, V.E.~Henson and S.F.~McCormick, A + Multigrid Tutorial, Second Edition, SIAM (2000)}. + \bibitem{MUMPS} \url{http://graal.ens-lyon.fr/MUMPS/}. + \bibitem{MG1D} {\tt Multigrid Formulation for Finite Elements},\\ + \url{https://crppsvn.epfl.ch/repos/bsplines/trunk/multigrid/docs/multigrid.pdf} + \bibitem{TEMPL} {R. Barrett, M. Berry, T. F. Chan, + J. Demmel, J. Donato, J. Dongarra, V. Eijkhout, + R. Pozo, C. Romine and H. Van der Vorst, + Templates for the Solution of Linear Systems: Building Blocks for + Iterative Methods, 2nd Edition , SIAM, (1994)}. + \bibitem{Wesseling} {P.~Wesseling, An Introduction to Multigrid + Methods, Edwards, 2004}. + \bibitem{salpha} {X. Lapillonne, S. Brunner, T. Dannert, S. Jolliet, + A. Marinoni et al., Phys. Plasmas 16, 032308 (2009)}. +\end{thebibliography} + +\end{document} diff --git a/multigrid/docs/multigrid.pdf b/multigrid/docs/multigrid.pdf new file mode 100644 index 0000000..165662e Binary files /dev/null and b/multigrid/docs/multigrid.pdf differ diff --git a/multigrid/docs/multigrid.tex b/multigrid/docs/multigrid.tex new file mode 100644 index 0000000..a8f694d --- /dev/null +++ b/multigrid/docs/multigrid.tex @@ -0,0 +1,949 @@ +% +% @file multigrid.tex +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +\documentclass[a4paper]{article} +\usepackage{linuxdoc-sgml} +\usepackage{graphicx} +\usepackage{hyperref} +\usepackage{amsmath} +%\usepackage{verbatim} +%\usepackage[notref]{showkeys} + +\title{\tt Multigrid Formulation for Finite Elements.} +\author{Trach-Minh Tran, Stephan Brunner} +\date{v0.2, October 2012} +\abstract{A multigrid formulation for finite elements is + derived, using variational principles. More specifically the grid + transfer operators will be derived and tested in 1D Cartesian, + cylindrical and spherical geometry for arbitrary order B-Splines.} + +\begin{document} +\maketitle +\tableofcontents + +\section{The discretized problem} +Consider the one-dimensional linear integro-differential problem +\begin{equation} +\label{eq:oned_prob} + \mathcal{L}(u) = f, \qquad 0\le x\le L, +\end{equation} +with suitable boundary conditions. On an \emph{equidistant} mesh with +interval $h=L/N$ and using the \emph{weak form} of Eq.~(\ref{eq:oned_prob}), +the linear system to be solved on this grid (which will be +referred as the \emph{fine} grid) can be written as (see +\cite{SOLVERS}, \cite{BSPLINES}): + \begin{equation} + \label{eq:fine} + \sum_{i'=1}^{N+p}A_{ii'}^hu^h_{i'} = b^h_i, \qquad + A^h_{ii'}=\int_{0}^{L}\Lambda^h_i + \mathcal{L}(\Lambda^h_{i'})\,x^{\alpha}dx, \qquad + b^h_i = \int_{0}^{L}f\Lambda^h_i\,x^{\alpha}dx, + \end{equation} +where $p$ is the order the Splines $\Lambda^h_i$ and +$\alpha=0,1,2$ for Cartesian, cylindrical and spherical coordinates +respectively. It should be noted that the unknowns $u^h_i$ of this +linear system are the \emph{expansion coefficients} of the discretized +solution of the problem $u^h(x)$ +\begin{equation} + u^h(x) = \sum_{i'=1}^{N+p}u^h_{i'}\Lambda^h_{i'}(x) +\end{equation} +and the right hand sides $b^h_i$ are defined as the \emph{projection} of +$f(x)$ on the same basis functions, in contrast with the Finite +Differences (FD) or Finite Volume (FV) formulations where $u^h_i$ and +$b^h_i$ are the \emph{nodal values} of $u$ and $f$. + +On the \emph{coarser} mesh with interval $2h=2L/N$, the discretized +linear system can be written as + \begin{equation} + \label{eq:coarse} + \sum_{i'=1}^{N/2+p}A_{ii'}^{2h}u^{2h}_{i'} = b^{2h}_i, \qquad + A^{2h}_{ii'}=\int_{0}^{L}\Lambda^{2h}_i + \mathcal{L}(\Lambda^{2h}_{i'})\,x^{\alpha}dx, \qquad + b^{2h}_i = \int_{0}^{L}f\Lambda^{2h}_i\,x^{\alpha}dx. + \end{equation} + +\section{Transfer operators} +\label{sec:twogrid} +For simplicity let consider the two-grid procedure \cite{Briggs} which can be +summarized as follow: +\begin{enumerate} +\item Obtain an approximation $\mathbf{u}^h$ on the \emph{fine} grid, using a + Gauss-Seidel (GS) or a weighted Jacobi scheme. This procedure is + also called \emph{smoothing} or \emph{relaxation}. +\item Compute the \emph{residuals}: + $\mathbf{r}^h=\mathbf{b}^h-\mathbf{A}^h\mathbf{u}^h$. +\item Obtain the residuals on the coarse mesh $\mathbf{r^{2h}}$ by + \emph{restriction} of $\mathbf{r}^h$. +\item Direct solve $\mathbf{A}^{2h}\mathbf{e}^{2h}=\mathbf{r^{2h}}$ to obtain + the error $\mathbf{e}^{2h}=\mathbf{u}-\mathbf{u}^{2h}$ +\item Interpolate (\emph{prolong}) the error to obtain $\mathbf{e}^h$. +\item Correct the approximation obtained on the fine grid: + $\mathbf{u}^h\leftarrow \mathbf{u}^h+\mathbf{e}^h$. +\item Relax on $\mathbf{A}^{h}\mathbf{u}^{h}=\mathbf{f^{h}}$, using + the previously computed $\mathbf{u}^h$ as a guess. +\end{enumerate} + +Steps 3 and 5 are called \emph{grid transfers} and are detailed in the +following. It should be noted that the fine to coarse transfer +(restriction) applies to the right hand side $\mathbf{b}^h$ while the +prolongation applies to the expansion coefficients $\mathbf{u}^{2h}$. + +\subsection{Fine to coarse grid transfer (restriction)} +The right hand side on the fine and coarse grid can be written as +\begin{equation*} + \begin{split} + b^{h}_i &= \int_0^Lf\Lambda^{h}_i\,x^{\alpha}dx = + \sum_{i'=1}^{N+p}f^h_{i'}\underbrace{\int_0^L\Lambda^{h}_{i}\Lambda^{h}_{i'}\,x^{\alpha}dx}_ + {M^{h,h}_{ii'}}, \\ + b^{2h}_i &= \int_0^Lf\Lambda^{2h}_i\,x^{\alpha}dx = \sum_{i'=1}^{N+p}f^h_{i'} + \underbrace{\int_0^L\Lambda^{2h}_{i}\Lambda^{h}_{i'}\,x^{\alpha}dx}_{M^{2h,h}_{ii'}} \\ + \end{split} +\end{equation*} +where the expansion $f(x)=\sum_{i=1}^{N+p}f^h_i\Lambda^h_i(x)$ has been used. +Elimination of $\mathbf{f^h}$ leads to the definition of the +\emph{restriction} matrix: +\begin{equation} +\label{eq:restriction} + \mathbf{b}^{2h} = \mathbf{R}^{2h}_{h}\mathbf{b}^h, + \qquad \boxed{\mathbf{R}^{2h}_{h}=\mathbf{M}^{2h,h}(\mathbf{M}^{h,h})^{-1}}. +\end{equation} + +Note that the computation of the \emph{mass matrices} +$\mathbf{M}^{h,h}$ and $\mathbf{M}^{2h,h}$ can be done \emph{exactly} +using a Gauss integration with $N_G=\lceil p+(\alpha+1)/2 \rceil$ points. + +Another way to derive the restriction operator $\mathbf{R}^{2h}_{h}$ +is by noting that the basis functions $\Lambda^{2h}_{i}$ are +\emph{piecewise} $C^{p-1}_h$ \emph{polynomials} with \emph{breaks} on +the fine grid points $x_i=ih$, and thus can be expressed +\emph{uniquely} as +\begin{equation} + \Lambda^{2h}_{i}(x) = \sum_{i'=1}^{N+p}c_{ii'}\Lambda^{h}_{i'}(x), + \quad i=1\ldots N/2+p. +\end{equation} +Projecting this equation on the basis $\Lambda^{h}_j$ then leads to +\begin{equation*} + \begin{split} + \sum_{i'=1}^{N+p}c_{ii'}\int_0^L\Lambda^{h}_{i'}\Lambda^{h}_{j}\,x^{\alpha}dx + &= \int_0^L\Lambda^{2h}_{i}\Lambda^{h}_{j}\,x^{\alpha}dx, \qquad + i=1\ldots N/2+p, \quad j=1,\ldots N+p \\ + \Longrightarrow \mathbf{c}\cdot\mathbf{M}^{h,h} &= + \mathbf{M}^{2h,h} \Longrightarrow \mathbf{c} = + \mathbf{M}^{2h,h}(\mathbf{M}^{h,h})^{-1} = \mathbf{R}^{2h}_{h} \\ + \end{split} +\end{equation*} +and finally +\begin{equation} +\label{eq:restrict_gen} + \boxed{\Lambda^{2h}_{i}(x) = + \sum_{i'=1}^{N+p}\left(\mathbf{R}^{2h}_{h}\right)_{ii'}\Lambda^{h}_{i'}(x), + \quad i=1\ldots N/2+p} +\end{equation} +Because the expansion coefficients $c_{ii'}$ of $\Lambda^{2h}_{i}(x)$ +(rows of the restriction matrix $\mathbf{R}^{2h}_{h}$) on the +fine mesh basis are \emph{unique}, $\mathbf{R}^{2h}_{h}$ should be +independent of the geometry exponent $\alpha$ or more generally, of the +definition of the \emph{projection} (or scalar product) used to +calculate the restriction matrix. Furthermore, since the supports of +both $\Lambda^{h}_i$ and $\Lambda^{2h}_i$ are \emph{compact}, the +matrix $\mathbf{R}^{2h}_{h}$ should be \emph{sparse}. + +One can show that, using (\ref{eq:restrict_gen}), the +restriction of the fine mesh FE matrix $\mathbf{A}^h$ is given by +\begin{equation} +\label{eq_coarse_mat} + \mathbf{A}^{2h} = \mathbf{R}^{2h}_h\mathbf{A}^{h}\left(\mathbf{R}^{2h}_h\right)^{T}. +\end{equation} + +\subsection{Coarse to fine grid transfer (prolongation)} +Let denote the discretized solution on the coarse mesh of +$\mathbf{A}^{2h}\mathbf{u}^{2h}=\mathbf{R}^{2h}_{h}\mathbf{b}^h$ +by +\begin{equation*} + u^{2h}(x) = \sum_{i=1}^{N/2+p}u^{2h}_{i}\Lambda^{2h}_{i}(x), +\end{equation*} +and seek for an approximated solution on the fine mesh $\mathbf{u}^{h}$ +\begin{equation*} + u^{h}(x) = \sum_{i=1}^{N+p}u^{h}_{i}\Lambda^{h}_{i}(x). +\end{equation*} +by \emph{prolongation} of $\mathbf{u}^{2h}$ (instead of solving +$\mathbf{A}^{h}\mathbf{u}^{h}=\mathbf{b}^h$). A reasonable solution is +to \emph{minimize} the square of the error norm defined as +\begin{equation*} + \begin{split} + \epsilon^2 &= \|u^{h}(x)-u^{2h}(x)\|^2 \equiv \int_0^L + [u^{h}(x)-u^{2h}(x)]^2\,x^\alpha dx, \\ + \frac{\partial\epsilon^2}{\partial u^h_i} &=0 \Longrightarrow + \sum_{i'=1}^{N+p}u^{h}_{i}\int_0^L + \Lambda^{h}_{i}\Lambda^{h}_{i'}\,x^\alpha dx = + \sum_{i'=1}^{N/2+p}u^{2h}_{i}\int_0^L + \Lambda^{h}_{i}\Lambda^{2h}_{i'}\,x^\alpha dx. \\ + \end{split} +\end{equation*} + +This yields the prolonged (or interpolated) \emph{coarse grid} + solution on the \emph{fine grid} +\begin{equation} +\label{eq:prolongation} + \mathbf{u}^h = \mathbf{P}^h_{2h}\mathbf{u}^{2h}, \qquad + \boxed{\mathbf{P}^h_{2h} = + (\mathbf{M}^{h,h})^{-1}\mathbf{M}^{h,2h}=(\mathbf{R}^{2h}_{h})^T} +\end{equation} +and the coarse FE matrix can be finally expressed as\begin{equation} + \label{eq:coarse_mat} + \boxed{\mathbf{A}^{2h} = \mathbf{R}^{2h}_{h}\mathbf{A}^{h}\mathbf{P}^h_{2h}} +\end{equation} + +\subsection{An alternative derivation of grid transfer operators} +Starting from the inter grid transformation of the basis functions +Eq.(\ref{eq:restrict_gen}), the +restriction of $\mathbf{b}^h$ and the prolongation of +$\mathbf{u}^{2h}$ can be derived as follow +\begin{gather*} + b^{2h}_i = \int_0^L f\Lambda^{2h}_i\,x^{\alpha}dx = \sum_{i'=1}^{N+p} + \left(\mathbf{R}^{2h}_{h}\right)_{ii'}\int_0^Lf\Lambda^{h}_{i'}\,x^{\alpha}dx + =\sum_{i'=1}^{N+p} \left(\mathbf{R}^{2h}_{h}\right)_{ii'}b^h_{i'}, \\ + u^{2h}(x) = \sum_{i=1}^{N/2+p} u^{2h}_{i} \Lambda^{2h}_{i}= + \sum_{i'=1}^{N+p}\underbrace{\left[\sum_{i=1}^{N/2+p} + \left(\mathbf{R}^{2h}_{h}\right)_{ii'} + u^{2h}_{i}\right]}_{u^h_{i'}}\Lambda^{h}_{i'}(x) \Longrightarrow + \mathbf{u}^h = \left(\mathbf{R}^{2h}_{h}\right)^T\mathbf{u}^{2h} = + \mathbf{P}^h_{2h}\mathbf{u}^{2h}.\\ +\end{gather*} + +\section{Numerical results for the transfer operators} +The prolongation matrix as defined in Eq.~(\ref{eq:prolongation}) was +calculated using the BSPLINES module. A Gauss integration with +$N_G=\lceil p+(\alpha+1)/2 \rceil$ points is used to carry out the +numerical integrations. +In the following, the results are presented +for linear, quadratic and cubic Splines. Since the +restriction matrix is just the transpose of the prolongation matrix, +only the latter is shown. As expected, all the obtained matrices +are found to be \emph{independent} of $\alpha$ and \emph{sparse}. + +During the calculations, it was checked that +\begin{itemize} +\item The coarse matrix computed using Eq.~(\ref{eq:coarse_mat}) and + the transfer matrix, is identical to the matrix assembled directly + on the coarse grid. +\item The sum of each row of the prolongation matrix is 1, since a + constant function ($\mathbf{u}^{2h}=1$) should remain constant after + the grid transfer. + +\end{itemize} + +\subsection{Linear Splines} +For $N=8$, the prolongation is a $9\times 5$ matrix given by +\begin{equation} + \mathbf{P}^{h}_{2h} = + \left( + \begin{matrix} + 1 & 0 & 0 & 0 & 0 \\ + 1/2 & 1/2 & 0 & 0 & 0 \\ + 0 & 1 & 0 & 0 & 0 \\ + 0 & 1/2 & 1/2 & 0 & 0 \\ + 0 & 0 & 1 & 0 & 0 \\ + 0 & 0 & 1/2 & 1/2 & 0 \\ + 0 & 0 & 0 & 1 & 0 \\ + 0 & 0 & 0 & 1/2 & 1/2\\ + 0 & 0 & 0 & 0 & 1 \\ + \end{matrix}\right) +\end{equation} +As expected, the prolongation matrix for linear Splines is +identical to the one obtained for first order FD discretization, where +a linear interpolation is used. One can easily check that +\begin{equation*} + \begin{split} + \Lambda^{2h}_1(x) &= \Lambda^h_1(x) + \frac{1}{2}\Lambda^h_2(x), \\ + \Lambda^{2h}_2(x) &= \frac{1}{2}\Lambda^h_2(x) + \Lambda^h_3(x) + + \frac{1}{2}\Lambda^h_4(x), \\ + \end{split} +\end{equation*} +as expected from (\ref{eq:restrict_gen}). + +\subsection{Quadratic Splines} +For $N=8$, the prolongation is a $10\times 6$ matrix given by +\begin{equation} + \mathbf{P}^{h}_{2h} = + \left( + \begin{matrix} + 1 & 0 & 0 & 0 & 0 & 0 \\ + 1/2 & 1/2 & 0 & 0 & 0 & 0 \\ + 0 & 3/4 & 1/4 & 0 & 0 & 0 \\ + 0 & 1/4 & 3/4 & 0 & 0 & 0 \\ + 0 & 0 & 3/4 & 1/4 & 0 & 0 \\ + 0 & 0 & 1/4 & 3/4 & 0 & 0 \\ + 0 & 0 & 0 & 3/4 & 1/4 & 0 \\ + 0 & 0 & 0 & 1/4 & 3/4 & 0 \\ + 0 & 0 & 0 & 0 & 1/2 & 1/2\\ + 0 & 0 & 0 & 0 & 0 & 1 \\ + \end{matrix}\right) +\end{equation} + +\subsection{Cubic Splines} +For $N=10$, the prolongation is a $13\times 8$ matrix given by +\begin{equation} + \mathbf{P}^{h}_{2h} = + \left( + \begin{matrix} + 1 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\ + 1/2 & 1/2 & 0 & 0 & 0 & 0 & 0 & 0 \\ + 0 & 3/4 & 1/4 & 0 & 0 & 0 & 0 & 0 \\ + 0 & 3/16 & 11/16 & 1/8 & 0 & 0 & 0 & 0 \\ + 0 & 0 & 1/2 & 1/2 & 0 & 0 & 0 & 0 \\ + 0 & 0 & 1/8 & 3/4 & 1/8 & 0 & 0 & 0 \\ + 0 & 0 & 0 & 1/2 & 1/2 & 0 & 0 & 0 \\ + 0 & 0 & 0 & 1/8 & 3/4 & 1/8 & 0 & 0 \\ + 0 & 0 & 0 & 0 & 1/2 & 1/2 & 0 & 0 \\ + 0 & 0 & 0 & 0 & 1/8 & 11/16 & 3/16 & 0 \\ + 0 & 0 & 0 & 0 & 0 & 1/4 & 3/4 & 0 \\ + 0 & 0 & 0 & 0 & 0 & 0 & 1/2 & 1/2\\ + 0 & 0 & 0 & 0 & 0 & 0 & 0 & 1 \\ + \end{matrix}\right) +\end{equation} + +Note that from the results shown above, it is straightforward to derive the +prolongation matrix for other number of intervals $N$. + +\section{Practical Considerations} +\subsection{Boundary conditions} +The \emph{essential Dirichlet boundary conditions} are imposed by zeroing the column +and row (first column and first row for the left boundary and last column +and last row for the right boundary) of the FE matrix $\mathbf{A}^h$ and +putting 1 on the diagonal. The same operation should be also performed +on the prolongation matrix, preserving thus the relation +(\ref{eq:coarse_mat}). For non-homogeneous Dirichlet boundary +conditions, the elements of the column should be saved before the +zeroing operation (for example $A^h_{21}, A^h_{31}, \ldots$ for the left +boundary condition). They will be used later to modify the right hand +side: +\begin{equation*} + b^h_i \leftarrow b^h_i-A^h_{i1}u^h_1, \quad i=2,\ldots +\end{equation*} + +Nothing has to be done for \emph{natural boundary conditions}. + +\subsection{Residual norm and error} +The residual norm is simply defined as the Euclidean norm of the +residue: + +\begin{equation} +\label{eq:resid} +\|r\|_2 = \|\mathbf{b}-\mathbf{A}\mathbf{u}\|_2 = +\sqrt{\sum_i\left(b_i-\sum_{i'}A_{ii'}u_{i'}\right)^2}. +\end{equation} +When the \emph{exact} solution $u(x)$ is known, the \emph{discretization error} +can defined as +\begin{equation} +\label{eq:discerr} + \|e\|_2 = \sqrt{\int x^\alpha dx\left[\sum_{i}u_{i}\Lambda_i(x)-u(x)\right]^2} +\end{equation} +and computed using a Gauss quadrature. Note that for Splines of order +$p$, $\|e\|_2(h)$ converges to zero as $O(h^{p+1})$. + +\section{The Model Problems} +\subsection{Cartesian geometry} +The following second-order boundary value problem is considered: +\begin{equation} +\label{eq:cartesian_problem} + \begin{split} + -\frac{d^2}{dx^2} u(x) + \sigma u(x) &= \sin (\pi kx), \qquad 0\le x\le 1 \\ + u(0)=u(1) &= 0 \\ + \Rightarrow u(x) = \frac{\sin(\pi kx)}{\pi^2k^2+\sigma}.& \\ + \end{split} +\end{equation} +Using the weak form, the FE discretized matrix and right hand side can +be computed as +\begin{equation} + A_{ii'} = \int_0^1dx\left[\Lambda'_i(x)\Lambda'_{i'}(x) + + \sigma\Lambda_i(x)\Lambda_{i'}(x)\right], \qquad + b_i = \int_0^1dx \sin (\pi kx)\Lambda_i(x). +\end{equation} +For Splines of order $p$, the integration is done with a $\lceil +p+1/2\rceil$ point Gauss quadrature which is \emph{exact} +for the matrix $\mathbf{A}$ if $\sigma$ is constant. + +The boundary conditions are simply imposed by setting +\begin{equation*} + A_{ki}=A_{ik}=\delta_{ik} \qquad\mbox{and} \qquad b_k=0 +\end{equation*} +for $k=1$ (the first equation) and $N+p$ (the last equation). + +\subsection{Cylindrical geometry} +The following second-order boundary value problem is considered: +\begin{equation} + \begin{split} + -\frac{1}{r}\frac{d}{dr}r\frac{d}{dr}u(r) + \frac{m^2}{r^2}u(r) &= + j^2_{ms}J_{m}(j_{ms}r), \qquad 0\le r\le 1, \quad j_{ms} = + s^{th}\mbox{ zero of }J_{m}, \\ + u(1) &= 0 \\ + \Rightarrow u(r) = J_{m}(j_{ms}r).& \\ + \end{split} +\end{equation} +Using the weak form, the FE discretized matrix and right hand side can +be computed as +\begin{equation} + A_{ii'} = \int_0^1rdr\left[\Lambda'_i(r)\Lambda'_{i'}(r) + + \frac{m^2}{r^2}\Lambda_i(r)\Lambda_{i'}(r)\right], \qquad + b_i = \int_0^1rdr j^2_{ms}\,J_{m}(j_{ms}r)\Lambda_i(r). +\end{equation} +The boundary condition has only to be imposed on the last equation, +using the same procedure described for the Cartesian geometry. + +It should be noted here that for $m\neq 0$, the matrix elements +$A_{1i}$ and $A_{i1}$ \emph{diverge} since $\Lambda_1(r)$ is not +equal to zero at $r=0$. However, using a \emph{direct solver}, one can observe +that the resulting \emph{discretization errors} as defined by +Eq.(\ref{eq:discerr}) converge for number of Gauss points $N_G$ slightly +larger than $p+1$, as shown in Table~\ref{tab:gauss_conv}. Then, using +$N_G=4$ and $6$ for the linear and cubic splines respectively, the +discretization error as a function of the number of grid intervals +(Fig~\ref{fig:cyl_conv}) show the expected quadratic and quartic +scaling respectively for the linear and cubic Splines. + +\begin{figure} + \centering + \includegraphics[angle=0,width=0.8\hsize]{cyl_conv} + \caption{Discretization errors $\|e\|_2$ obtained by a \emph{direct + solver} versus the number of grid intervals $N$. A linear fit + yields a quadratic scaling ($\sim N^{-2.0}$) for the linear Splines + and a quartic convergence ($\sim N^{-4.3}$) for the cubic Splines.} + \label{fig:cyl_conv} +\end{figure} + +\begin{table} + \centering + \begin{tabular}{|c|c|c|}\hline +Number of Gauss points & $p=1$ & $p=3$ \\ \hline + 2 & 8.319E-04 & \\ + 4 & 9.277E-04 & 5.799E-07 \\ + 6 & 9.276E-04 & 5.936E-07 \\ + 8 & 9.276E-04 & 5.936E-07 \\\hline + \end{tabular} + \caption{Convergence of the \emph{discretization error} with respect + to the number of Gauss points for the cylindrical problem with $m=1$, + $s=10$ on a $128$ interval grid.} + \label{tab:gauss_conv} +\end{table} + +\section{The Multigrid Schemes} +The two grid procedure described in section (\ref{sec:twogrid}) can be +generalized as follow. +Let $\nu_1$, $\nu_2$ and $\mu$ be three iteration parameters. +Given a guess $\mathbf{u}^h$ and right hand side $\mathbf{b}^h$ at the +\emph{finest} level, a MG cycle represented by +\begin{equation*} + \boxed{\mathbf{u}^h \leftarrow MG^h(\mathbf{u}^h,\mathbf{b}^h)} +\end{equation*} +will compute a \emph{new} $\mathbf{u}^h$ and is defined recursively by the +following steps: + +\begin{enumerate} +\item If $h$ is the coarsest mesh size, direct solve + $\mathbf{A}^h\mathbf{u}^h=\mathbf{b}^h$ and return. + \item Else + \begin{itemize} + \item Relax $\mathbf{u}^h$ $\nu_1$ times. + \item $\mathbf{b}^{2h} \leftarrow + \mathbf{R}^{2h}_h(\mathbf{b}^h-\mathbf{A}^h\mathbf{u}^h), \quad + \mathbf{u}^{2h}\leftarrow 0$. + \item $\mathbf{u}^{2h} \leftarrow MG^{2h}(\mathbf{u}^{2h},\mathbf{b}^{2h})$ $\mu$ times. + \item $\mathbf{u}^h\leftarrow + \mathbf{u}^h+\mathbf{P}^{h}_{2h}\mathbf{u}^{2h}$. + \item Relax $\mathbf{u}^h$ $\nu_2$ times. + \end{itemize} +\end{enumerate} + +The standard $V$-cycle is obtained for $\mu=1$ while $\mu=2$ results in +the $W$-cycle. Usually the number of \emph{pre-smooth} and +\emph{post-smooth} sweeps $\nu_1$ and $\nu_2$ is limited to 1 or 2. In the +following a $V$-cycle will be denoted by $V(\nu_1,\nu_2)$. + +Another multigrid algorithm called \emph{Full Multigrid} or FMG does +not require an input guess $\mathbf{u}^h$ but solves first the +problem on coarser grids and uses one or many MG cycles to obtain the problem +solution. It can be represented by +\begin{equation*} + \boxed{\mathbf{u}^h \leftarrow FMG^h(\mathbf{b}^h)} +\end{equation*} +and defined recursively by the following steps: + +\begin{enumerate} +\item If $h$ is the coarsest mesh size, direct solve + $\mathbf{A}^h\mathbf{u}^h=\mathbf{b}^h$ and return. + \item Else + \begin{itemize} + \item $\mathbf{b}^{2h} \leftarrow \mathbf{R}^{2h}_h(\mathbf{b}^h)$. + \item $\mathbf{u}^{2h} \leftarrow FMG^{2h}(\mathbf{b}^{2h})$. + \item $\mathbf{u}^h \leftarrow \mathbf{P}^{h}_{2h}\mathbf{u}^{2h}$. + \item $\mathbf{u}^h \leftarrow MG^h(\mathbf{u}^h,\mathbf{b}^h)$ $\nu_0$ times. + \end{itemize} +\end{enumerate} + +Note that while the MG process is an iterative process (started by +setting for example the initial guess $\mathbf{u}^h=0$), the FMG is +more like a \emph{direct solver} with appropriate values of $\nu_1$, +$\nu_2$ and $\nu_0$ determined experimentally. + +\section{Numerical Experiments} +The residual norm $\|r\|_2$ and error $\|e\|_2$ defined +previously are reported after each $V$-cycle in +Table~\ref{tab:cartesian1} for the Cartesian model problem and in +Table~\ref{tab:cylindrical1} for the cylindrical one . The ratio +between successive cycle $\|r\|_2$ +and $\|e\|_2$ are shown in columns labeled \emph{ratio} and measure +the rate of iteration convergence. The \emph{asymptotic} ratio of +$\|r\|_2$ is called the \emph{convergence factor}. + +In all the cases shown, one can note that $\|e\|_2$ level off quickly to the +discretization error obtained by using the \emph{direct solver} on the +finest grid, while the residual norms $\|r\|_2$ continue to decrease until +the machine zero is eventually reached. One can also verify that the \emph{final} +discretization errors scale approximately as $8^2$ and $8^4$ +respectively for linear and cubic Splines, as $N$ is increased from +$128$ to $1024$. + +Most interestingly, the \emph{iterative performance} depends very weakly +on the problem size $N$, for both the Cartesian and the cylindrical +cases. Moreover, the multigrid seems to be less efficient when linear Splines are used for +the problem discretization. This iterative performance can be further improved by +increasing the \emph{iteration parameters} $\nu_1$, $\nu_2$ and $\mu$, +as shown in Table~\ref{tab:improv}. One can also observe in the same +table that the Jacobi relaxation is systematically less efficient than Gauss +Seidel relaxation. + +\begin{table} +\centering +\begin{tabular}{|c|c|c|c|c|c|c|c|c|}\hline + \multicolumn{9}{|c|}{ Linear B-Splines $p=1$} \\ \hline + & \multicolumn{4}{c|}{ $N=128$} + & \multicolumn{4}{c|}{ $N=1024$} \\ \cline{2-9} +$V$-cycle & $\|r\|_2$ & ratio & $\|e\|_2$ & ratio & + $\|r\|_2$ & ratio & $\|e\|_2$ & ratio \\ \hline + 0 & 6.219E-02 & & 7.164E-04 & & 2.210E-02 & & 7.164E-04 & \\ + 1 & 2.169E-02 & 0.35 & 5.880E-05 & 0.08 & 9.699E-03 & 0.44 & 3.622E-05 & 0.05 \\ + 2 & 3.801E-03 & 0.18 & 7.806E-06 & 0.13 & 1.790E-03 & 0.18 & 1.965E-06 & 0.05 \\ + 3 & 5.061E-04 & 0.13 & 3.666E-06 & 0.47 & 2.923E-04 & 0.16 & 1.583E-07 & 0.08 \\ + 4 & 6.762E-05 & 0.13 & 3.564E-06 & 0.97 & 4.055E-05 & 0.14 & 6.197E-08 & 0.39 \\ + 5 & 8.902E-06 & 0.13 & 3.585E-06 & 1.01 & 5.586E-06 & 0.14 & 5.655E-08 & 0.91 \\ + 6 & 1.199E-06 & 0.13 & 3.589E-06 & 1.00 & 7.122E-07 & 0.13 & 5.622E-08 & 0.99 \\ + 7 & 1.585E-07 & 0.13 & 3.590E-06 & 1.00 & 9.815E-08 & 0.14 & 5.620E-08 & 1.00 \\ + 8 & 2.089E-08 & 0.13 & 3.590E-06 & 1.00 & 1.320E-08 & 0.13 & 5.619E-08 & 1.00 \\ + 9 & 2.746E-09 & 0.13 & 3.590E-06 & 1.00 & 1.887E-09 & 0.14 & 5.619E-08 & 1.00 \\ + 10 & 3.741E-10 & 0.14 & 3.590E-06 & 1.00 & 2.533E-10 & 0.13 & 5.619E-08 & 1.00 \\ +\hline \hline +\multicolumn{9}{|c|}{ Cubic B-Splines $p=3$} \\ \hline + & \multicolumn{4}{c|}{ $N=128$} + & \multicolumn{4}{c|}{ $N=1024$} \\ \cline{2-9} +$V$-cycle & $\|r\|_2$ & ratio & $\|e\|_2$ & ratio & + $\|r\|_2$ & ratio & $\|e\|_2$ & ratio \\ \hline + 0 & 6.187E-02 & & 7.164E-04 & & 2.209E-02 & & 7.164E-04 & \\ + 1 & 1.948E-04 & 0.00 & 1.893E-06 & 0.00 & 1.685E-05 & 0.00 & 4.292E-08 & 0.00 \\ + 2 & 4.316E-06 & 0.02 & 3.927E-09 & 0.00 & 1.241E-07 & 0.01 & 7.156E-11 & 0.00 \\ + 3 & 1.554E-07 & 0.04 & 2.374E-09 & 0.60 & 4.184E-09 & 0.03 & 6.198E-13 & 0.01 \\ + 4 & 5.750E-09 & 0.04 & 2.373E-09 & 1.00 & 1.560E-10 & 0.04 & 5.635E-13 & 0.91 \\ + 5 & 2.153E-10 & 0.04 & 2.373E-09 & 1.00 & 5.912E-12 & 0.04 & 5.635E-13 & 1.00 \\ + 6 & 8.122E-12 & 0.04 & 2.373E-09 & 1.00 & 2.258E-13 & 0.04 & 5.635E-13 & 1.00 \\ + 7 & 3.079E-13 & 0.04 & 2.373E-09 & 1.00 & 8.777E-15 & 0.04 & 5.635E-13 & 1.00 \\ + 8 & 1.173E-14 & 0.04 & 2.373E-09 & 1.00 & 1.758E-15 & 0.20 & 5.635E-13 & 1.00 \\ + 9 & 4.489E-16 & 0.04 & 2.373E-09 & 1.00 & 1.709E-15 & 0.97 & 5.635E-13 & 1.00 \\ + 10 & 9.571E-17 & 0.21 & 2.373E-09 & 1.00 & 1.761E-15 & 1.03 & + 5.635E-13 & 1.00 \\ \hline +\end{tabular} +\caption{The multigrid $V(1,1)$ performance with Gauss-Seidel relation + for a \emph{Cartesian} problem with $k=10$ and $\sigma=0$, + discretized on a grid with $N=128$ and $1024$ intervals, using + linear and cubic B-splines. For + both grid sizes, a total of 6 grid levels were considered.} +\label{tab:cartesian1} +\end{table} + +\begin{table} +\centering +\begin{tabular}{|c|c|c|c|c|c|c|c|c|}\hline + \multicolumn{9}{|c|}{ Linear B-Splines $p=1$} \\ \hline + & \multicolumn{4}{c|}{ $N=128$} + & \multicolumn{4}{c|}{ $N=1024$} \\ \cline{2-9} +$V$-cycle & $\|r\|_2$ & ratio & $\|e\|_2$ & ratio & + $\|r\|_2$ & ratio & $\|e\|_2$ & ratio \\ \hline + 0 & 1.789E+01 & & 9.354E-02 & & 6.400E+00 & & 9.354E-02 & \\ + 1 & 3.373E+00 & 0.19 & 3.068E-03 & 0.03 & 1.826E+00 & 0.29 & 3.036E-03 & 0.03 \\ + 2 & 4.895E-01 & 0.15 & 8.064E-04 & 0.26 & 3.133E-01 & 0.17 & 1.624E-04 & 0.05 \\ + 3 & 6.160E-02 & 0.13 & 6.704E-04 & 0.83 & 4.581E-02 & 0.15 & 1.411E-05 & 0.09 \\ + 4 & 8.013E-03 & 0.13 & 6.811E-04 & 1.02 & 5.959E-03 & 0.13 & 1.062E-05 & 0.75 \\ + 5 & 9.871E-04 & 0.12 & 6.844E-04 & 1.00 & 8.098E-04 & 0.14 & 1.069E-05 & 1.01 \\ + 6 & 1.283E-04 & 0.13 & 6.847E-04 & 1.00 & 1.048E-04 & 0.13 & 1.070E-05 & 1.00 \\ + 7 & 1.613E-05 & 0.13 & 6.847E-04 & 1.00 & 1.504E-05 & 0.14 & 1.070E-05 & 1.00 \\ + 8 & 2.097E-06 & 0.13 & 6.847E-04 & 1.00 & 2.050E-06 & 0.14 & 1.070E-05 & 1.00 \\ + 9 & 2.639E-07 & 0.13 & 6.847E-04 & 1.00 & 3.008E-07 & 0.15 & 1.070E-05 & 1.00 \\ + 10 & 3.500E-08 & 0.13 & 6.847E-04 & 1.00 & 4.074E-08 & 0.14 & 1.070E-05 & 1.00 \\ +\hline \hline +\multicolumn{9}{|c|}{ Cubic B-Splines $p=3$} \\ \hline + & \multicolumn{4}{c|}{ $N=128$} + & \multicolumn{4}{c|}{ $N=1024$} \\ \cline{2-9} +$V$-cycle & $\|r\|_2$ & ratio & $\|e\|_2$ & ratio & + $\|r\|_2$ & ratio & $\|e\|_2$ & ratio \\ \hline + 0 & 1.768E+01 & & 9.354E-02 & & 6.399E+00 & & 9.354E-02 & \\ + 1 & 4.243E-02 & 0.00 & 4.727E-05 & 0.00 & 4.975E-03 & 0.00 & 6.588E-06 & 0.00 \\ + 2 & 1.378E-03 & 0.03 & 1.897E-06 & 0.04 & 7.835E-05 & 0.02 & 6.578E-09 & 0.00 \\ + 3 & 4.773E-05 & 0.03 & 1.814E-06 & 0.96 & 2.797E-06 & 0.04 & 4.125E-10 & 0.06 \\ + 4 & 2.174E-06 & 0.05 & 1.814E-06 & 1.00 & 1.041E-07 & 0.04 & 4.092E-10 & 0.99 \\ + 5 & 4.816E-07 & 0.22 & 1.814E-06 & 1.00 & 3.935E-09 & 0.04 & 4.092E-10 & 1.00 \\ + 6 & 1.942E-07 & 0.40 & 1.814E-06 & 1.00 & 1.499E-10 & 0.04 & 4.092E-10 & 1.00 \\ + 7 & 8.887E-08 & 0.46 & 1.814E-06 & 1.00 & 5.757E-12 & 0.04 & 4.092E-10 & 1.00 \\ + 8 & 4.449E-08 & 0.50 & 1.814E-06 & 1.00 & 2.517E-13 & 0.04 & 4.092E-10 & 1.00 \\ + 9 & 2.377E-08 & 0.53 & 1.814E-06 & 1.00 & 1.360E-13 & 0.54 & 4.092E-10 & 1.00 \\ + 10 & 1.328E-08 & 0.56 & 1.814E-06 & 1.00 & 1.384E-13 & 1.02 & 4.092E-10 & 1.00 \\ \hline +\end{tabular} +\caption{The multigrid $V(1,1)$ performance with Gauss-Seidel relation + for a one-dimensional \emph{cylindrical} problem with $m=22$ and + $s=10$, discretized on a grid with + $N=128$ and $1024$ intervals, using linear and cubic B-splines. For + both grid sizes, a total of 6 grid levels were considered.} +\label{tab:cylindrical1} +\end{table} + +\begin{table} +\centering +\begin{tabular}{|c|c|c|c|c|}\hline + & \multicolumn{2}{c|}{ Cartesian problem} + & \multicolumn{2}{c|}{ Cylindrical problem} \\ \cline{2-5} + & $N=128$ & $N=1024$ & $N=128$ & $N=1024$\\ \hline +$\nu_1=1, \nu_2=1, \mu=1$ & 0.13 & 0.14 & 0.13 & 0.14 \\ +$\nu_1=1, \nu_2=2, \mu=1$ & 0.08 & 0.08 (\emph{0.10}) & 0.08 & 0.08 (\emph{0.09}) \\ +$\nu_1=2, \nu_2=1, \mu=1$ & 0.08 & 0.08 & 0.08 & 0.08 \\ +$\nu_1=2, \nu_2=2, \mu=1$ & 0.04 & 0.04 (\emph{0.08}) & 0.02 & 0.03 (\emph{0.08})\\ +$\nu_1=1, \nu_2=1, \mu=2$ & 0.12 & 0.11 & 0.12 & 0.11 \\ \hline +\end{tabular} +\caption{The \emph{convergence factor} (averaged over + the last 5 cycles) for different iteration parameters $\nu_1$, $\nu_2$ and + $\mu$, using the linear Splines for both Cartesian ($k=10$, + $\sigma=0$) and cylindrical ($m=22$, $s=10$) problems. The last + entry is usually called + a $W$-cycle while the first four designate a $V(\nu_1,\nu_2)$ + cycle. Gauss Seidel relaxation is used except for the results enclosed + in parenthesis which are obtained with the Jacobi + (weighted with $\omega=2/3$) relaxation.} +\label{tab:improv} +\end{table} + +The next experiment is shown on Table~\ref{tab:fmg}, where two FMG$(\nu_1,\nu_2)$ +schemes are applied to the $m=22$, $s=10$ cylindrical problem with +grid sizes up to $N=2048$. Note that the problem is solved to the level of +discretization for $N\ge 128$ with FMG$(2,1)$ but not with FMG$(1,1)$. Solving the same +problem with the $V(2,1)$ cycle required 3 iterations for all the values of +$N$ shown. Since the cost of one FMG(2,1) is $\sim 2$ the cost +of one $V(2,1)$ (see Appendix \ref{sec:cost}), it appears that FMG is +more efficient for $N\ge 128$. + +Finally, in all the cases shown here, the equality (\ref{eq:coarse_mat}) +is verified numerically, except for the cylindrical case with $m\neq 0$. This is +expected since as noted earlier, the matrix elements $A_{i1}$ and +$A_{1i}$ diverge unless $m=0$ in the cylindrical problem. + +\begin{table} +\centering +\begin{tabular}{|c|c|c|c|c|}\hline +& \multicolumn{2}{c|}{FMG(1,1)} +& \multicolumn{2}{c|}{FMG(2,1)} \\ \cline{2-5} + $N$ & $\|e\|_2$ & $\|e\|_2/\|e\|_d$ & $\|e\|_2$ & $\|e\|_2/\|e\|_d$ \\ \hline + 4 & 1.011E-01 & 0.968 & 1.012E-01 & 0.969 \\ + 8 & 7.781E-02 & 1.031 & 7.679E-02 & 1.018 \\ + 16 & 3.332E-02 & 1.310 & 2.808E-02 & 1.104 \\ + 32 & 1.516E-03 & 1.421 & 1.098E-03 & 1.030 \\ + 64 & 5.168E-05 & 1.443 & 3.652E-05 & 1.019 \\ + 128 & 2.012E-06 & 1.109 & 1.818E-06 & 1.002 \\ + 256 & 1.125E-07 & 1.053 & 1.069E-07 & 1.001 \\ + 512 & 6.819E-09 & 1.037 & 6.576E-09 & 1.000 \\ + 1024 & 4.224E-10 & 1.032 & 4.093E-10 & 1.000 \\ + 2048 & 2.634E-11 & 1.031 & 2.556E-11 & 1.000 \\ +\hline +\end{tabular} +\caption{The discretization errors $\|e\|_2$ obtained from a + FMG$(\nu_1,\nu_2)$ sweep with $\nu_0=1$ for different grid sizes $N$. The columns + $\|e\|_2/\|e\|_d$ display their ratio with the discretization errors + obtained from a \emph{direct} solver. The cylindrical problem with + $m=22$ and $s=10$ using cubic Splines is considered here.} +\label{tab:fmg} +\end{table} + +\section{Periodic Case} +\subsection{Transfer operators} +For periodic problems, we use \emph{periodic} Splines \cite{BSPLINES} which +satisfy the periodic boundary condition +$\Lambda^h_{i+N}(x)=\Lambda^h_i(x-Nh)$. As a result, both the expansion +coefficients and the right hand sides are periodic +with periodicity $N$ ($u^h_{i+N}=u^h_i$, $b^h_{i+N}=b^h_i$) and he rank +of all matrices should be $N$ instead of $N+p$ as in the non-periodic case. + +The \emph{prolongation} matrix $\mathbf{P}^h_{2h}$ as given by +(\ref{eq:prolongation}) are computed numerically and the results +for $N=8$ are given below for linear, quadratic and cubic Splines. + +\begin{itemize} +\item Linear Splines + \begin{equation} + \mathbf{P}^{h}_{2h} = + \left( + \begin{matrix} + 1 & 0 & 0 & 0 \\ + 1/2 & 1/2 & 0 & 0 \\ + 0 & 1 & 0 & 0 \\ + 0 & 1/2 & 1/2 & 0 \\ + 0 & 0 & 1 & 0 \\ + 0 & 0 & 1/2 & 1/2 \\ + 0 & 0 & 0 & 1 \\ + 1/2 & 0 & 0 & 1/2 \\ + \end{matrix}\right) + \end{equation} +\item Quadratic Splines + \begin{equation} + \mathbf{P}^{h}_{2h} = + \left( + \begin{matrix} + 3/4 & 1/4 & 0 & 0 \\ + 1/4 & 3/4 & 0 & 0 \\ + 0 & 3/4 & 1/4 & 0 \\ + 0 & 1/4 & 3/4 & 0 \\ + 0 & 0 & 3/4 & 1/4 \\ + 0 & 0 & 1/4 & 3/4 \\ + 1/4 & 0 & 0 & 3/4 \\ + 3/4 & 0 & 0 & 1/4 \\ + \end{matrix}\right) + \end{equation} +\item Cubic Splines + \begin{equation} + \mathbf{P}^{h}_{2h} = + \left( + \begin{matrix} + 1/2 & 1/2 & 0 & 0 \\ + 1/8 & 3/4 & 1/8 & 0 \\ + 0 & 1/2 & 1/2 & 0 \\ + 0 & 1/8 & 3/4 & 1/8 \\ + 0 & 0 & 1/2 & 1/2 \\ + 1/8 & 0 & 1/8 & 3/4 \\ + 1/2 & 0 & 0 & 1/2 \\ + 3/4 & 1/8 & 0 & 1/8 \\ + \end{matrix}\right) + \end{equation} +\end{itemize} +The restriction matrix is simply $\mathbf{R}^{2h}_h= +(\mathbf{P}^{h}_{2h})^T$. Generalization for any other number of intervals $N$ should be +straightforward. + +\subsection{Numerical Experiments} +In order to test the grid transfer operators obtained above, the same +second-order problem (\ref{eq:cartesian_problem}) but with the +periodic boundary condition $u(x+1)=u(x)$ is considered. It should be +noted that in that case, if $\sigma=0$, the problem is singular since +the solution is not \emph{unique}! But we have observed that this +problem can be avoided for a slightly non zero $\sigma$, + +With $\sigma=0.01$ and $k=10$ and using linear and cubic Splines, we +recover the same multigrid iterative performances shown in Table +\ref{tab:cartesian1} obtained previously for non-periodic Dirichlet +boundary conditions. Table \ref{tab:quad_splines} also shows similar +iterative efficiencies for \emph{quadratic} non-periodic and periodic problems. + +The identity (\ref{eq:coarse_mat}) is numerically verified in all the +cases considered. + +\begin{table} +\centering +\begin{tabular}{|c|c|c|c|c|c|c|c|c|}\hline + \multicolumn{9}{|c|}{ Cartesian problem with quadratic splines} \\ \hline + & \multicolumn{4}{c|}{ $N=128$} + & \multicolumn{4}{c|}{ $N=1024$} \\ \cline{2-9} +$V$-cycle & $\|r\|_2$ & ratio & $\|e\|_2$ & ratio & + $\|r\|_2$ & ratio & $\|e\|_2$ & ratio \\ \hline + 0 & 6.203E-02 & & 7.164E-04 & & 2.209E-02 & & 7.164E-04 & \\ + 1 & 8.114E-04 & 0.01 & 6.375E-06 & 0.01 & 1.003E-04 & 0.00 & 4.509E-07 & 0.00 \\ + 2 & 1.891E-05 & 0.02 & 6.079E-08 & 0.01 & 1.769E-06 & 0.02 & 8.061E-10 & 0.00 \\ + 3 & 1.103E-06 & 0.06 & 5.220E-08 & 0.86 & 7.018E-08 & 0.04 & 9.970E-11 & 0.12 \\ + 4 & 8.148E-08 & 0.07 & 5.220E-08 & 1.00 & 5.620E-09 & 0.08 & 9.958E-11 & 1.00 \\ + 5 & 6.368E-09 & 0.08 & 5.220E-08 & 1.00 & 4.772E-10 & 0.08 & 9.958E-11 & 1.00 \\ + 6 & 4.969E-10 & 0.08 & 5.220E-08 & 1.00 & 4.101E-11 & 0.09 & 9.958E-11 & 1.00 \\ + 7 & 3.874E-11 & 0.08 & 5.220E-08 & 1.00 & 3.548E-12 & 0.09 & 9.958E-11 & 1.00 \\ + 8 & 3.081E-12 & 0.08 & 5.220E-08 & 1.00 & 3.081E-13 & 0.09 & 9.958E-11 & 1.00 \\ + 9 & 2.489E-13 & 0.08 & 5.220E-08 & 1.00 & 2.690E-14 & 0.09 & 9.958E-11 & 1.00 \\ + 10 & 1.986E-14 & 0.08 & 5.220E-08 & 1.00 & 3.212E-15 & 0.12 & 9.958E-11 & 1.00 \\ +\hline \hline +\multicolumn{9}{|c|}{ Periodic problem with quadratic splines} \\ \hline + & \multicolumn{4}{c|}{ $N=128$} + & \multicolumn{4}{c|}{ $N=1024$} \\ \cline{2-9} +$V$-cycle & $\|r\|_2$ & ratio & $\|e\|_2$ & ratio & + $\|r\|_2$ & ratio & $\|e\|_2$ & ratio \\ \hline + 0 & 6.203E-02 & & 7.164E-04 & & 2.209E-02 & & 7.164E-04 & \\ + 1 & 1.285E-03 & 0.02 & 1.294E-05 & 0.02 & 3.116E-04 & 0.01 & 5.862E-06 & 0.01 \\ + 2 & 7.878E-05 & 0.06 & 6.569E-07 & 0.05 & 2.893E-05 & 0.09 & 1.626E-07 & 0.03 \\ + 3 & 6.573E-06 & 0.08 & 6.511E-08 & 0.10 & 2.691E-06 & 0.09 & 5.631E-09 & 0.03 \\ + 4 & 5.681E-07 & 0.09 & 5.224E-08 & 0.80 & 2.385E-07 & 0.09 & 2.400E-10 & 0.04 \\ + 5 & 4.890E-08 & 0.09 & 5.219E-08 & 1.00 & 2.097E-08 & 0.09 & 9.997E-11 & 0.42 \\ + 6 & 4.198E-09 & 0.09 & 5.219E-08 & 1.00 & 1.828E-09 & 0.09 & 9.958E-11 & 1.00 \\ + 7 & 3.607E-10 & 0.09 & 5.219E-08 & 1.00 & 1.584E-10 & 0.09 & 9.958E-11 & 1.00 \\ + 8 & 3.103E-11 & 0.09 & 5.219E-08 & 1.00 & 1.370E-11 & 0.09 & 9.958E-11 & 1.00 \\ + 9 & 2.674E-12 & 0.09 & 5.219E-08 & 1.00 & 1.184E-12 & 0.09 & 9.958E-11 & 1.00 \\ + 10 & 2.307E-13 & 0.09 & 5.219E-08 & 1.00 & 1.025E-13 & 0.09 & 9.958E-11 & 1.00 \\\hline +\end{tabular} +\caption{The multigrid $V(1,1)$ performance with Gauss-Seidel relation + for \emph{Cartesian} problem ($k=10$, $\sigma=0$) and + \emph{periodic} problem ($k=10$, $\sigma=0.01$), discretized on a grid with + $N=128$ and $1024$ intervals, using quadratic B-splines. For + both grid sizes, a total of 6 grid levels were considered.} +\label{tab:quad_splines} +\end{table} + +\section{Conclusion} +Using the variational principle, we have derived the expressions of +the grid transfer matrices for Finite Elements using Splines of any +order. It is found that: + +\begin{itemize} +\item The grid transfer matrices do not depend of the geometries + characterized by the Jacobian as defined in $dV = x^\alpha dx$. +\item The standard grid transfer operator used for first order finite + difference (FD) discretization for Cartesian geometry is recovered when + linear Spline finite elements (FE) are used. +\item Applying these transfer matrices, we have solved Cartesian, + cylindrical as well as periodic one dimensional + problems, and obtained essentially the same multigrid iterative performances + as found for standard first order FD Cartesian problems. +\item No performance \emph{degradation} is observed when the order of Splines FE + is increased from 1 to 3, or when the cylindrical geometry is considered. +\end{itemize} + +For two dimensional problems, notice that for both Cartesian ($dV=dxdy$) and +standard curvilinear geometries ($dV=r^\alpha drd\theta$), the +Jacobian is \emph{separable}. Using this property, one can show that +the two dimensional grid transfer consists of simply applying +successively one dimensional grid transfer on each of the $x$ and +$y$ (or $r$ and $\theta$) grids. With the solution +$\mathbf{u}^h=[u^h_{ij}]$ and right hand side +$\mathbf{b}^h=[b^h_{ij}]$ defined by +\begin{equation} + u(x,y) = \sum_{ij} u^h_{ij}\Lambda^h_i(x)\Lambda^h_j(y), \qquad + b^h_{ij} = \int dx\Lambda^h_i(x)\int dy\Lambda^h_j(y)f(x,y), +\end{equation} +the two dimension grid transfers can be expressed as (see Appendix \ref{sec:twod}) +\begin{equation} + \begin{split} + \mathbf{u}^h &= {_{x}\mathbf{P}^h_{2h}}\; \mathbf{u}^{2h} + \left(_{y}\mathbf{P}^h_{2h}\right)^T, \\ + \mathbf{b}^{2h} &={_{x}\mathbf{R}^{2h}_{h}}\; \mathbf{b}^{h} + \left(_{y}\mathbf{R}^{2h}_{h}\right)^T. \\ + \end{split} +\end{equation} + +For more general curvilinear coordinates such as found in tokamak +magnetic coordinates defined by $dV=J(s,\theta)dsd\theta$, we will +assume that the grid transfer operators derived above are still +applicable. The validity of this assumption will be the object of the +next task. + + +\appendix +\section{Multigrid Cost Estimation} +\label{sec:cost} +Assuming that the \emph{coarsest} grid is fixed to $2$, the total +number of grid levels $L$ is given by $N/2^{L-1}=2$ or $L=\log_2(N)$, where $N$ +is the number of intervals in the \emph{finest grid}. Since both +\emph{relaxation} and intergrid transfer are proportional to the +number of problem unknowns, the cost of the +$V$-cycle can be estimated as: +\begin{equation} + \begin{split} + \mbox{MG}(N) &= c\left[ (N+p)+(N/2+p) +\ldots + (N/2^{L-2}+p) \right] \\ + &= c \left[ 2N-4 +(L-1)p \right] ,\\ + \end{split} +\end{equation} +where $p$ is the order of Splines used for the discretization. The FMG +can then be deduced, assuming $\nu_0=1$ as +\begin{equation} + \begin{split} + \mbox{FMG}(N) &= \mbox{MG}(N) +\mbox{MG}(N/2) + \ldots + \mbox{MG}(N/2^{L-2})\\ + &= c\left[ 4N-8 +(L-1)(pL/2-4) \right] .\\ + \end{split} +\end{equation} +As expected a single FMG cycle (with $\nu_0=1$) costs about two $V$-cycles. + +\section{Two dimensional Grid Transfer} +\label{sec:twod} +On the fine and the coarse grids, the problem solution $u(x,y)$ can be written +as: +\begin{equation*} + u(x,y) = \sum_{i'j'} u^h_{i'j'}\Lambda^h_{i'}(x)\Lambda^h_{j'}(y) + = \sum_{i'j'} u^{2h}_{i'j'}\Lambda^{2h}_{i'}(x)\Lambda^{2h}_{j'}(y). +\end{equation*} +Projecting these two expansions on the two dimensional basis +functions $\Lambda^h_{i}(x)\Lambda^h_{j}(y)$ yields + + \begin{gather*} + \sum_{i'j'} u^h_{i'j'} + \underbrace{\int dx\Lambda^h_{i}(x)\Lambda^h_{i'}(x)}_{M^{h,h}_{ii'}} + \underbrace{\int dy\Lambda^h_{j}(x)\Lambda^h_{j'}(y)}_{N^{h,h}_{jj'}} + = \sum_{i'j'} u^{2h}_{i'j'} + \underbrace{\int dx\Lambda^h_{i}(x)\Lambda^{2h}_{i'}(x)}_{M^{h,2h}_{ii'}} + \underbrace{\int + dy\Lambda^h_{j}(x)\Lambda^{2h}_{j'}(y)}_{N^{h,2h}_{jj'}} \\ + \Longrightarrow \quad + \mathbf{M}^{h,h}\;\mathbf{u}^h\;\left(\mathbf{N}^{h,h}\right)^T = + \mathbf{M}^{h,2h}\mathbf{u}^{2h}\left(\mathbf{N}^{h,2h}\right)^T \\ + \Longrightarrow \quad + \mathbf{u}^h = \left(\mathbf{M}^{h,h}\right)^{-1} + \mathbf{M}^{h,2h}\;\mathbf{u}^{2h}\;\left[\left(\mathbf{N}^{h,h}\right)^{-1}\mathbf{N}^{h,2h}\right]^T. + \end{gather*} + +The right hand side can be written on the fine and coarse grids as +\begin{equation*} + \begin{split} + b^{h}_{ij} = \int dx\Lambda^{h}_i(x)\int dy\Lambda^{h}_j(y)\;f(x,y) + = \sum_{i'j'} M^{h,h}_{ii'}f^{h}_{i'j'}N^{h,h}_{jj'} \quad\Longrightarrow\quad + \mathbf{b}^h &= \mathbf{M}^{h,h}\;\mathbf{f}^h\left(\mathbf{N}^{h,h}\right)^{T}, \\ + b^{2h}_{ij} = \int dx\Lambda^{2h}_i(x)\int dy\Lambda^{2h}_j(y)\;f(x,y) + = \sum_{i'j'} M^{2h,h}_{ii'}f^{h}_{i'j'}N^{2h,h}_{jj'} \quad\Longrightarrow\quad + \mathbf{b}^{2h} &= \mathbf{M}^{2h,h}\;\mathbf{f}^h\left(\mathbf{N}^{2h,h}\right)^{T}, \\ + \end{split} +\end{equation*} +where the expansion of $f(x,y)$ on the \emph{fine} mesh has been +used. Elimination of $\mathbf{f}^h$ then yields +\begin{equation*} + \mathbf{b}^{2h} = + \mathbf{M}^{2h,h}\left(\mathbf{M}^{h,h}\right)^{-1} \; + \mathbf{b}^h\; + \left[\mathbf{N}^{2h,h}\left(\mathbf{N}^{h,h}\right)^{-1} \right]^T. +\end{equation*} + +\begin{thebibliography}{99} + \bibitem{SOLVERS} {\tt The Solvers in BSPLINES}, + \url{https://crppsvn.epfl.ch/repos/bsplines/trunk/docs/solvers.pdf} + \bibitem{BSPLINES} {\tt BSPLINES} Reference Guide, + \url{https://crppsvn.epfl.ch/repos/bsplines/trunk/docs/bsplines.pdf} + \bibitem{Briggs} {W.L.~Briggs, V.E.~Henson and S.F.~McCormick, A + Multigrid Tutorial, Second Edition, Siam (2000)}. +\end{thebibliography} +\end{document} + + diff --git a/multigrid/docs/multigrid_2d.pdf b/multigrid/docs/multigrid_2d.pdf new file mode 100644 index 0000000..77333fc Binary files /dev/null and b/multigrid/docs/multigrid_2d.pdf differ diff --git a/multigrid/docs/multigrid_2d.tex b/multigrid/docs/multigrid_2d.tex new file mode 100644 index 0000000..a373364 --- /dev/null +++ b/multigrid/docs/multigrid_2d.tex @@ -0,0 +1,425 @@ +% +% @file multigrid_2d.tex +% +% @brief +% +% @copyright +% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +% SPC (Swiss Plasma Center) +% +% spclibs is free software: you can redistribute it and/or modify it under +% the terms of the GNU Lesser General Public License as published by the Free +% Software Foundation, either version 3 of the License, or (at your option) +% any later version. +% +% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +% +% You should have received a copy of the GNU Lesser General Public License +% along with this program. If not, see . +% +% @authors +% (in alphabetical order) +% @author Trach-Minh Tran +% +\documentclass[a4paper]{article} +\usepackage{linuxdoc-sgml} +\usepackage{graphicx} +\usepackage{hyperref} +\usepackage{amsmath} + +\title{Multigrid for Finite Elements using Splines.} +\author{Trach-Minh Tran, Stephan Brunner} +\date{v0.1, January 2013} +\abstract{A multigrid formulation for finite elements is + derived, using variational principles. More specifically the grid + transfer operators will be derived and tested in 2D Cartesian and + cylindrical geometry for arbitrary order B-Splines.} + +\begin{document} +\maketitle +\tableofcontents + +\section{The Model Problems} +\subsection{Cartesian Geometry} +The following second-order boundary value problem is considered + +\begin{equation} + \label{eq:cartesian_problem} + \begin{split} + -\left[\frac{\partial^2}{\partial x^2} + +\frac{\partial^2}{\partial y^2} \right] u(x,y) &= f(x,y) \qquad 0\le x\le 1,\quad 0\le y\le 1 \\ + u(0,y) = u(1,y) &= u(x,0) = u(x,1) = 0. + \end{split} +\end{equation} +By choosing +\begin{equation*} + f(x,y) = \sin (\pi k_xx + \pi k_yy), +\end{equation*} +where $k_x$ and $k_y$ are integers, the solution of the BVP is simply +\begin{equation*} + u(x,y) = \frac{\sin(\pi k_xx+\pi k_yy)}{\pi^2(k_x^2+k_y^2)}. +\end{equation*} +Using a weak formulation on Eq.(\ref{eq:cartesian_problem}) and a grid +of $N_x\times N_y$ intervals, one obtains the following discretized +linear system +\begin{equation} + \sum_{i'=1}^{N_x+p}\sum_{j'=1}^{N_y+p}A_{iji'j'}u_{i'j'} = b_{ij}, + \qquad i=1,\ldots,N_x+p,\quad j=1,\ldots,N_y+p, +\end{equation} +where the unknowns $u_{ij}$ are the Spline (of order $p$) expansion +coefficients of the solution +\begin{equation} + u(x,y) = \sum_{i=1}^{N_x+p}\sum_{j=1}^{N_y+p}u_{ij}\Lambda_i(x)\Lambda_j(y), +\end{equation} +and the matrix $A$ and right hand side $b$ are determined from +\begin{align} + A_{iji'j'} &= \int^1_0\int^1_0 dxdy + \left[\Lambda'_{i'}(x)\Lambda_{j'}(y)\Lambda'_i(x)\Lambda_j(y) + + \Lambda_{i'}(x)\Lambda'_{j'}(y)\Lambda_i(x)\Lambda'_j(y) \right], \\ + b_{ij} &= \int^1_0\int^1_0 dxdy\Lambda_i(x)\Lambda_j(y)f(x,y). +\end{align} +Note that using a Gauss quadrature with $\lceil(2p+1)/2\rceil$ points +per interval to calculate the matrix $A$ would yield an exact integration. + +\subsection{Cylindrical Geometry} +The following second-order boundary value problem is considered: +\begin{equation} + \label{eq:cylindrical_problem} + \begin{split} + -\left[\frac{1}{r}\frac{\partial}{\partial r}r\frac{\partial}{\partial r} + + \frac{1}{r^2} \frac{\partial^2}{\partial\theta^2} + \right]u(r,\theta) &= f(r,\theta) \qquad 0\le r\le 1,\quad + 0 \le \theta < 2\pi \\ + u(1,\theta) &= 0, \\ + \end{split} +\end{equation} +By choosing +\begin{equation*} + f(r,\theta) = j^2_{ms} J_{m}(j_{ms}r)\cos(m\theta), +\end{equation*} +where $m$ is an integer and $j_{ms}$, the $s^{th}$ zero of $J_{m}$, +the solution of this BVP is +\begin{equation*} +u(r,\theta) = J_{m}(j_{ms}r)\cos(m\theta). +\end{equation*} +Using a weak formulation on Eq.(\ref{eq:cylindrical_problem})and a grid +of $N_r\times N_\theta$ intervals, one obtains the following discretized +linear system +\begin{equation} + \sum_{i'=1}^{N_r+p}\sum_{j'=1}^{N_\theta}A_{iji'j'}u_{i'j'} = b_{ij}, + \qquad i=1,\ldots,N_r+p,\quad j=1,\ldots,N_\theta, +\end{equation} +where the unknowns $u_{ij}$ are the Spline (of order $p$) expansion +coefficients of the solution +\begin{equation} + u(r,\theta) = \sum_{i=1}^{N_r+p}\sum_{j=1}^{N_\theta}u_{ij}\Lambda_i(r)\Lambda_j(\theta), +\end{equation} +and the matrix $A$ and right hand side $b$ are determined from +\begin{align} + A_{iji'j'} &= \int^1_0\int^{2\pi}_0 rdrd\theta + \left[\Lambda'_{i'}(r)\Lambda_{j'}(\theta)\Lambda'_i(r)\Lambda_j(\theta) + + \frac{1}{r^2} + \Lambda_{i'}(r)\Lambda'_{j'}(\theta)\Lambda_i(r)\Lambda'_j(\theta) + \right], \\ + b_{ij} &= \int^1_0\int^{2\pi}_0 rdrd\theta\Lambda_i(r)\Lambda_j(\theta)f(r,\theta). +\end{align} +Note that $A$ has an $1/r$ singularity in the +integrand. For $m\neq0$, this should not be problematic since the +converged solution behaves as $\sim r^m$ near $r=0$. The case $m=0$ +will be investigated numerically latter in this report, together withe +the $m\neq 0$ case. + +\section{Restriction Operator} +In the following, let us use the superscripts $h$ and $2h$ to denote +quantities defined respectively on a \emph{fine} ($N_x\times N_y$ +or $N_r\times N_\theta$) and a \emph{coarser} ($N_x/2\times N_y/2$ +or $N_r/2\times N_\theta/2$) grid. + +The two grid transfers required in the standard \emph{multigrid} +\cite{MG1D,Briggs} are: +\begin{enumerate} +\item the \emph{restriction} of the right hand side: $\mathbf{b}^{h} + \longrightarrow \mathbf{b}^{2h}$ and +\item the \emph{prolongation} of the solution: $\mathbf{u}^{2h} + \longrightarrow \mathbf{u}^{h}$. +\end{enumerate} + +Noting that the basis functions $\Lambda^{2h}_i(x)$, which are \emph{piecewise} +$C^{p-1}$ polynomials with \emph{breaks} on the \emph{coarse} grid +points $x^{2h}_k=(2h)k$ can be also considered as \emph{piecewise} +$C^{p-1}$ polynomials with \emph{breaks} on the \emph{fine} grid +$x^h_k=kh$, they can be expressed \emph{uniquely} as a linear +combination of the \emph{fine} grid basis functions: + +\begin{equation} + \label{eq:basis_transf} + \Lambda^{2h}_i(x) = \sum^{N+p}_{i'=1}c_{ii'}\Lambda^h_{i'}(x), \quad + i=1,\ldots,N/2+p. +\end{equation} +The (rectangular) matrix $c_{ii'}$ can be identified as the +one-dimensional \emph{restriction} $\mathbf{R}$ since +\begin{equation*} + b^{2h}_i = \int_0^1 dx f(x)\Lambda^{2h}_i(x) = + \sum^{N+p}_{i'=1}c_{ii'}\;b^h_{i'} = + \sum^{N+p}_{i'=1}R_{ii'}\;b^h_{i'}. +\end{equation*} + +It can be computed by simply projecting Eq.(\ref{eq:basis_transf}) +on the fine grid basis function $\Lambda^h_{j}(x)$ \cite{MG1D}: +\begin{equation} + \sum^{N+p}_{i'=1}R_{ii'}\underbrace{\int_0^1 dx + \Lambda^h_{i'}(x)\Lambda^h_{j}(x)}_{M^h_{i'j}} = \underbrace{\int_0^1 dx + \Lambda^{2h}_i(x)\Lambda^h_{j}(x)}_{M^{2h,h}_{i'j}} \Longrightarrow + \mathbf{R}=\mathbf{M}^{2h,h}\cdot(\mathbf{M}^{h})^{-1}. +\end{equation} + +It should be stressed that the representation for $\Lambda^{2h}_i(x)$ +in Eq.(\ref{eq:basis_transf}) is \emph {unique}. This is +checked by verifying that the same matrix $R_{ii'}$ is obtained using +for example the \emph{collocation} methods. One such method, which is used for +this check is detailed in Appendix \ref{sec:colloc}. The calculated +grid transfer matrices for linear, quadratic and cubic periodic and +non-periodic Splines are given in \cite{MG1D}. + +Denoting the restriction on $x$ and $y$ respectively +by $\mathbf{R}^x$ and $\mathbf{R}^y$, the two-dimensional restriction of $b^h_{ij}$ is +defined as +\begin{equation*} + b^{2h}_{ij} = \int_0^1\int_0^1 dxdy f(x,y)\Lambda^{2h}_i(x)\Lambda^{2h}_j(y) = + \sum^{N+p}_{i'=1}\sum^{N+p}_{j'=1}R^x_{ii'}R^y_{jj'}b^h_{i'j'}, +\end{equation*} +and thus +\begin{equation} + \label{eq:restriction} + \boxed{\mathbf{b}^{2h} = \mathbf{R}^x \cdot \mathbf{b}^{h} + \cdot (\mathbf{R}^y)^T.} +\end{equation} + +\section{Prolongation Operator} +Using Eq.(\ref{eq:basis_transf}) (with $c_{ii'}=R_{ii'}$), the +solution at the coarse grid can be expressed as +\begin{equation*} + u^{2h}(x) = \sum_{i=1}^{N/2+p}u^{2h}_{i}\Lambda^{2h}_{i}(x) = + \sum_{i'=1}^{N+p}\left[\sum_{i=1}^{N/2+p} R_{ii'}u^{2h}_{i}\right] + \Lambda^h_{i'}(x) = + \sum_{i'=1}^{N+p}\underbrace{\left[\sum_{i=1}^{N/2+p} (R)^T_{i'i}u^{2h}_{i}\right]}_{\tilde{u}^h_{i'}} + \Lambda^h_{i'}(x), +\end{equation*} +from which one obvious choice for the \emph{prolongation} operator would be +\begin{equation} + \mathbf{P} = \mathbf{R}^T = (\mathbf{M}^{h})^{-1}\cdot\mathbf{M}^{h,2h}. +\end{equation} + +Generalization to a two-dimensional prolongation is obtained as +follows, where summation over repeated indices is assumed: +\begin{equation*} + u^{2h}(x,y) = u^{2h}_{ij}\Lambda^{2h}_{i}(x)\Lambda^{2h}_{j}(y) = + \left[R^x_{ii'}u^{2h}_{ij}R^y_{jj'}\right]\Lambda^{h}_{i'}(x)\Lambda^{h}_{j'}(y) +\end{equation*} +which leads to the prolonged solution $\tilde{\mathbf{u}}^{h}$ given by +\begin{equation} + \label{eq:prolongation} + \boxed{\tilde{\mathbf{u}}^{h} = \mathbf{P}^x \cdot \mathbf{u}^{2h} \cdot + (\mathbf{P}^y)^T .} +\end{equation} + +It should be noted here that, while the restricted right hand side +$\mathbf{b}^{2h}$ as defined in Eq.(\ref{eq:restriction}) is +\emph{exactly identical} to the assembled right hand side, the +prolonged solution $\tilde{\mathbf{u}}^{h}$ defined in +Eq.(\ref{eq:prolongation}) is just a +representation of $u^{2h}(x,y)$ on the fine mesh and \emph{not} the +solution $u^h(x,y)$ which can only be obtained by solving the problem +on the fine mesh! + +\section{Numerical Experiments} +The multigrid performance can be characterized by looking at the +convergence of the residual Euclidean norm for the linear system +$\mathbf{A}\mathbf{u}=\mathbf{b}$: + +\begin{equation} +\label{eq:resid} +\|\mathbf{r}\|_2 = \|\mathbf{b}-\mathbf{A}\mathbf{u}\|_2. +\end{equation} +When the \emph{exact} solution $u(x,y)$ is known, the \emph{discretization error} +can defined as +\begin{equation} +\label{eq:discerr} + \|e\|_2 = \sqrt{\int dV\left[\sum_{ij}u_{ij}\Lambda_{ij}(x,y)-u(x,y)\right]^2} +\end{equation} +and computed using a Gauss quadrature. Note that for Splines of order +$p$, $\|e(x,y)\|_2(h)$ converges to zero as $O(h^{p+1})$. + +\subsection{Cartesian Geometry} +The multigrid performances for varying problem sizes are displayed in +Fig.(\ref{fig:linear_mg2d}) for linear Splines and +Fig.(\ref{fig:cubic_mg2d}) for cubic Splines. They show that the number +of iterations required for convergence (abount 3 for both linear and cubic +Splines) is insensitive to the problem sizes. Compared to direct +methods, the multigrid should scale much better for large problem +sizes, as indicated in Table~\ref{tab:comparison1}. For this model +problem, using cubic Splines seems to converge +slightly faster than linear Splines! + +\begin{figure} + \centering + \includegraphics[angle=0,width=\hsize]{linear_mg2d} + \caption{Performance of the multigrid $V(2,1)$ scheme using a + Gauss-Seidel relaxation and \emph{linear Splines} for different + problem sizes. The size of the \emph{coarsest} grid is $2\times 2$.} + \label{fig:linear_mg2d} +\end{figure} + +\begin{figure} + \centering + \includegraphics[angle=0,width=\hsize]{cubic_mg2d} + \caption{Performance of the multigrid $V(2,1)$ scheme using a + Gauss-Seidel relaxation and \emph{cubic Splines} for different + problem sizes. The size of the \emph{coarsest} grid is $2\times 2$.} + \label{fig:cubic_mg2d} +\end{figure} + +\begin{table}[htb] + \centering + \begin{tabular}{|c|c|c|c|c|c|}\hline + & \multicolumn{2}{c|}{Linear Splines} + & \multicolumn{2}{c|}{Cubic Splines} \\ \hline +$N$ & $V(2,1)$ & Direct & $V(2,1)$ & Direct \\ \hline + 16 & 8.844E-04 & 2.051E-03 & 2.653E-03 & 3.970E-03 \\ + 32 & 1.661E-03 & 5.345E-03 & 4.983E-03 & 1.540E-02 \\ + 64 & 5.766E-03 & 2.054E-02 & 1.730E-02 & 7.492E-02 \\ +128 & 2.347E-02 & 3.288E-01 & 7.042E-02 & 1.060E+00 \\ +\hline + \end{tabular} + \caption{Times (in seconds) used by a the \emph{direct sparse} solver MUMPS-4.10.0 + for different problem sizes versus the times used by \emph{three} + multigrid $V(2,1)$ cycles. The Intel Fortran-13.0 compiler is used on an Intel + i7 platform.} +\label{tab:comparison1} +\end{table} + +The effects of the relaxation parameters $\nu_1,\nu_2$ on the +multigrid performnace (Fig.(\ref{fig:cubic_mg2d_relax})) indicates +that only a few relaxations are sufficient to achieve a good multigrid +performance. Further analysis of the computational cost is required +however to determine the \emph{optimal} $\nu_1,\nu_2$. + +Finally, the effects of the number of grid levels are analyzed in +Fig.(\ref{fig:cubic_mg2d_levels}). In addition to the computational +cost (see Table~\ref{tab:comparison2}), the memory required for the +\emph{direct solver} at the coarsest grid level should be taken into +account for the choice of the optimal number of grid levels, +especially for very large problems. + +\begin{figure} + \centering + \includegraphics[angle=0,width=\hsize]{cubic_mg2d_relax} + \caption{Effect of the number of the relaxation sweeps $\nu_1,\nu_2$ + on the performance of the multigrid $V(\nu_1,\nu_2)$-cycle for + \emph{Cubic Splines}. The finest grid has $128\times 128$ + intervals.} + \label{fig:cubic_mg2d_relax} +\end{figure} + +\begin{figure} + \centering + \includegraphics[angle=0,width=\hsize]{cubic_mg2d_levels} + \caption{Effect of the number grid levels + on the performance of the multigrid $V(2,1)$-cycle for \emph{Cubic + Splines}. The finest grid has $128\times 128$ intervals.} + \label{fig:cubic_mg2d_levels} +\end{figure} + +\begin{table}[htb] + \centering + \begin{tabular}{|c|c|c|c|}\hline +Number of levels & $V(1,0)$ & $V(1,1)$ & $V(2,1)$ \\ \hline + 2 & 3.386E-02 & 3.881E-02 & 4.031E-02 \\ + 3 & 2.923E-02 & 3.398E-02 & 3.605E-02 \\ + 4 & 2.880E-02 & 3.275E-02 & 3.595E-02 \\ + 7 & 2.912E-02 & 3.236E-02 & 3.566E-02 \\ +\hline + \end{tabular} + \caption{Effects of the times in seconds used per $V$-cyclefor + different number of grid levels and relaxation paramters for a + $128\times 128$ problem. The Intel Fortran-13.0 compiler is used on an Intel + i7 platform.} +\label{tab:comparison2} +\end{table} + + + +\subsection{Cylindrical Geometry} + +\newpage +\appendix + +\section{Grid transfer matrix by collocation} +\label{sec:colloc} +Let first consider the \emph{periodic case}. Denoting $N$ as the +number of intervals of the fine grid, the \emph{periodic} Spline basis +functions on the \emph{coarse} grid $\Lambda^{2h}_i$ can be expressed +as linear combinations of the \emph{fine} grid Spline basis functions +as: +\begin{equation} + \Lambda^{2h}_i(x) = \sum^{N}_{i'=1}R_{ii'}\Lambda^h_{i'}(x), \quad + i=1,\ldots,N/2. +\end{equation} +For any given $i$, the coefficients $R_{ii'}$ can be calculated by +expressing the relation above on exactly $N$ points on the $x$-grid. For +\emph{odd} Spline order $p$, these \emph{collocation} (or +interpolating) points can be chosen as the \emph{break} points of the fine +grid $x^h_k,\quad k=0,\ldots,N-1$. For \emph{even} values of $p$, the +collocation points should be $x^h_{k+1/2}=(x^h_{k}+x^h_{k+1})/2$ in +order to obtain a non-singular linear system of equations +\cite{BSPLINES}. The resulting system of equations to solve for +$R_{ii'}$ are given below: +\begin{equation} + \begin{split} + p\mbox{ odd}: \qquad &\sum^{N}_{i'=1}\Lambda^h_{i'}(x^h_k)\,R_{ii'} = + \Lambda^{2h}_i(x^h_k), \qquad k=0,\ldots,N-1,\quad i=1,\ldots,N/2, \\ + p\mbox{ even}: \qquad &\sum^{N}_{i'=1}\Lambda^h_{i'}(x^h_{k+1/2})\,R_{ii'} = + \Lambda^{2h}_i(x^h_{k+1/2}), \qquad k=0,\ldots,N-1,\quad i=1,\ldots,N/2.\\ + \end{split} +\end{equation} + +For \emph{non-periodic} Splines, there are $N+p$ and +$N/2+p$ basis functions respectively on the fine and coarse grid: +\begin{equation} + \Lambda^{2h}_i(x) = \sum^{N+p}_{i'=1}R_{ii'}\Lambda^h_{i'}(x), \quad + i=1,\ldots,N/2+p. +\end{equation} +This implies that for any given $\Lambda^{2h}_i$, $N+p$ conditions +are required to determined the $N+p$ terms of row $i$ of the matrix $R_{ii'}$. For odd $p$, +$N+1$ collocation points $x_k,\quad k=0,\ldots,N$ can be used with the +missing $p-1$ equations obtained by expressing all the $(p-1)/2$ derivatives +of $\Lambda^{2h}_i(x)$ at the end points $x_0$ and $x_N$: +\begin{equation} + \frac{d^\alpha}{dx^\alpha}\Lambda^{2h}_i(x) = + \sum^{N+p}_{i'=1}R_{ii'}\frac{d^\alpha}{dx^\alpha} \Lambda^h_{i'}(x) + , \quad \alpha=1,\dots,\frac{p-1}{2} \quad (\mbox{$p$ odd}). +\end{equation} +For \emph{even} $p$, in addition to the $N$ relations obtained with +the collocation points $x_{k+1/2}$ (as in the \emph{periodic} case), +the missing $p$ conditions can be obtained by expressing $\Lambda^{2h}_i$ +and its derivatives up to $p/2-1$ at the end points $x_0$ and $x_N$: +\begin{equation} + \frac{d^\alpha}{dx^\alpha}\Lambda^{2h}_i(x) = + \sum^{N+p}_{i'=1}R_{ii'}\frac{d^\alpha}{dx^\alpha} \Lambda^h_{i'}(x) + , \quad \alpha=0,\dots,\frac{p}{2}-1 \quad (\mbox{$p$ even}). +\end{equation} + +\begin{thebibliography}{99} + \bibitem{MG1D} {\tt Multigrid Formulation for Finite Elements},\\ + \url{https://crppsvn.epfl.ch/repos/bsplines/trunk/multigrid/docs/multigrid.pdf} + \bibitem{Briggs} {W.L.~Briggs, V.E.~Henson and S.F.~McCormick, A + Multigrid Tutorial, Second Edition, Siam (2000)}. + \bibitem{BSPLINES} {\tt BSPLINES} Reference Guide, + \url{https://crppsvn.epfl.ch/repos/bsplines/trunk/docs/bsplines.pdf} + \bibitem{SOLVERS} {\tt The Solvers in BSPLINES}, + \url{https://crppsvn.epfl.ch/repos/bsplines/trunk/docs/solvers.pdf} +\end{thebibliography} + +\end{document} diff --git a/multigrid/docs/quad_splines.eps b/multigrid/docs/quad_splines.eps new file mode 100644 index 0000000..1cc1190 --- /dev/null +++ b/multigrid/docs/quad_splines.eps @@ -0,0 +1,1081 @@ +%!PS-Adobe-3.0 EPSF-3.0 +%%Creator: MATLAB, The MathWorks, Inc. Version 7.14.0.739 (R2012a). Operating System: Linux 2.6.27.56-0.1-default #1 SMP 2010-12-01 16:57:58 +0100 x86_64. +%%Title: /home/ttran/multigrid/quad_splines.eps +%%CreationDate: 09/18/2012 13:09:10 +%%DocumentNeededFonts: Helvetica +%%DocumentProcessColors: Cyan Magenta Yellow Black +%%LanguageLevel: 2 +%%Pages: 1 +%%BoundingBox: 44 189 549 653 +%%EndComments + +%%BeginProlog +% MathWorks dictionary +/MathWorks 160 dict begin +% definition operators +/bdef {bind def} bind def +/ldef {load def} bind def +/xdef {exch def} bdef +/xstore {exch store} bdef +% operator abbreviations +/c /clip ldef +/cc /concat ldef +/cp /closepath ldef +/gr /grestore ldef +/gs /gsave ldef +/mt /moveto ldef +/np /newpath ldef +/cm /currentmatrix ldef +/sm /setmatrix ldef +/rm /rmoveto ldef +/rl /rlineto ldef +/s {show newpath} bdef +/sc {setcmykcolor} bdef +/sr /setrgbcolor ldef +/sg /setgray ldef +/w /setlinewidth ldef +/j /setlinejoin ldef +/cap /setlinecap ldef +/rc {rectclip} bdef +/rf {rectfill} bdef +% page state control +/pgsv () def +/bpage {/pgsv save def} bdef +/epage {pgsv restore} bdef +/bplot /gsave ldef +/eplot {stroke grestore} bdef +% orientation switch +/portraitMode 0 def /landscapeMode 1 def /rotateMode 2 def +% coordinate system mappings +/dpi2point 0 def +% font control +/FontSize 0 def +/FMS {/FontSize xstore findfont [FontSize 0 0 FontSize neg 0 0] + makefont setfont} bdef +/reencode {exch dup where {pop load} {pop StandardEncoding} ifelse + exch dup 3 1 roll findfont dup length dict begin + { 1 index /FID ne {def}{pop pop} ifelse } forall + /Encoding exch def currentdict end definefont pop} bdef +/isroman {findfont /CharStrings get /Agrave known} bdef +/FMSR {3 1 roll 1 index dup isroman {reencode} {pop pop} ifelse + exch FMS} bdef +/csm {1 dpi2point div -1 dpi2point div scale neg translate + dup landscapeMode eq {pop -90 rotate} + {rotateMode eq {90 rotate} if} ifelse} bdef +% line types: solid, dotted, dashed, dotdash +/SO { [] 0 setdash } bdef +/DO { [.5 dpi2point mul 4 dpi2point mul] 0 setdash } bdef +/DA { [6 dpi2point mul] 0 setdash } bdef +/DD { [.5 dpi2point mul 4 dpi2point mul 6 dpi2point mul 4 + dpi2point mul] 0 setdash } bdef +% macros for lines and objects +/L {lineto stroke} bdef +/MP {3 1 roll moveto 1 sub {rlineto} repeat} bdef +/AP {{rlineto} repeat} bdef +/PDlw -1 def +/W {/PDlw currentlinewidth def setlinewidth} def +/PP {closepath eofill} bdef +/DP {closepath stroke} bdef +/MR {4 -2 roll moveto dup 0 exch rlineto exch 0 rlineto + neg 0 exch rlineto closepath} bdef +/FR {MR stroke} bdef +/PR {MR fill} bdef +/L1i {{currentfile picstr readhexstring pop} image} bdef +/tMatrix matrix def +/MakeOval {newpath tMatrix currentmatrix pop translate scale +0 0 1 0 360 arc tMatrix setmatrix} bdef +/FO {MakeOval stroke} bdef +/PO {MakeOval fill} bdef +/PD {currentlinewidth 2 div 0 360 arc fill + PDlw -1 eq not {PDlw w /PDlw -1 def} if} def +/FA {newpath tMatrix currentmatrix pop translate scale + 0 0 1 5 -2 roll arc tMatrix setmatrix stroke} bdef +/PA {newpath tMatrix currentmatrix pop translate 0 0 moveto scale + 0 0 1 5 -2 roll arc closepath tMatrix setmatrix fill} bdef +/FAn {newpath tMatrix currentmatrix pop translate scale + 0 0 1 5 -2 roll arcn tMatrix setmatrix stroke} bdef +/PAn {newpath tMatrix currentmatrix pop translate 0 0 moveto scale + 0 0 1 5 -2 roll arcn closepath tMatrix setmatrix fill} bdef +/vradius 0 def /hradius 0 def /lry 0 def +/lrx 0 def /uly 0 def /ulx 0 def /rad 0 def +/MRR {/vradius xdef /hradius xdef /lry xdef /lrx xdef /uly xdef + /ulx xdef newpath tMatrix currentmatrix pop ulx hradius add uly + vradius add translate hradius vradius scale 0 0 1 180 270 arc + tMatrix setmatrix lrx hradius sub uly vradius add translate + hradius vradius scale 0 0 1 270 360 arc tMatrix setmatrix + lrx hradius sub lry vradius sub translate hradius vradius scale + 0 0 1 0 90 arc tMatrix setmatrix ulx hradius add lry vradius sub + translate hradius vradius scale 0 0 1 90 180 arc tMatrix setmatrix + closepath} bdef +/FRR {MRR stroke } bdef +/PRR {MRR fill } bdef +/MlrRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lry uly sub 2 div def + newpath tMatrix currentmatrix pop ulx rad add uly rad add translate + rad rad scale 0 0 1 90 270 arc tMatrix setmatrix lrx rad sub lry rad + sub translate rad rad scale 0 0 1 270 90 arc tMatrix setmatrix + closepath} bdef +/FlrRR {MlrRR stroke } bdef +/PlrRR {MlrRR fill } bdef +/MtbRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lrx ulx sub 2 div def + newpath tMatrix currentmatrix pop ulx rad add uly rad add translate + rad rad scale 0 0 1 180 360 arc tMatrix setmatrix lrx rad sub lry rad + sub translate rad rad scale 0 0 1 0 180 arc tMatrix setmatrix + closepath} bdef +/FtbRR {MtbRR stroke } bdef +/PtbRR {MtbRR fill } bdef +/stri 6 array def /dtri 6 array def +/smat 6 array def /dmat 6 array def +/tmat1 6 array def /tmat2 6 array def /dif 3 array def +/asub {/ind2 exch def /ind1 exch def dup dup + ind1 get exch ind2 get sub exch } bdef +/tri_to_matrix { + 2 0 asub 3 1 asub 4 0 asub 5 1 asub + dup 0 get exch 1 get 7 -1 roll astore } bdef +/compute_transform { + dmat dtri tri_to_matrix tmat1 invertmatrix + smat stri tri_to_matrix tmat2 concatmatrix } bdef +/ds {stri astore pop} bdef +/dt {dtri astore pop} bdef +/db {2 copy /cols xdef /rows xdef mul dup 3 mul string + currentfile + 3 index 0 eq {/ASCIIHexDecode filter} + {/ASCII85Decode filter 3 index 2 eq {/RunLengthDecode filter} if } + ifelse exch readstring pop + dup 0 3 index getinterval /rbmap xdef + dup 2 index dup getinterval /gbmap xdef + 1 index dup 2 mul exch getinterval /bbmap xdef pop pop}bdef +/it {gs np dtri aload pop moveto lineto lineto cp c + cols rows 8 compute_transform + rbmap gbmap bbmap true 3 colorimage gr}bdef +/il {newpath moveto lineto stroke}bdef +currentdict end def +%%EndProlog + +%%BeginSetup +MathWorks begin + +0 cap + +end +%%EndSetup + +%%Page: 1 1 +%%BeginPageSetup +%%PageBoundingBox: 44 189 549 653 +MathWorks begin +bpage +%%EndPageSetup + +%%BeginObject: obj1 +bplot + +/dpi2point 12 def +portraitMode 0528 7836 csm + + 0 0 6068 5560 rc +86 dict begin %Colortable dictionary +/c0 { 0.000000 0.000000 0.000000 sr} bdef +/c1 { 1.000000 1.000000 1.000000 sr} bdef +/c2 { 0.900000 0.000000 0.000000 sr} bdef +/c3 { 0.000000 0.820000 0.000000 sr} bdef +/c4 { 0.000000 0.000000 0.800000 sr} bdef +/c5 { 0.910000 0.820000 0.320000 sr} bdef +/c6 { 1.000000 0.260000 0.820000 sr} bdef +/c7 { 0.000000 0.820000 0.820000 sr} bdef +c0 +1 j +1 sg + 0 0 6069 5561 rf +6 w +0 4531 4703 0 0 -4531 789 4948 4 MP +PP +-4703 0 0 4531 4703 0 0 -4531 789 4948 5 MP stroke +4 w +DO +0 sg + 789 4948 mt 789 417 L + 789 417 mt 789 417 L +2356 4948 mt 2356 417 L +2356 417 mt 2356 417 L +3924 4948 mt 3924 417 L +3924 417 mt 3924 417 L +5492 4948 mt 5492 417 L +5492 417 mt 5492 417 L + 789 4948 mt 5492 4948 L +5492 4948 mt 5492 4948 L + 789 4381 mt 5492 4381 L +5492 4381 mt 5492 4381 L + 789 3815 mt 5492 3815 L +5492 3815 mt 5492 3815 L + 789 3248 mt 5492 3248 L +5492 3248 mt 5492 3248 L + 789 2682 mt 5492 2682 L +5492 2682 mt 5492 2682 L + 789 2116 mt 5492 2116 L +5492 2116 mt 5492 2116 L + 789 1549 mt 5492 1549 L +5492 1549 mt 5492 1549 L + 789 983 mt 5492 983 L +5492 983 mt 5492 983 L + 789 417 mt 5492 417 L +5492 417 mt 5492 417 L +SO +6 w + 789 4948 mt 5492 4948 L + 789 417 mt 5492 417 L + 789 4948 mt 789 417 L +5492 4948 mt 5492 417 L + 789 4948 mt 5492 4948 L + 789 4948 mt 789 417 L + 789 4948 mt 789 4924 L + 789 417 mt 789 440 L +DO + 789 4948 mt 789 417 L + 789 417 mt 789 417 L +SO + 789 4948 mt 789 4900 L + 789 417 mt 789 464 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 144 FMSR + + 683 5160 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 96 FMSR + + 843 5071 mt +(1) s +1260 4948 mt 1260 4924 L +1260 417 mt 1260 440 L +DO +1260 4948 mt 1260 417 L +1260 417 mt 1260 417 L +SO +1536 4948 mt 1536 4924 L +1536 417 mt 1536 440 L +DO +1536 4948 mt 1536 417 L +1536 417 mt 1536 417 L +SO +1732 4948 mt 1732 4924 L +1732 417 mt 1732 440 L +DO +1732 4948 mt 1732 417 L +1732 417 mt 1732 417 L +SO +1884 4948 mt 1884 4924 L +1884 417 mt 1884 440 L +DO +1884 4948 mt 1884 417 L +1884 417 mt 1884 417 L +SO +2008 4948 mt 2008 4924 L +2008 417 mt 2008 440 L +DO +2008 4948 mt 2008 417 L +2008 417 mt 2008 417 L +SO +2113 4948 mt 2113 4924 L +2113 417 mt 2113 440 L +DO +2113 4948 mt 2113 417 L +2113 417 mt 2113 417 L +SO +2204 4948 mt 2204 4924 L +2204 417 mt 2204 440 L +DO +2204 4948 mt 2204 417 L +2204 417 mt 2204 417 L +SO +2284 4948 mt 2284 4924 L +2284 417 mt 2284 440 L +DO +2284 4948 mt 2284 417 L +2284 417 mt 2284 417 L +SO +2356 4948 mt 2356 4924 L +2356 417 mt 2356 440 L +DO +2356 4948 mt 2356 417 L +2356 417 mt 2356 417 L +SO +2356 4948 mt 2356 4900 L +2356 417 mt 2356 464 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 144 FMSR + +2250 5160 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 96 FMSR + +2410 5071 mt +(2) s +2828 4948 mt 2828 4924 L +2828 417 mt 2828 440 L +DO +2828 4948 mt 2828 417 L +2828 417 mt 2828 417 L +SO +3104 4948 mt 3104 4924 L +3104 417 mt 3104 440 L +DO +3104 4948 mt 3104 417 L +3104 417 mt 3104 417 L +SO +3300 4948 mt 3300 4924 L +3300 417 mt 3300 440 L +DO +3300 4948 mt 3300 417 L +3300 417 mt 3300 417 L +SO +3452 4948 mt 3452 4924 L +3452 417 mt 3452 440 L +DO +3452 4948 mt 3452 417 L +3452 417 mt 3452 417 L +SO +3576 4948 mt 3576 4924 L +3576 417 mt 3576 440 L +DO +3576 4948 mt 3576 417 L +3576 417 mt 3576 417 L +SO +3681 4948 mt 3681 4924 L +3681 417 mt 3681 440 L +DO +3681 4948 mt 3681 417 L +3681 417 mt 3681 417 L +SO +3772 4948 mt 3772 4924 L +3772 417 mt 3772 440 L +DO +3772 4948 mt 3772 417 L +3772 417 mt 3772 417 L +SO +3852 4948 mt 3852 4924 L +3852 417 mt 3852 440 L +DO +3852 4948 mt 3852 417 L +3852 417 mt 3852 417 L +SO +3924 4948 mt 3924 4924 L +3924 417 mt 3924 440 L +DO +3924 4948 mt 3924 417 L +3924 417 mt 3924 417 L +SO +3924 4948 mt 3924 4900 L +3924 417 mt 3924 464 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 144 FMSR + +3818 5160 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 96 FMSR + +3978 5071 mt +(3) s +4396 4948 mt 4396 4924 L +4396 417 mt 4396 440 L +DO +4396 4948 mt 4396 417 L +4396 417 mt 4396 417 L +SO +4672 4948 mt 4672 4924 L +4672 417 mt 4672 440 L +DO +4672 4948 mt 4672 417 L +4672 417 mt 4672 417 L +SO +4868 4948 mt 4868 4924 L +4868 417 mt 4868 440 L +DO +4868 4948 mt 4868 417 L +4868 417 mt 4868 417 L +SO +5020 4948 mt 5020 4924 L +5020 417 mt 5020 440 L +DO +5020 4948 mt 5020 417 L +5020 417 mt 5020 417 L +SO +5144 4948 mt 5144 4924 L +5144 417 mt 5144 440 L +DO +5144 4948 mt 5144 417 L +5144 417 mt 5144 417 L +SO +5249 4948 mt 5249 4924 L +5249 417 mt 5249 440 L +DO +5249 4948 mt 5249 417 L +5249 417 mt 5249 417 L +SO +5340 4948 mt 5340 4924 L +5340 417 mt 5340 440 L +DO +5340 4948 mt 5340 417 L +5340 417 mt 5340 417 L +SO +5420 4948 mt 5420 4924 L +5420 417 mt 5420 440 L +DO +5420 4948 mt 5420 417 L +5420 417 mt 5420 417 L +SO +5492 4948 mt 5492 4924 L +5492 417 mt 5492 440 L +DO +5492 4948 mt 5492 417 L +5492 417 mt 5492 417 L +SO +5492 4948 mt 5492 4900 L +5492 417 mt 5492 464 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 144 FMSR + +5386 5160 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 96 FMSR + +5546 5071 mt +(4) s + 789 4948 mt 812 4948 L +5492 4948 mt 5468 4948 L +DO + 789 4948 mt 5492 4948 L +5492 4948 mt 5492 4948 L +SO + 789 4948 mt 836 4948 L +5492 4948 mt 5444 4948 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 144 FMSR + + 432 5001 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 96 FMSR + + 592 4912 mt +(-11) s + 789 4777 mt 812 4777 L +5492 4777 mt 5468 4777 L +DO + 789 4777 mt 5492 4777 L +5492 4777 mt 5492 4777 L +SO + 789 4677 mt 812 4677 L +5492 4677 mt 5468 4677 L +DO + 789 4677 mt 5492 4677 L +5492 4677 mt 5492 4677 L +SO + 789 4607 mt 812 4607 L +5492 4607 mt 5468 4607 L +DO + 789 4607 mt 5492 4607 L +5492 4607 mt 5492 4607 L +SO + 789 4552 mt 812 4552 L +5492 4552 mt 5468 4552 L +DO + 789 4552 mt 5492 4552 L +5492 4552 mt 5492 4552 L +SO + 789 4507 mt 812 4507 L +5492 4507 mt 5468 4507 L +DO + 789 4507 mt 5492 4507 L +5492 4507 mt 5492 4507 L +SO + 789 4469 mt 812 4469 L +5492 4469 mt 5468 4469 L +DO + 789 4469 mt 5492 4469 L +5492 4469 mt 5492 4469 L +SO + 789 4436 mt 812 4436 L +5492 4436 mt 5468 4436 L +DO + 789 4436 mt 5492 4436 L +5492 4436 mt 5492 4436 L +SO + 789 4407 mt 812 4407 L +5492 4407 mt 5468 4407 L +DO + 789 4407 mt 5492 4407 L +5492 4407 mt 5492 4407 L +SO + 789 4381 mt 812 4381 L +5492 4381 mt 5468 4381 L +DO + 789 4381 mt 5492 4381 L +5492 4381 mt 5492 4381 L +SO + 789 4381 mt 836 4381 L +5492 4381 mt 5444 4381 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 144 FMSR + + 432 4434 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 96 FMSR + + 592 4345 mt +(-10) s + 789 4211 mt 812 4211 L +5492 4211 mt 5468 4211 L +DO + 789 4211 mt 5492 4211 L +5492 4211 mt 5492 4211 L +SO + 789 4111 mt 812 4111 L +5492 4111 mt 5468 4111 L +DO + 789 4111 mt 5492 4111 L +5492 4111 mt 5492 4111 L +SO + 789 4040 mt 812 4040 L +5492 4040 mt 5468 4040 L +DO + 789 4040 mt 5492 4040 L +5492 4040 mt 5492 4040 L +SO + 789 3985 mt 812 3985 L +5492 3985 mt 5468 3985 L +DO + 789 3985 mt 5492 3985 L +5492 3985 mt 5492 3985 L +SO + 789 3940 mt 812 3940 L +5492 3940 mt 5468 3940 L +DO + 789 3940 mt 5492 3940 L +5492 3940 mt 5492 3940 L +SO + 789 3902 mt 812 3902 L +5492 3902 mt 5468 3902 L +DO + 789 3902 mt 5492 3902 L +5492 3902 mt 5492 3902 L +SO + 789 3870 mt 812 3870 L +5492 3870 mt 5468 3870 L +DO + 789 3870 mt 5492 3870 L +5492 3870 mt 5492 3870 L +SO + 789 3841 mt 812 3841 L +5492 3841 mt 5468 3841 L +DO + 789 3841 mt 5492 3841 L +5492 3841 mt 5492 3841 L +SO + 789 3815 mt 812 3815 L +5492 3815 mt 5468 3815 L +DO + 789 3815 mt 5492 3815 L +5492 3815 mt 5492 3815 L +SO + 789 3815 mt 836 3815 L +5492 3815 mt 5444 3815 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 144 FMSR + + 432 3868 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 96 FMSR + + 592 3779 mt +(-9) s + 789 3644 mt 812 3644 L +5492 3644 mt 5468 3644 L +DO + 789 3644 mt 5492 3644 L +5492 3644 mt 5492 3644 L +SO + 789 3545 mt 812 3545 L +5492 3545 mt 5468 3545 L +DO + 789 3545 mt 5492 3545 L +5492 3545 mt 5492 3545 L +SO + 789 3474 mt 812 3474 L +5492 3474 mt 5468 3474 L +DO + 789 3474 mt 5492 3474 L +5492 3474 mt 5492 3474 L +SO + 789 3419 mt 812 3419 L +5492 3419 mt 5468 3419 L +DO + 789 3419 mt 5492 3419 L +5492 3419 mt 5492 3419 L +SO + 789 3374 mt 812 3374 L +5492 3374 mt 5468 3374 L +DO + 789 3374 mt 5492 3374 L +5492 3374 mt 5492 3374 L +SO + 789 3336 mt 812 3336 L +5492 3336 mt 5468 3336 L +DO + 789 3336 mt 5492 3336 L +5492 3336 mt 5492 3336 L +SO + 789 3303 mt 812 3303 L +5492 3303 mt 5468 3303 L +DO + 789 3303 mt 5492 3303 L +5492 3303 mt 5492 3303 L +SO + 789 3274 mt 812 3274 L +5492 3274 mt 5468 3274 L +DO + 789 3274 mt 5492 3274 L +5492 3274 mt 5492 3274 L +SO + 789 3248 mt 812 3248 L +5492 3248 mt 5468 3248 L +DO + 789 3248 mt 5492 3248 L +5492 3248 mt 5492 3248 L +SO + 789 3248 mt 836 3248 L +5492 3248 mt 5444 3248 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 144 FMSR + + 432 3301 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 96 FMSR + + 592 3212 mt +(-8) s + 789 3078 mt 812 3078 L +5492 3078 mt 5468 3078 L +DO + 789 3078 mt 5492 3078 L +5492 3078 mt 5492 3078 L +SO + 789 2978 mt 812 2978 L +5492 2978 mt 5468 2978 L +DO + 789 2978 mt 5492 2978 L +5492 2978 mt 5492 2978 L +SO + 789 2907 mt 812 2907 L +5492 2907 mt 5468 2907 L +DO + 789 2907 mt 5492 2907 L +5492 2907 mt 5492 2907 L +SO + 789 2852 mt 812 2852 L +5492 2852 mt 5468 2852 L +DO + 789 2852 mt 5492 2852 L +5492 2852 mt 5492 2852 L +SO + 789 2808 mt 812 2808 L +5492 2808 mt 5468 2808 L +DO + 789 2808 mt 5492 2808 L +5492 2808 mt 5492 2808 L +SO + 789 2770 mt 812 2770 L +5492 2770 mt 5468 2770 L +DO + 789 2770 mt 5492 2770 L +5492 2770 mt 5492 2770 L +SO + 789 2737 mt 812 2737 L +5492 2737 mt 5468 2737 L +DO + 789 2737 mt 5492 2737 L +5492 2737 mt 5492 2737 L +SO + 789 2708 mt 812 2708 L +5492 2708 mt 5468 2708 L +DO + 789 2708 mt 5492 2708 L +5492 2708 mt 5492 2708 L +SO + 789 2682 mt 812 2682 L +5492 2682 mt 5468 2682 L +DO + 789 2682 mt 5492 2682 L +5492 2682 mt 5492 2682 L +SO + 789 2682 mt 836 2682 L +5492 2682 mt 5444 2682 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 144 FMSR + + 432 2735 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 96 FMSR + + 592 2646 mt +(-7) s + 789 2512 mt 812 2512 L +5492 2512 mt 5468 2512 L +DO + 789 2512 mt 5492 2512 L +5492 2512 mt 5492 2512 L +SO + 789 2412 mt 812 2412 L +5492 2412 mt 5468 2412 L +DO + 789 2412 mt 5492 2412 L +5492 2412 mt 5492 2412 L +SO + 789 2341 mt 812 2341 L +5492 2341 mt 5468 2341 L +DO + 789 2341 mt 5492 2341 L +5492 2341 mt 5492 2341 L +SO + 789 2286 mt 812 2286 L +5492 2286 mt 5468 2286 L +DO + 789 2286 mt 5492 2286 L +5492 2286 mt 5492 2286 L +SO + 789 2241 mt 812 2241 L +5492 2241 mt 5468 2241 L +DO + 789 2241 mt 5492 2241 L +5492 2241 mt 5492 2241 L +SO + 789 2203 mt 812 2203 L +5492 2203 mt 5468 2203 L +DO + 789 2203 mt 5492 2203 L +5492 2203 mt 5492 2203 L +SO + 789 2171 mt 812 2171 L +5492 2171 mt 5468 2171 L +DO + 789 2171 mt 5492 2171 L +5492 2171 mt 5492 2171 L +SO + 789 2142 mt 812 2142 L +5492 2142 mt 5468 2142 L +DO + 789 2142 mt 5492 2142 L +5492 2142 mt 5492 2142 L +SO + 789 2116 mt 812 2116 L +5492 2116 mt 5468 2116 L +DO + 789 2116 mt 5492 2116 L +5492 2116 mt 5492 2116 L +SO + 789 2116 mt 836 2116 L +5492 2116 mt 5444 2116 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 144 FMSR + + 432 2169 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 96 FMSR + + 592 2080 mt +(-6) s + 789 1945 mt 812 1945 L +5492 1945 mt 5468 1945 L +DO + 789 1945 mt 5492 1945 L +5492 1945 mt 5492 1945 L +SO + 789 1845 mt 812 1845 L +5492 1845 mt 5468 1845 L +DO + 789 1845 mt 5492 1845 L +5492 1845 mt 5492 1845 L +SO + 789 1775 mt 812 1775 L +5492 1775 mt 5468 1775 L +DO + 789 1775 mt 5492 1775 L +5492 1775 mt 5492 1775 L +SO + 789 1720 mt 812 1720 L +5492 1720 mt 5468 1720 L +DO + 789 1720 mt 5492 1720 L +5492 1720 mt 5492 1720 L +SO + 789 1675 mt 812 1675 L +5492 1675 mt 5468 1675 L +DO + 789 1675 mt 5492 1675 L +5492 1675 mt 5492 1675 L +SO + 789 1637 mt 812 1637 L +5492 1637 mt 5468 1637 L +DO + 789 1637 mt 5492 1637 L +5492 1637 mt 5492 1637 L +SO + 789 1604 mt 812 1604 L +5492 1604 mt 5468 1604 L +DO + 789 1604 mt 5492 1604 L +5492 1604 mt 5492 1604 L +SO + 789 1575 mt 812 1575 L +5492 1575 mt 5468 1575 L +DO + 789 1575 mt 5492 1575 L +5492 1575 mt 5492 1575 L +SO + 789 1549 mt 812 1549 L +5492 1549 mt 5468 1549 L +DO + 789 1549 mt 5492 1549 L +5492 1549 mt 5492 1549 L +SO + 789 1549 mt 836 1549 L +5492 1549 mt 5444 1549 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 144 FMSR + + 432 1602 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 96 FMSR + + 592 1513 mt +(-5) s + 789 1379 mt 812 1379 L +5492 1379 mt 5468 1379 L +DO + 789 1379 mt 5492 1379 L +5492 1379 mt 5492 1379 L +SO + 789 1279 mt 812 1279 L +5492 1279 mt 5468 1279 L +DO + 789 1279 mt 5492 1279 L +5492 1279 mt 5492 1279 L +SO + 789 1208 mt 812 1208 L +5492 1208 mt 5468 1208 L +DO + 789 1208 mt 5492 1208 L +5492 1208 mt 5492 1208 L +SO + 789 1153 mt 812 1153 L +5492 1153 mt 5468 1153 L +DO + 789 1153 mt 5492 1153 L +5492 1153 mt 5492 1153 L +SO + 789 1109 mt 812 1109 L +5492 1109 mt 5468 1109 L +DO + 789 1109 mt 5492 1109 L +5492 1109 mt 5492 1109 L +SO + 789 1071 mt 812 1071 L +5492 1071 mt 5468 1071 L +DO + 789 1071 mt 5492 1071 L +5492 1071 mt 5492 1071 L +SO + 789 1038 mt 812 1038 L +5492 1038 mt 5468 1038 L +DO + 789 1038 mt 5492 1038 L +5492 1038 mt 5492 1038 L +SO + 789 1009 mt 812 1009 L +5492 1009 mt 5468 1009 L +DO + 789 1009 mt 5492 1009 L +5492 1009 mt 5492 1009 L +SO + 789 983 mt 812 983 L +5492 983 mt 5468 983 L +DO + 789 983 mt 5492 983 L +5492 983 mt 5492 983 L +SO + 789 983 mt 836 983 L +5492 983 mt 5444 983 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 144 FMSR + + 432 1036 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 96 FMSR + + 592 947 mt +(-4) s + 789 812 mt 812 812 L +5492 812 mt 5468 812 L +DO + 789 812 mt 5492 812 L +5492 812 mt 5492 812 L +SO + 789 713 mt 812 713 L +5492 713 mt 5468 713 L +DO + 789 713 mt 5492 713 L +5492 713 mt 5492 713 L +SO + 789 642 mt 812 642 L +5492 642 mt 5468 642 L +DO + 789 642 mt 5492 642 L +5492 642 mt 5492 642 L +SO + 789 587 mt 812 587 L +5492 587 mt 5468 587 L +DO + 789 587 mt 5492 587 L +5492 587 mt 5492 587 L +SO + 789 542 mt 812 542 L +5492 542 mt 5468 542 L +DO + 789 542 mt 5492 542 L +5492 542 mt 5492 542 L +SO + 789 504 mt 812 504 L +5492 504 mt 5468 504 L +DO + 789 504 mt 5492 504 L +5492 504 mt 5492 504 L +SO + 789 471 mt 812 471 L +5492 471 mt 5468 471 L +DO + 789 471 mt 5492 471 L +5492 471 mt 5492 471 L +SO + 789 442 mt 812 442 L +5492 442 mt 5468 442 L +DO + 789 442 mt 5492 442 L +5492 442 mt 5492 442 L +SO + 789 417 mt 812 417 L +5492 417 mt 5468 417 L +DO + 789 417 mt 5492 417 L +5492 417 mt 5492 417 L +SO + 789 417 mt 836 417 L +5492 417 mt 5444 417 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 144 FMSR + + 432 470 mt +(10) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 96 FMSR + + 592 381 mt +(-3) s + 789 4948 mt 5492 4948 L + 789 417 mt 5492 417 L + 789 4948 mt 789 417 L +5492 4948 mt 5492 417 L +gs 789 417 4704 4532 rc +gr + +/c8 { 0.000000 0.000000 1.000000 sr} bdef +c8 + 36 36 1108 1070 FO + 36 36 1580 1744 FO + 36 36 2052 2314 FO + 36 36 2524 2842 FO + 36 36 2996 3358 FO + 36 36 3468 3870 FO + 36 36 3940 4382 FO +gs 789 417 4704 4532 rc +/c9 { 0.000000 0.500000 0.000000 sr} bdef +c9 +472 544 472 544 472 544 472 544 472 544 472 544 1108 1165 7 MP stroke +gr + +c9 +0 sg +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + +2322 5338 mt +(Number of Intervals N) s + 340 3397 mt -90 rotate +(Discretization Error) s +90 rotate +gs 789 417 4704 4532 rc +/c10 { 0.847059 0.160784 0.000000 sr} bdef +c10 +472 230 472 230 472 229 472 230 472 230 472 230 1108 677 7 MP stroke +gr + +c10 + 36 36 1108 647 FO + 36 36 1580 894 FO + 36 36 2052 1157 FO + 36 36 2524 1403 FO + 36 36 2996 1625 FO + 36 36 3468 1827 FO + 36 36 3940 2014 FO +gs 789 417 4704 4532 rc +gr + +0 sg +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 216 FMSR + +2289 287 mt +(Quadratic Splines) s +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 120 FMSR + + 772 4991 mt +( ) s +5476 459 mt +( ) s +2112 3946 mt 3053 3946 L +3053 3946 mt 3053 3619 L +2112 3946 mt 2112 3619 L +2112 3619 mt 3053 3619 L +%%IncludeResource: font Helvetica +/Helvetica /ISOLatin1Encoding 168 FMSR + +2173 3801 mt +(Cartesian) s +3651 1788 mt 4506 1788 L +4506 1788 mt 4506 1461 L +3651 1788 mt 3651 1461 L +3651 1461 mt 4506 1461 L +3712 1643 mt +(Periodic) s + +end %%Color Dict + +eplot +%%EndObject + +epage +end + +showpage + +%%Trailer +%%EOF diff --git a/multigrid/src/CMakeLists.txt b/multigrid/src/CMakeLists.txt new file mode 100644 index 0000000..cacde7a --- /dev/null +++ b/multigrid/src/CMakeLists.txt @@ -0,0 +1,92 @@ +/** + * @file CMakeLists.txt + * + * @brief + * + * @copyright + * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) + * SPC (Swiss Plasma Center) + * + * spclibs is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * + * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + * + * @authors + * (in alphabetical order) + * @author Nicolas Richart + * @author Trach-Minh Tran + */ +project(multigrid_tests) + +if(${CMAKE_Fortran_COMPILER_ID} MATCHES "Intel") +# set(CMAKE_Fortran_FLAGS_RELEASE +# "${CMAKE_Fortran_FLAGS_RELEASE} -profile-functions -profile-loops=outer" +# ) + set(CMAKE_Fortran_FLAGS_DEBUG + "${CMAKE_Fortran_FLAGS_DEBUG} -fpe0" + ) +endif() + +set(MG_TESTS + transfer1d + test_relax + test_mg + test_mgp + test_csr + two_grid + test_mg2d + test_relax2d + test_transf2d + transfer1d_col + test_relax2d_cyl + test_transf2d_cyl + test_mg2d_cyl +) + +foreach(test ${MG_TESTS}) + add_executable(${test} ${test}.f90) + target_link_libraries(${test} bsplines) +endforeach() + +add_executable(poisson_fd poisson_fd.f90 fdmat_mod.f90 stencil_mod.f90 parmg_mod.f90 gvector_mod.f90) +target_link_libraries(poisson_fd bsplines) +set(TESTS ${TESTS} poisson_mg) + +add_executable(partition partition.f90 stencil_mod.f90 parmg_mod.f90 gvector_mod.f90) +target_link_libraries(partition bsplines) + +# Fail to compile with crayftn 4.0.46 on ROSA/DAINT +add_executable(test_stencil test_stencil.f90 stencil_mod.f90 gvector_mod.f90) +target_link_libraries(test_stencil bsplines) + +add_executable(test_stencilg test_stencilg.f90 stencil_mod.f90 parmg_mod.f90 gvector_mod.f90) +target_link_libraries(test_stencilg bsplines) + +# Fail to compile with crayftn 4.0.46 on ROSA/DAINT +add_executable(test_jacobi test_jacobi.f90 stencil_mod.f90 parmg_mod.f90 gvector_mod.f90) +target_link_libraries(test_jacobi bsplines) + +add_executable(test_jacobig test_jacobig.f90 fdmat_mod.f90 stencil_mod.f90 parmg_mod.f90 gvector_mod.f90) +target_link_libraries(test_jacobig bsplines) + +add_executable(ppoisson_fd ppoisson_fd.f90 fdmat_mod.f90 stencil_mod.f90 parmg_mod.f90 gvector_mod.f90) +target_link_libraries(ppoisson_fd bsplines) + +add_executable(test_gvec1d test_gvec1d.f90) +target_link_libraries(test_gvec1d bsplines) + +add_executable(test_intergrid0 test_intergrid0.f90) +target_link_libraries(test_intergrid0 bsplines) + +add_executable(test_intergrid1 test_intergrid1.f90 parmg_mod.f90 gvector_mod.f90 stencil_mod.f90) +target_link_libraries(test_intergrid1 bsplines) + +include_directories(${CMAKE_CURRENT_BINARY_DIR}) diff --git a/multigrid/src/Makefile b/multigrid/src/Makefile new file mode 100644 index 0000000..5748f81 --- /dev/null +++ b/multigrid/src/Makefile @@ -0,0 +1,136 @@ +# +# @file Makefile +# +# @brief +# +# @copyright +# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +# SPC (Swiss Plasma Center) +# +# spclibs is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . +# +# @authors +# (in alphabetical order) +# @author Trach-Minh Tran +# +MPIF90 = mpif90 +LD = $(MPIF90) + +# F90FLAGS = -I$(HOME)/include/O -I$(PPUTILS2) +# LDFLAGS = -L$(HOME)/lib/O -L${HDF5}/lib +F90FLAGS = -I$(FUTILS)/include -I$(BSPLINES)/include +LDFLAGS = -mkl=cluster -L$(FUTILS)/lib -L$(BSPLINES)/lib -L${HDF5}/lib + +MODS = gvector_mod.o stencil_mod.o +LIBS = $(MODS) -lbsplines -lpppack -lpputils2 -lfutils \ + -lhdf5_fortran -lhdf5 -lz + +ifdef MKL +SPBLAS = -DMKL +endif + +ifdef MUMPS +F90FLAGS += -I$(MUMPS)/include +LDFLAGS += -L$(MUMPS)/lib +LIBS += $(MUMPSLIBS) +endif + +all: transfer1d test_relax test_mg test_mgp test_csr two_grid \ + test_mg2d test_relax2d test_transf2d transfer1d_col \ + test_relax2d_cyl test_transf2d_cyl test_mg2d_cyl poisson_fd + +.SUFFIXES: +.SUFFIXES: .o .f90 + +.f90.o: + $(MPIF90) $(F90FLAGS) -c $< + +partition: partition.o + $(LD) $(LDFLAGS) -o $@ $< parmg_mod.o $(MODS) -lpputils2 -lfutils \ + -lhdf5_fortran -lhdf5 -lz + +transfer1d: transfer1d.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) + +test_relax: test_relax.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) + +test_mg: test_mg.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) + +test_mgp: test_mgp.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) + +test_csr: test_csr.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) + +two_grid: two_grid.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) + +test_mg2d: test_mg2d.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) + +test_relax2d: test_relax2d.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) + +test_transf2d: test_transf2d.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) + +transfer1d_col: transfer1d_col.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) + +test_relax2d_cyl: test_relax2d_cyl.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) + +test_transf2d_cyl: test_transf2d_cyl.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) + +test_mg2d_cyl: test_mg2d_cyl.o + $(LD) $(LDFLAGS) -o $@ $< $(LIBS) + +poisson_fd: poisson_fd.o + $(LD) $(LDFLAGS) -o $@ $< fdmat_mod.o $(LIBS) + +csr_mod.o: csr_mod.f90 + $(MPIF90) $(FPP) $(SPBLAS) $(F90FLAGS) -c csr_mod.f90 + +parmg_mod.o: gvector_mod.o stencil_mod.o +partition.o: parmg_mod.o +transfer1d.o: $(MODS) +test_relax.o: $(MODS) +test_mg.o: $(MODS) +test_mgp.o: $(MODS) +test_csr.o: $(MODS) +two_grid.o: $(MODS) +test_mg2d.o: $(MODS) +test_relax2d.o: $(MODS) +test_transf2d.o: $(MODS) +transfer1d_col.o: $(MODS) +test_relax2d_cyl.o: $(MODS) +test_transf2d_cyl.o: $(MODS) +test_mg2d_cyl.o: $(MODS) +poisson_fd.o: fdmat_mod.o +fdmat_mod.o: stencil_mod.o parmg_mod.o +stencil_mod.o: gvector_mod.o +parmg_mod.o: gvector_mod.o stencil_mod.o + +clean: + rm -f *.o *.mod *~ ../wk/*~ a.out lib + +distclean: clean + rm -f ../wk/*.h5 ../wk/fort.* *.eps \ + transfer1d test_relax test_mg test_mgp test_csr two_grid \ + test_mg2d test_relax2d test_transf2d transfer1d_col \ + test_relax2d_cyl test_transf2d_cyl test_mg2d_cyl + +#include $(HOST).mk diff --git a/multigrid/src/README_mod.txt b/multigrid/src/README_mod.txt new file mode 100644 index 0000000..224e499 --- /dev/null +++ b/multigrid/src/README_mod.txt @@ -0,0 +1,91 @@ +/** + * @file README_mod.txt + * + * @brief + * + * @copyright + * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) + * SPC (Swiss Plasma Center) + * + * spclibs is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * + * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + * + * @authors + * (in alphabetical order) + * @author Trach-Minh Tran + */ +1) Module parmg + ============ + - types: + grid2_type + INTEGER, DIMENSION(2) :: s, e, s0, e0, npt_loc, npt + REAL(rkind), ALLOCATABLE :: x(:), y(:) + TYPE(gvector_2d) :: f, v + TYPE(stencil_2d) :: fdmat, restrict_mat + + - module procedures: + creat_grid + coarse (1d, 2d) + exchange (gvector) + prolong (gvector) + restrict (gvector) + jacobi + get_resids + init_restrict (gvector) + disp (0d, 1d array of int) + get_lmax + + - Uses + gvector + stencil + +2) Module stencil + ============== + - types: + LOGICAL :: nluni + INTEGER, DIMENSION(2) :: ldim, gdim, s0, e0, s, e + INTEGER :: npoints + INTEGER, ALLOCATABLE :: id(:,:) + REAL(rkind), ALLOCATABLE :: val(:,:,:) + + - module procedures: + init + vmx + laplacian + putmat + + - operators: + *: vmx + + - Uses + gvector + +3) Module gvector + ============== + - types: + gvector_2d + INTEGER, DIMENSION(2) :: s, e ! vector internal bounds + INTEGER, DIMENSION(2) :: g ! ghost cell widths + REAL(rkind), ALLOCATABLE :: val(:,:) + + - module procedures: + constructor (gvector_2d) + disp + norm2 (serial, mpi) + + - operators: + + : add_scal, add_vec + - : minus_vec, substract_vec + * : scale_left, scale_right + + - assignment: + = : from_scal, from_vec diff --git a/multigrid/src/fdmat_mod.f90 b/multigrid/src/fdmat_mod.f90 new file mode 100644 index 0000000..c925169 --- /dev/null +++ b/multigrid/src/fdmat_mod.f90 @@ -0,0 +1,600 @@ +!> +!> @file fdmat_mod.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +MODULE fdmat_mod +! + USE multigrid + IMPLICIT NONE +! + INTERFACE fdmat + MODULE PROCEDURE fdmat_stencil + MODULE PROCEDURE fdmat_gen, fdmat_csr, fdmat_cds + END INTERFACE fdmat + INTERFACE ibc_fdmat + MODULE PROCEDURE ibc_fdmat_stencil + MODULE PROCEDURE ibc_fdmat_gen, ibc_fdmat_csr, ibc_fdmat_cds + END INTERFACE ibc_fdmat + INTERFACE ibc_rhs + MODULE PROCEDURE ibc_rhs_g + END INTERFACE ibc_rhs +! +CONTAINS +!-------------------------------------------------------------------------------- + SUBROUTINE fdmat_stencil(grid, fdense, icrosst, mat) +! +! Construct model GBS FD partitioned matrix +! + USE iso_fortran_env, ONLY : rkind => real64 + USE stencil, ONLY : stencil_2d + USE parmg, ONLY : grid2_type +! + TYPE(grid2_type) :: grid + REAL(rkind), INTENT(in) :: icrosst + TYPE(stencil_2d) :: mat + INTERFACE + FUNCTION fdense(x) + USE iso_fortran_env, ONLY : rkind => real64 + REAL(rkind), INTENT(in) :: x(:) + REAL(rkind) :: fdense(SIZE(x)) + END FUNCTION fdense + END INTERFACE +! + REAL(rkind) :: dx, dy + REAL(rkind),ALLOCATABLE :: dense(:) + REAL(rkind) :: stencil_arr(-1:1,-1:1), zdiag(-1:1,-1:1), corr + INTEGER :: nx, ny, i, j, k, d(2) +! +! Grid properties +! + nx = grid%npt(1)-1 + ny = grid%npt(2)-1 + dx = grid%x(1)-grid%x(0) ! Assume equidistant grid + dy = grid%y(1)-grid%y(0) + ALLOCATE(dense(0:nx)) ! electron densoty vary only along x +! +! Stencil array +! + stencil_arr = 0.0d0 + zdiag = 0.0d0 + corr = 1.d0+icrosst**2/4.0d0 + stencil_arr(0,0) = -2.0d0/dx/dx-2.0d0/dy/dy*corr + stencil_arr(-1,0) = 1.0/dx/dx + stencil_arr(1,0) = 1.0/dx/dx + stencil_arr(0,-1) = 1.0/dy/dy*corr + stencil_arr(0,1) = 1.0/dy/dy*corr + stencil_arr(-1,-1) = icrosst*1.0/4.0/dx/dy + stencil_arr(1,-1) = icrosst*(-1.0/4.0/dx/dy) + stencil_arr(-1,1) = icrosst*(-1.0/4.0/dx/dy) + stencil_arr(1,1) = icrosst*(1.0/4.0/dx/dy) + zdiag(0,0) = 1.0d0 + dense(:) = fdense(grid%x(:)) +! +! Assemble the stencil by scanning local grid points +! + DO k=0,mat%npoints-1 + d(:) = mat%id(k,:) + DO j=mat%s(2),mat%e(2) + DO i=mat%s(1),mat%e(1) + mat%val(i,j,k) = stencil_arr(d(1),d(2)) + & + & zdiag(d(1),d(2))*dense(i) + END DO + END DO + END DO +! + DEALLOCATE(dense) + END SUBROUTINE fdmat_stencil +!-------------------------------------------------------------------------------- + SUBROUTINE ibc_rhs_g(f, s0, e0, prb) +! +! Impose BC on rhs +! + USE iso_fortran_env, ONLY : rkind => real64 + USE gvector, ONLY : gvector_2d +! + TYPE(gvector_2d), INTENT(inout) :: f + CHARACTER(len=*), INTENT(in) :: prb + INTEGER, INTENT(in) :: s0(2), e0(2) + INTEGER :: s(2), e(2) +! + s = f%s + e = f%e +! + IF(s(1).EQ.s0(1)) THEN + IF(prb(1:1).EQ.'d') THEN ! West face + f%val(s(1),s(2):e(2)) = 0.0_rkind + ELSE + f%val(s(1),s(2):e(2)) = 0.5_rkind*f%val(s(1),s(2):e(2)) + END IF + END IF + IF(e(1).EQ.e0(1)) THEN + IF(prb(2:2).EQ.'d') THEN ! East face + f%val(e(1),s(2):e(2)) = 0.0_rkind + ELSE + f%val(e(1),s(2):e(2)) = 0.5_rkind*f%val(e(1),s(2):e(2)) + END IF + END IF + IF(s(2).EQ.s0(2)) THEN + IF(prb(3:3).EQ.'d') THEN ! South face + f%val(s(1):e(1),s(2)) = 0.0_rkind + ELSE + f%val(s(1):e(1),s(2)) = 0.5_rkind*f%val(s(1):e(1),s(2)) + END IF + END IF + IF(e(2).EQ.e0(2)) THEN + IF(prb(4:4).EQ.'d') THEN ! North face + f%val(s(1):e(1),e(2)) = 0.0_rkind + ELSE + f%val(s(1):e(1),e(2)) = 0.5_rkind*f%val(s(1):e(1),e(2)) + END IF + END IF + END SUBROUTINE ibc_rhs_g +!-------------------------------------------------------------------------------- + SUBROUTINE ibc_fdmat_stencil(mat, prb) +! +! Impose BC on matrix +! + USE iso_fortran_env, ONLY : rkind => real64 + USE stencil, ONLY : stencil_2d +! + TYPE(stencil_2d), INTENT(inout) :: mat + CHARACTER(len=*), INTENT(in) :: prb +! + INTEGER :: s0(2), e0(2), s(2), e(2) +! + s0 = mat%s0 + e0 = mat%e0 + s = mat%s + e = mat%e +! +! Neumann BC +! WARNING: Divide the stencil by 2 => should do the same for RHS! +! +! N +! 6---7---8 +! | | | +! W 4---0---5 E Numbering of stencil +! | | | +! 1---2---3 +! S +! + IF(s(1).EQ.s0(1) .AND. prb(1:1).EQ.'n') THEN ! West face + mat%val(s(1),s(2):e(2),1) = 0.0_rkind + mat%val(s(1),s(2):e(2),3) = 0.0_rkind + mat%val(s(1),s(2):e(2),4) = 0.0_rkind + mat%val(s(1),s(2):e(2),5) = 2.0d0*mat%val(s(1),s(2):e(2),5) + mat%val(s(1),s(2):e(2),6) = 0.0_rkind + mat%val(s(1),s(2):e(2),8) = 0.0_rkind + mat%val(s(1),s(2):e(2),:) = 0.5_rkind*mat%val(s(1),s(2):e(2),:) + END IF + IF(e(1).EQ.e0(1) .AND. prb(2:2).EQ.'n') THEN ! East face + mat%val(e(1),s(2):e(2),1) = 0.0_rkind + mat%val(e(1),s(2):e(2),3) = 0.0_rkind + mat%val(e(1),s(2):e(2),4) = 2.0d0*mat%val(e(1),s(2):e(2),4) + mat%val(e(1),s(2):e(2),5) = 0.0_rkind + mat%val(e(1),s(2):e(2),6) = 0.0_rkind + mat%val(e(1),s(2):e(2),8) = 0.0_rkind + mat%val(e(1),s(2):e(2),:) = 0.5_rkind*mat%val(e(1),s(2):e(2),:) + END IF + IF(s(2).EQ.s0(2) .AND. prb(3:3).EQ.'n') THEN ! South face + mat%val(s(1):e(1),s(2),1) = 0.0_rkind + mat%val(s(1):e(1),s(2),2) = 0.0_rkind + mat%val(s(1):e(1),s(2),3) = 0.0_rkind + mat%val(s(1):e(1),s(2),6) = 0.0_rkind + mat%val(s(1):e(1),s(2),7) = 2.0d0*mat%val(s(1):e(1),s(2),7) + mat%val(s(1):e(1),s(2),8) = 0.0_rkind + mat%val(s(1):e(1),s(2),:) = 0.5_rkind*mat%val(s(1):e(1),s(2),:) + END IF + IF(e(2).EQ.e0(2) .AND. prb(4:4).EQ.'n') THEN ! North face + mat%val(s(1):e(1),e(2),1) = 0.0_rkind + mat%val(s(1):e(1),e(2),2) = 2.0d0*mat%val(s(1):e(1),e(2),2) + mat%val(s(1):e(1),e(2),3) = 0.0_rkind + mat%val(s(1):e(1),e(2),6) = 0.0_rkind + mat%val(s(1):e(1),e(2),7) = 0.0_rkind + mat%val(s(1):e(1),e(2),8) = 0.0_rkind + mat%val(s(1):e(1),e(2),:) = 0.5_rkind*mat%val(s(1):e(1),e(2),:) + END IF +! +! Dirichlet BC +! + IF(s(1).EQ.s0(1) .AND. prb(1:1).EQ.'d') THEN ! West face + mat%val(s(1),s(2):e(2),:) = 0.0_rkind + mat%val(s(1),s(2):e(2),0) = 1.0_rkind + END IF + IF(e(1).EQ.e0(1) .AND. prb(2:2).EQ.'d') THEN ! East face + mat%val(e(1),s(2):e(2),:) = 0.0_rkind + mat%val(e(1),s(2):e(2),0) = 1.0_rkind + END IF + IF(s(2).EQ.s0(2) .AND. prb(3:3).EQ.'d') THEN ! South face + mat%val(s(1):e(1),s(2),:) = 0.0_rkind + mat%val(s(1):e(1),s(2),0) = 1.0_rkind + END IF + IF(e(2).EQ.e0(2) .AND. prb(4:4).EQ.'d') THEN ! North face + mat%val(s(1):e(1),e(2),:) = 0.0_rkind + mat%val(s(1):e(1),e(2),0) = 1.0_rkind + END IF + END SUBROUTINE ibc_fdmat_stencil +!-------------------------------------------------------------------------------- + SUBROUTINE fdmat_gen(grid, fdense, icrosst, noinit) +! +! Generic version +! + TYPE(grid2d), INTENT(inout) :: grid + DOUBLE PRECISION, INTENT(in) :: icrosst + LOGICAL, INTENT(in), OPTIONAL :: noinit + INTERFACE + FUNCTION fdense(x) + DOUBLE PRECISION, INTENT(in) :: x(:) + DOUBLE PRECISION :: fdense(SIZE(x)) + END FUNCTION fdense + END INTERFACE +! + IF(ALLOCATED(grid%mata)) THEN + CALL fdmat_csr(grid, fdense, icrosst, grid%mata, noinit) + ELSE + CALL fdmat_cds(grid, fdense, icrosst, grid%mata_cds, noinit) + END IF + END SUBROUTINE fdmat_gen +!-------------------------------------------------------------------------------- + SUBROUTINE fdmat_cds(grid, fdense, icrosst, mat, noinit) +! +! Construct FD matrix +! + TYPE(grid2d), INTENT(in) :: grid + DOUBLE PRECISION, INTENT(in) :: icrosst + TYPE(cds_mat), INTENT(inout) :: mat + LOGICAL, INTENT(in), OPTIONAL :: noinit + INTERFACE + FUNCTION fdense(x) + DOUBLE PRECISION, INTENT(in) :: x(:) + DOUBLE PRECISION :: fdense(SIZE(x)) + END FUNCTION fdense + END INTERFACE +! + INTEGER :: n, nx, ny + INTEGER :: kl, ku, k + INTEGER :: ix, ix2, jx, iy, jy, iy2, irow + DOUBLE PRECISION :: lx, ly, dx, dy, mele + DOUBLE PRECISION :: dense(0:grid%n(1)) + DOUBLE PRECISION :: stencil(-1:1,-1:1), zdiag(-1:1,-1:1) + DOUBLE PRECISION :: corr + LOGICAL :: run_init + INTEGER, ALLOCATABLE :: dists(:) +!-------------------------------------------------------------------------------- + run_init = .TRUE. + IF(PRESENT(noinit)) run_init = .NOT.noinit +! +! Grid properties +! + nx = grid%n(1) + ny = grid%n(2) + dx = grid%x(1) - grid%x(0) + dy = grid%y(1) - grid%y(0) + lx = grid%x(nx) + ly = grid%y(ny) + n = PRODUCT(grid%rank) ! Rank of matrix +! +! Stencil +! + stencil = 0.0d0 + zdiag = 0.0d0 +! + corr = 1.d0+icrosst**2/4.0d0 + stencil(0,0) = -2.0d0/dx/dx-2.0d0/dy/dy*corr + stencil(-1,0) = 1.0/dx/dx + stencil(1,0) = 1.0/dx/dx + stencil(0,-1) = 1.0/dy/dy*corr + stencil(0,1) = 1.0/dy/dy*corr + stencil(-1,-1) = icrosst*1.0/4.0/dx/dy + stencil(1,-1) = icrosst*(-1.0/4.0/dx/dy) + stencil(-1,1) = icrosst*(-1.0/4.0/dx/dy) + stencil(1,1) = icrosst*(1.0/4.0/dx/dy) + zdiag(0,0) = 1.0d0 +! +! 9-point stencil "diagonal storage" +! + kl=4 + ku=4 + ALLOCATE(dists(-kl:ku)) + DO iy2=-1,1 + DO ix2=-1,1 + k=3*iy2+ix2 + dists(k) = iy2*(nx+1) + ix2 + END DO + END DO +! + IF(run_init) THEN + CALL init(n, dists, 1, mat) + END IF +! +! Assemble matrix by scanning all grid points +! + dense(:) = fdense(grid%x(:)) + DO iy=0,ny + DO ix=0,nx + irow = iy*(nx+1)+ix+1 + DO iy2=-1,1 + jy=iy+iy2 + IF(jy.GE.0 .AND. jy.LE.ny) THEN + DO ix2=-1,1 + jx=ix+ix2 + IF(jx.GE.0 .AND.jx.LE.nx) THEN + mele = stencil(ix2,iy2) + zdiag(ix2,iy2)*dense(ix) + k=3*iy2+ix2 + mat%val(irow,k) = mele + END IF + END DO + END IF + END DO + END DO + END DO +! + DEALLOCATE(dists) + END SUBROUTINE fdmat_cds +!-------------------------------------------------------------------------------- + SUBROUTINE fdmat_csr(grid, fdense, icrosst, mat, noinit) +! +! Construct FD matrix +! + TYPE(grid2d), INTENT(in) :: grid + DOUBLE PRECISION, INTENT(in) :: icrosst + TYPE(csr_mat), INTENT(inout) :: mat + LOGICAL, INTENT(in), OPTIONAL :: noinit + INTERFACE + FUNCTION fdense(x) + DOUBLE PRECISION, INTENT(in) :: x(:) + DOUBLE PRECISION :: fdense(SIZE(x)) + END FUNCTION fdense + END INTERFACE +! + INTEGER :: n, nx, ny + INTEGER :: ix, ix2, jx, iy, jy, iy2, irow, icol + DOUBLE PRECISION :: lx, ly, dx, dy, mele + DOUBLE PRECISION :: dense(0:grid%n(1)) + DOUBLE PRECISION :: stencil(-1:1,-1:1), zdiag(-1:1,-1:1) + DOUBLE PRECISION :: corr + LOGICAL :: run_init +!-------------------------------------------------------------------------------- + run_init = .TRUE. + IF(PRESENT(noinit)) run_init = .NOT.noinit +! +! Grid properties +! + nx = grid%n(1) + ny = grid%n(2) + dx = grid%x(1) - grid%x(0) + dy = grid%y(1) - grid%y(0) + lx = grid%x(nx) + ly = grid%y(ny) + n = PRODUCT(grid%rank) ! Rank of matrix +! +! Stencil +! + stencil = 0.0d0 + zdiag = 0.0d0 +! + corr = 1.d0+icrosst**2/4.0d0 + stencil(0,0) = -2.0d0/dx/dx-2.0d0/dy/dy*corr + stencil(-1,0) = 1.0/dx/dx + stencil(1,0) = 1.0/dx/dx + stencil(0,-1) = 1.0/dy/dy*corr + stencil(0,1) = 1.0/dy/dy*corr + stencil(-1,-1) = icrosst*1.0/4.0/dx/dy + stencil(1,-1) = icrosst*(-1.0/4.0/dx/dy) + stencil(-1,1) = icrosst*(-1.0/4.0/dx/dy) + stencil(1,1) = icrosst*(1.0/4.0/dx/dy) + zdiag(0,0) = 1.0d0 +! +! Create CSR matrix + IF(run_init) THEN + CALL init(n, 1, mat) + END IF +! +! Assemble matrix by scanning all grid points +! + dense(:) = fdense(grid%x(:)) + DO iy=0,ny + DO ix=0,nx + irow = numb(ix,iy) + DO iy2=-1,1 + jy=iy+iy2 + IF(jy.GE.0 .AND. jy.LE.ny) THEN + DO ix2=-1,1 + jx=ix+ix2 + IF(jx.GE.0 .AND.jx.LE.nx) THEN + icol=numb(jx,jy) + mele = stencil(ix2,iy2) + zdiag(ix2,iy2)*dense(ix) + CALL putele(mat, irow, icol,mele) + END IF + END DO + END IF + END DO + END DO + END DO +!-------------------------------------------------------------------------------- + CONTAINS + INTEGER FUNCTION numb(ix,iy) + INTEGER, INTENT(in) :: ix, iy + INTEGER :: stride + stride = grid%rank(1) + numb = iy*stride + (ix+1) + END FUNCTION numb +!-------------------------------------------------------------------------------- + END SUBROUTINE fdmat_csr +!++ + SUBROUTINE ibc_fdmat_gen(grid, prb) +! +! Generic version +! + TYPE(grid2d), INTENT(inout) :: grid + CHARACTER(len=*), INTENT(in) :: prb +! + IF(ALLOCATED(grid%mata)) THEN + CALL ibc_fdmat_csr(grid, grid%mata, prb) + ELSE + CALL ibc_fdmat_cds(grid, grid%mata_cds, prb) + END IF +! + END SUBROUTINE ibc_fdmat_gen +!++ + SUBROUTINE ibc_fdmat_csr(grid, mat, prb) +! +! Impose BC +! + TYPE(grid2d), INTENT(in) :: grid + TYPE(csr_mat), INTENT(inout) :: mat + CHARACTER(len=*), INTENT(in) :: prb +! + DOUBLE PRECISION :: arow(mat%rank) + INTEGER :: nx, ny, nx1, ny1, n, iy, irow, irow1 +!-------------------------------------------------------------------------------- + nx = grid%n(1) + ny = grid%n(2) + nx1=nx+1 + ny1=ny+1 + n = nx1*ny1 +! +! Dirichelt BC on West/East +! + IF(prb.EQ.'dddd') THEN + DO irow=1,ny*nx1+1,nx1 + arow=0.0d0; arow(irow)=1.0d0 + CALL putrow(mat, irow, arow) + irow1=irow+nx + arow=0.0d0; arow(irow1)=1.0d0 + CALL putrow(mat, irow1, arow) + END DO +! +! Neumann on West/East +! WARNING: Divide the stencil by 2 => should do the same for RHS! +! + ELSE IF(prb.EQ.'nndd') THEN + DO irow=1,ny*nx1+1,nx1 + iy = irow/nx1 + CALL getrow(mat, irow, arow) + arow(irow+1) = 2.0d0*arow(irow+1) + IF(iy.GT.0) arow(irow-nx) = 0.0d0 + IF(iy.LT.ny) arow(irow+nx+2) = 0.0d0 + arow(:) = 0.5d0*arow(:) + CALL putrow(mat, irow, arow) + END DO + DO irow=nx1,n,nx1 + iy = irow/nx1 + CALL getrow(mat, irow, arow) + arow(irow-1) = 2.0d0*arow(irow-1) + IF(iy.GT.0) arow(irow-nx-2) = 0.0d0 + IF(iy.LT.ny) arow(irow+nx) = 0.0d0 + arow(:) = 0.5d0*arow(:) + CALL putrow(mat, irow, arow) + END DO +! + ELSE + WRITE(*,'(a,a4,a)') 'ibc_mat: prb = ', prb, ' NOT IMPLEMENTED!' + STOP + END IF +! +! Dirichlet BC on South/North sides +! + DO irow=1,nx1 + arow=0.0d0; arow(irow)=1.0d0 + CALL putrow(mat, irow, arow) + END DO + DO irow1=ny*nx1+1,n + arow=0.0d0; arow(irow1)=1.0d0 + CALL putrow(mat, irow1, arow) + END DO +!-------------------------------------------------------------------------------- + END SUBROUTINE ibc_fdmat_csr +!++ + SUBROUTINE ibc_fdmat_cds(grid, mat, prb) +! +! Impose BC +! + TYPE(grid2d), INTENT(in) :: grid + TYPE(cds_mat), INTENT(inout) :: mat + CHARACTER(len=*), INTENT(in) :: prb +! + INTEGER :: nx, ny, iy, irow + INTEGER :: n, nx1, ny1 +!-------------------------------------------------------------------------------- + nx = grid%n(1) + ny = grid%n(2) + nx1=nx+1 + ny1=ny+1 + n = nx1*ny1 +! +! 2 == 3 == 4 +! | | | +! -1 == 0 == 1 +! | | | +! -4 == -3 == -2 +! +! Dirichelt BC on West/East +! + IF(prb.EQ.'dddd') THEN + DO irow=1,ny*nx1+1,nx1 + mat%val(irow,:) = 0.0d0 + mat%val(irow,0) = 1.0d0 + mat%val(irow+nx,:) = 0.0d0 + mat%val(irow+nx,0) = 1.0d0 + END DO +! +! Neumann on West/East +! WARNING: Divide the stencil by 2 => should do the same for RHS! +! + ELSE IF(prb.EQ.'nndd') THEN + DO irow=1,ny*nx1+1,nx1 + iy = irow/nx1 + IF(iy.GT.0) mat%val(irow,-2)=0.0d0 + IF(iy.LT.ny) mat%val(irow,+4)=0.0d0 + mat%val(irow,+1)=2.0d0*mat%val(irow,+1) + mat%val(irow,:)=0.5d0*mat%val(irow,:) + END DO + DO irow=nx1,n,nx1 + iy = irow/nx1 + IF(iy.GT.0) mat%val(irow,-4)=0.0d0 + IF(iy.LT.ny) mat%val(irow,+2)=0.0d0 + mat%val(irow,-1)=2.0d0*mat%val(irow,-1) + mat%val(irow,:)=0.5d0*mat%val(irow,:) + END DO +! + ELSE + WRITE(*,'(a,a4,a)') 'ibc_mat: prb = ', prb, ' NOT IMPLEMENTED!' + STOP + END IF +! +! Dirichlet BC on South/North sides +! + DO irow=1,nx1 + mat%val(irow,:) = 0.0d0 + mat%val(irow,0) = 1.0d0 + END DO + DO irow=ny*nx1+1,n + mat%val(irow,:) = 0.0d0 + mat%val(irow,0) = 1.0d0 + END DO +!-------------------------------------------------------------------------------- + END SUBROUTINE ibc_fdmat_cds +!++ +END MODULE fdmat_mod diff --git a/multigrid/src/gvector_mod.f90 b/multigrid/src/gvector_mod.f90 new file mode 100644 index 0000000..2f0162e --- /dev/null +++ b/multigrid/src/gvector_mod.f90 @@ -0,0 +1,231 @@ +!> +!> @file gvector_mod.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +MODULE gvector +! +! Implementation of 2D vectors with arbitrary +! vector bounds and ghost cell width. +! +! T.M. Tran, CRPP-EPFL +! September 2013 +! + USE iso_fortran_env, ONLY : rkind => real64 + IMPLICIT NONE + PRIVATE + PUBLIC :: gvector_2d, disp, norm2, & + & OPERATOR(+), OPERATOR(-), OPERATOR(*), & + & ASSIGNMENT(=) + + TYPE gvector_2d + INTEGER, DIMENSION(2) :: s, e ! vector internal bounds + INTEGER, DIMENSION(2) :: g ! ghost cell widths + REAL(rkind), ALLOCATABLE :: val(:,:) + END TYPE gvector_2d + + INTERFACE gvector_2d + MODULE PROCEDURE constructor + END INTERFACE gvector_2d + INTERFACE OPERATOR(+) + MODULE PROCEDURE add_scal + MODULE PROCEDURE add_vec + END INTERFACE OPERATOR(+) + INTERFACE OPERATOR(-) + MODULE PROCEDURE minus_vec + MODULE PROCEDURE substract_vec + END INTERFACE OPERATOR(-) + INTERFACE OPERATOR(*) + MODULE PROCEDURE scale_left + MODULE PROCEDURE scale_right + END INTERFACE OPERATOR(*) + INTERFACE ASSIGNMENT(=) + MODULE PROCEDURE from_scal + MODULE PROCEDURE from_vec + END INTERFACE ASSIGNMENT(=) + INTERFACE norm2 + MODULE PROCEDURE norm2_gvector_2d + MODULE PROCEDURE norm2_root_g_2d + MODULE PROCEDURE norm2_all_g_2d + END INTERFACE norm2 + +CONTAINS +!======================================================================= + FUNCTION constructor(s, e, g) RESULT(res) + INTEGER, INTENT(in) :: s(2), e(2) + INTEGER, OPTIONAL, INTENT(in) :: g(2) + TYPE(gvector_2d) :: res + INTEGER :: lb(2), ub(2) + res%g= 0 + IF(PRESENT(g)) res%g = g + res%s = s + res%e = e + lb = res%s - res%g + ub = res%e + res%g + ALLOCATE(res%val(lb(1):ub(1),lb(2):ub(2))) +! +! Initialize to 0 on all ghost cells + res%val(lb(1):s(1)-1,:) = 0._rkind + res%val(e(1)+1:ub(1),:) = 0._rkind + res%val(:,lb(2):s(2)-1) = 0._rkind + res%val(:,e(2)+1:ub(2)) = 0._rkind + END FUNCTION constructor +!======================================================================= + FUNCTION add_vec(lhs, rhs) RESULT(res) + TYPE(gvector_2d), INTENT(in) :: lhs, rhs + TYPE(gvector_2d) :: res + res = gvector_2d(lhs%s, lhs%e, lhs%g) + res%val(res%s(1):res%e(1),res%s(2):res%e(2)) = & + & lhs%val(res%s(1):res%e(1),res%s(2):res%e(2)) + & + & rhs%val(res%s(1):res%e(1),res%s(2):res%e(2)) + END FUNCTION add_vec +!======================================================================= + FUNCTION add_scal(lhs, rhs) RESULT(res) + TYPE(gvector_2d), INTENT(in) :: lhs + REAL(rkind), INTENT(in) :: rhs + TYPE(gvector_2d) :: res + res = gvector_2d(lhs%s, lhs%e, lhs%g) + res%val(res%s(1):res%e(1),res%s(2):res%e(2)) = & + & lhs%val(res%s(1):res%e(1),res%s(2):res%e(2)) + rhs + END FUNCTION add_scal +!======================================================================= + FUNCTION minus_vec(this) RESULT(res) + TYPE(gvector_2d), INTENT(in) :: this + TYPE(gvector_2d) :: res + res = gvector_2d(this%s, this%e, this%g) + res%val(res%s(1):res%e(1),res%s(2):res%e(2)) = & + & -this%val(res%s(1):res%e(1),res%s(2):res%e(2)) + END FUNCTION minus_vec +!======================================================================= + FUNCTION substract_vec(lhs, rhs) RESULT(res) + TYPE(gvector_2d), INTENT(in) :: lhs, rhs + TYPE(gvector_2d) :: res + res = gvector_2d(lhs%s, lhs%e, lhs%g) + res%val(res%s(1):res%e(1),res%s(2):res%e(2)) = & + & lhs%val(res%s(1):res%e(1),res%s(2):res%e(2)) - & + & rhs%val(res%s(1):res%e(1),res%s(2):res%e(2)) + END FUNCTION substract_vec +!======================================================================= + FUNCTION scale_left(lhs, rhs) RESULT(res) + REAL(rkind), INTENT(in) :: lhs + TYPE(gvector_2d), INTENT(in) :: rhs + TYPE(gvector_2d) :: res + res = gvector_2d(rhs%s, rhs%e, rhs%g) + res%val(res%s(1):res%e(1),res%s(2):res%e(2)) = & + & lhs * rhs%val(res%s(1):res%e(1),res%s(2):res%e(2)) + END FUNCTION scale_left +!======================================================================= + FUNCTION scale_right(lhs, rhs) RESULT(res) + TYPE(gvector_2d), INTENT(in) :: lhs + REAL(rkind), INTENT(in) :: rhs + TYPE(gvector_2d) :: res + res = gvector_2d(lhs%s, lhs%e, lhs%g) + res%val(res%s(1):res%e(1),res%s(2):res%e(2)) = & + & lhs%val(res%s(1):res%e(1),res%s(2):res%e(2)) * rhs + END FUNCTION scale_right +!======================================================================= + + SUBROUTINE from_vec(lhs, rhs) + TYPE(gvector_2d), INTENT(inout) :: lhs + REAL(rkind), INTENT(in) :: rhs(:,:) + INTEGER :: n(2) + n = lhs%e - lhs%s + 1 + IF(SIZE(rhs,1).NE.n(1) .OR. SIZE(rhs,2).NE.n(2)) THEN + PRINT*, 'from_vec: sizes of rhs and lhs not equal!' + STOP + END IF + lhs%val(lhs%s(1):lhs%e(1),lhs%s(2):lhs%e(2)) = rhs(:,:) + END SUBROUTINE from_vec +!======================================================================= + + SUBROUTINE from_scal(lhs, rhs) + TYPE(gvector_2d), INTENT(inout) :: lhs + REAL(rkind), INTENT(in) :: rhs + lhs%val(lhs%s(1):lhs%e(1),lhs%s(2):lhs%e(2)) = rhs + END SUBROUTINE from_scal +!======================================================================= + SUBROUTINE disp(str,this) + CHARACTER(len=*), INTENT(in) :: str + TYPE(gvector_2d), INTENT(in) :: this + INTEGER :: i + WRITE(*,'(/a,3(" (",i0,",",i0,") "))') str//': s, e, g =',& + & this%s, this%e, this%g + DO i=LBOUND(this%val,1),UBOUND(this%val,1) + WRITE(*,'(10(1pe11.3))') (this%val(i,:)) + END DO + END SUBROUTINE disp +!======================================================================= + + FUNCTION norm2_gvector_2d(this) RESULT(res) + TYPE(gvector_2d), INTENT(in) :: this + REAL(rkind) :: res + res = NORM2( this%val(this%s(1):this%e(1), & + & this%s(2):this%e(2)) ) + END FUNCTION norm2_gvector_2d +!======================================================================= + FUNCTION norm2_root_g_2d(x, comm, root) RESULT(res) +! +! Vector norm of 2d distributed array with ghost cells +! + USE mpi + TYPE(gvector_2d), INTENT(in) :: x + INTEGER, INTENT(in) :: comm + INTEGER, INTENT(in) :: root + REAL(rkind) :: res + INTEGER, PARAMETER :: ndim=2 + INTEGER, DIMENSION(ndim) :: s, e + REAL(rkind) :: res_loc + INTEGER :: me, ierr +! + CALL mpi_comm_rank(comm, me, ierr) + s = x%s + e = x%e + res_loc = SUM(x%val(s(1):e(1),s(2):e(2))**2) + res = 0.0 + CALL mpi_reduce(res_loc, res, 1, MPI_DOUBLE_PRECISION, MPI_SUM,& + & root, comm, ierr) + IF(me.EQ.root) res = SQRT(res) + END FUNCTION norm2_root_g_2d +!======================================================================= + FUNCTION norm2_all_g_2d(x, comm) RESULT(res) +! +! Vector norm of 2d distributed array with ghost cells +! + USE mpi + TYPE(gvector_2d), INTENT(in) :: x + INTEGER, INTENT(in) :: comm + REAL(rkind) :: res + INTEGER, PARAMETER :: ndim=2 + INTEGER, DIMENSION(ndim) :: s, e + REAL(rkind) :: res_loc + INTEGER :: ierr +! + s = x%s + e = x%e + res_loc = SUM(x%val(s(1):e(1),s(2):e(2))**2) + CALL mpi_allreduce(res_loc, res, 1, MPI_DOUBLE_PRECISION, MPI_SUM, & + & comm, ierr) + res = SQRT(res) + END FUNCTION norm2_all_g_2d +!======================================================================= +END MODULE gvector diff --git a/multigrid/src/parmg_mod.f90 b/multigrid/src/parmg_mod.f90 new file mode 100644 index 0000000..5de78ca --- /dev/null +++ b/multigrid/src/parmg_mod.f90 @@ -0,0 +1,722 @@ +!> +!> @file parmg_mod.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +MODULE parmg +! +! parmg: Utilities for parallel multigrid +! +! T.M. Tran, CRPP-EPFL +! December 2013 +! + USE mpi + USE iso_fortran_env, ONLY : rkind => real64 + USE gvector, ONLY : gvector_2d + USE stencil, ONLY : stencil_2d + IMPLICIT NONE +! + PRIVATE + PUBLIC :: grid2_type, mg_info, create_grid, mg, get_lmax, coarse, disp, exchange, & + & get_resids, jacobi, prolong, init_restrict, restrict, & + & norm_vec, norm_mat +! + TYPE grid2_type + INTEGER, DIMENSION(2) :: s, e, s0, e0, npt_loc, npt + REAL(rkind), ALLOCATABLE :: x(:) + REAL(rkind), ALLOCATABLE :: y(:) + TYPE(gvector_2d) :: f + TYPE(gvector_2d) :: v + TYPE(stencil_2d) :: fdmat + TYPE(stencil_2d) :: restrict_mat + END TYPE grid2_type +! + TYPE mg_info + INTEGER :: comm ! Communicator + INTEGER :: nu1 ! Relaxation down sweeps + INTEGER :: nu2 ! Relaxation up sweeps + INTEGER :: mu ! mu-cycle number + INTEGER :: nu0 ! Number of FMG cycles + INTEGER :: levels ! Number of mg levels + INTEGER :: direct_solve_nits ! Jacobit nits for direct_solve + CHARACTER(len=4) :: relax ! Type of relation + REAL(rkind) :: omega ! for weighted Jacobi relaxation + END TYPE mg_info +! + INTERFACE create_grid + MODULE PROCEDURE create_grid_2d + END INTERFACE create_grid + INTERFACE mg + MODULE PROCEDURE mg_2d + END INTERFACE mg + INTERFACE coarse + MODULE PROCEDURE coarse_1d, coarse_2d + END INTERFACE coarse + INTERFACE exchange + MODULE PROCEDURE exchange_g_2d, exchange_g_2d_new + END INTERFACE exchange + INTERFACE prolong + MODULE PROCEDURE prolong_g_2d + END INTERFACE prolong + INTERFACE restrict + MODULE PROCEDURE restrict_g_2d + END INTERFACE restrict + INTERFACE jacobi + MODULE PROCEDURE jacobi_stencila_2d + MODULE PROCEDURE jacobi_stencilg_2d + END INTERFACE jacobi + INTERFACE get_resids + MODULE PROCEDURE resids_stencila_2d + MODULE PROCEDURE resids_stencilg_2d + END INTERFACE get_resids + INTERFACE disp + MODULE PROCEDURE dispi_0, dispi_1 + END INTERFACE disp +CONTAINS +! +!-------------------------------------------------------------------------------- + SUBROUTINE create_grid_2d(x, y, s_in, e_in, id, prb, grids, comm) +! +! Create arrays of partitionned grids +! + USE stencil, ONLY : init +! + REAL(rkind), INTENT(in) :: x(0:), y(0:) ! Global coordinates + INTEGER, INTENT(in) :: s_in(2), e_in(2) ! Partition of finest grid + INTEGER, INTENT(in) :: id(:,:) ! Structure of stencil + CHARACTER(len=*), INTENT(in) :: prb + INTEGER, INTENT(in) :: comm + TYPE(grid2_type) :: grids(:) +! + INTEGER :: levels + INTEGER :: s0(2), e0(2), s(2), e(2) + INTEGER :: npt_loc(2), npt_loc_min(2), npt_glob(2) + INTEGER :: l, ierr +! + levels = SIZE(grids) + s = s_in + e = e_in +! + DO l=1,levels + IF(l.GT.1) THEN + CALL coarse(s,e) + END IF + npt_loc = e-s+1 + CALL mpi_allreduce(s, s0, 2, MPI_INTEGER, MPI_MIN, comm, ierr) + CALL mpi_allreduce(e, e0, 2, MPI_INTEGER, MPI_MAX, comm, ierr) + CALL mpi_allreduce(npt_loc, npt_loc_min, 2, MPI_INTEGER, MPI_MIN, comm, ierr) + IF(MINVAL(npt_loc_min) .LT. 2) THEN + PRINT*, 'CREATE_GRID: number intervals too small!' + STOP + END IF + npt_glob = e0+1 + grids(l)%s0 = s0 + grids(l)%e0 = e0 + grids(l)%s = s + grids(l)%e = e + grids(l)%npt_loc = npt_loc + grids(l)%npt = npt_glob + grids(l)%f = gvector_2d(s, e, [1,1]) ! Arrays with ghost cell + grids(l)%v = gvector_2d(s, e, [1,1]) + ALLOCATE(grids(l)%x(s0(1):e0(1))) ! Global coords (x,y) + ALLOCATE(grids(l)%y(s0(2):e0(2))) + IF(l.EQ.1) THEN + grids(1)%x = x + grids(1)%y = y + ELSE + grids(l)%x(:) = grids(l-1)%x(0::2) + grids(l)%y(:) = grids(l-1)%y(0::2) + END IF + END DO +! +! Set up FD matrix +! + DO l=1,levels + s = grids(l)%s + e = grids(l)%e + CALL init(s, e, id, .FALSE., grids(l)%fdmat, comm) + END DO +! +! Set up restriction stencil +! + DO l=2,levels + CALL init_restrict(grids(l), prb, comm) + END DO +! + END SUBROUTINE create_grid_2d +!-------------------------------------------------------------------------------- + RECURSIVE SUBROUTINE mg_2d(grids, info, l) +! +! Execute a recursive V-cycle +! + USE gvector, ONLY : ASSIGNMENT(=), OPERATOR(+) + TYPE(grid2_type), INTENT(inout) :: grids(:) + TYPE(mg_info), INTENT(in) :: info + INTEGER, INTENT(in) :: l +! + TYPE(gvector_2d) :: resids, v_prolong + INTEGER, DIMENSION(2) :: s0, e0, s, e, g=[1,1] + INTEGER :: comm, levels, k +! + comm = info%comm + levels = info%levels +! + s0 = grids(l)%s0; e0 = grids(l)%e0 + s = grids(l)%s; e = grids(l)%e + resids = gvector_2d(s, e, g) +! + IF(l.EQ.levels) THEN + CALL direct_solve(grids(l)%fdmat, grids(l)%v, grids(l)%f) + ELSE + CALL relax(info%nu1) + resids = get_resids(comm, grids(l)%fdmat, grids(l)%v, grids(l)%f) + CALL exchange(comm, resids) + CALL restrict(grids(l+1)%restrict_mat, resids, grids(l+1)%f) + grids(l+1)%v = 0.0d0 +! +! Only 1 call to the coarsest level + DO k=1,MERGE(info%mu, 1, (l+1).NE.levels) + CALL mg(grids, info, l+1) + END DO +! + v_prolong = gvector_2d(s, e, g) + CALL exchange(comm, grids(l+1)%v) + CALL prolong(grids(l+1)%v, v_prolong) + grids(l)%v = grids(l)%v + v_prolong + CALL relax(info%nu2) + END IF +! + CONTAINS + SUBROUTINE relax(nu) + INTEGER, INTENT(in) :: nu + SELECT CASE (TRIM(info%relax)) + CASE ("jac") + CALL jacobi(comm, grids(l)%fdmat, info%omega, nu, grids(l)%v, grids(l)%f) + CASE default + PRINT*, "relax ", info%relax, " NOT IMPLEMENTED!" + STOP + END SELECT + END SUBROUTINE relax + SUBROUTINE direct_solve(mat, v, f) + TYPE(stencil_2d), INTENT(in) :: mat + TYPE(gvector_2d), INTENT(inout) :: v + TYPE(gvector_2d), INTENT(in) :: f + v = 0.0d0 + CALL jacobi(comm, mat, 1.0_rkind, info%direct_solve_nits, v, f) + END SUBROUTINE direct_solve + END SUBROUTINE mg_2d +!-------------------------------------------------------------------------------- + FUNCTION get_lmax(s_in, npt_loc, npt_min, comm) RESULT(lmax) +! +! Get max number of levels on all processes +! + INTEGER :: lmax + INTEGER, INTENT(in) :: s_in, npt_loc, npt_min, comm + INTEGER :: me, ierr + INTEGER :: s, e, kpt_loc, kpt, kpt_loc_min +! + CALL mpi_comm_rank(comm, me, ierr) + s = s_in + kpt_loc = npt_loc + e = s+npt_loc-1 + lmax = 1 + DO + CALL mpi_allreduce(kpt_loc, kpt_loc_min, 1, MPI_INTEGER, MPI_MIN, & + & comm, ierr) + CALL mpi_allreduce(kpt_loc, kpt, 1, MPI_INTEGER, MPI_SUM, & + & comm, ierr) +! +! Stop if npt-1 not even or when minumum local npt is attained + IF(MODULO(kpt-1,2).NE.0 .OR. kpt_loc_min .LE. npt_min) EXIT +! + lmax = lmax+1 + CALL coarse(s, e) + kpt_loc = e-s+1 + END DO +! + END FUNCTION get_lmax +!-------------------------------------------------------------------------------- + SUBROUTINE coarse_1d(s, e) +! +! Compute (s,e) of next coarse grid +! + INTEGER, INTENT(inout) :: s, e + INTEGER :: s0, npt, i +! +! Previous odd indices are discarded + s0 = s + IF( MODULO(s0,2) .NE. 0 ) THEN + s0 = s+1 + END IF +! +! Count local number of points + npt = 0 + DO i=s0,e,2 + npt = npt+1 + END DO +! +! Coarse s, e + s = s0/2 + e = s + npt - 1 + END SUBROUTINE coarse_1d +!-------------------------------------------------------------------------------- + SUBROUTINE coarse_2d(s, e) +! +! Compute (s,e) of next coarse grid +! + INTEGER, INTENT(inout) :: s(2), e(2) +! + CALL coarse_1d(s(1), e(1)) + CALL coarse_1d(s(2), e(2)) + END SUBROUTINE coarse_2d +!-------------------------------------------------------------------------------- + SUBROUTINE dispi_0(str, a, comm) +! +! Display integer local scalar +! + INTEGER, INTENT(in) :: a, comm + CHARACTER(len=*), INTENT(in) :: str + INTEGER :: npes, me, ierr + INTEGER, ALLOCATABLE, DIMENSION(:) :: a_gather(:) +! + CALL MPI_COMM_RANK(comm, me, ierr) + CALL MPI_COMM_SIZE(comm, npes, ierr) + ALLOCATE(a_gather(npes)) + CALL MPI_GATHER(a, 1, MPI_INTEGER, a_gather, 1, MPI_INTEGER, & + & 0, comm, ierr) + IF( me.EQ.0 ) THEN + WRITE(*,'(a/(20i6))') str, a_gather + END IF + DEALLOCATE(a_gather) + END SUBROUTINE dispi_0 +!-------------------------------------------------------------------------------- + SUBROUTINE dispi_1(str, a, comm) +! +! Display integer local array +! + INTEGER, INTENT(in) :: a(:), comm + CHARACTER(len=*), INTENT(in) :: str + INTEGER :: npes, me, ierr, n, i + INTEGER, ALLOCATABLE, DIMENSION(:) :: a_gather(:,:) +! + n = SIZE(a,1) + CALL MPI_COMM_RANK(comm, me, ierr) + CALL MPI_COMM_SIZE(comm, npes, ierr) + ALLOCATE(a_gather(n,npes)) + CALL MPI_GATHER(a, n, MPI_INTEGER, a_gather, n, MPI_INTEGER, & + & 0, comm, ierr) + IF( me.EQ.0 ) THEN + WRITE(*,'(a)') str + DO i=1,n + WRITE(*,'(20i6)') a_gather(i,:) + END DO + END IF + DEALLOCATE(a_gather) + END SUBROUTINE dispi_1 +!-------------------------------------------------------------------------------- + SUBROUTINE exchange_g_2d_new(comm, u) +! +! Exchange ghost cells with (west,east,south,north) neighbors. +! Assume same ghost cells on each dimension: +! u%g(1) : number of ghost cells on west and east boundaries +! u%g(2) : number of ghost cells on south and north boundaries +! + INTEGER, INTENT(in) :: comm + TYPE(gvector_2d), INTENT(inout) :: u + INTEGER :: neighs(4), ierr +! + CALL mpi_cart_shift(comm, 0, 1, neighs(1), neighs(2), ierr) + CALL mpi_cart_shift(comm, 1, 1, neighs(3), neighs(4), ierr) + CALL exchange_g_2d(comm, neighs, u) + END SUBROUTINE exchange_g_2d_new +!-------------------------------------------------------------------------------- + SUBROUTINE exchange_g_2d(comm, neighs, u) +! +! Exchange ghost cells with (west,east,south,north) neighbors. +! Assume same ghost cells on each dimension: +! u%g(1) : number of ghost cells on west and east boundaries +! u%g(2) : number of ghost cells on south and north boundaries +! + INTEGER, INTENT(in) :: comm + INTEGER, INTENT(in) :: neighs(4) + TYPE(gvector_2d), INTENT(inout) :: u +! + INTEGER :: cols, rows + INTEGER :: ierr + INTEGER, PARAMETER :: ndim=2 + INTEGER, DIMENSION(ndim) :: g, lb, ub, s, e, n +! + s = u%s + e = u%e + g = u%g + lb = s - g + ub = e + g + n = ub - lb + 1 ! include ghost cells +! +! g(2) matrix full rows with stride n(1) + CALL mpi_type_vector(n(2), g(2), n(1), MPI_DOUBLE_PRECISION, rows, ierr) + CALL mpi_type_commit(rows, ierr) +! +! g(1) contiguous matrix full columns + CALL mpi_type_contiguous(n(1)*g(1), MPI_DOUBLE_PRECISION, cols, ierr) + CALL mpi_type_commit(cols, ierr) +! +! Exchange along first dimension + CALL mpi_sendrecv(u%val(s(1), lb(2)), 1, rows, neighs(1), 0, & + & u%val(e(1)+1,lb(2)), 1, rows, neighs(2), 0, & + & comm, MPI_STATUS_IGNORE, ierr) + CALL mpi_sendrecv(u%val(e(1)-g(1)+1,lb(2)), 1, rows, neighs(2), 0, & + & u%val(lb(1), lb(2)), 1, rows, neighs(1), 0, & + & comm, MPI_STATUS_IGNORE, ierr) +! +! Exchange along second dimension + CALL mpi_sendrecv(u%val(lb(1),s(2)), 1, cols, neighs(3), 0, & + & u%val(lb(1),e(2)+1), 1, cols, neighs(4), 0, & + & comm, MPI_STATUS_IGNORE, ierr) + CALL mpi_sendrecv(u%val(lb(1),e(2)-g(2)+1), 1, cols, neighs(4), 0, & + & u%val(lb(1),lb(2)), 1, cols, neighs(3), 0, & + & comm, MPI_STATUS_IGNORE, ierr) + END SUBROUTINE exchange_g_2d +!-------------------------------------------------------------------------------- + SUBROUTINE prolong_g_2d(vbar, v) +! +! 2D bilinear prolongation +! + TYPE(gvector_2d), INTENT(in) :: vbar + TYPE(gvector_2d), INTENT(inout) :: v +! + INTEGER :: i,j,i1,i2,j1,j2 +! + i1 = v%s(1)-MODULO(v%s(1),2); i2 = v%e(1)+MODULO(v%e(1),2) + j1 = v%s(2)-MODULO(v%s(2),2); j2 = v%e(2)+MODULO(v%e(2),2) +! +! Even numbered nodes on fine mesh +! + DO j=j1,j2,2 + DO i=i1,i2,2 + v%val(i,j) = vbar%val(i/2,j/2) + END DO + END DO +! +! Linear interpolation on x +! + DO j=j1,j2,2 + DO i=i1+1,i2-1,2 + v%val(i,j) = 0.5d0*(v%val(i-1,j)+v%val(i+1,j)) + END DO + END DO +! +! Linear interpolation on y +! + DO j=j1+1,j2-1,2 + DO i=i1,i2 + v%val(i,j) = 0.5d0*(v%val(i,j-1)+v%val(i,j+1)) + END DO + END DO + END SUBROUTINE prolong_g_2d +!-------------------------------------------------------------------------------- + SUBROUTINE init_restrict(grid, prb, comm) +! +! Set up restriction stencil +! + USE stencil, ONLY : init + TYPE(grid2_type), INTENT(inout) :: grid + CHARACTER(len=*), INTENT(in) :: prb + INTEGER, INTENT(in) :: comm +! + INTEGER, PARAMETER :: npoints=9, ndim=2 + INTEGER :: s(2), e(2), n(2), id(9,2) + INTEGER :: i, j +! +! Stencil structure initialization +! + s = grid%s + e = grid%e + n = grid%npt-1 +! +! N +! 6---7---8 +! | | | +! W 4---0---5 E Numbering of stencil +! | | | +! 1---2---3 +! S +! + id = RESHAPE([0, -1, 0, 1,-1, 1,-1, 0, 1, & + & 0, -1,-1,-1, 0, 0, 1, 1, 1], & + & [npoints, ndim]) + CALL init(s, e, id, .FALSE., grid%restrict_mat, comm) +! +! Fill in stencil +! + DO j=s(2),e(2) + DO i=s(1),e(1) + grid%restrict_mat%val(i,j,:) = [4._rkind, & + & 1._rkind, 2._rkind, 1._rkind, & + & 2._rkind, 2._rkind, & + & 1._rkind, 2._rkind, 1._rkind ]& + & / 16._rkind + END DO + END DO +! +! Apply Dirichlet BC +! + IF(s(1).EQ.0 .AND. prb(1:1).EQ.'d') THEN ! West face + grid%restrict_mat%val(s(1),:,3) = 0._rkind + grid%restrict_mat%val(s(1),:,5) = 0._rkind + grid%restrict_mat%val(s(1),:,8) = 0._rkind + END IF + IF(e(1).EQ.n(1) .AND. prb(2:2).EQ.'d') THEN ! East face + grid%restrict_mat%val(e(1),:,1) = 0._rkind + grid%restrict_mat%val(e(1),:,4) = 0._rkind + grid%restrict_mat%val(e(1),:,6) = 0._rkind + END IF + IF(s(2).EQ.0 .AND. prb(3:3).EQ.'d') THEN ! South face + grid%restrict_mat%val(:,s(2),6) = 0._rkind + grid%restrict_mat%val(:,s(2),7) = 0._rkind + grid%restrict_mat%val(:,s(2),8) = 0._rkind + END IF + IF(e(2).EQ.n(2) .AND. prb(4:4).EQ.'d') THEN ! North face + grid%restrict_mat%val(:,e(2),1) = 0._rkind + grid%restrict_mat%val(:,e(2),2) = 0._rkind + grid%restrict_mat%val(:,e(2),3) = 0._rkind + END IF + END SUBROUTINE init_restrict +!-------------------------------------------------------------------------------- + SUBROUTINE jacobi_stencila_2d(mat, omega, nu, v, f) +! +! Weighted Jacobi relaxation +! + TYPE(stencil_2d),INTENT(in) :: mat + REAL(rkind), INTENT(in) :: omega + INTEGER, INTENT(in) :: nu + REAL(rkind), ALLOCATABLE, INTENT(inout) :: v(:,:) + REAL(rkind), ALLOCATABLE, INTENT(in) :: f(:,:) +! + REAL(rkind), ALLOCATABLE :: temp(:,:), inv_diag(:,:) + INTEGER, DIMENSION(2) :: smin, emax, s, e, d, lb, ub + INTEGER :: it, k, i, j +! + s(:) = mat%s(:) + e(:) = mat%e(:) + smin(:) = mat%s0(:) + emax(:) = mat%e0(:) +! + ALLOCATE(temp(s(1):e(1),s(2):e(2))) + ALLOCATE(inv_diag(s(1):e(1),s(2):e(2))) +! + inv_diag(:,:) = omega/mat%val(:,:,0) + DO it=1,nu + temp(:,:) = f(s(1):e(1),s(2):e(2)) + DO k=1,mat%npoints-1 ! exclude the diagonal term, f - (L+U)*v + d(:) = mat%id(k,:) + lb = MAX(smin, smin-d, mat%s) + ub = MIN(emax, emax-d, mat%e) + DO j=lb(2),ub(2) + DO i=lb(1),ub(1) + temp(i,j) = temp(i,j) - mat%val(i,j,k)*v(i+d(1),j+d(2)) + END DO + END DO + END DO + temp = temp * inv_diag + v(s(1):e(1),s(2):e(2)) = (1.d0-omega)*v(s(1):e(1),s(2):e(2)) + temp + END DO +! + DEALLOCATE(temp) + DEALLOCATE(inv_diag) + END SUBROUTINE jacobi_stencila_2d +!-------------------------------------------------------------------------------- + SUBROUTINE jacobi_stencilg_2d(comm, mat, omega, nu, v, f) +! +! Weighted Jacobi relaxation +! + USE gvector, ONLY : ASSIGNMENT(=) + INTEGER, INTENT(in) :: comm + TYPE(stencil_2d),INTENT(in) :: mat + REAL(rkind), INTENT(in) :: omega + INTEGER, INTENT(in) :: nu + TYPE(gvector_2d), INTENT(inout) :: v + TYPE(gvector_2d), INTENT(in) :: f +! + REAL(rkind), ALLOCATABLE :: temp(:,:), inv_diag(:,:) + INTEGER, DIMENSION(2) :: s, e, d + INTEGER :: it, k, i, j +! + s(:) = v%s(:) + e(:) = v%e(:) +! + ALLOCATE(temp(s(1):e(1),s(2):e(2))) + ALLOCATE(inv_diag(s(1):e(1),s(2):e(2))) +! + inv_diag(:,:) = omega/mat%val(:,:,0) + DO it=1,nu + CALL exchange(comm, v) + temp(:,:) = f%val(s(1):e(1),s(2):e(2)) + DO k=1,mat%npoints-1 ! exclude the diagonal term, f - (L+U)*v + d(:) = mat%id(k,:) + DO j=s(2),e(2) + DO i=s(1),e(1) + temp(i,j) = temp(i,j) - & + & mat%val(i,j,k) * v%val(i+d(1),j+d(2)) + END DO + END DO + END DO + temp = temp * inv_diag + v%val(s(1):e(1),s(2):e(2)) = (1.d0-omega) * v%val(s(1):e(1),s(2):e(2)) + & + & temp + END DO + END SUBROUTINE jacobi_stencilg_2d +!-------------------------------------------------------------------------------- + FUNCTION resids_stencila_2d(mat, xarr, farr) RESULT(res) +! +! Return residuals res = mat*x, where x, farr and res are simple arrays +! + TYPE(stencil_2d), INTENT(in) :: mat + REAL(rkind), ALLOCATABLE, INTENT(in) :: xarr(:,:) + REAL(rkind), ALLOCATABLE, INTENT(in) :: farr(:,:) + REAL(rkind) :: res(LBOUND(xarr,1):UBOUND(xarr,1), & + & LBOUND(xarr,2):UBOUND(xarr,2)) + INTEGER :: k, i, j + INTEGER, DIMENSION(2) :: smin, emax, d, lb, ub +! + smin(:) = mat%s0(:) + emax(:) = mat%e0(:) + res = farr + DO k=0,mat%npoints-1 + d(:) = mat%id(k,:) + lb = MAX(smin, smin-d, mat%s) + ub = MIN(emax, emax-d, mat%e) + DO j=lb(2),ub(2) + DO i=lb(1),ub(1) + res(i,j) = res(i,j) - mat%val(i,j,k)*xarr(i+d(1),j+d(2)) + END DO + END DO + END DO + END FUNCTION resids_stencila_2d +!-------------------------------------------------------------------------------- + FUNCTION resids_stencilg_2d(comm, mat, xarr, farr) RESULT(res) +! +! Return residuals res= f-mat*x, where x, f and res are gvectors +! + INTEGER, INTENT(in) :: comm + TYPE(stencil_2d), INTENT(in) :: mat + TYPE(gvector_2d), INTENT(inout) :: xarr + TYPE(gvector_2d), INTENT(in) :: farr + TYPE(gvector_2d) :: res + INTEGER :: k, i, j + INTEGER, DIMENSION(2) :: s, e, d +! + s = xarr%s + e = xarr%e + res = gvector_2d(xarr%s, xarr%e, xarr%g) + res%val = farr%val + CALL exchange(comm, xarr) + DO k=0,mat%npoints-1 + d(:) = mat%id(k,:) + DO j=s(2),e(2) + DO i=s(1),e(1) + res%val(i,j) = res%val(i,j) - mat%val(i,j,k)*xarr%val(i+d(1),j+d(2)) + END DO + END DO + END DO + END FUNCTION resids_stencilg_2d +!-------------------------------------------------------------------------------- + SUBROUTINE restrict_g_2d(mat, f, fbar) +! +! 2D full weighting restriction +! + TYPE(stencil_2d), INTENT(in) :: mat + TYPE(gvector_2d), INTENT(in) :: f + TYPE(gvector_2d), INTENT(inout) :: fbar +! + INTEGER, DIMENSION(2) :: s, e, d + INTEGER :: k, i, j +! + s = fbar%s + e = fbar%e +! +! Diagonal contributions: d(0) = (0,0) + DO j=s(2),e(2) + DO i=s(1),e(1) + fbar%val(i,j) = mat%val(i,j,0) * f%val(2*i,2*j) + END DO + END DO +! + DO k=1,mat%npoints-1 + d(:) = mat%id(k,:) + DO j=s(2),e(2) + DO i=s(1),e(1) + fbar%val(i,j) = fbar%val(i,j) + & + & mat%val(i,j,k) * f%val(2*i+d(1),2*j+d(2)) + END DO + END DO + END DO + + END SUBROUTINE restrict_g_2d +!-------------------------------------------------------------------------------- + REAL(rkind) FUNCTION norm_vec(x, comm, root) +! +! Infinity vector norm +! + TYPE(gvector_2d), INTENT(in) :: x + INTEGER, INTENT(in) :: comm + INTEGER, OPTIONAL, intent(in) :: root + REAL(rkind) :: temp + INTEGER :: ierr + temp = MAXVAL( ABS(x%val(x%s(1):x%e(1),x%s(2):x%e(2))) ) + IF(PRESENT(root)) THEN + CALL mpi_reduce(temp, norm_vec, 1, MPI_DOUBLE_PRECISION, MPI_MAX, & + & root, comm, ierr) + ELSE + CALL mpi_allreduce(temp, norm_vec, 1, MPI_DOUBLE_PRECISION, MPI_MAX, & + & comm, ierr) + END IF + END FUNCTION norm_vec +!-------------------------------------------------------------------------------- + REAL(rkind) FUNCTION norm_mat(mat, comm, root) +! +! Infinity matrix norm +! + TYPE(stencil_2d), INTENT(in) :: mat + INTEGER, INTENT(in) :: comm + INTEGER, OPTIONAL, intent(in) :: root + REAL(rkind) :: arr_temp(mat%s(1):mat%e(1),mat%s(2):mat%e(2)) + REAL(rkind) :: temp + INTEGER :: i, j, s(2), e(2), ierr + s = mat%s; e = mat%e + DO j=s(2),e(2) + DO i=s(1),e(1) + arr_temp(i,j) = SUM(ABS(mat%val(i,j,:))) + END DO + END DO + temp = MAXVAL(arr_temp) + IF(PRESENT(root)) THEN + CALL mpi_reduce(temp, norm_mat, 1, MPI_DOUBLE_PRECISION, MPI_MAX, & + & root, comm, ierr) + ELSE + CALL mpi_allreduce(temp, norm_mat, 1, MPI_DOUBLE_PRECISION, MPI_MAX, & + & comm, ierr) + END IF + END FUNCTION norm_mat +!-------------------------------------------------------------------------------- +END MODULE parmg diff --git a/multigrid/src/partition.f90 b/multigrid/src/partition.f90 new file mode 100644 index 0000000..1a3b2e6 --- /dev/null +++ b/multigrid/src/partition.f90 @@ -0,0 +1,77 @@ +!> +!> @file partition.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main + USE mpi + USE pputils2, ONLY : dist1d + USE parmg, ONLY : get_lmax, coarse, disp + IMPLICIT NONE +! + INTEGER :: me, npes, ierr + INTEGER :: n, npt, npt_loc, s, e + INTEGER :: l, lmax +! + CALL MPI_INIT(ierr) + CALL MPI_COMM_RANK(MPI_COMM_WORLD, me, ierr) + CALL MPI_COMM_SIZE(MPI_COMM_WORLD, npes, ierr) +! + IF(me.EQ.0) THEN + WRITE(*,*) 'Enter n' + READ(*,*) n + END IF + CALL mpi_bcast(n,1,MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) +! +! Partition ot finest grid +! + npt = n+1 + CALL dist1d(MPI_COMM_WORLD, 0, npt, s, npt_loc) + e = s+npt_loc-1 + CALL disp('start index=', s, MPI_COMM_WORLD) +! +! Max number of levels +! + lmax = get_lmax(s, npt_loc, 1, MPI_COMM_WORLD) + IF(me.EQ.0) WRITE(*,'(a,i0)') 'Max number of levels ', lmax +! +! Grid coarsening +! + DO l=1,lmax + IF(l.GT.1) THEN + CALL coarse(s, e) + npt_loc = e-s+1 + CALL mpi_allreduce(npt_loc, npt, 1, MPI_INTEGER, MPI_SUM, & + & MPI_COMM_WORLD, ierr) + END IF + IF(me.EQ.0) WRITE(*, '(a,i3)') 'level', l + CALL disp('s ', s, MPI_COMM_WORLD) + CALL disp('e ', e, MPI_COMM_WORLD) + CALL disp('npt_loc', npt_loc, MPI_COMM_WORLD) + CALL disp('npt ', npt, MPI_COMM_WORLD) + END DO +! + CALL MPI_FINALIZE(ierr) +! +!+++++ +END PROGRAM main diff --git a/multigrid/src/poisson_fd.f90 b/multigrid/src/poisson_fd.f90 new file mode 100644 index 0000000..d44100d --- /dev/null +++ b/multigrid/src/poisson_fd.f90 @@ -0,0 +1,485 @@ +!> +!> @file poisson_fd.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Solving the following 2d PDE using finite differences: +! +! d2/dx2 f(x,y) + tau*d2/dxdy f(x,y) + d2/dy2 f(x,y) = w(x,y), x,y in [0:Lx][0:Ly] +! w(x,y) = -4*pi^2 *[(kx^2/Lx^2+ky^2/Ly^2)*cos(2*kx*pi*x/Lx)*sin(2*ky*pi*y/Ly) +! -tau*(kx*ky)/(Lx*Ly)*sin(2*kx*pi*x/Lx)*cos(2*ky*pi*y/Lx)] +! +! West, East boundaries: Neumann +! South, North boundaries: Dirichlet +! +! Analytic solution : f(x,y) = cos(2*kx*pi*x/Lx)*sin(2*kx*pi*y/Ly) +! + USE multigrid + USE fdmat_mod + IMPLICIT NONE + INCLUDE 'mpif.h' +! + INTEGER, PARAMETER :: nnumx=32 +! + INTEGER :: ierr, np, me + DOUBLE PRECISION, PARAMETER :: PI=4.0d0*ATAN(1.0d0) + DOUBLE PRECISION :: Lx, Ly, icrosst, beta, miome + INTEGER :: n, nx,ny,nz,kx,ky + CHARACTER(len=4) :: prb + INTEGER :: nits + DOUBLE PRECISION :: atol, rtol + LOGICAL :: nldirect, nldebug +! + TYPE(mg_info) :: info ! info for MG + INTEGER :: levels, nnu, mu, nu0 +! + INTEGER :: inu, nu1(nnumx), nu2(nnumx), niter(nnumx) + DOUBLE PRECISION :: titer(nnumx) +! + LOGICAL :: nlfixed + DOUBLE PRECISION :: omega + CHARACTER(len=4) :: mat_type, relax +! + DOUBLE PRECISION :: dx, dy + INTEGER :: ix, iy, l, its + DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:), dense(:) + TYPE(grid2d), ALLOCATABLE :: grids(:) +! + DOUBLE PRECISION, ALLOCATABLE, TARGET :: sol_ana2d(:,:), sol_direct2d(:,:) + DOUBLE PRECISION, POINTER :: sol_ana(:), sol_direct(:) + DOUBLE PRECISION :: err_direct, resid_direct + DOUBLE PRECISION :: norma, normb + DOUBLE PRECISION, ALLOCATABLE :: rresid(:), resid(:), errdisc(:) + DOUBLE PRECISION :: t0, tsetup, tmat(2), tdirect, tbsolve + DOUBLE PRECISION, EXTERNAL :: mem +! + NAMELIST /parameters/ prb, mat_type,nx, ny, nz, kx, ky, Lx, Ly, icrosst, beta, & + & miome, nldebug, nlfixed, levels, nnu, nu1, nu2, mu, nu0, & + & relax,omega, nldirect, nits, atol, rtol +!-------------------------------------------------------------------------------- +! 1.0 Prologue +! + CALL MPI_INIT(ierr) + CALL MPI_COMM_RANK(MPI_COMM_WORLD,me,ierr) + CALL MPI_COMM_SIZE(MPI_COMM_WORLD,np,ierr) +! +! Default inputs +! + nx=32 + ny=32 + nz=1 + kx=1 + ky=1 + icrosst=1.0d0 + Lx = 1.0D0 + Ly = 1.0D0 + beta = 0d0 + miome = 200d0 + nldebug = .FALSE. + prb = 'dddd' + mat_type = 'cds' + nldirect = .TRUE. +! + nlfixed = .FALSE. + levels = 2 + nnu = 1 + nu1 = 1 + nu2 = 1 + mu = 1 + nu0 = 1 + nits = 10 + atol = 1.e-8; rtol = 1.e-8 + relax = 'jac' + omega = 0.6667 +! + IF(me==0) THEN + READ(*,parameters) + WRITE(*,parameters) + END IF +! +! Send input parameters to other processors +! + CALL MPI_BCAST(nx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL MPI_BCAST(ny, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL MPI_BCAST(nz, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL MPI_BCAST(kx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL MPI_BCAST(ky, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL MPI_BCAST(icrosst, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + CALL MPI_BCAST(Lx,1,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD, ierr) + CALL MPI_BCAST(Ly,1,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD, ierr) + CALL MPI_BCAST(beta,1,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD, ierr) + CALL MPI_BCAST(miome, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) +! + CALL mpi_bcast(nldebug, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nlfixed, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nldirect, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nnu, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nu1, nnumx, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nu2, nnumx, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nu0, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(mu, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nits, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(relax, 4, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(mat_type, 4, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(omega, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(atol, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(rtol, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(prb, LEN(prb), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) +! + IF(nnu.GT.nnumx) THEN + IF(me.EQ.0) THEN + PRINT*, 'Value of nnu larger than', nnumx + END IF + CALL mpi_finalize(ierr) + STOP + END IF +! +! Adjust number of levels and fill mg info. +! + levels = MIN(levels, get_lmax(nx), get_lmax(ny)) + info%nu1 = nu1(1) + info%nu2 = nu2(1) + info%mu = mu + info%nu0 = nu0 + info%levels = levels + info%relax = relax + info%omega = omega +!-------------------------------------------------------------------------------- +! 2.0 Setup grids +! +! Grid on the finest level +! + dx = lx/REAL(nx,8) + dy = ly/REAL(ny,8) + ALLOCATE(x(0:nx), y(0:ny)) + DO ix=0,nx + x(ix) = ix*dx + END DO + DO iy=0,ny + y(iy) = iy*dy + END DO + WRITE(*,'(a,3(1pe12.3))') 'dx, dy, dx/dy =', dx, dy, dx/dy +! + ALLOCATE(dense(0:nx)) + dense = fdense(x) +! +! Set up grids +! + t0 = mpi_wtime() + ALLOCATE(grids(levels)) + CALL create_grid_fd(x, y, grids, info, mat_type=mat_type, debug=nldebug) + WRITE(*,'(5a6)') 'l', 'nx', 'ny', 'rx', 'ry' + WRITE(*,'(5i6)') (l, grids(l)%n, grids(l)%rank, l=1,levels) + IF(nldebug) THEN + CALL printmat('** Prolongation matrix in 1st dim.**', grids(2)%transf(1)) + CALL printmat('** Prolongation matrix in 2nd dim.**', grids(2)%transf(2)) + END IF +! +! Set BC on grid transfer matrices +! + IF(prb.EQ.'dddd') CALL ibc_transf(grids,1,3) ! Direction X + CALL ibc_transf(grids,2,3) ! Direction Y + tsetup = mpi_wtime()-t0 +!-------------------------------------------------------------------------------- +! 3.0 Problem discretization +! +! Construct FD matrix and impose BC on all grids +! + t0=mpi_wtime() + DO l=1,levels + CALL fdmat(grids(l), fdense, icrosst) + IF(mat_type.EQ.'csr') CALL to_mat(grids(l)%mata) + END DO + tmat(1) = mpi_wtime()-t0 +! + t0=mpi_wtime() + DO l=1,levels + CALL ibc_fdmat(grids(l), prb) + END DO + tmat(2) = mpi_wtime()-t0 +! +! Set RHS and impose BC on the fiest grid +! + grids(1)%f(:,:) = frhs(x,y) +! + IF(prb.EQ.'dddd') THEN + grids(1)%f(0,:) = 0.0d0 ! Dirichlet on west and east + grids(1)%f(nx,:) = 0.0d0 + ELSE IF(prb.EQ.'nndd') THEN ! Neumann on west and east + grids(1)%f(0,:) = 0.5d0*grids(1)%f(0,:) + grids(1)%f(nx,:) = 0.5d0*grids(1)%f(nx,:) + END IF + grids(1)%f(:,0) = 0.0d0 ! Dirichlet on south and north + grids(1)%f(:,ny) = 0.0d0 +! +!-------------------------------------------------------------------------------- +! 4.0 Analytical solutions and RHS at the finest grid (l=1) +! + n = (nx+1)*(ny+1) ! Number of unknowns + ALLOCATE(sol_ana2d(0:nx,0:ny)) + sol_ana(1:n) => sol_ana2d + sol_ana2d(:,:) = fsol(x,y) +!-------------------------------------------------------------------------------- +! 5.0 Direct solution at the finest grid (l=1) +! + IF(nldirect) THEN + WRITE(*,'(/a)') 'Direct solution for the finest grid problem ...' + ALLOCATE(sol_direct2d(0:nx,0:ny)) + sol_direct(1:n) => sol_direct2d +! + t0 = mpi_wtime() + sol_direct = grids(1)%f1d + CALL direct_solve(grids(1), sol_direct, debug=nldebug) + tdirect = mpi_wtime()-t0 +! + t0 = mpi_wtime() + sol_direct = grids(1)%f1d + CALL direct_solve(grids(1), sol_direct, debug=nldebug) + tbsolve = mpi_wtime()-t0 +! +! Max norm and residual +! + err_direct = MAXVAL(ABS(sol_direct-sol_ana)) + resid_direct = residue(grids(1), grids(1)%f1d, sol_direct, 'inf') + WRITE(*,'(a,2(1pe12.3))') 'Max norm of error and residual norm', & + & err_direct, resid_direct + END IF +!-------------------------------------------------------------------------------- +! 5.0 Iterative solution using MG V-cycle +! + WRITE(*,'(/a)') 'Multigrid MG V-cycles ...' + ALLOCATE(errdisc(0:nits)) + ALLOCATE(resid(0:nits)) + ALLOCATE(rresid(0:nits)) +! +! Norm of A and b +! + IF(mat_type.EQ.'csr') THEN + norma = matnorm(grids(1)%mata, 'inf') + ELSE + norma = matnorm(grids(1)%mata_cds, 'inf') + END IF + normb = MAXVAL(ABS(grids(1)%f1d)) + WRITE(*,'(a,2(1pe12.3))') 'Norm A and RHS', norma, normb +! +! Initial guess +! + DO inu=1,nnu + info%nu1 = nu1(inu) + info%nu2 = nu2(inu) + WRITE(*,'(/2(a5,i3,2x))') 'nu1 =', nu1(inu), 'nu2 =', nu2(inu) + IF(nlfixed .AND. nldirect) THEN + grids(1)%v = sol_direct2d + ELSE + grids(1)%v = 0.0d0 + END IF +! + errdisc(0) = MAXVAL(ABS(grids(1)%v1d-sol_ana)) + resid(0) = residue(grids(1), grids(1)%f1d, grids(1)%v1d, 'inf') + rresid(0) = resid(0) / ( norma*MAXVAL(ABS(grids(1)%v1d)) + normb ) + WRITE(*,'(a4,3(a12,a8))') 'its', 'residue', 'ratio', 'disc. err', 'ratio', & + & 'rel. resid', 'ratio' + WRITE(*,'(i4,3(1pe12.3,8x))') 0, resid(0), errdisc(0), rresid(0) +! +! Iterations +! + t0 = mpi_wtime() + DO its=1,nits + CALL mg(grids, info, 1) + errdisc(its) = MAXVAL(ABS(grids(1)%v1d-sol_ana)) + resid(its) = residue(grids(1), grids(1)%f1d, grids(1)%v1d, 'inf') + rresid(its) = resid(its) / ( norma*MAXVAL(ABS(grids(1)%v1d)) + normb ) + WRITE(*,'((i4,3(1pe12.3,0pf8.2)))') its, & + & resid(its), resid(its)/resid(its-1), & + & errdisc(its), errdisc(its)/errdisc(its-1), & + & rresid(its), rresid(its)/rresid(its-1) + IF(resid(its) .LE. atol .or. rresid(its) .le. rtol) EXIT + END DO + niter(inu) = MIN(nits,its) + titer(inu) = mpi_wtime() - t0 + END DO +!-------------------------------------------------------------------------------- +! 9.0 Epilogue +! +! Display timing +! + WRITE(*,'(/a)') 'Timing ...' + WRITE(*,'(a,1pe12.3,i5)') 'Setup time (s) ', tsetup + WRITE(*,'(a,2(1pe12.3))') 'Matrix construction time(s)', tmat + WRITE(*,'(a,2(1pe12.3))') 'Direct and bsolve time (s) ', tdirect, tbsolve + WRITE(*,'(/3a6,a15)') 'nu1', 'nu2', 'niter', 'Iter time(s)' + DO inu=1,nnu + WRITE(*,'(3i6,3x,1pe12.3)') nu1(inu), nu2(inu), niter(inu), titer(inu) + END DO +! + WRITE(*,'(/a,f12.3)') 'Mem used so far (MB)', mem() +! +! Creata HDF5 file +! + IF(me.EQ.0) CALL h5file +! +! Clean up +! + DEALLOCATE(x) + DEALLOCATE(y) + DEALLOCATE(dense) + DEALLOCATE(grids) + DEALLOCATE(sol_ana2d) + IF(nldirect) DEALLOCATE(sol_direct2d) + DEALLOCATE(errdisc) + DEALLOCATE(resid) + DEALLOCATE(rresid) +! + CALL MPI_FINALIZE(ierr) +!-------------------------------------------------------------------------------- +CONTAINS +!+++ + FUNCTION fdense(x) +! +! Return density +! + DOUBLE PRECISION, INTENT(in) :: x(:) + DOUBLE PRECISION :: fdense(SIZE(x)) + fdense(:) = 0.5d0*beta*miome*EXP( -(x(:)-Lx/3d0)**2d0/(Lx/2d0)**2d0 ); + END FUNCTION fdense +!+++ + FUNCTION frhs(x,y) +! +! Return RHS +! + DOUBLE PRECISION, INTENT(in) :: x(:), y(:) + DOUBLE PRECISION :: frhs(SIZE(x),SIZE(y)) + DOUBLE PRECISION :: c, s, d(SIZE(x)) + DOUBLE PRECISION :: corr + INTEGER :: j + corr = 1.d0+icrosst**2/4.0d0 + d(:) = fdense(x(:)) + IF(prb.EQ.'dddd') THEN + DO j=1,SIZE(y) + c = COS(2.0d0*pi*ky*y(j)/Ly) + s = SIN(2.0d0*pi*ky*y(j)/Ly) + frhs(:,j) = -4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * SIN(2.0d0*pi*kx*x(:)/Lx)*s & + & -icrosst*kx*ky/Lx/Ly * COS(2.0d0*pi*kx*x(:)/Lx)*c ) & + & + d(:) * SIN(2.0d0*pi*kx*x(:)/Lx)*s + END DO + ELSE IF (prb.EQ.'nndd') THEN + DO j=1,SIZE(y) + c = COS(2.0d0*pi*ky*y(j)/Ly) + s = SIN(2.0d0*pi*ky*y(j)/Ly) + frhs(:,j) = -4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * COS(2.0d0*pi*kx*x(:)/Lx)*s & + & +icrosst*kx*ky/Lx/Ly * SIN(2.0d0*pi*kx*x(:)/Lx)*c ) & + & + d(:) * COS(2.0d0*pi*kx*x(:)/Lx)*s + END DO + END IF +!!$ frhs = -frhs + END FUNCTION frhs +!+++ + FUNCTION fsol(x,y) +! +! Return analytical solution +! + DOUBLE PRECISION, INTENT(in) :: x(:), y(:) + DOUBLE PRECISION :: fsol(SIZE(x),SIZE(y)) + DOUBLE PRECISION :: c + INTEGER :: j + IF(prb.EQ.'dddd') THEN + DO j=1,SIZE(y) + c = SIN(2.0d0*pi*ky*y(j)/Ly) + fsol(:,j) = SIN(2.0d0*pi*kx*x(:)/Lx)*c + END DO + ELSE IF (prb.EQ.'nndd') THEN + DO j=1,SIZE(y) + c = SIN(2.0d0*pi*ky*y(j)/Ly) + fsol(:,j) = COS(2.0d0*pi*kx*x(:)/Lx)*c + END DO + END IF + END FUNCTION fsol +!+++ + SUBROUTINE h5file + USE futils + CHARACTER(len=128) :: file='poisson_mg.h5' + INTEGER :: fid + INTEGER :: l + CHARACTER(len=64) :: dsname + CALL creatf(file, fid, real_prec='d') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NY', ny) + CALL attach(fid, '/', 'KX', kx) + CALL attach(fid, '/', 'KY', ky) + CALL attach(fid, '/', 'LX', Lx) + CALL attach(fid, '/', 'LY', Ly) + CALL attach(fid, '/', 'BETA', beta) + CALL attach(fid, '/', 'OMEGA', omega) + CALL attach(fid, '/', 'RELAX', relax) + CALL attach(fid, '/', 'MAT_TYPE', mat_type) + CALL attach(fid, '/', 'NITS', nits) + CALL attach(fid, '/', 'LEVELS', levels) + CALL attach(fid, '/', 'NNU', nnu) + CALL attach(fid, '/', 'NU0', nu0) + CALL attach(fid, '/', 'MU', mu) +! + CALL putarr(fid, '/nu1', nu1(1:nnu)) + CALL putarr(fid, '/nu2', nu2(1:nnu)) + CALL putarr(fid, '/dense', dense) + CALL creatg(fid, '/mglevels') + DO l=1,levels + WRITE(dsname,'("/mglevels/level.",i2.2)') l + CALL creatg(fid, TRIM(dsname)) + IF(mat_type.EQ.'csr') THEN + CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata) + ELSE + CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata_cds) + END IF + IF(l.GT.1) THEN + CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1)) + CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2)) + END IF + CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x) + CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y) + CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f) + CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v) + CALL putarr(fid, TRIM(dsname)//'/f1d', grids(l)%f1d) + CALL putarr(fid, TRIM(dsname)//'/v1d', grids(l)%v1d) + END DO +! +! Solutions at finest grid +! + CALL creatg(fid, '/solutions') + CALL putarr(fid, '/solutions/xg', grids(1)%x) + CALL putarr(fid, '/solutions/yg', grids(1)%y) + CALL putarr(fid, '/solutions/calc', grids(1)%v) + CALL putarr(fid, '/solutions/anal', sol_ana2d) + IF(nldirect) CALL putarr(fid, '/solutions/direct', sol_direct2d) +! + nits=niter(nnu) + CALL creatg(fid, '/Iterations') + CALL putarr(fid, '/Iterations/residues', resid(0:nits)) + CALL putarr(fid, '/Iterations/disc_errors', errdisc(0:nits)) +! + CALL closef(fid) + END SUBROUTINE h5file +!+++ + +END PROGRAM main diff --git a/multigrid/src/ppoisson_fd.f90 b/multigrid/src/ppoisson_fd.f90 new file mode 100644 index 0000000..0fbf675 --- /dev/null +++ b/multigrid/src/ppoisson_fd.f90 @@ -0,0 +1,418 @@ +!> +!> @file ppoisson_fd.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +! +! Test 2D parallel multigrid V-cycle +! +MODULE mod + USE iso_fortran_env, ONLY : rkind => real64 + IMPLICIT NONE +! + REAL(rkind), PARAMETER :: PI=4.d0*ATAN(1.0d0) +CONTAINS +END MODULE mod +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +PROGRAM main + USE mpi + USE fdmat_mod, ONLY : fdmat, ibc_fdmat, ibc_rhs + USE pputils2, ONLY : dist1d, timera, hostlist + USE gvector, ONLY : gvector_2d, norm2, ASSIGNMENT(=), OPERATOR(-) + USE parmg, ONLY : grid2_type, mg_info, create_grid, mg, exchange, & + & get_resids, disp, norm_vec, norm_mat + USE stencil, ONLY : stencil_2d, putmat + USE mod + IMPLICIT NONE +! + INTEGER, PARAMETER :: ndims=2 +! + INTEGER :: me, neighs(4), npes, ierr + INTEGER, DIMENSION(ndims) :: coords, comm1d + LOGICAL, DIMENSION(ndims) :: periods=[.FALSE.,.FALSE.] + LOGICAL :: reorder =.FALSE. + INTEGER :: comm_cart + INTEGER, DIMENSION(ndims) :: e0, s0, e, s, npt_glob, npt_loc +! + REAL(rkind), ALLOCATABLE :: xgrid(:), ygrid(:) + INTEGER, ALLOCATABLE :: id(:,:) + REAL(rkind) :: dx, dy + INTEGER :: npoints ! Number of points in FD stencil + REAL(rkind) :: t_mg, t_mg0, t_mg_min, t_mg_max +! + TYPE(gvector_2d) :: v_exact, resids, errs + REAL(rkind) :: norma, normb, normv + REAL(rkind), ALLOCATABLE :: resid_it(:), err_it(:), rresid(:) + REAL(rkind) :: ratio_err, ratio_resid, ratio_rresid + INTEGER, DIMENSION(ndims) :: g, npt_loc_min + INTEGER :: l, i, it + CHARACTER(len=64) :: str +! + TYPE(grid2_type), ALLOCATABLE :: grids(:) + TYPE(mg_info) :: info ! info for MG +! +! Input quantities +! + LOGICAL :: nldebug=.FALSE. + CHARACTER(len=64) :: filein = 'ppoisson_fd.in' + INTEGER :: dims(2)=[0,0] + CHARACTER(len=4) :: prb='dddd' + CHARACTER(len=4) :: relax='jac' + INTEGER :: nx=4, ny=4 ! Number of intervals + INTEGER :: kx=1, ky=1 + REAL(rkind) :: Lx=1.0, Ly=1.0 + REAL(rkind) :: icrosst=1.0, beta=0.0, miome=200.0 + REAL(rkind) :: omega=1.0d0 + INTEGER :: nits=100, direct_solve_nits=5 + INTEGER :: levels=2, nu1=3, nu2=3, mu=1, nu0=1 + REAL(rkind) :: rtol=1.e-8, atol=1.e-8, errtol=1.e-3 +! + NAMELIST /in/ nldebug, dims, prb, nx, ny, kx, ky, Lx, Ly, icrosst, beta, & + & miome, omega, nits, levels, relax, nu1, nu2, mu, nu0, & + & direct_solve_nits, rtol, atol, errtol +!================================================================================ +! 1.0 Prologue +! +! 2D process grid +! + CALL mpi_init(ierr) + CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) + CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) +! +! Read input filename from commmand line argument +! + IF( command_argument_count() > 0 ) THEN + CALL get_command_argument(1, filein) + END IF + IF(me.EQ.0) WRITE(*,'(a,a)') 'filein = ', TRIM(filein) +! +! Read problem inputs +! + OPEN(unit=99, file=filein, form='formatted') + READ(99,in) + CLOSE(99) +! + CALL mpi_dims_create(npes, ndims, dims, ierr) + CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, comm_cart,& + & ierr) + CALL mpi_comm_rank(comm_cart, me, ierr) + CALL mpi_cart_coords(comm_cart, me, ndims, coords, ierr) + CALL mpi_cart_shift(comm_cart, 0, 1, neighs(1), neighs(2), ierr) + CALL mpi_cart_shift(comm_cart, 1, 1, neighs(3), neighs(4), ierr) +! + CALL mpi_cart_sub(comm_cart, [.TRUE.,.FALSE.], comm1d(1), ierr) + CALL mpi_cart_sub(comm_cart, [.FALSE.,.TRUE.], comm1d(2), ierr) +! + info%comm = comm_cart + info%nu1 = nu1 + info%nu2 = nu2 + info%mu = mu + info%nu0 = nu0 + info%levels = levels + info%direct_solve_nits = direct_solve_nits + info%relax = relax + info%omega = omega +! + IF(me.EQ.0) THEN + WRITE(*, in) + END IF + IF(nldebug) THEN + CALL hostlist(comm_cart) + END IF +!================================================================================ +! 2.0 2d Grid construction +! +! Partition 2D grid +! + CALL timera(0, 'Grid_construction') + npt_glob(1) = nx+1 + npt_glob(2) = ny+1 + DO i=1,ndims + CALL dist1d(comm1d(i), 0, npt_glob(i), s(i), npt_loc(i)) + e(i) = s(i) + npt_loc(i) - 1 + END DO + IF(nldebug) THEN + WRITE(*,'(a,i3.3,a,2(3i8,", "))') 'PE', me, ' coords, s,e:', & + & (coords(i),s(i),e(i),i=1,ndims) + END IF +! +! Global mesh +! + dx = Lx/REAL(nx) + dy = Ly/REAL(ny) + ALLOCATE(xgrid(0:nx)) + ALLOCATE(ygrid(0:ny)) + xgrid = [ (i*dx, i=0,nx) ] + ygrid = [ (i*dy, i=0,ny) ] +! +! Create grid structure +! + ALLOCATE(grids(levels)) + npoints = 9 ! Size of FD stencil + ALLOCATE(id(npoints,2)) + id=RESHAPE([ 0, -1, 0, 1, -1, 1, -1, 0, 1, & + 0, -1, -1, -1, 0, 0, 1, 1, 1], & + [npoints,2]) + CALL create_grid(xgrid, ygrid, s, e, id, prb, grids, comm_cart) +! + IF(nldebug) THEN + DO l=1,levels + WRITE(str,'(a,i0)') 'Number of local points at level ', l + CALL disp(TRIM(str), grids(l)%npt_loc, comm_cart) + END DO + END IF + CALL mpi_reduce(grids(levels)%npt_loc, npt_loc_min, 2, MPI_INTEGER, MPI_MIN, & + & 0, comm_cart, ierr) + IF(me.EQ.0) THEN + WRITE(*,'(a,2i4)') 'Minimum local npt at coarsest grid:', npt_loc_min + END IF +! + CALL timera(1, 'Grid_construction') +!================================================================================ +! 3.0 FD Operator +! + CALL timera(0, 'FD Operator') +! + DO l=1,levels + CALL fdmat(grids(l), fdense, icrosst, grids(l)%fdmat) + CALL ibc_fdmat(grids(l)%fdmat, prb) + END DO +! + CALL timera(1, 'FD Operator') +!================================================================================ +! 4.0 RHS and exact solution at the finest grid (l=1) +! +! Allocate memory +! + CALL timera(0, 'RHS and exact sol') +! + s0 = grids(1)%s0; e0 = grids(1)%e0 + s = grids(1)%s; e = grids(1)%e + g = [1,1] + v_exact = gvector_2d(s, e, g) ! Exact solutions + errs = gvector_2d(s, e, g) ! Disc. errors + resids = gvector_2d(s, e, g) ! Residuals + ALLOCATE(resid_it(0:nits)) + ALLOCATE(rresid(0:nits)) + ALLOCATE(err_it(0:nits)) +! +! Set RHS at the finest grid and impose Dirichlet/Neuman BC. +! + grids(1)%f = frhs(xgrid(s(1):e(1)),ygrid(s(2):e(2))) + CALL ibc_rhs(grids(1)%f, s0, e0, prb) +! +! Exact solutions +! + v_exact = fexact(xgrid(s(1):e(1)),ygrid(s(2):e(2))) +! + CALL timera(1, 'RHS and exact sol') +!================================================================================ +! 5.0 MG V-cycle iteration loop +! +! Norm of A and b +! + norma = norm_mat(grids(1)%fdmat, comm_cart) + normb = norm_vec(grids(1)%f, comm_cart) + IF(me.EQ.0) THEN + WRITE(*,'(a,2(1pe12.3))') 'Norm A and RHS', norma, normb + END IF +! + grids(1)%v = 0.0d0 + resids = get_resids(comm_cart, grids(1)%fdmat, grids(1)%v, grids(1)%f) + errs = grids(1)%v - v_exact + err_it(0) = norm_vec(errs, comm_cart, root=0) + resid_it(0) = norm_vec(resids, comm_cart) + normv = norm_vec(grids(1)%v, comm_cart) + rresid(0) = resid_it(0) / ( norma*normv + normb ) +! + IF(me.EQ.0) THEN + WRITE(*,'(a4,3(a12,a8))') 'its', 'residue', 'ratio', 'disc. err', 'ratio', & + & 'rel. resid', 'ratio' + WRITE(*,'(i4,3(1pe12.3,8X))') 0, resid_it(0), err_it(0), rresid(0) + END IF +! + CALL timera(0, 'MG V-cycle loop') + t_mg = 0.0d0 + DO it=1,nits + t_mg0 = mpi_wtime() + CALL mg(grids, info, 1) + t_mg = t_mg + (mpi_wtime()-t_mg0) + resids = get_resids(comm_cart, grids(1)%fdmat, grids(1)%v, grids(1)%f) + errs = grids(1)%v - v_exact + err_it(it) = norm_vec(errs, comm_cart) + resid_it(it) = norm_vec(resids, comm_cart) + normv = norm_vec(grids(1)%v, comm_cart) + rresid(it) = resid_it(it) / ( norma*normv + normb ) + ratio_err = err_it(it)/err_it(it-1) + ratio_resid = resid_it(it)/resid_it(it-1) + ratio_rresid= rresid(it)/ rresid(it-1) + IF(me.EQ.0) THEN + WRITE(*,'(i4,3(1pe12.3,0pf8.2))') it, & + & resid_it(it), ratio_resid,& + & err_it(it), ratio_err, & + & rresid(it), ratio_rresid + END IF + IF(resid_it(it) .LE. atol .OR. rresid(it) .LE. rtol .OR. & + & ABS(ratio_err-1._rkind).LT.errtol) THEN + nits = it + EXIT + END IF + END DO +! + CALL timera(1, 'MG V-cycle loop') + CALL mpi_reduce(t_mg, t_mg_max, 1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm_cart, ierr) + CALL mpi_reduce(t_mg, t_mg_min, 1, MPI_DOUBLE_PRECISION, MPI_MIN, 0, comm_cart, ierr) + IF(me.EQ.0) THEN + WRITE(*,'(a,2(1pe10.3)/)') 'Minmax of MG (only) time (s):', t_mg_min, t_mg_max + END IF +!================================================================================ +! 9.0 Epilogue +! + IF(nldebug) THEN + CALL h5file + END IF +! + CALL timera(9, '') + CALL MPI_FINALIZE(ierr) +CONTAINS +! +!+++ + FUNCTION fdense(x) +! +! Return density +! + REAL(rkind), INTENT(in) :: x(:) + REAL(rkind) :: fdense(SIZE(x)) + fdense(:) = 0.5d0*beta*miome*EXP( -(x(:)-Lx/3d0)**2d0/(Lx/2d0)**2d0 ); + END FUNCTION fdense +!+++ + FUNCTION fexact(x,y) +! +! Return analytical solution +! + REAL(rkind), INTENT(in) :: x(:), y(:) + REAL(rkind) :: fexact(SIZE(x),SIZE(y)) + REAL(rkind) :: c + INTEGER :: j + IF(prb.EQ.'dddd') THEN + DO j=1,SIZE(y) + c = SIN(2.0d0*pi*ky*y(j)/Ly) + fexact(:,j) = SIN(2.0d0*pi*kx*x(:)/Lx)*c + END DO + ELSE IF (prb.EQ.'nndd') THEN + DO j=1,SIZE(y) + c = SIN(2.0d0*pi*ky*y(j)/Ly) + fexact(:,j) = COS(2.0d0*pi*kx*x(:)/Lx)*c + END DO + END IF + END FUNCTION fexact +!+++ + FUNCTION frhs(x,y) +! +! Return RHS +! + REAL(rkind), INTENT(in) :: x(:), y(:) + REAL(rkind) :: frhs(SIZE(x),SIZE(y)) + REAL(rkind) :: c, s, d(SIZE(x)) + REAL(rkind) :: corr + INTEGER :: j + corr = 1.d0+icrosst**2/4.0d0 + d(:) = fdense(x(:)) + IF(prb.EQ.'dddd') THEN + DO j=1,SIZE(y) + c = COS(2.0d0*pi*ky*y(j)/Ly) + s = SIN(2.0d0*pi*ky*y(j)/Ly) + frhs(:,j)=-4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * SIN(2.0d0*pi*kx*x(:)/Lx)*s & + & -icrosst*kx*ky/Lx/Ly * COS(2.0d0*pi*kx*x(:)/Lx)*c ) & + & + d(:) * SIN(2.0d0*pi*kx*x(:)/Lx)*s + END DO + ELSE IF (prb.EQ.'nndd') THEN + DO j=1,SIZE(y) + c = COS(2.0d0*pi*ky*y(j)/Ly) + s = SIN(2.0d0*pi*ky*y(j)/Ly) + frhs(:,j)=-4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * COS(2.0d0*pi*kx*x(:)/Lx)*s & + & +icrosst*kx*ky/Lx/Ly * SIN(2.0d0*pi*kx*x(:)/Lx)*c ) & + & + d(:) * COS(2.0d0*pi*kx*x(:)/Lx)*s + END DO + END IF + END FUNCTION frhs +!+++! + FUNCTION outerprod(x, y) RESULT(r) +! +! outer product +! + REAL(rkind), INTENT(in) :: x(:), y(:) + REAL(rkind) :: r(SIZE(x),SIZE(y)) + INTEGER :: i, j + DO j=1,SIZE(y) + DO i=1,SIZE(x) + r(i,j) = x(i)*y(j) + END DO + END DO + END FUNCTION outerprod +!+++ + SUBROUTINE h5file +! +! Result hdf5 file +! + USE futils + CHARACTER(len=128) :: file='ppoisson_fd.h5' + INTEGER :: fid + CALL creatf(file, fid, real_prec='d', mpicomm=comm_cart) + CALL attach(fid, '/', 'PRB', prb) + CALL attach(fid, '/', 'RELAX', relax) + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NY', ny) + CALL attach(fid, '/', 'KX', kx) + CALL attach(fid, '/', 'KY', ky) + CALL attach(fid, '/', 'LX', Lx) + CALL attach(fid, '/', 'LY', Ly) + CALL attach(fid, '/', 'ICROSST', icrosst) + CALL attach(fid, '/', 'BETA', beta) + CALL attach(fid, '/', 'MIOME', miome) + CALL attach(fid, '/', 'OMEGA', omega) + CALL attach(fid, '/', 'NITS', nits) + CALL attach(fid, '/', 'DIRECT_SOLVE_NITS', direct_solve_nits) + CALL attach(fid, '/', 'LEVELS', levels) + CALL attach(fid, '/', 'NU1', nu1) + CALL attach(fid, '/', 'NU2', nu2) + CALL attach(fid, '/', 'NU0', nu0) + CALL attach(fid, '/', 'MU', mu) +! + CALL putarr(fid, '/xgrid', xgrid, ionode=0) ! only rank 0 does IO + CALL putarr(fid, '/ygrid', ygrid, ionode=0) ! only rank 0 does IO +! + CALL putarrnd(fid, '/f', grids(1)%f%val, (/1,2/), garea=g) + CALL putarrnd(fid, '/v', v_exact%val, (/1,2/), garea=g) + CALL putarrnd(fid, '/u', grids(1)%v%val, (/1,2/), garea=g) + CALL putarrnd(fid, '/errs', errs%val, (/1,2/), garea=(/1,1/)) + CALL putarrnd(fid, '/resids', resids%val,(/1,2/), garea=(/1,1/)) +! + CALL putarr(fid, '/resid', resid_it(0:nits), ionode=0) + CALL putarr(fid, '/error', err_it(0:nits), ionode=0) +! + CALL putmat(fid, '/MAT', grids(1)%fdmat) + CALL closef(fid) + END SUBROUTINE h5file +!+++ +END PROGRAM main diff --git a/multigrid/src/stencil_mod.f90 b/multigrid/src/stencil_mod.f90 new file mode 100644 index 0000000..04095c5 --- /dev/null +++ b/multigrid/src/stencil_mod.f90 @@ -0,0 +1,243 @@ +!> +!> @file stencil_mod.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +MODULE stencil +! +! stencil_2d: Implement 2D stencil for matrix-less operations +! +! T.M. Tran, CRPP-EPFL +! August 2013 +! + USE iso_fortran_env, ONLY : rkind => real64 + IMPLICIT NONE +! + PRIVATE + PUBLIC :: stencil_2d, init, vmx, putmat, laplacian, & + & OPERATOR(*) +! + TYPE stencil_2d + LOGICAL :: nluni + INTEGER, DIMENSION(2) :: ldim, gdim + INTEGER, DIMENSION(2) :: s0, e0, s, e + INTEGER :: npoints + INTEGER, ALLOCATABLE :: id(:,:) + REAL(rkind), ALLOCATABLE :: val(:,:,:) + END TYPE stencil_2d +! + INTERFACE init + MODULE PROCEDURE init_stencil_2d + END INTERFACE init + INTERFACE vmx + MODULE PROCEDURE vmx_stencila_2d + MODULE PROCEDURE vmx_stencilg_2d + END INTERFACE vmx + INTERFACE putmat + module procedure putmat_stencil + END INTERFACE putmat +! + INTERFACE OPERATOR(*) + MODULE PROCEDURE vmx_stencila_2d + MODULE PROCEDURE vmx_stencilg_2d + END INTERFACE OPERATOR(*) +! +CONTAINS +!================================================================================ + SUBROUTINE init_stencil_2d(s, e, id, nluni, mat, comm) +! +! stencil_2d constructor +! + USE mpi + INTEGER, INTENT(in) :: s(2), e(2) ! Bounds in each dim. + INTEGER, INTENT(in) :: id(:,:) ! Structure of stencil + LOGICAL, INTENT(in) :: nluni ! Uniform stencil + TYPE(stencil_2d), INTENT(out) :: mat + INTEGER, INTENT(in) :: comm + INTEGER :: me, ndim=2, ierr + INTEGER :: npoints ! Size of the stencil +! + CALL mpi_comm_rank(comm, me, ierr) +! + IF(id(1,1).NE.0 .AND. id(1,2).NE.0) THEN + IF(me.EQ.0) THEN + WRITE(*,*) 'INIT_STENCIL: id(1,:) should be (0,0)!' + CALL mpi_abort(comm, -1, ierr) + END IF + END IF +! + npoints = SIZE(id,1) + mat%npoints = npoints + mat%s = s + mat%e = e + mat%nluni = nluni + IF(nluni) THEN + ALLOCATE(mat%val(1,1, 0:npoints-1)) + ELSE + ALLOCATE(mat%val(s(1):e(1), s(2):e(2), 0:npoints-1)) + END IF + ALLOCATE(mat%id(0:npoints-1, ndim)) + mat%id(:,:) = id(:,:) + mat%val(:,:,:) = 0.0 +! + mat%ldim = e-s+1 + CALL mpi_allreduce(mat%s, mat%s0, ndim, MPI_INTEGER, MPI_MIN, comm, ierr) + CALL mpi_allreduce(mat%e, mat%e0, ndim, MPI_INTEGER, MPI_MAX, comm, ierr) + mat%gdim = mat%e0 - mat%s0 + 1 +! + END SUBROUTINE init_stencil_2d +!================================================================================ + FUNCTION vmx_stencila_2d(mat, xarr) RESULT(res) +! +! Return product res = mat*x, where x and res are simple arrays +! + TYPE(stencil_2d), INTENT(in) :: mat + REAL(rkind), ALLOCATABLE, INTENT(in) :: xarr(:,:) + REAL(rkind) :: res(LBOUND(xarr,1):UBOUND(xarr,1), & + & LBOUND(xarr,2):UBOUND(xarr,2)) + INTEGER :: k, i, j + INTEGER, DIMENSION(2) :: smin, emax, d, lb, ub +! + smin(:) = mat%s0(:) + emax(:) = mat%e0(:) + res = 0.0 + DO k=0,mat%npoints-1 + d(:) = mat%id(k,:) + lb = MAX(smin, smin-d, mat%s) + ub = MIN(emax, emax-d, mat%e) + DO j=lb(2),ub(2) + DO i=lb(1),ub(1) + res(i,j) = res(i,j) + mat%val(i,j,k)*xarr(i+d(1),j+d(2)) + END DO + END DO + END DO + END FUNCTION vmx_stencila_2d +!================================================================================ + FUNCTION vmx_stencilg_2d(mat, xarr) RESULT(res) +! +! Return product res= mat*x, where x and res are gvectors +! + USE gvector, ONLY : gvector_2d + TYPE(stencil_2d), INTENT(in) :: mat + TYPE(gvector_2d), INTENT(in) :: xarr + TYPE(gvector_2d) :: res + INTEGER :: k, i, j + INTEGER, DIMENSION(2) :: d, s, e +! + s = xarr%s + e = xarr%e + res = gvector_2d(xarr%s, xarr%e, xarr%g) +! +! Diagonal contributions: d(0) = (0,0) + DO j=s(2),e(2) + DO i=s(1),e(1) + res%val(i,j) = mat%val(i,j,0)*xarr%val(i,j) + END DO + END DO +! + DO k=1,mat%npoints-1 + d(:) = mat%id(k,:) + DO j=s(2),e(2) + DO i=s(1),e(1) + res%val(i,j) = res%val(i,j) + mat%val(i,j,k)*xarr%val(i+d(1),j+d(2)) + END DO + END DO + END DO + END FUNCTION vmx_stencilg_2d +!================================================================================ + SUBROUTINE putmat_stencil(fid, label, mat, str) + USE futils + INTEGER, INTENT(in) :: fid + CHARACTER(len=*), INTENT(in) :: label + TYPE(stencil_2d), INTENT(in) :: mat + CHARACTER(len=*), INTENT(in), OPTIONAL :: str +! + IF(PRESENT(str)) THEN + CALL creatg(fid, label, str) + ELSE + CALL creatg(fid, label) + END IF +! + CALL putarr(fid, TRIM(label)//'/dists', mat%id, ionode=0) + CALL putarrnd(fid, TRIM(label)//'/val', mat%val, (/1,2/)) + END SUBROUTINE putmat_stencil +!======================================================================= + SUBROUTINE laplacian(dx, dy, mat) +! +! Construct a Laplacian using 5-point FD discretization +! Assume homegeneous Dirichlet BC on all 4 faces. +! + REAL(rkind), INTENT(in) :: dx, dy + TYPE(stencil_2d), INTENT(inout) :: mat +! + INTEGER :: i, j, k + INTEGER :: ieast, iwest, jsouth, jnorth + INTEGER, DIMENSION(2) :: d + REAL(rkind) :: dx2inv, dy2inv +! +! Assemble the stencil +! + dx2inv = 1.0d0/dx**2 + dy2inv = 1.0d0/dy**2 +! + mat%val(:,:,0) = -2.0d0*(dx2inv+dy2inv) ! Diagonal +! + DO k=1,mat%npoints-1 ! Off diagonal + d = mat%id(k,:) + DO j=mat%s(2),mat%e(2) + DO i=mat%s(1),mat%e(1) + IF(d(1).EQ.0) THEN ! north and south + mat%val(i,j,k) = dy2inv + ELSE IF(d(2).EQ.0) THEN ! east and west + mat%val(i,j,k) = dx2inv + END IF + END DO + END DO + END DO +! +! Impose Dirichlet BC on all 4 boundaries +! + ieast = mat%s0(1) + IF(ieast .EQ. mat%s(1)) THEN ! East boundary + mat%val(ieast, mat%s(2):mat%e(2), :) = 0.0 + mat%val(ieast, mat%s(2):mat%e(2), 0) = 1.0 + END IF + iwest = mat%e0(1) + IF(iwest .EQ. mat%e(1)) THEN ! West boundary + mat%val(iwest, mat%s(2):mat%e(2), :) = 0.0 + mat%val(iwest, mat%s(2):mat%e(2), 0) = 1.0 + END IF + jsouth = mat%s0(2) + IF(jsouth .EQ. mat%s(2)) THEN ! South boundary + mat%val(mat%s(1):mat%e(1), jsouth, :) = 0.0 + mat%val(mat%s(1):mat%e(1), jsouth, 0) = 1.0 + END IF + jnorth = mat%e0(2) + IF(jnorth .EQ. mat%e(2)) THEN ! North boundary + mat%val(mat%s(1):mat%e(1), jnorth, :) = 0.0 + mat%val(mat%s(1):mat%e(1), jnorth, 0) = 1.0 + END IF +! + END SUBROUTINE laplacian +!================================================================================ +END MODULE stencil diff --git a/multigrid/src/test_csr.f90 b/multigrid/src/test_csr.f90 new file mode 100644 index 0000000..02172b2 --- /dev/null +++ b/multigrid/src/test_csr.f90 @@ -0,0 +1,145 @@ +!> +!> @file test_csr.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Test routines of module csr_mod +! + USE multigrid + USE csr + IMPLICIT NONE +! + INTEGER :: nx=8, nidbas=1, alpha=0, modem=10 + DOUBLE PRECISION :: sigma=1.0d0, kmode=10.0 + LOGICAL :: nlper=.FALSE. + INTEGER :: ngauss, nterms + INTEGER :: i, j +! + TYPE(grid1d) :: gridx(1) + TYPE(csr_mat) :: mata +! + DOUBLE PRECISION, ALLOCATABLE :: arow(:), sum_row(:) + DOUBLE PRECISION, ALLOCATABLE :: acol(:), sum_col(:) + DOUBLE PRECISION, ALLOCATABLE :: sol(:), rhs(:) +! + NAMELIST /newrun/ nx, nidbas, sigma, kmode, modem, alpha, nlper +!---------------------------------------------------------------------------- + READ(*,newrun) + WRITE(*,newrun) +! +! Set grid +! + ngauss = CEILING(REAL(2*nidbas+alpha+1,8)/2.d0) + CALL create_grid(nx, nidbas, ngauss, alpha, gridx, nlper) +! +! Create FE matrice and set BC u(0)=u(1)=0 +! + nterms = 3 + CALL femat(gridx(1)%spl, mata, coefeq, nterms) + CALL to_mat(mata) + WRITE(*,'(/a,2i6)') 'rank, nnz', mata%rank, mata%nnz + WRITE(*,'(a/(12(1pe12.3)))') 'diag', mata%val(mata%idiag) + ALLOCATE(arow(mata%rank)) + ALLOCATE(acol(mata%rank)) + ALLOCATE(sum_row(mata%rank)) + ALLOCATE(sum_col(mata%rank)) + sum_col = 0.0d0 + DO i=1,mata%rank + CALL getrow(mata, i, arow) + sum_row(i) = SUM(arow) + sum_col = sum_col+arow + IF(i.EQ.1) WRITE(*,'(/a)') 'Matrix A' + WRITE(*,'(12(1pe12.3))') arow + END DO + WRITE(*,'(a/(12(1pe12.3)))') 'sum of row', sum_row + WRITE(*,'(a/(12(1pe12.3)))') 'sum of col', sum_col + DO j=1,mata%rank + CALL getcol(mata, j, acol) + sum_col(j) = SUM(acol) + END DO + WRITE(*,'(a/(12(1pe12.3)))') 'sum of col', sum_col +! +! Clear and rebuild matrix +! + WRITE(*,'(/a)') 'Clear and rebuild matrix ...' + CALL clear_mat(mata) + CALL femat(gridx(1)%spl, mata, coefeq, nterms) + WRITE(*,'(a,2i6)') 'rank, nnz', mata%rank, mata%nnz + DO i=1,mata%rank + CALL getrow(mata, i, arow) + WRITE(*,'(12(1pe12.3))') arow + END DO + WRITE(*,'(a/(12(1pe12.3)))') 'diag', mata%val(mata%idiag) +! +! Test VMX +! + ALLOCATE(sol(mata%rank)) + ALLOCATE(rhs(mata%rank)) + sol = 1.0d0 +! + rhs = vmx(mata, sol) + acol = rhs-sum_row + WRITE(*,'(/a)') 'Test VMX ...' + WRITE(*,'(a/(12(1pe12.3)))') 'amat*sol', rhs + WRITE(*,'(a,1pe12.3)') 'Error norm =', SQRT(DOT_PRODUCT(acol,acol)) +! + rhs = vmx(mata, sol, 'T') + acol = rhs-sum_col + WRITE(*,'(a/(12(1pe12.3)))') "amat'*sol", rhs + WRITE(*,'(a,1pe12.3)') 'Error norm =', SQRT(DOT_PRODUCT(acol,acol)) +! + CALL destroy(mata) +CONTAINS + SUBROUTINE coefeq(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE PRECISION, INTENT(out) :: c(:) + SELECT CASE (alpha) + CASE(0) ! Cartesian geometry + c(1) = 1.0d0 + idt(1) = 1 + idw(1) = 1 + c(2) = sigma + idt(2) = 0 + idw(2) = 0 + c(3) = 1.0d0 + idt(3) = 1 + idw(3) = 0 + CASE(1) + c(1) = x + idt(1) = 1 + idw(1) = 1 + c(2) = modem**2/x + idt(2) = 0 + idw(2) = 0 + c(3) = 1.0d0 + idt(3) = 1 + idw(3) = 0 + CASE default + WRITE(*,'(a,i0,a)') 'COEFEQ: alpha ', alpha, ' not defined!' + END SELECT + END SUBROUTINE coefeq +!---------------------------------------------------------------------------- +END PROGRAM main diff --git a/multigrid/src/test_gvec1d.f90 b/multigrid/src/test_gvec1d.f90 new file mode 100644 index 0000000..1dd22f5 --- /dev/null +++ b/multigrid/src/test_gvec1d.f90 @@ -0,0 +1,190 @@ +!> +!> @file test_gvec1d.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +! +! Test implementation of 1D vectors with arbirary +! vector bounds and ghost cell width. +! +! T.M. Tran (09/2013) +! +MODULE gvector + USE iso_fortran_env, ONLY : rkind => real64 + IMPLICIT NONE + PRIVATE + PUBLIC :: rkind, gvector_1d, disp, norm2, & + & OPERATOR(+), OPERATOR(-), OPERATOR(*), & + & ASSIGNMENT(=) + + TYPE gvector_1d + INTEGER :: s, e, g + REAL(rkind), ALLOCATABLE :: val(:) + END TYPE gvector_1d + + INTERFACE gvector_1d + MODULE PROCEDURE constructor + END INTERFACE gvector_1d + INTERFACE OPERATOR(+) + MODULE PROCEDURE add_scal + MODULE PROCEDURE add_vec + END INTERFACE OPERATOR(+) + INTERFACE OPERATOR(-) + MODULE PROCEDURE minus_vec + MODULE PROCEDURE substract_vec + END INTERFACE OPERATOR(-) + INTERFACE OPERATOR(*) + MODULE PROCEDURE scale_left + MODULE PROCEDURE scale_right + END INTERFACE OPERATOR(*) + INTERFACE ASSIGNMENT(=) + MODULE PROCEDURE from_scal + MODULE PROCEDURE from_vec + END INTERFACE ASSIGNMENT(=) + INTERFACE norm2 + module procedure norm2_gvector_1d + END INTERFACE norm2 + +CONTAINS + FUNCTION constructor(s, e, g) RESULT(res) + INTEGER, INTENT(in) :: s, e + INTEGER, OPTIONAL, INTENT(in) :: g + TYPE(gvector_1d) :: res + INTEGER :: lb, ub + res%g= 0 + IF(PRESENT(g)) res%g=g + res%s=s + res%e=e + lb = res%s-res%g + ub = res%e+res%g + ALLOCATE(res%val(lb:ub)) + res%val = -9999.0 + END FUNCTION constructor + + FUNCTION add_vec(lhs, rhs) RESULT(res) + TYPE(gvector_1d), INTENT(in) :: lhs, rhs + TYPE(gvector_1d) :: res + res = gvector_1d(lhs%s, lhs%e, lhs%g) + res%val(res%s:res%e) = lhs%val(res%s:res%e) + rhs%val(res%s:res%e) + END FUNCTION add_vec + + FUNCTION add_scal(lhs, rhs) RESULT(res) + TYPE(gvector_1d), INTENT(in) :: lhs + REAL(rkind), INTENT(in) :: rhs + TYPE(gvector_1d) :: res + res = gvector_1d(lhs%s, lhs%e, lhs%g) + res%val(res%s:res%e) = lhs%val(res%s:res%e) + rhs + END FUNCTION add_scal + + FUNCTION minus_vec(this) RESULT(res) + TYPE(gvector_1d), INTENT(in) :: this + TYPE(gvector_1d) :: res + res = gvector_1d(this%s, this%e, this%g) + res%val(res%s:res%e) = -this%val(res%s:res%e) + END FUNCTION minus_vec + + FUNCTION substract_vec(lhs, rhs) RESULT(res) + TYPE(gvector_1d), INTENT(in) :: lhs, rhs + TYPE(gvector_1d) :: res + res = gvector_1d(lhs%s, lhs%e, lhs%g) + res = lhs + (-rhs) + END FUNCTION substract_vec + + FUNCTION scale_left(lhs, rhs) RESULT(res) + REAL(rkind), INTENT(in) :: lhs + TYPE(gvector_1d), INTENT(in) :: rhs + TYPE(gvector_1d) :: res + res = gvector_1d(rhs%s, rhs%e, rhs%g) + res%val(res%s:res%e) = lhs * rhs%val(res%s:res%e) + END FUNCTION scale_left + + FUNCTION scale_right(lhs, rhs) RESULT(res) + TYPE(gvector_1d), INTENT(in) :: lhs + REAL(rkind), INTENT(in) :: rhs + TYPE(gvector_1d) :: res + res = gvector_1d(lhs%s, lhs%e, lhs%g) + res%val(res%s:res%e) = rhs * lhs%val(res%s:res%e) + END FUNCTION scale_right + + SUBROUTINE from_vec(lhs, rhs) + TYPE(gvector_1d), INTENT(inout) :: lhs + REAL(rkind), INTENT(in) :: rhs(:) + INTEGER :: n + n = lhs%e - lhs%s + 1 + IF(SIZE(rhs) .NE. n) THEN + PRINT*, 'from_vec: sizes of rhs and lhs not equal!' + STOP + END IF + lhs%val(lhs%s:lhs%e) = rhs(1:n) + END SUBROUTINE from_vec + + SUBROUTINE from_scal(lhs, rhs) + TYPE(gvector_1d), INTENT(inout) :: lhs + REAL(rkind), INTENT(in) :: rhs + lhs%val(lhs%s:lhs%e) = rhs + END SUBROUTINE from_scal + + SUBROUTINE disp(str,this) + CHARACTER(len=*), INTENT(in) :: str + TYPE(gvector_1d), INTENT(in) :: this + WRITE(*,'(/a,3i6)') str//': s, e, g =', this%s, this%e, this%g + WRITE(*,'(10(1pe12.3))') this%val + END SUBROUTINE disp + + FUNCTION norm2_gvector_1d(this) RESULT(res) + TYPE(gvector_1d), INTENT(in) :: this + REAL(rkind) :: res + res = NORM2(this%val(this%s:this%e)) + END FUNCTION norm2_gvector_1d +END MODULE gvector + +PROGRAM main + USE gvector + IMPLICIT NONE + INTEGER :: s=0, e=5, g=1 + INTEGER :: i, lb, ub + REAL(rkind) :: a=0.1 + TYPE(gvector_1d) :: v1, v2, v3 +! + lb = s-g + ub = e+g + v1 = gvector_1d(s, e, g) + v1%val(s:e) = [ (i, i=s,e) ] + CALL disp('v1', v1) +! + v2 = v1 + a*v1 + CALL disp('v1+a*v1', v2) +! + v3 = v1 - v1*a + CALL disp('v1-v1*a', v3) +! + WRITE(*,'(a,1pe12.3)') 'norm of v1 =', NORM2(v1) + WRITE(*,'(a,1pe12.3)') 'norm of v1-a*v1 =', NORM2(v1-a*v1) +! + v1 = 0.0d0 + CALL disp('Should be all zero', v1) + v2 = [ 1.d0, 2.d0, 3.d0, 4.d0, 5.d0, 6.d0 ] + CALL disp('Should be (1. 2. 3. 4. 5. 6.)', v2) + +END PROGRAM main + diff --git a/multigrid/src/test_intergrid0.f90 b/multigrid/src/test_intergrid0.f90 new file mode 100644 index 0000000..7a76d32 --- /dev/null +++ b/multigrid/src/test_intergrid0.f90 @@ -0,0 +1,230 @@ +!> +!> @file test_intergrid0.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! intergrid transfer using *serial* multigrid module: +! - restriction of rhs +! - prolongation of sol +! + USE multigrid, ONLY : grid2d, mg_info, & + & get_lmax, create_grid_fd, ibc_transf, & + & prolong, restrict + IMPLICIT NONE + DOUBLE PRECISION, PARAMETER :: pi=4.0d0*ATAN(1.0d0) + DOUBLE PRECISION :: Lx, Ly, kx, ky, icrosst, beta, miome + INTEGER :: nx, ny, levels + CHARACTER(len=4) :: prb + LOGICAL :: nldebug +! + DOUBLE PRECISION :: dx, dy + DOUBLE PRECISION, ALLOCATABLE :: x(:),y(:) +! + TYPE(mg_info) :: info ! info for MG + TYPE(grid2d), ALLOCATABLE :: grids(:) +! + INTEGER :: i, l +! + NAMELIST /parameters/ prb, nx, ny, levels, Lx, Ly, kx, ky, icrosst, beta, & + & miome, nldebug +!-------------------------------------------------------------------------------- +! +! Default inputs +! + nx=32 + ny=32 + levels = 2 + kx=1 + ky=1 + icrosst=1.0d0 + Lx = 1.0D0 + Ly = 1.0D0 + miome = 200d0 + beta = 0d0 + prb = 'dddd' + nldebug = .FALSE. +! + READ(*,parameters) + WRITE(*,parameters) +! +! Fine grid +! + dx = lx/REAL(nx,8) + dy = ly/REAL(ny,8) + ALLOCATE(x(0:nx), y(0:ny)) + x = dx * [(i,i=0,nx)] + y = dy * [(i,i=0,ny)] + WRITE(*,'(a/10(1pe12.3))') 'x =', x + WRITE(*,'(a/10(1pe12.3))') 'y =', y +! +! Create array of grids +! + levels = MIN(levels, get_lmax(nx), get_lmax(ny)) + WRITE(*,'(a,i4)') 'Number of levels', levels + ALLOCATE(grids(levels)) + info%nu1 = 1 + info%nu2 = 1 + info%mu = 1 + info%nu0 = 1 + info%levels = levels + info%relax = 'jac' + info%omega = 1 + CALL create_grid_fd(x, y, grids, info, mat_type='cds', debug=nldebug) +! +! Set BC on grid transfer matrices +! + IF(prb.EQ.'dddd') CALL ibc_transf(grids,1,3) ! Direction X + CALL ibc_transf(grids,2,3) ! Direction Y +! +! Define RHS at l=1, compute RHS at l=2,...,levels by "restriction". +! + grids(1)%f(:,:) = frhs(grids(1)%x,grids(1)%y) + DO l=2,levels + grids(l)%f = restrict(grids(l)%matp, grids(l-1)%f) + grids(l)%f = 0.25d0*grids(l)%f ! Scaling for FD + END DO +! +! Define SOL at l=levels, compute SOL at l=levels-1,..,1 by "prolongation" +! + grids(levels)%v(:,:) = fsol(grids(levels)%x,grids(levels)%y) + DO l=levels-1,1,-1 + grids(l)%v = prolong(grids(l+1)%matp, grids(l+1)%v) + END DO +! + IF(nldebug) THEN + DO l=1,levels + WRITE(*,'(a,i3)') '==== Level', l + WRITE(*,'(a)') 'f =' + DO i=0,grids(l)%n(1) + WRITE(*,'(10f8.3)') grids(l)%f(i,:) + END DO + WRITE(*,'(a)') 'v =' + DO i=0,grids(l)%n(1) + WRITE(*,'(10f8.3)') grids(l)%v(i,:) + END DO + END DO + END IF +! +! Epilogue +! + CALL h5file +!-------------------------------------------------------------------------------- +CONTAINS +!+++ + FUNCTION fdense(x) +! +! Return density +! + DOUBLE PRECISION, INTENT(in) :: x(:) + DOUBLE PRECISION :: fdense(SIZE(x)) + fdense(:) = 0.5d0*beta*miome*EXP( -(x(:)-Lx/3d0)**2d0/(Lx/2d0)**2d0 ); + END FUNCTION fdense +!+++ + FUNCTION frhs(x,y) +! +! Return RHS +! + DOUBLE PRECISION, INTENT(in) :: x(:), y(:) + DOUBLE PRECISION :: frhs(SIZE(x),SIZE(y)) + DOUBLE PRECISION :: c, s, d(SIZE(x)) + DOUBLE PRECISION :: corr + INTEGER :: j + corr = 1.d0+icrosst**2/4.0d0 + d(:) = fdense(x(:)) + IF(prb.EQ.'dddd') THEN + DO j=1,SIZE(y) + c = COS(2.0d0*pi*ky*y(j)/Ly) + s = SIN(2.0d0*pi*ky*y(j)/Ly) + frhs(:,j) = -4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * SIN(2.0d0*pi*kx*x(:)/Lx)*s & + & -icrosst*kx*ky/Lx/Ly * COS(2.0d0*pi*kx*x(:)/Lx)*c ) & + & + d(:) * SIN(2.0d0*pi*kx*x(:)/Lx)*s + END DO + ELSE IF (prb.EQ.'nndd') THEN + DO j=1,SIZE(y) + c = COS(2.0d0*pi*ky*y(j)/Ly) + s = SIN(2.0d0*pi*ky*y(j)/Ly) + frhs(:,j) = -4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * COS(2.0d0*pi*kx*x(:)/Lx)*s & + & +icrosst*kx*ky/Lx/Ly * SIN(2.0d0*pi*kx*x(:)/Lx)*c ) & + & + d(:) * COS(2.0d0*pi*kx*x(:)/Lx)*s + END DO + END IF + END FUNCTION frhs +!+++ + FUNCTION fsol(x,y) +! +! Return analytical solution +! + DOUBLE PRECISION, INTENT(in) :: x(:), y(:) + DOUBLE PRECISION :: fsol(SIZE(x),SIZE(y)) + DOUBLE PRECISION :: c + INTEGER :: j + IF(prb.EQ.'dddd') THEN + DO j=1,SIZE(y) + c = SIN(2.0d0*pi*ky*y(j)/Ly) + fsol(:,j) = SIN(2.0d0*pi*kx*x(:)/Lx)*c + END DO + ELSE IF (prb.EQ.'nndd') THEN + DO j=1,SIZE(y) + c = SIN(2.0d0*pi*ky*y(j)/Ly) + fsol(:,j) = COS(2.0d0*pi*kx*x(:)/Lx)*c + END DO + END IF + END FUNCTION fsol +!+++ + SUBROUTINE h5file + USE futils + USE csr, ONLY : putmat + CHARACTER(len=128) :: file='test_intergrid0.h5' + INTEGER :: fid + INTEGER :: l + CHARACTER(len=64) :: dsname + CALL creatf(file, fid, real_prec='d') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NY', ny) + CALL attach(fid, '/', 'KX', kx) + CALL attach(fid, '/', 'KY', ky) + CALL attach(fid, '/', 'LX', Lx) + CALL attach(fid, '/', 'LY', Ly) + CALL attach(fid, '/', 'BETA', beta) + CALL attach(fid, '/', 'LEVELS', levels) + CALL attach(fid, '/', 'PRB', prb) + CALL attach(fid, '/', 'NLDEBUG', nldebug) + CALL creatg(fid, '/mglevels') + DO l=1,levels + WRITE(dsname,'("/mglevels/level.",i2.2)') l + CALL creatg(fid, TRIM(dsname)) + IF(l.GT.1) THEN + CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1)) + CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2)) + END IF + CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x) + CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y) + CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f) + CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v) + END DO + CALL closef(fid) + END SUBROUTINE h5file +!+++ +END PROGRAM main diff --git a/multigrid/src/test_intergrid1.f90 b/multigrid/src/test_intergrid1.f90 new file mode 100644 index 0000000..0d0a23b --- /dev/null +++ b/multigrid/src/test_intergrid1.f90 @@ -0,0 +1,240 @@ +!> +!> @file test_intergrid1.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Test implementation of (parallel) matrix-free +! + USE iso_fortran_env, ONLY : rkind => real64 + USE parmg, ONLY : grid2_type, init_restrict, coarse, get_lmax, & + & exchange, prolong, restrict, disp, norm_vec + USE pputils2, ONLY : dist1d + USE gvector, ONLY : gvector_2d,OPERATOR(-) + USE futils + USE mpi + IMPLICIT NONE +! + INTEGER, PARAMETER :: ndims=2 + INTEGER :: ierr, me, npes + INTEGER, DIMENSION(ndims) :: dims=[0,0] + INTEGER, DIMENSION(ndims) :: lmax, coords, comm1d + LOGICAL, DIMENSION(ndims) :: periods=[.FALSE.,.FALSE.] + LOGICAL :: reorder =.FALSE. + INTEGER :: comm_cart, comm_futils +! + INTEGER :: fin + CHARACTER(len=64) :: filein = 'test_intergrid0.h5' + CHARACTER(len=64) :: dsname + CHARACTER(len=4) :: prb + LOGICAL :: nldebug +! + INTEGER :: nx, ny, levels + TYPE(grid2_type), ALLOCATABLE :: grids(:), new_grids(:) + INTEGER, DIMENSION(ndims) :: e, s, npt_glob, npt_loc, npt_loc_min +! + CHARACTER(len=64) :: str + REAL(rkind) :: err + INTEGER :: i, k, l +!-------------------------------------------------------------------------------- +! 1.0 Prologue +! +! Init MPI and setup 2D grid topology +! + CALL mpi_init(ierr) + CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) + CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) + CALL mpi_dims_create(npes, ndims, dims, ierr) + CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, comm_cart,& + & ierr) + CALL mpi_cart_coords(comm_cart, me, ndims, coords, ierr) + CALL mpi_cart_sub(comm_cart, [.TRUE.,.FALSE.], comm1d(1), ierr) + CALL mpi_cart_sub(comm_cart, [.FALSE.,.TRUE.], comm1d(2), ierr) +! + IF( me .EQ. 0 ) WRITE(*,'(a,i3,i3)') '2d processor grid', dims +! +! Get nx, ny, levels from h5 file created by test_intergrid0 +! + IF( command_argument_count() > 0 ) THEN + CALL get_command_argument(1, filein) + END IF + IF(me.EQ.0) WRITE(*,'(a,a)') 'filein = ', TRIM(filein) +! + CALL mpi_comm_dup(comm_cart, comm_futils, ierr) + CALL openf(filein, fin, mpicomm=comm_futils) + CALL getatt(fin, '/', 'NX', nx, ierr) + CALL getatt(fin, '/', 'NY', ny, ierr) + CALL getatt(fin, '/', 'LEVELS', levels, ierr) + CALL getatt(fin, '/', 'PRB', prb, ierr) + CALL getatt(fin, '/', 'NLDEBUG', nldebug, ierr) + IF(me.EQ.0) WRITE(*,'(a,a,3i5,l3)') 'prb, nx, ny, levels: ', prb, nx, ny, & + & levels, nldebug +!-------------------------------------------------------------------------------- +! 2.0 Read (f,v) from h5 file +! + ALLOCATE(grids(levels)) +! +! Partition on finest grid +! + npt_glob(1) = nx+1 + npt_glob(2) = ny+1 + DO i=1,ndims + CALL dist1d(comm1d(i), 0, npt_glob(i), s(i), npt_loc(i)) + e(i) = s(i) + npt_loc(i) - 1 + lmax(i) = get_lmax(s(i), npt_loc(i), 1, comm1d(i)) + END DO + npt_loc = e-s+1 + IF(me.EQ.0) WRITE(*,'(a,2i4)') 'lmax', lmax +! +! Partition on coaser grids +! + DO l=1,levels + IF(l.GT.1) THEN + CALL coarse(s,e) + npt_loc = e-s+1 + CALL mpi_allreduce(npt_loc, npt_loc_min, 2, MPI_INTEGER, & + & MPI_MIN, comm_cart, ierr) + CALL mpi_allreduce(e, npt_glob, 2, MPI_INTEGER, MPI_MAX, & + & comm_cart, ierr) + npt_glob = npt_glob+1 + END IF + WRITE(str,'(a,i3,a)') 'Partition at level', l, ': start. index =' + CALL disp(TRIM(str), s, comm_cart) + IF(me.EQ.0) THEN + WRITE(*,'(a,2i6)') 'npt_glob ', npt_glob + WRITE(*,'(a,2i6)') 'npt_loc_min', npt_loc_min + END IF + grids(l)%s = s + grids(l)%e = e + grids(l)%npt = npt_glob + grids(l)%f = gvector_2d(s, e, [1,1]) + grids(l)%v = gvector_2d(s, e, [1,1]) + ALLOCATE(grids(l)%x(0:npt_glob(1)-1)) ! Global coords (x,y) + ALLOCATE(grids(l)%y(0:npt_glob(2)-1)) + WRITE(dsname,'("/mglevels/level.",i2.2)') l + CALL getarr(fin, TRIM(dsname)//"/x", grids(l)%x) + CALL getarr(fin, TRIM(dsname)//"/y", grids(l)%y) + CALL getarrnd(fin, TRIM(dsname)//"/v", grids(l)%v%val, [1,2], garea=[1,1]) + CALL getarrnd(fin, TRIM(dsname)//"/f", grids(l)%f%val, [1,2], garea=[1,1]) + END DO +!-------------------------------------------------------------------------------- +! 3.0 Parallel intergrid transfer +! + ALLOCATE(new_grids(levels)) + CALL copy_grids(grids, new_grids) +! +! Set up restriction stencil +! + DO l=2,levels + CALL init_restrict(new_grids(l), prb, comm_cart) + END DO +! +! Prolongation of v +! + DO l=levels-1,1,-1 + CALL exchange(comm_cart, grids(l+1)%v) + CALL prolong(grids(l+1)%v, new_grids(l)%v) + IF(nldebug) THEN + IF(me.EQ.0) WRITE(*,'(a)') '=====' + DO k=0,npes + IF(me.EQ.k) THEN + s = grids(l+1)%f%s + e = grids(l+1)%f%e + WRITE(*,'(a,i2)') 'reference vbar on proc.', me + DO i=s(1),e(1) + WRITE(*,'(10f8.3)') grids(l+1)%v%val(i,s(2):e(2)) + END DO + s = grids(l)%f%s + e = grids(l)%f%e + WRITE(*,'(a,i2)') 'reference v on proc.', me + DO i=s(1),e(1) + WRITE(*,'(10f8.3)') grids(l)%v%val(i,s(2):e(2)) + END DO + WRITE(*,'(a,i2)') 'compute v on proc.', me + DO i=s(1),e(1) + WRITE(*,'(10f8.3)') new_grids(l)%v%val(i,s(2):e(2)) + END DO + END IF + CALL mpi_barrier(comm_cart, ierr) + END DO + END IF + err = norm_vec(new_grids(l)%v-grids(l)%v, comm_cart, 0) + IF(me.EQ.0) WRITE(*,'(a,i3,1pe12.3)') 'Error of prolongation: ', l, err + END DO +! +! Restriction of f +! + DO l=2,levels + CALL exchange(comm_cart, grids(l-1)%f) + CALL restrict(new_grids(l)%restrict_mat, grids(l-1)%f, new_grids(l)%f) + IF(nldebug) THEN + IF(me.EQ.0) WRITE(*,'(a)') '=====' + DO k=0,npes + IF(me.EQ.k) THEN + s = grids(l-1)%f%s + e = grids(l-1)%f%e + WRITE(*,'(a,i2)') 'reference f on proc.', me + DO i=s(1),e(1) + WRITE(*,'(10f8.3)') grids(l-1)%f%val(i,s(2):e(2)) + END DO + s = grids(l)%f%s + e = grids(l)%f%e + WRITE(*,'(a,i2)') 'reference fbar on proc.', me + DO i=s(1),e(1) + WRITE(*,'(10f8.3)') grids(l)%f%val(i,s(2):e(2)) + END DO + WRITE(*,'(a,i2)') 'compute fbar on proc.', me + DO i=s(1),e(1) + WRITE(*,'(10f8.3)') new_grids(l)%f%val(i,s(2):e(2)) + END DO + END IF + CALL mpi_barrier(comm_cart, ierr) + END DO + END IF + err = norm_vec(new_grids(l)%f-grids(l)%f, comm_cart, 0) + IF(me.EQ.0) WRITE(*,'(a,i3,1pe12.3)') 'Error of restriction: ', l, err + END DO +!-------------------------------------------------------------------------------- +! 9.0 Epilogue +! + CALL closef(fin) + CALL mpi_finalize(ierr) +! +CONTAINS + SUBROUTINE copy_grids(g1, g2) + TYPE(grid2_type), INTENT(in) :: g1(:) + TYPE(grid2_type), INTENT(out) :: g2(:) + INTEGER :: l + DO l=1,SIZE(g1) + g2(l)%s = g1(l)%s + g2(l)%e = g1(l)%e + g2(l)%npt_loc = g1(l)%npt_loc + g2(l)%npt = g1(l)%npt + ALLOCATE(g2(l)%x(0:g2(l)%npt(1)-1)); g2(l)%x = g1(l)%x + ALLOCATE(g2(l)%y(0:g2(l)%npt(2)-1)); g2(l)%y = g1(l)%y + g2(l)%v = gvector_2d(g1(l)%v%s, g1(l)%v%e, g1(l)%v%g); g2(l)%v%val = g1(l)%f%val + g2(l)%f = gvector_2d(g1(l)%f%s, g1(l)%f%e, g1(l)%f%g); g2(l)%f%val = g1(l)%f%val + END DO + END SUBROUTINE copy_grids +END PROGRAM main diff --git a/multigrid/src/test_jacobi.f90 b/multigrid/src/test_jacobi.f90 new file mode 100644 index 0000000..f914535 --- /dev/null +++ b/multigrid/src/test_jacobi.f90 @@ -0,0 +1,254 @@ +!> +!> @file test_jacobi.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +! +! Test 2D parallel Jacobi using STENCIL_2D matrix-free structure. +! +MODULE mod + USE iso_fortran_env, ONLY : rkind => real64 + IMPLICIT NONE +! + LOGICAL, PARAMETER :: nldebug=.FALSE. + REAL(rkind), PARAMETER :: PI=4.d0*ATAN(1.0d0) +CONTAINS +END MODULE mod +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +PROGRAM main + USE mpi + USE pputils2, ONLY : dist1d, exchange, norm2_vec=>ppnorm2, timera, hostlist + USE parmg, ONLY : jacobi, get_resids + USE stencil, ONLY : stencil_2d, init, laplacian, putmat + USE mod + IMPLICIT NONE +! + INTEGER, PARAMETER :: ndims=2 +! + INTEGER :: me, neighs(4), npes, ierr + INTEGER, DIMENSION(ndims) :: dims=[0,0] + INTEGER, DIMENSION(ndims) :: coords, comm1d + LOGICAL, DIMENSION(ndims) :: periods=[.FALSE.,.FALSE.] + LOGICAL :: reorder =.FALSE. + INTEGER :: comm_cart +! + INTEGER :: nx=4, ny=4 ! Number of intervals + INTEGER, DIMENSION(ndims) :: e, s, lb, ub, npt_glob, npt_loc +! + REAL(rkind), ALLOCATABLE :: xgrid(:), ygrid(:) + REAL(rkind) :: dx, dy + INTEGER, DIMENSION(5,2) :: id ! 5-point stencil + INTEGER :: npoints + TYPE(stencil_2d) :: mat + INTEGER :: i +! + REAL(rkind), ALLOCATABLE :: f(:,:), v(:,:), u(:,:) + REAL(rkind), ALLOCATABLE :: resids(:,:), errs(:,:) + REAL(rkind), ALLOCATABLE :: resid_it(:), err_it(:) + REAL(rkind) :: omega=1.0d0, resid + INTEGER :: it, it_skip, nits=100 +! + NAMELIST /in/ nx, ny, omega, nits +!================================================================================ +! 1.0 Prologue +! +! 2D process grid + CALL mpi_init(ierr) + CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) + CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) + CALL mpi_dims_create(npes, ndims, dims, ierr) + CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, comm_cart,& + & ierr) +! + CALL mpi_comm_rank(comm_cart, me, ierr) + CALL mpi_cart_coords(comm_cart, me, ndims, coords, ierr) + CALL mpi_cart_shift(comm_cart, 0, 1, neighs(1), neighs(2), ierr) + CALL mpi_cart_shift(comm_cart, 1, 1, neighs(3), neighs(4), ierr) +! + CALL mpi_cart_sub(comm_cart, [.TRUE.,.FALSE.], comm1d(1), ierr) + CALL mpi_cart_sub(comm_cart, [.FALSE.,.TRUE.], comm1d(2), ierr) +! + CALL hostlist(comm_cart) + IF(me.EQ.0) THEN + WRITE(*,'(a,i0,a,i0/)') "Process grid: ", dims(1), " X ", dims(2) + END IF +! +! Read problem inputs + IF(me.EQ.0) THEN + READ(*,in) + WRITE(*,in) + END IF +! + CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, comm_cart, ierr) + CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, comm_cart, ierr) + CALL mpi_bcast(nits, 1, MPI_INTEGER, 0, comm_cart, ierr) + CALL mpi_bcast(omega, 1, MPI_DOUBLE_PRECISION, 0, comm_cart, ierr) +!================================================================================ +! 2.0 2d Grid construction +! +! Partition 2D grid + CALL timera(0, 'Grid_construction') + npt_glob(1) = nx+1 + npt_glob(2) = ny+1 + DO i=1,ndims + CALL dist1d(comm1d(i), 0, npt_glob(i), s(i), npt_loc(i)) + e(i) = s(i) + npt_loc(i) - 1 + END DO + WRITE(*,'(a,i3.3,a,2(3i4,", "))') 'PE', me, ' coords, s,e:', & + & (coords(i),s(i),e(i),i=1,ndims) +! +! Global mesh + dx = 1.0d0/REAL(nx) + dy = 1.0d0/REAL(ny) + ALLOCATE(xgrid(0:nx)) + ALLOCATE(ygrid(0:ny)) + xgrid = [ (i*dx, i=0,nx) ] + ygrid = [ (i*dy, i=0,ny) ] + CALL timera(1, 'Grid_construction') +!================================================================================ +! 3.0 FD Laplacian +! + CALL timera(0, 'Laplacian') + id=RESHAPE([ 0, -1, 0, 1, 0, & + 0, 0,-1, 0, 1], & + [5,2]) + npoints = 5 + CALL init(s, e, id, .FALSE., mat, comm_cart) +! + CALL laplacian(dx, dy, mat) + CALL timera(1, 'Laplacian') +!================================================================================ +! 4.0 Test parallel Jacobi with \nabla u(x,y) = f(x,y) +! +! Problem definition +! + s = mat%s + e = mat%e + lb = s-1 + ub = e+1 + ALLOCATE(f(lb(1):ub(1),lb(2):ub(2))) ! RHS + ALLOCATE(v(lb(1):ub(1),lb(2):ub(2))) ! Exact solutions + ALLOCATE(u(lb(1):ub(1),lb(2):ub(2))) ! Computed solutions + ALLOCATE(resids(lb(1):ub(1),lb(2):ub(2))) ! Residuals + ALLOCATE(errs(lb(1):ub(1),lb(2):ub(2))) ! Errors + ALLOCATE(resid_it(0:nits)) + ALLOCATE(err_it(0:nits)) +! + f(s(1):e(1),s(2):e(2)) = rhs(xgrid(s(1):e(1)),ygrid(s(2):e(2))) + v(s(1):e(1),s(2):e(2)) = exact(xgrid(s(1):e(1)),ygrid(s(2):e(2))) + CALL exchange(comm_cart, f) + CALL exchange(comm_cart, v) +! +! Residuals of exact solutions + resids = get_resids(mat,v,f) + resid = norm2_vec(resids, comm_cart) +! +! Jacobi iteration loop +! + IF(me.EQ.0) WRITE(*,'(/a6,t14,a,t34,a)') 'it', 'residual norm', 'discretization error' + u = 0.0d0 + CALL exchange(comm_cart, u) + resids = get_resids(mat,u,f) + errs = u-v + resid_it(0) = norm2_vec(resids, comm_cart) + err_it(0) = norm2_vec(errs, comm_cart) + it_skip = MAX(1,nits/10) +! + CALL timera(0, 'Jacobi') + DO it=1,nits + CALL jacobi(mat, omega, 1, u, f) + CALL exchange(comm_cart, u) + resids = get_resids(mat,u,f) + errs = u-v + resid_it(it) = norm2_vec(resids, comm_cart) + err_it(it) = norm2_vec(errs, comm_cart) + IF(me.EQ.0 .AND. MODULO(it,it_skip).EQ.0 ) THEN + WRITE(*,'(i6,4(1pe12.3))') it, resid_it(it), resid_it(it)/resid_it(it-1),& + & err_it(it), err_it(it)/err_it(it-1) + END IF + END DO + CALL timera(1, 'Jacobi') +!================================================================================ +! 9.0 Epilogue + CALL h5file +! + CALL timera(9, '') + CALL MPI_FINALIZE(ierr) +CONTAINS + SUBROUTINE disp(str, arr) + CHARACTER(len=*), INTENT(in) :: str + REAL(rkind), INTENT(in) :: arr(:,:) + INTEGER :: j + WRITE(*,'(/a)') str + DO j=1,SIZE(arr,2) + WRITE(*,'(10f8.3)') arr(:,j) + END DO + END SUBROUTINE disp +! + SUBROUTINE h5file + USE futils + CHARACTER(len=128) :: file='test_jacobi.h5' + INTEGER :: fid + CALL creatf(file, fid, real_prec='d', mpicomm=comm_cart) + CALL putarr(fid, '/xgrid', xgrid, ionode=0) ! only rank 0 does IO + CALL putarr(fid, '/ygrid', ygrid, ionode=0) ! only rank 0 does IO +! + CALL putarrnd(fid, '/f', f, (/1,2/), garea=(/1,1/)) + CALL putarrnd(fid, '/v', v, (/1,2/), garea=(/1,1/)) + CALL putarrnd(fid, '/u', u, (/1,2/), garea=(/1,1/)) + CALL putarrnd(fid, '/errs', errs, (/1,2/), garea=(/1,1/)) + CALL putarrnd(fid, '/resids', resids,(/1,2/), garea=(/1,1/)) +! + CALL putarr(fid, '/resid', resid_it, ionode=0) + CALL putarr(fid, '/error', err_it, ionode=0) +! + CALL putmat(fid, '/MAT', mat) + CALL closef(fid) + END SUBROUTINE h5file +! + FUNCTION outerprod(x, y) RESULT(r) +! +! outer product +! + REAL(rkind), INTENT(in) :: x(:), y(:) + REAL(rkind) :: r(SIZE(x),SIZE(y)) + INTEGER :: i, j + DO j=1,SIZE(y) + DO i=1,SIZE(x) + r(i,j) = x(i)*y(j) + END DO + END DO + END FUNCTION outerprod +! + FUNCTION rhs(x,y) + REAL(rkind), INTENT(in) :: x(:), y(:) + REAL(rkind) :: rhs(SIZE(x),SIZE(y)) + rhs = -10.d0*pi**2 * outerprod(SIN(pi*x), SIN(3.d0*pi*y)) + END FUNCTION rhs +! + FUNCTION exact(x,y) + REAL(rkind), INTENT(in) :: x(:), y(:) + REAL(rkind) :: exact(SIZE(x),SIZE(y)) + exact = outerprod(SIN(pi*x), SIN(3.d0*pi*y)) + END FUNCTION exact +END PROGRAM main diff --git a/multigrid/src/test_jacobig.f90 b/multigrid/src/test_jacobig.f90 new file mode 100644 index 0000000..1fdca83 --- /dev/null +++ b/multigrid/src/test_jacobig.f90 @@ -0,0 +1,331 @@ +!> +!> @file test_jacobig.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +! +! Test 2D parallel Jacobi using STENCIL_2D matrix-free structure. +! +MODULE mod + USE iso_fortran_env, ONLY : rkind => real64 + IMPLICIT NONE +! + LOGICAL, PARAMETER :: nldebug=.FALSE. + REAL(rkind), PARAMETER :: PI=4.d0*ATAN(1.0d0) +CONTAINS +END MODULE mod +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +PROGRAM main + USE mpi + USE fdmat_mod, ONLY : fdmat, ibc_fdmat, ibc_rhs + USE pputils2, ONLY : dist1d, timera, hostlist + USE gvector, ONLY : gvector_2d, ASSIGNMENT(=), OPERATOR(-) + USE parmg, ONLY : grid2_type, create_grid, jacobi, exchange, get_resids, norm_vec + USE stencil, ONLY : stencil_2d, putmat + USE mod + IMPLICIT NONE +! + INTEGER, PARAMETER :: ndims=2 +! + INTEGER :: me, neighs(4), npes, ierr + INTEGER, DIMENSION(ndims) :: dims=[0,0] + INTEGER, DIMENSION(ndims) :: coords, comm1d + LOGICAL, DIMENSION(ndims) :: periods=[.FALSE.,.FALSE.] + LOGICAL :: reorder =.FALSE. + INTEGER :: comm_cart + INTEGER, DIMENSION(ndims) :: e0, s0, e, s, npt_glob, npt_loc +! + REAL(rkind), ALLOCATABLE :: xgrid(:), ygrid(:) + INTEGER, ALLOCATABLE :: id(:,:) + REAL(rkind) :: dx, dy + INTEGER :: npoints ! Number of points in FD stencil +! + TYPE(gvector_2d) :: v_exact, resids, errs + REAL(rkind), ALLOCATABLE :: resid_it(:), err_it(:) + INTEGER, DIMENSION(ndims) :: g + INTEGER :: i, it, it_skip +! + INTEGER :: levels=1 + TYPE(grid2_type), ALLOCATABLE :: grids(:) +! +! Input quantities +! + CHARACTER(len=4) :: prb='dddd' + INTEGER :: nx=4, ny=4 ! Number of intervals + INTEGER :: kx=1, ky=1 + REAL(rkind) :: Lx=1.0, Ly=1.0 + REAL(rkind) :: icrosst=1.0, beta=0.0, miome=200.0 + REAL(rkind) :: omega=1.0d0 + INTEGER :: nits=100, nu=1 +! + NAMELIST /in/ prb, nx, ny, kx, ky, Lx, Ly, icrosst, beta, & + & miome, omega, nits, nu +!================================================================================ +! 1.0 Prologue +! +! 2D process grid +! + CALL mpi_init(ierr) + CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) + CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) + CALL mpi_dims_create(npes, ndims, dims, ierr) + CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, comm_cart,& + & ierr) +! + CALL mpi_comm_rank(comm_cart, me, ierr) + CALL mpi_cart_coords(comm_cart, me, ndims, coords, ierr) + CALL mpi_cart_shift(comm_cart, 0, 1, neighs(1), neighs(2), ierr) + CALL mpi_cart_shift(comm_cart, 1, 1, neighs(3), neighs(4), ierr) +! + CALL mpi_cart_sub(comm_cart, [.TRUE.,.FALSE.], comm1d(1), ierr) + CALL mpi_cart_sub(comm_cart, [.FALSE.,.TRUE.], comm1d(2), ierr) +! + CALL hostlist(comm_cart) + IF(me.EQ.0) THEN + WRITE(*,'(a,i0,a,i0/)') "Process grid: ", dims(1), " X ", dims(2) + END IF +! +! Read problem inputs +! + IF(me.EQ.0) THEN + READ(*,in) + WRITE(*,in) + END IF + CALL mpi_bcast(prb, LEN(prb), MPI_CHARACTER, 0, comm_cart, ierr) + CALL mpi_bcast(kx, 1, MPI_INTEGER, 0, comm_cart, ierr) + call mpi_bcast(ky, 1, MPI_INTEGER, 0, comm_cart, ierr) + CALL mpi_bcast(icrosst, 1, MPI_DOUBLE_PRECISION, 0, comm_cart, ierr) + CALL mpi_bcast(Lx,1,MPI_DOUBLE_PRECISION,0,comm_cart, ierr) + CALL mpi_bcast(Ly,1,MPI_DOUBLE_PRECISION,0,comm_cart, ierr) + CALL mpi_bcast(beta,1,MPI_DOUBLE_PRECISION,0,comm_cart, ierr) + CALL mpi_bcast(miome, 1, MPI_DOUBLE_PRECISION, 0, comm_cart, ierr) + CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, comm_cart, ierr) + CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, comm_cart, ierr) + CALL mpi_bcast(nits, 1, MPI_INTEGER, 0, comm_cart, ierr) + CALL mpi_bcast(nu, 1, MPI_INTEGER, 0, comm_cart, ierr) + CALL mpi_bcast(omega, 1, MPI_DOUBLE_PRECISION, 0, comm_cart, ierr) +!================================================================================ +! 2.0 2d Grid construction +! +! Partition 2D grid +! + CALL timera(0, 'Grid_construction') + npt_glob(1) = nx+1 + npt_glob(2) = ny+1 + DO i=1,ndims + CALL dist1d(comm1d(i), 0, npt_glob(i), s(i), npt_loc(i)) + e(i) = s(i) + npt_loc(i) - 1 + END DO + WRITE(*,'(a,i3.3,a,2(3i4,", "))') 'PE', me, ' coords, s,e:', & + & (coords(i),s(i),e(i),i=1,ndims) +! +! Global mesh +! + dx = Lx/REAL(nx) + dy = Ly/REAL(ny) + ALLOCATE(xgrid(0:nx)) + ALLOCATE(ygrid(0:ny)) + xgrid = [ (i*dx, i=0,nx) ] + ygrid = [ (i*dy, i=0,ny) ] + CALL timera(1, 'Grid_construction') +! +! Create grid structure +! + ALLOCATE(grids(levels)) + npoints = 9 ! Size of FD stencil + ALLOCATE(id(npoints,2)) + id=RESHAPE([ 0, -1, 0, 1, -1, 1, -1, 0, 1, & + 0, -1, -1, -1, 0, 0, 1, 1, 1], & + [npoints,2]) + CALL create_grid(xgrid, ygrid, s, e, id, prb, grids, comm_cart) +!================================================================================ +! 3.0 FD Operator +! + CALL timera(0, 'Laplacian') +! + CALL fdmat(grids(1), fdense, icrosst, grids(1)%fdmat) + CALL ibc_fdmat(grids(1)%fdmat, prb) +! + CALL timera(1, 'Laplacian') +!================================================================================ +! 4.0 RHS and exact solution +! +! Allocate memory +! + s0 = grids(1)%s0; e0 = grids(1)%e0 + s = grids(1)%s; e = grids(1)%e + g = [1,1] + v_exact = gvector_2d(s, e, g) ! Exact solutions + errs = gvector_2d(s, e, g) ! Disc. errors + resids = gvector_2d(s, e, g) ! Residuals + ALLOCATE(resid_it(0:nits)) + ALLOCATE(err_it(0:nits)) +! +! Set RHS at the finest grid. Impose Dirichlet BC. +! + grids(1)%f = frhs(xgrid(s(1):e(1)),ygrid(s(2):e(2))) + CALL ibc_rhs(grids(1)%f, s0, e0, prb) +! +! Exact solutions +! + v_exact = fexact(xgrid(s(1):e(1)),ygrid(s(2):e(2))) +!================================================================================ +! 5.0 Jacobi iteration loop +! + IF(me.EQ.0) WRITE(*,'(/a6,t14,a,t34,a)') 'it', 'residual norm', 'discretization error' + grids(1)%v = 0.0d0 + resids = get_resids(comm_cart, grids(1)%fdmat, grids(1)%v, grids(1)%f) + errs = grids(1)%v - v_exact + resid_it(0) = norm_vec(resids, comm_cart, root=0) + err_it(0) = norm_vec(errs, comm_cart, root=0) + it_skip = MAX(1,nits/10) +! + CALL timera(0, 'Jacobi') + DO it=1,nits + CALL jacobi(comm_cart, grids(1)%fdmat, omega, nu, grids(1)%v, grids(1)%f) + resids = get_resids(comm_cart, grids(1)%fdmat, grids(1)%v, grids(1)%f) + errs = grids(1)%v - v_exact + resid_it(it) = norm_vec(resids, comm_cart, root=0) + err_it(it) = norm_vec(errs, comm_cart, root=0) + IF(me.EQ.0 .AND. MODULO(it,it_skip).EQ.0 ) THEN + WRITE(*,'(i6,4(1pe12.3))') it, resid_it(it), resid_it(it)/resid_it(it-1),& + & err_it(it), err_it(it)/err_it(it-1) + END IF + END DO + CALL timera(1, 'Jacobi') +!================================================================================ +! 9.0 Epilogue + CALL h5file +! + CALL timera(9, '') + CALL MPI_FINALIZE(ierr) +CONTAINS +! +!+++ + FUNCTION fdense(x) +! +! Return density +! + REAL(rkind), INTENT(in) :: x(:) + REAL(rkind) :: fdense(SIZE(x)) + fdense(:) = 0.5d0*beta*miome*EXP( -(x(:)-Lx/3d0)**2d0/(Lx/2d0)**2d0 ); + END FUNCTION fdense +!+++ + FUNCTION fexact(x,y) +! +! Return analytical solution +! + REAL(rkind), INTENT(in) :: x(:), y(:) + REAL(rkind) :: fexact(SIZE(x),SIZE(y)) + REAL(rkind) :: c + INTEGER :: j + IF(prb.EQ.'dddd') THEN + DO j=1,SIZE(y) + c = SIN(2.0d0*pi*ky*y(j)/Ly) + fexact(:,j) = SIN(2.0d0*pi*kx*x(:)/Lx)*c + END DO + ELSE IF (prb.EQ.'nndd') THEN + DO j=1,SIZE(y) + c = SIN(2.0d0*pi*ky*y(j)/Ly) + fexact(:,j) = COS(2.0d0*pi*kx*x(:)/Lx)*c + END DO + END IF + END FUNCTION fexact +!+++ + SUBROUTINE h5file + USE futils + CHARACTER(len=128) :: file='test_jacobig.h5' + INTEGER :: fid + CALL creatf(file, fid, real_prec='d', mpicomm=comm_cart) + CALL putarr(fid, '/xgrid', xgrid, ionode=0) ! only rank 0 does IO + CALL putarr(fid, '/ygrid', ygrid, ionode=0) ! only rank 0 does IO +! + CALL putarrnd(fid, '/f', grids(1)%f%val, (/1,2/), garea=g) + CALL putarrnd(fid, '/v', v_exact%val, (/1,2/), garea=g) + CALL putarrnd(fid, '/u', grids(1)%v%val, (/1,2/), garea=g) + CALL putarrnd(fid, '/errs', errs%val, (/1,2/), garea=(/1,1/)) + CALL putarrnd(fid, '/resids', resids%val,(/1,2/), garea=(/1,1/)) +! + CALL putarr(fid, '/resid', resid_it, ionode=0) + CALL putarr(fid, '/error', err_it, ionode=0) +! + CALL putmat(fid, '/MAT', grids(1)%fdmat) + CALL closef(fid) + END SUBROUTINE h5file +!+++ + FUNCTION frhs(x,y) +! +! Return RHS +! + REAL(rkind), INTENT(in) :: x(:), y(:) + REAL(rkind) :: frhs(SIZE(x),SIZE(y)) + REAL(rkind) :: c, s, d(SIZE(x)) + REAL(rkind) :: corr + INTEGER :: j + corr = 1.d0+icrosst**2/4.0d0 + d(:) = fdense(x(:)) + IF(prb.EQ.'dddd') THEN + DO j=1,SIZE(y) + c = COS(2.0d0*pi*ky*y(j)/Ly) + s = SIN(2.0d0*pi*ky*y(j)/Ly) + frhs(:,j)=-4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * SIN(2.0d0*pi*kx*x(:)/Lx)*s & + & -icrosst*kx*ky/Lx/Ly * COS(2.0d0*pi*kx*x(:)/Lx)*c ) & + & + d(:) * SIN(2.0d0*pi*kx*x(:)/Lx)*s + END DO + ELSE IF (prb.EQ.'nndd') THEN + DO j=1,SIZE(y) + c = COS(2.0d0*pi*ky*y(j)/Ly) + s = SIN(2.0d0*pi*ky*y(j)/Ly) + frhs(:,j)=-4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * COS(2.0d0*pi*kx*x(:)/Lx)*s & + & +icrosst*kx*ky/Lx/Ly * SIN(2.0d0*pi*kx*x(:)/Lx)*c ) & + & + d(:) * COS(2.0d0*pi*kx*x(:)/Lx)*s + END DO + END IF + END FUNCTION frhs +!+++! + FUNCTION outerprod(x, y) RESULT(r) +! +! outer product +! + REAL(rkind), INTENT(in) :: x(:), y(:) + REAL(rkind) :: r(SIZE(x),SIZE(y)) + INTEGER :: i, j + DO j=1,SIZE(y) + DO i=1,SIZE(x) + r(i,j) = x(i)*y(j) + END DO + END DO + END FUNCTION outerprod +! + FUNCTION rhs(x,y) + REAL(rkind), INTENT(in) :: x(:), y(:) + REAL(rkind) :: rhs(SIZE(x),SIZE(y)) + rhs = -10.d0*pi**2 * outerprod(SIN(pi*x), SIN(3.d0*pi*y)) + END FUNCTION rhs +! + FUNCTION exact(x,y) + REAL(rkind), INTENT(in) :: x(:), y(:) + REAL(rkind) :: exact(SIZE(x),SIZE(y)) + exact = outerprod(SIN(pi*x), SIN(3.d0*pi*y)) + END FUNCTION exact +END PROGRAM main diff --git a/multigrid/src/test_mg.f90 b/multigrid/src/test_mg.f90 new file mode 100644 index 0000000..8275f7a --- /dev/null +++ b/multigrid/src/test_mg.f90 @@ -0,0 +1,279 @@ +!> +!> @file test_mg.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Test multigrid V-cycle +! + USE multigrid + USE math_util, ONLY : root_bessj + IMPLICIT NONE +! + INTEGER :: nx=8, nidbas=1, ngauss=2, alpha=0, nits=40 + INTEGER :: modem=22, modep=10 + INTEGER :: levels=2, nu1=1, nu2=1, mu=1, nu0=1 + CHARACTER(len=4) :: relax='jac ' + LOGICAL :: nlfixed = .FALSE. + DOUBLE PRECISION :: sigma=1.0d0, kmode=10.0, pi=4.0d0*ATAN(1.0d0) + DOUBLE PRECISION :: omega=2.0d0/3.0d0 + INTEGER :: l, nrank, its + DOUBLE PRECISION :: errdisc_dir + DOUBLE PRECISION, ALLOCATABLE :: u_direct(:), u_exact(:), u_calc(:) + DOUBLE PRECISION, ALLOCATABLE :: sol_direct(:), sol_calc(:) + DOUBLE PRECISION, ALLOCATABLE :: err(:), resid(:), errdisc(:) + DOUBLE PRECISION, ALLOCATABLE :: errdisc_fmg(:) +! + TYPE(grid1d), ALLOCATABLE :: gridx(:) + TYPE(mg_info) :: info +! + NAMELIST /newrun/ nx, nidbas, ngauss, sigma, kmode, modem, modep, alpha, & + & relax, omega, nits, nlfixed, levels, nu1, nu2, mu, nu0 +!-------------------------------------------------------------------------------- +! 1. Prologue +! Inputs +! + READ(*,newrun) + WRITE(*,newrun) +! + levels = MIN(levels, get_lmax(nx)) +! + info%nu1 = nu1 + info%nu2 = nu2 + info%mu = mu + info%nu0 = nu0 + info%levels = levels + info%relax = relax + info%omega = omega +! +! Create grids +! + ALLOCATE(gridx(levels)) + CALL create_grid(nx, nidbas, ngauss, alpha, gridx) + WRITE(*,'(a/(20i6))') 'Number of intervals in grids', (gridx(l)%n, l=1,levels) +! +! Create FE matrice and set BC u(0)=u(1)=0 +! + DO l=1,levels + CALL femat(gridx(l)%spl, gridx(l)%mata, coefeq) +! +! Left Dirichlet BC (only for Cartesian geometry) + IF(alpha .EQ. 0) THEN + CALL ibcmat(1, gridx(l)%mata) + END IF +! +! Right Dirichlet BC + CALL ibcmat(gridx(l)%mata%rank, gridx(l)%mata) +! +! BC on grid transfer operator + IF(l.GT.1) THEN + WHERE( ABS(gridx(l)%transf%val) < 1.d-8) gridx(l)%transf%val=0.0d0 + IF(alpha .EQ. 0) gridx(l)%transf%val(2:,1)=0.0d0 + gridx(l)%transf%val(1:gridx(l-1)%rank-1,gridx(l)%rank)=0.0d0 + END IF + END DO + CALL printdiag_gb('Diagonal of coarsest A', gridx(levels)%mata) +! +! Construct RHS and set BC only on the finest grid +! + nrank = gridx(1)%rank + CALL disrhs(gridx(1)%spl, gridx(1)%f, rhs) +! +! Left Dirichlet BC (only for Cartesian geometry) + IF(alpha .EQ. 0) THEN + gridx(1)%f(1) = 0.0d0 + END IF +! +! Right Dirichlet BC + gridx(1)%f(nrank) = 0.0d0 +!-------------------------------------------------------------------------------- +! 2. Direct solution +! + WRITE(*,'(//a)') 'Direct solution for the finest grid problem' + ALLOCATE(u_direct(0:nx)) + ALLOCATE(sol_direct(nrank)) + CALL direct_solve(gridx(1), sol_direct) + CALL gridval(gridx(1)%spl, gridx(1)%x, u_direct, 0, sol_direct) + errdisc_dir = disc_err(gridx(1)%spl, sol_direct, sol) + WRITE(*,'(a,1pe12.3)') 'Discretization error', errdisc_dir +!-------------------------------------------------------------------------------- +! 3. Solution from MG V-cycles +! + WRITE(*,'(//a)') 'Multigrid MG V-cycles' + ALLOCATE(sol_calc(nrank)) + ALLOCATE(err(0:nits)) + ALLOCATE(errdisc(0:nits)) + ALLOCATE(resid(0:nits)) +! +! Initial guess +! + sol_calc(:) = 0.0d0 + IF(nlfixed) THEN + sol_calc(:) = sol_direct(:) + END IF + gridx(1)%v(:) = sol_calc(:) + err(0) = normf(gridx(1)%matm, sol_calc-sol_direct) + errdisc(0) = disc_err(gridx(1)%spl, sol_calc, sol) + resid(0) = residue(gridx(1)%mata, gridx(1)%f, sol_calc) +! +! Iterations +! + DO its=1,nits + CALL mg(gridx, info, 1) + sol_calc(:) = gridx(1)%v(:) + err(its) = normf(gridx(1)%matm, sol_calc-sol_direct) + errdisc(its) = disc_err(gridx(1)%spl, sol_calc, sol) + resid(its) = residue(gridx(1)%mata, gridx(1)%f, sol_calc) + END DO +! + WRITE(*,'(a4,3(a12,a8))') 'its', 'error', 'ratio', 'residue', 'ratio', & + & 'disc. err', 'ratio' + WRITE(*,'(i4,3(1pe12.3,8x))') 0, err(0), resid(0), errdisc(0) + WRITE(*,'((i4,3(1pe12.3,0pf8.2)))') (its, err(its), err(its)/err(its-1), & + & resid(its), resid(its)/resid(its-1), & + & errdisc(its), errdisc(its)/errdisc(its-1), its=1,nits) +!-------------------------------------------------------------------------------- +! 4. Solution from FMG +! + WRITE(*,'(//a)') 'Full Multigrid' + + ALLOCATE(errdisc_fmg(nits)) + DO its=1,nits + info%nu0 = its + CALL fmg(gridx, info, 1) + sol_calc(:) = gridx(1)%v(:) + errdisc_fmg(its) = disc_err(gridx(1)%spl, sol_calc, sol) + resid(its) = residue(gridx(1)%mata, gridx(1)%f, sol_calc) + END DO + WRITE(*,'(a4,2(a12,a8))') 'nu0', 'residue', 'ratio','disc. err', 'ratio' + WRITE(*,'((i4,2(1pe12.3,0pf8.3)))') (its, resid(its), resid(its)/resid(its-1), & + & errdisc_fmg(its),errdisc_fmg(its)/errdisc_dir, its=1,nits) +! +! Grid values at final iteration +! + ALLOCATE(u_exact(0:nx)) + ALLOCATE(u_calc(0:nx)) + u_exact = sol(gridx(1)%x) + CALL gridval(gridx(1)%spl, gridx(1)%x, u_calc, 0, sol_calc) +!-------------------------------------------------------------------------------- +! 9. Epilogue +! +! Creata HDF5 file +! + CALL h5file +!-------------------------------------------------------------------------------- +CONTAINS + FUNCTION rhs(x) + DOUBLE PRECISION, INTENT(in) :: x + DOUBLE PRECISION :: rhs + DOUBLE PRECISION :: nump + SELECT CASE (alpha) + CASE(0) ! Cartesian geometry + rhs = SIN(pi*kmode*x) + CASE(1) ! Cylindrical + nump = root_bessj(modem, modep) + rhs = x * nump**2 * bessel_jn(modem, nump*x) + END SELECT + END FUNCTION rhs + FUNCTION sol(x) + DOUBLE PRECISION, INTENT(in) :: x(:) + DOUBLE PRECISION :: sol(SIZE(x)) + DOUBLE PRECISION :: nump + SELECT CASE (alpha) + CASE(0) ! Cartesian geometry + sol(:) = 1.0d0/((pi*kmode)**2+sigma)*SIN(pi*kmode*x(:)) + CASE(1) ! Cylindrical + nump = root_bessj(modem, modep) + sol(:) = bessel_jn(modem, nump*x(:)) + END SELECT + END FUNCTION sol + SUBROUTINE coefeq(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE PRECISION, INTENT(out) :: c(:) + SELECT CASE (alpha) + CASE(0) ! Cartesian geometry + c(1) = 1.0d0 + idt(1) = 1 + idw(1) = 1 + c(2) = sigma + idt(2) = 0 + idw(2) = 0 + CASE(1) ! Cylindrical + c(1) = x + idt(1) = 1 + idw(1) = 1 + c(2) = REAL(modem,8)**2/x + idt(2) = 0 + idw(2) = 0 + END SELECT + END SUBROUTINE coefeq + SUBROUTINE h5file + USE futils + CHARACTER(len=128) :: file='test_mg.h5' + INTEGER :: fid + INTEGER :: l + CHARACTER(len=64) :: dsname + CALL creatf(file, fid, real_prec='d') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NIDBAS', nidbas) + CALL attach(fid, '/', 'SIGMA', sigma) + CALL attach(fid, '/', 'KMODE', kmode) + CALL attach(fid, '/', 'MODEM', modem) + CALL attach(fid, '/', 'MODEP', modep) + CALL attach(fid, '/', 'ALPHA', alpha) + CALL attach(fid, '/', 'RELAX', relax) + CALL attach(fid, '/', 'OMEGA', omega) + CALL attach(fid, '/', 'NITS', nits) + CALL attach(fid, '/', 'NLFIXED', nlfixed) + CALL attach(fid, '/', 'LEVELS', levels) + CALL attach(fid, '/', 'NU1', nu1) + CALL attach(fid, '/', 'NU2', nu2) + CALL attach(fid, '/', 'NU0', nu0) + CALL attach(fid, '/', 'MU', mu) + CALL creatg(fid, '/mglevels') + DO l=1,levels + WRITE(dsname,'("/mglevels/level.",i2.2)') l + CALL creatg(fid, TRIM(dsname)) + CALL putmat(fid, TRIM(dsname)//'/mata', gridx(l)%mata) + IF(l.GT.1) THEN + CALL putarr(fid, TRIM(dsname)//'/matp', gridx(l)%transf%val) + CALL attach(fid, TRIM(dsname)//'/matp', 'M', gridx(l)%transf%mrows) + CALL attach(fid, TRIM(dsname)//'/matp', 'N', gridx(l)%transf%ncols) + END IF + CALL putarr(fid, TRIM(dsname)//'/f', gridx(l)%f) + CALL putarr(fid, TRIM(dsname)//'/v', gridx(l)%v) + END DO + CALL creatg(fid, '/Iterations') + CALL putarr(fid, '/Iterations/errors', err) + CALL putarr(fid, '/Iterations/residues', resid) + CALL putarr(fid, '/Iterations/disc_errors', errdisc) + CALL putarr(fid, '/Iterations/disc_errors_fmg', errdisc_fmg) + CALL putarr(fid, '/Iterations/xgrid', gridx(1)%x) + CALL putarr(fid, '/Iterations/u_direct', u_direct) + CALL putarr(fid, '/Iterations/u_exact', u_exact) + CALL putarr(fid, '/Iterations/u_calc', u_calc) + CALL closef(fid) + END SUBROUTINE h5file +END PROGRAM main diff --git a/multigrid/src/test_mg2d.f90 b/multigrid/src/test_mg2d.f90 new file mode 100644 index 0000000..4730a65 --- /dev/null +++ b/multigrid/src/test_mg2d.f90 @@ -0,0 +1,413 @@ +!> +!> @file test_mg2d.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Test 2d multigrid: Cartesian case +! + USE multigrid + USE csr + IMPLICIT NONE + INCLUDE 'mpif.h' +! + DOUBLE PRECISION, PARAMETER :: PI=4.0d0*ATAN(1.0d0) + INTEGER, DIMENSION(2) :: n, nidbas, ngauss, alpha + DOUBLE PRECISION :: kx=4.d0, ky=3.d0, sigma=10.0d0 + CHARACTER(len=4) :: prb='poly' + INTEGER :: levels=1, nu1=1, nu2=1, mu=1, nu0=1, nits + CHARACTER(len=4) :: relax='jac ' + DOUBLE PRECISION :: omega=2.0d0/3.0d0, tol + LOGICAL :: nlfixed=.FALSE. + DOUBLE PRECISION :: t0, tsetup(2), tdirect, tbsolve, titer, titer_per_step + DOUBLE PRECISION :: resid_direct, errdisc_direct + DOUBLE PRECISION :: norma, normb +! + DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:) + DOUBLE PRECISION :: dx, dy + INTEGER :: ix, iy +! + DOUBLE PRECISION, ALLOCATABLE :: sol_anal_grid(:,:) + DOUBLE PRECISION, ALLOCATABLE :: sol_calc_grid(:,:) + DOUBLE PRECISION, ALLOCATABLE :: rresid(:), resid(:), errdisc(:) +! + DOUBLE PRECISION, ALLOCATABLE, TARGET :: sol_direct(:,:) + DOUBLE PRECISION, POINTER :: sol_direct_1d(:) + DOUBLE PRECISION, ALLOCATABLE :: sol_direct_grid(:,:) +! + INTEGER :: ierr, me + INTEGER :: l, nterms + INTEGER :: its +! + TYPE(grid2d), ALLOCATABLE :: grids(:) + TYPE(mg_info) :: info +! + NAMELIST /newrun/ n, nidbas, ngauss, kx, ky, sigma, alpha, levels, prb, & + & nu1, nu2, mu, nu0, relax, nits, tol, nlfixed, omega +!-------------------------------------------------------------------------------- +! 1. Prologue +! + CALL mpi_init(ierr) + CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) +! +! Inputs +! + n = (/8, 8/) + nidbas=(/3,3/) + ngauss=(/2,2/) + alpha = (/0,0/) + kx=4 + ky=3 + sigma=10.0d0 + levels=2 + prb='poly' + nu1 = 1 + nu2 = 1 + mu = 1 + nu0 = 1 + nits = 10 + tol = 1.e-8 + relax = 'jac' + nlfixed= .FALSE. +! + IF(me.EQ.0) THEN + READ(*,newrun) + WRITE(*,newrun) + END IF + CALL mpi_bcast(n, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(alpha, 2, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(kx, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(ky, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(sigma, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(levels, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(prb, LEN(prb), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nu1, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nu2, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nu0, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(mu, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nits, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(relax, 4, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nlfixed, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(omega, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(tol, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) +! +! Adjust number of levels and fill mg info. +! + levels = MIN(levels, get_lmax(n(1)), get_lmax(n(2))) + info%nu1 = nu1 + info%nu2 = nu2 + info%mu = mu + info%nu0 = nu0 + info%levels = levels + info%relax = relax + info%omega = omega +! +! Create grids +! + t0 = mpi_wtime() +! + dx = 1.0d0/REAL(n(1),8) + dy = 1.0d0/REAL(n(2),8) + ALLOCATE(x(0:n(1)), y(0:n(2))) + DO ix=0,n(1) + x(ix) = ix*dx + END DO + DO iy=0,n(2) + y(iy) = iy*dy + END DO +! + ALLOCATE(grids(levels)) + CALL create_grid(x, y, nidbas, ngauss, alpha, grids) + WRITE(*,'(5a6)') 'l', 'nx', 'ny', 'rx', 'ry' + WRITE(*,'(5i6)') (l, grids(l)%n, grids(l)%rank, l=1,levels) +! +! Construct RHS and set BC only on the finest grid +! + CALL disrhs(grids(1)%spl, grids(1)%f, rhs) + CALL ibcrhs(grids(1), grids(1)%f) +! +! Build FE matrices and set BC +! + nterms = 3 + DO l=1,levels + CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms) + CALL ibcmat(grids(l), grids(l)%mata) + CALL to_mat(grids(l)%mata) + END DO +! +! Set BC on grid transfer matrices +! + CALL ibc_transf(grids,1,3) + CALL ibc_transf(grids,2,3) +! + tsetup(1) = mpi_wtime()-t0 +! +! Clear and rebuild FE matrices and set BC +! + t0 = mpi_wtime() + nterms = 2 + DO l=1,levels + CALL clear_mat(grids(l)%mata) + CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms, noinit=.TRUE.) + CALL ibcmat(grids(l), grids(l)%mata) + END DO + tsetup(2) = mpi_wtime()-t0 +!-------------------------------------------------------------------------------- +! 1. Analytical solution (at the finest grid, l=1) +! + ALLOCATE(sol_anal_grid(0:n(1),0:n(2))) + sol_anal_grid = sol(grids(1)%x, grids(1)%y) +!-------------------------------------------------------------------------------- +! 2. Direct solution (at the finest grid, l=1) +! + WRITE(*,'(//a)') 'Direct solution for the finest grid problem' +! + ALLOCATE(sol_direct(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)), & + & source=grids(1)%f) + ALLOCATE(sol_direct_grid(0:n(1),0:n(2))) +! + sol_direct_1d(1:SIZE(grids(1)%v1d)) => sol_direct +! +! + PRINT*, 'shape of sol_direct', SHAPE(sol_direct) + PRINT*, 'shape of sol_direct_1d', SHAPE(sol_direct_1d) +! + t0 = mpi_wtime() + sol_direct = grids(1)%f + CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.) + tdirect = mpi_wtime()-t0 +! + t0 = mpi_wtime() + sol_direct = grids(1)%f + CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.) + resid_direct = residue(grids(1)%mata, grids(1)%f1d, sol_direct_1d) + errdisc_direct = disc_err(grids(1)%spl, sol_direct, sol) +! + tbsolve = mpi_wtime()-t0 +! + CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_direct_grid, & + & [0,0], sol_direct) +! + WRITE(*,'(a,2(1pe12.3))') 'Discretization error and residue =', & + & errdisc_direct, resid_direct + WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & + & NORM2(sol_anal_grid-sol_direct_grid) / NORM2(sol_anal_grid) +! + WRITE(*, '(a,1pe12.3)') 'Frobenius norm of A', matnorm(grids(1)%mata) + WRITE(*, '(a,1pe12.3)') 'Infinity norm of A ', matnorm(grids(1)%mata, 'inf') + WRITE(*, '(a,1pe12.3)') '1 norm of A ', matnorm(grids(1)%mata, '1') +!-------------------------------------------------------------------------------- +! 3. Test multigrid V-cycle +! + WRITE(*,'(/a)') 'Multigrid MG V-cycles ...' + ALLOCATE(sol_calc_grid(0:n(1),0:n(2))) + ALLOCATE(errdisc(0:nits)) + ALLOCATE(resid(0:nits)) + ALLOCATE(rresid(0:nits)) +! +! Norm of A and b +! + norma = matnorm(grids(1)%mata) + normb = NORM2(grids(1)%f1d) +! +! Initial guess +! + IF(nlfixed) THEN + grids(1)%v = sol_direct + ELSE + grids(1)%v = 0.0d0 + END IF +! + errdisc(0) = disc_err(grids(1)%spl, grids(1)%v, sol) + resid(0) = residue(grids(1)%mata, grids(1)%f1d, grids(1)%v1d) + rresid(0) = resid(0) / ( norma*NORM2(grids(1)%v1d) + normb ) + WRITE(*,'(a4,3(a12,a8))') 'its', 'residue', 'ratio', 'disc. err', 'ratio', & + & 'rel. resid', 'ratio' + WRITE(*,'(i4,3(1pe12.3,8x))') 0, resid(0), errdisc(0), rresid(0) +! +! Iterations +! + t0 = mpi_wtime() + DO its=1,nits + CALL mg(grids, info, 1) + errdisc(its) = disc_err(grids(1)%spl, grids(1)%v, sol) + resid(its) = residue(grids(1)%mata, grids(1)%f1d, grids(1)%v1d) + rresid(its) = resid(its) / ( norma*NORM2(grids(1)%v1d) + normb ) + WRITE(*,'((i4,3(1pe12.3,0pf8.2)))') its, & + & resid(its), resid(its)/resid(its-1), & + & errdisc(its), errdisc(its)/errdisc(its-1), & + & rresid(its), rresid(its)/rresid(its-1) + IF(resid(its) .LE. tol) EXIT + END DO + nits = MIN(nits,its) + titer = mpi_wtime() - t0 + titer_per_step = titer/REAL(its,8) +! + CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_calc_grid, & + & [0,0], grids(1)%v) +!-------------------------------------------------------------------------------- +! 9. Epilogue +! +! Display timing +! + WRITE(*,'(a,2(1pe12.3))') 'Set up time (s) ', tsetup + WRITE(*,'(a,2(1pe12.3))') 'Direct and solve time (s) ', tdirect, tbsolve + WRITE(*,'(a,1pe12.3,i5)') 'Iter time (s) ', titer, nits +! +! Creata HDF5 file +! + IF(me.EQ.0) CALL h5file +! + CALL mpi_finalize(ierr) +!-------------------------------------------------------------------------------- +CONTAINS +!+++ + FUNCTION rhs(x, y) +! +! Return problem RHS +! + DOUBLE PRECISION, INTENT(in) :: x, y + DOUBLE PRECISION :: rhs + DOUBLE PRECISION :: x2, y2 + SELECT CASE(TRIM(prb)) + CASE('poly') + x2 = x*x; y2 = y*y; + rhs = 2.d0 * ( (1.0d0-6.d0*x2)*y2*(1.d0-y2) + & + & (1.0d0-6.d0*y2)*x2*(1.d0-x2) ) + CASE('trig') + rhs = SIN(PI*kx*x)*SIN(PI*ky*y) + END SELECT + END FUNCTION rhs +!+++ + FUNCTION sol(x, y) +! +! Return exact problem solution +! + DOUBLE PRECISION, INTENT(in) :: x(:), y(:) + DOUBLE PRECISION :: sol(SIZE(x),SIZE(y)) + DOUBLE PRECISION :: c + DOUBLE PRECISION :: x2(SIZE(x)), y2(SIZE(y)) + INTEGER :: j + SELECT CASE(TRIM(prb)) + CASE('poly') + x2 = x*x; y2 = y*y; + DO j=1,SIZE(y) + c = y2(j)*(y2(j)-1.d0) + sol(:,j) = c*x2(:)*(1.0d0-x2(:)) + END DO + CASE('trig') + DO j=1,SIZE(y) + c = SIN(PI*ky*y(j)) / (PI**2*(kx**2+ky**2) + sigma**2) + sol(:,j) = c * SIN(PI*kx*x(:)) + END DO + END SELECT + END FUNCTION sol +!+++ + SUBROUTINE coefeq(x, y, idt, idw, c) +! +! Weak form = Int( \nabla(w).\nabla(t) + \sigma.t.w) dV) +! + DOUBLE PRECISION, INTENT(in) :: x, y + INTEGER, INTENT(out) :: idt(:,:), idw(:,:) + DOUBLE PRECISION, INTENT(out) :: c(:) +! + c(1) = 1.0d0 + idt(1,1) = 1 + idt(1,2) = 0 + idw(1,1) = 1 + idw(1,2) = 0 +! + c(2) = 1.0d0 + idt(2,1) = 0 + idt(2,2) = 1 + idw(2,1) = 0 + idw(2,2) = 1 +! + c(3) = sigma + idt(3,1) = 0 + idt(3,2) = 0 + idw(3,1) = 0 + idw(3,2) = 0 + + END SUBROUTINE coefeq +!+++ + SUBROUTINE h5file + USE futils + CHARACTER(len=128) :: file='test_mg2d.h5' + INTEGER :: fid + INTEGER :: l + CHARACTER(len=64) :: dsname + CALL creatf(file, fid, real_prec='d') + CALL attach(fid, '/', 'NX', n(1)) + CALL attach(fid, '/', 'NY', n(2)) + CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) + CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) + CALL attach(fid, '/', 'KX', kx) + CALL attach(fid, '/', 'KY', ky) + CALL attach(fid, '/', 'SIGMA', sigma) + CALL attach(fid, '/', 'ALPHA1', alpha(1)) + CALL attach(fid, '/', 'ALPHA2', alpha(2)) + CALL attach(fid, '/', 'RELAX', relax) + CALL attach(fid, '/', 'NITS', nits) + CALL attach(fid, '/', 'LEVELS', levels) + CALL attach(fid, '/', 'NU1', nu1) + CALL attach(fid, '/', 'NU2', nu2) + CALL attach(fid, '/', 'NU0', nu0) + CALL attach(fid, '/', 'MU', mu) + CALL creatg(fid, '/mglevels') + DO l=1,levels + WRITE(dsname,'("/mglevels/level.",i2.2)') l + CALL creatg(fid, TRIM(dsname)) + CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata) + IF(l.GT.1) THEN + CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1)) + CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2)) + END IF + CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x) + CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y) + CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f) + CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v) + CALL putarr(fid, TRIM(dsname)//'/f1d', grids(l)%f1d) + CALL putarr(fid, TRIM(dsname)//'/v1d', grids(l)%v1d) + END DO +! +! Solutions at finest grid +! + CALL creatg(fid, '/solutions') + CALL putarr(fid, '/solutions/xg', grids(1)%x) + CALL putarr(fid, '/solutions/yg', grids(1)%y) + CALL putarr(fid, '/solutions/anal', sol_anal_grid) + CALL putarr(fid, '/solutions/calc', sol_calc_grid) + CALL putarr(fid, '/solutions/direct', sol_direct_grid) +! + CALL creatg(fid, '/Iterations') + CALL putarr(fid, '/Iterations/residues', resid(0:nits)) + CALL putarr(fid, '/Iterations/disc_errors', errdisc(0:nits)) +! + CALL closef(fid) + END SUBROUTINE h5file +!+++ +END PROGRAM diff --git a/multigrid/src/test_mg2d_cyl.f90 b/multigrid/src/test_mg2d_cyl.f90 new file mode 100644 index 0000000..73644ec --- /dev/null +++ b/multigrid/src/test_mg2d_cyl.f90 @@ -0,0 +1,427 @@ +!> +!> @file test_mg2d_cyl.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Test 2d multigrid +! Cylindrical case +! + USE multigrid + USE csr + IMPLICIT NONE + INCLUDE 'mpif.h' +! + DOUBLE PRECISION, PARAMETER :: PI=4.0d0*ATAN(1.0d0) + INTEGER, DIMENSION(2) :: n, nidbas, ngauss + INTEGER :: modem=22, modep=10 + CHARACTER(len=4) :: prb='poly' + INTEGER :: levels=1, nu1=1, nu2=1, mu=1, nu0=1, nits + CHARACTER(len=4) :: relax='jac ' + DOUBLE PRECISION :: omega=2.0d0/3.0d0, tol, rtol + LOGICAL :: nluniq=.TRUE. + LOGICAL :: nlfixed=.FALSE. + DOUBLE PRECISION :: t0, tsetup(2), tdirect, tbsolve, titer, titer_per_step + DOUBLE PRECISION :: resid_direct, errdisc_direct + DOUBLE PRECISION :: norma, normb +! + DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:) + DOUBLE PRECISION :: dx, dy + INTEGER :: ix, iy +! + DOUBLE PRECISION, ALLOCATABLE :: sol_anal_grid(:,:) + DOUBLE PRECISION, ALLOCATABLE :: sol_calc_grid(:,:) + DOUBLE PRECISION, ALLOCATABLE :: rresid(:), resid(:), errdisc(:) +! + DOUBLE PRECISION, ALLOCATABLE, TARGET :: sol_direct(:,:) + DOUBLE PRECISION, POINTER :: sol_direct_1d(:) +! + DOUBLE PRECISION, ALLOCATABLE :: sol_direct_orig(:,:) + DOUBLE PRECISION, ALLOCATABLE :: sol_direct_grid(:,:) +! + DOUBLE PRECISION, ALLOCATABLE :: sol_orig(:,:) +! + INTEGER :: ierr, me + INTEGER :: l, nterms + INTEGER :: its +! + TYPE(grid2d), ALLOCATABLE :: grids(:) + TYPE(mg_info) :: info +! + NAMELIST /newrun/ n, nidbas, ngauss, modem, modep, levels, prb, & + & nu1, nu2, mu, nu0, relax, nits, tol, rtol, nlfixed, & + & nluniq, omega +!-------------------------------------------------------------------------------- +! 1. Prologue +! + CALL mpi_init(ierr) + CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) +! +! Inputs +! + n = (/8, 8/) + nidbas=(/3,3/) + ngauss=(/2,2/) + modem = 22 + modep = 10 + prb='poly' + levels=2 + nu1 = 1 + nu2 = 1 + mu = 1 + nu0 = 1 + nits = 10 + tol = 1.e-8 + rtol = 1.e-10 + relax = 'jac' + nlfixed= .FALSE. + nluniq = .TRUE. +! + IF(me.EQ.0) THEN + READ(*,newrun) + WRITE(*,newrun) + END IF + CALL mpi_bcast(n, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(modem, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(modep, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(levels, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nu1, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nu2, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nu0, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(mu, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nits, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(relax, 4, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(prb, LEN(prb), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nlfixed, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nluniq, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(omega, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(tol, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(rtol, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) +! +! Adjust number of levels and fill mg info. +! + levels = MIN(levels, get_lmax(n(1)), get_lmax(n(2))) + info%nu1 = nu1 + info%nu2 = nu2 + info%mu = mu + info%nu0 = nu0 + info%levels = levels + info%relax = relax + info%omega = omega +! +! Create grids +! + t0 = mpi_wtime() +! + dx = 1.0d0/REAL(n(1),8) + dy = 2.0d0*pi/REAL(n(2),8) + ALLOCATE(x(0:n(1)), y(0:n(2))) + DO ix=0,n(1) + x(ix) = ix*dx + END DO + DO iy=0,n(2) + y(iy) = iy*dy + END DO +! + ALLOCATE(grids(levels)) + CALL create_grid(x, y, nidbas, ngauss, [1, 0], grids, period=[.FALSE., .TRUE.], & + & debug_in=.FALSE.) + WRITE(*,'(5a6)') 'l', 'nx', 'ny', 'rx', 'ry' + WRITE(*,'(5i6)') (l, grids(l)%n, grids(l)%rank, l=1,levels) +!!$ CALL printmat('** Prolongation matrix in 1st dim.**', grids(2)%transf(1)) +!!$ CALL printmat('** Prolongation matrix in 2nd dim.**', grids(2)%transf(2)) +! +! Construct RHS and set BC only on the finest grid +! + CALL disrhs(grids(1)%spl, grids(1)%f, rhs) +!!$ WRITE(*,'(a/(8(1pe12.3)))') 'Orig RHS at the axis', grids(1)%f(1,1:n(2)) + CALL ibcrhs(grids(1), grids(1)%f, nluniq_in=nluniq) +!!$ WRITE(*,'(a/(8(1pe12.3)))') 'RHS at the axis', grids(1)%f(1,1:n(2)) +! +! Build FE matrices and set BC +! + nterms = 2 + DO l=1,levels + CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms) + CALL ibcmat(grids(l), grids(l)%mata, nluniq_in=nluniq) + CALL to_mat(grids(l)%mata) + END DO +! +! Set BC on grid transfer matrices +! + CALL ibc_transf(grids, 1, 2) ! Only right boundary on r (1st dim.) + tsetup(1) = mpi_wtime()-t0 +! +! Clear and rebuild FE matrices and set BC +! + t0 = mpi_wtime() + nterms = 2 + DO l=1,levels + CALL clear_mat(grids(l)%mata) + CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms, noinit=.TRUE.) + CALL ibcmat(grids(l), grids(l)%mata, nluniq_in=nluniq) + END DO + tsetup(2) = mpi_wtime()-t0 +!-------------------------------------------------------------------------------- +! 1. Analytical solution (at the finest grid, l=1) +! + ALLOCATE(sol_anal_grid(0:n(1),0:n(2))) + sol_anal_grid = sol(grids(1)%x, grids(1)%y) +!-------------------------------------------------------------------------------- +! 2. Direct solution (at the finest grid, l=1) +! + WRITE(*,'(//a)') 'Direct solution for the finest grid problem' +! + ALLOCATE(sol_direct(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)), & + & source=grids(1)%f) + ALLOCATE(sol_direct_orig(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2))) + ALLOCATE(sol_direct_grid(0:n(1),0:n(2))) +! + sol_direct_1d(1:SIZE(grids(1)%v1d)) => sol_direct +! +! + PRINT*, 'shape of sol_direct', SHAPE(sol_direct) + PRINT*, 'shape of sol_direct_1d', SHAPE(sol_direct_1d) +! + t0 = mpi_wtime() + sol_direct = grids(1)%f + CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.) + tdirect = mpi_wtime()-t0 +! + t0 = mpi_wtime() + sol_direct = grids(1)%f + CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.) + resid_direct = residue(grids(1)%mata, grids(1)%f1d, sol_direct_1d) +! + sol_direct_orig = sol_direct + CALL back_transf(grids(1), sol_direct_orig, nluniq_in=nluniq) + errdisc_direct = disc_err(grids(1)%spl, sol_direct_orig, sol) +! + tbsolve = mpi_wtime()-t0 +! + CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_direct_grid, & + & [0,0], sol_direct_orig) +! + WRITE(*,'(a,2(1pe12.3))') 'Discretization error and residue =', & + & errdisc_direct, resid_direct + WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & + & NORM2(sol_anal_grid-sol_direct_grid) / NORM2(sol_anal_grid) +!-------------------------------------------------------------------------------- +! 3. Test multigrid V-cycle +! + WRITE(*,'(/a)') 'Multigrid MG V-cycles ...' + ALLOCATE(sol_calc_grid(0:n(1),0:n(2))) + ALLOCATE(errdisc(0:nits)) + ALLOCATE(resid(0:nits)) + ALLOCATE(rresid(0:nits)) + ALLOCATE(sol_orig(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2))) +! +! Norm of A and b +! + norma = matnorm(grids(1)%mata) + normb = NORM2(grids(1)%f1d) +! + WRITE(*, '(a,1pe12.3)') 'Frobenius norm of A', norma + WRITE(*, '(a,1pe12.3)') 'Infinity norm of A ', matnorm(grids(1)%mata, 'inf') + WRITE(*, '(a,1pe12.3)') '1 norm of A ', matnorm(grids(1)%mata, '1') +! +! Initial guess +! + IF(nlfixed) THEN + grids(1)%v = sol_direct + ELSE + grids(1)%v = 0.0d0 + END IF +! + sol_orig(:,:) = grids(1)%v(:,:) + CALL back_transf(grids(1), sol_orig, nluniq_in=nluniq) + errdisc(0) = disc_err(grids(1)%spl, sol_orig, sol) +! + resid(0) = residue(grids(1)%mata, grids(1)%f1d, grids(1)%v1d) + rresid(0) = resid(0) / ( norma*NORM2(grids(1)%v1d) + normb ) + WRITE(*,'(a4,3(a12,a8),a12)') 'its', 'residue', 'ratio', 'disc. err', & + & 'ratio', 'rel. resid', 'ratio', '||v||' + WRITE(*,'(i4,3(1pe12.3,8x),1pe12.3)') 0, resid(0), errdisc(0), rresid(0), NORM2(grids(1)%v1d) +! +! Iterations +! + t0 = mpi_wtime() + DO its=1,nits + CALL mg_cyl(grids, info, 1, nluniq_in=nluniq) + resid(its) = residue(grids(1)%mata, grids(1)%f1d, grids(1)%v1d) + rresid(its) = resid(its) / ( norma*NORM2(grids(1)%v1d) + normb ) +! + sol_orig(:,:) = grids(1)%v(:,:) + CALL back_transf(grids(1), sol_orig, nluniq_in=nluniq) + errdisc(its) = disc_err(grids(1)%spl, sol_orig, sol) +! + WRITE(*,'((i4,3(1pe12.3,0pf8.2)),1pe12.3)') its, & + & resid(its), resid(its)/resid(its-1), & + & errdisc(its), errdisc(its)/errdisc(its-1), & + & rresid(its), rresid(its)/rresid(its-1), & + & NORM2(grids(1)%v1d) + IF(resid(its) .LE. tol .OR. rresid(its).LE. rtol ) EXIT + END DO + nits = MIN(nits,its) + titer = mpi_wtime() - t0 + titer_per_step = titer/REAL(its,8) +! + CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_calc_grid, & + & [0,0], sol_orig) +!-------------------------------------------------------------------------------- +! 9. Epilogue +! +! Display timing +! + WRITE(*,'(a,2(1pe12.3))') 'Set up time (s) ', tsetup + WRITE(*,'(a,2(1pe12.3))') 'Direct and solve time (s) ', tdirect, tbsolve + WRITE(*,'(a,1pe12.3,i5)') 'Iter time (s) ', titer, nits +! +! Creata HDF5 file +! + IF(me.EQ.0) CALL h5file +! + CALL mpi_finalize(ierr) +!-------------------------------------------------------------------------------- +CONTAINS +!+++ + FUNCTION rhs(r, theta) +! +! Return problem RHS +! + USE math_util, ONLY : root_bessj + DOUBLE PRECISION, INTENT(in) :: r, theta + DOUBLE PRECISION :: rhs + DOUBLE PRECISION :: nump +! + SELECT CASE(TRIM(prb)) + CASE('poly') + rhs = REAL(4*(modem+1),8)*r**(modem+1)*COS(REAL(modem,8)*theta) + CASE('bess') + nump = root_bessj(modem, modep) + rhs = r * nump**2 * BESSEL_JN(modem, nump*r) * COS(modem*theta) + END SELECT + END FUNCTION rhs +!+++ + FUNCTION sol(r, theta) +! +! Return exact problem solution +! + USE math_util, ONLY : root_bessj + DOUBLE PRECISION, INTENT(in) :: r(:), theta(:) + DOUBLE PRECISION :: sol(SIZE(r),SIZE(theta)) + DOUBLE PRECISION :: nump + INTEGER :: j +! + SELECT CASE(TRIM(prb)) + CASE('poly') + DO j=1,SIZE(theta) + sol(:,j) = (1-r(:)**2) * r(:)**modem * COS(modem*theta(j)) + END DO + CASE('bess') + nump = root_bessj(modem, modep) + DO j=1,SIZE(theta) + sol(:,j) = BESSEL_JN(modem, nump*r(:)) * COS(modem*theta(j)) + END DO + END SELECT + END FUNCTION sol +!+++ + SUBROUTINE coefeq(r, theta, idt, idw, c) +! +! Weak form = Int( \nabla(w).\nabla(t) + \sigma.t.w) dV) +! + DOUBLE PRECISION, INTENT(in) :: r, theta + INTEGER, INTENT(out) :: idt(:,:), idw(:,:) + DOUBLE PRECISION, INTENT(out) :: c(:) +! + c(1) = r + idt(1,1) = 1 + idt(1,2) = 0 + idw(1,1) = 1 + idw(1,2) = 0 +! + c(2) = 1.0d0/r + idt(2,1) = 0 + idt(2,2) = 1 + idw(2,1) = 0 + idw(2,2) = 1 + END SUBROUTINE coefeq +!+++ + SUBROUTINE h5file + USE futils + CHARACTER(len=128) :: file='test_mg2d_cyl.h5' + INTEGER :: fid + INTEGER :: l + CHARACTER(len=64) :: dsname + CALL creatf(file, fid, real_prec='d') + CALL attach(fid, '/', 'NX', n(1)) + CALL attach(fid, '/', 'NY', n(2)) + CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) + CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) + CALL attach(fid, '/', 'MODEM', modem) + CALL attach(fid, '/', 'MODEP', modep) + CALL attach(fid, '/', 'RELAX', relax) + CALL attach(fid, '/', 'NITS', nits) + CALL attach(fid, '/', 'LEVELS', levels) + CALL attach(fid, '/', 'NU1', nu1) + CALL attach(fid, '/', 'NU2', nu2) + CALL attach(fid, '/', 'NU0', nu0) + CALL attach(fid, '/', 'MU', mu) + CALL creatg(fid, '/mglevels') + DO l=1,levels + WRITE(dsname,'("/mglevels/level.",i2.2)') l + CALL creatg(fid, TRIM(dsname)) + CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata) + IF(l.GT.1) THEN + CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1)) + CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2)) + END IF + CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x) + CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y) + CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f) + CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v) + CALL putarr(fid, TRIM(dsname)//'/f1d', grids(l)%f1d) + CALL putarr(fid, TRIM(dsname)//'/v1d', grids(l)%v1d) + END DO +! +! Solutions at finest grid +! + CALL creatg(fid, '/solutions') + CALL putarr(fid, '/solutions/xg', grids(1)%x) + CALL putarr(fid, '/solutions/yg', grids(1)%y) + CALL putarr(fid, '/solutions/anal', sol_anal_grid) + CALL putarr(fid, '/solutions/calc', sol_calc_grid) + CALL putarr(fid, '/solutions/direct', sol_direct_grid) +! + CALL creatg(fid, '/Iterations') + CALL putarr(fid, '/Iterations/residues', resid(0:nits)) + CALL putarr(fid, '/Iterations/rresidues', rresid(0:nits)) + CALL putarr(fid, '/Iterations/disc_errors', errdisc(0:nits)) +! + CALL closef(fid) + END SUBROUTINE h5file +!+++ +END PROGRAM diff --git a/multigrid/src/test_mgp.f90 b/multigrid/src/test_mgp.f90 new file mode 100644 index 0000000..26f3f24 --- /dev/null +++ b/multigrid/src/test_mgp.f90 @@ -0,0 +1,242 @@ +!> +!> @file test_mgp.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Test multigrid V-cycle for periodic problems +! + USE multigrid + IMPLICIT NONE +! + INTEGER :: nx=8, nidbas=1, ngauss=2, nits=40 + INTEGER :: levels=2, nu1=1, nu2=1, mu=1, nu0=1 + CHARACTER(len=4) :: relax='jac ' + LOGICAL :: nlfixed = .FALSE. + DOUBLE PRECISION :: sigma=1.0d0, kmode=10.0, pi=4.0d0*ATAN(1.0d0) + DOUBLE PRECISION :: omega=2.0d0/3.0d0 + INTEGER :: l, nrank, dim, its + DOUBLE PRECISION :: errdisc_dir + DOUBLE PRECISION, ALLOCATABLE :: u_direct(:), u_exact(:), u_calc(:) + DOUBLE PRECISION, ALLOCATABLE :: sol_direct(:), sol_calc(:), sol_grid(:) + DOUBLE PRECISION, ALLOCATABLE :: err(:), resid(:), errdisc(:) + DOUBLE PRECISION, ALLOCATABLE :: errdisc_fmg(:) +! + TYPE(grid1d), ALLOCATABLE :: gridx(:) + TYPE(mg_info) :: info +! + NAMELIST /newrun/ nx, nidbas, ngauss, sigma, kmode, & + & relax, nits, nlfixed, levels, nu1, nu2, mu, nu0 +!-------------------------------------------------------------------------------- +! 1. Prologue +! Inputs +! + READ(*,newrun) + WRITE(*,newrun) +! + levels = MIN(levels, get_lmax(nx)) +! + info%nu1 = nu1 + info%nu2 = nu2 + info%mu = mu + info%nu0 = nu0 + info%levels = levels + info%relax = relax + info%omega = omega +! +! Create grids +! + ALLOCATE(gridx(levels)) + CALL create_grid(nx, nidbas, ngauss, 0, gridx, period=.TRUE.) + WRITE(*,'(a/(20i6))') 'Number of intervals in grids', (gridx(l)%n, l=1,levels) +! +! Create FE matrice and set BC u(0)=u(1)=0 +! + DO l=1,levels + CALL femat(gridx(l)%spl, gridx(l)%matap, coefeq) + END DO +! +! Construct RHS only on the finest grid +! + nrank = gridx(1)%rank ! Rank of the system (number of unknowns) + dim = nrank+nidbas ! Dimension of Splines space + CALL disrhs(gridx(1)%spl, gridx(1)%f, rhs) +!-------------------------------------------------------------------------------- +! 2. Direct solution +! + WRITE(*,'(//a)') 'Direct solution for the finest grid problem' + ALLOCATE(u_direct(0:nx)) + ALLOCATE(sol_direct(nrank)) + ALLOCATE(sol_grid(dim)) ! Required by GRIDVAL +! + CALL direct_solve(gridx(1), sol_direct) + sol_grid(1:nrank) = sol_direct(1:nrank) + sol_grid(nrank+1:dim) = sol_direct(1:nidbas) + CALL gridval(gridx(1)%spl, gridx(1)%x, u_direct, 0, sol_grid) +! + errdisc_dir = disc_err(gridx(1)%spl, sol_grid, sol) + WRITE(*,'(a,1pe12.3)') 'Discretization error', errdisc_dir +!-------------------------------------------------------------------------------- +! 3. Solution from MG V-cycles +! + WRITE(*,'(//a)') 'Multigrid MG V-cycles' + ALLOCATE(sol_calc(nrank)) + ALLOCATE(err(0:nits)) + ALLOCATE(errdisc(0:nits)) + ALLOCATE(resid(0:nits)) +! +! Initial guess +! + sol_calc(:) = 0.0d0 + sol_grid(:) = 0.0d0 + IF(nlfixed) THEN + sol_calc(:) = sol_direct(:) + sol_grid(1:nrank) = sol_calc(1:nrank) + sol_grid(nrank+1:dim) = sol_calc(1:nidbas) + END IF + gridx(1)%v(:) = sol_calc(:) + err(0) = normf(gridx(1)%matmp, sol_calc-sol_direct) + errdisc(0) = disc_err(gridx(1)%spl, sol_grid, sol) + resid(0) = residue(gridx(1)%matap, gridx(1)%f, sol_calc) +! +! Iterations +! + DO its=1,nits +! + CALL mg(gridx, info, 1) + sol_calc(:) = gridx(1)%v(:) + sol_grid(1:nrank) = sol_calc(1:nrank) + sol_grid(nrank+1:dim) = sol_calc(1:nidbas) +! + err(its) = normf(gridx(1)%matmp, sol_calc-sol_direct) + errdisc(its) = disc_err(gridx(1)%spl, sol_grid, sol) ! will call GRIDVAL + resid(its) = residue(gridx(1)%matap, gridx(1)%f, sol_calc) + END DO +! + WRITE(*,'(a4,3(a12,a8))') 'its', 'error', 'ratio', 'residue', 'ratio', & + & 'disc. err', 'ratio' + WRITE(*,'(i4,3(1pe12.3,8x))') 0, err(0), resid(0), errdisc(0) + WRITE(*,'((i4,3(1pe12.3,0pf8.2)))') (its, err(its), err(its)/err(its-1), & + & resid(its), resid(its)/resid(its-1), & + & errdisc(its), errdisc(its)/errdisc(its-1), its=1,nits) +!-------------------------------------------------------------------------------- +! 4. Solution from FMG +! + WRITE(*,'(//a)') 'Full Multigrid' + + ALLOCATE(errdisc_fmg(nits)) + DO its=1,nits + info%nu0 = its +! + CALL fmg(gridx, info, 1) + sol_calc(:) = gridx(1)%v(:) + sol_grid(1:nrank) = sol_calc(1:nrank) + sol_grid(nrank+1:dim) = sol_calc(1:nidbas) +! + errdisc_fmg(its) = disc_err(gridx(1)%spl, sol_grid, sol) ! will call GRIDVAL + resid(its) = residue(gridx(1)%matap, gridx(1)%f, sol_calc) + END DO + WRITE(*,'(a4,2(a12,a8))') 'nu0', 'residue', 'ratio','disc. err', 'ratio' + WRITE(*,'((i4,2(1pe12.3,0pf8.3)))') (its, resid(its), resid(its)/resid(its-1), & + & errdisc_fmg(its),errdisc_fmg(its)/errdisc_dir, its=1,nits) +! +! Grid values at final iteration +! + ALLOCATE(u_exact(0:nx)) + ALLOCATE(u_calc(0:nx)) + u_exact = sol(gridx(1)%x) + CALL gridval(gridx(1)%spl, gridx(1)%x, u_calc, 0, sol_grid) +!-------------------------------------------------------------------------------- +! 9. Epilogue +! +! Creata HDF5 file +! + CALL h5file +!-------------------------------------------------------------------------------- +CONTAINS + FUNCTION rhs(x) + DOUBLE PRECISION, INTENT(in) :: x + DOUBLE PRECISION :: rhs + rhs = SIN(pi*kmode*x) + END FUNCTION rhs + FUNCTION sol(x) + DOUBLE PRECISION, INTENT(in) :: x(:) + DOUBLE PRECISION :: sol(SIZE(x)) + sol(:) = 1.0d0/((pi*kmode)**2+sigma)*SIN(pi*kmode*x(:)) + END FUNCTION sol + SUBROUTINE coefeq(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE PRECISION, INTENT(out) :: c(:) + c(1) = 1.0d0 + idt(1) = 1 + idw(1) = 1 + c(2) = sigma + idt(2) = 0 + idw(2) = 0 + END SUBROUTINE coefeq + SUBROUTINE h5file + USE futils + CHARACTER(len=128) :: file='test_mgp.h5' + INTEGER :: fid + INTEGER :: l + CHARACTER(len=64) :: dsname + CALL creatf(file, fid, real_prec='d') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NIDBAS', nidbas) + CALL attach(fid, '/', 'SIGMA', sigma) + CALL attach(fid, '/', 'KMODE', kmode) + CALL attach(fid, '/', 'RELAX', relax) + CALL attach(fid, '/', 'NITS', nits) + CALL attach(fid, '/', 'NLFIXED', nlfixed) + CALL attach(fid, '/', 'LEVELS', levels) + CALL attach(fid, '/', 'NU1', nu1) + CALL attach(fid, '/', 'NU2', nu2) + CALL attach(fid, '/', 'NU0', nu0) + CALL attach(fid, '/', 'MU', mu) + CALL creatg(fid, '/mglevels') + DO l=1,levels + WRITE(dsname,'("/mglevels/level.",i2.2)') l + CALL creatg(fid, TRIM(dsname)) + CALL putarr(fid, TRIM(dsname)//'/mata', gridx(l)%matap%val) + IF(l.GT.1) THEN + CALL putarr(fid, TRIM(dsname)//'/matp', gridx(l)%transf%val) + CALL attach(fid, TRIM(dsname)//'/matp', 'M', gridx(l)%transf%mrows) + CALL attach(fid, TRIM(dsname)//'/matp', 'N', gridx(l)%transf%ncols) + END IF + CALL putarr(fid, TRIM(dsname)//'/f', gridx(l)%f) + CALL putarr(fid, TRIM(dsname)//'/v', gridx(l)%v) + END DO + CALL creatg(fid, '/Iterations') + CALL putarr(fid, '/Iterations/errors', err) + CALL putarr(fid, '/Iterations/residues', resid) + CALL putarr(fid, '/Iterations/disc_errors', errdisc) + CALL putarr(fid, '/Iterations/disc_errors_fmg', errdisc_fmg) + CALL putarr(fid, '/Iterations/xgrid', gridx(1)%x) + CALL putarr(fid, '/Iterations/u_direct', u_direct) + CALL putarr(fid, '/Iterations/u_exact', u_exact) + CALL putarr(fid, '/Iterations/u_calc', u_calc) + CALL closef(fid) + END SUBROUTINE h5file +END PROGRAM main diff --git a/multigrid/src/test_relax.f90 b/multigrid/src/test_relax.f90 new file mode 100644 index 0000000..9d00c66 --- /dev/null +++ b/multigrid/src/test_relax.f90 @@ -0,0 +1,227 @@ +!> +!> @file test_relax.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Test different relaxations +! + USE multigrid + USE math_util, ONLY : root_bessj + IMPLICIT NONE +! + INTEGER :: nx=8, nidbas=1, alpha=0, nits=40 + INTEGER :: modem=22, modep=10 + CHARACTER(len=4) :: relax='jac ' + LOGICAL :: nlfixed = .FALSE. + DOUBLE PRECISION :: sigma=1.0d0, kmode=10.0, pi=4.0d0*ATAN(1.0d0) + DOUBLE PRECISION :: omega=2.0d0/3.0d0 + INTEGER :: ngauss, i, nrank, its + DOUBLE PRECISION, ALLOCATABLE :: u_exact(:), u_direct(:), u_calc(:) + DOUBLE PRECISION, ALLOCATABLE :: sol_direct(:) + DOUBLE PRECISION, ALLOCATABLE :: sol_calc(:), err(:), resid(:), errdisc(:) + DOUBLE PRECISION :: errdisc_dir +! + TYPE(grid1d) :: gridx(1) +! + NAMELIST /newrun/ nx, nidbas, ngauss, sigma, kmode, modem, modep, & + & alpha, relax, omega, nits, nlfixed +!-------------------------------------------------------------------------------- +! 1. Prologue: read input, construct matrix and RHS + READ(*,newrun) + WRITE(*,newrun) +! +! Set grid +! + CALL create_grid(nx, nidbas, ngauss, alpha, gridx) +! +! Create FE matrice and set BC u(0)=u(1)=0 +! + CALL femat(gridx(1)%spl, gridx(1)%mata, coefeq) + nrank = gridx(1)%rank +! +! Left Dirichlet BC (only for Cartesian geometry) + IF(alpha .EQ. 0) THEN + CALL ibcmat(1, gridx(1)%mata) + END IF +! +! Right Dirichlet BC + CALL ibcmat(nrank, gridx(1)%mata) +! +! Construct RHS +! + CALL disrhs(gridx(1)%spl, gridx(1)%f, rhs) +! +! Left Dirichlet BC (only for Cartesian geometry) + IF(alpha .EQ. 0) THEN + gridx(1)%f(1) = 0.0d0 + END IF +! +! Right Dirichlet BC + gridx(1)%f(nrank) = 0.0d0 +!-------------------------------------------------------------------------------- +! 2. Direct solution +! +! Direct solutions +! + ALLOCATE(sol_direct(nrank)) + CALL direct_solve(gridx(1), sol_direct) +! +! Grid values +! + ALLOCATE(u_exact(0:nx)) + ALLOCATE(u_direct(0:nx)) + ALLOCATE(u_calc(0:nx)) +! + u_exact = sol(gridx(1)%x) + CALL gridval(gridx(1)%spl, gridx(1)%x, u_direct, 0, sol_direct) + errdisc_dir = disc_err(gridx(1)%spl, sol_direct, sol) + WRITE(*,'(a,1pe12.3)') 'Discretization error', errdisc_dir +!-------------------------------------------------------------------------------- +! 3. Relaxation +! + ALLOCATE(sol_calc(nrank)) + ALLOCATE(err(0:nits)) + ALLOCATE(errdisc(0:nits)) + ALLOCATE(resid(0:nits)) +! +! Initial guess + sol_calc(:) = 0.0d0 + IF(nlfixed) THEN + sol_calc(:) = sol_direct(:) + END IF + err(0) = normf(gridx(1)%matm, sol_calc-sol_direct) + errdisc(0) = disc_err(gridx(1)%spl, sol_calc, sol) + resid(0) = residue(gridx(1)%mata, gridx(1)%f, sol_calc) +! +! Iterations + DO its=1,nits + SELECT CASE (TRIM(relax)) + CASE('jac') + CALL jacobi(gridx(1)%mata, omega, 1, sol_calc, gridx(1)%f) + CASE('gs') + CALL gs(gridx(1)%mata, 1, sol_calc, gridx(1)%f) + END SELECT + err(its) = normf(gridx(1)%matm, sol_calc-sol_direct) + errdisc(its) = disc_err(gridx(1)%spl, sol_calc, sol) + resid(its) = residue(gridx(1)%mata, gridx(1)%f, sol_calc) + END DO + CALL gridval(gridx(1)%spl, gridx(1)%x, u_calc, 0, sol_calc) +! + WRITE(*,'(/a4,3a12)') 'its', 'error', 'residue', 'disc. err' + WRITE(*,'(i4,3(1pe12.3))') 0, err(0), resid(0), errdisc(0) + WRITE(*,'((i4,6(1pe12.3)))') (its, err(its), resid(its), errdisc(its), & + & err(its)/err(its-1), resid(its)/resid(its-1), & + & errdisc(its)/errdisc(its-1), its=1,nits,MAX(1,nits/10)) +! + CALL h5file +!-------------------------------------------------------------------------------- +CONTAINS +!+++ + FUNCTION rhs(x) + DOUBLE PRECISION, INTENT(in) :: x + DOUBLE PRECISION :: rhs + DOUBLE PRECISION :: nump + SELECT CASE (alpha) + CASE(0) ! Cartesian geometry + rhs = SIN(pi*kmode*x) + CASE(1) ! Cylindrical + nump = root_bessj(modem, modep) + rhs = x * nump**2 * bessel_jn(modem, nump*x) + END SELECT + END FUNCTION rhs +!+++ + FUNCTION sol(x) + DOUBLE PRECISION, INTENT(in) :: x(:) + DOUBLE PRECISION :: sol(SIZE(x)) + DOUBLE PRECISION :: nump + SELECT CASE (alpha) + CASE(0) ! Cartesian geometry + sol(:) = 1.0d0/((pi*kmode)**2+sigma)*SIN(pi*kmode*x(:)) + CASE(1) ! Cylindrical + nump = root_bessj(modem, modep) + sol(:) = bessel_jn(modem, nump*x(:)) + END SELECT + END FUNCTION sol +!+++ + SUBROUTINE coefeq(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE PRECISION, INTENT(out) :: c(:) + SELECT CASE (alpha) + CASE(0) ! Cartesian geometry + c(1) = 1.0d0 + idt(1) = 1 + idw(1) = 1 + c(2) = sigma + idt(2) = 0 + idw(2) = 0 + CASE(1) ! Cylindrical + c(1) = x + idt(1) = 1 + idw(1) = 1 + c(2) = REAL(modem,8)**2/x + idt(2) = 0 + idw(2) = 0 + END SELECT + END SUBROUTINE coefeq +!+++ + SUBROUTINE h5file + USE futils + CHARACTER(len=128) :: file='test_relax.h5' + INTEGER :: fid + INTEGER :: l + CHARACTER(len=64) :: dsname + CALL creatf(file, fid, real_prec='d') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NIDBAS', nidbas) + CALL attach(fid, '/', 'KX', kmode) + CALL attach(fid, '/', 'SIGMA', sigma) + CALL attach(fid, '/', 'ALPHA', alpha) + CALL attach(fid, '/', 'RELAX', relax) + CALL attach(fid, '/', 'OMEGA', omega) + CALL attach(fid, '/', 'NITS', nits) + CALL attach(fid, '/', 'MODEM', modem) + CALL attach(fid, '/', 'MODEP', modep) +! +! Solutions at finest grid +! + CALL creatg(fid, '/solutions') + CALL putarr(fid, '/solutions/xg', gridx(1)%x) + CALL putarr(fid, '/solutions/direct', u_direct) + CALL putarr(fid, '/solutions/anal', u_exact) + CALL putarr(fid, '/solutions/calc', u_calc) +! + CALL creatg(fid, '/relaxation') + CALL putarr(fid, '/relaxation/errdisc', errdisc) + CALL putarr(fid, '/relaxation/resid', resid) +! +! Store FE matrix +! + CALL putmat(fid, '/MATA', gridx(1)%mata) +! + CALL closef(fid) + END SUBROUTINE h5file +!+++ +END PROGRAM main diff --git a/multigrid/src/test_relax2d.f90 b/multigrid/src/test_relax2d.f90 new file mode 100644 index 0000000..09583c5 --- /dev/null +++ b/multigrid/src/test_relax2d.f90 @@ -0,0 +1,334 @@ +!> +!> @file test_relax2d.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Test 2d direcxt solve and relaxation methods +! + USE multigrid + USE csr + IMPLICIT NONE + INCLUDE 'mpif.h' +! + DOUBLE PRECISION, PARAMETER :: PI=4.0d0*ATAN(1.0d0) + INTEGER, DIMENSION(2) :: n, nidbas, ngauss, alpha + DOUBLE PRECISION :: kx=4.d0, ky=3.d0, sigma=10.0d0 + INTEGER :: levels=1, nits=1000 + CHARACTER(len=4) :: relax='jac ' + DOUBLE PRECISION :: omega=2.0d0/3.0d0 + DOUBLE PRECISION :: t0 +! + DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:) + DOUBLE PRECISION :: dx, dy + INTEGER :: ix, iy +! + DOUBLE PRECISION, ALLOCATABLE, TARGET :: sol_direct(:,:), sol_relax(:,:) + DOUBLE PRECISION, POINTER :: sol_direct_1d(:), sol_relax_1d(:) + DOUBLE PRECISION, ALLOCATABLE :: sol_direct_grid(:,:) + DOUBLE PRECISION, ALLOCATABLE :: sol_anal_grid(:,:) + DOUBLE PRECISION, ALLOCATABLE :: resid(:), errdisc(:) +! + INTEGER :: ierr, me + INTEGER :: l, nterms + INTEGER :: its +! + TYPE(grid2d), ALLOCATABLE :: grids(:) +! + NAMELIST /newrun/ n, nidbas, ngauss, kx, ky, sigma, alpha, levels, nits, relax +!-------------------------------------------------------------------------------- +! 1. Prologue +! + CALL mpi_init(ierr) + CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) +! +! Inputs +! + n = (/8, 8/) + nidbas=(/3,3/) + ngauss=(/2,2/) + alpha = (/0,0/) + kx=4 + ky=3 + sigma=10.0d0 + levels=2 + relax='jac' + nits=100 +! + IF(me.EQ.0) THEN + READ(*,newrun) + WRITE(*,newrun) + END IF + CALL mpi_bcast(n, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(alpha, 2, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(kx, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(ky, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(sigma, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(levels, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nits, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(relax, LEN(relax), MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) +! +! Adjust number of levels +! + levels = MIN(levels, get_lmax(n(1)), get_lmax(n(2))) +! +! Create grids +! + dx = 1.0d0/REAL(n(1),8) + dy = 1.0d0/REAL(n(2),8) + ALLOCATE(x(0:n(1)), y(0:n(2))) + DO ix=0,n(1) + x(ix) = ix*dx + END DO + DO iy=0,n(2) + y(iy) = iy*dy + END DO +! + ALLOCATE(grids(levels)) + CALL create_grid(x, y, nidbas, ngauss, alpha, grids) + WRITE(*,'(5a6)') 'l', 'nx', 'ny', 'rx', 'ry' + WRITE(*,'(5i6)') (l, grids(l)%n, grids(l)%rank, l=1,levels) +! +! Construct RHS and set BC only on the finest grid +! + CALL disrhs(grids(1)%spl, grids(1)%f, rhs) + CALL ibcrhs(grids(1), grids(1)%f) +!!$ CALL printmat('** RHS **', grids(1)%f) +! +! Build FE matrices and set BC +! + nterms = 3 + DO l=1,levels + CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms) + CALL ibcmat(grids(l), grids(l)%mata) + CALL to_mat(grids(l)%mata) + END DO +!-------------------------------------------------------------------------------- +! 1. Direct solution (at the finest grid, l=1) +! + WRITE(*,'(//a)') 'Direct solution for the finest grid problem' + ALLOCATE(sol_direct(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)), & + & source=grids(1)%f) + sol_direct_1d(1:SIZE(grids(1)%v1d)) => sol_direct + PRINT*, 'shape of sol_direct', SHAPE(sol_direct) + PRINT*, 'shape of sol_direct_1d', SHAPE(sol_direct_1d) +! + t0 = mpi_wtime() + CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.) + WRITE(*,'(a,1pe12.3)') 'Fact. + solve time (s) =', mpi_wtime()-t0 +! + sol_direct = grids(1)%f + t0 = mpi_wtime() + CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.) + WRITE(*,'(a,1pe12.3)') 'Solve time (s) =', mpi_wtime()-t0 +! + ALLOCATE(sol_direct_grid(0:n(1),0:n(2))) + ALLOCATE(sol_anal_grid(0:n(1),0:n(2))) + CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_direct_grid, & + & [0,0], sol_direct) + sol_anal_grid = sol(grids(1)%x, grids(1)%y) + WRITE(*,'(a,2(1pe12.3))') 'Discretization error and residue =', & + & disc_err(grids(1)%spl, sol_direct, sol), & + & residue(grids(1)%mata, grids(1)%f1d, sol_direct_1d) +!-------------------------------------------------------------------------------- +! 2. Relaxation (at the finest grid, l=1) +! + ALLOCATE(errdisc(0:nits)) + ALLOCATE(resid(0:nits)) + ALLOCATE(sol_relax(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2))) + sol_relax_1d(1:SIZE(grids(1)%v1d)) => sol_relax +! + sol_relax_1d = 0.0d0 + errdisc(0) = disc_err(grids(1)%spl, sol_relax, sol) + resid(0) = residue(grids(1)%mata, grids(1)%f1d, sol_relax_1d) + t0 = mpi_wtime() + DO its=1,nits + SELECT CASE (TRIM(relax)) + CASE('jac') + CALL jacobi(grids(1)%mata, omega, 1, sol_relax_1d, grids(1)%f1d) + CASE('gs') + CALL gs(grids(1)%mata, 1, sol_relax_1d, grids(1)%f1d) + END SELECT + errdisc(its) = disc_err(grids(1)%spl, sol_relax, sol) + resid(its) = residue(grids(1)%mata, grids(1)%f1d, sol_relax_1d) + END DO + WRITE(*,'(a,1pe12.3)') 'Iterative solve time (s/iteration) =', (mpi_wtime()-t0)/REAL(nits,8) +! + WRITE(*,'(/a4,3a12)') 'its', 'residue', 'disc. err' + WRITE(*,'(i4,3(1pe12.3))') 0, resid(0), errdisc(0) + WRITE(*,'((i4,4(1pe12.3)))') (its, resid(its), errdisc(its), & + & resid(its)/resid(its-1), & + & errdisc(its)/errdisc(its-1), its=1,nits,MAX(1,nits/10)) +!-------------------------------------------------------------------------------- +! 9. Epilogue +! +! Creata HDF5 file +! + IF(me.EQ.0) CALL h5file +! + CALL mpi_finalize(ierr) +!-------------------------------------------------------------------------------- +CONTAINS +!+++ + FUNCTION rhs(x, y) +! +! Return problem RHS +! + DOUBLE PRECISION, INTENT(in) :: x, y + DOUBLE PRECISION :: rhs + rhs = SIN(PI*kx*x)*SIN(PI*ky*y) + END FUNCTION rhs +!+++ + FUNCTION sol(x, y) +! +! Return exact problem solution +! + DOUBLE PRECISION, INTENT(in) :: x(:), y(:) + DOUBLE PRECISION :: sol(SIZE(x),SIZE(y)) + DOUBLE PRECISION :: c + INTEGER :: j + DO j=1,SIZE(y) + c = SIN(PI*ky*y(j)) / (PI**2*(kx**2+ky**2) + sigma**2) + sol(:,j) = c * SIN(PI*kx*x(:)) + END DO + END FUNCTION sol +!+++ + SUBROUTINE coefeq(x, y, idt, idw, c) +! +! Weak form = Int( \nabla(w).\nabla(t) + \sigma.t.w) dV) +! + DOUBLE PRECISION, INTENT(in) :: x, y + INTEGER, INTENT(out) :: idt(:,:), idw(:,:) + DOUBLE PRECISION, INTENT(out) :: c(:) +! + c(1) = 1.0d0 + idt(1,1) = 1 + idt(1,2) = 0 + idw(1,1) = 1 + idw(1,2) = 0 +! + c(2) = 1.0d0 + idt(2,1) = 0 + idt(2,2) = 1 + idw(2,1) = 0 + idw(2,2) = 1 +! + c(3) = sigma + idt(3,1) = 0 + idt(3,2) = 0 + idw(3,1) = 0 + idw(3,2) = 0 + + END SUBROUTINE coefeq +!+++ + SUBROUTINE h5file + USE futils + CHARACTER(len=128) :: file='test_relax2d.h5' + INTEGER :: fid + INTEGER :: l + CHARACTER(len=64) :: dsname + CALL creatf(file, fid, real_prec='d') + CALL attach(fid, '/', 'NX', n(1)) + CALL attach(fid, '/', 'NY', n(2)) + CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) + CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) + CALL attach(fid, '/', 'KX', kx) + CALL attach(fid, '/', 'KY', ky) + CALL attach(fid, '/', 'SIGMA', sigma) + CALL attach(fid, '/', 'ALPHA1', alpha(1)) + CALL attach(fid, '/', 'ALPHA2', alpha(2)) + CALL attach(fid, '/', 'LEVELS', levels) + CALL attach(fid, '/', 'RELAX', relax) + CALL attach(fid, '/', 'NITS', nits) + CALL creatg(fid, '/mglevels') + DO l=1,levels + WRITE(dsname,'("/mglevels/level.",i2.2)') l + CALL creatg(fid, TRIM(dsname)) + CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata) + IF(l.GT.1) THEN + CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1)) + CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2)) + END IF + CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x) + CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y) + CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f) + CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v) + CALL putarr(fid, TRIM(dsname)//'/f1d', grids(l)%f1d) + CALL putarr(fid, TRIM(dsname)//'/v1d', grids(l)%v1d) + END DO +! +! Solutions at finest grid +! + CALL creatg(fid, '/solutions') + CALL putarr(fid, '/solutions/xg', grids(1)%x) + CALL putarr(fid, '/solutions/yg', grids(1)%y) + CALL putarr(fid, '/solutions/direct', sol_direct_grid) + CALL putarr(fid, '/solutions/anal', sol_anal_grid) +! + CALL creatg(fid, '/relaxation') + CALL putarr(fid, '/relaxation/errdisc', errdisc) + CALL putarr(fid, '/relaxation/resid', resid) +! + IF(ALLOCATED(grids(1)%mata%mumps)) THEN + CALL myputmat(fid, '/MUMPS', grids(1)%mata%mumps) + END IF +! + CALL closef(fid) + END SUBROUTINE h5file +!+++ + SUBROUTINE myputmat(fid, label, mat, str) +! +! Write matrix to hdf5 file +! + USE futils +! + INTEGER, INTENT(in) :: fid + CHARACTER(len=*), INTENT(in) :: label + TYPE(mumps_mat) :: mat + CHARACTER(len=*), OPTIONAL, INTENT(in) :: str + CHARACTER(len=128) :: mumps_grp +! + IF(PRESENT(str)) THEN + CALL creatg(fid, label, str) + ELSE + CALL creatg(fid, label) + END IF + CALL attach(fid, label, 'RANK', mat%rank) + CALL attach(fid, label, 'NNZ', mat%nnz) + CALL attach(fid, label, 'NLSYM', mat%nlsym) + CALL attach(fid, label, 'NLPOS', mat%nlpos) + CALL putarr(fid, TRIM(label)//'/irow', mat%irow) + CALL putarr(fid, TRIM(label)//'/cols', mat%mumps_par%JCN_loc) + CALL putarr(fid, TRIM(label)//'/val', mat%mumps_par%A_loc) +! + mumps_grp = TRIM(label)//'/mumps_par' + CALL creatg(fid, mumps_grp) + CALL attach(fid, mumps_grp, 'PAR', mat%mumps_par%PAR) + CALL attach(fid, mumps_grp, 'SYM', mat%mumps_par%SYM) + CALL putarr(fid, TRIM(mumps_grp)//'/IRN', mat%mumps_par%IRN_loc) +! + END SUBROUTINE myputmat +END PROGRAM diff --git a/multigrid/src/test_relax2d_cyl.f90 b/multigrid/src/test_relax2d_cyl.f90 new file mode 100644 index 0000000..c02b199 --- /dev/null +++ b/multigrid/src/test_relax2d_cyl.f90 @@ -0,0 +1,369 @@ +!> +!> @file test_relax2d_cyl.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Test 2d direcxt solve and relaxation methods +! Cylindrical case +! + USE multigrid + USE csr + IMPLICIT NONE + INCLUDE 'mpif.h' +! + DOUBLE PRECISION, PARAMETER :: PI=4.0d0*ATAN(1.0d0) + INTEGER, DIMENSION(2) :: n, nidbas, ngauss + INTEGER :: modem=22, modep=10 + INTEGER :: levels=1, nits=1000 + CHARACTER(len=4) :: relax='jac ', prb='poly' + DOUBLE PRECISION :: omega=2.0d0/3.0d0 + LOGICAL :: nluniq=.TRUE. + LOGICAL :: nlfixed=.FALSE. + DOUBLE PRECISION :: t0 + DOUBLE PRECISION :: resid_direct, errdisc_direct +! + DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:) + DOUBLE PRECISION :: dx, dy + INTEGER :: ix, iy +! + DOUBLE PRECISION, ALLOCATABLE, TARGET :: sol_direct(:,:), sol_relax(:,:) + DOUBLE PRECISION, POINTER :: sol_direct_1d(:), sol_relax_1d(:) + DOUBLE PRECISION, ALLOCATABLE :: sol_direct_orig(:,:), sol_relax_orig(:,:) + DOUBLE PRECISION, ALLOCATABLE :: sol_direct_grid(:,:) + DOUBLE PRECISION, ALLOCATABLE :: sol_relax_grid(:,:) + DOUBLE PRECISION, ALLOCATABLE :: sol_anal_grid(:,:) + DOUBLE PRECISION, ALLOCATABLE :: resid(:), errdisc(:) +! + INTEGER :: ierr, me + INTEGER :: l, nterms, j + INTEGER :: its +! + TYPE(grid2d), ALLOCATABLE :: grids(:) +! + NAMELIST /newrun/ n, nidbas, ngauss, modem, modep, levels, omega, nits, & + & relax, prb, nlfixed, nluniq +!-------------------------------------------------------------------------------- +! 1. Prologue +! + CALL mpi_init(ierr) + CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) +! +! Inputs +! + n = (/8, 8/) + nidbas=(/3,3/) + ngauss=(/2,2/) + modem = 22 + modep = 10 + levels=2 + relax='jac' + prb='poly' + nits=100 + nluniq = .TRUE. +! + IF(me.EQ.0) THEN + READ(*,newrun) + WRITE(*,newrun) + END IF + CALL mpi_bcast(n, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(modem, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(modep, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(levels, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nits, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(omega, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nlfixed, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nluniq, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(relax, LEN(relax), MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(prb, LEN(prb), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) +! +! Adjust number of levels +! + levels = MIN(levels, get_lmax(n(1)), get_lmax(n(2))) +! +! Create grids +! + dx = 1.0d0/REAL(n(1),8) + dy = 2.0d0*pi/REAL(n(2),8) + ALLOCATE(x(0:n(1)), y(0:n(2))) + DO ix=0,n(1) + x(ix) = ix*dx + END DO + DO iy=0,n(2) + y(iy) = iy*dy + END DO +! + ALLOCATE(grids(levels)) + CALL create_grid(x, y, nidbas, ngauss, [1, 0], grids, period=[.FALSE., .TRUE.], & + & debug_in=.FALSE.) + WRITE(*,'(5a6)') 'l', 'nx', 'ny', 'rx', 'ry' + WRITE(*,'(5i6)') (l, grids(l)%n, grids(l)%rank, l=1,levels) +! +! Construct RHS and set BC only on the finest grid +! + CALL disrhs(grids(1)%spl, grids(1)%f, rhs) + CALL ibcrhs(grids(1), grids(1)%f, nluniq_in=nluniq) +!!$ CALL printmat('** RHS **', grids(1)%f) +! +! Build FE matrices and set BC +! + nterms = 2 + DO l=1,levels + CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms) + CALL ibcmat(grids(l), grids(l)%mata, nluniq_in=nluniq) + CALL to_mat(grids(l)%mata) + END DO +!-------------------------------------------------------------------------------- +! 1. Direct solution (at the finest grid, l=1) +! + WRITE(*,'(//a)') 'Direct solution for the finest grid problem' +! + ALLOCATE(sol_direct(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)), & + & source=grids(1)%f) + sol_direct_1d(1:SIZE(grids(1)%v1d)) => sol_direct +! + ALLOCATE(sol_direct_orig(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2))) +! + PRINT*, 'shape of sol_direct', SHAPE(sol_direct) + PRINT*, 'shape of sol_direct_1d', SHAPE(sol_direct_1d) +! + t0 = mpi_wtime() + CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.) + WRITE(*,'(a,1pe12.3)') 'Fact. + solve time (s) =', mpi_wtime()-t0 +! + sol_direct = grids(1)%f + t0 = mpi_wtime() + CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.) + resid_direct = residue(grids(1)%mata, grids(1)%f1d, sol_direct_1d) +! + sol_direct_orig = sol_direct + CALL back_transf(grids(1), sol_direct_orig, nluniq_in=nluniq) + errdisc_direct = disc_err(grids(1)%spl, sol_direct_orig, sol) +! + WRITE(*,'(a,1pe12.3)') 'Solve time (s) =', mpi_wtime()-t0 +! + ALLOCATE(sol_direct_grid(0:n(1),0:n(2))) + ALLOCATE(sol_anal_grid(0:n(1),0:n(2))) +! + CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_direct_grid, & + & [0,0], sol_direct_orig) +! + sol_anal_grid = sol(grids(1)%x, grids(1)%y) + WRITE(*,'(a,2(1pe12.3))') 'Discretization error and residue =', & + & errdisc_direct, resid_direct + WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & + & NORM2(sol_anal_grid-sol_direct_grid) / NORM2(sol_anal_grid) +!-------------------------------------------------------------------------------- +! 2. Relaxation (at the finest grid, l=1) +! + ALLOCATE(errdisc(0:nits)) + ALLOCATE(resid(0:nits)) + ALLOCATE(sol_relax(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2))) + ALLOCATE(sol_relax_orig(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2))) + sol_relax_1d(1:SIZE(grids(1)%v1d)) => sol_relax +! +! Initial guess +! + IF(nlfixed) THEN + sol_relax = sol_direct ! Test fixed point\ + ELSE + sol_relax = 0.0d0 + END IF +! + sol_relax_orig = sol_relax + CALL back_transf(grids(1), sol_relax_orig, nluniq_in=nluniq) + errdisc(0) = disc_err(grids(1)%spl, sol_relax_orig, sol) + resid(0) = residue(grids(1)%mata, grids(1)%f1d, sol_relax_1d) +! + t0 = mpi_wtime() + DO its=1,nits + SELECT CASE (TRIM(relax)) + CASE('jac') + CALL jacobi(grids(1)%mata, omega, 1, sol_relax_1d, grids(1)%f1d) + CASE('gs') + CALL gs(grids(1)%mata, 1, sol_relax_1d, grids(1)%f1d) + END SELECT + resid(its) = residue(grids(1)%mata, grids(1)%f1d, sol_relax_1d) +! + sol_relax_orig = sol_relax + CALL back_transf(grids(1), sol_relax_orig, nluniq_in=nluniq) +! + errdisc(its) = disc_err(grids(1)%spl, sol_relax_orig, sol) + END DO + WRITE(*,'(a,1pe12.3)') 'Iterative solve time (s/iteration) =', (mpi_wtime()-t0)/REAL(nits,8) +! + ALLOCATE(sol_relax_grid(0:n(1),0:n(2))) + CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_relax_grid, & + & [0,0], sol_relax_orig) +! + WRITE(*,'(/a4,3a12)') 'its', 'residue', 'disc. err' + WRITE(*,'(i4,3(1pe12.3))') 0, resid(0), errdisc(0) + WRITE(*,'((i4,4(1pe12.3)))') (its, resid(its), errdisc(its), & + & resid(its)/resid(its-1), & + & errdisc(its)/errdisc(its-1), its=1,nits,MAX(1,nits/10)) +!-------------------------------------------------------------------------------- +! 9. Epilogue +! +! Creata HDF5 file +! + IF(me.EQ.0) CALL h5file +! + CALL mpi_finalize(ierr) +!-------------------------------------------------------------------------------- +CONTAINS +!+++ + FUNCTION norm2(x) +! +! Compute the 2-norm of array x +! + IMPLICIT NONE + DOUBLE PRECISION :: norm2 + DOUBLE PRECISION, INTENT(in) :: x(:,:) + DOUBLE PRECISION :: sum2 + INTEGER :: i, j +! + sum2 = 0.0d0 + DO i=1,SIZE(x,1) + DO j=1,SIZE(x,2) + sum2 = sum2 + x(i,j)**2 + END DO + END DO + norm2 = SQRT(sum2) + END FUNCTION norm2 +!+++ + FUNCTION rhs(r, theta) +! +! Return problem RHS +! + USE math_util, ONLY : root_bessj + DOUBLE PRECISION, INTENT(in) :: r, theta + DOUBLE PRECISION :: rhs + DOUBLE PRECISION :: nump +! + SELECT CASE(TRIM(prb)) + CASE('poly') + rhs = REAL(4*(modem+1),8)*r**(modem+1)*COS(REAL(modem,8)*theta) + CASE('bess') + nump = root_bessj(modem, modep) + rhs = r * nump**2 * BESSEL_JN(modem, nump*r) * COS(modem*theta) + END SELECT + END FUNCTION rhs +!+++ + FUNCTION sol(r, theta) +! +! Return exact problem solution +! + USE math_util, ONLY : root_bessj + DOUBLE PRECISION, INTENT(in) :: r(:), theta(:) + DOUBLE PRECISION :: sol(SIZE(r),SIZE(theta)) + DOUBLE PRECISION :: nump + INTEGER :: j +! + SELECT CASE(TRIM(prb)) + CASE('poly') + DO j=1,SIZE(theta) + sol(:,j) = (1-r(:)**2) * r(:)**modem * COS(modem*theta(j)) + END DO + CASE('bess') + nump = root_bessj(modem, modep) + DO j=1,SIZE(theta) + sol(:,j) = BESSEL_JN(modem, nump*r(:)) * COS(modem*theta(j)) + END DO + END SELECT + END FUNCTION sol +!+++ + SUBROUTINE coefeq(r, theta, idt, idw, c) +! +! Weak form = Int( \nabla(w).\nabla(t) + \sigma.t.w) dV) +! + DOUBLE PRECISION, INTENT(in) :: r, theta + INTEGER, INTENT(out) :: idt(:,:), idw(:,:) + DOUBLE PRECISION, INTENT(out) :: c(:) +! + c(1) = r + idt(1,1) = 1 + idt(1,2) = 0 + idw(1,1) = 1 + idw(1,2) = 0 +! + c(2) = 1.0d0/r + idt(2,1) = 0 + idt(2,2) = 1 + idw(2,1) = 0 + idw(2,2) = 1 + END SUBROUTINE coefeq +!+++ + SUBROUTINE h5file + USE futils + CHARACTER(len=128) :: file='test_relax2d_cyl.h5' + INTEGER :: fid + INTEGER :: l + CHARACTER(len=64) :: dsname + CALL creatf(file, fid, real_prec='d') + CALL attach(fid, '/', 'NX', n(1)) + CALL attach(fid, '/', 'NY', n(2)) + CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) + CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) + CALL attach(fid, '/', 'MODEM', modem) + CALL attach(fid, '/', 'MODEP', modep) + CALL attach(fid, '/', 'LEVELS', levels) + CALL attach(fid, '/', 'RELAX', relax) + CALL attach(fid, '/', 'NITS', nits) + CALL attach(fid, '/', 'OMEGA', omega) + CALL creatg(fid, '/mglevels') + DO l=1,levels + WRITE(dsname,'("/mglevels/level.",i2.2)') l + CALL creatg(fid, TRIM(dsname)) + CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata) + IF(l.GT.1) THEN + CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1)) + CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2)) + END IF + CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x) + CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y) + CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f) + CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v) + CALL putarr(fid, TRIM(dsname)//'/f1d', grids(l)%f1d) + CALL putarr(fid, TRIM(dsname)//'/v1d', grids(l)%v1d) + END DO +! +! Solutions at finest grid +! + CALL creatg(fid, '/solutions') + CALL putarr(fid, '/solutions/xg', grids(1)%x) + CALL putarr(fid, '/solutions/yg', grids(1)%y) + CALL putarr(fid, '/solutions/direct', sol_direct_grid) + CALL putarr(fid, '/solutions/relax', sol_relax_grid) + CALL putarr(fid, '/solutions/anal', sol_anal_grid) +! + CALL creatg(fid, '/relaxation') + CALL putarr(fid, '/relaxation/errdisc', errdisc) + CALL putarr(fid, '/relaxation/resid', resid) +! + CALL closef(fid) + END SUBROUTINE h5file +!+++ +END PROGRAM diff --git a/multigrid/src/test_stencil.f90 b/multigrid/src/test_stencil.f90 new file mode 100644 index 0000000..e1a0dc0 --- /dev/null +++ b/multigrid/src/test_stencil.f90 @@ -0,0 +1,238 @@ +!> +!> @file test_stencil.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +MODULE mod + USE iso_fortran_env, ONLY : real64 + IMPLICIT NONE +! + INTEGER, PARAMETER :: rkind = real64 + LOGICAL, PARAMETER :: nldebug=.FALSE. + REAL(rkind), PARAMETER :: PI=4.d0*ATAN(1.0d0) +CONTAINS +END MODULE mod +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +PROGRAM main + USE mpi + USE pputils2, ONLY : dist1d, exchange, norm2_vec=>ppnorm2 + USE stencil, ONLY : stencil_2d, init, laplacian, vmx, putmat + USE mod + IMPLICIT NONE +! + INTEGER, PARAMETER :: ndims=2 +! + INTEGER :: me, neighs(4), npes, ierr + INTEGER, DIMENSION(ndims) :: dims=[0,0] + INTEGER, DIMENSION(ndims) :: coords, comm1d + LOGICAL, DIMENSION(ndims) :: periods=[.FALSE.,.FALSE.] + LOGICAL :: reorder =.FALSE. + INTEGER :: comm_cart +! + INTEGER :: nx=4, ny=4 ! Number of intervals + INTEGER, DIMENSION(ndims) :: e, s, lb, ub, npt_glob, npt_loc +! + REAL(rkind), ALLOCATABLE :: xgrid(:), ygrid(:) + REAL(rkind), ALLOCATABLE :: arr(:,:), fexact(:,:) + REAL(rkind), ALLOCATABLE :: barr1(:,:), barr2(:,:), barr3(:,:) + REAL(rkind) :: dx, dy + REAL(rkind) :: err + INTEGER, DIMENSION(5,2) :: id ! 5-point stencil + INTEGER :: npoints + TYPE(stencil_2d) :: mat + INTEGER :: i, j +! + NAMELIST /in/ nx, ny +!================================================================================ +! 1.0 Prologue +! +! 2D process grid + CALL mpi_init(ierr) + CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) + CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) + CALL mpi_dims_create(npes, ndims, dims, ierr) + CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, comm_cart,& + & ierr) +! + CALL mpi_comm_rank(comm_cart, me, ierr) + CALL mpi_cart_coords(comm_cart, me, ndims, coords, ierr) + CALL mpi_cart_shift(comm_cart, 0, 1, neighs(1), neighs(2), ierr) + CALL mpi_cart_shift(comm_cart, 1, 1, neighs(3), neighs(4), ierr) +! + CALL mpi_cart_sub(comm_cart, [.TRUE.,.FALSE.], comm1d(1), ierr) + CALL mpi_cart_sub(comm_cart, [.FALSE.,.TRUE.], comm1d(2), ierr) +! +! Read problem inputs + IF(me.EQ.0) THEN + READ(*,in) + WRITE(*,in) + END IF + CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, comm_cart, ierr) + CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, comm_cart, ierr) +!================================================================================ +! 2.0 2d Grid construction +! +! Partition 2D grid + npt_glob(1) = nx+1 + npt_glob(2) = ny+1 + DO i=1,ndims + CALL dist1d(comm1d(i), 0, npt_glob(i), s(i), npt_loc(i)) + e(i) = s(i) + npt_loc(i) - 1 + END DO + WRITE(*,'(a,i3.3,a,2(3i4,", "))') 'PE', me, ' coords, s,e:', & + & (coords(i),s(i),e(i),i=1,ndims) +! +! Global mesh + dx = 1.0d0/REAL(nx) + dy = 1.0d0/REAL(ny) + ALLOCATE(xgrid(0:nx)) + ALLOCATE(ygrid(0:ny)) + xgrid = [ (i*dx, i=0,nx) ] + ygrid = [ (i*dy, i=0,ny) ] +!================================================================================ +! 3.0 FD Laplacian +! + id=RESHAPE([ 0, -1, 0, 1, 0, & + 0, 0,-1, 0, 1], & + [5,2]) + npoints = 5 + CALL init(s, e, id, .FALSE., mat, comm_cart) +! + CALL laplacian(dx, dy, mat) +!================================================================================ +! 4.0 Check matrice-vector product +! +! Local arrays with ghost cells + lb = mat%s-1 + ub = mat%e+1 + ALLOCATE(arr(lb(1):ub(1),lb(2):ub(2))) + ALLOCATE(fexact(lb(1):ub(1),lb(2):ub(2))) + ALLOCATE(barr1(lb(1):ub(1),lb(2):ub(2))) + ALLOCATE(barr2(lb(1):ub(1),lb(2):ub(2))) + ALLOCATE(barr3(lb(1):ub(1),lb(2):ub(2))) +! +! Constant vector => Laplacian = 0 + barr1 = 0 + arr = 1.0 + barr1 = vmx(mat,arr) + IF(mat%s(1).EQ.0) barr1(0,:) = 0.0 ! discard boundary values + IF(mat%e(1).EQ.nx) barr1(nx,:) = 0.0 + IF(mat%s(2).EQ.0) barr1(:,0) = 0.0 + IF(mat%e(2).EQ.ny) barr1(:,ny) = 0.0 + err = norm2_vec(barr1,comm_cart,root=0,garea=[1,1]) + IF(me.EQ.0) THEN + WRITE(*,'(/a,1pe12.3)') 'Constant vector: ||B1|| =', err + END IF +! +! Bilinear vector => Laplacian = 0 + arr =0.0d0 + barr2=0.0d0 + DO j=mat%s(2),mat%e(2) + DO i=mat%s(1),mat%e(1) + arr(i,j) = xgrid(i)*ygrid(j) + END DO + END DO + CALL exchange(comm_cart, arr) + barr2 = vmx(mat,arr) + IF(mat%s(1).EQ.0) barr2(0,:) = 0.0 ! discard boundary values + IF(mat%e(1).EQ.nx) barr2(nx,:) = 0.0 + IF(mat%s(2).EQ.0) barr2(:,0) = 0.0 + IF(mat%e(2).EQ.ny) barr2(:,ny) = 0.0 + err = norm2_vec(barr2, comm_cart,root=0,garea=[1,1]) + IF(me.EQ.0) THEN + WRITE(*,'(a,1pe12.3)') 'Bilinear vector: ||B2|| =', err + END IF +! +! Biquadratic vector => Laplacian = fexact + DO j=mat%s(2),mat%e(2) + DO i=mat%s(1),mat%e(1) + arr(i,j) = (xgrid(i)*ygrid(j))**2/4.0d0 + fexact(i,j) = (xgrid(i)**2 + ygrid(j)**2)/2.0d0 + END DO + END DO + CALL exchange(comm_cart, arr) + CALL exchange(comm_cart, fexact) + barr3 = vmx(mat,arr) - fexact + IF(mat%s(1).EQ.0) barr3(0,:) = 0.0 ! discard boundary values + IF(mat%e(1).EQ.nx) barr3(nx,:) = 0.0 + IF(mat%s(2).EQ.0) barr3(:,0) = 0.0 + IF(mat%e(2).EQ.ny) barr3(:,ny) = 0.0 + err = norm2_vec(barr3,comm_cart,root=0,garea=[1,1]) + IF(me.EQ.0) THEN + WRITE(*,'(a,1pe12.3)') 'Biquadratic vector: ||B3|| =', err + END IF +!================================================================================ +! 9.0 Epilogue + CALL h5file + CALL MPI_FINALIZE(ierr) +CONTAINS + SUBROUTINE disp(str, arr) + CHARACTER(len=*), INTENT(in) :: str + REAL(rkind), INTENT(in) :: arr(:,:) + INTEGER :: j + WRITE(*,'(/a)') str + DO j=1,SIZE(arr,2) + WRITE(*,'(10f8.3)') arr(:,j) + END DO + END SUBROUTINE disp +! + SUBROUTINE h5file + USE futils + CHARACTER(len=128) :: file='test_stencil.h5' + INTEGER :: fid + CALL creatf(file, fid, real_prec='d', mpicomm=comm_cart) + CALL putarr(fid, '/xgrid', xgrid, ionode=0) ! only rank 0 does IO + CALL putarr(fid, '/ygrid', ygrid, ionode=0) ! only rank 0 does IO + CALL putarrnd(fid, '/barr1', barr1,(/1,2/), garea=(/1,1/)) + CALL putarrnd(fid, '/barr2', barr2,(/1,2/), garea=(/1,1/)) + CALL putarrnd(fid, '/barr3', barr3,(/1,2/), garea=(/1,1/)) + CALL putmat(fid, '/MAT', mat) + CALL closef(fid) + END SUBROUTINE h5file +! + FUNCTION outerprod(x, y) RESULT(r) +! +! outer product +! + REAL(rkind), INTENT(in) :: x(:), y(:) + REAL(rkind) :: r(SIZE(x),SIZE(y)) + INTEGER :: i, j + DO j=1,SIZE(y) + DO i=1,SIZE(x) + r(i,j) = x(i)*y(j) + END DO + END DO + END FUNCTION outerprod +! + FUNCTION rhs(x,y) + REAL(rkind), INTENT(in) :: x(:), y(:) + REAL(rkind) :: rhs(SIZE(x),SIZE(y)) + rhs = -10.d0*pi**2 * outerprod(SIN(pi*x), SIN(3.d0*pi*y)) + END FUNCTION rhs +! + FUNCTION exact(x,y) + REAL(rkind), INTENT(in) :: x(:), y(:) + REAL(rkind) :: exact(SIZE(x),SIZE(y)) + exact = outerprod(SIN(pi*x), SIN(3.d0*pi*y)) + END FUNCTION exact +END PROGRAM main diff --git a/multigrid/src/test_stencilg.f90 b/multigrid/src/test_stencilg.f90 new file mode 100644 index 0000000..0b920cc --- /dev/null +++ b/multigrid/src/test_stencilg.f90 @@ -0,0 +1,203 @@ +!> +!> @file test_stencilg.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +MODULE mod + USE iso_fortran_env, ONLY : rkind => real64 + IMPLICIT NONE +! + LOGICAL, PARAMETER :: nldebug=.FALSE. + REAL(rkind), PARAMETER :: PI=4.d0*ATAN(1.0d0) +CONTAINS +END MODULE mod +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +PROGRAM main + USE mpi + USE pputils2, ONLY : dist1d + USE gvector, ONLY : gvector_2d, ASSIGNMENT(=), OPERATOR(-) + USE parmg, ONLY : exchange, norm_vec + USE stencil, ONLY : stencil_2d, init, laplacian, putmat, OPERATOR(*) + USE mod + IMPLICIT NONE +! + INTEGER, PARAMETER :: ndims=2 +! + INTEGER :: me, neighs(4), npes, ierr + INTEGER, DIMENSION(ndims) :: dims=[0,0] + INTEGER, DIMENSION(ndims) :: coords, comm1d + LOGICAL, DIMENSION(ndims) :: periods=[.FALSE.,.FALSE.] + LOGICAL :: reorder =.FALSE. + INTEGER :: comm_cart +! + INTEGER :: nx=4, ny=4 ! Number of intervals + INTEGER, DIMENSION(ndims) :: e, s, g, npt_glob, npt_loc +! + REAL(rkind), ALLOCATABLE :: xgrid(:), ygrid(:) + TYPE(gvector_2d) :: arr, fexact + TYPE(gvector_2d) :: barr1, barr2, barr3 + REAL(rkind) :: dx, dy + REAL(rkind) :: err + INTEGER, DIMENSION(5,2) :: id ! 5-point stencil + INTEGER :: npoints + TYPE(stencil_2d) :: mat + INTEGER :: i, j +! + NAMELIST /in/ nx, ny +!================================================================================ +! 1.0 Prologue +! +! 2D process grid + CALL mpi_init(ierr) + CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) + CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) + CALL mpi_dims_create(npes, ndims, dims, ierr) + CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, comm_cart,& + & ierr) +! + CALL mpi_comm_rank(comm_cart, me, ierr) + CALL mpi_cart_coords(comm_cart, me, ndims, coords, ierr) + CALL mpi_cart_shift(comm_cart, 0, 1, neighs(1), neighs(2), ierr) + CALL mpi_cart_shift(comm_cart, 1, 1, neighs(3), neighs(4), ierr) +! + CALL mpi_cart_sub(comm_cart, [.TRUE.,.FALSE.], comm1d(1), ierr) + CALL mpi_cart_sub(comm_cart, [.FALSE.,.TRUE.], comm1d(2), ierr) +! +! Read problem inputs + IF(me.EQ.0) THEN + READ(*,in) + WRITE(*,in) + END IF + CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, comm_cart, ierr) + CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, comm_cart, ierr) +!================================================================================ +! 2.0 2d Grid construction +! +! Partition 2D grid + npt_glob(1) = nx+1 + npt_glob(2) = ny+1 + DO i=1,ndims + CALL dist1d(comm1d(i), 0, npt_glob(i), s(i), npt_loc(i)) + e(i) = s(i) + npt_loc(i) - 1 + END DO + WRITE(*,'(a,i3.3,a,2(3i4,", "))') 'PE', me, ' coords, s,e:', & + & (coords(i),s(i),e(i),i=1,ndims) +! +! Global mesh + dx = 1.0d0/REAL(nx) + dy = 1.0d0/REAL(ny) + ALLOCATE(xgrid(0:nx)) + ALLOCATE(ygrid(0:ny)) + xgrid = [ (i*dx, i=0,nx) ] + ygrid = [ (i*dy, i=0,ny) ] +!================================================================================ +! 3.0 FD Laplacian +! + id=RESHAPE([ 0, -1, 0, 1, 0, & + 0, 0,-1, 0, 1], & + [5,2]) + npoints = 5 + CALL init(s, e, id, .FALSE., mat, comm_cart) +! + CALL laplacian(dx, dy, mat) +!================================================================================ +! 4.0 Check matrice-vector product +! +! Local arrays with ghost cells + g = [1,1] + arr = gvector_2d(s, e, g) + barr1 = gvector_2d(s, e, g) + barr2 = gvector_2d(s, e, g) + barr3 = gvector_2d(s, e, g) + fexact = gvector_2d(s, e, g) +! +! Constant vector => Laplacian = 0 + arr = 1.0d0 + CALL exchange(comm_cart, arr) + barr1 = mat*arr + IF(s(1).EQ.0) barr1%val(0,:) = 0.0 ! discard boundary values + IF(e(1).EQ.nx) barr1%val(nx,:) = 0.0 + IF(s(2).EQ.0) barr1%val(:,0) = 0.0 + IF(e(2).EQ.ny) barr1%val(:,ny) = 0.0 + err = norm_vec(barr1, comm_cart, root=0) + IF(me.EQ.0) THEN + WRITE(*,'(/a,1pe12.3)') 'Constant vector: ||B1|| =', err + END IF +! +! Bilinear vector => Laplacian = 0 + DO j=s(2),e(2) + DO i=s(1),e(1) + arr%val(i,j) = xgrid(i)*ygrid(j) + END DO + END DO + CALL exchange(comm_cart, arr) + barr2 = mat*arr + IF(s(1).EQ.0) barr2%val(0,:) = 0.0 ! discard boundary values + IF(e(1).EQ.nx) barr2%val(nx,:) = 0.0 + IF(s(2).EQ.0) barr2%val(:,0) = 0.0 + IF(e(2).EQ.ny) barr2%val(:,ny) = 0.0 + err = norm_vec(barr2, comm_cart, root=0) + IF(me.EQ.0) THEN + WRITE(*,'(a,1pe12.3)') 'Bilinear vector: ||B2|| =', err + END IF +! +! Biquadratic vector => Laplacian = fexact + DO j=s(2),e(2) + DO i=s(1),e(1) + arr%val(i,j) = (xgrid(i)*ygrid(j))**2/4.0d0 + fexact%val(i,j) = (xgrid(i)**2 + ygrid(j)**2)/2.0d0 + END DO + END DO + CALL exchange(comm_cart, arr) + CALL exchange(comm_cart, fexact) + barr3 = mat*arr - fexact + IF(s(1).EQ.0) barr3%val(0,:) = 0.0 ! discard boundary values + IF(e(1).EQ.nx) barr3%val(nx,:) = 0.0 + IF(s(2).EQ.0) barr3%val(:,0) = 0.0 + IF(e(2).EQ.ny) barr3%val(:,ny) = 0.0 + err = norm_vec(barr3, comm_cart, root=0) + IF(me.EQ.0) THEN + WRITE(*,'(a,1pe12.3)') 'Biquadratic vector: ||B3|| =', err + END IF +!================================================================================ +! 9.0 Epilogue + CALL h5file + CALL MPI_FINALIZE(ierr) +! +CONTAINS + SUBROUTINE h5file + USE futils + CHARACTER(len=128) :: file='test_stencilg.h5' + INTEGER :: fid + CALL creatf(file, fid, real_prec='d', mpicomm=comm_cart) + CALL putarr(fid, '/xgrid', xgrid, ionode=0) ! only rank 0 does IO + CALL putarr(fid, '/ygrid', ygrid, ionode=0) ! only rank 0 does IO + CALL putarrnd(fid, '/arr', arr%val,(/1,2/), garea=g) + CALL putarrnd(fid, '/barr1', barr1%val,(/1,2/), garea=g) + CALL putarrnd(fid, '/barr2', barr2%val,(/1,2/), garea=g) + CALL putarrnd(fid, '/barr3', barr3%val,(/1,2/), garea=g) + CALL putmat(fid, '/MAT', mat) + CALL closef(fid) + END SUBROUTINE h5file +! +END PROGRAM main diff --git a/multigrid/src/test_transf2d.f90 b/multigrid/src/test_transf2d.f90 new file mode 100644 index 0000000..a126b7e --- /dev/null +++ b/multigrid/src/test_transf2d.f90 @@ -0,0 +1,301 @@ +!> +!> @file test_transf2d.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Test 2d multigrid +! + USE multigrid + USE csr + IMPLICIT NONE + INCLUDE 'mpif.h' +! + DOUBLE PRECISION, PARAMETER :: PI=4.0d0*ATAN(1.0d0) + INTEGER, DIMENSION(2) :: n, nidbas, ngauss, alpha + DOUBLE PRECISION :: kx=4.d0, ky=3.d0, sigma=10.0d0 + INTEGER :: levels=1 + DOUBLE PRECISION :: omega=2.0d0/3.0d0 +! + DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:) + DOUBLE PRECISION :: dx, dy + INTEGER :: ix, iy +! + DOUBLE PRECISION, ALLOCATABLE :: sol_direct_grid(:,:) + DOUBLE PRECISION, ALLOCATABLE :: sol_anal_grid(:,:) + DOUBLE PRECISION, ALLOCATABLE :: errdisc(:), resid(:) +! + DOUBLE PRECISION, ALLOCATABLE, TARGET :: fcoarse(:,:) + DOUBLE PRECISION, POINTER :: fcoarse_1d(:) + DOUBLE PRECISION, ALLOCATABLE, TARGET :: vfine(:,:) + DOUBLE PRECISION, POINTER :: vfine_1d(:) + DOUBLE PRECISION, ALLOCATABLE :: vfine_grid(:,:) + DOUBLE PRECISION, ALLOCATABLE :: err_restrict(:), err_prolong(:), & + & disc_err_prolong(:) +! + INTEGER :: ierr, me + INTEGER :: l, nterms + INTEGER :: its +! + TYPE(grid2d), ALLOCATABLE :: grids(:) +! + NAMELIST /newrun/ n, nidbas, ngauss, kx, ky, sigma, alpha, levels +!-------------------------------------------------------------------------------- +! 1. Prologue +! + CALL mpi_init(ierr) + CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) +! +! Inputs +! + n = (/8, 8/) + nidbas=(/3,3/) + ngauss=(/2,2/) + alpha = (/0,0/) + kx=4 + ky=3 + sigma=10.0d0 + levels=2 +! + IF(me.EQ.0) THEN + READ(*,newrun) + WRITE(*,newrun) + END IF + CALL mpi_bcast(n, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(alpha, 2, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(kx, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(ky, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(sigma, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(levels, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) +! +! Adjust number of levels +! + levels = MIN(levels, get_lmax(n(1)), get_lmax(n(2))) +! +! Create grids +! + dx = 1.0d0/REAL(n(1),8) + dy = 1.0d0/REAL(n(2),8) + ALLOCATE(x(0:n(1)), y(0:n(2))) + DO ix=0,n(1) + x(ix) = ix*dx + END DO + DO iy=0,n(2) + y(iy) = iy*dy + END DO +! + ALLOCATE(grids(levels)) + CALL create_grid(x, y, nidbas, ngauss, alpha, grids) + WRITE(*,'(5a6)') 'l', 'nx', 'ny', 'rx', 'ry' + WRITE(*,'(5i6)') (l, grids(l)%n, grids(l)%rank, l=1,levels) +! +! Build FE matrices and set BC +! + nterms = 3 + DO l=1,levels + CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms) + CALL ibcmat(grids(l), grids(l)%mata) + CALL to_mat(grids(l)%mata) + END DO +! +! Set BC on grid transfer matrices +! + CALL ibc_transf(grids,1,3) + CALL ibc_transf(grids,2,3) +!-------------------------------------------------------------------------------- +! 1. Direct solutions +! + WRITE(*,'(/a)') 'Direct solutions for all levels ...' + ALLOCATE(errdisc(levels)) + ALLOCATE(resid(levels)) + WRITE(*,'(3a5,2a12)') 'l', 'nx', 'ny', 'err', 'resid' + DO l=1,levels + CALL disrhs(grids(l)%spl, grids(l)%f, rhs) + CALL ibcrhs(grids(l), grids(l)%f) + grids(l)%v = grids(l)%f + CALL direct_solve(grids(l), grids(l)%v1d, debug=.FALSE.) + errdisc(l) = disc_err(grids(l)%spl, grids(l)%v, sol) + resid(l) = residue(grids(l)%mata, grids(l)%f1d, grids(l)%v1d) + WRITE(*,'(3i5,2(1pe12.3))') l, grids(l)%n, Errdisc(l), resid(l) + END DO +! +! Grid values of direct solutions at the finest levels + ALLOCATE(sol_direct_grid(0:n(1),0:n(2))) + ALLOCATE(sol_anal_grid(0:n(1),0:n(2))) + CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_direct_grid, & + & [0,0], grids(1)%v) + sol_anal_grid = sol(grids(1)%x, grids(1)%y) +!-------------------------------------------------------------------------------- +! 2. Test restrict and prolong +! + WRITE(*,'(/a)') 'Testing restrict and prolong...' + WRITE(*,'(3a5,3a12)') 'l', 'nx', 'ny', 'rhs', 'sol', 'disc_err' + ALLOCATE(err_restrict(2:levels)) + ALLOCATE(err_prolong(2:levels)) + ALLOCATE(disc_err_prolong(2:levels)) + ALLOCATE(vfine_grid(0:n(1),0:n(2))) + DO l=2,levels + ALLOCATE(fcoarse(SIZE(grids(l)%f,1),SIZE(grids(l)%f,2))) + fcoarse_1d(1:SIZE(grids(l)%f1d)) => fcoarse + ALLOCATE(vfine(SIZE(grids(l-1)%v,1),SIZE(grids(l-1)%v,2))) + vfine_1d(1:SIZE(grids(l-1)%v1d)) => vfine +! + fcoarse = restrict(grids(l)%matp, grids(l-1)%f) + err_restrict(l) = MAXVAL(ABS(fcoarse_1d-grids(l)%f1d)) +! + CALL direct_solve(grids(l), fcoarse_1d) + vfine = prolong(grids(l)%matp, fcoarse) + disc_err_prolong(l) = disc_err(grids(l-1)%spl, vfine, sol) + err_prolong(l) = MAXVAL(ABS(vfine_1d-grids(l-1)%v1d)) +! + IF(l.EQ.2) THEN ! Grid val on finest grid + CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, vfine_grid, & + & [0,0], vfine) + END IF +! + WRITE(*,'(3i5,3(1pe12.3))') l, grids(l)%n, err_restrict(l), err_prolong(l), & + & disc_err_prolong(l) + DEALLOCATE(fcoarse) + DEALLOCATE(vfine) + END DO +!-------------------------------------------------------------------------------- +! 9. Epilogue +! +! Creata HDF5 file +! + IF(me.EQ.0) CALL h5file +! + CALL mpi_finalize(ierr) +!-------------------------------------------------------------------------------- +CONTAINS +!+++ + FUNCTION rhs(x, y) +! +! Return problem RHS +! + DOUBLE PRECISION, INTENT(in) :: x, y + DOUBLE PRECISION :: rhs + rhs = SIN(PI*kx*x)*SIN(PI*ky*y) + END FUNCTION rhs +!+++ + FUNCTION sol(x, y) +! +! Return exact problem solution +! + DOUBLE PRECISION, INTENT(in) :: x(:), y(:) + DOUBLE PRECISION :: sol(SIZE(x),SIZE(y)) + DOUBLE PRECISION :: c + INTEGER :: j + DO j=1,SIZE(y) + c = SIN(PI*ky*y(j)) / (PI**2*(kx**2+ky**2) + sigma**2) + sol(:,j) = c * SIN(PI*kx*x(:)) + END DO + END FUNCTION sol +!+++ + SUBROUTINE coefeq(x, y, idt, idw, c) +! +! Weak form = Int( \nabla(w).\nabla(t) + \sigma.t.w) dV) +! + DOUBLE PRECISION, INTENT(in) :: x, y + INTEGER, INTENT(out) :: idt(:,:), idw(:,:) + DOUBLE PRECISION, INTENT(out) :: c(:) +! + c(1) = 1.0d0 + idt(1,1) = 1 + idt(1,2) = 0 + idw(1,1) = 1 + idw(1,2) = 0 +! + c(2) = 1.0d0 + idt(2,1) = 0 + idt(2,2) = 1 + idw(2,1) = 0 + idw(2,2) = 1 +! + c(3) = sigma + idt(3,1) = 0 + idt(3,2) = 0 + idw(3,1) = 0 + idw(3,2) = 0 + + END SUBROUTINE coefeq +!+++ + SUBROUTINE h5file + USE futils + CHARACTER(len=128) :: file='test_transf2d.h5' + INTEGER :: fid + INTEGER :: l + CHARACTER(len=64) :: dsname + CALL creatf(file, fid, real_prec='d') + CALL attach(fid, '/', 'NX', n(1)) + CALL attach(fid, '/', 'NY', n(2)) + CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) + CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) + CALL attach(fid, '/', 'KX', kx) + CALL attach(fid, '/', 'KY', ky) + CALL attach(fid, '/', 'SIGMA', sigma) + CALL attach(fid, '/', 'ALPHA1', alpha(1)) + CALL attach(fid, '/', 'ALPHA2', alpha(2)) + CALL attach(fid, '/', 'LEVELS', levels) + CALL creatg(fid, '/mglevels') + DO l=1,levels + WRITE(dsname,'("/mglevels/level.",i2.2)') l + CALL creatg(fid, TRIM(dsname)) + CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata) + IF(l.GT.1) THEN + CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1)) + CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2)) + END IF + CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x) + CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y) + CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f) + CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v) + CALL putarr(fid, TRIM(dsname)//'/f1d', grids(l)%f1d) + CALL putarr(fid, TRIM(dsname)//'/v1d', grids(l)%v1d) + END DO +! +! Solutions at finest grid +! + CALL creatg(fid, '/solutions') + CALL putarr(fid, '/solutions/xg', grids(1)%x) + CALL putarr(fid, '/solutions/yg', grids(1)%y) + CALL putarr(fid, '/solutions/direct', sol_direct_grid) + CALL putarr(fid, '/solutions/anal', sol_anal_grid) + CALL putarr(fid, '/solutions/vfine', vfine_grid) +! +! Some errors +! + CALL creatg(fid, '/errors') + CALL putarr(fid, '/errors/errdisc', errdisc) + CALL putarr(fid, '/errors/resid', resid) + CALL putarr(fid, '/errors/restrict', err_restrict) + CALL putarr(fid, '/errors/prolong', err_prolong) + CALL putarr(fid, '/errors/disc_err_prolong', disc_err_prolong) +! + CALL closef(fid) + END SUBROUTINE h5file +!+++ +END PROGRAM diff --git a/multigrid/src/test_transf2d_cyl.f90 b/multigrid/src/test_transf2d_cyl.f90 new file mode 100644 index 0000000..66427e6 --- /dev/null +++ b/multigrid/src/test_transf2d_cyl.f90 @@ -0,0 +1,321 @@ +!> +!> @file test_transf2d_cyl.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Test 2d multigrid +! Cylindrical case +! + USE multigrid + USE csr + IMPLICIT NONE + INCLUDE 'mpif.h' +! + DOUBLE PRECISION, PARAMETER :: PI=4.0d0*ATAN(1.0d0) + INTEGER, DIMENSION(2) :: n, nidbas, ngauss + INTEGER :: modem=22, modep=10 + INTEGER :: levels=1 + CHARACTER(len=4) :: prb='poly' + LOGICAL :: nluniq=.TRUE. +! + DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:) + DOUBLE PRECISION :: dx, dy + INTEGER :: ix, iy +! + DOUBLE PRECISION, ALLOCATABLE :: sol_direct_grid(:,:) + DOUBLE PRECISION, ALLOCATABLE :: sol_anal_grid(:,:) + DOUBLE PRECISION, ALLOCATABLE :: errdisc(:), resid(:) +! + DOUBLE PRECISION, ALLOCATABLE, TARGET :: fcoarse(:,:) + DOUBLE PRECISION, POINTER :: fcoarse_1d(:) + DOUBLE PRECISION, ALLOCATABLE, TARGET :: vfine(:,:) + DOUBLE PRECISION, POINTER :: vfine_1d(:) + DOUBLE PRECISION, ALLOCATABLE :: vfine_grid(:,:) + DOUBLE PRECISION, ALLOCATABLE :: err_restrict(:), err_prolong(:), & + & disc_err_prolong(:) +! + INTEGER :: ierr, me + INTEGER :: l, nterms + INTEGER :: its + INTEGER :: n2 +! + TYPE(grid2d), ALLOCATABLE :: grids(:) +! + NAMELIST /newrun/ n, nidbas, ngauss, modem, modep, levels, prb, nluniq +!-------------------------------------------------------------------------------- +! 1. Prologue +! + CALL mpi_init(ierr) + CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) +! +! Inputs +! + n = (/8, 8/) + nidbas=(/3,3/) + ngauss=(/2,2/) + modem = 22 + modep = 10 + prb='poly' + levels=2 + nluniq = .TRUE. +! + IF(me.EQ.0) THEN + READ(*,newrun) + WRITE(*,newrun) + END IF + CALL mpi_bcast(n, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(modem, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(modep, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(levels, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(prb, LEN(prb), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) + CALL mpi_bcast(nluniq, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) +! +! Adjust number of levels +! + levels = MIN(levels, get_lmax(n(1)), get_lmax(n(2))) +! +! Create grids +! + dx = 1.0d0/REAL(n(1),8) + dy = 2.0d0*pi/REAL(n(2),8) + ALLOCATE(x(0:n(1)), y(0:n(2))) + DO ix=0,n(1) + x(ix) = ix*dx + END DO + DO iy=0,n(2) + y(iy) = iy*dy + END DO +! + ALLOCATE(grids(levels)) + CALL create_grid(x, y, nidbas, ngauss, [1, 0], grids, period=[.FALSE., .TRUE.], & + & debug_in=.FALSE.) + WRITE(*,'(5a6,a12)') 'l', 'nx', 'ny', 'rx', 'ry', 'shape of v' + WRITE(*,'(7i6)') (l, grids(l)%n, grids(l)%rank, SHAPE(grids(l)%v), l=1,levels) +! +! Build FE matrices and set BC +! + nterms = 2 + DO l=1,levels + CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms) + CALL ibcmat(grids(l), grids(l)%mata, nluniq_in=nluniq) + CALL to_mat(grids(l)%mata) + END DO +! +! Set BC on grid transfer matrices +! + CALL ibc_transf(grids, 1, 2) ! Only right boundary on r (1st dim.) +!-------------------------------------------------------------------------------- +! 1. Direct solutions +! + WRITE(*,'(/a)') 'Direct solutions for all levels ...' + WRITE(*,'(3a5,2a12)') 'l', 'nx', 'ny', 'err', 'resid' +! + ALLOCATE(errdisc(levels)) + ALLOCATE(resid(levels)) +! + DO l=1,levels + CALL disrhs(grids(l)%spl, grids(l)%f, rhs) + CALL ibcrhs(grids(l), grids(l)%f, nluniq_in=nluniq) +! + grids(l)%v = grids(l)%f + CALL direct_solve(grids(l), grids(l)%v1d, debug=.FALSE.) +! + resid(l) = residue(grids(l)%mata, grids(l)%f1d, grids(l)%v1d) + CALL back_transf(grids(l), grids(l)%v, nluniq_in=nluniq) + errdisc(l) = disc_err(grids(l)%spl, grids(l)%v, sol) + WRITE(*,'(3i5,2(1pe12.3))') l, grids(l)%n, Errdisc(l), resid(l) + END DO +! +! Grid values of direct solutions at the finest levels + ALLOCATE(sol_direct_grid(0:n(1),0:n(2))) + ALLOCATE(sol_anal_grid(0:n(1),0:n(2))) + CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_direct_grid, & + & [0,0], grids(1)%v) + sol_anal_grid = sol(grids(1)%x, grids(1)%y) +!-------------------------------------------------------------------------------- +! 2. Test restrict and prolong +! + WRITE(*,'(/a)') 'Testing restrict and prolong...' + WRITE(*,'(3a5,3a12)') 'l', 'nx', 'ny', 'rhs', 'sol', 'disc_err' + ALLOCATE(err_restrict(2:levels)) + ALLOCATE(err_prolong(2:levels)) + ALLOCATE(disc_err_prolong(2:levels)) + ALLOCATE(vfine_grid(0:n(1),0:n(2))) + DO l=2,levels + ALLOCATE(fcoarse(SIZE(grids(l)%f,1),SIZE(grids(l)%f,2))) + fcoarse_1d(1:SIZE(grids(l)%f1d)) => fcoarse + ALLOCATE(vfine(SIZE(grids(l-1)%v,1),SIZE(grids(l-1)%v,2))) + vfine_1d(1:SIZE(grids(l-1)%v1d)) => vfine +! + fcoarse(:,:) = restrict_cyl(grids(l), grids(l-1)%f, nluniq) +! + err_restrict(l) = MAXVAL(ABS(fcoarse_1d-grids(l)%f1d)) +! + CALL direct_solve(grids(l), fcoarse_1d) +! + vfine(:,:) = prolong_cyl(grids(l), fcoarse, nluniq) +! + CALL back_transf(grids(l-1), vfine, nluniq_in=nluniq) + disc_err_prolong(l) = disc_err(grids(l-1)%spl, vfine, sol) + err_prolong(l) = MAXVAL(ABS(vfine_1d-grids(l-1)%v1d)) +! + IF(l.EQ.2) THEN ! Grid val on finest grid + CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, vfine_grid, & + & [0,0], vfine) + END IF +! + WRITE(*,'(3i5,3(1pe12.3))') l, grids(l)%n, err_restrict(l), err_prolong(l), & + & disc_err_prolong(l) + DEALLOCATE(fcoarse) + DEALLOCATE(vfine) + END DO +!-------------------------------------------------------------------------------- +! 9. Epilogue +! +! Creata HDF5 file +! + IF(me.EQ.0) CALL h5file +! + CALL mpi_finalize(ierr) +!-------------------------------------------------------------------------------- +CONTAINS +!+++ + FUNCTION rhs(r, theta) +! +! Return problem RHS +! + USE math_util, ONLY : root_bessj + DOUBLE PRECISION, INTENT(in) :: r, theta + DOUBLE PRECISION :: rhs + DOUBLE PRECISION :: nump +! + SELECT CASE(TRIM(prb)) + CASE('poly') + rhs = REAL(4*(modem+1),8)*r**(modem+1)*COS(REAL(modem,8)*theta) + CASE('bess') + nump = root_bessj(modem, modep) + rhs = r * nump**2 * BESSEL_JN(modem, nump*r) * COS(modem*theta) + END SELECT + END FUNCTION rhs +!+++ + FUNCTION sol(r, theta) +! +! Return exact problem solution +! + USE math_util, ONLY : root_bessj + DOUBLE PRECISION, INTENT(in) :: r(:), theta(:) + DOUBLE PRECISION :: sol(SIZE(r),SIZE(theta)) + DOUBLE PRECISION :: nump + INTEGER :: j +! + SELECT CASE(TRIM(prb)) + CASE('poly') + DO j=1,SIZE(theta) + sol(:,j) = (1-r(:)**2) * r(:)**modem * COS(modem*theta(j)) + END DO + CASE('bess') + nump = root_bessj(modem, modep) + DO j=1,SIZE(theta) + sol(:,j) = BESSEL_JN(modem, nump*r(:)) * COS(modem*theta(j)) + END DO + END SELECT + END FUNCTION sol +!+++ + SUBROUTINE coefeq(r, theta, idt, idw, c) +! +! Weak form = Int( \nabla(w).\nabla(t) + \sigma.t.w) dV) +! + DOUBLE PRECISION, INTENT(in) :: r, theta + INTEGER, INTENT(out) :: idt(:,:), idw(:,:) + DOUBLE PRECISION, INTENT(out) :: c(:) +! + c(1) = r + idt(1,1) = 1 + idt(1,2) = 0 + idw(1,1) = 1 + idw(1,2) = 0 +! + c(2) = 1.0d0/r + idt(2,1) = 0 + idt(2,2) = 1 + idw(2,1) = 0 + idw(2,2) = 1 + END SUBROUTINE coefeq +!+++ + SUBROUTINE h5file + USE futils + CHARACTER(len=128) :: file='test_transf2d_cyl.h5' + INTEGER :: fid + INTEGER :: l + CHARACTER(len=64) :: dsname + CALL creatf(file, fid, real_prec='d') + CALL attach(fid, '/', 'NX', n(1)) + CALL attach(fid, '/', 'NY', n(2)) + CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) + CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) + CALL attach(fid, '/', 'MODEM', modem) + CALL attach(fid, '/', 'MODEP', modep) + CALL attach(fid, '/', 'LEVELS', levels) + CALL creatg(fid, '/mglevels') + DO l=1,levels + WRITE(dsname,'("/mglevels/level.",i2.2)') l + CALL creatg(fid, TRIM(dsname)) + CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata) + IF(l.GT.1) THEN + CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1)) + CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2)) + END IF + CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x) + CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y) + CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f) + CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v) + CALL putarr(fid, TRIM(dsname)//'/f1d', grids(l)%f1d) + CALL putarr(fid, TRIM(dsname)//'/v1d', grids(l)%v1d) + END DO +! +! Solutions at finest grid +! + CALL creatg(fid, '/solutions') + CALL putarr(fid, '/solutions/xg', grids(1)%x) + CALL putarr(fid, '/solutions/yg', grids(1)%y) + CALL putarr(fid, '/solutions/direct', sol_direct_grid) + CALL putarr(fid, '/solutions/anal', sol_anal_grid) + CALL putarr(fid, '/solutions/vfine', vfine_grid) +! +! Some errors +! + CALL creatg(fid, '/errors') + CALL putarr(fid, '/errors/errdisc', errdisc) + CALL putarr(fid, '/errors/resid', resid) + CALL putarr(fid, '/errors/restrict', err_restrict) + CALL putarr(fid, '/errors/prolong', err_prolong) + CALL putarr(fid, '/errors/disc_err_prolong', disc_err_prolong) +! + CALL closef(fid) + END SUBROUTINE h5file +!+++ +END PROGRAM diff --git a/multigrid/src/transfer1d.f90 b/multigrid/src/transfer1d.f90 new file mode 100644 index 0000000..c245f09 --- /dev/null +++ b/multigrid/src/transfer1d.f90 @@ -0,0 +1,126 @@ +!> +!> @file transfer1d.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +!-------------------------------------------------------------------------------- +!-------------------------------------------------------------------------------- +!-------------------------------------------------------------------------------- +PROGRAM main + USE multigrid + IMPLICIT NONE +! + INTEGER :: nx=8, nidbas=1, ngauss=4, alpha=0, modem=0 + DOUBLE PRECISION :: sigma=1.0d0 + LOGICAL :: nlper=.FALSE. + INTEGER :: j +! + TYPE(grid1d) :: gridx(2) + TYPE(gemat) :: prolong_mat, restrict_mat, coarse_mat + DOUBLE PRECISION, ALLOCATABLE :: arow(:), temp(:,:) +! + NAMELIST /newrun/ nx, nidbas, ngauss, sigma, alpha, modem, nlper +!-------------------------------------------------------------------------------- + READ(*,newrun) + WRITE(*,newrun) +! +! Set up fine and coarse grids +! + CALL create_grid(nx, nidbas, ngauss, alpha, gridx, period=nlper) + CALL printmat('** Prolongation matrix **', gridx(2)%transf) +! +! Restriction matrix = transpose of prolongation matrix +! + CALL mcopy(gridx(2)%transf, prolong_mat) + CALL init(prolong_mat%mrows, 1, restrict_mat, mrows=prolong_mat%ncols) + restrict_mat%val = TRANSPOSE(prolong_mat%val) +! +! Compute femat on fine and coarse grids +! + IF(nlper) THEN + CALL femat(gridx(1)%spl, gridx(1)%matap, coefeq) + CALL printmat('** FE matrix on fine mesh **', gridx(1)%matap) + CALL femat(gridx(2)%spl, gridx(2)%matap, coefeq) + CALL printmat('** FE matrix on coarse mesh **', gridx(2)%matap) + ELSE + CALL femat(gridx(1)%spl, gridx(1)%mata, coefeq) + CALL printmat('** FE matrix on fine mesh **', gridx(1)%mata) + CALL femat(gridx(2)%spl, gridx(2)%mata, coefeq) + CALL printmat('** FE matrix on coarse mesh **', gridx(2)%mata) + END IF +! +! Compute coarse FE matrix using transfer matrix +! + IF(nlper) THEN + CALL init(gridx(2)%matap%rank, 1, coarse_mat) + ALLOCATE(temp(gridx(1)%matap%rank,gridx(2)%matap%rank)) + DO j=1,gridx(2)%matap%rank + temp(:,j) = vmx(gridx(1)%matap,prolong_mat%val(:,j)) + END DO + coarse_mat%val = vmx(restrict_mat,temp) + DEALLOCATE(temp) + ELSE + CALL init(gridx(2)%mata%rank, 1, coarse_mat) + coarse_mat%val = vmx(restrict_mat,vmx(gridx(1)%mata,prolong_mat%val)) + END IF + CALL printmat('** Coarse FE matrix using transfer operators **', coarse_mat) +! +! Compute the diff of Ac - R*Af*P +! + IF(nlper) THEN + coarse_mat%val = coarse_mat%val - gridx(2)%matap%val + ELSE + ALLOCATE(arow(gridx(2)%mata%rank)) + DO j=1,gridx(2)%mata%rank + CALL getcol(gridx(2)%mata, j, arow) + coarse_mat%val(:,j) = coarse_mat%val(:,j)-arow(:) + END DO + DEALLOCATE(arow) + END IF + WRITE(*,'(a,1pe12.3)') 'Diff =', MAXVAL(ABS(coarse_mat%val)) +CONTAINS + SUBROUTINE coefeq(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE PRECISION, INTENT(out) :: c(:) + SELECT CASE (alpha) + CASE(0) ! Cartesian geometry + c(1) = 1.0d0 + idt(1) = 1 + idw(1) = 1 + c(2) = sigma + idt(2) = 0 + idw(2) = 0 + CASE(1) + c(1) = x + idt(1) = 1 + idw(1) = 1 + c(2) = modem**2/x + idt(2) = 0 + idw(2) = 0 + CASE default + WRITE(*,'(a,i0,a)') 'COEFEQ: alpha ', alpha, ' not defined!' + END SELECT + END SUBROUTINE coefeq +!-------------------------------------------------------------------------------- +END PROGRAM main diff --git a/multigrid/src/transfer1d_col.f90 b/multigrid/src/transfer1d_col.f90 new file mode 100644 index 0000000..d0724d5 --- /dev/null +++ b/multigrid/src/transfer1d_col.f90 @@ -0,0 +1,53 @@ +!> +!> @file transfer1d_col.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Obtain grid transfer by collocation +! + USE multigrid + IMPLICIT NONE +! + INTEGER :: nx=8, nidbas=1 + LOGICAL :: nlper=.TRUE. +! + TYPE(grid1d) :: gridx(2) + TYPE(gemat) :: pmat +! + NAMELIST /newrun/ nx, nidbas, nlper +!-------------------------------------------------------------------------------- + READ(*,newrun) + WRITE(*,newrun) +! + CALL create_grid(nx, nidbas, 1, 0, gridx, period=nlper) + CALL printmat('** Prolongation matrix (using mass matrix) **', gridx(2)%transf) +! + CALL calc_pmat(gridx(1), gridx(2), pmat, .TRUE.) + CALL printmat('** Prolongation matrix (by collocation) **', pmat) +! + WRITE(*,'(/a,1pe12.3)') 'Max diff =', MAXVAL(ABS(pmat%val-gridx(2)%transf%val)) +! +END PROGRAM main + diff --git a/multigrid/src/two_grid.f90 b/multigrid/src/two_grid.f90 new file mode 100644 index 0000000..db513a8 --- /dev/null +++ b/multigrid/src/two_grid.f90 @@ -0,0 +1,189 @@ +!> +!> @file two_grid.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Check some properties of grid transfer +! + USE multigrid + USE math_util, ONLY : root_bessj + IMPLICIT NONE +! + INTEGER :: nx=8, nidbas=1, ngauss=2, alpha=0 + INTEGER :: modem=22, modep=10 + INTEGER :: levels=2 + INTEGER :: l, nrank + DOUBLE PRECISION :: sigma=1.0d0, kmode=10.0, pi=4.0d0*ATAN(1.0d0) + DOUBLE PRECISION, ALLOCATABLE :: v_prolong(:) +! + TYPE(grid1d) :: gridx(2) +! + NAMELIST /newrun/ nx, nidbas, ngauss, sigma, kmode, modem, modep, alpha +!-------------------------------------------------------------------------------- +! 1. Prologue +! Inputs +! + READ(*,newrun) + WRITE(*,newrun) +! +! Create grids +! + CALL create_grid(nx, nidbas, ngauss, alpha, gridx) + WRITE(*,'(a/(20i6))') 'Number of intervals in grids', (gridx(l)%n, l=1,levels) +! +! Create FE matrice and set BC u(0)=u(1)=0 +! + DO l=1,levels + CALL femat(gridx(l)%spl, gridx(l)%mata, coefeq) +! +! Left Dirichlet BC (only for Cartesian geometry) + IF(alpha .EQ. 0) THEN + CALL ibcmat(1, gridx(l)%mata) + END IF +! +! Right Dirichlet BC + CALL ibcmat(gridx(l)%mata%rank, gridx(l)%mata) +! +! BC on grid transfer operator + IF(l.GT.1) THEN + WHERE( ABS(gridx(l)%transf%val) < 1.d-8) gridx(l)%transf%val=0.0d0 + IF(alpha .EQ. 0) gridx(l)%transf%val(2:,1)=0.0d0 + gridx(l)%transf%val(1:gridx(l-1)%rank-1,gridx(l)%rank)=0.0d0 + END IF + END DO +! +! Construct RHS and set BC only on the finest grid +! + nrank = gridx(1)%rank + CALL disrhs(gridx(1)%spl, gridx(1)%f, rhs) +! +! Left Dirichlet BC (only for Cartesian geometry) + IF(alpha .EQ. 0) THEN + gridx(1)%f(1) = 0.0d0 + END IF +! +! Right Dirichlet BC + gridx(1)%f(nrank) = 0.0d0 +! +! RHS on coarse grid by restriction +! + gridx(2)%f = restrict(gridx(2)%transf,gridx(1)%f) +!-------------------------------------------------------------------------------- +! 2. Direct solutions +! + DO l=1,levels + CALL direct_solve(gridx(l), gridx(l)%v) + WRITE(*,'(a,i3/(10(1pe12.3)))') 'Sol at level', l, gridx(l)%v + END DO +! +! Prolongation of coarse solution +! + ALLOCATE(v_prolong(SIZE(gridx(1)%v))) +! + v_prolong = prolong(gridx(2)%transf, gridx(2)%v) + WRITE(*,'(a,i3/(10(1pe12.3)))') 'Prolong. sol.', l, v_prolong + WRITE(*,'(a,1pe12.3)') 'Error ||V_prolong-V_fine||', normf(gridx(1)%matm, v_prolong-gridx(1)%v) +!-------------------------------------------------------------------------------- +! 9. Epilogue +! +! Creata HDF5 file +! + CALL h5file +!-------------------------------------------------------------------------------- +CONTAINS + SUBROUTINE h5file + USE futils + CHARACTER(len=128) :: file='two_grid.h5' + INTEGER :: fid + INTEGER :: l + CHARACTER(len=64) :: dsname + CALL creatf(file, fid, real_prec='d') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NIDBAS', nidbas) + CALL attach(fid, '/', 'SIGMA', sigma) + CALL attach(fid, '/', 'KMODE', kmode) + CALL attach(fid, '/', 'ALPHA', alpha) + CALL attach(fid, '/', 'LEVELS', levels) + CALL creatg(fid, '/mglevels') + DO l=1,levels + WRITE(dsname,'("/mglevels/level.",i2.2)') l + CALL creatg(fid, TRIM(dsname)) + CALL putmat(fid, TRIM(dsname)//'/mata', gridx(l)%mata) + IF(l.GT.1) THEN + CALL putarr(fid, TRIM(dsname)//'/matp', gridx(l)%transf%val) + CALL attach(fid, TRIM(dsname)//'/matp', 'M', gridx(l)%transf%mrows) + CALL attach(fid, TRIM(dsname)//'/matp', 'N', gridx(l)%transf%ncols) + END IF + CALL putarr(fid, TRIM(dsname)//'/f', gridx(l)%f) + CALL putarr(fid, TRIM(dsname)//'/v', gridx(l)%v) + END DO + CALL putarr(fid, '/v_prolong', v_prolong) + END SUBROUTINE h5file + FUNCTION rhs(x) + DOUBLE PRECISION, INTENT(in) :: x + DOUBLE PRECISION :: rhs + DOUBLE PRECISION :: nump + SELECT CASE (alpha) + CASE(0) ! Cartesian geometry + rhs = SIN(pi*kmode*x) + CASE(1) ! Cylindrical + nump = root_bessj(modem, modep) + rhs = x * nump**2 * bessel_jn(modem, nump*x) + END SELECT + END FUNCTION rhs + FUNCTION sol(x) + DOUBLE PRECISION, INTENT(in) :: x(:) + DOUBLE PRECISION :: sol(SIZE(x)) + DOUBLE PRECISION :: nump + SELECT CASE (alpha) + CASE(0) ! Cartesian geometry + sol(:) = 1.0d0/((pi*kmode)**2+sigma)*SIN(pi*kmode*x(:)) + CASE(1) ! Cylindrical + nump = root_bessj(modem, modep) + sol(:) = bessel_jn(modem, nump*x(:)) + END SELECT + END FUNCTION sol + SUBROUTINE coefeq(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE PRECISION, INTENT(out) :: c(:) + SELECT CASE (alpha) + CASE(0) ! Cartesian geometry + c(1) = 1.0d0 + idt(1) = 1 + idw(1) = 1 + c(2) = sigma + idt(2) = 0 + idw(2) = 0 + CASE(1) ! Cylindrical + c(1) = x + idt(1) = 1 + idw(1) = 1 + c(2) = REAL(modem,8)**2/x + idt(2) = 0 + idw(2) = 0 + END SELECT + END SUBROUTINE coefeq +END PROGRAM main diff --git a/multigrid/wk/CMakeLists.txt b/multigrid/wk/CMakeLists.txt new file mode 100644 index 0000000..b784517 --- /dev/null +++ b/multigrid/wk/CMakeLists.txt @@ -0,0 +1,53 @@ +/** + * @file CMakeLists.txt + * + * @brief + * + * @copyright + * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) + * SPC (Swiss Plasma Center) + * + * spclibs is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * + * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + * + * @authors + * (in alphabetical order) + * @author Trach-Minh Tran + */ +project(multigrid_wk) + +set(MG_TESTS + transfer1d + test_relax + test_mg + test_mgp + test_csr + two_grid + test_mg2d + test_relax2d + test_transf2d + transfer1d_col + test_relax2d_cyl + test_transf2d_cyl + test_mg2d_cyl + poisson_fd +) + +set(RUNTESTS "${CMAKE_CURRENT_SOURCE_DIR}/runtest.sh") +set(BIN_DIR "${multigrid_tests_BINARY_DIR}") +set(INPUT_DIR "${CMAKE_CURRENT_SOURCE_DIR}") + +foreach(prog ${MG_TESTS}) + add_test(${prog} ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 1 + ${RUNTESTS} ${BIN_DIR}/${prog} ${INPUT_DIR} + ) +endforeach() diff --git a/multigrid/wk/poisson_fd.in b/multigrid/wk/poisson_fd.in new file mode 100644 index 0000000..931e881 --- /dev/null +++ b/multigrid/wk/poisson_fd.in @@ -0,0 +1,28 @@ +¶meters + prb='dddd' + prb='nndd' + mat_type='cds' + nx=16, ny=64 + nx=72, ny=224, + nx=64, ny=256 + nx=256, ny=1024 + nx=1024,ny=4096 + nx=1536, ny=6144 + nx=512, ny=2048 + nx=32, ny=128 + nx=128, ny=512 + nz=1, + kx=4, ky=4, + Lx=100.d00, Ly=800.d00, + nldebug=f + nldirect=t, + icrosst=1, + beta=-1E-2 + levels=5 + nnu=1 + nu1= 3,1,2,3,4,5 + nu2= 3,1,2,3,4,5 + mu=1, nu0=1, + relax= 'jac', omega=0.9 + nits=15, atol=0., rtol=1.e-8 +/ diff --git a/multigrid/wk/ppoisson_fd.in b/multigrid/wk/ppoisson_fd.in new file mode 100644 index 0000000..731a35d --- /dev/null +++ b/multigrid/wk/ppoisson_fd.in @@ -0,0 +1,19 @@ +&in + nldebug=t + prb='nndd' + prb='dddd' + nx=32, ny=128 + nx=16, ny=64 + nx=512, ny=2048 + nx=1024,ny=4096 + nx=256, ny=1024, + nx=64, ny=256 + nx=128, ny=512 + kx=4, ky=4, + Lx=100.d00, Ly=800.d00, + icrosst=1, beta=-0.01, miome = 200.0, + levels=5, nu1=3, nu2=3, mu=1, nu0=1 + relax='jac', omega=0.9, + nits=20, rtol=1.e-8, atol=0., errtol=1.e-6 + direct_solve_nits=5, +/ diff --git a/multigrid/wk/run.sh b/multigrid/wk/run.sh new file mode 100644 index 0000000..152144a --- /dev/null +++ b/multigrid/wk/run.sh @@ -0,0 +1,59 @@ +# +# @file run.sh +# +# @brief +# +# @copyright +# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +# SPC (Swiss Plasma Center) +# +# spclibs is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . +# +# @authors +# (in alphabetical order) +# @author Trach-Minh Tran +# +#!/bin/bash + +EXE=/home/ttran/bsplines/multigrid/src/poisson_mg +TMP=/misc/multigrid +[ -e $TMP ] || mkdir -p $TMP + +cat > in0 <. +# +# @authors +# (in alphabetical order) +# @author Trach-Minh Tran +# +#!/bin/sh + +progname=$1 +input_dir=$2 + +prog=$(basename ${progname}) +input_file=${input_dir}/${prog}.in + +${progname} < $input_file + +exit $? + diff --git a/multigrid/wk/test_csr.in b/multigrid/wk/test_csr.in new file mode 100644 index 0000000..fcbe78a --- /dev/null +++ b/multigrid/wk/test_csr.in @@ -0,0 +1,8 @@ +&newrun + nx=4 + nidbas=2, + sigma = 10.0d0, kmode=10, + modem=0, + alpha = 0, + nlper = f, +/ diff --git a/multigrid/wk/test_intergrid.in b/multigrid/wk/test_intergrid.in new file mode 100644 index 0000000..28db575 --- /dev/null +++ b/multigrid/wk/test_intergrid.in @@ -0,0 +1,9 @@ +¶meters + prb='nndd' + prb='dddd' + nx=8, ny=6, kx=1, ky=1, Lx=1.d00, Ly=1.d00, nldebug=t + nx=32, ny=128, kx=1, ky=4, Lx=100.d00, Ly=800.d00, nldebug=f + icrosst=1, + beta=-1E-2 + levels=5 +/ diff --git a/multigrid/wk/test_jacobi.in b/multigrid/wk/test_jacobi.in new file mode 100644 index 0000000..d5ddd5f --- /dev/null +++ b/multigrid/wk/test_jacobi.in @@ -0,0 +1,4 @@ +&in + nx=8, ny=24 + omega=1.0, nits=400, +/ diff --git a/multigrid/wk/test_jacobig.in b/multigrid/wk/test_jacobig.in new file mode 100644 index 0000000..c93ecc3 --- /dev/null +++ b/multigrid/wk/test_jacobig.in @@ -0,0 +1,13 @@ +&in + prb='nndd' + prb='dddd' + nx=256, ny=1024, + nx=32, ny=128 + nx=64, ny=256 + nx=16, ny=64 + nx=128, ny=512 + kx=4, ky=4, + Lx=100.d00, Ly=800.d00, + icrosst=1, beta=-0.01, miome = 200.0, + omega=1.0, nits=40, nu=10, +/ diff --git a/multigrid/wk/test_mg.in b/multigrid/wk/test_mg.in new file mode 100644 index 0000000..c1e2de2 --- /dev/null +++ b/multigrid/wk/test_mg.in @@ -0,0 +1,13 @@ +&newrun + nx=128, + nidbas=3, ngauss=2, + sigma = 0.0d0, kmode=10, + modem=1, modep=10, + modem=22, modep=10, + alpha = 1, + relax='gs', nits=10 + omega=0.6667 + nlfixed=f, + levels=6, + nu1=1 nu2=1, mu=1, nu0=1 +/ diff --git a/multigrid/wk/test_mg2d.in b/multigrid/wk/test_mg2d.in new file mode 100644 index 0000000..56cff89 --- /dev/null +++ b/multigrid/wk/test_mg2d.in @@ -0,0 +1,15 @@ +&newrun + n=2*128 + nidbas=2*3 + ngauss=2*1 + kx=3, ky=40, + kx=3, ky=3, + alpha=0,0 + sigma=0. + levels=12 + nu1=2, nu2=1, mu=1, nu0=1 + relax='gs', omega=0.6667, + nits=20, tol=1.e-10, + nlfixed=f + prb = 'poly' +/ diff --git a/multigrid/wk/test_mg2d_cyl.in b/multigrid/wk/test_mg2d_cyl.in new file mode 100644 index 0000000..5ce28da --- /dev/null +++ b/multigrid/wk/test_mg2d_cyl.in @@ -0,0 +1,14 @@ +&newrun + n=2*128 + nidbas=2*3 + ngauss=2*6, + modem=22, modep=10 + prb='poly', + levels=12, + nu1=7, nu2=7, mu=1, nu0=1 + omega=0.65, + relax='gs', + nits=60, tol=1.e-8, rtol=0.0 + nlfixed=f + nluniq=t +/ diff --git a/multigrid/wk/test_mgp.in b/multigrid/wk/test_mgp.in new file mode 100644 index 0000000..80a305b --- /dev/null +++ b/multigrid/wk/test_mgp.in @@ -0,0 +1,9 @@ +&newrun + nx=1024, + nidbas=2, ngauss=2, + sigma = 0.01, kmode=10, + relax='gs', nits=10 + nlfixed=f, + levels=6, + nu1=1, nu2=1, mu=1, nu0=1 +/ diff --git a/multigrid/wk/test_relax.in b/multigrid/wk/test_relax.in new file mode 100644 index 0000000..fe5319c --- /dev/null +++ b/multigrid/wk/test_relax.in @@ -0,0 +1,11 @@ +&newrun + nx=32 + nidbas=1, + ngauss = 6 + sigma = 0.0d0, kmode=10, + modem=22, modep=10, + alpha = 0, + relax='gs', nits=200 + omega = 0.6667 + nlfixed=f, +/ diff --git a/multigrid/wk/test_relax2d.in b/multigrid/wk/test_relax2d.in new file mode 100644 index 0000000..42892a9 --- /dev/null +++ b/multigrid/wk/test_relax2d.in @@ -0,0 +1,11 @@ +&newrun + n=2*128 + nidbas=2*3 + ngauss=2*4 + kx=3, ky=3, + sigma=0.0, + alpha=0,0 + levels=4 + nits=1000 + relax='gs' +/ diff --git a/multigrid/wk/test_relax2d_cyl.in b/multigrid/wk/test_relax2d_cyl.in new file mode 100644 index 0000000..7a96c0e --- /dev/null +++ b/multigrid/wk/test_relax2d_cyl.in @@ -0,0 +1,14 @@ +&newrun + n=2*16 + nidbas=2*3 + ngauss=2*6 + prb='bess', + modem=3, modep=10, + modem=0, modep=5, + levels=4 + nits=500, + omega = 0.6667, + relax='gs' + nlfixed = f + nluniq = t +/ diff --git a/multigrid/wk/test_stencil.in b/multigrid/wk/test_stencil.in new file mode 100644 index 0000000..a8bffc0 --- /dev/null +++ b/multigrid/wk/test_stencil.in @@ -0,0 +1,4 @@ +&in + nx=12, + ny=10, +/ diff --git a/multigrid/wk/test_stencilg.in b/multigrid/wk/test_stencilg.in new file mode 100644 index 0000000..a8bffc0 --- /dev/null +++ b/multigrid/wk/test_stencilg.in @@ -0,0 +1,4 @@ +&in + nx=12, + ny=10, +/ diff --git a/multigrid/wk/test_transf2d.in b/multigrid/wk/test_transf2d.in new file mode 100644 index 0000000..7c75afb --- /dev/null +++ b/multigrid/wk/test_transf2d.in @@ -0,0 +1,9 @@ +&newrun + n=2*128 + nidbas=2*3 + ngauss=2*8 + kx=3, ky=2, + sigma=0.0, + alpha=0,0 + levels=6 +/ diff --git a/multigrid/wk/test_transf2d_cyl.in b/multigrid/wk/test_transf2d_cyl.in new file mode 100644 index 0000000..b8c00b3 --- /dev/null +++ b/multigrid/wk/test_transf2d_cyl.in @@ -0,0 +1,10 @@ +&newrun + n=2*128 + nidbas=2*1 + ngauss=2*12 + modem=3, modep=10, + modem=0, modep=5, + levels=7, + prb='bess' + nluniq = f, +/ diff --git a/multigrid/wk/transfer1d.in b/multigrid/wk/transfer1d.in new file mode 100644 index 0000000..8c6ded9 --- /dev/null +++ b/multigrid/wk/transfer1d.in @@ -0,0 +1,9 @@ +&newrun + nx=8, + nidbas=2, + ngauss = 3, + sigma = 1.0d0 + alpha = 1, + modem=0, + nlper=f, +/ diff --git a/multigrid/wk/transfer1d_col.in b/multigrid/wk/transfer1d_col.in new file mode 100644 index 0000000..ef91365 --- /dev/null +++ b/multigrid/wk/transfer1d_col.in @@ -0,0 +1,5 @@ +&newrun + nx=8, + nidbas=3, + nlper=f, +/ diff --git a/multigrid/wk/two_grid.in b/multigrid/wk/two_grid.in new file mode 100644 index 0000000..6f862d1 --- /dev/null +++ b/multigrid/wk/two_grid.in @@ -0,0 +1,8 @@ +&newrun + nx=128, + nidbas=3, ngauss=2, + sigma = 0.0d0, kmode=10, + modem=1, modep=10, + modem=22, modep=10, + alpha = 1, +/ diff --git a/pppack/CMakeLists.txt b/pppack/CMakeLists.txt new file mode 100644 index 0000000..a6796da --- /dev/null +++ b/pppack/CMakeLists.txt @@ -0,0 +1,37 @@ +/** + * @file CMakeLists.txt + * + * @brief + * + * @copyright + * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) + * SPC (Swiss Plasma Center) + * + * spclibs is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * + * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + * + * @authors + * (in alphabetical order) + * @author Nicolas Richart + * @author Trach-Minh Tran + */ +set(SRCS + bvalue.f90 + interv.f90 +) + +add_library(pppack STATIC ${SRCS}) + +install(TARGETS pppack + EXPORT ${BSPLINES_EXPORT_TARGETS} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} +) diff --git a/pppack/Makefile b/pppack/Makefile new file mode 100644 index 0000000..0a5cbcf --- /dev/null +++ b/pppack/Makefile @@ -0,0 +1,72 @@ +# +# @file Makefile +# +# @brief +# +# @copyright +# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +# SPC (Swiss Plasma Center) +# +# spclibs is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . +# +# @authors +# (in alphabetical order) +# @author Sébastien Jolliet +# @author Trach-Minh Tran +# +SRCS = banfac.f90 banslv.f90 bchfac.f90 bchslv.f90 bsplpp.f90 bsplvb.f90 \ + bsplvd.f90 bspp2d.f90 bvalue.f90 chol1d.f90 colloc.f90 colpnt.f90 \ + cspint.f90 cubset.f90 cubslo.f90 cubspl.f90 cwidth.f90 difequ.f90 \ + dtblok.f90 eqblok.f90 evnnot.f90 factrb.f90 fcblok.f90 interv.f90 \ + knots.f90 l2appr.f90 l2err.f90 l2knts.f90 newnot.f90 ppvalu.f90 \ + putit.f90 rvec_print.f90 sbblok.f90 setupq.f90 shiftb.f90 slvblk.f90 \ + smooth.f90 spli2d.f90 spline_hermite_set.f90 spline_hermite_val.f90 \ + splint.f90 splopt.f90 subbak.f90 subfor.f90 tautsp.f90 titanium.f90 + +OBJS = banfac.o banslv.o bchfac.o bchslv.o bsplpp.o bsplvb.o bsplvd.o \ + bspp2d.o bvalue.o chol1d.o colloc.o colpnt.o cspint.o cubset.o \ + cubslo.o cubspl.o cwidth.o difequ.o dtblok.o eqblok.o evnnot.o \ + factrb.o fcblok.o interv.o knots.o l2appr.o l2err.o l2knts.o newnot.o \ + ppvalu.o putit.o rvec_print.o sbblok.o setupq.o shiftb.o slvblk.o \ + smooth.o spli2d.o spline_hermite_set.o spline_hermite_val.o splint.o \ + splopt.o subbak.o subfor.o tautsp.o titanium.o + +OBJS = interv.o bvalue.o + +LIBS = + +CC = cc +CFLAGS = -g +FC = ifort +FFLAGS = $(OPT) +F90 = $(FC) +F90FLAGS = $(FFLAGS) +LDFLAGS = + +lib: libpppack.a + +libpppack.a: $(OBJS) + xiar r $@ $? + ranlib $@ + +clean: + rm -f *.o *.mod *~ core + +distclean: clean + rm -f libpppack.a a.out + +.SUFFIXES: +.SUFFIXES: .o .c .f90 + +.f90.o: + $(F90) $(F90FLAGS) -c $< diff --git a/pppack/banfac.f90 b/pppack/banfac.f90 new file mode 100644 index 0000000..50ecb50 --- /dev/null +++ b/pppack/banfac.f90 @@ -0,0 +1,234 @@ +!> +!> @file banfac.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine banfac ( w, nroww, nrow, nbandl, nbandu, iflag ) + +!************************************************************************* +! +!! BANFAC factors a banded matrix without pivoting. +! +! Discussion: +! +! BANFAC returns in W the LU-factorization, without pivoting, of +! the banded matrix A of order NROW with (NBANDL+1+NBANDU) bands +! or diagonals in the work array W. +! +! Gauss elimination without pivoting is used. The routine is +! intended for use with matrices A which do not require row +! interchanges during factorization, especially for the totally +! positive matrices which occur in spline calculations. +! +! The matrix storage mode used is the same one used by LINPACK +! and LAPACK, and results in efficient innermost loops. +! +! Explicitly, A has +! +! NBANDL bands below the diagonal +! 1 main diagonal +! NBANDU bands above the diagonal +! +! and thus, with MIDDLE=NBANDU+1, +! A(I+J,J) is in W(I+MIDDLE,J) for I=-NBANDU,...,NBANDL, J=1,...,NROW. +! +! For example, the interesting entries of a banded matrix +! matrix of order 9, with NBANDL=1, NBANDU=2: +! +! 11 12 13 0 0 0 0 0 0 +! 21 22 23 24 0 0 0 0 0 +! 0 32 33 34 35 0 0 0 0 +! 0 0 43 44 45 46 0 0 0 +! 0 0 0 54 55 56 57 0 0 +! 0 0 0 0 65 66 67 68 0 +! 0 0 0 0 0 76 77 78 79 +! 0 0 0 0 0 0 87 88 89 +! 0 0 0 0 0 0 0 98 99 +! +! would appear in the first 1+1+2=4 rows of W as follows: +! +! 0 0 13 24 35 46 57 68 79 +! 0 12 23 34 45 56 67 78 89 +! 11 22 33 44 55 66 77 88 99 +! 21 32 43 54 65 76 87 98 0 +! +! All other entries of W not identified in this way with an +! entry of A are never referenced. +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! Input/output, real ( kind = 8 ) W(NROWW,NROW). +! On input, W contains the "interesting" part of a banded +! matrix A, with the diagonals or bands of A stored in the +! rows of W, while columns of A correspond to columns of W. +! On output, W contains the LU-factorization of A into a unit +! lower triangular matrix L and an upper triangular matrix U +! (both banded) and stored in customary fashion over the +! corresponding entries of A. +! +! This makes it possible to solve any particular linear system A*X=B +! for X by the call +! +! call banslv ( w, nroww, nrow, nbandl, nbandu, b ) +! +! with the solution X contained in B on return. +! +! If IFLAG=2, then one of NROW-1, NBANDL, NBANDU failed to be nonnegative, +! or else one of the potential pivots was found to be zero +! indicating that A does not have an LU-factorization. This +! implies that A is singular in case it is totally positive. +! +! Input, integer NROWW, the row dimension of the work array W. +! NROWW must be at least NBANDL+1 + NBANDU. +! +! Input, integer NROW, the number of rows in A. +! +! Input, integer NBANDL, the number of bands of A below the main diagonal. +! +! Input, integer NBANDU, the number of bands of A above the main diagonal. +! +! Output, integer IFLAG, error flag. +! 1, success. +! 2, failure, the matrix was not factored. +! + implicit none + + integer nrow + integer nroww + + real ( kind = 8 ) factor + integer i + integer iflag + integer j + integer k + integer middle + integer nbandl + integer nbandu + real ( kind = 8 ) pivot + real ( kind = 8 ) w(nroww,nrow) + + iflag = 1 + + if ( nrow < 1 ) then + iflag = 2 + return + end if +! +! W(MIDDLE,*) contains the main diagonal of A. +! + middle = nbandu + 1 + + if ( nrow == 1 ) then + if ( w(middle,nrow) == 0.0D+00 ) then + iflag = 2 + end if + return + end if +! +! A is upper triangular. Check that the diagonal is nonzero. +! + if ( nbandl <= 0 ) then + + do i = 1, nrow-1 + if ( w(middle,i) == 0.0D+00 ) then + iflag = 2 + return + end if + end do + + if ( w(middle,nrow) == 0.0D+00 ) then + iflag = 2 + end if + + return +! +! A is lower triangular. Check that the diagonal is nonzero and +! divide each column by its diagonal. +! + else if ( nbandu <= 0 ) then + + do i = 1, nrow-1 + + pivot = w(middle,i) + + if ( pivot == 0.0D+00 ) then + iflag = 2 + return + end if + + do j = 1, min ( nbandl, nrow-i ) + w(middle+j,i) = w(middle+j,i) / pivot + end do + + end do + + return + + end if +! +! A is not just a triangular matrix. +! Construct the LU factorization. +! + do i = 1, nrow-1 +! +! W(MIDDLE,I) is the pivot for the I-th step. +! + if ( w(middle,i) == 0.0D+00 ) then + iflag = 2 + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) 'BANFAC - Fatal error!' + write ( *, '(a,i6)' ) ' Zero pivot encountered in column ', i + stop + end if +! +! Divide each entry in column I below the diagonal by PIVOT. +! + do j = 1, min ( nbandl, nrow-i ) + w(middle+j,i) = w(middle+j,i) / w(middle,i) + end do +! +! Subtract A(I,I+K)*(I-th column) from (I+K)-th column (below row I). +! + do k = 1, min ( nbandu, nrow-i ) + factor = w(middle-k,i+k) + do j = 1, min ( nbandl, nrow-i ) + w(middle-k+j,i+k) = w(middle-k+j,i+k) - w(middle+j,i) * factor + end do + end do + + end do +! +! Check the last diagonal entry. +! + if ( w(middle,nrow) == 0.0D+00 ) then + iflag = 2 + end if + + return +end diff --git a/pppack/banslv.f90 b/pppack/banslv.f90 new file mode 100644 index 0000000..a8159de --- /dev/null +++ b/pppack/banslv.f90 @@ -0,0 +1,112 @@ +!> +!> @file banslv.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine banslv ( w, nroww, nrow, nbandl, nbandu, b ) + +!************************************************************************* +! +!! BANSLV solves a banded linear system X * X = B factored by BANFAC. +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! Input, real ( kind = 8 ) W(NROWW,NROW). W contains the banded matrix, +! after it has been factored by BANFAC. +! +! Input, integer NROWW, the row dimension of the work array W. +! NROWW must be at least NBANDL+1 + NBANDU. +! +! Input, integer NROW, the number of rows in A. +! +! Input, integer NBANDL, the number of bands of A below the +! main diagonal. +! +! Input, integer NBANDU, the number of bands of A above the +! main diagonal. +! +! Input/output, real ( kind = 8 ) B(NROW). +! On input, B contains the right hand side of the system to be solved. +! On output, B contains the solution, X. +! + implicit none + + integer nrow + integer nroww + + real ( kind = 8 ) b(nrow) + integer i + integer j + integer jmax + integer middle + integer nbandl + integer nbandu + real ( kind = 8 ) w(nroww,nrow) + + middle = nbandu + 1 + + if ( nrow == 1 ) then + b(1) = b(1) / w(middle,1) + return + end if +! +! Forward pass +! +! For I = 1, 2, ..., NROW-1, subtract RHS(I)*(I-th column of L) +! from the right side, below the I-th row. +! + if ( 0 < nbandl ) then + do i = 1, nrow-1 + jmax = min ( nbandl, nrow-i ) + do j = 1, jmax + b(i+j) = b(i+j) - b(i) * w(middle+j,i) + end do + end do + end if +! +! Backward pass +! +! For I=NROW, NROW-1,...,1, divide RHS(I) by +! the I-th diagonal entry of U, then subtract +! RHS(I)*(I-th column of U) from right side, above the I-th row. +! + do i = nrow, 2, -1 + + b(i) = b(i) / w(middle,i) + + do j = 1, min ( nbandu, i-1 ) + b(i-j) = b(i-j) - b(i) * w(middle-j,i) + end do + + end do + + b(1) = b(1) / w(middle,1) + + return +end diff --git a/pppack/bchfac.f90 b/pppack/bchfac.f90 new file mode 100644 index 0000000..c582ad0 --- /dev/null +++ b/pppack/bchfac.f90 @@ -0,0 +1,168 @@ +!> +!> @file bchfac.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine bchfac ( w, nbands, nrow, diag ) + +!************************************************************************* +! +!! BCHFAC constructs a Cholesky factorization of a matrix. +! +! Discussion: +! +! The factorization has the form +! +! C = L * D * L' +! +! with L unit lower triangular and D diagonal, for a given matrix C of +! order NROW, where C is symmetric positive semidefinite and banded, +! having NBANDS diagonals at and below the main diagonal. +! +! Gauss elimination is used, adapted to the symmetry and bandedness of C. +! +! Near-zero pivots are handled in a special way. The diagonal +! element C(N,N)=W(1,N) is saved initially in DIAG(N), all N. +! +! At the N-th elimination step, the current pivot element, W(1,N), +! is compared with its original value, DIAG(N). If, as the result +! of prior elimination steps, this element has been reduced by about +! a word length, (i.e., if W(1,N)+DIAG(N) <= DIAG(N)), then the pivot +! is declared to be zero, and the entire N-th row is declared to +! be linearly dependent on the preceding rows. This has the effect +! of producing X(N) = 0 when solving C*X = B for X, regardless of B. +! +! Justification for this is as follows. In contemplated applications +! of this program, the given equations are the normal equations for +! some least-squares approximation problem, DIAG(N) = C(N,N) gives +! the norm-square of the N-th basis function, and, at this point, +! W(1,N) contains the norm-square of the error in the least-squares +! approximation to the N-th basis function by linear combinations +! of the first N-1. +! +! Having W(1,N)+DIAG(N) <= DIAG(N) signifies that the N-th function +! is linearly dependent to machine accuracy on the first N-1 +! functions, therefore can safely be left out from the basis of +! approximating functions. +! +! The solution of a linear system C*X=B is effected by the +! succession of the following two calls: +! +! CALL BCHFAC(W,NBANDS,NROW,DIAG) +! +! CALL BCHSLV(W,NBANDS,NROW,B,X) +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! Input/output, real ( kind = 8 ) W(NBANDS,NROW). +! +! On input, W contains the NBANDS diagonals in its rows, +! with the main diagonal in row 1. Precisely, W(I,J) +! contains C(I+J-1,J), I=1,...,NBANDS, J=1,...,NROW. +! +! For example, the interesting entries of a seven diagonal +! symmetric matrix C of order 9 would be stored in W as +! +! 11 22 33 44 55 66 77 88 99 +! 21 32 43 54 65 76 87 98 * +! 31 42 53 64 75 86 97 * * +! 41 52 63 74 85 96 * * * +! +! Entries of the array not associated with an +! entry of C are never referenced. +! +! On output, W contains the Cholesky factorization +! C = L*D*L-transp, with W(1,I) containing 1/D(I,I) and W(I,J) +! containing L(I-1+J,J), I=2,...,NBANDS. +! +! Input, integer NBANDS, indicates the bandwidth of the +! matrix C, i.e., C(I,J) = 0 for NBANDS < ABS(I-J). +! +! Input, integer NROW, is the order of the matrix C. +! +! Work array, real ( kind = 8 ) DIAG(NROW). +! + implicit none + + integer nbands + integer nrow + + real ( kind = 8 ) diag(nrow) + integer i + integer imax + integer j + integer jmax + integer n + real ( kind = 8 ) ratio + real ( kind = 8 ) w(nbands,nrow) + + if ( nrow <= 1 ) then + if ( 0.0D+00 < w(1,1) ) then + w(1,1) = 1.0D+00 / w(1,1) + end if + return + end if +! +! Store the diagonal. +! + diag(1:nrow) = w(1,1:nrow) +! +! Factorization. +! + do n = 1, nrow + + if ( w(1,n) + diag(n) <= diag(n) ) then + w(1:nbands,n) = 0.0D+00 + else + + w(1,n) = 1.0D+00 / w(1,n) + + imax = min ( nbands-1, nrow-n ) + + jmax = imax + + do i = 1, imax + + ratio = w(i+1,n) * w(1,n) + + do j = 1, jmax + w(j,n+i) = w(j,n+i) - w(j+i,n) * ratio + end do + + jmax = jmax-1 + w(i+1,n) = ratio + + end do + + end if + + end do + + return +end diff --git a/pppack/bchslv.f90 b/pppack/bchslv.f90 new file mode 100644 index 0000000..503e3e1 --- /dev/null +++ b/pppack/bchslv.f90 @@ -0,0 +1,114 @@ +!> +!> @file bchslv.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine bchslv ( w, nbands, nrow, b ) + +!************************************************************************* +! +!! BCHSLV solves a banded symmetric positive definite system. +! +! Discussion: +! +! The system is of the form: +! +! C * X = B +! +! and the Cholesky factorization of C has been constructed +! by BCHFAC. +! +! With the factorization +! +! C = L * D * L' +! +! available, where L is unit lower triangular and D is diagonal, +! the triangular system +! +! L * Y = B +! +! is solved for Y (forward substitution), Y is stored in B, the +! vector D**(-1)*Y is computed and stored in B, then the +! triangular system L'*X = D**(-1)*Y is solved for X +! (backsubstitution). +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! Input, real ( kind = 8 ) W(NBANDS,NROW), the Cholesky factorization for C, +! as computed by BCHFAC. +! +! Input, integer NBANDS, the bandwidth of C. +! +! Input, integer NROW, the order of the matrix C. +! +! Input/output, real ( kind = 8 ) B(NROW). +! On input, the right hand side. +! On output, the solution. +! + implicit none + + integer nbands + integer nrow + + real ( kind = 8 ) b(nrow) + integer j + integer n + real ( kind = 8 ) w(nbands,nrow) + + if ( nrow <= 1 ) then + b(1) = b(1) * w(1,1) + return + end if +! +! Forward substitution. +! Solve L*Y=B. +! + do n = 1, nrow + + do j = 1, min(nbands-1,nrow-n) + b(j+n) = b(j+n) - w(j+1,n) * b(n) + end do + + end do +! +! Backsubstitution. +! Solve L'*X=D**(-1)*Y. +! + do n = nrow, 1, -1 + + b(n) = b(n)*w(1,n) + + do j = 1, min(nbands-1,nrow-n) + b(n) = b(n) - w(j+1,n) * b(j+n) + end do + + end do + + return +end diff --git a/pppack/bsplpp.f90 b/pppack/bsplpp.f90 new file mode 100644 index 0000000..41b76bd --- /dev/null +++ b/pppack/bsplpp.f90 @@ -0,0 +1,165 @@ +!> +!> @file bsplpp.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine bsplpp ( t, bcoef, n, k, scrtch, break, coef, l ) + +!************************************************************************* +! +!! BSPLPP converts from B-spline to piecewise polynomial form. +! +! Discussion: +! +! The B-spline representation of a spline is ( T, BCOEF, N, K ), +! while the piecewise polynomial representation is +! ( BREAK, COEF, L, K ). +! +! For each breakpoint interval, the K relevant B-spline coefficients +! of the spline are found and then differenced repeatedly to get the +! B-spline coefficients of all the derivatives of the spline on that +! interval. +! +! The spline and its first K-1 derivatives are then evaluated at the +! left end point of that interval, using BSPLVB repeatedly to obtain +! the values of all B-splines of the appropriate order at that point. +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! Input, real ( kind = 8 ) T(N+K), the knot sequence. +! +! Input, real ( kind = 8 ) BCOEF(N), the B spline coefficient sequence. +! +! Input, integer N, the number of B spline coefficients. +! +! Input, integer K, the order of the spline. +! +! Work array, real ( kind = 8 ) SCRTCH(K,K). +! +! Output, real ( kind = 8 ) BREAK(L+1), the piecewise polynomial breakpoint +! sequence. BREAK contains the distinct points in the +! sequence T(K),...,T(N+1) +! +! Output, real ( kind = 8 ) COEF(K,N), with COEF(I,J) = (I-1)st derivative +! of the spline at BREAK(J) from the right. +! +! Output, integer L, the number of polynomial pieces which +! make up the spline in the interval (T(K),T(N+1)). +! + implicit none + + integer k + integer l + integer n + + real ( kind = 8 ) bcoef(n) + real ( kind = 8 ) biatx(k) + real ( kind = 8 ) break(*) + real ( kind = 8 ) coef(k,n) + real ( kind = 8 ) diff + integer i + integer j + integer jp1 + integer left + integer lsofar + real ( kind = 8 ) scrtch(k,k) + real ( kind = 8 ) sum1 + real ( kind = 8 ) t(n+k) + + lsofar = 0 + break(1) = t(k) + + do left = k, n +! +! Find the next nontrivial knot interval. +! + if ( t(left+1) == t(left) ) then + cycle + end if + + lsofar = lsofar + 1 + break(lsofar+1) = t(left+1) + + if ( k <= 1 ) then + coef(1,lsofar) = bcoef(left) + cycle + end if +! +! Store the K B-spline coefficients relevant to current knot +! interval in SCRTCH(*,1). +! + do i = 1, k + scrtch(i,1) = bcoef(left-k+i) + end do +! +! For j=1,...,k-1, compute the k-j b-spline coefficients relevant to +! current knot interval for the j-th derivative by differencing +! those for the (j-1)st derivative, and store in scrtch(.,j+1) . +! + do jp1 = 2, k + j = jp1-1 + do i = 1, k-j + diff = t(left+i)-t(left+i-(k-j)) + if ( 0.0D+00 < diff ) then + scrtch(i,jp1)=((scrtch(i+1,j)-scrtch(i,j)) / diff ) & + * real ( k - j, kind = 8 ) + end if + end do + end do +! +! For J=0, ..., K-1, find the values at T(left) of the j+1 +! B-splines of order J+1 whose support contains the current +! knot interval from those of order J (in biatx ), then comb- +! ine with the B-spline coefficients (in scrtch(.,k-j) ) found earlier +! to compute the (k-j-1)st derivative at t(left) of the given +! spline. +! + call bsplvb ( t, 1, 1, t(left), left, biatx ) + + coef(k,lsofar) = scrtch(1,k) + + do jp1 = 2, k + + call bsplvb ( t, jp1, 2, t(left), left, biatx ) + + sum1 = 0.0D+00 + do i = 1, jp1 + sum1 = sum1 + biatx(i) * scrtch(i,k+1-jp1) + end do + + coef(k+1-jp1,lsofar) = sum1 + + end do + + end do + + l = lsofar + + return +end diff --git a/pppack/bsplvb.f90 b/pppack/bsplvb.f90 new file mode 100644 index 0000000..89d9578 --- /dev/null +++ b/pppack/bsplvb.f90 @@ -0,0 +1,170 @@ +!> +!> @file bsplvb.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine bsplvb ( t, jhigh, index, x, left, biatx ) + +!************************************************************************* +! +!! BSPLVB evaluates B-splines at a point X with a given knot sequence. +! +! Discusion: +! +! BSPLVB evaluates all possibly nonzero B-splines at X of order +! +! JOUT = MAX ( JHIGH, (J+1)*(INDEX-1) ) +! +! with knot sequence T. +! +! The recurrence relation +! +! X - T(I) T(I+J+1) - X +! B(I,J+1)(X) = ----------- * B(I,J)(X) + --------------- * B(I+1,J)(X) +! T(I+J)-T(I) T(I+J+1)-T(I+1) +! +! is used to generate B(LEFT-J:LEFT,J+1)(X) from B(LEFT-J+1:LEFT,J)(X) +! storing the new values in BIATX over the old. +! +! The facts that +! +! B(I,1)(X) = 1 if T(I) <= X < T(I+1) +! +! and that +! +! B(I,J)(X) = 0 unless T(I) <= X < T(I+J) +! +! are used. +! +! The particular organization of the calculations follows +! algorithm 8 in chapter X of the text. +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! Input, real ( kind = 8 ) T(LEFT+JOUT), the knot sequence. T is assumed to +! be nondecreasing, and also, T(LEFT) must be strictly less than +! T(LEFT+1). +! +! Input, integer JHIGH, INDEX, determine the order +! JOUT = MAX(JHIGH,(J+1)*(INDEX-1)) +! of the B-splines whose values at X are to be returned. +! INDEX is used to avoid recalculations when several +! columns of the triangular array of B-spline values are +! needed, for example, in BVALUE or in BSPLVD. +! +! If INDEX = 1, the calculation starts from scratch and the entire +! triangular array of B-spline values of orders +! 1, 2, ...,JHIGH is generated order by order, i.e., +! column by column. +! +! If INDEX = 2, only the B-spline values of order J+1, J+2, ..., JOUT +! are generated, the assumption being that BIATX, J, +! DELTAL, DELTAR are, on entry, as they were on exit +! at the previous call. In particular, if JHIGH = 0, +! then JOUT = J+1, i.e., just the next column of B-spline +! values is generated. +! +! WARNING: the restriction JOUT <= JMAX (= 20) is +! imposed arbitrarily by the dimension statement for DELTAL +! and DELTAR, but is nowhere checked for. +! +! Input, real ( kind = 8 ) X, the point at which the B-splines +! are to be evaluated. +! +! Input, integer LEFT, an integer chosen so that +! T(LEFT) <= X <= T(LEFT+1). +! +! Output, real ( kind = 8 ) BIATX(JOUT), with BIATX(I) containing the +! value at X of the polynomial of order JOUT which agrees +! with the B-spline B(LEFT-JOUT+I,JOUT,T) on the interval +! (T(LEFT),T(LEFT+1)). +! + implicit none + + integer, parameter :: jmax = 20 + + integer jhigh + + real ( kind = 8 ) biatx(jhigh) +!!$ real ( kind = 8 ), save, dimension ( jmax ) :: deltal +!!$ real ( kind = 8 ), save, dimension ( jmax ) :: deltar + real ( kind = 8 ), dimension ( jmax ) :: deltal + real ( kind = 8 ), dimension ( jmax ) :: deltar + integer i + integer index +!!$ integer, save :: j = 1 + integer :: j + integer left + real ( kind = 8 ) saved + real ( kind = 8 ) t(left+jhigh) + real ( kind = 8 ) term + real ( kind = 8 ) x + +! Forces starting always from scratch! +!!$ if ( index == 1 ) then + j = 1 + biatx(1) = 1.0D+00 + if ( jhigh <= j ) then + return + end if +!!$ end if + + if ( t(left+1) <= t(left) ) then + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) 'BSPLVB - Fatal error!' + write ( *, '(a)' ) ' It is required that T(LEFT) < T(LEFT+1).' + write ( *, '(a,i6)' ) ' But LEFT = ', left + write ( *, '(a,g14.6)' ) ' T(LEFT) = ', t(left) + write ( *, '(a,g14.6)' ) ' T(LEFT+1) = ', t(left+1) + stop + end if + + do + + deltar(j) = t(left+j) - x + deltal(j) = x - t(left+1-j) + + saved = 0.0D+00 + do i = 1, j + term = biatx(i) / ( deltar(i) + deltal(j+1-i) ) + biatx(i) = saved + deltar(i) * term + saved = deltal(j+1-i) * term + end do + + biatx(j+1) = saved + j = j + 1 + + if ( jhigh <= j ) then + exit + end if + + end do + + return +end diff --git a/pppack/bsplvd.f90 b/pppack/bsplvd.f90 new file mode 100644 index 0000000..82d203c --- /dev/null +++ b/pppack/bsplvd.f90 @@ -0,0 +1,189 @@ +!> +!> @file bsplvd.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine bsplvd ( t, k, x, left, a, dbiatx, nderiv ) + +!************************************************************************* +! +!! BSPLVD calculates the nonvanishing B-splines and derivatives at X. +! +! Discussion: +! +! Values at X of all the relevant B-splines of order K, K-1,..., K+1-NDERIV +! are generated via BSPLVB and stored temporarily in DBIATX. +! +! Then, the B-spline coefficients of the required derivatives +! of the B-splines of interest are generated by differencing, +! each from the preceding one of lower order, and combined with +! the values of B-splines of corresponding order in DBIATX +! to produce the desired values. +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! Input, real ( kind = 8 ) T(LEFT+K), the knot sequence. It is assumed that +! T(LEFT) < T(LEFT+1). Also, the output is correct only if +! T(LEFT) <= X <= T(LEFT+1) . +! +! Input, integer K, the order of the B-splines to be evaluated. +! +! Input, real ( kind = 8 ) X, the point at which these values are sought. +! +! Input, integer LEFT, indicates the left endpoint of the interval of +! interest. The K B-splines whose support contains the interval +! (T(LEFT), T(LEFT+1)) are to be considered. +! +! Workspace, real ( kind = 8 ) A(K,K). +! +! Output, real ( kind = 8 ) DBIATX(K,NDERIV). DBIATX(I,M) contains +! the value of the (M-1)st derivative of the (LEFT-K+I)-th B-spline +! of order K for knot sequence T, I=M,...,K, M=1,...,NDERIV. +! +! Input, integer NDERIV, indicates that values of +! B-splines and their derivatives up to but not +! including the NDERIV-th are asked for. +! + implicit none + + integer k + integer left + integer nderiv + + real ( kind = 8 ) a(k,k) + real ( kind = 8 ) dbiatx(k,nderiv) + real ( kind = 8 ) factor + real ( kind = 8 ) fkp1mm + integer i + integer ideriv + integer il + integer j + integer jlow + integer jp1mid + integer ldummy + integer m + integer mhigh + real ( kind = 8 ) sum1 + real ( kind = 8 ) t(left+k) + real ( kind = 8 ) x + + mhigh = max ( min ( nderiv, k ), 1 ) +! +! MHIGH is usually equal to nderiv. +! + call bsplvb ( t, k+1-mhigh, 1, x, left, dbiatx ) + + if ( mhigh == 1 ) then + return + end if +! +! The first column of DBIATX always contains the B-spline values +! for the current order. These are stored in column K+1-current +! order before BSPLVB is called to put values for the next +! higher order on top of it. +! + ideriv = mhigh + do m = 2, mhigh + jp1mid = 1 + do j = ideriv, k + dbiatx(j,ideriv) = dbiatx(jp1mid,1) + jp1mid = jp1mid+1 + end do + ideriv = ideriv-1 + call bsplvb(t,k+1-ideriv,2,x,left,dbiatx) + end do +! +! At this point, b(left-k+i, k+1-j)(x) is in dbiatx(i,j) for +! i=j,...,k and j=1,...,mhigh ('=' nderiv). in particular, the +! first column of dbiatx is already in final form. to obtain cor- +! ??? LOST A LINE ??? +! rate their b-repr. by differencing, then evaluate at x. +! + jlow = 1 + do i = 1, k + do j = jlow,k + a(j,i) = 0.0D+00 + end do + jlow = i + a(i,i) = 1.0D+00 + end do +! +! At this point, a(.,j) contains the b-coefficients for the J-th of the +! k b-splines of interest here. +! + do m = 2, mhigh + + fkp1mm = real ( k + 1 - m, kind = 8 ) + il = left + i = k +! +! For j=1,...,k, construct b-coefficients of (m-1)st derivative of +! b-splines from those for preceding derivative by differencing +! and store again in a(.,j) . The fact that a(i,j)=0 for +! i < j is used. +! + do ldummy = 1, k+1-m + + factor = fkp1mm/(t(il+k+1-m)-t(il)) +! +! The assumption that t(left) < t(left+1) makes denominator +! in factor nonzero. +! + do j = 1, i + a(i,j) = (a(i,j)-a(i-1,j))*factor + end do + + il = il-1 + i = i-1 + + end do +! +! For i=1,...,k, combine b-coefficients a(.,i) with B-spline values +! stored in dbiatx(.,m) to get value of (m-1)st derivative of +! i-th b-spline (of interest here) at x , and store in +! dbiatx(i,m). storage of this value over the value of a b-spline +! of order m there is safe since the remaining b-spline derivat- +! ives of the same order do not use this value due to the fact +! that a(j,i)=0 for j < i. +! + do i = 1, k + + sum1 = 0.0D+00 + jlow = max(i,m) + do j = jlow,k + sum1 = sum1 + a(j,i) * dbiatx(j,m) + end do + + dbiatx(i,m) = sum1 + end do + + end do + + return +end diff --git a/pppack/bspp2d.f90 b/pppack/bspp2d.f90 new file mode 100644 index 0000000..59db260 --- /dev/null +++ b/pppack/bspp2d.f90 @@ -0,0 +1,203 @@ +!> +!> @file bspp2d.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine bspp2d ( t, bcoef, n, k, m, scrtch, break, coef, l ) + +!************************************************************************* +! +!! BSPP2D converts from B-spline to piecewise polynomial representation. +! +! Discussion: +! +! The B-spline representation +! +! T, BCOEF(.,J), N, K +! +! is converted to its piecewise polynomial representation +! +! BREAK, COEF(J,.,.), L, K, J=1, ..., M. +! +! This is an extended version of BSPLPP for use with tensor products. +! +! For each breakpoint interval, the K relevant B-spline +! coefficients of the spline are found and then differenced +! repeatedly to get the B-spline coefficients of all the +! derivatives of the spline on that interval. +! +! The spline and its first K-1 derivatives are then evaluated +! at the left endpoint of that interval, using BSPLVB +! repeatedly to obtain the values of all B-splines of the +! appropriate order at that point. +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! Input, real ( kind = 8 ) T(N+K), the knot sequence. +! +! Input, real ( kind = 8 ) BCOEF(N,M). For each J, B(*,J) is the +! B-spline coefficient sequence, of length N. +! +! Input, integer N, the length of BCOEF. +! +! Input, integer K, the order of the spline. +! +! Input, integer M, the number of data sets. +! +! Work array, real ( kind = 8 ) SCRTCH(K,K,M). +! +! Output, real ( kind = 8 ) BREAK(L+1), the breakpoint sequence +! containing the distinct points in the sequence T(K),...,T(N+1) +! +! Output, real ( kind = 8 ) COEF(M,K,N), with COEF(MM,I,J) = the (I-1)st +! derivative of the MM-th spline at BREAK(J) from the right, MM=1, ..., M. +! +! Output, integer L, the number of polynomial pieces which make up the +! spline in the interval (T(K), T(N+1)). +! + implicit none + + integer k + integer m + integer n + + real ( kind = 8 ) bcoef(n,m) + real ( kind = 8 ) biatx(k) + real ( kind = 8 ) break(*) + real ( kind = 8 ) coef(m,k,*) + real ( kind = 8 ) diff + real ( kind = 8 ) fkmj + integer i + integer j + integer jp1 + integer kmj + integer l + integer left + integer lsofar + integer mm + real ( kind = 8 ) scrtch(k,k,m) + real ( kind = 8 ) sum1 + real ( kind = 8 ) t(n+k) + + lsofar = 0 + break(1) = t(k) + + do left = k, n +! +! Find the next nontrivial knot interval. +! + if ( t(left+1) == t(left) ) then + cycle + end if + + lsofar = lsofar+1 + break(lsofar+1) = t(left+1) + + if ( k <= 1 ) then + + do mm = 1, m + coef(mm,1,lsofar) = bcoef(left,mm) + end do + + cycle + + end if +! +! Store the K b-spline coefficients relevant to current knot interval +! in scrtch(.,1) . +! + do i = 1, k + do mm = 1, m + scrtch(i,1,mm) = bcoef(left-k+i,mm) + end do + end do +! +! for j=1,...,k-1, compute the k-j b-spline coefficients relevant to +! current knot interval for the j-th derivative by differencing +! those for the (j-1)st derivative, and store in scrtch(.,j+1) . +! + do jp1 = 2, k + + j = jp1-1 + kmj = k-j + fkmj = real ( k - j, kind = 8 ) + + do i = 1, k-j + + diff = (t(left+i)-t(left+i-kmj))/fkmj + + if ( 0.0D+00 < diff ) then + + do mm = 1, m + scrtch(i,jp1,mm)=(scrtch(i+1,j,mm)-scrtch(i,j,mm))/diff + end do + + end if + + end do + + end do +! +! For j=0, ..., k-1, find the values at T(left) of the j+1 +! b-splines of order j+1 whose support contains the current +! knot interval from those of order j (in biatx ), then comb- +! ine with the b-spline coefficients (in scrtch(.,k-j) ) found earlier +! to compute the (k-j-1)st derivative at t(left) of the given +! spline. +! + call bsplvb ( t, 1, 1, t(left), left, biatx ) + + do mm = 1, m + coef(mm,k,lsofar) = scrtch(1,k,mm) + end do + + do jp1 = 2, k + + call bsplvb (t,jp1,2,t(left),left,biatx) + kmj = k+1-jp1 + + do mm = 1, m + + sum1 = 0.0D+00 + do i = 1, jp1 + sum1 = sum1 + biatx(i) * scrtch(i,kmj,mm) + end do + + coef(mm,kmj,lsofar) = sum1 + + end do + + end do + + end do + + l = lsofar + + return +end diff --git a/pppack/bvalue.f90 b/pppack/bvalue.f90 new file mode 100644 index 0000000..ead864f --- /dev/null +++ b/pppack/bvalue.f90 @@ -0,0 +1,226 @@ +!> +!> @file bvalue.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +function bvalue ( t, bcoef, n, k, x, jderiv ) + +!************************************************************************* +! +!! BVALUE evaluates a derivative of a spline from its B-spline representation. +! +! Discussion: +! +! The spline is taken to be continuous from the right. +! +! The nontrivial knot interval (T(I),T(I+1)) containing X is +! located with the aid of INTERV. The K B-spline coefficients +! of F relevant for this interval are then obtained from BCOEF, +! or are taken to be zero if not explicitly available, and are +! then differenced JDERIV times to obtain the B-spline +! coefficients of (D**JDERIV)F relevant for that interval. +! +! Precisely, with J = JDERIV, we have from X.(12) of the text that: +! +! (D**J)F = sum ( BCOEF(.,J)*B(.,K-J,T) ) +! +! where +! / BCOEF(.), , J == 0 +! / +! BCOEF(.,J) = / BCOEF(.,J-1) - BCOEF(.-1,J-1) +! / -----------------------------, 0 < J +! / (T(.+K-J) - T(.))/(K-J) +! +! Then, we use repeatedly the fact that +! +! sum ( A(.)*B(.,M,T)(X) ) = sum ( A(.,X)*B(.,M-1,T)(X) ) +! +! with +! (X - T(.))*A(.) + (T(.+M-1) - X)*A(.-1) +! A(.,X) = --------------------------------------- +! (X - T(.)) + (T(.+M-1) - X) +! +! to write (D**J)F(X) eventually as a linear combination of +! B-splines of order 1, and the coefficient for B(I,1,T)(X) +! must then be the desired number (D**J)F(X). +! See x.(17)-(19) of text. +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! Input, real ( kind = 8 ) T(N+K), the knot sequence. T is assumed +! to be nondecreasing. +! +! Input, real ( kind = 8 ) BCOEF(N), B-spline coefficient sequence. +! +! Input, integer N, the length of BCOEF. +! +! Input, integer K, the order of the spline. +! +! Input, real ( kind = 8 ) X, the point at which to evaluate. +! +! Input, integer JDERIV, the order of the derivative to +! be evaluated. JDERIV is assumed to be zero or positive. +! +! Output, real ( kind = 8 ) BVALUE, the value of the (JDERIV)-th +! derivative of the spline at X. +! + implicit none + + integer k + integer n + + real ( kind = 8 ) aj(k) + real ( kind = 8 ) bcoef(n) + real ( kind = 8 ) bvalue + real ( kind = 8 ) dl(k) + real ( kind = 8 ) dr(k) + integer i + integer ilo + integer j + integer jc + integer jcmax + integer jcmin + integer jderiv + integer jj + integer mflag + real ( kind = 8 ) t(n+k) + real ( kind = 8 ) x + + bvalue = 0.0D+00 + + if ( k <= jderiv ) then + return + end if +! +! Find I so that 1 <= i < n+k and t(i) < t(i+1) and t(i) <= x < t(i+1). +! +! If no such i can be found, X lies +! outside the support of the spline F and bvalue=0. +! (the asymmetry in this choice of i makes F rightcontinuous) +! + call interv ( t, n+k, x, i, mflag ) + + if ( mflag /= 0 ) then + return + end if +! +! If K=1 (and jderiv = 0), bvalue = bcoef(i). +! + if ( k <= 1 ) then + bvalue = bcoef(i) + return + end if +! +! Store the K b-spline coefficients relevant for the knot interval +! (T(i),T(i+1)) in aj(1),...,aj(k) and compute dl(j)=x-t(i+1-j), +! dr(j)=T(i+j)-x, j=1,...,k-1 . set any of the aj not obtainable +! from input to zero. Set any T's not obtainable equal to T(1) or +! to T(n+k) appropriately. +! + jcmin = 1 + + if ( k <= i ) then + + do j = 1, k-1 + dl(j) = x-t(i+1-j) + end do + + else + + jcmin = 1-(i-k) + + do j = 1, i + dl(j) = x-t(i+1-j) + end do + + do j = i, k-1 + aj(k-j) = 0.0D+00 + dl(j) = dl(i) + end do + + end if + + jcmax = k + + if ( i <= n ) then + go to 90 + end if + + jcmax = k + n - i + do j = 1, k+n-i + dr(j) = t(i+j)-x + end do + + do j = k+n-i, k-1 + aj(j+1) = 0.0D+00 + dr(j) = dr(k+n-i) + end do + + go to 110 + + 90 continue + + do j = 1, k-1 + dr(j) = t(i+j)-x + end do + + 110 continue + + do jc = jcmin, jcmax + aj(jc) = bcoef(i-k+jc) + end do +! +! Difference the coefficients JDERIV times. +! + do j = 1, jderiv + + ilo = k-j + do jj = 1, k-j + aj(jj) = ((aj(jj+1)-aj(jj))/(dl(ilo)+dr(jj))) * real ( k - j, kind = 8 ) + ilo = ilo-1 + end do + + end do +! +! Compute value at X in (t(i),t(i+1)) of jderiv-th derivative, +! given its relevant b-spline coefficients in aj(1),...,aj(k-jderiv). +! + do j = jderiv+1, k-1 + ilo = k-j + do jj = 1, k-j + aj(jj) = ( aj(jj+1) * dl(ilo) + aj(jj) * dr(jj) ) & + / ( dl(ilo) + dr(jj) ) + ilo = ilo-1 + end do + end do + + bvalue = aj(1) + + return +end diff --git a/pppack/chol1d.f90 b/pppack/chol1d.f90 new file mode 100644 index 0000000..8a0e19b --- /dev/null +++ b/pppack/chol1d.f90 @@ -0,0 +1,146 @@ +!> +!> @file chol1d.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine chol1d ( p, v, qty, npoint, u, qu ) + +!************************************************************************* +! +!! CHOL1D sets up and solves linear systems needed by SMOOTH. +! +! Discussion: +! +! This routine constructs the upper three diagonals of +! +! V(I,J), I=2 to NPOINT-1, J=1,3, +! +! of the matrix +! +! 6*(1-P)*Q-transpose*(D**2)*Q + P*R. +! +! It then computes its L*L' decomposition and stores it also +! in V, then applies forward and backsubstitution to the right side +! +! Q'*Y +! +! in QTY to obtain the solution in U. +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! Input, real ( kind = 8 ) P, ? +! +! ?put, real ( kind = 8 ) V(NPOINT,7), ? +! +! ?put, real ( kind = 8 ) QTY(NPOINT), ? +! +! Input, integer NPOINT, ? +! +! Output, real ( kind = 8 ) U(NPOINT), the solution. +! +! Output, real ( kind = 8 ) QU(NPOINT), the value of Q * U. +! + implicit none + + integer npoint + + integer i + real ( kind = 8 ) p + real ( kind = 8 ) qty(npoint) + real ( kind = 8 ) qu(npoint) + real ( kind = 8 ) u(npoint) + real ( kind = 8 ) v(npoint,7) + real ( kind = 8 ) prev + real ( kind = 8 ) ratio + real ( kind = 8 ) six1mp + real ( kind = 8 ) twop +! +! Construct 6*(1-p)*q'*(d**2)*q + p*r +! + six1mp = 6.0D+00 * ( 1.0D+00 - p ) + twop = 2.0D+00 * p + + do i = 2, npoint-1 + v(i,1) = six1mp * v(i,5)+twop*(v(i-1,4)+v(i,4)) + v(i,2) = six1mp * v(i,6)+p*v(i,4) + v(i,3) = six1mp * v(i,7) + end do + + if ( npoint < 4 ) then + u(1) = 0.0D+00 + u(2) = qty(2) / v(2,1) + u(3) = 0.0D+00 +! +! Factorization +! + else + + do i = 2, npoint-2 + ratio = v(i,2)/v(i,1) + v(i+1,1) = v(i+1,1)-ratio*v(i,2) + v(i+1,2) = v(i+1,2)-ratio*v(i,3) + v(i,2) = ratio + ratio = v(i,3)/v(i,1) + v(i+2,1) = v(i+2,1)-ratio*v(i,3) + v(i,3) = ratio + end do +! +! Forward substitution +! + u(1) = 0.0D+00 + v(1,3) = 0.0D+00 + u(2) = qty(2) + do i = 2, npoint-2 + u(i+1) = qty(i+1)-v(i,2)*u(i)-v(i-1,3)*u(i-1) + end do +! +! Back substitution. +! + u(npoint) = 0.0D+00 + u(npoint-1) = u(npoint-1) / v(npoint-1,1) + + do i = npoint-2, 2, -1 + u(i) = u(i)/v(i,1)-u(i+1)*v(i,2)-u(i+2)*v(i,3) + end do + + end if +! +! Construct Q*U. +! + prev = 0.0D+00 + do i = 2, npoint + qu(i) = (u(i)-u(i-1))/v(i-1,4) + qu(i-1) = qu(i)-prev + prev = qu(i) + end do + + qu(npoint) = -qu(npoint) + + return +end diff --git a/pppack/colloc.f90 b/pppack/colloc.f90 new file mode 100644 index 0000000..df60a0e --- /dev/null +++ b/pppack/colloc.f90 @@ -0,0 +1,275 @@ +!> +!> @file colloc.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine colloc ( aleft, aright, lbegin, iorder, ntimes, addbrk, & + relerr ) + +!************************************************************************* +! +!! COLLOC solves an ordinary differential equation by collocation. +! +! Method: +! +! The M-th order ordinary differential equation with M side +! conditions, to be specified in subroutine DIFEQU, is solved +! approximately by collocation. +! +! The approximation F to the solution G is piecewise polynomial of order +! k+m with L pieces and M-1 continuous derivatives. F is determined by +! the requirement that it satisfy the differential equation at K points +! per interval (to be specified in COLPNT ) and the M side conditions. +! +! This usually nonlinear system of equations for f is solved by +! Newton's method. the resulting linear system for the b-coefficients of an +! iterate is constructed appropriately in eqblok and then solved +! in slvblk, a program designed to solve almost block +! diagonal linear systems efficiently. +! +! There is an opportunity to attempt improvement of the breakpoint +! sequence (both in number and location) through use of NEWNOT. +! +! Printed output consists of the pp-representation of the approximate +! solution, and of the error at selected points. +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! Input, real ( kind = 8 ) ALEFT, ARIGHT, the endpoints of the interval. +! +! Input, integer LBEGIN, the initial number of polynomial pieces +! in the approximation. A uniform breakpoint sequence will be chosen. +! +! Input, integer IORDER, the order of the polynomial pieces to be +! used in the approximation +! +! Input, integer NTIMES, the number of passes to be made through NEWNOT. +! +! addbrk the number (possibly fractional) of breaks to be added per +! pass through newnot. e.g., if addbrk=.33334, then a break- +! point will be added at every third pass through newnot. +! +! relerr a tolerance. Newton iteration is stopped if the difference +! between the b-coefficients of two successive iterates is no more +! than relerr*(absolute largest b-coefficient). +! + implicit none + + integer, parameter :: npiece = 100 + integer, parameter :: ndim = 200 + integer, parameter :: ncoef = 2000 + integer, parameter :: lenblk = 2000 + + real ( kind = 8 ) a(ndim) + real ( kind = 8 ) addbrk + real ( kind = 8 ) aleft + real ( kind = 8 ) amax + real ( kind = 8 ) aright + real ( kind = 8 ) asave(ndim) + real ( kind = 8 ) b(ndim) + real ( kind = 8 ) bloks(lenblk) + real ( kind = 8 ) break + real ( kind = 8 ) coef + real ( kind = 8 ) dx + real ( kind = 8 ) err + integer i + integer iflag + integer ii + integer integs(3,npiece) + integer iorder + integer iside + integer itemps(ndim) + integer iter + integer itermx + integer j + integer k + integer kpm + integer l + integer lbegin + integer lnew + integer m + integer n + integer nbloks + integer nt + integer ntimes + real ( kind = 8 ) relerr + real ( kind = 8 ) rho + real ( kind = 8 ) t(ndim) + real ( kind = 8 ) templ(lenblk) + real ( kind = 8 ) temps(ndim) + real ( kind = 8 ) xside + + equivalence (bloks,templ) + + common /approx/ break(npiece),coef(ncoef),l,kpm + common /side/ m,iside,xside(10) + common /other/ itermx,k,rho(19) + + kpm = iorder + + if ( ncoef < lbegin * kpm ) then + go to 120 + end if +! +! Set the various parameters concerning the particular dif.equ. +! including a first approximation in case the de is to be solved by +! iteration ( 0 < itermx ). +! + call difequ ( 1, temps(1), temps ) +! +! Obtain the K collocation points for the standard interval. +! + k = kpm-m + call colpnt(k,rho) +! +! The following five statements could be replaced by a read in or- +! der to obtain a specific (nonuniform) spacing of the breakpnts. +! + dx = (aright-aleft) / real ( lbegin, kind = 8 ) + + temps(1) = aleft + do i = 2, lbegin + temps(i) = temps(i-1)+dx + end do + temps(lbegin+1) = aright +! +! Generate the required knots t(1),...,t(n+kpm). +! + call knots ( temps, lbegin, kpm, t, n ) + nt = 1 +! +! Generate the almost block diagonal coefficient matrix bloks and +! right side b from collocation equations and side conditions. +! then solve via slvblk , obtaining the b-representation of the +! approximation in T, A, N, KPM. +! +20 continue + + call eqblok ( t, n, kpm, temps, a, bloks, lenblk, integs, nbloks, b ) + + call slvblk ( bloks, integs, nbloks, b, itemps, a, iflag ) + + iter = 1 + if ( itermx <= 1 ) then + go to 60 + end if +! +! Save b-spline coefficients of current approx. in asave , then get new +! approx. and compare with old. if coefficients are more than relerr +! apart (relatively) or if number of iterations is less than itermx , +! continue iterating. +! + 30 continue + + call bsplpp(t,a,n,kpm,templ,break,coef,l) + + do i = 1, n + asave(i) = a(i) + end do + + call eqblok ( t, n, kpm, temps, a, bloks, lenblk, integs, nbloks, b ) + + call slvblk(bloks,integs,nbloks,b,itemps,a,iflag) + + err = 0.0D+00 + amax = 0.0D+00 + do i = 1, n + amax = max ( amax, abs ( a(i) ) ) + err = max ( err, abs ( a(i)-asave(i) ) ) + end do + + if ( err <= relerr*amax ) then + go to 60 + end if + + iter = iter + 1 + + if ( iter < itermx ) then + go to 30 + end if +! +! Iteration (if any) completed. print out approx. based on current +! breakpoint sequence, then try to improve the sequence. +! + 60 continue + + write(*,70)kpm,l,n,(break(i),i=2,l) + 70 format (' approximation from a space of splines of order',i3, & + ' on ',i3,' intervals,'/' of dimension',i4,'. breakpoints -'/ & + (5e20.10)) + + if ( 0 < itermx ) then + write(*,*)' ' + write(*,*)'Results on interation ',iter + end if + + call bsplpp(t,a,n,kpm,templ,break,coef,l) + + write ( *, * ) ' ' + write ( *, * ) 'The piecewise polynomial representation of the approximation:' + write ( *, * ) ' ' + + do i = 1, l + ii = ( i - 1 ) * kpm + write(*,'(f9.3,e13.6,10e11.3)')break(i),(coef(ii+j),j=1,kpm) + end do +! +! The following call is provided here for possible further analysis +! of the approximation specific to the problem being solved. +! it is, of course, easily omitted. +! + call difequ ( 4, temps(1), temps ) + + if ( ntimes < nt ) then + return + end if +! +! From the pp-rep. of the current approx., obtain in NEWNOT a new +! (and possibly better) sequence of breakpoints, adding (on the +! average) ADDBRK breakpoints per pass through NEWNOT. +! + lnew = lbegin + int ( real ( nt, kind = 8 ) * addbrk ) + + if ( ncoef < lnew * kpm ) then + go to 120 + end if + + call newnot(break,coef,l,kpm,temps,lnew,templ) + + call knots ( temps, lnew, kpm, t, n ) + nt = nt+1 + go to 20 + + 120 continue + write(*,*)' ' + write(*,*)'COLLOC - Fatal error!' + write(*,*)' The assigned dimension for COEF is ',ncoef + write(*,*)' but this is too small.' + stop +end diff --git a/pppack/colpnt.f90 b/pppack/colpnt.f90 new file mode 100644 index 0000000..8bfb5ad --- /dev/null +++ b/pppack/colpnt.f90 @@ -0,0 +1,117 @@ +!> +!> @file colpnt.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine colpnt ( k, rho ) + +!************************************************************************* +! +!! COLPNT supplies collocation points. +! +! Discussion: +! +! The collocation points are for the standard interval (-1,1) as the +! zeros of the Legendre polynomial of degree K, provided K <= 8. +! +! Otherwise, uniformly spaced points are given. +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! Input, integer K, the number of collocation points desired. +! +! Output, real ( kind = 8 ) RHO(K), the collocation points. +! + implicit none + + integer k + + integer j + real ( kind = 8 ) rho(k) + + if ( k == 1 ) then + rho(1) = 0.0D+00 + else if ( k == 2 ) then + rho(1) = -0.577350269189626D+00 + rho(2) = 0.577350269189626D+00 + else if ( k == 3 ) then + rho(1) = -0.774596669241483D+00 + rho(2) = 0.0 + rho(3) = 0.774596669241483D+00 + else if ( k == 4 ) then + rho(1) = -0.861136311594053D+00 + rho(2) = -0.339981043584856D+00 + rho(3) = 0.339981043584856D+00 + rho(4) = 0.861136311594053D+00 + else if ( k == 5 ) then + rho(1) = -0.906179845938664D+00 + rho(2) = -0.538469310105683D+00 + rho(3) = 0.0D+00 + rho(4) = 0.538469310105683D+00 + rho(5) = 0.906179845938664D+00 + else if ( k == 6 ) then + rho(1) = -0.932469514203152D+00 + rho(2) = -0.661209386466265D+00 + rho(3) = -0.238619186083197D+00 + rho(4) = 0.238619186083197D+00 + rho(5) = 0.661209386466265D+00 + rho(6) = 0.932469514203152D+00 + else if ( k == 7 ) then + rho(5) = 0.405845151377397D+00 + rho(3) = -rho(5) + rho(6) = 0.741531185599394D+00 + rho(2) = -rho(6) + rho(7) = 0.949107912342759D+00 + rho(1) = -rho(7) + rho(4) = 0.0 + else if ( k == 8 ) then + rho(5) = 0.183434642495650D+00 + rho(4) = -rho(5) + rho(6) = 0.525532409916329D+00 + rho(3) = -rho(6) + rho(7) = 0.796666477413627D+00 + rho(2) = -rho(7) + rho(8) = 0.960289856497536D+00 + rho(1) = -rho(8) + else + + write ( *, * )' ' + write ( *, * )'ColPnt - Warning!' + write ( *, * )' Equispaced collocation points will be used,' + write ( *, * )' because K =',k,' which is greater than 8.' + + do j = 1, k + rho(j) = -1.0D+00 + 2.0D+00 * real ( j - 1, kind = 8 ) & + / real ( k - 1, kind = 8 ) + end do + + end if + + return +end diff --git a/pppack/cspint.f90 b/pppack/cspint.f90 new file mode 100644 index 0000000..fb201fb --- /dev/null +++ b/pppack/cspint.f90 @@ -0,0 +1,214 @@ +!> +!> @file cspint.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine cspint ( ftab, xtab, ntab, a, b, y, e, work, result, ind ) + +!************************************************************************* +! +!! CSPINT estimates an integral using a spline interpolant. +! +! Discussion: +! +! CSPINT estimates the integral from A to B of F(X) by +! computing the natural spline S(X) that interpolates to F +! and integrating that exactly. +! +! F is supplied to the routine in the form of tabulated data. +! +! Other output from the program includes the definite integral +! from X(1) to X(I) of the spline, and the coefficients +! necessary for the user to evaluate the spline outside of +! this routine. +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! Input, real ( kind = 8 ) FTAB(NTAB), contains the tabulated values +! of the functions, FTAB(I)=F(XTAB(I)). +! +! Input, real ( kind = 8 ) XTAB(NTAB), contains the points at +! which the function was evaluated. The XTAB's must be +! distinct and in ascending order. +! +! Input, integer NTAB, the number of entries in FTAB +! and XTAB. NTAB must be at least 3. +! +! Input, real ( kind = 8 ) A, lower limit of integration. +! +! Input, real ( kind = 8 ) B, upper limit of integration. +! +! Output, real ( kind = 8 ) Y(3,NTAB), will contain the coefficients +! of the interpolating natural spline over each subinterval. +! +! For XTAB(I) < = X <= XTAB(I+1), +! +! S(X) = FTAB(I) + Y(1,I)*(X-XTAB(I)) + Y(2,I)*(X-XTAB(I))**2 +! + Y(3,I)*(X-XTAB(I))**3 +! +! Output, real ( kind = 8 ) E(NTAB), E(I)=the definite integral +! from XTAB(1) to XTAB(I) of S(X). +! +! Workspace, real ( kind = 8 ) WORK(NTAB). +! +! Output, real ( kind = 8 ) RESULT, the estimated value of the integral. +! +! Output, integer IND, error flag. +! IND=0 if NTAB < 3 or the XTAB's are not distinct and in +! ascending order. +! IND=1 otherwise. +! + implicit none + + integer ntab + + real ( kind = 8 ) a + real ( kind = 8 ) b + real ( kind = 8 ) e(ntab) + real ( kind = 8 ) ftab(ntab) + integer i + integer ind + integer j + real ( kind = 8 ) r + real ( kind = 8 ) result + real ( kind = 8 ) s + real ( kind = 8 ) term + real ( kind = 8 ) u + real ( kind = 8 ) work(ntab) + real ( kind = 8 ) xtab(ntab) + real ( kind = 8 ) y(3,ntab) + + ind = 0 + + if ( ntab < 3 ) then + write(*,*)' ' + write(*,*)'CSPINT - Fatal error!' + write(*,*)' NTAB must be at least 3,' + write(*,*)' but your value was NTAB = ',ntab + stop + end if + + do i = 1, ntab-1 + + if ( xtab(i+1) <= xtab(i) ) then + write(*,*)' ' + write(*,*)'CSPINT - Fatal error!' + write(*,*)' Interval ',i,' is illegal.' + write(*,*)' XTAB(I) =',xtab(i) + write(*,*)' XTAB(I+1)=',xtab(i+1) + stop + end if + + end do + + s = 0.0D+00 + do i = 1, ntab-1 + r = ( ftab(i+1) - ftab(i) ) / ( xtab(i+1) - xtab(i) ) + y(2,i) = r - s + s = r + end do + + result = 0.0D+00 + s = 0.0D+00 + r = 0.0D+00 + y(2,1) = 0.0D+00 + y(2,ntab) = 0.0D+00 + + do i = 2, ntab-1 + y(2,i) = y(2,i) + r * y(2,i-1) + work(i) = 2.0D+00 * ( xtab(i-1) - xtab(i+1) ) - r * s + s = xtab(i+1) - xtab(i) + r = s / work(i) + end do + + do j = 2, ntab-1 + i = ntab+1-j + y(2,i) = ((xtab(i+1)-xtab(i))*y(2,i+1)-y(2,i))/work(i) + end do + + do i = 1, ntab-1 + s = xtab(i+1) - xtab(i) + r = y(2,i+1) - y(2,i) + y(3,i) = r / s + y(2,i) = 3.0D+00 * y(2,i) + y(1,i) = ( ftab(i+1) - ftab(i) ) / s - ( y(2,i) + r ) * s + end do + + e(1) = 0.0D+00 + + do i = 1, ntab-1 + + s = xtab(i+1) - xtab(i) + + term = ( ( ( y(3,i) * 0.25D+00 * s & + + y(2,i) / 3.0D+00 ) * s & + + y(1,i) * 0.5D+00 ) * s + ftab(i) ) * s + + e(i+1) = e(i) + term + + end do +! +! Determine where the endpoints A and B lie in the mesh of XTAB's. +! + r = a + u = 1.0D+00 + + do j = 1, 2 + + if ( r <= xtab(1) ) then + result = result-u*((r-xtab(1))*y(1,1)* 0.5D+00 + ftab(1))*(r-xtab(1)) + else if ( xtab(ntab) <= r ) then + result = result-u*(e(ntab)+(r-xtab(ntab))*(ftab(ntab) + 0.5D+00 * & + (ftab(ntab-1)+(xtab(ntab)-xtab(ntab-1))*y(1,ntab-1))*(r- & + xtab(ntab)))) + else + do i = 1, ntab-1 + + if ( r <= xtab(i+1) ) then + r = r - xtab(i) + result = result-u*(e(i)+(((y(3,i)*0.25D+00*r+y(2,i)/3.0D+00)*r & + +y(1,i) * 0.5D+00 )*r+ftab(i))*r) + go to 100 + end if + + end do + + end if + + 100 continue + + u = -1.0D+00 + r = b + + end do + + ind = 1 + + return +end diff --git a/pppack/cubset.f90 b/pppack/cubset.f90 new file mode 100644 index 0000000..567b9de --- /dev/null +++ b/pppack/cubset.f90 @@ -0,0 +1,106 @@ +!> +!> @file cubset.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine cubset ( tau, c, n, ibcbeg, ibcend ) + +!******************************************************************************* +! +!! CUBSET sets up a simple cubic spline interpolant. +! +! WARNING: IBCBEG and IBCEND are not set up yet. +! +! A tridiagonal linear system for the unknown slopes S(I) of +! F at TAU(I), I=1,..., N, is generated and then solved by Gauss +! elimination, with S(I) ending up in C(2,I), for all I. +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! Input, real ( kind = 8 ) TAU(N), the abscissas or X values of +! the data points. The entries of TAU are assumed to be +! strictly increasing. +! +! Input, integer N, the number of data points. N is +! assumed to be at least 2. +! +! Input/output, real ( kind = 8 ) C(4,N). +! On input, if IBCBEG or IBCBEG is 1 or 2, then C(2,1) +! or C(2,N) should have been set to the desired derivative +! values, as described further under IBCBEG and IBCEND. +! On output, C contains the polynomial coefficients of +! the cubic interpolating spline with interior knots +! TAU(2) through TAU(N-1). +! In the interval interval (TAU(I), TAU(I+1)), the spline +! F is given by F(X) = +! C(1,I) + +! C(2,I) * ( X - TAU(I) ) + +! C(3,I) * ( X - TAU(I) )**2 + +! C(4,I) * ( X - TAU(I) )**3 +! +! IBCBEG, +! IBCEND Input, integer IBCBEG, IBCEND, boundary condition +! indicators. +! +! IBCBEG=0 means no boundary condition at TAU(1) is given. +! In this case, the "not-a-knot condition" is used. That +! is, the jump in the third derivative across TAU(2) is +! forced to zero. Thus the first and the second cubic +! polynomial pieces are made to coincide. +! +! IBCBEG=1 means that the slope at TAU(1) is to equal the +! input value C(2,1). +! +! IBCBEG=2 means that the second derivative at TAU(1) is +! to equal C(2,1). +! +! IBCEND=0, 1, or 2 has analogous meaning concerning the +! boundary condition at TAU(N), with the additional +! information taken from C(2,N). +! + implicit none + + integer n + + real ( kind = 8 ) c(4,n) + integer ibcbeg + integer ibcend + real ( kind = 8 ) tau(n) +! +! Solve for the slopes at internal nodes. +! + call cubslo ( tau, c, n ) +! +! Now compute the quadratic and cubic coefficients used in the +! piecewise polynomial representation. +! + call spline_hermite_set ( n, tau, c ) + + return +end diff --git a/pppack/cubslo.f90 b/pppack/cubslo.f90 new file mode 100644 index 0000000..659792d --- /dev/null +++ b/pppack/cubslo.f90 @@ -0,0 +1,111 @@ +!> +!> @file cubslo.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine cubslo ( tau, c, n ) + +!******************************************************************************* +! +!! CUBSLO solves for slopes defining a cubic spline. +! +! Discussion: +! +! A tridiagonal linear system for the unknown slopes S(I) of +! F at TAU(I), I=1,..., N, is generated and then solved by Gauss +! elimination, with S(I) ending up in C(2,I), for all I. +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! Input, real ( kind = 8 ) TAU(N), the abscissas or X values of +! the data points. The entries of TAU are assumed to be +! strictly increasing. +! +! Input, integer N, the number of data points. N is +! assumed to be at least 2. +! +! Input/output, real ( kind = 8 ) C(4,N). +! On input, C(1,I) contains the function value at TAU(I), +! for I = 1 to N. +! C(2,1) contains the slope at TAU(1) and C(2,N) contains +! the slope at TAU(N). +! On output, the intermediate slopes at TAU(I) have been +! stored in C(2,I), for I = 2 to N-1. +! + implicit none + + integer n + + real ( kind = 8 ) c(4,n) + real ( kind = 8 ) g + integer i + integer ibcbeg + integer ibcend + real ( kind = 8 ) tau(n) +! +! Set up the right hand side of the linear system. +! C(2,1) and C(2,N) are presumably already set. +! + do i = 2, n-1 + c(2,i) = 3.0D+00 * ( & + ( tau(i) - tau(i-1) ) * ( c(1,i+1) - c(1,i) ) / ( tau(i+1) - tau(i) ) + & + ( tau(i+1) - tau(i) ) * ( c(1,i) - c(1,i-1) ) / ( tau(i) - tau(i-1) ) ) + end do +! +! Set the diagonal coefficients. +! + c(4,1) = 1.0D+00 + do i = 2, n-1 + c(4,i) = 2.0D+00 * ( tau(i+1) - tau(i-1) ) + end do + c(4,n) = 1.0D+00 +! +! Set the off-diagonal coefficients. +! + c(3,1) = 0.0D+00 + do i = 2, n + c(3,i) = tau(i) - tau(i-1) + end do +! +! Forward elimination. +! + do i = 2, n-1 + g = -c(3,i+1) / c(4,i-1) + c(4,i) = c(4,i) + g * c(3,i-1) + c(2,i) = c(2,i) + g * c(2,i-1) + end do +! +! Back substitution for the interior slopes. +! + do i = n-1, 2, -1 + c(2,i) = ( c(2,i) - c(3,i) * c(2,i+1) ) / c(4,i) + end do + + return +end diff --git a/pppack/cubspl.f90 b/pppack/cubspl.f90 new file mode 100644 index 0000000..ffbc652 --- /dev/null +++ b/pppack/cubspl.f90 @@ -0,0 +1,282 @@ +!> +!> @file cubspl.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine cubspl ( tau, c, n, ibcbeg, ibcend ) + +!******************************************************************************* +! +!! CUBSPL defines an interpolatory cubic spline. +! +! Discussion: +! +! A tridiagonal linear system for the unknown slopes S(I) of +! F at TAU(I), I=1,..., N, is generated and then solved by Gauss +! elimination, with S(I) ending up in C(2,I), for all I. +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! Input, real ( kind = 8 ) TAU(N), the abscissas or X values of +! the data points. The entries of TAU are assumed to be +! strictly increasing. +! +! Input, integer N, the number of data points. N is +! assumed to be at least 2. +! +! Input/output, real ( kind = 8 ) C(4,N). +! On input, if IBCBEG or IBCBEG is 1 or 2, then C(2,1) +! or C(2,N) should have been set to the desired derivative +! values, as described further under IBCBEG and IBCEND. +! On output, C contains the polynomial coefficients of +! the cubic interpolating spline with interior knots +! TAU(2) through TAU(N-1). +! +! In the interval interval (TAU(I), TAU(I+1)), the spline +! F is given by +! +! F(X) = +! C(1,I) + +! C(2,I) * H + +! C(3,I) * H**2 / 2 + +! C(4,I) * H**3 / 6. +! +! where H=X-TAU(I). The routine PPVALU may be used to +! evaluate F or its derivatives from TAU, C, L=N-1, +! and K=4. +! +! Input, integer IBCBEG, IBCEND, boundary condition indicators. +! +! IBCBEG=0 means no boundary condition at TAU(1) is given. +! In this case, the "not-a-knot condition" is used. That +! is, the jump in the third derivative across TAU(2) is +! forced to zero. Thus the first and the second cubic +! polynomial pieces are made to coincide. +! +! IBCBEG=1 means the slope at TAU(1) is to equal the +! input value C(2,1). +! +! IBCBEG=2 means the second derivative at TAU(1) is +! to equal C(2,1). +! +! IBCEND=0, 1, or 2 has analogous meaning concerning the +! boundary condition at TAU(N), with the additional +! information taken from C(2,N). +! + implicit none + + integer n + + real ( kind = 8 ) c(4,n) + real ( kind = 8 ) divdf1 + real ( kind = 8 ) divdf3 + real ( kind = 8 ) dtau + real ( kind = 8 ) g + integer i + integer ibcbeg + integer ibcend + real ( kind = 8 ) tau(n) +! +! C(3,*) and C(4,*) are used initially for temporary storage. +! +! Store first differences of the TAU sequence in C(3,*). +! +! Store first divided difference of data in C(4,*). +! + do i = 2, n + c(3,i) = tau(i) - tau(i-1) + end do + + do i = 2, n + c(4,i) = ( c(1,i) - c(1,i-1) ) / ( tau(i) - tau(i-1) ) + end do +! +! Construct the first equation from the boundary condition +! at the left endpoint, of the form: +! +! C(4,1)*S(1) + C(3,1)*S(2) = C(2,1) +! +! IBCBEG = 0: Not-a-knot +! + if ( ibcbeg == 0 ) then + + if ( n <= 2 ) then + c(4,1) = 1.0D+00 + c(3,1) = 1.0D+00 + c(2,1) = 2.0D+00 * c(4,2) + go to 120 + end if + + c(4,1) = c(3,3) + c(3,1) = c(3,2) + c(3,3) + c(2,1) = ( ( c(3,2) + 2.0D+00 * c(3,1) ) * c(4,2) * c(3,3) & + + c(3,2)**2 * c(4,3) ) / c(3,1) +! +! IBCBEG = 1: derivative specified. +! + else if ( ibcbeg == 1 ) then + + c(4,1) = 1.0D+00 + c(3,1) = 0.0D+00 + + if ( n == 2 ) then + go to 120 + end if +! +! Second derivative prescribed at left end. +! + else + + c(4,1) = 2.0D+00 + c(3,1) = 1.0D+00 + c(2,1) = 3.0D+00 * c(4,2) - c(3,2) / 2.0D+00 * c(2,1) + + if ( n == 2 ) then + go to 120 + end if + + end if +! +! If there are interior knots, generate the corresponding +! equations and carry out the forward pass of Gauss elimination, +! after which the I-th equation reads: +! +! C(4,I) * S(I) + C(3,I) * S(I+1) = C(2,I). +! + do i = 2, n-1 + g = -c(3,i+1) / c(4,i-1) + c(2,i) = g * c(2,i-1) + 3.0D+00 * ( c(3,i) * c(4,i+1) + c(3,i+1) * c(4,i) ) + c(4,i) = g * c(3,i-1) + 2.0D+00 * ( c(3,i) + c(3,i+1)) + end do +! +! Construct the last equation from the second boundary condition, of +! the form +! +! -G * C(4,N-1) * S(N-1) + C(4,N) * S(N) = C(2,N) +! +! If slope is prescribed at right end, one can go directly to +! back-substitution, since the C array happens to be set up just +! right for it at this point. +! + if ( ibcend == 1 ) then + go to 160 + end if + + if ( 1 < ibcend ) then + go to 110 + end if + +90 continue +! +! Not-a-knot and 3 <= N, and either 3 < N or also not-a-knot +! at left end point. +! + if ( n /= 3 .or. ibcbeg /= 0 ) then + g = c(3,n-1) + c(3,n) + c(2,n) = ( ( c(3,n) + 2.0D+00 * g ) * c(4,n) * c(3,n-1) + c(3,n)**2 & + * ( c(1,n-1) - c(1,n-2) ) / c(3,n-1) ) / g + g = - g / c(4,n-1) + c(4,n) = c(3,n-1) + c(4,n) = c(4,n) + g * c(3,n-1) + c(2,n) = ( g * c(2,n-1) + c(2,n) ) / c(4,n) + go to 160 + end if +! +! N=3 and not-a-knot also at left. +! +100 continue + + c(2,n) = 2.0D+00 * c(4,n) + c(4,n) = 1.0D+00 + g = -1.0D+00 / c(4,n-1) + c(4,n) = c(4,n) - c(3,n-1) / c(4,n-1) + c(2,n) = ( g * c(2,n-1) + c(2,n) ) / c(4,n) + go to 160 +! +! IBCEND = 2: Second derivative prescribed at right endpoint. +! +110 continue + + c(2,n) = 3.0D+00 * c(4,n) + c(3,n) / 2.0D+00 * c(2,n) + c(4,n) = 2.0D+00 + g = -1.0D+00 / c(4,n-1) + c(4,n) = c(4,n) - c(3,n-1) / c(4,n-1) + c(2,n) = ( g * c(2,n-1)+c(2,n))/c(4,n) + go to 160 +! +! N = 2. +! +120 continue + + if ( ibcend == 2 ) then + + c(2,n) = 3.0D+00 * c(4,n) + c(3,n) / 2.0D+00 * c(2,n) + c(4,n) = 2.0D+00 + g = -1.0D+00 / c(4,n-1) + c(4,n) = c(4,n) - c(3,n-1) / c(4,n-1) + c(2,n) = (g*c(2,n-1)+c(2,n)) / c(4,n) + + else if ( ibcend == 0 .and. ibcbeg /= 0 ) then + + c(2,n) = 2.0D+00 * c(4,n) + c(4,n) = 1.0D+00 + g = -1.0D+00 / c(4,n-1) + c(4,n) = c(4,n) - c(3,n-1) / c(4,n-1) + c(2,n) = (g*c(2,n-1)+c(2,n))/c(4,n) + + else if ( ibcend == 0 .and. ibcbeg == 0 ) then + + c(2,n) = c(4,n) + + end if +! +! Back solve the upper triangular system +! C(4,I) * S(I) + C(3,I) * S(I+1) = B(I) +! for the slopes C(2,I), given that S(N) is already known. +! +160 continue + + do i = n-1, 1, -1 + c(2,i) = ( c(2,i) - c(3,i) * c(2,i+1) ) / c(4,i) + end do +! +! Generate cubic coefficients in each interval, that is, the +! derivatives at its left endpoint, from value and slope at its +! endpoints. +! + do i = 2, n + dtau = c(3,i) + divdf1 = ( c(1,i) - c(1,i-1) ) / dtau + divdf3 = c(2,i-1) + c(2,i) - 2.0D+00 * divdf1 + c(3,i-1) = 2.0D+00 * ( divdf1 - c(2,i-1) - divdf3 ) / dtau + c(4,i-1) = 6.0D+00 * divdf3 / dtau**2 + end do + + return +end diff --git a/pppack/cwidth.f90 b/pppack/cwidth.f90 new file mode 100644 index 0000000..cdca694 --- /dev/null +++ b/pppack/cwidth.f90 @@ -0,0 +1,351 @@ +!> +!> @file cwidth.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine cwidth ( w, b, nequ, ncols, integs, nbloks, d, x, iflag ) + +!************************************************************************* +! +!! CWIDTH solves an almost block diagonal linear system. +! +! Discussion: +! +! This routine is a variation of the theme in the algorithm bandet1 +! by Martin and Wilkinson (numer.math. 9(1976)279-307). It solves +! the linear system +! A*X = B +! of NEQU equations in case A is almost block diagonal with all +! blocks having NCOLS columns using no more storage than it takes to +! store the interesting part of A. Such systems occur in the determ- +! ination of the b-spline coefficients of a spline approximation. +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! w on input, a two-dimensional array of size (nequ,ncols) contain- +! ing the interesting part of the almost block diagonal coeffici- +! ent matrix a (see description and example below). the array +! integs describes the storage scheme. +! on output, w contains the upper triangular factor u of the +! lu factorization of a possibly permuted version of a . in par- +! ticular, the determinant of a could now be found as +! iflag*w(1,1)*w(2,1)* ... * w(nequ,1) . +! +! b on input, the right side of the linear system, of length nequ. +! the contents of b are changed during execution. +! +! Input, integer NEQU, the number of equations. +! +! Input, integer NCOLS, the block width, that is, the number of +! columns in each block. +! +! integs integer array, of size (2,nequ), describing the block +! structure of a . +! integs(1,i)=no. of rows in block i = nrow +! integs(2,i)=no. of elimination steps in block i +! =overhang over next block = last +! nbloks number of blocks +! +! d work array, to contain row sizes . if storage is scarce, the +! array x could be used in the calling sequence for d . +! +! x on output, contains computed solution (if iflag /= 0), of +! length nequ . +! +! iflag on output, integer +! =(-1)**(no.of interchanges during elimination) +! if a is invertible +! = 0 if a is singular +! +! block structure of a +! +! the interesting part of a is taken to consist of nbloks con- +! secutive blocks, with the i-th block made up of nrowi=integs(1,i) +! consecutive rows and ncols consecutive columns of a , and with +! the first lasti=integs(2,i) columns to the left of the next block. +! these blocks are stored consecutively in the workarray w . +! +! for example, here is an 11th order matrix and its arrangement in +! the workarray w . (the interesting entries of a are indicated by +! their row and column index modulo 10.) +! +! --- a --- --- w --- +! +! nrow1=3 +! 11 12 13 14 11 12 13 14 +! 21 22 23 24 21 22 23 24 +! 31 32 33 34 nrow2=2 31 32 33 34 +! last1=2 43 44 45 46 43 44 45 46 +! 53 54 55 56 nrow3=3 53 54 55 56 +! last2=3 66 67 68 69 66 67 68 69 +! 76 77 78 79 76 77 78 79 +! 86 87 88 89 nrow4=1 86 87 88 89 +! last3=1 97 98 99 90 nrow5=2 97 98 99 90 +! last4=1 08 09 00 01 08 09 00 01 +! 18 19 10 11 18 19 10 11 +! last5=4 +! +! for this interpretation of a as an almost block diagonal matrix, +! we have nbloks=5 , and the integs array is +! +! i= 1 2 3 4 5 +! k= +! integs(k,i)= 1 3 2 3 1 2 +! 2 2 3 1 1 4 +! +! +! Method: +! +! gauss elimination with scaled partial pivoting is used, but mult- +! ipliers are n o t s a v e d in order to save storage. rather, the +! right side is operated on during elimination. the two parameters +! i p v t e q and l a s t e q +! are used to keep track of the action. ipvteq is the index of the +! variable to be eliminated next, from equations ipvteq+1,...,lasteq, +! using equation ipvteq (possibly after an interchange) as the pivot +! equation. the entries in the pivot column are a l w a y s in column +! 1 of w . this is accomplished by putting the entries in rows +! ipvteq+1,...,lasteq revised by the elimination of the ipvteq-th +! variable one to the left in w . in this way, the columns of the +! equations in a given block (as stored in w ) will be aligned with +! those of the next block at the moment when these next equations be- +! come involved in the elimination process. +! +! thus, for the above example, the first elimination steps proceed +! as follows. +! +! *11 12 13 14 11 12 13 14 11 12 13 14 11 12 13 14 +! *21 22 23 24 *22 23 24 22 23 24 22 23 24 +! *31 32 33 34 *32 33 34 *33 34 33 34 +! 43 44 45 46 43 44 45 46 *43 44 45 46 *44 45 46 etc. +! 53 54 55 56 53 54 55 56 *53 54 55 56 *54 55 56 +! 66 67 68 69 66 67 68 69 66 67 68 69 66 67 68 69 +! . . . . +! +! In all other respects, the procedure is standard, including the +! scaled partial pivoting. +! + implicit none + + integer nbloks + integer ncols + integer nequ + + real ( kind = 8 ) awi1od + real ( kind = 8 ) b(nequ) + real ( kind = 8 ) colmax + real ( kind = 8 ) d(nequ) + integer i + integer icount + integer iflag + integer ii + integer integs(2,nbloks) + integer ipvteq + integer ipvtp1 + integer istar + integer j + integer jmax + integer lastcl + integer lasteq + integer lasti + integer nexteq + integer nrowad + real ( kind = 8 ) ratio + real ( kind = 8 ) rowmax + real ( kind = 8 ) sum1 + real ( kind = 8 ) temp + real ( kind = 8 ) w(nequ,ncols) + real ( kind = 8 ) x(nequ) + + iflag = 1 + ipvteq = 0 + lasteq = 0 +! +! The I loop runs over the blocks. +! + do i = 1, nbloks +! +! The equations for the current block are added to those current- +! ly involved in the elimination process, by increasing lasteq +! by integs(1,i) after the rowsize of these equations has been +! recorded in the array D. +! + nrowad = integs(1,i) + + do icount = 1, nrowad + + nexteq = lasteq + icount + + rowmax = 0.0D+00 + do j = 1, ncols + rowmax = max ( rowmax, abs ( w(nexteq,j) ) ) + end do + + if ( rowmax == 0.0D+00 ) then + go to 150 + end if + + d(nexteq) = rowmax + + end do + + lasteq = lasteq + nrowad +! +! There will be lasti=integs(2,i) elimination steps before +! the equations in the next block become involved. further, +! l a s t c l records the number of columns involved in the cur- +! rent elimination step. it starts equal to ncols when a block +! first becomes involved and then drops by one after each elim- +! ination step. +! + lastcl = ncols + lasti = integs(2,i) + + do icount = 1, lasti + + ipvteq = ipvteq+1 + + if ( ipvteq < lasteq ) then + go to 30 + end if + + if ( d(ipvteq) < abs ( w(ipvteq,1)) + d(ipvteq) ) then + go to 100 + end if + + go to 150 +! +! Determine the smallest ISTAR in (ipvteq,lasteq) for +! which abs(w(istar,1))/d(istar) is as large as possible, and +! interchange equations ipvteq and istar in case ipvteq +! < istar . +! + 30 continue + + colmax = abs(w(ipvteq,1)) / d(ipvteq) + istar = ipvteq + ipvtp1 = ipvteq+1 + + do ii = ipvtp1, lasteq + awi1od = abs(w(ii,1)) / d(ii) + if ( colmax < awi1od ) then + colmax = awi1od + istar = ii + end if + end do + + if ( abs(w(istar,1))+d(istar) == d(istar) ) then + go to 150 + end if + + if ( istar == ipvteq ) then + go to 60 + end if + + iflag = -iflag + + temp = d(istar) + d(istar) = d(ipvteq) + d(ipvteq) = temp + + temp = b(istar) + b(istar) = b(ipvteq) + b(ipvteq) = temp + + do j = 1, lastcl + temp = w(istar,j) + w(istar,j) = w(ipvteq,j) + w(ipvteq,j) = temp + end do +! +! Subtract the appropriate multiple of equation ipvteq from +! equations ipvteq+1,...,lasteq to make the coefficient of the +! ipvteq-th unknown (presently in column 1 of w ) zero, but +! store the new coefficients in w one to the left from the old. +! + 60 continue + + do ii = ipvtp1, lasteq + + ratio = w(ii,1)/w(ipvteq,1) + do j = 2, lastcl + w(ii,j-1) = w(ii,j)-ratio*w(ipvteq,j) + end do + w(ii,lastcl) = 0.0D+00 + b(ii) = b(ii)-ratio*b(ipvteq) + + end do + + lastcl = lastcl-1 + + end do + +100 continue + + end do +! +! At this point, W and B contain an upper triangular linear system +! equivalent to the original one, with w(i,j) containing entry +! (i, i-1+j ) of the coefficient matrix. solve this system by backsub- +! stitution, taking into account its block structure. +! +! i-loop over the blocks, in reverse order +! + i = nbloks + + 110 continue + + lasti = integs(2,i) + jmax = ncols-lasti + + do icount = 1, lasti + + sum1 = 0.0D+00 + do j = 1, jmax + sum1 = sum1 + x(ipvteq+j) * w(ipvteq,j+1) + end do + + x(ipvteq) = ( b(ipvteq) - sum1 ) / w(ipvteq,1) + jmax = jmax+1 + ipvteq = ipvteq-1 + + end do + + i = i-1 + if ( 0 < i ) then + go to 110 + end if + + return + + 150 continue + + iflag = 0 + return +end diff --git a/pppack/difequ.f90 b/pppack/difequ.f90 new file mode 100644 index 0000000..51a61f3 --- /dev/null +++ b/pppack/difequ.f90 @@ -0,0 +1,185 @@ +!> +!> @file difequ.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine difequ ( mode, xx, v ) + +!************************************************************************* +! +!! DIFEQU returns information about a differential equation. +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! Input, integer MODE, an integer indicating the task to be performed. +! 1, initialization +! 2, evaluate de at xx +! 3, specify the next side condition +! 4, analyze the approximation +! +! Input, real ( kind = 8 ) XX, a point at which information is wanted +! +! Output, real ( kind = 8 ) V, depends on the MODE. See comments below +! + implicit none + + integer, parameter :: npiece = 100 + integer, parameter :: ncoef = 2000 + + real ( kind = 8 ) break + real ( kind = 8 ) coef + real ( kind = 8 ) eps + real ( kind = 8 ) ep1 + real ( kind = 8 ) ep2 + real ( kind = 8 ) error + real ( kind = 8 ) factor + integer i + integer iside + integer itermx + integer k + integer kpm + integer l + integer m + integer mode + real ( kind = 8 ) rho + real ( kind = 8 ) s2ovep + real ( kind = 8 ) solutn + real ( kind = 8 ) un + real ( kind = 8 ) v(20) + real ( kind = 8 ) value + real ( kind = 8 ) x + real ( kind = 8 ) xside + real ( kind = 8 ) xx + + common /approx/ break(npiece),coef(ncoef),l,kpm + common /side/ m,iside,xside(10) + common /other/ itermx,k,rho(19) +! +! This sample of DIFEQU is for the example in chapter xv. It is a +! nonlinear second order two point boundary value problem. +! + go to (10,50,60,110), mode +! +! Initialize everything, Set the order M of the differential equation, +! the nondecreasing sequence xside(i),i=1,...,m, of points at which side +! conditions are given and anything else necessary. +! + 10 continue + + m = 2 + xside(1) = 0.0D+00 + xside(2) = 1.0D+00 +! +! Print out heading. +! + write ( *, * ) ' ' + write ( *, * ) ' Carrier''s nonlinear perturb. problem' + write ( *, * ) ' ' + + eps = 0.005D+00 + write(*,*)'EPS = ',eps +! +! Set constants used in formula for solution below. +! + factor = ( sqrt ( 2.0D+00 ) + sqrt ( 3.0D+00 ) )**2 + s2ovep = sqrt ( 2.0D+00 / eps ) +! +! Initial guess for Newton iteration. un(x)=x*x-1. +! + l = 1 + break(1) = 0.0D+00 + do i = 1, kpm + coef(i) = 0.0D+00 + end do + coef(1) = -1.0D+00 + coef(3) = 2.0D+00 + itermx = 10 + return +! +! Provide value of left side coefficients and right side at xx . +! specifically, at xx the dif.equ. reads: +! +! v(m+1)d**m+v(m)d**(m-1) + ... + v(1)d**0 = v(m+2) +! +! in terms of the quantities v(i),i=1,...,m+2, to be computed here. +! + 50 continue + + v(3) = eps + v(2) = 0.0D+00 + call ppvalu(break,coef,l,kpm,xx,0,un) + v(1) = 2.0D+00 * un + v(4) = un**2 + 1.0D+00 + return +! +! provide the M side conditions. these conditions are of the form +! v(m+1)d**m+v(m)d**(m-1) + ... + v(1)d**0 = v(m+2) +! in terms of the quantities v(i),i=1,...,m+2, to be specified here. +! note that v(m+1)=0 for customary side conditions. +! + 60 continue + + v(m+1) = 0.0D+00 + if ( iside == 1 ) then + v(2) = 1.0D+00 + v(1) = 0.0D+00 + v(4) = 0.0D+00 + iside = iside+1 + else if ( iside == 2 ) then + v(2) = 0.0D+00 + v(1) = 1.0D+00 + v(4) = 0.0D+00 + iside = iside + 1 + end if + + return +! +! Calculate the error near the boundary layer at 1. +! + 110 continue + + write(*,*)' ' + write(*,*)' X, G(X) and G(X)-F(X) at selected points:' + write(*,*)' ' + + x = 0.75D+00 + + do i = 1, 9 + ep1 = exp ( s2ovep * ( 1.0D+00 - x ) ) * factor + ep2 = exp ( s2ovep * ( 1.0D+00 + x ) ) * factor + solutn = 12.0D+00 / ( 1.0D+00 + ep1 )**2 * ep1 & + +12.0D+00 / ( 1.0D+00 + ep2 )**2 * ep2 - 1.0D+00 + call ppvalu(break,coef,l,kpm,x,0,value) + error = solutn-value + write ( *, '(1x,3g14.6)' ) x, solutn, error + x = x+0.03125 + end do + + return +end diff --git a/pppack/dtblok.f90 b/pppack/dtblok.f90 new file mode 100644 index 0000000..03d5727 --- /dev/null +++ b/pppack/dtblok.f90 @@ -0,0 +1,103 @@ +!> +!> @file dtblok.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine dtblok ( bloks, integs, nbloks, ipivot, iflag, detsgn, detlog ) + +!************************************************************************* +! +!! DTBLOK gets the determinant of an almost block diagonal matrix. +! +! Discussion: +! +! The matrix's PLU factorization must have been obtained +! previously by FCBLOK. +! +! The logarithm of the determinant is computed instead of the +! determinant itself to avoid the danger of overflow or underflow +! inherent in this calculation. +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! bloks, integs, nbloks, ipivot, iflag are as on return from fcblok. +! in particular, iflag=(-1)**(number of interchanges dur- +! ing factorization) if successful, otherwise iflag=0. +! +! detsgn on output, contains the sign of the determinant. +! +! detlog on output, contains the natural logarithm of the determi- +! nant if determinant is not zero. otherwise contains 0. +! + implicit none + + integer nbloks + + real ( kind = 8 ) bloks(1) + real ( kind = 8 ) detlog + real ( kind = 8 ) detsgn + integer i + integer iflag + integer index + integer indexp + integer integs(3,nbloks) + integer ip + integer ipivot(1) + integer k + integer last + integer nrow + + detsgn = iflag + detlog = 0.0D+00 + + if ( iflag == 0 ) then + return + end if + + index = 0 + indexp = 0 + + do i = 1, nbloks + + nrow = integs(1,i) + last = integs(3,i) + + do k = 1, last + ip = index + nrow * (k-1) + ipivot(indexp+k) + detlog = detlog + log ( abs ( bloks(ip) ) ) + detsgn = detsgn * sign ( 1.0D+00, bloks(ip) ) + end do + + index = nrow*integs(2,i)+index + indexp = indexp+nrow + + end do + + return +end diff --git a/pppack/eqblok.f90 b/pppack/eqblok.f90 new file mode 100644 index 0000000..411f1de --- /dev/null +++ b/pppack/eqblok.f90 @@ -0,0 +1,192 @@ +!> +!> @file eqblok.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine eqblok ( t, n, kpm, work1, work2, bloks, lenblk, integs, & + nbloks, b ) + +!************************************************************************* +! +!! EQBLOK is to be called in COLLOC. +! +! Method: +! +! Each breakpoint interval gives rise to a block in the linear system. +! this block is determined by the K collocation equations in the interval +! with the side conditions (if any) in the interval interspersed ap- +! propriately, and involves the kpm b-splines having the interval in +! their support. correspondingly, such a block has nrow=k+isidel +! rows, with isidel=number of side conditions in this and the prev- +! ious intervals, and ncol=kpm columns. +! +! Further, because the interior knots have multiplicity k, we can +! carry out (in slvblk) k elimination steps in a block before pivot- +! ing might involve an equation from the next block. in the last block, +! of course, all kpm elimination steps will be carried out (in slvblk). +! +! see the detailed comments in the solveblok package for further in- +! formation about the almost block diagonal form used here. +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! input +! +! Input, real ( kind = 8 ) T(N+KPM), the knot sequence. +! +! Input, integer N, the dimension of the approximating spline space, +! that is, the order of the linear system to be constructed. +! +! Input, integer KPM, = K + M, the order of the approximating spline. +! +! Input, integer LENBLK, the maximum length of the array BLOKS, +! as allowed by the dimension statement in COLLOC. +! +! work areas +! +! work1 used in putit, of size (kpm,kpm) +! work2 used in putit, of size (kpm,m+1) +! +! output +! +! bloks the coefficient matrix of the linear system, stored in al- +! most block diagonal form, of size +! kpm*sum(integs(1,i) , i=1,...,nbloks) +! +! integs an integer array, of size (3,nbloks), describing the block +! structure. +! integs(1,i) = number of rows in block i +! integs(2,i) = number of columns in block i +! integs(3,i) = number of elimination steps which can be +! carried out in block i before pivoting might +! bring in an equation from the next block. +! +! nbloks number of blocks, equals number of polynomial pieces +! +! b the right side of the linear system, stored corresponding to the +! almost block diagonal form, of size sum(integs(1,i) , i=1,..., +! nbloks). +! + implicit none + + integer kpm + integer n + + real ( kind = 8 ) b(*) + real ( kind = 8 ) bloks(*) + integer i + integer index + integer indexb + integer integs(3,*) + integer iside + integer isidel + integer itermx + integer k + integer left + integer lenblk + integer m + integer nbloks + integer nrow + real ( kind = 8 ) rho + real ( kind = 8 ) t(n+kpm) + real ( kind = 8 ) work1(kpm,kpm) + real ( kind = 8 ) work2(kpm,*) + real ( kind = 8 ) xside + + common /side/ m,iside,xside(10) + common /other/ itermx,k,rho(19) + + index = 1 + indexb = 1 + i = 0 + iside = 1 + + do left = kpm, n, k + + i = i + 1 +! +! Determine integs(.,i) +! + integs(2,i) = kpm + + if ( n <= left ) then + integs(3,i) = kpm + isidel = m + go to 30 + end if + + integs(3,i) = k +! +! At this point, iside-1 gives the number of side conditions +! incorporated so far. adding to this the side conditions in the +! current interval gives the number isidel . +! + isidel = iside - 1 + + do + + if ( isidel == m ) then + exit + end if + + if ( t(left+1) <= xside(isidel+1) ) then + exit + end if + + isidel = isidel + 1 + + end do + +30 continue + + nrow = k + isidel + integs(1,i) = nrow +! +! The detailed equations for this block are generated and put +! together in PUTIT. +! + if ( lenblk < index + nrow * kpm - 1 ) then + write ( *, * ) ' ' + write ( *, * ) 'EQBLOK - Fatal error!' + write ( *, * ) ' The dimension of BLOKS is too small.' + write ( *, * ) ' LENBLK = ', lenblk + stop + end if + + call putit ( t, kpm, left, work1, work2, bloks(index), nrow, b(indexb) ) + + index = index + nrow * kpm + indexb = indexb + nrow + + end do + + nbloks = i + + return +end diff --git a/pppack/evnnot.f90 b/pppack/evnnot.f90 new file mode 100644 index 0000000..518bf00 --- /dev/null +++ b/pppack/evnnot.f90 @@ -0,0 +1,85 @@ +!> +!> @file evnnot.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine evnnot ( break, coef, l, k, brknew, lnew, coefg ) + +!************************************************************************* +! +!! EVNNOT is a "fake" version of NEWNOT. +! +! Discussion: +! +! EVNNOT returns LNEW+1 knots in BRKNEW which are +! evenly spaced between BREAK(1) and BREAK(L+1). +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! Input, real ( kind = 8 ) BREAK(L+1), coef, l, k.....contains the +! pp-representation of a certain function F of order K. Specifically, +! d**(k-1)f(x)=coef(k,i) for break(i) <= x < break(i+1) +! +! Input, integer LNEW, the number of subintervals into which the interval +! (a,b) is to be sectioned by the new breakpoint sequence brknew . +! +! Output, real ( kind = 8 ) BRKNEW(LNEW+1), the new breakpoints. +! +! Output, coefg the coefficient part of the pp-repr. break, coefg, l, 2 +! for the monotone p.linear function G with respect to which brknew will +! be equidistributed. +! + implicit none + + integer k + integer l + integer lnew + + real ( kind = 8 ) break(l+1) + real ( kind = 8 ) brknew(lnew+1) + real ( kind = 8 ) coef(k,l) + real ( kind = 8 ) coefg(2,l) + integer i + + if ( lnew == 0 ) then + + brknew(1) = 0.5D+00 * ( break(1) + break(l+1) ) + + else + + do i = 1, lnew+1 + brknew(i) = ( real ( lnew - i + 1, kind = 8 ) * break(1) & + + real ( i - 1, kind = 8 ) * break(l+1) ) & + / real ( lnew, kind = 8 ) + end do + + end if + + return +end diff --git a/pppack/factrb.f90 b/pppack/factrb.f90 new file mode 100644 index 0000000..298312b --- /dev/null +++ b/pppack/factrb.f90 @@ -0,0 +1,188 @@ +!> +!> @file factrb.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine factrb ( w, ipivot, d, nrow, ncol, last, iflag ) + +!************************************************************************* +! +!! FACTRB constructs a partial PLU factorization. +! +! Discussion: +! +! This factorization corresponds to steps 1 through LAST in Gauss +! elimination for the matrix W of order ( NROW, NCOL ), using +! pivoting of scaled rows. +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! Input/output, real ( kind = 8 ) W(NROW,NCOL); on input, contains the +! matrix to be partially factored; on output, the partial factorization. +! +! Output, integer IPIVOT(NROW), contains a record of the pivoting +! strategy used; row IPIVOT(I) is used during the I-th elimination step, +! for I = 1, ..., LAST. +! +! Workspace, real ( kind = 8 ) D(NROW), used to store the maximum entry +! in each row. +! +! Input, integer NROW, the number of rows of W. +! +! Input, integer NCOL, the number of columns of W. +! +! Input, integer LAST, the number of elimination steps to be carried out. +! +! Input/output, integer IFLAG. On output, equals the input value +! times (-1)**(number of row interchanges during the factorization +! process), in case no zero pivot was encountered. +! Otherwise, iflag=0 on output. +! + implicit none + + integer ncol + integer nrow + + real ( kind = 8 ) awikdi + real ( kind = 8 ) colmax + real ( kind = 8 ) d(nrow) + integer i + integer iflag + integer ipivi + integer ipivk + integer ipivot(nrow) + integer j + integer k + integer kp1 + integer last + real ( kind = 8 ) ratio + real ( kind = 8 ) rowmax + real ( kind = 8 ) w(nrow,ncol) +! +! Initialize IPIVOT and D. +! + do i = 1, nrow + ipivot(i) = i + end do + + do i = 1, nrow + + rowmax = 0.0D+00 + do j = 1, ncol + rowmax = max ( rowmax, abs ( w(i,j) ) ) + end do + + if ( rowmax == 0.0D+00 ) then + iflag = 0 + return + end if + + d(i) = rowmax + + end do +! +! Gauss elimination with pivoting of scaled rows, loop over k=1,.,last +! + k = 1 +! +! As pivot row for k-th step, pick among the rows not yet used, +! i.e., from rows ipivot(k),...,ipivot(nrow), the one whose k-th +! entry (compared to the row size) is largest. then, if this row +! does not turn out to be row ipivot(k), redefine ipivot(k) ap- +! propriately and record this interchange by changing the sign +! of IFLAG. +! + 30 continue + + ipivk = ipivot(k) + + if ( k == nrow ) then + if ( abs(w(ipivk,nrow))+d(ipivk) <= d(ipivk) ) then + iflag = 0 + end if + return + end if + + j = k + kp1 = k+1 + colmax = abs(w(ipivk,k))/d(ipivk) +! +! Find the largest pivot +! + do i = kp1, nrow + ipivi = ipivot(i) + awikdi = abs(w(ipivi,k)) / d(ipivi) + if ( colmax < awikdi ) then + colmax = awikdi + j = i + end if + end do + + if ( j /= k ) then + ipivk = ipivot(j) + ipivot(j) = ipivot(k) + ipivot(k) = ipivk + iflag = -iflag + end if +! +! If pivot element is too small in absolute value, declare +! matrix to be noninvertible and quit. +! + if ( abs(w(ipivk,k))+d(ipivk) <= d(ipivk) ) then + iflag = 0 + return + end if +! +! Otherwise, subtract the appropriate multiple of the pivot +! row from remaining rows, i.e., the rows ipivot(k+1),..., +! ipivot(nrow), to make k-th entry zero. save the multiplier in +! its place. +! + do i = kp1, nrow + + ipivi = ipivot(i) + w(ipivi,k) = w(ipivi,k)/w(ipivk,k) + + ratio = -w(ipivi,k) + do j = kp1, ncol + w(ipivi,j) = ratio*w(ipivk,j)+w(ipivi,j) + end do + + end do + + k = kp1 +! +! Check for having reached the next block. +! + if ( k <= last ) then + go to 30 + end if + + return +end diff --git a/pppack/fcblok.f90 b/pppack/fcblok.f90 new file mode 100644 index 0000000..72f11d8 --- /dev/null +++ b/pppack/fcblok.f90 @@ -0,0 +1,126 @@ +!> +!> @file fcblok.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine fcblok ( bloks, integs, nbloks, ipivot, scrtch, iflag ) + +!************************************************************************* +! +!! FCBLOK supervises the PLU factorization with pivoting of +! scaled rows of the almost block diagonal matrix. +! +! The almost block diagonal matrix is stored in the arrays +! BLOKS and INTEGS. +! +! The FACTRB routine carries out steps 1,...,last of gauss +! elimination (with pivoting) for an individual block. +! +! The SHIFTB routine shifts the remaining rows to the top of +! the next block +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! bloks an array that initially contains the almost block diagonal +! matrix a to be factored, and on return contains the com- +! puted factorization of a . +! +! integs an integer array describing the block structure of a . +! +! nbloks the number of blocks in a . +! +! ipivot an integer array of dimension sum (integs(1,n) ; n=1, +! ...,nbloks) which, on return, contains the pivoting stra- +! tegy used. +! +! scrtch work area required, of length max (integs(1,n) ; n=1, +! ...,nbloks). +! +! iflag output parameter; +! =0 in case matrix was found to be singular. +! otherwise, +! =(-1)**(number of row interchanges during factorization) +! + implicit none + + integer nbloks + + real ( kind = 8 ) bloks(*) + integer i + integer iflag + integer index + integer indexb + integer indexn + integer integs(3,nbloks) + integer ipivot(*) + integer last + integer ncol + integer nrow + real ( kind = 8 ) scrtch(*) + + iflag = 1 + indexb = 1 + indexn = 1 + i = 1 +! +! Loop over the blocks. i is loop index +! + do + + index = indexn + nrow = integs(1,i) + ncol = integs(2,i) + last = integs(3,i) +! +! Carry out elimination on the I-th block until next block +! enters, for columns 1 through LAST of I-th block. +! + call factrb ( bloks(index), ipivot(indexb), scrtch, nrow, ncol, & + last, iflag ) +! +! Check for having reached a singular block or the last block. +! + if ( iflag == 0 .or. i == nbloks ) then + exit + end if + + i = i + 1 + indexn = nrow * ncol + index +! +! Put the rest of the I-th block onto the next block. +! + call shiftb ( bloks(index), ipivot(indexb), nrow, ncol, last, & + bloks(indexn), integs(1,i), integs(2,i) ) + + indexb = indexb + nrow + + end do + + return +end diff --git a/pppack/interv.f90 b/pppack/interv.f90 new file mode 100644 index 0000000..60b9cb1 --- /dev/null +++ b/pppack/interv.f90 @@ -0,0 +1,223 @@ +!> +!> @file interv.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine interv ( xt, lxt, x, left, mflag ) + +!******************************************************************************* +! +!! INTERV brackets a real value in an ascending vector of values. +! +! Discussion: +! +! The XT array is a set of increasing values. The goal of the routine +! is to determine the largest index I so that XT(I) <= X. +! +! The routine is designed to be efficient in the common situation +! that it is called repeatedly, with X taken from an increasing +! or decreasing sequence. +! +! This will happen when a piecewise polynomial is to be graphed. +! The first guess for LEFT is therefore taken to be the value +! returned at the previous call and stored in the local variable ILO. +! +! A first check ascertains that ILO < LXT. This is necessary +! since the present call may have nothing to do with the previous +! call. Then, if XT(ILO) < = X < XT(ILO+1), we set LEFT=ILO +! and are done after just three comparisons. +! +! Otherwise, we repeatedly double the difference ISTEP=IHI-ILO +! while also moving ILO and IHI in the direction of X, until +! XT(ILO) < = X < XT(IHI) +! after which we use bisection to get, in addition, ILO+1=IHI. +! LEFT=ILO is then returned. +! +! Modified: +! +! 05 February 2004 +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! Input, real ( kind = 8 ) XT(LXT), a nondecreasing sequence of values. +! +! Input, integer LXT, the dimension of XT. +! +! Input, real ( kind = 8 ) X, the point whose location with +! respect to the sequence XT is to be determined. +! +! Output, integer LEFT, the index of the bracketing value: +! 1 if X < XT(1) +! I if XT(I) <= X < XT(I+1) +! LXT if XT(LXT) <= X +! +! Output, integer MFLAG, indicates whether X lies within the +! range of the data. +! -1: X < XT(1) +! 0: XT(I) <= X < XT(I+1) +! +1: XT(LXT) <= X +! + implicit none + + integer lxt + + integer left + integer mflag + integer ihi + integer, save :: ilo = 1 + integer istep + integer middle + real ( kind = 8 ) x + real ( kind = 8 ) xt(lxt) + +!$omp threadprivate(ilo) + + ihi = ilo + 1 + + if ( lxt <= ihi ) then + + if ( xt(lxt) <= x ) then + go to 110 + end if + + if ( lxt <= 1 ) then + mflag = -1 + left = 1 + return + end if + + ilo = lxt - 1 + ihi = lxt + + end if + + if ( xt(ihi) <= x ) then + go to 40 + end if + + if ( xt(ilo) <= x ) then + mflag = 0 + left = ilo + return + end if +! +! Now X < XT(ILO). Decrease ILO to capture X. +! + istep = 1 + + 31 continue + + ihi = ilo + ilo = ihi - istep + + if ( 1 < ilo ) then + if ( xt(ilo) <= x ) then + go to 50 + end if + istep = istep * 2 + go to 31 + end if + + ilo = 1 + + if ( x < xt(1) ) then + mflag = -1 + left = 1 + return + end if + + go to 50 +! +! Now XT(IHI) <= X. Increase IHI to capture X. +! + 40 continue + + istep = 1 + + 41 continue + + ilo = ihi + ihi = ilo + istep + + if ( ihi < lxt ) then + if ( x < xt(ihi) ) then + go to 50 + end if + istep = istep * 2 + go to 41 + end if + + if ( xt(lxt) <= x ) then + go to 110 + end if +! +! Now XT(ILO) < = X < XT(IHI). Narrow the interval. +! + ihi = lxt + +50 continue + + do + + middle = ( ilo + ihi ) / 2 + + if ( middle == ilo ) then + mflag = 0 + left = ilo + return + end if +! +! It is assumed that MIDDLE = ILO in case IHI = ILO+1. +! + if ( xt(middle) <= x ) then + ilo = middle + else + ihi = middle + end if + + end do +! +! Set output and return. +! + 110 continue + + mflag = 1 + + if ( x == xt(lxt) ) then + mflag = 0 + end if + + do left = lxt, 1, -1 + if ( xt(left) < xt(lxt) ) then + return + end if + end do + + return +end diff --git a/pppack/knots.f90 b/pppack/knots.f90 new file mode 100644 index 0000000..c7be744 --- /dev/null +++ b/pppack/knots.f90 @@ -0,0 +1,103 @@ +!> +!> @file knots.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine knots ( break, l, kpm, t, n ) + +!************************************************************************* +! +!! KNOTS is to be called in COLLOC. +! +! Discussion: +! +! From the given breakpoint sequence BREAK the routine constructs the +! knot sequence T so that +! +! SPLINE(K+M,T) = PP(K+M,BREAK) +! +! with M-1 continuous derivatives. This means that +! +! t(1),...,t(n+kpm) = break(1) kpm times, then break(2),..., +! break(l) each k times, then, finally, break(l+1) kpm times. +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! Input, real ( kind = 8 ) BREAK(L+1), the breakpoint sequence. +! +! Input, integer L, the number of intervals or pieces. +! +! Input, integer KPM, = K+M, the order of the piecewise polynomial +! function or spline. +! +! Output, real ( kind = 8 ) T(N+KPM), the knot sequence. +! +! Output, integer N, = L*K+M = the dimension of SPLINE(K+M,T). +! + implicit none + + integer kpm + integer l + integer n + + real ( kind = 8 ) break(l+1) + integer iside + integer j + integer jj + integer jjj + integer k + integer ll + integer m + real ( kind = 8 ) t(*) + real ( kind = 8 ) xside + + common /side/ m,iside,xside(10) + + k = kpm-m + n = l*k+m + jj = n+kpm + jjj = l+1 + + do ll = 1, kpm + t(jj) = break(jjj) + jj = jj-1 + end do + + do j = 1, l + jjj = jjj-1 + do ll = 1, k + t(jj) = break(jjj) + jj = jj-1 + end do + end do + + t(1:kpm) = break(1) + + return +end diff --git a/pppack/l2appr.f90 b/pppack/l2appr.f90 new file mode 100644 index 0000000..861d3bd --- /dev/null +++ b/pppack/l2appr.f90 @@ -0,0 +1,196 @@ +!> +!> @file l2appr.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine l2appr ( t, n, k, q, diag, bcoef ) + +!************************************************************************* +! +!! L2APPR constructs a weighted L2 spline approximation to given data. +! +! Discussion: +! +! The routine constructs the weighted discrete L2-approximation by +! splines of order K with knot sequence T(1), ..., T(n+k) to +! given data points ( TAU(1:NTAU), GTAU(1:NTAU) ). +! +! The B-spline coefficients BCOEF of the approximating spline are +! determined from the normal equations using Cholesky's method. +! +! Method: +! +! The B-spline coefficients of the L2-approximation are determined as the +! solution of the normal equations +! +! sum ( (b(i), b(j) ) * bcoef(j) : j=1,...,n) =(b(i),g), +! i=1, ..., n . +! Here, b(i) denotes the i-th B-spline, G denotes the function to +! be approximated, and the inner product of two functions F and G +! is given by +! +! (f,g) := sum ( f(tau(i))*g(tau(i))*weight(i) : i=1,...,ntau) . +! +! The arrays TAU and WEIGHT are given in common block +! DATA, as is the array GTAU containing the sequence +! g(tau(i)), i=1,..., NTAU. +! +! The relevant function values of the B-splines b(i), i=1,...,n, are +! supplied by the subprogram BSPLVB. +! +! The coefficient matrix C, with +! c(i,j) := (b(i), b(j)), i,j=1,...,n, +! of the normal equations is symmetric and (2*k-1)-banded, therefore +! can be specified by giving its K bands at or below the diagonal. +! For i=1,...,n, we store +! (b(i),b(j)) = c(i,j) in q(i-j+1,j), j=i,...,min(i+k-1,n) +! and the right side +! (b(i), g ) in bcoef(i). +! +! Since B-spline values are most efficiently generated by finding sim- +! ultaneously the value of every nonzero B-spline at one point, +! the entries of C (i.e., of Q ), are generated by computing, for +! each ll, all the terms involving tau(ll) simultaneously and adding +! them to all relevant entries. +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! Input, real ( kind = 8 ) T(N+K), the knot sequence. +! +! Input, integer N, the dimension of the space of splines of order K +! with knots t. +! +! Input, integer K, the order. +! +! work arrays +! +! q....a work array of size (at least) k*n. its first k rows are used +! for the k lower diagonals of the gramian matrix c. +! +! diag.....a work array of length n used in bchfac . +! +! input via c o m m o n /data/ +! +! ntau.....number of data points +! (tau(i),gtau(i)), i=1,...,ntau are the ntau data points to be +! fitted . +! weight(i), i=1,...,ntau are the corresponding weights . +! +! output +! bcoef(1), ..., bcoef(n) the b-spline coefficients of the l2-appr. +! + implicit none + + integer k + integer n + integer, parameter :: ntmax = 200 + + real ( kind = 8 ) bcoef(n) + real ( kind = 8 ) biatx(k) + real ( kind = 8 ) diag(n) + real ( kind = 8 ) dw + real ( kind = 8 ) gtau + integer i + integer j + integer jj + integer left + integer leftmk + integer ll + integer mm + integer ntau + real ( kind = 8 ) q(k,n) + real ( kind = 8 ) t(n+k) + real ( kind = 8 ) tau + real ( kind = 8 ) totalw + real ( kind = 8 ) weight + + COMMON /DATA/ tau(ntmax),gtau(ntmax),weight(ntmax),totalw,ntau + + bcoef(1:n) = 0.0D+00 + q(1:k,1:n) = 0.0D+00 + + left = k + leftmk = 0 + + do ll = 1, ntau +! +! Locate LEFT such that tau(ll) in (t(left),t(left+1)). +! + do + + if ( left == n ) then + exit + end if + + if ( tau(ll) < t(left+1) ) then + exit + end if + + left = left + 1 + leftmk = leftmk + 1 + + end do + + call bsplvb ( t, k, 1, tau(ll), left, biatx ) +! +! biatx(mm) contains the value of b(left-k+mm) at tau(ll). +! hence, with dw := biatx(mm)*weight(ll), the number dw*gtau(ll) +! is a summand in the inner product +! (b(left-k+mm), g) which goes into bcoef(left-k+mm) +! and the number biatx(jj)*dw is a summand in the inner product +! (b(left-k+jj), b(left-k+mm)), into q(jj-mm+1,left-k+mm) +! since (left-k+jj)-(left-k+mm)+1 = jj - mm + 1 . +! + do mm = 1, k + + dw = biatx(mm)*weight(ll) + j = leftmk+mm + bcoef(j) = dw*gtau(ll)+bcoef(j) + i = 1 + + do jj = mm, k + q(i,j) = biatx(jj)*dw+q(i,j) + i = i+1 + end do + + end do + + end do +! +! Construct the Cholesky factorization for C in q , then +! use it to solve the normal equations +! c*x = bcoef +! for X, and store X in BCOEF. +! + call bchfac ( q, k, n, diag ) + + call bchslv ( q, k, n, bcoef ) + + return +end diff --git a/pppack/l2err.f90 b/pppack/l2err.f90 new file mode 100644 index 0000000..7449380 --- /dev/null +++ b/pppack/l2err.f90 @@ -0,0 +1,146 @@ +!> +!> @file l2err.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine l2err ( iprfun, ftau, error ) + +!************************************************************************* +! +!! L2ERR computes the errors of an L2 approximation. +! +! Discussion: +! +! This routine computes various errors of the current L2-approximation, +! whose piecewise polynomial representation is contained in common +! block APPROX, to the given data contained in common block data. +! +! It prints out the average error errl1, the l2-error errl2, and the +! maximum error errmax. +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! Input, integer IPRFUN. If iprfun= 1, the routine prints out +! the value of the approximation as well as its error at +! every data point. +! +! Output, real ( kind = 8 ) FTAU(NTAU), contains the value of the computed +! approximation at each value TAU(1:NTAU). +! +! Output, error(1), ..., error(ntau), with error(i)=scale*(g-f) +! at tau(i), all i. here, SCALE equals 1. in case +! iprfun /= 1 , or the absolute error is greater than 100 some- +! where. otherwise, SCALE is such that the maximum of +! abs(error)) over all I lies between 10 and 100. This +! makes the printed output more illustrative. +! + implicit none + + integer, parameter :: lpkmax = 100 + integer, parameter :: ntmax = 200 + integer, parameter :: ltkmax = 2000 + + integer ntau + + real ( kind = 8 ) break + real ( kind = 8 ) coef + real ( kind = 8 ) err + real ( kind = 8 ) errl1 + real ( kind = 8 ) errl2 + real ( kind = 8 ) errmax + real ( kind = 8 ) error(ntau) + real ( kind = 8 ) ftau(ntau) + real ( kind = 8 ) gtau + integer ie + integer iprfun + integer k + integer l + integer ll + real ( kind = 8 ) scale + real ( kind = 8 ) tau + real ( kind = 8 ) totalw + real ( kind = 8 ) weight + + COMMON /DATA/ tau(ntmax),gtau(ntmax),weight(ntmax),totalw,ntau + common /approx/ break(lpkmax),coef(ltkmax),l,k + + errl1 = 0.0D+00 + errl2 = 0.0D+00 + errmax = 0.0D+00 + + do ll = 1, ntau + call ppvalu(break,coef,l,k,tau(ll),0,ftau(ll)) + error(ll) = gtau(ll)-ftau(ll) + err = abs(error(ll)) + if ( errmax < err ) then + errmax = err + end if + errl1 = errl1 + err * weight(ll) + errl2 = errl2 + err**2 * weight(ll) + end do + + errl1 = errl1 / totalw + errl2 = sqrt ( errl2 / totalw ) + + write ( *, * ) ' ' + write ( *, * ) ' Least square error =',errl2 + write ( *, * ) ' Average error =',errl1 + write ( *, * ) ' Maximum error =',errmax + write ( *, * ) ' ' + + if ( iprfun /= 1 ) then + return + end if +! +! Scale error curve and print +! + ie = 0 + scale = 1.0D+00 + + if ( errmax < 10.0D+00 ) then + + do ie = 1, 9 + scale = scale * 10.0D+00 + if ( 10.0D+00 <= errmax * scale ) then + exit + end if + end do + + end if + + error(1:ntau) = error(1:ntau) * scale + + write(*,60) ie, (ll,tau(ll),ftau(ll),error(ll),ll=1,ntau) + + 60 format (///14x,'approximation and scaled error curve'/ & + 7x,'data point',7x,'approximation',3x,'deviation x 10**',i1/ & + (i4, f16.8,f16.8,f17.6)) + + return +end diff --git a/pppack/l2knts.f90 b/pppack/l2knts.f90 new file mode 100644 index 0000000..6c064a6 --- /dev/null +++ b/pppack/l2knts.f90 @@ -0,0 +1,83 @@ +!> +!> @file l2knts.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine l2knts ( break, l, k, t, n ) + +!************************************************************************* +! +!! L2KNTS converts breakpoints to knots. +! +! Discussion: +! +! The breakpoint sequence BREAK is converted into a corresponding +! knot sequence T to allow the representation of a piecewise +! polynomial function of order K with K-2 continuous derivatives +! as a spline of order K with knot sequence T. +! +! This means that +! t(1), ..., t(n+k)= break(1) k times, then break(i), i=2,...,l, each +! once, then break(l+1) k times. Therefore, n=k-1+l. +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! Input, integer K, the order. +! +! Input, integer L, the number of polynomial pieces. +! +! Input, real ( kind = 8 ) BREAK(L+1), the breakpoint sequence. +! +! Output, real ( kind = 8 ) T(N+K), the knot sequence. +! +! Output, integer N, the dimension of the corresponding spline space +! of order K. +! + implicit none + + integer k + integer l + integer n + + real ( kind = 8 ) break(l+1) + integer i + real ( kind = 8 ) t(k-1+l+k) + + t(1:k-1) = break(1) + + do i = 1, l + t(k-1+i) = break(i) + end do + + n = k-1+l + + t(n+1:n+k) = break(l+1) + + return +end diff --git a/pppack/newnot.f90 b/pppack/newnot.f90 new file mode 100644 index 0000000..f3c556a --- /dev/null +++ b/pppack/newnot.f90 @@ -0,0 +1,206 @@ +!> +!> @file newnot.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine newnot ( break, coef, l, k, brknew, lnew, coefg ) + +!************************************************************************* +! +!! NEWNOT returns LNEW+1 knots which are equidistributed on (A,B). +! +! Discussion: +! +! (a,b) = (break(1),break(l+1)) with respect to a certain monotone +! function G related to the K-th root of the K-th derivative of the +! piecewise polynomial function F whose piecewise polynomial +! representation is contained in break, coef, l, k . +! +! method +! +! The K-th derivative of the given piecewise polynomial function F does +! not exist, except perhaps as a linear combination of delta functions. +! Nevertheless, we construct a piecewise constant function H with +! breakpoint sequence BREAK which is approximately proportional +! to abs(d**k(f)). +! +! Specifically, on (break(i), break(i+1)), +! +! abs(jump at break(i) of pc) abs(jump at break(i+1) of pc) +! h=-------------- + ---------------------------- +! break(i+1)-break(i-1) break(i+2) - break(i) +! +! with pc the p.constant (k-1)st derivative of f . +! then, the p.linear function g is constructed as +! +! g(x) = integral of h(y)**(1/k) for y from a to x +! +! and its pp coefficients stored in coefg . +! +! then brknew is determined by +! +! brknew(i) = a+g**(-1)((i-1)*step) , i=1,...,lnew+1 +! +! where step=g(b)/lnew and (a,b) = (break(1),break(l+1)). +! +! In the event that pc=d**(k-1)(f) is constant in (a,b) and +! therefore h=0 identically, brknew is chosen uniformly spaced. +! +! optional printed output +! coefg.....the pp coefficients of g are printed out if iprint is set +! positive in data statement below. +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! Input, break, coef, l, k.....contains the pp-representation of a certain +! function f of order k . specifically, +! d**(k-1)f(x)=coef(k,i) for break(i) <= x < break(i+1) +! +! Input, lnew.....number of intervals into which the interval (a,b) is to be +! sectioned by the new breakpoint sequence brknew . +! +! Output, real ( kind = 8 ) BRKNEW(LNEW+1), the new breakpoint sequence. +! +! Output, coefg.....the coefficient part of the pp-repr. break, coefg, l, 2 +! for the monotone p.linear function g with respect to which brknew will +! be equidistributed. +! + implicit none + + integer k + integer l + integer lnew + + real ( kind = 8 ) break(l+1) + real ( kind = 8 ) brknew(lnew+1) + real ( kind = 8 ) coef(k,l) + real ( kind = 8 ) coefg(2,l) + real ( kind = 8 ) dif + real ( kind = 8 ) difprv + integer i + integer, save :: iprint = 0 + integer j + real ( kind = 8 ) oneovk + real ( kind = 8 ) step + real ( kind = 8 ) stepi + + brknew(1) = break(1) + brknew(lnew+1) = break(l+1) +! +! If G is constant, BRKNEW is uniform. +! + if ( l <= 1) then + + step = (break(l+1)-break(1))/ real ( lnew, kind = 8 ) + + do i = 2, lnew + brknew(i) = break(1) + real ( i - 1, kind = 8 ) * step + end do + + return + + end if +! +! Construct the continuous piecewise linear function G. +! + oneovk = 1.0D+00 / real ( k, kind = 8 ) + coefg(1,1) = 0.0D+00 + difprv = abs(coef(k,2)-coef(k,1))/(break(3)-break(1)) + + do i = 2, l + dif = abs(coef(k,i)-coef(k,i-1))/(break(i+1)-break(i-1)) + coefg(2,i-1) = (dif+difprv)**oneovk + coefg(1,i) = coefg(1,i-1)+coefg(2,i-1)*(break(i)-break(i-1)) + difprv = dif + end do + + coefg(2,l) = ( 2.0D+00 * difprv )**oneovk +! +! step = g(b)/lnew +! + step=(coefg(1,l)+coefg(2,l)*(break(l+1)-break(l))) / real ( lnew, kind = 8 ) + + if ( 0 < iprint ) then + write(*,20)step,(i,coefg(1,i),coefg(2,i),i=1,l) + end if + + 20 format (' step =',e16.7/(i5,2e16.5)) +! +! if G is constant, BRKNEW is uniform. +! + if ( step <= 0.0D+00 ) then + + step = (break(l+1)-break(1)) / real ( lnew, kind = 8 ) + + do i = 2, lnew + brknew(i) = break(1) + real ( i - 1, kind = 8 ) * step + end do + + return + + end if +! +! for i=2,...,lnew, construct brknew(i)=a+g**(-1)(stepi), +! with stepi=(i-1)*step . this requires inversion of the p.lin- +! ear function g . for this, j is found so that +! g(break(j)) <= stepi .le. g(break(j+1)) +! and then +! brknew(i) = break(j)+(stepi-g(break(j)))/dg(break(j)) . +! the midpoint is chosen if dg(break(j))=0 . +! + j = 1 + + do i = 2, lnew + + stepi = real ( i - 1, kind = 8 ) * step + + do + + if ( j == l ) then + exit + end if + + if ( stepi <= coefg(1,j+1) ) then + exit + end if + + j = j + 1 + + end do + + if ( coefg(2,j) /= 0.0D+00 ) then + brknew(i) = break(j)+(stepi-coefg(1,j))/coefg(2,j) + else + brknew(i) = ( break(j) + break(j+1) ) / 2.0D+00 + end if + + end do + + return +end diff --git a/pppack/ppvalu.f90 b/pppack/ppvalu.f90 new file mode 100644 index 0000000..b392355 --- /dev/null +++ b/pppack/ppvalu.f90 @@ -0,0 +1,134 @@ +!> +!> @file ppvalu.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine ppvalu ( break, coef, l, k, x, jderiv, value ) + +!******************************************************************************* +! +!! PPVALU evaluates a piecewise polynomial function or its derivative. +! +! Discussion: +! +! PPVALU calculates the value at X of the JDERIV-th derivative of +! the piecewise polynomial function F from its piecewise +! polynomial representation. +! +! The interval index I, appropriate for X, is found through a +! call to INTERV. The formula for the JDERIV-th derivative +! of F is then evaluated by nested multiplication. +! +! The J-th derivative of F is given by: +! +! (d**j)f(x) = +! coef(j+1,i) + h * ( +! coef(j+2,i) + h * ( +! ... +! coef(k-1,i) + h * ( +! coef(k,i) / (k-j-1) ) / (k-j-2) ... ) / 2 ) / 1 +! +! with +! +! H=X-BREAK(I) +! +! and +! +! i = max( 1 , max( j , break(j) <= x , 1 <= j <= l ) ). +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! Input, real ( kind = 8 ) BREAK(L+1), real COEF(*), integer L, for +! piecewise polynomial representation of the function F to +! be evaluated. +! +! Input, integer K, the order of the polynomial pieces +! that make up the function F. The usual value for +! K is 4, signifying a piecewise cubic polynomial. +! +! Input, real ( kind = 8 ) X, the point at which to evaluate F or +! of its derivatives. +! +! Input, integer JDERIV, the order of the derivative to be +! evaluated. If JDERIV is 0, then F itself is evaluated, +! which is actually the most common case. It is assumed +! that JDERIV is zero or positive. +! +! Output, real ( kind = 8 ) VALUE, the value of the JDERIV-th +! derivative of F at X. +! + implicit none + + integer k + integer l + + real ( kind = 8 ) break(l+1) + real ( kind = 8 ) coef(k,l) + real ( kind = 8 ) fmmjdr + real ( kind = 8 ) h + integer i + integer jderiv + integer m + integer ndummy + real ( kind = 8 ) value + real ( kind = 8 ) x + + value = 0.0D+00 + + fmmjdr = k - jderiv +! +! Derivatives of order K or higher are identically zero. +! + if ( k <= jderiv ) then + return + end if +! +! Find the index I of the largest breakpoint to the left of X. +! + call interv ( break, l+1, x, i, ndummy ) +! +! Evaluate the JDERIV-th derivative of the I-th polynomial piece at X. +! + h = x - break(i) + m = k + + do + + value = ( value / fmmjdr ) * h + coef(m,i) + m = m - 1 + fmmjdr = fmmjdr - 1.0D+00 + + if ( fmmjdr <= 0.0D+00 ) then + exit + end if + + end do + + return +end diff --git a/pppack/putit.f90 b/pppack/putit.f90 new file mode 100644 index 0000000..28886c2 --- /dev/null +++ b/pppack/putit.f90 @@ -0,0 +1,178 @@ +!> +!> @file putit.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine putit ( t, kpm, left, scrtch, dbiatx, q, nrow, b ) + +!************************************************************************* +! +!! PUTIT puts together one block of the collocation equation system. +! +! Method: +! +! The K collocation equations for the interval (t(left),t(left+1)) +! are constructed with the aid of the subroutine DIFEQU( 2, ., +! . ) and interspersed (in order) with the side conditions (if any) in +! this interval, using DIFEQU ( 3, ., . ) for the information. +! +! The block Q has kpm columns, corresponding to the kpm b- +! splines of order kpm which have the interval (t(left),t(left+1)) +! in their support. the block's diagonal is part of the diagonal of the +! total system. The first equation in this block not overlapped by the +! preceding block is therefore equation LOWROW, with lowrow = +! number of side conditions in preceding intervals (or blocks). +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! Input, real ( kind = 8 ) T(LEFT+KPM), the knot sequence. +! +! Input, integer KPM, the order of the spline. +! +! Input, integer LEFT, indicates the interval of interest, viz the interval +! (t(left), t(left+1)). +! +! Input, integer NROW, number of rows in block to be put together +! +! Workspace, scrtch used in bsplvd, of size (kpm,kpm) +! +! Workspace, real ( kind = 8 ) DBIATX(KPM,M+1), derivatives of b-splines, +! with dbiatx(j,i+1) containing the i-th derivative of the +! j-th b-spline of interest +! +! Output, Q the block, of size (nrow,kpm). +! +! Output, B the corresponding piece of the right side, of size (nrow) +! + implicit none + + integer kpm + integer nrow + + real ( kind = 8 ) b(*) + real ( kind = 8 ) dbiatx(kpm,*) + real ( kind = 8 ) dx + integer i + integer irow + integer iside + integer itermx + integer j + integer k + integer left + integer ll + integer lowrow + integer m + integer mode + integer mp1 + real ( kind = 8 ) q(nrow,kpm) + real ( kind = 8 ) rho + real ( kind = 8 ) scrtch(*) + real ( kind = 8 ) sum1 + real ( kind = 8 ) t(*) + real ( kind = 8 ) v(20) + real ( kind = 8 ) xm + real ( kind = 8 ) xside + real ( kind = 8 ) xx + + common /side/ m,iside,xside(10) + common /other/ itermx,k,rho(19) + + mp1 = m + 1 + + q(1:nrow,1:kpm) = 0.0D+00 + + xm = ( t(left+1) + t(left) ) / 2.0D+00 + dx = ( t(left+1) - t(left) ) / 2.0D+00 + + ll = 1 + lowrow = iside + + do irow = lowrow, nrow + + if ( k < ll ) then + go to 20 + end if + + mode = 2 +! +! next collocation point is ... +! + xx = xm+dx*rho(ll) + ll = ll+1 +! +! The corresponding collocation equation is next unless the next side +! condition occurs at a point at, or to the left of, the next +! collocation point. +! + if ( m < iside ) then + go to 30 + end if + + if ( xx < xside(iside) ) then + go to 30 + end if + + ll = ll-1 + + 20 continue + + mode = 3 + xx = xside(iside) + + 30 continue + + call difequ(mode,xx,v) +! +! The next equation, a collocation equation (mode=2) or a side +! condition (mode=3), reads +! (*) (v(m+1)*d**m+v(m)*d**(m-1) +...+ v(1)*d**0)f(xx)=v(m+2) +! in terms of the info supplied by difequ . the corresponding +! equation for the b-coefficients of f therefore has the left side of +! (*), evaluated at each of the kpm b-splines having xx in +! their support, as its kpm possibly nonzero coefficients. +! + call bsplvd(t,kpm,xx,left,scrtch,dbiatx,mp1) + + do j = 1, kpm + + sum1 = 0.0D+00 + do i = 1, mp1 + sum1 = sum1 + v(i) * dbiatx(j,i) + end do + + q(irow,j) = sum1 + + end do + + b(irow) = v(m+2) + + end do + + return +end diff --git a/pppack/rvec_print.f90 b/pppack/rvec_print.f90 new file mode 100644 index 0000000..e482e0a --- /dev/null +++ b/pppack/rvec_print.f90 @@ -0,0 +1,83 @@ +!> +!> @file rvec_print.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine rvec_print ( n, a, title ) + +!******************************************************************************* +! +!! RVEC_PRINT prints a real vector. +! +! Discussion: +! +! If all the entries are integers, the data is printed +! in integer format. +! +! Modified: +! +! 19 November 2002 +! +! Author: +! +! John Burkardt +! +! Parameters: +! +! Input, integer N, the number of components of the vector. +! +! Input, real ( kind = 8 ) A(N), the vector to be printed. +! +! Input, character ( len = * ) TITLE, a title to be printed. +! + implicit none + + integer n + + real ( kind = 8 ) a(n) + integer i + character ( len = * ) title + + if ( 0 < len_trim ( title ) ) then + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) trim ( title ) + end if + + write ( *, '(a)' ) ' ' + + if ( all ( a(1:n) == aint ( a(1:n) ) ) ) then + do i = 1, n + write ( *, '(i6,i6)' ) i, int ( a(i) ) + end do + else if ( all ( abs ( a(1:n) ) < 1000000.0D+00 ) ) then + do i = 1, n + write ( *, '(i6,f14.6)' ) i, a(i) + end do + else + do i = 1, n + write ( *, '(i6,g14.6)' ) i, a(i) + end do + end if + + return +end diff --git a/pppack/sbblok.f90 b/pppack/sbblok.f90 new file mode 100644 index 0000000..e490a42 --- /dev/null +++ b/pppack/sbblok.f90 @@ -0,0 +1,106 @@ +!> +!> @file sbblok.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine sbblok ( bloks, integs, nbloks, ipivot, b, x ) + +!************************************************************************* +! +!! SBBLOK solves a linear system that was factored by FCBLOK. +! +! Discussion: +! +! The routine supervises the solution, by forward and backward +! substitution, of the linear system +! +! A * x = b +! +! for X, with the PLU factorization of A already generated in FCBLOK. +! Individual blocks of equations are solved via SUBFOR and SUBBAK. +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! bloks, integs, nbloks, ipivot are as on return from fcblok. +! +! b the right side, stored corresponding to the storage of +! the equations. see comments in SLVBLK for details. +! +! Output, real ( kind = 8 ) X(*), the solution vector. +! + implicit none + + integer nbloks + + real ( kind = 8 ) b(*) + real ( kind = 8 ) bloks(*) + integer i + integer index + integer indexb + integer indexx + integer integs(3,nbloks) + integer ipivot(*) + integer j + integer last + integer nbp1 + integer ncol + integer nrow + real ( kind = 8 ) x(*) +! +! Forward substitution pass: +! + index = 1 + indexb = 1 + indexx = 1 + do i = 1, nbloks + nrow = integs(1,i) + last = integs(3,i) + call subfor(bloks(index),ipivot(indexb),nrow,last,b(indexb),x(indexx)) + index = nrow*integs(2,i)+index + indexb = indexb+nrow + indexx = indexx+last + end do +! +! Back substitution pass. +! + nbp1 = nbloks + 1 + + do j = 1, nbloks + i = nbp1 - j + nrow = integs(1,i) + ncol = integs(2,i) + last = integs(3,i) + index = index - nrow * ncol + indexb = indexb - nrow + indexx = indexx - last + call subbak ( bloks(index), ipivot(indexb), nrow, ncol, last, x(indexx) ) + end do + + return +end diff --git a/pppack/setupq.f90 b/pppack/setupq.f90 new file mode 100644 index 0000000..56aa439 --- /dev/null +++ b/pppack/setupq.f90 @@ -0,0 +1,101 @@ +!> +!> @file setupq.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine setupq ( x, dx, y, npoint, v, qty ) + +!************************************************************************* +! +!! SETUPQ is to be called in SMOOTH. +! +! Discussion: +! +! put delx=x(.+1)-x(.) into v(.,4), +! put the three bands of q-transp*d into v(.,1-3), and +! put the three bands of (d*q)-transp*(d*q) at and above the diagonal +! into v(.,5-7) . +! +! here, q is the tridiagonal matrix of order (npoint-2,npoint) +! with general row 1/delx(i) , -1/delx(i)-1/delx(i+1) , 1/delx(i+1) +! and d is the diagonal matrix with general row dx(i) . +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! + implicit none + + integer npoint + + real ( kind = 8 ) diff + real ( kind = 8 ) dx(npoint) + integer i + real ( kind = 8 ) prev + real ( kind = 8 ) qty(npoint) + real ( kind = 8 ) v(npoint,7) + real ( kind = 8 ) x(npoint) + real ( kind = 8 ) y(npoint) + + v(1,4) = x(2)-x(1) + + do i = 2, npoint-1 + v(i,4) = x(i+1) - x(i) + v(i,1) = dx(i-1) / v(i-1,4) + v(i,2) = -dx(i) / v(i,4) - dx(i) / v(i-1,4) + v(i,3) = dx(i+1) / v(i,4) + end do + + v(npoint,1) = 0.0D+00 + do i = 2, npoint-1 + v(i,5) = v(i,1)**2 + v(i,2)**2 + v(i,3)**2 + end do + + do i = 3, npoint-1 + v(i-1,6) = v(i-1,2)*v(i,1)+v(i-1,3)*v(i,2) + end do + + v(npoint-1,6) = 0.0D+00 + + do i = 4, npoint-1 + v(i-2,7) = v(i-2,3) * v(i,1) + end do + + v(npoint-2,7) = 0.0D+00 + v(npoint-1,7) = 0.0D+00 +! +! Construct q-transp. * y in QTY. +! + prev = (y(2)-y(1)) / v(1,4) + do i = 2, npoint-1 + diff = (y(i+1)-y(i)) / v(i,4) + qty(i) = diff-prev + prev = diff + end do + + return +end diff --git a/pppack/shiftb.f90 b/pppack/shiftb.f90 new file mode 100644 index 0000000..0ddcd79 --- /dev/null +++ b/pppack/shiftb.f90 @@ -0,0 +1,107 @@ +!> +!> @file shiftb.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine shiftb ( ai, ipivot, nrowi, ncoli, last, ai1, nrowi1, ncoli1 ) + +!************************************************************************* +! +!! SHIFTB shifts the rows in current block, ai, not used as pivot +! rows, if any, i.e., rows ipivot(last+1),...,ipivot(nrowi), +! onto the first mmax=nrow-last rows of the next block, ai1, +! with column last+j of ai going to column j , +! for j=1,...,jmax=ncoli-last. the remaining columns of these +! rows of ai1 are zeroed out. +! +! picture +! +! original situation after results in a new block i+1 +! last=2 columns have been created and ready to be +! done in factrb (assuming no factored by next factrb call. +! interchanges of rows) +! 1 +! x x 1x x x x x x x x +! 1 +! 0 x 1x x x 0 x x x x +! block i 1 --- +! nrowi=4 0 0 1x x x 0 0 1x x x 0 01 +! ncoli=5 1 1 1 +! last=2 0 0 1x x x 0 0 1x x x 0 01 +! ------------------- 1 1 new +! 1x x x x x 1x x x x x1 block +! 1 1 1 i+1 +! block i+1 1x x x x x 1x x x x x1 +! nrowi1= 5 1 1 1 +! ncoli1= 5 1x x x x x 1x x x x x1 +! ------------------- 1-------------1 +! 1 +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! + implicit none + + integer ncoli + integer ncoli1 + integer nrowi1 + integer nrowi + + real ( kind = 8 ) ai(nrowi,ncoli) + real ( kind = 8 ) ai1(nrowi1,ncoli1) + integer ip + integer ipivot(nrowi) + integer j + integer last + integer m + + if ( nrowi-last < 1 ) then + return + end if + + if ( ncoli-last < 1 ) then + return + end if +! +! Put the remainder of block I into AI1. +! + do m = 1, nrowi-last + ip = ipivot(last+m) + do j = 1, ncoli-last + ai1(m,j) = ai(ip,last+j) + end do + end do +! +! Zero out the upper right corner of ai1. +! + do j = ncoli+1-last, ncoli1 + do m = 1, nrowi-last + ai1(m,j) = 0.0D+00 + end do + end do + + return +end diff --git a/pppack/slvblk.f90 b/pppack/slvblk.f90 new file mode 100644 index 0000000..0ce546c --- /dev/null +++ b/pppack/slvblk.f90 @@ -0,0 +1,180 @@ +!> +!> @file slvblk.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine slvblk ( bloks, integs, nbloks, b, ipivot, x, iflag ) + +!************************************************************************* +! +!! SLVBLK solves the almost block diagonal linear system A*x=b. +! +! Discussion: +! +! Such almost block diagonal matrices arise naturally in piecewise +! polynomial interpolation or approximation and in finite element +! methods for two-point boundary value problems. The PLU factorization +! method is implemented here to take advantage of the special structure +! of such systems for savings in computing time and storage requirements. +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! bloks a one-dimenional array, of length +! sum( integs(1,i)*integs(2,i) ; i=1,nbloks ) +! on input, contains the blocks of the almost block diagonal +! matrix a . the array integs (see below and the example) +! describes the block structure. +! on output, contains correspondingly the plu factorization +! of a (if iflag /= 0). certain of the entries into bloks +! are arbitrary (where the blocks overlap). +! +! integs integer array description of the block structure of a . +! integs(1,i)=no. of rows of block i = nrow +! integs(2,i)=no. of colums of block i = ncol +! integs(3,i)=no. of elim. steps in block i = last +! i =1,2,...,nbloks +! the linear system is of order +! n = sum ( integs(3,i) , i=1,...,nbloks ), +! but the total number of rows in the blocks is +! nbrows=sum( integs(1,i) ; i = 1,...,nbloks) +! +! nbloks number of blocks +! b right side of the linear system, array of length nbrows. +! certain of the entries are arbitrary, corresponding to +! rows of the blocks which overlap (see block structure and +! the example below). +! ipivot on output, integer array containing the pivoting sequence +! used. length is nbrows +! x on output, contains the computed solution (if iflag /= 0) +! length is n. +! iflag on output, integer +! =(-1)**(no. of interchanges during factorization) +! if a is invertible +! =0 if a is singular +! +! auxiliary programs +! fcblok (bloks,integs,nbloks,ipivot,scrtch,iflag) factors the matrix +! a , and is used for this purpose in slvblk. its arguments +! are as in slvblk, except for +! scrtch=a work array of length max(integs(1,i)). +! +! sbblok (bloks,integs,nbloks,ipivot,b,x) solves the system a*x=b +! once a is factored. this is done automatically by slvblk +! for one right side b, but subsequent solutions may be +! obtained for additional b-vectors. the arguments are all +! as in slvblk. +! +! dtblok (bloks,integs,nbloks,ipivot,iflag,detsgn,detlog) computes the +! determinant of a once slvblk or fcblok has done the fact- +! orization.the first five arguments are as in slvblk. +! detsgn =sign of the determinant +! detlog =natural log of the determinant +! +! block structure of a +! the nbloks blocks are stored consecutively in the array bloks . +! the first block has its (1,1)-entry at bloks(1), and, if the i-th +! block has its (1,1)-entry at bloks(index(i)), then +! index(i+1)=index(i) + nrow(i)*ncol(i) . +! the blocks are pieced together to give the interesting part of a +! as follows. for i=1,2,...,nbloks-1, the (1,1)-entry of the next +! block (the (i+1)st block ) corresponds to the (last+1,last+1)-entry +! of the current i-th block. recall last=integs(3,i) and note that +! this means that +! a. every block starts on the diagonal of a . +! b. the blocks overlap (usually). the rows of the (i+1)st block +! which are overlapped by the i-th block may be arbitrarily de- +! fined initially. they are overwritten during elimination. +! the right side for the equations in the i-th block are stored cor- +! respondingly as the last entries of a piece of b of length nrow +! (= integs(1,i)) and following immediately in b the corresponding +! piece for the right side of the preceding block, with the right side +! for the first block starting at b(1) . in this, the right side for +! an equation need only be specified once on input, in the first block +! in which the equation appears. +! +! example and test driver +! the test driver for this package contains an example, a linear +! system of order 11, whose nonzero entries are indicated in the fol- +! lowing schema by their row and column index modulo 10. next to it +! are the contents of the integs arrray when the matrix is taken to +! be almost block diagonal with nbloks=5, and below it are the five +! blocks. +! +! nrow1=3, ncol1 = 4 +! 11 12 13 14 +! 21 22 23 24 nrow2=3, ncol2 = 3 +! 31 32 33 34 +! last1=2 43 44 45 +! 53 54 55 nrow3=3, ncol3 = 4 +! last2=3 66 67 68 69 nrow4 = 3, ncol4 = 4 +! 76 77 78 79 nrow5=4, ncol5 = 4 +! 86 87 88 89 +! last3=1 97 98 99 90 +! last4=1 08 09 00 01 +! 18 19 10 11 +! last5=4 +! +! actual input to bloks shown by rows of blocks of a . +! (the ** items are arbitrary, this storage is used by slvblk) +! +! 11 12 13 14 / ** ** ** / 66 67 68 69 / ** ** ** ** / ** ** ** ** +! 21 22 23 24 / 43 44 45 / 76 77 78 79 / ** ** ** ** / ** ** ** ** +! 31 32 33 34/ 53 54 55/ 86 87 88 89/ 97 98 99 90/ 08 09 00 01 +! 18 19 10 11 +! +! index=1 index = 13 index = 22 index = 34 index = 46 +! +! actual right side values with ** for arbitrary values +! b1 b2 b3 ** b4 b5 b6 b7 b8 ** ** b9 ** ** b10 b11 +! +! (it would have been more efficient to combine block 3 with block 4) +! + implicit none + + integer nbloks + + real ( kind = 8 ) b(*) + real ( kind = 8 ) bloks(*) + integer iflag + integer integs(3,nbloks) + integer ipivot(*) + real ( kind = 8 ) x(*) +! +! In the call to FCBLOK, X is used for temporary storage. +! + call fcblok ( bloks, integs, nbloks, ipivot, x, iflag ) + + if ( iflag == 0 ) then + return + end if + + call sbblok ( bloks, integs, nbloks, ipivot, b, x ) + + return +end diff --git a/pppack/smooth.f90 b/pppack/smooth.f90 new file mode 100644 index 0000000..cab3d0b --- /dev/null +++ b/pppack/smooth.f90 @@ -0,0 +1,218 @@ +!> +!> @file smooth.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine smooth ( x, y, dy, npoint, s, v, a, sfp ) + +!************************************************************************* +! +!! SMOOTH constructs the cubic smoothing spline to given data. +! +! Discussion: +! +! The data is of the form +! +! (x(i),y(i)), i=1,...,npoint, +! +! The cubic smoothing spline has as small a second derivative as +! possible while +! +! s(f)=sum( ((y(i)-f(x(i)))/dy(i))**2 , i=1,...,npoint ) <= s . +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! input +! +! Input, real ( kind = 8 ) X(NPOINT), the abscissas, assumed to be strictly +! increasing . +! +! Input, real ( kind = 8 ) Y(NPOINT), the corresponding ordinates. +! +! dy(1),...,dy(npoint) estimate of uncertainty in data, assumed +! to be positive . +! +! npoint.....number of data points, assumed greater than 1 +! +! s.....upper bound on the discrete weighted mean square distance of +! the approximation f from the data . +! +! work arrays: +! +! v of size (npoint,7) +! a of size (npoint,4) +! +! output +! +! a(.,1).....contains the sequence of smoothed ordinates . +! a(i,j)=d**(j-1)f(x(i)), j=2,3,4, i=1,...,npoint-1 , i.e., the +! first three derivatives of the smoothing spline f at the +! left end of each of the data intervals . +! Warning . . . a would have to be transposed before it +! could be used in ppvalu . +! +! Method: +! +! the matrices q-transp*d and q-transp*d**2*q are constructed in +! SETUPQ from x and dy , as is the vector qty=q-transp*y . +! then, for given p , the vector U is determined in CHOL1D as +! the solution of the linear system +! (6(1-p)q-transp*d**2*q+p*r)u =qty . +! from u , the smoothing spline f (for this choice of smoothing par- +! ameter p ) is obtained in the sense that +! f(x(.)) = y-6(1-p)d**2*q*u and +! (d**2)f(x(.)) = 6*p*u . +! +! the smoothing parameter p is found (if possible) so that +! sf(p) = s , +! with sf(p)=s(f) , where f is the smoothing spline as it depends +! on p . if s=0, then p = 1 . if sf(0) <= s , then p = 0 . +! otherwise, the secant method is used to locate an appropriate p in +! the open interval (0,1) . specifically, +! p(0)=0, p(1) = (s-sf(0))/dsf +! with dsf=-24*u-transp*r*u a good approximation to d(sf(0)) = dsf +! +60*(d*q*u)-transp*(d*q*u) , and u as obtained for p=0 . +! after that, for n=1,2,... until sf(p(n)) <= 1.01*s, do.... +! determine p(n+1) as the point at which the secant to sf at the +! points p(n) and p(n-1) takes on the value s . +! if p(n+1) >= 1 , choose instead p(n+1) as the point at which +! the parabola sf(p(n))*((1-.)/(1-p(n)))**2 takes on the value s. +! +! Note that, in exact arithmetic, always p(n+1) < p(n) , hence +! sf(p(n+1)) < sf(p(n)) . therefore, also stop the iteration, +! with final p=1 , in case sf(p(n+1)) >= sf(p(n)) . +! + implicit none + + integer npoint + + real ( kind = 8 ) a(npoint,4) + real ( kind = 8 ) change + real ( kind = 8 ) dy(npoint) + integer i + real ( kind = 8 ) p + real ( kind = 8 ) prevp + real ( kind = 8 ) prevsf + real ( kind = 8 ) s + real ( kind = 8 ) sfp + real ( kind = 8 ) utru + real ( kind = 8 ) v(npoint,7) + real ( kind = 8 ) x(npoint) + real ( kind = 8 ) y(npoint) + + call setupq ( x, dy, y, npoint, v, a(1,4) ) + + if ( 0.0D+00 < s ) then + go to 20 + end if + +10 continue + + p = 1.0D+00 + call chol1d ( p, v, a(1,4), npoint, a(1,3), a(1,1) ) + sfp = 0.0D+00 + go to 70 + +20 continue + + p = 0.0D+00 + call chol1d ( p, v, a(1,4), npoint, a(1,3), a(1,1) ) + + sfp = 0.0D+00 + do i = 1, npoint + sfp = sfp + ( a(i,1) * dy(i) )**2 + end do + sfp = sfp * 36.0D+00 + + if ( sfp <= s ) then + go to 70 + end if + + prevp = 0.0D+00 + prevsf = sfp + + utru = 0.0D+00 + do i = 2, npoint + utru = utru + v(i-1,4) * ( a(i-1,3) * ( a(i-1,3) + a(i,3) ) + a(i,3)**2 ) + end do + + p = ( sfp - s ) / ( 24.0D+00 * utru ) +! +! Secant iteration for the determination of p starts here. +! + 50 continue + + call chol1d ( p, v, a(1,4), npoint, a(1,3), a(1,1) ) + + sfp = 0.0D+00 + do i = 1, npoint + sfp = sfp + ( a(i,1) * dy(i) )**2 + end do + sfp = sfp * 36.0D+00 * ( 1.0D+00 - p )**2 + + if ( sfp <= 1.01D+00 * s ) then + go to 70 + end if + + if ( prevsf <= sfp ) then + go to 10 + end if + + change = ( p - prevp ) / ( sfp - prevsf ) * ( sfp - s ) + prevp = p + p = p - change + prevsf = sfp + + if ( 1.0D+00 <= p ) then + p = 1.0D+00 - sqrt ( s / prevsf ) * ( 1.0D+00 - prevp ) + end if + + go to 50 +! +! The correct value of p has been found. +! Compute polynomial coefficients from q*u (in a(.,1)). +! + 70 continue + + do i = 1, npoint + a(i,1) = y(i) - 6.0D+00 * ( 1.0D+00 - p ) * dy(i)**2 * a(i,1) + end do + + do i = 1, npoint + a(i,3) = 6.0D+00 * a(i,3) * p + end do + + do i = 1, npoint-1 + a(i,4) = ( a(i+1,3) - a(i,3) ) / v(i,4) + a(i,2) = ( a(i+1,1) - a(i,1) ) / v(i,4) & + - ( a(i,3) + a(i,4) / 3.0D+00 * v(i,4) ) / 2.0D+00 * v(i,4) + end do + + return +end diff --git a/pppack/spli2d.f90 b/pppack/spli2d.f90 new file mode 100644 index 0000000..348d253 --- /dev/null +++ b/pppack/spli2d.f90 @@ -0,0 +1,241 @@ +!> +!> @file spli2d.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine spli2d ( tau, gtau, t, n, k, m, work, q, bcoef, iflag ) + +!************************************************************************* +! +!! SPLI2D produces a interpolatory tensor product spline. +! +! Discussion: +! +! SPLI2D is an extended version of SPLINT. +! +! SPLI2D produces the B-spline coefficients BCOEF(J,.) of the +! spline of order K with knots T(I), I=1,..., N+K, which takes on +! the value GTAU(I,J) at TAU(I), I=1,..., N, J=1,...,M. +! +! The I-th equation of the linear system +! +! A * BCOEF = B +! +! for the B-spline coefficients of the interpolant enforces +! interpolation at TAU(I), I=1,...,N. Hence, B(I)=GTAU(I), +! all I, and A is a band matrix with 2K-1 bands, if it is +! invertible. +! +! The matrix A is generated row by row and stored, diagonal by +! diagonal, in the rows of the array Q, with the main diagonal +! going into row K. +! +! The banded system is then solved by a call to BANFAC, which +! constructs the triangular factorization for A and stores it +! again in Q, followed by a call to BANSLV, which then obtains +! the solution BCOEF by substitution. +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! Input, real ( kind = 8 ) TAU(N), contains the data point abscissas. +! TAU must be strictly increasing +! +! Input, real ( kind = 8 ) GTAU(N), contains the data point ordinates, +! J=1,...,M. +! +! Input, real ( kind = 8 ) T(N+K), the knot sequence. +! +! Input, integer N, the number of data points and the +! dimension of the spline space SPLINE(K,T) +! +! Input, integer K, the order of the spline. +! +! Input, integer M, the number of data sets. +! +! Work space, real ( kind = 8 ) WORK(N). +! +! Output, real ( kind = 8 ) Q(2*K-1)*N, containing the triangular +! factorization of the coefficient matrix of the linear +! system for the B-spline coefficients of the spline interpolant. +! +! The B-spline coefficients for the interpolant of an additional +! data set (TAU(I),HTAU(I)), I=1,...,N with the same data +! abscissae can be obtained without going through all the +! calculations in this routine, simply by loading HTAU into +! BCOEF and then using the statement +! +! CALL BANSLV(Q,2*K-1,N,K-1,K-1,BCOEF) +! +! Output, real ( kind = 8 ) BCOEF(N), the B-spline coefficients of +! the interpolant. +! +! Output, integer IFLAG, error indicator. +! 1, no error. +! 2, an error occurred, which may have been caused by +! singularity of the linear system. +! +! The linear system to be solved is theoretically invertible if +! and only if +! +! T(I) < TAU(I) < TAU(I+K), for all I. +! +! Violation of this condition is certain to lead to IFLAG=2. +! + implicit none + + integer m + integer n + + real ( kind = 8 ) bcoef(m,n) + real ( kind = 8 ) gtau(n,m) + integer i + integer iflag + integer ilp1mx + integer j + integer jj + integer k + integer left + real ( kind = 8 ) q((2*k-1)*n) + real ( kind = 8 ) t(n+k) + real ( kind = 8 ) tau(n) + real ( kind = 8 ) taui + real ( kind = 8 ) work(n) + + left = k + + do i = 1, (2*k-1)*n + q(i) = 0.0 + end do +! +! Construct the N interpolation equations. +! + do i = 1, n + + taui = tau(i) + ilp1mx = min(i+k,n+1) +! +! Find the index LEFT in the closed interval (I,I+K-1) such +! that: +! +! T(LEFT) < = TAU(I) < T(LEFT+1) +! +! The matrix will be singular if this is not possible. +! + left = max(left,i) + + if ( taui < t(left) ) then + iflag = 2 + write(*,*)' ' + write(*,*)'SPLI2D - Fatal error!' + write(*,*)' The TAU array is not strictly increasing.' + stop + end if + + 20 continue + + if ( t(left+1) <= taui ) then + + left = left+1 + if ( left < ilp1mx ) then + go to 20 + end if + + left = left-1 + + if ( t(left+1) < taui ) then + iflag = 2 + write(*,*)' ' + write(*,*)'SPLI2D - Fatal error!' + write(*,*)' The TAU array is not strictly increasing.' + stop + end if + + end if +! +! The I-th equation enforces interpolation at TAUI, hence +! +! A(I,J)=B(J,K,T)(TAUI), for all J. +! +! Only the K entries with J=LEFT-K+1, ..., LEFT actually might be +! nonzero. These K numbers are returned, in WORK (used for +! temporary storage here), by the following call: +! + call bsplvb(t,k,1,taui,left,work) +! +! We therefore want +! +! WORK(J)=B(LEFT-K+J)(TAUI) +! +! to go into +! +! a(i,left-k+j), i.e., into q(i-(left+j)+2*k,(left+j)-k) since +! a(i+j,j) is to go into q(i+k,j), all i,j, if we consider q +! as a two-dim. array , with 2*k-1 rows (see comments in +! banfac). in the present program, we treat q as an equivalent +! one-dimensional array (because of fortran restrictions on +! ?? LOST LINE ?? +! entry +! i -(left+j)+2*k + ((left+j)-k-1)*(2*k-1) +! = i-left+1+(left -k)*(2*k-1) + (2*k-2)*j +! of q . +! + jj = i-left+1+(left-k)*(k+k-1) + + do j = 1, k + jj = jj+k+k-2 + q(jj) = work(j) + end do + + end do +! +! Factor A, stored again in Q. +! + call banfac(q,k+k-1,n,k-1,k-1,iflag) + + if ( iflag == 2 ) then + write(*,*)' ' + write(*,*)'SPLI2D - Fatal error!' + write(*,*)' BANFAC reports that the matrix is singular.' + stop + end if +! +! Solve A*BCOEF=GTAU by backsubstitution. +! + do j = 1, m + + work(1:n) = gtau(1:n,j) + + call banslv ( q, k+k-1, n, k-1, k-1, work ) + + bcoef(j,1:n) = work(1:n) + + end do + + return +end diff --git a/pppack/spline_hermite_set.f90 b/pppack/spline_hermite_set.f90 new file mode 100644 index 0000000..3b7e28e --- /dev/null +++ b/pppack/spline_hermite_set.f90 @@ -0,0 +1,90 @@ +!> +!> @file spline_hermite_set.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine spline_hermite_set ( ndata, tdata, c ) + +!************************************************************************* +! +!! SPLINE_HERMITE_SET sets up a piecewise cubic Hermite interpolant. +! +! Modified: +! +! 06 April 1999 +! +! Reference: +! +! Conte and de Boor, +! Algorithm CALCCF, +! Elementary Numerical Analysis, +! 1973, page 235. +! +! Parameters: +! +! Input, integer NDATA, the number of data points. +! NDATA must be at least 2. +! +! Input, real ( kind = 8 ) TDATA(NDATA), the abscissas of the data points. +! The entries of TDATA are assumed to be strictly increasing. +! +! Input/output, real ( kind = 8 ) C(4,NDATA). +! On input, C(1,I) and C(2,I) should contain the value of the +! function and its derivative at TDATA(I), for I = 1 to NDATA. +! These values will not be changed by this routine. +! On output, C(3,I) and C(4,I) contain the quadratic +! and cubic coefficients of the Hermite polynomial +! in the interval (TDATA(I), TDATA(I+1)), for I=1 to NDATA-1. +! C(3,NDATA) and C(4,NDATA) are set to 0. +! In the interval (TDATA(I), TDATA(I+1)), the interpolating Hermite +! polynomial is given by +! +! SVAL(TVAL) = C(1,I) +! + ( TVAL - TDATA(I) ) * ( C(2,I) +! + ( TVAL - TDATA(I) ) * ( C(3,I) +! + ( TVAL - TDATA(I) ) * C(4,I) ) ) +! + implicit none + + integer ndata + + real ( kind = 8 ) c(4,ndata) + real ( kind = 8 ) divdif1 + real ( kind = 8 ) divdif3 + real ( kind = 8 ) dt + integer i + real ( kind = 8 ) tdata(ndata) + + do i = 1, ndata-1 + dt = tdata(i+1) - tdata(i) + divdif1 = ( c(1,i+1) - c(1,i) ) / dt + divdif3 = c(2,i) + c(2,i+1) - 2.0D+00 * divdif1 + c(3,i) = ( divdif1 - c(2,i) - divdif3 ) / dt + c(4,i) = divdif3 / ( dt * dt ) + end do + + c(3,ndata) = 0.0D+00 + c(4,ndata) = 0.0D+00 + + return +end diff --git a/pppack/spline_hermite_val.f90 b/pppack/spline_hermite_val.f90 new file mode 100644 index 0000000..12bdcc9 --- /dev/null +++ b/pppack/spline_hermite_val.f90 @@ -0,0 +1,97 @@ +!> +!> @file spline_hermite_val.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine spline_hermite_val ( ndata, tdata, c, tval, sval ) + +!************************************************************************* +! +!! SPLINE_HERMITE_VAL evaluates a piecewise cubic Hermite interpolant. +! +! Discussion: +! +! SPLINE_HERMITE_SET must be called first, to set up the +! spline data from the raw function and derivative data. +! +! Modified: +! +! 06 April 1999 +! +! Reference: +! +! Conte and de Boor, +! Algorithm PCUBIC, +! Elementary Numerical Analysis, +! 1973, page 234. +! +! Parameters: +! +! Input, integer NDATA, the number of data points. +! NDATA is assumed to be at least 2. +! +! Input, real ( kind = 8 ) TDATA(NDATA), the abscissas of the data points. +! The entries of TDATA are assumed to be strictly increasing. +! +! Input, real ( kind = 8 ) C(4,NDATA), contains the data computed by +! SPLINE_HERMITE_SET. +! +! Input, real ( kind = 8 ) TVAL, the point where the interpolant is to +! be evaluated. +! +! Output, real ( kind = 8 ) SVAL, the value of the interpolant at TVAL. +! + implicit none + + integer ndata + + real ( kind = 8 ) c(4,ndata) + real ( kind = 8 ) dt + integer i + integer j + real ( kind = 8 ) sval + real ( kind = 8 ) tdata(ndata) + real ( kind = 8 ) tval +! +! Find the interval J = [ TDATA(J), TDATA(J+1) ] that contains +! or is nearest to TVAL. +! + j = ndata - 1 + + do i = 1, ndata-2 + + if ( tval < tdata(i+1) ) then + j = i + exit + end if + + end do +! +! Evaluate the cubic polynomial. +! + dt = tval - tdata(j) + + sval = c(1,j) + dt * ( c(2,j) + dt * ( c(3,j) + dt * c(4,j) ) ) + + return +end diff --git a/pppack/splint.f90 b/pppack/splint.f90 new file mode 100644 index 0000000..10ef1f8 --- /dev/null +++ b/pppack/splint.f90 @@ -0,0 +1,208 @@ +!> +!> @file splint.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine splint ( tau, gtau, t, n, k, q, bcoef, iflag ) + +!************************************************************************* +! +!! SPLINT produces the B-spline coefficients BCOEF of an interpolating spline. +! +! Discussion: +! +! The spline is of order K with knots T(1:N+K), and takes on the +! value GTAU(I) at TAU(I), for I = 1 to N. +! +! The I-th equation of the linear system +! A * BCOEF = B +! for the b-coefficients of the interpolant enforces interpolation +! at TAU(1:N). +! +! Hence, b(i)=gtau(i), all i, and a is a band matrix with 2k-1 +! bands (if it is invertible). +! +! The matrix A is generated row by row and stored, diagonal by di- +! agonal, in the rows of the array q , with the main diagonal go- +! ing into row K. see comments in the program below. +! +! The banded system is then solved by a call to banfac (which con- +! structs the triangular factorization for a and stores it again in +! q ), followed by a call to banslv (which then obtains the solution +! bcoef by substitution). +! +! BANFAC does no pivoting, since the total positivity of the matrix +! A makes this unnecessary. +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! Input, real ( kind = 8 ) TAU(N), the data point abscissas. The entries in +! TAU should be strictly increasing. +! +! Input, real ( kind = 8 ) GTAU(N), the data ordinates. +! +! Input, real ( kind = 8 ) T(N+K), the knot sequence. +! +! Input, integer N, the number of data points. +! +! Input, integer K, the order of the spline. +! +! output +! +! q, array of size (2*k-1)*n , containing the triangular factoriz- +! ation of the coefficient matrix of the linear system for the b- +! coefficients of the spline interpolant. +! the b-coefficients for the interpolant of an additional data set can +! be obtained without going through all the calculations in this +! routine, simply by loading htau into bcoef and then execut- +! ing the call banslv ( q, 2*k-1, n, k-1, k-1, bcoef ) +! +! bcoef, the b-coefficients of the interpolant, of length n. +! +! iflag, an integer indicating success (= 1) or failure (= 2) +! the linear system to be solved is (theoretically) invertible if +! and only if +! t(i) < tau(i) < tau(i+k), all i. +! violation of this condition is certain to lead to iflag=2 . +! + implicit none + + integer n + + real ( kind = 8 ) bcoef(n) + real ( kind = 8 ) gtau(n) + integer i + integer iflag + integer ilp1mx + integer j + integer jj + integer k + integer kpkm2 + integer left + real ( kind = 8 ) q((2*k-1)*n) + real ( kind = 8 ) t(n+k) + real ( kind = 8 ) tau(n) + real ( kind = 8 ) taui + + kpkm2 = 2*(k-1) + left = k + + do i = 1, (2*k-1)*n + q(i) = 0.0D+00 + end do +! +! loop over i to construct the n interpolation equations +! + do i = 1, n + + taui = tau(i) + ilp1mx = min(i+k,n+1) +! +! find left in the closed interval (i,i+k-1) such that +! t(left) <= tau(i) < t(left+1) +! matrix is singular if this is not possible +! + left = max(left,i) + + if ( taui < t(left)) then + go to 70 + end if + + 20 continue + + if ( taui < t(left+1)) then + go to 30 + end if + + left = left+1 + if ( left < ilp1mx) then + go to 20 + end if + + left = left-1 + if ( t(left+1) < taui ) then + go to 70 + end if +! +! The i-th equation enforces interpolation at taui, hence +! a(i,j)=b(j,k,t)(taui), all j. only the k entries with j = +! left-k+1,...,left actually might be nonzero. these k numbers +! are returned, in bcoef (used for temp.storage here), by the +! following +! + 30 continue + + call bsplvb(t,k,1,taui,left,bcoef) +! +! We therefore want bcoef(j)=b(left-k+j)(taui) to go into +! a(i,left-k+j), i.e., into q(i-(left+j)+2*k,(left+j)-k) since +! a(i+j,j) is to go into q(i+k,j), all i,j, if we consider q +! as a two-dim. array , with 2*k-1 rows (see comments in +! banfac). in the present program, we treat q as an equivalent +! one-dimensional array (because of fortran restrictions on +! dimension statements) . we therefore want bcoef(j) to go into +! entry +! i -(left+j)+2*k + ((left+j)-k-1)*(2*k-1) +! = i-left+1+(left -k)*(2*k-1) + (2*k-2)*j +! of q . +! + jj = i-left+1+(left-k)*(k+k-1) + do j = 1,k + jj = jj+kpkm2 + q(jj) = bcoef(j) + end do + + end do +! +! Obtain factorization of A, stored again in Q. +! + call banfac ( q, k+k-1, n, k-1, k-1, iflag ) + + if ( iflag == 2 ) then + write(*,*)' ' + write(*,*)'SPLINT - Fatal Error!' + write(*,*)' The linear system is not invertible!' + return + end if +! +! Solve a*bcoef=gtau by backsubstitution +! + bcoef(1:n) = gtau(1:n) + + call banslv ( q, k+k-1, n, k-1, k-1, bcoef ) + return + + 70 iflag=2 + + write ( *, * ) ' ' + write ( *, * ) 'SPLINT - Fatal Error!' + write ( *, * ) ' The linear system is not invertible!' + + return +end diff --git a/pppack/splopt.f90 b/pppack/splopt.f90 new file mode 100644 index 0000000..e27065d --- /dev/null +++ b/pppack/splopt.f90 @@ -0,0 +1,371 @@ +!> +!> @file splopt.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine splopt ( tau, n, k, scrtch, t, iflag ) + +!************************************************************************* +! +!! SPLOPT computes the knots for an optimal recovery scheme. +! +! Discussion: +! +! The optimal recovery scheme is of order K for data at TAU(1:N). +! +! The interior knots T(K+1:N) are determined by Newton's method in +! such a way that the signum function which changes sign at +! T(K+1), ..., T(N) and nowhere else in ( TAU(1), TAU(n) ) is +! orthogonal to the spline space SPLINE ( K, TAU ) on that interval. +! +! Let XI(J) be the current guess for T(K+J), j=1,...,n-k. Then +! the next Newton iterate is of the form +! xi(j) + (-)**(n-k-j)*x(j) , j=1,...,n-k, +! with X the solution of the linear system +! C * X = D. +! +! Here, c(i,j)=b(i)(xi(j)), all j, with b(i) the i-th b-spline of +! order K for the knot sequence TAU, all i, and D is the vector +! given by d(i)=sum( -a(j) , j=i,...,n )*(TAU(i+k)-TAU(i))/k, all i, +! with a(i)=sum ( (-)**(n-k-j)*b(i,k+1,tau)(xi(j)) , j=1,...,n-k ) +! for i=1,...,n-1, and a(n)=-.5 . +! +! See chapter XIII of text and references there for a derivation. +! +! The first guess for t(k+j) is (TAU(j+1)+...+TAU(j+k-1))/(k-1) . +! iteration terminates if max(abs(x(j))) < t o l , with +! TOL = t o l r t e *(TAU(n)-TAU(1))/(n-k) , +! or else after NEWTMX iterations , currently, +! newtmx, tolrte / 10, .000001 +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! Input, real ( kind = 8 ) TAU(N), the interpolation points. +! assumed to be nondecreasing, with tau(i) < tau(i+k),all i. +! +! Input, integer N, the number of data points. +! +! Input, integer K, the order of the optimal recovery scheme to be used. +! +! Workspace, real ( kind = 8 ) SCRTCH((N-K)*(2*K+3)+5*K+3). The various +! contents are specified in the text below . +! +! Output, real ( kind = 8 ) T(N+K), the optimal knots ready for +! use in optimal recovery. specifically, t(1)=... = t(k) = +! tau(1) and t(n+1)=... = t(n+k) = tau(n) , while the n-k +! interior knots t(k+1), ..., t(n) are calculated. +! +! Output, integer IFLAG, error indicator. +! = 1, success. T contains the optimal knots. +! = 2, failure. K < 3 or N < K or the linear system was singular. +! + implicit none + + integer k + integer n + + real ( kind = 8 ) del + real ( kind = 8 ) delmax + real ( kind = 8 ) floatk + integer i + integer id + integer iflag + integer index + integer j + integer kp1 + integer kpkm1 + integer kpn + integer l + integer left + integer leftmk + integer lenw + integer ll + integer llmax + integer llmin + integer na + integer nb + integer nc + integer nd + integer, parameter :: newtmx = 10 + integer newton + integer nmk + integer nx + real ( kind = 8 ) scrtch((n-k)*(2*k+3)+5*k+3) + real ( kind = 8 ) t(n+k) + real ( kind = 8 ) tau(n) + real ( kind = 8 ) sign + real ( kind = 8 ) signst + real ( kind = 8 ) sum1 + real ( kind = 8 ) tol + real ( kind = 8 ), parameter :: tolrte = 0.000001D+00 + real ( kind = 8 ) xij + + nmk = n - k + + if ( n < k ) then + write ( *, * ) ' ' + write ( *, * ) 'SPLOPT - Fatal error!' + write ( *, * ) ' N < K, N = ',n,' K = ',k + iflag = 2 + return + end if + + if ( n == k ) then + do i = 1, k + t(i) = tau(1) + t(n+i) = tau(n) + end do + return + end if + + if ( k <= 2 ) then + write(*,*)' ' + write(*,*)'SPLOPT - Fatal error!' + write(*,*)' K < 2, K = ',k + iflag = 2 + stop + end if + + floatk = k + kp1 = k+1 + kpkm1 = k+k-1 + kpn = k+n + + signst = -1.0D+00 + if ( (nmk/2) * 2 < nmk ) then + signst = 1.0D+00 + end if +! +! scrtch(i)=tau-extended(i), i=1,...,n+k+k +! + nx = n + k + k + 1 +! +! scrtch(i+nx)=xi(i),i=0,...,n-k+1 +! + na = nx + nmk + 1 +! +! scrtch(i+na)=-a(i), i=1,...,n +! + nd = na + n +! +! scrtch(i+nd)=x(i) or d(i), i=1,...,n-k +! + nb = nd+nmk +! +! scrtch(i+nb)=biatx(i),i=1,...,k+1 +! + nc = nb+kp1 +! +! scrtch(i+(j-1)*(2k-1)+nc)=w(i,j) = c(i-k+j,j), i=j-k,...,j+k, +! j=1,...,n-k. +! + lenw = kpkm1*nmk +! +! Extend TAU to a knot sequence and store in scrtch. +! + do j = 1, k + scrtch(j) = tau(1) + scrtch(kpn+j) = tau(n) + end do + + do j = 1, n + scrtch(k+j) = tau(j) + end do +! +! First guess for scrtch (.+nx) = xi . +! + scrtch(nx) = tau(1) + scrtch(nmk+1+nx) = tau(n) + + do j = 1, nmk + + sum1 = 0.0D+00 + do l = 1, k-1 + sum1 = sum1 + tau(j+l) + end do + + scrtch(j+nx) = sum1 / real ( k - 1, kind = 8 ) + + end do +! +! last entry of scrtch (.+na) =-a is always ... +! + scrtch(n+na) = 0.5D+00 +! +! Start the Newton iteration. +! + newton = 1 + tol = tolrte * ( tau(n) - tau(1) ) / real ( nmk, kind = 8 ) +! +! Start the Newton step. +! compute the 2k-1 bands of the matrix c and store in scrtch(.+nc), +! and compute the vector scrtch(.+na)=-a. +! + 100 continue + + do i = 1, lenw + scrtch(i+nc) = 0.0D+00 + end do + + do i = 2, n + scrtch(i-1+na) = 0.0D+00 + end do + + sign = signst + left = kp1 + + do j = 1, nmk + + xij = scrtch(j+nx) + + 130 continue + + if ( xij < scrtch(left+1) ) then + go to 140 + end if + + left = left+1 + if ( left < kpn ) then + go to 130 + end if + left = left-1 + + 140 continue + + call bsplvb(scrtch,k,1,xij,left,scrtch(1+nb)) +! +! The TAU sequence in scrtch is preceded by k additional knots +! therefore, scrtch(ll+nb) now contains b(left-2k+ll)(xij) +! which is destined for c(left-2k+ll,j), and therefore for +! w(left-k-j+ll,j)= scrtch(left-k-j+ll+(j-1)*kpkm1 + nc) +! since we store the 2k-1 bands of c in the 2k-1 r o w s of +! the work array w, and w in turn is stored in s c r t c h , +! with w(1,1)=scrtch(1+nc). +! +! also, c being of order n-k, we would want +! 1 <= left-2k+ll .le. n-k or +! llmin=2k-left <= ll .le. n-left+k = llmax . +! + leftmk = left-k + index = leftmk-j+(j-1)*kpkm1+nc + llmin = max(1,k-leftmk) + llmax = min(k,n-leftmk) + do ll = llmin, llmax + scrtch(ll+index)=scrtch(ll+nb) + end do + + call bsplvb (scrtch,kp1,2,xij,left,scrtch(1+nb)) + id=max(0,leftmk-kp1) + llmin=1-min(0,leftmk-kp1) + do ll=llmin, kp1 + id=id+1 + scrtch(id+na)=scrtch(id+na)-sign*scrtch(ll+nb) + end do + + sign=-sign + + end do + + call banfac(scrtch(1+nc),kpkm1,nmk,k-1,k-1,iflag) + + if ( iflag == 2 ) then + write ( *, * ) ' ' + write ( *, * ) 'SPLOPT - Fatal error!' + write ( *, * ) ' Matrix C is not invertible.' + stop + end if +! +! compute scrtch (.+nd)= d from scrtch (.+na) =-a . +! + do i=n,2,-1 + scrtch(i-1+na)=scrtch(i-1+na)+scrtch(i+na) + end do + + do i=1,nmk + scrtch(i+nd)=scrtch(i+na)*(tau(i+k)-tau(i))/floatk + end do +! +! Compute scrtch (.+nd)= x . +! + call banslv(scrtch(1+nc),kpkm1,nmk,k-1,k-1,scrtch(1+nd)) +! +! Compute scrtch (.+nd)=change in xi . modify, if necessary, to +! prevent new xi from moving more than 1/3 of the way to its +! neighbors. then add to xi to obtain new xi in scrtch(.+nx). +! + delmax = 0.0D+00 + sign = signst + do i = 1, nmk + del = sign * scrtch(i+nd) + delmax = max ( delmax, abs ( del ) ) + if ( 0.0D+00 < del ) then + go to 230 + end if + del = max ( del, ( scrtch(i-1+nx) - scrtch(i+nx) ) / 3.0D+00 ) + go to 240 + 230 del = min (del,(scrtch(i+1+nx)-scrtch(i+nx))/3.0D+00 ) + 240 sign = -sign + scrtch(i+nx) = scrtch(i+nx)+del + end do +! +! Call it a day in case change in xi was small enough or too many +! steps were taken. +! + if ( delmax < tol ) then + go to 270 + end if + + newton = newton + 1 + if ( newton <= newtmx ) then + go to 100 + end if + + write ( *, * ) ' ' + write ( *, * ) 'SPLOPT - Warning!' + write ( *, * ) ' No convergence. Number of Newton steps was ', newtmx + + 270 continue + + do i = 1, nmk + t(k+i) = scrtch(i+nx) + end do + + 290 continue + + do i=1,k + t(i)=tau(1) + t(n+i)=tau(n) + end do + + return + +! 310 iflag=2 +! +! return +end diff --git a/pppack/subbak.f90 b/pppack/subbak.f90 new file mode 100644 index 0000000..c3df5e9 --- /dev/null +++ b/pppack/subbak.f90 @@ -0,0 +1,80 @@ +!> +!> @file subbak.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine subbak ( w, ipivot, nrow, ncol, last, x ) + +!************************************************************************* +! +!! SUBBAK carries out backsubstitution for the current block. +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! w, ipivot, nrow, ncol, last are as on return from factrb. +! +! x(1),...,x(ncol) contains, on input, the right side for the +! equations in this block after backsubstitution has been +! carried up to but not including equation ipivot(last). +! means that x(j) contains the right side of equation ipi- +! vot(j) as modified during elimination, j=1,...,last, while +! for j > last, x(j) is already a component of the solut- +! ion vector. +! +! x(1),...,x(ncol) contains, on output, the components of the solut- +! ion corresponding to the present block. +! + implicit none + + integer ncol + integer nrow + + integer ip + integer ipivot(nrow) + integer j + integer k + integer last + real ( kind = 8 ) s + real ( kind = 8 ) w(nrow,ncol) + real ( kind = 8 ) x(ncol) + + do k = last, 1, -1 + + ip = ipivot(k) + + s = 0.0D+00 + do j = k+1, ncol + s = s + w(ip,j) * x(j) + end do + + x(k) = ( x(k) - s ) / w(ip,k) + + end do + +end diff --git a/pppack/subfor.f90 b/pppack/subfor.f90 new file mode 100644 index 0000000..58f7afe --- /dev/null +++ b/pppack/subfor.f90 @@ -0,0 +1,100 @@ +!> +!> @file subfor.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine subfor ( w, ipivot, nrow, last, b, x ) + +!************************************************************************* +! +!! SUBFOR carries out the forward pass of substitution for the current block. +! +! Discussion: +! +! The forward pass is the action on the right side corresponding to the +! elimination carried out in FACTRB for this block. +! +! At the end, x(j) contains the right side of the transformed +! ipivot(j)-th equation in this block, j=1,...,nrow. then, since +! for i=1,...,nrow-last, b(nrow+i) is going to be used as the right +! side of equation I in the next block (shifted over there from +! this block during factorization), it is set equal to x(last+i) here. +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! w, ipivot, nrow, last are as on return from factrb. +! +! b(j) is expected to contain, on input, the right side of j-th +! equation for this block, j=1,...,nrow. +! b(nrow+j) contains, on output, the appropriately modified right +! side for equation j in next block, j=1,...,nrow-last. +! +! x(j) contains, on output, the appropriately modified right +! side of equation ipivot(j) in this block, j=1,...,last (and +! even for j=last+1,...,nrow). +! + implicit none + + integer last + integer nrow + + real ( kind = 8 ) b(nrow+nrow-last) + integer ip + integer ipivot(nrow) + integer j + integer k + real ( kind = 8 ) s + real ( kind = 8 ) w(nrow,last) + real ( kind = 8 ) x(nrow) + + ip = ipivot(1) + x(1) = b(ip) + + do k = 2, nrow + + ip = ipivot(k) + + s = 0.0D+00 + do j = 1, min ( k-1, last ) + s = s + w(ip,j) * x(j) + end do + + x(k) = b(ip) - s + + end do +! +! Transfer modified right sides of equations ipivot(last+1),..., +! ipivot(nrow) to next block. +! + do k = last+1, nrow + b(nrow-last+k) = x(k) + end do + + return +end diff --git a/pppack/tautsp.f90 b/pppack/tautsp.f90 new file mode 100644 index 0000000..d585f07 --- /dev/null +++ b/pppack/tautsp.f90 @@ -0,0 +1,530 @@ +!> +!> @file tautsp.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine tautsp ( tau, gtau, ntau, gamma, s, break, coef, l, k, iflag ) + +!************************************************************************* +! +!! TAUTSP constructs a cubic spline interpolant to given data. +! +! Discussion: +! +! If 0 < GAMMA, additional knots are introduced where needed to +! make the interpolant more flexible locally. This avoids extraneous +! inflection points typical of cubic spline interpolation at knots to +! rapidly changing data. +! +! Method: +! +! On the I-th interval, (TAU(I), TAU(I+1)), the interpolant is of the +! form: +! +! (*) f(u(x))=a+b*u + c*h(u,z) + d*h(1-u,1-z) , +! +! with +! +! U = U(X) = ( X - TAU(I) ) / DTAU(I). +! +! Here, +! z=z(i) = addg(i+1)/(addg(i)+addg(i+1)) +! (= .5, in case the denominator vanishes). with +! addg(j)=abs(ddg(j)), ddg(j) = dg(j+1)-dg(j), +! dg(j)=divdif ( j) = (gtau(j+1)-gtau(j))/dtau(j) +! and +! h(u,z)=alpha*u**3+(1-alpha)*(max(((u-zeta)/(1-zeta)),0)**3 +! with +! alpha(z)=(1-gamma/3)/zeta +! zeta(z)=1-gamma*min((1 - z), 1/3) +! thus, for 1/3 <= z .le. 2/3, f is just a cubic polynomial on +! the interval i. otherwise, it has one additional knot, at +! tau(i)+zeta*dtau(i) . +! as z approaches 1, h(.,z) has an increasingly sharp bend near 1, +! thus allowing f to turn rapidly near the additional knot. +! in terms of f(j)=gtau(j) and +! fsecnd(j)= second derivative of f at tau(j), +! the coefficients for (*) are given as +! a=f(i)-d +! b=(f(i+1)-f(i)) - (c - d) +! c=fsecnd(i+1)*dtau(i)**2/hsecnd(1,z) +! d=fsecnd(i)*dtau(i)**2/hsecnd(1,1-z) +! hence can be computed once fsecnd(i),i=1,...,ntau, is fixed. +! +! F is automatically continuous and has a continuous second derivative +! (except when z=0 or 1 for some i). we determine fscnd(.) from +! the requirement that also the first derivative of F be continuous. +! +! In addition, we require that the third derivative be continuous +! across TAU(2) and across TAU(NTAU-1). This leads to a strictly +! diagonally dominant tridiagonal linear system for the fsecnd(i) +! which we solve by Gauss elimination without pivoting. +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! Input, real ( kind = 8 ) TAU(NTAU), the sequence of data points. +! TAU must be strictly increasing. +! +! Input, real ( kind = 8 ) GTAU(NTAU), the corresponding sequence of +! function values. +! +! Input, integer NTAU, the number of data points. NTAU must be at least 4. +! +! Input, gamma indicates whether additional flexibility is desired. +! =0., no additional knots +! in (0.,3.), under certain conditions on the given data at +! points i-1, i, i+1, and i+2, a knot is added in the +! i-th interval, i=2,...,ntau-2. see description of meth- +! od below. the interpolant gets rounded with increasing +! gamma. a value of 2.5 for gamma is typical. +! in (3.,6.), same , except that knots might also be added in +! intervals in which an inflection point would be permit- +! ted. a value of 5.5 for gamma is typical. +! +! Output, break, coef, l, k give the pp-representation of the interpolant. +! specifically, for break(i) <= x .le. break(i+1), the +! interpolant has the form +! f(x)=coef(1,i) +dx(coef(2,i) +(dx/2)(coef(3,i) +(dx/3)coef(4,i))) +! with dx=x-break(i) and i=1,...,l . +! +! Output, iflag =1, ok +! =2, input was incorrect. a printout specifying the mistake +! was made. +! workspace +! +! Output, s is required, of size (ntau,6). the individual columns of this +! array contain the following quantities mentioned in the write- +! up and below. +! s(.,1)=dtau = tau(.+1)-tau +! s(.,2)=diag = diagonal in linear system +! s(.,3)=u = upper diagonal in linear system +! s(.,4)=r = right side for linear system (initially) +! = fsecnd = solution of linear system , namely the second +! derivatives of interpolant at tau +! s(.,5)=z = indicator of additional knots +! s(.,6)=1/hsecnd(1,x) with x = z or = 1-z. see below. +! + implicit none + + integer ntau + + real ( kind = 8 ) alph + real ( kind = 8 ) alpha + real ( kind = 8 ) break(*) + real ( kind = 8 ) c + real ( kind = 8 ) coef(4,*) + real ( kind = 8 ) d + real ( kind = 8 ) del + real ( kind = 8 ) denom + real ( kind = 8 ) divdif + real ( kind = 8 ) entry + real ( kind = 8 ) entry3 + real ( kind = 8 ) factor + real ( kind = 8 ) factr2 + real ( kind = 8 ) gam + real ( kind = 8 ) gamma + real ( kind = 8 ) gtau(ntau) + integer i + integer iflag + integer k + integer l + integer method + real ( kind = 8 ) onemg3 + real ( kind = 8 ) onemzt + real ( kind = 8 ) ratio + real ( kind = 8 ) s(ntau,6) + real ( kind = 8 ) sixth + real ( kind = 8 ) tau(ntau) + real ( kind = 8 ) temp + real ( kind = 8 ) x + real ( kind = 8 ) z + real ( kind = 8 ) zeta + real ( kind = 8 ) zt2 + + alph(x) = min ( 1.0D+00, onemg3 / x ) +! +! There must be at least 4 interpolation points. +! + if ( ntau < 4 ) then + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) 'TAUTSP - Fatal error!' + write ( *, '(a)' ) ' Input NTAU must be at least 4.' + write ( *, '(a,i6)' ) ' NTAU = ', ntau + iflag = 2 + stop + end if +! +! Construct delta tau and first and second (divided) differences of data. +! + do i = 1, ntau-1 + + s(i,1) = tau(i+1)-tau(i) + + if ( s(i,1) <= 0.0D+00 ) then + write(*,30)i,tau(i),tau(i+1) + 30 format (' point ',i3,' and the next',2e15.6,' are disordered') + iflag=2 + return + end if + + s(i+1,4) = ( gtau(i+1) - gtau(i) ) / s(i,1) + end do + + do i = 2, ntau-1 + s(i,4) = s(i+1,4)-s(i,4) + end do +! +! Construct system of equations for second derivatives at tau. at each +! interior data point, there is one continuity equation, at the first +! and the last interior data point there is an additional one for a +! total of NTAU equations in ntau unknowns. +! + i = 2 + s(2,2) = s(1,1) / 3.0D+00 + sixth = 1.0D+00 / 6.0D+00 + method = 2 + gam = gamma + + if ( gam <= 0.0D+00 ) then + method=1 + end if + + if ( 3.0D+00 < gam ) then + method = 3 + gam = gam - 3.0D+00 + end if + + onemg3 = 1.0D+00 - gam / 3.0D+00 +! +! loop over i +! + 70 continue +! +! Construct z(i) and zeta(i) +! + z = 0.5D+00 + + if ( method == 1) then + go to 100 + end if + + if ( method == 3) then + go to 90 + end if + + if ( s(i,4)*s(i+1,4) < 0.0D+00 ) then + go to 100 + end if + + 90 continue + + temp = abs ( s(i+1,4) ) + denom = abs ( s(i,4) ) +temp + + if ( denom /= 0.0D+00 ) then + z = temp/denom + if ( abs ( z - 0.5D+00 ) <= sixth ) then + z=0.5D+00 + end if + end if + + 100 continue + + s(i,5) = z +! +! Set up part of the i-th equation which depends on the i-th interval. +! + if ( z < 0.5D+00 ) then + + zeta = gam*z + onemzt = 1.0D+00 - zeta + zt2 = zeta**2 + alpha = alph(onemzt) + factor = zeta/(alpha*(zt2-1.0D+00 ) + 1.0D+00 ) + s(i,6) = zeta*factor / 6.0D+00 + s(i,2) = s(i,2) + s(i,1) & + * ( ( 1.0D+00 - alpha * onemzt ) * factor / 2.0D+00-s(i,6)) +! +! If z=0 and the previous z = 1, then d(i) = 0. since then +! also u(i-1)=l(i+1) = 0, its value does not matter. reset +! d(i)=1 to insure nonzero pivot in elimination. +! + if ( s(i,2) <= 0.0D+00 ) then + s(i,2) = 1.0D+00 + end if + + s(i,3)=s(i,1) / 6.0D+00 + else if ( z - 0.5D+00 == 0.0D+00 ) then + s(i,2)=s(i,2)+s(i,1) / 3.0D+00 + s(i,3)=s(i,1) / 6.0D+00 + else if ( 0.0D+00 < z - 0.5D+00 ) then + onemzt = gam*(1.0D+00 - z) + zeta = 1.0D+00 - onemzt + alpha = alph(zeta) + factor = onemzt/(1.0D+00 - alpha * zeta * ( 1.0D+00 + onemzt ) ) + s(i,6) = onemzt*factor / 6.0D+00 + s(i,2) = s(i,2)+s(i,1) / 3.0D+00 + s(i,3) = s(i,6)*s(i,1) + end if + + if ( 2 < i ) then + go to 190 + end if + + s(1,5) = 0.5D+00 +! +! The first two equations enforce continuity of the first and of +! the third derivative across tau(2). +! + s(1,2)=s(1,1) / 6.0D+00 + s(1,3)=s(2,2) + entry3=s(2,3) + if ( z-0.5D+00) 150, 160, 170 + + 150 continue + + factr2 = zeta * ( alpha * ( zt2 - 1.0D+00 ) + 1.0D+00 ) & + / ( alpha * ( zeta * zt2 - 1.0D+00 ) + 1.0D+00 ) + + ratio=factr2*s(2,1)/s(1,2) + s(2,2)=factr2*s(2,1)+s(1,1) + s(2,3)=-factr2*s(1,1) + go to 180 + + 160 continue + + ratio=s(2,1)/s(1,2) + s(2,2)=s(2,1)+s(1,1) + s(2,3)=-s(1,1) + go to 180 + + 170 continue + + ratio=s(2,1)/s(1,2) + s(2,2)=s(2,1)+s(1,1) + s(2,3)=-s(1,1)*6.0D+00 * alpha * s(2,6) +! +! At this point, the first two equations read +! diag(1)*x1+u(1)*x2 + entry3*x3=r(2) +! -ratio*diag(1)*x1+diag(2)*x2 + u(2)*x3=0.0 +! Eliminate first unknown from second equation +! + 180 continue + + s(2,2)=ratio*s(1,3)+s(2,2) + s(2,3)=ratio*entry3+s(2,3) + s(1,4)=s(2,4) + s(2,4)=ratio*s(1,4) + go to 200 + + 190 continue +! +! The i-th equation enforces continuity of the first derivative +! across tau(i). it has been set up in statements 35 up to 40 +! and 21 up to 25 and reads now +! -ratio*diag(i-1)*xi-1+diag(i)*xi + u(i)*xi+1=r(i) . +! eliminate (i-1)st unknown from this equation +! + s(i,2)=ratio*s(i-1,3)+s(i,2) + s(i,4)=ratio*s(i-1,4)+s(i,4) +! +! Set up the part of the next equation which depends on the +! i-th interval. +! + 200 continue + + if ( z- 0.5D+00 ) 210, 220, 230 + + 210 continue + ratio = -s(i,6) * s(i,1) / s(i,2) + s(i+1,2)=s(i,1) / 3.0D+00 + go to 240 + + 220 continue + ratio=-(s(i,1) / 6.0D+00 ) / s(i,2) + s(i+1,2)=s(i,1) / 3.0D+00 + go to 240 + + 230 continue + ratio=-( s(i,1) / 6.0D+00 )/s(i,2) + s(i+1,2)=s(i,1)*((1.0D+00-zeta*alpha) * factor / 2.0D+00 - s(i,6) ) +! +! end of i loop +! + 240 continue + + i=i+1 + if ( i < ntau-1) then + go to 70 + end if + + s(i,5) = 0.5D+00 +! +! The last two equations enforce continuity of third derivative and +! of first derivative across tau(ntau-1). +! + entry=ratio*s(i-1,3)+s(i,2)+s(i,1)/3.0D+00 + s(i+1,2)=s(i,1)/6.0D+00 + s(i+1,4)=ratio*s(i-1,4)+s(i,4) + if ( z- 0.5D+00 ) 250, 260, 270 + + 250 continue + + ratio = s(i,1) * 6.0D+00 * s(i-1,6) * alpha / s(i-1,2) + s(i,2)=ratio*s(i-1,3)+s(i,1)+s(i-1,1) + s(i,3)=-s(i-1,1) + go to 280 + + 260 continue + + ratio=s(i,1)/s(i-1,2) + s(i,2)=ratio*s(i-1,3)+s(i,1)+s(i-1,1) + s(i,3)=-s(i-1,1) + go to 280 + + 270 continue + + factr2=onemzt*(alpha*(onemzt**2-1.0D+00)+1.0D+00) & + /(alpha*(onemzt**3-1.0D+00)+1.0D+00) + + ratio = factr2*s(i,1) / s(i-1,2) + s(i,2)=ratio*s(i-1,3)+factr2*s(i-1,1)+s(i,1) + s(i,3)=-factr2*s(i-1,1) +! +! At this point, the last two equations read: +! +! diag(i)*xi+ u(i)*xi+1=r(i) +! -ratio*diag(i)*xi+diag(i+1)*xi+1=r(i+1) +! +! Eliminate XI from the last equation. +! + 280 continue + + s(i,4)=ratio*s(i-1,4) + ratio=-entry/s(i,2) + s(i+1,2)=ratio*s(i,3)+s(i+1,2) + s(i+1,4)=ratio*s(i,4)+s(i+1,4) +! +! Back substitution. +! + s(ntau,4) = s(ntau,4) / s(ntau,2) + + 290 continue + + s(i,4)=(s(i,4)-s(i,3)*s(i+1,4))/s(i,2) + i=i-1 + if ( 1 < i ) then + go to 290 + end if + + s(1,4)=(s(1,4)-s(1,3)*s(2,4)-entry3*s(3,4))/s(1,2) +! +! Construct polynomial pieces. +! + break(1)=tau(1) + l=1 + + do i=1, ntau-1 + coef(1,l)=gtau(i) + coef(3,l)=s(i,4) + divdif=(gtau(i+1)-gtau(i))/s(i,1) + z=s(i,5) + if ( z- 0.5D+00 ) 300, 310, 320 + + 300 continue + + if ( z == 0.0D+00 ) go to 330 + zeta=gam*z + onemzt=1.0D+00-zeta + c=s(i+1,4) / 6.0D+00 + d=s(i,4)*s(i,6) + l=l+1 + del=zeta*s(i,1) + break(l)=tau(i)+del + zt2=zeta**2 + alpha=alph(onemzt) + factor=onemzt**2*alpha + coef(1,l)=gtau(i)+divdif*del+s(i,1)**2*(d*onemzt*(factor-1.0D+00) & + +c*zeta*(zt2-1.0D+00)) + coef(2,l)=divdif+s(i,1)*(d*(1.0D+00-3.0D+00*factor)+c*(3.0D+00*zt2-1.0D+00)) + coef(3,l)=6.0D+00*(d*alpha*onemzt+c*zeta) + coef(4,l)=6.0D+00*(c-d*alpha)/s(i,1) + coef(4,l-1)=coef(4,l)-6.0D+00*d*(1.0D+00-alpha)/(del*zt2) + coef(2,l-1)=coef(2,l)-del*(coef(3,l)-(del/2.0D+00)*coef(4,l-1)) + go to 340 + + 310 continue + + coef(2,l) = divdif - s(i,1) * ( 2.0D+00 * s(i,4) + s(i+1,4) ) / 6.0D+00 + coef(4,l)=(s(i+1,4)-s(i,4))/s(i,1) + go to 340 + + 320 continue + + onemzt=gam*(1.0D+00-z) + + if ( onemzt == 0.0D+00 ) then + go to 330 + end if + + zeta = 1.0D+00 - onemzt + alpha=alph(zeta) + c=s(i+1,4)*s(i,6) + d=s(i,4)/6.0D+00 + del=zeta*s(i,1) + break(l+1)=tau(i)+del + coef(2,l)=divdif-s(i,1)*(2.0D+00*d+c) + coef(4,l)=6.0D+00*(c*alpha-d)/s(i,1) + l=l+1 + coef(4,l)=coef(4,l-1)+6.0D+00*(1.0D+00-alpha)*c/(s(i,1)*onemzt**3) + coef(3,l)=coef(3,l-1)+del*coef(4,l-1) + coef(2,l)=coef(2,l-1)+del*(coef(3,l-1)+(del/2.0D+00)*coef(4,l-1)) + coef(1,l)=coef(1,l-1)+del*(coef(2,l-1)+(del/2.0D+00)*(coef(3,l-1) & + +(del/3.0D+00)*coef(4,l-1))) + go to 340 + + 330 continue + + coef(2,l) = divdif + coef(3,l) = 0D+00 + coef(4,l) = 0.0D+00 + + 340 continue + + l = l + 1 + break(l) = tau(i+1) + + end do + + l = l - 1 + k = 4 + iflag = 1 + + return +end diff --git a/pppack/titanium.f90 b/pppack/titanium.f90 new file mode 100644 index 0000000..9879a5f --- /dev/null +++ b/pppack/titanium.f90 @@ -0,0 +1,88 @@ +!> +!> @file titanium.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +subroutine titanium ( n, t, g ) + +!*********************************************************************** +! +!! TITANIUM represents a temperature dependent property of titanium. +! +! Discussion: +! +! The data has been used extensively as an example in spline +! approximation with variable knots. +! +! Modified: +! +! 20 November 2000 +! +! Reference: +! +! Carl DeBoor, +! A Practical Guide to Splines, +! Springer Verlag. +! +! Parameters: +! +! Output, integer N, the number of data points, which is 49. +! +! Output, real ( kind = 8 ) T(N), the location of the data points. +! +! Output, real ( kind = 8 ) G(N), the value associated with the data points. +! + implicit none + + real ( kind = 8 ) g(*) + integer n + real ( kind = 8 ) t(*) + + n = 49 + + t(1:49) = (/ & + 595.0D+00, 605.0D+00, 615.0D+00, 625.0D+00, 635.0D+00, & + 645.0D+00, 655.0D+00, 665.0D+00, 675.0D+00, 685.0D+00, & + 695.0D+00, 705.0D+00, 715.0D+00, 725.0D+00, 735.0D+00, & + 745.0D+00, 755.0D+00, 765.0D+00, 775.0D+00, 785.0D+00, & + 795.0D+00, 805.0D+00, 815.0D+00, 825.0D+00, 835.0D+00, & + 845.0D+00, 855.0D+00, 865.0D+00, 875.0D+00, 885.0D+00, & + 895.0D+00, 905.0D+00, 915.0D+00, 925.0D+00, 935.0D+00, & + 945.0D+00, 955.0D+00, 965.0D+00, 975.0D+00, 985.0D+00, & + 995.0D+00, 1005.0D+00, 1015.0D+00, 1025.0D+00, 1035.0D+00, & + 1045.0D+00, 1055.0D+00, 1065.0D+00, 1075.0D+00 /) + + g(1:49) = (/ & + 0.644D+00, 0.622D+00, 0.638D+00, 0.649D+00, 0.652D+00, & + 0.639D+00, 0.646D+00, 0.657D+00, 0.652D+00, 0.655D+00, & + 0.644D+00, 0.663D+00, 0.663D+00, 0.668D+00, 0.676D+00, & + 0.676D+00, 0.686D+00, 0.679D+00, 0.678D+00, 0.683D+00, & + 0.694D+00, 0.699D+00, 0.710D+00, 0.730D+00, 0.763D+00, & + 0.812D+00, 0.907D+00, 1.044D+00, 1.336D+00, 1.881D+00, & + 2.169D+00, 2.075D+00, 1.598D+00, 1.211D+00, 0.916D+00, & + 0.746D+00, 0.672D+00, 0.627D+00, 0.615D+00, 0.607D+00, & + 0.606D+00, 0.609D+00, 0.603D+00, 0.601D+00, 0.603D+00, & + 0.601D+00, 0.611D+00, 0.601D+00, 0.608D+00 /) + + return +end diff --git a/pputils2/CMakeLists.txt b/pputils2/CMakeLists.txt new file mode 100644 index 0000000..d0e4592 --- /dev/null +++ b/pputils2/CMakeLists.txt @@ -0,0 +1,74 @@ +/** + * @file CMakeLists.txt + * + * @brief + * + * @copyright + * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) + * SPC (Swiss Plasma Center) + * + * spclibs is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * + * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + * + * @authors + * (in alphabetical order) + * @author Nicolas Richart + * @author Trach-Minh Tran + */ +project(pputils2) + +set(SRCS + pputils2.f90 +) + +set(CMAKE_Fortran_MODULE_DIRECTORY + ${CMAKE_CURRENT_BINARY_DIR}/modules + ) + +add_library(pputils2 STATIC ${SRCS}) +target_include_directories(pputils2 + PRIVATE $ + ${MPI_Fortran_INCLUDE_PATH} + INTERFACE $ + $ + ${MPI_Fortran_INCLUDE_PATH} + ) + +target_compile_options(pputils2 PUBLIC ${MPI_Fortran_COMPILE_FLAGS}) +target_link_libraries(pputils2 PUBLIC ${MPI_Fortran_LIBRARIES}) + +set_property(TARGET pputils2 + PROPERTY PUBLIC_HEADER ${CMAKE_CURRENT_BINARY_DIR}/modules/pputils.mod) + +include(GNUInstallDirs) +install(TARGETS pputils2 + EXPORT ${BSPLINES_EXPORT_TARGETS} + LIBRARY DESTINATION ${CNAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CNAKE_INSTALL_LIBDIR} + PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} + ) + +if(BSPLINES_EXAMPLES) + set(EXAMPLES ex1 ex2 ex3 ex4 ex5 ex6 ex7) + foreach(ex ${EXAMPLES}) + add_executable(pputils2_${ex} ${ex}.f90) + target_link_libraries(pputils2_${ex} pputils2 futils) + endforeach() + + add_test(ex1 ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 4 ${CMAKE_CURRENT_BINARY_DIR}/pputils2_ex1) + add_test(ex2 ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 9 ${CMAKE_CURRENT_BINARY_DIR}/pputils2_ex2) + add_test(ex3 ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 5 ${CMAKE_CURRENT_BINARY_DIR}/pputils2_ex3) + add_test(ex4 ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 12 ${CMAKE_CURRENT_BINARY_DIR}/pputils2_ex4) + add_test(ex5 ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 8 ${CMAKE_CURRENT_BINARY_DIR}/pputils2_ex5) + add_test(ex6 ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 12 ${CMAKE_CURRENT_BINARY_DIR}/pputils2_ex6) + add_test(ex7 ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 6 ${CMAKE_CURRENT_BINARY_DIR}/pputils2_ex7) +endif() diff --git a/pputils2/Makefile b/pputils2/Makefile new file mode 100644 index 0000000..b0f3ddd --- /dev/null +++ b/pputils2/Makefile @@ -0,0 +1,105 @@ +# +# @file Makefile +# +# @brief +# +# @copyright +# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +# SPC (Swiss Plasma Center) +# +# spclibs is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . +# +# @authors +# (in alphabetical order) +# @author Trach-Minh Tran +# +F90 = mpiifort +CC = cc + +debug = -g -traceback -CB +optim = -O3 -xSSE4.2 + +#OPT=$(debug) +OPT=$(optim) + +F90FLAGS = $(OPT) -I. -I$(FUTILS)/include -I${HDF5}/lib +CFLAGS = -O2 +LDFLAGS = $(OPT) -fPIC -L. -L$(FUTILS)/lib -L${HDF5}/lib +LIBS = -lfutils pputils2.o -lhdf5_fortran -lhdf5 -lz + +.SUFFIXES: +.SUFFIXES: .o .c .f90 + +.f90.o: + $(F90) $(F90FLAGS) -c $< + +all: ex1 ex2 ex3 ex4 ex5 ex6 ex7 + +lib: libpputils2.a + +libpputils2.a: pputils2.o + xiar r $@ $? + ranlib $@ + +ex1: ex1.o + $(F90) $(LDFLAGS) -o $@ $< $(LIBS) + +ex2: ex2.o + $(F90) $(LDFLAGS) -o $@ $< $(LIBS) + +ex3: ex3.o + $(F90) $(LDFLAGS) -o $@ $< $(LIBS) + +ex4: ex4.o + $(F90) $(LDFLAGS) -o $@ $< $(LIBS) + +ex5: ex5.o + $(F90) $(LDFLAGS) -o $@ $< $(LIBS) + +ex6: ex6.o + $(F90) $(LDFLAGS) -o $@ $< $(LIBS) + +ex7: ex7.o + $(F90) $(LDFLAGS) -o $@ $< $(LIBS) + +tests: ex1 ex2 ex3 ex4 ex5 ex6 ex7 + @echo ==== Running ex1 ====== + @mpiexec -n 4 ./ex1 + @echo ==== Running ex2 ====== + @mpiexec -n 9 ./ex2 + @echo ==== Running ex3 ====== + @mpiexec -n 5 ./ex3 + @echo ==== Running ex4 ====== + @mpiexec -n 12 ./ex4 + @echo ==== Running ex5 ====== + @mpiexec -n 8 ./ex5 + @echo ==== Running ex6 ====== + @mpiexec -n 12 ./ex6 + @echo ==== Running ex7 ====== + @mpiexec -n 6 ./ex7 + +ex1.o: pputils2.o +ex2.o: pputils2.o +ex3.o: pputils2.o +ex4.o: pputils2.o +ex5.o: pputils2.o +ex6.o: pputils2.o +ex7.o: pputils2.o + +tags: + etags *.f90 $(FUTILS)/futils.f90 + +clean: + rm -f *.o *~ a.out +distclean: clean + rm -f ex1 ex2 ex3 ex4 ex5 ex6 ex7 *.h5 *.a *.mod diff --git a/pputils2/ex1.f90 b/pputils2/ex1.f90 new file mode 100644 index 0000000..46dcca6 --- /dev/null +++ b/pputils2/ex1.f90 @@ -0,0 +1,113 @@ +!> +!> @file ex1.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Tranpsose of 2d matrix partitionned on a 1d proc grid: +! - A(n1,n2/P1) -> AT(n2,n1/P1) -> B(n1,n2/P1) +! + USE pputils2 + USE futils + IMPLICIT NONE + INCLUDE "mpif.h" + CHARACTER(len=32) :: file='ex1.h5' + INTEGER :: fid +! + INTEGER, PARAMETER :: ndims=1 ! N. of dims of proc. grid + INTEGER :: ierr, me, npes + INTEGER, DIMENSION(ndims) :: dims, coords + LOGICAL :: periods(ndims), reorder + INTEGER :: cart +! + INTEGER :: n1, n2, n1p, n2p + DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: a, atr, b + DOUBLE PRECISION :: x + INTEGER :: i, j, iglob, jglob, kerrors, nerrors +!================================================================================ +! +! Init MPI + CALL mpi_init(ierr) + CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) + CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) +! +! Create cartesian topololy + dims = npes + periods = (/.FALSE./) + reorder = .FALSE. + IF( PRODUCT(dims) .NE. npes ) THEN + IF( me .EQ. 0 ) THEN + PRINT*, PRODUCT(dims), " processors required!" + CALL mpi_abort(MPI_COMM_WORLD, -1, ierr) + END IF + END IF + CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, cart, ierr) + CALL mpi_cart_coords(cart, me, ndims, coords, ierr) +! +! Define local array + n1p=2; n1=n1p*dims(1) + n2p=2; n2=n2p*dims(1) + ALLOCATE( a(n1,n2p), atr(n2,n1p), b(n1,n2p) ) + a = 0 + atr = 0 + b = 0 + DO i=1,n1 + DO j=1,n2p + jglob = coords(1)*n2p + j + a(i,j) = 10*i + jglob + END DO + END DO + IF( me.EQ. 0 ) THEN + WRITE(*,'(a,3i4)') 'Global dimension of matrix a', n1, n2 + END IF +! +! Tranpose A(n1,n2/P1) -> AT(n2,n1/P1) -> B(n1,n2/P1) + CALL pptransp(cart, a, atr) + CALL pptransp(cart, atr, b) +! +! Check ATR + kerrors = 0 + DO i=1,n1p + iglob = coords(1)*n1p + i + DO j=1,n2 + x = 10*iglob + j + IF( x .NE. atr(j,i) ) kerrors = kerrors+1 + END DO + END DO + CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, & + & MPI_COMM_WORLD, ierr) + IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking ATR', nerrors +! +! Write to file +! + CALL creatf(file, fid, mpicomm=cart) + CALL putarrnd(fid, '/arraya', a, (/2/) ) + CALL putarrnd(fid, '/arrayat', atr, (/2/) ) + CALL putarrnd(fid, '/arrayb', b, (/2/) ) +! +! Clean up and quit + DEALLOCATE(a, atr) + CALL closef(fid) + CALL mpi_finalize(ierr) +END PROGRAM main diff --git a/pputils2/ex2.f90 b/pputils2/ex2.f90 new file mode 100644 index 0000000..8b8d1c2 --- /dev/null +++ b/pputils2/ex2.f90 @@ -0,0 +1,170 @@ +!> +!> @file ex2.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Tranpsose of matrices partitionned on a 2d proc grid: +! - A(n1,n2/P1,n3/P2) -> AT(n3,n2/P1,n1/P2) +! - B(n1,n2,n3/P1,n4/P2) -> BT(n4,n2,n3/P1,n1/P2) +! + USE pputils2 + USE futils + IMPLICIT NONE + INCLUDE "mpif.h" + CHARACTER(len=32) :: file='ex2.h5' + INTEGER :: fid +! + INTEGER, PARAMETER :: ndims=2 ! N. of dims of proc. grid + INTEGER :: ierr, me, npes + INTEGER, DIMENSION(ndims) :: dims, coords + LOGICAL :: periods(ndims), reorder + INTEGER :: cart, cartcol, cartrow +! + INTEGER :: n1, n2, n3, n4, n1p, n2p, n3p, n4p + DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: a3, a3t + DOUBLE PRECISION, DIMENSION(:,:,:,:), ALLOCATABLE :: b4, b4t + DOUBLE PRECISION :: x + INTEGER :: i, j, k, l, iglob, jglob, kglob, lglob, kerrors, nerrors +!================================================================================ +! +! Init MPI + CALL mpi_init(ierr) + CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) + CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) +! +! Create cartesian topololy + dims = (/3, 3/) + periods = (/.FALSE., .TRUE./) + reorder = .FALSE. + IF( PRODUCT(dims) .NE. npes ) THEN + IF( me .EQ. 0 ) THEN + PRINT*, PRODUCT(dims), " processors required!" + CALL mpi_abort(MPI_COMM_WORLD, -1, ierr) + END IF + END IF + CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, cart, ierr) + CALL mpi_cart_coords(cart, me, ndims, coords, ierr) + CALL mpi_cart_sub(cart, (/.TRUE., .FALSE. /), cartcol, ierr) + CALL mpi_cart_sub(cart, (/.FALSE., .TRUE. /), cartrow, ierr) +! +! Define local array A3 + n1p=2; n1=n1p*dims(2) + n2p=4; n2=n2p*dims(1) + n3p=3; n3=n3p*dims(2) + ALLOCATE( a3(n1,n2p,n3p), a3t(n3,n2p,n1p) ) + a3 = 0 + a3t = 0 + DO i=1,n1 + DO j=1,n2p + jglob = coords(1)*n2p + j + DO k=1,n3p + kglob = coords(2)*n3p + k + a3(i,j,k) = 10000*i + 100*jglob + kglob + END DO + END DO + END DO + IF( me .EQ. 0 ) THEN + WRITE(*,'(a,3i4)') 'Global dimension of matrix a', n1, n2, n3 + END IF +! +! Tranpose A3(n1,n2/P1,n3/P2) -> A3T(n3,n2/P1,n1/P2) + CALL pptransp(cartrow, a3, a3t, 1, 3) +! +! Check A3T + kerrors = 0 + DO i=1,n1p + iglob = coords(2)*n1p + i + DO j=1,n2p + jglob = coords(1)*n2p + j + DO k=1,n3 + x = 10000*iglob + 100*jglob + k + IF( x .NE. a3t(k,j,i) ) kerrors = kerrors+1 + END DO + END DO + END DO + CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, & + & MPI_COMM_WORLD, ierr) + IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking a3', nerrors +! +! Define local array B4 + n1p=2; n1=n1p*dims(2) + n2p=4; n2=n2p*dims(1) + n3p=3; n3=n3p*dims(1) + n4p=3; n4=n4p*dims(2) + ALLOCATE( b4(n1,n2,n3p,n4p), b4t(n4,n2,n3p,n1p) ) + b4 = 0 + b4t = 0 + DO i=1,n1 + DO j=1,n2 + DO k=1,n3p + kglob = coords(1)*n3p + k + DO l=1,n4p + lglob = coords(2)*n4p + l + b4(i,j,k,l) = 1000000*i + 10000*j + 100*kglob +lglob + END DO + END DO + END DO + END DO + IF( me.EQ. 0 ) THEN + WRITE(*,'(a,4i4)') 'Global dimension of matrix b', n1, n2, n3, n4 + END IF +! +! Tranpose B4(n1,n2,n3/P1,n4/P2) -> B4T(n4,n2,n3/P1,n1/P2) + CALL pptransp(cartrow, b4, b4t, 1, 4) +! + CALL mpi_barrier(MPI_COMM_WORLD, ierr) +! +! Check B4T + kerrors = 0 + DO i=1,n1p + iglob = coords(2)*n1p + i + DO j=1,n2 + DO k=1,n3p + kglob = coords(1)*n3p + k + DO l=1,n4 + x = 1000000*iglob + 10000*j + 100*kglob + l + IF( x .NE. b4t(l,j,k,i) ) kerrors = kerrors+1 + END DO + END DO + END DO + END DO + CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, & + & MPI_COMM_WORLD, ierr) + IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking b4', nerrors +! +! Write to file +! + CALL creatf(file, fid, mpicomm=cart) + CALL putarrnd(fid, '/a3' , a3, (/2,3/) ) + CALL putarrnd(fid, '/a3t', a3t,(/2,3/) ) + CALL putarrnd(fid, '/b4' , b4, (/3,4/) ) + CALL putarrnd(fid, '/b4t', b4t,(/3,4/) ) + +! Clean up and quit + DEALLOCATE(a3, a3t) + DEALLOCATE(b4, b4t) + CALL closef(fid) + CALL mpi_finalize(ierr) +END PROGRAM main diff --git a/pputils2/ex3.f90 b/pputils2/ex3.f90 new file mode 100644 index 0000000..dabf46b --- /dev/null +++ b/pputils2/ex3.f90 @@ -0,0 +1,111 @@ +!> +!> @file ex3.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Tranpsose of 2d matrix partitionned on a 1d proc grid +! - A(n1,n2/P1) -> AT(n2,n1/P1) +! n1, n2 NOT REQUIRED to be divided evenly by NPES +! + USE pputils2 + USE futils + IMPLICIT NONE + INCLUDE "mpif.h" + CHARACTER(len=32) :: file='ex3.h5' + INTEGER :: fid +! + INTEGER, PARAMETER :: ndims=1 ! N. of dims of proc. grid + INTEGER :: ierr, me, npes + INTEGER, DIMENSION(ndims) :: dims, coords + LOGICAL :: periods(ndims), reorder + INTEGER :: cart +! + INTEGER :: n1=9, n2=8, s1, s2, n1p, n2p + DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: a, atr + DOUBLE PRECISION :: x + INTEGER :: i, j, iglob, jglob, kerrors, nerrors +!================================================================================ +! +! Init MPI + CALL mpi_init(ierr) + CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) + CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) +! +! Create cartesian topololy + dims = npes + periods = (/.FALSE./) + reorder = .FALSE. + IF( PRODUCT(dims) .NE. npes ) THEN + IF( me .EQ. 0 ) THEN + PRINT*, PRODUCT(dims), " processors required!" + CALL mpi_abort(MPI_COMM_WORLD, -1, ierr) + END IF + END IF + CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, cart, ierr) + CALL mpi_cart_coords(cart, me, ndims, coords, ierr) +! +! Partition array + CALL dist1d(cart, 0, n1, s1, n1p) + CALL dist1d(cart, 0, n2, s2, n2p) + ALLOCATE( a(n1,n2p), atr(n2,n1p) ) + a = 0 + atr = 0 + DO i=1,n1 + DO j=1,n2p + jglob = s2 + j + a(i,j) = 10*i + jglob + END DO + END DO + IF( me.EQ. 0 ) THEN + WRITE(*,'(a,3i4)') 'Global dimension of matrix a', n1, n2 + END IF +! +! Tranpose A(n1,n2/P1) -> ATR(n2,n1/P1) + CALL pptransp(cart, a, atr) +! +! Check ATR + kerrors = 0 + DO i=1,n1p + iglob = s1 + i + DO j=1,n2 + x = 10*iglob + j + IF( x .NE. atr(j,i) ) kerrors = kerrors+1 + END DO + END DO + CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, & + & MPI_COMM_WORLD, ierr) + IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking ATR', nerrors +! +! Write to file +! + CALL creatf(file, fid, mpicomm=cart) + CALL putarrnd(fid, '/arraya', a, (/2/) ) + CALL putarrnd(fid, '/arrayat', atr, (/2/) ) +! +! Clean up and quit + DEALLOCATE(a, atr) + CALL closef(fid) + CALL mpi_finalize(ierr) +END PROGRAM main diff --git a/pputils2/ex4.f90 b/pputils2/ex4.f90 new file mode 100644 index 0000000..1f75216 --- /dev/null +++ b/pputils2/ex4.f90 @@ -0,0 +1,171 @@ +!> +!> @file ex4.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Tranpsose of matrices partitionned on a 2d proc grid: +! - A(n1,n2/P1,n3/P2) -> AT(n3,n2/P1,n1/P2) +! - B(n1,n2,n3/P1,n4/P2) -> BT(n4,n2,n3/P1,n1/P2) +! n1, n2, n3, n4 NOT REQUIRED to be divided evenly by NPES +! + USE pputils2 + USE futils + IMPLICIT NONE + INCLUDE "mpif.h" + CHARACTER(len=32) :: file='ex4.h5' + INTEGER :: fid +! + INTEGER, PARAMETER :: ndims=2 ! N. of dims of proc. grid + INTEGER :: ierr, me, npes + INTEGER, DIMENSION(ndims) :: dims, coords + LOGICAL :: periods(ndims), reorder + INTEGER :: cart, cartcol, cartrow +! + INTEGER :: n1=15, n2=10, n3=9, n4=8, n1p, n2p, n3p, n4p, s1, s2, s3, s4 + DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: a3, a3t + DOUBLE PRECISION, DIMENSION(:,:,:,:), ALLOCATABLE :: b4, b4t + DOUBLE PRECISION :: x + INTEGER :: i, j, k, l, iglob, jglob, kglob, lglob, kerrors, nerrors +!================================================================================ +! +! Init MPI + CALL mpi_init(ierr) + CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) + CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) +! +! Create cartesian topololy + dims = (/4, 3/) + periods = (/.FALSE., .TRUE./) + reorder = .FALSE. + IF( PRODUCT(dims) .NE. npes ) THEN + IF( me .EQ. 0 ) THEN + PRINT*, PRODUCT(dims), " processors required!" + CALL mpi_abort(MPI_COMM_WORLD, -1, ierr) + END IF + END IF + CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, cart, ierr) + CALL mpi_cart_coords(cart, me, ndims, coords, ierr) + CALL mpi_cart_sub(cart, (/.TRUE., .FALSE. /), cartcol, ierr) + CALL mpi_cart_sub(cart, (/.FALSE., .TRUE. /), cartrow, ierr) +! +! Define local array A3 + CALL dist1d(cartrow, 0, n1, s1, n1p) + CALL dist1d(cartcol, 0, n2, s2, n2p) + CALL dist1d(cartrow, 0, n3, s3, n3p) + ALLOCATE( a3(n1,n2p,n3p), a3t(n3,n2p,n1p) ) + a3 = 0 + a3t = 0 + DO i=1,n1 + DO j=1,n2p + jglob = s2 + j + DO k=1,n3p + kglob = s3 + k + a3(i,j,k) = 10000*i + 100*jglob + kglob + END DO + END DO + END DO + IF( me.EQ. 0 ) THEN + WRITE(*,'(a,3i4)') 'Global dimension of matrix a', n1, n2, n3 + END IF +! +! Tranpose A3(n1,n2/P1,n3/P2) -> A3T(n3,n2/P1,n1/P2) + CALL pptransp(cartrow, a3, a3t, 1, 3) +! +! Check A3T + kerrors = 0 + DO i=1,n1p + iglob = s1 + i + DO j=1,n2p + jglob = s2 + j + DO k=1,n3 + x = 10000*iglob + 100*jglob + k + IF( x .NE. a3t(k,j,i) ) kerrors = kerrors+1 + END DO + END DO + END DO + CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, & + & MPI_COMM_WORLD, ierr) + IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking a3', nerrors +! +! Define local array B4 + CALL dist1d(cartrow, 0, n1, s1, n1p) + CALL dist1d(cartcol, 0, n3, s3, n3p) + CALL dist1d(cartrow, 0, n4, s4, n4p) + ALLOCATE( b4(n1,n2,n3p,n4p), b4t(n4,n2,n3p,n1p) ) + b4 = 0 + b4t = 0 + DO i=1,n1 + DO j=1,n2 + DO k=1,n3p + kglob = s3 + k + DO l=1,n4p + lglob = s4 + l + b4(i,j,k,l) = 1000000*i + 10000*j + 100*kglob +lglob + END DO + END DO + END DO + END DO + IF( me.EQ. 0 ) THEN + WRITE(*,'(a,4i4)') 'Global dimension of matrix b', n1, n2, n3, n4 + END IF +! +! Tranpose B4(n1,n2,n3/P1,n4/P2) -> B4T(n4,n2,n3/P1,n1/P2) +!!$ CALL pptransp(cartrow, b4, b4t) + CALL pptransp(cartrow, b4, b4t, 1, 4) +! + CALL mpi_barrier(MPI_COMM_WORLD, ierr) +! +! Check B4T + kerrors = 0 + DO i=1,n1p + iglob = s1 + i + DO j=1,n2 + DO k=1,n3p + kglob = s3 + k + DO l=1,n4 + x = 1000000*iglob + 10000*j + 100*kglob + l + IF( x .NE. b4t(l,j,k,i) ) kerrors = kerrors+1 + END DO + END DO + END DO + END DO + CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, & + & MPI_COMM_WORLD, ierr) + IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking b4', nerrors +! +! Write to file +! + CALL creatf(file, fid, mpicomm=cart) + CALL putarrnd(fid, '/a3' , a3, (/2,3/) ) + CALL putarrnd(fid, '/a3t', a3t,(/2,3/) ) + CALL putarrnd(fid, '/b4' , b4, (/3,4/) ) + CALL putarrnd(fid, '/b4t', b4t,(/3,4/) ) + +! Clean up and quit + DEALLOCATE(a3, a3t) + DEALLOCATE(b4, b4t) + CALL closef(fid) + CALL mpi_finalize(ierr) +END PROGRAM main diff --git a/pputils2/ex5.f90 b/pputils2/ex5.f90 new file mode 100644 index 0000000..181a2dc --- /dev/null +++ b/pputils2/ex5.f90 @@ -0,0 +1,221 @@ +!> +!> @file ex5.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Tranpsose of matrices partitionned on a 2d proc grid: +! - A(n1,n2/P1,n3/P2) -> AT(n2,n1/P1,n3/P2) +! - B(n1,n2,n3/P1,n4/P2) -> BT(n3,n2,n1/P1,n4/P2) +! - C(n1,n2/P1,n3,n4/P2) -> CT(n2,n1/P1,n3,n4/P2) +! n1, n2, n3, n4 NOT REQUIRED to be divided evenly by NPES +! + USE pputils2 + USE futils + IMPLICIT NONE + INCLUDE "mpif.h" + CHARACTER(len=32) :: file='ex4.h5' + INTEGER :: fid +! + INTEGER, PARAMETER :: ndims=2 ! N. of dims of proc. grid + INTEGER :: ierr, me, npes + INTEGER, DIMENSION(ndims) :: dims, coords + LOGICAL :: periods(ndims), reorder + INTEGER :: cart, cartcol, cartrow +! + INTEGER :: n1=8, n2=10, n3=6, n4=5, n1p, n2p, n3p, n4p, s1, s2, s3, s4 + DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: a3, a3t + DOUBLE PRECISION, DIMENSION(:,:,:,:), ALLOCATABLE :: b4, b4t, c4, c4t + DOUBLE PRECISION :: x + INTEGER :: i, j, k, l, iglob, jglob, kglob, lglob, kerrors, nerrors +!================================================================================ +! +! Init MPI + CALL mpi_init(ierr) + CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) + CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) +! +! Create cartesian topololy + dims = (/4, 2/) + periods = (/.FALSE., .TRUE./) + reorder = .FALSE. + IF( PRODUCT(dims) .NE. npes ) THEN + IF( me .EQ. 0 ) THEN + PRINT*, PRODUCT(dims), " processors required!" + CALL mpi_abort(MPI_COMM_WORLD, -1, ierr) + END IF + END IF + CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, cart, ierr) + CALL mpi_cart_coords(cart, me, ndims, coords, ierr) + CALL mpi_cart_sub(cart, (/.TRUE., .FALSE. /), cartcol, ierr) + CALL mpi_cart_sub(cart, (/.FALSE., .TRUE. /), cartrow, ierr) +! +! Define local array A3 + CALL dist1d(cartcol, 0, n1, s1, n1p) + CALL dist1d(cartcol, 0, n2, s2, n2p) + CALL dist1d(cartrow, 0, n3, s3, n3p) + ALLOCATE( a3(n1,n2p,n3p), a3t(n2,n1p,n3p) ) + a3 = 0 + a3t = 0 + DO i=1,n1 + DO j=1,n2p + jglob = s2 + j + DO k=1,n3p + kglob = s3 + k + a3(i,j,k) = 10000*i + 100*jglob + kglob + END DO + END DO + END DO + IF( me .EQ. 0 ) THEN + WRITE(*,'(a,4i4)') 'Global dimension of matrix a', n1, n2, n3 + END IF +! +! Tranpose A(n1,n2/P1,n3/P2) -> AT(n2,n1/P1,n3/P2) + CALL pptransp(cartcol, a3, a3t, 1, 2) +! + CALL mpi_barrier(MPI_COMM_WORLD, ierr) +! +! Check A3T + kerrors = 0 + DO i=1,n1p + iglob = s1 + i + DO j=1,n2 + DO k=1,n3p + kglob = s3 + k + x = 10000*iglob + 100*j + kglob + IF( x .NE. a3t(j,i,k) ) kerrors = kerrors+1 + END DO + END DO + END DO + CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, & + & MPI_COMM_WORLD, ierr) + IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking a3', nerrors +! +! Define local array B4 + CALL dist1d(cartcol, 0, n1, s1, n1p) + CALL dist1d(cartcol, 0, n3, s3, n3p) + CALL dist1d(cartrow, 0, n4, s4, n4p) + ALLOCATE( b4(n1,n2,n3p,n4p), b4t(n3,n2,n1p,n4p) ) + b4 = 0 + b4t = 0 + DO i=1,n1 + DO j=1,n2 + DO k=1,n3p + kglob = s3 + k + DO l=1,n4p + lglob = s4 + l + b4(i,j,k,l) = 1000000*i + 10000*j + 100*kglob +lglob + END DO + END DO + END DO + END DO + IF( me .EQ. 0 ) THEN + WRITE(*,'(a,4i4)') 'Global dimension of matrix b', n1, n2, n3, n4 + END IF +! +! Tranpose B(n1,n2,n3/P1,n4/P2) -> BT(n3,n2,n1/P1,n4/P2) + CALL pptransp(cartcol, b4, b4t, 1, 3) +! + CALL mpi_barrier(MPI_COMM_WORLD, ierr) +! +! Check B4T + kerrors = 0 + DO i=1,n1p + iglob = s1 + i + DO j=1,n2 + DO k=1,n3 + DO l=1,n4p + lglob = s4 + l + x = 1000000*iglob + 10000*j + 100*k + lglob + IF( x .NE. b4t(k,j,i,l) ) kerrors = kerrors+1 + END DO + END DO + END DO + END DO + CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, & + & MPI_COMM_WORLD, ierr) + IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking b4', nerrors +! +! Define local array C4 + CALL dist1d(cartcol, 0, n1, s1, n1p) + CALL dist1d(cartcol, 0, n2, s2, n2p) + CALL dist1d(cartrow, 0, n4, s4, n4p) + ALLOCATE( c4(n1,n2p,n3,n4p), c4t(n2,n1p,n3,n4p) ) + c4 = 0 + c4t = 0 + DO i=1,n1 + DO j=1,n2p + jglob = s2 + j + DO k=1,n3 + DO l=1,n4p + lglob = s4 + l + c4(i,j,k,l) = 1000000*i + 10000*jglob + 100*k +lglob + END DO + END DO + END DO + END DO + IF( me.EQ. 0 ) THEN + WRITE(*,'(a,4i4)') 'Global dimension of matrix c', n1, n2, n3, n4 + END IF +! +! Tranpose C(n1,n2/P1,n3,n4/P2) -> CT(n2,n1/P1,n3,n4/P2) + CALL pptransp(cartcol, c4, c4t, 1, 2) +! + CALL mpi_barrier(MPI_COMM_WORLD, ierr) +! +! Check C4T + kerrors = 0 + DO i=1,n1p + iglob = s1 + i + DO j=1,n2 + DO k=1,n3 + DO l=1,n4p + lglob = s4 + l + x = 1000000*iglob + 10000*j + 100*k + lglob + IF( x .NE. c4t(j,i,k,l) ) kerrors = kerrors+1 + END DO + END DO + END DO + END DO + CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, & + & MPI_COMM_WORLD, ierr) + IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking c4', nerrors +! +! Write to file +! + CALL creatf(file, fid, mpicomm=cart) + CALL putarrnd(fid, '/a3' , a3, (/2,3/) ) + CALL putarrnd(fid, '/a3t', a3t,(/2,3/) ) + CALL putarrnd(fid, '/b4' , b4, (/3,4/) ) + CALL putarrnd(fid, '/b4t', b4t,(/3,4/) ) + CALL putarrnd(fid, '/c4' , c4, (/2,4/) ) + CALL putarrnd(fid, '/c4t', c4t,(/2,4/) ) + +! Clean up and quit + DEALLOCATE(a3, a3t) + DEALLOCATE(b4, b4t) + DEALLOCATE(c4, c4t) + CALL closef(fid) + CALL mpi_finalize(ierr) +END PROGRAM main diff --git a/pputils2/ex6.f90 b/pputils2/ex6.f90 new file mode 100644 index 0000000..defba7c --- /dev/null +++ b/pputils2/ex6.f90 @@ -0,0 +1,270 @@ +!> +!> @file ex6.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Tranpsose of matrices partitionned on a 2d proc grid: +! - A(n1/P1,n2,n3/P2) -> AT(n1/P1,n3,n2/P2) +! - B(n1,n2,n3/P1,n4/P2) -> BT(n1,n3,n2/P1,n4/P2) +! - C(n1,n2,n3/P1,n4/P2) -> CT(n1,n4,n3/P1,n2/P2) +! - D(n1,n2/P1,n3,n4/P2) -> DT(n1,n2/P1,n4,n3/P2) +! n1, n2, n3, n4 NOT REQUIRED to be divided evenly by NPES +! + USE pputils2 + USE futils + IMPLICIT NONE + INCLUDE "mpif.h" + CHARACTER(len=32) :: file='ex4.h5' + INTEGER :: fid +! + INTEGER, PARAMETER :: ndims=2 ! N. of dims of proc. grid + INTEGER :: ierr, me, npes + INTEGER, DIMENSION(ndims) :: dims, coords + LOGICAL :: periods(ndims), reorder + INTEGER :: cart, cartcol, cartrow +! + INTEGER :: n1=8, n2=10, n3=6, n4=5, n1p, n2p, n3p, n4p, s1, s2, s3, s4 + DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: a3, a3t + DOUBLE PRECISION, DIMENSION(:,:,:,:), ALLOCATABLE :: b4, b4t, c4, c4t, d4, d4t + DOUBLE PRECISION :: x + INTEGER :: i, j, k, l, iglob, jglob, kglob, lglob, kerrors, nerrors +!================================================================================ +! +! Init MPI + CALL mpi_init(ierr) + CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) + CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) +! +! Create cartesian topololy + dims = (/4, 3/) + periods = (/.FALSE., .TRUE./) + reorder = .FALSE. + IF( PRODUCT(dims) .NE. npes ) THEN + IF( me .EQ. 0 ) THEN + PRINT*, PRODUCT(dims), " processors required!" + CALL mpi_abort(MPI_COMM_WORLD, -1, ierr) + END IF + END IF + CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, cart, ierr) + CALL mpi_cart_coords(cart, me, ndims, coords, ierr) + CALL mpi_cart_sub(cart, (/.TRUE., .FALSE. /), cartcol, ierr) + CALL mpi_cart_sub(cart, (/.FALSE., .TRUE. /), cartrow, ierr) +! +! Define local array A3 + CALL dist1d(cartcol, 0, n1, s1, n1p) + CALL dist1d(cartrow, 0, n2, s2, n2p) + CALL dist1d(cartrow, 0, n3, s3, n3p) + ALLOCATE( a3(n1p,n2,n3p), a3t(n1p,n3,n2p) ) + a3 = 0 + a3t = 0 + DO i=1,n1p + iglob = s1 + i + DO j=1,n2 + DO k=1,n3p + kglob = s3 + k + a3(i,j,k) = 10000*iglob + 100*j + kglob + END DO + END DO + END DO + IF( me .EQ. 0 ) THEN + WRITE(*,'(a,4i4)') 'Global dimension of matrix a', n1, n2, n3 + END IF +! +! Tranpose A(n1/P1,n2,n3/P2) -> AT(n1/P1,n3,n2/P2) + CALL pptransp(cartrow, a3, a3t, 2, 3) +! + CALL mpi_barrier(MPI_COMM_WORLD, ierr) +! +! Check A3T + kerrors = 0 + DO i=1,n1p + iglob = s1 + i + DO j=1,n2p + jglob = s2 + j + DO k=1,n3 + x = 10000*iglob + 100*jglob + k + IF( x .NE. a3t(i,k,j) ) kerrors = kerrors+1 + END DO + END DO + END DO + CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, & + & MPI_COMM_WORLD, ierr) + IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking a3', nerrors +! +! Define local array B4 + CALL dist1d(cartcol, 0, n2, s2, n2p) + CALL dist1d(cartcol, 0, n3, s3, n3p) + CALL dist1d(cartrow, 0, n4, s4, n4p) + ALLOCATE( b4(n1,n2,n3p,n4p), b4t(n1,n3,n2p,n4p) ) + b4 = 0 + b4t = 0 + DO i=1,n1 + DO j=1,n2 + DO k=1,n3p + kglob = s3 + k + DO l=1,n4p + lglob = s4 + l + b4(i,j,k,l) = 1000000*i + 10000*j + 100*kglob +lglob + END DO + END DO + END DO + END DO + IF( me .EQ. 0 ) THEN + WRITE(*,'(a,4i4)') 'Global dimension of matrix b', n1, n2, n3, n4 + END IF +! +! Tranpose B(n1,n2,n3/P1,n4/P2) -> BT(n1,n3,n2/P1,n4/P2) + CALL pptransp(cartcol, b4, b4t, 2, 3) +! + CALL mpi_barrier(MPI_COMM_WORLD, ierr) +! +! Check B4T + kerrors = 0 + DO i=1,n1 + DO j=1,n2p + jglob = s2 + j + DO k=1,n3 + DO l=1,n4p + lglob = s4 + l + x = 1000000*i + 10000*jglob + 100*k + lglob + IF( x .NE. b4t(i,k,j,l) ) kerrors = kerrors+1 + END DO + END DO + END DO + END DO + CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, & + & MPI_COMM_WORLD, ierr) + IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking b4', nerrors +! +! Define local array C4 + CALL dist1d(cartrow, 0, n2, s2, n2p) + CALL dist1d(cartcol, 0, n3, s3, n3p) + CALL dist1d(cartrow, 0, n4, s4, n4p) + ALLOCATE( c4(n1,n2,n3p,n4p), c4t(n1,n4,n3p,n2p) ) + c4 = 0 + c4t = 0 + DO i=1,n1 + DO j=1,n2 + DO k=1,n3p + kglob = s3 + k + DO l=1,n4p + lglob = s4 + l + c4(i,j,k,l) = 1000000*i + 10000*j + 100*kglob +lglob + END DO + END DO + END DO + END DO + IF( me.EQ. 0 ) THEN + WRITE(*,'(a,4i4)') 'Global dimension of matrix c', n1, n2, n3, n4 + END IF +! +! Tranpose C(n1,n2,n3/P1,n4/P2) -> CT(n1,n4,n3/P1,n2/P2) + CALL pptransp(cartrow, c4, c4t, 2, 4) +! + CALL mpi_barrier(MPI_COMM_WORLD, ierr) +! +! Check C4T + kerrors = 0 + DO i=1,n1 + DO j=1,n2p + jglob = s2 + j + DO k=1,n3p + kglob = s3 + k + DO l=1,n4 + x = 1000000*i + 10000*jglob + 100*kglob + l + IF( x .NE. c4t(i,l,k,j) ) kerrors = kerrors+1 + END DO + END DO + END DO + END DO + CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, & + & MPI_COMM_WORLD, ierr) + IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking c4', nerrors +! +! Define local array D4 + CALL dist1d(cartcol, 0, n2, s2, n2p) + CALL dist1d(cartrow, 0, n3, s3, n3p) + CALL dist1d(cartrow, 0, n4, s4, n4p) + ALLOCATE( d4(n1,n2p,n3,n4p), d4t(n1,n2p,n4,n3p) ) + d4 = 0 + d4t = 0 + DO i=1,n1 + DO j=1,n2p + jglob = s2 + j + DO k=1,n3 + DO l=1,n4p + lglob = s4 + l + d4(i,j,k,l) = 1000000*i + 10000*jglob + 100*k +lglob + END DO + END DO + END DO + END DO + IF( me.EQ. 0 ) THEN + WRITE(*,'(a,4i4)') 'Global dimension of matrix d', n1, n2, n3, n4 + END IF +! +! Tranpose D(n1,n2/P1,n3,n4/P2) -> DT(n1,n2/P1,n4,n3/P2) + CALL pptransp(cartrow, d4, d4t, 3, 4) +! + CALL mpi_barrier(MPI_COMM_WORLD, ierr) +! +! Check D4T + kerrors = 0 + DO i=1,n1 + DO j=1,n2p + jglob = s2 + j + DO k=1,n3p + kglob = s3 + k + DO l=1,n4 + x = 1000000*i + 10000*jglob + 100*kglob + l + IF( x .NE. d4t(i,j,l,k) ) kerrors = kerrors+1 + END DO + END DO + END DO + END DO + CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, & + & MPI_COMM_WORLD, ierr) + IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking d4', nerrors +! +! Write to file +! + CALL creatf(file, fid, mpicomm=cart) + CALL putarrnd(fid, '/a3' , a3, (/1,3/) ) + CALL putarrnd(fid, '/a3t', a3t,(/1,3/) ) + CALL putarrnd(fid, '/b4' , b4, (/3,4/) ) + CALL putarrnd(fid, '/b4t', b4t,(/3,4/) ) + CALL putarrnd(fid, '/c4' , c4, (/3,4/) ) + CALL putarrnd(fid, '/c4t', c4t,(/3,4/) ) + CALL putarrnd(fid, '/d4' , d4, (/2,4/) ) + CALL putarrnd(fid, '/d4t', d4t,(/2,4/) ) +! +! Clean up and quit + DEALLOCATE(a3, a3t) + DEALLOCATE(b4, b4t) + DEALLOCATE(c4, c4t) + DEALLOCATE(d4, d4t) + CALL closef(fid) + CALL mpi_finalize(ierr) +END PROGRAM main diff --git a/pputils2/ex7.f90 b/pputils2/ex7.f90 new file mode 100644 index 0000000..0247599 --- /dev/null +++ b/pputils2/ex7.f90 @@ -0,0 +1,160 @@ +!> +!> @file ex7.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +PROGRAM main +! +! Tranpsose of 3d matrices partitionned in 1 and 2 proc grid: +! - A(n1,n2,n3/P) -> AT(n3,n2,n1/P) +! - B(n1,n2/P1,n3/P2) -> BT(n3,n2/P1,n1/P2) +! n1, n2, n3 NOT REQUIRED to be divided evenly by P +! +USE pputils2 + IMPLICIT NONE + INCLUDE "mpif.h" + INTEGER :: ierr, me, npes, comm=MPI_COMM_WORLD + INTEGER :: n1=15, n2=10, n3=20, n1p, n2p, n3p, s1, s2,s3 + DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: a3, a3t + DOUBLE PRECISION :: x + INTEGER :: i, j, k, iglob, jglob, kglob, kerrors, nerrors +! + INTEGER, PARAMETER :: ndims=2 ! N. of dims of proc. grid + INTEGER, DIMENSION(ndims) :: dims, coords + LOGICAL :: periods(ndims), reorder + INTEGER :: cart, cartcol, cartrow +!================================================================================ +! +! Init MPI + CALL mpi_init(ierr) + CALL mpi_comm_size(comm, npes, ierr) + CALL mpi_comm_rank(comm, me, ierr) +! +!-------------------------------------------------------------------------------- +! +! 1D partition: +! +! Define local array A3 + CALL dist1d(comm, 0, n1, s1, n1p) + CALL dist1d(comm, 0, n3, s3, n3p) + ALLOCATE( a3(n1,n2,n3p), a3t(n3,n2,n1p) ) + a3 = 0 + a3t = 0 + DO i=1,n1 + DO j=1,n2 + DO k=1,n3p + kglob = s3 + k + a3(i,j,k) = 10000*i + 100*j + kglob + END DO + END DO + END DO + IF( me.EQ. 0 ) THEN + WRITE(*,'(a)') '*** 1D partition ***' + WRITE(*,'(a,3i4)') 'Global dimensions of matrix a', n1, n2, n3 + END IF +! +! Tranpose A3(n1,n2/P1,n3/P2) -> A3T(n3,n2/P1,n1/P2) + CALL pptransp(comm, a3, a3t, 1, 3) +! +! Check A3T + kerrors = 0 + DO i=1,n1p + iglob = s1 + i + DO j=1,n2 + DO k=1,n3 + x = 10000*iglob + 100*j + k + IF( x .NE. a3t(k,j,i) ) kerrors = kerrors+1 + END DO + END DO + END DO + CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, & + & comm, ierr) + IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking a3', nerrors + DEALLOCATE(a3, a3t) +!-------------------------------------------------------------------------------- +! +! 2D partition: +! +! Create cartesian topololy + dims = (/2, 3/) + periods = (/.FALSE., .FALSE./) + reorder = .FALSE. + IF( PRODUCT(dims) .NE. npes ) THEN + IF( me .EQ. 0 ) THEN + PRINT*, PRODUCT(dims), " processors required!" + CALL mpi_abort(comm, -1, ierr) + END IF + END IF + CALL mpi_barrier(comm, ierr) +! + CALL mpi_cart_create(comm, ndims, dims, periods, reorder, cart, ierr) + CALL mpi_cart_coords(cart, me, ndims, coords, ierr) + CALL mpi_cart_sub(cart, (/.TRUE., .FALSE. /), cartcol, ierr) + CALL mpi_cart_sub(cart, (/.FALSE., .TRUE. /), cartrow, ierr) +! +! Define local array A3 + CALL dist1d(cartrow, 0, n1, s1, n1p) + CALL dist1d(cartcol, 0, n2, s2, n2p) + CALL dist1d(cartrow, 0, n3, s3, n3p) + ALLOCATE( a3(n1,n2p,n3p), a3t(n3,n2p,n1p) ) + a3 = 0 + a3t = 0 + DO i=1,n1 + DO j=1,n2p + jglob = s2 + j + DO k=1,n3p + kglob = s3 + k + a3(i,j,k) = 10000*i + 100*jglob + kglob + END DO + END DO + END DO + IF( me.EQ. 0 ) THEN + WRITE(*,'(a)') '*** 2D partition ***' + WRITE(*,'(a,3i4)') 'Global dimensions of matrix a', n1, n2, n3 + END IF +! +! Tranpose A3(n1,n2/P1,n3/P2) -> A3T(n3,n2/P1,n1/P2) + CALL pptransp(cartrow, a3, a3t, 1, 3) +! +! Check A3T + kerrors = 0 + DO i=1,n1p + iglob = s1 + i + DO j=1,n2p + jglob = s2 + j + DO k=1,n3 + x = 10000*iglob + 100*jglob + k + IF( x .NE. a3t(k,j,i) ) kerrors = kerrors+1 + END DO + END DO + END DO + CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, & + & comm, ierr) + IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking a3', nerrors + DEALLOCATE(a3, a3t) +!-------------------------------------------------------------------------------- +! Epilogue +! + CALL mpi_finalize(ierr) +END PROGRAM main + diff --git a/pputils2/pptransp2.tpl b/pputils2/pptransp2.tpl new file mode 100644 index 0000000..ada1f71 --- /dev/null +++ b/pputils2/pptransp2.tpl @@ -0,0 +1,89 @@ +!> +!> @file pptransp2.tpl +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> + ! + INTEGER :: me, npes, i, j, istr, iend, ierr + INTEGER, DIMENSION(:), ALLOCATABLE :: ids, idr + INTEGER, DIMENSION(:,:), ALLOCATABLE :: ndists + INTEGER, DIMENSION(:,:), ALLOCATABLE :: offsets + INTEGER :: dims(lastdim), np(2), npmx(2) + INTEGER :: n1p, nlp, n1pmx, nlpmx, bufsiz, scount + INTEGER :: status(MPI_STATUS_SIZE) +!---------------------------------------------------------------------- +! 0. Prologue +! + CALL mpi_comm_rank(comm, me, ierr) + CALL mpi_comm_size(comm, npes, ierr) +! +! Determine send/receive proc. id + ALLOCATE(ids(npes), idr(npes)) + CALL partners(comm, ids, idr) +!---------------------------------------------------------------------- +! 1. Send/receive buffers +! +! Distribution of first and last partitionned dimensions + ALLOCATE(ndists(2,npes)) + ALLOCATE(offsets(2,0:npes)) + np(1) = SIZE(b,lastdim) ! Local first + np(2) = SIZE(a,lastdim) ! and last dimension + CALL mpi_allgather(np, 2, MPI_INTEGER, ndists, 2, MPI_INTEGER, comm, ierr) + offsets = 0 + DO i=1,npes + offsets(:,i) = offsets(:,i-1) + ndists(:,i) + END DO +! +! Allocate send and receive 1d buffers + npmx = MAXVAL(ndists,2) + bufsiz = npmx(1)*npmx(2) ! Maximum size of send/receive buffers + DO i=2,lastdim-1 + bufsiz = bufsiz * SIZE(a,i) + END DO + ALLOCATE(s_buf(bufsiz), r_buf(bufsiz) ) +!---------------------------------------------------------------------- +! 2. Exchange blocks +! + DO i=1,npes + istr = offsets(1,ids(i)) + 1 ! Partition a along first dim + iend = offsets(1,ids(i)+1) + dims = SHAPE(a) + dims(1) = iend-istr+1 + scount = PRODUCT(dims) + s_buf(1:scount) = RESHAPE(a(istr:iend,:), (/scount/)) !*** dim dependant ***! + CALL MPI_SENDRECV(s_buf, scount, mpitype, ids(i), i,& + & r_buf, bufsiz, mpitype, idr(i), i,& + & comm, status, ierr) + istr = offsets(2,idr(i)) + 1 ! Partition b along first dim + iend = offsets(2,idr(i)+1) + dims = SHAPE(b) + dims(1) = iend-istr+1 + b(istr:iend,:) = RESHAPE(r_buf, dims, order=(/lastdim, 1/)) !*** dim dependant ***! + END DO +!---------------------------------------------------------------------- +! 9. Epilogue +! + DEALLOCATE(ids, idr) + DEALLOCATE(ndists, offsets) + DEALLOCATE(s_buf, r_buf) +! diff --git a/pputils2/pptransp3.tpl b/pputils2/pptransp3.tpl new file mode 100644 index 0000000..ce817af --- /dev/null +++ b/pputils2/pptransp3.tpl @@ -0,0 +1,113 @@ +!> +!> @file pptransp3.tpl +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> + INTEGER :: me, npes, i, j, istr, iend, ierr + INTEGER, DIMENSION(:), ALLOCATABLE :: ids, idr + INTEGER, DIMENSION(:,:), ALLOCATABLE :: ndists + INTEGER, DIMENSION(:,:), ALLOCATABLE :: offsets + INTEGER :: dims(lastdim), np(2), npmx(2) + INTEGER :: n1p, nlp, n1pmx, nlpmx, bufsiz, scount + INTEGER :: status(MPI_STATUS_SIZE) +!---------------------------------------------------------------------- +! 0. Prologue +! + CALL mpi_comm_rank(comm, me, ierr) + CALL mpi_comm_size(comm, npes, ierr) +! +! Determine send/receive proc. id + ALLOCATE(ids(npes), idr(npes)) + CALL partners(comm, ids, idr) +!---------------------------------------------------------------------- +! 1. Send/receive buffers +! +! Distribution of dim1 and dim2 partitionned dimensions + ALLOCATE(ndists(2,npes)) + ALLOCATE(offsets(2,0:npes)) + np(1) = SIZE(b, dim2) ! Local first + np(2) = SIZE(a, dim2) ! and second dimension + CALL mpi_allgather(np, 2, MPI_INTEGER, ndists, 2, MPI_INTEGER, comm, ierr) + offsets = 0 + DO i=1,npes + offsets(:,i) = offsets(:,i-1) + ndists(:,i) + END DO +! +! Allocate send and receive 1d buffers + npmx = MAXVAL(ndists,2) + bufsiz = npmx(1)*npmx(2) ! Maximum size of send/receive buffers + DO i=1,lastdim + IF ( (i .NE. dim1) .AND. (i .NE. dim2) ) bufsiz = bufsiz * SIZE(a,i) + END DO + ALLOCATE(s_buf(bufsiz), r_buf(bufsiz) ) +!---------------------------------------------------------------------- +! 2. Exchange blocks +! + IF ( (dim1 .EQ. 1) .AND. ( dim2 .EQ. 2 ) ) THEN !*** dim dependant ***! + recv_order = (/2,1,3/) !*** dim dependant ***! + ELSE IF ( (dim1 .EQ. 1) .AND. ( dim2 .eq. 3 ) ) THEN !*** dim dependant ***! + recv_order = (/3,2,1/) !*** dim dependant ***! + ELSE IF ( (dim1 .EQ. 2) .AND. ( dim2 .eq. 3 ) ) THEN !*** dim dependant ***! + recv_order = (/1,3,2/) !*** dim dependant ***! + ELSE + IF ( me .EQ. 0 ) THEN + WRITE(*, '(a,i4,a,i4,a)') 'pptransp3: Cannot handle case dim1 = ', dim1, ', dim2 = ', dim2, '!' + CALL mpi_abort(MPI_COMM_WORLD, -1, ierr) + END IF + END IF +! + DO i=1,npes + istr = offsets(1,ids(i)) + 1 ! Partition a along dimension dim1 + iend = offsets(1,ids(i)+1) + dims = SHAPE(a) + dims(dim1) = iend-istr+1 + scount = PRODUCT(dims) + + IF (dim1 .EQ. 1) THEN !*** dim dependant ***! + s_buf(1:scount) = RESHAPE(a(istr:iend,:,:), (/scount/)) !*** dim dependant ***! + ELSE IF (dim1 .EQ. 2) THEN !*** dim dependant ***! + s_buf(1:scount) = RESHAPE(a(:,istr:iend,:), (/scount/)) !*** dim dependant ***! + END IF !*** dim dependant ***! + + CALL MPI_SENDRECV(s_buf, scount, mpitype, ids(i), i,& + & r_buf, bufsiz, mpitype, idr(i), i,& + & comm, status, ierr) + istr = offsets(2,idr(i)) + 1 ! Partition b along dimension dim1 + iend = offsets(2,idr(i)+1) + dims = SHAPE(b) + dims(dim1) = iend-istr+1 + + IF (dim1 .EQ. 1) THEN !*** dim dependant ***! + b(istr:iend,:,:) = RESHAPE(r_buf, dims, order=recv_order) !*** dim dependant ***! + ELSE IF (dim1 .EQ. 2) THEN !*** dim dependant ***! + b(:,istr:iend,:) = RESHAPE(r_buf, dims, order=recv_order) !*** dim dependant ***! + END IF !*** dim dependant ***! + + END DO +!---------------------------------------------------------------------- +! 9. Epilogue +! + DEALLOCATE(ids, idr) + DEALLOCATE(ndists, offsets) + DEALLOCATE(s_buf, r_buf) +! diff --git a/pputils2/pptransp4.tpl b/pputils2/pptransp4.tpl new file mode 100644 index 0000000..cec2004 --- /dev/null +++ b/pputils2/pptransp4.tpl @@ -0,0 +1,122 @@ +!> +!> @file pptransp4.tpl +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> + INTEGER :: me, npes, i, j, istr, iend, ierr + INTEGER, DIMENSION(:), ALLOCATABLE :: ids, idr + INTEGER, DIMENSION(:,:), ALLOCATABLE :: ndists + INTEGER, DIMENSION(:,:), ALLOCATABLE :: offsets + INTEGER :: dims(lastdim), np(2), npmx(2) + INTEGER :: n1p, nlp, n1pmx, nlpmx, bufsiz, scount + INTEGER :: status(MPI_STATUS_SIZE) +!---------------------------------------------------------------------- +! 0. Prologue +! + CALL mpi_comm_rank(comm, me, ierr) + CALL mpi_comm_size(comm, npes, ierr) +! +! Determine send/receive proc. id + ALLOCATE(ids(npes), idr(npes)) + CALL partners(comm, ids, idr) +!---------------------------------------------------------------------- +! 1. Send/receive buffers +! +! Distribution of dim1 and dim2 partitionned dimensions + ALLOCATE(ndists(2,npes)) + ALLOCATE(offsets(2,0:npes)) + np(1) = SIZE(b, dim2) ! Local first + np(2) = SIZE(a, dim2) ! and second dimension + CALL mpi_allgather(np, 2, MPI_INTEGER, ndists, 2, MPI_INTEGER, comm, ierr) + offsets = 0 + DO i=1,npes + offsets(:,i) = offsets(:,i-1) + ndists(:,i) + END DO +! +! Allocate send and receive 1d buffers + npmx = MAXVAL(ndists,2) + bufsiz = npmx(1)*npmx(2) ! Maximum size of send/receive buffers + DO i=1,lastdim + IF ( (i .NE. dim1) .AND. (i .NE. dim2) ) bufsiz = bufsiz * SIZE(a,i) + END DO + ALLOCATE(s_buf(bufsiz), r_buf(bufsiz) ) +!---------------------------------------------------------------------- +! 2. Exchange blocks +! + IF ( (dim1 .EQ. 1) .AND. (dim2 .EQ. 2) ) THEN !*** dim dependant ***! + recv_order = (/2,1,3,4/) !*** dim dependant ***! + ELSE IF ( (dim1 .EQ. 1) .AND. ( dim2 .eq. 3 ) ) THEN !*** dim dependant ***! + recv_order = (/3,2,1,4/) !*** dim dependant ***! + ELSE IF ( (dim1 .EQ. 1) .AND. ( dim2 .eq. 4 ) ) THEN !*** dim dependant ***! + recv_order = (/4,2,3,1/) !*** dim dependant ***! + ELSE IF ( (dim1 .EQ. 2) .AND. ( dim2 .eq. 3 ) ) THEN !*** dim dependant ***! + recv_order = (/1,3,2,4/) !*** dim dependant ***! + ELSE IF ( (dim1 .EQ. 2) .AND. ( dim2 .eq. 4 ) ) THEN !*** dim dependant ***! + recv_order = (/1,4,3,2/) !*** dim dependant ***! + ELSE IF ( (dim1 .EQ. 3) .AND. ( dim2 .eq. 4 ) ) THEN !*** dim dependant ***! + recv_order = (/1,2,4,3/) !*** dim dependant ***! + ELSE + IF ( me .EQ. 0 ) THEN + WRITE(*, '(a,i4,a,i4,a)') 'pptransp4: Cannot handle case dim1 = ', dim1, ', dim2 = ', dim2, '!' + CALL mpi_abort(MPI_COMM_WORLD, -1, ierr) + END IF + END IF +! + DO i=1,npes + istr = offsets(1,ids(i)) + 1 ! Partition a along dimension dim1 + iend = offsets(1,ids(i)+1) + dims = SHAPE(a) + dims(dim1) = iend-istr+1 + scount = PRODUCT(dims) + + IF (dim1 .EQ. 1) THEN !*** dim dependant ***! + s_buf(1:scount) = RESHAPE(a(istr:iend,:,:,:), (/scount/)) !*** dim dependant ***! + ELSE IF (dim1 .EQ. 2) THEN !*** dim dependant ***! + s_buf(1:scount) = RESHAPE(a(:,istr:iend,:,:), (/scount/)) !*** dim dependant ***! + ELSE IF (dim1 .EQ. 3) THEN !*** dim dependant ***! + s_buf(1:scount) = RESHAPE(a(:,:,istr:iend,:), (/scount/)) !*** dim dependant ***! + END IF !*** dim dependant ***! + + CALL MPI_SENDRECV(s_buf, scount, mpitype, ids(i), i,& + & r_buf, bufsiz, mpitype, idr(i), i,& + & comm, status, ierr) + istr = offsets(2,idr(i)) + 1 ! Partition b along dimension dim1 + iend = offsets(2,idr(i)+1) + dims = SHAPE(b) + dims(dim1) = iend-istr+1 + + IF (dim1 .EQ. 1) THEN !*** dim dependant ***! + b(istr:iend,:,:,:) = RESHAPE(r_buf, dims, order=recv_order) !*** dim dependant ***! + ELSE IF (dim1 .EQ. 2) THEN !*** dim dependant ***! + b(:,istr:iend,:,:) = RESHAPE(r_buf, dims, order=recv_order) !*** dim dependant ***! + ELSE IF (dim1 .EQ. 3) THEN !*** dim dependant ***! + b(:,:,istr:iend,:) = RESHAPE(r_buf, dims, order=recv_order) !*** dim dependant ***! + END IF !*** dim dependant ***! + END DO +!---------------------------------------------------------------------- +! 9. Epilogue +! + DEALLOCATE(ids, idr) + DEALLOCATE(ndists, offsets) + DEALLOCATE(s_buf, r_buf) +! diff --git a/pputils2/pputils2.f90 b/pputils2/pputils2.f90 new file mode 100644 index 0000000..348a14a --- /dev/null +++ b/pputils2/pputils2.f90 @@ -0,0 +1,456 @@ +!> +!> @file pputils2.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +MODULE pputils2 +! +! PPUTILS2: Some MPI utilities. +! +! T.M. Tran, CRPP-EPFL +! September 2010 +! September 2013: add exchange, norm2 +! November 2013: add timera, hostlist +! + USE iso_fortran_env, ONLY : rkind => real64 + USE mpi + IMPLICIT NONE + PRIVATE + PUBLIC :: pptransp, dist1d, exchange, ppnorm2, timera, hostlist +! + INTERFACE pptransp + MODULE PROCEDURE pptransp2_r, pptransp3_r, pptransp4_r + MODULE PROCEDURE pptransp2_c, pptransp3_c, pptransp4_c + END INTERFACE + INTERFACE exchange + MODULE PROCEDURE exchange_2d, exchange_2d_new + END INTERFACE exchange + INTERFACE ppnorm2 + MODULE PROCEDURE norm2_para_2d + END INTERFACE ppnorm2 +! +CONTAINS +!======================================================================= + SUBROUTINE pptransp2_r(comm, a, b) +! +! Handles double precision-type matrices. +! +! Transpose of rank 2 matrix A: +! A(n1,n2/P) -> B(n2,n1/P) +! + INTEGER, INTENT(in) :: comm + REAL(rkind), DIMENSION(:,:), INTENT(in) :: a !*** dim dependant ***! + REAL(rkind), DIMENSION(:,:), INTENT(out) :: b !*** dim dependant ***! + INTEGER, PARAMETER :: lastdim = 2, mpitype=MPI_DOUBLE_PRECISION !*** dim dependant ***! + REAL(rkind), DIMENSION(:), ALLOCATABLE :: s_buf, r_buf +! + INCLUDE 'pptransp2.tpl' +! + END SUBROUTINE pptransp2_r +!======================================================================= + SUBROUTINE pptransp2_c(comm, a, b) +! +! Same as pptransp2_r, but for double complex-type matrices. +! + INTEGER, INTENT(in) :: comm + COMPLEX(rkind), DIMENSION(:,:), INTENT(in) :: a !*** dim dependant ***! + COMPLEX(rkind), DIMENSION(:,:), INTENT(out) :: b !*** dim dependant ***! + INTEGER, PARAMETER :: lastdim = 2, mpitype=MPI_DOUBLE_COMPLEX !*** dim dependant ***! + COMPLEX(rkind), DIMENSION(:), ALLOCATABLE :: s_buf, r_buf +! + INCLUDE 'pptransp2.tpl' +! + END SUBROUTINE pptransp2_c +!======================================================================= + SUBROUTINE pptransp3_r(comm, a, b, dim1, dim2) +! +! Handles double precision-type matrices. +! +! Transpose dimensions dim1 and dim2 of rank 3 matrix A. +! dim1 and dim2 are such that 1 <= dim1 < dim2 <= 3. +! At input, matrix A is partitioned along dimension dim2 of matrix A. +! At exit, B = transpose(A), and B is partitioned along dimension dim1 of matrix A. +! +! For example: +! dim1 = 1, dim2 = 2 : A(n1,n2/P,n3) -> B(n2,n1/P,n3) +! dim1 = 1, dim2 = 3 : A(n1,n2,n3/P) -> B(n3,n2,n1/P) +! dim1 = 2, dim2 = 3 : A(n1,n2,n3/P) -> B(n1,n3,n2/P) +! + INTEGER, INTENT(in) :: comm + REAL(rkind), DIMENSION(:,:,:), INTENT(in) :: a !*** dim dependant ***! + REAL(rkind), DIMENSION(:,:,:), INTENT(out) :: b !*** dim dependant ***! + INTEGER, INTENT(in) :: dim1, dim2 + INTEGER, PARAMETER :: lastdim = 3, mpitype=MPI_DOUBLE_PRECISION !*** dim dependant ***! + REAL(rkind), DIMENSION(:), ALLOCATABLE :: s_buf, r_buf + INTEGER :: recv_order(lastdim) + ! + INCLUDE 'pptransp3.tpl' + ! + END SUBROUTINE pptransp3_r +!======================================================================= + SUBROUTINE pptransp3_c(comm, a, b, dim1, dim2) +! +! Same as pptransp3_r, but for double complex-type matrices. +! + INTEGER, INTENT(in) :: comm + COMPLEX(rkind), DIMENSION(:,:,:), INTENT(in) :: a !*** dim dependant ***! + COMPLEX(rkind), DIMENSION(:,:,:), INTENT(out) :: b !*** dim dependant ***! + INTEGER, INTENT(in) :: dim1, dim2 + INTEGER, PARAMETER :: lastdim = 3, mpitype=MPI_DOUBLE_COMPLEX !*** dim dependant ***! + COMPLEX(rkind), DIMENSION(:), ALLOCATABLE :: s_buf, r_buf + INTEGER :: recv_order(lastdim) + ! + INCLUDE 'pptransp3.tpl' + ! + END SUBROUTINE pptransp3_c +!======================================================================= + SUBROUTINE pptransp4_r(comm, a, b, dim1, dim2) +! +! Handles double precision-type matrices. +! +! Transpose dimensions dim1 and dim2 of rank 4 matrix A. +! dim1 and dim2 are such that 1 <= dim1 < dim2 <= 4. +! At input, matrix A is partitioned along dimension dim2 of matrix A. +! At exit, B = transpose(A), and B is partitioned along dimension dim1 of matrix A. +! +! For example: +! dim1 = 1, dim2 = 2 : A(n1,n2/P,n3 ,n4 ) -> B(n2,n1/P,n3 ,n4 ) +! dim1 = 1, dim2 = 3 : A(n1,n2 ,n3/P,n4 ) -> B(n3,n2 ,n1/P,n4 ) +! dim1 = 1, dim2 = 4 : A(n1,n2 ,n3 ,n4/P) -> B(n4,n2 ,n3 ,n1/P) +! dim1 = 2, dim2 = 3 : A(n1,n2 ,n3/P,n4 ) -> B(n1,n3 ,n2/P,n4 ) +! dim1 = 2, dim2 = 4 : A(n1,n2 ,n3 ,n4/P) -> B(n1,n4 ,n3 ,n2/P) +! dim1 = 3, dim2 = 4 : A(n1,n2 ,n3 ,n4/P) -> B(n1,n2 ,n4 ,n3/P) +! + INTEGER, INTENT(in) :: comm + REAL(rkind), DIMENSION(:,:,:,:), INTENT(in ) :: a !*** dim dependant ***! + REAL(rkind), DIMENSION(:,:,:,:), INTENT(out) :: b !*** dim dependant ***! + INTEGER, INTENT(in) :: dim1, dim2 + INTEGER, PARAMETER :: lastdim = 4, mpitype=MPI_DOUBLE_PRECISION !*** dim dependant ***! + REAL(rkind), DIMENSION(:), ALLOCATABLE :: s_buf, r_buf + INTEGER :: recv_order(lastdim) +! + INCLUDE 'pptransp4.tpl' +! + END SUBROUTINE pptransp4_r +!======================================================================= + SUBROUTINE pptransp4_c(comm, a, b, dim1, dim2) +! +! Same as pptransp4_r, but for double complex-type matrices +! + INTEGER, INTENT(in) :: comm + COMPLEX(rkind), DIMENSION(:,:,:,:), INTENT(in) :: a !*** dim dependant ***! + COMPLEX(rkind), DIMENSION(:,:,:,:), INTENT(out) :: b !*** dim dependant ***! + INTEGER, INTENT(in) :: dim1, dim2 + INTEGER, PARAMETER :: lastdim = 4, mpitype=MPI_DOUBLE_COMPLEX !*** dim dependant ***! + COMPLEX(rkind), DIMENSION(:), ALLOCATABLE :: s_buf, r_buf + INTEGER :: recv_order(lastdim) +! + INCLUDE 'pptransp4.tpl' +! + END SUBROUTINE pptransp4_c +!======================================================================= + SUBROUTINE dist1d(comm, s0, ntot, s, nloc) +! +! 1d distribute ntot elements, returns offset s and local number of +! elements nloc. +! + INTEGER, INTENT(in) :: s0, ntot + INTEGER, INTENT(out) :: s, nloc + INTEGER :: comm, me, npes, ierr, naver, rem +! + CALL MPI_COMM_SIZE(comm, npes, ierr) + CALL MPI_COMM_RANK(comm, me, ierr) + naver = ntot/npes + rem = MODULO(ntot,npes) + s = s0 + MIN(rem,me) + me*naver + nloc = naver + IF( me.LT.rem ) nloc = nloc+1 +! + END SUBROUTINE dist1d +!======================================================================= + SUBROUTINE exchange_2d_new(comm, u, garea) +! +! Exhange ghost cells with (west,east,south,north) neighbors. +! Assume same ghost cells on each dimension: +! garea(1) : number of ghost cells on west and east boundaries +! garea(2) : number of ghost cells on south and north boundaries +! Both are equal to 1 by default. +! + INTEGER, INTENT(in) :: comm + REAL(rkind), ALLOCATABLE, INTENT(inout) :: u(:,:) + INTEGER, OPTIONAL, INTENT(in) :: garea(2) + INTEGER :: neighs(4), ierr +! + CALL mpi_cart_shift(comm, 0, 1, neighs(1), neighs(2), ierr) + CALL mpi_cart_shift(comm, 1, 1, neighs(3), neighs(4), ierr) + CALL exchange_2d(comm, neighs, u, garea) + END SUBROUTINE exchange_2d_new +!======================================================================= + SUBROUTINE exchange_2d(comm, neighs, u, garea) +! +! Exhange ghost cells with (west,east,south,north) neighbors. +! Assume same ghost cells on each dimension: +! garea(1) : number of ghost cells on west and east boundaries +! garea(2) : number of ghost cells on south and north boundaries +! Both are equal to 1 by default. +! + INTEGER, INTENT(in) :: comm + INTEGER, INTENT(in) :: neighs(4) + REAL(rkind), ALLOCATABLE, INTENT(inout) :: u(:,:) + INTEGER, OPTIONAL, INTENT(in) :: garea(2) +! + INTEGER :: cols, rows + INTEGER :: ierr + INTEGER, PARAMETER :: ndim=2 + INTEGER, DIMENSION(ndim) :: g, lb, ub, s, e, n +! + g = [1,1] + IF(PRESENT(garea)) g = garea + lb = LBOUND(u) + ub = UBOUND(u) + s = lb + g + e = ub - g + n = ub - lb + 1 ! include ghost cells +! +! g(2) matrix full rows with stride n(1) + CALL mpi_type_vector(n(2), g(2), n(1), MPI_DOUBLE_PRECISION, rows, ierr) + CALL mpi_type_commit(rows, ierr) +! +! g(1) contiguous matrix full columns + CALL mpi_type_contiguous(n(1)*g(1), MPI_DOUBLE_PRECISION, cols, ierr) + CALL mpi_type_commit(cols, ierr) +! +! Exchange along first dimension + CALL mpi_sendrecv(u(s(1), lb(2)), 1, rows, neighs(1), 0, & + & u(e(1)+1,lb(2)), 1, rows, neighs(2), 0, & + & comm, MPI_STATUS_IGNORE, ierr) + CALL mpi_sendrecv(u(e(1)-g(1)+1,lb(2)), 1, rows, neighs(2), 0, & + & u(lb(1), lb(2)), 1, rows, neighs(1), 0, & + & comm, MPI_STATUS_IGNORE, ierr) +! +! Exchange along second dimension + CALL mpi_sendrecv(u(lb(1),s(2)), 1, cols, neighs(3), 0, & + & u(lb(1),e(2)+1), 1, cols, neighs(4), 0, & + & comm, MPI_STATUS_IGNORE, ierr) + CALL mpi_sendrecv(u(lb(1),e(2)-g(2)+1), 1, cols, neighs(4), 0, & + & u(lb(1),lb(2)), 1, cols, neighs(3), 0, & + & comm, MPI_STATUS_IGNORE, ierr) + END SUBROUTINE exchange_2d +!======================================================================= + FUNCTION norm2_para_2d(x, comm, root, garea) RESULT(res) +! +! Vector norm of 2d distributed array with ghost cells +! + USE mpi + REAL(rkind), ALLOCATABLE, INTENT(in) :: x(:,:) + INTEGER, INTENT(in) :: comm + INTEGER, INTENT(in), OPTIONAL :: root + INTEGER, INTENT(in), OPTIONAL :: garea(:) + REAL(rkind) :: res + INTEGER, PARAMETER :: ndim=2 + INTEGER, DIMENSION(ndim) :: g, s, e + REAL(rkind) :: res_loc + INTEGER :: r, me, ierr +! + CALL mpi_comm_rank(comm, me, ierr) + g = [1,1] + IF(PRESENT(garea)) g = garea + r = 0 + IF(PRESENT(root)) r = root + s = LBOUND(x) + g + e = UBOUND(x) - g + res_loc = SUM(x(s(1):e(1),s(2):e(2))**2) + CALL mpi_reduce(res_loc, res, 1, MPI_DOUBLE_PRECISION, MPI_SUM, r, comm, ierr) + if(me.eq.r) res = SQRT(res) + END FUNCTION norm2_para_2d +!======================================================================= + SUBROUTINE timera(cntrl, str, eltime, comm) +! +! Timers (cntrl=0/1 to Init/Update) +! + USE mpi + INTEGER, INTENT(in) :: cntrl + CHARACTER(len=*), INTENT(in) :: str + DOUBLE PRECISION, OPTIONAL, INTENT(out) :: eltime + INTEGER, OPTIONAL, INTENT(in) :: comm +! + INTEGER, PARAMETER :: ncmax=128, maxlen=32 +! + INTEGER, SAVE :: icall=0, nc=0 + DOUBLE PRECISION, SAVE :: startt0=0.0 + DOUBLE PRECISION, DIMENSION(ncmax), SAVE :: startt = 0.0, endt = 0.0 + CHARACTER(len=maxlen), SAVE :: which(ncmax) +! + DOUBLE PRECISION, DIMENSION(ncmax) :: endtmin, endtmax + INTEGER :: comm0, me, lstr, found, i, ierr +!________________________________________________________________________________ + IF(PRESENT(comm)) THEN + comm0 = comm + ELSE + comm0 = MPI_COMM_WORLD + END IF + CALL mpi_comm_rank(comm0, me, ierr) + CALL mpi_barrier(comm0, ierr) +!________________________________________________________________________________ +! + IF( icall .EQ. 0 ) THEN + icall = icall+1 + startt0 = mpi_wtime() + END IF + + lstr = MIN(LEN_TRIM(str),maxlen) + IF( lstr .GT. 0 ) found = loc(str) +!________________________________________________________________________________ +! + SELECT CASE (cntrl) +! + CASE(-1) ! Current wall time + IF( PRESENT(eltime) ) THEN + eltime = mpi_wtime() - startt0 + ELSE IF (me .EQ. 0 ) THEN + WRITE(*,'(/a,a,1pe10.3/)') "++ ", ' Wall time used so far = ', & + & mpi_wtime() - startt0 + END IF +! + CASE(0) ! Init Timer + IF( found .EQ. 0 ) THEN ! Called for the 1st time for 'str' + nc = nc+1 + which(nc) = str(1:lstr) + found = nc + END IF + startt(found) = mpi_wtime() +! + CASE(1) ! Update timer + endt(found) = mpi_wtime() - startt(found) + IF( PRESENT(eltime) ) THEN + eltime = endt(found) + ELSE IF (me .EQ. 0 ) THEN + WRITE(*,'(/a,a,1pe10.3/)') "++ "//str, ' wall clock time = ', & + & endt(found) + END IF +! + CASE(2) ! Update and reset timer + endt(found) = endt(found) + mpi_wtime() - startt(found) + startt(found) = mpi_wtime() + IF( PRESENT(eltime) ) THEN + eltime = endt(found) + END IF +! + CASE(9) ! Display all timers + IF( nc .GT. 0 ) THEN + CALL mpi_reduce(endt, endtmin, nc, MPI_DOUBLE_PRECISION, MPI_MIN, 0, comm0, ierr) + CALL mpi_reduce(endt, endtmax, nc, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm0, ierr) + IF( me .EQ. 0 ) THEN + WRITE(*,'(a)') "Minmax Timer Summary" + WRITE(*,'(a)') "====================" + DO i=1,nc + WRITE(*,'(a20,2x,2(1pe12.3))') TRIM(which(i))//":", endtmin(i), endtmax(i) + END DO + END IF + END IF +! + END SELECT +! + CONTAINS + INTEGER FUNCTION loc(str) + CHARACTER(len=*), INTENT(in) :: str + INTEGER :: i, ind + loc = 0 + DO i=1,nc + ind = INDEX(which(i), str(1:lstr)) + IF( ind .GT. 0 .AND. LEN_TRIM(which(i)) .EQ. lstr ) THEN + loc = i + EXIT + END IF + END DO + END FUNCTION loc + END SUBROUTINE timera +!======================================================================= + SUBROUTINE hostlist(comm) +! +! Print list of hostnames in comm +! + USE mpi + INTEGER, OPTIONAL, INTENT(in) :: comm +! + CHARACTER(len=MPI_MAX_PROCESSOR_NAME) :: procname + CHARACTER(len=MPI_MAX_PROCESSOR_NAME), ALLOCATABLE :: procnames(:) + INTEGER :: comm0, me, nprocs, ierr, i, l +! + IF(PRESENT(comm)) THEN + comm0 = comm + ELSE + comm0 = mpi_comm_world + END IF + CALL MPI_COMM_RANK(comm0, me, ierr) + CALL MPI_COMM_SIZE(comm0, nprocs, ierr) + CALL MPI_GET_PROCESSOR_NAME(procname, l, ierr) + ALLOCATE(procnames(0:nprocs-1)) + CALL mpi_gather(procname,MPI_MAX_PROCESSOR_NAME,mpi_character, & + & procnames,MPI_MAX_PROCESSOR_NAME,mpi_character,0, & + & comm0,ierr) + IF(me.EQ.0) THEN + WRITE(*,'(a/(10(1x,a)))') 'Host list:', & + & (TRIM(procnames(i)),i=0,nprocs-1) + END IF + END SUBROUTINE hostlist +!======================================================================= +! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! Private routines/functions ! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + SUBROUTINE partners(comm, ids, idr) +! +! Compute ranks of send and receive procs. +! + IMPLICIT NONE + INTEGER, INTENT(in) :: comm + INTEGER, INTENT(out) :: ids(:), idr(:) + INTEGER :: me, npes, ierr, i +! + CALL mpi_comm_rank(comm, me, ierr) + CALL mpi_comm_size(comm, npes, ierr) + IF( ispower2(npes) ) THEN + DO i=0,npes-1 + ids(i+1) = IEOR(me, i) + idr(i+1) = ids(i+1) + END DO + ELSE + DO i=0,npes-1 + ids(i+1) = MODULO(me+i, npes) + idr(i+1) = MODULO(me-i, npes) + END DO + END IF + END SUBROUTINE partners +!======================================================================= + LOGICAL FUNCTION ispower2(n) + INTEGER, INTENT(in) :: n + INTEGER :: l + l=2 + DO WHILE ( l .LT. n ) + l = 2*l + END DO + ispower2 = l .EQ. n + END FUNCTION ispower2 +!======================================================================= +END MODULE pputils2 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt new file mode 100644 index 0000000..6c6dac0 --- /dev/null +++ b/src/CMakeLists.txt @@ -0,0 +1,111 @@ +/** + * @file CMakeLists.txt + * + * @brief + * + * @copyright + * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) + * SPC (Swiss Plasma Center) + * + * spclibs is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * + * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + * + * @authors + * (in alphabetical order) + * @author Nicolas Richart + * @author Trach-Minh Tran + */ +project(bsplines_src) + +set(SRCS + bsplines.f90 + matrix.f90 + sparse_mod.f90 + lapack_extra.f + math_util.f90 +) + +set(SRCS_PP + conmat.f90 + ) + +set(PUBLIC_MODULES + bsplines.mod + matrix.mod + math_util.mod + conmat_mod.mod + sparse.mod + ) + +if(HAS_PARDISO) + list(APPEND ${SRCS_PP} pardiso_mod.f90) +endif() + +set_property(SOURCE conmat.f90 APPEND PROPERTY COMPILE_OPTIONS -DWSMP ${MKL_DEFINITIONS}) + +if(HAS_MUMPS) + list(APPEND SRCS + multigrid_mod.f90 + ) + + list(APPEND SRCS_PP + mumps_mod.f90 + csr_mod.f90 + cds_mod.f90 + ) + + list(APPEND PUBLIC_MODULES + cds.mod + csr.mod + mumps_bsplines.mod) + + set_property(SOURCE conmat.f90 APPEND PROPERTY COMPILE_OPTIONS -DMUMPS) +endif() + +set(_public_headers) +foreach(_modules ${PUBLIC_MODULES}) + list(APPEND _public_headers ${CMAKE_CURRENT_BINARY_DIR}/${_modules}) +endforeach() +set_property(SOURCE ${SRCS_PP} APPEND PROPERTY COMPILE_OPTIONS -cpp) + +include(GNUInstallDirs) + +add_library(bsplines STATIC ${SRCS} ${SRCS_PP}) +target_include_directories(bsplines + PRIVATE $ + ${MUMPS_INCLUDE_DIR} + INTERFACE $ + $ + ) + +set_property(TARGET bsplines + PROPERTY PUBLIC_HEADER ${_public_headers}) + +target_link_libraries(bsplines + PUBLIC futils pppack pputils2 fft + ${BLAS_LIBRARIES} + ${MUMPS_LIBRARIES} + ${LAPACK_LIBRARIES} + ) + + +if(MKL_Fortran_FLAGS) + separate_arguments(MKL_Fortran_FLAGS) + target_compile_options(bsplines PUBLIC ${MKL_Fortran_FLAGS}) + target_link_options(bsplines PUBLIC ${MKL_Fortran_FLAGS}) +endif() + +install(TARGETS bsplines + EXPORT ${BSPLINES_EXPORT_TARGETS} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} +) diff --git a/src/Makefile b/src/Makefile new file mode 100644 index 0000000..229a18e --- /dev/null +++ b/src/Makefile @@ -0,0 +1,176 @@ +# +# @file Makefile +# +# @brief +# +# @copyright +# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +# SPC (Swiss Plasma Center) +# +# spclibs is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . +# +# @authors +# (in alphabetical order) +# @author Stephan Brunner +# @author Sébastien Jolliet +# @author Trach-Minh Tran +# +PREFIX=/usr/local/crpp +FUTILS=$(PREFIX)/futils +PPPACK=../pppack +PPUTILS2=../pputils2 + +MPIF90 = mpif90 +F90 = mpif90 +LD = $(MPIF90) + +debug = -g -traceback -check bounds -fpe0 -warn alignments -warn unused +debug = -g -traceback -check bounds -fpe0 -warn alignments +optim = -O3 -xHOST + +#OPT=$(debug) +OPT=$(optim) + +F90FLAGS = $(OPT) -fPIC -I. -I$(FUTILS)/include + +CC = cc +CFLAGS = -O2 + +SPL_OBJS = bsplines.o matrix.o sparse_mod.o pardiso_mod.o \ + lapack_extra.o conmat.o math_util.o + +ifdef MKL +SPBLAS = -DMKL +endif + +ifdef MUMPS +SPL_OBJS += mumps_mod.o csr_mod.o cds_mod.o multigrid_mod.o +F90FLAGS += -I$(MUMPS)/include +endif + +ifdef WSMP +SPL_OBJS += wsmp_mod.o pwsmp_mod.o +endif + +ifdef PETSC_DIR +SPL_OBJS += petsc_mod.o +FCCPFLAGS = -I$(PETSC_DIR)/include -I$(PETSC_DIR)/$(PETSC_ARCH)/include +endif + +.SUFFIXES: +.SUFFIXES: .o .c .f90 .f .F90 + +.f90.o: + $(MPIF90) $(F90FLAGS) -c $< +.F90.o: + $(MPIF90) $(F90FLAGS) $(FCCPFLAGS) -c $< +.f.o: + $(F90) $(F90FLAGS) -c $< + +SUBDIRS = pputils2 pppack fft +subdirs: $(SUBDIRS) +.PHONY: subdirs $(SUBSDIRS) $(PPUTILS2) + +$(SUBDIRS): + $(MAKE) "OPT=$(OPT)" -C ../$@ lib + +lib: subdirs libbsplines.a + cp -p $(PPPACK)/libpppack.a ./ + touch lib + cp -p lib ../examples + +libbsplines.a: $(SPL_OBJS) + xiar r $@ $? + ranlib $@ + +debug: + make clean + make "OPT=$(debug)" lib + mkdir -p .g + cp -p libbsplines.a $(PPPACK)/libpppack.a *.mod .g/ + +opt: + make clean + make "OPT=$(optim)" lib + mkdir -p $(PREFIX)/{lib,include}/O + mkdir -p .O + cp -p libbsplines.a $(PPPACK)/libpppack.a *.mod .O/ + +install: debug opt + mkdir -p $(PREFIX)/{lib,include}/g + mv .g/*.a $(PREFIX)/lib/g/ + mv .g/*.mod $(PREFIX)/include/g/ + mkdir -p $(PREFIX)/{lib,include}/O + mv .O/*.a $(PREFIX)/lib/O/ + mv .O/*.mod $(PREFIX)/include/O/ + + +uninstall: + rm -f $(PREFIX)/include/{O,g}/bsplines.mod \ + $(PREFIX)/include/{O,g}/cds.mod \ + $(PREFIX)/include/{O,g}/conmat_mod.mod \ + $(PREFIX)/include/{O,g}/csr.mod \ + $(PREFIX)/include/{O,g}/math_util.mod \ + $(PREFIX)/include/{O,g}/matrix.mod \ + $(PREFIX)/include/{O,g}/multigrid.mod \ + $(PREFIX)/include/{O,g}/mumps_bsplines.mod \ + $(PREFIX)/include/{O,g}/pardiso_bsplines.mod \ + $(PREFIX)/include/{O,g}/petsc_bsplines.mod \ + $(PREFIX)/include/{O,g}/sparse.mod \ + $(PREFIX)/include/{O,g}/wsmp_bsplines.mod \ + $(PREFIX)/lib/{O,g}/libbsplines.a \ + $(PREFIX)/lib/{O,g}/libpppack.a + +matrix.o: matrix.f90 +sparse_mod.o: sparse_mod.f90 +bsplines.o: bsplines.f90 matrix.o +multigrid_mod.o: bsplines.o matrix.o conmat.o csr_mod.o cds_mod.o + +conmat.o: conmat.f90 conmat.tpl conmat_1d.tpl zconmat.tpl zconmat_1d.tpl conrhs.tpl + $(F90) -fpp -DMKL -DWSMP -DMUMPS $(F90FLAGS) -c conmat.f90 + +cds_mod.o: cds_mod.f90 + $(F90) -fpp $(SPBLAS) $(F90FLAGS) -c cds_mod.f90 + +pardiso_mod.o: pardiso_mod.f90 sparse_mod.o psum_mat.tpl p2p_mat.tpl + $(F90) -fpp $(SPBLAS) $(F90FLAGS) -c pardiso_mod.f90 + +mumps_mod.o:mumps_mod.f90 sparse_mod.o psum_mat.tpl p2p_mat.tpl + $(F90) -fpp $(SPBLAS) $(F90FLAGS) -I$(PPUTILS2) -c mumps_mod.f90 + +wsmp_mod.o: wsmp_mod.f90 sparse_mod.o psum_mat.tpl p2p_mat.tpl + $(F90) -fpp $(SPBLAS) $(F90FLAGS) -I$(PPUTILS2) -c wsmp_mod.f90 + +pwsmp_mod.o: pwsmp_mod.f90 sparse_mod.o wsmp_mod.o psum_mat.tpl p2p_mat.tpl + $(F90) -fpp $(SPBLAS) $(F90FLAGS) -I$(PPUTILS2) -c pwsmp_mod.f90 + +petsc_mod.o: petsc_mod.F90 sparse_mod.o + $(F90) -fpp $(FCCPFLAGS) $(SPBLAS) $(F90FLAGS) -I$(PPUTILS2) -c petsc_mod.F90 + +csr_mod.o: csr_mod.f90 sparse_mod.o mumps_mod.o + $(F90) -fpp $(SPBLAS) $(F90FLAGS) -I$(PPUTILS2) -c csr_mod.f90 + +tags: + etags *.f *.f90 $(PPPACK)/*.f90 + +clean: + $(MAKE) -C $(PPPACK) clean + $(MAKE) -C ../fft clean + rm -f *.o *.mod *~ a.out + +distclean: clean + $(MAKE) -C $(PPPACK) distclean + $(MAKE) -C ../fft distclean + $(MAKE) -C $(PPUTILS2) distclean + rm -f lib *.a *.mod ../bin/* + rm -rf .O .g diff --git a/src/bsplines.f90 b/src/bsplines.f90 new file mode 100644 index 0000000..1feff79 --- /dev/null +++ b/src/bsplines.f90 @@ -0,0 +1,4285 @@ +!> +!> @file bsplines.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Stephan Brunner +!> @author Trach-Minh Tran +!> +MODULE bsplines +! +! BSPLINES: A module to construct B-Splines of any order on +! non-equidistant mesh. Can be used for interpolation and +! Finite Element discretization. +! +! T.M. Tran, S. Brunner, CRPP-EPFL +! February 2007 +! + USE matrix + IMPLICIT NONE + PRIVATE + PUBLIC :: spline1d, set_spline, get_dim, get_gauss, gridval + PUBLIC :: spline2d, spline2d1d, def_knots, allsplines + PUBLIC :: set_splcoef, get_splcoef + PUBLIC :: fintg, calc_integ, destroy_sp + PUBLIC :: gauleg, CompMassMatrix + PUBLIC :: basfun_recur, basfun, def_basfun, is_equid, locintv_old, locintv + PUBLIC :: calc_fftmass, calc_fftmass_old + PUBLIC :: init_dft, ft_basfun + PUBLIC :: getgrad + PUBLIC :: dftmap +! + DOUBLE PRECISION, PARAMETER :: pi=3.1415926535897931d0 +! + TYPE dftmap + INTEGER :: n ! Total number of modes + INTEGER :: kmin, kmax ! Define the Foyrier window + INTEGER :: dk ! Number of modes in window + DOUBLE PRECISION :: dx ! Interval in real space + INTEGER, POINTER :: mode_couplings(:) => NULL() ! Table of mode couplings + DOUBLE COMPLEX, POINTER :: coefs(:,:) => NULL() ! The restricted Fourier coefs + END TYPE dftmap +! + TYPE spline1d + INTEGER :: order ! Spline order = spline degree + 1 + INTEGER :: nints ! Number of knot intervals + INTEGER :: nsites ! Number of interpolation sites + INTEGER :: dim ! Dimension of spline space + INTEGER :: left=0 ! Save value used by routine LOCATE + LOGICAL :: period ! is the grid periodic ? + LOGICAL :: nlequid ! is the grid equidistant ? + LOGICAL :: nlppform ! Construct and build PPFORM in GRIDVAL if .TRUE. + DOUBLE PRECISION :: lperiod ! Periodicity + DOUBLE PRECISION :: hinv ! Inverse of mesh size, when nlequid=T + INTEGER, POINTER ::& + & fmap(:) => NULL() ! Mapping of fine to coarse mesh + DOUBLE PRECISION, POINTER :: & + & knots(:) => NULL(), & ! Spline knots (-p:n+p) + & val0(:,:,:) => NULL(), & ! Values and deriv. at left boundary of all splines + & valc(:,:) => NULL(), & ! Values and deriv. at left boundary of equid. per. splines + & gausx(:,:) => NULL(), & ! Gauss abscissas + & gausw(:,:) => NULL(), & ! Gauss weights + & intspl(:) => NULL(), & ! Integral of splines + & ppform(:,:) => NULL(), & ! PPFORM coefs + & bcoefs(:) => NULL() ! Spline coefs + DOUBLE COMPLEX, POINTER :: & + & ppformz(:,:) => NULL(), & ! PPFORM coefs for complex function + & bcoefsc(:) => NULL() ! Spline coefs for complex function + TYPE(GBMAT) :: mat ! Interpolation matrix + TYPE(periodic_mat) :: matp ! Interpolation matrix (periodic case) + TYPE(dftmap) :: dft ! Define DFT mapping + END TYPE spline1d +! + TYPE spline2d + TYPE(spline1d) :: sp1 ! Spline in direction 1 + TYPE(spline1d) :: sp2 ! Spline in direction 2 + DOUBLE PRECISION, POINTER :: ppform(:,:,:,:) => NULL() ! 2d PPFORM coefs + DOUBLE PRECISION, POINTER :: bcoefs(:,:) => NULL() ! Spline coefs + DOUBLE COMPLEX, POINTER :: ppformz(:,:,:,:) => NULL() ! PPFORM coefs for complex function + DOUBLE COMPLEX, POINTER :: bcoefsc(:,:) => NULL() ! Spline coefs for complex function + END TYPE spline2d +! + TYPE spline2d1d + TYPE(spline2d) :: sp12 ! 2D spline for dir. 1 and 2 + TYPE(spline1d) :: sp3 ! 1D spline for dir. 3 + DOUBLE PRECISION, POINTER :: ppform(:,:,:,:,:,:) => NULL() ! PPFORM coefs + DOUBLE PRECISION, POINTER :: bcoefs(:,:,:) => NULL() ! Spline coefs + DOUBLE PRECISION, POINTER :: ppformz(:,:,:,:,:,:) => NULL()! PPFORM coefs for complex function + DOUBLE COMPLEX, POINTER :: bcoefsc(:,:,:) => NULL() ! Spline coefs for complex function + END TYPE spline2d1d +! + INTERFACE set_spline + MODULE PROCEDURE set_spline1d, set_spline2d, set_spline2d1d + END INTERFACE + INTERFACE get_dim + MODULE PROCEDURE get_dim1, get_dim2 + END INTERFACE + INTERFACE gridval + MODULE PROCEDURE gridval1d, gridval1dz, & + & gridval2d, gridval2dz, & + & gridval2d_1d, gridval2d_1dz, & + & gridval2d_2d, gridval2d_2dz, & + & gridval2d1d_3d, gridval2d1d_1d + END INTERFACE + INTERFACE set_splcoef + MODULE PROCEDURE set_splcoef1d, set_splcoef2d + END INTERFACE + INTERFACE get_splcoef + MODULE PROCEDURE get_splcoef1, get_splcoef1z, get_splcoefn, & + & get_splcoef2d, get_splcoef2dz + END INTERFACE + INTERFACE fintg + MODULE PROCEDURE fintg1, fintg2 + END INTERFACE + INTERFACE destroy_sp + MODULE PROCEDURE destroy_sp1d, destroy_sp2d, destroy_sp2d1d + END INTERFACE + INTERFACE CompMassMatrix + MODULE PROCEDURE CompMassMatrix1, CompMassMatrix_gb, CompMassMatrix_zgb + END INTERFACE + INTERFACE calc_integ + MODULE PROCEDURE calc_integ0,calc_integn + END INTERFACE + INTERFACE locintv + MODULE PROCEDURE locintv0, locintv1 + END INTERFACE locintv + INTERFACE locintv_old + MODULE PROCEDURE locintv0_old, locintv1_old + END INTERFACE locintv_old + INTERFACE ppval + MODULE PROCEDURE ppval0, ppval1, ppval2, & + & ppval0_n, & + & ppval0z, ppval1z, ppval2z, & + & ppval0z_n + END INTERFACE ppval + INTERFACE basfun + MODULE PROCEDURE basfun0, basfun1 + END INTERFACE basfun + INTERFACE ft_basfun + MODULE PROCEDURE ft_basfun0, ft_basfun1 + END INTERFACE ft_basfun + INTERFACE def_basfun + MODULE PROCEDURE def_basfun0, def_basfun1 + END INTERFACE def_basfun + INTERFACE getgrad + MODULE PROCEDURE getgradr, getgradz + END INTERFACE getgrad +! +CONTAINS +!=========================================================================== + SUBROUTINE set_spline1d(p, ngauss, grid, sp, period, nlppform, nlequid) +! +! Setup a spline +! + INTEGER, INTENT(in) :: p, ngauss + DOUBLE PRECISION, INTENT(in) :: grid(:) + TYPE(spline1d), INTENT(out) :: sp + LOGICAL, OPTIONAL, INTENT(in) :: period + LOGICAL, OPTIONAL, INTENT(in) :: nlppform + LOGICAL, OPTIONAL, INTENT(in) :: nlequid +! + DOUBLE COMPLEX :: zc + DOUBLE PRECISION :: leng, xp, factinv, h + INTEGER :: order, nints, i, k + DOUBLE PRECISION :: temp(1:p+1,0:p) +! +! Order of splines + order = p+1 + sp%order = order +! +! Dimension of spline space + nints = SIZE(grid)-1 + sp%nints = nints + sp%period = .FALSE. + IF( PRESENT(period) ) THEN + sp%period = period + sp%lperiod = grid(nints+1) - grid(1) + END IF + sp%dim = nints+p +! +! Use or not PPFORM + sp%nlppform = .TRUE. + IF( PRESENT(nlppform) ) THEN + sp%nlppform = nlppform + END IF +! +! Determine sequence of knots + IF( ASSOCIATED(sp%knots) ) DEALLOCATE(sp%knots) + ALLOCATE( sp%knots(-p:nints+p) ) + sp%knots(0:nints) = grid(:) +! +! Is the grid equidistant ? + IF( PRESENT(nlequid) ) THEN + sp%nlequid = nlequid + ELSE + sp%nlequid = is_equid(grid) + END IF +! +! Coarse to fine mesh mapping for non-equidistant mesh + IF(sp%nlequid) THEN + sp%hinv = 1.0d0/(sp%knots(1)-sp%knots(0)) + ELSE + IF(ASSOCIATED(sp%fmap)) DEALLOCATE(sp%fmap) + CALL create_fine(sp%knots(0:nints), h, sp%fmap) + sp%hinv = 1.0d0/h + END IF +! +! Extend knots at both sides of given grid points + IF( sp%period ) THEN + leng = sp%knots(nints) - sp%knots(0) + DO i=-1,-p,-1 + sp%knots(i) = sp%knots(nints+i) - leng + END DO + DO i=1,p + sp%knots(nints+i) = sp%knots(i) + leng + END DO +!!$ sp%knots(-p:-1) = sp%knots(nints-p:nints-1) - leng +!!$ sp%knots(nints+1:nints+p) = leng + sp%knots(1:p) + ELSE + sp%knots(-p:-1) = sp%knots(0) + sp%knots(nints+1:nints+p) = sp%knots(nints) + END IF +! +! Precalculated values of all splines and their derivatives at left boundaries + IF( ASSOCIATED(sp%val0) ) DEALLOCATE(sp%val0) + ALLOCATE( sp%val0(0:p, p+1, 1:nints) ) + sp%val0 = 0.0d0 + DO i=1,nints + xp = sp%knots(i-1) + EPSILON(1.0d0)*ABS(sp%knots(i-1)) + CALL basfun_recur(xp, sp, temp, i) + sp%val0(:,:,i) = TRANSPOSE(temp) + END DO +! + factinv = 1.0d0 + DO k=2,p ! Divide by k! for use in PPFORM_ALT + factinv = factinv/k + sp%val0(k,:,:) = sp%val0(k,:,:)*factinv + END DO +! +! Case of periodic equidistant splines (translational invariance) + IF(sp%period .AND. sp%nlequid) THEN + IF( ASSOCIATED(sp%valc) ) DEALLOCATE(sp%valc) + ALLOCATE(sp%valc(0:p, p+1)) + sp%valc = sp%val0(:,:,1) + END IF +! +! Gauss abscissas and weights + IF( ngauss .GT. 0 ) THEN + IF( ASSOCIATED(sp%gausx) ) DEALLOCATE(sp%gausx) + IF( ASSOCIATED(sp%gausw) ) DEALLOCATE(sp%gausw) + ALLOCATE(sp%gausx(ngauss,nints)) + ALLOCATE(sp%gausw(ngauss,nints)) + DO i=1,nints + CALL gauleg(sp%knots(i-1), sp%knots(i), & + & sp%gausx(1:ngauss,i), sp%gausw(1:ngauss,i), ngauss) + END DO + END IF +! +! Compute integral of each splines + IF( ASSOCIATED(sp%intspl) ) DEALLOCATE(sp%intspl) + ALLOCATE(sp%intspl(0:sp%dim-1)) + CALL calc_integ(sp, sp%intspl) +! + END SUBROUTINE set_spline1d +!=========================================================================== + SUBROUTINE init_dft(sp, kmin, kmax, couplings) +! +! Initialize DFT +! + TYPE(spline1d) :: sp + INTEGER, INTENT(in) :: kmin, kmax + INTEGER, INTENT(in), OPTIONAL :: couplings(:) +! + INTEGER :: n, p, dk, k, j, nc + DOUBLE COMPLEX :: zc +! + n = sp%nints + p = sp%order-1 + dk = kmax-kmin+1 +! +! Check that -N/2 .LE. Kmin .LE. Kmax .LT. N/2 +! + IF(kmin.GT.kmax .OR. kmin.LT.-n/2 .OR. kmax.GE.n/2) THEN + WRITE(*,'(a,2i6,a)') 'kmin, kmax =', kmin, kmax, ' erroneous!' + STOP + END IF +! +! The Fourier window +! + sp%dft%n = n + sp%dft%kmin = kmin + sp%dft%kmax = kmax + sp%dft%dk = dk + sp%dft%dx = sp%knots(1) - sp%knots(0) +! +! Precalculate the DFT coefs exp( i(2*pi/N)jk ), k=kmin,kmax, j=0,p +! + IF( ASSOCIATED(sp%dft%coefs) ) DEALLOCATE(sp%dft%coefs) + ALLOCATE(sp%dft%coefs(kmin:kmax,0:p)) + zc = EXP( CMPLX(0.0d0, 2.0d0*pi/REAL(n,8),8) ) + sp%dft%coefs(:,0) = 1.0d0 ! j=0 + sp%dft%coefs(kmin,1) = zc**kmin ! j=1 + DO k=kmin+1,kmax + sp%dft%coefs(k,1) = sp%dft%coefs(k-1,1)*zc + END DO + DO j=2,p + sp%dft%coefs(:,j) = sp%dft%coefs(:,1)*sp%dft%coefs(:,j-1) + END DO +! +! Mode couplings: by default use the whole window +! + nc = dk + IF( PRESENT(couplings)) nc = SIZE(couplings) +! + IF(ASSOCIATED(sp%dft%mode_couplings)) DEALLOCATE(sp%dft%mode_couplings) + ALLOCATE(sp%dft%mode_couplings(nc)) +! + IF(PRESENT(couplings)) THEN + sp%dft%mode_couplings = couplings + ELSE + sp%dft%mode_couplings = (/ (k,k=kmin,kmax) /) + END IF + END SUBROUTINE init_dft +!=========================================================================== + SUBROUTINE set_spline2d(p, ngauss, grid1, grid2, sp, period, nlppform,& + & nlequid) +! +! Setup a 2d spline +! + INTEGER, INTENT(in) :: p(2), ngauss(2) + DOUBLE PRECISION, INTENT(in) :: grid1(:) + DOUBLE PRECISION, INTENT(in) :: grid2(:) + TYPE(spline2d), INTENT(out) :: sp + LOGICAL, OPTIONAL, INTENT(in) :: period(2) + LOGICAL, OPTIONAL, INTENT(in) :: nlppform + LOGICAL, OPTIONAL, INTENT(in) :: nlequid(2) +! + IF(PRESENT(period).AND.PRESENT(nlppform).AND.PRESENT(nlequid)) THEN + CALL set_spline1d(p(1), ngauss(1), grid1, sp%sp1, period(1), nlppform, & + & nlequid(1)) + CALL set_spline1d(p(2), ngauss(2), grid2, sp%sp2, period(2), nlppform, & + & nlequid(2)) + ELSE IF(PRESENT(period).AND.PRESENT(nlppform)) THEN + CALL set_spline1d(p(1), ngauss(1), grid1, sp%sp1, period(1), nlppform) + CALL set_spline1d(p(2), ngauss(2), grid2, sp%sp2, period(2), nlppform) + ELSE IF(PRESENT(period).AND.PRESENT(nlequid)) THEN + CALL set_spline1d(p(1), ngauss(1), grid1, sp%sp1, period=period(1), nlequid=nlequid(1)) + CALL set_spline1d(p(2), ngauss(2), grid2, sp%sp2, period=period(2), nlequid=nlequid(2)) + ELSE IF(PRESENT(nlppform).AND.PRESENT(nlequid)) THEN + CALL set_spline1d(p(1), ngauss(1), grid1, sp%sp1, nlppform=nlppform, nlequid=nlequid(1)) + CALL set_spline1d(p(2), ngauss(2), grid2, sp%sp2, nlppform=nlppform, nlequid=nlequid(2)) + ELSE IF(PRESENT(period)) THEN + CALL set_spline1d(p(1), ngauss(1), grid1, sp%sp1, period(1)) + CALL set_spline1d(p(2), ngauss(2), grid2, sp%sp2, period(2)) + ELSE IF(PRESENT(nlppform)) THEN + CALL set_spline1d(p(1), ngauss(1), grid1, sp%sp1, nlppform=nlppform) + CALL set_spline1d(p(2), ngauss(2), grid2, sp%sp2, nlppform=nlppform) + ELSE IF(PRESENT(nlequid)) THEN + CALL set_spline1d(p(1), ngauss(1), grid1, sp%sp1, nlequid=nlequid(1)) + CALL set_spline1d(p(2), ngauss(2), grid2, sp%sp2, nlequid=nlequid(2)) + ELSE + CALL set_spline1d(p(1), ngauss(1), grid1, sp%sp1) + CALL set_spline1d(p(2), ngauss(2), grid2, sp%sp2) + END IF + END SUBROUTINE set_spline2d +!=========================================================================== + SUBROUTINE set_spline2d1d(p, ngauss, grid1, grid2, grid3, sp, period, & + & nlppform, nlequid) +! +! Setup a 2d1d spline (for axisymmetric problems) +! + INTEGER, INTENT(in) :: p(3), ngauss(3) + DOUBLE PRECISION, INTENT(in) :: grid1(:) + DOUBLE PRECISION, INTENT(in) :: grid2(:) + DOUBLE PRECISION, INTENT(in) :: grid3(:) + TYPE(spline2d1d), INTENT(out) :: sp + LOGICAL, OPTIONAL, INTENT(in) :: period(3) + LOGICAL, OPTIONAL, INTENT(in) :: nlppform + LOGICAL, OPTIONAL, INTENT(in) :: nlequid(3) +! + IF(PRESENT(period).AND.PRESENT(nlppform).AND.PRESENT(nlequid)) THEN + CALL set_spline2d(p(1:2), ngauss(1:2), grid1, grid2, sp%sp12, period(1:2),& + & nlppform, nlequid(1:2)) + CALL set_spline1d(p(3), ngauss(3), grid3, sp%sp3, period(3), nlppform, & + & nlequid(3)) + ELSE IF(PRESENT(period).AND.PRESENT(nlppform)) THEN + CALL set_spline2d(p(1:2), ngauss(1:2), grid1, grid2, sp%sp12, period(1:2),& + & nlppform) + CALL set_spline1d(p(3), ngauss(3), grid3, sp%sp3, period(3), nlppform) + ELSE IF(PRESENT(period).AND.PRESENT(nlequid)) THEN + CALL set_spline2d(p(1:2), ngauss(1:2), grid1, grid2, sp%sp12, period=period(1:2),& + & nlequid=nlequid(1:2)) + CALL set_spline1d(p(3), ngauss(3), grid3, sp%sp3, period=period(3), & + & nlequid=nlequid(3)) + ELSE IF(PRESENT(nlppform).AND.PRESENT(nlequid)) THEN + CALL set_spline2d(p(1:2), ngauss(1:2), grid1, grid2, sp%sp12, nlppform=nlppform,& + & nlequid=nlequid(1:2)) + CALL set_spline1d(p(3), ngauss(3), grid3, sp%sp3, nlppform=nlppform, & + & nlequid=nlequid(3)) + ELSE IF(PRESENT(period)) THEN + CALL set_spline2d(p(1:2), ngauss(1:2), grid1, grid2, sp%sp12, period(1:2)) + CALL set_spline1d(p(3), ngauss(3), grid3, sp%sp3, period(3)) + ELSE IF(PRESENT(nlppform)) THEN + CALL set_spline2d(p(1:2), ngauss(1:2), grid1, grid2, sp%sp12, nlppform=nlppform) + CALL set_spline1d(p(3), ngauss(3), grid3, sp%sp3, nlppform=nlppform) + ELSE IF(PRESENT(nlequid)) THEN + CALL set_spline2d(p(1:2), ngauss(1:2), grid1, grid2, sp%sp12, nlequid=nlequid(1:2)) + CALL set_spline1d(p(3), ngauss(3), grid3, sp%sp3, nlequid=nlequid(3)) + ELSE + CALL set_spline2d(p(1:2), ngauss(1:2), grid1, grid2, sp%sp12) + CALL set_spline1d(p(3), ngauss(3), grid3, sp%sp3) + END IF + END SUBROUTINE set_spline2d1d +!=========================================================================== + SUBROUTINE get_dim1(sp, dim, nx, nidbas) +! +! Return spline dimension of 1d spline sp and optionally +! number of knot intervals nx and spline degree nidbas +! + TYPE(spline1d), INTENT(in) :: sp + INTEGER, INTENT(out) :: dim + INTEGER, OPTIONAL, INTENT(out) :: nx, nidbas + dim = sp%dim + IF( PRESENT(nx) ) nx = sp%nints + IF( PRESENT(nidbas) ) nidbas = sp%order - 1 + END SUBROUTINE get_dim1 +!=========================================================================== + SUBROUTINE get_dim2(sp, dim, nx, nidbas) +! +! Return spline dimension of 2d spline sp and optionally +! number of knot intervals nx and spline degree nidbas +! + TYPE(spline2d), INTENT(in) :: sp + INTEGER, INTENT(out) :: dim(2) + INTEGER, OPTIONAL, INTENT(out) :: nx(2), nidbas(2) +! + dim(1) = sp%sp1%dim + IF( PRESENT(nx) ) nx(1) = sp%sp1%nints + IF( PRESENT(nidbas) ) nidbas(1) = sp%sp1%order - 1 +! + dim(2) = sp%sp2%dim + IF( PRESENT(nx) ) nx(2) = sp%sp2%nints + IF( PRESENT(nidbas) ) nidbas(2) = sp%sp2%order - 1 + END SUBROUTINE get_dim2 +!=========================================================================== + SUBROUTINE get_gauss(sp, n, i, x, w) +! +! Get Gauss points and weights from spline sp +! + TYPE(spline1d), INTENT(in) :: sp + INTEGER, INTENT(out) :: n + INTEGER, INTENT(in), OPTIONAL :: i + DOUBLE PRECISION, DIMENSION(:), OPTIONAL, INTENT(out) :: x, w +! + n = SIZE(sp%gausx, 1) + IF( PRESENT(i) ) THEN + x(:) = sp%gausx(:,i) + w(:) = sp%gausw(:,i) + END IF + END SUBROUTINE get_gauss +!=========================================================================== + SUBROUTINE def_basfun0(xp, sp, fun, left) +! +! Define the basis function and its derivatives at x +! fun(i,j) = (j-1)th derivative of ith basis function. +! + DOUBLE PRECISION, INTENT(in) :: xp + TYPE(spline1d), INTENT(in) :: sp + DOUBLE PRECISION, INTENT(out) :: fun(:,:) + INTEGER, OPTIONAL, INTENT(out) :: left + DOUBLE PRECISION :: x + INTEGER :: p, n, kleft + INTEGER :: ierr, j, k +! + CALL locintv(sp, xp, kleft) + CALL basfun(xp, sp, fun, kleft+1) + IF(PRESENT(left)) THEN + left = kleft + END IF + END SUBROUTINE def_basfun0 +!=========================================================================== + SUBROUTINE basfun0(xp, sp, f, left) +! +! Define the basis function and its derivatives at x, in interval +! [left,left+1], using PPFORM defined by sp%val0., left=1,..,nints +! f(i,j) = jth derivative of ith basis function. +! + DOUBLE PRECISION, INTENT(in) :: xp + DOUBLE PRECISION, INTENT(out) :: f(:,0:) + INTEGER, INTENT(in) :: left ! =1,2,...,nints + TYPE(spline1d) :: sp +! + INTEGER :: p, n, jdermx, i, jder + DOUBLE PRECISION :: x, h +! + p = sp%order - 1 + n = sp%nints + jdermx = SIZE(f,2)-1 +! + h = xp-sp%knots(left-1) ! knots are numbered from 0 +! + IF(sp%period .AND. sp%nlequid) THEN + DO jder=0,jdermx ! Derivative jder + CALL my_ppval(p, h, sp%valc, jder, f(:,jder)) + END DO + ELSE + DO jder=0,jdermx ! Derivative jder + CALL my_ppval(p, h, sp%val0(:,:,left), jder, f(:,jder)) + END DO + END IF + CONTAINS + SUBROUTINE my_ppval(p, x, ppform, jder, f) + INTEGER, INTENT(in) :: p + DOUBLE PRECISION, INTENT(in) :: x, ppform(:,:) + INTEGER, INTENT(in) :: jder + DOUBLE PRECISION, INTENT(out) :: f(:) + DOUBLE PRECISION :: fact + INTEGER :: j + SELECT CASE (jder) + CASE(0) ! function value + SELECT CASE(p) + CASE(1) + f(1) = ppform(1,1) + x*ppform(2,1) + CASE(2) + f(1) = ppform(1,1) + x*(ppform(2,1)+x*ppform(3,1)) + f(2) = ppform(1,2) + x*(ppform(2,2)+x*ppform(3,2)) + CASE(3) + f(1) = ppform(1,1) + x*(ppform(2,1)+x*(ppform(3,1)+x*ppform(4,1))) + f(2) = ppform(1,2) + x*(ppform(2,2)+x*(ppform(3,2)+x*ppform(4,2))) + f(3) = ppform(1,3) + x*(ppform(2,3)+x*(ppform(3,3)+x*ppform(4,3))) + CASE(4:) + f(1:p) = ppform(p+1,1:p) + DO j=p,1,-1 + f(1:p) = f(1:p)*x + ppform(j,1:p) + END DO + END SELECT + f(p+1) = 1.0d0 - SUM(f(1:p)) + CASE(1) ! 1st derivative + SELECT CASE(p) + CASE(1) + f(1) = ppform(2,1) + CASE(2) + f(1) = ppform(2,1) + x*2.d0*ppform(3,1) + f(2) = ppform(2,2) + x*2.d0*ppform(3,2) + CASE(3) + f(1) = ppform(2,1) + x*(2.d0*ppform(3,1)+x*3.0d0*ppform(4,1)) + f(2) = ppform(2,2) + x*(2.d0*ppform(3,2)+x*3.0d0*ppform(4,2)) + f(3) = ppform(2,3) + x*(2.d0*ppform(3,3)+x*3.0d0*ppform(4,3)) + CASE(4:) + f(1:p) = p*ppform(p+1,1:p) + DO j=p-1,1,-1 + f(1:p) = f(1:p)*x + j*ppform(j+1,1:p) + END DO + END SELECT + f(p+1) = -SUM(f(1:p)) + CASE default ! 2nd and higher derivatives + fact = p-jder + f(1:p) = ppform(p+1,1:p) + DO j=p,jder+1,-1 + f(1:p) = f(1:p)/fact*j*x + ppform(j,1:p) + fact = fact-1.0d0 + END DO + DO j=2,jder + f(1:p) = f(1:p)*j + END DO + f(p+1) = -SUM(f(1:p)) + END SELECT + END SUBROUTINE my_ppval + END SUBROUTINE basfun0 +!=========================================================================== + SUBROUTINE basfun1(xp, sp, f, left) +! +! Define the basis function and its derivatives at x, in interval i=1,2, +! using PPFORM defined by sp%val0. +! f(i,j,p) = jth derivative of ith basis function at coordinate xp +! + DOUBLE precision, INTENT(in) :: xp(:) + DOUBLE PRECISION, INTENT(out) :: f(0:,0:,:) + INTEGER, INTENT(in) :: left(:) ! =1,2,...,nints + TYPE(spline1d) :: sp +! + INTEGER :: p, n, kleft, i, j, jder, ierr + INTEGER :: npt, jdermx + DOUBLE PRECISION :: h(SIZE(xp)), temp(SIZE(xp)) + DOUBLE PRECISION :: ppform(SIZE(xp),sp%order) +! + p = sp%order - 1 + n = sp%nints + npt = SIZE(xp) + jdermx = SIZE(f,2)-1 +! + h = xp - sp%knots(left-1) ! knots are numbered from 0 +! + IF( sp%period .AND. sp%nlequid) THEN + DO jder=0,jdermx + CALL my_ppval_same(p, h, sp%valc, jder, f(:,jder,1:npt)) + END DO + ELSE + DO i=0,p ! Spline i + DO j=1,npt + ppform(j,:) = sp%val0(:,i+1,left(j)) + END DO + DO jder=0,jdermx ! Derivative jder + CALL my_ppval(p, h, ppform, jder, temp) + f(i,jder,1:npt) = temp + END DO + END DO + END IF + CONTAINS +!+++ + SUBROUTINE my_ppval(p, x, ppform, jder, f) +! +! Compute function and derivatives from the PP representation +! for many points x(:) + INTEGER, INTENT(in) :: p + DOUBLE PRECISION, INTENT(in) :: x(:) + DOUBLE PRECISION, INTENT(in) :: ppform(:,:) + INTEGER, INTENT(in) :: jder + DOUBLE PRECISION, INTENT(out) :: f(:) + DOUBLE PRECISION :: fact + INTEGER :: j + SELECT CASE (jder) + CASE(0) ! function value + SELECT CASE(p) + CASE(1) + f(:) = ppform(:,1) + x(:)*ppform(:,2) + CASE(2) + f(:) = ppform(:,1) + x(:)*(ppform(:,2)+x(:)*ppform(:,3)) + CASE(3) + f(:) = ppform(:,1) + x(:)*(ppform(:,2)+x(:)*(ppform(:,3)+x(:)*ppform(:,4))) + CASE(4:) + f(:) = ppform(:,p+1) + DO j=p,1,-1 + f(:) = ppform(:,j) + f(:)*x(:) + END DO + END SELECT + CASE(1) ! 1st derivative + SELECT CASE(p) + CASE(1) + f(:) = ppform(:,2) + CASE(2) + f(:) = ppform(:,2) + x(:)*2.d0*ppform(:,3) + CASE(3) + f(:) = ppform(:,2) + x(:)*(2.d0*ppform(:,3)+x(:)*3.0d0*ppform(:,4)) + CASE(4:) + f(:) = p*ppform(:,p+1) + DO j=p-1,1,-1 + f(:) = f(:)*x(:) + j*ppform(:,j+1) + END DO + END SELECT + CASE default ! 2nd and higher derivatives + f(:) = ppform(:,p+1) + fact = p-jder + DO j=p,jder+1,-1 + f(:) = f(:)/fact*j*x(:) + ppform(:,j) + fact = fact-1.0d0 + END DO + DO j=2,jder + f(:) = f(:)*j + END DO + END SELECT + END SUBROUTINE my_ppval +!+++ + SUBROUTINE my_ppval_same(p, x, ppform, jder, f) +! +! Compute function and derivatives from the PP representation +! for many points x(:), same ppform (translationnal invariant spline) + INTEGER, INTENT(in) :: p + DOUBLE PRECISION, INTENT(in) :: x(:) + DOUBLE PRECISION, INTENT(in) :: ppform(:,:) + INTEGER, INTENT(in) :: jder + DOUBLE PRECISION, INTENT(out) :: f(:,:) + DOUBLE PRECISION :: fact + INTEGER :: j,k + SELECT CASE (jder) +! +! function value + CASE(0) + SELECT CASE(p) + CASE(1) + f(1,:) = ppform(1,1) + x(:)*ppform(2,1) + CASE(2) + f(1,:) = ppform(1,1) + x(:)*(ppform(2,1)+x(:)*ppform(3,1)) + f(2,:) = ppform(1,2) + x(:)*(ppform(2,2)+x(:)*ppform(3,2)) + CASE(3) + f(1,:) = ppform(1,1) + x(:)*(ppform(2,1)+x(:)*(ppform(3,1)+x(:)*ppform(4,1))) + f(2,:) = ppform(1,2) + x(:)*(ppform(2,2)+x(:)*(ppform(3,2)+x(:)*ppform(4,2))) + f(3,:) = ppform(1,3) + x(:)*(ppform(2,3)+x(:)*(ppform(3,3)+x(:)*ppform(4,3))) + CASE(4:) + DO k=1,p + f(k,:) = ppform(p+1,k) + DO j=p,1,-1 + f(k,:) = ppform(j,k) + f(k,:)*x(:) + END DO + END DO + END SELECT + f(p+1,:) = 1.0d0 - SUM(f(1:p,:),DIM=1) +! +! 1st derivative + CASE(1) + SELECT CASE(p) + CASE(1) + f(1,:) = ppform(2,1) + CASE(2) + f(1,:) = ppform(2,1) + x(:)*2.d0*ppform(3,1) + f(2,:) = ppform(2,2) + x(:)*2.d0*ppform(3,2) + CASE(3) + f(1,:) = ppform(2,1) + x(:)*(2.d0*ppform(3,1)+x(:)*3.0d0*ppform(4,1)) + f(2,:) = ppform(2,2) + x(:)*(2.d0*ppform(3,2)+x(:)*3.0d0*ppform(4,2)) + f(3,:) = ppform(2,3) + x(:)*(2.d0*ppform(3,3)+x(:)*3.0d0*ppform(4,3)) + CASE(4:) + DO k=1,p + f(k,:) = p*ppform(p+1,k) + DO j=p-1,1,-1 + f(k,:) = f(k,:)*x(:) + j*ppform(j+1,k) + END DO + END DO + END SELECT + f(p+1,:) = -SUM(f(1:p,:),DIM=1) +! +! 2nd and higher derivatives + CASE(2:) + DO k=1,p + f(k,:) = ppform(p+1,k) + fact = p-jder + DO j=p,jder+1,-1 + f(k,:) = f(k,:)/fact*j*x(:) + ppform(j,k) + fact = fact-1.0d0 + END DO + DO j=2,jder + f(k,:) = f(k,:)*j + END DO + END DO + f(p+1,:) = -SUM(f(1:p,:),DIM=1) + END SELECT + END SUBROUTINE my_ppval_same +!+++ + END SUBROUTINE basfun1 +!=========================================================================== + SUBROUTINE def_basfun1(xp, sp, fun, left) +! +! Define the basis function and its derivatives at x +! fun(i,j) = (j-1)th derivative of ith basis function. +! + DOUBLE PRECISION, INTENT(in) :: xp(:) + TYPE(spline1d), INTENT(in) :: sp + DOUBLE PRECISION, INTENT(out) :: fun(:,:,:) + INTEGER, OPTIONAL, INTENT(out) :: left(:) + DOUBLE PRECISION :: x(SIZE(xp)) + INTEGER :: kleft(SIZE(xp)) + INTEGER :: p, n + INTEGER :: ierr, j, k +! + CALL locintv(sp, xp, kleft) + CALL basfun(xp, sp, fun, kleft+1) + IF(PRESENT(left)) THEN + left = kleft + END IF + END SUBROUTINE def_basfun1 +!=========================================================================== + SUBROUTINE ft_basfun0(xp, sp, ft_f, left) +! +! DFT of basis functions: ft_f(k,j), k=sp%dft%kmin, sp$dft%kmax (modes) +! j=0, p-1 (order of derivative)) +! + DOUBLE PRECISION, INTENT(in) :: xp + DOUBLE COMPLEX, INTENT(out) :: ft_f(:,:) + INTEGER, INTENT(in) :: left + TYPE(spline1d) :: sp + DOUBLE PRECISION :: f(sp%order,SIZE(ft_f,2)) +! +! Construct all splines on interval [left,left+1] at coordinate xp + CALL basfun(xp, sp, f, left) +! +! DFT of splines + ft_f = MATMUL(sp%dft%coefs, f) + END SUBROUTINE ft_basfun0 +!=========================================================================== + SUBROUTINE ft_basfun1(xp, sp, ft_f, left) +! +! DFT of basis functions: ft_f(k,j), k=sp%dft%kmin, sp$dft%kmax (modes) +! j=0, p-1 (order of derivative)) +! at xp(i) +! + DOUBLE PRECISION, INTENT(in) :: xp(:) + DOUBLE COMPLEX, INTENT(out) :: ft_f(:,:,:) + INTEGER, INTENT(in) :: left(:) + TYPE(spline1d) :: sp +! + INTEGER :: i, n3 + DOUBLE PRECISION :: f(sp%order,SIZE(ft_f,2),SIZE(ft_f,3)) +! +! Construct all splines on interval [left,left+1] at coordinate xp + CALL basfun(xp, sp, f, left) +! +! DFT of splines + n3 = SIZE(xp) + DO i=1,n3 + ft_f(:,:,i) = MATMUL(sp%dft%coefs, f(:,:,i)) + END DO + END SUBROUTINE ft_basfun1 +!=========================================================================== + SUBROUTINE basfun_recur(xp, sp, fun, left) +! +! Define the basis function and its derivatives at x, in interval i=1,2, +! using recurrence construct in function BVALUE +! fun(i,j) = (j-1)th derivative of ith basis function. +! + DOUBLE PRECISION, INTENT(in) :: xp + TYPE(spline1d), INTENT(in) :: sp + DOUBLE PRECISION, INTENT(out) :: fun(:,:) + INTEGER, INTENT(in) :: left + DOUBLE PRECISION :: bcoef(1)=1.0d0, bvalue, x + INTEGER :: p, n, kleft + INTEGER :: ierr, j, k +! + p = sp%order - 1 + n = sp%nints + fun = 0.0d0 +! + IF( sp%period ) THEN ! ** Applly periodicity ** + x =sp%knots(0) + MODULO(xp-sp%knots(0), sp%lperiod) + ELSE + x = xp + END IF +! + kleft = left-1 + DO j=kleft-p, kleft + DO k=0, SIZE(fun,2)-1 + fun(j-kleft+p+1, k+1) = bvalue(sp%knots(j), bcoef, 1, p+1, x, k) + END DO + END DO + END SUBROUTINE basfun_recur +!=========================================================================== + SUBROUTINE gauleg(x1,x2,x,w,n) +! +! Compute Gauss-Legendre abscissas and weights in interval [x1, x2] +! + INTEGER, INTENT(in) :: n + DOUBLE PRECISION, INTENT(in) :: x1,x2 + DOUBLE PRECISION, INTENT(out) :: x(n),w(n) + DOUBLE PRECISION :: EPS + INTEGER i,j,m + DOUBLE PRECISION p1,p2,p3,pp,xl,xm,z,z1 +! + eps=EPSILON(eps) + m=(n+1)/2 + xm=0.5d0*(x2+x1) + xl=0.5d0*(x2-x1) + DO i=1,m + z=COS(3.141592654d0*(i-.25d0)/(n+.5d0)) + DO + p1=1.d0; p2=0.d0 + DO j=1,n + p3=p2; p2=p1 + p1=((2.d0*j-1.d0)*z*p2-(j-1.d0)*p3)/j + END DO + pp=n*(z*p1-p2)/(z*z-1.d0) + z1=z + z=z1-p1/pp + IF( ABS(z-z1) .LE. EPS ) EXIT + END DO + x(i)=xm-xl*z + x(n+1-i)=xm+xl*z + w(i)=2.d0*xl/((1.d0-z*z)*pp*pp) + w(n+1-i)=w(i) + END DO + END SUBROUTINE gauleg +!=========================================================================== + SUBROUTINE gridval1dz(sp, xp, f, jder, c, ppformz) +! +! Compute values or jder-th dervivative of f(x) from ppform +! of spline sp. Recompute the ppform if the optional spline +! coefficients are given. +! + TYPE(spline1d) :: sp + DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: xp + DOUBLE COMPLEX, DIMENSION(:), INTENT(out) :: f + INTEGER, INTENT(in) :: jder + DOUBLE COMPLEX, DIMENSION(:), OPTIONAL, INTENT(in) :: c + DOUBLE COMPLEX, DIMENSION(:,:), OPTIONAL :: ppformz + DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) + DOUBLE PRECISION :: x(SIZE(xp)), h, fact + INTEGER :: order, nints, i, j, nidbas + INTEGER :: leftx(SIZE(xp)) +! + order = sp%order + nints = sp%nints + nidbas = order-1 +! +! Compute PPFORM/BCOEFS if spline coefs are passed +! + IF (PRESENT(c)) THEN + IF (sp%nlppform) THEN + IF( PRESENT(ppformz) ) THEN + CALL topp0z(sp, c, ppformz) + ELSE + IF( ASSOCIATED(sp%ppformz) ) DEALLOCATE(sp%ppformz) + ALLOCATE(sp%ppformz(order,nints)) + CALL topp0z(sp, c, sp%ppformz) + END IF + ELSE + IF( ASSOCIATED(sp%bcoefsc) ) DEALLOCATE(sp%bcoefsc) + ALLOCATE(sp%bcoefsc(SIZE(c))) + sp%bcoefsc = c + END IF + END IF +! +! Applly periodicity if required +! + IF( sp%period ) THEN + x =sp%knots(0) + MODULO(xp-sp%knots(0), sp%lperiod) + ELSE + x = xp + END IF +! +! Locate the intervals containing x +! + CALL locintv(sp, x, leftx) +! +! Compute function/derivatives +! + IF( sp%nlppform ) THEN ! using PP form + DO i=1,SIZE(x) + IF( PRESENT(ppformz) ) THEN + CALL ppval(sp, x(i), ppformz(:,leftx(i)+1), leftx(i), jder, f(i)) + ELSE + CALL ppval(sp, x(i), sp%ppformz(:,leftx(i)+1), leftx(i), jder, f(i)) + END IF + END DO + ELSE ! using spline expansion + ALLOCATE(fun(0:nidbas,0:jder)) + f = 0.0d0 + DO i=1,SIZE(x) + CALL basfun(x(i), sp, fun, leftx(i)+1) + DO j=0,nidbas + f(i) = f(i) + sp%bcoefsc(leftx(i)+j+1)*fun(j,jder) + END DO + END DO + DEALLOCATE(fun) + END IF +! + END SUBROUTINE gridval1dz +!=========================================================================== + SUBROUTINE gridval1d(sp, xp, f, jder, c, ppform) +! +! Compute values or jder-th dervivative of f(x) from ppform +! of spline sp. Recompute the ppform if the optional spline +! coefficients are given. +! + TYPE(spline1d) :: sp + DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: xp + DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: f + INTEGER, INTENT(in) :: jder + DOUBLE PRECISION, DIMENSION(:), OPTIONAL, INTENT(in) :: c + DOUBLE PRECISION, DIMENSION(:,:), OPTIONAL :: ppform + DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) + DOUBLE PRECISION :: x(SIZE(xp)), h, fact + INTEGER :: order, nints, i, j, nidbas + INTEGER :: leftx(SIZE(xp)) +! + order = sp%order + nints = sp%nints + nidbas = order-1 +! +! Compute PPFORM/BCOEFS if spline coefs are passed +! + IF (PRESENT(c)) THEN + IF (sp%nlppform) THEN + IF( PRESENT(ppform) ) THEN + CALL topp0(sp, c, ppform) + ELSE + IF( ASSOCIATED(sp%ppform) ) DEALLOCATE(sp%ppform) + ALLOCATE(sp%ppform(order,nints)) + CALL topp0(sp, c, sp%ppform) + END IF + ELSE + IF( ASSOCIATED(sp%bcoefs) ) DEALLOCATE(sp%bcoefs) + ALLOCATE(sp%bcoefs(SIZE(c))) + sp%bcoefs = c + END IF + END IF +! +! Applly periodicity if required +! + IF( sp%period ) THEN + x =sp%knots(0) + MODULO(xp-sp%knots(0), sp%lperiod) + ELSE + x = xp + END IF +! +! Locate the intervals containing x +! + CALL locintv(sp, x, leftx) +! +! Compute function/derivatives +! + IF( sp%nlppform ) THEN ! using PP form + DO i=1,SIZE(x) + IF( PRESENT(ppform) ) THEN + CALL ppval(sp, x(i), ppform(:,leftx(i)+1), leftx(i), jder, f(i)) + ELSE + CALL ppval(sp, x(i), sp%ppform(:,leftx(i)+1), leftx(i), jder, f(i)) + END IF + END DO + ELSE ! using spline expansion + ALLOCATE(fun(0:nidbas,0:jder)) + f = 0.0d0 + DO i=1,SIZE(x) + CALL basfun(x(i), sp, fun, leftx(i)+1) + DO j=0,nidbas + f(i) = f(i) + sp%bcoefs(leftx(i)+j+1)*fun(j,jder) + END DO + END DO + DEALLOCATE(fun) + END IF +! + END SUBROUTINE gridval1d +!=========================================================================== + SUBROUTINE def_knots(p, xg, knots, period, nlskip) +! +! Define spline knots for interpolating at sites given by xg +! + INTEGER, INTENT(in) :: p + DOUBLE PRECISION, INTENT(in) :: xg(0:) + DOUBLE PRECISION, POINTER :: knots(:) + LOGICAL, OPTIONAL, INTENT(in) :: period, nlskip + LOGICAL :: kperiod, mlskip + INTEGER :: npt, dim, nx, i, ii +! + kperiod=.FALSE. + mlskip = .TRUE. + IF( PRESENT(period) ) kperiod=period + IF( PRESENT(nlskip) ) mlskip = nlskip +! +! Periodic splines +! + IF( kperiod ) THEN + nx = SIZE(xg) -1 + IF( ASSOCIATED(knots) ) DEALLOCATE(knots) + ALLOCATE(knots(0:nx)) + IF( MODULO(p,2) .NE. 0 ) THEN ! Odd degree + knots(0:nx) = xg(0:nx) + ELSE ! Even degree + DO i=1,nx + knots(i) = 0.5d0*(xg(i-1)+xg(i)) + END DO + knots(0) = knots(nx) - (xg(nx)-xg(0)) + END IF + RETURN + END IF +! +! Non-periodic splines +! + npt = SIZE(xg) + dim = npt + IF( .NOT. mlskip ) THEN + dim = dim + 2*(p/2) ! Add BC on derivatives + END IF + nx = dim-p + IF( ASSOCIATED(knots) ) DEALLOCATE(knots) + ALLOCATE(knots(0:nx)) +! + knots(0) = xg(0) + knots(nx) = xg(npt-1) +! + IF( MODULO(p,2) .EQ. 0 ) THEN + ii = 0 + IF( mlskip ) ii = p/2 ! skip first p/2 intervals + DO i=1,nx-1 + ii = ii+1 + knots(i) = (xg(ii)+xg(ii-1))/2 + END DO + ELSE + ii = 0 + IF( mlskip ) ii = (p-1)/2 ! skip (p-1)/2 points after the first point ii=0 + DO i=1,nx-1 + ii = ii+1 + knots(i) = xg(ii) + END DO + END IF +! + END SUBROUTINE def_knots +!=========================================================================== + SUBROUTINE allsplines(sp, xpt, splines) +! +! Return all splines defined on points xpt +! + TYPE(spline1d) :: sp + DOUBLE PRECISION, INTENT(in) :: xpt(:) + DOUBLE PRECISION, POINTER :: splines(:,:) + DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) + INTEGER :: i, n, left, dim, p +! + p = sp%order - 1 + dim = sp%dim + n = SIZE(xpt) +! + IF( ASSOCIATED(splines) ) DEALLOCATE(splines) + ALLOCATE(splines(n,dim), fun(p+1,1)) + splines = 0.0d0 + DO i=1,n + CALL locintv(sp, xpt(i), left) + CALL basfun(xpt(i), sp, fun, left+1) + splines(i,left+1:left+p+1) = fun(1:p+1,1) + END DO + DEALLOCATE(fun) + END SUBROUTINE allsplines +!=========================================================================== + SUBROUTINE set_splcoef1d(p, x, sp, period, ibc) +! +! Setup 1d interpolation matrix for spline of degree p +! + INTEGER, INTENT(in) :: p + DOUBLE PRECISION, INTENT(in) :: x(:) + TYPE(spline1d), INTENT(out) :: sp + LOGICAL, OPTIONAL, INTENT(in) :: period + INTEGER, OPTIONAL :: ibc(:,:) +! + LOGICAL :: kperiod +! + kperiod = .FALSE. + IF( PRESENT(period) ) kperiod = period +! + IF( kperiod ) THEN + CALL splcoefp_setup(p, x, sp) + ELSE + IF( PRESENT(ibc) ) THEN + CALL splcoef_setup(p, x, sp, ibc) + ELSE + CALL splcoef_setup(p, x, sp) + END IF + END IF + END SUBROUTINE set_splcoef1d +!=========================================================================== + SUBROUTINE set_splcoef2d(p, x1, x2, sp, period, ibc1, ibc2) +! +! Setup 2d interpolation matrix for spline of degree p +! + INTEGER, INTENT(in) :: p(2) + DOUBLE PRECISION, INTENT(in) :: x1(:), x2(:) + TYPE(spline2d), INTENT(out) :: sp + LOGICAL, OPTIONAL, INTENT(in) :: period(2) + INTEGER, OPTIONAL :: ibc1(:,:),ibc2(:,:) +! + LOGICAL :: kperiod(2) +! + kperiod = .FALSE. + IF( PRESENT(period) ) kperiod = period +! +! Direction 1 + IF( kperiod(1) ) THEN + CALL splcoefp_setup(p(1), x1, sp%sp1) + ELSE + IF( PRESENT(ibc1) ) THEN + CALL splcoef_setup(p(1), x1, sp%sp1, ibc1) + ELSE + CALL splcoef_setup(p(1), x1, sp%sp1) + END IF + END IF +! +! Direction 2 + IF( kperiod(2) ) THEN + CALL splcoefp_setup(p(2), x2, sp%sp2) + ELSE + IF( PRESENT(ibc2) ) THEN + CALL splcoef_setup(p(2), x2, sp%sp2, ibc2) + ELSE + CALL splcoef_setup(p(2), x2, sp%sp2) + END IF + END IF + END SUBROUTINE set_splcoef2d +!=========================================================================== + SUBROUTINE get_splcoef1(sp, f, c, fbc) +! +! Compute the spline coefficients c from grid values f +! + TYPE(spline1d) :: sp + DOUBLE PRECISION, INTENT(in) :: f(:) + DOUBLE PRECISION, INTENT(out) :: c(:) + DOUBLE PRECISION, INTENT(in), OPTIONAL :: fbc(:,:) +! + IF( sp%period ) THEN + CALL splcoefp1(sp, f, c) + ELSE + IF( PRESENT(fbc) ) THEN + CALL splcoef1(sp, f, c, fbc) + ELSE + CALL splcoef1(sp, f, c) + END IF + END IF + END SUBROUTINE get_splcoef1 +!=========================================================================== + SUBROUTINE get_splcoef2d(sp, f, c, fbc1, fbc2) +! +! Compute the spline coefficients c from 2d grid values f +! + TYPE(spline2d) :: sp + DOUBLE PRECISION, INTENT(in) :: f(:,:) + DOUBLE PRECISION, INTENT(out) :: c(:,:) + DOUBLE PRECISION :: ctr(SIZE(c,2), SIZE(f,1)) + DOUBLE PRECISION, INTENT(in), OPTIONAL :: fbc1(:,:,:), fbc2(:,:,:) + + DOUBLE PRECISION, DIMENSION(:, :) , ALLOCATABLE :: c_fbc1_left, c_fbc1_right + DOUBLE PRECISION, DIMENSION(:, :, :), ALLOCATABLE :: c_fbc1_all +! +! Along direction 2 +! + IF( PRESENT(fbc2) ) THEN + CALL get_splcoefn(sp%sp2, TRANSPOSE(f), ctr, fbc2) + ELSE + CALL get_splcoefn(sp%sp2, TRANSPOSE(f), ctr) + END IF +! +! Along direction 1 +! + IF( PRESENT(fbc1) ) THEN + ALLOCATE( c_fbc1_left(SIZE(c, 2), SIZE(fbc1, 2))) + ALLOCATE(c_fbc1_right(SIZE(c, 2), SIZE(fbc1, 2))) + ALLOCATE(c_fbc1_all(2, SIZE(fbc1, 2), SIZE(c, 2))) + + CALL get_splcoefn(sp%sp2, TRANSPOSE(fbc1(1, :, :)), c_fbc1_left ) + CALL get_splcoefn(sp%sp2, TRANSPOSE(fbc1(2, :, :)), c_fbc1_right) + + c_fbc1_all(1, :, :) = TRANSPOSE(c_fbc1_left ) + c_fbc1_all(2, :, :) = TRANSPOSE(c_fbc1_right) + + CALL get_splcoefn(sp%sp1, TRANSPOSE(ctr), c, c_fbc1_all) + + DEALLOCATE(c_fbc1_left, c_fbc1_right, c_fbc1_all) + ELSE + CALL get_splcoefn(sp%sp1, TRANSPOSE(ctr), c) + END IF +! + END SUBROUTINE get_splcoef2d +!=========================================================================== + SUBROUTINE get_splcoef2dz(sp, f, c) +! +! Compute the spline coefficients c from 2d grid values f +! + TYPE(spline2d) :: sp + DOUBLE COMPLEX, INTENT(in) :: f(:,:) + DOUBLE COMPLEX, INTENT(out) :: c(:,:) + DOUBLE PRECISION, DIMENSION(SIZE(c,1), SIZE(c,2),2) :: pc +! + CALL get_splcoef2d(sp, REAL(f), pc(:,:,1)) + CALL get_splcoef2d(sp, AIMAG(f), pc(:,:,2)) + c(:,:) = CMPLX(pc(:,:,1),pc(:,:,2)) + END SUBROUTINE get_splcoef2dz +!=========================================================================== + SUBROUTINE get_splcoef1z(sp, f, c, fbc) +! +! Compute the spline coefficients c from grid values f +! + TYPE(spline1d) :: sp + DOUBLE COMPLEX, INTENT(in) :: f(:) + DOUBLE COMPLEX, INTENT(out) :: c(:) + DOUBLE COMPLEX, INTENT(in), OPTIONAL :: fbc(:,:) + DOUBLE PRECISION :: pf(SIZE(f),2), pc(SIZE(c),2) + DOUBLE PRECISION, ALLOCATABLE :: pfbc(:,:,:) +! + pf(:,1) = REAL(f(:)) + pf(:,2) = AIMAG(f(:)) + IF(PRESENT(fbc)) THEN + ALLOCATE(pfbc(SIZE(fbc,1),SIZE(fbc,2),2)) + pfbc(:,:,1) = REAL(fbc(:,:)) + pfbc(:,:,2) = AIMAG(fbc(:,:)) + CALL get_splcoefn(sp, pf, pc, pfbc) + DEALLOCATE(pfbc) + ELSE + CALL get_splcoefn(sp, pf, pc) + END IF + c(:) = CMPLX(pc(:,1), pc(:,2)) +! + END SUBROUTINE get_splcoef1z +!=========================================================================== + SUBROUTINE get_splcoefn(sp, f, c, fbc) +! +! Compute the spline coefficients c from grid values f +! + TYPE(spline1d) :: sp + DOUBLE PRECISION, INTENT(in) :: f(:,:) + DOUBLE PRECISION, INTENT(out) :: c(:,:) + DOUBLE PRECISION, INTENT(in), OPTIONAL :: fbc(:,:,:) +! + IF( sp%period ) THEN + CALL splcoefpn(sp, f, c) + ELSE + IF( PRESENT(fbc) ) THEN + CALL splcoefn(sp, f, c, fbc) + ELSE + CALL splcoefn(sp, f, c) + END IF + END IF + END SUBROUTINE get_splcoefn +!=========================================================================== + SUBROUTINE splcoef_setup(p, x, sp, ibc) +! +! Setup the interpolation matrix +! for spline of degree p +! + INTEGER, INTENT(in) :: p + DOUBLE PRECISION, INTENT(in) :: x(:) + TYPE(spline1d), INTENT(out) :: sp + INTEGER, OPTIONAL :: ibc(:,:) + DOUBLE PRECISION, POINTER :: knots(:)=>NULL(), arow(:)=>NULL(), & + & fun(:,:)=>NULL() + INTEGER :: nx, dim, kl, ku, rank + INTEGER :: i, left, ishift + LOGICAL :: nlskip +! +! Type of Boundary Conditions + nlskip = .TRUE. + ishift = 0 + IF( PRESENT(ibc) ) THEN + nlskip = .FALSE. + ishift = p/2 + END IF +! +! Set up spline + nx = SIZE(x) - 1 ! X is the interpolation sites + CALL def_knots(p, x, knots, nlskip=nlskip) + CALL set_spline(p, 0, knots, sp) + sp%nsites = nx + 1 ! Store away the number of interpolation sites + DEALLOCATE(knots) +! +! Set up interpolation matrix + dim = sp%dim + kl = MAX(p-1,0) + ku = MAX(p-1,0) + rank = dim + CALL init(kl, ku, rank, 0, sp%mat) +!!$ WRITE(*,'(a,3i6)') 'Interpolation matrix:, kl, ku, rank ', kl, ku, rank +! +! COMPUTE matrix row by row + ALLOCATE(arow(dim), fun(p+1,0:p)) + DO i=1,SIZE(x) + arow = 0.0d0 + CALL locintv(sp, x(i), left) + CALL basfun(x(i), sp, fun(:,0:0), left+1) + arow(left+1:left+p+1) = fun(1:p+1,0) + CALL putrow(sp%mat, i+ishift, arow) +!!$ WRITE(*,'(i5,13f8.3)') i+ishift, arow + END DO +! +! Add BC if specified + IF( PRESENT(ibc) ) THEN + CALL locintv(sp, x(1), left) + CALL basfun(x(1), sp, fun, left+1) ! BC at the left side + DO i=1,p/2 + arow = 0.0d0 + arow(left+1:left+p+1) = fun(1:p+1,ibc(1,i)) + CALL putrow(sp%mat, i, arow) +!!$ WRITE(*,'(i5,13f8.3)') i, arow + END DO + CALL locintv(sp, x(SIZE(x)), left) + CALL basfun(x(SIZE(x)), sp, fun, left+1) ! BC at the right side + DO i=1,p/2 + arow = 0.0d0 + arow(left+1:left+p+1) = fun(1:p+1,ibc(2,i)) + CALL putrow(sp%mat, dim-i+1, arow) +!!$ WRITE(*,'(i5,13f8.3)') dim-i+1, arow + END DO + END IF + DEALLOCATE(arow, fun) +! +! Factor the matrix + CALL factor(sp%mat) +! + END SUBROUTINE splcoef_setup +!=========================================================================== + SUBROUTINE splcoefp_setup(p, x, sp) +! +! Set up the interpolation matrix +! for periodic case +! + USE matrix + INTEGER, INTENT(in) :: p + DOUBLE PRECISION, INTENT(in) :: x(:) + TYPE(spline1d), INTENT(out) :: sp +! + TYPE(gemat) :: hmat + DOUBLE PRECISION, POINTER :: knots(:)=>NULL(), arow(:)=>NULL(), & + & fun(:,:)=>NULL() +!!$ DOUBLE PRECISION, POINTER :: arr2d(:,:)=>null() + INTEGER :: nx, kl, ku, rank, mr, nc + INTEGER :: i, left, j, jj + DOUBLE PRECISION, PARAMETER :: zero=0.0d0, one=1.0d0 +!________________________________________________________________________________ +! +! Set up spline +! + nx = SIZE(x) - 1 ! X is the interpolation sites + CALL def_knots(p, x, knots, period=.TRUE.) + CALL set_spline(p, 0, knots, sp, period=.TRUE.) + sp%nsites = nx + 1 ! Store away the number of interpolation sites + DEALLOCATE(knots) +!________________________________________________________________________________ +! +! Set up interpolation matrix sp%matp +! + kl = MAX(p/2,0) + ku = kl + rank = nx +!!$ WRITE(*,'(a,3i6)') 'Interpolation matrix:, kl, ku, rank ', kl, ku, rank +! + CALL init(kl, ku, rank, 0, sp%matp%mat) ! matp%mat is a GB matrix + ALLOCATE(sp%matp%matu(rank, kl+ku), sp%matp%matvt(kl+ku,rank)) +! + sp%matp%matu = zero + sp%matp%matvt = zero ! kl = ku = 2 + DO j=1,kl ! [ 1 0 0 . . . . ] + sp%matp%matu(j,j) = one ! [ 0 1 0 . . . . ] + END DO ! [ . 0 . . . . . ] + DO j=1,ku ! [ . . . . . 0 . ] + i=rank-ku+j ! [ . . . . 0 1 0 ] + sp%matp%matu(i,kl+j) = one ! [ . . . . 0 0 1 ] + END DO +!________________________________________________________________________________ +! +! COMPUTE matrix row by row +! + ALLOCATE(arow(rank), fun(p+1,1)) + DO i=1,rank + arow = zero + CALL locintv(sp, x(i), left) + CALL basfun(x(i), sp, fun, left+1) + left = left-p/2 + DO j=0,p + jj = MODULO(left+j, rank) + 1 + arow(jj) = fun(j+1,1) + END DO + CALL putrow(sp%matp%mat, i, arow) + IF( i .LE. kl ) THEN + sp%matp%matvt(i,rank-kl+1:rank) = arow(rank-kl+1:rank) + ELSE IF ( i .GE. rank-ku+1 ) THEN + j = i-(rank-ku+1) + 1 + sp%matp%matvt(kl+j,1:ku) = arow(1:ku) + END IF +!!$ WRITE(8, '(i5, 12(1pe12.3))') left, x(i), fun +!!$ WRITE(*,'(i5, 12(1pe12.3))') i, arow + END DO + DEALLOCATE(arow, fun) +! +!!$ PRINT*, 'Matrix U, V' +!!$ DO i=1,rank +!!$ WRITE(*, '(i5,12(1pe12.3))') i, sp%matp%matu(i,:), sp%matp%matvt(:,i) +!!$ END DO +!!$ ALLOCATE(arr2d(rank,rank)) +!!$ arr2d = MATMUL(sp%matp%matu, sp%matp%matvt) +!!$ PRINT*, 'Product U*V^T' +!!$ DO i=1,rank +!!$ WRITE(*, '(i5,12(1pe12.3))') i, arr2d(i,:) +!!$ END DO +!!$ DEALLOCATE(arr2d) +!________________________________________________________________________________ +! +! Factorisation +! +! Factor A + CALL factor(sp%matp%mat) +! +! For constant and linear splines, A is diagnonal! +! Should skip the rest +! + IF( kl.EQ.0 .OR. ku.EQ.0 ) THEN + RETURN + END IF +! +! U <-- A^(-1) * U + CALL bsolve(sp%matp%mat, sp%matp%matu) +! +! H <-- 1 + V^T * U + mr = SIZE(sp%matp%matvt, 1) + nc = SIZE(sp%matp%matvt, 2) + CALL init(mr, 0, hmat) ! hmat is initialized to 0! + DO i=1,mr + hmat%val(i,i) = one + END DO + CALL dgemm('N', 'N', mr, mr, nc, one, sp%matp%matvt, mr, & + & sp%matp%matu, nc, one, hmat%val, mr) +! +!!$ hmat%val = MATMUL(sp%matp%matvt, sp%matp%matu) +!!$ DO i=1,kl+ku +!!$ hmat%val(i,i) = 1.0d0 + hmat%val(i,i) +!!$ END DO +! +! V^T <-- H^(-1) V^T + CALL factor(hmat) + CALL bsolve(hmat, sp%matp%matvt) + CALL destroy(hmat) +! + END SUBROUTINE splcoefp_setup +!=========================================================================== + SUBROUTINE splcoef1(sp, f, c, fbc) +! +! Compute the spline coefficients c from grid values f and BC fbc +! + TYPE(spline1d) :: sp + DOUBLE PRECISION, INTENT(in) :: f(:) + DOUBLE PRECISION, INTENT(out) :: c(:) + DOUBLE PRECISION, INTENT(in), OPTIONAL :: fbc(:,:) + INTEGER :: p, dim, i, ishift +! + p = sp%order-1 + dim = sp%dim +! +! BC at left and right boundary + ishift = 0 + IF( PRESENT(fbc) ) THEN + DO i=1,p/2 + c(i) = fbc(1,i) ! Left boundary + c(dim-i+1) = fbc(2,i) ! Right boundary + END DO + ishift = p/2 + END IF +! +! Interior points + DO i=1,sp%nsites + c(i+ishift) = f(i) + END DO +! WRITE(*,'(a/(13f8.3))') 'RHS', c +! +! Solve for the interpolation coefs. using the factored sp%mat + CALL bsolve(sp%mat, c) +! + END SUBROUTINE splcoef1 +!=========================================================================== + SUBROUTINE splcoefn(sp, f, c, fbc) +! +! Compute the spline coefficients c from grid values f and BC fbc +! + TYPE(spline1d) :: sp + DOUBLE PRECISION, INTENT(in) :: f(:,:) + DOUBLE PRECISION, INTENT(out) :: c(:,:) + DOUBLE PRECISION, INTENT(in), OPTIONAL :: fbc(:,:,:) + INTEGER :: p, dim, i, ishift +! + p = sp%order-1 + dim = sp%dim +! +! BC at left and right boundary + ishift = 0 + IF( PRESENT(fbc) ) THEN + DO i=1,p/2 + c(i,:) = fbc(1,i,:) ! Left boundary + c(dim-i+1,:) = fbc(2,i,:) ! Right boundary + END DO + ishift = p/2 + END IF +! +! Interior points ! c(:,j) for j>SIZE(f,2) could be anything + DO i=1,sp%nsites ! (periodicity in the 2nd dimension)! + c(i+ishift,1:SIZE(f,2)) = f(i,:) + END DO +!!$ WRITE(*,'(a/(13f8.3))') 'RHS', c +! +! Solve for the interpolation coefs. using the factored sp%mat + CALL bsolve(sp%mat, c) +! + END SUBROUTINE splcoefn +!=========================================================================== + SUBROUTINE splcoefp1(sp, f, c) +! +! Compute the spline coefficient c from grid values f +! f(x) is periodic +! + USE matrix +! + TYPE(spline1d) :: sp + DOUBLE PRECISION, INTENT(in) :: f(:) + DOUBLE PRECISION, INTENT(out) :: c(:) +! + DOUBLE PRECISION, POINTER :: arow(:), brow(:) + DOUBLE PRECISION, PARAMETER :: zero=0.0d0, one=1.0d0, minus1=-1.0d0 + INTEGER :: dim, p, rank, bandw + INTEGER :: i, j +!________________________________________________________________________________ +! + p = sp%order-1 + rank = sp%nints + bandw = SIZE(sp%matp%matvt,1) +! +! Solve the interpolation system +! +! Solve Ay = f + ALLOCATE(arow(rank), brow(bandw)) +! + arow(1:rank) = f(1:rank) + CALL bsolve(sp%matp%mat, arow) +! +! For constant and linear splines, A is diagnonal! +! Should skip the rest +! + IF( p.LE.1 ) GOTO 100 +! +! +! t = V^T*y + CALL dgemv('N', bandw, rank, one, sp%matp%matvt, bandw, arow, 1, zero, & + & brow, 1) +! +! y = y - Ut + CALL dgemv('N', rank, bandw, minus1, sp%matp%matu, rank, brow, 1, one, & + & arow, 1) +! +100 CONTINUE +! +! Interpolation coefficients + dim = sp%dim + DO i=1,dim + j = MODULO(i-1-p/2, rank) + 1 + c(i) = arow(j) + END DO +! + DEALLOCATE(arow,brow) +! + END SUBROUTINE splcoefp1 +!=========================================================================== + SUBROUTINE splcoefpn(sp, f, c) +! +! Compute the spline coefficient c from grid values f +! f(x) is periodic +! + USE matrix +! + TYPE(spline1d) :: sp + DOUBLE PRECISION, INTENT(in) :: f(:,:) + DOUBLE PRECISION, INTENT(out) :: c(:,:) +! + DOUBLE PRECISION, POINTER :: arow(:,:), brow(:,:) + DOUBLE PRECISION, PARAMETER :: zero=0.0d0, one=1.0d0, minus1=-1.0d0 + INTEGER :: p, dim, rank, bandw, nrhs + INTEGER :: i, j, k +!________________________________________________________________________________ +! + p = sp%order-1 + rank = sp%nints + bandw = SIZE(sp%matp%matvt,1) + nrhs = SIZE(f,2) +! +! +! Solve the interpolation system +! +! Solve Ay = f + ALLOCATE(arow(rank,nrhs), brow(bandw,nrhs)) +! + arow(1:rank,1:nrhs) = f(1:rank,1:nrhs) + CALL bsolve(sp%matp%mat, arow) +! +! For constant and linear splines, A is diagnonal! +! Should skip the rest +! + IF( p.LE.1 ) GOTO 100 +! +! +! t = V^T*y + CALL dgemm('N', 'N', bandw, nrhs, rank, one, sp%matp%matvt, bandw, arow, & + & rank, zero, brow, bandw) +! +! y = y - Ut + CALL dgemm('N', 'N', rank, nrhs, bandw, minus1, sp%matp%matu, rank, brow, & + & bandw, one, arow, rank) +! +100 CONTINUE +! +! Interpolation coefficients + dim = sp%dim + DO k=1,nrhs + DO i=1,dim + j = MODULO(i-1-p/2, rank) + 1 + c(i,k) = arow(j,k) + END DO + END DO +! + DEALLOCATE(arow,brow) +! + END SUBROUTINE splcoefpn +! +!=========================================================================== + SUBROUTINE topp0(sp, c, ppform) +! +! Compute PPFORM of a fuction defined by the spline SP +! and spline coefficients C(1:d) +! + TYPE(spline1d), INTENT(in) :: sp + DOUBLE PRECISION, INTENT(in) :: c(:) + DOUBLE PRECISION, INTENT(out) :: ppform(0:,:) + INTEGER :: p, nints, i, j, k +! + p = sp%order - 1 + nints = sp%nints +! + ppform = 0.0d0 + DO i=1,nints ! on each knot interval + DO j=1,p+1 ! all spline in interval i + DO k=0,p ! k_th derivatives + ppform(k,i) = ppform(k,i) + sp%val0(k,j,i)*c(j+i-1) + END DO + END DO + END DO +! + END SUBROUTINE topp0 +!=========================================================================== + SUBROUTINE topp0z(sp, c, ppform) +! +! Compute PPFORM of a fuction defined by the spline SP +! and spline coefficients C(1:d) +! + TYPE(spline1d), INTENT(in) :: sp + DOUBLE COMPLEX, INTENT(in) :: c(:) + DOUBLE COMPLEX, INTENT(out) :: ppform(0:,:) + INTEGER :: p, nints, i, j, k +! + p = sp%order - 1 + nints = sp%nints +! + ppform = (0.0d0, 0.0d0) + DO i=1,nints ! on each knot interval + DO j=1,p+1 ! all spline in interval i + DO k=0,p ! k_th derivatives + ppform(k,i) = ppform(k,i) + sp%val0(k,j,i)*c(j+i-1) + END DO + END DO + END DO +! + END SUBROUTINE topp0z +!=========================================================================== + SUBROUTINE topp1(sp, c, ppform) +! +! Compute PPFORM of a fuction defined by the spline SP +! and spline coefficients C(1:d,:) +! + TYPE(spline1d), INTENT(in) :: sp + DOUBLE PRECISION, INTENT(in) :: c(:,:) + DOUBLE PRECISION, INTENT(out) :: ppform(:,:,:) + INTEGER :: m +! + DO m=1,SIZE(c,2) + CALL topp0(sp, c(:,m), ppform(m,:,:)) + END DO +! + END SUBROUTINE topp1 +!=========================================================================== + SUBROUTINE topp1z(sp, c, ppform) +! +! Compute PPFORM of a fuction defined by the spline SP +! and spline coefficients C(1:d,:) +! + TYPE(spline1d), INTENT(in) :: sp + DOUBLE COMPLEX, INTENT(in) :: c(:,:) + DOUBLE COMPLEX, INTENT(out) :: ppform(:,:,:) + INTEGER :: m +! + DO m=1,SIZE(c,2) + CALL topp0z(sp, c(:,m), ppform(m,:,:)) + END DO +! + END SUBROUTINE topp1z +!=========================================================================== + SUBROUTINE topp2(sp, c, ppform) +! +! Compute PPFORM of a fuction defined by the spline SP +! and spline coefficients C(1:d,:,:) +! + TYPE(spline1d), INTENT(in) :: sp + DOUBLE PRECISION, INTENT(in) :: c(:,:,:) + DOUBLE PRECISION, INTENT(out) :: ppform(:,:,:,:) + INTEGER :: m, mm +! + DO mm=1,SIZE(c,3) + DO m=1,SIZE(c,2) + CALL topp0(sp, c(:,m,mm), ppform(m,mm,:,:)) + END DO + END DO +! + END SUBROUTINE topp2 +!=========================================================================== + SUBROUTINE topp2z(sp, c, ppform) +! +! Compute PPFORM of a fuction defined by the spline SP +! and spline coefficients C(1:d,:,:) +! + TYPE(spline1d), INTENT(in) :: sp + DOUBLE COMPLEX, INTENT(in) :: c(:,:,:) + DOUBLE COMPLEX, INTENT(out) :: ppform(:,:,:,:) + INTEGER :: m, mm +! + DO mm=1,SIZE(c,3) + DO m=1,SIZE(c,2) + CALL topp0z(sp, c(:,m,mm), ppform(m,mm,:,:)) + END DO + END DO +! + END SUBROUTINE topp2z +!=========================================================================== + SUBROUTINE topp3(sp, c, ppform) +! +! Compute PPFORM of a fuction defined by the spline SP +! and spline coefficients C(1:d,:,:,:) +! + TYPE(spline1d), INTENT(in) :: sp + DOUBLE PRECISION, INTENT(in) :: c(:,:,:,:) + DOUBLE PRECISION, INTENT(out) :: ppform(:,:,:,:,:) + INTEGER :: m, mm, mmm +! + DO mmm=1,SIZE(c,4) + DO mm=1,SIZE(c,3) + DO m=1,SIZE(c,2) + CALL topp0(sp, c(:,m,mm,mmm), ppform(m,mm,mmm,:,:)) + END DO + END DO + END DO +! + END SUBROUTINE topp3 +!=========================================================================== + SUBROUTINE topp3z(sp, c, ppform) +! +! Compute PPFORM of a fuction defined by the spline SP +! and spline coefficients C(1:d,:,:,:) +! + TYPE(spline1d), INTENT(in) :: sp + DOUBLE COMPLEX, INTENT(in) :: c(:,:,:,:) + DOUBLE COMPLEX, INTENT(out) :: ppform(:,:,:,:,:) + INTEGER :: m, mm, mmm +! + DO mmm=1,SIZE(c,4) + DO mm=1,SIZE(c,3) + DO m=1,SIZE(c,2) + CALL topp0z(sp, c(:,m,mm,mmm), ppform(m,mm,mmm,:,:)) + END DO + END DO + END DO +! + END SUBROUTINE topp3z +!=========================================================================== + SUBROUTINE topp4(sp, c, ppform) +! +! Compute PPFORM of a fuction defined by the spline SP +! and spline coefficients C(1:d,:,:) +! + TYPE(spline1d), INTENT(in) :: sp + DOUBLE PRECISION, INTENT(in) :: c(:,:,:,:,:) + DOUBLE PRECISION, INTENT(out) :: ppform(:,:,:,:,:,:) + INTEGER :: m, mm, mmm, mmmm +! + DO mmmm=1,SIZE(c,5) + DO mmm=1,SIZE(c,4) + DO mm=1,SIZE(c,3) + DO m=1,SIZE(c,2) + CALL topp0(sp, c(:,m,mm,mmm,mmmm), ppform(m,mm,mmm,mmmm,:,:)) + END DO + END DO + END DO + END DO +! + END SUBROUTINE topp4 +!=========================================================================== + SUBROUTINE topp4z(sp, c, ppform) +! +! Compute PPFORM of a fuction defined by the spline SP +! and spline coefficients C(1:d,:,:) +! + TYPE(spline1d), INTENT(in) :: sp + DOUBLE COMPLEX, INTENT(in) :: c(:,:,:,:,:) + DOUBLE COMPLEX, INTENT(out) :: ppform(:,:,:,:,:,:) + INTEGER :: m, mm, mmm, mmmm +! + DO mmmm=1,SIZE(c,5) + DO mmm=1,SIZE(c,4) + DO mm=1,SIZE(c,3) + DO m=1,SIZE(c,2) + CALL topp0z(sp, c(:,m,mm,mmm,mmmm), ppform(m,mm,mmm,mmmm,:,:)) + END DO + END DO + END DO + END DO +! + END SUBROUTINE topp4z +!=========================================================================== + SUBROUTINE ppval0(sp, x, ppform, left, jder, f) +! +! Compute function and derivatives from the PP representation +! f(x) = \sum_{k=0}^p ppform(k)*(x-t_{left})^k +! + TYPE(spline1d), INTENT(in) :: sp + DOUBLE PRECISION, INTENT(in) :: x, ppform(:) + INTEGER, INTENT(in) :: left, jder + DOUBLE PRECISION, INTENT(out) :: f + DOUBLE PRECISION :: h, fact + INTEGER :: j, order +! + order = sp%order ! Polynomial degree p + 1 +! + h = x-sp%knots(left) + f = 0.0d0 + IF( jder .LT. 0 .OR. jder .GE. order ) RETURN +! + SELECT CASE (jder) + CASE(0) ! function value + DO j=order,1,-1 + f = f*h + ppform(j) + END DO + CASE(1) ! 1st derivative + DO j=order-1,1,-1 + f = f*h + j*ppform(j+1) + END DO + CASE default ! 2nd and higher derivatives + fact = order-jder + DO j=order,jder+1,-1 + f = f/fact*j*h + ppform(j) + fact = fact-1.0d0 + END DO + DO j=2,jder + f = f*j + END DO + END SELECT + END SUBROUTINE ppval0 +!=========================================================================== + SUBROUTINE ppval0z(sp, x, ppform, left, jder, f) +! +! Compute function and derivatives from the PP representation +! f(x) = \sum_{k=0}^p ppform(k)*(x-t_{left})^k +! + TYPE(spline1d), INTENT(in) :: sp + DOUBLE PRECISION, INTENT(in) :: x + DOUBLE COMPLEX, INTENT(in) :: ppform(:) + INTEGER, INTENT(in) :: left, jder + DOUBLE COMPLEX, INTENT(out) :: f + DOUBLE PRECISION :: h, fact + INTEGER :: j, order +! + order = sp%order ! Polynomial degree p + 1 +! + h = x-sp%knots(left) + f = (0.0d0,0.0d0) + IF( jder .LT. 0 .OR. jder .GE. order ) RETURN +! + SELECT CASE (jder) + CASE(0) ! function value + DO j=order,1,-1 + f = f*h + ppform(j) + END DO + CASE(1) ! 1st derivative + DO j=order-1,1,-1 + f = f*h + j*ppform(j+1) + END DO + CASE default ! 2nd and higher derivatives + fact = order-jder + DO j=order,jder+1,-1 + f = f/fact*j*h + ppform(j) + fact = fact-1.0d0 + END DO + DO j=2,jder + f = f*j + END DO + END SELECT + END SUBROUTINE ppval0z +!=========================================================================== + SUBROUTINE ppval0z_n(sp, x, ppform, left, jder, f) +! +! PPVAL0Z for many points x(:) +! + TYPE(spline1d), INTENT(in) :: sp + DOUBLE PRECISION, INTENT(in) :: x(:) + DOUBLE COMPLEX, INTENT(in) :: ppform(:,:) + INTEGER, INTENT(in) :: left(:), jder + DOUBLE COMPLEX, INTENT(out) :: f(:) + DOUBLE PRECISION :: h(SIZE(x)), fact + INTEGER :: j, order +! + order = sp%order ! Polynomial degree p + 1 +! + h(:) = x(:)-sp%knots(left(:)) + f(:) = 0.0d0 + IF( jder .LT. 0 .OR. jder .GE. order ) RETURN +! + SELECT CASE (jder) + CASE(0) ! function value + DO j=order,1,-1 + f(:) = f(:)*h(:) + ppform(:,j) + END DO + CASE(1) ! 1st derivative + DO j=order-1,1,-1 + f(:) = f(:)*h(:) + j*ppform(:,j+1) + END DO + CASE default ! 2nd and higher derivatives + fact = order-jder + DO j=order,jder+1,-1 + f(:) = f(:)/fact*j*h(:) + ppform(:,j) + fact = fact-1.0d0 + END DO + DO j=2,jder + f(:) = f(:)*j + END DO + END SELECT + END SUBROUTINE ppval0z_n +!=========================================================================== + SUBROUTINE ppval0_n(sp, x, ppform, left, jder, f) +! +! PPVAL0 for many points x(:) +! + TYPE(spline1d), INTENT(in) :: sp + DOUBLE PRECISION, INTENT(in) :: x(:), ppform(:,:) + INTEGER, INTENT(in) :: left(:), jder + DOUBLE PRECISION, INTENT(out) :: f(:) + DOUBLE PRECISION :: h(SIZE(x)), fact + INTEGER :: j, order +! + order = sp%order ! Polynomial degree p + 1 +! + h(:) = x(:)-sp%knots(left(:)) + f(:) = 0.0d0 + IF( jder .LT. 0 .OR. jder .GE. order ) RETURN +! + SELECT CASE (jder) + CASE(0) ! function value + DO j=order,1,-1 + f(:) = f(:)*h(:) + ppform(:,j) + END DO + CASE(1) ! 1st derivative + DO j=order-1,1,-1 + f(:) = f(:)*h(:) + j*ppform(:,j+1) + END DO + CASE default ! 2nd and higher derivatives + fact = order-jder + DO j=order,jder+1,-1 + f(:) = f(:)/fact*j*h(:) + ppform(:,j) + fact = fact-1.0d0 + END DO + DO j=2,jder + f(:) = f(:)*j + END DO + END SELECT + END SUBROUTINE ppval0_n +!=========================================================================== + SUBROUTINE ppval1(sp, x, ppform, left, jder, f) +! +! Compute function and derivatives from the PP representation +! f(x) = \sum_{k=0}^p ppform(k)*(x-t_{left})^k +! + TYPE(spline1d), INTENT(in) :: sp + DOUBLE PRECISION, INTENT(in) :: x, ppform(:,:) + INTEGER, INTENT(in) :: left, jder + DOUBLE PRECISION, INTENT(out) :: f(:) + DOUBLE PRECISION :: h, fact + INTEGER :: j, order +! + order = sp%order ! Polynomial degree p + 1 +! + h = x-sp%knots(left) + f = 0.0d0 + IF( jder .LT. 0 .OR. jder .GE. order ) RETURN +! + SELECT CASE (jder) + CASE(0) ! function value + DO j=order,1,-1 + f(:) = f(:)*h + ppform(j,:) + END DO + CASE(1) ! 1st derivative + DO j=order-1,1,-1 + f(:) = f(:)*h + j*ppform(j+1,:) + END DO + CASE default ! 2nd and higher derivatives + fact = order-jder + DO j=order,jder+1,-1 + f(:) = f(:)/fact*j*h + ppform(j,:) + fact = fact-1.0d0 + END DO + DO j=2,jder + f(:) = f(:)*j + END DO + END SELECT + END SUBROUTINE ppval1 +!=========================================================================== + SUBROUTINE ppval1z(sp, x, ppform, left, jder, f) +! +! Compute function and derivatives from the PP representation +! f(x) = \sum_{k=0}^p ppform(k)*(x-t_{left})^k +! + TYPE(spline1d), INTENT(in) :: sp + DOUBLE PRECISION, INTENT(in) :: x + DOUBLE COMPLEX, INTENT(in) :: ppform(:,:) + INTEGER, INTENT(in) :: left, jder + DOUBLE COMPLEX, INTENT(out) :: f(:) + DOUBLE PRECISION :: h, fact + INTEGER :: j, order +! + order = sp%order ! Polynomial degree p + 1 +! + h = x-sp%knots(left) + f = (0.0d0,0.0d0) + IF( jder .LT. 0 .OR. jder .GE. order ) RETURN +! + SELECT CASE (jder) + CASE(0) ! function value + DO j=order,1,-1 + f(:) = f(:)*h + ppform(j,:) + END DO + CASE(1) ! 1st derivative + DO j=order-1,1,-1 + f(:) = f(:)*h + j*ppform(j+1,:) + END DO + CASE default ! 2nd and higher derivatives + fact = order-jder + DO j=order,jder+1,-1 + f(:) = f(:)/fact*j*h + ppform(j,:) + fact = fact-1.0d0 + END DO + DO j=2,jder + f(:) = f(:)*j + END DO + END SELECT + END SUBROUTINE ppval1z +!=========================================================================== + SUBROUTINE ppval2(sp, x, ppform, left, jder, f) +! +! Compute function and derivatives from the PP representation +! f(x) = \sum_{k=0}^p ppform(k)*(x-t_{left})^k +! + TYPE(spline1d), INTENT(in) :: sp + DOUBLE PRECISION, INTENT(in) :: x, ppform(:,:,:) + INTEGER, INTENT(in) :: left, jder + DOUBLE PRECISION, INTENT(out) :: f(:,:) + DOUBLE PRECISION :: h, fact + INTEGER :: j, order +! + order = sp%order ! Polynomial degree p + 1 +! + h = x-sp%knots(left) + f = 0.0d0 + IF( jder .LT. 0 .OR. jder .GE. order ) RETURN +! + SELECT CASE (jder) + CASE(0) ! function value + DO j=order,1,-1 + f(:,:) = f(:,:)*h + ppform(j,:,:) + END DO + CASE(1) ! 1st derivative + DO j=order-1,1,-1 + f(:,:) = f(:,:)*h + j*ppform(j+1,:,:) + END DO + CASE default ! 2nd and higher derivatives + fact = order-jder + DO j=order,jder+1,-1 + f(:,:) = f(:,:)/fact*j*h + ppform(j,:,:) + fact = fact-1.0d0 + END DO + DO j=2,jder + f(:,:) = f(:,:)*j + END DO + END SELECT + END SUBROUTINE ppval2 +!=========================================================================== + SUBROUTINE ppval2z(sp, x, ppform, left, jder, f) +! +! Compute function and derivatives from the PP representation +! f(x) = \sum_{k=0}^p ppform(k)*(x-t_{left})^k +! + TYPE(spline1d), INTENT(in) :: sp + DOUBLE PRECISION, INTENT(in) :: x + DOUBLE COMPLEX, INTENT(in) :: ppform(:,:,:) + INTEGER, INTENT(in) :: left, jder + DOUBLE COMPLEX, INTENT(out) :: f(:,:) + DOUBLE PRECISION :: h, fact + INTEGER :: j, order +! + order = sp%order ! Polynomial degree p + 1 +! + h = x-sp%knots(left) + f = (0.0d0,0.0d0) + IF( jder .LT. 0 .OR. jder .GE. order ) RETURN +! + SELECT CASE (jder) + CASE(0) ! function value + DO j=order,1,-1 + f(:,:) = f(:,:)*h + ppform(j,:,:) + END DO + CASE(1) ! 1st derivative + DO j=order-1,1,-1 + f(:,:) = f(:,:)*h + j*ppform(j+1,:,:) + END DO + CASE default ! 2nd and higher derivatives + fact = order-jder + DO j=order,jder+1,-1 + f(:,:) = f(:,:)/fact*j*h + ppform(j,:,:) + fact = fact-1.0d0 + END DO + DO j=2,jder + f(:,:) = f(:,:)*j + END DO + END SELECT + END SUBROUTINE ppval2z +!=========================================================================== + SUBROUTINE locintv0_old(sp, x, left) +! +! Locate the interval containing x +! Should be in [0, nints-1] +! + TYPE(spline1d) :: sp + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: left + DOUBLE PRECISION :: hinv + INTEGER :: nints +! + nints = sp%nints +! +! Case of equidistant mesh + IF( sp%nlequid) THEN + hinv = sp%hinv + left = MAX(0,MIN(FLOOR((x-sp%knots(0))*hinv),nints-1)) + RETURN + END IF +! +! Non-equistant mesh + left = sp%left + DO + IF( left .EQ. nints ) THEN + left = nints-1 + EXIT + END IF + IF( left .LT. 0 ) THEN + left = 0 + EXIT + END IF + IF( x .LT. sp%knots(left+1) ) THEN + IF( x .GE. sp%knots(left) ) THEN + EXIT + ELSE + left = left-1 + END IF + ELSE + left = left+1 + END IF + END DO + IF(left .GT. 0 .AND. left .LT. nints) THEN + sp%left = left + END IF + END SUBROUTINE locintv0_old +! +!=========================================================================== + SUBROUTINE locintv1_old(sp, x, left) +! +! Locate the intervals left(:) containing x(:) +! Should be in [0, nints-1] +! + TYPE(spline1d) :: sp + DOUBLE PRECISION, INTENT(in) :: x(:) + INTEGER, INTENT(out) :: left(:) + DOUBLE PRECISION :: hinv + INTEGER :: nints, i +! +! Case of equidistant mesh + nints = sp%nints + IF( sp%nlequid) THEN + hinv = sp%hinv + left(:) = MAX(0,MIN(FLOOR((x(:)-sp%knots(0))*hinv),nints-1)) + RETURN + END IF +! +! Non-equistant mesh + DO i=1,SIZE(x) + CALL locintv0_old(sp, x(i), left(i)) + END DO + END SUBROUTINE locintv1_old +! +!=========================================================================== + SUBROUTINE locintv0(sp, x, left) +! +! Locate the interval containing x +! Should be in [0, nints-1] +! + TYPE(spline1d) :: sp + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: left + DOUBLE PRECISION :: hinv + INTEGER :: l, nf, nints +! + nints = sp%nints +! +! Case of equidistant mesh + IF( sp%nlequid) THEN + hinv = sp%hinv + left = MAX(0,MIN(FLOOR((x-sp%knots(0))*hinv),nints-1)) + RETURN + END IF +! +! Non-equistant mesh + hinv = sp%hinv + nf = SIZE(sp%fmap) - 1 + l = MAX(0,MIN(FLOOR((x-sp%knots(0))*hinv),nf-1)) ! left on fine mesh + left = sp%fmap(l) + IF( x.GE.sp%knots(left+1) ) left = MIN(left+1,nints-1) + END SUBROUTINE locintv0 +! +!=========================================================================== + SUBROUTINE locintv1(sp, x, left) +! +! Locate the intervals left(:) containing x(:) +! Should be in [0, nints-1] +! + TYPE(spline1d) :: sp + DOUBLE PRECISION, INTENT(in) :: x(:) + INTEGER, INTENT(out) :: left(:) + INTEGER :: l(SIZE(x)) + DOUBLE PRECISION :: hinv + INTEGER :: npt, nf, nints, i +! + npt = SIZE(x) +! +! Case of equidistant mesh + nints = sp%nints + IF( sp%nlequid) THEN + hinv = sp%hinv + left(1:npt) = MAX(0,MIN(FLOOR((x(1:npt)-sp%knots(0))*hinv),nints-1)) + RETURN + END IF +! +! Non-equistant mesh + hinv = sp%hinv + nf = SIZE(sp%fmap) - 1 + l(:) = MAX(0,MIN(FLOOR((x(:)-sp%knots(0))*hinv),nf-1)) ! left on fine mesh + left(1:npt) = sp%fmap(l(1:npt)) + WHERE( x.GE.sp%knots(left+1) ) left = MIN(left+1,nints-1) + END SUBROUTINE locintv1 +! +!=========================================================================== + SUBROUTINE destroy_sp1d(sp) +! +! Clean up 1d spline object +! + TYPE(spline1d) :: sp +! + IF( ASSOCIATED(sp%knots) ) DEALLOCATE (sp%knots) + IF( ASSOCIATED(sp%val0) ) DEALLOCATE (sp%val0) + IF( ASSOCIATED(sp%valc) ) DEALLOCATE (sp%valc) + IF( ASSOCIATED(sp%gausx) ) DEALLOCATE (sp%gausx) + IF( ASSOCIATED(sp%gausw) ) DEALLOCATE (sp%gausw) + IF( ASSOCIATED(sp%intspl) ) DEALLOCATE (sp%intspl) + IF( ASSOCIATED(sp%ppform) ) DEALLOCATE (sp%ppform) + IF( ASSOCIATED(sp%ppformz) ) DEALLOCATE (sp%ppformz) + IF( ASSOCIATED(sp%bcoefs) ) DEALLOCATE(sp%bcoefs) + IF( ASSOCIATED(sp%bcoefsc) ) DEALLOCATE(sp%bcoefsc) + IF( ASSOCIATED(sp%fmap) ) DEALLOCATE(sp%fmap) +! + CALL destroy(sp%mat) + CALL destroy(sp%matp) + CALL destroy_dftmap(sp%dft) + END SUBROUTINE destroy_sp1d +! +!=========================================================================== + SUBROUTINE destroy_dftmap(m) +! +! Clean up DFTMAP +! + TYPE(dftmap) :: m +! + IF(ASSOCIATED(m%coefs)) DEALLOCATE(m%coefs) + END SUBROUTINE destroy_dftmap +!=========================================================================== + SUBROUTINE destroy_sp2d(sp) +! +! Clean up 2d spline object +! + TYPE(spline2d) :: sp +! + IF( ASSOCIATED(sp%ppform) ) DEALLOCATE(sp%ppform) + IF( ASSOCIATED(sp%ppformz) ) DEALLOCATE(sp%ppformz) + IF( ASSOCIATED(sp%bcoefs) ) DEALLOCATE(sp%bcoefs) + IF( ASSOCIATED(sp%bcoefsc) ) DEALLOCATE(sp%bcoefsc) + CALL destroy_sp1d(sp%sp1) + CALL destroy_sp1d(sp%sp2) + END SUBROUTINE destroy_sp2d +!=========================================================================== + SUBROUTINE destroy_sp2d1d(sp) +! +! Clean up 2d1d spline object +! + TYPE(spline2d1d) :: sp +! + IF( ASSOCIATED(sp%ppform) ) DEALLOCATE(sp%ppform) + IF( ASSOCIATED(sp%ppformz) ) DEALLOCATE (sp%ppformz) + IF( ASSOCIATED(sp%bcoefs) ) DEALLOCATE(sp%bcoefs) + IF( ASSOCIATED(sp%bcoefsc) ) DEALLOCATE(sp%bcoefsc) + CALL destroy_sp2d(sp%sp12) + CALL destroy_sp1d(sp%sp3) + END SUBROUTINE destroy_sp2d1d +! +!=========================================================================== + SUBROUTINE calc_integ0(sp, finteg) +! +! Compute integral of splines +! + TYPE(spline1d) :: sp + DOUBLE PRECISION, INTENT(out) :: finteg(0:) + DOUBLE PRECISION, ALLOCATABLE :: xg(:), wg(:), fun(:,:) + DOUBLE PRECISION :: x1, x2 + INTEGER :: dim, nx, nidbas, ng, i, ig, j, jj, left +! + CALL get_dim(sp, dim, nx, nidbas) + ng = MAX(2, (nidbas+2)/2) + ALLOCATE(xg(ng), wg(ng), fun(0:nidbas,1)) + fun = 0.0d0 + finteg = 0.0d0 + DO i=1,nx ! Loop thru the intervals + left = i + x1 = sp%knots(i-1) + x2 = sp%knots(i) + CALL gauleg(x1, x2, xg, wg, ng) + DO ig=1,ng ! Loop thru Gauss points + CALL basfun(xg(ig), sp, fun, i) + left = i-1 + DO j=0,nidbas ! Loop thru the splines [left:left+nidbas] + jj = left+j ! in this interval + IF( sp%period ) jj = MODULO(left+j, nx) + finteg(jj) = finteg(jj) + wg(ig)*fun(j,1) + END DO + END DO + END DO + DEALLOCATE(xg, wg, fun) + END SUBROUTINE calc_integ0 +!=========================================================================== + SUBROUTINE calc_integn(sp, finteg) +! +! Compute integrals = Int( x^a \Lambda_j(x) ), a=0,1,... n +! + TYPE(spline1d) :: sp + DOUBLE PRECISION, INTENT(out) :: finteg(0:,0:) + DOUBLE PRECISION, ALLOCATABLE :: xg(:), wg(:), fun(:,:) + DOUBLE PRECISION :: x1, x2, xpow + INTEGER :: dim, nx, nidbas, ng, i, ig, j, k, jj, left + INTEGER :: nord +! + nord = SIZE(finteg,2)-1 + CALL get_dim(sp, dim, nx, nidbas) + ng = MAX(2, (nidbas+nord+2)/2) + ALLOCATE(xg(ng), wg(ng), fun(0:nidbas,1)) + fun = 0.0d0 + finteg = 0.0d0 + DO i=1,nx ! Loop thru the intervals + left = i + x1 = sp%knots(i-1) + x2 = sp%knots(i) + CALL gauleg(x1, x2, xg, wg, ng) + DO ig=1,ng ! Loop thru Gauss points + CALL basfun(xg(ig), sp, fun, i) + left=i-1 + DO j=0,nidbas ! Loop thru the splines [left:left+nidbas] + jj = left+j ! in this interval + IF( sp%period ) jj = MODULO(left+j, nx) + xpow = wg(ig)*fun(j,1) + DO k=0,nord + finteg(jj,k) = finteg(jj,k) + xpow + xpow = xpow*xg(ig) + END DO + END DO + END DO + END DO + DEALLOCATE(xg, wg, fun) + END SUBROUTINE calc_integn +!=========================================================================== +! + DOUBLE PRECISION FUNCTION fintg1(sp, c) +! +! Integral of 1d function from its spline coefs c. +! + TYPE(spline1d) :: sp + DOUBLE PRECISION, INTENT(in) :: c(0:) + INTEGER :: dim + dim = sp%dim + fintg1 = DOT_PRODUCT(sp%intspl(0:dim-1), c(0:dim-1)) + END FUNCTION fintg1 +!=========================================================================== + DOUBLE PRECISION FUNCTION fintg2(sp, c) +! +! Integral of 2d function from its spline coefs c. +! + TYPE(spline2d) :: sp + DOUBLE PRECISION, INTENT(in) :: c(0:,0:) + INTEGER :: dim1, dim2, i, j + dim1 = sp%sp1%dim + dim2 = sp%sp2%dim + fintg2 = 0.0d0 + DO j=0,dim2-1 + DO i=0,dim1-1 + fintg2 = fintg2 + c(i,j)*sp%sp1%intspl(i)*sp%sp2%intspl(j) + END DO + END DO + END FUNCTION fintg2 +!=========================================================================== + SUBROUTINE gridval2d_2d(sp, xp, yp, fp, jder, c, ppform) +! +! Compute values or jder-th dervivative of f(x,y) from ppform +! of spline sp. Recompute the ppform if the optional spline +! coefficients c are given. +! +! F(I,J) = F(X(I), Y(J)) +! + TYPE(spline2d), INTENT(inout) :: sp + DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: xp, yp + DOUBLE PRECISION, DIMENSION(:,:), INTENT(out) :: fp + INTEGER, INTENT(in) :: jder(2) + DOUBLE PRECISION, DIMENSION(:,:), OPTIONAL, INTENT(in) :: c + DOUBLE PRECISION, DIMENSION(:,:,:,:), OPTIONAL :: ppform +! + INTEGER :: d1, d2, k1, k2, n1, n2 + DOUBLE PRECISION, ALLOCATABLE :: work(:,:,:), temp(:) + DOUBLE PRECISION, ALLOCATABLE :: funx(:,:), funy(:,:) + DOUBLE PRECISION :: x(SIZE(xp)), y(SIZE(yp)) + INTEGER :: leftx(SIZE(xp)), lefty(SIZE(yp)) + INTEGER :: i, j, k, ii, jj + LOGICAL :: nlppform +! + d1 = sp%sp1%dim + d2 = sp%sp2%dim + k1 = sp%sp1%order + k2 = sp%sp2%order + n1 = sp%sp1%nints + n2 = sp%sp2%nints + nlppform = sp%sp1%nlppform .OR. sp%sp2%nlppform +! +! Compute PPFORMM/BCOEFS if spline coefs are passed +! + IF( PRESENT(c) ) THEN + IF( nlppform ) THEN + ALLOCATE(work(d2,k1,n1)) + CALL topp1(sp%sp1, c , work) + IF(PRESENT(ppform)) THEN + CALL topp2(sp%sp2, work, ppform) + ELSE + IF( ASSOCIATED(sp%ppform) ) DEALLOCATE(sp%ppform) + ALLOCATE(sp%ppform(k1,n1,k2,n2)) + CALL topp2(sp%sp2, work, sp%ppform) + END IF + DEALLOCATE(work) + ELSE + IF( ASSOCIATED(sp%bcoefs) ) DEALLOCATE(sp%bcoefs) + ALLOCATE(sp%bcoefs(SIZE(c,1),SIZE(c,2))) + sp%bcoefs = c + END IF + END IF +! +! Applly periodicity if required +! + IF( sp%sp1%period ) THEN ! ** Applly periodicity ** + x = sp%sp1%knots(0) + MODULO(xp-sp%sp1%knots(0), sp%sp1%lperiod) + ELSE + x = xp + END IF + IF( sp%sp2%period ) THEN ! ** Applly periodicity ** + y = sp%sp2%knots(0) + MODULO(yp-sp%sp2%knots(0), sp%sp2%lperiod) + ELSE + y = yp + END IF +! +! Locate interval containing (x,y) +! + CALL locintv(sp%sp1, x, leftx) + CALL locintv(sp%sp2, y, lefty) +! +! Compute function/derivatives +! + IF( nlppform ) THEN ! using PP form + ALLOCATE(temp(k2)) + DO j=1,SIZE(y) + DO i=1,SIZE(x) + IF(PRESENT(ppform)) THEN + CALL ppval(sp%sp1, x(i), ppform(:,leftx(i)+1,:,lefty(j)+1),& + & leftx(i), jder(1), temp) + ELSE + CALL ppval(sp%sp1, x(i), sp%ppform(:,leftx(i)+1,:,lefty(j)+1),& + & leftx(i), jder(1), temp) + END IF + CALL ppval(sp%sp2, y(j), temp, lefty(j), jder(2), fp(i,j)) + END DO + END DO + DEALLOCATE(temp) + ELSE ! using spline expansion + ALLOCATE(funx(1:k1,0:jder(1)), funy(1:k2,0:jder(2))) + fp = 0.0d0 + DO j=1,SIZE(y) + CALL basfun(y(j), sp%sp2, funy, lefty(j)+1) + DO i=1,SIZE(x) + CALL basfun(x(i), sp%sp1, funx, leftx(i)+1) + DO jj=1,k2 + DO ii=1,k1 + fp(i,j) = fp(i,j) + sp%bcoefs(leftx(i)+ii,lefty(j)+jj) * & + & funx(ii,jder(1))*funy(jj,jder(2)) + END DO + END DO + END DO + END DO + DEALLOCATE(funx, funy) + END IF + END SUBROUTINE gridval2d_2d +!=========================================================================== + SUBROUTINE gridval2d1d_3d(sp, xp, yp, zp, fp, jder, c, ppform) +! +! Compute values or jder-th dervivative of f(x,y,z) from spline +! coefficients (nlppform=.false.) or ppform (nlppform=.true.) +! +! F(I,J,K) = F(X(I), Y(J), Z(K)) +! + TYPE(spline2d1d), TARGET :: sp + DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: xp, yp, zp + DOUBLE PRECISION, DIMENSION(:,:,:), INTENT(out) :: fp + INTEGER, INTENT(in) :: jder(3) + DOUBLE PRECISION, DIMENSION(:,:,:), OPTIONAL, INTENT(in) :: c + DOUBLE PRECISION, DIMENSION(:,:,:,:,:,:), OPTIONAL :: ppform +! + TYPE(spline2d), POINTER :: sp2 + DOUBLE PRECISION, ALLOCATABLE :: work1(:,:,:,:), work2(:,:,:,:,:) + DOUBLE PRECISION, ALLOCATABLE :: temp1(:,:), temp2(:) + DOUBLE PRECISION, ALLOCATABLE :: funx(:,:), funy(:,:), funz(:,:) + DOUBLE PRECISION :: x(SIZE(xp)), y(SIZE(yp)), z(SIZE(zp)) + INTEGER :: leftx(SIZE(xp)), lefty(SIZE(yp)), leftz(SIZE(zp)) + INTEGER :: d1, d2, d3, k1, k2, k3, n1, n2, n3 + INTEGER :: ipx, ipy, ipz, k, ii, jj, kk + LOGICAL :: nlppform +!-------------------------------------------------------------------------------- +! 1. Prologue +! + sp2 => sp%sp12 + d1 = sp2%sp1%dim + d2 = sp2%sp2%dim + d3 = sp%sp3%dim + k1 = sp2%sp1%order + k2 = sp2%sp2%order + k3 = sp%sp3%order + n1 = sp2%sp1%nints + n2 = sp2%sp2%nints + n3 = sp%sp3%nints + nlppform = sp2%sp1%nlppform .OR. sp2%sp2%nlppform .OR. sp%sp3%nlppform +! +! Applly periodicity if required + IF( sp2%sp1%period ) THEN + x = sp2%sp1%knots(0) + MODULO(xp-sp2%sp1%knots(0), sp2%sp1%lperiod) + ELSE + x = xp + END IF + IF( sp2%sp2%period ) THEN + y = sp2%sp2%knots(0) + MODULO(yp-sp2%sp2%knots(0), sp2%sp2%lperiod) + ELSE + y = yp + END IF + IF( sp%sp3%period ) THEN + z = sp%sp3%knots(0) + MODULO(zp-sp%sp3%knots(0), sp%sp3%lperiod) + ELSE + z = zp + END IF +! +! Locate interval containing (x,y,z) + CALL locintv(sp2%sp1, x, leftx) + CALL locintv(sp2%sp2, y, lefty) + CALL locintv(sp%sp3, z, leftz) +!-------------------------------------------------------------------------------- +! 2. Using PPFORM +! + IF( nlppform ) THEN +! +! Compute PPFORM from BCOEF + IF( PRESENT(c) ) THEN + ALLOCATE(work2(d3,k1,n1,k2,n2)) + ALLOCATE(work1(d2,d3,k1,n1)) + CALL topp2(sp2%sp1, c, work1) + CALL topp3(sp2%sp2, work1, work2) + DEALLOCATE(work1) + IF( PRESENT(ppform) )THEN + CALL topp4(sp%sp3, work2, ppform) + ELSE + IF( ASSOCIATED(sp%ppform) ) DEALLOCATE(sp%ppform) + ALLOCATE(sp%ppform(k1,n1,k2,n2,k3,n3)) + CALL topp4(sp%sp3, work2, sp%ppform) + END IF + DEALLOCATE(work2) + END IF +! +! Compute function/derivatives + ALLOCATE(temp1(k2,k3)) + ALLOCATE(temp2(k3)) + DO ipz=1,SIZE(z) + DO ipy=1,SIZE(y) + DO ipx=1,SIZE(x) + IF(PRESENT(ppform)) THEN + CALL ppval(sp2%sp1, x(ipx), & + & ppform(:,leftx(ipx)+1,:,lefty(ipy)+1,:,leftz(ipz)+1),& + & leftx(ipx), jder(1), temp1) + ELSE + CALL ppval(sp2%sp1, x(ipx), & + & sp%ppform(:,leftx(ipx)+1,:,lefty(ipy)+1,:,leftz(ipz)+1),& + & leftx(ipx), jder(1), temp1) + END IF + CALL ppval(sp2%sp2, y(ipy), temp1, lefty(ipy), jder(2), & + & temp2) + CALL ppval(sp%sp3, z(ipz), temp2, leftz(ipz), jder(3), & + & fp(ipx,ipy,ipz)) + END DO + END DO + END DO + DEALLOCATE(temp1) + DEALLOCATE(temp2) +!-------------------------------------------------------------------------------- +! 3. Using spline expansion +! + ELSE + IF( PRESENT(c) ) THEN + IF( ASSOCIATED(sp%bcoefs) ) DEALLOCATE(sp%bcoefs) + ALLOCATE(sp%bcoefs(SIZE(c,1),SIZE(c,2),SIZE(c,3))) + sp%bcoefs = c + END IF +! +! Compute function/derivatives + ALLOCATE(funx(1:k1,0:jder(1))) + ALLOCATE(funy(1:k2,0:jder(2))) + ALLOCATE(funz(1:k3,0:jder(3))) + fp = 0.0d0 + DO ipz=1,SIZE(z) + CALL basfun(z(ipz), sp%sp3, funz, leftz(ipz)+1) + DO ipy=1,SIZE(y) + CALL basfun(y(ipy), sp2%sp2, funy, lefty(ipy)+1) + DO ipx=1,SIZE(x) + CALL basfun(x(ipx), sp2%sp1, funx, leftx(ipx)+1) + DO kk=1,k3 + DO jj=1,k2 + DO ii=1,k1 + fp(ipx,ipy,ipz) = fp(ipx,ipy,ipz) + & + & sp%bcoefs(leftx(ipx)+ii,lefty(ipy)+jj,leftz(ipz)+kk) * & + & funx(ii,jder(1)) * funy(jj,jder(2)) * funz(kk,jder(3)) + END DO + END DO + END DO + END DO + END DO + END DO + DEALLOCATE(funx, funy, funz) + END IF + END SUBROUTINE gridval2d1d_3d +!=========================================================================== + SUBROUTINE gridval2d1d_1d(sp, xp, yp, zp, fp, jder, c, ppform) +! +! Compute values or jder-th dervivative of f(x,y,z) from spline +! coefficients (nlppform=.false.) or ppform (nlppform=.true.) +! +! F(I) = F(X(I),Y(I),Z(I)) +! + TYPE(spline2d1d), TARGET :: sp + DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: xp, yp, zp + DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: fp + INTEGER, INTENT(in) :: jder(3) + DOUBLE PRECISION, DIMENSION(:,:,:), OPTIONAL, INTENT(in) :: c + DOUBLE PRECISION, DIMENSION(:,:,:,:,:,:), OPTIONAL :: ppform +! + TYPE(spline2d), POINTER :: sp2 + DOUBLE PRECISION, ALLOCATABLE :: work1(:,:,:,:), work2(:,:,:,:,:) + DOUBLE PRECISION, ALLOCATABLE :: temp1(:,:), temp2(:) + DOUBLE PRECISION, ALLOCATABLE :: funx(:,:), funy(:,:), funz(:,:) + DOUBLE PRECISION :: x(SIZE(xp)), y(SIZE(yp)), z(SIZE(zp)) + INTEGER :: leftx(SIZE(xp)), lefty(SIZE(yp)), leftz(SIZE(zp)) + INTEGER :: d1, d2, d3, k1, k2, k3, n1, n2, n3 + INTEGER :: np, ip, ii, jj, kk + LOGICAL :: nlppform +!-------------------------------------------------------------------------------- +! 1. Prologue +! + sp2 => sp%sp12 + d1 = sp2%sp1%dim + d2 = sp2%sp2%dim + d3 = sp%sp3%dim + k1 = sp2%sp1%order + k2 = sp2%sp2%order + k3 = sp%sp3%order + n1 = sp2%sp1%nints + n2 = sp2%sp2%nints + n3 = sp%sp3%nints + np = SIZE(xp) + nlppform = sp2%sp1%nlppform .OR. sp2%sp2%nlppform .OR. sp%sp3%nlppform +! +! Applly periodicity if required + IF( sp2%sp1%period ) THEN + x = sp2%sp1%knots(0) + MODULO(xp-sp2%sp1%knots(0), sp2%sp1%lperiod) + ELSE + x = xp + END IF + IF( sp2%sp2%period ) THEN + y = sp2%sp2%knots(0) + MODULO(yp-sp2%sp2%knots(0), sp2%sp2%lperiod) + ELSE + y = yp + END IF + IF( sp%sp3%period ) THEN + z = sp%sp3%knots(0) + MODULO(zp-sp%sp3%knots(0), sp%sp3%lperiod) + ELSE + z = zp + END IF +! +! Locate interval containing (x,y,z) + CALL locintv(sp2%sp1, x, leftx) + CALL locintv(sp2%sp2, y, lefty) + CALL locintv(sp%sp3, z, leftz) +!-------------------------------------------------------------------------------- +! 2. Using PPFORM +! + IF( nlppform ) THEN +! +! Compute PPFORM from BCOEF + IF( PRESENT(c) ) THEN + ALLOCATE(work2(d3,k1,n1,k2,n2)) + ALLOCATE(work1(d2,d3,k1,n1)) + CALL topp2(sp2%sp1, c, work1) + CALL topp3(sp2%sp2, work1, work2) + DEALLOCATE(work1) + IF( PRESENT(ppform) )THEN + CALL topp4(sp%sp3, work2, ppform) + ELSE + IF( ASSOCIATED(sp%ppform) ) DEALLOCATE(sp%ppform) + ALLOCATE(sp%ppform(k1,n1,k2,n2,k3,n3)) + CALL topp4(sp%sp3, work2, sp%ppform) + END IF + DEALLOCATE(work2) + END IF +! +! Compute function/derivatives + ALLOCATE(temp1(k2,k3)) + ALLOCATE(temp2(k3)) + DO ip=1,np + IF(PRESENT(ppform)) THEN + CALL ppval(sp2%sp1, x(ip), & + & ppform(:,leftx(ip)+1,:,lefty(ip)+1,:,leftz(ip)+1),& + & leftx(ip), jder(1), temp1) + ELSE + CALL ppval(sp2%sp1, x(ip), & + & sp%ppform(:,leftx(ip)+1,:,lefty(ip)+1,:,leftz(ip)+1),& + & leftx(ip), jder(1), temp1) + END IF + CALL ppval(sp2%sp2, y(ip), temp1, lefty(ip), jder(2), temp2) + CALL ppval(sp%sp3, z(ip), temp2, leftz(ip), jder(3), fp(ip)) + END DO + DEALLOCATE(temp1) + DEALLOCATE(temp2) +!-------------------------------------------------------------------------------- +! 3. Using spline expansion +! + ELSE + IF( PRESENT(c) ) THEN + IF( ASSOCIATED(sp%bcoefs) ) DEALLOCATE(sp%bcoefs) + ALLOCATE(sp%bcoefs(SIZE(c,1),SIZE(c,2),SIZE(c,3))) + sp%bcoefs = c + END IF +! +! Compute function/derivatives + ALLOCATE(funx(1:k1,0:jder(1))) + ALLOCATE(funy(1:k2,0:jder(2))) + ALLOCATE(funz(1:k3,0:jder(3))) + fp = 0.0d0 + DO ip=1,np + CALL basfun(x(ip), sp2%sp1, funx, leftx(ip)+1) + CALL basfun(y(ip), sp2%sp2, funy, lefty(ip)+1) + CALL basfun(z(ip), sp%sp3, funz, leftz(ip)+1) + DO kk=1,k3 + DO jj=1,k2 + DO ii=1,k1 + fp(ip) = fp(ip) + & + & sp%bcoefs(leftx(ip)+ii,lefty(ip)+jj,leftz(ip)+kk) * & + & funx(ii,jder(1))*funy(jj,jder(2))*funz(kk,jder(3)) + END DO + END DO + END DO + END DO + DEALLOCATE(funx, funy, funz) + END IF + END SUBROUTINE gridval2d1d_1d +!=========================================================================== + SUBROUTINE gridval2d(sp, xp, yp, fp, jder, c, ppform) +! +! Compute values or jder-th dervivative of f(x,y) from ppform +! of spline sp. Recompute the ppform if the optional spline +! coefficients c are given. +! +! F = F(X, Y) +! + TYPE(spline2d), INTENT(inout) :: sp + DOUBLE PRECISION, INTENT(in) :: xp, yp + DOUBLE PRECISION, INTENT(out) :: fp + INTEGER, INTENT(in) :: jder(2) + DOUBLE PRECISION, DIMENSION(:,:), OPTIONAL, INTENT(in) :: c + DOUBLE PRECISION, DIMENSION(:,:,:,:), OPTIONAL :: ppform +! + INTEGER :: d1, d2, k1, k2, n1, n2 + DOUBLE PRECISION, ALLOCATABLE :: work(:,:,:), temp(:) + DOUBLE PRECISION, ALLOCATABLE :: funx(:,:), funy(:,:) + DOUBLE PRECISION :: x, y + INTEGER :: leftx, lefty + INTEGER :: i, j, k, ii, jj + LOGICAL :: nlppform +! + d1 = sp%sp1%dim + d2 = sp%sp2%dim + k1 = sp%sp1%order + k2 = sp%sp2%order + n1 = sp%sp1%nints + n2 = sp%sp2%nints + nlppform = sp%sp1%nlppform .OR. sp%sp2%nlppform +! +! Compute PPFORM/BCOEFS if spline coefs are passed +! + IF( PRESENT(c)) THEN + IF( nlppform ) THEN + ALLOCATE(work(d2,k1,n1)) + CALL topp1(sp%sp1, c , work) + IF(PRESENT(ppform)) THEN + CALL topp2(sp%sp2, work, ppform) + ELSE + IF( ASSOCIATED(sp%ppform) ) DEALLOCATE(sp%ppform) + ALLOCATE(sp%ppform(k1,n1,k2,n2)) + CALL topp2(sp%sp2, work, sp%ppform) + END IF + DEALLOCATE(work) + ELSE + IF( ASSOCIATED(sp%bcoefs) ) DEALLOCATE(sp%bcoefs) + ALLOCATE(sp%bcoefs(SIZE(c,1),SIZE(c,2))) + sp%bcoefs = c + END IF + END IF +! +! Applly periodicity if required +! + IF( sp%sp1%period ) THEN ! ** Applly periodicity ** + x = sp%sp1%knots(0) + MODULO(xp-sp%sp1%knots(0), sp%sp1%lperiod) + ELSE + x = xp + END IF + IF( sp%sp2%period ) THEN ! ** Applly periodicity ** + y = sp%sp2%knots(0) + MODULO(yp-sp%sp2%knots(0), sp%sp2%lperiod) + ELSE + y = yp + END IF +! +! Locate the interval containing x, y +! + CALL locintv(sp%sp1, x, leftx) + CALL locintv(sp%sp2, y, lefty) +! +! Compute function/derivatives +! + IF( nlppform ) THEN ! using PP form + ALLOCATE(temp(k2)) + IF(PRESENT(ppform)) THEN + CALL ppval(sp%sp1, x, ppform(:,leftx+1,:,lefty+1),& + & leftx, jder(1), temp) + ELSE + CALL ppval(sp%sp1, x, sp%ppform(:,leftx+1,:,lefty+1),& + & leftx, jder(1), temp) + END IF + CALL ppval(sp%sp2, y, temp, lefty, jder(2), fp) + DEALLOCATE(temp) + ELSE ! using spline expansion + ALLOCATE(funx(1:k1,0:jder(1)), funy(1:k2,0:jder(2))) + fp = 0.0d0 + CALL basfun(x, sp%sp1, funx, leftx+1) + CALL basfun(y, sp%sp2, funy, lefty+1) + DO jj=1,k2 + DO ii=1,k1 + fp = fp + & + & funy(jj,jder(2))*sp%bcoefs(leftx+ii,lefty+jj)* & + & funx(ii,jder(1)) + END DO + END DO + DEALLOCATE(funx, funy) + END IF + END SUBROUTINE gridval2d +!=========================================================================== + SUBROUTINE gridval2d_1d(sp, xp, yp, fp, jder, c, ppform) +! +! Compute values or jder-th dervivative of f(x,y) from ppform +! of spline sp. Recompute the ppform if the optional spline +! coefficients c are given. +! +! F(I) = F(X(I), Y(I)) +! + TYPE(spline2d), INTENT(inout) :: sp + DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: xp, yp + DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: fp + INTEGER, INTENT(in) :: jder(2) + DOUBLE PRECISION, DIMENSION(:,:), OPTIONAL, INTENT(in) :: c + DOUBLE PRECISION, DIMENSION(:,:,:,:), OPTIONAL :: ppform +! + INTEGER :: d1, d2, k1, k2, n1, n2, np + DOUBLE PRECISION, ALLOCATABLE :: work(:,:,:), temp(:) + DOUBLE PRECISION, ALLOCATABLE :: funx(:,:), funy(:,:) + DOUBLE PRECISION :: x(SIZE(xp)), y(SIZE(yp)) + INTEGER :: leftx(SIZE(xp)), lefty(SIZE(yp)) + INTEGER :: i, j, k, ii, jj + LOGICAL :: nlppform +! + d1 = sp%sp1%dim + d2 = sp%sp2%dim + k1 = sp%sp1%order + k2 = sp%sp2%order + n1 = sp%sp1%nints + n2 = sp%sp2%nints + nlppform = sp%sp1%nlppform .OR. sp%sp2%nlppform +! +! Compute PPFORM/BCOEFS if spline coefs are passed +! + IF( PRESENT(c)) THEN + IF( nlppform ) THEN + ALLOCATE(work(d2,k1,n1)) + CALL topp1(sp%sp1, c , work) + IF(PRESENT(ppform)) THEN + CALL topp2(sp%sp2, work, ppform) + ELSE + IF( ASSOCIATED(sp%ppform) ) DEALLOCATE(sp%ppform) + ALLOCATE(sp%ppform(k1,n1,k2,n2)) + CALL topp2(sp%sp2, work, sp%ppform) + END IF + DEALLOCATE(work) + ELSE + IF( ASSOCIATED(sp%bcoefs) ) DEALLOCATE(sp%bcoefs) + ALLOCATE(sp%bcoefs(SIZE(c,1),SIZE(c,2))) + sp%bcoefs = c + END IF + END IF +! +! Applly periodicity if required +! + np = SIZE(xp) + IF( sp%sp1%period ) THEN ! ** Applly periodicity ** + x = sp%sp1%knots(0) + MODULO(xp-sp%sp1%knots(0), sp%sp1%lperiod) + ELSE + x = xp + END IF + IF( sp%sp2%period ) THEN ! ** Applly periodicity ** + y = sp%sp2%knots(0) + MODULO(yp-sp%sp2%knots(0), sp%sp2%lperiod) + ELSE + y = yp + END IF +! +! Locate the interval containing x, y +! + CALL locintv(sp%sp1, x, leftx) + CALL locintv(sp%sp2, y, lefty) +! +! Compute function/derivatives +! + IF( nlppform ) THEN ! using PP form + ALLOCATE(temp(k2)) + DO i=1,np + IF(PRESENT(ppform)) THEN + CALL ppval(sp%sp1, x(i), ppform(:,leftx(i)+1,:,lefty(i)+1),& + & leftx(i), jder(1), temp) + ELSE + CALL ppval(sp%sp1, x(i), sp%ppform(:,leftx(i)+1,:,lefty(i)+1),& + & leftx(i), jder(1), temp) + END IF + CALL ppval(sp%sp2, y(i), temp, lefty(i), jder(2), fp(i)) + END DO + DEALLOCATE(temp) + ELSE ! using spline expansion + ALLOCATE(funx(1:k1,0:jder(1)), funy(1:k2,0:jder(2))) + fp = 0.0d0 + DO i=1,np + CALL basfun(x(i), sp%sp1, funx, leftx(i)+1) + CALL basfun(y(i), sp%sp2, funy, lefty(i)+1) + DO jj=1,k2 + DO ii=1,k1 + fp(i) = fp(i) + & + & funy(jj,jder(2))*sp%bcoefs(leftx(i)+ii,lefty(i)+jj)* & + & funx(ii,jder(1)) + END DO + END DO + END DO + DEALLOCATE(funx, funy) + END IF + END SUBROUTINE gridval2d_1d +!=========================================================================== + SUBROUTINE gridval2dz(sp, xp, yp, fp, jder, c, ppformz) +! +! Compute values or jder-th dervivative of f(x,y) from ppform +! of spline sp. Recompute the ppform if the optional spline +! coefficients c are given. +! +! F = F(X, Y) +! + TYPE(spline2d), INTENT(inout) :: sp + DOUBLE PRECISION, INTENT(in) :: xp, yp + DOUBLE COMPLEX, INTENT(out) :: fp + INTEGER, INTENT(in) :: jder(2) + DOUBLE COMPLEX, DIMENSION(:,:), OPTIONAL, INTENT(in) :: c + DOUBLE COMPLEX, DIMENSION(:,:,:,:), OPTIONAL :: ppformz +! + INTEGER :: d1, d2, k1, k2, n1, n2 + DOUBLE COMPLEX, ALLOCATABLE :: work(:,:,:), temp(:) + DOUBLE PRECISION, ALLOCATABLE :: funx(:,:), funy(:,:) + DOUBLE PRECISION :: x, y + INTEGER :: leftx, lefty + INTEGER :: i, j, k, ii, jj + LOGICAL :: nlppform +! + d1 = sp%sp1%dim + d2 = sp%sp2%dim + k1 = sp%sp1%order + k2 = sp%sp2%order + n1 = sp%sp1%nints + n2 = sp%sp2%nints + nlppform = sp%sp1%nlppform .OR. sp%sp2%nlppform +! +! Compute PPFORM/BCOEFS if spline coefs are passed +! + IF( PRESENT(c)) THEN + IF( nlppform ) THEN + ALLOCATE(work(d2,k1,n1)) + CALL topp1z(sp%sp1, c , work) + IF(PRESENT(ppformz)) THEN + CALL topp2z(sp%sp2, work, ppformz) + ELSE + IF( ASSOCIATED(sp%ppformz) ) DEALLOCATE(sp%ppformz) + ALLOCATE(sp%ppformz(k1,n1,k2,n2)) + CALL topp2z(sp%sp2, work, sp%ppformz) + END IF + DEALLOCATE(work) + ELSE + IF( ASSOCIATED(sp%bcoefsc) ) DEALLOCATE(sp%bcoefsc) + ALLOCATE(sp%bcoefsc(SIZE(c,1),SIZE(c,2))) + sp%bcoefsc = c + END IF + END IF +! +! Applly periodicity if required +! + IF( sp%sp1%period ) THEN ! ** Applly periodicity ** + x = sp%sp1%knots(0) + MODULO(xp-sp%sp1%knots(0), sp%sp1%lperiod) + ELSE + x = xp + END IF + IF( sp%sp2%period ) THEN ! ** Applly periodicity ** + y = sp%sp2%knots(0) + MODULO(yp-sp%sp2%knots(0), sp%sp2%lperiod) + ELSE + y = yp + END IF +! +! Locate the interval containing x, y +! + CALL locintv(sp%sp1, x, leftx) + CALL locintv(sp%sp2, y, lefty) +! +! Compute function/derivatives +! + IF( nlppform ) THEN ! using PP form + ALLOCATE(temp(k2)) + IF(PRESENT(ppformz)) THEN + CALL ppval(sp%sp1, x, ppformz(:,leftx+1,:,lefty+1),& + & leftx, jder(1), temp) + ELSE + CALL ppval(sp%sp1, x, sp%ppformz(:,leftx+1,:,lefty+1),& + & leftx, jder(1), temp) + END IF + CALL ppval(sp%sp2, y, temp, lefty, jder(2), fp) + DEALLOCATE(temp) + ELSE ! using spline expansion + ALLOCATE(funx(1:k1,0:jder(1)), funy(1:k2,0:jder(2))) + fp = (0.0d0,0.0d0) + CALL basfun(x, sp%sp1, funx, leftx+1) + CALL basfun(y, sp%sp2, funy, lefty+1) + DO jj=1,k2 + DO ii=1,k1 + fp = fp + & + & funy(jj,jder(2))*sp%bcoefsc(leftx+ii,lefty+jj)* & + & funx(ii,jder(1)) + END DO + END DO + DEALLOCATE(funx, funy) + END IF + END SUBROUTINE gridval2dz +!=========================================================================== + SUBROUTINE gridval2d_1dz(sp, xp, yp, fp, jder, c, ppformz) +! +! Compute values or jder-th dervivative of f(x,y) from ppform +! of spline sp. Recompute the ppform if the optional spline +! coefficients c are given. +! +! F(I) = F(X(I), Y(I)) +! + TYPE(spline2d), INTENT(inout) :: sp + DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: xp, yp + DOUBLE COMPLEX, DIMENSION(:), INTENT(out) :: fp + INTEGER, INTENT(in) :: jder(2) + DOUBLE COMPLEX, DIMENSION(:,:), OPTIONAL, INTENT(in) :: c + DOUBLE COMPLEX, DIMENSION(:,:,:,:), OPTIONAL :: ppformz +! + INTEGER :: d1, d2, k1, k2, n1, n2, np + DOUBLE COMPLEX, ALLOCATABLE :: work(:,:,:), temp(:) + DOUBLE PRECISION, ALLOCATABLE :: funx(:,:), funy(:,:) + DOUBLE PRECISION :: x(SIZE(xp)), y(SIZE(yp)) + INTEGER :: leftx(SIZE(xp)), lefty(SIZE(yp)) + INTEGER :: i, j, k, ii, jj + LOGICAL :: nlppform +! + d1 = sp%sp1%dim + d2 = sp%sp2%dim + k1 = sp%sp1%order + k2 = sp%sp2%order + n1 = sp%sp1%nints + n2 = sp%sp2%nints + nlppform = sp%sp1%nlppform .OR. sp%sp2%nlppform +! +! Compute PPFORM/BCOEFS if spline coefs are passed +! + IF( PRESENT(c)) THEN + IF( nlppform ) THEN + ALLOCATE(work(d2,k1,n1)) + CALL topp1z(sp%sp1, c , work) + IF(PRESENT(ppformz)) THEN + CALL topp2z(sp%sp2, work, ppformz) + ELSE + IF( ASSOCIATED(sp%ppformz) ) DEALLOCATE(sp%ppformz) + ALLOCATE(sp%ppformz(k1,n1,k2,n2)) + CALL topp2z(sp%sp2, work, sp%ppformz) + END IF + DEALLOCATE(work) + ELSE + IF( ASSOCIATED(sp%bcoefsc) ) DEALLOCATE(sp%bcoefsc) + ALLOCATE(sp%bcoefsc(SIZE(c,1),SIZE(c,2))) + sp%bcoefsc = c + END IF + END IF +! +! Applly periodicity if required +! + np = SIZE(xp) + IF( sp%sp1%period ) THEN ! ** Applly periodicity ** + x = sp%sp1%knots(0) + MODULO(xp-sp%sp1%knots(0), sp%sp1%lperiod) + ELSE + x = xp + END IF + IF( sp%sp2%period ) THEN ! ** Applly periodicity ** + y = sp%sp2%knots(0) + MODULO(yp-sp%sp2%knots(0), sp%sp2%lperiod) + ELSE + y = yp + END IF +! +! Locate the interval containing x, y +! + CALL locintv(sp%sp1, x, leftx) + CALL locintv(sp%sp2, y, lefty) +! +! Compute function/derivatives +! + IF( nlppform ) THEN ! using PP form + ALLOCATE(temp(k2)) + DO i=1,np + IF(PRESENT(ppformz)) THEN + CALL ppval(sp%sp1, x(i), ppformz(:,leftx(i)+1,:,lefty(i)+1),& + & leftx(i), jder(1), temp) + ELSE + CALL ppval(sp%sp1, x(i), sp%ppformz(:,leftx(i)+1,:,lefty(i)+1),& + & leftx(i), jder(1), temp) + END IF + CALL ppval(sp%sp2, y(i), temp, lefty(i), jder(2), fp(i)) + END DO + DEALLOCATE(temp) + ELSE ! using spline expansion + ALLOCATE(funx(1:k1,0:jder(1)), funy(1:k2,0:jder(2))) + fp = (0.0d0,0.0d0) + DO i=1,np + CALL basfun(x(i), sp%sp1, funx, leftx(i)+1) + CALL basfun(y(i), sp%sp2, funy, lefty(i)+1) + DO jj=1,k2 + DO ii=1,k1 + fp(i) = fp(i) + & + & funy(jj,jder(2))*sp%bcoefsc(leftx(i)+ii,lefty(i)+jj)* & + & funx(ii,jder(1)) + END DO + END DO + END DO + DEALLOCATE(funx, funy) + END IF + END SUBROUTINE gridval2d_1dz +!=========================================================================== + SUBROUTINE gridval2d_2dz(sp, xp, yp, fp, jder, c, ppformz) +! +! Compute values or jder-th dervivative of f(x,y) from ppform +! of spline sp. Recompute the ppform if the optional spline +! coefficients c are given. +! +! F(I,J) = F(X(I), Y(J)) +! + TYPE(spline2d), INTENT(inout) :: sp + DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: xp, yp + DOUBLE COMPLEX, DIMENSION(:,:), INTENT(out) :: fp + INTEGER, INTENT(in) :: jder(2) + DOUBLE COMPLEX, DIMENSION(:,:), OPTIONAL, INTENT(in) :: c + DOUBLE COMPLEX, DIMENSION(:,:,:,:), OPTIONAL :: ppformz +! + INTEGER :: d1, d2, k1, k2, n1, n2 + DOUBLE COMPLEX, ALLOCATABLE :: work(:,:,:), temp(:) + DOUBLE PRECISION, ALLOCATABLE :: funx(:,:), funy(:,:) + DOUBLE PRECISION :: x(SIZE(xp)), y(SIZE(yp)) + INTEGER :: leftx(SIZE(xp)), lefty(SIZE(yp)) + INTEGER :: i, j, k, ii, jj + LOGICAL :: nlppform +! + d1 = sp%sp1%dim + d2 = sp%sp2%dim + k1 = sp%sp1%order + k2 = sp%sp2%order + n1 = sp%sp1%nints + n2 = sp%sp2%nints + nlppform = sp%sp1%nlppform .OR. sp%sp2%nlppform +! +! Compute PPFORMM/BCOEFS if spline coefs are passed +! + IF( PRESENT(c) ) THEN + IF( nlppform ) THEN + ALLOCATE(work(d2,k1,n1)) + CALL topp1z(sp%sp1, c , work) + IF(PRESENT(ppformz)) THEN + CALL topp2z(sp%sp2, work, ppformz) + ELSE + IF( ASSOCIATED(sp%ppformz) ) DEALLOCATE(sp%ppformz) + ALLOCATE(sp%ppformz(k1,n1,k2,n2)) + CALL topp2z(sp%sp2, work, sp%ppformz) + END IF + DEALLOCATE(work) + ELSE + IF( ASSOCIATED(sp%bcoefsc) ) DEALLOCATE(sp%bcoefsc) + ALLOCATE(sp%bcoefsc(SIZE(c,1),SIZE(c,2))) + sp%bcoefsc = c + END IF + END IF +! +! Applly periodicity if required +! + IF( sp%sp1%period ) THEN ! ** Applly periodicity ** + x = sp%sp1%knots(0) + MODULO(xp-sp%sp1%knots(0), sp%sp1%lperiod) + ELSE + x = xp + END IF + IF( sp%sp2%period ) THEN ! ** Applly periodicity ** + y = sp%sp2%knots(0) + MODULO(yp-sp%sp2%knots(0), sp%sp2%lperiod) + ELSE + y = yp + END IF +! +! Locate interval containing (x,y) +! + CALL locintv(sp%sp1, x, leftx) + CALL locintv(sp%sp2, y, lefty) +! +! Compute function/derivatives +! + IF( nlppform ) THEN ! using PP form + ALLOCATE(temp(k2)) + DO j=1,SIZE(y) + DO i=1,SIZE(x) + IF(PRESENT(ppformz)) THEN + CALL ppval(sp%sp1, x(i), ppformz(:,leftx(i)+1,:,lefty(j)+1),& + & leftx(i), jder(1), temp) + ELSE + CALL ppval(sp%sp1, x(i), sp%ppformz(:,leftx(i)+1,:,lefty(j)+1),& + & leftx(i), jder(1), temp) + END IF + CALL ppval(sp%sp2, y(j), temp, lefty(j), jder(2), fp(i,j)) + END DO + END DO + DEALLOCATE(temp) + ELSE ! using spline expansion + ALLOCATE(funx(1:k1,0:jder(1)), funy(1:k2,0:jder(2))) + fp = 0.0d0 + DO j=1,SIZE(y) + CALL basfun(y(j), sp%sp2, funy, lefty(j)+1) + DO i=1,SIZE(x) + CALL basfun(x(i), sp%sp1, funx, leftx(i)+1) + DO jj=1,k2 + DO ii=1,k1 + fp(i,j) = fp(i,j) + sp%bcoefsc(leftx(i)+ii,lefty(j)+jj) * & + & funx(ii,jder(1))*funy(jj,jder(2)) + END DO + END DO + END DO + END DO + DEALLOCATE(funx, funy) + END IF + END SUBROUTINE gridval2d_2dz +!=========================================================================== + SUBROUTINE calc_fftmass(spl, fftmat) +! +! Compute FT of mass matrix for periodic spline on equidistant mesh +! + TYPE(spline1d) :: spl + DOUBLE PRECISION, INTENT(out) :: fftmat(0:) +! + INTEGER :: dim, nx, nidbas, ngauss + DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:) + DOUBLE COMPLEX, ALLOCATABLE :: ft_fun(:,:) + INTEGER :: igauss, intv +! + CALL get_dim(spl, dim, nx, nidbas) + CALL get_gauss(spl, ngauss) + ALLOCATE(xgauss(ngauss), wgauss(ngauss)) + ALLOCATE(ft_fun(0:nx-1,1)) +! +! Integrate on first interval + intv = 1 + CALL get_gauss(spl, ngauss, intv, xgauss, wgauss) + fftmat = 0.0d0 + DO igauss=1,ngauss + CALL ft_basfun(xgauss(igauss), spl, ft_fun, intv) + fftmat(:) = fftmat(:) + wgauss(igauss)*ft_fun(:,1)*CONJG(ft_fun(:,1)) + END DO +! + DEALLOCATE(ft_fun) + DEALLOCATE(xgauss, wgauss) + END SUBROUTINE calc_fftmass +!=========================================================================== + SUBROUTINE calc_fftmass_old(spl, fftmat) +! +! Compute FT of mass matrix for periodic spline on equidistant mesh +! + TYPE(spline1d) :: spl + DOUBLE PRECISION, INTENT(out) :: fftmat(0:) + INTEGER :: dim, nx, nidbas, ngauss + DOUBLE PRECISION, ALLOCATABLE :: fun(:,:), xgauss(:), wgauss(:), arow(:) + INTEGER :: i, j, k, igauss, intv + DOUBLE PRECISION :: pi, arg0, arg +! + CALL get_dim(spl, dim, nx, nidbas) + CALL get_gauss(spl, ngauss) + ALLOCATE(fun(0:nidbas,1)) ! Spline + ALLOCATE(xgauss(ngauss), wgauss(ngauss)) + ALLOCATE(arow(0:nidbas)) +! +! Assemble the first row of the upper mass matrix + arow = 0.0d0 + intv = 1 ! Get splines on Gauss points in first interval + CALL get_gauss(spl, ngauss, intv, xgauss, wgauss) + DO igauss=1,ngauss + CALL basfun(xgauss(igauss), spl, fun, intv) + DO i=0,nidbas + DO j=0,nidbas-i + arow(i)=arow(i)+fun(j,1)*fun(i+j,1)*wgauss(igauss) + END DO + END DO + END DO +! +! Fourier transform + pi = 4.0d0*ATAN(1.0d0) + arg0 = 2.0d0*pi/REAL(nx,8) + DO k=0,nx-1 + fftmat(k) = arow(0) + arg = k*arg0 + DO i=1,nidbas + fftmat(k) = fftmat(k) + 2.0d0*arow(i)*COS(i*arg) + END DO + END DO +! + DEALLOCATE(arow) + DEALLOCATE(fun) + DEALLOCATE(xgauss, wgauss) + END SUBROUTINE calc_fftmass_old +!=========================================================================== + SUBROUTINE CompMassMatrix1(sp1, sp2, a, b, MassMatrix) + +! Compute cross mass matrix MassMatrix between splines sp1 and sp2 over +! interval [a, b] + + IMPLICIT NONE + + TYPE(spline1d), INTENT(IN) :: sp1, sp2 + DOUBLE PRECISION, INTENT(IN) :: a, b + DOUBLE PRECISION, DIMENSION(:, :), POINTER :: MassMatrix + + INTEGER :: ndim1, n1, nidbas1 + INTEGER :: ndim2, n2, nidbas2 + INTEGER :: nint, int, ngauss, ig, k1, k2, i1, j2, left1, left2 + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: allknots, xg, wg + DOUBLE PRECISION, DIMENSION(:, :), ALLOCATABLE :: fun1, fun2 + +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(sp1, ndim1, n1, nidbas1) + CALL get_dim(sp2, ndim2, n2, nidbas2) + +! PRINT "('In CompMassMatrix1')" +! PRINT "('sp1: dim, #intervals, degree', I, I, I)", ndim1, n1, nidbas1 +! PRINT "('sp2: dim, #intervals, degree', I, I, I)", ndim2, n2, nidbas2 + + ALLOCATE(fun1(0:nidbas1, 1)) ! needs only basis functions (no derivatives) + ALLOCATE(fun2(0:nidbas2, 1)) ! needs only basis functions (no derivatives) + + IF (sp1%period) THEN + IF (sp2%period) THEN + ALLOCATE(MassMatrix(n1, n2)) + ELSE + ALLOCATE(MassMatrix(n1, ndim2)) + END IF + ELSE + IF (sp2%period) THEN + ALLOCATE(MassMatrix(ndim1, n2)) + ELSE + ALLOCATE(MassMatrix(ndim1, ndim2)) + END IF + END IF + +! +! Gauss quadature +! + ALLOCATE(allknots(0:n1+n2+3)) + CALL sorted_merge(sp1%knots(0:n1), n1+1, sp2%knots(0:n2), n2+1, a, b, allknots, nint) + nint = nint-1 + + ngauss = CEILING(REAL(nidbas1 + nidbas2 + 1, 8)/2.D0) + ALLOCATE(xg(ngauss), wg(ngauss)) +!=========================================================================== +! 2.0 Assembly loop +! + MassMatrix = 0.d0 + DO int = 1, nint + ! Get gauss abscissas and weights for current interval + CALL gauleg(allknots(int-1), allknots(int), xg, wg, ngauss) + DO ig = 1, ngauss + CALL locintv(sp1, xg(ig), left1) + CALL locintv(sp2, xg(ig), left2) + CALL basfun(xg(ig), sp1, fun1, left1+1) + CALL basfun(xg(ig), sp2, fun2, left2+1) + DO k1 = 0, nidbas1 + IF (sp1%period) THEN + i1 = modulo(left1+1 + k1 -1, n1) +1 + ELSE + i1 = left1+1 + k1 + END IF + + DO k2 = 0, nidbas2 + IF (sp2%period) THEN + j2 = modulo(left2+1 + k2 -1, n2) +1 + ELSE + j2 = left2+1 + k2 + END IF + + MassMatrix(i1, j2) = MassMatrix(i1, j2) + wg(ig)*fun1(k1, 1)*fun2(k2, 1) + END DO + END DO + END DO + END DO + +!=========================================================================== +! 3.0 Epilogue +! + DEALLOCATE(xg, wg) + DEALLOCATE(fun1, fun2) + DEALLOCATE(allknots) + + END SUBROUTINE CompMassMatrix1 +!=========================================================================== + SUBROUTINE CompMassMatrix_gb(sp1, sp2, a, b, MassMatrix) + +! Compute cross mass matrix MassMatrix between splines sp1 and sp2 over +! interval [a, b] + + IMPLICIT NONE + + TYPE(spline1d), INTENT(IN) :: sp1, sp2 + DOUBLE PRECISION, INTENT(IN) :: a, b + TYPE(gbmat) :: MassMatrix + + INTEGER :: ndim1, n1, nidbas1 + INTEGER :: ndim2, n2, nidbas2 + INTEGER :: nint, int, ngauss, ig, k1, k2, i1, j2, left1, left2 + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: allknots, xg, wg + DOUBLE PRECISION, DIMENSION(:, :), ALLOCATABLE :: fun1, fun2 + DOUBLE PRECISION :: val + !=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(sp1, ndim1, n1, nidbas1) + CALL get_dim(sp2, ndim2, n2, nidbas2) + ALLOCATE(fun1(0:nidbas1, 1)) ! needs only basis functions (no derivatives) + ALLOCATE(fun2(0:nidbas2, 1)) ! needs only basis functions (no derivatives) +! +! Gauss quadature +! + ALLOCATE(allknots(0:n1+n2+3)) + CALL sorted_merge(sp1%knots(0:n1), n1+1, sp2%knots(0:n2), n2+1, a, b, allknots, nint) + nint = nint-1 + + ngauss = CEILING(REAL(nidbas1 + nidbas2 + 1, 8)/2.D0) + ALLOCATE(xg(ngauss), wg(ngauss)) +!=========================================================================== +! 2.0 Assembly loop +! + DO int = 1, nint + ! Get gauss abscissas and weights for current interval + CALL gauleg(allknots(int-1), allknots(int), xg, wg, ngauss) + DO ig = 1, ngauss + CALL locintv(sp1, xg(ig), left1) + CALL locintv(sp2, xg(ig), left2) + CALL basfun(xg(ig), sp1, fun1, left1+1) + CALL basfun(xg(ig), sp2, fun2, left2+1) + DO k1 = 0, nidbas1 + IF (sp1%period) THEN + i1 = modulo(left1+1 + k1 -1, n1) +1 + ELSE + i1 = left1+1 + k1 + END IF + + DO k2 = 0, nidbas2 + IF (sp2%period) THEN + j2 = modulo(left2+1 + k2 -1, n2) +1 + ELSE + j2 = left2+1 + k2 + END IF + val = wg(ig)*fun1(k1, 1)*fun2(k2, 1) + CALL updtmat(MassMatrix, i1, j2, val) + END DO + END DO + END DO + END DO + +!=========================================================================== +! 3.0 Epilogue +! + DEALLOCATE(xg, wg) + DEALLOCATE(fun1, fun2) + DEALLOCATE(allknots) + + END SUBROUTINE CompMassMatrix_gb +!=========================================================================== + SUBROUTINE CompMassMatrix_zgb(sp1, sp2, a, b, MassMatrix) + +! Compute cross mass matrix MassMatrix between splines sp1 and sp2 over +! interval [a, b] + + IMPLICIT NONE + + TYPE(spline1d), INTENT(IN) :: sp1, sp2 + DOUBLE PRECISION, INTENT(IN) :: a, b + TYPE(zgbmat) :: MassMatrix + + INTEGER :: ndim1, n1, nidbas1 + INTEGER :: ndim2, n2, nidbas2 + INTEGER :: nint, int, ngauss, ig, k1, k2, i1, j2, left1, left2 + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: allknots, xg, wg + DOUBLE PRECISION, DIMENSION(:, :), ALLOCATABLE :: fun1, fun2 + DOUBLE COMPLEX :: val +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(sp1, ndim1, n1, nidbas1) + CALL get_dim(sp2, ndim2, n2, nidbas2) + ALLOCATE(fun1(0:nidbas1, 1)) ! needs only basis functions (no derivatives) + ALLOCATE(fun2(0:nidbas2, 1)) ! needs only basis functions (no derivatives) +! +! Gauss quadature +! + ALLOCATE(allknots(0:n1+n2+3)) + CALL sorted_merge(sp1%knots(0:n1), n1+1, sp2%knots(0:n2), n2+1, a, b, allknots, nint) + nint = nint-1 + + ngauss = CEILING(REAL(nidbas1 + nidbas2 + 1, 8)/2.D0) + ALLOCATE(xg(ngauss), wg(ngauss)) +!=========================================================================== +! 2.0 Assembly loop +! + DO int = 1, nint + ! Get gauss abscissas and weights for current interval + CALL gauleg(allknots(int-1), allknots(int), xg, wg, ngauss) + DO ig = 1, ngauss + CALL locintv(sp1, xg(ig), left1) + CALL locintv(sp2, xg(ig), left2) + CALL basfun(xg(ig), sp1, fun1, left1+1) + CALL basfun(xg(ig), sp2, fun2, left2+1) + DO k1 = 0, nidbas1 + IF (sp1%period) THEN + i1 = modulo(left1+1 + k1 -1, n1) +1 + ELSE + i1 = left1+1 + k1 + END IF + + DO k2 = 0, nidbas2 + IF (sp2%period) THEN + j2 = modulo(left2+1 + k2 -1, n2) +1 + ELSE + j2 = left2+1 + k2 + END IF + val = wg(ig)*fun1(k1, 1)*fun2(k2, 1) + CALL updtmat(MassMatrix, i1, j2, val) + END DO + END DO + END DO + END DO + +!=========================================================================== +! 3.0 Epilogue +! + DEALLOCATE(xg, wg) + DEALLOCATE(fun1, fun2) + DEALLOCATE(allknots) + + END SUBROUTINE CompMassMatrix_zgb +!=========================================================================== + SUBROUTINE sorted_merge(arr1, n1, arr2, n2, a, b, arrm, nm) + + IMPLICIT NONE + +! Peforms: +! 1) Merge of arrays arr1 & arr2 including boundary values a & b +! 2) Sorts the merged arrays keeping only values in [a, b] +! 3) Removes duplicates + + INTEGER, INTENT(IN) :: n1, n2 + INTEGER, INTENT(OUT) :: nm + + DOUBLE PRECISION, INTENT(IN) :: a, b + DOUBLE PRECISION, INTENT(IN) :: arr1(n1), arr2(n2) + DOUBLE PRECISION, DIMENSION(*), INTENT(OUT) :: arrm + + INTEGER :: i, j + +! Merge the two arrays including a & b + nm = n1 + n2 + 2 + arrm(1:nm) = (/ a, arr1(1:n1), b, arr2(1:n2) /) + +! Sort + CALL sort(arrm, nm) + +! Remove duplicates + j = 1 + DO i = 2, nm + IF(arrm(i) .GT. arrm(j)) THEN + j = j + 1 + arrm(j) = arrm(i) + END IF + END DO + nm = j + +! Remove values outside [a, b] + j = 0 + DO i = 1, nm + IF((arrm(i) .GE. a) .AND. (arrm(i) .LE. b)) THEN + j = j + 1 + arrm(j) = arrm(i) + END IF + END DO + nm = j + + END SUBROUTINE sorted_merge +!=========================================================================== + SUBROUTINE sort(arr, n) + +! Sorts array ARR of length N into ascending numerical order by the +! Shell-Mezgar algorithm. +! See Sec. 8.1 of Numerical Recipes + + IMPLICIT NONE + + INTEGER, INTENT(IN) :: n + DOUBLE PRECISION, DIMENSION(n), INTENT(INOUT) :: arr + + INTEGER :: nsort, is, i, j, l, m + DOUBLE PRECISION :: tmp + DOUBLE PRECISION, PARAMETER :: tiny = 1D-5 + + nsort = INT(LOG(REAL(n, 8))/LOG(2.D0) + tiny) + + m = n + DO is = 1, nsort + m = m/2 + DO j = 1, n-m + i = j + DO + l = i+m + IF (arr(l) .LT. arr(i)) THEN + tmp = arr(i) + arr(i) = arr(l) + arr(l) = tmp + i = i-m + IF (i .LT. 1) EXIT + ELSE + EXIT + END IF + END DO + END DO + END DO + + END SUBROUTINE sort +!=========================================================================== + LOGICAL FUNCTION is_equid(x, dev) +! +! Check whether mesh is euidistant or not +! + DOUBLE PRECISION, INTENT(in) :: x(0:) + DOUBLE PRECISION, INTENT(out), OPTIONAL :: dev +! + DOUBLE PRECISION :: dx(SIZE(x)-1), dxmin, dxmax, dxaver, e + DOUBLE PRECISION, PARAMETER :: tol=1.d-6 + INTEGER :: n, i + n=SIZE(x)-1 + dx = (/ (x(i)-x(i-1),i=1,n) /) + dxmin = MINVAL(dx) + dxmax = MAXVAL(dx) + dxaver = (x(n)-x(0))/REAL(n,8) + e = (dxmax-dxmin)/dxaver +!!$ e = (dxmax-dxmin)/(SUM(x)/REAL(n+1)) + is_equid = e.LT.tol + IF(PRESENT(dev)) dev = e + END FUNCTION is_equid +!=========================================================================== + SUBROUTINE create_fine(cmesh, h, fmap) +! +! Create a fine mesh from a coarse mesh and returns its mapping +! + DOUBLE PRECISION, INTENT(in) :: cmesh(0:) + DOUBLE PRECISION, INTENT(out) :: h + INTEGER, POINTER, INTENT(out) :: fmap(:) +! + DOUBLE PRECISION, ALLOCATABLE :: fmesh(:) + DOUBLE PRECISION :: xlen, hmin + INTEGER :: n, nfine, i, ic +! + n = SIZE(cmesh)-1 + xlen = cmesh(n)-cmesh(0) +! +! Minimum interval size + hmin = xlen + DO i=1,n + hmin = MIN(hmin, cmesh(i)-cmesh(i-1)) + END DO +! +! Create the fine mesh + nfine = CEILING(xlen/hmin) + h = xlen / REAL(nfine,8) + ALLOCATE(fmap(0:nfine)) + ALLOCATE(fmesh(0:nfine)) + fmesh = cmesh(0) + (/ (i*h, i=0,nfine) /) + fmesh(nfine) = cmesh(n) +! +! Map fine to coarse mesh + ic = 0 + fmap(0) = ic + DO i=1,nfine-1 + DO + IF(fmesh(i).GE.cmesh(ic+1)) THEN + ic = ic+1 + ELSE + EXIT + END IF + END DO + fmap(i) = ic + END DO + fmap(nfine) = n-1 +! + DEALLOCATE(fmesh) + END SUBROUTINE create_fine +!=========================================================================== + SUBROUTINE getgradr(sp, xp, yp, f00, f10, f01) +! +! Compute the function f00 and its derivatives +! f10 = d/dx f +! f01 = d/dy f +! assuming that its PPFORM/BCOEFSC was already computed! +! + TYPE(spline2d), INTENT(inout) :: sp + DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: xp, yp + DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: f00, f10, f01 +! + INTEGER :: np + DOUBLE PRECISION :: x(SIZE(xp)), y(SIZE(yp)) + INTEGER :: leftx(SIZE(xp)), lefty(SIZE(yp)) + INTEGER :: i, ip, ii, jj, nidbas(2) + DOUBLE PRECISION :: temp0(SIZE(xp),sp%sp2%order), temp1(SIZE(xp),sp%sp2%order) + DOUBLE PRECISION, ALLOCATABLE, SAVE :: funx(:,:), funy(:,:) + DOUBLE PRECISION, ALLOCATABLE, SAVE :: ftemp0(:), ftemp1(:) + LOGICAL :: nlppform +! +! Apply periodicity if required +! + np = SIZE(xp) + nidbas(1) = sp%sp1%order-1 + nidbas(2) = sp%sp2%order-1 + nlppform = sp%sp1%nlppform .OR. sp%sp2%nlppform +! +! Locate the interval containing x, y +! + CALL locintv(sp%sp1, xp, leftx) + CALL locintv(sp%sp2, yp, lefty) + x(:) = xp(:) - sp%sp1%knots(leftx(:)) + y(:) = yp(:) - sp%sp2%knots(lefty(:)) +! +! Compute function/derivatives +! + IF(nlppform) THEN +! +! Using PPFORM +!---------- + DO i=1,np + CALL my_ppval1(nidbas(1), x(i), sp%ppform(:,leftx(i)+1,:,lefty(i)+1), & + & temp0(i,:), temp1(i,:)) + END DO +! + CALL my_ppval0(nidbas(2), y, temp0, 0, f00) + CALL my_ppval0(nidbas(2), y, temp0, 1, f01) + CALL my_ppval0(nidbas(2), y, temp1, 0, f10) + ELSE +! +! Using spline expansion with sp%bcoefsc +!---------- + IF(.NOT.ALLOCATED(funx)) THEN + ALLOCATE(funx(0:nidbas(1),0:1)) ! Spline and its first derivative + ALLOCATE(funy(0:nidbas(2),0:1)) + ALLOCATE(ftemp0(0:nidbas(1))) + ALLOCATE(ftemp1(0:nidbas(1))) + END IF +! + DO ip=1,np + CALL my_splines(nidbas(1), x(ip), sp%sp1%val0(:,:,leftx(ip)+1), funx) + CALL my_splines(nidbas(2), y(ip), sp%sp2%val0(:,:,lefty(ip)+1), funy) + DO ii=0,nidbas(1) + ftemp0(ii) = (0.d0,0.d0) + ftemp1(ii) = (0.d0,0.d0) + DO jj=0,nidbas(2) + ftemp0(ii) = ftemp0(ii) + sp%bcoefs(leftx(ip)+1+ii,lefty(ip)+1+jj)*funy(jj,0) + ftemp1(ii) = ftemp1(ii) + sp%bcoefs(leftx(ip)+1+ii,lefty(ip)+1+jj)*funy(jj,1) + END DO + END DO + f00(ip) = SUM(funx(:,0)*ftemp0(:)) + f01(ip) = SUM(funx(:,0)*ftemp1(:)) + f10(ip) = SUM(funx(:,1)*ftemp0(:)) + END DO +!----------- + END IF + CONTAINS +!+++ + SUBROUTINE my_ppval0(p, x, ppform, jder, f) +! +! Compute function and derivatives from the PP representation +! for many points x(:) + INTEGER, INTENT(in) :: p + DOUBLE PRECISION, INTENT(in) :: x(:) + DOUBLE PRECISION, INTENT(in) :: ppform(:,:) + INTEGER, INTENT(in) :: jder + DOUBLE PRECISION, INTENT(out) :: f(:) + DOUBLE PRECISION :: fact + INTEGER :: j + SELECT CASE (jder) + CASE(0) ! function value + SELECT CASE(p) + CASE(1) + f(:) = ppform(:,1) + x(:)*ppform(:,2) + CASE(2) + f(:) = ppform(:,1) + x(:)*(ppform(:,2)+x(:)*ppform(:,3)) +!!$ CASE(3) +!!$ f(:) = ppform(:,1) + x(:)*(ppform(:,2)+x(:)*(ppform(:,3)+x(:)*ppform(:,4))) + CASE(3:) + f(:) = ppform(:,p+1) + DO j=p,1,-1 + f(:) = f(:)*x(:) + ppform(:,j) + END DO + END SELECT + CASE(1) ! 1st derivative + SELECT CASE(p) + CASE(1) + f(:) = ppform(:,2) + CASE(2) + f(:) = ppform(:,2) + x(:)*2.d0*ppform(:,3) +!!$ CASE(3) +!!$ f(:) = ppform(:,2) + x(:)*(2.d0*ppform(:,3)+x(:)*3.0d0*ppform(:,4)) + CASE(3:) + f(:) = p*ppform(:,p+1) + DO j=p-1,1,-1 + f(:) = f(:)*x(:) + j*ppform(:,j+1) + END DO + END SELECT + CASE default ! 2nd and higher derivatives + f(:) = ppform(:,p+1) + fact = p-jder + DO j=p,jder+1,-1 + f(:) = f(:)/fact*j*x(:) + ppform(:,j) + fact = fact-1.0d0 + END DO + DO j=2,jder + f(:) = f(:)*j + END DO + END SELECT + END SUBROUTINE my_ppval0 +!+++ + SUBROUTINE my_ppval1(p, x, ppform, f0, f1) +! +! Compute function and first derivative from the PP representation + INTEGER, INTENT(in) :: p + DOUBLE PRECISION, INTENT(in) :: x + DOUBLE PRECISION, INTENT(in) :: ppform(:,:) + DOUBLE PRECISION, INTENT(out) :: f0(:) + DOUBLE PRECISION, INTENT(out) :: f1(:) + DOUBLE PRECISION :: fact + INTEGER :: j + SELECT CASE(p) + CASE(1) + f0(:) = ppform(1,:) + x*ppform(2,:) + f1(:) = ppform(2,:) + CASE(2) + f0(:) = ppform(1,:) + x*(ppform(2,:)+x*ppform(3,:)) + f1(:) = ppform(2,:) + x*2.d0*ppform(3,:) + CASE(3) + f0(:) = ppform(1,:) + x*(ppform(2,:)+x*(ppform(3,:)+x*ppform(4,:))) + f1(:) = ppform(2,:) + x*(2.d0*ppform(3,:)+x*3.0d0*ppform(4,:)) + CASE(4:) + f0 = ppform(p+1,:) + f1 = f0 + DO j=p,2,-1 + f0(:) = ppform(j,:) + x*f0(:) + f1(:) = f0(:) + x*f1(:) + END DO + f0(:) = ppform(1,:) + x*f0(:) + END SELECT + END SUBROUTINE my_ppval1 +!+++ + SUBROUTINE my_splines(p, x, ppform, f) + INTEGER, INTENT(in) :: p + DOUBLE PRECISION, INTENT(in) :: x + DOUBLE PRECISION, INTENT(in) :: ppform(0:p,0:p) + DOUBLE PRECISION, INTENT(out) :: f(0:p,0:1) + INTEGER :: i + DOUBLE PRECISION :: powerx(0:p) + SELECT CASE(p) + CASE(1) + f(0,0) = ppform(0,0) + x*ppform(1,0) + f(0,1) = ppform(1,0) + f(1,0) = 1.0-f(0,0) + f(1,1) = -f(0,1) + CASE(2) + f(0,0) = ppform(0,0) + x*(ppform(1,0)+x*ppform(2,0)) + f(0,1) = ppform(1,0) + 2.d0*x*ppform(2,0) + f(1,0) = ppform(0,1) + x*(ppform(1,1)+x*ppform(2,1)) + f(1,1) = ppform(1,1) + 2.d0*x*ppform(2,1) + f(2,0) = 1.0 - f(0,0) - f(1,0) + f(2,1) = - f(0,1) - f(1,1) + CASE(3) + f(0,0) = ppform(0,0) + x*(ppform(1,0)+x*(ppform(2,0)+x*ppform(3,0))) + f(0,1) = ppform(1,0) + x*(2.d0*ppform(2,0)+3.d0*x*ppform(3,0)) + f(1,0) = ppform(0,1) + x*(ppform(1,1)+x*(ppform(2,1)+x*ppform(3,1))) + f(1,1) = ppform(1,1) + x*(2.d0*ppform(2,1)+3.d0*x*ppform(3,1)) + f(2,0) = ppform(0,2) + x*(ppform(1,2)+x*(ppform(2,2)+x*ppform(3,2))) + f(2,1) = ppform(1,2) + x*(2.d0*ppform(2,2)+3.d0*x*ppform(3,2)) + f(3,0) = 1.0 - f(0,0) - f(1,0) - f(2,0) + f(3,1) = - f(0,1) - f(1,1) - f(2,1) + CASE(4:) + powerx(0) = 1.d0 + DO i=1,p + powerx(i) = powerx(i-1)*x + END DO + DO i=0,p-1 + f(i,0) = DOT_PRODUCT(ppform(:,i),powerx(:)) + END DO + f(p,0) = 1.d0 - SUM(f(0:p-1,0)) + f(p,1) = - SUM(f(0:p-1,1)) + END SELECT + END SUBROUTINE my_splines +!+++ + END SUBROUTINE getgradr +!=========================================================================== + SUBROUTINE getgradz(sp, xp, yp, f00, f10, f01) +! +! Compute the function f00 and its derivatives +! f10 = d/dx f +! f01 = d/dy f +! assuming that its PPFORM/BCOEFSC was already computed! +! + TYPE(spline2d), INTENT(inout) :: sp + DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: xp, yp + DOUBLE COMPLEX, DIMENSION(:), INTENT(out) :: f00, f10, f01 +! + INTEGER :: np + DOUBLE PRECISION :: x(SIZE(xp)), y(SIZE(yp)) + INTEGER :: leftx(SIZE(xp)), lefty(SIZE(yp)) + INTEGER :: i, ip, ii, jj, nidbas(2) + DOUBLE COMPLEX :: temp0(SIZE(xp),sp%sp2%order), temp1(SIZE(xp),sp%sp2%order) + DOUBLE PRECISION, ALLOCATABLE, SAVE :: funx(:,:), funy(:,:) + DOUBLE COMPLEX, ALLOCATABLE, SAVE :: ftemp0(:), ftemp1(:) + LOGICAL :: nlppform +! +! Apply periodicity if required +! + np = SIZE(xp) + nidbas(1) = sp%sp1%order-1 + nidbas(2) = sp%sp2%order-1 + nlppform = sp%sp1%nlppform .OR. sp%sp2%nlppform +! +! Locate the interval containing x, y +! + CALL locintv(sp%sp1, xp, leftx) + CALL locintv(sp%sp2, yp, lefty) + x(:) = xp(:) - sp%sp1%knots(leftx(:)) + y(:) = yp(:) - sp%sp2%knots(lefty(:)) +! +! Compute function/derivatives +! + IF(nlppform) THEN +! +! Using PPFORM +!---------- + DO i=1,np + CALL my_ppval1(nidbas(1), x(i), sp%ppformz(:,leftx(i)+1,:,lefty(i)+1), & + & temp0(i,:), temp1(i,:)) + END DO +! + CALL my_ppval0(nidbas(2), y, temp0, 0, f00) + CALL my_ppval0(nidbas(2), y, temp0, 1, f01) + CALL my_ppval0(nidbas(2), y, temp1, 0, f10) + ELSE +! +! Using spline expansion with sp%bcoefsc +!---------- + IF(.NOT.ALLOCATED(funx)) THEN + ALLOCATE(funx(0:nidbas(1),0:1)) ! Spline and its first derivative + ALLOCATE(funy(0:nidbas(2),0:1)) + ALLOCATE(ftemp0(0:nidbas(1))) + ALLOCATE(ftemp1(0:nidbas(1))) + END IF +! + DO ip=1,np + CALL my_splines(nidbas(1), x(ip), sp%sp1%val0(:,:,leftx(ip)+1), funx) + CALL my_splines(nidbas(2), y(ip), sp%sp2%val0(:,:,lefty(ip)+1), funy) + DO ii=0,nidbas(1) + ftemp0(ii) = (0.d0,0.d0) + ftemp1(ii) = (0.d0,0.d0) + DO jj=0,nidbas(2) + ftemp0(ii) = ftemp0(ii) + sp%bcoefsc(leftx(ip)+1+ii,lefty(ip)+1+jj)*funy(jj,0) + ftemp1(ii) = ftemp1(ii) + sp%bcoefsc(leftx(ip)+1+ii,lefty(ip)+1+jj)*funy(jj,1) + END DO + END DO + f00(ip) = SUM(funx(:,0)*ftemp0(:)) + f01(ip) = SUM(funx(:,0)*ftemp1(:)) + f10(ip) = SUM(funx(:,1)*ftemp0(:)) + END DO +!----------- + END IF + CONTAINS +!+++ + SUBROUTINE my_ppval0(p, x, ppform, jder, f) +! +! Compute function and derivatives from the PP representation +! for many points x(:) + INTEGER, INTENT(in) :: p + DOUBLE PRECISION, INTENT(in) :: x(:) + DOUBLE COMPLEX, INTENT(in) :: ppform(:,:) + INTEGER, INTENT(in) :: jder + DOUBLE COMPLEX, INTENT(out) :: f(:) + DOUBLE PRECISION :: fact + INTEGER :: j + SELECT CASE (jder) + CASE(0) ! function value + SELECT CASE(p) + CASE(1) + f(:) = ppform(:,1) + x(:)*ppform(:,2) + CASE(2) + f(:) = ppform(:,1) + x(:)*(ppform(:,2)+x(:)*ppform(:,3)) + CASE(3) + f(:) = ppform(:,1) + x(:)*(ppform(:,2)+x(:)*(ppform(:,3)+x(:)*ppform(:,4))) + CASE(4:) + DO j=p+1,1,-1 + f(:) = f(:)*x(:) + ppform(:,j) + END DO + END SELECT + CASE(1) ! 1st derivative + SELECT CASE(p) + CASE(1) + f(:) = ppform(:,2) + CASE(2) + f(:) = ppform(:,2) + x(:)*2.d0*ppform(:,3) + CASE(3) + f(:) = ppform(:,2) + x(:)*(2.d0*ppform(:,3)+x(:)*3.0d0*ppform(:,4)) + CASE(4:) + DO j=p,1,-1 + f(:) = f(:)*x(:) + j*ppform(:,j+1) + END DO + END SELECT + CASE default ! 2nd and higher derivatives + f(:) = ppform(:,p+1) + fact = p-jder + DO j=p,jder+1,-1 + f(:) = f(:)/fact*j*x(:) + ppform(:,j) + fact = fact-1.0d0 + END DO + DO j=2,jder + f(:) = f(:)*j + END DO + END SELECT + END SUBROUTINE my_ppval0 +!+++ + SUBROUTINE my_ppval1(p, x, ppform, f0, f1) +! +! Compute function and first derivative from the PP representation + INTEGER, INTENT(in) :: p + DOUBLE PRECISION, INTENT(in) :: x + DOUBLE COMPLEX, INTENT(in) :: ppform(:,:) + DOUBLE COMPLEX, INTENT(out) :: f0(:) + DOUBLE COMPLEX, INTENT(out) :: f1(:) + DOUBLE PRECISION :: fact + INTEGER :: j + SELECT CASE(p) + CASE(1) + f0(:) = ppform(1,:) + x*ppform(2,:) + f1(:) = ppform(2,:) + CASE(2) + f0(:) = ppform(1,:) + x*(ppform(2,:)+x*ppform(3,:)) + f1(:) = ppform(2,:) + x*2.d0*ppform(3,:) + CASE(3) + f0(:) = ppform(1,:) + x*(ppform(2,:)+x*(ppform(3,:)+x*ppform(4,:))) + f1(:) = ppform(2,:) + x*(2.d0*ppform(3,:)+x*3.0d0*ppform(4,:)) + CASE(4:) + f0 = ppform(p+1,:) + f1 = f0 + DO j=p,2,-1 + f0(:) = ppform(j,:) + x*f0(:) + f1(:) = f0(:) + x*f1(:) + END DO + f0(:) = ppform(1,:) + x*f0(:) + END SELECT + END SUBROUTINE my_ppval1 +!+++ + SUBROUTINE my_splines(p, x, ppform, f) + INTEGER, INTENT(in) :: p + DOUBLE PRECISION, INTENT(in) :: x + DOUBLE PRECISION, INTENT(in) :: ppform(0:p,0:p) + DOUBLE PRECISION, INTENT(out) :: f(0:p,0:1) + INTEGER :: i + DOUBLE PRECISION :: powerx(0:p) + SELECT CASE(p) + CASE(1) + f(0,0) = ppform(0,0) + x*ppform(1,0) + f(0,1) = ppform(1,0) + f(1,0) = 1.0-f(0,0) + f(1,1) = -f(0,1) + CASE(2) + f(0,0) = ppform(0,0) + x*(ppform(1,0)+x*ppform(2,0)) + f(0,1) = ppform(1,0) + 2.d0*x*ppform(2,0) + f(1,0) = ppform(0,1) + x*(ppform(1,1)+x*ppform(2,1)) + f(1,1) = ppform(1,1) + 2.d0*x*ppform(2,1) + f(2,0) = 1.0 - f(0,0) - f(1,0) + f(2,1) = - f(0,1) - f(1,1) + CASE(3) + f(0,0) = ppform(0,0) + x*(ppform(1,0)+x*(ppform(2,0)+x*ppform(3,0))) + f(0,1) = ppform(1,0) + x*(2.d0*ppform(2,0)+3.d0*x*ppform(3,0)) + f(1,0) = ppform(0,1) + x*(ppform(1,1)+x*(ppform(2,1)+x*ppform(3,1))) + f(1,1) = ppform(1,1) + x*(2.d0*ppform(2,1)+3.d0*x*ppform(3,1)) + f(2,0) = ppform(0,2) + x*(ppform(1,2)+x*(ppform(2,2)+x*ppform(3,2))) + f(2,1) = ppform(1,2) + x*(2.d0*ppform(2,2)+3.d0*x*ppform(3,2)) + f(3,0) = 1.0 - f(0,0) - f(1,0) - f(2,0) + f(3,1) = - f(0,1) - f(1,1) - f(2,1) + CASE(4:) + powerx(0) = 1.d0 + DO i=1,p + powerx(i) = powerx(i-1)*x + END DO + DO i=0,p-1 + f(i,0) = DOT_PRODUCT(ppform(:,i),powerx(:)) + END DO + f(p,0) = 1.d0 - SUM(f(0:p-1,0)) + f(p,1) = - SUM(f(0:p-1,1)) + END SELECT + END SUBROUTINE my_splines +!+++ + END SUBROUTINE getgradz +END MODULE bsplines diff --git a/src/cds_mod.f90 b/src/cds_mod.f90 new file mode 100644 index 0000000..b0ca9d6 --- /dev/null +++ b/src/cds_mod.f90 @@ -0,0 +1,626 @@ +!> +!> @file cds_mod.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +MODULE cds +! +! CDSMAT: Implement sparse matrix using Compressed +! Diagonal Storage. +! +! T.M. Tran, CRPP-EPFL +! November 2010 +! + USE mumps_bsplines + IMPLICIT NONE +! + TYPE cds_mat ! Compressed Diagonal Storage + INTEGER :: rank + INTEGER :: kl, ku, ndiags + INTEGER :: nterms, kmat + INTEGER :: ny + INTEGER, DIMENSION(:), POINTER :: dists => NULL() + DOUBLE PRECISION, DIMENSION(:), POINTER :: rowv => NULL() + DOUBLE PRECISION, DIMENSION(:), POINTER :: colh => NULL() + DOUBLE PRECISION, DIMENSION(:), POINTER :: bal => NULL() + DOUBLE PRECISION, DIMENSION(:,:), POINTER :: val => NULL() + TYPE(mumps_mat), ALLOCATABLE :: mumps + END TYPE cds_mat +! +!-------------------------------------------------------------------------------- + INTERFACE init + MODULE PROCEDURE init_cds_mat + END INTERFACE init + INTERFACE clear_mat + MODULE PROCEDURE clear_cds_mat + END INTERFACE clear_mat + INTERFACE destroy + MODULE PROCEDURE destroy_cds_mat + END INTERFACE destroy + INTERFACE updtmat + MODULE PROCEDURE updt_cds + END INTERFACE updtmat + INTERFACE getele + MODULE PROCEDURE getele_cds + END INTERFACE getele + INTERFACE putele + MODULE PROCEDURE putele_cds + END INTERFACE putele + INTERFACE getcol + MODULE PROCEDURE getcol_cds + END INTERFACE getcol + INTERFACE getrow + MODULE PROCEDURE getrow_cds + END INTERFACE getrow + INTERFACE putcol + MODULE PROCEDURE putcol_cds + END INTERFACE putcol + INTERFACE putrow + MODULE PROCEDURE putrow_cds + END INTERFACE putrow + INTERFACE getdiag + MODULE PROCEDURE getdiag_cds + END INTERFACE getdiag + INTERFACE vmx + MODULE PROCEDURE vmx_cds, vmxn_cds + END INTERFACE + INTERFACE putmat + MODULE PROCEDURE putmat_cds + END INTERFACE + INTERFACE getmat + MODULE PROCEDURE getmat_cds + END INTERFACE + INTERFACE flops + MODULE PROCEDURE flops_cds + END INTERFACE flops + INTERFACE matnorm + MODULE PROCEDURE matnorm_cds + END INTERFACE matnorm +! +CONTAINS + !=========================================================================== + SUBROUTINE init_cds_mat(rank, dists, nterms, mat, bw0, kmat) +! +! Initialize a CDS matrix obtained for a 2d FE discretization +! using Splines of orders p(1) and p(2). +! Number first the 2nd (periodic) dimension. +! + INTEGER, INTENT(in) :: rank + INTEGER, ALLOCATABLE, INTENT(in) :: dists(:) + INTEGER, INTENT(in) :: nterms + TYPE(cds_mat) :: mat + INTEGER, OPTIONAL, INTENT(in) :: bw0, kmat +! + INTEGER :: kl, ku +! + mat%rank = rank + mat%nterms = nterms + mat%ny = 0 ! Used for unicity condition in cyl. geometry. + IF(PRESENT(kmat)) mat%kmat = kmat +! + kl = -LBOUND(dists,1) + ku = UBOUND(dists,1) +! + mat%kl = kl + mat%ku = ku + mat%ndiags = ku + kl + 1 + IF(ASSOCIATED(mat%dists)) DEALLOCATE(mat%dists) + ALLOCATE(mat%dists(-kl:ku)) + mat%dists = dists +! + IF(ASSOCIATED(mat%rowv)) DEALLOCATE(mat%rowv) + IF(ASSOCIATED(mat%colh)) DEALLOCATE(mat%colh) + IF(PRESENT(bw0)) THEN + ALLOCATE(mat%rowv(bw0), mat%colh(bw0)) + mat%rowv = 0.0d0 + mat%colh = 0.0d0 + ELSE + ALLOCATE(mat%rowv(0)) + ALLOCATE(mat%colh(0)) + END IF +! + IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val) + ALLOCATE(mat%val(rank, -kl:ku)) + mat%val = 0.0d0 +! + IF(ASSOCIATED(mat%bal)) DEALLOCATE(mat%bal) + ALLOCATE(mat%bal(rank)) + mat%bal = 0.0d0 + END SUBROUTINE init_cds_mat +!=========================================================================== + SUBROUTINE clear_cds_mat(mat) +! +! Clear matrix, keeping its sparse structure unchanged +! + TYPE(cds_mat) :: mat +! + mat%val = 0.0d0 + END SUBROUTINE clear_cds_mat +!=========================================================================== + SUBROUTINE destroy_cds_mat(mat) +! +! Deallocate pointers in mat +! + TYPE(cds_mat) :: mat + IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val) + IF( ASSOCIATED(mat%dists)) DEALLOCATE(mat%dists) + IF( ASSOCIATED(mat%rowv)) DEALLOCATE(mat%rowv) + IF( ASSOCIATED(mat%colh)) DEALLOCATE(mat%colh) + END SUBROUTINE destroy_cds_mat +!=========================================================================== + SUBROUTINE updt_cds(mat, i, j, val) +! +! Update element Aij into sparse CDS matrix +! + TYPE(cds_mat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE PRECISION, INTENT(in) :: val + INTEGER :: d, k +! + d = j-i + DO k = -mat%kl, mat%ku + IF( d .EQ. mat%dists(k) ) THEN + mat%val(i,k) = mat%val(i,k)+val + RETURN + END IF + END DO + WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j + WRITE(*,'(a/(10i6))') 'Valid distances', mat%dists + STOP '*** Abnormal EXIT in MODULE matrix ***' + END SUBROUTINE updt_cds +!=========================================================================== + SUBROUTINE getele_cds(mat, i, j, val) +! +! Get element Aij of sparse CDS matrix +! + TYPE(cds_mat), INTENT(in) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE PRECISION, INTENT(out) :: val + INTEGER :: d, k +! + d = j-i + DO k = -mat%kl, mat%ku + IF( d .EQ. mat%dists(k) ) THEN + val = mat%val(i,k) + RETURN + END IF + END DO + WRITE(*,'(a,2i6)') 'GETELE: i, j out of range ', i, j + WRITE(*,'(a/(10i6))') 'Valid distances', mat%dists + STOP '*** Abnormal EXIT in MODULE matrix ***' + END SUBROUTINE getele_cds +!=========================================================================== + SUBROUTINE putele_cds(mat, i, j, val) +! +! Update element Aij into sparse CDS matrix +! + TYPE(cds_mat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE PRECISION, INTENT(in) :: val + INTEGER :: d, k +! + d = j-i + DO k = -mat%kl, mat%ku + IF( d .EQ. mat%dists(k) ) THEN + mat%val(i,k) = val + RETURN + END IF + END DO + WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j + WRITE(*,'(a/(10i6))') 'Valid distances', mat%dists + STOP '*** Abnormal EXIT in MODULE matrix ***' + END SUBROUTINE putele_cds +!=========================================================================== + SUBROUTINE getcol_cds(mat, j, arr) +! +! Get a column from matrix +! + TYPE(cds_mat), INTENT(in) :: mat + INTEGER, INTENT(in) :: j + DOUBLE PRECISION, INTENT(out) :: arr(:) + INTEGER :: n,i, k +! + n = mat%rank + IF( SIZE(arr) .LT. n ) THEN + WRITE(*,*) 'GETCOL: size of arr too small' + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + arr(1:n) = 0.0d0 + DO k=-mat%kl, mat%ku + i = j-mat%dists(k) + IF( i.GE.1 .AND. i.LE.n ) arr(i) = mat%val(i,k) + END DO + END SUBROUTINE getcol_cds +!=========================================================================== + SUBROUTINE getrow_cds(mat, i, arr) +! +! Get a row from matrix +! + TYPE(cds_mat), INTENT(in) :: mat + INTEGER, INTENT(in) :: i + DOUBLE PRECISION, INTENT(out) :: arr(:) + INTEGER :: n, j, k +! + n = mat%rank + IF( SIZE(arr) .LT. n ) THEN + WRITE(*,*) 'GETROW: size of arr too small' + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + arr(1:n) = 0.0d0 + DO k=-mat%kl, mat%ku + j = i+mat%dists(k) + IF( j.GE.1 .AND. j.LE.n ) arr(j) = mat%val(i,k) + END DO + END SUBROUTINE getrow_cds +!=========================================================================== + SUBROUTINE putcol_cds(mat, j, arr) +! +! Put a column to matrix +! + TYPE(cds_mat) :: mat + INTEGER, INTENT(in) :: j + DOUBLE PRECISION, INTENT(in) :: arr(:) + INTEGER :: n,i, k +! + n = mat%rank + IF( SIZE(arr) .LT. n ) THEN + WRITE(*,*) 'PUTCOL: size of arr too small' + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + DO k=-mat%kl, mat%ku + i = j-mat%dists(k) + IF( i.GE. 1 .AND. i.LE.n ) mat%val(i,k) = arr(i) + END DO + END SUBROUTINE putcol_cds +!=========================================================================== + SUBROUTINE putrow_cds(mat, i, arr) +! +! Put a row from matrix +! + TYPE(cds_mat) :: mat + INTEGER, INTENT(in) :: i + DOUBLE PRECISION, INTENT(in) :: arr(:) + INTEGER :: n, j, k +! + n = mat%rank + IF( SIZE(arr) .LT. n ) THEN + WRITE(*,*) 'PUTROW: size of arr too small' + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + DO k=-mat%kl, mat%ku + j = i+mat%dists(k) + IF( j.GE.1 .AND. j.LE.n ) mat%val(i,k) = arr(j) + END DO + END SUBROUTINE putrow_cds +!=========================================================================== + SUBROUTINE getdiag_cds(mat, d) +! +! Returns diagonal of matrix +! + TYPE(cds_mat) :: mat + DOUBLE PRECISION :: d(:) + INTEGER :: ny +! + d(:) = mat%val(:,0) +! +! The extra row and column implied by periodic BC +!!$ ny = mat%ny +!!$ IF( ny .NE. 0 ) THEN +!!$ d(ny) = mat%rowv(ny) + mat%colh(ny) +!!$ END IF +!!$ WRITE(*,'(a/(8(1pe12.3)))') 'd', d + END SUBROUTINE getdiag_cds +!=========================================================================== + FUNCTION vmx_cds(mat, xarr) +! +! Return product mat*x +! + TYPE(cds_mat) :: mat + DOUBLE PRECISION, INTENT(in) :: xarr(:) + DOUBLE PRECISION :: vmx_cds(SIZE(xarr)) +! + DOUBLE PRECISION :: alpha=1.0d0, beta=1.0d0 + INTEGER :: m, bw0, ny, k, d, i, i1, i2 +! + m = mat%rank + bw0 = SIZE(mat%rowv) + ny = mat%ny + vmx_cds = 0.0d0 +! + IF( ny .NE. 0 ) THEN ! Contributions from unicity BC + vmx_cds(ny:bw0) = mat%colh(ny:bw0)*xarr(ny) + vmx_cds(ny) = vmx_cds(ny) + DOT_PRODUCT(mat%rowv(ny:bw0), xarr(ny:bw0)) + END IF +! +#ifdef MKL + CALL mkl_ddiamv('n', m, m, alpha, 'g', mat%val, m, mat%dists, & + & mat%ndiags, xarr, beta, vmx_cds) +#else + DO k=-mat%kl,mat%ku + d = mat%dists(k) + i1 = MAX(1,1-d) + i2 = MIN(mat%rank,mat%rank-d) + DO i=i1,i2 + vmx_cds(i) = vmx_cds(i) + mat%val(i,k)*xarr(i+d) + END DO + END DO +#endif + END FUNCTION vmx_cds +!=========================================================================== + FUNCTION vmxn_cds(mat, xarr) +! +! Return product mat*x +! + TYPE(cds_mat) :: mat + DOUBLE PRECISION, INTENT(in) :: xarr(:,:) + DOUBLE PRECISION :: vmxn_cds(SIZE(xarr,1),SIZE(xarr,2)) +! + DOUBLE PRECISION :: alpha=1.0d0, beta=1.0d0 + INTEGER :: m, nrhs, bw0, ny, k, d, i, j, i1, i2 +! + m = mat%rank + nrhs = SIZE(xarr,2) + bw0 = SIZE(mat%rowv) + ny = mat%ny + vmxn_cds = 0.0d0 +! + IF( ny .NE. 0 ) THEN ! Contributions from unicity BC + DO j=1,nrhs + vmxn_cds(ny:bw0,j) = mat%colh(ny:bw0)*xarr(ny,j) + vmxn_cds(ny,j) = vmxn_cds(ny,j) + & + & DOT_PRODUCT(mat%rowv(ny:bw0), xarr(ny:bw0,j)) + END DO + END IF +! +#ifdef MKL + CALL mkl_ddiamm('n', m, nrhs, m, alpha, 'g', mat%val, m, & + & mat%dists, mat%ndiags, xarr, m, beta, vmxn_cds, m) +#else + DO k=-mat%kl,mat%ku + d = mat%dists(k) + i1 = MAX(1,1-d) + i2 = MIN(mat%rank,mat%rank-d) + DO j=1,nrhs + DO i=i1,i2 + vmxn_cds(i,j) = vmxn_cds(i,j) + mat%val(i,k)*xarr(i+d,j) + END DO + END DO + END DO +#endif + END FUNCTION vmxn_cds +!=========================================================================== + SUBROUTINE getmat_cds(fid, label, mat) +! +! Read in CDS matrix from hdf5 file +! + USE futils +! + INTEGER, INTENT(in) :: fid + CHARACTER(len=*), INTENT(in) :: label + TYPE(cds_mat) :: mat +! + CALL getatt(fid, label, 'RANK', mat%rank) + CALL getatt(fid, label, 'KL', mat%kl) + CALL getatt(fid, label, 'KU', mat%ku) + CALL getatt(fid, label, 'NDIAGS', mat%ndiags) + CALL getatt(fid, label, 'NTERMS', mat%nterms) + CALL getatt(fid, label, 'KMAT', mat%kmat) + CALL getatt(fid, label, 'NY', mat%ny) + IF( ASSOCIATED(mat%dists) ) THEN + CALL getarr(fid, TRIM(label)//'/dists', mat%dists) + END IF + IF(ASSOCIATED(mat%bal)) THEN + CALL getarr(fid, TRIM(label)//'/bal', mat%bal) + END IF + CALL getarr(fid, TRIM(label)//'/vals', mat%val) + IF(ASSOCIATED(mat%rowv)) THEN + CALL getarr(fid, TRIM(label)//'/rowv', mat%rowv) + CALL getarr(fid, TRIM(label)//'/colh', mat%colh) + END IF + END SUBROUTINE getmat_cds +!=========================================================================== + SUBROUTINE putmat_cds(fid, label, mat, str) +! +! Write CDS matrix in hdf5 file +! + USE futils +! + INTEGER, INTENT(in) :: fid + CHARACTER(len=*), INTENT(in) :: label + TYPE(cds_mat) :: mat + CHARACTER(len=*), OPTIONAL, INTENT(in) :: str +! + IF(PRESENT(str)) THEN + CALL creatg(fid, label, str) + ELSE + CALL creatg(fid, label) + END IF + CALL attach(fid, label, 'RANK', mat%rank) + CALL attach(fid, label, 'KL', mat%kl) + CALL attach(fid, label, 'KU', mat%ku) + CALL attach(fid, label, 'NDIAGS', mat%ndiags) + CALL attach(fid, label, 'NTERMS', mat%nterms) + CALL attach(fid, label, 'KMAT', mat%kmat) + CALL attach(fid, label, 'NY', mat%ny) + IF( ASSOCIATED(mat%dists) ) THEN + CALL putarr(fid, TRIM(label)//'/dists', mat%dists) + END IF + IF(ASSOCIATED(mat%bal)) THEN + CALL putarr(fid, TRIM(label)//'/bal', mat%bal) + END IF + CALL putarr(fid, TRIM(label)//'/vals', mat%val) + IF(ASSOCIATED(mat%rowv)) THEN + CALL putarr(fid, TRIM(label)//'/rowv', mat%rowv) + CALL putarr(fid, TRIM(label)//'/colh', mat%colh) + END IF + END SUBROUTINE putmat_cds +!=========================================================================== + FUNCTION flops_cds(mat, xarr, ny) +! +! Return FLOPS in product mat*x +! + TYPE(cds_mat) :: mat + DOUBLE PRECISION, INTENT(in) :: xarr(:) + DOUBLE PRECISION :: flops_cds + INTEGER, OPTIONAL, INTENT(in) :: ny +! + INTEGER :: k, d, i, i1, i2 +! + flops_cds = 0.0d0 + IF( PRESENT(ny) ) THEN ! Contributions from unicity BC + flops_cds = 4.0d0*(SIZE(mat%rowv)-ny+1) + END IF + DO k=-mat%kl,mat%ku + d = mat%dists(k) + i1 = MAX(1,1-d) + i2 = MIN(mat%rank,mat%rank-d) + flops_cds = flops_cds + 2.0d0*(i2-i1+1) + END DO + END FUNCTION flops_cds +!=========================================================================== + SUBROUTINE cds2mumps(mat, mat_mumps) +! +! Fill mumps structure (based on routine to_mumps_mat) +! + INCLUDE 'mpif.h' + TYPE(cds_mat) :: mat + TYPE(mumps_mat) :: mat_mumps +! + INTEGER :: i, ii, i1, i2, j, k, rank, d, bw0, s, e + INTEGER :: comm, ierr, nnz_loc +! + CALL init(mat%rank, mat%nterms, mat_mumps) +! + comm = mat_mumps%mumps_par%COMM + mat_mumps%mumps_par%ICNTL(18) = 3 ! Distributed assembled matrix +! +! Compute nnz_loc +! + rank = mat_mumps%rank + s = mat_mumps%istart + e = mat_mumps%iend +! + nnz_loc=0 + DO k=-mat%kl,mat%ku + d = mat%dists(k) + i1 = MAX(s,1-d) + i2 = MIN(e,rank-d) + nnz_loc = nnz_loc + (i2-i1+1) + END DO +! +! Extra col and row from unicity conditions +! + bw0 = SIZE(mat%rowv) + IF(bw0.GT.0) THEN + IF(mat%ny.GE.s .AND. mat%ny.LE.e) THEN + nnz_loc = nnz_loc + bw0-mat%ny ! rowh(ny+1:bw0) + END IF + nnz_loc = nnz_loc + (MIN(bw0,e)-MAX(mat%ny,s)) ! colh(ny+1:bw0) + END IF +! + mat_mumps%nnz_start = 0 + CALL mpi_exscan(nnz_loc, mat_mumps%nnz_start, 1, MPI_INTEGER, MPI_SUM, comm, ierr) + mat_mumps%nnz_start = mat_mumps%nnz_start + 1 + mat_mumps%nnz_end = mat_mumps%nnz_start + nnz_loc - 1 + mat_mumps%nnz_loc = nnz_loc + CALL mpi_allreduce(nnz_loc, mat_mumps%nnz, 1, MPI_INTEGER, MPI_SUM, comm, ierr) +! + mat_mumps%mumps_par%N = rank + mat_mumps%mumps_par%NZ_loc = nnz_loc +! +! Construct MUMPS (IRN, JCN, A) +! + ALLOCATE(mat_mumps%mumps_par%IRN_loc(nnz_loc)) + ALLOCATE(mat_mumps%mumps_par%JCN_loc(nnz_loc)) + ALLOCATE(mat_mumps%mumps_par%A_loc(nnz_loc)) +! + ii=0 + DO k=-mat%kl,mat%ku + d = mat%dists(k) + i1 = MAX(s,1-d) + i2 = MIN(e,rank-d) + DO i=i1,i2 + ii = ii+1 + mat_mumps%mumps_par%IRN_loc(ii) = i + mat_mumps%mumps_par%JCN_loc(ii) = i+d + mat_mumps%mumps_par%A_loc(ii) = mat%val(i,k) + END DO + END DO +! + IF(bw0.GT.0) THEN + IF(mat%ny.GE.s .AND. mat%ny.LE.e) THEN + DO j=mat%ny+1,bw0 ! rowh(ny+1:bw0) + ii = ii+1 + mat_mumps%mumps_par%IRN_loc(ii) = mat%ny + mat_mumps%mumps_par%JCN_loc(ii) = j + mat_mumps%mumps_par%A_loc(ii) = mat%rowv(j) + END DO + END IF + DO i=MAX(mat%ny,s)+1,MIN(bw0,e) ! colh(ny+1:bw0) + ii = ii+1 + mat_mumps%mumps_par%IRN_loc(ii) = i + mat_mumps%mumps_par%JCN_loc(ii) = mat%ny + mat_mumps%mumps_par%A_loc(ii) = mat%colh(i) + END DO + END IF +! + CALL destroy(mat_mumps%mat) + NULLIFY(mat_mumps%mat) + END SUBROUTINE cds2mumps +!=========================================================================== + DOUBLE PRECISION FUNCTION matnorm_cds(mat, p) +! +! Compute matrix norm +! + TYPE(cds_mat), INTENT(in) :: mat + CHARACTER(len=*), OPTIONAL, INTENT(in) :: p +! + CHARACTER(len=4) :: norm_type + INTEGER :: i, j, k, d + DOUBLE PRECISION :: temp(mat%rank) +! + norm_type = 'fro' + IF(PRESENT(p)) norm_type = p +! + SELECT CASE (norm_type) + CASE ('inf') + DO i=1,mat%rank + temp(i) = SUM(ABS(mat%val(i,:))) + END DO + matnorm_cds = MAXVAL(temp) + CASE ('1') + temp = 0.0d0 + DO k=-mat%kl,mat%ku + d = mat%dists(k) + DO i=MAX(1,1-d),MIN(mat%rank,mat%rank-d) + temp(i+d) = temp(i+d) + ABS(mat%val(i,k)) + END DO + END DO + matnorm_cds = MAXVAL(temp) + CASE('fro') + matnorm_cds = SQRT(SUM(mat%val**2)) + END SELECT + END FUNCTION matnorm_cds +!=========================================================================== +! +END MODULE cds diff --git a/src/conmat.f90 b/src/conmat.f90 new file mode 100644 index 0000000..3a489e5 --- /dev/null +++ b/src/conmat.f90 @@ -0,0 +1,257 @@ +!> +!> @file conmat.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +MODULE conmat_mod +! +! CONMAT: Matrix construction for FE discretization. +! +! T.M. Tran, CRPP-EPFL +! November 2011 +! + USE bsplines + USE matrix +#ifdef MKL + USE pardiso_bsplines +#endif + IMPLICIT NONE +! + INTERFACE conrhs + MODULE PROCEDURE conrhs_r, conrhs_z + END INTERFACE conrhs + INTERFACE conmat + MODULE PROCEDURE conmat_1d_gb, conmat_1d_ge, conmat_1d_pb, conmat_1d_periodic, & + & conmat_1d_zgb, conmat_1d_zpb, conmat_1d_zperiodic, & + & conmat_gb, conmat_pb, & + & conmat_zgb, conmat_zpb + END INTERFACE conmat +#ifdef MKL + INTERFACE conmat + MODULE PROCEDURE conmat_1d_pardiso, conmat_1d_zpardiso, & + & conmat_pardiso, conmat_zpardiso + END INTERFACE conmat +#endif +! +CONTAINS +!=========================================================================== + SUBROUTINE conmat_1d_gb(spl, mat, coefeq, maxder) +! +! Construction of FE matrix mat for 1D differential operator +! using spline spl +! + TYPE(gbmat) :: mat + TYPE(spline1d), INTENT(in) :: spl +! + INCLUDE 'conmat_1d.tpl' + END SUBROUTINE conmat_1d_gb +!=========================================================================== + SUBROUTINE conmat_1d_ge(spl, mat, coefeq, maxder) +! +! Construction of FE matrix mat for 1D differential operator +! using spline spl +! + TYPE(gemat) :: mat + TYPE(spline1d), INTENT(in) :: spl +! + INCLUDE 'conmat_1d.tpl' + END SUBROUTINE conmat_1d_ge +!=========================================================================== + SUBROUTINE conmat_1d_pb(spl, mat, coefeq, maxder) +! +! Construction of FE matrix mat for 1D differential operator +! using spline spl +! + TYPE(pbmat) :: mat + TYPE(spline1d), INTENT(in) :: spl +! + INCLUDE 'conmat_1d.tpl' + END SUBROUTINE conmat_1d_pb +!=========================================================================== + SUBROUTINE conmat_1d_periodic(spl, mat, coefeq, maxder) +! +! Construction of FE matrix mat for 1D differential operator +! using spline spl +! + TYPE(periodic_mat) :: mat + TYPE(spline1d), INTENT(in) :: spl +! + INCLUDE 'conmat_1d.tpl' + END SUBROUTINE conmat_1d_periodic +!=========================================================================== + SUBROUTINE conmat_1d_zgb(spl, mat, coefeq, maxder) +! +! Construction of FE matrix mat for 1D differential operator +! using spline spl +! + TYPE(zgbmat) :: mat + TYPE(spline1d), INTENT(in) :: spl +! + INCLUDE 'zconmat_1d.tpl' + END SUBROUTINE conmat_1d_zgb +!=========================================================================== + SUBROUTINE conmat_1d_zpb(spl, mat, coefeq, maxder) +! +! Construction of FE matrix mat for 1D differential operator +! using spline spl +! + TYPE(zpbmat) :: mat + TYPE(spline1d), INTENT(in) :: spl +! + INCLUDE 'zconmat_1d.tpl' + END SUBROUTINE conmat_1d_zpb +!=========================================================================== + SUBROUTINE conmat_1d_zperiodic(spl, mat, coefeq, maxder) +! +! Construction of FE matrix mat for 1D differential operator +! using spline spl +! + TYPE(zperiodic_mat) :: mat + TYPE(spline1d), INTENT(in) :: spl +! + INCLUDE 'zconmat_1d.tpl' + END SUBROUTINE conmat_1d_zperiodic +!=========================================================================== + SUBROUTINE conmat_gb(spl, mat, coefeq, maxder, nat_order) +! +! Construction of FE matrix mat for 2D differential operator +! using spline spl +! + TYPE(gbmat) :: mat + TYPE(spline2d), INTENT(in) :: spl +! + INCLUDE 'conmat.tpl' + END SUBROUTINE conmat_gb +!=========================================================================== + SUBROUTINE conmat_pb(spl, mat, coefeq, maxder, nat_order) +! +! Construction of FE matrix mat for 2D differential operator +! using spline spl +! + TYPE(pbmat) :: mat + TYPE(spline2d), INTENT(in) :: spl +! + INCLUDE 'conmat.tpl' + END SUBROUTINE conmat_pb +!=========================================================================== + SUBROUTINE conmat_zgb(spl, mat, coefeq, maxder, nat_order) +! +! Construction of FE matrix mat for 2D differential operator +! using spline spl +! + TYPE(zgbmat) :: mat + TYPE(spline2d), INTENT(in) :: spl +! + INCLUDE 'zconmat.tpl' + END SUBROUTINE conmat_zgb +!=========================================================================== + SUBROUTINE conmat_zpb(spl, mat, coefeq, maxder, nat_order) +! +! Construction of FE matrix mat for 2D differential operator +! using spline spl +! + TYPE(zpbmat) :: mat + TYPE(spline2d), INTENT(in) :: spl +! + INCLUDE 'zconmat.tpl' + END SUBROUTINE conmat_zpb +!=========================================================================== + SUBROUTINE conrhs_r(spl, farr, frhs) +! +! Projection of RHS on spline basis functions +! + TYPE(spline1d) :: spl + DOUBLE PRECISION, INTENT(out) :: farr(:) + INTERFACE + DOUBLE PRECISION FUNCTION frhs(x) + DOUBLE PRECISION, INTENT(in) :: x + END FUNCTION frhs + END INTERFACE + DOUBLE PRECISION :: contrib +! + INCLUDE 'conrhs.tpl' + END SUBROUTINE conrhs_r +!=========================================================================== + SUBROUTINE conrhs_z(spl, farr, frhs) +! +! Projection of RHS on spline basis functions +! + TYPE(spline1d) :: spl + DOUBLE COMPLEX, INTENT(out) :: farr(:) + INTERFACE + DOUBLE COMPLEX FUNCTION frhs(x) + DOUBLE PRECISION, INTENT(in) :: x + END FUNCTION frhs + END INTERFACE + DOUBLE COMPLEX :: contrib +! + INCLUDE 'conrhs.tpl' + END SUBROUTINE conrhs_z +!=========================================================================== +#ifdef MKL + SUBROUTINE conmat_1d_pardiso(spl, mat, coefeq, maxder) +! +! Construction of FE matrix mat for 1D differential operator +! using spline spl +! + TYPE(pardiso_mat) :: mat + TYPE(spline1d), INTENT(in) :: spl +! + INCLUDE 'conmat_1d.tpl' + END SUBROUTINE conmat_1d_pardiso +!=========================================================================== + SUBROUTINE conmat_1d_zpardiso(spl, mat, coefeq, maxder) +! +! Construction of FE matrix mat for 1D differential operator +! using spline spl +! + TYPE(zpardiso_mat) :: mat + TYPE(spline1d), INTENT(in) :: spl +! + INCLUDE 'zconmat_1d.tpl' + END SUBROUTINE conmat_1d_zpardiso +!=========================================================================== + SUBROUTINE conmat_pardiso(spl, mat, coefeq, maxder, nat_order) +! +! Construction of FE matrix mat for 2D differential operator +! using spline spl +! + TYPE(pardiso_mat) :: mat + TYPE(spline2d), INTENT(in) :: spl +! + INCLUDE 'conmat.tpl' + END SUBROUTINE conmat_pardiso +!=========================================================================== + SUBROUTINE conmat_zpardiso(spl, mat, coefeq, maxder, nat_order) +! +! Construction of FE matrix mat for 2D differential operator +! using spline spl +! + TYPE(zpardiso_mat) :: mat + TYPE(spline2d), INTENT(in) :: spl +! + INCLUDE 'zconmat.tpl' + END SUBROUTINE conmat_zpardiso +!=========================================================================== +#endif +END MODULE conmat_mod diff --git a/src/conmat.tpl b/src/conmat.tpl new file mode 100644 index 0000000..09f96c6 --- /dev/null +++ b/src/conmat.tpl @@ -0,0 +1,213 @@ +!> +!> @file conmat.tpl +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +! +! In this version s[lines are precalculted +! (on all n1/n2 intervals +! + INTERFACE + SUBROUTINE coefeq(x, y, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x, y + INTEGER, INTENT(out) :: idt(:,:), idw(:,:) + DOUBLE PRECISION, INTENT(out) :: c(:) + END SUBROUTINE coefeq + END INTERFACE + INTEGER, OPTIONAL :: maxder(2) ! maximum oder of derivatives + LOGICAL, OPTIONAL :: nat_order ! Natural ordering for 2d-1d mapping +! + INTEGER :: n1, nidbas1, ndim1, n1e + INTEGER :: n2, nidbas2, ndim2, n2e + INTEGER :: ng1, ng2 + INTEGER :: i1, i2, ig1, ig2 + INTEGER :: igt1, igt2, igw1, igw2, irow, jcol + INTEGER, ALLOCATABLE :: left1(:), left2(:) +! + LOGICAL :: nlper1, nlper2, nlnat +! + INTEGER :: kterms ! Number of terms in weak form + INTEGER :: k, kmaxder, it1, iw1, it2, iw2 + INTEGER, ALLOCATABLE :: idert(:,:), iderw(:,:) ! Derivative order + DOUBLE PRECISION, ALLOCATABLE :: coefs(:,:,:) ! Terms in weak form +! + DOUBLE PRECISION, POINTER :: xg1(:,:), xg2(:,:), wg1(:,:), wg2(:,:) + DOUBLE PRECISION, ALLOCATABLE :: fun1(:,:,:,:), fun2(:,:,:,:) + DOUBLE PRECISION, ALLOCATABLE :: mata(:,:,:,:), matc(:,:) + DOUBLE PRECISION, ALLOCATABLE :: matg(:,:,:), matf(:,:,:), matcg(:,:,:) +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) + nlper1 = spl%sp1%period + nlper2 = spl%sp2%period +! + n1e = n1+nidbas1 ! Number of elements in 1st coordinate + n2e = n2+nidbas2 ! Number of elements in 2nd coordinate + iF(nlper2) n2e = n2 +! +! Gauss points and weights on all intervals +! + xg1 => spl%sp1%gausx ! xg1(ng1,n1) + wg1 => spl%sp1%gausw ! wg1(ng1,n1) + ng1 = SIZE(xg1,1) + xg2 => spl%sp2%gausx + wg2 => spl%sp2%gausw + ng2 = SIZE(xg2,1) +! +! Splines on all intervals +! + kmaxder = 1 + IF(PRESENT(maxder)) kmaxder = maxder(1) + ALLOCATE(fun1(0:nidbas1,0:kmaxder,ng1,n1)) + ALLOCATE(left1(ng1)) + DO i1=1,n1 + left1 = i1 + CALL basfun(xg1(:,i1), spl%sp1, fun1(:,:,:,i1), left1) + END DO + DEALLOCATE(left1) +! + kmaxder = 1 + IF(PRESENT(maxder)) kmaxder = maxder(2) + ALLOCATE(fun2(0:nidbas2,0:kmaxder,ng2,n2)) + ALLOCATE(left2(ng2)) + DO i2=1,n2 + left2 = i2 + CALL basfun(xg2(:,i2), spl%sp2, fun2(:,:,:,i2), left2) + END DO + DEALLOCATE(left2) +! +! Ordering in local to global matrix mapping +! + nlnat = .FALSE. + IF(PRESENT(nat_order)) nlnat = nat_order +!=========================================================================== +! 2.0 Assembly loop +! +! Weak form +! + kterms = mat%nterms + ALLOCATE(idert(kterms,2)) + ALLOCATE(iderw(kterms,2)) + ALLOCATE(coefs(kterms,ng1,ng2)) +! +! Allocate local matrices +! + ALLOCATE(mata(0:nidbas1,0:nidbas1,0:nidbas2,0:nidbas2)) + ALLOCATE(matc(ng1,ng2)) + ALLOCATE(matg(0:nidbas2,0:nidbas2,ng2)) + ALLOCATE(matf(0:nidbas1,0:nidbas1,ng1)) + ALLOCATE(matcg(ng1,0:nidbas2,0:nidbas2)) +! + DO i1=1,n1 + DO i2=1,n2 +! +! Coefficients of the weak form +! + DO ig1=1,ng1 + DO ig2=1,ng2 + CALL coefeq(xg1(ig1,i1), xg2(ig2,i2), & + & idert, iderw, coefs(:,ig1,ig2)) + END DO + END DO +! +! Compute local matrix: A <- E*(C*D^T) + A +! + mata = 0.0d0 + DO k=1,kterms +! + matc(1:ng1,1:ng2) = coefs(k,1:ng1,1:ng2) +! + DO it1=0,nidbas1 + DO iw1=0,nidbas1 + DO ig1=1,ng1 + matf(it1,iw1,ig1) = wg1(ig1,i1) * & + & fun1(it1,idert(k,1),ig1,i1) * & + & fun1(iw1,iderw(k,1),ig1,i1) + END DO + END DO + END DO +! + DO it2=0,nidbas2 + DO iw2=0,nidbas2 + DO ig2=1,ng2 + matg(it2,iw2,ig2) = wg2(ig2,i2) * & + & fun2(it2,idert(k,2),ig2,i2) * & + & fun2(iw2,iderw(k,2),ig2,i2) + END DO + END DO + END DO +! + CALL dgemm('N', 'T', ng1, (nidbas2+1)*(nidbas2+1), ng2, 1.0d0, & + & matc, ng1, matg, (nidbas2+1)*(nidbas2+1), 0.0d0, & + & matcg, ng1) + CALL dgemm('N', 'N', (nidbas1+1)*(nidbas1+1), (nidbas2+1)*(nidbas2+1), & + & ng1, 1.0d0, matf, (nidbas1+1)*(nidbas1+1), matcg, ng1, 1.0d0, & + & mata, (nidbas1+1)*(nidbas1+1)) +! + END DO +! +! Map local matrix A to global matrix +! + DO it1=0,nidbas1 + igt1 = i1+it1; IF(nlper1) igt1 = MODULO(igt1-1,n1) + 1 + DO it2=0,nidbas2 + igt2 = i2+it2; IF(nlper2) igt2 = MODULO(igt2-1, n2) + 1 + irow = glmap(igt1, igt2, n1e, n2e) + DO iw1=0,nidbas1 + igw1 = i1+iw1; IF(nlper1) igw1 = MODULO(igw1-1,n1) + 1 + DO iw2=0,nidbas2 + igw2 = i2+iw2; IF(nlper2) igw2 = MODULO(igw2-1, n2) + 1 + jcol = glmap(igw1, igw2, n1e, n2e) + CALL updtmat(mat, irow, jcol, mata(it1,iw1,it2,iw2)) + END DO + END DO + END DO + END DO +! + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(fun1) + DEALLOCATE(fun2) + DEALLOCATE(idert, iderw, coefs) + DEALLOCATE(mata) + DEALLOCATE(matc) + DEALLOCATE(matg) + DEALLOCATE(matcg) + DEALLOCATE(matf) +! +CONTAINS + INTEGER FUNCTION glmap(i,j,n1,n2) + INTEGER, INTENT(in) :: i,j,n1,n2 + IF(nlnat) THEN + glmap = (j-1)*n1 + i + ELSE + glmap = (i-1)*n2 + j + END IF + END FUNCTION glmap diff --git a/src/conmat2.tpl b/src/conmat2.tpl new file mode 100644 index 0000000..27f7e4d --- /dev/null +++ b/src/conmat2.tpl @@ -0,0 +1,202 @@ +!> +!> @file conmat2.tpl +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +! +! In this version local matrices E and D are precalculted +! (on all n1/n2 intervals and nterms weak-form terms +! + INTERFACE + SUBROUTINE coefeq(x, y, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x, y + INTEGER, INTENT(out) :: idt(:,:), idw(:,:) + DOUBLE PRECISION, INTENT(out) :: c(:) + END SUBROUTINE coefeq + END INTERFACE + INTEGER, OPTIONAL :: maxder(2) ! maximum oder of derivatives +! + INTEGER :: n1, nidbas1, ndim1 + INTEGER :: n2, nidbas2, ndim2 + INTEGER :: ng1, ng2 + INTEGER :: i1, i2, ig1, ig2 + INTEGER :: igt1, igt2, igw1, igw2, irow, jcol + INTEGER, ALLOCATABLE :: left1(:), left2(:) +! + LOGICAL :: nlper1, nlper2 +! + INTEGER :: kterms ! Number of terms in weak form + INTEGER :: k, kmaxder, it1, iw1, it2, iw2 + INTEGER, ALLOCATABLE :: idert(:,:), iderw(:,:) ! Derivative order + DOUBLE PRECISION, ALLOCATABLE :: coefs(:,:,:) ! Terms in weak form + DOUBLE PRECISION :: dummy(mat%nterms) +! + DOUBLE PRECISION, POINTER :: xg1(:,:), xg2(:,:), wg1(:,:), wg2(:,:) + DOUBLE PRECISION, ALLOCATABLE :: fun1(:,:,:), fun2(:,:,:) + DOUBLE PRECISION, ALLOCATABLE :: mata(:,:,:,:), matc(:,:), matcd(:,:,:) + DOUBLE PRECISION, ALLOCATABLE :: matd(:,:,:,:,:), mate(:,:,:,:,:) +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) + nlper1 = spl%sp1%period + nlper2 = spl%sp2%period +! +! Gauss points and weights on all intervals +! + xg1 => spl%sp1%gausx ! xg1(ng1,n1) + wg1 => spl%sp1%gausw ! wg1(ng1,n1) + ng1 = SIZE(xg1,1) + xg2 => spl%sp2%gausx + wg2 => spl%sp2%gausw + ng2 = SIZE(xg2,1) +! +! Derivative orders in the weak form +! + kterms = mat%nterms + ALLOCATE(idert(kterms,2)) + ALLOCATE(iderw(kterms,2)) + CALL coefeq(xg1(1,1), xg2(1,1), idert, iderw, dummy) +! +! Precalc matrix E in dimension 1 +! + kmaxder = 1 + IF(PRESENT(maxder)) kmaxder = maxder(1) + ALLOCATE(fun1(0:nidbas1,0:kmaxder,ng1)) + ALLOCATE(left1(ng1)) + ALLOCATE(mate(0:nidbas1,0:nidbas1,ng1,kterms,n1)) + DO i1=1,n1 + left1 = i1 + CALL basfun(xg1(:,i1), spl%sp1, fun1, left1) + DO k=1,kterms + DO ig1=1,ng1 + DO iw1=0,nidbas1 + DO it1=0,nidbas1 + mate(it1,iw1,ig1,k,i1) = wg1(ig1,i1) * & + & fun1(it1,idert(k,1),ig1) * & + & fun1(iw1,iderw(k,1),ig1) + END DO + END DO + END DO + END DO + END DO + DEALLOCATE(fun1) + DEALLOCATE(left1) +! +! Precalc matrix D in dimension 2 +! + kmaxder = 1 + IF(PRESENT(maxder)) kmaxder = maxder(2) + ALLOCATE(fun2(0:nidbas2,0:kmaxder,ng2)) + ALLOCATE(left2(ng2)) + ALLOCATE(matd(0:nidbas2,0:nidbas2,ng2,kterms,n2)) + DO i2=1,n2 + left2 = i2 + CALL basfun(xg2(:,i2), spl%sp2, fun2, left2) + DO k=1,kterms + DO ig2=1,ng2 + DO iw2=0,nidbas2 + DO it2=0,nidbas2 + matd(it2,iw2,ig2,k,i2) = wg2(ig2,i2) * & + & fun2(it2,idert(k,2),ig2) * & + & fun2(iw2,iderw(k,2),ig2) + END DO + END DO + END DO + END DO + END DO + DEALLOCATE(fun2) + DEALLOCATE(left2) +!=========================================================================== +! 2.0 Assembly loop +! +! Physical coefficients in Weak form +! + ALLOCATE(coefs(kterms,ng1,ng2)) + ALLOCATE(matc(ng1,ng2)) +! +! Allocate local matrix A +! + ALLOCATE(mata(0:nidbas1,0:nidbas1,0:nidbas2,0:nidbas2)) + ALLOCATE(matcd(ng1,0:nidbas2,0:nidbas2)) +! + DO i1=1,n1 + DO i2=1,n2 +! +! Coefficients of the weak form +! + DO ig1=1,ng1 + DO ig2=1,ng2 + CALL coefeq(xg1(ig1,i1), xg2(ig2,i2), & + & idert, iderw, coefs(:,ig1,ig2)) + END DO + END DO +! +! Compute local matrix: A <- E*(C*D^T) + A +! + mata = 0.0d0 + DO k=1,kterms +! + matc(1:ng1,1:ng2) = coefs(k,1:ng1,1:ng2) +! + CALL dgemm('N', 'T', ng1, (nidbas2+1)*(nidbas2+1), ng2, 1.0d0, & + & matc, ng1, matd(0,0,1,k,i2), (nidbas2+1)*(nidbas2+1), 0.0d0, & + & matcd, ng1) + CALL dgemm('N', 'N', (nidbas1+1)*(nidbas1+1), (nidbas2+1)*(nidbas2+1), & + & ng1, 1.0d0, mate(0,0,1,k,i1), (nidbas1+1)*(nidbas1+1), matcd, ng1, 1.0d0, & + & mata, (nidbas1+1)*(nidbas1+1)) +! + END DO +! +! Map local matrix A to global matrix +! + DO it1=0,nidbas1 + igt1 = i1+it1; IF(nlper1) igt1 = MODULO(igt1-1,n1) + 1 + DO it2=0,nidbas2 + igt2 = i2+it2; IF(nlper2) igt2 = MODULO(igt2-1, n2) + 1 + irow = igt2 + (igt1-1)*n2 + DO iw1=0,nidbas1 + igw1 = i1+iw1; IF(nlper1) igw1 = MODULO(igw1-1,n1) + 1 + DO iw2=0,nidbas2 + igw2 = i2+iw2; IF(nlper2) igw2 = MODULO(igw2-1, n2) + 1 + jcol = igw2 + (igw1-1)*n2 + CALL updtmat(mat, irow, jcol, mata(it1,iw1,it2,iw2)) + END DO + END DO + END DO + END DO +! + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(idert, iderw, coefs) + DEALLOCATE(mata) + DEALLOCATE(matc) + DEALLOCATE(matd) + DEALLOCATE(matcd) + DEALLOCATE(mate) diff --git a/src/conmat_1d.tpl b/src/conmat_1d.tpl new file mode 100644 index 0000000..ce06d1d --- /dev/null +++ b/src/conmat_1d.tpl @@ -0,0 +1,156 @@ +!> +!> @file conmat_1d.tpl +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +! +! In this version s[lines are precalculted +! (on all n1/n2 intervals +! + INTERFACE + SUBROUTINE coefeq(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE PRECISION, INTENT(out) :: c(:) + END SUBROUTINE coefeq + END INTERFACE + INTEGER, OPTIONAL :: maxder ! maximum oder of derivatives +! + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: i1, ig1 + INTEGER :: irow, jcol + INTEGER, ALLOCATABLE :: left1(:) +! + LOGICAL :: nlper1 +! + INTEGER :: kterms ! Number of terms in weak form + INTEGER :: k, kmaxder, it1, iw1 + INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order + DOUBLE PRECISION, ALLOCATABLE :: coefs(:,:) ! Terms in weak form +! + DOUBLE PRECISION, POINTER :: xg1(:,:), wg1(:,:) + DOUBLE PRECISION, ALLOCATABLE :: fun1(:,:,:,:), fun2(:,:,:,:) + DOUBLE PRECISION, ALLOCATABLE :: mata(:,:), matc(:) + DOUBLE PRECISION, ALLOCATABLE :: matf(:,:,:) +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl, ndim1, n1, nidbas1) + nlper1 = spl%period +! +! Gauss points and weights on all intervals +! + xg1 => spl%gausx ! xg1(ng1,n1) + wg1 => spl%gausw ! wg1(ng1,n1) + ng1 = SIZE(xg1,1) +! +! Splines on all intervals +! + kmaxder = 1 + IF(PRESENT(maxder)) kmaxder = maxder + ALLOCATE(fun1(0:nidbas1,0:kmaxder,ng1,n1)) + ALLOCATE(left1(ng1)) + DO i1=1,n1 + left1 = i1 +!!$ DO ig1=1,ng1 +!!$ CALL basfun(xg1(ig1,i1), spl, fun1(:,:,ig1,i1), left1(ig1)) +!!$ END DO + CALL basfun(xg1(:,i1), spl, fun1(:,:,:,i1), left1) + END DO + DEALLOCATE(left1) +!=========================================================================== +! 2.0 Assembly loop +! +! Weak form +! + kterms = mat%nterms + ALLOCATE(idert(kterms)) + ALLOCATE(iderw(kterms)) + ALLOCATE(coefs(kterms,ng1)) +! +! Allocate local matrices +! + ALLOCATE(mata(0:nidbas1,0:nidbas1)) + ALLOCATE(matc(ng1)) + ALLOCATE(matf(0:nidbas1,0:nidbas1,ng1)) +! + DO i1=1,n1 +! +! Coefficients of the weak form +! + DO ig1=1,ng1 + CALL coefeq(xg1(ig1,i1), idert, iderw, coefs(:,ig1)) + END DO +! +! Compute local matrix: A <- F*c + A +! + mata = 0.0d0 + DO k=1,kterms +! + matc(1:ng1) = coefs(k,1:ng1) +! + DO it1=0,nidbas1 + DO iw1=0,nidbas1 + DO ig1=1,ng1 + matf(it1,iw1,ig1) = wg1(ig1,i1) * & + & fun1(it1,idert(k),ig1,i1) * & + & fun1(iw1,iderw(k),ig1,i1) + END DO + END DO + END DO +! + CALL dgemv('N', (nidbas1+1)*(nidbas1+1), ng1, 1.0d0, matf, & + & (nidbas1+1)*(nidbas1+1), matc, 1, 1.0d0, mata, 1) + END DO +! +! Map local matrix A to global matrix +! +!!$ WRITE(*,'(/a,i3)') "Lambda, i =", i1 +!!$ DO ig1=1,ng1 +!!$ WRITE(*,'(10(1pe12.3))') fun1(:,0,ig1,i1) +!!$ END DO +!!$ WRITE(*,'(a,i3)') "Lambda', i =", i1 +!!$ DO ig1=1,ng1 +!!$ WRITE(*,'(10(1pe12.3))') fun1(:,1,ig1,i1) +!!$ END DO +!!$ WRITE(*,'(/a)') 'local matrix' + DO it1=0,nidbas1 + irow = i1+it1; IF(nlper1) irow = MODULO(irow-1,n1) + 1 + DO iw1=0,nidbas1 + jcol = i1+iw1; IF(nlper1) jcol = MODULO(jcol-1,n1) + 1 + CALL updtmat(mat, irow, jcol, mata(it1,iw1)) + END DO +!!$ WRITE(*,'(10(1pe12.3))') mata(it1,:) + END DO +! + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(fun1) + DEALLOCATE(idert, iderw, coefs) + DEALLOCATE(mata) + DEALLOCATE(matc) + DEALLOCATE(matf) diff --git a/src/conrhs.tpl b/src/conrhs.tpl new file mode 100644 index 0000000..5278381 --- /dev/null +++ b/src/conrhs.tpl @@ -0,0 +1,52 @@ +!> +!> @file conrhs.tpl +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> + DOUBLE PRECISION, POINTER :: xg(:,:), wg(:,:) + DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) + INTEGER :: ndim, n, nidbas, ng + INTEGER :: i, ig, it, irow + LOGICAL :: nlper +! + CALL get_dim(spl, ndim, n, nidbas) + nlper = spl%period + xg => spl%gausx ! xg(ng,n) + wg => spl%gausw ! wg(ng,n) + ng = SIZE(xg,1) + ALLOCATE(fun(0:nidbas,1)) +! + farr = 0.0d0 + DO i=1,n + DO ig=1,ng + CALL basfun(xg(ig,i), spl, fun, i) + contrib = wg(ig,i)*frhs(xg(ig,i)) + DO it=0,nidbas + irow = i+it + IF(nlper) irow = MODULO(irow-1,n) +1 + farr(irow) = farr(irow)+contrib*fun(it,1) + END DO + END DO + END DO +! + DEALLOCATE(fun) diff --git a/src/csr_mod.f90 b/src/csr_mod.f90 new file mode 100644 index 0000000..ca67f83 --- /dev/null +++ b/src/csr_mod.f90 @@ -0,0 +1,1255 @@ +!> +!> @file csr_mod.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +MODULE csr +! +! CSR: Implement CSR (Compressed Sparse Row) matrice +! +! T.M. Tran, CRPP-EPFL +! October 2012 +! + USE sparse + USE mumps_bsplines + IMPLICIT NONE +! + TYPE, EXTENDS(spmat) :: csr_mat + INTEGER :: mrows, ncols + INTEGER :: nnz = 0 ! Number of non-zeros + INTEGER :: nterms ! Number of terms in weak form + LOGICAL :: nlforce_zero ! Keep exixting nodes with zero value if .true. + INTEGER, POINTER :: irow(:) => NULL() ! points to start of row + INTEGER, POINTER :: idiag(:) => NULL() ! points to diagonal element + INTEGER, POINTER :: cols(:) => NULL() ! Column indices + DOUBLE PRECISION, POINTER :: val(:) => NULL() ! Elelement values + TYPE(mumps_mat), ALLOCATABLE :: mumps + END TYPE csr_mat +! + TYPE, EXTENDS(zspmat) :: zcsr_mat + INTEGER :: mrows, ncols + INTEGER :: nnz = 0 ! Number of non-zeros + INTEGER :: nterms ! Number of terms in weak form + LOGICAL :: nlforce_zero ! Keep exixting nodes with zero value if .true. + INTEGER, POINTER :: irow(:) => NULL() ! points to start of row + INTEGER, POINTER :: idiag(:) => NULL() ! points to diagonal element + INTEGER, POINTER :: cols(:) => NULL() ! Column indices + DOUBLE COMPLEX, POINTER :: val(:) => NULL() ! Elelement values +! + TYPE(zmumps_mat), ALLOCATABLE :: mumps + END TYPE zcsr_mat +! + INTERFACE init + MODULE PROCEDURE init_csr_mat, init_zcsr_mat + END INTERFACE init + INTERFACE clear_mat + MODULE PROCEDURE clear_csr_mat, clear_zcsr_mat + END INTERFACE clear_mat + INTERFACE updtmat + MODULE PROCEDURE updt_csr_mat, updt_zcsr_mat + END INTERFACE updtmat + INTERFACE putele + MODULE PROCEDURE putele_csr_mat, putele_zcsr_mat + END INTERFACE putele + INTERFACE getele + MODULE PROCEDURE getele_csr_mat, getele_zcsr_mat + END INTERFACE getele + INTERFACE putrow + MODULE PROCEDURE putrow_csr_mat, putrow_zcsr_mat + END INTERFACE putrow + INTERFACE getrow + MODULE PROCEDURE getrow_csr_mat, getrow_zcsr_mat + END INTERFACE getrow + INTERFACE getdiag + MODULE PROCEDURE getdiag_csr_mat, getdiag_zcsr_mat + END INTERFACE getdiag + INTERFACE putcol + MODULE PROCEDURE putcol_csr_mat, putcol_zcsr_mat + END INTERFACE putcol + INTERFACE getcol + MODULE PROCEDURE getcol_csr_mat, getcol_zcsr_mat + END INTERFACE getcol + INTERFACE to_mat + MODULE PROCEDURE to_csr_mat, to_zcsr_mat + END INTERFACE to_mat + INTERFACE vmx + MODULE PROCEDURE vmx_csr_mat, vmx_csr_matn, vmx_zcsr_mat, vmx_zcsr_matn + END INTERFACE vmx + INTERFACE destroy + MODULE PROCEDURE destroy_csr_mat, destroy_zcsr_mat + END INTERFACE destroy + INTERFACE putmat + MODULE PROCEDURE put_csr_mat, put_zcsr_mat + END INTERFACE putmat +!>>>>> +!>>>>> CONMAT +!>>>> + INTERFACE conmat + MODULE PROCEDURE conmat_1d_csr, conmat_2d_csr, conmat_1d_zcsr, conmat_2d_zcsr + END INTERFACE conmat +!>>>> +!>>>> MULTIGRID_MOD +!>>>> + INTERFACE femat + MODULE PROCEDURE femat_csr + END INTERFACE femat + INTERFACE matnorm + MODULE PROCEDURE matnorm_csr + END INTERFACE matnorm + INTERFACE kron + MODULE PROCEDURE kron_csr + END INTERFACE kron +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +CONTAINS +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE init_csr_mat(n, nterms, mat, nlforce_zero, ncols) +! +! Initialize an empty CSR matrix +! + INTEGER, INTENT(in) :: n, nterms + TYPE(csr_mat) :: mat + LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero + INTEGER, OPTIONAL, intent(in) :: ncols +! + CALL init(n, mat%spmat) + mat%mrows = n + mat%ncols = n + IF(PRESENT(ncols)) mat%ncols = ncols + mat%nterms = nterms + mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val + IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero +! + END SUBROUTINE init_csr_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE init_zcsr_mat(n, nterms, mat, nlforce_zero, ncols) +! +! Initialize an empty CSR matrix +! + INTEGER, INTENT(in) :: n, nterms + TYPE(zcsr_mat) :: mat + LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero + INTEGER, OPTIONAL, intent(in) :: ncols +! + CALL init(n, mat%zspmat) + mat%mrows = n + mat%ncols = n + IF(PRESENT(ncols)) mat%ncols = ncols + mat%nterms = nterms + mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val + IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero +! + END SUBROUTINE init_zcsr_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE clear_zcsr_mat(mat) +! +! Clear matrix, keeping its sparse structure unchanged +! + TYPE(zcsr_mat) :: mat +! + mat%val = (0.0d0,0.0d0) + END SUBROUTINE clear_zcsr_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE clear_csr_mat(mat) +! +! Clear matrix, keeping its sparse structure unchanged +! + TYPE(csr_mat) :: mat +! + mat%val = 0.0d0 + END SUBROUTINE clear_csr_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE updt_csr_mat(mat, i, j, val) +! +! Update element Aij of csr matrix +! + TYPE(csr_mat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE PRECISION, INTENT(in) :: val + INTEGER :: k, s, e +! + IF(mat%nnz.EQ.0) THEN ! Still using linked lists + CALL updtmat(mat%spmat, i, j, val) + ELSE + s = mat%irow(i) + e = mat%irow(i+1)-1 + k = isearch(mat%cols(s:e), j) + IF( k.GE.0 ) THEN + mat%val(s+k) = mat%val(s+k)+val + ELSE + WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j + STOP '*** Abnormal EXIT in MODULE csr_mod ***' + END IF + END IF + END SUBROUTINE updt_csr_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE updt_zcsr_mat(mat, i, j, val) +! +! Update element Aij of csr matrix +! + TYPE(zcsr_mat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE COMPLEX, INTENT(in) :: val + INTEGER :: k, s, e +! + IF(mat%nnz.EQ.0) THEN ! Still using linked lists + CALL updtmat(mat%zspmat, i, j, val) + ELSE + s = mat%irow(i) + e = mat%irow(i+1)-1 + k = isearch(mat%cols(s:e), j) + IF( k.GE.0 ) THEN + mat%val(s+k) = mat%val(s+k)+val + ELSE + WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j + STOP '*** Abnormal EXIT in MODULE csr_mod ***' + END IF + END IF + END SUBROUTINE updt_zcsr_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putele_csr_mat(mat, i, j, val) +! +! Put element (i,j) of matrix +! + TYPE(csr_mat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE PRECISION, INTENT(in) :: val + INTEGER :: iput, jput + INTEGER :: k, s, e +! + iput = i + jput = j +! + IF(mat%nnz.EQ.0) THEN ! Still using linked lists + CALL putele(mat%spmat, iput, jput, val, & + & nlforce_zero=mat%nlforce_zero) + ELSE + s = mat%irow(iput) + e = mat%irow(iput+1)-1 + k = isearch(mat%cols(s:e), jput) + IF( k.GE.0 ) THEN + mat%val(s+k) = val + ELSE + IF(ABS(val) .GT. EPSILON(0.0d0)) THEN ! Ok for zero val + WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j + PRINT*, 'val', val + PRINT*, 'matrix m, n', mat%mrows, mat%ncols + STOP '*** Abnormal EXIT in MODULE csr_mod ***' + END IF + END IF + END IF + END SUBROUTINE putele_csr_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putele_zcsr_mat(mat, i, j, val) +! +! Put element (i,j) of matrix +! + TYPE(zcsr_mat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE COMPLEX, INTENT(in) :: val + INTEGER :: iput, jput + INTEGER :: k, s, e +! + iput = i + jput = j +! + IF(mat%nnz.EQ.0) THEN ! Still using linked lists + CALL putele(mat%zspmat, iput, jput, val, & + & nlforce_zero=mat%nlforce_zero) + ELSE + s = mat%irow(iput) + e = mat%irow(iput+1)-1 + k = isearch(mat%cols(s:e), jput) + IF( k.GE.0 ) THEN + mat%val(s+k) = val + ELSE + IF(ABS(val) .GT. EPSILON(0.0d0)) THEN ! Ok for zero val + WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j + PRINT*, 'val', val + PRINT*, 'matrix m, n', mat%mrows, mat%ncols + STOP '*** Abnormal EXIT in MODULE csr_mod ***' + END IF + END IF + END IF + END SUBROUTINE putele_zcsr_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getele_csr_mat(mat, i, j, val) +! +! Get element (i,j) of sparse matrix +! + TYPE(csr_mat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE PRECISION, INTENT(out) :: val + INTEGER :: iget, jget + INTEGER :: k, s, e +! + iget = i + jget = j + IF(mat%nnz.EQ.0) THEN ! Still using linked lists + CALL getele(mat%spmat, iget, jget, val) + ELSE + s = mat%irow(iget) + e = mat%irow(iget+1)-1 + k = isearch(mat%cols(s:e), jget) + IF( k.GE.0 ) THEN + val =mat%val(s+k) + ELSE + val = 0.0d0 ! Assume zero val if not found + END IF + END IF + END SUBROUTINE getele_csr_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getele_zcsr_mat(mat, i, j, val) +! +! Get element (i,j) of sparse matrix +! + TYPE(zcsr_mat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE COMPLEX, INTENT(out) :: val + INTEGER :: iget, jget + INTEGER :: k, s, e +! + iget = i + jget = j + IF(mat%nnz.EQ.0) THEN ! Still using linked lists + CALL getele(mat%zspmat, iget, jget, val) + ELSE + s = mat%irow(iget) + e = mat%irow(iget+1)-1 + k = isearch(mat%cols(s:e), jget) + IF( k.GE.0 ) THEN + val =mat%val(s+k) + ELSE + val = 0.0d0 ! Assume zero val if not found + END IF + END IF + END SUBROUTINE getele_zcsr_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putrow_csr_mat(amat, i, arr) +! +! Put a row into sparse matrix +! + TYPE(csr_mat), INTENT(inout) :: amat + INTEGER, INTENT(in) :: i + DOUBLE PRECISION, INTENT(in) :: arr(:) + INTEGER :: s, e, j +! + IF(amat%nnz.EQ.0) THEN ! Still using linked lists + DO j=1,amat%ncols + CALL putele(amat, i, j, arr(j)) + END DO + ELSE + s = amat%irow(i) + e = amat%irow(i+1)-1 + DO j=s,e + amat%val(j) = arr(amat%cols(j)) + END DO + END IF + END SUBROUTINE putrow_csr_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putrow_zcsr_mat(amat, i, arr) +! +! Put a row into sparse matrix +! + TYPE(zcsr_mat), INTENT(inout) :: amat + INTEGER, INTENT(in) :: i + DOUBLE COMPLEX, INTENT(in) :: arr(:) + INTEGER :: s, e, j +! + IF(amat%nnz.EQ.0) THEN ! Still using linked lists + DO j=1,amat%ncols + CALL putele(amat, i, j, arr(j)) + END DO + ELSE + s = amat%irow(i) + e = amat%irow(i+1)-1 + DO j=s,e + amat%val(j) = arr(amat%cols(j)) + END DO + END IF + END SUBROUTINE putrow_zcsr_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getrow_csr_mat(amat, i, arr) +! +! Get a row from sparse matrix +! + TYPE(csr_mat), INTENT(in) :: amat + INTEGER, INTENT(in) :: i + DOUBLE PRECISION, INTENT(out) :: arr(:) + INTEGER :: s, e, j +! + arr = 0.0d0 + IF(amat%nnz.EQ.0) THEN ! Still using linked lists + DO j=1,amat%ncols + CALL getele(amat, i, j, arr(j)) + END DO + ELSE + s = amat%irow(i) + e = amat%irow(i+1)-1 + DO j=s,e + arr(amat%cols(j)) = amat%val(j) + END DO + END IF + END SUBROUTINE getrow_csr_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getrow_zcsr_mat(amat, i, arr) +! +! Get a row from sparse matrix +! + TYPE(zcsr_mat), INTENT(in) :: amat + INTEGER, INTENT(in) :: i + DOUBLE COMPLEX, INTENT(out) :: arr(:) + INTEGER :: s, e, j +! + arr = 0.0d0 + IF(amat%nnz.EQ.0) THEN ! Still using linked lists + DO j=1,amat%ncols + CALL getele(amat, i, j, arr(j)) + END DO + ELSE + s = amat%irow(i) + e = amat%irow(i+1)-1 + DO j=s,e + arr(amat%cols(j)) = amat%val(j) + END DO + END IF + END SUBROUTINE getrow_zcsr_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getdiag_csr_mat(amat, arr) +! +! Get the diagonal from matrix +! + TYPE(csr_mat), INTENT(in) :: amat + DOUBLE PRECISION, INTENT(out) :: arr(:) +! +! WARNING: assume that CSR matrix has been converted from linked lists +! + arr(:) = amat%val(amat%idiag(:)) + END SUBROUTINE getdiag_csr_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getdiag_zcsr_mat(amat, arr) +! +! Get the diagonal from matrix +! + TYPE(zcsr_mat), INTENT(in) :: amat + DOUBLE COMPLEX, INTENT(out) :: arr(:) +! +! WARNING: assume that CSR matrix has been converted from linked lists +! + arr(:) = amat%val(amat%idiag(:)) + END SUBROUTINE getdiag_zcsr_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putcol_csr_mat(amat, j, arr) +! +! Put a column into sparse matrix +! + TYPE(csr_mat), INTENT(inout) :: amat + INTEGER, INTENT(in) :: j + DOUBLE PRECISION, INTENT(in) :: arr(:) + INTEGER :: i +! + DO i=1,amat%mrows + CALL putele(amat, i, j, arr(i)) + END DO + END SUBROUTINE putcol_csr_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putcol_zcsr_mat(amat, j, arr) +! +! Put a column into sparse matrix +! + TYPE(zcsr_mat), INTENT(inout) :: amat + INTEGER, INTENT(in) :: j + DOUBLE COMPLEX, INTENT(in) :: arr(:) + INTEGER :: i +! + DO i=1,amat%mrows + CALL putele(amat, i, j, arr(i)) + END DO + END SUBROUTINE putcol_zcsr_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getcol_csr_mat(amat, j, arr) +! +! Get a column from sparse matrix +! + TYPE(csr_mat), INTENT(in) :: amat + INTEGER, INTENT(in) :: j + DOUBLE PRECISION, INTENT(out) :: arr(:) + INTEGER :: i +! + DO i=1,amat%mrows + CALL getele(amat, i, j, arr(i)) + END DO + END SUBROUTINE getcol_csr_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getcol_zcsr_mat(amat, j, arr) +! +! Get a column from sparse matrix +! + TYPE(zcsr_mat), INTENT(in) :: amat + INTEGER, INTENT(in) :: j + DOUBLE COMPLEX, INTENT(out) :: arr(:) + INTEGER :: i +! + DO i=1,amat%mrows + CALL getele(amat, i, j, arr(i)) + END DO + END SUBROUTINE getcol_zcsr_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE to_csr_mat(mat, nlkeep) +! +! Convert linked list spmat to csr matrice structure +! + TYPE(csr_mat) :: mat + LOGICAL, INTENT(in), OPTIONAL :: nlkeep + INTEGER :: nnz_arr(mat%rank) + INTEGER :: i, nnz, rank, s, e + LOGICAL :: nlclean +! + nlclean = .TRUE. + IF(PRESENT(nlkeep)) THEN + nlclean = .NOT. nlkeep + END IF +! +! Allocate the csr matrix structure +! + nnz = get_count(mat%spmat, nnz_arr) + rank = mat%rank + mat%nnz = nnz + IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) + IF(ASSOCIATED(mat%idiag)) DEALLOCATE(mat%idiag) + IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) + IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) + ALLOCATE(mat%irow(rank+1)) + ALLOCATE(mat%idiag(rank)) + ALLOCATE(mat%cols(nnz)) + ALLOCATE(mat%val(nnz)) +! +! Fill csr structure and optionally deallocate the sparse rows +! + mat%irow = 1 + DO i=1,rank + mat%irow(i+1) = mat%irow(i) + nnz_arr(i) + s = mat%irow(i) + e = mat%irow(i+1)-1 + CALL getrow(mat%spmat%row(i), mat%val(s:e), mat%cols(s:e)) + mat%idiag(i) = isearch(mat%cols(s:e), i) + s + IF(nlclean) CALL destroy(mat%spmat%row(i)) + END DO +!!$! +!!$! MUMPS mat for direct solver +!!$! +!!$ ALLOCATE(mat%mumps) +!!$ CALL csr2mumps(mat, mat%mumps) + END SUBROUTINE to_csr_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE to_zcsr_mat(mat, nlkeep) +! +! Convert linked list spmat to csr matrice structure +! + TYPE(zcsr_mat) :: mat + LOGICAL, INTENT(in), OPTIONAL :: nlkeep + INTEGER :: nnz_arr(mat%rank) + INTEGER :: i, nnz, rank, s, e + LOGICAL :: nlclean +! + nlclean = .TRUE. + IF(PRESENT(nlkeep)) THEN + nlclean = .NOT. nlkeep + END IF +! +! Allocate the csr matrix structure +! + nnz = get_count(mat%zspmat, nnz_arr) + rank = mat%rank + mat%nnz = nnz + IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) + IF(ASSOCIATED(mat%idiag)) DEALLOCATE(mat%idiag) + IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) + IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) + ALLOCATE(mat%irow(rank+1)) + ALLOCATE(mat%idiag(rank)) + ALLOCATE(mat%cols(nnz)) + ALLOCATE(mat%val(nnz)) +! +! Fill csr structure and optionally deallocate the sparse rows +! + mat%irow = 1 + DO i=1,rank + mat%irow(i+1) = mat%irow(i) + nnz_arr(i) + s = mat%irow(i) + e = mat%irow(i+1)-1 + CALL getrow(mat%zspmat%row(i), mat%val(s:e), mat%cols(s:e)) + mat%idiag(i) = isearch(mat%cols(s:e), i) + s + IF(nlclean) CALL destroy(mat%zspmat%row(i)) + END DO +!!$! +!!$! MUMPS mat for direct solver +!!$! +!!$ ALLOCATE(mat%mumps) +!!$ CALL csr2mumps(mat, mat%mumps) + END SUBROUTINE to_zcsr_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE csr2mumps(mat, mat_mumps) +! +! Fill mumps structure (based on routine to_mumps_mat) +! + INCLUDE 'mpif.h' + TYPE(csr_mat) :: mat + TYPE(mumps_mat) :: mat_mumps +! + INTEGER :: i, rank, s, e + INTEGER :: comm, ierr, nnz_loc +! + CALL init(mat%rank, mat%nterms, mat_mumps) +! + comm = mat_mumps%mumps_par%COMM + mat_mumps%mumps_par%ICNTL(18) = 3 ! Distributed assembled matrix +! +! Allocate the Mumps matrix structure +! CSR format: (cols, irow, val) or (JCN, irow, A) +! COO format: (IRN, JCN, A) or (IRN, cols, val) +! + rank = mat_mumps%rank + nnz_loc = mat%nnz + mat_mumps%nnz_start = 0 + CALL mpi_exscan(nnz_loc, mat_mumps%nnz_start, 1, MPI_INTEGER, MPI_SUM, comm, ierr) + mat_mumps%nnz_start = mat_mumps%nnz_start + 1 + mat_mumps%nnz_end = mat_mumps%nnz_start + nnz_loc - 1 + mat_mumps%nnz_loc = nnz_loc + CALL mpi_allreduce(nnz_loc, mat_mumps%nnz, 1, MPI_INTEGER, MPI_SUM, comm, ierr) +! + mat_mumps%mumps_par%N = rank + mat_mumps%mumps_par%NZ_loc = nnz_loc +! + mat_mumps%cols => mat%cols + mat_mumps%irow => mat%irow + mat_mumps%val => mat%val +! +! (A,JCN) picked from CSR mat + mat_mumps%mumps_par%A_loc => mat_mumps%val + mat_mumps%mumps_par%JCN_loc => mat_mumps%cols +! +! Determine IRN array + IF(ASSOCIATED(mat_mumps%mumps_par%IRN_loc)) DEALLOCATE(mat_mumps%mumps_par%IRN_loc) + ALLOCATE(mat_mumps%mumps_par%IRN_loc(nnz_loc)) + DO i=mat_mumps%istart,mat_mumps%iend + s = mat_mumps%irow(i) - mat_mumps%nnz_start + 1 + e = mat_mumps%irow(i+1) - mat_mumps%nnz_start + mat_mumps%mumps_par%IRN_loc(s:e) = i + END DO + CALL destroy(mat_mumps%mat) + NULLIFY(mat_mumps%mat) +! + END SUBROUTINE csr2mumps +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE zcsr2mumps(mat, mat_mumps) +! +! Fill mumps structure (based on routine to_mumps_mat) +! + INCLUDE 'mpif.h' + TYPE(zcsr_mat) :: mat + TYPE(zmumps_mat) :: mat_mumps +! + INTEGER :: i, rank, s, e + INTEGER :: comm, ierr, nnz_loc +! + CALL init(mat%rank, mat%nterms, mat_mumps) +! + comm = mat_mumps%mumps_par%COMM + mat_mumps%mumps_par%ICNTL(18) = 3 ! Distributed assembled matrix +! +! Allocate the Mumps matrix structure +! CSR format: (cols, irow, val) or (JCN, irow, A) +! COO format: (IRN, JCN, A) or (IRN, cols, val) +! + rank = mat_mumps%rank + nnz_loc = mat%nnz + mat_mumps%nnz_start = 0 + CALL mpi_exscan(nnz_loc, mat_mumps%nnz_start, 1, MPI_INTEGER, MPI_SUM, comm, ierr) + mat_mumps%nnz_start = mat_mumps%nnz_start + 1 + mat_mumps%nnz_end = mat_mumps%nnz_start + nnz_loc - 1 + mat_mumps%nnz_loc = nnz_loc + CALL mpi_allreduce(nnz_loc, mat_mumps%nnz, 1, MPI_INTEGER, MPI_SUM, comm, ierr) +! + mat_mumps%mumps_par%N = rank + mat_mumps%mumps_par%NZ_loc = nnz_loc +! + mat_mumps%cols => mat%cols + mat_mumps%irow => mat%irow + mat_mumps%val => mat%val +! +! (A,JCN) picked from CSR mat + mat_mumps%mumps_par%A_loc => mat_mumps%val + mat_mumps%mumps_par%JCN_loc => mat_mumps%cols +! +! Determine IRN array + IF(ASSOCIATED(mat_mumps%mumps_par%IRN_loc)) DEALLOCATE(mat_mumps%mumps_par%IRN_loc) + ALLOCATE(mat_mumps%mumps_par%IRN_loc(nnz_loc)) + DO i=mat_mumps%istart,mat_mumps%iend + s = mat_mumps%irow(i) - mat_mumps%nnz_start + 1 + e = mat_mumps%irow(i+1) - mat_mumps%nnz_start + mat_mumps%mumps_par%IRN_loc(s:e) = i + END DO + CALL destroy(mat_mumps%mat) + NULLIFY(mat_mumps%mat) +! + END SUBROUTINE zcsr2mumps +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE destroy_csr_mat(mat) +! +! Deallocate csr mat +! + TYPE(csr_mat) :: mat +! + CALL destroy(mat%spmat) + IF(mat%nnz.GT.0) THEN + DEALLOCATE(mat%irow) + DEALLOCATE(mat%idiag) + DEALLOCATE(mat%cols) + DEALLOCATE(mat%val) + END IF + mat%nnz = 0 + END SUBROUTINE destroy_csr_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE destroy_zcsr_mat(mat) +! +! Deallocate csr mat +! + TYPE(zcsr_mat) :: mat +! + CALL destroy(mat%zspmat) + IF(mat%nnz.GT.0) THEN + DEALLOCATE(mat%irow) + DEALLOCATE(mat%idiag) + DEALLOCATE(mat%cols) + DEALLOCATE(mat%val) + END IF + mat%nnz = 0 + END SUBROUTINE destroy_zcsr_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + FUNCTION vmx_csr_mat(mat, xarr, transa_in) RESULT(yarr) +! +! Return product mat*x +! + TYPE(csr_mat) :: mat + DOUBLE PRECISION, INTENT(in) :: xarr(:) + CHARACTER(len=1), OPTIONAL, INTENT(in) :: transa_in + DOUBLE PRECISION :: yarr(SIZE(xarr)) +! + DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0 + CHARACTER(len=1) :: transa + CHARACTER(len=6) :: matdescra + INTEGER :: n, i, j +! + n = mat%rank + transa = 'N' + IF(PRESENT(transa_in)) transa = transa_in +! +#ifdef MKL + matdescra = 'g' + CALL mkl_dcsrmv(transa, n, n, alpha, matdescra, mat%val, & + & mat%cols, mat%irow(1), mat%irow(2), xarr, & + & beta, yarr) +#else + yarr = 0.0d0 + DO i=1,mat%rank + DO j=mat%irow(i), mat%irow(i+1)-1 + IF(transa .EQ. 'N') THEN + yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j)) + ELSE + yarr(mat%cols(j)) = yarr(mat%cols(j)) + mat%val(j)*xarr(i) + END IF + END DO + END DO +#endif +! + END FUNCTION vmx_csr_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + FUNCTION vmx_zcsr_mat(mat, xarr, transa_in) RESULT(yarr) +! +! Return product mat*x +! + TYPE(zcsr_mat) :: mat + DOUBLE COMPLEX, INTENT(in) :: xarr(:) + CHARACTER(len=1), OPTIONAL, INTENT(in) :: transa_in + DOUBLE COMPLEX :: yarr(SIZE(xarr)) +! + DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0) + CHARACTER(len=1) :: transa + CHARACTER(len=6) :: matdescra + INTEGER :: n, i, j +! + n = mat%rank + transa = 'N' + IF(PRESENT(transa_in)) transa = transa_in +! +#ifdef MKL + matdescra = 'g' + CALL mkl_zcsrmv(transa, n, n, alpha, matdescra, mat%val, & + & mat%cols, mat%irow(1), mat%irow(2), xarr, & + & beta, yarr) +#else + yarr = 0.0d0 + DO i=1,mat%rank + DO j=mat%irow(i), mat%irow(i+1)-1 + IF(transa .EQ. 'N') THEN + yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j)) + ELSE + yarr(mat%cols(j)) = yarr(mat%cols(j)) + mat%val(j)*xarr(i) + END IF + END DO + END DO +#endif +! + END FUNCTION vmx_zcsr_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + FUNCTION vmx_csr_matn(mat, xarr, transa_in) RESULT(yarr) +! +! Return product mat*x +! + TYPE(csr_mat) :: mat + DOUBLE PRECISION, INTENT(in) :: xarr(:,:) + CHARACTER(len=1), OPTIONAL, INTENT(in) :: transa_in + DOUBLE PRECISION :: yarr(SIZE(xarr,1),SIZE(xarr,2)) +! + DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0 + CHARACTER(len=1) :: transa + INTEGER :: n, nrhs, i, j + CHARACTER(len=6) :: matdescra +! + n = mat%rank + nrhs = SIZE(xarr,2) + transa = 'N' + IF(PRESENT(transa_in)) transa = transa_in +! +#ifdef MKL + matdescra = 'g' + CALL mkl_dcsrmm(transa, n, nrhs, n, alpha, matdescra, mat%val,& + & mat%cols, mat%irow(1), mat%irow(2), xarr, & + & n, beta, yarr, n) +#else + yarr = 0.0d0 + DO i=1,n + DO j=mat%irow(i), mat%irow(i+1)-1 + IF(transa .EQ. 'N') THEN + yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:) + ELSE + yarr(mat%cols(j),:) = yarr(mat%cols(j),:) + mat%val(j)*xarr(i,:) + END IF + END DO + END DO +#endif +! + END FUNCTION vmx_csr_matn +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + FUNCTION vmx_zcsr_matn(mat, xarr, transa_in) RESULT(yarr) +! +! Return product mat*x +! + TYPE(zcsr_mat) :: mat + DOUBLE COMPLEX, INTENT(in) :: xarr(:,:) + CHARACTER(len=1), OPTIONAL, INTENT(in) :: transa_in + DOUBLE COMPLEX :: yarr(SIZE(xarr,1),SIZE(xarr,2)) +! + DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0) + CHARACTER(len=1) :: transa + INTEGER :: n, nrhs, i, j + CHARACTER(len=6) :: matdescra +! + n = mat%rank + nrhs = SIZE(xarr,2) + transa = 'N' + IF(PRESENT(transa_in)) transa = transa_in +! +#ifdef MKL + matdescra = 'g' + CALL mkl_zcsrmm(transa, n, nrhs, n, alpha, matdescra, mat%val,& + & mat%cols, mat%irow(1), mat%irow(2), xarr, & + & n, beta, yarr, n) +#else + yarr = (0.0d0,0.0d0) + DO i=1,n + DO j=mat%irow(i), mat%irow(i+1)-1 + IF(transa .EQ. 'N') THEN + yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:) + ELSE + yarr(mat%cols(j),:) = yarr(mat%cols(j),:) + mat%val(j)*xarr(i,:) + END IF + END DO + END DO +#endif +! + END FUNCTION vmx_zcsr_matn +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE conmat_1d_csr(spl, mat, coefeq, maxder) +! +! Construction of FE matrix mat for 1D differential operator +! using spline spl +! + USE bsplines + TYPE(csr_mat) :: mat + TYPE(spline1d), INTENT(in) :: spl +! + INCLUDE '../../bsplines/src/conmat_1d.tpl' + END SUBROUTINE conmat_1d_csr +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE conmat_1d_zcsr(spl, mat, coefeq, maxder) +! +! Construction of FE matrix mat for 1D differential operator +! using spline spl +! + USE bsplines + TYPE(zcsr_mat) :: mat + TYPE(spline1d), INTENT(in) :: spl +! + INCLUDE '../../bsplines/src/zconmat_1d.tpl' + END SUBROUTINE conmat_1d_zcsr +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE conmat_2d_csr(spl, mat, coefeq, maxder, nat_order) +! +! Construction of FE matrix mat for 2D differential operator +! using spline spl +! + USE bsplines + TYPE(spline2d), INTENT(in) :: spl + TYPE(csr_mat) :: mat +! + INCLUDE '../../bsplines/src/conmat.tpl' + END SUBROUTINE conmat_2d_csr +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE conmat_2d_zcsr(spl, mat, coefeq, maxder, nat_order) +! +! Construction of FE matrix mat for 2D differential operator +! using spline spl +! + USE bsplines + TYPE(spline2d), INTENT(in) :: spl + TYPE(zcsr_mat) :: mat +! + INCLUDE '../../bsplines/src/zconmat.tpl' + END SUBROUTINE conmat_2d_zcsr +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE femat_csr(spl, mat, coefeq, nterms) +! +! Compute fe matrix +! + USE bsplines + TYPE(spline1d), INTENT(in) :: spl + TYPE(csr_mat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: nterms + INTERFACE + SUBROUTINE coefeq(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE PRECISION, INTENT(out) :: c(:) + END SUBROUTINE coefeq + END INTERFACE +! + INTEGER :: nrank, nx, nidbas +! + CALL get_dim(spl, nrank, nx, nidbas) + IF(spl%period) nrank = nx + IF(mat%nnz.EQ.0) THEN + WRITE(*,'(a,i0,a)') 'FEMAT: Initialize mat with ', & + & nterms, ' terms ...' + CALL init(nrank, nterms, mat) + END IF + CALL conmat(spl, mat, coefeq) + END SUBROUTINE femat_csr +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE put_csr_mat(fid, label, mat, str) +! +! Write matrix to hdf5 file +! + USE futils +! + INTEGER, INTENT(in) :: fid + CHARACTER(len=*), INTENT(in) :: label + TYPE(csr_mat) :: mat + CHARACTER(len=*), OPTIONAL, INTENT(in) :: str +! + IF(PRESENT(str)) THEN + CALL creatg(fid, label, str) + ELSE + CALL creatg(fid, label) + END IF + CALL attach(fid, label, 'RANK', mat%rank) + CALL attach(fid, label, 'NNZ', mat%nnz) + CALL putarr(fid, TRIM(label)//'/irow', mat%irow) + CALL putarr(fid, TRIM(label)//'/cols', mat%cols) + CALL putarr(fid, TRIM(label)//'/idiag', mat%idiag) + CALL putarr(fid, TRIM(label)//'/val', mat%val) + END SUBROUTINE put_csr_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE put_zcsr_mat(fid, label, mat, str) +! +! Write matrix to hdf5 file +! + USE futils +! + INTEGER, INTENT(in) :: fid + CHARACTER(len=*), INTENT(in) :: label + TYPE(zcsr_mat) :: mat + CHARACTER(len=*), OPTIONAL, INTENT(in) :: str +! + IF(PRESENT(str)) THEN + CALL creatg(fid, label, str) + ELSE + CALL creatg(fid, label) + END IF + CALL attach(fid, label, 'RANK', mat%rank) + CALL attach(fid, label, 'NNZ', mat%nnz) + CALL putarr(fid, TRIM(label)//'/irow', mat%irow) + CALL putarr(fid, TRIM(label)//'/cols', mat%cols) + CALL putarr(fid, TRIM(label)//'/idiag', mat%idiag) + CALL putarr(fid, TRIM(label)//'/val', mat%val) + END SUBROUTINE put_zcsr_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + DOUBLE PRECISION FUNCTION matnorm_csr(mat, p) +! +! Compute matrix norm +! + TYPE(csr_mat), INTENT(in) :: mat + CHARACTER(len=*), OPTIONAL, INTENT(in) :: p +! + CHARACTER(len=4) :: norm_type + INTEGER :: i, j + DOUBLE PRECISION :: temp(mat%rank) +! + norm_type = 'fro' + IF(PRESENT(p)) norm_type = p +! + SELECT CASE (norm_type) + CASE ('inf') + DO i=1,mat%rank + temp(i) = SUM(ABS(mat%val(mat%irow(i):mat%irow(i+1)-1))) + END DO + matnorm_csr = MAXVAL(temp) + CASE ('1') + temp = 0.0d0 + DO i=1,mat%rank + DO j=mat%irow(i), mat%irow(i+1)-1 + temp(mat%cols(j)) = temp(mat%cols(j)) + ABS(mat%val(j)) + END DO + END DO + matnorm_csr = MAXVAL(temp) + CASE('fro') + matnorm_csr = SQRT(DOT_PRODUCT(mat%val, mat%val)) + END SELECT + END FUNCTION matnorm_csr +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE full_to_csr(fullmat, mat) +! +! Convert full rectangular matrix to csr mat +! + DOUBLE PRECISION, INTENT(inout) :: fullmat(:,:) + TYPE(csr_mat), INTENT(out) :: mat +! + INTEGER :: m, n, nnz + INTEGER :: i, j, k +! + m = SIZE(fullmat,1) + n = SIZE(fullmat,2) + CALL init(m, 0, mat, ncols=n) +! +! Determine nnz of matrix +! + IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) + IF(ASSOCIATED(mat%idiag)) DEALLOCATE(mat%idiag) + IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) + IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) +! + ALLOCATE(mat%irow(m+1)) + ALLOCATE(mat%idiag(m)) +! +! Clear matrix small elements of fullmat + WHERE( ABS(fullmat) < 1.d-8) fullmat=0.0d0 +! + mat%irow(1) = 1 + nnz = 0 + DO i=1,m + DO j=1,n + IF(ABS(fullmat(i,j)).GT.EPSILON(0.0d0)) THEN + nnz = nnz+1 + IF(m.EQ.n .AND. i.EQ.j) THEN ! Only for square matrix + mat%idiag(i) = nnz + END IF + END IF + END DO + mat%irow(i+1) = nnz+1 + END DO +! +! Allocate and fill the csr matrix structure +! + mat%nnz = nnz + ALLOCATE(mat%cols(nnz)) + ALLOCATE(mat%val(nnz)) + k=0 + DO i=1,m + DO j=1,n + IF(ABS(fullmat(i,j)).GT.EPSILON(0.0d0)) THEN + k=k+1 + mat%cols(k) = j + mat%val(k) = fullmat(i,j) + END IF + END DO + END DO + END SUBROUTINE full_to_csr +!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE full_to_zcsr(fullmat, mat) +! +! Convert full rectangular matrix to csr mat +! + DOUBLE COMPLEX, INTENT(inout) :: fullmat(:,:) + TYPE(zcsr_mat), INTENT(out) :: mat +! + INTEGER :: m, n, nnz + INTEGER :: i, j, k +! + m = SIZE(fullmat,1) + n = SIZE(fullmat,2) + CALL init(m, 0, mat, ncols=n) +! +! Determine nnz of matrix +! + IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) + IF(ASSOCIATED(mat%idiag)) DEALLOCATE(mat%idiag) + IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) + IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) +! + ALLOCATE(mat%irow(m+1)) + ALLOCATE(mat%idiag(m)) +! +! Clear matrix small elements of fullmat + WHERE( ABS(fullmat) < 1.d-8) fullmat=(0.0d0,0.0d0) +! + mat%irow(1) = 1 + nnz = 0 + DO i=1,m + DO j=1,n + IF(ABS(fullmat(i,j)).GT.EPSILON(0.0d0)) THEN + nnz = nnz+1 + IF(m.EQ.n .AND. i.EQ.j) THEN ! Only for square matrix + mat%idiag(i) = nnz + END IF + END IF + END DO + mat%irow(i+1) = nnz+1 + END DO +! +! Allocate and fill the csr matrix structure +! + mat%nnz = nnz + ALLOCATE(mat%cols(nnz)) + ALLOCATE(mat%val(nnz)) + k=0 + DO i=1,m + DO j=1,n + IF(ABS(fullmat(i,j)).GT.EPSILON(0.0d0)) THEN + k=k+1 + mat%cols(k) = j + mat%val(k) = fullmat(i,j) + END IF + END DO + END DO + END SUBROUTINE full_to_zcsr +!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE check_dom(mat) +! +! Check whether mat is strict diagonal dominabce. +! + TYPE(csr_mat), INTENT(in) :: mat + DOUBLE PRECISION :: arow(mat%rank), asum(mat%rank) + INTEGER :: n, i, j1, j2, jdiag +! + n = mat%rank + DO i=1,n + j1 = mat%irow(i) + jdiag = mat%idiag(i) + j2 = mat%irow(i+1)-1 + asum(i) = SUM(ABS(mat%val(j1:j2))) / ABS(mat%val(jdiag)) - 1.0d0 + END DO + WRITE(*,'(/a,1pe12.3)') 'Max of sum of off-diag', MAXVAL(asum) +! + END SUBROUTINE check_dom +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE kron_csr(mata, matb, matc) +! +! Kronecker product of 2 CSR matrices +! + USE sparse, ONLY : isearch + TYPE(csr_mat), INTENT(in) :: mata, matb + TYPE(csr_mat), INTENT(out) :: matc +! + INTEGER :: m1, n1, nnz1, m2, n2, nnz2, m, n, nnz + INTEGER :: i,i1,i2,j1,s,s1,s2,e,e1,e2,k,nc2 +! + m1 = mata%mrows + n1 = mata%ncols + nnz1 = mata%nnz + m2 = matb%mrows + n2 = matb%ncols + nnz2 = matb%nnz + m = m1*m2 + n = n1*n2 + nnz = nnz1*nnz2 +! + CALL init(m, 0, matc, ncols=n) + matc%nnz = nnz + IF(ASSOCIATED(matc%irow)) DEALLOCATE(matc%irow) + IF(ASSOCIATED(matc%idiag)) DEALLOCATE(matc%idiag) + IF(ASSOCIATED(matc%cols)) DEALLOCATE(matc%cols) + IF(ASSOCIATED(matc%val)) DEALLOCATE(matc%val) + ALLOCATE(matc%irow(m+1)) + IF(m.EQ.n) THEN + ALLOCATE(matc%idiag(m)) ! Only for square matrices + END IF + ALLOCATE(matc%cols(nnz)) + ALLOCATE(matc%val(nnz)) +! + k = 0 + matc%irow(1) = 1 + DO i1=1,m1 + s1=mata%irow(i1) + e1=mata%irow(i1+1)-1 + DO i2=1,m2 + s2=matb%irow(i2) + e2=matb%irow(i2+1)-1 + nc2=e2-s2+1 + DO j1=s1,e1 + matc%val(k+1:k+nc2) = mata%val(j1)*matb%val(s2:e2) + matc%cols(k+1:k+nc2) = (mata%cols(j1)-1)*n2 + matb%cols(s2:e2) + k = k+nc2 + matc%irow((i1-1)*m2+i2+1) = k+1 ! Points to next row + END DO + END DO + END DO +! +! Search the diagonals + DO i=1,matc%mrows + s = matc%irow(i) + e = matc%irow(i+1)-1 + matc%idiag(i) = isearch(matc%cols(s:e),i) + s + END DO + END SUBROUTINE kron_csr +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +END MODULE csr diff --git a/src/lapack_extra.f b/src/lapack_extra.f new file mode 100644 index 0000000..8f5248f --- /dev/null +++ b/src/lapack_extra.f @@ -0,0 +1,718 @@ +!> +!> @file lapack_extra.f +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> + DOUBLE PRECISION FUNCTION DOPGB( SUBNAM, M, N, KL, KU, IPIV ) +* +* -- LAPACK timing routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER*6 SUBNAM + INTEGER KL, KU, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) +* .. +* +* Purpose +* ======= +* +* DOPGB counts operations for the LU factorization of a band matrix +* xGBTRF. +* +* Arguments +* ========= +* +* SUBNAM (input) CHARACTER*6 +* The name of the subroutine. +* +* M (input) INTEGER +* The number of rows of the coefficient matrix. M >= 0. +* +* N (input) INTEGER +* The number of columns of the coefficient matrix. N >= 0. +* +* KL (input) INTEGER +* The number of subdiagonals of the matrix. KL >= 0. +* +* KU (input) INTEGER +* The number of superdiagonals of the matrix. KU >= 0. +* +* IPIV (input) INTEGER array, dimension (min(M,N)) +* The vector of pivot indices from DGBTRF or ZGBTRF. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL CORZ, SORD + CHARACTER C1 + CHARACTER*2 C2 + CHARACTER*3 C3 + INTEGER I, J, JP, JU, KM + DOUBLE PRECISION ADDFAC, ADDS, MULFAC, MULTS +* .. +* .. External Functions .. + LOGICAL LSAME, LSAMEN + EXTERNAL LSAME, LSAMEN +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + DOPGB = 0 + MULTS = 0 + ADDS = 0 + C1 = SUBNAM( 1: 1 ) + C2 = SUBNAM( 2: 3 ) + C3 = SUBNAM( 4: 6 ) + SORD = LSAME( C1, 'S' ) .OR. LSAME( C1, 'D' ) + CORZ = LSAME( C1, 'C' ) .OR. LSAME( C1, 'Z' ) + IF( .NOT.( SORD .OR. CORZ ) ) + $ RETURN + IF( LSAME( C1, 'S' ) .OR. LSAME( C1, 'D' ) ) THEN + ADDFAC = 1 + MULFAC = 1 + ELSE + ADDFAC = 2 + MULFAC = 6 + END IF +* +* -------------------------- +* GB: General Band matrices +* -------------------------- +* + IF( LSAMEN( 2, C2, 'GB' ) ) THEN +* +* xGBTRF: M, N, KL, KU => M, N, KL, KU +* + IF( LSAMEN( 3, C3, 'TRF' ) ) THEN + JU = 1 + DO 10 J = 1, MIN( M, N ) + KM = MIN( KL, M-J ) + JP = IPIV( J ) + JU = MAX( JU, MIN( JP+KU, N ) ) + IF( KM.GT.0 ) THEN + MULTS = MULTS + KM*( 1+JU-J ) + ADDS = ADDS + KM*( JU-J ) + END IF + 10 CONTINUE + END IF +* +* --------------------------------- +* GT: General Tridiagonal matrices +* --------------------------------- +* + ELSE IF( LSAMEN( 2, C2, 'GT' ) ) THEN +* +* xGTTRF: N => M +* + IF( LSAMEN( 3, C3, 'TRF' ) ) THEN + MULTS = 2*( M-1 ) + ADDS = M - 1 + DO 20 I = 1, M - 2 + IF( IPIV( I ).NE.I ) + $ MULTS = MULTS + 1 + 20 CONTINUE +* +* xGTTRS: N, NRHS => M, N +* + ELSE IF( LSAMEN( 3, C3, 'TRS' ) ) THEN + MULTS = 4*N*( M-1 ) + ADDS = 3*N*( M-1 ) +* +* xGTSV: N, NRHS => M, N +* + ELSE IF( LSAMEN( 3, C3, 'SV ' ) ) THEN + MULTS = ( 4*N+2 )*( M-1 ) + ADDS = ( 3*N+1 )*( M-1 ) + DO 30 I = 1, M - 2 + IF( IPIV( I ).NE.I ) + $ MULTS = MULTS + 1 + 30 CONTINUE + END IF + END IF +* + DOPGB = MULFAC*MULTS + ADDFAC*ADDS + RETURN +* +* End of DOPGB +* + END + DOUBLE PRECISION FUNCTION DOPLA( SUBNAM, M, N, KL, KU, NB ) +* +* -- LAPACK timing routine (version 3.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* June 30, 1999 +* +* .. Scalar Arguments .. + CHARACTER*6 SUBNAM + INTEGER KL, KU, M, N, NB +* .. +* +* Purpose +* ======= +* +* DOPLA computes an approximation of the number of floating point +* operations used by the subroutine SUBNAM with the given values +* of the parameters M, N, KL, KU, and NB. +* +* This version counts operations for the LAPACK subroutines. +* +* Arguments +* ========= +* +* SUBNAM (input) CHARACTER*6 +* The name of the subroutine. +* +* M (input) INTEGER +* The number of rows of the coefficient matrix. M >= 0. +* +* N (input) INTEGER +* The number of columns of the coefficient matrix. +* For solve routine when the matrix is square, +* N is the number of right hand sides. N >= 0. +* +* KL (input) INTEGER +* The lower band width of the coefficient matrix. +* If needed, 0 <= KL <= M-1. +* For xGEQRS, KL is the number of right hand sides. +* +* KU (input) INTEGER +* The upper band width of the coefficient matrix. +* If needed, 0 <= KU <= N-1. +* +* NB (input) INTEGER +* The block size. If needed, NB >= 1. +* +* Notes +* ===== +* +* In the comments below, the association is given between arguments +* in the requested subroutine and local arguments. For example, +* +* xGETRS: N, NRHS => M, N +* +* means that arguments N and NRHS in DGETRS are passed to arguments +* M and N in this procedure. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL CORZ, SORD + CHARACTER C1 + CHARACTER*2 C2 + CHARACTER*3 C3 + INTEGER I + DOUBLE PRECISION ADDFAC, ADDS, EK, EM, EMN, EN, MULFAC, MULTS, + $ WL, WU +* .. +* .. External Functions .. + LOGICAL LSAME, LSAMEN + EXTERNAL LSAME, LSAMEN +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* -------------------------------------------------------- +* Initialize DOPLA to 0 and do a quick return if possible. +* -------------------------------------------------------- +* + DOPLA = 0 + MULTS = 0 + ADDS = 0 + C1 = SUBNAM( 1: 1 ) + C2 = SUBNAM( 2: 3 ) + C3 = SUBNAM( 4: 6 ) + SORD = LSAME( C1, 'S' ) .OR. LSAME( C1, 'D' ) + CORZ = LSAME( C1, 'C' ) .OR. LSAME( C1, 'Z' ) + IF( M.LE.0 .OR. .NOT.( SORD .OR. CORZ ) ) + $ RETURN +* +* --------------------------------------------------------- +* If the coefficient matrix is real, count each add as 1 +* operation and each multiply as 1 operation. +* If the coefficient matrix is complex, count each add as 2 +* operations and each multiply as 6 operations. +* --------------------------------------------------------- +* + IF( LSAME( C1, 'S' ) .OR. LSAME( C1, 'D' ) ) THEN + ADDFAC = 1 + MULFAC = 1 + ELSE + ADDFAC = 2 + MULFAC = 6 + END IF + EM = M + EN = N + EK = KL +* +* --------------------------------- +* GE: GEneral rectangular matrices +* --------------------------------- +* + IF( LSAMEN( 2, C2, 'GE' ) ) THEN +* +* xGETRF: M, N => M, N +* + IF( LSAMEN( 3, C3, 'TRF' ) ) THEN + EMN = MIN( M, N ) + ADDS = EMN*( EM*EN-( EM+EN )*( EMN+1.D0 ) / 2.D0+ + $ ( EMN+1.D0 )*( 2.D0*EMN+1.D0 ) / 6.D0 ) + MULTS = ADDS + EMN*( EM-( EMN+1.D0 ) / 2.D0 ) +* +* xGETRS: N, NRHS => M, N +* + ELSE IF( LSAMEN( 3, C3, 'TRS' ) ) THEN + MULTS = EN*EM*EM + ADDS = EN*( EM*( EM-1.D0 ) ) +* +* xGETRI: N => M +* + ELSE IF( LSAMEN( 3, C3, 'TRI' ) ) THEN + MULTS = EM*( 5.D0 / 6.D0+EM*( 1.D0 / 2.D0+EM*( 2.D0 / + $ 3.D0 ) ) ) + ADDS = EM*( 5.D0 / 6.D0+EM*( -3.D0 / 2.D0+EM*( 2.D0 / + $ 3.D0 ) ) ) +* +* xGEQRF or xGEQLF: M, N => M, N +* + ELSE IF( LSAMEN( 3, C3, 'QRF' ) .OR. + $ LSAMEN( 3, C3, 'QR2' ) .OR. + $ LSAMEN( 3, C3, 'QLF' ) .OR. LSAMEN( 3, C3, 'QL2' ) ) + $ THEN + IF( M.GE.N ) THEN + MULTS = EN*( ( ( 23.D0 / 6.D0 )+EM+EN / 2.D0 )+EN* + $ ( EM-EN / 3.D0 ) ) + ADDS = EN*( ( 5.D0 / 6.D0 )+EN* + $ ( 1.D0 / 2.D0+( EM-EN / 3.D0 ) ) ) + ELSE + MULTS = EM*( ( ( 23.D0 / 6.D0 )+2.D0*EN-EM / 2.D0 )+EM* + $ ( EN-EM / 3.D0 ) ) + ADDS = EM*( ( 5.D0 / 6.D0 )+EN-EM / 2.D0+EM* + $ ( EN-EM / 3.D0 ) ) + END IF +* +* xGERQF or xGELQF: M, N => M, N +* + ELSE IF( LSAMEN( 3, C3, 'RQF' ) .OR. + $ LSAMEN( 3, C3, 'RQ2' ) .OR. + $ LSAMEN( 3, C3, 'LQF' ) .OR. LSAMEN( 3, C3, 'LQ2' ) ) + $ THEN + IF( M.GE.N ) THEN + MULTS = EN*( ( ( 29.D0 / 6.D0 )+EM+EN / 2.D0 )+EN* + $ ( EM-EN / 3.D0 ) ) + ADDS = EN*( ( 5.D0 / 6.D0 )+EM+EN* + $ ( -1.D0 / 2.D0+( EM-EN / 3.D0 ) ) ) + ELSE + MULTS = EM*( ( ( 29.D0 / 6.D0 )+2.D0*EN-EM / 2.D0 )+EM* + $ ( EN-EM / 3.D0 ) ) + ADDS = EM*( ( 5.D0 / 6.D0 )+EM / 2.D0+EM* + $ ( EN-EM / 3.D0 ) ) + END IF +* +* xGEQPF: M, N => M, N +* + ELSE IF( LSAMEN( 3, C3, 'QPF' ) ) THEN + EMN = MIN( M, N ) + MULTS = 2*EN*EN + EMN*( 3*EM+5*EN+2*EM*EN-( EMN+1 )* + $ ( 4+EN+EM-( 2*EMN+1 ) / 3 ) ) + ADDS = EN*EN + EMN*( 2*EM+EN+2*EM*EN-( EMN+1 )* + $ ( 2+EN+EM-( 2*EMN+1 ) / 3 ) ) +* +* xGEQRS or xGERQS: M, N, NRHS => M, N, KL +* + ELSE IF( LSAMEN( 3, C3, 'QRS' ) .OR. LSAMEN( 3, C3, 'RQS' ) ) + $ THEN + MULTS = EK*( EN*( 2.D0-EK )+EM* + $ ( 2.D0*EN+( EM+1.D0 ) / 2.D0 ) ) + ADDS = EK*( EN*( 1.D0-EK )+EM* + $ ( 2.D0*EN+( EM-1.D0 ) / 2.D0 ) ) +* +* xGELQS or xGEQLS: M, N, NRHS => M, N, KL +* + ELSE IF( LSAMEN( 3, C3, 'LQS' ) .OR. LSAMEN( 3, C3, 'QLS' ) ) + $ THEN + MULTS = EK*( EM*( 2.D0-EK )+EN* + $ ( 2.D0*EM+( EN+1.D0 ) / 2.D0 ) ) + ADDS = EK*( EM*( 1.D0-EK )+EN* + $ ( 2.D0*EM+( EN-1.D0 ) / 2.D0 ) ) +* +* xGEBRD: M, N => M, N +* + ELSE IF( LSAMEN( 3, C3, 'BRD' ) ) THEN + IF( M.GE.N ) THEN + MULTS = EN*( 20.D0 / 3.D0+EN* + $ ( 2.D0+( 2.D0*EM-( 2.D0 / 3.D0 )*EN ) ) ) + ADDS = EN*( 5.D0 / 3.D0+( EN-EM )+EN* + $ ( 2.D0*EM-( 2.D0 / 3.D0 )*EN ) ) + ELSE + MULTS = EM*( 20.D0 / 3.D0+EM* + $ ( 2.D0+( 2.D0*EN-( 2.D0 / 3.D0 )*EM ) ) ) + ADDS = EM*( 5.D0 / 3.D0+( EM-EN )+EM* + $ ( 2.D0*EN-( 2.D0 / 3.D0 )*EM ) ) + END IF +* +* xGEHRD: N => M +* + ELSE IF( LSAMEN( 3, C3, 'HRD' ) ) THEN + IF( M.EQ.1 ) THEN + MULTS = 0.D0 + ADDS = 0.D0 + ELSE + MULTS = -13.D0 + EM*( -7.D0 / 6.D0+EM* + $ ( 0.5D0+EM*( 5.D0 / 3.D0 ) ) ) + ADDS = -8.D0 + EM*( -2.D0 / 3.D0+EM* + $ ( -1.D0+EM*( 5.D0 / 3.D0 ) ) ) + END IF +* + END IF +* +* ---------------------------- +* GB: General Banded matrices +* ---------------------------- +* Note: The operation count is overestimated because +* it is assumed that the factor U fills in to the maximum +* extent, i.e., that its bandwidth goes from KU to KL + KU. +* + ELSE IF( LSAMEN( 2, C2, 'GB' ) ) THEN +* +* xGBTRF: M, N, KL, KU => M, N, KL, KU +* + IF( LSAMEN( 3, C3, 'TRF' ) ) THEN + DO 10 I = MIN( M, N ), 1, -1 + WL = MAX( 0, MIN( KL, M-I ) ) + WU = MAX( 0, MIN( KL+KU, N-I ) ) + MULTS = MULTS + WL*( 1.D0+WU ) + ADDS = ADDS + WL*WU + 10 CONTINUE +* +* xGBTRS: N, NRHS, KL, KU => M, N, KL, KU +* + ELSE IF( LSAMEN( 3, C3, 'TRS' ) ) THEN + WL = MAX( 0, MIN( KL, M-1 ) ) + WU = MAX( 0, MIN( KL+KU, M-1 ) ) + MULTS = EN*( EM*( WL+1.D0+WU )-0.5D0* + $ ( WL*( WL+1.D0 )+WU*( WU+1.D0 ) ) ) + ADDS = EN*( EM*( WL+WU )-0.5D0* + $ ( WL*( WL+1.D0 )+WU*( WU+1.D0 ) ) ) +* + END IF +* +* -------------------------------------- +* PO: POsitive definite matrices +* PP: Positive definite Packed matrices +* -------------------------------------- +* + ELSE IF( LSAMEN( 2, C2, 'PO' ) .OR. LSAMEN( 2, C2, 'PP' ) ) THEN +* +* xPOTRF: N => M +* + IF( LSAMEN( 3, C3, 'TRF' ) ) THEN + MULTS = EM*( 1.D0 / 3.D0+EM*( 1.D0 / 2.D0+EM*( 1.D0 / + $ 6.D0 ) ) ) + ADDS = ( 1.D0 / 6.D0 )*EM*( -1.D0+EM*EM ) +* +* xPOTRS: N, NRHS => M, N +* + ELSE IF( LSAMEN( 3, C3, 'TRS' ) ) THEN + MULTS = EN*( EM*( EM+1.D0 ) ) + ADDS = EN*( EM*( EM-1.D0 ) ) +* +* xPOTRI: N => M +* + ELSE IF( LSAMEN( 3, C3, 'TRI' ) ) THEN + MULTS = EM*( 2.D0 / 3.D0+EM*( 1.D0+EM*( 1.D0 / 3.D0 ) ) ) + ADDS = EM*( 1.D0 / 6.D0+EM*( -1.D0 / 2.D0+EM*( 1.D0 / + $ 3.D0 ) ) ) +* + END IF +* +* ------------------------------------ +* PB: Positive definite Band matrices +* ------------------------------------ +* + ELSE IF( LSAMEN( 2, C2, 'PB' ) ) THEN +* +* xPBTRF: N, K => M, KL +* + IF( LSAMEN( 3, C3, 'TRF' ) ) THEN + MULTS = EK*( -2.D0 / 3.D0+EK*( -1.D0+EK*( -1.D0 / 3.D0 ) ) ) + $ + EM*( 1.D0+EK*( 3.D0 / 2.D0+EK*( 1.D0 / 2.D0 ) ) ) + ADDS = EK*( -1.D0 / 6.D0+EK*( -1.D0 / 2.D0+EK*( -1.D0 / + $ 3.D0 ) ) ) + EM*( EK / 2.D0*( 1.D0+EK ) ) +* +* xPBTRS: N, NRHS, K => M, N, KL +* + ELSE IF( LSAMEN( 3, C3, 'TRS' ) ) THEN + MULTS = EN*( ( 2*EM-EK )*( EK+1.D0 ) ) + ADDS = EN*( EK*( 2*EM-( EK+1.D0 ) ) ) +* + END IF +* +* ---------------------------------- +* PT: Positive definite Tridiagonal +* ---------------------------------- +* + ELSE IF( LSAMEN( 2, C2, 'PT' ) ) THEN +* +* xPTTRF: N => M +* + IF( LSAMEN( 3, C3, 'TRF' ) ) THEN + MULTS = 2*( EM-1 ) + ADDS = EM - 1 +* +* xPTTRS: N, NRHS => M, N +* + ELSE IF( LSAMEN( 3, C3, 'TRS' ) ) THEN + MULTS = EN*( 3*EM-2 ) + ADDS = EN*( 2*( EM-1 ) ) +* +* xPTSV: N, NRHS => M, N +* + ELSE IF( LSAMEN( 3, C3, 'SV ' ) ) THEN + MULTS = 2*( EM-1 ) + EN*( 3*EM-2 ) + ADDS = EM - 1 + EN*( 2*( EM-1 ) ) + END IF +* +* -------------------------------------------------------- +* SY: SYmmetric indefinite matrices +* SP: Symmetric indefinite Packed matrices +* HE: HErmitian indefinite matrices (complex only) +* HP: Hermitian indefinite Packed matrices (complex only) +* -------------------------------------------------------- +* + ELSE IF( LSAMEN( 2, C2, 'SY' ) .OR. LSAMEN( 2, C2, 'SP' ) .OR. + $ LSAMEN( 3, SUBNAM, 'CHE' ) .OR. + $ LSAMEN( 3, SUBNAM, 'ZHE' ) .OR. + $ LSAMEN( 3, SUBNAM, 'CHP' ) .OR. + $ LSAMEN( 3, SUBNAM, 'ZHP' ) ) THEN +* +* xSYTRF: N => M +* + IF( LSAMEN( 3, C3, 'TRF' ) ) THEN + MULTS = EM*( 10.D0 / 3.D0+EM* + $ ( 1.D0 / 2.D0+EM*( 1.D0 / 6.D0 ) ) ) + ADDS = EM / 6.D0*( -1.D0+EM*EM ) +* +* xSYTRS: N, NRHS => M, N +* + ELSE IF( LSAMEN( 3, C3, 'TRS' ) ) THEN + MULTS = EN*EM*EM + ADDS = EN*( EM*( EM-1.D0 ) ) +* +* xSYTRI: N => M +* + ELSE IF( LSAMEN( 3, C3, 'TRI' ) ) THEN + MULTS = EM*( 2.D0 / 3.D0+EM*EM*( 1.D0 / 3.D0 ) ) + ADDS = EM*( -1.D0 / 3.D0+EM*EM*( 1.D0 / 3.D0 ) ) +* +* xSYTRD, xSYTD2: N => M +* + ELSE IF( LSAMEN( 3, C3, 'TRD' ) .OR. LSAMEN( 3, C3, 'TD2' ) ) + $ THEN + IF( M.EQ.1 ) THEN + MULTS = 0.D0 + ADDS = 0.D0 + ELSE + MULTS = -15.D0 + EM*( -1.D0 / 6.D0+EM* + $ ( 5.D0 / 2.D0+EM*( 2.D0 / 3.D0 ) ) ) + ADDS = -4.D0 + EM*( -8.D0 / 3.D0+EM* + $ ( 1.D0+EM*( 2.D0 / 3.D0 ) ) ) + END IF + END IF +* +* ------------------- +* Triangular matrices +* ------------------- +* + ELSE IF( LSAMEN( 2, C2, 'TR' ) .OR. LSAMEN( 2, C2, 'TP' ) ) THEN +* +* xTRTRS: N, NRHS => M, N +* + IF( LSAMEN( 3, C3, 'TRS' ) ) THEN + MULTS = EN*EM*( EM+1.D0 ) / 2.D0 + ADDS = EN*EM*( EM-1.D0 ) / 2.D0 +* +* xTRTRI: N => M +* + ELSE IF( LSAMEN( 3, C3, 'TRI' ) ) THEN + MULTS = EM*( 1.D0 / 3.D0+EM*( 1.D0 / 2.D0+EM*( 1.D0 / + $ 6.D0 ) ) ) + ADDS = EM*( 1.D0 / 3.D0+EM*( -1.D0 / 2.D0+EM*( 1.D0 / + $ 6.D0 ) ) ) +* + END IF +* + ELSE IF( LSAMEN( 2, C2, 'TB' ) ) THEN +* +* xTBTRS: N, NRHS, K => M, N, KL +* + IF( LSAMEN( 3, C3, 'TRS' ) ) THEN + MULTS = EN*( EM*( EM+1.D0 ) / 2.D0-( EM-EK-1.D0 )* + $ ( EM-EK ) / 2.D0 ) + ADDS = EN*( EM*( EM-1.D0 ) / 2.D0-( EM-EK-1.D0 )*( EM-EK ) / + $ 2.D0 ) + END IF +* +* -------------------- +* Trapezoidal matrices +* -------------------- +* + ELSE IF( LSAMEN( 2, C2, 'TZ' ) ) THEN +* +* xTZRQF: M, N => M, N +* + IF( LSAMEN( 3, C3, 'RQF' ) ) THEN + EMN = MIN( M, N ) + MULTS = 3*EM*( EN-EM+1 ) + ( 2*EN-2*EM+3 )* + $ ( EM*EM-EMN*( EMN+1 ) / 2 ) + ADDS = ( EN-EM+1 )*( EM+2*EM*EM-EMN*( EMN+1 ) ) + END IF +* +* ------------------- +* Orthogonal matrices +* ------------------- +* + ELSE IF( ( SORD .AND. LSAMEN( 2, C2, 'OR' ) ) .OR. + $ ( CORZ .AND. LSAMEN( 2, C2, 'UN' ) ) ) THEN +* +* -MQR, -MLQ, -MQL, or -MRQ: M, N, K, SIDE => M, N, KL, KU +* where KU<= 0 indicates SIDE = 'L' +* and KU> 0 indicates SIDE = 'R' +* + IF( LSAMEN( 3, C3, 'MQR' ) .OR. LSAMEN( 3, C3, 'MLQ' ) .OR. + $ LSAMEN( 3, C3, 'MQL' ) .OR. LSAMEN( 3, C3, 'MRQ' ) ) THEN + IF( KU.LE.0 ) THEN + MULTS = EK*EN*( 2.D0*EM+2.D0-EK ) + ADDS = EK*EN*( 2.D0*EM+1.D0-EK ) + ELSE + MULTS = EK*( EM*( 2.D0*EN-EK )+ + $ ( EM+EN+( 1.D0-EK ) / 2.D0 ) ) + ADDS = EK*EM*( 2.D0*EN+1.D0-EK ) + END IF +* +* -GQR or -GQL: M, N, K => M, N, KL +* + ELSE IF( LSAMEN( 3, C3, 'GQR' ) .OR. LSAMEN( 3, C3, 'GQL' ) ) + $ THEN + MULTS = EK*( -5.D0 / 3.D0+( 2.D0*EN-EK )+ + $ ( 2.D0*EM*EN+EK*( ( 2.D0 / 3.D0 )*EK-EM-EN ) ) ) + ADDS = EK*( 1.D0 / 3.D0+( EN-EM )+ + $ ( 2.D0*EM*EN+EK*( ( 2.D0 / 3.D0 )*EK-EM-EN ) ) ) +* +* -GLQ or -GRQ: M, N, K => M, N, KL +* + ELSE IF( LSAMEN( 3, C3, 'GLQ' ) .OR. LSAMEN( 3, C3, 'GRQ' ) ) + $ THEN + MULTS = EK*( -2.D0 / 3.D0+( EM+EN-EK )+ + $ ( 2.D0*EM*EN+EK*( ( 2.D0 / 3.D0 )*EK-EM-EN ) ) ) + ADDS = EK*( 1.D0 / 3.D0+( EM-EN )+ + $ ( 2.D0*EM*EN+EK*( ( 2.D0 / 3.D0 )*EK-EM-EN ) ) ) +* + END IF +* + END IF +* + DOPLA = MULFAC*MULTS + ADDFAC*ADDS +* + RETURN +* +* End of DOPLA +* + END + LOGICAL FUNCTION LSAMEN( N, CA, CB ) +* +* -- LAPACK auxiliary routine (version 2.0) -- +* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., +* Courant Institute, Argonne National Lab, and Rice University +* September 30, 1994 +* +* .. Scalar Arguments .. + CHARACTER*( * ) CA, CB + INTEGER N +* .. +* +* Purpose +* ======= +* +* LSAMEN tests if the first N letters of CA are the same as the +* first N letters of CB, regardless of case. +* LSAMEN returns .TRUE. if CA and CB are equivalent except for case +* and .FALSE. otherwise. LSAMEN also returns .FALSE. if LEN( CA ) +* or LEN( CB ) is less than N. +* +* Arguments +* ========= +* +* N (input) INTEGER +* The number of characters in CA and CB to be compared. +* +* CA (input) CHARACTER*(*) +* CB (input) CHARACTER*(*) +* CA and CB specify two character strings of length at least N. +* Only the first N characters of each string will be accessed. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC LEN +* .. +* .. Executable Statements .. +* + LSAMEN = .FALSE. + IF( LEN( CA ).LT.N .OR. LEN( CB ).LT.N ) + $ GO TO 20 +* +* Do for each character in the two strings. +* + DO 10 I = 1, N +* +* Test if the characters are equal using LSAME. +* + IF( .NOT.LSAME( CA( I: I ), CB( I: I ) ) ) + $ GO TO 20 +* + 10 CONTINUE + LSAMEN = .TRUE. +* + 20 CONTINUE + RETURN +* +* End of LSAMEN +* + END diff --git a/src/math_util.f90 b/src/math_util.f90 new file mode 100644 index 0000000..60817ff --- /dev/null +++ b/src/math_util.f90 @@ -0,0 +1,291 @@ +!> +!> @file math_util.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +MODULE math_util +! +! MATH_UTIL: Some math utilities. +! +! T.M. Tran, CRPP-EPFL +! December 2012 +! +! +! Notes: +! - Assume the Fortran 2008 intrinsic BESSEL_JN(n,x) exists! +! + IMPLICIT NONE + DOUBLE PRECISION, PARAMETER :: pi=4.0d0*ATAN(1.0d0) +! +CONTAINS + ELEMENTAL FUNCTION bessjp(n,x) +! +! Derivative of J_n +! + DOUBLE PRECISION :: bessjp + INTEGER, INTENT(in) :: n + DOUBLE PRECISION, INTENT(in) :: x +! + IF(n.EQ.0) THEN + bessjp = -bessel_jn(1,x) + ELSE + bessjp = 0.5d0*(bessel_jn(n-1,x)-bessel_jn(n+1,x)) + END IF + END FUNCTION bessjp +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + FUNCTION root_bessj(n, s, info) +! +! s^th root of j_n +! + DOUBLE PRECISION :: root_bessj + INTEGER, INTENT(in) :: n + INTEGER, INTENT(in) :: s + INTEGER, OPTIONAL, INTENT(out) :: info +! + DOUBLE PRECISION :: b0,b1,b2,b3,b5,b7,t0,t1,t3,t5,t7,fn,fk,f1,f2,f3 + DOUBLE PRECISION :: c1=1.8557571d0, c2=1.033150d0, c3=.00397d0, c4=.0908d0,& + & c5=.043d0 + DOUBLE PRECISION :: zero + INTEGER :: iter +! + fn = REAL(ABS(n),8) + IF(s.EQ.1) THEN ! first zero + IF(n.EQ.0) THEN + zero = c1+c2-c3-c4+c5 + ELSE + f1 = fn**(1.d0/3.d0) + f2 = f1*f1*fn + f3 = f1*fn*fn + zero = fn+c1*f1+(c2/f1)-(c3/fn)-(c4/f2)+(c5/f3) + END IF + ELSE ! Other zeros + t0 = 4.d0*fn*fn + t1 = t0-1.d0 + t3 = 4.d0*t1*(7.d0*t0-31.d0) + t5 = 32.d0*t1*((83.d0*t0-982.d0)*t0+3779.d0) + t7 = 64.d0*t1*(((6949.d0*t0-153855.d0)*t0+1585743.d0)*t0 & + -6277237.d0) + fk = REAL(s,8) +! + b0 = (fk+.5d0*fn-.25d0)*pi! mac mahon's series for k>>n + b1 = 8.d0*b0 + b2 = b1*b1 + b3 = 3.d0*b1*b2 + b5 = 5.d0*b3*b2 + b7 = 7.d0*b5*b2 + zero = b0-(t1/b1)-(t3/b3)-(t5/b5)-(t7/b7) + END IF + CALL newton(iter) + IF(PRESENT(info)) info = iter + root_bessj = zero + CONTAINS + SUBROUTINE newton(iter) + INTEGER, INTENT(out) :: iter + INTEGER :: itermx = 20 + DOUBLE PRECISION :: dx, tol + tol = EPSILON(1.0d0)*zero + iter = 0 + DO + iter = iter+1 + dx = -bessel_jn(n,zero)/bessjp(n,zero) + zero = zero+dx + IF(iter.GE.itermx .OR. ABS(dx).LT.tol) EXIT + END DO + END SUBROUTINE newton + END FUNCTION root_bessj +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + FUNCTION root_bessjp(n, s, info) +! +! s^th root of derivative of j_n +! + DOUBLE PRECISION :: root_bessjp + INTEGER, INTENT(in) :: n + INTEGER, INTENT(in) :: s + INTEGER, OPTIONAL, INTENT(out) :: info +! + DOUBLE PRECISION :: c1=0.8086165D0, c2=0.072490D0, c3=.05097D0, c4=.0094D0 + DOUBLE PRECISION :: b0,b1,b2,b3,b5,b7,t0,t1,t3,t5,t7,fn,fk,f1,f2 + INTEGER :: iter + DOUBLE PRECISION :: zero +! + IF(n.EQ.0 .AND. s.EQ.1) THEN + root_bessjp = 0.0d0 + IF(PRESENT(info)) info = 0 + RETURN + END IF +! + fn = REAL(ABS(n),8) + fk = REAL(s,8) +! + IF(s.GT.1) THEN +! +! McMahon's series for s >> n + b0 = (fk+.5d0*fn-.75d0)*pi + b1 = 8.d0*b0 + b2 = b1*b1 + b3 = 3.d0*b1*b2 + b5 = 5.d0*b3*b2 + b7 = 7.d0*b5*b2 + t0 = 4.d0*fn*fn + t1 = t0+3.d0 + t3 = 4.d0*((7.d0*t0+82.d0)*t0-9.d0) + t5 = 32.d0*(((83.d0*t0+2075.d0)*t0-3039.d0)*t0+3537.d0) + t7 = 64.d0*((((6949.d0*t0+296492.d0)*t0-1248002.d0)*t0 & + +7414380.d0)*t0-5853627.d0) + zero = b0-(t1/b1)-(t3/b3)-(t5/b5)-(t7/b7) + ELSE +! +! Tchebychev's series for s <= n + f1 = fn**(1.d0/3.d0) + f2 = f1*f1*fn + zero = fn+c1*f1+(c2/f1)-(c3/fn)+(c4/f2) + END IF +! + CALL newton(iter) + root_bessjp = zero + IF(PRESENT(info)) info = iter + CONTAINS + SUBROUTINE newton(iter) + INTEGER, INTENT(out) :: iter + INTEGER :: itermx = 20 + DOUBLE PRECISION :: dx, tol + tol = EPSILON(1.0d0)*zero + iter = 0 + DO + iter = iter+1 + dx = -bessel_jn(n,zero)/bessjp(n,zero) + dx = -2.0d0 * (bessel_jn(n-1,zero)-bessel_jn(n+1,zero)) / & + & (bessel_jn(n-2,zero)-2.d0*bessel_jn(n,zero)+& + & bessel_jn(n+2,zero)) + zero = zero+dx + IF(iter.GE.itermx .OR. ABS(dx).LT.tol) EXIT + END DO + END SUBROUTINE newton + END FUNCTION root_bessjp +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +!!$ PURE FUNCTION BESSEL_JN(n,x) RESULT(bessj) +!!$ DOUBLE PRECISION, INTENT(in) :: x +!!$ DOUBLE PRECISION :: bessj +!!$ DOUBLE PRECISION BIGNO,BIGNI +!!$ INTEGER n,IACC +!!$ PARAMETER (IACC=40,BIGNO=1.d10,BIGNI=1.d-10) +!!$ INTEGER j,jsum,m +!!$ DOUBLE PRECISION ax,bj,bjm,bjp,sum,tox,bessj0,bessj1 +!!$ IF( n.EQ.0 ) THEN +!!$ bessj = bessj0(x) +!!$ RETURN +!!$ ELSE IF( n.EQ.1 ) THEN +!!$ bessj = bessj1(x) +!!$ RETURN +!!$ ENDIF +!!$ ax=ABS(x) +!!$ IF(ax.EQ.0.d0)THEN +!!$ bessj=0.d0 +!!$ ELSE IF(ax.GT.float(n))THEN +!!$ tox=2./ax +!!$ bjm=bessj0(ax) +!!$ bj=bessj1(ax) +!!$ DO j=1,n-1 +!!$ bjp=j*tox*bj-bjm +!!$ bjm=bj +!!$ bj=bjp +!!$ END DO +!!$ bessj=bj +!!$ ELSE +!!$ tox=2./ax +!!$ m=2*((n+INT(SQRT(float(IACC*n))))/2) +!!$ bessj=0.d0 +!!$ jsum=0 +!!$ sum=0.d0 +!!$ bjp=0.d0 +!!$ bj=1. +!!$ DO j=m,1,-1 +!!$ bjm=j*tox*bj-bjp +!!$ bjp=bj +!!$ bj=bjm +!!$ IF(ABS(bj).GT.BIGNO)THEN +!!$ bj=bj*BIGNI +!!$ bjp=bjp*BIGNI +!!$ bessj=bessj*BIGNI +!!$ sum=sum*BIGNI +!!$ ENDIF +!!$ IF(jsum.NE.0)sum=sum+bj +!!$ jsum=1-jsum +!!$ IF(j.EQ.n)bessj=bjp +!!$ END DO +!!$ sum=2.*sum-bj +!!$ bessj=bessj/sum +!!$ ENDIF +!!$ IF(x.LT.0.d0.AND.MOD(n,2).EQ.1)bessj=-bessj +!!$ RETURN +!!$ END FUNCTION bessel_jn +!!$ +!!$ PURE FUNCTION bessj0(x) +!!$ DOUBLE PRECISION, INTENT(in) :: x +!!$ DOUBLE PRECISION bessj0 +!!$ DOUBLE PRECISION ax,xx,z +!!$ DOUBLE PRECISION p1,p2,p3,p4,p5,q1,q2,q3,q4,q5,r1,r2,r3,r4,r5,r6,s1,s2,s3,s4,s5,s6,y +!!$ SAVE p1,p2,p3,p4,p5,q1,q2,q3,q4,q5,r1,r2,r3,r4,r5,r6,s1,s2,s3,s4,s5,s6 +!!$ DATA p1,p2,p3,p4,p5/1.d0,-.1098628627d-2,.2734510407d-4,-.2073370639d-5,.2093887211d-6/ +!!$ DATA q1,q2,q3,q4,q5/-.1562499995d-1,.1430488765d-3,-.6911147651d-5,.7621095161d-6,-.934945152d-7/ +!!$ DATA r1,r2,r3,r4,r5,r6/57568490574.d0,-13362590354.d0,651619640.7d0,-11214424.18d0,& +!!$ & 77392.33017d0,-184.9052456d0/ +!!$ DATA s1,s2,s3,s4,s5,s6/57568490411.d0,1029532985.d0,9494680.718d0,59272.64853d0,267.8532712d0,1.d0/ +!!$ IF(ABS(x).LT.8.)THEN +!!$ y=x**2 +!!$ bessj0=(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6)))))/(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6))))) +!!$ ELSE +!!$ ax=ABS(x) +!!$ z=8./ax +!!$ y=z**2 +!!$ xx=ax-.785398164 +!!$ bessj0=SQRT(.636619772/ax)*(COS(xx)*(p1+y*(p2+y*(p3+y*(p4+y*p5))))-z*SIN(xx)*(q1+y*(q2+y*(q3+y*(q4+y*q5))))) +!!$ ENDIF +!!$ RETURN +!!$ END FUNCTION bessj0 +!!$ +!!$ PURE FUNCTION bessj1(x) +!!$ DOUBLE PRECISION, INTENT(in) :: x +!!$ DOUBLE PRECISION bessj1 +!!$ DOUBLE PRECISION ax,xx,z +!!$ DOUBLE PRECISION p1,p2,p3,p4,p5,q1,q2,q3,q4,q5,r1,r2,r3,r4,r5,r6,s1,s2,s3,s4,s5,s6,y +!!$ SAVE p1,p2,p3,p4,p5,q1,q2,q3,q4,q5,r1,r2,r3,r4,r5,r6,s1,s2,s3,s4,s5,s6 +!!$ DATA r1,r2,r3,r4,r5,r6/72362614232.d0,-7895059235.d0,242396853.1d0,-2972611.439d0,15704.48260d0,-30.16036606d0/ +!!$ DATA s1,s2,s3,s4,s5,s6/144725228442.d0,2300535178.d0,18583304.74d0,99447.43394d0,376.9991397d0,1.d0/ +!!$ DATA p1,p2,p3,p4,p5/1.d0,.183105d-2,-.3516396496d-4,.2457520174d-5,-.240337019d-6/ +!!$ DATA q1,q2,q3,q4,q5/.04687499995d0,-.2002690873d-3,.8449199096d-5,-.88228987d-6,.105787412d-6/ +!!$ IF(ABS(x).LT.8.)THEN +!!$ y=x**2 +!!$ bessj1=x*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6)))))/(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6))))) +!!$ ELSE +!!$ ax=ABS(x) +!!$ z=8.d0/ax +!!$ y=z**2 +!!$ xx=ax-2.356194491d0 +!!$ bessj1=SQRT(.636619772d0/ax)*(COS(xx)*(p1+y*(p2+y*(p3+y*(p4+y*p5))))- & +!!$ & z*SIN(xx)*(q1+y*(q2+y*(q3+y*(q4+y*q5)))))*SIGN(1.d0,x) +!!$ ENDIF +!!$ RETURN +!!$ END FUNCTION bessj1 + +END MODULE math_util diff --git a/src/matrix.f90 b/src/matrix.f90 new file mode 100644 index 0000000..22ea822 --- /dev/null +++ b/src/matrix.f90 @@ -0,0 +1,3295 @@ +!> +!> @file matrix.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +MODULE matrix +! +! MATRIX: Simple interface to the direct solver LAPACK. +! +! T.M. Tran, CRPP-EPFL +! February 2007 +! + IMPLICIT NONE +! + TYPE gbmat ! Lapack General Band matrix storage + INTEGER :: kl, ku, rank + INTEGER :: mrows, ncols + INTEGER :: nterms, kmat + INTEGER, DIMENSION(:), POINTER :: piv => null() + DOUBLE PRECISION, DIMENSION(:,:), POINTER :: val => null() + END TYPE gbmat +! + TYPE gemat ! Lapack General DENSE matrix storage + INTEGER :: rank + INTEGER :: mrows, ncols + INTEGER :: nterms, kmat + INTEGER, DIMENSION(:), POINTER :: piv => null() + DOUBLE PRECISION, DIMENSION(:,:), POINTER :: val => null() + END TYPE gemat +! + TYPE pbmat ! Lapack Pack Band matrix storage (super-diagonals) + INTEGER :: ku, rank + INTEGER :: nterms, kmat + DOUBLE PRECISION, DIMENSION(:,:), POINTER :: val => null() + END TYPE pbmat +! + TYPE zgbmat ! Lapack General Band matrix storage + INTEGER :: kl, ku, rank + INTEGER :: mrows, ncols + INTEGER :: nterms, kmat + INTEGER, DIMENSION(:), POINTER :: piv => null() + DOUBLE COMPLEX, DIMENSION(:,:), POINTER :: val => NULL() + END TYPE zgbmat +! + TYPE zgemat ! Lapack General DENSE matrix storage + INTEGER :: rank + INTEGER :: mrows, ncols + INTEGER :: nterms, kmat + INTEGER, DIMENSION(:), POINTER :: piv => null() + DOUBLE COMPLEX, DIMENSION(:,:), POINTER :: val => null() + END TYPE zgemat +! + TYPE zpbmat ! Lapack Pack Band matrix storage (super-diagonals) + INTEGER :: ku, rank + INTEGER :: nterms, kmat + DOUBLE COMPLEX, DIMENSION(:,:), POINTER :: val => null() + END TYPE zpbmat +! + TYPE periodic_mat + TYPE(gbmat) :: mat + INTEGER :: nterms + DOUBLE PRECISION, DIMENSION(:,:), POINTER :: & + & matu => null(), & + & matvt => null() + END TYPE periodic_mat +! + TYPE zperiodic_mat + TYPE(zgbmat) :: mat + INTEGER :: nterms + DOUBLE COMPLEX, DIMENSION(:,:), POINTER :: & + & matu => null(), & + & matvt => null() + END TYPE zperiodic_mat +! +!-------------------------------------------------------------------------------- + INTERFACE init + MODULE PROCEDURE init_gb, init_ge, init_pb, & + & init_zgb, init_zge, init_zpb, & + & init_periodic, init_zperiodic + END INTERFACE + INTERFACE getvalp + MODULE PROCEDURE getvalp_gb, getvalp_ge, getvalp_pb, & + & getvalp_zgb, getvalp_zge, getvalp_zpb + END INTERFACE + INTERFACE mcopy + MODULE PROCEDURE mcopy_gb, mcopy_ge, mcopy_pb, & + & mcopy_zgb, mcopy_zge, mcopy_zpb, & + & mcopy_periodic, mcopy_zperiodic + END INTERFACE + INTERFACE maddto + MODULE PROCEDURE maddto_gb, maddto_ge, maddto_pb, & + & maddto_zgb, maddto_zge, maddto_zpb, & + & maddto_periodic, maddto_zperiodic + END INTERFACE + INTERFACE destroy + MODULE PROCEDURE destroy_gb, destroy_ge, destroy_pb, & + & destroy_zgb, destroy_zge, destroy_zpb, & + & destroy_periodic, destroy_zperiodic + END INTERFACE + INTERFACE updtmat + MODULE PROCEDURE updt_gb, updt_ge, updt_pb, & + & updt_zgb, updt_zpb, & + & updt_periodic, updt_zperiodic + END INTERFACE + INTERFACE getele + MODULE PROCEDURE getele_gb, getele_pb, & + & getele_zgb, getele_zpb, & + & getele_periodic, getele_zperiodic + END INTERFACE + INTERFACE putele + MODULE PROCEDURE putele_gb, putele_pb, & + & putele_zgb, putele_zpb, & + & putele_periodic, putele_zperiodic + END INTERFACE + INTERFACE getcol + MODULE PROCEDURE getcol_gb, getcol_pb, & + & getcol_zgb, getcol_zpb, & + & getcol_periodic, getcol_zperiodic + END INTERFACE + INTERFACE getrow + MODULE PROCEDURE getrow_gb, getrow_ge, getrow_pb, & + & getrow_zgb, getrow_zpb, & + & getrow_periodic, getrow_zperiodic + END INTERFACE + INTERFACE putcol + MODULE PROCEDURE putcol_gb, putcol_ge, putcol_pb, & + & putcol_zgb, putcol_zpb, & + & putcol_periodic, putcol_zperiodic + END INTERFACE + INTERFACE putrow + MODULE PROCEDURE putrow_gb, putrow_ge, putrow_pb, & + & putrow_zgb, putrow_zpb, & + & putrow_periodic, putrow_zperiodic + END INTERFACE + INTERFACE factor + MODULE PROCEDURE factor_gb, factor_ge, factor_pb, & + & factor_zgb, factor_zge, factor_zpb, & + & factor_periodic, factor_zperiodic + END INTERFACE + INTERFACE bsolve + MODULE PROCEDURE bsolve_gb1, bsolve_gbn, bsolve_ge1, bsolve_gen, & + & bsolve_pb1, bsolve_pbn, & + & bsolve_periodic1, bsolve_periodicn, & + & bsolve_zperiodic1, bsolve_zperiodicn, & + & bsolve_zgb1, bsolve_zgbn, bsolve_zge1, bsolve_zgen, & + & bsolve_zpb1, bsolve_zpbn + END INTERFACE + INTERFACE vmx + MODULE PROCEDURE vmx_gb, vmx_gbn, vmx_pb, vmx_pbn, & + & vmx_zgb, vmx_zgbn, vmx_zpb, vmx_zpbn, & + & vmx_ge, vmx_gen, vmx_zge, vmx_zgen, & + & vmx_periodic, vmx_zperiodic + END INTERFACE + INTERFACE determinant + MODULE PROCEDURE determinant_ge, determinant_gb, determinant_pb, & + & determinant_zge, determinant_zgb, determinant_zpb + END INTERFACE + INTERFACE putmat + MODULE PROCEDURE putmat_gb + END INTERFACE + INTERFACE getmat + MODULE PROCEDURE getmat_gb + END INTERFACE + INTERFACE kron + MODULE PROCEDURE kron_ge + END INTERFACE kron +! +CONTAINS +!=========================================================================== + SUBROUTINE init_ge(n, nterms, mat, kmat, mrows) +! +! Initialize Lapack General Dense matrice +! + INTEGER, INTENT(in) :: n, nterms + INTEGER, OPTIONAL :: kmat + INTEGER, OPTIONAL :: mrows + TYPE(gemat) :: mat +! + mat%ncols = n + mat%mrows = n + IF(PRESENT(mrows)) THEN + mat%mrows = mrows + END IF + mat%rank = n ! Warning: ok if square matrix + mat%nterms = nterms + IF(PRESENT(kmat)) mat%kmat = kmat + IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val) + IF( ASSOCIATED(mat%piv)) DEALLOCATE(mat%piv) + ALLOCATE(mat%val(mat%mrows,mat%ncols)) + ALLOCATE(mat%piv(MIN(mat%mrows,mat%ncols))) + mat%val = 0.0d0 + mat%piv = 0 + END SUBROUTINE init_ge +!=========================================================================== + SUBROUTINE init_gb(kl, ku, n, nterms, mat, kmat, mrows) +! +! Initialize Lapack General Banded matrice +! + INTEGER, INTENT(in) :: kl, ku, n, nterms + INTEGER, OPTIONAL :: kmat + INTEGER, OPTIONAL :: mrows + TYPE(gbmat) :: mat + INTEGER :: lda +! + mat%kl = kl + mat%ku = ku + mat%ncols = n + mat%mrows = n + IF(PRESENT(mrows)) THEN + mat%mrows = mrows + END IF + mat%rank = n ! Warning: ok if square matrix + mat%nterms = nterms + IF(PRESENT(kmat)) mat%kmat = kmat + lda = 2*kl + ku + 1 + IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val) + IF( ASSOCIATED(mat%piv)) DEALLOCATE(mat%piv) + ALLOCATE(mat%val(lda,n)) + ALLOCATE(mat%piv(n)) + mat%val = 0.0d0 + mat%piv = 0 + END SUBROUTINE init_gb +!=========================================================================== + SUBROUTINE init_periodic(kl, ku, n, nterms, mat, kmat) +! +! Initialize Lapack Periodic General Banded matrice +! + INTEGER, INTENT(in) :: kl, ku, n, nterms + INTEGER, OPTIONAL :: kmat + TYPE(periodic_mat) :: mat + INTEGER :: i,j + DOUBLE PRECISION, PARAMETER :: zero=0.0d0, one=1.0d0 +! +! In band matrix matp%mat is a GB matrix + IF( PRESENT(kmat)) THEN + CALL init(kl, ku, n, nterms, mat%mat, kmat) + ELSE + CALL init(kl, ku, n, nterms, mat%mat) + END IF + mat%nterms = nterms +! +! Off band matrices + IF( ASSOCIATED(mat%matu)) DEALLOCATE(mat%matu) + IF( ASSOCIATED(mat%matvt)) DEALLOCATE(mat%matvt) + ALLOCATE(mat%matu(n, kl+ku)) + ALLOCATE(mat%matvt(kl+ku,n)) +! + mat%matu = zero + mat%matvt = zero ! kl=3, ku=2 + DO j=1,kl ! [ 1 0 0 . . ] + mat%matu(j,j) = one ! [ 0 1 0 . . ] + END DO ! [ 0 0 1 . . ] +! ! [ 0 . . . . ] + DO j=1,ku ! [ . . . . . ] + i=n-ku+j ! [ . . . 1 0 ] + mat%matu(i,ku+j) = one ! [ . . . 0 1 ] + END DO + END SUBROUTINE init_periodic +!=========================================================================== + SUBROUTINE init_zperiodic(kl, ku, n, nterms, mat, kmat) +! +! Initialize Lapack Periodic General Banded matrice +! + INTEGER, INTENT(in) :: kl, ku, n, nterms + INTEGER, OPTIONAL :: kmat + TYPE(zperiodic_mat) :: mat + INTEGER :: i,j + DOUBLE PRECISION, PARAMETER :: zero=0.0d0, one=1.0d0 +! +! In band matrix matp%mat is a GB matrix + IF( PRESENT(kmat)) THEN + CALL init(kl, ku, n, nterms, mat%mat, kmat) + ELSE + CALL init(kl, ku, n, nterms, mat%mat) + END IF + mat%nterms = nterms +! +! Off band matrices + IF( ASSOCIATED(mat%matu)) DEALLOCATE(mat%matu) + IF( ASSOCIATED(mat%matvt)) DEALLOCATE(mat%matvt) + ALLOCATE(mat%matu(n, kl+ku)) + ALLOCATE(mat%matvt(kl+ku,n)) +! + mat%matu = zero + mat%matvt = zero ! kl=3, ku=2 + DO j=1,kl ! [ 1 0 0 . . ] + mat%matu(j,j) = one ! [ 0 1 0 . . ] + END DO ! [ 0 0 1 . . ] +! ! [ 0 . . . . ] + DO j=1,ku ! [ . . . . . ] + i=n-ku+j ! [ . . . 1 0 ] + mat%matu(i,ku+j) = one ! [ . . . 0 1 ] + END DO + END SUBROUTINE init_zperiodic +!=========================================================================== + SUBROUTINE init_pb(ku, n, nterms, mat, kmat) +! +! Initialize Lapack Packed Banded matrice +! + INTEGER, INTENT(in) :: ku, n, nterms + INTEGER, OPTIONAL :: kmat + TYPE(pbmat) :: mat + INTEGER :: lda +! + mat%ku = ku + mat%rank = n + mat%nterms = nterms + IF(PRESENT(kmat)) mat%kmat = kmat + lda = ku + 1 + IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val) + ALLOCATE(mat%val(lda,n)) + mat%val = 0.0d0 + END SUBROUTINE init_pb +!=========================================================================== + SUBROUTINE init_zge(n, nterms, mat, kmat, mrows) +! +! Initialize Lapack General Dense matrice +! + INTEGER, INTENT(in) :: n, nterms + INTEGER, OPTIONAL :: kmat + INTEGER, OPTIONAL :: mrows + TYPE(zgemat) :: mat +! + mat%ncols = n + mat%mrows = n + IF(PRESENT(mrows)) THEN + mat%mrows = mrows + END IF + mat%rank = n + mat%nterms = nterms + IF(PRESENT(kmat)) mat%kmat = kmat + IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val) + IF( ASSOCIATED(mat%piv)) DEALLOCATE(mat%piv) + ALLOCATE(mat%val(mat%mrows,mat%ncols)) + ALLOCATE(mat%piv(MIN(mat%mrows,mat%ncols))) + mat%val = 0.0d0 + mat%piv = 0 + END SUBROUTINE init_zge +!=========================================================================== + SUBROUTINE init_zgb(kl, ku, n, nterms, mat, kmat, mrows) +! +! Initialize Lapack General Banded matrice +! + INTEGER, INTENT(in) :: kl, ku, n, nterms + INTEGER, OPTIONAL :: kmat + INTEGER, OPTIONAL :: mrows + TYPE(zgbmat) :: mat + INTEGER :: lda +! + mat%kl = kl + mat%ku = ku + mat%ncols = n + mat%mrows = n + IF(PRESENT(mrows)) THEN + mat%mrows = mrows + END IF + mat%rank = n ! Warning: ok if square matrix + mat%nterms = nterms + IF(PRESENT(kmat)) mat%kmat = kmat + lda = 2*kl + ku + 1 + IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val) + IF( ASSOCIATED(mat%piv)) DEALLOCATE(mat%piv) + ALLOCATE(mat%val(lda,n)) + ALLOCATE(mat%piv(n)) + mat%val = 0.0d0 + mat%piv = 0 + END SUBROUTINE init_zgb +!=========================================================================== + SUBROUTINE init_zpb(ku, n, nterms, mat, kmat) +! +! Initialize Lapack Packed Banded matrice +! + INTEGER, INTENT(in) :: ku, n, nterms + INTEGER, OPTIONAL :: kmat + TYPE(zpbmat) :: mat + INTEGER :: lda +! + mat%ku = ku + mat%rank = n + mat%nterms = nterms + IF(PRESENT(kmat)) mat%kmat = kmat + lda = ku + 1 + IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val) + ALLOCATE(mat%val(lda,n)) + mat%val = 0.0d0 + END SUBROUTINE init_zpb +!=========================================================================== + SUBROUTINE mcopy_ge(mata, matb) +! +! Matrix copy: B = A +! + TYPE(gemat) :: mata, matb +! + matb%rank = mata%rank + matb%mrows = mata%mrows + matb%ncols = mata%ncols + matb%nterms = mata%nterms + matb%kmat = mata%kmat + IF( ASSOCIATED(matb%val)) DEALLOCATE(matb%val) + IF( ASSOCIATED(matb%piv)) DEALLOCATE(matb%piv) + ALLOCATE(matb%val(matb%mrows,matb%ncols)) + ALLOCATE(matb%piv(MIN(matb%mrows,matb%ncols))) + matb%val = mata%val + matb%piv = mata%piv + END SUBROUTINE mcopy_ge +!=========================================================================== + SUBROUTINE mcopy_gb(mata, matb) +! +! Matrix copy: B = A +! + TYPE(gbmat) :: mata, matb + INTEGER :: n, lda +! + n = mata%rank + matb%kl = mata%kl + matb%ku = mata%ku + matb%rank = mata%rank + matb%mrows = mata%mrows + matb%ncols = mata%ncols + matb%nterms = mata%nterms + matb%kmat = mata%kmat + lda = 2*mata%kl + mata%ku + 1 + IF( ASSOCIATED(matb%val)) DEALLOCATE(matb%val) + IF( ASSOCIATED(matb%piv)) DEALLOCATE(matb%piv) + ALLOCATE(matb%val(lda,n)) + ALLOCATE(matb%piv(n)) + matb%val = mata%val + matb%piv = mata%piv + END SUBROUTINE mcopy_gb +!=========================================================================== + SUBROUTINE mcopy_periodic(mata, matb) +! +! Matrix copy: B = A +! + TYPE(periodic_mat) :: mata, matb + INTEGER :: n, kl, ku +! + kl = mata%mat%kl + ku = mata%mat%ku + n = mata%mat%rank +! + CALL mcopy(mata%mat, matb%mat) + IF( ASSOCIATED(matb%matu)) DEALLOCATE(matb%matu) + IF( ASSOCIATED(matb%matvt)) DEALLOCATE(matb%matvt) + ALLOCATE(matb%matu(n,kl+ku)) + ALLOCATE(matb%matvt(kl+ku,n)) + matb%matu = mata%matu + matb%matvt = mata%matvt + END SUBROUTINE mcopy_periodic +!=========================================================================== + SUBROUTINE mcopy_zperiodic(mata, matb) +! +! Matrix copy: B = A +! + TYPE(zperiodic_mat) :: mata, matb + INTEGER :: n, kl, ku +! + kl = mata%mat%kl + ku = mata%mat%ku + n = mata%mat%rank +! + CALL mcopy(mata%mat, matb%mat) + IF( ASSOCIATED(matb%matu)) DEALLOCATE(matb%matu) + IF( ASSOCIATED(matb%matvt)) DEALLOCATE(matb%matvt) + ALLOCATE(matb%matu(n,kl+ku)) + ALLOCATE(matb%matvt(kl+ku,n)) + matb%matu = mata%matu + matb%matvt = mata%matvt + END SUBROUTINE mcopy_zperiodic +!=========================================================================== + SUBROUTINE mcopy_pb(mata, matb) +! +! Matrix copy: B = A +! + TYPE(pbmat) :: mata, matb + INTEGER :: n, lda +! + n = mata%rank + matb%ku = mata%ku + matb%rank = mata%rank + matb%nterms = mata%nterms + lda = mata%ku + 1 + IF( ASSOCIATED(matb%val)) DEALLOCATE(matb%val) + ALLOCATE(matb%val(lda,n)) + matb%val = mata%val + END SUBROUTINE mcopy_pb +!=========================================================================== + SUBROUTINE mcopy_zge(mata, matb) +! +! Matrix copy: B = A +! + TYPE(zgemat) :: mata, matb + INTEGER :: n +! + n = mata%rank + matb%rank = n + IF( ASSOCIATED(matb%val)) DEALLOCATE(matb%val) + IF( ASSOCIATED(matb%piv)) DEALLOCATE(matb%piv) + ALLOCATE(matb%val(n,n)) + ALLOCATE(matb%piv(n)) + matb%val = mata%val + matb%piv = mata%piv + END SUBROUTINE mcopy_zge +!=========================================================================== + SUBROUTINE mcopy_zgb(mata, matb) +! +! Matrix copy: B = A +! + TYPE(zgbmat) :: mata, matb + INTEGER :: n, lda +! + n = mata%rank + matb%kl = mata%kl + matb%ku = mata%ku + matb%rank = mata%rank + matb%nterms = mata%nterms + lda = 2*mata%kl + mata%ku + 1 + IF( ASSOCIATED(matb%val)) DEALLOCATE(matb%val) + IF( ASSOCIATED(matb%piv)) DEALLOCATE(matb%piv) + ALLOCATE(matb%val(lda,n)) + ALLOCATE(matb%piv(n)) + matb%val = mata%val + matb%piv = mata%piv + END SUBROUTINE mcopy_zgb +!=========================================================================== + SUBROUTINE mcopy_zpb(mata, matb) +! +! Matrix copy: B = A +! + TYPE(zpbmat) :: mata, matb + INTEGER :: n, lda +! + n = mata%rank + matb%ku = mata%ku + matb%rank = mata%rank + matb%nterms = mata%nterms + lda = mata%ku + 1 + IF( ASSOCIATED(matb%val)) DEALLOCATE(matb%val) + ALLOCATE(matb%val(lda,n)) + matb%val = mata%val + END SUBROUTINE mcopy_zpb +!=========================================================================== + SUBROUTINE maddto_ge(mata, alpha, matb) +! +! A <- A + alpha*B +! + TYPE(gemat) :: mata, matb + DOUBLE PRECISION :: alpha +! + mata%val = mata%val + alpha*matb%val + END SUBROUTINE maddto_ge +!=========================================================================== + SUBROUTINE maddto_gb(mata, alpha, matb) +! +! A <- A + alpha*B +! + TYPE(gbmat) :: mata, matb + DOUBLE PRECISION :: alpha +! + mata%val = mata%val + alpha*matb%val + END SUBROUTINE maddto_gb +!=========================================================================== + SUBROUTINE maddto_periodic(mata, alpha, matb) +! +! A <- A + alpha*B +! + TYPE(periodic_mat) :: mata, matb + DOUBLE PRECISION :: alpha +! + mata%mat%val = mata%mat%val + alpha*matb%mat%val + mata%matvt = mata%matvt + alpha*matb%matvt + END SUBROUTINE maddto_periodic +!=========================================================================== + SUBROUTINE maddto_zperiodic(mata, alpha, matb) +! +! A <- A + alpha*B +! + TYPE(zperiodic_mat) :: mata, matb + DOUBLE COMPLEX :: alpha +! + mata%mat%val = mata%mat%val + alpha*matb%mat%val + mata%matvt = mata%matvt + alpha*matb%matvt + END SUBROUTINE maddto_zperiodic +!=========================================================================== + SUBROUTINE maddto_pb(mata, alpha, matb) +! +! A <- A + alpha*B +! + TYPE(pbmat) :: mata, matb + DOUBLE PRECISION :: alpha +! + mata%val = mata%val + alpha*matb%val + END SUBROUTINE maddto_pb +!=========================================================================== + SUBROUTINE maddto_zge(mata, alpha, matb) +! +! A <- A + alpha*B +! + TYPE(zgemat) :: mata, matb + DOUBLE COMPLEX :: alpha +! + mata%val = mata%val + alpha*matb%val + END SUBROUTINE maddto_zge +!=========================================================================== + SUBROUTINE maddto_zgb(mata, alpha, matb) +! +! A <- A + alpha*B +! + TYPE(zgbmat) :: mata, matb + DOUBLE COMPLEX :: alpha +! + mata%val = mata%val + alpha*matb%val + END SUBROUTINE maddto_zgb +!=========================================================================== + SUBROUTINE maddto_zpb(mata, alpha, matb) +! +! A <- A + alpha*B +! + TYPE(zpbmat) :: mata, matb + DOUBLE COMPLEX :: alpha +! + mata%val = mata%val + alpha*matb%val + END SUBROUTINE maddto_zpb +!=========================================================================== + SUBROUTINE getvalp_ge(mat, p) +! +! Get pointer to matrix coefficients +! + TYPE(gemat) :: mat + DOUBLE PRECISION, DIMENSION(:,:), POINTER :: p +! + p => mat%val + END SUBROUTINE getvalp_ge +!=========================================================================== + SUBROUTINE getvalp_gb(mat, p) +! +! Get pointer to matrix coefficients +! + TYPE(gbmat) :: mat + DOUBLE PRECISION, DIMENSION(:,:), POINTER :: p +! + p => mat%val + END SUBROUTINE getvalp_gb +!=========================================================================== + SUBROUTINE getvalp_pb(mat, p) +! +! Get pointer to matrix coefficients +! + TYPE(pbmat) :: mat + DOUBLE PRECISION, DIMENSION(:,:), POINTER :: p +! + p => mat%val + END SUBROUTINE getvalp_pb +!=========================================================================== + SUBROUTINE getvalp_zge(mat, p) +! +! Get pointer to matrix coefficients +! + TYPE(zgemat) :: mat + DOUBLE COMPLEX, DIMENSION(:,:), POINTER :: p +! + p => mat%val + END SUBROUTINE getvalp_zge +!=========================================================================== + SUBROUTINE getvalp_zgb(mat, p) +! +! Get pointer to matrix coefficients +! + TYPE(zgbmat) :: mat + DOUBLE COMPLEX, DIMENSION(:,:), POINTER :: p +! + p => mat%val + END SUBROUTINE getvalp_zgb +!=========================================================================== + SUBROUTINE getvalp_zpb(mat, p) +! +! Get pointer to matrix coefficients +! + TYPE(zpbmat) :: mat + DOUBLE COMPLEX, DIMENSION(:,:), POINTER :: p +! + p => mat%val + END SUBROUTINE getvalp_zpb +!=========================================================================== + SUBROUTINE destroy_gb(mat) +! +! Deallocate pointers in mat +! + TYPE(gbmat) :: mat + IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val) + IF( ASSOCIATED(mat%piv)) DEALLOCATE(mat%piv) + END SUBROUTINE destroy_gb +!=========================================================================== + SUBROUTINE destroy_ge(mat) +! +! Deallocate pointers in mat +! + TYPE(gemat) :: mat + IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val) + IF( ASSOCIATED(mat%piv)) DEALLOCATE(mat%piv) + END SUBROUTINE destroy_ge +!=========================================================================== + SUBROUTINE destroy_pb(mat) +! +! Deallocate pointers in mat +! + TYPE(pbmat) :: mat + IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val) + END SUBROUTINE destroy_pb +!=========================================================================== + SUBROUTINE destroy_zgb(mat) +! +! Deallocate pointers in mat +! + TYPE(zgbmat) :: mat + IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val) + IF( ASSOCIATED(mat%piv)) DEALLOCATE(mat%piv) + END SUBROUTINE destroy_zgb +!=========================================================================== + SUBROUTINE destroy_zge(mat) +! +! Deallocate pointers in mat +! + TYPE(zgemat) :: mat + IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val) + IF( ASSOCIATED(mat%piv)) DEALLOCATE(mat%piv) + END SUBROUTINE destroy_zge +!=========================================================================== + SUBROUTINE destroy_zpb(mat) +! +! Deallocate pointers in mat +! + TYPE(zpbmat) :: mat + IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val) + END SUBROUTINE destroy_zpb +!=========================================================================== + SUBROUTINE destroy_periodic(mat) +! +! Deallocate pointers in mat +! + TYPE(periodic_mat) :: mat + IF( ASSOCIATED(mat%matu)) DEALLOCATE(mat%matu) + IF( ASSOCIATED(mat%matvt)) DEALLOCATE(mat%matvt) + CALL destroy(mat%mat) + END SUBROUTINE destroy_periodic +!=========================================================================== + SUBROUTINE destroy_zperiodic(mat) +! +! Deallocate pointers in mat +! + TYPE(zperiodic_mat) :: mat + IF( ASSOCIATED(mat%matu)) DEALLOCATE(mat%matu) + IF( ASSOCIATED(mat%matvt)) DEALLOCATE(mat%matvt) + CALL destroy(mat%mat) + END SUBROUTINE destroy_zperiodic +!=========================================================================== + SUBROUTINE updt_gb(mat, i, j, val) +! +! Update element Aij into banded matrix +! + TYPE(gbmat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE PRECISION, INTENT(in) :: val + INTEGER :: lda, n, ib +! + lda = SIZE(mat%val, 1) + n = mat%rank + ib = mat%kl + mat%ku + i - j + 1 + IF( (ib .GT. lda) .OR. (j .GT. n) .OR. (j .LT. 1)) THEN + WRITE(*,*) 'UPDT: i, j out of range ', i, j, lda, n + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + mat%val(ib,j) = mat%val(ib,j) + val + END SUBROUTINE updt_gb +!=========================================================================== + SUBROUTINE updt_ge(mat, i, j, val) +! +! Update element Aij into banded matrix +! + TYPE(gemat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE PRECISION, INTENT(in) :: val +! + IF( (i .GT. mat%mrows) .OR. (j .GT. mat%ncols) .OR. (j .LT. 1) .OR. (i.LT.1)) THEN + WRITE(*,*) 'UPDT: i, j out of range ', i, j, mat%mrows, mat%ncols + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + mat%val(i,j) = mat%val(i,j) + val + END SUBROUTINE updt_ge +!=========================================================================== + SUBROUTINE updt_periodic(mat, i, j, val) +! +! Update element Aij into periodic banded matrix +! + TYPE(periodic_mat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE PRECISION, INTENT(in) :: val + INTEGER :: n, kl, ku +! + n = mat%mat%rank + kl = mat%mat%kl + ku = mat%mat%ku +! + IF( i.LE.kl .AND. j.GE.n-kl+1 ) THEN +! +! Put into C(1:kl, 1:kl) = V^T(1:kl, n-kl+1:n) + mat%matvt(i,j) = mat%matvt(i,j) + val +! + ELSE IF( i.GE.n-ku+1 .AND. j.LE.ku ) THEN +! +! Put into D(1:ku,1:ku) = V^T(kl+1:kl+ku, 1:ku) + mat%matvt(i-n+kl+ku,j) = mat%matvt(i-n+kl+ku,j) + val +! + ELSE +! +! Put into the banded matrix + CALL updtmat(mat%mat, i, j, val) +! + END IF + END SUBROUTINE updt_periodic +!=========================================================================== + SUBROUTINE updt_zperiodic(mat, i, j, val) +! +! Update element Aij into periodic banded matrix +! + TYPE(zperiodic_mat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE COMPLEX, INTENT(in) :: val + INTEGER :: n, kl, ku +! + n = mat%mat%rank + kl = mat%mat%kl + ku = mat%mat%ku +! + IF( i.LE.kl .AND. j.GE.n-kl+1 ) THEN +! +! Put into C(1:kl, 1:kl) = V^T(1:kl, n-kl+1:n) + mat%matvt(i,j) = mat%matvt(i,j) + val +! + ELSE IF( i.GE.n-ku+1 .AND. j.LE.ku ) THEN +! +! Put into D(1:ku,1:ku) = V^T(kl+1:kl+ku, 1:ku) + mat%matvt(i-n+kl+ku,j) = mat%matvt(i-n+kl+ku,j) + val +! + ELSE +! +! Put into the banded matrix + CALL updtmat(mat%mat, i, j, val) +! + END IF + END SUBROUTINE updt_zperiodic +!=========================================================================== + SUBROUTINE updt_pb(mat, i, j, val) +! +! Update element Aij into banded matrix +! + TYPE(pbmat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE PRECISION, INTENT(in) :: val + INTEGER :: lda, n, ib +! + lda = SIZE(mat%val, 1) + n = mat%rank + IF( i .LE. j ) THEN + ib = mat%ku + i - j + 1 + IF( (ib .GT. lda) .OR. (j .GT. n) .OR. (j .LT. 1)) THEN + WRITE(*,*) 'UPDT: i, j out of range ', i, j, lda, n + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + mat%val(ib,j) = mat%val(ib,j) + val + END IF + END SUBROUTINE updt_pb +!=========================================================================== + SUBROUTINE updt_zgb(mat, i, j, val) +! +! Update element Aij into banded matrix +! + TYPE(zgbmat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE COMPLEX, INTENT(in) :: val + INTEGER :: lda, n, ib +! + lda = SIZE(mat%val, 1) + n = mat%rank + ib = mat%kl + mat%ku + i - j + 1 + IF( (ib .GT. lda) .OR. (j .GT. n) .OR. (j .LT. 1)) THEN + WRITE(*,*) 'UPDT: i, j out of range ', i, j, lda, n + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + mat%val(ib,j) = mat%val(ib,j) + val + END SUBROUTINE updt_zgb +!=========================================================================== + SUBROUTINE updt_zpb(mat, i, j, val) +! +! Update element Aij into banded matrix +! + TYPE(zpbmat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE COMPLEX, INTENT(in) :: val + INTEGER :: lda, n, ib +! + lda = SIZE(mat%val, 1) + n = mat%rank + IF( i .LE. j ) THEN + ib = mat%ku + i - j + 1 + IF( (ib .GT. lda) .OR. (j .GT. n) .OR. (j .LT. 1)) THEN + WRITE(*,*) 'UPDT: i, j out of range ', i, j, lda, n + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + mat%val(ib,j) = mat%val(ib,j) + val + END IF + END SUBROUTINE updt_zpb +!=========================================================================== + SUBROUTINE getele_gb(mat, i, j, val) +! +! Get element (i,j) of matrix +! + TYPE(gbmat), INTENT(in) :: mat + DOUBLE PRECISION, INTENT (OUT) :: val + INTEGER, INTENT (IN) :: i, j + INTEGER :: lda, n, ib +! + lda = SIZE(mat%val, 1) + n = mat%rank + ib = mat%kl + mat%ku + i - j + 1 + IF( (ib .GT. lda) .OR. (j .GT. n)) THEN + WRITE(*,*) 'GETELE: i, j out of range ', i, j + STOP '*** Abnormal EXIT in MODULE matrix***' + END IF + val = mat%val(ib,j) + END SUBROUTINE getele_gb +!=========================================================================== + SUBROUTINE getele_periodic(mat, i, j, val) +! +! Get element Aij of periodic banded matrix +! + TYPE(periodic_mat), INTENT(in) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE PRECISION, INTENT(out) :: val + INTEGER :: n, kl, ku +! + n = mat%mat%rank + kl = mat%mat%kl + ku = mat%mat%ku +! + IF( i.LE.kl .AND. j.GE.n-kl+1 ) THEN +! +! Put into C(1:kl, 1:kl) = V^T(1:kl, n-kl+1:n) + val =mat%matvt(i,j) +! + ELSE IF( i.GE.n-ku+1 .AND. j.LE.ku ) THEN +! +! Put into D(1:ku,1:ku) = V^T(kl+1:kl+ku, 1:ku) + val = mat%matvt(i-n+kl+ku,j) +! + ELSE +! +! Put into the banded matrix + CALL getele(mat%mat, i, j, val) +! + END IF + END SUBROUTINE getele_periodic +!=========================================================================== + SUBROUTINE getele_zperiodic(mat, i, j, val) +! +! Get element Aij of periodic banded matrix +! + TYPE(zperiodic_mat), INTENT(in) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE COMPLEX, INTENT(out) :: val + INTEGER :: n, kl, ku +! + n = mat%mat%rank + kl = mat%mat%kl + ku = mat%mat%ku +! + IF( i.LE.kl .AND. j.GE.n-kl+1 ) THEN +! +! Put into C(1:kl, 1:kl) = V^T(1:kl, n-kl+1:n) + val =mat%matvt(i,j) +! + ELSE IF( i.GE.n-ku+1 .AND. j.LE.ku ) THEN +! +! Put into D(1:ku,1:ku) = V^T(kl+1:kl+ku, 1:ku) + val = mat%matvt(i-n+kl+ku,j) +! + ELSE +! +! Put into the banded matrix + CALL getele(mat%mat, i, j, val) +! + END IF + END SUBROUTINE getele_zperiodic +!=========================================================================== + SUBROUTINE getele_pb(mat, i, j, val) +! +! Get element (i,j) of matrix +! + TYPE(pbmat), INTENT(in) :: mat + DOUBLE PRECISION, INTENT (OUT) :: val + INTEGER, INTENT (IN) :: i, j + INTEGER :: lda, n, ib, irow, jcol +! + lda = SIZE(mat%val, 1) + n = mat%rank + IF( i .LE. j ) THEN ! Upper triangular matrix + irow = i; jcol = j + ELSE ! Lower triangular matrix + irow = j; jcol = i + END IF + ib = mat%ku + irow - jcol + 1 + IF( (ib .GT. lda) .OR. (jcol .GT. n)) THEN + WRITE(*,*) 'GETELE: i, j out of range ', i, j + STOP '*** Abnormal EXIT in MODULE matrix***' + END IF + val = mat%val(ib,jcol) + END SUBROUTINE getele_pb +!=========================================================================== + SUBROUTINE getele_zgb(mat, i, j, val) +! +! Get element (i,j) of matrix +! + TYPE(zgbmat), INTENT(in) :: mat + DOUBLE COMPLEX, INTENT (OUT) :: val + INTEGER, INTENT (IN) :: i, j + INTEGER :: lda, n, ib +! + lda = SIZE(mat%val, 1) + n = mat%rank + ib = mat%kl + mat%ku + i - j + 1 + IF( (ib .GT. lda) .OR. (j .GT. n)) THEN + WRITE(*,*) 'GETELE: i, j out of range ', i, j + STOP '*** Abnormal EXIT in MODULE matrix***' + END IF + val = mat%val(ib,j) + END SUBROUTINE getele_zgb +!=========================================================================== + SUBROUTINE getele_zpb(mat, i, j, val) +! +! Get element (i,j) of matrix +! + TYPE(zpbmat), INTENT(in) :: mat + DOUBLE COMPLEX, INTENT (OUT) :: val + INTEGER, INTENT (IN) :: i, j + INTEGER :: lda, n, ib, irow, jcol +! + lda = SIZE(mat%val, 1) + n = mat%rank +! + IF( i .LE. j ) THEN ! Upper triangular matrix + irow = i; jcol = j + ib = mat%ku + irow - jcol + 1 + IF( (ib .GT. lda) .OR. (jcol .GT. n)) THEN + WRITE(*,*) 'GETELE: i, j out of range ', i, j + STOP '*** Abnormal EXIT in MODULE matrix***' + END IF + val = mat%val(ib,jcol) + RETURN + ELSE ! Lower triangular matrix + irow = j; jcol = i + ib = mat%ku + irow - jcol + 1 + IF( (ib .GT. lda) .OR. (jcol .GT. n)) THEN + WRITE(*,*) 'GETELE: i, j out of range ', i, j + STOP '*** Abnormal EXIT in MODULE matrix***' + END IF + val = CONJG(mat%val(ib,jcol)) + END IF + END SUBROUTINE getele_zpb +!=========================================================================== + SUBROUTINE putele_gb(mat, i, j, val) +! +! Put element (i,j) of matrix +! + TYPE(gbmat), INTENT(inout) :: mat + DOUBLE PRECISION, INTENT (in) :: val + INTEGER, INTENT (in) :: i, j + INTEGER :: lda, n, ib +! + lda = SIZE(mat%val, 1) + n = mat%rank + ib = mat%kl + mat%ku + i - j + 1 + IF( (ib .GT. lda) .OR. (j .GT. n)) THEN + WRITE(*,*) 'GETELE: i, j out of range ', i, j + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + mat%val(ib,j) = val + END SUBROUTINE putele_gb +!=========================================================================== + SUBROUTINE putele_periodic(mat, i, j, val) +! +! Put element Aij into periodic banded matrix +! + TYPE(periodic_mat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE PRECISION, INTENT(in) :: val + INTEGER :: n, kl, ku +! + n = mat%mat%rank + kl = mat%mat%kl + ku = mat%mat%ku +! + IF( i.LE.kl .AND. j.GE.n-kl+1 ) THEN +! +! Put into C(1:kl, 1:kl) = V^T(1:kl, n-kl+1:n) + mat%matvt(i,j) = val +! + ELSE IF( i.GE.n-ku+1 .AND. j.LE.ku ) THEN +! +! Put into D(1:ku,1:ku) = V^T(kl+1:kl+ku, 1:ku) + mat%matvt(i-n+kl+ku,j) = val +! + ELSE +! +! Put into the banded matrix + CALL putele(mat%mat, i, j, val) +! + END IF + END SUBROUTINE putele_periodic +!=========================================================================== + SUBROUTINE putele_zperiodic(mat, i, j, val) +! +! Put element Aij into periodic banded matrix +! + TYPE(zperiodic_mat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE COMPLEX, INTENT(in) :: val + INTEGER :: n, kl, ku +! + n = mat%mat%rank + kl = mat%mat%kl + ku = mat%mat%ku +! + IF( i.LE.kl .AND. j.GE.n-kl+1 ) THEN +! +! Put into C(1:kl, 1:kl) = V^T(1:kl, n-kl+1:n) + mat%matvt(i,j) = val +! + ELSE IF( i.GE.n-ku+1 .AND. j.LE.ku ) THEN +! +! Put into D(1:ku,1:ku) = V^T(kl+1:kl+ku, 1:ku) + mat%matvt(i-n+kl+ku,j) = val +! + ELSE +! +! Put into the banded matrix + CALL putele(mat%mat, i, j, val) +! + END IF + END SUBROUTINE putele_zperiodic +!=========================================================================== + SUBROUTINE putele_pb(mat, i, j, val) +! +! Put element (i,j) of matrix +! + TYPE(pbmat), INTENT(inout) :: mat + DOUBLE PRECISION, INTENT (in) :: val + INTEGER, INTENT (IN) :: i, j + INTEGER :: lda, n, ib, irow, jcol +! + lda = SIZE(mat%val, 1) + n = mat%rank + IF( i .LE. j ) THEN ! Upper triangular matrix + irow = i; jcol = j + ELSE ! Lower triangular matrix + irow = j; jcol = i + END IF + ib = mat%ku + irow - jcol + 1 + IF( (ib .GT. lda) .OR. (jcol .GT. n)) THEN + WRITE(*,*) 'PUTELE: i, j out of range ', i, j + STOP '*** Abnormal EXIT in MODULE matrix***' + END IF + mat%val(ib,jcol) = val + END SUBROUTINE putele_pb +!=========================================================================== + SUBROUTINE putele_zgb(mat, i, j, val) +! +! Put element (i,j) of matrix +! + TYPE(zgbmat), INTENT(inout) :: mat + DOUBLE COMPLEX, INTENT (in) :: val + INTEGER, INTENT (in) :: i, j + INTEGER :: lda, n, ib +! + lda = SIZE(mat%val, 1) + n = mat%rank + ib = mat%kl + mat%ku + i - j + 1 + IF( (ib .GT. lda) .OR. (j .GT. n)) THEN + WRITE(*,*) 'GETELE: i, j out of range ', i, j + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + mat%val(ib,j) = val + END SUBROUTINE putele_zgb +!=========================================================================== + SUBROUTINE putele_zpb(mat, i, j, val) +! +! Put element (i,j) of matrix +! + TYPE(zpbmat), INTENT(inout) :: mat + DOUBLE COMPLEX, INTENT (in) :: val + INTEGER, INTENT (IN) :: i, j + INTEGER :: lda, n, ib, irow, jcol +! + lda = SIZE(mat%val, 1) + n = mat%rank + IF( i .LE. j ) THEN ! Upper triangular matrix + irow = i; jcol = j + ib = mat%ku + irow - jcol + 1 + IF( (ib .GT. lda) .OR. (jcol .GT. n)) THEN + WRITE(*,*) 'GETELE: i, j out of range ', i, j + STOP '*** Abnormal EXIT in MODULE matrix***' + END IF + mat%val(ib,jcol) = val + ELSE ! Lower triangular matrix + irow = j; jcol = i + ib = mat%ku + irow - jcol + 1 + IF( (ib .GT. lda) .OR. (jcol .GT. n)) THEN + WRITE(*,*) 'PUTELE: i, j out of range ', i, j + STOP '*** Abnormal EXIT in MODULE matrix***' + END IF + mat%val(ib,jcol) = CONJG(val) + END IF + END SUBROUTINE putele_zpb +!=========================================================================== + SUBROUTINE getcol_gb(mat, j, arr) +! +! Get a column from matrix +! + TYPE(gbmat), INTENT(in) :: mat + INTEGER, INTENT(in) :: j + DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: arr + INTEGER :: m, kl, ku + INTEGER :: ibmin, ibmax, imin, imax +! + kl = mat%kl + ku = mat%ku + m = mat%mrows + IF( SIZE(arr) .LT. m ) THEN + WRITE(*,*) 'GETCOL: size of arr too small' + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + arr(1:m) = 0.0d0 + imin = MAX(1,j-ku) + imax = MIN(m, j+kl) + ibmin = kl+ku+imin-j+1 + ibmax = kl+ku+imax-j+1 + arr(imin:imax) = mat%val(ibmin:ibmax,j) + END SUBROUTINE getcol_gb +!=========================================================================== + SUBROUTINE getcol_periodic(mat, j, arr) +! +! Get a column from matrix +! + TYPE(periodic_mat), INTENT(in) :: mat + INTEGER, INTENT(in) :: j + DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: arr + INTEGER :: n, kl, ku +! + kl = mat%mat%kl + ku = mat%mat%ku + n = mat%mat%rank +! + CALL getcol(mat%mat, j, arr) +! + IF( j.GE.n-kl+1 ) THEN + arr(1:kl) = mat%matvt(1:kl,j) + ELSE IF( j.LE.ku ) THEN + arr(n-ku+1:n) = mat%matvt(kl+1:kl+ku,j) + END IF + END SUBROUTINE getcol_periodic +!=========================================================================== + SUBROUTINE getcol_zperiodic(mat, j, arr) +! +! Get a column from matrix +! + TYPE(zperiodic_mat), INTENT(in) :: mat + INTEGER, INTENT(in) :: j + DOUBLE COMPLEX, DIMENSION(:), INTENT(out) :: arr + INTEGER :: n, kl, ku +! + kl = mat%mat%kl + ku = mat%mat%ku + n = mat%mat%rank +! + CALL getcol(mat%mat, j, arr) +! + IF( j.GE.n-kl+1 ) THEN + arr(1:kl) = mat%matvt(1:kl,j) + ELSE IF( j.LE.ku ) THEN + arr(n-ku+1:n) = mat%matvt(kl+1:kl+ku,j) + END IF + END SUBROUTINE getcol_zperiodic +!=========================================================================== + SUBROUTINE getcol_pb(mat, j, arr) +! +! Get a column from matrix +! + TYPE(pbmat), INTENT(in) :: mat + INTEGER, INTENT(in) :: j + DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: arr + INTEGER :: n, ku + INTEGER :: i, ib, ibmin, ibmax, imin, imax +! + ku = mat%ku + n = mat%rank + IF( SIZE(arr) .LT. n ) THEN + WRITE(*,*) 'GETCOL: size of arr too small' + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + arr(1:n) = 0.0d0 +! + imin=MAX(1,j-ku); imax=j ! The column in the upper diagonal part + ibmin=ku+1+imin-j ; ibmax=ku+1+imax-j + arr(imin:imax) = mat%val(ibmin:ibmax,j) +! + imin=j+1; imax = MIN(n,j+ku) ! The column in the lower diagonal part + DO i=imin,imax + ib = ku+1+j-i + arr(i) = mat%val(ib,i) + END DO + END SUBROUTINE getcol_pb +!=========================================================================== + SUBROUTINE getcol_zgb(mat, j, arr) +! +! Get a column from matrix +! + TYPE(zgbmat), INTENT(in) :: mat + INTEGER, INTENT(in) :: j + DOUBLE COMPLEX, DIMENSION(:), INTENT(out) :: arr + INTEGER :: m, kl, ku + INTEGER :: ibmin, ibmax, imin, imax +! + kl = mat%kl + ku = mat%ku + m = mat%mrows + IF( SIZE(arr) .LT. m ) THEN + WRITE(*,*) 'GETCOL: size of arr too small' + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + arr(1:m) = 0.0d0 + imin = MAX(1,j-ku) + imax = MIN(m, j+kl) + ibmin = kl+ku+imin-j+1 + ibmax = kl+ku+imax-j+1 + arr(imin:imax) = mat%val(ibmin:ibmax,j) + END SUBROUTINE getcol_zgb +!=========================================================================== + SUBROUTINE getcol_zpb(mat, j, arr) +! +! Get a column from matrix +! + TYPE(zpbmat), INTENT(in) :: mat + INTEGER, INTENT(in) :: j + DOUBLE COMPLEX, DIMENSION(:), INTENT(out) :: arr + INTEGER :: n, ku + INTEGER :: i, ib, ibmin, ibmax, imin, imax +! + ku = mat%ku + n = mat%rank + IF( SIZE(arr) .LT. n ) THEN + WRITE(*,*) 'GETCOL: size of arr too small' + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + arr(1:n) = 0.0d0 +! + imin=MAX(1,j-ku); imax=j ! The column in the upper diagonal part + ibmin=ku+1+imin-j ; ibmax=ku+1+imax-j + arr(imin:imax) = mat%val(ibmin:ibmax,j) +! + imin=j+1; imax = MIN(n,j+ku) ! The column in the lower diagonal part + DO i=imin,imax + ib = ku+1+j-i + arr(i) = CONJG(mat%val(ib,i)) + END DO + END SUBROUTINE getcol_zpb +!=========================================================================== + SUBROUTINE getrow_gb(mat, i, arr) +! +! Get a row from matrix +! + TYPE(gbmat), INTENT(in) :: mat + INTEGER, INTENT(in) :: i + DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: arr + INTEGER :: n, kl, ku + INTEGER :: j, ib, jmin, jmax +! + kl = mat%kl + ku = mat%ku + n = mat%rank + IF( SIZE(arr) .LT. n ) THEN + WRITE(*,*) 'GETROW: size of arr too small' + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + arr(1:n) = 0.0d0 + jmin = MAX(1,i-kl) + jmax = MIN(n, i+ku) + DO j=jmin,jmax + ib = kl+ku+i-j+1 + arr(j) = mat%val(ib,j) + END DO + END SUBROUTINE getrow_gb +!=========================================================================== + SUBROUTINE getrow_ge(mat, i, arr) +! +! Get a row from matrix +! + TYPE(gemat), INTENT(in) :: mat + INTEGER, INTENT(in) :: i + DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: arr + INTEGER :: n +! + n = mat%ncols + IF( SIZE(arr) .LT. n ) THEN + WRITE(*,*) 'GETROW: size of arr too small' + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + arr(1:n) = mat%val(i,1:n) + END SUBROUTINE getrow_ge +!=========================================================================== + SUBROUTINE getrow_periodic(mat, i, arr) +! +! Get a row from matrix +! + TYPE(periodic_mat), INTENT(in) :: mat + INTEGER, INTENT(in) :: i + DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: arr + INTEGER :: n, kl, ku +! + kl = mat%mat%kl + ku = mat%mat%ku + n = mat%mat%rank +! + CALL getrow(mat%mat, i, arr) +! + IF( i.LE.kl ) THEN + arr(n-kl+1:n) = mat%matvt(i,n-kl+1:n) + ELSE IF( i.GE.n-ku+1 ) THEN + arr(1:ku) = mat%matvt(i-n+kl+ku,1:ku) + END IF + END SUBROUTINE getrow_periodic +!=========================================================================== + SUBROUTINE getrow_zperiodic(mat, i, arr) +! +! Get a row from matrix +! + TYPE(zperiodic_mat), INTENT(in) :: mat + INTEGER, INTENT(in) :: i + DOUBLE COMPLEX, DIMENSION(:), INTENT(out) :: arr + INTEGER :: n, kl, ku +! + kl = mat%mat%kl + ku = mat%mat%ku + n = mat%mat%rank +! + CALL getrow(mat%mat, i, arr) +! + IF( i.LE.kl ) THEN + arr(n-kl+1:n) = mat%matvt(i,n-kl+1:n) + ELSE IF( i.GE.n-ku+1 ) THEN + arr(1:ku) = mat%matvt(i-n+kl+ku,1:ku) + END IF + END SUBROUTINE getrow_zperiodic +!=========================================================================== + SUBROUTINE getrow_pb(mat, i, arr) +! +! Get a row from matrix +! + TYPE(pbmat), INTENT(in) :: mat + INTEGER, INTENT(in) :: i + DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: arr + INTEGER :: n, ku + INTEGER :: j, ib, ibmin, ibmax, jmin, jmax +! + ku = mat%ku + n = mat%rank + IF( SIZE(arr) .LT. n ) THEN + WRITE(*,*) 'GETROW: size of arr too small' + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + arr(1:n) = 0.0d0 +! + jmin=i; jmax=MIN(n,i+ku) + DO j=jmin,jmax + ib=ku+1+i-j + arr(j) = mat%val(ib,j) + END DO +! + jmin=MAX(1,i-ku); jmax=i-1 + DO j=jmin,jmax + ib=ku+1+j-i + arr(j) = mat%val(ib,i) + END DO + END SUBROUTINE getrow_pb +!=========================================================================== + SUBROUTINE getrow_zgb(mat, i, arr) +! +! Get a row from matrix +! + TYPE(zgbmat), INTENT(in) :: mat + INTEGER, INTENT(in) :: i + DOUBLE COMPLEX, DIMENSION(:), INTENT(out) :: arr + INTEGER :: n, kl, ku + INTEGER :: j, ib, jmin, jmax +! + kl = mat%kl + ku = mat%ku + n = mat%rank + IF( SIZE(arr) .LT. n ) THEN + WRITE(*,*) 'GETROW: size of arr too small' + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + arr(1:n) = 0.0d0 + jmin = MAX(1,i-kl) + jmax = MIN(n, i+ku) + DO j=jmin,jmax + ib = kl+ku+i-j+1 + arr(j) = mat%val(ib,j) + END DO + END SUBROUTINE getrow_zgb +!=========================================================================== + SUBROUTINE getrow_zpb(mat, i, arr) +! +! Get a row from matrix +! + TYPE(zpbmat), INTENT(in) :: mat + INTEGER, INTENT(in) :: i + DOUBLE COMPLEX, DIMENSION(:), INTENT(out) :: arr + INTEGER :: n, ku + INTEGER :: j, ib, ibmin, ibmax, jmin, jmax +! + ku = mat%ku + n = mat%rank + IF( SIZE(arr) .LT. n ) THEN + WRITE(*,*) 'GETROW: size of arr too small' + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + arr(1:n) = 0.0d0 +! + jmin=i; jmax=MIN(n,i+ku) + DO j=jmin,jmax + ib=ku+1+i-j + arr(j) = mat%val(ib,j) + END DO +! + jmin=MAX(1,i-ku); jmax=i-1 + DO j=jmin,jmax + ib=ku+1+j-i + arr(j) = CONJG(mat%val(ib,i)) + END DO + END SUBROUTINE getrow_zpb +!=========================================================================== + SUBROUTINE putcol_gb(mat, j, arr) +! +! Put a column from matrix +! + TYPE(gbmat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: j + DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: arr + INTEGER :: m, kl, ku + INTEGER :: ibmin, ibmax, imin, imax +! + kl = mat%kl + ku = mat%ku + m = mat%mrows + IF( SIZE(arr) .LT. m ) THEN + WRITE(*,*) 'PUTCOL: size of arr too small' + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + imin = MAX(1,j-ku) + imax = MIN(m, j+kl) + ibmin = kl+ku+imin-j+1 + ibmax = kl+ku+imax-j+1 + mat%val(ibmin:ibmax,j) = arr(imin:imax) + END SUBROUTINE putcol_gb +!=========================================================================== + SUBROUTINE putcol_ge(mat, j, arr) +! +! Put a column from matrix +! + TYPE(gemat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: j + DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: arr + INTEGER :: m +! + m = mat%mrows + IF( SIZE(arr) .LT. m ) THEN + WRITE(*,*) 'PUTCOL: size of arr too small' + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + mat%val(:,j) = arr(:) + END SUBROUTINE putcol_ge +!=========================================================================== + SUBROUTINE putrow_periodic(mat, i, arr) +! +! Put a row to matrix +! + TYPE(periodic_mat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: i + DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: arr + INTEGER :: n, kl, ku +! + kl = mat%mat%kl + ku = mat%mat%ku + n = mat%mat%rank +! + CALL putrow(mat%mat, i, arr) +! + IF( i.LE.kl ) THEN + mat%matvt(i,n-kl+1:n) = arr(n-kl+1:n) + ELSE IF( i.GE.n-ku+1 ) THEN + mat%matvt(i-n+kl+ku,1:ku) = arr(1:ku) + END IF + END SUBROUTINE putrow_periodic +!=========================================================================== + SUBROUTINE putrow_zperiodic(mat, i, arr) +! +! Put a row to matrix +! + TYPE(zperiodic_mat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: i + DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: arr + INTEGER :: n, kl, ku +! + kl = mat%mat%kl + ku = mat%mat%ku + n = mat%mat%rank +! + CALL putrow(mat%mat, i, arr) +! + IF( i.LE.kl ) THEN + mat%matvt(i,n-kl+1:n) = arr(n-kl+1:n) + ELSE IF( i.GE.n-ku+1 ) THEN + mat%matvt(i-n+kl+ku,1:ku) = arr(1:ku) + END IF + END SUBROUTINE putrow_zperiodic +!=========================================================================== + SUBROUTINE putcol_periodic(mat, j, arr) +! +! Put a column into matrix +! + TYPE(periodic_mat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: j + DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: arr + INTEGER :: n, kl, ku +! + kl = mat%mat%kl + ku = mat%mat%ku + n = mat%mat%rank +! + CALL putcol(mat%mat, j, arr) +! + IF( j.GE.n-kl+1 ) THEN + mat%matvt(1:kl,j) = arr(1:kl) + ELSE IF( j.LE.ku ) THEN + mat%matvt(kl+1:kl+ku,j) = arr(n-ku+1:n) + END IF + END SUBROUTINE putcol_periodic +!=========================================================================== + SUBROUTINE putcol_zperiodic(mat, j, arr) +! +! Put a column into matrix +! + TYPE(zperiodic_mat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: j + DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: arr + INTEGER :: n, kl, ku +! + kl = mat%mat%kl + ku = mat%mat%ku + n = mat%mat%rank +! + CALL putcol(mat%mat, j, arr) +! + IF( j.GE.n-kl+1 ) THEN + mat%matvt(1:kl,j) = arr(1:kl) + ELSE IF( j.LE.ku ) THEN + mat%matvt(kl+1:kl+ku,j) = arr(n-ku+1:n) + END IF + END SUBROUTINE putcol_zperiodic +!=========================================================================== + SUBROUTINE putcol_pb(mat, j, arr) +! +! Put a column from matrix +! + TYPE(pbmat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: j + DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: arr + INTEGER :: n, ku + INTEGER :: i, ib, ibmin, ibmax, imin, imax +! + ku = mat%ku + n = mat%rank + IF( SIZE(arr) .LT. n ) THEN + WRITE(*,*) 'PUTCOL: size of arr too small' + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + imin = MAX(1,j-ku); imax = j ! The column in the upper diagonal part + ibmin = ku+imin-j+1; ibmax = ku+imax-j+1 + mat%val(ibmin:ibmax,j) = arr(imin:imax) + ! + imin=j+1; imax = MIN(n,j+ku) ! The column in the lower diagonal part + DO i=imin,imax + ib = ku+1+j-i + mat%val(ib,i) = arr(i) + END DO + END SUBROUTINE putcol_pb +!=========================================================================== + SUBROUTINE putcol_zgb(mat, j, arr) +! +! Put a column from matrix +! + TYPE(zgbmat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: j + DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: arr + INTEGER :: m, kl, ku + INTEGER :: ibmin, ibmax, imin, imax +! + kl = mat%kl + ku = mat%ku + m = mat%mrows + IF( SIZE(arr) .LT. m ) THEN + WRITE(*,*) 'PUTCOL: size of arr too small' + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + imin = MAX(1,j-ku) + imax = MIN(m, j+kl) + ibmin = kl+ku+imin-j+1 + ibmax = kl+ku+imax-j+1 + mat%val(ibmin:ibmax,j) = arr(imin:imax) + END SUBROUTINE putcol_zgb +!=========================================================================== + SUBROUTINE putcol_zpb(mat, j, arr) +! +! Put a column from matrix +! + TYPE(zpbmat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: j + DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: arr + INTEGER :: n, ku, i, ib + INTEGER :: ibmin, ibmax, imin, imax +! + ku = mat%ku + n = mat%rank + IF( SIZE(arr) .LT. n ) THEN + WRITE(*,*) 'PUTCOL: size of arr too small' + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF +! + imin=MAX(1,j-ku); imax=j ! The column in the upper diagonal part + ibmin=ku+1+imin-j ; ibmax=ku+1+imax-j + mat%val(ibmin:ibmax,j) = arr(imin:imax) +! + imin=j+1; imax = MIN(n,j+ku) ! The column in the lower diagonal part + DO i=imin,imax + ib = ku+1+j-i + mat%val(ib,i) = CONJG(arr(i)) + END DO + END SUBROUTINE putcol_zpb +!=========================================================================== + SUBROUTINE putrow_gb(mat, i, arr) +! +! Put a row from matrix +! + TYPE(gbmat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: i + DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: arr + INTEGER :: n, kl, ku + INTEGER :: j, ib, jmin, jmax +! + kl = mat%kl + ku = mat%ku + n = mat%rank + IF( SIZE(arr) .LT. n ) THEN + WRITE(*,*) 'GETCOL: size of arr too small' + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + jmin = MAX(1,i-kl) + jmax = MIN(n, i+ku) + DO j=jmin,jmax + ib = kl+ku+i-j+1 + mat%val(ib,j) = arr(j) + END DO + END SUBROUTINE putrow_gb +!=========================================================================== + SUBROUTINE putrow_ge(mat, i, arr) +! +! Put a row from matrix +! + TYPE(gemat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: i + DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: arr + INTEGER :: n, kl, ku + INTEGER :: j, ib, jmin, jmax +! + n = mat%rank + IF( SIZE(arr) .LT. n ) THEN + WRITE(*,*) 'PUTROW: size of arr too small' + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + mat%val(i,:) = arr(:) + END SUBROUTINE putrow_ge +!=========================================================================== + SUBROUTINE putrow_pb(mat, i, arr) +! +! Put a row from matrix +! + TYPE(pbmat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: i + DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: arr + INTEGER :: n, ku + INTEGER :: j, ib, jmin, jmax +! + ku = mat%ku + n = mat%rank + IF( SIZE(arr) .LT. n ) THEN + WRITE(*,*) 'PUTROW: size of arr too small' + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + jmin = i + jmax = MIN(n, i+ku) + DO j=jmin,jmax + ib = ku+i-j+1 + mat%val(ib,j) = arr(j) + END DO +! + jmin=MAX(1,i-ku); jmax=i-1 + DO j=jmin,jmax + ib=ku+1+j-i + mat%val(ib,i) = arr(j) + END DO + END SUBROUTINE putrow_pb +!=========================================================================== + SUBROUTINE putrow_zgb(mat, i, arr) +! +! Put a row from matrix +! + TYPE(zgbmat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: i + DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: arr + INTEGER :: n, kl, ku + INTEGER :: j, ib, jmin, jmax +! + kl = mat%kl + ku = mat%ku + n = mat%rank + IF( SIZE(arr) .LT. n ) THEN + WRITE(*,*) 'PUTROW: size of arr too small' + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + jmin = MAX(1,i-kl) + jmax = MIN(n, i+ku) + DO j=jmin,jmax + ib = kl+ku+i-j+1 + mat%val(ib,j) = arr(j) + END DO + END SUBROUTINE putrow_zgb +!=========================================================================== + SUBROUTINE putrow_zpb(mat, i, arr) +! +! Put a row from matrix +! + TYPE(zpbmat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: i + DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: arr + INTEGER :: n, ku + INTEGER :: j, ib, jmin, jmax +! + ku = mat%ku + n = mat%rank + IF( SIZE(arr) .LT. n ) THEN + WRITE(*,*) 'PUTROW: size of arr too small' + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + jmin = i + jmax = MIN(n, i+ku) + DO j=jmin,jmax + ib = ku+i-j+1 + mat%val(ib,j) = arr(j) + END DO +! + jmin=MAX(1,i-ku); jmax=i-1 + DO j=jmin,jmax + ib=ku+1+j-i + mat%val(ib,i) = CONJG(arr(j)) + END DO + END SUBROUTINE putrow_zpb +!=========================================================================== + SUBROUTINE factor_gb(mat,flops) +! +! Factor the matrix, using Lapack +! + TYPE(gbmat), INTENT(inout) :: mat + DOUBLE PRECISION, OPTIONAL, INTENT (OUT) :: flops + INTEGER :: lda, n, m, kl, ku + INTEGER :: info + DOUBLE PRECISION :: dopgb + EXTERNAL dopgb +! + lda = SIZE(mat%val, 1) + kl = mat%kl + ku = mat%ku + m = mat%mrows + n = mat%ncols + CALL dgbtrf(m, n, kl, ku, mat%val, lda, mat%piv, info) + IF( info .NE. 0) THEN + WRITE(*,*) 'FACTOR: info from GBTRF ', info + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + IF( PRESENT(flops) ) THEN + flops = dopgb('DGBTRF',m, n, kl, ku, mat%piv) + END IF + END SUBROUTINE factor_gb +!=========================================================================== + SUBROUTINE factor_periodic(mat) +! +! Factor the periodic GB matrix, using the +! Sherman-Morrisson-Woodburry formula +! + TYPE(periodic_mat), INTENT(inout) :: mat + TYPE(gemat) :: hmat + DOUBLE PRECISION :: one=1.0d0 + INTEGER :: bandw, mr, nc, i +! + bandw = SIZE(mat%matvt,1) +! +! Factor A + CALL factor(mat%mat) +! + IF(bandw .EQ. 0 ) RETURN ! No off band terms +! +! U <-- A^(-1) * U + CALL bsolve(mat%mat, mat%matu) +! +! H <-- 1 + V^T * U + mr = SIZE(mat%matvt, 1) + nc = SIZE(mat%matvt, 2) + CALL init(mr, 0, hmat) ! hmat is initialized to 0! + DO i=1,mr + hmat%val(i,i) = one + END DO + CALL dgemm('N', 'N', mr, mr, nc, one, mat%matvt, mr, & + & mat%matu, nc, one, hmat%val, mr) +! +! V^T <-- H^(-1) V^T + CALL factor(hmat) + CALL bsolve(hmat, mat%matvt) + CALL destroy(hmat) +! + END SUBROUTINE factor_periodic +!=========================================================================== + SUBROUTINE factor_zperiodic(mat) +! +! Factor the periodic GB matrix, using the +! Sherman-Morrisson-Woodburry formula +! + TYPE(zperiodic_mat), INTENT(inout) :: mat + TYPE(zgemat) :: hmat + DOUBLE COMPLEX :: one=1.0d0 + INTEGER :: bandw, mr, nc, i +! + bandw = SIZE(mat%matvt,1) +! +! Factor A + CALL factor(mat%mat) +! + IF(bandw .EQ. 0 ) RETURN ! No off band terms +! +! U <-- A^(-1) * U + CALL bsolve(mat%mat, mat%matu) +! +! H <-- 1 + V^T * U + mr = SIZE(mat%matvt, 1) + nc = SIZE(mat%matvt, 2) + CALL init(mr, 0, hmat) ! hmat is initialized to 0! + DO i=1,mr + hmat%val(i,i) = one + END DO + CALL zgemm('N', 'N', mr, mr, nc, one, mat%matvt, mr, & + & mat%matu, nc, one, hmat%val, mr) +! +! V^T <-- H^(-1) V^T + CALL factor(hmat) + CALL bsolve(hmat, mat%matvt) + CALL destroy(hmat) +! + END SUBROUTINE factor_zperiodic +!=========================================================================== + SUBROUTINE factor_pb(mat,flops) +! +! Factor the matrix, using Lapack +! + TYPE(pbmat), INTENT(inout) :: mat + DOUBLE PRECISION, OPTIONAL, INTENT (OUT) :: flops + INTEGER :: lda, n, ku + INTEGER :: info + DOUBLE PRECISION :: dopla + EXTERNAL dopla +! + lda = SIZE(mat%val, 1) + ku = mat%ku + n = mat%rank + CALL dpbtrf('U', n, ku, mat%val, lda, info) + IF( info .NE. 0) THEN + WRITE(*,*) 'FACTOR: info from PBTRF ', info + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + IF( PRESENT(flops) ) THEN + flops = dopla('DPBTRF', n, n, ku, ku, 1) + END IF + END SUBROUTINE factor_pb +!=========================================================================== + SUBROUTINE factor_ge(mat,flops) +! +! Factor the matrix, using Lapack +! + TYPE(gemat), INTENT(inout) :: mat + DOUBLE PRECISION, OPTIONAL, INTENT (OUT) :: flops + INTEGER :: n, m + INTEGER :: info + DOUBLE PRECISION :: dopla + EXTERNAL dopla +! + m = mat%mrows + n = mat%ncols + CALL dgetrf(m, n, mat%val, m, mat%piv, info) + IF( info .NE. 0) THEN + WRITE(*,*) 'FACTOR: info from GETRF ', info + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + IF( PRESENT(flops) ) THEN + flops = dopla('DGETRF',m, n, 0, 0, 0) + END IF + END SUBROUTINE factor_ge +!=========================================================================== + SUBROUTINE factor_zgb(mat,flops) +! +! Factor the matrix, using Lapack +! + TYPE(zgbmat), INTENT(inout) :: mat + DOUBLE PRECISION, OPTIONAL, INTENT (OUT) :: flops + INTEGER :: lda, n, m, kl, ku + INTEGER :: info + DOUBLE PRECISION :: dopgb + EXTERNAL dopgb +! + lda = SIZE(mat%val, 1) + kl = mat%kl + ku = mat%ku + m = mat%mrows + n = mat%ncols + CALL zgbtrf(m, n, kl, ku, mat%val, lda, mat%piv, info) + IF( info .NE. 0) THEN + WRITE(*,*) 'FACTOR: info from GBTRF ', info + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + IF( PRESENT(flops) ) THEN + flops = dopgb('ZGBTRF',m, n, kl, ku, mat%piv) + END IF + END SUBROUTINE factor_zgb +!=========================================================================== + SUBROUTINE factor_zpb(mat,flops) +! +! Factor the matrix, using Lapack +! + TYPE(zpbmat), INTENT(inout) :: mat + DOUBLE PRECISION, OPTIONAL, INTENT (OUT) :: flops + INTEGER :: lda, n, ku + INTEGER :: info + DOUBLE PRECISION :: dopla + EXTERNAL dopla +! + lda = SIZE(mat%val, 1) + ku = mat%ku + n = mat%rank + CALL zpbtrf('U', n, ku, mat%val, lda, info) + IF( info .NE. 0) THEN + WRITE(*,*) 'FACTOR: info from PBTRF ', info + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + IF( PRESENT(flops) ) THEN + flops = dopla('ZPBTRF', n, n, ku, ku, 1) + END IF + END SUBROUTINE factor_zpb +!=========================================================================== + SUBROUTINE factor_zge(mat,flops) +! +! Factor the matrix, using Lapack +! + TYPE(zgemat), INTENT(inout) :: mat + DOUBLE PRECISION, OPTIONAL, INTENT (OUT) :: flops + INTEGER :: n, m + INTEGER :: info + DOUBLE PRECISION :: dopla + EXTERNAL dopla +! + m = mat%mrows + n = mat%ncols + CALL zgetrf(m, n, mat%val, m, mat%piv, info) + IF( info .NE. 0) THEN + WRITE(*,*) 'FACTOR: info from GETRF ', info + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + IF( PRESENT(flops) ) THEN + flops = dopla('ZGETRF',m, n, 0, 0, 0) + END IF + END SUBROUTINE factor_zge +!=========================================================================== + SUBROUTINE bsolve_gb1(mat, rhs, sol) +! +! Backsolve, using Lapack +! + TYPE(gbmat), INTENT(inout) :: mat + DOUBLE PRECISION, DIMENSION (:) :: rhs + DOUBLE PRECISION, DIMENSION (:), OPTIONAL, INTENT (out) :: sol + INTEGER :: lda, n, kl, ku + INTEGER :: info +!---------------------------------------------------------------------- + lda = SIZE(mat%val, 1) + kl = mat%kl + ku = mat%ku + n = mat%rank +! + IF( PRESENT(sol) ) THEN + sol = rhs + CALL dgbtrs('N', n, kl, ku, 1, mat%val, lda, mat%piv, sol, n, info) + ELSE + CALL dgbtrs('N', n, kl, ku, 1, mat%val, lda, mat%piv, rhs, n, info) + END IF + IF( info .NE. 0) THEN + WRITE(*,*) 'FACTOR: info from GBTRS ', info + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + END SUBROUTINE bsolve_gb1 +!=========================================================================== + SUBROUTINE bsolve_periodic1(mat, rhs, sol) +! +! Backsolve, using the Sherman-Morrison-Woodburry formula +! + TYPE(periodic_mat), INTENT(inout) :: mat + DOUBLE PRECISION, DIMENSION (:) :: rhs + DOUBLE PRECISION, DIMENSION (:), OPTIONAL, INTENT (out) :: sol + DOUBLE PRECISION :: one=1.0d0, zero=0.0d0, minus1=-1.0d0 + DOUBLE PRECISION, ALLOCATABLE :: tarr(:,:) + INTEGER :: rank, bandw, nrhs + INTEGER :: info +!---------------------------------------------------------------------- + rank = mat%mat%rank + bandw = SIZE(mat%matvt,1) + nrhs = 1 +! +! Solve Ay = f + IF( PRESENT(sol) ) THEN + CALL bsolve(mat%mat, rhs, sol) + ELSE + CALL bsolve(mat%mat, rhs) + END IF +! + IF(bandw .EQ. 0 ) RETURN ! No off band terms +! +! t = V^T*y ( = W^T*y ) + ALLOCATE(tarr(bandw,nrhs)) + IF( PRESENT(sol) ) THEN + CALL dgemm('N', 'N', bandw, nrhs, rank, one, mat%matvt, bandw, sol, & + & rank, zero, tarr, bandw) + ELSE + CALL dgemm('N', 'N', bandw, nrhs, rank, one, mat%matvt, bandw, rhs, & + & rank, zero, tarr, bandw) + END IF +! +! y = y - U*t ( = y-Z*t) + IF( PRESENT(sol) ) THEN + CALL dgemm('N', 'N', rank, nrhs, bandw, minus1, mat%matu, rank, tarr, & + & bandw, one, sol, rank) + ELSE + CALL dgemm('N', 'N', rank, nrhs, bandw, minus1, mat%matu, rank, tarr, & + & bandw, one, rhs, rank) + END IF +! + DEALLOCATE(tarr) + END SUBROUTINE bsolve_periodic1 +!=========================================================================== + SUBROUTINE bsolve_zperiodic1(mat, rhs, sol) +! +! Backsolve, using the Sherman-Morrison-Woodburry formula +! + TYPE(zperiodic_mat), INTENT(inout) :: mat + DOUBLE COMPLEX, DIMENSION (:) :: rhs + DOUBLE COMPLEX, DIMENSION (:), OPTIONAL, INTENT (out) :: sol + DOUBLE COMPLEX :: one=1.0d0, zero=0.0d0, minus1=-1.0d0 + DOUBLE COMPLEX, ALLOCATABLE :: tarr(:,:) + INTEGER :: rank, bandw, nrhs + INTEGER :: info +!---------------------------------------------------------------------- + rank = mat%mat%rank + bandw = SIZE(mat%matvt,1) + nrhs = 1 +! +! Solve Ay = f + IF( PRESENT(sol) ) THEN + CALL bsolve(mat%mat, rhs, sol) + ELSE + CALL bsolve(mat%mat, rhs) + END IF +! + IF(bandw .EQ. 0 ) RETURN ! No off band terms +! +! t = V^T*y ( = W^T*y ) + ALLOCATE(tarr(bandw,nrhs)) + IF( PRESENT(sol) ) THEN + CALL zgemm('N', 'N', bandw, nrhs, rank, one, mat%matvt, bandw, sol, & + & rank, zero, tarr, bandw) + ELSE + CALL zgemm('N', 'N', bandw, nrhs, rank, one, mat%matvt, bandw, rhs, & + & rank, zero, tarr, bandw) + END IF +! +! y = y - U*t ( = y-Z*t) + IF( PRESENT(sol) ) THEN + CALL zgemm('N', 'N', rank, nrhs, bandw, minus1, mat%matu, rank, tarr, & + & bandw, one, sol, rank) + ELSE + CALL zgemm('N', 'N', rank, nrhs, bandw, minus1, mat%matu, rank, tarr, & + & bandw, one, rhs, rank) + END IF +! + DEALLOCATE(tarr) + END SUBROUTINE bsolve_zperiodic1 +!=========================================================================== + SUBROUTINE bsolve_pb1(mat, rhs, sol) +! +! Backsolve, using Lapack +! + TYPE(pbmat), INTENT(inout) :: mat + DOUBLE PRECISION, DIMENSION (:) :: rhs + DOUBLE PRECISION, DIMENSION (:), OPTIONAL, INTENT (out) :: sol + INTEGER :: lda, n, ku + INTEGER :: info +!---------------------------------------------------------------------- + lda = SIZE(mat%val, 1) + ku = mat%ku + n = mat%rank +! + IF( PRESENT(sol) ) THEN + sol = rhs + CALL dpbtrs('U', n, ku, 1, mat%val, lda, sol, n, info) + ELSE + CALL dpbtrs('U', n, ku, 1, mat%val, lda, rhs, n, info) + END IF + IF( info .NE. 0) THEN + WRITE(*,*) 'FACTOR: info from PBTRS ', info + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + END SUBROUTINE bsolve_pb1 +!=========================================================================== + SUBROUTINE bsolve_ge1(mat, rhs, sol) +! +! Backsolve, using Lapack +! + TYPE(gemat), INTENT(inout) :: mat + DOUBLE PRECISION, DIMENSION (:) :: rhs + DOUBLE PRECISION, DIMENSION (:), OPTIONAL, INTENT (out) :: sol + INTEGER :: n + INTEGER :: info +!---------------------------------------------------------------------- + n = mat%rank +! + IF( PRESENT(sol) ) THEN + sol = rhs + CALL dgetrs('N', n, 1, mat%val, n, mat%piv, sol, n, info) + ELSE + CALL dgetrs('N', n, 1, mat%val, n, mat%piv, rhs, n, info) + END IF + IF( info .NE. 0) THEN + WRITE(*,*) 'FACTOR: info from GETRS ', info + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + END SUBROUTINE bsolve_ge1 +!=========================================================================== + SUBROUTINE bsolve_gbn(mat, rhs, sol) +! +! Backsolve, using Lapack +! + TYPE(gbmat), INTENT(inout) :: mat + DOUBLE PRECISION, DIMENSION (:,:) :: rhs + DOUBLE PRECISION, DIMENSION (:,:), OPTIONAL, INTENT (out) :: sol + INTEGER :: lda, n, nrhs, kl, ku + INTEGER :: info +!---------------------------------------------------------------------- + lda = SIZE(mat%val, 1) + kl = mat%kl + ku = mat%ku + n = mat%rank + nrhs = SIZE(rhs,2) +! + IF( PRESENT(sol) ) THEN + sol(:,1:nrhs) = rhs(:,1:nrhs) + CALL dgbtrs('N', n, kl, ku, nrhs, mat%val, lda, mat%piv, sol, n, info) + ELSE + CALL dgbtrs('N', n, kl, ku, nrhs, mat%val, lda, mat%piv, rhs, n, info) + END IF + IF( info .NE. 0) THEN + WRITE(*,*) 'FACTOR: info from GBTRS ', info + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + END SUBROUTINE bsolve_gbn +!=========================================================================== + SUBROUTINE bsolve_periodicn(mat, rhs, sol) +! +! Backsolve, using the Sherman-Morrison-Woodburry formula +! + TYPE(periodic_mat), INTENT(inout) :: mat + DOUBLE PRECISION, DIMENSION (:,:) :: rhs + DOUBLE PRECISION, DIMENSION (:,:), OPTIONAL, INTENT (out) :: sol + DOUBLE PRECISION :: one=1.0d0, zero=0.0d0, minus1=-1.0d0 + DOUBLE PRECISION, ALLOCATABLE :: tarr(:,:) + INTEGER :: rank, bandw, nrhs + INTEGER :: info +!---------------------------------------------------------------------- + rank = mat%mat%rank + bandw = SIZE(mat%matvt,1) + nrhs = SIZE(rhs,2) +! +! Solve Ay = f + IF( PRESENT(sol) ) THEN + CALL bsolve(mat%mat, rhs, sol) + ELSE + CALL bsolve(mat%mat, rhs) + END IF +! + IF(bandw .EQ. 0 ) RETURN ! No off band terms +! +! t = V^T*y ( = W^T*y ) + ALLOCATE(tarr(bandw,nrhs)) + IF( PRESENT(sol) ) THEN + CALL dgemm('N', 'N', bandw, nrhs, rank, one, mat%matvt, bandw, sol, & + & rank, zero, tarr, bandw) + ELSE + CALL dgemm('N', 'N', bandw, nrhs, rank, one, mat%matvt, bandw, rhs, & + & rank, zero, tarr, bandw) + END IF +! +! y = y - U*t ( = y-Z*t) + IF( PRESENT(sol) ) THEN + CALL dgemm('N', 'N', rank, nrhs, bandw, minus1, mat%matu, rank, tarr, & + & bandw, one, sol, rank) + ELSE + CALL dgemm('N', 'N', rank, nrhs, bandw, minus1, mat%matu, rank, tarr, & + & bandw, one, rhs, rank) + END IF +! + DEALLOCATE(tarr) + END SUBROUTINE bsolve_periodicn +!=========================================================================== + SUBROUTINE bsolve_zperiodicn(mat, rhs, sol) +! +! Backsolve, using the Sherman-Morrison-Woodburry formula +! + TYPE(zperiodic_mat), INTENT(inout) :: mat + DOUBLE COMPLEX, DIMENSION (:,:) :: rhs + DOUBLE COMPLEX, DIMENSION (:,:), OPTIONAL, INTENT (out) :: sol + DOUBLE COMPLEX :: one=1.0d0, zero=0.0d0, minus1=-1.0d0 + DOUBLE COMPLEX, ALLOCATABLE :: tarr(:,:) + INTEGER :: rank, bandw, nrhs + INTEGER :: info +!---------------------------------------------------------------------- + rank = mat%mat%rank + bandw = SIZE(mat%matvt,1) + nrhs = SIZE(rhs,2) +! +! Solve Ay = f + IF( PRESENT(sol) ) THEN + CALL bsolve(mat%mat, rhs, sol) + ELSE + CALL bsolve(mat%mat, rhs) + END IF +! + IF(bandw .EQ. 0 ) RETURN ! No off band terms +! +! t = V^T*y ( = W^T*y ) + ALLOCATE(tarr(bandw,nrhs)) + IF( PRESENT(sol) ) THEN + CALL zgemm('N', 'N', bandw, nrhs, rank, one, mat%matvt, bandw, sol, & + & rank, zero, tarr, bandw) + ELSE + CALL zgemm('N', 'N', bandw, nrhs, rank, one, mat%matvt, bandw, rhs, & + & rank, zero, tarr, bandw) + END IF +! +! y = y - U*t ( = y-Z*t) + IF( PRESENT(sol) ) THEN + CALL zgemm('N', 'N', rank, nrhs, bandw, minus1, mat%matu, rank, tarr, & + & bandw, one, sol, rank) + ELSE + CALL zgemm('N', 'N', rank, nrhs, bandw, minus1, mat%matu, rank, tarr, & + & bandw, one, rhs, rank) + END IF +! + DEALLOCATE(tarr) + END SUBROUTINE bsolve_zperiodicn +!=========================================================================== + SUBROUTINE bsolve_pbn(mat, rhs, sol) +! +! Backsolve, using Lapack +! + TYPE(pbmat), INTENT(inout) :: mat + DOUBLE PRECISION, DIMENSION (:,:) :: rhs + DOUBLE PRECISION, DIMENSION (:,:), OPTIONAL, INTENT (out) :: sol + INTEGER :: lda, n, nrhs, ku + INTEGER :: info +!---------------------------------------------------------------------- + lda = SIZE(mat%val, 1) + ku = mat%ku + n = mat%rank + nrhs = SIZE(rhs,2) +! + IF( PRESENT(sol) ) THEN + sol(:,1:nrhs) = rhs(:,1:nrhs) + CALL dpbtrs('U', n, ku, nrhs, mat%val, lda, sol, n, info) + ELSE + CALL dpbtrs('U', n, ku, nrhs, mat%val, lda, rhs, n, info) + END IF + IF( info .NE. 0) THEN + WRITE(*,*) 'FACTOR: info from GBTRS ', info + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + END SUBROUTINE bsolve_pbn +!=========================================================================== + SUBROUTINE bsolve_gen(mat, rhs, sol) +! +! Backsolve, using Lapack +! + TYPE(gemat), INTENT(inout) :: mat + DOUBLE PRECISION, DIMENSION (:,:) :: rhs + DOUBLE PRECISION, DIMENSION (:,:), OPTIONAL, INTENT (out) :: sol + INTEGER :: n, nrhs + INTEGER :: info +!---------------------------------------------------------------------- + n = mat%rank + nrhs = SIZE(rhs,2) +! + IF( PRESENT(sol) ) THEN + sol(:,1:nrhs) = rhs + CALL dgetrs('N', n, nrhs, mat%val, n, mat%piv, sol, n, info) + ELSE + CALL dgetrs('N', n, nrhs, mat%val, n, mat%piv, rhs, n, info) + END IF + IF( info .NE. 0) THEN + WRITE(*,*) 'FACTOR: info from GETRS ', info + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + END SUBROUTINE bsolve_gen +!=========================================================================== + SUBROUTINE bsolve_zgb1(mat, rhs, sol) +! +! Backsolve, using Lapack +! + TYPE(zgbmat), INTENT(inout) :: mat + DOUBLE COMPLEX, DIMENSION (:) :: rhs + DOUBLE COMPLEX, DIMENSION (:), OPTIONAL, INTENT (out) :: sol + INTEGER :: lda, n, kl, ku + INTEGER :: info +!---------------------------------------------------------------------- + lda = SIZE(mat%val, 1) + kl = mat%kl + ku = mat%ku + n = mat%rank +! + IF( PRESENT(sol) ) THEN + sol = rhs + CALL zgbtrs('N', n, kl, ku, 1, mat%val, lda, mat%piv, sol, n, info) + ELSE + CALL zgbtrs('N', n, kl, ku, 1, mat%val, lda, mat%piv, rhs, n, info) + END IF + IF( info .NE. 0) THEN + WRITE(*,*) 'FACTOR: info from GBTRS ', info + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + END SUBROUTINE bsolve_zgb1 +!=========================================================================== + SUBROUTINE bsolve_zpb1(mat, rhs, sol) +! +! Backsolve, using Lapack +! + TYPE(zpbmat), INTENT(inout) :: mat + DOUBLE COMPLEX, DIMENSION (:) :: rhs + DOUBLE COMPLEX, DIMENSION (:), OPTIONAL, INTENT (out) :: sol + INTEGER :: lda, n, ku + INTEGER :: info +!---------------------------------------------------------------------- + lda = SIZE(mat%val, 1) + ku = mat%ku + n = mat%rank +! + IF( PRESENT(sol) ) THEN + sol = rhs + CALL zpbtrs('U', n, ku, 1, mat%val, lda, sol, n, info) + ELSE + CALL zpbtrs('U', n, ku, 1, mat%val, lda, rhs, n, info) + END IF + IF( info .NE. 0) THEN + WRITE(*,*) 'FACTOR: info from PBTRS ', info + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + END SUBROUTINE bsolve_zpb1 +!=========================================================================== + SUBROUTINE bsolve_zge1(mat, rhs, sol) +! +! Backsolve, using Lapack +! + TYPE(zgemat), INTENT(inout) :: mat + DOUBLE COMPLEX, DIMENSION (:) :: rhs + DOUBLE COMPLEX, DIMENSION (:), OPTIONAL, INTENT (out) :: sol + INTEGER :: n + INTEGER :: info +!---------------------------------------------------------------------- + n = mat%rank +! + IF( PRESENT(sol) ) THEN + sol = rhs + CALL zgetrs('N', n, 1, mat%val, n, mat%piv, sol, n, info) + ELSE + CALL zgetrs('N', n, 1, mat%val, n, mat%piv, rhs, n, info) + END IF + IF( info .NE. 0) THEN + WRITE(*,*) 'FACTOR: info from GETRS ', info + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + END SUBROUTINE bsolve_zge1 +!=========================================================================== + SUBROUTINE bsolve_zgbn(mat, rhs, sol) +! +! Backsolve, using Lapack +! + TYPE(zgbmat), INTENT(inout) :: mat + DOUBLE COMPLEX, DIMENSION (:,:) :: rhs + DOUBLE COMPLEX, DIMENSION (:,:), OPTIONAL, INTENT (out) :: sol + INTEGER :: lda, n, nrhs, kl, ku + INTEGER :: info +!---------------------------------------------------------------------- + lda = SIZE(mat%val, 1) + kl = mat%kl + ku = mat%ku + n = mat%rank + nrhs = SIZE(rhs,2) +! + IF( PRESENT(sol) ) THEN + sol(:,1:nrhs) = rhs(:,1:nrhs) + CALL zgbtrs('N', n, kl, ku, nrhs, mat%val, lda, mat%piv, sol, n, info) + ELSE + CALL zgbtrs('N', n, kl, ku, nrhs, mat%val, lda, mat%piv, rhs, n, info) + END IF + IF( info .NE. 0) THEN + WRITE(*,*) 'FACTOR: info from GBTRS ', info + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + END SUBROUTINE bsolve_zgbn +!=========================================================================== + SUBROUTINE bsolve_zpbn(mat, rhs, sol) +! +! Backsolve, using Lapack +! + TYPE(zpbmat), INTENT(inout) :: mat + DOUBLE COMPLEX, DIMENSION (:,:) :: rhs + DOUBLE COMPLEX, DIMENSION (:,:), OPTIONAL, INTENT (out) :: sol + INTEGER :: lda, n, nrhs, ku + INTEGER :: info +!---------------------------------------------------------------------- + lda = SIZE(mat%val, 1) + ku = mat%ku + n = mat%rank + nrhs = SIZE(rhs,2) +! + IF( PRESENT(sol) ) THEN + sol(:,1:nrhs) = rhs(:,1:nrhs) + CALL zpbtrs('U', n, ku, nrhs, mat%val, lda, sol, n, info) + ELSE + CALL zpbtrs('U', n, ku, nrhs, mat%val, lda, rhs, n, info) + END IF + IF( info .NE. 0) THEN + WRITE(*,*) 'FACTOR: info from GBTRS ', info + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + END SUBROUTINE bsolve_zpbn +!=========================================================================== + SUBROUTINE bsolve_zgen(mat, rhs, sol) +! +! Backsolve, using Lapack +! + TYPE(zgemat), INTENT(inout) :: mat + DOUBLE COMPLEX, DIMENSION (:,:) :: rhs + DOUBLE COMPLEX, DIMENSION (:,:), OPTIONAL, INTENT (out) :: sol + INTEGER :: n, nrhs + INTEGER :: info +!---------------------------------------------------------------------- + n = mat%rank + nrhs = SIZE(rhs,2) +! + IF( PRESENT(sol) ) THEN + sol(:,1:nrhs) = rhs + CALL zgetrs('N', n, nrhs, mat%val, n, mat%piv, sol, n, info) + ELSE + CALL zgetrs('N', n, nrhs, mat%val, n, mat%piv, rhs, n, info) + END IF + IF( info .NE. 0) THEN + WRITE(*,*) 'FACTOR: info from GETRS ', info + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + END SUBROUTINE bsolve_zgen +!=========================================================================== + FUNCTION vmx_gb(mat, x, trans) RESULT(vmx) +! +! Return product mat*x +! + TYPE(gbmat), INTENT(in) :: mat + DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: x + CHARACTER(len=1), OPTIONAL :: trans + DOUBLE PRECISION, ALLOCATABLE :: vmx(:) + DOUBLE PRECISION :: one=1.0d0, zero=0.0d0 + INTEGER :: lda, kl, ku, m, n, j, imin, imax, ibmin, ibmax + CHARACTER(len=1) :: trans_loc +! + lda = SIZE(mat%val, 1) + kl = mat%kl + ku = mat%ku + m = mat%mrows + n = mat%ncols + trans_loc = 'N' + IF(PRESENT(trans)) trans_loc = trans +! + IF(trans_loc.EQ.'N') THEN + ALLOCATE(vmx(m)) + ELSE + ALLOCATE(vmx(n)) + END IF +! + CALL dgbmv(trans_loc, m, n, kl, ku, one, mat%val(kl+1,1), lda, x, 1, zero,& + & vmx, 1) + END FUNCTION vmx_gb +!=========================================================================== + FUNCTION vmx_ge(mat, x, trans) RESULT(vmx) +! +! Return product mat*x +! + TYPE(gemat), INTENT(in) :: mat + DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: x + CHARACTER(len=1), OPTIONAL :: trans + DOUBLE PRECISION, ALLOCATABLE :: vmx(:) + DOUBLE PRECISION :: one=1.0d0, zero=0.0d0 + INTEGER :: lda, m, n + CHARACTER(len=1) :: trans_loc +! + lda = SIZE(mat%val, 1) + m = mat%mrows + n = mat%ncols + trans_loc = 'N' + IF(PRESENT(trans)) trans_loc = trans +! + IF(trans_loc.EQ.'N') THEN + ALLOCATE(vmx(m)) + ELSE + ALLOCATE(vmx(n)) + END IF +! + CALL dgemv(trans_loc, m, n, one, mat%val, lda, x, 1, zero, vmx, 1) + END FUNCTION vmx_ge +!=========================================================================== + FUNCTION vmx_gen(mat, x, trans) RESULT(vmx) +! +! Return product mat*x +! + TYPE(gemat), INTENT(in) :: mat + DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: x + CHARACTER(len=1), OPTIONAL :: trans + DOUBLE PRECISION, ALLOCATABLE :: vmx(:,:) + DOUBLE PRECISION :: one=1.0d0, zero=0.0d0 + INTEGER :: lda, ldb, m, n, k + CHARACTER(len=1) :: trans_loc +! + lda = SIZE(mat%val, 1) + ldb = SIZE(x,1) + trans_loc = 'N' + IF(PRESENT(trans)) trans_loc = trans +! + IF(trans_loc.EQ.'N') THEN + m = mat%mrows + n = SIZE(x,2) + k = mat%ncols + ALLOCATE(vmx(m,n)) + ELSE + m = mat%ncols + n = SIZE(x,2) + k = mat%mrows + ALLOCATE(vmx(m,n)) + END IF +! + CALL dgemm(trans_loc, 'N', m, n, k, one, mat%val, lda, x, ldb, zero, vmx, & + & lda) +! + END FUNCTION vmx_gen +!=========================================================================== + FUNCTION vmx_zge(mat, x, trans) RESULT(vmx) +! +! Return product mat*x +! + TYPE(zgemat), INTENT(in) :: mat + DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: x + CHARACTER(len=1), OPTIONAL :: trans + DOUBLE COMPLEX, ALLOCATABLE :: vmx(:) + DOUBLE COMPLEX :: one=1.0d0, zero=0.0d0 + INTEGER :: lda, m, n + CHARACTER(len=1) :: trans_loc +! + lda = SIZE(mat%val, 1) + m = mat%mrows + n = mat%ncols + trans_loc = 'N' + IF(PRESENT(trans)) trans_loc = trans +! + IF(trans_loc.EQ.'N') THEN + ALLOCATE(vmx(m)) + ELSE + ALLOCATE(vmx(n)) + END IF +! + CALL zgemv(trans_loc, m, n, one, mat%val, lda, x, 1, zero, vmx, 1) + END FUNCTION vmx_zge +!=========================================================================== + FUNCTION vmx_zgen(mat, x, trans) RESULT(vmx) +! +! Return product mat*x +! + TYPE(zgemat), INTENT(in) :: mat + DOUBLE COMPLEX, DIMENSION(:,:), INTENT(in) :: x + CHARACTER(len=1), OPTIONAL :: trans + DOUBLE COMPLEX, ALLOCATABLE :: vmx(:,:) + DOUBLE COMPLEX :: one=1.0d0, zero=0.0d0 + INTEGER :: lda, ldb, m, n, k + CHARACTER(len=1) :: trans_loc +! + lda = SIZE(mat%val, 1) + ldb = SIZE(x,1) + trans_loc = 'N' + IF(PRESENT(trans)) trans_loc = trans +! + IF(trans_loc.EQ.'N') THEN + m = mat%mrows + n = SIZE(x,2) + k = mat%ncols + ALLOCATE(vmx(m,n)) + ELSE + m = mat%ncols + n = SIZE(x,2) + k = mat%mrows + ALLOCATE(vmx(m,n)) + END IF +! + CALL zgemm(trans_loc, 'N', m, n, k, one, mat%val, lda, x, ldb, zero, vmx, & + & lda) +! + END FUNCTION vmx_zgen +!=========================================================================== + FUNCTION vmx_periodic(mat, x) +! +! Return product mat*x +! + TYPE(periodic_mat), INTENT(in) :: mat + DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: x + DOUBLE PRECISION, DIMENSION(SIZE(x)) :: vmx_periodic + INTEGER :: kl, ku, n, i, ii +! + kl = mat%mat%kl + ku = mat%mat%ku + n = mat%mat%rank +! + vmx_periodic = vmx(mat%mat, x) +! + DO i=1,kl + vmx_periodic(i) = vmx_periodic(i) + & + & DOT_PRODUCT(mat%matvt(i,n-kl+1:n), x(n-kl+1:n)) + END DO +! + DO i=n-ku+1,n + ii = i-n+ku+kl + vmx_periodic(i) = vmx_periodic(i) + & + & DOT_PRODUCT(mat%matvt(ii,1:ku), x(1:ku)) + END DO + END FUNCTION vmx_periodic +!=========================================================================== + FUNCTION vmx_zperiodic(mat, x) +! +! Return product mat*x +! + TYPE(zperiodic_mat), INTENT(in) :: mat + DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: x + DOUBLE COMPLEX, DIMENSION(SIZE(x)) :: vmx_zperiodic + INTEGER :: kl, ku, n, i, ii +! + kl = mat%mat%kl + ku = mat%mat%ku + n = mat%mat%rank +! + vmx_zperiodic = vmx(mat%mat, x) +! + DO i=1,kl + vmx_zperiodic(i) = vmx_zperiodic(i) + & + & DOT_PRODUCT(mat%matvt(i,n-kl+1:n), x(n-kl+1:n)) + END DO +! + DO i=n-ku+1,n + ii = i-n+ku+kl + vmx_zperiodic(i) = vmx_zperiodic(i) + & + & DOT_PRODUCT(mat%matvt(ii,1:ku), x(1:ku)) + END DO + END FUNCTION vmx_zperiodic +!=========================================================================== + FUNCTION vmx_gbn(mat, x, trans) RESULT(vmx) +! +! Return product mat*x +! + TYPE(gbmat), INTENT(in) :: mat + DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: x + CHARACTER(len=1), OPTIONAL :: trans + DOUBLE PRECISION, ALLOCATABLE :: vmx(:,:) + DOUBLE PRECISION :: one=1.0d0, zero=0.0d0 + INTEGER :: lda, kl, ku, m, n, j, k, imin, imax, ibmin, ibmax + CHARACTER(len=1) :: trans_loc +! + lda = SIZE(mat%val, 1) + kl = mat%kl + ku = mat%ku + m = mat%mrows + n = mat%ncols + trans_loc = 'N' + IF(PRESENT(trans)) trans_loc = trans +! + IF(trans_loc.EQ.'N') THEN + ALLOCATE(vmx(m,SIZE(x,2))) + ELSE + ALLOCATE(vmx(n,SIZE(x,2))) + END IF +! + DO k=1,SIZE(x,2) + CALL dgbmv(trans_loc, m, n, kl, ku, one, mat%val(kl+1,1), lda, & + & x(1,k), 1, zero, vmx(1,k), 1) + END DO + END FUNCTION vmx_gbn +!=========================================================================== + FUNCTION vmx_pb(mat, x) +! +! Return product mat*x +! + TYPE(pbmat), INTENT(in) :: mat + DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: x + DOUBLE PRECISION, DIMENSION(SIZE(x)) :: vmx_pb + INTEGER :: lda, ku, n, i, j, imin, imax, ib, ibmin, ibmax +! + lda = SIZE(mat%val, 1) + ku = mat%ku + n = mat%rank +! + vmx_pb = 0.0d0 + DO j=1,n + imin = MAX(1,j-ku); imax = j ! The column in the upper diagonal part + ibmin = ku+imin-j+1; ibmax = ku+imax-j+1 + vmx_pb(imin:imax) = vmx_pb(imin:imax) + mat%val(ibmin:ibmax,j)*x(j) +! + imin=j+1; imax = MIN(n,j+ku) ! The column in the lower diagonal part + DO i=imin,imax + ib = ku+1+j-i + vmx_pb(i) = vmx_pb(i) + mat%val(ib,i)*x(j) + END DO + END DO + END FUNCTION vmx_pb +!=========================================================================== + FUNCTION vmx_pbn(mat, x) +! +! Return product mat*x +! + TYPE(pbmat), INTENT(in) :: mat + DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: x + DOUBLE PRECISION, DIMENSION(SIZE(x,1),SIZE(x,2)) :: vmx_pbn + INTEGER :: lda, ku, n, i, j, k, imin, imax, ib, ibmin, ibmax +! + lda = SIZE(mat%val, 1) + ku = mat%ku + n = mat%rank +! + vmx_pbn = 0.0d0 + DO j=1,n + imin = MAX(1,j-ku); imax = j ! The column in the upper diagonal part + ibmin = ku+imin-j+1; ibmax = ku+imax-j+1 + DO k=1,SIZE(x,2) + vmx_pbn(imin:imax,k) = vmx_pbn(imin:imax,k) + & + & mat%val(ibmin:ibmax,j)*x(j,k) + END DO +! + imin=j+1; imax = MIN(n,j+ku) ! The column in the lower diagonal part + DO i=imin,imax + ib = ku+1+j-i + vmx_pbn(i,:) = vmx_pbn(i,:) + mat%val(ib,i)*x(j,:) + END DO + END DO + END FUNCTION vmx_pbn +!=========================================================================== + FUNCTION vmx_zgb(mat, x, trans) RESULT(vmx) +! +! Return product mat*x +! + TYPE(zgbmat), INTENT(in) :: mat + DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: x + CHARACTER(len=1), OPTIONAL :: trans + DOUBLE COMPLEX, ALLOCATABLE :: vmx(:) + DOUBLE COMPLEX :: one=1.0d0, zero=0.0d0 + INTEGER :: lda, kl, ku, m, n, j, imin, imax, ibmin, ibmax + CHARACTER(len=1) :: trans_loc +! + lda = SIZE(mat%val, 1) + kl = mat%kl + ku = mat%ku + m = mat%mrows + n = mat%ncols + trans_loc = 'N' + IF(PRESENT(trans)) trans_loc = trans +! + IF(trans_loc.EQ.'N') THEN + ALLOCATE(vmx(m)) + ELSE + ALLOCATE(vmx(n)) + END IF +! + CALL zgbmv(trans_loc, m, n, kl, ku, one, mat%val(kl+1,1), lda, x, 1, zero,& + & vmx, 1) + END FUNCTION vmx_zgb +!=========================================================================== + FUNCTION vmx_zgbn(mat, x, trans) RESULT(vmx) +! +! Return product mat*x +! + TYPE(zgbmat), INTENT(in) :: mat + DOUBLE COMPLEX, DIMENSION(:,:), INTENT(in) :: x + CHARACTER(len=1), OPTIONAL :: trans + DOUBLE COMPLEX, ALLOCATABLE :: vmx(:,:) + DOUBLE COMPLEX :: one=1.0d0, zero=0.0d0 + INTEGER :: lda, kl, ku, m, n, j, k, imin, imax, ibmin, ibmax + CHARACTER(len=1) :: trans_loc +! + lda = SIZE(mat%val, 1) + kl = mat%kl + ku = mat%ku + m = mat%mrows + n = mat%ncols + trans_loc = 'N' + IF(PRESENT(trans)) trans_loc = trans +! + IF(trans_loc.EQ.'N') THEN + ALLOCATE(vmx(m,SIZE(x,2))) + ELSE + ALLOCATE(vmx(n,SIZE(x,2))) + END IF +! + DO k=1,SIZE(x,2) + CALL zgbmv(trans_loc, m, n, kl, ku, one, mat%val(kl+1,1), lda, & + & x(1,k), 1, zero, vmx(1,k), 1) + END DO + END FUNCTION vmx_zgbn +!=========================================================================== + FUNCTION vmx_zpb(mat, x) +! +! Return product mat*x +! + TYPE(zpbmat), INTENT(in) :: mat + DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: x + DOUBLE COMPLEX, DIMENSION(SIZE(x)) :: vmx_zpb + INTEGER :: lda, ku, n, i, j, imin, imax, ib, ibmin, ibmax +! + lda = SIZE(mat%val, 1) + ku = mat%ku + n = mat%rank +! + vmx_zpb = 0.0d0 + DO j=1,n + imin = MAX(1,j-ku); imax = j ! The column in the upper diagonal part + ibmin = ku+imin-j+1; ibmax = ku+imax-j+1 + vmx_zpb(imin:imax) = vmx_zpb(imin:imax) + mat%val(ibmin:ibmax,j)*x(j) +! + imin=j+1; imax = MIN(n,j+ku) ! The column in the lower diagonal part + DO i=imin,imax + ib = ku+1+j-i + vmx_zpb(i) = vmx_zpb(i) + CONJG(mat%val(ib,i))*x(j) + END DO + END DO + END FUNCTION vmx_zpb +!=========================================================================== + FUNCTION vmx_zpbn(mat, x) +! +! Return product mat*x +! + TYPE(zpbmat), INTENT(in) :: mat + DOUBLE COMPLEX, DIMENSION(:,:), INTENT(in) :: x + DOUBLE COMPLEX, DIMENSION(SIZE(x,1),SIZE(x,2)) :: vmx_zpbn + INTEGER :: lda, ku, n, i, j, k, imin, imax, ib, ibmin, ibmax +! + lda = SIZE(mat%val, 1) + ku = mat%ku + n = mat%rank +! + vmx_zpbn = 0.0d0 + DO j=1,n + imin = MAX(1,j-ku); imax = j ! The column in the upper diagonal part + ibmin = ku+imin-j+1; ibmax = ku+imax-j+1 + DO k=1,SIZE(x,2) + vmx_zpbn(imin:imax,k) = vmx_zpbn(imin:imax,k) + & + & mat%val(ibmin:ibmax,j)*x(j,k) + END DO +! + imin=j+1; imax = MIN(n,j+ku) ! The column in the lower diagonal part + DO i=imin,imax + ib = ku+1+j-i + vmx_zpbn(i,:) = vmx_zpbn(i,:) + CONJG(mat%val(ib,i))*x(j,:) + END DO + END DO + END FUNCTION vmx_zpbn +!=========================================================================== + SUBROUTINE determinant_ge(mat, base, pow) +! +! Return the determinant of mat +! + TYPE(gemat) :: mat + INTEGER :: pow, i + DOUBLE PRECISION :: base +! + CALL factor(mat) + base = 1.0d0 + pow = 0 + DO i=1,mat%rank + IF( mat%piv(i) .NE. i) base = -base + base = mat%val(i,i)*base + IF( base .EQ. 0.0d0 ) THEN + WRITE(*,*) 'DETERMINANT_GE: matrix is singular' + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + DO + IF( ABS(base) .GE. 1.0d0 ) EXIT + base = 10.0d0*base + pow = pow - 1 + END DO + DO + IF( ABS(base) .LT. 10.0d0 ) EXIT + base = base/10.0d0 + pow = pow + 1 + END DO + END DO + END SUBROUTINE determinant_ge +!=========================================================================== + SUBROUTINE determinant_gb(mat, base, pow) +! +! Return the determinant of mat +! + TYPE(gbmat) :: mat + INTEGER :: pow, i, ib + DOUBLE PRECISION :: base +! + CALL factor(mat) + base = 1.0d0 + pow = 0 + ib=mat%kl + mat%ku + 1 + DO i=1,mat%rank + IF( mat%piv(i) .NE. i) base = -base + base = mat%val(ib,i)*base + IF( base .EQ. 0.0d0 ) THEN + WRITE(*,*) 'DETERMINANT_GB: matrix is singular' + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + DO + IF( ABS(base) .GE. 1.0d0 ) EXIT + base = 10.0d0*base + pow = pow - 1 + END DO + DO + IF( ABS(base) .LT. 10.0d0 ) EXIT + base = base/10.0d0 + pow = pow + 1 + END DO + END DO + END SUBROUTINE determinant_gb +!=========================================================================== + SUBROUTINE determinant_pb(mat, base, pow) +! +! Return the determinant of mat +! + TYPE(pbmat) :: mat + INTEGER :: pow, i, ib + DOUBLE PRECISION :: base +! + CALL factor(mat) + base = 1.0d0 + pow = 0 + ib = mat%ku + 1 + DO i=1,mat%rank + base = mat%val(ib,i)*base + IF( base .EQ. 0.0d0 ) THEN + WRITE(*,*) 'DETERMINANT_PB: matrix is singular' + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + DO + IF( ABS(base) .GE. 1.0d0 ) EXIT + base = 10.0d0*base + pow = pow - 1 + END DO + DO + IF( ABS(base) .LT. 10.0d0 ) EXIT + base = base/10.0d0 + pow = pow + 1 + END DO + END DO + base=base**2 + pow=pow*2 + END SUBROUTINE determinant_pb +!=========================================================================== + SUBROUTINE determinant_zge(mat, base, pow) +! +! Return the determinant of mat +! + TYPE(zgemat) :: mat + INTEGER :: pow, i + DOUBLE COMPLEX :: base +! + CALL factor(mat) + base = 1.0d0 + pow = 0 + DO i=1,mat%rank + IF( mat%piv(i) .NE. i) base = -base + base = mat%val(i,i)*base + IF( base .EQ. 0.0d0 ) THEN + WRITE(*,*) 'DETERMINANT_ZGE: matrix is singular' + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + DO + IF( ABS(base) .GE. 1.0d0 ) EXIT + base = 10.0d0*base + pow = pow - 1 + END DO + DO + IF( ABS(base) .LT. 10.0d0 ) EXIT + base = base/10.0d0 + pow = pow + 1 + END DO + END DO + END SUBROUTINE determinant_zge +!=========================================================================== + SUBROUTINE determinant_zgb(mat, base, pow) +! +! Return the determinant of mat +! + TYPE(zgbmat) :: mat + INTEGER :: pow, i, ib + DOUBLE COMPLEX :: base +! + CALL factor(mat) + base = 1.0d0 + pow = 0 + ib=mat%kl + mat%ku + 1 + DO i=1,mat%rank + IF( mat%piv(i) .NE. i) base = -base + base = mat%val(ib,i)*base + IF( base .EQ. 0.0d0 ) THEN + WRITE(*,*) 'DETERMINANT_GB: matrix is singular' + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + DO + IF( ABS(base) .GE. 1.0d0 ) EXIT + base = 10.0d0*base + pow = pow - 1 + END DO + DO + IF( ABS(base) .LT. 10.0d0 ) EXIT + base = base/10.0d0 + pow = pow + 1 + END DO + END DO + END SUBROUTINE determinant_zgb +!=========================================================================== + SUBROUTINE determinant_zpb(mat, base, pow) +! +! Return the determinant of mat +! + TYPE(zpbmat) :: mat + INTEGER :: pow, i, ib + DOUBLE COMPLEX :: base +! + CALL factor(mat) + base = 1.0d0 + pow = 0 + ib = mat%ku + 1 + DO i=1,mat%rank + base = mat%val(ib,i)*base + IF( base .EQ. 0.0d0 ) THEN + WRITE(*,*) 'DETERMINANT_PB: matrix is singular' + STOP '*** Abnormal EXIT in MODULE matrix ***' + END IF + DO + IF( ABS(base) .GE. 1.0d0 ) EXIT + base = 10.0d0*base + pow = pow - 1 + END DO + DO + IF( ABS(base) .LT. 10.0d0 ) EXIT + base = base/10.0d0 + pow = pow + 1 + END DO + END DO + base=base**2 + pow=pow*2 + END SUBROUTINE determinant_zpb +!=========================================================================== + SUBROUTINE putmat_gb(fid, label, mat, str) +! +! Write GB matrix in hdf5 file +! + USE futils + INTEGER, INTENT(in) :: fid + CHARACTER(len=*), INTENT(in) :: label + TYPE(gbmat) :: mat + CHARACTER(len=*), OPTIONAL, INTENT(in) :: str + IF(PRESENT(str)) THEN + CALL putarr(fid, label, mat%val, str) + ELSE + CALL putarr(fid, label, mat%val) + END IF + CALL attach(fid, label, 'KL', mat%kl) + CALL attach(fid, label, 'KU', mat%ku) + CALL attach(fid, label, 'RANK', mat%rank) + END SUBROUTINE putmat_gb +!=========================================================================== + SUBROUTINE getmat_gb(fid, label, mat, str) +! +! Read in GB matrix from hdf5 file +! + USE futils + INTEGER, INTENT(in) :: fid + CHARACTER(len=*), INTENT(in) :: label + TYPE(gbmat) :: mat + CHARACTER(len=*), OPTIONAL, INTENT(in) :: str + CALL getatt(fid, label, 'KL', mat%kl) + CALL getatt(fid, label, 'KU', mat%ku) + CALL getatt(fid, label, 'RANK', mat%rank) + CALL getarr(fid, label, mat%val) + END SUBROUTINE getmat_gb +!=========================================================================== + SUBROUTINE kron_ge(mata, matb, matc) +! +! Krocnecker product of 2 dense matrices +! + TYPE(gemat), INTENT(in) :: mata, matb + TYPE(gemat), INTENT(out) :: matc +! + INTEGER :: i1, j1, i3, j3, m1, n1, m2, n2, m3, n3 +! + m1 = mata%mrows + n1 = mata%ncols + m2 = matb%mrows + n2 = matb%ncols + m3 = m1*m2 + n3 = n1*n2 +! + CALL init(n3, 0, matc, mrows=m3) + DO i1=1,m1 + i3 = (i1-1)*m2 + DO j1=1,n1 + j3 = (j1-1)*n2 + matc%val(i3+1:i3+m2,j3+1:j3+n2) = mata%val(i1,j1)*matb%val(1:m2,1:n2) + END DO + END DO + END SUBROUTINE kron_ge +!=========================================================================== +END MODULE matrix diff --git a/src/multigrid_mod.f90 b/src/multigrid_mod.f90 new file mode 100644 index 0000000..192c76c --- /dev/null +++ b/src/multigrid_mod.f90 @@ -0,0 +1,2373 @@ +!> +!> @file multigrid_mod.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +MODULE multigrid +! +! MULTIGRID: Implement Multigrid solver for Finite Elements +! and Fiinite Differences. +! +! T.M. Tran, CRPP-EPFL +! September 2012 +! + USE bsplines + USE matrix + USE conmat_mod + USE csr + USE cds + IMPLICIT NONE +! + TYPE grid1d + INTEGER :: n ! Number of intervals + INTEGER :: rank ! Dimension of FE space + DOUBLE PRECISION :: h + DOUBLE PRECISION, ALLOCATABLE :: x(:) + DOUBLE PRECISION, ALLOCATABLE :: v(:) + DOUBLE PRECISION, ALLOCATABLE :: f(:) + TYPE(spline1d) :: spl + TYPE(gemat) :: transf ! Coarse to fine transfer matrix + TYPE(gbmat), ALLOCATABLE :: mata ! FE matrix + TYPE(gbmat), ALLOCATABLE :: matm ! mass matrix + TYPE(gbmat), ALLOCATABLE :: mata_copy ! Used for direct_solve + TYPE(gemat), ALLOCATABLE :: matap ! FE matrix + TYPE(gemat), ALLOCATABLE :: matmp ! mass matrix + TYPE(gemat), ALLOCATABLE :: matap_copy! Used for direct_solve + END TYPE grid1d +! + TYPE grid2d + INTEGER :: n(2) ! Number of intervals + INTEGER :: rank(2) ! Dimension of FE space + DOUBLE PRECISION :: h(2) + DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:) +! + DOUBLE PRECISION, ALLOCATABLE :: v(:,:) ! sol + DOUBLE PRECISION, ALLOCATABLE :: f(:,:) ! rhs + DOUBLE PRECISION, POINTER :: v1d(:) ! flatten sol + DOUBLE PRECISION, POINTER :: f1d(:) ! flatten rhs +! + TYPE(csr_mat),ALLOCATABLE :: mata + TYPE(cds_mat),ALLOCATABLE :: mata_cds + TYPE(spline2d) :: spl + TYPE(gemat) :: transf(2) +! + TYPE(csr_mat) :: matp(2) + END TYPE grid2d +! + TYPE mg_info + INTEGER :: nu1 ! Relaxation down sweeps + INTEGER :: nu2 ! Relaxation up sweeps + INTEGER :: mu ! mu-cycle number + INTEGER :: nu0 ! Number of FMG cycles + INTEGER :: levels ! Number of mg levels + CHARACTER(len=4) :: relax ! Type of relation + DOUBLE PRECISION :: omega ! for weighted Jacobi relaxation + LOGICAL :: nlscale=.FALSE. ! Scale restriction if .TRUE. + END TYPE mg_info +! + INTERFACE create_grid + MODULE PROCEDURE create_grid_1d, create_grid_2d + END INTERFACE create_grid + INTERFACE disrhs + MODULE PROCEDURE disrhs_1d, disrhs_2d + END INTERFACE disrhs + INTERFACE direct_solve + MODULE PROCEDURE direct_solve_1d, direct_solve_2d + END INTERFACE direct_solve + INTERFACE mg + MODULE PROCEDURE mg_1d, mg_2d + END INTERFACE mg + INTERFACE disc_err + MODULE PROCEDURE disc_err_1d, disc_err_2d + END INTERFACE disc_err + INTERFACE jacobi + MODULE PROCEDURE jacobi_cds, jacobi_csr, jacobi_gb, jacobi_ge + END INTERFACE jacobi + INTERFACE gs + MODULE PROCEDURE gs_cds, gs_csr, gs_gb, gs_ge + END INTERFACE gs + INTERFACE restrict + MODULE PROCEDURE restrict_1d, restrict_2d, restrict_2d_csr + END INTERFACE restrict + INTERFACE prolong + MODULE PROCEDURE prolong_1d, prolong_2d, prolong_2d_csr + END INTERFACE prolong + INTERFACE printmat + MODULE PROCEDURE printmat_mat, printmat_ge, printmat_gb, printmat_periodic + END INTERFACE printmat + INTERFACE massmat + MODULE PROCEDURE massmat_ge, massmat_gb, massmat_periodic + END INTERFACE massmat + INTERFACE femat + MODULE PROCEDURE femat_2d_csr, femat_ge, femat_gb, femat_periodic + END INTERFACE femat + INTERFACE ibcmat + MODULE PROCEDURE ibcmat_1d, ibcmat_2d + END INTERFACE ibcmat + INTERFACE mod_transf + MODULE PROCEDURE mod_transf_full, mod_transf_csr + END INTERFACE mod_transf + INTERFACE normf + MODULE PROCEDURE normf_gb, normf_ge + END INTERFACE normf + INTERFACE residue + MODULE PROCEDURE residue_gen, residue_csr, residue_cds, residue_gb, residue_ge + END INTERFACE residue +! +CONTAINS +!-------------------------------------------------------------------------------- + SUBROUTINE create_grid_1d(n, nidbas, ng_in, alpha, grids, period) +! +! Create an array of levels grids +! Compute mass matrix and prolongation matrices. +! + INTEGER, INTENT(in) :: n ! Number of intervals in the finest grid + INTEGER, INTENT(in) :: nidbas ! Order of splines + INTEGER, INTENT(in) :: ng_in ! Number of proposed Gauss points + INTEGER, INTENT(in) :: alpha ! geometric exponent + TYPE(grid1d), INTENT(out) :: grids(:) + LOGICAL, INTENT(in), OPTIONAL :: period +! + LOGICAL :: nlper + INTEGER :: n_current, nrank, ngauss + INTEGER :: levels, l, i + DOUBLE PRECISION :: h_current + TYPE(gbmat) :: matm + TYPE(gemat) :: matmp +! + levels = SIZE(grids) + nlper = .FALSE. + IF(PRESENT(period)) nlper = period +! + ngauss = CEILING(REAL(2*nidbas+alpha+1,8)/2.d0) + ngauss = MAX(ng_in, ngauss) + WRITE(*,'(a,i0)') 'ngauss = ', ngauss +! +! Allocate some matrices +! + DO l=1,levels + IF(nlper) THEN + ALLOCATE(grids(l)%matmp) + ALLOCATE(grids(l)%matap) + ELSE + ALLOCATE(grids(l)%matm) + ALLOCATE(grids(l)%mata) + END IF + END DO +! + n_current = n + h_current = 1.0d0/REAL(n_current,8) + DO l=1,levels + IF(n_current .LT. 2 ) THEN + PRINT*, 'CREATE_GRID: number intervals too small!' + STOP + END IF + grids(l)%n = n_current + grids(l)%h = h_current + ALLOCATE(grids(l)%x(0:n_current)) + grids(l)%x(0:n_current) = (/ (REAL(i,8)*h_current, i=0,n_current) /) + CALL set_spline(nidbas, ngauss, grids(l)%x, grids(l)%spl, period=nlper) + CALL get_dim(grids(l)%spl, nrank) + IF(nlper) nrank = n_current + grids(l)%rank = nrank + ALLOCATE(grids(l)%v(nrank)) + ALLOCATE(grids(l)%f(nrank)) + IF(nlper) THEN + CALL massmat(grids(l)%spl, alpha, grids(l)%matmp) + ELSE + CALL massmat(grids(l)%spl, alpha, grids(l)%matm) + END IF + IF(l.GT.1) THEN + CALL ctof_massmat(grids(l-1)%spl, grids(l)%spl, alpha, grids(l)%transf) + IF(nlper) THEN + CALL mcopy(grids(l-1)%matmp, matmp) + CALL factor(matmp) + CALL bsolve(matmp, grids(l)%transf%val) + CALL destroy(matmp) + ELSE + CALL mcopy(grids(l-1)%matm, matm) + CALL factor(matm) + CALL bsolve(matm, grids(l)%transf%val) + CALL destroy(matm) + END IF + END IF + n_current = n_current/2 + h_current = 2.0d0*h_current + END DO + END SUBROUTINE create_grid_1d +!-------------------------------------------------------------------------------- + SUBROUTINE create_grid_2d(x, y, nidbas, ng_in, alpha, grids, mat_type, period, & + & debug_in) +! +! Create an array of levels grids +! Compute mass matrix and prolongation matrices. +! + DOUBLE PRECISION, INTENT(in) :: x(0:), y(0:) ! Finest grid points + INTEGER, INTENT(in) :: nidbas(2) ! Order of splines + INTEGER, INTENT(in) :: alpha(2) ! geometric exponent + INTEGER, INTENT(in) :: ng_in(2) ! Number of proposed Gauss points + TYPE(grid2d), INTENT(out), TARGET :: grids(:) + CHARACTER(*), INTENT(in), OPTIONAL :: mat_type ! csr (default) or cds + LOGICAL, INTENT(in), OPTIONAL :: period(2) + LOGICAL, INTENT(in), OPTIONAL :: debug_in +! + LOGICAL, DIMENSION(2) :: nlper + INTEGER, DIMENSION(2) :: n, sp_dim, ngauss + LOGICAL :: nlcds + LOGICAL :: debug + INTEGER :: levels, l, rank2d + TYPE(gemat) :: matm +! + DOUBLE PRECISION, PARAMETER :: pi = 4.0d0*ATAN(1.0d0) +! +! Process input args +! + n(1) = SIZE(x)-1 + n(2) = SIZE(y)-1 + levels = SIZE(grids) + nlper = .FALSE. + IF(PRESENT(period)) THEN + nlper = period + END IF + IF(PRESENT(debug_in)) THEN + debug = debug_in + ELSE + debug = .FALSE. + END IF + IF(PRESENT(mat_type)) THEN ! CSR matrix by default + nlcds = mat_type.EQ.'cds' + ELSE + nlcds = .FALSE. + END IF +! +! WARNING: Assume that only 2nd dim can be periodic!!! + IF(nlper(1)) THEN + WRITE(*,'(A)') 'CREATE_GRID: First dimension could not be periodic!' + STOP + END IF +! + ngauss = CEILING(REAL(2*nidbas+1,8)/2.d0) + ngauss = MAX(ng_in, ngauss) + WRITE(*,'(a,2i4)') 'ngauss = ', ngauss +! + DO l=1,levels +! +! Create mesh from finest grid mesh + IF(MINVAL(n) .LT. 2 ) THEN + PRINT*, 'CREATE_GRID: number intervals too small!' + STOP + END IF + grids(l)%n = n + ALLOCATE(grids(l)%x(0:n(1))) + ALLOCATE(grids(l)%y(0:n(2))) + IF(l.EQ.1) THEN + grids(1)%x = x + grids(1)%y = y + ELSE + grids(l)%x(:) = grids(l-1)%x(0::2) + grids(l)%y(:) = grids(l-1)%y(0::2) + END IF + IF(debug) THEN + WRITE(*,'(/a,i4,a,2l2)') 'l =', l, ' nlper =', nlper + WRITE(*,'(a/(10(1pe12.3)))') 'x', grids(l)%x + WRITE(*,'(a/(10(1pe12.3)))') 'y', grids(l)%y + END IF +! +! Allocate mem for solution v and RHS f + CALL set_spline(nidbas, ngauss, grids(l)%x, grids(l)%y, grids(l)%spl, period=nlper) + CALL get_dim(grids(l)%spl%sp1, sp_dim(1)) + CALL get_dim(grids(l)%spl%sp2, sp_dim(2)) + ALLOCATE(grids(l)%v(sp_dim(1), sp_dim(2))) + ALLOCATE(grids(l)%f(sp_dim(1), sp_dim(2))) +! +! WARNING: Assume that only 2nd dim can be periodic!!! + grids(l)%rank = sp_dim + IF(nlper(2)) THEN + grids(l)%rank(2) = n(2) + END IF + IF(debug) THEN + WRITE(*,'(a,2i6)') 'Grid ranks', grids(l)%rank + END IF +! +! Flatten version of sol and rhs + rank2d = PRODUCT(grids(l)%rank) + grids(l)%v1d(1:rank2d) => grids(l)%v + grids(l)%f1d(1:rank2d) => grids(l)%f +! +! Matrix format for FE matrix + IF(nlcds) THEN + ALLOCATE(grids(l)%mata_cds) + ELSE + ALLOCATE(grids(l)%mata) + END IF +! +! Coarse to fine mesh transfers for l>1 + IF(l.GT.1) THEN + CALL ctof_massmat(grids(l-1)%spl%sp1, grids(l)%spl%sp1, alpha(1), grids(l)%transf(1)) + CALL ctof_massmat(grids(l-1)%spl%sp2, grids(l)%spl%sp2, alpha(2), grids(l)%transf(2)) +! + CALL massmat(grids(l-1)%spl%sp1, alpha(1), matm) + CALL factor(matm) + CALL bsolve(matm, grids(l)%transf(1)%val) + CALL full_to_csr(grids(l)%transf(1)%val, grids(l)%matp(1)) + CALL destroy(matm) + CALL destroy(grids(l)%transf(1)) +! + CALL massmat(grids(l-1)%spl%sp2, alpha(2), matm) + CALL factor(matm) + CALL bsolve(matm, grids(l)%transf(2)%val) + CALL full_to_csr(grids(l)%transf(2)%val, grids(l)%matp(2)) + CALL destroy(matm) + CALL destroy(grids(l)%transf(2)) + END IF +! +! Next coarse grid + n = n/2 + END DO + END SUBROUTINE create_grid_2d +!-------------------------------------------------------------------------------- + SUBROUTINE create_grid_fd(x, y, grids, info, mat_type, period, debug) +! +! FD version of create_grid +! + DOUBLE PRECISION, INTENT(in) :: x(0:), y(0:) + TYPE(grid2d), INTENT(out), TARGET :: grids(:) + TYPE(mg_info), INTENT(inout) :: info ! info for MG + CHARACTER(*), INTENT(in), OPTIONAL :: mat_type ! csr (default) or cds + LOGICAL, INTENT(in), OPTIONAL :: period(2) + LOGICAL, INTENT(in), OPTIONAL :: debug +! + INTEGER :: nidbas(2)=1, ngauss(2)=4 ! Linear Splines \equiv 1st FD + INTEGER :: alpha(2) = 1 ! Cartesian coordinate + LOGICAL, DIMENSION(2) :: nlper + LOGICAL :: nldebug + LOGICAL :: nlcds + INTEGER :: levels, n(2), sp_dim(2) + INTEGER :: l, rank2d + TYPE(gemat) :: matm +!-------------------------------------------------------------------------------- +! +! Process input args +! + n(1) = SIZE(x)-1 + n(2) = SIZE(y)-1 + levels = SIZE(grids) + info%nlscale = .TRUE. ! Restriction should be scaled for FD + nlper = .FALSE. + IF(PRESENT(period)) nlper = period + nldebug = .FALSE. + IF(PRESENT(debug)) nldebug = debug + IF(PRESENT(mat_type)) THEN ! CSR matrix by default + nlcds = mat_type.EQ.'cds' + ELSE + nlcds = .FALSE. + END IF +! + DO l=1,levels +! +! Create mesh from finest grid mesh + IF(MINVAL(n) .LT. 2 ) THEN + PRINT*, 'CREATE_GRID: number intervals too small!' + STOP + END IF + grids(l)%n = n + ALLOCATE(grids(l)%x(0:n(1))) + ALLOCATE(grids(l)%y(0:n(2))) + IF(l.EQ.1) THEN + grids(1)%x = x + grids(1)%y = y + ELSE + grids(l)%x(:) = grids(l-1)%x(0::2) + grids(l)%y(:) = grids(l-1)%y(0::2) + END IF + IF(nldebug) THEN + WRITE(*,'(/a,i4,a,2l2)') 'l =', l, ' nlper =', nlper + WRITE(*,'(a/(10(1pe12.3)))') 'x', grids(l)%x + WRITE(*,'(a/(10(1pe12.3)))') 'y', grids(l)%y + END IF +! +! Allocate mem for solution v and RHS f + CALL set_spline(nidbas, ngauss, grids(l)%x, grids(l)%y, grids(l)%spl, period=nlper) + CALL get_dim(grids(l)%spl%sp1, sp_dim(1)) + CALL get_dim(grids(l)%spl%sp2, sp_dim(2)) + ALLOCATE(grids(l)%v(0:sp_dim(1)-1, 0:sp_dim(2)-1)) + ALLOCATE(grids(l)%f(0:sp_dim(1)-1, 0:sp_dim(2)-1)) +! +! WARNING: Assume that only 2nd dim can be periodic!!! + grids(l)%rank = sp_dim + IF(nlper(2)) THEN + grids(l)%rank(2) = n(2) + END IF + IF(nldebug) THEN + WRITE(*,'(a,2i6)') 'Grid ranks', grids(l)%rank + END IF +! +! Flatten version of sol and rhs + rank2d = PRODUCT(grids(l)%rank) + grids(l)%v1d(1:rank2d) => grids(l)%v + grids(l)%f1d(1:rank2d) => grids(l)%f +! +! Matrix format for FD matrix + IF(nlcds) THEN + ALLOCATE(grids(l)%mata_cds) + ELSE + ALLOCATE(grids(l)%mata) + END IF +! +! Coarse to fine mesh transfers for l>1 + IF(l.GT.1) THEN + CALL ctof_massmat(grids(l-1)%spl%sp1, grids(l)%spl%sp1, alpha(1), grids(l)%transf(1)) + CALL ctof_massmat(grids(l-1)%spl%sp2, grids(l)%spl%sp2, alpha(2), grids(l)%transf(2)) +! + CALL massmat(grids(l-1)%spl%sp1, alpha(1), matm) + CALL factor(matm) + CALL bsolve(matm, grids(l)%transf(1)%val) + CALL full_to_csr(grids(l)%transf(1)%val, grids(l)%matp(1)) + CALL destroy(matm) + CALL destroy(grids(l)%transf(1)) +! + CALL massmat(grids(l-1)%spl%sp2, alpha(2), matm) + CALL factor(matm) + CALL bsolve(matm, grids(l)%transf(2)%val) + CALL full_to_csr(grids(l)%transf(2)%val, grids(l)%matp(2)) + CALL destroy(matm) + CALL destroy(grids(l)%transf(2)) + END IF +! +! Next coarse grid + n = n/2 + END DO + END SUBROUTINE create_grid_fd +!-------------------------------------------------------------------------------- + RECURSIVE SUBROUTINE fmg(grids, info, l) +! +! Execute a full multigrid V-cycle +! + TYPE(grid1d), INTENT(inout) :: grids(:) + TYPE(mg_info), INTENT(in) :: info + INTEGER, INTENT(in) :: l + INTEGER :: levels, k + levels = info%levels +! + IF(l.EQ.levels) THEN + CALL direct_solve(grids(levels), grids(levels)%v) + ELSE + grids(l+1)%f = restrict(grids(l+1)%transf,grids(l)%f) + CALL fmg(grids, info, l+1) + grids(l)%v = prolong(grids(l+1)%transf,grids(l+1)%v) + DO k=1,info%nu0 + CALL mg(grids, info, l) + END DO + END IF + END SUBROUTINE fmg +!-------------------------------------------------------------------------------- + RECURSIVE SUBROUTINE mg_1d(grids, info, l) +! +! Execute a recursive V-cycle +! + TYPE(grid1d), INTENT(inout) :: grids(:) + TYPE(mg_info), INTENT(in) :: info + INTEGER, INTENT(in) :: l + INTEGER :: levels, k + LOGICAL :: nlper +! + levels = info%levels + nlper = grids(1)%spl%period +! + IF(l.EQ.levels) THEN + CALL direct_solve(grids(levels), grids(levels)%v) + ELSE + CALL relax(info%nu1) + IF(nlper) THEN + grids(l+1)%f = restrict(grids(l+1)%transf, & + & grids(l)%f-vmx(grids(l)%matap, grids(l)%v)) + ELSE + grids(l+1)%f = restrict(grids(l+1)%transf, & + & grids(l)%f-vmx(grids(l)%mata, grids(l)%v)) + END IF + grids(l+1)%v = 0.0d0 +! +! Only 1 call to the coarsest level + DO k=1,MERGE(info%mu, 1, (l+1).NE.levels) + CALL mg(grids, info, l+1) + END DO +! + grids(l)%v = grids(l)%v + prolong(grids(l+1)%transf,grids(l+1)%v) + CALL relax(info%nu2) + END IF +! + CONTAINS + SUBROUTINE relax(nu) + INTEGER, INTENT(in) :: nu + SELECT CASE (TRIM(info%relax)) + CASE ("jac") + IF(nlper) THEN + CALL jacobi(grids(l)%matap, info%omega, nu, grids(l)%v, grids(l)%f) + ELSE + CALL jacobi(grids(l)%mata, info%omega, nu, grids(l)%v, grids(l)%f) + END IF + CASE ("gs") + IF(nlper) THEN + CALL gs(grids(l)%matap, nu, grids(l)%v, grids(l)%f) + ELSE + CALL gs(grids(l)%mata, nu, grids(l)%v, grids(l)%f) + END IF + CASE default + PRINT*, "relax ", info%relax, " NOT IMPLEMENTED!" + STOP + END SELECT + END SUBROUTINE relax + END SUBROUTINE mg_1d +!-------------------------------------------------------------------------------- + RECURSIVE SUBROUTINE mg_2d(grids, info, l) +! +! Execute a recursive V-cycle +! + TYPE(grid2d), INTENT(inout) :: grids(:) + TYPE(mg_info), INTENT(in) :: info + INTEGER, INTENT(in) :: l +! + DOUBLE PRECISION, ALLOCATABLE, TARGET :: resid(:,:) + DOUBLE PRECISION, POINTER :: resid1d(:) + INTEGER :: levels, k, m1, m2 +! + levels = info%levels + m1 = SIZE(grids(l)%v,1) + m2 = SIZE(grids(l)%v,2) +! + IF(l.EQ.levels) THEN + grids(levels)%v = grids(levels)%f + CALL direct_solve(grids(levels), grids(levels)%v1d) + ELSE + CALL relax(info%nu1) + ALLOCATE(resid(m1,m2)); resid1d(1:m1*m2) => resid + IF(ALLOCATED(grids(l)%mata)) THEN + resid1d = grids(l)%f1d - vmx(grids(l)%mata, grids(l)%v1d) + ELSE + resid1d = grids(l)%f1d - vmx(grids(l)%mata_cds, grids(l)%v1d) + END IF + grids(l+1)%f = restrict(grids(l+1)%matp, resid) + IF(info%nlscale) grids(l+1)%f = 0.25d0*grids(l+1)%f + DEALLOCATE(resid) + grids(l+1)%v = 0.0d0 +! +! Only 1 call to the coarsest level + DO k=1,MERGE(info%mu, 1, (l+1).NE.levels) + CALL mg(grids, info, l+1) + END DO +! + grids(l)%v = grids(l)%v + prolong(grids(l+1)%matp,grids(l+1)%v) + CALL relax(info%nu2) + END IF +! + CONTAINS + SUBROUTINE relax(nu) + INTEGER, INTENT(in) :: nu + SELECT CASE (TRIM(info%relax)) + CASE ("jac") + IF(ALLOCATED(grids(l)%mata)) THEN + CALL jacobi(grids(l)%mata, info%omega, nu, grids(l)%v1d, grids(l)%f1d) + ELSE + CALL jacobi(grids(l)%mata_cds, info%omega, nu, grids(l)%v1d, grids(l)%f1d) + END IF + CASE ("gs") + IF(ALLOCATED(grids(l)%mata)) THEN + CALL gs(grids(l)%mata, nu, grids(l)%v1d, grids(l)%f1d) + ELSE + CALL gs(grids(l)%mata_cds, nu, grids(l)%v1d, grids(l)%f1d) + END IF + CASE default + PRINT*, "relax ", info%relax, " NOT IMPLEMENTED!" + STOP + END SELECT + END SUBROUTINE relax + END SUBROUTINE mg_2d +!-------------------------------------------------------------------------------- + RECURSIVE SUBROUTINE mg_cyl(grids, info, l, nluniq_in) +! +! Execute a recursive V-cycle +! + TYPE(grid2d), INTENT(inout) :: grids(:) + TYPE(mg_info), INTENT(in) :: info + INTEGER, INTENT(in) :: l + LOGICAL, INTENT(in), OPTIONAL :: nluniq_in +! + DOUBLE PRECISION, ALLOCATABLE, TARGET :: resid(:,:) + DOUBLE PRECISION, POINTER :: resid1d(:) + INTEGER :: levels, k, m1, m2, r1, r2 + LOGICAL :: nluniq +! + levels = info%levels + m1 = SIZE(grids(l)%v,1) + m2 = SIZE(grids(l)%v,2) + r1 = grids(l)%rank(1) ! r1 = m1 + r2 = grids(l)%rank(2) ! r2 = m2-1 + IF(PRESENT(nluniq_in)) THEN + nluniq = nluniq_in + ELSE + nluniq = .TRUE. + END IF +! + IF(l.EQ.levels) THEN + grids(levels)%v1d = grids(levels)%f1d + CALL direct_solve(grids(levels), grids(levels)%v1d, debug=.FALSE.) + ELSE + CALL relax(info%nu1) + ALLOCATE(resid(m1,m2)); resid1d(1:r1*r2) => resid + IF(ALLOCATED(grids(l)%mata)) THEN + resid1d(:) = grids(l)%f1d(:) - vmx(grids(l)%mata, grids(l)%v1d) + ELSE + resid1d(:) = grids(l)%f1d(:) - vmx(grids(l)%mata_cds, grids(l)%v1d) + END IF +! + grids(l+1)%f(:,:) = restrict_cyl(grids(l+1), resid, nluniq) +! + DEALLOCATE(resid) + grids(l+1)%v1d = 0.0d0 +! +! Only 1 call to the coarsest level + DO k=1,MERGE(info%mu, 1, (l+1).NE.levels) + CALL mg_cyl(grids, info, l+1, nluniq) + END DO +! + grids(l)%v(:,1:r2) = grids(l)%v(:,1:r2) + & + & prolong_cyl(grids(l+1),grids(l+1)%v, nluniq) +! + CALL relax(info%nu2) + END IF +! + CONTAINS + SUBROUTINE relax(nu) + INTEGER, INTENT(in) :: nu + SELECT CASE (TRIM(info%relax)) + CASE ("jac") + IF(ALLOCATED(grids(l)%mata)) THEN + CALL jacobi(grids(l)%mata, info%omega, nu, grids(l)%v1d, grids(l)%f1d) + ELSE + CALL jacobi(grids(l)%mata_cds, info%omega, nu, grids(l)%v1d, grids(l)%f1d) + END IF + CASE ("gs") + IF(ALLOCATED(grids(l)%mata)) THEN + CALL gs(grids(l)%mata, nu, grids(l)%v1d, grids(l)%f1d) + ELSE + CALL gs(grids(l)%mata_cds, nu, grids(l)%v1d, grids(l)%f1d) + END IF + CASE default + PRINT*, "relax ", info%relax, " NOT IMPLEMENTED!" + STOP + END SELECT + END SUBROUTINE relax + END SUBROUTINE mg_cyl +!-------------------------------------------------------------------------------- + FUNCTION prolong_1d(matp,vcoarse) RESULT(vfine) +! +! Prolongation +! + TYPE(gemat), INTENT(in) :: matp + DOUBLE PRECISION, INTENT(in) :: vcoarse(:) + DOUBLE PRECISION :: vfine(matp%mrows) +! + vfine = vmx(matp,vcoarse) + END FUNCTION prolong_1d +!-------------------------------------------------------------------------------- + FUNCTION restrict_1d(matp,vfine) RESULT(vcoarse) +! +! Restriction +! + TYPE(gemat), INTENT(in) :: matp + DOUBLE PRECISION, INTENT(in) :: vfine(:) + DOUBLE PRECISION :: vcoarse(matp%ncols) +! + vcoarse = vmx(matp,vfine,'T') + END FUNCTION restrict_1d +!-------------------------------------------------------------------------------- + FUNCTION prolong_2d(matp,vcoarse) RESULT(vfine) +! +! Prolongation +! + TYPE(gemat), INTENT(in) :: matp(2) + DOUBLE PRECISION, INTENT(in) :: vcoarse(:,:) + DOUBLE PRECISION, ALLOCATABLE :: vfine(:,:) +! + DOUBLE PRECISION, POINTER :: pmat1(:,:), pmat2(:,:) + DOUBLE PRECISION, ALLOCATABLE :: temp(:,:) + DOUBLE PRECISION :: one=1.0d0, zero=0.0d0 + INTEGER :: m1, m1p, m2, m2p +! + pmat1 => matp(1)%val + pmat2 => matp(2)%val + m1 = SIZE(pmat1,1); m1p = SIZE(pmat1,2) + m2 = SIZE(pmat2,1); m2p = SIZE(pmat2,2) + ALLOCATE(vfine(m1,m2)) + ALLOCATE(temp(m1,m2p)) +! +! Compute (P1) * V +! + CALL dgemm('N', 'N', m1, m2p, m1p, one, pmat1, m1, vcoarse, m1p, zero, & + & temp, m1) +! +! Compute (P1) * V * (P2)^T +! + CALL dgemm('N', 'T', m1, m2, m2p, one, temp, m1, pmat2, m2, zero, & + & vfine, m1) +! + DEALLOCATE(temp) + END FUNCTION prolong_2d +!-------------------------------------------------------------------------------- + FUNCTION prolong_2d_csr(matp,vcoarse) RESULT(vfine) +! +! Prolongation using CSR prolongation matrix +! + TYPE(csr_mat), INTENT(in) :: matp(2) + DOUBLE PRECISION, INTENT(in) :: vcoarse(:,:) + DOUBLE PRECISION, ALLOCATABLE :: vfine(:,:) +! + DOUBLE PRECISION, ALLOCATABLE :: temp(:,:) + INTEGER :: m1, m1p, m2, m2p + INTEGER :: i, j, k, kk +! + m1 = matp(1)%mrows; m1p = matp(1)%ncols + m2 = matp(2)%mrows; m2p = matp(2)%ncols + ALLOCATE(vfine(m1,m2)) + ALLOCATE(temp(m1p,m2)) + temp = 0.0d0 + vfine = 0.0d0 +! +! Compute temp = V * (P2)^T +! t_ij = sum_{k=1}^{m2p} V_ik (P2)_jk, i=1:m1p, j=1:m2 +! + DO j=1,m2 + DO kk=matp(2)%irow(j),matp(2)%irow(j+1)-1 + k = matp(2)%cols(kk) + temp(:,j) = temp(:,j) + vcoarse(:,k)*matp(2)%val(kk) + END DO + END DO +! +! Compute (P1) * V * (P2)^T +! V_ij = sum_{k=1}^{m1p} (P1)_ik t_kj, i=1:m1, j=1:m2 +! + DO i=1,m1 + DO kk=matp(1)%irow(i),matp(1)%irow(i+1)-1 + k = matp(1)%cols(kk) + vfine(i,:) = vfine(i,:) + matp(1)%val(kk)*temp(k,:) + END DO + END DO +! + DEALLOCATE(temp) + END FUNCTION prolong_2d_csr +!-------------------------------------------------------------------------------- + FUNCTION restrict_2d(matp,vfine) RESULT(vcoarse) +! +! Restriction +! + TYPE(gemat), INTENT(in) :: matp(2) + DOUBLE PRECISION, INTENT(in) :: vfine(:,:) + DOUBLE PRECISION, ALLOCATABLE :: vcoarse(:,:) +! + DOUBLE PRECISION, POINTER :: pmat1(:,:), pmat2(:,:) + DOUBLE PRECISION, ALLOCATABLE :: temp(:,:) + DOUBLE PRECISION :: one=1.0d0, zero=0.0d0 + INTEGER :: m1, m1p, m2, m2p +! + pmat1 => matp(1)%val + pmat2 => matp(2)%val + m1 = SIZE(pmat1,1); m1p = SIZE(pmat1,2) + m2 = SIZE(pmat2,1); m2p = SIZE(pmat2,2) + ALLOCATE(vcoarse(m1p,m2p)) + ALLOCATE(temp(m1p,m2)) +! +! Compute (P1)^T * V +! + CALL dgemm('T', 'N', m1p, m2, m1, one, pmat1, m1, vfine, m1, zero, & + & temp, m1p) +! +! Compute (P1)^T * V * P2 +! + CALL dgemm('N', 'N', m1p, m2p, m2, one, temp, m1p, pmat2, m2, zero, & + & vcoarse, m1p) +! + DEALLOCATE(temp) + END FUNCTION restrict_2d +!-------------------------------------------------------------------------------- + FUNCTION restrict_2d_csr(matp,vfine) RESULT(vcoarse) +! +! Restriction using CSR prolongation matrix +! + TYPE(csr_mat), INTENT(in) :: matp(2) + DOUBLE PRECISION, INTENT(in) :: vfine(:,:) + DOUBLE PRECISION, ALLOCATABLE :: vcoarse(:,:) +! + DOUBLE PRECISION, ALLOCATABLE :: temp(:,:) + INTEGER :: m1, m1p, m2, m2p + INTEGER :: i, ii, j, jj, k +! + m1 = matp(1)%mrows; m1p = matp(1)%ncols + m2 = matp(2)%mrows; m2p = matp(2)%ncols + ALLOCATE(vcoarse(m1p,m2p)) + ALLOCATE(temp(m1,m2p)) + temp = 0.0d0 + vcoarse = 0.0d0 +! +! Compute temp = V * (R2)^T = V * (P2) +! t_ij = sum_{k=1}^{m2} V_ik (P2)_kj, i=1:m1, j=1:m2p +! + DO k=1,m2 + DO jj=matp(2)%irow(k),matp(2)%irow(k+1)-1 + j = matp(2)%cols(jj) + temp(:,j) = temp(:,j) + vfine(:,k)*matp(2)%val(jj) + END DO + END DO +! +! Compute (R1) * V * (R2)^T = (P1)^T) * V * (P2) +! V_ij = sum_{k=1}^{m1p} (P1)_ki t_kj, i=1:m1p, j=1:m2p +! + DO k=1,m1 + DO ii=matp(1)%irow(k),matp(1)%irow(k+1)-1 + i = matp(1)%cols(ii) + vcoarse(i,:) = vcoarse(i,:) + matp(1)%val(ii)*temp(k,:) + END DO + END DO +! + DEALLOCATE(temp) + END FUNCTION restrict_2d_csr +!-------------------------------------------------------------------------------- + FUNCTION prolong_cyl(grid, vcoarse, nluniq) RESULT(vfine) +! +! Prolongation (cylindrical case) +! + TYPE(grid2d) :: grid + DOUBLE PRECISION, INTENT(inout) :: vcoarse(:,:) + DOUBLE PRECISION, ALLOCATABLE :: vfine(:,:) + LOGICAL, INTENT(in) :: nluniq +! + DOUBLE PRECISION, ALLOCATABLE :: temp(:,:) + INTEGER :: m1, m1p, m2, m2p + INTEGER :: i, j, k, kk +! + m1 = grid%matp(1)%mrows; m1p = grid%matp(1)%ncols + m2 = grid%matp(2)%mrows; m2p = grid%matp(2)%ncols + ALLOCATE(vfine(m1,m2)) + ALLOCATE(temp(m1p,m2)) + temp = 0.0d0 + vfine = 0.0d0 +! + IF(nluniq) vcoarse(1,1:m2p-1) = vcoarse(1,m2p) +! +! Compute temp = V * (P2)^T +! t_ij = sum_{k=1}^{m2p} V_ik (P2)_jk, i=1:m1p, j=1:m2 +! + DO j=1,m2 + DO kk=grid%matp(2)%irow(j),grid%matp(2)%irow(j+1)-1 + k = grid%matp(2)%cols(kk) + temp(:,j) = temp(:,j) + vcoarse(:,k)*grid%matp(2)%val(kk) + END DO + END DO +! +! Compute (P1) * V * (P2)^T +! V_ij = sum_{k=1}^{m1p} (P1)_ik t_kj, i=1:m1, j=1:m2 +! + DO i=1,m1 + DO kk=grid%matp(1)%irow(i),grid%matp(1)%irow(i+1)-1 + k = grid%matp(1)%cols(kk) + vfine(i,:) = vfine(i,:) + grid%matp(1)%val(kk)*temp(k,:) + END DO + END DO +!!$! +!!$! Compute (P1) * V +!!$! +!!$ CALL dgemm('N', 'N', m1, m2p, m1p, one, pmat1, m1, vcoarse, m1p, zero, & +!!$ & temp, m1) +!!$! +!!$! Compute (P1) * V * (P2)^T +!!$! +!!$ CALL dgemm('N', 'T', m1, m2, m2p, one, temp, m1, pmat2, m2, zero, & +!!$ & vfine, m1) +! + IF(nluniq) THEN + vcoarse(1,1:m2p-1) = vcoarse(1,1:m2p-1) - vcoarse(1,m2p) + vfine(1,1:m2-1) = vfine(1,1:m2-1) - vfine(1,m2) + END IF +! + DEALLOCATE(temp) + END FUNCTION prolong_cyl +!-------------------------------------------------------------------------------- + FUNCTION restrict_cyl(grid, vfine, nluniq) RESULT(vcoarse) +! +! Restriction (cylindrical case) +! + TYPE(grid2d) :: grid + DOUBLE PRECISION, INTENT(inout) :: vfine(:,:) + DOUBLE PRECISION, ALLOCATABLE :: vcoarse(:,:) + LOGICAL, INTENT(in) :: nluniq +! + DOUBLE PRECISION, ALLOCATABLE :: temp(:,:) + INTEGER :: m1, m1p, m2, m2p + INTEGER :: i, ii, j, jj, k +! + m1 = grid%matp(1)%mrows; m1p = grid%matp(1)%ncols + m2 = grid%matp(2)%mrows; m2p = grid%matp(2)%ncols + ALLOCATE(vcoarse(m1p,m2p)) + ALLOCATE(temp(m1,m2p)) + temp = 0.0d0 + vcoarse = 0.0d0 +! + IF(nluniq) vfine(1,1:m2) = vfine(1,m2)/REAL(m2,8) +! +! Compute temp = V * (R2)^T = V * (P2) +! t_ij = sum_{k=1}^{m2} V_ik (P2)_kj, i=1:m1, j=1:m2p +! + DO k=1,m2 + DO jj=grid%matp(2)%irow(k),grid%matp(2)%irow(k+1)-1 + j = grid%matp(2)%cols(jj) + temp(:,j) = temp(:,j) + vfine(:,k)*grid%matp(2)%val(jj) + END DO + END DO +! +! Compute (R1) * V * (R2)^T = (P1)^T) * V * (P2) +! V_ij = sum_{k=1}^{m1p} (P1)_ki t_kj, i=1:m1p, j=1:m2p +! + DO k=1,m1 + DO ii=grid%matp(1)%irow(k),grid%matp(1)%irow(k+1)-1 + i = grid%matp(1)%cols(ii) + vcoarse(i,:) = vcoarse(i,:) + grid%matp(1)%val(ii)*temp(k,:) + END DO + END DO +! +!!$! Compute (P1)^T * V +!!$! +!!$ CALL dgemm('T', 'N', m1p, m2, m1, one, pmat1, m1, vfine, m1, zero, & +!!$ & temp, m1p) +!!$! +!!$! Compute (P1)^T * V * P2 +!!$! +!!$ CALL dgemm('N', 'N', m1p, m2p, m2, one, temp, m1p, pmat2, m2, zero, & +!!$ & vcoarse, m1p) +! + IF(nluniq) THEN + vfine(1,m2) = SUM(vfine(1,1:m2)); vfine(1,1:m2-1) = 0.0d0 + vcoarse(1,m2p) = SUM(vcoarse(1,1:m2p)); vcoarse(1,m2p-1) = 0.0d0 + END IF +! + DEALLOCATE(temp) +! + END FUNCTION restrict_cyl +!-------------------------------------------------------------------------------- + SUBROUTINE massmat_ge(spl, alpha, matm) +! +! Compute mass matrix +! + TYPE(spline1d), INTENT(in) :: spl + INTEGER, INTENT(in) :: alpha + TYPE(gemat), INTENT(out) :: matm +! + INTEGER :: nrank, nx, nidbas, kl, ku +! + CALL get_dim(spl, nrank, nx, nidbas) + kl=nidbas; ku=kl + IF(spl%period) nrank = nx + CALL init(nrank, 1, matm) + CALL conmat(spl, matm, coefeq) + CONTAINS + SUBROUTINE coefeq(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE PRECISION, INTENT(out) :: c(:) + c(1) = x**alpha + idt(1) = 0 + idw(1) = 0 + END SUBROUTINE coefeq + END SUBROUTINE massmat_ge +!-------------------------------------------------------------------------------- + SUBROUTINE massmat_gb(spl, alpha, matm) +! +! Compute mass matrix +! + TYPE(spline1d), INTENT(in) :: spl + INTEGER, INTENT(in) :: alpha + TYPE(gbmat), INTENT(out) :: matm +! + INTEGER :: nrank, nx, nidbas, kl, ku +! + CALL get_dim(spl, nrank, nx, nidbas) + kl=nidbas; ku=kl + CALL init(kl, ku, nrank, 1, matm) + CALL conmat(spl, matm, coefeq) + CONTAINS + SUBROUTINE coefeq(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE PRECISION, INTENT(out) :: c(:) + c(1) = x**alpha + idt(1) = 0 + idw(1) = 0 + END SUBROUTINE coefeq + END SUBROUTINE massmat_gb +!-------------------------------------------------------------------------------- + SUBROUTINE massmat_periodic(spl, alpha, matm) +! +! Compute mass matrix (periodic case) +! + TYPE(spline1d), INTENT(in) :: spl + INTEGER, INTENT(in) :: alpha + TYPE(periodic_mat), INTENT(out) :: matm +! + INTEGER :: dim, nrank, nx, nidbas, kl, ku +! + CALL get_dim(spl, dim, nx, nidbas) + kl=nidbas; ku=kl + nrank = nx + CALL init(kl, ku, nrank, 1, matm) + CALL conmat(spl, matm, coefeq) + CONTAINS + SUBROUTINE coefeq(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE PRECISION, INTENT(out) :: c(:) + c(1) = x**alpha + idt(1) = 0 + idw(1) = 0 + END SUBROUTINE coefeq + END SUBROUTINE massmat_periodic +!-------------------------------------------------------------------------------- + SUBROUTINE femat_2d_csr(spl, mat, coefeq, nterms, maxder_in, nat_order_in, & + & noinit) +! +! Compute 2d fe CSR matrix +! + TYPE(spline2d), INTENT(in) :: spl + TYPE(csr_mat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: nterms + INTEGER, INTENT(in), OPTIONAL :: maxder_in(2) + LOGICAL, INTENT(in), OPTIONAL :: nat_order_in + LOGICAL, INTENT(in), OPTIONAL :: noinit + INTERFACE + SUBROUTINE coefeq(x, y, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x, y + INTEGER, INTENT(out) :: idt(:,:), idw(:,:) + DOUBLE PRECISION, INTENT(out) :: c(:) + END SUBROUTINE coefeq + END INTERFACE +! + INTEGER :: nrank, ndim(2), nints(2), maxder(2) + LOGICAL :: nat_order, run_init +! + CALL get_dim(spl, ndim, nints) + IF(spl%sp2%period) THEN + nrank = ndim(1)*nints(2) + ELSE + nrank = PRODUCT(ndim) + END IF +! + maxder = 1; IF(PRESENT(maxder_in)) maxder = maxder_in + nat_order = .TRUE.; IF(PRESENT(nat_order_in)) nat_order = nat_order_in +! + run_init = .TRUE. + IF(PRESENT(noinit)) run_init = .NOT.noinit + IF(run_init) CALL init(nrank, nterms, mat) +! + CALL conmat(spl, mat, coefeq, maxder, nat_order) + END SUBROUTINE femat_2d_csr +!-------------------------------------------------------------------------------- + SUBROUTINE femat_ge(spl, mat, coefeq) +! +! Compute fe matrix +! + TYPE(spline1d), INTENT(in) :: spl + TYPE(gemat), INTENT(out) :: mat + INTERFACE + SUBROUTINE coefeq(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE PRECISION, INTENT(out) :: c(:) + END SUBROUTINE coefeq + END INTERFACE +! + INTEGER :: nrank, nx, nidbas, kl, ku +! + CALL get_dim(spl, nrank, nx, nidbas) + kl=nidbas; ku=kl + IF(spl%period) nrank = nx + CALL init(nrank, 2, mat) + CALL conmat(spl, mat, coefeq) + END SUBROUTINE femat_ge +!-------------------------------------------------------------------------------- + SUBROUTINE femat_gb(spl, mat, coefeq) +! +! Compute fe matrix +! + TYPE(spline1d), INTENT(in) :: spl + TYPE(gbmat), INTENT(out) :: mat + INTERFACE + SUBROUTINE coefeq(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE PRECISION, INTENT(out) :: c(:) + END SUBROUTINE coefeq + END INTERFACE +! + INTEGER :: nrank, nx, nidbas, kl, ku +! + CALL get_dim(spl, nrank, nx, nidbas) + kl=nidbas; ku=kl + CALL init(kl, ku, nrank, 2, mat) + CALL conmat(spl, mat, coefeq) + END SUBROUTINE femat_gb +!-------------------------------------------------------------------------------- + SUBROUTINE femat_periodic(spl, mat, coefeq) +! +! Compute fe matrix +! + TYPE(spline1d), INTENT(in) :: spl + TYPE(periodic_mat), INTENT(out) :: mat + INTERFACE + SUBROUTINE coefeq(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE PRECISION, INTENT(out) :: c(:) + END SUBROUTINE coefeq + END INTERFACE +! + INTEGER :: nrank, dim, nx, nidbas, kl, ku +! + CALL get_dim(spl, dim, nx, nidbas) + kl=nidbas; ku=kl + nrank = nx + CALL init(kl, ku, nrank, 2, mat) + CALL conmat(spl, mat, coefeq) + END SUBROUTINE femat_periodic +!-------------------------------------------------------------------------------- + SUBROUTINE ibcmat_1d(irow, mat) +! +! Set BC at row irow to 0 +! + INTEGER, INTENT(in) :: irow + TYPE(gbmat), INTENT(inout) :: mat +! + DOUBLE PRECISION :: a(mat%rank) +! + a(:)=0.0d0; a(irow)=1.0d0 + CALL putrow(mat, irow, a) + CALL putcol(mat, irow, a) + END SUBROUTINE ibcmat_1d +!-------------------------------------------------------------------------------- + SUBROUTINE ibcmat_2d(grid, mat, nluniq_in) +! +! Impose BC on matrix (asume natural ordering) +! I = (j-1)*N1 + i, i=1:N1, j=1:N2 +! + TYPE(grid2d), INTENT(in) :: grid + TYPE(csr_mat), INTENT(inout) :: mat + LOGICAL, INTENT(in), OPTIONAL :: nluniq_in +! + DOUBLE PRECISION :: temp(mat%rank) + INTEGER :: n1e, n2e, nrank, i, j, irow, jcol + LOGICAL :: nlper1, nlper2, nlcart, nluniq +! + n1e = grid%rank(1) + n2e = grid%rank(2) + nrank = mat%rank + nlper1 = grid%spl%sp1%period + nlper2 = grid%spl%sp2%period + nlcart = (.NOT.nlper1) .AND. (.NOT.nlper2) + IF(PRESENT(nluniq_in)) THEN + nluniq = nluniq_in + ELSE + nluniq = .TRUE. + END IF +! +! BC at x=0 ! Dirichlet for Cartesian, unicity for cylindrical problem + IF(nlcart) THEN + i=1 + DO j=1,n2e + irow = (j-1)*n1e + i + temp = 0.0d0; temp(irow) = 1.0d0 + CALL putrow(mat, irow, temp) + CALL putcol(mat, irow, temp) + END DO + ELSE + i=1 + IF(nluniq) THEN + CALL unicity + END IF + END IF +! +! BC at x=1 ! For both Cartesian and cylindrical + i=n1e + DO j=1,n2e + irow = (j-1)*n1e + i + temp = 0.0d0; temp(irow) = 1.0d0 + CALL putrow(mat, irow, temp) + CALL putcol(mat, irow, temp) + END DO +! +! BC at y=0 ! Only for Cartesian problem + IF(nlcart) THEN + j=1 + DO i=1,n1e + irow = (j-1)*n1e + i + temp = 0.0d0; temp(irow) = 1.0d0 + CALL putrow(mat, irow, temp) + CALL putcol(mat, irow, temp) + END DO + END IF +! +! BC at y=1 ! Only for Cartesian problem + IF(nlcart) THEN + j=n2e + DO i=1,n1e + irow = (j-1)*n1e + i + temp = 0.0d0; temp(irow) = 1.0d0 + CALL putrow(mat, irow, temp) + CALL putcol(mat, irow, temp) + END DO + END IF +! + CONTAINS + SUBROUTINE unicity + INTEGER :: irow0, jcol0 + DOUBLE PRECISION :: temp_sum(mat%rank) +! + irow0 = (n2e-1)*n1e + i + jcol0 = irow0 +! +! Vertical sum + temp_sum(:) = 0.0d0 + DO j=1,n2e + irow = (j-1)*n1e + i + temp = 0.0d0 + CALL getrow(mat, irow, temp) + temp_sum(:) = temp_sum(:) + temp(:) + END DO + CALL putrow(mat, irow0, temp_sum) +! +! Horizontal sum + temp_sum(:) = 0.0d0 + DO j=1,n2e + jcol = (j-1)*n1e + i + temp = 0.0d0 + CALL getcol(mat, jcol, temp) + temp_sum(:) = temp_sum(:) + temp(:) + END DO + CALL putcol(mat, jcol0, temp_sum) +! +! The away operator + DO j=1,n2e-1 + irow = (j-1)*n1e + i + temp = 0.0d0; temp(irow) = 1.0d0 + CALL putrow(mat, irow, temp) + CALL putcol(mat, irow, temp) + END DO + END SUBROUTINE unicity + END SUBROUTINE ibcmat_2d +!-------------------------------------------------------------------------------- + SUBROUTINE mod_transf_full(mat,k) +! +! Modify grid transfer matrix. +! + DOUBLE PRECISION, INTENT(inout) :: mat(:,:) + INTEGER, INTENT(in) :: k + INTEGER :: m, n +! + m=SIZE(mat,1) + n=SIZE(mat,2) +! +! Clear matrix small elements. + WHERE( ABS(mat) < 1.d-8) mat=0.0d0 +! +! Left boundary + IF(k.EQ.1 .OR. k.EQ.3) THEN + mat(2:m,1) = 0.0d0 + END IF +! +! Right boundary + IF(k.EQ.2 .OR. k.EQ.3) THEN + mat(1:m-1,n) = 0.0d0 + END IF + END SUBROUTINE mod_transf_full +!-------------------------------------------------------------------------------- + SUBROUTINE mod_transf_csr(mat,k) +! +! Modify grid transfer matrix. +! + TYPE(csr_mat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: k +! + DOUBLE PRECISION :: acol(mat%mrows) + INTEGER :: m, n +! + m=mat%mrows + n=mat%ncols +! +! Left boundary + acol = 0.0d0 + IF(k.EQ.1 .OR. k.EQ.3) THEN + CALL getele(mat, 1, 1, acol(1)) + CALL putcol(mat, 1, acol) + END IF +! +! Right boundary + acol = 0.0d0 + IF(k.EQ.2 .OR. k.EQ.3) THEN + CALL getele(mat, m, n, acol(m)) + CALL putcol(mat, n, acol) + END IF + END SUBROUTINE mod_transf_csr +!-------------------------------------------------------------------------------- + SUBROUTINE calc_pmat(grid1, grid2, pmat, debug_in) +! +! Compute prolongation matrix by collocation +! + TYPE(grid1d), INTENT(in) :: grid1, grid2 + TYPE(gemat), INTENT(out) :: pmat + LOGICAL, OPTIONAL, INTENT(in) :: debug_in +! + TYPE(gemat) :: mat_interp + DOUBLE PRECISION, ALLOCATABLE :: fun(:,:), fun2(:,:) + DOUBLE PRECISION :: xinter + INTEGER, ALLOCATABLE :: jcol(:) + INTEGER :: nidbas, nfine, ncoarse, nderv + LOGICAL :: nlper, debug + INTEGER :: i, i0, ii, k, irow +!================================================================================ +! 0. Prologue +! + debug = .FALSE. + IF(PRESENT(debug_in)) debug = debug_in +! + nfine = grid1%n + ncoarse = grid2%n + nidbas = grid1%spl%order - 1 + nlper = grid1%spl%period +! + IF(nlper) THEN + IF(ncoarse .LT. nidbas+1) THEN + WRITE(*,'(/a)') '** NX/2 should be larger or equal to NIDBAS+1 **' + STOP + END IF + END IF +! + IF(debug) THEN + WRITE(*,'(/2(a,i0))') 'nfine = ', nfine, ', ncoarse = ', ncoarse + IF(nlper) WRITE(*,'(a)') 'Grids are periodic!' + END IF +! + ALLOCATE(jcol(0:nidbas)) + ALLOCATE(fun(0:nidbas,1)) + IF(nlper) THEN + CALL init(ncoarse, 1, pmat, mrows=nfine) + CALL init(nfine, 1, mat_interp) + ELSE + CALL init(ncoarse+nidbas, 1, pmat, mrows=nfine+nidbas) + CALL init(nfine+nidbas, 1, mat_interp) + END IF +!================================================================================ +! 1. Interpolation matrix +! + irow = 0 + i0 = 1 +! +! Left bound + IF(.NOT.nlper) THEN + IF(MODULO(nidbas,2).EQ.1) THEN + nderv = (nidbas-1)/2 ! ndidbas = 1, 3, 5, ... + ALLOCATE(fun2(0:nidbas,nderv+1)) + CALL basfun(grid1%x(0), grid1%spl, fun2, 1) + jcol = 1 + (/ (i, i=0,nidbas) /) + DO k=1,nderv+1 + irow = irow+1 + mat_interp%val(irow,jcol) = fun2(0:nidbas,k) + END DO + i0 = 2 ! Skip the first grid point + ELSE + nderv = nidbas/2-1 ! ndidbas = 2, 4, ... + ALLOCATE(fun2(0:nidbas,nderv+1)) + CALL basfun(grid1%x(0), grid1%spl, fun2, 1) + jcol = 1 + (/ (i, i=0,nidbas) /) + DO k=1,nderv+1 + irow = irow+1 + mat_interp%val(irow,jcol) = fun2(0:nidbas,k) + END DO + END IF + END IF + DO i=i0,nfine + IF(MODULO(nidbas,2).EQ.0) THEN + xinter = (grid1%x(i-1)+grid1%x(i))/2.0d0 ! Left bound of interval + ELSE + xinter = grid1%x(i-1) ! Left bound of interval + END IF + CALL basfun(xinter, grid1%spl, fun, i) + irow = irow+1 + DO k=0,nidbas + jcol(k) = i+k + END DO + IF(nlper) jcol = MODULO(jcol-1,nfine)+1 + mat_interp%val(irow,jcol) = fun(0:nidbas,1) + END DO +! +! Right bound + IF(.NOT.nlper) THEN + CALL basfun(grid1%x(nfine), grid1%spl, fun2, nfine) + jcol = nfine + (/ (i, i=0,nidbas) /) + DO k=nderv+1,1,-1 + irow = irow+1 + mat_interp%val(irow,jcol) = fun2(0:nidbas,k) + END DO + END IF + IF(debug) CALL printmat('** Interpolation matrix **', mat_interp) +!================================================================================ +! 2. RHS matrix +! + irow = 0 + i0 = 1 + DO i=1,ncoarse + ii = 2*i-1 + CALL comp_rhs(ii) + CALL comp_rhs(ii+1) + END DO + IF(debug) CALL printmat('** RHS matrix **', pmat) +!================================================================================ +! 3. Compute prolongation matrix +! + CALL factor(mat_interp) + CALL bsolve(mat_interp, pmat%val) +!================================================================================ +! 9. Epilogue +! + CALL destroy(mat_interp) + DEALLOCATE(jcol) + DEALLOCATE(fun) + IF(ALLOCATED(fun2)) DEALLOCATE(fun2) +! + CONTAINS + SUBROUTINE comp_rhs(ii) + INTEGER, INTENT(in) :: ii + INTEGER :: k +! +! Left bounds for non-periodic cases + IF(.NOT.nlper .AND. ii.EQ.1) THEN + CALL basfun(grid1%x(0), grid2%spl, fun2, 1) + jcol = 1 + (/ (k, k=0,nidbas) /) + DO k=1,nderv+1 + irow = irow+1 + pmat%val(irow,jcol) = fun2(0:nidbas,k) + END DO + IF(MODULO(nidbas,2).EQ.1) RETURN ! Skip + END IF +! + IF(MODULO(nidbas,2).EQ.0) THEN + xinter = (grid1%x(ii-1)+grid1%x(ii))/2.0d0 ! Left bound of interval + ELSE + xinter = grid1%x(ii-1) ! Left bound of interval + END IF + CALL basfun(xinter, grid2%spl, fun, i) + irow = irow+1 + DO k=0,nidbas + jcol(k) = i+k + END DO + IF(nlper) jcol = MODULO(jcol-1,ncoarse)+1 + pmat%val(irow,jcol) = fun(0:nidbas,1) +! +! Right bounds for non-periodic cases + IF(.NOT.nlper .AND. ii.EQ.nfine) THEN + CALL basfun(grid1%x(nfine), grid2%spl, fun2, ncoarse) + jcol = ncoarse + (/ (k, k=0,nidbas) /) + DO k=nderv+1,1,-1 + irow = irow+1 + pmat%val(irow,jcol) = fun2(0:nidbas,k) + END DO + END IF + END SUBROUTINE comp_rhs + END SUBROUTINE calc_pmat +!-------------------------------------------------------------------------------- + SUBROUTINE disrhs_1d(spl, farr, frhs) +! +! Projection of RHS on spline basis functions +! + TYPE(spline1d) :: spl + DOUBLE PRECISION, INTENT(out) :: farr(:) + INTERFACE + DOUBLE PRECISION FUNCTION frhs(x) + DOUBLE PRECISION, INTENT(in) :: x + END FUNCTION frhs + END INTERFACE + DOUBLE PRECISION :: contrib +! + DOUBLE PRECISION, POINTER :: xg(:,:), wg(:,:) + DOUBLE PRECISION, ALLOCATABLE :: fun(:,:) + INTEGER :: ndim, n, nidbas, ng + INTEGER :: i, ig, it, irow + LOGICAL :: nlper +! + CALL get_dim(spl, ndim, n, nidbas) + nlper = spl%period + xg => spl%gausx ! xg(ng,n) + wg => spl%gausw ! wg(ng,n) + ng = SIZE(xg,1) + ALLOCATE(fun(0:nidbas,1)) +! + farr = 0.0d0 + DO i=1,n + DO ig=1,ng + CALL basfun(xg(ig,i), spl, fun, i) + contrib = wg(ig,i)*frhs(xg(ig,i)) + DO it=0,nidbas + irow = i+it + IF(nlper) irow = MODULO(irow-1,n) +1 + farr(irow) = farr(irow)+contrib*fun(it,1) + END DO + END DO + END DO +! + DEALLOCATE(fun) + END SUBROUTINE disrhs_1d +!-------------------------------------------------------------------------------- + SUBROUTINE disrhs_2d(spl, farr, frhs) +! +! Projection of RHS on spline basis functions +! + TYPE(spline2d) :: spl + DOUBLE PRECISION, INTENT(out) :: farr(:,:) + INTERFACE + DOUBLE PRECISION FUNCTION frhs(x,y) + DOUBLE PRECISION, INTENT(in) :: x,y + END FUNCTION frhs + END INTERFACE + DOUBLE PRECISION :: contrib +! + DOUBLE PRECISION, POINTER :: xg1(:,:), wg1(:,:) + DOUBLE PRECISION, POINTER :: xg2(:,:), wg2(:,:) + DOUBLE PRECISION, ALLOCATABLE :: fun1(:,:) + DOUBLE PRECISION, ALLOCATABLE :: fun2(:,:) + INTEGER :: ndim1, n1, nidbas1, ng1 + INTEGER :: ndim2, n2, nidbas2, ng2 + INTEGER :: i1, ig1, it1, irow + INTEGER :: i2, ig2, it2, jcol +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) +! + xg1 => spl%sp1%gausx ! xg(ng,n) + wg1 => spl%sp1%gausw ! wg(ng,n) + ng1 = SIZE(xg1,1) + xg2 => spl%sp2%gausx ! xg(ng,n) + wg2 => spl%sp2%gausw ! wg(ng,n) + ng2 = SIZE(xg2,1) +! + ALLOCATE(fun1(0:nidbas1,1)) + ALLOCATE(fun2(0:nidbas2,1)) +! + farr = 0.0d0 + DO i1=1,n1 + DO ig1=1,ng1 + CALL basfun(xg1(ig1,i1), spl%sp1, fun1, i1) + DO i2=1,n2 + DO ig2=1,ng2 + CALL basfun(xg2(ig2,i2), spl%sp2, fun2, i2) + contrib = wg1(ig1,i1)*wg2(ig2,i2)* & + & frhs(xg1(ig1,i1), xg2(ig2,i2)) + DO it1=0,nidbas1 + irow = i1+it1 + DO it2=0,nidbas2 + jcol = i2+it2 + farr(irow,jcol) = farr(irow,jcol) + & + & contrib*fun1(it1,1)*fun2(it2,1) + END DO + END DO + END DO + END DO + END DO + END DO +! +! Case of periodic BC (only in 2nd dimension!) +! + IF(spl%sp2%period) THEN + DO jcol=1,nidbas2 + farr(:,jcol) = farr(:,jcol)+farr(:,jcol+n2) + farr(:,jcol+n2) = 0.0d0 + END DO + END IF + DEALLOCATE(fun1) + DEALLOCATE(fun2) + END SUBROUTINE disrhs_2d +!-------------------------------------------------------------------------------- + SUBROUTINE ibcrhs(grid, f, nluniq_in) +! +! Impose BC on RHS +! + TYPE(grid2d) :: grid + DOUBLE PRECISION, INTENT(inout) :: f(:,:) + LOGICAL, INTENT(in), OPTIONAL :: nluniq_in +! + DOUBLE PRECISION :: temp + INTEGER :: n1, n2 + LOGICAL :: nlper1, nlper2, nlcyl, nluniq +! + n1 = grid%rank(1) + n2 = grid%rank(2) + nlper1 = grid%spl%sp1%period + nlper2 = grid%spl%sp2%period + nlcyl = (.NOT.nlper1) .AND. (nlper2) + IF(PRESENT(nluniq_in)) THEN + nluniq = nluniq_in + ELSE + nluniq=.TRUE. + END IF +! +! Cylindrical case, unicity at the axis, 0 at the right side + IF(nlcyl) THEN ! + IF(nluniq) THEN + temp = SUM(f(1,1:n2)) + f(1,n2) = temp + f(1,1:n2-1) = 0.0d0 + END IF + f(n1,1:n2) = 0.0d0 +! +! Cartesian case: 0 on all 4 boundaries + ELSE + f(1,:) = 0.0d0; f(n1,:) = 0.0d0 + f(:,1) = 0.0d0; f(:,n2) = 0.0d0 + END IF + END SUBROUTINE ibcrhs +!-------------------------------------------------------------------------------- + FUNCTION disc_err_1d(spl, f, fexact) RESULT(disc_err) +! +! L2 norm of discretization error +! + TYPE(spline1d) :: spl + DOUBLE PRECISION, INTENT(in) :: f(:) + DOUBLE PRECISION :: disc_err + INTERFACE + FUNCTION fexact(x) + DOUBLE PRECISION, INTENT(in) :: x(:) + DOUBLE PRECISION :: fexact(SIZE(x)) + END FUNCTION fexact + END INTERFACE +! + DOUBLE PRECISION, ALLOCATABLE :: err(:,:) + DOUBLE PRECISION, POINTER :: xg(:,:), wg(:,:) + INTEGER :: ndim, n, nidbas, ng + INTEGER :: ig +! + CALL get_dim(spl, ndim, n, nidbas) + xg => spl%gausx ! xg(ng,n) + wg => spl%gausw ! wg(ng,n) + ng = SIZE(xg,1) +! + ALLOCATE(err(ng,n)) + CALL gridval(spl, xg(1,:), err(1,:), 0, f) + err(1,:) = (err(1,:) - fexact(xg(1,:)))**2 + DO ig=2,ng + CALL gridval(spl, xg(ig,:), err(ig,:), 0) + err(ig,:) = (err(ig,:) - fexact(xg(ig,:)))**2 + END DO +! + disc_err = SQRT(SUM(wg*err)) +! + DEALLOCATE(err) + END FUNCTION disc_err_1d +!-------------------------------------------------------------------------------- + FUNCTION disc_err_2d(spl, f, fexact) RESULT(disc_err) +! +! L2 norm of discretization error +! + TYPE(spline2d) :: spl + DOUBLE PRECISION, INTENT(in) :: f(:,:) + DOUBLE PRECISION :: disc_err + INTERFACE + FUNCTION fexact(x,y) + DOUBLE PRECISION, INTENT(in) :: x(:), y(:) + DOUBLE PRECISION :: fexact(SIZE(x),SIZE(y)) + END FUNCTION fexact + END INTERFACE +! + DOUBLE PRECISION, ALLOCATABLE :: err(:,:) + DOUBLE PRECISION, POINTER :: xg(:,:), wg1(:,:), yg(:,:), wg2(:,:) + INTEGER, DIMENSION(2) :: ndim, n, ng + INTEGER :: i, j, ig, jg + LOGICAL :: nlper1, nlper2, nlcyl +! + CALL get_dim(spl, ndim, n) + xg => spl%sp1%gausx ! xg(ng,n) + wg1 => spl%sp1%gausw ! wg(ng,n) + ng(1) = SIZE(xg,1) + yg => spl%sp2%gausx ! xg(ng,n) + wg2 => spl%sp2%gausw ! wg(ng,n) + ng(2) = SIZE(yg,1) +! + nlper1 = spl%sp1%period + nlper2 = spl%sp2%period + nlcyl = (.NOT.nlper1) .AND. (nlper2) +! + disc_err = 0.0d0 + ALLOCATE(err(n(1),n(2))) + DO ig=1,ng(1) + DO jg=1,ng(2) + IF(ig.EQ.1.AND.jg.EQ.1) THEN + CALL gridval(spl, xg(ig,:), yg(jg,:), err, [0,0], f) + ELSE + CALL gridval(spl, xg(ig,:), yg(jg,:), err, [0,0]) + END IF + err = (err - fexact(xg(ig,:), yg(jg,:)))**2 + DO i=1,n(1) + DO j=1,n(2) + IF(nlcyl) THEN + disc_err = disc_err + xg(ig,i)*wg1(ig,i)*wg2(jg,j)*err(i,j) + ELSE + disc_err = disc_err + wg1(ig,i)*wg2(jg,j)*err(i,j) + END IF + END DO + END DO + END DO + END DO + disc_err = SQRT(disc_err) +! + DEALLOCATE(err) + END FUNCTION disc_err_2d +!-------------------------------------------------------------------------------- + SUBROUTINE back_transf(grid, u, nluniq_in) +! +! Back transform solution and use periodicity (cylindrical problem) +! + TYPE(grid2d), INTENT(in) :: grid + DOUBLE PRECISION, INTENT(inout) :: u(:,:) + LOGICAL, INTENT(in), OPTIONAL :: nluniq_in +! + LOGICAL :: nluniq + INTEGER :: n, nidbas, j +! + n = grid%n(2) + nidbas = grid%spl%sp2%order-1 + IF(PRESENT(nluniq_in)) THEN + nluniq = nluniq_in + ELSE + nluniq = .TRUE. + END IF +! +! Back transform + IF(nluniq) THEN + u(1,1:n-1) = u(1,n) + END IF +! +! Periodicity + DO j=1,nidbas + u(:,j+n) = u(:,j) + END DO + END SUBROUTINE back_transf +!-------------------------------------------------------------------------------- + DOUBLE PRECISION FUNCTION normf_gb(matm, f) +! +! L2 norm of f represented by its expansion coefficients. +! + TYPE(gbmat), INTENT(in) :: matm + DOUBLE PRECISION, INTENT(in) :: f(:) + normf_gb = SQRT(DOT_PRODUCT(f, vmx(matm,f))) + END FUNCTION normf_gb +!-------------------------------------------------------------------------------- + DOUBLE PRECISION FUNCTION normf_ge(matm, f) +! +! L2 norm of f represented by its expansion coefficients. +! + TYPE(gemat), INTENT(in) :: matm + DOUBLE PRECISION, INTENT(in) :: f(:) + normf_ge = SQRT(DOT_PRODUCT(f, vmx(matm,f))) + END FUNCTION normf_ge +!-------------------------------------------------------------------------------- + DOUBLE PRECISION FUNCTION residue_gen(grid, f, u, p) +! +! Generic version of residue +! + TYPE(grid2d) :: grid + DOUBLE PRECISION, INTENT(in) :: f(:), u(:) + DOUBLE PRECISION :: r(SIZE(f)) + CHARACTER(len=*), OPTIONAL, INTENT(in) :: p +! + IF(ALLOCATED(grid%mata)) THEN + residue_gen = residue_csr(grid%mata, f, u, p) + ELSE + residue_gen = residue_cds(grid%mata_cds, f, u, p) + END IF + END FUNCTION residue_gen +!-------------------------------------------------------------------------------- + DOUBLE PRECISION FUNCTION residue_csr(mat, f, u, p) +! +! L2 norm of residue ||f-Av|| +! + TYPE(csr_mat), INTENT(in) :: mat + DOUBLE PRECISION, INTENT(in) :: f(:), u(:) + DOUBLE PRECISION :: r(SIZE(f)) + CHARACTER(len=*), OPTIONAL, INTENT(in) :: p +! + CHARACTER(len=4) :: norm_type + norm_type = '2' + IF(PRESENT(p)) norm_type = p +! + r = f-vmx(mat,u) + SELECT CASE (norm_type) + CASE('1') + residue_csr = SUM(ABS(r)) + CASE ('2') + residue_csr = SQRT(DOT_PRODUCT(r,r)) + CASE ('inf') + residue_csr = MAXVAL(ABS(r)) + END SELECT + END FUNCTION residue_csr +!-------------------------------------------------------------------------------- + DOUBLE PRECISION FUNCTION residue_cds(mat, f, u, p) +! +! L2 norm of residue ||f-Av|| +! + TYPE(cds_mat), INTENT(in) :: mat + DOUBLE PRECISION, INTENT(in) :: f(:), u(:) + DOUBLE PRECISION :: r(SIZE(f)) + CHARACTER(len=*), OPTIONAL, INTENT(in) :: p +! + CHARACTER(len=4) :: norm_type + norm_type = '2' + IF(PRESENT(p)) norm_type = p +! + r = f-vmx(mat,u) + SELECT CASE (norm_type) + CASE('1') + residue_cds = SUM(ABS(r)) + CASE ('2') + residue_cds = SQRT(DOT_PRODUCT(r,r)) + CASE ('inf') + residue_cds = MAXVAL(ABS(r)) + END SELECT + END FUNCTION residue_cds +!-------------------------------------------------------------------------------- + DOUBLE PRECISION FUNCTION residue_ge(mat, f, u) +! +! L2 norm of residue ||f-Av|| +! + TYPE(gemat), INTENT(in) :: mat + DOUBLE PRECISION, INTENT(in) :: f(:), u(:) + DOUBLE PRECISION :: r(SIZE(f)) + r = f-vmx(mat,u) + residue_ge = SQRT(DOT_PRODUCT(r,r)) + END FUNCTION residue_ge +!-------------------------------------------------------------------------------- + DOUBLE PRECISION FUNCTION residue_gb(mat, f, u) +! +! L2 norm of residue ||f-Av|| +! + TYPE(gbmat), INTENT(in) :: mat + DOUBLE PRECISION, INTENT(in) :: f(:), u(:) + DOUBLE PRECISION :: r(SIZE(f)) + r = f-vmx(mat,u) + residue_gb = SQRT(DOT_PRODUCT(r,r)) + END FUNCTION residue_gb +!-------------------------------------------------------------------------------- + SUBROUTINE ctof_massmat(splf, splc, alpha, matm) +! +! Compute coarse to fine mass matrix M(h,2h) +! + TYPE(spline1d), INTENT(in) :: splf ! Spline on fine mesh + TYPE(spline1d), INTENT(in) :: splc ! Spline on coarse mesh + INTEGER, INTENT(in) :: alpha + TYPE(gemat), INTENT(out) :: matm +! + LOGICAL :: nlper + INTEGER :: nf, nxf, nc, nxc, nidbas, kl, ku + INTEGER :: ig, ngauss + INTEGER :: i, ic, it, jw, irow, jcol +! + DOUBLE PRECISION :: contrib + DOUBLE PRECISION, ALLOCATABLE :: xg(:), wg(:) + DOUBLE PRECISION, ALLOCATABLE :: funf(:,:), func(:,:) +! + nlper = splf%period .OR. splc%period + CALL get_dim(splf, nf, nxf, nidbas) + CALL get_gauss(splf, ngauss) + CALL get_dim(splc, nc, nxc, nidbas) + kl=nidbas; ku=kl + IF(nlper) THEN + nf = nxf + nc = nxc + END IF + CALL init(nc, 1, matm, mrows=nf) ! Defne nf x nc matrix +! + ALLOCATE(xg(ngauss),wg(ngauss)) + ALLOCATE(funf(0:nidbas,1)) + ALLOCATE(func(0:nidbas,1)) + DO i=1,nxf + ic = (i-1)/2+1 + CALL get_gauss(splf, ngauss, i, xg, wg) + DO ig=1,ngauss + CALL basfun(xg(ig), splf, funf, i) + CALL basfun(xg(ig), splc, func, ic) + DO it=0,nidbas + DO jw=0,nidbas + contrib = wg(ig)*funf(it,1)*func(jw,1)*xg(ig)**alpha + irow = i+it; IF(nlper) irow=MODULO(irow-1,nxf)+1 + jcol = ic+jw; IF(nlper) jcol=MODULO(jcol-1,nxc)+1 + CALL updtmat(matm, irow, jcol, contrib) + END DO + END DO + END DO + END DO +! + DEALLOCATE(xg,wg) + DEALLOCATE(funf,func) + END SUBROUTINE ctof_massmat +!-------------------------------------------------------------------------------- + SUBROUTINE direct_solve_1d(grid, v) +! +! 1D direct solver +! + TYPE(grid1d), INTENT(inout) :: grid + DOUBLE PRECISION, INTENT(out) :: v(:) + LOGICAL :: nlper +! + nlper = grid%spl%period + IF(nlper) THEN + IF(.NOT.ALLOCATED(grid%matap_copy)) THEN + ALLOCATE(grid%matap_copy) + CALL mcopy(grid%matap, grid%matap_copy) + CALL factor(grid%matap_copy) + END IF + CALL bsolve(grid%matap_copy, grid%f, v) + ELSE + IF(.NOT.ALLOCATED(grid%mata_copy)) THEN + ALLOCATE(grid%mata_copy) + CALL mcopy(grid%mata, grid%mata_copy) + CALL factor(grid%mata_copy) + END IF + CALL bsolve(grid%mata_copy, grid%f, v) + END IF + END SUBROUTINE direct_solve_1d +!-------------------------------------------------------------------------------- + SUBROUTINE direct_solve_2d(grid, v, debug) +! +! 2D direct solver +! + TYPE(grid2d), INTENT(inout) :: grid + DOUBLE PRECISION, INTENT(inout) :: v(:) + LOGICAL, INTENT(in), OPTIONAL :: debug + LOGICAL :: dbg +! + dbg = .FALSE. + IF(PRESENT(debug)) dbg=debug +! + IF(ALLOCATED(grid%mata)) THEN + IF(.NOT.ALLOCATED(grid%mata%mumps)) THEN + ALLOCATE(grid%mata%mumps) + CALL csr2mumps(grid%mata, grid%mata%mumps) + CALL factor(grid%mata%mumps, debug=dbg) + END IF + CALL bsolve(grid%mata%mumps, v, debug=dbg) + ELSE + IF(.NOT.ALLOCATED(grid%mata_cds%mumps)) THEN + ALLOCATE(grid%mata_cds%mumps) + CALL cds2mumps(grid%mata_cds, grid%mata_cds%mumps) + CALL factor(grid%mata_cds%mumps, debug=dbg) + END IF + CALL bsolve(grid%mata_cds%mumps, v, debug=dbg) + END IF +! +! Only cylindrical case + IF(.NOT.grid%spl%sp1%period .AND. grid%spl%sp2%period) THEN + END IF + END SUBROUTINE direct_solve_2d +!-------------------------------------------------------------------------------- + SUBROUTINE jacobi_gb(mat, omega, nu, v, f) +! +! Weighted Jacobi relaxation +! + TYPE(gbmat),INTENT(in) :: mat + DOUBLE PRECISION, INTENT(in) :: omega + INTEGER, INTENT(in) :: nu + DOUBLE PRECISION, INTENT(inout) :: v(:) + DOUBLE PRECISION, INTENT(in) :: f(:) +! + DOUBLE PRECISION :: temp(SIZE(v)) + DOUBLE PRECISION :: inv_diag(SIZE(v)) + INTEGER :: k, kl, ku, n, i, j, jmin, jmax +! + kl = mat%kl + ku = mat%ku + n = mat%rank +! + inv_diag(:) = omega/mat%val(kl+ku+1,:) + DO k=1,nu + DO i=1,n + jmin = MAX(1,i-kl) + jmax = MIN(n, i+ku) + temp(i) = f(i) + DO j=jmin,i-1 + temp(i) = temp(i) - mat%val(kl+ku+i-j+1,j)*v(j) + END DO + DO j=i+1,jmax + temp(i) = temp(i) - mat%val(kl+ku+i-j+1,j)*v(j) + END DO + temp(i) = temp(i)*inv_diag(i) + END DO + v(:) = (1.d0-omega)*v(:) + temp(:) + END DO + END SUBROUTINE jacobi_gb +!-------------------------------------------------------------------------------- + SUBROUTINE jacobi_ge(mat, omega, nu, v, f) +! +! Weighted Jacobi relaxation +! + TYPE(gemat),INTENT(in) :: mat + DOUBLE PRECISION, INTENT(in) :: omega + INTEGER, INTENT(in) :: nu + DOUBLE PRECISION, INTENT(inout) :: v(:) + DOUBLE PRECISION, INTENT(in) :: f(:) +! + DOUBLE PRECISION :: temp(SIZE(v)) + DOUBLE PRECISION :: inv_diag(SIZE(v)) + INTEGER :: k, n, i, j +! + n = mat%rank +! + DO i=1,n + inv_diag(i) = omega/mat%val(i,i) + END DO +! + DO k=1,nu + DO i=1,n + temp(i) = f(i) + DO j=1,i-1 + temp(i) = temp(i) - mat%val(i,j)*v(j) + END DO + DO j=i+1,n + temp(i) = temp(i) - mat%val(i-j+1,j)*v(j) + END DO + temp(i) = temp(i)*inv_diag(i) + END DO + v(:) = (1.d0-omega)*v(:) + temp(:) + END DO + END SUBROUTINE jacobi_ge +!-------------------------------------------------------------------------------- + SUBROUTINE jacobi_csr(mat, omega, nu, v, f) +! +! Weighted Jacobi relaxation +! + TYPE(csr_mat),INTENT(in) :: mat + DOUBLE PRECISION, INTENT(in) :: omega + INTEGER, INTENT(in) :: nu + DOUBLE PRECISION, INTENT(inout) :: v(:) + DOUBLE PRECISION, INTENT(in) :: f(:) +! + DOUBLE PRECISION :: temp(SIZE(v)) + DOUBLE PRECISION :: inv_diag(SIZE(v)) + INTEGER :: k, n, i, j, jcol +! + n = mat%rank +! + inv_diag(:) = omega/mat%val(mat%idiag) + DO k=1,nu + temp(:) = f(:) + DO i=1,n + DO j = mat%irow(i), mat%irow(i+1)-1 + jcol = mat%cols(j) + IF(jcol.NE.i) THEN ! The diagonal + temp(i) = temp(i) - mat%val(j)*v(jcol) + END IF + END DO + END DO + temp(:) = temp(:)*inv_diag(:) + v(:) = (1.d0-omega)*v(:) + temp(:) + END DO + END SUBROUTINE jacobi_csr +!-------------------------------------------------------------------------------- + SUBROUTINE jacobi_cds(mat, omega, nu, v, f) +! +! Weighted Jacobi relaxation +! + TYPE(cds_mat),INTENT(in) :: mat + DOUBLE PRECISION, INTENT(in) :: omega + INTEGER, INTENT(in) :: nu + DOUBLE PRECISION, INTENT(inout) :: v(:) + DOUBLE PRECISION, INTENT(in) :: f(:) +! + DOUBLE PRECISION :: temp(SIZE(v)) + DOUBLE PRECISION :: inv_diag(SIZE(v)) + INTEGER :: k, n, i, id, d +! + n = mat%rank +! + inv_diag(:) = omega/mat%val(:,mat%dists(0)) + DO k=1,nu + temp(:) = f(:) + DO id=-mat%kl,mat%ku ! f - (L+U)*v + IF(id.EQ.0) CYCLE + d = mat%dists(id) + DO i=MAX(1,1-d), MIN(n,mat%rank-d) + temp(i) = temp(i) - mat%val(i,id)*v(i+d) + END DO + END DO + temp(:) = temp(:)*inv_diag(:) + v(:) = (1.d0-omega)*v(:) + temp(:) + END DO + END SUBROUTINE jacobi_cds +!-------------------------------------------------------------------------------- + SUBROUTINE gs_gb(mat, nu, v, f) +! +! Gauss-Seidel relaxation +! + TYPE(gbmat),INTENT(in) :: mat + INTEGER, INTENT(in) :: nu + DOUBLE PRECISION, INTENT(inout) :: v(:) + DOUBLE PRECISION, INTENT(in) :: f(:) +! + INTEGER :: k, kl, ku, n, i, j, jmin, jmax + DOUBLE PRECISION :: inv_diag(SIZE(v)) +! + kl = mat%kl + ku = mat%ku + n = mat%rank +! + inv_diag(:) = 1.d0/mat%val(kl+ku+1,:) + DO k=1,nu + DO i=1,n + jmin = MAX(1,i-kl) + jmax = MIN(n, i+ku) + v(i) = f(i) + DO j=jmin,i-1 + v(i) = v(i) - mat%val(kl+ku+i-j+1,j)*v(j) + END DO + DO j=i+1,jmax + v(i) = v(i) - mat%val(kl+ku+i-j+1,j)*v(j) + END DO + v(i) = inv_diag(i)*v(i) + END DO + END DO + END SUBROUTINE gs_gb +!-------------------------------------------------------------------------------- + SUBROUTINE gs_ge(mat, nu, v, f) +! +! Gauss-Seidel relaxation +! + TYPE(gemat),INTENT(in) :: mat + INTEGER, INTENT(in) :: nu + DOUBLE PRECISION, INTENT(inout) :: v(:) + DOUBLE PRECISION, INTENT(in) :: f(:) +! + INTEGER :: k, n, i, j + DOUBLE PRECISION :: inv_diag(SIZE(v)) +! + n = mat%rank +! + DO i=1,n + inv_diag(i) = 1.d0/mat%val(i,i) + END DO + DO k=1,nu + DO i=1,n + v(i) = f(i) + DO j=1,i-1 + v(i) = v(i) - mat%val(i,j)*v(j) + END DO + DO j=i+1,n + v(i) = v(i) - mat%val(i,j)*v(j) + END DO + v(i) = inv_diag(i)*v(i) + END DO + END DO + END SUBROUTINE gs_ge +!-------------------------------------------------------------------------------- + SUBROUTINE gs_csr(mat, nu, v, f) +! +! Gauss-Seidel relaxation +! + TYPE(csr_mat),INTENT(in) :: mat + INTEGER, INTENT(in) :: nu + DOUBLE PRECISION, INTENT(inout) :: v(:) + DOUBLE PRECISION, INTENT(in) :: f(:) +! + DOUBLE PRECISION :: inv_diag(SIZE(v)) + INTEGER :: k, n, i, j, jcol +! + n = mat%rank +! + inv_diag(:) = 1.0d0/mat%val(mat%idiag) + DO k=1,nu + DO i=1,n + v(i) = f(i) + DO j = mat%irow(i), mat%irow(i+1)-1 + jcol = mat%cols(j) + IF(jcol.NE.i) THEN ! The diagonal + v(i) = v(i) - mat%val(j)*v(jcol) + END IF + END DO + v(i) = v(i)*inv_diag(i) + END DO + END DO + END SUBROUTINE gs_csr +!-------------------------------------------------------------------------------- + SUBROUTINE gs_cds(mat, nu, v, f) +! +! Gauss-Seidel relaxation +! + TYPE(cds_mat),INTENT(in) :: mat + INTEGER, INTENT(in) :: nu + DOUBLE PRECISION, INTENT(inout) :: v(:) + DOUBLE PRECISION, INTENT(in) :: f(:) +! + DOUBLE PRECISION :: temp(SIZE(v)) + DOUBLE PRECISION :: inv_diag(SIZE(v)) + INTEGER :: k, n, i, id, d +! + n = mat%rank +! + inv_diag(:) = 1.0d0/mat%val(:,mat%dists(0)) + DO k=1,nu +! + temp(:) = f(:) + DO id=1,mat%ku ! t <- f - U*v + d = mat%dists(id) + DO i=MAX(1,1-d), MIN(n,n-d) + temp(i) = temp(i) - mat%val(i,id)*v(i+d) + END DO + END DO +! + DO i=1,n ! Solve (L+D)v=t + v(i) = temp(i) + DO id=-1,-mat%kl,-1 + d = mat%dists(id) + IF(i+d.LT.1) EXIT + v(i) = v(i) - mat%val(i,id)*v(i+d) + END DO + v(i) = v(i)*inv_diag(i) + END DO + END DO + END SUBROUTINE gs_cds +!-------------------------------------------------------------------------------- + SUBROUTINE printmat_mat(str, val) +! + CHARACTER(len=*), INTENT(in) :: str + DOUBLE PRECISION, INTENT(in) :: val(:,:) + INTEGER :: mrows, ncols,i + mrows=SIZE(val,1) + ncols=SIZE(val,2) + WRITE(*,'(/a)') TRIM(str) + WRITE(*,'(2(a,i6))') 'M =', mrows, ', N =', ncols + DO i=1,mrows + WRITE(*,'(12(1pe12.3))') val(i,:) + END DO + WRITE(*,'(a/(12(1pe12.3)))') 'Sum or rows', SUM(val,2) + WRITE(*,'(a/(12(1pe12.3)))') 'Sum or cols', SUM(val,1) + END SUBROUTINE printmat_mat +!-------------------------------------------------------------------------------- + SUBROUTINE printmat_ge(str, mat) +! + CHARACTER(len=*), INTENT(in) :: str + TYPE(gemat), INTENT(in) :: mat + INTEGER :: i + DOUBLE PRECISION :: arow(mat%ncols) + DOUBLE PRECISION :: sum_cols(mat%ncols), sum_rows(mat%mrows) + sum_cols = 0.0d0 + arow = 0.0d0 + WRITE(*,'(/a)') TRIM(str) + WRITE(*,'(2(a,i6))') 'M =', mat%mrows, ', N =', mat%ncols + DO i=1,mat%mrows + CALL getrow(mat,i,arow) + sum_rows(i) = SUM(arow) + sum_cols(:) = sum_cols(:) + arow(:) + WRITE(*,'(12(1pe12.3))') arow + END DO + WRITE(*,'(a/(12(1pe12.3)))') 'Sum or rows', sum_rows + WRITE(*,'(a/(12(1pe12.3)))') 'Sum or cols', sum_cols + END SUBROUTINE printmat_ge +!-------------------------------------------------------------------------------- + SUBROUTINE printmat_gb(str, mat) +! + CHARACTER(len=*), INTENT(in) :: str + TYPE(gbmat), INTENT(in) :: mat + INTEGER :: i + DOUBLE PRECISION :: arow(mat%ncols) + DOUBLE PRECISION :: sum_cols(mat%ncols), sum_rows(mat%mrows) + sum_cols = 0.0d0 + WRITE(*,'(/a)') TRIM(str) + WRITE(*,'(2(a,i6))') 'M =', mat%mrows, ', N =', mat%ncols + DO i=1,mat%mrows + CALL getrow(mat,i,arow) + sum_rows(i) = SUM(arow) + sum_cols(:) = sum_cols(:) + arow(:) + WRITE(*,'(8(1pe12.3))') arow + END DO + WRITE(*,'(a/(12(1pe12.3)))') 'Sum or rows', sum_rows + WRITE(*,'(a/(12(1pe12.3)))') 'Sum or cols', sum_cols + END SUBROUTINE printmat_gb +!-------------------------------------------------------------------------------- + SUBROUTINE printmat_periodic(str, mat) +! + CHARACTER(len=*), INTENT(in) :: str + TYPE(periodic_mat), INTENT(in) :: mat + INTEGER :: i + DOUBLE PRECISION :: arow(mat%mat%ncols) + DOUBLE PRECISION :: sum_cols(mat%mat%ncols), sum_rows(mat%mat%mrows) + sum_cols = 0.0d0 + WRITE(*,'(/a)') TRIM(str) + WRITE(*,'(2(a,i6))') 'M =', mat%mat%mrows, ', N =', mat%mat%ncols + DO i=1,mat%mat%mrows + CALL getrow(mat,i,arow) + sum_rows(i) = SUM(arow) + sum_cols(:) = sum_cols(:) + arow(:) + WRITE(*,'(8(1pe12.3))') arow + END DO + WRITE(*,'(a/(8(1pe12.3)))') 'Sum or rows', sum_rows + WRITE(*,'(a/(8(1pe12.3)))') 'Sum or cols', sum_cols + END SUBROUTINE printmat_periodic +!-------------------------------------------------------------------------------- + SUBROUTINE printdiag_gb(str, mat) +! + CHARACTER(len=*), INTENT(in) :: str + TYPE(gbmat), INTENT(in) :: mat + INTEGER :: ku, kl + kl = mat%kl + ku = mat%ku + WRITE(*,'(a/(8(1pe12.3)))') str, mat%val(kl+ku+1,:) + END SUBROUTINE printdiag_gb +!-------------------------------------------------------------------------------- + INTEGER FUNCTION get_lmax(n) + INTEGER, INTENT(in) :: n + INTEGER :: l, ncur + l=1 + ncur = n + DO + IF(ncur.EQ.2 .OR. MODULO(ncur,2).NE.0) EXIT ! Minimum N is 2 or odd. + l=l+1 + ncur = ncur/2 + END DO + get_lmax = l + END FUNCTION get_lmax +!-------------------------------------------------------------------------------- + SUBROUTINE ibc_transf(grids, dir, k) +! +! Impose BC on transfer matrix +! + TYPE(grid2d), INTENT(inout) :: grids(:) + INTEGER, INTENT(in) :: dir + INTEGER, INTENT(in) :: k +! + INTEGER :: levels, l + levels = SIZE(grids) + DO l=2,levels + CALL mod_transf(grids(l)%matp(dir),k) + END DO + END SUBROUTINE ibc_transf +!-------------------------------------------------------------------------------- +END MODULE multigrid diff --git a/src/mumps_mod.f90 b/src/mumps_mod.f90 new file mode 100644 index 0000000..fe23546 --- /dev/null +++ b/src/mumps_mod.f90 @@ -0,0 +1,1728 @@ +!> +!> @file mumps_mod.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +MODULE mumps_bsplines +! +! MUMPS_BSPLINES: Simple interface to the sparse direct solver MUMPS +! +! T.M. Tran, CRPP-EPFL +! June 2011 +! + USE sparse + IMPLICIT NONE + INCLUDE 'dmumps_struc.h' + INCLUDE 'zmumps_struc.h' +! + TYPE mumps_mat + INTEGER :: rank, nnz + INTEGER :: nterms, kmat + INTEGER :: istart, iend + INTEGER :: nnz_start, nnz_end, nnz_loc + LOGICAL :: nlsym + LOGICAL :: nlpos + LOGICAL :: nlforce_zero + TYPE(spmat), POINTER :: mat => NULL() + INTEGER, POINTER :: cols(:) => NULL() + INTEGER, POINTER :: irow(:) => NULL() + INTEGER, POINTER :: perm(:) => NULL() + DOUBLE PRECISION, POINTER :: val(:) => NULL() + TYPE(dmumps_struc) :: mumps_par + END TYPE mumps_mat +! + TYPE zmumps_mat + INTEGER :: rank, nnz + INTEGER :: nterms, kmat + INTEGER :: istart, iend + INTEGER :: nnz_start, nnz_end, nnz_loc + LOGICAL :: nlsym + LOGICAL :: nlherm + LOGICAL :: nlpos + LOGICAL :: nlforce_zero + TYPE(zspmat), POINTER :: mat => NULL() + INTEGER, POINTER :: cols(:) => NULL() + INTEGER, POINTER :: irow(:) => NULL() + INTEGER, POINTER :: perm(:) => NULL() + DOUBLE COMPLEX, POINTER :: val(:) => NULL() + TYPE(zmumps_struc) :: mumps_par + END TYPE zmumps_mat +! + INTERFACE init + MODULE PROCEDURE init_mumps_mat, init_zmumps_mat + END INTERFACE init +! + INTERFACE clear_mat + MODULE PROCEDURE clear_mumps_mat, clear_zmumps_mat + END INTERFACE clear_mat +! + INTERFACE updtmat + MODULE PROCEDURE updt_mumps_mat, updt_zmumps_mat + END INTERFACE updtmat +! + INTERFACE putele + MODULE PROCEDURE putele_mumps_mat, putele_zmumps_mat + END INTERFACE putele +! + INTERFACE getele + MODULE PROCEDURE getele_mumps_mat, getele_zmumps_mat + END INTERFACE getele +! + INTERFACE putrow + MODULE PROCEDURE putrow_mumps_mat, putrow_zmumps_mat + END INTERFACE putrow +! + INTERFACE getrow + MODULE PROCEDURE getrow_mumps_mat, getrow_zmumps_mat + END INTERFACE getrow +! + INTERFACE putcol + MODULE PROCEDURE putcol_mumps_mat, putcol_zmumps_mat + END INTERFACE putcol +! + INTERFACE getcol + MODULE PROCEDURE getcol_mumps_mat, getcol_zmumps_mat + END INTERFACE getcol +! + INTERFACE get_count + MODULE PROCEDURE get_count_mumps_mat, get_count_zmumps_mat + END INTERFACE get_count +! + INTERFACE to_mat + MODULE PROCEDURE to_mumps_mat, to_zmumps_mat + END INTERFACE to_mat +! + INTERFACE reord_mat + MODULE PROCEDURE reord_mumps_mat, reord_zmumps_mat + END INTERFACE reord_mat +! + INTERFACE numfact + MODULE PROCEDURE numfact_mumps_mat, numfact_zmumps_mat + END INTERFACE numfact +! + INTERFACE factor + MODULE PROCEDURE factor_mumps_mat, factor_zmumps_mat + END INTERFACE factor +! + INTERFACE bsolve + MODULE PROCEDURE bsolve_mumps_mat1, bsolve_mumps_matn, & + & bsolve_zmumps_mat1, bsolve_zmumps_matn + END INTERFACE bsolve +! + INTERFACE vmx + MODULE PROCEDURE vmx_mumps_mat, vmx_mumps_matn, & + & vmx_zmumps_mat, vmx_zmumps_matn + END INTERFACE vmx +! + INTERFACE destroy + MODULE PROCEDURE destroy_mumps_mat, destroy_zmumps_mat + END INTERFACE destroy +! + INTERFACE putmat + MODULE PROCEDURE put_mumps_mat, put_zmumps_mat + END INTERFACE putmat +! + INTERFACE getmat + MODULE PROCEDURE get_mumps_mat, get_zmumps_mat + END INTERFACE getmat +! + INTERFACE mcopy + MODULE PROCEDURE mcopy_mumps_mat, mcopy_zmumps_mat + END INTERFACE mcopy +! + INTERFACE maddto + MODULE PROCEDURE maddto_mumps_mat, maddto_zmumps_mat + END INTERFACE maddto +! + INTERFACE psum_mat + MODULE PROCEDURE psum_mumps_mat, psum_zmumps_mat + END INTERFACE psum_mat +! + INTERFACE p2p_mat + MODULE PROCEDURE p2p_mumps_mat, p2p_zmumps_mat + END INTERFACE p2p_mat +! +CONTAINS +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE init_mumps_mat(n, nterms, mat, kmat, nlsym, nlpos, & + & nlforce_zero, comm_in) +! +! Initialize an empty sparse mumps matrix +! + USE pputils2 + INCLUDE 'mpif.h' + INTEGER, INTENT(in) :: n, nterms + TYPE(mumps_mat) :: mat + INTEGER, OPTIONAL, INTENT(in) :: kmat + LOGICAL, OPTIONAL, INTENT(in) :: nlsym + LOGICAL, OPTIONAL, INTENT(in) :: nlpos + LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero + INTEGER, OPTIONAL, INTENT(in) :: comm_in +! + INTEGER :: comm, nloc +! + comm = MPI_COMM_SELF ! Default is serial! + IF(PRESENT(comm_in)) comm = comm_in +! + mat%rank = n + mat%nterms = nterms + mat%nnz = 0 + mat%nlsym = .FALSE. + mat%nlpos = .TRUE. + mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val + IF(PRESENT(kmat)) mat%kmat = kmat + IF(PRESENT(nlsym)) mat%nlsym = nlsym + IF(PRESENT(nlpos)) mat%nlpos = nlpos + IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero +! +! Matrix partition +! + CALL dist1d(comm, 1, n, mat%istart, nloc) + mat%iend = mat%istart + nloc - 1 +! + IF(ASSOCIATED(mat%mat)) DEALLOCATE(mat%mat) + ALLOCATE(mat%mat) + CALL init(n, mat%mat, mat%istart, mat%iend) +! +! Initialize a MUMPS instance +! + mat%mumps_par%N = n + mat%mumps_par%NZ = 0 + mat%mumps_par%COMM = comm + mat%mumps_par%PAR = 1 ! Host involved in calculations + IF(mat%nlsym) THEN + IF(mat%nlpos) THEN + mat%mumps_par%SYM = 1 ! symmetric, positive definite + ELSE + mat%mumps_par%SYM = 2 ! symmetric, indefinite + END IF + ELSE + mat%mumps_par%SYM = 0 ! unsymmetric + END IF +! + mat%mumps_par%JOB = -1 ! Init phase + CALL dmumps(mat%mumps_par) +! +! Nullify MUMPS pointers for distributed matrix +! + NULLIFY(mat%mumps_par%A_loc) + NULLIFY(mat%mumps_par%IRN_loc) + NULLIFY(mat%mumps_par%JCN_loc) + NULLIFY(mat%mumps_par%RHS) +! + END SUBROUTINE init_mumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE init_zmumps_mat(n, nterms, mat, kmat, nlsym, nlherm, & + & nlpos, nlforce_zero, comm_in) +! +! Initialize an empty sparse mumps matrix +! + USE pputils2 + INCLUDE 'mpif.h' + INTEGER, INTENT(in) :: n, nterms + TYPE(zmumps_mat) :: mat + INTEGER, OPTIONAL, INTENT(in) :: kmat + LOGICAL, OPTIONAL, INTENT(in) :: nlsym + LOGICAL, OPTIONAL, INTENT(in) :: nlherm + LOGICAL, OPTIONAL, INTENT(in) :: nlpos + LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero + INTEGER, OPTIONAL, INTENT(in) :: comm_in +! + INTEGER :: comm, nloc +! + comm = MPI_COMM_SELF ! Default is serial! + IF(PRESENT(comm_in)) comm = comm_in +! + mat%rank = n + mat%nterms = nterms + mat%nnz = 0 + mat%nlsym = .FALSE. + mat%nlherm = .FALSE. + mat%nlpos = .TRUE. + mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val + IF(PRESENT(kmat)) mat%kmat = kmat + IF(PRESENT(nlsym)) mat%nlsym = nlsym + IF(PRESENT(nlpos)) mat%nlpos = nlpos + IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero +! +! Matrix partition +! + CALL dist1d(comm, 1, n, mat%istart, nloc) + mat%iend = mat%istart + nloc - 1 +! + IF(ASSOCIATED(mat%mat)) DEALLOCATE(mat%mat) + ALLOCATE(mat%mat) + CALL init(n, mat%mat, mat%istart, mat%iend) +! +! Initialize a MUMPS instance +! + mat%mumps_par%N = n + mat%mumps_par%NZ = 0 + mat%mumps_par%COMM = comm + mat%mumps_par%PAR = 1 ! Host involved in calculations + mat%mumps_par%SYM = 0 ! General unsymmetric + IF(mat%nlsym) THEN + IF(mat%nlpos) THEN + mat%mumps_par%SYM = 1 ! symmetric, positive definite + ELSE + mat%mumps_par%SYM = 2 ! symmetric, indefinite + END IF + END IF +! + mat%mumps_par%JOB = -1 ! Init phase + CALL zmumps(mat%mumps_par) +! +! WARNING: SYM=1 is currently (version 4.10.0) is treated as SYM=2. +! The Hermitian case is not implemented yet! +! +! Nullify MUMPS pointers for distributed matrix +! + NULLIFY(mat%mumps_par%A_loc) + NULLIFY(mat%mumps_par%IRN_loc) + NULLIFY(mat%mumps_par%JCN_loc) + NULLIFY(mat%mumps_par%RHS) +! + END SUBROUTINE init_zmumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE clear_mumps_mat(mat) +! +! Clear matrix, keeping its sparse structure unchanged +! + TYPE(mumps_mat) :: mat +! + mat%val = 0.0d0 + END SUBROUTINE clear_mumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE clear_zmumps_mat(mat) +! +! Clear matrix, keeping its sparse structure unchanged +! + TYPE(zmumps_mat) :: mat +! + mat%val = (0.0d0, 0.0d0) + END SUBROUTINE clear_zmumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE updt_mumps_mat(mat, i, j, val) +! +! Update element Aij of mumps matrix +! + TYPE(mumps_mat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE PRECISION, INTENT(in) :: val + INTEGER :: k, s, e +! + IF(mat%nlsym) THEN ! Store only upper part for symmetric matrices + IF(i.GT.j) RETURN + END IF + IF(i.LT.mat%istart .OR. i.GT.mat%iend) THEN + WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j + WRITE(*,'(a,2i6)') ' istart, iend ', mat%istart, mat%iend + STOP '*** Abnormal EXIT in MODULE mumps_mod ***' + END IF +! + IF(ASSOCIATED(mat%mat)) THEN + CALL updtmat(mat%mat, i, j, val) + ELSE + s = mat%irow(i) - mat%nnz_start + 1 + e = mat%irow(i+1) - mat%nnz_start + k = isearch(mat%cols(s:e), j) + IF( k.GE.0 ) THEN + mat%val(s+k) = mat%val(s+k)+val + ELSE + WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j + STOP '*** Abnormal EXIT in MODULE mumps_mod ***' + END IF + END IF + END SUBROUTINE updt_mumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE updt_zmumps_mat(mat, i, j, val) +! +! Update element Aij of mumps matrix +! + TYPE(zmumps_mat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE COMPLEX, INTENT(in) :: val + INTEGER :: k, s, e +! + IF(mat%nlherm .OR. mat%nlsym) THEN ! Store only upper part + IF(i.GT.j) RETURN + END IF + IF(i.LT.mat%istart .OR. i.GT.mat%iend) THEN + WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j + WRITE(*,'(a,2i6)') ' istart, iend ', mat%istart, mat%iend + STOP '*** Abnormal EXIT in MODULE mumps_mod ***' + END IF +! + IF(ASSOCIATED(mat%mat)) THEN + CALL updtmat(mat%mat, i, j, val) + ELSE + s = mat%irow(i) - mat%nnz_start + 1 + e = mat%irow(i+1) - mat%nnz_start + k = isearch(mat%cols(s:e), j) + IF( k.GE.0 ) THEN + mat%val(s+k) = mat%val(s+k)+val + ELSE + WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j + STOP '*** Abnormal EXIT in MODULE mumps_mod ***' + END IF + END IF + END SUBROUTINE updt_zmumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putele_mumps_mat(mat, i, j, val) +! +! Put element (i,j) of matrix +! + TYPE(mumps_mat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE PRECISION, INTENT(in) :: val + INTEGER :: iput, jput + INTEGER :: k, s, e +! + iput = i + jput = j + IF(mat%nlsym) THEN + IF( i.GT.j ) THEN ! Lower triangular part + iput = j + jput = i + END IF + END IF +! +! Do nothing if outside + IF(iput.LT.mat%istart .OR. iput.GT.mat%iend) RETURN +! + IF(ASSOCIATED(mat%mat)) THEN + CALL putele(mat%mat, iput, jput, val, & + & nlforce_zero=mat%nlforce_zero) + ELSE + s = mat%irow(iput) - mat%nnz_start + 1 + e = mat%irow(iput+1) - mat%nnz_start + k = isearch(mat%cols(s:e), jput) + IF( k.GE.0 ) THEN + mat%val(s+k) = val + ELSE + IF(ABS(val) .GT. EPSILON(0.0d0)) THEN ! Ok for zero val + WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j + STOP '*** Abnormal EXIT in MODULE mumps_mod ***' + END IF + END IF + END IF + END SUBROUTINE putele_mumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putele_zmumps_mat(mat, i, j, val) +! +! Put element (i,j) of matrix +! + TYPE(zmumps_mat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE COMPLEX, INTENT(in) :: val + DOUBLE COMPLEX :: valput + INTEGER :: iput, jput + INTEGER :: k, s, e +! + iput = i + jput = j + valput = val + IF(mat%nlherm .OR. mat%nlsym) THEN + IF( i.GT.j ) THEN ! Lower triangular part + iput = j + jput = i + IF(mat%nlherm) THEN + valput = CONJG(val) + ELSE + valput = val + END IF + END IF + END IF +! +! Do nothing if outside + IF(iput.LT.mat%istart .OR. iput.GT.mat%iend) RETURN +! + IF(ASSOCIATED(mat%mat)) THEN + CALL putele(mat%mat, iput, jput, valput, & + & nlforce_zero=mat%nlforce_zero) + ELSE + s = mat%irow(iput) - mat%nnz_start + 1 + e = mat%irow(iput+1) - mat%nnz_start + k = isearch(mat%cols(s:e), jput) + IF( k.GE.0 ) THEN + mat%val(s+k) = valput + ELSE + IF(ABS(val) .GT. EPSILON(0.0d0)) THEN ! Ok for zero val + WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j + STOP '*** Abnormal EXIT in MODULE mumps_mod ***' + END IF + END IF + END IF + END SUBROUTINE putele_zmumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getele_mumps_mat(mat, i, j, val) +! +! Get element (i,j) of sparse matrix +! + TYPE(mumps_mat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE PRECISION, INTENT(out) :: val + INTEGER :: iget, jget + INTEGER :: k, s, e +! + iget = i + jget = j + IF(mat%nlsym) THEN + IF( i.GT.j ) THEN ! Lower triangular part + iget = j + jget = i + END IF + END IF +! + val = 0.0d0 ! Assume zero val if outside + IF( iget.LT.mat%istart .OR. iget.GT.mat%iend ) RETURN +! + IF(ASSOCIATED(mat%mat)) THEN + CALL getele(mat%mat, iget, jget, val) + ELSE + s = mat%irow(iget) - mat%nnz_start + 1 + e = mat%irow(iget+1) - mat%nnz_start + k = isearch(mat%cols(s:e), jget) + IF( k.GE.0 ) THEN + val =mat%val(s+k) + ELSE + val = 0.0d0 ! Assume zero val if not found + END IF + END IF + END SUBROUTINE getele_mumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getele_zmumps_mat(mat, i, j, val) +! +! Get element (i,j) of sparse matrix +! + TYPE(zmumps_mat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE COMPLEX, INTENT(out) :: val + DOUBLE COMPLEX :: valget + INTEGER :: iget, jget + INTEGER :: k, s, e +! + iget = i + jget = j + IF(mat%nlherm .OR. mat%nlsym) THEN + IF( i.GT.j ) THEN ! Lower triangular part + iget = j + jget = i + END IF + END IF +! + val = (0.0d0, 0.0d0) ! Assume zero val if outside + IF( iget.LT.mat%istart .OR. iget.GT.mat%iend ) RETURN +! + IF(ASSOCIATED(mat%mat)) THEN + CALL getele(mat%mat, iget, jget, valget) + ELSE + s = mat%irow(iget) - mat%nnz_start + 1 + e = mat%irow(iget+1) - mat%nnz_start + k = isearch(mat%cols(s:e), jget) + IF( k.GE.0 ) THEN + valget =mat%val(s+k) + ELSE + valget = (0.0d0,0.0d0) ! Assume zero val if not found + END IF + END IF + val = valget + IF( i.GT.j ) THEN + IF(mat%nlherm) THEN + val = CONJG(valget) + END IF + END IF + END SUBROUTINE getele_zmumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putrow_mumps_mat(amat, i, arr) +! +! Put a row into sparse matrix +! + TYPE(mumps_mat), INTENT(inout) :: amat + INTEGER, INTENT(in) :: i + DOUBLE PRECISION, INTENT(in) :: arr(:) + INTEGER :: j +! + DO j=1,amat%rank + CALL putele(amat, i, j, arr(j)) + END DO + END SUBROUTINE putrow_mumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putrow_zmumps_mat(amat, i, arr) +! +! Put a row into sparse matrix +! + TYPE(zmumps_mat), INTENT(inout) :: amat + INTEGER, INTENT(in) :: i + DOUBLE COMPLEX, INTENT(in) :: arr(:) + INTEGER :: j +! + DO j=1,amat%rank + CALL putele(amat, i, j, arr(j)) + END DO + END SUBROUTINE putrow_zmumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getrow_mumps_mat(amat, i, arr) +! +! Get a row from sparse matrix +! + TYPE(mumps_mat), INTENT(in) :: amat + INTEGER, INTENT(in) :: i + DOUBLE PRECISION, INTENT(out) :: arr(:) + INTEGER :: j +! + DO j=1,amat%rank + CALL getele(amat, i, j, arr(j)) + END DO + END SUBROUTINE getrow_mumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getrow_zmumps_mat(amat, i, arr) +! +! Get a row from sparse matrix +! + TYPE(zmumps_mat), INTENT(in) :: amat + INTEGER, INTENT(in) :: i + DOUBLE COMPLEX, INTENT(out) :: arr(:) + INTEGER :: j +! + DO j=1,amat%rank + CALL getele(amat, i, j, arr(j)) + END DO + END SUBROUTINE getrow_zmumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putcol_mumps_mat(amat, j, arr) +! +! Put a column into sparse matrix +! + TYPE(mumps_mat), INTENT(inout) :: amat + INTEGER, INTENT(in) :: j + DOUBLE PRECISION, INTENT(in) :: arr(:) + INTEGER :: i +! + DO i=amat%istart,amat%iend + CALL putele(amat, i, j, arr(i)) + END DO + END SUBROUTINE putcol_mumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putcol_zmumps_mat(amat, j, arr) +! +! Put a column into sparse matrix +! + TYPE(zmumps_mat), INTENT(inout) :: amat + INTEGER, INTENT(in) :: j + DOUBLE COMPLEX, INTENT(in) :: arr(:) + INTEGER :: i +! + DO i=amat%istart,amat%iend + CALL putele(amat, i, j, arr(i)) + END DO + END SUBROUTINE putcol_zmumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getcol_mumps_mat(amat, j, arr) +! +! Get a column from sparse matrix +! + TYPE(mumps_mat), INTENT(in) :: amat + INTEGER, INTENT(in) :: j + DOUBLE PRECISION, INTENT(out) :: arr(:) + INTEGER :: i +! + DO i=amat%istart,amat%iend + CALL getele(amat, i, j, arr(i)) + END DO + END SUBROUTINE getcol_mumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getcol_zmumps_mat(amat, j, arr) +! +! Get a column from sparse matrix +! + TYPE(zmumps_mat), INTENT(in) :: amat + INTEGER, INTENT(in) :: j + DOUBLE COMPLEX, INTENT(out) :: arr(:) + INTEGER :: i +! + DO i=amat%istart,amat%iend + CALL getele(amat, i, j, arr(i)) + END DO + END SUBROUTINE getcol_zmumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + INTEGER FUNCTION get_count_mumps_mat(mat, nnz) +! +! Number of non-zeros in sparse matrix +! + TYPE(mumps_mat) :: mat + INTEGER, INTENT(out), OPTIONAL :: nnz(:) + INTEGER :: i +! + IF(ASSOCIATED(mat%mat)) THEN + get_count_mumps_mat = get_count(mat%mat, nnz) + ELSE + get_count_mumps_mat = mat%nnz + IF(PRESENT(nnz)) THEN + DO i=mat%istart,mat%iend + nnz(i) = mat%irow(i+1)-mat%irow(i) + END DO + END IF + END IF + END FUNCTION get_count_mumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + INTEGER FUNCTION get_count_zmumps_mat(mat, nnz) +! +! Number of non-zeros in sparse matrix +! + TYPE(zmumps_mat) :: mat + INTEGER, INTENT(out), OPTIONAL :: nnz(:) + INTEGER :: i +! + IF(ASSOCIATED(mat%mat)) THEN + get_count_zmumps_mat = get_count(mat%mat, nnz) + ELSE + get_count_zmumps_mat = mat%nnz + IF(PRESENT(nnz)) THEN + DO i=mat%istart,mat%iend + nnz(i) = mat%irow(i+1)-mat%irow(i) + END DO + END IF + END IF + END FUNCTION get_count_zmumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE to_mumps_mat(mat, nlkeep) +! +! Convert linked list spmat to mumps matrice structure +! + INCLUDE 'mpif.h' + TYPE(mumps_mat) :: mat + LOGICAL, INTENT(in), OPTIONAL :: nlkeep + INTEGER :: i, nnz, rank, s, e + INTEGER :: comm, ierr, nnz_loc + LOGICAL :: nlclean +! + nlclean = .TRUE. + IF(PRESENT(nlkeep)) THEN + nlclean = .NOT. nlkeep + END IF +! + comm = mat%mumps_par%COMM + mat%mumps_par%ICNTL(18) = 3 ! Distributed assembled matrix +! +! Allocate the Mumps matrix structure +! CSR format: (cols, irow, val) or (JCN, irow, A) +! COO format: (IRN, JCN, A) or (IRN, cols, val) +! + rank = mat%rank + nnz_loc = get_count(mat) + mat%nnz_start = 0 + CALL mpi_exscan(nnz_loc, mat%nnz_start, 1, MPI_INTEGER, MPI_SUM, comm, ierr) + mat%nnz_start = mat%nnz_start + 1 + mat%nnz_end = mat%nnz_start + nnz_loc - 1 + mat%nnz_loc = nnz_loc + CALL mpi_allreduce(nnz_loc, mat%nnz, 1, MPI_INTEGER, MPI_SUM, comm, ierr) +! + mat%mumps_par%N = rank + mat%mumps_par%NZ_loc = nnz_loc +! + IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) + IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) + IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm) + IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) + ALLOCATE(mat%val(nnz_loc)) + ALLOCATE(mat%cols(nnz_loc)) + ALLOCATE(mat%irow(mat%istart:mat%iend+1)) +! + IF(ASSOCIATED(mat%mumps_par%IRN_loc)) DEALLOCATE(mat%mumps_par%IRN_loc) + ALLOCATE(mat%mumps_par%IRN_loc(nnz_loc)) + mat%mumps_par%A_loc => mat%val + mat%mumps_par%JCN_loc => mat%cols +! +! Fill Mumps structure and deallocate the sparse rows +! + mat%irow(mat%istart) = mat%nnz_start + DO i=mat%istart,mat%iend + mat%irow(i+1) = mat%irow(i) + mat%mat%row(i)%nnz + s = mat%irow(i) - mat%nnz_start + 1 + e = mat%irow(i+1) - mat%nnz_start + CALL getrow(mat%mat%row(i), mat%val(s:e), mat%cols(s:e)) + mat%mumps_par%IRN_loc(s:e) = i + IF(nlclean) CALL destroy(mat%mat%row(i)) + END DO + IF(nlclean) DEALLOCATE(mat%mat) + END SUBROUTINE to_mumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE to_zmumps_mat(mat, nlkeep) +! +! Convert linked list spmat to mumps matrice structure +! + INCLUDE 'mpif.h' + TYPE(zmumps_mat) :: mat + LOGICAL, INTENT(in), OPTIONAL :: nlkeep + INTEGER :: i, nnz, rank, s, e + INTEGER :: comm, ierr, nnz_loc + LOGICAL :: nlclean +! + nlclean = .TRUE. + IF(PRESENT(nlkeep)) THEN + nlclean = .NOT. nlkeep + END IF +! + comm = mat%mumps_par%COMM + mat%mumps_par%ICNTL(18) = 3 ! Distributed assembled matrix +! +! Allocate the Mumps matrix structure +! CSR format: (cols, irow, val) or (JCN, irow, A) +! COO format: (IRN, JCN, A) or (IRN, cols, val) +! + rank = mat%rank + nnz_loc = get_count(mat) + mat%nnz_start = 0 + CALL mpi_exscan(nnz_loc, mat%nnz_start, 1, MPI_INTEGER, MPI_SUM, comm, ierr) + mat%nnz_start = mat%nnz_start + 1 + mat%nnz_end = mat%nnz_start + nnz_loc - 1 + mat%nnz_loc = nnz_loc + CALL mpi_allreduce(nnz_loc, mat%nnz, 1, MPI_INTEGER, MPI_SUM, comm, ierr) +! + mat%mumps_par%N = rank + mat%mumps_par%NZ_loc = nnz_loc +! + IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) + IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) + IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm) + IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) + ALLOCATE(mat%val(nnz_loc)) + ALLOCATE(mat%cols(nnz_loc)) + ALLOCATE(mat%irow(mat%istart:mat%iend+1)) +! + IF(ASSOCIATED(mat%mumps_par%IRN_loc)) DEALLOCATE(mat%mumps_par%IRN_loc) + ALLOCATE(mat%mumps_par%IRN_loc(nnz_loc)) + mat%mumps_par%A_loc => mat%val + mat%mumps_par%JCN_loc => mat%cols +! +! Fill Mumps structure and deallocate the sparse rows +! + mat%irow(mat%istart) = mat%nnz_start + DO i=mat%istart,mat%iend + mat%irow(i+1) = mat%irow(i) + mat%mat%row(i)%nnz + s = mat%irow(i) - mat%nnz_start + 1 + e = mat%irow(i+1) - mat%nnz_start + CALL getrow(mat%mat%row(i), mat%val(s:e), mat%cols(s:e)) + mat%mumps_par%IRN_loc(s:e) = i + IF(nlclean) CALL destroy(mat%mat%row(i)) + END DO + IF(nlclean) DEALLOCATE(mat%mat) + END SUBROUTINE to_zmumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE reord_mumps_mat(mat, nlmetis, debug) +! +! Reordering and symbolic factorization +! + TYPE(mumps_mat) :: mat + LOGICAL, OPTIONAL, INTENT(in) :: nlmetis + LOGICAL, OPTIONAL, INTENT(in) :: debug +! +! Verbose messages +! + mat%mumps_par%ICNTL(3) = 0 + IF(PRESENT(debug)) THEN + IF(debug) mat%mumps_par%ICNTL(3) = 6 + END IF +! +! Ordering +! + mat%mumps_par%ICNTL(7) = 7 ! Automatic choice + IF(PRESENT(nlmetis)) THEN + IF(nlmetis) mat%mumps_par%ICNTL(7) = 5 ! use METIS nested dissection + END IF +! + mat%mumps_par%JOB = 1 + CALL dmumps(mat%mumps_par) + mat%perm => mat%mumps_par%SYM_PERM + + IF(mat%mumps_par%INFOG(1).NE.0) THEN + WRITE(*,'(a,2i12)') 'REORD: Reordering failed with error', & + & mat%mumps_par%INFOG(1:2) + STOP + END IF + END SUBROUTINE reord_mumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE reord_zmumps_mat(mat, nlmetis, debug) +! +! Reordering and symbolic factorization +! + TYPE(zmumps_mat) :: mat + LOGICAL, OPTIONAL, INTENT(in) :: nlmetis + LOGICAL, OPTIONAL, INTENT(in) :: debug +! +! Verbose messages +! + mat%mumps_par%ICNTL(3) = 0 + IF(PRESENT(debug)) THEN + IF(debug) mat%mumps_par%ICNTL(3) = 6 + END IF +! +! Ordering +! + mat%mumps_par%ICNTL(7) = 7 ! Automatic choice + IF(PRESENT(nlmetis)) THEN + IF(nlmetis) mat%mumps_par%ICNTL(7) = 5 ! use METIS nested dissection + END IF +! + mat%mumps_par%JOB = 1 + CALL zmumps(mat%mumps_par) + + IF(mat%mumps_par%INFOG(1).NE.0) THEN + WRITE(*,'(a,2i12)') 'REORD: Reordering failed with error', & + & mat%mumps_par%INFOG(1:2) + STOP + END IF + END SUBROUTINE reord_zmumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE numfact_mumps_mat(mat, debug) +! +! Numerical factorization +! + TYPE(mumps_mat) :: mat + LOGICAL, OPTIONAL, INTENT(in) :: debug +! +! Verbose messages +! + mat%mumps_par%ICNTL(3) = 0 + IF(PRESENT(debug)) THEN + IF(debug) mat%mumps_par%ICNTL(3) = 6 + END IF +! + mat%mumps_par%JOB = 2 + CALL dmumps(mat%mumps_par) + + IF(mat%mumps_par%INFOG(1).NE.0) THEN + WRITE(*,'(a,2i12)') 'FACTOR: Reordering failed with error', & + & mat%mumps_par%INFOG(1:2) + STOP + END IF +! + END SUBROUTINE numfact_mumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE numfact_zmumps_mat(mat, debug) +! +! Numerical factorization +! + TYPE(zmumps_mat) :: mat + LOGICAL, OPTIONAL, INTENT(in) :: debug +! +! Verbose messages +! + mat%mumps_par%ICNTL(3) = 0 + IF(PRESENT(debug)) THEN + IF(debug) mat%mumps_par%ICNTL(3) = 6 + END IF +! + mat%mumps_par%JOB = 2 + CALL zmumps(mat%mumps_par) + + IF(mat%mumps_par%INFOG(1).NE.0) THEN + WRITE(*,'(a,2i12)') 'FACTOR: Reordering failed with error', & + & mat%mumps_par%INFOG(1:2) + STOP + END IF +! + END SUBROUTINE numfact_zmumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE factor_mumps_mat(mat, nlreord, nlmetis, debug) +! +! Factor (create +reorder + factor) a mumps_mat matrix +! + TYPE(mumps_mat) :: mat + LOGICAL, OPTIONAL, INTENT(in) :: nlreord + LOGICAL, OPTIONAL, INTENT(in) :: nlmetis + LOGICAL, OPTIONAL, INTENT(in) :: debug + LOGICAL :: mlreord +!---------------------------------------------------------------------- +! 1.0 Creation from the sparse rows +! + IF(ASSOCIATED(mat%mat)) THEN + CALL to_mat(mat) + END IF +!---------------------------------------------------------------------- +! 2.0 Reordering and symbolic factorization phase +! + mlreord = .TRUE. + IF(PRESENT(nlreord)) mlreord = nlreord + IF(mlreord) THEN + CALL reord_mat(mat, nlmetis, debug) + END IF +!---------------------------------------------------------------------- +! 3.0 Numerical factorization +! + CALL numfact(mat, debug) + END SUBROUTINE factor_mumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE factor_zmumps_mat(mat, nlreord, nlmetis, debug) +! +! Factor (create +reorder + factor) a mumps_mat matrix +! + TYPE(zmumps_mat) :: mat + LOGICAL, OPTIONAL, INTENT(in) :: nlreord + LOGICAL, OPTIONAL, INTENT(in) :: nlmetis + LOGICAL, OPTIONAL, INTENT(in) :: debug + LOGICAL :: mlreord +!---------------------------------------------------------------------- +! 1.0 Creation from the sparse rows +! + IF(ASSOCIATED(mat%mat)) THEN + CALL to_mat(mat) + END IF +!---------------------------------------------------------------------- +! 2.0 Reordering and symbolic factorization phase +! + mlreord = .TRUE. + IF(PRESENT(nlreord)) mlreord = nlreord + IF(mlreord) THEN + CALL reord_mat(mat, nlmetis, debug) + END IF +!---------------------------------------------------------------------- +! 3.0 Numerical factorization +! + CALL numfact(mat, debug) + END SUBROUTINE factor_zmumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE bsolve_mumps_mat1(mat, rhs, sol, nref, debug) +! +! Backsolve, using Mumps +! + INCLUDE 'mpif.h' + TYPE(mumps_mat) :: mat + DOUBLE PRECISION, INTENT(inout) :: rhs(:) + DOUBLE PRECISION, OPTIONAL, INTENT(inout) :: sol(:) + INTEGER, OPTIONAL, INTENT(in) :: nref + LOGICAL, OPTIONAL, INTENT(in) :: debug +! + INTEGER :: nrank, ierr +! + nrank = SIZE(rhs,1) +! +! Verbose messages +! + mat%mumps_par%ICNTL(3) = 0 + IF(PRESENT(debug)) THEN + IF(debug) mat%mumps_par%ICNTL(3) = 6 + END IF +! + IF(mat%mumps_par%MYID .EQ. 0) THEN + mat%mumps_par%NRHS = 1 + mat%mumps_par%LRHS = nrank + mat%mumps_par%ICNTL(10) = 0 ! Max numbers of iterative refinement steps + IF(PRESENT(nref)) mat%mumps_par%ICNTL(10) = nref +! + ALLOCATE(mat%mumps_par%RHS(nrank)) + mat%mumps_par%RHS = rhs + END IF +! + mat%mumps_par%JOB = 3 + CALL dmumps(mat%mumps_par) +! +! The solution will be broadcasted to everyone +! + IF(PRESENT(sol)) THEN + IF(mat%mumps_par%MYID .EQ. 0) sol=mat%mumps_par%RHS + CALL mpi_bcast(sol, nrank, MPI_DOUBLE_PRECISION, & + & 0, mat%mumps_par%COMM, ierr) + ELSE + IF(mat%mumps_par%MYID .EQ. 0) rhs=mat%mumps_par%RHS + CALL mpi_bcast(rhs, nrank, MPI_DOUBLE_PRECISION, & + & 0, mat%mumps_par%COMM, ierr) + END IF +! + IF(mat%mumps_par%MYID .EQ. 0) THEN + DEALLOCATE(mat%mumps_par%RHS) + IF(mat%mumps_par%INFOG(1).NE.0) THEN + WRITE(*,'(a,2i12)') 'BSOLVE: Reordering failed with error', & + & mat%mumps_par%INFOG(1:2) + STOP + END IF + END IF + END SUBROUTINE bsolve_mumps_mat1 +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE bsolve_zmumps_mat1(mat, rhs, sol, nref, debug) +! +! Backsolve, using Mumps +! + INCLUDE 'mpif.h' + TYPE(zmumps_mat) :: mat + DOUBLE COMPLEX, INTENT(inout) :: rhs(:) + DOUBLE COMPLEX, OPTIONAL, INTENT(inout) :: sol(:) + INTEGER, OPTIONAL, INTENT(in) :: nref + LOGICAL, OPTIONAL, INTENT(in) :: debug +! + INTEGER :: nrank, ierr +! + nrank = SIZE(rhs,1) +! +! Verbose messages +! + mat%mumps_par%ICNTL(3) = 0 + IF(PRESENT(debug)) THEN + IF(debug) mat%mumps_par%ICNTL(3) = 6 + END IF +! + IF(mat%mumps_par%MYID .EQ. 0) THEN + mat%mumps_par%NRHS = 1 + mat%mumps_par%LRHS = nrank + mat%mumps_par%ICNTL(10) = 0 ! Max numbers of iterative refinement steps + IF(PRESENT(nref)) mat%mumps_par%ICNTL(10) = nref +! + ALLOCATE(mat%mumps_par%RHS(nrank)) + mat%mumps_par%RHS = rhs + END IF +! + mat%mumps_par%JOB = 3 + CALL zmumps(mat%mumps_par) +! +! The solution will be broadcasted to everyone +! + IF(PRESENT(sol)) THEN + IF(mat%mumps_par%MYID .EQ. 0) sol=mat%mumps_par%RHS + CALL mpi_bcast(sol, SIZE(rhs), MPI_DOUBLE_COMPLEX, & + & 0, mat%mumps_par%COMM, ierr) + ELSE + IF(mat%mumps_par%MYID .EQ. 0) rhs=mat%mumps_par%RHS + CALL mpi_bcast(rhs, SIZE(rhs), MPI_DOUBLE_COMPLEX, & + & 0, mat%mumps_par%COMM, ierr) + END IF +! + IF(mat%mumps_par%MYID .EQ. 0) THEN + DEALLOCATE(mat%mumps_par%RHS) + IF(mat%mumps_par%INFOG(1).NE.0) THEN + WRITE(*,'(a,2i12)') 'BSOLVE: Reordering failed with error', & + & mat%mumps_par%INFOG(1:2) + STOP + END IF + END IF + END SUBROUTINE bsolve_zmumps_mat1 +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE bsolve_mumps_matn(mat, rhs, sol, nref, debug) +! +! Backsolve, using Mumps +! + INCLUDE 'mpif.h' + TYPE(mumps_mat) :: mat + DOUBLE PRECISION, INTENT(inout) :: rhs(:,:) + DOUBLE PRECISION, OPTIONAL, INTENT(inout) :: sol(:,:) + INTEGER, OPTIONAL, INTENT(in) :: nref + LOGICAL, OPTIONAL, INTENT(in) :: debug +! + INTEGER :: nrank, nrhs, ierr +! + nrank = SIZE(rhs,1) + nrhs = SIZE(rhs,2) +! +! Verbose messages +! + mat%mumps_par%ICNTL(3) = 0 + IF(PRESENT(debug)) THEN + IF(debug) mat%mumps_par%ICNTL(3) = 6 + END IF +! + IF(mat%mumps_par%MYID .EQ. 0) THEN + mat%mumps_par%NRHS = nrhs + mat%mumps_par%LRHS = nrank + mat%mumps_par%ICNTL(10) = 0 ! Max numbers of iterative refinement steps + IF(PRESENT(nref)) mat%mumps_par%ICNTL(10) = nref +! + ALLOCATE(mat%mumps_par%RHS(nrhs*nrank)) + mat%mumps_par%RHS = RESHAPE(rhs, SHAPE(mat%mumps_par%RHS)) + END IF +! + mat%mumps_par%JOB = 3 + CALL dmumps(mat%mumps_par) +! +! The solution will be broadcasted to everyone +! + IF(PRESENT(sol)) THEN + IF(mat%mumps_par%MYID .EQ. 0) sol=RESHAPE(mat%mumps_par%RHS, SHAPE(sol)) + CALL mpi_bcast(sol, PRODUCT(SHAPE(rhs)), MPI_DOUBLE_PRECISION, & + & 0, mat%mumps_par%COMM, ierr) + ELSE + IF(mat%mumps_par%MYID .EQ. 0) rhs=RESHAPE(mat%mumps_par%RHS, SHAPE(rhs)) + CALL mpi_bcast(rhs, PRODUCT(SHAPE(rhs)), MPI_DOUBLE_PRECISION, & + & 0, mat%mumps_par%COMM, ierr) + END IF +! + IF(mat%mumps_par%MYID .EQ. 0) THEN + DEALLOCATE(mat%mumps_par%RHS) + IF(mat%mumps_par%INFOG(1).NE.0) THEN + WRITE(*,'(a,2i12)') 'BSOLVE: Reordering failed with error', & + & mat%mumps_par%INFOG(1:2) + STOP + END IF + END IF + END SUBROUTINE bsolve_mumps_matn +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE bsolve_zmumps_matn(mat, rhs, sol, nref, debug) +! +! Backsolve, using Mumps +! + INCLUDE 'mpif.h' + TYPE(zmumps_mat) :: mat + DOUBLE COMPLEX, INTENT(inout) :: rhs(:,:) + DOUBLE COMPLEX, OPTIONAL, INTENT(inout) :: sol(:,:) + INTEGER, OPTIONAL, INTENT(in) :: nref + LOGICAL, OPTIONAL, INTENT(in) :: debug +! + INTEGER :: nrank, nrhs, ierr +! + nrank = SIZE(rhs,1) + nrhs = SIZE(rhs,2) +! +! Verbose messages +! + mat%mumps_par%ICNTL(3) = 0 + IF(PRESENT(debug)) THEN + IF(debug) mat%mumps_par%ICNTL(3) = 6 + END IF +! + IF(mat%mumps_par%MYID .EQ. 0) THEN + mat%mumps_par%NRHS = nrhs + mat%mumps_par%LRHS = nrank + mat%mumps_par%ICNTL(10) = 0 ! Max numbers of iterative refinement steps + IF(PRESENT(nref)) mat%mumps_par%ICNTL(10) = nref +! + ALLOCATE(mat%mumps_par%RHS(nrhs*nrank)) + mat%mumps_par%RHS = RESHAPE(rhs, SHAPE(mat%mumps_par%RHS)) + END IF +! + mat%mumps_par%JOB = 3 + CALL zmumps(mat%mumps_par) +! +! The solution will be broadcasted to everyone +! + IF(PRESENT(sol)) THEN + IF(mat%mumps_par%MYID .EQ. 0) sol=RESHAPE(mat%mumps_par%RHS, SHAPE(sol)) + CALL mpi_bcast(sol, PRODUCT(SHAPE(rhs)), MPI_DOUBLE_COMPLEX, & + & 0, mat%mumps_par%COMM, ierr) + ELSE + IF(mat%mumps_par%MYID .EQ. 0) rhs=RESHAPE(mat%mumps_par%RHS, SHAPE(rhs)) + CALL mpi_bcast(rhs, PRODUCT(SHAPE(rhs)), MPI_DOUBLE_COMPLEX, & + & 0, mat%mumps_par%COMM, ierr) + END IF +! + IF(mat%mumps_par%MYID .EQ. 0) THEN + DEALLOCATE(mat%mumps_par%RHS) + IF(mat%mumps_par%INFOG(1).NE.0) THEN + WRITE(*,'(a,2i12)') 'BSOLVE: Reordering failed with error', & + & mat%mumps_par%INFOG(1:2) + STOP + END IF + END IF + END SUBROUTINE bsolve_zmumps_matn +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + FUNCTION vmx_mumps_mat(mat, xarr) RESULT(yarr) +! +! Return product mat*x +! + TYPE(mumps_mat) :: mat + DOUBLE PRECISION, INTENT(in) :: xarr(:) + DOUBLE PRECISION :: yarr(SIZE(xarr)) + DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0 + CHARACTER(len=6) :: matdescra + INTEGER :: n, i, j +! + n = mat%rank +! +#ifdef MKL + IF(mat%nlsym) THEN + matdescra = 'sun' + ELSE + matdescra = 'g' + END IF +! + CALL mkl_dcsrmv('N', n, n, alpha, matdescra, mat%val, & + & mat%cols, mat%irow(1), mat%irow(2), xarr, & + & beta, yarr) +#else + yarr = 0.0d0 + DO i=1,n + DO j=mat%irow(i), mat%irow(i+1)-1 + yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j)) + END DO + IF(mat%nlsym) THEN + DO j=mat%irow(i)+1, mat%irow(i+1)-1 + yarr(mat%cols(j)) = yarr(mat%cols(j)) & + & + mat%val(j)*xarr(i) + END DO + END IF + END DO +#endif +! + END FUNCTION vmx_mumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + FUNCTION vmx_zmumps_mat(mat, xarr) RESULT(yarr) +! +! Return product mat*x +! + TYPE(zmumps_mat) :: mat + DOUBLE COMPLEX, INTENT(in) :: xarr(:) + DOUBLE COMPLEX :: yarr(SIZE(xarr)) + DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0) + INTEGER :: n, i, j + CHARACTER(len=6) :: matdescra +! + n = mat%rank +! +#ifdef MKL + IF(mat%nlsym) THEN + matdescra = 'sun' + ELSE IF(mat%nlherm) THEN + matdescra = 'hun' + ELSE + matdescra = 'g' + END IF + CALL mkl_zcsrmv('N', n, n, alpha, matdescra, mat%val, & + & mat%cols, mat%irow(1), mat%irow(2), xarr, & + & beta, yarr) +#else + yarr = (0.0d0,0.0d0) + DO i=1,n + DO j=mat%irow(i), mat%irow(i+1)-1 + yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j)) + END DO + IF(mat%nlsym) THEN + DO j=mat%irow(i)+1, mat%irow(i+1)-1 + yarr(mat%cols(j)) = yarr(mat%cols(j)) & + & + mat%val(j)*xarr(i) + END DO + ELSE IF(mat%nlherm) THEN + DO j=mat%irow(i)+1, mat%irow(i+1)-1 + yarr(mat%cols(j)) = yarr(mat%cols(j)) & + & + CONJG(mat%val(j))*xarr(i) + END DO + END IF + END DO +#endif +! + END FUNCTION vmx_zmumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + FUNCTION vmx_mumps_matn(mat, xarr) RESULT(yarr) +! +! Return product mat*x +! + TYPE(mumps_mat) :: mat + DOUBLE PRECISION, INTENT(in) :: xarr(:,:) + DOUBLE PRECISION :: yarr(SIZE(xarr,1),SIZE(xarr,2)) +! + DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0 + INTEGER :: n, nrhs, i, j + CHARACTER(len=6) :: matdescra +! + n = mat%rank + nrhs = SIZE(xarr,2) +! +#ifdef MKL + IF(mat%nlsym) THEN + matdescra = 'sun' + ELSE + matdescra = 'g' + END IF +! + CALL mkl_dcsrmm('N', n, nrhs, n, alpha, matdescra, mat%val,& + & mat%cols, mat%irow(1), mat%irow(2), xarr, & + & n, beta, yarr, n) +#else + yarr = 0.0d0 + DO i=1,n + DO j=mat%irow(i), mat%irow(i+1)-1 + yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:) + END DO + IF(mat%nlsym) THEN + DO j=mat%irow(i)+1, mat%irow(i+1)-1 + yarr(mat%cols(j),:) = yarr(mat%cols(j),:) & + & + mat%val(j)*xarr(i,:) + END DO + END IF + END DO +#endif +! + END FUNCTION vmx_mumps_matn +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + FUNCTION vmx_zmumps_matn(mat, xarr) RESULT(yarr) +! +! Return product mat*x +! + TYPE(zmumps_mat) :: mat + DOUBLE COMPLEX, INTENT(in) :: xarr(:,:) + DOUBLE COMPLEX :: yarr(SIZE(xarr,1),SIZE(xarr,2)) +! + DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0) + INTEGER :: n, nrhs, i, j + CHARACTER(len=6) :: matdescra +! + n = mat%rank + nrhs = SIZE(xarr,2) +! +#ifdef MKL + IF(mat%nlsym) THEN + matdescra = 'sun' + ELSE IF(mat%nlherm) THEN + matdescra = 'hun' + ELSE + matdescra = 'g' + END IF +! + CALL mkl_zcsrmm('N', n, nrhs, n, alpha, matdescra, mat%val, & + & mat%cols, mat%irow(1), mat%irow(2), xarr, n, & + & beta, yarr, n) +#else + yarr = (0.0d0,0.0d0) + DO i=1,n + DO j=mat%irow(i), mat%irow(i+1)-1 + yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:) + END DO + IF(mat%nlsym) THEN + DO j=mat%irow(i)+1, mat%irow(i+1)-1 + yarr(mat%cols(j),:) = yarr(mat%cols(j),:) & + & + mat%val(j)*xarr(i,:) + END DO + ELSE IF(mat%nlherm) THEN + DO j=mat%irow(i)+1, mat%irow(i+1)-1 + yarr(mat%cols(j),:) = yarr(mat%cols(j),:) & + & + CONJG(mat%val(j))*xarr(i,:) + END DO + END IF + END DO +#endif +! + END FUNCTION vmx_zmumps_matn +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE destroy_mumps_mat(mat) +! +! Deallocate the sparse matrix mat +! + TYPE(mumps_mat) :: mat +! + IF(ASSOCIATED(mat%mat)) THEN + CALL destroy(mat%mat) + DEALLOCATE(mat%mat) + END IF +! + IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) + IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) + IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) +! + IF(ASSOCIATED(mat%mumps_par%IRN_loc)) DEALLOCATE(mat%mumps_par%IRN_loc) + mat%mumps_par%JOB = -2 + CALL dmumps(mat%mumps_par) + IF(mat%mumps_par%INFOG(1).NE.0) THEN + WRITE(*,'(a,2i12)') 'DESTROY: Reordering failed with error', & + & mat%mumps_par%INFOG(1:2) + STOP + END IF + END SUBROUTINE destroy_mumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE destroy_zmumps_mat(mat) +! +! Deallocate the sparse matrix mat +! + TYPE(zmumps_mat) :: mat +! + IF(ASSOCIATED(mat%mat)) THEN + CALL destroy(mat%mat) + DEALLOCATE(mat%mat) + END IF +! + IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) + IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) + IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) +! + IF(ASSOCIATED(mat%mumps_par%IRN_loc)) DEALLOCATE(mat%mumps_par%IRN_loc) + mat%mumps_par%JOB = -2 + CALL zmumps(mat%mumps_par) + IF(mat%mumps_par%INFOG(1).NE.0) THEN + WRITE(*,'(a,2i12)') 'DESTROY: Reordering failed with error', & + & mat%mumps_par%INFOG(1:2) + STOP + END IF + END SUBROUTINE destroy_zmumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE put_mumps_mat(fid, label, mat, str) +! +! Write matrix to hdf5 file +! + USE futils +! + INTEGER, INTENT(in) :: fid + CHARACTER(len=*), INTENT(in) :: label + TYPE(mumps_mat) :: mat + CHARACTER(len=*), OPTIONAL, INTENT(in) :: str + CHARACTER(len=128) :: mumps_grp +! + IF(PRESENT(str)) THEN + CALL creatg(fid, label, str) + ELSE + CALL creatg(fid, label) + END IF + IF(ASSOCIATED(mat%mat)) THEN + CALL to_mat(mat) + END IF + CALL attach(fid, label, 'RANK', mat%rank) + CALL attach(fid, label, 'NNZ', mat%nnz) + CALL attach(fid, label, 'NLSYM', mat%nlsym) + CALL attach(fid, label, 'NLPOS', mat%nlpos) + CALL putarr(fid, TRIM(label)//'/irow', mat%irow) + CALL putarr(fid, TRIM(label)//'/cols', mat%cols) + CALL putarr(fid, TRIM(label)//'/perm', mat%perm) + CALL putarr(fid, TRIM(label)//'/val', mat%val) +! + mumps_grp = TRIM(label)//'/mumps_par' + CALL creatg(fid, mumps_grp) + CALL attach(fid, mumps_grp, 'PAR', mat%mumps_par%PAR) + CALL attach(fid, mumps_grp, 'SYM', mat%mumps_par%SYM) + CALL putarr(fid, TRIM(mumps_grp)//'/IRN', mat%mumps_par%IRN) +! + END SUBROUTINE put_mumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE put_zmumps_mat(fid, label, mat, str) +! +! Write matrix to hdf5 file +! + USE futils +! + INTEGER, INTENT(in) :: fid + CHARACTER(len=*), INTENT(in) :: label + TYPE(zmumps_mat) :: mat + CHARACTER(len=*), OPTIONAL, INTENT(in) :: str + CHARACTER(len=128) :: mumps_grp +! + IF(PRESENT(str)) THEN + CALL creatg(fid, label, str) + ELSE + CALL creatg(fid, label) + END IF + IF(ASSOCIATED(mat%mat)) THEN + CALL to_mat(mat) + END IF + CALL attach(fid, label, 'RANK', mat%rank) + CALL attach(fid, label, 'NNZ', mat%nnz) + CALL attach(fid, label, 'NLSYM', mat%nlsym) + CALL attach(fid, label, 'NLPOS', mat%nlpos) + CALL putarr(fid, TRIM(label)//'/irow', mat%irow) + CALL putarr(fid, TRIM(label)//'/cols', mat%cols) + CALL putarr(fid, TRIM(label)//'/val', mat%val) +! + mumps_grp = TRIM(label)//'/mumps_par' + CALL creatg(fid, mumps_grp) + CALL attach(fid, mumps_grp, 'PAR', mat%mumps_par%PAR) + CALL attach(fid, mumps_grp, 'SYM', mat%mumps_par%SYM) + CALL putarr(fid, TRIM(mumps_grp)//'/IRN', mat%mumps_par%IRN) +! + END SUBROUTINE put_zmumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE get_mumps_mat(fid, label, mat) +! +! Read matrix from hdf5 file +! + USE futils +! + INTEGER, INTENT(in) :: fid + CHARACTER(len=*), INTENT(in) :: label + TYPE(mumps_mat) :: mat + CHARACTER(len=128) :: mumps_grp +! + CALL getatt(fid, label, 'RANK', mat%rank) + CALL getatt(fid, label, 'NNZ', mat%nnz) + CALL getatt(fid, label, 'NLSYM', mat%nlsym) + CALL getarr(fid, TRIM(label)//'/irow', mat%irow) + CALL getarr(fid, TRIM(label)//'/cols', mat%cols) + CALL getarr(fid, TRIM(label)//'/perm', mat%perm) + CALL getarr(fid, TRIM(label)//'/val', mat%val) +! + mumps_grp = TRIM(label)//'/mumps_par' + CALL getatt(fid, mumps_grp, 'PAR', mat%mumps_par%PAR) + CALL getatt(fid, mumps_grp, 'SYM', mat%mumps_par%SYM) + CALL getarr(fid, TRIM(mumps_grp)//'/IRN', mat%mumps_par%IRN) +! + END SUBROUTINE get_mumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE get_zmumps_mat(fid, label, mat) +! +! Read matrix from hdf5 file +! + USE futils +! + INTEGER, INTENT(in) :: fid + CHARACTER(len=*), INTENT(in) :: label + TYPE(zmumps_mat) :: mat + CHARACTER(len=128) :: mumps_grp +! + CALL getatt(fid, label, 'RANK', mat%rank) + CALL getatt(fid, label, 'NNZ', mat%nnz) + CALL getatt(fid, label, 'NLSYM', mat%nlsym) + CALL getarr(fid, TRIM(label)//'/irow', mat%irow) + CALL getarr(fid, TRIM(label)//'/cols', mat%cols) + CALL getarr(fid, TRIM(label)//'/perm', mat%perm) + CALL getarr(fid, TRIM(label)//'/val', mat%val) +! + mumps_grp = TRIM(label)//'/mumps_par' + CALL getatt(fid, mumps_grp, 'PAR', mat%mumps_par%PAR) + CALL getatt(fid, mumps_grp, 'SYM', mat%mumps_par%SYM) + CALL getarr(fid, TRIM(mumps_grp)//'/IRN', mat%mumps_par%IRN) +! + END SUBROUTINE get_zmumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE mcopy_mumps_mat(mata, matb) +! +! Matrix copy: B = A (assume that B is already initialize) +! + TYPE(mumps_mat) :: mata, matb + INTEGER :: n, nnz, nnz_loc +! + IF(ASSOCIATED(matb%mat)) THEN ! Sparse linled list not needed + CALL destroy(matb%mat) + DEALLOCATE(matb%mat) + END IF +! + n = mata%rank + nnz = mata%nnz + nnz_loc = mata%nnz_loc + matb%nnz = nnz + matb%nnz_loc = nnz_loc + matb%nnz_start = mata%nnz_start + matb%nnz_end = mata%nnz_end + matb%istart = mata%istart + matb%iend = mata%iend +! + matb%mumps_par%NZ_loc = mata%mumps_par%NZ_loc +! + IF(ASSOCIATED(matb%val)) DEALLOCATE(matb%val) + IF(ASSOCIATED(matb%cols)) DEALLOCATE(matb%cols) + IF(ASSOCIATED(matb%irow)) DEALLOCATE(matb%irow) + ALLOCATE(matb%val(nnz_loc)); matb%val = mata%val + ALLOCATE(matb%cols(nnz_loc)); matb%cols = mata%cols + ALLOCATE(matb%irow(matb%istart:matb%iend+1)); matb%irow = mata%irow +! + ALLOCATE(matb%mumps_par%IRN_loc(nnz_loc)) + matb%mumps_par%IRN_loc = mata%mumps_par%IRN_loc + matb%mumps_par%A_loc => matb%val + matb%mumps_par%JCN_loc => matb%cols + END SUBROUTINE mcopy_mumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE mcopy_zmumps_mat(mata, matb) +! +! Matrix copy: B = A (assume that B is already initialize) +! + TYPE(zmumps_mat) :: mata, matb + INTEGER :: n, nnz, nnz_loc +! + IF(ASSOCIATED(matb%mat)) THEN ! Sparse linled list not needed + CALL destroy(matb%mat) + DEALLOCATE(matb%mat) + END IF +! + n = mata%rank + nnz = mata%nnz + nnz_loc = mata%nnz_loc + matb%nnz = nnz + matb%nnz_loc = nnz_loc + matb%nnz_start = mata%nnz_start + matb%nnz_end = mata%nnz_end + matb%istart = mata%istart + matb%iend = mata%iend +! + matb%mumps_par%NZ_loc = mata%mumps_par%NZ_loc +! + IF(ASSOCIATED(matb%val)) DEALLOCATE(matb%val) + IF(ASSOCIATED(matb%cols)) DEALLOCATE(matb%cols) + IF(ASSOCIATED(matb%irow)) DEALLOCATE(matb%irow) + ALLOCATE(matb%val(nnz_loc)); matb%val = mata%val + ALLOCATE(matb%cols(nnz_loc)); matb%cols = mata%cols + ALLOCATE(matb%irow(matb%istart:matb%iend+1)); matb%irow = mata%irow +! + ALLOCATE(matb%mumps_par%IRN_loc(nnz_loc)) + matb%mumps_par%IRN_loc = mata%mumps_par%IRN_loc + matb%mumps_par%A_loc => matb%val + matb%mumps_par%JCN_loc => matb%cols + END SUBROUTINE mcopy_zmumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE maddto_mumps_mat(mata, alpha, matb) +! +! A <- A + alpha*B +! + TYPE(mumps_mat) :: mata, matb + DOUBLE PRECISION :: alpha +! + mata%val = mata%val + alpha*matb%val + END SUBROUTINE maddto_mumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE maddto_zmumps_mat(mata, alpha, matb) +! +! A <- A + alpha*B +! + TYPE(zmumps_mat) :: mata, matb + DOUBLE COMPLEX :: alpha +! + mata%val = mata%val + alpha*matb%val + END SUBROUTINE maddto_zmumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE psum_mumps_mat(mat, comm) +! +! Parallel sum of sparse matrices +! + INCLUDE "mpif.h" +! + TYPE(mumps_mat) :: mat + INCLUDE 'psum_mat.tpl' + END SUBROUTINE psum_mumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE psum_zmumps_mat(mat, comm) +! +! Parallel sum of sparse matrices +! + INCLUDE "mpif.h" +! + TYPE(zmumps_mat) :: mat + INCLUDE 'psum_mat.tpl' + END SUBROUTINE psum_zmumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE p2p_mumps_mat(mat, dest, extyp, op, comm) +! +! Point-to-point combine sparse matrix between 2 processes +! + INCLUDE "mpif.h" +! + TYPE(mumps_mat) :: mat + DOUBLE PRECISION, ALLOCATABLE :: val(:) + INTEGER :: mpi_type=MPI_DOUBLE_PRECISION +! + INCLUDE "p2p_mat.tpl" + END SUBROUTINE p2p_mumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE p2p_zmumps_mat(mat, dest, extyp, op, comm) +! +! Point-to-point combine sparse matrix between 2 processes +! + INCLUDE "mpif.h" +! + TYPE(zmumps_mat) :: mat + DOUBLE COMPLEX, ALLOCATABLE :: val(:) + INTEGER :: mpi_type=MPI_DOUBLE_COMPLEX +! + INCLUDE "p2p_mat.tpl" + END SUBROUTINE p2p_zmumps_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +END MODULE mumps_bsplines diff --git a/src/p2p_mat.tpl b/src/p2p_mat.tpl new file mode 100644 index 0000000..c39e729 --- /dev/null +++ b/src/p2p_mat.tpl @@ -0,0 +1,119 @@ +!> +!> @file p2p_mat.tpl +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> + INTEGER, INTENT(in) :: dest + CHARACTER(len=*), INTENT(in) :: extyp ! ('send', 'recv', 'sendrecv') + CHARACTER(len=*), INTENT(in) :: op ! ('put', 'updt') + INTEGER, INTENT(in) :: comm +! + INTEGER :: ierr + INTEGER :: nrank, nnz, nnz_rem + INTEGER :: i, s, idx, bufsize, position + CHARACTER(len=1), ALLOCATABLE :: sbuf(:), rbuf(:) + INTEGER, ALLOCATABLE :: irow(:), cols(:) +!-------------------------------------------------------------------------- +! 1.0 Prologue +! + nrank = mat%rank + nnz = get_count(mat) + CALL mpi_sendrecv(nnz, 1, MPI_INTEGER, dest, 0, & + & nnz_rem, 1, MPI_INTEGER, dest, 0, & + & comm, MPI_STATUS_IGNORE, ierr) +!-------------------------------------------------------------------------- +! 2.0 Send or sendrecv +! + IF(extyp.EQ.'send' .OR. extyp.EQ.'sendrecv') THEN +! +! Allocate packed send buffer + bufsize = 0 + CALL mpi_pack_size(nrank+1, MPI_INTEGER, comm, s, ierr); bufsize=bufsize+s + CALL mpi_pack_size(nnz, MPI_INTEGER, comm, s, ierr); bufsize=bufsize+s + CALL mpi_pack_size(nnz, mpi_type, comm, s, ierr); bufsize=bufsize+s + ALLOCATE(sbuf(bufsize)) +! +! Obtain matrix CSR arrays and pack + CALL to_mat(mat, nlkeep=.TRUE.) + position = 0 + CALL mpi_pack(mat%irow, nrank+1, MPI_INTEGER, sbuf, bufsize, position, comm, ierr) + CALL mpi_pack(mat%cols, nnz, MPI_INTEGER, sbuf, bufsize, position, comm, ierr) + CALL mpi_pack(mat%val, nnz, mpi_type, sbuf, bufsize, position, comm, ierr) + DEALLOCATE(mat%irow) + DEALLOCATE(mat%cols) + DEALLOCATE(mat%val) +! +! Communicate packed buffer + IF(extyp.EQ.'send') THEN + CALL mpi_send(sbuf, position, MPI_PACKED, dest, 0, comm, ierr) + DEALLOCATE(sbuf) + END IF + END IF +!-------------------------------------------------------------------------- +! 3.0 Sendrecv or recv +! + IF(extyp.EQ.'recv' .OR. extyp.EQ.'sendrecv') THEN +! +! Allocate unpacked received buffer + bufsize = 0 + CALL mpi_pack_size(nrank+1, MPI_INTEGER, comm, s, ierr); bufsize=bufsize+s + CALL mpi_pack_size(nnz_rem, MPI_INTEGER, comm, s, ierr); bufsize=bufsize+s + CALL mpi_pack_size(nnz_rem, mpi_type, comm, s, ierr); bufsize=bufsize+s + ALLOCATE(rbuf(bufsize)) +! +! Communicate packed buffer + IF(extyp.EQ.'recv') THEN + CALL mpi_recv(rbuf, bufsize, MPI_PACKED, dest, 0, comm, MPI_STATUS_IGNORE, ierr) + ELSE IF(extyp.EQ.'sendrecv') THEN + CALL mpi_sendrecv(sbuf, position, MPI_PACKED, dest, 0, & + & rbuf, bufsize, MPI_PACKED, dest, 0, & + & comm, MPI_STATUS_IGNORE, ierr) + DEALLOCATE(sbuf) + END IF +! +! Unpacked rbuf + ALLOCATE(irow(nrank+1)) + ALLOCATE(cols(nnz_rem)) + ALLOCATE(val(nnz_rem)) + position = 0 + CALL mpi_unpack(rbuf, bufsize, position, irow, nrank+1, MPI_INTEGER, comm, ierr) + CALL mpi_unpack(rbuf, bufsize, position, cols, nnz_rem, MPI_INTEGER, comm, ierr) + CALL mpi_unpack(rbuf, bufsize, position, val, nnz_rem, mpi_type, comm, ierr) + DEALLOCATE(rbuf) +! +! Update/replace sparse matrix + DO i=1,nrank + DO idx=irow(i),irow(i+1)-1 + IF(op.EQ.'updt') THEN + CALL updtmat(mat, i, cols(idx), val(idx)) + ELSE IF(op.EQ.'put') THEN + CALL putele(mat, i, cols(idx), val(idx)) + END IF + END DO + END DO + DEALLOCATE(irow) + DEALLOCATE(cols) + DEALLOCATE(val) +! + END IF +!-------------------------------------------------------------------------- diff --git a/src/pardiso_mod.f90 b/src/pardiso_mod.f90 new file mode 100644 index 0000000..4491b9c --- /dev/null +++ b/src/pardiso_mod.f90 @@ -0,0 +1,1605 @@ +!> +!> @file pardiso_mod.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +MODULE pardiso_bsplines +! +! PARDISO_BSPLINES: Simple interface to the sparse direct solver PARDISO +! (MKL version). +! +! T.M. Tran, CRPP-EPFL +! November 2010 +! + USE sparse + IMPLICIT NONE +! + TYPE pardiso_param + INTEGER :: error, mtype, msglvl, phase, maxfct, mnum, nrhs + INTEGER :: iparm(64) + INTEGER*8 :: pt(64) + END TYPE pardiso_param +! + TYPE pardiso_mat + INTEGER :: rank, nnz + INTEGER :: nterms, kmat + LOGICAL :: nlsym + LOGICAL :: nlpos + LOGICAL :: nlforce_zero + TYPE(spmat), POINTER :: mat => NULL() + INTEGER, POINTER :: cols(:) => NULL() + INTEGER, POINTER :: irow(:) => NULL() + INTEGER, POINTER :: perm(:) => NULL() + DOUBLE PRECISION, POINTER :: val(:) => NULL() + TYPE(pardiso_param) :: p + END TYPE pardiso_mat +! + TYPE zpardiso_mat + INTEGER :: rank, nnz + INTEGER :: nterms, kmat + LOGICAL :: nlsym + LOGICAL :: nlherm + LOGICAL :: nlpos + LOGICAL :: nlforce_zero + TYPE(zspmat), POINTER :: mat => NULL() + INTEGER, POINTER :: cols(:) => NULL() + INTEGER, POINTER :: irow(:) => NULL() + INTEGER, POINTER :: perm(:) => NULL() + DOUBLE COMPLEX, POINTER :: val(:) => NULL() + TYPE(pardiso_param) :: p + END TYPE zpardiso_mat +! + INTERFACE init + MODULE PROCEDURE init_pardiso_mat, init_zpardiso_mat + END INTERFACE init +! + INTERFACE clear_mat + MODULE PROCEDURE clear_pardiso_mat, clear_zpardiso_mat + END INTERFACE clear_mat +! + INTERFACE updtmat + MODULE PROCEDURE updt_pardiso_mat, updt_zpardiso_mat + END INTERFACE updtmat +! + INTERFACE putele + MODULE PROCEDURE putele_pardiso_mat, putele_zpardiso_mat + END INTERFACE putele +! + INTERFACE getele + MODULE PROCEDURE getele_pardiso_mat, getele_zpardiso_mat + END INTERFACE getele +! + INTERFACE putrow + MODULE PROCEDURE putrow_pardiso_mat, putrow_zpardiso_mat + END INTERFACE putrow +! + INTERFACE getrow + MODULE PROCEDURE getrow_pardiso_mat, getrow_zpardiso_mat + END INTERFACE getrow +! + INTERFACE putcol + MODULE PROCEDURE putcol_pardiso_mat, putcol_zpardiso_mat + END INTERFACE putcol +! + INTERFACE getcol + MODULE PROCEDURE getcol_pardiso_mat, getcol_zpardiso_mat + END INTERFACE getcol +! + INTERFACE get_count + MODULE PROCEDURE get_count_pardiso_mat, get_count_zpardiso_mat + END INTERFACE get_count +! + INTERFACE to_mat + MODULE PROCEDURE to_pardiso_mat, to_zpardiso_mat + END INTERFACE to_mat +! + INTERFACE reord_mat + MODULE PROCEDURE reord_pardiso_mat, reord_zpardiso_mat + END INTERFACE reord_mat +! + INTERFACE numfact + MODULE PROCEDURE numfact_pardiso_mat, numfact_zpardiso_mat + END INTERFACE numfact +! + INTERFACE factor + MODULE PROCEDURE factor_pardiso_mat, factor_zpardiso_mat + END INTERFACE factor +! + INTERFACE bsolve + MODULE PROCEDURE bsolve_pardiso_mat1, bsolve_pardiso_matn, & + & bsolve_zpardiso_mat1, bsolve_zpardiso_matn + END INTERFACE bsolve +! + INTERFACE vmx + MODULE PROCEDURE vmx_pardiso_mat, vmx_pardiso_matn, & + & vmx_zpardiso_mat, vmx_zpardiso_matn + END INTERFACE vmx +! + INTERFACE destroy + MODULE PROCEDURE destroy_pardiso_mat, destroy_zpardiso_mat + END INTERFACE destroy +! + INTERFACE putmat + MODULE PROCEDURE put_pardiso_mat, put_zpardiso_mat + END INTERFACE putmat +! + INTERFACE getmat + MODULE PROCEDURE get_pardiso_mat, get_zpardiso_mat + END INTERFACE getmat +! + INTERFACE mcopy + MODULE PROCEDURE mcopy_pardiso_mat, mcopy_zpardiso_mat + END INTERFACE mcopy +! + INTERFACE maddto + MODULE PROCEDURE maddto_pardiso_mat, maddto_zpardiso_mat + END INTERFACE maddto +! + INTERFACE psum_mat + MODULE PROCEDURE psum_pardiso_mat, psum_zpardiso_mat + END INTERFACE psum_mat +! + INTERFACE p2p_mat + MODULE PROCEDURE p2p_pardiso_mat, p2p_zpardiso_mat + END INTERFACE p2p_mat +! +CONTAINS +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE init_pardiso_mat(n, nterms, mat, kmat, nlsym, nlpos, & + & nlforce_zero) +! +! Initialize an empty sparse pardiso matrix +! + INTEGER, INTENT(in) :: n, nterms + TYPE(pardiso_mat) :: mat + INTEGER, OPTIONAL, INTENT(in) :: kmat + LOGICAL, OPTIONAL, INTENT(in) :: nlsym + LOGICAL, OPTIONAL, INTENT(in) :: nlpos + LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero +! + mat%rank = n + mat%nterms = nterms + mat%nnz = 0 + mat%nlsym = .FALSE. + mat%nlpos = .TRUE. + mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val + IF(PRESENT(kmat)) mat%kmat = kmat + IF(PRESENT(nlsym)) mat%nlsym = nlsym + IF(PRESENT(nlpos)) mat%nlpos = nlpos + IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero +! + IF(ASSOCIATED(mat%mat)) DEALLOCATE(mat%mat) + ALLOCATE(mat%mat) + CALL init(n, mat%mat) +! + mat%p%iparm = 0 + CALL setup_pardiso(mat%p%iparm) + mat%p%maxfct = 1 ! Max number of factorizations + mat%p%mnum = 1 ! Actual matrix, shoild be 1<= num <= maxfct + mat%p%error = 0 ! initialize error flag + mat%p%msglvl = 1 ! print statistical information (0: no stat) + IF(mat%nlsym) THEN + IF(mat%nlpos) THEN + mat%p%mtype = 2 ! symmetric, positive definite + ELSE + mat%p%mtype = -2 ! symmetric, indefinite + END IF + ELSE + mat%p%mtype = 11 ! unsymmetric + END IF + mat%p%nrhs = 1 ! number of RHSs + mat%p%pt(1:64) = 0 ! Initialize Pardiso address pointer (handle) +! + CONTAINS + SUBROUTINE setup_pardiso(iparm) + INTEGER :: iparm(:) + iparm(1) = 1 ! no solver default +!!$ iparm(2) = 2 ! fill-in reordering from METIS + iparm(2) = 0 ! Minimum degree fill-in reordering + iparm(3) = 1 ! numbers of processors + iparm(4) = 0 ! no iterative-direct algorithm + iparm(5) = 0 ! no user fill-in reducing permutation + iparm(6) = 0 ! =0 solution on the first n compoments of x + iparm(7) = 0 ! not in use + iparm(8) = 9 ! numbers of iterative refinement steps + iparm(9) = 0 ! not in use + iparm(10) = 13 ! perturbe the pivot elements with 1E-13 + iparm(11) = 1 ! use nonsymmetric permutation and scaling MPS + iparm(12) = 0 ! not in use + iparm(13) = 0 ! not in use + iparm(14) = 0 ! Output: number of perturbed pivots + iparm(15) = 0 ! not in use + iparm(16) = 0 ! not in use + iparm(17) = 0 ! not in use + iparm(18) = -1 ! Output: number of nonzeros in the factor LU + iparm(19) = -1 ! Output: Mflops for LU factorization + iparm(20) = 0 ! Output: Numbers of CG Iterations + END SUBROUTINE setup_pardiso + END SUBROUTINE init_pardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE init_zpardiso_mat(n, nterms, mat, kmat, nlsym, nlherm, & + & nlpos, nlforce_zero) +! +! Initialize an empty sparse pardiso matrix +! + INTEGER, INTENT(in) :: n, nterms + TYPE(zpardiso_mat) :: mat + INTEGER, OPTIONAL, INTENT(in) :: kmat + LOGICAL, OPTIONAL, INTENT(in) :: nlsym + LOGICAL, OPTIONAL, INTENT(in) :: nlherm + LOGICAL, OPTIONAL, INTENT(in) :: nlpos + LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero +! + mat%rank = n + mat%nterms = nterms + mat%nnz = 0 + mat%nlsym = .FALSE. + mat%nlherm = .FALSE. + mat%nlpos = .TRUE. + mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val + IF(PRESENT(kmat)) mat%kmat = kmat + IF(PRESENT(nlsym)) mat%nlsym = nlsym + IF(PRESENT(nlherm)) mat%nlherm = nlherm + IF(PRESENT(nlpos)) mat%nlpos = nlpos + IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero +! + IF(ASSOCIATED(mat%mat)) DEALLOCATE(mat%mat) + ALLOCATE(mat%mat) + CALL init(n, mat%mat) +! + mat%p%iparm = 0 + CALL setup_pardiso(mat%p%iparm) + mat%p%maxfct = 1 ! Max number of factorizations + mat%p%mnum = 1 ! Actual matrix, shoild be 1<= num <= maxfct + mat%p%error = 0 ! initialize error flag + mat%p%msglvl = 1 ! print statistical information (0: no stat) + IF(mat%nlherm) THEN + IF(mat%nlpos) THEN + mat%p%mtype = 4 ! hermitian, positive definite + ELSE + mat%p%mtype = -4 ! hermitian, indefinite + END IF + ELSE IF(mat%nlsym) THEN + mat%p%mtype = 6 ! symmetric + ELSE + mat%p%mtype = 13 ! unsymmetric + END IF + mat%p%nrhs = 1 ! number of RHSs + mat%p%pt(1:64) = 0 ! Initialize Pardiso address pointer (handle) +! + CONTAINS + SUBROUTINE setup_pardiso(iparm) + INTEGER :: iparm(:) + iparm(1) = 1 ! no solver default +!!$ iparm(2) = 2 ! fill-in reordering from METIS + iparm(2) = 0 ! Minimum degree fill-in reordering + iparm(3) = 1 ! numbers of processors + iparm(4) = 0 ! no iterative-direct algorithm + iparm(5) = 0 ! no user fill-in reducing permutation + iparm(6) = 0 ! =0 solution on the first n compoments of x + iparm(7) = 0 ! not in use + iparm(8) = 9 ! numbers of iterative refinement steps + iparm(9) = 0 ! not in use + iparm(10) = 13 ! perturbe the pivot elements with 1E-13 + iparm(11) = 1 ! use nonsymmetric permutation and scaling MPS + iparm(12) = 0 ! not in use + iparm(13) = 0 ! not in use + iparm(14) = 0 ! Output: number of perturbed pivots + iparm(15) = 0 ! not in use + iparm(16) = 0 ! not in use + iparm(17) = 0 ! not in use + iparm(18) = -1 ! Output: number of nonzeros in the factor LU + iparm(19) = -1 ! Output: Mflops for LU factorization + iparm(20) = 0 ! Output: Numbers of CG Iterations + END SUBROUTINE setup_pardiso + END SUBROUTINE init_zpardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE clear_pardiso_mat(mat) +! +! Clear matrix, keeping its sparse structure unchanged +! + TYPE(pardiso_mat) :: mat +! + mat%val = 0.0d0 + mat%perm = 0 + END SUBROUTINE clear_pardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE clear_zpardiso_mat(mat) +! +! Clear matrix, keeping its sparse structure unchanged +! + TYPE(zpardiso_mat) :: mat +! + mat%val = (0.0d0, 0.0d0) + mat%perm = 0 + END SUBROUTINE clear_zpardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE updt_pardiso_mat(mat, i, j, val) +! +! Update element Aij of pardiso matrix +! + TYPE(pardiso_mat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE PRECISION, INTENT(in) :: val + INTEGER :: k, s, e +! + IF(mat%nlsym) THEN ! Store only upper part for symmetric matrices + IF(i.GT.j) RETURN + END IF +! + IF(ASSOCIATED(mat%mat)) THEN + CALL updtmat(mat%mat, i, j, val) + ELSE + s = mat%irow(i) + e = mat%irow(i+1)-1 + k = isearch(mat%cols(s:e), j) + IF( k.GE.0 ) THEN + mat%val(s+k) = mat%val(s+k)+val + ELSE + WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j + STOP '*** Abnormal EXIT in MODULE pardiso_mod ***' + END IF + END IF + END SUBROUTINE updt_pardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE updt_zpardiso_mat(mat, i, j, val) +! +! Update element Aij of pardiso matrix +! + TYPE(zpardiso_mat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE COMPLEX, INTENT(in) :: val + INTEGER :: k, s, e +! + IF(mat%nlherm .OR. mat%nlsym) THEN ! Store only upper part + IF(i.GT.j) RETURN + END IF +! + IF(ASSOCIATED(mat%mat)) THEN + CALL updtmat(mat%mat, i, j, val) + ELSE + s = mat%irow(i) + e = mat%irow(i+1)-1 + k = isearch(mat%cols(s:e), j) + IF( k.GE.0 ) THEN + mat%val(s+k) = mat%val(s+k)+val + ELSE + WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j + STOP '*** Abnormal EXIT in MODULE pardiso_mod ***' + END IF + END IF + END SUBROUTINE updt_zpardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putele_pardiso_mat(mat, i, j, val) +! +! Put element (i,j) of matrix +! + TYPE(pardiso_mat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE PRECISION, INTENT(in) :: val + INTEGER :: iput, jput + INTEGER :: k, s, e +! + iput = i + jput = j + IF(mat%nlsym) THEN + IF( i.GT.j ) THEN ! Lower triangular part + iput = j + jput = i + END IF + END IF +! + IF(ASSOCIATED(mat%mat)) THEN + CALL putele(mat%mat, iput, jput, val, & + & nlforce_zero=mat%nlforce_zero) + ELSE + s = mat%irow(iput) + e = mat%irow(iput+1)-1 + k = isearch(mat%cols(s:e), jput) + IF( k.GE.0 ) THEN + mat%val(s+k) = val + ELSE + IF(ABS(val) .GT. EPSILON(0.0d0)) THEN ! Ok for zero val + WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j + STOP '*** Abnormal EXIT in MODULE pardiso_mod ***' + END IF + END IF + END IF + END SUBROUTINE putele_pardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putele_zpardiso_mat(mat, i, j, val) +! +! Put element (i,j) of matrix +! + TYPE(zpardiso_mat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE COMPLEX, INTENT(in) :: val + DOUBLE COMPLEX :: valput + INTEGER :: iput, jput + INTEGER :: k, s, e +! + iput = i + jput = j + valput = val + IF(mat%nlherm .OR. mat%nlsym) THEN + IF( i.GT.j ) THEN ! Lower triangular part + iput = j + jput = i + IF(mat%nlherm) THEN + valput = CONJG(val) + ELSE + valput = val + END IF + END IF + END IF +! + IF(ASSOCIATED(mat%mat)) THEN + CALL putele(mat%mat, iput, jput, valput, & + & nlforce_zero=mat%nlforce_zero) + ELSE + s = mat%irow(iput) + e = mat%irow(iput+1)-1 + k = isearch(mat%cols(s:e), jput) + IF( k.GE.0 ) THEN + mat%val(s+k) = valput + ELSE + IF(ABS(val) .GT. EPSILON(0.0d0)) THEN ! Ok for zero val + WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j + STOP '*** Abnormal EXIT in MODULE pardiso_mod ***' + END IF + END IF + END IF + END SUBROUTINE putele_zpardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getele_pardiso_mat(mat, i, j, val) +! +! Get element (i,j) of sparse matrix +! + TYPE(pardiso_mat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE PRECISION, INTENT(out) :: val + INTEGER :: iget, jget + INTEGER :: k, s, e +! + iget = i + jget = j + IF(mat%nlsym) THEN + IF( i.GT.j ) THEN ! Lower triangular part + iget = j + jget = i + END IF + END IF +! + IF(ASSOCIATED(mat%mat)) THEN + CALL getele(mat%mat, iget, jget, val) + ELSE + s = mat%irow(iget) + e = mat%irow(iget+1)-1 + k = isearch(mat%cols(s:e), jget) + IF( k.GE.0 ) THEN + val =mat%val(s+k) + ELSE + val = 0.0d0 ! Assume zero val if not found + END IF + END IF + END SUBROUTINE getele_pardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getele_zpardiso_mat(mat, i, j, val) +! +! Get element (i,j) of sparse matrix +! + TYPE(zpardiso_mat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE COMPLEX, INTENT(out) :: val + DOUBLE COMPLEX :: valget + INTEGER :: iget, jget + INTEGER :: k, s, e +! + iget = i + jget = j + IF(mat%nlherm .OR. mat%nlsym) THEN + IF( i.GT.j ) THEN ! Lower triangular part + iget = j + jget = i + END IF + END IF +! + IF(ASSOCIATED(mat%mat)) THEN + CALL getele(mat%mat, iget, jget, valget) + ELSE + s = mat%irow(iget) + e = mat%irow(iget+1)-1 + k = isearch(mat%cols(s:e), jget) + IF( k.GE.0 ) THEN + valget =mat%val(s+k) + ELSE + valget = (0.0d0,0.0d0) ! Assume zero val if not found + END IF + END IF + val = valget + IF( i.GT.j ) THEN + IF(mat%nlherm) THEN + val = CONJG(valget) + END IF + END IF + END SUBROUTINE getele_zpardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putrow_pardiso_mat(amat, i, arr) +! +! Put a row into sparse matrix +! + TYPE(pardiso_mat), INTENT(inout) :: amat + INTEGER, INTENT(in) :: i + DOUBLE PRECISION, INTENT(in) :: arr(:) + INTEGER :: j +! + DO j=1,amat%rank + CALL putele(amat, i, j, arr(j)) + END DO + END SUBROUTINE putrow_pardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putrow_zpardiso_mat(amat, i, arr) +! +! Put a row into sparse matrix +! + TYPE(zpardiso_mat), INTENT(inout) :: amat + INTEGER, INTENT(in) :: i + DOUBLE COMPLEX, INTENT(in) :: arr(:) + INTEGER :: j +! + DO j=1,amat%rank + CALL putele(amat, i, j, arr(j)) + END DO + END SUBROUTINE putrow_zpardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getrow_pardiso_mat(amat, i, arr) +! +! Get a row from sparse matrix +! + TYPE(pardiso_mat), INTENT(in) :: amat + INTEGER, INTENT(in) :: i + DOUBLE PRECISION, INTENT(out) :: arr(:) + INTEGER :: j +! + DO j=1,amat%rank + CALL getele(amat, i, j, arr(j)) + END DO + END SUBROUTINE getrow_pardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getrow_zpardiso_mat(amat, i, arr) +! +! Get a row from sparse matrix +! + TYPE(zpardiso_mat), INTENT(in) :: amat + INTEGER, INTENT(in) :: i + DOUBLE COMPLEX, INTENT(out) :: arr(:) + INTEGER :: j +! + DO j=1,amat%rank + CALL getele(amat, i, j, arr(j)) + END DO + END SUBROUTINE getrow_zpardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putcol_pardiso_mat(amat, j, arr) +! +! Put a column into sparse matrix +! + TYPE(pardiso_mat), INTENT(inout) :: amat + INTEGER, INTENT(in) :: j + DOUBLE PRECISION, INTENT(in) :: arr(:) + INTEGER :: i +! + DO i=1,amat%rank + CALL putele(amat, i, j, arr(i)) + END DO + END SUBROUTINE putcol_pardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putcol_zpardiso_mat(amat, j, arr) +! +! Put a column into sparse matrix +! + TYPE(zpardiso_mat), INTENT(inout) :: amat + INTEGER, INTENT(in) :: j + DOUBLE COMPLEX, INTENT(in) :: arr(:) + INTEGER :: i +! + DO i=1,amat%rank + CALL putele(amat, i, j, arr(i)) + END DO + END SUBROUTINE putcol_zpardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getcol_pardiso_mat(amat, j, arr) +! +! Get a column from sparse matrix +! + TYPE(pardiso_mat), INTENT(in) :: amat + INTEGER, INTENT(in) :: j + DOUBLE PRECISION, INTENT(out) :: arr(:) + INTEGER :: i +! + DO i=1,amat%rank + CALL getele(amat, i, j, arr(i)) + END DO + END SUBROUTINE getcol_pardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getcol_zpardiso_mat(amat, j, arr) +! +! Get a column from sparse matrix +! + TYPE(zpardiso_mat), INTENT(in) :: amat + INTEGER, INTENT(in) :: j + DOUBLE COMPLEX, INTENT(out) :: arr(:) + INTEGER :: i +! + DO i=1,amat%rank + CALL getele(amat, i, j, arr(i)) + END DO + END SUBROUTINE getcol_zpardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + INTEGER FUNCTION get_count_pardiso_mat(mat, nnz) +! +! Number of non-zeros in sparse matrix +! + TYPE(pardiso_mat) :: mat + INTEGER, INTENT(out), OPTIONAL :: nnz(:) + INTEGER :: i +! + IF(ASSOCIATED(mat%mat)) THEN + get_count_pardiso_mat = get_count(mat%mat, nnz) + ELSE + get_count_pardiso_mat = mat%nnz + IF(PRESENT(nnz)) THEN + DO i=1,mat%rank + nnz(i) = mat%irow(i+1)-mat%irow(i) + END DO + END IF + END IF + END FUNCTION get_count_pardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + INTEGER FUNCTION get_count_zpardiso_mat(mat, nnz) +! +! Number of non-zeros in sparse matrix +! + TYPE(zpardiso_mat) :: mat + INTEGER, INTENT(out), OPTIONAL :: nnz(:) + INTEGER :: i +! + IF(ASSOCIATED(mat%mat)) THEN + get_count_zpardiso_mat = get_count(mat%mat, nnz) + ELSE + get_count_zpardiso_mat = mat%nnz + IF(PRESENT(nnz)) THEN + DO i=1,mat%rank + nnz(i) = mat%irow(i+1)-mat%irow(i) + END DO + END IF + END IF + END FUNCTION get_count_zpardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE to_pardiso_mat(mat, nlkeep) +! +! Convert linked list spmat to pardiso matrice structure +! + TYPE(pardiso_mat) :: mat + LOGICAL, INTENT(in), OPTIONAL :: nlkeep + INTEGER :: i, nnz, rank, s, e + LOGICAL :: nlclean +! + nlclean = .TRUE. + IF(PRESENT(nlkeep)) THEN + nlclean = .NOT. nlkeep + END IF +! +! Allocate the Pardiso matrix structure +! + nnz = get_count(mat) + rank = mat%rank + mat%nnz = nnz + IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) + IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) + IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm) + IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) + ALLOCATE(mat%val(nnz)) + ALLOCATE(mat%perm(rank)) + ALLOCATE(mat%irow(rank+1)) + ALLOCATE(mat%cols(nnz)) +! +! Fill Pardiso structure and optionnaly deallocate the sparse rows +! + mat%irow = 1 + DO i=1,rank + mat%irow(i+1) = mat%irow(i) + mat%mat%row(i)%nnz + s = mat%irow(i) + e = mat%irow(i+1)-1 + CALL getrow(mat%mat%row(i), mat%val(s:e), mat%cols(s:e)) + IF(nlclean) CALL destroy(mat%mat%row(i)) + END DO + IF(nlclean) DEALLOCATE(mat%mat) + END SUBROUTINE to_pardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE to_zpardiso_mat(mat, nlkeep) +! +! Convert linked list spmat to pardiso matrice structure +! + TYPE(zpardiso_mat) :: mat + LOGICAL, INTENT(in), OPTIONAL :: nlkeep + INTEGER :: i, nnz, rank, s, e + LOGICAL :: nlclean +! + nlclean = .TRUE. + IF(PRESENT(nlkeep)) THEN + nlclean = .NOT. nlkeep + END IF +! +! Allocate the Pardiso matrix structure +! + nnz = get_count(mat) + rank = mat%rank + mat%nnz = nnz + IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) + IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) + IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm) + IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) + ALLOCATE(mat%val(nnz)) + ALLOCATE(mat%perm(rank)) + ALLOCATE(mat%irow(rank+1)) + ALLOCATE(mat%cols(nnz)) +! +! Fill Pardiso structure and deallocate the sparse rows +! + mat%irow = 1 + DO i=1,rank + mat%irow(i+1) = mat%irow(i) + mat%mat%row(i)%nnz + s = mat%irow(i) + e = mat%irow(i+1)-1 + CALL getrow(mat%mat%row(i), mat%val(s:e), mat%cols(s:e)) + IF(nlclean) CALL destroy(mat%mat%row(i)) + END DO + IF(nlclean) DEALLOCATE(mat%mat) + END SUBROUTINE to_zpardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE reord_pardiso_mat(mat, nlmetis, debug) +! +! Reordering and symbolic factorization +! + TYPE(pardiso_mat) :: mat + LOGICAL, OPTIONAL, INTENT(in) :: nlmetis + LOGICAL, OPTIONAL, INTENT(in) :: debug + DOUBLE PRECISION :: dummy +! + mat%p%iparm(2) = 0 ! use minimum degree algorithm + IF(PRESENT(nlmetis)) THEN + IF(nlmetis) mat%p%iparm(2) = 2 ! use METIS nested dissection + END IF + mat%p%iparm(5)= 2 ! return the permutation vector in mat%perm + mat%p%phase = 11 ! Reordering and symbolic factorization + mat%p%msglvl = 0 + IF(PRESENT(debug)) THEN + IF(debug) mat%p%msglvl = 1 + END IF + CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, & + & mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, & + & mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, & + & dummy, dummy, mat%p%error) + IF(mat%p%error.NE.0) THEN + WRITE(*,'(a,i4)') 'REORD: Reordering failed with error', mat%p%error + END IF + END SUBROUTINE reord_pardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE reord_zpardiso_mat(mat, nlmetis, debug) +! +! Reordering and symbolic factorization +! + TYPE(zpardiso_mat) :: mat + LOGICAL, OPTIONAL, INTENT(in) :: nlmetis + LOGICAL, OPTIONAL, INTENT(in) :: debug + DOUBLE COMPLEX :: dummy +! + mat%p%iparm(2) = 0 ! use minimum degree algorithm + IF(PRESENT(nlmetis)) THEN + IF(nlmetis) mat%p%iparm(2) = 2 ! use METIS nested dissection + END IF + mat%p%iparm(5)= 2 ! return the permutation vector in mat%perm + mat%p%phase = 11 ! Reordering and symbolic factorization + mat%p%msglvl = 0 + IF(PRESENT(debug)) THEN + IF(debug) mat%p%msglvl = 1 + END IF + CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, & + & mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, & + & mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, & + & dummy, dummy, mat%p%error) + IF(mat%p%error.NE.0) THEN + WRITE(*,'(a,i4)') 'REORD: Reordering failed with error', mat%p%error + END IF + END SUBROUTINE reord_zpardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE numfact_pardiso_mat(mat, debug) +! +! Numerical factorization +! + TYPE(pardiso_mat) :: mat + LOGICAL, OPTIONAL, INTENT(in) :: debug + DOUBLE PRECISION :: dummy +! + mat%p%phase = 22 ! Numerical factorization + mat%p%msglvl = 0 + IF(PRESENT(debug)) THEN + IF(debug) mat%p%msglvl = 1 + END IF + CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, & + & mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, & + & mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, & + & dummy, dummy, mat%p%error) + IF(mat%p%error.NE.0) THEN + WRITE(*,'(a,i4)') 'FACTOR: Factorization failed with error', mat%p%error + END IF + END SUBROUTINE numfact_pardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE numfact_zpardiso_mat(mat, debug) +! +! Numerical factorization +! + TYPE(zpardiso_mat) :: mat + LOGICAL, OPTIONAL, INTENT(in) :: debug + DOUBLE COMPLEX :: dummy +! + mat%p%phase = 22 ! Numerical factorization + mat%p%msglvl = 0 + IF(PRESENT(debug)) THEN + IF(debug) mat%p%msglvl = 1 + END IF + CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, & + & mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, & + & mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, & + & dummy, dummy, mat%p%error) + IF(mat%p%error.NE.0) THEN + WRITE(*,'(a,i4)') 'FACTOR: Factorization failed with error', mat%p%error + END IF + END SUBROUTINE numfact_zpardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE factor_pardiso_mat(mat, nlreord, nlmetis, debug) +! +! Factor (create +reorder + factor) a pardiso_mat matrix +! + TYPE(pardiso_mat) :: mat + LOGICAL, OPTIONAL, INTENT(in) :: nlreord + LOGICAL, OPTIONAL, INTENT(in) :: nlmetis + LOGICAL, OPTIONAL, INTENT(in) :: debug + LOGICAL :: mlreord +!---------------------------------------------------------------------- +! 1.0 Creation from the sparse rows +! + IF(ASSOCIATED(mat%mat)) THEN + CALL to_mat(mat) + END IF +!---------------------------------------------------------------------- +! 2.0 Reordering and symbolic factorization phase +! + mlreord = .TRUE. + IF(PRESENT(nlreord)) mlreord = nlreord + IF(mlreord) THEN + CALL reord_mat(mat, nlmetis, debug) + END IF +!---------------------------------------------------------------------- +! 3.0 Numerical factorization +! + CALL numfact(mat, debug) + END SUBROUTINE factor_pardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE factor_zpardiso_mat(mat, nlreord, nlmetis, debug) +! +! Factor (create +reorder + factor) a pardiso_mat matrix +! + TYPE(zpardiso_mat) :: mat + LOGICAL, OPTIONAL, INTENT(in) :: nlreord + LOGICAL, OPTIONAL, INTENT(in) :: nlmetis + LOGICAL, OPTIONAL, INTENT(in) :: debug + LOGICAL :: mlreord +!---------------------------------------------------------------------- +! 1.0 Creation from the sparse rows +! + IF(ASSOCIATED(mat%mat)) THEN + CALL to_mat(mat) + END IF +!---------------------------------------------------------------------- +! 2.0 Reordering and symbolic factorization phase +! + mlreord = .TRUE. + IF(PRESENT(nlreord)) mlreord = nlreord + IF(mlreord) THEN + CALL reord_mat(mat, nlmetis, debug) + END IF +!---------------------------------------------------------------------- +! 3.0 Numerical factorization +! + CALL numfact(mat, debug) + END SUBROUTINE factor_zpardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE bsolve_pardiso_mat1(mat, rhs, sol, nref, debug) +! +! Backsolve, using Pardiso +! + TYPE(pardiso_mat) :: mat + DOUBLE PRECISION :: rhs(:) + DOUBLE PRECISION, OPTIONAL :: sol(:) + INTEGER, OPTIONAL :: nref + LOGICAL, OPTIONAL, INTENT(in) :: debug + DOUBLE PRECISION :: dummy(SIZE(rhs)) +! + mat%p%iparm(8) = 0 ! Max numbers of iterative refinement steps + IF(PRESENT(nref)) mat%p%iparm(8) = nref + mat%p%phase = 33 ! Backsolve + mat%p%nrhs = 1 + mat%p%msglvl = 0 + IF(PRESENT(debug)) THEN + IF(debug) THEN + mat%p%msglvl = 1 + END IF + END IF + IF(PRESENT(sol)) THEN + mat%p%iparm(6) = 0 + CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, & + & mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, & + & mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, & + & rhs, sol, mat%p%error) + ELSE + mat%p%iparm(6) = 1 + CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, & + & mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, & + & mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, & + & rhs, dummy, mat%p%error) + END IF + IF(mat%p%error.NE.0) THEN + WRITE(*,'(a,i4)') 'BSOLVE: Failed with error', mat%p%error + END IF + END SUBROUTINE bsolve_pardiso_mat1 +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE bsolve_zpardiso_mat1(mat, rhs, sol, nref, debug) +! +! Backsolve, using Pardiso +! + TYPE(zpardiso_mat) :: mat + DOUBLE COMPLEX :: rhs(:) + DOUBLE COMPLEX, OPTIONAL :: sol(:) + INTEGER, OPTIONAL :: nref + LOGICAL, OPTIONAL, INTENT(in) :: debug + DOUBLE COMPLEX :: dummy(SIZE(rhs)) +! + mat%p%iparm(8) = 0 ! Max numbers of iterative refinement steps + IF(PRESENT(nref)) mat%p%iparm(8) = nref + mat%p%phase = 33 ! Backsolve + mat%p%nrhs = 1 + mat%p%msglvl = 0 + IF(PRESENT(debug)) THEN + IF(debug) THEN + mat%p%msglvl = 1 + END IF + END IF + IF(PRESENT(sol)) THEN + mat%p%iparm(6) = 0 + CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, & + & mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, & + & mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, & + & rhs, sol, mat%p%error) + ELSE + mat%p%iparm(6) = 1 + CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, & + & mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, & + & mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, & + & rhs, dummy, mat%p%error) + END IF + IF(mat%p%error.NE.0) THEN + WRITE(*,'(a,i4)') 'BSOLVE: Failed with error', mat%p%error + END IF + END SUBROUTINE bsolve_zpardiso_mat1 +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE bsolve_pardiso_matn(mat, rhs, sol, nref, debug) +! +! Backsolve, using Pardiso, multiple RHS +! + TYPE(pardiso_mat) :: mat + DOUBLE PRECISION :: rhs(:,:) + DOUBLE PRECISION, OPTIONAL :: sol(:,:) + INTEGER, OPTIONAL :: nref + LOGICAL, OPTIONAL, INTENT(in) :: debug + DOUBLE PRECISION :: dummy(SIZE(rhs,1),SIZE(rhs,2)) +! + mat%p%iparm(8) = 0 ! Max numbers of iterative refinement steps + IF(PRESENT(nref)) mat%p%iparm(8) = nref + mat%p%phase = 33 ! Backsolve + mat%p%nrhs = SIZE(rhs,2) + mat%p%msglvl = 0 + IF(PRESENT(debug)) THEN + IF(debug) THEN + mat%p%msglvl = 1 + END IF + END IF + IF(PRESENT(sol)) THEN + mat%p%iparm(6) = 0 + CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, & + & mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, & + & mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, & + & rhs, sol, mat%p%error) + ELSE + mat%p%iparm(6) = 1 + CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, & + & mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, & + & mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, & + & rhs, dummy, mat%p%error) + END IF + IF(mat%p%error.NE.0) THEN + WRITE(*,'(a,i4)') 'BSOLVE: Failed with error', mat%p%error + END IF + END SUBROUTINE bsolve_pardiso_matn +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE bsolve_zpardiso_matn(mat, rhs, sol, nref, debug) +! +! Backsolve, using Pardiso, multiple RHS +! + TYPE(zpardiso_mat) :: mat + DOUBLE COMPLEX :: rhs(:,:) + DOUBLE COMPLEX, OPTIONAL :: sol(:,:) + INTEGER, OPTIONAL :: nref + LOGICAL, OPTIONAL, INTENT(in) :: debug + DOUBLE COMPLEX :: dummy(SIZE(rhs,1),SIZE(rhs,2)) +! + mat%p%iparm(8) = 0 ! Max numbers of iterative refinement steps + IF(PRESENT(nref)) mat%p%iparm(8) = nref + mat%p%phase = 33 ! Backsolve + mat%p%nrhs = SIZE(rhs,2) + mat%p%msglvl = 0 + IF(PRESENT(debug)) THEN + IF(debug) THEN + mat%p%msglvl = 1 + END IF + END IF + IF(PRESENT(sol)) THEN + mat%p%iparm(6) = 0 + CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, & + & mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, & + & mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, & + & rhs, sol, mat%p%error) + ELSE + mat%p%iparm(6) = 1 + CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, & + & mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, & + & mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, & + & rhs, dummy, mat%p%error) + END IF + IF(mat%p%error.NE.0) THEN + WRITE(*,'(a,i4)') 'BSOLVE: Failed with error', mat%p%error + END IF + END SUBROUTINE bsolve_zpardiso_matn +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + FUNCTION vmx_pardiso_mat(mat, xarr) RESULT(yarr) +! +! Return product mat*x +! + TYPE(pardiso_mat) :: mat + DOUBLE PRECISION, INTENT(in) :: xarr(:) + DOUBLE PRECISION :: yarr(SIZE(xarr)) + DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0 + CHARACTER(len=6) :: matdescra + INTEGER :: n, i, j +! + n = mat%rank +! +#ifdef MKL + IF(mat%nlsym) THEN + matdescra = 'sun' + ELSE + matdescra = 'g' + END IF +! + CALL mkl_dcsrmv('N', n, n, alpha, matdescra, mat%val, & + & mat%cols, mat%irow(1), mat%irow(2), xarr, & + & beta, yarr) +#else + yarr = 0.0d0 + DO i=1,n + DO j=mat%irow(i), mat%irow(i+1)-1 + yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j)) + END DO + IF(mat%nlsym) THEN + DO j=mat%irow(i)+1, mat%irow(i+1)-1 + yarr(mat%cols(j)) = yarr(mat%cols(j)) & + & + mat%val(j)*xarr(i) + END DO + END IF + END DO +#endif +! + END FUNCTION vmx_pardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + FUNCTION vmx_zpardiso_mat(mat, xarr) RESULT(yarr) +! +! Return product mat*x +! + TYPE(zpardiso_mat) :: mat + DOUBLE COMPLEX, INTENT(in) :: xarr(:) + DOUBLE COMPLEX :: yarr(SIZE(xarr)) + DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0) + INTEGER :: n, i, j + CHARACTER(len=6) :: matdescra +! + n = mat%rank +! +#ifdef MKL + IF(mat%nlsym) THEN + matdescra = 'sun' + ELSE IF(mat%nlherm) THEN + matdescra = 'hun' + ELSE + matdescra = 'g' + END IF + CALL mkl_zcsrmv('N', n, n, alpha, matdescra, mat%val, & + & mat%cols, mat%irow(1), mat%irow(2), xarr, & + & beta, yarr) +#else + yarr = (0.0d0,0.0d0) + DO i=1,n + DO j=mat%irow(i), mat%irow(i+1)-1 + yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j)) + END DO + IF(mat%nlsym) THEN + DO j=mat%irow(i)+1, mat%irow(i+1)-1 + yarr(mat%cols(j)) = yarr(mat%cols(j)) & + & + mat%val(j)*xarr(i) + END DO + ELSE IF(mat%nlherm) THEN + DO j=mat%irow(i)+1, mat%irow(i+1)-1 + yarr(mat%cols(j)) = yarr(mat%cols(j)) & + & + CONJG(mat%val(j))*xarr(i) + END DO + END IF + END DO +#endif +! + END FUNCTION vmx_zpardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + FUNCTION vmx_pardiso_matn(mat, xarr) RESULT(yarr) +! +! Return product mat*x +! + TYPE(pardiso_mat) :: mat + DOUBLE PRECISION, INTENT(in) :: xarr(:,:) + DOUBLE PRECISION :: yarr(SIZE(xarr,1),SIZE(xarr,2)) +! + DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0 + INTEGER :: n, nrhs, i, j + CHARACTER(len=6) :: matdescra +! + n = mat%rank + nrhs = SIZE(xarr,2) +! +#ifdef MKL + IF(mat%nlsym) THEN + matdescra = 'sun' + ELSE + matdescra = 'g' + END IF +! + CALL mkl_dcsrmm('N', n, nrhs, n, alpha, matdescra, mat%val,& + & mat%cols, mat%irow(1), mat%irow(2), xarr, & + & n, beta, yarr, n) +#else + yarr = 0.0d0 + DO i=1,n + DO j=mat%irow(i), mat%irow(i+1)-1 + yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:) + END DO + IF(mat%nlsym) THEN + DO j=mat%irow(i)+1, mat%irow(i+1)-1 + yarr(mat%cols(j),:) = yarr(mat%cols(j),:) & + & + mat%val(j)*xarr(i,:) + END DO + END IF + END DO +#endif +! + END FUNCTION vmx_pardiso_matn +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + FUNCTION vmx_zpardiso_matn(mat, xarr) RESULT(yarr) +! +! Return product mat*x +! + TYPE(zpardiso_mat) :: mat + DOUBLE COMPLEX, INTENT(in) :: xarr(:,:) + DOUBLE COMPLEX :: yarr(SIZE(xarr,1),SIZE(xarr,2)) +! + DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0) + INTEGER :: n, nrhs, i, j + CHARACTER(len=6) :: matdescra +! + n = mat%rank + nrhs = SIZE(xarr,2) +! +#ifdef MKL + IF(mat%nlsym) THEN + matdescra = 'sun' + ELSE IF(mat%nlherm) THEN + matdescra = 'hun' + ELSE + matdescra = 'g' + END IF +! + CALL mkl_zcsrmm('N', n, nrhs, n, alpha, matdescra, mat%val, & + & mat%cols, mat%irow(1), mat%irow(2), xarr, n, & + & beta, yarr, n) +#else + yarr = (0.0d0,0.0d0) + DO i=1,n + DO j=mat%irow(i), mat%irow(i+1)-1 + yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:) + END DO + IF(mat%nlsym) THEN + DO j=mat%irow(i)+1, mat%irow(i+1)-1 + yarr(mat%cols(j),:) = yarr(mat%cols(j),:) & + & + mat%val(j)*xarr(i,:) + END DO + ELSE IF(mat%nlherm) THEN + DO j=mat%irow(i)+1, mat%irow(i+1)-1 + yarr(mat%cols(j),:) = yarr(mat%cols(j),:) & + & + CONJG(mat%val(j))*xarr(i,:) + END DO + END IF + END DO +#endif +! + END FUNCTION vmx_zpardiso_matn +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE destroy_pardiso_mat(mat) +! +! Deallocate the sparse matrix mat +! + TYPE(pardiso_mat) :: mat + DOUBLE PRECISION :: dummy +! + IF(ASSOCIATED(mat%mat)) THEN + CALL destroy(mat%mat) + DEALLOCATE(mat%mat) + END IF +! + IF(mat%p%phase .GT. 0) THEN + mat%p%phase = 0 ! Release memory for factors + CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, & + & mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, & + & mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, & + & dummy, dummy, mat%p%error) + IF(mat%p%error.NE.0) THEN + WRITE(*,'(a,i4)') 'DESTROY: Mem release failed with error', mat%p%error + END IF + END IF +! + IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) + IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) + IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm) + IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) + END SUBROUTINE destroy_pardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE destroy_zpardiso_mat(mat) +! +! Deallocate the sparse matrix mat +! + TYPE(zpardiso_mat) :: mat + DOUBLE COMPLEX :: dummy +! + IF(ASSOCIATED(mat%mat)) THEN + CALL destroy(mat%mat) + DEALLOCATE(mat%mat) + END IF +! + IF(mat%p%phase .GT. 0) THEN + mat%p%phase = 0 ! Release memory for factors + CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, & + & mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, & + & mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, & + & dummy, dummy, mat%p%error) + IF(mat%p%error.NE.0) THEN + WRITE(*,'(a,i4)') 'DESTROY: Mem release failed with error', mat%p%error + END IF + END IF +! + IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) + IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) + IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm) + IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) + END SUBROUTINE destroy_zpardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE put_pardiso_mat(fid, label, mat, str) +! +! Write matrix to hdf5 file +! + USE futils +! + INTEGER, INTENT(in) :: fid + CHARACTER(len=*), INTENT(in) :: label + TYPE(pardiso_mat) :: mat + CHARACTER(len=*), OPTIONAL, INTENT(in) :: str +! + IF(PRESENT(str)) THEN + CALL creatg(fid, label, str) + ELSE + CALL creatg(fid, label) + END IF + IF(ASSOCIATED(mat%mat)) THEN + CALL to_mat(mat) + END IF + CALL attach(fid, label, 'RANK', mat%rank) + CALL attach(fid, label, 'NNZ', mat%nnz) + CALL attach(fid, label, 'NLSYM', mat%nlsym) + CALL attach(fid, label, 'NLPOS', mat%nlpos) + CALL putarr(fid, TRIM(label)//'/irow', mat%irow) + CALL putarr(fid, TRIM(label)//'/cols', mat%cols) + CALL putarr(fid, TRIM(label)//'/perm', mat%perm) + CALL putarr(fid, TRIM(label)//'/val', mat%val) +! + CALL creatg(fid, TRIM(label)//'/p') + CALL attach(fid, TRIM(label)//'/p', 'error', mat%p%error) + CALL attach(fid, TRIM(label)//'/p', 'mtype', mat%p%mtype) + CALL attach(fid, TRIM(label)//'/p', 'phase', mat%p%phase) + CALL attach(fid, TRIM(label)//'/p', 'msglv', mat%p%msglvl) + CALL attach(fid, TRIM(label)//'/p', 'maxfct', mat%p%maxfct) + CALL attach(fid, TRIM(label)//'/p', 'mnum', mat%p%mnum) + CALL attach(fid, TRIM(label)//'/p', 'nrhs', mat%p%nrhs) + CALL putarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm) + END SUBROUTINE put_pardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE put_zpardiso_mat(fid, label, mat, str) +! +! Write matrix to hdf5 file +! + USE futils +! + INTEGER, INTENT(in) :: fid + CHARACTER(len=*), INTENT(in) :: label + TYPE(zpardiso_mat) :: mat + CHARACTER(len=*), OPTIONAL, INTENT(in) :: str +! + IF(PRESENT(str)) THEN + CALL creatg(fid, label, str) + ELSE + CALL creatg(fid, label) + END IF + IF(ASSOCIATED(mat%mat)) THEN + CALL to_mat(mat) + END IF + CALL attach(fid, label, 'RANK', mat%rank) + CALL attach(fid, label, 'NNZ', mat%nnz) + CALL attach(fid, label, 'NLSYM', mat%nlsym) + CALL attach(fid, label, 'NLPOS', mat%nlpos) + CALL attach(fid, label, 'NLHERM', mat%nlherm) + CALL putarr(fid, TRIM(label)//'/irow', mat%irow) + CALL putarr(fid, TRIM(label)//'/cols', mat%cols) + CALL putarr(fid, TRIM(label)//'/perm', mat%perm) + CALL putarr(fid, TRIM(label)//'/val', mat%val) +! + CALL creatg(fid, TRIM(label)//'/p') + CALL attach(fid, TRIM(label)//'/p', 'error', mat%p%error) + CALL attach(fid, TRIM(label)//'/p', 'mtype', mat%p%mtype) + CALL attach(fid, TRIM(label)//'/p', 'phase', mat%p%phase) + CALL attach(fid, TRIM(label)//'/p', 'msglv', mat%p%msglvl) + CALL attach(fid, TRIM(label)//'/p', 'maxfct', mat%p%maxfct) + CALL attach(fid, TRIM(label)//'/p', 'mnum', mat%p%mnum) + CALL attach(fid, TRIM(label)//'/p', 'nrhs', mat%p%nrhs) + CALL putarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm) + END SUBROUTINE put_zpardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE get_pardiso_mat(fid, label, mat) +! +! Read matrix from hdf5 file +! + USE futils +! + INTEGER, INTENT(in) :: fid + CHARACTER(len=*), INTENT(in) :: label + TYPE(pardiso_mat) :: mat +! + CALL getatt(fid, label, 'RANK', mat%rank) + CALL getatt(fid, label, 'NNZ', mat%nnz) + CALL getatt(fid, label, 'NLSYM', mat%nlsym) + CALL getarr(fid, TRIM(label)//'/irow', mat%irow) + CALL getarr(fid, TRIM(label)//'/cols', mat%cols) + CALL getarr(fid, TRIM(label)//'/perm', mat%perm) + CALL getarr(fid, TRIM(label)//'/val', mat%val) +! + CALL getatt(fid, TRIM(label)//'/p', 'error', mat%p%error) + CALL getatt(fid, TRIM(label)//'/p', 'mtype', mat%p%mtype) + CALL getatt(fid, TRIM(label)//'/p', 'phase', mat%p%phase) + CALL getatt(fid, TRIM(label)//'/p', 'msglv', mat%p%msglvl) + CALL getatt(fid, TRIM(label)//'/p', 'maxfct', mat%p%maxfct) + CALL getatt(fid, TRIM(label)//'/p', 'mnum', mat%p%mnum) + CALL getatt(fid, TRIM(label)//'/p', 'nrhs', mat%p%nrhs) + CALL getarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm) + END SUBROUTINE get_pardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE get_zpardiso_mat(fid, label, mat) +! +! Read matrix from hdf5 file +! + USE futils +! + INTEGER, INTENT(in) :: fid + CHARACTER(len=*), INTENT(in) :: label + TYPE(zpardiso_mat) :: mat +! + CALL getatt(fid, label, 'RANK', mat%rank) + CALL getatt(fid, label, 'NNZ', mat%nnz) + CALL getatt(fid, label, 'SYM', mat%nlsym) + CALL getatt(fid, label, 'HERM', mat%nlherm) + CALL getarr(fid, TRIM(label)//'/irow', mat%irow) + CALL getarr(fid, TRIM(label)//'/cols', mat%cols) + CALL getarr(fid, TRIM(label)//'/perm', mat%perm) + CALL getarr(fid, TRIM(label)//'/val', mat%val) +! + CALL getatt(fid, TRIM(label)//'/p', 'error', mat%p%error) + CALL getatt(fid, TRIM(label)//'/p', 'mtype', mat%p%mtype) + CALL getatt(fid, TRIM(label)//'/p', 'phase', mat%p%phase) + CALL getatt(fid, TRIM(label)//'/p', 'msglv', mat%p%msglvl) + CALL getatt(fid, TRIM(label)//'/p', 'maxfct', mat%p%maxfct) + CALL getatt(fid, TRIM(label)//'/p', 'mnum', mat%p%mnum) + CALL getatt(fid, TRIM(label)//'/p', 'nrhs', mat%p%nrhs) + CALL getarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm) + END SUBROUTINE get_zpardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE mcopy_pardiso_mat(mata, matb) +! +! Matrix copy: B = A +! + TYPE(pardiso_mat) :: mata, matb + INTEGER :: n, nnz +! +! Assume that matb was already initialized by init_wsmp_mat. + IF(matb%rank.LE.0) THEN + WRITE(*,'(a)') 'MCOPY: Mat B is not initialized with INIT' + STOP '*** Abnormal EXIT in MODULE pardiso_mod ***' + END IF +! + IF(ASSOCIATED(matb%mat)) THEN + CALL destroy(matb%mat) + DEALLOCATE(matb%mat) + END IF +! + n = mata%rank + nnz = mata%nnz + matb%rank = n + matb%nnz = nnz + matb%nlsym = mata%nlsym + matb%nlpos = mata%nlpos + matb%nlforce_zero = mata%nlforce_zero +! + IF(ASSOCIATED(matb%val)) DEALLOCATE(matb%val) + IF(ASSOCIATED(matb%cols)) DEALLOCATE(matb%cols) + IF(ASSOCIATED(matb%irow)) DEALLOCATE(matb%irow) + IF(ASSOCIATED(matb%perm)) DEALLOCATE(matb%perm) + ALLOCATE(matb%val(nnz)); matb%val = mata%val + ALLOCATE(matb%cols(nnz)); matb%cols = mata%cols + ALLOCATE(matb%irow(n+1)); matb%irow = mata%irow + ALLOCATE(matb%perm(n)) + END SUBROUTINE mcopy_pardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE mcopy_zpardiso_mat(mata, matb) +! +! Matrix copy: B = A +! + TYPE(zpardiso_mat) :: mata, matb + INTEGER :: n, nnz +! +! Assume that matb was already initialized by init_wsmp_mat. + IF(matb%rank.LE.0) THEN + WRITE(*,'(a)') 'MCOPY: Mat B is not initialized with INIT' + STOP '*** Abnormal EXIT in MODULE pardiso_mod ***' + END IF +! + IF(ASSOCIATED(matb%mat)) THEN + CALL destroy(matb%mat) + DEALLOCATE(matb%mat) + END IF +! + n = mata%rank + nnz = mata%nnz + matb%rank = n + matb%nnz = nnz + matb%nlsym = mata%nlsym + matb%nlherm = mata%nlherm + matb%nlpos = mata%nlpos + matb%nlforce_zero = mata%nlforce_zero +! + IF(ASSOCIATED(matb%val)) DEALLOCATE(matb%val) + IF(ASSOCIATED(matb%cols)) DEALLOCATE(matb%cols) + IF(ASSOCIATED(matb%irow)) DEALLOCATE(matb%irow) + IF(ASSOCIATED(matb%perm)) DEALLOCATE(matb%perm) + ALLOCATE(matb%val(nnz)); matb%val = mata%val + ALLOCATE(matb%cols(nnz)); matb%cols = mata%cols + ALLOCATE(matb%irow(n+1)); matb%irow = mata%irow + ALLOCATE(matb%perm(n)) + END SUBROUTINE mcopy_zpardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE maddto_pardiso_mat(mata, alpha, matb) +! +! A <- A + alpha*B +! + TYPE(pardiso_mat) :: mata, matb + DOUBLE PRECISION :: alpha +! + mata%val = mata%val + alpha*matb%val + END SUBROUTINE maddto_pardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE maddto_zpardiso_mat(mata, alpha, matb) +! +! A <- A + alpha*B +! + TYPE(zpardiso_mat) :: mata, matb + DOUBLE COMPLEX :: alpha +! + mata%val = mata%val + alpha*matb%val + END SUBROUTINE maddto_zpardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE psum_pardiso_mat(mat, comm) +! +! Parallel sum of sparse matrices +! + INCLUDE "mpif.h" +! + TYPE(pardiso_mat) :: mat + INCLUDE 'psum_mat.tpl' + END SUBROUTINE psum_pardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE psum_zpardiso_mat(mat, comm) +! +! Parallel sum of sparse matrices +! + INCLUDE "mpif.h" +! + TYPE(zpardiso_mat) :: mat + INCLUDE 'psum_mat.tpl' + END SUBROUTINE psum_zpardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE p2p_pardiso_mat(mat, dest, extyp, op, comm) +! +! Point-to-point combine sparse matrix between 2 processes +! + INCLUDE "mpif.h" +! + TYPE(pardiso_mat) :: mat + DOUBLE PRECISION, ALLOCATABLE :: val(:) + INTEGER :: mpi_type=MPI_DOUBLE_PRECISION +! + INCLUDE "p2p_mat.tpl" + END SUBROUTINE p2p_pardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE p2p_zpardiso_mat(mat, dest, extyp, op, comm) +! +! Point-to-point combine sparse matrix between 2 processes +! + INCLUDE "mpif.h" +! + TYPE(zpardiso_mat) :: mat + DOUBLE COMPLEX, ALLOCATABLE :: val(:) + INTEGER :: mpi_type=MPI_DOUBLE_COMPLEX +! + INCLUDE "p2p_mat.tpl" + END SUBROUTINE p2p_zpardiso_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +END MODULE pardiso_bsplines diff --git a/src/petsc_mod.F90 b/src/petsc_mod.F90 new file mode 100644 index 0000000..9909f17 --- /dev/null +++ b/src/petsc_mod.F90 @@ -0,0 +1,873 @@ +!> +!> @file petsc_mod.F90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +MODULE petsc_bsplines +! +! PETSC_BSPLINES: Simple interface to the parallel iterative +! solver PETSC +! +! T.M. Tran, CRPP-EPFL +! June 2011 +! + USE sparse + IMPLICIT NONE + +#include "finclude/petsc.h90" + +! + TYPE petsc_mat + INTEGER :: rank + INTEGER(8) :: nnz, nnz_loc + INTEGER :: nterms, kmat + INTEGER :: istart, iend + INTEGER, POINTER :: rcounts(:) => NULL() + INTEGER, POINTER :: rdispls(:) => NULL() + INTEGER :: comm + LOGICAL :: nlsym + LOGICAL :: nlforce_zero + TYPE(spmat), POINTER :: mat => NULL() + INTEGER, POINTER :: cols(:) => NULL() + INTEGER, POINTER :: irow(:) => NULL() + DOUBLE PRECISION, POINTER :: val(:) => NULL() +! + Mat :: AMAT + KSP :: SOLVER + END TYPE petsc_mat +! + INTERFACE init + MODULE PROCEDURE init_petsc_mat + END INTERFACE init +! + INTERFACE clear_mat + MODULE PROCEDURE clear_petsc_mat + END INTERFACE clear_mat +! + INTERFACE updtmat + MODULE PROCEDURE updt_petsc_mat + END INTERFACE updtmat +! + INTERFACE putele + MODULE PROCEDURE putele_petsc_mat + END INTERFACE putele +! + INTERFACE getele + MODULE PROCEDURE getele_petsc_mat + END INTERFACE getele +! + INTERFACE putrow + MODULE PROCEDURE putrow_petsc_mat + END INTERFACE putrow +! + INTERFACE getrow + MODULE PROCEDURE getrow_petsc_mat + END INTERFACE getrow +! + INTERFACE putcol + MODULE PROCEDURE putcol_petsc_mat + END INTERFACE putcol +! + INTERFACE getcol + MODULE PROCEDURE getcol_petsc_mat + END INTERFACE getcol +! + INTERFACE get_count + MODULE PROCEDURE get_count_petsc_mat + END INTERFACE get_count +! + INTERFACE to_mat + MODULE PROCEDURE to_petsc_mat + END INTERFACE to_mat +! + INTERFACE save_mat + MODULE PROCEDURE save_petsc_mat + END INTERFACE save_mat +! + INTERFACE load_mat + MODULE PROCEDURE load_petsc_mat + END INTERFACE load_mat +! + INTERFACE bsolve + MODULE PROCEDURE bsolve_petsc_mat1, bsolve_petsc_matn + END INTERFACE bsolve +! + INTERFACE vmx + MODULE PROCEDURE vmx_petsc_mat, vmx_petsc_matn + END INTERFACE vmx +! + INTERFACE destroy + MODULE PROCEDURE destroy_petsc_mat + END INTERFACE destroy +! + INTERFACE mcopy + MODULE PROCEDURE mcopy_petsc_mat + END INTERFACE mcopy +! + INTERFACE maddto + MODULE PROCEDURE maddto_petsc_mat + END INTERFACE maddto +! +CONTAINS +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE init_petsc_mat(n, nterms, mat, kmat, nlsym, & + & nlforce_zero, comm) +! +! Initialize an empty sparse petsc matrix +! + USE pputils2 + INCLUDE 'mpif.h' +! + INTEGER, INTENT(in) :: n, nterms + TYPE(petsc_mat) :: mat + INTEGER, OPTIONAL, INTENT(in) :: kmat + LOGICAL, OPTIONAL, INTENT(in) :: nlsym + LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero + INTEGER, OPTIONAL, INTENT(in) :: comm +! + INTEGER :: me, npes + INTEGER :: i, ierr, nloc + PetscBool :: flg +!!$ PetscTruth :: flg ! Petsc version before 3.2 +! +! Prologue +! + CALL mpi_comm_size(comm, npes, ierr) + CALL mpi_comm_rank(comm, me, ierr) +! + mat%rank = n + mat%nterms = nterms + mat%nnz = 0 + mat%nlsym = .FALSE. + mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val + IF(PRESENT(kmat)) mat%kmat = kmat + IF(PRESENT(nlsym)) mat%nlsym = nlsym + IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero +! +! Inititialize the PETSC environment +! + IF(PRESENT(comm)) THEN + PETSC_COMM_WORLD = comm + ELSE ! Single process Petsc + PETSC_COMM_WORLD = MPI_COMM_SELF + END IF + CALL PetscInitialize(PETSC_NULL_CHARACTER, ierr) + mat%comm = PETSC_COMM_WORLD +! +! Matrix partition +! + CALL dist1d(mat%comm, 1, n, mat%istart, nloc) + mat%iend = mat%istart + nloc - 1 +! + IF(ASSOCIATED(mat%rcounts)) DEALLOCATE(mat%rcounts) + IF(ASSOCIATED(mat%rdispls)) DEALLOCATE(mat%rdispls) + ALLOCATE(mat%rcounts(0:npes-1)) + ALLOCATE(mat%rdispls(0:npes-1)) + CALL mpi_allgather(nloc, 1, MPI_INTEGER, mat%rcounts, 1, MPI_INTEGER, & + & mat%comm, ierr) + mat%rdispls(0) = 0 + DO i=1,npes-1 + mat%rdispls(i) = mat%rdispls(i-1)+mat%rcounts(i-1) + END DO +! +! Initialize linked list for sparse matrix +! + IF(ASSOCIATED(mat%mat)) DEALLOCATE(mat%mat) + ALLOCATE(mat%mat) + CALL init(n, mat%mat, mat%istart, mat%iend) +! +! Create PETSC matrix +! + CALL MatCreate(mat%comm, mat%AMAT, ierr) + CALL MatSetSizes(mat%AMAT, nloc, nloc, n, n, ierr) + CALL MatSetFromOptions(mat%AMAT, ierr) +! +! Create PETSC SOLVER +! + CALL KSPCreate(mat%comm, mat%SOLVER, ierr) +! + END SUBROUTINE init_petsc_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE clear_petsc_mat(mat) +! +! Clear matrix, keeping its sparse structure unchanged +! + TYPE(petsc_mat) :: mat +! + IF(ASSOCIATED(mat%val)) THEN + mat%val = 0.0d0 + ELSE + CALL MatZeroEntries(mat%AMAT) + END IF + END SUBROUTINE clear_petsc_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE updt_petsc_mat(mat, i, j, val) +! +! Update element Aij of petsc matrix +! + TYPE(petsc_mat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE PRECISION, INTENT(in) :: val + INTEGER :: ierr +! + IF(mat%nlsym) THEN ! Store only upper part for symmetric matrices + IF(i.GT.j) RETURN + END IF + IF(i.LT.mat%istart .OR. i.GT.mat%iend) THEN + WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j + WRITE(*,'(a,2i6)') ' istart, iend ', mat%istart, mat%iend + STOP '*** Abnormal EXIT in MODULE mumps_mod ***' + END IF +! + IF(ASSOCIATED(mat%mat)) THEN + CALL updtmat(mat%mat, i, j, val) + ELSE + CALL MatSetValue(mat%AMAT, i-1, j-1, ADD_VALUES, ierr) + END IF + END SUBROUTINE updt_petsc_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putele_petsc_mat(mat, i, j, val) +! +! Put element (i,j) of matrix +! + TYPE(petsc_mat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE PRECISION, INTENT(in) :: val + INTEGER :: iput, jput + INTEGER :: ierr +! + iput = i + jput = j + IF(mat%nlsym) THEN + IF( i.GT.j ) THEN ! Lower triangular part + iput = j + jput = i + END IF + END IF +! +! Do nothing if outside + IF(iput.LT.mat%istart .OR. iput.GT.mat%iend) RETURN +! + IF(ASSOCIATED(mat%mat)) THEN + CALL putele(mat%mat, iput, jput, val, & + & nlforce_zero=mat%nlforce_zero) + ELSE + CALL MatSetValue(mat%AMAT, iput-1, jput-1, val, INSERT_VALUES, ierr) + END IF + END SUBROUTINE putele_petsc_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getele_petsc_mat(mat, i, j, val) +! +! Get element (i,j) of sparse matrix +! + TYPE(petsc_mat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE PRECISION, INTENT(out) :: val + INTEGER :: iget, jget + INTEGER :: ierr +! + iget = i + jget = j + IF(mat%nlsym) THEN + IF( i.GT.j ) THEN ! Lower triangular part + iget = j + jget = i + END IF + END IF +! + val = 0.0d0 ! Assume zero val if outside + IF( iget.LT.mat%istart .OR. iget.GT.mat%iend ) RETURN +! + IF(ASSOCIATED(mat%mat)) THEN + CALL getele(mat%mat, iget, jget, val) + ELSE + CALL MatGetValues(mat%AMAT, 1, iget-1, 1, jget-1, val, ierr) + END IF + END SUBROUTINE getele_petsc_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putrow_petsc_mat(amat, i, arr, cols) +! +! Put a row into sparse matrix +! + TYPE(petsc_mat), INTENT(inout) :: amat + INTEGER, INTENT(in) :: i + DOUBLE PRECISION, INTENT(in) :: arr(:) + INTEGER, INTENT(in), OPTIONAL :: cols(:) + INTEGER :: j +! + IF(i.GT.amat%iend .OR. i.LT.amat%istart) RETURN ! Do nothing +! + IF(PRESENT(cols)) THEN + DO j=1,SIZE(cols) + CALL putele(amat, i, cols(j), arr(j)) + END DO + ELSE + DO j=1,amat%rank + CALL putele(amat, i, j, arr(j)) + END DO + END IF + END SUBROUTINE putrow_petsc_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getrow_petsc_mat(amat, i, arr) +! +! Get a row from sparse matrix +! + TYPE(petsc_mat), INTENT(in) :: amat + INTEGER, INTENT(in) :: i + DOUBLE PRECISION, INTENT(out) :: arr(:) + INTEGER :: j, ierr + INTEGER :: ncols, cols(amat%rank) + DOUBLE PRECISION :: vals(amat%rank) +! + arr = 0.0d0 + IF(i.GT.amat%iend .OR. i.LT.amat%istart) RETURN ! return 0 if outside + IF(ASSOCIATED(amat%mat)) THEN + DO j=1,amat%rank + CALL getele(amat%mat, i, j, arr(j)) + END DO + ELSE + CALL MatGetRow(amat%AMAT, i-1, ncols, cols, vals, ierr) ! 0-based array + DO j=1,ncols + arr(cols(j)+1) = vals(j) + END DO + CALL MatRestoreRow(amat%AMAT, i-1, ncols, cols, vals, ierr) + END IF + END SUBROUTINE getrow_petsc_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putcol_petsc_mat(amat, j, arr) +! +! Put a column into sparse matrix +! + TYPE(petsc_mat), INTENT(inout) :: amat + INTEGER, INTENT(in) :: j + DOUBLE PRECISION, INTENT(in) :: arr(:) + INTEGER :: i +! + DO i=amat%istart,amat%iend + CALL putele(amat, i, j, arr(i)) + END DO + END SUBROUTINE putcol_petsc_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getcol_petsc_mat(amat, j, arr) +! +! Get a column from sparse matrix +! + TYPE(petsc_mat), INTENT(in) :: amat + INTEGER, INTENT(in) :: j + DOUBLE PRECISION, INTENT(out) :: arr(:) + INTEGER :: i +! + arr = 0.0d0 + DO i=amat%istart,amat%iend + CALL getele(amat, i, j, arr(i)) + END DO + END SUBROUTINE getcol_petsc_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + INTEGER FUNCTION get_count_petsc_mat(mat, nnz) +! +! Number of non-zeros in sparse matrix +! + TYPE(petsc_mat) :: mat + INTEGER, INTENT(out), OPTIONAL :: nnz(:) + INTEGER :: i, ierr + DOUBLE PRECISION :: info(MAT_INFO_SIZE) +! + IF(ASSOCIATED(mat%mat)) THEN + get_count_petsc_mat = get_count(mat%mat, nnz) + ELSE + CALL MatGetInfo(mat%AMAT, MAT_LOCAL, info, ierr) + get_count_petsc_mat = info(MAT_INFO_NZ_ALLOCATED) +!!$ IF(PRESENT(nnz)) THEN +!!$ DO i=1,mat%rank +!!$ nnz(i) = mat%irow(i+1)-mat%irow(i) +!!$ END DO +!!$ END IF + END IF + END FUNCTION get_count_petsc_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE to_petsc_mat(mat, nlkeep) +! +! Convert linked list spmat to petsc matrice structure +! + TYPE(petsc_mat) :: mat + LOGICAL, INTENT(in), OPTIONAL :: nlkeep +! + INTEGER :: me, i, j, jj, nnz, rank, s, e + INTEGER :: istart, iend + INTEGER :: iloc, k1, k2, ncol + INTEGER :: d_nz, d_nnz(mat%istart:mat%iend) + INTEGER :: o_nz, o_nnz(mat%istart:mat%iend) + INTEGER :: comm, ierr + LOGICAL :: nlclean +! + nlclean = .TRUE. + IF(PRESENT(nlkeep)) THEN + nlclean = .NOT. nlkeep + END IF +! + comm = mat%comm + CALL mpi_comm_rank(comm, me, ierr) +! +! Allocate the Petsc matrix structure +! + rank = mat%rank + mat%nnz_loc = get_count(mat) + istart = mat%istart + iend = mat%iend + CALL mpi_allreduce(mat%nnz_loc, mat%nnz, 1, MPI_INTEGER8, MPI_SUM, comm, ierr) +! + IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) + IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) + IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) + ALLOCATE(mat%val(mat%nnz_loc)) + ALLOCATE(mat%cols(mat%nnz_loc)) + ALLOCATE(mat%irow(mat%istart:mat%iend+1)) +! +! Get Sparse structure from linked list +! + d_nnz(:) = 0 + o_nnz(:) = 0 + mat%irow(istart) = 1 + DO i=istart,iend + mat%irow(i+1) = mat%irow(i) + mat%mat%row(i)%nnz + s = mat%irow(i) + e = mat%irow(i+1)-1 + CALL getrow(mat%mat%row(i), mat%val(s:e), mat%cols(s:e)) + d_nnz(i) = COUNT(mat%cols(s:e) .GE. istart .AND. & + & mat%cols(s:e) .LE. iend) + o_nnz(i) = mat%mat%row(i)%nnz - d_nnz(i) + IF(nlclean) CALL destroy(mat%mat%row(i)) + END DO + IF(nlclean) DEALLOCATE(mat%mat) +! +! Petsc matrix preallocation +! + CALL MatMPIAIJSetPreallocation(mat%AMAT, PETSC_NULL_INTEGER, & + & d_nnz, PETSC_NULL_INTEGER, o_nnz, ierr) + CALL MatSeqAIJSetPreallocation(mat%AMAT, PETSC_NULL_INTEGER, & + & d_nnz, ierr) +! +! Petsc matrix assembly +! + mat%cols = mat%cols-1 ! Start column index = 0 + DO i=istart,iend + iloc = i-istart+1 + k1 = mat%irow(i) + k2 = mat%irow(i+1) + ncol = k2-k1 + CALL MatSetValues(mat%AMAT, 1, i-1, ncol, mat%cols(k1:k2-1), & + & mat%val(k1:k2-1), INSERT_VALUES, ierr) + END DO +! + CALL MatAssemblyBegin(mat%AMAT, MAT_FINAL_ASSEMBLY ,ierr) + CALL MatAssemblyEnd(mat%AMAT, MAT_FINAL_ASSEMBLY, ierr) +! + DEALLOCATE(mat%irow) + DEALLOCATE(mat%cols) + DEALLOCATE(mat%val) +! + END SUBROUTINE to_petsc_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE save_petsc_mat(mat, file) +! +! Save matrix in PETSC binary format +! + TYPE(petsc_mat) :: mat + CHARACTER(len=*), INTENT(in) :: file +! + INTEGER :: ierr + PetscViewer :: viewer +! + CALL PetscViewerBinaryOpen(mat%comm, TRIM(file), FILE_MODE_WRITE,& + & viewer, ierr) + CALL MatView(mat%AMAT, viewer, ierr) + CALL PetscViewerDestroy(viewer, ierr) +! + END SUBROUTINE save_petsc_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE load_petsc_mat(mat, file) +! +! Load matrix in PETSC binary format +! + TYPE(petsc_mat) :: mat + CHARACTER(len=*), INTENT(in) :: file +! + INTEGER :: nloc, i, npes, ierr + PetscViewer :: viewer +! + CALL mpi_comm_size(mat%comm, npes, ierr) +! +! Clean up unneeded sparse matrix +! + IF(ASSOCIATED(mat%mat)) THEN + CALL destroy(mat%mat) + DEALLOCATE(mat%mat) + END IF +! +! Load matrix from file +! + CALL PetscViewerBinaryOpen(mat%comm, TRIM(file), FILE_MODE_READ,& + & viewer, ierr) + CALL MatLoad(mat%AMAT, viewer, ierr) + CALL PetscViewerDestroy(viewer, ierr) +! +! Some mat info +! + CALL MatGetSize(mat%AMAT, mat%rank, PETSC_NULL_INTEGER, ierr) + mat%nnz_loc = get_count(mat) + CALL mpi_allreduce(mat%nnz_loc, mat%nnz, 1, MPI_INTEGER8, MPI_SUM, & + & mat%comm, ierr) +! +! +! Recompute matrix partition from loaded matrix +! + CALL MatGetOwnershipRange(mat%AMAT, mat%istart, mat%iend, ierr) + mat%istart = mat%istart+1 ! Convert from Petsc definition + nloc = mat%iend - mat%istart + 1 + CALL mpi_allgather(nloc, 1, MPI_INTEGER, mat%rcounts, 1, MPI_INTEGER, & + & mat%comm, ierr) + mat%rdispls(0) = 0 + DO i=1,npes-1 + mat%rdispls(i) = mat%rdispls(i-1)+mat%rcounts(i-1) + END DO +! + END SUBROUTINE load_petsc_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE bsolve_petsc_mat1(mat, rhs, sol, rtol_in, nitmax_in, nits) +! +! Backsolve, using Petsc +! + TYPE(petsc_mat) :: mat + DOUBLE PRECISION, INTENT(inout) :: rhs(:) + DOUBLE PRECISION, INTENT(out), OPTIONAL :: sol(:) + DOUBLE PRECISION, INTENT(in), OPTIONAL :: rtol_in + INTEGER, INTENT(in), OPTIONAL :: nitmax_in + INTEGER, INTENT(out), OPTIONAL :: nits +! + DOUBLE PRECISION :: rtol=PETSC_DEFAULT_DOUBLE_PRECISION + INTEGER :: nitmax=PETSC_DEFAULT_INTEGER + INTEGER :: i, istart, iend, nrank_loc, nrank + INTEGER :: npes, me, ierr + INTEGER :: idx(mat%istart:mat%iend) +! + Vec :: vec_rhs, vec_sol + PetscScalar :: scal + PetscScalar, POINTER :: psol_loc(:) + KSPConvergedReason :: reason +! + CALL mpi_comm_size(mat%comm, npes, ierr) + CALL mpi_comm_rank(mat%comm, me, ierr) +! + istart = mat%istart + iend = mat%iend + nrank_loc = iend-istart+1 + nrank = mat%rank + idx = (/ (i, i=istart,iend) /) - 1 ! 0-based petsc vector +! +! Create Vectors +! + CALL VecCreate(mat%comm, vec_rhs, ierr) + CALL VecSetSizes(vec_rhs, nrank_loc, nrank, ierr) + CALL VecSetFromOptions(vec_rhs, ierr) + CALL VecDuplicate(vec_rhs, vec_sol, ierr) +! +! Set solver +! + IF(PRESENT(rtol_in)) rtol = rtol_in + IF(PRESENT(nitmax_in)) nitmax = nitmax_in +! + CALL KSPSetOperators(mat%SOLVER, mat%AMAT, mat%AMAT, SAME_PRECONDITIONER, ierr) + CALL KSPSetTolerances(mat%SOLVER, rtol, PETSC_DEFAULT_DOUBLE_PRECISION,& + & PETSC_DEFAULT_DOUBLE_PRECISION, nitmax, ierr) + CALL KSPSetFromOptions(mat%SOLVER, ierr) +! +! Set RHS +! + CALL VecSetValues(vec_rhs, nrank_loc, idx, rhs(istart), INSERT_VALUES, ierr) + CALL VecAssemblyBegin(vec_rhs, ierr) + CALL VecAssemblyEnd(vec_rhs, ierr) +! + CALL KSPSolve(mat%SOLVER, vec_rhs, vec_sol, ierr) + CALL KSPGetConvergedReason(mat%SOLVER, reason, ierr) + IF(reason .LT. 0) THEN + IF(me.EQ.0) WRITE(*,'(a,i4)') 'BSOLVE: diverges with reason', reason + END IF + IF(PRESENT(nits)) THEN + CALL KSPGetIterationNumber(mat%SOLVER, nits, ierr) + END IF +! +! Get global solutions on all MPI processes +! + CALL VecGetArrayF90(vec_sol, psol_loc, ierr) +! + IF(PRESENT(sol)) THEN + CALL mpi_allgatherv(psol_loc, nrank_loc, MPI_DOUBLE_PRECISION, & + & sol, mat%rcounts, mat%rdispls, MPI_DOUBLE_PRECISION, & + & mat%comm, ierr) + ELSE + CALL mpi_allgatherv(psol_loc, nrank_loc, MPI_DOUBLE_PRECISION, & + & rhs, mat%rcounts, mat%rdispls, MPI_DOUBLE_PRECISION, & + & mat%comm, ierr) + END IF +! + CALL VecRestoreArrayF90(vec_sol, psol_loc, ierr) + END SUBROUTINE bsolve_petsc_mat1 +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE bsolve_petsc_matn(mat, rhs, sol, rtol_in, nitmax_in, nits) +! +! Backsolve, using Petsc, multiple RHS +! + TYPE(petsc_mat) :: mat + DOUBLE PRECISION :: rhs(:,:) + DOUBLE PRECISION, OPTIONAL :: sol(:,:) + DOUBLE PRECISION, INTENT(in), OPTIONAL :: rtol_in + INTEGER, INTENT(in), OPTIONAL :: nitmax_in + INTEGER, INTENT(out), OPTIONAL :: nits(:) +! + DOUBLE PRECISION :: rtol=PETSC_DEFAULT_DOUBLE_PRECISION + INTEGER :: nitmax=PETSC_DEFAULT_INTEGER + INTEGER :: j, nrhs + INTEGER :: i, istart, iend, nrank_loc, nrank + INTEGER :: npes, me, ierr + INTEGER :: idx(mat%istart:mat%iend) +! + Vec :: vec_rhs, vec_sol + PetscScalar :: scal + PetscScalar, POINTER :: psol_loc(:) + KSPConvergedReason :: reason +! + CALL mpi_comm_size(mat%comm, npes, ierr) + CALL mpi_comm_rank(mat%comm, me, ierr) +! + nrhs = SIZE(rhs,2) + istart = mat%istart + iend = mat%iend + nrank_loc = iend-istart+1 + nrank = mat%rank + idx = (/ (i, i=istart,iend) /) - 1 ! 0-based petsc vector +! +! Create Vectors +! + CALL VecCreate(mat%comm, vec_rhs, ierr) + CALL VecSetSizes(vec_rhs, nrank_loc, nrank, ierr) + CALL VecSetFromOptions(vec_rhs, ierr) + CALL VecDuplicate(vec_rhs, vec_sol, ierr) +! +! Set solver +! + IF(PRESENT(rtol_in)) rtol = rtol_in + IF(PRESENT(nitmax_in)) nitmax = nitmax_in +! + CALL KSPSetOperators(mat%SOLVER, mat%AMAT, mat%AMAT, SAME_PRECONDITIONER, ierr) + CALL KSPSetTolerances(mat%SOLVER, rtol, PETSC_DEFAULT_DOUBLE_PRECISION,& + & PETSC_DEFAULT_DOUBLE_PRECISION, nitmax, ierr) + CALL KSPSetFromOptions(mat%SOLVER, ierr) +! +! Set RHS +! + DO j=1,nrhs + CALL VecSetValues(vec_rhs, nrank_loc, idx, rhs(istart,j), INSERT_VALUES, ierr) + CALL VecAssemblyBegin(vec_rhs, ierr) + CALL VecAssemblyEnd(vec_rhs, ierr) +! + CALL KSPSolve(mat%SOLVER, vec_rhs, vec_sol, ierr) + CALL KSPGetConvergedReason(mat%SOLVER, reason, ierr) + IF(reason .LT. 0) THEN + IF(me.EQ.0) THEN + WRITE(*,'(a,i4,a,i8)') 'BSOLVE: diverges with reason', reason, & + & ' for j =', j + END IF + END IF + IF(PRESENT(nits)) THEN + CALL KSPGetIterationNumber(mat%SOLVER, nits(j), ierr) + END IF +! +! Get global solutions on all MPI processes +! + CALL VecGetArrayF90(vec_sol, psol_loc, ierr) +! + IF(PRESENT(sol)) THEN + CALL mpi_allgatherv(psol_loc, nrank_loc, MPI_DOUBLE_PRECISION, & + & sol(1,j), mat%rcounts, mat%rdispls, MPI_DOUBLE_PRECISION, & + & mat%comm, ierr) + ELSE + CALL mpi_allgatherv(psol_loc, nrank_loc, MPI_DOUBLE_PRECISION, & + & rhs(1,j), mat%rcounts, mat%rdispls, MPI_DOUBLE_PRECISION, & + & mat%comm, ierr) + END IF +! + CALL VecRestoreArrayF90(vec_sol, psol_loc, ierr) + END DO +! + END SUBROUTINE bsolve_petsc_matn +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + FUNCTION vmx_petsc_mat(mat, xarr) RESULT(yarr) +! +! Return product mat*x +! + TYPE(petsc_mat) :: mat + DOUBLE PRECISION, INTENT(in) :: xarr(:) + DOUBLE PRECISION :: yarr(SIZE(xarr)) + DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0 + CHARACTER(len=6) :: matdescra + INTEGER :: n, i, j +! + n = mat%rank +! +#ifdef MKL + IF(mat%nlsym) THEN + matdescra = 'sun' + ELSE + matdescra = 'g' + END IF +! + CALL mkl_dcsrmv('N', n, n, alpha, matdescra, mat%val, & + & mat%cols, mat%irow(1), mat%irow(2), xarr, & + & beta, yarr) +#else + yarr = 0.0d0 + DO i=1,n + DO j=mat%irow(i), mat%irow(i+1)-1 + yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j)) + END DO + IF(mat%nlsym) THEN + DO j=mat%irow(i)+1, mat%irow(i+1)-1 + yarr(mat%cols(j)) = yarr(mat%cols(j)) & + & + mat%val(j)*xarr(i) + END DO + END IF + END DO +#endif +! + END FUNCTION vmx_petsc_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + FUNCTION vmx_petsc_matn(mat, xarr) RESULT(yarr) +! +! Return product mat*x +! + TYPE(petsc_mat) :: mat + DOUBLE PRECISION, INTENT(in) :: xarr(:,:) + DOUBLE PRECISION :: yarr(SIZE(xarr,1),SIZE(xarr,2)) +! + DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0 + INTEGER :: n, nrhs, i, j + CHARACTER(len=6) :: matdescra +! + n = mat%rank + nrhs = SIZE(xarr,2) +! +#ifdef MKL + IF(mat%nlsym) THEN + matdescra = 'sun' + ELSE + matdescra = 'g' + END IF +! + CALL mkl_dcsrmm('N', n, nrhs, n, alpha, matdescra, mat%val,& + & mat%cols, mat%irow(1), mat%irow(2), xarr, & + & n, beta, yarr, n) +#else + yarr = 0.0d0 + DO i=1,n + DO j=mat%irow(i), mat%irow(i+1)-1 + yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:) + END DO + IF(mat%nlsym) THEN + DO j=mat%irow(i)+1, mat%irow(i+1)-1 + yarr(mat%cols(j),:) = yarr(mat%cols(j),:) & + & + mat%val(j)*xarr(i,:) + END DO + END IF + END DO +#endif +! + END FUNCTION vmx_petsc_matn +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE destroy_petsc_mat(mat) +! +! Deallocate the sparse matrix mat +! + TYPE(petsc_mat) :: mat + INTEGER :: ierr +! + IF(ASSOCIATED(mat%mat)) THEN + CALL destroy(mat%mat) + DEALLOCATE(mat%mat) + END IF +! + IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) + IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) + IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) +! + CALL MatDestroy(mat%AMAT,ierr) + END SUBROUTINE destroy_petsc_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE mcopy_petsc_mat(mata, matb) +! +! Matrix copy: B = A +! + TYPE(petsc_mat) :: mata, matb + INTEGER :: ierr +! +! Assume that matb was already initialized by init_petsc_mat. + IF(matb%rank.LE.0) THEN + WRITE(*,'(a)') 'MCOPY: Mat B is not initialized with INIT' + STOP '*** Abnormal EXIT in MODULE petsc_mod ***' + END IF +! + IF(ASSOCIATED(matb%mat)) THEN + CALL destroy(matb%mat) + DEALLOCATE(matb%mat) + END IF +! + matb%rank = mata%rank + matb%nnz = mata%nnz + matb%nnz_loc = mata%nnz_loc + matb%istart = mata%istart + matb%iend = mata%iend + matb%nlsym = mata%nlsym + matb%nlforce_zero = mata%nlforce_zero +! + IF(ASSOCIATED(matb%mat)) THEN + CALL destroy(matb%mat) + DEALLOCATE(matb%mat) + END IF +! +! Destroy existing PETSC matrix and recreate a new one from scratch +! + CALL MatDestroy(matb%AMAT, ierr) + CALL MatConvert(mata%AMAT, MATSAME, MAT_INITIAL_MATRIX, matb%AMAT, ierr) + END SUBROUTINE mcopy_petsc_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE maddto_petsc_mat(mata, alpha, matb) +! +! A <- A + alpha*B +! + TYPE(petsc_mat) :: mata, matb + DOUBLE PRECISION :: alpha +! + mata%val = mata%val + alpha*matb%val + END SUBROUTINE maddto_petsc_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +! +END MODULE petsc_bsplines diff --git a/src/psum_mat.tpl b/src/psum_mat.tpl new file mode 100644 index 0000000..bf44e63 --- /dev/null +++ b/src/psum_mat.tpl @@ -0,0 +1,101 @@ +!> +!> @file psum_mat.tpl +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> + INTEGER, INTENT(in) :: comm +! + INTEGER :: me, npes, ierr + INTEGER :: n, r, i, base + INTEGER :: newrank +!--------------------------------------------------------------------- +! 1.0 Prologue +! + CALL mpi_comm_size(comm, npes, ierr) + CALL mpi_comm_rank(comm, me, ierr) +! +! Compute n and r defined by npes = 2**n+r + i=1 + n=0 + DO WHILE (2*i.LE.npes) + n=n+1 + i=2*i + END DO ! i=2**n + r = npes-i +!--------------------------------------------------------------------- +! 2.0 Node partition +! +! I: nodes with ranks < 2*r +! . nodes with even ranks receive from rank+1 and sum +! . odd ranks sends to rank-1 +! II: nodes with ranks >= 2*r +! . do nothing +! + IF( me .LT. 2*r ) THEN + IF( MODULO(me,2) .EQ. 0 ) THEN + CALL p2p_mat(mat, me+1, 'recv', 'updt', comm) + ELSE + CALL p2p_mat(mat, me-1, 'send', 'updt', comm) + END IF + END IF +!--------------------------------------------------------------------- +! 3.0 Binary tree reduction using new ranks +! +! Define new ranks + IF( MODULO(me,2).EQ.0 .AND. me.LT.2*r ) THEN ! new rank in I + newrank = me/2 + ELSE IF( me.GE.2*r ) THEN ! new ranks in II + newrank = me-r + ELSE ! inactive ranks in I + newrank = -1 + END IF +! +! Reduction with 2**n (positive) newranks + IF( newrank .GE. 0 ) THEN ! only for nodes with new rank > 0 + base = 1 + DO i=1,n + CALL p2p_mat(mat, oldrank(IEOR(newrank,base)), & + & 'sendrecv', 'updt', comm) + base = base*2 + END DO + END IF +!--------------------------------------------------------------------- +! 4.0 Final exchanche in I +! + IF( me .LT. 2*r ) THEN + IF( MODULO(me,2).EQ.0 ) THEN + CALL p2p_mat(mat, me+1, 'send', 'put', comm) + ELSE + CALL p2p_mat(mat, me-1, 'recv', 'put', comm) + END IF + END IF +!--------------------------------------------------------------------- +CONTAINS + INTEGER FUNCTION oldrank(rank) + INTEGER, INTENT(in) :: rank + IF(rank.LT.r) THEN + oldrank = 2*rank + ELSE + oldrank = rank+r + END IF + END FUNCTION oldrank diff --git a/src/pwsmp_mod.f90 b/src/pwsmp_mod.f90 new file mode 100644 index 0000000..b981cde --- /dev/null +++ b/src/pwsmp_mod.f90 @@ -0,0 +1,2032 @@ +!> +!> @file pwsmp_mod.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +MODULE pwsmp_bsplines +! +! PWSMP_BSPLINES: Simple interface to the parallel sparse direct +! solver PWSMP. +! +! T.M. Tran, CRPP-EPFL +! December 2011 +! + USE sparse + IMPLICIT NONE +! + INTEGER, SAVE :: current_matid = -1 + INTEGER, SAVE :: last_matid = -1 +! + TYPE wsmp_param + INTEGER :: iparm(64) + DOUBLE PRECISION :: dparm(64) + END TYPE wsmp_param +! + TYPE wsmp_mat + INTEGER :: matid=-1 + INTEGER :: rank=0, nnz + INTEGER :: nterms, kmat, nrhs + INTEGER :: comm + INTEGER :: istart, iend, rank_loc + INTEGER :: nnz_start, nnz_end, nnz_loc + LOGICAL :: nlsym + LOGICAL :: nlpos + LOGICAL :: nlforce_zero + TYPE(spmat), POINTER :: mat => NULL() + INTEGER, POINTER :: cols(:) => NULL() + INTEGER, POINTER :: irow(:) => NULL() + INTEGER, POINTER :: perm(:) => NULL() + INTEGER, POINTER :: invp(:) => NULL() + INTEGER, POINTER :: mrp(:) => NULL() + DOUBLE PRECISION, POINTER :: diag(:) => NULL() + DOUBLE PRECISION, POINTER :: val(:) => NULL() + DOUBLE PRECISION, POINTER :: aux(:) => NULL() + TYPE(wsmp_param) :: p + END TYPE wsmp_mat +! + TYPE zwsmp_mat + INTEGER :: matid=-1 + INTEGER :: rank=0, nnz + INTEGER :: nterms, kmat, nrhs + INTEGER :: comm + INTEGER :: istart, iend, rank_loc + INTEGER :: nnz_start, nnz_end, nnz_loc + LOGICAL :: nlherm + LOGICAL :: nlsym + LOGICAL :: nlpos + LOGICAL :: nlforce_zero + TYPE(zspmat), POINTER :: mat => NULL() + INTEGER, POINTER :: cols(:) => NULL() + INTEGER, POINTER :: irow(:) => NULL() + INTEGER, POINTER :: perm(:) => NULL() + INTEGER, POINTER :: invp(:) => NULL() + INTEGER, POINTER :: mrp(:) => NULL() + DOUBLE COMPLEX, POINTER :: diag(:) => NULL() + DOUBLE COMPLEX, POINTER :: val(:) => NULL() + DOUBLE COMPLEX, POINTER :: aux(:) => NULL() + TYPE(wsmp_param) :: p + END TYPE zwsmp_mat +! + INTERFACE init + MODULE PROCEDURE init_wsmp_mat, init_zwsmp_mat + END INTERFACE init +! + INTERFACE check_mat + MODULE PROCEDURE check_wsmp_mat, check_zwsmp_mat + END INTERFACE check_mat +! + INTERFACE clear_mat + MODULE PROCEDURE clear_wsmp_mat, clear_zwsmp_mat + END INTERFACE clear_mat +! + INTERFACE updtmat + MODULE PROCEDURE updt_wsmp_mat, updt_zwsmp_mat + END INTERFACE updtmat +! + INTERFACE putele + MODULE PROCEDURE putele_wsmp_mat, putele_zwsmp_mat + END INTERFACE putele +! + INTERFACE getele + MODULE PROCEDURE getele_wsmp_mat, getele_zwsmp_mat + END INTERFACE getele +! + INTERFACE putrow + MODULE PROCEDURE putrow_wsmp_mat, putrow_zwsmp_mat + END INTERFACE putrow +! + INTERFACE getrow + MODULE PROCEDURE getrow_wsmp_mat, getrow_zwsmp_mat + END INTERFACE getrow +! + INTERFACE putcol + MODULE PROCEDURE putcol_wsmp_mat, putcol_zwsmp_mat + END INTERFACE putcol +! + INTERFACE getcol + MODULE PROCEDURE getcol_wsmp_mat, getcol_zwsmp_mat + END INTERFACE getcol +! + INTERFACE get_count + MODULE PROCEDURE get_count_wsmp_mat, get_count_zwsmp_mat + END INTERFACE get_count +! + INTERFACE to_mat + MODULE PROCEDURE to_wsmp_mat, to_zwsmp_mat + END INTERFACE to_mat +! + INTERFACE reord_mat + MODULE PROCEDURE reord_wsmp_mat, reord_zwsmp_mat + END INTERFACE reord_mat +! + INTERFACE numfact + MODULE PROCEDURE numfact_wsmp_mat, numfact_zwsmp_mat + END INTERFACE numfact +! + INTERFACE factor + MODULE PROCEDURE factor_wsmp_mat, factor_zwsmp_mat + END INTERFACE factor +! + INTERFACE bsolve + MODULE PROCEDURE bsolve_wsmp_mat1, bsolve_wsmp_matn, & + & bsolve_zwsmp_mat1, bsolve_zwsmp_matn + END INTERFACE bsolve +! + INTERFACE vmx + MODULE PROCEDURE vmx_wsmp_mat, vmx_wsmp_matn, & + & vmx_zwsmp_mat, vmx_zwsmp_matn + END INTERFACE vmx +! + INTERFACE destroy + MODULE PROCEDURE destroy_wsmp_mat, destroy_zwsmp_mat + END INTERFACE destroy +! + INTERFACE putmat + MODULE PROCEDURE put_wsmp_mat, put_zwsmp_mat + END INTERFACE putmat +! + INTERFACE getmat + MODULE PROCEDURE get_wsmp_mat, get_zwsmp_mat + END INTERFACE getmat +! + INTERFACE mcopy + MODULE PROCEDURE mcopy_wsmp_mat, mcopy_zwsmp_mat + END INTERFACE mcopy +! + INTERFACE maddto + MODULE PROCEDURE maddto_wsmp_mat, maddto_zwsmp_mat + END INTERFACE maddto +! + INTERFACE psum_mat + MODULE PROCEDURE psum_wsmp_mat, psum_zwsmp_mat + END INTERFACE psum_mat +! + INTERFACE p2p_mat + MODULE PROCEDURE p2p_wsmp_mat, p2p_zwsmp_mat + END INTERFACE p2p_mat +! +CONTAINS +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE init_wsmp_mat(n, nterms, mat, kmat, nlsym, nlpos, & + & nlforce_zero, comm_in) +! +! Initialize an empty sparse wsmp matrix +! + USE pputils2 + INCLUDE 'mpif.h' + INTEGER, INTENT(in) :: n, nterms + TYPE(wsmp_mat) :: mat + INTEGER, OPTIONAL, INTENT(in) :: kmat + LOGICAL, OPTIONAL, INTENT(in) :: nlsym + LOGICAL, OPTIONAL, INTENT(in) :: nlpos + LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero + INTEGER, OPTIONAL, INTENT(in) :: comm_in +! + INTEGER :: comm, nloc + INTEGER :: info + INTEGER :: idummy = 0 + DOUBLE PRECISION :: dummy = 0.0d0 +! + comm = MPI_COMM_WORLD + IF(PRESENT(comm_in)) comm = comm_in + mat%comm = comm +! +! Store away (valid) current matrix id +! + IF(current_matid .GE. 0) THEN + CALL wstoremat(current_matid, info) + IF(info.NE.0) THEN + WRITE(*,'(a,i12)') 'INIT: WSTOREMAT failed WITH error', info + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF + END IF + last_matid = last_matid+1 + mat%matid = last_matid + current_matid = mat%matid +! +! Initialize sparse matrice structure +! + mat%rank = n + mat%nterms = nterms + mat%nnz = 0 + mat%nlsym = .FALSE. + mat%nlpos = .TRUE. + mat%nrhs = 1 + mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val + IF(PRESENT(kmat)) mat%kmat = kmat + IF(PRESENT(nlsym)) mat%nlsym = nlsym + IF(PRESENT(nlpos)) mat%nlpos = nlpos + IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero +! +! Matrix partition +! + CALL dist1d(comm, 1, n, mat%istart, nloc) + mat%iend = mat%istart + nloc - 1 + mat%rank_loc = nloc +! + IF(ASSOCIATED(mat%mat)) DEALLOCATE(mat%mat) + ALLOCATE(mat%mat) + CALL init(n, mat%mat, mat%istart, mat%iend) +! +! Fill 'iparm' and 'dparm' with default values +! + mat%p%iparm(1:3) = 0 + IF(mat%nlsym) THEN + CALL pwssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, & + & mat%perm, mat%invp, dummy, mat%rank_loc, mat%nrhs, & + & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & + & mat%p%dparm) + IF(mat%nlpos) THEN + mat%p%iparm(31) = 0 + ELSE +!!$ mat%p%iparm(31) = 1 ! LDL^T without pivoting + mat%p%iparm(31) = 2 ! LDL^T with pivoting + END IF + CALL pwgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, dummy, mat%rank, & + & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) + END IF + IF(mat%p%iparm(64).NE.0) THEN + WRITE(*,'(a,i12)') 'INIT: Initialization failed with error', & + & mat%p%iparm(64) + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF +! + CALL setup_wsmp(mat%p%iparm, mat%p%dparm) +! + CONTAINS + SUBROUTINE setup_wsmp(iparm, dparm) + INTEGER :: iparm(:) + DOUBLE PRECISION :: dparm(:) + END SUBROUTINE setup_wsmp + END SUBROUTINE init_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE init_zwsmp_mat(n, nterms, mat, kmat, nlsym, nlherm, & + & nlpos, nlforce_zero, comm_in) +! +! Initialize an empty sparse wsmp matrix +! + USE pputils2 + INCLUDE 'mpif.h' + INTEGER, INTENT(in) :: n, nterms + TYPE(zwsmp_mat) :: mat + INTEGER, OPTIONAL, INTENT(in) :: kmat + LOGICAL, OPTIONAL, INTENT(in) :: nlsym + LOGICAL, OPTIONAL, INTENT(in) :: nlherm + LOGICAL, OPTIONAL, INTENT(in) :: nlpos + LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero + INTEGER, OPTIONAL, INTENT(in) :: comm_in +! + INTEGER :: comm, nloc + INTEGER :: info + INTEGER :: idummy = 0 + DOUBLE COMPLEX :: dummy = 0.0d0 +! + comm = MPI_COMM_WORLD + IF(PRESENT(comm_in)) comm = comm_in + mat%comm = comm +! +! Store away (valid) current matrix id +! + IF(current_matid .GE. 0) THEN + CALL wstoremat(current_matid, info) + IF(info.NE.0) THEN + WRITE(*,'(a,i12)') 'INIT: WSTOREMAT failed WITH error', info + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF + END IF + last_matid = last_matid+1 + mat%matid = last_matid + current_matid = mat%matid +! +! Initialize sparse matrice structure +! + mat%rank = n + mat%nterms = nterms + mat%nnz = 0 + mat%nlsym = .FALSE. + mat%nlherm = .FALSE. + mat%nlpos = .TRUE. + mat%nrhs = 1 + mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val + IF(PRESENT(kmat)) mat%kmat = kmat + IF(PRESENT(nlsym)) mat%nlsym = nlsym + IF(PRESENT(nlherm)) mat%nlherm = nlherm + IF(PRESENT(nlpos)) mat%nlpos = nlpos + IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero +! +! Matrix partition +! + CALL dist1d(comm, 1, n, mat%istart, nloc) + mat%iend = mat%istart + nloc - 1 + mat%rank_loc = nloc +! + IF(ASSOCIATED(mat%mat)) DEALLOCATE(mat%mat) + ALLOCATE(mat%mat) + CALL init(n, mat%mat, mat%istart, mat%iend) +! +! Fill 'iparm' and 'dparm' with default values +! + mat%p%iparm(1:3) = 0 + IF(mat%nlherm .OR. mat%nlsym) THEN + CALL pzssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, & + & mat%perm, mat%invp, dummy, mat%rank_loc, mat%nrhs, & + & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & + & mat%p%dparm) + IF(mat%nlherm) THEN + IF(mat%nlpos) THEN + mat%p%iparm(31) = 0 ! hermitian, positive definite + ELSE + mat%p%iparm(31) = 2 ! hermitian, no-definite, LDL^T with pivoting + END IF + ELSE + mat%p%iparm(31) = 3 ! non-hermitian, symmetric + END IF + ELSE + CALL pzgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, dummy, mat%rank, & + & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) + END IF + IF(mat%p%iparm(64).NE.0) THEN + WRITE(*,'(a,i12)') 'INIT: Initialization failed with error', & + & mat%p%iparm(64) + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF +!!$ WRITE(*,'(/a/(10i8))') 'iparm', mat%p%iparm +!!$ WRITE(*,'(/a/(10(1pe8.1)))') 'dparm', mat%p%dparm +! + CALL setup_wsmp(mat%p%iparm, mat%p%dparm) +! + CONTAINS + SUBROUTINE setup_wsmp(iparm, dparm) + INTEGER :: iparm(:) + DOUBLE PRECISION :: dparm(:) + END SUBROUTINE setup_wsmp + END SUBROUTINE init_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE check_wsmp_mat(mat) +! +! Check matrice id and recall the matrice if not current +! + TYPE(wsmp_mat) :: mat + INTEGER :: info +! + IF(.NOT.mat%nlsym) THEN + IF( mat%matid.NE.current_matid ) THEN + WRITE(*,'(a)') "Processing multi matrices is not possible "// & + & "for non-symetric matrices." + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + ELSE + RETURN + END IF + END IF +! + IF( mat%matid.NE.current_matid ) THEN + IF(current_matid .GE. 0) THEN + CALL wstoremat(current_matid, info) + IF(info.NE.0) THEN + WRITE(*,'(a,i3,a,i12)') 'Store matrix', current_matid, & + & ' failed with error', info + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF + END IF + CALL wrecallmat(mat%matid, info) + IF(info.NE.0) THEN + WRITE(*,'(a,i3,a,i12)') 'Recall matrix', mat%matid, & + & ' failed with error', info + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF + current_matid = mat%matid + END IF + END SUBROUTINE check_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE check_zwsmp_mat(mat) +! +! Check matrice id and recall the matrice if not current +! + TYPE(zwsmp_mat) :: mat + INTEGER :: info +! + IF(.NOT.mat%nlsym .AND. .NOT.mat%nlherm ) THEN + IF( mat%matid.NE.current_matid ) THEN + WRITE(*,'(a)') "Processing multi matrices is not possible "// & + & "for non-symetric/non-hermitian matrices." + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + ELSE + RETURN + END IF + END IF +! + IF( mat%matid.NE.current_matid ) THEN + IF(current_matid .GE. 0) THEN + CALL wstoremat(current_matid, info) + IF(info.NE.0) THEN + WRITE(*,'(a,i3,a,i12)') 'Store matrix', current_matid, & + & ' failed with error', info + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF + END IF + CALL wrecallmat(mat%matid, info) + IF(info.NE.0) THEN + WRITE(*,'(a,i3,a,i12)') 'Recall matrix', mat%matid, & + & ' failed with error', info + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF + current_matid = mat%matid + END IF + END SUBROUTINE check_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE clear_wsmp_mat(mat) +! +! Clear matrix, keeping its sparse structure unchanged +! + TYPE(wsmp_mat) :: mat +! + mat%val = 0.0d0 + END SUBROUTINE clear_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE clear_zwsmp_mat(mat) +! +! Clear matrix, keeping its sparse structure unchanged +! + TYPE(zwsmp_mat) :: mat +! + mat%val = (0.0d0, 0.0d0) + END SUBROUTINE clear_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE updt_wsmp_mat(mat, i, j, val) +! +! Update element Aij of wsmp matrix +! + TYPE(wsmp_mat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE PRECISION, INTENT(in) :: val + INTEGER :: k, s, e +! + IF(mat%nlsym) THEN ! Store only upper part for symmetric matrices + IF(i.GT.j) RETURN + END IF + IF(i.LT.mat%istart .OR. i.GT.mat%iend) THEN + WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j + WRITE(*,'(a,2i6)') ' istart, iend ', mat%istart, mat%iend + STOP '*** Abnormal EXIT in MODULE mumps_mod ***' + END IF +! + IF(ASSOCIATED(mat%mat)) THEN + CALL updtmat(mat%mat, i, j, val) + ELSE + s = mat%irow(i) + e = mat%irow(i+1)-1 + k = isearch(mat%cols(s:e), j) + IF( k.GE.0 ) THEN + mat%val(s+k) = mat%val(s+k)+val + ELSE + WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF + END IF + END SUBROUTINE updt_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE updt_zwsmp_mat(mat, i, j, val) +! +! Update element Aij of wsmp matrix +! + TYPE(zwsmp_mat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE COMPLEX, INTENT(in) :: val + INTEGER :: k, s, e +! + IF(mat%nlherm .OR. mat%nlsym) THEN ! Store only upper part + IF(i.GT.j) RETURN + END IF +! + IF(ASSOCIATED(mat%mat)) THEN + CALL updtmat(mat%mat, i, j, val) + ELSE + s = mat%irow(i) + e = mat%irow(i+1)-1 + k = isearch(mat%cols(s:e), j) + IF( k.GE.0 ) THEN + IF(mat%nlherm) THEN + mat%val(s+k) = mat%val(s+k)+CONJG(val) ! CSR-UT* = CSC-LT + ELSE + mat%val(s+k) = mat%val(s+k)+val + END IF + ELSE + WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF + END IF + END SUBROUTINE updt_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putele_wsmp_mat(mat, i, j, val) +! +! Put element (i,j) of matrix +! + TYPE(wsmp_mat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE PRECISION, INTENT(in) :: val + INTEGER :: iput, jput + INTEGER :: k, s, e +! + iput = i + jput = j + IF(mat%nlsym) THEN + IF( i.GT.j ) THEN ! Lower triangular part + iput = j + jput = i + END IF + END IF +! +! Do nothing if outside + IF(iput.LT.mat%istart .OR. iput.GT.mat%iend) RETURN +! + IF(ASSOCIATED(mat%mat)) THEN + CALL putele(mat%mat, iput, jput, val, & + & nlforce_zero=mat%nlforce_zero) + ELSE + s = mat%irow(iput) + e = mat%irow(iput+1) - 1 + k = isearch(mat%cols(s:e), jput) + IF( k.GE.0 ) THEN + mat%val(s+k) = val + ELSE + IF(ABS(val) .GT. EPSILON(0.0d0)) THEN ! Ok for zero val + WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF + END IF + END IF + END SUBROUTINE putele_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putele_zwsmp_mat(mat, i, j, val) +! +! Put element (i,j) of matrix +! + TYPE(zwsmp_mat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE COMPLEX, INTENT(in) :: val + DOUBLE COMPLEX :: valput + INTEGER :: iput, jput + INTEGER :: k, s, e +! + iput = i + jput = j + valput = val + IF(mat%nlsym .OR. mat%nlherm) THEN + IF( i.GT.j ) THEN ! Lower triangular part + iput = j + jput = i + IF(mat%nlherm) THEN + valput = CONJG(val) + ELSE + valput = val + END IF + END IF + END IF +! +! Do nothing if outside + IF(iput.LT.mat%istart .OR. iput.GT.mat%iend) RETURN +! + IF(ASSOCIATED(mat%mat)) THEN + CALL putele(mat%mat, iput, jput, valput, & + & nlforce_zero=mat%nlforce_zero) + ELSE + s = mat%irow(iput) + e = mat%irow(iput+1) - 1 + k = isearch(mat%cols(s:e), jput) + IF( k.GE.0 ) THEN + IF(mat%nlherm) THEN + mat%val(s+k) = CONJG(valput) ! CSR-UT* = CSC-LT + ELSE + mat%val(s+k) = valput + END IF + ELSE + IF(ABS(val) .GT. EPSILON(0.0d0)) THEN ! Ok for zero val + WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF + END IF + END IF + END SUBROUTINE putele_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getele_wsmp_mat(mat, i, j, val) +! +! Get element (i,j) of sparse matrix +! + TYPE(wsmp_mat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE PRECISION, INTENT(out) :: val + INTEGER :: iget, jget + INTEGER :: k, s, e +! + iget = i + jget = j + IF(mat%nlsym) THEN + IF( i.GT.j ) THEN ! Lower triangular part + iget = j + jget = i + END IF + END IF +! + val = 0.0d0 ! Assume zero val if outside + IF( iget.LT.mat%istart .OR. iget.GT.mat%iend ) RETURN +! + IF(ASSOCIATED(mat%mat)) THEN + CALL getele(mat%mat, iget, jget, val) + ELSE + s = mat%irow(iget) + e = mat%irow(iget+1) - 1 + k = isearch(mat%cols(s:e), jget) + IF( k.GE.0 ) THEN + val =mat%val(s+k) + ELSE + val = 0.0d0 ! Assume zero val if not found + END IF + END IF + END SUBROUTINE getele_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getele_zwsmp_mat(mat, i, j, val) +! +! Get element (i,j) of sparse matrix +! + TYPE(zwsmp_mat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE COMPLEX, INTENT(out) :: val + DOUBLE COMPLEX :: valget + INTEGER :: iget, jget + INTEGER :: k, s, e +! + iget = i + jget = j + IF(mat%nlherm .OR. mat%nlsym) THEN + IF( i.GT.j ) THEN ! Lower triangular part + iget = j + jget = i + END IF + END IF +! + val = (0.0d0, 0.0d0) ! Assume zero val if outside + IF( iget.LT.mat%istart .OR. iget.GT.mat%iend ) RETURN +! + IF(ASSOCIATED(mat%mat)) THEN + CALL getele(mat%mat, iget, jget, valget) + ELSE + s = mat%irow(iget) + e = mat%irow(iget+1) - 1 + k = isearch(mat%cols(s:e), jget) + IF( k.GE.0 ) THEN + IF(mat%nlherm) THEN + valget = CONJG(mat%val(s+k)) ! CSR-UT* = CSC-LT + ELSE + valget = mat%val(s+k) + END IF + ELSE + valget = (0.0d0,0.0d0) ! Assume zero val if not found + END IF + END IF + val = valget + IF( i.GT.j ) THEN + IF(mat%nlherm) THEN + val = CONJG(valget) + END IF + END IF + END SUBROUTINE getele_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putrow_wsmp_mat(amat, i, arr) +! +! Put a row into sparse matrix +! + TYPE(wsmp_mat), INTENT(inout) :: amat + INTEGER, INTENT(in) :: i + DOUBLE PRECISION, INTENT(in) :: arr(:) + INTEGER :: j +! + DO j=1,amat%rank + CALL putele(amat, i, j, arr(j)) + END DO + END SUBROUTINE putrow_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putrow_zwsmp_mat(amat, i, arr) +! +! Put a row into sparse matrix +! + TYPE(zwsmp_mat), INTENT(inout) :: amat + INTEGER, INTENT(in) :: i + DOUBLE COMPLEX, INTENT(in) :: arr(:) + INTEGER :: j +! + DO j=1,amat%rank + CALL putele(amat, i, j, arr(j)) + END DO + END SUBROUTINE putrow_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getrow_wsmp_mat(amat, i, arr) +! +! Get a row from sparse matrix +! + TYPE(wsmp_mat), INTENT(in) :: amat + INTEGER, INTENT(in) :: i + DOUBLE PRECISION, INTENT(out) :: arr(:) + INTEGER :: j +! + DO j=1,amat%rank + CALL getele(amat, i, j, arr(j)) + END DO + END SUBROUTINE getrow_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getrow_zwsmp_mat(amat, i, arr) +! +! Get a row from sparse matrix +! + TYPE(zwsmp_mat), INTENT(in) :: amat + INTEGER, INTENT(in) :: i + DOUBLE COMPLEX, INTENT(out) :: arr(:) + INTEGER :: j +! + DO j=1,amat%rank + CALL getele(amat, i, j, arr(j)) + END DO + END SUBROUTINE getrow_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putcol_wsmp_mat(amat, j, arr) +! +! Put a column into sparse matrix +! + TYPE(wsmp_mat), INTENT(inout) :: amat + INTEGER, INTENT(in) :: j + DOUBLE PRECISION, INTENT(in) :: arr(:) + INTEGER :: i +! + DO i=amat%istart,amat%iend + CALL putele(amat, i, j, arr(i)) + END DO + END SUBROUTINE putcol_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putcol_zwsmp_mat(amat, j, arr) +! +! Put a column into sparse matrix +! + TYPE(zwsmp_mat), INTENT(inout) :: amat + INTEGER, INTENT(in) :: j + DOUBLE COMPLEX, INTENT(in) :: arr(:) + INTEGER :: i +! + DO i=amat%istart,amat%iend + CALL putele(amat, i, j, arr(i)) + END DO + END SUBROUTINE putcol_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getcol_wsmp_mat(amat, j, arr) +! +! Get a column from sparse matrix +! + TYPE(wsmp_mat), INTENT(in) :: amat + INTEGER, INTENT(in) :: j + DOUBLE PRECISION, INTENT(out) :: arr(:) + INTEGER :: i +! + DO i=amat%istart,amat%iend + CALL getele(amat, i, j, arr(i)) + END DO + END SUBROUTINE getcol_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getcol_zwsmp_mat(amat, j, arr) +! +! Get a column from sparse matrix +! + TYPE(zwsmp_mat), INTENT(in) :: amat + INTEGER, INTENT(in) :: j + DOUBLE COMPLEX, INTENT(out) :: arr(:) + INTEGER :: i +! + DO i=amat%istart,amat%iend + CALL getele(amat, i, j, arr(i)) + END DO + END SUBROUTINE getcol_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + INTEGER FUNCTION get_count_wsmp_mat(mat, nnz) +! +! Number of non-zeros in sparse matrix +! + TYPE(wsmp_mat) :: mat + INTEGER, INTENT(out), OPTIONAL :: nnz(:) + INTEGER :: i +! + IF(ASSOCIATED(mat%mat)) THEN + get_count_wsmp_mat = get_count(mat%mat, nnz) + ELSE + get_count_wsmp_mat = mat%nnz + IF(PRESENT(nnz)) THEN + DO i=mat%istart,mat%iend + nnz(i) = mat%irow(i+1)-mat%irow(i) + END DO + END IF + END IF + END FUNCTION get_count_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + INTEGER FUNCTION get_count_zwsmp_mat(mat, nnz) +! +! Number of non-zeros in sparse matrix +! + TYPE(zwsmp_mat) :: mat + INTEGER, INTENT(out), OPTIONAL :: nnz(:) + INTEGER :: i +! + IF(ASSOCIATED(mat%mat)) THEN + get_count_zwsmp_mat = get_count(mat%mat, nnz) + ELSE + get_count_zwsmp_mat = mat%nnz + IF(PRESENT(nnz)) THEN + DO i=mat%istart,mat%iend + nnz(i) = mat%irow(i+1)-mat%irow(i) + END DO + END IF + END IF + END FUNCTION get_count_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE to_wsmp_mat(mat, nlkeep) +! +! Convert linked list spmat to wsmp matrice structure +! + INCLUDE 'mpif.h' + TYPE(wsmp_mat) :: mat + LOGICAL, INTENT(in), OPTIONAL :: nlkeep + INTEGER :: i, nnz, rank, s, e + INTEGER :: comm, ierr, nnz_loc, rank_loc + LOGICAL :: nlclean +! + nlclean = .TRUE. + IF(PRESENT(nlkeep)) THEN + nlclean = .NOT. nlkeep + END IF +! + comm = mat%comm +! +! Allocate the WSMP matrix structure +! + rank = mat%rank + rank_loc = mat%rank +! + nnz_loc = get_count(mat) + mat%nnz_start = 0 + CALL mpi_exscan(nnz_loc, mat%nnz_start, 1, MPI_INTEGER, MPI_SUM, comm, ierr) + mat%nnz_start = mat%nnz_start + 1 + mat%nnz_end = mat%nnz_start + nnz_loc - 1 + mat%nnz_loc = nnz_loc + CALL mpi_allreduce(nnz_loc, mat%nnz, 1, MPI_INTEGER, MPI_SUM, comm, ierr) +! + IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) + IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) + IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm) + IF(ASSOCIATED(mat%invp)) DEALLOCATE(mat%invp) + IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) +! +! Allocate LOCAL irow, cols and val + IF(mat%nlsym) THEN + ALLOCATE(mat%perm(rank)) + ALLOCATE(mat%invp(rank)) + END IF + ALLOCATE(mat%val(nnz_loc)) + ALLOCATE(mat%cols(nnz_loc)) + ALLOCATE(mat%irow(mat%istart:mat%iend+1)) +! +! Fill WSMP structure and deallocate the sparse rows +! + mat%irow(mat%istart) = 1 + DO i=mat%istart,mat%iend + mat%irow(i+1) = mat%irow(i) + mat%mat%row(i)%nnz + s = mat%irow(i) + e = mat%irow(i+1)+1 + CALL getrow(mat%mat%row(i), mat%val(s:e), mat%cols(s:e)) + IF(nlclean) CALL destroy(mat%mat%row(i)) + END DO + IF(nlclean) DEALLOCATE(mat%mat) + END SUBROUTINE to_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE to_zwsmp_mat(mat, nlkeep) +! +! Convert linked list spmat to wsmp matrice structure +! + INCLUDE 'mpif.h' + TYPE(zwsmp_mat) :: mat + LOGICAL, INTENT(in), OPTIONAL :: nlkeep + INTEGER :: i, nnz, rank, s, e + INTEGER :: comm, ierr, nnz_loc, rank_loc + LOGICAL :: nlclean +! + nlclean = .TRUE. + IF(PRESENT(nlkeep)) THEN + nlclean = .NOT. nlkeep + END IF +! + comm = mat%comm +! +! Allocate the WSMP matrix structure +! + rank = mat%rank + rank_loc = mat%rank +! + nnz_loc = get_count(mat) + mat%nnz_start = 0 + CALL mpi_exscan(nnz_loc, mat%nnz_start, 1, MPI_INTEGER, MPI_SUM, comm, ierr) + mat%nnz_start = mat%nnz_start + 1 + mat%nnz_end = mat%nnz_start + nnz_loc - 1 + mat%nnz_loc = nnz_loc + CALL mpi_allreduce(nnz_loc, mat%nnz, 1, MPI_INTEGER, MPI_SUM, comm, ierr) +! + IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) + IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) + IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm) + IF(ASSOCIATED(mat%invp)) DEALLOCATE(mat%invp) + IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) +! +! Allocate LOCAL irow, cols and val + IF(mat%nlsym) THEN + ALLOCATE(mat%perm(rank)) + ALLOCATE(mat%invp(rank)) + END IF + ALLOCATE(mat%val(nnz_loc)) + ALLOCATE(mat%cols(nnz_loc)) + ALLOCATE(mat%irow(mat%istart:mat%iend+1)) +! +! Fill WSMP structure and deallocate the sparse rows +! + mat%irow(mat%istart) = 1 + DO i=mat%istart,mat%iend + mat%irow(i+1) = mat%irow(i) + mat%mat%row(i)%nnz + s = mat%irow(i) + e = mat%irow(i+1)+1 + CALL getrow(mat%mat%row(i), mat%val(s:e), mat%cols(s:e)) + IF(nlclean) CALL destroy(mat%mat%row(i)) + END DO + IF(mat%nlherm) THEN + mat%val(:) = CONJG(mat%val(:)) ! CSR-UT* = CSC-LT + END IF + IF(nlclean) DEALLOCATE(mat%mat) + END SUBROUTINE to_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE reord_wsmp_mat(mat) +! +! Reordering and symbolic factorization +! + TYPE(wsmp_mat) :: mat + DOUBLE PRECISION :: dummy +! +! Recall the matrice if not current +! + CALL check_mat(mat) +! + IF(mat%nlsym) THEN + mat%p%iparm(2) = 1 ! Ordering + mat%p%iparm(3) = 2 ! Symbolic factorization + CALL pwssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, & + & mat%perm, mat%invp, dummy, mat%rank_loc, mat%nrhs, & + & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & + & mat%p%dparm) + ELSE + mat%p%iparm(2) = 1 ! Analysis and reordering + mat%p%iparm(3) = 1 + CALL pwgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, dummy, mat%rank_loc, & + & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) + END IF + IF(mat%p%iparm(64).NE.0) THEN + WRITE(*,'(a,i12)') 'REORD: Reordering failed with error', mat%p%iparm(64) + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF + END SUBROUTINE reord_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE reord_zwsmp_mat(mat) +! +! Reordering and symbolic factorization +! + TYPE(zwsmp_mat) :: mat + DOUBLE COMPLEX :: dummy +! +! Recall the matrice if not current +! + CALL check_mat(mat) +! + IF(mat%nlsym .OR. mat%nlherm) THEN + mat%p%iparm(2) = 1 ! Ordering + mat%p%iparm(3) = 2 ! Symbolic factorization + CALL zssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, & + & mat%perm, mat%invp, dummy, mat%rank_loc, mat%nrhs, & + & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & + & mat%p%dparm) +!!$ WRITE(*,'(a,i3/(10i8))') 'REORD: matrice', mat%matid, mat%perm + ELSE + mat%p%iparm(2) = 1 ! Analysis and reordering + mat%p%iparm(3) = 1 + CALL zgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, dummy, mat%rank_loc, & + & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) + END IF + IF(mat%p%iparm(64).NE.0) THEN + WRITE(*,'(a,i12)') 'REORD: Reordering failed with error', mat%p%iparm(64) + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF + END SUBROUTINE reord_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE numfact_wsmp_mat(mat) +! +! Numerical factorization +! + TYPE(wsmp_mat) :: mat + DOUBLE PRECISION :: dummy +! +! Recall the matrice if not current +! + CALL check_mat(mat) +! + IF(mat%nlsym) THEN + mat%p%iparm(2) = 3 ! Numerical factorization + mat%p%iparm(3) = 3 + CALL pwssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, & + & mat%perm, mat%invp, dummy, mat%rank_loc, mat%nrhs, & + & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & + & mat%p%dparm) + ELSE + mat%p%iparm(2) = 2 ! Factorization + mat%p%iparm(3) = 2 + CALL pwgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, dummy, mat%rank_loc, & + & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) + END IF + IF(mat%p%iparm(64).NE.0) THEN + WRITE(*,'(a,i12)') 'NUMFACT: Num. Factor failed with error', mat%p%iparm(64) + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF + END SUBROUTINE numfact_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE numfact_zwsmp_mat(mat) +! +! Numerical factorization +! + TYPE(zwsmp_mat) :: mat + DOUBLE COMPLEX :: dummy +! +! Recall the matrice if not current +! + CALL check_mat(mat) +! + IF(mat%nlsym .OR. mat%nlherm) THEN + mat%p%iparm(2) = 3 ! Numerical factorization + mat%p%iparm(3) = 3 + CALL zssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, & + & mat%perm, mat%invp, dummy, mat%rank_loc, mat%nrhs, & + & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & + & mat%p%dparm) + ELSE + mat%p%iparm(2) = 2 ! Factorization + mat%p%iparm(3) = 2 + CALL zgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, dummy, mat%rank_loc, & + & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) + END IF + IF(mat%p%iparm(64).NE.0) THEN + WRITE(*,'(a,i12)') 'NUMFACT: Num. Factor failed with error', mat%p%iparm(64) + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF + END SUBROUTINE numfact_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE factor_wsmp_mat(mat, nlreord) +! +! Factor (create +reorder + factor) a wsmp_mat matrix +! + TYPE(wsmp_mat) :: mat + LOGICAL, OPTIONAL, INTENT(in) :: nlreord + LOGICAL :: mlreord +!---------------------------------------------------------------------- +! 1.0 Creation from the sparse rows +! + IF(ASSOCIATED(mat%mat)) THEN + CALL to_mat(mat) + END IF +!---------------------------------------------------------------------- +! 2.0 Reordering and symbolic factorization phase +! + mlreord = .TRUE. + IF(PRESENT(nlreord)) mlreord = nlreord + IF(mlreord) THEN + CALL reord_mat(mat) + END IF +!---------------------------------------------------------------------- +! 3.0 Numerical factorization +! + CALL numfact(mat) + END SUBROUTINE factor_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE factor_zwsmp_mat(mat, nlreord) +! +! Factor (create +reorder + factor) a wsmp_mat matrix +! + TYPE(zwsmp_mat) :: mat + LOGICAL, OPTIONAL, INTENT(in) :: nlreord + LOGICAL :: mlreord +!---------------------------------------------------------------------- +! 1.0 Creation from the sparse rows +! + IF(ASSOCIATED(mat%mat)) THEN + CALL to_mat(mat) + END IF +!---------------------------------------------------------------------- +! 2.0 Reordering and symbolic factorization phase +! + mlreord = .TRUE. + IF(PRESENT(nlreord)) mlreord = nlreord + IF(mlreord) THEN + CALL reord_mat(mat) + END IF +!---------------------------------------------------------------------- +! 3.0 Numerical factorization +! + CALL numfact(mat) + END SUBROUTINE factor_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE bsolve_wsmp_mat1(mat, rhs, sol, nref) +! +! Backsolve, using Wsmp +! + INCLUDE 'mpif.h' + TYPE(wsmp_mat) :: mat + DOUBLE PRECISION :: rhs(:) + DOUBLE PRECISION, OPTIONAL :: sol(:) + INTEGER, OPTIONAL :: nref +! + DOUBLE PRECISION :: sol_loc(mat%rank_loc) + INTEGER :: nloc, me, nprocs, ierr, i + INTEGER, ALLOCATABLE :: nlocs(:), displs(:) + DOUBLE PRECISION :: dummy +! +! Recall the matrice if not current +! + CALL check_mat(mat) +! + IF(mat%nlsym) THEN + mat%p%iparm(2) = 4 ! Back substitution + mat%p%iparm(3) = 4 + ELSE + mat%p%iparm(2) = 3 ! Back substitution + mat%p%iparm(3) = 3 + END IF + mat%p%iparm(6) = 0 ! Max numbers of iterative refinement steps + IF(PRESENT(nref)) THEN + IF(mat%nlsym) THEN + mat%p%iparm(3) = 5 + ELSE + mat%p%iparm(3) = 4 + END IF + mat%p%iparm(6) = nref + END IF + mat%nrhs = 1 +! +! Extract local rhs from global rhs +! + sol_loc = rhs(mat%istart:mat%iend) +! + IF(mat%nlsym) THEN + CALL pwssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, & + & mat%perm, mat%invp, sol_loc, mat%rank_loc, mat%nrhs, & + & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & + & mat%p%dparm) + ELSE + CALL pwgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, sol_loc, & + & mat%rank_loc, mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) + END IF +! + IF(mat%p%iparm(64).NE.0) THEN + WRITE(*,'(a,i12)') 'BSOLVE: Failed with error', mat%p%iparm(64) + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF +! +! Allgatherv local sol +! + CALL mpi_comm_rank(mat%comm, me, ierr) + CALL mpi_comm_size(mat%comm, nprocs, ierr) +! + ALLOCATE(displs(0:nprocs)) + ALLOCATE(nlocs(0:nprocs-1)) +! + nloc = mat%rank_loc + CALL mpi_allgather(nloc, 1, MPI_INTEGER, nlocs, 1, MPI_INTEGER, & + & mat%comm, ierr) +! + displs(0) = 0 + DO i=0,nprocs-1 + displs(i+1) = displs(i)+nlocs(i) + END DO +! + IF(PRESENT(sol)) THEN + CALL mpi_allgatherv(sol_loc, nloc, MPI_DOUBLE_PRECISION, & + & sol, nlocs, displs, MPI_DOUBLE_PRECISION, & + & mat%comm, ierr) + ELSE + CALL mpi_allgatherv(sol_loc, nloc, MPI_DOUBLE_PRECISION, & + & rhs, nlocs, displs, MPI_DOUBLE_PRECISION, & + & mat%comm, ierr) + END IF +! + DEALLOCATE(nlocs) + DEALLOCATE(displs) +! + END SUBROUTINE bsolve_wsmp_mat1 +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE bsolve_zwsmp_mat1(mat, rhs, sol, nref) +! +! Backsolve, using Wsmp +! + INCLUDE 'mpif.h' + TYPE(zwsmp_mat) :: mat + DOUBLE COMPLEX :: rhs(:) + DOUBLE COMPLEX, OPTIONAL :: sol(:) + INTEGER, OPTIONAL :: nref +! + DOUBLE COMPLEX :: sol_loc(mat%rank_loc) + INTEGER :: nloc, me, nprocs, ierr, i + INTEGER, ALLOCATABLE :: nlocs(:), displs(:) + DOUBLE COMPLEX :: dummy +! +! Recall the matrice if not current +! + CALL check_mat(mat) +! + IF(mat%nlsym .OR. mat%nlherm) THEN + mat%p%iparm(2) = 4 ! Back substitution + mat%p%iparm(3) = 4 + ELSE + mat%p%iparm(2) = 3 ! Back substitution + mat%p%iparm(3) = 3 + END IF + mat%p%iparm(6) = 0 ! Max numbers of iterative refinement steps + IF(PRESENT(nref)) THEN + IF(mat%nlsym .OR. mat%nlherm) THEN + mat%p%iparm(3) = 5 + ELSE + mat%p%iparm(3) = 4 + END IF + mat%p%iparm(6) = nref + END IF + mat%nrhs = 1 +! +! Extract local rhs from global rhs +! + sol_loc = rhs(mat%istart:mat%iend) +! + IF(mat%nlsym .OR. mat%nlherm) THEN + CALL pzssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, & + & mat%perm, mat%invp, sol_loc, mat%rank_loc, mat%nrhs, & + & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & + & mat%p%dparm) + ELSE + CALL pzgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, sol_loc, & + & mat%rank_loc, mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) + END IF +! + IF(mat%p%iparm(64).NE.0) THEN + WRITE(*,'(a,i12)') 'BSOLVE: Failed with error', mat%p%iparm(64) + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF +! +! Allgatherv local sol +! + CALL mpi_comm_rank(mat%comm, me, ierr) + CALL mpi_comm_size(mat%comm, nprocs, ierr) +! + ALLOCATE(displs(0:nprocs)) + ALLOCATE(nlocs(0:nprocs-1)) +! + nloc = mat%rank_loc + CALL mpi_allgather(nloc, 1, MPI_INTEGER, nlocs, 1, MPI_INTEGER, & + & mat%comm, ierr) +! + displs(0) = 0 + DO i=0,nprocs-1 + displs(i+1) = displs(i)+nlocs(i) + END DO +! + IF(PRESENT(sol)) THEN + CALL mpi_allgatherv(sol_loc, nloc, MPI_DOUBLE_COMPLEX, & + & sol, nlocs, displs, MPI_DOUBLE_COMPLEX, & + & mat%comm, ierr) + ELSE + CALL mpi_allgatherv(sol_loc, nloc, MPI_DOUBLE_COMPLEX, & + & rhs, nlocs, displs, MPI_DOUBLE_COMPLEX, & + & mat%comm, ierr) + END IF +! + DEALLOCATE(nlocs) + DEALLOCATE(displs) +! + END SUBROUTINE bsolve_zwsmp_mat1 +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE bsolve_wsmp_matn(mat, rhs, sol, nref) +! +! Backsolve, using Wsmp, multiple RHS +! + INCLUDE 'mpif.h' + TYPE(wsmp_mat) :: mat + DOUBLE PRECISION :: rhs(:,:) + DOUBLE PRECISION, OPTIONAL :: sol(:,:) + INTEGER, OPTIONAL :: nref +! + DOUBLE PRECISION :: sol_loc(mat%rank_loc,SIZE(rhs,2)) + INTEGER :: nloc, me, nprocs, ierr, i + INTEGER, ALLOCATABLE :: nlocs(:), displs(:) + DOUBLE PRECISION :: dummy +! +! Recall the matrice if not current +! + CALL check_mat(mat) +! + IF(mat%nlsym) THEN + mat%p%iparm(2) = 4 ! Back substitution + mat%p%iparm(3) = 4 + ELSE + mat%p%iparm(2) = 3 ! Back substitution + mat%p%iparm(3) = 3 + END IF + mat%p%iparm(6) = 0 ! Max numbers of iterative refinement steps + IF(PRESENT(nref)) THEN + IF(mat%nlsym) THEN + mat%p%iparm(3) = 5 + ELSE + mat%p%iparm(3) = 4 + END IF + mat%p%iparm(6) = nref + END IF + mat%nrhs = SIZE(rhs,2) +! +! Extract local rhs from global rhs +! + sol_loc(:,:) = rhs(mat%istart:mat%iend,:) +! + IF(mat%nlsym) THEN + CALL pwssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, & + & mat%perm, mat%invp, sol_loc, mat%rank_loc, mat%nrhs, & + & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & + & mat%p%dparm) + ELSE + CALL pwgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, sol_loc, & + & mat%rank_loc, mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) + END IF +! + IF(mat%p%iparm(64).NE.0) THEN + WRITE(*,'(a,i12)') 'BSOLVE: Failed with error', mat%p%iparm(64) + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF +! +! Allgatherv local sol +! + CALL mpi_comm_rank(mat%comm, me, ierr) + CALL mpi_comm_size(mat%comm, nprocs, ierr) +! + ALLOCATE(displs(0:nprocs)) + ALLOCATE(nlocs(0:nprocs-1)) +! + nloc = mat%rank_loc + CALL mpi_allgather(nloc, 1, MPI_INTEGER, nlocs, 1, MPI_INTEGER, & + & mat%comm, ierr) +! + displs(0) = 0 + DO i=0,nprocs-1 + displs(i+1) = displs(i)+nlocs(i) + END DO +! + DO i=1,mat%nrhs + IF(PRESENT(sol)) THEN + CALL mpi_allgatherv(sol_loc(1,i), nloc, MPI_DOUBLE_PRECISION, & + & sol(1,i), nlocs, displs, MPI_DOUBLE_PRECISION, & + & mat%comm, ierr) + ELSE + CALL mpi_allgatherv(sol_loc(1,i), nloc, MPI_DOUBLE_PRECISION, & + & rhs(1,i), nlocs, displs, MPI_DOUBLE_PRECISION, & + & mat%comm, ierr) + END IF + END DO +! + DEALLOCATE(nlocs) + DEALLOCATE(displs) +! + END SUBROUTINE bsolve_wsmp_matn +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE bsolve_zwsmp_matn(mat, rhs, sol, nref) +! +! Backsolve, using Wsmp, multiple RHS +! + INCLUDE 'mpif.h' + TYPE(zwsmp_mat) :: mat + DOUBLE COMPLEX :: rhs(:,:) + DOUBLE COMPLEX, OPTIONAL :: sol(:,:) + INTEGER, OPTIONAL :: nref +! + DOUBLE COMPLEX :: sol_loc(mat%rank_loc,SIZE(rhs,2)) + INTEGER :: nloc, me, nprocs, ierr, i + INTEGER, ALLOCATABLE :: nlocs(:), displs(:) + DOUBLE COMPLEX :: dummy +! +! Recall the matrice if not current +! + CALL check_mat(mat) +! + IF(mat%nlsym .or. mat%nlherm) THEN + mat%p%iparm(2) = 4 ! Back substitution + mat%p%iparm(3) = 4 + ELSE + mat%p%iparm(2) = 3 ! Back substitution + mat%p%iparm(3) = 3 + END IF + mat%p%iparm(6) = 0 ! Max numbers of iterative refinement steps + IF(PRESENT(nref)) THEN + IF(mat%nlsym .OR. mat%nlherm) THEN + mat%p%iparm(3) = 5 + ELSE + mat%p%iparm(3) = 4 + END IF + mat%p%iparm(6) = nref + END IF + mat%nrhs = SIZE(rhs,2) +! +! Extract local rhs from global rhs +! + sol_loc(:,:) = rhs(mat%istart:mat%iend,:) +! + IF(mat%nlsym) THEN + CALL pzssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, & + & mat%perm, mat%invp, sol_loc, mat%rank_loc, mat%nrhs, & + & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & + & mat%p%dparm) + ELSE + CALL pzgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, sol_loc, & + & mat%rank_loc, mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) + END IF +! + IF(mat%p%iparm(64).NE.0) THEN + WRITE(*,'(a,i12)') 'BSOLVE: Failed with error', mat%p%iparm(64) + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF +! +! Allgatherv local sol +! + CALL mpi_comm_rank(mat%comm, me, ierr) + CALL mpi_comm_size(mat%comm, nprocs, ierr) +! + ALLOCATE(displs(0:nprocs)) + ALLOCATE(nlocs(0:nprocs-1)) +! + nloc = mat%rank_loc + CALL mpi_allgather(nloc, 1, MPI_INTEGER, nlocs, 1, MPI_INTEGER, & + & mat%comm, ierr) +! + displs(0) = 0 + DO i=0,nprocs-1 + displs(i+1) = displs(i)+nlocs(i) + END DO +! + DO i=1,mat%nrhs + IF(PRESENT(sol)) THEN + CALL mpi_allgatherv(sol_loc(1,i), nloc, MPI_DOUBLE_COMPLEX, & + & sol(1,i), nlocs, displs, MPI_DOUBLE_COMPLEX, & + & mat%comm, ierr) + ELSE + CALL mpi_allgatherv(sol_loc(1,i), nloc, MPI_DOUBLE_COMPLEX, & + & rhs(1,i), nlocs, displs, MPI_DOUBLE_COMPLEX, & + & mat%comm, ierr) + END IF + END DO +! + DEALLOCATE(nlocs) + DEALLOCATE(displs) +! + END SUBROUTINE bsolve_zwsmp_matn +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + FUNCTION vmx_wsmp_mat(mat, xarr) RESULT(yarr) +! +! Return product mat*x +! + TYPE(wsmp_mat) :: mat + DOUBLE PRECISION, INTENT(in) :: xarr(:) + DOUBLE PRECISION :: yarr(SIZE(xarr)) + DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0 + CHARACTER(len=6) :: matdescra + INTEGER :: n, i, j +! + n = mat%rank +! +#ifdef MKL + IF(mat%nlsym) THEN + matdescra = 'sun' + ELSE + matdescra = 'g' + END IF +! + CALL mkl_dcsrmv('N', n, n, alpha, matdescra, mat%val, & + & mat%cols, mat%irow(1), mat%irow(2), xarr, & + & beta, yarr) +#else + yarr = 0.0d0 + DO i=1,n + DO j=mat%irow(i), mat%irow(i+1)-1 + yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j)) + END DO + IF(mat%nlsym) THEN + DO j=mat%irow(i)+1, mat%irow(i+1)-1 + yarr(mat%cols(j)) = yarr(mat%cols(j)) & + & + mat%val(j)*xarr(i) + END DO + END IF + END DO +#endif +! + END FUNCTION vmx_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + FUNCTION vmx_zwsmp_mat(mat, xarr) RESULT(yarr) +! +! Return product mat*x +! + TYPE(zwsmp_mat) :: mat + DOUBLE COMPLEX, INTENT(in) :: xarr(:) + DOUBLE COMPLEX :: yarr(SIZE(xarr)) + DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0) + INTEGER :: n, i, j + CHARACTER(len=6) :: matdescra + CHARACTER(len=1) :: transa +! + n = mat%rank +! +#ifdef MKL + IF(mat%nlsym) THEN + matdescra = 'sun' + ELSE IF(mat%nlherm) THEN + matdescra = 'hun' + ELSE + matdescra = 'g' + END IF + transa='N' + IF(mat%nlherm) THEN + transa='T' ! Transpose(CSR-UT*) = CSC-LT* = CSR-UT + END IF + CALL mkl_zcsrmv(transa, n, n, alpha, matdescra, mat%val, & + & mat%cols, mat%irow(1), mat%irow(2), xarr, & + & beta, yarr) +#else + yarr = (0.0d0,0.0d0) + DO i=1,n + IF(mat%nlherm) THEN ! CSR-UT = (CSR-UT*)* + DO j=mat%irow(i), mat%irow(i+1)-1 + yarr(i) = yarr(i) + CONJG(mat%val(j))*xarr(mat%cols(j)) + END DO + ELSE + DO j=mat%irow(i), mat%irow(i+1)-1 + yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j)) + END DO + END IF + IF(mat%nlsym) THEN + DO j=mat%irow(i)+1, mat%irow(i+1)-1 + yarr(mat%cols(j)) = yarr(mat%cols(j)) & + & + mat%val(j)*xarr(i) + END DO + ELSE IF(mat%nlherm) THEN ! CSR-UT = (CSR-UT*)* + DO j=mat%irow(i)+1, mat%irow(i+1)-1 + yarr(mat%cols(j)) = yarr(mat%cols(j)) & + & + mat%val(j)*xarr(i) + END DO + END IF + END DO +#endif +! + END FUNCTION vmx_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + FUNCTION vmx_wsmp_matn(mat, xarr) RESULT(yarr) +! +! Return product mat*x +! + TYPE(wsmp_mat) :: mat + DOUBLE PRECISION, INTENT(in) :: xarr(:,:) + DOUBLE PRECISION :: yarr(SIZE(xarr,1),SIZE(xarr,2)) +! + DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0 + INTEGER :: n, nrhs, i, j + CHARACTER(len=6) :: matdescra +! + n = mat%rank + nrhs = SIZE(xarr,2) +! +#ifdef MKL + IF(mat%nlsym) THEN + matdescra = 'sun' + ELSE + matdescra = 'g' + END IF +! + CALL mkl_dcsrmm('N', n, nrhs, n, alpha, matdescra, mat%val,& + & mat%cols, mat%irow(1), mat%irow(2), xarr, & + & n, beta, yarr, n) +#else + yarr = 0.0d0 + DO i=1,n + DO j=mat%irow(i), mat%irow(i+1)-1 + yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:) + END DO + IF(mat%nlsym) THEN + DO j=mat%irow(i)+1, mat%irow(i+1)-1 + yarr(mat%cols(j),:) = yarr(mat%cols(j),:) & + & + mat%val(j)*xarr(i,:) + END DO + END IF + END DO +#endif +! + END FUNCTION vmx_wsmp_matn +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + FUNCTION vmx_zwsmp_matn(mat, xarr) RESULT(yarr) +! +! Return product mat*x +! + TYPE(zwsmp_mat) :: mat + DOUBLE COMPLEX, INTENT(in) :: xarr(:,:) + DOUBLE COMPLEX :: yarr(SIZE(xarr,1),SIZE(xarr,2)) +! + DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0) + INTEGER :: n, nrhs, i, j + CHARACTER(len=6) :: matdescra + CHARACTER(len=1) :: transa +! + n = mat%rank + nrhs = SIZE(xarr,2) +! +#ifdef MKL + IF(mat%nlsym) THEN + matdescra = 'sun' + ELSE IF(mat%nlherm) THEN + matdescra = 'hun' + ELSE + matdescra = 'g' + END IF + transa='N' + IF(mat%nlherm) THEN + transa='T' ! Transpose(CSR-UT*) = CSC-LT* = CSR-UT + END IF +! + CALL mkl_zcsrmm(transa, n, nrhs, n, alpha, matdescra, mat%val, & + & mat%cols, mat%irow(1), mat%irow(2), xarr, n, & + & beta, yarr, n) +#else + yarr = (0.0d0,0.0d0) + DO i=1,n + IF(mat%nlherm) THEN ! CSR-UT = (CSR-UT*)* + DO j=mat%irow(i), mat%irow(i+1)-1 + yarr(i,:) = yarr(i,:) + CONJG(mat%val(j))*xarr(mat%cols(j),:) + END DO + ELSE + DO j=mat%irow(i), mat%irow(i+1)-1 + yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:) + END DO + END IF + IF(mat%nlsym) THEN + DO j=mat%irow(i)+1, mat%irow(i+1)-1 + yarr(mat%cols(j),:) = yarr(mat%cols(j),:) & + & + mat%val(j)*xarr(i,:) + END DO + ELSE IF(mat%nlherm) THEN ! CSR-UT = (CSR-UT*)* + DO j=mat%irow(i)+1, mat%irow(i+1)-1 + yarr(mat%cols(j),:) = yarr(mat%cols(j),:) & + & + mat%val(j)*xarr(i,:) + END DO + END IF + END DO +#endif +! + END FUNCTION vmx_zwsmp_matn +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE destroy_wsmp_mat(mat) +! +! Deallocate the sparse matrix mat +! + TYPE(wsmp_mat) :: mat +! + IF(ASSOCIATED(mat%mat)) THEN + CALL destroy(mat%mat) + DEALLOCATE(mat%mat) + END IF +! +! Release memory for factors for symmetric matrix + IF(mat%nlsym) THEN + CALL check_mat(mat) + CALL wsffree + END IF +! + IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) + IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) + IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm) + IF(ASSOCIATED(mat%invp)) DEALLOCATE(mat%invp) + IF(ASSOCIATED(mat%mrp)) DEALLOCATE(mat%mrp) + IF(ASSOCIATED(mat%diag)) DEALLOCATE(mat%diag) + IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) + IF(ASSOCIATED(mat%aux)) DEALLOCATE(mat%aux) + END SUBROUTINE destroy_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE destroy_zwsmp_mat(mat) +! +! Deallocate the sparse matrix mat +! + TYPE(zwsmp_mat) :: mat +! + IF(ASSOCIATED(mat%mat)) THEN + CALL destroy(mat%mat) + DEALLOCATE(mat%mat) + END IF +! +! Release memory for factors for symmetric/hermitian matrix + IF(mat%nlsym .OR. mat%nlherm) THEN + CALL check_mat(mat) + CALL wsffree + END IF +! + IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) + IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) + IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm) + IF(ASSOCIATED(mat%invp)) DEALLOCATE(mat%invp) + IF(ASSOCIATED(mat%mrp)) DEALLOCATE(mat%mrp) + IF(ASSOCIATED(mat%diag)) DEALLOCATE(mat%diag) + IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) + IF(ASSOCIATED(mat%aux)) DEALLOCATE(mat%aux) + END SUBROUTINE destroy_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE put_wsmp_mat(fid, label, mat, str) +! +! Write matrix to hdf5 file +! + USE futils +! + INTEGER, INTENT(in) :: fid + CHARACTER(len=*), INTENT(in) :: label + TYPE(wsmp_mat) :: mat + CHARACTER(len=*), OPTIONAL, INTENT(in) :: str +! + IF(PRESENT(str)) THEN + CALL creatg(fid, label, str) + ELSE + CALL creatg(fid, label) + END IF + CALL attach(fid, label, 'RANK', mat%rank) + CALL attach(fid, label, 'NNZ', mat%nnz) + CALL attach(fid, label, 'NLSYM', mat%nlsym) + CALL attach(fid, label, 'NLPOS', mat%nlpos) + CALL putarr(fid, TRIM(label)//'/irow', mat%irow) + CALL putarr(fid, TRIM(label)//'/cols', mat%cols) + CALL putarr(fid, TRIM(label)//'/val', mat%val) +! + CALL creatg(fid, TRIM(label)//'/p') + CALL putarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm) + CALL putarr(fid, TRIM(label)//'/p/dparm', mat%p%dparm) + END SUBROUTINE put_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE put_zwsmp_mat(fid, label, mat, str) +! +! Write matrix to hdf5 file +! + USE futils +! + INTEGER, INTENT(in) :: fid + CHARACTER(len=*), INTENT(in) :: label + TYPE(zwsmp_mat) :: mat + CHARACTER(len=*), OPTIONAL, INTENT(in) :: str +! + IF(PRESENT(str)) THEN + CALL creatg(fid, label, str) + ELSE + CALL creatg(fid, label) + END IF + CALL attach(fid, label, 'RANK', mat%rank) + CALL attach(fid, label, 'NNZ', mat%nnz) + CALL attach(fid, label, 'NLSYM', mat%nlsym) + CALL attach(fid, label, 'NLPOS', mat%nlpos) + CALL attach(fid, label, 'NLHERM', mat%nlherm) + CALL putarr(fid, TRIM(label)//'/irow', mat%irow) + CALL putarr(fid, TRIM(label)//'/cols', mat%cols) + CALL putarr(fid, TRIM(label)//'/val', mat%val) +! + CALL creatg(fid, TRIM(label)//'/p') + CALL putarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm) + CALL putarr(fid, TRIM(label)//'/p/dparm', mat%p%dparm) + END SUBROUTINE put_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE get_wsmp_mat(fid, label, mat) +! +! Read matrix from hdf5 file +! + USE futils +! + INTEGER, INTENT(in) :: fid + CHARACTER(len=*), INTENT(in) :: label + TYPE(wsmp_mat) :: mat +! + CALL getatt(fid, label, 'RANK', mat%rank) + CALL getatt(fid, label, 'NNZ', mat%nnz) + CALL getatt(fid, label, 'NLSYM', mat%nlsym) + CALL getatt(fid, label, 'NLPOS', mat%nlpos) + CALL getarr(fid, TRIM(label)//'/irow', mat%irow) + CALL getarr(fid, TRIM(label)//'/cols', mat%cols) + IF(mat%nlsym) THEN + CALL getarr(fid, TRIM(label)//'/perm', mat%perm) + CALL getarr(fid, TRIM(label)//'/invp', mat%invp) + END IF + CALL getarr(fid, TRIM(label)//'/val', mat%val) +! + CALL getarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm) + CALL getarr(fid, TRIM(label)//'/p/dparm', mat%p%dparm) + END SUBROUTINE get_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE get_zwsmp_mat(fid, label, mat) +! +! Read matrix from hdf5 file +! + USE futils +! + INTEGER, INTENT(in) :: fid + CHARACTER(len=*), INTENT(in) :: label + TYPE(zwsmp_mat) :: mat +! + CALL getatt(fid, label, 'RANK', mat%rank) + CALL getatt(fid, label, 'NNZ', mat%nnz) + CALL getatt(fid, label, 'NLSYM', mat%nlsym) + CALL getatt(fid, label, 'NLPOS', mat%nlpos) + CALL getatt(fid, label, 'NLHERM', mat%nlherm) + CALL getarr(fid, TRIM(label)//'/irow', mat%irow) + CALL getarr(fid, TRIM(label)//'/cols', mat%cols) + IF(mat%nlsym) THEN + CALL getarr(fid, TRIM(label)//'/perm', mat%perm) + CALL getarr(fid, TRIM(label)//'/invp', mat%invp) + END IF + CALL getarr(fid, TRIM(label)//'/val', mat%val) +! + CALL getarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm) + CALL getarr(fid, TRIM(label)//'/p/dparm', mat%p%dparm) + END SUBROUTINE get_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE mcopy_wsmp_mat(mata, matb) +! +! Matrix copy: B = A +! + TYPE(wsmp_mat) :: mata, matb + INTEGER :: n, nnz +! +! Assume that matb was already initialized by init_wsmp_mat. + IF(matb%rank.LE.0) THEN + WRITE(*,'(a)') 'MCOPY: Mat B is not initialized with INIT' + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF +! + IF(ASSOCIATED(matb%mat)) THEN + CALL destroy(matb%mat) + DEALLOCATE(matb%mat) + END IF +! + n = mata%rank + nnz = mata%nnz + matb%rank = n + matb%nnz = nnz + matb%nlsym = mata%nlsym + matb%nlpos = mata%nlpos + matb%nlforce_zero = mata%nlforce_zero +! + IF(ASSOCIATED(matb%val)) DEALLOCATE(matb%val) + IF(ASSOCIATED(matb%cols)) DEALLOCATE(matb%cols) + IF(ASSOCIATED(matb%irow)) DEALLOCATE(matb%irow) + IF(ASSOCIATED(matb%perm)) DEALLOCATE(matb%perm) + IF(ASSOCIATED(matb%invp)) DEALLOCATE(matb%invp) + ALLOCATE(matb%val(nnz)); matb%val = mata%val + ALLOCATE(matb%cols(nnz)); matb%cols = mata%cols + ALLOCATE(matb%irow(n+1)); matb%irow = mata%irow + ALLOCATE(matb%perm(n)) + IF(matb%nlsym) THEN + ALLOCATE(matb%perm(n)) + ALLOCATE(matb%invp(n)) + END IF + END SUBROUTINE mcopy_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE mcopy_zwsmp_mat(mata, matb) +! +! Matrix copy: B = A +! + TYPE(zwsmp_mat) :: mata, matb + INTEGER :: n, nnz +! +! Assume that matb was already initialized by init_wsmp_mat. + IF(matb%rank.LE.0) THEN + WRITE(*,'(a)') 'MCOPY: Mat B is not initialized with INIT' + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF +! + IF(ASSOCIATED(matb%mat)) THEN + CALL destroy(matb%mat) + DEALLOCATE(matb%mat) + END IF +! + n = mata%rank + nnz = mata%nnz + matb%rank = n + matb%nnz = nnz + matb%nlsym = mata%nlsym + matb%nlherm = mata%nlherm + matb%nlpos = mata%nlpos + matb%nlforce_zero = mata%nlforce_zero +! + IF(ASSOCIATED(matb%val)) DEALLOCATE(matb%val) + IF(ASSOCIATED(matb%cols)) DEALLOCATE(matb%cols) + IF(ASSOCIATED(matb%irow)) DEALLOCATE(matb%irow) + IF(ASSOCIATED(matb%perm)) DEALLOCATE(matb%perm) + IF(ASSOCIATED(matb%invp)) DEALLOCATE(matb%invp) + ALLOCATE(matb%val(nnz)); matb%val = mata%val + ALLOCATE(matb%cols(nnz)); matb%cols = mata%cols + ALLOCATE(matb%irow(n+1)); matb%irow = mata%irow + ALLOCATE(matb%perm(n)) + IF(matb%nlsym) THEN + ALLOCATE(matb%perm(n)) + ALLOCATE(matb%invp(n)) + END IF + END SUBROUTINE mcopy_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE maddto_wsmp_mat(mata, alpha, matb) +! +! A <- A + alpha*B +! + TYPE(wsmp_mat) :: mata, matb + DOUBLE PRECISION :: alpha +! + mata%val = mata%val + alpha*matb%val + END SUBROUTINE maddto_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE maddto_zwsmp_mat(mata, alpha, matb) +! +! A <- A + alpha*B +! + TYPE(zwsmp_mat) :: mata, matb + DOUBLE COMPLEX :: alpha +! + mata%val = mata%val + alpha*matb%val + END SUBROUTINE maddto_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE psum_wsmp_mat(mat, comm) +! +! Parallel sum of sparse matrices +! + INCLUDE "mpif.h" +! + TYPE(wsmp_mat) :: mat + INCLUDE 'psum_mat.tpl' + END SUBROUTINE psum_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE psum_zwsmp_mat(mat, comm) +! +! Parallel sum of sparse matrices +! + INCLUDE "mpif.h" +! + TYPE(zwsmp_mat) :: mat + INCLUDE 'psum_mat.tpl' + END SUBROUTINE psum_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE p2p_wsmp_mat(mat, dest, extyp, op, comm) +! +! Point-to-point combine sparse matrix between 2 processes +! + INCLUDE "mpif.h" +! + TYPE(wsmp_mat) :: mat + DOUBLE PRECISION, ALLOCATABLE :: val(:) + INTEGER :: mpi_type=MPI_DOUBLE_PRECISION +! + INCLUDE "p2p_mat.tpl" + END SUBROUTINE p2p_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE p2p_zwsmp_mat(mat, dest, extyp, op, comm) +! +! Point-to-point combine sparse matrix between 2 processes +! + INCLUDE "mpif.h" +! + TYPE(zwsmp_mat) :: mat + DOUBLE COMPLEX, ALLOCATABLE :: val(:) + INTEGER :: mpi_type=MPI_DOUBLE_COMPLEX +! + INCLUDE "p2p_mat.tpl" + END SUBROUTINE p2p_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +END MODULE pwsmp_bsplines diff --git a/src/sparse_mod.f90 b/src/sparse_mod.f90 new file mode 100644 index 0000000..5bc289e --- /dev/null +++ b/src/sparse_mod.f90 @@ -0,0 +1,899 @@ +!> +!> @file sparse_mod.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +MODULE sparse +! +! SPARSE: Implement sparse matrix using dynamic linked lists +! as matrix rows. +! +! T.M. Tran, CRPP-EPFL +! October 2010 +! + IMPLICIT NONE +! + TYPE elt + INTEGER :: index=0 + DOUBLE PRECISION :: val=0.0d0 + TYPE(elt), POINTER :: next => NULL() + END TYPE elt +! + TYPE zelt + INTEGER :: index=0 + DOUBLE COMPLEX :: val=(0.0d0, 0.0d0) + TYPE(zelt), POINTER :: next => NULL() + END TYPE zelt +! + TYPE sprow + INTEGER :: nnz=0 ! Number of non zeros in a row + TYPE(elt), POINTER :: row0 => NULL() ! Points to head of a (sparse) row + END TYPE sprow +! + TYPE zsprow + INTEGER :: nnz=0 ! Number of non zeros in a row + TYPE(zelt), POINTER :: row0 => NULL() ! Points to head of a (sparse) row + END TYPE zsprow +! + TYPE spmat + INTEGER :: rank + TYPE(sprow), POINTER :: row(:) => NULL() + END TYPE spmat +! + TYPE zspmat + INTEGER :: rank + TYPE(zsprow), POINTER :: row(:) => NULL() + END TYPE zspmat +! + INTERFACE init + MODULE PROCEDURE init_spmat, init_zspmat + END INTERFACE init +! + INTERFACE updtmat + MODULE PROCEDURE updt_sp, updt_zsp, updt_spmat, updt_zspmat + END INTERFACE updtmat +! + INTERFACE putele + MODULE PROCEDURE putele_sp, putele_zsp, putele_spmat, putele_zspmat + END INTERFACE putele +! + INTERFACE getele + MODULE PROCEDURE getele_sp, getele_zsp, getele_spmat, getele_zspmat + END INTERFACE getele +! + INTERFACE putrow + MODULE PROCEDURE putrow_csr, putrow_full, putrow_spmat, & + & putrow_zcsr, putrow_zfull, putrow_zspmat + END INTERFACE putrow +! + INTERFACE getrow + MODULE PROCEDURE getrow_csr, getrow_full, getrow_spmat, & + & getrow_zcsr, getrow_zfull, getrow_zspmat + END INTERFACE getrow +! + INTERFACE putcol + MODULE PROCEDURE putcol_spmat, putcol_zspmat + END INTERFACE putcol +! + INTERFACE getcol + MODULE PROCEDURE getcol_spmat, getcol_zspmat + END INTERFACE getcol +! + INTERFACE get_count + MODULE PROCEDURE get_count_sp, get_count_spmat, & + & get_count_zsp, get_count_zspmat + END INTERFACE get_count +! + INTERFACE destroy + MODULE PROCEDURE destroy_spmat, destroy_row, destroy_node, & + & destroy_zspmat, destroy_zrow, destroy_znode + END INTERFACE destroy +! +CONTAINS +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE init_spmat(n, mat, istart, iend) +! +! Initial an empty sparse matrix +! + INTEGER, INTENT(in) :: n + INTEGER, INTENT(in), OPTIONAL :: istart, iend + TYPE(spmat) :: mat +! + mat%rank = n + IF(ASSOCIATED(mat%row)) DEALLOCATE(mat%row) + IF(PRESENT(istart)) THEN + ALLOCATE(mat%row(istart:iend)) + ELSE + ALLOCATE(mat%row(n)) + END IF +! + END SUBROUTINE init_spmat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE init_zspmat(n, mat, istart, iend) +! +! Initial an empty sparse matrix +! + INTEGER, INTENT(in) :: n + INTEGER, INTENT(in), OPTIONAL :: istart, iend + TYPE(zspmat) :: mat +! + mat%rank = n + IF(ASSOCIATED(mat%row)) DEALLOCATE(mat%row) + IF(PRESENT(istart)) THEN + ALLOCATE(mat%row(istart:iend)) + ELSE + ALLOCATE(mat%row(n)) + END IF +! + END SUBROUTINE init_zspmat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE updt_sp(arow, j, val) +! +! Update element j of row arow or insert it in an increasing "index" +! + TYPE(sprow), TARGET :: arow + INTEGER, INTENT(in) :: j + DOUBLE PRECISION, INTENT(in) :: val +! + TYPE(elt), TARGET :: pre_root + TYPE(elt), POINTER :: t, p +! + pre_root%next => arow%row0 ! pre_root is linked to the head of the list. + t => pre_root + DO WHILE( ASSOCIATED(t%next) ) + p => t%next + IF( p%index .EQ. j ) THEN + p%val = p%val+val + RETURN + END IF + IF( p%index .GT. j ) EXIT + t => t%next + END DO + ALLOCATE(p) + p = elt(j, val, t%next) + t%next => p +! + arow%nnz = arow%nnz+1 + arow%row0 => pre_root%next ! In case the head is altered + END SUBROUTINE updt_sp +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE updt_zsp(arow, j, val) +! +! Update element j of row arow or insert it in an increasing "index" +! + TYPE(zsprow), TARGET :: arow + INTEGER, INTENT(in) :: j + DOUBLE COMPLEX, INTENT(in) :: val +! + TYPE(zelt), TARGET :: pre_root + TYPE(zelt), POINTER :: t, p +! + pre_root%next => arow%row0 ! pre_root is linked to the head of the list. + t => pre_root + DO WHILE( ASSOCIATED(t%next) ) + p => t%next + IF( p%index .EQ. j ) THEN + p%val = p%val+val + RETURN + END IF + IF( p%index .GT. j ) EXIT + t => t%next + END DO + ALLOCATE(p) + p = zelt(j, val, t%next) + t%next => p +! + arow%nnz = arow%nnz+1 + arow%row0 => pre_root%next ! In case the head is altered + END SUBROUTINE updt_zsp +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE updt_spmat(mat, i, j, val) +! +! Update element Aij of sparse matrix +! + TYPE(spmat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE PRECISION, INTENT(in) :: val +! + CALL updt_sp(mat%row(i), j, val) + END SUBROUTINE updt_spmat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE updt_zspmat(mat, i, j, val) +! +! Update element Aij of sparse matrix +! + TYPE(zspmat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE COMPLEX, INTENT(in) :: val +! + CALL updt_zsp(mat%row(i), j, val) + END SUBROUTINE updt_zspmat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getele_sp(arow, j, val, found) +! +! Get element j from row arow +! + TYPE(sprow), TARGET :: arow + INTEGER, INTENT(in) :: j + DOUBLE PRECISION, INTENT(out) :: val + LOGICAL, INTENT(out), OPTIONAL :: found +! + TYPE(elt), POINTER :: t + INTEGER :: i +! + val = 0.0d0 + t => arow%row0 ! Start of a row + DO WHILE( ASSOCIATED(t) ) + IF(t%index .EQ. j) THEN + val = t%val + IF(PRESENT(found)) found = .TRUE. + RETURN + END IF + t => t%next + END DO + IF(PRESENT(found)) found = .FALSE. + END SUBROUTINE getele_sp +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getele_zsp(arow, j, val, found) +! +! Get element j from row arow +! + TYPE(zsprow), TARGET :: arow + INTEGER, INTENT(in) :: j + DOUBLE COMPLEX, INTENT(out) :: val + LOGICAL, INTENT(out), OPTIONAL :: found +! + TYPE(zelt), POINTER :: t + INTEGER :: i +! + val = 0.0d0 + t => arow%row0 ! Start of a row + DO WHILE( ASSOCIATED(t) ) + IF(t%index .EQ. j) THEN + val = t%val + IF(PRESENT(found)) found = .TRUE. + RETURN + END IF + t => t%next + END DO + IF(PRESENT(found)) found = .FALSE. + END SUBROUTINE getele_zsp +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getele_spmat(mat, i, j, val) +! +! Get element (i,j) of sparse matrix +! + TYPE(spmat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE PRECISION, INTENT(out) :: val +! + CALL getele(mat%row(i), j, val) + END SUBROUTINE getele_spmat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getele_zspmat(mat, i, j, val) +! +! Get element (i,j) of sparse matrix +! + TYPE(zspmat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE COMPLEX, INTENT(out) :: val +! + CALL getele(mat%row(i), j, val) + END SUBROUTINE getele_zspmat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putele_sp(arow, j, val, nlforce_zero) +! +! Put (overwrite) element j of row arow or insert it in an increasing "index" +! + TYPE(sprow), TARGET :: arow + INTEGER, INTENT(in) :: j + DOUBLE PRECISION, INTENT(in) :: val + LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero +! + TYPE(elt), TARGET :: pre_root + TYPE(elt), POINTER :: t, p + LOGICAL :: rmnode +! + pre_root%next => arow%row0 ! pre_root is linked to the head of the list. + t => pre_root +! +! Remove node which has zero val or not? +! But never create new node with zero val +! + rmnode = .TRUE. + IF(PRESENT(nlforce_zero)) rmnode = .NOT.nlforce_zero +! + DO WHILE( ASSOCIATED(t%next) ) + p => t%next + IF( p%index .EQ. j ) THEN + IF(ABS(val).LE.EPSILON(0.0d0) .AND. rmnode) THEN ! Remove the node for zero val! + t%next => p%next + arow%nnz = arow%nnz-1 + arow%row0 => pre_root%next ! In case the head is altered + DEALLOCATE(p) + ELSE + p%val = val + END IF + RETURN + END IF + IF( p%index .GT. j ) EXIT + t => t%next + END DO +! +! Never create new node with zero val +! + IF(ABS(val).GT.EPSILON(0.0d0)) THEN + ALLOCATE(p) + p = elt(j, val, t%next) + t%next => p + arow%nnz = arow%nnz+1 + arow%row0 => pre_root%next + END IF + END SUBROUTINE putele_sp +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putele_zsp(arow, j, val, nlforce_zero) +! +! Put (overwrite) element j of row arow or insert it in an increasing "index" +! + TYPE(zsprow), TARGET :: arow + INTEGER, INTENT(in) :: j + DOUBLE COMPLEX, INTENT(in) :: val + LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero +! + TYPE(zelt), TARGET :: pre_root + TYPE(zelt), POINTER :: t, p + LOGICAL :: rmnode +! + pre_root%next => arow%row0 ! pre_root is linked to the head of the list. + t => pre_root +! +! Remove node which has zero val or not? +! But never create new node with zero val +! + rmnode = .TRUE. + IF(PRESENT(nlforce_zero)) rmnode = .NOT.nlforce_zero +! + DO WHILE( ASSOCIATED(t%next) ) + p => t%next + IF( p%index .EQ. j ) THEN + IF(ABS(val).LE.EPSILON(0.0d0) .AND. rmnode) THEN ! Remove the node for zero val! + t%next => p%next + arow%nnz = arow%nnz-1 + arow%row0 => pre_root%next ! In case the head is altered + DEALLOCATE(p) + ELSE + p%val = val + END IF + RETURN + END IF + IF( p%index .GT. j ) EXIT + t => t%next + END DO +! +! Never create new node with zero val +! + IF(ABS(val).GT.EPSILON(0.0d0)) THEN + ALLOCATE(p) + p = zelt(j, val, t%next) + t%next => p + arow%nnz = arow%nnz+1 + arow%row0 => pre_root%next + END IF + END SUBROUTINE putele_zsp +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putele_spmat(mat, i, j, val, nlforce_zero) +! +! Put element (i,j) of matrix +! + TYPE(spmat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE PRECISION, INTENT(in) :: val + LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero +! + CALL putele(mat%row(i), j, val, nlforce_zero) + END SUBROUTINE putele_spmat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putele_zspmat(mat, i, j, val, nlforce_zero) +! +! Put element (i,j) of matrix +! + TYPE(zspmat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE COMPLEX, INTENT(in) :: val + LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero +! + CALL putele(mat%row(i), j, val, nlforce_zero) + END SUBROUTINE putele_zspmat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + INTEGER FUNCTION get_count_sp(arow) +! +! Number of elements in arow +! + TYPE(sprow), INTENT(in) :: arow + TYPE(elt), POINTER :: t + INTEGER :: i +! + t => arow%row0 ! Start of a row + i = 0 + DO WHILE( ASSOCIATED(t) ) + i=i+1 + t => t%next + END DO + get_count_sp = i + END FUNCTION get_count_sp +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + INTEGER FUNCTION get_count_zsp(arow) +! +! Number of elements in arow +! + TYPE(zsprow), INTENT(in) :: arow + TYPE(zelt), POINTER :: t + INTEGER :: i +! + t => arow%row0 ! Start of a row + i = 0 + DO WHILE( ASSOCIATED(t) ) + i=i+1 + t => t%next + END DO + get_count_zsp = i + END FUNCTION get_count_zsp +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + INTEGER FUNCTION get_count_spmat(mat, nnz) +! +! Number of non-zeros in sparse matrix +! + TYPE(spmat) :: mat + INTEGER, INTENT(out), OPTIONAL :: nnz(:) +! + INTEGER :: i, c(LBOUND(mat%row,1):UBOUND(mat%row,1)) + DO i=LBOUND(mat%row,1),UBOUND(mat%row,1) + c(i) = get_count_sp(mat%row(i)) + END DO + IF(PRESENT(nnz)) nnz = c + get_count_spmat = SUM(c) + END FUNCTION get_count_spmat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + INTEGER FUNCTION get_count_zspmat(mat, nnz) +! +! Number of non-zeros in sparse matrix +! + TYPE(zspmat) :: mat + INTEGER, INTENT(out), OPTIONAL :: nnz(:) +! + INTEGER :: i, c(LBOUND(mat%row,1):UBOUND(mat%row,1)) + DO i=LBOUND(mat%row,1),UBOUND(mat%row,1) + c(i) = get_count_zsp(mat%row(i)) + END DO + IF(PRESENT(nnz)) nnz = c + get_count_zspmat = SUM(c) + END FUNCTION get_count_zspmat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getrow_csr(arow, arr, col, count) +! +! Get a row from sparse row arow and put it in a CSR format +! + TYPE(sprow), INTENT(in) :: arow + DOUBLE PRECISION, INTENT(out) :: arr(:) + INTEGER, INTENT(out) :: col(:) + INTEGER, OPTIONAL, INTENT(out) :: count +! + TYPE(elt), POINTER :: t + INTEGER :: i +! + t => arow%row0 ! Start of a row + i = 0 + DO WHILE( ASSOCIATED(t) ) + i=i+1 + col(i) = t%index + arr(i) = t%val + t => t%next + END DO + IF(PRESENT(count)) count = i + END SUBROUTINE getrow_csr +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getrow_zcsr(arow, arr, col, count) +! +! Get a row from sparse row arow and put it in a CSR format +! + TYPE(zsprow), INTENT(in) :: arow + DOUBLE COMPLEX, INTENT(out) :: arr(:) + INTEGER, INTENT(out) :: col(:) + INTEGER, OPTIONAL, INTENT(out) :: count +! + TYPE(zelt), POINTER :: t + INTEGER :: i +! + t => arow%row0 ! Start of a row + i = 0 + DO WHILE( ASSOCIATED(t) ) + i=i+1 + col(i) = t%index + arr(i) = t%val + t => t%next + END DO + IF(PRESENT(count)) count = i + END SUBROUTINE getrow_zcsr +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getrow_full(arow, arr, count) +! +! Get a row from sparse row arow and put it in an full row +! + TYPE(sprow), INTENT(in) :: arow + DOUBLE PRECISION, INTENT(out) :: arr(:) + INTEGER, OPTIONAL, INTENT(out) :: count +! + TYPE(elt), POINTER :: t + INTEGER :: n, i, j +! + n = SIZE(arr) + arr = 0.0d0 + t => arow%row0 ! Start of a row + i = 0 + DO WHILE( ASSOCIATED(t) ) + i=i+1 + j = t%index + IF(j.LE.n) THEN + arr(j) = t%val + t => t%next + ELSE + WRITE(*,'(a)') 'GETROW_FULL: size of input ARR too small!' + STOP + END IF + END DO + IF(PRESENT(count)) count = i + END SUBROUTINE getrow_full +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getrow_zfull(arow, arr, count) +! +! Get a row from sparse row arow and put it in an full row +! + TYPE(zsprow), INTENT(in) :: arow + DOUBLE COMPLEX, INTENT(out) :: arr(:) + INTEGER, OPTIONAL, INTENT(out) :: count +! + TYPE(zelt), POINTER :: t + INTEGER :: n, i, j +! + n = SIZE(arr) + arr = 0.0d0 + t => arow%row0 ! Start of a row + i = 0 + DO WHILE( ASSOCIATED(t) ) + i=i+1 + j = t%index + IF(j.LE.n) THEN + arr(j) = t%val + t => t%next + ELSE + WRITE(*,'(a)') 'GETROW_FULL: size of input ARR too small!' + STOP + END IF + END DO + IF(PRESENT(count)) count = i + END SUBROUTINE getrow_zfull +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getrow_spmat(mat, i, arr, col) +! +! Get a row from sparse matrix +! + TYPE(spmat), INTENT(in) :: mat + INTEGER, INTENT(in) :: i + DOUBLE PRECISION, INTENT(out) :: arr(:) + INTEGER, INTENT(out), OPTIONAL :: col(:) +! + IF(PRESENT(col)) THEN ! The output row is defined by (col, arr) + CALL getrow_csr(mat%row(i), arr, col) + ELSE + CALL getrow_full(mat%row(i), arr) + END IF + END SUBROUTINE getrow_spmat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getrow_zspmat(mat, i, arr, col) +! +! Get a row from sparse matrix +! + TYPE(zspmat), INTENT(in) :: mat + INTEGER, INTENT(in) :: i + DOUBLE COMPLEX, INTENT(out) :: arr(:) + INTEGER, INTENT(out), OPTIONAL :: col(:) +! + IF(PRESENT(col)) THEN ! The output row is defined by (col, arr) + CALL getrow_zcsr(mat%row(i), arr, col) + ELSE + CALL getrow_zfull(mat%row(i), arr) + END IF + END SUBROUTINE getrow_zspmat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putrow_csr(arow, arr, col, nlforce_zero) +! +! Put a row from sparse row arow and put it in a CSR format +! + TYPE(sprow), INTENT(inout) :: arow + DOUBLE PRECISION, INTENT(in) :: arr(:) + INTEGER, INTENT(in) :: col(:) + LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero +! + INTEGER :: n, i +! + n=SIZE(arr) + DO i=1,n + CALL putele(arow, col(i), arr(i), nlforce_zero) + END DO + END SUBROUTINE putrow_csr +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putrow_zcsr(arow, arr, col, nlforce_zero) +! +! Put a row from sparse row arow and put it in a CSR format +! + TYPE(zsprow), INTENT(inout) :: arow + DOUBLE COMPLEX, INTENT(in) :: arr(:) + INTEGER, INTENT(in) :: col(:) + LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero +! + INTEGER :: n, i +! + n=SIZE(arr) + DO i=1,n + CALL putele(arow, col(i), arr(i), nlforce_zero) + END DO + END SUBROUTINE putrow_zcsr +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putrow_full(arow, arr, nlforce_zero) +! +! Put a row from sparse row arow and put it in a full row +! + TYPE(sprow), INTENT(inout) :: arow + DOUBLE PRECISION, INTENT(in) :: arr(:) + LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero +! + INTEGER :: n, i +! + n=SIZE(arr) + DO i=1,n + CALL putele(arow, i, arr(i), nlforce_zero) + END DO + END SUBROUTINE putrow_full +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putrow_zfull(arow, arr, nlforce_zero) +! +! Put a row from sparse row arow and put it in a full row +! + TYPE(zsprow), INTENT(inout) :: arow + DOUBLE COMPLEX, INTENT(in) :: arr(:) + LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero +! + INTEGER :: n, i +! + n=SIZE(arr) + DO i=1,n + CALL putele(arow, i, arr(i), nlforce_zero) + END DO + END SUBROUTINE putrow_zfull +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putrow_spmat(mat, i, arr, col, nlforce_zero) +! +! Put a row to matrix +! + TYPE(spmat) :: mat + INTEGER, intent(in) :: i + DOUBLE PRECISION, INTENT(in) :: arr(:) + INTEGER, INTENT(in), OPTIONAL :: col(:) + LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero +! + IF(PRESENT(col)) THEN ! The input row is defined by (col, arr) + CALL putrow_csr(mat%row(i), arr, col, nlforce_zero) + ELSE + CALL putrow_full(mat%row(i), arr, nlforce_zero) + END IF + END SUBROUTINE putrow_spmat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putrow_zspmat(mat, i, arr, col, nlforce_zero) +! +! Put a row to matrix +! + TYPE(zspmat) :: mat + INTEGER, intent(in) :: i + DOUBLE COMPLEX, INTENT(in) :: arr(:) + INTEGER, INTENT(in), OPTIONAL :: col(:) + LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero +! + IF(PRESENT(col)) THEN ! The input row is defined by (col, arr) + CALL putrow_zcsr(mat%row(i), arr, col, nlforce_zero) + ELSE + CALL putrow_zfull(mat%row(i), arr, nlforce_zero) + END IF + END SUBROUTINE putrow_zspmat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putcol_spmat(mat, j, arr, nlforce_zero) +! +! Put a column to mtarix +! + TYPE(spmat) :: mat + INTEGER, INTENT(in) :: j + DOUBLE PRECISION, INTENT(in) :: arr(:) + LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero +! + INTEGER :: i + DO i=1,mat%rank + CALL putele(mat, i, j, arr(i), nlforce_zero) + END DO + END SUBROUTINE putcol_spmat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putcol_zspmat(mat, j, arr, nlforce_zero) +! +! Put a column to mtarix +! + TYPE(zspmat) :: mat + INTEGER, INTENT(in) :: j + DOUBLE COMPLEX, INTENT(in) :: arr(:) + LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero +! + INTEGER :: i + DO i=1,mat%rank + CALL putele(mat, i, j, arr(i), nlforce_zero) + END DO + END SUBROUTINE putcol_zspmat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getcol_spmat(mat, j, arr) +! +! Get column j of matrix +! + TYPE(spmat) :: mat + INTEGER, INTENT(in) :: j + DOUBLE PRECISION, INTENT(out) :: arr(:) + INTEGER :: i + DO i=1,mat%rank + CALL getele(mat, i, j, arr(i)) + END DO + END SUBROUTINE getcol_spmat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getcol_zspmat(mat, j, arr) +! +! Get column j of matrix +! + TYPE(zspmat) :: mat + INTEGER, INTENT(in) :: j + DOUBLE COMPLEX, INTENT(out) :: arr(:) + INTEGER :: i + DO i=1,mat%rank + CALL getele(mat, i, j, arr(i)) + END DO + END SUBROUTINE getcol_zspmat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE destroy_row(arow) +! +! Deallocate the sparse row +! + TYPE(sprow), INTENT(inout) :: arow +! + IF(ASSOCIATED(arow%row0)) CALL destroy_node(arow%row0) + arow%nnz = get_count(arow) + END SUBROUTINE destroy_row +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE destroy_zrow(arow) +! +! Deallocate the sparse row +! + TYPE(zsprow), INTENT(inout) :: arow +! + IF(ASSOCIATED(arow%row0)) CALL destroy_znode(arow%row0) + arow%nnz = get_count(arow) + END SUBROUTINE destroy_zrow +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + RECURSIVE SUBROUTINE destroy_node(p) +! +! Deallocate recursively the linked list +! + TYPE(elt), POINTER :: p +! + IF(ASSOCIATED(p%next)) CALL destroy_node(p%next) + DEALLOCATE(p) + END SUBROUTINE destroy_node +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + RECURSIVE SUBROUTINE destroy_znode(p) +! +! Deallocate recursively the linked list +! + TYPE(zelt), POINTER :: p +! + IF(ASSOCIATED(p%next)) CALL destroy_znode(p%next) + DEALLOCATE(p) + END SUBROUTINE destroy_znode +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE destroy_spmat(mat) +! +! Deallocate the sparse matrix +! + TYPE(spmat) :: mat + INTEGER :: n, i +! + n = mat%rank + DO i=LBOUND(mat%row,1),UBOUND(mat%row,1) + CALL destroy(mat%row(i)) + END DO + DEALLOCATE(mat%row) + END SUBROUTINE destroy_spmat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE destroy_zspmat(mat) +! +! Deallocate the sparse matrix +! + TYPE(zspmat) :: mat + INTEGER :: n, i +! + n = mat%rank + DO i=LBOUND(mat%row,1),UBOUND(mat%row,1) + CALL destroy(mat%row(i)) + END DO + DEALLOCATE(mat%row) + END SUBROUTINE destroy_zspmat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + INTEGER FUNCTION isearch(karr, k) +! +! Sequential search an ordered table of integers +! + INTEGER, INTENT(in) :: karr(0:) + INTEGER, INTENT(in) :: k + INTEGER :: n +! + n=SIZE(karr) + isearch = -1 ! Failure + IF( k.GT.karr(n-1)) RETURN +! + isearch=0 + DO + IF( k.LE.karr(isearch)) EXIT + isearch = isearch+1 + END DO + IF( k.NE.karr(isearch)) isearch = -1 ! Failure + END FUNCTION isearch +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + INTEGER FUNCTION isearch_bin(karr, k) +! +! Binary search an ordered table of integers +! + INTEGER, INTENT(in) :: karr(0:) + INTEGER, INTENT(in) :: k + INTEGER :: n + INTEGER :: l, u +! + n=SIZE(karr) + isearch_bin = -1 ! Failure + IF( k.LT.karr(0) .OR. k.GT.karr(n-1)) RETURN +! + l=0; u=n-1 + DO WHILE(l.LE.u) + isearch_bin = (l+u)/2 + IF(k.EQ.karr(isearch_bin)) THEN + RETURN + ELSE IF(k.LT.karr(isearch_bin)) THEN + u = isearch_bin-1 + ELSE + l = isearch_bin+1 + END IF + END DO + isearch_bin = -1 ! Failure + END FUNCTION isearch_bin +! +END MODULE sparse diff --git a/src/tsparse3.f90 b/src/tsparse3.f90 new file mode 100644 index 0000000..0e75703 --- /dev/null +++ b/src/tsparse3.f90 @@ -0,0 +1,705 @@ +!> +!> @file tsparse3.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +! +! Solving the 2d PDE using splines and WSMP non-symmetric matrix +! +! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0 +! exact solution: f(x,y) = (1-x^2) x^m cos(my) +! +MODULE pde2d_mod + USE bsplines + USE wsmp_bsplines + IMPLICIT NONE +CONTAINS +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE dismat(spl, mat) +! +! Assembly of FE matrix mat using spline spl +! + TYPE(spline2d), INTENT(in) :: spl + TYPE(wsmp_mat), INTENT(inout) :: mat +! + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: n2, nidbas2, ndim2, ng2 + INTEGER :: i, j, ig1, ig2 + INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol + DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:) + DOUBLE PRECISION:: contrib +! + INTEGER :: kterms ! Number of terms in weak form + INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:) ! Derivative order + DOUBLE PRECISION, ALLOCATABLE :: coefs(:,:,:) ! Terms in weak form + INTEGER, ALLOCATABLE :: left1(:), left2(:) +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) + WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1 + WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2 +! +! Gauss quadature +! + CALL get_gauss(spl%sp1, ng1) + CALL get_gauss(spl%sp2, ng2) + ALLOCATE(xg1(ng1), wg1(ng1)) + ALLOCATE(xg2(ng1), wg2(ng1)) +! +! Weak form +! + kterms = mat%nterms + ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2)) + ALLOCATE(coefs(kterms,ng1,ng2)) +! + ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative + ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! +!=========================================================================== +! 2.0 Assembly loop +! + ALLOCATE(left1(ng1)) + ALLOCATE(left2(ng2)) + DO i=1,n1 + CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) + left1 = i + CALL basfun(xg1, spl%sp1, fun1, left1) + DO j=1,n2 + CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) + left2 = j + CALL basfun(xg2, spl%sp2, fun2, left2) +! + DO ig1=1,ng1 + DO ig2=1,ng2 + CALL coefeq(xg1(ig1), xg2(ig2), & + & idert(:,:,ig1,ig2), & + & iderw(:,:,ig1,ig2), & + & coefs(:,ig1,ig2)) + END DO + END DO +! + DO iw1=0,nidbas1 ! Weight function in dir 1 + igw1 = i+iw1 + DO iw2=0,nidbas2 ! Weight function in dir 2 + igw2 = MODULO(j+iw2-1, n2) + 1 + irow = igw2 + (igw1-1)*n2 + DO it1=0,nidbas1 ! Test function in dir 1 + igt1 = i+it1 + DO it2=0,nidbas2 ! Test function in dir 2 + igt2 = MODULO(j+it2-1, n2) + 1 + jcol = igt2 + (igt1-1)*n2 +!------------- + contrib = 0.0d0 + DO ig1=1,ng1 + DO ig2=1,ng2 + DO iterm=1,kterms + contrib = contrib + & + & fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * & + & fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * & + & coefs(iterm,ig1,ig2) * & + & fun2(it2,idert(iterm,2,ig1,ig2),ig2) * & + & fun1(it1,idert(iterm,1,ig1,ig2),ig1) * & + & wg1(ig1) * wg2(ig2) + END DO + END DO + END DO + CALL updtmat(mat, irow, jcol, contrib) +!------------- + END DO + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xg1, wg1, fun1) + DEALLOCATE(xg2, wg2, fun2) + DEALLOCATE(idert, iderw, coefs) + DEALLOCATE(left1,left2) +! + CONTAINS + SUBROUTINE coefeq(x, y, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x, y + INTEGER, INTENT(out) :: idt(:,:), idw(:,:) + DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1)) +! +! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy +! + c(1) = x ! + idt(1,1) = 1 + idt(1,2) = 0 + idw(1,1) = 1 + idw(1,2) = 0 + ! + c(2) = 1.d0/x + idt(2,1) = 0 + idt(2,2) = 1 + idw(2,1) = 0 + idw(2,2) = 1 + END SUBROUTINE coefeq + END SUBROUTINE dismat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE disrhs(mbess, spl, rhs) +! +! Assembly the RHS using 2d spline spl +! + INTEGER, INTENT(in) :: mbess + TYPE(spline2d), INTENT(in) :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:) + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: n2, nidbas2, ndim2, ng2 + INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank + DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) + DOUBLE PRECISION:: contrib +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) +! + ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) + ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) +! +! Gauss quadature +! + CALL get_gauss(spl%sp1, ng1) + CALL get_gauss(spl%sp2, ng2) + ALLOCATE(xg1(ng1), wg1(ng1)) + ALLOCATE(xg2(ng1), wg2(ng1)) +!=========================================================================== +! 2.0 Assembly loop +! + nrank = SIZE(rhs) + rhs(1:nrank) = 0.0d0 +! + DO i=1,n1 + CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) + DO ig1=1,ng1 + CALL basfun(xg1(ig1), spl%sp1, fun1, i) + DO j=1,n2 + CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) + DO ig2=1,ng2 + CALL basfun(xg2(ig2), spl%sp2, fun2, j) + contrib = wg1(ig1)*wg2(ig2) * & + & rhseq(xg1(ig1),xg2(ig2), mbess) + DO k1=0,nidbas1 + i1 = i+k1 + DO k2=0,nidbas2 + j2 = MODULO(j+k2-1,n2) + 1 + ij = j2 + (i1-1)*n2 + rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1) + END DO + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xg1, wg1, fun1) + DEALLOCATE(xg2, wg2, fun2) +! + CONTAINS + DOUBLE PRECISION FUNCTION rhseq(x1, x2, m) + DOUBLE PRECISION, INTENT(in) :: x1, x2 + INTEGER, INTENT(in) :: m + rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2) + END FUNCTION rhseq + END SUBROUTINE disrhs +! +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE ibcmat(mat, ny) +! +! Apply BC on matrix +! + TYPE(wsmp_mat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: ny + INTEGER :: nrank, i, j + DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:) +!=========================================================================== +! 1.0 Prologue +! + nrank = mat%rank + ALLOCATE(zsum(nrank), arr(nrank)) +!=========================================================================== +! 2.0 Unicity at the axis +! +! The vertical sum on the NY-th row +! + zsum = 0.0d0 + DO i=1,ny + arr = 0.0d0 + CALL getrow(mat, i, arr) + zsum(:) = zsum(:) + arr(:) + END DO + CALL putrow(mat, ny, zsum) +! +! The horizontal sum on the NY-th column +! + zsum = 0.0d0 + DO j=1,ny + arr = 0.0d0 + CALL getcol(mat, j, arr) + zsum(ny:) = zsum(ny:) + arr(ny:) + END DO + CALL putcol(mat, ny, zsum) +! +! The away operator +! + DO j = 1,ny-1 + arr = 0.0d0; arr(j) = 1.0d0 + CALL putcol(mat, j, arr) + END DO +! + DO i = 1,ny-1 + arr = 0.0d0; arr(i) = 1.0d0 + CALL putrow(mat, i, arr) + END DO +! +!=========================================================================== +! 3.0 Dirichlet on right boundary +! + DO j = nrank, nrank-ny+1, -1 + arr = 0.0d0; arr(j) = 1.0d0 + CALL putcol(mat, j, arr) + END DO +! + DO i = nrank, nrank-ny+1, -1 + arr = 0.0d0; arr(i) = 1.0d0 + CALL putrow(mat, i, arr) + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(zsum, arr) +! + END SUBROUTINE ibcmat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE ibcrhs(rhs, ny) +! +! Apply BC on RHS +! + DOUBLE PRECISION, INTENT(inout) :: rhs(:) + INTEGER, INTENT(in) :: ny + INTEGER :: nrank + DOUBLE PRECISION :: zsum +!=========================================================================== +! 1.0 Prologue +! + nrank = SIZE(rhs,1) +!=========================================================================== +! 2.0 Unicity at the axis +! +! The vertical sum +! + zsum = SUM(rhs(1:ny)) + rhs(ny) = zsum + rhs(1:ny-1) = 0.0d0 +!=========================================================================== +! 3.0 Dirichlet on right boundary +! + rhs(nrank-ny+1:nrank) = 0.0d0 + END SUBROUTINE ibcrhs +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +END MODULE pde2d_mod +PROGRAM main + USE pde2d_mod + USE futils +! + IMPLICIT NONE + INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms + LOGICAL :: nlppform + INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it + DOUBLE PRECISION :: pi, coefx(5), coefy(5) + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol + TYPE(spline2d) :: splxy + TYPE(wsmp_mat) :: mat +! + CHARACTER(len=128) :: file='pde2d_wsmp.h5' + INTEGER :: fid + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr + DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol + DOUBLE PRECISION :: seconds, mem, dopla + DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2 + DOUBLE PRECISION :: tconv, treord + INTEGER :: nits=100 + LOGICAL :: nlmetis, nlforce_zero +! + NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlmetis, & + & nlforce_zero, coefx, coefy +!=========================================================================== +! 1.0 Prologue +! +! Read in data specific to run +! + nx = 8 ! Number of intervals in x + ny = 8 ! Number of intervals in y + nidbas = (/3,3/) ! Degree of splines + ngauss = (/4,4/) ! Number of Gauss points/interval + mbess = 2 ! Exponent of differential problem + nterms = 2 ! Number of terms in weak form + nlppform = .TRUE. ! Use PPFORM for gridval or not + nlmetis = .FALSE. ! Use metis ordering or minimum degree + nlforce_zero = .FALSE. ! Remove existing nodes with zero val in putele/row/ele + coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function +! + READ(*,newrun) + WRITE(*,newrun) +! +! Define grid on x (=radial) & y (=poloidal) axis +! + pi = 4.0d0*ATAN(1.0d0) + ALLOCATE(xgrid(0:nx), ygrid(0:ny)) + xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 + CALL meshdist(coefx, xgrid, nx) + ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi + CALL meshdist(coefy, ygrid, ny) +! +! Create hdf5 file +! + CALL creatf(file, fid, 'PDE2D Result File', real_prec='d') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NY', ny) + CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) + CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) + CALL attach(fid, '/', 'NGAUSS1', ngauss(1)) + CALL attach(fid, '/', 'NGAUSS2', ngauss(2)) + CALL attach(fid, '/', 'MBESS', mbess) +!=========================================================================== +! 2.0 Discretize the PDE +! +! Set up spline +! + t0 = seconds() + CALL set_spline(nidbas, ngauss, & + & xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform) +! +! FE matrix assembly +! + nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix + WRITE(*,'(a,i8)') 'nrank', nrank +! + CALL init(nrank, nterms, mat, nlforce_zero=nlforce_zero) + CALL dismat(splxy, mat) + ALLOCATE(arr(nrank)) + IF(nrank.LT.100) THEN + DO i=1,nrank + CALL getele(mat, i, i, arr(i)) + END DO + WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', arr + END IF +! +! BC on Matrix +! + WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero + WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat) + CALL ibcmat(mat, ny) + IF(nrank.LT.100) THEN + DO i=1,nrank + CALL getele(mat, i, i, arr(i)) + END DO + WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', arr + WRITE(*,'(a)') 'Last rows' + DO i=nrank-ny,nrank + CALL getrow(mat, i, arr) + WRITE(*,'(10(1pe12.3))') arr + END DO + END IF + tmat = seconds() - t0 +! +! RHS assembly +! + ALLOCATE(rhs(nrank), sol(nrank)) + CALL disrhs(mbess, splxy, rhs) +! +! BC on RHS +! + CALL ibcrhs(rhs, ny) +! + CALL putarr(fid, '/RHS', rhs, 'RHS of linear system') + WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat) + WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem() +!=========================================================================== +! 3.0 Solve the dicretized system +! + t0 = seconds() + CALL to_mat(mat) + tconv = seconds() -t0 + WRITE(*,'(/a,l4)') 'associated(mat%mat)', ASSOCIATED(mat%mat) + WRITE(*,'(a,1pe12.3)') 'Mem used (MB) after TO_MAT', mem() +! + t0 = seconds() + CALL reord_mat(mat, nlmetis=nlmetis, debug=.FALSE.) + CALL putmat(fid, '/MAT', mat) + treord = seconds() - t0 +! + t0 = seconds() + CALL numfact(mat, debug=.FALSE.) + tfact = seconds() - t0 + + WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem() +! + CALL bsolve(mat, rhs, sol, debug=.FALSE.) + t0 = seconds() + DO it=1,nits ! nits iterations for timing + CALL bsolve(mat, rhs, sol) + sol(1:ny-1) = sol(ny) + END DO + WRITE(*,'(/a,i6)') 'Number of refinement steps = ',mat%p%iparm(26) + WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem() + tsolv = (seconds() - t0)/REAL(nits) +! +! Spline coefficients, taking into account of periodicity in y +! Note: in SOL, y was numbered first. +! + dimx = splxy%sp1%dim + dimy = splxy%sp2%dim + ALLOCATE(bcoef(0:dimx-1, 0:dimy-1)) + DO j=0,dimy-1 + DO i=0,dimx-1 + ij = MODULO(j,ny) + i*ny + 1 + bcoef(i,j) = sol(ij) + END DO + END DO + WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef) + CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution') +!=========================================================================== +! 4.0 Check the solution +! +! Check function values computed with various method +! + ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny)) + DO i=0,nx + DO j=0,ny + solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j)) + END DO + END DO + jder = (/0,0/) +! +! Compute PPFORM/BCOEFS at first call to gridval + CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef) +! + WRITE(*,'(/a)') '*** Checking solutions' + t0 = seconds() + DO it=1,nits ! nits iterations for timing + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + END DO + tgrid = (seconds() - t0)/REAL(nits) + errsol = solana - solcal + IF( SIZE(bcoef,2) .LE. 10 ) THEN + CALL prnmat('BCOEF', bcoef) + CALL prnmat('SOLANA', solana) + CALL prnmat('SOLCAL', solcal) + CALL prnmat('ERRSOL', errsol) + END IF + WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & + & norm2(errsol) / norm2(solana) + WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid +! + CALL putarr(fid, '/xgrid', xgrid, 'r') + CALL putarr(fid, '/ygrid', ygrid, '\theta') + CALL putarr(fid, '/sol', solcal, 'Solutions') + CALL putarr(fid, '/solana', solana,'Exact solutions') + CALL putarr(fid, '/errors', errsol, 'Errors') +! +! Check derivatives d/dx and d/dy +! + WRITE(*,'(/a)') '*** Checking gradient' + DO i=0,nx + DO j=0,ny + IF( mbess .EQ. 0 ) THEN + solana(i,j) = -2.0d0 * xgrid(i) + ELSE + solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * & + & xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) + END IF + END DO + END DO +! + jder = (/1,0/) + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + errsol = solana - solcal + CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions') + WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol) +! + DO i=0,nx + DO j=0,ny + solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j)) + END DO + END DO +! + jder = (/0,1/) + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions') + errsol = solana - solcal + WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol) +! + WRITE(*,'(/a)') '---' + WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat + WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s) ', tconv + WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s) ', treord + WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact + WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv + WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s) ', tsolv + WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1 +!=========================================================================== +! 5.0 Clear the matrix and recompute +! + WRITE(*,'(/a)') 'Recompute the solver ...' + t0 = seconds() + CALL clear_mat(mat) + CALL dismat(splxy, mat) + CALL ibcmat(mat, ny) + tmat = seconds()-t0 +! + t0 = seconds() + CALL numfact(mat, debug=.FALSE.) + tfact = seconds()-t0 +! + t0 = seconds() + ALLOCATE(newsol(nrank)) + CALL bsolve(mat, rhs, newsol) + newsol(1:ny-1) = newsol(ny) + tsolv = seconds()-t0 +! + WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol)) + WRITE(*,'(/a)') '---' + WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat + WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact + WRITE(*,'(a,1pe12.3)') 'Backsolve(2) time (s) ', tsolv + WRITE(*,'(a,1pe12.3)') 'Total (s) ', tmat+tfact+tsolv +! + DEALLOCATE(newsol) +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xgrid, rhs, sol) + DEALLOCATE(solcal, solana, errsol) + DEALLOCATE(bcoef) + DEALLOCATE(arr) + CALL destroy_sp(splxy) + CALL destroy(mat) +! + CALL closef(fid) +!=========================================================================== +! +CONTAINS + FUNCTION norm2(x) +! +! Compute the 2-norm of array x +! + IMPLICIT NONE + DOUBLE PRECISION :: norm2 + DOUBLE PRECISION, INTENT(in) :: x(:,:) + DOUBLE PRECISION :: sum2 + INTEGER :: i, j +! + sum2 = 0.0d0 + DO i=1,SIZE(x,1) + DO j=1,SIZE(x,2) + sum2 = sum2 + x(i,j)**2 + END DO + END DO + norm2 = SQRT(sum2) + END FUNCTION norm2 + SUBROUTINE prnmat(label, mat) + CHARACTER(len=*) :: label + DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat + INTEGER :: i + WRITE(*,'(/a)') TRIM(label) + DO i=1,SIZE(mat,1) + WRITE(*,'(10(1pe12.3))') mat(i,:) + END DO + END SUBROUTINE prnmat +END PROGRAM main +! +!+++ +SUBROUTINE meshdist(c, x, nx) +! +! Construct an 1d non-equidistant mesh given a +! mesh distribution function. +! + IMPLICIT NONE + DOUBLE PRECISION, INTENT(in) :: c(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + INTEGER :: nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) +CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) + END FUNCTION fdist +END SUBROUTINE meshdist +!+++ diff --git a/src/tsparse4.f90 b/src/tsparse4.f90 new file mode 100644 index 0000000..40b900f --- /dev/null +++ b/src/tsparse4.f90 @@ -0,0 +1,722 @@ +!> +!> @file tsparse4.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +! +! Solving the 2d PDE using splines and PARDISO non-symmetric matrix +! +! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0 +! exact solution: f(x,y) = (1-x^2) x^m cos(my) +! +MODULE pde2d_mod + USE bsplines + USE pardiso_bsplines + IMPLICIT NONE +CONTAINS +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE dismat(spl, mat) +! +! Assembly of FE matrix mat using spline spl +! + TYPE(spline2d), INTENT(in) :: spl + TYPE(pardiso_mat), INTENT(inout) :: mat +! + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: n2, nidbas2, ndim2, ng2 + INTEGER :: i, j, ig1, ig2 + INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol + DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:) + DOUBLE PRECISION:: contrib +! + INTEGER :: kterms ! Number of terms in weak form + INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:) ! Derivative order + DOUBLE PRECISION, ALLOCATABLE :: coefs(:,:,:) ! Terms in weak form + INTEGER, ALLOCATABLE :: left1(:), left2(:) +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) + WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1 + WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2 +! +! Gauss quadature +! + CALL get_gauss(spl%sp1, ng1) + CALL get_gauss(spl%sp2, ng2) + ALLOCATE(xg1(ng1), wg1(ng1)) + ALLOCATE(xg2(ng1), wg2(ng1)) +! +! Weak form +! + kterms = mat%nterms + ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2)) + ALLOCATE(coefs(kterms,ng1,ng2)) +! + ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative + ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! +!=========================================================================== +! 2.0 Assembly loop +! + ALLOCATE(left1(ng1)) + ALLOCATE(left2(ng2)) + DO i=1,n1 + CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) + left1 = i + CALL basfun(xg1, spl%sp1, fun1, left1) + DO j=1,n2 + CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) + left2 = j + CALL basfun(xg2, spl%sp2, fun2, left2) +! + DO ig1=1,ng1 + DO ig2=1,ng2 + CALL coefeq(xg1(ig1), xg2(ig2), & + & idert(:,:,ig1,ig2), & + & iderw(:,:,ig1,ig2), & + & coefs(:,ig1,ig2)) + END DO + END DO +! + DO iw1=0,nidbas1 ! Weight function in dir 1 + igw1 = i+iw1 + DO iw2=0,nidbas2 ! Weight function in dir 2 + igw2 = MODULO(j+iw2-1, n2) + 1 + irow = igw2 + (igw1-1)*n2 + DO it1=0,nidbas1 ! Test function in dir 1 + igt1 = i+it1 + DO it2=0,nidbas2 ! Test function in dir 2 + igt2 = MODULO(j+it2-1, n2) + 1 + jcol = igt2 + (igt1-1)*n2 +!------------- + contrib = 0.0d0 + DO ig1=1,ng1 + DO ig2=1,ng2 + DO iterm=1,kterms + contrib = contrib + & + & fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * & + & fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * & + & coefs(iterm,ig1,ig2) * & + & fun2(it2,idert(iterm,2,ig1,ig2),ig2) * & + & fun1(it1,idert(iterm,1,ig1,ig2),ig1) * & + & wg1(ig1) * wg2(ig2) + END DO + END DO + END DO + CALL updtmat(mat, irow, jcol, contrib) +!------------- + END DO + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xg1, wg1, fun1) + DEALLOCATE(xg2, wg2, fun2) + DEALLOCATE(idert, iderw, coefs) + DEALLOCATE(left1,left2) +! + CONTAINS + SUBROUTINE coefeq(x, y, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x, y + INTEGER, INTENT(out) :: idt(:,:), idw(:,:) + DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1)) +! +! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy +! + c(1) = x ! + idt(1,1) = 1 + idt(1,2) = 0 + idw(1,1) = 1 + idw(1,2) = 0 + ! + c(2) = 1.d0/x + idt(2,1) = 0 + idt(2,2) = 1 + idw(2,1) = 0 + idw(2,2) = 1 + END SUBROUTINE coefeq + END SUBROUTINE dismat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE disrhs(mbess, spl, rhs) +! +! Assembly the RHS using 2d spline spl +! + INTEGER, INTENT(in) :: mbess + TYPE(spline2d), INTENT(in) :: spl + DOUBLE PRECISION, INTENT(out) :: rhs(:) + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: n2, nidbas2, ndim2, ng2 + INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank + DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:) + DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:) + DOUBLE PRECISION:: contrib +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) +! + ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives) + ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives) +! +! Gauss quadature +! + CALL get_gauss(spl%sp1, ng1) + CALL get_gauss(spl%sp2, ng2) + ALLOCATE(xg1(ng1), wg1(ng1)) + ALLOCATE(xg2(ng1), wg2(ng1)) +!=========================================================================== +! 2.0 Assembly loop +! + nrank = SIZE(rhs) + rhs(1:nrank) = 0.0d0 +! + DO i=1,n1 + CALL get_gauss(spl%sp1, ng1, i, xg1, wg1) + DO ig1=1,ng1 + CALL basfun(xg1(ig1), spl%sp1, fun1, i) + DO j=1,n2 + CALL get_gauss(spl%sp2, ng2, j, xg2, wg2) + DO ig2=1,ng2 + CALL basfun(xg2(ig2), spl%sp2, fun2, j) + contrib = wg1(ig1)*wg2(ig2) * & + & rhseq(xg1(ig1),xg2(ig2), mbess) + DO k1=0,nidbas1 + i1 = i+k1 + DO k2=0,nidbas2 + j2 = MODULO(j+k2-1,n2) + 1 + ij = j2 + (i1-1)*n2 + rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1) + END DO + END DO + END DO + END DO + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(xg1, wg1, fun1) + DEALLOCATE(xg2, wg2, fun2) +! + CONTAINS + DOUBLE PRECISION FUNCTION rhseq(x1, x2, m) + DOUBLE PRECISION, INTENT(in) :: x1, x2 + INTEGER, INTENT(in) :: m + rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2) + END FUNCTION rhseq + END SUBROUTINE disrhs +! +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE ibcmat(mat, ny) +! +! Apply BC on matrix +! + TYPE(pardiso_mat), INTENT(inout) :: mat + INTEGER, INTENT(in) :: ny + INTEGER :: nrank, i, j + DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:) +!=========================================================================== +! 1.0 Prologue +! + nrank = mat%rank + ALLOCATE(zsum(nrank), arr(nrank)) +!=========================================================================== +! 2.0 Unicity at the axis +! +! The vertical sum on the NY-th row +! + zsum = 0.0d0 + DO i=1,ny + arr = 0.0d0 + CALL getrow(mat, i, arr) + zsum(:) = zsum(:) + arr(:) + END DO + zsum(ny) = SUM(zsum(1:ny)) ! using symmetry + CALL putrow(mat, ny, zsum) +! +! The away operator +! + DO i = 1,ny-1 + arr = 0.0d0; arr(i) = 1.0d0 + CALL putrow(mat, i, arr) + END DO +! +!=========================================================================== +! 3.0 Dirichlet on right boundary +! + DO i = nrank, nrank-ny+1, -1 + arr = 0.0d0; arr(i) = 1.0d0 + CALL putrow(mat, i, arr) + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(zsum, arr) +! + END SUBROUTINE ibcmat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE ibcrhs(rhs, ny) +! +! Apply BC on RHS +! + DOUBLE PRECISION, INTENT(inout) :: rhs(:) + INTEGER, INTENT(in) :: ny + INTEGER :: nrank + DOUBLE PRECISION :: zsum +!=========================================================================== +! 1.0 Prologue +! + nrank = SIZE(rhs,1) +!=========================================================================== +! 2.0 Unicity at the axis +! +! The vertical sum +! + zsum = SUM(rhs(1:ny)) + rhs(ny) = zsum + rhs(1:ny-1) = 0.0d0 +!=========================================================================== +! 3.0 Dirichlet on right boundary +! + rhs(nrank-ny+1:nrank) = 0.0d0 + END SUBROUTINE ibcrhs +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +END MODULE pde2d_mod +PROGRAM main + USE pde2d_mod + USE futils +! + IMPLICIT NONE + INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms + LOGICAL :: nlppform + INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it + DOUBLE PRECISION :: pi, coefx(5), coefy(5) + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol + TYPE(spline2d) :: splxy + TYPE(pardiso_mat) :: mat, newmat +! + CHARACTER(len=128) :: file='pde2d_sym_pardiso.h5' + INTEGER :: fid + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr + DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol + DOUBLE PRECISION :: seconds, mem, dopla + DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2 + DOUBLE PRECISION :: tconv, treord + INTEGER :: nits=100 + LOGICAL :: nlmetis, nlforce_zero, nlpos +! + NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlmetis, & + & nlpos, nlforce_zero, coefx, coefy +!=========================================================================== +! 1.0 Prologue +! +! Read in data specific to run +! + nx = 8 ! Number of intervals in x + ny = 8 ! Number of intervals in y + nidbas = (/3,3/) ! Degree of splines + ngauss = (/4,4/) ! Number of Gauss points/interval + mbess = 2 ! Exponent of differential problem + nterms = 2 ! Number of terms in weak form + nlppform = .TRUE. ! Use PPFORM for gridval or not + nlmetis = .FALSE. ! Use metis ordering or minimum degree + nlforce_zero = .TRUE. ! Remove existing nodes with zero val in putele/row/ele + nlpos = .TRUE. ! Matrix is positive definite + coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function + coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function +! + READ(*,newrun) + WRITE(*,newrun) +! +! Define grid on x (=radial) & y (=poloidal) axis +! + pi = 4.0d0*ATAN(1.0d0) + ALLOCATE(xgrid(0:nx), ygrid(0:ny)) + xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 + CALL meshdist(coefx, xgrid, nx) + ygrid(0) = 0.0d0; ygrid(ny) = 2.d0*pi + CALL meshdist(coefy, ygrid, ny) +! +! Create hdf5 file +! + CALL creatf(file, fid, 'PDE2D Result File', real_prec='d') + CALL attach(fid, '/', 'NX', nx) + CALL attach(fid, '/', 'NY', ny) + CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) + CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) + CALL attach(fid, '/', 'NGAUSS1', ngauss(1)) + CALL attach(fid, '/', 'NGAUSS2', ngauss(2)) + CALL attach(fid, '/', 'MBESS', mbess) +!=========================================================================== +! 2.0 Discretize the PDE +! +! Set up spline +! + t0 = seconds() + CALL set_spline(nidbas, ngauss, & + & xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform) +! +! FE matrix assembly +! + nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix + WRITE(*,'(a,i8)') 'nrank', nrank +! + CALL init(nrank, nterms, mat, nlsym=.FALSE., nlpos=nlpos, & + & nlforce_zero=nlforce_zero) + CALL dismat(splxy, mat) + ALLOCATE(arr(nrank)) + IF(nrank.LT.100) THEN + DO i=1,nrank + CALL getele(mat, i, i, arr(i)) + END DO + WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', arr + END IF +! +! BC on Matrix +! + WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero + WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat) + CALL ibcmat(mat, ny) + IF(nrank.LT.100) THEN + DO i=1,nrank + CALL getele(mat, i, i, arr(i)) + END DO + WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', arr + WRITE(*,'(a)') 'Last rows' + DO i=nrank-ny,nrank + CALL getrow(mat, i, arr) + WRITE(*,'(10(1pe12.3))') arr + END DO + END IF + tmat = seconds() - t0 +! +! RHS assembly +! + ALLOCATE(rhs(nrank), sol(nrank)) + CALL disrhs(mbess, splxy, rhs) +! +! BC on RHS +! + CALL ibcrhs(rhs, ny) +! + CALL putarr(fid, '/RHS', rhs, 'RHS of linear system') + WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat) + WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem() +!=========================================================================== +! 3.0 Solve the dicretized system +! + t0 = seconds() + CALL to_mat(mat) + tconv = seconds() -t0 + WRITE(*,'(/a,l4)') 'associated(mat%mat)', ASSOCIATED(mat%mat) + WRITE(*,'(a,1pe12.3)') 'Mem used (MB) after TO_MAT', mem() +! + t0 = seconds() + CALL reord_mat(mat) + CALL putmat(fid, '/MAT', mat) + treord = seconds() - t0 +! + t0 = seconds() + CALL numfact(mat) + tfact = seconds() - t0 + + WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem() + WRITE(*,'(/a,i12)') 'Number of nonzeros in factors of A = ',mat%p%iparm(18) + WRITE(*,'(a,i12)') 'Number of factorization MFLOPS = ',mat%p%iparm(19) + gflops1 = mat%p%iparm(19) / tfact / 1.d3 +! + CALL bsolve(mat, rhs, sol) + WRITE(*,'(/a, 1pe16.8)') 'Norm of sol =', SQRT(DOT_PRODUCT(sol,sol)) + t0 = seconds() + DO it=1,nits ! nits iterations for timing + CALL bsolve(mat, rhs, sol) + sol(1:ny-1) = sol(ny) + END DO + WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem() + tsolv = (seconds() - t0)/REAL(nits) +! +! Spline coefficients, taking into account of periodicity in y +! Note: in SOL, y was numbered first. +! + dimx = splxy%sp1%dim + dimy = splxy%sp2%dim + ALLOCATE(bcoef(0:dimx-1, 0:dimy-1)) + DO j=0,dimy-1 + DO i=0,dimx-1 + ij = MODULO(j,ny) + i*ny + 1 + bcoef(i,j) = sol(ij) + END DO + END DO + WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef) + CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution') +!=========================================================================== +! 4.0 Check the solution +! +! Check function values computed with various method +! + ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny)) + DO i=0,nx + DO j=0,ny + solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j)) + END DO + END DO + jder = (/0,0/) +! +! Compute PPFORM/BCOEFS at first call to gridval + CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef) +! + WRITE(*,'(/a)') '*** Checking solutions' + t0 = seconds() + DO it=1,nits ! nits iterations for timing + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + END DO + tgrid = (seconds() - t0)/REAL(nits) + errsol = solana - solcal + IF( SIZE(bcoef,2) .LE. 10 ) THEN + CALL prnmat('BCOEF', bcoef) + CALL prnmat('SOLANA', solana) + CALL prnmat('SOLCAL', solcal) + CALL prnmat('ERRSOL', errsol) + END IF + WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & + & norm2(errsol) / norm2(solana) + WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s) ', tgrid +! + CALL putarr(fid, '/xgrid', xgrid, 'r') + CALL putarr(fid, '/ygrid', ygrid, '\theta') + CALL putarr(fid, '/sol', solcal, 'Solutions') + CALL putarr(fid, '/solana', solana,'Exact solutions') + CALL putarr(fid, '/errors', errsol, 'Errors') +! +! Check derivatives d/dx and d/dy +! + WRITE(*,'(/a)') '*** Checking gradient' + DO i=0,nx + DO j=0,ny + IF( mbess .EQ. 0 ) THEN + solana(i,j) = -2.0d0 * xgrid(i) + ELSE + solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * & + & xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) + END IF + END DO + END DO +! + jder = (/1,0/) + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + errsol = solana - solcal + CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions') + WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx', norm2(errsol) +! + DO i=0,nx + DO j=0,ny + solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j)) + END DO + END DO +! + jder = (/0,1/) + CALL gridval(splxy, xgrid, ygrid, solcal, jder) + CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions') + errsol = solana - solcal + WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy', norm2(errsol) +! + WRITE(*,'(/a)') '---' + WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat + WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s) ', tconv + WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s) ', treord + WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact + WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv + WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s) ', tsolv + WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1 +!=========================================================================== +! 5.0 Clear the matrix and recompute +! + WRITE(*,'(/a)') 'Recompute the solver ...' + t0 = seconds() + CALL clear_mat(mat) + CALL dismat(splxy, mat) + CALL ibcmat(mat, ny) + tmat = seconds()-t0 +! + t0 = seconds() + CALL numfact(mat) + tfact = seconds()-t0 + gflops1 = mat%p%iparm(19) / tfact / 1.d3 +! + t0 = seconds() + ALLOCATE(newsol(nrank)) + CALL bsolve(mat, rhs, newsol) + newsol(1:ny-1) = newsol(ny) + tsolv = seconds()-t0 +! + WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol)) + WRITE(*,'(/a)') '---' + WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat + WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact + WRITE(*,'(a,1pe12.3)') 'Backsolve(2) time (s) ', tsolv + WRITE(*,'(a,1pe12.3)') 'Total (s) ', tmat+tfact+tsolv + WRITE(*,'(a,2f10.3)') 'Factor Gflop/s', gflops1 +! +!=========================================================================== +! 6.0 Another matrix to solve +! + WRITE(*,'(/a, 1pe16.8)') 'Norm of sol =', SQRT(DOT_PRODUCT(sol,sol)) +!!$ PRINT*, 'current/last matid, matid', current_matid, last_matid, mat%matid +! + WRITE(*,'(/a)') ' Another solver ...' +! + CALL init(nrank, nterms, newmat, nlsym=.FALSE., nlpos=nlpos, & + & nlforce_zero=nlforce_zero) + CALL mcopy(mat, newmat) +!!$ CALL clear_mat(newmat) +!!$ CALL maddto(newmat, 1000.0d0, mat) + CALL factor(newmat) +!!$ CALL dismat(splxy, newmat) +!!$ CALL ibcmat(newmat, ny) +!!$ CALL to_mat(newmat) +!!$ CALL reord_mat(newmat) +!!$ CALL numfact(newmat) + CALL bsolve(newmat, rhs, newsol) + WRITE(*,'(/a, 1pe16.8)') 'Norm of newsol =', SQRT(DOT_PRODUCT(newsol,newsol)) +!!$ PRINT*, 'current/last matid, matid', current_matid, last_matid, newmat%matid +! + CALL bsolve(mat, rhs, sol) + WRITE(*,'(/a, 1pe16.8)') 'Norm of sol =', SQRT(DOT_PRODUCT(sol,sol)) +!!$ PRINT*, 'current/last matid, matid', current_matid, last_matid, mat%matid +! +!=========================================================================== + +! 9.0 Epilogue +! + DEALLOCATE(xgrid, rhs, sol) + DEALLOCATE(solcal, solana, errsol) + DEALLOCATE(bcoef) + DEALLOCATE(arr) + DEALLOCATE(newsol) + CALL destroy_sp(splxy) + CALL destroy(mat) + CALL destroy(newmat) +! + CALL closef(fid) +!=========================================================================== +! +CONTAINS + FUNCTION norm2(x) +! +! Compute the 2-norm of array x +! + IMPLICIT NONE + DOUBLE PRECISION :: norm2 + DOUBLE PRECISION, INTENT(in) :: x(:,:) + DOUBLE PRECISION :: sum2 + INTEGER :: i, j +! + sum2 = 0.0d0 + DO i=1,SIZE(x,1) + DO j=1,SIZE(x,2) + sum2 = sum2 + x(i,j)**2 + END DO + END DO + norm2 = SQRT(sum2) + END FUNCTION norm2 + SUBROUTINE prnmat(label, mat) + CHARACTER(len=*) :: label + DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat + INTEGER :: i + WRITE(*,'(/a)') TRIM(label) + DO i=1,SIZE(mat,1) + WRITE(*,'(10(1pe12.3))') mat(i,:) + END DO + END SUBROUTINE prnmat +END PROGRAM main +! +!+++ +SUBROUTINE meshdist(c, x, nx) +! +! Construct an 1d non-equidistant mesh given a +! mesh distribution function. +! + IMPLICIT NONE + DOUBLE PRECISION, INTENT(in) :: c(5) + INTEGER, INTENT(iN) :: nx + DOUBLE PRECISION, INTENT(inout) :: x(0:nx) + INTEGER :: nintg + DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint + DOUBLE PRECISION :: a, b, dx, f0, f1, scal + INTEGER :: i, k +! + a=x(0) + b=x(nx) + nintg = 10*nx + ALLOCATE(xint(0:nintg), fint(0:nintg)) +! +! Mesh distribution +! + dx = (b-a)/REAL(nintg) + xint(0) = a + fint(0) = 0.0d0 + f1 = fdist(xint(0)) + DO i=1,nintg + f0 = f1 + xint(i) = xint(i-1) + dx + f1 = fdist(xint(i)) + fint(i) = fint(i-1) + 0.5*(f0+f1) + END DO +! +! Normalization +! + scal = REAL(nx) / fint(nintg) + fint(0:nintg) = fint(0:nintg) * scal +!!$ WRITE(*,'(a/(10f8.3))') 'FINT', fint +! +! Obtain mesh point by (inverse) interpolation +! + k = 1 + DO i=1,nintg-1 + IF( fint(i) .GE. REAL(k) ) THEN + x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * & + & (k-fint(i)) + k = k+1 + END IF + END DO +! + DEALLOCATE(xint, fint) +CONTAINS + DOUBLE PRECISION FUNCTION fdist(x) + DOUBLE PRECISION, INTENT(in) :: x + fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2) + END FUNCTION fdist +END SUBROUTINE meshdist +!+++ diff --git a/src/wsmp_mod.f90 b/src/wsmp_mod.f90 new file mode 100644 index 0000000..8026640 --- /dev/null +++ b/src/wsmp_mod.f90 @@ -0,0 +1,1835 @@ +!> +!> @file wsmp_mod.f90 +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +MODULE wsmp_bsplines +! +! WSMP_BSPLINES: Simple interface to the sparse direct solver WSMP. +! +! T.M. Tran, CRPP-EPFL +! November 2011 +! + USE sparse + IMPLICIT NONE +! + INTEGER, SAVE :: current_matid = -1 + INTEGER, SAVE :: last_matid = -1 +! + TYPE wsmp_param + INTEGER :: iparm(64) + DOUBLE PRECISION :: dparm(64) + END TYPE wsmp_param +! + TYPE wsmp_mat + INTEGER :: matid=-1 + INTEGER :: rank=0, nnz + INTEGER :: nterms, kmat, nrhs + LOGICAL :: nlsym + LOGICAL :: nlpos + LOGICAL :: nlforce_zero + TYPE(spmat), POINTER :: mat => NULL() + INTEGER, POINTER :: cols(:) => NULL() + INTEGER, POINTER :: irow(:) => NULL() + INTEGER, POINTER :: perm(:) => NULL() + INTEGER, POINTER :: invp(:) => NULL() + INTEGER, POINTER :: mrp(:) => NULL() + DOUBLE PRECISION, POINTER :: diag(:) => NULL() + DOUBLE PRECISION, POINTER :: val(:) => NULL() + DOUBLE PRECISION, POINTER :: aux(:) => NULL() + TYPE(wsmp_param) :: p + END TYPE wsmp_mat +! + TYPE zwsmp_mat + INTEGER :: matid=-1 + INTEGER :: rank=0, nnz + INTEGER :: nterms, kmat, nrhs + LOGICAL :: nlherm + LOGICAL :: nlsym + LOGICAL :: nlpos + LOGICAL :: nlforce_zero + TYPE(zspmat), POINTER :: mat => NULL() + INTEGER, POINTER :: cols(:) => NULL() + INTEGER, POINTER :: irow(:) => NULL() + INTEGER, POINTER :: perm(:) => NULL() + INTEGER, POINTER :: invp(:) => NULL() + INTEGER, POINTER :: mrp(:) => NULL() + DOUBLE COMPLEX, POINTER :: diag(:) => NULL() + DOUBLE COMPLEX, POINTER :: val(:) => NULL() + DOUBLE COMPLEX, POINTER :: aux(:) => NULL() + TYPE(wsmp_param) :: p + END TYPE zwsmp_mat +! + INTERFACE init + MODULE PROCEDURE init_wsmp_mat, init_zwsmp_mat + END INTERFACE init +! + INTERFACE check_mat + MODULE PROCEDURE check_wsmp_mat, check_zwsmp_mat + END INTERFACE check_mat +! + INTERFACE clear_mat + MODULE PROCEDURE clear_wsmp_mat, clear_zwsmp_mat + END INTERFACE clear_mat +! + INTERFACE updtmat + MODULE PROCEDURE updt_wsmp_mat, updt_zwsmp_mat + END INTERFACE updtmat +! + INTERFACE putele + MODULE PROCEDURE putele_wsmp_mat, putele_zwsmp_mat + END INTERFACE putele +! + INTERFACE getele + MODULE PROCEDURE getele_wsmp_mat, getele_zwsmp_mat + END INTERFACE getele +! + INTERFACE putrow + MODULE PROCEDURE putrow_wsmp_mat, putrow_zwsmp_mat + END INTERFACE putrow +! + INTERFACE getrow + MODULE PROCEDURE getrow_wsmp_mat, getrow_zwsmp_mat + END INTERFACE getrow +! + INTERFACE putcol + MODULE PROCEDURE putcol_wsmp_mat, putcol_zwsmp_mat + END INTERFACE putcol +! + INTERFACE getcol + MODULE PROCEDURE getcol_wsmp_mat, getcol_zwsmp_mat + END INTERFACE getcol +! + INTERFACE get_count + MODULE PROCEDURE get_count_wsmp_mat, get_count_zwsmp_mat + END INTERFACE get_count +! + INTERFACE to_mat + MODULE PROCEDURE to_wsmp_mat, to_zwsmp_mat + END INTERFACE to_mat +! + INTERFACE reord_mat + MODULE PROCEDURE reord_wsmp_mat, reord_zwsmp_mat + END INTERFACE reord_mat +! + INTERFACE numfact + MODULE PROCEDURE numfact_wsmp_mat, numfact_zwsmp_mat + END INTERFACE numfact +! + INTERFACE factor + MODULE PROCEDURE factor_wsmp_mat, factor_zwsmp_mat + END INTERFACE factor +! + INTERFACE bsolve + MODULE PROCEDURE bsolve_wsmp_mat1, bsolve_wsmp_matn, & + & bsolve_zwsmp_mat1, bsolve_zwsmp_matn + END INTERFACE bsolve +! + INTERFACE vmx + MODULE PROCEDURE vmx_wsmp_mat, vmx_wsmp_matn, & + & vmx_zwsmp_mat, vmx_zwsmp_matn + END INTERFACE vmx +! + INTERFACE destroy + MODULE PROCEDURE destroy_wsmp_mat, destroy_zwsmp_mat + END INTERFACE destroy +! + INTERFACE putmat + MODULE PROCEDURE put_wsmp_mat, put_zwsmp_mat + END INTERFACE putmat +! + INTERFACE getmat + MODULE PROCEDURE get_wsmp_mat, get_zwsmp_mat + END INTERFACE getmat +! + INTERFACE mcopy + MODULE PROCEDURE mcopy_wsmp_mat, mcopy_zwsmp_mat + END INTERFACE mcopy +! + INTERFACE maddto + MODULE PROCEDURE maddto_wsmp_mat, maddto_zwsmp_mat + END INTERFACE maddto +! + INTERFACE psum_mat + MODULE PROCEDURE psum_wsmp_mat, psum_zwsmp_mat + END INTERFACE psum_mat +! + INTERFACE p2p_mat + MODULE PROCEDURE p2p_wsmp_mat, p2p_zwsmp_mat + END INTERFACE p2p_mat +! +CONTAINS +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE init_wsmp_mat(n, nterms, mat, kmat, nlsym, nlpos, & + & nlforce_zero) +! +! Initialize an empty sparse wsmp matrix +! + INTEGER, INTENT(in) :: n, nterms + TYPE(wsmp_mat) :: mat + INTEGER, OPTIONAL, INTENT(in) :: kmat + LOGICAL, OPTIONAL, INTENT(in) :: nlsym + LOGICAL, OPTIONAL, INTENT(in) :: nlpos + LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero + INTEGER :: info + INTEGER :: idummy = 0 + DOUBLE PRECISION :: dummy = 0.0d0 +! +! Store away (valid) current matrix id +! + IF(current_matid .GE. 0) THEN + CALL wstoremat(current_matid, info) + IF(info.NE.0) THEN + WRITE(*,'(a,i4)') 'INIT: WSTOREMAT failed WITH error', info + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF + END IF + last_matid = last_matid+1 + mat%matid = last_matid + current_matid = mat%matid +! +! Initialize sparse matrice structure +! + mat%rank = n + mat%nterms = nterms + mat%nnz = 0 + mat%nlsym = .FALSE. + mat%nlpos = .TRUE. + mat%nrhs = 1 + mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val + IF(PRESENT(kmat)) mat%kmat = kmat + IF(PRESENT(nlsym)) mat%nlsym = nlsym + IF(PRESENT(nlpos)) mat%nlpos = nlpos + IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero + IF(ASSOCIATED(mat%mat)) DEALLOCATE(mat%mat) + ALLOCATE(mat%mat) + CALL init(n, mat%mat) +! +! Fill 'iparm' and 'dparm' with default values +! + mat%p%iparm(1:3) = 0 + IF(mat%nlsym) THEN + CALL wssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, & + & mat%perm, mat%invp, dummy, mat%rank, mat%nrhs, & + & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & + & mat%p%dparm) + IF(mat%nlpos) THEN + mat%p%iparm(31) = 0 + ELSE +!!$ mat%p%iparm(31) = 1 ! LDL^T without pivoting + mat%p%iparm(31) = 2 ! LDL^T with pivoting + END IF + ELSE + CALL wgsmp(mat%rank, mat%irow, mat%cols, mat%val, dummy, mat%rank, & + & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) + END IF + IF(mat%p%iparm(64).NE.0) THEN + WRITE(*,'(a,i4)') 'INIT: Initialization failed with error', & + & mat%p%iparm(64) + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF +!!$ WRITE(*,'(/a/(10i8))') 'iparm', mat%p%iparm +!!$ WRITE(*,'(/a/(10(1pe8.1)))') 'dparm', mat%p%dparm +! + CALL setup_wsmp(mat%p%iparm, mat%p%dparm) +! + CONTAINS + SUBROUTINE setup_wsmp(iparm, dparm) + INTEGER :: iparm(:) + DOUBLE PRECISION :: dparm(:) + END SUBROUTINE setup_wsmp + END SUBROUTINE init_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE init_zwsmp_mat(n, nterms, mat, kmat, nlsym, nlherm, & + & nlpos, nlforce_zero) +! +! Initialize an empty sparse wsmp matrix +! + INTEGER, INTENT(in) :: n, nterms + TYPE(zwsmp_mat) :: mat + INTEGER, OPTIONAL, INTENT(in) :: kmat + LOGICAL, OPTIONAL, INTENT(in) :: nlsym + LOGICAL, OPTIONAL, INTENT(in) :: nlherm + LOGICAL, OPTIONAL, INTENT(in) :: nlpos + LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero + INTEGER :: info + INTEGER :: idummy = 0 + DOUBLE COMPLEX :: dummy = 0.0d0 +! +! Store away (valid) current matrix id +! + IF(current_matid .GE. 0) THEN + CALL wstoremat(current_matid, info) + IF(info.NE.0) THEN + WRITE(*,'(a,i4)') 'INIT: WSTOREMAT failed WITH error', info + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF + END IF + last_matid = last_matid+1 + mat%matid = last_matid + current_matid = mat%matid +! +! Initialize sparse matrice structure +! + mat%rank = n + mat%nterms = nterms + mat%nnz = 0 + mat%nlsym = .FALSE. + mat%nlherm = .FALSE. + mat%nlpos = .TRUE. + mat%nrhs = 1 + mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val + IF(PRESENT(kmat)) mat%kmat = kmat + IF(PRESENT(nlsym)) mat%nlsym = nlsym + IF(PRESENT(nlherm)) mat%nlherm = nlherm + IF(PRESENT(nlpos)) mat%nlpos = nlpos + IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero + IF(ASSOCIATED(mat%mat)) DEALLOCATE(mat%mat) + ALLOCATE(mat%mat) + CALL init(n, mat%mat) +! +! Fill 'iparm' and 'dparm' with default values +! + mat%p%iparm(1:3) = 0 + IF(mat%nlherm .OR. mat%nlsym) THEN + CALL zssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, & + & mat%perm, mat%invp, dummy, mat%rank, mat%nrhs, & + & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & + & mat%p%dparm) + IF(mat%nlherm) THEN + IF(mat%nlpos) THEN + mat%p%iparm(31) = 0 ! hermitian, positive definite + ELSE + mat%p%iparm(31) = 2 ! hermitian, no-definite, LDL^T with pivoting + END IF + ELSE + mat%p%iparm(31) = 3 ! non-hermitian, symmetric + END IF + ELSE + CALL zgsmp(mat%rank, mat%irow, mat%cols, mat%val, dummy, mat%rank, & + & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) + END IF + IF(mat%p%iparm(64).NE.0) THEN + WRITE(*,'(a,i4)') 'INIT: Initialization failed with error', & + & mat%p%iparm(64) + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF +!!$ WRITE(*,'(/a/(10i8))') 'iparm', mat%p%iparm +!!$ WRITE(*,'(/a/(10(1pe8.1)))') 'dparm', mat%p%dparm +! + CALL setup_wsmp(mat%p%iparm, mat%p%dparm) +! + CONTAINS + SUBROUTINE setup_wsmp(iparm, dparm) + INTEGER :: iparm(:) + DOUBLE PRECISION :: dparm(:) + END SUBROUTINE setup_wsmp + END SUBROUTINE init_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE check_wsmp_mat(mat) +! +! Check matrice id and recall the matrice if not current +! + TYPE(wsmp_mat) :: mat + INTEGER :: info +! + IF(.NOT.mat%nlsym) THEN + IF( mat%matid.NE.current_matid ) THEN + WRITE(*,'(a)') "Processing multi matrices is not possible "// & + & "for non-symetric matrices." + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + ELSE + RETURN + END IF + END IF +! + IF( mat%matid.NE.current_matid ) THEN + IF(current_matid .GE. 0) THEN + CALL wstoremat(current_matid, info) + IF(info.NE.0) THEN + WRITE(*,'(a,i3,a,i4)') 'Store matrix', current_matid, & + & ' failed with error', info + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF + END IF + CALL wrecallmat(mat%matid, info) + IF(info.NE.0) THEN + WRITE(*,'(a,i3,a,i4)') 'Recall matrix', mat%matid, & + & ' failed with error', info + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF + current_matid = mat%matid + END IF + END SUBROUTINE check_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE check_zwsmp_mat(mat) +! +! Check matrice id and recall the matrice if not current +! + TYPE(zwsmp_mat) :: mat + INTEGER :: info +! + IF(.NOT.mat%nlsym .AND. .NOT.mat%nlherm ) THEN + IF( mat%matid.NE.current_matid ) THEN + WRITE(*,'(a)') "Processing multi matrices is not possible "// & + & "for non-symetric/non-hermitian matrices." + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + ELSE + RETURN + END IF + END IF +! + IF( mat%matid.NE.current_matid ) THEN + IF(current_matid .GE. 0) THEN + CALL wstoremat(current_matid, info) + IF(info.NE.0) THEN + WRITE(*,'(a,i3,a,i4)') 'Store matrix', current_matid, & + & ' failed with error', info + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF + END IF + CALL wrecallmat(mat%matid, info) + IF(info.NE.0) THEN + WRITE(*,'(a,i3,a,i4)') 'Recall matrix', mat%matid, & + & ' failed with error', info + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF + current_matid = mat%matid + END IF + END SUBROUTINE check_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE clear_wsmp_mat(mat) +! +! Clear matrix, keeping its sparse structure unchanged +! + TYPE(wsmp_mat) :: mat +! + mat%val = 0.0d0 + END SUBROUTINE clear_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE clear_zwsmp_mat(mat) +! +! Clear matrix, keeping its sparse structure unchanged +! + TYPE(zwsmp_mat) :: mat +! + mat%val = (0.0d0, 0.0d0) + END SUBROUTINE clear_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE updt_wsmp_mat(mat, i, j, val) +! +! Update element Aij of wsmp matrix +! + TYPE(wsmp_mat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE PRECISION, INTENT(in) :: val + INTEGER :: k, s, e +! + IF(mat%nlsym) THEN ! Store only upper part for symmetric matrices + IF(i.GT.j) RETURN + END IF +! + IF(ASSOCIATED(mat%mat)) THEN + CALL updtmat(mat%mat, i, j, val) + ELSE + s = mat%irow(i) + e = mat%irow(i+1)-1 + k = isearch(mat%cols(s:e), j) + IF( k.GE.0 ) THEN + mat%val(s+k) = mat%val(s+k)+val + ELSE + WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF + END IF + END SUBROUTINE updt_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE updt_zwsmp_mat(mat, i, j, val) +! +! Update element Aij of wsmp matrix +! + TYPE(zwsmp_mat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE COMPLEX, INTENT(in) :: val + INTEGER :: k, s, e +! + IF(mat%nlherm .OR. mat%nlsym) THEN ! Store only upper part + IF(i.GT.j) RETURN + END IF +! + IF(ASSOCIATED(mat%mat)) THEN + CALL updtmat(mat%mat, i, j, val) + ELSE + s = mat%irow(i) + e = mat%irow(i+1)-1 + k = isearch(mat%cols(s:e), j) + IF( k.GE.0 ) THEN + IF(mat%nlherm) THEN + mat%val(s+k) = mat%val(s+k)+CONJG(val) ! CSR-UT* = CSC-LT + ELSE + mat%val(s+k) = mat%val(s+k)+val + END IF + ELSE + WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF + END IF + END SUBROUTINE updt_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putele_wsmp_mat(mat, i, j, val) +! +! Put element (i,j) of matrix +! + TYPE(wsmp_mat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE PRECISION, INTENT(in) :: val + INTEGER :: iput, jput + INTEGER :: k, s, e +! + iput = i + jput = j + IF(mat%nlsym) THEN + IF( i.GT.j ) THEN ! Lower triangular part + iput = j + jput = i + END IF + END IF +! + IF(ASSOCIATED(mat%mat)) THEN + CALL putele(mat%mat, iput, jput, val, & + & nlforce_zero=mat%nlforce_zero) + ELSE + s = mat%irow(iput) + e = mat%irow(iput+1)-1 + k = isearch(mat%cols(s:e), jput) + IF( k.GE.0 ) THEN + mat%val(s+k) = val + ELSE + IF(ABS(val) .GT. EPSILON(0.0d0)) THEN ! Ok for zero val + WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF + END IF + END IF + END SUBROUTINE putele_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putele_zwsmp_mat(mat, i, j, val) +! +! Put element (i,j) of matrix +! + TYPE(zwsmp_mat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE COMPLEX, INTENT(in) :: val + DOUBLE COMPLEX :: valput + INTEGER :: iput, jput + INTEGER :: k, s, e +! + iput = i + jput = j + valput = val + IF(mat%nlsym .OR. mat%nlherm) THEN + IF( i.GT.j ) THEN ! Lower triangular part + iput = j + jput = i + IF(mat%nlherm) THEN + valput = CONJG(val) + ELSE + valput = val + END IF + END IF + END IF +! + IF(ASSOCIATED(mat%mat)) THEN + CALL putele(mat%mat, iput, jput, valput, & + & nlforce_zero=mat%nlforce_zero) + ELSE + s = mat%irow(iput) + e = mat%irow(iput+1)-1 + k = isearch(mat%cols(s:e), jput) + IF( k.GE.0 ) THEN + IF(mat%nlherm) THEN + mat%val(s+k) = CONJG(valput) ! CSR-UT* = CSC-LT + ELSE + mat%val(s+k) = valput + END IF + ELSE + IF(ABS(val) .GT. EPSILON(0.0d0)) THEN ! Ok for zero val + WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF + END IF + END IF + END SUBROUTINE putele_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getele_wsmp_mat(mat, i, j, val) +! +! Get element (i,j) of sparse matrix +! + TYPE(wsmp_mat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE PRECISION, INTENT(out) :: val + INTEGER :: iget, jget + INTEGER :: k, s, e +! + iget = i + jget = j + IF(mat%nlsym) THEN + IF( i.GT.j ) THEN ! Lower triangular part + iget = j + jget = i + END IF + END IF +! + IF(ASSOCIATED(mat%mat)) THEN + CALL getele(mat%mat, iget, jget, val) + ELSE + s = mat%irow(iget) + e = mat%irow(iget+1)-1 + k = isearch(mat%cols(s:e), jget) + IF( k.GE.0 ) THEN + val =mat%val(s+k) + ELSE + val = 0.0d0 ! Assume zero val if not found + END IF + END IF + END SUBROUTINE getele_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getele_zwsmp_mat(mat, i, j, val) +! +! Get element (i,j) of sparse matrix +! + TYPE(zwsmp_mat) :: mat + INTEGER, INTENT(in) :: i, j + DOUBLE COMPLEX, INTENT(out) :: val + DOUBLE COMPLEX :: valget + INTEGER :: iget, jget + INTEGER :: k, s, e +! + iget = i + jget = j + IF(mat%nlherm .OR. mat%nlsym) THEN + IF( i.GT.j ) THEN ! Lower triangular part + iget = j + jget = i + END IF + END IF +! + IF(ASSOCIATED(mat%mat)) THEN + CALL getele(mat%mat, iget, jget, valget) + ELSE + s = mat%irow(iget) + e = mat%irow(iget+1)-1 + k = isearch(mat%cols(s:e), jget) + IF( k.GE.0 ) THEN + IF(mat%nlherm) THEN + valget = CONJG(mat%val(s+k)) ! CSR-UT* = CSC-LT + ELSE + valget = mat%val(s+k) + END IF + ELSE + valget = (0.0d0,0.0d0) ! Assume zero val if not found + END IF + END IF + val = valget + IF( i.GT.j ) THEN + IF(mat%nlherm) THEN + val = CONJG(valget) + END IF + END IF + END SUBROUTINE getele_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putrow_wsmp_mat(amat, i, arr) +! +! Put a row into sparse matrix +! + TYPE(wsmp_mat), INTENT(inout) :: amat + INTEGER, INTENT(in) :: i + DOUBLE PRECISION, INTENT(in) :: arr(:) + INTEGER :: j +! + DO j=1,amat%rank + CALL putele(amat, i, j, arr(j)) + END DO + END SUBROUTINE putrow_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putrow_zwsmp_mat(amat, i, arr) +! +! Put a row into sparse matrix +! + TYPE(zwsmp_mat), INTENT(inout) :: amat + INTEGER, INTENT(in) :: i + DOUBLE COMPLEX, INTENT(in) :: arr(:) + INTEGER :: j +! + DO j=1,amat%rank + CALL putele(amat, i, j, arr(j)) + END DO + END SUBROUTINE putrow_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getrow_wsmp_mat(amat, i, arr) +! +! Get a row from sparse matrix +! + TYPE(wsmp_mat), INTENT(in) :: amat + INTEGER, INTENT(in) :: i + DOUBLE PRECISION, INTENT(out) :: arr(:) + INTEGER :: j +! + DO j=1,amat%rank + CALL getele(amat, i, j, arr(j)) + END DO + END SUBROUTINE getrow_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getrow_zwsmp_mat(amat, i, arr) +! +! Get a row from sparse matrix +! + TYPE(zwsmp_mat), INTENT(in) :: amat + INTEGER, INTENT(in) :: i + DOUBLE COMPLEX, INTENT(out) :: arr(:) + INTEGER :: j +! + DO j=1,amat%rank + CALL getele(amat, i, j, arr(j)) + END DO + END SUBROUTINE getrow_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putcol_wsmp_mat(amat, j, arr) +! +! Put a column into sparse matrix +! + TYPE(wsmp_mat), INTENT(inout) :: amat + INTEGER, INTENT(in) :: j + DOUBLE PRECISION, INTENT(in) :: arr(:) + INTEGER :: i +! + DO i=1,amat%rank + CALL putele(amat, i, j, arr(i)) + END DO + END SUBROUTINE putcol_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE putcol_zwsmp_mat(amat, j, arr) +! +! Put a column into sparse matrix +! + TYPE(zwsmp_mat), INTENT(inout) :: amat + INTEGER, INTENT(in) :: j + DOUBLE COMPLEX, INTENT(in) :: arr(:) + INTEGER :: i +! + DO i=1,amat%rank + CALL putele(amat, i, j, arr(i)) + END DO + END SUBROUTINE putcol_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getcol_wsmp_mat(amat, j, arr) +! +! Get a column from sparse matrix +! + TYPE(wsmp_mat), INTENT(in) :: amat + INTEGER, INTENT(in) :: j + DOUBLE PRECISION, INTENT(out) :: arr(:) + INTEGER :: i +! + DO i=1,amat%rank + CALL getele(amat, i, j, arr(i)) + END DO + END SUBROUTINE getcol_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE getcol_zwsmp_mat(amat, j, arr) +! +! Get a column from sparse matrix +! + TYPE(zwsmp_mat), INTENT(in) :: amat + INTEGER, INTENT(in) :: j + DOUBLE COMPLEX, INTENT(out) :: arr(:) + INTEGER :: i +! + DO i=1,amat%rank + CALL getele(amat, i, j, arr(i)) + END DO + END SUBROUTINE getcol_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + INTEGER FUNCTION get_count_wsmp_mat(mat, nnz) +! +! Number of non-zeros in sparse matrix +! + TYPE(wsmp_mat) :: mat + INTEGER, INTENT(out), OPTIONAL :: nnz(:) + INTEGER :: i +! + IF(ASSOCIATED(mat%mat)) THEN + get_count_wsmp_mat = get_count(mat%mat, nnz) + ELSE + get_count_wsmp_mat = mat%nnz + IF(PRESENT(nnz)) THEN + DO i=1,mat%rank + nnz(i) = mat%irow(i+1)-mat%irow(i) + END DO + END IF + END IF + END FUNCTION get_count_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + INTEGER FUNCTION get_count_zwsmp_mat(mat, nnz) +! +! Number of non-zeros in sparse matrix +! + TYPE(zwsmp_mat) :: mat + INTEGER, INTENT(out), OPTIONAL :: nnz(:) + INTEGER :: i +! + IF(ASSOCIATED(mat%mat)) THEN + get_count_zwsmp_mat = get_count(mat%mat, nnz) + ELSE + get_count_zwsmp_mat = mat%nnz + IF(PRESENT(nnz)) THEN + DO i=1,mat%rank + nnz(i) = mat%irow(i+1)-mat%irow(i) + END DO + END IF + END IF + END FUNCTION get_count_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE to_wsmp_mat(mat, nlkeep) +! +! Convert linked list spmat to wsmp matrice structure +! + TYPE(wsmp_mat) :: mat + LOGICAL, INTENT(in), OPTIONAL :: nlkeep + INTEGER :: i, nnz, rank, s, e + LOGICAL :: nlclean +! + nlclean = .TRUE. + IF(PRESENT(nlkeep)) THEN + nlclean = .NOT. nlkeep + END IF +! +! Allocate the WSMP matrix structure +! + nnz = get_count(mat) + rank = mat%rank + mat%nnz = nnz + IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) + IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) + IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm) + IF(ASSOCIATED(mat%invp)) DEALLOCATE(mat%invp) + IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) + ALLOCATE(mat%val(nnz)) + IF(mat%nlsym) THEN + ALLOCATE(mat%perm(rank)) + ALLOCATE(mat%invp(rank)) + END IF + ALLOCATE(mat%irow(rank+1)) + ALLOCATE(mat%cols(nnz)) +! +! Fill WSMP structure and deallocate the sparse rows +! + mat%irow = 1 + DO i=1,rank + mat%irow(i+1) = mat%irow(i) + mat%mat%row(i)%nnz + s = mat%irow(i) + e = mat%irow(i+1)-1 + CALL getrow(mat%mat%row(i), mat%val(s:e), mat%cols(s:e)) + IF(nlclean) CALL destroy(mat%mat%row(i)) + END DO + IF(nlclean) DEALLOCATE(mat%mat) + END SUBROUTINE to_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE to_zwsmp_mat(mat, nlkeep) +! +! Convert linked list spmat to wsmp matrice structure +! + TYPE(zwsmp_mat) :: mat + LOGICAL, INTENT(in), OPTIONAL :: nlkeep + INTEGER :: i, nnz, rank, s, e + LOGICAL :: nlclean +! + nlclean = .TRUE. + IF(PRESENT(nlkeep)) THEN + nlclean = .NOT. nlkeep + END IF +! +! Allocate the WSMP matrix structure +! + nnz = get_count(mat) + rank = mat%rank + mat%nnz = nnz + IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) + IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) + IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm) + IF(ASSOCIATED(mat%invp)) DEALLOCATE(mat%invp) + IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) + ALLOCATE(mat%val(nnz)) + IF(mat%nlsym) THEN + ALLOCATE(mat%perm(rank)) + ALLOCATE(mat%invp(rank)) + END IF + ALLOCATE(mat%irow(rank+1)) + ALLOCATE(mat%cols(nnz)) +! +! Fill WSMP structure and deallocate the sparse rows +! + mat%irow = 1 + DO i=1,rank + mat%irow(i+1) = mat%irow(i) + mat%mat%row(i)%nnz + s = mat%irow(i) + e = mat%irow(i+1)-1 + CALL getrow(mat%mat%row(i), mat%val(s:e), mat%cols(s:e)) + IF(nlclean) CALL destroy(mat%mat%row(i)) + END DO + IF(mat%nlherm) THEN + mat%val(:) = CONJG(mat%val(:)) ! CSR-UT* = CSC-LT + END IF + IF(nlclean) DEALLOCATE(mat%mat) + END SUBROUTINE to_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE reord_wsmp_mat(mat) +! +! Reordering and symbolic factorization +! + TYPE(wsmp_mat) :: mat + DOUBLE PRECISION :: dummy +! +! Recall the matrice if not current +! + CALL check_mat(mat) +! + IF(mat%nlsym) THEN + mat%p%iparm(2) = 1 ! Ordering + mat%p%iparm(3) = 2 ! Symbolic factorization + CALL wssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, & + & mat%perm, mat%invp, dummy, mat%rank, mat%nrhs, & + & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & + & mat%p%dparm) + ELSE + mat%p%iparm(2) = 1 ! Analysis and reordering + mat%p%iparm(3) = 1 + CALL wgsmp(mat%rank, mat%irow, mat%cols, mat%val, dummy, mat%rank, & + & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) + END IF + IF(mat%p%iparm(64).NE.0) THEN + WRITE(*,'(a,i4)') 'REORD: Reordering failed with error', mat%p%iparm(64) + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF + END SUBROUTINE reord_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE reord_zwsmp_mat(mat) +! +! Reordering and symbolic factorization +! + TYPE(zwsmp_mat) :: mat + DOUBLE COMPLEX :: dummy +! +! Recall the matrice if not current +! + CALL check_mat(mat) +! + IF(mat%nlsym .OR. mat%nlherm) THEN + mat%p%iparm(2) = 1 ! Ordering + mat%p%iparm(3) = 2 ! Symbolic factorization + CALL zssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, & + & mat%perm, mat%invp, dummy, mat%rank, mat%nrhs, & + & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & + & mat%p%dparm) +!!$ WRITE(*,'(a,i3/(10i8))') 'REORD: matrice', mat%matid, mat%perm + ELSE + mat%p%iparm(2) = 1 ! Analysis and reordering + mat%p%iparm(3) = 1 + CALL zgsmp(mat%rank, mat%irow, mat%cols, mat%val, dummy, mat%rank, & + & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) + END IF + IF(mat%p%iparm(64).NE.0) THEN + WRITE(*,'(a,i4)') 'REORD: Reordering failed with error', mat%p%iparm(64) + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF + END SUBROUTINE reord_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE numfact_wsmp_mat(mat) +! +! Numerical factorization +! + TYPE(wsmp_mat) :: mat + DOUBLE PRECISION :: dummy +! +! Recall the matrice if not current +! + CALL check_mat(mat) +! + IF(mat%nlsym) THEN + mat%p%iparm(2) = 3 ! Numerical factorization + mat%p%iparm(3) = 3 + CALL wssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, & + & mat%perm, mat%invp, dummy, mat%rank, mat%nrhs, & + & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & + & mat%p%dparm) + ELSE + mat%p%iparm(2) = 2 ! Factorization + mat%p%iparm(3) = 2 + CALL wgsmp(mat%rank, mat%irow, mat%cols, mat%val, dummy, mat%rank, & + & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) + END IF + IF(mat%p%iparm(64).NE.0) THEN + WRITE(*,'(a,i4)') 'NUMFACT: Num. Factor failed with error', mat%p%iparm(64) + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF + END SUBROUTINE numfact_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE numfact_zwsmp_mat(mat) +! +! Numerical factorization +! + TYPE(zwsmp_mat) :: mat + DOUBLE COMPLEX :: dummy +! +! Recall the matrice if not current +! + CALL check_mat(mat) +! + IF(mat%nlsym .OR. mat%nlherm) THEN + mat%p%iparm(2) = 3 ! Numerical factorization + mat%p%iparm(3) = 3 + CALL zssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, & + & mat%perm, mat%invp, dummy, mat%rank, mat%nrhs, & + & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & + & mat%p%dparm) + ELSE + mat%p%iparm(2) = 2 ! Factorization + mat%p%iparm(3) = 2 + CALL zgsmp(mat%rank, mat%irow, mat%cols, mat%val, dummy, mat%rank, & + & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) + END IF + IF(mat%p%iparm(64).NE.0) THEN + WRITE(*,'(a,i4)') 'NUMFACT: Num. Factor failed with error', mat%p%iparm(64) + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF + END SUBROUTINE numfact_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE factor_wsmp_mat(mat, nlreord) +! +! Factor (create +reorder + factor) a wsmp_mat matrix +! + TYPE(wsmp_mat) :: mat + LOGICAL, OPTIONAL, INTENT(in) :: nlreord + LOGICAL :: mlreord +!---------------------------------------------------------------------- +! 1.0 Creation from the sparse rows +! + IF(ASSOCIATED(mat%mat)) THEN + CALL to_mat(mat) + END IF +!---------------------------------------------------------------------- +! 2.0 Reordering and symbolic factorization phase +! + mlreord = .TRUE. + IF(PRESENT(nlreord)) mlreord = nlreord + IF(mlreord) THEN + CALL reord_mat(mat) + END IF +!---------------------------------------------------------------------- +! 3.0 Numerical factorization +! + CALL numfact(mat) + END SUBROUTINE factor_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE factor_zwsmp_mat(mat, nlreord) +! +! Factor (create +reorder + factor) a wsmp_mat matrix +! + TYPE(zwsmp_mat) :: mat + LOGICAL, OPTIONAL, INTENT(in) :: nlreord + LOGICAL :: mlreord +!---------------------------------------------------------------------- +! 1.0 Creation from the sparse rows +! + IF(ASSOCIATED(mat%mat)) THEN + CALL to_mat(mat) + END IF +!---------------------------------------------------------------------- +! 2.0 Reordering and symbolic factorization phase +! + mlreord = .TRUE. + IF(PRESENT(nlreord)) mlreord = nlreord + IF(mlreord) THEN + CALL reord_mat(mat) + END IF +!---------------------------------------------------------------------- +! 3.0 Numerical factorization +! + CALL numfact(mat) + END SUBROUTINE factor_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE bsolve_wsmp_mat1(mat, rhs, sol, nref) +! +! Backsolve, using Wsmp +! + TYPE(wsmp_mat) :: mat + DOUBLE PRECISION :: rhs(:) + DOUBLE PRECISION, OPTIONAL :: sol(:) + INTEGER, OPTIONAL :: nref + DOUBLE PRECISION :: dummy +! +! Recall the matrice if not current +! + CALL check_mat(mat) +! + IF(mat%nlsym) THEN + mat%p%iparm(2) = 4 ! Back substitution + mat%p%iparm(3) = 4 + ELSE + mat%p%iparm(2) = 3 ! Back substitution + mat%p%iparm(3) = 3 + END IF + mat%p%iparm(6) = 0 ! Max numbers of iterative refinement steps + IF(PRESENT(nref)) THEN + IF(mat%nlsym) THEN + mat%p%iparm(3) = 5 + ELSE + mat%p%iparm(3) = 4 + END IF + mat%p%iparm(6) = nref + END IF + mat%nrhs = 1 + IF(PRESENT(sol)) THEN + sol = rhs + IF(mat%nlsym) THEN + CALL wssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, & + & mat%perm, mat%invp, sol, mat%rank, mat%nrhs, & + & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & + & mat%p%dparm) + ELSE + CALL wgsmp(mat%rank, mat%irow, mat%cols, mat%val, sol, mat%rank, & + & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) + END IF + ELSE + IF(mat%nlsym) THEN + CALL wssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, & + & mat%perm, mat%invp, rhs, mat%rank, mat%nrhs, & + & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & + & mat%p%dparm) + ELSE + CALL wgsmp(mat%rank, mat%irow, mat%cols, mat%val, rhs, mat%rank, & + & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) + END IF + END IF + IF(mat%p%iparm(64).NE.0) THEN + WRITE(*,'(a,i4)') 'BSOLVE: Failed with error', mat%p%iparm(64) + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF +! + END SUBROUTINE bsolve_wsmp_mat1 +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE bsolve_zwsmp_mat1(mat, rhs, sol, nref) +! +! Backsolve, using Wsmp +! + TYPE(zwsmp_mat) :: mat + DOUBLE COMPLEX :: rhs(:) + DOUBLE COMPLEX, OPTIONAL :: sol(:) + INTEGER, OPTIONAL :: nref + DOUBLE COMPLEX :: dummy +! +! Recall the matrice if not current +! + CALL check_mat(mat) +! + IF(mat%nlsym .OR. mat%nlherm) THEN + mat%p%iparm(2) = 4 ! Back substitution + mat%p%iparm(3) = 4 + ELSE + mat%p%iparm(2) = 3 ! Back substitution + mat%p%iparm(3) = 3 + END IF + mat%p%iparm(6) = 0 ! Max numbers of iterative refinement steps + IF(PRESENT(nref)) THEN + IF(mat%nlsym .OR. mat%nlherm) THEN + mat%p%iparm(3) = 5 + ELSE + mat%p%iparm(3) = 4 + END IF + mat%p%iparm(6) = nref + END IF + mat%nrhs = 1 + IF(PRESENT(sol)) THEN + sol = rhs + IF(mat%nlsym .OR. mat%nlherm) THEN + CALL zssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, & + & mat%perm, mat%invp, sol, mat%rank, mat%nrhs, & + & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & + & mat%p%dparm) + ELSE + CALL zgsmp(mat%rank, mat%irow, mat%cols, mat%val, sol, mat%rank, & + & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) + END IF + ELSE + IF(mat%nlsym .OR. mat%nlherm) THEN + CALL zssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, & + & mat%perm, mat%invp, rhs, mat%rank, mat%nrhs, & + & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & + & mat%p%dparm) + ELSE + CALL zgsmp(mat%rank, mat%irow, mat%cols, mat%val, rhs, mat%rank, & + & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) + END IF + END IF + IF(mat%p%iparm(64).NE.0) THEN + WRITE(*,'(a,i4)') 'BSOLVE: Failed with error', mat%p%iparm(64) + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF +! + END SUBROUTINE bsolve_zwsmp_mat1 +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE bsolve_wsmp_matn(mat, rhs, sol, nref) +! +! Backsolve, using Wsmp, multiple RHS +! + TYPE(wsmp_mat) :: mat + DOUBLE PRECISION :: rhs(:,:) + DOUBLE PRECISION, OPTIONAL :: sol(:,:) + INTEGER, OPTIONAL :: nref + DOUBLE PRECISION :: dummy +! +! Recall the matrice if not current +! + CALL check_mat(mat) +! + IF(mat%nlsym) THEN + mat%p%iparm(2) = 4 ! Back substitution + mat%p%iparm(3) = 4 + ELSE + mat%p%iparm(2) = 3 ! Back substitution + mat%p%iparm(3) = 3 + END IF + mat%p%iparm(6) = 0 ! Max numbers of iterative refinement steps + IF(PRESENT(nref)) THEN + IF(mat%nlsym) THEN + mat%p%iparm(3) = 5 + ELSE + mat%p%iparm(3) = 4 + END IF + mat%p%iparm(6) = nref + END IF + mat%nrhs = SIZE(rhs,2) + IF(PRESENT(sol)) THEN + sol = rhs + IF(mat%nlsym) THEN + CALL wssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, & + & mat%perm, mat%invp, sol, mat%rank, mat%nrhs, & + & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & + & mat%p%dparm) + ELSE + CALL wgsmp(mat%rank, mat%irow, mat%cols, mat%val, sol, mat%rank, & + & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) + END IF + ELSE + IF(mat%nlsym) THEN + CALL wssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, & + & mat%perm, mat%invp, rhs, mat%rank, mat%nrhs, & + & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & + & mat%p%dparm) + ELSE + CALL wgsmp(mat%rank, mat%irow, mat%cols, mat%val, rhs, mat%rank, & + & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) + END IF + END IF + IF(mat%p%iparm(64).NE.0) THEN + WRITE(*,'(a,i4)') 'BSOLVE: Failed with error', mat%p%iparm(64) + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF +! + END SUBROUTINE bsolve_wsmp_matn +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE bsolve_zwsmp_matn(mat, rhs, sol, nref) +! +! Backsolve, using Wsmp, multiple RHS +! + TYPE(zwsmp_mat) :: mat + DOUBLE COMPLEX :: rhs(:,:) + DOUBLE COMPLEX, OPTIONAL :: sol(:,:) + INTEGER, OPTIONAL :: nref + DOUBLE COMPLEX :: dummy +! +! Recall the matrice if not current +! + CALL check_mat(mat) +! + IF(mat%nlsym .or. mat%nlherm) THEN + mat%p%iparm(2) = 4 ! Back substitution + mat%p%iparm(3) = 4 + ELSE + mat%p%iparm(2) = 3 ! Back substitution + mat%p%iparm(3) = 3 + END IF + mat%p%iparm(6) = 0 ! Max numbers of iterative refinement steps + IF(PRESENT(nref)) THEN + IF(mat%nlsym .OR. mat%nlherm) THEN + mat%p%iparm(3) = 5 + ELSE + mat%p%iparm(3) = 4 + END IF + mat%p%iparm(6) = nref + END IF + mat%nrhs = SIZE(rhs,2) + IF(PRESENT(sol)) THEN + sol = rhs + IF(mat%nlsym .OR. mat%nlherm) THEN + CALL zssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, & + & mat%perm, mat%invp, sol, mat%rank, mat%nrhs, & + & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & + & mat%p%dparm) + ELSE + CALL zgsmp(mat%rank, mat%irow, mat%cols, mat%val, sol, mat%rank, & + & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) + END IF + ELSE + IF(mat%nlsym .OR. mat%nlherm) THEN + CALL zssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, & + & mat%perm, mat%invp, rhs, mat%rank, mat%nrhs, & + & mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, & + & mat%p%dparm) + ELSE + CALL zgsmp(mat%rank, mat%irow, mat%cols, mat%val, rhs, mat%rank, & + & mat%nrhs, dummy, mat%p%iparm, mat%p%dparm) + END IF + END IF + IF(mat%p%iparm(64).NE.0) THEN + WRITE(*,'(a,i4)') 'BSOLVE: Failed with error', mat%p%iparm(64) + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF +! + END SUBROUTINE bsolve_zwsmp_matn +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + FUNCTION vmx_wsmp_mat(mat, xarr) RESULT(yarr) +! +! Return product mat*x +! + TYPE(wsmp_mat) :: mat + DOUBLE PRECISION, INTENT(in) :: xarr(:) + DOUBLE PRECISION :: yarr(SIZE(xarr)) + DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0 + CHARACTER(len=6) :: matdescra + INTEGER :: n, i, j +! + n = mat%rank +! +#ifdef MKL + IF(mat%nlsym) THEN + matdescra = 'sun' + ELSE + matdescra = 'g' + END IF +! + CALL mkl_dcsrmv('N', n, n, alpha, matdescra, mat%val, & + & mat%cols, mat%irow(1), mat%irow(2), xarr, & + & beta, yarr) +#else + yarr = 0.0d0 + DO i=1,n + DO j=mat%irow(i), mat%irow(i+1)-1 + yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j)) + END DO + IF(mat%nlsym) THEN + DO j=mat%irow(i)+1, mat%irow(i+1)-1 + yarr(mat%cols(j)) = yarr(mat%cols(j)) & + & + mat%val(j)*xarr(i) + END DO + END IF + END DO +#endif +! + END FUNCTION vmx_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + FUNCTION vmx_zwsmp_mat(mat, xarr) RESULT(yarr) +! +! Return product mat*x +! + TYPE(zwsmp_mat) :: mat + DOUBLE COMPLEX, INTENT(in) :: xarr(:) + DOUBLE COMPLEX :: yarr(SIZE(xarr)) + DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0) + INTEGER :: n, i, j + CHARACTER(len=6) :: matdescra + CHARACTER(len=1) :: transa +! + n = mat%rank +! +#ifdef MKL + IF(mat%nlsym) THEN + matdescra = 'sun' + ELSE IF(mat%nlherm) THEN + matdescra = 'hun' + ELSE + matdescra = 'g' + END IF + transa='N' + IF(mat%nlherm) THEN + transa='T' ! Transpose(CSR-UT*) = CSC-LT* = CSR-UT + END IF + CALL mkl_zcsrmv(transa, n, n, alpha, matdescra, mat%val, & + & mat%cols, mat%irow(1), mat%irow(2), xarr, & + & beta, yarr) +#else + yarr = (0.0d0,0.0d0) + DO i=1,n + IF(mat%nlherm) THEN ! CSR-UT = (CSR-UT*)* + DO j=mat%irow(i), mat%irow(i+1)-1 + yarr(i) = yarr(i) + CONJG(mat%val(j))*xarr(mat%cols(j)) + END DO + ELSE + DO j=mat%irow(i), mat%irow(i+1)-1 + yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j)) + END DO + END IF + IF(mat%nlsym) THEN + DO j=mat%irow(i)+1, mat%irow(i+1)-1 + yarr(mat%cols(j)) = yarr(mat%cols(j)) & + & + mat%val(j)*xarr(i) + END DO + ELSE IF(mat%nlherm) THEN ! CSR-UT = (CSR-UT*)* + DO j=mat%irow(i)+1, mat%irow(i+1)-1 + yarr(mat%cols(j)) = yarr(mat%cols(j)) & + & + mat%val(j)*xarr(i) + END DO + END IF + END DO +#endif +! + END FUNCTION vmx_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + FUNCTION vmx_wsmp_matn(mat, xarr) RESULT(yarr) +! +! Return product mat*x +! + TYPE(wsmp_mat) :: mat + DOUBLE PRECISION, INTENT(in) :: xarr(:,:) + DOUBLE PRECISION :: yarr(SIZE(xarr,1),SIZE(xarr,2)) +! + DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0 + INTEGER :: n, nrhs, i, j + CHARACTER(len=6) :: matdescra +! + n = mat%rank + nrhs = SIZE(xarr,2) +! +#ifdef MKL + IF(mat%nlsym) THEN + matdescra = 'sun' + ELSE + matdescra = 'g' + END IF +! + CALL mkl_dcsrmm('N', n, nrhs, n, alpha, matdescra, mat%val,& + & mat%cols, mat%irow(1), mat%irow(2), xarr, & + & n, beta, yarr, n) +#else + yarr = 0.0d0 + DO i=1,n + DO j=mat%irow(i), mat%irow(i+1)-1 + yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:) + END DO + IF(mat%nlsym) THEN + DO j=mat%irow(i)+1, mat%irow(i+1)-1 + yarr(mat%cols(j),:) = yarr(mat%cols(j),:) & + & + mat%val(j)*xarr(i,:) + END DO + END IF + END DO +#endif +! + END FUNCTION vmx_wsmp_matn +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + FUNCTION vmx_zwsmp_matn(mat, xarr) RESULT(yarr) +! +! Return product mat*x +! + TYPE(zwsmp_mat) :: mat + DOUBLE COMPLEX, INTENT(in) :: xarr(:,:) + DOUBLE COMPLEX :: yarr(SIZE(xarr,1),SIZE(xarr,2)) +! + DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0) + INTEGER :: n, nrhs, i, j + CHARACTER(len=6) :: matdescra + CHARACTER(len=1) :: transa +! + n = mat%rank + nrhs = SIZE(xarr,2) +! +#ifdef MKL + IF(mat%nlsym) THEN + matdescra = 'sun' + ELSE IF(mat%nlherm) THEN + matdescra = 'hun' + ELSE + matdescra = 'g' + END IF + transa='N' + IF(mat%nlherm) THEN + transa='T' ! Transpose(CSR-UT*) = CSC-LT* = CSR-UT + END IF +! + CALL mkl_zcsrmm(transa, n, nrhs, n, alpha, matdescra, mat%val, & + & mat%cols, mat%irow(1), mat%irow(2), xarr, n, & + & beta, yarr, n) +#else + yarr = (0.0d0,0.0d0) + DO i=1,n + IF(mat%nlherm) THEN ! CSR-UT = (CSR-UT*)* + DO j=mat%irow(i), mat%irow(i+1)-1 + yarr(i,:) = yarr(i,:) + CONJG(mat%val(j))*xarr(mat%cols(j),:) + END DO + ELSE + DO j=mat%irow(i), mat%irow(i+1)-1 + yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:) + END DO + END IF + IF(mat%nlsym) THEN + DO j=mat%irow(i)+1, mat%irow(i+1)-1 + yarr(mat%cols(j),:) = yarr(mat%cols(j),:) & + & + mat%val(j)*xarr(i,:) + END DO + ELSE IF(mat%nlherm) THEN ! CSR-UT = (CSR-UT*)* + DO j=mat%irow(i)+1, mat%irow(i+1)-1 + yarr(mat%cols(j),:) = yarr(mat%cols(j),:) & + & + mat%val(j)*xarr(i,:) + END DO + END IF + END DO +#endif +! + END FUNCTION vmx_zwsmp_matn +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE destroy_wsmp_mat(mat) +! +! Deallocate the sparse matrix mat +! + TYPE(wsmp_mat) :: mat +! + IF(ASSOCIATED(mat%mat)) THEN + CALL destroy(mat%mat) + DEALLOCATE(mat%mat) + END IF +! +! Release memory for factors for symmetric matrix + IF(mat%nlsym) THEN + CALL check_mat(mat) + CALL wsffree + END IF +! + IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) + IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) + IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm) + IF(ASSOCIATED(mat%invp)) DEALLOCATE(mat%invp) + IF(ASSOCIATED(mat%mrp)) DEALLOCATE(mat%mrp) + IF(ASSOCIATED(mat%diag)) DEALLOCATE(mat%diag) + IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) + IF(ASSOCIATED(mat%aux)) DEALLOCATE(mat%aux) + END SUBROUTINE destroy_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE destroy_zwsmp_mat(mat) +! +! Deallocate the sparse matrix mat +! + TYPE(zwsmp_mat) :: mat +! + IF(ASSOCIATED(mat%mat)) THEN + CALL destroy(mat%mat) + DEALLOCATE(mat%mat) + END IF +! +! Release memory for factors for symmetric/hermitian matrix + IF(mat%nlsym .OR. mat%nlherm) THEN + CALL check_mat(mat) + CALL wsffree + END IF +! + IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols) + IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow) + IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm) + IF(ASSOCIATED(mat%invp)) DEALLOCATE(mat%invp) + IF(ASSOCIATED(mat%mrp)) DEALLOCATE(mat%mrp) + IF(ASSOCIATED(mat%diag)) DEALLOCATE(mat%diag) + IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val) + IF(ASSOCIATED(mat%aux)) DEALLOCATE(mat%aux) + END SUBROUTINE destroy_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE put_wsmp_mat(fid, label, mat, str) +! +! Write matrix to hdf5 file +! + USE futils +! + INTEGER, INTENT(in) :: fid + CHARACTER(len=*), INTENT(in) :: label + TYPE(wsmp_mat) :: mat + CHARACTER(len=*), OPTIONAL, INTENT(in) :: str +! + IF(PRESENT(str)) THEN + CALL creatg(fid, label, str) + ELSE + CALL creatg(fid, label) + END IF + CALL attach(fid, label, 'RANK', mat%rank) + CALL attach(fid, label, 'NNZ', mat%nnz) + CALL attach(fid, label, 'NLSYM', mat%nlsym) + CALL attach(fid, label, 'NLPOS', mat%nlpos) + CALL putarr(fid, TRIM(label)//'/irow', mat%irow) + CALL putarr(fid, TRIM(label)//'/cols', mat%cols) + CALL putarr(fid, TRIM(label)//'/val', mat%val) +! + CALL creatg(fid, TRIM(label)//'/p') + CALL putarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm) + CALL putarr(fid, TRIM(label)//'/p/dparm', mat%p%dparm) + END SUBROUTINE put_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE put_zwsmp_mat(fid, label, mat, str) +! +! Write matrix to hdf5 file +! + USE futils +! + INTEGER, INTENT(in) :: fid + CHARACTER(len=*), INTENT(in) :: label + TYPE(zwsmp_mat) :: mat + CHARACTER(len=*), OPTIONAL, INTENT(in) :: str +! + IF(PRESENT(str)) THEN + CALL creatg(fid, label, str) + ELSE + CALL creatg(fid, label) + END IF + CALL attach(fid, label, 'RANK', mat%rank) + CALL attach(fid, label, 'NNZ', mat%nnz) + CALL attach(fid, label, 'NLSYM', mat%nlsym) + CALL attach(fid, label, 'NLPOS', mat%nlpos) + CALL attach(fid, label, 'NLHERM', mat%nlherm) + CALL putarr(fid, TRIM(label)//'/irow', mat%irow) + CALL putarr(fid, TRIM(label)//'/cols', mat%cols) + CALL putarr(fid, TRIM(label)//'/val', mat%val) +! + CALL creatg(fid, TRIM(label)//'/p') + CALL putarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm) + CALL putarr(fid, TRIM(label)//'/p/dparm', mat%p%dparm) + END SUBROUTINE put_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE get_wsmp_mat(fid, label, mat) +! +! Read matrix from hdf5 file +! + USE futils +! + INTEGER, INTENT(in) :: fid + CHARACTER(len=*), INTENT(in) :: label + TYPE(wsmp_mat) :: mat +! + CALL getatt(fid, label, 'RANK', mat%rank) + CALL getatt(fid, label, 'NNZ', mat%nnz) + CALL getatt(fid, label, 'NLSYM', mat%nlsym) + CALL getatt(fid, label, 'NLPOS', mat%nlpos) + CALL getarr(fid, TRIM(label)//'/irow', mat%irow) + CALL getarr(fid, TRIM(label)//'/cols', mat%cols) + IF(mat%nlsym) THEN + CALL getarr(fid, TRIM(label)//'/perm', mat%perm) + CALL getarr(fid, TRIM(label)//'/invp', mat%invp) + END IF + CALL getarr(fid, TRIM(label)//'/val', mat%val) +! + CALL getarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm) + CALL getarr(fid, TRIM(label)//'/p/dparm', mat%p%dparm) + END SUBROUTINE get_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE get_zwsmp_mat(fid, label, mat) +! +! Read matrix from hdf5 file +! + USE futils +! + INTEGER, INTENT(in) :: fid + CHARACTER(len=*), INTENT(in) :: label + TYPE(zwsmp_mat) :: mat +! + CALL getatt(fid, label, 'RANK', mat%rank) + CALL getatt(fid, label, 'NNZ', mat%nnz) + CALL getatt(fid, label, 'NLSYM', mat%nlsym) + CALL getatt(fid, label, 'NLPOS', mat%nlpos) + CALL getatt(fid, label, 'NLHERM', mat%nlherm) + CALL getarr(fid, TRIM(label)//'/irow', mat%irow) + CALL getarr(fid, TRIM(label)//'/cols', mat%cols) + IF(mat%nlsym) THEN + CALL getarr(fid, TRIM(label)//'/perm', mat%perm) + CALL getarr(fid, TRIM(label)//'/invp', mat%invp) + END IF + CALL getarr(fid, TRIM(label)//'/val', mat%val) +! + CALL getarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm) + CALL getarr(fid, TRIM(label)//'/p/dparm', mat%p%dparm) + END SUBROUTINE get_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE mcopy_wsmp_mat(mata, matb) +! +! Matrix copy: B = A +! + TYPE(wsmp_mat) :: mata, matb + INTEGER :: n, nnz +! +! Assume that matb was already initialized by init_wsmp_mat. + IF(matb%rank.LE.0) THEN + WRITE(*,'(a)') 'MCOPY: Mat B is not initialized with INIT' + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF +! + IF(ASSOCIATED(matb%mat)) THEN + CALL destroy(matb%mat) + DEALLOCATE(matb%mat) + END IF +! + n = mata%rank + nnz = mata%nnz + matb%rank = n + matb%nnz = nnz + matb%nlsym = mata%nlsym + matb%nlpos = mata%nlpos + matb%nlforce_zero = mata%nlforce_zero +! + IF(ASSOCIATED(matb%val)) DEALLOCATE(matb%val) + IF(ASSOCIATED(matb%cols)) DEALLOCATE(matb%cols) + IF(ASSOCIATED(matb%irow)) DEALLOCATE(matb%irow) + IF(ASSOCIATED(matb%perm)) DEALLOCATE(matb%perm) + IF(ASSOCIATED(matb%invp)) DEALLOCATE(matb%invp) + ALLOCATE(matb%val(nnz)); matb%val = mata%val + ALLOCATE(matb%cols(nnz)); matb%cols = mata%cols + ALLOCATE(matb%irow(n+1)); matb%irow = mata%irow + ALLOCATE(matb%perm(n)) + IF(matb%nlsym) THEN + ALLOCATE(matb%perm(n)) + ALLOCATE(matb%invp(n)) + END IF + END SUBROUTINE mcopy_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE mcopy_zwsmp_mat(mata, matb) +! +! Matrix copy: B = A +! + TYPE(zwsmp_mat) :: mata, matb + INTEGER :: n, nnz +! +! Assume that matb was already initialized by init_wsmp_mat. + IF(matb%rank.LE.0) THEN + WRITE(*,'(a)') 'MCOPY: Mat B is not initialized with INIT' + STOP '*** Abnormal EXIT in MODULE wsmp_mod ***' + END IF +! + IF(ASSOCIATED(matb%mat)) THEN + CALL destroy(matb%mat) + DEALLOCATE(matb%mat) + END IF +! + n = mata%rank + nnz = mata%nnz + matb%rank = n + matb%nnz = nnz + matb%nlsym = mata%nlsym + matb%nlherm = mata%nlherm + matb%nlpos = mata%nlpos + matb%nlforce_zero = mata%nlforce_zero +! + IF(ASSOCIATED(matb%val)) DEALLOCATE(matb%val) + IF(ASSOCIATED(matb%cols)) DEALLOCATE(matb%cols) + IF(ASSOCIATED(matb%irow)) DEALLOCATE(matb%irow) + IF(ASSOCIATED(matb%perm)) DEALLOCATE(matb%perm) + IF(ASSOCIATED(matb%invp)) DEALLOCATE(matb%invp) + ALLOCATE(matb%val(nnz)); matb%val = mata%val + ALLOCATE(matb%cols(nnz)); matb%cols = mata%cols + ALLOCATE(matb%irow(n+1)); matb%irow = mata%irow + ALLOCATE(matb%perm(n)) + IF(matb%nlsym) THEN + ALLOCATE(matb%perm(n)) + ALLOCATE(matb%invp(n)) + END IF + END SUBROUTINE mcopy_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE maddto_wsmp_mat(mata, alpha, matb) +! +! A <- A + alpha*B +! + TYPE(wsmp_mat) :: mata, matb + DOUBLE PRECISION :: alpha +! + mata%val = mata%val + alpha*matb%val + END SUBROUTINE maddto_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE maddto_zwsmp_mat(mata, alpha, matb) +! +! A <- A + alpha*B +! + TYPE(zwsmp_mat) :: mata, matb + DOUBLE COMPLEX :: alpha +! + mata%val = mata%val + alpha*matb%val + END SUBROUTINE maddto_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE psum_wsmp_mat(mat, comm) +! +! Parallel sum of sparse matrices +! + INCLUDE "mpif.h" +! + TYPE(wsmp_mat) :: mat + INCLUDE 'psum_mat.tpl' + END SUBROUTINE psum_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE psum_zwsmp_mat(mat, comm) +! +! Parallel sum of sparse matrices +! + INCLUDE "mpif.h" +! + TYPE(zwsmp_mat) :: mat + INCLUDE 'psum_mat.tpl' + END SUBROUTINE psum_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE p2p_wsmp_mat(mat, dest, extyp, op, comm) +! +! Point-to-point combine sparse matrix between 2 processes +! + INCLUDE "mpif.h" +! + TYPE(wsmp_mat) :: mat + DOUBLE PRECISION, ALLOCATABLE :: val(:) + INTEGER :: mpi_type=MPI_DOUBLE_PRECISION +! + INCLUDE "p2p_mat.tpl" + END SUBROUTINE p2p_wsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + SUBROUTINE p2p_zwsmp_mat(mat, dest, extyp, op, comm) +! +! Point-to-point combine sparse matrix between 2 processes +! + INCLUDE "mpif.h" +! + TYPE(zwsmp_mat) :: mat + DOUBLE COMPLEX, ALLOCATABLE :: val(:) + INTEGER :: mpi_type=MPI_DOUBLE_COMPLEX +! + INCLUDE "p2p_mat.tpl" + END SUBROUTINE p2p_zwsmp_mat +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +END MODULE wsmp_bsplines diff --git a/src/zconmat.tpl b/src/zconmat.tpl new file mode 100644 index 0000000..2d40218 --- /dev/null +++ b/src/zconmat.tpl @@ -0,0 +1,214 @@ +!> +!> @file zconmat.tpl +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +! +! In this version s[lines are precalculted +! (on all n1/n2 intervals +! + INTERFACE + SUBROUTINE coefeq(x, y, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x, y + INTEGER, INTENT(out) :: idt(:,:), idw(:,:) + DOUBLE COMPLEX, INTENT(out) :: c(:) + END SUBROUTINE coefeq + END INTERFACE + INTEGER, OPTIONAL :: maxder(2) ! maximum oder of derivatives + LOGICAL, OPTIONAL :: nat_order ! Natural ordering for 2d-1d mapping +! + INTEGER :: n1, nidbas1, ndim1, n1e + INTEGER :: n2, nidbas2, ndim2, n2e + INTEGER :: ng1, ng2 + INTEGER :: i1, i2, ig1, ig2 + INTEGER :: igt1, igt2, igw1, igw2, irow, jcol + INTEGER, ALLOCATABLE :: left1(:), left2(:) +! + LOGICAL :: nlper1, nlper2, nlnat +! + INTEGER :: kterms ! Number of terms in weak form + INTEGER :: k, kmaxder, it1, iw1, it2, iw2 + INTEGER, ALLOCATABLE :: idert(:,:), iderw(:,:) ! Derivative order + DOUBLE COMPLEX :: one=(1.0d0,0.0d0), zero=(0.0d0,0.0d0) + DOUBLE COMPLEX, ALLOCATABLE :: coefs(:,:,:) ! Terms in weak form +! + DOUBLE PRECISION, POINTER :: xg1(:,:), xg2(:,:), wg1(:,:), wg2(:,:) + DOUBLE PRECISION, ALLOCATABLE :: fun1(:,:,:,:), fun2(:,:,:,:) + DOUBLE COMPLEX, ALLOCATABLE :: mata(:,:,:,:), matc(:,:) + DOUBLE COMPLEX, ALLOCATABLE :: matg(:,:,:), matf(:,:,:), matcg(:,:,:) +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl%sp1, ndim1, n1, nidbas1) + CALL get_dim(spl%sp2, ndim2, n2, nidbas2) + nlper1 = spl%sp1%period + nlper2 = spl%sp2%period +! + n1e = n1+nidbas1 ! Number of elements in 1st coordinate + n2e = n2+nidbas2 ! Number of elements in 2nd coordinate + IF(nlper2) n2e = n2 +! +! Gauss points and weights on all intervals +! + xg1 => spl%sp1%gausx ! xg1(ng1,n1) + wg1 => spl%sp1%gausw ! wg1(ng1,n1) + ng1 = SIZE(xg1,1) + xg2 => spl%sp2%gausx + wg2 => spl%sp2%gausw + ng2 = SIZE(xg2,1) +! +! Splines on all intervals +! + kmaxder = 1 + IF(PRESENT(maxder)) kmaxder = maxder(1) + ALLOCATE(fun1(0:nidbas1,0:kmaxder,ng1,n1)) + ALLOCATE(left1(ng1)) + DO i1=1,n1 + left1 = i1 + CALL basfun(xg1(:,i1), spl%sp1, fun1(:,:,:,i1), left1) + END DO + DEALLOCATE(left1) +! + kmaxder = 1 + IF(PRESENT(maxder)) kmaxder = maxder(2) + ALLOCATE(fun2(0:nidbas2,0:kmaxder,ng2,n2)) + ALLOCATE(left2(ng2)) + DO i2=1,n2 + left2 = i2 + CALL basfun(xg2(:,i2), spl%sp2, fun2(:,:,:,i2), left2) + END DO + DEALLOCATE(left2) +! +! Ordering in local to global matrix mapping +! + nlnat = .FALSE. + IF(PRESENT(nat_order)) nlnat = nat_order +!=========================================================================== +! 2.0 Assembly loop +! +! Weak form +! + kterms = mat%nterms + ALLOCATE(idert(kterms,2)) + ALLOCATE(iderw(kterms,2)) + ALLOCATE(coefs(kterms,ng1,ng2)) +! +! Allocate local matrices +! + ALLOCATE(mata(0:nidbas1,0:nidbas1,0:nidbas2,0:nidbas2)) + ALLOCATE(matc(ng1,ng2)) + ALLOCATE(matg(0:nidbas2,0:nidbas2,ng2)) + ALLOCATE(matf(0:nidbas1,0:nidbas1,ng1)) + ALLOCATE(matcg(ng1,0:nidbas2,0:nidbas2)) +! + DO i1=1,n1 + DO i2=1,n2 +! +! Coefficients of the weak form +! + DO ig1=1,ng1 + DO ig2=1,ng2 + CALL coefeq(xg1(ig1,i1), xg2(ig2,i2), & + & idert, iderw, coefs(:,ig1,ig2)) + END DO + END DO +! +! Compute local matrix: A <- E*(C*D^T) + A +! + mata = 0.0d0 + DO k=1,kterms +! + matc(1:ng1,1:ng2) = coefs(k,1:ng1,1:ng2) +! + DO it1=0,nidbas1 + DO iw1=0,nidbas1 + DO ig1=1,ng1 + matf(it1,iw1,ig1) = wg1(ig1,i1) * & + & fun1(it1,idert(k,1),ig1,i1) * & + & fun1(iw1,iderw(k,1),ig1,i1) + END DO + END DO + END DO +! + DO it2=0,nidbas2 + DO iw2=0,nidbas2 + DO ig2=1,ng2 + matg(it2,iw2,ig2) = wg2(ig2,i2) * & + & fun2(it2,idert(k,2),ig2,i2) * & + & fun2(iw2,iderw(k,2),ig2,i2) + END DO + END DO + END DO +! + CALL zgemm('N', 'T', ng1, (nidbas2+1)*(nidbas2+1), ng2, one, & + & matc, ng1, matg, (nidbas2+1)*(nidbas2+1), zero, & + & matcg, ng1) + CALL zgemm('N', 'N', (nidbas1+1)*(nidbas1+1), (nidbas2+1)*(nidbas2+1), & + & ng1, one, matf, (nidbas1+1)*(nidbas1+1), matcg, ng1, one, & + & mata, (nidbas1+1)*(nidbas1+1)) +! + END DO +! +! Map local matrix A to global matrix +! + DO it1=0,nidbas1 + igt1 = i1+it1; IF(nlper1) igt1 = MODULO(igt1-1,n1) + 1 + DO it2=0,nidbas2 + igt2 = i2+it2; IF(nlper2) igt2 = MODULO(igt2-1, n2) + 1 + irow = glmap(igt1, igt2, n1e, n2e) + DO iw1=0,nidbas1 + igw1 = i1+iw1; IF(nlper1) igw1 = MODULO(igw1-1,n1) + 1 + DO iw2=0,nidbas2 + igw2 = i2+iw2; IF(nlper2) igw2 = MODULO(igw2-1, n2) + 1 + jcol = glmap(igw1, igw2, n1e, n2e) + CALL updtmat(mat, irow, jcol, mata(it1,iw1,it2,iw2)) + END DO + END DO + END DO + END DO +! + END DO + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(fun1) + DEALLOCATE(fun2) + DEALLOCATE(idert, iderw, coefs) + DEALLOCATE(mata) + DEALLOCATE(matc) + DEALLOCATE(matg) + DEALLOCATE(matcg) + DEALLOCATE(matf) +! +CONTAINS + INTEGER FUNCTION glmap(i,j,n1,n2) + INTEGER, INTENT(in) :: i,j,n1,n2 + IF(nlnat) THEN + glmap = (j-1)*n1 + i + ELSE + glmap = (i-1)*n2 + j + END IF + END FUNCTION glmap diff --git a/src/zconmat_1d.tpl b/src/zconmat_1d.tpl new file mode 100644 index 0000000..d066822 --- /dev/null +++ b/src/zconmat_1d.tpl @@ -0,0 +1,144 @@ +!> +!> @file zconmat_1d.tpl +!> +!> @brief +!> +!> @copyright +!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +!> SPC (Swiss Plasma Center) +!> +!> spclibs is free software: you can redistribute it and/or modify it under +!> the terms of the GNU Lesser General Public License as published by the Free +!> Software Foundation, either version 3 of the License, or (at your option) +!> any later version. +!> +!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +!> +!> You should have received a copy of the GNU Lesser General Public License +!> along with this program. If not, see . +!> +!> @authors +!> (in alphabetical order) +!> @author Trach-Minh Tran +!> +! +! In this version s[lines are precalculted +! (on all n1/n2 intervals +! + INTERFACE + SUBROUTINE coefeq(x, idt, idw, c) + DOUBLE PRECISION, INTENT(in) :: x + INTEGER, INTENT(out) :: idt(:), idw(:) + DOUBLE COMPLEX, INTENT(out) :: c(:) + END SUBROUTINE coefeq + END INTERFACE + INTEGER, OPTIONAL :: maxder ! maximum oder of derivatives +! + INTEGER :: n1, nidbas1, ndim1, ng1 + INTEGER :: i1, ig1 + INTEGER :: irow, jcol + INTEGER, ALLOCATABLE :: left1(:) +! + LOGICAL :: nlper1 +! + INTEGER :: kterms ! Number of terms in weak form + INTEGER :: k, kmaxder, it1, iw1 + INTEGER, ALLOCATABLE :: idert(:), iderw(:) ! Derivative order + DOUBLE COMPLEX :: one=(1.0d0,0.0d0), zero=(0.0d0,0.0d0) + DOUBLE COMPLEX, ALLOCATABLE :: coefs(:,:) ! Terms in weak form +! + DOUBLE PRECISION, POINTER :: xg1(:,:), wg1(:,:) + DOUBLE PRECISION, ALLOCATABLE :: fun1(:,:,:,:), fun2(:,:,:,:) + DOUBLE COMPLEX, ALLOCATABLE :: mata(:,:), matc(:) + DOUBLE COMPLEX, ALLOCATABLE :: matf(:,:,:) +!=========================================================================== +! 1.0 Prologue +! +! Properties of spline space +! + CALL get_dim(spl, ndim1, n1, nidbas1) + nlper1 = spl%period +! +! Gauss points and weights on all intervals +! + xg1 => spl%gausx ! xg1(ng1,n1) + wg1 => spl%gausw ! wg1(ng1,n1) + ng1 = SIZE(xg1,1) +! +! Splines on all intervals +! + kmaxder = 1 + IF(PRESENT(maxder)) kmaxder = maxder + ALLOCATE(fun1(0:nidbas1,0:kmaxder,ng1,n1)) + ALLOCATE(left1(ng1)) + DO i1=1,n1 + left1 = i1 + CALL basfun(xg1(:,i1), spl, fun1(:,:,:,i1), left1) + END DO + DEALLOCATE(left1) +!=========================================================================== +! 2.0 Assembly loop +! +! Weak form +! + kterms = mat%nterms + ALLOCATE(idert(kterms)) + ALLOCATE(iderw(kterms)) + ALLOCATE(coefs(kterms,ng1)) +! +! Allocate local matrices +! + ALLOCATE(mata(0:nidbas1,0:nidbas1)) + ALLOCATE(matc(ng1)) + ALLOCATE(matf(0:nidbas1,0:nidbas1,ng1)) +! + DO i1=1,n1 +! +! Coefficients of the weak form +! + DO ig1=1,ng1 + CALL coefeq(xg1(ig1,i1), idert, iderw, coefs(:,ig1)) + END DO +! +! Compute local matrix: A <- F*c + A +! + mata = 0.0d0 + DO k=1,kterms +! + matc(1:ng1) = coefs(k,1:ng1) +! + DO it1=0,nidbas1 + DO iw1=0,nidbas1 + DO ig1=1,ng1 + matf(it1,iw1,ig1) = wg1(ig1,i1) * & + & fun1(it1,idert(k),ig1,i1) * & + & fun1(iw1,iderw(k),ig1,i1) + END DO + END DO + END DO +! + CALL zgemv('N', (nidbas1+1)*(nidbas1+1), ng1, one, matf, & + & (nidbas1+1)*(nidbas1+1), matc, 1, one, mata, 1) + END DO +! +! Map local matrix A to global matrix +! + DO it1=0,nidbas1 + irow = i1+it1; IF(nlper1) irow = MODULO(irow-1,n1) + 1 + DO iw1=0,nidbas1 + jcol = i1+iw1; IF(nlper1) jcol = MODULO(jcol-1,n1) + 1 + CALL updtmat(mat, irow, jcol, mata(it1,iw1)) + END DO + END DO +! + END DO +!=========================================================================== +! 9.0 Epilogue +! + DEALLOCATE(fun1) + DEALLOCATE(idert, iderw, coefs) + DEALLOCATE(mata) + DEALLOCATE(matc) + DEALLOCATE(matf) diff --git a/wk/CMakeLists.txt b/wk/CMakeLists.txt new file mode 100644 index 0000000..e24be22 --- /dev/null +++ b/wk/CMakeLists.txt @@ -0,0 +1,79 @@ +/** + * @file CMakeLists.txt + * + * @brief + * + * @copyright + * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) + * SPC (Swiss Plasma Center) + * + * spclibs is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * + * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + * + * @authors + * (in alphabetical order) + * @author Nicolas Richart + * @author Trach-Minh Tran + */ +set(BS_TESTS + driv1 driv2 driv3 driv4 + pde1d pde1dp pde1dp_cmpl + pde2d pde2d_pb + pde1dp_cmpl_dft + pde3d + fit1d fit1dbc fit1dp + fit2d fit2d1d fit2d_cmpl fit2dbc fit2dbc_x fit2dbc_y + moments optim1 optim2 optim3 + tcdsmat tmassmat tbasfun tsparse1 test_kron + ) + +if(HAS_PARDISO) + set(BS_TESTS ${BS_TESTS} + pde1dp_cmpl_pardiso + pde2d_pardiso + pde2d_sym_pardiso + pde2d_sym_pardiso_dft + ) +endif() + +if(HAS_MUMPS) + set(BS_TESTS ${BS_TESTS} + pde2d_mumps + pde1dp_cmpl_mumps + ) +endif() + +set(RUNTESTS "${CMAKE_CURRENT_SOURCE_DIR}/runtest.sh") +set(BIN_DIR "${bsplines_tests_BINARY_DIR}") +set(INPUT_DIR "${CMAKE_CURRENT_SOURCE_DIR}") + +foreach(prog ${BS_TESTS}) + add_test(${prog} ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 1 + ${RUNTESTS} ${BIN_DIR}/${prog} ${INPUT_DIR} + ) +endforeach() + +# Special cases! +if(HAS_PARDISO) + add_test(tsparse2 ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 1 + ${BIN_DIR}/tsparse2 + ) +endif() + +add_test(ppde3d ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 1 + ${BIN_DIR}/ppde3d ${INPUT_DIR}/ppde3d.in + ) + +add_test(ppde3d_pb ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 1 + ${BIN_DIR}/ppde3d ${INPUT_DIR}/ppde3d_pb.in + ) + diff --git a/wk/adv.in b/wk/adv.in new file mode 100644 index 0000000..25a6abb --- /dev/null +++ b/wk/adv.in @@ -0,0 +1,11 @@ +&newrun + nx = 100, + nidbas = 2, + a = 0.0, + b = 100.0, + dt = 0.3 ! Time step + u = -1.0 ! Velocity + w = 0.1 ! Shape of initial function + coefx = 0., 1., 0., 0., 1. ! fdist = x + coefx = 1., 0., 0., 0., 1. ! Equidistant mesh +/ diff --git a/wk/basfun_perf.in b/wk/basfun_perf.in new file mode 100644 index 0000000..99b075e --- /dev/null +++ b/wk/basfun_perf.in @@ -0,0 +1,17 @@ +&newrun + nx = 64, + nidbas = 3, + nits = 1000 + npt = 100000 + ngroup=4, + jdermx = 0, + nlperiod = f, +/ + + + + + + + + diff --git a/wk/driv1.in b/wk/driv1.in new file mode 100644 index 0000000..7d70ec2 --- /dev/null +++ b/wk/driv1.in @@ -0,0 +1,11 @@ +&newrun + nx = 10, ny = 8, + nidbas = 4, + ngauss = 4, + a = 0.0 + b = 1.0 + coefy = 1., 0., 0., 0., 1. ! Equidistant mesh + coefx = 1., 0., 0., 0., 1. ! Equidistant mesh + coefx = 0., 1., 0., 0., 1. ! fdist = x + coefx = 1., 0., 10., 0.5, 0.2 ! Gaussian peaks at 0.5 +/ diff --git a/wk/driv2.in b/wk/driv2.in new file mode 100644 index 0000000..30cbdb9 --- /dev/null +++ b/wk/driv2.in @@ -0,0 +1,10 @@ +&newrun + periodic = f, + nx = 10, + nidbas = 3, + ngauss = 3, + a = 0.0 + b = 1.0 + coefx = 1., 0., 0., 0., 1. ! Equidistant mesh + coefx = 0., 1., 0., 0., 1. ! fdist = x +/ diff --git a/wk/driv3.in b/wk/driv3.in new file mode 100644 index 0000000..62fef5a --- /dev/null +++ b/wk/driv3.in @@ -0,0 +1,2 @@ +&newrun +/ diff --git a/wk/driv4.in b/wk/driv4.in new file mode 100644 index 0000000..a9548e1 --- /dev/null +++ b/wk/driv4.in @@ -0,0 +1,5 @@ +&newrun + nx=10 + a=0, b=1.0, + nidbas1=3 nidbas2=1, +/ diff --git a/wk/fit1d.in b/wk/fit1d.in new file mode 100644 index 0000000..ee6d020 --- /dev/null +++ b/wk/fit1d.in @@ -0,0 +1,8 @@ +&newrun + nx = 10, + nidbas = 3, + a = 0.0, + b = 1.0, + coefx = 0., 1., 0., 0., 1. ! fdist = x + coefx = 1., 0., 0., 0., 1. ! Equidistant mesh +/ diff --git a/wk/fit1dbc.in b/wk/fit1dbc.in new file mode 100644 index 0000000..45187d5 --- /dev/null +++ b/wk/fit1dbc.in @@ -0,0 +1,10 @@ +&newrun + nx = 10, + nidbas = 3, + a = 0.0, + b = 1.0, + coefx = 0., 1., 0., 0., 1. ! fdist = x + coefx = 1., 0., 0., 0., 1. ! Equidistant mesh + ibc = 2,2, 2,2, 3,3 + fbc = 0.0 +/ diff --git a/wk/fit1dp.in b/wk/fit1dp.in new file mode 100644 index 0000000..ee6d020 --- /dev/null +++ b/wk/fit1dp.in @@ -0,0 +1,8 @@ +&newrun + nx = 10, + nidbas = 3, + a = 0.0, + b = 1.0, + coefx = 0., 1., 0., 0., 1. ! fdist = x + coefx = 1., 0., 0., 0., 1. ! Equidistant mesh +/ diff --git a/wk/fit2d.in b/wk/fit2d.in new file mode 100644 index 0000000..b4b46ca --- /dev/null +++ b/wk/fit2d.in @@ -0,0 +1,10 @@ +&newrun + nx = 10, + ny = 10, + nidbas = 3,3 + mbes = 2, + coefx = 0., 1., 0., 0., 1. ! fdist = x + coefy = 0., 1., 0., 0., 1. ! fdist = y + coefx = 1., 0., 0., 0., 1. ! Equidistant mesh + coefy = 1., 0., 0., 0., 1. ! Equidistant mesh +/ diff --git a/wk/fit2d1d.in b/wk/fit2d1d.in new file mode 100644 index 0000000..b345a49 --- /dev/null +++ b/wk/fit2d1d.in @@ -0,0 +1,10 @@ +&newrun + nx = 80, + ny = 80, + nidbas = 3,3 + mbes = 2, + coefx = 0., 1., 0., 0., 1. ! fdist = x + coefy = 0., 1., 0., 0., 1. ! fdist = y + coefx = 1., 0., 0., 0., 1. ! Equidistant mesh + coefy = 1., 0., 0., 0., 1. ! Equidistant mesh +/ diff --git a/wk/fit2d_cmpl.in b/wk/fit2d_cmpl.in new file mode 100644 index 0000000..b345a49 --- /dev/null +++ b/wk/fit2d_cmpl.in @@ -0,0 +1,10 @@ +&newrun + nx = 80, + ny = 80, + nidbas = 3,3 + mbes = 2, + coefx = 0., 1., 0., 0., 1. ! fdist = x + coefy = 0., 1., 0., 0., 1. ! fdist = y + coefx = 1., 0., 0., 0., 1. ! Equidistant mesh + coefy = 1., 0., 0., 0., 1. ! Equidistant mesh +/ diff --git a/wk/fit2dbc.in b/wk/fit2dbc.in new file mode 100644 index 0000000..b4b46ca --- /dev/null +++ b/wk/fit2dbc.in @@ -0,0 +1,10 @@ +&newrun + nx = 10, + ny = 10, + nidbas = 3,3 + mbes = 2, + coefx = 0., 1., 0., 0., 1. ! fdist = x + coefy = 0., 1., 0., 0., 1. ! fdist = y + coefx = 1., 0., 0., 0., 1. ! Equidistant mesh + coefy = 1., 0., 0., 0., 1. ! Equidistant mesh +/ diff --git a/wk/fit2dbc_x.in b/wk/fit2dbc_x.in new file mode 100644 index 0000000..b4b46ca --- /dev/null +++ b/wk/fit2dbc_x.in @@ -0,0 +1,10 @@ +&newrun + nx = 10, + ny = 10, + nidbas = 3,3 + mbes = 2, + coefx = 0., 1., 0., 0., 1. ! fdist = x + coefy = 0., 1., 0., 0., 1. ! fdist = y + coefx = 1., 0., 0., 0., 1. ! Equidistant mesh + coefy = 1., 0., 0., 0., 1. ! Equidistant mesh +/ diff --git a/wk/fit2dbc_y.in b/wk/fit2dbc_y.in new file mode 100644 index 0000000..b4b46ca --- /dev/null +++ b/wk/fit2dbc_y.in @@ -0,0 +1,10 @@ +&newrun + nx = 10, + ny = 10, + nidbas = 3,3 + mbes = 2, + coefx = 0., 1., 0., 0., 1. ! fdist = x + coefy = 0., 1., 0., 0., 1. ! fdist = y + coefx = 1., 0., 0., 0., 1. ! Equidistant mesh + coefy = 1., 0., 0., 0., 1. ! Equidistant mesh +/ diff --git a/wk/getgrad_perf.in b/wk/getgrad_perf.in new file mode 100644 index 0000000..0777787 --- /dev/null +++ b/wk/getgrad_perf.in @@ -0,0 +1,6 @@ +&newrun + nx = 64, + ny = 64, + nidbas = 3,3 + npt = 100000, nits=100 +/ diff --git a/wk/gridval_perf.in b/wk/gridval_perf.in new file mode 100644 index 0000000..7e18794 --- /dev/null +++ b/wk/gridval_perf.in @@ -0,0 +1,6 @@ +&newrun + nx = 64, + ny = 64, + nidbas = 3, 3 + npt = 100000, nits=100 +/ diff --git a/wk/mesh.in b/wk/mesh.in new file mode 100644 index 0000000..1af5979 --- /dev/null +++ b/wk/mesh.in @@ -0,0 +1,5 @@ +&newrun + nx = 2000, + coefs = 0., 1., 0., 0., 1. ! fdist = x + coefs = 1., 0., 0., 0., 1. ! Equidistant mesh +/ diff --git a/wk/moments.in b/wk/moments.in new file mode 100644 index 0000000..4deccb5 --- /dev/null +++ b/wk/moments.in @@ -0,0 +1 @@ +1, 0.0, 7.0, 16, 2 diff --git a/wk/optim1.in b/wk/optim1.in new file mode 100644 index 0000000..0b327ec --- /dev/null +++ b/wk/optim1.in @@ -0,0 +1,5 @@ +&newrun + nx = 64, + nidbas = 3, + npt = 10000000 +/ diff --git a/wk/optim2.in b/wk/optim2.in new file mode 100644 index 0000000..b5ae359 --- /dev/null +++ b/wk/optim2.in @@ -0,0 +1,6 @@ +&newrun + nx = 64, + ny = 64, + nidbas = 2, 2 + npt = 10000000 +/ diff --git a/wk/optim3.in b/wk/optim3.in new file mode 100644 index 0000000..060c954 --- /dev/null +++ b/wk/optim3.in @@ -0,0 +1,7 @@ +&newrun + nx = 64, + ny = 64, + nz = 64, + nidbas = 3*1, + npt = 10000000 +/ diff --git a/wk/pde1d.in b/wk/pde1d.in new file mode 100644 index 0000000..4503514 --- /dev/null +++ b/wk/pde1d.in @@ -0,0 +1,9 @@ +&newrun + nx = 32, + nidbas = 3, + ngauss = 4, + kdiff = 10, + nlppform = t, + coefs = 1., 0., 0., 0., 1. ! Equidistant mesh + coefs = 0., 1., 0., 0., 1. ! fdist = x +/ diff --git a/wk/pde1d_eig.in b/wk/pde1d_eig.in new file mode 100644 index 0000000..4e790dc --- /dev/null +++ b/wk/pde1d_eig.in @@ -0,0 +1,11 @@ +&newrun + nx = 32, + nidbas = 3, + ngauss = 4, + kdiff = 10, + nlppform = t, + coefs = 1., 0., 0., 0., 1. ! Equidistant mesh + coefs = 0., 1., 0., 0., 1. ! fdist = x + nev=20, ncv=25, which='SM' +/ + diff --git a/wk/pde1d_eig_zmumps.in b/wk/pde1d_eig_zmumps.in new file mode 100644 index 0000000..1a1321f --- /dev/null +++ b/wk/pde1d_eig_zmumps.in @@ -0,0 +1,12 @@ +&newrun + nx = 32, + nidbas = 3, + ngauss = 4, + kdiff = 10, + nlppform = t, + coefs = 1., 0., 0., 0., 1. ! Equidistant mesh + coefs = 0., 1., 0., 0., 1. ! fdist = x + nlinv=t, nev=20, ncv=25, which='LM' + nlinv=f, nev=20, ncv=25, which='SM' + tol=1.e-6 +/ diff --git a/wk/pde1dp.in b/wk/pde1dp.in new file mode 100644 index 0000000..1500aaa --- /dev/null +++ b/wk/pde1dp.in @@ -0,0 +1,8 @@ +&newrun + nx = 10, + nidbas = 3, + ngauss = 4, + ibcoef = 1, + coefs = 0., 1., 0., 0., 1. ! fdist = x + coefs = 1., 0., 0., 0., 1. ! Equidistant mesh +/ diff --git a/wk/pde1dp_cmpl.in b/wk/pde1dp_cmpl.in new file mode 100644 index 0000000..32035be --- /dev/null +++ b/wk/pde1dp_cmpl.in @@ -0,0 +1,10 @@ +&newrun + nx = 8, + nidbas = 3, + ngauss = 4, + nlequid = f, + alpha = (1.0,1.0), + beta = (0.2,0.0), + mmode=3, + npt = 100, +/ diff --git a/wk/pde1dp_cmpl_dft.in b/wk/pde1dp_cmpl_dft.in new file mode 100644 index 0000000..c47df20 --- /dev/null +++ b/wk/pde1dp_cmpl_dft.in @@ -0,0 +1,9 @@ +&newrun + nx = 8, + nidbas = 3, + ngauss = 4, + alpha = (1.0,1.0), + beta = (0.2,0.0), + mmode=3, + npt = 100, +/ diff --git a/wk/pde1dp_cmpl_mumps.in b/wk/pde1dp_cmpl_mumps.in new file mode 100644 index 0000000..f6fb39a --- /dev/null +++ b/wk/pde1dp_cmpl_mumps.in @@ -0,0 +1,13 @@ +&newrun + nx = 128, + nidbas = 3, + ngauss = 4, + nlequid = t, + alpha = (1.0,1.0), + beta = (0.2,0.0), + mmode=3, + npt = 100, + nlsym = t, + nlherm = f, + nlpos = f, +/ diff --git a/wk/pde1dp_cmpl_pardiso.in b/wk/pde1dp_cmpl_pardiso.in new file mode 100644 index 0000000..f32f54c --- /dev/null +++ b/wk/pde1dp_cmpl_pardiso.in @@ -0,0 +1,13 @@ +&newrun + nx = 128, + nidbas = 3, + ngauss = 4, + nlequid = t, + alpha = (1.0,1.0), + beta = (0.2,0.0), + mmode=3, + npt = 100, + nlsym = f, + nlherm = f, + nlpos = f, +/ diff --git a/wk/pde1dp_cmpl_wsmp.in b/wk/pde1dp_cmpl_wsmp.in new file mode 100644 index 0000000..2155ffe --- /dev/null +++ b/wk/pde1dp_cmpl_wsmp.in @@ -0,0 +1,13 @@ +&newrun + nx = 32, + nidbas = 3, + ngauss = 4, + nlequid = t, + alpha = (1.0,1.0), + beta = (0.2,0.0), + mmode=3, + npt = 100, + nlsym = t, + nlherm = f, + nlpos = f, +/ diff --git a/wk/pde2d.in b/wk/pde2d.in new file mode 100644 index 0000000..9e665ca --- /dev/null +++ b/wk/pde2d.in @@ -0,0 +1,10 @@ +&newrun + nx = 32, ny = 32, + nidbas = 3,3, + ngauss = 4,4, + mbess = 3, + nlppform = t, + nlconmat = t, + coefx = 1., 0., 0., 0., 1. ! Equidistant mesh + coefy = 1., 0., 0., 0., 1. ! Equidistant mesh +/ diff --git a/wk/pde2d_mumps.in b/wk/pde2d_mumps.in new file mode 100644 index 0000000..33d4553 --- /dev/null +++ b/wk/pde2d_mumps.in @@ -0,0 +1,16 @@ +&newrun + debug_mumps=t + nx = 32, ny = 32, + nx =512, ny=256 + nidbas = 3,3, + ngauss = 4,4, + mbess = 3, + nlppform = t, + nlsym = f, nlpos=t, + nlmetis = f, + nlforce_zero = t, + nlserial = f, + coefx = 1., 0., 0., 0., 1. ! Equidistant mesh + coefy = 1., 0., 0., 0., 1. ! Equidistant mesh + matfile = '' +/ diff --git a/wk/pde2d_nh.in b/wk/pde2d_nh.in new file mode 100644 index 0000000..daf02ef --- /dev/null +++ b/wk/pde2d_nh.in @@ -0,0 +1,10 @@ +&newrun + nx =32, ny = 32, + nidbas = 3,4, + ngauss = 4,5, + nlfix = f, + mbess = 3, + nlppform = t, + coefx = 1., 0., 0., 0., 1. ! Equidistant mesh + coefy = 1., 0., 0., 0., 1. ! Equidistant mesh +/ diff --git a/wk/pde2d_pardiso.in b/wk/pde2d_pardiso.in new file mode 100644 index 0000000..e146f9d --- /dev/null +++ b/wk/pde2d_pardiso.in @@ -0,0 +1,12 @@ +&newrun + nx = 32, ny = 32, + nidbas = 3,3, + ngauss = 4,4, + mbess = 3, + nlppform = t, + nlmetis = f, + nlforce_zero = t, + nlconmat = t, + coefx = 1., 0., 0., 0., 1. ! Equidistant mesh + coefy = 1., 0., 0., 0., 1. ! Equidistant mesh +/ diff --git a/wk/pde2d_pb.in b/wk/pde2d_pb.in new file mode 120000 index 0000000..1ae3c0a --- /dev/null +++ b/wk/pde2d_pb.in @@ -0,0 +1 @@ +pde2d.in \ No newline at end of file diff --git a/wk/pde2d_petsc.in b/wk/pde2d_petsc.in new file mode 100644 index 0000000..5a53680 --- /dev/null +++ b/wk/pde2d_petsc.in @@ -0,0 +1,13 @@ +&newrun + nx = 32, ny = 32, + nidbas = 3,3, + ngauss = 4,4, + mbess = 3, + nlppform = t, + nlsym = f, + nlforce_zero = t, + coefx = 1., 0., 0., 0., 1. ! Equidistant mesh + coefy = 1., 0., 0., 0., 1. ! Equidistant mesh + nitmax=10000, rtol=1.e-9, + matfile = 'mat_32.dat' +/ diff --git a/wk/pde2d_pwsmp.in b/wk/pde2d_pwsmp.in new file mode 100644 index 0000000..d85f248 --- /dev/null +++ b/wk/pde2d_pwsmp.in @@ -0,0 +1,11 @@ +&newrun + nx = 32, ny = 32, + nidbas = 3,3, + ngauss = 4,4, + mbess = 3, + nlppform = t, + nlforce_zero = t, + nlsym = f, + coefx = 1., 0., 0., 0., 1. ! Equidistant mesh + coefy = 1., 0., 0., 0., 1. ! Equidistant mesh +/ diff --git a/wk/pde2d_sym_pardiso.in b/wk/pde2d_sym_pardiso.in new file mode 100644 index 0000000..de9eb4f --- /dev/null +++ b/wk/pde2d_sym_pardiso.in @@ -0,0 +1,13 @@ +&newrun + nx = 32, ny = 32, + nidbas = 3,3, + ngauss = 4,4, + mbess = 3, + epsi = 0.0, + nlppform = t, + nlmetis = f, + nlforce_zero = t, + nlpos = t, + coefx = 1., 0., 0., 0., 1. ! Equidistant mesh + coefy = 1., 0., 0., 0., 1. ! Equidistant mesh +/ diff --git a/wk/pde2d_sym_pardiso_dft.in b/wk/pde2d_sym_pardiso_dft.in new file mode 100644 index 0000000..6065900 --- /dev/null +++ b/wk/pde2d_sym_pardiso_dft.in @@ -0,0 +1,16 @@ +&newrun + nx = 32, ny = 32, + nidbas = 3,3, + ngauss = 4,4, + kmin=-4, kmax=4, + mbess = 3, + epsi = 0.9, + nlppform = t, + nlmetis = f, + nlforce_zero = t, + nlpos = t, + coefx = 1., 0., 0., 0., 1. ! Equidistant mesh + coefy = 1., 0., 0., 0., 1. ! Equidistant mesh +/ +3 +-1 0 1 diff --git a/wk/pde2d_sym_wsmp.in b/wk/pde2d_sym_wsmp.in new file mode 100644 index 0000000..d1694bb --- /dev/null +++ b/wk/pde2d_sym_wsmp.in @@ -0,0 +1,11 @@ +&newrun + nx = 32, ny = 32, + nidbas = 3,3, + ngauss = 4,4, + mbess = 3, + nlppform = t, + nlforce_zero = t, + nlpos = t, + coefx = 1., 0., 0., 0., 1. ! Equidistant mesh + coefy = 1., 0., 0., 0., 1. ! Equidistant mesh +/ diff --git a/wk/pde2d_sym_wsmp_dft.in b/wk/pde2d_sym_wsmp_dft.in new file mode 100644 index 0000000..1b01d91 --- /dev/null +++ b/wk/pde2d_sym_wsmp_dft.in @@ -0,0 +1,15 @@ +&newrun + nx = 32, ny = 32, + nidbas = 3,3, + ngauss = 4,4, + kmin=-4, kmax=4, + mbess = 3, + epsi = 0.9, + nlppform = t, + nlforce_zero = t, + nlpos = t, + coefx = 1., 0., 0., 0., 1. ! Equidistant mesh + coefy = 1., 0., 0., 0., 1. ! Equidistant mesh +/ +3 +-1 0 1 diff --git a/wk/pde2d_wsmp.in b/wk/pde2d_wsmp.in new file mode 100644 index 0000000..9cd0dca --- /dev/null +++ b/wk/pde2d_wsmp.in @@ -0,0 +1,10 @@ +&newrun + nx = 32, ny = 32, + nidbas = 3,3, + ngauss = 4,4, + mbess = 3, + nlppform = t, + nlforce_zero = t, + coefx = 1., 0., 0., 0., 1. ! Equidistant mesh + coefy = 1., 0., 0., 0., 1. ! Equidistant mesh +/ diff --git a/wk/pde3d.in b/wk/pde3d.in new file mode 100644 index 0000000..76f58d3 --- /dev/null +++ b/wk/pde3d.in @@ -0,0 +1,10 @@ +&newrun + nx = 32, ny = 16, nz=16 + nx = 64, ny = 64, nz=32 + nx = 32, ny = 16, nz=8 + nidbas = 3,3,3 + ngauss = 4,4,4 + mbess = 3, npow=2, + nlppform = f, + coefx = 1., 0., 0., 0., 1. ! Equidistant mesh +/ diff --git a/wk/poisson_petsc.in b/wk/poisson_petsc.in new file mode 100644 index 0000000..af12ee2 --- /dev/null +++ b/wk/poisson_petsc.in @@ -0,0 +1,6 @@ +&newrun + nx = 256, ny = 256, + nitmax=10000, rtol=1.e-9 + matfile='mat_256x256.dat' + rhsfile='rhs_256x256.dat' +/ diff --git a/wk/ppde3d.in b/wk/ppde3d.in new file mode 100644 index 0000000..76f58d3 --- /dev/null +++ b/wk/ppde3d.in @@ -0,0 +1,10 @@ +&newrun + nx = 32, ny = 16, nz=16 + nx = 64, ny = 64, nz=32 + nx = 32, ny = 16, nz=8 + nidbas = 3,3,3 + ngauss = 4,4,4 + mbess = 3, npow=2, + nlppform = f, + coefx = 1., 0., 0., 0., 1. ! Equidistant mesh +/ diff --git a/wk/ppde3d_pb.in b/wk/ppde3d_pb.in new file mode 100644 index 0000000..76f58d3 --- /dev/null +++ b/wk/ppde3d_pb.in @@ -0,0 +1,10 @@ +&newrun + nx = 32, ny = 16, nz=16 + nx = 64, ny = 64, nz=32 + nx = 32, ny = 16, nz=8 + nidbas = 3,3,3 + ngauss = 4,4,4 + mbess = 3, npow=2, + nlppform = f, + coefx = 1., 0., 0., 0., 1. ! Equidistant mesh +/ diff --git a/wk/runtest.sh b/wk/runtest.sh new file mode 100644 index 0000000..810d2bb --- /dev/null +++ b/wk/runtest.sh @@ -0,0 +1,37 @@ +# +# @file runtest.sh +# +# @brief +# +# @copyright +# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) +# SPC (Swiss Plasma Center) +# +# spclibs is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . +# +# @authors +# (in alphabetical order) +# @author Trach-Minh Tran +# +#!/bin/sh + +progname=$1 +input_dir=$2 + +prog=$(basename ${progname}) +input_file=${input_dir}/${prog}.in + +${progname} < $input_file + +exit $? + diff --git a/wk/runtests b/wk/runtests new file mode 100755 index 0000000..3ae622e --- /dev/null +++ b/wk/runtests @@ -0,0 +1,23 @@ +#!/bin/sh +for e in ../bin/*; do + c=$(basename $e); + if [ -f $c.out ]; then #run only if reference output file exists. + echo -ne "\n*** Running $c ... " + temp=$c.$$ + if [ -f $c.in ]; then + $e < $c.in | grep -v 'time (s)' | grep -v 'Memory used' > $temp + else + $e | grep -v 'time (s)' | grep -v 'Memory used' > $temp + fi + diff -w $c.out $temp >/dev/null + stat=$? + if [ $stat -eq 1 ]; then + echo "test failed! ***" + echo "*** Diff of $c.out $temp ***" + diff -w $c.out $temp + else + echo "test passed! ***" + fi + rm $temp + fi +done diff --git a/wk/runtests.bgp b/wk/runtests.bgp new file mode 100755 index 0000000..4559b03 --- /dev/null +++ b/wk/runtests.bgp @@ -0,0 +1,37 @@ +#!/bin/sh +part=$1 +part=${part:?"missing (R00-M0-00 for example)"} + +d=`pwd` +b=$d/../bin +w=/bgscratch/$USER + +cd $w +rm -f *.h5 +cp -p $d/*.out ./ +cp -p $d/*.in ./ + +EXEC="mpirun -nofree -mode VN -np 1 -cwd $w -partition $part -exe" + +for e in $b/*; do + c=$(basename $e); + if [ -f $c.out ]; then #run only if reference output file exists. + echo -ne "\n*** Running $c ... " + temp=$c.$$ + if [ -f $c.in ]; then + $EXEC $e < $c.in | grep -v 'time (s)' | grep -v 'Memory used' > $temp + else + $EXEC $e | grep -v 'time (s)' | grep -v 'Memory used' > $temp + fi + diff -w $c.out $temp >/dev/null + stat=$? + if [ $stat -eq 1 ]; then + echo "test failed! ***" + echo "*** Diff of $c.out $temp ***" + diff -w $c.out $temp + else + echo "test passed! ***" + fi + rm $temp + fi +done diff --git a/wk/runtests.mac b/wk/runtests.mac new file mode 100644 index 0000000..77abbac --- /dev/null +++ b/wk/runtests.mac @@ -0,0 +1,487 @@ + +*** Running driv1 ... test failed! *** +*** Diff of driv1.out driv1.4071 *** +6,10c6,10 +< A = 0.000000000000000E+000, +< B = 1.00000000000000 , +< COEFX = 1.00000000000000 , 0.000000000000000E+000, 10.0000000000000 , 0.500000000000000 , +< 0.200000000000000 , +< COEFY = 1.00000000000000 , 3*0.000000000000000E+000 , 1.00000000000000 +--- +> A= 0.0000000000000000 , +> B= 1.0000000000000000 , +> COEFX= 1.0000000000000000 , 0.0000000000000000 , 10.000000000000000 , 0.50000000000000000 , 0.20000000000000001 , +> +> COEFY= 1.0000000000000000 , 3*0.0000000000000000 , 1.0000000000000000 , + +*** Running driv2 ... test failed! *** +*** Diff of driv2.out driv2.4071 *** +6,8c6,8 +< A = 0.000000000000000E+000, +< B = 1.00000000000000 , +< COEFX = 0.000000000000000E+000, 1.00000000000000 , 2*0.000000000000000E+000 , 1.00000000000000 +--- +> A= 0.0000000000000000 , +> B= 1.0000000000000000 , +> COEFX= 0.0000000000000000 , 1.0000000000000000 , 2*0.0000000000000000 , 1.0000000000000000 , +25c25 +< Sum of finteg 1.00000000000000 +--- +> Sum of finteg 0.99999999999999967 +29c29 +< Sum of finteg 1.00000000000000 +--- +> Sum of finteg 0.99999999999999967 + +*** Running driv3 ... test failed! *** +*** Diff of driv3.out driv3.4071 *** +3,5c3,5 +< A = 0.000000000000000E+000, +< B = 1.00000000000000 , +< COEFX = 1.00000000000000 , 3*0.000000000000000E+000 , 1.00000000000000 , +--- +> A= 0.0000000000000000 , +> B= 1.0000000000000000 , +> COEFX= 1.0000000000000000 , 3*0.0000000000000000 , 1.0000000000000000 , +9c9 +< PERIODIC2 = F +--- +> PERIODIC2=F, +19c19 +< 0.000 0.000 0.000 0.188 0.313 0.438 0.562 0.687 0.812 1.000 +--- +> 0.000 0.000 0.000 0.188 0.313 0.438 0.563 0.687 0.812 1.000 + +*** Running fit1d ... test failed! *** +*** Diff of fit1d.out fit1d.4071 *** +4,6c4,6 +< A = 0.000000000000000E+000, +< B = 1.00000000000000 , +< COEFX = 1.00000000000000 , 3*0.000000000000000E+000 , 1.00000000000000 +--- +> A= 0.0000000000000000 , +> B= 1.0000000000000000 , +> COEFX= 1.0000000000000000 , 3*0.0000000000000000 , 1.0000000000000000 , + +*** Running fit1dbc ... test failed! *** +*** Diff of fit1dbc.out fit1dbc.4071 *** +4,8c4,9 +< A = 0.000000000000000E+000, +< B = 1.00000000000000 , +< COEFX = 1.00000000000000 , 3*0.000000000000000E+000 , 1.00000000000000 , +< IBC = 4*2, 2*3, 2*5, 2*6, 2*7, 2*8, 2*9, 2*10, 2*11, +< FBC = 20*0.000000000000000E+000 +--- +> A= 0.0000000000000000 , +> B= 1.0000000000000000 , +> COEFX= 1.0000000000000000 , 3*0.0000000000000000 , 1.0000000000000000 , +> IBC= 4*2 , 2*3 , 2*5 , 2*6 , 2*7 , +> 2*8 , 2*9 , 2*10 , 2*11 , +> FBC= 20*0.0000000000000000 , + +*** Running fit1dp ... test failed! *** +*** Diff of fit1dp.out fit1dp.4071 *** +4,6c4,6 +< A = 0.000000000000000E+000, +< B = 1.00000000000000 , +< COEFX = 1.00000000000000 , 3*0.000000000000000E+000 , 1.00000000000000 +--- +> A= 0.0000000000000000 , +> B= 1.0000000000000000 , +> COEFX= 1.0000000000000000 , 3*0.0000000000000000 , 1.0000000000000000 , + +*** Running fit2d ... test failed! *** +*** Diff of fit2d.out fit2d.4071 *** +6,7c6,7 +< COEFX = 1.00000000000000 , 3*0.000000000000000E+000 , 1.00000000000000 , +< COEFY = 1.00000000000000 , 3*0.000000000000000E+000 , 1.00000000000000 +--- +> COEFX= 1.0000000000000000 , 3*0.0000000000000000 , 1.0000000000000000 , +> COEFY= 1.0000000000000000 , 3*0.0000000000000000 , 1.0000000000000000 , +14,16c14,16 +< 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 +< 0.000 -0.019 -0.073 -0.156 -0.256 -0.357 -0.438 -0.475 -0.438 -0.293 0.000 +< 0.000 -0.012 -0.045 -0.096 -0.158 -0.220 -0.271 -0.294 -0.271 -0.181 0.000 +--- +> -0.000 -0.000 -0.000 -0.000 -0.000 -0.000 -0.000 -0.000 -0.000 -0.000 -0.000 +> -0.000 -0.019 -0.073 -0.156 -0.256 -0.357 -0.438 -0.475 -0.438 -0.293 -0.000 +> -0.000 -0.012 -0.045 -0.096 -0.158 -0.220 -0.271 -0.294 -0.271 -0.181 -0.000 +19,21c19,21 +< 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 +< 0.000 -0.019 -0.073 -0.156 -0.256 -0.357 -0.438 -0.475 -0.438 -0.293 0.000 +< 0.000 -0.012 -0.045 -0.096 -0.158 -0.220 -0.271 -0.294 -0.271 -0.181 0.000 +--- +> -0.000 -0.000 -0.000 -0.000 -0.000 -0.000 -0.000 -0.000 -0.000 -0.000 -0.000 +> -0.000 -0.019 -0.073 -0.156 -0.256 -0.357 -0.438 -0.475 -0.438 -0.293 -0.000 +> -0.000 -0.012 -0.045 -0.096 -0.158 -0.220 -0.271 -0.294 -0.271 -0.181 -0.000 + +*** Running fit2d1d ... test failed! *** +*** Diff of fit2d1d.out fit2d1d.4071 *** +6,7c6,7 +< COEFX = 1.00000000000000 , 3*0.000000000000000E+000 , 1.00000000000000 , +< COEFY = 1.00000000000000 , 3*0.000000000000000E+000 , 1.00000000000000 +--- +> COEFX= 1.0000000000000000 , 3*0.0000000000000000 , 1.0000000000000000 , +> COEFY= 1.0000000000000000 , 3*0.0000000000000000 , 1.0000000000000000 , +13c13 +< Min max or errors -7.903E-07 7.937E-07 +--- +> Min max or errors -7.947E-07 7.760E-07 + +*** Running fit2d_cmpl ... test failed! *** +*** Diff of fit2d_cmpl.out fit2d_cmpl.4071 *** +6,7c6,7 +< COEFX = 1.00000000000000 , 3*0.000000000000000E+000 , 1.00000000000000 , +< COEFY = 1.00000000000000 , 3*0.000000000000000E+000 , 1.00000000000000 +--- +> COEFX= 1.0000000000000000 , 3*0.0000000000000000 , 1.0000000000000000 , +> COEFY= 1.0000000000000000 , 3*0.0000000000000000 , 1.0000000000000000 , +10c10 +< Max errors (on random points) 8.273E-07 +--- +> Max errors (on random points) 8.274E-07 + +*** Running fit2dbc ... test failed! *** +*** Diff of fit2dbc.out fit2dbc.4071 *** +6,7c6,7 +< COEFX = 1.00000000000000 , 3*0.000000000000000E+000 , 1.00000000000000 , +< COEFY = 1.00000000000000 , 3*0.000000000000000E+000 , 1.00000000000000 +--- +> COEFX= 1.0000000000000000 , 3*0.0000000000000000 , 1.0000000000000000 , +> COEFY= 1.0000000000000000 , 3*0.0000000000000000 , 1.0000000000000000 , +14c14 +< 0.000 -0.014 -0.052 -0.108 -0.176 -0.250 -0.324 -0.392 -0.448 -0.486 -0.500 +--- +> -0.000 -0.014 -0.052 -0.108 -0.176 -0.250 -0.324 -0.392 -0.448 -0.486 -0.500 +25,26d24 +< Memory used so far (MB) = 6.797 +< Memory used so far (MB) = 10.465 +34c32 +< Min max or errors -3.331E-16 5.551E-16 +--- +> Min max or errors -6.106E-16 5.551E-16 + +*** Running fit2dbc_x ... test failed! *** +*** Diff of fit2dbc_x.out fit2dbc_x.4071 *** +6,7c6,7 +< COEFX = 1.00000000000000 , 3*0.000000000000000E+000 , 1.00000000000000 , +< COEFY = 1.00000000000000 , 3*0.000000000000000E+000 , 1.00000000000000 +--- +> COEFX= 1.0000000000000000 , 3*0.0000000000000000 , 1.0000000000000000 , +> COEFY= 1.0000000000000000 , 3*0.0000000000000000 , 1.0000000000000000 , +14,16c14,16 +< 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 +< 0.000 -0.019 -0.073 -0.156 -0.256 -0.357 -0.438 -0.475 -0.438 -0.293 0.000 +< 0.000 -0.012 -0.045 -0.096 -0.158 -0.220 -0.271 -0.294 -0.271 -0.181 0.000 +--- +> -0.000 -0.000 -0.000 -0.000 -0.000 -0.000 -0.000 -0.000 -0.000 -0.000 -0.000 +> -0.000 -0.019 -0.073 -0.156 -0.256 -0.357 -0.438 -0.475 -0.438 -0.293 -0.000 +> -0.000 -0.012 -0.045 -0.096 -0.158 -0.220 -0.271 -0.294 -0.271 -0.181 -0.000 +19,21c19,21 +< 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 +< 0.000 -0.019 -0.073 -0.156 -0.256 -0.357 -0.438 -0.475 -0.438 -0.293 0.000 +< 0.000 -0.012 -0.045 -0.096 -0.158 -0.220 -0.271 -0.294 -0.271 -0.181 0.000 +--- +> -0.000 -0.000 -0.000 -0.000 -0.000 -0.000 -0.000 -0.000 -0.000 -0.000 -0.000 +> -0.000 -0.019 -0.073 -0.156 -0.256 -0.357 -0.438 -0.475 -0.438 -0.293 -0.000 +> -0.000 -0.012 -0.045 -0.096 -0.158 -0.220 -0.271 -0.294 -0.271 -0.181 -0.000 + +*** Running fit2dbc_y ... test failed! *** +*** Diff of fit2dbc_y.out fit2dbc_y.4071 *** +6,7c6,7 +< COEFX = 1.00000000000000 , 3*0.000000000000000E+000 , 1.00000000000000 , +< COEFY = 1.00000000000000 , 3*0.000000000000000E+000 , 1.00000000000000 +--- +> COEFX= 1.0000000000000000 , 3*0.0000000000000000 , 1.0000000000000000 , +> COEFY= 1.0000000000000000 , 3*0.0000000000000000 , 1.0000000000000000 , +14,16c14,16 +< 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 +< 0.000 -0.019 -0.073 -0.156 -0.256 -0.357 -0.438 -0.475 -0.438 -0.293 0.000 +< 0.000 -0.012 -0.045 -0.096 -0.158 -0.220 -0.271 -0.294 -0.271 -0.181 0.000 +--- +> -0.000 -0.000 -0.000 -0.000 -0.000 -0.000 -0.000 -0.000 -0.000 -0.000 -0.000 +> -0.000 -0.019 -0.073 -0.156 -0.256 -0.357 -0.438 -0.475 -0.438 -0.293 -0.000 +> -0.000 -0.012 -0.045 -0.096 -0.158 -0.220 -0.271 -0.294 -0.271 -0.181 -0.000 +19,21c19,21 +< 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 +< 0.000 -0.019 -0.073 -0.156 -0.256 -0.357 -0.438 -0.475 -0.438 -0.293 0.000 +< 0.000 -0.012 -0.045 -0.096 -0.158 -0.220 -0.271 -0.294 -0.271 -0.181 0.000 +--- +> -0.000 -0.000 -0.000 -0.000 -0.000 -0.000 -0.000 -0.000 -0.000 -0.000 -0.000 +> -0.000 -0.019 -0.073 -0.156 -0.256 -0.357 -0.438 -0.475 -0.438 -0.293 -0.000 +> -0.000 -0.012 -0.045 -0.096 -0.158 -0.220 -0.271 -0.294 -0.271 -0.181 -0.000 + +*** Running moments ... test passed! *** + +*** Running pde1d ... test failed! *** +*** Diff of pde1d.out pde1d.4071 *** +7c7 +< COEFS = 0.000000000000000E+000, 1.00000000000000 , 2*0.000000000000000E+000 , 1.00000000000000 +--- +> COEFS= 0.0000000000000000 , 1.0000000000000000 , 2*0.0000000000000000 , 1.0000000000000000 , +29,31d28 +< Matrice construction time (s) 1.034E-03 +< Matrice factorisation time (s) 4.665E-03 +< Backsolve time (s) 5.889E-05 + +*** Running pde1dp ... test failed! *** +*** Diff of pde1dp.out pde1dp.4071 *** +6c6 +< COEFS = 1.00000000000000 , 3*0.000000000000000E+000 , 1.00000000000000 +--- +> COEFS= 1.0000000000000000 , 3*0.0000000000000000 , 1.0000000000000000 , +33,39c33,39 +< 1 1.000E+00 1.000E+00 0.000E+00 +< 2 1.908E-17 0.000E+00 1.908E-17 +< 3 -1.041E-17 0.000E+00 -1.041E-17 +< 4 6.939E-18 0.000E+00 6.939E-18 +< 5 -6.939E-18 0.000E+00 -6.939E-18 +< 6 1.388E-17 0.000E+00 1.388E-17 +< 7 -2.776E-17 0.000E+00 -2.776E-17 +--- +> 1 1.000E+00 1.000E+00 2.220E-16 +> 2 -2.099E-16 0.000E+00 -2.099E-16 +> 3 1.180E-16 0.000E+00 1.180E-16 +> 4 -6.245E-17 0.000E+00 -6.245E-17 +> 5 3.469E-17 0.000E+00 3.469E-17 +> 6 -1.388E-17 0.000E+00 -1.388E-17 +> 7 -0.000E+00 0.000E+00 -0.000E+00 +41c41 +< 9 0.000E+00 0.000E+00 0.000E+00 +--- +> 9 -0.000E+00 0.000E+00 -0.000E+00 +43c43 +< Max. error = 1.110E-16 +--- +> Max. error = 2.220E-16 + +*** Running pde1dp_cmpl ... test failed! *** +*** Diff of pde1dp_cmpl.out pde1dp_cmpl.4071 *** +6,7c6,7 +< ALPHA = (1.00000000000000,1.00000000000000), +< BETA = (0.200000000000000,0.000000000000000E+000), +--- +> ALPHA=( 1.0000000000000000 , 1.0000000000000000 ), +> BETA=( 0.20000000000000001 , 0.0000000000000000 ), +9c9 +< NPT = 100 +--- +> NPT= 100, + +*** Running pde2d ... test failed! *** +*** Diff of pde2d.out pde2d.4071 *** +8,9c8,9 +< COEFX = 1.00000000000000 , 3*0.000000000000000E+000 , 1.00000000000000 , +< COEFY = 1.00000000000000 , 3*0.000000000000000E+000 , 1.00000000000000 +--- +> COEFX= 1.0000000000000000 , 3*0.0000000000000000 , 1.0000000000000000 , +> COEFY= 1.0000000000000000 , 3*0.0000000000000000 , 1.0000000000000000 , +35,36c35,36 +< Mem used so far (MB) 1.104E+01 +< Integral of sol -8.174E-16 +--- +> Mem used so far (MB) -1.000E+00 +> Integral of sol -8.877E-16 +40d39 +< GRIDVAL2 time (s) 2.297E-04 +47,50c46 +< Matrice construction time (s) 8.840E-01 +< Matrice factorisation time (s) 2.261E-02 +< Backsolve time (s) 3.321E-04 +< Factor/solve Gflop/s 1.344 1.153 +--- +> Factor/solve Gflop/s 2.406 0.556 + +*** Running pde2d_pb ... test failed! *** +*** Diff of pde2d_pb.out pde2d_pb.4071 *** +8,9c8,9 +< COEFX = 1.00000000000000 , 3*0.000000000000000E+000 , 1.00000000000000 , +< COEFY = 1.00000000000000 , 3*0.000000000000000E+000 , 1.00000000000000 +--- +> COEFX= 1.0000000000000000 , 3*0.0000000000000000 , 1.0000000000000000 , +> COEFY= 1.0000000000000000 , 3*0.0000000000000000 , 1.0000000000000000 , +35,36c35,36 +< Mem used so far (MB) 7.594E+00 +< Integral of sol -7.480E-16 +--- +> Mem used so far (MB) -1.000E+00 +> Integral of sol -8.704E-16 +40d39 +< GRIDVAL2 time (s) 2.228E-04 +47,50c46 +< Matrice construction time (s) 7.885E-01 +< Matrice factorisation time (s) 4.689E-03 +< Backsolve time (s) 1.800E-04 +< Factor/solve Gflop/s 1.828 1.542 +--- +> Factor/solve Gflop/s 0.914 0.557 + +*** Running pde3d ... test failed! *** +*** Diff of pde3d.out pde3d.4071 *** +10c10 +< COEFX = 1.00000000000000 , 3*0.000000000000000E+000 , 1.00000000000000 +--- +> COEFX= 1.0000000000000000 , 3*0.0000000000000000 , 1.0000000000000000 , +42c42 +< Mem used so far (MB) 1.032E+01 +--- +> Mem used so far (MB) -1.000E+00 +46a47,57 +> X CALC ANAL ERROR +> 9.976E-01 4.892E-03 4.839E-03 5.282E-05 +> 5.668E-01 1.249E-01 1.236E-01 1.340E-03 +> 9.659E-01 6.105E-02 6.039E-02 6.594E-04 +> 7.479E-01 1.864E-01 1.843E-01 2.007E-03 +> 3.674E-01 4.336E-02 4.290E-02 4.618E-04 +> 4.806E-01 8.631E-02 8.538E-02 9.235E-04 +> 7.375E-02 4.032E-04 3.990E-04 4.163E-06 +> 5.355E-03 1.562E-07 1.536E-07 2.578E-09 +> 3.471E-01 3.717E-02 3.677E-02 3.955E-04 +> 3.422E-01 3.577E-02 3.539E-02 3.805E-04 +53,57c64,65 +< Matrice construction time (s) 5.769E-01 +< Matrice factorisation time (s) 4.341E-03 +< Backsolve time (s) 2.255E-03 +< Factor/solve Gflop/s 1.804 0.085 +< Mem used so far (MB) 1.245E+01 +--- +> Factor/solve Gflop/s 1.842 0.092 +> Mem used so far (MB) -1.000E+00 + +*** Running tbasfun ... test failed! *** +*** Diff of tbasfun.out tbasfun.4071 *** +4c4 +< NPT = 10 +--- +> NPT= 10, +7c7 +< 0.000 0.016 0.031 0.047 0.062 0.078 0.094 0.109 0.125 0.141 +--- +> 0.000 0.016 0.031 0.047 0.063 0.078 0.094 0.109 0.125 0.141 +9,10c9,10 +< 0.312 0.328 0.344 0.359 0.375 0.391 0.406 0.422 0.438 0.453 +< 0.469 0.484 0.500 0.516 0.531 0.547 0.562 0.578 0.594 0.609 +--- +> 0.313 0.328 0.344 0.359 0.375 0.391 0.406 0.422 0.438 0.453 +> 0.469 0.484 0.500 0.516 0.531 0.547 0.563 0.578 0.594 0.609 +12c12 +< 0.781 0.797 0.812 0.828 0.844 0.859 0.875 0.891 0.906 0.922 +--- +> 0.781 0.797 0.813 0.828 0.844 0.859 0.875 0.891 0.906 0.922 +16c16 +< 0.000 0.000 0.000 0.000 0.016 0.031 0.047 0.062 0.078 0.094 +--- +> 0.000 0.000 0.000 0.000 0.016 0.031 0.047 0.063 0.078 0.094 +18,19c18,19 +< 0.266 0.281 0.297 0.312 0.328 0.344 0.359 0.375 0.391 0.406 +< 0.422 0.438 0.453 0.469 0.484 0.500 0.516 0.531 0.547 0.562 +--- +> 0.266 0.281 0.297 0.313 0.328 0.344 0.359 0.375 0.391 0.406 +> 0.422 0.438 0.453 0.469 0.484 0.500 0.516 0.531 0.547 0.563 +21c21 +< 0.734 0.750 0.766 0.781 0.797 0.812 0.828 0.844 0.859 0.875 +--- +> 0.734 0.750 0.766 0.781 0.797 0.813 0.828 0.844 0.859 0.875 +27,36c27,36 +< 1 3.9209E-07 0 0 0.0000E+00 +< 2 2.5480E-02 1 1 0.0000E+00 +< 3 3.5252E-01 22 22 0.0000E+00 +< 4 6.6691E-01 42 42 0.0000E+00 +< 5 9.6306E-01 61 61 0.0000E+00 +< 6 8.3829E-01 53 53 0.0000E+00 +< 7 3.3536E-01 21 21 0.0000E+00 +< 8 9.1533E-01 58 58 0.0000E+00 +< 9 7.9586E-01 50 50 0.0000E+00 +< 10 8.3269E-01 53 53 0.0000E+00 +--- +> 1 9.9756E-01 63 63 0.0000E+00 +> 2 5.6682E-01 36 36 0.0000E+00 +> 3 9.6592E-01 61 61 0.0000E+00 +> 4 7.4793E-01 47 47 0.0000E+00 +> 5 3.6739E-01 23 23 0.0000E+00 +> 6 4.8064E-01 30 30 0.0000E+00 +> 7 7.3754E-02 4 4 0.0000E+00 +> 8 5.3552E-03 0 0 0.0000E+00 +> 9 3.4708E-01 22 22 0.0000E+00 +> 10 3.4224E-01 21 21 0.0000E+00 +40,49c40,49 +< 1 3.9209E-07 0.0000E+00 +< 2 2.5480E-02 0.0000E+00 +< 3 3.5252E-01 0.0000E+00 +< 4 6.6691E-01 0.0000E+00 +< 5 9.6306E-01 0.0000E+00 +< 6 8.3829E-01 0.0000E+00 +< 7 3.3536E-01 0.0000E+00 +< 8 9.1533E-01 0.0000E+00 +< 9 7.9586E-01 0.0000E+00 +< 10 8.3269E-01 0.0000E+00 +--- +> 1 9.9756E-01 0.0000E+00 +> 2 5.6682E-01 0.0000E+00 +> 3 9.6592E-01 0.0000E+00 +> 4 7.4793E-01 0.0000E+00 +> 5 3.6739E-01 0.0000E+00 +> 6 4.8064E-01 0.0000E+00 +> 7 7.3754E-02 0.0000E+00 +> 8 5.3552E-03 0.0000E+00 +> 9 3.4708E-01 0.0000E+00 +> 10 3.4224E-01 0.0000E+00 + +*** Running tmassmat ... test failed! *** +*** Diff of tmassmat.out tmassmat.4071 *** +4c4 +< XLENGHT = 1.00000000000000 +--- +> XLENGHT= 1.0000000000000000 , + +*** Running tmatrix_gb ... test failed! *** +*** Diff of tmatrix_gb.out tmatrix_gb.4071 *** +69c69 +< Prod. of factored A diagnonals 7650.00000000003 +--- +> Prod. of factored A diagnonals 7650.0000000000291 + +*** Running tmatrix_pb ... test passed! *** + +*** Running tmatrix_zpb ... test failed! *** +*** Diff of tmatrix_zpb.out tmatrix_zpb.4071 *** +32c32 +< ( 0.0, 0.0) ( 0.0, 0.0) ( 0.0, 0.0) ( 34.0, 1.0) ( 45.0, 1.0) +--- +> ( 0.0, 0.0) ( 0.0, -0.0) ( 0.0, 0.0) ( 34.0, 1.0) ( 45.0, 1.0) +35c35 +< ( 11.0, 0.0) ( 0.0, 0.0) ( 13.0, 2.0) ( 14.0, 3.0) ( 0.0, 0.0) +--- +> ( 11.0, 0.0) ( 0.0, -0.0) ( 13.0, 2.0) ( 14.0, 3.0) ( 0.0, 0.0) +37,39c37,39 +< ( 13.0, -2.0) ( 0.0, 0.0) ( 33.0, 0.0) ( 34.0, 1.0) ( 35.0, 2.0) +< ( 14.0, -3.0) ( 0.0, 0.0) ( 34.0, -1.0) ( 44.0, 0.0) ( 45.0, 1.0) +< ( 0.0, 0.0) ( 0.0, 0.0) ( 35.0, -2.0) ( 45.0, -1.0) ( 55.0, 0.0) +--- +> ( 13.0, -2.0) ( 0.0, -0.0) ( 33.0, 0.0) ( 34.0, 1.0) ( 35.0, 2.0) +> ( 14.0, -3.0) ( 0.0, -0.0) ( 34.0, -1.0) ( 44.0, 0.0) ( 45.0, 1.0) +> ( 0.0, 0.0) ( 0.0, -0.0) ( 35.0, -2.0) ( 45.0, -1.0) ( 55.0, 0.0) +41c41 +< ( 11.0, 0.0) ( 0.0, 0.0) ( 13.0, 2.0) ( 14.0, 3.0) ( 0.0, 0.0) +--- +> ( 11.0, 0.0) ( 0.0, -0.0) ( 13.0, 2.0) ( 14.0, 3.0) ( 0.0, 0.0) +43,45c43,45 +< ( 13.0, -2.0) ( 0.0, 0.0) ( 33.0, 0.0) ( 34.0, 1.0) ( 35.0, 2.0) +< ( 14.0, -3.0) ( 0.0, 0.0) ( 34.0, -1.0) ( 44.0, 0.0) ( 45.0, 1.0) +< ( 0.0, 0.0) ( 0.0, 0.0) ( 35.0, -2.0) ( 45.0, -1.0) ( 55.0, 0.0) +--- +> ( 13.0, -2.0) ( 0.0, -0.0) ( 33.0, 0.0) ( 34.0, 1.0) ( 35.0, 2.0) +> ( 14.0, -3.0) ( 0.0, -0.0) ( 34.0, -1.0) ( 44.0, 0.0) ( 45.0, 1.0) +> ( 0.0, 0.0) ( 0.0, -0.0) ( 35.0, -2.0) ( 45.0, -1.0) ( 55.0, 0.0) +47c47 +< ( 11.0, 0.0) ( 0.0, 0.0) ( 13.0, 2.0) ( 14.0, 3.0) ( 0.0, 0.0) +--- +> ( 11.0, 0.0) ( 0.0, -0.0) ( 13.0, 2.0) ( 14.0, 3.0) ( 0.0, 0.0) +49,51c49,51 +< ( 13.0, -2.0) ( 0.0, 0.0) ( 33.0, 0.0) ( 34.0, 1.0) ( 35.0, 2.0) +< ( 14.0, -3.0) ( 0.0, 0.0) ( 34.0, -1.0) ( 44.0, 0.0) ( 45.0, 1.0) +< ( 0.0, 0.0) ( 0.0, 0.0) ( 35.0, -2.0) ( 45.0, -1.0) ( 55.0, 0.0) +--- +> ( 13.0, -2.0) ( 0.0, -0.0) ( 33.0, 0.0) ( 34.0, 1.0) ( 35.0, 2.0) +> ( 14.0, -3.0) ( 0.0, -0.0) ( 34.0, -1.0) ( 44.0, 0.0) ( 45.0, 1.0) +> ( 0.0, 0.0) ( 0.0, -0.0) ( 35.0, -2.0) ( 45.0, -1.0) ( 55.0, 0.0) diff --git a/wk/tbasfun.in b/wk/tbasfun.in new file mode 100644 index 0000000..222b86e --- /dev/null +++ b/wk/tbasfun.in @@ -0,0 +1,7 @@ +&newrun + nx = 64, + nidbas = 2, + npt = 10 + jdermx = 1, + nlper = t, +/ diff --git a/wk/tcdsmat.in b/wk/tcdsmat.in new file mode 100644 index 0000000..e753a37 --- /dev/null +++ b/wk/tcdsmat.in @@ -0,0 +1,13 @@ +&newrun + nints = 32,32 + nidbas = 3,3, + ngauss = 4,4, + mbess = 3, + coefy = 0., 1., 0., 0., 1. ! fdist = y + coefx = 0., 1., 0., 0., 1. ! fdist = x + coefx = 1., 0., 0., 0., 1. ! Equidistant mesh + coefy = 1., 0., 0., 0., 1. ! Equidistant mesh + nitmx=100000, rtolmx=1.e-12, nssor=0, omega=1.6, + readmat=f, verbose=f, + filein="../solver/mat4096.h5", +/ diff --git a/wk/test_kron.in b/wk/test_kron.in new file mode 100644 index 0000000..e69de29 diff --git a/wk/tlocintv.in b/wk/tlocintv.in new file mode 100644 index 0000000..ad5473e --- /dev/null +++ b/wk/tlocintv.in @@ -0,0 +1,11 @@ +&newrun + nx = 64, + nidbas = 3, + ngauss = 4, + a = 0., b=1.0 + np = 20, + nits=10000, + coefs = 0., 1., 0., 0., 1. ! fdist = x + coefs = 1., 0., 1., 0.5, 0.2 ! Gaussian + coefs = 1., 0., 0., 0., 1. ! Equidistant mesh +/ diff --git a/wk/tmassmat.in b/wk/tmassmat.in new file mode 100644 index 0000000..e0ef279 --- /dev/null +++ b/wk/tmassmat.in @@ -0,0 +1,5 @@ +&newrun + nx = 8, + nidbas = 3, + xlenght = 6.283185307179586, +/ diff --git a/wk/tpardiso.in b/wk/tpardiso.in new file mode 100644 index 0000000..fcd6eea --- /dev/null +++ b/wk/tpardiso.in @@ -0,0 +1,9 @@ +&newrun + nints = 32,32 + nidbas = 3,3, + ngauss = 4,4, + mbess = 3, + verbose = f, + readmat = f, + filein="../solver/mat32.h5", +/ diff --git a/wk/tsparse1.in b/wk/tsparse1.in new file mode 100644 index 0000000..a6abc66 --- /dev/null +++ b/wk/tsparse1.in @@ -0,0 +1,11 @@ +2 +19 +13 +5 +20 +1 +13 +10 +4 +2 +0