diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..13c63f7
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+*.[oa]
+*.mod
+*~
+html
+auto
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..03300f2
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,151 @@
+/**
+ * @file CMakeLists.txt
+ *
+ * @brief
+ *
+ * @copyright
+ * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+ * SPC (Swiss Plasma Center)
+ *
+ * spclibs is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * @authors
+ * (in alphabetical order)
+ * @author Nicolas Richart <nicolas.richart@epfl.ch>
+ * @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+ */
+cmake_minimum_required (VERSION 2.8.8)
+#cmake_policy(SET CMP0053 NEW)
+
+project(bsplines_root Fortran C)
+
+#enable_language(Fortran)
+enable_testing()
+
+option(BSPLINES_USE_MUMPS "Activate the mumps interface" ON)
+if(NOT DEFINED BSPLINES_EXAMPLES)
+  option(BSPLINES_EXAMPLES "Compiles the examples" ON)
+endif()
+
+set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PROJECT_SOURCE_DIR}/cmake")
+
+# Assume we are on CRAY if ftn is the Fortran compiler
+if (${CMAKE_Fortran_COMPILER} MATCHES "ftn$")
+  set(CRAY TRUE)
+  if(${CMAKE_Fortran_COMPILER_ID} MATCHES "Cray")
+    set(cray_suffix cray)
+  elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "PGI")
+    set(cray_suffix pgi)
+  elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "Intel")
+    set(cray_suffix intel)
+  elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "GNU")
+    set(cray_suffix gnu)
+  endif()
+else()
+  set(CRAY FALSE)
+endif()
+
+if(POLICY CMP0074)
+  cmake_policy(SET CMP0074 NEW)
+endif()
+
+
+include(CMakeFlagsHandling)
+# Compiler flags for debug/optimization
+if(${CMAKE_Fortran_COMPILER_ID} MATCHES "Cray")
+elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "Intel")
+  set(CMAKE_AR ${XIAR})
+  add_flags(LANG Fortran TYPE DEBUG -traceback "-check bounds" "-warn unused")
+  add_flags(LANG Fortran TYPE RELEASE -xHost)
+elseif(${CMAKE_Fortran_COMPILER_ID} MATCHES "GNU")
+  add_flags(LANG Fortran TYPE DEBUG -fbounds-check -fbacktrace)
+endif()
+
+if(NOT MUMPS)
+  set(MUMPS $ENV{MUMPS_ROOT})
+endif()
+
+# Installation root directory
+if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
+  set(PREFIX $ENV{PREFIX})
+  if(PREFIX)
+    set(${CMAKE_INSTALL_PREFIX} ${PREFIX})
+  else()
+    set(CMAKE_INSTALL_PREFIX ${CMAKE_CURRENT_SOURCE_DIR} CACHE PATH "..." FORCE)
+  endif()
+  message(STATUS "CMAKE_INSTALL_PREFIX is " ${CMAKE_INSTALL_PREFIX})
+endif()
+
+# Search and load the FUTILS configuration file
+if(NOT TARGET futils)
+  find_package(futils PATHS ${FUTILS}/lib/cmake REQUIRED)
+endif()
+
+if(BSPLINES_USE_MUMPS)
+  find_package(Mumps REQUIRED)
+  set(HAS_MUMPS ${MUMPS_FOUND})
+else()
+  set(HAS_MUMPS FALSE)
+endif()
+
+# Find lapack/blas. Skip it if on CRAY!
+if(CRAY)
+  set(BSPLINES_USE_PARDISO OFF)
+endif()
+include(blas)
+
+if(NOT BSPLINES_EXPORT_TARGETS)
+  set(BSPLINES_EXPORT_TARGETS bsplines-targets)
+endif()
+
+find_package(MPI COMPONENTS Fortran REQUIRED)
+
+include(GNUInstallDirs)
+
+add_subdirectory(pppack)
+add_subdirectory(pputils2)
+add_subdirectory(fft)
+add_subdirectory(src)
+
+if(HAS_MUMPS AND BSPLINES_EXAMPLES)
+  add_subdirectory(multigrid)
+endif()
+
+if(BSPLINES_EXAMPLES)
+  add_subdirectory(examples)
+  add_subdirectory(wk)
+endif()
+
+export(TARGETS pppack pputils2 bsplines fft
+  FILE "${CMAKE_BINARY_DIR}/bsplinesLibraryDepends.cmake")
+export(PACKAGE bsplines)
+
+# install configuration files
+if(BSPLINES_EXPORT_TARGETS MATCHES "bsplines-targets")
+  install(EXPORT bsplines-targets
+    DESTINATION lib/cmake
+    )
+
+  configure_file(
+    ${CMAKE_CURRENT_SOURCE_DIR}/cmake/bsplines-config.cmake.in
+    ${CMAKE_CURRENT_BINARY_DIR}/cmake/bsplines-config.cmake @ONLY
+    )
+  install(FILES
+    ${CMAKE_CURRENT_BINARY_DIR}/cmake/bsplines-config.cmake
+    DESTINATION lib/cmake
+    )
+endif()
+
+# enable packaging with CPack
+include(CPack)
+
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..f288702
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,674 @@
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.  We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors.  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+  To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights.  Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received.  You must make sure that they, too, receive
+or can get the source code.  And you must show them these terms so they
+know their rights.
+
+  Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+  For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software.  For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+  Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so.  This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software.  The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable.  Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products.  If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+  Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary.  To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                       TERMS AND CONDITIONS
+
+  0. Definitions.
+
+  "This License" refers to version 3 of the GNU General Public License.
+
+  "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+  "The Program" refers to any copyrightable work licensed under this
+License.  Each licensee is addressed as "you".  "Licensees" and
+"recipients" may be individuals or organizations.
+
+  To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy.  The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+  A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+  To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy.  Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+  To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies.  Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+  An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License.  If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+  1. Source Code.
+
+  The "source code" for a work means the preferred form of the work
+for making modifications to it.  "Object code" means any non-source
+form of a work.
+
+  A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+  The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form.  A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+  The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities.  However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work.  For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+  The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+  The Corresponding Source for a work in source code form is that
+same work.
+
+  2. Basic Permissions.
+
+  All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met.  This License explicitly affirms your unlimited
+permission to run the unmodified Program.  The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work.  This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+  You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force.  You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright.  Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+  Conveying under any other circumstances is permitted solely under
+the conditions stated below.  Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+  No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+  When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+  4. Conveying Verbatim Copies.
+
+  You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+  You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+  5. Conveying Modified Source Versions.
+
+  You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+    a) The work must carry prominent notices stating that you modified
+    it, and giving a relevant date.
+
+    b) The work must carry prominent notices stating that it is
+    released under this License and any conditions added under section
+    7.  This requirement modifies the requirement in section 4 to
+    "keep intact all notices".
+
+    c) You must license the entire work, as a whole, under this
+    License to anyone who comes into possession of a copy.  This
+    License will therefore apply, along with any applicable section 7
+    additional terms, to the whole of the work, and all its parts,
+    regardless of how they are packaged.  This License gives no
+    permission to license the work in any other way, but it does not
+    invalidate such permission if you have separately received it.
+
+    d) If the work has interactive user interfaces, each must display
+    Appropriate Legal Notices; however, if the Program has interactive
+    interfaces that do not display Appropriate Legal Notices, your
+    work need not make them do so.
+
+  A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit.  Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+  6. Conveying Non-Source Forms.
+
+  You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+    a) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by the
+    Corresponding Source fixed on a durable physical medium
+    customarily used for software interchange.
+
+    b) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by a
+    written offer, valid for at least three years and valid for as
+    long as you offer spare parts or customer support for that product
+    model, to give anyone who possesses the object code either (1) a
+    copy of the Corresponding Source for all the software in the
+    product that is covered by this License, on a durable physical
+    medium customarily used for software interchange, for a price no
+    more than your reasonable cost of physically performing this
+    conveying of source, or (2) access to copy the
+    Corresponding Source from a network server at no charge.
+
+    c) Convey individual copies of the object code with a copy of the
+    written offer to provide the Corresponding Source.  This
+    alternative is allowed only occasionally and noncommercially, and
+    only if you received the object code with such an offer, in accord
+    with subsection 6b.
+
+    d) Convey the object code by offering access from a designated
+    place (gratis or for a charge), and offer equivalent access to the
+    Corresponding Source in the same way through the same place at no
+    further charge.  You need not require recipients to copy the
+    Corresponding Source along with the object code.  If the place to
+    copy the object code is a network server, the Corresponding Source
+    may be on a different server (operated by you or a third party)
+    that supports equivalent copying facilities, provided you maintain
+    clear directions next to the object code saying where to find the
+    Corresponding Source.  Regardless of what server hosts the
+    Corresponding Source, you remain obligated to ensure that it is
+    available for as long as needed to satisfy these requirements.
+
+    e) Convey the object code using peer-to-peer transmission, provided
+    you inform other peers where the object code and Corresponding
+    Source of the work are being offered to the general public at no
+    charge under subsection 6d.
+
+  A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+  A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling.  In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage.  For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product.  A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+  "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source.  The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+  If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information.  But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+  The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed.  Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+  Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+  7. Additional Terms.
+
+  "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law.  If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+  When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it.  (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.)  You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+  Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+    a) Disclaiming warranty or limiting liability differently from the
+    terms of sections 15 and 16 of this License; or
+
+    b) Requiring preservation of specified reasonable legal notices or
+    author attributions in that material or in the Appropriate Legal
+    Notices displayed by works containing it; or
+
+    c) Prohibiting misrepresentation of the origin of that material, or
+    requiring that modified versions of such material be marked in
+    reasonable ways as different from the original version; or
+
+    d) Limiting the use for publicity purposes of names of licensors or
+    authors of the material; or
+
+    e) Declining to grant rights under trademark law for use of some
+    trade names, trademarks, or service marks; or
+
+    f) Requiring indemnification of licensors and authors of that
+    material by anyone who conveys the material (or modified versions of
+    it) with contractual assumptions of liability to the recipient, for
+    any liability that these contractual assumptions directly impose on
+    those licensors and authors.
+
+  All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10.  If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term.  If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+  If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+  Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+  8. Termination.
+
+  You may not propagate or modify a covered work except as expressly
+provided under this License.  Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+  However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+  Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+  Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+  9. Acceptance Not Required for Having Copies.
+
+  You are not required to accept this License in order to receive or
+run a copy of the Program.  Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance.  However,
+nothing other than this License grants you permission to propagate or
+modify any covered work.  These actions infringe copyright if you do
+not accept this License.  Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+  10. Automatic Licensing of Downstream Recipients.
+
+  Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License.  You are not responsible
+for enforcing compliance by third parties with this License.
+
+  An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations.  If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+  You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License.  For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+  11. Patents.
+
+  A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based.  The
+work thus licensed is called the contributor's "contributor version".
+
+  A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version.  For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+  Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+  In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement).  To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+  If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients.  "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+  If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+  A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License.  You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+  Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+  12. No Surrender of Others' Freedom.
+
+  If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+  13. Use with the GNU Affero General Public License.
+
+  Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+  14. Revised Versions of this License.
+
+  The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+  Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+  If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+  Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+  15. Disclaimer of Warranty.
+
+  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. Limitation of Liability.
+
+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+  17. Interpretation of Sections 15 and 16.
+
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+    <program>  Copyright (C) <year>  <name of author>
+    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<https://www.gnu.org/licenses/>.
+
+  The GNU General Public License does not permit incorporating your program
+into proprietary programs.  If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.  But first, please read
+<https://www.gnu.org/licenses/why-not-lgpl.html>.
diff --git a/COPYING.lesser b/COPYING.lesser
new file mode 100644
index 0000000..0a04128
--- /dev/null
+++ b/COPYING.lesser
@@ -0,0 +1,165 @@
+                   GNU LESSER GENERAL PUBLIC LICENSE
+                       Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+
+  This version of the GNU Lesser General Public License incorporates
+the terms and conditions of version 3 of the GNU General Public
+License, supplemented by the additional permissions listed below.
+
+  0. Additional Definitions.
+
+  As used herein, "this License" refers to version 3 of the GNU Lesser
+General Public License, and the "GNU GPL" refers to version 3 of the GNU
+General Public License.
+
+  "The Library" refers to a covered work governed by this License,
+other than an Application or a Combined Work as defined below.
+
+  An "Application" is any work that makes use of an interface provided
+by the Library, but which is not otherwise based on the Library.
+Defining a subclass of a class defined by the Library is deemed a mode
+of using an interface provided by the Library.
+
+  A "Combined Work" is a work produced by combining or linking an
+Application with the Library.  The particular version of the Library
+with which the Combined Work was made is also called the "Linked
+Version".
+
+  The "Minimal Corresponding Source" for a Combined Work means the
+Corresponding Source for the Combined Work, excluding any source code
+for portions of the Combined Work that, considered in isolation, are
+based on the Application, and not on the Linked Version.
+
+  The "Corresponding Application Code" for a Combined Work means the
+object code and/or source code for the Application, including any data
+and utility programs needed for reproducing the Combined Work from the
+Application, but excluding the System Libraries of the Combined Work.
+
+  1. Exception to Section 3 of the GNU GPL.
+
+  You may convey a covered work under sections 3 and 4 of this License
+without being bound by section 3 of the GNU GPL.
+
+  2. Conveying Modified Versions.
+
+  If you modify a copy of the Library, and, in your modifications, a
+facility refers to a function or data to be supplied by an Application
+that uses the facility (other than as an argument passed when the
+facility is invoked), then you may convey a copy of the modified
+version:
+
+   a) under this License, provided that you make a good faith effort to
+   ensure that, in the event an Application does not supply the
+   function or data, the facility still operates, and performs
+   whatever part of its purpose remains meaningful, or
+
+   b) under the GNU GPL, with none of the additional permissions of
+   this License applicable to that copy.
+
+  3. Object Code Incorporating Material from Library Header Files.
+
+  The object code form of an Application may incorporate material from
+a header file that is part of the Library.  You may convey such object
+code under terms of your choice, provided that, if the incorporated
+material is not limited to numerical parameters, data structure
+layouts and accessors, or small macros, inline functions and templates
+(ten or fewer lines in length), you do both of the following:
+
+   a) Give prominent notice with each copy of the object code that the
+   Library is used in it and that the Library and its use are
+   covered by this License.
+
+   b) Accompany the object code with a copy of the GNU GPL and this license
+   document.
+
+  4. Combined Works.
+
+  You may convey a Combined Work under terms of your choice that,
+taken together, effectively do not restrict modification of the
+portions of the Library contained in the Combined Work and reverse
+engineering for debugging such modifications, if you also do each of
+the following:
+
+   a) Give prominent notice with each copy of the Combined Work that
+   the Library is used in it and that the Library and its use are
+   covered by this License.
+
+   b) Accompany the Combined Work with a copy of the GNU GPL and this license
+   document.
+
+   c) For a Combined Work that displays copyright notices during
+   execution, include the copyright notice for the Library among
+   these notices, as well as a reference directing the user to the
+   copies of the GNU GPL and this license document.
+
+   d) Do one of the following:
+
+       0) Convey the Minimal Corresponding Source under the terms of this
+       License, and the Corresponding Application Code in a form
+       suitable for, and under terms that permit, the user to
+       recombine or relink the Application with a modified version of
+       the Linked Version to produce a modified Combined Work, in the
+       manner specified by section 6 of the GNU GPL for conveying
+       Corresponding Source.
+
+       1) Use a suitable shared library mechanism for linking with the
+       Library.  A suitable mechanism is one that (a) uses at run time
+       a copy of the Library already present on the user's computer
+       system, and (b) will operate properly with a modified version
+       of the Library that is interface-compatible with the Linked
+       Version.
+
+   e) Provide Installation Information, but only if you would otherwise
+   be required to provide such information under section 6 of the
+   GNU GPL, and only to the extent that such information is
+   necessary to install and execute a modified version of the
+   Combined Work produced by recombining or relinking the
+   Application with a modified version of the Linked Version. (If
+   you use option 4d0, the Installation Information must accompany
+   the Minimal Corresponding Source and Corresponding Application
+   Code. If you use option 4d1, you must provide the Installation
+   Information in the manner specified by section 6 of the GNU GPL
+   for conveying Corresponding Source.)
+
+  5. Combined Libraries.
+
+  You may place library facilities that are a work based on the
+Library side by side in a single library together with other library
+facilities that are not Applications and are not covered by this
+License, and convey such a combined library under terms of your
+choice, if you do both of the following:
+
+   a) Accompany the combined library with a copy of the same work based
+   on the Library, uncombined with any other library facilities,
+   conveyed under the terms of this License.
+
+   b) Give prominent notice with the combined library that part of it
+   is a work based on the Library, and explaining where to find the
+   accompanying uncombined form of the same work.
+
+  6. Revised Versions of the GNU Lesser General Public License.
+
+  The Free Software Foundation may publish revised and/or new versions
+of the GNU Lesser General Public License from time to time. Such new
+versions will be similar in spirit to the present version, but may
+differ in detail to address new problems or concerns.
+
+  Each version is given a distinguishing version number. If the
+Library as you received it specifies that a certain numbered version
+of the GNU Lesser General Public License "or any later version"
+applies to it, you have the option of following the terms and
+conditions either of that published version or of any later version
+published by the Free Software Foundation. If the Library as you
+received it does not specify a version number of the GNU Lesser
+General Public License, you may choose any version of the GNU Lesser
+General Public License ever published by the Free Software Foundation.
+
+  If the Library as you received it specifies that a proxy can decide
+whether future versions of the GNU Lesser General Public License shall
+apply, that proxy's public statement of acceptance of any version is
+permanent authorization for you to choose that version for the
+Library.
diff --git a/cmake/CMakeFlagsHandling.cmake b/cmake/CMakeFlagsHandling.cmake
new file mode 100644
index 0000000..56b5fae
--- /dev/null
+++ b/cmake/CMakeFlagsHandling.cmake
@@ -0,0 +1,100 @@
+/**
+ * @file CMakeFlagsHandling.cmake
+ *
+ * @brief
+ *
+ * @copyright
+ * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+ * SPC (Swiss Plasma Center)
+ *
+ * spclibs is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * @authors
+ * (in alphabetical order)
+ * @author Nicolas Richart <nicolas.richart@epfl.ch>
+ */
+if(_CMAKE_FLAGS_HANDLING)
+  return()
+endif()
+set(_CMAKE_FLAGS_HANDLING TRUE)
+
+#===============================================================================
+# Compilation options handling
+#===============================================================================
+macro(_get_flags_message lang type desc)
+  if(${lang} MATCHES "C.." OR ${lang} MATCHES "Fortran")
+    set(${desc} "Flags used by the compiler")
+  elseif(${lang} MATCHES ".*_LINKER")
+    set(${desc} "Flags used by the linker")
+  endif()
+
+  if(${lang} MATCHES "SHARED_LINKER")
+    set(${desc} "${desc} during the creation of shared libraries")
+  elseif(${lang} MATCHES "MODULE_LINKER")
+    set(${desc} "${desc} during the creation of modules")
+  elseif(${lang} MATCHES "STATIC_LINKER")
+    set(${desc} "${desc} linker during the creation of static libraries")
+  endif()
+
+  if(${type} MATCHES "ALL")
+    set(${desc} "${desc} during all build types")
+  else()
+    set(${desc} "${desc} during ${type} builds")
+  endif()
+endmacro()
+
+#===============================================================================
+
+function(handle_flags)
+  include(CMakeParseArguments)
+  cmake_parse_arguments(_flags
+    "ADD;REMOVE" "LANG;TYPE" ""
+    ${ARGN}
+    )
+
+  if(NOT _flags_LANG)
+    set(_flags_LANG ${FLAGS_HANDLING_DEFAULT_LANGUAGE})
+  endif()
+
+  set(_variable CMAKE_${_flags_LANG}_FLAGS)
+
+  if (_flags_TYPE)
+    set(_variable ${_variable}_${_flags_TYPE})
+  else()
+    set(_flags_TYPE "ALL")
+  endif()
+
+  _get_flags_message(${_flags_LANG} ${_flags_TYPE} _desc)
+  foreach(flag ${_flags_UNPARSED_ARGUMENTS})
+    if (_flags_ADD)
+      string(REPLACE "${flag}" "match" _temp_var "${${_variable}}")
+      if(NOT _temp_var MATCHES "match")
+        set(${_variable} "${flag} ${${_variable}}" CACHE STRING ${_desc} FORCE)
+      endif()
+    elseif(_flags_REMOVE)
+      string(REPLACE "${flag} " "" ${_variable} "${${_variable}}")
+      set(${_variable} "${${_variable}}" CACHE STRING ${_desc} FORCE)
+    endif()
+  endforeach()
+endfunction()
+
+#===============================================================================
+function(add_flags)
+  handle_flags(ADD ${ARGN})
+endfunction()
+
+#===============================================================================
+function(remove_flags)
+  handle_flags(REMOVE ${ARGN})
+endfunction()
+#===============================================================================
diff --git a/cmake/CheckFindMumps.c b/cmake/CheckFindMumps.c
new file mode 100644
index 0000000..3b006f7
--- /dev/null
+++ b/cmake/CheckFindMumps.c
@@ -0,0 +1,105 @@
+/**
+ * @file CheckFindMumps.c
+ *
+ * @brief
+ *
+ * @copyright
+ * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+ * SPC (Swiss Plasma Center)
+ *
+ * spclibs is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * @authors
+ * (in alphabetical order)
+ * @author Nicolas Richart <nicolas.richart@epfl.ch>
+ */
+#include <stdio.h>
+
+#if !defined(MUMPS_SEQ)
+#  include <mpi.h>
+#endif
+
+#define JOB_INIT -1
+#define JOB_END -2
+#define JOB_COMPLETE 6
+#define USE_COMM_WORLD -987654
+
+#define icntl(n) id.icntl[n - 1]
+
+int main(int argc, char **argv) {
+  int n = 2;
+  int nz = 2;
+
+  int irn[2] = {1, 2};
+  int jcn[2] = {1, 2};
+  Real a[2];
+  Real rhs[2];
+
+#if !defined(MUMPS_SEQ)
+  MPI_Init(&argc, &argv);
+#endif
+
+  rhs[0] = 1.0; rhs[1]=4.0;
+  a[0] = 1.0; a[1] = 2.0;
+
+  id.job = JOB_INIT;
+  id.par = 1;
+  id.sym = 0;
+
+#if !defined(MUMPS_SEQ)
+  id.comm_fortran = USE_COMM_WORLD;
+#endif
+
+  mumps_c(&id);
+
+  // Default Scaling
+  icntl(8) = 77;
+
+  // Assembled matrix
+  icntl(5) = 0;
+
+  /// Default centralized dense second member
+  icntl(20) = 0;
+  icntl(21) = 0;
+
+  // automatic choice for analysis analysis
+  icntl(28) = 0;
+
+  // fully distributed
+  icntl(18) = 3;
+
+  id.n = n;
+
+  id.nz_loc = nz;
+  id.irn_loc = irn;
+  id.jcn_loc = jcn;
+
+  id.a_loc = a;
+  id.rhs = rhs;
+
+  icntl(1) = -1;
+  icntl(2) = -1;
+  icntl(3) = -1;
+  icntl(4) = 0;
+
+
+  id.job = JOB_COMPLETE;
+  mumps_c(&id);
+
+  id.job=JOB_END;
+  mumps_c(&id);
+
+  printf("Solution is : (%8.2f %8.2f)\n", rhs[0], rhs[1]);
+
+  return 0;
+}
diff --git a/cmake/FindFFTW.cmake b/cmake/FindFFTW.cmake
new file mode 100644
index 0000000..1fba7ca
--- /dev/null
+++ b/cmake/FindFFTW.cmake
@@ -0,0 +1,46 @@
+/**
+ * @file FindFFTW.cmake
+ *
+ * @brief
+ *
+ * @copyright
+ * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+ * SPC (Swiss Plasma Center)
+ *
+ * spclibs is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * @authors
+ * (in alphabetical order)
+ * @author Nicolas Richart <nicolas.richart@epfl.ch>
+ */
+if(CMAKE_C_COMPILER_ID MATCHES "Cray")
+  set(_cray TRUE)
+endif()
+
+# Find FFTW2
+if (${_cray})
+#  set(FFTW_LIBRARY "-ldfftw")
+  set(FFTW_LIBRARY "${FFTW_DIR}/libdfftw.a")
+else()
+  find_library(FFTW_LIBRARY NAMES fftw PATHS ${FFTW}/lib)
+  find_library(FFTW_LIBRARY NAMES fftw3 PATHS ${FFTW}/lib)
+  find_path(FFTW_INCLUDES fftw_f77.h ${FFTW}/include)
+  find_path(FFTW_INCLUDES fftw_f77.i ${FFTW}/include)
+  find_path(FFTW_INCLUDES fftw.h ${FFTW}/include)
+  find_path(FFTW_INCLUDES fftw3.h ${FFTW}/include)
+endif()
+
+mark_as_advanced(FFTW_LIBRARY FFTW_INCLUDES)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(FFTW DEFAULT_MSG FFTW_LIBRARY)
diff --git a/cmake/FindMETIS.cmake b/cmake/FindMETIS.cmake
new file mode 100644
index 0000000..2207580
--- /dev/null
+++ b/cmake/FindMETIS.cmake
@@ -0,0 +1,62 @@
+/**
+ * @file FindMETIS.cmake
+ *
+ * @brief
+ *
+ * @copyright
+ * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+ * SPC (Swiss Plasma Center)
+ *
+ * spclibs is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * @authors
+ * (in alphabetical order)
+ * @author Nicolas Richart <nicolas.richart@epfl.ch>
+ */
+find_path(METIS_INCLUDE_DIR metis.h
+  PATHS "${METIS_DIR}"
+  ENV METIS_DIR
+  PATH_SUFFIXES include
+  )
+
+find_library(METIS_LIBRARY NAMES metis
+  PATHS "${METIS_DIR}"
+  ENV METIS_DIR
+  PATH_SUFFIXES lib
+  )
+
+mark_as_advanced(METIS_LIBRARY METIS_INCLUDE_DIR)
+
+#===============================================================================
+include(FindPackageHandleStandardArgs)
+if(CMAKE_VERSION VERSION_GREATER 2.8.12)
+  if(METIS_INCLUDE_DIR)
+    file(STRINGS ${METIS_INCLUDE_DIR}/metis.h _versions
+      REGEX "^#define\ +METIS_VER_(MAJOR|MINOR|SUBMINOR) .*")
+    foreach(_ver ${_versions})
+      string(REGEX MATCH "METIS_VER_(MAJOR|MINOR|SUBMINOR) *([0-9.]+)" _tmp "${_ver}")
+      set(_metis_${CMAKE_MATCH_1} ${CMAKE_MATCH_2})
+    endforeach()
+    set(METIS_VERSION "${_metis_MAJOR}.${_metis_MINOR}" CACHE INTERNAL "")
+  endif()
+
+  find_package_handle_standard_args(METIS
+    REQUIRED_VARS
+      METIS_LIBRARY
+      METIS_INCLUDE_DIR
+    VERSION_VAR
+      METIS_VERSION)
+else()
+  find_package_handle_standard_args(METIS DEFAULT_MSG
+    METIS_LIBRARY METIS_INCLUDE_DIR)
+endif()
diff --git a/cmake/FindMumps.cmake b/cmake/FindMumps.cmake
new file mode 100644
index 0000000..8d8e76d
--- /dev/null
+++ b/cmake/FindMumps.cmake
@@ -0,0 +1,314 @@
+/**
+ * @file FindMumps.cmake
+ *
+ * @brief
+ *
+ * @copyright
+ * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+ * SPC (Swiss Plasma Center)
+ *
+ * spclibs is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * @authors
+ * (in alphabetical order)
+ * @author Nicolas Richart <nicolas.richart@epfl.ch>
+ */
+#===============================================================================
+# @file   FindMumps.cmake
+#
+# @author Nicolas Richart <nicolas.richart@epfl.ch>
+#
+# @date creation: Fri Oct 24 2014
+# @date last modification: Wed Jan 13 2016
+#
+# @brief  The find_package file for the Mumps solver
+#
+# @section LICENSE
+#
+# Copyright (©) 2015 EPFL (Ecole Polytechnique Fédérale de Lausanne) Laboratory
+# (LSMS - Laboratoire de Simulation en Mécanique des Solides)
+#
+# Akantu is free  software: you can redistribute it and/or  modify it under the
+# terms  of the  GNU Lesser  General Public  License as  published by  the Free
+# Software Foundation, either version 3 of the License, or (at your option) any
+# later version.
+#
+# Akantu is  distributed in the  hope that it  will be useful, but  WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A  PARTICULAR PURPOSE. See  the GNU  Lesser General  Public License  for more
+# details.
+#
+# You should  have received  a copy  of the GNU  Lesser General  Public License
+# along with Akantu. If not, see <http://www.gnu.org/licenses/>.
+#
+#===============================================================================
+set(_MUMPS_COMPONENTS "sequential" "parallel" "double" "float" "complex_double" "complex_float")
+
+if(NOT Mumps_FIND_COMPONENTS)
+  set(Mumps_FIND_COMPONENTS "parallel" "double" "float" "complex_double" "complex_float")
+endif()
+#===============================================================================
+enable_language(Fortran)
+
+set(MUMPS_PRECISIONS)
+set(MUMPS_PLAT)
+foreach(_comp ${Mumps_FIND_COMPONENTS})
+  if("${_comp}" STREQUAL "sequential")
+    set(MUMPS_PLAT _seq) #default plat on debian based distribution
+  endif()
+
+  if("${_comp}" STREQUAL "float")
+    list(APPEND MUMPS_PRECISIONS s)
+  endif()
+  if("${_comp}" STREQUAL "double")
+    list(APPEND MUMPS_PRECISIONS d)
+  endif()
+  if("${_comp}" STREQUAL "complex_float")
+    list(APPEND MUMPS_PRECISIONS c)
+  endif()
+  if("${_comp}" STREQUAL "complex_double")
+    list(APPEND MUMPS_PRECISIONS z)
+  endif()
+endforeach()
+
+if(NOT MUMPS_PRECISIONS)
+  set(MUMPS_PRECISIONS s d c z)
+endif()
+
+list(GET MUMPS_PRECISIONS 0 _first_precision)
+
+string(TOUPPER "${_first_precision}" _u_first_precision)
+
+find_path(MUMPS_INCLUDE_DIR ${_first_precision}mumps_c.h
+  PATHS "${MUMPS_DIR}"
+  ENV MUMPS_DIR
+  PATH_SUFFIXES include
+  )
+mark_as_advanced(MUMPS_INCLUDE_DIR)
+
+set(_mumps_required_vars)
+foreach(_precision ${MUMPS_PRECISIONS})
+  string(TOUPPER "${_precision}" _u_precision)
+  find_library(MUMPS_LIBRARY_${_u_precision}MUMPS NAMES ${_precision}mumps${MUMPS_PREFIX}
+    PATHS "${MUMPS_DIR}"
+    ENV MUMPS_DIR
+    PATH_SUFFIXES lib
+    )
+  mark_as_advanced(MUMPS_LIBRARY_${_u_precision}MUMPS)
+  list(APPEND _mumps_required_vars MUMPS_LIBRARY_${_u_precision}MUMPS)
+
+  list(APPEND MUMPS_LIBRARIES_ALL ${MUMPS_LIBRARY_${_u_precision}MUMPS})
+endforeach()
+
+
+if(MUMPS_LIBRARY_${_u_first_precision}MUMPS MATCHES ".*${_first_precision}mumps.*${CMAKE_STATIC_LIBRARY_SUFFIX}")
+  # Assuming mumps was compiled as a static library
+  set(MUMPS_LIBRARY_TYPE STATIC CACHE INTERNAL "" FORCE)
+
+  if (CMAKE_Fortran_COMPILER MATCHES ".*gfortran")
+    set(_compiler_specific gfortran)
+  elseif (CMAKE_Fortran_COMPILER MATCHES ".*ifort")
+    set(_compiler_specific ifcore)
+  else()
+    message("Compiler ${CMAKE_Fortran_COMPILER} is not known, you will probably "
+      "have to add semething instead of this message to be able to test mumps "
+      "install")
+  endif()
+else()
+  set(MUMPS_LIBRARY_TYPE SHARED CACHE INTERNAL "" FORCE)
+endif()
+
+
+function(mumps_add_dependency _pdep _libs)
+  string(TOUPPER ${_pdep} _u_pdep)
+  if(_pdep STREQUAL "mumps_common")
+    find_library(MUMPS_LIBRARY_COMMON mumps_common${MUMPS_PREFIX}
+      PATHS "${MUMPS_DIR}"
+      ENV MUMPS_DIR
+      PATH_SUFFIXES lib
+      )
+    set(${_libs} ${MUMPS_LIBRARY_COMMON} PARENT_SCOPE)
+    mark_as_advanced(MUMPS_LIBRARY_COMMON)
+  elseif(_pdep STREQUAL "pord")
+    find_library(MUMPS_LIBRARY_PORD pord${MUMPS_PREFIX}
+      PATHS "${MUMPS_DIR}"
+      ENV MUMPS_DIR
+      PATH_SUFFIXES lib
+      )
+    set(${_libs} ${MUMPS_LIBRARY_PORD} PARENT_SCOPE)
+    mark_as_advanced(MUMPS_LIBRARY_PORD)
+  elseif(_pdep MATCHES "Scotch")
+    find_package(Scotch REQUIRED ${ARGN} QUIET)
+    if(ARGN)
+      list(GET ARGN 1 _comp)
+      string(TOUPPER ${_comp} _u_comp)
+      set(${_libs} ${SCOTCH_LIBRARY_${_u_comp}} PARENT_SCOPE)
+    else()
+      set(${_libs} ${${_u_pdep}_LIBRARIES} PARENT_SCOPE)
+    endif()
+  elseif(_pdep MATCHES "MPI")
+    if(MUMPS_PLAT STREQUAL "_seq")
+      find_library(MUMPS_LIBRARY_MPISEQ mpiseq${MUMPS_PREFIX}
+        PATHS "${MUMPS_DIR}"
+        ENV MUMPS_DIR
+        PATH_SUFFIXES lib
+        )
+      set(${_libs} ${MUMPS_LIBRARY_MPISEQ} PARENT_SCOPE)
+      mark_as_advanced(MUMPS_LIBRARY_MPISEQ)
+    else()
+      find_package(MPI REQUIRED C Fortran QUIET)
+      set(${_libs} ${MPI_C_LIBRARIES} ${MPI_Fortran_LIBRARIES} PARENT_SCOPE)
+    endif()
+  else()
+    find_package(${_pdep} REQUIRED QUIET)
+    set(${_libs} ${${_u_pdep}_LIBRARIES} ${${_u_pdep}_LIBRARY} PARENT_SCOPE)
+  endif()
+endfunction()
+
+function(mumps_find_dependencies)
+  set(_libraries_all ${MUMPS_LIBRARIES_ALL})
+  set(_include_dirs ${MUMPS_INCLUDE_DIR})
+
+  set(_mumps_test_dir "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}")
+  file(READ ${CMAKE_CURRENT_LIST_DIR}/CheckFindMumps.c _output)
+  file(WRITE "${_mumps_test_dir}/mumps_test_code.c"
+    "#include <${_first_precision}mumps_c.h>
+${_u_first_precision}MUMPS_STRUC_C id;
+
+#define mumps_c ${_first_precision}mumps_c
+#define Real ${_u_first_precision}MUMPS_REAL
+")
+
+  if(MUMPS_PLAT STREQUAL _seq)
+    file(APPEND "${_mumps_test_dir}/mumps_test_code.c"
+      "#define MUMPS_SEQ
+")
+  else()
+    file(APPEND "${_mumps_test_dir}/mumps_test_code.c"
+      "// #undef MUMPS_SEQ
+")
+        find_package(MPI REQUIRED)
+    list(APPEND _compiler_specific ${MPI_C_LIBRARIES})
+    list(APPEND _include_dirs ${MPI_C_INCLUDE_PATH} ${MPI_INCLUDE_DIR})
+  endif()
+
+  file(APPEND "${_mumps_test_dir}/mumps_test_code.c" "${_output}")
+
+  #===============================================================================
+  set(_mumps_dep_symbol_BLAS ${_first_precision}gemm)
+  set(_mumps_dep_symbol_ScaLAPACK numroc)
+  set(_mumps_dep_symbol_MPI mpi_send)
+  set(_mumps_dep_symbol_Scotch SCOTCH_graphInit)
+  set(_mumps_dep_symbol_Scotch_ptscotch scotchfdgraphexit)
+  set(_mumps_dep_symbol_Scotch_esmumps esmumps)
+  set(_mumps_dep_symbol_mumps_common mumps_abort)
+  set(_mumps_dep_symbol_pord SPACE_ordering)
+  set(_mumps_dep_symbol_METIS metis_nodend)
+  set(_mumps_dep_symbol_ParMETIS ParMETIS_V3_NodeND)
+
+  # added for fucking macosx that cannot fail at link
+  set(_mumps_run_dep_symbol_mumps_common mumps_fac_descband)
+  set(_mumps_run_dep_symbol_MPI mpi_bcast)
+  set(_mumps_run_dep_symbol_ScaLAPACK idamax)
+
+  set(_mumps_dep_comp_Scotch_ptscotch COMPONENTS ptscotch)
+  set(_mumps_dep_comp_Scotch_esmumps COMPONENTS esmumps)
+
+  set(_mumps_potential_dependencies mumps_common pord BLAS ScaLAPACK MPI
+    Scotch Scotch_ptscotch Scotch_esmumps METIS ParMETIS)
+  #===============================================================================
+
+  set(_retry_try_run TRUE)
+  set(_retry_count 0)
+
+  # trying only as long as we add dependencies to avoid inifinte loop in case of an unkown dependency
+  while (_retry_try_run AND _retry_count LESS 100)
+    try_run(_mumps_run _mumps_compiles "${_mumps_test_dir}" "${_mumps_test_dir}/mumps_test_code.c"
+      CMAKE_FLAGS "-DINCLUDE_DIRECTORIES:STRING=${_include_dirs}"
+      LINK_LIBRARIES ${_libraries_all} ${_libraries_all} ${_compiler_specific}
+      RUN_OUTPUT_VARIABLE _run
+      COMPILE_OUTPUT_VARIABLE _out)
+
+    set(_retry_compile FALSE)
+    #message("COMPILATION outputs: \n${_out} \n RUN OUTPUT \n${_run}")
+    if(_mumps_compiles AND NOT (_mumps_run STREQUAL "FAILED_TO_RUN"))
+      break()
+    endif()
+
+    foreach(_pdep ${_mumps_potential_dependencies})
+      #message("CHECKING ${_pdep}")
+      set(_add_pdep FALSE)
+      if (NOT _mumps_compiles AND
+          _out MATCHES "undefined reference.*${_mumps_dep_symbol_${_pdep}}")
+        set(_add_pdep TRUE)
+        #message("NEED COMPILE ${_pdep}")
+      elseif(_mumps_run STREQUAL "FAILED_TO_RUN" AND
+          DEFINED _mumps_run_dep_symbol_${_pdep} AND
+          _run MATCHES "${_mumps_run_dep_symbol_${_pdep}}")
+        set(_add_pdep TRUE)
+	#message("NEED RUN ${_pdep}")
+      endif()
+
+      if(_add_pdep)
+        mumps_add_dependency(${_pdep} _libs ${_mumps_dep_comp_${_pdep}})
+	#message("ADDING ${_libs}")
+	if(NOT _libs)
+	  message(FATAL_ERROR "MUMPS depends on ${_pdep} but no libraries where found")
+	endif()
+	list(APPEND _libraries_all ${_libs})
+        set(_retry_try_run TRUE)
+      endif()
+    endforeach()
+
+    math(EXPR _retry_count "${_retry_count} + 1")
+  endwhile()
+
+  if(_retry_count GREATER 10)
+    message(FATAL_ERROR "Do not know what to do to link with mumps on your system, I give up!")
+  endif()
+
+  if(APPLE)
+    # in doubt add some stuff because mumps was perhaps badly compiled
+    mumps_add_dependency(pord _libs)
+    list(APPEND _libraries_all ${_libs})
+  endif()
+
+  set(MUMPS_LIBRARIES_ALL ${_libraries_all} PARENT_SCOPE)
+endfunction()
+
+mumps_find_dependencies()
+
+set(MUMPS_LIBRARIES ${MUMPS_LIBRARIES_ALL} CACHE INTERNAL "" FORCE)
+
+#===============================================================================
+include(FindPackageHandleStandardArgs)
+if(CMAKE_VERSION VERSION_GREATER 2.8.12)
+  if(MUMPS_INCLUDE_DIR)
+    file(STRINGS ${MUMPS_INCLUDE_DIR}/dmumps_c.h _versions
+      REGEX "^#define MUMPS_VERSION .*")
+    foreach(_ver ${_versions})
+      string(REGEX MATCH "MUMPS_VERSION *\"([0-9.]+)\"" _tmp "${_ver}")
+      set(_mumps_VERSION ${CMAKE_MATCH_1})
+    endforeach()
+    set(MUMPS_VERSION "${_mumps_VERSION}" CACHE INTERNAL "")
+  endif()
+
+  find_package_handle_standard_args(Mumps
+    REQUIRED_VARS ${_mumps_required_vars}
+                  MUMPS_INCLUDE_DIR
+    VERSION_VAR MUMPS_VERSION
+    )
+else()
+  find_package_handle_standard_args(Mumps DEFAULT_MSG
+    ${_mumps_required_vars} MUMPS_INCLUDE_DIR)
+endif()
diff --git a/cmake/FindPETSc.cmake b/cmake/FindPETSc.cmake
new file mode 100644
index 0000000..04ad8d1
--- /dev/null
+++ b/cmake/FindPETSc.cmake
@@ -0,0 +1,92 @@
+/**
+ * @file FindPETSc.cmake
+ *
+ * @brief
+ *
+ * @copyright
+ * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+ * SPC (Swiss Plasma Center)
+ *
+ * spclibs is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * @authors
+ * (in alphabetical order)
+ * @author Nicolas Richart <nicolas.richart@epfl.ch>
+ */
+# - Try to find PETSc
+#  PETSC_FOUND         - system has PETSc
+#  PETSC_INCLUDE_DIRS  - the PETSc include directories
+#  PETSC_LIBRARIES     - Link these to use PETSc
+#  PETSC_VERSION       - Version string (MAJOR.MINOR.SUBMINOR)
+
+if(PETSc_FIND_REQUIRED)
+  find_package(PkgConfig REQUIRED)
+else()
+  find_package(PkgConfig QUIET)
+  if(NOT PKG_CONFIG_FOUND)
+    return()
+  endif()
+endif()
+
+pkg_search_module(_petsc PETSc)
+
+# Some debug code
+#get_property(_vars DIRECTORY PROPERTY VARIABLES)
+#foreach(_var ${_vars})
+#  if ("${_var}" MATCHES "^_petsc")
+#    message("${_var} -> ${${_var}}")
+#  endif()
+#endforeach()
+
+if(_petsc_FOUND AND _petsc_VERSION)
+  set(PETSC_VERSION ${_petsc_VERSION})
+endif()
+
+if(_petsc_FOUND)
+  set(_petsc_libs)
+  foreach(_lib ${_petsc_LIBRARIES})
+    string(TOUPPER "${_lib}" _u_lib)
+    find_library(PETSC_LIBRARY_${_u_lib} ${_lib} PATHS ${_petsc_LIBRARY_DIRS})
+    list(APPEND _petsc_libs ${PETSC_LIBRARY_${_u_lib}})
+    mark_as_advanced(PETSC_LIBRARY_${_u_lib})
+  endforeach()
+
+  if (NOT _petsc_INCLUDE_DIRS)
+    pkg_get_variable(_petsc_INCLUDE_DIRS ${_petsc_MODULE_NAME} includedir)
+    #message(${_petsc_INCLUDE_DIRS})
+  endif()
+
+  find_path(PETSC_Fortran_INCLUDE_DIRS "finclude/petsc.h"
+    PATHS ${_petsc_INCLUDE_DIRS}/petsc
+    NO_CMAKE_PATH
+    NO_DEFAULT_PATH
+    )
+
+  set(PETSC_LIBRARIES ${_petsc_libs} CACHE FILEPATH "")
+  set(PETSC_INCLUDE_DIRS ${_petsc_INCLUDE_DIRS} CACHE PATH "")
+  
+
+  add_library(petsc::petsc INTERFACE IMPORTED)
+  set_property(TARGET petsc::petsc PROPERTY INTERFACE_LINK_LIBRARIES ${PETSC_LIBRARIES})
+  set_property(TARGET petsc::petsc PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${PETSC_INCLUDE_DIRS})
+
+  add_library(petsc::petscf INTERFACE IMPORTED)
+  target_link_libraries(petsc::petscf INTERFACE petsc::petsc)
+  set_property(TARGET petsc::petscf PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${PETSC_Fortran_INCLUDE_DIRS})
+
+endif()
+
+include (FindPackageHandleStandardArgs)
+find_package_handle_standard_args(PETSc
+  REQUIRED_VARS PETSC_LIBRARIES PETSC_INCLUDE_DIRS
+  VERSION_VAR PETSC_VERSION)
diff --git a/cmake/FindParMETIS.cmake b/cmake/FindParMETIS.cmake
new file mode 100644
index 0000000..6a927e1
--- /dev/null
+++ b/cmake/FindParMETIS.cmake
@@ -0,0 +1,62 @@
+/**
+ * @file FindParMETIS.cmake
+ *
+ * @brief
+ *
+ * @copyright
+ * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+ * SPC (Swiss Plasma Center)
+ *
+ * spclibs is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * @authors
+ * (in alphabetical order)
+ * @author Nicolas Richart <nicolas.richart@epfl.ch>
+ */
+find_path(PARMETIS_INCLUDE_DIR parmetis.h
+  PATHS "${PARMETIS_DIR}"
+  ENV PARMETIS_DIR
+  PATH_SUFFIXES include
+  )
+
+find_library(PARMETIS_LIBRARY NAMES parmetis
+  PATHS "${PARMETIS_DIR}"
+  ENV PARMETIS_DIR
+  PATH_SUFFIXES lib
+  )
+
+mark_as_advanced(PARMETIS_LIBRARY PARMETIS_INCLUDE_DIR)
+
+#===============================================================================
+include(FindPackageHandleStandardArgs)
+if(CMAKE_VERSION VERSION_GREATER 2.8.12)
+  if(PARMETIS_INCLUDE_DIR)
+    file(STRINGS ${PARMETIS_INCLUDE_DIR}/parmetis.h _versions
+      REGEX "^#define\ +PARMETIS_(MAJOR|MINOR|SUBMINOR)_VERSION .*")
+    foreach(_ver ${_versions})
+      string(REGEX MATCH "PARMETIS_(MAJOR|MINOR|SUBMINOR)_VERSION *([0-9.]+)" _tmp "${_ver}")
+      set(_parmetis_${CMAKE_MATCH_1} ${CMAKE_MATCH_2})
+    endforeach()
+    set(PARMETIS_VERSION "${_parmetis_MAJOR}.${_parmetis_MINOR}" CACHE INTERNAL "")
+  endif()
+
+  find_package_handle_standard_args(ParMETIS
+    REQUIRED_VARS
+      PARMETIS_LIBRARY
+      PARMETIS_INCLUDE_DIR
+    VERSION_VAR
+      PARMETIS_VERSION)
+else()
+  find_package_handle_standard_args(ParMETIS DEFAULT_MSG
+    PARMETIS_LIBRARY PARMETIS_INCLUDE_DIR)
+endif()
diff --git a/cmake/FindScaLAPACK.cmake b/cmake/FindScaLAPACK.cmake
new file mode 100644
index 0000000..b3ef5ad
--- /dev/null
+++ b/cmake/FindScaLAPACK.cmake
@@ -0,0 +1,181 @@
+/**
+ * @file FindScaLAPACK.cmake
+ *
+ * @brief
+ *
+ * @copyright
+ * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+ * SPC (Swiss Plasma Center)
+ *
+ * spclibs is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * @authors
+ * (in alphabetical order)
+ * @author Nicolas Richart <nicolas.richart@epfl.ch>
+ */
+#===============================================================================
+# @file   FindScaLAPACK.cmake
+#
+# @author Nicolas Richart <nicolas.richart@epfl.ch>
+#
+# @date creation: Tue Mar 31 2015
+# @date last modification: Wed Jan 13 2016
+#
+# @brief  The find_package file for the Mumps solver
+#
+# @section LICENSE
+#
+# Copyright (©) 2015 EPFL (Ecole Polytechnique Fédérale de Lausanne) Laboratory
+# (LSMS - Laboratoire de Simulation en Mécanique des Solides)
+#
+# Akantu is free  software: you can redistribute it and/or  modify it under the
+# terms  of the  GNU Lesser  General Public  License as  published by  the Free
+# Software Foundation, either version 3 of the License, or (at your option) any
+# later version.
+#
+# Akantu is  distributed in the  hope that it  will be useful, but  WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A  PARTICULAR PURPOSE. See  the GNU  Lesser General  Public License  for more
+# details.
+#
+# You should  have received  a copy  of the GNU  Lesser General  Public License
+# along with Akantu. If not, see <http://www.gnu.org/licenses/>.
+#
+#===============================================================================
+set(SCALAPACK_VENDOR "Auto" CACHE
+  STRING "Vendor for scalapack (Auto, Netlib, Intel_(i?)lp64_(openmpi|intelmpi|sgimpt)")
+mark_as_advanced(SCALAPACK_VENDOR)
+set_property(CACHE SCALAPACK_VENDOR PROPERTY STRINGS
+  Auto Netlib
+  Intel_lp64_openmpi Intel_lp64_intelmpi Intel_lp64_sgimpt
+  Intel_ilp64_openmpi Intel_ilp64_intelmpi Intel_ilp64_sgimpt)
+
+
+macro(scalapack_find_library prefix target name list_libraries list_headers)
+  foreach(_lib ${list_libraries})
+    find_library(${prefix}_${_lib}_LIBRARY NAMES ${_lib}
+      PATHS ${prefix}_DIR
+      )
+    mark_as_advanced(${prefix}_${_lib}_LIBRARY)
+
+    if(${prefix}_${_lib}_LIBRARY)
+      list(APPEND ${prefix}_libraries ${${prefix}_${_lib}_LIBRARY})
+
+      get_filename_component(_ext ${${prefix}_${_lib}_LIBRARY} EXT)
+      if(NOT TARGET ${target})
+	if("${_ext}" STREQUAL "${CMAKE_SHARED_LIBRARY_SUFFIX}")
+	  add_library(${target} SHARED IMPORTED)
+	  get_filename_component(_soname ${${prefix}_${_lib}_LIBRARY} NAME)
+	  set_property(TARGET ${target} PROPERTY IMPORTED_SONAME ${_soname})
+	else()
+	  add_library(${target}_${name} STATIC IMPORTED)
+	endif()
+	set_property(TARGET ${target} PROPERTY
+	    IMPORTED_LOCATION ${${prefix}_${_lib}_LIBRARY})
+      else()
+	if("${_ext}" STREQUAL "${CMAKE_SHARED_LIBRARY_SUFFIX}")
+	  set_property(TARGET ${target} APPEND PROPERTY
+	    IMPORTED_LINK_DEPENDENT_LIBRARIES ${${prefix}_${_lib}_LIBRARY}
+	    )
+	else()
+	  set_property(TARGET ${target} APPEND PROPERTY
+	    IMPORTED_LINK_INTERFACE_LIBRARIES ${${prefix}_${_lib}_LIBRARY}
+	    )
+	endif()
+      endif()
+    else()
+      unset(${prefix}_${_lib}_LIBRARY CACHE)
+    endif()
+  endforeach()
+
+  if(${prefix}_libraries)
+    foreach(_hdr ${list_headers})
+      get_filename_component(_hdr_name ${_hdr} NAME_WE)
+      find_path(${prefix}_${_hdr_name}_INCLUDE_DIR NAMES ${_hdr}
+	PATHS ${prefix}_DIR)
+      mark_as_advanced(${prefix}_${_hdr_name}_INCLUDE_DIR)
+
+      if(${prefix}_${_hdr_name}_INCLUDE_DIR)
+	list(APPEND ${prefix}_include_dir ${${prefix}_${_hdr_name}_INCLUDE_DIR})
+	set_property(TARGET ${target} APPEND PROPERTY
+	  INTERFACE_INCLUDE_DIRECTORIES ${${prefix}_${_lib}_INCLUDE_DIR}
+	  )
+      else()
+	unset(${prefix}_${_lib}_INCLUDE_DIR CACHE)
+      endif()
+    endforeach()
+  endif()
+endmacro()
+
+set(SCALAPACK_libraries)
+set(SCALAPACK_INCLUDE_DIR)
+
+if(SCALAPACK_VENDOR STREQUAL "Auto" OR SCALAPACK_VENDOR STREQUAL "Netlib")
+  if(NOT SCALAPACK_libraries)
+    scalapack_find_library(
+      SCALAPACK
+      ScaLAPACK
+      "netlib"
+      "scalapack;blacsC;blacsF77;blacs"
+      ""
+      )
+  endif()
+endif()
+
+foreach(_precision lp64 ilp64)
+  foreach(_mpi intelmpi openmpi sgimpt)
+    if(NOT SCALAPACK_libraries)
+      if(SCALAPACK_VENDOR STREQUAL "Auto" OR SCALAPACK_VENDOR STREQUAL "Intel_${_precision}_${_mpi}")
+	if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
+	  set(_mkl_common "mkl_intel_${_precision}")
+
+	else()
+	  set(_mkl_common "mkl_gf_${_precision}")
+	endif()
+	scalapack_find_library(
+	  SCALAPACK
+	  ScaLAPACK
+	  "intel_${_precision}_${_mpi}"
+	  "mkl_scalapack_${_precision};${_mkl_common};mkl_sequential;mkl_core;mkl_blacs_${_mpi}_${_precision}"
+	  "mkl_scalapack.h"
+	  )
+
+	if(SCALAPACK_libraries AND _precision STREQUAL "ilp64")
+	  set_property(TARGET ${target} APPEND PROPERTY
+	    INTERFACE_COMPILE_DEFINITIONS MKL_ILP64}
+	    )
+	endif()
+
+	if(EXISTS ${SCALAPACK_include_dir}/mkl_version.h)
+	  file(STRINGS ${SCALAPACK_include_dir}/mkl_version.h _versions
+	    REGEX "^#define\ +__INTEL_MKL(_MINOR|_UPDATE)?__ .*")
+	  foreach(_ver ${_versions})
+	    string(REGEX MATCH "__INTEL_MKL(_MINOR|_UPDATE)?__ *([0-9.]+)" _tmp "${_ver}")
+	    set(_mkl${CMAKE_MATCH_1} ${CMAKE_MATCH_2})
+	  endforeach()
+	  set(SCALAPACK_VERSION "mkl:${_mkl}.${_mkl_MINOR}.${_mkl_UPDATE}" CACHE INTERNAL "")
+	endif()
+
+      endif()
+    endif()
+  endforeach()
+endforeach()
+
+set(SCALAPACK_LIBRARIES ${SCALAPACK_libraries} CACHE INTERNAL "")
+set(SCALAPACK_INCLUDE_DIR ${SCALAPACK_include_dir} CACHE INTERNAL "")
+
+#===============================================================================
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(ScaLAPACK
+  REQUIRED_VARS SCALAPACK_LIBRARIES
+  VERSION_VAR SCALAPACK_VERSION)
diff --git a/cmake/FindScotch.cmake b/cmake/FindScotch.cmake
new file mode 100644
index 0000000..bc7829e
--- /dev/null
+++ b/cmake/FindScotch.cmake
@@ -0,0 +1,270 @@
+/**
+ * @file FindScotch.cmake
+ *
+ * @brief
+ *
+ * @copyright
+ * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+ * SPC (Swiss Plasma Center)
+ *
+ * spclibs is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * @authors
+ * (in alphabetical order)
+ * @author Nicolas Richart <nicolas.richart@epfl.ch>
+ */
+#===============================================================================
+# @file   FindScotch.cmake
+#
+# @author Nicolas Richart <nicolas.richart@epfl.ch>
+#
+# @date creation: Fri Oct 24 2014
+# @date last modification: Wed Jan 13 2016
+#
+# @brief  The find_package file for Scotch
+#
+# @section LICENSE
+#
+# Copyright (©) 2015 EPFL (Ecole Polytechnique Fédérale de Lausanne) Laboratory
+# (LSMS - Laboratoire de Simulation en Mécanique des Solides)
+#
+# Akantu is free  software: you can redistribute it and/or  modify it under the
+# terms  of the  GNU Lesser  General Public  License as  published by  the Free
+# Software Foundation, either version 3 of the License, or (at your option) any
+# later version.
+#
+# Akantu is  distributed in the  hope that it  will be useful, but  WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A  PARTICULAR PURPOSE. See  the GNU  Lesser General  Public License  for more
+# details.
+#
+# You should  have received  a copy  of the GNU  Lesser General  Public License
+# along with Akantu. If not, see <http://www.gnu.org/licenses/>.
+#
+#===============================================================================
+
+set(_SCOTCH_COMPONENTS "metis" "parmetis" "esmumps" "ptscotch")
+
+if(NOT Scotch_FIND_COMPONENTS)
+  set(Scotch_FIND_COMPONENTS)
+endif()
+
+find_path(SCOTCH_INCLUDE_DIR scotch.h  PATHS "${SCOTCH_DIR}" ENV SCOTCH_DIR
+  PATH_SUFFIXES include include/scotch
+  )
+
+
+find_library(SCOTCH_LIBRARY scotch PATHS "${SCOTCH_DIR}" ENV SCOTCH_DIR PATH_SUFFIXES lib)
+
+set(_scotch_test_dir "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}")
+file(WRITE "${_scotch_test_dir}/scotch_test_code.c"
+  "#include <stdio.h>
+#include <stdint.h>
+#include <scotch.h>
+
+int main() {
+  SCOTCH_Graph graph;
+  SCOTCH_graphInit(&graph);
+  return 0;
+}
+")
+
+#===============================================================================
+include(FindPackageHandleStandardArgs)
+if(CMAKE_VERSION VERSION_GREATER 2.8.12)
+  if(SCOTCH_INCLUDE_DIR)
+    file(STRINGS ${SCOTCH_INCLUDE_DIR}/scotch.h _versions
+      REGEX "^#define\ +SCOTCH_(VERSION|RELEASE|PATCHLEVEL) .*")
+    foreach(_ver ${_versions})
+      string(REGEX MATCH "SCOTCH_(VERSION|RELEASE|PATCHLEVEL) *([0-9.]+)" _tmp "${_ver}")
+      set(_scotch_${CMAKE_MATCH_1} ${CMAKE_MATCH_2})
+    endforeach()
+    set(SCOTCH_VERSION "${_scotch_VERSION}.${_scotch_RELEASE}.${_scotch_PATCHLEVEL}" CACHE INTERNAL "")
+  endif()
+  find_package_handle_standard_args(Scotch
+    REQUIRED_VARS SCOTCH_LIBRARY SCOTCH_INCLUDE_DIR
+    VERSION_VAR SCOTCH_VERSION)
+else()
+  find_package_handle_standard_args(Scotch DEFAULT_MSG
+    SCOTCH_LIBRARY SCOTCH_INCLUDE_DIR)
+endif()
+
+set(SCOTCH_LIBRARIES_ALL ${SCOTCH_LIBRARY})
+
+try_compile(_scotch_compiles "${_scotch_test_dir}" SOURCES "${_scotch_test_dir}/scotch_test_code.c"
+  CMAKE_FLAGS "-DINCLUDE_DIRECTORIES:STRING=${SCOTCH_INCLUDE_DIR}"
+  LINK_LIBRARIES ${SCOTCH_LIBRARY}
+  OUTPUT_VARIABLE _out)
+
+get_filename_component(_scotch_hint "${SCOTCH_LIBRARY}" DIRECTORY)
+
+if(SCOTCH_LIBRARY MATCHES ".*scotch.*${CMAKE_STATIC_LIBRARY_SUFFIX}")
+  # Assuming scotch was compiled as a static library
+  set(SCOTCH_LIBRARY_TYPE STATIC CACHE INTERNAL "" FORCE)
+else()
+  set(SCOTCH_LIBRARY_TYPE SHARED CACHE INTERNAL "" FORCE)
+endif()
+
+if(NOT _scotch_compiles)
+  if(_out MATCHES "SCOTCH_errorPrint")
+    find_library(SCOTCH_LIBRARY_ERR scotcherr
+      HINTS ${_scotch_hint})
+    find_library(SCOTCH_LIBRARY_ERREXIT scotcherrexit
+      HINTS ${_scotch_hint})
+
+    if(NOT TARGET Scotch::err)
+      add_library(Scotch::err ${SCOTCH_LIBRARY_TYPE} IMPORTED GLOBAL)
+    endif()
+    if(NOT TARGET Scotch::errexit)
+      add_library(Scotch::errexit ${SCOTCH_LIBRARY_TYPE} IMPORTED GLOBAL)
+    endif()
+
+    set_target_properties(Scotch::errexit PROPERTIES
+      IMPORTED_LOCATION                 "${SCOTCH_LIBRARY_ERREXIT}"
+      INTERFACE_INCLUDE_DIRECTORIES     "${SCOTCH_INCLUDE_DIR}"
+      IMPORTED_LINK_INTERFACE_LANGUAGES "C")
+
+    set_target_properties(Scotch::err PROPERTIES
+      IMPORTED_LOCATION                 "${SCOTCH_LIBRARY_ERR}"
+      INTERFACE_INCLUDE_DIRECTORIES     "${SCOTCH_INCLUDE_DIR}"
+      IMPORTED_LINK_INTERFACE_LANGUAGES "C"
+      INTERFACE_LINK_LIBRARIES          "Scotch::errexit")
+
+    mark_as_advanced(SCOTCH_LIBRARY_ERR
+      SCOTCH_LIBRARY_ERREXIT)
+
+    list(APPEND SCOTCH_LIBRARIES_ALL ${SCOTCH_LIBRARY_ERR} ${SCOTCH_LIBRARY_ERREXIT})
+
+    set(_scotch_link_lib INTERFACE_LINK_LIBRARIES "Scotch::err")
+  endif()
+endif()
+
+if(NOT TARGET Scotch::scotch)
+  add_library(Scotch::scotch ${SCOTCH_LIBRARY_TYPE} IMPORTED GLOBAL)
+endif()
+set_target_properties(Scotch::scotch PROPERTIES
+  IMPORTED_LOCATION                 "${SCOTCH_LIBRARY}"
+  INTERFACE_INCLUDE_DIRECTORIES     "${SCOTCH_INCLUDE_DIR}"
+  IMPORTED_LINK_INTERFACE_LANGUAGES "C"
+  ${_scotch_link_lib})
+
+set(SCOTCH_LIBRARIES ${SCOTCH_LIBRARIES_ALL} CACHE INTERNAL "Libraries for Scotch" FORCE)
+
+mark_as_advanced(SCOTCH_LIBRARY
+  SCOTCH_INCLUDE_DIR
+  SCOTCH_LIBRARIES)
+
+
+if("${Scotch_FIND_COMPONENTS}" MATCHES "esmumps")
+  find_library(SCOTCH_LIBRARY_ESMUMPS esmumps  HINTS ${_scotch_hint})
+
+  if(NOT TARGET Scotch::esmumps)
+    add_library(Scotch::esmumps ${SCOTCH_LIBRARY_TYPE} IMPORTED GLOBAL)
+  endif()
+  set_target_properties(Scotch::esmumps PROPERTIES
+    IMPORTED_LOCATION                 "${SCOTCH_LIBRARY_ESMUMPS}"
+    INTERFACE_INCLUDE_DIRECTORIES     "${SCOTCH_INCLUDE_DIR}"
+    IMPORTED_LINK_INTERFACE_LANGUAGES "C")
+
+
+  mark_as_advanced(SCOTCH_LIBRARY_ESMUMPS)
+endif()
+
+if("${Scotch_FIND_COMPONENTS}" MATCHES "metis")
+  find_library(SCOTCH_LIBRARY_METIS scotchmetis HINTS ${_scotch_hint})
+
+  if(NOT TARGET Scotch::metis)
+    add_library(Scotch::metis ${SCOTCH_LIBRARY_TYPE} IMPORTED GLOBAL)
+  endif()
+  set_target_properties(Scotch::metis PROPERTIES
+    IMPORTED_LOCATION                 "${SCOTCH_LIBRARY_METIS}"
+    INTERFACE_INCLUDE_DIRECTORIES     "${SCOTCH_INCLUDE_DIR}"
+    IMPORTED_LINK_INTERFACE_LANGUAGES "C")
+
+  mark_as_advanced(SCOTCH_LIBRARY_METIS)
+endif()
+
+if("${Scotch_FIND_COMPONENTS}" MATCHES "parmetis")
+  find_library(SCOTCH_LIBRARY_PARMETIS scotchparmetis HINTS ${_scotch_hint})
+
+  if(NOT TARGET Scotch::parmetis)
+    add_library(Scotch::parmetis ${SCOTCH_LIBRARY_TYPE} IMPORTED GLOBAL)
+  endif()
+  set_target_properties(Scotch::parmetis PROPERTIES
+    IMPORTED_LOCATION                 "${SCOTCH_LIBRARY_PARMETIS}"
+    INTERFACE_INCLUDE_DIRECTORIES     "${SCOTCH_INCLUDE_DIR}"
+    IMPORTED_LINK_INTERFACE_LANGUAGES "C"
+    INTERFACE_INCLUDE_DIRECTORIES     "Scotch::metis")
+  mark_as_advanced(SCOTCH_LIBRARY_PARMETIS)
+endif()
+
+#
+##===============================================================================
+if("${Scotch_FIND_COMPONENTS}" MATCHES "ptscotch")
+  file(WRITE "${_scotch_test_dir}/ptscotch_test_code.c"
+    "#include <stdio.h>
+#include <stdint.h>
+#include <mpi.h>
+#include <ptscotch.h>
+
+int main() {
+  SCOTCH_Dgraph graph;
+  SCOTCH_dgraphInit(&graph, MPI_COMM_WORLD);
+  return 0;
+}
+")
+
+  find_package(MPI REQUIRED)
+
+  find_library(SCOTCH_LIBRARY_PTSCOTCH ptscotch HINTS ${_scotch_hint})
+
+  try_compile(_scotch_compiles "${_scotch_test_dir}" SOURCES "${_scotch_test_dir}/ptscotch_test_code.c"
+    CMAKE_FLAGS "-DINCLUDE_DIRECTORIES:STRING=${SCOTCH_INCLUDE_DIR};${MPI_C_INCLUDE_PATH}"
+    LINK_LIBRARIES ${SCOTCH_LIBRARY_PTSCOTCH} ${MPI_C_LIBRARIES}
+    OUTPUT_VARIABLE _out)
+
+  if(NOT _scotch_compiles)
+    if(_out MATCHES "SCOTCH_archExit")
+      set(_scotch_link_lib INTERFACE_LINK_LIBRARIES "Scotch::scotch")
+    endif()
+  endif()
+
+  if(NOT TARGET Scotch::ptscotch)
+    add_library(Scotch::ptscotch ${SCOTCH_LIBRARY_TYPE} IMPORTED GLOBAL)
+  endif()
+  set_target_properties(Scotch::ptscotch PROPERTIES
+    IMPORTED_LOCATION                 "${SCOTCH_LIBRARY_PTSCOTCH}"
+    INTERFACE_INCLUDE_DIRECTORIES     "${SCOTCH_INCLUDE_DIR}"
+    IMPORTED_LINK_INTERFACE_LANGUAGES "C"
+    ${_scotch_link_lib})
+
+  set(PTSCOTCH_LIBRARIES ${SCOTCH_LIBRARY_PTSCOTCH} ${SCOTCH_LIBRARIES} CACHE INTERNAL "Libraries for PT-Scotch" FORCE)
+
+  mark_as_advanced(SCOTCH_LIBRARY_PTSCOTCH
+    PTSCOTCH_LIBRARIES)
+
+  if("${Scotch_FIND_COMPONENTS}" MATCHES "esmumps")
+    find_library(SCOTCH_LIBRARY_PTESMUMPS ptesmumps
+      HINTS ${_scotch_hint} PATH_SUFFIXES lib .)
+
+    if(NOT TARGET Scotch::ptesmumps)
+      add_library(Scotch::ptesmumps ${SCOTCH_LIBRARY_TYPE} IMPORTED GLOBAL)
+    endif()
+    set_target_properties(Scotch::ptesmumps PROPERTIES
+      IMPORTED_LOCATION                 "${SCOTCH_LIBRARY_ESMUMPS}"
+      INTERFACE_INCLUDE_DIRECTORIES     "${SCOTCH_INCLUDE_DIR}"
+      IMPORTED_LINK_INTERFACE_LANGUAGES "C")
+
+    mark_as_advanced(SCOTCH_LIBRARY_PTESMUMPS)
+  endif()
+endif()
diff --git a/cmake/blas.cmake b/cmake/blas.cmake
new file mode 100644
index 0000000..9648874
--- /dev/null
+++ b/cmake/blas.cmake
@@ -0,0 +1,63 @@
+/**
+ * @file blas.cmake
+ *
+ * @brief
+ *
+ * @copyright
+ * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+ * SPC (Swiss Plasma Center)
+ *
+ * spclibs is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * @authors
+ * (in alphabetical order)
+ * @author Nicolas Richart <nicolas.richart@epfl.ch>
+ */
+set(_default_blas $ENV{BLA_VENDOR})
+if(NOT _default_blas)
+  set(_default_blas All)
+endif()
+set(BSPLINES_USE_BLAS_VENDOR "${_default_blas}" CACHE STRING "Version of blas to use")
+mark_as_advanced(BSPLINES_USE_BLAS_VENDOR)
+set_property(CACHE BSPLINES_USE_BLAS_VENDOR PROPERTY STRINGS
+  All
+  ACML
+  ACML_GPU
+  ACML_MP
+  ATLAS
+  Apple
+  CXML
+  DXML
+  Generic
+  Goto
+  IBMESSL
+  Intel
+  Intel10_32
+  Intel10_64lp
+  Intel10_64lp_seq
+  NAS
+  OpenBLAS
+  PhiPACK
+  SCSL
+  SGIMATH
+  SunPerf
+  )
+
+if(BSPLINES_USE_PARDISO)
+  set(BSPLINES_USE_BLAS_VENDOR Intel10_64lp CACHE STRING "" INTERNAL)
+endif()
+
+set(ENV{BLA_VENDOR} ${BSPLINES_USE_BLAS_VENDOR})
+
+find_package(BLAS REQUIRED)
+find_package(LAPACK REQUIRED)
diff --git a/cmake/bsplines-config.cmake.in b/cmake/bsplines-config.cmake.in
new file mode 100644
index 0000000..bd85d31
--- /dev/null
+++ b/cmake/bsplines-config.cmake.in
@@ -0,0 +1,54 @@
+/**
+ * @file bsplines-config.cmake.in
+ *
+ * @brief
+ *
+ * @copyright
+ * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+ * SPC (Swiss Plasma Center)
+ *
+ * spclibs is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * @authors
+ * (in alphabetical order)
+ * @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+ */
+# - Config file for the BSPLINES package
+# It defines the target the following variables:
+#  FFLAGS               - Fortran compile flags
+#  BSPLINES_MODS        - include directories for bsplines modules
+#  BSPLINES_LIBS        - bsplines library
+#  BSPLINES_EXTRA_INCS  - additional include directories
+#  BSPLINES_EXTRA_LIBS  - additional libraries
+#  HAS_PARDISO          - BSPLINES built with PARDISO
+#  HAS_MUMPS            - BSPLINES built with MUMPS
+#  MPIEXEC              - MPI launcher
+#  MPIEXEC_NUMPROC_FLAG - Number of MPI processes flag
+
+# Compute paths
+get_filename_component(_dir "${CMAKE_CURRENT_LIST_FILE}" PATH)
+get_filename_component(_prefix "${_dir}/../.." ABSOLUTE)
+
+# Import the targets
+include("${_prefix}/lib/cmake/bsplines-targets.cmake")
+
+# Report other information
+set(FFLAGS "@CMAKE_Fortran_FLAGS@")
+set(BSPLINES_MODS "${_prefix}/include")
+set(BSPLINES_LIBS fft bsplines pppack pputils2)
+set(BSPLINES_EXTRA_INCS "@EXTRA_INCS@")
+set(BSPLINES_EXTRA_LIBS "@EXTRA_LIBS@")
+set(HAS_PARDISO "@HAS_PARDISO@")
+set(HAS_MUMPS "@HAS_MUMPS@")
+set(MPIEXEC "@MPIEXEC@")
+set(MPIEXEC_NUMPROC_FLAG "@MPIEXEC_NUMPROC_FLAG@")
diff --git a/docs/doxygen/Doxyfile b/docs/doxygen/Doxyfile
new file mode 100644
index 0000000..bc32a3b
--- /dev/null
+++ b/docs/doxygen/Doxyfile
@@ -0,0 +1,75 @@
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+PROJECT_NAME           = "SPClibs"
+PROJECT_BRIEF          = ""
+PROJECT_LOGO           = img/epfl_logo.png
+OUTPUT_DIRECTORY       = ./
+OPTIMIZE_FOR_FORTRAN   = YES
+ENABLE_PREPROCESSING   = YES
+MACRO_EXPANSION        = YES
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+EXTRACT_ALL            = YES
+EXTRACT_PRIVATE        = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to the input files
+#---------------------------------------------------------------------------
+
+INPUT                  = ../../src/
+INPUT                 += ../../pppack/
+INPUT                 += ../../pputils2/
+INPUT                 += ../../fft/
+FILE_PATTERNS          = *.f90 *.F90 *.tpl *.c
+RECURSIVE              = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+SOURCE_BROWSER         = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+GENERATE_HTML          = YES
+HTML_OUTPUT            = html
+HTML_DYNAMIC_SECTIONS  = YES
+USE_MATHJAX            = YES
+MATHJAX_RELPATH        = https://cdn.mathjax.org/mathjax/latest
+GENERATE_TREEVIEW      = YES
+HTML_FOOTER            = customfooter.html
+HTML_EXTRA_STYLESHEET  = doxygen-awesome-css/doxygen-awesome.css custom.css
+HTML_COLORSTYLE_HUE    = 209
+HTML_COLORSTYLE_SAT    = 255
+HTML_COLORSTYLE_GAMMA  = 113
+
+#---------------------------------------------------------------------------
+# Configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+GENERATE_LATEX         = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+CLASS_DIAGRAMS         = YES
+HAVE_DOT               = YES
+CALL_GRAPH             = YES
+CALLER_GRAPH           = YES
+DOT_GRAPH_MAX_NODES    = 60
+DOT_IMAGE_FORMAT       = svg
+
+#---------------------------------------------------------------------------
+# List of user-defined commands
+#---------------------------------------------------------------------------
+ALIASES += "merge=\xrefitem merge \"Merge comments\" \"Merge comments\""
+
+DISTRIBUTE_GROUP_DOC = YES
diff --git a/docs/doxygen/custom.css b/docs/doxygen/custom.css
new file mode 100644
index 0000000..815563f
--- /dev/null
+++ b/docs/doxygen/custom.css
@@ -0,0 +1,3 @@
+:root {
+    --content-maxwidth: 1200px;
+}
diff --git a/docs/doxygen/customfooter.html b/docs/doxygen/customfooter.html
new file mode 100644
index 0000000..cde195a
--- /dev/null
+++ b/docs/doxygen/customfooter.html
@@ -0,0 +1,21 @@
+<!-- HTML footer for doxygen 1.8.11-->
+<!-- start footer part -->
+<!--BEGIN GENERATE_TREEVIEW-->
+<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
+  <ul>
+    $navpath
+    <li class="footer">$generatedby
+    <a href="http://www.doxygen.org/index.html">
+    <img class="footer" src="$relpath^doxygen.png" alt="doxygen"/></a> $doxygenversion on $date</li>
+  </ul>
+</div>
+<!--END GENERATE_TREEVIEW-->
+<!--BEGIN !GENERATE_TREEVIEW-->
+<hr class="footer"/><address class="footer"><small>
+$generatedby &#160;<a href="http://www.doxygen.org/index.html">
+<img class="footer" src="$relpath^doxygen.png" alt="doxygen"/>
+</a> $doxygenversion
+</small></address>
+<!--END !GENERATE_TREEVIEW-->
+</body>
+</html>
diff --git a/docs/doxygen/doxygen-awesome-css/doxygen-awesome.css b/docs/doxygen/doxygen-awesome-css/doxygen-awesome.css
new file mode 100644
index 0000000..5256f64
--- /dev/null
+++ b/docs/doxygen/doxygen-awesome-css/doxygen-awesome.css
@@ -0,0 +1,1364 @@
+/**
+
+Doxygen Awesome
+https://github.com/jothepro/doxygen-awesome-css
+
+MIT License
+
+Copyright (c) 2021 jothepro
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+*/
+
+:root {
+    /* primary theme color. This will affect the entire websites color scheme: links, arrows, labels, ... */
+    --primary-color: #1982d2;
+    --primary-dark-color: #00559f;
+    --primary-light-color: #7aabd6;
+    --primary-lighter-color: #cae1f1;
+    --primary-lightest-color: #e9f1f8;
+
+    /* page base colors */
+    --page-background-color: white;
+    --page-foreground-color: #2c3e50;
+    --page-secondary-foreground-color: #67727e;
+
+    /* color for all separators on the website: hr, borders, ... */
+    --separator-color: #dedede;
+
+    /* border radius for all rounded components. Will affect many components, like dropdowns, memitems, codeblocks, ... */
+    --border-radius-large: 8px;
+    --border-radius-small: 4px;
+    --border-radius-medium: 6px;
+
+    /* default spacings. Most compontest reference these values for spacing, to provide uniform spacing on the page. */
+    --spacing-small: 5px;
+    --spacing-medium: 10px;
+    --spacing-large: 16px;
+
+    /* default box shadow used for raising an element above the normal content. Used in dropdowns, Searchresult, ... */
+    --box-shadow: 0 2px 10px 0 rgba(0,0,0,.1);
+
+    --odd-color: rgba(0,0,0,.03);
+
+    /* font-families. will affect all text on the website
+     * font-family: the normal font for text, headlines, menus
+     * font-family-monospace: used for preformatted text in memtitle, code, fragments
+     */
+    --font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Oxygen,Ubuntu,Cantarell,Fira Sans,Droid Sans,Helvetica Neue,sans-serif;
+    --font-family-monospace: source-code-pro,Menlo,Monaco,Consolas,Courier New,monospace;
+
+    /* font sizes */
+    --page-font-size: 15.6px;
+    --navigation-font-size: 14.4px;
+    --code-font-size: 14.4px; /* affects code, fragment */
+    --title-font-size: 22px;
+
+    /* content text properties. These only affect the page content, not the navigation or any other ui elements */
+    --content-line-height: 27px;
+    /* The content is centered and constraint in it's width. To make the content fill the whole page, set the variable to auto.*/
+    --content-maxwidth: 900px;
+
+    /* colors for various content boxes: @warning, @note, @deprecated @bug */
+    --warning-color: #fca49b;
+    --warning-color-dark: #b61825;
+    --warning-color-darker: #75070f;
+    --note-color: rgba(255,229,100,.3);
+    --note-color-dark: #c39900;
+    --note-color-darker: #8d7400;
+    --deprecated-color: rgb(214, 216, 224);
+    --deprecated-color-dark: #5b6269;
+    --deprecated-color-darker: #43454a;
+    --bug-color: rgb(246, 208, 178);
+    --bug-color-dark: #a53a00;
+    --bug-color-darker: #5b1d00;
+    --invariant-color: #b7f8d0;
+    --invariant-color-dark: #00ba44;
+    --invariant-color-darker: #008622;
+
+    /* blockquote colors */
+    --blockquote-background: #f5f5f5;
+    --blockquote-foreground: #727272;
+
+    /* table colors */
+    --tablehead-background: #f1f1f1;
+    --tablehead-foreground: var(--page-foreground-color);
+
+    /* menu-display: block | none
+     * Visibility of the top navigation on screens >= 768px. On smaller screen the menu is always visible.
+     * `GENERATE_TREEVIEW` MUST be enabled!
+     */
+    --menu-display: block;
+
+    --menu-focus-foreground: var(--page-background-color);
+    --menu-focus-background: var(--primary-color);
+    --menu-selected-background: rgba(0,0,0,.05);
+
+
+    --header-background: var(--page-background-color);
+    --header-foreground: var(--page-foreground-color);
+
+    /* searchbar colors */
+    --searchbar-background: var(--side-nav-background);
+    --searchbar-foreground: var(--page-foreground-color);
+
+    /* searchbar size
+     * (`searchbar-width` is only applied on screens >= 768px.
+     * on smaller screens the searchbar will always fill the entire screen width) */
+    --searchbar-height: 33px;
+    --searchbar-width: 210px;
+
+    /* code block colors */
+    --code-background: #f5f5f5;
+    --code-foreground: var(--page-foreground-color);
+
+    /* fragment colors */
+    --fragment-background: #282c34;
+    --fragment-foreground: #ffffff;
+    --fragment-keyword: #cc99cd;
+    --fragment-keywordtype: #ab99cd;
+    --fragment-keywordflow: #e08000;
+    --fragment-token: #7ec699;
+    --fragment-comment: #999999;
+    --fragment-link: #98c0e3;
+    --fragment-preprocessor: #65cabe;
+    --fragment-linenumber-color: #cccccc;
+    --fragment-linenumber-background: #35393c;
+    --fragment-linenumber-border: #1f1f1f;
+    --fragment-lineheight: 20px;
+
+    /* sidebar navigation (treeview) colors */
+    --side-nav-background: #fbfbfb;
+    --side-nav-foreground: var(--page-foreground-color);
+    --side-nav-arrow-color: var(--page-background-color);
+
+    /* height of an item in any tree / collapsable table */
+    --tree-item-height: 30px;
+}
+
+@media screen and (max-width: 767px) {
+    :root {
+        --page-font-size: 16px;
+        --navigation-font-size: 16px;
+        --code-font-size: 15px; /* affects code, fragment */
+        --title-font-size: 22px;
+    }
+}
+
+@media (prefers-color-scheme: dark) {
+    :root {
+        --primary-color: #00559f;
+        --primary-dark-color: #1982d2;
+        --primary-light-color: #4779ac;
+        --primary-lighter-color: #191e21;
+        --primary-lightest-color: #191a1c;
+
+        --box-shadow: 0 2px 10px 0 rgba(0,0,0,.35);
+
+        --odd-color: rgba(0,0,0,.1);
+
+        --menu-selected-background: rgba(0,0,0,.4);
+
+        --page-background-color: #1C1D1F;
+        --page-foreground-color: #d2dbde;
+        --page-secondary-foreground-color: #859399;
+        --separator-color: #000000;
+        --side-nav-background: #252628;
+
+        --code-background: #2a2c2f;
+
+        --tablehead-background: #2a2c2f;
+
+        --blockquote-background: #1f2022;
+        --blockquote-foreground: #77848a;
+
+        --warning-color: #b61825;
+        --warning-color-dark: #510a02;
+        --warning-color-darker: #f5b1aa;
+        --note-color: rgb(255, 183, 0);
+        --note-color-dark: #9f7300;
+        --note-color-darker: #fff6df;
+        --deprecated-color: rgb(88, 90, 96);
+        --deprecated-color-dark: #262e37;
+        --deprecated-color-darker: #a0a5b0;
+        --bug-color: rgb(248, 113, 0);
+        --bug-color-dark: #812a00;
+        --bug-color-darker: #ffd3be;
+    }
+}
+
+body {
+    color: var(--page-foreground-color);
+    background-color: var(--page-background-color);
+    font-size: var(--page-font-size);
+}
+
+body, table, div, p, dl, #nav-tree .label, .title, .sm-dox a, .sm-dox a:hover, .sm-dox a:focus, #projectname, .SelectItem, #MSearchField, .navpath li.navelem a, .navpath li.navelem a:hover {
+    font-family: var(--font-family);
+}
+
+h1, h2, h3, h4, h5 {
+    margin-top: .9em;
+    font-weight: 600;
+    line-height: initial;
+}
+
+p, div, table, dl {
+    font-size: var(--page-font-size);
+}
+
+a, a.el:visited, a.el:hover, a.el:focus, a.el:active {
+    color: var(--primary-dark-color);
+}
+
+/*
+ Title and top navigation
+ */
+
+#top {
+    background: var(--header-background);
+    border-bottom: 1px solid var(--separator-color);
+}
+
+@media screen and (min-width: 768px) {
+    #top {
+        display: flex;
+        flex-wrap: wrap;
+        justify-content: space-between;
+        align-items: center;
+    }
+}
+
+#main-nav {
+    flex-grow: 5;
+    padding: var(--spacing-small) var(--spacing-medium);
+}
+
+#titlearea {
+    width: auto;
+    padding: var(--spacing-medium) var(--spacing-large);
+    background: none;
+    color: var(--header-foreground);
+    border-bottom: none;
+}
+
+@media screen and (max-width: 767px) {
+    #titlearea {
+        padding-bottom: var(--spacing-small);
+    }
+}
+
+#titlearea table tbody tr {
+    height: auto !important;
+}
+
+#projectname {
+    font-size: var(--title-font-size);
+    font-weight: 600;
+}
+
+#projectnumber {
+    font-family: inherit;
+    font-size: 60%;
+}
+
+#projectbrief {
+    font-family: inherit;
+    font-size: 80%;
+}
+
+#projectlogo {
+    vertical-align: middle;
+}
+
+#projectlogo img {
+    max-height: calc(var(--title-font-size) * 2);
+    margin-right: var(--spacing-small);
+}
+
+.sm-dox, .tabs, .tabs2, .tabs3 {
+    background: none;
+    padding: 0;
+}
+
+.tabs, .tabs2, .tabs3 {
+    border-bottom: 1px solid var(--separator-color);
+    margin-bottom: -1px;
+}
+
+@media screen and (max-width: 767px) {
+    .sm-dox a span.sub-arrow {
+        background: var(--code-background);
+    }
+}
+
+@media screen and (min-width: 768px) {
+    .sm-dox li, .tablist li {
+        display: var(--menu-display);
+    }
+
+    .sm-dox a span.sub-arrow {
+        border-color: var(--header-foreground) transparent transparent transparent;
+    }
+
+    .sm-dox a:hover span.sub-arrow {
+        border-color: var(--menu-focus-foreground) transparent transparent transparent;
+    }
+
+    .sm-dox ul a span.sub-arrow {
+        border-color: transparent transparent transparent var(--header-foreground);
+    }
+
+    .sm-dox ul a:hover span.sub-arrow {
+        border-color: transparent transparent transparent var(--menu-focus-foreground);
+    }
+}
+
+.sm-dox ul {
+    background: var(--page-background-color);
+    box-shadow: var(--box-shadow);
+    border: 1px solid var(--separator-color);
+    border-radius: var(--border-radius-medium) !important;
+    padding: var(--spacing-small);
+    animation: ease-out 150ms slideInMenu;
+}
+
+@keyframes slideInMenu {
+    from {
+        opacity: 0;
+        transform: translate(0px, -2px);
+    }
+
+    to {
+        opacity: 1;
+        transform: translate(0px, 0px);
+    }
+}
+
+.sm-dox ul a {
+    color: var(--page-foreground-color);
+    background: var(--page-background-color);
+    font-size: var(--navigation-font-size);
+}
+
+.sm-dox>li>ul:after {
+    border-bottom-color: var(--page-background-color) !important;
+}
+
+.sm-dox>li>ul:before {
+    border-bottom-color: var(--separator-color) !important;
+}
+
+.sm-dox ul a:hover, .sm-dox ul a:active, .sm-dox ul a:focus {
+    font-size: var(--navigation-font-size);
+    color: var(--menu-focus-foreground);
+    text-shadow: none;
+    background-color: var(--menu-focus-background);
+    border-radius: var(--border-radius-small) !important;
+}
+
+.sm-dox a, .sm-dox a:focus, .tablist li, .tablist li a, .tablist li.current a {
+    text-shadow: none;
+    background: transparent;
+    background-image: none !important;
+    color: var(--header-foreground);
+    font-weight: normal;
+    font-size: var(--navigation-font-size);
+}
+
+.sm-dox a:focus {
+    outline: auto;
+}
+
+.sm-dox a:hover, .sm-dox a:active, .tablist li a:hover {
+    text-shadow: none;
+    font-weight: normal;
+    background: var(--menu-focus-background);
+    color: var(--menu-focus-foreground);
+    border-radius: var(--border-radius-small) !important;
+    font-size: var(--navigation-font-size);
+}
+
+.tablist li.current {
+    border-radius: var(--border-radius-small);
+    background: var(--menu-selected-background);
+}
+
+.tablist li {
+    margin: var(--spacing-small) 0 var(--spacing-small) var(--spacing-small);
+}
+
+.tablist a {
+    padding: 0 var(--spacing-large);
+}
+
+
+/*
+ Search box
+ */
+
+#MSearchBox {
+    height: var(--searchbar-height);
+    background: var(--searchbar-background);
+    border-radius: var(--searchbar-height);
+    border: 1px solid var(--separator-color);
+    overflow: hidden;
+    width: var(--searchbar-width);
+    position: relative;
+    box-shadow: none;
+    display: block;
+    margin-top: 0;
+}
+
+.left #MSearchSelect {
+    left: 0;
+}
+
+.tabs .left #MSearchSelect {
+    padding-left: 0;
+}
+
+.tabs #MSearchBox {
+    position: absolute;
+    right: var(--spacing-medium);
+}
+
+@media screen and (max-width: 767px) {
+    .tabs #MSearchBox {
+        position: relative;
+        right: 0;
+        margin-left: var(--spacing-medium);
+        margin-top: 0;
+    }
+}
+
+#MSearchSelectWindow, #MSearchResultsWindow {
+    z-index: 9999;
+}
+
+#MSearchBox.MSearchBoxActive {
+    border-color: var(--primary-color);
+    box-shadow: inset 0 0 0 1px var(--primary-color);
+}
+
+#main-menu > li:last-child {
+    margin-right: 0;
+}
+
+@media screen and (max-width: 767px) {
+    #main-menu > li:last-child {
+        height: 50px;
+    }
+}
+
+#MSearchField {
+    font-size: var(--navigation-font-size);
+    height: calc(var(--searchbar-height) - 2px);
+    background: transparent;
+    width: calc(var(--searchbar-width) - 64px);
+}
+
+.MSearchBoxActive #MSearchField {
+    color: var(--searchbar-foreground);
+}
+
+#MSearchSelect {
+    top: calc(calc(var(--searchbar-height) / 2) - 11px);
+}
+
+.left #MSearchSelect {
+    padding-left: 8px;
+}
+
+#MSearchBox span.left, #MSearchBox span.right {
+    background: none;
+}
+
+#MSearchBox span.right {
+    padding-top: calc(calc(var(--searchbar-height) / 2) - 12px);
+}
+
+.tabs #MSearchBox span.right {
+    top: calc(calc(var(--searchbar-height) / 2) - 12px);
+}
+
+@keyframes slideInSearchResults {
+    from {
+        opacity: 0;
+        transform: translate(0, 15px);
+    }
+
+    to {
+        opacity: 1;
+        transform: translate(0, 20px);
+    }
+}
+
+#MSearchResultsWindow {
+    left: auto !important;
+    right: var(--spacing-medium);
+    border-radius: var(--border-radius-large);
+    border: 1px solid var(--separator-color);
+    transform: translate(0, 20px);
+    box-shadow: var(--box-shadow);
+    animation: ease-out 280ms slideInSearchResults;
+    background: var(--page-background-color);
+}
+
+iframe#MSearchResults {
+    background: var(--page-background-color);
+    margin: 4px;
+}
+
+#MSearchSelectWindow {
+    border: 1px solid var(--separator-color);
+    border-radius: var(--border-radius-medium);
+    box-shadow: var(--box-shadow);
+    background: var(--page-background-color);
+}
+
+#MSearchSelectWindow a.SelectItem {
+    font-size: var(--navigation-font-size);
+    line-height: var(--content-line-height);
+    margin: 0 var(--spacing-small);
+    border-radius: var(--border-radius-small);
+    color: var(--page-foreground-color);
+}
+
+#MSearchSelectWindow a.SelectItem:hover {
+    background: var(--menu-focus-background);
+    color: var(--menu-focus-foreground);
+}
+
+@media screen and (max-width: 767px) {
+    #MSearchBox {
+        margin-top: var(--spacing-medium);
+        margin-bottom: var(--spacing-medium);
+        width: calc(100vw - 30px);
+    }
+
+    #main-menu > li:last-child {
+        float: none !important;
+    }
+
+    #MSearchField {
+        width: calc(100vw - 95px);
+    }
+
+    @keyframes slideInSearchResultsMobile {
+        from {
+            opacity: 0;
+            transform: translate(0, 15px);
+        }
+
+        to {
+            opacity: 1;
+            transform: translate(0, 20px);
+        }
+    }
+
+    #MSearchResultsWindow {
+        left: var(--spacing-medium) !important;
+        right: var(--spacing-medium);
+        overflow: auto;
+        transform: translate(0, 20px);
+        animation: ease-out 280ms slideInSearchResultsMobile;
+    }
+}
+
+/*
+ Tree view
+ */
+
+#side-nav {
+    padding: 0 !important;
+    background: var(--side-nav-background);
+}
+
+@media screen and (max-width: 767px) {
+    #side-nav {
+        display: none;
+    }
+
+    #doc-content {
+        margin-left: 0 !important;
+        height: auto !important;
+        padding-bottom: calc(2 * var(--spacing-large));
+    }
+}
+
+#nav-tree {
+    background: transparent;
+}
+
+#nav-tree .label {
+    font-size: var(--navigation-font-size);
+}
+
+#nav-tree .item {
+    height: var(--tree-item-height);
+    line-height: var(--tree-item-height);
+}
+
+#nav-sync {
+    top: 12px !important;
+    right: 12px;
+}
+
+#nav-tree .selected {
+    text-shadow: none;
+    background-image: none;
+    background-color: transparent;
+    box-shadow: inset 4px 0 0 0 var(--primary-dark-color);
+}
+
+#nav-tree a {
+    color: var(--side-nav-foreground);
+}
+
+#nav-tree a:focus {
+    outline-style: auto;
+}
+
+.arrow {
+    color: var(--primary-light-color);
+    font-family: serif;
+    height: auto;
+    text-align: right;
+}
+
+#nav-tree .arrow {
+    opacity: 0;
+}
+
+#nav-tree div.item:hover .arrow, #nav-tree a:focus .arrow {
+    opacity: 1;
+}
+
+#nav-tree .selected a {
+    color: var(--primary-dark-color);
+    font-weight: bolder;
+}
+
+.ui-resizable-e {
+    background: var(--separator-color);
+    width: 1px;
+}
+
+/*
+ Contents
+ */
+
+div.header {
+    border-bottom: 1px solid var(--separator-color);
+    background-color: var(--page-background-color);
+    background-image: none;
+}
+
+div.contents, div.header .title, div.header .summary {
+    max-width: var(--content-maxwidth);
+}
+
+div.contents, div.header .title  {
+    line-height: initial;
+    margin: calc(var(--spacing-medium) + .2em) auto var(--spacing-medium) auto;
+}
+
+div.header .summary {
+    margin: var(--spacing-medium) auto 0 auto;
+}
+
+div.headertitle {
+    padding: 0;
+}
+
+div.header .title {
+    font-weight: 600;
+    font-size: 210%;
+    padding: var(--spacing-medium) var(--spacing-large);
+    word-break: break-word;
+}
+
+div.header .summary {
+    width: auto;
+    display: block;
+    float: none;
+    padding: 0 var(--spacing-large);
+}
+
+td.memSeparator {
+    border-color: var(--separator-color);
+}
+
+.mdescLeft, .mdescRight, .memItemLeft, .memItemRight, .memTemplItemLeft, .memTemplItemRight, .memTemplParams {
+    background: var(--code-background);
+}
+
+.mdescRight {
+    color: var(--page-secondary-foreground-color);
+}
+
+span.mlabel {
+    background: var(--primary-color);
+    border: none;
+    padding: 4px 9px;
+    border-radius: 12px;
+    margin-right: var(--spacing-medium);
+}
+
+span.mlabel:last-of-type {
+    margin-right: 2px;
+}
+
+div.contents {
+    padding: 0 var(--spacing-large);
+}
+
+div.contents p, div.contents li {
+    line-height: var(--content-line-height);
+}
+
+div.contents div.dyncontent {
+    margin: var(--spacing-medium) 0;
+}
+
+@media (prefers-color-scheme: dark) {
+    div.contents div.dyncontent img {
+        filter: hue-rotate(180deg) invert();
+    }
+}
+
+h2.groupheader {
+    border-bottom: 1px solid var(--separator-color);
+    color: var(--page-foreground-color);
+}
+
+blockquote {
+    padding: var(--spacing-small) var(--spacing-medium);
+    background: var(--blockquote-background);
+    color: var(--blockquote-foreground);
+    border-left: 2px solid var(--blockquote-foreground);
+    margin: 0;
+}
+
+blockquote p {
+    margin: var(--spacing-small) 0 var(--spacing-medium) 0;
+}
+.paramname {
+    color: var(--primary-dark-color);
+}
+
+.glow {
+    text-shadow: 0 0 15px var(--primary-light-color) !important;
+}
+
+.alphachar a {
+    color: var(--page-foreground-color);
+}
+
+/*
+ Table of Contents
+ */
+
+div.toc {
+    background-color: var(--side-nav-background);
+    border: 1px solid var(--separator-color);
+    border-radius: var(--border-radius-medium);
+    box-shadow: var(--box-shadow);
+    padding: 0 var(--spacing-large);
+    margin: 0 0 var(--spacing-medium) var(--spacing-medium);
+}
+
+div.toc h3 {
+    color: var(--side-nav-foreground);
+    font-size: var(--navigation-font-size);
+    margin: var(--spacing-large) 0;
+}
+
+div.toc li {
+    font-size: var(--navigation-font-size);
+    padding: 0;
+    background: none;
+}
+
+div.toc li:before {
+    content: '↓';
+    font-weight: 800;
+    font-family: var(--font-family);
+    margin-right: var(--spacing-small);
+    color: var(--side-nav-foreground);
+    opacity: .4;
+}
+
+div.toc ul li.level1 {
+    margin: 0;
+}
+
+div.toc ul li.level2, div.toc ul li.level3 {
+    margin-top: 0;
+}
+
+
+@media screen and (max-width: 767px) {
+    div.toc {
+        float: none;
+        width: auto;
+        margin: 0 0 var(--spacing-medium) 0;
+    }
+}
+
+/*
+ Code & Fragments
+ */
+
+code, div.fragment, pre.fragment {
+    border-radius: var(--border-radius-small);
+    border: none;
+    overflow: hidden;
+}
+
+code {
+    display: inline;
+    background: var(--code-background);
+    color: var(--code-foreground);
+    padding: 2px 6px;
+    word-break: break-word;
+}
+
+div.fragment, pre.fragment {
+    margin: var(--spacing-medium) 0;
+    padding: 14px 16px;
+    background: var(--fragment-background);
+    color: var(--fragment-foreground);
+    overflow-x: auto;
+}
+
+@media screen and (max-width: 767px) {
+    div.fragment, pre.fragment {
+        border-top-right-radius: 0;
+        border-bottom-right-radius: 0;
+    }
+
+    .contents > div.fragment, .textblock > div.fragment, .textblock > pre.fragment {
+        margin: var(--spacing-medium) calc(0px - var(--spacing-large));
+        border-radius: 0;
+    }
+
+    .textblock li > .fragment {
+        margin: var(--spacing-medium) calc(0px - var(--spacing-large));
+    }
+
+    .memdoc li > .fragment {
+        margin: var(--spacing-medium) calc(0px - var(--spacing-medium));
+    }
+
+    .memdoc > div.fragment, .memdoc > pre.fragment, dl dd > div.fragment, dl dd pre.fragment {
+        margin: var(--spacing-medium) calc(0px - var(--spacing-medium));
+        border-radius: 0;
+    }
+}
+
+code, code a, pre.fragment, div.fragment, div.fragment .line, div.fragment span, div.fragment .line a, div.fragment .line span {
+    font-family: var(--font-family-monospace);
+    font-size: var(--code-font-size) !important;
+}
+
+div.line:after {
+    margin-right: var(--spacing-medium);
+}
+
+div.fragment .line, pre.fragment {
+    white-space: pre;
+    word-wrap: initial;
+    line-height: var(--fragment-lineheight);
+}
+
+div.fragment span.keyword {
+    color: var(--fragment-keyword);
+}
+
+div.fragment span.keywordtype {
+    color: var(--fragment-keywordtype);
+}
+
+div.fragment span.keywordflow {
+    color: var(--fragment-keywordflow);
+}
+
+div.fragment span.stringliteral {
+    color: var(--fragment-token)
+}
+
+div.fragment span.comment {
+    color: var(--fragment-comment);
+}
+
+div.fragment a.code {
+    color: var(--fragment-link);
+}
+
+div.fragment span.preprocessor {
+    color: var(--fragment-preprocessor);
+}
+
+div.fragment span.lineno {
+    display: inline-block;
+    width: 27px;
+    border-right: none;
+    background: var(--fragment-linenumber-background);
+    color: var(--fragment-linenumber-color);
+}
+
+div.fragment span.lineno a {
+    background: none;
+    color: var(--fragment-link);
+}
+
+div.fragment .line:first-child .lineno {
+    box-shadow: -999999px 0px 0 999999px var(--fragment-linenumber-background), -999998px 0px 0 999999px var(--fragment-linenumber-border);
+}
+
+/*
+ dl warning, attention, note, deprecated, bug, ...
+ */
+
+dl.warning, dl.attention, dl.note, dl.deprecated, dl.bug, dl.invariant, dl.pre {
+    padding: var(--spacing-medium);
+    margin: var(--spacing-medium) 0;
+    color: var(--page-background-color);
+    overflow: hidden;
+    margin-left: 0;
+    border-radius: var(--border-radius-small);
+}
+
+dl.section dd {
+    margin-bottom: 2px;
+}
+
+dl.warning, dl.attention {
+    background: var(--warning-color);
+    border-left: 8px solid var(--warning-color-dark);
+    color: var(--warning-color-darker);
+}
+
+dl.warning dt, dl.attention dt {
+    color: var(--warning-color-dark);
+}
+
+dl.note {
+    background: var(--note-color);
+    border-left: 8px solid var(--note-color-dark);
+    color: var(--note-color-darker);
+}
+
+dl.note dt {
+    color: var(--note-color-dark);
+}
+
+dl.bug {
+    background: var(--bug-color);
+    border-left: 8px solid var(--bug-color-dark);
+    color: var(--bug-color-darker);
+}
+
+dl.bug dt a {
+    color: var(--bug-color-dark) !important;
+}
+
+dl.deprecated {
+    background: var(--deprecated-color);
+    border-left: 8px solid var(--deprecated-color-dark);
+    color: var(--deprecated-color-darker);
+}
+
+dl.deprecated dt a {
+    color: var(--deprecated-color-dark) !important;
+}
+
+dl.section dd, dl.bug dd, dl.deprecated dd {
+    margin-inline-start: 0px;
+}
+
+dl.invariant, dl.pre {
+    background: var(--invariant-color);
+    border-left: 8px solid var(--invariant-color-dark);
+    color: var(--invariant-color-darker);
+}
+
+/*
+ memitem
+ */
+
+div.memdoc, div.memproto, h2.memtitle {
+    box-shadow: none;
+    background-image: none;
+    border: none;
+}
+
+div.memdoc {
+    padding: 0 var(--spacing-medium);
+    background: var(--page-background-color);
+}
+
+h2.memtitle, div.memitem {
+    border: 1px solid var(--separator-color);
+}
+
+div.memproto, h2.memtitle {
+    background: var(--code-background);
+    text-shadow: none;
+}
+
+h2.memtitle {
+    font-weight: 500;
+    font-family: monospace, fixed;
+    border-bottom: none;
+    border-top-left-radius: var(--border-radius-medium);
+    border-top-right-radius: var(--border-radius-medium);
+    word-break: break-all;
+}
+
+a:target + h2.memtitle, a:target + h2.memtitle + div.memitem {
+    border-color: var(--primary-light-color);
+}
+
+a:target + h2.memtitle {
+    box-shadow: -3px -3px 3px 0 var(--primary-lightest-color), 3px -3px 3px 0 var(--primary-lightest-color);
+}
+
+a:target + h2.memtitle + div.memitem {
+    box-shadow: 0 0 10px 0 var(--primary-lighter-color);
+}
+
+div.memitem {
+    border-top-right-radius: var(--border-radius-medium);
+    border-bottom-right-radius: var(--border-radius-medium);
+    border-bottom-left-radius: var(--border-radius-medium);
+    overflow: hidden;
+    display: block !important;
+}
+
+div.memdoc {
+    border-radius: 0;
+}
+
+div.memproto {
+    border-radius: 0 var(--border-radius-small) 0 0;
+    overflow: auto;
+    border-bottom: 1px solid var(--separator-color);
+    padding: var(--spacing-medium);
+    margin-bottom: -1px;
+}
+
+div.memtitle {
+    border-top-right-radius: var(--border-radius-medium);
+    border-top-left-radius: var(--border-radius-medium);
+}
+
+div.memproto table.memname {
+    font-family: monospace, fixed;
+    color: var(--page-foreground-color);
+}
+
+table.mlabels, table.mlabels > tbody {
+    display: block;
+}
+
+td.mlabels-left {
+    width: auto;
+}
+
+table.mlabels > tbody > tr:first-child {
+    display: flex;
+    justify-content: space-between;
+    flex-wrap: wrap;
+}
+
+.memname, .memitem span.mlabels {
+    margin: 0
+}
+
+/*
+ reflist
+ */
+
+dl.reflist {
+    border-radius: var(--border-radius-medium);
+    border: 1px solid var(--separator-color);
+    overflow: hidden;
+    padding: 0;
+}
+
+
+dl.reflist dt, dl.reflist dd {
+    box-shadow: none;
+    text-shadow: none;
+    background-image: none;
+    border: none;
+    padding: 12px;
+}
+
+
+dl.reflist dt {
+    border-radius: 0;
+    background: var(--code-background);
+    border-bottom: 1px solid var(--separator-color);
+    color: var(--page-foreground-color)
+}
+
+
+dl.reflist dd {
+    background: none;
+}
+
+/*
+ Table
+ */
+
+table.markdownTable, table.fieldtable {
+    width: 100%;
+    border: 1px solid var(--separator-color);
+    margin: var(--spacing-medium) 0;
+}
+
+table.fieldtable {
+    box-shadow: none;
+    border-radius: var(--border-radius-small);
+}
+
+th.markdownTableHeadLeft, th.markdownTableHeadRight, th.markdownTableHeadCenter, th.markdownTableHeadNone {
+    background: var(--tablehead-background);
+    color: var(--tablehead-foreground);
+    font-weight: 600;
+}
+
+table.markdownTable td, table.markdownTable th, table.fieldtable dt {
+    border: 1px solid var(--separator-color);
+    padding: var(--spacing-small) var(--spacing-medium);
+}
+
+table.fieldtable th {
+    font-size: var(--page-font-size);
+    font-weight: 600;
+    background-image: none;
+    background-color: var(--tablehead-background);
+    color: var(--tablehead-foreground);
+    border-bottom: 1px solid var(--separator-color);
+}
+
+.fieldtable td.fieldtype, .fieldtable td.fieldname {
+    border-bottom: 1px solid var(--separator-color);
+    border-right: 1px solid var(--separator-color);
+}
+
+.fieldtable td.fielddoc {
+    border-bottom: 1px solid var(--separator-color);
+}
+
+.memberdecls td.glow, .fieldtable tr.glow {
+    background-color: var(--primary-light-color);
+    box-shadow: 0 0 15px var(--primary-lighter-color);
+}
+
+table.memberdecls {
+    display: block;
+    overflow-x: auto;
+    overflow-y: hidden;
+}
+
+
+/*
+ Horizontal Rule
+ */
+
+hr {
+    margin-top: var(--spacing-large);
+    margin-bottom: var(--spacing-large);
+    border-top:1px solid var(--separator-color);
+}
+
+.contents hr {
+    box-shadow: var(--content-maxwidth) 0 0 0 var(--separator-color), calc(0px - var(--content-maxwidth)) 0 0 0 var(--separator-color);
+}
+
+.contents img {
+    max-width: 100%;
+}
+
+/*
+ Directories
+ */
+div.directory {
+    border-top: 1px solid var(--separator-color);
+    border-bottom: 1px solid var(--separator-color);
+    width: auto;
+}
+
+table.directory {
+    font-family: var(--font-family);
+    font-size: var(--page-font-size);
+    font-weight: normal;
+}
+
+.directory td.entry {
+    padding: var(--spacing-small);
+    display: flex;
+    align-items: center;
+}
+
+.directory tr.even {
+    background-color: var(--odd-color);
+}
+
+.icona {
+    width: auto;
+    height: auto;
+    margin: 0 var(--spacing-small);
+}
+
+.icon {
+    background: var(--primary-dark-color);
+    width: 18px;
+    height: 18px;
+    line-height: 18px;
+}
+
+.iconfopen, .icondoc, .iconfclosed {
+    background-position: center;
+    margin-bottom: 0;
+}
+
+.icondoc {
+    filter: saturate(0.2);
+}
+
+@media screen and (max-width: 767px) {
+    div.directory {
+        margin-left: calc(0px - var(--spacing-medium));
+        margin-right: calc(0px - var(--spacing-medium));
+    }
+}
+
+@media (prefers-color-scheme: dark) {
+    .iconfopen, .iconfclosed {
+        filter: hue-rotate(180deg) invert();
+    }
+}
+
+/*
+ Class list
+ */
+
+.classindex dl.odd {
+    background: var(--odd-color);
+    border-radius: var(--border-radius-small);
+}
+
+@media screen and (max-width: 767px) {
+    .classindex {
+        margin: 0 calc(0px - var(--spacing-small));
+    }
+}
+
+/*
+  Footer and nav-path
+ */
+
+#nav-path {
+    margin-bottom: -1px;
+    width: 100%;
+}
+
+#nav-path ul {
+    background-image: none;
+    background: var(--page-background-color);
+    border: none;
+    border-top: 1px solid var(--separator-color);
+    border-bottom: 1px solid var(--separator-color);
+    font-size: var(--navigation-font-size);
+}
+
+img.footer {
+    width: 60px;
+}
+
+.navpath li.footer {
+    color: var(--page-secondary-foreground-color);
+}
+
+address.footer {
+    margin-bottom: var(--spacing-large);
+}
+
+#nav-path li.navelem {
+    background-image: none;
+    display: flex;
+    align-items: center;
+}
+
+.navpath li.navelem a {
+    text-shadow: none;
+    display: inline-block;
+    color: var(--primary-dark-color)
+}
+
+li.navelem {
+    padding: 0;
+    margin-left: -8px;
+}
+
+li.navelem:first-child {
+    margin-left: var(--spacing-large);
+}
+
+li.navelem:first-child:before {
+    display: none;
+}
+
+#nav-path li.navelem:after {
+    content: '';
+    border: 5px solid var(--page-background-color);
+    border-bottom-color: transparent;
+    border-right-color: transparent;
+    border-top-color: transparent;
+    transform: scaleY(4.2);
+    z-index: 10;
+    margin-left: 6px;
+}
+
+#nav-path li.navelem:before {
+    content: '';
+    border: 5px solid var(--separator-color);
+    border-bottom-color: transparent;
+    border-right-color: transparent;
+    border-top-color: transparent;
+    transform: scaleY(3.2);
+    margin-right: var(--spacing-small);
+}
+
+@media (prefers-color-scheme: dark) {
+    #nav-path li.navelem:after {
+        text-shadow: 3px 0 0 var(--separator-color), 8px 0 6px rgba(0,0,0,0.4);
+    }
+}
+
+.navpath li.navelem a:hover {
+    color: var(--primary-color);
+}
diff --git a/docs/doxygen/img/epfl_logo.png b/docs/doxygen/img/epfl_logo.png
new file mode 100644
index 0000000..f888916
Binary files /dev/null and b/docs/doxygen/img/epfl_logo.png differ
diff --git a/docs/manual/Makefile b/docs/manual/Makefile
new file mode 100644
index 0000000..206a081
--- /dev/null
+++ b/docs/manual/Makefile
@@ -0,0 +1,59 @@
+#
+# @file bsplines.tex
+#
+# @brief
+#
+# @copyright
+# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+# SPC (Swiss Plasma Center)
+#
+# spclibs is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# @authors
+# (in alphabetical order)
+# @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+#
+dvi:	bsplines.dvi
+pdf:	bsplines.pdf
+ps:	bsplines.ps
+
+.SUFFIXES:
+.SUFFIXES: .sgml .html .tex .dvi .pdf .ps .txt
+
+.tex.dvi:
+	latex $<
+	@while ( grep "Rerun to get cross-references" \
+	         ${<:tex=log} > /dev/null ); do \
+	         latex $<; \
+	done
+	latex $<
+
+.dvi.pdf:
+	dvipdf $<
+
+.dvi.ps:
+	dvips $<
+
+bsplines.dvi: bsplines.tex driv1.eps fit.eps
+
+solvers.dvi: solvers.tex
+
+dirichlet_2d.dvi: dirichlet_2d.tex
+
+using_bsplines.dvi: using_bsplines.tex
+
+clean:
+	rm -f *~ *.dvi *.log *.aux *.out *~ *.toc *.flc *.bbl *.blg
+
+distclean: clean
+	rm -f bsplines.ps
diff --git a/docs/manual/basfun_perf_helios.eps b/docs/manual/basfun_perf_helios.eps
new file mode 100644
index 0000000..85152cb
--- /dev/null
+++ b/docs/manual/basfun_perf_helios.eps
@@ -0,0 +1,1281 @@
+%!PS-Adobe-3.0 EPSF-3.0
+%%Creator: MATLAB, The MathWorks, Inc. Version 7.14.0.739 (R2012a). Operating System: Linux 2.6.27.56-0.1-default #1 SMP 2010-12-01 16:57:58 +0100 x86_64.
+%%Title: /home/ttran/bsplines_hlst-dev/opt_wk/basfun_perf_helios.eps
+%%CreationDate: 03/27/2012  13:25:30
+%%DocumentNeededFonts: Helvetica
+%%DocumentProcessColors: Cyan Magenta Yellow Black
+%%LanguageLevel: 2
+%%Pages: 1
+%%BoundingBox:    23    77   571   764
+%%EndComments
+
+%%BeginProlog
+% MathWorks dictionary
+/MathWorks 160 dict begin
+% definition operators
+/bdef {bind def} bind def
+/ldef {load def} bind def
+/xdef {exch def} bdef
+/xstore {exch store} bdef
+% operator abbreviations
+/c  /clip ldef
+/cc /concat ldef
+/cp /closepath ldef
+/gr /grestore ldef
+/gs /gsave ldef
+/mt /moveto ldef
+/np /newpath ldef
+/cm /currentmatrix ldef
+/sm /setmatrix ldef
+/rm /rmoveto ldef
+/rl /rlineto ldef
+/s {show newpath} bdef
+/sc {setcmykcolor} bdef
+/sr /setrgbcolor ldef
+/sg /setgray ldef
+/w /setlinewidth ldef
+/j /setlinejoin ldef
+/cap /setlinecap ldef
+/rc {rectclip} bdef
+/rf {rectfill} bdef
+% page state control
+/pgsv () def
+/bpage {/pgsv save def} bdef
+/epage {pgsv restore} bdef
+/bplot /gsave ldef
+/eplot {stroke grestore} bdef
+% orientation switch
+/portraitMode 0 def /landscapeMode 1 def /rotateMode 2 def
+% coordinate system mappings
+/dpi2point 0 def
+% font control
+/FontSize 0 def
+/FMS {/FontSize xstore findfont [FontSize 0 0 FontSize neg 0 0]
+  makefont setfont} bdef
+/reencode {exch dup where {pop load} {pop StandardEncoding} ifelse
+  exch dup 3 1 roll findfont dup length dict begin
+  { 1 index /FID ne {def}{pop pop} ifelse } forall
+  /Encoding exch def currentdict end definefont pop} bdef
+/isroman {findfont /CharStrings get /Agrave known} bdef
+/FMSR {3 1 roll 1 index dup isroman {reencode} {pop pop} ifelse
+  exch FMS} bdef
+/csm {1 dpi2point div -1 dpi2point div scale neg translate
+ dup landscapeMode eq {pop -90 rotate}
+  {rotateMode eq {90 rotate} if} ifelse} bdef
+% line types: solid, dotted, dashed, dotdash
+/SO { [] 0 setdash } bdef
+/DO { [.5 dpi2point mul 4 dpi2point mul] 0 setdash } bdef
+/DA { [6 dpi2point mul] 0 setdash } bdef
+/DD { [.5 dpi2point mul 4 dpi2point mul 6 dpi2point mul 4
+  dpi2point mul] 0 setdash } bdef
+% macros for lines and objects
+/L {lineto stroke} bdef
+/MP {3 1 roll moveto 1 sub {rlineto} repeat} bdef
+/AP {{rlineto} repeat} bdef
+/PDlw -1 def
+/W {/PDlw currentlinewidth def setlinewidth} def
+/PP {closepath eofill} bdef
+/DP {closepath stroke} bdef
+/MR {4 -2 roll moveto dup  0 exch rlineto exch 0 rlineto
+  neg 0 exch rlineto closepath} bdef
+/FR {MR stroke} bdef
+/PR {MR fill} bdef
+/L1i {{currentfile picstr readhexstring pop} image} bdef
+/tMatrix matrix def
+/MakeOval {newpath tMatrix currentmatrix pop translate scale
+0 0 1 0 360 arc tMatrix setmatrix} bdef
+/FO {MakeOval stroke} bdef
+/PO {MakeOval fill} bdef
+/PD {currentlinewidth 2 div 0 360 arc fill
+   PDlw -1 eq not {PDlw w /PDlw -1 def} if} def
+/FA {newpath tMatrix currentmatrix pop translate scale
+  0 0 1 5 -2 roll arc tMatrix setmatrix stroke} bdef
+/PA {newpath tMatrix currentmatrix pop	translate 0 0 moveto scale
+  0 0 1 5 -2 roll arc closepath tMatrix setmatrix fill} bdef
+/FAn {newpath tMatrix currentmatrix pop translate scale
+  0 0 1 5 -2 roll arcn tMatrix setmatrix stroke} bdef
+/PAn {newpath tMatrix currentmatrix pop translate 0 0 moveto scale
+  0 0 1 5 -2 roll arcn closepath tMatrix setmatrix fill} bdef
+/vradius 0 def /hradius 0 def /lry 0 def
+/lrx 0 def /uly 0 def /ulx 0 def /rad 0 def
+/MRR {/vradius xdef /hradius xdef /lry xdef /lrx xdef /uly xdef
+  /ulx xdef newpath tMatrix currentmatrix pop ulx hradius add uly
+  vradius add translate hradius vradius scale 0 0 1 180 270 arc 
+  tMatrix setmatrix lrx hradius sub uly vradius add translate
+  hradius vradius scale 0 0 1 270 360 arc tMatrix setmatrix
+  lrx hradius sub lry vradius sub translate hradius vradius scale
+  0 0 1 0 90 arc tMatrix setmatrix ulx hradius add lry vradius sub
+  translate hradius vradius scale 0 0 1 90 180 arc tMatrix setmatrix
+  closepath} bdef
+/FRR {MRR stroke } bdef
+/PRR {MRR fill } bdef
+/MlrRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lry uly sub 2 div def
+  newpath tMatrix currentmatrix pop ulx rad add uly rad add translate
+  rad rad scale 0 0 1 90 270 arc tMatrix setmatrix lrx rad sub lry rad
+  sub translate rad rad scale 0 0 1 270 90 arc tMatrix setmatrix
+  closepath} bdef
+/FlrRR {MlrRR stroke } bdef
+/PlrRR {MlrRR fill } bdef
+/MtbRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lrx ulx sub 2 div def
+  newpath tMatrix currentmatrix pop ulx rad add uly rad add translate
+  rad rad scale 0 0 1 180 360 arc tMatrix setmatrix lrx rad sub lry rad
+  sub translate rad rad scale 0 0 1 0 180 arc tMatrix setmatrix
+  closepath} bdef
+/FtbRR {MtbRR stroke } bdef
+/PtbRR {MtbRR fill } bdef
+/stri 6 array def /dtri 6 array def
+/smat 6 array def /dmat 6 array def
+/tmat1 6 array def /tmat2 6 array def /dif 3 array def
+/asub {/ind2 exch def /ind1 exch def dup dup
+  ind1 get exch ind2 get sub exch } bdef
+/tri_to_matrix {
+  2 0 asub 3 1 asub 4 0 asub 5 1 asub
+  dup 0 get exch 1 get 7 -1 roll astore } bdef
+/compute_transform {
+  dmat dtri tri_to_matrix tmat1 invertmatrix 
+  smat stri tri_to_matrix tmat2 concatmatrix } bdef
+/ds {stri astore pop} bdef
+/dt {dtri astore pop} bdef
+/db {2 copy /cols xdef /rows xdef mul dup 3 mul string
+  currentfile 
+  3 index 0 eq {/ASCIIHexDecode filter}
+  {/ASCII85Decode filter 3 index 2 eq {/RunLengthDecode filter} if }
+  ifelse exch readstring pop
+  dup 0 3 index getinterval /rbmap xdef
+  dup 2 index dup getinterval /gbmap xdef
+  1 index dup 2 mul exch getinterval /bbmap xdef pop pop}bdef
+/it {gs np dtri aload pop moveto lineto lineto cp c
+  cols rows 8 compute_transform 
+  rbmap gbmap bbmap true 3 colorimage gr}bdef
+/il {newpath moveto lineto stroke}bdef
+currentdict end def
+%%EndProlog
+
+%%BeginSetup
+MathWorks begin
+
+0 cap
+
+end
+%%EndSetup
+
+%%Page: 1 1
+%%BeginPageSetup
+%%PageBoundingBox:    23    77   571   764
+MathWorks begin
+bpage
+%%EndPageSetup
+
+%%BeginObject: obj1
+bplot
+
+/dpi2point 12 def
+portraitMode 0276 9168 csm
+
+    0     0  6586  8240 rc
+85 dict begin %Colortable dictionary
+/c0 { 0.000000 0.000000 0.000000 sr} bdef
+/c1 { 1.000000 1.000000 1.000000 sr} bdef
+/c2 { 0.900000 0.000000 0.000000 sr} bdef
+/c3 { 0.000000 0.820000 0.000000 sr} bdef
+/c4 { 0.000000 0.000000 0.800000 sr} bdef
+/c5 { 0.910000 0.820000 0.320000 sr} bdef
+/c6 { 1.000000 0.260000 0.820000 sr} bdef
+/c7 { 0.000000 0.820000 0.820000 sr} bdef
+c0
+1 j
+1 sg
+   0    0 6587 8241 rf
+6 w
+0 2811 5104 0 0 -2811 856 3429 4 MP
+PP
+-5104 0 0 2811 5104 0 0 -2811 856 3429 5 MP stroke
+4 w
+DO
+0 sg
+ 856 3429 mt  856  618 L
+ 856  618 mt  856  618 L
+1876 3429 mt 1876  618 L
+1876  618 mt 1876  618 L
+2897 3429 mt 2897  618 L
+2897  618 mt 2897  618 L
+3918 3429 mt 3918  618 L
+3918  618 mt 3918  618 L
+4939 3429 mt 4939  618 L
+4939  618 mt 4939  618 L
+5960 3429 mt 5960  618 L
+5960  618 mt 5960  618 L
+ 856 3429 mt 5960 3429 L
+5960 3429 mt 5960 3429 L
+ 856 2866 mt 5960 2866 L
+5960 2866 mt 5960 2866 L
+ 856 2304 mt 5960 2304 L
+5960 2304 mt 5960 2304 L
+ 856 1742 mt 5960 1742 L
+5960 1742 mt 5960 1742 L
+ 856 1180 mt 5960 1180 L
+5960 1180 mt 5960 1180 L
+ 856  618 mt 5960  618 L
+5960  618 mt 5960  618 L
+SO
+6 w
+ 856 3429 mt 5960 3429 L
+ 856  618 mt 5960  618 L
+ 856 3429 mt  856  618 L
+5960 3429 mt 5960  618 L
+ 856 3429 mt 5960 3429 L
+ 856 3429 mt  856  618 L
+ 856 3429 mt  856 3403 L
+ 856  618 mt  856  643 L
+DO
+ 856 3429 mt  856  618 L
+ 856  618 mt  856  618 L
+SO
+ 856 3429 mt  856 3377 L
+ 856  618 mt  856  669 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+ 768 3611 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 80 FMSR
+
+ 901 3537 mt 
+(0) s
+1163 3429 mt 1163 3403 L
+1163  618 mt 1163  643 L
+DO
+1163 3429 mt 1163  618 L
+1163  618 mt 1163  618 L
+SO
+1343 3429 mt 1343 3403 L
+1343  618 mt 1343  643 L
+DO
+1343 3429 mt 1343  618 L
+1343  618 mt 1343  618 L
+SO
+1470 3429 mt 1470 3403 L
+1470  618 mt 1470  643 L
+DO
+1470 3429 mt 1470  618 L
+1470  618 mt 1470  618 L
+SO
+1569 3429 mt 1569 3403 L
+1569  618 mt 1569  643 L
+DO
+1569 3429 mt 1569  618 L
+1569  618 mt 1569  618 L
+SO
+1650 3429 mt 1650 3403 L
+1650  618 mt 1650  643 L
+DO
+1650 3429 mt 1650  618 L
+1650  618 mt 1650  618 L
+SO
+1718 3429 mt 1718 3403 L
+1718  618 mt 1718  643 L
+DO
+1718 3429 mt 1718  618 L
+1718  618 mt 1718  618 L
+SO
+1777 3429 mt 1777 3403 L
+1777  618 mt 1777  643 L
+DO
+1777 3429 mt 1777  618 L
+1777  618 mt 1777  618 L
+SO
+1830 3429 mt 1830 3403 L
+1830  618 mt 1830  643 L
+DO
+1830 3429 mt 1830  618 L
+1830  618 mt 1830  618 L
+SO
+1876 3429 mt 1876 3403 L
+1876  618 mt 1876  643 L
+DO
+1876 3429 mt 1876  618 L
+1876  618 mt 1876  618 L
+SO
+1876 3429 mt 1876 3377 L
+1876  618 mt 1876  669 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+1788 3611 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 80 FMSR
+
+1921 3537 mt 
+(1) s
+2184 3429 mt 2184 3403 L
+2184  618 mt 2184  643 L
+DO
+2184 3429 mt 2184  618 L
+2184  618 mt 2184  618 L
+SO
+2363 3429 mt 2363 3403 L
+2363  618 mt 2363  643 L
+DO
+2363 3429 mt 2363  618 L
+2363  618 mt 2363  618 L
+SO
+2491 3429 mt 2491 3403 L
+2491  618 mt 2491  643 L
+DO
+2491 3429 mt 2491  618 L
+2491  618 mt 2491  618 L
+SO
+2590 3429 mt 2590 3403 L
+2590  618 mt 2590  643 L
+DO
+2590 3429 mt 2590  618 L
+2590  618 mt 2590  618 L
+SO
+2671 3429 mt 2671 3403 L
+2671  618 mt 2671  643 L
+DO
+2671 3429 mt 2671  618 L
+2671  618 mt 2671  618 L
+SO
+2739 3429 mt 2739 3403 L
+2739  618 mt 2739  643 L
+DO
+2739 3429 mt 2739  618 L
+2739  618 mt 2739  618 L
+SO
+2798 3429 mt 2798 3403 L
+2798  618 mt 2798  643 L
+DO
+2798 3429 mt 2798  618 L
+2798  618 mt 2798  618 L
+SO
+2850 3429 mt 2850 3403 L
+2850  618 mt 2850  643 L
+DO
+2850 3429 mt 2850  618 L
+2850  618 mt 2850  618 L
+SO
+2897 3429 mt 2897 3403 L
+2897  618 mt 2897  643 L
+DO
+2897 3429 mt 2897  618 L
+2897  618 mt 2897  618 L
+SO
+2897 3429 mt 2897 3377 L
+2897  618 mt 2897  669 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+2809 3611 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 80 FMSR
+
+2942 3537 mt 
+(2) s
+3204 3429 mt 3204 3403 L
+3204  618 mt 3204  643 L
+DO
+3204 3429 mt 3204  618 L
+3204  618 mt 3204  618 L
+SO
+3384 3429 mt 3384 3403 L
+3384  618 mt 3384  643 L
+DO
+3384 3429 mt 3384  618 L
+3384  618 mt 3384  618 L
+SO
+3512 3429 mt 3512 3403 L
+3512  618 mt 3512  643 L
+DO
+3512 3429 mt 3512  618 L
+3512  618 mt 3512  618 L
+SO
+3611 3429 mt 3611 3403 L
+3611  618 mt 3611  643 L
+DO
+3611 3429 mt 3611  618 L
+3611  618 mt 3611  618 L
+SO
+3691 3429 mt 3691 3403 L
+3691  618 mt 3691  643 L
+DO
+3691 3429 mt 3691  618 L
+3691  618 mt 3691  618 L
+SO
+3760 3429 mt 3760 3403 L
+3760  618 mt 3760  643 L
+DO
+3760 3429 mt 3760  618 L
+3760  618 mt 3760  618 L
+SO
+3819 3429 mt 3819 3403 L
+3819  618 mt 3819  643 L
+DO
+3819 3429 mt 3819  618 L
+3819  618 mt 3819  618 L
+SO
+3871 3429 mt 3871 3403 L
+3871  618 mt 3871  643 L
+DO
+3871 3429 mt 3871  618 L
+3871  618 mt 3871  618 L
+SO
+3918 3429 mt 3918 3403 L
+3918  618 mt 3918  643 L
+DO
+3918 3429 mt 3918  618 L
+3918  618 mt 3918  618 L
+SO
+3918 3429 mt 3918 3377 L
+3918  618 mt 3918  669 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+3830 3611 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 80 FMSR
+
+3963 3537 mt 
+(3) s
+4225 3429 mt 4225 3403 L
+4225  618 mt 4225  643 L
+DO
+4225 3429 mt 4225  618 L
+4225  618 mt 4225  618 L
+SO
+4405 3429 mt 4405 3403 L
+4405  618 mt 4405  643 L
+DO
+4405 3429 mt 4405  618 L
+4405  618 mt 4405  618 L
+SO
+4532 3429 mt 4532 3403 L
+4532  618 mt 4532  643 L
+DO
+4532 3429 mt 4532  618 L
+4532  618 mt 4532  618 L
+SO
+4631 3429 mt 4631 3403 L
+4631  618 mt 4631  643 L
+DO
+4631 3429 mt 4631  618 L
+4631  618 mt 4631  618 L
+SO
+4712 3429 mt 4712 3403 L
+4712  618 mt 4712  643 L
+DO
+4712 3429 mt 4712  618 L
+4712  618 mt 4712  618 L
+SO
+4781 3429 mt 4781 3403 L
+4781  618 mt 4781  643 L
+DO
+4781 3429 mt 4781  618 L
+4781  618 mt 4781  618 L
+SO
+4840 3429 mt 4840 3403 L
+4840  618 mt 4840  643 L
+DO
+4840 3429 mt 4840  618 L
+4840  618 mt 4840  618 L
+SO
+4892 3429 mt 4892 3403 L
+4892  618 mt 4892  643 L
+DO
+4892 3429 mt 4892  618 L
+4892  618 mt 4892  618 L
+SO
+4939 3429 mt 4939 3403 L
+4939  618 mt 4939  643 L
+DO
+4939 3429 mt 4939  618 L
+4939  618 mt 4939  618 L
+SO
+4939 3429 mt 4939 3377 L
+4939  618 mt 4939  669 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+4851 3611 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 80 FMSR
+
+4984 3537 mt 
+(4) s
+5246 3429 mt 5246 3403 L
+5246  618 mt 5246  643 L
+DO
+5246 3429 mt 5246  618 L
+5246  618 mt 5246  618 L
+SO
+5426 3429 mt 5426 3403 L
+5426  618 mt 5426  643 L
+DO
+5426 3429 mt 5426  618 L
+5426  618 mt 5426  618 L
+SO
+5553 3429 mt 5553 3403 L
+5553  618 mt 5553  643 L
+DO
+5553 3429 mt 5553  618 L
+5553  618 mt 5553  618 L
+SO
+5652 3429 mt 5652 3403 L
+5652  618 mt 5652  643 L
+DO
+5652 3429 mt 5652  618 L
+5652  618 mt 5652  618 L
+SO
+5733 3429 mt 5733 3403 L
+5733  618 mt 5733  643 L
+DO
+5733 3429 mt 5733  618 L
+5733  618 mt 5733  618 L
+SO
+5801 3429 mt 5801 3403 L
+5801  618 mt 5801  643 L
+DO
+5801 3429 mt 5801  618 L
+5801  618 mt 5801  618 L
+SO
+5861 3429 mt 5861 3403 L
+5861  618 mt 5861  643 L
+DO
+5861 3429 mt 5861  618 L
+5861  618 mt 5861  618 L
+SO
+5913 3429 mt 5913 3403 L
+5913  618 mt 5913  643 L
+DO
+5913 3429 mt 5913  618 L
+5913  618 mt 5913  618 L
+SO
+5960 3429 mt 5960 3403 L
+5960  618 mt 5960  643 L
+DO
+5960 3429 mt 5960  618 L
+5960  618 mt 5960  618 L
+SO
+5960 3429 mt 5960 3377 L
+5960  618 mt 5960  669 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+5872 3611 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 80 FMSR
+
+6005 3537 mt 
+(5) s
+ 856 3429 mt  907 3429 L
+5960 3429 mt 5908 3429 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+ 755 3473 mt 
+(0) s
+ 856 2866 mt  907 2866 L
+5960 2866 mt 5908 2866 L
+ 755 2910 mt 
+(1) s
+ 856 2304 mt  907 2304 L
+5960 2304 mt 5908 2304 L
+ 755 2348 mt 
+(2) s
+ 856 1742 mt  907 1742 L
+5960 1742 mt 5908 1742 L
+ 755 1786 mt 
+(3) s
+ 856 1180 mt  907 1180 L
+5960 1180 mt 5908 1180 L
+ 755 1224 mt 
+(4) s
+ 856  618 mt  907  618 L
+5960  618 mt 5908  618 L
+ 755  662 mt 
+(5) s
+ 856 3429 mt 5960 3429 L
+ 856  618 mt 5960  618 L
+ 856 3429 mt  856  618 L
+5960 3429 mt 5960  618 L
+gs 856 618 5105 2812 rc
+/c8 { 0.000000 0.000000 1.000000 sr} bdef
+c8
+307 32 308 108 307 266 307 73 308 91 307 176 307 -58 307 -473 
+308 175 307 -360 307 -393 308 -335 307 -274 307 -228 307 -135 856 3247 16 MP stroke
+gr
+
+c8
+  36   36  856 3247 FO
+  36   36 1163 3112 FO
+  36   36 1470 2884 FO
+  36   36 1777 2610 FO
+  36   36 2085 2275 FO
+  36   36 2392 1882 FO
+  36   36 2699 1522 FO
+  36   36 3007 1697 FO
+  36   36 3314 1224 FO
+  36   36 3621 1166 FO
+  36   36 3928 1342 FO
+  36   36 4236 1433 FO
+  36   36 4543 1506 FO
+  36   36 4850 1772 FO
+  36   36 5158 1880 FO
+  36   36 5465 1912 FO
+gs 856 618 5105 2812 rc
+/c9 { 0.000000 0.500000 0.000000 sr} bdef
+c9
+307 8 308 26 307 65 307 38 308 86 307 179 307 170 307 -57 
+308 -42 307 -156 307 -142 308 -238 307 -248 307 -150 307 -90 856 3267 16 MP stroke
+gr
+
+c9
+0 j
+0 -58 -58 0 0 58 58 0 827 3238 5 MP
+DP
+0 -58 -58 0 0 58 58 0 1134 3148 5 MP
+DP
+0 -58 -58 0 0 58 58 0 1441 2998 5 MP
+DP
+0 -58 -58 0 0 58 58 0 1748 2750 5 MP
+DP
+0 -58 -58 0 0 58 58 0 2056 2512 5 MP
+DP
+0 -58 -58 0 0 58 58 0 2363 2370 5 MP
+DP
+0 -58 -58 0 0 58 58 0 2670 2214 5 MP
+DP
+0 -58 -58 0 0 58 58 0 2978 2172 5 MP
+DP
+0 -58 -58 0 0 58 58 0 3285 2115 5 MP
+DP
+0 -58 -58 0 0 58 58 0 3592 2285 5 MP
+DP
+0 -58 -58 0 0 58 58 0 3899 2464 5 MP
+DP
+0 -58 -58 0 0 58 58 0 4207 2550 5 MP
+DP
+0 -58 -58 0 0 58 58 0 4514 2588 5 MP
+DP
+0 -58 -58 0 0 58 58 0 4821 2653 5 MP
+DP
+0 -58 -58 0 0 58 58 0 5129 2679 5 MP
+DP
+0 -58 -58 0 0 58 58 0 5436 2687 5 MP
+DP
+gs 856 618 5105 2812 rc
+gr
+
+0 sg
+2714 3754 mt 
+(Number of grouped points) s
+ 673 2907 mt  -90 rotate
+(BASFUN Vectorization Speed Up) s
+90 rotate
+2682  523 mt 
+(HELIOS, Quadratic Splines) s
+ 839 3472 mt 
+( ) s
+5944  660 mt 
+( ) s
+1 sg
+0 334 1616 0 0 -334 4284 1012 4 MP
+PP
+-1616 0 0 334 1616 0 0 -334 4284 1012 5 MP stroke
+4 w
+DO
+SO
+6 w
+0 sg
+4284 1012 mt 5900 1012 L
+4284  678 mt 5900  678 L
+4284 1012 mt 4284  678 L
+5900 1012 mt 5900  678 L
+4284 1012 mt 5900 1012 L
+4284 1012 mt 4284  678 L
+4284 1012 mt 5900 1012 L
+4284  678 mt 5900  678 L
+4284 1012 mt 4284  678 L
+5900 1012 mt 5900  678 L
+4749  812 mt 
+(Periodic Splines) s
+gs 4284 678 1617 335 rc
+c8
+358 0 4355 769 2 MP stroke
+gs 4461 696 147 147 rc
+  36   36 4534  769 FO
+gr
+
+gr
+
+c8
+0 sg
+4749  963 mt 
+(Non-perodic Splines) s
+gs 4284 678 1617 335 rc
+c9
+358 0 4355 920 2 MP stroke
+gs 4461 847 147 147 rc
+0 -58 -58 0 0 58 58 0 4505 891 5 MP
+DP
+gr
+
+gr
+
+c9
+1 sg
+0 2812 5104 0 0 -2812 856 7334 4 MP
+PP
+-5104 0 0 2812 5104 0 0 -2812 856 7334 5 MP stroke
+4 w
+DO
+0 sg
+ 856 7334 mt  856 4522 L
+ 856 4522 mt  856 4522 L
+1876 7334 mt 1876 4522 L
+1876 4522 mt 1876 4522 L
+2897 7334 mt 2897 4522 L
+2897 4522 mt 2897 4522 L
+3918 7334 mt 3918 4522 L
+3918 4522 mt 3918 4522 L
+4939 7334 mt 4939 4522 L
+4939 4522 mt 4939 4522 L
+5960 7334 mt 5960 4522 L
+5960 4522 mt 5960 4522 L
+ 856 7334 mt 5960 7334 L
+5960 7334 mt 5960 7334 L
+ 856 6771 mt 5960 6771 L
+5960 6771 mt 5960 6771 L
+ 856 6209 mt 5960 6209 L
+5960 6209 mt 5960 6209 L
+ 856 5646 mt 5960 5646 L
+5960 5646 mt 5960 5646 L
+ 856 5084 mt 5960 5084 L
+5960 5084 mt 5960 5084 L
+ 856 4522 mt 5960 4522 L
+5960 4522 mt 5960 4522 L
+SO
+6 w
+ 856 7334 mt 5960 7334 L
+ 856 4522 mt 5960 4522 L
+ 856 7334 mt  856 4522 L
+5960 7334 mt 5960 4522 L
+ 856 7334 mt 5960 7334 L
+ 856 7334 mt  856 4522 L
+ 856 7334 mt  856 7308 L
+ 856 4522 mt  856 4547 L
+DO
+ 856 7334 mt  856 4522 L
+ 856 4522 mt  856 4522 L
+SO
+ 856 7334 mt  856 7282 L
+ 856 4522 mt  856 4573 L
+ 768 7516 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 80 FMSR
+
+ 901 7442 mt 
+(0) s
+1163 7334 mt 1163 7308 L
+1163 4522 mt 1163 4547 L
+DO
+1163 7334 mt 1163 4522 L
+1163 4522 mt 1163 4522 L
+SO
+1343 7334 mt 1343 7308 L
+1343 4522 mt 1343 4547 L
+DO
+1343 7334 mt 1343 4522 L
+1343 4522 mt 1343 4522 L
+SO
+1470 7334 mt 1470 7308 L
+1470 4522 mt 1470 4547 L
+DO
+1470 7334 mt 1470 4522 L
+1470 4522 mt 1470 4522 L
+SO
+1569 7334 mt 1569 7308 L
+1569 4522 mt 1569 4547 L
+DO
+1569 7334 mt 1569 4522 L
+1569 4522 mt 1569 4522 L
+SO
+1650 7334 mt 1650 7308 L
+1650 4522 mt 1650 4547 L
+DO
+1650 7334 mt 1650 4522 L
+1650 4522 mt 1650 4522 L
+SO
+1718 7334 mt 1718 7308 L
+1718 4522 mt 1718 4547 L
+DO
+1718 7334 mt 1718 4522 L
+1718 4522 mt 1718 4522 L
+SO
+1777 7334 mt 1777 7308 L
+1777 4522 mt 1777 4547 L
+DO
+1777 7334 mt 1777 4522 L
+1777 4522 mt 1777 4522 L
+SO
+1830 7334 mt 1830 7308 L
+1830 4522 mt 1830 4547 L
+DO
+1830 7334 mt 1830 4522 L
+1830 4522 mt 1830 4522 L
+SO
+1876 7334 mt 1876 7308 L
+1876 4522 mt 1876 4547 L
+DO
+1876 7334 mt 1876 4522 L
+1876 4522 mt 1876 4522 L
+SO
+1876 7334 mt 1876 7282 L
+1876 4522 mt 1876 4573 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+1788 7516 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 80 FMSR
+
+1921 7442 mt 
+(1) s
+2184 7334 mt 2184 7308 L
+2184 4522 mt 2184 4547 L
+DO
+2184 7334 mt 2184 4522 L
+2184 4522 mt 2184 4522 L
+SO
+2363 7334 mt 2363 7308 L
+2363 4522 mt 2363 4547 L
+DO
+2363 7334 mt 2363 4522 L
+2363 4522 mt 2363 4522 L
+SO
+2491 7334 mt 2491 7308 L
+2491 4522 mt 2491 4547 L
+DO
+2491 7334 mt 2491 4522 L
+2491 4522 mt 2491 4522 L
+SO
+2590 7334 mt 2590 7308 L
+2590 4522 mt 2590 4547 L
+DO
+2590 7334 mt 2590 4522 L
+2590 4522 mt 2590 4522 L
+SO
+2671 7334 mt 2671 7308 L
+2671 4522 mt 2671 4547 L
+DO
+2671 7334 mt 2671 4522 L
+2671 4522 mt 2671 4522 L
+SO
+2739 7334 mt 2739 7308 L
+2739 4522 mt 2739 4547 L
+DO
+2739 7334 mt 2739 4522 L
+2739 4522 mt 2739 4522 L
+SO
+2798 7334 mt 2798 7308 L
+2798 4522 mt 2798 4547 L
+DO
+2798 7334 mt 2798 4522 L
+2798 4522 mt 2798 4522 L
+SO
+2850 7334 mt 2850 7308 L
+2850 4522 mt 2850 4547 L
+DO
+2850 7334 mt 2850 4522 L
+2850 4522 mt 2850 4522 L
+SO
+2897 7334 mt 2897 7308 L
+2897 4522 mt 2897 4547 L
+DO
+2897 7334 mt 2897 4522 L
+2897 4522 mt 2897 4522 L
+SO
+2897 7334 mt 2897 7282 L
+2897 4522 mt 2897 4573 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+2809 7516 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 80 FMSR
+
+2942 7442 mt 
+(2) s
+3204 7334 mt 3204 7308 L
+3204 4522 mt 3204 4547 L
+DO
+3204 7334 mt 3204 4522 L
+3204 4522 mt 3204 4522 L
+SO
+3384 7334 mt 3384 7308 L
+3384 4522 mt 3384 4547 L
+DO
+3384 7334 mt 3384 4522 L
+3384 4522 mt 3384 4522 L
+SO
+3512 7334 mt 3512 7308 L
+3512 4522 mt 3512 4547 L
+DO
+3512 7334 mt 3512 4522 L
+3512 4522 mt 3512 4522 L
+SO
+3611 7334 mt 3611 7308 L
+3611 4522 mt 3611 4547 L
+DO
+3611 7334 mt 3611 4522 L
+3611 4522 mt 3611 4522 L
+SO
+3691 7334 mt 3691 7308 L
+3691 4522 mt 3691 4547 L
+DO
+3691 7334 mt 3691 4522 L
+3691 4522 mt 3691 4522 L
+SO
+3760 7334 mt 3760 7308 L
+3760 4522 mt 3760 4547 L
+DO
+3760 7334 mt 3760 4522 L
+3760 4522 mt 3760 4522 L
+SO
+3819 7334 mt 3819 7308 L
+3819 4522 mt 3819 4547 L
+DO
+3819 7334 mt 3819 4522 L
+3819 4522 mt 3819 4522 L
+SO
+3871 7334 mt 3871 7308 L
+3871 4522 mt 3871 4547 L
+DO
+3871 7334 mt 3871 4522 L
+3871 4522 mt 3871 4522 L
+SO
+3918 7334 mt 3918 7308 L
+3918 4522 mt 3918 4547 L
+DO
+3918 7334 mt 3918 4522 L
+3918 4522 mt 3918 4522 L
+SO
+3918 7334 mt 3918 7282 L
+3918 4522 mt 3918 4573 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+3830 7516 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 80 FMSR
+
+3963 7442 mt 
+(3) s
+4225 7334 mt 4225 7308 L
+4225 4522 mt 4225 4547 L
+DO
+4225 7334 mt 4225 4522 L
+4225 4522 mt 4225 4522 L
+SO
+4405 7334 mt 4405 7308 L
+4405 4522 mt 4405 4547 L
+DO
+4405 7334 mt 4405 4522 L
+4405 4522 mt 4405 4522 L
+SO
+4532 7334 mt 4532 7308 L
+4532 4522 mt 4532 4547 L
+DO
+4532 7334 mt 4532 4522 L
+4532 4522 mt 4532 4522 L
+SO
+4631 7334 mt 4631 7308 L
+4631 4522 mt 4631 4547 L
+DO
+4631 7334 mt 4631 4522 L
+4631 4522 mt 4631 4522 L
+SO
+4712 7334 mt 4712 7308 L
+4712 4522 mt 4712 4547 L
+DO
+4712 7334 mt 4712 4522 L
+4712 4522 mt 4712 4522 L
+SO
+4781 7334 mt 4781 7308 L
+4781 4522 mt 4781 4547 L
+DO
+4781 7334 mt 4781 4522 L
+4781 4522 mt 4781 4522 L
+SO
+4840 7334 mt 4840 7308 L
+4840 4522 mt 4840 4547 L
+DO
+4840 7334 mt 4840 4522 L
+4840 4522 mt 4840 4522 L
+SO
+4892 7334 mt 4892 7308 L
+4892 4522 mt 4892 4547 L
+DO
+4892 7334 mt 4892 4522 L
+4892 4522 mt 4892 4522 L
+SO
+4939 7334 mt 4939 7308 L
+4939 4522 mt 4939 4547 L
+DO
+4939 7334 mt 4939 4522 L
+4939 4522 mt 4939 4522 L
+SO
+4939 7334 mt 4939 7282 L
+4939 4522 mt 4939 4573 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+4851 7516 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 80 FMSR
+
+4984 7442 mt 
+(4) s
+5246 7334 mt 5246 7308 L
+5246 4522 mt 5246 4547 L
+DO
+5246 7334 mt 5246 4522 L
+5246 4522 mt 5246 4522 L
+SO
+5426 7334 mt 5426 7308 L
+5426 4522 mt 5426 4547 L
+DO
+5426 7334 mt 5426 4522 L
+5426 4522 mt 5426 4522 L
+SO
+5553 7334 mt 5553 7308 L
+5553 4522 mt 5553 4547 L
+DO
+5553 7334 mt 5553 4522 L
+5553 4522 mt 5553 4522 L
+SO
+5652 7334 mt 5652 7308 L
+5652 4522 mt 5652 4547 L
+DO
+5652 7334 mt 5652 4522 L
+5652 4522 mt 5652 4522 L
+SO
+5733 7334 mt 5733 7308 L
+5733 4522 mt 5733 4547 L
+DO
+5733 7334 mt 5733 4522 L
+5733 4522 mt 5733 4522 L
+SO
+5801 7334 mt 5801 7308 L
+5801 4522 mt 5801 4547 L
+DO
+5801 7334 mt 5801 4522 L
+5801 4522 mt 5801 4522 L
+SO
+5861 7334 mt 5861 7308 L
+5861 4522 mt 5861 4547 L
+DO
+5861 7334 mt 5861 4522 L
+5861 4522 mt 5861 4522 L
+SO
+5913 7334 mt 5913 7308 L
+5913 4522 mt 5913 4547 L
+DO
+5913 7334 mt 5913 4522 L
+5913 4522 mt 5913 4522 L
+SO
+5960 7334 mt 5960 7308 L
+5960 4522 mt 5960 4547 L
+DO
+5960 7334 mt 5960 4522 L
+5960 4522 mt 5960 4522 L
+SO
+5960 7334 mt 5960 7282 L
+5960 4522 mt 5960 4573 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+5872 7516 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 80 FMSR
+
+6005 7442 mt 
+(5) s
+ 856 7334 mt  907 7334 L
+5960 7334 mt 5908 7334 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+ 755 7378 mt 
+(0) s
+ 856 6771 mt  907 6771 L
+5960 6771 mt 5908 6771 L
+ 755 6815 mt 
+(1) s
+ 856 6209 mt  907 6209 L
+5960 6209 mt 5908 6209 L
+ 755 6253 mt 
+(2) s
+ 856 5646 mt  907 5646 L
+5960 5646 mt 5908 5646 L
+ 755 5690 mt 
+(3) s
+ 856 5084 mt  907 5084 L
+5960 5084 mt 5908 5084 L
+ 755 5128 mt 
+(4) s
+ 856 4522 mt  907 4522 L
+5960 4522 mt 5908 4522 L
+ 755 4566 mt 
+(5) s
+ 856 7334 mt 5960 7334 L
+ 856 4522 mt 5960 4522 L
+ 856 7334 mt  856 4522 L
+5960 7334 mt 5960 4522 L
+gs 856 4522 5105 2813 rc
+c8
+307 35 308 61 307 276 307 107 308 53 307 111 307 -429 307 -115 
+308 223 307 -345 307 -360 308 -367 307 -260 307 -220 307 -151 856 7134 16 MP stroke
+gr
+
+c8
+  36   36  856 7134 FO
+  36   36 1163 6983 FO
+  36   36 1470 6763 FO
+  36   36 1777 6503 FO
+  36   36 2085 6136 FO
+  36   36 2392 5776 FO
+  36   36 2699 5431 FO
+  36   36 3007 5654 FO
+  36   36 3314 5539 FO
+  36   36 3621 5110 FO
+  36   36 3928 5221 FO
+  36   36 4236 5274 FO
+  36   36 4543 5381 FO
+  36   36 4850 5657 FO
+  36   36 5158 5718 FO
+  36   36 5465 5753 FO
+gs 856 4522 5105 2813 rc
+c9
+307 3 308 8 307 45 307 49 308 46 307 81 307 267 307 -19 
+308 -26 307 -61 307 -140 308 -173 307 -209 307 -137 307 -84 856 7174 16 MP stroke
+gr
+
+c9
+0 -58 -58 0 0 58 58 0 827 7145 5 MP
+DP
+0 -58 -58 0 0 58 58 0 1134 7061 5 MP
+DP
+0 -58 -58 0 0 58 58 0 1441 6924 5 MP
+DP
+0 -58 -58 0 0 58 58 0 1748 6715 5 MP
+DP
+0 -58 -58 0 0 58 58 0 2056 6542 5 MP
+DP
+0 -58 -58 0 0 58 58 0 2363 6402 5 MP
+DP
+0 -58 -58 0 0 58 58 0 2670 6341 5 MP
+DP
+0 -58 -58 0 0 58 58 0 2978 6315 5 MP
+DP
+0 -58 -58 0 0 58 58 0 3285 6296 5 MP
+DP
+0 -58 -58 0 0 58 58 0 3592 6563 5 MP
+DP
+0 -58 -58 0 0 58 58 0 3899 6644 5 MP
+DP
+0 -58 -58 0 0 58 58 0 4207 6690 5 MP
+DP
+0 -58 -58 0 0 58 58 0 4514 6739 5 MP
+DP
+0 -58 -58 0 0 58 58 0 4821 6784 5 MP
+DP
+0 -58 -58 0 0 58 58 0 5129 6792 5 MP
+DP
+0 -58 -58 0 0 58 58 0 5436 6795 5 MP
+DP
+gs 856 4522 5105 2813 rc
+gr
+
+0 sg
+2714 7659 mt 
+(Number of grouped points) s
+ 673 6812 mt  -90 rotate
+(BASFUN Vectorization Speed Up) s
+90 rotate
+2788 4427 mt 
+(HELIOS, Cubic Splines) s
+ 839 7377 mt 
+( ) s
+5944 4564 mt 
+( ) s
+1 sg
+0 334 1616 0 0 -334 4284 4916 4 MP
+PP
+-1616 0 0 334 1616 0 0 -334 4284 4916 5 MP stroke
+4 w
+DO
+SO
+6 w
+0 sg
+4284 4916 mt 5900 4916 L
+4284 4582 mt 5900 4582 L
+4284 4916 mt 4284 4582 L
+5900 4916 mt 5900 4582 L
+4284 4916 mt 5900 4916 L
+4284 4916 mt 4284 4582 L
+4284 4916 mt 5900 4916 L
+4284 4582 mt 5900 4582 L
+4284 4916 mt 4284 4582 L
+5900 4916 mt 5900 4582 L
+4749 4716 mt 
+(Periodic Splines) s
+gs 4284 4582 1617 335 rc
+c8
+358 0 4355 4673 2 MP stroke
+gs 4461 4600 147 147 rc
+  36   36 4534 4673 FO
+gr
+
+gr
+
+c8
+0 sg
+4749 4867 mt 
+(Non-perodic Splines) s
+gs 4284 4582 1617 335 rc
+c9
+358 0 4355 4824 2 MP stroke
+gs 4461 4751 147 147 rc
+0 -58 -58 0 0 58 58 0 4505 4795 5 MP
+DP
+gr
+
+gr
+
+c9
+
+end %%Color Dict
+
+eplot
+%%EndObject
+
+epage
+end
+
+showpage
+
+%%Trailer
+%%EOF
diff --git a/docs/manual/basfun_perf_hpcff.eps b/docs/manual/basfun_perf_hpcff.eps
new file mode 100644
index 0000000..ccd1bd6
--- /dev/null
+++ b/docs/manual/basfun_perf_hpcff.eps
@@ -0,0 +1,1293 @@
+%!PS-Adobe-3.0 EPSF-3.0
+%%Creator: MATLAB, The MathWorks, Inc. Version 7.14.0.739 (R2012a). Operating System: Linux 2.6.27.56-0.1-default #1 SMP 2010-12-01 16:57:58 +0100 x86_64.
+%%Title: /home/ttran/bsplines_hlst-dev/opt_wk/basfun_perf_hpcff.eps
+%%CreationDate: 03/27/2012  13:00:06
+%%DocumentNeededFonts: Helvetica
+%%DocumentProcessColors: Cyan Magenta Yellow Black
+%%LanguageLevel: 2
+%%Pages: 1
+%%BoundingBox:    23    78   571   763
+%%EndComments
+
+%%BeginProlog
+% MathWorks dictionary
+/MathWorks 160 dict begin
+% definition operators
+/bdef {bind def} bind def
+/ldef {load def} bind def
+/xdef {exch def} bdef
+/xstore {exch store} bdef
+% operator abbreviations
+/c  /clip ldef
+/cc /concat ldef
+/cp /closepath ldef
+/gr /grestore ldef
+/gs /gsave ldef
+/mt /moveto ldef
+/np /newpath ldef
+/cm /currentmatrix ldef
+/sm /setmatrix ldef
+/rm /rmoveto ldef
+/rl /rlineto ldef
+/s {show newpath} bdef
+/sc {setcmykcolor} bdef
+/sr /setrgbcolor ldef
+/sg /setgray ldef
+/w /setlinewidth ldef
+/j /setlinejoin ldef
+/cap /setlinecap ldef
+/rc {rectclip} bdef
+/rf {rectfill} bdef
+% page state control
+/pgsv () def
+/bpage {/pgsv save def} bdef
+/epage {pgsv restore} bdef
+/bplot /gsave ldef
+/eplot {stroke grestore} bdef
+% orientation switch
+/portraitMode 0 def /landscapeMode 1 def /rotateMode 2 def
+% coordinate system mappings
+/dpi2point 0 def
+% font control
+/FontSize 0 def
+/FMS {/FontSize xstore findfont [FontSize 0 0 FontSize neg 0 0]
+  makefont setfont} bdef
+/reencode {exch dup where {pop load} {pop StandardEncoding} ifelse
+  exch dup 3 1 roll findfont dup length dict begin
+  { 1 index /FID ne {def}{pop pop} ifelse } forall
+  /Encoding exch def currentdict end definefont pop} bdef
+/isroman {findfont /CharStrings get /Agrave known} bdef
+/FMSR {3 1 roll 1 index dup isroman {reencode} {pop pop} ifelse
+  exch FMS} bdef
+/csm {1 dpi2point div -1 dpi2point div scale neg translate
+ dup landscapeMode eq {pop -90 rotate}
+  {rotateMode eq {90 rotate} if} ifelse} bdef
+% line types: solid, dotted, dashed, dotdash
+/SO { [] 0 setdash } bdef
+/DO { [.5 dpi2point mul 4 dpi2point mul] 0 setdash } bdef
+/DA { [6 dpi2point mul] 0 setdash } bdef
+/DD { [.5 dpi2point mul 4 dpi2point mul 6 dpi2point mul 4
+  dpi2point mul] 0 setdash } bdef
+% macros for lines and objects
+/L {lineto stroke} bdef
+/MP {3 1 roll moveto 1 sub {rlineto} repeat} bdef
+/AP {{rlineto} repeat} bdef
+/PDlw -1 def
+/W {/PDlw currentlinewidth def setlinewidth} def
+/PP {closepath eofill} bdef
+/DP {closepath stroke} bdef
+/MR {4 -2 roll moveto dup  0 exch rlineto exch 0 rlineto
+  neg 0 exch rlineto closepath} bdef
+/FR {MR stroke} bdef
+/PR {MR fill} bdef
+/L1i {{currentfile picstr readhexstring pop} image} bdef
+/tMatrix matrix def
+/MakeOval {newpath tMatrix currentmatrix pop translate scale
+0 0 1 0 360 arc tMatrix setmatrix} bdef
+/FO {MakeOval stroke} bdef
+/PO {MakeOval fill} bdef
+/PD {currentlinewidth 2 div 0 360 arc fill
+   PDlw -1 eq not {PDlw w /PDlw -1 def} if} def
+/FA {newpath tMatrix currentmatrix pop translate scale
+  0 0 1 5 -2 roll arc tMatrix setmatrix stroke} bdef
+/PA {newpath tMatrix currentmatrix pop	translate 0 0 moveto scale
+  0 0 1 5 -2 roll arc closepath tMatrix setmatrix fill} bdef
+/FAn {newpath tMatrix currentmatrix pop translate scale
+  0 0 1 5 -2 roll arcn tMatrix setmatrix stroke} bdef
+/PAn {newpath tMatrix currentmatrix pop translate 0 0 moveto scale
+  0 0 1 5 -2 roll arcn closepath tMatrix setmatrix fill} bdef
+/vradius 0 def /hradius 0 def /lry 0 def
+/lrx 0 def /uly 0 def /ulx 0 def /rad 0 def
+/MRR {/vradius xdef /hradius xdef /lry xdef /lrx xdef /uly xdef
+  /ulx xdef newpath tMatrix currentmatrix pop ulx hradius add uly
+  vradius add translate hradius vradius scale 0 0 1 180 270 arc 
+  tMatrix setmatrix lrx hradius sub uly vradius add translate
+  hradius vradius scale 0 0 1 270 360 arc tMatrix setmatrix
+  lrx hradius sub lry vradius sub translate hradius vradius scale
+  0 0 1 0 90 arc tMatrix setmatrix ulx hradius add lry vradius sub
+  translate hradius vradius scale 0 0 1 90 180 arc tMatrix setmatrix
+  closepath} bdef
+/FRR {MRR stroke } bdef
+/PRR {MRR fill } bdef
+/MlrRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lry uly sub 2 div def
+  newpath tMatrix currentmatrix pop ulx rad add uly rad add translate
+  rad rad scale 0 0 1 90 270 arc tMatrix setmatrix lrx rad sub lry rad
+  sub translate rad rad scale 0 0 1 270 90 arc tMatrix setmatrix
+  closepath} bdef
+/FlrRR {MlrRR stroke } bdef
+/PlrRR {MlrRR fill } bdef
+/MtbRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lrx ulx sub 2 div def
+  newpath tMatrix currentmatrix pop ulx rad add uly rad add translate
+  rad rad scale 0 0 1 180 360 arc tMatrix setmatrix lrx rad sub lry rad
+  sub translate rad rad scale 0 0 1 0 180 arc tMatrix setmatrix
+  closepath} bdef
+/FtbRR {MtbRR stroke } bdef
+/PtbRR {MtbRR fill } bdef
+/stri 6 array def /dtri 6 array def
+/smat 6 array def /dmat 6 array def
+/tmat1 6 array def /tmat2 6 array def /dif 3 array def
+/asub {/ind2 exch def /ind1 exch def dup dup
+  ind1 get exch ind2 get sub exch } bdef
+/tri_to_matrix {
+  2 0 asub 3 1 asub 4 0 asub 5 1 asub
+  dup 0 get exch 1 get 7 -1 roll astore } bdef
+/compute_transform {
+  dmat dtri tri_to_matrix tmat1 invertmatrix 
+  smat stri tri_to_matrix tmat2 concatmatrix } bdef
+/ds {stri astore pop} bdef
+/dt {dtri astore pop} bdef
+/db {2 copy /cols xdef /rows xdef mul dup 3 mul string
+  currentfile 
+  3 index 0 eq {/ASCIIHexDecode filter}
+  {/ASCII85Decode filter 3 index 2 eq {/RunLengthDecode filter} if }
+  ifelse exch readstring pop
+  dup 0 3 index getinterval /rbmap xdef
+  dup 2 index dup getinterval /gbmap xdef
+  1 index dup 2 mul exch getinterval /bbmap xdef pop pop}bdef
+/it {gs np dtri aload pop moveto lineto lineto cp c
+  cols rows 8 compute_transform 
+  rbmap gbmap bbmap true 3 colorimage gr}bdef
+/il {newpath moveto lineto stroke}bdef
+currentdict end def
+%%EndProlog
+
+%%BeginSetup
+MathWorks begin
+
+0 cap
+
+end
+%%EndSetup
+
+%%Page: 1 1
+%%BeginPageSetup
+%%PageBoundingBox:    23    78   571   763
+MathWorks begin
+bpage
+%%EndPageSetup
+
+%%BeginObject: obj1
+bplot
+
+/dpi2point 12 def
+portraitMode 0276 9156 csm
+
+    0     0  6586  8220 rc
+85 dict begin %Colortable dictionary
+/c0 { 0.000000 0.000000 0.000000 sr} bdef
+/c1 { 1.000000 1.000000 1.000000 sr} bdef
+/c2 { 0.900000 0.000000 0.000000 sr} bdef
+/c3 { 0.000000 0.820000 0.000000 sr} bdef
+/c4 { 0.000000 0.000000 0.800000 sr} bdef
+/c5 { 0.910000 0.820000 0.320000 sr} bdef
+/c6 { 1.000000 0.260000 0.820000 sr} bdef
+/c7 { 0.000000 0.820000 0.820000 sr} bdef
+c0
+1 j
+1 sg
+   0    0 6587 8221 rf
+6 w
+0 2805 5104 0 0 -2805 856 3421 4 MP
+PP
+-5104 0 0 2805 5104 0 0 -2805 856 3421 5 MP stroke
+4 w
+DO
+0 sg
+ 856 3421 mt  856  616 L
+ 856  616 mt  856  616 L
+1876 3421 mt 1876  616 L
+1876  616 mt 1876  616 L
+2897 3421 mt 2897  616 L
+2897  616 mt 2897  616 L
+3918 3421 mt 3918  616 L
+3918  616 mt 3918  616 L
+4939 3421 mt 4939  616 L
+4939  616 mt 4939  616 L
+5960 3421 mt 5960  616 L
+5960  616 mt 5960  616 L
+ 856 3421 mt 5960 3421 L
+5960 3421 mt 5960 3421 L
+ 856 2953 mt 5960 2953 L
+5960 2953 mt 5960 2953 L
+ 856 2486 mt 5960 2486 L
+5960 2486 mt 5960 2486 L
+ 856 2018 mt 5960 2018 L
+5960 2018 mt 5960 2018 L
+ 856 1551 mt 5960 1551 L
+5960 1551 mt 5960 1551 L
+ 856 1083 mt 5960 1083 L
+5960 1083 mt 5960 1083 L
+ 856  616 mt 5960  616 L
+5960  616 mt 5960  616 L
+SO
+6 w
+ 856 3421 mt 5960 3421 L
+ 856  616 mt 5960  616 L
+ 856 3421 mt  856  616 L
+5960 3421 mt 5960  616 L
+ 856 3421 mt 5960 3421 L
+ 856 3421 mt  856  616 L
+ 856 3421 mt  856 3395 L
+ 856  616 mt  856  641 L
+DO
+ 856 3421 mt  856  616 L
+ 856  616 mt  856  616 L
+SO
+ 856 3421 mt  856 3369 L
+ 856  616 mt  856  667 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+ 768 3603 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 80 FMSR
+
+ 901 3529 mt 
+(0) s
+1163 3421 mt 1163 3395 L
+1163  616 mt 1163  641 L
+DO
+1163 3421 mt 1163  616 L
+1163  616 mt 1163  616 L
+SO
+1343 3421 mt 1343 3395 L
+1343  616 mt 1343  641 L
+DO
+1343 3421 mt 1343  616 L
+1343  616 mt 1343  616 L
+SO
+1470 3421 mt 1470 3395 L
+1470  616 mt 1470  641 L
+DO
+1470 3421 mt 1470  616 L
+1470  616 mt 1470  616 L
+SO
+1569 3421 mt 1569 3395 L
+1569  616 mt 1569  641 L
+DO
+1569 3421 mt 1569  616 L
+1569  616 mt 1569  616 L
+SO
+1650 3421 mt 1650 3395 L
+1650  616 mt 1650  641 L
+DO
+1650 3421 mt 1650  616 L
+1650  616 mt 1650  616 L
+SO
+1718 3421 mt 1718 3395 L
+1718  616 mt 1718  641 L
+DO
+1718 3421 mt 1718  616 L
+1718  616 mt 1718  616 L
+SO
+1777 3421 mt 1777 3395 L
+1777  616 mt 1777  641 L
+DO
+1777 3421 mt 1777  616 L
+1777  616 mt 1777  616 L
+SO
+1830 3421 mt 1830 3395 L
+1830  616 mt 1830  641 L
+DO
+1830 3421 mt 1830  616 L
+1830  616 mt 1830  616 L
+SO
+1876 3421 mt 1876 3395 L
+1876  616 mt 1876  641 L
+DO
+1876 3421 mt 1876  616 L
+1876  616 mt 1876  616 L
+SO
+1876 3421 mt 1876 3369 L
+1876  616 mt 1876  667 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+1788 3603 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 80 FMSR
+
+1921 3529 mt 
+(1) s
+2184 3421 mt 2184 3395 L
+2184  616 mt 2184  641 L
+DO
+2184 3421 mt 2184  616 L
+2184  616 mt 2184  616 L
+SO
+2363 3421 mt 2363 3395 L
+2363  616 mt 2363  641 L
+DO
+2363 3421 mt 2363  616 L
+2363  616 mt 2363  616 L
+SO
+2491 3421 mt 2491 3395 L
+2491  616 mt 2491  641 L
+DO
+2491 3421 mt 2491  616 L
+2491  616 mt 2491  616 L
+SO
+2590 3421 mt 2590 3395 L
+2590  616 mt 2590  641 L
+DO
+2590 3421 mt 2590  616 L
+2590  616 mt 2590  616 L
+SO
+2671 3421 mt 2671 3395 L
+2671  616 mt 2671  641 L
+DO
+2671 3421 mt 2671  616 L
+2671  616 mt 2671  616 L
+SO
+2739 3421 mt 2739 3395 L
+2739  616 mt 2739  641 L
+DO
+2739 3421 mt 2739  616 L
+2739  616 mt 2739  616 L
+SO
+2798 3421 mt 2798 3395 L
+2798  616 mt 2798  641 L
+DO
+2798 3421 mt 2798  616 L
+2798  616 mt 2798  616 L
+SO
+2850 3421 mt 2850 3395 L
+2850  616 mt 2850  641 L
+DO
+2850 3421 mt 2850  616 L
+2850  616 mt 2850  616 L
+SO
+2897 3421 mt 2897 3395 L
+2897  616 mt 2897  641 L
+DO
+2897 3421 mt 2897  616 L
+2897  616 mt 2897  616 L
+SO
+2897 3421 mt 2897 3369 L
+2897  616 mt 2897  667 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+2809 3603 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 80 FMSR
+
+2942 3529 mt 
+(2) s
+3204 3421 mt 3204 3395 L
+3204  616 mt 3204  641 L
+DO
+3204 3421 mt 3204  616 L
+3204  616 mt 3204  616 L
+SO
+3384 3421 mt 3384 3395 L
+3384  616 mt 3384  641 L
+DO
+3384 3421 mt 3384  616 L
+3384  616 mt 3384  616 L
+SO
+3512 3421 mt 3512 3395 L
+3512  616 mt 3512  641 L
+DO
+3512 3421 mt 3512  616 L
+3512  616 mt 3512  616 L
+SO
+3611 3421 mt 3611 3395 L
+3611  616 mt 3611  641 L
+DO
+3611 3421 mt 3611  616 L
+3611  616 mt 3611  616 L
+SO
+3691 3421 mt 3691 3395 L
+3691  616 mt 3691  641 L
+DO
+3691 3421 mt 3691  616 L
+3691  616 mt 3691  616 L
+SO
+3760 3421 mt 3760 3395 L
+3760  616 mt 3760  641 L
+DO
+3760 3421 mt 3760  616 L
+3760  616 mt 3760  616 L
+SO
+3819 3421 mt 3819 3395 L
+3819  616 mt 3819  641 L
+DO
+3819 3421 mt 3819  616 L
+3819  616 mt 3819  616 L
+SO
+3871 3421 mt 3871 3395 L
+3871  616 mt 3871  641 L
+DO
+3871 3421 mt 3871  616 L
+3871  616 mt 3871  616 L
+SO
+3918 3421 mt 3918 3395 L
+3918  616 mt 3918  641 L
+DO
+3918 3421 mt 3918  616 L
+3918  616 mt 3918  616 L
+SO
+3918 3421 mt 3918 3369 L
+3918  616 mt 3918  667 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+3830 3603 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 80 FMSR
+
+3963 3529 mt 
+(3) s
+4225 3421 mt 4225 3395 L
+4225  616 mt 4225  641 L
+DO
+4225 3421 mt 4225  616 L
+4225  616 mt 4225  616 L
+SO
+4405 3421 mt 4405 3395 L
+4405  616 mt 4405  641 L
+DO
+4405 3421 mt 4405  616 L
+4405  616 mt 4405  616 L
+SO
+4532 3421 mt 4532 3395 L
+4532  616 mt 4532  641 L
+DO
+4532 3421 mt 4532  616 L
+4532  616 mt 4532  616 L
+SO
+4631 3421 mt 4631 3395 L
+4631  616 mt 4631  641 L
+DO
+4631 3421 mt 4631  616 L
+4631  616 mt 4631  616 L
+SO
+4712 3421 mt 4712 3395 L
+4712  616 mt 4712  641 L
+DO
+4712 3421 mt 4712  616 L
+4712  616 mt 4712  616 L
+SO
+4781 3421 mt 4781 3395 L
+4781  616 mt 4781  641 L
+DO
+4781 3421 mt 4781  616 L
+4781  616 mt 4781  616 L
+SO
+4840 3421 mt 4840 3395 L
+4840  616 mt 4840  641 L
+DO
+4840 3421 mt 4840  616 L
+4840  616 mt 4840  616 L
+SO
+4892 3421 mt 4892 3395 L
+4892  616 mt 4892  641 L
+DO
+4892 3421 mt 4892  616 L
+4892  616 mt 4892  616 L
+SO
+4939 3421 mt 4939 3395 L
+4939  616 mt 4939  641 L
+DO
+4939 3421 mt 4939  616 L
+4939  616 mt 4939  616 L
+SO
+4939 3421 mt 4939 3369 L
+4939  616 mt 4939  667 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+4851 3603 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 80 FMSR
+
+4984 3529 mt 
+(4) s
+5246 3421 mt 5246 3395 L
+5246  616 mt 5246  641 L
+DO
+5246 3421 mt 5246  616 L
+5246  616 mt 5246  616 L
+SO
+5426 3421 mt 5426 3395 L
+5426  616 mt 5426  641 L
+DO
+5426 3421 mt 5426  616 L
+5426  616 mt 5426  616 L
+SO
+5553 3421 mt 5553 3395 L
+5553  616 mt 5553  641 L
+DO
+5553 3421 mt 5553  616 L
+5553  616 mt 5553  616 L
+SO
+5652 3421 mt 5652 3395 L
+5652  616 mt 5652  641 L
+DO
+5652 3421 mt 5652  616 L
+5652  616 mt 5652  616 L
+SO
+5733 3421 mt 5733 3395 L
+5733  616 mt 5733  641 L
+DO
+5733 3421 mt 5733  616 L
+5733  616 mt 5733  616 L
+SO
+5801 3421 mt 5801 3395 L
+5801  616 mt 5801  641 L
+DO
+5801 3421 mt 5801  616 L
+5801  616 mt 5801  616 L
+SO
+5861 3421 mt 5861 3395 L
+5861  616 mt 5861  641 L
+DO
+5861 3421 mt 5861  616 L
+5861  616 mt 5861  616 L
+SO
+5913 3421 mt 5913 3395 L
+5913  616 mt 5913  641 L
+DO
+5913 3421 mt 5913  616 L
+5913  616 mt 5913  616 L
+SO
+5960 3421 mt 5960 3395 L
+5960  616 mt 5960  641 L
+DO
+5960 3421 mt 5960  616 L
+5960  616 mt 5960  616 L
+SO
+5960 3421 mt 5960 3369 L
+5960  616 mt 5960  667 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+5872 3603 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 80 FMSR
+
+6005 3529 mt 
+(5) s
+ 856 3421 mt  907 3421 L
+5960 3421 mt 5908 3421 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+ 755 3465 mt 
+(0) s
+ 856 2953 mt  907 2953 L
+5960 2953 mt 5908 2953 L
+ 655 2997 mt 
+(0.5) s
+ 856 2486 mt  907 2486 L
+5960 2486 mt 5908 2486 L
+ 755 2530 mt 
+(1) s
+ 856 2018 mt  907 2018 L
+5960 2018 mt 5908 2018 L
+ 655 2062 mt 
+(1.5) s
+ 856 1551 mt  907 1551 L
+5960 1551 mt 5908 1551 L
+ 755 1595 mt 
+(2) s
+ 856 1083 mt  907 1083 L
+5960 1083 mt 5908 1083 L
+ 655 1127 mt 
+(2.5) s
+ 856  616 mt  907  616 L
+5960  616 mt 5908  616 L
+ 755  660 mt 
+(3) s
+ 856 3421 mt 5960 3421 L
+ 856  616 mt 5960  616 L
+ 856 3421 mt  856  616 L
+5960 3421 mt 5960  616 L
+gs 856 616 5105 2806 rc
+/c8 { 0.000000 0.000000 1.000000 sr} bdef
+c8
+307 45 308 100 307 122 307 76 308 48 307 8 307 -38 307 -46 
+308 -318 307 -160 307 -278 308 -287 307 -433 307 -379 307 -227 856 3059 16 MP stroke
+gr
+
+c8
+  36   36  856 3059 FO
+  36   36 1163 2832 FO
+  36   36 1470 2453 FO
+  36   36 1777 2020 FO
+  36   36 2085 1733 FO
+  36   36 2392 1455 FO
+  36   36 2699 1295 FO
+  36   36 3007  977 FO
+  36   36 3314  931 FO
+  36   36 3621  893 FO
+  36   36 3928  901 FO
+  36   36 4236  949 FO
+  36   36 4543 1025 FO
+  36   36 4850 1147 FO
+  36   36 5158 1247 FO
+  36   36 5465 1292 FO
+gs 856 616 5105 2806 rc
+/c9 { 0.000000 0.500000 0.000000 sr} bdef
+c9
+307 11 308 37 307 138 307 52 308 97 307 145 307 99 307 10 
+308 -87 307 -133 307 -232 308 -312 307 -376 307 -262 307 -116 856 3127 16 MP stroke
+gr
+
+c9
+0 j
+0 -58 -58 0 0 58 58 0 827 3098 5 MP
+DP
+0 -58 -58 0 0 58 58 0 1134 2982 5 MP
+DP
+0 -58 -58 0 0 58 58 0 1441 2720 5 MP
+DP
+0 -58 -58 0 0 58 58 0 1748 2344 5 MP
+DP
+0 -58 -58 0 0 58 58 0 2056 2032 5 MP
+DP
+0 -58 -58 0 0 58 58 0 2363 1800 5 MP
+DP
+0 -58 -58 0 0 58 58 0 2670 1667 5 MP
+DP
+0 -58 -58 0 0 58 58 0 2978 1580 5 MP
+DP
+0 -58 -58 0 0 58 58 0 3285 1590 5 MP
+DP
+0 -58 -58 0 0 58 58 0 3592 1689 5 MP
+DP
+0 -58 -58 0 0 58 58 0 3899 1834 5 MP
+DP
+0 -58 -58 0 0 58 58 0 4207 1931 5 MP
+DP
+0 -58 -58 0 0 58 58 0 4514 1983 5 MP
+DP
+0 -58 -58 0 0 58 58 0 4821 2121 5 MP
+DP
+0 -58 -58 0 0 58 58 0 5129 2158 5 MP
+DP
+0 -58 -58 0 0 58 58 0 5436 2169 5 MP
+DP
+gs 856 616 5105 2806 rc
+gr
+
+0 sg
+2714 3746 mt 
+(Number of grouped points) s
+ 573 2902 mt  -90 rotate
+(BASFUN Vectorization Speed Up) s
+90 rotate
+2710  521 mt 
+(HPCFF, Quadratic Splines) s
+ 839 3464 mt 
+( ) s
+5944  658 mt 
+( ) s
+1 sg
+0 438 1627 0 0 -438 2531 2901 4 MP
+PP
+-1627 0 0 438 1627 0 0 -438 2531 2901 5 MP stroke
+4 w
+DO
+SO
+6 w
+0 sg
+2531 2901 mt 4158 2901 L
+2531 2463 mt 4158 2463 L
+2531 2901 mt 2531 2463 L
+4158 2901 mt 4158 2463 L
+2531 2901 mt 4158 2901 L
+2531 2901 mt 2531 2463 L
+2531 2901 mt 4158 2901 L
+2531 2463 mt 4158 2463 L
+2531 2901 mt 2531 2463 L
+4158 2901 mt 4158 2463 L
+2999 2625 mt 
+(Periodic Splines) s
+gs 2531 2463 1628 439 rc
+c8
+360 0 2602 2583 2 MP stroke
+gr
+
+c8
+  36   36 2782 2583 FO
+gs 2531 2463 1628 439 rc
+gr
+
+0 sg
+2999 2824 mt 
+(Non-perodic Splines) s
+gs 2531 2463 1628 439 rc
+c9
+360 0 2602 2781 2 MP stroke
+gr
+
+c9
+0 -58 -58 0 0 58 58 0 2753 2752 5 MP
+DP
+gs 2531 2463 1628 439 rc
+gr
+
+1 sg
+0 2805 5104 0 0 -2805 856 7316 4 MP
+PP
+-5104 0 0 2805 5104 0 0 -2805 856 7316 5 MP stroke
+4 w
+DO
+0 sg
+ 856 7316 mt  856 4511 L
+ 856 4511 mt  856 4511 L
+1876 7316 mt 1876 4511 L
+1876 4511 mt 1876 4511 L
+2897 7316 mt 2897 4511 L
+2897 4511 mt 2897 4511 L
+3918 7316 mt 3918 4511 L
+3918 4511 mt 3918 4511 L
+4939 7316 mt 4939 4511 L
+4939 4511 mt 4939 4511 L
+5960 7316 mt 5960 4511 L
+5960 4511 mt 5960 4511 L
+ 856 7316 mt 5960 7316 L
+5960 7316 mt 5960 7316 L
+ 856 6848 mt 5960 6848 L
+5960 6848 mt 5960 6848 L
+ 856 6381 mt 5960 6381 L
+5960 6381 mt 5960 6381 L
+ 856 5913 mt 5960 5913 L
+5960 5913 mt 5960 5913 L
+ 856 5446 mt 5960 5446 L
+5960 5446 mt 5960 5446 L
+ 856 4978 mt 5960 4978 L
+5960 4978 mt 5960 4978 L
+ 856 4511 mt 5960 4511 L
+5960 4511 mt 5960 4511 L
+SO
+6 w
+ 856 7316 mt 5960 7316 L
+ 856 4511 mt 5960 4511 L
+ 856 7316 mt  856 4511 L
+5960 7316 mt 5960 4511 L
+ 856 7316 mt 5960 7316 L
+ 856 7316 mt  856 4511 L
+ 856 7316 mt  856 7290 L
+ 856 4511 mt  856 4536 L
+DO
+ 856 7316 mt  856 4511 L
+ 856 4511 mt  856 4511 L
+SO
+ 856 7316 mt  856 7264 L
+ 856 4511 mt  856 4562 L
+ 768 7498 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 80 FMSR
+
+ 901 7424 mt 
+(0) s
+1163 7316 mt 1163 7290 L
+1163 4511 mt 1163 4536 L
+DO
+1163 7316 mt 1163 4511 L
+1163 4511 mt 1163 4511 L
+SO
+1343 7316 mt 1343 7290 L
+1343 4511 mt 1343 4536 L
+DO
+1343 7316 mt 1343 4511 L
+1343 4511 mt 1343 4511 L
+SO
+1470 7316 mt 1470 7290 L
+1470 4511 mt 1470 4536 L
+DO
+1470 7316 mt 1470 4511 L
+1470 4511 mt 1470 4511 L
+SO
+1569 7316 mt 1569 7290 L
+1569 4511 mt 1569 4536 L
+DO
+1569 7316 mt 1569 4511 L
+1569 4511 mt 1569 4511 L
+SO
+1650 7316 mt 1650 7290 L
+1650 4511 mt 1650 4536 L
+DO
+1650 7316 mt 1650 4511 L
+1650 4511 mt 1650 4511 L
+SO
+1718 7316 mt 1718 7290 L
+1718 4511 mt 1718 4536 L
+DO
+1718 7316 mt 1718 4511 L
+1718 4511 mt 1718 4511 L
+SO
+1777 7316 mt 1777 7290 L
+1777 4511 mt 1777 4536 L
+DO
+1777 7316 mt 1777 4511 L
+1777 4511 mt 1777 4511 L
+SO
+1830 7316 mt 1830 7290 L
+1830 4511 mt 1830 4536 L
+DO
+1830 7316 mt 1830 4511 L
+1830 4511 mt 1830 4511 L
+SO
+1876 7316 mt 1876 7290 L
+1876 4511 mt 1876 4536 L
+DO
+1876 7316 mt 1876 4511 L
+1876 4511 mt 1876 4511 L
+SO
+1876 7316 mt 1876 7264 L
+1876 4511 mt 1876 4562 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+1788 7498 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 80 FMSR
+
+1921 7424 mt 
+(1) s
+2184 7316 mt 2184 7290 L
+2184 4511 mt 2184 4536 L
+DO
+2184 7316 mt 2184 4511 L
+2184 4511 mt 2184 4511 L
+SO
+2363 7316 mt 2363 7290 L
+2363 4511 mt 2363 4536 L
+DO
+2363 7316 mt 2363 4511 L
+2363 4511 mt 2363 4511 L
+SO
+2491 7316 mt 2491 7290 L
+2491 4511 mt 2491 4536 L
+DO
+2491 7316 mt 2491 4511 L
+2491 4511 mt 2491 4511 L
+SO
+2590 7316 mt 2590 7290 L
+2590 4511 mt 2590 4536 L
+DO
+2590 7316 mt 2590 4511 L
+2590 4511 mt 2590 4511 L
+SO
+2671 7316 mt 2671 7290 L
+2671 4511 mt 2671 4536 L
+DO
+2671 7316 mt 2671 4511 L
+2671 4511 mt 2671 4511 L
+SO
+2739 7316 mt 2739 7290 L
+2739 4511 mt 2739 4536 L
+DO
+2739 7316 mt 2739 4511 L
+2739 4511 mt 2739 4511 L
+SO
+2798 7316 mt 2798 7290 L
+2798 4511 mt 2798 4536 L
+DO
+2798 7316 mt 2798 4511 L
+2798 4511 mt 2798 4511 L
+SO
+2850 7316 mt 2850 7290 L
+2850 4511 mt 2850 4536 L
+DO
+2850 7316 mt 2850 4511 L
+2850 4511 mt 2850 4511 L
+SO
+2897 7316 mt 2897 7290 L
+2897 4511 mt 2897 4536 L
+DO
+2897 7316 mt 2897 4511 L
+2897 4511 mt 2897 4511 L
+SO
+2897 7316 mt 2897 7264 L
+2897 4511 mt 2897 4562 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+2809 7498 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 80 FMSR
+
+2942 7424 mt 
+(2) s
+3204 7316 mt 3204 7290 L
+3204 4511 mt 3204 4536 L
+DO
+3204 7316 mt 3204 4511 L
+3204 4511 mt 3204 4511 L
+SO
+3384 7316 mt 3384 7290 L
+3384 4511 mt 3384 4536 L
+DO
+3384 7316 mt 3384 4511 L
+3384 4511 mt 3384 4511 L
+SO
+3512 7316 mt 3512 7290 L
+3512 4511 mt 3512 4536 L
+DO
+3512 7316 mt 3512 4511 L
+3512 4511 mt 3512 4511 L
+SO
+3611 7316 mt 3611 7290 L
+3611 4511 mt 3611 4536 L
+DO
+3611 7316 mt 3611 4511 L
+3611 4511 mt 3611 4511 L
+SO
+3691 7316 mt 3691 7290 L
+3691 4511 mt 3691 4536 L
+DO
+3691 7316 mt 3691 4511 L
+3691 4511 mt 3691 4511 L
+SO
+3760 7316 mt 3760 7290 L
+3760 4511 mt 3760 4536 L
+DO
+3760 7316 mt 3760 4511 L
+3760 4511 mt 3760 4511 L
+SO
+3819 7316 mt 3819 7290 L
+3819 4511 mt 3819 4536 L
+DO
+3819 7316 mt 3819 4511 L
+3819 4511 mt 3819 4511 L
+SO
+3871 7316 mt 3871 7290 L
+3871 4511 mt 3871 4536 L
+DO
+3871 7316 mt 3871 4511 L
+3871 4511 mt 3871 4511 L
+SO
+3918 7316 mt 3918 7290 L
+3918 4511 mt 3918 4536 L
+DO
+3918 7316 mt 3918 4511 L
+3918 4511 mt 3918 4511 L
+SO
+3918 7316 mt 3918 7264 L
+3918 4511 mt 3918 4562 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+3830 7498 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 80 FMSR
+
+3963 7424 mt 
+(3) s
+4225 7316 mt 4225 7290 L
+4225 4511 mt 4225 4536 L
+DO
+4225 7316 mt 4225 4511 L
+4225 4511 mt 4225 4511 L
+SO
+4405 7316 mt 4405 7290 L
+4405 4511 mt 4405 4536 L
+DO
+4405 7316 mt 4405 4511 L
+4405 4511 mt 4405 4511 L
+SO
+4532 7316 mt 4532 7290 L
+4532 4511 mt 4532 4536 L
+DO
+4532 7316 mt 4532 4511 L
+4532 4511 mt 4532 4511 L
+SO
+4631 7316 mt 4631 7290 L
+4631 4511 mt 4631 4536 L
+DO
+4631 7316 mt 4631 4511 L
+4631 4511 mt 4631 4511 L
+SO
+4712 7316 mt 4712 7290 L
+4712 4511 mt 4712 4536 L
+DO
+4712 7316 mt 4712 4511 L
+4712 4511 mt 4712 4511 L
+SO
+4781 7316 mt 4781 7290 L
+4781 4511 mt 4781 4536 L
+DO
+4781 7316 mt 4781 4511 L
+4781 4511 mt 4781 4511 L
+SO
+4840 7316 mt 4840 7290 L
+4840 4511 mt 4840 4536 L
+DO
+4840 7316 mt 4840 4511 L
+4840 4511 mt 4840 4511 L
+SO
+4892 7316 mt 4892 7290 L
+4892 4511 mt 4892 4536 L
+DO
+4892 7316 mt 4892 4511 L
+4892 4511 mt 4892 4511 L
+SO
+4939 7316 mt 4939 7290 L
+4939 4511 mt 4939 4536 L
+DO
+4939 7316 mt 4939 4511 L
+4939 4511 mt 4939 4511 L
+SO
+4939 7316 mt 4939 7264 L
+4939 4511 mt 4939 4562 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+4851 7498 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 80 FMSR
+
+4984 7424 mt 
+(4) s
+5246 7316 mt 5246 7290 L
+5246 4511 mt 5246 4536 L
+DO
+5246 7316 mt 5246 4511 L
+5246 4511 mt 5246 4511 L
+SO
+5426 7316 mt 5426 7290 L
+5426 4511 mt 5426 4536 L
+DO
+5426 7316 mt 5426 4511 L
+5426 4511 mt 5426 4511 L
+SO
+5553 7316 mt 5553 7290 L
+5553 4511 mt 5553 4536 L
+DO
+5553 7316 mt 5553 4511 L
+5553 4511 mt 5553 4511 L
+SO
+5652 7316 mt 5652 7290 L
+5652 4511 mt 5652 4536 L
+DO
+5652 7316 mt 5652 4511 L
+5652 4511 mt 5652 4511 L
+SO
+5733 7316 mt 5733 7290 L
+5733 4511 mt 5733 4536 L
+DO
+5733 7316 mt 5733 4511 L
+5733 4511 mt 5733 4511 L
+SO
+5801 7316 mt 5801 7290 L
+5801 4511 mt 5801 4536 L
+DO
+5801 7316 mt 5801 4511 L
+5801 4511 mt 5801 4511 L
+SO
+5861 7316 mt 5861 7290 L
+5861 4511 mt 5861 4536 L
+DO
+5861 7316 mt 5861 4511 L
+5861 4511 mt 5861 4511 L
+SO
+5913 7316 mt 5913 7290 L
+5913 4511 mt 5913 4536 L
+DO
+5913 7316 mt 5913 4511 L
+5913 4511 mt 5913 4511 L
+SO
+5960 7316 mt 5960 7290 L
+5960 4511 mt 5960 4536 L
+DO
+5960 7316 mt 5960 4511 L
+5960 4511 mt 5960 4511 L
+SO
+5960 7316 mt 5960 7264 L
+5960 4511 mt 5960 4562 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+5872 7498 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 80 FMSR
+
+6005 7424 mt 
+(5) s
+ 856 7316 mt  907 7316 L
+5960 7316 mt 5908 7316 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+ 755 7360 mt 
+(0) s
+ 856 6848 mt  907 6848 L
+5960 6848 mt 5908 6848 L
+ 655 6892 mt 
+(0.5) s
+ 856 6381 mt  907 6381 L
+5960 6381 mt 5908 6381 L
+ 755 6425 mt 
+(1) s
+ 856 5913 mt  907 5913 L
+5960 5913 mt 5908 5913 L
+ 655 5957 mt 
+(1.5) s
+ 856 5446 mt  907 5446 L
+5960 5446 mt 5908 5446 L
+ 755 5490 mt 
+(2) s
+ 856 4978 mt  907 4978 L
+5960 4978 mt 5908 4978 L
+ 655 5022 mt 
+(2.5) s
+ 856 4511 mt  907 4511 L
+5960 4511 mt 5908 4511 L
+ 755 4555 mt 
+(3) s
+ 856 7316 mt 5960 7316 L
+ 856 4511 mt 5960 4511 L
+ 856 7316 mt  856 4511 L
+5960 7316 mt 5960 4511 L
+gs 856 4511 5105 2806 rc
+c8
+307 29 308 56 307 52 307 17 308 13 307 50 307 -16 307 -14 
+308 -71 307 -202 307 -265 308 -268 307 -442 307 -365 307 -240 856 6910 16 MP stroke
+gr
+
+c8
+  36   36  856 6910 FO
+  36   36 1163 6670 FO
+  36   36 1470 6305 FO
+  36   36 1777 5863 FO
+  36   36 2085 5595 FO
+  36   36 2392 5330 FO
+  36   36 2699 5128 FO
+  36   36 3007 5057 FO
+  36   36 3314 5043 FO
+  36   36 3621 5027 FO
+  36   36 3928 5077 FO
+  36   36 4236 5090 FO
+  36   36 4543 5107 FO
+  36   36 4850 5159 FO
+  36   36 5158 5215 FO
+  36   36 5465 5244 FO
+gs 856 4511 5105 2806 rc
+c9
+307 6 308 15 307 82 307 99 308 45 307 -43 307 402 307 -56 
+308 49 307 -134 307 -144 308 -267 307 -321 307 -220 307 -94 856 7034 16 MP stroke
+gr
+
+c9
+0 -58 -58 0 0 58 58 0 827 7005 5 MP
+DP
+0 -58 -58 0 0 58 58 0 1134 6911 5 MP
+DP
+0 -58 -58 0 0 58 58 0 1441 6691 5 MP
+DP
+0 -58 -58 0 0 58 58 0 1748 6370 5 MP
+DP
+0 -58 -58 0 0 58 58 0 2056 6103 5 MP
+DP
+0 -58 -58 0 0 58 58 0 2363 5959 5 MP
+DP
+0 -58 -58 0 0 58 58 0 2670 5825 5 MP
+DP
+0 -58 -58 0 0 58 58 0 2978 5874 5 MP
+DP
+0 -58 -58 0 0 58 58 0 3285 5818 5 MP
+DP
+0 -58 -58 0 0 58 58 0 3592 6220 5 MP
+DP
+0 -58 -58 0 0 58 58 0 3899 6177 5 MP
+DP
+0 -58 -58 0 0 58 58 0 4207 6222 5 MP
+DP
+0 -58 -58 0 0 58 58 0 4514 6321 5 MP
+DP
+0 -58 -58 0 0 58 58 0 4821 6403 5 MP
+DP
+0 -58 -58 0 0 58 58 0 5129 6418 5 MP
+DP
+0 -58 -58 0 0 58 58 0 5436 6424 5 MP
+DP
+gs 856 4511 5105 2806 rc
+gr
+
+0 sg
+2714 7641 mt 
+(Number of grouped points) s
+ 573 6797 mt  -90 rotate
+(BASFUN Vectorization Speed Up) s
+90 rotate
+2816 4416 mt 
+(HPCFF, Cubic Splines) s
+ 839 7359 mt 
+( ) s
+5944 4553 mt 
+( ) s
+1 sg
+0 438 1627 0 0 -438 2307 6924 4 MP
+PP
+-1627 0 0 438 1627 0 0 -438 2307 6924 5 MP stroke
+4 w
+DO
+SO
+6 w
+0 sg
+2307 6924 mt 3934 6924 L
+2307 6486 mt 3934 6486 L
+2307 6924 mt 2307 6486 L
+3934 6924 mt 3934 6486 L
+2307 6924 mt 3934 6924 L
+2307 6924 mt 2307 6486 L
+2307 6924 mt 3934 6924 L
+2307 6486 mt 3934 6486 L
+2307 6924 mt 2307 6486 L
+3934 6924 mt 3934 6486 L
+2775 6648 mt 
+(Periodic Splines) s
+gs 2307 6486 1628 439 rc
+c8
+360 0 2378 6606 2 MP stroke
+gs 2485 6533 147 147 rc
+  36   36 2558 6606 FO
+gr
+
+gr
+
+c8
+0 sg
+2775 6847 mt 
+(Non-perodic Splines) s
+gs 2307 6486 1628 439 rc
+c9
+360 0 2378 6804 2 MP stroke
+gs 2485 6731 147 147 rc
+0 -58 -58 0 0 58 58 0 2529 6775 5 MP
+DP
+gr
+
+gr
+
+c9
+
+end %%Color Dict
+
+eplot
+%%EndObject
+
+epage
+end
+
+showpage
+
+%%Trailer
+%%EOF
diff --git a/docs/manual/bsplines.pdf b/docs/manual/bsplines.pdf
new file mode 100644
index 0000000..1ae347a
Binary files /dev/null and b/docs/manual/bsplines.pdf differ
diff --git a/docs/manual/bsplines.tex b/docs/manual/bsplines.tex
new file mode 100644
index 0000000..ecc06a9
--- /dev/null
+++ b/docs/manual/bsplines.tex
@@ -0,0 +1,1297 @@
+%
+% @file bsplines.tex
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+% @author Stephan Brunner <stephan.brunner@epfl.ch>
+%
+\documentclass[a4paper]{article}
+\usepackage{linuxdoc-sgml}
+%\usepackage{a4wide}
+\usepackage{graphicx}
+\usepackage{hyperref}
+\usepackage{amsmath}
+%\usepackage{fancybox}
+%\usepackage[notref]{showkeys}
+
+\title{\tt BSPLINES Reference Guide}
+\author{Trach-Minh Tran, Stephan Brunner, Kurt Appert}
+\date{v0.3, February 2012}
+\abstract{Generalized splines of any order on irregular grids for
+  interpolation and solving PDEs with FEM.}
+
+\begin{document}
+\maketitle
+\tableofcontents
+
+\section{Properties of Splines}
+In this section, several properties of splines will be shown in
+a more or less rigorous way. The aim is mainly to provide a
+minimum mathematical background for using the module \texttt{BSPLINES} for the
+\emph{interpolation} problem as well as the \emph{Finite Element Method}
+to solve PDEs.  More rigorous mathematical proofs can be found
+in the book by de Boor~\cite{deBoor}.
+
+\subsection{Recurrence Relation}
+We start by defining a finite interval $[a,b]$ subdivided into $N_x$
+intervals:
+\begin{equation}
+  a=t_0 \le t_1 \le \ldots \le t_{N_x}=b.
+\end{equation}
+The sequence $t_i, i=0,\ldots,N_x$ can be irregularly spaced. The $j^{th}$
+spline of degree $p$ defined on this sequence of grid points (also called
+\textbf{knots}), is denoted by $\Lambda_{j}^{p}$ and can be constructed using
+the following recurrence relation. Starting with the \emph{constant} spline
+
+\begin{equation}
+  \Lambda_i^0(x) =
+  \begin{cases}
+    1& \text{if $t_i \le x  < t_{i+1}$}, \\
+    0& \text{otherwise}.
+  \end{cases}
+\end{equation}
+the splines of degree $p>0$ for $t_i \le x  < t_{i+1}$ can be constructed from
+
+\begin{eqnarray}
+  \Lambda_i^p &=& w_i^p\Lambda_i^{p-1} + (1-
+                    w_{i+1}^p)\Lambda_{i+1}^{p-1}, \label{eq:recRel}\\
+  w_{i}^{p} &=& \frac{x-t_i}{t_{i+p}-t_i}.
+\end{eqnarray}
+Thus the values of all \emph{non-zero}
+splines up to degree $p$ in the interval $[t_i,t_{i+1}]$ fit into
+the triangular array as shown in Fig.~\ref{fig:allSpl}. Starting from the
+first column with $\Lambda_i^0=1$, one can compute each of the $p+1$ entries
+in a subsequent column with Eq.~(\ref{eq:recRel}). Applying this procedure
+to generate splines on every intervals $[t_i,t_{i+1}], i=0,\ldots,N_{x}-1$
+would produce the sequence of $N_{x}+p$ splines of degree $p$:
+\( \Lambda_{-p}^{p},\ldots, \Lambda_{N_x-1}^{p}\).
+
+\subsection{Support and positivity}
+ The linear spline
+ \begin{equation*}
+\Lambda_i^1 = w_i^1\Lambda_i^0 + (1-w_{i+1}^1)\Lambda_{i+1}^{0}
+   =\frac{x-t_i}{t_{i+1}-t_i}\Lambda_i^0 +
+     \frac{t_{i+2}-x}{t_{i+2}-t_{i+1}}\Lambda_{i+1}^0
+ \end{equation*}
+consists of 2 \emph{linear pieces} on $[t_i,t_{i+2}]$, forming a $C^0$
+function which breaks at $t_{i+1}$ and vanishes outside of this
+interval. Likewise, the quadratic spline
+\begin{eqnarray*}
+\Lambda_i^2 &=& w_i^2\Lambda_i^1 + (1-w_{i+1}^2)\Lambda_{i+1}^{1} \\
+&=& w^2_i w^1_i \Lambda_i^0 + [w^2_i(1-w^1_{i+1})+w^1_{i+1}(1-w^2_{i+1})]
+\Lambda_{i+1}^0 + (1-w^2_{i+1})(1-w^1_{i+2})\Lambda_{i+2}^0
+\end{eqnarray*}
+consists of 3 \emph{parabolic pieces} on $[t_i,t_{i+3}]$ that
+join to form a $C^1$ function which breaks at $t_{i+1}$ and $t_{i+2}$
+and vanishes outside of this interval. In general the spline of
+degree $p$ can be expressed as:
+\begin{equation}
+\Lambda^p_i = \sum_{r=0}^{p}\, b_{i+r}^p\Lambda_{i+r}^0
+\end{equation}
+where $b_{i+r}^p$ is a sum of products of $p$ linear functions, resulting
+in $p+1$ polynomials of degree $p$, joining to form
+a $C^{p-1}$ function which breaks at $t_i,\ldots,t_{i+p+1}$
+and vanishes outside of the \emph{support} $[t_i,t_{i+p+1}]$. From its construction,
+$\Lambda^p_i$ is clearly \emph{strictly positive} on the interior of $[t_i,t_{i+p+1}]$.
+\begin{equation}
+\Lambda^p_i (x) > 0, \qquad t_i<x<t_{i+p+1}.
+\end{equation}
+
+\begin{figure}
+\centering
+\( \begin{array}{cccccc}
+     &                  &    &      &                        &                   \\
+     &                  &    &      &                        &  0                \\
+     &                  &    &      &  0                     &                \\
+     &                  &    & \cdot&                        &  \Lambda^p_{i-p}  \\
+     &                  & 0  &      &  \Lambda^{p-1}_{i-p+1} &                \\
+     & 0                &    & \cdot&                        &  \Lambda^p_{i-p+1} \\
+  0  &                  &\Lambda^{2}_{i-2}& &\Lambda^{p-1}_{i-p+2}  &  \\
+     & \Lambda^{1}_{i-1}&    & \cdot&       &  \Lambda^p_{i-p+2} \\
+ \Lambda^{0}_{i} &      &\Lambda^{2}_{i-1}&      & \cdot               &         \\
+     & \Lambda^{1}_{i}  &    & \cdot&                 &  \cdot    \\
+  0  &                  &\Lambda^{2}_{i}    &      & \Lambda^{p-1}_{i-1}  &     \\
+     & 0                &    & \cdot&     &  \Lambda^p_{i-1}    \\
+     &                  & 0  &      &  \Lambda^{p-1}_{i}                        &    \\
+     &                  &    & \cdot&     &  \Lambda^p_{i}  \\
+     &                  &    &      &                        &   \\
+     &                  &    &      &  0                     &                   \\
+     &                  &    &      &                        &  0
+\end{array} \)
+\caption{The array of all the splines of degree up to $p$ that are non-zero in
+  $[t_i,t_{i+1}]$.}
+\label{fig:allSpl}
+\end{figure}
+
+\subsection{Sum of Splines}
+For $t_i\leq x < t_{i+1}$
+\begin{eqnarray*}
+\sum_j\,\Lambda_j^0 &=& \Lambda_i^0 = 1, \\
+\sum_j\,\Lambda_j^1 &=& \Lambda_{i-1}^1 + \Lambda_{i}^1= (1-w^1_i)\Lambda_i^0
++ w^1_i\Lambda_i^0 = 1.
+\end{eqnarray*}
+Thus assuming that for $p>1$:
+\[ \sum_{j=i-p+1}^{i}\,\Lambda_j^{p-1} = 1, \]
+or that the sum of the next to last column in Fig.~\ref{fig:allSpl} is $1$,
+we have, using the recurrence relation (\ref{eq:recRel})
+\begin{eqnarray*}
+\sum_{j=i-p}^{i}\,\Lambda_j^{p} &=& \sum_{j=i-p}^{i}\,
+\left( w^p_j\Lambda_j^{p-1} +(1-w^p_{j+1})\Lambda_{j+1}^{p-1} \right) \\
+&=& \sum_{j=i-p+1}^{i}\,w^p_j\Lambda_j^{p-1} +
+    \sum_{j=i-p+1}^{i}\,(1-w^p_j)\Lambda_j^{p-1}  \\
+&=& \sum_{j=i-p+1}^{i}\,\Lambda_j^{p-1} = 1.
+\end{eqnarray*}
+
+\subsection{Derivative of Splines}
+The derivative of the splines of degree $p$ can be expressed in terms of the
+splines of degree $p-1$ by the following relation:
+\begin{equation}
+  \label{derivative of splines}
+  \frac{d}{dx}\Lambda_i^p =
+  p\left(
+  \frac{\Lambda_i^{p-1}}{t_{i+p}-t_i}
+  - \frac{\Lambda_{i+1}^{p-1}}{t_{i+p+1}-t_{i+1}}
+  \right).
+\end{equation}
+A straightforward consequence of this relation is that the splines of order
+$p$ are $C^{p-1}$ continuous. The demonstration of Eq.(\ref{derivative of
+splines}) is done by induction. One starts with the case $p=1$:
+\begin{eqnarray*}
+\frac{d}{dx}\Lambda_i^1
+& = &
+\frac{d}{dx}\left[
+w_i^1\Lambda_i^0 + (1-w^1_{i+1})\Lambda_{i+1}^0
+\right]
+\\
+& = &
+\frac{d\,w_i^1}{dx}\Lambda_i^0 + \frac{d\,(1-w^1_{i+1})}{dx}\Lambda_{i+1}^0
++ w_i^1\frac{d\,\Lambda_i^0}{dx} + (1-w^1_{i+1})\frac{d\,\Lambda_{i+1}^0}{dx}
+\\
+& = &
+\frac{1}{t_{i+1}-t_i}\Lambda_i^0 - \frac{1}{t_{i+2}-t_{i+1}}\Lambda_{i+1}^0,
+\end{eqnarray*}
+having used Eq.(\ref{eq:recRel}) and $d\,\Lambda_i^0/dx = 0$. One then assumes
+Eq.(\ref{derivative of splines}) true for $p-1$ and demonstrates that it
+remains true for $p$. This is done as follows:
+\begin{eqnarray}
+\label{demo deriv. 1}
+\frac{d}{dx}\Lambda_i^p
+& = &
+\frac{d}{dx}\left[
+w_i^p\Lambda_i^{p-1} + (1-w^p_{i+1})\Lambda_{i+1}^{p-1}
+\right]
+\\
+\nonumber
+& = &
+\frac{d\,w_i^p}{dx}\Lambda_i^{p-1}
++ \frac{d\,(1-w^p_{i+1})}{dx}\Lambda_{i+1}^{p-1}
++ w_i^p\frac{d\,\Lambda_i^{p-1}}{dx}
++ (1-w^p_{i+1})\frac{d\,\Lambda_{i+1}^{p-1}}{dx}
+\\
+\nonumber
+& = &
+\frac{\Lambda_i^{p-1}}{t_{i+p}-t_i}
+-\frac{\Lambda_{i+1}^{p-1}}{t_{i+p+1}-t_{i+1}}
+\\
+\label{demo deriv. 2}
+&&
++ w_i^p (p-1)\left(
+  \frac{\Lambda_i^{p-2}}{t_{i+p-1}-t_i}
+  - \frac{\Lambda_{i+1}^{p-2}}{t_{i+p}-t_{i+1}}
+\right)
++ (1-w_{i+1}^p) (p-1)\left(
+  \frac{\Lambda_{i+1}^{p-2}}{t_{i+p}-t_{i+1}}
+  - \frac{\Lambda_{i+2}^{p-2}}{t_{i+p+1}-t_{i+2}}
+  \right)
+\end{eqnarray}
+having used Eq.(\ref{eq:recRel}) to obtain (\ref{demo deriv. 1}), and the
+induction hypothesis to obtain Eq.(\ref{demo deriv. 2}). Now, rearranging the
+last two terms of Eq.(\ref{demo deriv. 2}), one easily obtains:
+\begin{eqnarray}
+\nonumber
+\frac{d}{dx}\Lambda_i^p
+& = &
+\frac{\Lambda_i^{p-1}}{t_{i+p}-t_i}
+-\frac{\Lambda_{i+1}^{p-1}}{t_{i+p+1}-t_{i+1}}
+\\
+\nonumber
+&&
++(p-1)\left[
+\frac{1}{t_{i+p}-t_i}
+\left(
+\frac{x-t_i}{t_{i+p-1}-t_i}\Lambda_i^{p-2}
++\frac{t_{i+p}-x}{t_{i+p}-t_{i+1}}\Lambda_{i+1}^{p-2}
+\right)
+\right.
+\\
+\nonumber
+&&
+\hspace{2.cm}
+\left.
+-\frac{1}{t_{i+p+1}-t_{i+1}}
+\left(
+\frac{x-t_{i+1}}{t_{i+p}-t_{i+1}}\Lambda_{i+1}^{p-2}
++\frac{t_{i+p+1}-x}{t_{i+p+1}-t_{i+2}}\Lambda_{i+2}^{p-2}
+\right)
+\right]
+\\
+\label{demo deriv. 3}
+& = &
+\frac{\Lambda_i^{p-1}}{t_{i+p}-t_i}
+-\frac{\Lambda_{i+1}^{p-1}}{t_{i+p+1}-t_{i+1}}
++ (p-1)\left(
+\frac{\Lambda_i^{p-1}}{t_{i+p}-t_i}
+-\frac{\Lambda_{i+1}^{p-1}}{t_{i+p+1}-t_{i+1}}
+\right)
+\\
+\nonumber
+& = &
+p\left(
+\frac{\Lambda_i^{p-1} }{t_{i+p}-t_i}
+-\frac{\Lambda_{i+1}^{p-1}}{t_{i+p+1}-t_{i+1}}
+\right)
+\end{eqnarray}
+having again used Eq.(\ref{eq:recRel}) to obtain (\ref{demo deriv. 3}). This
+completes the demonstration of relation (\ref{derivative of splines}).
+
+
+\subsection{Integrals of Splines}
+
+With the proper normalization all splines of all degrees have unitary surface:
+\begin{equation}
+  \label{integrals of splines}
+  \frac{p+1}{t_{i+p+1}-t_i}\int \Lambda_i^p(x)dx =  1.
+\end{equation}
+This relation holds trivially for $p=0$ and $p=1$. A recursive proof of the
+general statement (\ref{integrals of splines}) starts assuming
+\begin{equation}
+  \label{previousInt}
+  \frac{p}{t_{i+p}-t_i}\int \Lambda_i^{p-1}(x)dx =  1
+\end{equation}
+to be true. Then using Eq.(\ref{derivative of splines}) multiplied by $x$ and integrating one obtains:
+\begin{equation}
+\nonumber
+\int x \frac{d}{dx}\Lambda_i^p dx = -\int \Lambda_i^p dx =
+  p\int\left( \frac{x\Lambda_i^{p-1}}{t_{i+p}-t_i}
+  -\frac{x\Lambda_{i+1}^{p-1}}{t_{i+p+1}-t_{i+1}}
+ \right)dx.
+\end{equation}
+Completing the fractions in the big parentheses in view of using
+Eq.(\ref{eq:recRel}) one has
+\begin{eqnarray}
+\nonumber
+\int \Lambda_i^p dx
+  & = &
+          -\, p\int \frac{x-t_i}{t_{i+p}-t_i}\Lambda_i^{p-1} dx
+          - p\int \frac{t_i}{t_{i+p}-t_i}\Lambda_i^{p-1} dx \\
+\nonumber
+  & &
+          -\, p\int \frac{t_{i+p+1}-x}{t_{i+p+1}-t_{i+1}}\Lambda_{i+1}^{p-1} dx
+          + p\int \frac{t_{i+p+1}}{t_{i+p+1}-t_{i+1}}\Lambda_{i+1}^{p-1} dx,
+\end{eqnarray}
+where the first and the third terms on the right correspond to $-p\int\Lambda_i^p dx $,
+Eq.(\ref{eq:recRel}), and can be combined with the left side to yield
+\begin{equation}
+\label{proofIntegrals}
+(1+p)\int\Lambda_i^p dx = t_{i+p+1}-t_i,
+\end{equation}
+where relation (\ref{previousInt}) has been used for the rest on the right hand side.
+This concludes the proof of Eq.(\ref{integrals of splines}).
+
+
+\subsection{Boundary Conditions}
+Applying the recurrence relation to generate \emph{all} the splines on
+the finite domain $[t_0,t_{N_x}]$ yields the $N_x+p$ splines
+of degree $p$:
+\begin{equation}
+  \Lambda^p_{-p},\Lambda^p_{-p+1},\ldots, \Lambda^p_{N_x-1}. \label{eq:splSeq}
+\end{equation}
+Note that \emph{additional} knots beyond both ends of $[t_0,t_{N_x}]$ have to
+be defined to generate all these splines.
+
+\subsubsection{Periodic splines}
+The  extra knots are simply defined through periodicity.:
+\begin{eqnarray}
+  t_{-\nu} &=& t_{N_x-\nu}-(b-a), \\
+  t_{N_{x}+\nu} &=& t_{\nu}+(b-a), \qquad \nu=0,\ldots,p.
+\end{eqnarray}
+The $p+1$ leftmost splines in (\ref{eq:splSeq}) are thus identical to the rightmost splines:
+\begin{equation}
+\Lambda^p_{-\nu} = \Lambda^p_{N_x-\nu}, \qquad \nu=0,\ldots,p.
+\end{equation}
+
+\subsubsection{Non-periodic splines}
+The choice made in \texttt{BSPLINES} is simply:
+\begin{equation}
+  t_{-p} = \cdots = t_{0} = a, \qquad b=t_{N_x}=\cdots=t_{N_x+p}.
+\end{equation}
+Thus in the first interval $[t_0,t_1]$, the first spline $\Lambda^p_{-p}$
+is constructed (refer to the first entry on each of the column of
+Fig.~\ref{fig:allSpl}, with $i=0$) as follow:
+\begin{eqnarray*}
+ \Lambda^1_{-1} &=& (1-w^1_{0})\Lambda^0_{0} = \frac{t_{1}-x}{t_{1}-t_{0}}\Lambda^0_{0}\\
+ \Lambda^2_{-2} &=& (1-w^2_{-1})\Lambda^1_{-1} =\frac{t_{1}-x}{t_{1}-t_{-1}}\Lambda^1_{-1}
+ =\left(\frac{t_{1}-x}{t_{1}-t_{0}}\right)^{2}\Lambda^0_{0}\\
+\cdot  & & \qquad\cdot\qquad\qquad\qquad\qquad \cdot \\
+ \Lambda^p_{-p} &=& (1-w^p_{-p+1})\Lambda^p_{-p+1} =\frac{t_{1}-x}{t_{1}-t_{-p+1}}\Lambda^{p-1}_{-p+1}
+ =\left(\frac{t_{1}-x}{t_{1}-t_{0}}\right)^{p}\Lambda^0_{0}
+\end{eqnarray*}
+
+
+In the same manner, the generation of the \emph{last} spline $\Lambda^p_{N_x-1}$ (last entry
+on each of the column of Fig.~\ref{fig:allSpl}, with $i=N_{x}-1$) yields:
+\begin{eqnarray*}
+\Lambda^1_{N_x-1} &=& w^1_{N_x-1}\Lambda^0_{N_x-1} =
+   \frac{x-t_{N_x-1}}{t_{N_x}-t_{N_x-1}}\Lambda^0_{N_x-1} \\
+\Lambda^2_{N_x-1} &=& w^2_{N_x-1}\Lambda^1_{N_x-1} =
+   \frac{x-t_{N_x-1}}{t_{N_x+1}-t_{N_x-1}}\Lambda^1_{N_x-1}
+   = \left(\frac{x-t_{N_x-1}}{t_{N_x}-t_{N_x-1}}\right)^{2}\Lambda^0_{N_x-1}\\
+\cdot  & & \qquad\cdot\qquad\qquad\qquad\qquad \cdot \\
+\Lambda^p_{N_x-1} &=& w^p_{N_x-1}\Lambda^p_{N_x-1} =
+   \frac{x-t_{N_x-1}}{t_{N_x+p-1}-t_{N_x-1}}\Lambda^1_{N_x-1}
+   = \left(\frac{x-t_{N_x-1}}{t_{N_x}-t_{N_x-1}}\right)^{p}\Lambda^0_{N_x-1}
+\end{eqnarray*}
+
+Since the sum of all splines is 1 and using the positivity of splines, all the
+non-periodic splines, except the first (last) spline should vanish at $x=a$ ($x=b$):
+\begin{equation}
+  \Lambda^p_{r}(a) = \delta_{r,-p}, \qquad\Lambda^p_{r}(b) = \delta_{r,N_x-1}
+\end{equation}
+
+The spline derivatives at the boundaries $x=a$ and $x=b$ can be
+derived using Eq.(\ref{derivative of splines}) as follow. At $x=a$
+(interval $[t_0,t_1]$), by noting that only the spline
+$\Lambda^{p-1}_{-p+1}$ is non-zero at $x=a$ (see next to last column
+of Fig.{\ref{fig:allSpl}, with $i=0$), it is easy to see that
+there are only 2 non-zero derivatives
+given by
+\begin{equation}
+  \begin{split}
+  \frac{d}{dx}\Lambda^p_{-p}(a) =&
+  -\frac{p\,\Lambda^{p-1}_{-p+1}(a)}{t_1-t_{-p}} = -\frac{p}{t_1-t_0}, \\
+  \frac{d}{dx}\Lambda^p_{-p+1}(a) =&
+  \frac{p\,\Lambda^{p-1}_{-p+1}(a)}{t_1-t_{-p+1}} = \frac{p}{t_1-t_0},
+  \end{split}
+\end{equation}
+where we have used $t_0=t_{-1}=\ldots=t_{-p}=a$. Likewise, the 2
+non-zero derivatives of spline at the other boundary $x=b$ are
+\begin{equation}
+  \begin{split}
+  \frac{d}{dx}\Lambda^p_{N_x-2}(b) =&
+  -\frac{p\,\Lambda^{p-1}_{N_x-1}(b)}{t_{N_x+p-1}-t_{N_x-1}} = -\frac{p}{t_{N_x}-t_{N_x-1}}, \\
+  \frac{d}{dx}\Lambda^p_{N_x-1}(b) =&
+  \frac{p\,\Lambda^{p-1}_{N_x-1}(b)}{t_{N_x+p-1}-t_{N_x-1}} = \frac{p}{t_{N_x}-t_{N_x-1}},
+  \end{split}
+\end{equation}
+where we have used $t_{N_x}=t_{N_x+1}=\ldots=t_{N_x+p}=b$.
+
+\subsubsection{Spline expansion}
+In summary, the approximation of a function $f$ defined
+in the interval $[a,b]$ using a basis (\textbf{Is this obvious?) }of
+splines of degree $p$ associated with the sequence of knots
+$t_i, i=-p,\ldots,N_{x}+p$
+can be written as
+\begin{equation}
+  f(x) = \sum_{j=-p}^{N_x-1}\, c_j\Lambda^p_j(x), \qquad
+  \begin{array}{l}
+    \mbox{support of $\Lambda^p_j$:}\quad [t_{j},t_{j+p+1}],\\
+    t_i \leq x < t_{i+1} \Longrightarrow \Lambda^p_{i-p}(x),\ldots,
+    \Lambda^p_{i}(x) \ge 0.
+  \end{array}
+\end{equation}
+
+Note that the \emph{last} spline in the interval $[t_i,t_{i+1}]$, which can be written as
+\[ \Lambda^p_{i}(x)=w^p_i(x) \Lambda^{p-1}_{i}(x)=\ldots=w^p_i(x) w^{p-1}_i(x) \ldots
+w^{1}_i(x)\Lambda^{0}_{i}(x) \]
+\emph{vanishes at the knot} $x=t_i$. Thus at any position $x$, the sum
+involves $p+1$ terms except at the knots $t_i$ where there are only $p$ terms.
+
+It is sometimes more convenient to renumber the spline index $j$ so that it
+starts from $0$. With this new numbering, the spline expansion becomes
+\begin{equation}
+  f(x) = \sum_{j=0}^{N_x+p-1}\, c_j\Lambda^p_j(x), \qquad
+  \begin{array}{l}
+    \mbox{support of $\Lambda^p_j$:}\quad [t_{j-p},t_{j+1}], \\
+    t_i \leq x < t_{i+1} \Longrightarrow \Lambda^p_{i}(x),\ldots,
+    \Lambda^p_{i+p}(x) \ge 0.
+  \end{array}
+ \label{eq:splExp}
+\end{equation}
+
+In the \emph{periodic} case, there are $N_{x}$ \emph{independent}
+spline coefficients since
+\begin{equation}
+  c_{N_{x}+\nu} = c_{\nu}, \qquad \nu=0,\ldots,p-1. \label{eq:perSp}
+\end{equation}
+
+In the \emph{non-periodic} case,
+the first and the last spline coefficients $c_{0},\,c_{N_x+p-1}$ are
+respectively the values of $f$ at the end points $a$ and $b$.
+
+The basis functions for both non-periodic and periodic cubic splines
+($p=3$) are shown in Fig~.\ref{fig:cubic_splines} where this new numbering is
+used.
+
+
+\begin{figure}[htbp]
+  \centering
+  \includegraphics[angle=0,width=\hsize]{driv1}
+  \caption{The basis of non-periodic and periodic cubic splines. The periodic
+   splines $\Lambda_{10}$, $\Lambda_{11}$, $\Lambda_{12}$ denote the same
+   splines as $\Lambda_{0}$, $\Lambda_{1}$, $\Lambda_{2}$ respectively. }
+  \label{fig:cubic_splines}
+\end{figure}
+
+\subsection{Spline Initialization with \texttt{SET\_SPLINE}}
+The initialization of a spline is performed by calling the routine
+\texttt{SET\_SPLINE}, passing the desired degree $p$ and the
+sequence of grid points (or knots) $t_j, j=0,\ldots,N_x$. If
+Gauss points on each of the intervals $[t_j,t_{j+1}]$ are needed, a non-zero
+value of \texttt{NGAUSS} should be specified. The other input
+argument is the \emph{optional} \texttt{LOGICAL} argument \texttt{PERIOD}
+to define the periodicity of the splines. By default it is \texttt{.FALSE.}.
+The routine returns the 1d spline \texttt{SP} which is of type
+\texttt{TYPE(spline1d)}:
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE set_spline(p, ngauss, grid, sp, period)
+    INTEGER, INTENT(in) :: p, ngauss
+    DOUBLE PRECISION, INTENT(in)  :: grid(:)
+    LOGICAL, OPTIONAL, INTENT(in) :: period
+    TYPE(spline1d), INTENT(out)   :: sp
+    LOGICAL, OPTIONAL, INTENT(in) :: period
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+
+Besides the main characteristics of the spline (degree $p$ of splines,
+number of grid intervals, dimension of splines $N_x+p$, etc.)
+the following quantities will be determined and stored in \texttt{SP}:
+\begin{itemize}
+ \item values and all the $p$ derivatives of the $p+1$ non-vanishing splines
+  on each knots $t_j$. These quantities will be used to speed up computation
+  of the spline expansion (\ref{eq:splExp}).
+  \item integrals of splines \(I_i=\int\Lambda_i(x)\,dx\).
+\end{itemize}
+
+For a 2d spline
+\begin{equation}
+  \Lambda^{p+q}_{ij}(x,y) = \Lambda^p_i(x)\Lambda^q_j(y),
+\end{equation}
+on a 2d structured mesh defined by the grid points
+\texttt{grid1(0:N1), grid2(0:N2)}, the same call as in the 1d case can be
+used, except that the scalars \texttt{p, ngauss, period} become 2 element arrays and
+the output \texttt{SP} is now of type \texttt{TYPE(spline2d)}:
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  INTEGER :: p(2), ngauss(2)
+  LOGICAL, OPTIONAL :: period(2)
+  DOUBLE PRECISION, dimension(:) :: grid1, grid2
+  TYPE(spline2d) :: sp2d
+...
+  CALL set_spline(p, ngauss, grid1, grid2, sp2d, period)
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+
+The derived type \texttt{spline2d} is a \emph{wrapper} of 2 \texttt{spline1d}
+objects which can be accessed through \texttt{sp2d\%sp1} and \texttt{sp2d\%sp2}.
+
+Once \texttt{SET\_SPLINE} is called, the routine \texttt{GET\_DIM} can be
+called to inquire the spline's essential characteristics such as dimension,
+number of intervals and degree, for both 1d and 2d splines: \par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE get_dim(sp, dim, nx, nidbas)
+    TYPE(spline1d), INTENT(in) :: sp
+    INTEGER, INTENT(out) :: dim
+    INTEGER, OPTIONAL, INTENT(out) :: nx, nidbas
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+
+Integral of function $\int^b_a\,f(x)dx$ is computed from its spline
+\texttt{sp} and splines coefficients in:
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  DOUBLE PRECISION FUNCTION fintg(sp, c)
+    TYPE(spline1d), INTENT(in) :: sp
+    DOUBLE PRECISION, INTENT(in) :: c(:)
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+
+For a 2d functions, the same function should be called with a 2d spline
+\texttt{sp} and 2d array $c$.
+
+Finally \texttt{DESTROY\_SP(sp)} should be called when a spline
+\texttt{sp} is not needed anymore to clean up memory space.
+
+
+\subsection{Generating Splines with \texttt{DEF\_BASFUN}}
+
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE def_basfun(xp, sp, fun, left)
+    DOUBLE PRECISION, INTENT(in) :: xp
+    TYPE(spline1d), INTENT(in) :: sp
+    DOUBLE PRECISION, INTENT(out) :: fun(:,:)
+    INTEGER, OPTIONAL, INTENT(out) :: left
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+
+This routine computes, for a given point $\mbox{xp}\in [t_0,t_{N_x}]$, the value
+and optionally the $m$ derivatives of the $p+1$ splines \texttt{sp} which were previously
+defined and returns them in \texttt{fun(1:p+1,1:m+1)} with $m\leq p$. The maximum
+number of computed derivatives $m$ is determined by the size of the second dimension
+of the array \texttt{fun}. The subroutine will return the optional integer
+\texttt{left} defined such that:
+\[
+t_{\mbox{left}} \leq xp < t_{\mbox{left+1}}, \qquad 0\leq \mbox{left} \leq N_{x.-1}.
+\]
+
+\subsection{Example 1: Values and derivatives of all splines}
+In this example, we first initialize a cubic spline with
+the knot sequence $t_0,\ldots,t_{N_x}$ with \texttt{SET\_SPLINE}
+and then call \texttt{DEF\_BASFUN} to compute its values, first and second
+derivatives on the mesh points \texttt{xp(1:npts)}.
+
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  USE BSPLINES
+  INTEGER, PARAMETER :: nx=10, npts=100
+  DOUBLE PRECISION :: t(0:nx), xp(npts)
+  DOUBLE PRECISION, ALLOCATABLE :: fxp0(:,:), fxp1(:,:), fxp2(:,:)
+  DOUBLE PRECISION :: fun(4,3)   ! 4 cubic splines at a given xp
+                                 ! plus first and second derivatives.
+  INTEGER :: i, dim, left
+  TYPE(spline1d) :: sp
+!
+!   Define t(0:nx), xp(npts)
+!
+  CALL set_spline(3, 0, t, sp, period=.FALSE.)
+  CALL get_dim(sp, dim)
+  ALLOCATE(fxp0(npts,0:dim-1), fxp1(npts,0:dim-1), fxp2(npts,0:dim-1)
+  fxp0 = 0.0
+  fxp1 = 0.0
+  fxp2 = 0.0
+  DO i=1,npts
+     CALL def_basfun(xp(i), sp,  fun, left=left)
+     fxp0(i, left:left+3) = fun(1:4, 1)   ! Value
+     fxp1(i, left:left+3) = fun(1:4, 2)   ! 1st derivative
+     fxp2(i, left:left+3) = fun(1:4, 3)   ! 2nd derivative
+  END DO
+  DEALLOCATE(fxp0, fxp1, fxp2)
+  CALL destroy_sp(sp)
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+
+This code fragment will store \texttt{dim=nx+3=13} splines
+and theirs first 2 derivatives in \texttt{fxp0}, \texttt{fxp1} and
+\texttt{fxp2}. Change the \texttt{period} to \texttt{.TRUE.} to obtain
+\emph{periodic} splines.
+
+\section{Spline Interpolation}
+Given the interval $[a,b]$ discretized into $\{x_k,\,k=0,\ldots,N_g\}$ with
+$x_0=a$ and $x_{N_g}=b$, the problem of interpolating $f(x), x\in [a,b]$ with
+splines of degree $p$ is to solve the following equations
+for the spline coefficients $c_i$:
+\begin{equation}
+  \sum_{i=0}^{N_x+p-1}\,c_i\Lambda^p_{i}(x_k) = f(x_k),\quad k=0,\ldots,N_g.
+  \label{eq:intEq}
+\end{equation}
+
+The sequence of knots $t_0,\ldots,t_{N_x}$ defines completely the splines
+$\Lambda^p_i$ and its choice will be described in the following section.
+
+\subsection{Choice of knots}
+If  Eqs.~(\ref{eq:intEq}) are the only conditions for our interpolation
+problem,  the number of equations should match the number
+of unknowns $c_i$. The number of knot intervals $N_x$ hence has to verify
+\begin{equation}
+N_x = N_g-p+1.\label{eq:knotNum}
+\end{equation}
+
+For the \emph{periodic} case, taking into account the $p$ periodic spline conditions
+(\ref{eq:perSp}) on
+$c_i$ and $f(a)=f(b)$, this condition reduces to:
+\begin{equation}
+N_x = N_g.
+\end{equation}
+
+For \emph{odd} values of the spline degree $p$, the knots $t_i$ could be placed on
+the \emph{interpolation sites} $x_k$ while when $p$ is even, $t_i$ should not
+be on $x_k$ to avoid a \emph{badly conditioned} linear system when solving
+Eq.~(\ref{eq:intEq}). This leads to the following choice for $t_i$ in
+\texttt{BSPLINES}:
+
+\subsubsection{Periodic splines}
+The number of knots $N_x+1$ is \emph{equal} to the number of interpolation points
+$N_g+1$ with
+\begin{equation}
+  t_i =
+  \begin{cases}
+    x_i & \text{$p$ odd} \\
+    (x_{i-1}+x_i)/2 & \text{$p$ even}
+  \end{cases}
+  ,\qquad i=0,\ldots,N_x
+\end{equation}
+
+\subsubsection{Non-Periodic splines}
+In order to satisfy the equality (\ref{eq:knotNum}), first, the 2 end points
+are retained as knots:
+\begin{equation}
+t_0=x_0, \qquad t_{N_x} = x_{N_g}.
+\end{equation}
+
+For even $p$, the first $p/2$ interpolation intervals are \emph{skipped}:
+\begin{equation}
+t_i = (x_{i+p/2-1} + x_{i+p/2})/2, \qquad i=1,\ldots,N_x-1, \label{eq:evenKnots}
+\end{equation}
+
+while for odd $p$, $(p-1)/2$ interpolation points are \emph{skipped}:
+\begin{equation}
+  t_i = x_{i+(p-1)/2 }, \qquad i=1,\ldots,N_x-1.  \label{eq:oddKnots}
+\end{equation}
+
+Instead of skipping grid points, an alternative would be to supplement the
+system of equations (\ref{eq:intEq}) with conditions on derivatives of $f(x)$
+at one or both ends of $[a,b]$. This type of boundary conditions is not
+implemented in the present version of the \texttt{BSPLINES} module.
+
+\subsection{The collocation matrix}
+The \emph{collocation matrix} $\Lambda^p_i(x_k)$ of the interpolation problem
+(\ref{eq:intEq}) is a square matrix.  Each row has at most $p+1$ non-zero
+terms. Let us consider separately the non-periodic and the periodic cases.
+
+\subsubsection{The non-periodic case}
+\paragraph{Even spline degree}
+
+From (\ref{eq:evenKnots}), there are $p/2+1$ interpolation points \(x_{0}, \ldots, x_{p/2}\)
+in the first knot interval $[t_0,t_1)$. Since there are at most $p+1$ non-zero
+splines for any points in each interval (except for $x_0$ where $\Lambda_i(x_0)=\delta_{i,0}$,
+the collocation matrix starts as:
+
+\begin{equation}
+  \left(\begin{array}{llllll}
+   \Lambda_0(x_{0})  & 0                  &\cdots &  \cdots            & \cdots              &\cdots \\
+   \Lambda_0(x_{1})  &\Lambda_1(x_{1})    &\cdots &\Lambda_p(x_{1})    & 0                   &\cdots \\
+   \vdots            &\vdots              &\cdots &\vdots              & 0                   &\cdots \\
+   \Lambda_0(x_{p/2})&\Lambda_1(x_{p/2})  &\cdots &\Lambda_p(x_{p/2})  & 0                   &\cdots \\
+   0                 &\Lambda_1(x_{p/2+1})&\cdots &\Lambda_p(x_{p/2+1})& \Lambda_{p+1}(x_{p/2+1})&0 \\
+   0                 &\ddots              &\ddots &\ddots              & \ddots              &\ddots
+  \end{array}\right)
+\end{equation}
+
+The number of \emph{upper-diagonals} (non including  the diagonal) is
+obviously determined by the second row of the matrix above, which yields $p-1$.
+Since the knot placement is identical for both ends of the interpolation mesh,
+the matrix
+$\Lambda_i(x_k)$ is \emph{banded} with half-bandwidths
+\begin{equation}
+kl=ku=p-1
+\end{equation}
+
+\paragraph{Odd spline degree}
+Applying the same procedure, it is straightforward to show for $p$ odd and
+from (\ref{eq:oddKnots}), that $x_0,\ldots.x_{(p-1)/2}$ are located in the
+first knot interval $[t_0,t_1)$ and that the matrix has again the same
+half-bandwidths as in the even $p$ case.
+
+The  resulting interpolation problem can then be solved with
+the usual \emph{banded matrix factorization} followed by a \emph{back-solve}
+phase.
+
+\subsubsection{The periodic case}
+Let consider the matrix for $p=3$ and $N_x=10$ (see lower figure of
+Fig.~(\ref{fig:cubic_splines}):
+\begin{equation}
+  \left(\begin{array}{llllll}
+  \Lambda_0(x_{0}) & \Lambda_1(x_{0}) & \Lambda_2(x_{0}) & 0                & \cdots           &      \\
+  0                & \Lambda_1(x_{1}) & \Lambda_2(x_{1}) & \Lambda_3(x_{1}) & 0                &\cdots \\
+  \vdots           & \ddots           & \ddots           & \ddots           & \ddots           &\ddots \\
+  0                & \cdots           & 0                & \Lambda_7(x_{7}) & \Lambda_8(x_{7}) & \Lambda_9(x_{7}) \\
+  \Lambda_0(x_{8}) & 0                & 0                & \cdots           &\Lambda_8(x_{8})  & \Lambda_9(x_{8}) \\
+  \Lambda_0(x_{9}) &\Lambda_1(x_{9})  & 0                & 0                & \cdots           & \Lambda_9(x_{9})
+  \end{array}\right) \label{eq:perMat}
+\end{equation}
+The matrix is ``almost triangular'' (except for the last 2 rows) and is not
+\emph{diagonally dominant}! A more satisfactory (and symmetric in shape) matrix is
+however obtained by simply renumbering the splines such that the sequence
+starts with $-\lfloor p/2 \rfloor$ instead of $0$. This renumbered splines
+are  shown in Fig.~\ref{fig:fitSpl} for the cubic
+and quadratic periodic splines. With this renumbering, the matrix
+(\ref{eq:perMat})  has a more symmetric shape and is diagonally dominant:
+\begin{equation}
+  \left(\begin{array}{lllll}
+  \Lambda_0(x_{0}) & \Lambda_1(x_{0}) & 0                & \cdots           & \Lambda_9(x_{0}) \\
+  \Lambda_0(x_{1}) & \Lambda_1(x_{1}) & \Lambda_2(x_{1}) & 0                &\cdots \\
+  \vdots           & \ddots           & \ddots           & \ddots           &\ddots \\
+  0                & \cdots           & \Lambda_7(x_{8}) & \Lambda_8(x_{8}) & \Lambda_9(x_{8}) \\
+  \Lambda_0(x_{9}) & 0                & 0                & \Lambda_8(x_{9}) & \Lambda_9(x_{9})
+  \end{array}\right) \label{eq:perMatnew}
+\end{equation}
+
+In general, for arbitrary $p$ (even and odd values), the collocation matrix
+$A=\Lambda_j(x_i)$ can be written as
+
+\begin{equation}
+  A = B + UV^T
+\end{equation}
+
+where $B$ is a banded matrix with half-bandwidths $kl=ku=b=\lfloor p/2\rfloor$ and
+rank $N_x$. $U$ and $V$ are $N_x\times 2b$ sparse matrices:
+
+\begin{equation}
+  U = \left(
+  \begin{matrix}
+    I & 0 \\
+    0 & 0 \\
+    0 & I
+  \end{matrix}\right), \qquad
+  V = \left(
+  \begin{matrix}
+    0 & D^T \\
+    0 & 0 \\
+    C^T & 0
+  \end{matrix}\right), \qquad
+  V^T = \left(
+  \begin{matrix}
+    0   & 0 & C \\
+    D & 0 & 0
+  \end{matrix}\right), \qquad
+\end{equation}
+where $C$, $D$ are the $b\times b$ \emph{off-band} sub-matrices and $I$, the
+identity matrix. In the cubic spline example considered above, the
+\emph{off-band} matrices are simply $1\times 1$ matrices with
+$C=\Lambda_9(x_0)$ and $D=\Lambda_0(x_9)$.
+
+The inverse of $A$ can be deduced from the \emph{Sherman-Morrison-Woodbury formula} \cite{Golub}:
+
+\begin{eqnarray*}
+  A^{-1} &=& B^{-1} - B^{-1}U(1+V^{T}B^{-1}U)^{-1}V^{T}B^{-1} \\
+         &=& B^{-1} - ZW^{T}B^{-1},
+\end{eqnarray*}
+where
+\begin{eqnarray*}
+  Z    &=&  B^{-1}U, \\
+  H    &=& 1+V^{T}B^{-1}U \\
+  W^T  &=& H^{-1}V^{T}.
+\end{eqnarray*}
+
+The solution of the interpolation problem $Ax=f$ can then be reduced to a
+\emph{factorization} and a \emph{back-solve} phase:
+
+\begin{enumerate}
+\item Factorization
+  \begin{enumerate}
+  \item Factor: \( B \longleftarrow L_BU_B \)
+  \item Solve:  \( (L_BU_B)Z = U, \quad U\longleftarrow Z \)
+  \item Compute: \( H = 1+V^{T}Z \)
+  \item Factor:  \( H=L_HU_H \)
+  \item Solve:   \( (L_HU_H)W^{T} = V^{T}, \quad V^{T}\longleftarrow W^{T} \)
+  \end{enumerate}
+\item Back-solve
+  \begin{enumerate}
+  \item Solve: \( (L_BU_B)y = f \)
+  \item Compute: \( t = W^{T}y \)
+  \item Compute: \( x = y - Zt \)
+  \end{enumerate}
+\end{enumerate}
+
+At the end of the factorization, only the (updated) matrices $B$, $U$ and
+$V^{T}$, required in the back-solve phase, need to be saved.
+Note that we avoid to store the product
+$ZW^T$ because it is a \emph{big} $N_x\times N_x$ matrix.
+
+After the \emph{back-solve} step, the solution $x$ is \emph{shifted back} (by
+$\lfloor p/2\rfloor$) and the appropriate periodicity condition is applied to
+obtain the spline coefficients $c_j,\, j=0,\ldots,N_x+p-1$, as defined in
+(\ref{eq:splExp}).
+
+\begin{figure}[htbp]
+  \centering
+  \includegraphics[angle=0,width=\hsize]{fit}
+  \caption{The periodic cubic and quadratic splines used
+   for interpolation. The spline knots are indicated by \emph{blue full circles} and
+   the interpolation points, by \emph{dashed vertical lines} }
+  \label{fig:fitSpl}
+\end{figure}
+
+\subsection{\texttt{PP} representation}
+The computation of $f(x)$ using directly the spline expansion Eq.~(\ref{eq:splExp}) can
+be costly, because of the evaluation of the splines $\Lambda^p_j(x)$,
+especially when interpolating on large number of points. Expanding $f(x)$,
+using truncated Taylor series in each interval $[t_\mu,t_{\mu+1}]$, we obtain
+the following \emph{Piecewise Polynomial Function} representation (or
+\texttt{ppform}) of $f(x)$:
+\begin{equation}
+  f(x) = \sum^p_{k=0}\, \Pi_{k\mu}(x-t_\mu)^k, \quad t_\mu\leq x<t_{\mu+1},
+  \label{eq:ppRep}
+\end{equation}
+
+where
+
+\begin{equation}
+  \Pi_{k\mu} = \frac{1}{k!} \frac{d^k}{dx^k} f(t_\mu)
+= \frac{1}{k!} \sum_j\,c_j\frac{d^k}{dx^k} \Lambda_j(t_\mu)
+= \sum_j\,c_j\alpha_{j\mu k}.
+\label{eq:ppCoef}
+\end{equation}
+
+Note that
+\begin{equation}
+  \alpha_{j\mu k} = \frac{1}{k!} \frac{d^k}{dx^k} \Lambda_j(t_\mu)
+\label{eq:derSpl}
+\end{equation}
+
+depend only on the spline specifications. They are \emph{pre-calculated}
+in the spline setup routine \texttt{SET\_SPLINE} and stored in the
+3d arrays \texttt{sp\%val0}. The \texttt{PP} coefficients
+$\Pi_{k\mu}$ can be computed once the spline coefficients $c_j$ are available,
+using (\ref{eq:ppCoef}).
+Then the interpolated function values together with the $p$ derivatives
+can be calculated \emph{efficiently} using the power series.
+
+\begin{eqnarray*}
+   f(x) &=& \sum^p_{k=0}\, \Pi_{k\mu}(x-t_\mu)^k \\
+   f'(x) &=& \sum^p_{k=1}\, k\,\Pi_{k\mu}(x-t_\mu)^{k-1} \\
+   f''(x) &=& \sum^p_{k=2}\, k(k-1)\,\Pi_{k\mu}(x-t_\mu)^{k-2} \\
+    \vdots
+\end{eqnarray*}
+
+These 2 steps are performed in \texttt{GRIDVAL}. Note that the first step
+(computation of $\Pi_{k\mu}$ from $c_j$) can be skipped for subsequent calls
+to \texttt{GRIDVAL} with the same function $f$, for example to compute $f$ or
+its derivatives at any others points $x$.
+
+\subsection{Example 2: Cubic spline interpolation}
+Given a function $f$ with its grid values \texttt{f(1:nx)}, the
+following example determines the spline coefficients \texttt{c(1:dim)}. Using these
+coefficients, the interpolated $f$ and derivative $f'$ are then computed on the mesh points
+\texttt{xp(1:npts)}. Note that the second call to \texttt{GRIDVAL} does not
+include the spline coefficients \texttt{c} to signal that the previously calculated
+\texttt{PP} coefficients will be reused.
+
+
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  USE BSPLINES
+  INTEGER, PARAMETER :: nx=10, npts=100
+  DOUBLE PRECISION :: x(0:nx), f(0:nx), xp(npts), fp0(npts), fp1(npts)
+  DOUBLE PRECISION, ALLOCATABLE :: c(:)
+  INTEGER :: dim
+  TYPE(spline1d) :: sp
+!
+!   Define x and f
+!
+  CALL set_splcoef(3, x, sp, period=.FALSE.)
+  CALL get_dim(sp, dim)
+  ALLOCATE(c(dim))
+  CALL get_splcoef(sp, f, c)   ! compute spline coefs c(1:dim)
+!
+!   Compute interpolated f and f' on xp(npts)
+!
+  CALL gridval(sp, xp, fp0, 0, c)
+  CALL gridval(sp, xp, fp1, 1)
+!
+  DEALLOCATE(c)
+  CALL destroy_sp(sp)
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+
+The description of each of the routines called in the example above is briefly
+given below:
+
+\begin{description}
+\item[\texttt{SET\_SPLCOEF}] Determines the spline knots, sets up the splines
+  with \texttt{SET\_SPLINE}, assembles the collocation matrix
+  and performs its \emph{factorization}.
+\item[\texttt{GET\_SPLCOEF}] Computes the spline \emph{coefficients} $c$ from the input
+  grid values of function $f$ (\emph{back-solve phase}), using the factorized matrix.
+\item[\texttt{GRIDVAL}] Compute the \texttt{PP} coefficients using
+  (\ref{eq:ppCoef}) \emph{if $c$ is provided}, locates the interval containing
+  the given point $x$ and computes interpolated function values or derivatives
+  using the \texttt{PP} representation (\ref{eq:ppCoef}).
+\end{description}
+
+
+\subsection{2d interpolation}
+Consider the spline interpolation on the plane $(x,y)$, using a tensor
+product of splines defined as follow
+\begin{equation}
+ \Lambda^{p, q}_{ij}(x,y)=\Lambda^p_i(x)\Lambda^q_j(y), \qquad
+ \begin{aligned}[c]
+  i&=1,\ldots,d_1=N_1+p,\\
+  j&=1,\ldots,d_2=N_2+q,
+ \end{aligned}
+\end{equation}
+
+where $(p,q)$ are the spline degrees , $(N_1,N_2)$, the number of knot
+intervals in each direction:
+
+\[ t_0 \leq x < t_{N_1}, \quad s_0 \leq y < s_{N_2}.\]
+
+\subsubsection{Spline coefficients (\texttt{GET\_SPLCOEF})}
+
+The 2d version of (\ref{eq:intEq}) can be written as:
+
+\begin{equation}
+  \begin{split}
+  \sum_{ij}\,c_{ij}\Lambda^p_{i}(x_\mu)\Lambda^q_{j}(y_\nu) &= f(x_\mu,y_\nu)\\
+    &=f_{\mu\nu}\\
+  \end{split}
+\qquad
+ \begin{aligned}[c]
+  \mu&=0,\ldots,N_{g1}\\
+  \nu&=0,\ldots,N_{g2}
+ \end{aligned}
+\end{equation}
+
+where $(x_\mu,y_\nu)$ are the \emph{interpolation sites} on the $(x,y)$ plane.
+These equations can be rearranged into
+\begin{eqnarray}
+  \sum_i\,\bar{c}_{i\nu}\Lambda_i(x_\mu) &=& f_{\mu\nu},\\
+  \sum_j\,c_{ij}\Lambda_j(y_\nu)         &=& \bar{c}_{i\nu}.
+\end{eqnarray}
+
+Such a 2 step procedure is implemented, using the 1d version
+of \texttt{GET\_SPLCOEF} in the 2d version of
+\texttt{GET\_SPLCOEF} by the following code fragment:
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+    TYPE(spline2d) :: sp
+    DOUBLE PRECISION :: ctr(SIZE(c,2), SIZE(c,1))
+    CALL get_splcoefn(sp%sp1, f, c)
+    CALL get_splcoefn(sp%sp2, TRANSPOSE(c), ctr)
+    c = TRANSPOSE(ctr)
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+
+\subsubsection{\texttt{PP} representation (\texttt{GRIDVAL})}
+Let us start with the \emph{spline representation}, for $f(x,y)$ with
+$t_\mu\leq x < t_{\mu+1}$ and $s_\nu \leq y < s_{\nu+1}$:
+\begin{equation}
+  f(x,y) = \sum_{j=1}^{d_2}\left( \sum_{i=1}^{d_1}\,c_{ij}\Lambda^p_i(x)\right)\Lambda^q_j(y).
+\end{equation}
+
+Applying successively (\ref{eq:ppRep}) to the $x$ and $y$ functional dependency
+yields the following \texttt{PP} representation for $f(x,y)$:
+
+\begin{equation}
+  f(x,y) = \sum^q_{k'=0}\left(\sum^p_{k=0}\,\Pi_{k\mu k'\nu}(x-t_\mu)^k\right)(y-s_\nu)^{k'}
+  \label{eq:pp2Rep}
+\end{equation}
+
+The 2d \texttt{PP} coefficient is the \emph{tensor product} of 2 1d
+\texttt{PP} coefficients:
+
+\begin{equation}
+  \Pi_{k\mu k'\nu} = \sum_{l'=1}^{d_2}\left(\sum_{l=1}^{d_1}\,c_{ll'}\,\alpha_{l\mu k}\right)\alpha'_{l'\nu k'}.
+\label{eq:pp2Coef}
+\end{equation}
+
+where $\alpha$ and $\alpha'$ are respectively the derivatives (\ref{eq:derSpl}) of all splines
+in $x$ and $y$ direction. All the derivatives of $f$ can be deduced straightforwardly
+from the \texttt{PP} representation (\ref{eq:pp2Rep}).
+
+In the present version, the 2d \texttt{GRIDVAL} can be called, either for (1) points
+on a 2d structured mesh: \texttt{XP(1:NPX), YP(1:NPY)} and returns the array
+\texttt{FP(1:NPX,1:NPY)}, or (2) with a 1d sequence of points
+\texttt{XP(1:NP), YP(1:NP)} and returns the 1d array \texttt{FP(1:NP)}.
+
+
+\section{Finite Elements using Splines}
+\subsection{The weak form}
+\subsection{The matrix assembly}
+\subsection{The boundary conditions}
+\subsubsection{Dirichlet condition}
+Dirichlet BC can be simply applied by imposing the conditions on the
+spline coefficients and the boundary point.
+For the BC $u(x=0)=u_{1} = c$ for example,
+the discrete equations can be expressed as:
+
+\begin{equation}
+     \left(\begin{matrix}
+       1 & 0 & \cdots \\
+       A_{21} & A_{22} & \cdots \\
+       \vdots & \vdots & \ddots \\
+     \end{matrix}\right)
+     \left(\begin{matrix}
+       u_{1} \\
+       u_{2} \\
+       \vdots\\
+       u_{N} \\
+     \end{matrix}\right) =
+     \left(\begin{matrix}
+       c \\
+       f_{2} \\
+       \vdots\\
+       f_{N} \\
+     \end{matrix}\right)
+\end{equation}
+A more appropriate transformed system which
+preserves any symmetry of the original system is:
+
+\begin{equation}
+     \left(\begin{matrix}
+       1 & 0 & \cdots \\
+       0 & A_{22} & \cdots \\
+       \vdots & \vdots & \ddots \\
+     \end{matrix}\right)
+     \left(\begin{matrix}
+       u_{1} \\
+       u_{2} \\
+       \vdots\\
+       u_{N} \\
+    \end{matrix}\right) =
+     \left(\begin{matrix}
+       c \\
+       f_{2} -cA_{21}\\
+       \vdots\\
+       f_{N} -cA_{N1}\\
+     \end{matrix}\right)
+\end{equation}
+
+
+\subsubsection{Unicity on the axis}
+Denoting the $N$ solutions at the axis by $(u_1, \ldots, u_N)$  , and
+their transforms by $(\hat u_1, \ldots, \hat u_N)$ defined by
+
+\begin{equation} \begin{array}{ccc}
+     u_1-u_N = \hat u_1  &  &   u_1 = \hat u_1 + \hat u_N   \\
+     u_2-u_N = \hat u_2  &  &   u_2 = \hat u_2 + \hat u_N   \\
+     \vdots        & \Longrightarrow & \vdots               \\
+     u_{N-1}-u_N = \hat u_{N-1}  &  &   u_{N-1} = \hat u_{1-1} + \hat u_N   \\
+     u_N = \hat u_N      &  &   u_N = \hat u_N,
+    \end{array}  \label{eq:unicity1} \end{equation}
+the unicity condition can be specified by simply imposing
+
+\begin{equation}
+  \hat u_1=\hat u_2=\ldots=\hat u_{N-1}=0. \label{eq:unicity2}
+\end{equation}
+From (\ref{eq:unicity1}), the \emph{transformation matrix} \(\mathbf U\) is defined
+as
+
+\begin{equation}
+  \mathbf{u} = \mathbf{ U \cdot\hat u}, \qquad \mathbf{U} =
+     \left(\begin{matrix}
+        1 & 0 & \dots & 0 & 1 \\
+        0 & 1 & \dots & 0 & 1 \\
+          &   & \ddots&   & \vdots \\
+        0 & 0 & \dots & 1 & 1 \\
+        0 & 0 & \dots & 0 & 1
+     \end{matrix}\right), \quad \mathbf{U^{T}} =
+     \left(\begin{matrix}
+        1 & 0 & \dots & 0 & 0 \\
+        0 & 1 & \dots & 0 & 0 \\
+          &   & \ddots&   & \vdots \\
+        0 & 0 & \dots & 1 & 0 \\
+        1 & 1 & \dots & 1 & 1
+     \end{matrix}\right).
+\end{equation}
+
+
+\paragraph{Matrix product \( \mathbf{A\cdot U}\)}
+\begin{equation}
+\mathbf{ A\cdot U} =
+     \left(\begin{array}{lllll}
+        A_{1,1} & A_{1,2}  & \dots & A_{1,N-1} &  \sum_{j} A_{1,j}  \\
+        A_{2,1} & A_{2,2}  & \dots & A_{2,N-1} &  \sum_{j} A_{2,j}  \\
+          &   & \ddots&   & \vdots \\
+        A_{N-1,1} & A_{N-1,2}  & \dots & A_{N-1,N-1} &  \sum_{j}A_{N-1,j}  \\
+        A_{N,1} & A_{N,2}  & \dots & A_{N,N-1} &  \sum_{j}A_{N,j}
+     \end{array}\right).
+\end{equation}
+Thus \emph{right multiply by \(\mathbf{U}\)} is equivalent to put the
+\emph{the sum of each row on the last column}.
+
+\paragraph{Matrix product \( \mathbf{ U^T \cdot A}\)}
+\begin{equation}
+\mathbf{ U^T \cdot A} =
+     \left(\begin{array}{lllll}
+        A_{1,1} & A_{1,2}  & \dots & A_{1,N-1} &  A_{1,N}  \\
+        A_{2,1} & A_{2,2}  & \dots & A_{2,N-1} &  A_{2,N}  \\
+          &   & \ddots&   & \vdots \\
+        A_{N-1,1} & A_{N-1,2}  & \dots & A_{N-1,N-1} & A_{N-1,N}  \\
+        \sum_{i}A_{i,1} & \sum_{i}A_{i,2} & \dots & \sum_{i}A_{i,N-1} &
+        \sum_{i}A_{i,N}
+     \end{array}\right).
+\end{equation}
+Thus \emph{left multiply by \(\mathbf{\hat U}\)} is equivalent to put the
+\emph{the sum of each column  on the last row}.
+
+\paragraph{Product \( \mathbf{\hat U \cdot b}\)}
+\begin{equation}
+\mathbf{\hat b} = \mathbf{U^T\cdot b} =
+     \left(\begin{array}{l}
+        b_1 \\
+        b_2 \\
+        \vdots \\
+        b_{N-1} \\
+        \sum_{i} b_{i}
+     \end{array}\right),
+\end{equation}
+
+\paragraph{Transformation of the original matrix equation}
+The full original linear system, obtained from the discretization of the
+2D \(r,\theta\) polar coordinates can be written as:
+
+\begin{equation}
+     \left(\begin{array}{ll}
+       \mathbf{A} & \mathbf{B} \\
+       \mathbf{C} & \mathbf{D}
+     \end{array}\right)
+     \left(\begin{array}{l}
+       \mathbf{u} \\
+       \mathbf{v}
+     \end{array}\right) =
+     \left(\begin{array}{l}
+       \mathbf{b} \\
+       \mathbf{c}
+     \end{array}\right), \label{eq:orig_matrix_eq}
+\end{equation}
+where the solution array is split into the solutions \(\mathbf{u}\) at \(r=0\) and
+the solutions \(\mathbf{v}\) on the remaining domain. The transformed system can
+thus be written as
+
+\begin{equation*}
+     \left(\begin{array}{ll}
+       \mathbf{U^T} & 0 \\
+       0            & \mathbf{I}
+     \end{array}\right)
+     \left(\begin{array}{ll}
+       \mathbf{A} & \mathbf{B} \\
+       \mathbf{C} & \mathbf{D}
+     \end{array}\right)
+     \left(\begin{array}{ll}
+       \mathbf{U} & 0 \\
+       0            & \mathbf{I}
+     \end{array}\right)
+     \left(\begin{array}{l}
+       \mathbf{\hat u} \\
+       \mathbf{v}
+     \end{array}\right) =
+     \left(\begin{array}{ll}
+       \mathbf{U^T} &0  \\
+       0            & \mathbf{I}
+     \end{array}\right)
+     \left(\begin{array}{l}
+       \mathbf{b} \\
+       \mathbf{c}
+     \end{array}\right),
+\end{equation*}
+
+\begin{equation}
+   \Longrightarrow
+     \left(\begin{array}{cc}
+       \mathbf{U^TAU} & \mathbf{U^TB} \\
+       \mathbf{CU} & \mathbf{D}
+     \end{array}\right)
+     \left(\begin{array}{l}
+       \mathbf{\hat u} \\
+       \mathbf{v}
+     \end{array}\right) =
+     \left(\begin{array}{c}
+       \mathbf{U^Tb} \\
+       \mathbf{c}
+     \end{array}\right),
+\end{equation}
+Notice that the transformation preserves any symmetry existing in the original system
+(\ref{eq:orig_matrix_eq}). The transformed matrix is finally given in the following where
+only the modified elements are shown and the sum is only over the first \(N\)
+points in \(\theta\) direction. The \(\times\) symbol denotes unmodified elements.
+
+\begin{equation}
+     \left(\begin{array}{lllllll}
+        \times & \times & \times & \times &  \sum_{j} A_{1,j}  & \times & \times \\
+        \times & \times & \times & \times &  \sum_{j} A_{2,j}  & \times & \times \\
+        \times & \times & \times & \times &  \vdots            & \times & \times \\
+        \times & \times & \times & \times & \sum_{j} A_{N-1,j} & \times & \times \\
+         \sum_{i}A_{i,1} & \sum_{i}A_{i,2}  & \dots & \sum_{i}A_{i,N-1} &
+         \sum_{i,j}A_{i,j} &  \sum_{i}A_{i,N+1} & \dots \\
+        \times & \times & \times & \times & \sum_{j} A_{N+1,j} & \times & \times \\
+        \times & \times & \times & \times &  \vdots            & \times & \times
+     \end{array}\right)
+\end{equation}
+Only the \(N^{th}\) column and the  \(N^{th}\) row are affected by the transformation.
+Applying now the unicity condition (\ref{eq:unicity2}) the final transformed system
+reads:
+
+\begin{equation}
+     \left(\begin{array}{lllllll}
+        1 & 0 & \dots & 0 & 0 & 0 & 0 \\
+        0 & 1 & \dots  & 0 & 0 & 0 & 0 \\
+        0 & 0 & \ddots & 0 &  \vdots            & 0 & 0 \\
+        0 & 0 & \dots  & 1 & 0 & 0 & 0 \\
+        0 & 0 & \dots  & 0 & \sum_{i,j}A_{i,j} &  \sum_{i}A_{i,N+1} & \dots \\
+        0 & 0 & \dots  & 0 & \sum_{j} A_{N+1,j} & \times & \times \\
+        0 & 0 & \dots  & 0 &  \vdots            & \times & \times
+     \end{array}\right)
+     \left(\begin{array}{l}
+        \hat u_1   \\
+        \hat u_2   \\
+        \vdots\\
+        \hat u_{N-1}\\
+        \hat u_{N} \\
+        u_{N+1} \\
+        \vdots
+     \end{array}\right) =
+     \left(\begin{array}{l}
+        0   \\
+        0   \\
+        \vdots\\
+        0   \\
+        \sum_{i} b_{i} \\
+        b_{N+1} \\
+        \vdots
+     \end{array}\right).
+\end{equation}
+
+
+\begin{thebibliography}{99}
+\bibitem{deBoor} C. de Boor, \emph{A Practical Guide to Splines}, Applied
+  Mathematical Sciences, vol.~27 (Springer, NY, 2001).
+\bibitem{Golub} G.H. Golub, C.F. Van Loan, \emph{Matrix Computation, 3rd
+  Edition}, p.5 (The John Hopkins University Press, 1996).
+\end{thebibliography}
+
+\end{document}
diff --git a/docs/manual/driv1.eps b/docs/manual/driv1.eps
new file mode 100644
index 0000000..9d6976e
--- /dev/null
+++ b/docs/manual/driv1.eps
@@ -0,0 +1,1095 @@
+%!PS-Adobe-3.0 EPSF-3.0
+%%Creator: MATLAB, The Mathworks, Inc.
+%%Title: /home/ttran/Utils/bsplines/docs/driv1.eps
+%%CreationDate: 07/16/2007  08:16:38
+%%DocumentNeededFonts: Helvetica
+%%DocumentProcessColors: Cyan Magenta Yellow Black
+%%LanguageLevel: 2
+%%Pages: 1
+%%BoundingBox:   -74   171   686   620
+%%EndComments
+
+%%BeginProlog
+% MathWorks dictionary
+/MathWorks 160 dict begin
+% definition operators
+/bdef {bind def} bind def
+/ldef {load def} bind def
+/xdef {exch def} bdef
+/xstore {exch store} bdef
+% operator abbreviations
+/c  /clip ldef
+/cc /concat ldef
+/cp /closepath ldef
+/gr /grestore ldef
+/gs /gsave ldef
+/mt /moveto ldef
+/np /newpath ldef
+/cm /currentmatrix ldef
+/sm /setmatrix ldef
+/rm /rmoveto ldef
+/rl /rlineto ldef
+/s {show newpath} bdef
+/sc {setcmykcolor} bdef
+/sr /setrgbcolor ldef
+/sg /setgray ldef
+/w /setlinewidth ldef
+/j /setlinejoin ldef
+/cap /setlinecap ldef
+/rc {rectclip} bdef
+/rf {rectfill} bdef
+% page state control
+/pgsv () def
+/bpage {/pgsv save def} bdef
+/epage {pgsv restore} bdef
+/bplot /gsave ldef
+/eplot {stroke grestore} bdef
+% orientation switch
+/portraitMode 0 def /landscapeMode 1 def /rotateMode 2 def
+% coordinate system mappings
+/dpi2point 0 def
+% font control
+/FontSize 0 def
+/FMS {/FontSize xstore findfont [FontSize 0 0 FontSize neg 0 0]
+  makefont setfont} bdef
+/reencode {exch dup where {pop load} {pop StandardEncoding} ifelse
+  exch dup 3 1 roll findfont dup length dict begin
+  { 1 index /FID ne {def}{pop pop} ifelse } forall
+  /Encoding exch def currentdict end definefont pop} bdef
+/isroman {findfont /CharStrings get /Agrave known} bdef
+/FMSR {3 1 roll 1 index dup isroman {reencode} {pop pop} ifelse
+  exch FMS} bdef
+/csm {1 dpi2point div -1 dpi2point div scale neg translate
+ dup landscapeMode eq {pop -90 rotate}
+  {rotateMode eq {90 rotate} if} ifelse} bdef
+% line types: solid, dotted, dashed, dotdash
+/SO { [] 0 setdash } bdef
+/DO { [.5 dpi2point mul 4 dpi2point mul] 0 setdash } bdef
+/DA { [6 dpi2point mul] 0 setdash } bdef
+/DD { [.5 dpi2point mul 4 dpi2point mul 6 dpi2point mul 4
+  dpi2point mul] 0 setdash } bdef
+% macros for lines and objects
+/L {lineto stroke} bdef
+/MP {3 1 roll moveto 1 sub {rlineto} repeat} bdef
+/AP {{rlineto} repeat} bdef
+/PDlw -1 def
+/W {/PDlw currentlinewidth def setlinewidth} def
+/PP {closepath eofill} bdef
+/DP {closepath stroke} bdef
+/MR {4 -2 roll moveto dup  0 exch rlineto exch 0 rlineto
+  neg 0 exch rlineto closepath} bdef
+/FR {MR stroke} bdef
+/PR {MR fill} bdef
+/L1i {{currentfile picstr readhexstring pop} image} bdef
+/tMatrix matrix def
+/MakeOval {newpath tMatrix currentmatrix pop translate scale
+0 0 1 0 360 arc tMatrix setmatrix} bdef
+/FO {MakeOval stroke} bdef
+/PO {MakeOval fill} bdef
+/PD {currentlinewidth 2 div 0 360 arc fill
+   PDlw -1 eq not {PDlw w /PDlw -1 def} if} def
+/FA {newpath tMatrix currentmatrix pop translate scale
+  0 0 1 5 -2 roll arc tMatrix setmatrix stroke} bdef
+/PA {newpath tMatrix currentmatrix pop	translate 0 0 moveto scale
+  0 0 1 5 -2 roll arc closepath tMatrix setmatrix fill} bdef
+/FAn {newpath tMatrix currentmatrix pop translate scale
+  0 0 1 5 -2 roll arcn tMatrix setmatrix stroke} bdef
+/PAn {newpath tMatrix currentmatrix pop translate 0 0 moveto scale
+  0 0 1 5 -2 roll arcn closepath tMatrix setmatrix fill} bdef
+/vradius 0 def /hradius 0 def /lry 0 def
+/lrx 0 def /uly 0 def /ulx 0 def /rad 0 def
+/MRR {/vradius xdef /hradius xdef /lry xdef /lrx xdef /uly xdef
+  /ulx xdef newpath tMatrix currentmatrix pop ulx hradius add uly
+  vradius add translate hradius vradius scale 0 0 1 180 270 arc 
+  tMatrix setmatrix lrx hradius sub uly vradius add translate
+  hradius vradius scale 0 0 1 270 360 arc tMatrix setmatrix
+  lrx hradius sub lry vradius sub translate hradius vradius scale
+  0 0 1 0 90 arc tMatrix setmatrix ulx hradius add lry vradius sub
+  translate hradius vradius scale 0 0 1 90 180 arc tMatrix setmatrix
+  closepath} bdef
+/FRR {MRR stroke } bdef
+/PRR {MRR fill } bdef
+/MlrRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lry uly sub 2 div def
+  newpath tMatrix currentmatrix pop ulx rad add uly rad add translate
+  rad rad scale 0 0 1 90 270 arc tMatrix setmatrix lrx rad sub lry rad
+  sub translate rad rad scale 0 0 1 270 90 arc tMatrix setmatrix
+  closepath} bdef
+/FlrRR {MlrRR stroke } bdef
+/PlrRR {MlrRR fill } bdef
+/MtbRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lrx ulx sub 2 div def
+  newpath tMatrix currentmatrix pop ulx rad add uly rad add translate
+  rad rad scale 0 0 1 180 360 arc tMatrix setmatrix lrx rad sub lry rad
+  sub translate rad rad scale 0 0 1 0 180 arc tMatrix setmatrix
+  closepath} bdef
+/FtbRR {MtbRR stroke } bdef
+/PtbRR {MtbRR fill } bdef
+/stri 6 array def /dtri 6 array def
+/smat 6 array def /dmat 6 array def
+/tmat1 6 array def /tmat2 6 array def /dif 3 array def
+/asub {/ind2 exch def /ind1 exch def dup dup
+  ind1 get exch ind2 get sub exch } bdef
+/tri_to_matrix {
+  2 0 asub 3 1 asub 4 0 asub 5 1 asub
+  dup 0 get exch 1 get 7 -1 roll astore } bdef
+/compute_transform {
+  dmat dtri tri_to_matrix tmat1 invertmatrix 
+  smat stri tri_to_matrix tmat2 concatmatrix } bdef
+/ds {stri astore pop} bdef
+/dt {dtri astore pop} bdef
+/db {2 copy /cols xdef /rows xdef mul dup 3 mul string
+  currentfile 
+  3 index 0 eq {/ASCIIHexDecode filter}
+  {/ASCII85Decode filter 3 index 2 eq {/RunLengthDecode filter} if }
+  ifelse exch readstring pop
+  dup 0 3 index getinterval /rbmap xdef
+  dup 2 index dup getinterval /gbmap xdef
+  1 index dup 2 mul exch getinterval /bbmap xdef pop pop}bdef
+/it {gs np dtri aload pop moveto lineto lineto cp c
+  cols rows 8 compute_transform 
+  rbmap gbmap bbmap true 3 colorimage gr}bdef
+/il {newpath moveto lineto stroke}bdef
+currentdict end def
+%%EndProlog
+
+%%BeginSetup
+MathWorks begin
+
+0 cap
+
+end
+%%EndSetup
+
+%%Page: 1 1
+%%BeginPageSetup
+%%PageBoundingBox:   -74   171   686   620
+MathWorks begin
+bpage
+%%EndPageSetup
+
+%%BeginObject: obj1
+bplot
+
+/dpi2point 12 def
+portraitMode -0888 7440 csm
+
+    0     0  9128  5386 rc
+90 dict begin %Colortable dictionary
+/c0 { 0.000000 0.000000 0.000000 sr} bdef
+/c1 { 1.000000 1.000000 1.000000 sr} bdef
+/c2 { 0.900000 0.000000 0.000000 sr} bdef
+/c3 { 0.000000 0.820000 0.000000 sr} bdef
+/c4 { 0.000000 0.000000 0.800000 sr} bdef
+/c5 { 0.910000 0.820000 0.320000 sr} bdef
+/c6 { 1.000000 0.260000 0.820000 sr} bdef
+/c7 { 0.000000 0.820000 0.820000 sr} bdef
+c0
+1 j
+1 sg
+   0    0 9129 5387 rf
+6 w
+0 1837 7074 0 0 -1837 1187 2241 4 MP
+PP
+-7074 0 0 1837 7074 0 0 -1837 1187 2241 5 MP stroke
+4 w
+DO
+SO
+6 w
+0 sg
+1187 2241 mt 8261 2241 L
+1187  404 mt 8261  404 L
+1187 2241 mt 1187  404 L
+8261 2241 mt 8261  404 L
+1187 2241 mt 8261 2241 L
+1187 2241 mt 1187  404 L
+1187 2241 mt 1187 2170 L
+1187  404 mt 1187  474 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+1154 2386 mt 
+(0) s
+1894 2241 mt 1894 2170 L
+1894  404 mt 1894  474 L
+1861 2386 mt 
+(1) s
+2601 2241 mt 2601 2170 L
+2601  404 mt 2601  474 L
+2568 2386 mt 
+(2) s
+3309 2241 mt 3309 2170 L
+3309  404 mt 3309  474 L
+3276 2386 mt 
+(3) s
+4016 2241 mt 4016 2170 L
+4016  404 mt 4016  474 L
+3983 2386 mt 
+(4) s
+4724 2241 mt 4724 2170 L
+4724  404 mt 4724  474 L
+4691 2386 mt 
+(5) s
+5431 2241 mt 5431 2170 L
+5431  404 mt 5431  474 L
+5398 2386 mt 
+(6) s
+6138 2241 mt 6138 2170 L
+6138  404 mt 6138  474 L
+6105 2386 mt 
+(7) s
+6846 2241 mt 6846 2170 L
+6846  404 mt 6846  474 L
+6813 2386 mt 
+(8) s
+7553 2241 mt 7553 2170 L
+7553  404 mt 7553  474 L
+7520 2386 mt 
+(9) s
+8261 2241 mt 8261 2170 L
+8261  404 mt 8261  474 L
+8195 2386 mt 
+(10) s
+1187 2241 mt 1257 2241 L
+8261 2241 mt 8190 2241 L
+1086 2285 mt 
+(0) s
+1187 1873 mt 1257 1873 L
+8261 1873 mt 8190 1873 L
+ 986 1917 mt 
+(0.2) s
+1187 1506 mt 1257 1506 L
+8261 1506 mt 8190 1506 L
+ 986 1550 mt 
+(0.4) s
+1187 1138 mt 1257 1138 L
+8261 1138 mt 8190 1138 L
+ 986 1182 mt 
+(0.6) s
+1187  771 mt 1257  771 L
+8261  771 mt 8190  771 L
+ 986  815 mt 
+(0.8) s
+1187  404 mt 1257  404 L
+8261  404 mt 8190  404 L
+1086  448 mt 
+(1) s
+1187 2241 mt 8261 2241 L
+1187  404 mt 8261  404 L
+1187 2241 mt 1187  404 L
+8261 2241 mt 8261  404 L
+gs 1187 404 7075 1838 rc
+/c8 { 0.000000 0.000000 1.000000 sr} bdef
+c8
+72 0 71 0 72 0 71 0 72 0 71 0 72 0 71 0 
+72 0 71 0 71 0 72 0 71 0 72 0 71 0 72 0 
+71 0 72 0 71 0 72 0 71 0 71 0 72 0 71 0 
+72 0 71 0 72 0 71 0 72 0 71 0 72 0 71 0 
+71 0 72 0 71 0 72 0 71 0 72 0 71 0 72 0 
+71 0 72 0 71 0 71 0 72 0 71 0 72 0 71 0 
+72 0 71 0 72 0 71 0 72 0 71 0 71 0 72 0 
+71 0 72 0 71 0 72 0 71 0 72 0 71 0 72 0 
+71 0 71 0 72 0 71 0 72 0 71 0 72 0 71 0 
+72 0 71 0 72 0 71 0 71 0 72 0 71 0 72 0 
+71 0 72 0 71 0 72 0 71 0 72 0 71 0 71 0 
+72 0 71 2 72 11 71 34 72 66 71 110 72 166 71 233 
+72 312 71 401 71 502 1187 404 100 MP stroke
+/c9 { 0.000000 1.000000 0.000000 sr} bdef
+c9
+72 0 71 0 72 0 71 0 72 0 71 0 72 0 71 0 
+72 0 71 0 71 0 72 0 71 0 72 0 71 0 72 0 
+71 0 72 0 71 0 72 0 71 0 71 0 72 0 71 0 
+72 0 71 0 72 0 71 0 72 0 71 0 72 0 71 0 
+71 0 72 0 71 0 72 0 71 0 72 0 71 0 72 0 
+71 0 72 0 71 0 71 0 72 0 71 0 72 0 71 0 
+72 0 71 0 72 0 71 0 72 0 71 0 71 0 72 0 
+71 0 72 0 71 0 72 0 71 0 72 0 71 0 72 0 
+71 0 71 0 72 0 71 0 72 0 71 0 72 0 71 0 
+72 0 71 0 72 0 71 0 71 0 72 0 71 0 72 1 
+71 2 72 8 71 15 72 27 71 40 72 56 71 76 71 98 
+72 123 71 148 72 158 71 149 72 119 71 69 72 1 71 -89 
+72 -198 71 -327 71 -476 1187 2241 100 MP stroke
+/c10 { 1.000000 0.000000 0.000000 sr} bdef
+c10
+72 0 71 0 72 0 71 0 72 0 71 0 72 0 71 0 
+72 0 71 0 71 0 72 0 71 0 72 0 71 0 72 0 
+71 0 72 0 71 0 72 0 71 0 71 0 72 0 71 0 
+72 0 71 0 72 0 71 0 72 0 71 0 72 0 71 0 
+71 0 72 0 71 0 72 0 71 0 72 0 71 0 72 0 
+71 0 72 0 71 0 71 0 72 0 71 0 72 0 71 0 
+72 0 71 0 72 0 71 0 72 0 71 0 71 0 72 0 
+71 0 72 0 71 0 72 0 71 0 72 0 71 0 72 0 
+71 0 71 0 72 0 71 0 72 0 71 1 72 1 71 5 
+72 9 71 17 72 26 71 36 71 50 72 63 71 80 72 98 
+71 112 72 118 71 118 72 112 71 99 72 79 71 52 71 20 
+72 -20 71 -63 72 -102 71 -128 72 -145 71 -152 72 -147 71 -132 
+72 -108 71 -72 71 -27 1187 2241 100 MP stroke
+/c11 { 0.000000 1.000000 1.000000 sr} bdef
+c11
+72 0 71 0 72 0 71 0 72 0 71 0 72 0 71 0 
+72 0 71 0 71 0 72 0 71 0 72 0 71 0 72 0 
+71 0 72 0 71 0 72 0 71 0 71 0 72 0 71 0 
+72 0 71 0 72 0 71 0 72 0 71 0 72 0 71 0 
+71 0 72 0 71 0 72 0 71 0 72 0 71 0 72 0 
+71 0 72 0 71 0 71 0 72 0 71 0 72 0 71 0 
+72 0 71 0 72 0 71 0 72 0 71 0 71 0 72 0 
+71 0 72 0 71 0 72 1 71 1 72 4 71 9 72 16 
+71 25 71 35 72 48 71 62 72 79 71 96 72 111 71 120 
+72 123 71 121 72 114 71 99 71 81 72 55 71 24 72 -10 
+71 -44 72 -71 71 -93 72 -108 71 -119 72 -123 71 -122 71 -115 
+72 -103 71 -85 72 -69 71 -53 72 -40 71 -29 72 -19 71 -12 
+72 -6 71 -2 71 -1 1187 2241 100 MP stroke
+/c12 { 1.000000 0.000000 1.000000 sr} bdef
+c12
+72 0 71 0 72 0 71 0 72 0 71 0 72 0 71 0 
+72 0 71 0 71 0 72 0 71 0 72 0 71 0 72 0 
+71 0 72 0 71 0 72 0 71 0 71 0 72 0 71 0 
+72 0 71 0 72 0 71 0 72 0 71 0 72 0 71 0 
+71 0 72 0 71 0 72 0 71 0 72 0 71 0 72 0 
+71 0 72 0 71 0 71 0 72 0 71 0 72 0 71 0 
+72 0 71 1 72 1 71 3 72 9 71 15 71 24 72 34 
+71 47 72 60 71 77 72 95 71 109 72 120 71 123 72 122 
+71 114 71 101 72 83 71 58 72 28 71 -8 72 -40 71 -69 
+72 -90 71 -108 72 -117 71 -123 71 -123 72 -116 71 -104 72 -88 
+71 -70 72 -55 71 -41 72 -30 71 -20 72 -12 71 -7 71 -2 
+72 0 71 -1 72 0 71 0 72 0 71 0 72 0 71 0 
+72 0 71 0 71 0 1187 2241 100 MP stroke
+/c13 { 1.000000 1.000000 0.000000 sr} bdef
+c13
+72 0 71 0 72 0 71 0 72 0 71 0 72 0 71 0 
+72 0 71 0 71 0 72 0 71 0 72 0 71 0 72 0 
+71 0 72 0 71 0 72 0 71 0 71 0 72 0 71 0 
+72 0 71 0 72 0 71 0 72 0 71 0 72 0 71 0 
+71 0 72 0 71 0 72 0 71 0 72 0 71 0 72 1 
+71 0 72 4 71 8 71 14 72 23 71 33 72 45 71 60 
+72 75 71 92 72 109 71 118 72 124 71 122 71 115 72 103 
+71 84 72 61 71 31 72 -3 71 -38 72 -66 71 -89 72 -105 
+71 -117 71 -123 72 -123 71 -117 72 -106 71 -89 72 -72 71 -56 
+72 -42 71 -31 72 -21 71 -13 71 -7 72 -3 71 0 72 -1 
+71 0 72 0 71 0 72 0 71 0 72 0 71 0 71 0 
+72 0 71 0 72 0 71 0 72 0 71 0 72 0 71 0 
+72 0 71 0 71 0 1187 2241 100 MP stroke
+0 sg
+72 0 71 0 72 0 71 0 72 0 71 0 72 0 71 0 
+72 0 71 0 71 0 72 0 71 0 72 0 71 0 72 0 
+71 0 72 0 71 0 72 0 71 0 71 0 72 0 71 0 
+72 0 71 0 72 0 71 0 72 0 71 1 72 0 71 3 
+71 8 72 14 71 21 72 32 71 44 72 58 71 73 72 91 
+71 107 72 118 71 123 71 123 72 116 71 104 72 87 71 63 
+72 35 71 0 72 -35 71 -63 72 -87 71 -104 71 -116 72 -123 
+71 -123 72 -118 71 -107 72 -91 71 -73 72 -58 71 -44 72 -32 
+71 -21 71 -14 72 -8 71 -3 72 0 71 -1 72 0 71 0 
+72 0 71 0 72 0 71 0 71 0 72 0 71 0 72 0 
+71 0 72 0 71 0 72 0 71 0 72 0 71 0 71 0 
+72 0 71 0 72 0 71 0 72 0 71 0 72 0 71 0 
+72 0 71 0 71 0 1187 2241 100 MP stroke
+c8
+72 0 71 0 72 0 71 0 72 0 71 0 72 0 71 0 
+72 0 71 0 71 0 72 0 71 0 72 0 71 0 72 0 
+71 0 72 0 71 0 72 1 71 0 71 3 72 7 71 13 
+72 21 71 31 72 42 71 56 72 72 71 89 72 106 71 117 
+71 123 72 123 71 117 72 105 71 89 72 66 71 38 72 3 
+71 -31 72 -61 71 -84 71 -103 72 -115 71 -122 72 -124 71 -118 
+72 -109 71 -92 72 -75 71 -60 72 -45 71 -33 71 -23 72 -14 
+71 -8 72 -4 71 0 72 -1 71 0 72 0 71 0 72 0 
+71 0 71 0 72 0 71 0 72 0 71 0 72 0 71 0 
+72 0 71 0 72 0 71 0 71 0 72 0 71 0 72 0 
+71 0 72 0 71 0 72 0 71 0 72 0 71 0 71 0 
+72 0 71 0 72 0 71 0 72 0 71 0 72 0 71 0 
+72 0 71 0 71 0 1187 2241 100 MP stroke
+c9
+72 0 71 0 72 0 71 0 72 0 71 0 72 0 71 0 
+72 0 71 1 71 0 72 2 71 7 72 12 71 20 72 30 
+71 41 72 55 71 70 72 88 71 104 71 116 72 123 71 123 
+72 117 71 108 72 90 71 69 72 40 71 8 72 -28 71 -58 
+71 -83 72 -101 71 -114 72 -122 71 -123 72 -120 71 -109 72 -95 
+71 -77 72 -60 71 -47 71 -34 72 -24 71 -15 72 -9 71 -3 
+72 -1 71 -1 72 0 71 0 72 0 71 0 71 0 72 0 
+71 0 72 0 71 0 72 0 71 0 72 0 71 0 72 0 
+71 0 71 0 72 0 71 0 72 0 71 0 72 0 71 0 
+72 0 71 0 72 0 71 0 71 0 72 0 71 0 72 0 
+71 0 72 0 71 0 72 0 71 0 72 0 71 0 71 0 
+72 0 71 0 72 0 71 0 72 0 71 0 72 0 71 0 
+72 0 71 0 71 0 1187 2241 100 MP stroke
+c10
+72 1 71 2 72 6 71 12 72 19 71 29 72 40 71 53 
+72 69 71 85 71 103 72 115 71 122 72 123 71 119 72 108 
+71 93 72 71 71 44 72 10 71 -24 71 -55 72 -81 71 -99 
+72 -114 71 -121 72 -123 71 -120 72 -111 71 -96 72 -79 71 -62 
+71 -48 72 -35 71 -25 72 -16 71 -9 72 -4 71 -1 72 -1 
+71 0 72 0 71 0 71 0 72 0 71 0 72 0 71 0 
+72 0 71 0 72 0 71 0 72 0 71 0 71 0 72 0 
+71 0 72 0 71 0 72 0 71 0 72 0 71 0 72 0 
+71 0 71 0 72 0 71 0 72 0 71 0 72 0 71 0 
+72 0 71 0 72 0 71 0 71 0 72 0 71 0 72 0 
+71 0 72 0 71 0 72 0 71 0 72 0 71 0 71 0 
+72 0 71 0 72 0 71 0 72 0 71 0 72 0 71 0 
+72 0 71 0 71 0 1187 2241 100 MP stroke
+c11
+72 27 71 72 72 108 71 132 72 147 71 152 72 145 71 128 
+72 102 71 63 71 20 72 -20 71 -52 72 -79 71 -99 72 -112 
+71 -118 72 -118 71 -112 72 -98 71 -80 71 -63 72 -50 71 -36 
+72 -26 71 -17 72 -9 71 -5 72 -1 71 -1 72 0 71 0 
+71 0 72 0 71 0 72 0 71 0 72 0 71 0 72 0 
+71 0 72 0 71 0 71 0 72 0 71 0 72 0 71 0 
+72 0 71 0 72 0 71 0 72 0 71 0 71 0 72 0 
+71 0 72 0 71 0 72 0 71 0 72 0 71 0 72 0 
+71 0 71 0 72 0 71 0 72 0 71 0 72 0 71 0 
+72 0 71 0 72 0 71 0 71 0 72 0 71 0 72 0 
+71 0 72 0 71 0 72 0 71 0 72 0 71 0 71 0 
+72 0 71 0 72 0 71 0 72 0 71 0 72 0 71 0 
+72 0 71 0 71 0 1187 2241 100 MP stroke
+c12
+72 476 71 327 72 198 71 89 72 -1 71 -69 72 -119 71 -149 
+72 -158 71 -148 71 -123 72 -98 71 -76 72 -56 71 -40 72 -27 
+71 -15 72 -8 71 -2 72 -1 71 0 71 0 72 0 71 0 
+72 0 71 0 72 0 71 0 72 0 71 0 72 0 71 0 
+71 0 72 0 71 0 72 0 71 0 72 0 71 0 72 0 
+71 0 72 0 71 0 71 0 72 0 71 0 72 0 71 0 
+72 0 71 0 72 0 71 0 72 0 71 0 71 0 72 0 
+71 0 72 0 71 0 72 0 71 0 72 0 71 0 72 0 
+71 0 71 0 72 0 71 0 72 0 71 0 72 0 71 0 
+72 0 71 0 72 0 71 0 71 0 72 0 71 0 72 0 
+71 0 72 0 71 0 72 0 71 0 72 0 71 0 71 0 
+72 0 71 0 72 0 71 0 72 0 71 0 72 0 71 0 
+72 0 71 0 71 0 1187 2241 100 MP stroke
+c13
+72 -502 71 -401 72 -312 71 -233 72 -166 71 -110 72 -66 71 -34 
+72 -11 71 -2 71 0 72 0 71 0 72 0 71 0 72 0 
+71 0 72 0 71 0 72 0 71 0 71 0 72 0 71 0 
+72 0 71 0 72 0 71 0 72 0 71 0 72 0 71 0 
+71 0 72 0 71 0 72 0 71 0 72 0 71 0 72 0 
+71 0 72 0 71 0 71 0 72 0 71 0 72 0 71 0 
+72 0 71 0 72 0 71 0 72 0 71 0 71 0 72 0 
+71 0 72 0 71 0 72 0 71 0 72 0 71 0 72 0 
+71 0 71 0 72 0 71 0 72 0 71 0 72 0 71 0 
+72 0 71 0 72 0 71 0 71 0 72 0 71 0 72 0 
+71 0 72 0 71 0 72 0 71 0 72 0 71 0 71 0 
+72 0 71 0 72 0 71 0 72 0 71 0 72 0 71 0 
+72 0 71 0 71 0 1187 2241 100 MP stroke
+gr
+
+c13
+0 sg
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 192 FMSR
+
+3224  293 mt 
+(Splines of degree =  3     NX =    10) s
+ 888 1638 mt  -90 rotate
+(Splines) s
+90 rotate
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+1297  604 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+1428  700 mt 
+(0) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+3207  796 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+3338  892 mt 
+(4) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+2429  825 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+2560  921 mt 
+(3) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+1882 1027 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+2013 1123 mt 
+(2) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+1450 1027 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+1581 1123 mt 
+(1) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+6826  748 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+6957  844 mt 
+(9) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+6154  786 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+6285  882 mt 
+(8) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+5424  796 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+5555  892 mt 
+(7) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+4685  805 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+4816  901 mt 
+(6) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+3917  777 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+4048  873 mt 
+(5) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+7373  825 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+7504  921 mt 
+(10) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+7881  594 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+8012  690 mt 
+(12) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+7776  969 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+7907 1065 mt 
+(11) s
+gs 1187 404 7075 1838 rc
+gr
+
+c8
+  48   48 1187 2241 PO
+  48   48 1894 2241 PO
+  48   48 2601 2241 PO
+  48   48 3309 2241 PO
+  48   48 4016 2241 PO
+  48   48 4724 2241 PO
+  48   48 5431 2241 PO
+  48   48 6138 2241 PO
+  48   48 6846 2241 PO
+  48   48 7553 2241 PO
+  48   48 8261 2241 PO
+  48   48 1187 2241 FO
+  48   48 1894 2241 FO
+  48   48 2601 2241 FO
+  48   48 3309 2241 FO
+  48   48 4016 2241 FO
+  48   48 4724 2241 FO
+  48   48 5431 2241 FO
+  48   48 6138 2241 FO
+  48   48 6846 2241 FO
+  48   48 7553 2241 FO
+  48   48 8261 2241 FO
+gs 1187 404 7075 1838 rc
+gr
+
+1 sg
+0 1766 7074 0 0 -1766 1187 4794 4 MP
+PP
+-7074 0 0 1766 7074 0 0 -1766 1187 4794 5 MP stroke
+4 w
+DO
+SO
+6 w
+0 sg
+1187 4794 mt 8261 4794 L
+1187 3028 mt 8261 3028 L
+1187 4794 mt 1187 3028 L
+8261 4794 mt 8261 3028 L
+1187 4794 mt 8261 4794 L
+1187 4794 mt 1187 3028 L
+1187 4794 mt 1187 4723 L
+1187 3028 mt 1187 3098 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+1154 4939 mt 
+(0) s
+1894 4794 mt 1894 4723 L
+1894 3028 mt 1894 3098 L
+1811 4939 mt 
+(0.1) s
+2601 4794 mt 2601 4723 L
+2601 3028 mt 2601 3098 L
+2518 4939 mt 
+(0.2) s
+3309 4794 mt 3309 4723 L
+3309 3028 mt 3309 3098 L
+3226 4939 mt 
+(0.3) s
+4016 4794 mt 4016 4723 L
+4016 3028 mt 4016 3098 L
+3933 4939 mt 
+(0.4) s
+4724 4794 mt 4724 4723 L
+4724 3028 mt 4724 3098 L
+4641 4939 mt 
+(0.5) s
+5431 4794 mt 5431 4723 L
+5431 3028 mt 5431 3098 L
+5348 4939 mt 
+(0.6) s
+6138 4794 mt 6138 4723 L
+6138 3028 mt 6138 3098 L
+6055 4939 mt 
+(0.7) s
+6846 4794 mt 6846 4723 L
+6846 3028 mt 6846 3098 L
+6763 4939 mt 
+(0.8) s
+7553 4794 mt 7553 4723 L
+7553 3028 mt 7553 3098 L
+7470 4939 mt 
+(0.9) s
+8261 4794 mt 8261 4723 L
+8261 3028 mt 8261 3098 L
+8228 4939 mt 
+(1) s
+1187 4794 mt 1257 4794 L
+8261 4794 mt 8190 4794 L
+1086 4838 mt 
+(0) s
+1187 4440 mt 1257 4440 L
+8261 4440 mt 8190 4440 L
+ 986 4484 mt 
+(0.2) s
+1187 4087 mt 1257 4087 L
+8261 4087 mt 8190 4087 L
+ 986 4131 mt 
+(0.4) s
+1187 3734 mt 1257 3734 L
+8261 3734 mt 8190 3734 L
+ 986 3778 mt 
+(0.6) s
+1187 3381 mt 1257 3381 L
+8261 3381 mt 8190 3381 L
+ 986 3425 mt 
+(0.8) s
+1187 3028 mt 1257 3028 L
+8261 3028 mt 8190 3028 L
+1086 3072 mt 
+(1) s
+1187 4794 mt 8261 4794 L
+1187 3028 mt 8261 3028 L
+1187 4794 mt 1187 3028 L
+8261 4794 mt 8261 3028 L
+gs 1187 3028 7075 1767 rc
+c8
+71 0 71 0 70 0 71 0 71 0 71 0 70 0 71 0 
+71 0 71 0 70 0 71 0 71 0 71 0 70 0 71 0 
+71 0 71 0 70 0 71 0 71 0 71 0 70 0 71 0 
+71 0 70 0 71 0 71 0 71 0 70 0 71 0 71 0 
+71 0 70 0 71 0 71 0 71 0 70 0 71 0 71 0 
+71 0 70 0 71 0 71 0 71 0 70 0 71 0 71 0 
+70 0 71 0 71 0 71 0 70 0 71 0 71 0 71 0 
+70 0 71 0 71 0 71 0 70 0 71 0 71 0 71 0 
+70 0 71 0 71 0 71 0 70 0 71 0 71 0 71 0 
+70 0 71 0 71 0 70 0 71 0 71 0 71 0 70 0 
+71 0 71 0 71 0 70 0 71 0 71 0 71 0 70 0 
+71 0 71 1 71 2 70 5 71 11 71 18 71 27 70 37 
+71 50 71 64 70 80 1187 4499 100 MP stroke
+c9
+71 0 71 0 70 0 71 0 71 0 71 0 70 0 71 0 
+71 0 71 0 70 0 71 0 71 0 71 0 70 0 71 0 
+71 0 71 0 70 0 71 0 71 0 71 0 70 0 71 0 
+71 0 70 0 71 0 71 0 71 0 70 0 71 0 71 0 
+71 0 70 0 71 0 71 0 71 0 70 0 71 0 71 0 
+71 0 70 0 71 0 71 0 71 0 70 0 71 0 71 0 
+70 0 71 0 71 0 71 0 70 0 71 0 71 0 71 0 
+70 0 71 0 71 0 71 0 70 0 71 0 71 0 71 0 
+70 0 71 0 71 0 71 0 70 0 71 0 71 0 71 0 
+70 0 71 0 71 0 70 0 71 0 71 0 71 0 70 1 
+71 2 71 5 71 11 70 18 71 27 71 37 71 50 70 64 
+71 80 71 96 71 109 70 115 71 118 71 114 71 105 70 91 
+71 71 71 47 70 17 1187 3616 100 MP stroke
+c10
+71 0 71 0 70 0 71 0 71 0 71 0 70 0 71 0 
+71 0 71 0 70 0 71 0 71 0 71 0 70 0 71 0 
+71 0 71 0 70 0 71 0 71 0 71 0 70 0 71 0 
+71 0 70 0 71 0 71 0 71 0 70 0 71 0 71 0 
+71 0 70 0 71 0 71 0 71 0 70 0 71 0 71 0 
+71 0 70 0 71 0 71 0 71 0 70 0 71 0 71 0 
+70 0 71 0 71 0 71 0 70 0 71 0 71 0 71 0 
+70 0 71 0 71 0 71 0 70 0 71 0 71 0 71 0 
+70 0 71 0 71 0 71 0 70 0 71 1 71 2 71 5 
+70 11 71 18 71 27 70 37 71 50 71 64 71 80 70 96 
+71 109 71 115 71 118 70 114 71 105 71 91 71 71 70 47 
+71 17 71 -17 71 -47 70 -71 71 -91 71 -105 71 -114 70 -118 
+71 -115 71 -109 70 -96 1187 4499 100 MP stroke
+c11
+71 0 71 0 70 0 71 0 71 0 71 0 70 0 71 0 
+71 0 71 0 70 0 71 0 71 0 71 0 70 0 71 0 
+71 0 71 0 70 0 71 0 71 0 71 0 70 0 71 0 
+71 0 70 0 71 0 71 0 71 0 70 0 71 0 71 0 
+71 0 70 0 71 0 71 0 71 0 70 0 71 0 71 0 
+71 0 70 0 71 0 71 0 71 0 70 0 71 0 71 0 
+70 0 71 0 71 0 71 0 70 0 71 0 71 0 71 0 
+70 0 71 0 71 0 71 1 70 2 71 5 71 11 71 18 
+70 27 71 37 71 50 71 64 70 80 71 96 71 109 71 115 
+70 118 71 114 71 105 70 91 71 71 71 47 71 17 70 -17 
+71 -47 71 -71 71 -91 70 -105 71 -114 71 -118 71 -115 70 -109 
+71 -96 71 -80 71 -64 70 -50 71 -37 71 -27 71 -18 70 -11 
+71 -5 71 -2 70 -1 1187 4794 100 MP stroke
+c12
+71 0 71 0 70 0 71 0 71 0 71 0 70 0 71 0 
+71 0 71 0 70 0 71 0 71 0 71 0 70 0 71 0 
+71 0 71 0 70 0 71 0 71 0 71 0 70 0 71 0 
+71 0 70 0 71 0 71 0 71 0 70 0 71 0 71 0 
+71 0 70 0 71 0 71 0 71 0 70 0 71 0 71 0 
+71 0 70 0 71 0 71 0 71 0 70 0 71 0 71 0 
+70 0 71 1 71 2 71 5 70 11 71 18 71 27 71 37 
+70 50 71 64 71 80 71 96 70 109 71 115 71 118 71 114 
+70 105 71 91 71 71 71 47 70 17 71 -17 71 -47 71 -71 
+70 -91 71 -105 71 -114 70 -118 71 -115 71 -109 71 -96 70 -80 
+71 -64 71 -50 71 -37 70 -27 71 -18 71 -11 71 -5 70 -2 
+71 -1 71 0 71 0 70 0 71 0 71 0 71 0 70 0 
+71 0 71 0 70 0 1187 4794 100 MP stroke
+c13
+71 0 71 0 70 0 71 0 71 0 71 0 70 0 71 0 
+71 0 71 0 70 0 71 0 71 0 71 0 70 0 71 0 
+71 0 71 0 70 0 71 0 71 0 71 0 70 0 71 0 
+71 0 70 0 71 0 71 0 71 0 70 0 71 0 71 0 
+71 0 70 0 71 0 71 0 71 0 70 0 71 0 71 1 
+71 2 70 5 71 11 71 18 71 27 70 37 71 50 71 64 
+70 80 71 96 71 109 71 115 70 118 71 114 71 105 71 91 
+70 71 71 47 71 17 71 -17 70 -47 71 -71 71 -91 71 -105 
+70 -114 71 -118 71 -115 71 -109 70 -96 71 -80 71 -64 71 -50 
+70 -37 71 -27 71 -18 70 -11 71 -5 71 -2 71 -1 70 0 
+71 0 71 0 71 0 70 0 71 0 71 0 71 0 70 0 
+71 0 71 0 71 0 70 0 71 0 71 0 71 0 70 0 
+71 0 71 0 70 0 1187 4794 100 MP stroke
+0 sg
+71 0 71 0 70 0 71 0 71 0 71 0 70 0 71 0 
+71 0 71 0 70 0 71 0 71 0 71 0 70 0 71 0 
+71 0 71 0 70 0 71 0 71 0 71 0 70 0 71 0 
+71 0 70 0 71 0 71 0 71 0 70 1 71 2 71 5 
+71 11 70 18 71 27 71 37 71 50 70 64 71 80 71 96 
+71 109 70 115 71 118 71 114 71 105 70 91 71 71 71 47 
+70 17 71 -17 71 -47 71 -71 70 -91 71 -105 71 -114 71 -118 
+70 -115 71 -109 71 -96 71 -80 70 -64 71 -50 71 -37 71 -27 
+70 -18 71 -11 71 -5 71 -2 70 -1 71 0 71 0 71 0 
+70 0 71 0 71 0 70 0 71 0 71 0 71 0 70 0 
+71 0 71 0 71 0 70 0 71 0 71 0 71 0 70 0 
+71 0 71 0 71 0 70 0 71 0 71 0 71 0 70 0 
+71 0 71 0 70 0 1187 4794 100 MP stroke
+c8
+71 0 71 0 70 0 71 0 71 0 71 0 70 0 71 0 
+71 0 71 0 70 0 71 0 71 0 71 0 70 0 71 0 
+71 0 71 0 70 0 71 1 71 2 71 5 70 11 71 18 
+71 27 70 37 71 50 71 64 71 80 70 96 71 109 71 115 
+71 118 70 114 71 105 71 91 71 71 70 47 71 17 71 -17 
+71 -47 70 -71 71 -91 71 -105 71 -114 70 -118 71 -115 71 -109 
+70 -96 71 -80 71 -64 71 -50 70 -37 71 -27 71 -18 71 -11 
+70 -5 71 -2 71 -1 71 0 70 0 71 0 71 0 71 0 
+70 0 71 0 71 0 71 0 70 0 71 0 71 0 71 0 
+70 0 71 0 71 0 70 0 71 0 71 0 71 0 70 0 
+71 0 71 0 71 0 70 0 71 0 71 0 71 0 70 0 
+71 0 71 0 71 0 70 0 71 0 71 0 71 0 70 0 
+71 0 71 0 70 0 1187 4794 100 MP stroke
+c9
+71 0 71 0 70 0 71 0 71 0 71 0 70 0 71 0 
+71 0 71 1 70 2 71 5 71 11 71 18 70 27 71 37 
+71 50 71 64 70 80 71 96 71 109 71 115 70 118 71 114 
+71 105 70 91 71 71 71 47 71 17 70 -17 71 -47 71 -71 
+71 -91 70 -105 71 -114 71 -118 71 -115 70 -109 71 -96 71 -80 
+71 -64 70 -50 71 -37 71 -27 71 -18 70 -11 71 -5 71 -2 
+70 -1 71 0 71 0 71 0 70 0 71 0 71 0 71 0 
+70 0 71 0 71 0 71 0 70 0 71 0 71 0 71 0 
+70 0 71 0 71 0 71 0 70 0 71 0 71 0 71 0 
+70 0 71 0 71 0 70 0 71 0 71 0 71 0 70 0 
+71 0 71 0 71 0 70 0 71 0 71 0 71 0 70 0 
+71 0 71 0 71 0 70 0 71 0 71 0 71 0 70 0 
+71 0 71 0 70 0 1187 4794 100 MP stroke
+c10
+71 2 71 5 70 11 71 18 71 27 71 37 70 50 71 64 
+71 80 71 96 70 109 71 115 71 118 71 114 70 105 71 91 
+71 71 71 47 70 17 71 -17 71 -47 71 -71 70 -91 71 -105 
+71 -114 70 -118 71 -115 71 -109 71 -96 70 -80 71 -64 71 -50 
+71 -37 70 -27 71 -18 71 -11 71 -5 70 -2 71 -1 71 0 
+71 0 70 0 71 0 71 0 71 0 70 0 71 0 71 0 
+70 0 71 0 71 0 71 0 70 0 71 0 71 0 71 0 
+70 0 71 0 71 0 71 0 70 0 71 0 71 0 71 0 
+70 0 71 0 71 0 71 0 70 0 71 0 71 0 71 0 
+70 0 71 0 71 0 70 0 71 0 71 0 71 0 70 0 
+71 0 71 0 71 0 70 0 71 0 71 0 71 0 70 0 
+71 0 71 0 71 0 70 0 71 0 71 0 71 0 70 0 
+71 0 71 0 70 0 1187 4794 100 MP stroke
+c8
+71 109 71 115 70 118 71 114 71 105 71 91 70 71 71 47 
+71 17 71 -17 70 -47 71 -71 71 -91 71 -105 70 -114 71 -118 
+71 -115 71 -109 70 -96 71 -80 71 -64 71 -50 70 -37 71 -27 
+71 -18 70 -11 71 -5 71 -2 71 -1 70 0 71 0 71 0 
+71 0 70 0 71 0 71 0 71 0 70 0 71 0 71 0 
+71 0 70 0 71 0 71 0 71 0 70 0 71 0 71 0 
+70 0 71 0 71 0 71 0 70 0 71 0 71 0 71 0 
+70 0 71 0 71 0 71 0 70 0 71 0 71 0 71 0 
+70 0 71 0 71 0 71 0 70 0 71 0 71 0 71 0 
+70 0 71 0 71 0 70 0 71 0 71 0 71 0 70 0 
+71 0 71 0 71 0 70 0 71 0 71 0 71 0 70 0 
+71 0 71 0 71 0 70 0 71 0 71 0 71 0 70 0 
+71 0 71 0 70 0 1187 4794 100 MP stroke
+c9
+71 -47 71 -71 70 -91 71 -105 71 -114 71 -118 70 -115 71 -109 
+71 -96 71 -80 70 -64 71 -50 71 -37 71 -27 70 -18 71 -11 
+71 -5 71 -2 70 -1 71 0 71 0 71 0 70 0 71 0 
+71 0 70 0 71 0 71 0 71 0 70 0 71 0 71 0 
+71 0 70 0 71 0 71 0 71 0 70 0 71 0 71 0 
+71 0 70 0 71 0 71 0 71 0 70 0 71 0 71 0 
+70 0 71 0 71 0 71 0 70 0 71 0 71 0 71 0 
+70 0 71 0 71 0 71 0 70 0 71 0 71 0 71 0 
+70 0 71 0 71 0 71 0 70 0 71 0 71 0 71 0 
+70 0 71 0 71 0 70 0 71 0 71 0 71 0 70 0 
+71 0 71 0 71 0 70 0 71 0 71 0 71 0 70 0 
+71 0 71 0 71 0 70 0 71 0 71 0 71 0 70 0 
+71 0 71 0 70 0 1187 4794 100 MP stroke
+c10
+71 -64 71 -50 70 -37 71 -27 71 -18 71 -11 70 -5 71 -2 
+71 -1 71 0 70 0 71 0 71 0 71 0 70 0 71 0 
+71 0 71 0 70 0 71 0 71 0 71 0 70 0 71 0 
+71 0 70 0 71 0 71 0 71 0 70 0 71 0 71 0 
+71 0 70 0 71 0 71 0 71 0 70 0 71 0 71 0 
+71 0 70 0 71 0 71 0 71 0 70 0 71 0 71 0 
+70 0 71 0 71 0 71 0 70 0 71 0 71 0 71 0 
+70 0 71 0 71 0 71 0 70 0 71 0 71 0 71 0 
+70 0 71 0 71 0 71 0 70 0 71 0 71 0 71 0 
+70 0 71 0 71 0 70 0 71 0 71 0 71 0 70 0 
+71 0 71 0 71 0 70 0 71 0 71 0 71 0 70 0 
+71 0 71 0 71 0 70 0 71 0 71 0 71 0 70 0 
+71 0 71 0 70 0 1187 4794 100 MP stroke
+gr
+
+c10
+0 sg
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 192 FMSR
+
+ 888 4227 mt  -90 rotate
+(Splines) s
+90 rotate
+3151 2917 mt 
+(Periodic Splines of degree 3, NX =10) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+1817 3431 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+1948 3527 mt 
+(2) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+1345 3505 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+1476 3601 mt 
+(1) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+1384 4467 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+1515 4563 mt 
+(0) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+6745 3441 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+6876 3537 mt 
+(9) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+6032 3441 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+6163 3537 mt 
+(8) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+5330 3394 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+5461 3490 mt 
+(7) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+4608 3422 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+4739 3518 mt 
+(6) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+3905 3385 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+4036 3481 mt 
+(5) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+3164 3441 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+3295 3537 mt 
+(4) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+2481 3450 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+2612 3546 mt 
+(3) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+7363 3304 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+7494 3400 mt 
+(10/0) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+7795 3534 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+7926 3630 mt 
+(11/1) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+7851 4439 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+7982 4535 mt 
+(12/2) s
+gs 1187 3028 7075 1767 rc
+gr
+
+c8
+  48   48 1187 4794 PO
+  48   48 1894 4794 PO
+  48   48 2601 4794 PO
+  48   48 3309 4794 PO
+  48   48 4016 4794 PO
+  48   48 4724 4794 PO
+  48   48 5431 4794 PO
+  48   48 6138 4794 PO
+  48   48 6846 4794 PO
+  48   48 7553 4794 PO
+  48   48 8261 4794 PO
+  48   48 1187 4794 FO
+  48   48 1894 4794 FO
+  48   48 2601 4794 FO
+  48   48 3309 4794 FO
+  48   48 4016 4794 FO
+  48   48 4724 4794 FO
+  48   48 5431 4794 FO
+  48   48 6138 4794 FO
+  48   48 6846 4794 FO
+  48   48 7553 4794 FO
+  48   48 8261 4794 FO
+gs 1187 3028 7075 1767 rc
+gr
+
+
+end %%Color Dict
+
+eplot
+%%EndObject
+
+epage
+end
+
+showpage
+
+%%Trailer
+%%EOF
diff --git a/docs/manual/fit.eps b/docs/manual/fit.eps
new file mode 100644
index 0000000..10e5562
--- /dev/null
+++ b/docs/manual/fit.eps
@@ -0,0 +1,1120 @@
+%!PS-Adobe-3.0 EPSF-3.0
+%%Creator: MATLAB, The Mathworks, Inc.
+%%Title: /home/ttran/Utils/bsplines/wk/fit.eps
+%%CreationDate: 07/15/2007  09:22:17
+%%DocumentNeededFonts: Helvetica
+%%DocumentProcessColors: Cyan Magenta Yellow Black
+%%LanguageLevel: 2
+%%Pages: 1
+%%BoundingBox:   -69   158   682   632
+%%EndComments
+
+%%BeginProlog
+% MathWorks dictionary
+/MathWorks 160 dict begin
+% definition operators
+/bdef {bind def} bind def
+/ldef {load def} bind def
+/xdef {exch def} bdef
+/xstore {exch store} bdef
+% operator abbreviations
+/c  /clip ldef
+/cc /concat ldef
+/cp /closepath ldef
+/gr /grestore ldef
+/gs /gsave ldef
+/mt /moveto ldef
+/np /newpath ldef
+/cm /currentmatrix ldef
+/sm /setmatrix ldef
+/rm /rmoveto ldef
+/rl /rlineto ldef
+/s {show newpath} bdef
+/sc {setcmykcolor} bdef
+/sr /setrgbcolor ldef
+/sg /setgray ldef
+/w /setlinewidth ldef
+/j /setlinejoin ldef
+/cap /setlinecap ldef
+/rc {rectclip} bdef
+/rf {rectfill} bdef
+% page state control
+/pgsv () def
+/bpage {/pgsv save def} bdef
+/epage {pgsv restore} bdef
+/bplot /gsave ldef
+/eplot {stroke grestore} bdef
+% orientation switch
+/portraitMode 0 def /landscapeMode 1 def /rotateMode 2 def
+% coordinate system mappings
+/dpi2point 0 def
+% font control
+/FontSize 0 def
+/FMS {/FontSize xstore findfont [FontSize 0 0 FontSize neg 0 0]
+  makefont setfont} bdef
+/reencode {exch dup where {pop load} {pop StandardEncoding} ifelse
+  exch dup 3 1 roll findfont dup length dict begin
+  { 1 index /FID ne {def}{pop pop} ifelse } forall
+  /Encoding exch def currentdict end definefont pop} bdef
+/isroman {findfont /CharStrings get /Agrave known} bdef
+/FMSR {3 1 roll 1 index dup isroman {reencode} {pop pop} ifelse
+  exch FMS} bdef
+/csm {1 dpi2point div -1 dpi2point div scale neg translate
+ dup landscapeMode eq {pop -90 rotate}
+  {rotateMode eq {90 rotate} if} ifelse} bdef
+% line types: solid, dotted, dashed, dotdash
+/SO { [] 0 setdash } bdef
+/DO { [.5 dpi2point mul 4 dpi2point mul] 0 setdash } bdef
+/DA { [6 dpi2point mul] 0 setdash } bdef
+/DD { [.5 dpi2point mul 4 dpi2point mul 6 dpi2point mul 4
+  dpi2point mul] 0 setdash } bdef
+% macros for lines and objects
+/L {lineto stroke} bdef
+/MP {3 1 roll moveto 1 sub {rlineto} repeat} bdef
+/AP {{rlineto} repeat} bdef
+/PDlw -1 def
+/W {/PDlw currentlinewidth def setlinewidth} def
+/PP {closepath eofill} bdef
+/DP {closepath stroke} bdef
+/MR {4 -2 roll moveto dup  0 exch rlineto exch 0 rlineto
+  neg 0 exch rlineto closepath} bdef
+/FR {MR stroke} bdef
+/PR {MR fill} bdef
+/L1i {{currentfile picstr readhexstring pop} image} bdef
+/tMatrix matrix def
+/MakeOval {newpath tMatrix currentmatrix pop translate scale
+0 0 1 0 360 arc tMatrix setmatrix} bdef
+/FO {MakeOval stroke} bdef
+/PO {MakeOval fill} bdef
+/PD {currentlinewidth 2 div 0 360 arc fill
+   PDlw -1 eq not {PDlw w /PDlw -1 def} if} def
+/FA {newpath tMatrix currentmatrix pop translate scale
+  0 0 1 5 -2 roll arc tMatrix setmatrix stroke} bdef
+/PA {newpath tMatrix currentmatrix pop	translate 0 0 moveto scale
+  0 0 1 5 -2 roll arc closepath tMatrix setmatrix fill} bdef
+/FAn {newpath tMatrix currentmatrix pop translate scale
+  0 0 1 5 -2 roll arcn tMatrix setmatrix stroke} bdef
+/PAn {newpath tMatrix currentmatrix pop translate 0 0 moveto scale
+  0 0 1 5 -2 roll arcn closepath tMatrix setmatrix fill} bdef
+/vradius 0 def /hradius 0 def /lry 0 def
+/lrx 0 def /uly 0 def /ulx 0 def /rad 0 def
+/MRR {/vradius xdef /hradius xdef /lry xdef /lrx xdef /uly xdef
+  /ulx xdef newpath tMatrix currentmatrix pop ulx hradius add uly
+  vradius add translate hradius vradius scale 0 0 1 180 270 arc 
+  tMatrix setmatrix lrx hradius sub uly vradius add translate
+  hradius vradius scale 0 0 1 270 360 arc tMatrix setmatrix
+  lrx hradius sub lry vradius sub translate hradius vradius scale
+  0 0 1 0 90 arc tMatrix setmatrix ulx hradius add lry vradius sub
+  translate hradius vradius scale 0 0 1 90 180 arc tMatrix setmatrix
+  closepath} bdef
+/FRR {MRR stroke } bdef
+/PRR {MRR fill } bdef
+/MlrRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lry uly sub 2 div def
+  newpath tMatrix currentmatrix pop ulx rad add uly rad add translate
+  rad rad scale 0 0 1 90 270 arc tMatrix setmatrix lrx rad sub lry rad
+  sub translate rad rad scale 0 0 1 270 90 arc tMatrix setmatrix
+  closepath} bdef
+/FlrRR {MlrRR stroke } bdef
+/PlrRR {MlrRR fill } bdef
+/MtbRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lrx ulx sub 2 div def
+  newpath tMatrix currentmatrix pop ulx rad add uly rad add translate
+  rad rad scale 0 0 1 180 360 arc tMatrix setmatrix lrx rad sub lry rad
+  sub translate rad rad scale 0 0 1 0 180 arc tMatrix setmatrix
+  closepath} bdef
+/FtbRR {MtbRR stroke } bdef
+/PtbRR {MtbRR fill } bdef
+/stri 6 array def /dtri 6 array def
+/smat 6 array def /dmat 6 array def
+/tmat1 6 array def /tmat2 6 array def /dif 3 array def
+/asub {/ind2 exch def /ind1 exch def dup dup
+  ind1 get exch ind2 get sub exch } bdef
+/tri_to_matrix {
+  2 0 asub 3 1 asub 4 0 asub 5 1 asub
+  dup 0 get exch 1 get 7 -1 roll astore } bdef
+/compute_transform {
+  dmat dtri tri_to_matrix tmat1 invertmatrix 
+  smat stri tri_to_matrix tmat2 concatmatrix } bdef
+/ds {stri astore pop} bdef
+/dt {dtri astore pop} bdef
+/db {2 copy /cols xdef /rows xdef mul dup 3 mul string
+  currentfile 
+  3 index 0 eq {/ASCIIHexDecode filter}
+  {/ASCII85Decode filter 3 index 2 eq {/RunLengthDecode filter} if }
+  ifelse exch readstring pop
+  dup 0 3 index getinterval /rbmap xdef
+  dup 2 index dup getinterval /gbmap xdef
+  1 index dup 2 mul exch getinterval /bbmap xdef pop pop}bdef
+/it {gs np dtri aload pop moveto lineto lineto cp c
+  cols rows 8 compute_transform 
+  rbmap gbmap bbmap true 3 colorimage gr}bdef
+/il {newpath moveto lineto stroke}bdef
+currentdict end def
+%%EndProlog
+
+%%BeginSetup
+MathWorks begin
+
+0 cap
+
+end
+%%EndSetup
+
+%%Page: 1 1
+%%BeginPageSetup
+%%PageBoundingBox:   -69   158   682   632
+MathWorks begin
+bpage
+%%EndPageSetup
+
+%%BeginObject: obj1
+bplot
+
+/dpi2point 12 def
+portraitMode -0828 7584 csm
+
+    0     0  9023  5683 rc
+91 dict begin %Colortable dictionary
+/c0 { 0.000000 0.000000 0.000000 sr} bdef
+/c1 { 1.000000 1.000000 1.000000 sr} bdef
+/c2 { 0.900000 0.000000 0.000000 sr} bdef
+/c3 { 0.000000 0.820000 0.000000 sr} bdef
+/c4 { 0.000000 0.000000 0.800000 sr} bdef
+/c5 { 0.910000 0.820000 0.320000 sr} bdef
+/c6 { 1.000000 0.260000 0.820000 sr} bdef
+/c7 { 0.000000 0.820000 0.820000 sr} bdef
+c0
+1 j
+1 sg
+   0    0 9024 5684 rf
+6 w
+0 1861 6993 0 0 -1861 1211 5139 4 MP
+PP
+-6993 0 0 1861 6993 0 0 -1861 1211 5139 5 MP stroke
+4 w
+DO
+0 sg
+1560 5139 mt 1560 3278 L
+1560 3278 mt 1560 3278 L
+2259 5139 mt 2259 3278 L
+2259 3278 mt 2259 3278 L
+2959 5139 mt 2959 3278 L
+2959 3278 mt 2959 3278 L
+3658 5139 mt 3658 3278 L
+3658 3278 mt 3658 3278 L
+4357 5139 mt 4357 3278 L
+4357 3278 mt 4357 3278 L
+5057 5139 mt 5057 3278 L
+5057 3278 mt 5057 3278 L
+5756 5139 mt 5756 3278 L
+5756 3278 mt 5756 3278 L
+6455 5139 mt 6455 3278 L
+6455 3278 mt 6455 3278 L
+7155 5139 mt 7155 3278 L
+7155 3278 mt 7155 3278 L
+7854 5139 mt 7854 3278 L
+7854 3278 mt 7854 3278 L
+SO
+6 w
+1211 5139 mt 8204 5139 L
+1211 3278 mt 8204 3278 L
+1211 5139 mt 1211 3278 L
+8204 5139 mt 8204 3278 L
+1211 5139 mt 8204 5139 L
+1211 5139 mt 1211 3278 L
+1560 5139 mt 1560 5069 L
+1560 3278 mt 1560 3347 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+1527 5284 mt 
+(0) s
+2259 5139 mt 2259 5069 L
+2259 3278 mt 2259 3347 L
+2176 5284 mt 
+(0.1) s
+2959 5139 mt 2959 5069 L
+2959 3278 mt 2959 3347 L
+2876 5284 mt 
+(0.2) s
+3658 5139 mt 3658 5069 L
+3658 3278 mt 3658 3347 L
+3575 5284 mt 
+(0.3) s
+4357 5139 mt 4357 5069 L
+4357 3278 mt 4357 3347 L
+4274 5284 mt 
+(0.4) s
+5057 5139 mt 5057 5069 L
+5057 3278 mt 5057 3347 L
+4974 5284 mt 
+(0.5) s
+5756 5139 mt 5756 5069 L
+5756 3278 mt 5756 3347 L
+5673 5284 mt 
+(0.6) s
+6455 5139 mt 6455 5069 L
+6455 3278 mt 6455 3347 L
+6372 5284 mt 
+(0.7) s
+7155 5139 mt 7155 5069 L
+7155 3278 mt 7155 3347 L
+7072 5284 mt 
+(0.8) s
+7854 5139 mt 7854 5069 L
+7854 3278 mt 7854 3347 L
+7771 5284 mt 
+(0.9) s
+1211 5139 mt 1280 5139 L
+8204 5139 mt 8134 5139 L
+1110 5183 mt 
+(0) s
+1211 4766 mt 1280 4766 L
+8204 4766 mt 8134 4766 L
+1010 4810 mt 
+(0.2) s
+1211 4394 mt 1280 4394 L
+8204 4394 mt 8134 4394 L
+1010 4438 mt 
+(0.4) s
+1211 4022 mt 1280 4022 L
+8204 4022 mt 8134 4022 L
+1010 4066 mt 
+(0.6) s
+1211 3650 mt 1280 3650 L
+8204 3650 mt 8134 3650 L
+1010 3694 mt 
+(0.8) s
+1211 3278 mt 1280 3278 L
+8204 3278 mt 8134 3278 L
+1110 3322 mt 
+(1) s
+1211 5139 mt 8204 5139 L
+1211 3278 mt 8204 3278 L
+1211 5139 mt 1211 3278 L
+8204 5139 mt 8204 3278 L
+gs 1211 3278 6994 1862 rc
+/c8 { 0.000000 0.000000 1.000000 sr} bdef
+c8
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 69 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 69 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 69 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+69 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 69 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 69 0 70 0 70 0 70 0 
+70 0 70 10 70 28 70 46 70 65 70 84 70 102 70 121 
+70 140 70 158 69 177 1211 4208 100 MP stroke
+/c9 { 0.000000 1.000000 0.000000 sr} bdef
+c9
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 69 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 69 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 69 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+69 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 69 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 10 
+70 28 70 46 70 65 70 84 69 102 70 121 70 140 70 158 
+70 177 70 167 70 131 70 93 70 56 70 18 70 -18 70 -56 
+70 -93 70 -131 69 -167 1211 4208 100 MP stroke
+/c10 { 1.000000 0.000000 0.000000 sr} bdef
+c10
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 69 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 69 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 69 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+69 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 10 69 28 70 46 
+70 65 70 84 70 102 70 121 70 140 70 158 70 177 70 167 
+70 131 70 93 70 56 70 18 69 -18 70 -56 70 -93 70 -131 
+70 -167 70 -177 70 -158 70 -140 70 -121 70 -102 70 -84 70 -65 
+70 -46 70 -28 69 -9 1211 5138 100 MP stroke
+/c11 { 0.000000 1.000000 1.000000 sr} bdef
+c11
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 69 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 69 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 69 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+69 0 70 0 70 0 70 10 70 28 70 46 70 65 70 84 
+70 102 70 121 70 140 70 158 70 177 70 167 69 131 70 93 
+70 56 70 18 70 -18 70 -56 70 -93 70 -131 70 -167 70 -177 
+70 -158 70 -140 70 -121 70 -102 69 -84 70 -65 70 -46 70 -28 
+70 -10 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 69 0 1211 5139 100 MP stroke
+/c12 { 1.000000 0.000000 1.000000 sr} bdef
+c12
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 69 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 69 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 69 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 10 70 28 70 46 70 65 70 84 70 102 70 121 
+69 140 70 158 70 177 70 167 70 131 70 93 70 56 70 18 
+70 -18 70 -56 70 -93 70 -131 70 -167 70 -177 69 -158 70 -140 
+70 -121 70 -102 70 -84 70 -65 70 -46 70 -28 70 -10 70 0 
+70 0 70 0 70 0 70 0 69 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 69 0 1211 5139 100 MP stroke
+/c13 { 1.000000 1.000000 0.000000 sr} bdef
+c13
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 69 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 69 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 10 
+70 28 69 46 70 65 70 84 70 102 70 121 70 140 70 158 
+70 177 70 167 70 131 70 93 70 56 70 18 70 -18 70 -56 
+69 -93 70 -131 70 -167 70 -177 70 -158 70 -140 70 -121 70 -102 
+70 -84 70 -65 70 -46 70 -28 70 -10 70 0 69 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 69 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 69 0 1211 5139 100 MP stroke
+0 sg
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 69 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 69 0 70 0 70 10 70 28 70 46 
+70 65 70 84 70 102 70 121 70 140 70 158 70 177 70 167 
+70 131 69 93 70 56 70 18 70 -18 70 -56 70 -93 70 -131 
+70 -167 70 -177 70 -158 70 -140 70 -121 70 -102 70 -84 70 -65 
+69 -46 70 -28 70 -10 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 69 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 69 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 69 0 1211 5139 100 MP stroke
+c8
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 69 0 70 0 70 0 
+70 0 70 0 70 0 70 10 70 28 70 46 70 65 70 84 
+70 102 70 121 70 140 69 158 70 177 70 167 70 131 70 93 
+70 56 70 18 70 -18 70 -56 70 -93 70 -131 70 -167 70 -177 
+70 -158 69 -140 70 -121 70 -102 70 -84 70 -65 70 -46 70 -28 
+70 -10 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+69 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 69 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 69 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 69 0 1211 5139 100 MP stroke
+c9
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 10 70 28 70 46 70 65 69 84 70 102 70 121 
+70 140 70 158 70 177 70 167 70 131 70 93 70 56 70 18 
+70 -18 70 -56 70 -93 69 -131 70 -167 70 -177 70 -158 70 -140 
+70 -121 70 -102 70 -84 70 -65 70 -46 70 -28 70 -10 70 0 
+70 0 69 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+69 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 69 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 69 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 69 0 1211 5139 100 MP stroke
+c10
+70 28 70 46 70 65 70 84 70 102 70 121 70 140 70 158 
+70 177 70 167 70 131 70 93 70 56 69 18 70 -18 70 -56 
+70 -93 70 -131 70 -167 70 -177 70 -158 70 -140 70 -121 70 -102 
+70 -84 70 -65 70 -46 69 -28 70 -10 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 69 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+69 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 69 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 69 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 69 0 1211 5139 100 MP stroke
+c8
+70 131 70 93 70 56 70 18 70 -18 70 -56 70 -93 70 -131 
+70 -167 70 -177 70 -158 70 -140 70 -121 69 -102 70 -84 70 -65 
+70 -46 70 -28 70 -10 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 69 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 69 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+69 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 69 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 69 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 69 0 1211 5139 100 MP stroke
+c9
+70 -158 70 -140 70 -121 70 -102 70 -84 70 -65 70 -46 70 -28 
+70 -10 70 0 70 0 70 0 70 0 69 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 69 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 69 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+69 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 69 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 69 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 69 0 1211 5139 100 MP stroke
+gr
+
+c9
+0 sg
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 192 FMSR
+
+ 912 4524 mt  -90 rotate
+(Splines) s
+90 rotate
+2041 3167 mt 
+(Periodic Splines of degree 2 for Interpolation on [0, 1],  \
+NX =10) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+7837 4338 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+7968 4434 mt 
+(10/0) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+1489 3550 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+1620 3646 mt 
+(0) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+7728 3524 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+7859 3620 mt 
+(9/-1) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+7086 3511 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+7217 3607 mt 
+(8) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+6424 3511 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+6555 3607 mt 
+(7) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+5690 3492 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+5821 3588 mt 
+(6) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+4978 3492 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+5109 3588 mt 
+(5) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+4309 3486 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+4440 3582 mt 
+(4) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+3611 3499 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+3742 3595 mt 
+(3) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+2941 3499 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+3072 3595 mt 
+(2) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+2180 3524 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+2311 3620 mt 
+(1) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+1302 4367 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+1433 4463 mt 
+(-1) s
+gs 1211 3278 6994 1862 rc
+gr
+
+gs 1114 5042 7188 195 rc
+/c14 { 0.478431 0.062745 0.894118 sr} bdef
+c14
+  48   48 1211 5139 PO
+  48   48 1910 5139 PO
+  48   48 2609 5139 PO
+  48   48 3308 5139 PO
+  48   48 4008 5139 PO
+  48   48 4707 5139 PO
+  48   48 5406 5139 PO
+  48   48 6106 5139 PO
+  48   48 6805 5139 PO
+  48   48 7504 5139 PO
+  48   48 8204 5139 PO
+  48   48 1211 5139 FO
+  48   48 1910 5139 FO
+  48   48 2609 5139 FO
+  48   48 3308 5139 FO
+  48   48 4008 5139 FO
+  48   48 4707 5139 FO
+  48   48 5406 5139 FO
+  48   48 6106 5139 FO
+  48   48 6805 5139 FO
+  48   48 7504 5139 FO
+  48   48 8204 5139 FO
+gr
+
+c14
+gs 1211 3278 6994 1862 rc
+gr
+
+1 sg
+0 1862 6993 0 0 -1862 1154 2400 4 MP
+PP
+-6993 0 0 1862 6993 0 0 -1862 1154 2400 5 MP stroke
+4 w
+DO
+0 sg
+1154 2400 mt 1154  538 L
+1154  538 mt 1154  538 L
+1853 2400 mt 1853  538 L
+1853  538 mt 1853  538 L
+2552 2400 mt 2552  538 L
+2552  538 mt 2552  538 L
+3251 2400 mt 3251  538 L
+3251  538 mt 3251  538 L
+3951 2400 mt 3951  538 L
+3951  538 mt 3951  538 L
+4650 2400 mt 4650  538 L
+4650  538 mt 4650  538 L
+5349 2400 mt 5349  538 L
+5349  538 mt 5349  538 L
+6049 2400 mt 6049  538 L
+6049  538 mt 6049  538 L
+6748 2400 mt 6748  538 L
+6748  538 mt 6748  538 L
+7447 2400 mt 7447  538 L
+7447  538 mt 7447  538 L
+8147 2400 mt 8147  538 L
+8147  538 mt 8147  538 L
+SO
+6 w
+1154 2400 mt 8147 2400 L
+1154  538 mt 8147  538 L
+1154 2400 mt 1154  538 L
+8147 2400 mt 8147  538 L
+1154 2400 mt 8147 2400 L
+1154 2400 mt 1154  538 L
+1154 2400 mt 1154 2330 L
+1154  538 mt 1154  607 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+1121 2545 mt 
+(0) s
+1853 2400 mt 1853 2330 L
+1853  538 mt 1853  607 L
+1770 2545 mt 
+(0.1) s
+2552 2400 mt 2552 2330 L
+2552  538 mt 2552  607 L
+2469 2545 mt 
+(0.2) s
+3251 2400 mt 3251 2330 L
+3251  538 mt 3251  607 L
+3168 2545 mt 
+(0.3) s
+3951 2400 mt 3951 2330 L
+3951  538 mt 3951  607 L
+3868 2545 mt 
+(0.4) s
+4650 2400 mt 4650 2330 L
+4650  538 mt 4650  607 L
+4567 2545 mt 
+(0.5) s
+5349 2400 mt 5349 2330 L
+5349  538 mt 5349  607 L
+5266 2545 mt 
+(0.6) s
+6049 2400 mt 6049 2330 L
+6049  538 mt 6049  607 L
+5966 2545 mt 
+(0.7) s
+6748 2400 mt 6748 2330 L
+6748  538 mt 6748  607 L
+6665 2545 mt 
+(0.8) s
+7447 2400 mt 7447 2330 L
+7447  538 mt 7447  607 L
+7364 2545 mt 
+(0.9) s
+8147 2400 mt 8147 2330 L
+8147  538 mt 8147  607 L
+8114 2545 mt 
+(1) s
+1154 2400 mt 1223 2400 L
+8147 2400 mt 8077 2400 L
+1053 2444 mt 
+(0) s
+1154 2027 mt 1223 2027 L
+8147 2027 mt 8077 2027 L
+ 953 2071 mt 
+(0.2) s
+1154 1655 mt 1223 1655 L
+8147 1655 mt 8077 1655 L
+ 953 1699 mt 
+(0.4) s
+1154 1282 mt 1223 1282 L
+8147 1282 mt 8077 1282 L
+ 953 1326 mt 
+(0.6) s
+1154  910 mt 1223  910 L
+8147  910 mt 8077  910 L
+ 953  954 mt 
+(0.8) s
+1154  538 mt 1223  538 L
+8147  538 mt 8077  538 L
+1053  582 mt 
+(1) s
+1154 2400 mt 8147 2400 L
+1154  538 mt 8147  538 L
+1154 2400 mt 1154  538 L
+8147 2400 mt 8147  538 L
+gs 1154 538 6994 1863 rc
+c8
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 69 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 69 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 69 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+69 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 69 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 69 0 70 0 70 0 70 0 
+70 0 70 1 70 2 70 6 70 11 70 19 70 29 70 39 
+70 52 70 68 69 84 1154 2089 100 MP stroke
+c9
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 69 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 69 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 69 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+69 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 69 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 1 
+70 2 70 6 70 11 70 19 69 29 70 39 70 52 70 68 
+70 84 70 101 70 115 70 122 70 124 70 120 70 110 70 96 
+70 76 70 49 69 18 1154 1158 100 MP stroke
+c10
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 69 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 69 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 69 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+69 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 1 69 2 70 6 
+70 11 70 19 70 29 70 39 70 52 70 68 70 84 70 101 
+70 115 70 122 70 124 70 120 69 110 70 96 70 76 70 49 
+70 18 70 -18 70 -49 70 -76 70 -96 70 -110 70 -120 70 -124 
+70 -122 70 -115 69 -101 1154 2089 100 MP stroke
+c11
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 69 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 69 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 69 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+69 0 70 0 70 0 70 1 70 2 70 6 70 11 70 19 
+70 29 70 39 70 52 70 68 70 84 70 101 69 115 70 122 
+70 124 70 120 70 110 70 96 70 76 70 49 70 18 70 -18 
+70 -49 70 -76 70 -96 70 -110 69 -120 70 -124 70 -122 70 -115 
+70 -101 70 -84 70 -68 70 -52 70 -39 70 -29 70 -19 70 -11 
+70 -6 70 -2 69 -1 1154 2400 100 MP stroke
+c12
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 69 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 69 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 69 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 1 70 2 70 6 70 11 70 19 70 29 70 39 
+69 52 70 68 70 84 70 101 70 115 70 122 70 124 70 120 
+70 110 70 96 70 76 70 49 70 18 70 -18 69 -49 70 -76 
+70 -96 70 -110 70 -120 70 -124 70 -122 70 -115 70 -101 70 -84 
+70 -68 70 -52 70 -39 70 -29 69 -19 70 -11 70 -6 70 -2 
+70 -1 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 69 0 1154 2400 100 MP stroke
+c13
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 69 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 69 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 1 
+70 2 69 6 70 11 70 19 70 29 70 39 70 52 70 68 
+70 84 70 101 70 115 70 122 70 124 70 120 70 110 70 96 
+69 76 70 49 70 18 70 -18 70 -49 70 -76 70 -96 70 -110 
+70 -120 70 -124 70 -122 70 -115 70 -101 70 -84 69 -68 70 -52 
+70 -39 70 -29 70 -19 70 -11 70 -6 70 -2 70 -1 70 0 
+70 0 70 0 70 0 70 0 69 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 69 0 1154 2400 100 MP stroke
+0 sg
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 69 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 69 0 70 0 70 1 70 2 70 6 
+70 11 70 19 70 29 70 39 70 52 70 68 70 84 70 101 
+70 115 69 122 70 124 70 120 70 110 70 96 70 76 70 49 
+70 18 70 -18 70 -49 70 -76 70 -96 70 -110 70 -120 70 -124 
+69 -122 70 -115 70 -101 70 -84 70 -68 70 -52 70 -39 70 -29 
+70 -19 70 -11 70 -6 70 -2 70 -1 70 0 69 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 69 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 69 0 1154 2400 100 MP stroke
+c8
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 69 0 70 0 70 0 
+70 0 70 0 70 0 70 1 70 2 70 6 70 11 70 19 
+70 29 70 39 70 52 69 68 70 84 70 101 70 115 70 122 
+70 124 70 120 70 110 70 96 70 76 70 49 70 18 70 -18 
+70 -49 69 -76 70 -96 70 -110 70 -120 70 -124 70 -122 70 -115 
+70 -101 70 -84 70 -68 70 -52 70 -39 70 -29 70 -19 70 -11 
+69 -6 70 -2 70 -1 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 69 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 69 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 69 0 1154 2400 100 MP stroke
+c9
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 1 70 2 70 6 70 11 69 19 70 29 70 39 
+70 52 70 68 70 84 70 101 70 115 70 122 70 124 70 120 
+70 110 70 96 70 76 69 49 70 18 70 -18 70 -49 70 -76 
+70 -96 70 -110 70 -120 70 -124 70 -122 70 -115 70 -101 70 -84 
+70 -68 69 -52 70 -39 70 -29 70 -19 70 -11 70 -6 70 -2 
+70 -1 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+69 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 69 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 69 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 69 0 1154 2400 100 MP stroke
+c10
+70 2 70 6 70 11 70 19 70 29 70 39 70 52 70 68 
+70 84 70 101 70 115 70 122 70 124 69 120 70 110 70 96 
+70 76 70 49 70 18 70 -18 70 -49 70 -76 70 -96 70 -110 
+70 -120 70 -124 70 -122 69 -115 70 -101 70 -84 70 -68 70 -52 
+70 -39 70 -29 70 -19 70 -11 70 -6 70 -2 70 -1 70 0 
+70 0 69 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+69 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 69 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 69 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 69 0 1154 2400 100 MP stroke
+c8
+70 115 70 122 70 124 70 120 70 110 70 96 70 76 70 49 
+70 18 70 -18 70 -49 70 -76 70 -96 69 -110 70 -120 70 -124 
+70 -122 70 -115 70 -101 70 -84 70 -68 70 -52 70 -39 70 -29 
+70 -19 70 -11 70 -6 69 -2 70 -1 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 69 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+69 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 69 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 69 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 69 0 1154 2400 100 MP stroke
+c9
+70 -49 70 -76 70 -96 70 -110 70 -120 70 -124 70 -122 70 -115 
+70 -101 70 -84 70 -68 70 -52 70 -39 69 -29 70 -19 70 -11 
+70 -6 70 -2 70 -1 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 69 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 69 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+69 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 69 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 69 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 69 0 1154 2400 100 MP stroke
+c10
+70 -68 70 -52 70 -39 70 -29 70 -19 70 -11 70 -6 70 -2 
+70 -1 70 0 70 0 70 0 70 0 69 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 69 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 69 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+69 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 69 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 69 0 70 0 70 0 70 0 
+70 0 70 0 70 0 70 0 70 0 70 0 70 0 70 0 
+70 0 70 0 69 0 1154 2400 100 MP stroke
+gr
+
+c10
+0 sg
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 192 FMSR
+
+ 855 1785 mt  -90 rotate
+(Splines) s
+90 rotate
+2037  427 mt 
+(Periodic Splines of degree 3 for Interpolation on [0,1], NX\
+ =10) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+1777  965 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+1908 1061 mt 
+(1) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+1220 1030 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+1351 1126 mt 
+(0) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+1263 2165 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+1394 2261 mt 
+(-1) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+6719  940 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+6850 1036 mt 
+(8) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+6014  940 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+6145 1036 mt 
+(7) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+5330  927 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+5461 1023 mt 
+(6) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+4590  914 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+4721 1010 mt 
+(5) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+3892  914 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+4023 1010 mt 
+(4) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+3208  940 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+3339 1036 mt 
+(3) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+2518  966 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+2649 1062 mt 
+(2) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+7396  934 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+7527 1030 mt 
+(9/-1) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+7819 1024 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+7950 1120 mt 
+(10/0) s
+%%IncludeResource: font Symbol
+/Symbol /ISOLatin1Encoding 192 FMSR
+
+7734 2093 mt 
+(L) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+7865 2189 mt 
+(11/1) s
+gs 1154 538 6994 1863 rc
+gr
+
+gs 1057 2303 7188 195 rc
+c8
+  48   48 1154 2400 PO
+  48   48 1853 2400 PO
+  48   48 2552 2400 PO
+  48   48 3251 2400 PO
+  48   48 3951 2400 PO
+  48   48 4650 2400 PO
+  48   48 5349 2400 PO
+  48   48 6049 2400 PO
+  48   48 6748 2400 PO
+  48   48 7447 2400 PO
+  48   48 8147 2400 PO
+  48   48 1154 2400 FO
+  48   48 1853 2400 FO
+  48   48 2552 2400 FO
+  48   48 3251 2400 FO
+  48   48 3951 2400 FO
+  48   48 4650 2400 FO
+  48   48 5349 2400 FO
+  48   48 6049 2400 FO
+  48   48 6748 2400 FO
+  48   48 7447 2400 FO
+  48   48 8147 2400 FO
+gr
+
+c8
+gs 1154 538 6994 1863 rc
+gr
+
+
+end %%Color Dict
+
+eplot
+%%EndObject
+
+epage
+end
+
+showpage
+
+%%Trailer
+%%EOF
diff --git a/docs/manual/getgrad_perf_helios.eps b/docs/manual/getgrad_perf_helios.eps
new file mode 100644
index 0000000..0c249ba
--- /dev/null
+++ b/docs/manual/getgrad_perf_helios.eps
@@ -0,0 +1,750 @@
+%!PS-Adobe-3.0 EPSF-3.0
+%%Creator: MATLAB, The MathWorks, Inc. Version 7.14.0.739 (R2012a). Operating System: Linux 2.6.27.56-0.1-default #1 SMP 2010-12-01 16:57:58 +0100 x86_64.
+%%Title: /home/ttran/bsplines_hlst-dev/opt_wk/getgrad_perf_helios.eps
+%%CreationDate: 03/27/2012  13:30:13
+%%DocumentNeededFonts: Helvetica
+%%DocumentProcessColors: Cyan Magenta Yellow Black
+%%LanguageLevel: 2
+%%Pages: 1
+%%BoundingBox:    26   203   568   637
+%%EndComments
+
+%%BeginProlog
+% MathWorks dictionary
+/MathWorks 160 dict begin
+% definition operators
+/bdef {bind def} bind def
+/ldef {load def} bind def
+/xdef {exch def} bdef
+/xstore {exch store} bdef
+% operator abbreviations
+/c  /clip ldef
+/cc /concat ldef
+/cp /closepath ldef
+/gr /grestore ldef
+/gs /gsave ldef
+/mt /moveto ldef
+/np /newpath ldef
+/cm /currentmatrix ldef
+/sm /setmatrix ldef
+/rm /rmoveto ldef
+/rl /rlineto ldef
+/s {show newpath} bdef
+/sc {setcmykcolor} bdef
+/sr /setrgbcolor ldef
+/sg /setgray ldef
+/w /setlinewidth ldef
+/j /setlinejoin ldef
+/cap /setlinecap ldef
+/rc {rectclip} bdef
+/rf {rectfill} bdef
+% page state control
+/pgsv () def
+/bpage {/pgsv save def} bdef
+/epage {pgsv restore} bdef
+/bplot /gsave ldef
+/eplot {stroke grestore} bdef
+% orientation switch
+/portraitMode 0 def /landscapeMode 1 def /rotateMode 2 def
+% coordinate system mappings
+/dpi2point 0 def
+% font control
+/FontSize 0 def
+/FMS {/FontSize xstore findfont [FontSize 0 0 FontSize neg 0 0]
+  makefont setfont} bdef
+/reencode {exch dup where {pop load} {pop StandardEncoding} ifelse
+  exch dup 3 1 roll findfont dup length dict begin
+  { 1 index /FID ne {def}{pop pop} ifelse } forall
+  /Encoding exch def currentdict end definefont pop} bdef
+/isroman {findfont /CharStrings get /Agrave known} bdef
+/FMSR {3 1 roll 1 index dup isroman {reencode} {pop pop} ifelse
+  exch FMS} bdef
+/csm {1 dpi2point div -1 dpi2point div scale neg translate
+ dup landscapeMode eq {pop -90 rotate}
+  {rotateMode eq {90 rotate} if} ifelse} bdef
+% line types: solid, dotted, dashed, dotdash
+/SO { [] 0 setdash } bdef
+/DO { [.5 dpi2point mul 4 dpi2point mul] 0 setdash } bdef
+/DA { [6 dpi2point mul] 0 setdash } bdef
+/DD { [.5 dpi2point mul 4 dpi2point mul 6 dpi2point mul 4
+  dpi2point mul] 0 setdash } bdef
+% macros for lines and objects
+/L {lineto stroke} bdef
+/MP {3 1 roll moveto 1 sub {rlineto} repeat} bdef
+/AP {{rlineto} repeat} bdef
+/PDlw -1 def
+/W {/PDlw currentlinewidth def setlinewidth} def
+/PP {closepath eofill} bdef
+/DP {closepath stroke} bdef
+/MR {4 -2 roll moveto dup  0 exch rlineto exch 0 rlineto
+  neg 0 exch rlineto closepath} bdef
+/FR {MR stroke} bdef
+/PR {MR fill} bdef
+/L1i {{currentfile picstr readhexstring pop} image} bdef
+/tMatrix matrix def
+/MakeOval {newpath tMatrix currentmatrix pop translate scale
+0 0 1 0 360 arc tMatrix setmatrix} bdef
+/FO {MakeOval stroke} bdef
+/PO {MakeOval fill} bdef
+/PD {currentlinewidth 2 div 0 360 arc fill
+   PDlw -1 eq not {PDlw w /PDlw -1 def} if} def
+/FA {newpath tMatrix currentmatrix pop translate scale
+  0 0 1 5 -2 roll arc tMatrix setmatrix stroke} bdef
+/PA {newpath tMatrix currentmatrix pop	translate 0 0 moveto scale
+  0 0 1 5 -2 roll arc closepath tMatrix setmatrix fill} bdef
+/FAn {newpath tMatrix currentmatrix pop translate scale
+  0 0 1 5 -2 roll arcn tMatrix setmatrix stroke} bdef
+/PAn {newpath tMatrix currentmatrix pop translate 0 0 moveto scale
+  0 0 1 5 -2 roll arcn closepath tMatrix setmatrix fill} bdef
+/vradius 0 def /hradius 0 def /lry 0 def
+/lrx 0 def /uly 0 def /ulx 0 def /rad 0 def
+/MRR {/vradius xdef /hradius xdef /lry xdef /lrx xdef /uly xdef
+  /ulx xdef newpath tMatrix currentmatrix pop ulx hradius add uly
+  vradius add translate hradius vradius scale 0 0 1 180 270 arc 
+  tMatrix setmatrix lrx hradius sub uly vradius add translate
+  hradius vradius scale 0 0 1 270 360 arc tMatrix setmatrix
+  lrx hradius sub lry vradius sub translate hradius vradius scale
+  0 0 1 0 90 arc tMatrix setmatrix ulx hradius add lry vradius sub
+  translate hradius vradius scale 0 0 1 90 180 arc tMatrix setmatrix
+  closepath} bdef
+/FRR {MRR stroke } bdef
+/PRR {MRR fill } bdef
+/MlrRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lry uly sub 2 div def
+  newpath tMatrix currentmatrix pop ulx rad add uly rad add translate
+  rad rad scale 0 0 1 90 270 arc tMatrix setmatrix lrx rad sub lry rad
+  sub translate rad rad scale 0 0 1 270 90 arc tMatrix setmatrix
+  closepath} bdef
+/FlrRR {MlrRR stroke } bdef
+/PlrRR {MlrRR fill } bdef
+/MtbRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lrx ulx sub 2 div def
+  newpath tMatrix currentmatrix pop ulx rad add uly rad add translate
+  rad rad scale 0 0 1 180 360 arc tMatrix setmatrix lrx rad sub lry rad
+  sub translate rad rad scale 0 0 1 0 180 arc tMatrix setmatrix
+  closepath} bdef
+/FtbRR {MtbRR stroke } bdef
+/PtbRR {MtbRR fill } bdef
+/stri 6 array def /dtri 6 array def
+/smat 6 array def /dmat 6 array def
+/tmat1 6 array def /tmat2 6 array def /dif 3 array def
+/asub {/ind2 exch def /ind1 exch def dup dup
+  ind1 get exch ind2 get sub exch } bdef
+/tri_to_matrix {
+  2 0 asub 3 1 asub 4 0 asub 5 1 asub
+  dup 0 get exch 1 get 7 -1 roll astore } bdef
+/compute_transform {
+  dmat dtri tri_to_matrix tmat1 invertmatrix 
+  smat stri tri_to_matrix tmat2 concatmatrix } bdef
+/ds {stri astore pop} bdef
+/dt {dtri astore pop} bdef
+/db {2 copy /cols xdef /rows xdef mul dup 3 mul string
+  currentfile 
+  3 index 0 eq {/ASCIIHexDecode filter}
+  {/ASCII85Decode filter 3 index 2 eq {/RunLengthDecode filter} if }
+  ifelse exch readstring pop
+  dup 0 3 index getinterval /rbmap xdef
+  dup 2 index dup getinterval /gbmap xdef
+  1 index dup 2 mul exch getinterval /bbmap xdef pop pop}bdef
+/it {gs np dtri aload pop moveto lineto lineto cp c
+  cols rows 8 compute_transform 
+  rbmap gbmap bbmap true 3 colorimage gr}bdef
+/il {newpath moveto lineto stroke}bdef
+currentdict end def
+%%EndProlog
+
+%%BeginSetup
+MathWorks begin
+
+0 cap
+
+end
+%%EndSetup
+
+%%Page: 1 1
+%%BeginPageSetup
+%%PageBoundingBox:    26   203   568   637
+MathWorks begin
+bpage
+%%EndPageSetup
+
+%%BeginObject: obj1
+bplot
+
+/dpi2point 12 def
+portraitMode 0312 7644 csm
+
+    0     0  6510  5205 rc
+85 dict begin %Colortable dictionary
+/c0 { 0.000000 0.000000 0.000000 sr} bdef
+/c1 { 1.000000 1.000000 1.000000 sr} bdef
+/c2 { 0.900000 0.000000 0.000000 sr} bdef
+/c3 { 0.000000 0.820000 0.000000 sr} bdef
+/c4 { 0.000000 0.000000 0.800000 sr} bdef
+/c5 { 0.910000 0.820000 0.320000 sr} bdef
+/c6 { 1.000000 0.260000 0.820000 sr} bdef
+/c7 { 0.000000 0.820000 0.820000 sr} bdef
+c0
+1 j
+1 sg
+   0    0 6511 5206 rf
+6 w
+0 4242 5046 0 0 -4242 846 4632 4 MP
+PP
+-5046 0 0 4242 5046 0 0 -4242 846 4632 5 MP stroke
+4 w
+DO
+0 sg
+ 846 4632 mt  846  390 L
+ 846  390 mt  846  390 L
+1855 4632 mt 1855  390 L
+1855  390 mt 1855  390 L
+2864 4632 mt 2864  390 L
+2864  390 mt 2864  390 L
+3873 4632 mt 3873  390 L
+3873  390 mt 3873  390 L
+4882 4632 mt 4882  390 L
+4882  390 mt 4882  390 L
+5892 4632 mt 5892  390 L
+5892  390 mt 5892  390 L
+ 846 4632 mt 5892 4632 L
+5892 4632 mt 5892 4632 L
+ 846 3925 mt 5892 3925 L
+5892 3925 mt 5892 3925 L
+ 846 3218 mt 5892 3218 L
+5892 3218 mt 5892 3218 L
+ 846 2511 mt 5892 2511 L
+5892 2511 mt 5892 2511 L
+ 846 1804 mt 5892 1804 L
+5892 1804 mt 5892 1804 L
+ 846 1097 mt 5892 1097 L
+5892 1097 mt 5892 1097 L
+ 846  390 mt 5892  390 L
+5892  390 mt 5892  390 L
+SO
+6 w
+ 846 4632 mt 5892 4632 L
+ 846  390 mt 5892  390 L
+ 846 4632 mt  846  390 L
+5892 4632 mt 5892  390 L
+ 846 4632 mt 5892 4632 L
+ 846 4632 mt  846  390 L
+ 846 4632 mt  846 4606 L
+ 846  390 mt  846  415 L
+DO
+ 846 4632 mt  846  390 L
+ 846  390 mt  846  390 L
+SO
+ 846 4632 mt  846 4581 L
+ 846  390 mt  846  440 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+ 758 4814 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 80 FMSR
+
+ 891 4740 mt 
+(0) s
+1149 4632 mt 1149 4606 L
+1149  390 mt 1149  415 L
+DO
+1149 4632 mt 1149  390 L
+1149  390 mt 1149  390 L
+SO
+1327 4632 mt 1327 4606 L
+1327  390 mt 1327  415 L
+DO
+1327 4632 mt 1327  390 L
+1327  390 mt 1327  390 L
+SO
+1453 4632 mt 1453 4606 L
+1453  390 mt 1453  415 L
+DO
+1453 4632 mt 1453  390 L
+1453  390 mt 1453  390 L
+SO
+1551 4632 mt 1551 4606 L
+1551  390 mt 1551  415 L
+DO
+1551 4632 mt 1551  390 L
+1551  390 mt 1551  390 L
+SO
+1631 4632 mt 1631 4606 L
+1631  390 mt 1631  415 L
+DO
+1631 4632 mt 1631  390 L
+1631  390 mt 1631  390 L
+SO
+1698 4632 mt 1698 4606 L
+1698  390 mt 1698  415 L
+DO
+1698 4632 mt 1698  390 L
+1698  390 mt 1698  390 L
+SO
+1757 4632 mt 1757 4606 L
+1757  390 mt 1757  415 L
+DO
+1757 4632 mt 1757  390 L
+1757  390 mt 1757  390 L
+SO
+1809 4632 mt 1809 4606 L
+1809  390 mt 1809  415 L
+DO
+1809 4632 mt 1809  390 L
+1809  390 mt 1809  390 L
+SO
+1855 4632 mt 1855 4606 L
+1855  390 mt 1855  415 L
+DO
+1855 4632 mt 1855  390 L
+1855  390 mt 1855  390 L
+SO
+1855 4632 mt 1855 4581 L
+1855  390 mt 1855  440 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+1767 4814 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 80 FMSR
+
+1900 4740 mt 
+(1) s
+2158 4632 mt 2158 4606 L
+2158  390 mt 2158  415 L
+DO
+2158 4632 mt 2158  390 L
+2158  390 mt 2158  390 L
+SO
+2336 4632 mt 2336 4606 L
+2336  390 mt 2336  415 L
+DO
+2336 4632 mt 2336  390 L
+2336  390 mt 2336  390 L
+SO
+2462 4632 mt 2462 4606 L
+2462  390 mt 2462  415 L
+DO
+2462 4632 mt 2462  390 L
+2462  390 mt 2462  390 L
+SO
+2560 4632 mt 2560 4606 L
+2560  390 mt 2560  415 L
+DO
+2560 4632 mt 2560  390 L
+2560  390 mt 2560  390 L
+SO
+2640 4632 mt 2640 4606 L
+2640  390 mt 2640  415 L
+DO
+2640 4632 mt 2640  390 L
+2640  390 mt 2640  390 L
+SO
+2708 4632 mt 2708 4606 L
+2708  390 mt 2708  415 L
+DO
+2708 4632 mt 2708  390 L
+2708  390 mt 2708  390 L
+SO
+2766 4632 mt 2766 4606 L
+2766  390 mt 2766  415 L
+DO
+2766 4632 mt 2766  390 L
+2766  390 mt 2766  390 L
+SO
+2818 4632 mt 2818 4606 L
+2818  390 mt 2818  415 L
+DO
+2818 4632 mt 2818  390 L
+2818  390 mt 2818  390 L
+SO
+2864 4632 mt 2864 4606 L
+2864  390 mt 2864  415 L
+DO
+2864 4632 mt 2864  390 L
+2864  390 mt 2864  390 L
+SO
+2864 4632 mt 2864 4581 L
+2864  390 mt 2864  440 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+2776 4814 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 80 FMSR
+
+2909 4740 mt 
+(2) s
+3168 4632 mt 3168 4606 L
+3168  390 mt 3168  415 L
+DO
+3168 4632 mt 3168  390 L
+3168  390 mt 3168  390 L
+SO
+3345 4632 mt 3345 4606 L
+3345  390 mt 3345  415 L
+DO
+3345 4632 mt 3345  390 L
+3345  390 mt 3345  390 L
+SO
+3471 4632 mt 3471 4606 L
+3471  390 mt 3471  415 L
+DO
+3471 4632 mt 3471  390 L
+3471  390 mt 3471  390 L
+SO
+3569 4632 mt 3569 4606 L
+3569  390 mt 3569  415 L
+DO
+3569 4632 mt 3569  390 L
+3569  390 mt 3569  390 L
+SO
+3649 4632 mt 3649 4606 L
+3649  390 mt 3649  415 L
+DO
+3649 4632 mt 3649  390 L
+3649  390 mt 3649  390 L
+SO
+3717 4632 mt 3717 4606 L
+3717  390 mt 3717  415 L
+DO
+3717 4632 mt 3717  390 L
+3717  390 mt 3717  390 L
+SO
+3775 4632 mt 3775 4606 L
+3775  390 mt 3775  415 L
+DO
+3775 4632 mt 3775  390 L
+3775  390 mt 3775  390 L
+SO
+3827 4632 mt 3827 4606 L
+3827  390 mt 3827  415 L
+DO
+3827 4632 mt 3827  390 L
+3827  390 mt 3827  390 L
+SO
+3873 4632 mt 3873 4606 L
+3873  390 mt 3873  415 L
+DO
+3873 4632 mt 3873  390 L
+3873  390 mt 3873  390 L
+SO
+3873 4632 mt 3873 4581 L
+3873  390 mt 3873  440 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+3785 4814 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 80 FMSR
+
+3918 4740 mt 
+(3) s
+4177 4632 mt 4177 4606 L
+4177  390 mt 4177  415 L
+DO
+4177 4632 mt 4177  390 L
+4177  390 mt 4177  390 L
+SO
+4355 4632 mt 4355 4606 L
+4355  390 mt 4355  415 L
+DO
+4355 4632 mt 4355  390 L
+4355  390 mt 4355  390 L
+SO
+4481 4632 mt 4481 4606 L
+4481  390 mt 4481  415 L
+DO
+4481 4632 mt 4481  390 L
+4481  390 mt 4481  390 L
+SO
+4579 4632 mt 4579 4606 L
+4579  390 mt 4579  415 L
+DO
+4579 4632 mt 4579  390 L
+4579  390 mt 4579  390 L
+SO
+4658 4632 mt 4658 4606 L
+4658  390 mt 4658  415 L
+DO
+4658 4632 mt 4658  390 L
+4658  390 mt 4658  390 L
+SO
+4726 4632 mt 4726 4606 L
+4726  390 mt 4726  415 L
+DO
+4726 4632 mt 4726  390 L
+4726  390 mt 4726  390 L
+SO
+4784 4632 mt 4784 4606 L
+4784  390 mt 4784  415 L
+DO
+4784 4632 mt 4784  390 L
+4784  390 mt 4784  390 L
+SO
+4836 4632 mt 4836 4606 L
+4836  390 mt 4836  415 L
+DO
+4836 4632 mt 4836  390 L
+4836  390 mt 4836  390 L
+SO
+4882 4632 mt 4882 4606 L
+4882  390 mt 4882  415 L
+DO
+4882 4632 mt 4882  390 L
+4882  390 mt 4882  390 L
+SO
+4882 4632 mt 4882 4581 L
+4882  390 mt 4882  440 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+4794 4814 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 80 FMSR
+
+4927 4740 mt 
+(4) s
+5186 4632 mt 5186 4606 L
+5186  390 mt 5186  415 L
+DO
+5186 4632 mt 5186  390 L
+5186  390 mt 5186  390 L
+SO
+5364 4632 mt 5364 4606 L
+5364  390 mt 5364  415 L
+DO
+5364 4632 mt 5364  390 L
+5364  390 mt 5364  390 L
+SO
+5490 4632 mt 5490 4606 L
+5490  390 mt 5490  415 L
+DO
+5490 4632 mt 5490  390 L
+5490  390 mt 5490  390 L
+SO
+5588 4632 mt 5588 4606 L
+5588  390 mt 5588  415 L
+DO
+5588 4632 mt 5588  390 L
+5588  390 mt 5588  390 L
+SO
+5668 4632 mt 5668 4606 L
+5668  390 mt 5668  415 L
+DO
+5668 4632 mt 5668  390 L
+5668  390 mt 5668  390 L
+SO
+5735 4632 mt 5735 4606 L
+5735  390 mt 5735  415 L
+DO
+5735 4632 mt 5735  390 L
+5735  390 mt 5735  390 L
+SO
+5794 4632 mt 5794 4606 L
+5794  390 mt 5794  415 L
+DO
+5794 4632 mt 5794  390 L
+5794  390 mt 5794  390 L
+SO
+5845 4632 mt 5845 4606 L
+5845  390 mt 5845  415 L
+DO
+5845 4632 mt 5845  390 L
+5845  390 mt 5845  390 L
+SO
+5892 4632 mt 5892 4606 L
+5892  390 mt 5892  415 L
+DO
+5892 4632 mt 5892  390 L
+5892  390 mt 5892  390 L
+SO
+5892 4632 mt 5892 4581 L
+5892  390 mt 5892  440 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+5804 4814 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 80 FMSR
+
+5937 4740 mt 
+(5) s
+ 846 4632 mt  896 4632 L
+5892 4632 mt 5841 4632 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+ 745 4676 mt 
+(1) s
+ 846 3925 mt  896 3925 L
+5892 3925 mt 5841 3925 L
+ 745 3969 mt 
+(2) s
+ 846 3218 mt  896 3218 L
+5892 3218 mt 5841 3218 L
+ 745 3262 mt 
+(3) s
+ 846 2511 mt  896 2511 L
+5892 2511 mt 5841 2511 L
+ 745 2555 mt 
+(4) s
+ 846 1804 mt  896 1804 L
+5892 1804 mt 5841 1804 L
+ 745 1848 mt 
+(5) s
+ 846 1097 mt  896 1097 L
+5892 1097 mt 5841 1097 L
+ 745 1141 mt 
+(6) s
+ 846  390 mt  896  390 L
+5892  390 mt 5841  390 L
+ 745  434 mt 
+(7) s
+ 846 4632 mt 5892 4632 L
+ 846  390 mt 5892  390 L
+ 846 4632 mt  846  390 L
+5892 4632 mt 5892  390 L
+gs 846 390 5047 4243 rc
+/c8 { 0.000000 0.000000 1.000000 sr} bdef
+c8
+303 15 304 52 304 71 304 62 304 155 303 234 304 258 304 132 
+304 168 304 -332 303 -711 304 -995 304 -862 304 -783 303 -524 846 4632 16 MP stroke
+gr
+
+c8
+  36   36  846 4632 FO
+  36   36 1149 4108 FO
+  36   36 1453 3325 FO
+  36   36 1757 2463 FO
+  36   36 2061 1468 FO
+  36   36 2364  757 FO
+  36   36 2668  425 FO
+  36   36 2972  593 FO
+  36   36 3276  725 FO
+  36   36 3580  983 FO
+  36   36 3883 1217 FO
+  36   36 4187 1372 FO
+  36   36 4491 1434 FO
+  36   36 4795 1505 FO
+  36   36 5099 1557 FO
+  36   36 5402 1572 FO
+gs 846 390 5047 4243 rc
+/c9 { 0.000000 0.500000 0.000000 sr} bdef
+c9
+303 51 304 33 304 5 304 35 304 31 303 277 304 300 304 99 
+304 36 304 21 303 -287 304 -649 304 -578 304 -624 303 -478 846 4632 16 MP stroke
+gr
+
+c9
+0 j
+0 -58 -58 0 0 58 58 0 817 4603 5 MP
+DP
+0 -58 -58 0 0 58 58 0 1120 4125 5 MP
+DP
+0 -58 -58 0 0 58 58 0 1424 3501 5 MP
+DP
+0 -58 -58 0 0 58 58 0 1728 2923 5 MP
+DP
+0 -58 -58 0 0 58 58 0 2032 2274 5 MP
+DP
+0 -58 -58 0 0 58 58 0 2335 1987 5 MP
+DP
+0 -58 -58 0 0 58 58 0 2639 2008 5 MP
+DP
+0 -58 -58 0 0 58 58 0 2943 2044 5 MP
+DP
+0 -58 -58 0 0 58 58 0 3247 2143 5 MP
+DP
+0 -58 -58 0 0 58 58 0 3551 2443 5 MP
+DP
+0 -58 -58 0 0 58 58 0 3854 2720 5 MP
+DP
+0 -58 -58 0 0 58 58 0 4158 2751 5 MP
+DP
+0 -58 -58 0 0 58 58 0 4462 2786 5 MP
+DP
+0 -58 -58 0 0 58 58 0 4766 2791 5 MP
+DP
+0 -58 -58 0 0 58 58 0 5070 2824 5 MP
+DP
+0 -58 -58 0 0 58 58 0 5373 2875 5 MP
+DP
+gs 846 390 5047 4243 rc
+gr
+
+0 sg
+2675 4957 mt 
+(Number of grouped points) s
+ 663 3449 mt  -90 rotate
+(GETGRAD Vectorization Speed Up) s
+90 rotate
+1663  295 mt 
+(HELIOS, 2D case \(1 periodic + 1 non-periodic\), using PPFORM) s
+ 829 4675 mt 
+( ) s
+5876  432 mt 
+( ) s
+1 sg
+0 334 1451 0 0 -334 4382 784 4 MP
+PP
+-1451 0 0 334 1451 0 0 -334 4382 784 5 MP stroke
+4 w
+DO
+SO
+6 w
+0 sg
+4382  784 mt 5833  784 L
+4382  450 mt 5833  450 L
+4382  784 mt 4382  450 L
+5833  784 mt 5833  450 L
+4382  784 mt 5833  784 L
+4382  784 mt 4382  450 L
+4382  784 mt 5833  784 L
+4382  450 mt 5833  450 L
+4382  784 mt 4382  450 L
+5833  784 mt 5833  450 L
+4840  584 mt 
+(Quadratic Splines) s
+gs 4382 450 1452 335 rc
+c8
+352 0 4452 541 2 MP stroke
+gs 4555 468 147 147 rc
+  36   36 4628  541 FO
+gr
+
+gr
+
+c8
+0 sg
+4840  735 mt 
+(Cubic Splines) s
+gs 4382 450 1452 335 rc
+c9
+352 0 4452 692 2 MP stroke
+gs 4555 619 147 147 rc
+0 -58 -58 0 0 58 58 0 4599 663 5 MP
+DP
+gr
+
+gr
+
+c9
+
+end %%Color Dict
+
+eplot
+%%EndObject
+
+epage
+end
+
+showpage
+
+%%Trailer
+%%EOF
diff --git a/docs/manual/getgrad_perf_hpcff.eps b/docs/manual/getgrad_perf_hpcff.eps
new file mode 100644
index 0000000..2760e38
--- /dev/null
+++ b/docs/manual/getgrad_perf_hpcff.eps
@@ -0,0 +1,762 @@
+%!PS-Adobe-3.0 EPSF-3.0
+%%Creator: MATLAB, The MathWorks, Inc. Version 7.14.0.739 (R2012a). Operating System: Linux 2.6.27.56-0.1-default #1 SMP 2010-12-01 16:57:58 +0100 x86_64.
+%%Title: /home/ttran/bsplines_hlst-dev/opt_wk/getgrad_perf_hpcff.eps
+%%CreationDate: 03/27/2012  13:01:59
+%%DocumentNeededFonts: Helvetica
+%%DocumentProcessColors: Cyan Magenta Yellow Black
+%%LanguageLevel: 2
+%%Pages: 1
+%%BoundingBox:    52   222   543   619
+%%EndComments
+
+%%BeginProlog
+% MathWorks dictionary
+/MathWorks 160 dict begin
+% definition operators
+/bdef {bind def} bind def
+/ldef {load def} bind def
+/xdef {exch def} bdef
+/xstore {exch store} bdef
+% operator abbreviations
+/c  /clip ldef
+/cc /concat ldef
+/cp /closepath ldef
+/gr /grestore ldef
+/gs /gsave ldef
+/mt /moveto ldef
+/np /newpath ldef
+/cm /currentmatrix ldef
+/sm /setmatrix ldef
+/rm /rmoveto ldef
+/rl /rlineto ldef
+/s {show newpath} bdef
+/sc {setcmykcolor} bdef
+/sr /setrgbcolor ldef
+/sg /setgray ldef
+/w /setlinewidth ldef
+/j /setlinejoin ldef
+/cap /setlinecap ldef
+/rc {rectclip} bdef
+/rf {rectfill} bdef
+% page state control
+/pgsv () def
+/bpage {/pgsv save def} bdef
+/epage {pgsv restore} bdef
+/bplot /gsave ldef
+/eplot {stroke grestore} bdef
+% orientation switch
+/portraitMode 0 def /landscapeMode 1 def /rotateMode 2 def
+% coordinate system mappings
+/dpi2point 0 def
+% font control
+/FontSize 0 def
+/FMS {/FontSize xstore findfont [FontSize 0 0 FontSize neg 0 0]
+  makefont setfont} bdef
+/reencode {exch dup where {pop load} {pop StandardEncoding} ifelse
+  exch dup 3 1 roll findfont dup length dict begin
+  { 1 index /FID ne {def}{pop pop} ifelse } forall
+  /Encoding exch def currentdict end definefont pop} bdef
+/isroman {findfont /CharStrings get /Agrave known} bdef
+/FMSR {3 1 roll 1 index dup isroman {reencode} {pop pop} ifelse
+  exch FMS} bdef
+/csm {1 dpi2point div -1 dpi2point div scale neg translate
+ dup landscapeMode eq {pop -90 rotate}
+  {rotateMode eq {90 rotate} if} ifelse} bdef
+% line types: solid, dotted, dashed, dotdash
+/SO { [] 0 setdash } bdef
+/DO { [.5 dpi2point mul 4 dpi2point mul] 0 setdash } bdef
+/DA { [6 dpi2point mul] 0 setdash } bdef
+/DD { [.5 dpi2point mul 4 dpi2point mul 6 dpi2point mul 4
+  dpi2point mul] 0 setdash } bdef
+% macros for lines and objects
+/L {lineto stroke} bdef
+/MP {3 1 roll moveto 1 sub {rlineto} repeat} bdef
+/AP {{rlineto} repeat} bdef
+/PDlw -1 def
+/W {/PDlw currentlinewidth def setlinewidth} def
+/PP {closepath eofill} bdef
+/DP {closepath stroke} bdef
+/MR {4 -2 roll moveto dup  0 exch rlineto exch 0 rlineto
+  neg 0 exch rlineto closepath} bdef
+/FR {MR stroke} bdef
+/PR {MR fill} bdef
+/L1i {{currentfile picstr readhexstring pop} image} bdef
+/tMatrix matrix def
+/MakeOval {newpath tMatrix currentmatrix pop translate scale
+0 0 1 0 360 arc tMatrix setmatrix} bdef
+/FO {MakeOval stroke} bdef
+/PO {MakeOval fill} bdef
+/PD {currentlinewidth 2 div 0 360 arc fill
+   PDlw -1 eq not {PDlw w /PDlw -1 def} if} def
+/FA {newpath tMatrix currentmatrix pop translate scale
+  0 0 1 5 -2 roll arc tMatrix setmatrix stroke} bdef
+/PA {newpath tMatrix currentmatrix pop	translate 0 0 moveto scale
+  0 0 1 5 -2 roll arc closepath tMatrix setmatrix fill} bdef
+/FAn {newpath tMatrix currentmatrix pop translate scale
+  0 0 1 5 -2 roll arcn tMatrix setmatrix stroke} bdef
+/PAn {newpath tMatrix currentmatrix pop translate 0 0 moveto scale
+  0 0 1 5 -2 roll arcn closepath tMatrix setmatrix fill} bdef
+/vradius 0 def /hradius 0 def /lry 0 def
+/lrx 0 def /uly 0 def /ulx 0 def /rad 0 def
+/MRR {/vradius xdef /hradius xdef /lry xdef /lrx xdef /uly xdef
+  /ulx xdef newpath tMatrix currentmatrix pop ulx hradius add uly
+  vradius add translate hradius vradius scale 0 0 1 180 270 arc 
+  tMatrix setmatrix lrx hradius sub uly vradius add translate
+  hradius vradius scale 0 0 1 270 360 arc tMatrix setmatrix
+  lrx hradius sub lry vradius sub translate hradius vradius scale
+  0 0 1 0 90 arc tMatrix setmatrix ulx hradius add lry vradius sub
+  translate hradius vradius scale 0 0 1 90 180 arc tMatrix setmatrix
+  closepath} bdef
+/FRR {MRR stroke } bdef
+/PRR {MRR fill } bdef
+/MlrRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lry uly sub 2 div def
+  newpath tMatrix currentmatrix pop ulx rad add uly rad add translate
+  rad rad scale 0 0 1 90 270 arc tMatrix setmatrix lrx rad sub lry rad
+  sub translate rad rad scale 0 0 1 270 90 arc tMatrix setmatrix
+  closepath} bdef
+/FlrRR {MlrRR stroke } bdef
+/PlrRR {MlrRR fill } bdef
+/MtbRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lrx ulx sub 2 div def
+  newpath tMatrix currentmatrix pop ulx rad add uly rad add translate
+  rad rad scale 0 0 1 180 360 arc tMatrix setmatrix lrx rad sub lry rad
+  sub translate rad rad scale 0 0 1 0 180 arc tMatrix setmatrix
+  closepath} bdef
+/FtbRR {MtbRR stroke } bdef
+/PtbRR {MtbRR fill } bdef
+/stri 6 array def /dtri 6 array def
+/smat 6 array def /dmat 6 array def
+/tmat1 6 array def /tmat2 6 array def /dif 3 array def
+/asub {/ind2 exch def /ind1 exch def dup dup
+  ind1 get exch ind2 get sub exch } bdef
+/tri_to_matrix {
+  2 0 asub 3 1 asub 4 0 asub 5 1 asub
+  dup 0 get exch 1 get 7 -1 roll astore } bdef
+/compute_transform {
+  dmat dtri tri_to_matrix tmat1 invertmatrix 
+  smat stri tri_to_matrix tmat2 concatmatrix } bdef
+/ds {stri astore pop} bdef
+/dt {dtri astore pop} bdef
+/db {2 copy /cols xdef /rows xdef mul dup 3 mul string
+  currentfile 
+  3 index 0 eq {/ASCIIHexDecode filter}
+  {/ASCII85Decode filter 3 index 2 eq {/RunLengthDecode filter} if }
+  ifelse exch readstring pop
+  dup 0 3 index getinterval /rbmap xdef
+  dup 2 index dup getinterval /gbmap xdef
+  1 index dup 2 mul exch getinterval /bbmap xdef pop pop}bdef
+/it {gs np dtri aload pop moveto lineto lineto cp c
+  cols rows 8 compute_transform 
+  rbmap gbmap bbmap true 3 colorimage gr}bdef
+/il {newpath moveto lineto stroke}bdef
+currentdict end def
+%%EndProlog
+
+%%BeginSetup
+MathWorks begin
+
+0 cap
+
+end
+%%EndSetup
+
+%%Page: 1 1
+%%BeginPageSetup
+%%PageBoundingBox:    52   222   543   619
+MathWorks begin
+bpage
+%%EndPageSetup
+
+%%BeginObject: obj1
+bplot
+
+/dpi2point 12 def
+portraitMode 0624 7428 csm
+
+    0     0  5895  4763 rc
+85 dict begin %Colortable dictionary
+/c0 { 0.000000 0.000000 0.000000 sr} bdef
+/c1 { 1.000000 1.000000 1.000000 sr} bdef
+/c2 { 0.900000 0.000000 0.000000 sr} bdef
+/c3 { 0.000000 0.820000 0.000000 sr} bdef
+/c4 { 0.000000 0.000000 0.800000 sr} bdef
+/c5 { 0.910000 0.820000 0.320000 sr} bdef
+/c6 { 1.000000 0.260000 0.820000 sr} bdef
+/c7 { 0.000000 0.820000 0.820000 sr} bdef
+c0
+1 j
+1 sg
+   0    0 5896 4764 rf
+6 w
+0 3882 4569 0 0 -3882 766 4239 4 MP
+PP
+-4569 0 0 3882 4569 0 0 -3882 766 4239 5 MP stroke
+4 w
+DO
+0 sg
+ 766 4239 mt  766  357 L
+ 766  357 mt  766  357 L
+1679 4239 mt 1679  357 L
+1679  357 mt 1679  357 L
+2593 4239 mt 2593  357 L
+2593  357 mt 2593  357 L
+3507 4239 mt 3507  357 L
+3507  357 mt 3507  357 L
+4421 4239 mt 4421  357 L
+4421  357 mt 4421  357 L
+5335 4239 mt 5335  357 L
+5335  357 mt 5335  357 L
+ 766 4239 mt 5335 4239 L
+5335 4239 mt 5335 4239 L
+ 766 3753 mt 5335 3753 L
+5335 3753 mt 5335 3753 L
+ 766 3268 mt 5335 3268 L
+5335 3268 mt 5335 3268 L
+ 766 2783 mt 5335 2783 L
+5335 2783 mt 5335 2783 L
+ 766 2298 mt 5335 2298 L
+5335 2298 mt 5335 2298 L
+ 766 1812 mt 5335 1812 L
+5335 1812 mt 5335 1812 L
+ 766 1327 mt 5335 1327 L
+5335 1327 mt 5335 1327 L
+ 766  842 mt 5335  842 L
+5335  842 mt 5335  842 L
+ 766  357 mt 5335  357 L
+5335  357 mt 5335  357 L
+SO
+6 w
+ 766 4239 mt 5335 4239 L
+ 766  357 mt 5335  357 L
+ 766 4239 mt  766  357 L
+5335 4239 mt 5335  357 L
+ 766 4239 mt 5335 4239 L
+ 766 4239 mt  766  357 L
+ 766 4239 mt  766 4216 L
+ 766  357 mt  766  379 L
+DO
+ 766 4239 mt  766  357 L
+ 766  357 mt  766  357 L
+SO
+ 766 4239 mt  766 4193 L
+ 766  357 mt  766  402 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+ 678 4421 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 80 FMSR
+
+ 811 4347 mt 
+(0) s
+1041 4239 mt 1041 4216 L
+1041  357 mt 1041  379 L
+DO
+1041 4239 mt 1041  357 L
+1041  357 mt 1041  357 L
+SO
+1201 4239 mt 1201 4216 L
+1201  357 mt 1201  379 L
+DO
+1201 4239 mt 1201  357 L
+1201  357 mt 1201  357 L
+SO
+1316 4239 mt 1316 4216 L
+1316  357 mt 1316  379 L
+DO
+1316 4239 mt 1316  357 L
+1316  357 mt 1316  357 L
+SO
+1404 4239 mt 1404 4216 L
+1404  357 mt 1404  379 L
+DO
+1404 4239 mt 1404  357 L
+1404  357 mt 1404  357 L
+SO
+1477 4239 mt 1477 4216 L
+1477  357 mt 1477  379 L
+DO
+1477 4239 mt 1477  357 L
+1477  357 mt 1477  357 L
+SO
+1538 4239 mt 1538 4216 L
+1538  357 mt 1538  379 L
+DO
+1538 4239 mt 1538  357 L
+1538  357 mt 1538  357 L
+SO
+1591 4239 mt 1591 4216 L
+1591  357 mt 1591  379 L
+DO
+1591 4239 mt 1591  357 L
+1591  357 mt 1591  357 L
+SO
+1637 4239 mt 1637 4216 L
+1637  357 mt 1637  379 L
+DO
+1637 4239 mt 1637  357 L
+1637  357 mt 1637  357 L
+SO
+1679 4239 mt 1679 4216 L
+1679  357 mt 1679  379 L
+DO
+1679 4239 mt 1679  357 L
+1679  357 mt 1679  357 L
+SO
+1679 4239 mt 1679 4193 L
+1679  357 mt 1679  402 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+1591 4421 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 80 FMSR
+
+1724 4347 mt 
+(1) s
+1954 4239 mt 1954 4216 L
+1954  357 mt 1954  379 L
+DO
+1954 4239 mt 1954  357 L
+1954  357 mt 1954  357 L
+SO
+2115 4239 mt 2115 4216 L
+2115  357 mt 2115  379 L
+DO
+2115 4239 mt 2115  357 L
+2115  357 mt 2115  357 L
+SO
+2229 4239 mt 2229 4216 L
+2229  357 mt 2229  379 L
+DO
+2229 4239 mt 2229  357 L
+2229  357 mt 2229  357 L
+SO
+2318 4239 mt 2318 4216 L
+2318  357 mt 2318  379 L
+DO
+2318 4239 mt 2318  357 L
+2318  357 mt 2318  357 L
+SO
+2390 4239 mt 2390 4216 L
+2390  357 mt 2390  379 L
+DO
+2390 4239 mt 2390  357 L
+2390  357 mt 2390  357 L
+SO
+2452 4239 mt 2452 4216 L
+2452  357 mt 2452  379 L
+DO
+2452 4239 mt 2452  357 L
+2452  357 mt 2452  357 L
+SO
+2505 4239 mt 2505 4216 L
+2505  357 mt 2505  379 L
+DO
+2505 4239 mt 2505  357 L
+2505  357 mt 2505  357 L
+SO
+2551 4239 mt 2551 4216 L
+2551  357 mt 2551  379 L
+DO
+2551 4239 mt 2551  357 L
+2551  357 mt 2551  357 L
+SO
+2593 4239 mt 2593 4216 L
+2593  357 mt 2593  379 L
+DO
+2593 4239 mt 2593  357 L
+2593  357 mt 2593  357 L
+SO
+2593 4239 mt 2593 4193 L
+2593  357 mt 2593  402 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+2505 4421 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 80 FMSR
+
+2638 4347 mt 
+(2) s
+2868 4239 mt 2868 4216 L
+2868  357 mt 2868  379 L
+DO
+2868 4239 mt 2868  357 L
+2868  357 mt 2868  357 L
+SO
+3029 4239 mt 3029 4216 L
+3029  357 mt 3029  379 L
+DO
+3029 4239 mt 3029  357 L
+3029  357 mt 3029  357 L
+SO
+3143 4239 mt 3143 4216 L
+3143  357 mt 3143  379 L
+DO
+3143 4239 mt 3143  357 L
+3143  357 mt 3143  357 L
+SO
+3232 4239 mt 3232 4216 L
+3232  357 mt 3232  379 L
+DO
+3232 4239 mt 3232  357 L
+3232  357 mt 3232  357 L
+SO
+3304 4239 mt 3304 4216 L
+3304  357 mt 3304  379 L
+DO
+3304 4239 mt 3304  357 L
+3304  357 mt 3304  357 L
+SO
+3365 4239 mt 3365 4216 L
+3365  357 mt 3365  379 L
+DO
+3365 4239 mt 3365  357 L
+3365  357 mt 3365  357 L
+SO
+3418 4239 mt 3418 4216 L
+3418  357 mt 3418  379 L
+DO
+3418 4239 mt 3418  357 L
+3418  357 mt 3418  357 L
+SO
+3465 4239 mt 3465 4216 L
+3465  357 mt 3465  379 L
+DO
+3465 4239 mt 3465  357 L
+3465  357 mt 3465  357 L
+SO
+3507 4239 mt 3507 4216 L
+3507  357 mt 3507  379 L
+DO
+3507 4239 mt 3507  357 L
+3507  357 mt 3507  357 L
+SO
+3507 4239 mt 3507 4193 L
+3507  357 mt 3507  402 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+3419 4421 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 80 FMSR
+
+3552 4347 mt 
+(3) s
+3782 4239 mt 3782 4216 L
+3782  357 mt 3782  379 L
+DO
+3782 4239 mt 3782  357 L
+3782  357 mt 3782  357 L
+SO
+3943 4239 mt 3943 4216 L
+3943  357 mt 3943  379 L
+DO
+3943 4239 mt 3943  357 L
+3943  357 mt 3943  357 L
+SO
+4057 4239 mt 4057 4216 L
+4057  357 mt 4057  379 L
+DO
+4057 4239 mt 4057  357 L
+4057  357 mt 4057  357 L
+SO
+4146 4239 mt 4146 4216 L
+4146  357 mt 4146  379 L
+DO
+4146 4239 mt 4146  357 L
+4146  357 mt 4146  357 L
+SO
+4218 4239 mt 4218 4216 L
+4218  357 mt 4218  379 L
+DO
+4218 4239 mt 4218  357 L
+4218  357 mt 4218  357 L
+SO
+4279 4239 mt 4279 4216 L
+4279  357 mt 4279  379 L
+DO
+4279 4239 mt 4279  357 L
+4279  357 mt 4279  357 L
+SO
+4332 4239 mt 4332 4216 L
+4332  357 mt 4332  379 L
+DO
+4332 4239 mt 4332  357 L
+4332  357 mt 4332  357 L
+SO
+4379 4239 mt 4379 4216 L
+4379  357 mt 4379  379 L
+DO
+4379 4239 mt 4379  357 L
+4379  357 mt 4379  357 L
+SO
+4421 4239 mt 4421 4216 L
+4421  357 mt 4421  379 L
+DO
+4421 4239 mt 4421  357 L
+4421  357 mt 4421  357 L
+SO
+4421 4239 mt 4421 4193 L
+4421  357 mt 4421  402 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+4333 4421 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 80 FMSR
+
+4466 4347 mt 
+(4) s
+4696 4239 mt 4696 4216 L
+4696  357 mt 4696  379 L
+DO
+4696 4239 mt 4696  357 L
+4696  357 mt 4696  357 L
+SO
+4857 4239 mt 4857 4216 L
+4857  357 mt 4857  379 L
+DO
+4857 4239 mt 4857  357 L
+4857  357 mt 4857  357 L
+SO
+4971 4239 mt 4971 4216 L
+4971  357 mt 4971  379 L
+DO
+4971 4239 mt 4971  357 L
+4971  357 mt 4971  357 L
+SO
+5059 4239 mt 5059 4216 L
+5059  357 mt 5059  379 L
+DO
+5059 4239 mt 5059  357 L
+5059  357 mt 5059  357 L
+SO
+5132 4239 mt 5132 4216 L
+5132  357 mt 5132  379 L
+DO
+5132 4239 mt 5132  357 L
+5132  357 mt 5132  357 L
+SO
+5193 4239 mt 5193 4216 L
+5193  357 mt 5193  379 L
+DO
+5193 4239 mt 5193  357 L
+5193  357 mt 5193  357 L
+SO
+5246 4239 mt 5246 4216 L
+5246  357 mt 5246  379 L
+DO
+5246 4239 mt 5246  357 L
+5246  357 mt 5246  357 L
+SO
+5293 4239 mt 5293 4216 L
+5293  357 mt 5293  379 L
+DO
+5293 4239 mt 5293  357 L
+5293  357 mt 5293  357 L
+SO
+5335 4239 mt 5335 4216 L
+5335  357 mt 5335  379 L
+DO
+5335 4239 mt 5335  357 L
+5335  357 mt 5335  357 L
+SO
+5335 4239 mt 5335 4193 L
+5335  357 mt 5335  402 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+5247 4421 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 80 FMSR
+
+5380 4347 mt 
+(5) s
+ 766 4239 mt  811 4239 L
+5335 4239 mt 5289 4239 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+ 665 4283 mt 
+(1) s
+ 766 3753 mt  811 3753 L
+5335 3753 mt 5289 3753 L
+ 565 3797 mt 
+(1.5) s
+ 766 3268 mt  811 3268 L
+5335 3268 mt 5289 3268 L
+ 665 3312 mt 
+(2) s
+ 766 2783 mt  811 2783 L
+5335 2783 mt 5289 2783 L
+ 565 2827 mt 
+(2.5) s
+ 766 2298 mt  811 2298 L
+5335 2298 mt 5289 2298 L
+ 665 2342 mt 
+(3) s
+ 766 1812 mt  811 1812 L
+5335 1812 mt 5289 1812 L
+ 565 1856 mt 
+(3.5) s
+ 766 1327 mt  811 1327 L
+5335 1327 mt 5289 1327 L
+ 665 1371 mt 
+(4) s
+ 766  842 mt  811  842 L
+5335  842 mt 5289  842 L
+ 565  886 mt 
+(4.5) s
+ 766  357 mt  811  357 L
+5335  357 mt 5289  357 L
+ 665  401 mt 
+(5) s
+ 766 4239 mt 5335 4239 L
+ 766  357 mt 5335  357 L
+ 766 4239 mt  766  357 L
+5335 4239 mt 5335  357 L
+gs 766 357 4570 3883 rc
+/c8 { 0.000000 0.000000 1.000000 sr} bdef
+c8
+275 28 275 4 276 77 275 16 275 135 275 233 275 96 275 23 
+275 275 275 -61 275 -469 275 -704 275 -955 275 -806 275 -702 766 4239 16 MP stroke
+gr
+
+c8
+  36   36  766 4239 FO
+  36   36 1041 3537 FO
+  36   36 1316 2731 FO
+  36   36 1591 1776 FO
+  36   36 1866 1072 FO
+  36   36 2141  603 FO
+  36   36 2416  542 FO
+  36   36 2691  817 FO
+  36   36 2966  840 FO
+  36   36 3241  936 FO
+  36   36 3516 1169 FO
+  36   36 3791 1304 FO
+  36   36 4066 1320 FO
+  36   36 4342 1397 FO
+  36   36 4617 1401 FO
+  36   36 4892 1429 FO
+gs 766 357 4570 3883 rc
+/c9 { 0.000000 0.500000 0.000000 sr} bdef
+c9
+275 71 275 22 276 -1 275 37 275 29 275 243 275 331 275 13 
+275 101 275 111 275 -188 275 -440 275 -700 275 -680 275 -590 766 4239 16 MP stroke
+gr
+
+c9
+0 j
+0 -58 -58 0 0 58 58 0 737 4210 5 MP
+DP
+0 -58 -58 0 0 58 58 0 1012 3620 5 MP
+DP
+0 -58 -58 0 0 58 58 0 1287 2940 5 MP
+DP
+0 -58 -58 0 0 58 58 0 1562 2240 5 MP
+DP
+0 -58 -58 0 0 58 58 0 1837 1800 5 MP
+DP
+0 -58 -58 0 0 58 58 0 2112 1612 5 MP
+DP
+0 -58 -58 0 0 58 58 0 2387 1723 5 MP
+DP
+0 -58 -58 0 0 58 58 0 2662 1824 5 MP
+DP
+0 -58 -58 0 0 58 58 0 2937 1837 5 MP
+DP
+0 -58 -58 0 0 58 58 0 3212 2168 5 MP
+DP
+0 -58 -58 0 0 58 58 0 3487 2411 5 MP
+DP
+0 -58 -58 0 0 58 58 0 3762 2440 5 MP
+DP
+0 -58 -58 0 0 58 58 0 4037 2477 5 MP
+DP
+0 -58 -58 0 0 58 58 0 4313 2476 5 MP
+DP
+0 -58 -58 0 0 58 58 0 4588 2498 5 MP
+DP
+0 -58 -58 0 0 58 58 0 4863 2569 5 MP
+DP
+gs 766 357 4570 3883 rc
+gr
+
+0 sg
+2356 4564 mt 
+(Number of grouped points) s
+ 483 3236 mt  -90 rotate
+(GETGRAD Vectorization Speed Up) s
+90 rotate
+1373  262 mt 
+(HPCFF, 2D case \(1 periodic + 1 non-periodic\), using PPFORM) s
+ 749 4282 mt 
+( ) s
+5319  399 mt 
+( ) s
+1 sg
+0 334 1451 0 0 -334 3825 751 4 MP
+PP
+-1451 0 0 334 1451 0 0 -334 3825 751 5 MP stroke
+4 w
+DO
+SO
+6 w
+0 sg
+3825  751 mt 5276  751 L
+3825  417 mt 5276  417 L
+3825  751 mt 3825  417 L
+5276  751 mt 5276  417 L
+3825  751 mt 5276  751 L
+3825  751 mt 3825  417 L
+3825  751 mt 5276  751 L
+3825  417 mt 5276  417 L
+3825  751 mt 3825  417 L
+5276  751 mt 5276  417 L
+4283  551 mt 
+(Quadratic Splines) s
+gs 3825 417 1452 335 rc
+c8
+352 0 3895 508 2 MP stroke
+gs 3998 435 147 147 rc
+  36   36 4071  508 FO
+gr
+
+gr
+
+c8
+0 sg
+4283  702 mt 
+(Cubic Splines) s
+gs 3825 417 1452 335 rc
+c9
+352 0 3895 659 2 MP stroke
+gs 3998 586 147 147 rc
+0 -58 -58 0 0 58 58 0 4042 630 5 MP
+DP
+gr
+
+gr
+
+c9
+
+end %%Color Dict
+
+eplot
+%%EndObject
+
+epage
+end
+
+showpage
+
+%%Trailer
+%%EOF
diff --git a/docs/manual/neuman.tex b/docs/manual/neuman.tex
new file mode 100644
index 0000000..34a8550
--- /dev/null
+++ b/docs/manual/neuman.tex
@@ -0,0 +1,141 @@
+%
+% @file neuman.tex
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+\documentclass[a4paper]{article}
+\usepackage{amsmath}
+\title{\tt Some Notes on Boundary Conditions}
+\author{Trach-Minh Tran}
+\date{March 2012}
+
+\begin{document}
+\maketitle
+
+\section{Neumann BC as an {essential} BC}
+The original equation:
+\begin{equation}
+  \mathbf{A \cdot u} = \mathbf{b}
+\end{equation}
+with the Neumann BC (1D case):
+\begin{equation}
+  \alpha u_1 + \beta u_2 = c.
+\end{equation}
+From Eq.(20) of \cite{BSPLINES}:
+\begin{equation}
+  \beta = -\alpha =\frac{p}{\Delta_1}
+\end{equation}
+where $p$ is the degree of spline and $\Delta_1$ is the lenght of the
+first insterval.
+
+Transformation $(u_1, \ldots, u_n) \Rightarrow (\hat u_1, \ldots, \hat
+u_n)$ defined by
+\begin{equation}
+  \begin{array}{ccc}
+     \alpha u_1 + \beta u_2 = \hat u_1  &  &   u_1 = \frac{1}{\alpha}\hat u_1 - \frac{\beta}{\alpha}\hat u_2   \\
+     u_2 = \hat u_2  &  &   u_2 = \hat u_2   \\
+     \vdots        & \Longrightarrow & \vdots               \\
+     u_N = \hat u_N      &  &   u_N = \hat u_N.
+    \end{array}
+\end{equation}
+The original Neumann BC becomes now a \emph{inhomogeneous Dirichlet}
+BC on $\mathbf{\hat u}$:
+\begin{equation}
+  \hat u_1 = c.
+\end{equation}
+The transformed linear system can be written as:
+\begin{equation}
+  \mathbf{(U^T\cdot A \cdot U)\cdot  \hat u} = \mathbf{U^T\cdot  b},
+\end{equation}
+where $\mathbf{U}$ is given by
+
+\begin{equation}
+  \mathbf{U} =
+     \left(\begin{matrix}
+        \frac{1}{\alpha} & -\frac{\beta}{\alpha} & \dots & 0 \\
+        0 & 1 & \dots & 0 \\
+          &   & \ddots& \vdots \\
+        0 & 0 & \dots & 1
+     \end{matrix}\right)
+\end{equation}
+
+Thus, all the symmetry, hermiticity or positivity properties of the
+original matrix are preserved with this matrix transformation!
+
+\section{Neumann BC as a \emph{natural} BC}
+Multiplying the 1D Sturm-Liouville equation (see section 1.1.1 of
+\cite{SOLVERS}) by spline $\Lambda_j(x)$ and integrating by parts, we
+obtain the following boundary terms:
+
+\begin{equation}
+-\Lambda_j(L) C_1(L) \phi'(L)  + \Lambda_j(0) C_1(0) \phi'(0)
+\end{equation}
+To impose $\phi'(0) = a$ and noting that $\Lambda_j(0)=\delta_{j1}$,
+you only need to add $[-aC_1(0)]$ to the first element of the
+RHS. Likewise, for the BC $\phi'(L) = b$ you only need to add
+$[bC_1(L)]$ to the last element of the RHS. No matrix manipulation (as
+for the \emph{essential} BC) is required! Notice that if $a$ or $b$ is
+zero, nothing needs to be done to impose these BC. That's the reason why
+it is called \emph{natural} BC!
+
+A subtle point to be noted here is that using \emph{natural} BC,
+$\phi'(0)$ \emph{is not} exaclty equal to $a$, althought it should
+converge to $a$ as $(\Delta x)^p$ where $p$ is the spline degree,
+while using the \emph{essential} BC, $\phi'(0)=a$ is \emph{exact}!
+
+\section{Diffusion Equation using second order time implicit method}
+Let rewrite Eq.(74) of your notes in vector form and replace the
+unkowns $n$ by $f$:
+\begin{equation}
+  \mathbf{B} \frac{d \mathbf{f}}{dt} = \mathbf{M\cdot f}.
+\end{equation}
+Using a \emph{second order time centered} discretization,
+
+\begin{equation}
+  \begin{split}
+  \mathbf{B} \left(\frac{\mathbf{f}^{n+1}-\mathbf{f}^{n}}{\Delta t}\right) &=
+  \mathbf{M} \left(\frac{\mathbf{f}^{n+1}+\mathbf{f}^{n}}{2}\right) \\
+\Rightarrow &
+ \left(\mathbf{B} -\frac{\Delta t}{2}
+   \mathbf{M}\right)\mathbf{f}^{n+1} =
+ \left(\mathbf{B} +\frac{\Delta t}{2}
+   \mathbf{M}\right)\mathbf{f}^{n}
+      \end{split}
+\end{equation}
+\emph{Essential} BC has to be imposed on the matrix
+\begin{equation}
+  \mathbf{B} -\frac{\Delta t}{2} \mathbf{M}
+\end{equation}
+while \emph{natural} BC is introduced while deriving the weak form
+leading to the matrix $M$.
+This method is \emph{unconditionnaly stable} and second order in
+time. When linear splines are used for the space discretization, this
+scheme is similar to the well-known \emph{Cranck-Nicolson} (see for
+example Wikipedia) discretization for parabolic PDE.
+
+\begin{thebibliography}{99}
+\bibitem{BSPLINES} {\tt BSPLINES} Reference Guide.
+\bibitem{SOLVERS} {\tt The SOLVERS in BSPLINES} Reference Guide.
+\end{thebibliography}
+
+\end{document}
diff --git a/docs/manual/solvers.pdf b/docs/manual/solvers.pdf
new file mode 100644
index 0000000..72a28d1
Binary files /dev/null and b/docs/manual/solvers.pdf differ
diff --git a/docs/manual/solvers.tex b/docs/manual/solvers.tex
new file mode 100644
index 0000000..8abdf13
--- /dev/null
+++ b/docs/manual/solvers.tex
@@ -0,0 +1,2818 @@
+%
+% @file solvers.tex
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+\documentclass[a4paper]{article}
+\usepackage{linuxdoc-sgml}
+\usepackage{graphicx}
+\usepackage{hyperref}
+\usepackage{amsmath}
+%\usepackage{verbatim}
+%\usepackage[notref]{showkeys}
+
+\title{\tt The Solvers in BSPLINES}
+\author{Trach-Minh Tran}
+\date{v0.6, December 2011}
+\abstract{Implementation of a common simple interface to popular
+solver packages (LAPACK, PARDISO, WSMP, PETSc, etc.). The main goal is to
+provide an easy access to these packages in order to solve elliptic and
+parabolic as well as some types of integro-differential
+equations.}
+
+\begin{document}
+\maketitle
+\tableofcontents
+
+\section{Matrix form of the problem}
+\subsection{Getting started}
+\subsubsection{A one-dimensional problem}
+
+Let us start with the one-dimensional Sturm-Liouville differential
+equation:
+
+\begin{equation*}
+  -\frac{d}{dx} \left[C_1(x) \frac{d\phi}{dx}\right] + C_2(x)\phi  = \rho,
+\end{equation*}
+on the domain $0\leq x \leq L$ with suitable boundary conditions.
+On a grid with $N$ intervals, the discretized solution $\phi$, using
+the splines $\Lambda_i(x)$ of order $p$ can be written as
+\begin{equation}
+\label{sol1d}
+  \phi(x) = \sum_{i=0}^{d-1} \phi_i\Lambda_i(x),
+\end{equation}
+where \cite{BSPLINES}
+\begin{equation*}
+  d =
+  \begin{cases}
+    N   & \text{if $\phi$ is periodic}, \\
+    N+p &  \text{otherwise},
+  \end{cases}
+\end{equation*}
+and $\phi_i$ are the unknowns of the following matrix equation:
+\begin{equation}
+\label{matEq1d}
+  \sum_{i'=0}^{d-1} A_{ii'}\phi_{i'} = \rho_i, \qquad i=0,\ldots, d-1.
+\end{equation}
+Here the matrix $A_{ii'}$ and the right-hand-side $\rho_i$ are
+respectively given by:
+\begin{equation}
+  \begin{split}
+  \label{matCoef1d}
+  A_{ii'} =& \int_{0}^{L}\!dx\,C_1\Lambda_{i}^{'}\Lambda_{i'}^{'}
+           + \int_{0}^{L}\!dx\,C_2\Lambda_{i}\Lambda_{i'}, \\
+  \rho_i =& \int_{0}^{L}dx \rho\Lambda_i.\\
+  \end{split}
+\end{equation}
+For more general differential operators, the matrix coefficients $A_{ii'}$ can be
+written as a sum of contributing matrices of the form
+\begin{equation}
+  \label{mat1d}
+  A_{ii'} = \int_{0}^{L}\!dx\,C\Lambda_{i}^{\alpha}\Lambda_{i'}^{\alpha'},
+\end{equation}
+where $\Lambda_{i}^{\alpha}$ denotes the $\alpha^\text{th}$
+derivative of $\Lambda_{i}$.
+As the splines $\Lambda_i$ have a support of $p+1$ intervals, the
+matrix is sparse and its
+coefficients $A_{ii'}$ are non-zero only for $|i-i'| \leq p$: hence the
+matrix has a band structure of bandwidth equal to $2p+1$
+if the operator is purely differential. For an
+integral equation such as
+\begin{equation*}
+  \int_{0}^{L}\!dx' K(x,x')\phi(x') = \rho(x),
+\end{equation*}
+the discretization results in a \emph{dense} matrix of the form
+\begin{equation}
+  \label{matIntg1d}
+  A_{ii'} = \int_{0}^{L}\!dx \Lambda_{i}(x) \int_{0}^{L}\!dx'
+  K(x,x')\Lambda_{i'}(x').
+\end{equation}
+Note that when the kernel is separable $K(x,x') = U(x)V(x')$, the
+matrix $A_{ii'}$  is a \emph{dyadic}:
+\begin{equation}
+  A_{ii'} = \int_{0}^{L}\!dx U(x) \Lambda_{i}(x) \int_{0}^{L}\!dx
+  V(x)\Lambda_{i'}(x) = U_iV_{i'}.
+\end{equation}
+
+\subsubsection{Periodic boundary conditions}
+The splines $\Lambda_i$ are $N$-periodic ($\Lambda_{i+N}(x)=\Lambda_i(x-L)$). This
+property can be easily enforced while constructing both $\rho_i$
+and the matrix $A_{ii'}$.  This results in a solution
+$\phi_i$ which is also $N$-periodic.
+
+\subsubsection{Non-periodic boundary conditions}
+In {\tt BSPLINES} \cite{BSPLINES}, the constructed non-periodic splines are such that at the
+boundaries $x=0$ and $x=L$:
+\begin{equation}
+  \Lambda_i(0) = \delta_{i,0}, \qquad \Lambda_i(L) = \delta_{i,N+p-1},
+\end{equation}
+which imply that, using (\ref{sol1d})
+\begin{equation}
+  \phi(0) = \phi_0, \qquad \phi(L) =  \phi_{N+p-1}.
+\end{equation}
+It is thus possible to impose the Dirichlet boundary conditions
+by a simple modification of the matrix $A_{ii'}$ as shown in
+Appendix~\ref{DirichletCond}.
+
+
+\subsection{Problems in more dimensions}
+\subsubsection{Two-dimensional equations}
+The results obtained above can be extended in a
+straightforward manner. Assuming, for example a
+\emph{polar like} $(r,\theta)$ coordinate system,
+with the discretized solution and the right-hand side
+written as:
+\begin{equation}
+\label{discreteEq2d}
+  \begin{split}
+    \phi(r,\theta) &= \sum_{i=0}^{N_r+p_r-1}\sum_{j=0}^{N_\theta-1}
+    \phi_{ij}\Lambda_i(r) \Lambda_j(\theta) \\
+    \rho_{ij} &= \int_{0}^{R}\!dr \int_{0}^{2\pi} \!d\theta J(r,\theta) \rho(r,\theta)
+    \Lambda_i(r) \Lambda_j(\theta),\\
+  \end{split}
+\end{equation}
+where $J(r,\theta)$ is the Jacobian, the matrix equation to solve is
+\begin{equation}
+    \sum_{i'=0}^{N_r+p_r-1}\sum_{j'=0}^{N_\theta-1}
+    A_{iji'j'}\phi_{i'j'} = \rho_{ij},
+\end{equation}
+with the matrix $A_{iji'j'}$ expressed as a sum of matrices of the
+form:
+\begin{equation}
+  \label{mat2d}
+  A_{iji'j'} = \int_{0}^{R}\!dr  \int_{0}^{2\pi}\!d\theta\,
+  C(r,\theta)\,\Lambda_{i}^{\alpha}(r)\Lambda_{i'}^{\alpha'}(r)\,
+  \Lambda_{j}^{\beta}(\theta)\Lambda_{j'}^{\beta'}(\theta).
+\end{equation}
+
+\subsubsection{Three-dimensional equations}
+Likewise, for the three-dimension case, assuming for example a
+\emph{toroidal like} $(r,\theta, \varphi)$ coordinate system,
+with the discretized solution and the right-hand side
+written as:
+\begin{equation}
+\label{discreteEq3d}
+  \begin{split}
+    \phi(r,\theta, \varphi) &=
+    \sum_{i=0}^{N_r+p_r-1}\sum_{j=0}^{N_\theta-1}
+    \sum_{k=0}^{N_\varphi-1}
+    \phi_{ijk}\Lambda_i(r) \Lambda_j(\theta)  \Lambda_k(\varphi) \\
+    \rho_{ijk} &= \int_{0}^{R}\!dr \int_{0}^{2\pi} \!d\theta
+    \int_{0}^{2\pi} \!d\varphi J(r,\theta,\varphi) \rho(r,\theta,\varphi)
+    \Lambda_i(r) \Lambda_j(\theta) \Lambda_k(\varphi),\\
+  \end{split}
+\end{equation}
+where $J(r,\theta,\varphi)$ is the Jacobian, the matrix equation to solve is
+\begin{equation}
+    \sum_{i'=0}^{N_r+p_r-1}\sum_{j'=0}^{N_\theta-1}\sum_{k'=0}^{N_\varphi-1}
+    A_{ijki'j'k'}\phi_{i'j'k'} = \rho_{ijk},
+\end{equation}
+with the matrix $A_{ijki'j'k'}$ expressed as a sum of matrices of the
+form:
+\begin{equation}
+  \label{mat3d}
+  A_{ijki'j'k'} = \int_{0}^{R}\!dr
+  \int_{0}^{2\pi}\!d\theta\  \int_{0}^{2\pi}\!d\varphi\,
+  C(r,\theta,\varphi)\,\Lambda_{i}^{\alpha}(r)\Lambda_{i'}^{\alpha'}(r)\,
+  \Lambda_{j}^{\beta}(\theta)\Lambda_{j'}^{\beta'}(\theta)
+  \Lambda_{k}^{\gamma}(\varphi)\Lambda_{k'}^{\gamma'}(\varphi).
+\end{equation}
+
+\subsubsection{Unicity condition}
+In the case of the polar coordinates $(r,\theta)$ considered above,
+the unicity condition on the axis $r=0$ should be imposed. It can be
+enforced by modifications of the matrix $A$ as described in
+Appendix~\ref{unicityCond}.
+
+\subsubsection{One-dimensional numbering}
+For two-dimensional (three-dimensional) problems, the solution
+$\phi_{ij}$ ($\phi_{ijk}$) as well as the right-hand-side $\rho_{ij}$
+($\rho_{ijk}$) can be conveniently casted into one-dimensional
+arrays. As an example, by numbering first the last index, we obtain the
+following mappings:
+\begin{equation}
+\label{map1d}
+  \mu  =
+  \begin{cases}
+     j + iN_\theta  & \text{two-dimensional case} \\
+     k + (j + iN_\theta)N_\varphi & \text{three-dimensional case} \\
+  \end{cases}
+\end{equation}
+Using such a one-dimensional numbering, the matrix equation for the two and
+three dimensional cases takes a more conventional form:
+\begin{equation}
+  \sum_{\mu'=0}^{r-1} A_{\mu\mu'} \phi_{\mu'} = \rho_\mu,
+\end{equation}
+with the respective matrix ranks $r=(N_r+p_r)N_\theta$ and
+$r=(N_r+p_r)N_\theta N_\varphi$. For a pure differential operator, the
+matrix $A_{\mu\mu'}$ has a band structure of bandwidth
+$b=2(p_r+1)N_\theta-1$ and $b=2(p_r+1)N_\theta N_\varphi-1$ respectively. It is
+important to note that, except for the one-dimensional problem, there
+are \emph{many} zeros inside the matrix band!
+
+\section{The module {\tt MATRIX}}
+\subsection{Interface}
+
+The Fortran module {\tt MATRIX} contains easy-to-use routines to solve the
+matrix equation formulated in the previous section, using the direct
+solvers of LAPACK. The different matrix storage formats are defined,
+using the Fortran derived datatypes. The different types in the
+present version are listed in Table~\ref{matTypes}.
+
+\begin{table}[h]
+  \centering
+  \begin{tabular}{|l|l|}\hline
+    {\tt gemat} & General dense matrix \\
+    {\tt gbmat} & General band matrix \\
+    {\tt pbmat} & Symmetric positive-definite band matrix \\
+    {\tt periodic\_mat} & Matrix obtained for example in \\
+                      & one-dimensional periodic problems\\
+    \hline
+  \end{tabular}
+  \caption{The matrix types}
+  \label{matTypes}
+\end{table}
+
+These types define {\tt DOUBLE PRECISION} real matrices.
+{\tt DOUBLE COMPLEX} matrices are declared by prefixing
+these types with the letter ``{\tt z}'', for example {\tt zgbmat}.
+Note that {\tt zpbmat} defines a \emph{hermitian} positive-definite
+complex matrix.
+The \emph{generic} routines are defined for each of these types in Table~\ref{matRoutines}.
+Note that the routine {\tt updtmat} is  mainly used for the matrix assembly
+  while {\tt getxxx} and {\tt putxxx} are rather used to modify the
+  matrix, for example to impose boundary conditions.
+
+\begin{table}[h]
+\centering
+\begin{tabular}{|l|l|}
+  \hline
+  {\tt init}      & Initializes the data structure \\
+  {\tt destroy}   & Free the data structure memory \\
+  \hline
+  {\tt updtmat}   & Accumulates a value to the element $A_{ij}$ \\
+  {\tt putele, putrow, putcol} & Overwrites the matrix element $(i,j)$,
+                               row $i$, column $j$ \\
+  {\tt getele, getrow, getcol} & Returns the matrix element $(i,j)$,
+                                 row $i$, column $j$ \\
+  {\tt vmx}       & Returns the matrix-vector product \\
+  {\tt mcopy}     & Copy a matrix to another matrix \\
+  {\tt maddto}    & Adds 2 matrices: $A \leftarrow A+\alpha B$ \\
+  {\tt determinant} & Returns the matrix determinant \\
+  \hline
+  {\tt factor}    & Computes the LU (Cholesky for symmetric/hermitian \\
+                  & matrix) factorization \\
+  {\tt bsolve}    & Solves the linear system using the factorized matrix \\
+    \hline
+\end{tabular}
+  \caption{The generic routines in the {\tt MATRIX} module}
+  \label{matRoutines}
+\end{table}
+The complete description of each routine is given in
+Appendix~\ref{matRef}. More information on how to use it can be obtained by
+{\tt greping} its name on the examples found in the {\tt examples/} directory.
+
+\subsection{A two-dimensional example}
+\label{twodEx}
+Let's consider the Poisson equation using the cylindrical coordinates
+$(r,\theta)$:
+\begin{equation}
+  -\frac{1}{r}\frac{\partial}{\partial r}
+   \left[r\frac{\partial\phi}{\partial r}\right]
+  -\frac{1}{r^2}\frac{\partial^2\phi}{\partial\theta^2}  = \rho,
+  \qquad \phi(r=1,\theta) = 0.
+\end{equation}
+
+Assuming the exact solution
+\begin{equation*}
+  \phi(r,\theta) = (1-r^2)r^m\cos m\theta,
+\end{equation*}
+the right-hand-side becomes
+\begin{equation*}
+  \rho=4(m+1)r^{m}\cos m\theta.
+\end{equation*}
+The matrix and the right hand-side of the discretized problem are computed as
+
+\begin{equation}
+  \begin{split}
+  A_{iji'j'} &= \int_{0}^1\!dr  \int_{0}^{2\pi}\!d\theta\,\left[
+  r\,\Lambda'_{i}(r)\Lambda'_{i'}(r)\,
+  \Lambda_{j}(\theta)\Lambda_{j'}(\theta) +
+   \frac{1}{r}\,\Lambda_{i}(r)\Lambda_{i'}(r)\,
+  \Lambda'_{j}(\theta)\Lambda'_{j'}(\theta) \right] \\
+  \rho_{ij} &= \int_{0}^1\!dr \int_{0}^{2\pi} \!d\theta\,\,r \rho(r,\theta)
+    \,\,\Lambda_i(r) \Lambda_j(\theta).
+  \end{split}
+\end{equation}
+In the example  {\tt pde2d.f90} this problem is treated in detail.
+In the following, only the calls to the {\tt MATRIX} routines
+are reviewed to show how the matrix problem is solved using the {\tt
+  MATRIX} module.
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+!
+!  Declare a General Band matrix
+!
+  USE matrix
+  USE conmat_mod
+  TYPE(gbmat) :: mat
+!
+!  Rank and bandwidth. nidbas(1) is the spline order in
+!  the first dimension r.
+!
+  nrank = (nx+nidbas(1))*ny    ! Rank of the FE matrix
+  kl = (nidbas(1)+1)*ny -1     ! Number of sub-diagnonals
+  ku = kl                      ! Number of super-diagnonals
+  nterms = 2                   ! Number of terms in the weak form
+!
+!  Initialize the matrix data structure
+!
+  CALL init(kl, ku, nrank, nterms, mat)
+!
+!  Construct the matrix, using 2D spline splxy
+!  and impose boundary conditions
+!
+  CALL conmat(splxy, mat, coefeq_poisson)
+  CALL ibcmat(mat, ny)
+!
+!  Compute the RHS, using the 2D spline splxy
+!  and impose boundary conditions
+!
+  CALL disrhs(mbess, splxy, rhs)
+  CALL ibcrhs(rhs, ny)
+!
+!   Factor the matrix and solve
+!
+  CALL factor(mat)
+  CALL bsolve(mat, rhs, sol)
+!
+...
+CONTAINS
+  SUBROUTINE coefeq_poisson(x, y, idt, idw, c)
+    DOUBLE PRECISION, INTENT(in) :: x, y
+    INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
+    DOUBLE PRECISION, INTENT(out) :: c(:)
+!
+    c(1) = x
+    idt(1,1) = 1; idt(1,2) = 0
+    idw(1,1) = 1; idw(1,2) = 0
+!
+    c(2) = 1.d0/x
+    idt(2,1) = 0;  idt(2,2) = 1
+    idw(2,1) = 0;  idw(2,2) = 1
+  END SUBROUTINE coefeq_poisson
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+
+Some explanations and remarks:
+\begin{itemize}
+\item The matrix construction is performed by {\tt conmat} which will
+  be described later in section \ref{secCONMAT}. The
+  \emph{weak form} is defined in the \emph{internal}
+  procedure {\tt coefeq\_poisson} and passed as an argument to
+   {\tt  conmat}. See section \ref{secCONMAT} for a detailed
+   description of the variables {\tt c, idt, idw} returned by
+   {\tt coefeq\_poisson}.
+\item Boundary conditions are imposed by modifications
+  of the matrix in subroutine {\tt ibcmat} (see file {\tt the
+  ibcmat.f90}), using the {\tt MATRIX}
+  routines {\tt getrow, putrow, getcol, putcol}.
+\item The construction of the right-hand-side in {\tt disrhs} (see the
+  file {\tt disrhs.f90}) is computed using a Gauss quadrature..
+\item Using the {\tt pbmat} type instead of {\tt gbmat} (the matrix in
+  this example is symmetric and positive-definite!) requires only a few
+  modifications of the program (see the complete example {\tt
+    pde2d\_pb.f90}):
+  \begin{itemize}
+  \item Change the type {\tt gbmat} to {\tt pbmat} in all matrix declarations
+  \item Change the list of arguments in the routine {\tt init} to
+    ({\tt ku, nrank, nterms, mat})
+  \item Small changes in the boundary conditions ({\tt ibcmat} and {\tt
+    ibcrhs}) to take into account the symmetry.
+  \end{itemize}
+\item The module {\tt MATRIX} can be used independently of {\tt
+  BSPLINES} (which is used here only to compute the matrix and
+  right-hand-side), for example in a problem discretized using
+  Finite Differences.
+\end{itemize}
+
+\section{Sparse matrix storage}
+Using the \emph{band matrix format} for a pure differential operator
+requires to store a full bandwidth $b=2(p_r+1)N_\theta-1$ for the
+two-dimensional problem as shown in section 1, while there are only
+$(2p_r+1)^2$ non-zero elements per matrix row. In three-dimensional
+problem, it is much worse since $b=2(p_r+1)N_\theta N\varphi-1$ for
+$(2p_r+1)^3$ non-zero elements.
+
+In order to reduce the matrix storage, a solution consists of just
+storing the matrix non-zero elements and use the \emph{Sparse Direct
+Solvers}. With an optimal \emph{renumbering} strategy (or
+\emph{fill-in reducing ordering}), the
+size of the factored matrix can be expected to be smaller than the
+corresponding band matrix.
+
+An alternative is to use \emph{iterative} methods which usually need
+less memory.
+
+Such a sparse matrix is implemented in the {\tt SPARSE} module where
+each matrix row is represented by a \emph{\tt linked} list of elements
+with sorted column index. The data structure of this sparse matrix is
+wrapped up in a the Fortran data type {\tt spmat} for a real matrix and
+{\tt zspmat} for a complex matrix. Most of the generic routines which
+are already defined in the {\tt MATRIX} module are overloaded for these
+matrix types. They are listed in Table~\ref{spmatRoutines}. The complete
+documentation of these routines can be found in Appendix~\ref{spmatRef}.
+
+\begin{table}[h]
+\centering
+\begin{tabular}{|l|l|}
+  \hline
+  \emph{Matrix types} & {\tt spmat, zspmat} \\
+  \hline\hline
+  {\tt init}      & Initializes the data structure \\
+  {\tt destroy}   & Free the data structure memory \\
+  \hline
+  {\tt updtmat}   & Accumulates a value to the element $A_{ij}$ \\
+  {\tt putele, putrow, putcol} & Overwrites the matrix element $(i,j)$,
+                               row $i$, column $j$ \\
+  {\tt getele, getrow, getcol} & Returns the matrix element $(i,j)$,
+                                 row $i$, column $j$ \\
+  \hline
+  {\tt get\_count}   & Get the number of non-zero elements in matrix \\
+  \hline
+\end{tabular}
+  \caption{The generic routines in the {\tt SPARSE} module}
+  \label{spmatRoutines}
+\end{table}
+
+It should be noted that this module is \emph{not} used
+directly in solver problems. One usually uses instead modules which
+are specific to a type of (direct or iterative) solver. As will be
+shown in the next section, it is the routines in this solver module which
+directly calls the routines defined in the {\tt SPARSE} module during the matrix
+assembly.
+
+\section{Solvers using the module {\tt SPARSE}}
+All the solvers discussed in this section use initially the module
+{\tt SPARSE} to construct the sparse matrix. Once this construction
+procedure is complete, this matrix is converted to the (usually more
+efficient) format used by the solver.
+In a time-dependent simulation where the problem matrix
+changes but not the sparsity pattern, the subsequent matrix assembly
+will be performed directly on this solver's format.
+
+Thus for example, the first time {\tt updtmat} is called
+on a new matrix, it is the version from {\tt SPARSE}. Next, if
+{\tt  updtmat} is called again to modify the matrix, it will be the
+solver's version, unless the matrix is re-initialized by a call to
+{\tt destroy} followed by {\tt init}. This switch is completely
+transparent for the user as shown through an example in the next
+section.
+
+\subsection{The PARDISO direct solver}
+The interface to PARDISO~\cite{PARDISO} is implemented in the
+module {\tt PARDISO\_BSPLINES}.
+The matrix type (symmetric, hermitian, positive-definite) is set
+by the arguments {\tt nlsym, nlherm} and {\tt nlpos} passed to the
+generic routine {\tt init}. All the other generic routines defined in
+the module {MATRIX}, plus routines specific to the sparse solver,
+are listed in Table~\ref{pardisoRoutines}. The complete documentation
+of these routines is given in Appendix~\ref{pardisoRef}.
+
+
+\begin{table}[h]
+\centering
+\begin{tabular}{|l|l|}
+  \hline
+  \emph{Matrix types} & {\tt pardiso\_mat, zpardiso\_mat} \\
+  \hline\hline
+  {\tt init}      & Initializes the data structure \\
+  {\tt destroy}   & Free the data structure memory \\
+  \hline
+  {\tt updtmat}   & Accumulates a value to the element $A_{ij}$ \\
+  {\tt putele, putrow, putcol} & Overwrites the matrix element $(i,j)$,
+                               row $i$, column $j$ \\
+  {\tt getele, getrow, getcol} & Returns the matrix element $(i,j)$,
+                                 row $i$, column $j$ \\
+  {\tt vmx}       & Returns the matrix-vector product \\
+  {\tt mcopy}     & Copy a matrix to another matrix \\
+  {\tt maddto}    & Adds 2 matrices: $A \leftarrow A+\alpha B$ \\
+  {\tt clear\_mat}& Set the matrix elements to zero.\\
+  {\tt psum\_mat} & Parallel sum of matrices \\
+  {\tt p2p\_mat}  & Point-to-point combine sparse matrix between 2 processes\\
+  {\tt get\_count}& Get the number of non-zero elements in matrix \\
+  \hline
+  {\tt factor}    & Factorization \\
+  {\tt bsolve}    & Solves the linear system using the factorized matrix \\
+  {\tt to\_mat}   & Convert to PARDISO CSR matrix format \\
+  {\tt reord\_mat}& Reordering and symbolic factorization \\
+  {\tt numfact}   & Numerical factorization \\
+    \hline
+\end{tabular}
+  \caption{The generic routines in the {\tt PARDISO\_BSPLINES} module}
+  \label{pardisoRoutines}
+\end{table}
+
+Below, a complete example solving a simple
+two-dimensional Poisson discretized by the 5 point Finite Difference
+method illustrates how to use the {\tt PARDISO\_BSPLINES} module.
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+PROGRAM main
+  USE pardiso_bsplines
+  IMPLICIT NONE
+  TYPE(pardiso_mat) :: amat
+  DOUBLE PRECISION, ALLOCATABLE :: rhs(:), sol(:), arow(:)
+  INTEGER :: nx=5, ny=4
+  INTEGER :: n, nnz
+  INTEGER :: i, j, irow, jcol
+!
+  WRITE(*,'(a)', advance='no') 'Enter nx, ny: '
+  READ(*,*) nx, ny
+  n = nx*ny  ! Rank of the matrix
+  ALLOCATE(rhs(n))
+  ALLOCATE(sol(n))
+  ALLOCATE(arow(n))
+!
+  CALL init(n, 1, amat, nlsym=.TRUE.)  ! Pardiso mat, symmetric case
+!
+!  Construct the matrix and RHS
+!
+  DO j=1,ny
+     DO i=1,nx
+        arow = 0.0d0
+        irow = numb(i,j)
+        arow(irow) = 4.0d0
+        IF(i.GT.1)  arow(numb(i-1,j)) = -1.0d0
+        IF(i.LT.nx) arow(numb(i+1,j)) = -1.0d0
+        IF(j.GT.1)  arow(numb(i,j-1)) = -1.0d0
+        IF(j.LT.ny) arow(numb(i,j+1)) = -1.0d0
+        CALL putrow(amat, irow, arow)
+        rhs(irow) = SUM(arow)   ! => the exact solution is 1
+     END DO
+  END DO
+!
+  WRITE(*,'(a,i6)') 'Number of non-zeros of matrix', get_count(amat)
+!
+!  Factor the amat matrix (Reordering, symbolic and numerical factorization)
+!
+  CALL factor(amat)
+!
+!  Back solve
+!
+  CALL bsolve(amat, rhs, sol)
+!
+!   Check solutions
+!
+  WRITE(*,'(/a/(10f8.4))') 'Computed sol', sol
+  WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(sol-1.0d0))
+!
+!   Clean up
+!
+  DEALLOCATE(rhs)
+  DEALLOCATE(sol)
+  DEALLOCATE(arow)
+  CALL destroy(amat)
+CONTAINS
+  INTEGER FUNCTION numb(i,j)
+!
+!  One-dimensional numbering
+!  Number first x then y
+!
+    INTEGER, INTENT(in) :: i, j
+    numb = (j-1)*nx + i
+  END FUNCTION numb
+END PROGRAM main
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+
+It should be noted that
+\begin{itemize}
+\item The routine {\tt putrow} in the matrix construction loop uses the
+  version from the {\tt SPARSE} module to create dynamically the
+  matrix row using the linked list.
+\item The routine {\tt factor} calls successively the matrix conversion
+  {\tt to\_mat}, the reordering and symbolic factorization routine
+  {\tt reord\_mat} and finally the numerical factorization {\tt
+    numfact}. One could indeed call these three routines separately
+  instead of the single call to {\tt factor},
+\item After solving the linear system, if the matrix is modified by
+  calling for example {\tt putrow} again, it will modify
+  directly the converted matrix and not on the {\tt spmat} matrix
+  which is anyway \emph{destroyed} at the end of {\tt to\_mat}.
+\item If the matrix sparsity changes, the matrix should be
+  re-initialized by calling the {\tt destroy} and {\tt init} routines.
+\end{itemize}
+
+Other examples can be found by running ``{\tt grep pardiso\_mat}''
+on the F90 files in the directory {\tt examples/}.
+
+\subsection{The WSMP direct solver}
+The interface to WSMP~\cite{WSMP} is implemented in the
+module {\tt WSMP\_BSPLINES}.
+The matrix type (symmetric, hermitian, positive-definite) is set
+by the arguments {\tt nlsym, nlherm} and {\tt nlpos} passed to the
+generic routine {\tt init}. All the other generic routines defined in
+the module {MATRIX}, plus routines specific to the sparse solver,
+are listed in Table~\ref{wsmpRoutines}. The complete documentation
+of these routines is given in Appendix~\ref{wsmpRef}.
+
+\begin{table}[h]
+\centering
+\begin{tabular}{|l|l|}
+  \hline
+  \emph{Matrix types} & {\tt wsmp\_mat, zwsmp\_mat} \\
+  \hline\hline
+  {\tt init}      & Initializes the data structure \\
+  {\tt destroy}   & Free the data structure memory \\
+  \hline
+  {\tt updtmat}   & Accumulates a value to the element $A_{ij}$ \\
+  {\tt putele, putrow, putcol} & Overwrites the matrix element $(i,j)$,
+                               row $i$, column $j$ \\
+  {\tt getele, getrow, getcol} & Returns the matrix element $(i,j)$,
+                                 row $i$, column $j$ \\
+  {\tt vmx}       & Returns the matrix-vector product \\
+  {\tt mcopy}     & Copy a matrix to another matrix \\
+  {\tt maddto}    & Adds 2 matrices: $A \leftarrow A+\alpha B$ \\
+  {\tt clear\_mat}& Set the matrix elements to zero.\\
+  {\tt psum\_mat} & Parallel sum of matrices \\
+  {\tt p2p\_mat}  & Point-to-point combine sparse matrix between 2 processes\\
+  {\tt get\_count}& Get the number of non-zero elements in matrix \\
+  \hline
+  {\tt factor}    & Factorization \\
+  {\tt bsolve}    & Solves the linear system using the factorized matrix \\
+  {\tt to\_mat}   & Convert to WSMP CSR matrix format \\
+  {\tt reord\_mat}& Reordering and symbolic factorization \\
+  {\tt numfact}   & Numerical factorization \\
+    \hline
+\end{tabular}
+  \caption{The generic routines in the {\tt WSMP\_BSPLINES} module}
+  \label{wsmpRoutines}
+\end{table}
+
+The simple Poisson example using the {\tt PARDISO\_BSPLINES} module
+shown in the previous section can be easily adapted to the WSMP
+interface since there are only two lines to change: the {\tt USE} and
+the matrix {\tt TYPE} lines.
+
+Other examples of how to use this interface can be found by running
+``{\tt grep wsmp\_mat}'' on  the F90 files the directory {\tt examples/}.
+
+The same solver functionality can be found in both the PARDISO and
+WSMP solvers as one can verify by comparing Table~\ref{pardisoRoutines}
+and Table~\ref{wsmpRoutines} or the description of routines in
+Appendix~\ref{pardisoRef} and Appendix~\ref{wsmpRef}.
+However, there
+is an important difference. While in PARDISO (and indeed also in LAPACK),
+it is possible to define several matrices to solve simultaneously, it
+appears that in WSMP, this is possible \emph{only} for symmetric and
+hermitian matrices: in the present 10.9 version, the
+routines to store and recall the solver context
+{\tt  WSTOREMAT/WRECALLMAT} which are present in the symmetric version of
+the library are missing in the general version!
+
+A separate module named {\tt PWSMP\_BSPLINES} added the MPI
+\emph{parallelization} capability provided by WSMP. This parallel version
+implements the same user interface as shown in Table~\ref{wsmpRoutines}.
+The following considerations should be however taken in to account:
+\begin{enumerate}
+\item The coefficient matrix {\tt amat} is partitioned into blocks of
+\emph{contiguous} rows, with their indices defined in
+the interval [{\tt amat\%istart,amat\%iend}] which is defined after the call to
+{\tt init}.
+\item Calls to the routine {\tt updtmat} to update the matrix coefficients
+should not specify a row index \emph{outside} this interval.
+On the other hand, {\tt getxxx} will return 0 and {\tt putxxx} will
+ignore it if a row index \emph{not} in the range [{\tt amat\%istart,amat\%iend}]
+is passed to them.
+\item An \emph{optional} MPI communicator can be given to {\tt init}
+using the keyword {\tt comm\_in}. By default, the communicator
+{\tt MPI\_COMM\_WORLD} is used.
+\end{enumerate}
+
+A complete example using {\tt PWSMP\_BSPLINES} can be found in
+{\tt examples/pde2d\_pwsmp.f90}.
+
+\subsection{The MUMPS direct solver}
+{\tt MUMPS}~\cite{MUMPS}  is a \emph{parallel sparse direct solver}
+using {\tt MPI} and is implemented in the module
+{\tt MUMPS\_BSPLINES}. User program using this module
+should be compiled and linked with {\tt MPI} even if only the
+serial version of the solver is needed, in which case the
+{\tt MPI\_COMM\_SELF} is passed to the initialization routine
+{\tt init} as an \emph{optional} argument with the keyword
+{\tt comm\_in}. Otherwise a valid {\tt MPI} communicator should be
+passed. By default {\tt comm\_in=MPI\_COMM\_SELF}. Note that it
+is possible to use both serial and parallel solvers in the same
+program to solve different matrix problems.
+
+As for {\tt PARDISO} and {\tt WSMP}, the same user interface to the
+{\tt MUMPS} solver is used and summarized in
+Table~\ref{mumpsRoutines}. The complete documentation
+of these routines is given in Appendix~\ref{mumpsRef}.
+
+\begin{table}[h]
+\centering
+\begin{tabular}{|l|l|}
+  \hline
+  \emph{Matrix types} & {\tt mumps\_mat, zmumps\_mat} \\
+  \hline\hline
+  {\tt init}      & Initializes the data structure \\
+  {\tt destroy}   & Free the data structure memory \\
+  \hline
+  {\tt updtmat}   & Accumulates a value to the element $A_{ij}$ \\
+  {\tt putele, putrow, putcol} & Overwrites the matrix element $(i,j)$,
+                               row $i$, column $j$ \\
+  {\tt getele, getrow, getcol} & Returns the matrix element $(i,j)$,
+                                 row $i$, column $j$ \\
+  {\tt vmx}       & Returns the matrix-vector product \\
+  {\tt mcopy}     & Copy a matrix to another matrix \\
+  {\tt maddto}    & Adds 2 matrices: $A \leftarrow A+\alpha B$ \\
+  {\tt clear\_mat}& Set the matrix elements to zero.\\
+  {\tt psum\_mat} & Parallel sum of matrices \\
+  {\tt p2p\_mat}  & Point-to-point combine sparse matrix between 2 processes\\
+  {\tt get\_count}& Get the number of non-zero elements in matrix \\
+  \hline
+  {\tt factor}    & Factorization \\
+  {\tt bsolve}    & Solves the linear system using the factorized matrix \\
+  {\tt to\_mat}   & Convert to WSMP CSR matrix format \\
+  {\tt reord\_mat}& Reordering and symbolic factorization \\
+  {\tt numfact}   & Numerical factorization \\
+    \hline
+\end{tabular}
+  \caption{The generic routines in the {\tt MUMPS\_BSPLINES} module}
+  \label{mumpsRoutines}
+\end{table}
+
+
+\section{Fourier solver \cite{McMillan}}
+
+\subsection{The matrix equation in Fourier space}
+For a periodic one-dimensional problem, the solution $\phi_i$ and
+the right-hand-side $\rho_i$ in (\ref{matEq1d}) are
+$N$-periodic. Their Discrete Fourier Transform (DFT) can be defined as
+\begin{equation}
+  \begin{split}
+    \hat{\phi}_k = \sum_{j=0}^{N-1} \phi_j e^{i\frac{2\pi}{N}kj},  &\qquad
+    \hat{\rho}_k = \sum_{j=0}^{N-1} \rho_j e^{i\frac{2\pi}{N}kj},  \\
+    \phi_j = \frac{1}{N}\sum_{k=0}^{N-1} \hat{\phi}_k e^{-i\frac{2\pi}{N}kj},  &\qquad
+    \rho_j = \frac{1}{N}\sum_{k=0}^{N-1} \hat{\rho}_k e^{-i\frac{2\pi}{N}kj}.
+  \end{split}
+\end{equation}
+Taking the DFT of Eq.~(\ref{matEq1d}), we obtain the following matrix equation
+in Fourier space:
+\begin{equation}
+\label{Fourier1d}
+  \sum_{k'=0}^{N-1} \hat{A}_{kk'}\hat{\phi}_{k'} = \hat{\rho}_{k},
+\end{equation}
+where $\hat{A}_{kk'}$ is the DFT of the original matrix. Following the
+notations in Eq.~(\ref{mat1d}) and assuming an \emph{equidistant} mesh
+with the interval $\Delta=L/N$, each of the DFT matrices of the
+weak form can be written as
+\begin{equation}
+  \begin{split}
+  \hat{A}_{kk'}
+   &= \frac{1}{N}\sum_{j=0}^{N-1} e^{i\frac{2\pi}{N}kj}
+                 \sum_{j'=0}^{N-1} A_{jj'}e^{-i\frac{2\pi}{N}k'j'} \\
+   &= \frac{1}{N}\int_{0}^L\!\!dx\,C(x) \,
+         \sum_{j=0}^{N-1} e^{i\frac{2\pi}{N}kj} \Lambda_j^\alpha (x)\,
+         \sum_{j'=0}^{N-1} e^{-i\frac{2\pi}{N}k'j'}
+         \Lambda_{j'}^{\alpha'} (x) \\
+   &= \frac{1}{N}\sum_{J=0}^{N-1}\int_{J\Delta}^{(J+1)\Delta}\!\!dx\,C(x) \,
+         \sum_{j=0}^{N-1} e^{i\frac{2\pi}{N}kj} \Lambda_j^\alpha (x)\,
+         \sum_{j'=0}^{N-1} e^{-i\frac{2\pi}{N}k'j'}
+         \Lambda_{j'}^{\alpha'} (x)
+  \end{split}
+\end{equation}
+Note that each of the last two sums is over the splines which are non-zero
+at a given $x$. Using the translational symmetry of the periodic splines:
+\begin{equation*}
+   \sum_j e^{i\frac{2\pi}{N}kj} \Lambda_j^\alpha (x) =
+    \sum_j e^{i\frac{2\pi}{N}kj}
+    \Lambda_{j-J}^\alpha(x-J\Delta) =
+    e^{i\frac{2\pi}{N}kJ}\, \hat{\Lambda}_{k}^\alpha(x-J\Delta),
+\end{equation*}
+where we have defined the DFT of splines $\hat{\Lambda}_{k}(x)$ as
+\begin{equation}
+  \hat{\Lambda}_{k}^\alpha(x) = \sum_j\Lambda_j^\alpha(x) e^{i\frac{2\pi}{N}kj},
+\end{equation}
+which are computed by the routine {\tt ft\_basfun} in the module
+{\tt BSPLINES} for any spline order $p$ and derivative $\alpha \le
+p$. The DFT matrices can now be written as:
+\begin{equation*}
+  \hat{A}_{kk'} =
+  \frac{1}{N}\sum_{J=0}^{N-1}\int_{J\Delta}^{(J+1)\Delta}\!\!dx\,C(x) \,
+   e^{i\frac{2\pi}{N}J(k-k')} \hat{\Lambda}_k^\alpha (x-J\Delta)\,
+   \left[\hat{\Lambda}_{k'}^{\alpha'} (x-J\Delta)\right]^{*}.
+\end{equation*}
+
+Finally, making the variable transform $x\rightarrow x+J\Delta$ and
+defining the DFT of the weak-form coefficient $C$ as
+\begin{equation}
+  \hat{C}_{k}(x) =  \sum_{J=0}^{N-1}C(x+J\Delta)\,e^{i\frac{2\pi}{N}Jk},
+\end{equation}
+the DFT of the matrix $\hat{A}_{kk'}$ can be calculated as an
+integration over the first interval:
+\begin{equation}
+  \hat{A}_{kk'} = \frac{1}{N}\int_0^\Delta\!\!dx\,
+                   \hat{C}_{k-k'}(x) \,\hat{\Lambda}_{k}^\alpha(x)
+    \left[\hat{\Lambda}_{k'}^{\alpha'}(x)\right]^{*},
+\end{equation}
+which can be computed using again the same Gauss formula as before.
+For uniform $C$, $\hat A_{kk'}$ is diagonal and the matrix equation
+(\ref{Fourier1d}) reduces to a system of equations for the
+uncoupled Fourier modes.
+
+When $C$ is non-uniform, $\hat A_{kk'}$ is \emph{dense}.
+However in problems where the solution is expected to be
+``smooth'', one can keep only a small number of Fourier modes, reducing
+thus the rank of  $\hat A_{kk'}$. Furthermore, if the coefficients
+$C(x)$ of the differential equations are very smooth, peaked at a few (low
+order) modes, the DFT matrix can become \emph{sparse}!
+
+The generalization to the two-dimensional problem (\ref{mat2d}) is
+straightforward:
+
+\begin{gather}
+  \hat{A}_{im,i'm'} =
+  \frac{1}{N_\theta}\int_0^R\!\!dr\left\{\int_0^{\Delta\theta}\!\!d\theta \,
+    \hat{C}_{m-m'}(r,\theta) \,\hat{\Lambda}_{m}^\beta(\theta)
+    \left[\hat{\Lambda}_{m'}^{\beta'}(\theta)\right]^{*}\right\}
+    \Lambda_{i}^\alpha(r)\Lambda_{i'}^{\alpha'}(r) \\
+    \hat{C}_{m}(r,\theta) =
+    \sum_{j=0}^{N_\theta-1}C(r,\theta+j\Delta\theta)\, e^{i\frac{2\pi}{N_\theta}jm}.
+\end{gather}
+
+Likewise, for the three-dimensional problem (\ref{mat3d}), we obtain
+
+\begin{gather}
+  \hat{A}_{imn,i'm'n'} =
+  \frac{1}{N_\theta N_\varphi}\int_0^R\!\!dr\left\{\int_0^{\Delta\theta}\!\!d\theta
+   \int_0^{\Delta\varphi}\!\!d\varphi \,
+    \hat{C}_{m-m',n-n'}(r,\theta,\varphi) \,\hat{\Lambda}_{m}^\beta(\theta)
+    \left[\hat{\Lambda}_{m'}^{\beta'}(\theta)\right]^{*}\,\hat{\Lambda}_{n}^\gamma(\varphi)
+    \left[\hat{\Lambda}_{n'}^{\gamma'}(\varphi)\right]^{*}\right\}
+    \Lambda_{i}^\alpha(r)\Lambda_{i'}^{\alpha}(r) \\
+    \hat{C}_{mn}(r,\theta,\varphi) =
+    \sum_{j=0}^{N_\theta-1}\sum_{k=0}^{N_\varphi-1}C(r,\theta+j\Delta\theta,\varphi+
+    k\Delta\varphi)\, e^{i\frac{2\pi}{N_\theta}jm}\,e^{i\frac{2\pi}{N_\varphi}kn}.
+\end{gather}
+
+Note that for axi-symmetric systems where the coefficients $C$ do not
+depend on $\varphi$
+\begin{equation}
+  \hat{C}_{mn} = \hat{C}_{mn}\delta_{n,0}
+\end{equation}
+and thus the three-dimensional problem reduces to a set of independent
+two-dimensional problems with
+\begin{equation}
+  \begin{split}
+    \hat{A}^n_{im.i'm'} &= M_n \hat{A}_{im.i'm'} \\
+        M_n &= \int_0^{\Delta\varphi}\!\!d\varphi \left|\hat{\Lambda}_n(\varphi)\right|^2.
+  \end{split}
+\end{equation}
+
+\subsection{Integral equation}
+The DFT matrices for differential operators derived above can be
+extended to an integral operator of the following form:
+\begin{equation}
+  \int_{0}^{L}\!dx' K(x,x')\,\phi(x') = \rho(x),
+\end{equation}
+where $\phi(x)$ is $L$-periodic. Using the same FE discretization as
+above results in the following matrix in \emph{real} space:
+\begin{equation}
+  A_{jj'} = \int_{0}^{L}\!dx\,\Lambda_j(x)\,\int_{0}^{L}\!dx'\,
+  K(x,x')\,\Lambda_{j'}(x'),
+\end{equation}
+and its DFT
+\begin{equation}
+  \hat{A}_{kk'} = \frac{1}{N}
+  \sum_{J=0}^{N-1}\int_{J\Delta}^{(J+1)\Delta}\!dx\,
+  \sum_{J'=0}^{N-1}\int_{J'\Delta}^{(J'+1)\Delta}\!dx'\,K(x,x')\,
+    e^{i\frac{2\pi}{N}kJ}\hat{\Lambda}_k(x-J\Delta)\,
+    e^{-i\frac{2\pi}{N}k'J'}\left[\hat{\Lambda}_{k'}(x-J'\Delta)\right]^{*},
+\end{equation}
+Now, defining the DFT of the kernel as
+\begin{equation}
+  \hat{K}_{kk'}(x,x') =
+  \sum_{J=0}^{N-1}\sum_{J'=0}^{N-1}K(x+J\Delta,x'+J'\Delta)\,
+  e^{i\frac{2\pi}{N}kJ}\,e^{-i\frac{2\pi}{N}k'J'},
+\end{equation}
+the final expression for the DFT of the matrix $\hat{A}_{kk'}$ reduces to
+\begin{equation}
+   \hat{A}_{kk'} = \frac{1}{N}
+   \int_0^\Delta\!dx\int_0^\Delta\!dx'\,\hat{K}_{kk'}(x,x')\,
+     \hat{\Lambda}_k (x)\,
+     \left[\hat{\Lambda}_{k'}(x')\right]^{*}.
+\end{equation}
+Again, notice that the dense matrix $\hat{A}$ can become \emph{sparse}
+if only a limited number of Fourier modes are retained in the DFT of
+the kernel $\hat{K}$.
+
+\subsection{A two-dimensional example with a non-uniform coefficient}
+As a check, we considered here a two-dimensional example similar to the example in
+section \ref{twodEx} but with a non-uniform coefficient:
+
+\begin{equation}
+  -\frac{1}{r}\frac{\partial}{\partial r}
+   \left[rC\frac{\partial\phi}{\partial r}\right]
+  -\frac{1}{r^2}\frac{\partial}{\partial \theta}
+   \left[C\frac{\partial\phi}{\partial \theta}\right]  = \rho.
+\end{equation}
+
+With $C(r,\theta) = 1+\epsilon r\cos\theta$, assuming the same exact solution as in
+section (\ref{twodEx})
+\begin{equation}
+  \phi(r,\theta) = (1-r^2)r^m\cos m\theta,
+\end{equation}
+the right-hand side becomes
+\begin{equation}
+  \begin{split}
+    \rho(r,\theta) =  4(m+1)r^m\cos m\theta & +
+      \frac{\epsilon r^m}{2}(4+5m-m/r^2)\cos(m-1)\theta \\
+      &+ \frac{\epsilon r^m}{2}(4+3m+m/r^2)\cos(m+1)\theta.
+  \end{split}
+\end{equation}
+
+This problem is solved in real space and Fourier space respectively in example
+{\tt pde2d\_sym\_pardiso.f90} and example {\tt pde2d\_sym\_pardiso\_dft.f90}.
+Both use  the {\tt PARDISO\_BSPLINES} module to solve the sparse
+matrix equation. It should be noted that the Fourier method should yield the
+\emph{same solution} as found with the solver in real space if \emph{all} the
+$N_\theta$ Fourier modes are kept.
+
+For the problem defined above with {\tt m=3}, by keeping only the seven Fourier
+modes in $[-3,3]$ and the three mode couplings $[-1,0,1]$ in the
+Fourier solver, we found that both methods yield the same (up to 5
+digits) \emph{relative discretization error}. Furthermore, increasing
+the number of Fourier modes to $[-4,4]$ (note that the $m=\pm 4$
+Fourier components of the right hand side $\rho$ are not null) does
+not increase the accuracy of the computed solution!
+
+In this example, the matrix in Fourier space has a rank which is
+$N_\theta/7$ times smaller than in the solver in real space. The number of
+non-zeros is also reduced by a factor of $(2p+1)/3$ since only 3
+Fourier mode coupling terms are considered.
+
+In general, the efficiency of such a \emph{matrix filter} is expected
+to be problem-dependent. The Fourier solver should be tested in real simulations.
+
+\section{The matrix construction module {\tt CONMAT\_MOD}}
+\label{secCONMAT}
+The module implements the generic matrix construction subroutine
+{\tt conmat}, using the algorithm detailed in Appendix~\ref{matAssembly},
+for 1D and 2D differential equations. The computed matrix is returned in the
+argument {\tt mat} which can be a Lapack band matrix as well as a
+PARDISO, WSMP or MUMPS sparse matrix.
+The complete interface of the subroutine is given below.
+
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE conmat(spl, mat, coefeq, maxder)
+    TYPE(spline1d|spline2d) :: spl
+    TYPE([z]gbmat|[z]pbmat|[z]periodic_mat|[z]pardiso|...) :: mat
+    INTEGER, INTENT(in), OPTIONAL :: maxder[(2)]
+    INTERFACE
+     SUBROUTINE coefeq(x, [y], idt, idw, c)
+       DOUBLE PRECISION, INTENT(in) :: x, [y]
+       INTEGER, INTENT(out) :: idt(:), idw(:)
+       DOUBLE PRECISION, INTENT(out) :: c(:)
+     END SUBROUTINE coefeq
+    END INTERFACE
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Construct the FE matrix for 1D or 2D differential operator.
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   spl    : 1D or 2D spline
+   mat    : matrix object
+   coefeq : user provided subroutine (see below)
+   maxder : Maximum order of the derivatives in the weak form.
+            Equal to 1 (first derivative) by default.
+\end{verbatim}
+\end{description}
+
+The subroutine {\tt conmat} includes, in addition to the arguments
+{\tt spl, mat} and {\tt maxder} described above,
+an user provided subroutine as the third
+argument {\tt coefeq} which computes all the weak
+form coefficients defined in Eq.(\ref{locMat1d}) and Eq.(\ref{locMat2d})
+for a given point ($x$ for 1D case or $(x,y)$ for 2D case). The output
+array {\tt c} will contain all the computed $C$ with its corresponding
+derivative orders $(d,d')$ returned in {\tt idt, idw} respectively.
+Other quantities required to calculate the coefficients $C$ could be
+communicated to {\tt coefeq}, using for example a {\tt COMMON} block or
+a {\tt MODULE}.
+
+An example of using this module can be found in section \ref{twodEx}.
+
+\appendix
+
+\section{Matrix assembly for differential operators}
+\label{matAssembly}
+\subsection{1D case}
+\subsubsection{Local matrix}
+The contribution to the discretized weak-form from the interval
+$[x_i,  x_{i+1}]$ where $i=1,\ldots,N$, is a sum of the \emph{local matrices}
+\begin{equation}
+\label{locMat1d}
+  \begin{split}
+  A^i_{\alpha\alpha'} &= \int_{x_i}^{x_{i+1}}\!\!dx \;C(x)
+      \Lambda^d_{\alpha}(x)\Lambda^{d'}_{\alpha'}(x) \\
+  &\simeq \sum_{g=1}^{G}\,
+      \underbrace{w_g\Lambda^d_{\alpha}(x_g)\Lambda^{d'}_{\alpha'}(x_g)}_{F_{\alpha\alpha'g}}
+     \,\underbrace{C(x_g)}_{c_g},
+  \end{split}
+\end{equation}
+where a $G$ point Gauss quadrature over the interval $[x_i,  x_{i+1}]$
+is used to approximate the integral and
+$\Lambda^d_{\alpha}$ denotes the $d^{th}$ derivative of splines
+which are non zero in the interval $[x_i,  x_{i+1}]$. For splines of
+degree $p$, $\alpha=0,\ldots,p$. Note that the matrix can be written as a
+\emph{matrix-vector product}:
+\begin{equation}
+  \mathbf{A}= \mathbf{F} \cdot \mathbf{c}.
+\end{equation}
+
+\subsubsection{Mapping to global matrix}
+For $N$ intervals, the number of spline elements of degree $p$, is
+$N_e=N+p$, or $N_e=N$ if the system is \emph{periodic}.
+Once the local matrix $A_{\alpha\alpha'}$ is formed, it can be added
+to the \emph{global} matrix using the mapping:
+\begin{equation}
+  \begin{split}
+    A^g_{II'} &\leftarrow A^g_{II'} + A^i_{\alpha\alpha'} \\
+    I = i+\alpha, & \qquad I' = i+\alpha'
+  \end{split}
+\end{equation}
+For periodic problems, the indices $I,I'$ are further transformed
+by taking into account the periodicity $N$, using for example the
+following {\tt FORTRAN} statement
+
+\begin{center} \tt
+  I = MODULO(I-1,N) + 1 \\
+\end{center}
+
+\subsection{2D case}
+\subsubsection{Local matrix}
+In this case, the local matrix obtained for the grid cell
+$[x_i,  x_{i+1}]\times[y_j,  y_{j+1}]$ takes the form:
+\begin{equation}
+\label{locMat2d}
+  \begin{split}
+  A_{\alpha\alpha'\beta\beta'} &= \int_{x_i}^{x_{i+1}}\!\!dx
+  \int_{y_j}^{y_{j+1}}\!\!dy\,\Lambda^{d_1}_{\alpha}(x)\Lambda^{d_1'}_{\alpha'}(x)\;C(x,y)
+      \,\Lambda^{d_2}_{\beta}(y)\Lambda^{d_2'}_{\beta'}(y) \\
+  &\simeq \sum_{g_1=1}^{G_1}\,
+      \underbrace{w_{g_1}\Lambda^{d_1}_{\alpha}(x_{g_1})\Lambda^{{d_1}'}_{\alpha'}(x_{g_1})}_{F_{\alpha\alpha'g_1}}
+     \;\sum_{g_2=1}^{G_2}\,\underbrace{C(x_{g_1},y_{g_2})}_{C_{g_1g_2}}
+     \underbrace{w_{g_2}\Lambda^{d_2}_{\beta}(y_{g_2})\Lambda^{{d_2}'}_{\beta'}(y_{g_2})}_{G_{\beta\beta'g_2}},
+  \end{split}
+\end{equation}
+which can be computed efficiently as \emph{matrix-matrix products}
+\begin{equation}
+  \mathbf{A} = \mathbf{F}\cdot\mathbf{C}\cdot\mathbf{G^{T}}
+\end{equation}
+\subsubsection{Mapping to global matrix}
+The local to global element indices mapping on each of the two
+dimensions can be defined as previously as
+\begin{equation}
+  \begin{split}
+    I = i+\alpha, & \qquad I' = i+\alpha' \\
+    J = j+\beta, & \qquad J' = j+\beta'
+  \end{split}
+\end{equation}
+If any of the 2 dimensions is periodic, the periodic condition have
+to be applied to the corresponding global element index as explained
+above.
+
+Furthermore, in order to reduce the 4 dimension array $A^g_{II'JJ'}$
+to the standard 2 dimension matrix, we number first the elements in
+$y$ coordinate and obtain the following index transformation:
+\begin{equation}
+  \mu = J + N^y_e(I-1), \qquad \mu' = J' +  N^y_e(I'-1),
+\end{equation}
+where $N^y_e$ is the number of elements along the $y$ coordinate. The
+\emph{global} matrix is then constructed from
+\begin{equation}
+  A^g_{\mu\mu'} \leftarrow A^g_{\mu\mu'} + A_{\alpha\alpha'\beta\beta'}
+\end{equation}
+
+\section{The boundary conditions}
+\subsection{Dirichlet condition}
+\label{DirichletCond}
+\subsubsection{1D case}
+Let us consider the boundary condition
+$u(0)=c$. Since all the splines are 0
+at $x=0$, except for the first spline which is equal to 1,
+$\Lambda_i(0)=\delta_{i,1}$, we have simply
+\begin{equation}
+  c=u(0) = \sum_{i=1}^N u_i \Lambda_i(0) \quad \Longrightarrow
+  \quad u_{1} = c.
+\end{equation}
+The discretized linear system of equations, taking into account of
+this BC, can thus be written as
+\begin{equation}
+  \begin{split}
+    u_1 &= c\\
+    \sum_{j=2}^N A_{ij}u_j &= f_i - A_{i1}c, \quad i=2,\ldots, N
+  \end{split}
+\end{equation}
+or in the following  matrix form:
+\begin{equation}
+     \left(\begin{matrix}
+       1 & 0 & \cdots \\
+       0 & A_{22} & \cdots \\
+       \vdots & \vdots & \ddots \\
+     \end{matrix}\right)
+     \left(\begin{matrix}
+       u_{1} \\
+       u_{2} \\
+       \vdots\\
+       u_{N} \\
+    \end{matrix}\right) =
+     \left(\begin{matrix}
+       c \\
+       f_{2} -cA_{21}\\
+       \vdots\\
+       f_{N} -cA_{N1}\\
+     \end{matrix}\right)
+\end{equation}
+Note that (1) the transformed matrix preserves any symmetry or
+positivity of the original matrix, (2) the first column of the
+original matrix has to be saved in order to modify the RHS $f_i$
+but only for non zero $c$ and (3) in that case, one needs to save
+only $[A_{i1}]_{i=2}^{p+1}$, where $p$ is the spline order.
+
+In summary, the procedure for imposing the Dirichlet BC $u_1=c$ can be
+summarized as follows:
+\begin{enumerate}
+\item Matrix transformation
+  \begin{enumerate}
+  \item Clear the matrix row $i=1$ and set its diagonal term
+    $A_{11}$ to 1.
+  \item Get the matrix column $A_{j1}, \quad j=2,\ldots,p+1$ and save it.
+  \item Clear the matrix column $j=1$ and set its diagonal term
+    $A_{11}$ to 1.
+  \end{enumerate}
+\item RHS transformation
+  \begin{enumerate}
+  \item Set $f_1\leftarrow c$.
+  \item Modify the RHS: $f_i\leftarrow f_i-A_{i1}c, \quad i=2,\ldots,p+1$.
+  \end{enumerate}
+\end{enumerate}
+If the original matrix \emph{is not symmetric}, only the steps (1a)
+and (2a) are required, since the other steps are only necessary to
+preserve the symmetry of the original matrix.
+
+\subsubsection{2D case}
+In that case, let us write the solution $u(x,y)$ as
+\begin{equation}
+  u(x,y) = \sum_{i=1}^{N_1}\sum_{j=1}^{N_2} u_{ij} \Lambda_i(x)\Lambda_j(y),
+\end{equation}
+where $N_1, N_2$ are the number of elements in each
+dimension. Assuming the BC $u(0,y) = g(y)$, and since
+$\Lambda_i(0)=\delta_{i1}$, the solutions $u_{ij}$
+should satisfy
+\begin{equation}
+\label{dirich_2d}
+  \sum_{j=1}^{N_2} u_{1j} \Lambda_j(y) = g(y).
+\end{equation}
+If $g(y)$ is constant $g(y)=c$, we obtain the trivial solution
+$u_{1,j}=c$ since $\sum_{j=1}^{N_2} \Lambda_j(y)=1$ \cite{BSPLINES}.
+For non-uniform $g$,
+at least 2 methods can be used to obtain the $N_2$ unknowns $u_{1j}$
+satisfying the equation above:
+\begin{enumerate}
+\item By \emph{collocating} Eq.(\ref{dirich_2d}) on a \emph{suitable}
+  set of points $[y_k]_{ k=1}^{N_2}$, the problem is reduced to an
+  \emph{interpolation} one (see section ``Spline
+  Interpolation'' in \cite{BSPLINES}).
+\item By \emph{minimizing} the residual norm of Eq.(\ref{dirich_2d})
+  defined as follows:
+    \begin{gather}
+      R = \left\|\sum_{j=1}^{N_2} c_{j} \Lambda_j(y)-g(y)\right\|^2 =
+      \int\!\!dy\left\{\left[\sum_{j=1}^{N_2} c_{j} \Lambda_j(y)\right]^2
+      - 2g(y)\sum_{j=1}^{N_2} c_{j} \Lambda_j(y) +g^2(y)\right\}\\
+      \frac{\partial R}{\partial c_k} = 2 \int\!\!dy\left[
+      \sum_{j=1}^{N_2} c_{j} \Lambda_j(y)\Lambda_k(y)
+      -g(y)\Lambda_k(y)\right] = 0, \quad k=1,\ldots,N_2,
+    \end{gather}
+    the boundary solutions $c_j$ can be calculated by solving the following
+    \emph{weak-form} of Eq.(\ref{dirich_2d}):
+    \begin{equation}
+      \sum_{j=1}^{N_2} c_{j} \int\!\!dy\Lambda_j(y)\Lambda_k(y) =
+      \int\!\!dy\Lambda_k(y) g(y), \qquad k=1,\ldots,N_2.
+    \end{equation}
+\end{enumerate}
+Once the values of $c_j$ known, the procedure described for the 1D case
+above can be applied to satisfy each of the $N_2$ conditions $u_{1j}=c_j$.
+
+A full example for solving the cylindrical Laplace equation in
+cylindrical coordinates:
+\begin{equation}
+  \begin{split}
+  \frac{1}{r}\frac{\partial}{\partial r}
+  \left(r\frac{\partial\phi}{\partial r}\right) &+\frac{1}{r^2}
+  \frac{\partial^2\phi}{\partial \theta^2} = 0 \\
+    \phi(r=1,\theta) &= \cos m\theta.
+  \end{split}
+\end{equation}
+is given in {\tt bpslines/examples/dirichlet/poisson.f90}.
+
+\subsection{Unicity on the axis}
+\label{unicityCond}
+Denoting the $N$ solutions at the axis by $(u_1, \ldots, u_N)$  , and
+their transforms by $(\hat u_1, \ldots, \hat u_N)$ defined by
+
+\begin{equation} \begin{array}{ccc}
+     u_1-u_N = \hat u_1  &  &   u_1 = \hat u_1 + \hat u_N   \\
+     u_2-u_N = \hat u_2  &  &   u_2 = \hat u_2 + \hat u_N   \\
+     \vdots        & \Longrightarrow & \vdots               \\
+     u_{N-1}-u_N = \hat u_{N-1}  &  &   u_{N-1} = \hat u_{N-1} + \hat u_N   \\
+     u_N = \hat u_N      &  &   u_N = \hat u_N,
+    \end{array}  \label{eq:unicity1} \end{equation}
+the unicity condition can be specified by simply imposing
+
+\begin{equation}
+  \hat u_1=\hat u_2=\ldots=\hat u_{N-1}=0. \label{eq:unicity2}
+\end{equation}
+From (\ref{eq:unicity1}), the \emph{transformation matrix} \(\mathbf U\) is defined
+as
+
+\begin{equation}
+  \mathbf{u} = \mathbf{ U \cdot\hat u}, \qquad \mathbf{U} =
+     \left(\begin{matrix}
+        1 & 0 & \dots & 0 & 1 \\
+        0 & 1 & \dots & 0 & 1 \\
+          &   & \ddots&   & \vdots \\
+        0 & 0 & \dots & 1 & 1 \\
+        0 & 0 & \dots & 0 & 1
+     \end{matrix}\right), \quad \mathbf{U^{T}} =
+     \left(\begin{matrix}
+        1 & 0 & \dots & 0 & 0 \\
+        0 & 1 & \dots & 0 & 0 \\
+          &   & \ddots&   & \vdots \\
+        0 & 0 & \dots & 1 & 0 \\
+        1 & 1 & \dots & 1 & 1
+     \end{matrix}\right).
+\end{equation}
+
+
+\paragraph{Matrix product \( \mathbf{A\cdot U}\)}
+\begin{equation}
+\mathbf{ A\cdot U} =
+     \left(\begin{array}{lllll}
+        A_{1,1} & A_{1,2}  & \dots & A_{1,N-1} &  \sum_{j} A_{1,j}  \\
+        A_{2,1} & A_{2,2}  & \dots & A_{2,N-1} &  \sum_{j} A_{2,j}  \\
+          &   & \ddots&   & \vdots \\
+        A_{N-1,1} & A_{N-1,2}  & \dots & A_{N-1,N-1} &  \sum_{j}A_{N-1,j}  \\
+        A_{N,1} & A_{N,2}  & \dots & A_{N,N-1} &  \sum_{j}A_{N,j}
+     \end{array}\right).
+\end{equation}
+Thus \emph{right multiply by \(\mathbf{U}\)} is equivalent to put the
+\emph{the sum of each row on the last column}.
+
+\paragraph{Matrix product \( \mathbf{ U^T \cdot A}\)}
+\begin{equation}
+\mathbf{ U^T \cdot A} =
+     \left(\begin{array}{lllll}
+        A_{1,1} & A_{1,2}  & \dots & A_{1,N-1} &  A_{1,N}  \\
+        A_{2,1} & A_{2,2}  & \dots & A_{2,N-1} &  A_{2,N}  \\
+          &   & \ddots&   & \vdots \\
+        A_{N-1,1} & A_{N-1,2}  & \dots & A_{N-1,N-1} & A_{N-1,N}  \\
+        \sum_{i}A_{i,1} & \sum_{i}A_{i,2} & \dots & \sum_{i}A_{i,N-1} &
+        \sum_{i}A_{i,N}
+     \end{array}\right).
+\end{equation}
+Thus \emph{left multiply by \(\mathbf{\hat U}\)} is equivalent to put the
+\emph{the sum of each column  on the last row}.
+
+\paragraph{Product \( \mathbf{\hat U \cdot b}\)}
+\begin{equation}
+\mathbf{\hat b} = \mathbf{U^T\cdot b} =
+     \left(\begin{array}{l}
+        b_1 \\
+        b_2 \\
+        \vdots \\
+        b_{N-1} \\
+        \sum_{i} b_{i}
+     \end{array}\right),
+\end{equation}
+
+\paragraph{Transformation of the original matrix equation}
+The full original linear system, obtained from the discretization of the
+2D \(r,\theta\) polar coordinates can be written as:
+
+\begin{equation}
+     \left(\begin{array}{ll}
+       \mathbf{A} & \mathbf{B} \\
+       \mathbf{C} & \mathbf{D}
+     \end{array}\right)
+     \left(\begin{array}{l}
+       \mathbf{u} \\
+       \mathbf{v}
+     \end{array}\right) =
+     \left(\begin{array}{l}
+       \mathbf{b} \\
+       \mathbf{c}
+     \end{array}\right), \label{eq:orig_matrix_eq}
+\end{equation}
+where the solution array is split into the solutions \(\mathbf{u}\) at \(r=0\) and
+the solutions \(\mathbf{v}\) on the remaining domain. The transformed system can
+thus be written as
+
+\begin{equation*}
+     \left(\begin{array}{ll}
+       \mathbf{U^T} & 0 \\
+       0            & \mathbf{I}
+     \end{array}\right)
+     \left(\begin{array}{ll}
+       \mathbf{A} & \mathbf{B} \\
+       \mathbf{C} & \mathbf{D}
+     \end{array}\right)
+     \left(\begin{array}{ll}
+       \mathbf{U} & 0 \\
+       0            & \mathbf{I}
+     \end{array}\right)
+     \left(\begin{array}{l}
+       \mathbf{\hat u} \\
+       \mathbf{v}
+     \end{array}\right) =
+     \left(\begin{array}{ll}
+       \mathbf{U^T} &0  \\
+       0            & \mathbf{I}
+     \end{array}\right)
+     \left(\begin{array}{l}
+       \mathbf{b} \\
+       \mathbf{c}
+     \end{array}\right),
+\end{equation*}
+
+\begin{equation}
+   \Longrightarrow
+     \left(\begin{array}{cc}
+       \mathbf{U^TAU} & \mathbf{U^TB} \\
+       \mathbf{CU} & \mathbf{D}
+     \end{array}\right)
+     \left(\begin{array}{l}
+       \mathbf{\hat u} \\
+       \mathbf{v}
+     \end{array}\right) =
+     \left(\begin{array}{c}
+       \mathbf{U^Tb} \\
+       \mathbf{c}
+     \end{array}\right),
+\end{equation}
+Notice that the transformation preserves any symmetry existing in the original system
+(\ref{eq:orig_matrix_eq}). The transformed matrix is finally given in the following where
+only the modified elements are shown and the sum is only over the first \(N\)
+points in \(\theta\) direction. The \(\times\) symbol denotes unmodified elements.
+
+\begin{equation}
+     \left(\begin{array}{lllllll}
+        \times & \times & \times & \times &  \sum_{j} A_{1,j}  & \times & \times \\
+        \times & \times & \times & \times &  \sum_{j} A_{2,j}  & \times & \times \\
+        \times & \times & \times & \times &  \vdots            & \times & \times \\
+        \times & \times & \times & \times & \sum_{j} A_{N-1,j} & \times & \times \\
+         \sum_{i}A_{i,1} & \sum_{i}A_{i,2}  & \dots & \sum_{i}A_{i,N-1} &
+         \sum_{i,j}A_{i,j} &  \sum_{i}A_{i,N+1} & \dots \\
+        \times & \times & \times & \times & \sum_{j} A_{N+1,j} & \times & \times \\
+        \times & \times & \times & \times &  \vdots            & \times & \times
+     \end{array}\right)
+\end{equation}
+Only the \(N^{th}\) column and the  \(N^{th}\) row are affected by the transformation.
+Applying now the unicity condition (\ref{eq:unicity2}) the final transformed system
+reads:
+
+\begin{equation}
+     \left(\begin{array}{lllllll}
+        1 & 0 & \dots & 0 & 0 & 0 & 0 \\
+        0 & 1 & \dots  & 0 & 0 & 0 & 0 \\
+        0 & 0 & \ddots & 0 &  \vdots            & 0 & 0 \\
+        0 & 0 & \dots  & 1 & 0 & 0 & 0 \\
+        0 & 0 & \dots  & 0 & \sum_{i,j}A_{i,j} &  \sum_{i}A_{i,N+1} & \dots \\
+        0 & 0 & \dots  & 0 & \sum_{j} A_{N+1,j} & \times & \times \\
+        0 & 0 & \dots  & 0 &  \vdots            & \times & \times
+     \end{array}\right)
+     \left(\begin{array}{l}
+        \hat u_1   \\
+        \hat u_2   \\
+        \vdots\\
+        \hat u_{N-1}\\
+        \hat u_{N} \\
+        u_{N+1} \\
+        \vdots
+     \end{array}\right) =
+     \left(\begin{array}{l}
+        0   \\
+        0   \\
+        \vdots\\
+        0   \\
+        \sum_{i} b_{i} \\
+        b_{N+1} \\
+        \vdots
+     \end{array}\right).
+\end{equation}
+
+
+\section{{\tt MATRIX} Reference}
+\label{matRef}
+
+The following conventions are adopted in the routine descriptions:
+\begin{itemize}
+\item {\tt [z]} means optional: for example {\tt TYPE([z]gemat)}
+  declares a variable which can be of type {\tt gemat} or {\tt zgemat}.
+\item The symbol ``$|$'' is the logical {\tt OR} operator. Thus
+\begin{verbatim}
+  TYPE([z]gemat|[z]gbmat) :: mat
+\end{verbatim}
+declares that {\tt mat} can be either of type {\tt gemat}, {\tt
+  zgemat}, {\tt pbmat} or {\tt zpbmat}.
+\item In a same declaration block, if a scalar or array of type {\tt
+  DOUBLE PRECISION|COMPLEX} is declared together with a matrix object
+  which can be also complex, both should be either real
+  or complex. For example in the routine {\tt updtmat}, if {\tt mat}
+  of type {\tt zgbmat}, {\tt val} should be complex.
+\end{itemize}
+
+\subsection{init}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+TYPE([z]gemat) :: mat
+  SUBROUTINE init(n, nterms, mat ,kmat)
+TYPE([z]gbmat) :: mat
+  SUBROUTINE init(kl, ku, n, nterms, mat, kmat)
+TYPE([z]pbmat) :: mat
+  SUBROUTINE init(ku, n, nterms, mat, kmat)
+TYPE([z]periodic_mat) :: mat
+  SUBROUTINE init(kl, ku, n, nterms, mat, kmat)
+    INTEGER, INTENT(in) :: kl, ku, n, nterms
+    INTEGER, OPTIONAL   :: kmat
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Initialize data structure for matrix
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   n            : rank of matrix
+   kl, ku       : number of sub and super diagonals
+   nterms       : number of terms in weak form
+   kmat         : matrix id
+   mat          : matrix object
+\end{verbatim}
+\end{description}
+
+\subsection{destroy}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE destroy(mat)
+    TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Free matrix memory
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat         : matrix object
+\end{verbatim}
+\end{description}
+
+\subsection{updmat}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE updtmat(mat, i, j, val)
+    TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
+    INTEGER, INTENT(IN) :: i, j
+    DOUBLE PRECISION|COMPLEX, INTENT(IN) :: val
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Update (accumulate) element $A_{ij}$
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat         : matrix object
+   i           : row index
+   j           : column index
+   val         : input value
+\end{verbatim}
+\end{description}
+
+\subsection{putele}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE putele(mat, i, j, val)
+    TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
+    INTEGER, INTENT(IN) :: i, j
+    DOUBLE PRECISION|COMPLEX, INTENT(IN) :: val
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Overwrite element $A_{ij}$
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat         : matrix object
+   i           : row index
+   j           : column index
+   val         : input value
+\end{verbatim}
+\end{description}
+
+\subsection{putrow}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE putrow(mat, i, arr)
+    TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
+    INTEGER, INTENT(IN) :: i
+    DOUBLE PRECISION|COMPLEX, INTENT(IN) :: arr(:)
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Overwrite a matrix row
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat         : matrix object
+   i           : row index
+   arr         : input array
+\end{verbatim}
+\end{description}
+
+\subsection{putcol}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE putcol(mat, j, arr)
+    TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
+    INTEGER, INTENT(IN) :: j
+    DOUBLE PRECISION|COMPLEX, INTENT(IN) :: arr(:)
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Overwrite a matrix row
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat         : matrix object
+   j           : column index
+   arr         : input array
+\end{verbatim}
+\end{description}
+
+\subsection{getele}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE getele(mat, i, j, val)
+    TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
+    INTEGER, INTENT(IN) :: i, j
+    DOUBLE PRECISION|COMPLEX, INTENT(OUT) :: val
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Get element $A_{ij}$
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat         : matrix object
+   i           : row index
+   j           : column index
+   val         : output value
+\end{verbatim}
+\end{description}
+
+\subsection{getrow}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE getrow(mat, i, arr)
+    TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
+    INTEGER, INTENT(IN) :: i
+    DOUBLE PRECISION|COMPLEX, INTENT(OUT) :: arr(:)
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Get a matrix row
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat         : matrix object
+   i           : row index
+   arr         : output array
+\end{verbatim}
+\end{description}
+
+\subsection{getcol}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE getcol(mat, j, arr)
+    TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
+    INTEGER, INTENT(IN) :: j
+    DOUBLE PRECISION|COMPLEX, INTENT(OUT) :: arr(:)
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Get a matrix column
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat         : matrix object
+   i           : column index
+   arr         : output array
+\end{verbatim}
+\end{description}
+
+\subsection{vmx}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  FUNCTION vmx(mat, x)
+    TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
+    DOUBLE PRECISION|COMPLEX, DIMENSION(:), INTENT(in) :: x
+    DOUBLE PRECISION|COMPLEX, DIMENSION(SIZE(x)) :: vmx
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Matrix-vector product $Ax$
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat         : matrix object
+   x           : input array
+   vmx         : output array
+\end{verbatim}
+\end{description}
+
+\subsection{mcopy}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE mcopy(mata, matb)
+    TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mata, matb
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Matrix copy: $B = A$
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mata        : input matrix object
+   matb        : output matrix object
+\end{verbatim}
+\end{description}
+
+\subsection{maddto}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE maddto(mata, alpha, matb)
+    TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mata, matb
+    DOUBLE PRECISION|COMPLEX : alpha
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Matrix addition: $A  \leftarrow A+\alpha B$
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mata        : input matrix object
+   matb        : output matrix object
+  alpha        : input scalar
+\end{verbatim}
+\end{description}
+
+\subsection{determinant}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE determinant(mat, base, pow)
+    TYPE([z]gemat|[z]gbmat|[z]pbmat) :: mat
+    INTEGER, INTENT(out) :: pow
+    DOUBLE PRECISION|COMPLEX : base
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Returns the determinant of matrix as $D = \text{base}\times 10^{\text{pow}}$
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat         : input matrix object
+   base        : mantissa of determinant
+   pow         : exponent of determinant
+\end{verbatim}
+\end{description}
+
+\subsection{factor}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE factor(mat)
+    TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+LU (Cholesky for symmetric/hermitian matrix) factorization
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat         : inout matrix object
+\end{verbatim}
+\end{description}
+
+\subsection{bsolve}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE bsolve(mat)
+    TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
+    DOUBLE PRECISION|COMPLEX, DIMENSION [(:)] :: rhs
+    DOUBLE PRECISION|COMPLEX, DIMENSION [(:),] OPTIONAL, INTENT (out) :: sol
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Solve the linear system using the factored matrix, for a single or
+multiple right-hand-side
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat  : input factored matrix object
+   rhs  : input right-hand-side, overwriten by the solution if sol is not present
+   sol  : contains solution
+\end{verbatim}
+\end{description}
+
+\section{{\tt SPMAT} Reference}
+\label{spmatRef}
+
+The following conventions are adopted in the routine descriptions:
+\begin{itemize}
+\item {\tt [z]} means optional: for example {\tt TYPE([z]gemat)}
+  declares a variable which can be of type {\tt gemat} or {\tt zgemat}.
+\item The symbol ``$|$'' is the logical {\tt OR} operator. Thus
+\begin{verbatim}
+  TYPE([z]gemat|[z]gbmat) :: mat
+\end{verbatim}
+declares that {\tt mat} can be either of type {\tt gemat}, {\tt
+  zgemat}, {\tt pbmat} or {\tt zpbmat}.
+\item In a same declaration block, if a scalar or array of type {\tt
+  DOUBLE PRECISION|COMPLEX} is declared together with a matrix object
+  which can be also complex, both should be either real
+  or complex. For example in the routine {\tt updtmat}, if {\tt mat}
+  of type {\tt zgbmat}, {\tt val} should be complex.
+\end{itemize}
+
+\subsection{init}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE init(n, mat, istart, iend)
+    TYPE([z]spmat) :: mat
+    INTEGER, INTENT(in), OPTIONAL :: istart, iend
+    INTEGER, INTENT(in) :: n
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Initialize an empty sparse matrix of $n$ rows.
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   n            : rank of matrix
+   mat          : matrix object
+   istart, iend : range of row indices. By default istart=1, iend=n
+\end{verbatim}
+\end{description}
+
+\subsection{destroy}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE destroy(mat)
+    TYPE([z]spmat) :: mat
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Free matrix memory
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat         : matrix object
+\end{verbatim}
+\end{description}
+
+\subsection{updmat}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE updtmat(mat, i, j, val)
+    TYPE([z]spmat) :: mat
+    INTEGER, INTENT(IN) :: i, j
+    DOUBLE PRECISION|COMPLEX, INTENT(IN) :: val
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Update (accumulate) an existing element $A_{ij}$ or insert it in the
+linked list in an increasing order in the column index j.
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat         : matrix object
+   i           : row index
+   j           : column index
+   val         : input value
+\end{verbatim}
+\end{description}
+
+\subsection{putele}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE putele(mat, i, j, val, nlforce_zero)
+    TYPE([z]pbmat) :: mat
+    INTEGER, INTENT(IN) :: i, j
+    DOUBLE PRECISION|COMPLEX, INTENT(IN) :: val
+    LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Overwrite an existing element $A_{ij}$ or insert it in the
+linked list in an increasing order in the column index j.
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat          : matrix object
+   i            : row index
+   j            : column index
+   val          : input value
+   nlforce_zero : Never remove an existing element when input is zero if TRUE
+                  FALSE by default
+\end{verbatim}
+\end{description}
+
+\subsection{putrow}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE putrow(mat, i, arr, col, nlforce_zero)
+    TYPE([z]spmat) :: mat
+    INTEGER, INTENT(IN) :: i
+    DOUBLE PRECISION|COMPLEX, INTENT(IN) :: arr(:)
+    INTEGER, INTENT(in), OPTIONAL :: col(:)
+    LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Overwrite a matrix row
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat          : matrix object
+   i            : row index
+   arr          : input array
+   col          : input array containing column indices
+   nlforce_zero : Never remove an existing element when input is zero if TRUE
+                  FALSE by default
+\end{verbatim}
+\end{description}
+
+\subsection{putcol}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE putcol(mat, j, arr, nlforce_zero)
+    TYPE([z]spmat) :: mat
+    INTEGER, INTENT(IN) :: j
+    DOUBLE PRECISION|COMPLEX, INTENT(IN) :: arr(:)
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Overwrite a matrix row
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat          : matrix object
+   j            : column index
+   arr          : input array
+   nlforce_zero : Never remove an existing non-zero element if .TRUE.
+                  .FALSE. by default
+\end{verbatim}
+\end{description}
+
+\subsection{getele}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE getele(mat, i, j, val)
+    TYPE([z]spmat) :: mat
+    INTEGER, INTENT(IN) :: i, j
+    DOUBLE PRECISION|COMPLEX, INTENT(OUT) :: val
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Get element $A_{ij}$
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat         : matrix object
+   i           : row index
+   j           : column index
+   val         : output value
+\end{verbatim}
+\end{description}
+
+\subsection{getrow}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE getrow(mat, i, arr, col)
+    TYPE([z]spmat) :: mat
+    INTEGER, INTENT(IN) :: i
+    DOUBLE PRECISION|COMPLEX, INTENT(OUT) :: arr(:)
+    INTEGER, INTENT(out), OPTIONAL :: col(:)
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Get a matrix row and optionally the column indices
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat         : matrix object
+   i           : row index
+   arr         : output array
+   col         : output array containing column indices
+\end{verbatim}
+\end{description}
+
+\subsection{getcol}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE getcol(mat, j, arr)
+    TYPE([z]spmat) :: mat
+    INTEGER, INTENT(IN) :: j
+    DOUBLE PRECISION|COMPLEX, INTENT(OUT) :: arr(:)
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Get a matrix column
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat         : matrix object
+   i           : column index
+   arr         : output array
+\end{verbatim}
+\end{description}
+
+\subsection{get\_count}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  INTEGER FUNCTION get_count(mat, nnz)
+    TYPE([z]spmat) :: mat
+    INTEGER, INTENT(out), OPTIONAL :: nnz(:)
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Returns the number of non-zeros and optionally an array of numbers
+of non-zeros on each row
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat         : matrix object
+   nnz         : array containing numbers of non-zeros on each row.
+\end{verbatim}
+\end{description}
+
+\section{{\tt PARDISO\_BSPLINES} Reference}
+\label{pardisoRef}
+The subroutines {\tt updmat, putele, putrow, putcol, getele, getrow,
+getcol, vmx, mcopy, maddto} and {\tt destroy}
+have \emph{exactly} the same list of arguments as
+those from the {\tt MATRIX} module (as documented in
+Appendix~\ref{matRef}), except for the matrix types. Below, we show only the routines
+that have different arguments. The same conventions as before are used
+for the routine description.
+
+\subsection{init}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE init(n, nterms, mat, kmat, nlsym, [nlherm,] nlpos, &
+       &                      nlforce_zero)
+    INTEGER, INTENT(in)           :: n, nterms
+    TYPE([z]pardiso_mat) :: mat
+    INTEGER, OPTIONAL, INTENT(in) :: kmat
+    LOGICAL, OPTIONAL, INTENT(in) :: nlsym
+    LOGICAL, OPTIONAL, INTENT(in) :: nlpos
+    LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Initialize the PARDISO solver. A SPMAT matrix of $n$ empty rows is initialized.
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   n            : rank of matrix
+   nterms       : number of terms in weak form
+   kmat         : matrix id
+   mat          : matrix object
+   nlsym        : symmetric or not. Default is .FALSE.
+   nlherm       : Hermitian or not for complex matrix . Default is .FALSE.
+   nlpos        : Positive-definite or not. Default is .TRUE.
+   nlforce_zero : Never remove an existing non-zero element if .TRUE.
+                  .TRUE. by default
+\end{verbatim}
+\end{description}
+
+\subsection{clear\_mat}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE clear_mat(mat)
+    TYPE([z]pardiso_mat)         :: mat
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+ Clear matrix, keeping its sparse structure unchanged
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat          : matrix object
+\end{verbatim}
+\end{description}
+
+\subsection{psum\_mat}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE sum_mat(mat, comm)
+    TYPE([z]pardiso_mat)         :: mat
+    INTEGER, INTENT(in)          :: comm
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+ Parallel sum of matrices. Result matrix is placed in the sparse
+ matrix mat\%mat on all processes of comm.
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat          : matrix object
+   comm         : communicator
+\end{verbatim}
+\end{description}
+
+\subsection{p2p\_mat}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE p2p_mat(mat, dest, extyp, op, comm)
+    TYPE([z]pardiso_mat)         :: mat
+    INTEGER, INTENT(in)          :: dest
+    CHARACTER(len=*), INTENT(in) :: extyp ! ('send', 'recv', 'sendrecv')
+    CHARACTER(len=*), INTENT(in) :: op    ! ('put', 'updt')
+    INTEGER, INTENT(in)          :: comm
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+  Point-to-point combine sparse matrix between 2 processes.
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat          : matrix object
+   dest         : rank of remote process
+   extyp        : exchange type ('send', 'recv', 'sendrecv')
+   op           : operation type ('put', 'updt')
+   comm         : communicator
+\end{verbatim}
+\end{description}
+
+\subsection{get\_count}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  INTEGER FUNCTION get_count(mat, nnz)
+    TYPE([z]pardiso_mat) :: mat
+    INTEGER, INTENT(out), OPTIONAL :: nnz(:)
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Returns the number of non-zeros and optionally an array of numbers
+of non-zeros on each row
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat         : matrix object
+   nnz         : array containing numbers of non-zeros on each row.
+\end{verbatim}
+\end{description}
+
+\subsection{factor}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE factor(mat, nlreord, nlmetis, debug)
+    TYPE([z]pardiso_mat)          :: mat
+    LOGICAL, OPTIONAL, INTENT(in) :: nlreord
+    LOGICAL, OPTIONAL, INTENT(in) :: nlmetis
+    LOGICAL, OPTIONAL, INTENT(in) :: debug
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Wrapper of to\_mat, reord\_mat and numfact
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat         : matrix object
+   nlreord     : call reord_mat if .TRUE. (default is .TRUE.)
+   nlmetis     : use METIS nested dissection for reoredering. Default
+                 is minimum degree alogorithm.
+   debug       : verbose output from PARDISO if .TRUE. Default is .FALSE.
+\end{verbatim}
+\end{description}
+
+\subsection{bsolve}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE bsolve_pardiso_mat1(mat, rhs, sol, nref, debug)
+    TYPE([z]pardiso_mat)               :: mat
+    DOUBLE PRECISION|COMPLEX           :: rhs(:)
+    DOUBLE PRECISION|COMPLEX, OPTIONAL :: sol(:)
+    INTEGER, OPTIONAL                  :: nref
+    LOGICAL, OPTIONAL, INTENT(in)      :: debug
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Wrapper of to\_mat, reord\_mat and numfact
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat   : matrix object
+   rhs   : input right-hand-side, overwriten by the solution if sol is not present
+   sol   : contains solution
+   ref   : maximum number of refinement steps. Default is 0 (no refinement).
+   debug : verbose output from PARDISO if .TRUE. Default is .FALSE.
+\end{verbatim}
+\end{description}
+
+\subsection{to\_mat}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE to_mat(mat)
+    TYPE([z]pardiso_mat)          :: mat
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Convert linked list spmat to pardiso matrix structure
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat         : matrix object
+\end{verbatim}
+\end{description}
+
+\subsection{reord\_mat}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE reord_mat(mat, nlmetis, debug)
+    TYPE([z]pardiso_mat)          :: mat
+    LOGICAL, OPTIONAL, INTENT(in) :: nlmetis
+    LOGICAL, OPTIONAL, INTENT(in) :: debug
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Reordering and symbolic factorization
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat         : matrix object
+   nlmetis     : use METIS nested dissection for reoredering. Default
+                 is minimum degree alogorithm.
+   debug       : verbose output from PARDISO if .TRUE. Default is .FALSE.
+\end{verbatim}
+\end{description}
+
+\subsection{numfact}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE numfact(mat, debug)
+    TYPE([z]pardiso_mat)          :: mat
+    LOGICAL, OPTIONAL, INTENT(in) :: debug
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Numerical factorization
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat         : matrix object
+   debug       : verbose output from PARDISO if .TRUE. Default is .FALSE.
+\end{verbatim}
+\end{description}
+
+\section{{\tt [P]WSMP\_BSPLINES} Reference}
+\label{wsmpRef}
+The subroutines {\tt updmat, putele, putrow, putcol, getele, getrow,
+getcol, vmx, mcopy, maddto} and {\tt destroy}
+have \emph{exactly} the same list of arguments as
+those from the {\tt MATRIX} module (as documented in
+Appendix~\ref{matRef}), except for the matrix types. Below, we show only the routines
+that have different arguments. The same conventions as before are used
+for the routine description.
+
+\subsection{init}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE init(n, nterms, mat, kmat, nlsym, [nlherm,] nlpos, &
+       &                      nlforce_zero, [comm_in])
+    INTEGER, INTENT(in)           :: n, nterms
+    TYPE([z]wsmp_mat) :: mat
+    INTEGER, OPTIONAL, INTENT(in) :: kmat
+    LOGICAL, OPTIONAL, INTENT(in) :: nlsym
+    LOGICAL, OPTIONAL, INTENT(in) :: nlpos
+    LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero
+    INTEGER, OPTIONAL, INTENT(in) :: comm_in
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Initialize the WSMP solver. A SPMAT matrix of $n$ empty rows is initialized.
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   n            : rank of matrix
+   nterms       : number of terms in weak form
+   kmat         : matrix id
+   mat          : matrix object
+   nlsym        : symmetric or not. Default is .FALSE.
+   nlherm       : Hermitian or not for complex matrix . Default is .FALSE.
+   nlpos        : Positive-definite or not. Default is .TRUE.
+   nlforce_zero : Never remove an existing non-zero element if .TRUE.
+                  .TRUE. by default
+   comm_in      : MPI communicator. By default MPI_COMM_WORLD (only in PWSMP_BSPLINES)
+\end{verbatim}
+\end{description}
+
+\subsection{clear\_mat}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE clear_mat(mat)
+    TYPE([z]wsmp_mat) :: mat
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+ Clear matrix, keeping its sparse structure unchanged
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat          : matrix object
+\end{verbatim}
+\end{description}
+
+\subsection{psum\_mat}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE sum_mat(mat, comm)
+    TYPE([z]wsmp_mat)         :: mat
+    INTEGER, INTENT(in)          :: comm
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+ Parallel sum of matrices. Result matrix is placed in the sparse
+ matrix mat\%mat on all processes of comm.
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat          : matrix object
+   comm         : communicator
+\end{verbatim}
+\end{description}
+
+\subsection{p2p\_mat}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE p2p_mat(mat, dest, extyp, op, comm)
+    TYPE([z]wsmp_mat)         :: mat
+    INTEGER, INTENT(in)          :: dest
+    CHARACTER(len=*), INTENT(in) :: extyp ! ('send', 'recv', 'sendrecv')
+    CHARACTER(len=*), INTENT(in) :: op    ! ('put', 'updt')
+    INTEGER, INTENT(in)          :: comm
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+  Point-to-point combine sparse matrix between 2 processes.
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat          : matrix object
+   dest         : rank of remote process
+   extyp        : exchange type ('send', 'recv', 'sendrecv')
+   op           : operation type ('put', 'updt')
+   comm         : communicator
+\end{verbatim}
+\end{description}
+
+\subsection{get\_count}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  INTEGER FUNCTION get_count(mat, nnz)
+    TYPE([z]wsmp_mat) :: mat
+    INTEGER, INTENT(out), OPTIONAL :: nnz(:)
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Returns the number of non-zeros and optionally an array of numbers
+of non-zeros on each row
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat         : matrix object
+   nnz         : array containing numbers of non-zeros on each row.
+\end{verbatim}
+\end{description}
+
+\subsection{factor}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE factor(mat, nlreord)
+    TYPE([z]wsmp_mat)          :: mat
+    LOGICAL, OPTIONAL, INTENT(in) :: nlreord
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Wrapper of to\_mat, reord\_mat and numfact
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat         : matrix object
+   nlreord     : call reord_mat if .TRUE. (default is .TRUE.)
+\end{verbatim}
+\end{description}
+
+\subsection{bsolve}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE bsolve_wsmp_mat1(mat, rhs, sol, nref)
+    TYPE([z]wsmp_mat)               :: mat
+    DOUBLE PRECISION|COMPLEX           :: rhs(:)
+    DOUBLE PRECISION|COMPLEX, OPTIONAL :: sol(:)
+    INTEGER, OPTIONAL                  :: nref
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Wrapper of to\_mat, reord\_mat and numfact
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat   : matrix object
+   rhs   : input right-hand-side, overwriten by the solution if sol is not present
+   sol   : contains solution
+   ref   : maximum number of refinement steps. Default is 0 (no refinement).
+   debug : verbose output from WSMP if .TRUE. Default is .FALSE.
+\end{verbatim}
+\end{description}
+
+\subsection{to\_mat}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE to_mat(mat)
+    TYPE([z]wsmp_mat)          :: mat
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Convert linked list spmat to wsmp matrix structure
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat         : matrix object
+\end{verbatim}
+\end{description}
+
+\subsection{reord\_mat}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE reord_mat(mat)
+    TYPE([z]wsmp_mat)          :: mat
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Reordering and symbolic factorization
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat         : matrix object
+\end{verbatim}
+\end{description}
+
+\subsection{numfact}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE numfact(mat)
+    TYPE([z]wsmp_mat)          :: mat
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Numerical factorization
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat         : matrix object
+\end{verbatim}
+\end{description}
+
+\section{{\tt MUMPS\_BSPLINES} Reference}
+\label{mumpsRef}
+The subroutines {\tt updmat, putele, putrow, putcol, getele, getrow,
+getcol, vmx, mcopy, maddto} and {\tt destroy}
+have \emph{exactly} the same list of arguments as
+those from the {\tt MATRIX} module (as documented in
+Appendix~\ref{matRef}), except for the matrix types. Below, we show only the routines
+that have different arguments. The same conventions as before are used
+for the routine description.
+
+\subsection{init}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE init(n, nterms, mat, kmat, nlsym, [nlherm,] nlpos, &
+       &                      nlforce_zero, comm_in)
+    INTEGER, INTENT(in)           :: n, nterms
+    TYPE([z]mumps_mat) :: mat
+    INTEGER, OPTIONAL, INTENT(in) :: kmat
+    LOGICAL, OPTIONAL, INTENT(in) :: nlsym
+    LOGICAL, OPTIONAL, INTENT(in) :: nlpos
+    LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero
+    INTEGER, OPTIONAL, INTENT(in) :: comm_in
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Initialize the MUMPS solver. A SPMAT matrix of $n$ empty rows is initialized.
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   n            : rank of matrix
+   nterms       : number of terms in weak form
+   kmat         : matrix id
+   mat          : matrix object
+   nlsym        : symmetric or not. Default is .FALSE.
+   nlherm       : Hermitian or not for complex matrix . Default is .FALSE.
+   nlpos        : Positive-definite or not. Default is .TRUE.
+   nlforce_zero : Never remove an existing non-zero element if .TRUE.
+                  .TRUE. by default
+   comm_in      : MPI communicator. By default MPI_COMM_SELF (serial mode).
+\end{verbatim}
+\end{description}
+
+\subsection{clear\_mat}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE clear_mat(mat)
+    TYPE([z]mumps_mat) :: mat
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+ Clear matrix, keeping its sparse structure unchanged
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat          : matrix object
+\end{verbatim}
+\end{description}
+
+\subsection{psum\_mat}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE sum_mat(mat, comm)
+    TYPE([z]mumps_mat)         :: mat
+    INTEGER, INTENT(in)          :: comm
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+ Parallel sum of matrices. Result matrix is placed in the sparse
+ matrix mat\%mat on all processes of comm.
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat          : matrix object
+   comm         : communicator
+\end{verbatim}
+\end{description}
+
+\subsection{p2p\_mat}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE p2p_mat(mat, dest, extyp, op, comm)
+    TYPE([z]mumps_mat)         :: mat
+    INTEGER, INTENT(in)          :: dest
+    CHARACTER(len=*), INTENT(in) :: extyp ! ('send', 'recv', 'sendrecv')
+    CHARACTER(len=*), INTENT(in) :: op    ! ('put', 'updt')
+    INTEGER, INTENT(in)          :: comm
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+  Point-to-point combine sparse matrix between 2 processes.
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat          : matrix object
+   dest         : rank of remote process
+   extyp        : exchange type ('send', 'recv', 'sendrecv')
+   op           : operation type ('put', 'updt')
+   comm         : communicator
+\end{verbatim}
+\end{description}
+
+\subsection{get\_count}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  INTEGER FUNCTION get_count(mat, nnz)
+    TYPE([z]mumps_mat) :: mat
+    INTEGER, INTENT(out), OPTIONAL :: nnz(:)
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Returns the number of non-zeros and optionally an array of numbers
+of non-zeros on each row
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat         : matrix object
+   nnz         : array containing numbers of non-zeros on each row.
+\end{verbatim}
+\end{description}
+
+\subsection{factor}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE factor(mat, nlreord)
+    TYPE([z]mumps_mat)          :: mat
+    LOGICAL, OPTIONAL, INTENT(in) :: nlreord
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Wrapper of to\_mat, reord\_mat and numfact
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat         : matrix object
+   nlreord     : call reord_mat if .TRUE. (default is .TRUE.)
+\end{verbatim}
+\end{description}
+
+\subsection{bsolve}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE bsolve_mumps_mat1(mat, rhs, sol, nref)
+    TYPE([z]mumps_mat)               :: mat
+    DOUBLE PRECISION|COMPLEX           :: rhs(:)
+    DOUBLE PRECISION|COMPLEX, OPTIONAL :: sol(:)
+    INTEGER, OPTIONAL                  :: nref
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Wrapper of to\_mat, reord\_mat and numfact
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat   : matrix object
+   rhs   : input right-hand-side, overwriten by the solution if sol is not present
+   sol   : contains solution
+   ref   : maximum number of refinement steps. Default is 0 (no refinement).
+   debug : verbose output from MUMPS if .TRUE. Default is .FALSE.
+\end{verbatim}
+\end{description}
+
+\subsection{to\_mat}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE to_mat(mat)
+    TYPE([z]mumps_mat)          :: mat
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Convert linked list spmat to mumps matrix structure
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat         : matrix object
+\end{verbatim}
+\end{description}
+
+\subsection{reord\_mat}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE reord_mat(mat)
+    TYPE([z]mumps_mat)          :: mat
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Reordering and symbolic factorization
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat         : matrix object
+\end{verbatim}
+\end{description}
+
+\subsection{numfact}
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  SUBROUTINE numfact(mat)
+    TYPE([z]mumps_mat)          :: mat
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+\begin{description}
+\item[Purpose:] \mbox{}
+Numerical factorization
+\item[Arguments:] \mbox{}
+\begin{verbatim}
+   mat         : matrix object
+\end{verbatim}
+\end{description}
+
+
+\begin{thebibliography}{99}
+\bibitem{BSPLINES} {\tt BSPLINES} Reference Guide.
+\bibitem{PARDISO} \url{http://www.pardiso-project.org/}
+\bibitem{WSMP} \url{http://www-users.cs.umn.edu/~agupta/wsmp.html}
+\bibitem{MUMPS} \url{http://graal.ens-lyon.fr/MUMPS/}
+\bibitem{McMillan} B. F. McMillan, et. al. \emph{Rapid Fourier space
+  solution of linear partial integro-differential equations in
+  toroidal magnetic confinement geometries}, Computer Physics
+  Communications 181(4),
+  715-719 (2010)
+\end{thebibliography}
+
+\end{document}
diff --git a/docs/manual/using_bsplines.pdf b/docs/manual/using_bsplines.pdf
new file mode 100644
index 0000000..6ab9c4e
Binary files /dev/null and b/docs/manual/using_bsplines.pdf differ
diff --git a/docs/manual/using_bsplines.tex b/docs/manual/using_bsplines.tex
new file mode 100644
index 0000000..fec665a
--- /dev/null
+++ b/docs/manual/using_bsplines.tex
@@ -0,0 +1,366 @@
+%
+% @file using_bsplines.tex
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+\documentclass[a4paper]{article}
+\usepackage{linuxdoc-sgml}
+\usepackage{graphicx}
+\usepackage{hyperref}
+\usepackage{amsmath}
+%\usepackage{verbatim}
+%\usepackage[notref]{showkeys}
+
+\title{\tt Using BSPLINES in Particle Codes}
+\author{Trach-Minh Tran}
+\date{v0.1, March 2012}
+\abstract{These notes present some practical considerations on using
+  BSPLINES in particle codes, in particular for the charge or current
+  assignment as well as the field interpolation. Performance
+  measurements are done on an Intel Xeon X5570 and the more recent
+  Xeon E5-2680.}
+
+\begin{document}
+\maketitle
+%\tableofcontents
+
+\section{Introduction}
+For simplicity, we assume in these notes that we are dealing with a
+2D electrostatic particle code and the 2D Poisson equation is to be solved
+using the Finite Element Method. Starting from the \emph{weak form}
+and using the \emph{splines} for both \emph{basis} and \emph{test}
+functions, the electrostatic field potential together with its
+gradient and the right hand side can be computed from
+\begin{equation}
+  \begin{split}
+    \phi(x,y) &=
+      \sum_{ij}\,c_{ij}\,\Lambda_i(x)\Lambda_j(y) \\
+      \frac{\partial\phi}{\partial x} &=
+      \sum_{ij}\,c_{ij}\,\Lambda'_i(x)\Lambda_j(y) \\
+      \frac{\partial\phi}{\partial y} &=
+      \sum_{ij}\,c_{ij}\,\Lambda_i(x)\Lambda'_j(y) \\
+    S_{ij} &= \sum_{\mu=1}^{N_p}\, q_\mu\Lambda_i(x_\mu)\Lambda_j(y_\mu),
+  \end{split}
+\end{equation}
+where $c_{ij}$ are the solutions of the discretized Poisson equation
+and $\{x_\mu,y_\mu\}$ are the coordinates of the $N_p$ simulation
+particles. At each time step, the calculation of both the field $\phi$
+and its gradient (\emph{field interpolation}) for the particle
+pusher and the construction of the RHS $S_{i}$ (\emph{charge
+  assignment}) involve thus the computation of a large number of
+splines $\Lambda$ and its derivatives $\Lambda'$.
+
+Notice that the construction of the solver matrix requires also
+the calculations of the splines. This operation is however performed only
+once at the initial timestep in the (most common) case where the
+matrix is time independent and thus will not be considered in further
+these notes.
+
+\section{Computation of splines}
+Let consider the grid defined by $x_i$, $i=1,\ldots,N+1$. Inside the
+interval $[x_i, x_{i+1}]$, the $p+1$ non-zero splines of degree $p$
+can be computed efficiently using its polynomial representation given
+by
+\begin{equation}
+  \begin{split}
+    \Lambda_{i+\alpha}(x) &= \sum_{k=0}^{p}\, V^{i}_{k\alpha}
+      (x-x_i)^k, \qquad \alpha=1,\ldots,p+1, \\
+    V^i_{k\alpha} &=
+    \left.\frac{1}{k!}\frac{d^k}{dx^k}\Lambda_{i+\alpha}(x)\right|_{x=x_i}.
+  \end{split}
+\end{equation}
+The $(p+1)^2N$ coefficients $V^i_{k\alpha}$ are precalculated and stored
+during the spline initialization (in routine {\tt SET\_SPLINE}) by
+using the \emph{recurrence relation} \cite{BSPLINES} to compute the spline and all its
+$p$ derivatives. Note that for periodic splines on an equidistant
+mesh, only $(p+1)^2$ coefficients  $V_{k\alpha}$ are required since
+the splines have \emph{translational invariance}.
+
+For a polynomial $P(x)=a_0+a_1x+\ldots +a_px^p$, its value can be
+calculated together with it first derivative, using Horner's rule as:
+
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  f = a(p)
+  fp = f
+  DO i=p-1,1,-1
+   f = a(i) + x*f
+   fp = f + x*fp
+  END DO
+  f = a(0) + x*f
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+showing that exactly $4p-2$ floating operations (flops) per point are
+required. If only the value of the polynomial is needed, only $2p$ flops
+per point are required.
+
+\section{Field interpolation}
+\subsection{1D case}
+Let considered first the 1D case. The spline expansion of $\phi$ for
+$x_i\le x < x_{i+1}$ are expressed as
+\begin{equation}
+  \phi(x) = \sum_{\alpha=0}^pc_{i+\alpha}\Lambda_{i+\alpha}(x).
+\end{equation}
+To calculate the field using this spline expansion, $p+1$ splines
+have to be first calculated followed by the sum above, which
+yields a total cost of $2(p+1)^2\sim 2p^2$ flops per point. This cost
+can be reduced by observing that
+$\phi(x)$ is a \emph{piecewise polynomial} (PP) of degree $p$ in
+each interval. Its PP coefficients can be obtained from
+\begin{equation}
+  \begin{split}
+  \phi(x) &=  \sum_{\alpha=0}^pc_{i+\alpha}\sum_{k=0}^{p}\, V^{i}_{k\alpha}
+      (x-x_i)^k \\
+      &= \sum_{k=0}^{p}\, \Pi^{i}_{k}(x-x_i)^k, \qquad
+  \Pi^{i}_{k}= \sum_{\alpha=0}^pc_{i+\alpha} V^{i}_{k\alpha}
+  \end{split}
+\end{equation}
+Once the $N(p+1)$ PP coefficients $\Pi^{i}_{k}$ have been calculated
+from the spline expansion coefficients $c_{i+\alpha}$, only $2p$ flops per
+point are required to obtain the field value, using the Horner's rule
+described previously.
+
+\subsection{2D case}
+Extension for the spline expansion and the PP representation for
+$\phi(x,y)$ is straightforwards and yields, for $x_i\le x < x_{i+1}$,
+$y_j\le y < y_{j+1}$:
+\begin{equation}
+  \begin{split}
+      \phi(x,y) &= \sum_{\alpha=0}^{p1}\sum_{\beta=0}^{p2}c_{i+\alpha,j+\beta}
+      \Lambda_{i+\alpha}(x)\Lambda_{j+\beta}(y) \\
+      \phi(x,y) &= \sum_{k=0}^{p1}\sum_{l=0}^{p2}\,\Pi^{ij}_{kl}(x-x_i)^k(y-y_j)^l, \qquad
+  \Pi^{ij}_{kl}= \sum_{\alpha=0}^{p1}\sum_{\beta=0}^{p2}c_{i+\alpha,j+\beta} V^{i}_{k\alpha}V^{j}_{l\beta},
+  \end{split}
+\end{equation}
+where $ V^{i}_{k\alpha}$ and $V^{j}_{l\beta}$ are the PP
+coefficients of the splines $\Lambda_{i+\alpha}(x)$ and
+$\Lambda_{j+\beta}(y)$ respectively. Assuming the same spline order
+$p$ in
+both $x$ and $y$, the flop counts per point for the 2 representations are
+respectively $2(3p+2)(p+1)\sim 6p^2$ and $2p(p+2)\sim 2p^2$, while the
+storages required for the spline coefficients $c$ and the PP
+coefficients $\Pi$ are $(N+p)^2\sim N^2$ and $N^2(p+1)^2$ respectively.
+
+\subsection{Implementation in BSPLINES}
+The PP representation is selected by default in BSPLINES,
+\emph{unless} the logical keyword {\tt NLPPFORM} is set to
+{\tt .FALSE.} when calling the spline initialization routine {\tt
+  SET\_SPLINE}. The flop counts per point for both methods are
+summarized in the table below
+\begin{center}
+\begin{tabular}{|l|c|c|}
+  \hline
+                 & 1D  & 2D \\\hline
+Spline expansion & $2(p+1)^{2}$ & $2(3p+2)(p+1)$ \\
+PP representation & $2p$      &   $2p(p+2)$ \\\hline
+\end{tabular}
+\end{center}
+The routine {\tt GRIDVAL} computes the value of the
+field or one of its derivatives. The first call to this routine
+computes the PP coefficients $\Pi$ if {\tt NLPPFORM=.TRUE.} is
+selected or just store the spline coefficients $c$ in the spline
+internal data otherwise. In the following calls to {\tt GRIDVAL}, $c$
+should not be passed to the routines.
+
+Notice that the PP representation requires to store the $N^2(p+1)^2$
+PP coefficients in the 2D case, which is still acceptable. In the 3D
+case, this storage requirement becomes $N^3(p+1)^3$  which can be
+prohibitive! In this case the less efficient \emph{Spline expansion}
+formulation should be selected.
+
+In the \emph{particle loop}, the routine {\tt GETGRAD} which computes
+the function and all its first partial derivatives at once should be
+called instead of {\tt GRIDVAL}.
+
+\section{Particle localization({\tt locintv})}
+In both charge assignment and field interpolation, finding in which
+interval of the spatial grid the particle is localized should be first
+performed. This operation is trivial for the case of an equidistant
+mesh. For non-equidistant mesh, an \emph{equidistant fine} mesh and its mapping to the
+actual mesh are first constructed in the spline initialization routine
+{\tt SET\_SPLINE} and used to localize the particles in the routine
+{\tt LOCINTV}.
+
+\section{Performances}
+From the considerations above, using BSPLINES to perform the charge
+assignment and field interpolation in 2D and 3D particle codes might
+result in large overheads because of the large number of calls to the
+routines {\tt BASFUN} to compute the splines or {\tt GETGRAD} to perform the field
+interpolation at a \emph{single}  particle position. In the following,
+the performances the 2D linearized gyrokinetic code GYGLES which has
+been adapted to use BSPLINES are analyzed. Vectorization by grouping
+the particles for both charge assignment and field interpolation is then
+proposed as a way to speed up these two operations when using
+BSPLINES.
+
+\subsection{Scalar performances}
+Optimization of the scalar versions of {\tt BASFUN} and {\tt GETGRAD}
+(when these routines are called with a \emph{single} particle) is
+performed essentially by
+\begin{itemize}
+\item Minimizing the flop counts and reducing redundant operations.
+\item Unrolling small loops, for example the loop over the $p+1$
+  splines that are non-zero at a given position, for small $p$.
+\item Define all routines called by {\tt BASFUN} and {\tt  GETGRAD} as
+  \emph{internal procedures}.
+\item Rearranging the memory layout of the multi-dimension array
+  containing the PP coefficients of the spline.
+\end{itemize}
+
+The timings of the charge and current assignment (assign), the particle
+pusher (push) and the main time loop for a 5 time step run of GYGLES,
+on an Intel Xeon X5570 (hpcff.fz-juelich.de), using 4 MPI
+processes and Intel Fortran 12.1.2 are summarized in the following
+table
+
+\begin{center}
+\begin{tabular}{lrrrrr}
+\hline
+           &  $T_0$(s)   &  $T_1$(s)   &  $T_2$(s)   &  $T_1/T_0$   &  $T_2/T_1$  \\
+\hline
+ assign    &  1.454E+01  &  2.126E+01  &  2.259E+01  &       1.46  &       1.06  \\
+ push      &  2.536E+01  &  3.080E+01  &  3.144E+01  &       1.21  &       1.02  \\
+ mainloop  &  4.197E+01  &  5.955E+01  &  6.149E+01  &       1.42  &       1.03  \\
+\hline
+\end{tabular}
+\end{center}
+
+where $T_0$ is the time in seconds obtained with the original code
+while $T_1$ and $T_2$ are the times obtained with BSPLINES,
+respectively using an \emph{equidistant} and \emph{non-equidistant}
+radial mesh. In all the 3 runs, a quadratic splines were used.
+The small difference between \emph{equidistant} and
+\emph{non-equidistant} mesh comes mainly from the particle localization.
+
+The same run on an Intel Xeon E5-2680 (helios.iferc-csc.org), using
+the same Intel compiler (with AVX instructions) yields
+
+\begin{center}
+\begin{tabular}{lrrrrr}
+\hline
+           &  $T_0$(s)   &  $T_1$(s)   &  $T_2$(s)   &  $T_1/T_0$   &  $T_2/T_1$  \\
+\hline
+ assign    &  1.093E+01  &  1.987E+01  &  2.086E+01  &       1.82  &       1.05  \\
+ push      &  2.385E+01  &  2.868E+01  &  2.994E+01  &       1.20  &       1.04  \\
+ mainloop  &  3.656E+01  &  5.411E+01  &  5.598E+01  &       1.48  &       1.03  \\
+\hline
+\end{tabular}
+\end{center}
+
+
+\subsection{Speed up by vectorization}
+As found in the last section, using external routines from BSPLINES
+instead of \emph{hard coding} the spline computations
+results in a slowing down of 40--50\% for the main time
+loop. As will shown later, this problem could be solved by \emph{grouping} the
+particles and using the vectorized {\tt BASFUN} and {\tt GETGRAD}
+routines. Such particle grouping can be done for example, by replacing the usual
+particle loop by the following Fortran code fragment
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{verbatim}
+  nset = npart/ngroup
+  IF(MODULO(npt, ngroup).NE.0) nset = nset+1
+  i2 = 0
+  DO is=1,nset
+    i1 = i2+1
+    i2 = MIN(i2+ngroup,npart)
+    CALL basfun(x(i1:i2), ...)
+  END DO
+\end{verbatim}
+\nopagebreak\hrule
+\addvspace{\medskipamount}
+
+where {\tt npart} particles are partitioned into {\tt nset} groups,
+each containing at most {\tt ngroup} particles. Vectorization of the
+routines {\tt BASFUN} and {\tt GETGRAD} is achieved by moving whenever
+is possible the loop over the {\tt ngroup} particles into the
+innermost loop.
+
+The vectorization performances shown in Fig~.\ref{fig:basfun_hpcff} and
+Fig~.\ref{fig:getgrad_hpcff}, respectively for {\tt BASFUN} and {\tt
+GETGRAD} are obtained using version $12.1.2$ of Intel compiler on
+an Intel Xeon X5570 (hpcff.fz-juelich.de). With a speedup of at least
+2 for quadratic splines, the slowing down found previously in the
+scalar version could be likely compensated. The new AVX instructions
+present in the recent Intel Xeon E5-2680 (helios.iferc-csc.org) seems
+to improve somewhat the vectorization performance as shown in
+Fig~.\ref{fig:basfun_helios} and Fig~.\ref{fig:getgrad_helios}.
+
+\begin{figure}
+  \centering
+  \includegraphics[angle=0,width=\hsize]{basfun_perf_hpcff}
+  \caption{In this test, $10^5$ particles are distributed randomly on
+    an equidistant mesh of 64 intervals. On each point, all the $p+1$
+    splines are computed. The particle localization routine {\tt
+    locintv} is included in the timing. In order to have a good
+    statistics in the measurements, $1'000$ iterations of the particle loop are considered.}
+  \label{fig:basfun_hpcff}
+\end{figure}
+
+\begin{figure}
+  \centering
+  \includegraphics[angle=0,width=\hsize]{getgrad_perf_hpcff}
+  \caption{In this test, $10^5$ particles are distributed randomly on
+    an equidistant 2D $(x,y)$ mesh of $64\times 64$ intervals, where
+    the coordinate $y$ is periodic. On each point, the function
+    together with its gradient are computed, using the PP
+    representation. The particle localization routine {\tt
+    locintv} is included in the timing. In order to have a good
+    statistics in the measurements, $100$ iterations of the particle loop are considered.}
+  \label{fig:getgrad_hpcff}
+\end{figure}
+
+\begin{figure}
+  \centering
+  \includegraphics[angle=0,width=\hsize]{basfun_perf_helios}
+  \caption{In this test, $10^5$ particles are distributed randomly on
+    an equidistant mesh of 64 intervals. On each point, all the $p+1$
+    splines are computed. The particle localization routine {\tt
+    locintv} is included in the timing. In order to have a good
+    statistics in the measurements, $1'000$ iterations of the particle loop are considered.}
+  \label{fig:basfun_helios}
+\end{figure}
+
+\begin{figure}
+  \centering
+  \includegraphics[angle=0,width=\hsize]{getgrad_perf_helios}
+  \caption{In this test, $10^5$ particles are distributed randomly on
+    an equidistant 2D $(x,y)$ mesh of $64\times 64$ intervals, where
+    the coordinate $y$ is periodic. On each point, the function
+    together with its gradient are computed, using the PP
+    representation. The particle localization routine {\tt
+    locintv} is included in the timing. In order to have a good
+    statistics in the measurements, $100$ iterations of the particle loop are considered.}
+  \label{fig:getgrad_helios}
+\end{figure}
+
+\begin{thebibliography}{99}
+\bibitem{BSPLINES} {\tt BSPLINES} Reference Guide.
+\end{thebibliography}
+
+\end{document}
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
new file mode 100644
index 0000000..050fdcb
--- /dev/null
+++ b/examples/CMakeLists.txt
@@ -0,0 +1,77 @@
+/**
+ * @file CMakeLists.txt
+ *
+ * @brief
+ *
+ * @copyright
+ * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+ * SPC (Swiss Plasma Center)
+ *
+ * spclibs is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * @authors
+ * (in alphabetical order)
+ * @author Nicolas Richart <nicolas.richart@epfl.ch>
+ * @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+ */
+project(bsplines_tests)
+
+add_library(local_util STATIC 
+  pde1dp_mod.f90
+  pde3d_mod.f90
+  ppde3d_mod.f90
+  ppde3d_pb_mod.f90
+  tcdsmat_mod.f90
+  meshdist.f90
+  dismat.f90
+  ibcmat.f90
+  disrhs.f90
+)
+
+target_link_libraries(local_util PUBLIC bsplines)
+
+set(BS_TESTS
+  driv1 driv2 driv3 driv4
+  pde1d pde1dp pde1dp_cmpl 
+  pde2d pde2d_pb
+  pde1dp_cmpl_dft 
+  pde3d ppde3d ppde3d_pb
+  fit1d fit1dbc fit1dp 
+  fit2d fit2d1d fit2d_cmpl fit2dbc fit2dbc_x fit2dbc_y
+  moments optim1 optim2 optim3
+  tcdsmat tmassmat tbasfun tsparse1
+  basfun_perf getgrad_perf gridval_perf
+  test_kron
+  )
+
+if(HAS_PARDISO)
+  set(BS_TESTS ${BS_TESTS} 
+    pde1dp_cmpl_pardiso 
+    pde2d_pardiso
+    pde2d_sym_pardiso
+    pde2d_sym_pardiso_dft
+    tsparse2
+    )
+endif()
+
+if(HAS_MUMPS)
+  set(BS_TESTS ${BS_TESTS}
+    pde2d_mumps
+    pde1dp_cmpl_mumps
+    )
+endif()
+
+foreach(test ${BS_TESTS})
+  add_executable(${test} ${test}.f90)
+  target_link_libraries(${test} local_util ${LIBS} ${EXTRA_LIBS})
+endforeach()
diff --git a/examples/Makefile b/examples/Makefile
new file mode 100644
index 0000000..552d03f
--- /dev/null
+++ b/examples/Makefile
@@ -0,0 +1,506 @@
+#
+# @file Makefile
+#
+# @brief
+#
+# @copyright
+# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+# SPC (Swiss Plasma Center)
+#
+# spclibs is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# @authors
+# (in alphabetical order)
+# @author Emmanuel Lanti <emmanuel.lanti@epfl.ch>
+# @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+#
+PREFIX=$(HOME)
+XGRAFIX=/usr/local/xgrafix_1.2/src-double
+# FUTILS=/usr/local/crpp/futils
+# BSPLINES=/usr/local/crpp/bsplines
+# PPUTILS2=../pputils2
+# PPPACK=../pppack
+# SLATEC=/usr/local/slatec
+# FFTW=/usr/local/fftw-2.1.5-opt
+ARPACK=/usr/local/ARPACK
+LAPACK95=$(MKL)/../../../mkl/include/intel64/lp64 
+
+MPIF90 = mpif90
+F90 = ifort
+LD = $(MPIF90)
+
+debug = -g -traceback -check bounds -warn alignments -warn unused
+optim = -O3 -xHOST
+
+OPT=$(debug)
+#OPT=$(optim)
+
+F90FLAGS = $(OPT) -fPIC  -I../fft -I$(BSPLINES)/include -I$(FUTILS)/include \
+	   -I$(FFTW)/include -I$(MKL)/../../include/intel64/lp64
+LDFLAGS = $(OPT) -fPIC  -L$(FUTILS)/lib  -L$(BSPLINES)/lib -L${HDF5}/lib -L$(FFTW)/lib64 \
+          -L$(SLATEC)/lib -L$(ARPACK)
+
+CC = cc
+CFLAGS = -O2
+
+LIBS =  -mkl=cluster -lbsplines -lpppack -lpppack -lpputils2 -lfutils -lfft -larpack \
+	-lfftw -lhdf5_fortran -lhdf5 -lz -lpthread
+
+LIBS1 = -mkl=cluster -lbsplines1 -lpppack -lfutils \
+        -lhdf5_fortran -lhdf5 -lz -lsz -lpthread
+
+ifdef WSMP
+LDFLAGS += -L$(WSMP)
+LIBS += -lwsmp64
+LIBS1 += -lpwsmp64
+endif
+
+ifdef MUMPS
+F90FLAGS += -I$(MUMPS)/include -I$(LAPACK95)
+LDFLAGS += -L$(MUMPS)/lib -L$(PARMETIS)/lib
+LIBS += $(MUMPSLIBS)
+endif
+
+ifdef PETSC_DIR
+include ${PETSC_DIR}/conf/variables
+F90FLAGS += -I$(PETSC_DIR)/include  -I$(PETSC_DIR)/$(PETSC_ARCH)/include \
+            -I$(MKL)/../../include
+LIBS += ${PETSC_FORTRAN_LIB} ${PETSC_KSP_LIB}
+endif
+
+PDE1DOBJS = pde1d.o 
+PDE2DOBJS = pde2d.o dismat.o disrhs.o ibcmat.o
+FIT1DOBJJS = fit1d.o
+
+.SUFFIXES:
+.SUFFIXES: .o .c .f90 .f
+
+.f90.o:
+	$(MPIF90) $(F90FLAGS) -c $<
+.f.o:
+	$(F90) $(F90FLAGS) -c $<
+
+all: examples tmat
+
+EX_FILES = driv1 driv2 driv3 driv4 pde1d pde1dp pde1dp_cmpl pde3d ppde3d ppde3d_pb \
+          pde2d pde2d_pb fit1d fit1dbc \
+          fit1dp fit2d fit2d1d fit2d_cmpl fit2dbc fit2dbc_x fit2dbc_y \
+	  moments optim1 optim2 optim3 tmassmat tbasfun tcdsmat tsparse1 tsparse2 \
+          pde2d_pardiso pde2d_sym_pardiso pde1dp_cmpl_pardiso pde1dp_cmpl_dft \
+          pde2d_sym_pardiso_dft \
+	  pde1d_eig_csr pde1d_eig_gb pde1d_eig_ge
+
+ifdef WSMP
+EX_FILES += pde2d_wsmp pde2d_pwsmp pde2d_sym_wsmp  pde1dp_cmpl_wsmp
+endif
+
+ifdef MUMPS
+EX_FILES += pde2d_mumps pde1dp_cmpl_mumps pde1d_eig_zmumps
+endif
+
+ifdef PETSC_DIR
+EX_FILES += pde2d_petsc
+endif
+
+examples: $(EX_FILES)
+
+tmat:     tmatrix_gb tmatrix_pb tmatrix_zpb
+
+adv:	adv.o extra.o
+	$(LD) $(LDFLAGS) -L$(XGRAFIX) -o $@ $< extra.o $(LIBS) \
+        -lfftw -lXGF -lXGC -lX11
+	cp -p $@ ../bin/
+
+driv1:	driv1.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
+	cp -p $@ ../bin/
+
+driv2:	driv2.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
+	cp -p $@ ../bin/
+
+driv3:	driv3.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
+	cp -p $@ ../bin/
+
+driv4:	driv4.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
+	cp -p $@ ../bin/
+
+pde1d:	$(PDE1DOBJS)
+	$(LD) $(LDFLAGS) -o $@ $(PDE1DOBJS) $(LIBS)
+	cp -p $@ ../bin/
+
+pde1d_eig_csr:	pde1d_eig_csr.o
+	$(LD) $(LDFLAGS) -o $@ 	pde1d_eig_csr.o $(LIBS) -lpputils2
+	cp -p $@ ../bin/
+
+pde1d_eig_zcsr:	pde1d_eig_zcsr.o
+	$(LD) $(LDFLAGS) -o $@ 	pde1d_eig_zcsr.o $(LIBS)
+	cp -p $@ ../bin/
+
+pde1d_eig_zmumps: pde1d_eig_zmumps.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS) -lpputils2
+	cp -p $@ ../bin/
+
+pde1d_eig_zcsr:	pde1d_eig_zcsr.o
+	$(LD) $(LDFLAGS) -o $@ 	pde1d_eig_zcsr.o $(LIBS)
+	cp -p $@ ../bin/
+
+pde1d_eig_gb:	pde1d_eig_gb.o
+	$(LD) $(LDFLAGS) -o $@ 	pde1d_eig_gb.o $(LIBS)
+	cp -p $@ ../bin/
+
+pde1d_eig_ge:	pde1d_eig_ge.o
+	$(LD) $(LDFLAGS) -o $@ 	pde1d_eig_ge.o $(LIBS)
+	cp -p $@ ../bin/
+
+pde1dp:	pde1dp.o pde1dp_mod.o
+	$(LD) $(LDFLAGS) -o $@ pde1dp.o pde1dp_mod.o $(LIBS)
+	cp -p $@ ../bin/
+
+pde1dp_cmpl:	pde1dp_cmpl.o
+	$(LD) $(LDFLAGS) -o $@ pde1dp_cmpl.o $(LIBS)
+	cp -p $@ ../bin/
+
+pde1dp_KA:	pde1dp_KA.o pde1dp_mod_KA.o
+	$(LD) $(LDFLAGS) -o $@ pde1dp_KA.o pde1dp_mod_KA.o $(LIBS)
+	cp -p $@ ../bin/
+
+pde2d:	$(PDE2DOBJS)
+	$(LD) $(LDFLAGS) -o $@ $(PDE2DOBJS) $(LIBS)
+	cp -p $@ ../bin/
+
+pde3d:	pde3d.o pde3d_mod.o
+	$(LD) $(LDFLAGS) -o $@ pde3d.o pde3d_mod.o $(LIBS)
+	cp -p $@ ../bin/
+
+ppde3d:	ppde3d.o ppde3d_mod.o
+	$(LD) $(LDFLAGS) -o $@ ppde3d.o ppde3d_mod.o $(LIBS)
+	cp -p $@ ../bin/
+
+ppde3d_pb:	ppde3d_pb.o ppde3d_pb_mod.o
+	$(LD) $(LDFLAGS) -o $@ ppde3d_pb.o ppde3d_pb_mod.o $(LIBS)
+	cp -p $@ ../bin/
+
+pde2d_pb:	pde2d_pb.o
+	$(LD) $(LDFLAGS) -o $@ pde2d_pb.o $(LIBS)
+	cp -p $@ ../bin/
+
+pde2d_nh:	pde2d_nh.o
+	$(LD) $(LDFLAGS) -o $@ pde2d_nh.o $(LIBS)
+	cp -p $@ ../bin/
+
+tcdsmat: tcdsmat.o tcdsmat_mod.o meshdist.o
+	$(LD) $(LDFLAGS) -o $@ tcdsmat.o tcdsmat_mod.o meshdist.o $(LIBS)
+	cp -p $@ ../bin/
+
+tmatrix_pb:	tmatrix_pb.o 
+	$(LD) $(LDFLAGS) -o $@ tmatrix_pb.o $(LIBS)
+	cp -p $@ ../bin/
+
+tmatrix_zpb:	tmatrix_zpb.o
+	$(LD) $(LDFLAGS) -o $@ tmatrix_zpb.o $(LIBS)
+	cp -p $@ ../bin/
+
+tmatrix_gb:	tmatrix_gb.o
+	$(LD) $(LDFLAGS) -o $@ tmatrix_gb.o $(LIBS)
+	cp -p $@ ../bin/
+
+fit1d:	$(FIT1DOBJJS)
+	$(LD) $(LDFLAGS) -o $@ $(FIT1DOBJJS) $(LIBS)
+	cp -p $@ ../bin/
+
+fit1d_cmpl: fit1d_cmpl.o
+	$(LD) $(LDFLAGS) -o $@ fit1d_cmpl.o $(LIBS)
+	cp -p $@ ../bin/
+
+gyro: gyro.o
+	$(LD) $(LDFLAGS) -o $@ gyro.o -lslatec $(LIBS)
+	cp -p $@ ../bin/
+
+fit1dbc:	fit1dbc.o
+	$(LD) $(LDFLAGS) -o $@ fit1dbc.o $(LIBS)
+	cp -p $@ ../bin/
+
+fit1dp:	fit1dp.o
+	$(LD) $(LDFLAGS) -o $@ fit1dp.o $(LIBS)
+	cp -p $@ ../bin/
+
+fit2d:	fit2d.o meshdist.o
+	$(LD) $(LDFLAGS) -o $@ fit2d.o meshdist.o $(LIBS)
+	cp -p $@ ../bin/
+
+fit2d1d:	fit2d1d.o meshdist.o
+	$(LD) $(LDFLAGS) -o $@ fit2d1d.o meshdist.o $(LIBS)
+	cp -p $@ ../bin/
+
+fit2d_cmpl:	fit2d_cmpl.o meshdist.o
+	$(LD) $(LDFLAGS) -o $@ fit2d_cmpl.o meshdist.o $(LIBS)
+	cp -p $@ ../bin/
+
+fit2dbc:	fit2dbc.o meshdist.o
+	$(LD) $(LDFLAGS) -o $@ fit2dbc.o meshdist.o $(LIBS)
+	cp -p $@ ../bin/
+
+fit2dbc_x:	fit2dbc_x.o meshdist.o
+	$(LD) $(LDFLAGS) -o $@ fit2dbc_x.o meshdist.o $(LIBS)
+	cp -p $@ ../bin/
+
+fit2dbc_y:	fit2dbc_y.o meshdist.o
+	$(LD) $(LDFLAGS) -o $@ fit2dbc_y.o meshdist.o $(LIBS)
+	cp -p $@ ../bin/
+
+moments:	moments.o
+	$(LD) $(LDFLAGS) -o $@ moments.o $(LIBS)
+	cp -p $@ ../bin/
+
+mesh:	mesh.o meshdist.o
+	$(LD) $(LDFLAGS) -o $@ mesh.o meshdist.o $(LIBS)
+	cp -p $@ ../bin/
+
+optim1:	optim1.o
+	$(LD) $(LDFLAGS) -o $@ optim1.o $(LIBS)
+	cp -p $@ ../bin/
+
+optim2:	optim2.o
+	$(LD) $(LDFLAGS) -o $@ optim2.o $(LIBS)
+	cp -p $@ ../bin/
+
+optim3:	optim3.o
+	$(LD) $(LDFLAGS) -o $@ optim3.o $(LIBS)
+	cp -p $@ ../bin/
+
+tmassmat:	tmassmat.o
+	$(LD) $(LDFLAGS) -o $@ tmassmat.o $(LIBS)
+	cp -p $@ ../bin/
+
+tbasfun:	tbasfun.o
+	$(LD) $(LDFLAGS) -o $@ tbasfun.o $(LIBS)
+	cp -p $@ ../bin/
+
+basfun_perf:	basfun_perf.o
+	$(LD) $(LDFLAGS) -o $@ basfun_perf.o $(LIBS)
+	cp -p $@ ../bin/
+
+gridval_perf:	gridval_perf.o
+	$(LD) $(LDFLAGS) -o $@ gridval_perf.o $(LIBS)
+	cp -p $@ ../bin/
+
+getgrad_perf:	getgrad_perf.o
+	$(LD) $(LDFLAGS) -o $@ getgrad_perf.o $(LIBS)
+	cp -p $@ ../bin/
+
+basfun_perf1:	basfun_perf1.o
+	$(LD) $(LDFLAGS) -o $@ basfun_perf1.o $(LIBS)
+	cp -p $@ ../bin/
+
+tlocintv:	tlocintv.o  meshdist.o
+	$(LD) $(LDFLAGS) -o $@ tlocintv.o meshdist.o $(LIBS)
+	cp -p $@ ../bin/
+
+tgausleg:	tgausleg.o  meshdist.o
+	$(LD) $(LDFLAGS) -o $@ tgausleg.o meshdist.o $(LIBS)
+	cp -p $@ ../bin/
+
+poisson:	poisson.o 
+	$(LD) $(LDFLAGS) -o $@ poisson.o  $(LIBS)
+	cp -p $@ ../bin/
+
+poisson_mumps:	poisson_mumps.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
+	cp -p $@ ../bin/
+
+poisson_petsc:	poisson_petsc.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
+	cp -p $@ ../bin/
+
+tsparse1: tsparse1.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
+	cp -p $@ ../bin/
+
+tsparse2: tsparse2.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
+	cp -p $@ ../bin/
+
+pde2d_pardiso: pde2d_pardiso.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
+	cp -p $@ ../bin/
+
+pde2d_mumps: pde2d_mumps.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)	
+	cp -p $@ ../bin/
+
+pde2d_petsc: pde2d_petsc.o $(PPUTILS2)/pputils2.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)	
+	cp -p $@ ../bin/
+
+pde2d_sym_pardiso: pde2d_sym_pardiso.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
+	cp -p $@ ../bin/
+
+pde2d_sym_pardiso_dft: pde2d_sym_pardiso_dft.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
+	cp -p $@ ../bin/
+
+pde2d_sym_wsmp_dft: pde2d_sym_wsmp_dft.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
+	cp -p $@ ../bin/
+
+zssmp_ex1: zssmp_ex1.o
+	$(LD) $(LDFLAGS) -o $@ $<  $(LIBS)
+	cp -p $@ ../bin/
+
+zpardiso_ex1: zpardiso_ex1.o
+	$(LD) $(LDFLAGS) -o $@ $<  $(LIBS)
+	cp -p $@ ../bin/
+
+pde2d_sym_wsmp: pde2d_sym_wsmp.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
+	cp -p $@ ../bin/
+
+pde2d_wsmp: pde2d_wsmp.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
+	cp -p $@ ../bin/
+
+pde2d_pwsmp: pde2d_pwsmp.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS1)
+	cp -p $@ ../bin/
+
+pde1dp_cmpl_pardiso:	pde1dp_cmpl_pardiso.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
+	cp -p $@ ../bin/
+
+pde1dp_cmpl_mumps:	pde1dp_cmpl_mumps.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
+	cp -p $@ ../bin/
+
+pde1dp_cmpl_dft:	pde1dp_cmpl_dft.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
+	cp -p $@ ../bin/
+
+pde1dp_cmpl_wsmp:	pde1dp_cmpl_wsmp.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
+	cp -p $@ ../bin/
+
+tspline:	tspline.o
+	$(LD) $(LDFLAGS) -o $@ tspline.o $(LIBS)
+	cp -p $@ ../bin/
+
+tpsum_mat: tpsum_mat.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
+	cp -p $@ ../bin/
+
+tp2p_mat: tp2p_mat.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
+	cp -p $@ ../bin/
+
+test_pwsmp:	test_pwsmp.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS1)
+	cp -p $@ ../bin/
+
+driv1.o:
+driv2.o:
+driv3.o:
+driv4.o:
+pde1d.o:
+pde1dp.o: pde1dp_mod.o
+pde1dp_cmpl.o:
+pde1dp_mod.o:
+pde1dp_KA.o: pde1dp_mod_KA.o
+pde1dp_mod_KA.o:
+pde2d.o:
+pde3d.o: pde3d_mod.o
+pde3d_mod.o:
+ppde3d.o: ppde3d_mod.o
+ppde3d_mod.o:
+ppde3d_pb.o: ppde3d_pb_mod.o
+ppde3d_pb_mod.o:
+pde2d_pb.o:
+pde2d_nh.o:
+tcdsmat.o: tcdsmat_mod.o
+tcdsmat_mod.o:
+fit1d.o:
+fit1d_cmpl.o:
+gyro.o: 
+fit1dbc.o:
+fit1dp.o:
+fit2d.o:
+fit2d_cmpl.o:
+fit2dbc.o:
+dismat.o:
+disrhs.o:
+ibcmat.o:
+adv.o:
+tmatrix_pb.o:
+tmatrix_zpb.o:
+tmatrix_gb.o:
+moments.o:
+mesh.o:
+optim1.o:
+optim2.o:
+optim3.o:
+tmassmat.o:
+tbasfun.o:
+basfun_perf.o:
+basfun_perf1.o:
+tlocintv.o:
+tgausleg.o:
+poisson.o:
+poisson_mumps.o:
+tsparse1.o:
+tsparse2.o:
+pde2d_pardiso.o:
+pde2d_mumps.o:
+pde2d_petsc.o:
+pde2d_sym_pardiso.o:
+pde2d_sym_pardiso_dft.o:
+pde2d_sym_wsmp_dft.o:
+pde2d_wsmp.o:
+pde2d_pwsmp.o:
+pde2d_sym_wsmp.o:
+pde1dp_cmpl_pardiso.o:
+pde1dp_cmpl_mumps.o:
+pde1dp_cmpl_dft.o:
+pde1dp_cmpl_wsmp.o:
+tpsum_mat.o:
+tp2p_mat.o:
+poisson_petsc.o:
+
+tags:
+	etags *.f *.f90 ../src/*.f90 $(PPPACK)/*.f90
+
+clean:
+	rm -f *.o *.mod *~ a.out
+
+distclean: clean
+# $(MAKE) -C ../src distclean
+# $(MAKE) -C ../fft distclean
+# $(MAKE) -C $(PPUTILS2) distclean
+	rm -f *.a *.mod pde1d pde1dp pde1dp_cmpl pde1dp_KA driv1 driv2 driv3 driv4 \
+              tmatrix_pb tmatrix_gb tmatrix_zpb \
+              pde2d pde2d_pb pde2d_nh pde3d ppde3d ppde3d_pb\
+              fit1d fit1d_cmpl gyro fit1dbc fit1dp \
+              fit2d  fit2d1d fit2d_cmpl fit2dbc \
+              fit2dbc_x fit2dbc_y adv moments tcdsmat poisson poisson_mumps\
+              mesh optim1 optim2 optim3 tmassmat tbasfun \
+              basfun_perf gridval_perf getgrad_perf tlocintv \
+	      tsparse1 tsparse2 \
+              pde2d_pardiso pde2d_sym_pardiso pde2d_wsmp pde2d_sym_wsmp \
+              pde1dp_cmpl_dft pde1dp_cmpl_wsmp pde1d_eig_csr pde1d_eig_pb pde1d_eig_ge \
+              pde2d_sym_pardiso_dft pde1dp_cmpl_pardiso \
+	      pde2d_mumps pde1dp_cmpl_mumps tpsum_mat tp2p_mat pde2d_sym_wsmp_dft \
+	      poisson_petsc pde2d_petsc \
+              pde1d_eig_csr pde1d_eig_zcsr pde1d_eig_zmumps pde1d_eig_gb pde1d_eig.ge \
+              ../bin/*
+
diff --git a/examples/adv.f90 b/examples/adv.f90
new file mode 100644
index 0000000..8c40a29
--- /dev/null
+++ b/examples/adv.f90
@@ -0,0 +1,320 @@
+!>
+!> @file adv.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   1d Periodic Advection: F(x,t) = F(x-u*dt,t-dt)
+!   using module bsplines
+!
+  USE bsplines
+!
+  IMPLICIT NONE
+  INCLUDE 'fftw_f77.h'
+  TYPE(spline1d) :: spl
+  INTEGER, PARAMETER :: nhistmx=1000, ncomb=4
+  INTEGER :: nx, nidbas, dim
+  INTEGER :: nstep, nskipt, nhist, mhist
+  DOUBLE PRECISION :: a, b, dt, u, w,  coefx(5)
+  DOUBLE PRECISION, DIMENSION(0:nhistmx) :: thist, tmass, tfmin, tfmax, ermass
+  DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), fgrid(:), xshft(:)
+  DOUBLE PRECISION, ALLOCATABLE :: coefs(:), ferr(:), kx(:), ampl(:)
+  DOUBLE COMPLEX, ALLOCATABLE :: cfgrid(:), ffft(:)
+  INTEGER(8) :: forw
+  DOUBLE PRECISION :: time
+  INTEGER :: i
+  NAMELIST /newrun/ nx, nidbas, a, b, dt, u, w,  coefx
+!===========================================================================
+!              1.0 Prologue
+!
+!   Read in data specific to run
+!
+  nx = 100    ! Number of intevals in x
+  a = 0.0     ! Left boundary of interval
+  b = 100.0   ! Right boundary of interval
+  dt = 0.1    ! Time step
+  u = 1.0     ! Velocity
+  w = 2.0     ! Shape of initial function
+  coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!===========================================================================
+!              2.0 Define initial conditions
+!
+!   Set up mesh
+!
+  ALLOCATE(xgrid(0:nx), xshft(0:nx), fgrid(0:nx), ferr(0:nx))
+  xgrid(0) = a
+  xgrid(nx) = b
+  CALL meshdist(coefx, xgrid, nx)
+!
+!   Set up the spline interpolation
+!
+  CALL set_splcoef(nidbas, xgrid, spl, period=.TRUE.)
+  CALL get_dim(spl, dim)
+  WRITE(*,'(a,i6)') 'dimension of splines', dim
+  ALLOCATE(coefs(dim))
+!
+!   Initial conditions
+!
+  time = 0.0d0
+  nstep = 0
+  nskipt = 1
+  DO i=0,nx-1
+     fgrid(i) = finit(xgrid(i))
+  END DO
+  fgrid(nx) = fgrid(0)
+  ferr = 0.0
+  CALL get_splcoef(spl, fgrid, coefs)
+  WRITE(*,'(a/(10f8.3))') 'knots', spl%knots
+  WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
+  WRITE(*,'(a/(10f8.3))') 'fgrid', fgrid
+  WRITE(*,'(a/(10f8.3))') 'coefs', coefs
+!
+!   Set up FFT
+!
+  ALLOCATE(kx(-nx/2+1:nx/2), ampl(-nx/2+1:nx/2))
+  ALLOCATE(cfgrid(0:nx-1), ffft(0:nx-1))
+  DO i=-nx/2+1,nx/2
+     kx(i) = i
+  END DO
+  cfgrid(0:nx-1) = fgrid(0:nx-1)
+  CALL fftw_f77_create_plan(forw, nx, FFTW_FORWARD, FFTW_ESTIMATE)
+  CALL fftw_f77_one(forw, cfgrid, ffft)
+  ampl(0:nx/2) = ABS(ffft(0:nx/2))
+  ampl(-nx/2+1:-1) = ABS(ffft(nx/2+1:nx-1))
+!
+!   Set up history arrays
+!
+  nhist = 0
+  thist(nhist) = 0.0
+!!$  tmass(nhist) = SUM(coefs(1:nx))
+  tmass(nhist) = SUM(fgrid(1:nx))
+  tfmin(nhist) = MINVAL(fgrid)
+  tfmax(nhist) = MAXVAL(fgrid)
+  ermass(nhist) = tmass(nhist)-tmass(0)
+  WRITE(*,'(a,(10f8.3))') 'Initial mass', tmass(nhist)
+!
+!  Initialize Xgrafix
+!
+  CALL xginit(3,'ADV','adv',' ',' ',' ',' ',time)
+!
+  CALL xgset2d('linlin', 'X', 'F', 'open', 220, 60, 1.d0, 1.d0, &
+       &       .FALSE., .FALSE., xgrid(0), xgrid(nx), -0.2d0, 1.2d0)
+  CALL xgcurve(xgrid, fgrid, nx+1, 1)
+!
+  CALL xgset2d('linlin', 'X', 'FERR', 'open', 620, 60, 1.d0, 1.d0, &
+       &       .FALSE., .FALSE., xgrid(0), xgrid(nx), -1.d0, 1.d0)
+  CALL xgcurve(xgrid, ferr, nx+1, 1)
+!
+  CALL xgset2d('linlin', 'Time', 'Error Mass', 'open', 820, 400, 1.d0, 1.d0, &
+       &       .TRUE., .TRUE., 0.d0, 1.d0, 0.d0, 1.d0)
+  CALL xgcurve(thist, ermass, nhist, 1)
+!
+  CALL xgset2d('linlin', 'Time', 'Min/Max', 'open', 420, 400, 1.d0, 1.d0, &
+       &       .TRUE., .TRUE., 0.d0, 1.d0, 0.d0, 1.d0)
+  CALL xgcurve(thist, tfmin, nhist, 1)
+  CALL xgcurve(thist, tfmax, nhist, 2)
+!
+  CALL xgset2d('linlin', 'kx', 'Amplitude of F', 'open', 20, 400, 1.d0, 1.d0, &
+       &       .FALSE., .FALSE., kx(-nx/2+1), kx(nx/2), &
+       &        0.0d0, MAXVAL(ampl))
+  CALL xgcurve(kx, ampl, nx, 1)
+!
+  CALL xgupdate
+!===========================================================================
+!              3.0 Time loop
+!
+  nskipt = 1
+  DO
+     nstep = nstep+1
+     time = time+dt
+     CALL xgevent
+!
+!   Shift x
+!
+     CALL get_splcoef(spl, fgrid, coefs)
+     xshft(0:nx) = xgrid(0:nx) - u*dt
+     CALL gridval(spl, xshft, fgrid, 0, coefs)
+!
+     xshft(0:nx) = xgrid(0:nx) - u*time
+     DO i =0,nx
+        ferr(i) = fgrid(i) - finit(xshft(i))
+     END DO
+!
+     cfgrid(0:nx-1) = fgrid(0:nx-1)
+     CALL fftw_f77_one(forw, cfgrid, ffft)
+     ampl(0:nx/2) = ABS(ffft(0:nx/2))
+     ampl(-nx/2+1:-1) = ABS(ffft(nx/2+1:nx-1))
+!
+!   Diagnostics
+!
+     IF( MOD(nstep,nskipt) .EQ. 0 ) THEN
+        nhist = nhist+1
+        IF( nhist .GT. nhistmx ) THEN
+           nskipt = ncomb*nskipt
+           mhist = nhist-1
+           CALL packarr(mhist, thist, ncomb, nhist)
+           CALL packarr(mhist, tmass, ncomb, nhist)
+           CALL packarr(mhist, tfmin, ncomb, nhist)
+           CALL packarr(mhist, tfmax, ncomb, nhist)
+           CALL packarr(mhist, ermass, ncomb, nhist)
+        END IF
+        thist(nhist) = time
+        tmass(nhist) = SUM(fgrid(1:nx))
+!!$        tmass(nhist) = SUM(coefs(1:nx))
+        tfmin(nhist) = MINVAL(fgrid)
+        tfmax(nhist) = MAXVAL(fgrid)
+        ermass(nhist) = (tmass(nhist)-tmass(0))/tmass(0)
+     END IF
+!     
+     CALL xgupdate
+  END DO
+!===========================================================================
+!              9.0 Prologue
+!
+  CALL fftw_f77_destroy_plan(forw)
+  DEALLOCATE(xgrid, fgrid, xshft, coefs)
+CONTAINS
+  DOUBLE PRECISION FUNCTION finit(xx)
+!
+!   A "box" function
+!
+    DOUBLE PRECISION, INTENT(in) :: xx
+    DOUBLE PRECISION :: xl, xr, xl0, xr0, h, x, xlen
+    INTEGER :: kl, kr, klflag, krflag
+!
+    xlen = b-a
+    x = a + MODULO(xx-a+xlen, xlen)
+!
+    xl = 0.375*(b-a)
+    xr = 0.624*(b-a)
+    CALL interv(xgrid, nx+1, xl, kl, klflag)
+    CALL interv(xgrid, nx+1, xr, kr, krflag)
+    xl0 = xl + w*(xgrid(kl)-xgrid(kl-1))
+    xr0 = xr - w*(xgrid(kr)-xgrid(kr-1))
+    CALL interv(xgrid, nx+1, xl0, kl, klflag)
+    CALL interv(xgrid, nx+1, xr0, kr, krflag)
+    IF( x .LT. xl0 ) THEN
+       h = xgrid(kl)-xgrid(kl-1)
+       finit = EXP(-((x-xl0)/(w*h))**2)
+    ELSE IF( x .GT. xr0) THEN
+       h = xgrid(kr)-xgrid(kr-1)
+       finit = EXP(-((x-xr0)/(w*h))**2)
+    ELSE
+       finit = 1.0d0
+    END IF
+  END FUNCTION finit
+
+END PROGRAM main
+!+++
+SUBROUTINE meshdist(c, x, nx)
+!
+!   Construct an 1d  non-equidistant mesh given a
+!   mesh distribution function.
+!
+  IMPLICIT NONE
+  DOUBLE PRECISION, INTENT(in) :: c(5)
+  INTEGER, INTENT(iN) :: nx
+  DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+  INTEGER :: nintg
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+  DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+  INTEGER :: i, k
+!
+  a=x(0)
+  b=x(nx)
+  nintg = 10*nx
+  ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+  dx = (b-a)/REAL(nintg)
+  xint(0) = a
+  fint(0) = 0.0d0
+  f1 = fdist(xint(0))
+  DO i=1,nintg
+     f0 = f1
+     xint(i) = xint(i-1) + dx
+     f1 = fdist(xint(i))
+     fint(i) = fint(i-1) + 0.5*(f0+f1)
+  END DO
+!
+!  Normalization
+!
+  scal = REAL(nx) / fint(nintg)
+  fint(0:nintg) = fint(0:nintg) * scal
+!!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+  k = 1
+  DO i=1,nintg-1
+     IF( fint(i) .GE. REAL(k) ) THEN
+        x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+             &   (k-fint(i))
+        k = k+1
+     END IF
+  END DO
+!
+  DEALLOCATE(xint, fint)
+CONTAINS
+  DOUBLE PRECISION FUNCTION fdist(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
+  END FUNCTION fdist
+END SUBROUTINE meshdist
+!+++
+
+SUBROUTINE packarr(n, arr, skip, nhist)
+  IMPLICIT NONE
+  INTEGER :: n, skip, i, ii, nhist
+  DOUBLE PRECISION :: arr(0:n)
+  ii = 0
+  DO i=0,n,skip
+     arr(ii) = arr(i)
+     ii=ii+1
+  END DO
+  nhist = ii
+END SUBROUTINE packarr
+!+++
+
+SUBROUTINE dump(filename, l)
+!
+!    Is invoked when button "Dump" is pressed.
+!
+  IMPLICIT NONE
+  CHARACTER(len=*) ::  filename
+  INTEGER          :: l
+  WRITE(*,'(a,a,a1)') 'Dumpfile = "', filename(1:l),'"'
+END SUBROUTINE dump
+
+SUBROUTINE quit()
+!
+!   Is invoked when button "Quit" is pressed
+!
+  IMPLICIT NONE
+  PRINT*, 'Program terminated ...'
+END SUBROUTINE quit
diff --git a/examples/basfun_perf.f90 b/examples/basfun_perf.f90
new file mode 100644
index 0000000..7cab71f
--- /dev/null
+++ b/examples/basfun_perf.f90
@@ -0,0 +1,170 @@
+!>
+!> @file basfun_perf.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   Performance of scalar and vector versions of def_basfun
+!
+  USE bsplines
+  IMPLICIT NONE
+  INTEGER :: nx, nidbas, nrank, npt=10, jdermx
+  DOUBLE PRECISION :: dx
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid
+  DOUBLE PRECISION, ALLOCATABLE :: xpt(:), fun(:, :)
+  INTEGER :: left, i, i1, i2
+  INTEGER :: ngroup, nset, nremain
+  TYPE(spline1d) :: splx
+  DOUBLE PRECISION :: t0, t1, seconds
+  DOUBLE PRECISION :: t_loop, t_locintv1, t_basfun1, t_locintv, t_basfun
+  INTEGER :: its, nits
+  INTEGER, ALLOCATABLE :: vleft(:)
+  DOUBLE PRECISION, ALLOCATABLE :: vfun(:,:,:)
+  LOGICAL :: nlperiod
+!
+  NAMELIST /newrun/ nx, nidbas, npt, nits, ngroup, jdermx, nlperiod
+!
+!===============================================================================
+!
+!   1D grid
+!
+  nx = 10
+  nidbas = 3
+  npt = 1000000
+  nits = 100
+  ngroup = 10
+  jdermx = 0
+  nlperiod = .FALSE.
+  READ(*,newrun)
+  WRITE(*,newrun)
+
+  ALLOCATE(xgrid(0:nx))
+  dx = 1.0d0/REAL(nx)
+  xgrid = (/ (i*dx,i=0,nx) /)
+  WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
+!
+!   Set up spline
+!
+  CALL set_spline(nidbas, 4, xgrid, splx, period=nlperiod)
+  nrank = splx%dim
+  WRITE(*,'(a, i5)') 'nrank =', nrank
+  WRITE(*,'(a/(10f8.3))') 'knots', splx%knots
+!
+  ALLOCATE(xpt(npt))
+  ALLOCATE(fun(0:nidbas,0:jdermx))  ! Values and first derivatives of all Splines
+  CALL RANDOM_NUMBER(xpt)
+!===============================================================================
+!                            1.0 Scalar version
+!
+!    loop
+  t0 = seconds()
+  DO its=1,nits
+     DO i=1,npt
+     END DO
+  END DO
+  t_loop = (seconds()-t0)/REAL(nits*npt,8)
+!
+!   locintv
+  t0 = seconds()
+  DO its=1,nits
+     DO i=1,npt
+        CALL locintv(splx, xpt(i), left)
+     END DO
+  END DO
+  t_locintv1 = (seconds()-t0)/REAL(nits*npt,8)
+!
+!   def_basfun
+  t0 = seconds()
+  DO its=1,nits
+     DO i=1,npt
+        CALL locintv(splx, xpt(i), left)
+        CALL basfun(xpt(i), splx, fun, left+1)
+     END DO
+  END DO
+  t_basfun1 = (seconds()-t0)/REAL(nits*npt,8)
+!
+  WRITE(*,'(6x,3a12)') 'loop', 'locintv', 'basfun'
+  WRITE(*,'(6x,8(1pe12.3))') t_loop, t_locintv1, t_basfun1
+!===============================================================================
+!                            2.0 Vector version
+!
+  ngroup = 1
+  DO WHILE (ngroup .LT. npt/2)
+     ALLOCATE(vleft(ngroup))
+     ALLOCATE(vfun(0:nidbas, 0:jdermx, ngroup))
+     nset = npt/ngroup
+     nremain = MODULO(npt, ngroup)
+     IF(nremain.NE.0) nset = nset+1
+!
+!    loop
+     t0 = seconds()
+     DO its=1,nits
+        i2 = 0
+        DO i=1,nset
+           i1 = i2+1
+           i2 = MIN(i2+ngroup,npt)
+        END DO
+     END DO
+     t_loop = (seconds()-t0)/REAL(nits*nset,8)
+!
+!   locintv
+     t0 = seconds()
+     DO its=1,nits
+        i2 = 0
+        DO i=1,nset
+           i1 = i2+1
+           i2 = MIN(i2+ngroup,npt)
+           CALL locintv(splx, xpt(i1:i2), vleft)
+        END DO
+     END DO
+     t_locintv = (seconds()-t0)/REAL(nits*npt,8)
+!
+!   basfun
+     t0 = seconds()
+     DO its=1,nits
+        i2 = 0
+        DO i=1,nset
+           i1 = i2+1
+           i2 = MIN(i2+ngroup,npt)
+           CALL locintv(splx, xpt(i1:i2), vleft)
+           CALL basfun(xpt(i1:i2), splx, vfun, vleft+1)
+        END DO
+     END DO
+     t_basfun = (seconds()-t0)/REAL(nits*npt,8)
+!
+     WRITE(*,'(i6,8(1pe12.3))') ngroup, t_loop, t_locintv, t_basfun, &
+          &      t_locintv1/t_locintv, t_basfun1/t_basfun
+     DEALLOCATE(vleft)
+     DEALLOCATE(vfun)
+     ngroup = ngroup*2
+  END DO
+!===============================================================================
+!
+!   Clean up
+!
+  CALL destroy_sp(splx)
+  DEALLOCATE(xgrid)
+  DEALLOCATE(xpt)
+  DEALLOCATE(fun)
+END PROGRAM main
diff --git a/examples/dirichlet/Makefile b/examples/dirichlet/Makefile
new file mode 100644
index 0000000..4f0edc9
--- /dev/null
+++ b/examples/dirichlet/Makefile
@@ -0,0 +1,62 @@
+#
+# @file Makefile
+#
+# @brief
+#
+# @copyright
+# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+# SPC (Swiss Plasma Center)
+#
+# spclibs is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# @authors
+# (in alphabetical order)
+# @author Emmanuel Lanti <emmanuel.lanti@epfl.ch>
+# @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+#
+#BSPLINES = $(HOME)/bsplines
+#FUTILS   = $(HOME)/futils
+
+F90 = mpif90
+LD = $(F90)
+
+debug = -g -traceback -check bounds -warn alignments -warn nounused
+optim = -O3 -xHOST
+
+F90FLAGS = $(OPT) -I$(BSPLINES)/include -I$(FUTILS)/include
+LDFLAGS = $(OPT) -L$(BSPLINES)/lib -L$(FUTILS)/lib -L$(HDF5)/lib
+LIBS = -lbsplines -lpppack -lfutils -lhdf5_fortran -lhdf5 -lz
+
+LDFLAGS += -g -L$(MKL)
+LIBS += -lmkl_intel_lp64  -lmkl_sequential -lmkl_core -lpthread
+
+OPT=$(debug)
+#OPT=$(optim)
+
+.SUFFIXES:
+.SUFFIXES: .o .c .f90
+.f90.o:
+	$(F90) $(F90FLAGS) -c $<
+
+all:	poisson
+
+poisson: poisson.o poisson_mod.o
+	$(LD) $(LDFLAGS) -o $@ $^ $(LIBS)
+
+poisson.o: poisson_mod.o
+
+clean:
+	rm -f *.o *.mod
+
+distclean: clean
+	rm -f poisson a.out *~ *.h5 *.fig *.eps *.pdf
diff --git a/examples/dirichlet/poisson.f90 b/examples/dirichlet/poisson.f90
new file mode 100644
index 0000000..fa96f4a
--- /dev/null
+++ b/examples/dirichlet/poisson.f90
@@ -0,0 +1,383 @@
+!>
+!> @file poisson.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!  Solving the following 2d Poisson in cylibdrical coordinates, using splines:
+!
+!    -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 0, with f(x=1,y) = cos(my)
+!    exact solution: f(x,y) = r^m cos(my)
+!
+  USE bsplines
+  USE matrix
+  USE conmat_mod
+  USE poisson_mod
+  USE futils
+!
+  IMPLICIT NONE
+  INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, dirmeth, nterms
+  LOGICAL :: nlppform
+  INTEGER :: i, j, ij, dimx, dimy, nrank, kl, ku, jder(2), it
+  DOUBLE PRECISION :: pi, coefx(5), coefy(5)
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
+  TYPE(spline2d) :: splxy
+  TYPE(pbmat) :: mat
+!
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
+  DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
+  DOUBLE PRECISION :: seconds, mem, dopla
+  DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2, shifty
+  DOUBLE PRECISION :: err00, err10, err01
+  INTEGER :: nits=500
+!
+  CHARACTER(len=128) :: file='poisson.h5'
+  INTEGER :: fid
+!
+!  Dirichlet BC properties encapsulated in a derived datatype
+!
+  TYPE(dirich) :: right_bc
+!
+  NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, dirmeth, &
+       &            coefx, coefy
+!===========================================================================
+!              1.0 Prologue
+!
+!   Read in data specific to run
+!
+  nx = 8              ! Number of intervals in x
+  ny = 8              ! Number of intervals in y
+  nidbas = (/3,3/)    ! Degree of splines
+  ngauss = (/4,4/)    ! Number of Gauss points/interval
+  mbess = 2           ! Exponent of differential problem
+  nterms = 2          ! Number of terms in weak form
+  nlppform = .TRUE.   ! Use PPFORM for gridval or not
+  dirmeth = 1         ! 1: use spline interpolation in Dirichlet BC
+                      ! 2: residual minimization in Dirichlet BC
+  coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+  coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Define grid on x (=radial) & y (=poloidal) axis
+!
+  pi = 4.0d0*ATAN(1.0d0)
+  ALLOCATE(xgrid(0:nx), ygrid(0:ny))
+  xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
+  CALL meshdist(coefx, xgrid, nx)
+  ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
+  CALL meshdist(coefy, ygrid, ny)
+!
+  WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
+  WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny)
+!
+!   Create hdf5 file
+!
+  CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
+!===========================================================================
+!              2.0 Discretize the PDE
+!
+!   Set up spline
+!
+  CALL set_spline(nidbas, ngauss, &
+       &          xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
+!
+!   FE matrix assembly
+!
+  nrank = (nx+nidbas(1))*ny    ! Rank of the FE matrix
+  kl = (nidbas(1)+1)*ny -1     ! Number of sub-diagnonals
+  ku = kl                      ! Number of super-diagnonals
+  WRITE(*,'(a,5i6)') 'nrank, kl, ku', nrank, kl, ku
+!
+  CALL init(ku, nrank, nterms, mat)
+  t0 = seconds()
+  CALL conmat(splxy, mat, coefeq)
+  tmat = seconds() - t0
+  ALLOCATE(arr(nrank))
+!
+!   RHS assembly
+!
+  ALLOCATE(rhs(nrank), sol(nrank))
+  CALL disrhs(mbess, splxy, rhs)
+!
+!   Store some usefull parameters in right_bc
+!
+    right_bc%meth = dirmeth
+    right_bc%mbess = mbess
+    right_bc%n1 = nx
+    right_bc%n2 = ny
+    right_bc%nidbas1 = nidbas(1)
+    right_bc%nidbas2 = nidbas(2)
+!
+!   BC on Matrix and RHS
+!
+  CALL ibcmat(mat, right_bc)
+  CALL ibcrhs(rhs, ygrid, right_bc)
+
+  WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem()
+!===========================================================================
+!              3.0 Solve the dicretized system
+!
+  t0 = seconds()
+  CALL factor(mat)
+  tfact = seconds() - t0
+  gflops1 = dopla('DPBTRF',nrank,nrank,kl,ku,0) / tfact / 1.d9
+
+  t0 = seconds()
+  CALL bsolve(mat, rhs, sol)
+!
+!   Backtransform of solution
+!
+  sol(1:ny-1) = sol(ny)
+!
+!   Spline coefficients, taking into account of periodicity in y
+!   Note: in SOL, y was numbered first.
+!
+  dimx = splxy%sp1%dim
+  dimy = splxy%sp2%dim
+  ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
+  DO j=0,dimy-1
+     DO i=0,dimx-1
+        ij = MODULO(j,ny) + i*ny + 1
+        bcoef(i,j) = sol(ij)
+     END DO
+  END DO
+!
+  tsolv = seconds() - t0
+  gflops2 = dopla('DPBTRS',nrank,1,kl,ku,0) / tsolv / 1.d9
+!===========================================================================
+!              4.0 Check the solution
+!
+!
+!   Check function values computed with various method
+!
+  ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
+  DO i=0,nx
+     DO j=0,ny
+        solana(i,j) = f_exact(mbess, xgrid(i), ygrid(j))
+     END DO
+  END DO
+  jder = (/0,0/)
+!
+!   Compute PPFORM at first call to gridval
+  IF(nlppform) THEN
+     CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
+  END IF
+!
+  WRITE(*,'(/a)') '*** Checking solutions'
+  t0 = seconds()
+  DO it=1,nits   ! nits iterations for timing
+     CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+  END DO
+  tgrid = (seconds() - t0)/REAL(nits)
+!
+  errsol = solana - solcal
+  WRITE(*,'(a/(8(1pe12.3)))') 'Error at the boundary r = 1', errsol(nx,:)
+  err00 = err2_norm(splxy, jder, mbess, f_exact)
+!
+  CALL putarr(fid, '/xgrid', xgrid, 'r')
+  CALL putarr(fid, '/ygrid', ygrid, '\theta')
+  CALL putarr(fid, '/sol', solcal, 'Solutions')
+  CALL putarr(fid, '/solana', solana,'Exact solutions')
+  CALL putarr(fid, '/errors', errsol, 'Errors')
+!
+!   Check derivatives d/dx and d/dy
+!
+  WRITE(*,'(/a)') '*** Checking gradient'
+  DO i=0,nx
+     DO j=0,ny
+        IF( mbess .EQ. 0 ) THEN
+           solana(i,j) = 0.0d0
+        ELSE
+           solana(i,j) = fx_exact(mbess,xgrid(i),ygrid(j))
+        END IF
+     END DO
+  END DO
+!
+  jder = (/1,0/)
+  CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+  CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
+!
+  errsol = solana - solcal
+  err10 = err2_norm(splxy, jder, mbess, fx_exact)
+!
+  DO i=0,nx
+     DO j=0,ny
+         IF( mbess .EQ. 0 ) THEN
+           solana(i,j) = 0.0d0
+        ELSE
+           solana(i,j) = fy_exact(mbess, xgrid(i), ygrid(j))
+        END IF
+     END DO
+  END DO
+!
+  jder = (/0,1/)
+  CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+  CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
+!
+  errsol = solana - solcal
+  err01 = err2_norm(splxy, jder, mbess, fy_exact)
+!
+  WRITE(*,'(/a,3(1pe12.3))') 'Discretization errors', err00, err10, err01
+!===========================================================================
+!              9.0  Epilogue
+!
+  WRITE(*,'(/a)') '---'
+  WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
+  WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
+  WRITE(*,'(a,1pe12.3)') 'Backsolve time (s)            ', tsolv
+  WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s)             ', tgrid
+  WRITE(*,'(a,2f10.3)') 'Factor/solve Gflop/s', gflops1, gflops2
+!
+  DEALLOCATE(xgrid, rhs, sol)
+  DEALLOCATE(solcal, solana, errsol)
+  DEALLOCATE(bcoef)
+  DEALLOCATE(arr)
+  CALL destroy_sp(splxy)
+  CALL destroy(mat)
+  CALL closef(fid)
+!
+!===========================================================================
+!
+CONTAINS
+!--
+  DOUBLE PRECISION FUNCTION f_exact(m,x,y)
+    INTEGER,INTENT(in) :: m
+    DOUBLE PRECISION, INTENT(in) :: x, y
+    f_exact = (x**m)*COS(m*y)
+  END FUNCTION f_exact
+!--
+  DOUBLE PRECISION FUNCTION fx_exact(m,x,y)
+    INTEGER,INTENT(in) :: m
+    DOUBLE PRECISION, INTENT(in) :: x, y
+    fx_exact = m*(x**(m-1))*COS(m*y)
+  END FUNCTION fx_exact
+!--
+  DOUBLE PRECISION FUNCTION fy_exact(m,x,y)
+    INTEGER,INTENT(in) :: m
+    DOUBLE PRECISION, INTENT(in) :: x, y
+    fy_exact = -m*(x**m)*SIN(m*y)
+  END FUNCTION fy_exact
+!--
+  SUBROUTINE prntmat(str, a)
+    DOUBLE PRECISION, DIMENSION(:,:) :: a
+    CHARACTER(len=*) :: str
+    INTEGER :: i
+    WRITE(*,'(a)') TRIM(str)
+    DO i=1,SIZE(a,1)
+       WRITE(*,'(10f8.1)') a(i,:)
+    END DO
+  END SUBROUTINE prntmat
+!--
+  FUNCTION norm2(x)
+!
+!  Compute the 2-norm of array x
+!
+    IMPLICIT NONE
+    DOUBLE PRECISION :: norm2
+    DOUBLE PRECISION, INTENT(in) :: x(:,:)
+    DOUBLE PRECISION :: sum2
+    INTEGER :: i, j
+!
+    sum2 = 0.0d0
+    DO i=1,SIZE(x,1)
+       DO j=1,SIZE(x,2)
+          sum2 = sum2 + x(i,j)**2
+       END DO
+    END DO
+    norm2 = SQRT(sum2)
+  END FUNCTION norm2
+!--
+  SUBROUTINE prnmat(label, mat)
+    CHARACTER(len=*) :: label
+    DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
+    INTEGER :: i
+    WRITE(*,'(/a)') TRIM(label)
+    DO i=1,SIZE(mat,1)
+       WRITE(*,'(10(1pe12.3))') mat(i,:)
+    END DO
+  END SUBROUTINE prnmat
+END PROGRAM main
+!
+!+++
+SUBROUTINE meshdist(c, x, nx)
+!
+!   Construct an 1d  non-equidistant mesh given a
+!   mesh distribution function.
+!
+  IMPLICIT NONE
+  DOUBLE PRECISION, INTENT(in) :: c(5)
+  INTEGER, INTENT(iN) :: nx
+  DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+  INTEGER :: nintg
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+  DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+  INTEGER :: i, k
+!
+  a=x(0)
+  b=x(nx)
+  nintg = 10*nx
+  ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+  dx = (b-a)/REAL(nintg)
+  xint(0) = a
+  fint(0) = 0.0d0
+  f1 = fdist(xint(0))
+  DO i=1,nintg
+     f0 = f1
+     xint(i) = xint(i-1) + dx
+     f1 = fdist(xint(i))
+     fint(i) = fint(i-1) + 0.5*(f0+f1)
+  END DO
+!
+!  Normalization
+!
+  scal = REAL(nx) / fint(nintg)
+  fint(0:nintg) = fint(0:nintg) * scal
+!!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+  k = 1
+  DO i=1,nintg-1
+     IF( fint(i) .GE. REAL(k) ) THEN
+        x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+             &   (k-fint(i))
+        k = k+1
+     END IF
+  END DO
+!
+  DEALLOCATE(xint, fint)
+CONTAINS
+!--
+  DOUBLE PRECISION FUNCTION fdist(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
+  END FUNCTION fdist
+END SUBROUTINE meshdist
+!+++
+
diff --git a/examples/dirichlet/poisson.in b/examples/dirichlet/poisson.in
new file mode 100644
index 0000000..8655f50
--- /dev/null
+++ b/examples/dirichlet/poisson.in
@@ -0,0 +1,10 @@
+&newrun
+ nx = 16, ny = 16,
+ nidbas = 3,3,
+ ngauss = 4,4,
+ mbess = 3,
+ nlppform = t,
+ dirmeth = 2,
+ coefx = 1., 0., 0., 0., 1. ! Equidistant mesh
+ coefy = 1., 0., 0., 0., 1. ! Equidistant mesh
+/
diff --git a/examples/dirichlet/poisson.m b/examples/dirichlet/poisson.m
new file mode 100644
index 0000000..3a56ac8
--- /dev/null
+++ b/examples/dirichlet/poisson.m
@@ -0,0 +1,88 @@
+%
+% @file poisson.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+file='poisson.h5';
+m=3;
+%
+%     Get data from data sets
+%
+r=hdf5read(file,'/xgrid');
+t=hdf5read(file,'/ygrid');
+sol=hdf5read(file,'/sol')';
+solexact=hdf5read(file,'/solana')';
+err=hdf5read(file,'/errors')';
+solr=hdf5read(file,'/derivx')';
+solt=hdf5read(file,'/derivy')';
+
+
+[R,T]=meshgrid(r,t);
+x = R.*cos(T); y= R.*sin(T);
+solx = cos(T).*solr - sin(T)./R.*solt;
+soly = sin(T).*solr + cos(T)./R.*solt;
+
+figure
+subplot(221)
+pcolor(double(r),double(t),double(sol));
+shading interp
+hold on, quiver(r,t,solr,solt)
+xlabel('r'); ylabel('\theta')
+title('R-THETA plane')
+colorbar
+
+subplot(222)
+pcolor(double(x),double(y),double(sol))
+shading interp
+hold on, quiver(x,y,solx,soly)
+hold off, axis image
+xlabel('X'); ylabel('Y')
+title('X-Y plane')
+colorbar
+
+subplot(223)
+surfc(double(x),double(y),double(sol))
+xlabel('X'); ylabel('Y');
+title('Solutions')
+
+subplot(224)
+surfc(double(x),double(y),double(err))
+xlabel('X'); ylabel('Y');
+title('Errors')
+
+figure
+subplot(211)
+plot(r,sol(1,:),'o',r,solexact(1,:))
+xlabel('r')
+ylabel('Solutions at \theta=0')
+grid on
+subplot(212)
+tt=0:0.01:2*pi;
+plot(t,sol(:,end),'o',tt,cos(m.*tt))
+xlabel('\theta')
+ylabel('Solutions at r=1')
+grid on
+
+
+
+
diff --git a/examples/dirichlet/poisson_mod.f90 b/examples/dirichlet/poisson_mod.f90
new file mode 100644
index 0000000..ae234c5
--- /dev/null
+++ b/examples/dirichlet/poisson_mod.f90
@@ -0,0 +1,354 @@
+!>
+!> @file poisson_mod.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+MODULE poisson_mod
+  IMPLICIT NONE
+!
+!  Dirichlet BC encapsulated in a derived datatype
+!
+  TYPE dirich
+     INTEGER :: meth, mbess, n1, n2, nidbas1, nidbas2
+     INTEGER :: i0, i1
+     DOUBLE PRECISION, POINTER :: amat(:,:) => NULL()
+     DOUBLE PRECISION, POINTER :: g(:) => NULL()
+  END TYPE dirich
+!
+CONTAINS
+  SUBROUTINE disrhs(mbess, spl, rhs)
+!
+!   Assembly the RHS using 2d spline spl
+!
+    USE bsplines
+    INTEGER, INTENT(in) :: mbess
+    TYPE(spline2d), INTENT(in) :: spl
+    DOUBLE PRECISION, INTENT(out) :: rhs(:)
+!
+!   The RHS is 0 
+!
+    rhs(:) = 0.0d0
+  END SUBROUTINE disrhs
+!
+  SUBROUTINE ibcmat(mat, bc)
+!
+!   Apply BC on matrix
+!
+    USE matrix
+    TYPE(pbmat), INTENT(inout) :: mat
+    TYPE(dirich) :: bc
+    INTEGER :: ny
+    INTEGER :: kl, ku, nrank, i, j, k
+    DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
+    INTEGER :: i0, i1, ii
+!===========================================================================
+!              1.0 Prologue
+!
+    ku = mat%ku
+    kl = ku
+    nrank = mat%rank
+    ny = bc%n2
+!===========================================================================
+!              2.0 Unicity at the axis
+!
+!   The vertical sum on the NY-th row
+!
+    ALLOCATE(zsum(nrank))
+    ALLOCATE(arr(nrank))
+    zsum = 0.0d0
+    DO i=1,ny
+       arr = 0.0d0
+       CALL getrow(mat, i, arr)
+       DO j=1,ny+ku
+          zsum(j) = zsum(j) + arr(j)
+       END DO
+    END DO
+!
+    zsum(ny) = SUM(zsum(1:ny))   ! using symmetry
+    CALL putrow(mat, ny, zsum)
+    DEALLOCATE(zsum)
+!
+!   The away operator
+!
+    DO i = 1,ny-1
+       arr = 0.0d0; arr(i) = 1.0d0
+       CALL putrow(mat, i, arr)     
+    END DO
+    DEALLOCATE(arr)
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+!!$    i0 = nrank - ku
+!!$    i1 = nrank - ny
+    i0 = (bc%n1-1)*bc%n2 + 1
+    i1 = nrank - bc%n2
+    bc%i0 = i0
+    bc%i1 = i1
+!
+    IF(ASSOCIATED(bc%amat)) DEALLOCATE(bc%amat)
+    IF(ASSOCIATED(bc%g)) DEALLOCATE(bc%g)
+    ALLOCATE(bc%amat(i0:i1,ny))
+    ALLOCATE(bc%g(ny))
+!
+    WRITE(*,'(/a,2i6)') 'IBCMAT: i0, i1 =', i0, i1
+!
+!   Extract and save the last ny columns of matrix
+!
+    ALLOCATE(arr(nrank))
+    DO k=1,ny
+       j = nrank-ny+k
+       CALL getcol(mat, j, arr)
+       bc%amat(i0:i1,k) = arr(i0:i1)
+       IF( ANY(arr(1:i0-1) .NE. 0.0d0) ) THEN
+          WRITE(*,'(a,i4)') 'i0 is underestimated for j =', j
+       END IF
+    END DO
+!
+!   The away operator
+!
+    DO k=1,ny
+       j = nrank-ny+k
+       arr = 0.0d0; arr(j) = 1.0d0
+       CALL putrow(mat, j, arr)     
+    END DO
+!
+    DEALLOCATE(arr)
+!
+  END SUBROUTINE ibcmat
+!+++
+  SUBROUTINE ibcrhs(rhs, ygrid, bc)
+!
+!   Apply BC on RHS
+!
+    DOUBLE PRECISION, INTENT(inout) :: rhs(:)
+    DOUBLE PRECISION, INTENT(in)    :: ygrid(:)
+    TYPE(dirich)                    :: bc
+!
+    INTEGER :: nrank, ny, m, i0, i1
+    DOUBLE PRECISION :: zsum
+!===========================================================================
+!              1.0 Prologue
+!
+    nrank = SIZE(rhs,1)
+    ny = bc%n2
+    m = bc%mbess
+!===========================================================================
+!              2.0 Unicity at the axis
+!
+!   The vertical sum
+!
+    zsum = SUM(rhs(1:ny))
+    rhs(ny) = zsum
+    rhs(1:ny-1) = 0.0d0
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+!   Get spline coefs at boundary r=1
+!
+    SELECT CASE (bc%meth)
+    CASE(1)
+       CALL dirich_interp(ygrid, bc, frhs)
+    CASE(2)
+       CALL dirich_minres(ygrid, bc, frhs)
+    END SELECT
+!
+!   Modify RHS
+!
+    i0 = bc%i0
+    i1 = bc%i1
+    rhs(i0:i1) = rhs(i0:i1) - MATMUL(bc%amat, bc%g)
+    rhs(i1+1:nrank) = bc%g(1:ny)
+  CONTAINS
+    DOUBLE PRECISION FUNCTION frhs(x)
+      DOUBLE PRECISION, INTENT(in) :: x
+      frhs = COS(m*x)
+    END FUNCTION frhs
+  END SUBROUTINE ibcrhs
+!++++
+  SUBROUTINE coefeq(x, y, idt, idw, c)
+    DOUBLE PRECISION, INTENT(in) :: x, y
+    INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
+    DOUBLE PRECISION, INTENT(out) :: c(:)
+!
+! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
+!
+    c(1) = x        ! 
+    idt(1,1) = 1
+    idt(1,2) = 0
+    idw(1,1) = 1
+    idw(1,2) = 0
+!
+    c(2) = 1.d0/x
+    idt(2,1) = 0
+    idt(2,2) = 1
+    idw(2,1) = 0
+    idw(2,2) = 1
+  END SUBROUTINE coefeq
+!++++
+  SUBROUTINE dirich_interp(ygrid, bc, frhs)
+!
+!   Dirichlet BC by interpolation
+!
+    USE bsplines
+    DOUBLE PRECISION, INTENT(in) :: ygrid(:)
+    TYPE(dirich)                 :: bc
+    INTERFACE
+       DOUBLE PRECISION FUNCTION frhs(x)
+         DOUBLE PRECISION, INTENT(in) :: x
+       END FUNCTION frhs
+    END INTERFACE
+!
+    INTEGER :: nidbas, dim, n2, i
+    DOUBLE PRECISION :: shifty
+    DOUBLE PRECISION :: gval(SIZE(ygrid))
+    DOUBLE PRECISION, ALLOCATABLE :: coefs(:)
+    DOUBLE PRECISION :: ygrid_interp(SIZE(ygrid))
+    TYPE(spline1d) :: spl_interp
+!    
+    nidbas = bc%nidbas2
+    n2 = bc%n2
+!
+    IF(MODULO(nidbas,2) .EQ. 0 ) THEN
+       shifty = 0.5d0*(ygrid(2)-ygrid(1))
+       ygrid_interp(:) = ygrid(:) + shifty
+    ELSE
+       ygrid_interp(:) = ygrid(:)
+    END IF
+    CALL set_splcoef(nidbas, ygrid_interp, spl_interp, period=.TRUE.)
+    CALL get_dim(spl_interp, dim)
+    ALLOCATE(coefs(dim))
+!
+    DO i=1,SIZE(ygrid)
+       gval(i) = frhs(ygrid_interp(i))
+    END DO
+    CALL get_splcoef(spl_interp, gval, coefs)
+!
+!   Store spline coefs in bc
+!
+    bc%g(1:n2) = coefs(1:n2)
+!
+    DEALLOCATE(coefs)
+    CALL destroy_sp(spl_interp)
+  END SUBROUTINE dirich_interp
+!++++
+  SUBROUTINE dirich_minres(xgrid, bc, frhs)
+!
+!   Dirichlet BC by minimization of residual
+!
+    USE bsplines
+    USE matrix
+    USE conmat_mod
+    DOUBLE PRECISION, INTENT(in) :: xgrid(:)
+    TYPE(dirich)                 :: bc
+    INTERFACE
+       DOUBLE PRECISION FUNCTION frhs(x)
+         DOUBLE PRECISION, INTENT(in) :: x
+       END FUNCTION frhs
+    END INTERFACE
+!
+    INTEGER :: nx, nidbas, ngauss, kl, ku
+    TYPE(periodic_mat) :: mass_mat
+    TYPE(spline1d) :: spl
+!
+    nidbas = bc%nidbas2
+    ngauss = nidbas+1
+    nx = bc%n2
+    kl = nidbas
+    ku = kl
+!    
+    CALL set_spline(nidbas, ngauss, xgrid, spl, period=.TRUE.)   
+    CALL init(kl, ku, nx, 1, mass_mat)
+    CALL conmat(spl, mass_mat, coefeq_mass)
+    CALL conrhs(spl, bc%g, frhs)
+    CALL factor(mass_mat)
+    CALL bsolve(mass_mat, bc%g)
+!
+    CALL destroy(mass_mat)
+    CALL destroy_sp(spl)
+!
+  CONTAINS
+    SUBROUTINE coefeq_mass(x, idt, idw, c)
+      DOUBLE PRECISION, INTENT(in) :: x
+      INTEGER, INTENT(out) :: idt(:), idw(:)
+      DOUBLE PRECISION, INTENT(out) :: c(:)
+      c(1) = 1.0d0
+      idt(1) = 0
+      idw(1) = 0
+    END SUBROUTINE coefeq_mass
+  END SUBROUTINE dirich_minres
+!++++
+  DOUBLE PRECISION FUNCTION err2_norm(spl, jder, mbess, fexact)
+!
+!  Compute error L2 norm unsing Gauss points
+!
+    USE bsplines
+    TYPE(spline2d) :: spl
+    INTEGER, INTENT(in) :: jder(:)
+    INTEGER, INTENT(in) :: mbess
+    INTERFACE
+       DOUBLE PRECISION FUNCTION fexact(m,x,y)
+         INTEGER, INTENT(in)          :: m
+         DOUBLE PRECISION, INTENT(in) :: x, y
+       END FUNCTION fexact
+    END INTERFACE
+!
+    DOUBLE PRECISION, POINTER :: xg1(:,:), xg2(:,:), wg1(:,:), wg2(:,:)
+    INTEGER :: i1, ig1, n1, nidbas1, ndim1, ng1
+    INTEGER :: i2, ig2, n2, nidbas2, ndim2, ng2
+    DOUBLE PRECISION :: contrib
+    DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:), sol(:,:)
+!
+!   Gauss points and weights on all intervals
+!
+    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
+    xg1 => spl%sp1%gausx  ! xg1(ng1,n1)
+    wg1 => spl%sp1%gausw  ! wg1(ng1,n1)
+    ng1 = SIZE(xg1,1)
+    xg2 => spl%sp2%gausx
+    wg2 => spl%sp2%gausw
+    ng2 = SIZE(xg2,1)
+!
+    err2_norm = 0.0d0
+    ALLOCATE(x(ng1), y(ng2))
+    ALLOCATE(sol(ng1,ng2))
+    DO i1=1,n1                
+       x=xg1(:,i1)
+       DO i2=1,n2
+          y=xg2(:,i2)
+          CALL gridval(spl, x, y, sol, jder)
+          DO ig1=1,ng1            
+             DO ig2=1,ng2      
+                contrib = wg1(ig1,i1)*wg2(ig2,i2)*(sol(ig1,ig2) - &
+                     &    fexact(mbess,x(ig1),y(ig2)))**2
+                err2_norm = err2_norm +  x(ig1)*contrib !use same inner-product in weak-form
+             END DO
+          END DO
+       END DO
+    END DO
+    DEALLOCATE(x)
+    DEALLOCATE(y)
+    DEALLOCATE(sol)
+    err2_norm = SQRT(err2_norm)
+  END FUNCTION err2_norm
+END MODULE poisson_mod
diff --git a/examples/dirichlet/run_poisson.sh b/examples/dirichlet/run_poisson.sh
new file mode 100644
index 0000000..dafc434
--- /dev/null
+++ b/examples/dirichlet/run_poisson.sh
@@ -0,0 +1,46 @@
+#
+# @file run_poisson.sh
+#
+# @brief
+#
+# @copyright
+# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+# SPC (Swiss Plasma Center)
+#
+# spclibs is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# @authors
+# (in alphabetical order)
+# @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+#
+#!/bin/sh
+EXEC=./poisson
+
+cat > in0 <<EOF
+&newrun
+ nx = xnints, ny = xnints,
+ nidbas = 4,4
+ ngauss = 5,5
+ mbess = 3,
+ nlppform = t,
+ dirmeth = 2, 
+ coefx = 1., 0., 0., 0., 1. ! Equidistant mesh
+ coefy = 1., 0., 0., 0., 1. ! Equidistant mesh
+/
+EOF
+
+for x in 8 16 32 64 128 256; do
+    sed "s/xnints/$x/g" in0 > in1
+    $EXEC < in1 | grep 'Discretization errors '
+done
+rm -f in?
diff --git a/examples/dismat.f90 b/examples/dismat.f90
new file mode 100644
index 0000000..a15e8df
--- /dev/null
+++ b/examples/dismat.f90
@@ -0,0 +1,157 @@
+!>
+!> @file dismat.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+SUBROUTINE dismat(spl, mat)
+!
+!   Assembly of FE matrix mat using spline spl
+!
+  USE bsplines
+  USE matrix
+  IMPLICIT NONE
+  TYPE(spline2d), INTENT(in) :: spl
+  TYPE(gbmat), INTENT(inout) :: mat
+!
+  INTEGER :: n1, nidbas1, ndim1, ng1
+  INTEGER :: n2, nidbas2, ndim2, ng2
+  INTEGER :: i, j, ig1, ig2
+  INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
+  DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:)
+  DOUBLE PRECISION:: contrib
+!
+  INTEGER :: kterms         ! Number of terms in weak form
+  INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:)  ! Derivative order
+  DOUBLE PRECISION, ALLOCATABLE  :: coefs(:,:,:)          ! Terms in weak form
+  INTEGER, ALLOCATABLE :: left1(:), left2(:)
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+  CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+  CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
+  WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
+  WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
+!
+!   Gauss quadature
+!
+  CALL get_gauss(spl%sp1, ng1)
+  CALL get_gauss(spl%sp2, ng2)
+  ALLOCATE(xg1(ng1), wg1(ng1))
+  ALLOCATE(xg2(ng1), wg2(ng1))
+!
+!   Weak form
+!
+  kterms = mat%nterms
+  ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
+  ALLOCATE(coefs(kterms,ng1,ng2))
+!
+  ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative
+  ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! 
+!===========================================================================
+!              2.0 Assembly loop
+!
+  ALLOCATE(left1(ng1))
+  ALLOCATE(left2(ng2))
+  DO i=1,n1
+     CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
+     left1 = i
+     CALL basfun(xg1, spl%sp1, fun1, left1)
+     DO j=1,n2
+        CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
+        left2 = j
+        CALL basfun(xg2, spl%sp2, fun2, left2)
+!
+        DO ig1=1,ng1
+           DO ig2=1,ng2
+              CALL coefeq(xg1(ig1), xg2(ig2), &
+                   &      idert(:,:,ig1,ig2), &
+                   &      iderw(:,:,ig1,ig2), &
+                   &      coefs(:,ig1,ig2))
+           END DO
+        END DO
+!
+        DO iw1=0,nidbas1  ! Weight function in dir 1
+           igw1 = i+iw1
+           DO iw2=0,nidbas2  ! Weight function in dir 2
+              igw2 = MODULO(j+iw2-1, n2) + 1
+              irow = igw2 + (igw1-1)*n2
+              DO it1=0,nidbas1  ! Test function in dir 1
+                 igt1 = i+it1
+                 DO it2=0,nidbas2  ! Test function in dir 2
+                    igt2 = MODULO(j+it2-1, n2) + 1
+                    jcol = igt2 + (igt1-1)*n2
+!-------------
+                    contrib = 0.0d0
+                    DO ig1=1,ng1
+                       DO ig2=1,ng2
+                          DO iterm=1,kterms
+                             contrib = contrib + &
+                                  &    fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
+                                  &    fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * &
+                                  &    coefs(iterm,ig1,ig2) *                 &
+                                  &    fun2(it2,idert(iterm,2,ig1,ig2),ig2) * &
+                                  &    fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
+                                  &    wg1(ig1) * wg2(ig2)
+                          END DO
+                       END DO
+                    END DO
+                    CALL updtmat(mat, irow, jcol, contrib)
+!-------------
+                 END DO
+              END DO
+           END DO
+        END DO
+     END DO
+  END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(xg1, wg1, fun1)
+  DEALLOCATE(xg2, wg2, fun2)
+  DEALLOCATE(idert, iderw, coefs)
+  DEALLOCATE(left1,left2)
+!
+CONTAINS
+  SUBROUTINE coefeq(x, y, idt, idw, c)
+    DOUBLE PRECISION, INTENT(in) :: x, y
+    INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
+    DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
+!
+! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
+!
+    c(1) = x        ! 
+    idt(1,1) = 1
+    idt(1,2) = 0
+    idw(1,1) = 1
+    idw(1,2) = 0
+!
+    c(2) = 1.d0/x
+    idt(2,1) = 0
+    idt(2,2) = 1
+    idw(2,1) = 0
+    idw(2,2) = 1
+  END SUBROUTINE coefeq
+END SUBROUTINE dismat
diff --git a/examples/disrhs.f90 b/examples/disrhs.f90
new file mode 100644
index 0000000..24cbb84
--- /dev/null
+++ b/examples/disrhs.f90
@@ -0,0 +1,198 @@
+!>
+!> @file disrhs.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+SUBROUTINE disrhs(mbess, spl, rhs)
+!
+!   Assembly the RHS using 2d spline spl
+!
+  USE bsplines
+  IMPLICIT NONE
+  INTEGER, INTENT(in) :: mbess
+  TYPE(spline2d), INTENT(in) :: spl
+  DOUBLE PRECISION, INTENT(out) :: rhs(:)
+  INTEGER :: n1, nidbas1, ndim1, ng1
+  INTEGER :: n2, nidbas2, ndim2, ng2
+  INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
+  DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
+  DOUBLE PRECISION:: contrib
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+  CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+  CALL get_dim(spl%sp2, ndim2, n2, nidbas2) 
+!
+  ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
+  ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
+!
+!   Gauss quadature
+!
+  CALL get_gauss(spl%sp1, ng1)
+  CALL get_gauss(spl%sp2, ng2)
+  WRITE(*,'(/a, 2i3)') 'Gauss points and weights, ngauss =', ng1, ng2
+  ALLOCATE(xg1(ng1), wg1(ng1))
+  ALLOCATE(xg2(ng1), wg2(ng1))
+!===========================================================================
+!              2.0 Assembly loop
+!
+  nrank = SIZE(rhs)
+  rhs(1:nrank) = 0.0d0
+!
+  DO i=1,n1
+     CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
+     DO ig1=1,ng1
+        CALL basfun(xg1(ig1), spl%sp1, fun1, i)
+        DO j=1,n2
+           CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
+           DO ig2=1,ng2
+              CALL basfun(xg2(ig2), spl%sp2, fun2, j)
+              contrib = wg1(ig1)*wg2(ig2) * &
+                   &    rhseq(xg1(ig1),xg2(ig2), mbess)
+              DO k1=0,nidbas1
+                 i1 = i+k1
+                 DO k2=0,nidbas2
+                    j2 = MODULO(j+k2-1,n2) + 1
+                    ij = j2 + (i1-1)*n2
+                    rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
+                 END DO
+              END DO
+           END DO
+        END DO
+     END DO
+  END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(xg1, wg1, fun1)
+  DEALLOCATE(xg2, wg2, fun2)
+!
+CONTAINS
+  DOUBLE PRECISION FUNCTION rhseq(x1, x2, m)
+    DOUBLE PRECISION, INTENT(in) :: x1, x2
+    INTEGER, INTENT(in) :: m
+    rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)
+  END FUNCTION rhseq
+END SUBROUTINE disrhs
+!
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+SUBROUTINE disrhs3(mbess, npow, spl, rhs)
+!
+!   Assembly the RHS using 3d spline spl
+!
+  USE bsplines
+  IMPLICIT NONE
+  INTEGER, INTENT(in)           :: mbess, npow
+  TYPE(spline2d1d), TARGET      :: spl
+  DOUBLE PRECISION, INTENT(out) :: rhs(:,:)
+!
+  TYPE(spline1d), POINTER :: sp1, sp2, sp3
+  INTEGER :: n1, nidbas1, ndim1, ng1
+  INTEGER :: n2, nidbas2, ndim2, ng2
+  INTEGER :: n3, nidbas3, ndim3, ng3
+  INTEGER :: i, j, k, ig1, ig2, ig3, k1, k2, k3, i1, j2, ij, kk, nrank
+  DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: xg3(:), wg3(:), fun3(:,:)
+  DOUBLE PRECISION:: contrib
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+  sp1 => spl%sp12%sp1
+  sp2 => spl%sp12%sp2
+  sp3 => spl%sp3
+!
+  CALL get_dim(sp1, ndim1, n1, nidbas1)
+  CALL get_dim(sp2, ndim2, n2, nidbas2) 
+  CALL get_dim(sp3, ndim3, n3, nidbas3) 
+!
+  ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
+  ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
+  ALLOCATE(fun3(0:nidbas3,1)) ! needs only basis functions (no derivatives)
+!
+!   Gauss quadature
+!
+  CALL get_gauss(sp1, ng1)
+  CALL get_gauss(sp2, ng2)
+  CALL get_gauss(sp3, ng3)
+  WRITE(*,'(/a, 3i3)') 'Gauss points and weights, ngauss =', ng1, ng2, ng3
+  ALLOCATE(xg1(ng1), wg1(ng1))
+  ALLOCATE(xg2(ng2), wg2(ng2))
+  ALLOCATE(xg3(ng3), wg3(ng3))
+!===========================================================================
+!              2.0 Assembly loop
+!
+  nrank = SIZE(rhs,1)
+  rhs(1:nrank,1:n3) = 0.0d0
+!
+  DO i=1,n1
+     CALL get_gauss(sp1, ng1, i, xg1, wg1)
+     DO ig1=1,ng1
+        CALL basfun(xg1(ig1), sp1, fun1, i)
+        DO j=1,n2
+           CALL get_gauss(sp2, ng2, j, xg2, wg2)
+           DO ig2=1,ng2
+              CALL basfun(xg2(ig2), sp2, fun2, j)
+              DO k=1,n3
+                 CALL get_gauss(sp3, ng3, k, xg3, wg3)
+                 DO ig3=1,ng3
+                    CALL basfun(xg3(ig3), sp3, fun3, k)
+                    contrib = wg1(ig1)*wg2(ig2)*wg3(ig3) * &
+                         &    rhseq(xg1(ig1), xg2(ig2), xg3(ig3), mbess, npow)
+                    DO k1=0,nidbas1
+                       i1 = i+k1
+                       DO k2=0,nidbas2
+                          j2 = MODULO(j+k2-1,n2) + 1
+                          ij = j2 + (i1-1)*n2
+                          DO k3=0,nidbas3
+                             kk = MODULO(k+k3-1,n3) + 1
+                             rhs(ij,kk) = rhs(ij, kk) + &
+                                  &  contrib*fun1(k1,1)*fun2(k2,1)*fun3(k3,1)
+                          END DO
+                       END DO
+                    END DO
+                 END DO
+              END DO
+           END DO
+        END DO
+     END DO
+  END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(xg1, wg1, fun1)
+  DEALLOCATE(xg2, wg2, fun2)
+  DEALLOCATE(xg3, wg3, fun3)
+!
+CONTAINS
+  DOUBLE PRECISION FUNCTION rhseq(x1, x2, x3, m, n)
+    DOUBLE PRECISION, INTENT(in) :: x1, x2, x3
+    INTEGER, INTENT(in) :: m, n
+    rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)*COS(x3)**n
+  END FUNCTION rhseq
+END SUBROUTINE disrhs3
diff --git a/examples/driv1.f90 b/examples/driv1.f90
new file mode 100644
index 0000000..03a9329
--- /dev/null
+++ b/examples/driv1.f90
@@ -0,0 +1,189 @@
+!>
+!> @file driv1.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+! Basis splines on a 2d grid.
+!
+  USE bsplines
+  USE futils
+  IMPLICIT NONE
+  TYPE(spline1d) :: spx, spy
+  INTEGER :: nx=10, ny=8, nidbas=2, ngauss=4, npts=1000
+  DOUBLE PRECISION :: a, b, coefx(5), coefy(5)
+  DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), ygrid(:), fun(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: xp(:), funxp(:,:), yp(:), funyp(:,:)
+  DOUBLE PRECISION :: dx, dy
+  INTEGER :: i, j, left
+  CHARACTER(len=256) :: title
+  INTEGER :: fid
+  NAMELIST /newrun/ nx, ny, nidbas, ngauss, a, b, coefx, coefy
+!===========================================================================
+  nidbas = 3
+  ngauss = 4
+  nx = 10    ! Number of intevals in x
+  ny = 8     ! Number of intevals in y
+  a = 0.0d0  ! Left boundary of interval
+  b = 1.0d0  ! Right boundary of interval
+  coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+  coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+  READ(*,newrun)
+  WRITE(*,newrun)
+!===========================================================================
+!                  1.0 Set up grids
+!
+  ALLOCATE( xgrid(0:nx) )
+  xgrid(0) = a
+  xgrid(nx) = b
+  CALL meshdist(coefx, xgrid, nx)
+!
+!!$  dy = 2.d0*pi/REAL(ny)
+  dy = 1.0d0
+  ALLOCATE( ygrid(0:ny) )
+  ygrid(0) = a
+  ygrid(ny) = b
+  CALL meshdist(coefy, ygrid, ny)
+!===========================================================================
+!                  2.0 Set up splines on (x,y)
+!
+  CALL set_spline(nidbas, ngauss, xgrid, spx)
+  CALL set_spline(nidbas, ngauss, ygrid, spy, period=.TRUE.)
+  WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of x-splines', LBOUND(spx%knots), &
+       &                       ':',UBOUND(spx%knots), spx%knots
+  WRITE(*,'(2(a,i5, 2x))') 'NX =', nx, 'DIM =', spx%dim
+  WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of y-splines', LBOUND(spy%knots), &
+       &                       ':',UBOUND(spy%knots), spy%knots
+  WRITE(*,'(2(a,i5, 2x))') 'NY =', ny, 'DIM =', spy%dim
+!===========================================================================
+!                  3.0 Graph the splines on (x,y)
+!
+  ALLOCATE( fun(nidbas+1,1) )         ! Only 0-th derivative
+!!$  ALLOCATE( fun(nidbas+1,0:1) )         ! Only 0-th derivative
+  ALLOCATE( xp(npts), funxp(npts,0:spx%dim-1) )
+  ALLOCATE( yp(npts-1), funyp(npts-1,0:spy%dim-1) )
+!
+!   Splines in X (non-peridic)
+!
+  WRITE(*,'(a)') 'Splines in x'
+  dx = (xgrid(nx)-xgrid(0)) / REAL(NPTS-1)
+  DO i=1,npts
+     xp(i) = xgrid(0) + (i-1)*dx
+     CALL locintv(spx, xp(i), left)
+     CALL basfun(xp(i), spx, fun, left+1)
+     funxp(i,left:left+nidbas) = fun(:,1)
+  END DO
+!
+!   Splines in Y (periodic)
+!
+  WRITE(*,'(a)') 'Splines in y'
+  dy = (ygrid(ny)-ygrid(0)) / REAL(NPTS-1)
+  DO i=1,npts-1
+     yp(i) = ygrid(0) + (i-1)*dy
+     CALL locintv(spy, yp(i), left)
+     CALL basfun(yp(i), spy, fun, left+1)
+     funyp(i,left:left+nidbas) = fun(:,1)
+  END DO
+!
+!  Create hdf5 file
+!
+  CALL creatf('driv1.h5', fid, real_prec='d')
+!
+  WRITE(title,'(a,i3,5x,a,i6)') 'Splines of degree =', nidbas, 'NX =', nx
+  CALL putarr(fid, 'X', xp)
+  CALL putarr(fid, 'KNOTSX', spx%knots)
+  CALL putarr(fid, 'splinesx', funxp, TRIM(title))
+  CALL putarr(fid, 'KNOTSY', spy%knots)
+!
+  WRITE(title,'(a,i3,5x,a,i6)') 'Periodic splines of degree =', nidbas, 'NY =', ny
+  CALL putarr(fid, 'Y', yp)
+  CALL putarr(fid, 'splinesy', funyp, TRIM(title))
+  CALL closef(fid)
+!===========================================================================
+!                  9.0 Epilogue
+!
+  DEALLOCATE(fun)
+  DEALLOCATE(xgrid, ygrid)
+  DEALLOCATE(xp, funxp)
+  CALL destroy_sp(spx)
+  CALL destroy_sp(spy)
+END PROGRAM main
+!+++
+SUBROUTINE meshdist(c, x, nx)
+!
+!   Construct an 1d  non-equidistant mesh given a
+!   mesh distribution function.
+!
+  IMPLICIT NONE
+  DOUBLE PRECISION, INTENT(in) :: c(5)
+  INTEGER, INTENT(iN) :: nx
+  DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+  INTEGER :: nintg
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+  DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+  INTEGER :: i, k
+!
+  a=x(0)
+  b=x(nx)
+  nintg = 10*nx
+  ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+  dx = (b-a)/REAL(nintg)
+  xint(0) = a
+  fint(0) = 0.0d0
+  f1 = fdist(xint(0))
+  DO i=1,nintg
+     f0 = f1
+     xint(i) = xint(i-1) + dx
+     f1 = fdist(xint(i))
+     fint(i) = fint(i-1) + 0.5*(f0+f1)
+  END DO
+!
+!  Normalization
+!
+  scal = REAL(nx) / fint(nintg)
+  fint(0:nintg) = fint(0:nintg) * scal
+!!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+  k = 1
+  DO i=1,nintg-1
+     IF( fint(i) .GE. REAL(k) ) THEN
+        x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+             &   (k-fint(i))
+        k = k+1
+     END IF
+  END DO
+!
+  DEALLOCATE(xint, fint)
+CONTAINS
+  DOUBLE PRECISION FUNCTION fdist(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
+  END FUNCTION fdist
+END SUBROUTINE meshdist
+!+++
diff --git a/examples/driv2.f90 b/examples/driv2.f90
new file mode 100644
index 0000000..b67b578
--- /dev/null
+++ b/examples/driv2.f90
@@ -0,0 +1,180 @@
+!>
+!> @file driv2.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+! Integration of splines
+!
+  USE bsplines
+!
+  IMPLICIT NONE
+  TYPE(spline1d) :: spx
+  INTEGER :: nx, nidbas, ngauss
+  DOUBLE PRECISION :: a, b, coefx(5)
+  DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), fun(:,:), finteg(:)
+  DOUBLE PRECISION, ALLOCATABLE :: xg(:), wg(:)
+  DOUBLE PRECISION :: support, res, err
+  INTEGER :: i, ig, j, jj, left
+  INTEGER :: dim, ng
+  LOGICAL :: periodic
+  NAMELIST /newrun/ periodic, nx, nidbas, ngauss, a, b, coefx
+!===========================================================================
+!                  1.0 Set up grids
+!
+!   Read in data specific to run
+!
+  periodic = .FALSE.
+  nidbas = 3
+  ngauss = 4
+  nx = 10    ! Number of intevals in x
+  a = 0.0d0  ! Left boundary of interval
+  b = 1.0d0  ! Right boundary of interval
+  coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Define grid/knots
+!
+  ALLOCATE(xgrid(0:nx))
+  xgrid(0) = a
+  xgrid(nx) = b
+  CALL meshdist(coefx, xgrid, nx)
+  WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
+  WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx)
+  WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx)
+!===========================================================================
+!                  2.0 Set up splines
+!
+  CALL set_spline(nidbas, ngauss, xgrid, spx, periodic)
+  WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of x-splines', LBOUND(spx%knots), &
+       &                       ':',UBOUND(spx%knots), spx%knots
+  WRITE(*,'(2(a,i5, 2x))') 'NX =', nx, 'DIM =', spx%dim
+!===========================================================================
+!                  3.0 Integrate all splines
+!
+  CALL get_dim(spx, dim) 
+  ALLOCATE( finteg(0:dim-1), xg(ngauss), wg(ngauss), fun(0:nidbas,1) )
+  finteg = 0.0
+  fun = 0.0
+  DO i=1,nx             ! Loop thru the intervals
+     CALL get_gauss(spx, ng, i, xg, wg)
+     DO ig=1,ng         ! Loop thru Gauss points
+        CALL basfun(xg(ig), spx, fun, i)
+        left = i-1
+        DO j=0,nidbas   ! Loop thru the splines in this interval
+           jj = left+j
+           IF( periodic ) jj = MODULO(left+j, nx)
+           finteg(jj) = finteg(jj) + wg(ig)*fun(j,1)
+        END DO
+     END DO
+  END DO
+!!$  IF( periodic ) THEN
+!!$     DO i=nx,dim-1
+!!$        finteg(i) = finteg(i-nx)
+!!$     END DO
+!!$  END IF
+!
+  WRITE(*,'(a/(10f10.5))') 'Integrals of splines', finteg
+  PRINT*, 'Sum of finteg', SUM(finteg)
+!!$  IF( periodic ) THEN
+!!$     PRINT*, 'Sum of finteg', SUM(finteg(0:nx-1))
+!!$  ELSE
+!!$     PRINT*, 'Sum of finteg', SUM(finteg)
+!!$  END IF
+!
+  WRITE(*,'(a/(10f10.5))') 'Integrals of splines from module', spx%intspl
+  PRINT*, 'Sum of finteg', SUM(spx%intspl)
+  WRITE(*,'(a5,4a12)') '#', 'I', 'S', '(p+1)I/S', '(p+1)I/S-1'
+  DO i=0,spx%dim-1
+     support = spx%knots(i+1)-spx%knots(i-nidbas)
+     res =  spx%intspl(i)/support*(nidbas+1)
+     err = res - 1.0d0
+     WRITE(*,'(i5,4(1pe12.4))') i, spx%intspl(i), support, res, err
+  END DO
+!===========================================================================
+!                  9.0 Epilogue
+!
+  DEALLOCATE( finteg, xg, wg, fun )
+  DEALLOCATE(xgrid)
+  CALL destroy_sp(spx)
+END PROGRAM main
+!+++
+SUBROUTINE meshdist(c, x, nx)
+!
+!   Construct an 1d  non-equidistant mesh given a
+!   mesh distribution function.
+!
+  IMPLICIT NONE
+  DOUBLE PRECISION, INTENT(in) :: c(5)
+  INTEGER, INTENT(iN) :: nx
+  DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+  INTEGER :: nintg
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+  DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+  INTEGER :: i, k
+!
+  a=x(0)
+  b=x(nx)
+  nintg = 10*nx
+  ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+  dx = (b-a)/REAL(nintg)
+  xint(0) = a
+  fint(0) = 0.0d0
+  f1 = fdist(xint(0))
+  DO i=1,nintg
+     f0 = f1
+     xint(i) = xint(i-1) + dx
+     f1 = fdist(xint(i))
+     fint(i) = fint(i-1) + 0.5*(f0+f1)
+  END DO
+!
+!  Normalization
+!
+  scal = REAL(nx) / fint(nintg)
+  fint(0:nintg) = fint(0:nintg) * scal
+!!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+  k = 1
+  DO i=1,nintg-1
+     IF( fint(i) .GE. REAL(k) ) THEN
+        x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+             &   (k-fint(i))
+        k = k+1
+     END IF
+  END DO
+!
+  DEALLOCATE(xint, fint)
+CONTAINS
+  DOUBLE PRECISION FUNCTION fdist(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
+  END FUNCTION fdist
+END SUBROUTINE meshdist
+!+++
diff --git a/examples/driv3.f90 b/examples/driv3.f90
new file mode 100644
index 0000000..8df6e99
--- /dev/null
+++ b/examples/driv3.f90
@@ -0,0 +1,159 @@
+!>
+!> @file driv3.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+! Computation of croos mass matrix between two splines sp1 & sp2 
+! sp1 and sp2 can be splines of any type (i.e. either set up with set_spline or
+! set_splcoef) and of any order.
+
+!
+  USE bsplines
+!
+  IMPLICIT NONE
+  TYPE(spline1d) :: sp1, sp2
+  INTEGER :: nx, nidbas1, nidbas2, ngauss
+  INTEGER :: i, j
+  DOUBLE PRECISION :: a, b, coefx(5)
+  DOUBLE PRECISION, ALLOCATABLE :: xgrid(:)
+  DOUBLE PRECISION, DIMENSION(:, :), POINTER :: MassMat
+  LOGICAL :: periodic1, periodic2
+  NAMELIST /newrun/ nx, a, b, coefx, nidbas1, nidbas2, periodic1, periodic2
+!===========================================================================
+!                  1.0 Set up grids
+!
+!   Read in data specific to run
+!
+  nx = 8    ! Number of intevals in x
+  a = 0.0d0  ! Left boundary of interval
+  b = 1.0d0  ! Right boundary of interval
+  coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+  periodic1 = .FALSE.
+  periodic2 = .FALSE.
+  nidbas1 = 3
+  nidbas2 = 2
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Define grid/knots
+!
+  ALLOCATE(xgrid(0:nx))
+  xgrid(0 ) = a
+  xgrid(nx) = b
+  CALL meshdist(coefx, xgrid, nx)
+  WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
+!===========================================================================
+!                  2.0 Set up splines
+!
+  ngauss = 1 ! Gauss points initialized with set_spline are in fact not used
+             ! for computing cross mass matrix
+  ! First spline set up as for solving a PDE with FEMs
+  CALL set_spline(nidbas1, ngauss, xgrid, sp1, periodic1)
+
+  ! Second spline set up as for interpolation
+  CALL set_splcoef(nidbas2, xgrid, sp2, periodic2)
+
+  WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of spline sp1', LBOUND(sp1%knots), &
+       &                       ':',UBOUND(sp1%knots), sp1%knots
+  WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of spline sp2', LBOUND(sp2%knots), &
+       &                       ':',UBOUND(sp2%knots), sp2%knots
+  WRITE(*,'(3(a,i5, 2x))') 'NX =', nx, 'DIM sp1 =', sp1%dim, 'DIM sp2 =', sp2%dim
+!===========================================================================
+!                  3.0 Compute cross mass matrix
+!
+  CALL CompMassMatrix(sp1, sp2, a, b, MassMat)
+
+  WRITE(*, "(a)") "Cross-mass matrix between splines sp1 & sp2:"
+  DO i = 1, SIZE(MassMat, 1)
+     WRITE(*, "(15f13.5)") (MassMat(i, j), j = 1, MIN(SIZE(MassMat, 2), 15))
+  END DO
+
+!===========================================================================
+!                  9.0 Epilogue
+!
+  DEALLOCATE(MassMat)
+  DEALLOCATE(xgrid)
+  CALL destroy_sp(sp1)
+  CALL destroy_sp(sp2)
+
+END PROGRAM main
+!+++
+SUBROUTINE meshdist(c, x, nx)
+!
+!   Construct an 1d  non-equidistant mesh given a
+!   mesh distribution function.
+!
+  IMPLICIT NONE
+  DOUBLE PRECISION, INTENT(in) :: c(5)
+  INTEGER, INTENT(iN) :: nx
+  DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+  INTEGER :: nintg
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+  DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+  INTEGER :: i, k
+!
+  a=x(0)
+  b=x(nx)
+  nintg = 10*nx
+  ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+  dx = (b-a)/REAL(nintg)
+  xint(0) = a
+  fint(0) = 0.0d0
+  f1 = fdist(xint(0))
+  DO i=1,nintg
+     f0 = f1
+     xint(i) = xint(i-1) + dx
+     f1 = fdist(xint(i))
+     fint(i) = fint(i-1) + 0.5*(f0+f1)
+  END DO
+!
+!  Normalization
+!
+  scal = REAL(nx) / fint(nintg)
+  fint(0:nintg) = fint(0:nintg) * scal
+!!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+  k = 1
+  DO i=1,nintg-1
+     IF( fint(i) .GE. REAL(k) ) THEN
+        x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+             &   (k-fint(i))
+        k = k+1
+     END IF
+  END DO
+!
+  DEALLOCATE(xint, fint)
+CONTAINS
+  DOUBLE PRECISION FUNCTION fdist(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
+  END FUNCTION fdist
+END SUBROUTINE meshdist
+!+++
diff --git a/examples/driv4.f90 b/examples/driv4.f90
new file mode 100644
index 0000000..63711b7
--- /dev/null
+++ b/examples/driv4.f90
@@ -0,0 +1,225 @@
+!>
+!> @file driv4.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+! Computation of croos mass matrix between two splines sp1 & sp2 
+! sp1 and sp2 can be splines of any type (i.e. either set up with set_spline or
+! set_splcoef) and of any order.
+
+!
+  USE bsplines
+  USE matrix
+!
+  IMPLICIT NONE
+  TYPE(gbmat) :: matm
+  INTEGER :: mrows, ncols, kl, ku
+  DOUBLE PRECISION, ALLOCATABLE :: avec(:,:), bvec(:,:), matfull(:,:)
+!
+  TYPE(zgbmat) :: zmatm
+  DOUBLE COMPLEX, ALLOCATABLE :: zavec(:,:), zbvec(:,:)
+  DOUBLE PRECISION :: dznrm2
+!
+  TYPE(spline1d) :: sp1, sp2
+  INTEGER :: nx, nidbas1, nidbas2, ngauss
+  INTEGER :: i, j
+  DOUBLE PRECISION :: a, b, coefx(5)
+  DOUBLE PRECISION, ALLOCATABLE :: xgrid(:)
+  DOUBLE PRECISION, DIMENSION(:, :), POINTER :: MassMat
+  LOGICAL :: periodic1, periodic2
+  NAMELIST /newrun/ nx, a, b, coefx, nidbas1, nidbas2, periodic1, periodic2
+!===========================================================================
+!                  1.0 Set up grids
+!
+!   Read in data specific to run
+!
+  nx = 8    ! Number of intevals in x
+  a = 0.0d0  ! Left boundary of interval
+  b = 1.0d0  ! Right boundary of interval
+  coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+  periodic1 = .FALSE.
+  periodic2 = .FALSE.
+  nidbas1 = 3
+  nidbas2 = 2
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Define grid/knots
+!
+  ALLOCATE(xgrid(0:nx))
+  xgrid(0 ) = a
+  xgrid(nx) = b
+  CALL meshdist(coefx, xgrid, nx)
+  WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
+!===========================================================================
+!                  2.0 Set up splines
+!
+  ngauss = 1 ! Gauss points initialized with set_spline are in fact not used
+             ! for computing cross mass matrix
+  ! First spline set up as for solving a PDE with FEMs
+  CALL set_spline(nidbas1, ngauss, xgrid, sp1, periodic1)
+
+  ! Second spline set up as for interpolation
+!!$  CALL set_splcoef(nidbas2, xgrid, sp2, periodic2)
+  CALL set_spline(nidbas2, ngauss, xgrid, sp2, periodic2)
+
+  WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of spline sp1', LBOUND(sp1%knots), &
+       &                       ':',UBOUND(sp1%knots), sp1%knots
+  WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of spline sp2', LBOUND(sp2%knots), &
+       &                       ':',UBOUND(sp2%knots), sp2%knots
+  WRITE(*,'(3(a,i5, 2x))') 'NX =', nx, 'DIM sp1 =', sp1%dim, 'DIM sp2 =', sp2%dim
+!===========================================================================
+!                  3.0 Compute cross mass matrix
+!
+  CALL CompMassMatrix(sp1, sp2, a, b, MassMat)
+
+  WRITE(*, "(a)") "Cross-mass matrix between splines sp1 & sp2:"
+  DO i = 1, SIZE(MassMat, 1)
+     WRITE(*, "(15f10.5)") (MassMat(i, j), j = 1, MIN(SIZE(MassMat, 2), 15))
+  END DO
+!
+!  Should equal to 1 for splines i "not close to  the boundaries":
+!                  p1 .LT. i .LE. N
+!
+  WRITE(*,'(/a/(15f8.5))') 'Sum of cols * NX', SUM(MassMat,dim=2)*REAL(nx,8)
+!===========================================================================
+!                  3.0  Use DGB matrice
+!
+!!$  mrows = nx+nidbas1
+!!$  ncols = nx+nidbas2
+  CALL get_dim(sp1, mrows)
+  CALL get_dim(sp2, ncols)
+  kl = nidbas1
+  ku = nidbas2
+  CALL init(kl, ku, ncols, 1, matm, mrows=mrows)
+  WRITE(*,'(/a, 2i3)') 'Band matrix:, kl, ku =', kl, ku
+!
+  CALL CompMassMatrix(sp1, sp2, a, b, matm)
+!
+  DO i=1,SIZE(matm%val,1)
+     WRITE(*,'(15f10.5)') matm%val(i,:)
+  END DO
+!
+  WRITE(*,'(/a)') 'Full matrix'  
+  ALLOCATE(matfull(mrows,ncols))
+  matfull = 0.0d0
+  DO i=1,mrows
+     CALL getrow(matm, i, matfull(i,:))
+     WRITE(*,'(15f10.5)') matfull(i,:)
+  END DO
+  WRITE(*,'(a,1pe12.4)') 'error =', MAXVAL(ABS(matfull-MassMat))
+!
+!   Check VMX
+  ALLOCATE(avec(ncols,2))
+  ALLOCATE(bvec(mrows,2))
+  avec = 1.0d0
+  bvec = vmx(matm,avec)*REAL(nx,8)
+  WRITE(*,'(a)') 'M*a, with a=1'
+  DO j=1,2
+     WRITE(*,'(15f8.5)') bvec(:,j)
+  END DO
+!===========================================================================
+!                  4.0 Test complex version
+!
+  CALL init(kl, ku, ncols, 1, zmatm, mrows=mrows)
+  CALL CompMassMatrix(sp1, sp2, a, b, zmatm)
+  ALLOCATE(zavec(ncols,2))
+  ALLOCATE(zbvec(mrows,2))
+  zavec = (1.0d0,0.0d0)
+  zbvec = vmx(zmatm,zavec)*REAL(nx,8)
+  zbvec = zbvec-bvec
+  WRITE(*,'(/a)') 'Check complex version'
+  WRITE(*,'(a,2(1pe12.4))') 'Norm of errors =', &
+       &              (dznrm2(mrows, zbvec(1,j), 1), j=1,2)
+  zmatm%val = zmatm%val-matm%val
+  WRITE(*,'(a,1pe12.4)') 'Diff of matrix elements =', MAXVAL(ABS(zmatm%val))
+!===========================================================================
+!                  9.0 Epilogue
+!
+  DEALLOCATE(MassMat)
+  DEALLOCATE(xgrid)
+  CALL destroy_sp(sp1)
+  CALL destroy_sp(sp2)
+  call destroy(matm)
+
+END PROGRAM main
+!+++
+SUBROUTINE meshdist(c, x, nx)
+!
+!   Construct an 1d  non-equidistant mesh given a
+!   mesh distribution function.
+!
+  IMPLICIT NONE
+  DOUBLE PRECISION, INTENT(in) :: c(5)
+  INTEGER, INTENT(iN) :: nx
+  DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+  INTEGER :: nintg
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+  DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+  INTEGER :: i, k
+!
+  a=x(0)
+  b=x(nx)
+  nintg = 10*nx
+  ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+  dx = (b-a)/REAL(nintg)
+  xint(0) = a
+  fint(0) = 0.0d0
+  f1 = fdist(xint(0))
+  DO i=1,nintg
+     f0 = f1
+     xint(i) = xint(i-1) + dx
+     f1 = fdist(xint(i))
+     fint(i) = fint(i-1) + 0.5*(f0+f1)
+  END DO
+!
+!  Normalization
+!
+  scal = REAL(nx) / fint(nintg)
+  fint(0:nintg) = fint(0:nintg) * scal
+!!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+  k = 1
+  DO i=1,nintg-1
+     IF( fint(i) .GE. REAL(k) ) THEN
+        x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+             &   (k-fint(i))
+        k = k+1
+     END IF
+  END DO
+!
+  DEALLOCATE(xint, fint)
+CONTAINS
+  DOUBLE PRECISION FUNCTION fdist(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
+  END FUNCTION fdist
+END SUBROUTINE meshdist
+!+++
diff --git a/examples/extra.c b/examples/extra.c
new file mode 100644
index 0000000..572cecd
--- /dev/null
+++ b/examples/extra.c
@@ -0,0 +1,49 @@
+/**
+ * @file extra.c
+ *
+ * @brief
+ *
+ * @copyright
+ * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+ * SPC (Swiss Plasma Center)
+ *
+ * spclibs is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * @authors
+ * (in alphabetical order)
+ * @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+ */
+/**********************************************/
+#include <string.h>
+
+void quit_();
+void dump_(char *filename, int *l);
+
+void Dump(filename)
+char *filename;
+{
+  /* The user's dump routine should go here.  */
+  int l = strlen(filename);
+  dump_(filename, &l);
+
+}  /* End DUMP */
+
+/**********************************************/
+
+void Quit()
+{
+  /* The user's quit routine should go here.  */
+
+  quit_();
+
+}  /* End QUIT */
diff --git a/examples/fit1d.f90 b/examples/fit1d.f90
new file mode 100644
index 0000000..db3b786
--- /dev/null
+++ b/examples/fit1d.f90
@@ -0,0 +1,251 @@
+!>
+!> @file fit1d.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!  Fit grid value function to a spline of any order
+!
+  USE bsplines
+  USE futils
+!
+  IMPLICIT NONE
+  INTEGER :: nx, nidbas
+  DOUBLE PRECISION :: a, b, coefx(5)
+!!$  DOUBLE PRECISION, ALLOCATABLE ::  xgrid(:), fgrid(:), coefs(:)
+  DOUBLE PRECISION, ALLOCATABLE ::  xgrid(:), fgrid(:,:), coefs(:,:)
+  INTEGER :: i, dim, left
+  TYPE(spline1d) :: spl
+  DOUBLE PRECISION :: dx
+  INTEGER :: npts
+  DOUBLE PRECISION, ALLOCATABLE :: xpt(:), fcalc(:), fexact(:), err(:)
+  DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
+  DOUBLE PRECISION, POINTER :: splines(:,:) => null()
+!
+  CHARACTER(len=128) :: file='fit1d.h5'
+  INTEGER :: fid
+!
+  NAMELIST /newrun/ nx, nidbas, a, b, coefx
+!===========================================================================
+!              1.0 Prologue
+!
+!   Read in data specific to run
+!
+  nx = 10    ! Number of intevals in x
+  a = 0.0d0  ! Left boundary of interval
+  b = 1.0d0  ! Right boundary of interval
+  coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Define grid and function values
+!
+  ALLOCATE(xgrid(0:nx), fgrid(0:nx,1))
+  xgrid(0) = a
+  xgrid(nx) = b
+  CALL meshdist(coefx, xgrid, nx)
+  fgrid(:,1) = func(xgrid)
+  WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
+  WRITE(*,'(a/(10f8.3))') 'fgrid', fgrid
+!
+!   Create hdf5 file
+!
+  CALL creatf(file, fid, 'FIT1D Result File')
+  CALL attach(fid, '/', 'NX', nx)
+  CALL attach(fid, '/', 'NIDBAS', nidbas)
+!===========================================================================
+!              2.0 Spline interpolation
+!
+!   Set up the spline interpolation
+!!$  CALL splcoef_setup(nidbas, xgrid, spl)
+  CALL set_splcoef(nidbas, xgrid, spl)
+  PRINT*, 'nlequid =', spl%nlequid
+!
+!   Compute spline values and derivatives at Boundaries
+  ALLOCATE(fun(nidbas+1,0:nidbas))
+  WRITE(*,'(/a)') 'spline at the left boundary'
+  CALL locintv(spl, a, left)
+  CALL basfun(a, spl, fun, left+1)
+  DO i=0,nidbas
+     WRITE(*,'(8(1pe12.4))') fun(:,i)
+  END DO
+!
+  WRITE(*,'(/a)') 'spline at the right boundary'
+  CALL locintv(spl, b, left)
+  CALL basfun(b, spl, fun, left+1)
+  DO i=0,nidbas
+     WRITE(*,'(8(1pe12.4))') fun(:,i)
+  END DO
+  DEALLOCATE(fun)
+!
+  CALL get_dim(spl, dim)
+  ALLOCATE(coefs(dim,1))
+  WRITE(*,'(2(a,i5, 2x))') 'NX =', nx, 'DIM =', dim
+  WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of splines', LBOUND(spl%knots), &
+       &                       ':',UBOUND(spl%knots), spl%knots
+!
+!   From given grid values fgrid, compute the spline coefs
+  CALL get_splcoef(spl, fgrid, coefs)
+  WRITE(*,'(a/(10f8.3))') 'coefs', coefs
+!
+!   Plot all splines
+  npts = 100
+  ALLOCATE(xpt(npts))
+  dx = (b-a)/REAL(npts-1)
+  DO i=1,npts
+     xpt(i) = a + (i-1)*dx
+  END DO
+  CALL allsplines(spl, xpt, splines)
+  CALL putarr(fid, '/X', xpt)
+  CALL putarr(fid,'/SPLINES', splines)
+!
+!   Check interpolation
+  ALLOCATE(fcalc(npts), fexact(npts), err(npts))
+  fexact = func(xpt)
+!
+!  Function values
+  CALL gridval(spl, xpt, fcalc, 0, coefs(:,1))
+  err = fexact - fcalc
+  CALL putarr(fid, '/FEXACT', fexact, 'Exact values')
+  CALL putarr(fid, '/FCALC', fcalc, 'Interpolated values')
+  CALL putarr(fid, '/ERROR', err, 'Interpolation errors')
+  WRITE(*,'(/a,1pe12.3)') 'Norm of error', norm2(err)/norm2(fexact)
+!
+!  Derivatives values
+  CALL gridval(spl, xpt, fcalc, 1)
+  fexact = func1(xpt)
+  err = fexact - fcalc
+  CALL putarr(fid, '/FEXACT1', fexact, 'Exact values of first derivative')
+  CALL putarr(fid, '/FCALC1', fcalc, 'Interpolated first derivative')
+  CALL putarr(fid, '/ERROR1', err, 'Interpolation errors on first derivative')
+  WRITE(*,'(/a,1pe12.3)') 'Norm of error', norm2(err)/norm2(fexact)
+!
+
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(xpt, splines, fexact, fcalc, err)
+  DEALLOCATE(xgrid, fgrid)
+  CALL destroy_sp(spl)
+  CALL closef(fid)
+!===========================================================================
+!
+CONTAINS
+  FUNCTION func(x)
+    DOUBLE PRECISION, INTENT(in) :: x(:)
+    DOUBLE PRECISION :: func(SIZE(x))
+!!$    INTEGER :: n
+!!$    n = SIZE(x)
+!!$    func = 1.d0+x*(1.d0+x*(1.d0+x))
+!!$    func(1:n/2) = 1.0d0
+!!$    func(n/2+1:n) = 0.5d0
+    func = EXP(-8.*x*x)
+  END FUNCTION func
+!
+  FUNCTION func1(x)
+    DOUBLE PRECISION, INTENT(in) :: x(:)
+    DOUBLE PRECISION :: func1(SIZE(x))
+!!$    INTEGER :: n
+!!$    n = SIZE(x)
+!!$    func = 1.d0+x*(1.d0+x*(1.d0+x))
+!!$    func(1:n/2) = 1.0d0
+!!$    func(n/2+1:n) = 0.5d0
+    func1 = -16.d0*x*EXP(-8.*x*x)
+  END FUNCTION func1
+!
+  FUNCTION norm2(x)
+!
+!  Compute the 2-norm of array x
+!
+    DOUBLE PRECISION :: norm2
+    DOUBLE PRECISION, INTENT(in) :: x(:)
+    DOUBLE PRECISION :: sum2
+    INTEGER :: i
+!
+    sum2 = 0.0d0
+    DO i=1,SIZE(x,1)
+       sum2 = sum2 + x(i)**2
+    END DO
+    norm2 = SQRT(sum2)
+  END FUNCTION norm2
+END PROGRAM main
+!+++
+SUBROUTINE meshdist(c, x, nx)
+!
+!   Construct an 1d  non-equidistant mesh given a
+!   mesh distribution function.
+!
+  IMPLICIT NONE
+  DOUBLE PRECISION, INTENT(in) :: c(5)
+  INTEGER, INTENT(iN) :: nx
+  DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+  INTEGER :: nintg
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+  DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+  INTEGER :: i, k
+!
+  a=x(0)
+  b=x(nx)
+  nintg = 10*nx
+  ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+  dx = (b-a)/REAL(nintg)
+  xint(0) = a
+  fint(0) = 0.0d0
+  f1 = fdist(xint(0))
+  DO i=1,nintg
+     f0 = f1
+     xint(i) = xint(i-1) + dx
+     f1 = fdist(xint(i))
+     fint(i) = fint(i-1) + 0.5*(f0+f1)
+  END DO
+!
+!  Normalization
+!
+  scal = REAL(nx) / fint(nintg)
+  fint(0:nintg) = fint(0:nintg) * scal
+!!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+  k = 1
+  DO i=1,nintg-1
+     IF( fint(i) .GE. REAL(k) ) THEN
+        x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+             &   (k-fint(i))
+        k = k+1
+     END IF
+  END DO
+!
+  DEALLOCATE(xint, fint)
+CONTAINS
+  DOUBLE PRECISION FUNCTION fdist(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
+  END FUNCTION fdist
+END SUBROUTINE meshdist
+!+++
diff --git a/examples/fit1d_cmpl.f90 b/examples/fit1d_cmpl.f90
new file mode 100644
index 0000000..ba3c7db
--- /dev/null
+++ b/examples/fit1d_cmpl.f90
@@ -0,0 +1,106 @@
+!>
+!> @file fit1d_cmpl.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   Fit a 1d complex function
+!
+  USE bsplines
+  IMPLICIT NONE
+  INTEGER,PARAMETER :: NX=10, NIDBAS=3, NPTS=40
+  DOUBLE PRECISION :: pi, dx, xgrid(0:NX), xpt(NPTS), err
+  DOUBLE COMPLEX, ALLOCATABLE :: coefs(:)
+  DOUBLE COMPLEX :: fgrid(0:NX), fexact(NPTS), fcalc(NPTS)
+  INTEGER :: dim, i
+  TYPE(spline1d) :: spl
+!================================================================================
+!
+!   Define grid and function values on grid
+!
+  pi = 4.0d0*ATAN(1.0d0)
+  xgrid(0) = 0.0d0
+  dx = 2.0d0*pi/NX
+  DO i=1,NX
+     xgrid(i) = xgrid(0) + i*dx
+  END DO
+!
+  fgrid = func(xgrid)
+!
+  WRITE(*,'(2a10)') 'x', 'f'
+  DO i=0,NX
+     WRITE(*,'(3f10.4)') xgrid(i), fgrid(i)
+  END DO
+!
+!   Set up spline
+!
+  CALL set_splcoef(NIDBAS, xgrid, spl, period=.TRUE.)
+  CALL get_dim(spl, dim)
+  ALLOCATE(coefs(dim))
+!
+!   Get Spline coefficients
+!
+  CALL get_splcoef(spl, fgrid, coefs)
+  WRITE(*,'(a)') 'Interpolation coefs'
+  DO i=1,dim
+     WRITE(*,'(2(1pe12.3))')  coefs(i)
+  END DO
+!
+!   Check interpolation
+!
+  CALL RANDOM_NUMBER(xpt)
+  xpt = (2.0d0*pi) * xpt
+  fexact = func(xpt)
+!
+  CALL gridval(spl, xpt, fcalc, 0, coefs)
+!
+  WRITE(*,'(a10,2a20)') 'x', 'fexact', 'fcacl'
+  DO i=1,NPTS
+     WRITE(*,'(5f10.4)') xpt(i), fexact(i), fcalc(i)
+  END DO
+  err = norm2(fcalc-fexact)
+  WRITE(*,'(a,1pe12.3)') 'error', err
+!
+!   Clean up
+!
+  DEALLOCATE(coefs)
+  CALL destroy_sp(spl)  
+!================================================================================
+CONTAINS
+  FUNCTION func(x)
+    DOUBLE PRECISION, INTENT(in) :: x(:)
+    DOUBLE COMPLEX :: func(size(x))
+    func = EXP( CMPLX(0.0d0, x))
+  END FUNCTION func
+!
+  FUNCTION norm2(x)
+!
+!  Compute the 2-norm of vector x
+!
+    DOUBLE PRECISION :: norm2
+    DOUBLE COMPLEX, INTENT(in) :: x(:)
+!
+    norm2 = SQRT(DOT_PRODUCT(x,x))
+  END FUNCTION norm2
+END PROGRAM main
diff --git a/examples/fit1dbc.f90 b/examples/fit1dbc.f90
new file mode 100644
index 0000000..503a469
--- /dev/null
+++ b/examples/fit1dbc.f90
@@ -0,0 +1,254 @@
+!>
+!> @file fit1dbc.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!  Fit grid value function to a spline of any order
+!  BC using derivatives at both ends.
+!
+  USE bsplines
+  USE futils
+!
+  IMPLICIT NONE
+  INTEGER :: nx, nidbas
+  DOUBLE PRECISION :: a, b, coefx(5)
+!!$  DOUBLE PRECISION, ALLOCATABLE ::  xgrid(:), fgrid(:), coefs(:)
+  DOUBLE PRECISION, ALLOCATABLE ::  xgrid(:), fgrid(:,:), coefs(:,:)
+  INTEGER :: i, dim
+  TYPE(spline1d) :: spl
+  DOUBLE PRECISION :: dx
+  INTEGER :: npts
+  DOUBLE PRECISION, ALLOCATABLE :: xpt(:), fcalc(:), fexact(:), err(:)
+  DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
+  DOUBLE PRECISION, POINTER :: splines(:,:) => null()
+  INTEGER :: ibc(2,10)
+!!$  DOUBLE PRECISION :: fbc(2,10)
+  DOUBLE PRECISION :: fbc(2,10,1)
+!
+  CHARACTER(len=128) :: file='fit1d.h5'
+  INTEGER :: fid
+!
+  NAMELIST /newrun/ nx, nidbas, a, b, coefx, ibc, fbc
+!===========================================================================
+!              1.0 Prologue
+!
+!   Read in data specific to run
+!
+  nx = 10    ! Number of intevals in x
+  a = 0.0d0  ! Left boundary of interval
+  b = 1.0d0  ! Right boundary of interval
+  coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+  ibc(1,1:10) = (/2,3,4,5,6,7,8,9,10,11/)
+  ibc(2,1:10) = ibc(1,1:10)
+  fbc = 0.0
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Define grid and function values
+!
+  ALLOCATE(xgrid(0:nx), fgrid(0:nx,1))
+  xgrid(0) = a
+  xgrid(nx) = b
+  CALL meshdist(coefx, xgrid, nx)
+  fgrid(:,1) = func(xgrid)
+  WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
+  WRITE(*,'(a/(10f8.3))') 'fgrid', fgrid
+!
+!   Create hdf5 file
+!
+  CALL creatf(file, fid, 'FIT1D Result File')
+  CALL attach(fid, '/', 'NX', nx)
+  CALL attach(fid, '/', 'NIDBAS', nidbas)
+!===========================================================================
+!              2.0 Spline interpolation
+!
+!   Set up the spline interpolation
+  CALL set_splcoef(nidbas, xgrid, spl, ibc=ibc)
+!
+!   Compute spline values and derivatives at Boundaries
+  ALLOCATE(fun(nidbas+1,0:nidbas))
+  WRITE(*,'(/a)') 'spline at the left boundary'
+  CALL basfun(a, spl, fun, 1)
+  DO i=0,nidbas
+     WRITE(*,'(8(1pe12.4))') fun(:,i)
+  END DO
+!
+  WRITE(*,'(/a)') 'spline at the right boundary'
+  CALL basfun(b, spl, fun, nx)
+  DO i=0,nidbas
+     WRITE(*,'(8(1pe12.4))') fun(:,i)
+  END DO
+  DEALLOCATE(fun)
+!
+  CALL get_dim(spl, dim)
+  ALLOCATE(coefs(dim,1))
+  WRITE(*,'(2(a,i5, 2x))') 'NX =', nx, 'DIM =', dim
+  WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of splines', LBOUND(spl%knots), &
+       &                       ':',UBOUND(spl%knots), spl%knots
+!
+!   From given grid values fgrid, compute the spline coefs
+  CALL get_splcoef(spl, fgrid, coefs, fbc)
+  WRITE(*,'(a/(10f8.3))') 'coefs', coefs
+!
+!   Plot all splines
+  npts = 100
+  ALLOCATE(xpt(npts))
+  dx = (b-a)/REAL(npts-1)
+  DO i=1,npts
+     xpt(i) = a + (i-1)*dx
+  END DO
+  CALL allsplines(spl, xpt, splines)
+  CALL putarr(fid, '/X', xpt)
+  CALL putarr(fid,'/SPLINES', splines)
+!
+!   Check interpolation
+  ALLOCATE(fcalc(npts), fexact(npts), err(npts))
+  fexact = func(xpt)
+!
+!  Function values
+  CALL gridval(spl, xpt, fcalc, 0, coefs(:,1))
+  err = fexact - fcalc
+  CALL putarr(fid, '/FEXACT', fexact, 'Exact values')
+  CALL putarr(fid, '/FCALC', fcalc, 'Interpolated values')
+  CALL putarr(fid, '/ERROR', err, 'Interpolation errors')
+  WRITE(*,'(/a,1pe12.3)') 'Norm of error', norm2(err)/norm2(fexact)
+!
+!  Derivatives values
+  CALL gridval(spl, xpt, fcalc, 1)
+  fexact = func1(xpt)
+  err = fexact - fcalc
+  CALL putarr(fid, '/FEXACT1', fexact, 'Exact values of first derivative')
+  CALL putarr(fid, '/FCALC1', fcalc, 'Interpolated first derivative')
+  CALL putarr(fid, '/ERROR1', err, 'Interpolation errors on first derivative')
+  WRITE(*,'(/a,1pe12.3)') 'Norm of error', norm2(err)/norm2(fexact)
+!
+
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(xpt, splines, fexact, fcalc, err)
+  DEALLOCATE(xgrid, fgrid)
+  CALL destroy_sp(spl)
+  CALL closef(fid)
+!===========================================================================
+!
+CONTAINS
+  FUNCTION func(x)
+    DOUBLE PRECISION, INTENT(in) :: x(:)
+    DOUBLE PRECISION :: func(SIZE(x))
+!!$    INTEGER :: n
+!!$    n = SIZE(x)
+!!$    func = 1.d0+x*(1.d0+x*(1.d0+x))
+!!$    func(1:n/2) = 1.0d0
+!!$    func(n/2+1:n) = 0.5d0
+    func = EXP(-8.*x*x)
+  END FUNCTION func
+!
+  FUNCTION func1(x)
+    DOUBLE PRECISION, INTENT(in) :: x(:)
+    DOUBLE PRECISION :: func1(SIZE(x))
+!!$    INTEGER :: n
+!!$    n = SIZE(x)
+!!$    func = 1.d0+x*(1.d0+x*(1.d0+x))
+!!$    func(1:n/2) = 1.0d0
+!!$    func(n/2+1:n) = 0.5d0
+    func1 = -16.d0*x*EXP(-8.*x*x)
+  END FUNCTION func1
+!
+  FUNCTION norm2(x)
+!
+!  Compute the 2-norm of array x
+!
+    DOUBLE PRECISION :: norm2
+    DOUBLE PRECISION, INTENT(in) :: x(:)
+    DOUBLE PRECISION :: sum2
+    INTEGER :: i
+!
+    sum2 = 0.0d0
+    DO i=1,SIZE(x,1)
+       sum2 = sum2 + x(i)**2
+    END DO
+    norm2 = SQRT(sum2)
+  END FUNCTION norm2
+END PROGRAM main
+!+++
+SUBROUTINE meshdist(c, x, nx)
+!
+!   Construct an 1d  non-equidistant mesh given a
+!   mesh distribution function.
+!
+  IMPLICIT NONE
+  DOUBLE PRECISION, INTENT(in) :: c(5)
+  INTEGER, INTENT(iN) :: nx
+  DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+  INTEGER :: nintg
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+  DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+  INTEGER :: i, k
+!
+  a=x(0)
+  b=x(nx)
+  nintg = 10*nx
+  ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+  dx = (b-a)/REAL(nintg)
+  xint(0) = a
+  fint(0) = 0.0d0
+  f1 = fdist(xint(0))
+  DO i=1,nintg
+     f0 = f1
+     xint(i) = xint(i-1) + dx
+     f1 = fdist(xint(i))
+     fint(i) = fint(i-1) + 0.5*(f0+f1)
+  END DO
+!
+!  Normalization
+!
+  scal = REAL(nx) / fint(nintg)
+  fint(0:nintg) = fint(0:nintg) * scal
+!!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+  k = 1
+  DO i=1,nintg-1
+     IF( fint(i) .GE. REAL(k) ) THEN
+        x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+             &   (k-fint(i))
+        k = k+1
+     END IF
+  END DO
+!
+  DEALLOCATE(xint, fint)
+CONTAINS
+  DOUBLE PRECISION FUNCTION fdist(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
+  END FUNCTION fdist
+END SUBROUTINE meshdist
+!+++
diff --git a/examples/fit1dp.f90 b/examples/fit1dp.f90
new file mode 100644
index 0000000..97ca6be
--- /dev/null
+++ b/examples/fit1dp.f90
@@ -0,0 +1,227 @@
+!>
+!> @file fit1dp.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!  Fit grid value function to a spline of any order
+!  Periodic case.
+!
+  USE bsplines
+  USE futils
+!
+  IMPLICIT NONE
+  INTEGER :: nx, nidbas
+  DOUBLE PRECISION :: a, b, coefx(5)
+  DOUBLE PRECISION, ALLOCATABLE ::  xgrid(:), fgrid(:,:), coefs(:,:)
+  INTEGER :: i, dim
+  TYPE(spline1d) :: spl
+  DOUBLE PRECISION :: dx, x0, x1
+  INTEGER :: npts
+  DOUBLE PRECISION, ALLOCATABLE :: xpt(:), fcalc(:), fexact(:), err(:)
+  DOUBLE PRECISION, POINTER :: splines(:,:) => null()
+!
+  CHARACTER(len=128) :: file='fit1d.h5'
+  INTEGER :: fid
+!
+  NAMELIST /newrun/ nx, nidbas, a, b, coefx
+!===========================================================================
+!              1.0 Prologue
+!
+!   Read in data specific to run
+!
+  nx = 10    ! Number of intevals in x
+  a = 0.0d0  ! Left boundary of interval
+  b = 1.0d0  ! Right boundary of interval
+  coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Define grid and function values
+!
+  ALLOCATE(xgrid(0:nx), fgrid(0:nx,1))
+  xgrid(0) = a
+  xgrid(nx) = b
+  CALL meshdist(coefx, xgrid, nx)
+  fgrid(:,1) = func(xgrid)
+  WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
+  WRITE(*,'(a/(10f8.3))') 'fgrid', fgrid
+!
+!   Create hdf5 file
+!
+  CALL creatf(file, fid, 'FIT1D Result File')
+  CALL attach(fid, '/', 'NX', nx)
+  CALL attach(fid, '/', 'NIDBAS', nidbas)
+!===========================================================================
+!              2.0 Spline interpolation
+!
+!   Set up the spline interpolation
+  CALL set_splcoef(nidbas, xgrid, spl, period=.TRUE.)
+  CALL get_dim(spl, dim)
+  WRITE(*,'(2(a,i5, 2x))') 'NX =', nx, 'DIM =', dim
+  WRITE(*,'(/a,i6,a,i6/(10(f8.3)))') 'KNOTS of splines', LBOUND(spl%knots), &
+       &                       ':',UBOUND(spl%knots), spl%knots
+!
+  ALLOCATE(coefs(dim,1))
+!
+!   From given grid values fgrid, compute the spline coefs
+  CALL get_splcoef(spl, fgrid, coefs)
+  WRITE(*,'(a/(10f8.3))') 'coefs', coefs
+!
+!   Plot all splines
+!
+  npts = 100
+  ALLOCATE(xpt(npts))
+!!$  x0 = a
+!!$  x1 = b
+  x0 = spl%knots(0)
+  x1 = spl%knots(nx)
+  dx = (x1 -x0)/REAL(npts)  ! Last point b not inluded
+  DO i=1,npts
+     xpt(i) = x0 + (i-1)*dx
+  END DO
+  CALL allsplines(spl, xpt, splines)
+  CALL putarr(fid, '/X', xpt)
+  CALL putarr(fid,'/SPLINES', splines)
+!
+!   Check interpolation
+!
+  ALLOCATE(fcalc(npts), fexact(npts), err(npts))
+  fexact = func(xpt)
+!
+  CALL gridval(spl, xpt, fcalc, 0, coefs(:,1))
+  err = fexact - fcalc
+  CALL putarr(fid, '/FEXACT', fexact, 'Exact values')
+  CALL putarr(fid, '/FCALC', fcalc, 'Interpolated values')
+  CALL putarr(fid, '/ERROR', err, 'Interpolation errors')
+  WRITE(*,'(/a,1pe12.3)') 'Norm of error', norm2(err)/norm2(fexact)
+!
+!  Derivatives values
+  CALL gridval(spl, xpt, fcalc, 1)
+  fexact = func1(xpt)
+  err = fexact - fcalc
+  CALL putarr(fid, '/FEXACT1', fexact, 'Exact values of first derivative')
+  CALL putarr(fid, '/FCALC1', fcalc, 'Interpolated first derivative')
+  CALL putarr(fid, '/ERROR1', err, 'Interpolation errors on first derivative')
+  WRITE(*,'(/a,1pe12.3)') 'Norm of error', norm2(err)/norm2(fexact)
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(xpt, splines, fexact, fcalc, err)
+  DEALLOCATE(xgrid, fgrid)
+  CALL destroy_sp(spl)
+  CALL closef(fid)
+!===========================================================================
+!
+CONTAINS
+  FUNCTION func(x)
+    DOUBLE PRECISION, INTENT(in) :: x(:)
+    DOUBLE PRECISION :: func(SIZE(x))
+    DOUBLE PRECISION :: pi
+    pi = 4.0*ATAN(1.0d0)
+    func = SIN(2.d0*pi*x) + 2.0d0*COS(8.d0*pi*x)
+  END FUNCTION func
+  FUNCTION func1(x)
+    DOUBLE PRECISION, INTENT(in) :: x(:)
+    DOUBLE PRECISION :: func1(SIZE(x))
+    DOUBLE PRECISION :: pi
+    pi = 4.0*ATAN(1.0d0)
+    func1 = 2.d0*pi*COS(2.d0*pi*x) - 16.0d0*pi*SIN(8.d0*pi*x)
+  END FUNCTION func1
+!
+  FUNCTION norm2(x)
+!
+!  Compute the 2-norm of array x
+!
+    DOUBLE PRECISION :: norm2
+    DOUBLE PRECISION, INTENT(in) :: x(:)
+    DOUBLE PRECISION :: sum2
+    INTEGER :: i
+!
+    sum2 = 0.0d0
+    DO i=1,SIZE(x,1)
+       sum2 = sum2 + x(i)**2
+    END DO
+    norm2 = SQRT(sum2)
+  END FUNCTION norm2
+END PROGRAM main
+!+++
+SUBROUTINE meshdist(c, x, nx)
+!
+!   Construct an 1d  non-equidistant mesh given a
+!   mesh distribution function.
+!
+  IMPLICIT NONE
+  DOUBLE PRECISION, INTENT(in) :: c(5)
+  INTEGER, INTENT(iN) :: nx
+  DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+  INTEGER :: nintg
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+  DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+  INTEGER :: i, k
+!
+  a=x(0)
+  b=x(nx)
+  nintg = 10*nx
+  ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+  dx = (b-a)/REAL(nintg)
+  xint(0) = a
+  fint(0) = 0.0d0
+  f1 = fdist(xint(0))
+  DO i=1,nintg
+     f0 = f1
+     xint(i) = xint(i-1) + dx
+     f1 = fdist(xint(i))
+     fint(i) = fint(i-1) + 0.5*(f0+f1)
+  END DO
+!
+!  Normalization
+!
+  scal = REAL(nx) / fint(nintg)
+  fint(0:nintg) = fint(0:nintg) * scal
+!!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+  k = 1
+  DO i=1,nintg-1
+     IF( fint(i) .GE. REAL(k) ) THEN
+        x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+             &   (k-fint(i))
+        k = k+1
+     END IF
+  END DO
+!
+  DEALLOCATE(xint, fint)
+CONTAINS
+  DOUBLE PRECISION FUNCTION fdist(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
+  END FUNCTION fdist
+END SUBROUTINE meshdist
+!+++
diff --git a/examples/fit2d.f90 b/examples/fit2d.f90
new file mode 100644
index 0000000..ab598de
--- /dev/null
+++ b/examples/fit2d.f90
@@ -0,0 +1,159 @@
+!>
+!> @file fit2d.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!  Fit 2d grid value function to a 2d spline of any order
+!
+  USE bsplines
+  USE futils
+!
+  IMPLICIT NONE
+  CHARACTER(len=128) :: file='fit2d.h5'
+  INTEGER :: fid
+  INTEGER :: nx, ny, nidbas(2), mbes, dims(2)
+  INTEGER, PARAMETER :: nptx=100, npty=100
+  DOUBLE PRECISION :: pi, coefx(5), coefy(5)
+  DOUBLE PRECISION, ALLOCATABLE :: xgrid(:),ygrid(:),fgrid(:,:),bcoefs(:,:)
+  DOUBLE PRECISION :: dx, dy, xpt(nptx), ypt(npty)
+  DOUBLE PRECISION, DIMENSION(nptx,npty) :: fcalc, fexact, errs
+  TYPE(spline2d) :: splxy
+  DOUBLE PRECISION :: mem
+  INTEGER :: i, j
+!
+  NAMELIST /newrun/ nx, ny, nidbas, mbes, coefx, coefy
+!===========================================================================
+!              1.0 Prologue
+!
+!   Read in data specific to run
+!
+  nx = 8              ! Number of intevals in x
+  ny = 8              ! Number of intevals in y
+  nidbas = (/3,3/)    ! Degree of splines
+  mbes = 2
+  coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+  coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Define grid on x axis
+!
+  pi = 4.0d0*ATAN(1.0d0)
+  ALLOCATE(xgrid(0:nx), ygrid(0:ny), fgrid(0:nx,0:ny))
+  xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
+  CALL meshdist(coefx, xgrid, nx)
+  ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
+  CALL meshdist(coefy, ygrid, ny)
+  fgrid = func(xgrid,ygrid)
+!
+  WRITE(*,'(a/(12f8.3))') 'XGRID', xgrid(0:nx)
+  WRITE(*,'(a/(12f8.3))') 'YGRID', ygrid(0:ny)
+  IF( nx.LE.10 .AND. ny.LE.10 ) THEN
+     WRITE(*,'(a)') 'FGRID'
+     DO j=0,ny
+        WRITE(*,'(12f8.3)') fgrid(:,j)
+     END DO
+     WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem()
+  END IF
+!
+!   Create hdf5 file
+!
+  CALL creatf(file, fid, 'PDE2D Result File')
+  CALL attach(fid, '/', 'NX', nx)
+  CALL attach(fid, '/', 'NY', ny)
+  CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
+  CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
+  CALL attach(fid, '/', 'MBESS', mbes)
+!===========================================================================
+!              2.0 Spline interpolation
+!
+!   Setup the spline interpolation
+  CALL set_splcoef(nidbas, xgrid, ygrid, splxy, (/.FALSE., .TRUE./))
+  WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem()
+!
+!   Compute spline interpolation coefficients
+  CALL get_dim(splxy, dims)
+  ALLOCATE(bcoefs(dims(1),dims(2)))
+  WRITE(*,'(a,2i4)') 'Dims of spline', dims
+!
+  CALL get_splcoef(splxy, fgrid, bcoefs)
+!===========================================================================
+!              2.0 Check interpolation
+!
+  dx=(xgrid(nx)-xgrid(0))/REAL(nptx-1)
+  dy=(ygrid(ny)-ygrid(0))/REAL(npty-1)
+  DO i=1,nptx
+     xpt(i) = xgrid(0) + (i-1)*dx
+  END DO
+  DO i=1,npty
+     ypt(i) = ygrid(0) + (i-1)*dy
+  END DO
+  fexact = func(xpt,ypt)
+  CALL gridval(splxy, xpt, ypt, fcalc, (/0,0/), bcoefs)
+  errs = fcalc-fexact
+  WRITE(*,'(a,2(1pe12.3))') 'Min max or errors', MINVAL(errs), MAXVAL(errs)
+!
+  CALL putarr(fid, '/xpt', xpt, 'r')
+  CALL putarr(fid, '/ypt', ypt, '\theta')
+  CALL putarr(fid, '/fcalc', fcalc, 'Interpolated')
+  CALL putarr(fid, '/fexact', fexact, 'Exact')
+  CALL putarr(fid, '/errs', errs, 'Errors')
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(bcoefs)
+  DEALLOCATE(xgrid, ygrid, fgrid)
+  CALL destroy_sp(splxy)
+  CALL closef(fid)
+!
+CONTAINS
+  FUNCTION func(x,y)
+    DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
+    DOUBLE PRECISION :: func(SIZE(x), SIZE(y))
+    DOUBLE PRECISION :: zy
+    INTEGER :: j
+    DO j=1,SIZE(y)
+       zy = -mbes * SIN(mbes*y(j))
+       func(:,j) =(1-x(:)**2) * x(:)**mbes * zy
+    END DO
+  END FUNCTION func
+!
+  FUNCTION norm2(x)
+!
+!  Compute the 2-norm of array x
+!
+    DOUBLE PRECISION :: norm2
+    DOUBLE PRECISION, INTENT(in) :: x(:)
+    DOUBLE PRECISION :: sum2
+    INTEGER :: i
+!
+    sum2 = 0.0d0
+    DO i=1,SIZE(x,1)
+       sum2 = sum2 + x(i)**2
+    END DO
+    norm2 = SQRT(sum2)
+  END FUNCTION norm2
+END PROGRAM main
diff --git a/examples/fit2d1d.f90 b/examples/fit2d1d.f90
new file mode 100644
index 0000000..e0ffec3
--- /dev/null
+++ b/examples/fit2d1d.f90
@@ -0,0 +1,154 @@
+!>
+!> @file fit2d1d.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!  Fit 2d grid value function to a 2d spline of any order
+!  Interpolating on an grid (x_i,y_j) or a set of particle
+!  positions (x_p,y_p).
+!
+  USE bsplines
+!
+  IMPLICIT NONE
+  INTEGER :: nx, ny, nidbas(2), mbes, dims(2)
+  INTEGER, PARAMETER :: nptx=100, npty=100
+  DOUBLE PRECISION :: pi, coefx(5), coefy(5)
+  DOUBLE PRECISION, ALLOCATABLE :: xgrid(:),ygrid(:),fgrid(:,:),bcoefs(:,:)
+  DOUBLE PRECISION :: dx, dy, xpt(nptx), ypt(npty)
+  DOUBLE PRECISION, DIMENSION(nptx,npty) :: fcalc, fexact, errs
+  DOUBLE PRECISION, DIMENSION(nptx*npty) :: xp, yp, fcalcp, fexactp, errsp
+  TYPE(spline2d) :: splxy
+  DOUBLE PRECISION :: mem
+  INTEGER :: i, j
+!
+  NAMELIST /newrun/ nx, ny, nidbas, mbes, coefx, coefy
+!===========================================================================
+!              1.0 Prologue
+!
+!   Read in data specific to run
+!
+  nx = 8              ! Number of intevals in x
+  ny = 8              ! Number of intevals in y
+  nidbas = (/3,3/)    ! Degree of splines
+  mbes = 2
+  coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+  coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Define grid on x axis
+!
+  pi = 4.0d0*ATAN(1.0d0)
+  ALLOCATE(xgrid(0:nx), ygrid(0:ny), fgrid(0:nx,0:ny))
+  xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
+  CALL meshdist(coefx, xgrid, nx)
+  ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
+  CALL meshdist(coefy, ygrid, ny)
+  fgrid = func(xgrid,ygrid)
+!===========================================================================
+!              2.0 Spline interpolation
+!
+!   Setup the spline interpolation
+  CALL set_splcoef(nidbas, xgrid, ygrid, splxy, (/.FALSE., .TRUE./))
+!
+!   Compute spline interpolation coefficients
+  CALL get_dim(splxy, dims)
+  ALLOCATE(bcoefs(dims(1),dims(2)))
+  WRITE(*,'(a,2i4)') 'Dims of spline', dims
+!
+  CALL get_splcoef(splxy, fgrid, bcoefs)
+!===========================================================================
+!              2.0 Check interpolation
+!
+  dx=(xgrid(nx)-xgrid(0))/REAL(nptx-1)
+  dy=(ygrid(ny)-ygrid(0))/REAL(npty-1)
+  DO i=1,nptx
+     xpt(i) = xgrid(0) + (i-1)*dx
+  END DO
+  DO i=1,npty
+     ypt(i) = ygrid(0) + (i-1)*dy
+  END DO
+  fexact = func(xpt,ypt)
+  CALL gridval(splxy, xpt, ypt, fcalc, (/0,0/), bcoefs)
+  errs = fcalc-fexact
+  WRITE(*,*) 'Using the GRIDVAL2D2D'
+  WRITE(*,'(a,2(1pe12.3))') 'Min max or errors', MINVAL(errs), MAXVAL(errs)
+!
+! The 2d1d version
+  WRITE(*,*) 'Using the GRIDVAL2D1D'
+  CALL RANDOM_NUMBER(xp)
+  CALL RANDOM_NUMBER(yp)
+  yp=2.0*pi*yp
+  fexactp = func1(xp,yp)
+!!$  CALL gridval(splxy, xp, yp, fcalcp, (/0,0/), bcoefs)
+  CALL gridval(splxy, xp, yp, fcalcp, (/0,0/))
+  errsp = fcalcp-fexactp
+  WRITE(*,'(a,2(1pe12.3))') 'Min max or errors', MINVAL(errsp), MAXVAL(errsp)
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(bcoefs)
+  DEALLOCATE(xgrid, ygrid, fgrid)
+  CALL destroy_sp(splxy)
+!
+CONTAINS
+  FUNCTION func(x,y)
+    DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
+    DOUBLE PRECISION :: func(SIZE(x), SIZE(y))
+    DOUBLE PRECISION :: zy
+    INTEGER :: j
+    DO j=1,SIZE(y)
+       zy = -mbes * SIN(mbes*y(j))
+       func(:,j) =(1-x(:)**2) * x(:)**mbes * zy
+    END DO
+  END FUNCTION func
+  FUNCTION func1(x,y)
+    DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
+    DOUBLE PRECISION :: func1(SIZE(x))
+    DOUBLE PRECISION :: zy
+    INTEGER :: j
+    DO j=1,SIZE(x)
+       zy = -mbes * SIN(mbes*y(j))
+       func1(j) =(1-x(j)**2) * x(j)**mbes * zy
+    END DO
+  END FUNCTION func1
+!
+  FUNCTION norm2(x)
+!
+!  Compute the 2-norm of array x
+!
+    DOUBLE PRECISION :: norm2
+    DOUBLE PRECISION, INTENT(in) :: x(:)
+    DOUBLE PRECISION :: sum2
+    INTEGER :: i
+!
+    sum2 = 0.0d0
+    DO i=1,SIZE(x,1)
+       sum2 = sum2 + x(i)**2
+    END DO
+    norm2 = SQRT(sum2)
+  END FUNCTION norm2
+END PROGRAM main
diff --git a/examples/fit2d_cmpl.f90 b/examples/fit2d_cmpl.f90
new file mode 100644
index 0000000..6e6a4ee
--- /dev/null
+++ b/examples/fit2d_cmpl.f90
@@ -0,0 +1,132 @@
+!>
+!> @file fit2d_cmpl.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!  Fit 2d grid value function to a 2d spline of any order
+!
+  USE bsplines
+!
+  IMPLICIT NONE
+  INTEGER :: nx, ny, nidbas(2), mbes, dims(2)
+  INTEGER, PARAMETER :: npt=10000
+  DOUBLE PRECISION :: pi, coefx(5), coefy(5)
+  DOUBLE PRECISION, ALLOCATABLE :: xgrid(:),ygrid(:)
+  DOUBLE COMPLEX, ALLOCATABLE :: fgrid(:,:),  fgrid_calc(:,:), bcoefs(:,:)
+  DOUBLE PRECISION :: dx, dy, xpt(npt), ypt(npt), errs(npt)
+  DOUBLE COMPLEX, DIMENSION(npt) :: fcalc, fexact
+  TYPE(spline2d) :: splxy
+  INTEGER :: i, j
+!
+  NAMELIST /newrun/ nx, ny, nidbas, mbes, coefx, coefy
+!===========================================================================
+!              1.0 Prologue
+!
+!   Read in data specific to run
+!
+  nx = 8              ! Number of intevals in x
+  ny = 8              ! Number of intevals in y
+  nidbas = (/3,3/)    ! Degree of splines
+  mbes = 2
+  coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+  coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Define grid on x axis
+!
+  pi = 4.0d0*ATAN(1.0d0)
+  ALLOCATE(xgrid(0:nx), ygrid(0:ny))
+  ALLOCATE(fgrid(0:nx,0:ny), fgrid_calc(0:nx,0:ny))
+  xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
+  CALL meshdist(coefx, xgrid, nx)
+  ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
+  CALL meshdist(coefy, ygrid, ny)
+  fgrid = func2(xgrid,ygrid)
+!===========================================================================
+!              2.0 Spline interpolation
+!
+!   Setup the spline interpolation
+  CALL set_splcoef(nidbas, xgrid, ygrid, splxy, (/.FALSE., .TRUE./))
+!
+!   Compute spline interpolation coefficients
+  CALL get_dim(splxy, dims)
+  ALLOCATE(bcoefs(dims(1),dims(2)))
+  WRITE(*,'(a,2i4)') 'Dims of spline', dims
+!
+  CALL get_splcoef(splxy, fgrid, bcoefs)
+!===========================================================================
+!              2.0 Check interpolation
+!
+  CALL RANDOM_NUMBER(xpt)
+  CALL RANDOM_NUMBER(ypt)
+  ypt(:) = ypt(:)*2.0*pi
+  fexact = func1(xpt,ypt)
+!
+  CALL gridval(splxy, xpt, ypt, fcalc, (/0,0/), bcoefs)
+  errs = ABS(fcalc-fexact)
+  WRITE(*,'(a,2(1pe12.3))') 'Max errors (on random points)', MAXVAL(errs)
+!
+  CALL gridval(splxy, xgrid, ygrid, fgrid_calc, (/0,0/))
+  WRITE(*,'(a,2(1pe12.3))') 'Max errors (on grid points)', &
+       &     MAXVAL(ABS(fgrid_calc-fgrid))
+!
+  fgrid_calc = 0.0
+  DO j=0,ny
+     ypt(1:nx+1) = ygrid(j)
+     CALL gridval(splxy, xgrid, ypt(1:nx+1), fgrid_calc(:,j), (/0,0/))
+  END DO
+  WRITE(*,'(a,2(1pe12.3))') 'Max errors (on grid points)', &
+       &     MAXVAL(ABS(fgrid_calc-fgrid))
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(bcoefs)
+  DEALLOCATE(xgrid, ygrid, fgrid, fgrid_calc)
+  CALL destroy_sp(splxy)
+!
+CONTAINS
+  FUNCTION func2(x,y)
+    DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
+    DOUBLE COMPLEX :: func2(SIZE(x), SIZE(y))
+    DOUBLE COMPLEX :: zy
+    INTEGER :: j
+    DO j=1,SIZE(y)
+       zy = -mbes * CMPLX(SIN(mbes*y(j)), COS(mbes*y(j)))
+       func2(:,j) =(1-x(:)**2) * x(:)**mbes * zy
+    END DO
+  END FUNCTION func2
+  FUNCTION func1(x,y)
+    DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
+    DOUBLE COMPLEX :: func1(SIZE(x))
+    DOUBLE COMPLEX :: zy
+    INTEGER :: j
+    DO j=1,SIZE(x)
+       zy = -mbes * CMPLX(SIN(mbes*y(j)), COS(mbes*y(j)))
+       func1(j) =(1-x(j)**2) * x(j)**mbes * zy
+    END DO
+  END FUNCTION func1
+END PROGRAM main
diff --git a/examples/fit2dbc.f90 b/examples/fit2dbc.f90
new file mode 100644
index 0000000..75862c0
--- /dev/null
+++ b/examples/fit2dbc.f90
@@ -0,0 +1,183 @@
+!>
+!> @file fit2dbc.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!  Fit 2d grid value function to a 2d spline of any order
+!  BC using derivatives at both ends, in the non-periodic direction.
+!
+  USE bsplines
+  USE futils
+!
+  IMPLICIT NONE
+  CHARACTER(len=128) :: file='fit2d.h5'
+  INTEGER :: fid
+  INTEGER :: nx, ny, nidbas(2), mbes, dims(2)
+  INTEGER, PARAMETER :: nptx=100, npty=100
+  DOUBLE PRECISION :: pi, coefx(5), coefy(5)
+  DOUBLE PRECISION, ALLOCATABLE :: xgrid(:),ygrid(:),fgrid(:,:),bcoefs(:,:)
+  DOUBLE PRECISION :: dx, dy, xpt(nptx), ypt(npty)
+  DOUBLE PRECISION, DIMENSION(nptx,npty) :: fcalc, fexact, errs
+  TYPE(spline2d) :: splxy
+  DOUBLE PRECISION :: mem
+  INTEGER :: i, j, ii
+  INTEGER :: ibc1(2,10), ibc2(2,10)
+  DOUBLE PRECISION, ALLOCATABLE:: fbc(:,:,:)
+!
+  NAMELIST /newrun/ nx, ny, nidbas, mbes, coefx, coefy
+
+!===========================================================================
+!              1.0 Prologue
+!
+!   Read in data specific to run
+!
+  nx = 8              ! Number of intevals in x
+  ny = 8              ! Number of intevals in y
+  nidbas = (/3,3/)    ! Degree of splines
+  mbes = 2
+  coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+  coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Define grid on x axis
+!
+  pi = 4.0d0*ATAN(1.0d0)
+  ALLOCATE(xgrid(0:nx), ygrid(0:ny), fgrid(0:nx,0:ny))
+  xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
+  CALL meshdist(coefx, xgrid, nx)
+  ygrid(0) = 0.0d0;   ygrid(ny) = 1.0d0
+  CALL meshdist(coefy, ygrid, ny)
+  fgrid = func(xgrid,ygrid)
+!
+  WRITE(*,'(a/(12f8.3))') 'XGRID', xgrid(0:nx)
+  WRITE(*,'(a/(12f8.3))') 'YGRID', ygrid(0:ny)
+  IF( nx.LE.10 .AND. ny.LE.10 ) THEN
+     WRITE(*,'(a)') 'FGRID'
+     DO j=0,ny
+        WRITE(*,'(12f8.3)') fgrid(:,j)
+     END DO
+     WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem()
+  END IF
+!
+!   Create hdf5 file
+!
+  CALL creatf(file, fid, 'PDE2D Result File')
+  CALL attach(fid, '/', 'NX', nx)
+  CALL attach(fid, '/', 'NY', ny)
+  CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
+  CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
+  CALL attach(fid, '/', 'MBESS', mbes)
+!===========================================================================
+!              2.0 Spline interpolation
+!
+!   Setup the spline interpolation
+  ii=1           ! Start with first derivative
+  DO i = 1, nidbas(1)/2
+     ibc1(1,i) = ii+i-1
+     ibc1(2,i) = ii+i-1
+  END DO
+  ibc2 = ibc1
+  CALL set_splcoef(nidbas, xgrid, ygrid, splxy, (/.FALSE., .FALSE./),&
+       &           ibc1=ibc1, ibc2=ibc2)
+  WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem()
+!
+!   Compute spline interpolation coefficients
+  CALL get_dim(splxy, dims)
+  ALLOCATE(bcoefs(dims(1),dims(2)))
+  WRITE(*,'(a,2i4)') 'Dims of spline', dims
+!
+  ALLOCATE(fbc(2, nidbas(1)/2, 0:ny))
+  fbc=0.0d0
+!
+  WRITE(*,'(a/(10f8.3))') 'fbc(1)', fbc(1,1,:)
+  WRITE(*,'(a/(10f8.3))') 'fbc(2)', fbc(2,1,:)
+!
+  CALL get_splcoef(splxy, fgrid, bcoefs, fbc1=fbc, fbc2=fbc)
+!
+  DEALLOCATE(fbc)
+!===========================================================================
+!              2.0 Check interpolation
+!
+  dx=(xgrid(nx)-xgrid(0))/REAL(nptx-1)
+  dy=(ygrid(ny)-ygrid(0))/REAL(npty-1)
+  DO i=1,nptx
+     xpt(i) = xgrid(0) + (i-1)*dx
+  END DO
+  DO i=1,npty
+     ypt(i) = ygrid(0) + (i-1)*dy
+  END DO
+  fexact = func(xpt,ypt)
+  CALL gridval(splxy, xpt, ypt, fcalc, (/0,0/), bcoefs)
+  errs = fcalc-fexact
+  WRITE(*,'(a,2(1pe12.3))') 'Min max or errors', MINVAL(errs), MAXVAL(errs)
+!
+  CALL putarr(fid, '/xpt', xpt, 'r')
+  CALL putarr(fid, '/ypt', ypt, '\theta')
+  CALL putarr(fid, '/bcoefs', bcoefs, 'bcoefs')
+  CALL putarr(fid, '/fcalc', fcalc, 'Interpolated')
+  CALL putarr(fid, '/fexact', fexact, 'Exact')
+  CALL putarr(fid, '/errs', errs, 'Errors')
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(bcoefs)
+  DEALLOCATE(xgrid, ygrid, fgrid)
+  CALL destroy_sp(splxy)
+  CALL closef(fid)
+!
+!===========================================================================
+CONTAINS
+  FUNCTION func(x,y)
+!
+!   A function with zeo derivatives at both ends
+!
+    DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
+    DOUBLE PRECISION :: func(SIZE(x), SIZE(y))
+    DOUBLE PRECISION :: zy
+    INTEGER :: j
+    DO j=1,SIZE(y)
+       zy = y(j)*y(j)*(y(j)-1.5d0)
+       func(:,j) = x(:)*x(:)*(x(:)-1.5d0) + zy
+    END DO
+  END FUNCTION func
+!
+  FUNCTION norm2(x)
+!
+!  Compute the 2-norm of array x
+!
+    DOUBLE PRECISION :: norm2
+    DOUBLE PRECISION, INTENT(in) :: x(:)
+    DOUBLE PRECISION :: sum2
+    INTEGER :: i
+!
+    sum2 = 0.0d0
+    DO i=1,SIZE(x,1)
+       sum2 = sum2 + x(i)**2
+    END DO
+    norm2 = SQRT(sum2)
+  END FUNCTION norm2
+END PROGRAM main
diff --git a/examples/fit2dbc_x.f90 b/examples/fit2dbc_x.f90
new file mode 100644
index 0000000..8795668
--- /dev/null
+++ b/examples/fit2dbc_x.f90
@@ -0,0 +1,202 @@
+!>
+!> @file fit2dbc_x.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!  Fit 2d grid value function to a 2d spline of any order
+!  BC using derivatives at both ends, in the non-periodic direction.
+!
+! Testing BC on derivative along first direction
+!
+  USE bsplines
+  USE futils
+!
+  IMPLICIT NONE
+  CHARACTER(len=128) :: file='fit2d.h5'
+  INTEGER :: fid
+  INTEGER :: nx, ny, nidbas(2), mbes, dims(2)
+  INTEGER, PARAMETER :: nptx=100, npty=100
+  DOUBLE PRECISION :: pi, coefx(5), coefy(5)
+  DOUBLE PRECISION, ALLOCATABLE :: xgrid(:),ygrid(:),fgrid(:,:),bcoefs(:,:)
+  DOUBLE PRECISION :: dx, dy, xpt(nptx), ypt(npty)
+  DOUBLE PRECISION, DIMENSION(nptx,npty) :: fcalc, fexact, errs
+  TYPE(spline2d) :: splxy
+  DOUBLE PRECISION :: mem
+  INTEGER :: i, j, ii
+  INTEGER :: ibc(2,10)
+  DOUBLE PRECISION, ALLOCATABLE:: fbc(:,:,:)
+!
+  NAMELIST /newrun/ nx, ny, nidbas, mbes, coefx, coefy
+
+!===========================================================================
+!              1.0 Prologue
+!
+!   Read in data specific to run
+!
+  nx = 8              ! Number of intevals in x
+  ny = 8              ! Number of intevals in y
+  nidbas = (/3,3/)    ! Degree of splines
+  mbes = 2
+  coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+  coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Define grid on x axis
+!
+  pi = 4.0d0*ATAN(1.0d0)
+  ALLOCATE(xgrid(0:nx), ygrid(0:ny), fgrid(0:nx,0:ny))
+  xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
+  CALL meshdist(coefx, xgrid, nx)
+  ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
+  CALL meshdist(coefy, ygrid, ny)
+  fgrid = func(xgrid,ygrid)
+!
+  WRITE(*,'(a/(12f8.3))') 'XGRID', xgrid(0:nx)
+  WRITE(*,'(a/(12f8.3))') 'YGRID', ygrid(0:ny)
+  IF( nx.LE.10 .AND. ny.LE.10 ) THEN
+     WRITE(*,'(a)') 'FGRID(x, y)'
+     DO j=0,ny
+        WRITE(*,'(12f8.3)') fgrid(:,j)
+     END DO
+     WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem()
+  END IF
+!
+!   Create hdf5 file
+!
+  CALL creatf(file, fid, 'PDE2D Result File')
+  CALL attach(fid, '/', 'NX', nx)
+  CALL attach(fid, '/', 'NY', ny)
+  CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
+  CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
+  CALL attach(fid, '/', 'MBESS', mbes)
+!===========================================================================
+!              2.0 Spline interpolation
+!
+!   Setup the spline interpolation
+  ii=1           ! Start with first derivative
+  DO i = 1, nidbas(1)/2
+     ibc(1,i) = ii+i-1
+     ibc(2,i) = ii+i-1
+  END DO
+  CALL set_splcoef(nidbas, xgrid, ygrid, splxy, (/.FALSE., .TRUE./),&
+       &           ibc1=ibc)
+  WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem()
+!
+!   Compute spline interpolation coefficients
+  CALL get_dim(splxy, dims)
+  ALLOCATE(bcoefs(dims(1),dims(2)))
+  WRITE(*,'(a,2i4)') 'Dims of spline', dims
+!
+  ALLOCATE(fbc(2, nidbas(1)/2, 0:ny))
+  fbc=0.0d0
+!
+!!$!   Exact first derivatives at boundaries
+!!$  fbc(1,1:1,:) = func1(xgrid(0:0), ygrid(0:ny))
+!!$  fbc(2,1:1,:) = func1(xgrid(nx:nx), ygrid(0:ny))
+!
+!!$!   Derivatives at boundaries approximated with FD
+!!$  DO j=0,ny
+!!$     fbc(1,1,j+1) = fgrid(1,j)-fgrid(0,j)
+!!$     fbc(2,1,j+1) = fgrid(nx,j)-fgrid(nx-1,j)
+!!$  END DO
+!!$  fbc(1,1,:) = fbc(1,1,:)/(xgrid(1)-xgrid(0))
+!!$  fbc(2,1,:) = fbc(2,1,:)/(xgrid(nx)-xgrid(nx-1))
+!
+  WRITE(*,'(a/(10f8.3))') 'fbc(1)', fbc(1,1,:)
+  WRITE(*,'(a/(10f8.3))') 'fbc(2)', fbc(2,1,:)
+!
+  CALL get_splcoef(splxy, fgrid, bcoefs, fbc1=fbc)
+!
+  DEALLOCATE(fbc)
+!===========================================================================
+!              2.0 Check interpolation
+!
+  dx=(xgrid(nx)-xgrid(0))/REAL(nptx-1)
+  dy=(ygrid(ny)-ygrid(0))/REAL(npty-1)
+  DO i=1,nptx
+     xpt(i) = xgrid(0) + (i-1)*dx
+  END DO
+  DO i=1,npty
+     ypt(i) = ygrid(0) + (i-1)*dy
+  END DO
+  fexact = func(xpt,ypt)
+  CALL gridval(splxy, xpt, ypt, fcalc, (/0,0/), bcoefs)
+  errs = fcalc-fexact
+  WRITE(*,'(a,2(1pe12.3))') 'Min max or errors', MINVAL(errs), MAXVAL(errs)
+!
+  CALL putarr(fid, '/xpt', xpt, 'r')
+  CALL putarr(fid, '/ypt', ypt, '\theta')
+  CALL putarr(fid, '/fcalc', fcalc, 'Interpolated')
+  CALL putarr(fid, '/fexact', fexact, 'Exact')
+  CALL putarr(fid, '/errs', errs, 'Errors')
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(bcoefs)
+  DEALLOCATE(xgrid, ygrid, fgrid)
+  CALL destroy_sp(splxy)
+  CALL closef(fid)
+!
+!===========================================================================
+CONTAINS
+  FUNCTION func(x,y)
+    DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
+    DOUBLE PRECISION :: func(SIZE(x), SIZE(y))
+    DOUBLE PRECISION :: zy
+    INTEGER :: j
+    DO j=1,SIZE(y)
+       zy = -mbes * SIN(mbes*y(j))
+       func(:,j) =(1-x(:)**2) * x(:)**mbes * zy
+    END DO
+  END FUNCTION func
+  FUNCTION func1(x,y)
+    DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
+    DOUBLE PRECISION :: func1(SIZE(x), SIZE(y))
+    DOUBLE PRECISION :: zy
+    INTEGER :: j
+    DO j=1,SIZE(y)
+       zy = -mbes * SIN(mbes*y(j))
+       func1(:,j) = (mbes - (mbes+2.0d0)*x(:)**2) * x(:)**(mbes-1) * zy
+    END DO
+  END FUNCTION func1
+!
+  FUNCTION norm2(x)
+!
+!  Compute the 2-norm of array x
+!
+    DOUBLE PRECISION :: norm2
+    DOUBLE PRECISION, INTENT(in) :: x(:)
+    DOUBLE PRECISION :: sum2
+    INTEGER :: i
+!
+    sum2 = 0.0d0
+    DO i=1,SIZE(x,1)
+       sum2 = sum2 + x(i)**2
+    END DO
+    norm2 = SQRT(sum2)
+  END FUNCTION norm2
+END PROGRAM main
diff --git a/examples/fit2dbc_y.f90 b/examples/fit2dbc_y.f90
new file mode 100644
index 0000000..ff73d65
--- /dev/null
+++ b/examples/fit2dbc_y.f90
@@ -0,0 +1,202 @@
+!>
+!> @file fit2dbc_y.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!  Fit 2d grid value function to a 2d spline of any order
+!  BC using derivatives at both ends, in the non-periodic direction.
+!
+! Testing BC on derivative along second direction
+!
+  USE bsplines
+  USE futils
+!
+  IMPLICIT NONE
+  CHARACTER(len=128) :: file='fit2d.h5'
+  INTEGER :: fid
+  INTEGER :: nx, ny, nidbas(2), mbes, dims(2)
+  INTEGER, PARAMETER :: nptx=100, npty=100
+  DOUBLE PRECISION :: pi, coefx(5), coefy(5)
+  DOUBLE PRECISION, ALLOCATABLE :: xgrid(:),ygrid(:),fgrid(:,:),bcoefs(:,:)
+  DOUBLE PRECISION :: dx, dy, xpt(nptx), ypt(npty)
+  DOUBLE PRECISION, DIMENSION(npty,nptx) :: fcalc, fexact, errs
+  TYPE(spline2d) :: splxy
+  DOUBLE PRECISION :: mem
+  INTEGER :: i, j, ii
+  INTEGER :: ibc(2,10)
+  DOUBLE PRECISION, ALLOCATABLE:: fbc(:,:,:)
+!
+  NAMELIST /newrun/ nx, ny, nidbas, mbes, coefx, coefy
+
+!===========================================================================
+!              1.0 Prologue
+!
+!   Read in data specific to run
+!
+  nx = 8              ! Number of intevals in x
+  ny = 8              ! Number of intevals in y
+  nidbas = (/3,3/)    ! Degree of splines
+  mbes = 2
+  coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+  coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Define grid on x axis
+!
+  pi = 4.0d0*ATAN(1.0d0)
+  ALLOCATE(xgrid(0:nx), ygrid(0:ny), fgrid(0:ny,0:nx))
+  xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
+  CALL meshdist(coefx, xgrid, nx)
+  ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
+  CALL meshdist(coefy, ygrid, ny)
+  fgrid = TRANSPOSE(func(xgrid,ygrid))
+!
+  WRITE(*,'(a/(12f8.3))') 'XGRID', xgrid(0:nx)
+  WRITE(*,'(a/(12f8.3))') 'YGRID', ygrid(0:ny)
+  IF( nx.LE.10 .AND. ny.LE.10 ) THEN
+     WRITE(*,'(a)') 'FGRID(y, x)'
+     DO j=0,ny
+        WRITE(*,'(12f8.3)') fgrid(j,:)
+     END DO
+     WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem()
+  END IF
+!
+!   Create hdf5 file
+!
+  CALL creatf(file, fid, 'PDE2D Result File')
+  CALL attach(fid, '/', 'NX', nx)
+  CALL attach(fid, '/', 'NY', ny)
+  CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
+  CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
+  CALL attach(fid, '/', 'MBESS', mbes)
+!===========================================================================
+!              2.0 Spline interpolation
+!
+!   Setup the spline interpolation
+  ii=1           ! Start with first derivative
+  DO i = 1, nidbas(1)/2
+     ibc(1,i) = ii+i-1
+     ibc(2,i) = ii+i-1
+  END DO
+  CALL set_splcoef((/nidbas(2), nidbas(1)/), ygrid, xgrid, splxy, (/.TRUE., .FALSE./),&
+       &           ibc2=ibc)
+  WRITE(*,'(a,f8.3)') 'Memory used so far (MB) =', mem()
+!
+!   Compute spline interpolation coefficients
+  CALL get_dim(splxy, dims)
+  ALLOCATE(bcoefs(dims(1),dims(2)))
+  WRITE(*,'(a,2i4)') 'Dims of spline', dims
+!
+  ALLOCATE(fbc(2, nidbas(1)/2, 0:ny))
+  fbc=0.0d0
+!
+!!$!   Exact first derivatives at boundaries
+!!$  fbc(1,1:1,:) = func1(xgrid(0:0), ygrid(0:ny))
+!!$  fbc(2,1:1,:) = func1(xgrid(nx:nx), ygrid(0:ny))
+!
+!!$!   Derivatives at boundaries approximated with FD
+!!$  DO j=0,ny
+!!$     fbc(1,1,j+1) = fgrid(1,j)-fgrid(0,j)
+!!$     fbc(2,1,j+1) = fgrid(nx,j)-fgrid(nx-1,j)
+!!$  END DO
+!!$  fbc(1,1,:) = fbc(1,1,:)/(xgrid(1)-xgrid(0))
+!!$  fbc(2,1,:) = fbc(2,1,:)/(xgrid(nx)-xgrid(nx-1))
+!
+  WRITE(*,'(a/(10f8.3))') 'fbc(1)', fbc(1,1,:)
+  WRITE(*,'(a/(10f8.3))') 'fbc(2)', fbc(2,1,:)
+!
+  CALL get_splcoef(splxy, fgrid, bcoefs, fbc2=fbc)
+!
+  DEALLOCATE(fbc)
+!===========================================================================
+!              2.0 Check interpolation
+!
+  dx=(xgrid(nx)-xgrid(0))/REAL(nptx-1)
+  dy=(ygrid(ny)-ygrid(0))/REAL(npty-1)
+  DO i=1,nptx
+     xpt(i) = xgrid(0) + (i-1)*dx
+  END DO
+  DO i=1,npty
+     ypt(i) = ygrid(0) + (i-1)*dy
+  END DO
+  fexact = TRANSPOSE(func(xpt,ypt))
+  CALL gridval(splxy, ypt, xpt, fcalc, (/0,0/), bcoefs)
+  errs = fcalc-fexact
+  WRITE(*,'(a,2(1pe12.3))') 'Min max or errors', MINVAL(errs), MAXVAL(errs)
+!
+  CALL putarr(fid, '/xpt', xpt, 'r')
+  CALL putarr(fid, '/ypt', ypt, '\theta')
+  CALL putarr(fid, '/fcalc', fcalc, 'Interpolated')
+  CALL putarr(fid, '/fexact', fexact, 'Exact')
+  CALL putarr(fid, '/errs', errs, 'Errors')
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(bcoefs)
+  DEALLOCATE(xgrid, ygrid, fgrid)
+  CALL destroy_sp(splxy)
+  CALL closef(fid)
+!
+!===========================================================================
+CONTAINS
+  FUNCTION func(x,y)
+    DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
+    DOUBLE PRECISION :: func(SIZE(x), SIZE(y))
+    DOUBLE PRECISION :: zy
+    INTEGER :: j
+    DO j=1,SIZE(y)
+       zy = -mbes * SIN(mbes*y(j))
+       func(:,j) =(1-x(:)**2) * x(:)**mbes * zy
+    END DO
+  END FUNCTION func
+  FUNCTION func1(x,y)
+    DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
+    DOUBLE PRECISION :: func1(SIZE(x), SIZE(y))
+    DOUBLE PRECISION :: zy
+    INTEGER :: j
+    DO j=1,SIZE(y)
+       zy = -mbes * SIN(mbes*y(j))
+       func1(:,j) = (mbes - (mbes+2.0d0)*x(:)**2) * x(:)**(mbes-1) * zy
+    END DO
+  END FUNCTION func1
+!
+  FUNCTION norm2(x)
+!
+!  Compute the 2-norm of array x
+!
+    DOUBLE PRECISION :: norm2
+    DOUBLE PRECISION, INTENT(in) :: x(:)
+    DOUBLE PRECISION :: sum2
+    INTEGER :: i
+!
+    sum2 = 0.0d0
+    DO i=1,SIZE(x,1)
+       sum2 = sum2 + x(i)**2
+    END DO
+    norm2 = SQRT(sum2)
+  END FUNCTION norm2
+END PROGRAM main
diff --git a/examples/getgrad_perf.f90 b/examples/getgrad_perf.f90
new file mode 100644
index 0000000..0a168c9
--- /dev/null
+++ b/examples/getgrad_perf.f90
@@ -0,0 +1,221 @@
+!>
+!> @file getgrad_perf.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   Test and compare performance of using "spline" and
+!   "pp" forms. 2D case
+!
+  USE bsplines
+!
+  IMPLICIT NONE
+  INTEGER :: nx, ny, ngauss(2), nidbas(2), nits
+  INTEGER :: npt, d1, d2
+  INTEGER :: i, j, its, ngroup=4
+  INTEGER :: i1, i2, nset, nremain
+  DOUBLE PRECISION :: pi, dx, dy
+  DOUBLE PRECISION :: seconds, t0, t1, tscal, tvec
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid
+  DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: coefs
+  DOUBLE PRECISION, ALLOCATABLE :: xpt(:), ypt(:), fgrid00(:), fgrid01(:), fgrid10(:)
+  DOUBLE PRECISION, ALLOCATABLE :: fgrad00(:), fgrad01(:), fgrad10(:)
+  TYPE(spline2d) :: splxy
+!
+  NAMELIST /newrun/ nx, ny, nidbas, npt, nits
+!===============================================================================
+!                    0.0  Prologue
+!
+!   2D grid
+!
+  nx = 8
+  ny = 8
+  nidbas = (/ 3, 3 /)
+  npt = 100000
+  nits =100
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+  pi = 4.0d0*ATAN(1.0d0)
+  ALLOCATE(xgrid(0:nx), ygrid(0:ny))
+  dx = 1.0d0/REAL(nx)
+  xgrid = (/ (i*dx,i=0,nx) /)
+  dy = 2.0d0*pi/REAL(ny)
+  ygrid = (/ (j*dy,j=0,ny) /)
+!
+  WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
+  WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny)
+!
+!   Set up spline
+!
+  ngauss = 4
+  CALL set_spline(nidbas, ngauss, xgrid, ygrid, splxy, (/.FALSE., .TRUE./))
+  d1 = splxy%sp1%dim
+  d2 = splxy%sp2%dim
+  WRITE(*,'(a,3i4)') 'd1, d2 =', d1, d2
+  WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots
+  WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots
+!
+  ALLOCATE(xpt(npt), ypt(npt))
+  CALL RANDOM_NUMBER(xpt)
+  CALL RANDOM_NUMBER(ypt); ypt = 2.0d0*pi*ypt
+!
+  ALLOCATE(coefs(d1,d2))
+  ALLOCATE(fgrad00(npt), fgrad01(npt), fgrad10(npt))
+  ALLOCATE(fgrid00(npt), fgrid01(npt), fgrid10(npt))
+!
+!===============================================================================
+!                      1.0 PPFORM
+!
+  coefs = 1.0d0 ! => f=1, all derivatives = 0!
+!
+  splxy%sp1%nlppform = .TRUE.
+  splxy%sp2%nlppform = .TRUE.
+  CALL gridval(splxy, xpt, ypt, fgrid00, (/0,0/), coefs)
+!
+!   Vector GRIDVAL
+  WRITE(*,'(/a/a5,2a12)') 'Vector ppform', 'N', 't(s)', 'SpeedUp'
+  ngroup = 1
+  DO WHILE (ngroup.LT.npt/2)
+     nset = npt/ngroup
+     nremain = MODULO(npt, ngroup)
+     IF(nremain.NE.0) nset = nset+1
+     t0 = seconds()
+     i2 = 0
+     DO i=1,nset
+        i1 = i2+1
+        i2 = MIN(i2+ngroup,npt)
+        DO its=1,nits
+           CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fgrid00(i1:i2), (/0,0/))
+           CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fgrid01(i1:i2), (/0,1/))
+           CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fgrid10(i1:i2), (/1,0/))
+        END DO
+     END DO
+     t1 = seconds()-t0
+     tvec = t1/REAL(npt*nits)
+     IF(ngroup.EQ.1) tscal=tvec
+     WRITE(*,'(i5,3(1pe12.3))') ngroup, tvec, tscal/tvec
+     ngroup = 2*ngroup
+  END DO
+  WRITE(*,'(/a,3(1pe12.3))') 'GRIDVAL PPFORM: Max errors', &
+       & MAXVAL(ABS(fgrid00-1.0d0)), MAXVAL(ABS(fgrid01)), MAXVAL(ABS(fgrid10))
+!
+!   Vector GETGRAD
+  WRITE(*,'(/a/a5,2a12)') 'Vector ppform', 'N', 't(s)', 'SpeedUp'
+  ngroup = 1
+  DO WHILE (ngroup.LT.npt/2)
+     nset = npt/ngroup
+     nremain = MODULO(npt, ngroup)
+     IF(nremain.NE.0) nset = nset+1
+     t0 = seconds()
+     i2 = 0
+     DO i=1,nset
+        i1 = i2+1
+        i2 = MIN(i2+ngroup,npt)
+        DO its=1,nits
+           CALL getgrad(splxy, xpt(i1:i2), ypt(i1:i2), &
+                &   fgrad00(i1:i2), fgrad10(i1:i2), fgrad01(i1:i2))
+        END DO
+     END DO
+     t1 = seconds()-t0
+     tvec = t1/REAL(npt*nits)
+     IF(ngroup.EQ.1) tscal=tvec
+     WRITE(*,'(i5,3(1pe12.3))') ngroup, tvec, tscal/tvec
+     ngroup = 2*ngroup
+  END DO
+  WRITE(*,'(/a,3(1pe12.3))') 'GETGRAD PPFORM: Max errors', &
+       & MAXVAL(ABS(fgrad00-fgrid00)), MAXVAL(ABS(fgrad01-fgrid01)), &
+       & MAXVAL(ABS(fgrad10-fgrid10))
+!===============================================================================
+!                      2.0 Spline expansion
+!
+  coefs = 1.0d0 ! => f=1, all derivatives = 0!
+!
+  splxy%sp1%nlppform = .FALSE.
+  splxy%sp2%nlppform = .FALSE.
+  CALL gridval(splxy, xpt, ypt, fgrid00, (/0,0/), coefs)
+!
+!   Vector GRIDVAL
+  WRITE(*,'(/a/a5,2a12)') 'Vector spline', 'N', 't(s)', 'SpeedUp'
+  ngroup = 1
+  DO WHILE (ngroup.LT.npt/2)
+     nset = npt/ngroup
+     nremain = MODULO(npt, ngroup)
+     IF(nremain.NE.0) nset = nset+1
+     t0 = seconds()
+     i2 = 0
+     DO i=1,nset
+        i1 = i2+1
+        i2 = MIN(i2+ngroup,npt)
+        DO its=1,nits
+           CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fgrid00(i1:i2), (/0,0/))
+           CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fgrid01(i1:i2), (/0,1/))
+           CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fgrid10(i1:i2), (/1,0/))
+        END DO
+     END DO
+     t1 = seconds()-t0
+     tvec = t1/REAL(npt*nits)
+     IF(ngroup.EQ.1) tscal=tvec
+     WRITE(*,'(i5,3(1pe12.3))') ngroup, tvec, tscal/tvec
+     ngroup = 2*ngroup
+  END DO
+  WRITE(*,'(/a,3(1pe12.3))') 'GRIDVAL SPLINE: Max errors', &
+       & MAXVAL(ABS(fgrid00-1.0d0)), MAXVAL(ABS(fgrid01)), MAXVAL(ABS(fgrid10))
+!
+!   Vector GETGRAD
+  WRITE(*,'(/a/a5,2a12)') 'Vector spline', 'N', 't(s)', 'SpeedUp'
+  ngroup = 1
+  DO WHILE (ngroup.LT.npt/2)
+     nset = npt/ngroup
+     nremain = MODULO(npt, ngroup)
+     IF(nremain.NE.0) nset = nset+1
+     t0 = seconds()
+     i2 = 0
+     DO i=1,nset
+        i1 = i2+1
+        i2 = MIN(i2+ngroup,npt)
+        DO its=1,nits
+           CALL getgrad(splxy, xpt(i1:i2), ypt(i1:i2), &
+                &   fgrad00(i1:i2), fgrad10(i1:i2), fgrad01(i1:i2))
+        END DO
+     END DO
+     t1 = seconds()-t0
+     tvec = t1/REAL(npt*nits)
+     IF(ngroup.EQ.1) tscal=tvec
+     WRITE(*,'(i5,3(1pe12.3))') ngroup, tvec, tscal/tvec
+     ngroup = 2*ngroup
+  END DO
+  WRITE(*,'(/a,3(1pe12.3))') 'GETGRAD SPLINE: Max errors', &
+       & MAXVAL(ABS(fgrad00-fgrid00)), MAXVAL(ABS(fgrad01-fgrid01)), &
+       & MAXVAL(ABS(fgrad10-fgrid10))
+!===============================================================================
+
+!
+!   Clean up
+!
+  CALL destroy_sp(splxy)
+  DEALLOCATE(xgrid, ygrid, coefs)
+  DEALLOCATE(xpt, ypt, fgrid00, fgrid01, fgrid10)
+  DEALLOCATE(fgrad00, fgrad01, fgrad10)
+END PROGRAM main
diff --git a/examples/gridval_perf.f90 b/examples/gridval_perf.f90
new file mode 100644
index 0000000..d098531
--- /dev/null
+++ b/examples/gridval_perf.f90
@@ -0,0 +1,194 @@
+!>
+!> @file gridval_perf.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   Test and compare performance of using "spline" and
+!   "pp" forms. 2D case
+!
+  USE bsplines
+!
+  IMPLICIT NONE
+  INTEGER :: nx, ny, ngauss(2), nidbas(2), nits
+  INTEGER :: npt, d1, d2
+  INTEGER :: i, j, its, ngroup=4
+  INTEGER :: i1, i2, nset, nremain
+  DOUBLE PRECISION :: pi, dx, dy
+  DOUBLE PRECISION :: seconds, t0, t1, tscal, tvec
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid
+  DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: coefs
+  DOUBLE PRECISION, ALLOCATABLE :: xpt(:), ypt(:), fpt00(:), fpt01(:), fpt10(:)
+  DOUBLE PRECISION, ALLOCATABLE :: fscal00(:), fscal01(:), fscal10(:)
+  TYPE(spline2d) :: splxy
+!
+  NAMELIST /newrun/ nx, ny, nidbas, npt, nits
+!===============================================================================
+!                    0.0  Prologue
+!
+!   2D grid
+!
+  nx = 8
+  ny = 8
+  nidbas = (/ 3, 3 /)
+  npt = 100000
+  nits =100
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+  pi = 4.0d0*ATAN(1.0d0)
+  ALLOCATE(xgrid(0:nx), ygrid(0:ny))
+  dx = 1.0d0/REAL(nx)
+  xgrid = (/ (i*dx,i=0,nx) /)
+  dy = 2.0d0*pi/REAL(ny)
+  ygrid = (/ (j*dy,j=0,ny) /)
+!
+  WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
+  WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny)
+!
+!   Set up spline
+!
+  ngauss = 4
+  CALL set_spline(nidbas, ngauss, xgrid, ygrid, splxy, (/.FALSE., .TRUE./))
+  d1 = splxy%sp1%dim
+  d2 = splxy%sp2%dim
+  WRITE(*,'(a,3i4)') 'd1, d2 =', d1, d2
+  WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots
+  WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots
+!===============================================================================
+!                      1.0 PPFORM
+!
+  ALLOCATE(xpt(npt), ypt(npt))
+  CALL RANDOM_NUMBER(xpt)
+  CALL RANDOM_NUMBER(ypt); ypt = 2.0d0*pi*ypt
+!
+  ALLOCATE(coefs(d1,d2))
+  ALLOCATE(fscal00(npt), fscal01(npt), fscal10(npt))
+  ALLOCATE(fpt00(npt), fpt01(npt), fpt10(npt))
+!
+  coefs = 1.0d0 ! => f=1, all derivatives = 0!
+!
+  splxy%sp1%nlppform = .TRUE.
+  splxy%sp2%nlppform = .TRUE.
+  CALL gridval(splxy, xpt, ypt, fscal00, (/0,0/), coefs)
+!
+!    Scalar PPFORM
+  t0 = seconds()
+  DO i=1,npt
+     DO its=1,nits
+        CALL gridval(splxy, xpt(i), ypt(i), fscal00(i), (/0,0/))
+        CALL gridval(splxy, xpt(i), ypt(i), fscal01(i), (/0,1/))
+        CALL gridval(splxy, xpt(i), ypt(i), fscal10(i), (/1,0/))
+     END DO
+  END DO
+  t1 = seconds()-t0
+  tscal =  t1/REAL(npt*nits,8)
+  WRITE(*,'(/a,3(1pe12.3))') 'Scalar PPFORM: Max errors', &
+       & MAXVAL(ABS(fscal00-1.0d0)), MAXVAL(ABS(fscal01)), MAXVAL(ABS(fscal10))
+  WRITE(*,'(a,3(1pe12.3))') 'time(s)', tscal
+!
+!   Vector PPFORM
+  WRITE(*,'(/a/a5,2a12)') 'Vector ppform', 'N', 't(s)', 'SpeedUp'
+  ngroup = 1
+  DO WHILE (ngroup.LT.npt/2)
+     nset = npt/ngroup
+     nremain = MODULO(npt, ngroup)
+     IF(nremain.NE.0) nset = nset+1
+     t0 = seconds()
+     i2 = 0
+     DO i=1,nset
+        i1 = i2+1
+        i2 = MIN(i2+ngroup,npt)
+        DO its=1,nits
+           CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fpt00(i1:i2), (/0,0/))
+           CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fpt01(i1:i2), (/0,1/))
+           CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fpt10(i1:i2), (/1,0/))
+        END DO
+     END DO
+     t1 = seconds()-t0
+     tvec = t1/REAL(npt*nits)
+     WRITE(*,'(i5,3(1pe12.3))') ngroup, tvec, tscal/tvec
+     ngroup = 2*ngroup
+  END DO
+  WRITE(*,'(/a,3(1pe12.3))') 'Vector PPFORM: Max errors', &
+       & MAXVAL(ABS(fpt00-fscal00)), MAXVAL(ABS(fpt01-fscal01)), MAXVAL(ABS(fpt10-fscal10))
+!===============================================================================
+!                      2.0 Sline expansion
+!
+  coefs = 1.0d0 ! => f=1, all derivatives = 0!
+!
+  splxy%sp1%nlppform = .FALSE.
+  splxy%sp2%nlppform = .FALSE.
+  CALL gridval(splxy, xpt, ypt, fscal00, (/0,0/), coefs)
+!
+!    Scalar SPLINE
+  t0 = seconds()
+  DO i=1,npt
+     DO its=1,nits
+        CALL gridval(splxy, xpt(i), ypt(i), fscal00(i), (/0,0/))
+        CALL gridval(splxy, xpt(i), ypt(i), fscal01(i), (/0,1/))
+        CALL gridval(splxy, xpt(i), ypt(i), fscal10(i), (/1,0/))
+     END DO
+  END DO
+  t1 = seconds()-t0
+  tscal =  t1/REAL(npt*nits,8)
+  WRITE(*,'(/a,3(1pe12.3))') 'Scalar SPLINE: Max errors', &
+       & MAXVAL(ABS(fscal00-1.0d0)), MAXVAL(ABS(fscal01)), MAXVAL(ABS(fscal10))
+  WRITE(*,'(a,3(1pe12.3))') 'time(s)', tscal
+!
+!   Vector SPLINE
+  WRITE(*,'(/a/a5,2a12)') 'Vector spline', 'N', 't(s)', 'SpeedUp'
+  ngroup = 1
+  DO WHILE (ngroup.LT.npt/2)
+     nset = npt/ngroup
+     nremain = MODULO(npt, ngroup)
+     IF(nremain.NE.0) nset = nset+1
+     t0 = seconds()
+     i2 = 0
+     DO i=1,nset
+        i1 = i2+1
+        i2 = MIN(i2+ngroup,npt)
+        DO its=1,nits
+           CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fpt00(i1:i2), (/0,0/))
+           CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fpt01(i1:i2), (/0,1/))
+           CALL gridval(splxy, xpt(i1:i2), ypt(i1:i2), fpt10(i1:i2), (/1,0/))
+        END DO
+     END DO
+     t1 = seconds()-t0
+     tvec = t1/REAL(npt*nits)
+     WRITE(*,'(i5,3(1pe12.3))') ngroup, tvec, tscal/tvec
+     ngroup = 2*ngroup
+  END DO
+  WRITE(*,'(/a,3(1pe12.3))') 'Vector SPLINE: Max errors', &
+       & MAXVAL(ABS(fpt00-fscal00)), MAXVAL(ABS(fpt01-fscal01)), MAXVAL(ABS(fpt10-fscal10))
+!===============================================================================
+
+!
+!   Clean up
+!
+  CALL destroy_sp(splxy)
+  DEALLOCATE(xgrid, ygrid, coefs)
+  DEALLOCATE(xpt, ypt, fpt00, fpt01, fpt10)
+  DEALLOCATE(fscal00, fscal01, fscal10)
+END PROGRAM main
diff --git a/examples/gyro.f90 b/examples/gyro.f90
new file mode 100644
index 0000000..d5a79e8
--- /dev/null
+++ b/examples/gyro.f90
@@ -0,0 +1,179 @@
+!>
+!> @file gyro.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   Gyro-average using splines.
+!   F(x)=cos(x) => \bar F(x,rho) = J_0(rho) cos(x)
+!
+  USE bsplines
+  USE futils
+!
+  IMPLICIT NONE
+  INTEGER, PARAMETER :: nx=10, nidbas=3, dim=nx+nidbas, npt=100, &
+       &                nrho=21, nnq=5
+  DOUBLE PRECISION :: xgrid(0:nx), fgrid(0:nx), coefs(dim)
+  DOUBLE PRECISION :: xpt(npt), fcalc(npt), fexact(npt)
+  DOUBLE PRECISION :: averf(0:nx), averfexact(0:nx)
+  DOUBLE PRECISION, POINTER :: splines(:,:)
+  TYPE(spline1d) :: spl
+  DOUBLE PRECISION :: pi, twopi, dx, lperiod, dth
+  DOUBLE PRECISION :: drho, rho(nrho), erraver(nrho,nnq)
+  INTEGER :: i, j, nq(nnq)
+  DOUBLE PRECISION :: dbesj0
+!
+  CHARACTER(len=128) :: file='gyro.h5'
+  INTEGER :: fid
+!
+  INTERFACE
+     SUBROUTINE gyro(spl, xgrid, coefs, rho, nq, averf)
+       USE bsplines
+       TYPE(spline1d) :: spl
+       DOUBLE PRECISION, INTENT(in) :: xgrid(0:), coefs(:), rho
+       INTEGER, INTENT(in) :: nq
+       DOUBLE PRECISION, INTENT(out) :: averf(0:)
+     END SUBROUTINE gyro
+  END INTERFACE
+!
+  pi = 4.0d0*ATAN(1.0d0)
+  twopi = 2.0d0*pi
+!
+!   Create hdf5 file
+  CALL creatf(file, fid, 'gyro Result File')
+  CALL attach(fid, '/', 'NX', nx)
+  CALL attach(fid, '/', 'NIDBAS', nidbas)
+!
+!   Grid and function values
+  dx = twopi/nx
+  xgrid(0) = 0.0d0
+  DO i=1,nx
+     xgrid(i) = xgrid(0) + i*dx
+  END DO
+  lperiod = xgrid(nx)-xgrid(0)
+  fgrid = func(xgrid)
+!
+!   Spline interpolation
+  CALL set_splcoef(nidbas, xgrid, spl, period=.TRUE.)
+  CALL get_splcoef(spl, fgrid, coefs)
+  WRITE(*,'(a)') 'Spline coefficients'
+  WRITE(*,'(i5,f12.4)') (i-1, coefs(i), i=1,dim)
+!
+!   Error of interpolation
+  CALL RANDOM_NUMBER(xpt)
+  xpt = twopi*xpt
+  CALL gridval(spl, xpt, fcalc, 0, coefs)
+  fexact = func(xpt)
+!!$  WRITE(*,'(a)') 'Interpolated and exact f'
+!!$  WRITE(*,'(3(1pe12.3))') (xpt(i), fcalc(i), fexact(i), i=1,npt)
+  WRITE(*,'(a,1pe12.3)') 'Interpolation error', norm2(fcalc-fexact)
+  CALL putarr(fid, '/X', xpt)
+  CALL putarr(fid, '/FEXACT', fexact, 'Exact values')
+  CALL putarr(fid, '/FCALC', fcalc, 'Interpolated values')
+!
+!   Gyro-averaged of F at grid points xgrid(0:nx-1)
+  drho = 5.0d0/nrho
+  DO i=1,nrho
+     rho(i) = i*drho
+     DO j=1,nnq
+        nq(j) = 2**(j+1)
+        CALL gyro(spl, xgrid, coefs, rho(i), nq(j), averf)
+        averfexact = dbesj0(rho(i))*COS(xgrid)
+        erraver(i,j) = norm2(averfexact-averf)
+     END DO
+  END DO
+!
+  WRITE(*,'(a,f8.3,i5)') 'averaged f at rho, nq =', rho(nrho), nq(nnq)
+  WRITE(*,'(3(1pe12.3))') (xgrid(i),averf(i),averfexact(i), i=0,nx)
+  CALL putarr(fid, '/XGRID', xgrid)
+  CALL putarr(fid, '/AVERF', averf, 'Averaged F')
+  CALL putarr(fid, '/AVERFEXACT', averfexact, 'Averaged F exact')
+!
+  CALL putarr(fid, '/RHO', rho)
+  CALL putarr(fid, '/NQ', nq)
+  CALL putarr(fid, '/ERRAVER', erraver)
+!
+!   Clean up
+  CALL destroy_sp(spl)
+  CALL closef(fid)
+CONTAINS
+  FUNCTION func(x)
+    DOUBLE PRECISION, INTENT(in) :: x(:)
+    DOUBLE PRECISION :: func(SIZE(x))
+    func = COS(x)
+  END FUNCTION func
+  FUNCTION norm2(x)
+    DOUBLE PRECISION :: norm2
+    DOUBLE PRECISION, INTENT(in) :: x(:)
+    norm2 = SQRT(DOT_PRODUCT(x,x))
+  END FUNCTION norm2
+END PROGRAM main
+!
+SUBROUTINE gyro(spl, xgrid, coefs, rho, nq, averf)
+!
+!  Gyro-average using spline SPL and NQ point quadratire for
+!  theta-integration.
+!
+  USE bsplines
+  IMPLICIT NONE
+  TYPE(spline1d) :: spl
+  DOUBLE PRECISION, INTENT(in) :: xgrid(0:), coefs(:), rho
+  INTEGER, INTENT(in) :: nq
+  DOUBLE PRECISION, INTENT(out) :: averf(0:)
+!
+  DOUBLE PRECISION :: th(nq), wth(nq), xq(nq)
+  DOUBLE PRECISION, ALLOCATABLE :: avermat(:,:)
+  DOUBLE PRECISION :: pi, twopi, lperiod, dth
+  DOUBLE PRECISION, POINTER :: splines(:,:)
+  INTEGER :: i, j, iq, nx, dim
+!
+  pi = 4.0d0*ATAN(1.0d0)
+  twopi = 2.0d0*pi
+!
+!   Quadrature in theta
+  dth = twopi/nq
+  th(1) = -pi + dth/2.0d0
+  DO iq=2,nq
+     th(iq) = th(iq-1)+dth
+  END DO
+  wth = dth
+!
+!   Gyro-averaging matrix
+  CALL get_dim(spl, dim, nx)
+  lperiod = xgrid(nx)-xgrid(0)
+  ALLOCATE(avermat(0:nx,dim))
+  DO i=0,nx
+     xq = xgrid(i) + rho*COS(th)
+     xq = xgrid(0) + MODULO(xq-xgrid(0), lperiod)
+     CALL allsplines(spl, xq, splines)
+     DO j=1,dim
+        avermat(i,j) = DOT_PRODUCT(wth, splines(:,j))/twopi
+     END DO
+  END DO
+!
+!   Gyro-averaged of F at grid points xgrid(0:nx)
+  averf = MATMUL(avermat, coefs)
+!
+  DEALLOCATE(avermat)
+END SUBROUTINE gyro
diff --git a/examples/ibcmat.f90 b/examples/ibcmat.f90
new file mode 100644
index 0000000..d48ea56
--- /dev/null
+++ b/examples/ibcmat.f90
@@ -0,0 +1,176 @@
+!>
+!> @file ibcmat.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+SUBROUTINE ibcmat(mat, ny)
+!
+!   Apply BC on matrix
+!
+  USE matrix
+  IMPLICIT NONE
+  TYPE(gbmat), INTENT(inout) :: mat
+  INTEGER, INTENT(in) :: ny
+  INTEGER :: kl, ku, nrank, i, j
+  DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
+  INTEGER :: i0, ii
+  INTEGER :: i0_arr(ny), col(ny)
+!===========================================================================
+!              1.0 Prologue
+!
+
+  kl = mat%kl
+  ku = mat%ku
+  nrank = mat%rank
+  ALLOCATE(zsum(nrank), arr(nrank))
+!
+  i0 = nrank - ku
+  WRITE(*,'(a,i6)') 'Estimated i0', i0
+  DO i=1,ny
+     arr = 0.0d0
+     col(i) = nrank-ny+i
+     CALL getcol(mat, nrank-ny+i, arr)
+     DO ii=1,nrank
+        i0_arr(i)=ii
+        IF(arr(ii) .NE. 0.0d0) EXIT
+     END DO
+  END DO
+!!$  WRITE(*,'(a/(10i6))') 'col', col
+!!$  WRITE(*,'(a/(10i6))') 'i0_arr', i0_arr
+!
+!===========================================================================
+!              2.0 Unicity at the axis
+!
+!   The vertical sum on the NY-th row
+!
+  zsum = 0.0d0
+  DO i=1,ny
+     arr = 0.0d0
+     CALL getrow(mat, i, arr)
+     DO j=1,ny+ku
+        zsum(j) = zsum(j) + arr(j)
+     END DO
+  END DO
+  CALL putrow(mat, ny, zsum)
+!
+!   The horizontal sum on the NY-th column
+!
+  zsum = 0.0d0
+  DO j=1,ny
+     arr = 0.0d0
+     CALL getcol(mat, j, arr)
+     DO i=ny,ny+kl
+        zsum(i) = zsum(i) + arr(i)
+     END DO
+  END DO
+  CALL putcol(mat, ny, zsum)
+!
+!   The away operator
+!
+  DO j = 1,ny-1
+     arr = 0.0d0; arr(j) = 1.0d0
+     CALL putcol(mat, j, arr)     
+  END DO
+!
+  DO i = 1,ny-1
+     arr = 0.0d0; arr(i) = 1.0d0
+     CALL putrow(mat, i, arr)     
+  END DO
+!  
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+  DO j = nrank, nrank-ny+1, -1
+     arr = 0.0d0; arr(j) = 1.0d0
+     CALL putcol(mat, j, arr)     
+  END DO
+!
+  DO i = nrank, nrank-ny+1, -1
+     arr = 0.0d0; arr(i) = 1.0d0
+     CALL putrow(mat, i, arr)     
+  END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(zsum, arr)
+!
+END SUBROUTINE ibcmat
+!+++
+SUBROUTINE ibcrhs(rhs, ny)
+!
+!   Apply BC on RHS
+!
+  IMPLICIT NONE
+  DOUBLE PRECISION, INTENT(inout) :: rhs(:)
+  INTEGER, INTENT(in) :: ny
+  INTEGER :: nrank
+  DOUBLE PRECISION :: zsum
+!===========================================================================
+!              1.0 Prologue
+!
+  nrank = SIZE(rhs,1)
+!===========================================================================
+!              2.0 Unicity at the axis
+!
+!   The vertical sum
+!
+  zsum = SUM(rhs(1:ny))
+  rhs(ny) = zsum
+  rhs(1:ny-1) = 0.0d0
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+  rhs(nrank-ny+1:nrank) = 0.0d0
+END SUBROUTINE ibcrhs
+!+++
+SUBROUTINE ibcrhs3(rhs, ny)
+!
+!   Apply BC on RHS
+!
+  IMPLICIT NONE
+  DOUBLE PRECISION, INTENT(inout) :: rhs(:,:)
+  INTEGER, INTENT(in) :: ny
+  INTEGER :: nrank, nz, k
+  DOUBLE PRECISION :: zsum
+!===========================================================================
+!              1.0 Prologue
+!
+  nrank = SIZE(rhs,1)
+  nz = SIZE(rhs,2)
+!===========================================================================
+!              2.0 Unicity at the axis
+!
+!   The vertical sum
+!
+  DO k=1,nz
+     zsum = SUM(rhs(1:ny,k))
+     rhs(ny,k) = zsum
+     rhs(1:ny-1,k) = 0.0d0
+  END DO
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+  DO k=1,nz
+     rhs(nrank-ny+1:nrank,k) = 0.0d0
+  END DO
+END SUBROUTINE ibcrhs3
diff --git a/examples/mesh.f90 b/examples/mesh.f90
new file mode 100644
index 0000000..aa102a9
--- /dev/null
+++ b/examples/mesh.f90
@@ -0,0 +1,66 @@
+!>
+!> @file mesh.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   Equidistant and non-equidistant mesh
+!
+  USE bsplines
+  IMPLICIT NONE
+  INTEGER :: i, nx
+  DOUBLE PRECISION :: coefs(5), dev, dx
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid
+!
+  INTERFACE
+     SUBROUTINE meshdist(coefs, x, nx)
+       DOUBLE PRECISION, INTENT(in) :: coefs(5)
+       INTEGER, INTENT(iN) :: nx
+       DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+     END SUBROUTINE meshdist
+  END INTERFACE
+!
+  NAMELIST /newrun/ nx,  coefs
+!
+  nx = 8     ! Number oh intevals in x
+  coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+                                                 ! see function FDIST in MESHDIST
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+  ALLOCATE(xgrid(0:nx))
+  xgrid(0) = 0.0d0
+  xgrid(nx) = 1.0d0
+  CALL meshdist(coefs, xgrid, nx)
+  PRINT*, 'Mesh is equidistant?', is_equid(xgrid, dev)
+  PRINT*, 'dev =', dev
+!
+  dx = 1.0d0/REAL(nx,8)
+  xgrid = (/ (i*dx, i=0,nx) /)
+  PRINT*, 'Mesh is equidistant?', is_equid(xgrid, dev)
+  PRINT*, 'dev =', dev
+!
+  DEALLOCATE(xgrid)
+END PROGRAM main
diff --git a/examples/meshdist.f90 b/examples/meshdist.f90
new file mode 100644
index 0000000..a20c2fd
--- /dev/null
+++ b/examples/meshdist.f90
@@ -0,0 +1,82 @@
+!>
+!> @file meshdist.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+SUBROUTINE meshdist(c, x, nx)
+!
+!   Construct a 1d  non-equidistant mesh given a
+!   mesh distribution function.
+!
+  IMPLICIT NONE
+  DOUBLE PRECISION, INTENT(in) :: c(5)
+  INTEGER, INTENT(iN) :: nx
+  DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+  INTEGER :: nintg
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+  DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+  INTEGER :: i, k
+!
+  a=x(0)
+  b=x(nx)
+  nintg = 10*nx
+  ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+  dx = (b-a)/REAL(nintg)
+  xint(0) = a
+  fint(0) = 0.0d0
+  f1 = fdist(xint(0))
+  DO i=1,nintg
+     f0 = f1
+     xint(i) = xint(i-1) + dx
+     f1 = fdist(xint(i))
+     fint(i) = fint(i-1) + 0.5*(f0+f1)
+  END DO
+!
+!  Normalization
+!
+  scal = REAL(nx) / fint(nintg)
+  fint(0:nintg) = fint(0:nintg) * scal
+!!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+  k = 1
+  DO i=1,nintg-1
+     IF( fint(i) .GE. REAL(k) ) THEN
+        x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+             &   (k-fint(i))
+        k = k+1
+     END IF
+  END DO
+!
+  DEALLOCATE(xint, fint)
+CONTAINS
+  DOUBLE PRECISION FUNCTION fdist(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
+  END FUNCTION fdist
+END SUBROUTINE meshdist
+
diff --git a/examples/moments.f90 b/examples/moments.f90
new file mode 100644
index 0000000..0cb925b
--- /dev/null
+++ b/examples/moments.f90
@@ -0,0 +1,228 @@
+!>
+!> @file moments.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+!
+!   Compute moments of f(x), using its Spline representation
+!
+MODULE globals
+  USE bsplines
+  USE matrix
+  IMPLICIT NONE
+  DOUBLE PRECISION, PARAMETER   :: pi = 3.14159265359d0
+  DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), rhs(:), sol(:)
+  DOUBLE PRECISION, ALLOCATABLE :: finteg(:,:), moms(:)
+  TYPE(spline1d), SAVE :: splx
+  TYPE(gbmat), SAVE    :: mat
+CONTAINS
+!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE dismat(spl, mat)
+!
+!   Assembly FE matrix mat using spline spl
+!
+    TYPE(spline1d), INTENT(in) :: spl
+    TYPE(gbmat), INTENT(inout) :: mat
+    INTEGER :: nrank, nx, nidbas, ngauss
+    INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
+    DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
+    DOUBLE PRECISION :: contrib
+!
+    INTEGER :: kterms                ! Number of terms in weak form
+    INTEGER, ALLOCATABLE :: idert(:), iderw(:)  ! Derivative order
+    DOUBLE PRECISION, ALLOCATABLE :: coefs(:)
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    CALL get_dim(spl, nrank, nx, nidbas)
+    ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
+!
+!   Weak form
+!
+    kterms = mat%nterms
+    ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
+!
+!   Gauss quadature
+!
+    CALL get_gauss(spl, ngauss)
+    ALLOCATE(xgauss(ngauss), wgauss(ngauss))
+!===========================================================================
+!              2.0 Assembly loop
+!
+    DO i=1,nx
+       CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
+       DO igauss=1,ngauss
+          CALL basfun(xgauss(igauss), spl, fun, i)
+          CALL coefeq(xgauss(igauss), idert, iderw, coefs)
+          DO iterm=1,kterms
+             DO jt=0,nidbas
+                DO iw=0,nidbas
+                   contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
+                        &    fun(iw,iderw(iterm)) * wgauss(igauss)
+                   irow=i+iw; jcol=i+jt
+                   CALL updtmat(mat, irow, jcol, contrib)
+                END DO
+             END DO
+          END DO
+       END DO
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(fun)
+    DEALLOCATE(xgauss, wgauss)
+    DEALLOCATE(iderw, idert, coefs)
+!
+  CONTAINS
+    SUBROUTINE coefeq(x, idt, idw, c)
+      DOUBLE PRECISION, INTENT(in) :: x
+      INTEGER, INTENT(out) :: idt(:), idw(:)
+      DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt))
+!
+! Mass matrix
+!
+      c(1) = 1.d0
+      idt(1) = 0
+      idw(1) = 0
+    END SUBROUTINE coefeq
+  END SUBROUTINE dismat
+!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE disrhs(spl, rhs)
+!
+!   Assenbly the RHS using spline spl
+!
+    TYPE(spline1d), INTENT(in) :: spl
+    DOUBLE PRECISION, INTENT(out) :: rhs(:)
+    INTEGER :: nrank, nx, nidbas, ngauss
+    INTEGER :: i, igauss
+    DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
+    DOUBLE PRECISION:: contrib
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    CALL get_dim(spl, nrank, nx, nidbas)
+!
+    ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives)
+!
+!   Gauss quadature
+!
+    CALL get_gauss(spl, ngauss)
+    ALLOCATE(xgauss(ngauss), wgauss(ngauss))
+!===========================================================================
+!              2.0 Assembly loop
+!
+    rhs(1:nrank) = 0.0d0
+!
+    DO i=1,nx
+       CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
+       DO igauss=1,ngauss
+          CALL basfun(xgauss(igauss), spl, fun, i)
+          contrib = wgauss(igauss)*rhseq(xgauss(igauss))
+          rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1)
+       END DO
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(fun)
+    DEALLOCATE(xgauss, wgauss)
+!
+  CONTAINS
+    DOUBLE PRECISION FUNCTION rhseq(v)
+      DOUBLE PRECISION, INTENT(in) :: v
+      rhseq = SQRT(1.0d0/(2.0d0*pi)) * EXP(-0.5d0*v**2)
+    END FUNCTION rhseq
+  END SUBROUTINE disrhs
+END MODULE globals
+!
+!================================================================================
+PROGRAM main
+  USE bsplines
+  USE globals
+  IMPLICIT NONE
+  INTEGER          :: nidbas, nx, nmoms, ngauss, rank, kl, ku
+  INTEGER          :: i
+  DOUBLE PRECISION :: a, b, dx
+!
+!   Input
+!
+  WRITE(*,'(a)') 'Enter, nidbas, a, b, nx, nmoms'
+  READ(*,*) nidbas, a, b, nx, nmoms
+!
+!   Equidistant mesh
+!
+  ALLOCATE(xgrid(0:nx))
+  dx = (b-a)/REAL(nx)
+  DO i=0, nx
+     xgrid(i) = a + i*dx
+  END DO
+  WRITE(*,'(a/(8(1pe12.4)))') 'XGRID', xgrid
+!
+!   Set up spline
+!
+  ngauss = nidbas+1
+  CALL set_spline(nidbas, ngauss, xgrid, splx)
+!
+!    Mass matrix
+!
+  CALL get_dim(splx, rank)       ! Rank of the FE Mass matrix
+  kl = nidbas
+  ku = kl
+  CALL init(kl, ku, rank, 1, mat)
+  WRITE(*,'(a,3i6)') 'kl, ku, rank', kl, ku, rank
+  CALL dismat(splx, mat)
+!
+!   RHS
+!
+  ALLOCATE(rhs(rank), sol(rank))
+  CALL disrhs(splx, rhs)
+!
+!   Solve for Spline coefs
+!
+  CALL factor(mat)
+  CALL bsolve(mat, rhs, sol)
+  WRITE(*,'(a/(8(1pe12.4)))') 'SOL', sol
+  WRITE(*,'(a,1pe20.12)') ' Integral of sol using FINTG', fintg(splx, sol)
+!
+!   Moments
+!
+  ALLOCATE(finteg(rank,0:nmoms), moms(0:nmoms))
+  CALL calc_integ(splx, finteg)
+  DO i=0,nmoms
+     moms(i) = DOT_PRODUCT(sol(:), finteg(:,i))
+  END DO
+  WRITE(*,'(a,i3)') 'Moments of orders from 0 to', nmoms
+  WRITE(*,'(8(1pe20.12))') moms
+!
+  DEALLOCATE(finteg, moms)
+  DEALLOCATE(rhs, sol)
+  DEALLOCATE(xgrid)
+  CALL destroy(mat)
+  CALL destroy_sp(splx)
+END PROGRAM main
diff --git a/examples/optim1.f90 b/examples/optim1.f90
new file mode 100644
index 0000000..8db612e
--- /dev/null
+++ b/examples/optim1.f90
@@ -0,0 +1,138 @@
+!>
+!> @file optim1.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   Test and compare performance of using "spline" and
+!   "pp" forms. 1D case
+!
+  USE bsplines
+!
+  IMPLICIT NONE
+  INTEGER :: nx, nidbas, nrank, npt=1000000
+  INTEGER :: i
+  DOUBLE PRECISION :: dx
+  DOUBLE PRECISION :: seconds, t0, t1
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, coefs
+  DOUBLE PRECISION, ALLOCATABLE :: xpt(:), fpt0(:), fpt1(:), fun(:, :)
+  INTEGER, ALLOCATABLE :: left(:)
+  TYPE(spline1d) :: splx
+!
+  NAMELIST /newrun/ nx, nidbas, npt
+!===============================================================================
+!
+!   1D grid
+!
+  nx = 10
+  nidbas = 3
+  npt = 1000000
+  READ(*,newrun)
+  WRITE(*,newrun)
+
+  ALLOCATE(xgrid(0:nx))
+  dx = 1.0d0/REAL(nx)
+  xgrid = (/ (i*dx,i=0,nx) /)
+  WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
+!
+!   Set up spline
+!
+  CALL set_spline(nidbas, 4, xgrid, splx)
+  nrank = splx%dim
+  WRITE(*,'(a, i5)') 'nrank =', nrank
+  WRITE(*,'(a/(10f8.3))') 'knots', splx%knots
+!
+  ALLOCATE(xpt(npt))
+  ALLOCATE(left(npt))
+  ALLOCATE(fun(0:nidbas,0:1))  ! Values and first derivatives of all Splines
+  CALL RANDOM_NUMBER(xpt)
+  CALL locintv(splx, xpt, left)
+!===============================================================================
+!
+!   Check def_basfun_opt
+!
+  CALL basfun_recur(xpt(101), splx, fun, left(101)+1)
+  WRITE(*,'(/a,f20.15,i4/(2f20.15))') 'BASFUN_RECUR at X=', xpt(101), left(101),&
+       & (fun(:,i), i=0,1)
+!
+!!$  CALL def_basfun(xpt(101), splx, fun)
+  CALL basfun(xpt(101), splx, fun, left(101)+1)
+  WRITE(*,'(/a,f20.15/(2f20.15))') 'DEF_BASFUN at X=', xpt(101), &
+       & (fun(:,i), i=0,1)
+!
+!   Performance of basis function computations
+!
+  t0 = seconds()
+  DO i=1,npt
+     CALL basfun_recur(xpt(i), splx, fun, left(i)+1)
+  END DO
+  WRITE(*,'(/a,1pe12.3)') 'BASFUN_RECUR time (s)', (seconds()-t0)/REAL(npt)
+!
+  t0 = seconds()
+  DO i=1,npt
+!!$     CALL def_basfun(xpt(i), splx, fun)
+     CALL basfun(xpt(i), splx, fun, left(i)+1)
+  END DO
+  WRITE(*,'(/a,1pe12.3)') 'DEF_BASFUN time (s)', (seconds()-t0)/REAL(npt)
+!===============================================================================
+!
+!   Check and performance of GRIDVAL using PPFORM and SPLINE expansion
+!
+  ALLOCATE(coefs(nrank))
+  DEALLOCATE(xpt)
+  ALLOCATE(xpt(npt), fpt0(npt), fpt1(npt))
+  CALL RANDOM_NUMBER(xpt)
+!
+  splx%nlppform = .TRUE.
+  coefs = 1.0d0
+!
+  CALL gridval(splx, xpt(1:1), fpt0(1:1), 0, coefs)
+!
+  t0 = seconds()
+  CALL gridval(splx, xpt, fpt1, 1)
+  CALL gridval(splx, xpt, fpt0, 0)
+  t1 = seconds()-t0
+  WRITE(*,'(/a,2(1pe12.3))') 'PPFORM: Max errors', &
+       & MAXVAL(ABS(fpt0-1.0d0)) ,MAXVAL(ABS(fpt1))
+  WRITE(*,'(a,3(1pe12.3))') 'time(s)', t1/REAL(npt)
+!
+  splx%nlppform = .FALSE.
+  coefs = 1.0d0
+  CALL gridval(splx, xpt(1:1), fpt0(1:1), 0, coefs)
+  t0 = seconds()
+  CALL gridval(splx, xpt, fpt1, 1)
+  CALL gridval(splx, xpt, fpt0, 0)
+  t1 = seconds()-t0
+  WRITE(*,'(/a,2(1pe12.3))') 'SPLINES: Max errors', &
+       & MAXVAL(ABS(fpt0-1.0d0)) ,MAXVAL(ABS(fpt1))
+  WRITE(*,'(a,3(1pe12.3))') 'time(s)', t1/REAL(npt)
+!===============================================================================
+!
+!   Clean up
+!
+  CALL destroy_sp(splx)
+  DEALLOCATE(xgrid, coefs)
+  DEALLOCATE(xpt, fpt0, fpt1)
+  DEALLOCATE(fun)
+END PROGRAM main
diff --git a/examples/optim2.f90 b/examples/optim2.f90
new file mode 100644
index 0000000..ffb9773
--- /dev/null
+++ b/examples/optim2.f90
@@ -0,0 +1,119 @@
+!>
+!> @file optim2.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   Test and compare performance of using "spline" and
+!   "pp" forms. 2D case
+!
+  USE bsplines
+!
+  IMPLICIT NONE
+  INTEGER :: nx, ny, ngauss(2), nidbas(2)
+  INTEGER :: npt, d1, d2
+  INTEGER :: i, j
+  DOUBLE PRECISION :: pi, dx, dy
+  DOUBLE PRECISION :: seconds, t0, t1
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid
+  DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: coefs
+  DOUBLE PRECISION, ALLOCATABLE :: xpt(:), ypt(:), fpt00(:), fpt01(:), fpt10(:)
+  TYPE(spline2d) :: splxy
+!
+  NAMELIST /newrun/ nx, ny, nidbas, npt
+!===============================================================================
+!
+!   2D grid
+!
+  nx = 8
+  ny = 8
+  nidbas = (/ 3, 3 /)
+  npt = 1000000
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+  pi = 4.0d0*ATAN(1.0d0)
+  ALLOCATE(xgrid(0:nx), ygrid(0:ny))
+  dx = 1.0d0/REAL(nx)
+  xgrid = (/ (i*dx,i=0,nx) /)
+  dy = 2.0d0*pi/REAL(ny)
+  ygrid = (/ (j*dy,j=0,ny) /)
+!
+  WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
+  WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny)
+!
+!   Set up spline
+!
+  ngauss = 4
+  CALL set_spline(nidbas, ngauss, xgrid, ygrid, splxy, (/.FALSE., .TRUE./))
+  d1 = splxy%sp1%dim
+  d2 = splxy%sp2%dim
+  WRITE(*,'(a,3i4)') 'd1, d2 =', d1, d2
+  WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots
+  WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots
+!===============================================================================
+!
+!   Check and performance of GRIDVAL using PPFORM and SPLINE expansion
+!
+  ALLOCATE(xpt(npt), ypt(npt))
+  CALL RANDOM_NUMBER(xpt)
+  CALL RANDOM_NUMBER(ypt); ypt = 2.0d0*pi*ypt
+!
+  ALLOCATE(coefs(d1,d2))
+  ALLOCATE(fpt00(npt), fpt01(npt), fpt10(npt))
+!
+  coefs = 1.0d0
+!
+  splxy%sp1%nlppform = .TRUE.
+  splxy%sp2%nlppform = .TRUE.
+  CALL gridval(splxy, xpt, ypt, fpt00, (/0,0/), coefs)
+!
+  t0 = seconds()
+  CALL gridval(splxy, xpt, ypt, fpt00, (/0,0/))
+  CALL gridval(splxy, xpt, ypt, fpt01, (/0,1/))
+  CALL gridval(splxy, xpt, ypt, fpt10, (/1,0/))
+  t1 = seconds()-t0
+  WRITE(*,'(/a,3(1pe12.3))') 'PPFORM: Max errors', &
+       & MAXVAL(ABS(fpt00-1.0d0)), MAXVAL(ABS(fpt01)), MAXVAL(ABS(fpt10))
+  WRITE(*,'(a,3(1pe12.3))') 'time(s)', t1/REAL(npt)
+!
+  splxy%sp1%nlppform = .FALSE.
+  splxy%sp2%nlppform = .FALSE.
+  CALL gridval(splxy, xpt, ypt, fpt00, (/0,0/), coefs)
+  t0 = seconds()
+  CALL gridval(splxy, xpt, ypt, fpt00, (/0,0/))
+  CALL gridval(splxy, xpt, ypt, fpt01, (/0,1/))
+  CALL gridval(splxy, xpt, ypt, fpt10, (/1,0/))
+  t1 = seconds()-t0
+  WRITE(*,'(/a,3(1pe12.3))') 'SPLINES: Max errors', &
+       & MAXVAL(ABS(fpt00-1.0d0)), MAXVAL(ABS(fpt01)), MAXVAL(ABS(fpt10))
+  WRITE(*,'(a,3(1pe12.3))') 'time(s)', t1/REAL(npt)
+!===============================================================================
+!
+!   Clean up
+!
+  CALL destroy_sp(splxy)
+  DEALLOCATE(xgrid, ygrid, coefs)
+  DEALLOCATE(xpt, ypt, fpt00, fpt01, fpt10)
+END PROGRAM main
diff --git a/examples/optim3.f90 b/examples/optim3.f90
new file mode 100644
index 0000000..9a617a0
--- /dev/null
+++ b/examples/optim3.f90
@@ -0,0 +1,137 @@
+!>
+!> @file optim3.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   Test and compare performance of using "spline" and
+!   "pp" forms. 2D1D case
+!
+  USE bsplines
+!
+  IMPLICIT NONE
+  INTEGER :: nx, ny, nz, ngauss(3), nidbas(3)
+  INTEGER :: npt, d1, d2, d3
+  INTEGER :: i, j, k
+  DOUBLE PRECISION :: pi, dx, dy, dz
+  DOUBLE PRECISION :: seconds, t0, t1
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, zgrid
+  DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: coefs
+  DOUBLE PRECISION, ALLOCATABLE :: xpt(:), ypt(:), zpt(:)
+  DOUBLE PRECISION, ALLOCATABLE :: fpt000(:), fpt100(:), fpt010(:), fpt001(:)
+  TYPE(spline2d1d) :: splxyz
+!
+  NAMELIST /newrun/ nx, ny, nz, nidbas, npt
+!===============================================================================
+!
+!   2D grid
+!
+  nx = 8
+  ny = 8
+  nz = 8
+  nidbas = (/ 3, 3, 3 /)
+  npt = 1000000
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+  pi = 4.0d0*ATAN(1.0d0)
+  ALLOCATE(xgrid(0:nx), ygrid(0:ny), zgrid(0:nz))
+  dx = 1.0d0/REAL(nx)
+  xgrid = (/ (i*dx,i=0,nx) /)
+  dy = 2.0d0*pi/REAL(ny)
+  ygrid = (/ (j*dy,j=0,ny) /)
+  dz = 2.0d0*pi/REAL(nz)
+  zgrid = (/ (k*dz,k=0,nz) /)
+!
+  WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
+  WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny)
+  WRITE(*,'(a/(10f8.3))') 'ZGRID', zgrid(0:nz)
+!
+!   Set up spline
+!
+  ngauss = 4
+  CALL set_spline(nidbas, ngauss, xgrid, ygrid, zgrid, splxyz, &
+       &         (/.FALSE., .TRUE., .TRUE./))
+  d1 = splxyz%sp12%sp1%dim
+  d2 = splxyz%sp12%sp2%dim
+  d3 = splxyz%sp3%dim
+  WRITE(*,'(a,3i4)') 'd1, d2, d3 =', d1, d2, d3
+  WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxyz%sp12%sp1%knots
+  WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxyz%sp12%sp2%knots
+  WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Z', splxyz%sp3%knots
+!===============================================================================
+!
+!   Check and performance of GRIDVAL using PPFORM and SPLINE expansion
+!
+  ALLOCATE(xpt(npt), ypt(npt), zpt(npt))
+  CALL RANDOM_NUMBER(xpt)
+  CALL RANDOM_NUMBER(ypt); ypt = 2.0d0*pi*ypt
+  CALL RANDOM_NUMBER(zpt); zpt = 2.0d0*pi*zpt
+!
+  ALLOCATE(coefs(d1,d2,d3))
+  ALLOCATE(fpt000(npt), fpt100(npt), fpt010(npt), fpt001(npt))
+!
+  coefs = 1.0d0
+!
+  splxyz%sp12%sp1%nlppform = .TRUE.
+  splxyz%sp12%sp2%nlppform = .TRUE.
+  splxyz%sp3%nlppform = .TRUE.
+  CALL gridval(splxyz, xpt, ypt, zpt, fpt000, (/0,0,0/), coefs)
+  t0 = seconds()
+  CALL gridval(splxyz, xpt, ypt, zpt, fpt000, (/0,0,0/))
+  CALL gridval(splxyz, xpt, ypt, zpt, fpt100, (/1,0,0/))
+  CALL gridval(splxyz, xpt, ypt, zpt, fpt010, (/0,1,0/))
+  CALL gridval(splxyz, xpt, ypt, zpt, fpt001, (/0,0,1/))
+  t1 = seconds()-t0
+  WRITE(*,'(/a,4(1pe12.3))') 'PPFORM: Max errors', &
+       & MAXVAL(ABS(fpt000-1.0d0)), &
+       & MAXVAL(ABS(fpt100)), &
+       & MAXVAL(ABS(fpt010)), &
+       & MAXVAL(ABS(fpt001))
+  WRITE(*,'(a,3(1pe12.3))') 'time(s)', t1/REAL(npt)
+!
+  splxyz%sp12%sp1%nlppform = .FALSE.
+  splxyz%sp12%sp2%nlppform = .FALSE.
+  splxyz%sp3%nlppform = .FALSE.
+  CALL gridval(splxyz, xpt, ypt, zpt, fpt000, (/0,0,0/), coefs)
+  t0 = seconds()
+  CALL gridval(splxyz, xpt, ypt, zpt, fpt000, (/0,0,0/))
+  CALL gridval(splxyz, xpt, ypt, zpt, fpt100, (/1,0,0/))
+  CALL gridval(splxyz, xpt, ypt, zpt, fpt010, (/0,1,0/))
+  CALL gridval(splxyz, xpt, ypt, zpt, fpt001, (/0,0,1/))
+  t1 = seconds()-t0
+  WRITE(*,'(/a,4(1pe12.3))') 'SPLINES: Max errors', &
+       & MAXVAL(ABS(fpt000-1.0d0)), &
+       & MAXVAL(ABS(fpt100)), &
+       & MAXVAL(ABS(fpt010)), &
+       & MAXVAL(ABS(fpt001))
+  WRITE(*,'(a,3(1pe12.3))') 'time(s)', t1/REAL(npt)
+!===============================================================================
+!
+!   Clean up
+!
+  CALL destroy_sp(splxyz)
+  DEALLOCATE(xgrid, ygrid, zgrid, coefs)
+  DEALLOCATE(xpt, ypt, fpt000,fpt100, fpt010, fpt001)
+END PROGRAM main
diff --git a/examples/pde1d.f90 b/examples/pde1d.f90
new file mode 100644
index 0000000..86584b8
--- /dev/null
+++ b/examples/pde1d.f90
@@ -0,0 +1,419 @@
+!>
+!> @file pde1d.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!  Solving the following 1d differential  eqation using splines:
+!
+!    -d/dr[r d/dr] f = k^2 r^(k-1), with f(r=1) = 0
+!    exact solution: f(r) = 1 - r^k
+!
+  USE bsplines
+  USE matrix
+  USE futils
+  USE conmat_mod
+  IMPLICIT NONE
+  INTEGER :: nx, nidbas, ngauss, kdiff
+  INTEGER :: i, nrank, kl, ku
+  LOGICAL :: nlppform
+  DOUBLE PRECISION :: coefs(5)
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, rhs, sol
+  TYPE(spline1d) :: splx
+  TYPE(gbmat) :: mat
+!
+  CHARACTER(len=128) :: file='pde1d.h5'
+  INTEGER :: fid
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
+  DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: solcal, solana, errsol
+  DOUBLE PRECISION :: seconds, t0, tmat, tfact, tsolv
+!
+  INTERFACE
+     SUBROUTINE dismat(spl, mat)
+       USE bsplines
+       USE matrix
+       TYPE(spline1d), INTENT(in) :: spl
+       TYPE(gbmat), INTENT(inout) :: mat
+     END SUBROUTINE dismat
+     SUBROUTINE disrhs(kdiff, spl, rhs)
+       USE bsplines
+       INTEGER, INTENT(in) :: kdiff
+       TYPE(spline1d), INTENT(in) :: spl
+       DOUBLE PRECISION, INTENT(out) :: rhs(:)
+     END SUBROUTINE disrhs
+     SUBROUTINE meshdist(coefs, x, nx)
+       DOUBLE PRECISION, INTENT(in) :: coefs(5)
+       INTEGER, INTENT(iN) :: nx
+       DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+     END SUBROUTINE meshdist
+  END INTERFACE
+!
+  NAMELIST /newrun/ nx, nidbas, ngauss, kdiff, nlppform, coefs
+!===========================================================================
+!              1.0 Prologue
+!
+!   Read in data specific to run
+!
+  nx = 8     ! Number oh intevals in x
+  nidbas = 3 ! Degree of splines
+  ngauss = 4 ! Number of Gauss points/interval
+  kdiff = 2  ! Exponent of differential problem
+  nlppform = .TRUE. ! Use PPFORM for gridval or not
+  coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+                                                 ! see function FDIST in MESHDIST
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Define grid on x axis
+!
+  ALLOCATE(xgrid(0:nx))
+  xgrid(0) = 0.0d0
+  xgrid(nx) = 1.0d0
+  CALL meshdist(coefs, xgrid, nx)
+  WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
+  WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx)
+  WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx)
+!
+!   Create hdf5 file
+!
+  CALL creatf(file, fid, 'PDE1D Result File')
+  CALL attach(fid, '/', 'NX', nx)
+  CALL attach(fid, '/', 'NIDBAS', nidbas)
+  CALL attach(fid, '/', 'NGAUSS', ngauss)
+  CALL attach(fid, '/', 'KDIFF', kdiff)
+!===========================================================================
+!              2.0 Discretize the PDE
+!
+!   Set up spline
+!
+  t0 = seconds()
+  CALL set_spline(nidbas, ngauss, xgrid, splx, nlppform=nlppform)
+  CALL get_dim(splx, nrank)       ! Rank of the FE matrix
+  WRITE(*,'(a,l6)') 'splx%nlequid', splx%nlequid
+!
+!   FE matrix assembly
+!
+  kl = nidbas
+  ku = kl
+  CALL init(kl, ku, nrank, 1, mat)
+  WRITE(*,'(a,3i6)') 'kl, ku, nrank', kl, ku, nrank
+!!$  CALL dismat(splx, mat)
+  CALL conmat(splx, mat, coefeq)
+!
+  ALLOCATE(arr(nrank))
+!!$  WRITE(*,'(/a)') 'Matrice before BC'
+!!$  DO i=1,nrank
+!!$     CALL getrow(mat, i, arr)
+!!$     WRITE(*,'(12f8.3)') arr, SUM(arr)
+!!$  END DO
+!
+!   RHS assembly
+!
+  ALLOCATE(rhs(nrank), sol(nrank))
+  CALL disrhs(kdiff, splx, rhs)
+!!$  WRITE(*,'(/a,/(12(f8.3)))') 'RHS before BC', rhs
+!
+!   Set BC f(r=1) = 0 on matrix
+!
+  arr(1:nrank-1) = 0.0d0
+  arr(nrank) = 1.0d0
+  CALL putrow(mat, nrank, arr)
+  CALL putcol(mat, nrank, arr)
+  tmat = seconds() - t0
+!!$  WRITE(*,'(/a)') 'Matrice after BC'
+!!$  DO i=1,nrank
+!!$     CALL getrow(mat, i, arr)
+!!$     WRITE(*,'(12f8.3)') arr
+!!$  END DO
+!
+!   Set BC f(r=1) = 0 on RHS
+!
+  rhs(nrank) = 0.0d0
+!!$  WRITE(*,'(/a,/(12(f8.3)))') 'RHS after BC', rhs
+  CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
+  CALL putarr(fid, '/MAT',  mat%val, 'GB matrice with BC')
+  CALL attach(fid, '/MAT', 'KL', mat%kl)
+  CALL attach(fid, '/MAT', 'KU', mat%ku)
+  CALL attach(fid, '/MAT', 'RANK', mat%rank)
+!===========================================================================
+!              3.0 Solve the dicretized system
+!
+  t0 = seconds()
+  CALL factor(mat)
+  tfact = seconds() - t0
+
+  t0 = seconds()
+  CALL bsolve(mat, rhs, sol)
+  tsolv = seconds() - t0
+!!$  WRITE(*,'(/a,/(12(f8.3)))') 'SOL', sol
+  CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
+!
+  WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splx, sol), &
+       &                    fintg(splx, sol)-REAL(kdiff,8)/REAL(kdiff+1,8)
+!===========================================================================
+!              4.0 Check the solution and its 1st derivative
+!
+  ALLOCATE(solcal(0:nx,0:2), solana(0:nx,0:2), errsol(0:nx,0:2))
+  DO i =0,nx
+     solana(i,0) = 1.0d0-xgrid(i)**kdiff
+     solana(i,1) = -kdiff*xgrid(i)**(kdiff-1)
+     solana(i,2) = -kdiff*(kdiff-1)*xgrid(i)**(kdiff-2)
+  END DO
+  CALL gridval(splx, xgrid, solcal(:,0), 0, sol) ! Compute PPFORM and grid values
+  CALL gridval(splx, xgrid, solcal(:,1), 1) ! 1st derivative
+  CALL gridval(splx, xgrid, solcal(:,2), 2) ! 2nd derivative
+  errsol = solana - solcal
+!
+  CALL putarr(fid, '/XGRID', xgrid)
+  CALL putarr(fid, '/SOLCAL', solcal)
+  CALL putarr(fid, '/SOLANA', solana)
+  CALL putarr(fid, '/ERR', errsol)
+!
+  CALL creatg(fid, '/spline')
+  CALL attach(fid, '/spline', 'order', splx%order)
+  CALL putarr(fid, '/spline/knots', splx%knots, 'Spline knots')
+  WRITE(*,'(a,3(1pe12.3))') 'Rel. discretization errors (solution and derivatives).', &
+       &    (SQRT( DOT_PRODUCT(errsol(:,i),errsol(:,i)) / &
+       &    DOT_PRODUCT(solana(:,i),solana(:,i)) ), i=0,2)
+!
+  WRITE(*,'(a)') '---'
+  WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
+  WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
+  WRITE(*,'(a,1pe12.3)') 'Backsolve time (s)            ', tsolv
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(xgrid, rhs, sol)
+  DEALLOCATE(solcal, solana, errsol)
+  DEALLOCATE(arr)
+  CALL destroy(mat)
+  CALL destroy_sp(splx)
+  CALL closef(fid)
+CONTAINS
+  SUBROUTINE coefeq(x, idt, idw, c)
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(out) :: idt(:), idw(:)
+    DOUBLE PRECISION, INTENT(out) :: c(:)
+!
+! Weak form = Int(x*dw/dx*dt/dx)dx
+!
+    c(1) = x
+    idt(1) = 1
+    idw(1) = 1
+  END SUBROUTINE coefeq
+END PROGRAM main
+!
+!+++
+SUBROUTINE meshdist(c, x, nx)
+!
+!   Construct an 1d  non-equidistant mesh given a
+!   mesh distribution function defined in FDIST
+!
+  IMPLICIT NONE
+  DOUBLE PRECISION, INTENT(in) :: c(5)
+  INTEGER, INTENT(iN) :: nx
+  DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+  INTEGER :: nintg
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+  DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+  INTEGER :: i, k
+!
+  a=x(0)
+  b=x(nx)
+  nintg = 10*nx
+  ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+  dx = (b-a)/REAL(nintg)
+  xint(0) = a
+  fint(0) = 0.0d0
+  f1 = fdist(xint(0))
+  DO i=1,nintg
+     f0 = f1
+     xint(i) = xint(i-1) + dx
+     f1 = fdist(xint(i))
+     fint(i) = fint(i-1) + 0.5*(f0+f1)
+  END DO
+!
+!  Normalization
+!
+  scal = REAL(nx) / fint(nintg)
+  fint(0:nintg) = fint(0:nintg) * scal
+!!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+  k = 1
+  DO i=1,nintg-1
+     IF( fint(i) .GE. REAL(k) ) THEN
+        x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+             &   (k-fint(i))
+        k = k+1
+     END IF
+  END DO
+!
+  DEALLOCATE(xint, fint)
+CONTAINS
+  DOUBLE PRECISION FUNCTION fdist(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
+  END FUNCTION fdist
+END SUBROUTINE meshdist
+!+++
+SUBROUTINE dismat(spl, mat)
+!
+!   Assembly FE matrix mat using spline spl
+!
+  USE bsplines
+  USE matrix
+  IMPLICIT NONE
+  TYPE(spline1d), INTENT(in) :: spl
+  TYPE(gbmat), INTENT(inout) :: mat
+  INTEGER :: nrank, nx, nidbas, ngauss
+  INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
+  DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
+  DOUBLE PRECISION :: contrib
+!
+  INTEGER :: kterms                ! Number of terms in weak form
+  INTEGER, ALLOCATABLE :: idert(:), iderw(:)  ! Derivative order
+  DOUBLE PRECISION, ALLOCATABLE :: coefs(:)
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+  CALL get_dim(spl, nrank, nx, nidbas)
+  ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
+!
+!   Weak form
+!
+  kterms = mat%nterms
+  ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
+!
+!   Gauss quadature
+!
+  CALL get_gauss(spl, ngauss)
+  ALLOCATE(xgauss(ngauss), wgauss(ngauss))
+!===========================================================================
+!              2.0 Assembly loop
+!
+  DO i=1,nx
+     CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
+     DO igauss=1,ngauss
+        CALL basfun(xgauss(igauss), spl, fun, i)
+        CALL coefeq(xgauss(igauss), idert, iderw, coefs)
+        DO iterm=1,kterms
+           DO jt=0,nidbas
+              DO iw=0,nidbas
+                 contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
+                      &    fun(iw,iderw(iterm)) * wgauss(igauss)
+                 irow=i+iw; jcol=i+jt
+                 CALL updtmat(mat, irow, jcol, contrib)
+              END DO
+           END DO
+        END DO
+     END DO
+  END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(fun)
+  DEALLOCATE(xgauss, wgauss)
+  DEALLOCATE(iderw, idert, coefs)
+!
+CONTAINS
+  SUBROUTINE coefeq(x, idt, idw, c)
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(out) :: idt(:), idw(:)
+    DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt))
+!
+! Weak form = Int(x*dw/dx*dt/dx)dx
+!
+    c(1) = x
+    idt(1) = 1
+    idw(1) = 1
+  END SUBROUTINE coefeq
+END SUBROUTINE dismat
+!+++
+SUBROUTINE disrhs(kdiff, spl, rhs)
+!
+!   Assenbly the RHS using spline spl
+!
+  USE bsplines
+  IMPLICIT NONE
+  INTEGER, INTENT(in) :: kdiff
+  TYPE(spline1d), INTENT(in) :: spl
+  DOUBLE PRECISION, INTENT(out) :: rhs(:)
+  INTEGER :: nrank, nx, nidbas, ngauss
+  INTEGER :: i, igauss, left
+  DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
+  DOUBLE PRECISION:: contrib
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+  CALL get_dim(spl, nrank, nx, nidbas)
+!!$  WRITE(*,'(/a, 5i3)') 'nrank, nx, nidbas =', nrank, nx, nidbas
+!
+  ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives)
+!
+!   Gauss quadature
+!
+  CALL get_gauss(spl, ngauss)
+!!$  WRITE(*,'(/a, i3)') 'Gauss points and weights, ngauss =', ngauss
+  ALLOCATE(xgauss(ngauss), wgauss(ngauss))
+!===========================================================================
+!              2.0 Assembly loop
+!
+  rhs(1:nrank) = 0.0d0
+!
+  DO i=1,nx
+     CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
+!!$     WRITE(*,'(a,i3,(8f10.4))') 'i=',i, xgauss, wgauss
+     DO igauss=1,ngauss
+        CALL basfun(xgauss(igauss), spl, fun, i)
+        left = i-1
+!!$        WRITE(*,'(a,5i4)') '>>>igauss, left', igauss, left
+        contrib = wgauss(igauss)*rhseq(xgauss(igauss), kdiff)
+        rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1)
+     END DO
+  END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(fun)
+  DEALLOCATE(xgauss, wgauss)
+!
+CONTAINS
+  DOUBLE PRECISION FUNCTION rhseq(x,k)
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(in) :: k
+    rhseq = k*k*x**(k-1)
+  END FUNCTION rhseq
+END SUBROUTINE disrhs
diff --git a/examples/pde1d_eig.f90 b/examples/pde1d_eig.f90
new file mode 100644
index 0000000..698efea
--- /dev/null
+++ b/examples/pde1d_eig.f90
@@ -0,0 +1,459 @@
+!>
+!> @file pde1d_eig.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!  Solving the following 1d differential  eqation using splines:
+!
+!    -d/dr[r d/dr] f = k^2 r^(k-1), with f(r=1) = 0
+!    exact solution: f(r) = 1 - r^k
+!
+  USE bsplines
+  USE matrix
+  USE futils
+  USE conmat_mod
+  IMPLICIT NONE
+  INTEGER :: nx, nidbas, ngauss, kdiff
+  INTEGER :: i, nrank, kl, ku
+  LOGICAL :: nlppform
+  DOUBLE PRECISION :: coefs(5)
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, rhs, sol
+  TYPE(spline1d) :: splx
+  TYPE(gemat) :: mat
+!!$  TYPE(gbmat) :: mat
+!
+  CHARACTER(len=128) :: file='pde1d.h5'
+  INTEGER :: fid
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
+  DOUBLE PRECISION :: seconds, t0, tmat, teig, tarpack
+!
+  DOUBLE PRECISION, ALLOCATABLE :: mata(:,:), eigvals(:), work(:)
+  INTEGER :: j, info
+!
+  INTEGER, PARAMETER :: maxn=256, maxnev=maxn, maxncv=25, &
+       &                lworkl=maxncv*(maxncv+8), zero=0.0d0
+  DOUBLE PRECISION :: v(maxn,maxncv), workl(lworkl), workd(3*maxn), &
+       &              d(maxncv,2), resid(maxn), w(maxn), &
+       &              tol=0.0d0, sigma
+  DOUBLE PRECISION, EXTERNAL :: dnrm2
+  INTEGER :: nev=10, ncv=20, ishfts=1, maxitr=300, nconv, &
+       &     mode1=1, ierr
+  INTEGER :: ido, ipntr(11), iparam(11)
+  
+  CHARACTER(len=1) :: bmat='I'
+  CHARACTER(len=2) :: which='SA'
+  LOGICAL :: rvec, select(maxncv)
+  
+!
+  INTERFACE
+     SUBROUTINE dismat(spl, mat)
+       USE bsplines
+       USE matrix
+       TYPE(spline1d), INTENT(in) :: spl
+       TYPE(gbmat), INTENT(inout) :: mat
+     END SUBROUTINE dismat
+     SUBROUTINE disrhs(kdiff, spl, rhs)
+       USE bsplines
+       INTEGER, INTENT(in) :: kdiff
+       TYPE(spline1d), INTENT(in) :: spl
+       DOUBLE PRECISION, INTENT(out) :: rhs(:)
+     END SUBROUTINE disrhs
+     SUBROUTINE meshdist(coefs, x, nx)
+       DOUBLE PRECISION, INTENT(in) :: coefs(5)
+       INTEGER, INTENT(iN) :: nx
+       DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+     END SUBROUTINE meshdist
+  END INTERFACE
+!
+  NAMELIST /newrun/ nx, nidbas, ngauss, kdiff, nlppform, coefs, nev, ncv, which
+!===========================================================================
+!              1.0 Prologue
+!
+!   Read in data specific to run
+!
+  nx = 8     ! Number oh intevals in x
+  nidbas = 3 ! Degree of splines
+  ngauss = 4 ! Number of Gauss points/interval
+  kdiff = 2  ! Exponent of differential problem
+  nlppform = .TRUE. ! Use PPFORM for gridval or not
+  coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+                                                 ! see function FDIST in MESHDIST
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Define grid on x axis
+!
+  ALLOCATE(xgrid(0:nx))
+  xgrid(0) = 0.0d0
+  xgrid(nx) = 1.0d0
+  CALL meshdist(coefs, xgrid, nx)
+  WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
+  WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx)
+  WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx)
+!
+!   Create hdf5 file
+!
+  CALL creatf(file, fid, 'PDE1D Result File')
+  CALL attach(fid, '/', 'NX', nx)
+  CALL attach(fid, '/', 'NIDBAS', nidbas)
+  CALL attach(fid, '/', 'NGAUSS', ngauss)
+  CALL attach(fid, '/', 'KDIFF', kdiff)
+!===========================================================================
+!              2.0 Discretize the PDE
+!
+!   Set up spline
+!
+  t0 = seconds()
+  CALL set_spline(nidbas, ngauss, xgrid, splx, nlppform=nlppform)
+  CALL get_dim(splx, nrank)       ! Rank of the FE matrix
+  WRITE(*,'(a,l6)') 'splx%nlequid', splx%nlequid
+!
+!   FE matrix assembly
+!
+  kl = nidbas
+  ku = kl
+!!$  CALL init(kl, ku, nrank, 1, mat)
+  WRITE(*,'(a,3i6)') 'kl, ku, nrank', kl, ku, nrank
+  CALL init(nrank, 1, mat)
+!!$  CALL dismat(splx, mat)
+  CALL conmat(splx, mat, coefeq)
+!
+  ALLOCATE(arr(nrank))
+!!$  WRITE(*,'(/a)') 'Matrice before BC'
+!!$  DO i=1,nrank
+!!$     CALL getrow(mat, i, arr)
+!!$     WRITE(*,'(12f8.3)') arr, SUM(arr)
+!!$  END DO
+!
+!   RHS assembly
+!
+  ALLOCATE(rhs(nrank), sol(nrank))
+  CALL disrhs(kdiff, splx, rhs)
+!!$  WRITE(*,'(/a,/(12(f8.3)))') 'RHS before BC', rhs
+!
+!   Set BC f(r=1) = 0 on matrix
+!
+  arr(1:nrank-1) = 0.0d0
+  arr(nrank) = 1.0d0
+  CALL putrow(mat, nrank, arr)
+  CALL putcol(mat, nrank, arr)
+  tmat = seconds() - t0
+!
+!===========================================================================
+!              3.0  Eigevalue problem
+!
+!   Using Lapack dsyev
+!
+  t0 = seconds()
+  ALLOCATE(mata(nrank,nrank))
+  ALLOCATE(eigvals(nrank))
+  ALLOCATE(work(3*nrank))
+  mata=0.0d0
+  DO j=1,nrank
+     mata(j:nrank,j) = mat%val(j:nrank,j)
+  END DO
+  CALL putarr(fid, '/MAT', mata, 'matrix A')
+  CALL dsyev('V', 'L', nrank, mata, nrank, eigvals, work, SIZE(work), info)
+  teig = seconds() - t0
+  PRINT*,'Info from DSYEV', info, arr(1)
+  IF(info.EQ.0) THEN
+     CALL putarr(fid, '/EIGVS', eigvals, 'eigenvalues of A')
+     CALL putarr(fid, '/EIGVECS', mata, 'eigenvectors of A')
+     WRITE(*,'(a/(10f10.4))') 'eigval', eigvals
+  END IF
+!
+!   Using Arpack
+!
+  ido = 0
+  iparam(1) = ishfts
+  iparam(3) = maxitr
+  iparam(7) = mode1
+!
+  IF(nev.GT.0) THEN
+     t0 = seconds()
+     DO   ! ARPACK reverse communication loop 
+        CALL dsaupd(ido, bmat, nrank, which, nev, tol, resid, &
+             &      ncv, v, maxn, iparam, ipntr, workd, workl,&
+             &      lworkl, info)
+        IF(ido.EQ.-1 .OR. ido.EQ.1) THEN
+!!$           WRITE(*,'(a/(10i4))') 'ipntr', ipntr
+           CALL av(nrank,workd(ipntr(1)), workd(ipntr(2)))
+           CYCLE
+        END IF
+        IF(info .LT. 0) THEN
+           PRINT*, 'Error in _saupd with info =', info
+        ELSE
+           rvec = .TRUE.
+           CALL dseupd(rvec, 'All', SELECT, d, v, maxn, sigma, &
+                &      bmat, nrank, which, nev, tol, resid, ncv, v, maxn, &
+                &      iparam, ipntr, workd, workl, lworkl, ierr )
+           IF( ierr .NE. 0 ) THEN
+              PRINT*,'Error in _seupd with ierr =', ierr
+           ELSE
+              nconv = iparam(5)
+              PRINT*, '--- eigenvalues and error ---'
+              DO j=1,nconv ! Residual norms
+!!$              CALL av(nrank,v(1,j), w)   ! d(1,j) is the j^th eigenvalue
+!!$              CALL daxpy(nrank, -d(j,1), v(1,j), 1, w, 1)
+!!$              d(j,2) = dnrm2(nrank, w, 1)
+!!$              d(j,2) = d(j,2)/abs(d(j,1))
+                 WRITE(*,'(2(1pe12.4))') d(j,1), eigvals(j)-d(j,1)
+!!$              CALL putarr(fid, '/EIGVS', d(:,1), 'ARPACK eigenvalues of A')
+!!$              CALL putarr(fid, '/EIGVECS', v, 'ARPACK eigenvectors of A')
+              END DO
+              EXIT
+           END IF
+        END IF
+     END DO   ! End of ARPACK reverse communication loop
+     tarpack = seconds()-t0
+  END IF
+!===========================================================================
+!              9.0  Epilogue
+!
+  WRITE(*,'(a)') '---'
+  WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
+  WRITE(*,'(a,1pe12.3)') 'Lapack: Matrice eigvalue time (s)', teig
+  WRITE(*,'(a,1pe12.3)') 'Arpack: Matrice eigvalue time (s)', tarpack
+!
+  DEALLOCATE(xgrid, rhs, sol)
+  DEALLOCATE(arr)
+  CALL destroy(mat)
+  CALL destroy_sp(splx)
+  CALL closef(fid)
+CONTAINS
+  SUBROUTINE av(n,v,w)
+!
+! Matrix vector product: w <- Av
+    INTEGER, INTENT(in) :: n
+    DOUBLE PRECISION, INTENT(in) :: v(*)
+    DOUBLE PRECISION, INTENT(out) :: w(*)
+    w(1:n) = vmx(mat,v(1:n))
+  END SUBROUTINE av
+  SUBROUTINE coefeq(x, idt, idw, c)
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(out) :: idt(:), idw(:)
+    DOUBLE PRECISION, INTENT(out) :: c(:)
+!
+! Weak form = Int(x*dw/dx*dt/dx)dx
+!
+    c(1) = x
+    idt(1) = 1
+    idw(1) = 1
+  END SUBROUTINE coefeq
+END PROGRAM main
+!
+!+++
+SUBROUTINE meshdist(c, x, nx)
+!
+!   Construct an 1d  non-equidistant mesh given a
+!   mesh distribution function defined in FDIST
+!
+  IMPLICIT NONE
+  DOUBLE PRECISION, INTENT(in) :: c(5)
+  INTEGER, INTENT(iN) :: nx
+  DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+  INTEGER :: nintg
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+  DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+  INTEGER :: i, k
+!
+  a=x(0)
+  b=x(nx)
+  nintg = 10*nx
+  ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+  dx = (b-a)/REAL(nintg)
+  xint(0) = a
+  fint(0) = 0.0d0
+  f1 = fdist(xint(0))
+  DO i=1,nintg
+     f0 = f1
+     xint(i) = xint(i-1) + dx
+     f1 = fdist(xint(i))
+     fint(i) = fint(i-1) + 0.5*(f0+f1)
+  END DO
+!
+!  Normalization
+!
+  scal = REAL(nx) / fint(nintg)
+  fint(0:nintg) = fint(0:nintg) * scal
+!!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+  k = 1
+  DO i=1,nintg-1
+     IF( fint(i) .GE. REAL(k) ) THEN
+        x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+             &   (k-fint(i))
+        k = k+1
+     END IF
+  END DO
+!
+  DEALLOCATE(xint, fint)
+CONTAINS
+  DOUBLE PRECISION FUNCTION fdist(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
+  END FUNCTION fdist
+END SUBROUTINE meshdist
+!+++
+SUBROUTINE dismat(spl, mat)
+!
+!   Assembly FE matrix mat using spline spl
+!
+  USE bsplines
+  USE matrix
+  IMPLICIT NONE
+  TYPE(spline1d), INTENT(in) :: spl
+  TYPE(gbmat), INTENT(inout) :: mat
+  INTEGER :: nrank, nx, nidbas, ngauss
+  INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
+  DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
+  DOUBLE PRECISION :: contrib
+!
+  INTEGER :: kterms                ! Number of terms in weak form
+  INTEGER, ALLOCATABLE :: idert(:), iderw(:)  ! Derivative order
+  DOUBLE PRECISION, ALLOCATABLE :: coefs(:)
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+  CALL get_dim(spl, nrank, nx, nidbas)
+  ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
+!
+!   Weak form
+!
+  kterms = mat%nterms
+  ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
+!
+!   Gauss quadature
+!
+  CALL get_gauss(spl, ngauss)
+  ALLOCATE(xgauss(ngauss), wgauss(ngauss))
+!===========================================================================
+!              2.0 Assembly loop
+!
+  DO i=1,nx
+     CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
+     DO igauss=1,ngauss
+        CALL basfun(xgauss(igauss), spl, fun, i)
+        CALL coefeq(xgauss(igauss), idert, iderw, coefs)
+        DO iterm=1,kterms
+           DO jt=0,nidbas
+              DO iw=0,nidbas
+                 contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
+                      &    fun(iw,iderw(iterm)) * wgauss(igauss)
+                 irow=i+iw; jcol=i+jt
+                 CALL updtmat(mat, irow, jcol, contrib)
+              END DO
+           END DO
+        END DO
+     END DO
+  END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(fun)
+  DEALLOCATE(xgauss, wgauss)
+  DEALLOCATE(iderw, idert, coefs)
+!
+CONTAINS
+  SUBROUTINE coefeq(x, idt, idw, c)
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(out) :: idt(:), idw(:)
+    DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt))
+!
+! Weak form = Int(x*dw/dx*dt/dx)dx
+!
+    c(1) = x
+    idt(1) = 1
+    idw(1) = 1
+  END SUBROUTINE coefeq
+END SUBROUTINE dismat
+!+++
+SUBROUTINE disrhs(kdiff, spl, rhs)
+!
+!   Assenbly the RHS using spline spl
+!
+  USE bsplines
+  IMPLICIT NONE
+  INTEGER, INTENT(in) :: kdiff
+  TYPE(spline1d), INTENT(in) :: spl
+  DOUBLE PRECISION, INTENT(out) :: rhs(:)
+  INTEGER :: nrank, nx, nidbas, ngauss
+  INTEGER :: i, igauss, left
+  DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
+  DOUBLE PRECISION:: contrib
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+  CALL get_dim(spl, nrank, nx, nidbas)
+!!$  WRITE(*,'(/a, 5i3)') 'nrank, nx, nidbas =', nrank, nx, nidbas
+!
+  ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives)
+!
+!   Gauss quadature
+!
+  CALL get_gauss(spl, ngauss)
+!!$  WRITE(*,'(/a, i3)') 'Gauss points and weights, ngauss =', ngauss
+  ALLOCATE(xgauss(ngauss), wgauss(ngauss))
+!===========================================================================
+!              2.0 Assembly loop
+!
+  rhs(1:nrank) = 0.0d0
+!
+  DO i=1,nx
+     CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
+!!$     WRITE(*,'(a,i3,(8f10.4))') 'i=',i, xgauss, wgauss
+     DO igauss=1,ngauss
+        CALL basfun(xgauss(igauss), spl, fun, i)
+        left = i-1
+!!$        WRITE(*,'(a,5i4)') '>>>igauss, left', igauss, left
+        contrib = wgauss(igauss)*rhseq(xgauss(igauss), kdiff)
+        rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1)
+     END DO
+  END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(fun)
+  DEALLOCATE(xgauss, wgauss)
+!
+CONTAINS
+  DOUBLE PRECISION FUNCTION rhseq(x,k)
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(in) :: k
+    rhseq = k*k*x**(k-1)
+  END FUNCTION rhseq
+END SUBROUTINE disrhs
diff --git a/examples/pde1d_eig_csr.f90 b/examples/pde1d_eig_csr.f90
new file mode 100644
index 0000000..02996ca
--- /dev/null
+++ b/examples/pde1d_eig_csr.f90
@@ -0,0 +1,469 @@
+!>
+!> @file pde1d_eig_csr.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!  Solving the following 1d differential  eqation using splines:
+!
+!    -d/dr[r d/dr] f = k^2 r^(k-1), with f(r=1) = 0
+!    exact solution: f(r) = 1 - r^k
+!
+  USE bsplines
+  USE csr
+  USE futils
+  USE conmat_mod
+  IMPLICIT NONE
+  INTEGER :: nx, nidbas, ngauss, kdiff
+  INTEGER :: i, nrank, kl, ku
+  LOGICAL :: nlppform
+  DOUBLE PRECISION :: coefs(5)
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, rhs, sol
+  TYPE(spline1d) :: splx
+  TYPE(csr_mat) :: mat
+!!$  TYPE(gemat) :: mat
+!!$  TYPE(gbmat) :: mat
+!
+  CHARACTER(len=128) :: file='pde1d.h5'
+  INTEGER :: fid, ffid
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
+  DOUBLE PRECISION :: seconds, t0, tmat, teig, tarpack
+!
+  DOUBLE PRECISION, ALLOCATABLE :: mata(:,:), eigvals(:), work(:)
+  INTEGER :: j, info
+!
+  INTEGER, PARAMETER :: maxn=256, maxnev=maxn, maxncv=25, &
+       &                lworkl=maxncv*(maxncv+8), zero=0.0d0
+  DOUBLE PRECISION :: v(maxn,maxncv), workl(lworkl), workd(3*maxn), &
+       &              d(maxncv,2), resid(maxn), w(maxn), &
+       &              tol=0.0d0, sigma
+  DOUBLE PRECISION, EXTERNAL :: dnrm2
+  INTEGER :: nev=10, ncv=20, ishfts=1, maxitr=300, nconv, &
+       &     mode1=1, ierr
+  INTEGER :: ido, ipntr(11), iparam(11)
+  
+  CHARACTER(len=1) :: bmat='I'
+  CHARACTER(len=2) :: which='SA'
+  LOGICAL :: rvec, select(maxncv)
+  
+!
+  INTERFACE
+     SUBROUTINE dismat(spl, mat)
+       USE bsplines
+       USE matrix
+       TYPE(spline1d), INTENT(in) :: spl
+       TYPE(gbmat), INTENT(inout) :: mat
+     END SUBROUTINE dismat
+     SUBROUTINE disrhs(kdiff, spl, rhs)
+       USE bsplines
+       INTEGER, INTENT(in) :: kdiff
+       TYPE(spline1d), INTENT(in) :: spl
+       DOUBLE PRECISION, INTENT(out) :: rhs(:)
+     END SUBROUTINE disrhs
+     SUBROUTINE meshdist(coefs, x, nx)
+       DOUBLE PRECISION, INTENT(in) :: coefs(5)
+       INTEGER, INTENT(iN) :: nx
+       DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+     END SUBROUTINE meshdist
+  END INTERFACE
+!
+  NAMELIST /newrun/ nx, nidbas, ngauss, kdiff, nlppform, coefs, nev, ncv, which
+!===========================================================================
+!              1.0 Prologue
+!
+!   Read in data specific to run
+!
+  nx = 8     ! Number oh intevals in x
+  nidbas = 3 ! Degree of splines
+  ngauss = 4 ! Number of Gauss points/interval
+  kdiff = 2  ! Exponent of differential problem
+  nlppform = .TRUE. ! Use PPFORM for gridval or not
+  coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+                                                 ! see function FDIST in MESHDIST
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Define grid on x axis
+!
+  ALLOCATE(xgrid(0:nx))
+  xgrid(0) = 0.0d0
+  xgrid(nx) = 1.0d0
+  CALL meshdist(coefs, xgrid, nx)
+  WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
+  WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx)
+  WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx)
+!
+!   Create hdf5 file
+!
+  CALL creatf(file, fid, 'PDE1D Result File')
+  CALL attach(fid, '/', 'NX', nx)
+  CALL attach(fid, '/', 'NIDBAS', nidbas)
+  CALL attach(fid, '/', 'NGAUSS', ngauss)
+  CALL attach(fid, '/', 'KDIFF', kdiff)
+!===========================================================================
+!              2.0 Discretize the PDE
+!
+!   Set up spline
+!
+  t0 = seconds()
+  CALL set_spline(nidbas, ngauss, xgrid, splx, nlppform=nlppform)
+  CALL get_dim(splx, nrank)       ! Rank of the FE matrix
+  WRITE(*,'(a,l6)') 'splx%nlequid', splx%nlequid
+!
+!   FE matrix assembly
+!
+  kl = nidbas
+  ku = kl
+  WRITE(*,'(a,3i6)') 'kl, ku, nrank', kl, ku, nrank
+  CALL init(nrank, 1, mat)
+!!$  CALL dismat(splx, mat)
+  CALL conmat(splx, mat, coefeq)
+  CALL to_mat(mat)
+  CALL creatf('pde1d_eig.h5', ffid, 'PDE1D Result File')
+  PRINT*, 'rank', mat%rank
+  PRINT*, 'nnz', mat%nnz
+  PRINT*, 'irow', mat%irow
+  PRINT*, 'cols', mat%cols
+  CALL putmat(ffid,'/MAT',mat,'FE matrix')
+  CALL closef(ffid)
+!
+!
+  ALLOCATE(arr(nrank))
+!!$  WRITE(*,'(/a)') 'Matrice before BC'
+!!$  DO i=1,nrank
+!!$     CALL getrow(mat, i, arr)
+!!$     WRITE(*,'(12f8.3)') arr, SUM(arr)
+!!$  END DO
+!
+!   RHS assembly
+!
+  ALLOCATE(rhs(nrank), sol(nrank))
+  CALL disrhs(kdiff, splx, rhs)
+!!$  WRITE(*,'(/a,/(12(f8.3)))') 'RHS before BC', rhs
+!!$!
+!!$!   Set BC f(r=1) = 0 on matrix
+!!$!
+!!$  arr(1:nrank-1) = 0.0d0
+!!$  arr(nrank) = 1.0d0
+!!$  CALL putrow(mat, nrank, arr)
+!!$  CALL putcol(mat, nrank, arr)
+  CALL putmat(fid,'/MATA', mat, 'FE matrix')
+  tmat = seconds() - t0
+!
+!===========================================================================
+!              3.0  Eigevalue problem
+!
+!   Using Lapack dsyev
+!
+  t0 = seconds()
+  ALLOCATE(mata(nrank,nrank))
+  ALLOCATE(eigvals(nrank))
+  ALLOCATE(work(3*nrank))
+  mata=0.0d0
+  DO j=1,nrank
+     CALL getcol(mat, j, mata(:,j))  ! mata is a dense matrix
+  END DO
+  CALL putarr(fid, '/MAT', mata, 'matrix A')
+  CALL dsyev('V', 'L', nrank, mata, nrank, eigvals, work, SIZE(work), info)
+  teig = seconds() - t0
+  PRINT*,'Info from DSYEV', info, arr(1)
+  IF(info.EQ.0) THEN
+     CALL putarr(fid, '/EIGVS', eigvals, 'eigenvalues of A')
+     CALL putarr(fid, '/EIGVECS', mata, 'eigenvectors of A')
+     WRITE(*,'(a/(10f10.4))') 'eigval', eigvals
+  END IF
+!
+!   Using Arpack
+!
+  ido = 0
+  iparam(1) = ishfts
+  iparam(3) = maxitr
+  iparam(7) = mode1
+!
+  IF(nev.GT.0) THEN
+     t0 = seconds()
+     DO   ! ARPACK reverse communication loop 
+        CALL dsaupd(ido, bmat, nrank, which, nev, tol, resid, &
+             &      ncv, v, maxn, iparam, ipntr, workd, workl,&
+             &      lworkl, info)
+        IF(ido.EQ.-1 .OR. ido.EQ.1) THEN
+!!$           WRITE(*,'(a/(10i4))') 'ipntr', ipntr
+           CALL av(nrank,workd(ipntr(1)), workd(ipntr(2)))
+           CYCLE
+        END IF
+        IF(info .LT. 0) THEN
+           PRINT*, 'Error in _saupd with info =', info
+        ELSE
+           rvec = .TRUE.
+           CALL dseupd(rvec, 'All', SELECT, d, v, maxn, sigma, &
+                &      bmat, nrank, which, nev, tol, resid, ncv, v, maxn, &
+                &      iparam, ipntr, workd, workl, lworkl, ierr )
+           IF( ierr .NE. 0 ) THEN
+              PRINT*,'Error in _seupd with ierr =', ierr
+           ELSE
+              nconv = iparam(5)
+              PRINT*, '--- eigenvalues and error ---'
+              DO j=1,nconv ! Residual norms
+!!$              CALL av(nrank,v(1,j), w)   ! d(1,j) is the j^th eigenvalue
+!!$              CALL daxpy(nrank, -d(j,1), v(1,j), 1, w, 1)
+!!$              d(j,2) = dnrm2(nrank, w, 1)
+!!$              d(j,2) = d(j,2)/abs(d(j,1))
+                 WRITE(*,'(2(1pe12.4))') d(j,1), eigvals(j)-d(j,1)
+!!$              CALL putarr(fid, '/EIGVS', d(:,1), 'ARPACK eigenvalues of A')
+!!$              CALL putarr(fid, '/EIGVECS', v, 'ARPACK eigenvectors of A')
+              END DO
+              EXIT
+           END IF
+        END IF
+     END DO   ! End of ARPACK reverse communication loop
+     tarpack = seconds()-t0
+  END IF
+!===========================================================================
+!              9.0  Epilogue
+!
+  WRITE(*,'(a)') '---'
+  WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
+  WRITE(*,'(a,1pe12.3)') 'Lapack: Matrice eigvalue time (s)', teig
+  WRITE(*,'(a,1pe12.3)') 'Arpack: Matrice eigvalue time (s)', tarpack
+!
+  DEALLOCATE(xgrid, rhs, sol)
+  DEALLOCATE(arr)
+  CALL destroy(mat)
+  CALL destroy_sp(splx)
+  CALL closef(fid)
+CONTAINS
+  SUBROUTINE av(n,v,w)
+!
+! Matrix vector product: w <- Av
+    INTEGER, INTENT(in) :: n
+    DOUBLE PRECISION, INTENT(in) :: v(*)
+    DOUBLE PRECISION, INTENT(out) :: w(*)
+    w(1:n) = vmx(mat,v(1:n))
+  END SUBROUTINE av
+  SUBROUTINE coefeq(x, idt, idw, c)
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(out) :: idt(:), idw(:)
+    DOUBLE PRECISION, INTENT(out) :: c(:)
+!
+! Weak form = Int(x*dw/dx*dt/dx)dx
+!
+    c(1) = x
+    idt(1) = 1
+    idw(1) = 1
+  END SUBROUTINE coefeq
+END PROGRAM main
+!
+!+++
+SUBROUTINE meshdist(c, x, nx)
+!
+!   Construct an 1d  non-equidistant mesh given a
+!   mesh distribution function defined in FDIST
+!
+  IMPLICIT NONE
+  DOUBLE PRECISION, INTENT(in) :: c(5)
+  INTEGER, INTENT(iN) :: nx
+  DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+  INTEGER :: nintg
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+  DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+  INTEGER :: i, k
+!
+  a=x(0)
+  b=x(nx)
+  nintg = 10*nx
+  ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+  dx = (b-a)/REAL(nintg)
+  xint(0) = a
+  fint(0) = 0.0d0
+  f1 = fdist(xint(0))
+  DO i=1,nintg
+     f0 = f1
+     xint(i) = xint(i-1) + dx
+     f1 = fdist(xint(i))
+     fint(i) = fint(i-1) + 0.5*(f0+f1)
+  END DO
+!
+!  Normalization
+!
+  scal = REAL(nx) / fint(nintg)
+  fint(0:nintg) = fint(0:nintg) * scal
+!!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+  k = 1
+  DO i=1,nintg-1
+     IF( fint(i) .GE. REAL(k) ) THEN
+        x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+             &   (k-fint(i))
+        k = k+1
+     END IF
+  END DO
+!
+  DEALLOCATE(xint, fint)
+CONTAINS
+  DOUBLE PRECISION FUNCTION fdist(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
+  END FUNCTION fdist
+END SUBROUTINE meshdist
+!+++
+SUBROUTINE dismat(spl, mat)
+!
+!   Assembly FE matrix mat using spline spl
+!
+  USE bsplines
+  USE matrix
+  IMPLICIT NONE
+  TYPE(spline1d), INTENT(in) :: spl
+  TYPE(gbmat), INTENT(inout) :: mat
+  INTEGER :: nrank, nx, nidbas, ngauss
+  INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
+  DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
+  DOUBLE PRECISION :: contrib
+!
+  INTEGER :: kterms                ! Number of terms in weak form
+  INTEGER, ALLOCATABLE :: idert(:), iderw(:)  ! Derivative order
+  DOUBLE PRECISION, ALLOCATABLE :: coefs(:)
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+  CALL get_dim(spl, nrank, nx, nidbas)
+  ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
+!
+!   Weak form
+!
+  kterms = mat%nterms
+  ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
+!
+!   Gauss quadature
+!
+  CALL get_gauss(spl, ngauss)
+  ALLOCATE(xgauss(ngauss), wgauss(ngauss))
+!===========================================================================
+!              2.0 Assembly loop
+!
+  DO i=1,nx
+     CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
+     DO igauss=1,ngauss
+        CALL basfun(xgauss(igauss), spl, fun, i)
+        CALL coefeq(xgauss(igauss), idert, iderw, coefs)
+        DO iterm=1,kterms
+           DO jt=0,nidbas
+              DO iw=0,nidbas
+                 contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
+                      &    fun(iw,iderw(iterm)) * wgauss(igauss)
+                 irow=i+iw; jcol=i+jt
+                 CALL updtmat(mat, irow, jcol, contrib)
+              END DO
+           END DO
+        END DO
+     END DO
+  END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(fun)
+  DEALLOCATE(xgauss, wgauss)
+  DEALLOCATE(iderw, idert, coefs)
+!
+CONTAINS
+  SUBROUTINE coefeq(x, idt, idw, c)
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(out) :: idt(:), idw(:)
+    DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt))
+!
+! Weak form = Int(x*dw/dx*dt/dx)dx
+!
+    c(1) = x
+    idt(1) = 1
+    idw(1) = 1
+  END SUBROUTINE coefeq
+END SUBROUTINE dismat
+!+++
+SUBROUTINE disrhs(kdiff, spl, rhs)
+!
+!   Assenbly the RHS using spline spl
+!
+  USE bsplines
+  IMPLICIT NONE
+  INTEGER, INTENT(in) :: kdiff
+  TYPE(spline1d), INTENT(in) :: spl
+  DOUBLE PRECISION, INTENT(out) :: rhs(:)
+  INTEGER :: nrank, nx, nidbas, ngauss
+  INTEGER :: i, igauss, left
+  DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
+  DOUBLE PRECISION:: contrib
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+  CALL get_dim(spl, nrank, nx, nidbas)
+!!$  WRITE(*,'(/a, 5i3)') 'nrank, nx, nidbas =', nrank, nx, nidbas
+!
+  ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives)
+!
+!   Gauss quadature
+!
+  CALL get_gauss(spl, ngauss)
+!!$  WRITE(*,'(/a, i3)') 'Gauss points and weights, ngauss =', ngauss
+  ALLOCATE(xgauss(ngauss), wgauss(ngauss))
+!===========================================================================
+!              2.0 Assembly loop
+!
+  rhs(1:nrank) = 0.0d0
+!
+  DO i=1,nx
+     CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
+!!$     WRITE(*,'(a,i3,(8f10.4))') 'i=',i, xgauss, wgauss
+     DO igauss=1,ngauss
+        CALL basfun(xgauss(igauss), spl, fun, i)
+        left = i-1
+!!$        WRITE(*,'(a,5i4)') '>>>igauss, left', igauss, left
+        contrib = wgauss(igauss)*rhseq(xgauss(igauss), kdiff)
+        rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1)
+     END DO
+  END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(fun)
+  DEALLOCATE(xgauss, wgauss)
+!
+CONTAINS
+  DOUBLE PRECISION FUNCTION rhseq(x,k)
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(in) :: k
+    rhseq = k*k*x**(k-1)
+  END FUNCTION rhseq
+END SUBROUTINE disrhs
diff --git a/examples/pde1d_eig_gb.f90 b/examples/pde1d_eig_gb.f90
new file mode 100644
index 0000000..c80f22d
--- /dev/null
+++ b/examples/pde1d_eig_gb.f90
@@ -0,0 +1,460 @@
+!>
+!> @file pde1d_eig_gb.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!  Solving the following 1d differential  eqation using splines:
+!
+!    -d/dr[r d/dr] f = k^2 r^(k-1), with f(r=1) = 0
+!    exact solution: f(r) = 1 - r^k
+!
+  USE bsplines
+  USE matrix
+  USE futils
+  USE conmat_mod
+  IMPLICIT NONE
+  INTEGER :: nx, nidbas, ngauss, kdiff
+  INTEGER :: i, nrank, kl, ku
+  LOGICAL :: nlppform
+  DOUBLE PRECISION :: coefs(5)
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, rhs, sol
+  TYPE(spline1d) :: splx
+  TYPE(gemat) :: mat
+!!$  TYPE(gbmat) :: mat
+!
+  CHARACTER(len=128) :: file='pde1d.h5'
+  INTEGER :: fid
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
+  DOUBLE PRECISION :: seconds, t0, tmat, teig, tarpack
+!
+  DOUBLE PRECISION, ALLOCATABLE :: mata(:,:), eigvals(:), work(:)
+  INTEGER :: j, info
+!
+  INTEGER, PARAMETER :: maxn=256, maxnev=maxn, maxncv=25, &
+       &                lworkl=maxncv*(maxncv+8), zero=0.0d0
+  DOUBLE PRECISION :: v(maxn,maxncv), workl(lworkl), workd(3*maxn), &
+       &              d(maxncv,2), resid(maxn), w(maxn), &
+       &              tol=0.0d0, sigma
+  DOUBLE PRECISION, EXTERNAL :: dnrm2
+  INTEGER :: nev=10, ncv=20, ishfts=1, maxitr=300, nconv, &
+       &     mode1=1, ierr
+  INTEGER :: ido, ipntr(11), iparam(11)
+  
+  CHARACTER(len=1) :: bmat='I'
+  CHARACTER(len=2) :: which='SA'
+  LOGICAL :: rvec, select(maxncv)
+  
+!
+  INTERFACE
+     SUBROUTINE dismat(spl, mat)
+       USE bsplines
+       USE matrix
+       TYPE(spline1d), INTENT(in) :: spl
+       TYPE(gbmat), INTENT(inout) :: mat
+     END SUBROUTINE dismat
+     SUBROUTINE disrhs(kdiff, spl, rhs)
+       USE bsplines
+       INTEGER, INTENT(in) :: kdiff
+       TYPE(spline1d), INTENT(in) :: spl
+       DOUBLE PRECISION, INTENT(out) :: rhs(:)
+     END SUBROUTINE disrhs
+     SUBROUTINE meshdist(coefs, x, nx)
+       DOUBLE PRECISION, INTENT(in) :: coefs(5)
+       INTEGER, INTENT(iN) :: nx
+       DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+     END SUBROUTINE meshdist
+  END INTERFACE
+!
+  NAMELIST /newrun/ nx, nidbas, ngauss, kdiff, nlppform, coefs, nev, ncv, which
+!===========================================================================
+!              1.0 Prologue
+!
+!   Read in data specific to run
+!
+  nx = 8     ! Number oh intevals in x
+  nidbas = 3 ! Degree of splines
+  ngauss = 4 ! Number of Gauss points/interval
+  kdiff = 2  ! Exponent of differential problem
+  nlppform = .TRUE. ! Use PPFORM for gridval or not
+  coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+                                                 ! see function FDIST in MESHDIST
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Define grid on x axis
+!
+  ALLOCATE(xgrid(0:nx))
+  xgrid(0) = 0.0d0
+  xgrid(nx) = 1.0d0
+  CALL meshdist(coefs, xgrid, nx)
+  WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
+  WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx)
+  WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx)
+!
+!   Create hdf5 file
+!
+  CALL creatf(file, fid, 'PDE1D Result File')
+  CALL attach(fid, '/', 'NX', nx)
+  CALL attach(fid, '/', 'NIDBAS', nidbas)
+  CALL attach(fid, '/', 'NGAUSS', ngauss)
+  CALL attach(fid, '/', 'KDIFF', kdiff)
+!===========================================================================
+!              2.0 Discretize the PDE
+!
+!   Set up spline
+!
+  t0 = seconds()
+  CALL set_spline(nidbas, ngauss, xgrid, splx, nlppform=nlppform)
+  CALL get_dim(splx, nrank)       ! Rank of the FE matrix
+  WRITE(*,'(a,l6)') 'splx%nlequid', splx%nlequid
+!
+!   FE matrix assembly
+!
+  kl = nidbas
+  ku = kl
+!!$  CALL init(kl, ku, nrank, 1, mat)
+!!$  WRITE(*,'(a,3i6)') 'kl, ku, nrank', kl, ku, nrank
+  CALL init(nrank, 1, mat)
+!!$  CALL dismat(splx, mat)
+  CALL conmat(splx, mat, coefeq)
+!
+  ALLOCATE(arr(nrank))
+!!$  WRITE(*,'(/a)') 'Matrice before BC'
+!!$  DO i=1,nrank
+!!$     CALL getrow(mat, i, arr)
+!!$     WRITE(*,'(12f8.3)') arr, SUM(arr)
+!!$  END DO
+!
+!   RHS assembly
+!
+  ALLOCATE(rhs(nrank), sol(nrank))
+  CALL disrhs(kdiff, splx, rhs)
+!!$  WRITE(*,'(/a,/(12(f8.3)))') 'RHS before BC', rhs
+!
+!   Set BC f(r=1) = 0 on matrix
+!
+  arr(1:nrank-1) = 0.0d0
+  arr(nrank) = 1.0d0
+  CALL putrow(mat, nrank, arr)
+  CALL putcol(mat, nrank, arr)
+  tmat = seconds() - t0
+!
+!===========================================================================
+!              3.0  Eigevalue problem
+!
+!   Using Lapack dsyev
+!
+  t0 = seconds()
+  ALLOCATE(mata(nrank,nrank))
+  ALLOCATE(eigvals(nrank))
+  ALLOCATE(work(3*nrank))
+  mata=0.0d0
+  DO j=1,nrank
+     mata(j:nrank,j) = mat%val(j:nrank,j)
+  END DO
+     CALL putarr(fid, '/MAT', mata, 'matrix A')
+  CALL dsyev('V', 'L', nrank, mata, nrank, eigvals, work, SIZE(work), info)
+  teig = seconds() - t0
+  PRINT*,'Info from DSYEV', info, arr(1)
+  IF(info.EQ.0) THEN
+     CALL putarr(fid, '/EIGVS', eigvals, 'eigenvalues of A')
+     CALL putarr(fid, '/EIGVECS', mata, 'eigenvectors of A')
+     WRITE(*,'(a/(10f10.4))') 'eigval', eigvals
+  END IF
+!
+!   Using Arpack
+!
+  ido = 0
+  iparam(1) = ishfts
+  iparam(3) = maxitr
+  iparam(7) = mode1
+!
+  IF(nev.GT.0) THEN
+     t0 = seconds()
+     DO   ! ARPACK reverse communication loop 
+        CALL dsaupd(ido, bmat, nrank, which, nev, tol, resid, &
+             &      ncv, v, maxn, iparam, ipntr, workd, workl,&
+             &      lworkl, info)
+        IF(ido.EQ.-1 .OR. ido.EQ.1) THEN
+           PRINT*, 'Error in _saupd with info =', info
+           WRITE(*,'(a/(10i4))') 'ipntr', ipntr
+           CALL av(nrank,workd(ipntr(1)), workd(ipntr(2)))
+           CYCLE
+        END IF
+        IF(info .LT. 0) THEN
+           PRINT*, 'Error in _saupd with info =', info
+        ELSE
+           rvec = .TRUE.
+           CALL dseupd(rvec, 'All', SELECT, d, v, maxn, sigma, &
+                &      bmat, nrank, which, nev, tol, resid, ncv, v, maxn, &
+                &      iparam, ipntr, workd, workl, lworkl, ierr )
+           IF( ierr .NE. 0 ) THEN
+              PRINT*,'Error in _seupd with ierr =', ierr
+           ELSE
+              nconv = iparam(5)
+              PRINT*, '--- eigenvalues and error ---'
+              DO j=1,nconv ! Residual norms
+!!$              CALL av(nrank,v(1,j), w)   ! d(1,j) is the j^th eigenvalue
+!!$              CALL daxpy(nrank, -d(j,1), v(1,j), 1, w, 1)
+!!$              d(j,2) = dnrm2(nrank, w, 1)
+!!$              d(j,2) = d(j,2)/abs(d(j,1))
+              WRITE(*,'(i3,2(1pe12.4))') j, d(j,1), eigvals(j)-d(j,1)
+!!$              CALL putarr(fid, '/EIGVS', d(:,1), 'ARPACK eigenvalues of A')
+!!$              CALL putarr(fid, '/EIGVECS', v, 'ARPACK eigenvectors of A')
+              END DO
+              EXIT
+           END IF
+        END IF
+     END DO   ! End of ARPACK reverse communication loop
+     tarpack = seconds()-t0
+  END IF
+!===========================================================================
+!              9.0  Epilogue
+!
+  WRITE(*,'(a)') '---'
+  WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
+  WRITE(*,'(a,1pe12.3)') 'Lapack: Matrice eigvalue time (s)', teig
+  WRITE(*,'(a,1pe12.3)') 'Arpack: Matrice eigvalue time (s)', tarpack
+!
+  DEALLOCATE(xgrid, rhs, sol)
+  DEALLOCATE(arr)
+  CALL destroy(mat)
+  CALL destroy_sp(splx)
+  CALL closef(fid)
+CONTAINS
+  SUBROUTINE av(n,v,w)
+!
+! Matrix vector product: w <- Av
+    INTEGER, INTENT(in) :: n
+    DOUBLE PRECISION, INTENT(in) :: v(*)
+    DOUBLE PRECISION, INTENT(out) :: w(*)
+    w(1:n) = vmx(mat,v(1:n))
+  END SUBROUTINE av
+  SUBROUTINE coefeq(x, idt, idw, c)
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(out) :: idt(:), idw(:)
+    DOUBLE PRECISION, INTENT(out) :: c(:)
+!
+! Weak form = Int(x*dw/dx*dt/dx)dx
+!
+    c(1) = x
+    idt(1) = 1
+    idw(1) = 1
+  END SUBROUTINE coefeq
+END PROGRAM main
+!
+!+++
+SUBROUTINE meshdist(c, x, nx)
+!
+!   Construct an 1d  non-equidistant mesh given a
+!   mesh distribution function defined in FDIST
+!
+  IMPLICIT NONE
+  DOUBLE PRECISION, INTENT(in) :: c(5)
+  INTEGER, INTENT(iN) :: nx
+  DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+  INTEGER :: nintg
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+  DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+  INTEGER :: i, k
+!
+  a=x(0)
+  b=x(nx)
+  nintg = 10*nx
+  ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+  dx = (b-a)/REAL(nintg)
+  xint(0) = a
+  fint(0) = 0.0d0
+  f1 = fdist(xint(0))
+  DO i=1,nintg
+     f0 = f1
+     xint(i) = xint(i-1) + dx
+     f1 = fdist(xint(i))
+     fint(i) = fint(i-1) + 0.5*(f0+f1)
+  END DO
+!
+!  Normalization
+!
+  scal = REAL(nx) / fint(nintg)
+  fint(0:nintg) = fint(0:nintg) * scal
+!!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+  k = 1
+  DO i=1,nintg-1
+     IF( fint(i) .GE. REAL(k) ) THEN
+        x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+             &   (k-fint(i))
+        k = k+1
+     END IF
+  END DO
+!
+  DEALLOCATE(xint, fint)
+CONTAINS
+  DOUBLE PRECISION FUNCTION fdist(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
+  END FUNCTION fdist
+END SUBROUTINE meshdist
+!+++
+SUBROUTINE dismat(spl, mat)
+!
+!   Assembly FE matrix mat using spline spl
+!
+  USE bsplines
+  USE matrix
+  IMPLICIT NONE
+  TYPE(spline1d), INTENT(in) :: spl
+  TYPE(gbmat), INTENT(inout) :: mat
+  INTEGER :: nrank, nx, nidbas, ngauss
+  INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
+  DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
+  DOUBLE PRECISION :: contrib
+!
+  INTEGER :: kterms                ! Number of terms in weak form
+  INTEGER, ALLOCATABLE :: idert(:), iderw(:)  ! Derivative order
+  DOUBLE PRECISION, ALLOCATABLE :: coefs(:)
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+  CALL get_dim(spl, nrank, nx, nidbas)
+  ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
+!
+!   Weak form
+!
+  kterms = mat%nterms
+  ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
+!
+!   Gauss quadature
+!
+  CALL get_gauss(spl, ngauss)
+  ALLOCATE(xgauss(ngauss), wgauss(ngauss))
+!===========================================================================
+!              2.0 Assembly loop
+!
+  DO i=1,nx
+     CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
+     DO igauss=1,ngauss
+        CALL basfun(xgauss(igauss), spl, fun, i)
+        CALL coefeq(xgauss(igauss), idert, iderw, coefs)
+        DO iterm=1,kterms
+           DO jt=0,nidbas
+              DO iw=0,nidbas
+                 contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
+                      &    fun(iw,iderw(iterm)) * wgauss(igauss)
+                 irow=i+iw; jcol=i+jt
+                 CALL updtmat(mat, irow, jcol, contrib)
+              END DO
+           END DO
+        END DO
+     END DO
+  END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(fun)
+  DEALLOCATE(xgauss, wgauss)
+  DEALLOCATE(iderw, idert, coefs)
+!
+CONTAINS
+  SUBROUTINE coefeq(x, idt, idw, c)
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(out) :: idt(:), idw(:)
+    DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt))
+!
+! Weak form = Int(x*dw/dx*dt/dx)dx
+!
+    c(1) = x
+    idt(1) = 1
+    idw(1) = 1
+  END SUBROUTINE coefeq
+END SUBROUTINE dismat
+!+++
+SUBROUTINE disrhs(kdiff, spl, rhs)
+!
+!   Assenbly the RHS using spline spl
+!
+  USE bsplines
+  IMPLICIT NONE
+  INTEGER, INTENT(in) :: kdiff
+  TYPE(spline1d), INTENT(in) :: spl
+  DOUBLE PRECISION, INTENT(out) :: rhs(:)
+  INTEGER :: nrank, nx, nidbas, ngauss
+  INTEGER :: i, igauss, left
+  DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
+  DOUBLE PRECISION:: contrib
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+  CALL get_dim(spl, nrank, nx, nidbas)
+!!$  WRITE(*,'(/a, 5i3)') 'nrank, nx, nidbas =', nrank, nx, nidbas
+!
+  ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives)
+!
+!   Gauss quadature
+!
+  CALL get_gauss(spl, ngauss)
+!!$  WRITE(*,'(/a, i3)') 'Gauss points and weights, ngauss =', ngauss
+  ALLOCATE(xgauss(ngauss), wgauss(ngauss))
+!===========================================================================
+!              2.0 Assembly loop
+!
+  rhs(1:nrank) = 0.0d0
+!
+  DO i=1,nx
+     CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
+!!$     WRITE(*,'(a,i3,(8f10.4))') 'i=',i, xgauss, wgauss
+     DO igauss=1,ngauss
+        CALL basfun(xgauss(igauss), spl, fun, i)
+        left = i-1
+!!$        WRITE(*,'(a,5i4)') '>>>igauss, left', igauss, left
+        contrib = wgauss(igauss)*rhseq(xgauss(igauss), kdiff)
+        rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1)
+     END DO
+  END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(fun)
+  DEALLOCATE(xgauss, wgauss)
+!
+CONTAINS
+  DOUBLE PRECISION FUNCTION rhseq(x,k)
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(in) :: k
+    rhseq = k*k*x**(k-1)
+  END FUNCTION rhseq
+END SUBROUTINE disrhs
diff --git a/examples/pde1d_eig_ge.f90 b/examples/pde1d_eig_ge.f90
new file mode 100644
index 0000000..713a66e
--- /dev/null
+++ b/examples/pde1d_eig_ge.f90
@@ -0,0 +1,474 @@
+!>
+!> @file pde1d_eig_ge.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!  Solving the following 1d differential  eqation using splines:
+!
+!    -d/dr[r d/dr] f = k^2 r^(k-1), with f(r=1) = 0
+!    exact solution: f(r) = 1 - r^k
+!
+  USE bsplines
+  USE matrix
+  USE futils
+  USE conmat_mod
+  IMPLICIT NONE
+  INTEGER :: nx, nidbas, ngauss, kdiff
+  INTEGER :: i, nrank, kl, ku
+  LOGICAL :: nlppform
+  DOUBLE PRECISION :: coefs(5)
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, rhs, sol
+  TYPE(spline1d) :: splx
+  TYPE(gemat) :: mat
+!
+  CHARACTER(len=128) :: file='pde1d.h5'
+  INTEGER :: fid
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
+  DOUBLE PRECISION :: seconds, t0, tmat, teig, tarpack
+!
+  DOUBLE PRECISION, ALLOCATABLE :: mata(:,:), eigvecsr(:,:), eigvecsl(:,:), &
+       &                           wr(:), wi(:), work(:)
+  INTEGER :: j, info
+!
+  INTEGER, PARAMETER :: maxn=256, maxnev=maxn, maxncv=maxn
+  DOUBLE PRECISION :: v(maxn,maxncv),workd(3*maxn), workev(3*maxncv), &
+       &              d(maxncv,2), resid(maxn), w(maxncv,maxn), &
+       &              zero=0.0d0, tol=0.0d0, sigmar, sigmai
+  DOUBLE PRECISION, ALLOCATABLE :: workl(:)
+  DOUBLE PRECISION, EXTERNAL :: dnrm2
+  INTEGER :: nev=10, ncv=30, ishfts=1, maxitr=300, nconv, &
+       &     mode1=1, ierr, lworkl
+  INTEGER :: ido, ipntr(11), iparam(11)
+  
+  CHARACTER(len=1) :: bmat='I'
+  CHARACTER(len=2) :: which='SA'
+  LOGICAL :: rvec, select(maxncv)
+  
+!
+  INTERFACE
+     SUBROUTINE dismat(spl, mat)
+       USE bsplines
+       USE matrix
+       TYPE(spline1d), INTENT(in) :: spl
+       TYPE(gbmat), INTENT(inout) :: mat
+     END SUBROUTINE dismat
+     SUBROUTINE disrhs(kdiff, spl, rhs)
+       USE bsplines
+       INTEGER, INTENT(in) :: kdiff
+       TYPE(spline1d), INTENT(in) :: spl
+       DOUBLE PRECISION, INTENT(out) :: rhs(:)
+     END SUBROUTINE disrhs
+     SUBROUTINE meshdist(coefs, x, nx)
+       DOUBLE PRECISION, INTENT(in) :: coefs(5)
+       INTEGER, INTENT(iN) :: nx
+       DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+     END SUBROUTINE meshdist
+  END INTERFACE
+!
+  NAMELIST /newrun/ nx, nidbas, ngauss, kdiff, nlppform, coefs, &
+       &            nev, ncv, which
+!===========================================================================
+!              1.0 Prologue
+!
+!   Read in data specific to run
+!
+  nx = 8     ! Number oh intevals in x
+  nidbas = 3 ! Degree of splines
+  ngauss = 4 ! Number of Gauss points/interval
+  kdiff = 2  ! Exponent of differential problem
+  nlppform = .TRUE. ! Use PPFORM for gridval or not
+  coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+                                                 ! see function FDIST in MESHDIST
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Define grid on x axis
+!
+  ALLOCATE(xgrid(0:nx))
+  xgrid(0) = 0.0d0
+  xgrid(nx) = 1.0d0
+  CALL meshdist(coefs, xgrid, nx)
+  WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
+  WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx)
+  WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx)
+!
+!   Create hdf5 file
+!
+  CALL creatf(file, fid, 'PDE1D Result File')
+  CALL attach(fid, '/', 'NX', nx)
+  CALL attach(fid, '/', 'NIDBAS', nidbas)
+  CALL attach(fid, '/', 'NGAUSS', ngauss)
+  CALL attach(fid, '/', 'KDIFF', kdiff)
+!===========================================================================
+!              2.0 Discretize the PDE
+!
+!   Set up spline
+!
+  t0 = seconds()
+  CALL set_spline(nidbas, ngauss, xgrid, splx, nlppform=nlppform)
+  CALL get_dim(splx, nrank)       ! Rank of the FE matrix
+  WRITE(*,'(a,l6)') 'splx%nlequid', splx%nlequid
+!
+!   FE matrix assembly
+!
+  kl = nidbas
+  ku = kl
+!!$  CALL init(kl, ku, nrank, 1, mat)
+  WRITE(*,'(a,3i6)') 'kl, ku, nrank', kl, ku, nrank
+  CALL init(nrank, 1, mat)
+!!$  CALL dismat(splx, mat)
+  CALL conmat(splx, mat, coefeq)
+!
+  ALLOCATE(arr(nrank))
+!!$  WRITE(*,'(/a)') 'Matrice before BC'
+!!$  DO i=1,nrank
+!!$     CALL getrow(mat, i, arr)
+!!$     WRITE(*,'(12f8.3)') arr, SUM(arr)
+!!$  END DO
+!
+!   RHS assembly
+!
+  ALLOCATE(rhs(nrank), sol(nrank))
+  CALL disrhs(kdiff, splx, rhs)
+!!$  WRITE(*,'(/a,/(12(f8.3)))') 'RHS before BC', rhs
+!
+!   Set BC f(r=1) = 0 on matrix
+!
+  arr(1:nrank-1) = 0.0d0
+  arr(nrank) = 1.0d0
+  CALL putrow(mat, nrank, arr)
+  CALL putcol(mat, nrank, arr)
+  tmat = seconds() - t0
+!
+!===========================================================================
+!              3.0  Eigevalue problem
+!
+!   Using Lapack dgeev
+!
+  t0 = seconds()
+  ALLOCATE(mata(nrank,nrank))
+  ALLOCATE(eigvecsr(nrank,nrank))
+  ALLOCATE(eigvecsl(nrank,nrank))
+  ALLOCATE(work(4*nrank))
+  ALLOCATE(wr(nrank), wi(nrank))
+  mata(:,:) = mat%val(:,:)
+  CALL putarr(fid, '/MAT', mata, 'matrix A')
+!!$  CALL dsyev('V', 'L', nrank, mata, nrank, eigvals, work, SIZE(work), info)
+  CALL dgeev('N', 'V', nrank, mata, nrank, wr, wi, eigvecsl, SIZE(eigvecsl,1), &
+       &     eigvecsr, SIZE(eigvecsr,1), work, SIZE(work), info)
+  teig = seconds() - t0
+  PRINT*,'Info from DGEEV', info, arr(1)
+  IF(info.EQ.0) THEN
+     CALL putarr(fid, '/REIGVS', wr, 'Real of eigenvalues of A')
+     CALL putarr(fid, '/IEIGVS', wi, 'Imag of eigenvalues of A')
+     CALL putarr(fid, '/EIGVECL', eigvecsl, 'left eigenvalues of A')
+     CALL putarr(fid, '/EIGVECR', eigvecsr, 'right eigenvalues of A')
+     WRITE(*,'(a/(10f10.4))') 'Real eigval', wr
+     WRITE(*,'(a/(10f10.4))') 'Imag eigval', wi
+  END IF
+!
+!   Using Arpack
+!
+  ido = 0
+  iparam(1) = ishfts
+  iparam(3) = maxitr
+  iparam(7) = mode1
+!
+ lworkl  = 3*ncv**2+6*ncv
+ ALLOCATE(workl(lworkl))
+  
+!
+  t0 = seconds()
+  DO   ! ARPACK reverse communication loop 
+     CALL dnaupd(ido, bmat, nrank, which, nev, tol, resid, &
+          &      ncv, v, maxn, iparam, ipntr, workd, workl,&
+          &      lworkl, info)
+     IF(ido.EQ.-1 .OR. ido.EQ.1) THEN
+        CALL av(nrank,workd(ipntr(1)), workd(ipntr(2)))
+        CYCLE
+     END IF
+     PRINT*, 'INFO =', info
+     IF(info .LT. 0) THEN
+        PRINT*, 'Error in dnaupd with info =', info
+     ELSE
+        rvec = .TRUE.
+        CALL dneupd(rvec, 'A', select, d, d(1,2), v, size(v,1), &
+             &      sigmar, sigmai, workev, bmat, nrank, which, nev, tol, &
+             &      resid, ncv, v, size(v,1), iparam, ipntr, workd, workl, &
+             &      lworkl, ierr )
+        IF( ierr .NE. 0 ) THEN
+           PRINT*,'Error in dneupd with ierr =', ierr
+        ELSE
+           nconv = iparam(5)
+           PRINT*, '--- Real eigenvalues and comprison with Lapack results ---'
+! eiegvalues and diff with Lapack results
+           DO j=1,nconv
+              WRITE(*,'(i3,3(1pe12.4))') j, d(j,1), wr(j), wr(j)-d(j,1)
+           END DO
+           PRINT*, '--- Imag eigenvalues and comprison with Lapack results ---'
+           DO j=1,nconv
+              WRITE(*,'(i3,3(1pe12.4))') j, d(j,2), wi(j), wi(j)-d(j,2)
+           END DO
+!!$              CALL av(nrank,v(1,j), w)   ! d(1,j) is the j^th eigenvalue
+!!$              CALL daxpy(nrank, -d(j,1), v(1,j), 1, w, 1)
+!!$              d(j,2) = dnrm2(nrank, w, 1)
+!!$              d(j,2) = d(j,2)/abs(d(j,1))
+           CALL putarr(fid, '/EIGVS', d(1:nconv,:), 'ARPACK eigenvalues of A')
+!!$              CALL putarr(fid, '/EIGVECS', v, 'ARPACK eigenvectors of A')
+!!$           END DO
+        END IF
+        EXIT
+     END IF
+  END DO   ! End of ARPACK reverse communication loop
+  tarpack = seconds()-t0
+  
+!===========================================================================
+!              9.0  Epilogue
+!
+  WRITE(*,'(a)') '---'
+  WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
+  WRITE(*,'(a,1pe12.3)') 'Lapack: Matrice eigvalue time (s)', teig
+  WRITE(*,'(a,1pe12.3)') 'Arpack: Matrice eigvalue time (s)', tarpack
+!
+  DEALLOCATE(xgrid, rhs, sol)
+  DEALLOCATE(arr)
+  CALL destroy(mat)
+  CALL destroy_sp(splx)
+  CALL closef(fid)
+CONTAINS
+  SUBROUTINE av(n,v,w)
+!
+! Matrix vector product: w <- Av
+    INTEGER, INTENT(in) :: n
+    DOUBLE PRECISION, INTENT(in) :: v(*)
+    DOUBLE PRECISION, INTENT(out) :: w(*)
+    w(1:n) = vmx(mat,v(1:n))
+  END SUBROUTINE av
+  SUBROUTINE coefeq(x, idt, idw, c)
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(out) :: idt(:), idw(:)
+    DOUBLE PRECISION, INTENT(out) :: c(:)
+!
+! Weak form = Int(x*dw/dx*dt/dx)dx
+!
+    c(1) = x
+    idt(1) = 1
+    idw(1) = 1
+  END SUBROUTINE coefeq
+END PROGRAM main
+!
+!+++
+SUBROUTINE meshdist(c, x, nx)
+!
+!   Construct an 1d  non-equidistant mesh given a
+!   mesh distribution function defined in FDIST
+!
+  IMPLICIT NONE
+  DOUBLE PRECISION, INTENT(in) :: c(5)
+  INTEGER, INTENT(iN) :: nx
+  DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+  INTEGER :: nintg
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+  DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+  INTEGER :: i, k
+!
+  a=x(0)
+  b=x(nx)
+  nintg = 10*nx
+  ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+  dx = (b-a)/REAL(nintg)
+  xint(0) = a
+  fint(0) = 0.0d0
+  f1 = fdist(xint(0))
+  DO i=1,nintg
+     f0 = f1
+     xint(i) = xint(i-1) + dx
+     f1 = fdist(xint(i))
+     fint(i) = fint(i-1) + 0.5*(f0+f1)
+  END DO
+!
+!  Normalization
+!
+  scal = REAL(nx) / fint(nintg)
+  fint(0:nintg) = fint(0:nintg) * scal
+!!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+  k = 1
+  DO i=1,nintg-1
+     IF( fint(i) .GE. REAL(k) ) THEN
+        x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+             &   (k-fint(i))
+        k = k+1
+     END IF
+  END DO
+!
+  DEALLOCATE(xint, fint)
+CONTAINS
+  DOUBLE PRECISION FUNCTION fdist(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
+  END FUNCTION fdist
+END SUBROUTINE meshdist
+!+++
+SUBROUTINE dismat(spl, mat)
+!
+!   Assembly FE matrix mat using spline spl
+!
+  USE bsplines
+  USE matrix
+  IMPLICIT NONE
+  TYPE(spline1d), INTENT(in) :: spl
+  TYPE(gbmat), INTENT(inout) :: mat
+  INTEGER :: nrank, nx, nidbas, ngauss
+  INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
+  DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
+  DOUBLE PRECISION :: contrib
+!
+  INTEGER :: kterms                ! Number of terms in weak form
+  INTEGER, ALLOCATABLE :: idert(:), iderw(:)  ! Derivative order
+  DOUBLE PRECISION, ALLOCATABLE :: coefs(:)
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+  CALL get_dim(spl, nrank, nx, nidbas)
+  ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
+!
+!   Weak form
+!
+  kterms = mat%nterms
+  ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
+!
+!   Gauss quadature
+!
+  CALL get_gauss(spl, ngauss)
+  ALLOCATE(xgauss(ngauss), wgauss(ngauss))
+!===========================================================================
+!              2.0 Assembly loop
+!
+  DO i=1,nx
+     CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
+     DO igauss=1,ngauss
+        CALL basfun(xgauss(igauss), spl, fun, i)
+        CALL coefeq(xgauss(igauss), idert, iderw, coefs)
+        DO iterm=1,kterms
+           DO jt=0,nidbas
+              DO iw=0,nidbas
+                 contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
+                      &    fun(iw,iderw(iterm)) * wgauss(igauss)
+                 irow=i+iw; jcol=i+jt
+                 CALL updtmat(mat, irow, jcol, contrib)
+              END DO
+           END DO
+        END DO
+     END DO
+  END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(fun)
+  DEALLOCATE(xgauss, wgauss)
+  DEALLOCATE(iderw, idert, coefs)
+!
+CONTAINS
+  SUBROUTINE coefeq(x, idt, idw, c)
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(out) :: idt(:), idw(:)
+    DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt))
+!
+! Weak form = Int(x*dw/dx*dt/dx)dx
+!
+    c(1) = x
+    idt(1) = 1
+    idw(1) = 1
+  END SUBROUTINE coefeq
+END SUBROUTINE dismat
+!+++
+SUBROUTINE disrhs(kdiff, spl, rhs)
+!
+!   Assenbly the RHS using spline spl
+!
+  USE bsplines
+  IMPLICIT NONE
+  INTEGER, INTENT(in) :: kdiff
+  TYPE(spline1d), INTENT(in) :: spl
+  DOUBLE PRECISION, INTENT(out) :: rhs(:)
+  INTEGER :: nrank, nx, nidbas, ngauss
+  INTEGER :: i, igauss, left
+  DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
+  DOUBLE PRECISION:: contrib
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+  CALL get_dim(spl, nrank, nx, nidbas)
+!!$  WRITE(*,'(/a, 5i3)') 'nrank, nx, nidbas =', nrank, nx, nidbas
+!
+  ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives)
+!
+!   Gauss quadature
+!
+  CALL get_gauss(spl, ngauss)
+!!$  WRITE(*,'(/a, i3)') 'Gauss points and weights, ngauss =', ngauss
+  ALLOCATE(xgauss(ngauss), wgauss(ngauss))
+!===========================================================================
+!              2.0 Assembly loop
+!
+  rhs(1:nrank) = 0.0d0
+!
+  DO i=1,nx
+     CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
+!!$     WRITE(*,'(a,i3,(8f10.4))') 'i=',i, xgauss, wgauss
+     DO igauss=1,ngauss
+        CALL basfun(xgauss(igauss), spl, fun, i)
+        left = i-1
+!!$        WRITE(*,'(a,5i4)') '>>>igauss, left', igauss, left
+        contrib = wgauss(igauss)*rhseq(xgauss(igauss), kdiff)
+        rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1)
+     END DO
+  END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(fun)
+  DEALLOCATE(xgauss, wgauss)
+!
+CONTAINS
+  DOUBLE PRECISION FUNCTION rhseq(x,k)
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(in) :: k
+    rhseq = k*k*x**(k-1)
+  END FUNCTION rhseq
+END SUBROUTINE disrhs
diff --git a/examples/pde1d_eig_zcsr.f90 b/examples/pde1d_eig_zcsr.f90
new file mode 100644
index 0000000..d467456
--- /dev/null
+++ b/examples/pde1d_eig_zcsr.f90
@@ -0,0 +1,481 @@
+!>
+!> @file pde1d_eig_zcsr.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!  Solving the following 1d differential  eqation using splines:
+!
+!    -d/dr[r d/dr] f = k^2 r^(k-1), with f(r=1) = 0
+!    exact solution: f(r) = 1 - r^k
+!
+  USE bsplines
+  USE csr
+  USE futils
+  USE f95_precision, ONLY: WP => DP
+  USE lapack95, ONLY: geev
+  IMPLICIT NONE
+  INTEGER :: nx, nidbas, ngauss, kdiff
+  INTEGER :: i, nrank, kl, ku
+  LOGICAL :: nlppform
+  DOUBLE PRECISION :: coefs(5)
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, rhs, sol
+  TYPE(spline1d) :: splx
+  TYPE(zcsr_mat) :: mat
+!
+  CHARACTER(len=128) :: file='pde1d.h5'
+  INTEGER :: fid, ffid
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
+  DOUBLE PRECISION :: seconds, t0, tmat, teig, tarpack
+!
+!   Lapack95 GEEV arguments
+  COMPLEX(WP), ALLOCATABLE :: mata(:,:), w(:)
+  REAL(WP), ALLOCATABLE    :: WR(:), WI(:)
+  INTEGER :: j, info
+!
+  INTEGER, PARAMETER :: maxn=256, maxnev=maxn, maxncv=25
+  INTEGER :: lworkl, zero=0.0d0
+  DOUBLE PRECISION :: d(maxncv,2), tol=0.0d0, rwork(maxncv)
+  DOUBLE COMPLEX :: v(maxn,maxncv), resid(maxn), sigma
+  DOUBLE COMPLEX :: workd(3*maxncv), lwork(2*maxn)
+  DOUBLE COMPLEX, ALLOCATABLE :: workl(:), vl(:,:), vr(:,:)
+  DOUBLE PRECISION, EXTERNAL :: dnrm2
+  INTEGER :: nev=10, ncv=20, ishfts=1, maxitr=300, nconv, &
+       &     mode1=1, ierr
+  INTEGER :: ido, ipntr(14), iparam(11)
+  
+  CHARACTER(len=1) :: bmat='I'
+  CHARACTER(len=2) :: which='SA'
+  LOGICAL :: rvec, select(maxncv)
+  
+!
+  INTERFACE
+     SUBROUTINE dismat(spl, mat)
+       USE bsplines
+       USE csr
+       TYPE(spline1d), INTENT(in) :: spl
+       TYPE(zcsr_mat), INTENT(inout) :: mat
+     END SUBROUTINE dismat
+     SUBROUTINE disrhs(kdiff, spl, rhs)
+       USE bsplines
+       INTEGER, INTENT(in) :: kdiff
+       TYPE(spline1d), INTENT(in) :: spl
+       DOUBLE PRECISION, INTENT(out) :: rhs(:)
+     END SUBROUTINE disrhs
+     SUBROUTINE meshdist(coefs, x, nx)
+       DOUBLE PRECISION, INTENT(in) :: coefs(5)
+       INTEGER, INTENT(iN) :: nx
+       DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+     END SUBROUTINE meshdist
+  END INTERFACE
+!
+  NAMELIST /newrun/ nx, nidbas, ngauss, kdiff, nlppform, coefs, nev, ncv, which
+!===========================================================================
+!              1.0 Prologue
+!
+!   Read in data specific to run
+!
+  nx = 8     ! Number oh intevals in x
+  nidbas = 3 ! Degree of splines
+  ngauss = 4 ! Number of Gauss points/interval
+  kdiff = 2  ! Exponent of differential problem
+  nlppform = .TRUE. ! Use PPFORM for gridval or not
+  coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+                                                 ! see function FDIST in MESHDIST
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Define grid on x axis
+!
+  ALLOCATE(xgrid(0:nx))
+  xgrid(0) = 0.0d0
+  xgrid(nx) = 1.0d0
+  CALL meshdist(coefs, xgrid, nx)
+  WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
+  WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx)
+  WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx)
+!
+!   Create hdf5 file
+!
+  CALL creatf(file, fid, 'PDE1D Result File')
+  CALL attach(fid, '/', 'NX', nx)
+  CALL attach(fid, '/', 'NIDBAS', nidbas)
+  CALL attach(fid, '/', 'NGAUSS', ngauss)
+  CALL attach(fid, '/', 'KDIFF', kdiff)
+!===========================================================================
+!              2.0 Discretize the PDE
+!
+!   Set up spline
+!
+  t0 = seconds()
+  CALL set_spline(nidbas, ngauss, xgrid, splx, nlppform=nlppform)
+  CALL get_dim(splx, nrank)       ! Rank of the FE matrix
+  WRITE(*,'(a,l6)') 'splx%nlequid', splx%nlequid
+!
+!   FE matrix assembly
+!
+  kl = nidbas
+  ku = kl
+  WRITE(*,'(a,3i6)') 'kl, ku, nrank', kl, ku, nrank
+  CALL init(nrank, 1, mat)
+  CALL dismat(splx, mat)
+!!$  CALL conmat(splx, mat, coefeq)
+  CALL to_mat(mat)
+  PRINT*,'MAT after to_mat', mat%val
+  CALL creatf('pde1d_eig.h5', ffid, 'PDE1D Result File', real_prec='D')
+  PRINT*, 'rank', mat%rank
+  PRINT*, 'nnz', mat%nnz
+  PRINT*, 'irow', mat%irow
+  PRINT*, 'cols', mat%cols
+  CALL putmat(ffid,'/MAT',mat,'FE matrix')
+  PRINT*, 'MAT',mat%val
+  CALL closef(ffid)
+  STOP
+!
+!
+  ALLOCATE(arr(nrank))
+!!$  WRITE(*,'(/a)') 'Matrice before BC'
+!!$  DO i=1,nrank
+!!$     CALL getrow(mat, i, arr)
+!!$     WRITE(*,'(12f8.3)') arr, SUM(arr)
+!!$  END DO
+!
+!   RHS assembly
+!
+  ALLOCATE(rhs(nrank), sol(nrank))
+  CALL disrhs(kdiff, splx, rhs)
+!!$  WRITE(*,'(/a,/(12(f8.3)))') 'RHS before BC', rhs
+!!$!
+!!$!   Set BC f(r=1) = 0 on matrix
+!!$!
+!!$  arr(1:nrank-1) = 0.0d0
+!!$  arr(nrank) = 1.0d0
+!!$  CALL putrow(mat, nrank, arr)
+!!$  CALL putcol(mat, nrank, arr)
+  CALL putmat(fid,'/MATA', mat, 'FE matrix')
+  tmat = seconds() - t0
+!
+!===========================================================================
+!              3.0  Eigevalue problem
+!
+!   Using Lapack dsyev
+!
+  t0 = seconds()
+  ALLOCATE(mata(nrank,nrank))
+  ALLOCATE(w(nrank))
+  ALLOCATE(wr(nrank), wi(nrank))
+  ALLOCATE(vl(nrank,nrank), vr(nrank,nrank))
+  mata=0.0d0
+  DO j=1,nrank
+     CALL getcol(mat, j, mata(:,j))  ! convert to dense matrix mata
+  END DO
+  CALL putarr(fid, '/MAT', mata, 'matrix A')
+  CALL geev(mata, w)
+  wr(:) = REAL(w(:))
+  wi(:) = AIMAG(w(:))
+  teig = seconds() - t0
+  PRINT*,'Info from ZGEEV', info, arr(1)
+  IF(info.EQ.0) THEN
+     CALL putarr(fid, '/REIGVS', wr, 'eigenvalues of A')
+     CALL putarr(fid, '/IEIGVS', wi, 'eigenvectors of A')
+     WRITE(*,'(a/(10f10.4))') 'Real eigval', wr
+     WRITE(*,'(a/(10f10.4))') 'Imag eigval', wi
+  END IF
+!
+!   Using Arpack
+!
+  ido = 0
+  iparam(1) = ishfts
+  iparam(3) = maxitr
+  iparam(7) = mode1
+!
+  lworkl  = 3*ncv**2+5*ncv
+  ALLOCATE(workl(lworkl))
+!
+  IF(nev.GT.0) THEN
+     t0 = seconds()
+     DO   ! ARPACK reverse communication loop 
+        CALL dsaupd(ido, bmat, nrank, which, nev, tol, resid, ncv, &
+             &      v, SIZE(v,1), iparam, ipntr, workd, workl, lworkl, &
+             &      rwork, info)
+        IF(ido.EQ.-1 .OR. ido.EQ.1) THEN
+!!$           WRITE(*,'(a/(10i4))') 'ipntr', ipntr
+           CALL av(nrank,workd(ipntr(1)), workd(ipntr(2)))
+           CYCLE
+        END IF
+        IF(info .LT. 0) THEN
+           PRINT*, 'Error in _saupd with info =', info
+        ELSE
+           rvec = .TRUE.
+           CALL dseupd(rvec, 'All', SELECT, d, v, maxn, sigma, &
+                &      bmat, nrank, which, nev, tol, resid, ncv, v, maxn, &
+                &      iparam, ipntr, workd, workl, lworkl, ierr )
+           IF( ierr .NE. 0 ) THEN
+              PRINT*,'Error in _seupd with ierr =', ierr
+           ELSE
+              nconv = iparam(5)
+              PRINT*, '--- eigenvalues and error ---'
+              DO j=1,nconv ! Residual norms
+!!$              CALL av(nrank,v(1,j), w)   ! d(1,j) is the j^th eigenvalue
+!!$              CALL daxpy(nrank, -d(j,1), v(1,j), 1, w, 1)
+!!$              d(j,2) = dnrm2(nrank, w, 1)
+!!$              d(j,2) = d(j,2)/abs(d(j,1))
+                 WRITE(*,'(2(1pe12.4))') d(j,1), eigvals(j)-d(j,1)
+!!$              CALL putarr(fid, '/EIGVS', d(:,1), 'ARPACK eigenvalues of A')
+!!$              CALL putarr(fid, '/EIGVECS', v, 'ARPACK eigenvectors of A')
+              END DO
+              EXIT
+           END IF
+        END IF
+     END DO   ! End of ARPACK reverse communication loop
+     tarpack = seconds()-t0
+  END IF
+!===========================================================================
+!              9.0  Epilogue
+!
+  WRITE(*,'(a)') '---'
+  WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
+  WRITE(*,'(a,1pe12.3)') 'Lapack: Matrice eigvalue time (s)', teig
+  WRITE(*,'(a,1pe12.3)') 'Arpack: Matrice eigvalue time (s)', tarpack
+!
+  DEALLOCATE(xgrid, rhs, sol)
+  DEALLOCATE(arr)
+  CALL destroy(mat)
+  CALL destroy_sp(splx)
+  CALL closef(fid)
+CONTAINS
+  SUBROUTINE av(n,v,w)
+!
+! Matrix vector product: w <- Av
+    INTEGER, INTENT(in) :: n
+    DOUBLE COMPLEX, INTENT(in) :: v(*)
+    DOUBLE COMPLEX, INTENT(out) :: w(*)
+    w(1:n) = vmx(mat,v(1:n))
+  END SUBROUTINE av
+  SUBROUTINE coefeq(x, idt, idw, c)
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(out) :: idt(:), idw(:)
+    DOUBLE PRECISION, INTENT(out) :: c(:)
+!
+! Weak form = Int(x*dw/dx*dt/dx)dx
+!
+    c(1) = x
+    idt(1) = 1
+    idw(1) = 1
+  END SUBROUTINE coefeq
+END PROGRAM main
+!
+!+++
+SUBROUTINE meshdist(c, x, nx)
+!
+!   Construct an 1d  non-equidistant mesh given a
+!   mesh distribution function defined in FDIST
+!
+  IMPLICIT NONE
+  DOUBLE PRECISION, INTENT(in) :: c(5)
+  INTEGER, INTENT(iN) :: nx
+  DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+  INTEGER :: nintg
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+  DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+  INTEGER :: i, k
+!
+  a=x(0)
+  b=x(nx)
+  nintg = 10*nx
+  ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+  dx = (b-a)/REAL(nintg)
+  xint(0) = a
+  fint(0) = 0.0d0
+  f1 = fdist(xint(0))
+  DO i=1,nintg
+     f0 = f1
+     xint(i) = xint(i-1) + dx
+     f1 = fdist(xint(i))
+     fint(i) = fint(i-1) + 0.5*(f0+f1)
+  END DO
+!
+!  Normalization
+!
+  scal = REAL(nx) / fint(nintg)
+  fint(0:nintg) = fint(0:nintg) * scal
+!!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+  k = 1
+  DO i=1,nintg-1
+     IF( fint(i) .GE. REAL(k) ) THEN
+        x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+             &   (k-fint(i))
+        k = k+1
+     END IF
+  END DO
+!
+  DEALLOCATE(xint, fint)
+CONTAINS
+  DOUBLE PRECISION FUNCTION fdist(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
+  END FUNCTION fdist
+END SUBROUTINE meshdist
+!+++
+SUBROUTINE dismat(spl, mat)
+!
+!   Assembly FE matrix mat using spline spl
+!
+  USE bsplines
+  USE csr
+  IMPLICIT NONE
+  TYPE(spline1d), INTENT(in) :: spl
+  TYPE(zcsr_mat), INTENT(inout) :: mat
+  INTEGER :: nrank, nx, nidbas, ngauss
+  INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
+  DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
+  DOUBLE COMPLEX                :: contrib
+!
+  INTEGER :: kterms                ! Number of terms in weak form
+  INTEGER, ALLOCATABLE :: idert(:), iderw(:)  ! Derivative order
+  DOUBLE PRECISION, ALLOCATABLE :: coefs(:)
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+  CALL get_dim(spl, nrank, nx, nidbas)
+  ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
+!
+!   Weak form
+!
+  kterms = mat%nterms
+  ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
+!
+!   Gauss quadature
+!
+  CALL get_gauss(spl, ngauss)
+  ALLOCATE(xgauss(ngauss), wgauss(ngauss))
+!===========================================================================
+!              2.0 Assembly loop
+!
+  DO i=1,nx
+     CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
+     DO igauss=1,ngauss
+        CALL basfun(xgauss(igauss), spl, fun, i)
+        CALL coefeq(xgauss(igauss), idert, iderw, coefs)
+        DO iterm=1,kterms
+           DO jt=0,nidbas
+              DO iw=0,nidbas
+                 contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
+                      &    fun(iw,iderw(iterm)) * wgauss(igauss)
+                 irow=i+iw; jcol=i+jt
+                 CALL updtmat(mat, irow, jcol, contrib)
+              END DO
+           END DO
+        END DO
+     END DO
+  END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(fun)
+  DEALLOCATE(xgauss, wgauss)
+  DEALLOCATE(iderw, idert, coefs)
+!
+CONTAINS
+  SUBROUTINE coefeq(x, idt, idw, c)
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(out) :: idt(:), idw(:)
+    DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt))
+!
+! Weak form = Int(x*dw/dx*dt/dx)dx
+!
+    c(1) = x
+    idt(1) = 1
+    idw(1) = 1
+  END SUBROUTINE coefeq
+END SUBROUTINE dismat
+!+++
+SUBROUTINE disrhs(kdiff, spl, rhs)
+!
+!   Assenbly the RHS using spline spl
+!
+  USE bsplines
+  IMPLICIT NONE
+  INTEGER, INTENT(in) :: kdiff
+  TYPE(spline1d), INTENT(in) :: spl
+  DOUBLE PRECISION, INTENT(out) :: rhs(:)
+  INTEGER :: nrank, nx, nidbas, ngauss
+  INTEGER :: i, igauss, left
+  DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
+  DOUBLE PRECISION:: contrib
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+  CALL get_dim(spl, nrank, nx, nidbas)
+!!$  WRITE(*,'(/a, 5i3)') 'nrank, nx, nidbas =', nrank, nx, nidbas
+!
+  ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives)
+!
+!   Gauss quadature
+!
+  CALL get_gauss(spl, ngauss)
+!!$  WRITE(*,'(/a, i3)') 'Gauss points and weights, ngauss =', ngauss
+  ALLOCATE(xgauss(ngauss), wgauss(ngauss))
+!===========================================================================
+!              2.0 Assembly loop
+!
+  rhs(1:nrank) = 0.0d0
+!
+  DO i=1,nx
+     CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
+!!$     WRITE(*,'(a,i3,(8f10.4))') 'i=',i, xgauss, wgauss
+     DO igauss=1,ngauss
+        CALL basfun(xgauss(igauss), spl, fun, i)
+        left = i-1
+!!$        WRITE(*,'(a,5i4)') '>>>igauss, left', igauss, left
+        contrib = wgauss(igauss)*rhseq(xgauss(igauss), kdiff)
+        rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1)
+     END DO
+  END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(fun)
+  DEALLOCATE(xgauss, wgauss)
+!
+CONTAINS
+  DOUBLE PRECISION FUNCTION rhseq(x,k)
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(in) :: k
+    rhseq = k*k*x**(k-1)
+  END FUNCTION rhseq
+END SUBROUTINE disrhs
diff --git a/examples/pde1d_eig_zmumps.f90 b/examples/pde1d_eig_zmumps.f90
new file mode 100644
index 0000000..ec17841
--- /dev/null
+++ b/examples/pde1d_eig_zmumps.f90
@@ -0,0 +1,460 @@
+!>
+!> @file pde1d_eig_zmumps.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!  Solving the following 1d differential  eqation using splines:
+!
+!  Solve the standard eigenvalue:
+!      A*x = \lambda *x or inv(A)*x = 1/\lambda * x using Arpack and MUMPS.
+!   where A is obtained from discretozation of
+!     -d/dr[r d/dr] f = k^2 r^(k-1)
+!
+  USE bsplines
+  USE mumps_bsplines
+  USE futils
+  IMPLICIT NONE
+  INTEGER :: nx, nidbas, ngauss, kdiff
+  INTEGER :: i, nrank
+  LOGICAL :: nlppform
+  DOUBLE PRECISION :: coefs(5)
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid
+  TYPE(spline1d) :: splx
+  TYPE(zmumps_mat) :: mat
+!
+  INTEGER :: ierr
+  INTEGER :: fid
+  CHARACTER(len=32) :: str
+  DOUBLE PRECISION :: seconds, t0, tmat, tfac, tarpack
+!
+!   Arpack:  Solve the standard eigenvalue problem
+!
+  INTEGER :: nev = 10, ncv = 10
+  LOGICAL :: nlinv = .FALSE.  ! Solve inv(A) = 1/\lambda * x if nlinv=.TRUE.
+  CHARACTER(len=2) :: which='SM'
+  INTEGER :: info=0   ! Use random vector to start the Arnoldi iterations
+  INTEGER :: ido=0    ! Reverse communications
+  LOGICAL :: rvec
+  LOGICAL, ALLOCATABLE :: select(:)
+  INTEGER :: iparam(11), ipntr(14), nconv
+  DOUBLE PRECISION :: tol=0.0d0
+  CHARACTER(len=1) :: bmat='I'
+!
+  INTEGER :: lworkl
+  DOUBLE COMPLEX, ALLOCATABLE :: workl(:), workd(:), workev(:)
+  DOUBLE COMPLEX, ALLOCATABLE :: eig_vals(:), eig_vecs(:,:), resid(:)
+  DOUBLE COMPLEX :: sigma
+  DOUBLE PRECISION, ALLOCATABLE :: rwork(:)
+!
+  INTERFACE
+     SUBROUTINE dismat(spl, mat)
+       USE bsplines
+       USE mumps_bsplines
+       TYPE(spline1d), INTENT(in) :: spl
+       TYPE(zmumps_mat), INTENT(inout) :: mat
+     END SUBROUTINE dismat
+     SUBROUTINE disrhs(kdiff, spl, rhs)
+       USE bsplines
+       INTEGER, INTENT(in) :: kdiff
+       TYPE(spline1d), INTENT(in) :: spl
+       DOUBLE PRECISION, INTENT(out) :: rhs(:)
+     END SUBROUTINE disrhs
+     SUBROUTINE meshdist(coefs, x, nx)
+       DOUBLE PRECISION, INTENT(in) :: coefs(5)
+       INTEGER, INTENT(iN) :: nx
+       DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+     END SUBROUTINE meshdist
+  END INTERFACE
+!
+  NAMELIST /newrun/ nx, nidbas, ngauss, kdiff, nlppform, coefs, &
+       &            nev, ncv, nlinv, which, tol
+!===========================================================================
+!              1.0 Prologue
+!
+  CALL mpi_init(ierr)
+!
+!   Read in data specific to run
+!
+  nx = 8     ! Number oh intevals in x
+  nidbas = 3 ! Degree of splines
+  ngauss = 4 ! Number of Gauss points/interval
+  kdiff = 2  ! Exponent of differential problem
+  nlppform = .TRUE. ! Use PPFORM for gridval or not
+  coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+                                                 ! see function FDIST in MESHDIST
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Define grid on x axis
+!
+  ALLOCATE(xgrid(0:nx))
+  xgrid(0) = 0.0d0
+  xgrid(nx) = 1.0d0
+  CALL meshdist(coefs, xgrid, nx)
+  WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
+  WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx)
+  WRITE(*,'(a/(10f8.3))') 'Diff XGRID**2', (xgrid(i)**2-xgrid(i-1)**2, i=1,nx)
+!===========================================================================
+!              2.0 Discretize the PDE
+!
+!   Set up spline
+!
+  t0 = seconds()
+  CALL set_spline(nidbas, ngauss, xgrid, splx, nlppform=nlppform)
+  CALL get_dim(splx, nrank)       ! Rank of the FE matrix
+  WRITE(*,'(a,l6)') 'splx%nlequid', splx%nlequid
+!
+!   FE matrix assembly
+!
+  WRITE(*,'(a,i6)') ' nrank', nrank
+  CALL init(nrank, 1, mat)
+  CALL dismat(splx, mat)
+  CALL to_mat(mat)
+  tmat = seconds() - t0
+!
+  mat%mumps_par%IRN => mat%mumps_par%IRN_loc ! Work around for single proc.
+  PRINT*, 'nnz_loc', mat%nnz_loc
+  PRINT*, 'mat%mumps_par%N', mat%mumps_par%N
+  PRINT*, 'mat%mumps_par%NZ_loc', mat%mumps_par%NZ_loc
+  PRINT*, 'size of mat%mumps_par%IRN', SIZE(mat%mumps_par%IRN)
+  PRINT*, 'mat%istart,mat%iend', mat%istart,mat%iend
+!  
+  CALL creatf('pde1d_eig_zmumps.h5', fid, 'PDE1D Result File', real_prec='d')
+  PRINT*, 'rank', mat%rank
+  PRINT*, 'nnz', mat%nnz
+!
+  CALL putmat(fid,'/MAT',mat,'FE matrix')
+!
+  IF(nlinv) THEN
+     t0 = seconds()
+     CALL factor(mat)
+     tfac = seconds()-t0
+  END IF
+!===========================================================================
+!              3.0 Solve the standard eigenvalue problem
+!
+  lworkl = 3*ncv**2 + 5*ncv
+  ALLOCATE(workl(lworkl))
+  ALLOCATE(workd(3*nrank))
+  ALLOCATE(workev(2*ncv))
+  ALLOCATE(eig_vals(ncv), eig_vecs(nrank,ncv))
+  ALLOCATE(resid(nrank))
+  ALLOCATE(rwork(ncv))
+!
+  iparam(1) = 1   ! shfts
+  iparam(3) = 300 ! Max. number of iterations
+  iparam(7) = 1   ! Regular mode
+!
+! The reverse communication loop
+!
+  t0 = seconds()
+  DO
+     CALL znaupd  (ido, bmat, nrank, which, nev, tol, resid, ncv, &
+          &        eig_vecs, nrank, iparam, ipntr, workd, workl, lworkl, &
+          &        rwork, info )
+!
+     IF(ido .EQ. -1 .OR. ido .EQ. 1) THEN   ! Compute A*v
+        CALL av(nrank, workd(ipntr(1)), workd(ipntr(2)))
+        CYCLE
+     END IF
+!
+     IF(info .LT. 0) THEN  ! Error
+        PRINT*, 'Error in _naupd with info =', info
+        EXIT
+     ELSE
+        rvec = .TRUE.
+        ALLOCATE(select(ncv))
+        CALL zneupd  (rvec, 'A', select, eig_vals, eig_vecs, nrank, &
+             &       sigma, workev, bmat, nrank, which, nev, tol, resid, &
+             &       ncv, eig_vecs, nrank, iparam, ipntr, workd, workl, lworkl,&
+             &       rwork, ierr)
+        IF(ierr .NE. 0) THEN
+           PRINT*, 'Error in _neupd with ierr =', ierr
+           EXIT
+        ELSE
+           nconv = iparam(5)
+           PRINT*,'Number of converged eigenvalues', nconv
+           IF(nlinv) THEN
+              eig_vals(1:nconv) = (1.d0,0.0d0) / eig_vals(1:nconv)
+           END IF
+           WRITE(*,'(2(1pe12.3))') eig_vals(1:nconv)
+           CALL putarr(fid, '/eig_vals', eig_vals(1:nconv))
+           CALL putarr(fid, '/eig_vecs', eig_vecs(1:nrank,1:nconv))
+           DO i=1,nconv
+!!$              WRITE(*,'(/a,2(pe20.6))') '*** eigen value =', eig_vals(i)
+!!$              WRITE(*,'(a/(10(1pe12.4)))') 'Real of eigen vector', &
+!!$                   &                        REAL(eig_vecs(1:nrank,i))
+!!$              WRITE(*,'(a/(10(1pe12.4)))') 'Imag of eigen vector', &
+!!$                   &                        aimag(eig_vecs(1:nrank,i))
+              WRITE(str,'(a,i3.3)') '/eig_vecs_',i
+              CALL putarr(fid, TRIM(str), eig_vecs(1:nrank,i))
+           END DO
+           EXIT
+        END IF
+     END IF
+  END DO  ! End of reverse commuinication loop
+  IF(info .EQ. 1) THEN
+     PRINT*, 'Maximum number of iterations reached!'
+     PRINT*, 'IPARAM(5) =', iparam(5)
+  END IF
+  PRINT*, 'Number of Arnoldi iterations', iparam(3)
+  tarpack = seconds() - t0
+!===========================================================================
+!              9.0  Epilogue
+!
+  WRITE(*,'(a)') '---'
+  WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s)  ', tmat
+  IF(nlinv) THEN
+     WRITE(*,'(a,1pe12.3)') 'Matrice factorization time (s) ', tfac
+  END IF
+  WRITE(*,'(a,1pe12.3)') 'Arpack time (s)                ', tarpack
+!
+  DEALLOCATE(xgrid)
+  CALL destroy(mat)
+  CALL destroy_sp(splx)
+  CALL closef(fid)
+  CALL mpi_finalize(ierr)
+!
+CONTAINS
+  SUBROUTINE av(n,v,w)
+!
+    INTEGER, INTENT(in) :: n
+    DOUBLE COMPLEX, INTENT(in) :: v(*)
+    DOUBLE COMPLEX, INTENT(out)   :: w(*)
+!
+    IF(nlinv) THEN
+       w(1:n) = v(1:n)
+       CALL bsolve(mat,w(1:n))  ! Solve A*w = v or w=inv(A)*v
+    ELSE
+       w(1:n) = vmx(mat,v(1:n)) ! A*v matrix product
+    END IF
+  END SUBROUTINE av
+!
+  SUBROUTINE coefeq(x, idt, idw, c)
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(out) :: idt(:), idw(:)
+    DOUBLE PRECISION, INTENT(out) :: c(:)
+!
+! Weak form = Int(x*dw/dx*dt/dx)dx
+!
+    c(1) = x
+    idt(1) = 1
+    idw(1) = 1
+  END SUBROUTINE coefeq
+END PROGRAM main
+!
+!+++
+SUBROUTINE meshdist(c, x, nx)
+!
+!   Construct an 1d  non-equidistant mesh given a
+!   mesh distribution function defined in FDIST
+!
+  IMPLICIT NONE
+  DOUBLE PRECISION, INTENT(in) :: c(5)
+  INTEGER, INTENT(iN) :: nx
+  DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+  INTEGER :: nintg
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+  DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+  INTEGER :: i, k
+!
+  a=x(0)
+  b=x(nx)
+  nintg = 10*nx
+  ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+  dx = (b-a)/REAL(nintg)
+  xint(0) = a
+  fint(0) = 0.0d0
+  f1 = fdist(xint(0))
+  DO i=1,nintg
+     f0 = f1
+     xint(i) = xint(i-1) + dx
+     f1 = fdist(xint(i))
+     fint(i) = fint(i-1) + 0.5*(f0+f1)
+  END DO
+!
+!  Normalization
+!
+  scal = REAL(nx) / fint(nintg)
+  fint(0:nintg) = fint(0:nintg) * scal
+!!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+  k = 1
+  DO i=1,nintg-1
+     IF( fint(i) .GE. REAL(k) ) THEN
+        x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+             &   (k-fint(i))
+        k = k+1
+     END IF
+  END DO
+!
+  DEALLOCATE(xint, fint)
+CONTAINS
+  DOUBLE PRECISION FUNCTION fdist(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
+  END FUNCTION fdist
+END SUBROUTINE meshdist
+!+++
+SUBROUTINE dismat(spl, mat)
+!
+!   Assembly FE matrix mat using spline spl
+!
+  USE bsplines
+  USE mumps_bsplines
+  IMPLICIT NONE
+  TYPE(spline1d), INTENT(in) :: spl
+  TYPE(zmumps_mat), INTENT(inout) :: mat
+  INTEGER :: nrank, nx, nidbas, ngauss
+  INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
+  DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
+  DOUBLE COMPLEX                :: contrib
+!
+  INTEGER :: kterms                ! Number of terms in weak form
+  INTEGER, ALLOCATABLE :: idert(:), iderw(:)  ! Derivative order
+  DOUBLE PRECISION, ALLOCATABLE :: coefs(:)
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+  CALL get_dim(spl, nrank, nx, nidbas)
+  ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
+!
+!   Weak form
+!
+  kterms = mat%nterms
+  ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
+!
+!   Gauss quadature
+!
+  CALL get_gauss(spl, ngauss)
+  ALLOCATE(xgauss(ngauss), wgauss(ngauss))
+!===========================================================================
+!              2.0 Assembly loop
+!
+  DO i=1,nx
+     CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
+     DO igauss=1,ngauss
+        CALL basfun(xgauss(igauss), spl, fun, i)
+        CALL coefeq(xgauss(igauss), idert, iderw, coefs)
+        DO iterm=1,kterms
+           DO jt=0,nidbas
+              DO iw=0,nidbas
+                 contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
+                      &    fun(iw,iderw(iterm)) * wgauss(igauss)
+                 irow=i+iw; jcol=i+jt
+                 CALL updtmat(mat, irow, jcol, contrib)
+              END DO
+           END DO
+        END DO
+     END DO
+  END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(fun)
+  DEALLOCATE(xgauss, wgauss)
+  DEALLOCATE(iderw, idert, coefs)
+!
+CONTAINS
+  SUBROUTINE coefeq(x, idt, idw, c)
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(out) :: idt(:), idw(:)
+    DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt))
+!
+! Weak form = Int(x*dw/dx*dt/dx)dx
+!
+    c(1) = x
+    idt(1) = 1
+    idw(1) = 1
+  END SUBROUTINE coefeq
+END SUBROUTINE dismat
+!+++
+SUBROUTINE disrhs(kdiff, spl, rhs)
+!
+!   Assenbly the RHS using spline spl
+!
+  USE bsplines
+  IMPLICIT NONE
+  INTEGER, INTENT(in) :: kdiff
+  TYPE(spline1d), INTENT(in) :: spl
+  DOUBLE PRECISION, INTENT(out) :: rhs(:)
+  INTEGER :: nrank, nx, nidbas, ngauss
+  INTEGER :: i, igauss, left
+  DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
+  DOUBLE PRECISION:: contrib
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+  CALL get_dim(spl, nrank, nx, nidbas)
+!!$  WRITE(*,'(/a, 5i3)') 'nrank, nx, nidbas =', nrank, nx, nidbas
+!
+  ALLOCATE(fun(nidbas+1,1)) ! needs only basis functions (no derivatives)
+!
+!   Gauss quadature
+!
+  CALL get_gauss(spl, ngauss)
+!!$  WRITE(*,'(/a, i3)') 'Gauss points and weights, ngauss =', ngauss
+  ALLOCATE(xgauss(ngauss), wgauss(ngauss))
+!===========================================================================
+!              2.0 Assembly loop
+!
+  rhs(1:nrank) = 0.0d0
+!
+  DO i=1,nx
+     CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
+!!$     WRITE(*,'(a,i3,(8f10.4))') 'i=',i, xgauss, wgauss
+     DO igauss=1,ngauss
+        CALL basfun(xgauss(igauss), spl, fun, i)
+        left = i-1
+!!$        WRITE(*,'(a,5i4)') '>>>igauss, left', igauss, left
+        contrib = wgauss(igauss)*rhseq(xgauss(igauss), kdiff)
+        rhs(i:i+nidbas) = rhs(i:i+nidbas) + contrib*fun(:,1)
+     END DO
+  END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(fun)
+  DEALLOCATE(xgauss, wgauss)
+!
+CONTAINS
+  DOUBLE PRECISION FUNCTION rhseq(x,k)
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(in) :: k
+    rhseq = k*k*x**(k-1)
+  END FUNCTION rhseq
+END SUBROUTINE disrhs
diff --git a/examples/pde1dp.f90 b/examples/pde1dp.f90
new file mode 100644
index 0000000..7eb32bd
--- /dev/null
+++ b/examples/pde1dp.f90
@@ -0,0 +1,170 @@
+!>
+!> @file pde1dp.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   1D PDE with priodic BC
+!
+  USE pde1dp_mod
+  USE bsplines
+  USE matrix
+  USE futils
+  USE conmat_mod
+!
+  IMPLICIT NONE
+  CHARACTER(len=128) :: file='pde1dp.h5'
+  INTEGER :: fid
+  INTEGER :: nx, nidbas, ngauss, ibcoef
+  INTEGER :: nrank, kl, ku, dim
+  DOUBLE PRECISION :: coefs(5)
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, sol, rhs
+  INTEGER, PARAMETER :: npts=100
+  DOUBLE PRECISION, DIMENSION(0:npts-1) :: xpts, frhs
+  DOUBLE PRECISION :: dx, errmx
+  DOUBLE PRECISION, ALLOCATABLE :: arr(:,:)
+  TYPE(periodic_mat) :: mat
+  INTEGER :: i, j
+!
+  NAMELIST /newrun/ nx, nidbas, ngauss, ibcoef, coefs
+!===========================================================================
+!              1.0 Prologue
+!
+!   Read in data specific to run
+!
+  nx = 8     ! Number oh intevals in x
+  nidbas = 3 ! Degree of splines
+  ngauss = 4 ! Number of Gauss points/interval
+  ibcoef = 1 ! Index of non-zero spline coef for RHS
+  coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+                                                 ! see function FDIST in MESHDIST
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Define grid on x axis
+!
+  ALLOCATE(xgrid(0:nx))
+  xgrid(0) = 0.0d0
+  xgrid(nx) = 1.0d0
+  CALL meshdist(coefs, xgrid, nx)
+  WRITE(*,'(/a/(10f8.3))') 'XGRID', xgrid(0:nx)
+!
+!   Create hdf5 file
+!
+  CALL creatf(file, fid, 'PDE1DP Result File')
+  CALL attach(fid, '/', 'NX', nx)
+  CALL attach(fid, '/', 'NIDBAS', nidbas)
+  CALL attach(fid, '/', 'NGAUSS', ngauss)
+!===========================================================================
+!              2.0 Discretize the PDE
+!
+!   Set up periodic spline
+!
+  CALL set_spline(nidbas, ngauss, xgrid, splx, period=.TRUE.)
+  nrank = nx       ! Rank of the FE matrix
+!
+!   Mass matrix assembly
+!
+  kl = nidbas
+  ku = kl
+  CALL init(kl, ku, nrank, 1, mat)
+  CALL get_dim(splx, dim)
+  WRITE(*,'(/a,4i6)') 'kl, ku, nrank, dim', kl, ku, nrank, dim
+!!$  CALL dismat(splx, mat)
+  CALL conmat(splx, mat, coefeq_mass)
+!
+!   Store matrix in hdf5 file
+!
+  ALLOCATE(arr(nrank,nrank))
+  DO j=1,nrank
+     CALL getcol(mat, j, arr(:,j))
+  END DO
+  CALL putarr(fid, '/mata', arr)
+  DEALLOCATE(arr)
+!
+!   Check RHS constructed using input spline coefs.
+!
+  ALLOCATE(bcoef(0:dim-1))
+  bcoef = 0.0d0; bcoef(ibcoef-1) = 1.0d0
+!
+  DO i=nrank,dim-1  ! Periodicity to fill array of spline coefs
+     bcoef(i) = bcoef(MODULO(i,nrank))
+  END DO
+  WRITE(*,'(/a/(10f8.3))') 'bcoef from input', bcoef
+  dx = (xgrid(nx)-xgrid(0))/npts
+  DO i=0,npts-1
+     xpts(i) = xgrid(0) + i*dx
+     frhs(i) = rhseq(xpts(i))
+  END DO
+  CALL creatg(fid, '/rhs')
+  CALL putarr(fid,'/rhs/x', xpts)
+  CALL putarr(fid,'/rhs/f', frhs)
+!
+!   Assembly RHS and check A*x = f, using method vmx
+!
+  ALLOCATE(rhs(nrank), sol(nrank))
+  CALL disrhs(splx, rhs)
+  sol = vmx(mat, bcoef(0:nrank-1))
+  WRITE(*,'(/6x,3a12)') 'A*x', 'rhs', 'Err'
+  errmx = 0.0d0
+  DO i=1,nrank
+     WRITE(*,'(i6,3(1pe12.3))') i, sol(i), rhs(i), sol(i)-rhs(i)
+     errmx=MAX(errmx,ABS(sol(i)-rhs(i)))
+  END DO
+  WRITE(*,'(a,1pe12.3)') 'Max. error =', errmx
+!
+!   Factor and solve
+!
+  CALL factor(mat)
+  CALL bsolve(mat, rhs, sol)
+  WRITE(*,'(/6x,3a12)') 'Computed', 'Exact', 'Err'
+  errmx = 0.0d0
+  DO i=1,nrank
+     WRITE(*,'(i6,3(1pe12.3))') i, sol(i), bcoef(i-1), sol(i)-bcoef(i-1)
+     errmx=MAX(errmx,ABS(sol(i)-bcoef(i-1)))
+  END DO
+  WRITE(*,'(a,1pe12.3)') 'Max. error =', errmx
+!===========================================================================
+!              9.0  Clean up
+!
+  DEALLOCATE(xgrid)
+  DEALLOCATE(bcoef)
+  DEALLOCATE(rhs, sol)
+  CALL destroy(mat)
+  CALL destroy_sp(splx)
+  CALL closef(fid)  
+CONTAINS
+  SUBROUTINE coefeq_mass(x, idt, idw, c)
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(out) :: idt(:), idw(:)
+    DOUBLE PRECISION, INTENT(out) :: c(:)
+!
+! Mass matrix
+!
+    c(1) = 1.0d0
+    idt(1) = 0
+    idw(1) = 0
+  END SUBROUTINE coefeq_mass
+END PROGRAM main
diff --git a/examples/pde1dp_cmpl.f90 b/examples/pde1dp_cmpl.f90
new file mode 100644
index 0000000..48b2d1e
--- /dev/null
+++ b/examples/pde1dp_cmpl.f90
@@ -0,0 +1,403 @@
+!>
+!> @file pde1dp_cmpl.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+MODULE pde1dp_cmpl_mod
+  USE bsplines
+  USE matrix
+  IMPLICIT NONE
+!
+CONTAINS
+  SUBROUTINE disrhs(spl, rhs, mmode, alpha, beta)
+!
+  TYPE(spline1d) :: spl
+  INTEGER, INTENT(in) :: mmode
+  DOUBLE COMPLEX, INTENT(in) :: alpha, beta
+  DOUBLE COMPLEX, INTENT(out) :: rhs(:)   
+!
+  INTEGER :: dim, nrank, nx, nidbas, ngauss
+  INTEGER :: i, igauss, it, irow
+  DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
+  DOUBLE COMPLEX :: contrib
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+  nrank = SIZE(rhs)
+  CALL get_dim(spl, dim, nx, nidbas)
+!
+  ALLOCATE(fun(0:nidbas,1)) ! needs only basis functions (no derivatives)
+!
+!   Gauss quadature
+!
+  CALL get_gauss(spl, ngauss)
+  ALLOCATE(xgauss(ngauss), wgauss(ngauss))
+!===========================================================================
+!              2.0 Assembly loop
+!
+  rhs(:) = 0.0d0
+!
+  DO i=1,nx
+     CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
+     DO igauss=1,ngauss
+        CALL basfun(xgauss(igauss), spl, fun, i)
+        contrib = wgauss(igauss) * rhseq(xgauss(igauss))
+        DO it=0,nidbas
+           irow=MODULO(i+it-1,nx) + 1   ! Periodic BC
+           rhs(irow) = rhs(irow) + contrib*fun(it,1)
+        END DO
+     END DO
+  END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(fun)
+  DEALLOCATE(xgauss, wgauss)
+CONTAINS
+  DOUBLE COMPLEX FUNCTION rhseq(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    DOUBLE PRECISION :: arg
+    arg = mmode*x
+    rhseq = (mmode**2*alpha-beta)*COS(arg)
+  END FUNCTION rhseq
+END SUBROUTINE disrhs
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE dismat(spl, mat, alpha, beta)
+!
+  TYPE(spline1d) :: spl
+  TYPE(zperiodic_mat) :: mat
+  DOUBLE COMPLEX, INTENT(in) :: alpha, beta
+!
+  INTEGER :: dim, nx, nidbas, ngauss
+  INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
+  DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
+  DOUBLE COMPLEX :: contrib
+!
+  INTEGER :: kterms                ! Number of terms in weak form
+  INTEGER, ALLOCATABLE :: idert(:), iderw(:)  ! Derivative order
+  DOUBLE COMPLEX, ALLOCATABLE :: coefs(:)
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+  CALL get_dim(spl, dim, nx, nidbas)
+  ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
+!
+!   Weak form
+!
+  kterms = mat%mat%nterms
+  ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
+!
+!   Gauss quadature
+!
+  CALL get_gauss(spl, ngauss)
+  ALLOCATE(xgauss(ngauss), wgauss(ngauss))
+!===========================================================================
+!              2.0 Assembly loop
+!
+  DO i=1,nx
+     CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
+     DO igauss=1,ngauss
+        CALL basfun(xgauss(igauss), spl, fun, i)
+        CALL coefeq(xgauss(igauss), idert, iderw, coefs)
+        DO iterm=1,kterms
+           DO jt=0,nidbas
+              DO iw=0,nidbas
+                 contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
+                      &    fun(iw,iderw(iterm)) * wgauss(igauss)
+                 irow=MODULO(i+iw-1,nx) + 1   ! Periodic BC
+                 jcol=MODULO(i+jt-1,nx) + 1
+                 CALL updtmat(mat, irow, jcol, contrib)
+              END DO
+           END DO
+        END DO
+     END DO
+  END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(fun)
+  DEALLOCATE(xgauss, wgauss)
+  DEALLOCATE(iderw, idert, coefs)
+!
+CONTAINS
+  SUBROUTINE coefeq(x, idt, idw, c)
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(out) :: idt(:), idw(:)
+    DOUBLE COMPLEX, INTENT(out) :: c(SIZE(idt))
+!
+    c(1) = alpha
+    idt(1) = 1
+    idw(1) = 1
+!
+    c(2) = -beta
+    idt(2) = 0
+    idw(2) = 0
+  END SUBROUTINE coefeq
+END SUBROUTINE dismat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE save_mat(fid, label, mat)
+!
+!   Save zperiodic_mat in dense format
+!
+    USE futils
+!
+    INTEGER, INTENT(in) :: fid
+    CHARACTER(len=*), INTENT(in) :: label
+    TYPE(zperiodic_mat) :: mat
+    INTEGER :: j, n
+    DOUBLE COMPLEX, ALLOCATABLE :: fullmat(:,:)
+!
+    n=mat%mat%rank
+    ALLOCATE(fullmat(n,n))
+    DO j=1,n
+       CALL getcol(mat, j, fullmat(:,j))
+    END DO
+    CALL putarr(fid, label, fullmat)
+    DEALLOCATE(fullmat)
+  END SUBROUTINE save_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  FUNCTION norm2(x)
+!
+!  Compute the 2-norm of complex array x
+!
+    IMPLICIT NONE
+    DOUBLE PRECISION :: norm2
+    DOUBLE COMPLEX, INTENT(in) :: x(:)
+    DOUBLE PRECISION :: sum2
+!
+    sum2 = DOT_PRODUCT(x,x)
+    norm2 = SQRT(sum2)
+  END FUNCTION norm2
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE meshdist(mmode, x)
+!
+!   Construct a 1d  non-equidistant mesh given a
+!   mesh distribution function.
+!
+    INTEGER, INTENT(in) :: mmode
+    DOUBLE PRECISION, INTENT(inout) :: x(0:)
+    INTEGER :: nx, nintg
+    DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+    DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+    INTEGER :: i, k
+!
+    nx = SIZE(x)-1
+    a=x(0)
+    b=x(nx)
+    nintg = 10*nx
+    ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+    dx = (b-a)/REAL(nintg)
+    xint(0) = a
+    fint(0) = 0.0d0
+    f1 = fdist(xint(0))
+    DO i=1,nintg
+       f0 = f1
+       xint(i) = xint(i-1) + dx
+       f1 = fdist(xint(i))
+       fint(i) = fint(i-1) + 0.5*(f0+f1)
+    END DO
+!
+!  Normalization
+!
+    scal = REAL(nx) / fint(nintg)
+    fint(0:nintg) = fint(0:nintg) * scal
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+    k = 1
+    DO i=1,nintg-1
+       IF( fint(i) .GE. REAL(k) ) THEN
+          x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+               &   (k-fint(i))
+          k = k+1
+       END IF
+    END DO
+!
+    DEALLOCATE(xint, fint)
+  CONTAINS
+    DOUBLE PRECISION FUNCTION fdist(x)
+      DOUBLE PRECISION, INTENT(in) :: x
+      fdist = 2.0 + COS(mmode*x)
+    END FUNCTION fdist
+  END SUBROUTINE meshdist
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+END MODULE pde1dp_cmpl_mod
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+!
+PROGRAM main
+!
+!   1D complex PDE with periodic BC
+!
+  USE pde1dp_cmpl_mod
+  USE bsplines
+  USE matrix
+  USE futils
+!
+  IMPLICIT NONE
+  TYPE(spline1d) :: splx
+  TYPE(zperiodic_mat) :: mat
+  INTEGER :: kl, ku, nrank
+!
+  CHARACTER(len=128) :: file='pde1dp_cmpl.h5'
+  INTEGER :: fid
+  INTEGER :: nx, nidbas, ngauss, mmode, npt, dim
+  LOGICAL :: nlequid
+  DOUBLE PRECISION, PARAMETER :: pi=3.1415926535897931d0
+  DOUBLE PRECISION :: dx
+  DOUBLE COMPLEX :: alpha, beta
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, x, err
+  DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE ::  sol, rhs, bcoef
+  DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE ::  solcal, solana
+  DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
+  DOUBLE PRECISION :: err_norm
+  INTEGER :: i
+!
+  NAMELIST /newrun/ nx, nidbas, ngauss, nlequid, alpha, beta, mmode, npt
+!===========================================================================
+!              1.0 Prologue
+!
+!   Read in data specific to run
+!
+  nx = 8             ! Number oh intevals in x
+  nidbas = 3         ! Degree of splines
+  ngauss = 4         ! Number of Gauss points/interval
+  nlequid = .TRUE.   ! Use exact sol. as mesh dist. function if .FALSE.
+  mmode = 1          ! Fourier mode
+  alpha = (1.0, 1.0) ! Complex "diffusion"
+  beta  = 1.0
+  npt = 100
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Define grid on x axis
+!
+  ALLOCATE(xgrid(0:nx))
+  dx = 2.d0*pi/REAL(nx,8)
+  xgrid = (/ (i*dx,i=0,nx) /)
+  IF( .NOT. nlequid ) THEN
+     CALL meshdist(mmode, xgrid)
+  END IF
+  WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
+!
+!   Create hdf5 file
+!
+  CALL creatf(file, fid, 'PDE1DP Result File', real_prec='d')
+  CALL attach(fid, '/', 'NX', nx)
+  CALL attach(fid, '/', 'NIDBAS', nidbas)
+  CALL putarr(fid, '/xgrid', xgrid)
+!===========================================================================
+!              2.0 Discretize the PDE
+!
+!   Set up periodic spline
+!
+  CALL set_spline(nidbas, ngauss, xgrid, splx, period=.TRUE.)
+  WRITE(*,'(a,l6)') 'nlequid =', nlequid
+  nrank = nx       ! Rank of the FE matrix
+!
+!   FE matrix assembly
+!
+  kl = nidbas
+  ku = kl
+  CALL init(kl, ku, nrank, 2, mat)
+  CALL get_dim(splx, dim)
+  WRITE(*,'(/a,4i6)') 'kl, ku, nrank, dim', kl, ku, nrank, dim
+  CALL dismat(splx, mat, alpha, beta)
+!
+!   RHS assembly
+!  
+  ALLOCATE(rhs(nrank), sol(nrank))
+  CALL disrhs(splx, rhs, mmode, alpha, beta)
+!
+  CALL save_mat(fid, '/mat', mat)
+  CALL putarr(fid, '/rhs', rhs)
+!
+!   Factor and solve
+!
+  CALL factor(mat)
+  CALL bsolve(mat, rhs, sol)
+  CALL putarr(fid, '/sol', sol)
+!===========================================================================
+!              3.0   Check solution
+!
+!   Exact solution
+  ALLOCATE(x(0:npt), solcal(0:npt), solana(0:npt), err(0:npt))
+  dx=2.0d0*pi/REAL(npt,8)
+  x = (/ (i*dx, i=0,npt) /)
+  solana = COS(mmode*x)
+!
+!   Prolongate solution using periodicity
+!
+  ALLOCATE(bcoef(dim))
+  bcoef(1:nrank) = sol(1:nrank)
+  DO i=nrank+1,dim
+     bcoef(i) = bcoef(MODULO(i-1,nrank)+1)
+  END DO
+!
+!   Interpolate field
+!
+  CALL gridval(splx, x, solcal, 0, bcoef)
+!
+  err = ABS(solcal-solana)
+  CALL putarr(fid, '/x', x)
+  CALL putarr(fid, '/solana', solana)
+  CALL putarr(fid, '/solcal', solcal)
+  CALL putarr(fid, '/err', err)
+!
+!   Compute discretization error norm by Gauss integration
+!
+  err_norm=0.0
+  ALLOCATE(xgauss(ngauss), wgauss(ngauss))
+  DO i=1,nx
+     CALL get_gauss(splx, ngauss, i, xgauss, wgauss)
+     CALL gridval(splx, xgauss, solcal(1:ngauss), 0)
+     solana(1:ngauss) = COS(mmode*xgauss)
+     err(1:ngauss) = DOT_PRODUCT(solana(1:ngauss)-solcal(1:ngauss), &
+          &          solana(1:ngauss)-solcal(1:ngauss))
+     err_norm = err_norm + SUM(wgauss*err(1:ngauss))
+  END DO
+  err_norm = SQRT(err_norm)
+  WRITE(*,'(a,1pe12.3)') 'Discretization error ',  err_norm
+!
+  DEALLOCATE(x, solcal, solana, err)
+  DEALLOCATE(xgauss, wgauss)
+  DEALLOCATE(bcoef)
+!===========================================================================
+!              9.0  Clean up
+!
+  DEALLOCATE(xgrid)
+  DEALLOCATE(rhs, sol)
+  CALL destroy(mat)
+  CALL destroy_sp(splx)
+  CALL closef(fid)  
+END PROGRAM main
diff --git a/examples/pde1dp_cmpl_dft.f90 b/examples/pde1dp_cmpl_dft.f90
new file mode 100644
index 0000000..d866885
--- /dev/null
+++ b/examples/pde1dp_cmpl_dft.f90
@@ -0,0 +1,290 @@
+!>
+!> @file pde1dp_cmpl_dft.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+MODULE pde1dp_cmpl_dft_mod
+  USE bsplines
+  IMPLICIT NONE
+!
+CONTAINS
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE dismat(spl, mat, alpha, beta)
+!
+    USE bsplines
+    TYPE(spline1d)             :: spl
+    DOUBLE COMPLEX             :: mat(:)
+    DOUBLE COMPLEX, INTENT(in) :: alpha, beta
+!
+    INTEGER :: dim, nx, nidbas, ngauss, intv, igauss
+    DOUBLE COMPLEX, ALLOCATABLE :: ft_fun(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+  CALL get_dim(spl, dim, nx, nidbas)
+  CALL get_gauss(spl, ngauss)
+  ALLOCATE(xgauss(ngauss), wgauss(ngauss))
+  ALLOCATE(ft_fun(0:nx-1,2))  ! Up to first derivative
+!
+!   Weak form: integration on first interval
+!
+  intv = 1
+  CALL get_gauss(spl, ngauss, intv, xgauss, wgauss)
+  mat = 0.0d0
+  DO igauss=1,ngauss
+     CALL ft_basfun(xgauss(igauss), spl, ft_fun, intv)
+     mat(:) = mat(:) + wgauss(igauss) * ( &
+          &       alpha*ft_fun(:,2)*CONJG(ft_fun(:,2)) &
+          &      - beta*ft_fun(:,1)*CONJG(ft_fun(:,1))  &
+          &       )
+  END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(ft_fun)
+  DEALLOCATE(xgauss, wgauss)
+!    
+  END SUBROUTINE dismat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE disrhs(spl, rhs, mmode, alpha, beta)
+!
+    TYPE(spline1d) :: spl
+    INTEGER, INTENT(in) :: mmode
+    DOUBLE COMPLEX, INTENT(in) :: alpha, beta
+    DOUBLE COMPLEX, INTENT(out) :: rhs(:)   
+!
+    INTEGER :: dim, nrank, nx, nidbas, ngauss
+    INTEGER :: i, igauss, it, irow
+    DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
+    DOUBLE COMPLEX :: contrib
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    nrank = SIZE(rhs)
+    CALL get_dim(spl, dim, nx, nidbas)
+!
+    ALLOCATE(fun(0:nidbas,1)) ! needs only basis functions (no derivatives)
+!
+!   Gauss quadature
+!
+    CALL get_gauss(spl, ngauss)
+    ALLOCATE(xgauss(ngauss), wgauss(ngauss))
+!===========================================================================
+!              2.0 Assembly loop
+!
+    rhs(:) = 0.0d0
+!
+    DO i=1,nx
+       CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
+       DO igauss=1,ngauss
+          CALL basfun(xgauss(igauss), spl, fun, i)
+          contrib = wgauss(igauss) * rhseq(xgauss(igauss))
+          DO it=0,nidbas
+             irow=MODULO(i+it-1,nx) + 1   ! Periodic BC
+             rhs(irow) = rhs(irow) + contrib*fun(it,1)
+          END DO
+       END DO
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(fun)
+    DEALLOCATE(xgauss, wgauss)
+  CONTAINS
+    DOUBLE COMPLEX FUNCTION rhseq(x)
+      DOUBLE PRECISION, INTENT(in) :: x
+      DOUBLE PRECISION :: arg
+      arg = mmode*x
+      rhseq = (mmode**2*alpha-beta)*COS(arg)
+    END FUNCTION rhseq
+  END SUBROUTINE disrhs
+END MODULE pde1dp_cmpl_dft_mod
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+!
+PROGRAM main
+!
+!   1D complex PDE with periodic BC, using DFT
+!
+  USE pde1dp_cmpl_dft_mod
+  USE bsplines
+  USE matrix
+  USE futils
+  USE fft
+!
+  IMPLICIT NONE
+  TYPE(spline1d) :: splx
+  DOUBLE COMPLEX, ALLOCATABLE :: mat(:)
+  INTEGER ::nrank
+!
+  CHARACTER(len=128) :: file='pde1dp_cmpl_dft.h5'
+  INTEGER :: fid
+  INTEGER :: nx, nidbas, ngauss, mmode, npt, dim
+  DOUBLE PRECISION, PARAMETER :: pi=3.1415926535897931d0
+  DOUBLE PRECISION :: dx
+  DOUBLE COMPLEX :: alpha, beta
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, x, err
+  DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE ::  sol, rhs, bcoef
+  DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE ::  sol_shifted, rhs_shifted
+  DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE ::  solcal, solana
+  DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
+  DOUBLE PRECISION :: err_norm
+  INTEGER :: i
+  INTEGER :: k, kmin, kmax
+!
+  NAMELIST /newrun/ nx, nidbas, ngauss, alpha, beta, mmode, npt
+!===========================================================================
+!              1.0 Prologue
+!
+!   Read in data specific to run
+!
+  nx = 8             ! Number oh intevals in x
+  nidbas = 3         ! Degree of splines
+  ngauss = 4         ! Number of Gauss points/interval
+  mmode = 1          ! Fourier mode
+  alpha = (1.0, 1.0) ! Complex "diffusion"
+  beta  = 1.0
+  npt = 100
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Define grid on x axis
+!
+  ALLOCATE(xgrid(0:nx))
+  dx = 2.d0*pi/REAL(nx,8)
+  xgrid = (/ (i*dx,i=0,nx) /)
+  WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
+!
+!   Create hdf5 file
+!
+  CALL creatf(file, fid, 'PDE1DP Result File', real_prec='d')
+  CALL attach(fid, '/', 'NX', nx)
+  CALL attach(fid, '/', 'NIDBAS', nidbas)
+  CALL putarr(fid, '/xgrid', xgrid)
+!===========================================================================
+!              2.0 Discretize the PDE
+!
+!   Set up periodic spline
+!
+  CALL set_spline(nidbas, ngauss, xgrid, splx, period=.TRUE.)
+  nrank = nx       ! Rank of the FE matrix
+  CALL get_dim(splx, dim)
+  WRITE(*,'(/a,4i6)') 'nrank, dim', nrank, dim
+!
+!   Init DFT
+  kmin = -nx/2
+  kmax = nx/2-1
+  CALL init_dft(splx, kmin, kmax)
+!
+!   FE matrix assembly in Fourier space
+!
+  ALLOCATE(mat(0:nx-1))
+  CALL dismat(splx, mat, alpha, beta)
+  CALL putarr(fid, '/mat', mat)
+!
+!   RHS assembly in real space
+!  
+  ALLOCATE(rhs(nrank), sol(nrank))
+  CALL disrhs(splx, rhs, mmode, alpha, beta)
+!===========================================================================
+!              3.0 Solve the dicretized system
+!
+!  Fourier solve
+!
+  CALL putarr(fid, '/rhs', rhs)
+!
+  CALL fourcol(rhs, 1)
+!
+  ALLOCATE(rhs_shifted(kmin:kmax))
+  ALLOCATE(sol_shifted(kmin:kmax))
+  DO k=kmin,kmax
+     rhs_shifted(k) = rhs(MODULO(k+nx,nx)+1)/REAL(nx,8)
+  END DO
+  sol_shifted = rhs_shifted / mat
+  DO k=kmin,kmax
+     sol(MODULO(k+nx,nx)+1) = sol_shifted(k)
+  END DO
+!
+  CALL putarr(fid, '/rhs_fft', rhs)
+  CALL putarr(fid, '/sol_fft', sol)
+!
+!   Solution in real space
+!
+  CALL fourcol(sol,-1)
+  CALL putarr(fid, '/sol', sol)
+!===========================================================================
+!              4.0   Check solution
+!
+!   Exact solution
+  ALLOCATE(x(0:npt), solcal(0:npt), solana(0:npt), err(0:npt))
+  dx=2.0d0*pi/REAL(npt,8)
+  x = (/ (i*dx, i=0,npt) /)
+  solana = COS(mmode*x)
+!
+!   Prolongate solution using periodicity
+!
+  ALLOCATE(bcoef(dim))
+  bcoef(1:nrank) = sol(1:nrank)
+  DO i=nrank+1,dim
+     bcoef(i) = bcoef(MODULO(i-1,nrank)+1)
+  END DO
+!
+!   Interpolate field
+!
+  CALL gridval(splx, x, solcal, 0, bcoef)
+!
+  err = ABS(solcal-solana)
+  CALL putarr(fid, '/x', x)
+  CALL putarr(fid, '/solana', solana)
+  CALL putarr(fid, '/solcal', solcal)
+  CALL putarr(fid, '/err', err)
+!
+!   Compute discretization error norm by Gauss integration
+!
+  err_norm=0.0
+  ALLOCATE(xgauss(ngauss), wgauss(ngauss))
+  DO i=1,nx
+     CALL get_gauss(splx, ngauss, i, xgauss, wgauss)
+     CALL gridval(splx, xgauss, solcal(1:ngauss), 0)
+     solana(1:ngauss) = COS(mmode*xgauss)
+     err(1:ngauss) = DOT_PRODUCT(solana(1:ngauss)-solcal(1:ngauss), &
+          &          solana(1:ngauss)-solcal(1:ngauss))
+     err_norm = err_norm + SUM(wgauss*err(1:ngauss))
+  END DO
+  err_norm = SQRT(err_norm)
+  WRITE(*,'(a,1pe12.3)') 'Discretization error ',  err_norm
+!!
+!===========================================================================
+!              9.0  Clean up
+!
+  DEALLOCATE(xgrid)
+  DEALLOCATE(mat)
+  CALL destroy_sp(splx)
+  CALL closef(fid)  
+END PROGRAM main
diff --git a/examples/pde1dp_cmpl_mumps.f90 b/examples/pde1dp_cmpl_mumps.f90
new file mode 100644
index 0000000..ab14f2a
--- /dev/null
+++ b/examples/pde1dp_cmpl_mumps.f90
@@ -0,0 +1,478 @@
+!>
+!> @file pde1dp_cmpl_mumps.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+MODULE pde1dp_cmpl_mumps_mod
+  USE bsplines
+  USE mumps_bsplines
+  IMPLICIT NONE
+!
+CONTAINS
+  SUBROUTINE disrhs(spl, rhs, mmode, alpha, beta)
+!
+  TYPE(spline1d) :: spl
+  INTEGER, INTENT(in) :: mmode
+  DOUBLE COMPLEX, INTENT(in) :: alpha, beta
+  DOUBLE COMPLEX, INTENT(out) :: rhs(:)   
+!
+  INTEGER :: dim, nrank, nx, nidbas, ngauss
+  INTEGER :: i, igauss, it, irow
+  DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
+  DOUBLE COMPLEX :: contrib
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+  nrank = SIZE(rhs)
+  CALL get_dim(spl, dim, nx, nidbas)
+!
+  ALLOCATE(fun(0:nidbas,1)) ! needs only basis functions (no derivatives)
+!
+!   Gauss quadature
+!
+  CALL get_gauss(spl, ngauss)
+  ALLOCATE(xgauss(ngauss), wgauss(ngauss))
+!===========================================================================
+!              2.0 Assembly loop
+!
+  rhs(:) = 0.0d0
+!
+  DO i=1,nx
+     CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
+     DO igauss=1,ngauss
+        CALL basfun(xgauss(igauss), spl, fun, i)
+        contrib = wgauss(igauss) * rhseq(xgauss(igauss))
+        DO it=0,nidbas
+           irow=MODULO(i+it-1,nx) + 1   ! Periodic BC
+           rhs(irow) = rhs(irow) + contrib*fun(it,1)
+        END DO
+     END DO
+  END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(fun)
+  DEALLOCATE(xgauss, wgauss)
+CONTAINS
+  DOUBLE COMPLEX FUNCTION rhseq(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    DOUBLE PRECISION :: arg
+    arg = mmode*x
+    rhseq = (mmode**2*alpha-beta)*COS(arg)
+  END FUNCTION rhseq
+END SUBROUTINE disrhs
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE dismat(spl, mat, alpha, beta)
+!
+  TYPE(spline1d) :: spl
+  TYPE(zmumps_mat) :: mat
+  DOUBLE COMPLEX, INTENT(in) :: alpha, beta
+!
+  INTEGER :: dim, nx, nidbas, ngauss
+  INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
+  DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
+  DOUBLE COMPLEX :: contrib
+!
+  INTEGER :: kterms                ! Number of terms in weak form
+  INTEGER, ALLOCATABLE :: idert(:), iderw(:)  ! Derivative order
+  DOUBLE COMPLEX, ALLOCATABLE :: coefs(:)
+!
+  INTEGER :: istart, iend
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+  CALL get_dim(spl, dim, nx, nidbas)
+  ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
+!
+!   Weak form
+!
+  kterms = mat%nterms
+  ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
+!
+!   Gauss quadature
+!
+  CALL get_gauss(spl, ngauss)
+  ALLOCATE(xgauss(ngauss), wgauss(ngauss))
+!===========================================================================
+!              2.0 Assembly loop
+!
+  istart = mat%istart
+  iend   = mat%iend
+  DO i=1,nx
+     CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
+     DO igauss=1,ngauss
+        CALL basfun(xgauss(igauss), spl, fun, i)
+        CALL coefeq(xgauss(igauss), idert, iderw, coefs)
+        DO iterm=1,kterms
+           DO iw=0,nidbas
+              irow=MODULO(i+iw-1,nx) + 1   ! Periodic BC
+              IF( irow.GE.istart .AND. irow.LE.iend) THEN
+                 DO jt=0,nidbas
+                    contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
+                         &    fun(iw,iderw(iterm)) * wgauss(igauss)
+                    jcol=MODULO(i+jt-1,nx) + 1
+                    CALL updtmat(mat, irow, jcol, contrib)
+                 END DO
+              END IF
+           END DO
+        END DO
+     END DO
+  END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(fun)
+  DEALLOCATE(xgauss, wgauss)
+  DEALLOCATE(iderw, idert, coefs)
+!
+CONTAINS
+  SUBROUTINE coefeq(x, idt, idw, c)
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(out) :: idt(:), idw(:)
+    DOUBLE COMPLEX, INTENT(out) :: c(SIZE(idt))
+!
+    c(1) = alpha
+    idt(1) = 1
+    idw(1) = 1
+!
+    c(2) = -beta
+    idt(2) = 0
+    idw(2) = 0
+  END SUBROUTINE coefeq
+END SUBROUTINE dismat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  FUNCTION norm2(x)
+!
+!  Compute the 2-norm of complex array x
+!
+    IMPLICIT NONE
+    DOUBLE PRECISION :: norm2
+    DOUBLE COMPLEX, INTENT(in) :: x(:)
+    DOUBLE PRECISION :: sum2
+!
+    sum2 = DOT_PRODUCT(x,x)
+    norm2 = SQRT(sum2)
+  END FUNCTION norm2
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE meshdist(mmode, x)
+!
+!   Construct a 1d  non-equidistant mesh given a
+!   mesh distribution function.
+!
+    INTEGER, INTENT(in) :: mmode
+    DOUBLE PRECISION, INTENT(inout) :: x(0:)
+    INTEGER :: nx, nintg
+    DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+    DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+    INTEGER :: i, k
+!
+    nx = SIZE(x)-1
+    a=x(0)
+    b=x(nx)
+    nintg = 10*nx
+    ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+    dx = (b-a)/REAL(nintg)
+    xint(0) = a
+    fint(0) = 0.0d0
+    f1 = fdist(xint(0))
+    DO i=1,nintg
+       f0 = f1
+       xint(i) = xint(i-1) + dx
+       f1 = fdist(xint(i))
+       fint(i) = fint(i-1) + 0.5*(f0+f1)
+    END DO
+!
+!  Normalization
+!
+    scal = REAL(nx) / fint(nintg)
+    fint(0:nintg) = fint(0:nintg) * scal
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+    k = 1
+    DO i=1,nintg-1
+       IF( fint(i) .GE. REAL(k) ) THEN
+          x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+               &   (k-fint(i))
+          k = k+1
+       END IF
+    END DO
+!
+    DEALLOCATE(xint, fint)
+  CONTAINS
+    DOUBLE PRECISION FUNCTION fdist(x)
+      DOUBLE PRECISION, INTENT(in) :: x
+      fdist = 2.0 + COS(mmode*x)
+    END FUNCTION fdist
+  END SUBROUTINE meshdist
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+END MODULE pde1dp_cmpl_mumps_mod
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+!
+PROGRAM main
+!
+!   1D complex PDE with periodic BC
+!
+  USE pde1dp_cmpl_mumps_mod
+  USE futils
+!
+  IMPLICIT NONE
+  INCLUDE 'mpif.h'
+  TYPE(spline1d) :: splx
+  TYPE(zmumps_mat) :: mat
+  TYPE(zmumps_mat) :: newmat
+  INTEGER :: kl, ku, nrank
+!
+  CHARACTER(len=128) :: file='pde1dp_cmpl_mumps.h5'
+  INTEGER :: fid
+  INTEGER :: nx, nidbas, ngauss, mmode, npt, dim
+  LOGICAL :: nlequid
+  LOGICAL :: nlsym, nlherm, nlpos
+  DOUBLE PRECISION, PARAMETER :: pi=3.1415926535897931d0
+  DOUBLE PRECISION :: dx
+  DOUBLE COMPLEX :: alpha, beta
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, x, err
+  DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: sol, rhs, bcoef
+  DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: newsol, arow
+  DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: solcal, solana
+  DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
+  DOUBLE PRECISION :: err_norm
+  INTEGER :: i
+  INTEGER :: ierr, me
+  INTEGER :: nzfact
+!
+  NAMELIST /newrun/ nx, nidbas, ngauss, nlequid, alpha, beta, mmode, npt, &
+       &            nlsym, nlherm, nlpos
+!===========================================================================
+!              1.0 Prologue
+!
+  CALL mpi_init(ierr)
+  CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
+!
+!   Read in data specific to run
+!
+  nx = 8             ! Number oh intevals in x
+  nidbas = 3         ! Degree of splines
+  ngauss = 4         ! Number of Gauss points/interval
+  nlequid = .TRUE.   ! Use exact sol. as mesh dist. function if .FALSE.
+  mmode = 1          ! Fourier mode
+  alpha = (1.0, 1.0) ! Complex "diffusion"
+  beta  = 1.0
+  npt = 100
+  nlsym = .TRUE.    ! Is matrice symmetric
+  nlherm = .FALSE.  ! Is matrice hermitian
+  nlpos = .TRUE.    ! and positive definite ?
+!
+  IF(me.EQ.0) THEN
+     READ(*,newrun)
+     WRITE(*,newrun)
+  END IF
+  CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(nidbas, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(ngauss, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(nlequid, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(mmode, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(alpha, 1, MPI_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(beta, 1, MPI_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(npt, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(nlsym, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(nlherm, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(nlpos, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+!
+!   Define grid on x axis
+!
+  ALLOCATE(xgrid(0:nx))
+  dx = 2.d0*pi/REAL(nx,8)
+  xgrid = (/ (i*dx,i=0,nx) /)
+  IF( .NOT. nlequid ) THEN
+     CALL meshdist(mmode, xgrid)
+  END IF
+  IF(me.EQ.0) WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
+!
+!   Create hdf5 file
+!
+  IF(me.EQ.0) THEN
+     CALL creatf(file, fid, 'PDE1DP Result File', real_prec='d')
+     CALL attach(fid, '/', 'NX', nx)
+     CALL attach(fid, '/', 'NIDBAS', nidbas)
+     CALL putarr(fid, '/xgrid', xgrid)
+  END IF
+!===========================================================================
+!              2.0 Discretize the PDE
+!
+!   Set up periodic spline
+!
+  CALL set_spline(nidbas, ngauss, xgrid, splx, period=.TRUE.)
+  CALL get_dim(splx, dim)
+  nrank = nx       ! Rank of the FE matrix
+!
+!   FE matrix assembly
+!
+  CALL init(nrank, 2, mat, nlsym=nlsym, nlherm=nlherm, nlpos=nlpos)
+  WRITE(*,'(a,i4.4,a,3i6)') 'PE', me, ' istart, iend, nloc', mat%istart, mat%iend, &
+       &        mat%iend-mat%istart+1
+!
+  CALL dismat(splx, mat, alpha, beta)
+!
+!   RHS assembly
+!  
+  ALLOCATE(rhs(nrank), sol(nrank))
+  CALL disrhs(splx, rhs, mmode, alpha, beta)
+!
+  IF(me.EQ.0) CALL putarr(fid, '/rhs', rhs)
+!
+!   Factor and solve
+!
+  CALL factor(mat, debug=.FALSE.)
+  CALL bsolve(mat, rhs, sol, debug=.FALSE.)
+!
+  nzfact = mat%mumps_par%INFOG(29)
+  IF(nzfact<0) THEN
+     nzfact = -nzfact*1000000
+  END IF
+  IF(me.EQ.0) THEN
+     CALL putarr(fid, '/sol', sol)
+     WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix       = ',get_count(mat)
+     WRITE(*,'(a,i8)')  'Number of nonzeros in factors of A  = ',nzfact
+!
+!   Compute residue
+!
+     WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(mat,sol)-rhs)
+  END IF
+!===========================================================================
+!              3.0   Check solution
+!
+  IF(me.EQ.0) THEN
+!
+!   Exact solution
+     ALLOCATE(x(0:npt), solcal(0:npt), solana(0:npt), err(0:npt))
+     dx=2.0d0*pi/REAL(npt,8)
+     x = (/ (i*dx, i=0,npt) /)
+     solana = COS(mmode*x)
+!
+!   Prolongate solution using periodicity
+!
+     ALLOCATE(bcoef(dim))
+     bcoef(1:nrank) = sol(1:nrank)
+     DO i=nrank+1,dim
+        bcoef(i) = bcoef(MODULO(i-1,nrank)+1)
+     END DO
+!
+!   Interpolate field
+!
+     CALL gridval(splx, x, solcal, 0, bcoef)
+!
+     err = ABS(solcal-solana)
+     CALL putarr(fid, '/x', x)
+     CALL putarr(fid, '/solana', solana)
+     CALL putarr(fid, '/solcal', solcal)
+     CALL putarr(fid, '/err', err)
+!
+!   Compute discretization error norm by Gauss integration
+!
+     err_norm=0.0
+     ALLOCATE(xgauss(ngauss), wgauss(ngauss))
+     DO i=1,nx
+        CALL get_gauss(splx, ngauss, i, xgauss, wgauss)
+        CALL gridval(splx, xgauss, solcal(1:ngauss), 0)
+        solana(1:ngauss) = COS(mmode*xgauss)
+        err(1:ngauss) = DOT_PRODUCT(solana(1:ngauss)-solcal(1:ngauss), &
+             &          solana(1:ngauss)-solcal(1:ngauss))
+        err_norm = err_norm + SUM(wgauss*err(1:ngauss))
+     END DO
+     err_norm = SQRT(err_norm)
+     WRITE(*,'(a,1pe12.3)') 'Discretization error ',  err_norm
+  END IF
+!
+!===========================================================================
+!              4.0 Test of getrow/putrow, getcol/putcol and mcopy
+!
+  CALL init(nrank, 2, newmat, nlsym=nlsym, nlherm=nlherm, nlpos=nlpos)
+  ALLOCATE(arow(nrank), newsol(nrank))
+!
+  DO i=1,nrank
+     CALL getrow(mat, i, arow)
+     CALL putrow(newmat, i, arow)
+  END DO
+  CALL factor(newmat)
+  CALL bsolve(newmat, rhs, newsol)
+  WRITE(*,'(/a)') 'putrow/getrow ...'
+  WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(newmat,newsol)-rhs)
+  WRITE(*,'(a,1pe12.3)') 'Error ',  norm2(sol-newsol)
+!
+  DO i=1,nrank
+     CALL getcol(mat, i, arow)
+     CALL putcol(newmat, i, arow)
+  END DO
+  CALL factor(newmat)
+  CALL bsolve(newmat, rhs, newsol)
+  WRITE(*,'(/a)') 'putcol/getcol ...'
+  WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(newmat,newsol)-rhs)
+  WRITE(*,'(a,1pe12.3)') 'Error ',  norm2(sol-newsol)
+!
+  CALL clear_mat(newmat)
+  CALL mcopy(mat, newmat)
+  WRITE(*,'(/a)') 'mcopy ...'
+  newmat%val = (1000.0d0,0.0d0)*newmat%val
+  CALL factor(newmat)
+  CALL bsolve(newmat, rhs, newsol)
+  WRITE(*,'(a)') 'Backsolve the new system'
+  WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(newsol)
+!
+  WRITE(*,'(a)') 'Destroy NEWMAT ...'
+  CALL destroy(newmat)
+!
+  CALL bsolve(mat, rhs, sol)
+  WRITE(*,'(/a)') 'Backsolve the old system'
+  WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(sol)
+!
+  WRITE(*,'(a)') 'Destroy MAT ...'
+  CALL destroy(mat)
+!
+!!$  WRITE(*,'(/a)') 'Should crash since NEWMAT is gone!'
+!!$  CALL bsolve(newmat, rhs, newsol)
+!===========================================================================
+!              9.0  Clean up
+!
+  IF(me.EQ.0) THEN
+     DEALLOCATE(x, solcal, solana, err)
+     DEALLOCATE(bcoef)
+     DEALLOCATE(xgauss, wgauss)
+  END IF
+  DEALLOCATE(xgrid)
+  DEALLOCATE(rhs, sol)
+  DEALLOCATE(arow, newsol)
+  CALL destroy_sp(splx)
+  IF(me.EQ.0) CALL closef(fid)  
+  CALL mpi_finalize(ierr)
+END PROGRAM main
diff --git a/examples/pde1dp_cmpl_pardiso.f90 b/examples/pde1dp_cmpl_pardiso.f90
new file mode 100644
index 0000000..e3cf4a5
--- /dev/null
+++ b/examples/pde1dp_cmpl_pardiso.f90
@@ -0,0 +1,457 @@
+!>
+!> @file pde1dp_cmpl_pardiso.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+MODULE pde1dp_cmpl_pardiso_mod
+  USE bsplines
+  USE pardiso_bsplines
+  IMPLICIT NONE
+!
+CONTAINS
+  SUBROUTINE disrhs(spl, rhs, mmode, alpha, beta)
+!
+  TYPE(spline1d) :: spl
+  INTEGER, INTENT(in) :: mmode
+  DOUBLE COMPLEX, INTENT(in) :: alpha, beta
+  DOUBLE COMPLEX, INTENT(out) :: rhs(:)   
+!
+  INTEGER :: dim, nrank, nx, nidbas, ngauss
+  INTEGER :: i, igauss, it, irow
+  DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
+  DOUBLE COMPLEX :: contrib
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+  nrank = SIZE(rhs)
+  CALL get_dim(spl, dim, nx, nidbas)
+!
+  ALLOCATE(fun(0:nidbas,1)) ! needs only basis functions (no derivatives)
+!
+!   Gauss quadature
+!
+  CALL get_gauss(spl, ngauss)
+  ALLOCATE(xgauss(ngauss), wgauss(ngauss))
+!===========================================================================
+!              2.0 Assembly loop
+!
+  rhs(:) = 0.0d0
+!
+  DO i=1,nx
+     CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
+     DO igauss=1,ngauss
+        CALL basfun(xgauss(igauss), spl, fun, i)
+        contrib = wgauss(igauss) * rhseq(xgauss(igauss))
+        DO it=0,nidbas
+           irow=MODULO(i+it-1,nx) + 1   ! Periodic BC
+           rhs(irow) = rhs(irow) + contrib*fun(it,1)
+        END DO
+     END DO
+  END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(fun)
+  DEALLOCATE(xgauss, wgauss)
+CONTAINS
+  DOUBLE COMPLEX FUNCTION rhseq(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    DOUBLE PRECISION :: arg
+    arg = mmode*x
+    rhseq = (mmode**2*alpha-beta)*COS(arg)
+  END FUNCTION rhseq
+END SUBROUTINE disrhs
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE dismat(spl, mat, alpha, beta)
+!
+  TYPE(spline1d) :: spl
+  TYPE(zpardiso_mat) :: mat
+  DOUBLE COMPLEX, INTENT(in) :: alpha, beta
+!
+  INTEGER :: dim, nx, nidbas, ngauss
+  INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
+  DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
+  DOUBLE COMPLEX :: contrib
+!
+  INTEGER :: kterms                ! Number of terms in weak form
+  INTEGER, ALLOCATABLE :: idert(:), iderw(:)  ! Derivative order
+  DOUBLE COMPLEX, ALLOCATABLE :: coefs(:)
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+  CALL get_dim(spl, dim, nx, nidbas)
+  ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
+!
+!   Weak form
+!
+  kterms = mat%nterms
+  ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
+!
+!   Gauss quadature
+!
+  CALL get_gauss(spl, ngauss)
+  ALLOCATE(xgauss(ngauss), wgauss(ngauss))
+!===========================================================================
+!              2.0 Assembly loop
+!
+  DO i=1,nx
+     CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
+     DO igauss=1,ngauss
+        CALL basfun(xgauss(igauss), spl, fun, i)
+        CALL coefeq(xgauss(igauss), idert, iderw, coefs)
+        DO iterm=1,kterms
+           DO jt=0,nidbas
+              DO iw=0,nidbas
+                 contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
+                      &    fun(iw,iderw(iterm)) * wgauss(igauss)
+                 irow=MODULO(i+iw-1,nx) + 1   ! Periodic BC
+                 jcol=MODULO(i+jt-1,nx) + 1
+                 CALL updtmat(mat, irow, jcol, contrib)
+              END DO
+           END DO
+        END DO
+     END DO
+  END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(fun)
+  DEALLOCATE(xgauss, wgauss)
+  DEALLOCATE(iderw, idert, coefs)
+!
+CONTAINS
+  SUBROUTINE coefeq(x, idt, idw, c)
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(out) :: idt(:), idw(:)
+    DOUBLE COMPLEX, INTENT(out) :: c(SIZE(idt))
+!
+    c(1) = alpha
+    idt(1) = 1
+    idw(1) = 1
+!
+    c(2) = -beta
+    idt(2) = 0
+    idw(2) = 0
+  END SUBROUTINE coefeq
+END SUBROUTINE dismat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  FUNCTION norm2(x)
+!
+!  Compute the 2-norm of complex array x
+!
+    IMPLICIT NONE
+    DOUBLE PRECISION :: norm2
+    DOUBLE COMPLEX, INTENT(in) :: x(:)
+    DOUBLE PRECISION :: sum2
+!
+    sum2 = DOT_PRODUCT(x,x)
+    norm2 = SQRT(sum2)
+  END FUNCTION norm2
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE meshdist(mmode, x)
+!
+!   Construct a 1d  non-equidistant mesh given a
+!   mesh distribution function.
+!
+    INTEGER, INTENT(in) :: mmode
+    DOUBLE PRECISION, INTENT(inout) :: x(0:)
+    INTEGER :: nx, nintg
+    DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+    DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+    INTEGER :: i, k
+!
+    nx = SIZE(x)-1
+    a=x(0)
+    b=x(nx)
+    nintg = 10*nx
+    ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+    dx = (b-a)/REAL(nintg)
+    xint(0) = a
+    fint(0) = 0.0d0
+    f1 = fdist(xint(0))
+    DO i=1,nintg
+       f0 = f1
+       xint(i) = xint(i-1) + dx
+       f1 = fdist(xint(i))
+       fint(i) = fint(i-1) + 0.5*(f0+f1)
+    END DO
+!
+!  Normalization
+!
+    scal = REAL(nx) / fint(nintg)
+    fint(0:nintg) = fint(0:nintg) * scal
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+    k = 1
+    DO i=1,nintg-1
+       IF( fint(i) .GE. REAL(k) ) THEN
+          x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+               &   (k-fint(i))
+          k = k+1
+       END IF
+    END DO
+!
+    DEALLOCATE(xint, fint)
+  CONTAINS
+    DOUBLE PRECISION FUNCTION fdist(x)
+      DOUBLE PRECISION, INTENT(in) :: x
+      fdist = 2.0 + COS(mmode*x)
+    END FUNCTION fdist
+  END SUBROUTINE meshdist
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+END MODULE pde1dp_cmpl_pardiso_mod
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+!
+PROGRAM main
+!
+!   1D complex PDE with periodic BC
+!
+  USE pde1dp_cmpl_pardiso_mod
+  USE futils
+  USE conmat_mod
+!
+  IMPLICIT NONE
+  TYPE(spline1d) :: splx
+  TYPE(zpardiso_mat) :: mat
+  TYPE(zpardiso_mat) :: newmat
+  INTEGER :: kl, ku, nrank
+!
+  CHARACTER(len=128) :: file='pde1dp_cmpl_pardiso.h5'
+  INTEGER :: fid
+  INTEGER :: nx, nidbas, ngauss, mmode, npt, dim
+  LOGICAL :: nlequid
+  LOGICAL :: nlsym, nlherm, nlpos
+  DOUBLE PRECISION, PARAMETER :: pi=3.1415926535897931d0
+  DOUBLE PRECISION :: dx
+  DOUBLE COMPLEX :: alpha, beta
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, x, err
+  DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: sol, rhs, bcoef
+  DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: newsol, arow
+  DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: solcal, solana
+  DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
+  DOUBLE PRECISION :: err_norm
+  INTEGER :: i
+!
+  NAMELIST /newrun/ nx, nidbas, ngauss, nlequid, alpha, beta, mmode, npt, &
+       &            nlsym, nlherm, nlpos
+!===========================================================================
+!              1.0 Prologue
+!
+!   Read in data specific to run
+!
+  nx = 8             ! Number oh intevals in x
+  nidbas = 3         ! Degree of splines
+  ngauss = 4         ! Number of Gauss points/interval
+  nlequid = .TRUE.   ! Use exact sol. as mesh dist. function if .FALSE.
+  mmode = 1          ! Fourier mode
+  alpha = (1.0, 1.0) ! Complex "diffusion"
+  beta  = 1.0
+  npt = 100
+  nlsym = .TRUE.    ! Is matrice symmetric
+  nlherm = .FALSE.  ! Is matrice hermitian
+  nlpos = .TRUE.    ! and positive definite ?
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Define grid on x axis
+!
+  ALLOCATE(xgrid(0:nx))
+  dx = 2.d0*pi/REAL(nx,8)
+  xgrid = (/ (i*dx,i=0,nx) /)
+  IF( .NOT. nlequid ) THEN
+     CALL meshdist(mmode, xgrid)
+  END IF
+  WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
+!
+!   Create hdf5 file
+!
+  CALL creatf(file, fid, 'PDE1DP Result File', real_prec='d')
+  CALL attach(fid, '/', 'NX', nx)
+  CALL attach(fid, '/', 'NIDBAS', nidbas)
+  CALL putarr(fid, '/xgrid', xgrid)
+!===========================================================================
+!              2.0 Discretize the PDE
+!
+!   Set up periodic spline
+!
+  CALL set_spline(nidbas, ngauss, xgrid, splx, period=.TRUE.)
+  WRITE(*,'(a,l6)') 'nlequid =', nlequid
+  nrank = nx       ! Rank of the FE matrix
+!
+!   FE matrix assembly
+!
+  CALL init(nrank, 2, mat, nlsym=nlsym, nlherm=nlherm, nlpos=nlpos)
+  CALL get_dim(splx, dim)
+  WRITE(*,'(/a,4i6)') 'nrank, dim', nrank, dim
+!!$  CALL dismat(splx, mat, alpha, beta)
+  CALL conmat(splx, mat, coefeq)
+!
+!   RHS assembly
+!  
+  ALLOCATE(rhs(nrank), sol(nrank))
+  CALL disrhs(splx, rhs, mmode, alpha, beta)
+!
+  CALL putarr(fid, '/rhs', rhs)
+!
+!   Factor and solve
+!
+  WRITE(*,'(a/(10i6))') 'iparm', mat%p%iparm
+  CALL factor(mat)
+  CALL putmat(fid,'/MAT', mat)
+  CALL bsolve(mat, rhs, sol)
+  CALL putarr(fid, '/sol', sol)
+  WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix       = ',get_count(mat)
+  WRITE(*,'(a,i8)')  'Number of nonzeros in factors of A  = ',mat%p%iparm(18)
+!
+!   Compute residue
+!
+  WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(mat,sol)-rhs)
+!===========================================================================
+!              3.0   Check solution
+!
+!   Exact solution
+  ALLOCATE(x(0:npt), solcal(0:npt), solana(0:npt), err(0:npt))
+  dx=2.0d0*pi/REAL(npt,8)
+  x = (/ (i*dx, i=0,npt) /)
+  solana = COS(mmode*x)
+!
+!   Prolongate solution using periodicity
+!
+  ALLOCATE(bcoef(dim))
+  bcoef(1:nrank) = sol(1:nrank)
+  DO i=nrank+1,dim
+     bcoef(i) = bcoef(MODULO(i-1,nrank)+1)
+  END DO
+!
+!   Interpolate field
+!
+  CALL gridval(splx, x, solcal, 0, bcoef)
+!
+  err = ABS(solcal-solana)
+  CALL putarr(fid, '/x', x)
+  CALL putarr(fid, '/solana', solana)
+  CALL putarr(fid, '/solcal', solcal)
+  CALL putarr(fid, '/err', err)
+!
+!   Compute discretization error norm by Gauss integration
+!
+  err_norm=0.0
+  ALLOCATE(xgauss(ngauss), wgauss(ngauss))
+  DO i=1,nx
+     CALL get_gauss(splx, ngauss, i, xgauss, wgauss)
+     CALL gridval(splx, xgauss, solcal(1:ngauss), 0)
+     solana(1:ngauss) = COS(mmode*xgauss)
+     err(1:ngauss) = DOT_PRODUCT(solana(1:ngauss)-solcal(1:ngauss), &
+          &          solana(1:ngauss)-solcal(1:ngauss))
+     err_norm = err_norm + SUM(wgauss*err(1:ngauss))
+  END DO
+  err_norm = SQRT(err_norm)
+  WRITE(*,'(a,1pe12.3)') 'Discretization error ',  err_norm
+!
+!===========================================================================
+!              4.0 Test of getrow/putrow, getcol/putcol and mcopy
+!
+  CALL init(nrank, 2, newmat, nlsym=nlsym, nlherm=nlherm, nlpos=nlpos)
+  ALLOCATE(arow(nrank), newsol(nrank))
+!
+  DO i=1,nrank
+     CALL getrow(mat, i, arow)
+     CALL putrow(newmat, i, arow)
+  END DO
+  CALL factor(newmat)
+  CALL bsolve(newmat, rhs, newsol)
+  WRITE(*,'(/a)') 'putrow/getrow ...'
+  WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(newmat,newsol)-rhs)
+  WRITE(*,'(a,1pe12.3)') 'Error ',  norm2(sol-newsol)
+!
+  DO i=1,nrank
+     CALL getcol(mat, i, arow)
+     CALL putcol(newmat, i, arow)
+  END DO
+  CALL factor(newmat)
+  CALL bsolve(newmat, rhs, newsol)
+  WRITE(*,'(/a)') 'putcol/getcol ...'
+  WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(newmat,newsol)-rhs)
+  WRITE(*,'(a,1pe12.3)') 'Error ',  norm2(sol-newsol)
+!
+  CALL clear_mat(newmat)
+  CALL mcopy(mat, newmat)
+  WRITE(*,'(/a)') 'mcopy ...'
+  newmat%val = (1000.0d0,0.0d0)*newmat%val
+  CALL factor(newmat)
+  CALL bsolve(newmat, rhs, newsol)
+  WRITE(*,'(a)') 'Backsolve the new system'
+  WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(newsol)
+!
+  WRITE(*,'(a)') 'Destroy NEWMAT ...'
+  CALL destroy(newmat)
+!
+  CALL bsolve(mat, rhs, sol)
+  WRITE(*,'(/a)') 'Backsolve the old system'
+  WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(sol)
+!
+  WRITE(*,'(a)') 'Destroy MAT ...'
+  CALL destroy(mat)
+!!$!
+!!$  WRITE(*,'(/a)') 'Should crash since NEWMAT is gone!'
+!!$  CALL bsolve(newmat, rhs, newsol)
+!!$  WRITE(*,'(a)') 'Backsolve the new system'
+!!$  WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(newsol)
+!===========================================================================
+!              9.0  Clean up
+!
+  DEALLOCATE(x, solcal, solana, err)
+  DEALLOCATE(xgauss, wgauss)
+  DEALLOCATE(bcoef)
+  DEALLOCATE(xgrid)
+  DEALLOCATE(rhs, sol)
+  DEALLOCATE(arow, newsol)
+  CALL destroy_sp(splx)
+  CALL closef(fid)  
+CONTAINS
+  SUBROUTINE coefeq(x, idt, idw, c)
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(out) :: idt(:), idw(:)
+    DOUBLE COMPLEX, INTENT(out) :: c(:)
+!
+    c(1) = alpha
+    idt(1) = 1
+    idw(1) = 1
+!
+    c(2) = -beta
+    idt(2) = 0
+    idw(2) = 0
+  END SUBROUTINE coefeq
+END PROGRAM main
diff --git a/examples/pde1dp_cmpl_wsmp.f90 b/examples/pde1dp_cmpl_wsmp.f90
new file mode 100644
index 0000000..1760563
--- /dev/null
+++ b/examples/pde1dp_cmpl_wsmp.f90
@@ -0,0 +1,436 @@
+!>
+!> @file pde1dp_cmpl_wsmp.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+MODULE pde1dp_cmpl_wsmp_mod
+  USE bsplines
+  USE wsmp_bsplines
+  IMPLICIT NONE
+!
+CONTAINS
+  SUBROUTINE disrhs(spl, rhs, mmode, alpha, beta)
+!
+  TYPE(spline1d) :: spl
+  INTEGER, INTENT(in) :: mmode
+  DOUBLE COMPLEX, INTENT(in) :: alpha, beta
+  DOUBLE COMPLEX, INTENT(out) :: rhs(:)   
+!
+  INTEGER :: dim, nrank, nx, nidbas, ngauss
+  INTEGER :: i, igauss, it, irow
+  DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
+  DOUBLE COMPLEX :: contrib
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+  nrank = SIZE(rhs)
+  CALL get_dim(spl, dim, nx, nidbas)
+!
+  ALLOCATE(fun(0:nidbas,1)) ! needs only basis functions (no derivatives)
+!
+!   Gauss quadature
+!
+  CALL get_gauss(spl, ngauss)
+  ALLOCATE(xgauss(ngauss), wgauss(ngauss))
+!===========================================================================
+!              2.0 Assembly loop
+!
+  rhs(:) = 0.0d0
+!
+  DO i=1,nx
+     CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
+     DO igauss=1,ngauss
+        CALL basfun(xgauss(igauss), spl, fun, i)
+        contrib = wgauss(igauss) * rhseq(xgauss(igauss))
+        DO it=0,nidbas
+           irow=MODULO(i+it-1,nx) + 1   ! Periodic BC
+           rhs(irow) = rhs(irow) + contrib*fun(it,1)
+        END DO
+     END DO
+  END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(fun)
+  DEALLOCATE(xgauss, wgauss)
+CONTAINS
+  DOUBLE COMPLEX FUNCTION rhseq(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    DOUBLE PRECISION :: arg
+    arg = mmode*x
+    rhseq = (mmode**2*alpha-beta)*COS(arg)
+  END FUNCTION rhseq
+END SUBROUTINE disrhs
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE dismat(spl, mat, alpha, beta)
+!
+  TYPE(spline1d) :: spl
+  TYPE(zwsmp_mat) :: mat
+  DOUBLE COMPLEX, INTENT(in) :: alpha, beta
+!
+  INTEGER :: dim, nx, nidbas, ngauss
+  INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
+  DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
+  DOUBLE COMPLEX :: contrib
+!
+  INTEGER :: kterms                ! Number of terms in weak form
+  INTEGER, ALLOCATABLE :: idert(:), iderw(:)  ! Derivative order
+  DOUBLE COMPLEX, ALLOCATABLE :: coefs(:)
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+  CALL get_dim(spl, dim, nx, nidbas)
+  ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
+!
+!   Weak form
+!
+  kterms = mat%nterms
+  ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
+!
+!   Gauss quadature
+!
+  CALL get_gauss(spl, ngauss)
+  ALLOCATE(xgauss(ngauss), wgauss(ngauss))
+!===========================================================================
+!              2.0 Assembly loop
+!
+  DO i=1,nx
+     CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
+     DO igauss=1,ngauss
+        CALL basfun(xgauss(igauss), spl, fun, i)
+        CALL coefeq(xgauss(igauss), idert, iderw, coefs)
+        DO iterm=1,kterms
+           DO jt=0,nidbas
+              DO iw=0,nidbas
+                 contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
+                      &    fun(iw,iderw(iterm)) * wgauss(igauss)
+                 irow=MODULO(i+iw-1,nx) + 1   ! Periodic BC
+                 jcol=MODULO(i+jt-1,nx) + 1
+                 CALL updtmat(mat, irow, jcol, contrib)
+              END DO
+           END DO
+        END DO
+     END DO
+  END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(fun)
+  DEALLOCATE(xgauss, wgauss)
+  DEALLOCATE(iderw, idert, coefs)
+!
+CONTAINS
+  SUBROUTINE coefeq(x, idt, idw, c)
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(out) :: idt(:), idw(:)
+    DOUBLE COMPLEX, INTENT(out) :: c(SIZE(idt))
+!
+    c(1) = alpha
+    idt(1) = 1
+    idw(1) = 1
+!
+    c(2) = -beta
+    idt(2) = 0
+    idw(2) = 0
+  END SUBROUTINE coefeq
+END SUBROUTINE dismat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  FUNCTION norm2(x)
+!
+!  Compute the 2-norm of complex array x
+!
+    IMPLICIT NONE
+    DOUBLE PRECISION :: norm2
+    DOUBLE COMPLEX, INTENT(in) :: x(:)
+    DOUBLE PRECISION :: sum2
+!
+    sum2 = DOT_PRODUCT(x,x)
+    norm2 = SQRT(sum2)
+  END FUNCTION norm2
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE meshdist(mmode, x)
+!
+!   Construct a 1d  non-equidistant mesh given a
+!   mesh distribution function.
+!
+    INTEGER, INTENT(in) :: mmode
+    DOUBLE PRECISION, INTENT(inout) :: x(0:)
+    INTEGER :: nx, nintg
+    DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+    DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+    INTEGER :: i, k
+!
+    nx = SIZE(x)-1
+    a=x(0)
+    b=x(nx)
+    nintg = 10*nx
+    ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+    dx = (b-a)/REAL(nintg)
+    xint(0) = a
+    fint(0) = 0.0d0
+    f1 = fdist(xint(0))
+    DO i=1,nintg
+       f0 = f1
+       xint(i) = xint(i-1) + dx
+       f1 = fdist(xint(i))
+       fint(i) = fint(i-1) + 0.5*(f0+f1)
+    END DO
+!
+!  Normalization
+!
+    scal = REAL(nx) / fint(nintg)
+    fint(0:nintg) = fint(0:nintg) * scal
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+    k = 1
+    DO i=1,nintg-1
+       IF( fint(i) .GE. REAL(k) ) THEN
+          x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+               &   (k-fint(i))
+          k = k+1
+       END IF
+    END DO
+!
+    DEALLOCATE(xint, fint)
+  CONTAINS
+    DOUBLE PRECISION FUNCTION fdist(x)
+      DOUBLE PRECISION, INTENT(in) :: x
+      fdist = 2.0 + COS(mmode*x)
+    END FUNCTION fdist
+  END SUBROUTINE meshdist
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+END MODULE pde1dp_cmpl_wsmp_mod
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+!
+PROGRAM main
+!
+!   1D complex PDE with periodic BC
+!
+  USE pde1dp_cmpl_wsmp_mod
+  USE futils
+!
+  IMPLICIT NONE
+  TYPE(spline1d) :: splx
+  TYPE(zwsmp_mat) :: mat
+  TYPE(zwsmp_mat) :: newmat
+  INTEGER :: kl, ku, nrank
+!
+  CHARACTER(len=128) :: file='pde1dp_cmpl_wsmp.h5'
+  INTEGER :: fid
+  INTEGER :: nx, nidbas, ngauss, mmode, npt, dim
+  LOGICAL :: nlequid
+  LOGICAL :: nlsym, nlherm, nlpos
+  DOUBLE PRECISION, PARAMETER :: pi=3.1415926535897931d0
+  DOUBLE PRECISION :: dx
+  DOUBLE COMPLEX :: alpha, beta
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, x, err
+  DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: sol, rhs, bcoef
+  DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: newsol, arow
+  DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: solcal, solana
+  DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
+  DOUBLE PRECISION :: err_norm
+  INTEGER :: i
+!
+  NAMELIST /newrun/ nx, nidbas, ngauss, nlequid, alpha, beta, mmode, npt, &
+       &            nlsym, nlherm, nlpos
+!===========================================================================
+!              1.0 Prologue
+!
+!   Read in data specific to run
+!
+  nx = 8             ! Number oh intevals in x
+  nidbas = 3         ! Degree of splines
+  ngauss = 4         ! Number of Gauss points/interval
+  nlequid = .TRUE.   ! Use exact sol. as mesh dist. function if .FALSE.
+  mmode = 1          ! Fourier mode
+  alpha = (1.0, 1.0) ! Complex "diffusion"
+  beta  = 1.0
+  npt = 100
+  nlsym = .TRUE.    ! Is matrice symmetric
+  nlherm = .FALSE.  ! Is matrice hermitian
+  nlpos = .TRUE.    ! and positive definite ?
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Define grid on x axis
+!
+  ALLOCATE(xgrid(0:nx))
+  dx = 2.d0*pi/REAL(nx,8)
+  xgrid = (/ (i*dx,i=0,nx) /)
+  IF( .NOT. nlequid ) THEN
+     CALL meshdist(mmode, xgrid)
+  END IF
+  WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
+!
+!   Create hdf5 file
+!
+  CALL creatf(file, fid, 'PDE1DP Result File', real_prec='d')
+  CALL attach(fid, '/', 'NX', nx)
+  CALL attach(fid, '/', 'NIDBAS', nidbas)
+  CALL putarr(fid, '/xgrid', xgrid)
+!===========================================================================
+!              2.0 Discretize the PDE
+!
+!   Set up periodic spline
+!
+  CALL set_spline(nidbas, ngauss, xgrid, splx, period=.TRUE.)
+  WRITE(*,'(a,l6)') 'nlequid =', nlequid
+  nrank = nx       ! Rank of the FE matrix
+!
+!   FE matrix assembly
+!
+  CALL init(nrank, 2, mat, nlsym=nlsym, nlherm=nlherm, nlpos=nlpos)
+  CALL get_dim(splx, dim)
+  WRITE(*,'(/a,4i6)') 'nrank, dim', nrank, dim
+  CALL dismat(splx, mat, alpha, beta)
+!
+!   RHS assembly
+!  
+  ALLOCATE(rhs(nrank), sol(nrank))
+  CALL disrhs(splx, rhs, mmode, alpha, beta)
+!
+  CALL putarr(fid, '/rhs', rhs)
+!
+!   Factor and solve
+!
+!!$  CALL factor(mat, nlmetis=.TRUE.)
+  CALL factor(mat)
+  CALL putmat(fid,'/MAT', mat)
+  CALL bsolve(mat, rhs, sol)
+  CALL putarr(fid, '/sol', sol)
+  WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix       = ',get_count(mat)
+  WRITE(*,'(a,i8)')  'Number of nonzeros in factors of A  = ',mat%p%iparm(18)
+!
+!   Compute residue
+!
+  WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(mat,sol)-rhs)
+!===========================================================================
+!              3.0   Check solution
+!
+!   Exact solution
+  ALLOCATE(x(0:npt), solcal(0:npt), solana(0:npt), err(0:npt))
+  dx=2.0d0*pi/REAL(npt,8)
+  x = (/ (i*dx, i=0,npt) /)
+  solana = COS(mmode*x)
+!
+!   Prolongate solution using periodicity
+!
+  ALLOCATE(bcoef(dim))
+  bcoef(1:nrank) = sol(1:nrank)
+  DO i=nrank+1,dim
+     bcoef(i) = bcoef(MODULO(i-1,nrank)+1)
+  END DO
+!
+!   Interpolate field
+!
+  CALL gridval(splx, x, solcal, 0, bcoef)
+!
+  err = ABS(solcal-solana)
+  CALL putarr(fid, '/x', x)
+  CALL putarr(fid, '/solana', solana)
+  CALL putarr(fid, '/solcal', solcal)
+  CALL putarr(fid, '/err', err)
+!
+!   Compute discretization error norm by Gauss integration
+!
+  err_norm=0.0
+  ALLOCATE(xgauss(ngauss), wgauss(ngauss))
+  DO i=1,nx
+     CALL get_gauss(splx, ngauss, i, xgauss, wgauss)
+     CALL gridval(splx, xgauss, solcal(1:ngauss), 0)
+     solana(1:ngauss) = COS(mmode*xgauss)
+     err(1:ngauss) = DOT_PRODUCT(solana(1:ngauss)-solcal(1:ngauss), &
+          &          solana(1:ngauss)-solcal(1:ngauss))
+     err_norm = err_norm + SUM(wgauss*err(1:ngauss))
+  END DO
+  err_norm = SQRT(err_norm)
+  WRITE(*,'(a,1pe12.3)') 'Discretization error ',  err_norm
+!
+!===========================================================================
+!              4.0 Test of getrow/putrow, getcol/putcol and mcopy
+!
+  CALL init(nrank, 2, newmat, nlsym=nlsym, nlherm=nlherm, nlpos=nlpos)
+  ALLOCATE(arow(nrank), newsol(nrank))
+!
+  DO i=1,nrank
+     CALL getrow(mat, i, arow)
+     CALL putrow(newmat, i, arow)
+  END DO
+  CALL factor(newmat)
+  CALL bsolve(newmat, rhs, newsol)
+  WRITE(*,'(/a)') 'putrow/getrow ...'
+  WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(newmat,newsol)-rhs)
+  WRITE(*,'(a,1pe12.3)') 'Error ',  norm2(sol-newsol)
+!
+  DO i=1,nrank
+     CALL getcol(mat, i, arow)
+     CALL putcol(newmat, i, arow)
+  END DO
+  CALL factor(newmat)
+  CALL bsolve(newmat, rhs, newsol)
+  WRITE(*,'(/a)') 'putcol/getcol ...'
+  WRITE(*,'(a,1pe12.4)') 'Residue =', norm2(vmx(newmat,newsol)-rhs)
+  WRITE(*,'(a,1pe12.3)') 'Error ',  norm2(sol-newsol)
+!
+  CALL clear_mat(newmat)
+  CALL mcopy(mat, newmat)
+  WRITE(*,'(/a)') 'mcopy ...'
+  newmat%val = (1000.0d0,0.0d0)*newmat%val
+  CALL factor(newmat)
+  CALL bsolve(newmat, rhs, newsol)
+  WRITE(*,'(a, i3)') 'Backsolve the system', newmat%matid
+  WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(newsol)
+!
+  CALL bsolve(mat, rhs, sol)
+  WRITE(*,'(a, i3)') 'Backsolve the system', mat%matid
+  WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(sol)
+!
+  CALL bsolve(newmat, rhs, newsol)
+  WRITE(*,'(a, i3)') 'Backsolve the system', newmat%matid
+  WRITE(*,'(a,1pe12.4)') 'Norm =', norm2(newsol)
+!===========================================================================
+!              9.0  Clean up
+!
+  DEALLOCATE(x, solcal, solana, err)
+  DEALLOCATE(xgauss, wgauss)
+  DEALLOCATE(bcoef)
+  DEALLOCATE(xgrid)
+  DEALLOCATE(rhs, sol)
+  DEALLOCATE(arow, newsol)
+  CALL destroy(mat)
+  CALL destroy(newmat)
+  CALL destroy_sp(splx)
+  CALL closef(fid)  
+END PROGRAM main
diff --git a/examples/pde1dp_mod.f90 b/examples/pde1dp_mod.f90
new file mode 100644
index 0000000..0a508ac
--- /dev/null
+++ b/examples/pde1dp_mod.f90
@@ -0,0 +1,225 @@
+!>
+!> @file pde1dp_mod.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+MODULE pde1dp_mod
+!
+  USE bsplines
+  USE matrix
+  IMPLICIT NONE
+  DOUBLE PRECISION, ALLOCATABLE :: bcoef(:)
+  TYPE(spline1d), SAVE :: splx
+!  
+CONTAINS 
+  SUBROUTINE meshdist(c, x, nx)
+!
+!   Construct an 1d  non-equidistant mesh given a
+!   mesh distribution function defined in FDIST
+!
+    DOUBLE PRECISION, INTENT(in) :: c(5)
+    INTEGER, INTENT(iN) :: nx
+    DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+    INTEGER :: nintg
+    DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+    DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+    INTEGER :: i, k
+!
+    a=x(0)
+    b=x(nx)
+    nintg = 10*nx
+    ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+    dx = (b-a)/REAL(nintg)
+    xint(0) = a
+    fint(0) = 0.0d0
+    f1 = fdist(xint(0))
+    DO i=1,nintg
+       f0 = f1
+       xint(i) = xint(i-1) + dx
+       f1 = fdist(xint(i))
+       fint(i) = fint(i-1) + 0.5*(f0+f1)
+    END DO
+!
+!  Normalization
+!
+    scal = REAL(nx) / fint(nintg)
+    fint(0:nintg) = fint(0:nintg) * scal
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+    k = 1
+    DO i=1,nintg-1
+       IF( fint(i) .GE. REAL(k) ) THEN
+          x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+               &   (k-fint(i))
+          k = k+1
+       END IF
+    END DO
+!
+    DEALLOCATE(xint, fint)
+  CONTAINS
+    DOUBLE PRECISION FUNCTION fdist(x)
+      DOUBLE PRECISION, INTENT(in) :: x
+      fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
+    END FUNCTION fdist
+  END SUBROUTINE meshdist
+!+++
+  SUBROUTINE dismat(spl, mat)
+!
+!   Assembly FE matrix (with periodic BC) mat using spline spl
+!
+    TYPE(spline1d), INTENT(in) :: spl
+    TYPE(periodic_mat), INTENT(inout) :: mat
+    INTEGER :: dim, nx, nidbas, ngauss
+    INTEGER :: i, igauss, iterm, iw, jt, irow, jcol
+    DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
+    DOUBLE PRECISION :: contrib
+!
+    INTEGER :: kterms                ! Number of terms in weak form
+    INTEGER, ALLOCATABLE :: idert(:), iderw(:)  ! Derivative order
+    DOUBLE PRECISION, ALLOCATABLE :: coefs(:)
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    CALL get_dim(spl, dim, nx, nidbas)
+    ALLOCATE(fun(0:nidbas,0:1)) ! Spline and its 1st derivative
+!
+!   Weak form
+!
+    kterms = mat%mat%nterms
+    ALLOCATE(idert(kterms), iderw(kterms), coefs(kterms))
+!
+!   Gauss quadature
+!
+    CALL get_gauss(spl, ngauss)
+    ALLOCATE(xgauss(ngauss), wgauss(ngauss))
+!===========================================================================
+!              2.0 Assembly loop
+!
+    DO i=1,nx
+       CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
+       DO igauss=1,ngauss
+          CALL basfun(xgauss(igauss), spl, fun, i)
+          CALL coefeq(xgauss(igauss), idert, iderw, coefs)
+          DO iterm=1,kterms
+             DO jt=0,nidbas
+                DO iw=0,nidbas
+                   contrib = fun(jt,idert(iterm)) * coefs(iterm) * &
+                        &    fun(iw,iderw(iterm)) * wgauss(igauss)
+                   irow=MODULO(i+iw-1,nx) + 1   ! Periodic BC
+                   jcol=MODULO(i+jt-1,nx) + 1
+                   CALL updtmat(mat, irow, jcol, contrib)
+                END DO
+             END DO
+          END DO
+       END DO
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(fun)
+    DEALLOCATE(xgauss, wgauss)
+    DEALLOCATE(iderw, idert, coefs)
+  END SUBROUTINE dismat
+!
+  SUBROUTINE coefeq(x, idt, idw, c)
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(out) :: idt(:), idw(:)
+    DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt))
+!
+! Mass matrix
+!
+    c(1) = 1.0d0
+    idt(1) = 0
+    idw(1) = 0
+  END SUBROUTINE coefeq
+!+++
+  SUBROUTINE disrhs(spl, rhs)
+!
+!   Assenbly the RHS using spline spl
+!
+    TYPE(spline1d), INTENT(in) :: spl
+    DOUBLE PRECISION, INTENT(out) :: rhs(:)
+    INTEGER :: dim, nrank, nx, nidbas, ngauss
+    INTEGER :: i, igauss, it, irow
+    DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
+    DOUBLE PRECISION:: contrib
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    nrank = SIZE(rhs)
+    CALL get_dim(spl, dim, nx, nidbas)
+!
+    ALLOCATE(fun(0:nidbas,1)) ! needs only basis functions (no derivatives)
+!
+!   Gauss quadature
+!
+    CALL get_gauss(spl, ngauss)
+    ALLOCATE(xgauss(ngauss), wgauss(ngauss))
+!===========================================================================
+!              2.0 Assembly loop
+!
+    rhs(:) = 0.0d0
+!
+    DO i=1,nx
+       CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
+       DO igauss=1,ngauss
+          CALL basfun(xgauss(igauss), spl, fun, i)
+          contrib = wgauss(igauss) * rhseq(xgauss(igauss))
+          DO it=0,nidbas
+             irow=MODULO(i+it-1,nx) + 1   ! Periodic BC
+             rhs(irow) = rhs(irow) + contrib*fun(it,1)
+          END DO
+       END DO
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(fun)
+    DEALLOCATE(xgauss, wgauss)
+  END SUBROUTINE disrhs
+!
+DOUBLE PRECISION FUNCTION rhseq(x)
+  DOUBLE PRECISION, INTENT(in) :: x
+  DOUBLE PRECISION :: xarr(1), farr(1)
+  INTEGER, SAVE :: icall =0
+  xarr(1) = x
+  IF( icall.EQ.0 ) THEN
+     icall = icall+1
+     CALL gridval(splx, xarr, farr, 0, bcoef)
+  ELSE
+     CALL gridval(splx, xarr, farr, 0)
+  END IF
+  rhseq = farr(1)
+END FUNCTION rhseq
+
+END MODULE pde1dp_mod
diff --git a/examples/pde2d.f90 b/examples/pde2d.f90
new file mode 100644
index 0000000..b881711
--- /dev/null
+++ b/examples/pde2d.f90
@@ -0,0 +1,409 @@
+!>
+!> @file pde2d.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!  Solving the following 2d PDE using splines:
+!
+!    -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0
+!    exact solution: f(x,y) = (1-x^2) x^m cos(my)
+!
+  USE bsplines
+  USE matrix
+  USE futils
+  USE conmat_mod
+!
+  IMPLICIT NONE
+  INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms
+  LOGICAL :: nlppform, nlconmat
+  INTEGER :: i, j, ij, dimx, dimy, nrank, kl, ku, jder(2), it
+  DOUBLE PRECISION :: pi, coefx(5), coefy(5)
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
+  TYPE(spline2d) :: splxy
+  TYPE(gbmat) :: mat
+!
+  CHARACTER(len=128) :: file='pde2d.h5'
+  INTEGER :: fid
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
+  DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
+  DOUBLE PRECISION :: seconds, mem, dopla
+  DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
+  INTEGER :: nits=500
+!
+  INTERFACE
+     SUBROUTINE dismat(spl, mat)
+       USE bsplines
+       USE matrix
+       TYPE(spline2d), INTENT(in) :: spl
+       TYPE(gbmat), INTENT(inout) :: mat
+     END SUBROUTINE dismat
+     SUBROUTINE disrhs(mbess, spl, rhs)
+       USE bsplines
+       INTEGER, INTENT(in) :: mbess
+       TYPE(spline2d), INTENT(in) :: spl
+       DOUBLE PRECISION, INTENT(out) :: rhs(:)
+     END SUBROUTINE disrhs
+     SUBROUTINE meshdist(coefs, x, nx)
+       DOUBLE PRECISION, INTENT(in) :: coefs(5)
+       INTEGER, INTENT(iN) :: nx
+       DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+     END SUBROUTINE meshdist
+     SUBROUTINE ibcmat(mat, ny)
+       USE matrix
+       TYPE(gbmat), INTENT(inout) :: mat
+       INTEGER, INTENT(in) :: ny
+     END SUBROUTINE ibcmat
+     SUBROUTINE ibcrhs(rhs, ny)
+       DOUBLE PRECISION, INTENT(inout) :: rhs(:)
+       INTEGER, INTENT(in) :: ny
+     END SUBROUTINE ibcrhs
+!!$     SUBROUTINE coefeq_poisson(x, y, idt, idw, c)
+!!$       DOUBLE PRECISION, INTENT(in) :: x, y
+!!$       INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
+!!$       DOUBLE PRECISION, INTENT(out) :: c(:)
+!!$     END SUBROUTINE coefeq_poisson
+  END INTERFACE
+!
+  NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlconmat, &
+       &            coefx, coefy
+!===========================================================================
+!              1.0 Prologue
+!
+!   Read in data specific to run
+!
+  nx = 8              ! Number of intervals in x
+  ny = 8              ! Number of intervals in y
+  nidbas = (/3,3/)    ! Degree of splines
+  ngauss = (/4,4/)    ! Number of Gauss points/interval
+  mbess = 2           ! Exponent of differential problem
+  nterms = 2          ! Number of terms in weak form
+  nlppform = .TRUE.   ! Use PPFORM for gridval or not
+  nlconmat = .TRUE.   ! Use CONMAT instead of DISMAT
+  coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+  coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Define grid on x (=radial) & y (=poloidal) axis
+!
+  pi = 4.0d0*ATAN(1.0d0)
+  ALLOCATE(xgrid(0:nx), ygrid(0:ny))
+  xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
+  CALL meshdist(coefx, xgrid, nx)
+  ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
+  CALL meshdist(coefy, ygrid, ny)
+!
+  WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
+  WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny)
+!
+!   Create hdf5 file
+!
+  CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
+  CALL attach(fid, '/', 'NX', nx)
+  CALL attach(fid, '/', 'NY', ny)
+  CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
+  CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
+  CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
+  CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
+  CALL attach(fid, '/', 'MBESS', mbess)
+!===========================================================================
+!              2.0 Discretize the PDE
+!
+!   Set up spline
+!
+  CALL set_spline(nidbas, ngauss, &
+       &          xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
+!!$  WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots
+!!$  WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots
+!
+!   FE matrix assembly
+!
+  nrank = (nx+nidbas(1))*ny    ! Rank of the FE matrix
+  kl = (nidbas(1)+1)*ny -1     ! Number of sub-diagnonals
+  ku = kl                      ! Number of super-diagnonals
+  WRITE(*,'(a,5i6)') 'nrank, kl, ku', nrank, kl, ku
+!
+  CALL init(kl, ku, nrank, nterms, mat)
+  t0 = seconds()
+  IF(nlconmat) THEN
+     CALL conmat(splxy, mat, coefeq_poisson)
+  ELSE
+     CALL dismat(splxy, mat)
+  END IF
+  tmat = seconds() - t0
+  CALL putmat(fid, '/MAT0', mat, 'Assembled GB matrice')
+  ALLOCATE(arr(nrank))
+!
+!   BC on Matrix
+!
+  IF(nrank.LT.100) &
+       & WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', mat%val(kl+ku+1,:)
+  CALL ibcmat(mat, ny)
+  IF(nrank.LT.100) &
+       &   WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', mat%val(kl+ku+1,:)
+!
+!   RHS assembly
+!
+  ALLOCATE(rhs(nrank), sol(nrank))
+  CALL disrhs(mbess, splxy, rhs)
+!
+!   BC on RHS
+!
+  CALL ibcrhs(rhs, ny)
+
+  CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
+  CALL putmat(fid, '/MAT1', mat, 'GB matrice with BC')
+  WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem()
+!===========================================================================
+!              3.0 Solve the dicretized system
+!
+  t0 = seconds()
+  CALL factor(mat)
+  tfact = seconds() - t0
+  gflops1 = dopla('DGBTRF',nrank,nrank,kl,ku,0) / tfact / 1.d9
+
+  t0 = seconds()
+  CALL bsolve(mat, rhs, sol)
+!
+!   Backtransform of solution
+!
+  sol(1:ny-1) = sol(ny)
+!
+!   Spline coefficients, taking into account of periodicity in y
+!   Note: in SOL, y was numbered first.
+!
+  dimx = splxy%sp1%dim
+  dimy = splxy%sp2%dim
+  ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
+  DO j=0,dimy-1
+     DO i=0,dimx-1
+        ij = MODULO(j,ny) + i*ny + 1
+        bcoef(i,j) = sol(ij)
+     END DO
+  END DO
+  WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef)
+!
+  tsolv = seconds() - t0
+  gflops2 = dopla('DGBTRS',nrank,1,kl,ku,0) / tsolv / 1.d9
+  CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
+!===========================================================================
+!              4.0 Check the solution
+!
+!   Check function values computed with various method
+!
+  ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
+  DO i=0,nx
+     DO j=0,ny
+        solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
+     END DO
+  END DO
+  jder = (/0,0/)
+!
+!   Compute PPFORM/BCOEFS at first call to gridval
+  CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
+!
+  WRITE(*,'(/a)') '*** Checking solutions'
+  t0 = seconds()
+  DO it=1,nits   ! nits iterations for timing
+     CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+  END DO
+  tgrid = (seconds() - t0)/REAL(nits)
+  errsol = solana - solcal
+  IF( SIZE(bcoef,2) .LE. 10 ) THEN
+     CALL prnmat('BCOEF', bcoef)
+     CALL prnmat('SOLANA', solana)
+     CALL prnmat('SOLCAL', solcal)
+     CALL prnmat('ERRSOL', errsol)
+  END IF
+  WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
+       &    norm2(errsol) / norm2(solana)
+  WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s)             ', tgrid
+!
+  CALL putarr(fid, '/xgrid', xgrid, 'r')
+  CALL putarr(fid, '/ygrid', ygrid, '\theta')
+  CALL putarr(fid, '/sol', solcal, 'Solutions')
+  CALL putarr(fid, '/solana', solana,'Exact solutions')
+  CALL putarr(fid, '/errors', errsol, 'Errors')
+!
+!   Check derivatives d/dx and d/dy
+!
+  WRITE(*,'(/a)') '*** Checking gradient'
+  DO i=0,nx
+     DO j=0,ny
+        IF( mbess .EQ. 0 ) THEN
+           solana(i,j) = -2.0d0 * xgrid(i)
+        ELSE
+           solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
+             &           xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
+        END IF
+     END DO
+  END DO
+!
+  jder = (/1,0/)
+  CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+  errsol = solana - solcal
+  CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
+  WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx',  norm2(errsol)
+!
+  DO i=0,nx
+     DO j=0,ny
+        solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
+     END DO
+  END DO
+!
+  jder = (/0,1/)
+  CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+  CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
+  errsol = solana - solcal
+  WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy',  norm2(errsol)
+!===========================================================================
+!              9.0  Epilogue
+!
+  WRITE(*,'(/a)') '---'
+  WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
+  WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
+  WRITE(*,'(a,1pe12.3)') 'Backsolve time (s)            ', tsolv
+  WRITE(*,'(a,2f10.3)') 'Factor/solve Gflop/s', gflops1, gflops2
+!
+  DEALLOCATE(xgrid, rhs, sol)
+  DEALLOCATE(solcal, solana, errsol)
+  DEALLOCATE(bcoef)
+  DEALLOCATE(arr)
+  CALL destroy_sp(splxy)
+  CALL destroy(mat)
+!
+  CALL closef(fid)
+!===========================================================================
+!
+CONTAINS
+  SUBROUTINE coefeq_poisson(x, y, idt, idw, c)
+    DOUBLE PRECISION, INTENT(in) :: x, y
+    INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
+    DOUBLE PRECISION, INTENT(out) :: c(:)
+!
+! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
+!
+    c(1) = x        ! 
+    idt(1,1) = 1
+    idt(1,2) = 0
+    idw(1,1) = 1
+    idw(1,2) = 0
+!
+    c(2) = 1.d0/x
+    idt(2,1) = 0
+    idt(2,2) = 1
+    idw(2,1) = 0
+    idw(2,2) = 1
+  END SUBROUTINE coefeq_poisson
+!
+!+++
+  FUNCTION norm2(x)
+!
+!  Compute the 2-norm of array x
+!
+    IMPLICIT NONE
+    DOUBLE PRECISION :: norm2
+    DOUBLE PRECISION, INTENT(in) :: x(:,:)
+    DOUBLE PRECISION :: sum2
+    INTEGER :: i, j
+!
+    sum2 = 0.0d0
+    DO i=1,SIZE(x,1)
+       DO j=1,SIZE(x,2)
+          sum2 = sum2 + x(i,j)**2
+       END DO
+    END DO
+    norm2 = SQRT(sum2)
+  END FUNCTION norm2
+  SUBROUTINE prnmat(label, mat)
+    CHARACTER(len=*) :: label
+    DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
+    INTEGER :: i
+    WRITE(*,'(/a)') TRIM(label)
+    DO i=1,SIZE(mat,1)
+       WRITE(*,'(10(1pe12.3))') mat(i,:)
+    END DO
+  END SUBROUTINE prnmat
+END PROGRAM main
+!
+!+++
+SUBROUTINE meshdist(c, x, nx)
+!
+!   Construct an 1d  non-equidistant mesh given a
+!   mesh distribution function.
+!
+  IMPLICIT NONE
+  DOUBLE PRECISION, INTENT(in) :: c(5)
+  INTEGER, INTENT(iN) :: nx
+  DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+  INTEGER :: nintg
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+  DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+  INTEGER :: i, k
+!
+  a=x(0)
+  b=x(nx)
+  nintg = 10*nx
+  ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+  dx = (b-a)/REAL(nintg)
+  xint(0) = a
+  fint(0) = 0.0d0
+  f1 = fdist(xint(0))
+  DO i=1,nintg
+     f0 = f1
+     xint(i) = xint(i-1) + dx
+     f1 = fdist(xint(i))
+     fint(i) = fint(i-1) + 0.5*(f0+f1)
+  END DO
+!
+!  Normalization
+!
+  scal = REAL(nx) / fint(nintg)
+  fint(0:nintg) = fint(0:nintg) * scal
+!!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+  k = 1
+  DO i=1,nintg-1
+     IF( fint(i) .GE. REAL(k) ) THEN
+        x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+             &   (k-fint(i))
+        k = k+1
+     END IF
+  END DO
+!
+  DEALLOCATE(xint, fint)
+CONTAINS
+  DOUBLE PRECISION FUNCTION fdist(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
+  END FUNCTION fdist
+END SUBROUTINE meshdist
+!+++
diff --git a/examples/pde2d_mumps.f90 b/examples/pde2d_mumps.f90
new file mode 100644
index 0000000..772fce2
--- /dev/null
+++ b/examples/pde2d_mumps.f90
@@ -0,0 +1,937 @@
+!>
+!> @file pde2d_mumps.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+!
+!  Solving the 2d PDE using splines and MUMPS non-symmetric and symmetric 
+!  matrix
+!
+!    -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0
+!    exact solution: f(x,y) = (1-x^2) x^m cos(my)
+!
+MODULE pde2d_mumps_mod
+  USE bsplines
+  USE mumps_bsplines
+  IMPLICIT NONE
+CONTAINS
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE dismat(spl, mat)
+!
+!   Assembly of FE matrix mat using spline spl
+!
+    TYPE(spline2d), INTENT(in)       :: spl
+    TYPE(mumps_mat), INTENT(inout) :: mat
+!
+    INTEGER :: n1, nidbas1, ndim1, ng1
+    INTEGER :: n2, nidbas2, ndim2, ng2
+    INTEGER :: i, j, ig1, ig2
+    INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
+    DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:)
+    DOUBLE PRECISION:: contrib
+!
+    INTEGER :: kterms         ! Number of terms in weak form
+    INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:)  ! Derivative order
+    DOUBLE PRECISION, ALLOCATABLE  :: coefs(:,:,:)          ! Terms in weak form
+    INTEGER, ALLOCATABLE :: left1(:), left2(:)
+!
+    INTEGER :: istart, iend
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
+!!$    WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
+!!$    WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
+!
+!   Gauss quadature
+!
+    CALL get_gauss(spl%sp1, ng1)
+    CALL get_gauss(spl%sp2, ng2)
+    ALLOCATE(xg1(ng1), wg1(ng1))
+    ALLOCATE(xg2(ng1), wg2(ng1))
+!
+!   Weak form
+!
+    kterms = mat%nterms
+    ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
+    ALLOCATE(coefs(kterms,ng1,ng2))
+!
+    ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative
+    ALLOCATE(fun2(0:nidbas2,0:1,ng2)) !
+!
+!   Matrix partition
+!
+    istart = mat%istart
+    iend   = mat%iend
+!===========================================================================
+!              2.0 Assembly loop
+!
+    ALLOCATE(left1(ng1))
+    ALLOCATE(left2(ng2))
+    DO i=1,n1
+       CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
+       left1 = i
+       CALL basfun(xg1, spl%sp1, fun1, left1)
+       DO j=1,n2
+          CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
+          left2 = j
+          CALL basfun(xg2, spl%sp2, fun2, left2)
+!
+          DO ig1=1,ng1
+             DO ig2=1,ng2
+                CALL coefeq(xg1(ig1), xg2(ig2), &
+                     &      idert(:,:,ig1,ig2), &
+                     &      iderw(:,:,ig1,ig2), &
+                     &      coefs(:,ig1,ig2))
+             END DO
+          END DO
+!
+          DO iw1=0,nidbas1  ! Weight function in dir 1
+             igw1 = i+iw1
+             DO iw2=0,nidbas2  ! Weight function in dir 2
+                igw2 = MODULO(j+iw2-1, n2) + 1
+                irow = igw2 + (igw1-1)*n2
+                IF( irow.GE.istart .AND. irow.LE.iend) THEN
+                   DO it1=0,nidbas1  ! Test function in dir 1
+                      igt1 = i+it1
+                      DO it2=0,nidbas2  ! Test function in dir 2
+                         igt2 = MODULO(j+it2-1, n2) + 1
+                         jcol = igt2 + (igt1-1)*n2
+!-------------
+                         contrib = 0.0d0
+                         DO ig1=1,ng1
+                            DO ig2=1,ng2
+                               DO iterm=1,kterms
+                                  contrib = contrib + &
+                                       &    fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
+                                       &    fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * &
+                                       &    coefs(iterm,ig1,ig2) *                 &
+                                       &    fun2(it2,idert(iterm,2,ig1,ig2),ig2) * &
+                                       &    fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
+                                       &    wg1(ig1) * wg2(ig2)
+                               END DO
+                            END DO
+                         END DO
+                         CALL updtmat(mat, irow, jcol, contrib)
+!-------------
+                      END DO
+                   END DO
+                END IF
+             END DO
+          END DO
+       END DO
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(xg1, wg1, fun1)
+    DEALLOCATE(xg2, wg2, fun2)
+    DEALLOCATE(idert, iderw, coefs)
+    DEALLOCATE(left1,left2)
+!
+  CONTAINS
+    SUBROUTINE coefeq(x, y, idt, idw, c)
+      DOUBLE PRECISION, INTENT(in) :: x, y
+      INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
+      DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
+!
+! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
+!
+      c(1) = x        ! 
+      idt(1,1) = 1
+      idt(1,2) = 0
+      idw(1,1) = 1
+      idw(1,2) = 0
+      !
+      c(2) = 1.d0/x
+      idt(2,1) = 0
+      idt(2,2) = 1
+      idw(2,1) = 0
+      idw(2,2) = 1
+    END SUBROUTINE coefeq
+  END SUBROUTINE dismat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE disrhs(mbess, spl, rhs)
+!
+!   Assembly the RHS using 2d spline spl
+!
+    INTEGER, INTENT(in) :: mbess
+    TYPE(spline2d), INTENT(in) :: spl
+    DOUBLE PRECISION, INTENT(out) :: rhs(:)
+    INTEGER :: n1, nidbas1, ndim1, ng1
+    INTEGER :: n2, nidbas2, ndim2, ng2
+    INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
+    DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
+    DOUBLE PRECISION:: contrib
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, n2, nidbas2) 
+!
+    ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
+    ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
+!
+!   Gauss quadature
+!
+    CALL get_gauss(spl%sp1, ng1)
+    CALL get_gauss(spl%sp2, ng2)
+    ALLOCATE(xg1(ng1), wg1(ng1))
+    ALLOCATE(xg2(ng1), wg2(ng1))
+!===========================================================================
+!              2.0 Assembly loop
+!
+    nrank = SIZE(rhs)
+    rhs(1:nrank) = 0.0d0
+!
+    DO i=1,n1
+       CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
+       DO ig1=1,ng1
+          CALL basfun(xg1(ig1), spl%sp1, fun1, i)
+          DO j=1,n2
+             CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
+             DO ig2=1,ng2
+                CALL basfun(xg2(ig2), spl%sp2, fun2, j)
+                contrib = wg1(ig1)*wg2(ig2) * &
+                     &    rhseq(xg1(ig1),xg2(ig2), mbess)
+                DO k1=0,nidbas1
+                   i1 = i+k1
+                   DO k2=0,nidbas2
+                      j2 = MODULO(j+k2-1,n2) + 1
+                      ij = j2 + (i1-1)*n2
+                      rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
+                   END DO
+                END DO
+             END DO
+          END DO
+       END DO
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(xg1, wg1, fun1)
+    DEALLOCATE(xg2, wg2, fun2)
+!
+  CONTAINS
+    DOUBLE PRECISION FUNCTION rhseq(x1, x2, m)
+      DOUBLE PRECISION, INTENT(in) :: x1, x2
+      INTEGER, INTENT(in) :: m
+      rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)
+    END FUNCTION rhseq
+  END SUBROUTINE disrhs
+!
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE ibcmat(mat, ny)
+!
+!   Apply BC on matrix
+!
+    TYPE(mumps_mat), INTENT(inout) :: mat
+    INTEGER, INTENT(in) :: ny
+    INTEGER :: nrank, i, j
+    DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
+!===========================================================================
+!              1.0 Prologue
+!
+    nrank = mat%rank
+!===========================================================================
+!              2.0 Unicity at the axis
+!
+!   The vertical sum on the NY-th row
+!
+    ALLOCATE(zsum(nrank), arr(nrank))
+    zsum = 0.0d0
+    DO i=1,ny
+       arr = 0.0d0
+       CALL getrow(mat, i, arr)
+       zsum(:) = zsum(:) + arr(:)
+    END DO
+    IF(mat%nlsym) THEN
+       zsum(ny) = SUM(zsum(1:ny))   ! using symmetry
+    END IF
+    CALL putrow(mat, ny, zsum)
+!
+!   The horizontal sum on the NY-th column
+!
+    IF( .NOT.mat%nlsym) THEN
+       zsum = 0.0d0
+       DO j=1,ny
+          arr = 0.0d0
+          CALL getcol(mat, j, arr)
+          zsum(ny:) = zsum(ny:) + arr(ny:)
+       END DO
+       CALL putcol(mat, ny, zsum)
+    END IF
+!
+!   The away operator
+!
+    IF( .NOT.mat%nlsym) THEN
+       DO j = 1,ny-1
+          arr = 0.0d0; arr(j) = 1.0d0
+          CALL putcol(mat, j, arr)     
+       END DO
+    END IF
+!
+    DO i = 1,ny-1
+       arr = 0.0d0; arr(i) = 1.0d0
+       CALL putrow(mat, i, arr)     
+    END DO
+    DEALLOCATE(zsum)
+    DEALLOCATE(arr)
+!  
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+       ALLOCATE(arr(nrank))
+       IF( .NOT.mat%nlsym) THEN
+          DO j = nrank, nrank-ny+1, -1
+             arr = 0.0d0; arr(j) = 1.0d0
+             CALL putcol(mat, j, arr)     
+          END DO
+       END IF
+!
+       DO i = nrank, nrank-ny+1, -1
+          arr = 0.0d0; arr(i) = 1.0d0
+          CALL putrow(mat, i, arr)     
+       END DO
+       DEALLOCATE(arr)
+!===========================================================================
+!              9.0  Epilogue
+!
+  END SUBROUTINE ibcmat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE ibcrhs(rhs, ny)
+!
+!   Apply BC on RHS
+!
+    DOUBLE PRECISION, INTENT(inout) :: rhs(:)
+    INTEGER, INTENT(in) :: ny
+    INTEGER :: nrank
+    DOUBLE PRECISION :: zsum
+!===========================================================================
+!              1.0 Prologue
+!
+    nrank = SIZE(rhs,1)
+!===========================================================================
+!              2.0 Unicity at the axis
+!
+!   The vertical sum
+!
+    zsum = SUM(rhs(1:ny))
+    rhs(ny) = zsum
+    rhs(1:ny-1) = 0.0d0
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+    rhs(nrank-ny+1:nrank) = 0.0d0
+  END SUBROUTINE ibcrhs
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE write_matrix(lun, mat, comm)
+!
+!   Write the distribute matrix to (single) file
+!
+    INCLUDE 'mpif.h'
+!
+    INTEGER             :: lun
+    TYPE(mumps_mat)     :: mat
+    INTEGER, INTENT(in) :: comm
+!
+    INTEGER :: nprocs, me, ierr
+    INTEGER :: nrank, nnz, nnz_loc, istart, iend, nloc
+    INTEGER :: i
+    INTEGER, ALLOCATABLE :: displs(:), nlocs(:), cols(:), irow(:)
+    DOUBLE PRECISION, ALLOCATABLE :: val(:)
+!
+    CALL mpi_comm_size(comm, nprocs, ierr)
+    CALL mpi_comm_rank(comm, me, ierr)
+!
+    IF(.NOT.ASSOCIATED(mat%val)) THEN
+       WRITE(*,'(a)') 'WRITE_MATRIX: MUMPS matrix does not exist!'
+       STOP
+    END IF
+!
+!    Info on matrix
+!
+!!$    IF(me.EQ.0) THEN
+!!$       s0 = mat%nnz_start-1
+!!$       DO i=mat%istart,mat%iend
+!!$          s=mat%irow(i)-s0
+!!$          e=mat%irow(i+1)-1-s0
+!!$          WRITE(*,'(a,i6,1pe12.3)') 'nnz, Sum(val)', e-s+1, SUM(mat%val(s:e))
+!!$       END DO
+!!$    END IF
+!
+    nrank = mat%rank
+    nnz_loc = mat%nnz_loc
+    nnz = mat%nnz
+    istart = mat%istart
+    iend = mat%iend
+!
+    IF(me.EQ.0) THEN
+       WRITE(lun) nrank, nnz
+    END IF
+!
+!   Write irow
+!
+    nloc = iend-istart+1
+    IF (me.EQ.0) THEN
+       ALLOCATE(displs(0:nprocs))
+       ALLOCATE(nlocs(0:nprocs-1))
+       ALLOCATE(irow(nrank+1))
+    END IF
+    CALL mpi_gather(nloc, 1, MPI_INTEGER, nlocs, 1, MPI_INTEGER, 0, comm, ierr)
+    IF(me.EQ.0) THEN
+       displs(0) = 0
+       DO i=0,nprocs-1
+          displs(i+1) = displs(i)+nlocs(i)
+       END DO
+    END IF
+    CALL mpi_gatherv(mat%irow, nloc, MPI_INTEGER, &
+         &           irow, nlocs, displs, MPI_INTEGER, 0, comm, ierr)
+    IF(me.EQ.0) THEN
+       irow(nrank+1) = nnz+1
+       WRITE(lun) irow
+       DEALLOCATE(irow)
+    END IF
+!
+!   Write cols
+!
+    nloc = mat%nnz_loc
+    IF(me.EQ.0) THEN
+       ALLOCATE(cols(nnz))
+    END IF
+    CALL mpi_gather(nloc, 1, MPI_INTEGER, nlocs, 1, MPI_INTEGER, 0, comm, ierr)
+    IF(me.EQ.0) THEN
+       displs(0) = 0
+       DO i=0,nprocs-1
+          displs(i+1) = displs(i)+nlocs(i)
+       END DO
+    END IF
+    CALL mpi_gatherv(mat%cols, nloc, MPI_INTEGER, &
+         &           cols, nlocs, displs, MPI_INTEGER, 0, comm, ierr)
+    IF(me.EQ.0) THEN
+       WRITE(lun) cols
+       DEALLOCATE(cols)
+    END IF
+!
+!   Write val (Same data partition as "cols"
+!
+    IF(me.EQ.0) THEN
+       ALLOCATE(val(nnz))
+    END IF
+    CALL mpi_gatherv(mat%val, nloc, MPI_DOUBLE_PRECISION, &
+         &           val, nlocs, displs, MPI_DOUBLE_PRECISION, 0, comm, ierr)
+    IF(me.EQ.0) THEN
+       WRITE(lun) val
+       DEALLOCATE(val)
+    END IF
+!
+!  Epilogue
+!
+    IF(me.EQ.0) THEN
+       DEALLOCATE(displs, nlocs)
+    END IF
+  END SUBROUTINE write_matrix
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+END MODULE pde2d_mumps_mod
+PROGRAM main
+  USE pde2d_mumps_mod
+  USE futils
+!
+  IMPLICIT NONE
+  INCLUDE 'mpif.h'
+  INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms
+  LOGICAL :: debug_mumps=.FALSE.
+  LOGICAL :: nlppform
+  INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it
+  DOUBLE PRECISION :: pi, coefx(5), coefy(5)
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol
+  TYPE(spline2d) :: splxy
+  TYPE(mumps_mat) :: mat
+!
+  CHARACTER(len=128) :: file='pde2d_mumps.h5'
+  INTEGER :: fid
+  DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
+  DOUBLE PRECISION :: seconds, mem
+  DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1
+  DOUBLE PRECISION :: tconv, treord
+  INTEGER :: nits=100
+  LOGICAL :: nlsym, nlpos
+  LOGICAL :: nlmetis, nlforce_zero
+  LOGICAL :: nlserial
+!
+  INTEGER :: ierr, me
+  INTEGER(kind=8) :: nzfact
+  DOUBLE PRECISION :: mem_loc
+!
+  CHARACTER(len=128) :: matfile=''
+!
+  NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlsym, nlpos,&
+       &            nlmetis, nlforce_zero, nlserial, coefx, coefy, matfile, &
+       &            debug_mumps
+!===========================================================================
+!              1.0 Prologue
+!
+  CALL mpi_init(ierr)
+  CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
+!
+!   Read in data specific to run
+!
+  nx = 8              ! Number of intervals in x
+  ny = 8              ! Number of intervals in y
+  nidbas = (/3,3/)    ! Degree of splines
+  ngauss = (/4,4/)    ! Number of Gauss points/interval
+  mbess = 2           ! Exponent of differential problem
+  nterms = 2          ! Number of terms in weak form
+  nlppform = .TRUE.   ! Use PPFORM for gridval or not
+  nlsym    = .FALSE.  ! Symmetric or unsymmetric matrix
+  nlpos    = .TRUE.   ! Positive definite matrix
+  nlmetis = .FALSE.   ! Use metis ordering or minimum degree
+  nlserial = .TRUE.   ! Serial. The solver is duplicated on each process. Otherwise
+                      ! the solver matrix is partionned among the processes.
+  nlforce_zero = .FALSE. ! Remove existing nodes with zero val in  putele/row/ele
+  coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+  coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+  matfile = ''        ! Save matrix file to matfile if not empty
+!
+  IF(me.EQ.0) THEN
+     READ(*,newrun)
+     WRITE(*,newrun)
+  END IF
+  CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(mbess, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(nterms, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(nlppform, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(nlsym, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(nlpos, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(nlmetis, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(nlforce_zero, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(debug_mumps, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(nlserial, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(coefx, 5, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(coefy, 5, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(matfile, 128, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
+!
+!   Define grid on x (=radial) & y (=poloidal) axis
+!
+  pi = 4.0d0*ATAN(1.0d0)
+  ALLOCATE(xgrid(0:nx), ygrid(0:ny))
+  xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
+  CALL meshdist(coefx, xgrid, nx)
+  ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
+  CALL meshdist(coefy, ygrid, ny)
+!
+!   Create hdf5 file
+!
+  IF(me.EQ.0) THEN
+     CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
+     CALL attach(fid, '/', 'NX', nx)
+     CALL attach(fid, '/', 'NY', ny)
+     CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
+     CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
+     CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
+     CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
+     CALL attach(fid, '/', 'MBESS', mbess)
+  END IF
+!===========================================================================
+!              2.0 Discretize the PDE
+!
+!   Set up spline
+!
+  t0 = seconds()
+  CALL set_spline(nidbas, ngauss, &
+       &          xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
+!
+!   FE matrix assembly
+!
+  nrank = (nx+nidbas(1))*ny    ! Rank of the FE matrix
+  IF(me.EQ.0) WRITE(*,'(a,i8)') 'nrank', nrank
+!
+  IF(nlserial) THEN   ! The solver is duplicated
+     CALL init(nrank, nterms, mat, nlforce_zero=nlforce_zero,&
+          &    nlsym=nlsym, nlpos=nlpos)
+  ELSE  ! The solver is distributed
+     CALL init(nrank, nterms, mat, nlforce_zero=nlforce_zero,&
+          &    nlsym=nlsym, nlpos=nlpos, comm_in=MPI_COMM_WORLD)
+  END IF
+  mat%mumps_par%ICNTL(23) = 400
+  IF(me.EQ.0) THEN
+     WRITE(*,'(a/(20i6))') 'ICNTL =', mat%mumps_par%ICNTL
+  END IF
+  WRITE(*,'(a,i4.4,a,3i16)') 'PE', me, ' istart, iend, nloc', mat%istart, mat%iend, &
+       &        mat%iend-mat%istart+1
+!
+  CALL dismat(splxy, mat)
+!
+!   BC on Matrix
+!
+  CALL ibcmat(mat, ny)
+  tmat = seconds() - t0
+!
+!   RHS assembly
+!
+  ALLOCATE(rhs(nrank), sol(nrank))
+  CALL disrhs(mbess, splxy, rhs)
+!
+!   BC on RHS
+!
+  CALL ibcrhs(rhs, ny)
+!
+  mem_loc = mem()
+  CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after DISMAT')
+  IF(me.EQ.0) THEN
+     CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
+  END IF
+!===========================================================================
+!              3.0 Solve the dicretized system
+!
+
+  t0 = seconds()
+  CALL to_mat(mat)
+  WRITE(*,'(a/(10i6))') 'MInmax IRN_loc', MINVAL(mat%mumps_par%IRN_loc), MAXVAL(mat%mumps_par%IRN_loc)
+  WRITE(*,'(a/(10i6))') 'JCN_loc', MINVAL(mat%mumps_par%JCN_loc), MAXVAL(mat%mumps_par%JCN_loc)
+  tconv = seconds() -t0
+  CALL minmax_i(mat%nnz_loc, MPI_COMM_WORLD, 'local nnz')
+  IF(me.EQ.0) THEN 
+     WRITE(*,'(a,i16)') 'Number of non-zeros of matrix = ', mat%nnz
+  END IF
+!
+!   Write Matrix and RHS to file
+!
+  IF(LEN_TRIM(matfile).GT.0) THEN 
+     IF(me.EQ.0) THEN
+        OPEN(99, file=matfile, form='unformatted')
+     END IF
+     CALL write_matrix(99, mat, MPI_COMM_WORLD)
+  END IF
+!
+  t0 = seconds()
+  CALL reord_mat(mat, nlmetis=nlmetis, debug=debug_mumps)
+  treord = seconds() - t0
+!
+  t0 = seconds()
+  CALL numfact(mat, debug=debug_mumps)
+  tfact = seconds() - t0
+!
+!
+  mem_loc = mem()
+  CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after FACTOR')
+  IF(me.EQ.0) THEN 
+     nzfact = mat%mumps_par%INFOG(29)
+     IF(nzfact<0) THEN
+        nzfact = -nzfact*1000000
+     END IF
+     WRITE(*,'(/a,i12)') 'Number of nonzeros in factors of A  = ',nzfact
+     WRITE(*,'(a,f12.2)')  'Number of factorization MFLOPS      = ',&
+          &               mat%mumps_par%RINFOG(3)/1.e6
+  END IF
+  gflops1 = mat%mumps_par%RINFOG(3) / tfact / 1.d9
+!
+  CALL bsolve(mat, rhs, sol, debug=debug_mumps)
+!
+  IF(LEN_TRIM(matfile).GT.0) THEN
+     IF(me.EQ.0) THEN
+        WRITE(99) rhs
+        WRITE(99) sol
+        CLOSE(99)
+     END IF
+  END IF
+!
+  t0 = seconds()
+  DO it=1,nits   ! nits iterations for timing
+     CALL bsolve(mat, rhs, sol)
+     sol(1:ny-1) = sol(ny)
+  END DO
+  tsolv = (seconds() - t0)/REAL(nits)
+!
+  mem_loc = mem()
+  CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after BSOLVE')
+!
+!   Spline coefficients, taking into account of periodicity in y
+!   Note: in SOL, y was numbered first.
+!
+  dimx = splxy%sp1%dim
+  dimy = splxy%sp2%dim
+  ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
+  DO j=0,dimy-1
+     DO i=0,dimx-1
+        ij = MODULO(j,ny) + i*ny + 1
+        bcoef(i,j) = sol(ij)
+     END DO
+  END DO
+  IF(me.EQ.0) THEN 
+     WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef)
+     CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
+  END IF
+!===========================================================================
+!              4.0 Check the solution
+!
+!   Check function values computed with various method
+!
+  IF(me.EQ.0) THEN
+     ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
+     DO i=0,nx
+        DO j=0,ny
+           solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
+        END DO
+     END DO
+     jder = (/0,0/)
+!
+!   Compute PPFORM/BCOEFS at first call to gridval
+     CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
+!
+     WRITE(*,'(/a)') '*** Checking solutions'
+     t0 = seconds()
+     DO it=1,nits   ! nits iterations for timing
+        CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+     END DO
+     tgrid = (seconds() - t0)/REAL(nits)
+     errsol = solana - solcal
+     IF( SIZE(bcoef,2) .LE. 10 ) THEN
+        CALL prnmat('BCOEF', bcoef)
+        CALL prnmat('SOLANA', solana)
+        CALL prnmat('SOLCAL', solcal)
+        CALL prnmat('ERRSOL', errsol)
+     END IF
+     WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
+          &    norm2(errsol) / norm2(solana)
+     WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s)             ', tgrid
+!
+     CALL putarr(fid, '/xgrid', xgrid, 'r')
+     CALL putarr(fid, '/ygrid', ygrid, '\theta')
+     CALL putarr(fid, '/sol', solcal, 'Solutions')
+     CALL putarr(fid, '/solana', solana,'Exact solutions')
+     CALL putarr(fid, '/errors', errsol, 'Errors')
+!
+!   Check derivatives d/dx and d/dy
+!
+     WRITE(*,'(/a)') '*** Checking gradient'
+     DO i=0,nx
+        DO j=0,ny
+           IF( mbess .EQ. 0 ) THEN
+              solana(i,j) = -2.0d0 * xgrid(i)
+           ELSE
+              solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
+                   &           xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
+           END IF
+        END DO
+     END DO
+!
+     jder = (/1,0/)
+     CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+     errsol = solana - solcal
+     CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
+     WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx',  norm2(errsol)
+!
+     DO i=0,nx
+        DO j=0,ny
+           solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
+        END DO
+     END DO
+!
+     jder = (/0,1/)
+     CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+     CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
+     errsol = solana - solcal
+     WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy',  norm2(errsol)
+!
+     WRITE(*,'(/a)') '---'
+     WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
+     WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s)   ', tconv
+     WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s)      ', treord
+     WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
+     WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv
+     WRITE(*,'(a,1pe12.3)') 'Backsolve time (s)            ', tsolv
+     WRITE(*,'(a,2f10.3)') 'Factor  Gflop/s', gflops1
+     DEALLOCATE(solcal, solana, errsol)
+  END IF
+!===========================================================================
+!              5.0 Clear the matrix and recompute
+!
+  IF(me.EQ.0) WRITE(*,'(/a)') 'Recompute the solver ...'
+  t0 = seconds()
+  CALL clear_mat(mat)
+  CALL dismat(splxy, mat)
+  CALL ibcmat(mat, ny)
+  tmat = seconds()-t0
+!
+  t0 = seconds()
+  CALL numfact(mat, debug=debug_mumps)
+  tfact = seconds()-t0
+  gflops1 = mat%mumps_par%RINFOG(3) / tfact / 1.d9
+!
+  t0 = seconds()
+  ALLOCATE(newsol(nrank))
+  CALL bsolve(mat, rhs, newsol)
+  newsol(1:ny-1) = newsol(ny)
+  tsolv = seconds()-t0
+!
+  IF(me.EQ.0) THEN
+     WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol))
+     WRITE(*,'(/a)') '---'
+     WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
+     WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
+     WRITE(*,'(a,1pe12.3)') 'Backsolve time (s)            ', tsolv
+     WRITE(*,'(a,1pe12.3)') 'Total (s)                     ', tmat+tfact+tsolv
+     WRITE(*,'(a,2f10.3)') 'Factor  Gflop/s', gflops1
+  END IF
+!
+  DEALLOCATE(newsol)
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(xgrid, rhs, sol)
+  DEALLOCATE(bcoef)
+  CALL destroy_sp(splxy)
+  CALL destroy(mat)
+!
+  IF(me.EQ.0) CALL closef(fid)
+  CALL mpi_finalize(ierr)
+!===========================================================================
+!
+CONTAINS
+  FUNCTION norm2(x)
+!
+!  Compute the 2-norm of array x
+!
+    IMPLICIT NONE
+    DOUBLE PRECISION :: norm2
+    DOUBLE PRECISION, INTENT(in) :: x(:,:)
+    DOUBLE PRECISION :: sum2
+    INTEGER :: i, j
+!
+    sum2 = 0.0d0
+    DO i=1,SIZE(x,1)
+       DO j=1,SIZE(x,2)
+          sum2 = sum2 + x(i,j)**2
+       END DO
+    END DO
+    norm2 = SQRT(sum2)
+  END FUNCTION norm2
+!
+  SUBROUTINE prnmat(label, mat)
+    CHARACTER(len=*) :: label
+    DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
+    INTEGER :: i
+    WRITE(*,'(/a)') TRIM(label)
+    DO i=1,SIZE(mat,1)
+       WRITE(*,'(10(1pe12.3))') mat(i,:)
+    END DO
+  END SUBROUTINE prnmat
+!
+  SUBROUTINE minmax_i(k, comm, str)
+    CHARACTER(len=*), INTENT(in) :: str
+    INTEGER, INTENT(in)          :: k
+    INTEGER, INTENT(in)          :: comm
+    INTEGER :: me, ierr, kmin, kmax
+    CALL mpi_comm_rank(comm, me, ierr)
+    CALL mpi_reduce(k, kmin, 1, MPI_INTEGER, MPI_MIN, 0, comm, ierr)
+    CALL mpi_reduce(k, kmax, 1, MPI_INTEGER, MPI_MAX, 0, comm, ierr)
+    IF( me.EQ.0 ) THEN
+       WRITE(*,'(a,2i16)') 'Minmax of ' // TRIM(str), kmin, kmax
+    END IF
+  END SUBROUTINE minmax_i
+!
+  SUBROUTINE minmax_r(x, comm, str)
+    CHARACTER(len=*), INTENT(in) :: str
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(in)          :: comm
+    INTEGER :: me, ierr
+    DOUBLE PRECISION ::  xmin, xmax
+    CALL mpi_comm_rank(comm, me, ierr)
+    CALL mpi_reduce(x, xmin, 1, MPI_DOUBLE_PRECISION, MPI_MIN, 0, comm, ierr)
+    CALL mpi_reduce(x, xmax, 1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierr)
+    IF( me.EQ.0 ) THEN
+       WRITE(*,'(a,2(1pe12.4))') 'Minmax of ' // TRIM(str), xmin, xmax
+    END IF
+  END SUBROUTINE minmax_r
+END PROGRAM main
+!
+!+++
+SUBROUTINE meshdist(c, x, nx)
+!
+!   Construct an 1d  non-equidistant mesh given a
+!   mesh distribution function.
+!
+  IMPLICIT NONE
+  DOUBLE PRECISION, INTENT(in) :: c(5)
+  INTEGER, INTENT(iN) :: nx
+  DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+  INTEGER :: nintg
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+  DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+  INTEGER :: i, k
+!
+  a=x(0)
+  b=x(nx)
+  nintg = 10*nx
+  ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+  dx = (b-a)/REAL(nintg)
+  xint(0) = a
+  fint(0) = 0.0d0
+  f1 = fdist(xint(0))
+  DO i=1,nintg
+     f0 = f1
+     xint(i) = xint(i-1) + dx
+     f1 = fdist(xint(i))
+     fint(i) = fint(i-1) + 0.5*(f0+f1)
+  END DO
+!
+!  Normalization
+!
+  scal = REAL(nx) / fint(nintg)
+  fint(0:nintg) = fint(0:nintg) * scal
+!!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+  k = 1
+  DO i=1,nintg-1
+     IF( fint(i) .GE. REAL(k) ) THEN
+        x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+             &   (k-fint(i))
+        k = k+1
+     END IF
+  END DO
+!
+  DEALLOCATE(xint, fint)
+CONTAINS
+  DOUBLE PRECISION FUNCTION fdist(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
+  END FUNCTION fdist
+END SUBROUTINE meshdist
+!+++
diff --git a/examples/pde2d_nh.f90 b/examples/pde2d_nh.f90
new file mode 100644
index 0000000..ee5a008
--- /dev/null
+++ b/examples/pde2d_nh.f90
@@ -0,0 +1,684 @@
+!>
+!> @file pde2d_nh.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+!
+!  Solving the following 2d PDE using splines:
+!
+!    -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my),
+!     with BC: f(x=1,y) = cos(y)
+!
+!    Exact solution: f(x,y) = (1-x^2) x^m cos(my) + x*cos(y)
+!
+MODULE pde2d_nh_mod
+  USE bsplines
+  USE matrix
+  IMPLICIT NONE
+!
+  LOGICAL :: nlfix
+CONTAINS
+  SUBROUTINE dismat(spl, mat)
+ !
+ !   Assembly of FE matrix mat using spline spl
+ !
+    TYPE(spline2d), INTENT(in) :: spl
+    TYPE(gbmat), INTENT(inout) :: mat
+ !
+    INTEGER :: n1, nidbas1, ndim1, ng1
+    INTEGER :: n2, nidbas2, ndim2, ng2
+    INTEGER :: i, j, ig1, ig2
+    INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
+    DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
+    DOUBLE PRECISION:: contrib
+ !
+    INTEGER :: kterms         ! Number of terms in weak form
+    INTEGER, DIMENSION(:,:), ALLOCATABLE :: idert, iderw  ! Derivative order
+    DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE  :: coefs ! Terms in weak form
+ !===========================================================================
+ !              1.0 Prologue
+ !
+ !   Properties of spline space
+ !
+    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
+    WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
+    WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
+ !
+ !   Weak form
+ !
+    kterms = mat%nterms
+    ALLOCATE(idert(kterms,2), iderw(kterms,2), coefs(kterms))
+ !
+    ALLOCATE(fun1(0:nidbas1,0:1)) ! Spline and 1st derivative
+    ALLOCATE(fun2(0:nidbas2,0:1)) ! 
+ !
+ !   Gauss quadature
+ !
+    CALL get_gauss(spl%sp1, ng1)
+    CALL get_gauss(spl%sp2, ng2)
+    ALLOCATE(xg1(ng1), wg1(ng1))
+    ALLOCATE(xg2(ng2), wg2(ng2))
+ !===========================================================================
+ !              2.0 Assembly loop
+ !
+    DO i=1,n1
+       CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
+       DO ig1=1,ng1
+          CALL basfun(xg1(ig1), spl%sp1, fun1, i)
+          DO j=1,n2
+             CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
+             DO ig2=1,ng2
+                CALL basfun(xg2(ig2), spl%sp2, fun2, j)
+                CALL coefeq(xg1(ig1), xg2(ig2), idert, iderw, coefs)
+                DO iterm=1,kterms
+                   DO iw1=0,nidbas1  ! Weight function in dir 1
+                      igw1 = i+iw1
+                      DO iw2=0,nidbas2  ! Weight function in dir 2
+                         igw2 = MODULO(j+iw2-1, n2) + 1
+                         irow = igw2 + (igw1-1)*n2
+                         DO it1=0,nidbas1  ! Test function in dir 1
+                            igt1 = i+it1
+                            DO it2=0,nidbas2  ! Test function in dir 2
+                               igt2 = MODULO(j+it2-1, n2) + 1
+                               jcol = igt2 + (igt1-1)*n2
+                               contrib = fun1(iw1,iderw(iterm,1)) * &
+                                    &    fun2(iw2,iderw(iterm,2)) * &
+                                    &    coefs(iterm) *             &
+                                    &    fun2(it2,idert(iterm,2)) * &
+                                    &    fun1(it1,idert(iterm,1)) * &
+                                    &    wg1(ig1) * wg2(ig2)
+                               CALL updtmat(mat, irow, jcol, contrib)
+                            END DO
+                         END DO
+                      END DO
+                   END DO
+                END DO
+             END DO
+          END DO
+       END DO
+    END DO
+ !===========================================================================
+ !              9.0  Epilogue
+ !
+    DEALLOCATE(xg1, wg1, fun1)
+    DEALLOCATE(xg2, wg2, fun2)
+    DEALLOCATE(idert, iderw, coefs)
+ !
+  CONTAINS
+    SUBROUTINE coefeq(x, y, idt, idw, c)
+      DOUBLE PRECISION, INTENT(in) :: x, y
+      INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
+      DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
+!
+! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
+!
+      c(1) = x        ! 
+      idt(1,1) = 1
+      idt(1,2) = 0
+      idw(1,1) = 1
+      idw(1,2) = 0
+!
+      c(2) = 1.d0/x
+      idt(2,1) = 0
+      idt(2,2) = 1
+      idw(2,1) = 0
+      idw(2,2) = 1
+    END SUBROUTINE coefeq
+  END SUBROUTINE dismat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE disrhs(mbess, spl, rhs)
+!
+!   Assembly the RHS using 2d spline spl
+!
+    IMPLICIT NONE
+    INTEGER, INTENT(in) :: mbess
+    TYPE(spline2d), INTENT(in) :: spl
+    DOUBLE PRECISION, INTENT(out) :: rhs(:)
+    INTEGER :: n1, nidbas1, ndim1, ng1
+    INTEGER :: n2, nidbas2, ndim2, ng2
+    INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
+    DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
+    DOUBLE PRECISION:: contrib
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, n2, nidbas2) 
+!
+    ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
+    ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
+!
+!   Gauss quadature
+!
+    CALL get_gauss(spl%sp1, ng1)
+    CALL get_gauss(spl%sp2, ng2)
+    WRITE(*,'(/a, 2i3)') 'Gauss points and weights, ngauss =', ng1, ng2
+    ALLOCATE(xg1(ng1), wg1(ng1))
+    ALLOCATE(xg2(ng2), wg2(ng2))
+!===========================================================================
+!              2.0 Assembly loop
+!
+    nrank = SIZE(rhs)
+    rhs(1:nrank) = 0.0d0
+!
+    DO i=1,n1
+       CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
+       DO ig1=1,ng1
+          CALL basfun(xg1(ig1), spl%sp1, fun1, i)
+          DO j=1,n2
+             CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
+             DO ig2=1,ng2
+                CALL basfun(xg2(ig2), spl%sp2, fun2, j)
+                contrib = wg1(ig1)*wg2(ig2) * &
+                     &    rhseq(xg1(ig1),xg2(ig2), mbess)
+                DO k1=0,nidbas1
+                   i1 = i+k1
+                   DO k2=0,nidbas2
+                      j2 = MODULO(j+k2-1,n2) + 1
+                      ij = j2 + (i1-1)*n2
+                      rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
+                   END DO
+                END DO
+             END DO
+          END DO
+       END DO
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(xg1, wg1, fun1)
+    DEALLOCATE(xg2, wg2, fun2)
+!
+  CONTAINS
+    DOUBLE PRECISION FUNCTION rhseq(x1, x2, m)
+      DOUBLE PRECISION, INTENT(in) :: x1, x2
+      INTEGER, INTENT(in) :: m
+      rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)
+    END FUNCTION rhseq
+  END SUBROUTINE disrhs
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE ibcmat(mat, spl)
+!
+!   Apply BC on matrix
+!
+    IMPLICIT NONE
+    TYPE(gbmat), INTENT(inout) :: mat
+    TYPE(spline2d) :: spl
+    INTEGER :: nx, ndim1, nidbas1
+    INTEGER :: ny, ndim2, nidbas2
+    INTEGER :: kl, ku, nrank, i, j
+    INTEGER :: krow, kcol, jf
+    DOUBLE PRECISION :: yg
+    DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:), fun(:,:)
+!===========================================================================
+!              1.0 Prologue
+!
+    CALL get_dim(spl%sp1, ndim1, nx, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, ny, nidbas2)
+!
+    kl = mat%kl
+    ku = mat%ku
+    nrank = mat%rank
+    ALLOCATE(zsum(nrank), arr(nrank))
+    ALLOCATE(fun(0:nidbas2,1))
+!===========================================================================
+!              2.0 Unicity at the axis
+!
+!   The vertical sum on the NY-th row
+!
+    zsum = 0.0d0
+    DO i=1,ny
+       arr = 0.0d0
+       CALL getrow(mat, i, arr)
+       DO j=1,ny+ku
+          zsum(j) = zsum(j) + arr(j)
+       END DO
+    END DO
+    CALL putrow(mat, ny, zsum)
+!
+!   The horizontal sum on the NY-th column
+!
+    zsum = 0.0d0
+    DO j=1,ny
+       arr = 0.0d0
+       CALL getcol(mat, j, arr)
+       DO i=ny,ny+kl
+          zsum(i) = zsum(i) + arr(i)
+       END DO
+    END DO
+    CALL putcol(mat, ny, zsum)
+!
+!   The away operator
+!
+    DO j = 1,ny-1
+       arr = 0.0d0; arr(j) = 1.0d0
+       CALL putcol(mat, j, arr)     
+    END DO
+!
+    DO i = 1,ny-1
+       arr = 0.0d0; arr(i) = 1.0d0
+       CALL putrow(mat, i, arr)     
+    END DO
+!  
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+    i=nx+nidbas1     ! The last spline in X
+    DO j=1,ny
+       krow=(i-1)*ny+j
+       IF(MODULO(nidbas2,2) .EQ. 0 .AND. nlfix) THEN
+          yg = (spl%sp2%knots(j-1)+spl%sp2%knots(j))/2.0d0
+       ELSE
+          yg = spl%sp2%knots(j-1)
+       END IF
+       CALL basfun(yg, spl%sp2, fun, j)
+       arr = 0.0d0
+       DO jf=0,nidbas2
+          kcol=(i-1)*ny + MODULO(jf+j-1,ny)+1
+          arr(kcol) = arr(kcol)+fun(jf,1)
+       END DO
+       CALL putrow(mat, krow, arr)
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(zsum, arr)
+    DEALLOCATE(fun)
+!
+  END SUBROUTINE ibcmat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE ibcrhs(rhs, spl)
+!
+!   Apply BC on RHS
+!
+    IMPLICIT NONE
+    DOUBLE PRECISION, INTENT(inout) :: rhs(:)
+    TYPE(spline2d) :: spl
+    INTEGER :: nx, ndim1, nidbas1
+    INTEGER :: ny, ndim2, nidbas2
+    INTEGER :: nrank
+    INTEGER :: i, j, k
+    DOUBLE PRECISION :: xg, yg, zsum
+!===========================================================================
+!              1.0 Prologue
+!
+    CALL get_dim(spl%sp1, ndim1, nx, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, ny, nidbas2)
+    nrank = SIZE(rhs,1)
+!===========================================================================
+!              2.0 Unicity at the axis
+!
+!   The vertical sum
+!
+    zsum = SUM(rhs(1:ny))
+    rhs(ny) = zsum
+    rhs(1:ny-1) = 0.0d0
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+    i = nx+nidbas1          ! The last spline index on x
+    xg = spl%sp1%knots(nx)  ! Right boundary radial coordinate
+    DO j=1,ny
+       k = (i-1)*ny + j
+       IF(MODULO(nidbas2,2) .EQ. 0 .AND. nlfix) THEN
+          yg = (spl%sp2%knots(j-1)+spl%sp2%knots(j))/2.0d0
+       ELSE
+          yg = spl%sp2%knots(j-1)
+       END IF
+       rhs(k) = xg*COS(yg)
+    END DO
+  END SUBROUTINE ibcrhs
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE meshdist(c, x, nx)
+!
+!   Construct an 1d  non-equidistant mesh given a
+!   mesh distribution function.
+!
+    IMPLICIT NONE
+    DOUBLE PRECISION, INTENT(in) :: c(5)
+    INTEGER, INTENT(iN) :: nx
+    DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+    INTEGER :: nintg
+    DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+    DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+    INTEGER :: i, k
+!
+    a=x(0)
+    b=x(nx)
+    nintg = 10*nx
+    ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+    dx = (b-a)/REAL(nintg)
+    xint(0) = a
+    fint(0) = 0.0d0
+    f1 = fdist(xint(0))
+    DO i=1,nintg
+       f0 = f1
+       xint(i) = xint(i-1) + dx
+       f1 = fdist(xint(i))
+       fint(i) = fint(i-1) + 0.5*(f0+f1)
+    END DO
+!
+!  Normalization
+!
+    scal = REAL(nx) / fint(nintg)
+    fint(0:nintg) = fint(0:nintg) * scal
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+    k = 1
+    DO i=1,nintg-1
+       IF( fint(i) .GE. REAL(k) ) THEN
+          x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+               &   (k-fint(i))
+          k = k+1
+       END IF
+    END DO
+    DEALLOCATE(xint, fint)
+  CONTAINS
+    DOUBLE PRECISION FUNCTION fdist(x)
+      DOUBLE PRECISION, INTENT(in) :: x
+      fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
+    END FUNCTION fdist
+  END SUBROUTINE meshdist
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+END MODULE pde2d_nh_mod
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+PROGRAM main
+!
+  USE pde2d_nh_mod
+  USE bsplines
+  USE matrix
+  USE futils
+!
+  IMPLICIT NONE
+  INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms
+  LOGICAL :: nlppform
+  INTEGER :: i, j, ij, dimx, dimy, nrank, kl, ku, jder(2), it
+  DOUBLE PRECISION :: pi, coefx(5), coefy(5)
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
+  TYPE(spline2d) :: splxy
+  TYPE(gbmat) :: mat
+!
+  CHARACTER(len=128) :: file='pde2d_nh.h5'
+  INTEGER :: fid
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
+  DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
+  DOUBLE PRECISION :: seconds, mem, dopla
+  DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
+  INTEGER :: nits=500
+!
+  NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, coefx, coefy, nlfix
+!===========================================================================
+!              1.0 Prologue
+!
+!   Read in data specific to run
+!
+  nx = 8              ! Number of intervals in x
+  ny = 8              ! Number of intervals in y
+  nidbas = (/3,3/)    ! Degree of splines
+  ngauss = (/4,4/)    ! Number of Gauss points/interval
+  mbess = 2           ! Exponent of differential problem
+  nterms = 2          ! Number of terms in weak form
+  nlppform = .TRUE.   ! Use PPFORM for gridval or not
+  coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+  coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+  nlfix = .TRUE.      ! Fix or not for even nidbas2
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Define grid on x (=radial) & y (=poloidal) axis
+!
+  pi = 4.0d0*ATAN(1.0d0)
+  ALLOCATE(xgrid(0:nx), ygrid(0:ny))
+  xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
+  CALL meshdist(coefx, xgrid, nx)
+  ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
+  CALL meshdist(coefy, ygrid, ny)
+!
+  WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
+  WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny)
+!
+!   Create hdf5 file
+!
+  CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
+  CALL attach(fid, '/', 'NX', nx)
+  CALL attach(fid, '/', 'NY', ny)
+  CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
+  CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
+  CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
+  CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
+  CALL attach(fid, '/', 'MBESS', mbess)
+!===========================================================================
+!              2.0 Discretize the PDE
+!
+!   Set up spline
+!
+  t0 = seconds()
+  CALL set_spline(nidbas, ngauss, &
+       &          xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
+  WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots
+  WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots
+!
+!   FE matrix assembly
+!
+  nrank = (nx+nidbas(1))*ny    ! Rank of the FE matrix
+  kl = (nidbas(1)+1)*ny -1     ! Number of sub-diagnonals
+  ku = kl                      ! Number of super-diagnonals
+  WRITE(*,'(a,5i6)') 'nrank, kl, ku', nrank, kl, ku
+!
+  CALL init(kl, ku, nrank, nterms, mat)
+  CALL dismat(splxy, mat)
+  CALL putmat(fid, '/MAT0', mat, 'Assembled GB matrice')
+  ALLOCATE(arr(nrank))
+!
+!   BC on Matrix
+!
+  IF(nrank.LT.100) &
+       & WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', mat%val(kl+ku+1,:)
+  CALL ibcmat(mat, splxy)
+  tmat = seconds() - t0
+  IF(nrank.LT.100) &
+       &   WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', mat%val(kl+ku+1,:)
+!
+!   RHS assembly
+!
+  ALLOCATE(rhs(nrank), sol(nrank))
+  CALL disrhs(mbess, splxy, rhs)
+!
+!   BC on RHS
+!
+  CALL ibcrhs(rhs, splxy)
+
+  CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
+  CALL putmat(fid, '/MAT1', mat, 'GB matrice with BC')
+  WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem()
+!===========================================================================
+!              3.0 Solve the dicretized system
+!
+  t0 = seconds()
+  CALL factor(mat)
+  tfact = seconds() - t0
+  gflops1 = dopla('DGBTRF',nrank,nrank,kl,ku,0) / tfact / 1.d9
+
+  t0 = seconds()
+  CALL bsolve(mat, rhs, sol)
+!
+!   Backtransform of solution
+!
+  sol(1:ny-1) = sol(ny)
+!
+!   Spline coefficients, taking into account of periodicity in y
+!   Note: in SOL, y was numbered first.
+!
+  dimx = splxy%sp1%dim
+  dimy = splxy%sp2%dim
+  ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
+  DO j=0,dimy-1
+     DO i=0,dimx-1
+        ij = MODULO(j,ny) + i*ny + 1
+        bcoef(i,j) = sol(ij)
+     END DO
+  END DO
+  WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef)
+!
+  tsolv = seconds() - t0
+  gflops2 = dopla('DGBTRS',nrank,1,kl,ku,0) / tsolv / 1.d9
+  CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
+!===========================================================================
+!              4.0 Check the solution
+!
+!   Check function values computed with various method
+!
+  ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
+  DO i=0,nx
+     DO j=0,ny
+        solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j)) &
+             &        + xgrid(i)*COS(ygrid(j))
+     END DO
+  END DO
+  jder = (/0,0/)
+!
+!   Compute PPFORM/BCOEFS at first call to gridval
+  CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
+!
+  WRITE(*,'(/a)') '*** Checking solutions'
+  t0 = seconds()
+  DO it=1,nits   ! nits iterations for timing
+     CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+  END DO
+  tgrid = (seconds() - t0)/REAL(nits)
+  errsol = solana - solcal
+  IF( SIZE(bcoef,2) .LE. 10 ) THEN
+     CALL prnmat('BCOEF', bcoef)
+     CALL prnmat('SOLANA', solana)
+     CALL prnmat('SOLCAL', solcal)
+     CALL prnmat('ERRSOL', errsol)
+  END IF
+  WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
+       &    norm2(errsol) / norm2(solana)
+  WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s)             ', tgrid
+!
+  CALL putarr(fid, '/xgrid', xgrid, 'r')
+  CALL putarr(fid, '/ygrid', ygrid, '\theta')
+  CALL putarr(fid, '/sol', solcal, 'Solutions')
+  CALL putarr(fid, '/solana', solana,'Exact solutions')
+  CALL putarr(fid, '/errors', errsol, 'Errors')
+!
+!   Check derivatives d/dx and d/dy
+!
+  WRITE(*,'(/a)') '*** Checking gradient'
+  DO i=0,nx
+     DO j=0,ny
+        IF( mbess .EQ. 0 ) THEN
+           solana(i,j) = -2.0d0 * xgrid(i)
+        ELSE
+           solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
+             &           xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) &
+             &          + COS(ygrid(j))
+        END IF
+     END DO
+  END DO
+!
+  jder = (/1,0/)
+  CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+  errsol = solana - solcal
+  CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
+  CALL putarr(fid, '/errors_x', errsol, 'Errors in d/dx')
+  WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx',  norm2(errsol)
+!
+  DO i=0,nx
+     DO j=0,ny
+        solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * &
+             &        SIN(mbess*ygrid(j)) &
+             &        -xgrid(i)*SIN(ygrid(j))
+     END DO
+  END DO
+!
+  jder = (/0,1/)
+  CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+  CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
+  errsol = solana - solcal
+  CALL putarr(fid, '/errors_y', errsol, 'Errors in d/dy')
+  WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy',  norm2(errsol)
+!===========================================================================
+!              9.0  Epilogue
+!
+  WRITE(*,'(/a)') '---'
+  WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
+  WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
+  WRITE(*,'(a,1pe12.3)') 'Backsolve time (s)            ', tsolv
+  WRITE(*,'(a,2f10.3)') 'Factor/solve Gflop/s', gflops1, gflops2
+!
+  DEALLOCATE(xgrid, rhs, sol)
+  DEALLOCATE(solcal, solana, errsol)
+  DEALLOCATE(bcoef)
+  DEALLOCATE(arr)
+  CALL destroy_sp(splxy)
+  CALL destroy(mat)
+!
+  CALL closef(fid)
+!===========================================================================
+!
+CONTAINS
+  FUNCTION norm2(x)
+!
+!  Compute the 2-norm of array x
+!
+    IMPLICIT NONE
+    DOUBLE PRECISION :: norm2
+    DOUBLE PRECISION, INTENT(in) :: x(:,:)
+    DOUBLE PRECISION :: sum2
+    INTEGER :: i, j
+!
+    sum2 = 0.0d0
+    DO i=1,SIZE(x,1)
+       DO j=1,SIZE(x,2)
+          sum2 = sum2 + x(i,j)**2
+       END DO
+    END DO
+    norm2 = SQRT(sum2)
+  END FUNCTION norm2
+  SUBROUTINE prnmat(label, mat)
+    CHARACTER(len=*) :: label
+    DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
+    INTEGER :: i
+    WRITE(*,'(/a)') TRIM(label)
+    DO i=1,SIZE(mat,1)
+       WRITE(*,'(10(1pe12.3))') mat(i,:)
+    END DO
+  END SUBROUTINE prnmat
+END PROGRAM main
+!
+!+++
+!+++
diff --git a/examples/pde2d_pardiso.f90 b/examples/pde2d_pardiso.f90
new file mode 100644
index 0000000..276f727
--- /dev/null
+++ b/examples/pde2d_pardiso.f90
@@ -0,0 +1,741 @@
+!>
+!> @file pde2d_pardiso.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+!
+!  Solving the 2d PDE using splines and PARDISO non-symmetric matrix
+!
+!    -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0
+!    exact solution: f(x,y) = (1-x^2) x^m cos(my)
+!
+MODULE pde2d_pardiso_mod
+  USE bsplines
+  USE pardiso_bsplines
+  IMPLICIT NONE
+CONTAINS
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE dismat(spl, mat)
+!
+!   Assembly of FE matrix mat using spline spl
+!
+    TYPE(spline2d), INTENT(in)       :: spl
+    TYPE(pardiso_mat), INTENT(inout) :: mat
+!
+    INTEGER :: n1, nidbas1, ndim1, ng1
+    INTEGER :: n2, nidbas2, ndim2, ng2
+    INTEGER :: i, j, ig1, ig2
+    INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
+    DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:)
+    DOUBLE PRECISION:: contrib
+!
+    INTEGER :: kterms         ! Number of terms in weak form
+    INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:)  ! Derivative order
+    DOUBLE PRECISION, ALLOCATABLE  :: coefs(:,:,:)          ! Terms in weak form
+    INTEGER, ALLOCATABLE :: left1(:), left2(:)
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
+    WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
+    WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
+!
+!   Gauss quadature
+!
+    CALL get_gauss(spl%sp1, ng1)
+    CALL get_gauss(spl%sp2, ng2)
+    ALLOCATE(xg1(ng1), wg1(ng1))
+    ALLOCATE(xg2(ng1), wg2(ng1))
+!
+!   Weak form
+!
+    kterms = mat%nterms
+    ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
+    ALLOCATE(coefs(kterms,ng1,ng2))
+!
+    ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative
+    ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! 
+!===========================================================================
+!              2.0 Assembly loop
+!
+    ALLOCATE(left1(ng1))
+    ALLOCATE(left2(ng2))
+    DO i=1,n1
+       CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
+       left1 = i
+       CALL basfun(xg1, spl%sp1, fun1, left1)
+       DO j=1,n2
+          CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
+          left2 = j
+          CALL basfun(xg2, spl%sp2, fun2, left2)
+!
+          DO ig1=1,ng1
+             DO ig2=1,ng2
+                CALL coefeq(xg1(ig1), xg2(ig2), &
+                     &      idert(:,:,ig1,ig2), &
+                     &      iderw(:,:,ig1,ig2), &
+                     &      coefs(:,ig1,ig2))
+             END DO
+          END DO
+!
+          DO iw1=0,nidbas1  ! Weight function in dir 1
+             igw1 = i+iw1
+             DO iw2=0,nidbas2  ! Weight function in dir 2
+                igw2 = MODULO(j+iw2-1, n2) + 1
+                irow = igw2 + (igw1-1)*n2
+                DO it1=0,nidbas1  ! Test function in dir 1
+                   igt1 = i+it1
+                   DO it2=0,nidbas2  ! Test function in dir 2
+                      igt2 = MODULO(j+it2-1, n2) + 1
+                      jcol = igt2 + (igt1-1)*n2
+!-------------
+                      contrib = 0.0d0
+                      DO ig1=1,ng1
+                         DO ig2=1,ng2
+                            DO iterm=1,kterms
+                               contrib = contrib + &
+                                    &    fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
+                                    &    fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * &
+                                    &    coefs(iterm,ig1,ig2) *                 &
+                                    &    fun2(it2,idert(iterm,2,ig1,ig2),ig2) * &
+                                    &    fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
+                                    &    wg1(ig1) * wg2(ig2)
+                            END DO
+                         END DO
+                      END DO
+                      CALL updtmat(mat, irow, jcol, contrib)
+!-------------
+                   END DO
+                END DO
+             END DO
+          END DO
+       END DO
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(xg1, wg1, fun1)
+    DEALLOCATE(xg2, wg2, fun2)
+    DEALLOCATE(idert, iderw, coefs)
+    DEALLOCATE(left1,left2)
+!
+  CONTAINS
+    SUBROUTINE coefeq(x, y, idt, idw, c)
+      DOUBLE PRECISION, INTENT(in) :: x, y
+      INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
+      DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
+!
+! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
+!
+      c(1) = x        ! 
+      idt(1,1) = 1
+      idt(1,2) = 0
+      idw(1,1) = 1
+      idw(1,2) = 0
+      !
+      c(2) = 1.d0/x
+      idt(2,1) = 0
+      idt(2,2) = 1
+      idw(2,1) = 0
+      idw(2,2) = 1
+    END SUBROUTINE coefeq
+  END SUBROUTINE dismat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE disrhs(mbess, spl, rhs)
+!
+!   Assembly the RHS using 2d spline spl
+!
+    INTEGER, INTENT(in) :: mbess
+    TYPE(spline2d), INTENT(in) :: spl
+    DOUBLE PRECISION, INTENT(out) :: rhs(:)
+    INTEGER :: n1, nidbas1, ndim1, ng1
+    INTEGER :: n2, nidbas2, ndim2, ng2
+    INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
+    DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
+    DOUBLE PRECISION:: contrib
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, n2, nidbas2) 
+!
+    ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
+    ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
+!
+!   Gauss quadature
+!
+    CALL get_gauss(spl%sp1, ng1)
+    CALL get_gauss(spl%sp2, ng2)
+    ALLOCATE(xg1(ng1), wg1(ng1))
+    ALLOCATE(xg2(ng1), wg2(ng1))
+!===========================================================================
+!              2.0 Assembly loop
+!
+    nrank = SIZE(rhs)
+    rhs(1:nrank) = 0.0d0
+!
+    DO i=1,n1
+       CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
+       DO ig1=1,ng1
+          CALL basfun(xg1(ig1), spl%sp1, fun1, i)
+          DO j=1,n2
+             CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
+             DO ig2=1,ng2
+                CALL basfun(xg2(ig2), spl%sp2, fun2, j)
+                contrib = wg1(ig1)*wg2(ig2) * &
+                     &    rhseq(xg1(ig1),xg2(ig2), mbess)
+                DO k1=0,nidbas1
+                   i1 = i+k1
+                   DO k2=0,nidbas2
+                      j2 = MODULO(j+k2-1,n2) + 1
+                      ij = j2 + (i1-1)*n2
+                      rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
+                   END DO
+                END DO
+             END DO
+          END DO
+       END DO
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(xg1, wg1, fun1)
+    DEALLOCATE(xg2, wg2, fun2)
+!
+  CONTAINS
+    DOUBLE PRECISION FUNCTION rhseq(x1, x2, m)
+      DOUBLE PRECISION, INTENT(in) :: x1, x2
+      INTEGER, INTENT(in) :: m
+      rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)
+    END FUNCTION rhseq
+  END SUBROUTINE disrhs
+!
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE ibcmat(mat, ny)
+!
+!   Apply BC on matrix
+!
+    TYPE(pardiso_mat), INTENT(inout) :: mat
+    INTEGER, INTENT(in) :: ny
+    INTEGER :: nrank, i, j
+    DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
+!===========================================================================
+!              1.0 Prologue
+!
+    nrank = mat%rank
+    ALLOCATE(zsum(nrank), arr(nrank))
+!===========================================================================
+!              2.0 Unicity at the axis
+!
+!   The vertical sum on the NY-th row
+!
+    zsum = 0.0d0
+    DO i=1,ny
+       arr = 0.0d0
+       CALL getrow(mat, i, arr)
+       zsum(:) = zsum(:) + arr(:)
+    END DO
+    CALL putrow(mat, ny, zsum)
+!
+!   The horizontal sum on the NY-th column
+!
+    zsum = 0.0d0
+    DO j=1,ny
+       arr = 0.0d0
+       CALL getcol(mat, j, arr)
+       zsum(ny:) = zsum(ny:) + arr(ny:)
+    END DO
+    CALL putcol(mat, ny, zsum)
+!
+!   The away operator
+!
+    DO j = 1,ny-1
+       arr = 0.0d0; arr(j) = 1.0d0
+       CALL putcol(mat, j, arr)     
+    END DO
+!
+    DO i = 1,ny-1
+       arr = 0.0d0; arr(i) = 1.0d0
+       CALL putrow(mat, i, arr)     
+    END DO
+!  
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+    DO j = nrank, nrank-ny+1, -1
+       arr = 0.0d0; arr(j) = 1.0d0
+       CALL putcol(mat, j, arr)     
+    END DO
+!
+    DO i = nrank, nrank-ny+1, -1
+       arr = 0.0d0; arr(i) = 1.0d0
+       CALL putrow(mat, i, arr)     
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(zsum, arr)
+!
+  END SUBROUTINE ibcmat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE ibcrhs(rhs, ny)
+!
+!   Apply BC on RHS
+!
+    DOUBLE PRECISION, INTENT(inout) :: rhs(:)
+    INTEGER, INTENT(in) :: ny
+    INTEGER :: nrank
+    DOUBLE PRECISION :: zsum
+!===========================================================================
+!              1.0 Prologue
+!
+    nrank = SIZE(rhs,1)
+!===========================================================================
+!              2.0 Unicity at the axis
+!
+!   The vertical sum
+!
+    zsum = SUM(rhs(1:ny))
+    rhs(ny) = zsum
+    rhs(1:ny-1) = 0.0d0
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+    rhs(nrank-ny+1:nrank) = 0.0d0
+  END SUBROUTINE ibcrhs
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE coefeq_poisson(x, y, idt, idw, c)
+    DOUBLE PRECISION, INTENT(in) :: x, y
+    INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
+    DOUBLE PRECISION, INTENT(out) :: c(:)
+!
+! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
+!
+    c(1) = x        ! 
+    idt(1,1) = 1
+    idt(1,2) = 0
+    idw(1,1) = 1
+    idw(1,2) = 0
+!
+    c(2) = 1.d0/x
+    idt(2,1) = 0
+    idt(2,2) = 1
+    idw(2,1) = 0
+    idw(2,2) = 1
+  END SUBROUTINE coefeq_poisson
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+END MODULE pde2d_pardiso_mod
+PROGRAM main
+  USE pde2d_pardiso_mod
+  USE futils
+  USE conmat_mod
+!
+  IMPLICIT NONE
+  INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms
+  LOGICAL :: nlppform, nlconmat
+  INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it
+  DOUBLE PRECISION :: pi, coefx(5), coefy(5)
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol
+  TYPE(spline2d) :: splxy
+  TYPE(pardiso_mat) :: mat
+!
+  CHARACTER(len=128) :: file='pde2d_pardiso.h5'
+  INTEGER :: fid
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
+  DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
+  DOUBLE PRECISION :: seconds, mem, dopla
+  DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
+  DOUBLE PRECISION :: tconv, treord
+  INTEGER :: nits=100
+  LOGICAL :: nlmetis, nlforce_zero
+!
+  NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlmetis, &
+       &            nlforce_zero, nlconmat, coefx, coefy
+!===========================================================================
+!              1.0 Prologue
+!
+!   Read in data specific to run
+!
+  nx = 8              ! Number of intervals in x
+  ny = 8              ! Number of intervals in y
+  nidbas = (/3,3/)    ! Degree of splines
+  ngauss = (/4,4/)    ! Number of Gauss points/interval
+  mbess = 2           ! Exponent of differential problem
+  nterms = 2          ! Number of terms in weak form
+  nlppform = .TRUE.   ! Use PPFORM for gridval or not
+  nlmetis = .FALSE.   ! Use metis ordering or minimum degree
+  nlforce_zero = .FALSE. ! Remove existing nodes with zero val in  putele/row/ele
+  nlconmat = .TRUE.   ! Use CONMAT instead of DISMAT
+  coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+  coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Define grid on x (=radial) & y (=poloidal) axis
+!
+  pi = 4.0d0*ATAN(1.0d0)
+  ALLOCATE(xgrid(0:nx), ygrid(0:ny))
+  xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
+  CALL meshdist(coefx, xgrid, nx)
+  ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
+  CALL meshdist(coefy, ygrid, ny)
+!
+!   Create hdf5 file
+!
+  CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
+  CALL attach(fid, '/', 'NX', nx)
+  CALL attach(fid, '/', 'NY', ny)
+  CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
+  CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
+  CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
+  CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
+  CALL attach(fid, '/', 'MBESS', mbess)
+!===========================================================================
+!              2.0 Discretize the PDE
+!
+!   Set up spline
+!
+  CALL set_spline(nidbas, ngauss, &
+       &          xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
+!!$  WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots
+!!$  WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots
+!
+!   FE matrix assembly
+!
+  nrank = (nx+nidbas(1))*ny    ! Rank of the FE matrix
+  WRITE(*,'(a,i8)') 'nrank', nrank
+!
+  CALL init(nrank, nterms, mat, nlforce_zero=nlforce_zero)
+  t0 = seconds()
+  IF(nlconmat) THEN
+     CALL conmat(splxy, mat, coefeq_poisson)
+  ELSE
+     CALL dismat(splxy, mat)
+  END IF
+  tmat = seconds() - t0
+  ALLOCATE(arr(nrank))
+  IF(nrank.LT.100) THEN
+     DO i=1,nrank
+        CALL getele(mat, i, i, arr(i))
+     END DO
+     WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', arr
+  END IF
+!
+!   BC on Matrix
+!
+  WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero
+  WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
+  CALL ibcmat(mat, ny)
+  IF(nrank.LT.100) THEN
+     DO i=1,nrank
+        CALL getele(mat, i, i, arr(i))
+     END DO
+     WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', arr
+     WRITE(*,'(a)') 'Last rows'
+     DO i=nrank-ny,nrank
+        CALL getrow(mat, i, arr)
+        WRITE(*,'(10(1pe12.3))') arr
+     END DO
+  END IF
+!
+!   RHS assembly
+!
+  ALLOCATE(rhs(nrank), sol(nrank))
+  CALL disrhs(mbess, splxy, rhs)
+!
+!   BC on RHS
+!
+  CALL ibcrhs(rhs, ny)
+!
+  CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
+  WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
+  WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem()
+!===========================================================================
+!              3.0 Solve the dicretized system
+!
+  t0 = seconds()
+  CALL to_mat(mat)
+  tconv = seconds() -t0
+  WRITE(*,'(/a,l4)') 'associated(mat%mat)', ASSOCIATED(mat%mat)
+  WRITE(*,'(a,1pe12.3)') 'Mem used (MB) after TO_MAT', mem()
+!
+  t0 = seconds()
+  CALL reord_mat(mat, nlmetis=nlmetis, debug=.FALSE.)
+  CALL putmat(fid, '/MAT', mat)
+  treord = seconds() - t0
+!
+  t0 = seconds()
+  CALL numfact(mat, debug=.FALSE.)
+  tfact = seconds() - t0
+ 
+  WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem()
+  WRITE(*,'(/a,i12)') 'Number of nonzeros in factors of A  = ',mat%p%iparm(18)
+  WRITE(*,'(a,i12)')  'Number of factorization MFLOPS      = ',mat%p%iparm(19)
+  gflops1 = mat%p%iparm(19) / tfact / 1.d3
+!
+  CALL bsolve(mat, rhs, sol, debug=.FALSE.)
+  t0 = seconds()
+  DO it=1,nits   ! nits iterations for timing
+     CALL bsolve(mat, rhs, sol)
+     sol(1:ny-1) = sol(ny)
+  END DO
+  WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem()
+  tsolv = (seconds() - t0)/REAL(nits)
+!
+!   Spline coefficients, taking into account of periodicity in y
+!   Note: in SOL, y was numbered first.
+!
+  dimx = splxy%sp1%dim
+  dimy = splxy%sp2%dim
+  ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
+  DO j=0,dimy-1
+     DO i=0,dimx-1
+        ij = MODULO(j,ny) + i*ny + 1
+        bcoef(i,j) = sol(ij)
+     END DO
+  END DO
+  WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef)
+  CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
+!===========================================================================
+!              4.0 Check the solution
+!
+!   Check function values computed with various method
+!
+  ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
+  DO i=0,nx
+     DO j=0,ny
+        solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
+     END DO
+  END DO
+  jder = (/0,0/)
+!
+!   Compute PPFORM/BCOEFS at first call to gridval
+  CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
+!
+  WRITE(*,'(/a)') '*** Checking solutions'
+  t0 = seconds()
+  DO it=1,nits   ! nits iterations for timing
+     CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+  END DO
+  tgrid = (seconds() - t0)/REAL(nits)
+  errsol = solana - solcal
+  IF( SIZE(bcoef,2) .LE. 10 ) THEN
+     CALL prnmat('BCOEF', bcoef)
+     CALL prnmat('SOLANA', solana)
+     CALL prnmat('SOLCAL', solcal)
+     CALL prnmat('ERRSOL', errsol)
+  END IF
+  WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
+       &    norm2(errsol) / norm2(solana)
+  WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s)             ', tgrid
+!
+  CALL putarr(fid, '/xgrid', xgrid, 'r')
+  CALL putarr(fid, '/ygrid', ygrid, '\theta')
+  CALL putarr(fid, '/sol', solcal, 'Solutions')
+  CALL putarr(fid, '/solana', solana,'Exact solutions')
+  CALL putarr(fid, '/errors', errsol, 'Errors')
+!
+!   Check derivatives d/dx and d/dy
+!
+  WRITE(*,'(/a)') '*** Checking gradient'
+  DO i=0,nx
+     DO j=0,ny
+        IF( mbess .EQ. 0 ) THEN
+           solana(i,j) = -2.0d0 * xgrid(i)
+        ELSE
+           solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
+             &           xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
+        END IF
+     END DO
+  END DO
+!
+  jder = (/1,0/)
+  CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+  errsol = solana - solcal
+  CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
+  WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx',  norm2(errsol)
+!
+  DO i=0,nx
+     DO j=0,ny
+        solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
+     END DO
+  END DO
+!
+  jder = (/0,1/)
+  CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+  CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
+  errsol = solana - solcal
+  WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy',  norm2(errsol)
+!
+  WRITE(*,'(/a)') '---'
+  WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
+  WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s)   ', tconv
+  WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s)      ', treord
+  WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
+  WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv
+  WRITE(*,'(a,1pe12.3)') 'Backsolve time (s)            ', tsolv
+  WRITE(*,'(a,2f10.3)') 'Factor  Gflop/s', gflops1
+!===========================================================================
+!              5.0 Clear the matrix and recompute
+!
+  WRITE(*,'(/a)') 'Recompute the solver ...'
+  CALL clear_mat(mat)
+  t0 = seconds()
+  IF(nlconmat) THEN
+     CALL conmat(splxy, mat, coefeq_poisson)
+  ELSE
+     CALL dismat(splxy, mat)
+  END IF
+  tmat = seconds()-t0
+  CALL ibcmat(mat, ny)
+!
+  t0 = seconds()
+  CALL numfact(mat, debug=.FALSE.)
+  tfact = seconds()-t0
+  gflops1 = mat%p%iparm(19) / tfact / 1.d3
+!
+  t0 = seconds()
+  ALLOCATE(newsol(nrank))
+  CALL bsolve(mat, rhs, newsol)
+  newsol(1:ny-1) = newsol(ny)
+  tsolv = seconds()-t0
+!
+  WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol))
+  WRITE(*,'(/a)') '---'
+  WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
+  WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
+  WRITE(*,'(a,1pe12.3)') 'Backsolve time (s)            ', tsolv
+  WRITE(*,'(a,1pe12.3)') 'Total (s)                     ', tmat+tfact+tsolv
+  WRITE(*,'(a,2f10.3)') 'Factor  Gflop/s', gflops1
+!
+  DEALLOCATE(newsol)
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(xgrid, rhs, sol)
+  DEALLOCATE(solcal, solana, errsol)
+  DEALLOCATE(bcoef)
+  DEALLOCATE(arr)
+  CALL destroy_sp(splxy)
+  CALL destroy(mat)
+!
+  CALL closef(fid)
+!===========================================================================
+!
+CONTAINS
+  FUNCTION norm2(x)
+!
+!  Compute the 2-norm of array x
+!
+    IMPLICIT NONE
+    DOUBLE PRECISION :: norm2
+    DOUBLE PRECISION, INTENT(in) :: x(:,:)
+    DOUBLE PRECISION :: sum2
+    INTEGER :: i, j
+!
+    sum2 = 0.0d0
+    DO i=1,SIZE(x,1)
+       DO j=1,SIZE(x,2)
+          sum2 = sum2 + x(i,j)**2
+       END DO
+    END DO
+    norm2 = SQRT(sum2)
+  END FUNCTION norm2
+  SUBROUTINE prnmat(label, mat)
+    CHARACTER(len=*) :: label
+    DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
+    INTEGER :: i
+    WRITE(*,'(/a)') TRIM(label)
+    DO i=1,SIZE(mat,1)
+       WRITE(*,'(10(1pe12.3))') mat(i,:)
+    END DO
+  END SUBROUTINE prnmat
+END PROGRAM main
+!
+!+++
+SUBROUTINE meshdist(c, x, nx)
+!
+!   Construct an 1d  non-equidistant mesh given a
+!   mesh distribution function.
+!
+  IMPLICIT NONE
+  DOUBLE PRECISION, INTENT(in) :: c(5)
+  INTEGER, INTENT(iN) :: nx
+  DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+  INTEGER :: nintg
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+  DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+  INTEGER :: i, k
+!
+  a=x(0)
+  b=x(nx)
+  nintg = 10*nx
+  ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+  dx = (b-a)/REAL(nintg)
+  xint(0) = a
+  fint(0) = 0.0d0
+  f1 = fdist(xint(0))
+  DO i=1,nintg
+     f0 = f1
+     xint(i) = xint(i-1) + dx
+     f1 = fdist(xint(i))
+     fint(i) = fint(i-1) + 0.5*(f0+f1)
+  END DO
+!
+!  Normalization
+!
+  scal = REAL(nx) / fint(nintg)
+  fint(0:nintg) = fint(0:nintg) * scal
+!!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+  k = 1
+  DO i=1,nintg-1
+     IF( fint(i) .GE. REAL(k) ) THEN
+        x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+             &   (k-fint(i))
+        k = k+1
+     END IF
+  END DO
+!
+  DEALLOCATE(xint, fint)
+CONTAINS
+  DOUBLE PRECISION FUNCTION fdist(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
+  END FUNCTION fdist
+END SUBROUTINE meshdist
+!+++
diff --git a/examples/pde2d_pb.f90 b/examples/pde2d_pb.f90
new file mode 100644
index 0000000..e764273
--- /dev/null
+++ b/examples/pde2d_pb.f90
@@ -0,0 +1,696 @@
+!>
+!> @file pde2d_pb.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!  Solving the following 2d PDE using splines:
+!
+!    -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0
+!    exact solution: f(x,y) = (1-x^2) x^m cos(my)
+!
+  USE bsplines
+  USE matrix
+  USE conmat_mod
+!
+  IMPLICIT NONE
+  INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms
+  LOGICAL :: nlppform, nlconmat
+  INTEGER :: i, j, ij, dimx, dimy, nrank, kl, ku, jder(2), it
+  DOUBLE PRECISION :: pi, coefx(5), coefy(5)
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
+  TYPE(spline2d) :: splxy
+  TYPE(pbmat) :: mat
+!
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
+  DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
+  DOUBLE PRECISION :: seconds, mem, dopla
+  DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
+  INTEGER :: nits=500
+!
+  INTERFACE
+     SUBROUTINE dismat(spl, mat)
+       USE bsplines
+       USE matrix
+       TYPE(spline2d), INTENT(in) :: spl
+       TYPE(pbmat), INTENT(inout) :: mat
+     END SUBROUTINE dismat
+     SUBROUTINE disrhs(mbess, spl, rhs)
+       USE bsplines
+       INTEGER, INTENT(in) :: mbess
+       TYPE(spline2d), INTENT(in) :: spl
+       DOUBLE PRECISION, INTENT(out) :: rhs(:)
+     END SUBROUTINE disrhs
+     SUBROUTINE meshdist(coefs, x, nx)
+       DOUBLE PRECISION, INTENT(in) :: coefs(5)
+       INTEGER, INTENT(iN) :: nx
+       DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+     END SUBROUTINE meshdist
+     SUBROUTINE ibcmat(mat, ny)
+       USE matrix
+       TYPE(pbmat), INTENT(inout) :: mat
+       INTEGER, INTENT(in) :: ny
+     END SUBROUTINE ibcmat
+     SUBROUTINE ibcrhs(rhs, ny)
+       DOUBLE PRECISION, INTENT(inout) :: rhs(:)
+       INTEGER, INTENT(in) :: ny
+     END SUBROUTINE ibcrhs
+     SUBROUTINE coefeq(x, y, idt, idw, c)
+       DOUBLE PRECISION, INTENT(in) :: x, y
+       INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
+       DOUBLE PRECISION, INTENT(out) :: c(:)
+     END SUBROUTINE coefeq
+  END INTERFACE
+!
+  NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlconmat, &
+       &            coefx, coefy
+!===========================================================================
+!              1.0 Prologue
+!
+!   Read in data specific to run
+!
+  nx = 8              ! Number of intervals in x
+  ny = 8              ! Number of intervals in y
+  nidbas = (/3,3/)    ! Degree of splines
+  ngauss = (/4,4/)    ! Number of Gauss points/interval
+  mbess = 2           ! Exponent of differential problem
+  nterms = 2          ! Number of terms in weak form
+  nlppform = .TRUE.   ! Use PPFORM for gridval or not
+  nlconmat = .TRUE.   ! Use CONMAT instead of DISMAT
+  coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+  coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Define grid on x (=radial) & y (=poloidal) axis
+!
+  pi = 4.0d0*ATAN(1.0d0)
+  ALLOCATE(xgrid(0:nx), ygrid(0:ny))
+  xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
+  CALL meshdist(coefx, xgrid, nx)
+  ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
+  CALL meshdist(coefy, ygrid, ny)
+!
+  WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
+  WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny)
+!===========================================================================
+!              2.0 Discretize the PDE
+!
+!   Set up spline
+!
+  CALL set_spline(nidbas, ngauss, &
+       &          xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
+!!$  WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots
+!!$  WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots
+!
+!   FE matrix assembly
+!
+  nrank = (nx+nidbas(1))*ny    ! Rank of the FE matrix
+  kl = (nidbas(1)+1)*ny -1     ! Number of sub-diagnonals
+  ku = kl                      ! Number of super-diagnonals
+  WRITE(*,'(a,5i6)') 'nrank, kl, ku', nrank, kl, ku
+!
+  CALL init(ku, nrank, nterms, mat)
+  t0 = seconds()
+  IF(nlconmat) THEN
+     CALL conmat(splxy, mat, coefeq)
+  ELSE
+     CALL dismat(splxy, mat)
+  END IF
+  tmat = seconds() - t0
+  ALLOCATE(arr(nrank))
+!
+!   BC on Matrix
+!
+  IF(nrank.LT.100) &
+       &  WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', mat%val(ku+1,:)
+  CALL ibcmat(mat, ny)
+  IF(nrank.LT.100) &
+       &    WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', mat%val(ku+1,:)
+!
+!   RHS assembly
+!
+  ALLOCATE(rhs(nrank), sol(nrank))
+  CALL disrhs(mbess, splxy, rhs)
+!
+!   BC on RHS
+!
+  CALL ibcrhs(rhs, ny)
+
+  WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem()
+!===========================================================================
+!              3.0 Solve the dicretized system
+!
+  t0 = seconds()
+  CALL factor(mat)
+  tfact = seconds() - t0
+  gflops1 = dopla('DPBTRF',nrank,nrank,kl,ku,0) / tfact / 1.d9
+
+  t0 = seconds()
+  CALL bsolve(mat, rhs, sol)
+!
+!   Backtransform of solution
+!
+  sol(1:ny-1) = sol(ny)
+!
+!   Spline coefficients, taking into account of periodicity in y
+!   Note: in SOL, y was numbered first.
+!
+  dimx = splxy%sp1%dim
+  dimy = splxy%sp2%dim
+  ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
+  DO j=0,dimy-1
+     DO i=0,dimx-1
+        ij = MODULO(j,ny) + i*ny + 1
+        bcoef(i,j) = sol(ij)
+     END DO
+  END DO
+  WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef)
+!
+  tsolv = seconds() - t0
+  gflops2 = dopla('DPBTRS',nrank,1,kl,ku,0) / tsolv / 1.d9
+!===========================================================================
+!              4.0 Check the solution
+!
+!   Check function values computed with various method
+!
+  ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
+  DO i=0,nx
+     DO j=0,ny
+        solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
+     END DO
+  END DO
+  jder = (/0,0/)
+!
+!   Compute PPFORM at first call to gridval
+  IF(nlppform) THEN
+     CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
+  END IF
+!
+  WRITE(*,'(/a)') '*** Checking solutions'
+  t0 = seconds()
+  DO it=1,nits   ! nits iterations for timing
+     CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+  END DO
+  tgrid = (seconds() - t0)/REAL(nits)
+  errsol = solana - solcal
+  IF( SIZE(bcoef,2) .LE. 10 ) THEN
+     CALL prnmat('BCOEF', bcoef)
+     CALL prnmat('SOLANA', solana)
+     CALL prnmat('SOLCAL', solcal)
+     CALL prnmat('ERRSOL', errsol)
+  END IF
+  WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
+       &    norm2(errsol) / norm2(solana)
+  WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s)             ', tgrid
+!
+!   Check derivatives d/dx and d/dy
+!
+  WRITE(*,'(/a)') '*** Checking gradient'
+  DO i=0,nx
+     DO j=0,ny
+        IF( mbess .EQ. 0 ) THEN
+           solana(i,j) = -2.0d0 * xgrid(i)
+        ELSE
+           solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
+             &           xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
+        END IF
+     END DO
+  END DO
+!
+  jder = (/1,0/)
+  CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+  errsol = solana - solcal
+  WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx',  norm2(errsol)
+!
+  DO i=0,nx
+     DO j=0,ny
+        solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
+     END DO
+  END DO
+!
+  jder = (/0,1/)
+  CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+  errsol = solana - solcal
+  WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy',  norm2(errsol)
+!===========================================================================
+!              9.0  Epilogue
+!
+  WRITE(*,'(/a)') '---'
+  WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
+  WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
+  WRITE(*,'(a,1pe12.3)') 'Backsolve time (s)            ', tsolv
+  WRITE(*,'(a,2f10.3)') 'Factor/solve Gflop/s', gflops1, gflops2
+!
+  DEALLOCATE(xgrid, rhs, sol)
+  DEALLOCATE(solcal, solana, errsol)
+  DEALLOCATE(bcoef)
+  DEALLOCATE(arr)
+  CALL destroy_sp(splxy)
+  CALL destroy(mat)
+!
+!===========================================================================
+!
+CONTAINS
+  SUBROUTINE prntmat(str, a)
+    DOUBLE PRECISION, DIMENSION(:,:) :: a
+    CHARACTER(len=*) :: str
+    INTEGER :: i
+    WRITE(*,'(a)') TRIM(str)
+    DO i=1,SIZE(a,1)
+       WRITE(*,'(10f8.1)') a(i,:)
+    END DO
+  END SUBROUTINE prntmat
+  FUNCTION norm2(x)
+!
+!  Compute the 2-norm of array x
+!
+    IMPLICIT NONE
+    DOUBLE PRECISION :: norm2
+    DOUBLE PRECISION, INTENT(in) :: x(:,:)
+    DOUBLE PRECISION :: sum2
+    INTEGER :: i, j
+!
+    sum2 = 0.0d0
+    DO i=1,SIZE(x,1)
+       DO j=1,SIZE(x,2)
+          sum2 = sum2 + x(i,j)**2
+       END DO
+    END DO
+    norm2 = SQRT(sum2)
+  END FUNCTION norm2
+  SUBROUTINE prnmat(label, mat)
+    CHARACTER(len=*) :: label
+    DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
+    INTEGER :: i
+    WRITE(*,'(/a)') TRIM(label)
+    DO i=1,SIZE(mat,1)
+       WRITE(*,'(10(1pe12.3))') mat(i,:)
+    END DO
+  END SUBROUTINE prnmat
+END PROGRAM main
+!
+!+++
+SUBROUTINE meshdist(c, x, nx)
+!
+!   Construct an 1d  non-equidistant mesh given a
+!   mesh distribution function.
+!
+  IMPLICIT NONE
+  DOUBLE PRECISION, INTENT(in) :: c(5)
+  INTEGER, INTENT(iN) :: nx
+  DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+  INTEGER :: nintg
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+  DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+  INTEGER :: i, k
+!
+  a=x(0)
+  b=x(nx)
+  nintg = 10*nx
+  ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+  dx = (b-a)/REAL(nintg)
+  xint(0) = a
+  fint(0) = 0.0d0
+  f1 = fdist(xint(0))
+  DO i=1,nintg
+     f0 = f1
+     xint(i) = xint(i-1) + dx
+     f1 = fdist(xint(i))
+     fint(i) = fint(i-1) + 0.5*(f0+f1)
+  END DO
+!
+!  Normalization
+!
+  scal = REAL(nx) / fint(nintg)
+  fint(0:nintg) = fint(0:nintg) * scal
+!!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+  k = 1
+  DO i=1,nintg-1
+     IF( fint(i) .GE. REAL(k) ) THEN
+        x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+             &   (k-fint(i))
+        k = k+1
+     END IF
+  END DO
+!
+  DEALLOCATE(xint, fint)
+CONTAINS
+  DOUBLE PRECISION FUNCTION fdist(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
+  END FUNCTION fdist
+END SUBROUTINE meshdist
+!+++
+SUBROUTINE dismat(spl, mat)
+!
+!   Assembly of FE matrix mat using spline spl
+!
+  USE bsplines
+  USE matrix
+  IMPLICIT NONE
+  TYPE(spline2d), INTENT(in) :: spl
+  TYPE(pbmat), INTENT(inout) :: mat
+!
+  INTEGER :: n1, nidbas1, ndim1, ng1
+  INTEGER :: n2, nidbas2, ndim2, ng2
+  INTEGER :: i, j, ig1, ig2
+  INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
+  DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:)
+  DOUBLE PRECISION:: contrib
+!
+  INTEGER :: kterms         ! Number of terms in weak form
+  INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:)  ! Derivative order
+  DOUBLE PRECISION, ALLOCATABLE  :: coefs(:,:,:)          ! Terms in weak form
+  INTEGER, ALLOCATABLE :: left1(:), left2(:)
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+  CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+  CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
+  WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
+  WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
+!
+!   Gauss quadature
+!
+  CALL get_gauss(spl%sp1, ng1)
+  CALL get_gauss(spl%sp2, ng2)
+  ALLOCATE(xg1(ng1), wg1(ng1))
+  ALLOCATE(xg2(ng1), wg2(ng1))
+!
+!   Weak form
+!
+  kterms = mat%nterms
+  ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
+  ALLOCATE(coefs(kterms,ng1,ng2))
+!
+  ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative
+  ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! 
+!===========================================================================
+!              2.0 Assembly loop
+!
+  ALLOCATE(left1(ng1))
+  ALLOCATE(left2(ng2))
+  DO i=1,n1
+     CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
+     left1 = i
+     CALL basfun(xg1, spl%sp1, fun1, left1)
+     DO j=1,n2
+        CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
+        left2 = j
+        CALL basfun(xg2, spl%sp2, fun2, left2)
+!
+        DO ig1=1,ng1
+           DO ig2=1,ng2
+              CALL coefeq(xg1(ig1), xg2(ig2), &
+                   &      idert(:,:,ig1,ig2), &
+                   &      iderw(:,:,ig1,ig2), &
+                   &      coefs(:,ig1,ig2))
+           END DO
+        END DO
+!
+        DO iw1=0,nidbas1  ! Weight function in dir 1
+           igw1 = i+iw1
+           DO iw2=0,nidbas2  ! Weight function in dir 2
+              igw2 = MODULO(j+iw2-1, n2) + 1
+              irow = igw2 + (igw1-1)*n2
+              DO it1=0,nidbas1  ! Test function in dir 1
+                 igt1 = i+it1
+                 DO it2=0,nidbas2  ! Test function in dir 2
+                    igt2 = MODULO(j+it2-1, n2) + 1
+                    jcol = igt2 + (igt1-1)*n2
+!-------------
+                    contrib = 0.0d0
+                    DO ig1=1,ng1
+                       DO ig2=1,ng2
+                          DO iterm=1,kterms
+                             contrib = contrib + &
+                                  &    fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
+                                  &    fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * &
+                                  &    coefs(iterm,ig1,ig2) *                 &
+                                  &    fun2(it2,idert(iterm,2,ig1,ig2),ig2) * &
+                                  &    fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
+                                  &    wg1(ig1) * wg2(ig2)
+                          END DO
+                       END DO
+                    END DO
+                    CALL updtmat(mat, irow, jcol, contrib)
+!-------------
+                 END DO
+              END DO
+           END DO
+        END DO
+     END DO
+  END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(xg1, wg1, fun1)
+  DEALLOCATE(xg2, wg2, fun2)
+  DEALLOCATE(idert, iderw, coefs)
+  DEALLOCATE(left1,left2)
+!
+CONTAINS
+  SUBROUTINE coefeq(x, y, idt, idw, c)
+    DOUBLE PRECISION, INTENT(in) :: x, y
+    INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
+    DOUBLE PRECISION, INTENT(out) :: c(:)
+!
+! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
+!
+    c(1) = x        ! 
+    idt(1,1) = 1
+    idt(1,2) = 0
+    idw(1,1) = 1
+    idw(1,2) = 0
+!
+    c(2) = 1.d0/x
+    idt(2,1) = 0
+    idt(2,2) = 1
+    idw(2,1) = 0
+    idw(2,2) = 1
+  END SUBROUTINE coefeq
+END SUBROUTINE dismat
+
+SUBROUTINE disrhs(mbess, spl, rhs)
+!
+!   Assembly the RHS using 2d spline spl
+!
+  USE bsplines
+  IMPLICIT NONE
+  INTEGER, INTENT(in) :: mbess
+  TYPE(spline2d), INTENT(in) :: spl
+  DOUBLE PRECISION, INTENT(out) :: rhs(:)
+  INTEGER :: n1, nidbas1, ndim1, ng1
+  INTEGER :: n2, nidbas2, ndim2, ng2
+  INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
+  DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
+  DOUBLE PRECISION:: contrib
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+  CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+  CALL get_dim(spl%sp2, ndim2, n2, nidbas2) 
+!
+  ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
+  ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
+!
+!   Gauss quadature
+!
+  CALL get_gauss(spl%sp1, ng1)
+  CALL get_gauss(spl%sp2, ng2)
+  WRITE(*,'(/a, 2i3)') 'Gauss points and weights, ngauss =', ng1, ng2
+  ALLOCATE(xg1(ng1), wg1(ng1))
+  ALLOCATE(xg2(ng1), wg2(ng1))
+!===========================================================================
+!              2.0 Assembly loop
+!
+  nrank = SIZE(rhs)
+  rhs(1:nrank) = 0.0d0
+!
+  DO i=1,n1
+     CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
+     DO ig1=1,ng1
+        CALL basfun(xg1(ig1), spl%sp1, fun1, i)
+        DO j=1,n2
+           CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
+           DO ig2=1,ng2
+              CALL basfun(xg2(ig2), spl%sp2, fun2, j)
+              contrib = wg1(ig1)*wg2(ig2) * &
+                   &    rhseq(xg1(ig1),xg2(ig2), mbess)
+              DO k1=0,nidbas1
+                 i1 = i+k1
+                 DO k2=0,nidbas2
+                    j2 = MODULO(j+k2-1,n2) + 1
+                    ij = j2 + (i1-1)*n2
+                    rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
+                 END DO
+              END DO
+           END DO
+        END DO
+     END DO
+  END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(xg1, wg1, fun1)
+  DEALLOCATE(xg2, wg2, fun2)
+!
+CONTAINS
+  DOUBLE PRECISION FUNCTION rhseq(x1, x2, m)
+    DOUBLE PRECISION, INTENT(in) :: x1, x2
+    INTEGER, INTENT(in) :: m
+    rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)
+  END FUNCTION rhseq
+END SUBROUTINE disrhs
+
+SUBROUTINE ibcmat(mat, ny)
+!
+!   Apply BC on matrix
+!
+  USE matrix
+  IMPLICIT NONE
+  TYPE(pbmat), INTENT(inout) :: mat
+  INTEGER, INTENT(in) :: ny
+  INTEGER :: kl, ku, nrank, i, j
+  DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
+  INTEGER :: i0, ii
+  INTEGER :: i0_arr(ny)
+!===========================================================================
+!              1.0 Prologue
+!
+
+  ku = mat%ku
+  kl = ku
+  nrank = mat%rank
+  ALLOCATE(zsum(nrank), arr(nrank))
+!
+  i0 = nrank - ku
+  WRITE(*,'(a,i6)') 'Estimated i0', i0
+  DO i=1,ny
+     CALL getcol(mat, nrank-ny+i, arr)
+     DO ii=1,nrank
+        i0_arr(i)=ii
+        IF(arr(ii) .NE. 0.0d0) EXIT
+     END DO
+  END DO
+!!$  WRITE(*,'(a/(10i6))') 'i0_arr', i0_arr
+!
+!===========================================================================
+!              2.0 Unicity at the axis
+!
+!   The vertical sum on the NY-th row
+!
+  zsum = 0.0d0
+  DO i=1,ny
+     arr = 0.0d0
+     CALL getrow(mat, i, arr)
+     DO j=1,ny+ku
+        zsum(j) = zsum(j) + arr(j)
+     END DO
+  END DO
+!
+  zsum(ny) = SUM(zsum(1:ny))   ! using symmetry
+  CALL putrow(mat, ny, zsum)
+!
+!   The away operator
+!
+  DO i = 1,ny-1
+     arr = 0.0d0; arr(i) = 1.0d0
+     CALL putrow(mat, i, arr)     
+  END DO
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+  DO i = nrank, nrank-ny+1, -1
+     CALL getcol(mat, i, arr)
+     arr = 0.0d0; arr(i) = 1.0d0
+     CALL putrow(mat, i, arr)     
+  END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(zsum, arr)
+!
+END SUBROUTINE ibcmat
+!+++
+SUBROUTINE ibcrhs(rhs, ny)
+!
+!   Apply BC on RHS
+!
+  IMPLICIT NONE
+  DOUBLE PRECISION, INTENT(inout) :: rhs(:)
+  INTEGER, INTENT(in) :: ny
+  INTEGER :: nrank
+  DOUBLE PRECISION :: zsum
+!===========================================================================
+!              1.0 Prologue
+!
+  nrank = SIZE(rhs,1)
+!===========================================================================
+!              2.0 Unicity at the axis
+!
+!   The vertical sum
+!
+  zsum = SUM(rhs(1:ny))
+  rhs(ny) = zsum
+  rhs(1:ny-1) = 0.0d0
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+  rhs(nrank-ny+1:nrank) = 0.0d0
+END SUBROUTINE ibcrhs
+!++++
+SUBROUTINE coefeq(x, y, idt, idw, c)
+  DOUBLE PRECISION, INTENT(in) :: x, y
+  INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
+  DOUBLE PRECISION, INTENT(out) :: c(:)
+!
+! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
+!
+  c(1) = x        ! 
+  idt(1,1) = 1
+  idt(1,2) = 0
+  idw(1,1) = 1
+  idw(1,2) = 0
+!
+  c(2) = 1.d0/x
+  idt(2,1) = 0
+  idt(2,2) = 1
+  idw(2,1) = 0
+  idw(2,2) = 1
+END SUBROUTINE coefeq
diff --git a/examples/pde2d_petsc.f90 b/examples/pde2d_petsc.f90
new file mode 100644
index 0000000..4378b3f
--- /dev/null
+++ b/examples/pde2d_petsc.f90
@@ -0,0 +1,795 @@
+!>
+!> @file pde2d_petsc.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+!
+!  Solving the 2d PDE using splines and PETSC matrix
+!
+!    -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0
+!    exact solution: f(x,y) = (1-x^2) x^m cos(my)
+!
+MODULE pde2d_petsc_mod
+  USE bsplines
+  USE petsc_bsplines
+  IMPLICIT NONE
+CONTAINS
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE dismat(spl, mat)
+!
+!   Assembly of FE matrix mat using spline spl
+!
+    TYPE(spline2d), INTENT(in)     :: spl
+    TYPE(petsc_mat), INTENT(inout) :: mat
+!
+    INTEGER :: n1, nidbas1, ndim1, ng1
+    INTEGER :: n2, nidbas2, ndim2, ng2
+    INTEGER :: i, j, ig1, ig2
+    INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
+    DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:)
+    DOUBLE PRECISION:: contrib
+!
+    INTEGER :: kterms         ! Number of terms in weak form
+    INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:)  ! Derivative order
+    DOUBLE PRECISION, ALLOCATABLE  :: coefs(:,:,:)          ! Terms in weak form
+    INTEGER, ALLOCATABLE :: left1(:), left2(:)
+!
+    INTEGER :: istart, iend
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
+!!$    WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
+!!$    WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
+!
+!   Gauss quadature
+!
+    CALL get_gauss(spl%sp1, ng1)
+    CALL get_gauss(spl%sp2, ng2)
+    ALLOCATE(xg1(ng1), wg1(ng1))
+    ALLOCATE(xg2(ng1), wg2(ng1))
+!
+!   Weak form
+!
+    kterms = mat%nterms
+    ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
+    ALLOCATE(coefs(kterms,ng1,ng2))
+!
+    ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative
+    ALLOCATE(fun2(0:nidbas2,0:1,ng2)) !
+!
+!   Matrix partition
+!
+    istart = mat%istart
+    iend   = mat%iend
+!===========================================================================
+!              2.0 Assembly loop
+!
+    ALLOCATE(left1(ng1))
+    ALLOCATE(left2(ng2))
+    DO i=1,n1
+       CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
+       left1 = i
+       CALL basfun(xg1, spl%sp1, fun1, left1)
+       DO j=1,n2
+          CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
+          left2 = j
+          CALL basfun(xg2, spl%sp2, fun2, left2)
+!
+          DO ig1=1,ng1
+             DO ig2=1,ng2
+                CALL coefeq(xg1(ig1), xg2(ig2), &
+                     &      idert(:,:,ig1,ig2), &
+                     &      iderw(:,:,ig1,ig2), &
+                     &      coefs(:,ig1,ig2))
+             END DO
+          END DO
+!
+          DO iw1=0,nidbas1  ! Weight function in dir 1
+             igw1 = i+iw1
+             DO iw2=0,nidbas2  ! Weight function in dir 2
+                igw2 = MODULO(j+iw2-1, n2) + 1
+                irow = igw2 + (igw1-1)*n2
+                IF( irow.GE.istart .AND. irow.LE.iend) THEN
+                   DO it1=0,nidbas1  ! Test function in dir 1
+                      igt1 = i+it1
+                      DO it2=0,nidbas2  ! Test function in dir 2
+                         igt2 = MODULO(j+it2-1, n2) + 1
+                         jcol = igt2 + (igt1-1)*n2
+!-------------
+                         contrib = 0.0d0
+                         DO ig1=1,ng1
+                            DO ig2=1,ng2
+                               DO iterm=1,kterms
+                                  contrib = contrib + &
+                                       &    fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
+                                       &    fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * &
+                                       &    coefs(iterm,ig1,ig2) *                 &
+                                       &    fun2(it2,idert(iterm,2,ig1,ig2),ig2) * &
+                                       &    fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
+                                       &    wg1(ig1) * wg2(ig2)
+                               END DO
+                            END DO
+                         END DO
+                         CALL updtmat(mat, irow, jcol, contrib)
+!-------------
+                      END DO
+                   END DO
+                END IF
+             END DO
+          END DO
+       END DO
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(xg1, wg1, fun1)
+    DEALLOCATE(xg2, wg2, fun2)
+    DEALLOCATE(idert, iderw, coefs)
+    DEALLOCATE(left1,left2)
+!
+  CONTAINS
+    SUBROUTINE coefeq(x, y, idt, idw, c)
+      DOUBLE PRECISION, INTENT(in) :: x, y
+      INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
+      DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
+!
+! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
+!
+      c(1) = x        ! 
+      idt(1,1) = 1
+      idt(1,2) = 0
+      idw(1,1) = 1
+      idw(1,2) = 0
+      !
+      c(2) = 1.d0/x
+      idt(2,1) = 0
+      idt(2,2) = 1
+      idw(2,1) = 0
+      idw(2,2) = 1
+    END SUBROUTINE coefeq
+  END SUBROUTINE dismat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE disrhs(mbess, spl, rhs)
+!
+!   Assembly the RHS using 2d spline spl
+!
+    INTEGER, INTENT(in) :: mbess
+    TYPE(spline2d), INTENT(in) :: spl
+    DOUBLE PRECISION, INTENT(out) :: rhs(:)
+    INTEGER :: n1, nidbas1, ndim1, ng1
+    INTEGER :: n2, nidbas2, ndim2, ng2
+    INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
+    DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
+    DOUBLE PRECISION:: contrib
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, n2, nidbas2) 
+!
+    ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
+    ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
+!
+!   Gauss quadature
+!
+    CALL get_gauss(spl%sp1, ng1)
+    CALL get_gauss(spl%sp2, ng2)
+    ALLOCATE(xg1(ng1), wg1(ng1))
+    ALLOCATE(xg2(ng1), wg2(ng1))
+!===========================================================================
+!              2.0 Assembly loop
+!
+    nrank = SIZE(rhs)
+    rhs(1:nrank) = 0.0d0
+!
+    DO i=1,n1
+       CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
+       DO ig1=1,ng1
+          CALL basfun(xg1(ig1), spl%sp1, fun1, i)
+          DO j=1,n2
+             CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
+             DO ig2=1,ng2
+                CALL basfun(xg2(ig2), spl%sp2, fun2, j)
+                contrib = wg1(ig1)*wg2(ig2) * &
+                     &    rhseq(xg1(ig1),xg2(ig2), mbess)
+                DO k1=0,nidbas1
+                   i1 = i+k1
+                   DO k2=0,nidbas2
+                      j2 = MODULO(j+k2-1,n2) + 1
+                      ij = j2 + (i1-1)*n2
+                      rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
+                   END DO
+                END DO
+             END DO
+          END DO
+       END DO
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(xg1, wg1, fun1)
+    DEALLOCATE(xg2, wg2, fun2)
+!
+  CONTAINS
+    DOUBLE PRECISION FUNCTION rhseq(x1, x2, m)
+      DOUBLE PRECISION, INTENT(in) :: x1, x2
+      INTEGER, INTENT(in) :: m
+      rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)
+    END FUNCTION rhseq
+  END SUBROUTINE disrhs
+!
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE ibcmat(mat, ny)
+!
+!   Apply BC on matrix
+!
+    TYPE(petsc_mat), INTENT(inout) :: mat
+    INTEGER, INTENT(in) :: ny
+    INTEGER :: nrank, i, j
+    DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
+!===========================================================================
+!              1.0 Prologue
+!
+    nrank = mat%rank
+!===========================================================================
+!              2.0 Unicity at the axis
+!
+!   The vertical sum on the NY-th row
+!
+    ALLOCATE(zsum(nrank), arr(nrank))
+    zsum = 0.0d0
+    DO i=1,ny
+       arr = 0.0d0
+       CALL getrow(mat, i, arr)
+       zsum(:) = zsum(:) + arr(:)
+    END DO
+    IF(mat%nlsym) THEN
+       zsum(ny) = SUM(zsum(1:ny))   ! using symmetry
+    END IF
+    CALL putrow(mat, ny, zsum)
+!
+!   The horizontal sum on the NY-th column
+!
+    IF( .NOT.mat%nlsym) THEN
+       zsum = 0.0d0
+       DO j=1,ny
+          arr = 0.0d0
+          CALL getcol(mat, j, arr)
+          zsum(ny:) = zsum(ny:) + arr(ny:)
+       END DO
+       CALL putcol(mat, ny, zsum)
+    END IF
+!
+!   The away operator
+!
+    IF( .NOT.mat%nlsym) THEN
+       DO j = 1,ny-1
+          arr = 0.0d0; arr(j) = 1.0d0
+          CALL putcol(mat, j, arr)     
+       END DO
+    END IF
+!
+    DO i = 1,ny-1
+       arr = 0.0d0; arr(i) = 1.0d0
+       CALL putrow(mat, i, arr)     
+    END DO
+    DEALLOCATE(zsum)
+    DEALLOCATE(arr)
+!  
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+       ALLOCATE(arr(nrank))
+       IF( .NOT.mat%nlsym) THEN
+          DO j = nrank, nrank-ny+1, -1
+             arr = 0.0d0; arr(j) = 1.0d0
+             CALL putcol(mat, j, arr)     
+          END DO
+       END IF
+!
+       DO i = nrank, nrank-ny+1, -1
+          arr = 0.0d0; arr(i) = 1.0d0
+          CALL putrow(mat, i, arr)     
+       END DO
+       DEALLOCATE(arr)
+!===========================================================================
+!              9.0  Epilogue
+!
+  END SUBROUTINE ibcmat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE ibcrhs(rhs, ny)
+!
+!   Apply BC on RHS
+!
+    DOUBLE PRECISION, INTENT(inout) :: rhs(:)
+    INTEGER, INTENT(in) :: ny
+    INTEGER :: nrank
+    DOUBLE PRECISION :: zsum
+!===========================================================================
+!              1.0 Prologue
+!
+    nrank = SIZE(rhs,1)
+!===========================================================================
+!              2.0 Unicity at the axis
+!
+!   The vertical sum
+!
+    zsum = SUM(rhs(1:ny))
+    rhs(ny) = zsum
+    rhs(1:ny-1) = 0.0d0
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+    rhs(nrank-ny+1:nrank) = 0.0d0
+  END SUBROUTINE ibcrhs
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+END MODULE pde2d_petsc_mod
+PROGRAM main
+  USE pde2d_petsc_mod
+  USE futils
+!
+  IMPLICIT NONE
+  INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms
+  INTEGER :: nitmax=10000, nits, nits0, ntrials=0
+  DOUBLE PRECISION :: rtol=1.e-9
+  LOGICAL :: nlppform
+  INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it
+  DOUBLE PRECISION :: pi, coefx(5), coefy(5)
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol
+  DOUBLE PRECISION, ALLOCATABLE :: row_sum(:), row(:)
+  TYPE(spline2d) :: splxy
+  TYPE(petsc_mat) :: mat
+!
+  CHARACTER(len=128) :: file='pde2d_petsc.h5'
+  INTEGER :: fid
+  DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
+  DOUBLE PRECISION :: mem, dopla
+  DOUBLE PRECISION :: t0, tmat, tsolv, tsolv0, tgrid, gflops1, gflops2
+  LOGICAL :: nlsym
+  LOGICAL :: nlforce_zero
+!
+  INTEGER :: ierr, me
+  INTEGER(kind=8) :: nzfact
+  INTEGER :: nnz_loc, nnz
+  DOUBLE PRECISION :: mem_loc, mem_min, mem_max
+!
+  CHARACTER(len=128) :: matfile='mat.dat'
+  logical :: file_exist
+!
+  NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlsym,&
+       &            nlforce_zero, coefx, coefy, nitmax, rtol, ntrials, &
+       &            matfile
+!===========================================================================
+!              1.0 Prologue
+!
+  CALL mpi_init(ierr)
+  CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
+!
+!   Read in data specific to run
+!
+  nx = 8              ! Number of intervals in x
+  ny = 8              ! Number of intervals in y
+  nidbas = (/3,3/)    ! Degree of splines
+  ngauss = (/4,4/)    ! Number of Gauss points/interval
+  mbess = 2           ! Exponent of differential problem
+  nterms = 2          ! Number of terms in weak form
+  nlppform = .TRUE.   ! Use PPFORM for gridval or not
+  nlsym    = .FALSE.  ! Symmetric or unsymmetric matrix
+  nlforce_zero = .FALSE. ! Remove existing nodes with zero val in  putele/row/ele
+  coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+  coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+  nitmax = 10000      ! Max number ofviterations
+  rtol = 1.e-9        ! Relative tolerance
+  ntrials = 0         ! Run ntrials solution steps after setup
+  matfile = ''        ! Save matrix file to matfile if not empty
+!
+  IF(me.EQ.0) THEN
+     READ(*,newrun)
+     WRITE(*,newrun)
+  END IF
+  CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(mbess, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(nterms, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(nlppform, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(nlsym, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(nlforce_zero, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(coefx, 5, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(coefy, 5, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(nitmax, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(rtol, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(ntrials, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(matfile, 128, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
+!
+!   Define grid on x (=radial) & y (=poloidal) axis
+!
+  pi = 4.0d0*ATAN(1.0d0)
+  ALLOCATE(xgrid(0:nx), ygrid(0:ny))
+  xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
+  CALL meshdist(coefx, xgrid, nx)
+  ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
+  CALL meshdist(coefy, ygrid, ny)
+!
+!   Create hdf5 file
+!
+  IF(me.EQ.0) THEN
+     CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
+     CALL attach(fid, '/', 'NX', nx)
+     CALL attach(fid, '/', 'NY', ny)
+     CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
+     CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
+     CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
+     CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
+     CALL attach(fid, '/', 'MBESS', mbess)
+  END IF
+!===========================================================================
+!              2.0 Discretize the PDE
+!
+!   Set up spline
+!
+  t0 = mpi_wtime()
+  CALL set_spline(nidbas, ngauss, &
+       &          xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
+!
+!   FE matrix assembly
+!
+  nrank = (nx+nidbas(1))*ny    ! Rank of the FE matrix
+  IF(me.EQ.0) WRITE(*,'(a,i12 )') 'nrank', nrank
+!
+  CALL init(nrank, nterms, mat, comm=MPI_COMM_WORLD)
+!
+  INQUIRE(file=TRIM(matfile), exist=file_exist)
+  IF( file_exist ) THEN
+     t0 = mpi_wtime()
+     CALL load_mat(mat, matfile)
+     tmat = mpi_wtime()-t0
+     if(me.eq.0) WRITE(*,'(a,1pe12.3)')    'Mat read time (s) ', tmat
+  ELSE
+     t0 = mpi_wtime()
+     CALL dismat(splxy, mat)
+     CALL ibcmat(mat, ny)
+!
+!!$     ALLOCATE(row_sum(mat%istart:mat%iend))
+!!$     ALLOCATE(row(mat%rank))
+!!$     DO i=mat%istart,mat%iend
+!!$        row = 0.0d0
+!!$        CALL getrow(mat, i, row)
+!!$        row_sum(i) = SUM(row)        
+!!$     END DO
+!!$     WRITE(*,'(a,i3.3,a,(10(1pe12.3)))') 'PE', me, ': row_sum', row_sum
+!
+     CALL to_mat(mat)
+!
+!!$     DO i=mat%istart,mat%iend
+!!$        row = 0.0d0
+!!$        CALL getrow(mat, i, row)
+!!$        row_sum(i) = SUM(row)        
+!!$     END DO
+!!$     WRITE(*,'(a,i3.3,a,(10(1pe12.3)))') 'PE', me, ': row_sum(after)', row_sum
+     CALL save_mat(mat, matfile)
+     tmat = mpi_wtime() - t0
+     IF(me.EQ.0) WRITE(*,'(a,1pe12.3)')    'Mat construction time (s) ', tmat
+  END IF
+!
+  IF(me.EQ.0) THEN
+     WRITE(*,'(a,2i16)') 'Mat rank, nnz', mat%rank, mat%nnz
+  END IF
+!
+!   RHS assembly
+!
+  t0=mpi_wtime()
+  ALLOCATE(rhs(nrank), sol(nrank))
+  CALL disrhs(mbess, splxy, rhs)
+!
+!   BC on RHS
+!
+  CALL ibcrhs(rhs, ny)
+!
+  mem_loc = mem()
+  CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after DISMAT')
+  IF(me.EQ.0) THEN
+     CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
+  END IF
+  IF(me.EQ.0)  WRITE(*,'(a,1pe12.3)') 'RHS construction time (s) ', mpi_wtime()-t0
+!===========================================================================
+!              3.0 Solve the dicretized system
+!
+
+  CALL minmax_i8(mat%nnz_loc, MPI_COMM_WORLD, 'local nnz')
+  IF(me.EQ.0) THEN 
+     WRITE(*,'(a,i16)') 'Number of non-zeros of matrix = ', mat%nnz
+  END IF
+!
+  t0 = mpi_wtime()
+  CALL bsolve(mat, rhs, sol, rtol, nitmax, nits0)
+  tsolv0 = mpi_wtime() - t0
+  IF(me.EQ.0) WRITE(*,'(a,1pe12.3,i8)') 'Solve+setup time(s) and nits   ', tsolv0, nits0
+!
+  IF(ntrials .GT. 0) THEN
+     t0 = mpi_wtime()
+     DO it=1,ntrials   ! ntrials iterations for timing
+        sol = 0.0d0
+        CALL bsolve(mat, rhs, sol, rtol, nitmax, nits)
+        sol(1:ny-1) = sol(ny)
+     END DO
+     tsolv = (mpi_wtime() - t0)/REAL(ntrials)
+     IF(me.EQ.0) WRITE(*,'(a,1pe12.3,i8)') 'Solve time(s) and nits         ', tsolv, nits
+  END IF
+!
+  mem_loc = mem()
+  CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after BSOLVE')
+!
+  CALL destroy(mat)
+!
+!   Spline coefficients, taking into account of periodicity in y
+!   Note: in SOL, y was numbered first.
+!
+  dimx = splxy%sp1%dim
+  dimy = splxy%sp2%dim
+  ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
+  DO j=0,dimy-1
+     DO i=0,dimx-1
+        ij = MODULO(j,ny) + i*ny + 1
+        bcoef(i,j) = sol(ij)
+     END DO
+  END DO
+!
+  mem_loc = mem()
+  CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after setting bcoef')
+!===========================================================================
+!              4.0 Check the solution
+!
+!   Check function values computed with various method
+!
+  IF(me.EQ.0) THEN
+     ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
+     DO i=0,nx
+        DO j=0,ny
+           solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
+        END DO
+     END DO
+     jder = (/0,0/)
+!
+!   Compute PPFORM/BCOEFS at first call to gridval
+     CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
+!
+     WRITE(*,'(/a)') '*** Checking solutions'
+     errsol = solana - solcal
+     WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
+          &    norm2(errsol) / norm2(solana)
+!
+     CALL putarr(fid, '/xgrid', xgrid, 'r')
+     CALL putarr(fid, '/ygrid', ygrid, '\theta')
+     CALL putarr(fid, '/sol', solcal, 'Solutions')
+     CALL putarr(fid, '/solana', solana,'Exact solutions')
+     CALL putarr(fid, '/errors', errsol, 'Errors')
+!
+!   Check derivatives d/dx and d/dy
+!
+     WRITE(*,'(/a)') '*** Checking gradient'
+     DO i=0,nx
+        DO j=0,ny
+           IF( mbess .EQ. 0 ) THEN
+              solana(i,j) = -2.0d0 * xgrid(i)
+           ELSE
+              solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
+                   &           xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
+           END IF
+        END DO
+     END DO
+!
+     jder = (/1,0/)
+     CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+     errsol = solana - solcal
+     CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
+     WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx',  norm2(errsol)
+!
+     DO i=0,nx
+        DO j=0,ny
+           solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
+        END DO
+     END DO
+!
+     jder = (/0,1/)
+     CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+     CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
+     errsol = solana - solcal
+     WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy',  norm2(errsol)
+!
+     DEALLOCATE(solcal, solana, errsol)
+  END IF
+!!$!===========================================================================
+!!$!              5.0 Clear the matrix and recompute
+!!$!
+!!$  IF(me.EQ.0) WRITE(*,'(/a)') 'Recompute the solver ...'
+!!$  t0 = mpi_wtime()()
+!!$  CALL clear_mat(mat)
+!!$  CALL dismat(splxy, mat)
+!!$  CALL ibcmat(mat, ny)
+!!$  tmat = mpi_wtime()()-t0
+!!$!
+!!$  t0 = mpi_wtime()()
+!!$  CALL numfact(mat, debug=.FALSE.)
+!!$  tfact = mpi_wtime()()-t0
+!!$  gflops1 = mat%petsc_par%RINFOG(3) / tfact / 1.d9
+!!$!
+!!$  t0 = mpi_wtime()()
+!!$  ALLOCATE(newsol(nrank))
+!!$  CALL bsolve(mat, rhs, newsol)
+!!$  newsol(1:ny-1) = newsol(ny)
+!!$  tsolv = mpi_wtime()()-t0
+!!$!
+!!$  IF(me.EQ.0) THEN
+!!$     WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol))
+!!$     WRITE(*,'(/a)') '---'
+!!$     WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
+!!$     WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
+!!$     WRITE(*,'(a,1pe12.3)') 'Backsolve time (s)            ', tsolv
+!!$     WRITE(*,'(a,1pe12.3)') 'Total (s)                     ', tmat+tfact+tsolv
+!!$     WRITE(*,'(a,2f10.3)') 'Factor  Gflop/s', gflops1
+!!$  END IF
+!!$!
+!!$  DEALLOCATE(newsol)
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(xgrid, rhs, sol)
+  DEALLOCATE(bcoef)
+!!$  CALL destroy(mat)
+  CALL destroy_sp(splxy)
+!
+  IF(me.EQ.0) CALL closef(fid)
+  CALL mpi_finalize(ierr)
+!===========================================================================
+!
+CONTAINS
+  FUNCTION norm2(x)
+!
+!  Compute the 2-norm of array x
+!
+    IMPLICIT NONE
+    DOUBLE PRECISION :: norm2
+    DOUBLE PRECISION, INTENT(in) :: x(:,:)
+    DOUBLE PRECISION :: sum2
+    INTEGER :: i, j
+!
+    sum2 = 0.0d0
+    DO i=1,SIZE(x,1)
+       DO j=1,SIZE(x,2)
+          sum2 = sum2 + x(i,j)**2
+       END DO
+    END DO
+    norm2 = SQRT(sum2)
+  END FUNCTION norm2
+!
+  SUBROUTINE minmax_i(k, comm, str)
+    CHARACTER(len=*), INTENT(in) :: str
+    INTEGER, INTENT(in)          :: k
+    INTEGER, INTENT(in)          :: comm
+    INTEGER :: me, ierr, kmin, kmax
+    CALL mpi_comm_rank(comm, me, ierr)
+    CALL mpi_reduce(k, kmin, 1, MPI_INTEGER, MPI_MIN, 0, comm, ierr)
+    CALL mpi_reduce(k, kmax, 1, MPI_INTEGER, MPI_MAX, 0, comm, ierr)
+    IF( me.EQ.0 ) THEN
+       WRITE(*,'(a,2i16)') 'Minmax of ' // TRIM(str), kmin, kmax
+    END IF
+  END SUBROUTINE minmax_i
+!
+  SUBROUTINE minmax_i8(k, comm, str)
+    CHARACTER(len=*), INTENT(in) :: str
+    INTEGER(8), INTENT(in)          :: k
+    INTEGER, INTENT(in)          :: comm
+    INTEGER :: me, ierr
+    INTEGER(8) :: kmin, kmax
+    CALL mpi_comm_rank(comm, me, ierr)
+    CALL mpi_reduce(k, kmin, 1, MPI_INTEGER8, MPI_MIN, 0, comm, ierr)
+    CALL mpi_reduce(k, kmax, 1, MPI_INTEGER8, MPI_MAX, 0, comm, ierr)
+    IF( me.EQ.0 ) THEN
+       WRITE(*,'(a,2i16)') 'Minmax of ' // TRIM(str), kmin, kmax
+    END IF
+  END SUBROUTINE minmax_i8
+!!
+  SUBROUTINE minmax_r(x, comm, str)
+    CHARACTER(len=*), INTENT(in) :: str
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(in)          :: comm
+    INTEGER :: me, ierr
+    DOUBLE PRECISION ::  xmin, xmax
+    CALL mpi_comm_rank(comm, me, ierr)
+    CALL mpi_reduce(x, xmin, 1, MPI_DOUBLE_PRECISION, MPI_MIN, 0, comm, ierr)
+    CALL mpi_reduce(x, xmax, 1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierr)
+    IF( me.EQ.0 ) THEN
+       WRITE(*,'(a,2(1pe12.4))') 'Minmax of ' // TRIM(str), xmin, xmax
+    END IF
+  END SUBROUTINE minmax_r
+END PROGRAM main
+!
+!+++
+SUBROUTINE meshdist(c, x, nx)
+!
+!   Construct an 1d  non-equidistant mesh given a
+!   mesh distribution function.
+!
+  IMPLICIT NONE
+  DOUBLE PRECISION, INTENT(in) :: c(5)
+  INTEGER, INTENT(iN) :: nx
+  DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+  INTEGER :: nintg
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+  DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+  INTEGER :: i, k
+!
+  a=x(0)
+  b=x(nx)
+  nintg = 10*nx
+  ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+  dx = (b-a)/REAL(nintg)
+  xint(0) = a
+  fint(0) = 0.0d0
+  f1 = fdist(xint(0))
+  DO i=1,nintg
+     f0 = f1
+     xint(i) = xint(i-1) + dx
+     f1 = fdist(xint(i))
+     fint(i) = fint(i-1) + 0.5*(f0+f1)
+  END DO
+!
+!  Normalization
+!
+  scal = REAL(nx) / fint(nintg)
+  fint(0:nintg) = fint(0:nintg) * scal
+!!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+  k = 1
+  DO i=1,nintg-1
+     IF( fint(i) .GE. REAL(k) ) THEN
+        x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+             &   (k-fint(i))
+        k = k+1
+     END IF
+  END DO
+!
+  DEALLOCATE(xint, fint)
+CONTAINS
+  DOUBLE PRECISION FUNCTION fdist(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
+  END FUNCTION fdist
+END SUBROUTINE meshdist
+!+++
diff --git a/examples/pde2d_pwsmp.f90 b/examples/pde2d_pwsmp.f90
new file mode 100644
index 0000000..1cd7592
--- /dev/null
+++ b/examples/pde2d_pwsmp.f90
@@ -0,0 +1,776 @@
+!>
+!> @file pde2d_pwsmp.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+!
+!  Solving the 2d PDE using splines and WSMP non-symmetric matrix
+!
+!    -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0
+!    exact solution: f(x,y) = (1-x^2) x^m cos(my)
+!
+MODULE pde2d_pwsmp_mod
+  USE bsplines
+  USE pwsmp_bsplines
+  IMPLICIT NONE
+CONTAINS
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE dismat(spl, mat)
+!
+!   Assembly of FE matrix mat using spline spl
+!
+    TYPE(spline2d), INTENT(in)       :: spl
+    TYPE(wsmp_mat), INTENT(inout)    :: mat
+!
+    INTEGER :: n1, nidbas1, ndim1, ng1
+    INTEGER :: n2, nidbas2, ndim2, ng2
+    INTEGER :: i, j, ig1, ig2
+    INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
+    DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:)
+    DOUBLE PRECISION:: contrib
+!
+    INTEGER :: kterms         ! Number of terms in weak form
+    INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:)  ! Derivative order
+    DOUBLE PRECISION, ALLOCATABLE  :: coefs(:,:,:)          ! Terms in weak form
+    INTEGER, ALLOCATABLE :: left1(:), left2(:)
+!
+    INTEGER :: istart, iend
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
+!
+!   Gauss quadature
+!
+    CALL get_gauss(spl%sp1, ng1)
+    CALL get_gauss(spl%sp2, ng2)
+    ALLOCATE(xg1(ng1), wg1(ng1))
+    ALLOCATE(xg2(ng1), wg2(ng1))
+!
+!   Weak form
+!
+    kterms = mat%nterms
+    ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
+    ALLOCATE(coefs(kterms,ng1,ng2))
+!
+    ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative
+    ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! 
+!
+!   Matrix partition
+!
+    istart = mat%istart
+    iend   = mat%iend
+!===========================================================================
+!              2.0 Assembly loop
+!
+    ALLOCATE(left1(ng1))
+    ALLOCATE(left2(ng2))
+    DO i=1,n1
+       CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
+       left1 = i
+       CALL basfun(xg1, spl%sp1, fun1, left1)
+       DO j=1,n2
+          CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
+          left2 = j
+          CALL basfun(xg2, spl%sp2, fun2, left2)
+!
+          DO ig1=1,ng1
+             DO ig2=1,ng2
+                CALL coefeq(xg1(ig1), xg2(ig2), &
+                     &      idert(:,:,ig1,ig2), &
+                     &      iderw(:,:,ig1,ig2), &
+                     &      coefs(:,ig1,ig2))
+             END DO
+          END DO
+!
+          DO iw1=0,nidbas1  ! Weight function in dir 1
+             igw1 = i+iw1
+             DO iw2=0,nidbas2  ! Weight function in dir 2
+                igw2 = MODULO(j+iw2-1, n2) + 1
+                irow = igw2 + (igw1-1)*n2
+                IF( irow.GE.istart .AND. irow.LE.iend) THEN
+                   DO it1=0,nidbas1  ! Test function in dir 1
+                      igt1 = i+it1
+                      DO it2=0,nidbas2  ! Test function in dir 2
+                         igt2 = MODULO(j+it2-1, n2) + 1
+                         jcol = igt2 + (igt1-1)*n2
+!-------------
+                         contrib = 0.0d0
+                         DO ig1=1,ng1
+                            DO ig2=1,ng2
+                               DO iterm=1,kterms
+                                  contrib = contrib + &
+                                       &    fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
+                                       &    fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * &
+                                       &    coefs(iterm,ig1,ig2) *                 &
+                                       &    fun2(it2,idert(iterm,2,ig1,ig2),ig2) * &
+                                       &    fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
+                                       &    wg1(ig1) * wg2(ig2)
+                               END DO
+                            END DO
+                         END DO
+                         CALL updtmat(mat, irow, jcol, contrib)
+!-------------
+                      END DO
+                   END DO
+                END IF
+             END DO
+          END DO
+       END DO
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(xg1, wg1, fun1)
+    DEALLOCATE(xg2, wg2, fun2)
+    DEALLOCATE(idert, iderw, coefs)
+    DEALLOCATE(left1,left2)
+!
+  CONTAINS
+    SUBROUTINE coefeq(x, y, idt, idw, c)
+      DOUBLE PRECISION, INTENT(in) :: x, y
+      INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
+      DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
+!
+! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
+!
+      c(1) = x        ! 
+      idt(1,1) = 1
+      idt(1,2) = 0
+      idw(1,1) = 1
+      idw(1,2) = 0
+      !
+      c(2) = 1.d0/x
+      idt(2,1) = 0
+      idt(2,2) = 1
+      idw(2,1) = 0
+      idw(2,2) = 1
+    END SUBROUTINE coefeq
+  END SUBROUTINE dismat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE disrhs(mbess, spl, rhs)
+!
+!   Assembly the RHS using 2d spline spl
+!
+    INTEGER, INTENT(in) :: mbess
+    TYPE(spline2d), INTENT(in) :: spl
+    DOUBLE PRECISION, INTENT(out) :: rhs(:)
+    INTEGER :: n1, nidbas1, ndim1, ng1
+    INTEGER :: n2, nidbas2, ndim2, ng2
+    INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
+    DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
+    DOUBLE PRECISION:: contrib
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, n2, nidbas2) 
+!
+    ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
+    ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
+!
+!   Gauss quadature
+!
+    CALL get_gauss(spl%sp1, ng1)
+    CALL get_gauss(spl%sp2, ng2)
+    ALLOCATE(xg1(ng1), wg1(ng1))
+    ALLOCATE(xg2(ng1), wg2(ng1))
+!===========================================================================
+!              2.0 Assembly loop
+!
+    nrank = SIZE(rhs)
+    rhs(1:nrank) = 0.0d0
+!
+    DO i=1,n1
+       CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
+       DO ig1=1,ng1
+          CALL basfun(xg1(ig1), spl%sp1, fun1, i)
+          DO j=1,n2
+             CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
+             DO ig2=1,ng2
+                CALL basfun(xg2(ig2), spl%sp2, fun2, j)
+                contrib = wg1(ig1)*wg2(ig2) * &
+                     &    rhseq(xg1(ig1),xg2(ig2), mbess)
+                DO k1=0,nidbas1
+                   i1 = i+k1
+                   DO k2=0,nidbas2
+                      j2 = MODULO(j+k2-1,n2) + 1
+                      ij = j2 + (i1-1)*n2
+                      rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
+                   END DO
+                END DO
+             END DO
+          END DO
+       END DO
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(xg1, wg1, fun1)
+    DEALLOCATE(xg2, wg2, fun2)
+!
+  CONTAINS
+    DOUBLE PRECISION FUNCTION rhseq(x1, x2, m)
+      DOUBLE PRECISION, INTENT(in) :: x1, x2
+      INTEGER, INTENT(in) :: m
+      rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)
+    END FUNCTION rhseq
+  END SUBROUTINE disrhs
+!
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE ibcmat(mat, ny)
+!
+!   Apply BC on matrix
+!
+    TYPE(wsmp_mat), INTENT(inout) :: mat
+    INTEGER, INTENT(in) :: ny
+    INTEGER :: nrank, i, j
+    DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
+!===========================================================================
+!              1.0 Prologue
+!
+    nrank = mat%rank
+    ALLOCATE(zsum(nrank), arr(nrank))
+!===========================================================================
+!              2.0 Unicity at the axis
+!
+!   The vertical sum on the NY-th row
+!
+    zsum = 0.0d0
+    DO i=1,ny
+       arr = 0.0d0
+       CALL getrow(mat, i, arr)
+       zsum(:) = zsum(:) + arr(:)
+    END DO
+    CALL putrow(mat, ny, zsum)
+!
+!   The horizontal sum on the NY-th column
+!
+    IF( .NOT.mat%nlsym) THEN
+       zsum = 0.0d0
+       DO j=1,ny
+          arr = 0.0d0
+          CALL getcol(mat, j, arr)
+          zsum(ny:) = zsum(ny:) + arr(ny:)
+       END DO
+       CALL putcol(mat, ny, zsum)
+    END IF
+!
+!   The away operator
+!
+    IF( .NOT.mat%nlsym) THEN
+       DO j = 1,ny-1
+          arr = 0.0d0; arr(j) = 1.0d0
+          CALL putcol(mat, j, arr)     
+       END DO
+    END IF
+!
+    DO i = 1,ny-1
+       arr = 0.0d0; arr(i) = 1.0d0
+       CALL putrow(mat, i, arr)     
+    END DO
+!  
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+    DO j = nrank, nrank-ny+1, -1
+       arr = 0.0d0; arr(j) = 1.0d0
+       CALL putcol(mat, j, arr)     
+    END DO
+!
+    DO i = nrank, nrank-ny+1, -1
+       arr = 0.0d0; arr(i) = 1.0d0
+       CALL putrow(mat, i, arr)     
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(zsum, arr)
+!
+  END SUBROUTINE ibcmat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE ibcrhs(rhs, ny)
+!
+!   Apply BC on RHS
+!
+    DOUBLE PRECISION, INTENT(inout) :: rhs(:)
+    INTEGER, INTENT(in) :: ny
+    INTEGER :: nrank
+    DOUBLE PRECISION :: zsum
+!===========================================================================
+!              1.0 Prologue
+!
+    nrank = SIZE(rhs,1)
+!===========================================================================
+!              2.0 Unicity at the axis
+!
+!   The vertical sum
+!
+    zsum = SUM(rhs(1:ny))
+    rhs(ny) = zsum
+    rhs(1:ny-1) = 0.0d0
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+    rhs(nrank-ny+1:nrank) = 0.0d0
+  END SUBROUTINE ibcrhs
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+END MODULE pde2d_pwsmp_mod
+PROGRAM main
+  USE pde2d_pwsmp_mod
+  USE futils
+!
+  IMPLICIT NONE
+  INCLUDE 'mpif.h'
+  INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms
+  LOGICAL :: nlppform
+  INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it
+  DOUBLE PRECISION :: pi, coefx(5), coefy(5)
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol
+  TYPE(spline2d) :: splxy
+  TYPE(wsmp_mat) :: mat
+!
+  CHARACTER(len=128) :: file='pde2d_wsmp.h5'
+  INTEGER :: fid
+  DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
+  DOUBLE PRECISION :: seconds, mem, dopla
+  DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
+  DOUBLE PRECISION :: tconv, treord
+  INTEGER :: nits=100
+  LOGICAL :: nlsym, nlforce_zero
+!
+  INTEGER :: ierr, me, nprocs
+  DOUBLE PRECISION :: mem_loc, mem_min, mem_max
+!
+  NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, &
+       &            nlsym, nlforce_zero, coefx, coefy
+!===========================================================================
+!              1.0 Prologue
+!
+  CALL mpi_init(ierr)
+  CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
+  CALL mpi_comm_size(MPI_COMM_WORLD, nprocs, ierr)
+!
+!   Read in data specific to run
+!
+  nx = 8              ! Number of intervals in x
+  ny = 8              ! Number of intervals in y
+  nidbas = (/3,3/)    ! Degree of splines
+  ngauss = (/4,4/)    ! Number of Gauss points/interval
+  mbess = 2           ! Exponent of differential problem
+  nterms = 2          ! Number of terms in weak form
+  nlppform = .TRUE.   ! Use PPFORM for gridval or not
+  nlsym = .TRUE.      ! Symmetric matrix or not
+  nlforce_zero = .FALSE. ! Remove existing nodes with zero val in  putele/row/ele
+  coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+  coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+!
+  IF(me.EQ.0) THEN
+     READ(*,newrun)
+     WRITE(*,newrun)
+  END IF
+  CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(mbess, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(nterms, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(nlppform, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(nlforce_zero, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(nlsym, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(coefx, 5, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(coefy, 5, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+!
+!   Define grid on x (=radial) & y (=poloidal) axis
+!
+  pi = 4.0d0*ATAN(1.0d0)
+  ALLOCATE(xgrid(0:nx), ygrid(0:ny))
+  xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
+  CALL meshdist(coefx, xgrid, nx)
+  ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
+  CALL meshdist(coefy, ygrid, ny)
+!
+!   Create hdf5 file
+!
+  if(me.eq.0) then
+     CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
+     CALL attach(fid, '/', 'NX', nx)
+     CALL attach(fid, '/', 'NY', ny)
+     CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
+     CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
+     CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
+     CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
+     CALL attach(fid, '/', 'MBESS', mbess)
+  end if
+!===========================================================================
+!              2.0 Discretize the PDE
+!
+!   Set up spline
+!
+  t0 = seconds()
+  CALL set_spline(nidbas, ngauss, &
+       &          xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
+!
+!   FE matrix assembly
+!
+  nrank = (nx+nidbas(1))*ny    ! Rank of the FE matrix
+  IF(me.EQ.0) THEN
+     WRITE(*,'(a,i8,a,i4)') 'nrank =', nrank, '   nprocs =', nprocs
+  END IF
+!
+  CALL init(nrank, nterms, mat, nlforce_zero=nlforce_zero, nlsym=nlsym, &
+       &    comm_in=MPI_COMM_WORLD)
+  WRITE(*,'(a,i4.4,a,3i16)') 'PE', me, ' istart, iend, nloc', mat%istart, mat%iend, &
+       &        mat%iend-mat%istart+1
+!
+  CALL dismat(splxy, mat)
+!
+!   BC on Matrix
+!
+  CALL ibcmat(mat, ny)
+  tmat = seconds() - t0
+!
+!   RHS assembly
+!
+  ALLOCATE(rhs(nrank), sol(nrank))
+  CALL disrhs(mbess, splxy, rhs)
+!
+!   BC on RHS
+!
+  CALL ibcrhs(rhs, ny)
+!
+  mem_loc = mem()
+  CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after DISMAT')
+  IF(me.EQ.0) THEN
+     CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
+  END IF
+!===========================================================================
+!              3.0 Solve the dicretized system
+!
+  t0 = seconds()
+  CALL to_mat(mat)
+  tconv = seconds() -t0
+  CALL minmax_i(mat%nnz_loc, MPI_COMM_WORLD, 'local nnz')
+  IF(me.EQ.0) THEN 
+     WRITE(*,'(a,i16)') 'Number of non-zeros of matrix = ', mat%nnz
+  END IF
+!
+  t0 = seconds()
+  CALL reord_mat(mat)
+  treord = seconds() - t0
+!
+  t0 = seconds()
+  CALL numfact(mat)
+  tfact = seconds() - t0
+!
+  mem_loc = mem()
+  CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after FACTOR')
+! 
+  IF(me.EQ.0) THEN
+     WRITE(*,'(/a,i6)') 'Number of nonzeros in factor (/1000) = ',mat%p%iparm(24)
+     WRITE(*,'(a,1pe12.3)')  'Number of factorization GFLOPS      = ',mat%p%dparm(23)/1.d9
+  END IF
+  gflops1 = mat%p%dparm(23) / tfact / 1.d9
+!
+  CALL bsolve(mat, rhs, sol)
+!
+  t0 = seconds()
+  DO it=1,nits   ! nits iterations for timing
+     CALL bsolve(mat, rhs, sol)
+     sol(1:ny-1) = sol(ny)
+  END DO
+  tsolv = (seconds() - t0)/REAL(nits)
+!
+  mem_loc = mem()
+  CALL minmax_r(mem_loc, MPI_COMM_WORLD, 'mem used (MB) after BSOLVE')
+!
+!   Spline coefficients, taking into account of periodicity in y
+!   Note: in SOL, y was numbered first.
+!
+  dimx = splxy%sp1%dim
+  dimy = splxy%sp2%dim
+  ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
+  DO j=0,dimy-1
+     DO i=0,dimx-1
+        ij = MODULO(j,ny) + i*ny + 1
+        bcoef(i,j) = sol(ij)
+     END DO
+  END DO
+  IF(me.EQ.0) THEN
+     WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef)
+     CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
+  END IF
+!===========================================================================
+!              4.0 Check the solution
+!
+!   Check function values computed with various method (only on proc 0)
+!
+  IF(me.EQ.0) THEN
+     ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
+     DO i=0,nx
+        DO j=0,ny
+           solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
+        END DO
+     END DO
+     jder = (/0,0/)
+!
+!   Compute PPFORM/BCOEFS at first call to gridval
+     CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
+!
+     WRITE(*,'(/a)') '*** Checking solutions'
+     t0 = seconds()
+     DO it=1,nits   ! nits iterations for timing
+        CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+     END DO
+     tgrid = (seconds() - t0)/REAL(nits)
+     errsol = solana - solcal
+     IF( SIZE(bcoef,2) .LE. 10 ) THEN
+        CALL prnmat('BCOEF', bcoef)
+        CALL prnmat('SOLANA', solana)
+        CALL prnmat('SOLCAL', solcal)
+        CALL prnmat('ERRSOL', errsol)
+     END IF
+     WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
+          &    norm2(errsol) / norm2(solana)
+     WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s)             ', tgrid
+!
+     CALL putarr(fid, '/xgrid', xgrid, 'r')
+     CALL putarr(fid, '/ygrid', ygrid, '\theta')
+     CALL putarr(fid, '/sol', solcal, 'Solutions')
+     CALL putarr(fid, '/solana', solana,'Exact solutions')
+     CALL putarr(fid, '/errors', errsol, 'Errors')
+!
+!   Check derivatives d/dx and d/dy
+!
+     WRITE(*,'(/a)') '*** Checking gradient'
+     DO i=0,nx
+        DO j=0,ny
+           IF( mbess .EQ. 0 ) THEN
+              solana(i,j) = -2.0d0 * xgrid(i)
+           ELSE
+              solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
+                   &           xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
+           END IF
+        END DO
+     END DO
+!
+     jder = (/1,0/)
+     CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+     errsol = solana - solcal
+     CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
+     WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx',  norm2(errsol)
+!
+     DO i=0,nx
+        DO j=0,ny
+           solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
+        END DO
+     END DO
+!
+     jder = (/0,1/)
+     CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+     CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
+     errsol = solana - solcal
+     WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy',  norm2(errsol)
+!
+     WRITE(*,'(/a)') '---'
+     WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
+     WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s)   ', tconv
+     WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s)      ', treord
+     WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
+     WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv
+     WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s)         ', tsolv
+     WRITE(*,'(a,2f10.3)') 'Factor  Gflop/s', gflops1
+     DEALLOCATE(solcal)
+     DEALLOCATE(solana)
+     DEALLOCATE(errsol)
+  END IF
+!===========================================================================
+!              5.0 Clear the matrix and recompute
+!
+  IF(me.EQ.0) WRITE(*,'(/a)') 'Recompute the solver ...'
+  t0 = seconds()
+  CALL clear_mat(mat)
+  CALL dismat(splxy, mat)
+  CALL ibcmat(mat, ny)
+  tmat = seconds()-t0
+!
+  t0 = seconds()
+!!$  CALL numfact(mat)
+  CALL factor(mat, nlreord=.FALSE.)
+  tfact = seconds()-t0
+  gflops1 = mat%p%dparm(23) / tfact / 1.d9
+!
+  t0 = seconds()
+  ALLOCATE(newsol(nrank))
+  CALL bsolve(mat, rhs, newsol)
+  newsol(1:ny-1) = newsol(ny)
+  tsolv = seconds()-t0
+!
+  IF(me.EQ.0) THEN
+     WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol))
+     WRITE(*,'(/a)') '---'
+     WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
+     WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
+     WRITE(*,'(a,1pe12.3)') 'Backsolve(2) time (s)         ', tsolv
+     WRITE(*,'(a,1pe12.3)') 'Total (s)                     ', tmat+tfact+tsolv
+     WRITE(*,'(a,2f10.3)') 'Factor  Gflop/s', gflops1
+  END IF
+!
+  DEALLOCATE(newsol)
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(bcoef)
+  DEALLOCATE(xgrid, rhs, sol)
+9999 CONTINUE
+  CALL mpi_barrier(MPI_COMM_WORLD, ierr)
+  CALL destroy_sp(splxy)
+  CALL destroy(mat)
+!
+  IF(me.EQ.0) CALL closef(fid)
+  CALL mpi_finalize(ierr)
+!===========================================================================
+!
+CONTAINS
+  FUNCTION norm2(x)
+!
+!  Compute the 2-norm of array x
+!
+    IMPLICIT NONE
+    DOUBLE PRECISION :: norm2
+    DOUBLE PRECISION, INTENT(in) :: x(:,:)
+    DOUBLE PRECISION :: sum2
+    INTEGER :: i, j
+!
+    sum2 = 0.0d0
+    DO i=1,SIZE(x,1)
+       DO j=1,SIZE(x,2)
+          sum2 = sum2 + x(i,j)**2
+       END DO
+    END DO
+    norm2 = SQRT(sum2)
+  END FUNCTION norm2
+  SUBROUTINE prnmat(label, mat)
+    CHARACTER(len=*) :: label
+    DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
+    INTEGER :: i
+    WRITE(*,'(/a)') TRIM(label)
+    DO i=1,SIZE(mat,1)
+       WRITE(*,'(10(1pe12.3))') mat(i,:)
+    END DO
+  END SUBROUTINE prnmat
+!
+  SUBROUTINE minmax_i(k, comm, str)
+    CHARACTER(len=*), INTENT(in) :: str
+    INTEGER, INTENT(in)          :: k
+    INTEGER, INTENT(in)          :: comm
+    INTEGER :: me, ierr, kmin, kmax
+    CALL mpi_comm_rank(comm, me, ierr)
+    CALL mpi_reduce(k, kmin, 1, MPI_INTEGER, MPI_MIN, 0, comm, ierr)
+    CALL mpi_reduce(k, kmax, 1, MPI_INTEGER, MPI_MAX, 0, comm, ierr)
+    IF( me.EQ.0 ) THEN
+       WRITE(*,'(a,2i16)') 'Minmax of ' // TRIM(str), kmin, kmax
+    END IF
+  END SUBROUTINE minmax_i
+!
+  SUBROUTINE minmax_r(x, comm, str)
+    CHARACTER(len=*), INTENT(in) :: str
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(in)          :: comm
+    INTEGER :: me, ierr
+    DOUBLE PRECISION ::  xmin, xmax
+    CALL mpi_comm_rank(comm, me, ierr)
+    CALL mpi_reduce(x, xmin, 1, MPI_DOUBLE_PRECISION, MPI_MIN, 0, comm, ierr)
+    CALL mpi_reduce(x, xmax, 1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierr)
+    IF( me.EQ.0 ) THEN
+       WRITE(*,'(a,2(1pe12.4))') 'Minmax of ' // TRIM(str), xmin, xmax
+    END IF
+  END SUBROUTINE minmax_r
+END PROGRAM main
+!
+!+++
+SUBROUTINE meshdist(c, x, nx)
+!
+!   Construct an 1d  non-equidistant mesh given a
+!   mesh distribution function.
+!
+  IMPLICIT NONE
+  DOUBLE PRECISION, INTENT(in) :: c(5)
+  INTEGER, INTENT(iN) :: nx
+  DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+  INTEGER :: nintg
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+  DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+  INTEGER :: i, k
+!
+  a=x(0)
+  b=x(nx)
+  nintg = 10*nx
+  ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+  dx = (b-a)/REAL(nintg)
+  xint(0) = a
+  fint(0) = 0.0d0
+  f1 = fdist(xint(0))
+  DO i=1,nintg
+     f0 = f1
+     xint(i) = xint(i-1) + dx
+     f1 = fdist(xint(i))
+     fint(i) = fint(i-1) + 0.5*(f0+f1)
+  END DO
+!
+!  Normalization
+!
+  scal = REAL(nx) / fint(nintg)
+  fint(0:nintg) = fint(0:nintg) * scal
+!!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+  k = 1
+  DO i=1,nintg-1
+     IF( fint(i) .GE. REAL(k) ) THEN
+        x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+             &   (k-fint(i))
+        k = k+1
+     END IF
+  END DO
+!
+  DEALLOCATE(xint, fint)
+CONTAINS
+  DOUBLE PRECISION FUNCTION fdist(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
+  END FUNCTION fdist
+END SUBROUTINE meshdist
+!+++
diff --git a/examples/pde2d_sym_pardiso.f90 b/examples/pde2d_sym_pardiso.f90
new file mode 100644
index 0000000..47c7cda
--- /dev/null
+++ b/examples/pde2d_sym_pardiso.f90
@@ -0,0 +1,715 @@
+!>
+!> @file pde2d_sym_pardiso.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+!
+!  Solving the 2d PDE using splines and PARDISO symmetric matrix
+!
+!    -d/dx[x C d/dx]f - 1x/d/dy[Cd/dy] f = \rho, with f(x=1,y) = 0
+!     C(x,y) = 1 + \epsilon x cos(y)
+!     exact solution: f(x,y) = (1-x^2) x^m cos(my)
+!
+MODULE pde2d_sym_pardiso_mod
+  USE bsplines
+  USE pardiso_bsplines
+  IMPLICIT NONE
+CONTAINS
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE dismat(spl, epsi, mat)
+!
+!   Assembly of FE matrix mat using spline spl
+!
+    TYPE(spline2d), INTENT(in)       :: spl
+    DOUBLE PRECISION, INTENT(in)     :: epsi
+    TYPE(pardiso_mat), INTENT(inout) :: mat
+!
+    INTEGER :: n1, nidbas1, ndim1, ng1
+    INTEGER :: n2, nidbas2, ndim2, ng2
+    INTEGER :: i, j, ig1, ig2
+    INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
+    DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:)
+    DOUBLE PRECISION:: contrib
+!
+    INTEGER :: kterms         ! Number of terms in weak form
+    INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:)  ! Derivative order
+    DOUBLE PRECISION, ALLOCATABLE  :: coefs(:,:,:)          ! Terms in weak form
+    INTEGER, ALLOCATABLE :: left1(:), left2(:)
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
+    WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
+    WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
+!
+!   Gauss quadature
+!
+    CALL get_gauss(spl%sp1, ng1)
+    CALL get_gauss(spl%sp2, ng2)
+    ALLOCATE(xg1(ng1), wg1(ng1))
+    ALLOCATE(xg2(ng1), wg2(ng1))
+!
+!   Weak form
+!
+    kterms = mat%nterms
+    ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
+    ALLOCATE(coefs(kterms,ng1,ng2))
+!
+    ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative
+    ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! 
+!===========================================================================
+!              2.0 Assembly loop
+!
+    ALLOCATE(left1(ng1))
+    ALLOCATE(left2(ng2))
+    DO i=1,n1
+       CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
+       left1 = i
+       CALL basfun(xg1, spl%sp1, fun1, left1)
+       DO j=1,n2
+          CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
+          left2 = j
+          CALL basfun(xg2, spl%sp2, fun2, left2)
+!
+          DO ig1=1,ng1
+             DO ig2=1,ng2
+                CALL coefeq(xg1(ig1), xg2(ig2), &
+                     &      idert(:,:,ig1,ig2), &
+                     &      iderw(:,:,ig1,ig2), &
+                     &      coefs(:,ig1,ig2))
+             END DO
+          END DO
+!
+          DO iw1=0,nidbas1  ! Weight function in dir 1
+             igw1 = i+iw1
+             DO iw2=0,nidbas2  ! Weight function in dir 2
+                igw2 = MODULO(j+iw2-1, n2) + 1
+                irow = igw2 + (igw1-1)*n2
+                DO it1=0,nidbas1  ! Test function in dir 1
+                   igt1 = i+it1
+                   DO it2=0,nidbas2  ! Test function in dir 2
+                      igt2 = MODULO(j+it2-1, n2) + 1
+                      jcol = igt2 + (igt1-1)*n2
+!-------------
+                      contrib = 0.0d0
+                      DO ig1=1,ng1
+                         DO ig2=1,ng2
+                            DO iterm=1,kterms
+                               contrib = contrib + &
+                                    &    fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
+                                    &    fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * &
+                                    &    coefs(iterm,ig1,ig2) *                 &
+                                    &    fun2(it2,idert(iterm,2,ig1,ig2),ig2) * &
+                                    &    fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
+                                    &    wg1(ig1) * wg2(ig2)
+                            END DO
+                         END DO
+                      END DO
+                      CALL updtmat(mat, irow, jcol, contrib)
+!-------------
+                   END DO
+                END DO
+             END DO
+          END DO
+       END DO
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(xg1, wg1, fun1)
+    DEALLOCATE(xg2, wg2, fun2)
+    DEALLOCATE(idert, iderw, coefs)
+    DEALLOCATE(left1,left2)
+!
+  CONTAINS
+    SUBROUTINE coefeq(x, y, idt, idw, c)
+      DOUBLE PRECISION, INTENT(in) :: x, y
+      INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
+      DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
+!
+      DOUBLE PRECISION :: zcoef
+!
+! Weak form = Int(x*C*dw/dx*dt/dx + C/x*dw/dy*dt/dy)dxdy
+!      C(x,y) = 1 + epsilon*x*cos(y)
+!
+      zcoef = 1.0d0 + epsi*x*COS(y)
+!
+      c(1) = x*zcoef        ! 
+      idt(1,1) = 1
+      idt(1,2) = 0
+      idw(1,1) = 1
+      idw(1,2) = 0
+!
+      c(2) = zcoef/x
+      idt(2,1) = 0
+      idt(2,2) = 1
+      idw(2,1) = 0
+      idw(2,2) = 1
+    END SUBROUTINE coefeq
+  END SUBROUTINE dismat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE disrhs(mbess, epsi, spl, rhs)
+!
+!   Assembly the RHS using 2d spline spl
+!
+    INTEGER, INTENT(in)           :: mbess
+    DOUBLE PRECISION, INTENT(in)  :: epsi
+    TYPE(spline2d), INTENT(in)    :: spl
+    DOUBLE PRECISION, INTENT(out) :: rhs(:)
+!
+    INTEGER :: n1, nidbas1, ndim1, ng1
+    INTEGER :: n2, nidbas2, ndim2, ng2
+    INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
+    DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
+    DOUBLE PRECISION:: contrib
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, n2, nidbas2) 
+!
+    ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
+    ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
+!
+!   Gauss quadature
+!
+    CALL get_gauss(spl%sp1, ng1)
+    CALL get_gauss(spl%sp2, ng2)
+    ALLOCATE(xg1(ng1), wg1(ng1))
+    ALLOCATE(xg2(ng1), wg2(ng1))
+!===========================================================================
+!              2.0 Assembly loop
+!
+    nrank = SIZE(rhs)
+    rhs(1:nrank) = 0.0d0
+!
+    DO i=1,n1
+       CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
+       DO ig1=1,ng1
+          CALL basfun(xg1(ig1), spl%sp1, fun1, i)
+          DO j=1,n2
+             CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
+             DO ig2=1,ng2
+                CALL basfun(xg2(ig2), spl%sp2, fun2, j)
+                contrib = wg1(ig1)*wg2(ig2) * &
+                     &    rhseq(xg1(ig1),xg2(ig2), mbess)
+                DO k1=0,nidbas1
+                   i1 = i+k1
+                   DO k2=0,nidbas2
+                      j2 = MODULO(j+k2-1,n2) + 1
+                      ij = j2 + (i1-1)*n2
+                      rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
+                   END DO
+                END DO
+             END DO
+          END DO
+       END DO
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(xg1, wg1, fun1)
+    DEALLOCATE(xg2, wg2, fun2)
+!
+  CONTAINS
+    DOUBLE PRECISION FUNCTION rhseq(x, y, m)
+      DOUBLE PRECISION, INTENT(in) :: x, y
+      INTEGER, INTENT(in) :: m
+      DOUBLE PRECISION    :: xm
+!
+      xm = REAL(m,8)
+      rhseq = x**(m+1) * ( 4.0d0*(xm+1.0d0)*COS(xm*y) + &
+           &  epsi*x*( &
+           &    ( (3.0d0*(xm+1.0d0) - xm/x**2)*COS((xm-1.0d0)*y) + &
+           &     (3.0d0+2.0d0*xm)*COS((xm+1.0d0)*y) ) &
+           &    ))
+    END FUNCTION rhseq
+  END SUBROUTINE disrhs
+!
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE ibcmat(mat, ny)
+!
+!   Apply BC on matrix
+!
+    TYPE(pardiso_mat), INTENT(inout) :: mat
+    INTEGER, INTENT(in) :: ny
+    INTEGER :: nrank, i, j
+    DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
+!===========================================================================
+!              1.0 Prologue
+!
+    nrank = mat%rank
+    ALLOCATE(zsum(nrank), arr(nrank))
+!===========================================================================
+!              2.0 Unicity at the axis
+!
+!   The vertical sum on the NY-th row
+!
+    zsum = 0.0d0
+    DO i=1,ny
+       arr = 0.0d0
+       CALL getrow(mat, i, arr)
+       zsum(:) = zsum(:) + arr(:)
+    END DO
+    zsum(ny) = SUM(zsum(1:ny))   ! using symmetry
+    CALL putrow(mat, ny, zsum)
+!
+!   The away operator
+!
+    DO i = 1,ny-1
+       arr = 0.0d0; arr(i) = 1.0d0
+       CALL putrow(mat, i, arr)     
+    END DO
+!  
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+    DO i = nrank, nrank-ny+1, -1
+       arr = 0.0d0; arr(i) = 1.0d0
+       CALL putrow(mat, i, arr)     
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(zsum, arr)
+!
+  END SUBROUTINE ibcmat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE ibcrhs(rhs, ny)
+!
+!   Apply BC on RHS
+!
+    DOUBLE PRECISION, INTENT(inout) :: rhs(:)
+    INTEGER, INTENT(in) :: ny
+    INTEGER :: nrank
+    DOUBLE PRECISION :: zsum
+!===========================================================================
+!              1.0 Prologue
+!
+    nrank = SIZE(rhs,1)
+!===========================================================================
+!              2.0 Unicity at the axis
+!
+!   The vertical sum
+!
+    zsum = SUM(rhs(1:ny))
+    rhs(ny) = zsum
+    rhs(1:ny-1) = 0.0d0
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+    rhs(nrank-ny+1:nrank) = 0.0d0
+  END SUBROUTINE ibcrhs
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+END MODULE pde2d_sym_pardiso_mod
+PROGRAM main
+  USE pde2d_sym_pardiso_mod
+  USE futils
+!
+  IMPLICIT NONE
+  INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms
+  LOGICAL :: nlppform
+  INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it
+  DOUBLE PRECISION :: pi, epsi, coefx(5), coefy(5)
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol
+  TYPE(spline2d) :: splxy
+  TYPE(pardiso_mat) :: mat
+!
+  CHARACTER(len=128) :: file='pde2d_sym_pardiso.h5'
+  INTEGER :: fid
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
+  DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
+  DOUBLE PRECISION :: seconds, mem, dopla
+  DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
+  DOUBLE PRECISION :: tconv, treord
+  INTEGER :: nits=100
+  LOGICAL :: nlmetis, nlforce_zero, nlpos
+!
+  NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, epsi, nlppform, nlmetis, &
+       &            nlforce_zero, nlpos, coefx, coefy
+!===========================================================================
+!              1.0 Prologue
+!
+!   Read in data specific to run
+!
+  nx = 8              ! Number of intervals in x
+  ny = 8              ! Number of intervals in y
+  nidbas = (/3,3/)    ! Degree of splines
+  ngauss = (/4,4/)    ! Number of Gauss points/interval
+  mbess = 2           ! Exponent of differential problem
+  epsi = 0.5          ! Non-uniformity in the Laplacian coefficicient
+  nterms = 2          ! Number of terms in weak form
+  nlppform = .TRUE.   ! Use PPFORM for gridval or not
+  nlmetis = .FALSE.   ! Use metis ordering or minimum degree
+  nlforce_zero = .FALSE. ! Remove existing nodes with zero val in  putele/row/ele
+  nlpos = .TRUE.         ! Matrix is positive definite
+  coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+  coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Define grid on x (=radial) & y (=poloidal) axis
+!
+  pi = 4.0d0*ATAN(1.0d0)
+  ALLOCATE(xgrid(0:nx), ygrid(0:ny))
+  xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
+  CALL meshdist(coefx, xgrid, nx)
+  ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
+  CALL meshdist(coefy, ygrid, ny)
+!
+!   Create hdf5 file
+!
+  CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
+  CALL attach(fid, '/', 'NX', nx)
+  CALL attach(fid, '/', 'NY', ny)
+  CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
+  CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
+  CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
+  CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
+  CALL attach(fid, '/', 'MBESS', mbess)
+  CALL attach(fid, '/', 'EPSI', epsi)
+!===========================================================================
+!              2.0 Discretize the PDE
+!
+!   Set up spline
+!
+  CALL set_spline(nidbas, ngauss, &
+       &          xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
+!
+!   FE matrix assembly
+!
+  nrank = (nx+nidbas(1))*ny    ! Rank of the FE matrix
+  WRITE(*,'(a,i8)') 'nrank', nrank
+!
+  t0 = seconds()
+  CALL init(nrank, nterms, mat, nlsym=.TRUE.)
+  CALL dismat(splxy, epsi, mat)
+  ALLOCATE(arr(nrank))
+  IF(nrank.LT.100) THEN
+     DO i=1,nrank
+        CALL getele(mat, i, i, arr(i))
+     END DO
+     WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', arr
+  END IF
+!
+!   BC on Matrix
+!
+  WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero
+  WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
+  CALL ibcmat(mat, ny)
+  IF(nrank.LT.100) THEN
+     DO i=1,nrank
+        CALL getele(mat, i, i, arr(i))
+     END DO
+     WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', arr
+     WRITE(*,'(a)') 'Last rows'
+     DO i=nrank-ny,nrank
+        CALL getrow(mat, i, arr)
+        WRITE(*,'(10(1pe12.3))') arr
+     END DO
+  END IF
+  tmat = seconds() - t0
+!
+!   RHS assembly
+!
+  ALLOCATE(rhs(nrank), sol(nrank))
+  CALL disrhs(mbess, epsi, splxy, rhs)
+!
+!   BC on RHS
+!
+  CALL ibcrhs(rhs, ny)
+!
+  CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
+  WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
+  WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem()
+!===========================================================================
+!              3.0 Solve the dicretized system
+!
+  t0 = seconds()
+!!$  CALL factor(mat)
+!
+!   The call to "factor" could be split into the
+!   3 following calls
+!
+  CALL to_mat(mat)
+  tconv = seconds() -t0
+  WRITE(*,'(/a,l4)') 'associated(mat%mat)', ASSOCIATED(mat%mat)
+  WRITE(*,'(a,1pe12.3)') 'Mem used (MB) after TO_MAT', mem()
+!
+  t0 = seconds()
+  CALL reord_mat(mat, nlmetis=nlmetis, debug=.FALSE.)
+  CALL putmat(fid, '/MAT', mat)
+  treord = seconds() - t0
+!
+  t0 = seconds()
+  CALL numfact(mat, debug=.FALSE.)
+  tfact = seconds() - t0
+ 
+  WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem()
+  WRITE(*,'(/a,i12)') 'Number of nonzeros in factors of A  = ',mat%p%iparm(18)
+  WRITE(*,'(a,i12)')  'Number of factorization MFLOPS      = ',mat%p%iparm(19)
+  gflops1 = mat%p%iparm(19) / tfact / 1.d3
+!
+  CALL bsolve(mat, rhs, sol, debug=.FALSE.)
+  t0 = seconds()
+  DO it=1,nits   ! nits iterations for timing
+     CALL bsolve(mat, rhs, sol)
+     sol(1:ny-1) = sol(ny)
+  END DO
+  WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem()
+  tsolv = (seconds() - t0)/REAL(nits)
+!
+!   Spline coefficients, taking into account of periodicity in y
+!   Note: in SOL, y was numbered first.
+!
+  dimx = splxy%sp1%dim
+  dimy = splxy%sp2%dim
+  ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
+  DO j=0,dimy-1
+     DO i=0,dimx-1
+        ij = MODULO(j,ny) + i*ny + 1
+        bcoef(i,j) = sol(ij)
+     END DO
+  END DO
+  WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef)
+  CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
+!===========================================================================
+!              4.0 Check the solution
+!
+!   Check function values computed with various method
+!
+  ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
+  DO i=0,nx
+     DO j=0,ny
+        solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
+     END DO
+  END DO
+  jder = (/0,0/)
+!
+!   Compute PPFORM/BCOEFS at first call to gridval
+  CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
+!
+  WRITE(*,'(/a)') '*** Checking solutions'
+  t0 = seconds()
+  DO it=1,nits   ! nits iterations for timing
+     CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+  END DO
+  tgrid = (seconds() - t0)/REAL(nits)
+  errsol = solana - solcal
+  IF( SIZE(bcoef,2) .LE. 10 ) THEN
+     CALL prnmat('BCOEF', bcoef)
+     CALL prnmat('SOLANA', solana)
+     CALL prnmat('SOLCAL', solcal)
+     CALL prnmat('ERRSOL', errsol)
+  END IF
+  WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
+       &    norm2(errsol) / norm2(solana)
+  WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s)             ', tgrid
+!
+  CALL putarr(fid, '/xgrid', xgrid, 'r')
+  CALL putarr(fid, '/ygrid', ygrid, '\theta')
+  CALL putarr(fid, '/sol', solcal, 'Solutions')
+  CALL putarr(fid, '/solana', solana,'Exact solutions')
+  CALL putarr(fid, '/errors', errsol, 'Errors')
+!
+!   Check derivatives d/dx and d/dy
+!
+  WRITE(*,'(/a)') '*** Checking gradient'
+  DO i=0,nx
+     DO j=0,ny
+        IF( mbess .EQ. 0 ) THEN
+           solana(i,j) = -2.0d0 * xgrid(i)
+        ELSE
+           solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
+             &           xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
+        END IF
+     END DO
+  END DO
+!
+  jder = (/1,0/)
+  CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+  errsol = solana - solcal
+  CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
+  WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx',  norm2(errsol)
+!
+  DO i=0,nx
+     DO j=0,ny
+        solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
+     END DO
+  END DO
+!
+  jder = (/0,1/)
+  CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+  CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
+  errsol = solana - solcal
+  WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy',  norm2(errsol)
+!
+  WRITE(*,'(/a)') '---'
+  WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
+  WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s)   ', tconv
+  WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s)      ', treord
+  WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
+  WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv
+  WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s)         ', tsolv
+  WRITE(*,'(a,2f10.3)') 'Factor  Gflop/s', gflops1
+!===========================================================================
+!              5.0 Clear the matrix and recompute
+!
+  WRITE(*,'(/a)') 'Recompute the solver ...'
+  t0 = seconds()
+  CALL clear_mat(mat)
+  CALL dismat(splxy, epsi, mat)
+  CALL ibcmat(mat, ny)
+  tmat = seconds()-t0
+!
+  t0 = seconds()
+!!$  CALL numfact(mat, debug=.FALSE.)
+  CALL factor(mat, nlreord=.FALSE., debug=.FALSE.)
+  tfact = seconds()-t0
+  gflops1 = mat%p%iparm(19) / tfact / 1.d3
+!
+  t0 = seconds()
+  ALLOCATE(newsol(nrank))
+  CALL bsolve(mat, rhs, newsol)
+  newsol(1:ny-1) = newsol(ny)
+  tsolv = seconds()-t0
+!
+  WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol))
+  WRITE(*,'(/a)') '---'
+  WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
+  WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
+  WRITE(*,'(a,1pe12.3)') 'Backsolve(2) time (s)         ', tsolv
+  WRITE(*,'(a,1pe12.3)') 'Total (s)                     ', tmat+tfact+tsolv
+  WRITE(*,'(a,2f10.3)') 'Factor  Gflop/s', gflops1
+!
+  DEALLOCATE(newsol)
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(xgrid, rhs, sol)
+  DEALLOCATE(solcal, solana, errsol)
+  DEALLOCATE(bcoef)
+  DEALLOCATE(arr)
+  CALL destroy_sp(splxy)
+  CALL destroy(mat)
+!
+  CALL closef(fid)
+!===========================================================================
+!
+CONTAINS
+  FUNCTION norm2(x)
+!
+!  Compute the 2-norm of array x
+!
+    IMPLICIT NONE
+    DOUBLE PRECISION :: norm2
+    DOUBLE PRECISION, INTENT(in) :: x(:,:)
+    DOUBLE PRECISION :: sum2
+    INTEGER :: i, j
+!
+    sum2 = 0.0d0
+    DO i=1,SIZE(x,1)
+       DO j=1,SIZE(x,2)
+          sum2 = sum2 + x(i,j)**2
+       END DO
+    END DO
+    norm2 = SQRT(sum2)
+  END FUNCTION norm2
+  SUBROUTINE prnmat(label, mat)
+    CHARACTER(len=*) :: label
+    DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
+    INTEGER :: i
+    WRITE(*,'(/a)') TRIM(label)
+    DO i=1,SIZE(mat,1)
+       WRITE(*,'(10(1pe12.3))') mat(i,:)
+    END DO
+  END SUBROUTINE prnmat
+END PROGRAM main
+!
+!+++
+SUBROUTINE meshdist(c, x, nx)
+!
+!   Construct an 1d  non-equidistant mesh given a
+!   mesh distribution function.
+!
+  IMPLICIT NONE
+  DOUBLE PRECISION, INTENT(in) :: c(5)
+  INTEGER, INTENT(iN) :: nx
+  DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+  INTEGER :: nintg
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+  DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+  INTEGER :: i, k
+!
+  a=x(0)
+  b=x(nx)
+  nintg = 10*nx
+  ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+  dx = (b-a)/REAL(nintg)
+  xint(0) = a
+  fint(0) = 0.0d0
+  f1 = fdist(xint(0))
+  DO i=1,nintg
+     f0 = f1
+     xint(i) = xint(i-1) + dx
+     f1 = fdist(xint(i))
+     fint(i) = fint(i-1) + 0.5*(f0+f1)
+  END DO
+!
+!  Normalization
+!
+  scal = REAL(nx) / fint(nintg)
+  fint(0:nintg) = fint(0:nintg) * scal
+!!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+  k = 1
+  DO i=1,nintg-1
+     IF( fint(i) .GE. REAL(k) ) THEN
+        x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+             &   (k-fint(i))
+        k = k+1
+     END IF
+  END DO
+!
+  DEALLOCATE(xint, fint)
+CONTAINS
+  DOUBLE PRECISION FUNCTION fdist(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
+  END FUNCTION fdist
+END SUBROUTINE meshdist
+!+++
diff --git a/examples/pde2d_sym_pardiso_dft.f90 b/examples/pde2d_sym_pardiso_dft.f90
new file mode 100644
index 0000000..42426e6
--- /dev/null
+++ b/examples/pde2d_sym_pardiso_dft.f90
@@ -0,0 +1,1034 @@
+!>
+!> @file pde2d_sym_pardiso_dft.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+!
+!  Solving the 2d PDE using splines and PARDISO symmetric matrix.
+!  The periodic coordinate y is discrete Fourier transformed.
+!
+!    -d/dx[x C d/dx]f - 1x/d/dy[Cd/dy] f = \rho, with f(x=1,y) = 0
+!     C(x,y) = 1 + \epsilon x cos(y)
+!     exact solution: f(x,y) = (1-x^2) x^m cos(my)
+!
+MODULE pde2d_sym_pardiso_dft_mod
+  USE bsplines
+  USE pardiso_bsplines
+  IMPLICIT NONE
+!
+CONTAINS
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE dismat(spl, epsi, mat)
+!
+!   Assembly of FE matrix mat using spline spl
+!
+    TYPE(spline2d), INTENT(in)       :: spl
+    DOUBLE PRECISION, INTENT(in)     :: epsi
+    TYPE(zpardiso_mat), INTENT(inout) :: mat
+!
+    INTEGER :: n1, nidbas1, ndim1, ng1
+    INTEGER :: n2, nidbas2, ndim2, ng2
+    INTEGER :: kmin, kmax, dk
+    INTEGER :: i, j, ig1, ig2, kc
+    INTEGER :: iterm, iw1, mw, igw1, it1, mt, igt1, irow, jcol
+    DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:)
+    DOUBLE COMPLEX, ALLOCATABLE   :: ft_fun2(:,:,:), fft_temp(:)
+    DOUBLE COMPLEX :: contrib
+!
+    INTEGER :: kterms         ! Number of terms in weak form
+    INTEGER :: kcoupl         ! Number of mode couplings
+    INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:)  ! Derivative order
+    DOUBLE COMPLEX, ALLOCATABLE  :: coefs(:,:,:,:)          ! Terms in weak form
+    INTEGER, ALLOCATABLE :: left1(:), left2(:)
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
+    kmin = spl%sp2%dft%kmin
+    kmax = spl%sp2%dft%kmax
+    dk   = spl%sp2%dft%dk
+    WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
+    WRITE(*,'(a, 5i6)')  'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
+    WRITE(*,'(a, 5i6)')  'kmin, kmax, dk     =', kmin, kmax, dk
+!
+!   Gauss quadature
+!
+    CALL get_gauss(spl%sp1, ng1)
+    CALL get_gauss(spl%sp2, ng2)
+    ALLOCATE(xg1(ng1), wg1(ng1))
+    ALLOCATE(xg2(ng2), wg2(ng2))
+!
+!   Weak form
+!
+    kterms = mat%nterms
+    kcoupl = SIZE(spl%sp2%dft%mode_couplings)
+    ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
+    ALLOCATE(coefs(kterms,kcoupl,ng1,ng2))
+!
+!   Splines and derivatives at all Gauss points
+!
+    ALLOCATE(fun1(0:nidbas1,0:1,ng1))    ! Spline and 1st derivative
+    ALLOCATE(ft_fun2(kmin:kmax,0:1,ng2)) ! DFT of splines and 1st derivative
+    ALLOCATE(fft_temp(0:n2-1))           ! Used in coefeq
+!===========================================================================
+!              2.0 Assembly loop
+!
+    ALLOCATE(left1(ng1))
+    ALLOCATE(left2(ng2))
+!
+! First interval in 2nd (periodic) coordinate
+!
+    j = 1
+    CALL get_gauss(spl%sp2, ng2, 1, xg2, wg2)
+    left2 = j
+    CALL ft_basfun(xg2, spl%sp2, ft_fun2, left2)
+
+    DO i=1,n1
+       CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
+       left1 = i
+       CALL basfun(xg1, spl%sp1, fun1, left1)
+       DO ig1=1,ng1
+          DO ig2=1,ng2
+             CALL coefeq(xg1(ig1), xg2(ig2), &
+                  &      idert(:,:,ig1,ig2), &
+                  &      iderw(:,:,ig1,ig2), &
+                  &      coefs(:,:,ig1,ig2))
+          END DO
+       END DO
+!
+       DO iw1=0,nidbas1     ! Weight function in dir 1
+          igw1 = i+iw1
+          DO it1=0,nidbas1  ! Test function in dir 1
+             igt1 = i+it1
+             DO mt=kmin,kmax ! Test Fourier mode
+                DO kc=1,kcoupl
+                   mw = mt + spl%sp2%dft%mode_couplings(kc)
+                   IF(mw.LT.kmin .OR. mw.GT.kmax) CYCLE
+!-------------
+                   contrib = (0.0d0, 0.0d0)
+                   DO ig1=1,ng1
+                      DO ig2=1,ng2
+                         DO iterm=1,kterms
+                            contrib = contrib + &
+                              &    fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
+                              &    ft_fun2(mw,iderw(iterm,2,ig1,ig2),ig2) * &
+                              &    coefs(iterm,kc,ig1,ig2) *                 &
+                              &    CONJG(ft_fun2(mt,idert(iterm,2,ig1,ig2),ig2)) * &
+                              &    fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
+                              &    wg1(ig1) * wg2(ig2) /REAL(n2,8)
+                         END DO
+                      END DO
+                   END DO
+                   irow = (igw1-1)*dk + (mw-kmin)+1  ! Number first mode m then radial coord.
+                   jcol = (igt1-1)*dk + (mt-kmin)+1
+                   CALL updtmat(mat, irow, jcol, contrib)
+!-------------
+                END DO
+             END DO
+          END DO
+       END DO
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(xg1, wg1, fun1)
+    DEALLOCATE(xg2, wg2, ft_fun2)
+    DEALLOCATE(idert, iderw, coefs)
+    DEALLOCATE(left1,left2)
+    DEALLOCATE(fft_temp)
+!
+  CONTAINS
+    SUBROUTINE coefeq(x, y, idt, idw, c)
+      USE fft
+      DOUBLE PRECISION, INTENT(in) :: x, y
+      INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
+      DOUBLE COMPLEX, INTENT(out) :: c(:,:)
+!
+      DOUBLE PRECISION :: zcoef, dy
+      INTEGER :: j, k, kc, kp
+!
+! Weak form = Int(x*C*dw/dx*dt/dx + C/x*dw/dy*dt/dy)dxdy
+!      C(x,y) = 1 + epsilon*x*cos(y)
+!
+      dy = spl%sp2%dft%dx
+      kc = SIZE(spl%sp2%dft%mode_couplings)
+      DO j=0,n2-1
+         fft_temp(j) = 1.0d0+epsi*x*COS(y+j*dy)
+      END DO
+      CALL fourcol(fft_temp,1)
+      DO k=1,kc
+         kp = spl%sp2%dft%mode_couplings(k)
+         IF(kp.LT.0) kp=kp+n2
+         c(1,k) = x*fft_temp(kp)
+         c(2,k) = fft_temp(kp)/x
+      END DO
+!!$      WRITE(*,'(a/(10(1pe12.4)))') 'fft_temp', ABS(fft_temp)
+!!$      WRITE(*,'(a/(10(1pe12.4)))') 'c1', ABS(c(1,:))
+!!$      WRITE(*,'(a/(10(1pe12.4)))') 'c2', ABS(c(2,:))
+!
+      idt(1,1) = 1
+      idt(1,2) = 0
+      idw(1,1) = 1
+      idw(1,2) = 0
+!
+      idt(2,1) = 0
+      idt(2,2) = 1
+      idw(2,1) = 0
+      idw(2,2) = 1
+    END SUBROUTINE coefeq
+  END SUBROUTINE dismat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE disrhs(mbess, epsi, spl, rhs)
+!
+!   Assembly the RHS using 2d spline spl
+!
+    INTEGER, INTENT(in)           :: mbess
+    DOUBLE PRECISION, INTENT(in)  :: epsi
+    TYPE(spline2d), INTENT(in)    :: spl
+    DOUBLE PRECISION, INTENT(out) :: rhs(:)
+!
+    INTEGER :: n1, nidbas1, ndim1, ng1
+    INTEGER :: n2, nidbas2, ndim2, ng2
+    INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
+    DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
+    DOUBLE PRECISION:: contrib
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, n2, nidbas2) 
+!
+    ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
+    ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
+!
+!   Gauss quadature
+!
+    CALL get_gauss(spl%sp1, ng1)
+    CALL get_gauss(spl%sp2, ng2)
+    ALLOCATE(xg1(ng1), wg1(ng1))
+    ALLOCATE(xg2(ng1), wg2(ng1))
+!===========================================================================
+!              2.0 Assembly loop
+!
+    nrank = SIZE(rhs)
+    rhs(1:nrank) = 0.0d0
+!
+    DO i=1,n1
+       CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
+       DO ig1=1,ng1
+          CALL basfun(xg1(ig1), spl%sp1, fun1, i)
+          DO j=1,n2
+             CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
+             DO ig2=1,ng2
+                CALL basfun(xg2(ig2), spl%sp2, fun2, j)
+                contrib = wg1(ig1)*wg2(ig2) * &
+                     &    rhseq(xg1(ig1),xg2(ig2), mbess)
+                DO k1=0,nidbas1
+                   i1 = i+k1
+                   DO k2=0,nidbas2
+                      j2 = MODULO(j+k2-1,n2) + 1
+                      ij = j2 + (i1-1)*n2
+                      rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
+                   END DO
+                END DO
+             END DO
+          END DO
+       END DO
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(xg1, wg1, fun1)
+    DEALLOCATE(xg2, wg2, fun2)
+!
+  CONTAINS
+    DOUBLE PRECISION FUNCTION rhseq(x, y, m)
+      DOUBLE PRECISION, INTENT(in) :: x, y
+      INTEGER, INTENT(in)          :: m
+!
+      DOUBLE PRECISION :: xm
+      xm = REAL(m,8)
+      rhseq = x**(m+1) * ( 4.0d0*(xm+1.0d0)*COS(xm*y) + &
+           &  epsi*x*( &
+           &    ( (3.0d0*(xm+1.0d0) - xm/x**2)*COS((xm-1.0d0)*y) + &
+           &     (3.0d0+2.0d0*xm)*COS((xm+1.0d0)*y) ) &
+           &    ))
+    END FUNCTION rhseq
+  END SUBROUTINE disrhs
+!
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE ibcmat(mat, dft)
+!
+!   Apply BC on matrix
+!
+    TYPE(zpardiso_mat), INTENT(inout) :: mat
+    TYPE(dftmap), INTENT(in)          :: dft
+    INTEGER :: nrank, k, kmin, kmax, dk, i
+    DOUBLE COMPLEX :: arr(mat%rank)
+!===========================================================================
+!              1.0 Prologue
+!
+    nrank = mat%rank
+    kmin = dft%kmin
+    kmax = dft%kmax
+    dk   = dft%dk
+!===========================================================================
+!              2.0 BC at the axis
+!
+!   zero for non-zero modes
+!
+    DO k=kmin,kmax
+       IF(k.NE.0) THEN
+          i = k-kmin+1
+          arr = 0.0d0; arr(i) = 1.0d0
+          CALL putrow(mat, i, arr)
+       END IF
+    END DO
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+    DO i = nrank, nrank-dk+1, -1
+       arr = 0.0d0; arr(i) = 1.0d0
+       CALL putrow(mat, i, arr)     
+    END DO
+!
+  END SUBROUTINE ibcmat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE ibcrhs(rhs, dft)
+!
+!   Apply BC on RHS
+!
+    DOUBLE COMPLEX, INTENT(inout) :: rhs(:)
+    TYPE(dftmap), INTENT(in)      :: dft
+    INTEGER :: nrank, kmin, kmax, dk, k
+!===========================================================================
+!              1.0 Prologue
+!
+    nrank = SIZE(rhs,1)
+    kmin = dft%kmin
+    kmax = dft%kmax
+    dk = dft%dk
+!===========================================================================
+!              2.0 BC at the axis
+!
+!   zero for non-zero modes
+!
+    DO k=kmin,kmax
+       IF(k.NE.0) rhs(k-kmin+1) = 0.0
+    END DO
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+    rhs(nrank-dk+1:nrank) = 0.0d0
+  END SUBROUTINE ibcrhs
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE spectrum0(spl, carr, xpt, cspec)
+!
+!  DFT modes at xpt (integration on the first interval)
+!
+    DOUBLE COMPLEX, PARAMETER   :: ci = (0.0d0,1.0d0)
+    DOUBLE PRECISION, PARAMETER :: pi = 3.141592653589793d0
+    TYPE(spline2d), INTENT(in)   :: spl
+    DOUBLE COMPLEX, INTENT(in)   :: carr(:)
+    DOUBLE PRECISION, INTENT(in) :: xpt
+    DOUBLE COMPLEX, INTENT(out)  :: cspec(:)
+!
+    INTEGER :: ndim1, n1, nidbas1, ndim2, n2, nidbas2
+    INTEGER :: k,  kmin, kmax, dk, kk
+    INTEGER :: ng2, ig2
+    INTEGER, ALLOCATABLE :: left2(:)
+    DOUBLE PRECISION :: temp(1)
+    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:)
+    DOUBLE COMPLEX, ALLOCATABLE :: ft_fun2(:,:,:), psi(:), coefs(:,:)
+!
+    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
+    kmin = spl%sp2%dft%kmin
+    kmax = spl%sp2%dft%kmax
+    dk   = spl%sp2%dft%dk
+!
+    CALL get_gauss(spl%sp2, ng2)
+    ALLOCATE(left2(ng2))
+    ALLOCATE(xg2(ng2), wg2(ng2))
+    ALLOCATE(ft_fun2(kmin:kmax,1,ng2)) ! DFT of splines
+    ALLOCATE(psi(kmin:kmax))
+!
+!  Integration over first interval
+!
+    left2 = 1
+    CALL get_gauss(spl%sp2, ng2, 1, xg2, wg2)
+    CALL ft_basfun(xg2, spl%sp2, ft_fun2, left2)
+    psi = (0.0d0,0.0d0)
+    DO k=kmin,kmax
+       DO ig2=1,ng2
+          psi(k)  = psi(k) + wg2(ig2)*EXP(k*ci*xg2(ig2))*CONJG(ft_fun2(k,1,ig2))
+       END DO
+    END DO
+!
+    ALLOCATE(coefs(dk,ndim1))
+    coefs = RESHAPE(carr, SHAPE(coefs))
+    temp = xpt
+    DO kk=kmin,kmax
+       k=kk-kmin+1
+       coefs(k,:) = psi(kk)*coefs(k,:)
+       CALL gridval(spl%sp1, temp, cspec(k:k), 0, coefs(k,:))
+    END DO
+    cspec = cspec/(2.0d0*pi)
+!
+    DEALLOCATE(left2)
+    DEALLOCATE(xg2, wg2)
+    DEALLOCATE(ft_fun2)
+    DEALLOCATE(psi)
+    DEALLOCATE(coefs)    
+  END SUBROUTINE spectrum0
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE spectrum1(spl, carr, xpt, ypt0, cspec)
+!
+!  DFT modes at xpt (at the initial ypt0)
+!
+    DOUBLE COMPLEX, PARAMETER   :: ci = (0.0d0,1.0d0)
+!
+    TYPE(spline2d), INTENT(in)   :: spl
+    DOUBLE COMPLEX, INTENT(in)   :: carr(:)
+    DOUBLE PRECISION, INTENT(in) :: xpt, ypt0
+    DOUBLE COMPLEX, INTENT(out)  :: cspec(:)
+!
+    INTEGER :: ndim1, n1, nidbas1, ndim2, n2, nidbas2
+    INTEGER :: k,  kmin, kmax, dk
+    DOUBLE PRECISION :: temp(1)
+    DOUBLE COMPLEX :: ctemp(1)
+    DOUBLE COMPLEX, ALLOCATABLE :: ft_fun2(:,:), coefs(:,:)
+!
+    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
+    kmin = spl%sp2%dft%kmin
+    kmax = spl%sp2%dft%kmax
+    dk   = spl%sp2%dft%dk
+!
+! DFT of splines at ypt0
+    ALLOCATE(ft_fun2(kmin:kmax,1)) 
+    ALLOCATE(coefs(kmin:kmax,ndim1))
+    CALL ft_basfun(ypt0, spl%sp2, ft_fun2, 1)
+    coefs = RESHAPE(carr, SHAPE(coefs))
+!
+    temp = xpt
+    DO k=kmin,kmax
+       CALL gridval(spl%sp1, temp, ctemp, 0, coefs(k,:))
+       cspec(k-kmin+1) = CONJG(ft_fun2(k,1))*ctemp(1)*EXP(k*ci*ypt0)
+    END DO
+    cspec = cspec/REAL(n2,8)
+!
+    DEALLOCATE(ft_fun2)
+    DEALLOCATE(coefs)    
+  END SUBROUTINE spectrum1
+!!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE spectrum2(spl, xpt, ypt0, cspec)
+!
+!  DFT modes at xpt (at the initial ypt0)
+!
+    USE fft
+    USE bsplines
+!
+    DOUBLE COMPLEX, PARAMETER   :: ci = (0.0d0,1.0d0)
+!
+    TYPE(spline2d)               :: spl
+    DOUBLE PRECISION, INTENT(in) :: xpt, ypt0
+    DOUBLE COMPLEX, INTENT(out)  :: cspec(:)
+!
+    INTEGER :: ndim1, n1, nidbas1, ndim2, n2, nidbas2
+    INTEGER :: k,  kmin, kmax, dk
+    DOUBLE PRECISION, ALLOCATABLE :: ypt(:), fun(:,:)
+    DOUBLE COMPLEX, ALLOCATABLE ::   ft_fun(:)
+    DOUBLE PRECISION :: temp(1)
+!
+    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
+    kmin = spl%sp2%dft%kmin
+    kmax = spl%sp2%dft%kmax
+    dk   = spl%sp2%dft%dk
+!
+    ALLOCATE(ypt(0:n2-1))
+    ALLOCATE(fun(1, 0:n2-1))
+    ALLOCATE(ft_fun(0:n2-1))
+!
+!  Function values at points ypt
+!
+    ypt(0:n2-1) = ypt0 + spl%sp2%knots(0:n2-1)
+    temp = xpt
+    CALL gridval(spl, temp, ypt, fun, (/0,0/))
+    ft_fun = fun(1,:)
+!
+!  Discrete Fourier Transform
+!
+    CALL fourcol(ft_fun, 1)
+    DO k=kmin,kmax
+       IF(k.LT.0) THEN
+          cspec(k-kmin+1) = ft_fun(k+n2)*EXP(k*ci*ypt0)
+       ELSE
+          cspec(k-kmin+1) = ft_fun(k)*EXP(k*ci*ypt0)
+       END IF
+    END DO
+    cspec = cspec/REAL(n2,8)
+!
+    DEALLOCATE(ypt)
+    DEALLOCATE(fun)
+    DEALLOCATE(ft_fun)
+  END SUBROUTINE spectrum2
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+END MODULE pde2d_sym_pardiso_dft_mod
+PROGRAM main
+  USE pde2d_sym_pardiso_dft_mod
+  USE futils
+  USE fft
+!
+  IMPLICIT NONE
+  INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms, kmin, kmax, dk
+  INTEGER :: n_mode_couplings
+  INTEGER, ALLOCATABLE :: mode_couplings(:)
+  LOGICAL :: nlppform
+  INTEGER :: i, j, ij, dimx, dimy, nrank, nrank_full, jder(2), it, i0, i0_r
+  INTEGER :: k, kp, ik
+  DOUBLE PRECISION :: pi, epsi, coefx(5), coefy(5)
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
+  DOUBLE COMPLEX, ALLOCATABLE   ::  crhs(:), crhs_r(:), csol(:), csol_r(:)
+  TYPE(spline2d) :: splxy
+  TYPE(zpardiso_mat) :: mat
+!
+  CHARACTER(len=128) :: file='pde2d_sym_pardiso_dft.h5'
+  INTEGER :: fid
+  DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: arr, srow
+  DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
+  DOUBLE PRECISION :: seconds, mem, dopla
+  DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tfour, tfour0, tgrid, gflops1
+  INTEGER :: nits=100
+  LOGICAL :: nlmetis, nlforce_zero, nlpos
+!
+  DOUBLE PRECISION :: xpt, ypt0
+  DOUBLE COMPLEX, ALLOCATABLE :: cspec0(:), cspec(:), energy_k(:)
+  DOUBLE COMPLEX :: energy, energy_exact
+!
+  NAMELIST /newrun/ nx, ny, nidbas, ngauss, kmin, kmax, mbess, epsi, &
+       &            nlppform, nlmetis, nlforce_zero, nlpos, coefx, coefy
+!===========================================================================
+!              1.0 Prologue
+!
+!   Read in data specific to run
+!
+  nx = 8              ! Number of intervals in x
+  ny = 8              ! Number of intervals in y
+  nidbas = (/3,3/)    ! Degree of splines
+  ngauss = (/4,4/)    ! Number of Gauss points/interval
+  kmin = -3           ! Minimum Fourier mode number
+  kmax = 3            ! Maximum Fourier mode number
+  mbess = 2           ! Exponent of differential problem
+  epsi = 0.5          ! Non-uniformity in the Laplacian coefficicient
+  nterms = 2          ! Number of terms in weak form
+  nlppform = .TRUE.   ! Use PPFORM for gridval or not
+  nlmetis = .FALSE.   ! Use metis ordering or minimum degree
+  nlforce_zero = .FALSE. ! Remove existing nodes with zero val in  putele/row/ele
+  nlpos = .TRUE.         ! Matrix is positive definite
+  coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+  coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!!
+!   Read table of mode couplings
+!
+  READ(*,*) n_mode_couplings
+  ALLOCATE(mode_couplings(n_mode_couplings))
+  READ(*,*) mode_couplings
+  WRITE(*,'(/a/(20i4))') 'Mode couplings', mode_couplings
+!
+!   Define grid on x (=radial) & y (=poloidal) axis
+!
+  pi = 4.0d0*ATAN(1.0d0)
+  ALLOCATE(xgrid(0:nx), ygrid(0:ny))
+  xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
+  CALL meshdist(coefx, xgrid, nx)
+  ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
+  CALL meshdist(coefy, ygrid, ny)
+!
+!   Exact energy
+!
+  energy_exact = 2.0d0*pi/REAL(2+mbess,8)
+!
+!   Create hdf5 file
+!
+  CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
+  CALL attach(fid, '/', 'NX', nx)
+  CALL attach(fid, '/', 'NY', ny)
+  CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
+  CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
+  CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
+  CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
+  CALL attach(fid, '/', 'MBESS', mbess)
+  CALL attach(fid, '/', 'EPSI', epsi)
+  CALL attach(fid, '/', 'KMIN', kmin)
+  CALL attach(fid, '/', 'KMAX', kmax)
+!===========================================================================
+!              2.0 Discretize the PDE
+!
+!   Set up spline
+!
+  CALL set_spline(nidbas, ngauss, xgrid, ygrid, splxy, &
+       &          (/.FALSE., .TRUE./))
+!
+!   Init DFT for spline in 2nd direction
+!
+  CALL init_dft(splxy%sp2, kmin, kmax, mode_couplings)
+  dk = splxy%sp2%dft%dk
+!
+!   FE matrix assembly
+!
+  dimx = splxy%sp1%dim
+  dimy = splxy%sp2%dim
+  nrank = (nx+nidbas(1))*dk       ! Rank of restricted matrix
+  nrank_full = (nx+nidbas(1))*ny  ! Rank of full matrix
+!
+  ALLOCATE(rhs(nrank_full), sol(nrank_full))
+  ALLOCATE(crhs(nrank_full), csol(nrank_full))
+  ALLOCATE(crhs_r(nrank), csol_r(nrank))
+!
+  WRITE(*,'(a,i8)') 'nrank_full', nrank_full
+  WRITE(*,'(a,i8)') 'nrank     ', nrank
+!
+  t0 = seconds()
+  CALL init(nrank, nterms, mat, nlherm=.TRUE.)
+  CALL dismat(splxy, epsi, mat)
+  ALLOCATE(arr(nrank))
+  ALLOCATE(srow(nrank))
+  DO i=1,nrank
+     CALL getrow(mat, i, arr)
+     srow(i) = SUM(arr)
+  END DO
+!!$  WRITE(*,'(a/(10(1pe12.3)))') 'Real of Sum of matrix rows before BC', REAL(srow)
+!!$  WRITE(*,'(a/(10(1pe12.3)))') 'Imag of Sum of matrix rows before BC', AIMAG(srow)
+  PRINT*, 'Sum of mat before BC', SUM(srow)
+!
+!   BC on Matrix
+!
+  WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero
+  WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
+  CALL ibcmat(mat, splxy%sp2%dft)
+  tmat = seconds() - t0
+  DO i=1,nrank
+     CALL getrow(mat, i, arr)
+     srow(i) = SUM(arr)
+  END DO
+!!$  WRITE(*,'(a/(10(1pe12.3)))') 'Real of Sum of matrix rows after BC', REAL(srow)
+!!$  WRITE(*,'(a/(10(1pe12.3)))') 'Imag of Sum of matrix rows after BC', AIMAG(srow)
+  PRINT*, 'Sum of mat after BC', SUM(srow)
+!
+  WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
+  WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem()
+!
+!   RHS assembly
+!
+  CALL disrhs(mbess, epsi, splxy, rhs)
+!
+!   Init FFT
+!
+  t0 = seconds()
+  CALL fourcol(crhs(1:ny),1)
+  CALL fourcol(crhs(1:ny),-1)
+  tfour0 = seconds()-t0
+  crhs = crhs/REAL(ny,8)
+!
+!  DFT of RHS
+!
+  t0 = seconds()
+  crhs = rhs
+  DO i=1,nx+nidbas(1)
+     i0 = (i-1)*ny
+     CALL fourcol(crhs(i0+1:i0+ny), 1)
+  END DO
+  tfour = seconds()-t0
+!
+!   Restriction in Fourier space
+!     k  = kmin:kmax  (restricted)
+!     kp = 0:ny-1     (full)
+!
+  DO i=1,nx+nidbas(1)
+     i0 = (i-1)*ny
+     i0_r = (i-1)*dk
+     DO k=kmin,kmax
+        kp = k
+        IF(kp.LT.0) kp = kp+ny
+        crhs_r(i0_r+k-kmin+1) = crhs(i0+kp+1)
+     END DO
+  END DO
+!
+!   BC on RHS
+!
+  CALL ibcrhs(crhs_r, splxy%sp2%dft)
+!
+  IF(nrank.LT.100) THEN
+     WRITE(*,'(a/(10(1pe12.3)))') 'Real of crhs', REAL(crhs)
+     WRITE(*,'(a/(10(1pe12.3)))') 'Imag of crhs', AIMAG(crhs)
+  END IF
+!===========================================================================
+!              3.0 Solve the dicretized system
+!
+!   Matrix factorization
+!
+  t0 = seconds()
+!!$  CALL factor(mat, nlmetis=nlmetis)
+  CALL to_mat(mat)
+  CALL reord_mat(mat, nlmetis=nlmetis);  CALL putmat(fid, '/MAT1', mat)
+  CALL numfact(mat)
+  tfact = seconds() - t0
+  DO i=1,nrank
+     CALL getrow(mat, i, arr)
+     srow(i) = SUM(arr)
+  END DO
+  PRINT*, 'Sum of mat after factor', SUM(srow)
+ 
+  WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem()
+  WRITE(*,'(/a,i12)') 'Number of nonzeros in factors of A  = ',mat%p%iparm(18)
+  WRITE(*,'(a,i12)')  'Number of factorization MFLOPS      = ',mat%p%iparm(19)
+  gflops1 = mat%p%iparm(19) / tfact / 1.d3
+!
+!   Backsolve
+!
+  t0 = seconds()
+  PRINT*, 'SUM of crhs_r', SUM(crhs_r)
+  CALL bsolve(mat, crhs_r, csol_r, debug=.FALSE.)
+  WRITE(*,'(a,1pe12.4)') 'Residue =', cnorm2(vmx(mat,csol_r)-crhs_r)
+  tsolv = seconds() - t0
+  PRINT*, 'SUM of csol_r', SUM(csol_r)
+!
+  CALL putarr(fid, '/FT_RHS', crhs_r, 'DFT of RHS')
+  CALL putarr(fid, '/FT_SOL', csol_r, 'DFT of Spline coefficients')
+!===========================================================================
+!              4.0 Perform some diagnostics in Fourier space
+!
+!   Fourier spectrum at xpt
+!
+  xpt = SQRT(REAL(mbess,8)/REAL(mbess+2,8))
+  ALLOCATE(cspec0(dk))
+  CALL spectrum0(splxy, csol_r, xpt, cspec0)
+  WRITE(*,'(/a,f10.5)') 'DFT spectrum (by integration) at x = ', xpt
+  DO k=kmin,kmax
+     WRITE(*,'(i5,3(1pe15.3))') k, cspec0(k-kmin+1), ABS(cspec0(k-kmin+1))
+  END DO
+!
+  ypt0 = 0.0d0
+  WRITE(*,'(/a,2f10.5)') 'DFT spectrum1 at x, y0 = ', xpt, ypt0
+  CALL spectrum1(splxy, csol_r, xpt, ypt0, cspec0)
+  DO k=kmin,kmax
+     WRITE(*,'(i5,3(1pe15.3))') k, cspec0(k-kmin+1), ABS(cspec0(k-kmin+1))
+  END DO
+  ypt0 = splxy%sp2%dft%dx/2.0d0   ! Center of first interval
+  WRITE(*,'(/a,2f10.5)') 'DFT spectrum1 at x, y0 = ', xpt, ypt0
+  CALL spectrum1(splxy, csol_r, xpt, ypt0, cspec0)
+  DO k=kmin,kmax
+     WRITE(*,'(i5,3(1pe15.3))') k, cspec0(k-kmin+1), ABS(cspec0(k-kmin+1))
+  END DO
+!
+!   Spectral energy
+!
+  WRITE(*,'(/a)') 'Spectral energies'
+  ALLOCATE(energy_k(kmin:kmax))
+  energy_k = (0.0d0,0.0d0)
+  DO i=1,dimx
+     i0_r = (i-1)*dk
+     DO k=kmin,kmax
+        ik = i0_r+k-kmin+1
+        energy_k(k) = energy_k(k) + csol_r(ik)*CONJG(crhs_r(ik))
+     END DO
+  END DO
+  energy_k = energy_k/REAL(ny,8)
+  energy = SUM(energy_k)
+  DO k=kmin,kmax
+     WRITE(*,'(i5,3(1pe15.3))') k, energy_k(k), ABS(energy_k(k))
+  END DO
+  WRITE(*,'(a5,4(1pe15.3))') 'Sum', energy, ABS(energy), REAL(energy-energy_exact)
+!
+  CALL putarr(fid, '/ENERGY_K', energy_k, 'Spectral energies')
+!===========================================================================
+!              5.0 Transform back to real space
+!
+!   Expand to full Fourier space
+!     k  = kmin:kmax  (restricted)
+!     kp = 0:ny-1     (full)
+!
+  crhs = (0.0d0,0.0d0)
+  DO i=1,nx+nidbas(1)
+     i0 = (i-1)*ny
+     i0_r = (i-1)*dk
+     DO k=kmin,kmax
+        kp = k
+        IF(kp.LT.0) kp = kp+ny
+        csol(i0+kp+1) = csol_r(i0_r+k-kmin+1)
+     END DO
+  END DO
+!
+!   Fourier transform back to real space
+!
+  t0 = seconds()
+  DO i=1,nx+nidbas(1)
+     i0 = (i-1)*ny
+     CALL fourcol(csol(i0+1:i0+ny),-1)
+  END DO
+  sol = REAL(csol)/REAL(ny,8)
+  tfour = tfour + seconds()-t0
+!
+!
+!   Spline coefficients, taking into account of periodicity in y
+!   Note: in SOL, y was numbered first.
+!
+  ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
+  DO j=0,dimy-1
+     DO i=0,dimx-1
+        ij = MODULO(j,ny) + i*ny + 1
+        bcoef(i,j) = sol(ij)
+     END DO
+  END DO
+  CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
+!
+!   Total energy
+!
+  WRITE(*,'(/a, 2(1pe15.3))') 'Total energy and error(real space)', &
+       &     DOT_PRODUCT(rhs,sol), &
+       &     DOT_PRODUCT(rhs,sol)-REAL(energy_exact)               
+  WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem()
+!===========================================================================
+!              6.0 Check the solution
+!
+!   Check function values computed with various method
+!
+  ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
+  DO i=0,nx
+     DO j=0,ny
+        solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
+     END DO
+  END DO
+  jder = (/0,0/)
+!
+!   Compute PPFORM/BCOEFS at first call to gridval
+!
+  CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
+!
+!    Fourier spectrum at xpt
+!
+  xpt = SQRT(REAL(mbess,8)/REAL(mbess+2,8))
+  ALLOCATE(cspec(dk))
+!
+  ypt0 = 0.0d0
+  WRITE(*,'(/a,2f10.5)') 'DFT spectrum2 at x, y0 = ', xpt, ypt0
+  CALL spectrum2(splxy, xpt, ypt0, cspec)
+  DO k=kmin,kmax
+     WRITE(*,'(i5,3(1pe15.3))') k, cspec(k-kmin+1), ABS(cspec(k-kmin+1))
+  END DO
+!
+  ypt0 = splxy%sp2%dft%dx/2.0d0   ! Center of first interval
+  WRITE(*,'(/a,2f10.5)') 'DFT spectrum2 at x, y0 = ', xpt, ypt0
+  CALL spectrum2(splxy, xpt, ypt0, cspec)
+  DO k=kmin,kmax
+     WRITE(*,'(i5,3(1pe15.3))') k, cspec(k-kmin+1), ABS(cspec(k-kmin+1))
+  END DO
+!
+  WRITE(*,'(/a)') '*** Checking solutions'
+  t0 = seconds()
+  DO it=1,nits   ! nits iterations for timing
+     CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+  END DO
+  tgrid = (seconds() - t0)/REAL(nits)
+  errsol = solana - solcal
+  IF( SIZE(bcoef,2) .LE. 10 ) THEN
+     CALL prnmat('BCOEF', bcoef)
+     CALL prnmat('SOLANA', solana)
+     CALL prnmat('SOLCAL', solcal)
+     CALL prnmat('ERRSOL', errsol)
+  END IF
+  WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
+       &    norm2(errsol) / norm2(solana)
+  WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s)             ', tgrid
+!
+  CALL putarr(fid, '/xgrid', xgrid, 'r')
+  CALL putarr(fid, '/ygrid', ygrid, '\theta')
+  CALL putarr(fid, '/sol', solcal, 'Solutions')
+  CALL putarr(fid, '/solana', solana,'Exact solutions')
+  CALL putarr(fid, '/errors', errsol, 'Errors')
+!
+!   Check derivatives d/dx and d/dy
+!
+  WRITE(*,'(/a)') '*** Checking gradient'
+  DO i=0,nx
+     DO j=0,ny
+        IF( mbess .EQ. 0 ) THEN
+           solana(i,j) = -2.0d0 * xgrid(i)
+        ELSE
+           solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
+             &           xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
+        END IF
+     END DO
+  END DO
+!
+  jder = (/1,0/)
+  CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+  errsol = solana - solcal
+  CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
+  WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx',  norm2(errsol)
+!
+  DO i=0,nx
+     DO j=0,ny
+        solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
+     END DO
+  END DO
+!
+  jder = (/0,1/)
+  CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+  CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
+  errsol = solana - solcal
+  WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy',  norm2(errsol)
+!
+  WRITE(*,'(/a)') '---'
+  WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
+  WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
+  WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s)         ', tsolv
+  WRITE(*,'(a,1pe12.3)') 'Init FFT time (s)             ', tfour0
+  WRITE(*,'(a,1pe12.3)') 'FFT time (s)                  ', tfour
+  WRITE(*,'(a,2f10.3)')  'Factor  Gflop/s', gflops1
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(cspec0, cspec)
+  DEALLOCATE(mode_couplings)
+  DEALLOCATE(xgrid, ygrid, rhs, sol)
+  DEALLOCATE(crhs, csol)
+  DEALLOCATE(crhs_r, csol_r)
+  DEALLOCATE(solcal, solana, errsol)
+  DEALLOCATE(bcoef)
+  DEALLOCATE(arr)
+  DEALLOCATE(srow)
+  DEALLOCATE(energy_k)
+  CALL destroy_sp(splxy)
+  CALL destroy(mat)
+!
+  CALL closef(fid)
+!===========================================================================
+!
+CONTAINS
+  FUNCTION norm2(x)
+!
+!  Compute the 2-norm of array x
+!
+    IMPLICIT NONE
+    DOUBLE PRECISION :: norm2
+    DOUBLE PRECISION, INTENT(in) :: x(:,:)
+    DOUBLE PRECISION :: sum2
+    INTEGER :: i, j
+!
+    sum2 = 0.0d0
+    DO i=1,SIZE(x,1)
+       DO j=1,SIZE(x,2)
+          sum2 = sum2 + x(i,j)**2
+       END DO
+    END DO
+    norm2 = SQRT(sum2)
+  END FUNCTION norm2
+!
+  FUNCTION cnorm2(x)
+    DOUBLE COMPLEX, INTENT(in) :: x(:)
+    DOUBLE PRECISION :: cnorm2
+    cnorm2 = SQRT(DOT_PRODUCT(x,x))    
+  END FUNCTION cnorm2
+!
+  SUBROUTINE prnmat(label, mat)
+    CHARACTER(len=*) :: label
+    DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
+    INTEGER :: i
+    WRITE(*,'(/a)') TRIM(label)
+    DO i=1,SIZE(mat,1)
+       WRITE(*,'(10(1pe12.3))') mat(i,:)
+    END DO
+  END SUBROUTINE prnmat
+END PROGRAM main
+!
+!+++
+SUBROUTINE meshdist(c, x, nx)
+!
+!   Construct an 1d  non-equidistant mesh given a
+!   mesh distribution function.
+!
+  IMPLICIT NONE
+  DOUBLE PRECISION, INTENT(in) :: c(5)
+  INTEGER, INTENT(iN) :: nx
+  DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+  INTEGER :: nintg
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+  DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+  INTEGER :: i, k
+!
+  a=x(0)
+  b=x(nx)
+  nintg = 10*nx
+  ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+  dx = (b-a)/REAL(nintg)
+  xint(0) = a
+  fint(0) = 0.0d0
+  f1 = fdist(xint(0))
+  DO i=1,nintg
+     f0 = f1
+     xint(i) = xint(i-1) + dx
+     f1 = fdist(xint(i))
+     fint(i) = fint(i-1) + 0.5*(f0+f1)
+  END DO
+!
+!  Normalization
+!
+  scal = REAL(nx) / fint(nintg)
+  fint(0:nintg) = fint(0:nintg) * scal
+!!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+  k = 1
+  DO i=1,nintg-1
+     IF( fint(i) .GE. REAL(k) ) THEN
+        x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+             &   (k-fint(i))
+        k = k+1
+     END IF
+  END DO
+!
+  DEALLOCATE(xint, fint)
+CONTAINS
+  DOUBLE PRECISION FUNCTION fdist(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
+  END FUNCTION fdist
+END SUBROUTINE meshdist
+!+++
diff --git a/examples/pde2d_sym_wsmp.f90 b/examples/pde2d_sym_wsmp.f90
new file mode 100644
index 0000000..2d0c56e
--- /dev/null
+++ b/examples/pde2d_sym_wsmp.f90
@@ -0,0 +1,696 @@
+!>
+!> @file pde2d_sym_wsmp.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+!
+!  Solving the 2d PDE using splines and WSMP symmetric matrix
+!
+!    -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0
+!    exact solution: f(x,y) = (1-x^2) x^m cos(my)
+!
+MODULE pde2d_sym_wsmp_mod
+  USE bsplines
+  USE wsmp_bsplines
+  IMPLICIT NONE
+CONTAINS
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE dismat(spl, mat)
+!
+!   Assembly of FE matrix mat using spline spl
+!
+    TYPE(spline2d), INTENT(in)       :: spl
+    TYPE(wsmp_mat), INTENT(inout)    :: mat
+!
+    INTEGER :: n1, nidbas1, ndim1, ng1
+    INTEGER :: n2, nidbas2, ndim2, ng2
+    INTEGER :: i, j, ig1, ig2
+    INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
+    DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:)
+    DOUBLE PRECISION:: contrib
+!
+    INTEGER :: kterms         ! Number of terms in weak form
+    INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:)  ! Derivative order
+    DOUBLE PRECISION, ALLOCATABLE  :: coefs(:,:,:)          ! Terms in weak form
+    INTEGER, ALLOCATABLE :: left1(:), left2(:)
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
+    WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
+    WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
+!
+!   Gauss quadature
+!
+    CALL get_gauss(spl%sp1, ng1)
+    CALL get_gauss(spl%sp2, ng2)
+    ALLOCATE(xg1(ng1), wg1(ng1))
+    ALLOCATE(xg2(ng1), wg2(ng1))
+!
+!   Weak form
+!
+    kterms = mat%nterms
+    ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
+    ALLOCATE(coefs(kterms,ng1,ng2))
+!
+    ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative
+    ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! 
+!===========================================================================
+!              2.0 Assembly loop
+!
+    ALLOCATE(left1(ng1))
+    ALLOCATE(left2(ng2))
+    DO i=1,n1
+       CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
+       left1 = i
+       CALL basfun(xg1, spl%sp1, fun1, left1)
+       DO j=1,n2
+          CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
+          left2 = j
+          CALL basfun(xg2, spl%sp2, fun2, left2)
+!
+          DO ig1=1,ng1
+             DO ig2=1,ng2
+                CALL coefeq(xg1(ig1), xg2(ig2), &
+                     &      idert(:,:,ig1,ig2), &
+                     &      iderw(:,:,ig1,ig2), &
+                     &      coefs(:,ig1,ig2))
+             END DO
+          END DO
+!
+          DO iw1=0,nidbas1  ! Weight function in dir 1
+             igw1 = i+iw1
+             DO iw2=0,nidbas2  ! Weight function in dir 2
+                igw2 = MODULO(j+iw2-1, n2) + 1
+                irow = igw2 + (igw1-1)*n2
+                DO it1=0,nidbas1  ! Test function in dir 1
+                   igt1 = i+it1
+                   DO it2=0,nidbas2  ! Test function in dir 2
+                      igt2 = MODULO(j+it2-1, n2) + 1
+                      jcol = igt2 + (igt1-1)*n2
+!-------------
+                      contrib = 0.0d0
+                      DO ig1=1,ng1
+                         DO ig2=1,ng2
+                            DO iterm=1,kterms
+                               contrib = contrib + &
+                                    &    fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
+                                    &    fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * &
+                                    &    coefs(iterm,ig1,ig2) *                 &
+                                    &    fun2(it2,idert(iterm,2,ig1,ig2),ig2) * &
+                                    &    fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
+                                    &    wg1(ig1) * wg2(ig2)
+                            END DO
+                         END DO
+                      END DO
+                      CALL updtmat(mat, irow, jcol, contrib)
+!-------------
+                   END DO
+                END DO
+             END DO
+          END DO
+       END DO
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(xg1, wg1, fun1)
+    DEALLOCATE(xg2, wg2, fun2)
+    DEALLOCATE(idert, iderw, coefs)
+    DEALLOCATE(left1,left2)
+!
+  CONTAINS
+    SUBROUTINE coefeq(x, y, idt, idw, c)
+      DOUBLE PRECISION, INTENT(in) :: x, y
+      INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
+      DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
+!
+! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
+!
+      c(1) = x        ! 
+      idt(1,1) = 1
+      idt(1,2) = 0
+      idw(1,1) = 1
+      idw(1,2) = 0
+      !
+      c(2) = 1.d0/x
+      idt(2,1) = 0
+      idt(2,2) = 1
+      idw(2,1) = 0
+      idw(2,2) = 1
+    END SUBROUTINE coefeq
+  END SUBROUTINE dismat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE disrhs(mbess, spl, rhs)
+!
+!   Assembly the RHS using 2d spline spl
+!
+    INTEGER, INTENT(in) :: mbess
+    TYPE(spline2d), INTENT(in) :: spl
+    DOUBLE PRECISION, INTENT(out) :: rhs(:)
+    INTEGER :: n1, nidbas1, ndim1, ng1
+    INTEGER :: n2, nidbas2, ndim2, ng2
+    INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
+    DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
+    DOUBLE PRECISION:: contrib
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, n2, nidbas2) 
+!
+    ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
+    ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
+!
+!   Gauss quadature
+!
+    CALL get_gauss(spl%sp1, ng1)
+    CALL get_gauss(spl%sp2, ng2)
+    ALLOCATE(xg1(ng1), wg1(ng1))
+    ALLOCATE(xg2(ng1), wg2(ng1))
+!===========================================================================
+!              2.0 Assembly loop
+!
+    nrank = SIZE(rhs)
+    rhs(1:nrank) = 0.0d0
+!
+    DO i=1,n1
+       CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
+       DO ig1=1,ng1
+          CALL basfun(xg1(ig1), spl%sp1, fun1, i)
+          DO j=1,n2
+             CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
+             DO ig2=1,ng2
+                CALL basfun(xg2(ig2), spl%sp2, fun2, j)
+                contrib = wg1(ig1)*wg2(ig2) * &
+                     &    rhseq(xg1(ig1),xg2(ig2), mbess)
+                DO k1=0,nidbas1
+                   i1 = i+k1
+                   DO k2=0,nidbas2
+                      j2 = MODULO(j+k2-1,n2) + 1
+                      ij = j2 + (i1-1)*n2
+                      rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
+                   END DO
+                END DO
+             END DO
+          END DO
+       END DO
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(xg1, wg1, fun1)
+    DEALLOCATE(xg2, wg2, fun2)
+!
+  CONTAINS
+    DOUBLE PRECISION FUNCTION rhseq(x1, x2, m)
+      DOUBLE PRECISION, INTENT(in) :: x1, x2
+      INTEGER, INTENT(in) :: m
+      rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)
+    END FUNCTION rhseq
+  END SUBROUTINE disrhs
+!
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE ibcmat(mat, ny)
+!
+!   Apply BC on matrix
+!
+    TYPE(wsmp_mat), INTENT(inout) :: mat
+    INTEGER, INTENT(in) :: ny
+    INTEGER :: nrank, i, j
+    DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
+!===========================================================================
+!              1.0 Prologue
+!
+    nrank = mat%rank
+    ALLOCATE(zsum(nrank), arr(nrank))
+!===========================================================================
+!              2.0 Unicity at the axis
+!
+!   The vertical sum on the NY-th row
+!
+    zsum = 0.0d0
+    DO i=1,ny
+       arr = 0.0d0
+       CALL getrow(mat, i, arr)
+       zsum(:) = zsum(:) + arr(:)
+    END DO
+    zsum(ny) = SUM(zsum(1:ny))   ! using symmetry
+    CALL putrow(mat, ny, zsum)
+!
+!   The away operator
+!
+    DO i = 1,ny-1
+       arr = 0.0d0; arr(i) = 1.0d0
+       CALL putrow(mat, i, arr)     
+    END DO
+!  
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+    DO i = nrank, nrank-ny+1, -1
+       arr = 0.0d0; arr(i) = 1.0d0
+       CALL putrow(mat, i, arr)     
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(zsum, arr)
+!
+  END SUBROUTINE ibcmat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE ibcrhs(rhs, ny)
+!
+!   Apply BC on RHS
+!
+    DOUBLE PRECISION, INTENT(inout) :: rhs(:)
+    INTEGER, INTENT(in) :: ny
+    INTEGER :: nrank
+    DOUBLE PRECISION :: zsum
+!===========================================================================
+!              1.0 Prologue
+!
+    nrank = SIZE(rhs,1)
+!===========================================================================
+!              2.0 Unicity at the axis
+!
+!   The vertical sum
+!
+    zsum = SUM(rhs(1:ny))
+    rhs(ny) = zsum
+    rhs(1:ny-1) = 0.0d0
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+    rhs(nrank-ny+1:nrank) = 0.0d0
+  END SUBROUTINE ibcrhs
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+END MODULE pde2d_sym_wsmp_mod
+PROGRAM main
+  USE pde2d_sym_wsmp_mod
+  USE futils
+!
+  IMPLICIT NONE
+  INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms
+  LOGICAL :: nlppform
+  INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it
+  DOUBLE PRECISION :: pi, coefx(5), coefy(5)
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol
+  TYPE(spline2d) :: splxy
+  TYPE(wsmp_mat) :: mat
+!
+  CHARACTER(len=128) :: file='pde2d_sym_wsmp.h5'
+  INTEGER :: fid
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
+  DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
+  DOUBLE PRECISION :: seconds, mem, dopla
+  DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
+  DOUBLE PRECISION :: tconv, treord
+  INTEGER :: nits=100
+  LOGICAL :: nlforce_zero, nlpos
+!
+  NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlpos, &
+       &            nlforce_zero, coefx, coefy
+!===========================================================================
+!              1.0 Prologue
+!
+!   Read in data specific to run
+!
+  nx = 8              ! Number of intervals in x
+  ny = 8              ! Number of intervals in y
+  nidbas = (/3,3/)    ! Degree of splines
+  ngauss = (/4,4/)    ! Number of Gauss points/interval
+  mbess = 2           ! Exponent of differential problem
+  nterms = 2          ! Number of terms in weak form
+  nlppform = .TRUE.   ! Use PPFORM for gridval or not
+  nlforce_zero = .FALSE. ! Remove existing nodes with zero val in  putele/row/ele
+  nlpos = .TRUE.         ! Matrix is positive definite
+  coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+  coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Define grid on x (=radial) & y (=poloidal) axis
+!
+  pi = 4.0d0*ATAN(1.0d0)
+  ALLOCATE(xgrid(0:nx), ygrid(0:ny))
+  xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
+  CALL meshdist(coefx, xgrid, nx)
+  ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
+  CALL meshdist(coefy, ygrid, ny)
+!
+!   Create hdf5 file
+!
+  CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
+  CALL attach(fid, '/', 'NX', nx)
+  CALL attach(fid, '/', 'NY', ny)
+  CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
+  CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
+  CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
+  CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
+  CALL attach(fid, '/', 'MBESS', mbess)
+!===========================================================================
+!              2.0 Discretize the PDE
+!
+!   Set up spline
+!
+  t0 = seconds()
+  CALL set_spline(nidbas, ngauss, &
+       &          xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
+!
+!   FE matrix assembly
+!
+  nrank = (nx+nidbas(1))*ny    ! Rank of the FE matrix
+  WRITE(*,'(a,i8)') 'nrank', nrank
+!
+  CALL init(nrank, nterms, mat, nlsym=.TRUE., nlpos=nlpos)
+  CALL dismat(splxy, mat)
+  ALLOCATE(arr(nrank))
+  IF(nrank.LT.100) THEN
+     DO i=1,nrank
+        CALL getele(mat, i, i, arr(i))
+     END DO
+     WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', arr
+  END IF
+!
+!   BC on Matrix
+!
+  WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero
+  WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
+  CALL ibcmat(mat, ny)
+  IF(nrank.LT.100) THEN
+     DO i=1,nrank
+        CALL getele(mat, i, i, arr(i))
+     END DO
+     WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', arr
+     WRITE(*,'(a)') 'Last rows'
+     DO i=nrank-ny,nrank
+        CALL getrow(mat, i, arr)
+        WRITE(*,'(10(1pe12.3))') arr
+     END DO
+  END IF
+  tmat = seconds() - t0
+!
+!   RHS assembly
+!
+  ALLOCATE(rhs(nrank), sol(nrank))
+  CALL disrhs(mbess, splxy, rhs)
+!
+!   BC on RHS
+!
+  CALL ibcrhs(rhs, ny)
+
+  CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
+  WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
+  WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem()
+!===========================================================================
+!              3.0 Solve the dicretized system
+!
+  t0 = seconds()
+  CALL factor(mat)
+!
+!   The call to "factor" could be split into the
+!   3 following calls
+!
+!!$  CALL to_mat(mat)
+!!$  tconv = seconds() -t0
+!!$  WRITE(*,'(/a,l4)') 'associated(mat%mat)', ASSOCIATED(mat%mat)
+!!$  WRITE(*,'(a,1pe12.3)') 'Mem used (MB) after TO_MAT', mem()
+!!$!
+!!$  t0 = seconds()
+!!$  CALL reord_mat(mat)
+!!$  CALL putmat(fid, '/MAT', mat)
+!!$  treord = seconds() - t0
+!!$!
+!!$  t0 = seconds()
+!!$  CALL numfact(mat)
+  tfact = seconds() - t0
+ 
+  WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem()
+  WRITE(*,'(/a,i6)') 'Number of nonzeros in factor (/1000) = ',mat%p%iparm(24)
+  WRITE(*,'(a,1pe12.3)')  'Number of factorization GFLOPS      = ',mat%p%dparm(23)/1.d9
+  gflops1 = mat%p%dparm(23) / tfact / 1.d9
+!
+  CALL bsolve(mat, rhs, sol)
+  t0 = seconds()
+  DO it=1,nits   ! nits iterations for timing
+     CALL bsolve(mat, rhs, sol)
+     sol(1:ny-1) = sol(ny)
+  END DO
+  WRITE(*,'(/a,i6)') 'Number of refinement steps = ',mat%p%iparm(6)
+  WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem()
+  tsolv = (seconds() - t0)/REAL(nits)
+!
+!   Spline coefficients, taking into account of periodicity in y
+!   Note: in SOL, y was numbered first.
+!
+  dimx = splxy%sp1%dim
+  dimy = splxy%sp2%dim
+  ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
+  DO j=0,dimy-1
+     DO i=0,dimx-1
+        ij = MODULO(j,ny) + i*ny + 1
+        bcoef(i,j) = sol(ij)
+     END DO
+  END DO
+  WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef)
+  CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
+!===========================================================================
+!              4.0 Check the solution
+!
+!   Check function values computed with various method
+!
+  ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
+  DO i=0,nx
+     DO j=0,ny
+        solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
+     END DO
+  END DO
+  jder = (/0,0/)
+!
+!   Compute PPFORM/BCOEFS at first call to gridval
+  CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
+!
+  WRITE(*,'(/a)') '*** Checking solutions'
+  t0 = seconds()
+  DO it=1,nits   ! nits iterations for timing
+     CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+  END DO
+  tgrid = (seconds() - t0)/REAL(nits)
+  errsol = solana - solcal
+  IF( SIZE(bcoef,2) .LE. 10 ) THEN
+     CALL prnmat('BCOEF', bcoef)
+     CALL prnmat('SOLANA', solana)
+     CALL prnmat('SOLCAL', solcal)
+     CALL prnmat('ERRSOL', errsol)
+  END IF
+  WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
+       &    norm2(errsol) / norm2(solana)
+  WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s)             ', tgrid
+!
+  CALL putarr(fid, '/xgrid', xgrid, 'r')
+  CALL putarr(fid, '/ygrid', ygrid, '\theta')
+  CALL putarr(fid, '/sol', solcal, 'Solutions')
+  CALL putarr(fid, '/solana', solana,'Exact solutions')
+  CALL putarr(fid, '/errors', errsol, 'Errors')
+!
+!   Check derivatives d/dx and d/dy
+!
+  WRITE(*,'(/a)') '*** Checking gradient'
+  DO i=0,nx
+     DO j=0,ny
+        IF( mbess .EQ. 0 ) THEN
+           solana(i,j) = -2.0d0 * xgrid(i)
+        ELSE
+           solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
+             &           xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
+        END IF
+     END DO
+  END DO
+!
+  jder = (/1,0/)
+  CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+  errsol = solana - solcal
+  CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
+  WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx',  norm2(errsol)
+!
+  DO i=0,nx
+     DO j=0,ny
+        solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
+     END DO
+  END DO
+!
+  jder = (/0,1/)
+  CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+  CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
+  errsol = solana - solcal
+  WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy',  norm2(errsol)
+!
+  WRITE(*,'(/a)') '---'
+  WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
+  WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s)   ', tconv
+  WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s)      ', treord
+  WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
+  WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv
+  WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s)         ', tsolv
+  WRITE(*,'(a,2f10.3)') 'Factor  Gflop/s', gflops1
+!===========================================================================
+!              5.0 Clear the matrix and recompute
+!
+  WRITE(*,'(/a)') 'Recompute the solver ...'
+  t0 = seconds()
+  CALL clear_mat(mat)
+  CALL dismat(splxy, mat)
+  CALL ibcmat(mat, ny)
+  tmat = seconds()-t0
+!
+  t0 = seconds()
+  CALL numfact(mat)
+  tfact = seconds()-t0
+  gflops1 = mat%p%dparm(23) / tfact / 1.d9
+!
+  t0 = seconds()
+  ALLOCATE(newsol(nrank))
+  CALL bsolve(mat, rhs, newsol)
+  newsol(1:ny-1) = newsol(ny)
+  tsolv = seconds()-t0
+!
+  WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol))
+  WRITE(*,'(/a)') '---'
+  WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
+  WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
+  WRITE(*,'(a,1pe12.3)') 'Backsolve(2) time (s)         ', tsolv
+  WRITE(*,'(a,1pe12.3)') 'Total (s)                     ', tmat+tfact+tsolv
+  WRITE(*,'(a,2f10.3)') 'Factor  Gflop/s', gflops1
+!
+  DEALLOCATE(newsol)
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(xgrid, rhs, sol)
+  DEALLOCATE(solcal, solana, errsol)
+  DEALLOCATE(bcoef)
+  DEALLOCATE(arr)
+  CALL destroy_sp(splxy)
+  CALL destroy(mat)
+!
+  CALL closef(fid)
+!===========================================================================
+!
+CONTAINS
+  FUNCTION norm2(x)
+!
+!  Compute the 2-norm of array x
+!
+    IMPLICIT NONE
+    DOUBLE PRECISION :: norm2
+    DOUBLE PRECISION, INTENT(in) :: x(:,:)
+    DOUBLE PRECISION :: sum2
+    INTEGER :: i, j
+!
+    sum2 = 0.0d0
+    DO i=1,SIZE(x,1)
+       DO j=1,SIZE(x,2)
+          sum2 = sum2 + x(i,j)**2
+       END DO
+    END DO
+    norm2 = SQRT(sum2)
+  END FUNCTION norm2
+  SUBROUTINE prnmat(label, mat)
+    CHARACTER(len=*) :: label
+    DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
+    INTEGER :: i
+    WRITE(*,'(/a)') TRIM(label)
+    DO i=1,SIZE(mat,1)
+       WRITE(*,'(10(1pe12.3))') mat(i,:)
+    END DO
+  END SUBROUTINE prnmat
+END PROGRAM main
+!
+!+++
+SUBROUTINE meshdist(c, x, nx)
+!
+!   Construct an 1d  non-equidistant mesh given a
+!   mesh distribution function.
+!
+  IMPLICIT NONE
+  DOUBLE PRECISION, INTENT(in) :: c(5)
+  INTEGER, INTENT(iN) :: nx
+  DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+  INTEGER :: nintg
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+  DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+  INTEGER :: i, k
+!
+  a=x(0)
+  b=x(nx)
+  nintg = 10*nx
+  ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+  dx = (b-a)/REAL(nintg)
+  xint(0) = a
+  fint(0) = 0.0d0
+  f1 = fdist(xint(0))
+  DO i=1,nintg
+     f0 = f1
+     xint(i) = xint(i-1) + dx
+     f1 = fdist(xint(i))
+     fint(i) = fint(i-1) + 0.5*(f0+f1)
+  END DO
+!
+!  Normalization
+!
+  scal = REAL(nx) / fint(nintg)
+  fint(0:nintg) = fint(0:nintg) * scal
+!!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+  k = 1
+  DO i=1,nintg-1
+     IF( fint(i) .GE. REAL(k) ) THEN
+        x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+             &   (k-fint(i))
+        k = k+1
+     END IF
+  END DO
+!
+  DEALLOCATE(xint, fint)
+CONTAINS
+  DOUBLE PRECISION FUNCTION fdist(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
+  END FUNCTION fdist
+END SUBROUTINE meshdist
+!+++
diff --git a/examples/pde2d_sym_wsmp_dft.f90 b/examples/pde2d_sym_wsmp_dft.f90
new file mode 100644
index 0000000..f3226e9
--- /dev/null
+++ b/examples/pde2d_sym_wsmp_dft.f90
@@ -0,0 +1,1039 @@
+!>
+!> @file pde2d_sym_wsmp_dft.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Ben McMillan <ben.mcmillan@epfl.ch>
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+!
+!  Solving the 2d PDE using splines and WSMP symmetric matrix.
+!  The periodic coordinate y is discrete Fourier transformed.
+!
+!    -d/dx[x C d/dx]f - 1x/d/dy[Cd/dy] f = \rho, with f(x=1,y) = 0
+!     C(x,y) = 1 + \epsilon x cos(y)
+!     exact solution: f(x,y) = (1-x^2) x^m cos(my)
+!
+MODULE pde2d_sym_wsmp_dft_mod
+  USE bsplines
+  USE wsmp_bsplines
+  IMPLICIT NONE
+!
+CONTAINS
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE dismat(spl, epsi, mat)
+!
+!   Assembly of FE matrix mat using spline spl
+!
+    TYPE(spline2d), INTENT(in)       :: spl
+    DOUBLE PRECISION, INTENT(in)     :: epsi
+    TYPE(zwsmp_mat), INTENT(inout) :: mat
+!
+    INTEGER :: n1, nidbas1, ndim1, ng1
+    INTEGER :: n2, nidbas2, ndim2, ng2
+    INTEGER :: kmin, kmax, dk
+    INTEGER :: i, j, ig1, ig2, kc
+    INTEGER :: iterm, iw1, mw, igw1, it1, mt, igt1, irow, jcol
+    DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:)
+    DOUBLE COMPLEX, ALLOCATABLE   :: ft_fun2(:,:,:), fft_temp(:)
+    DOUBLE COMPLEX :: contrib
+!
+    INTEGER :: kterms         ! Number of terms in weak form
+    INTEGER :: kcoupl         ! Number of mode couplings
+    INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:)  ! Derivative order
+    DOUBLE COMPLEX, ALLOCATABLE  :: coefs(:,:,:,:)          ! Terms in weak form
+    INTEGER, ALLOCATABLE :: left1(:), left2(:)
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
+    kmin = spl%sp2%dft%kmin
+    kmax = spl%sp2%dft%kmax
+    dk   = spl%sp2%dft%dk
+    WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
+    WRITE(*,'(a, 5i6)')  'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
+    WRITE(*,'(a, 5i6)')  'kmin, kmax, dk     =', kmin, kmax, dk
+!
+!   Gauss quadature
+!
+    CALL get_gauss(spl%sp1, ng1)
+    CALL get_gauss(spl%sp2, ng2)
+    ALLOCATE(xg1(ng1), wg1(ng1))
+    ALLOCATE(xg2(ng2), wg2(ng2))
+!
+!   Weak form
+!
+    kterms = mat%nterms
+    kcoupl = SIZE(spl%sp2%dft%mode_couplings)
+    ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
+    ALLOCATE(coefs(kterms,kcoupl,ng1,ng2))
+!
+!   Splines and derivatives at all Gauss points
+!
+    ALLOCATE(fun1(0:nidbas1,0:1,ng1))    ! Spline and 1st derivative
+    ALLOCATE(ft_fun2(kmin:kmax,0:1,ng2)) ! DFT of splines and 1st derivative
+    ALLOCATE(fft_temp(0:n2-1))           ! Used in coefeq
+!===========================================================================
+!              2.0 Assembly loop
+!
+    ALLOCATE(left1(ng1))
+    ALLOCATE(left2(ng2))
+!
+! First interval in 2nd (periodic) coordinate
+!
+    j = 1
+    CALL get_gauss(spl%sp2, ng2, 1, xg2, wg2)
+    left2 = j
+    CALL ft_basfun(xg2, spl%sp2, ft_fun2, left2)
+
+    DO i=1,n1
+       CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
+       left1 = i
+       CALL basfun(xg1, spl%sp1, fun1, left1)
+       DO ig1=1,ng1
+          DO ig2=1,ng2
+             CALL coefeq(xg1(ig1), xg2(ig2), &
+                  &      idert(:,:,ig1,ig2), &
+                  &      iderw(:,:,ig1,ig2), &
+                  &      coefs(:,:,ig1,ig2))
+          END DO
+       END DO
+!
+       DO iw1=0,nidbas1     ! Weight function in dir 1
+          igw1 = i+iw1
+          DO it1=0,nidbas1  ! Test function in dir 1
+             igt1 = i+it1
+             DO mt=kmin,kmax ! Test Fourier mode
+                DO kc=1,kcoupl
+                   mw = mt + spl%sp2%dft%mode_couplings(kc)
+                   IF(mw.LT.kmin .OR. mw.GT.kmax) CYCLE
+!-------------
+                   contrib = (0.0d0, 0.0d0)
+                   DO ig1=1,ng1
+                      DO ig2=1,ng2
+                         DO iterm=1,kterms
+                            contrib = contrib + &
+                              &    fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
+                              &    ft_fun2(mw,iderw(iterm,2,ig1,ig2),ig2) * &
+                              &    coefs(iterm,kc,ig1,ig2) *                 &
+                              &    CONJG(ft_fun2(mt,idert(iterm,2,ig1,ig2),ig2)) * &
+                              &    fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
+                              &    wg1(ig1) * wg2(ig2) /REAL(n2,8)
+                         END DO
+                      END DO
+                   END DO
+                   irow = (igw1-1)*dk + (mw-kmin)+1  ! Number first mode m then radial coord.
+                   jcol = (igt1-1)*dk + (mt-kmin)+1
+                   CALL updtmat(mat, irow, jcol, contrib)
+!-------------
+                END DO
+             END DO
+          END DO
+       END DO
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(xg1, wg1, fun1)
+    DEALLOCATE(xg2, wg2, ft_fun2)
+    DEALLOCATE(idert, iderw, coefs)
+    DEALLOCATE(left1,left2)
+    DEALLOCATE(fft_temp)
+!
+  CONTAINS
+    SUBROUTINE coefeq(x, y, idt, idw, c)
+      USE fft
+      DOUBLE PRECISION, INTENT(in) :: x, y
+      INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
+      DOUBLE COMPLEX, INTENT(out) :: c(:,:)
+!
+      DOUBLE PRECISION :: zcoef, dy
+      INTEGER :: j, k, kc, kp
+!
+! Weak form = Int(x*C*dw/dx*dt/dx + C/x*dw/dy*dt/dy)dxdy
+!      C(x,y) = 1 + epsilon*x*cos(y)
+!
+      dy = spl%sp2%dft%dx
+      kc = SIZE(spl%sp2%dft%mode_couplings)
+      DO j=0,n2-1
+         fft_temp(j) = 1.0d0+epsi*x*COS(y+j*dy)
+      END DO
+      CALL fourcol(fft_temp,1)
+      DO k=1,kc
+         kp = spl%sp2%dft%mode_couplings(k)
+         IF(kp.LT.0) kp=kp+n2
+         c(1,k) = x*fft_temp(kp)
+         c(2,k) = fft_temp(kp)/x
+      END DO
+!!$      WRITE(*,'(a/(10(1pe12.4)))') 'fft_temp', ABS(fft_temp)
+!!$      WRITE(*,'(a/(10(1pe12.4)))') 'c1', ABS(c(1,:))
+!!$      WRITE(*,'(a/(10(1pe12.4)))') 'c2', ABS(c(2,:))
+!
+      idt(1,1) = 1
+      idt(1,2) = 0
+      idw(1,1) = 1
+      idw(1,2) = 0
+!
+      idt(2,1) = 0
+      idt(2,2) = 1
+      idw(2,1) = 0
+      idw(2,2) = 1
+    END SUBROUTINE coefeq
+  END SUBROUTINE dismat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE disrhs(mbess, epsi, spl, rhs)
+!
+!   Assembly the RHS using 2d spline spl
+!
+    INTEGER, INTENT(in)           :: mbess
+    DOUBLE PRECISION, INTENT(in)  :: epsi
+    TYPE(spline2d), INTENT(in)    :: spl
+    DOUBLE PRECISION, INTENT(out) :: rhs(:)
+!
+    INTEGER :: n1, nidbas1, ndim1, ng1
+    INTEGER :: n2, nidbas2, ndim2, ng2
+    INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
+    DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
+    DOUBLE PRECISION:: contrib
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, n2, nidbas2) 
+!
+    ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
+    ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
+!
+!   Gauss quadature
+!
+    CALL get_gauss(spl%sp1, ng1)
+    CALL get_gauss(spl%sp2, ng2)
+    ALLOCATE(xg1(ng1), wg1(ng1))
+    ALLOCATE(xg2(ng1), wg2(ng1))
+!===========================================================================
+!              2.0 Assembly loop
+!
+    nrank = SIZE(rhs)
+    rhs(1:nrank) = 0.0d0
+!
+    DO i=1,n1
+       CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
+       DO ig1=1,ng1
+          CALL basfun(xg1(ig1), spl%sp1, fun1, i)
+          DO j=1,n2
+             CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
+             DO ig2=1,ng2
+                CALL basfun(xg2(ig2), spl%sp2, fun2, j)
+                contrib = wg1(ig1)*wg2(ig2) * &
+                     &    rhseq(xg1(ig1),xg2(ig2), mbess)
+                DO k1=0,nidbas1
+                   i1 = i+k1
+                   DO k2=0,nidbas2
+                      j2 = MODULO(j+k2-1,n2) + 1
+                      ij = j2 + (i1-1)*n2
+                      rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
+                   END DO
+                END DO
+             END DO
+          END DO
+       END DO
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(xg1, wg1, fun1)
+    DEALLOCATE(xg2, wg2, fun2)
+!
+  CONTAINS
+    DOUBLE PRECISION FUNCTION rhseq(x, y, m)
+      DOUBLE PRECISION, INTENT(in) :: x, y
+      INTEGER, INTENT(in)          :: m
+!
+      DOUBLE PRECISION :: xm
+      xm = REAL(m,8)
+      rhseq = x**(m+1) * ( 4.0d0*(xm+1.0d0)*COS(xm*y) + &
+           &  epsi*x*( &
+           &    ( (3.0d0*(xm+1.0d0) - xm/x**2)*COS((xm-1.0d0)*y) + &
+           &     (3.0d0+2.0d0*xm)*COS((xm+1.0d0)*y) ) &
+           &    ))
+    END FUNCTION rhseq
+  END SUBROUTINE disrhs
+!
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE ibcmat(mat, dft)
+!
+!   Apply BC on matrix
+!
+    TYPE(zwsmp_mat), INTENT(inout) :: mat
+    TYPE(dftmap), INTENT(in)          :: dft
+    INTEGER :: nrank, k, kmin, kmax, dk, i
+    DOUBLE COMPLEX :: arr(mat%rank)
+!===========================================================================
+!              1.0 Prologue
+!
+    nrank = mat%rank
+    kmin = dft%kmin
+    kmax = dft%kmax
+    dk   = dft%dk
+!===========================================================================
+!              2.0 BC at the axis
+!
+!   zero for non-zero modes
+!
+    DO k=kmin,kmax
+       IF(k.NE.0) THEN
+          i = k-kmin+1
+          arr = 0.0d0; arr(i) = 1.0d0
+          CALL putrow(mat, i, arr)
+       END IF
+    END DO
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+    DO i = nrank, nrank-dk+1, -1
+       arr = 0.0d0; arr(i) = 1.0d0
+       CALL putrow(mat, i, arr)     
+    END DO
+!
+  END SUBROUTINE ibcmat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE ibcrhs(rhs, dft)
+!
+!   Apply BC on RHS
+!
+    DOUBLE COMPLEX, INTENT(inout) :: rhs(:)
+    TYPE(dftmap), INTENT(in)      :: dft
+    INTEGER :: nrank, kmin, kmax, dk, k
+!===========================================================================
+!              1.0 Prologue
+!
+    nrank = SIZE(rhs,1)
+    kmin = dft%kmin
+    kmax = dft%kmax
+    dk = dft%dk
+!===========================================================================
+!              2.0 BC at the axis
+!
+!   zero for non-zero modes
+!
+    DO k=kmin,kmax
+       IF(k.NE.0) rhs(k-kmin+1) = 0.0
+    END DO
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+    rhs(nrank-dk+1:nrank) = 0.0d0
+  END SUBROUTINE ibcrhs
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE spectrum0(spl, carr, xpt, cspec)
+!
+!  DFT modes at xpt (integration on the first interval)
+!
+    DOUBLE COMPLEX, PARAMETER   :: ci = (0.0d0,1.0d0)
+    DOUBLE PRECISION, PARAMETER :: pi = 3.141592653589793d0
+    TYPE(spline2d), INTENT(in)   :: spl
+    DOUBLE COMPLEX, INTENT(in)   :: carr(:)
+    DOUBLE PRECISION, INTENT(in) :: xpt
+    DOUBLE COMPLEX, INTENT(out)  :: cspec(:)
+!
+    INTEGER :: ndim1, n1, nidbas1, ndim2, n2, nidbas2
+    INTEGER :: k,  kmin, kmax, dk, kk
+    INTEGER :: ng2, ig2
+    INTEGER, ALLOCATABLE :: left2(:)
+    DOUBLE PRECISION :: temp(1)
+    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:)
+    DOUBLE COMPLEX, ALLOCATABLE :: ft_fun2(:,:,:), psi(:), coefs(:,:)
+!
+    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
+    kmin = spl%sp2%dft%kmin
+    kmax = spl%sp2%dft%kmax
+    dk   = spl%sp2%dft%dk
+!
+    CALL get_gauss(spl%sp2, ng2)
+    ALLOCATE(left2(ng2))
+    ALLOCATE(xg2(ng2), wg2(ng2))
+    ALLOCATE(ft_fun2(kmin:kmax,1,ng2)) ! DFT of splines
+    ALLOCATE(psi(kmin:kmax))
+!
+!  Integration over first interval
+!
+    left2 = 1
+    CALL get_gauss(spl%sp2, ng2, 1, xg2, wg2)
+    CALL ft_basfun(xg2, spl%sp2, ft_fun2, left2)
+    psi = (0.0d0,0.0d0)
+    DO k=kmin,kmax
+       DO ig2=1,ng2
+          psi(k)  = psi(k) + wg2(ig2)*EXP(k*ci*xg2(ig2))*CONJG(ft_fun2(k,1,ig2))
+       END DO
+    END DO
+!
+    ALLOCATE(coefs(dk,ndim1))
+    coefs = RESHAPE(carr, SHAPE(coefs))
+    temp = xpt
+    DO kk=kmin,kmax
+       k=kk-kmin+1
+       coefs(k,:) = psi(kk)*coefs(k,:)
+       CALL gridval(spl%sp1, temp, cspec(k:k), 0, coefs(k,:))
+    END DO
+    cspec = cspec/(2.0d0*pi)
+!
+    DEALLOCATE(left2)
+    DEALLOCATE(xg2, wg2)
+    DEALLOCATE(ft_fun2)
+    DEALLOCATE(psi)
+    DEALLOCATE(coefs)    
+  END SUBROUTINE spectrum0
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE spectrum1(spl, carr, xpt, ypt0, cspec)
+!
+!  DFT modes at xpt (at the initial ypt0)
+!
+    DOUBLE COMPLEX, PARAMETER   :: ci = (0.0d0,1.0d0)
+!
+    TYPE(spline2d), INTENT(in)   :: spl
+    DOUBLE COMPLEX, INTENT(in)   :: carr(:)
+    DOUBLE PRECISION, INTENT(in) :: xpt, ypt0
+    DOUBLE COMPLEX, INTENT(out)  :: cspec(:)
+!
+    INTEGER :: ndim1, n1, nidbas1, ndim2, n2, nidbas2
+    INTEGER :: k,  kmin, kmax, dk
+    DOUBLE PRECISION :: temp(1)
+    DOUBLE COMPLEX :: ctemp(1)
+    DOUBLE COMPLEX, ALLOCATABLE :: ft_fun2(:,:), coefs(:,:)
+!
+    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
+    kmin = spl%sp2%dft%kmin
+    kmax = spl%sp2%dft%kmax
+    dk   = spl%sp2%dft%dk
+!
+! DFT of splines at ypt0
+    ALLOCATE(ft_fun2(kmin:kmax,1)) 
+    ALLOCATE(coefs(kmin:kmax,ndim1))
+    CALL ft_basfun(ypt0, spl%sp2, ft_fun2, 1)
+    coefs = RESHAPE(carr, SHAPE(coefs))
+!
+    temp = xpt
+    DO k=kmin,kmax
+       CALL gridval(spl%sp1, temp, ctemp, 0, coefs(k,:))
+       cspec(k-kmin+1) = CONJG(ft_fun2(k,1))*ctemp(1)*EXP(k*ci*ypt0)
+    END DO
+    cspec = cspec/REAL(n2,8)
+!
+    DEALLOCATE(ft_fun2)
+    DEALLOCATE(coefs)    
+  END SUBROUTINE spectrum1
+!!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE spectrum2(spl, xpt, ypt0, cspec)
+!
+!  DFT modes at xpt (at the initial ypt0)
+!
+    USE fft
+    USE bsplines
+!
+    DOUBLE COMPLEX, PARAMETER   :: ci = (0.0d0,1.0d0)
+!
+    TYPE(spline2d)               :: spl
+    DOUBLE PRECISION, INTENT(in) :: xpt, ypt0
+    DOUBLE COMPLEX, INTENT(out)  :: cspec(:)
+!
+    INTEGER :: ndim1, n1, nidbas1, ndim2, n2, nidbas2
+    INTEGER :: k,  kmin, kmax, dk
+    DOUBLE PRECISION, ALLOCATABLE :: ypt(:), fun(:,:)
+    DOUBLE COMPLEX, ALLOCATABLE ::   ft_fun(:)
+    DOUBLE PRECISION :: temp(1)
+!
+    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
+    kmin = spl%sp2%dft%kmin
+    kmax = spl%sp2%dft%kmax
+    dk   = spl%sp2%dft%dk
+!
+    ALLOCATE(ypt(0:n2-1))
+    ALLOCATE(fun(1, 0:n2-1))
+    ALLOCATE(ft_fun(0:n2-1))
+!
+!  Function values at points ypt
+!
+    ypt(0:n2-1) = ypt0 + spl%sp2%knots(0:n2-1)
+    temp = xpt
+    CALL gridval(spl, temp, ypt, fun, (/0,0/))
+    ft_fun = fun(1,:)
+!
+!  Discrete Fourier Transform
+!
+    CALL fourcol(ft_fun, 1)
+    DO k=kmin,kmax
+       IF(k.LT.0) THEN
+          cspec(k-kmin+1) = ft_fun(k+n2)*EXP(k*ci*ypt0)
+       ELSE
+          cspec(k-kmin+1) = ft_fun(k)*EXP(k*ci*ypt0)
+       END IF
+    END DO
+    cspec = cspec/REAL(n2,8)
+!
+    DEALLOCATE(ypt)
+    DEALLOCATE(fun)
+    DEALLOCATE(ft_fun)
+  END SUBROUTINE spectrum2
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+END MODULE pde2d_sym_wsmp_dft_mod
+PROGRAM main
+  USE pde2d_sym_wsmp_dft_mod
+  USE futils
+  USE fft
+!
+  IMPLICIT NONE
+  INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms, kmin, kmax, dk
+  INTEGER :: n_mode_couplings
+  INTEGER, ALLOCATABLE :: mode_couplings(:)
+  LOGICAL :: nlppform
+  INTEGER :: i, j, ij, dimx, dimy, nrank, nrank_full, jder(2), it, i0, i0_r
+  INTEGER :: k, kp, ik
+  DOUBLE PRECISION :: pi, epsi, coefx(5), coefy(5)
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
+  DOUBLE COMPLEX, ALLOCATABLE   ::  crhs(:), crhs_r(:), csol(:), csol_r(:)
+  TYPE(spline2d) :: splxy
+  TYPE(zwsmp_mat) :: mat
+!
+  CHARACTER(len=128) :: file='pde2d_sym_wsmp_dft.h5'
+  INTEGER :: fid
+  DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: arr, srow
+  DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
+  DOUBLE PRECISION :: seconds, mem, dopla
+  DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tfour, tfour0, tgrid, gflops1
+  INTEGER :: nits=100
+  LOGICAL :: nlmetis, nlforce_zero, nlpos
+!
+  DOUBLE PRECISION :: xpt, ypt0
+  DOUBLE COMPLEX, ALLOCATABLE :: cspec0(:), cspec(:), energy_k(:)
+  DOUBLE COMPLEX :: energy, energy_exact
+!
+  NAMELIST /newrun/ nx, ny, nidbas, ngauss, kmin, kmax, mbess, epsi, &
+       &            nlppform, nlmetis, nlforce_zero, nlpos, coefx, coefy
+!===========================================================================
+!              1.0 Prologue
+!
+!   Read in data specific to run
+!
+  nx = 8              ! Number of intervals in x
+  ny = 8              ! Number of intervals in y
+  nidbas = (/3,3/)    ! Degree of splines
+  ngauss = (/4,4/)    ! Number of Gauss points/interval
+  kmin = -3           ! Minimum Fourier mode number
+  kmax = 3            ! Maximum Fourier mode number
+  mbess = 2           ! Exponent of differential problem
+  epsi = 0.5          ! Non-uniformity in the Laplacian coefficicient
+  nterms = 2          ! Number of terms in weak form
+  nlppform = .TRUE.   ! Use PPFORM for gridval or not
+  nlmetis = .FALSE.   ! Use metis ordering or minimum degree
+  nlforce_zero = .FALSE. ! Remove existing nodes with zero val in  putele/row/ele
+  nlpos = .TRUE.         ! Matrix is positive definite
+  coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+  coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!!
+!   Read table of mode couplings
+!
+  READ(*,*) n_mode_couplings
+  ALLOCATE(mode_couplings(n_mode_couplings))
+  READ(*,*) mode_couplings
+  WRITE(*,'(/a/(20i4))') 'Mode couplings', mode_couplings
+!
+!   Define grid on x (=radial) & y (=poloidal) axis
+!
+  pi = 4.0d0*ATAN(1.0d0)
+  ALLOCATE(xgrid(0:nx), ygrid(0:ny))
+  xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
+  CALL meshdist(coefx, xgrid, nx)
+  ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
+  CALL meshdist(coefy, ygrid, ny)
+!
+!   Exact energy
+!
+  energy_exact = 2.0d0*pi/REAL(2+mbess,8)
+!
+!   Create hdf5 file
+!
+  CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
+  CALL attach(fid, '/', 'NX', nx)
+  CALL attach(fid, '/', 'NY', ny)
+  CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
+  CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
+  CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
+  CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
+  CALL attach(fid, '/', 'MBESS', mbess)
+  CALL attach(fid, '/', 'EPSI', epsi)
+  CALL attach(fid, '/', 'KMIN', kmin)
+  CALL attach(fid, '/', 'KMAX', kmax)
+!===========================================================================
+!              2.0 Discretize the PDE
+!
+!   Set up spline
+!
+  CALL set_spline(nidbas, ngauss, xgrid, ygrid, splxy, &
+       &          (/.FALSE., .TRUE./))
+!
+!   Init DFT for spline in 2nd direction
+!
+  CALL init_dft(splxy%sp2, kmin, kmax, mode_couplings)
+  dk = splxy%sp2%dft%dk
+!
+!   FE matrix assembly
+!
+  dimx = splxy%sp1%dim
+  dimy = splxy%sp2%dim
+  nrank = (nx+nidbas(1))*dk       ! Rank of restricted matrix
+  nrank_full = (nx+nidbas(1))*ny  ! Rank of full matrix
+!
+  ALLOCATE(rhs(nrank_full), sol(nrank_full))
+  ALLOCATE(crhs(nrank_full), csol(nrank_full))
+  ALLOCATE(crhs_r(nrank), csol_r(nrank))
+!
+  WRITE(*,'(a,i8)') 'nrank_full', nrank_full
+  WRITE(*,'(a,i8)') 'nrank     ', nrank
+!
+  t0 = seconds()
+  CALL init(nrank, nterms, mat, nlherm=.TRUE., nlpos=nlpos)
+  CALL dismat(splxy, epsi, mat)
+  ALLOCATE(arr(nrank))
+  ALLOCATE(srow(nrank))
+  DO i=1,nrank
+     CALL getrow(mat, i, arr)
+     srow(i) = SUM(arr)
+  END DO
+!!$  WRITE(*,'(a/(10(1pe12.3)))') 'Real of Sum of matrix rows before BC', REAL(srow)
+!!$  WRITE(*,'(a/(10(1pe12.3)))') 'Imag of Sum of matrix rows before BC', AIMAG(srow)
+  PRINT*, 'Sum of mat before BC', SUM(srow)
+!
+!   BC on Matrix
+!
+  WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero
+  WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
+  CALL ibcmat(mat, splxy%sp2%dft)
+  tmat = seconds() - t0
+  DO i=1,nrank
+     CALL getrow(mat, i, arr)
+     srow(i) = SUM(arr)
+  END DO
+!!$  WRITE(*,'(a/(10(1pe12.3)))') 'Real of Sum of matrix rows after BC', REAL(srow)
+!!$  WRITE(*,'(a/(10(1pe12.3)))') 'Imag of Sum of matrix rows after BC', AIMAG(srow)
+  PRINT*, 'Sum of mat after BC', SUM(srow)
+!
+  WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
+  WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem()
+!
+!   RHS assembly
+!
+  CALL disrhs(mbess, epsi, splxy, rhs)
+!
+!   Init FFT
+!
+  t0 = seconds()
+  CALL fourcol(crhs(1:ny),1)
+  CALL fourcol(crhs(1:ny),-1)
+  tfour0 = seconds()-t0
+  crhs = crhs/REAL(ny,8)
+!
+!  DFT of RHS
+!
+  t0 = seconds()
+  crhs = rhs
+  DO i=1,nx+nidbas(1)
+     i0 = (i-1)*ny
+     CALL fourcol(crhs(i0+1:i0+ny), 1)
+  END DO
+  tfour = seconds()-t0
+!
+!   Restriction in Fourier space
+!     k  = kmin:kmax  (restricted)
+!     kp = 0:ny-1     (full)
+!
+  DO i=1,nx+nidbas(1)
+     i0 = (i-1)*ny
+     i0_r = (i-1)*dk
+     DO k=kmin,kmax
+        kp = k
+        IF(kp.LT.0) kp = kp+ny
+        crhs_r(i0_r+k-kmin+1) = crhs(i0+kp+1)
+     END DO
+  END DO
+!
+!   BC on RHS
+!
+  CALL ibcrhs(crhs_r, splxy%sp2%dft)
+!
+  IF(nrank.LT.100) THEN
+     WRITE(*,'(a/(10(1pe12.3)))') 'Real of crhs', REAL(crhs)
+     WRITE(*,'(a/(10(1pe12.3)))') 'Imag of crhs', AIMAG(crhs)
+  END IF
+!===========================================================================
+!              3.0 Solve the dicretized system
+!
+!   Matrix factorization
+!
+  t0 = seconds()
+!!$  CALL factor(mat)
+  CALL to_mat(mat)
+  CALL reord_mat(mat);  CALL putmat(fid, '/MAT1', mat)
+  CALL numfact(mat)
+  tfact = seconds() - t0
+  DO i=1,nrank
+     CALL getrow(mat, i, arr)
+     srow(i) = SUM(arr)
+  END DO
+  PRINT*, 'Sum of mat after factor', SUM(srow)
+  PRINT*, 'iparm(64) after factor', mat%p%iparm(64)
+ 
+  WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem()
+  WRITE(*,'(/a,i6)') 'Number of nonzeros in factor (/1000) = ',mat%p%iparm(24)
+  WRITE(*,'(a,1pe12.3)')  'Number of factorization GFLOPS      = ',mat%p%dparm(23)/1.d9
+  gflops1 = mat%p%dparm(23) / tfact / 1.d9
+!
+!   Backsolve
+!
+  t0 = seconds()
+  PRINT*, 'SUM of crhs_r', SUM(crhs_r)
+  CALL bsolve(mat, crhs_r, csol_r)
+  tsolv = seconds() - t0
+  WRITE(*,'(a,1pe12.4)') 'Residue =', cnorm2(vmx(mat,csol_r)-crhs_r)
+  PRINT*, 'SUM of csol_r', SUM(csol_r)
+  PRINT*, 'iparm(64) after bsolve', mat%p%iparm(64)
+  PRINT*, 'Residue from WSMP', mat%p%dparm(7)
+  WRITE(*,'(a/(20i4))') 'iparm', mat%p%iparm
+!
+  CALL putarr(fid, '/FT_RHS', crhs_r, 'DFT of RHS')
+  CALL putarr(fid, '/FT_SOL', csol_r, 'DFT of Spline coefficients')
+!===========================================================================
+!              4.0 Perform some diagnostics in Fourier space
+!
+!   Fourier spectrum at xpt
+!
+  xpt = SQRT(REAL(mbess,8)/REAL(mbess+2,8))
+  ALLOCATE(cspec0(dk))
+  CALL spectrum0(splxy, csol_r, xpt, cspec0)
+  WRITE(*,'(/a,f10.5)') 'DFT spectrum (by integration) at x = ', xpt
+  DO k=kmin,kmax
+     WRITE(*,'(i5,3(1pe15.3))') k, cspec0(k-kmin+1), ABS(cspec0(k-kmin+1))
+  END DO
+!
+  ypt0 = 0.0d0
+  WRITE(*,'(/a,2f10.5)') 'DFT spectrum1 at x, y0 = ', xpt, ypt0
+  CALL spectrum1(splxy, csol_r, xpt, ypt0, cspec0)
+  DO k=kmin,kmax
+     WRITE(*,'(i5,3(1pe15.3))') k, cspec0(k-kmin+1), ABS(cspec0(k-kmin+1))
+  END DO
+  ypt0 = splxy%sp2%dft%dx/2.0d0   ! Center of first interval
+  WRITE(*,'(/a,2f10.5)') 'DFT spectrum1 at x, y0 = ', xpt, ypt0
+  CALL spectrum1(splxy, csol_r, xpt, ypt0, cspec0)
+  DO k=kmin,kmax
+     WRITE(*,'(i5,3(1pe15.3))') k, cspec0(k-kmin+1), ABS(cspec0(k-kmin+1))
+  END DO
+!
+!   Spectral energy
+!
+  WRITE(*,'(/a)') 'Spectral energies'
+  ALLOCATE(energy_k(kmin:kmax))
+  energy_k = (0.0d0,0.0d0)
+  DO i=1,dimx
+     i0_r = (i-1)*dk
+     DO k=kmin,kmax
+        ik = i0_r+k-kmin+1
+        energy_k(k) = energy_k(k) + csol_r(ik)*CONJG(crhs_r(ik))
+     END DO
+  END DO
+  energy_k = energy_k/REAL(ny,8)
+  energy = SUM(energy_k)
+  DO k=kmin,kmax
+     WRITE(*,'(i5,3(1pe15.3))') k, energy_k(k), ABS(energy_k(k))
+  END DO
+  WRITE(*,'(a5,4(1pe15.3))') 'Sum', energy, ABS(energy), REAL(energy-energy_exact)
+!
+  CALL putarr(fid, '/ENERGY_K', energy_k, 'Spectral energies')
+!===========================================================================
+!              5.0 Transform back to real space
+!
+!   Expand to full Fourier space
+!     k  = kmin:kmax  (restricted)
+!     kp = 0:ny-1     (full)
+!
+  crhs = (0.0d0,0.0d0)
+  DO i=1,nx+nidbas(1)
+     i0 = (i-1)*ny
+     i0_r = (i-1)*dk
+     DO k=kmin,kmax
+        kp = k
+        IF(kp.LT.0) kp = kp+ny
+        csol(i0+kp+1) = csol_r(i0_r+k-kmin+1)
+     END DO
+  END DO
+!
+!   Fourier transform back to real space
+!
+  t0 = seconds()
+  DO i=1,nx+nidbas(1)
+     i0 = (i-1)*ny
+     CALL fourcol(csol(i0+1:i0+ny),-1)
+  END DO
+  sol = REAL(csol)/REAL(ny,8)
+  tfour = tfour + seconds()-t0
+!
+!
+!   Spline coefficients, taking into account of periodicity in y
+!   Note: in SOL, y was numbered first.
+!
+  ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
+  DO j=0,dimy-1
+     DO i=0,dimx-1
+        ij = MODULO(j,ny) + i*ny + 1
+        bcoef(i,j) = sol(ij)
+     END DO
+  END DO
+  CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
+!
+!   Total energy
+!
+  WRITE(*,'(/a, 2(1pe15.3))') 'Total energy and error(real space)', &
+       &     DOT_PRODUCT(rhs,sol), &
+       &     DOT_PRODUCT(rhs,sol)-REAL(energy_exact)               
+  WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem()
+!===========================================================================
+!              6.0 Check the solution
+!
+!   Check function values computed with various method
+!
+  ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
+  DO i=0,nx
+     DO j=0,ny
+        solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
+     END DO
+  END DO
+  jder = (/0,0/)
+!
+!   Compute PPFORM/BCOEFS at first call to gridval
+!
+  CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
+!
+!    Fourier spectrum at xpt
+!
+  xpt = SQRT(REAL(mbess,8)/REAL(mbess+2,8))
+  ALLOCATE(cspec(dk))
+!
+  ypt0 = 0.0d0
+  WRITE(*,'(/a,2f10.5)') 'DFT spectrum2 at x, y0 = ', xpt, ypt0
+  CALL spectrum2(splxy, xpt, ypt0, cspec)
+  DO k=kmin,kmax
+     WRITE(*,'(i5,3(1pe15.3))') k, cspec(k-kmin+1), ABS(cspec(k-kmin+1))
+  END DO
+!
+  ypt0 = splxy%sp2%dft%dx/2.0d0   ! Center of first interval
+  WRITE(*,'(/a,2f10.5)') 'DFT spectrum2 at x, y0 = ', xpt, ypt0
+  CALL spectrum2(splxy, xpt, ypt0, cspec)
+  DO k=kmin,kmax
+     WRITE(*,'(i5,3(1pe15.3))') k, cspec(k-kmin+1), ABS(cspec(k-kmin+1))
+  END DO
+!
+  WRITE(*,'(/a)') '*** Checking solutions'
+  t0 = seconds()
+  DO it=1,nits   ! nits iterations for timing
+     CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+  END DO
+  tgrid = (seconds() - t0)/REAL(nits)
+  errsol = solana - solcal
+  IF( SIZE(bcoef,2) .LE. 10 ) THEN
+     CALL prnmat('BCOEF', bcoef)
+     CALL prnmat('SOLANA', solana)
+     CALL prnmat('SOLCAL', solcal)
+     CALL prnmat('ERRSOL', errsol)
+  END IF
+  WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
+       &    norm2(errsol) / norm2(solana)
+  WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s)             ', tgrid
+!
+  CALL putarr(fid, '/xgrid', xgrid, 'r')
+  CALL putarr(fid, '/ygrid', ygrid, '\theta')
+  CALL putarr(fid, '/sol', solcal, 'Solutions')
+  CALL putarr(fid, '/solana', solana,'Exact solutions')
+  CALL putarr(fid, '/errors', errsol, 'Errors')
+!
+!   Check derivatives d/dx and d/dy
+!
+  WRITE(*,'(/a)') '*** Checking gradient'
+  DO i=0,nx
+     DO j=0,ny
+        IF( mbess .EQ. 0 ) THEN
+           solana(i,j) = -2.0d0 * xgrid(i)
+        ELSE
+           solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
+             &           xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
+        END IF
+     END DO
+  END DO
+!
+  jder = (/1,0/)
+  CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+  errsol = solana - solcal
+  CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
+  WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx',  norm2(errsol)
+!
+  DO i=0,nx
+     DO j=0,ny
+        solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
+     END DO
+  END DO
+!
+  jder = (/0,1/)
+  CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+  CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
+  errsol = solana - solcal
+  WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy',  norm2(errsol)
+!
+  WRITE(*,'(/a)') '---'
+  WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
+  WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
+  WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s)         ', tsolv
+  WRITE(*,'(a,1pe12.3)') 'Init FFT time (s)             ', tfour0
+  WRITE(*,'(a,1pe12.3)') 'FFT time (s)                  ', tfour
+  WRITE(*,'(a,2f10.3)')  'Factor  Gflop/s', gflops1
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(cspec0, cspec)
+  DEALLOCATE(mode_couplings)
+  DEALLOCATE(xgrid, ygrid, rhs, sol)
+  DEALLOCATE(crhs, csol)
+  DEALLOCATE(crhs_r, csol_r)
+  DEALLOCATE(solcal, solana, errsol)
+  DEALLOCATE(bcoef)
+  DEALLOCATE(arr)
+  DEALLOCATE(srow)
+  DEALLOCATE(energy_k)
+  CALL destroy_sp(splxy)
+  CALL destroy(mat)
+!
+  CALL closef(fid)
+!===========================================================================
+!
+CONTAINS
+  FUNCTION norm2(x)
+!
+!  Compute the 2-norm of array x
+!
+    IMPLICIT NONE
+    DOUBLE PRECISION :: norm2
+    DOUBLE PRECISION, INTENT(in) :: x(:,:)
+    DOUBLE PRECISION :: sum2
+    INTEGER :: i, j
+!
+    sum2 = 0.0d0
+    DO i=1,SIZE(x,1)
+       DO j=1,SIZE(x,2)
+          sum2 = sum2 + x(i,j)**2
+       END DO
+    END DO
+    norm2 = SQRT(sum2)
+  END FUNCTION norm2
+!
+  FUNCTION cnorm2(x)
+    DOUBLE COMPLEX, INTENT(in) :: x(:)
+    DOUBLE PRECISION :: cnorm2
+    cnorm2 = SQRT(DOT_PRODUCT(x,x))    
+  END FUNCTION cnorm2
+!
+  SUBROUTINE prnmat(label, mat)
+    CHARACTER(len=*) :: label
+    DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
+    INTEGER :: i
+    WRITE(*,'(/a)') TRIM(label)
+    DO i=1,SIZE(mat,1)
+       WRITE(*,'(10(1pe12.3))') mat(i,:)
+    END DO
+  END SUBROUTINE prnmat
+END PROGRAM main
+!
+!+++
+SUBROUTINE meshdist(c, x, nx)
+!
+!   Construct an 1d  non-equidistant mesh given a
+!   mesh distribution function.
+!
+  IMPLICIT NONE
+  DOUBLE PRECISION, INTENT(in) :: c(5)
+  INTEGER, INTENT(iN) :: nx
+  DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+  INTEGER :: nintg
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+  DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+  INTEGER :: i, k
+!
+  a=x(0)
+  b=x(nx)
+  nintg = 10*nx
+  ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+  dx = (b-a)/REAL(nintg)
+  xint(0) = a
+  fint(0) = 0.0d0
+  f1 = fdist(xint(0))
+  DO i=1,nintg
+     f0 = f1
+     xint(i) = xint(i-1) + dx
+     f1 = fdist(xint(i))
+     fint(i) = fint(i-1) + 0.5*(f0+f1)
+  END DO
+!
+!  Normalization
+!
+  scal = REAL(nx) / fint(nintg)
+  fint(0:nintg) = fint(0:nintg) * scal
+!!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+  k = 1
+  DO i=1,nintg-1
+     IF( fint(i) .GE. REAL(k) ) THEN
+        x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+             &   (k-fint(i))
+        k = k+1
+     END IF
+  END DO
+!
+  DEALLOCATE(xint, fint)
+CONTAINS
+  DOUBLE PRECISION FUNCTION fdist(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
+  END FUNCTION fdist
+END SUBROUTINE meshdist
+!+++
diff --git a/examples/pde2d_wsmp.f90 b/examples/pde2d_wsmp.f90
new file mode 100644
index 0000000..6a20630
--- /dev/null
+++ b/examples/pde2d_wsmp.f90
@@ -0,0 +1,711 @@
+!>
+!> @file pde2d_wsmp.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+!
+!  Solving the 2d PDE using splines and WSMP non-symmetric matrix
+!
+!    -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0
+!    exact solution: f(x,y) = (1-x^2) x^m cos(my)
+!
+MODULE pde2d_wsmp_mod
+  USE bsplines
+  USE wsmp_bsplines
+  IMPLICIT NONE
+CONTAINS
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE dismat(spl, mat)
+!
+!   Assembly of FE matrix mat using spline spl
+!
+    TYPE(spline2d), INTENT(in)       :: spl
+    TYPE(wsmp_mat), INTENT(inout)    :: mat
+!
+    INTEGER :: n1, nidbas1, ndim1, ng1
+    INTEGER :: n2, nidbas2, ndim2, ng2
+    INTEGER :: i, j, ig1, ig2
+    INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
+    DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:)
+    DOUBLE PRECISION:: contrib
+!
+    INTEGER :: kterms         ! Number of terms in weak form
+    INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:)  ! Derivative order
+    DOUBLE PRECISION, ALLOCATABLE  :: coefs(:,:,:)          ! Terms in weak form
+    INTEGER, ALLOCATABLE :: left1(:), left2(:)
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
+    WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
+    WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
+!
+!   Gauss quadature
+!
+    CALL get_gauss(spl%sp1, ng1)
+    CALL get_gauss(spl%sp2, ng2)
+    ALLOCATE(xg1(ng1), wg1(ng1))
+    ALLOCATE(xg2(ng1), wg2(ng1))
+!
+!   Weak form
+!
+    kterms = mat%nterms
+    ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
+    ALLOCATE(coefs(kterms,ng1,ng2))
+!
+    ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative
+    ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! 
+!===========================================================================
+!              2.0 Assembly loop
+!
+    ALLOCATE(left1(ng1))
+    ALLOCATE(left2(ng2))
+    DO i=1,n1
+       CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
+       left1 = i
+       CALL basfun(xg1, spl%sp1, fun1, left1)
+       DO j=1,n2
+          CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
+          left2 = j
+          CALL basfun(xg2, spl%sp2, fun2, left2)
+!
+          DO ig1=1,ng1
+             DO ig2=1,ng2
+                CALL coefeq(xg1(ig1), xg2(ig2), &
+                     &      idert(:,:,ig1,ig2), &
+                     &      iderw(:,:,ig1,ig2), &
+                     &      coefs(:,ig1,ig2))
+             END DO
+          END DO
+!
+          DO iw1=0,nidbas1  ! Weight function in dir 1
+             igw1 = i+iw1
+             DO iw2=0,nidbas2  ! Weight function in dir 2
+                igw2 = MODULO(j+iw2-1, n2) + 1
+                irow = igw2 + (igw1-1)*n2
+                DO it1=0,nidbas1  ! Test function in dir 1
+                   igt1 = i+it1
+                   DO it2=0,nidbas2  ! Test function in dir 2
+                      igt2 = MODULO(j+it2-1, n2) + 1
+                      jcol = igt2 + (igt1-1)*n2
+!-------------
+                      contrib = 0.0d0
+                      DO ig1=1,ng1
+                         DO ig2=1,ng2
+                            DO iterm=1,kterms
+                               contrib = contrib + &
+                                    &    fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
+                                    &    fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * &
+                                    &    coefs(iterm,ig1,ig2) *                 &
+                                    &    fun2(it2,idert(iterm,2,ig1,ig2),ig2) * &
+                                    &    fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
+                                    &    wg1(ig1) * wg2(ig2)
+                            END DO
+                         END DO
+                      END DO
+                      CALL updtmat(mat, irow, jcol, contrib)
+!-------------
+                   END DO
+                END DO
+             END DO
+          END DO
+       END DO
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(xg1, wg1, fun1)
+    DEALLOCATE(xg2, wg2, fun2)
+    DEALLOCATE(idert, iderw, coefs)
+    DEALLOCATE(left1,left2)
+!
+  CONTAINS
+    SUBROUTINE coefeq(x, y, idt, idw, c)
+      DOUBLE PRECISION, INTENT(in) :: x, y
+      INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
+      DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
+!
+! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
+!
+      c(1) = x        ! 
+      idt(1,1) = 1
+      idt(1,2) = 0
+      idw(1,1) = 1
+      idw(1,2) = 0
+      !
+      c(2) = 1.d0/x
+      idt(2,1) = 0
+      idt(2,2) = 1
+      idw(2,1) = 0
+      idw(2,2) = 1
+    END SUBROUTINE coefeq
+  END SUBROUTINE dismat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE disrhs(mbess, spl, rhs)
+!
+!   Assembly the RHS using 2d spline spl
+!
+    INTEGER, INTENT(in) :: mbess
+    TYPE(spline2d), INTENT(in) :: spl
+    DOUBLE PRECISION, INTENT(out) :: rhs(:)
+    INTEGER :: n1, nidbas1, ndim1, ng1
+    INTEGER :: n2, nidbas2, ndim2, ng2
+    INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
+    DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
+    DOUBLE PRECISION:: contrib
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, n2, nidbas2) 
+!
+    ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
+    ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
+!
+!   Gauss quadature
+!
+    CALL get_gauss(spl%sp1, ng1)
+    CALL get_gauss(spl%sp2, ng2)
+    ALLOCATE(xg1(ng1), wg1(ng1))
+    ALLOCATE(xg2(ng1), wg2(ng1))
+!===========================================================================
+!              2.0 Assembly loop
+!
+    nrank = SIZE(rhs)
+    rhs(1:nrank) = 0.0d0
+!
+    DO i=1,n1
+       CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
+       DO ig1=1,ng1
+          CALL basfun(xg1(ig1), spl%sp1, fun1, i)
+          DO j=1,n2
+             CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
+             DO ig2=1,ng2
+                CALL basfun(xg2(ig2), spl%sp2, fun2, j)
+                contrib = wg1(ig1)*wg2(ig2) * &
+                     &    rhseq(xg1(ig1),xg2(ig2), mbess)
+                DO k1=0,nidbas1
+                   i1 = i+k1
+                   DO k2=0,nidbas2
+                      j2 = MODULO(j+k2-1,n2) + 1
+                      ij = j2 + (i1-1)*n2
+                      rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
+                   END DO
+                END DO
+             END DO
+          END DO
+       END DO
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(xg1, wg1, fun1)
+    DEALLOCATE(xg2, wg2, fun2)
+!
+  CONTAINS
+    DOUBLE PRECISION FUNCTION rhseq(x1, x2, m)
+      DOUBLE PRECISION, INTENT(in) :: x1, x2
+      INTEGER, INTENT(in) :: m
+      rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)
+    END FUNCTION rhseq
+  END SUBROUTINE disrhs
+!
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE ibcmat(mat, ny)
+!
+!   Apply BC on matrix
+!
+    TYPE(wsmp_mat), INTENT(inout) :: mat
+    INTEGER, INTENT(in) :: ny
+    INTEGER :: nrank, i, j
+    DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
+!===========================================================================
+!              1.0 Prologue
+!
+    nrank = mat%rank
+    ALLOCATE(zsum(nrank), arr(nrank))
+!===========================================================================
+!              2.0 Unicity at the axis
+!
+!   The vertical sum on the NY-th row
+!
+    zsum = 0.0d0
+    DO i=1,ny
+       arr = 0.0d0
+       CALL getrow(mat, i, arr)
+       zsum(:) = zsum(:) + arr(:)
+    END DO
+    CALL putrow(mat, ny, zsum)
+!
+!   The horizontal sum on the NY-th column
+!
+    zsum = 0.0d0
+    DO j=1,ny
+       arr = 0.0d0
+       CALL getcol(mat, j, arr)
+       zsum(ny:) = zsum(ny:) + arr(ny:)
+    END DO
+    CALL putcol(mat, ny, zsum)
+!
+!   The away operator
+!
+    DO j = 1,ny-1
+       arr = 0.0d0; arr(j) = 1.0d0
+       CALL putcol(mat, j, arr)     
+    END DO
+!
+    DO i = 1,ny-1
+       arr = 0.0d0; arr(i) = 1.0d0
+       CALL putrow(mat, i, arr)     
+    END DO
+!  
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+    DO j = nrank, nrank-ny+1, -1
+       arr = 0.0d0; arr(j) = 1.0d0
+       CALL putcol(mat, j, arr)     
+    END DO
+!
+    DO i = nrank, nrank-ny+1, -1
+       arr = 0.0d0; arr(i) = 1.0d0
+       CALL putrow(mat, i, arr)     
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(zsum, arr)
+!
+  END SUBROUTINE ibcmat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE ibcrhs(rhs, ny)
+!
+!   Apply BC on RHS
+!
+    DOUBLE PRECISION, INTENT(inout) :: rhs(:)
+    INTEGER, INTENT(in) :: ny
+    INTEGER :: nrank
+    DOUBLE PRECISION :: zsum
+!===========================================================================
+!              1.0 Prologue
+!
+    nrank = SIZE(rhs,1)
+!===========================================================================
+!              2.0 Unicity at the axis
+!
+!   The vertical sum
+!
+    zsum = SUM(rhs(1:ny))
+    rhs(ny) = zsum
+    rhs(1:ny-1) = 0.0d0
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+    rhs(nrank-ny+1:nrank) = 0.0d0
+  END SUBROUTINE ibcrhs
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+END MODULE pde2d_wsmp_mod
+PROGRAM main
+  USE pde2d_wsmp_mod
+  USE futils
+!
+  IMPLICIT NONE
+  INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms
+  LOGICAL :: nlppform
+  INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it
+  DOUBLE PRECISION :: pi, coefx(5), coefy(5)
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol
+  TYPE(spline2d) :: splxy
+  TYPE(wsmp_mat) :: mat
+!
+  CHARACTER(len=128) :: file='pde2d_wsmp.h5'
+  INTEGER :: fid
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
+  DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
+  DOUBLE PRECISION :: seconds, mem, dopla
+  DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
+  DOUBLE PRECISION :: tconv, treord
+  INTEGER :: nits=100
+  LOGICAL :: nlforce_zero
+  LOGICAL :: nlserial
+!
+  NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, &
+       &            nlforce_zero, nlserial, coefx, coefy
+!===========================================================================
+!              1.0 Prologue
+!
+!   Read in data specific to run
+!
+  nx = 8              ! Number of intervals in x
+  ny = 8              ! Number of intervals in y
+  nidbas = (/3,3/)    ! Degree of splines
+  ngauss = (/4,4/)    ! Number of Gauss points/interval
+  mbess = 2           ! Exponent of differential problem
+  nterms = 2          ! Number of terms in weak form
+  nlppform = .TRUE.   ! Use PPFORM for gridval or not
+  nlforce_zero = .FALSE. ! Remove existing nodes with zero val in  putele/row/ele
+  coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+  coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Define grid on x (=radial) & y (=poloidal) axis
+!
+  pi = 4.0d0*ATAN(1.0d0)
+  ALLOCATE(xgrid(0:nx), ygrid(0:ny))
+  xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
+  CALL meshdist(coefx, xgrid, nx)
+  ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
+  CALL meshdist(coefy, ygrid, ny)
+!
+!   Create hdf5 file
+!
+  CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
+  CALL attach(fid, '/', 'NX', nx)
+  CALL attach(fid, '/', 'NY', ny)
+  CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
+  CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
+  CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
+  CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
+  CALL attach(fid, '/', 'MBESS', mbess)
+!===========================================================================
+!              2.0 Discretize the PDE
+!
+!   Set up spline
+!
+  t0 = seconds()
+  CALL set_spline(nidbas, ngauss, &
+       &          xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
+!
+!   FE matrix assembly
+!
+  nrank = (nx+nidbas(1))*ny    ! Rank of the FE matrix
+  WRITE(*,'(a,i8)') 'nrank', nrank
+!
+  CALL init(nrank, nterms, mat, nlforce_zero=nlforce_zero)
+  CALL dismat(splxy, mat)
+  ALLOCATE(arr(nrank))
+  IF(nrank.LT.100) THEN
+     DO i=1,nrank
+        CALL getele(mat, i, i, arr(i))
+     END DO
+     WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', arr
+  END IF
+!
+!   BC on Matrix
+!
+  WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero
+  WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
+  CALL ibcmat(mat, ny)
+  IF(nrank.LT.100) THEN
+     DO i=1,nrank
+        CALL getele(mat, i, i, arr(i))
+     END DO
+     WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', arr
+     WRITE(*,'(a)') 'Last rows'
+     DO i=nrank-ny,nrank
+        CALL getrow(mat, i, arr)
+        WRITE(*,'(10(1pe12.3))') arr
+     END DO
+  END IF
+  tmat = seconds() - t0
+!
+!   RHS assembly
+!
+  ALLOCATE(rhs(nrank), sol(nrank))
+  CALL disrhs(mbess, splxy, rhs)
+!
+!   BC on RHS
+!
+  CALL ibcrhs(rhs, ny)
+!
+  CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
+  WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
+  WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem()
+!===========================================================================
+!              3.0 Solve the dicretized system
+!
+  t0 = seconds()
+  CALL to_mat(mat)
+  tconv = seconds() -t0
+  WRITE(*,'(/a,l4)') 'associated(mat%mat)', ASSOCIATED(mat%mat)
+  WRITE(*,'(a,1pe12.3)') 'Mem used (MB) after TO_MAT', mem()
+!
+  t0 = seconds()
+  CALL reord_mat(mat)
+  CALL putmat(fid, '/MAT', mat)
+  treord = seconds() - t0
+!
+  t0 = seconds()
+  CALL numfact(mat)
+  tfact = seconds() - t0
+ 
+  WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem()
+  WRITE(*,'(/a,i6)') 'Number of nonzeros in factor (/1000) = ',mat%p%iparm(24)
+  WRITE(*,'(a,1pe12.3)')  'Number of factorization GFLOPS      = ',mat%p%dparm(23)/1.d9
+  gflops1 = mat%p%dparm(23) / tfact / 1.d9
+!
+  CALL bsolve(mat, rhs, sol)
+  t0 = seconds()
+  DO it=1,nits   ! nits iterations for timing
+     CALL bsolve(mat, rhs, sol)
+     sol(1:ny-1) = sol(ny)
+  END DO
+  WRITE(*,'(/a,i6)') 'Number of refinement steps = ',mat%p%iparm(26)
+  WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem()
+  tsolv = (seconds() - t0)/REAL(nits)
+!
+!   Spline coefficients, taking into account of periodicity in y
+!   Note: in SOL, y was numbered first.
+!
+  dimx = splxy%sp1%dim
+  dimy = splxy%sp2%dim
+  ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
+  DO j=0,dimy-1
+     DO i=0,dimx-1
+        ij = MODULO(j,ny) + i*ny + 1
+        bcoef(i,j) = sol(ij)
+     END DO
+  END DO
+  WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef)
+  CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
+!===========================================================================
+!              4.0 Check the solution
+!
+!   Check function values computed with various method
+!
+  ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
+  DO i=0,nx
+     DO j=0,ny
+        solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
+     END DO
+  END DO
+  jder = (/0,0/)
+!
+!   Compute PPFORM/BCOEFS at first call to gridval
+  CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
+!
+  WRITE(*,'(/a)') '*** Checking solutions'
+  t0 = seconds()
+  DO it=1,nits   ! nits iterations for timing
+     CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+  END DO
+  tgrid = (seconds() - t0)/REAL(nits)
+  errsol = solana - solcal
+  IF( SIZE(bcoef,2) .LE. 10 ) THEN
+     CALL prnmat('BCOEF', bcoef)
+     CALL prnmat('SOLANA', solana)
+     CALL prnmat('SOLCAL', solcal)
+     CALL prnmat('ERRSOL', errsol)
+  END IF
+  WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
+       &    norm2(errsol) / norm2(solana)
+  WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s)             ', tgrid
+!
+  CALL putarr(fid, '/xgrid', xgrid, 'r')
+  CALL putarr(fid, '/ygrid', ygrid, '\theta')
+  CALL putarr(fid, '/sol', solcal, 'Solutions')
+  CALL putarr(fid, '/solana', solana,'Exact solutions')
+  CALL putarr(fid, '/errors', errsol, 'Errors')
+!
+!   Check derivatives d/dx and d/dy
+!
+  WRITE(*,'(/a)') '*** Checking gradient'
+  DO i=0,nx
+     DO j=0,ny
+        IF( mbess .EQ. 0 ) THEN
+           solana(i,j) = -2.0d0 * xgrid(i)
+        ELSE
+           solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
+             &           xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
+        END IF
+     END DO
+  END DO
+!
+  jder = (/1,0/)
+  CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+  errsol = solana - solcal
+  CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
+  WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx',  norm2(errsol)
+!
+  DO i=0,nx
+     DO j=0,ny
+        solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
+     END DO
+  END DO
+!
+  jder = (/0,1/)
+  CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+  CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
+  errsol = solana - solcal
+  WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy',  norm2(errsol)
+!
+  WRITE(*,'(/a)') '---'
+  WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
+  WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s)   ', tconv
+  WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s)      ', treord
+  WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
+  WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv
+  WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s)         ', tsolv
+  WRITE(*,'(a,2f10.3)') 'Factor  Gflop/s', gflops1
+!===========================================================================
+!              5.0 Clear the matrix and recompute
+!
+  WRITE(*,'(/a)') 'Recompute the solver ...'
+  t0 = seconds()
+  CALL clear_mat(mat)
+  CALL dismat(splxy, mat)
+  CALL ibcmat(mat, ny)
+  tmat = seconds()-t0
+!
+  t0 = seconds()
+!!$  CALL numfact(mat)
+  CALL factor(mat, nlreord=.FALSE.)
+  tfact = seconds()-t0
+  gflops1 = mat%p%dparm(23) / tfact / 1.d9
+!
+  t0 = seconds()
+  ALLOCATE(newsol(nrank))
+  CALL bsolve(mat, rhs, newsol)
+  newsol(1:ny-1) = newsol(ny)
+  tsolv = seconds()-t0
+!
+  WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol))
+  WRITE(*,'(/a)') '---'
+  WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
+  WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
+  WRITE(*,'(a,1pe12.3)') 'Backsolve(2) time (s)         ', tsolv
+  WRITE(*,'(a,1pe12.3)') 'Total (s)                     ', tmat+tfact+tsolv
+  WRITE(*,'(a,2f10.3)') 'Factor  Gflop/s', gflops1
+!
+  DEALLOCATE(newsol)
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(xgrid, rhs, sol)
+  DEALLOCATE(solcal, solana, errsol)
+  DEALLOCATE(bcoef)
+  DEALLOCATE(arr)
+  CALL destroy_sp(splxy)
+  CALL destroy(mat)
+!
+  CALL closef(fid)
+!===========================================================================
+!
+CONTAINS
+  FUNCTION norm2(x)
+!
+!  Compute the 2-norm of array x
+!
+    IMPLICIT NONE
+    DOUBLE PRECISION :: norm2
+    DOUBLE PRECISION, INTENT(in) :: x(:,:)
+    DOUBLE PRECISION :: sum2
+    INTEGER :: i, j
+!
+    sum2 = 0.0d0
+    DO i=1,SIZE(x,1)
+       DO j=1,SIZE(x,2)
+          sum2 = sum2 + x(i,j)**2
+       END DO
+    END DO
+    norm2 = SQRT(sum2)
+  END FUNCTION norm2
+  SUBROUTINE prnmat(label, mat)
+    CHARACTER(len=*) :: label
+    DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
+    INTEGER :: i
+    WRITE(*,'(/a)') TRIM(label)
+    DO i=1,SIZE(mat,1)
+       WRITE(*,'(10(1pe12.3))') mat(i,:)
+    END DO
+  END SUBROUTINE prnmat
+END PROGRAM main
+!
+!+++
+SUBROUTINE meshdist(c, x, nx)
+!
+!   Construct an 1d  non-equidistant mesh given a
+!   mesh distribution function.
+!
+  IMPLICIT NONE
+  DOUBLE PRECISION, INTENT(in) :: c(5)
+  INTEGER, INTENT(iN) :: nx
+  DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+  INTEGER :: nintg
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+  DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+  INTEGER :: i, k
+!
+  a=x(0)
+  b=x(nx)
+  nintg = 10*nx
+  ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+  dx = (b-a)/REAL(nintg)
+  xint(0) = a
+  fint(0) = 0.0d0
+  f1 = fdist(xint(0))
+  DO i=1,nintg
+     f0 = f1
+     xint(i) = xint(i-1) + dx
+     f1 = fdist(xint(i))
+     fint(i) = fint(i-1) + 0.5*(f0+f1)
+  END DO
+!
+!  Normalization
+!
+  scal = REAL(nx) / fint(nintg)
+  fint(0:nintg) = fint(0:nintg) * scal
+!!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+  k = 1
+  DO i=1,nintg-1
+     IF( fint(i) .GE. REAL(k) ) THEN
+        x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+             &   (k-fint(i))
+        k = k+1
+     END IF
+  END DO
+!
+  DEALLOCATE(xint, fint)
+CONTAINS
+  DOUBLE PRECISION FUNCTION fdist(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
+  END FUNCTION fdist
+END SUBROUTINE meshdist
+!+++
diff --git a/examples/pde3d.f90 b/examples/pde3d.f90
new file mode 100644
index 0000000..9fed6e9
--- /dev/null
+++ b/examples/pde3d.f90
@@ -0,0 +1,396 @@
+!>
+!> @file pde3d.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!  Solving the following 3d PDE using splines:
+!
+!    -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my)cos(z)^n, with f(x=1,y) = 0
+!    exact solution: f(x,y) = (1-x^2) x^m cos(my)cos(z)^n
+!
+  USE futils
+  USE fft
+  USE pde3d_mod
+!
+  IMPLICIT NONE
+  INTEGER :: nx, ny, nz, nidbas(3), ngauss(3), mbess, npow, nterms
+  LOGICAL :: nlppform
+  INTEGER :: i, j, k, kk, ij, dimx, dimy, dimz, nrank, kl, ku
+  INTEGER :: jder(3), it
+  DOUBLE PRECISION :: pi, coefx(5)
+  DOUBLE PRECISION :: dy, dz
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE   :: xgrid, ygrid, zgrid
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE   :: fftmass
+  DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: rhs, sol
+  DOUBLE COMPLEX, DIMENSION(:,:), ALLOCATABLE   :: crhs
+!
+  TYPE(spline2d1d), TARGET :: splxyz
+  TYPE(spline2d), POINTER  :: splxy
+  TYPE(gbmat)              :: mat
+!
+  CHARACTER(len=128) :: file='pde3d.h5'
+  INTEGER :: fid
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
+  DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
+  DOUBLE PRECISION :: seconds, mem, dopla
+  DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
+  INTEGER :: nits=500
+!
+  INTEGER, PARAMETER :: npart=10
+  DOUBLE PRECISION, DIMENSION(npart) :: xp, yp, zp, fp_calc, fp_anal
+!
+  INTEGER :: kmin, kmax
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE   :: fftmass_shifted
+!
+  NAMELIST /newrun/ nx, ny, nz, nidbas, ngauss, mbess, npow, nlppform, coefx
+!===========================================================================
+!              1.0 Prologue
+!
+!   Read in data specific to run
+!
+  nx = 8              ! Number of intervals in x
+  ny = 8              ! Number of intervals in y
+  nz = 8              ! Number of intervals in z
+  nidbas = (/3,3,3/)  ! Degree of splines
+  ngauss = (/4,4, 4/)    ! Number of Gauss points/interval
+  mbess = 2           ! Exponent of differential problem
+  npow = 2            ! Exponent of differential problem
+  nterms = 2          ! Number of terms in weak form
+  nlppform = .TRUE.   ! Use PPFORM for gridval or not
+  coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Define grid on x (=radial) & y (=poloidal) axis
+!
+  pi = 4.0d0*ATAN(1.0d0)
+  ALLOCATE(xgrid(0:nx), ygrid(0:ny), zgrid(0:nz))
+!
+  xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
+  CALL meshdist(coefx, xgrid, nx)
+! 
+  dy = 2.d0*pi/REAL(ny,8)        ! Equidistant in y
+  ygrid = (/ (j*dy, j=0,ny) /)
+!
+  dz = 2.0d0*pi/REAL(nz,8)       ! Equidistant in z
+  zgrid = (/ (k*dz, k=0,nz) /)
+!
+  WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
+  WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny)
+  WRITE(*,'(a/(10f8.3))') 'ZGRID', zgrid(0:nz)
+!
+!   Create hdf5 file
+!
+  CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
+  CALL attach(fid, '/', 'NX', nx)
+  CALL attach(fid, '/', 'NY', ny)
+  CALL attach(fid, '/', 'NZ', nz)
+  CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
+  CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
+  CALL attach(fid, '/', 'NIDBAS3', nidbas(3))
+  CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
+  CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
+  CALL attach(fid, '/', 'NGAUSS2', ngauss(3))
+  CALL attach(fid, '/', 'MBESS', mbess)
+!===========================================================================
+!              2.0 Discretize the PDE
+!
+!   Set up spline
+!
+  t0 = seconds()
+  CALL set_spline(nidbas, ngauss, xgrid, ygrid, zgrid, splxyz, &
+       &          (/.FALSE., .TRUE., .TRUE./), nlppform=nlppform)
+  splxy => splxyz%sp12
+!
+  WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots
+  WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots
+  WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Z', splxyz%sp3%knots
+!
+!   2D FE matrix assembly (in plane x-y)
+!
+  nrank = (nx+nidbas(1))*ny    ! Rank of the FE matrix
+  kl = (nidbas(1)+1)*ny -1     ! Number of sub-diagnonals
+  ku = kl                      ! Number of super-diagnonals
+  WRITE(*,'(a,5i6)') 'nrank, kl, ku', nrank, kl, ku
+!
+  CALL init(kl, ku, nrank, nterms, mat)
+  CALL dismat(splxy, mat)
+!!$  CALL putmat(fid, '/MAT0', mat, 'Assembled GB matrice')
+  ALLOCATE(arr(nrank))
+!
+!   BC on Matrix
+!
+  CALL ibcmat(mat, ny)
+  tmat = seconds() - t0
+!
+!   3D RHS assembly
+!
+  ALLOCATE(rhs(nrank,0:nz-1), sol(nrank,0:nz-1))
+  CALL disrhs3(mbess, npow, splxyz, rhs)
+!
+!   FFT in z of RHS
+!
+  ALLOCATE(crhs(nrank,0:nz-1))
+  crhs = rhs
+  CALL fourrow(crhs, 1)
+  crhs = crhs/REAL(nz,8)
+!
+!   Apply Mass matrix to crhs
+!
+  kmin =-nz/2
+  kmax = nz/2-1
+  CALL init_dft(splxyz%sp3, kmin, kmax)
+  ALLOCATE(fftmass_shifted(kmin:kmax))
+  ALLOCATE(fftmass(0:nz-1))
+  CALL calc_fftmass(splxyz%sp3, fftmass_shifted)
+  DO k=kmin,kmax
+     fftmass(MODULO(k+nz,nz)) = fftmass_shifted(k)
+  END DO
+  DO k=0,nz-1
+     crhs(:,k) = crhs(:,k)/fftmass(k)
+  END DO
+!
+!   Fourier transform back crhs to real space in z
+!
+  CALL fourrow(crhs, -1)
+  rhs(:,:) = REAL(crhs(:,:),8)
+!
+!   BC on RHS
+!
+  CALL ibcrhs3(rhs, ny)
+!
+  CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
+  WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem()
+!===========================================================================
+!              3.0 Solve the dicretized system
+!
+  t0 = seconds()
+  CALL factor(mat)
+  tfact = seconds() - t0
+  gflops1 = dopla('DGBTRF',nrank,nrank,kl,ku,0) / tfact / 1.d9
+
+  t0 = seconds()
+  CALL bsolve(mat, rhs, sol)
+!
+!   Backtransform of solution
+!
+  DO k=0,nz-1
+     sol(1:ny-1,k) = sol(ny,k)
+  END DO
+  tsolv = seconds() - t0
+  gflops2 = dopla('DGBTRS',nrank,1,kl,ku,0) / tsolv / 1.d9
+  CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
+!
+!   Spline coefficients, taking into account of periodicity in y and z
+!   Note: in SOL, y was numbered first.
+!
+  dimx = splxy%sp1%dim
+  dimy = splxy%sp2%dim
+  dimz = splxyz%sp3%dim
+  WRITE(*,'(/a,3i6)') 'dimx, dimy, dimz =', dimx, dimy, dimz
+  ALLOCATE(bcoef(0:dimx-1, 0:dimy-1, 0:dimz-1))
+  DO j=0,dimy-1
+     DO i=0,dimx-1
+        ij = MODULO(j,ny) + i*ny + 1
+        DO k=0,dimz-1
+           kk = MODULO(k,nz)
+           bcoef(i,j,k) = sol(ij,kk)
+        END DO
+     END DO
+  END DO
+  CALL putarr(fid, '/BCOEF', bcoef, 'Spline coefficients of solution')
+!===========================================================================
+!              4.0 Check the solution
+!
+!   Check function values computed with various method
+!
+  WRITE(*,'(/a)') 'Check with analytical solutions ...'
+  CALL RANDOM_NUMBER(xp)
+  yp=0.0d0
+  zp=0.0d0
+  jder = (/0,0,0/)
+  CALL gridval(splxyz, xp, yp, zp, fp_calc, jder, bcoef)
+!!$  WRITE(*,'(4a12)') 'X', 'CALC', 'ANAL', 'ERROR'
+!!$  DO i=1,npart
+!!$     fp_anal(i) = (1-xp(i)**2) * xp(i)**mbess &
+!!$          &        * COS(mbess*yp(i)) * COS(zp(i))**npow
+!!$     WRITE(*,'(4(1pe12.3))') xp(i), fp_calc(i), fp_anal(i), fp_calc(i)-fp_anal(i)
+!!$  END DO
+!
+  ALLOCATE(solcal(0:nx,0:ny,0:nz))
+  ALLOCATE(solana(0:nx,0:ny,0:nz))
+  ALLOCATE(errsol(0:nx,0:ny,0:nz))
+  DO i=0,nx
+     DO j=0,ny
+        DO k=0,nz
+           solana(i,j,k) = (1-xgrid(i)**2) * xgrid(i)**mbess &
+                &        * COS(mbess*ygrid(j)) * COS(zgrid(k))**npow
+        END DO
+     END DO
+  END DO
+!
+  jder = (/0,0,0/)
+  CALL gridval(splxyz, xgrid, ygrid, zgrid, solcal, jder, bcoef)
+  t0 = seconds()
+  CALL gridval(splxyz, xgrid, ygrid, zgrid, solcal, jder)
+  tgrid = seconds()-t0
+  errsol = solana - solcal
+  WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
+       &    norm2(errsol) / norm2(solana)
+!
+  CALL putarr(fid, '/xgrid', xgrid, 'r')
+  CALL putarr(fid, '/ygrid', ygrid, '\theta')
+  CALL putarr(fid, '/zgrid', zgrid, '\phi')
+  CALL putarr(fid, '/sol', solcal, 'Solutions')
+  CALL putarr(fid, '/solana', solana,'Exact solutions')
+!
+!   Check derivative d/dx
+!
+  DO i=0,nx
+     DO j=0,ny
+        DO k=0,nz
+           IF( mbess .EQ. 0 ) THEN
+              solana(i,j,k) = -2.0d0 * xgrid(i) * COS(zgrid(k))**npow
+           ELSE
+              solana(i,j,k) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
+                   &        xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) * &
+                   &        COS(zgrid(k))**npow
+           END IF
+        END DO
+     END DO
+  END DO
+!
+  jder = (/1,0,0/)
+  t0 = seconds()
+  CALL gridval(splxyz, xgrid, ygrid, zgrid, solcal, jder)
+  tgrid = tgrid + seconds()-t0
+  errsol = solana - solcal
+  CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
+  CALL putarr(fid, '/derivx_exact', solana)
+  WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dx', norm2(errsol)/norm2(solana)
+!
+!   Check derivative d/dy
+!
+  DO i=0,nx
+     DO j=0,ny
+        DO k=0,nz
+           solana(i,j,k) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * &
+                &        SIN(mbess*ygrid(j))* COS(zgrid(k))**npow
+        END DO
+     END DO
+  END DO
+!
+  jder = (/0,1,0/)
+  t0 = seconds()
+  CALL gridval(splxyz, xgrid, ygrid, zgrid, solcal, jder)
+  tgrid = tgrid + seconds()-t0
+  CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
+  CALL putarr(fid, '/derivy_exact', solana)
+  errsol = solana - solcal
+  WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dy', norm2(errsol)/norm2(solana)
+!
+!   Check derivative d/dz
+!
+  DO i=0,nx
+     DO j=0,ny
+        DO k=0,nz
+           solana(i,j,k) = -npow*(1-xgrid(i)**2) * xgrid(i)**mbess &
+                &        * COS(mbess*ygrid(j)) * COS(zgrid(k))**(npow-1) &
+                &        * SIN(zgrid(k))
+        END DO
+     END DO
+  END DO
+!
+  jder = (/0,0,1/)
+  t0 = seconds()
+  IF(nlppform) THEN
+     CALL gridval(splxyz, xgrid, ygrid, zgrid, solcal, jder)
+  ELSE
+     CALL gridval(splxyz, xgrid, ygrid, zgrid, solcal, jder, bcoef)
+  END IF
+  tgrid = tgrid + seconds()-t0
+  CALL putarr(fid, '/derivz', solcal, 'd/dz of solutions')
+  CALL putarr(fid, '/derivz_exact', solana)
+  errsol = solana - solcal
+  WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dz', norm2(errsol)/norm2(solana)
+!===========================================================================
+!              9.0  Epilogue
+!
+  WRITE(*,'(/a)') '---'
+  WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
+  WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
+  WRITE(*,'(a,1pe12.3)') 'Backsolve time (s)            ', tsolv
+  WRITE(*,'(a,1pe12.3)') 'gridval time (s)              ', tgrid
+  WRITE(*,'(a,2f10.3)') 'Factor/solve Gflop/s', gflops1, gflops2
+  WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem()
+!
+  DEALLOCATE(xgrid, ygrid, zgrid)
+  DEALLOCATE(rhs, sol)
+  DEALLOCATE(crhs)
+  DEALLOCATE(fftmass)
+  DEALLOCATE(fftmass_shifted)
+  DEALLOCATE(bcoef)
+  DEALLOCATE(solcal, solana, errsol)
+  DEALLOCATE(arr)
+  CALL destroy_sp(splxyz)
+  CALL destroy(mat)
+!
+  CALL closef(fid)
+!===========================================================================
+!
+CONTAINS
+  FUNCTION norm2(x)
+!
+!  Compute the 2-norm of array x
+!
+    IMPLICIT NONE
+    DOUBLE PRECISION :: norm2
+    DOUBLE PRECISION, INTENT(in) :: x(:,:,:)
+    DOUBLE PRECISION :: sum2
+    INTEGER :: i, j
+!
+    sum2 = 0.0d0
+    DO i=1,SIZE(x,1)
+       DO j=1,SIZE(x,2)
+          DO k=1,SIZE(x,3)
+             sum2 = sum2 + x(i,j,k)**2
+          END DO
+       END DO
+    END DO
+    norm2 = SQRT(sum2)
+  END FUNCTION norm2
+  SUBROUTINE prnmat(label, mat)
+    CHARACTER(len=*) :: label
+    DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
+    INTEGER :: i
+    WRITE(*,'(/a)') TRIM(label)
+    DO i=1,SIZE(mat,1)
+       WRITE(*,'(10(1pe12.3))') mat(i,:)
+    END DO
+  END SUBROUTINE prnmat
+END PROGRAM main
+!
+!+++
diff --git a/examples/pde3d_mod.f90 b/examples/pde3d_mod.f90
new file mode 100644
index 0000000..79c3edf
--- /dev/null
+++ b/examples/pde3d_mod.f90
@@ -0,0 +1,397 @@
+!>
+!> @file pde3d_mod.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+MODULE pde3d_mod
+  USE bsplines
+  USE matrix
+  IMPLICIT NONE
+!
+CONTAINS
+  SUBROUTINE dismat(spl, mat)
+!
+!   Assembly of FE matrix mat using spline spl
+!
+    TYPE(spline2d), INTENT(in) :: spl
+    TYPE(gbmat), INTENT(inout) :: mat
+!
+    INTEGER :: n1, nidbas1, ndim1, ng1
+    INTEGER :: n2, nidbas2, ndim2, ng2
+    INTEGER :: i, j, ig1, ig2
+    INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
+    DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
+    DOUBLE PRECISION:: contrib
+!
+    INTEGER :: kterms         ! Number of terms in weak form
+    INTEGER, DIMENSION(:,:), ALLOCATABLE :: idert, iderw  ! Derivative order
+    DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE  :: coefs ! Terms in weak form
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
+    WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
+    WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
+!
+!   Weak form
+!
+    kterms = mat%nterms
+    ALLOCATE(idert(kterms,2), iderw(kterms,2), coefs(kterms))
+!
+    ALLOCATE(fun1(0:nidbas1,0:1)) ! Spline and 1st derivative
+    ALLOCATE(fun2(0:nidbas2,0:1)) ! 
+!
+!   Gauss quadature
+!
+    CALL get_gauss(spl%sp1, ng1)
+    CALL get_gauss(spl%sp2, ng2)
+    ALLOCATE(xg1(ng1), wg1(ng1))
+    ALLOCATE(xg2(ng1), wg2(ng1))
+!===========================================================================
+!              2.0 Assembly loop
+!
+    DO i=1,n1
+       CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
+       DO ig1=1,ng1
+          CALL basfun(xg1(ig1), spl%sp1, fun1, i)
+          DO j=1,n2
+             CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
+             DO ig2=1,ng2
+                CALL basfun(xg2(ig2), spl%sp2, fun2, j)
+                CALL coefeq(xg1(ig1), xg2(ig2), idert, iderw, coefs)
+                DO iterm=1,kterms
+                   DO iw1=0,nidbas1  ! Weight function in dir 1
+                      igw1 = i+iw1
+                      DO iw2=0,nidbas2  ! Weight function in dir 2
+                         igw2 = MODULO(j+iw2-1, n2) + 1
+                         irow = igw2 + (igw1-1)*n2
+                         DO it1=0,nidbas1  ! Test function in dir 1
+                            igt1 = i+it1
+                            DO it2=0,nidbas2  ! Test function in dir 2
+                               igt2 = MODULO(j+it2-1, n2) + 1
+                               jcol = igt2 + (igt1-1)*n2
+                               contrib = fun1(iw1,iderw(iterm,1)) * &
+                                    &    fun2(iw2,iderw(iterm,2)) * &
+                                    &    coefs(iterm) *             &
+                                    &    fun2(it2,idert(iterm,2)) * &
+                                    &    fun1(it1,idert(iterm,1)) * &
+                                    &    wg1(ig1) * wg2(ig2)
+                               CALL updtmat(mat, irow, jcol, contrib)
+                            END DO
+                         END DO
+                      END DO
+                   END DO
+                END DO
+             END DO
+          END DO
+       END DO
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(xg1, wg1, fun1)
+    DEALLOCATE(xg2, wg2, fun2)
+    DEALLOCATE(idert, iderw, coefs)
+!
+  CONTAINS
+    SUBROUTINE coefeq(x, y, idt, idw, c)
+      DOUBLE PRECISION, INTENT(in) :: x, y
+      INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
+      DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
+!
+! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
+!
+      c(1) = x        ! 
+      idt(1,1) = 1
+      idt(1,2) = 0
+      idw(1,1) = 1
+      idw(1,2) = 0
+!
+      c(2) = 1.d0/x
+      idt(2,1) = 0
+      idt(2,2) = 1
+      idw(2,1) = 0
+      idw(2,2) = 1
+    END SUBROUTINE coefeq
+  END SUBROUTINE dismat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE disrhs3(mbess, npow, spl, rhs)
+!
+!   Assembly the RHS using 3d spline spl
+!
+    INTEGER, INTENT(in)           :: mbess, npow
+    TYPE(spline2d1d), TARGET      :: spl
+    DOUBLE PRECISION, INTENT(out) :: rhs(:,:)
+!
+    TYPE(spline1d), POINTER :: sp1, sp2, sp3
+    INTEGER :: n1, nidbas1, ndim1, ng1
+    INTEGER :: n2, nidbas2, ndim2, ng2
+    INTEGER :: n3, nidbas3, ndim3, ng3
+    INTEGER :: i, j, k, ig1, ig2, ig3, k1, k2, k3, i1, j2, ij, kk, nrank
+    DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xg3(:), wg3(:), fun3(:,:)
+    DOUBLE PRECISION:: contrib
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    sp1 => spl%sp12%sp1
+    sp2 => spl%sp12%sp2
+    sp3 => spl%sp3
+!
+    CALL get_dim(sp1, ndim1, n1, nidbas1)
+    CALL get_dim(sp2, ndim2, n2, nidbas2) 
+    CALL get_dim(sp3, ndim3, n3, nidbas3) 
+!
+    ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
+    ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
+    ALLOCATE(fun3(0:nidbas3,1)) ! needs only basis functions (no derivatives)
+!
+!   Gauss quadature
+!
+    CALL get_gauss(sp1, ng1)
+    CALL get_gauss(sp2, ng2)
+    CALL get_gauss(sp3, ng3)
+    WRITE(*,'(/a, 3i3)') 'Gauss points and weights, ngauss =', ng1, ng2, ng3
+    ALLOCATE(xg1(ng1), wg1(ng1))
+    ALLOCATE(xg2(ng2), wg2(ng2))
+    ALLOCATE(xg3(ng3), wg3(ng3))
+!===========================================================================
+!              2.0 Assembly loop
+!
+    nrank = SIZE(rhs,1)
+    rhs(1:nrank,1:n3) = 0.0d0
+!
+    DO i=1,n1
+       CALL get_gauss(sp1, ng1, i, xg1, wg1)
+       DO ig1=1,ng1
+          CALL basfun(xg1(ig1), sp1, fun1, i)
+          DO j=1,n2
+             CALL get_gauss(sp2, ng2, j, xg2, wg2)
+             DO ig2=1,ng2
+                CALL basfun(xg2(ig2), sp2, fun2, j)
+                DO k=1,n3
+                   CALL get_gauss(sp3, ng3, k, xg3, wg3)
+                   DO ig3=1,ng3
+                      CALL basfun(xg3(ig3), sp3, fun3, k)
+                      contrib = wg1(ig1)*wg2(ig2)*wg3(ig3) * &
+                           &    rhseq(xg1(ig1), xg2(ig2), xg3(ig3), mbess, npow)
+                      DO k1=0,nidbas1
+                         i1 = i+k1
+                         DO k2=0,nidbas2
+                            j2 = MODULO(j+k2-1,n2) + 1
+                            ij = j2 + (i1-1)*n2
+                            DO k3=0,nidbas3
+                               kk = MODULO(k+k3-1,n3) + 1
+                               rhs(ij,kk) = rhs(ij, kk) + &
+                                    &  contrib*fun1(k1,1)*fun2(k2,1)*fun3(k3,1)
+                            END DO
+                         END DO
+                      END DO
+                   END DO
+                END DO
+             END DO
+          END DO
+       END DO
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(xg1, wg1, fun1)
+    DEALLOCATE(xg2, wg2, fun2)
+    DEALLOCATE(xg3, wg3, fun3)
+!
+  CONTAINS
+    DOUBLE PRECISION FUNCTION rhseq(x1, x2, x3, m, n)
+      DOUBLE PRECISION, INTENT(in) :: x1, x2, x3
+      INTEGER, INTENT(in) :: m, n
+      rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)*COS(x3)**n
+    END FUNCTION rhseq
+  END SUBROUTINE disrhs3
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE meshdist(c, x, nx)
+!
+!   Construct an 1d  non-equidistant mesh given a
+!   mesh distribution function.
+!
+    DOUBLE PRECISION, INTENT(in) :: c(5)
+    INTEGER, INTENT(iN) :: nx
+    DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+    INTEGER :: nintg
+    DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+    DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+    INTEGER :: i, k
+!
+    a=x(0)
+    b=x(nx)
+    nintg = 10*nx
+    ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+    dx = (b-a)/REAL(nintg)
+    xint(0) = a
+    fint(0) = 0.0d0
+    f1 = fdist(xint(0))
+    DO i=1,nintg
+       f0 = f1
+       xint(i) = xint(i-1) + dx
+       f1 = fdist(xint(i))
+       fint(i) = fint(i-1) + 0.5*(f0+f1)
+    END DO
+!
+!  Normalization
+!
+    scal = REAL(nx) / fint(nintg)
+    fint(0:nintg) = fint(0:nintg) * scal
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+    k = 1
+    DO i=1,nintg-1
+       IF( fint(i) .GE. REAL(k) ) THEN
+          x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+               &   (k-fint(i))
+          k = k+1
+       END IF
+    END DO
+!
+    DEALLOCATE(xint, fint)
+  CONTAINS
+    DOUBLE PRECISION FUNCTION fdist(x)
+      DOUBLE PRECISION, INTENT(in) :: x
+      fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
+    END FUNCTION fdist
+  END SUBROUTINE meshdist
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE ibcmat(mat, ny)
+!
+!   Apply BC on matrix
+!
+    TYPE(gbmat), INTENT(inout) :: mat
+    INTEGER, INTENT(in) :: ny
+    INTEGER :: kl, ku, nrank, i, j
+    DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
+!===========================================================================
+!              1.0 Prologue
+!
+    kl = mat%kl
+    ku = mat%ku
+    nrank = mat%rank
+    ALLOCATE(zsum(nrank), arr(nrank))
+!===========================================================================
+!              2.0 Unicity at the axis
+!
+!   The vertical sum on the NY-th row
+!
+    zsum = 0.0d0
+    DO i=1,ny
+       arr = 0.0d0
+       CALL getrow(mat, i, arr)
+       DO j=1,ny+ku
+          zsum(j) = zsum(j) + arr(j)
+       END DO
+    END DO
+    CALL putrow(mat, ny, zsum)
+!
+!   The horizontal sum on the NY-th column
+!
+    zsum = 0.0d0
+    DO j=1,ny
+       arr = 0.0d0
+       CALL getcol(mat, j, arr)
+       DO i=ny,ny+kl
+          zsum(i) = zsum(i) + arr(i)
+       END DO
+    END DO
+    CALL putcol(mat, ny, zsum)
+!
+!   The away operator
+!
+    DO j = 1,ny-1
+       arr = 0.0d0; arr(j) = 1.0d0
+       CALL putcol(mat, j, arr)     
+    END DO
+!
+    DO i = 1,ny-1
+       arr = 0.0d0; arr(i) = 1.0d0
+       CALL putrow(mat, i, arr)     
+    END DO
+!  
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+    DO j = nrank, nrank-ny+1, -1
+       arr = 0.0d0; arr(j) = 1.0d0
+       CALL putcol(mat, j, arr)     
+    END DO
+    !
+    DO i = nrank, nrank-ny+1, -1
+       arr = 0.0d0; arr(i) = 1.0d0
+       CALL putrow(mat, i, arr)     
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(zsum, arr)
+!
+  END SUBROUTINE ibcmat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE ibcrhs3(rhs, ny)
+!
+!   Apply BC on RHS
+!
+    DOUBLE PRECISION, INTENT(inout) :: rhs(:,:)
+    INTEGER, INTENT(in) :: ny
+    INTEGER :: nrank, nz, k
+    DOUBLE PRECISION :: zsum
+!===========================================================================
+!              1.0 Prologue
+!
+    nrank = SIZE(rhs,1)
+    nz = SIZE(rhs,2)
+!===========================================================================
+!              2.0 Unicity at the axis
+!
+!   The vertical sum
+!
+    DO k=1,nz
+       zsum = SUM(rhs(1:ny,k))
+       rhs(ny,k) = zsum
+       rhs(1:ny-1,k) = 0.0d0
+    END DO
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+    DO k=1,nz
+       rhs(nrank-ny+1:nrank,k) = 0.0d0
+    END DO
+  END SUBROUTINE ibcrhs3
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+END MODULE pde3d_mod
diff --git a/examples/poisson_mumps.f90 b/examples/poisson_mumps.f90
new file mode 100644
index 0000000..941a715
--- /dev/null
+++ b/examples/poisson_mumps.f90
@@ -0,0 +1,169 @@
+!>
+!> @file poisson_mumps.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+  USE mumps_bsplines
+  USE cds
+!
+  IMPLICIT NONE
+  INCLUDE 'mpif.h'
+  TYPE(mumps_mat) :: amat
+  TYPE(cds_mat) :: amat_cds
+  DOUBLE PRECISION, ALLOCATABLE :: rhs(:), sol(:), arow(:)
+  INTEGER :: nx=5, ny=4
+  INTEGER :: n
+  INTEGER :: i, j, irow
+  INTEGER :: ierr, me
+  INTEGER, ALLOCATABLE :: dists(:)
+  DOUBLE PRECISION :: t0
+!
+  CALL mpi_init(ierr)
+  CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
+!
+  IF(me.EQ.0) THEN
+     WRITE(*,'(a)', advance='no') 'Enter nx, ny: '
+     READ(*,*) nx, ny
+  END IF
+  CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  n = nx*ny  ! Rank of the matrix
+  ALLOCATE(rhs(n))
+  ALLOCATE(sol(n))
+  ALLOCATE(arow(n))
+!
+  WRITE(*,'(/a)') 'Mumps using CSR mat ...'
+  CALL init(n, 1, amat)
+!
+!  Construct the matrix and RHS
+!
+  t0 = mpi_wtime(0)
+  DO j=1,ny
+     DO i=1,nx
+        arow = 0.0d0
+        irow = numb(i,j)
+        arow(irow) = 4.0d0
+        IF(i.GT.1)  arow(numb(i-1,j)) = -1.0d0
+        IF(i.LT.nx) arow(numb(i+1,j)) = -1.0d0
+        IF(j.GT.1)  arow(numb(i,j-1)) = -1.0d0
+        IF(j.LT.ny) arow(numb(i,j+1)) = -1.0d0
+        CALL putrow(amat, irow, arow)
+        rhs(irow) = SUM(arow)   ! => the exact solution is 1
+     END DO
+  END DO
+!
+  WRITE(*,'(a,i6)') 'Rank of matrix', n
+  WRITE(*,'(a,i6)') 'Number of non-zeros of matrix', get_count(amat)
+  WRITE(*,'(a,1pe12.3)') 'Matrix construction time (s)', mpi_wtime()-t0
+!
+!  Factor the amat matrix (Reordering, symbolic and numerical factorization)
+!
+  t0 = mpi_wtime(0)
+  CALL factor(amat, nlmetis=.TRUE.)
+  sol=rhs
+  CALL bsolve(amat, sol)
+  WRITE(*,'(a,1pe12.3)') 'Direct solve time (s)', mpi_wtime()-t0
+!
+  IF(me.EQ.0) THEN
+     WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(sol-1.0d0))
+  END IF
+  CALL destroy(amat)
+!
+!   CDS matrix
+!
+  WRITE(*,'(/a)') 'Mumps using CDS mat ...'
+  IF(ALLOCATED(dists)) DEALLOCATE(dists)
+  ALLOCATE(dists(-2:2))
+  dists = [-nx, -1, 0, 1, nx]
+  WRITE(*,'(a/(20i4))') 'dists used in INIT =', dists  
+  CALL init(n, dists, 1, amat_cds)
+!
+  t0 = mpi_wtime(0)
+  DO j=1,ny
+     DO i=1,nx
+        arow = 0.0d0
+        irow = numb(i,j)
+        amat_cds%val(irow,0) = 4.0d0
+        IF(i.GT.1)  amat_cds%val(irow,-1) = -1.0d0
+        IF(i.LT.nx) amat_cds%val(irow,+1) = -1.0d0
+        IF(j.GT.1)  amat_cds%val(irow,-2) = -1.0d0
+        IF(j.LT.ny) amat_cds%val(irow,+2) = -1.0d0
+     END DO
+  END DO
+  WRITE(*,'(a,1pe12.3)') 'Matrix construction time (s)', mpi_wtime()-t0
+!
+!   Compute dists of amat
+  PRINT*, 'stat of mata%mat', ASSOCIATED(amat%mat)
+  PRINT*, 'rank of mata', amat%mat%rank
+  CALL mstruct(amat%mat, dists)
+  WRITE(*,'(A/(20i4))') 'dists from MSTRUCT=', dists  
+!
+  t0 = mpi_wtime(0)
+  CALL cds2mumps(amat_cds, amat)
+  CALL factor(amat, debug=.FALSE.)
+  sol = rhs
+  CALL bsolve(amat, sol)
+  WRITE(*,'(a,1pe12.3)') 'Direct solve time (s)', mpi_wtime()-t0
+!
+  IF(me.EQ.0) THEN
+     WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(sol-1.0d0))
+  END IF
+!
+!   Clean up
+!
+  DEALLOCATE(rhs)
+  DEALLOCATE(sol)
+  DEALLOCATE(arow)
+  CALL destroy(amat)
+  CALL mpi_finalize(ierr)
+CONTAINS
+  SUBROUTINE mstruct(mat, dists)
+    TYPE(spmat), INTENT(in) :: mat
+    INTEGER, ALLOCATABLE, INTENT(inout) :: dists(:)
+    TYPE(elt), POINTER :: t
+    INTEGER :: n, i, j0
+    j0 = LBOUND(dists,1)
+    n = mat%rank
+    PRINT*, 'rank of mat', n
+    DO i=1,n                    ! scan the matrix rows   
+       t => mat%row(i)%row0
+       DO WHILE(ASSOCIATED(t))  ! walk thru the linked list row(i)
+          j = t%index
+          IF(ABS(t%val) .LE. EPSILON(0.0d0)) THEN
+             dists(j0) = t%index-i ! distance from main diag
+             j0 = j0+1
+          END IF
+          t => t%next
+       END DO
+    END DO
+  END SUBROUTINE mstruct
+  INTEGER FUNCTION numb(i,j)
+!
+!  One-dimensional numbering
+!  Number first x then y
+!
+    INTEGER, INTENT(in) :: i, j
+    numb = (j-1)*nx + i
+  END FUNCTION numb
+END PROGRAM main
diff --git a/examples/poisson_petsc.f90 b/examples/poisson_petsc.f90
new file mode 100644
index 0000000..341afba
--- /dev/null
+++ b/examples/poisson_petsc.f90
@@ -0,0 +1,218 @@
+!>
+!> @file poisson_petsc.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+  USE petsc_bsplines
+  IMPLICIT NONE
+  TYPE(petsc_mat) :: amat
+  DOUBLE PRECISION, ALLOCATABLE :: rhs(:), sol(:), arow(:)
+  INTEGER :: nx=5, ny=4, ntrials=10
+  INTEGER :: nitmax=10000, nits
+  DOUBLE PRECISION :: rtol=1.e-9
+  INTEGER :: n, nnz, nnz_loc
+  INTEGER :: i, j, irow, jcol
+  INTEGER :: ierr, me, npes, istart, iend
+  DOUBLE PRECISION :: t0
+  INTEGER :: ncols, cols(5)   !  Max nnz by row   .LE. 5
+!
+  CHARACTER(len=128) :: matfile='mat.dat', rhsfile='rhs.dat'
+  LOGICAL :: file_exist
+!
+  NAMELIST /newrun/ nx, ny, nitmax, rtol, matfile, rhsfile
+!
+  CALL mpi_init(ierr)
+  CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
+  CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
+!
+  IF(me.EQ.0) THEN
+     READ(*, newrun)
+     WRITE(*, newrun)
+     WRITE(*,'(a,i6)') 'npes =', npes
+  END IF
+  CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(nitmax, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(rtol, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(matfile, 128, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(rhsfile, 128, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
+!
+  n = nx*ny  ! Rank of the matrix
+!
+!   Initialize matrix
+!
+  CALL init(n, 1, amat, comm=MPI_COMM_WORLD)
+  istart = amat%istart
+  iend   = amat%iend
+!!$  WRITE(*,'(a,i3.3,a,3i6)') 'PE', me, ': istart, iend', istart, iend
+!
+!
+  INQUIRE(file=TRIM(matfile), exist=file_exist)
+!
+  IF( file_exist ) THEN
+     CALL mpi_barrier(MPI_COMM_WORLD,ierr)
+     t0 = mpi_wtime()
+     CALL load_mat(amat, matfile)
+     CALL mpi_barrier(MPI_COMM_WORLD,ierr)
+     IF(me.EQ.0) WRITE(*,'(a,1pe12.3)') 'Mat. loading time (s)', mpi_wtime()-t0
+  ELSE
+!
+!  Construct the matrix
+!
+     CALL mpi_barrier(MPI_COMM_WORLD,ierr)
+     t0 = mpi_wtime()
+     ALLOCATE(arow(5))   !  Max nnz per row .LE. 5
+     DO j=1,ny
+        DO i=1,nx
+           irow = numb(i,j)
+           IF( irow.GE.istart .AND. irow.LE.iend) THEN
+              ncols = 1; cols(ncols) = irow; arow(ncols) = 4.0d0
+              IF(i.GT.1)  THEN 
+                 ncols = ncols+1
+                 cols(ncols) = numb(i-1,j); arow(ncols) = -1.0d0
+              END IF
+              IF(i.LT.nx) THEN
+                 ncols = ncols+1
+                 cols(ncols) = numb(i+1,j); arow(ncols) = -1.0d0
+              END IF
+              IF(j.GT.1)  THEN
+                 ncols = ncols+1
+                 cols(ncols) = numb(i,j-1); arow(ncols) = -1.0d0
+              END IF
+              IF(j.LT.ny) THEN 
+                 ncols = ncols+1
+                 cols(ncols) = numb(i,j+1); arow(ncols) = -1.0d0
+              END IF
+              CALL putrow(amat, irow, arow(1:ncols), cols(1:ncols))
+           END IF
+        END DO
+     END DO
+     DEALLOCATE(arow)
+     CALL mpi_barrier(MPI_COMM_WORLD,ierr)
+     IF(me.EQ.0) WRITE(*,'(a,1pe12.3)') 'Mat. construction time (s)', mpi_wtime()-t0
+!
+!   Convert to PETSC mat
+!
+     CALL mpi_barrier(MPI_COMM_WORLD,ierr)
+     t0=mpi_wtime()
+     CALL to_mat(amat)
+     CALL mpi_barrier(MPI_COMM_WORLD,ierr)
+     IF(me.EQ.0) WRITE(*,'(a,1pe12.3)') 'Mat. conversion time (s)', mpi_wtime()-t0
+!
+     CALL save_mat(amat, matfile)
+  END IF
+!
+!  Matrix size and partition could have changed after loading from file!
+!
+  n = amat%rank
+  istart = amat%istart
+  iend   = amat%iend
+!
+  nnz_loc = get_count(amat)
+  CALL mpi_reduce(nnz_loc, nnz, 1, MPI_INTEGER, mpi_sum, 0, MPI_COMM_WORLD, ierr)
+  IF(npes.LE.4) THEN
+     WRITE(*,'(a,i3.3,a,3i6)') 'PE', me, ': istart, iend (after), nloc, nnz_loc', &
+          &   istart, iend, iend-istart+1, nnz_loc
+  END IF
+!
+!  Construct or read  RHS
+!
+  CALL mpi_barrier(MPI_COMM_WORLD,ierr)
+  t0 = mpi_wtime()
+  ALLOCATE(rhs(n))
+  INQUIRE(file=TRIM(rhsfile), exist=file_exist)
+  IF( file_exist ) THEN
+     OPEN(unit=99, file=TRIM(rhsfile), status='old', form='unformatted')
+     READ(99) rhs(1:n)
+     CLOSE(99)
+     CALL mpi_barrier(MPI_COMM_WORLD,ierr)
+     IF(me.EQ.0) THEN
+        WRITE(*,'(a,1pe12.3)') 'RHS read time (s)', mpi_wtime()-t0
+     END IF
+  ELSE
+     rhs = 0.0d0
+     ALLOCATE(arow(n))
+     DO i=istart, iend 
+        arow = 0.0d0
+        CALL getrow(amat, i, arow)
+        rhs(i) = SUM(arow)   ! => the exact solution is 1
+     END DO
+     arow = rhs
+     CALL mpi_allreduce(arow, rhs, n, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, ierr)
+     DEALLOCATE(arow)
+     IF( me.EQ.0 ) THEN   ! All processes have the gobla rhs
+        OPEN(unit=99, file=TRIM(rhsfile), status='new', form='unformatted')
+        WRITE(99) rhs(1:n)
+        CLOSE(99)
+     END IF
+     CALL mpi_barrier(MPI_COMM_WORLD,ierr)
+     IF(me.EQ.0) THEN
+        WRITE(*,'(a,1pe12.3)') 'RHS construction time (s)', mpi_wtime()-t0
+     END IF
+  END IF
+  CLOSE(99)
+  CALL mpi_barrier(MPI_COMM_WORLD,ierr)
+!
+!  Back solve
+! 
+  ALLOCATE(sol(n))
+!
+  CALL mpi_barrier(MPI_COMM_WORLD,ierr)
+  sol = 0.0d0
+  t0=mpi_wtime()
+  CALL bsolve(amat, rhs, sol, rtol, nitmax, nits)
+  CALL mpi_barrier(MPI_COMM_WORLD,ierr)
+  IF(me.EQ.0) THEN
+     WRITE(*,'(a,1pe12.3,i8,1pe12.3)') 'Error, nits, solve time(s)', &
+          &   MAXVAL(ABS(sol-1.0d0)), nits, mpi_wtime()-t0
+  END IF
+  CALL mpi_barrier(MPI_COMM_WORLD,ierr)
+  t0=mpi_wtime()
+  DO i=1,ntrials
+     sol = 0.0d0
+     CALL bsolve(amat, rhs, sol, rtol, nitmax, nits)
+  END DO
+  CALL mpi_barrier(MPI_COMM_WORLD,ierr)
+  IF(me.EQ.0) THEN
+     WRITE(*,'(a,1pe12.3,i8,1pe12.3)') 'Error, nits, solve time(s)', &
+          &   MAXVAL(ABS(sol-1.0d0)), nits, (mpi_wtime()-t0)/REAL(ntrials)
+  END IF
+!
+!   Clean up
+!
+  DEALLOCATE(rhs)
+  DEALLOCATE(sol)
+  CALL destroy(amat)
+  CALL PetscFinalize(ierr)
+  CALL mpi_finalize(ierr)
+CONTAINS
+  INTEGER FUNCTION numb(i,j)
+!
+!  One-dimensional numbering
+!  Number first x then y
+!
+    INTEGER, INTENT(in) :: i, j
+    numb = (j-1)*nx + i
+  END FUNCTION numb
+END PROGRAM main
diff --git a/examples/ppde3d.f90 b/examples/ppde3d.f90
new file mode 100644
index 0000000..00e4f06
--- /dev/null
+++ b/examples/ppde3d.f90
@@ -0,0 +1,510 @@
+!>
+!> @file ppde3d.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!  Solving the following 3d PDE using splines:
+!
+!    -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my)cos(z)^n, with f(x=1,y) = 0
+!    exact solution: f(x,y) = (1-x^2) x^m cos(my)cos(z)^n
+!
+  USE futils
+  USE fft
+  USE pputils2, ONLY : pptransp
+  USE ppde3d_mod
+!
+  IMPLICIT NONE
+!
+  CHARACTER(len=128) :: infile="ppde3d.in"
+  INTEGER :: l
+  INTEGER :: nx, ny, nz, nidbas(3), ngauss(3), mbess, npow, nterms
+  INTEGER :: startz, endz, nzloc
+  INTEGER :: start_rank, end_rank, nrank_loc
+  LOGICAL :: nlppform
+  INTEGER :: i, j, k, kk, ij, dimx, dimy, dimz, nrank, kl, ku
+  INTEGER :: jder(3), it
+  DOUBLE PRECISION :: pi, coefx(5)
+  DOUBLE PRECISION :: dy, dz
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE   :: xgrid, ygrid, zgrid
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE   :: fftmass
+  DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: rhs, sol, rhs_t
+  DOUBLE COMPLEX, DIMENSION(:,:), ALLOCATABLE   :: crhs_t
+!
+  TYPE(spline2d1d), TARGET :: splxyz
+  TYPE(spline2d), POINTER  :: splxy
+  TYPE(spline1d)           :: splz
+  TYPE(gbmat)              :: mat
+!
+  CHARACTER(len=128) :: file='ppde3d.h5'
+  INTEGER :: fid
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
+  DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
+  DOUBLE PRECISION :: seconds, mem, dopla
+  DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
+  INTEGER :: nits=500
+!
+  INTEGER, PARAMETER :: npart=10000000
+  INTEGER :: nploc
+  DOUBLE PRECISION, DIMENSION(npart) :: xp, yp, zp, fp_calc, fp_anal
+  DOUBLE PRECISION zsuml, zsumg, errnorm2
+!
+  NAMELIST /newrun/ nx, ny, nz, nidbas, ngauss, mbess, npow, nlppform, coefx
+!===========================================================================
+!              1.0 Prologue
+!
+!   Init MPI
+!
+  CALL mpi_init(ierr)
+  CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
+  CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
+!
+!   Get input file name from command argument
+!
+  IF( COMMAND_ARGUMENT_COUNT() .EQ. 1 ) THEN
+     CALL GET_COMMAND_ARGUMENT(1, infile, l, ierr)
+  END IF
+!
+!   Read in data specific to run
+!
+  nx = 8              ! Number of intervals in x
+  ny = 8              ! Number of intervals in y
+  nz = 8              ! Number of intervals in z
+  nidbas = (/3,3,3/)  ! Degree of splines
+  ngauss = (/4,4, 4/)    ! Number of Gauss points/interval
+  mbess = 2           ! Exponent of differential problem
+  npow = 2            ! Exponent of differential problem
+  nterms = 2          ! Number of terms in weak form
+  nlppform = .TRUE.   ! Use PPFORM for gridval or not
+  coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+!
+  OPEN(unit=99, file=TRIM(infile), status='old', action='read')
+  READ(99,newrun)
+  IF( me.EQ.0) THEN
+     WRITE(*,newrun)
+  END IF
+  CLOSE(99)
+!
+!   Define grid on x (=radial) & y (=poloidal) axis
+!
+  pi = 4.0d0*ATAN(1.0d0)
+  ALLOCATE(xgrid(0:nx), ygrid(0:ny), zgrid(0:nz))
+!
+  xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
+  CALL meshdist(coefx, xgrid, nx)
+! 
+  dy = 2.d0*pi/REAL(ny,8)        ! Equidistant in y
+  ygrid = (/ (j*dy, j=0,ny) /)
+!
+!   Partitionned toroidal grid z
+!
+  dz = 2.0d0*pi/REAL(nz,8)       ! Equidistant in z
+  zgrid = (/ (k*dz, k=0,nz) /)
+  CALL dist1d(0, nz, startz, nzloc)
+  endz = startz+nzloc
+!!$  PRINT*, 'PE', me, ' startz, endz, nzloc', startz, endz, nzloc
+!
+  IF( me.EQ.0) THEN
+     WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
+     WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny)
+     WRITE(*,'(a/(10f8.3))') 'ZGRID', zgrid(0:nz)
+  END IF
+!
+!   Create hdf5 file
+!
+  CALL creatf(file, fid, 'PDE2D Result File', real_prec='d', &
+       &      mpicomm=MPI_COMM_WORLD)
+  CALL attach(fid, '/', 'NX', nx)
+  CALL attach(fid, '/', 'NY', ny)
+  CALL attach(fid, '/', 'NZ', nz)
+  CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
+  CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
+  CALL attach(fid, '/', 'NIDBAS3', nidbas(3))
+  CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
+  CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
+  CALL attach(fid, '/', 'NGAUSS2', ngauss(3))
+  CALL attach(fid, '/', 'MBESS', mbess)
+  CALL putarr(fid, '/xgrid', xgrid, 'r')
+  CALL putarr(fid, '/ygrid', ygrid, '\theta')
+  CALL putarr(fid, '/zgrid', zgrid(0:nz-1), '\phi')
+!===========================================================================
+!              2.0 Discretize the PDE
+!
+!   Set up spline
+!
+  t0 = seconds()
+  CALL set_spline(nidbas, ngauss, xgrid, ygrid, zgrid(startz:endz), &
+       &          splxyz, (/.FALSE., .TRUE., .TRUE./), nlppform=nlppform)
+  splxy => splxyz%sp12
+!
+  IF( me.EQ.0) THEN
+     WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots
+     WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots
+  END IF
+  CALL disp(splxyz%sp3%knots, 'KNOTS in Z', MPI_COMM_WORLD)
+!
+!   2D FE matrix assembly (in plane x-y)
+!
+  nrank = (nx+nidbas(1))*ny    ! Rank of the FE matrix
+  kl = (nidbas(1)+1)*ny -1     ! Number of sub-diagnonals
+  ku = kl                      ! Number of super-diagnonals
+  IF(me.EQ.0) THEN
+     WRITE(*,'(a,5i6)') 'nrank, kl, ku', nrank, kl, ku
+  END IF
+!
+  CALL init(kl, ku, nrank, nterms, mat)
+  CALL dismat(splxy, mat)
+  ALLOCATE(arr(nrank))
+!
+!   BC on Matrix
+!
+  CALL ibcmat(mat, ny)
+  tmat = seconds() - t0
+!
+!   3D RHS assembly
+!
+  ALLOCATE(rhs(nrank,0:nzloc+nidbas(3)-1)) ! With right guard cells nzloc:nzloc+nidbas3-1
+  ALLOCATE(sol(nrank,0:nzloc-1))
+  CALL disrhs3(mbess, npow, splxyz, rhs)
+!
+  zsuml = SUM(ABS(rhs(:,0:nzloc-1)))
+  CALL mpi_reduce(zsuml, zsumg, 1, MPI_DOUBLE_PRECISION, MPI_SUM , 0, &
+       &          MPI_COMM_WORLD, ierr)
+  IF(me.EQ.0) PRINT*, 'sum of rhs after DISRHS3', zsumg
+!
+!   FFT in z of RHS
+!
+  CALL dist1d(1, nrank, start_rank, nrank_loc)
+  end_rank = start_rank+nrank_loc-1
+  ALLOCATE(rhs_t(0:nz-1,nrank_loc), crhs_t(0:nz-1,nrank_loc))
+!
+  CALL pptransp(MPI_COMM_WORLD, rhs(:,0:nzloc-1), rhs_t)
+  crhs_t = rhs_t
+  CALL fourcol(crhs_t,1)
+  crhs_t = crhs_t/REAL(nz,8)
+!
+!   Apply Mass matrix to crhs
+!
+  PRINT*, 4
+  CALL set_spline(nidbas(3), ngauss(3), zgrid, splz, .TRUE.)
+  ALLOCATE(fftmass(0:nz-1))
+  PRINT*, 5
+!!$  CALL calc_fftmass(splz, fftmass)
+  CALL calc_fftmass_old(splz, fftmass)
+  IF(me.EQ.0) THEN
+     WRITE(*,'(/a/(10(1pe12.3)))') 'Mass matrix', fftmass
+  END IF
+  DO k=0,nz-1
+     crhs_t(k,:) = crhs_t(k,:)/fftmass(k)
+  END DO
+!
+!   Fourier transform back crhs to real space in z
+!
+  CALL fourcol(crhs_t, -1)
+  rhs_t(:,:) = REAL(crhs_t(:,:),8)
+  CALL pptransp(MPI_COMM_WORLD, rhs_t, sol)  ! Put the final RHS in SOL
+!
+!   BC on RHS
+!
+  CALL ibcrhs3(sol, ny)
+!
+  IF(me.EQ.0) THEN
+     WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem()
+  END IF
+!===========================================================================
+!              3.0 Solve the dicretized system
+!
+  t0 = seconds()
+  CALL factor(mat)
+  tfact = seconds() - t0
+  gflops1 = dopla('DGBTRF',nrank,nrank,kl,ku,0) / tfact / 1.d9
+
+  t0 = seconds()
+  CALL bsolve(mat, sol)
+!
+!   Backtransform of solution
+!
+  DO k=0,nzloc-1
+     sol(1:ny-1,k) = sol(ny,k)
+  END DO
+!
+  zsuml = SUM(ABS(sol))
+  CALL mpi_reduce(zsuml, zsumg, 1, MPI_DOUBLE_PRECISION, MPI_SUM , 0, &
+       &          MPI_COMM_WORLD, ierr)
+  IF(me.EQ.0) PRINT*, 'sum of sol', zsumg
+!
+  tsolv = seconds() - t0
+  gflops2 = dopla('DGBTRS',nrank,1,kl,ku,0) / tsolv / 1.d9
+!
+!   Spline coefficients, taking into account of periodicity in y and z
+!   Note: in SOL, y was numbered first.
+!
+  dimx = splxy%sp1%dim
+  dimy = splxy%sp2%dim
+  dimz = splxyz%sp3%dim
+  ALLOCATE(bcoef(0:dimx-1, 0:dimy-1, 0:dimz-1))
+!
+!   Get 3D array of spline coefs.
+!
+  DO j=0,dimy-1
+     DO i=0,dimx-1
+        ij = MODULO(j,ny) + i*ny + 1
+        DO k=0,nzloc-1
+           bcoef(i,j,k) = sol(ij,k)
+        END DO
+     END DO
+  END DO
+!
+!   Get missing coefs from remote guard cells
+!
+  prev = MODULO(me-1,npes)
+  next = MODULO(me+1,npes)
+  count = dimx*dimy
+  DO i=0,nidbas(3)-1
+     CALL mpi_sendrecv(bcoef(0,0,i), count, MPI_DOUBLE_PRECISION, prev, 0, &
+          &            bcoef(0,0,nzloc+i), count, MPI_DOUBLE_PRECISION, next, 0, &
+          &            MPI_COMM_WORLD, status, ierr)
+  END DO
+!
+  IF(me.EQ.0) THEN
+     WRITE(*,'(/a,3i6)') 'dimx, dimy, dimz =', dimx, dimy, dimz
+  END IF
+!===========================================================================
+!              4.0 Check the solution
+!
+!   Check function values computed with various method
+!
+  CALL RANDOM_NUMBER(xp)
+  CALL RANDOM_NUMBER(yp); yp = 2.d0*pi*yp
+  CALL RANDOM_NUMBER(zp); zp = 2.d0*pi*zp
+  nploc = 0
+  DO i=1,npart
+     IF(zp(i).GE.zgrid(startz) .AND. zp(i).LT.zgrid(endz)) THEN
+        nploc = nploc+1
+        xp(nploc) = xp(i)
+        yp(nploc) = yp(i)
+        zp(nploc) = zp(i)
+     END IF
+  END DO
+  jder = (/0,0,0/)
+  CALL gridval(splxyz, xp(1:nploc), yp(1:nploc), zp(1:nploc), fp_calc(1:nploc), jder, bcoef)
+  DO i=1,nploc
+     fp_anal(i) = (1-xp(i)**2) * xp(i)**mbess &
+          &        * COS(mbess*yp(i)) * COS(zp(i))**npow
+  END DO
+  errnorm2 =  norm21(fp_calc(1:nploc)-fp_anal(1:nploc))/norm21(fp_calc(1:nploc))
+  IF(me.EQ.0) THEN
+     WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors, using random points', &
+          &    errnorm2
+  END IF
+!
+  ALLOCATE(solcal(0:nx,0:ny,0:nzloc-1))
+  ALLOCATE(solana(0:nx,0:ny,0:nzloc-1))
+  ALLOCATE(errsol(0:nx,0:ny,0:nzloc-1))
+!
+  DO i=0,nx
+     DO j=0,ny
+        DO k=0,nzloc-1
+           kk=startz+k
+           solana(i,j,k) = (1-xgrid(i)**2) * xgrid(i)**mbess &
+                &        * COS(mbess*ygrid(j)) * COS(zgrid(kk))**npow
+        END DO
+     END DO
+  END DO
+!
+  jder = (/0,0,0/)
+  CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder, bcoef)
+  t0 = seconds()
+  CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder)
+  tgrid = seconds()-t0
+  errsol = solana - solcal
+!
+  errnorm2 =  norm2(errsol) / norm2(solana)
+  IF(me.EQ.0) THEN
+     WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
+          &    errnorm2
+  END IF
+  CALL putarr(fid, '/sol', solcal,pardim=3)
+  CALL putarr(fid, '/solana', solana,pardim=3)
+!
+!   Check derivative d/dx
+!
+  DO i=0,nx
+     DO j=0,ny
+        DO k=0,nzloc-1
+           IF( mbess .EQ. 0 ) THEN
+              solana(i,j,k) = -2.0d0 * xgrid(i) * COS(zgrid(k+startz))**npow
+           ELSE
+              solana(i,j,k) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
+                   &        xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) * &
+                   &        COS(zgrid(k+startz))**npow
+           END IF
+        END DO
+     END DO
+  END DO
+!
+  jder = (/1,0,0/)
+  t0 = seconds()
+  CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder)
+  tgrid = tgrid + seconds()-t0
+  errsol = solana - solcal
+  errnorm2 =  norm2(errsol) / norm2(solana)
+  IF(me.EQ.0) THEN
+     WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dx', errnorm2
+  END IF
+  CALL putarr(fid, '/derivx', solcal, pardim=3)
+  CALL putarr(fid, '/derivx_exact', solana,pardim=3)
+!
+!   Check derivative d/dy
+!
+  DO i=0,nx
+     DO j=0,ny
+        DO k=0,nzloc-1
+           solana(i,j,k) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * &
+                &        SIN(mbess*ygrid(j))* COS(zgrid(k+startz))**npow
+        END DO
+     END DO
+  END DO
+!
+  jder = (/0,1,0/)
+  t0 = seconds()
+  CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder)
+  tgrid = tgrid + seconds()-t0
+  errsol = solana - solcal
+  errnorm2 =  norm2(errsol) / norm2(solana)
+  IF(me.EQ.0) THEN
+     WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dy', errnorm2
+  END IF
+  CALL putarr(fid, '/derivy', solcal, pardim=3)
+  CALL putarr(fid, '/derivy_exact', solana,pardim=3)
+!
+!   Check derivative d/dz
+!
+  DO i=0,nx
+     DO j=0,ny
+        DO k=0,nzloc-1
+           solana(i,j,k) = -npow*(1-xgrid(i)**2) * xgrid(i)**mbess &
+                &        * COS(mbess*ygrid(j)) * COS(zgrid(k+startz))**(npow-1) &
+                &        * SIN(zgrid(k+startz))
+        END DO
+     END DO
+  END DO
+!
+  jder = (/0,0,1/)
+  t0 = seconds()
+  IF(nlppform) THEN
+     CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder)
+  ELSE
+     CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder, bcoef)
+  END IF
+  tgrid = tgrid + seconds()-t0
+  errsol = solana - solcal
+  errnorm2 =  norm2(errsol) / norm2(solana)
+  IF(me.EQ.0) THEN
+     WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dz', errnorm2
+  END IF
+  CALL putarr(fid, '/derivz', solcal, pardim=3)
+  CALL putarr(fid, '/derivz_exact', solana,pardim=3)
+!===========================================================================
+!              9.0  Epilogue
+!
+  IF(me.EQ.0) THEN
+     WRITE(*,'(/a)') '---'
+     WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
+     WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
+     WRITE(*,'(a,1pe12.3)') 'Backsolve time (s)            ', tsolv
+     WRITE(*,'(a,1pe12.3)') 'gridval time (s)              ', tgrid
+     WRITE(*,'(a,2f10.3)') 'Factor/solve Gflop/s', gflops1, gflops2
+     WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem()
+  END IF
+!
+  DEALLOCATE(xgrid, ygrid, zgrid)
+  DEALLOCATE(fftmass)
+  DEALLOCATE(rhs)
+  DEALLOCATE(sol)
+  DEALLOCATE(rhs_t)
+  DEALLOCATE(crhs_t)
+  DEALLOCATE(bcoef)
+  DEALLOCATE(solcal, solana, errsol)
+  DEALLOCATE(arr)
+  CALL destroy_sp(splxyz)
+  CALL destroy_sp(splz)
+  CALL destroy(mat)
+!
+  CALL closef(fid)
+!
+  CALL mpi_finalize(ierr)
+!===========================================================================
+!
+CONTAINS
+  FUNCTION norm2(x)
+!
+!  Compute the 2-norm of array x
+!
+    IMPLICIT NONE
+    DOUBLE PRECISION :: norm2
+    DOUBLE PRECISION, INTENT(in) :: x(:,:,:)
+    DOUBLE PRECISION :: sum2, sum2g
+    INTEGER :: i, j
+!
+    sum2 = 0.0d0
+    DO i=1,SIZE(x,1)
+       DO j=1,SIZE(x,2)
+          DO k=1,SIZE(x,3)
+             sum2 = sum2 + x(i,j,k)**2
+          END DO
+       END DO
+    END DO
+    CALL mpi_allreduce(sum2, sum2g, 1, MPI_DOUBLE_PRECISION, MPI_SUM, &
+         &          MPI_COMM_WORLD, ierr)
+    norm2 = SQRT(sum2g)
+  END FUNCTION norm2
+!
+  FUNCTION norm21(x)
+!
+!  Compute the 2-norm of array x
+!
+    IMPLICIT NONE
+    DOUBLE PRECISION :: norm21
+    DOUBLE PRECISION, INTENT(in) :: x(:)
+    DOUBLE PRECISION :: sum2, sum2g
+    INTEGER :: i, j
+!
+    sum2 = DOT_PRODUCT(x,x)
+    CALL mpi_allreduce(sum2, sum2g, 1, MPI_DOUBLE_PRECISION, MPI_SUM, &
+         &          MPI_COMM_WORLD, ierr)
+    norm21 = SQRT(sum2g)
+  END FUNCTION norm21
+  SUBROUTINE prnmat(label, mat)
+    CHARACTER(len=*) :: label
+    DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
+    INTEGER :: i
+    WRITE(*,'(/a)') TRIM(label)
+    DO i=1,SIZE(mat,1)
+       WRITE(*,'(10(1pe12.3))') mat(i,:)
+    END DO
+  END SUBROUTINE prnmat
+END PROGRAM main
+!
+!+++
diff --git a/examples/ppde3d_mod.f90 b/examples/ppde3d_mod.f90
new file mode 100644
index 0000000..c04c788
--- /dev/null
+++ b/examples/ppde3d_mod.f90
@@ -0,0 +1,473 @@
+!>
+!> @file ppde3d_mod.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+MODULE ppde3d_mod
+  USE bsplines
+  USE matrix
+  IMPLICIT NONE
+  INCLUDE "mpif.h"
+!
+  INTEGER :: me, npes
+  INTEGER :: prev, next
+  INTEGER :: count, status(MPI_STATUS_SIZE), ierr
+!
+CONTAINS
+  SUBROUTINE dismat(spl, mat)
+!
+!   Assembly of FE matrix mat using spline spl
+!
+    TYPE(spline2d), INTENT(in) :: spl
+    TYPE(gbmat), INTENT(inout) :: mat
+!
+    INTEGER :: n1, nidbas1, ndim1, ng1
+    INTEGER :: n2, nidbas2, ndim2, ng2
+    INTEGER :: i, j, ig1, ig2
+    INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
+    DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
+    DOUBLE PRECISION:: contrib
+!
+    INTEGER :: kterms         ! Number of terms in weak form
+    INTEGER, DIMENSION(:,:), ALLOCATABLE :: idert, iderw  ! Derivative order
+    DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE  :: coefs ! Terms in weak form
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
+!
+!   Weak form
+!
+    kterms = mat%nterms
+    ALLOCATE(idert(kterms,2), iderw(kterms,2), coefs(kterms))
+!
+    ALLOCATE(fun1(0:nidbas1,0:1)) ! Spline and 1st derivative
+    ALLOCATE(fun2(0:nidbas2,0:1)) ! 
+!
+!   Gauss quadature
+!
+    CALL get_gauss(spl%sp1, ng1)
+    CALL get_gauss(spl%sp2, ng2)
+    ALLOCATE(xg1(ng1), wg1(ng1))
+    ALLOCATE(xg2(ng1), wg2(ng1))
+!===========================================================================
+!              2.0 Assembly loop
+!
+    DO i=1,n1
+       CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
+       DO ig1=1,ng1
+          CALL basfun(xg1(ig1), spl%sp1, fun1, i)
+          DO j=1,n2
+             CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
+             DO ig2=1,ng2
+                CALL basfun(xg2(ig2), spl%sp2, fun2, j)
+                CALL coefeq(xg1(ig1), xg2(ig2), idert, iderw, coefs)
+                DO iterm=1,kterms
+                   DO iw1=0,nidbas1  ! Weight function in dir 1
+                      igw1 = i+iw1
+                      DO iw2=0,nidbas2  ! Weight function in dir 2
+                         igw2 = MODULO(j+iw2-1, n2) + 1
+                         irow = igw2 + (igw1-1)*n2
+                         DO it1=0,nidbas1  ! Test function in dir 1
+                            igt1 = i+it1
+                            DO it2=0,nidbas2  ! Test function in dir 2
+                               igt2 = MODULO(j+it2-1, n2) + 1
+                               jcol = igt2 + (igt1-1)*n2
+                               contrib = fun1(iw1,iderw(iterm,1)) * &
+                                    &    fun2(iw2,iderw(iterm,2)) * &
+                                    &    coefs(iterm) *             &
+                                    &    fun2(it2,idert(iterm,2)) * &
+                                    &    fun1(it1,idert(iterm,1)) * &
+                                    &    wg1(ig1) * wg2(ig2)
+                               CALL updtmat(mat, irow, jcol, contrib)
+                            END DO
+                         END DO
+                      END DO
+                   END DO
+                END DO
+             END DO
+          END DO
+       END DO
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(xg1, wg1, fun1)
+    DEALLOCATE(xg2, wg2, fun2)
+    DEALLOCATE(idert, iderw, coefs)
+!
+  CONTAINS
+    SUBROUTINE coefeq(x, y, idt, idw, c)
+      DOUBLE PRECISION, INTENT(in) :: x, y
+      INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
+      DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
+!
+! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
+!
+      c(1) = x        ! 
+      idt(1,1) = 1
+      idt(1,2) = 0
+      idw(1,1) = 1
+      idw(1,2) = 0
+!
+      c(2) = 1.d0/x
+      idt(2,1) = 0
+      idt(2,2) = 1
+      idw(2,1) = 0
+      idw(2,2) = 1
+    END SUBROUTINE coefeq
+  END SUBROUTINE dismat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE disrhs3(mbess, npow, spl, rhs)
+!
+!   Assembly the RHS using 3d spline spl
+!
+    INTEGER, INTENT(in)           :: mbess, npow
+    TYPE(spline2d1d), TARGET      :: spl
+    DOUBLE PRECISION, INTENT(out) :: rhs(:,:)
+!
+    TYPE(spline1d), POINTER :: sp1, sp2, sp3
+    INTEGER :: n1, nidbas1, ndim1, ng1
+    INTEGER :: n2, nidbas2, ndim2, ng2
+    INTEGER :: n3, nidbas3, ndim3, ng3
+    INTEGER :: i, j, k, ig1, ig2, ig3, k1, k2, k3, i1, j2, ij, kk, nrank
+    DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xg3(:), wg3(:), fun3(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: buf(:)
+    DOUBLE PRECISION:: contrib
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    sp1 => spl%sp12%sp1
+    sp2 => spl%sp12%sp2
+    sp3 => spl%sp3
+!
+    CALL get_dim(sp1, ndim1, n1, nidbas1)
+    CALL get_dim(sp2, ndim2, n2, nidbas2) 
+    CALL get_dim(sp3, ndim3, n3, nidbas3) 
+!
+    ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
+    ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
+    ALLOCATE(fun3(0:nidbas3,1)) ! needs only basis functions (no derivatives)
+!
+!   Gauss quadature
+!
+    CALL get_gauss(sp1, ng1)
+    CALL get_gauss(sp2, ng2)
+    CALL get_gauss(sp3, ng3)
+    ALLOCATE(xg1(ng1), wg1(ng1))
+    ALLOCATE(xg2(ng2), wg2(ng2))
+    ALLOCATE(xg3(ng3), wg3(ng3))
+!===========================================================================
+!              2.0 Assembly loop
+!
+    nrank = SIZE(rhs,1)
+    rhs = 0.0d0
+!
+    DO i=1,n1
+       CALL get_gauss(sp1, ng1, i, xg1, wg1)
+       DO ig1=1,ng1
+          CALL basfun(xg1(ig1), sp1, fun1, i)
+          DO j=1,n2
+             CALL get_gauss(sp2, ng2, j, xg2, wg2)
+             DO ig2=1,ng2
+                CALL basfun(xg2(ig2), sp2, fun2, j)
+                DO k=1,n3
+                   CALL get_gauss(sp3, ng3, k, xg3, wg3)
+                   DO ig3=1,ng3
+                      CALL basfun(xg3(ig3), sp3, fun3, k)
+                      contrib = wg1(ig1)*wg2(ig2)*wg3(ig3) * &
+                           &    rhseq(xg1(ig1), xg2(ig2), xg3(ig3), mbess, npow)
+                      DO k1=0,nidbas1
+                         i1 = i+k1
+                         DO k2=0,nidbas2
+                            j2 = MODULO(j+k2-1,n2) + 1
+                            ij = j2 + (i1-1)*n2
+                            DO k3=0,nidbas3
+                               kk = k+k3
+                               rhs(ij,kk) = rhs(ij, kk) + &
+                                    &  contrib*fun1(k1,1)*fun2(k2,1)*fun3(k3,1)
+                            END DO
+                         END DO
+                      END DO
+                   END DO
+                END DO
+             END DO
+          END DO
+       END DO
+    END DO
+!
+!     Update from remote guard cells
+!
+    CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
+    CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
+    next = MODULO(me+1,npes)
+    prev = MODULO(me-1,npes)
+    count = nrank
+    ALLOCATE(buf(nrank))
+    DO i=nidbas3,1,-1
+       CALL mpi_sendrecv(rhs(1,n3+i), count, MPI_DOUBLE_PRECISION, next, 0, &
+         &             buf, count, MPI_DOUBLE_PRECISION, prev, 0, &
+         &             MPI_COMM_WORLD, status, ierr)
+       rhs(:,i) =  rhs(:,i) + buf(:)
+    END DO
+    DEALLOCATE(buf)
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(xg1, wg1, fun1)
+    DEALLOCATE(xg2, wg2, fun2)
+    DEALLOCATE(xg3, wg3, fun3)
+!
+  CONTAINS
+    DOUBLE PRECISION FUNCTION rhseq(x1, x2, x3, m, n)
+      DOUBLE PRECISION, INTENT(in) :: x1, x2, x3
+      INTEGER, INTENT(in) :: m, n
+      rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)*COS(x3)**n
+    END FUNCTION rhseq
+  END SUBROUTINE disrhs3
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE meshdist(c, x, nx)
+!
+!   Construct an 1d  non-equidistant mesh given a
+!   mesh distribution function.
+!
+    DOUBLE PRECISION, INTENT(in) :: c(5)
+    INTEGER, INTENT(iN) :: nx
+    DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+    INTEGER :: nintg
+    DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+    DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+    INTEGER :: i, k
+!
+    a=x(0)
+    b=x(nx)
+    nintg = 10*nx
+    ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+    dx = (b-a)/REAL(nintg)
+    xint(0) = a
+    fint(0) = 0.0d0
+    f1 = fdist(xint(0))
+    DO i=1,nintg
+       f0 = f1
+       xint(i) = xint(i-1) + dx
+       f1 = fdist(xint(i))
+       fint(i) = fint(i-1) + 0.5*(f0+f1)
+    END DO
+!
+!  Normalization
+!
+    scal = REAL(nx) / fint(nintg)
+    fint(0:nintg) = fint(0:nintg) * scal
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+    k = 1
+    DO i=1,nintg-1
+       IF( fint(i) .GE. REAL(k) ) THEN
+          x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+               &   (k-fint(i))
+          k = k+1
+       END IF
+    END DO
+!
+    DEALLOCATE(xint, fint)
+  CONTAINS
+    DOUBLE PRECISION FUNCTION fdist(x)
+      DOUBLE PRECISION, INTENT(in) :: x
+      fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
+    END FUNCTION fdist
+  END SUBROUTINE meshdist
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE ibcmat(mat, ny)
+!
+!   Apply BC on matrix
+!
+    TYPE(gbmat), INTENT(inout) :: mat
+    INTEGER, INTENT(in) :: ny
+    INTEGER :: kl, ku, nrank, i, j
+    DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
+!===========================================================================
+!              1.0 Prologue
+!
+    kl = mat%kl
+    ku = mat%ku
+    nrank = mat%rank
+    ALLOCATE(zsum(nrank), arr(nrank))
+!===========================================================================
+!              2.0 Unicity at the axis
+!
+!   The vertical sum on the NY-th row
+!
+    zsum = 0.0d0
+    DO i=1,ny
+       arr = 0.0d0
+       CALL getrow(mat, i, arr)
+       DO j=1,ny+ku
+          zsum(j) = zsum(j) + arr(j)
+       END DO
+    END DO
+    CALL putrow(mat, ny, zsum)
+!
+!   The horizontal sum on the NY-th column
+!
+    zsum = 0.0d0
+    DO j=1,ny
+       arr = 0.0d0
+       CALL getcol(mat, j, arr)
+       DO i=ny,ny+kl
+          zsum(i) = zsum(i) + arr(i)
+       END DO
+    END DO
+    CALL putcol(mat, ny, zsum)
+!
+!   The away operator
+!
+    DO j = 1,ny-1
+       arr = 0.0d0; arr(j) = 1.0d0
+       CALL putcol(mat, j, arr)     
+    END DO
+!
+    DO i = 1,ny-1
+       arr = 0.0d0; arr(i) = 1.0d0
+       CALL putrow(mat, i, arr)     
+    END DO
+!  
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+    DO j = nrank, nrank-ny+1, -1
+       arr = 0.0d0; arr(j) = 1.0d0
+       CALL putcol(mat, j, arr)     
+    END DO
+    !
+    DO i = nrank, nrank-ny+1, -1
+       arr = 0.0d0; arr(i) = 1.0d0
+       CALL putrow(mat, i, arr)     
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(zsum, arr)
+!
+  END SUBROUTINE ibcmat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE ibcrhs3(rhs, ny)
+!
+!   Apply BC on RHS
+!
+    DOUBLE PRECISION, INTENT(inout) :: rhs(:,:)
+    INTEGER, INTENT(in) :: ny
+    INTEGER :: nrank, nz, k
+    DOUBLE PRECISION :: zsum
+!===========================================================================
+!              1.0 Prologue
+!
+    nrank = SIZE(rhs,1)
+    nz = SIZE(rhs,2)
+!===========================================================================
+!              2.0 Unicity at the axis
+!
+!   The vertical sum
+!
+    DO k=1,nz
+       zsum = SUM(rhs(1:ny,k))
+       rhs(ny,k) = zsum
+       rhs(1:ny-1,k) = 0.0d0
+    END DO
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+    DO k=1,nz
+       rhs(nrank-ny+1:nrank,k) = 0.0d0
+    END DO
+  END SUBROUTINE ibcrhs3
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE dist1d(s0, ntot, s, nloc)
+    INCLUDE 'mpif.h'
+    INTEGER, INTENT(in) :: s0, ntot
+    INTEGER, INTENT(out) :: s, nloc
+    INTEGER :: me, npes, ierr, naver, rem
+!
+    CALL MPI_COMM_SIZE(MPI_COMM_WORLD, npes, ierr)
+    CALL MPI_COMM_RANK(MPI_COMM_WORLD, me, ierr)
+    naver = ntot/npes
+    rem = MODULO(ntot,npes)
+    s = s0 + MIN(rem,me) + me*naver
+    nloc = naver
+    IF( me.LT.rem ) nloc = nloc+1
+  END SUBROUTINE dist1d
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE disp(a, str, comm)
+!
+!   Gather partitionned 1d array to 0 and print it
+!
+    INCLUDE 'mpif.h'
+    DOUBLE PRECISION, INTENT(in) :: a(:)
+    INTEGER, INTENT(in) :: comm
+    CHARACTER(len=*), INTENT(in) :: str
+    INTEGER :: n, ntot, npes, me, ierr, i
+    DOUBLE PRECISION, ALLOCATABLE, DIMENSION(:) :: c
+    INTEGER, ALLOCATABLE, DIMENSION(:) :: counts, displs
+!
+    CALL mpi_comm_rank(comm, me, ierr)
+    CALL mpi_comm_size(comm, npes, ierr)
+    n = SIZE(a)
+    IF(me.EQ.0) THEN
+       ALLOCATE(counts(npes), displs(npes+1))
+    END IF
+    CALL mpi_gather(n, 1, MPI_INTEGER, counts, 1, MPI_INTEGER, 0, comm, ierr)
+    IF(me.EQ.0) THEN
+       displs(1) = 0
+       DO i=2,npes+1
+          displs(i) = displs(i-1)+counts(i-1)
+       END DO
+       ntot = displs(npes+1)
+       ALLOCATE(c(ntot))
+       c = 0.0d0
+    END IF
+    CALL mpi_gatherv(a, n, MPI_DOUBLE_PRECISION, c, counts, displs, &
+         &           MPI_DOUBLE_PRECISION, 0, comm, ierr)    
+    IF( me.EQ.0 ) THEN
+       WRITE(*,'(/a)') TRIM(str)
+       DO i=1,npes
+          WRITE(*,'(a,i3.3/(10(1pe12.3)))') 'PE', i-1,  &
+               &        c(displs(i)+1:displs(i+1))
+       END DO
+       DEALLOCATE(c)
+       DEALLOCATE(counts)
+       DEALLOCATE(displs)
+    END IF
+  END SUBROUTINE disp
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+END MODULE ppde3d_mod
diff --git a/examples/ppde3d_pb.f90 b/examples/ppde3d_pb.f90
new file mode 100644
index 0000000..850056b
--- /dev/null
+++ b/examples/ppde3d_pb.f90
@@ -0,0 +1,518 @@
+!>
+!> @file ppde3d_pb.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!  Solving the following 3d PDE using splines:
+!
+!    -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my)cos(z)^n, with f(x=1,y) = 0
+!    exact solution: f(x,y) = (1-x^2) x^m cos(my)cos(z)^n
+!
+  USE futils
+  USE fft
+  USE pputils2, ONLY : pptransp
+  USE ppde3d_pb_mod
+!
+  IMPLICIT NONE
+!
+  CHARACTER(len=128) :: infile="ppde3d_pb.in"
+  INTEGER :: l
+  INTEGER :: nx, ny, nz, nidbas(3), ngauss(3), mbess, npow, nterms
+  INTEGER :: startz, endz, nzloc
+  INTEGER :: start_rank, end_rank, nrank_loc
+  LOGICAL :: nlppform
+  INTEGER :: i, j, k, kk, ij, dimx, dimy, dimz, nrank, kl, ku
+  INTEGER :: jder(3), it
+  DOUBLE PRECISION :: pi, coefx(5)
+  DOUBLE PRECISION :: dy, dz
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE   :: xgrid, ygrid, zgrid
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE   :: fftmass
+  DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: rhs, sol, rhs_t
+  DOUBLE COMPLEX, DIMENSION(:,:), ALLOCATABLE   :: crhs_t
+!
+  TYPE(spline2d1d), TARGET :: splxyz
+  TYPE(spline2d), POINTER  :: splxy
+  TYPE(spline1d)           :: splz
+  TYPE(pbmat)              :: mat
+!
+  CHARACTER(len=128) :: file='ppde3d_pb.h5'
+  INTEGER :: fid
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
+  DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
+  DOUBLE PRECISION :: seconds, mem, dopla
+  DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
+  INTEGER :: nits=500
+!
+  INTEGER, PARAMETER :: npart=10000000
+  INTEGER :: nploc
+  DOUBLE PRECISION, DIMENSION(npart) :: xp, yp, zp, fp_calc, fp_anal
+  DOUBLE PRECISION zsuml, zsumg, errnorm2
+!
+  INTEGER :: kmin, kmax
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE   :: fftmass_shifted
+!
+  NAMELIST /newrun/ nx, ny, nz, nidbas, ngauss, mbess, npow, nlppform, coefx
+!===========================================================================
+!              1.0 Prologue
+!
+!   Init MPI
+!
+  CALL mpi_init(ierr)
+  CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
+  CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
+!
+!   Get input file name from command argument
+!
+  IF( COMMAND_ARGUMENT_COUNT() .EQ. 1 ) THEN
+     CALL GET_COMMAND_ARGUMENT(1, infile, l, ierr)
+  END IF
+!
+!   Read in data specific to run
+!
+  nx = 8              ! Number of intervals in x
+  ny = 8              ! Number of intervals in y
+  nz = 8              ! Number of intervals in z
+  nidbas = (/3,3,3/)  ! Degree of splines
+  ngauss = (/4,4, 4/)    ! Number of Gauss points/interval
+  mbess = 2           ! Exponent of differential problem
+  npow = 2            ! Exponent of differential problem
+  nterms = 2          ! Number of terms in weak form
+  nlppform = .TRUE.   ! Use PPFORM for gridval or not
+  coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+!
+  OPEN(unit=99, file=TRIM(infile), status='old', action='read')
+  READ(99,newrun)
+  IF( me.EQ.0) THEN
+     WRITE(*,newrun)
+  END IF
+  CLOSE(99)
+!
+!   Define grid on x (=radial) & y (=poloidal) axis
+!
+  pi = 4.0d0*ATAN(1.0d0)
+  ALLOCATE(xgrid(0:nx), ygrid(0:ny), zgrid(0:nz))
+!
+  xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
+  CALL meshdist(coefx, xgrid, nx)
+! 
+  dy = 2.d0*pi/REAL(ny,8)        ! Equidistant in y
+  ygrid = (/ (j*dy, j=0,ny) /)
+!
+!   Partitionned toroidal grid z
+!
+  dz = 2.0d0*pi/REAL(nz,8)       ! Equidistant in z
+  zgrid = (/ (k*dz, k=0,nz) /)
+  CALL dist1d(0, nz, startz, nzloc)
+  endz = startz+nzloc
+!!$  PRINT*, 'PE', me, ' startz, endz, nzloc', startz, endz, nzloc
+!
+  IF( me.EQ.0) THEN
+     WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx)
+     WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny)
+     WRITE(*,'(a/(10f8.3))') 'ZGRID', zgrid(0:nz)
+  END IF
+!
+!   Create hdf5 file
+!
+  CALL creatf(file, fid, 'PDE2D Result File', real_prec='d', &
+       &      mpicomm=MPI_COMM_WORLD)
+  CALL attach(fid, '/', 'NX', nx)
+  CALL attach(fid, '/', 'NY', ny)
+  CALL attach(fid, '/', 'NZ', nz)
+  CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
+  CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
+  CALL attach(fid, '/', 'NIDBAS3', nidbas(3))
+  CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
+  CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
+  CALL attach(fid, '/', 'NGAUSS2', ngauss(3))
+  CALL attach(fid, '/', 'MBESS', mbess)
+  CALL putarr(fid, '/xgrid', xgrid, 'r')
+  CALL putarr(fid, '/ygrid', ygrid, '\theta')
+  CALL putarr(fid, '/zgrid', zgrid(0:nz-1), '\phi')
+!===========================================================================
+!              2.0 Discretize the PDE
+!
+!   Set up spline
+!
+  t0 = seconds()
+  CALL set_spline(nidbas, ngauss, xgrid, ygrid, zgrid(startz:endz), &
+       &          splxyz, (/.FALSE., .TRUE., .TRUE./), nlppform=nlppform)
+  splxy => splxyz%sp12
+!
+  IF( me.EQ.0) THEN
+     WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots
+     WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots
+  END IF
+  CALL disp(splxyz%sp3%knots, 'KNOTS in Z', MPI_COMM_WORLD)
+!
+!   2D FE matrix assembly (in plane x-y)
+!
+  nrank = (nx+nidbas(1))*ny    ! Rank of the FE matrix
+  kl = (nidbas(1)+1)*ny -1     ! Number of sub-diagnonals
+  ku = kl                      ! Number of super-diagnonals
+  IF(me.EQ.0) THEN
+     WRITE(*,'(a,5i6)') 'nrank, kl, ku', nrank, kl, ku
+  END IF
+!
+  CALL init(ku, nrank, nterms, mat)
+  CALL dismat(splxy, mat)
+  ALLOCATE(arr(nrank))
+!
+!   BC on Matrix
+!
+  CALL ibcmat(mat, ny)
+  tmat = seconds() - t0
+!
+!   3D RHS assembly
+!
+  ALLOCATE(rhs(nrank,0:nzloc+nidbas(3)-1)) ! With right guard cells nzloc:nzloc+nidbas3-1
+  ALLOCATE(sol(nrank,0:nzloc-1))
+  CALL disrhs3(mbess, npow, splxyz, rhs)
+!
+  zsuml = SUM(ABS(rhs(:,0:nzloc-1)))
+  CALL mpi_reduce(zsuml, zsumg, 1, MPI_DOUBLE_PRECISION, MPI_SUM , 0, &
+       &          MPI_COMM_WORLD, ierr)
+  IF(me.EQ.0) PRINT*, 'sum of rhs after DISRHS3', zsumg
+!
+!   FFT in z of RHS
+!
+  CALL dist1d(1, nrank, start_rank, nrank_loc)
+  end_rank = start_rank+nrank_loc-1
+  ALLOCATE(rhs_t(0:nz-1,nrank_loc), crhs_t(0:nz-1,nrank_loc))
+!
+  CALL pptransp(MPI_COMM_WORLD, rhs(:,0:nzloc-1), rhs_t)
+  crhs_t = rhs_t
+  CALL fourcol(crhs_t,1)
+  crhs_t = crhs_t/REAL(nz,8)
+!
+!   Apply Mass matrix to crhs
+!
+  CALL set_spline(nidbas(3), ngauss(3), zgrid, splz, .TRUE.)
+  kmin =-nz/2
+  kmax = nz/2-1
+  CALL init_dft(splz, kmin, kmax)
+  ALLOCATE(fftmass_shifted(kmin:kmax))
+  ALLOCATE(fftmass(0:nz-1))
+  CALL calc_fftmass_old(splz, fftmass_shifted)
+  DO k=kmin,kmax
+     fftmass(MODULO(k+nz,nz)) = fftmass_shifted(k)
+  END DO
+  IF(me.EQ.0) THEN
+     WRITE(*,'(/a/(10(1pe12.3)))') 'Mass matrix', fftmass
+  END IF
+  DO k=0,nz-1
+     crhs_t(k,:) = crhs_t(k,:)/fftmass(k)
+  END DO
+!
+!   Fourier transform back crhs to real space in z
+!
+  CALL fourcol(crhs_t, -1)
+  rhs_t(:,:) = REAL(crhs_t(:,:),8)
+  CALL pptransp(MPI_COMM_WORLD, rhs_t, sol)  ! Put the final RHS in SOL
+!
+!   BC on RHS
+!
+  CALL ibcrhs3(sol, ny)
+!
+  IF(me.EQ.0) THEN
+     WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem()
+  END IF
+!===========================================================================
+!              3.0 Solve the dicretized system
+!
+  t0 = seconds()
+  CALL factor(mat)
+  tfact = seconds() - t0
+  gflops1 = dopla('DPBTRF',nrank,nrank,kl,ku,0) / tfact / 1.d9
+
+  t0 = seconds()
+  CALL bsolve(mat, sol)
+!
+!   Backtransform of solution
+!
+  DO k=0,nzloc-1
+     sol(1:ny-1,k) = sol(ny,k)
+  END DO
+!
+  zsuml = SUM(ABS(sol))
+  CALL mpi_reduce(zsuml, zsumg, 1, MPI_DOUBLE_PRECISION, MPI_SUM , 0, &
+       &          MPI_COMM_WORLD, ierr)
+  IF(me.EQ.0) PRINT*, 'sum of sol', zsumg
+!
+  tsolv = seconds() - t0
+  gflops2 = dopla('DPBTRS',nrank,1,kl,ku,0) / tsolv / 1.d9
+!
+!   Spline coefficients, taking into account of periodicity in y and z
+!   Note: in SOL, y was numbered first.
+!
+  dimx = splxy%sp1%dim
+  dimy = splxy%sp2%dim
+  dimz = splxyz%sp3%dim
+  ALLOCATE(bcoef(0:dimx-1, 0:dimy-1, 0:dimz-1))
+!
+!   Get 3D array of spline coefs.
+!
+  DO j=0,dimy-1
+     DO i=0,dimx-1
+        ij = MODULO(j,ny) + i*ny + 1
+        DO k=0,nzloc-1
+           bcoef(i,j,k) = sol(ij,k)
+        END DO
+     END DO
+  END DO
+!
+!   Get missing coefs from remote guard cells
+!
+  prev = MODULO(me-1,npes)
+  next = MODULO(me+1,npes)
+  count = dimx*dimy
+  DO i=0,nidbas(3)-1
+     CALL mpi_sendrecv(bcoef(0,0,i), count, MPI_DOUBLE_PRECISION, prev, 0, &
+          &            bcoef(0,0,nzloc+i), count, MPI_DOUBLE_PRECISION, next, 0, &
+          &            MPI_COMM_WORLD, status, ierr)
+  END DO
+!
+  IF(me.EQ.0) THEN
+     WRITE(*,'(/a,3i6)') 'dimx, dimy, dimz =', dimx, dimy, dimz
+  END IF
+!===========================================================================
+!              4.0 Check the solution
+!
+!   Check function values computed with various method
+!
+  CALL RANDOM_NUMBER(xp)
+  CALL RANDOM_NUMBER(yp); yp = 2.d0*pi*yp
+  CALL RANDOM_NUMBER(zp); zp = 2.d0*pi*zp
+  nploc = 0
+  DO i=1,npart
+     IF(zp(i).GE.zgrid(startz) .AND. zp(i).LT.zgrid(endz)) THEN
+        nploc = nploc+1
+        xp(nploc) = xp(i)
+        yp(nploc) = yp(i)
+        zp(nploc) = zp(i)
+     END IF
+  END DO
+  jder = (/0,0,0/)
+  CALL gridval(splxyz, xp(1:nploc), yp(1:nploc), zp(1:nploc), fp_calc(1:nploc), jder, bcoef)
+  DO i=1,nploc
+     fp_anal(i) = (1-xp(i)**2) * xp(i)**mbess &
+          &        * COS(mbess*yp(i)) * COS(zp(i))**npow
+  END DO
+  errnorm2 =  norm21(fp_calc(1:nploc)-fp_anal(1:nploc))/norm21(fp_calc(1:nploc))
+  IF(me.EQ.0) THEN
+     WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors, using random points', &
+          &    errnorm2
+  END IF
+!
+  ALLOCATE(solcal(0:nx,0:ny,0:nzloc-1))
+  ALLOCATE(solana(0:nx,0:ny,0:nzloc-1))
+  ALLOCATE(errsol(0:nx,0:ny,0:nzloc-1))
+!
+  DO i=0,nx
+     DO j=0,ny
+        DO k=0,nzloc-1
+           kk=startz+k
+           solana(i,j,k) = (1-xgrid(i)**2) * xgrid(i)**mbess &
+                &        * COS(mbess*ygrid(j)) * COS(zgrid(kk))**npow
+        END DO
+     END DO
+  END DO
+!
+  jder = (/0,0,0/)
+  CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder, bcoef)
+  t0 = seconds()
+  CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder)
+  tgrid = seconds()-t0
+  errsol = solana - solcal
+!
+  errnorm2 =  norm2(errsol) / norm2(solana)
+  IF(me.EQ.0) THEN
+     WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
+          &    errnorm2
+  END IF
+  CALL putarr(fid, '/sol', solcal,pardim=3)
+  CALL putarr(fid, '/solana', solana,pardim=3)
+!
+!   Check derivative d/dx
+!
+  DO i=0,nx
+     DO j=0,ny
+        DO k=0,nzloc-1
+           IF( mbess .EQ. 0 ) THEN
+              solana(i,j,k) = -2.0d0 * xgrid(i) * COS(zgrid(k+startz))**npow
+           ELSE
+              solana(i,j,k) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
+                   &        xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) * &
+                   &        COS(zgrid(k+startz))**npow
+           END IF
+        END DO
+     END DO
+  END DO
+!
+  jder = (/1,0,0/)
+  t0 = seconds()
+  CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder)
+  tgrid = tgrid + seconds()-t0
+  errsol = solana - solcal
+  errnorm2 =  norm2(errsol) / norm2(solana)
+  IF(me.EQ.0) THEN
+     WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dx', errnorm2
+  END IF
+  CALL putarr(fid, '/derivx', solcal, pardim=3)
+  CALL putarr(fid, '/derivx_exact', solana,pardim=3)
+!
+!   Check derivative d/dy
+!
+  DO i=0,nx
+     DO j=0,ny
+        DO k=0,nzloc-1
+           solana(i,j,k) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * &
+                &        SIN(mbess*ygrid(j))* COS(zgrid(k+startz))**npow
+        END DO
+     END DO
+  END DO
+!
+  jder = (/0,1,0/)
+  t0 = seconds()
+  CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder)
+  tgrid = tgrid + seconds()-t0
+  errsol = solana - solcal
+  errnorm2 =  norm2(errsol) / norm2(solana)
+  IF(me.EQ.0) THEN
+     WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dy', errnorm2
+  END IF
+  CALL putarr(fid, '/derivy', solcal, pardim=3)
+  CALL putarr(fid, '/derivy_exact', solana,pardim=3)
+!
+!   Check derivative d/dz
+!
+  DO i=0,nx
+     DO j=0,ny
+        DO k=0,nzloc-1
+           solana(i,j,k) = -npow*(1-xgrid(i)**2) * xgrid(i)**mbess &
+                &        * COS(mbess*ygrid(j)) * COS(zgrid(k+startz))**(npow-1) &
+                &        * SIN(zgrid(k+startz))
+        END DO
+     END DO
+  END DO
+!
+  jder = (/0,0,1/)
+  t0 = seconds()
+  IF(nlppform) THEN
+     CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder)
+  ELSE
+     CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder, bcoef)
+  END IF
+  tgrid = tgrid + seconds()-t0
+  errsol = solana - solcal
+  errnorm2 =  norm2(errsol) / norm2(solana)
+  IF(me.EQ.0) THEN
+     WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dz', errnorm2
+  END IF
+  CALL putarr(fid, '/derivz', solcal, pardim=3)
+  CALL putarr(fid, '/derivz_exact', solana,pardim=3)
+!===========================================================================
+!              9.0  Epilogue
+!
+  IF(me.EQ.0) THEN
+     WRITE(*,'(/a)') '---'
+     WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
+     WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
+     WRITE(*,'(a,1pe12.3)') 'Backsolve time (s)            ', tsolv
+     WRITE(*,'(a,1pe12.3)') 'gridval time (s)              ', tgrid
+     WRITE(*,'(a,2f10.3)') 'Factor/solve Gflop/s', gflops1, gflops2
+     WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem()
+  END IF
+!
+  DEALLOCATE(xgrid, ygrid, zgrid)
+  DEALLOCATE(fftmass)
+  DEALLOCATE(fftmass_shifted)
+  DEALLOCATE(rhs)
+  DEALLOCATE(sol)
+  DEALLOCATE(rhs_t)
+  DEALLOCATE(crhs_t)
+  DEALLOCATE(bcoef)
+  DEALLOCATE(solcal, solana, errsol)
+  DEALLOCATE(arr)
+  CALL destroy_sp(splxyz)
+  CALL destroy_sp(splz)
+  CALL destroy(mat)
+!
+  CALL closef(fid)
+!
+  CALL mpi_finalize(ierr)
+!===========================================================================
+!
+CONTAINS
+  FUNCTION norm2(x)
+!
+!  Compute the 2-norm of array x
+!
+    IMPLICIT NONE
+    DOUBLE PRECISION :: norm2
+    DOUBLE PRECISION, INTENT(in) :: x(:,:,:)
+    DOUBLE PRECISION :: sum2, sum2g
+    INTEGER :: i, j
+!
+    sum2 = 0.0d0
+    DO i=1,SIZE(x,1)
+       DO j=1,SIZE(x,2)
+          DO k=1,SIZE(x,3)
+             sum2 = sum2 + x(i,j,k)**2
+          END DO
+       END DO
+    END DO
+    CALL mpi_allreduce(sum2, sum2g, 1, MPI_DOUBLE_PRECISION, MPI_SUM, &
+         &          MPI_COMM_WORLD, ierr)
+    norm2 = SQRT(sum2g)
+  END FUNCTION norm2
+!
+  FUNCTION norm21(x)
+!
+!  Compute the 2-norm of array x
+!
+    IMPLICIT NONE
+    DOUBLE PRECISION :: norm21
+    DOUBLE PRECISION, INTENT(in) :: x(:)
+    DOUBLE PRECISION :: sum2, sum2g
+    INTEGER :: i, j
+!
+    sum2 = DOT_PRODUCT(x,x)
+    CALL mpi_allreduce(sum2, sum2g, 1, MPI_DOUBLE_PRECISION, MPI_SUM, &
+         &          MPI_COMM_WORLD, ierr)
+    norm21 = SQRT(sum2g)
+  END FUNCTION norm21
+  SUBROUTINE prnmat(label, mat)
+    CHARACTER(len=*) :: label
+    DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
+    INTEGER :: i
+    WRITE(*,'(/a)') TRIM(label)
+    DO i=1,SIZE(mat,1)
+       WRITE(*,'(10(1pe12.3))') mat(i,:)
+    END DO
+  END SUBROUTINE prnmat
+END PROGRAM main
+!
+!+++
diff --git a/examples/ppde3d_pb_mod.f90 b/examples/ppde3d_pb_mod.f90
new file mode 100644
index 0000000..72c4917
--- /dev/null
+++ b/examples/ppde3d_pb_mod.f90
@@ -0,0 +1,452 @@
+!>
+!> @file ppde3d_pb_mod.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+MODULE ppde3d_pb_mod
+  USE bsplines
+  USE matrix
+  IMPLICIT NONE
+  INCLUDE "mpif.h"
+!
+  INTEGER :: me, npes
+  INTEGER :: prev, next
+  INTEGER :: count, status(MPI_STATUS_SIZE), ierr
+!
+CONTAINS
+  SUBROUTINE dismat(spl, mat)
+!
+!   Assembly of FE matrix mat using spline spl
+!
+    TYPE(spline2d), INTENT(in) :: spl
+    TYPE(pbmat), INTENT(inout) :: mat
+!
+    INTEGER :: n1, nidbas1, ndim1, ng1
+    INTEGER :: n2, nidbas2, ndim2, ng2
+    INTEGER :: i, j, ig1, ig2
+    INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
+    DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
+    DOUBLE PRECISION:: contrib
+!
+    INTEGER :: kterms         ! Number of terms in weak form
+    INTEGER, DIMENSION(:,:), ALLOCATABLE :: idert, iderw  ! Derivative order
+    DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE  :: coefs ! Terms in weak form
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
+!
+!   Weak form
+!
+    kterms = mat%nterms
+    ALLOCATE(idert(kterms,2), iderw(kterms,2), coefs(kterms))
+!
+    ALLOCATE(fun1(0:nidbas1,0:1)) ! Spline and 1st derivative
+    ALLOCATE(fun2(0:nidbas2,0:1)) ! 
+!
+!   Gauss quadature
+!
+    CALL get_gauss(spl%sp1, ng1)
+    CALL get_gauss(spl%sp2, ng2)
+    ALLOCATE(xg1(ng1), wg1(ng1))
+    ALLOCATE(xg2(ng1), wg2(ng1))
+!===========================================================================
+!              2.0 Assembly loop
+!
+    DO i=1,n1
+       CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
+       DO ig1=1,ng1
+          CALL basfun(xg1(ig1), spl%sp1, fun1, i)
+          DO j=1,n2
+             CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
+             DO ig2=1,ng2
+                CALL basfun(xg2(ig2), spl%sp2, fun2, j)
+                CALL coefeq(xg1(ig1), xg2(ig2), idert, iderw, coefs)
+                DO iterm=1,kterms
+                   DO iw1=0,nidbas1  ! Weight function in dir 1
+                      igw1 = i+iw1
+                      DO iw2=0,nidbas2  ! Weight function in dir 2
+                         igw2 = MODULO(j+iw2-1, n2) + 1
+                         irow = igw2 + (igw1-1)*n2
+                         DO it1=0,nidbas1  ! Test function in dir 1
+                            igt1 = i+it1
+                            DO it2=0,nidbas2  ! Test function in dir 2
+                               igt2 = MODULO(j+it2-1, n2) + 1
+                               jcol = igt2 + (igt1-1)*n2
+                               contrib = fun1(iw1,iderw(iterm,1)) * &
+                                    &    fun2(iw2,iderw(iterm,2)) * &
+                                    &    coefs(iterm) *             &
+                                    &    fun2(it2,idert(iterm,2)) * &
+                                    &    fun1(it1,idert(iterm,1)) * &
+                                    &    wg1(ig1) * wg2(ig2)
+                               CALL updtmat(mat, irow, jcol, contrib)
+                            END DO
+                         END DO
+                      END DO
+                   END DO
+                END DO
+             END DO
+          END DO
+       END DO
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(xg1, wg1, fun1)
+    DEALLOCATE(xg2, wg2, fun2)
+    DEALLOCATE(idert, iderw, coefs)
+!
+  CONTAINS
+    SUBROUTINE coefeq(x, y, idt, idw, c)
+      DOUBLE PRECISION, INTENT(in) :: x, y
+      INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
+      DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
+!
+! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
+!
+      c(1) = x        ! 
+      idt(1,1) = 1
+      idt(1,2) = 0
+      idw(1,1) = 1
+      idw(1,2) = 0
+!
+      c(2) = 1.d0/x
+      idt(2,1) = 0
+      idt(2,2) = 1
+      idw(2,1) = 0
+      idw(2,2) = 1
+    END SUBROUTINE coefeq
+  END SUBROUTINE dismat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE disrhs3(mbess, npow, spl, rhs)
+!
+!   Assembly the RHS using 3d spline spl
+!
+    INTEGER, INTENT(in)           :: mbess, npow
+    TYPE(spline2d1d), TARGET      :: spl
+    DOUBLE PRECISION, INTENT(out) :: rhs(:,:)
+!
+    TYPE(spline1d), POINTER :: sp1, sp2, sp3
+    INTEGER :: n1, nidbas1, ndim1, ng1
+    INTEGER :: n2, nidbas2, ndim2, ng2
+    INTEGER :: n3, nidbas3, ndim3, ng3
+    INTEGER :: i, j, k, ig1, ig2, ig3, k1, k2, k3, i1, j2, ij, kk, nrank
+    DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xg3(:), wg3(:), fun3(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: buf(:)
+    DOUBLE PRECISION:: contrib
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    sp1 => spl%sp12%sp1
+    sp2 => spl%sp12%sp2
+    sp3 => spl%sp3
+!
+    CALL get_dim(sp1, ndim1, n1, nidbas1)
+    CALL get_dim(sp2, ndim2, n2, nidbas2) 
+    CALL get_dim(sp3, ndim3, n3, nidbas3) 
+!
+    ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
+    ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
+    ALLOCATE(fun3(0:nidbas3,1)) ! needs only basis functions (no derivatives)
+!
+!   Gauss quadature
+!
+    CALL get_gauss(sp1, ng1)
+    CALL get_gauss(sp2, ng2)
+    CALL get_gauss(sp3, ng3)
+    ALLOCATE(xg1(ng1), wg1(ng1))
+    ALLOCATE(xg2(ng2), wg2(ng2))
+    ALLOCATE(xg3(ng3), wg3(ng3))
+!===========================================================================
+!              2.0 Assembly loop
+!
+    nrank = SIZE(rhs,1)
+    rhs = 0.0d0
+!
+    DO i=1,n1
+       CALL get_gauss(sp1, ng1, i, xg1, wg1)
+       DO ig1=1,ng1
+          CALL basfun(xg1(ig1), sp1, fun1, i)
+          DO j=1,n2
+             CALL get_gauss(sp2, ng2, j, xg2, wg2)
+             DO ig2=1,ng2
+                CALL basfun(xg2(ig2), sp2, fun2, j)
+                DO k=1,n3
+                   CALL get_gauss(sp3, ng3, k, xg3, wg3)
+                   DO ig3=1,ng3
+                      CALL basfun(xg3(ig3), sp3, fun3, k)
+                      contrib = wg1(ig1)*wg2(ig2)*wg3(ig3) * &
+                           &    rhseq(xg1(ig1), xg2(ig2), xg3(ig3), mbess, npow)
+                      DO k1=0,nidbas1
+                         i1 = i+k1
+                         DO k2=0,nidbas2
+                            j2 = MODULO(j+k2-1,n2) + 1
+                            ij = j2 + (i1-1)*n2
+                            DO k3=0,nidbas3
+                               kk = k+k3
+                               rhs(ij,kk) = rhs(ij, kk) + &
+                                    &  contrib*fun1(k1,1)*fun2(k2,1)*fun3(k3,1)
+                            END DO
+                         END DO
+                      END DO
+                   END DO
+                END DO
+             END DO
+          END DO
+       END DO
+    END DO
+!
+!     Update from remote guard cells
+!
+    CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
+    CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
+    next = MODULO(me+1,npes)
+    prev = MODULO(me-1,npes)
+    count = nrank
+    ALLOCATE(buf(nrank))
+    DO i=nidbas3,1,-1
+       CALL mpi_sendrecv(rhs(1,n3+i), count, MPI_DOUBLE_PRECISION, next, 0, &
+         &             buf, count, MPI_DOUBLE_PRECISION, prev, 0, &
+         &             MPI_COMM_WORLD, status, ierr)
+       rhs(:,i) =  rhs(:,i) + buf(:)
+    END DO
+    DEALLOCATE(buf)
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(xg1, wg1, fun1)
+    DEALLOCATE(xg2, wg2, fun2)
+    DEALLOCATE(xg3, wg3, fun3)
+!
+  CONTAINS
+    DOUBLE PRECISION FUNCTION rhseq(x1, x2, x3, m, n)
+      DOUBLE PRECISION, INTENT(in) :: x1, x2, x3
+      INTEGER, INTENT(in) :: m, n
+      rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)*COS(x3)**n
+    END FUNCTION rhseq
+  END SUBROUTINE disrhs3
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE meshdist(c, x, nx)
+!
+!   Construct an 1d  non-equidistant mesh given a
+!   mesh distribution function.
+!
+    DOUBLE PRECISION, INTENT(in) :: c(5)
+    INTEGER, INTENT(iN) :: nx
+    DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+    INTEGER :: nintg
+    DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+    DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+    INTEGER :: i, k
+!
+    a=x(0)
+    b=x(nx)
+    nintg = 10*nx
+    ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+    dx = (b-a)/REAL(nintg)
+    xint(0) = a
+    fint(0) = 0.0d0
+    f1 = fdist(xint(0))
+    DO i=1,nintg
+       f0 = f1
+       xint(i) = xint(i-1) + dx
+       f1 = fdist(xint(i))
+       fint(i) = fint(i-1) + 0.5*(f0+f1)
+    END DO
+!
+!  Normalization
+!
+    scal = REAL(nx) / fint(nintg)
+    fint(0:nintg) = fint(0:nintg) * scal
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+    k = 1
+    DO i=1,nintg-1
+       IF( fint(i) .GE. REAL(k) ) THEN
+          x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+               &   (k-fint(i))
+          k = k+1
+       END IF
+    END DO
+!
+    DEALLOCATE(xint, fint)
+  CONTAINS
+    DOUBLE PRECISION FUNCTION fdist(x)
+      DOUBLE PRECISION, INTENT(in) :: x
+      fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
+    END FUNCTION fdist
+  END SUBROUTINE meshdist
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE ibcmat(mat, ny)
+!
+!   Apply BC on matrix
+!
+    TYPE(pbmat), INTENT(inout) :: mat
+    INTEGER, INTENT(in) :: ny
+    INTEGER :: kl, ku, nrank, i, j
+    DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
+!===========================================================================
+!              1.0 Prologue
+!
+    ku = mat%ku
+    kl = ku
+    nrank = mat%rank
+    ALLOCATE(zsum(nrank), arr(nrank))
+!===========================================================================
+!              2.0 Unicity at the axis
+!
+!   The vertical sum on the NY-th row
+!
+    zsum = 0.0d0
+    DO i=1,ny
+       arr = 0.0d0
+       CALL getrow(mat, i, arr)
+       DO j=1,ny+ku
+          zsum(j) = zsum(j) + arr(j)
+       END DO
+    END DO
+!
+    zsum(ny) = SUM(zsum(1:ny))   ! using symmetry
+    CALL putrow(mat, ny, zsum)
+!
+!   The away operator
+!
+    DO i = 1,ny-1
+       arr = 0.0d0; arr(i) = 1.0d0
+       CALL putrow(mat, i, arr)     
+    END DO
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+    DO i = nrank, nrank-ny+1, -1
+       arr = 0.0d0; arr(i) = 1.0d0
+       CALL putrow(mat, i, arr)     
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(zsum, arr)
+!
+  END SUBROUTINE ibcmat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE ibcrhs3(rhs, ny)
+!
+!   Apply BC on RHS
+!
+    DOUBLE PRECISION, INTENT(inout) :: rhs(:,:)
+    INTEGER, INTENT(in) :: ny
+    INTEGER :: nrank, nz, k
+    DOUBLE PRECISION :: zsum
+!===========================================================================
+!              1.0 Prologue
+!
+    nrank = SIZE(rhs,1)
+    nz = SIZE(rhs,2)
+!===========================================================================
+!              2.0 Unicity at the axis
+!
+!   The vertical sum
+!
+    DO k=1,nz
+       zsum = SUM(rhs(1:ny,k))
+       rhs(ny,k) = zsum
+       rhs(1:ny-1,k) = 0.0d0
+    END DO
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+    DO k=1,nz
+       rhs(nrank-ny+1:nrank,k) = 0.0d0
+    END DO
+  END SUBROUTINE ibcrhs3
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE dist1d(s0, ntot, s, nloc)
+    INCLUDE 'mpif.h'
+    INTEGER, INTENT(in) :: s0, ntot
+    INTEGER, INTENT(out) :: s, nloc
+    INTEGER :: me, npes, ierr, naver, rem
+!
+    CALL MPI_COMM_SIZE(MPI_COMM_WORLD, npes, ierr)
+    CALL MPI_COMM_RANK(MPI_COMM_WORLD, me, ierr)
+    naver = ntot/npes
+    rem = MODULO(ntot,npes)
+    s = s0 + MIN(rem,me) + me*naver
+    nloc = naver
+    IF( me.LT.rem ) nloc = nloc+1
+  END SUBROUTINE dist1d
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE disp(a, str, comm)
+!
+!   Gather partitionned 1d array to 0 and print it
+!
+    INCLUDE 'mpif.h'
+    DOUBLE PRECISION, INTENT(in) :: a(:)
+    INTEGER, INTENT(in) :: comm
+    CHARACTER(len=*), INTENT(in) :: str
+    INTEGER :: n, ntot, npes, me, ierr, i
+    DOUBLE PRECISION, ALLOCATABLE, DIMENSION(:) :: c
+    INTEGER, ALLOCATABLE, DIMENSION(:) :: counts, displs
+!
+    CALL mpi_comm_rank(comm, me, ierr)
+    CALL mpi_comm_size(comm, npes, ierr)
+    n = SIZE(a)
+    IF(me.EQ.0) THEN
+       ALLOCATE(counts(npes), displs(npes+1))
+    END IF
+    CALL mpi_gather(n, 1, MPI_INTEGER, counts, 1, MPI_INTEGER, 0, comm, ierr)
+    IF(me.EQ.0) THEN
+       displs(1) = 0
+       DO i=2,npes+1
+          displs(i) = displs(i-1)+counts(i-1)
+       END DO
+       ntot = displs(npes+1)
+       ALLOCATE(c(ntot))
+       c = 0.0d0
+    END IF
+    CALL mpi_gatherv(a, n, MPI_DOUBLE_PRECISION, c, counts, displs, &
+         &           MPI_DOUBLE_PRECISION, 0, comm, ierr)    
+    IF( me.EQ.0 ) THEN
+       WRITE(*,'(/a)') TRIM(str)
+       DO i=1,npes
+          WRITE(*,'(a,i3.3/(10(1pe12.3)))') 'PE', i-1,  &
+               &        c(displs(i)+1:displs(i+1))
+       END DO
+       DEALLOCATE(c)
+       DEALLOCATE(counts)
+       DEALLOCATE(displs)
+    END IF
+  END SUBROUTINE disp
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+END MODULE ppde3d_pb_mod
diff --git a/examples/tbasfun.f90 b/examples/tbasfun.f90
new file mode 100644
index 0000000..26bf500
--- /dev/null
+++ b/examples/tbasfun.f90
@@ -0,0 +1,137 @@
+!>
+!> @file tbasfun.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   Test scalar and vector versions of def_basfun
+!
+  USE bsplines
+  IMPLICIT NONE
+  INTEGER :: nx, nidbas, nrank, npt, jdermx
+  DOUBLE PRECISION :: dx
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid
+  DOUBLE PRECISION, ALLOCATABLE :: xpt(:), fun(:, :), vfun(:,:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: fun1(:, :), vfun1(:,:,:)
+  DOUBLE PRECISION :: errfun
+  INTEGER :: left, i, nerrs, k
+  INTEGER, ALLOCATABLE :: vleft(:)
+  LOGICAL :: nlper=.FALSE.
+  TYPE(spline1d) :: splx
+!
+  NAMELIST /newrun/ nx, nidbas, npt, jdermx, nlper
+!
+!===============================================================================
+!
+!   1D grid
+!
+  nx = 10
+  nidbas = 3
+  npt = 1000000
+  jdermx = 0
+  READ(*,newrun)
+  WRITE(*,newrun)
+
+  ALLOCATE(xgrid(0:nx))
+  dx = 1.0d0/REAL(nx)
+  xgrid = (/ (i*dx,i=0,nx) /)
+  WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
+!
+!   Set up spline
+!
+  CALL set_spline(nidbas, 4, xgrid, splx, period=nlper)
+  nrank = splx%dim
+  WRITE(*,'(a, i5)') 'nrank =', nrank
+  WRITE(*,'(a/(10f8.3))') 'knots', splx%knots
+!
+  IF(nx.LE.2) THEN
+     WRITE(*,'(/a)') 'VAL0'
+     DO i=1,nx
+        WRITE(*,'(a,i3)') 'Interval', i
+        DO k=1,nidbas+1 ! Spline number
+           WRITE(*,'(10f12.4)') splx%val0(:,k,i)
+        END DO
+     END DO
+     IF(nlper) THEN
+        WRITE(*,'(/a)') 'VALC'
+        DO k=1,nidbas+1 ! Spline number
+           WRITE(*,'(10f12.4)') splx%valc(:,k)
+        END DO
+     END IF
+  END IF
+!
+  ALLOCATE(xpt(npt))
+  ALLOCATE(vleft(npt))
+  ALLOCATE(fun(0:nidbas,0:jdermx))  ! Values and derivatives of all Splines
+  ALLOCATE(vfun(0:nidbas,0:jdermx,npt))
+  ALLOCATE(fun1(0:nidbas,0:jdermx))  ! Values and derivatives of all Splines
+  ALLOCATE(vfun1(0:nidbas,0:jdermx,npt))
+  CALL RANDOM_NUMBER(xpt)
+!===============================================================================
+!
+!   Check def_basfun
+!
+  CALL def_basfun(xpt, splx, vfun, vleft)
+!
+  WRITE(*,'(/a)') 'vector def_basfun versus scalar def_basfun'
+  WRITE(*,'(a6,a12, 2a6, a12)') 'i', 'x', 'left', 'vleft', 'Max. err'
+  DO i=1,npt
+     CALL def_basfun(xpt(i), splx, fun, left)
+     errfun= MAXVAL(ABS(fun(:,:)-vfun(:,:,i)))
+     WRITE(*,'(i6,1pe12.4,2i6,1pe12.4)') i, xpt(i), left, vleft(i), errfun
+  END DO
+!
+  IF(npt.LE.10) THEN
+     WRITE(*,'(/a)')  'Scalar/vector basfun'
+     DO i=1,npt
+        CALL basfun(xpt(i), splx, fun, vleft(i)+1)
+        WRITE(*,'(a,1pe12.4/10(1pe12.4))') 'x = ', xpt(i),  fun(:,:)
+        WRITE(*,'(10(1pe12.4))') vfun(:,:,i)
+     END DO
+  END IF
+!===============================================================================
+!
+!   Check basfun
+!
+  CALL basfun(xpt, splx, vfun1, vleft+1)
+  WRITE(*,'(/a,1pe12.4)') 'vector basfun versus vector def_basun: Max err', &
+       &    MAXVAL(ABS(vfun-vfun1))
+!
+  WRITE(*,'(/a)') 'vector basfun versus scalar basfun'
+  WRITE(*,'(a6,a12,a12)') 'i', 'x', 'Max. err'
+  DO i=1,npt
+     CALL basfun(xpt(i), splx, fun1, vleft(i)+1)
+     errfun= MAXVAL(ABS(fun1(:,:)-vfun1(:,:,i)))
+     WRITE(*,'(i6,1pe12.4,1pe12.4)') i, xpt(i), errfun
+  END DO
+!!===============================================================================
+!
+!   Clean up
+!
+  CALL destroy_sp(splx)
+  DEALLOCATE(xgrid)
+  DEALLOCATE(xpt)
+  DEALLOCATE(vleft)
+  DEALLOCATE(fun)
+END PROGRAM main
diff --git a/examples/tcdsmat.f90 b/examples/tcdsmat.f90
new file mode 100644
index 0000000..f832abf
--- /dev/null
+++ b/examples/tcdsmat.f90
@@ -0,0 +1,283 @@
+!>
+!> @file tcdsmat.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!  Solving the following 2d PDE using splines and iterative method
+!
+!    -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0
+!    exact solution: f(x,y) = (1-x^2) x^m cos(my)
+!
+  USE tcdsmat_mod
+  USE bsplines
+  USE cds
+  USE futils
+!
+  IMPLICIT NONE
+  INCLUDE 'mpif.h'
+!
+  INTEGER                       :: nints(2), nidbas(2), ngauss(2), mbess, nterms
+  DOUBLE PRECISION              :: pi, coefx(5), coefy(5)
+  DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), ygrid(:)
+  DOUBLE PRECISION, ALLOCATABLE :: rhs(:), sol(:)
+
+  INTEGER            :: mrank, bw0
+  TYPE(spline2d)     :: splxy
+  TYPE(cds_mat)      :: mat
+  LOGICAL            :: readmat, verbose
+  CHARACTER(len=128) :: file='tcdsmat.h5'
+  CHARACTER(len=128) :: filein
+  INTEGER            :: fid, fidin
+  DOUBLE PRECISION   :: mem, seconds
+  DOUBLE PRECISION   :: t0, tmat, tbal, tsolv, tgrid, tmumps(2)
+  INTEGER            :: nitmx=100, niter, nssor
+  DOUBLE PRECISION   :: rtolmx=1.0d-6, omega=0.0d0, resid
+!
+  INTEGER                       :: i, j, ij, dimx, dimy
+  DOUBLE PRECISION, ALLOCATABLE :: bcoef(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: solcal(:,:), solana(:,:), errsol(:,:)
+  INTEGER, ALLOCATABLE          :: dists(:)
+!
+  INTEGER :: ierr, me
+  TYPE(mumps_mat) :: mat_mumps
+!
+  NAMELIST /newrun/ nints, nidbas, ngauss, mbess, coefx, coefy, &
+       &            nitmx, rtolmx, omega, nssor, readmat, verbose, filein
+!===========================================================================
+!              1.0     Prologue
+!
+  CALL mpi_init(ierr)
+  CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
+!
+!   Read in data specific to run
+!
+  WRITE(*,'(/a)') 'Prologue ...'
+  readmat = .FALSE.   ! Read matrix and rhs from file
+  filein = 'mat.h5'
+  nints = (/8,8/)     ! Number of intervals in x, y
+  nidbas = (/3,3/)    ! Degree of splines
+  ngauss = (/4,4/)    ! Number of Gauss points/interval
+  mbess = 2           ! Exponent of differential problem
+  nterms = 2          ! Number of terms in weak form
+  nitmx = 1000        ! Max number of iterations
+  rtolmx = 1.e-12     ! Max relative tolerance
+  nssor  = 1          ! Number of SSOR precond steps
+  verbose = .FALSE.   ! Output residue at each iteration
+  coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+  coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+!
+  READ(*,newrun)
+!
+!   Overwrite some input by reading in "filein" if required
+!
+  IF(readmat) THEN
+     CALL openf(filein, fidin, mode='r')
+     CALL getatt(fidin, '/', 'NX', nints(1))
+     CALL getatt(fidin, '/', 'NY', nints(2))
+     CALL getatt(fidin, '/', 'NIDBAS1', nidbas(1))
+     CALL getatt(fidin, '/', 'NIDBAS2', nidbas(2))
+     CALL getatt(fidin, '/', 'NGAUSS1', ngauss(1))
+     CALL getatt(fidin, '/', 'NGAUSS2', ngauss(2))
+     CALL getatt(fidin, '/', 'MBESS', mbess)
+  END IF
+  WRITE(*,newrun)
+!
+!   Define grid on x (=radial) & y (=poloidal) axis
+!
+  pi = 4.0d0*ATAN(1.0d0)
+  ALLOCATE(xgrid(0:nints(1)), ygrid(0:nints(2)))
+  xgrid(0) = 0.0d0;   xgrid(nints(1)) = 1.0d0
+  CALL meshdist(coefx, xgrid, nints(1))
+  ygrid(0) = 0.0d0;   ygrid(nints(2)) = 2.d0*pi
+  CALL meshdist(coefy, ygrid, nints(2))
+!
+!   Create hdf5 file
+!
+  CALL creatf(file, fid, 'TCDSMAT Result File', real_prec='d')
+  CALL attach(fid, '/', 'NX', nints(1))
+  CALL attach(fid, '/', 'NY', nints(2))
+  CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
+  CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
+  CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
+  CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
+  CALL attach(fid, '/', 'MBESS', mbess)
+!===========================================================================
+!              2.0       Discretize the PDE
+!
+!   Set up spline
+!
+  WRITE(*,'(/a)') 'Discretize the PDE ...'
+  t0 = seconds()
+  CALL set_spline(nidbas, ngauss, &
+       &          xgrid, ygrid, splxy, (/.FALSE., .TRUE./) )
+!
+!   Set up CDS matrix for solver
+!
+  CALL mstruct(nidbas, nints, mrank, dists)
+  bw0=(nidbas(1)+1)*nints(2)   ! Half band including all zero diagonals
+  CALL init(mrank, dists, nterms, mat, bw0=bw0)
+  WRITE(*,'(a,4i8)') 'rank, kl, ku, bw0 = ', mat%rank, mat%kl, mat%ku, bw0
+  WRITE(*,'(i4,a/(10i8))') mat%ndiags, ' diagonals:', mat%dists
+!
+!   FE matrix assembly and apply BC 
+!
+  IF(readmat) THEN
+     CALL getmat(fidin, '/MAT1', mat)
+  ELSE
+     CALL dismat(splxy, mat)
+     CALL ibcmat(mat, nints(2), nidbas(1))
+  END IF
+!
+  WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem()
+!
+!   Assembly RHS and apply BC
+!
+  ALLOCATE(rhs(mrank), sol(mrank))
+!
+  IF(readmat) THEN
+     CALL getarr(fidin, '/RHS', rhs)
+  ELSE
+     CALL disrhs(mbess, splxy, rhs)
+     CALL ibcrhs(rhs, nints(2))
+     CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
+  END IF
+  tmat = seconds() - t0
+!===========================================================================
+!              3.0       Diagonal balance of matrix
+!
+  WRITE(*,'(/a)') 'Diagonal balance of matrix ...'
+  tbal = seconds()
+  IF( .NOT. readmat ) THEN
+     CALL diagbal(mat)
+     CALL putmat(fid,'/MAT1', mat, 'CDS matrix with BC')
+  END IF
+  rhs = mat%bal * rhs
+  tbal = seconds()-tbal
+!===========================================================================
+!              4.0       Analytical solutions
+!
+  ALLOCATE(solcal(0:nints(1),0:nints(2)))
+  ALLOCATE(solana(0:nints(1),0:nints(2)))
+  ALLOCATE(errsol(0:nints(1),0:nints(2)))
+  DO i=0,nints(1)
+     DO j=0,nints(2)
+        solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
+     END DO
+  END DO
+!===========================================================================
+!              5.0       Direct solve with MUMPS
+!
+!
+  WRITE(*,'(/a)') 'Solve the linear system using MUMPS ...'
+!
+  tmumps(1) = seconds()
+  CALL cds2mumps(mat, mat_mumps)
+  CALL factor(mat_mumps, debug=.FALSE.)
+  tmumps(2) = seconds()
+  sol = rhs
+  CALL bsolve(mat_mumps, sol, debug=.FALSE.)
+  sol = mat%bal * sol
+  sol(1:nints(2)-1) = sol(nints(2))  ! Unicity
+  tmumps(1:2) = seconds()-tmumps(1:2)
+  
+!
+  tgrid = seconds()
+  dimx = splxy%sp1%dim
+  dimy = splxy%sp2%dim
+  ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
+!
+  DO j=0,dimy-1
+     DO i=0,dimx-1
+        ij = MODULO(j,nints(2)) + i*nints(2) + 1
+        bcoef(i,j) = sol(ij)
+     END DO
+  END DO
+  CALL gridval(splxy, xgrid, ygrid, solcal, (/0,0/), bcoef)
+  errsol = solana - solcal
+  tgrid = seconds() - tgrid
+!
+  PRINT*, 'Relative discretization errors', norm2(errsol) / norm2(solana)
+!!$  WRITE(*,'(a,2(1pe15.6))') 'Relative discretization errors', &
+!!$       &    norm2(errsol) / norm2(solana)
+!===========================================================================
+!              5.0       Solve the linear system using CG
+!
+  WRITE(*,'(/a)') 'Solve the linear system using CG ...'
+!
+  tsolv = seconds()
+  sol(:) = 0.0d0         !  Initial guest for solution
+  IF( nssor .EQ. 0 ) THEN
+     CALL cg(mat, rhs, omega, nitmx, rtolmx, sol, resid, niter, &
+          &  verbose=verbose)
+  ELSE
+     CALL cg(mat, rhs, omega, nitmx, rtolmx, sol, resid, niter, &
+          &  verbose=verbose, nssor=nssor)
+  END IF
+  sol = mat%bal * sol
+  sol(1:nints(2)-1) = sol(nints(2))  ! Unicity
+  tsolv = seconds()-tsolv
+!
+  DO j=0,dimy-1
+     DO i=0,dimx-1
+        ij = MODULO(j,nints(2)) + i*nints(2) + 1
+        bcoef(i,j) = sol(ij)
+     END DO
+  END DO
+  CALL gridval(splxy, xgrid, ygrid, solcal, (/0,0/), bcoef)
+  errsol = solana - solcal
+!
+  PRINT*, 'Relative discretization errors', norm2(errsol) / norm2(solana)
+!===========================================================================
+!              9.0  Epilogue
+!
+  CALL putarr(fid, '/SOL', sol)
+  CALL putarr(fid, '/xgrid', xgrid, 'r')
+  CALL putarr(fid, '/ygrid', ygrid, '\theta')
+  CALL putarr(fid, '/sol', solcal, 'Solutions')
+  CALL putarr(fid, '/solana', solana,'Exact solutions')
+  CALL putarr(fid, '/errors', errsol, 'Errors')
+!
+  WRITE(*,'(/a)') '---'
+  WRITE(*,'(a,1pe12.3)')    'Matrice construction time (s) ', tmat
+  WRITE(*,'(a,1pe12.3)')    'Matrice balancing time (s)    ', tbal
+  WRITE(*,'(a,2(1pe12.3))') 'MUMPS solver time (s)         ', tmumps
+  WRITE(*,'(a,1pe12.3)')    'Solution at grid time (s)     ', tgrid
+  WRITE(*,'(a,i8,2(1pe12.3))') 'nits, resid, t(s)', niter, resid, tsolv
+!
+  DEALLOCATE(xgrid,ygrid)
+  DEALLOCATE(rhs, sol)
+  DEALLOCATE(dists)
+  DEALLOCATE(bcoef)
+  DEALLOCATE(solcal,solana,errsol)
+  CALL destroy_sp(splxy)
+  CALL destroy(mat)
+  CALL closef(fid)
+  IF(readmat) THEN
+     CALL closef(fidin)
+  END IF
+!
+  CALL mpi_finalize(ierr)
+!===========================================================================
+END PROGRAM main
diff --git a/examples/tcdsmat_mod.f90 b/examples/tcdsmat_mod.f90
new file mode 100644
index 0000000..776e422
--- /dev/null
+++ b/examples/tcdsmat_mod.f90
@@ -0,0 +1,635 @@
+!>
+!> @file tcdsmat_mod.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+MODULE tcdsmat_mod
+  IMPLICIT NONE
+!
+  INTERFACE
+     SUBROUTINE meshdist(coefs, x, nx)
+       DOUBLE PRECISION, INTENT(in) :: coefs(5)
+       INTEGER, INTENT(iN) :: nx
+       DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+     END SUBROUTINE meshdist
+  END INTERFACE
+!
+  INTERFACE norm2
+     MODULE PROCEDURE norm2_1d, norm2_2d
+  END INTERFACE
+CONTAINS
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE mstruct(p, n, rank, dist)
+!
+!  It is assumed that:
+!    . 2nd dimension is number first
+!    . 2nd dimension is periodic
+!
+    INTEGER, INTENT(in)  :: p(2), n(2)
+    INTEGER, INTENT(out) :: rank
+    INTEGER, ALLOCATABLE :: dist(:)
+!
+    INTEGER, ALLOCATABLE :: pvect(:,:)
+    INTEGER              :: kl, ku, i
+!
+    rank = (n(1)+p(1))*n(2)    ! Rank of the FE matrix
+    ku = (p(1)+1)*(2*p(2)+1)-1
+    kl = ku
+    ALLOCATE(pvect(0:2*p(2),0:p(1)))
+    IF( ALLOCATED(dist)) DEALLOCATE(dist)
+    ALLOCATE(dist(-kl:ku))
+!
+!   Upper (North) points
+    pvect(0:p(2),0) = (/(i,i=0,p(2))/)
+!
+!   Lower (South) points and periodicity of 2nd dim.
+    DO i=1,p(2)
+       pvect(p(2)+i,0) = n(2)-pvect(p(2)-i+1,0)
+    END DO
+!
+!  Shift by N2 for points on the right (West) side
+    DO i=1,p(1)
+       pvect(:,i) = pvect(:,i-1)+n(2)
+    END DO
+!
+!   Super-diagonals including the diagonal
+    dist(0:ku) = RESHAPE(pvect, (/ku+1/))
+!
+!   Sub-diagonals
+    DO i=-1,-kl,-1
+       dist(i) = -dist(-i)
+    END DO
+!
+    DEALLOCATE(pvect)
+  END SUBROUTINE mstruct
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE dismat(spl, mat)
+!
+!   Assembly of FE matrix mat using spline spl
+!
+    USE bsplines
+    USE cds
+!
+    TYPE(spline2d), INTENT(in)   :: spl
+    TYPE(cds_mat), INTENT(inout) :: mat
+!
+    INTEGER :: n1, nidbas1, ndim1, ng1
+    INTEGER :: n2, nidbas2, ndim2, ng2
+    INTEGER :: i, j, ig1, ig2
+    INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
+    DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
+    DOUBLE PRECISION:: contrib
+!
+    INTEGER :: kterms         ! Number of terms in weak form
+    INTEGER, DIMENSION(:,:), ALLOCATABLE :: idert, iderw  ! Derivative order
+    DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE  :: coefs ! Terms in weak form
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
+    WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
+    WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
+!
+!   Weak form
+!
+    kterms = mat%nterms
+    ALLOCATE(idert(kterms,2), iderw(kterms,2), coefs(kterms))
+!
+    ALLOCATE(fun1(0:nidbas1,0:1)) ! Spline and 1st derivative
+    ALLOCATE(fun2(0:nidbas2,0:1)) ! 
+!
+!   Gauss quadature
+!
+    CALL get_gauss(spl%sp1, ng1)
+    CALL get_gauss(spl%sp2, ng2)
+    ALLOCATE(xg1(ng1), wg1(ng1))
+    ALLOCATE(xg2(ng1), wg2(ng1))
+ !===========================================================================
+!              2.0 Assembly loop
+!
+    DO i=1,n1
+       CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
+       DO ig1=1,ng1
+          CALL basfun(xg1(ig1), spl%sp1, fun1, i)
+          DO j=1,n2
+             CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
+             DO ig2=1,ng2
+                CALL basfun(xg2(ig2), spl%sp2, fun2, j)
+                CALL coefeq(xg1(ig1), xg2(ig2), idert, iderw, coefs)
+                DO iterm=1,kterms
+                   DO iw1=0,nidbas1  ! Weight function in dir 1
+                      igw1 = i+iw1
+                      DO iw2=0,nidbas2  ! Weight function in dir 2
+                         igw2 = MODULO(j+iw2-1, n2) + 1
+                         irow = igw2 + (igw1-1)*n2
+                         DO it1=0,nidbas1  ! Test function in dir 1
+                            igt1 = i+it1
+                            DO it2=0,nidbas2  ! Test function in dir 2
+                               igt2 = MODULO(j+it2-1, n2) + 1
+                               jcol = igt2 + (igt1-1)*n2
+                               contrib = fun1(iw1,iderw(iterm,1)) * &
+                                    &    fun2(iw2,iderw(iterm,2)) * &
+                                    &    coefs(iterm) *             &
+                                    &    fun2(it2,idert(iterm,2)) * &
+                                    &    fun1(it1,idert(iterm,1)) * &
+                                    &    wg1(ig1) * wg2(ig2)
+                               CALL updtmat(mat, irow, jcol, contrib)
+                            END DO
+                         END DO
+                      END DO
+                   END DO
+                END DO
+             END DO
+          END DO
+       END DO
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(xg1, wg1, fun1)
+    DEALLOCATE(xg2, wg2, fun2)
+    DEALLOCATE(idert, iderw, coefs)
+!
+  CONTAINS
+    SUBROUTINE coefeq(x, y, idt, idw, c)
+      DOUBLE PRECISION, INTENT(in) :: x, y
+      INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
+      DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
+!
+! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
+!
+      c(1) = x        ! 
+      idt(1,1) = 1
+      idt(1,2) = 0
+      idw(1,1) = 1
+      idw(1,2) = 0
+!
+      c(2) = 1.d0/x
+      idt(2,1) = 0
+      idt(2,2) = 1
+      idw(2,1) = 0
+      idw(2,2) = 1
+    END SUBROUTINE coefeq
+  END SUBROUTINE dismat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE ibcmat(mat, ny, px)
+!
+!   Apply BC on matrix
+!
+    USE cds
+    IMPLICIT NONE
+    TYPE(cds_mat), INTENT(inout) :: mat
+    INTEGER, INTENT(in)          :: ny, px
+!
+    INTEGER :: kl, ku, n, bw0, i, j
+    DOUBLE PRECISION :: zsum(mat%rank), arr(mat%rank)
+!===========================================================================
+!              1.0 Prologue
+!
+    kl = mat%kl
+    ku = mat%ku
+    n = mat%rank
+    mat%ny = ny
+!
+!   Size of  row ny and column ny
+!
+    bw0 = SIZE(mat%rowv)
+!===========================================================================
+!              2.0 Unicity at the axis
+!
+!   The vertical sum on the NY-th row
+!   Store the sums at row ny in MAT%ROWV
+!
+    zsum(1:n) = 0.0d0
+    DO i=1,ny
+       CALL getrow(mat, i, arr)
+       zsum(1:bw0) = zsum(1:bw0) + arr(1:bw0)
+       IF( i .LE. ny ) THEN    ! Clear rows 1:(ny-1)
+          arr = 0.0d0; arr(i) = 1.0d0
+          CALL putrow(mat, i, arr)
+       END IF
+    END DO
+    mat%rowv(1:bw0) = zsum(1:bw0)   ! row ny
+!!$    WRITE(*,'(/a,/(10(f8.3)))') 'rowv', mat%rowv(1:130)
+!
+!   The horizontal sum on the NY-th column
+!   The NY-th row of matrix was stored in mat%rowv
+!   Store the sums ar column ny at MAT%COLH
+!
+    zsum(1:n) = 0.0d0
+    DO j=1,ny
+       CALL getcol(mat, j, arr)
+       zsum(ny) = zsum(ny) + mat%rowv(j)
+       zsum(ny+1:bw0) = zsum(ny+1:bw0) + arr(ny+1:bw0)
+       IF( j .NE. ny ) THEN    ! Clear columns 1:(ny-1)
+          mat%rowv(j) = 0.0d0
+          arr = 0.0d0; arr(j) = 1.0d0
+          CALL putcol(mat, j, arr)
+       END IF
+    END DO
+    mat%rowv(ny) = 0.0d0   ! Its value is now in mat%colh(ny)
+    arr = 0.0d0
+    CALL putcol(mat, ny, arr)
+    CALL putrow(mat, ny, arr)
+    mat%colh(1:bw0) = zsum(1:bw0) ! column ny
+!
+!   Move the diagonal term from mat%colh back to main diagonal
+!
+    CALL putele(mat, ny, ny, mat%colh(ny))
+    mat%colh(ny) = 0.0d0
+!!$    WRITE(*,'(/a,/(10(f8.3)))') 'rowv', mat%rowv(1:130)
+!!$    WRITE(*,'(/a,/(10(f8.3)))') 'colh', mat%colh(1:130)
+!!$    WRITE(*,'(/a,/(10(f8.3)))') 'colh', mat%val(1:ny,0)
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+    DO j = n, n-ny+1, -1
+       arr = 0.0d0; arr(j) = 1.0d0
+       CALL putcol(mat, j, arr)     
+    END DO
+!
+    DO i = n, n-ny+1, -1
+       arr = 0.0d0; arr(i) = 1.0d0
+       CALL putrow(mat, i, arr)     
+    END DO
+!!$    WRITE(*,'(/a,/(10(f8.3)))') 'diag 1', mat%val(n-ny-1:n,1)
+!!$    WRITE(*,'(/a,/(10(f8.3)))') 'diag 0', mat%val(n-ny:n,0)
+!!$    WRITE(*,'(/a,/(10(f8.3)))') 'diag -1', mat%val(n-ny-1:n,-1)
+!===========================================================================
+!              9.0  Epilogue
+!
+  END SUBROUTINE ibcmat
+!===========================================================================
+  SUBROUTINE disrhs(mbess, spl, rhs)
+!
+!   Assembly the RHS using 2d spline spl
+!
+    USE bsplines
+    INTEGER, INTENT(in)           :: mbess
+    TYPE(spline2d), INTENT(in)    :: spl
+    DOUBLE PRECISION, INTENT(out) :: rhs(:)
+!
+    INTEGER                       :: n1, nidbas1, ndim1, ng1
+    INTEGER                       :: n2, nidbas2, ndim2, ng2
+    INTEGER                       :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
+    DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
+    DOUBLE PRECISION:: contrib
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, n2, nidbas2) 
+!
+    ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
+    ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
+!
+!   Gauss quadature
+!
+    CALL get_gauss(spl%sp1, ng1)
+    CALL get_gauss(spl%sp2, ng2)
+    WRITE(*,'(/a, 2i3)') 'Gauss points and weights, ngauss =', ng1, ng2
+    ALLOCATE(xg1(ng1), wg1(ng1))
+    ALLOCATE(xg2(ng1), wg2(ng1))
+!===========================================================================
+!              2.0 Assembly loop
+!
+    nrank = SIZE(rhs)
+    rhs(1:nrank) = 0.0d0
+!
+    DO i=1,n1
+       CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
+       DO ig1=1,ng1
+          CALL basfun(xg1(ig1), spl%sp1, fun1, i)
+          DO j=1,n2
+             CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
+             DO ig2=1,ng2
+                CALL basfun(xg2(ig2), spl%sp2, fun2, j)
+                contrib = wg1(ig1)*wg2(ig2) * &
+                     &    rhseq(xg1(ig1),xg2(ig2), mbess)
+                DO k1=0,nidbas1
+                   i1 = i+k1
+                   DO k2=0,nidbas2
+                      j2 = MODULO(j+k2-1,n2) + 1
+                      ij = j2 + (i1-1)*n2
+                      rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
+                   END DO
+                END DO
+             END DO
+          END DO
+       END DO
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(xg1, wg1, fun1)
+    DEALLOCATE(xg2, wg2, fun2)
+!
+  CONTAINS
+    DOUBLE PRECISION FUNCTION rhseq(x1, x2, m)
+      DOUBLE PRECISION, INTENT(in) :: x1, x2
+      INTEGER, INTENT(in) :: m
+      rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)
+    END FUNCTION rhseq
+  END SUBROUTINE disrhs
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE ibcrhs(rhs, ny)
+!
+!   Apply BC on RHS
+!
+    DOUBLE PRECISION, INTENT(inout) :: rhs(:)
+    INTEGER, INTENT(in)              :: ny
+!
+    INTEGER          :: nrank
+    DOUBLE PRECISION :: zsum
+!===========================================================================
+!              1.0 Prologue
+!
+    nrank = SIZE(rhs,1)
+!===========================================================================
+!              2.0 Unicity at the axis
+!
+!   The vertical sum
+!
+    zsum = SUM(rhs(1:ny))
+    rhs(ny) = zsum
+    rhs(1:ny-1) = 0.0d0
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+    rhs(nrank-ny+1:nrank) = 0.0d0
+  END SUBROUTINE ibcrhs
+!===========================================================================
+  SUBROUTINE cg(mat, rhs, omega, nitmx, rtolmx, sol, resid, nit, verbose, &
+       &        nssor)
+!
+!   Preconditionned Conjugate Gradient solver
+!
+    USE cds
+    TYPE(cds_mat)                           :: mat
+    DOUBLE PRECISION, INTENT(in)            :: rhs(:), omega, rtolmx
+    INTEGER, INTENT(in)                     :: nitmx
+    DOUBLE PRECISION, INTENT(inout)         :: sol(:)
+    DOUBLE PRECISION, OPTIONAL, INTENT(out) :: resid
+    INTEGER, OPTIONAL, INTENT(out)          :: nit
+    INTEGER, OPTIONAL, INTENT(in)           :: nssor
+    LOGICAL, OPTIONAL, INTENT(in)           :: verbose
+!
+    DOUBLE PRECISION, DIMENSION(SIZE(rhs,1)) :: wr, wz, wp, wq
+    DOUBLE PRECISION :: bnrm2, residue, rho0, rho1, alpha, beta
+    INTEGER          :: it
+!
+    bnrm2 = SQRT(DOT_PRODUCT(rhs,rhs))  ! Euclidian norm of RHS
+    it = 0
+    wr = rhs-vmx(mat,sol)
+!
+!... Iteration loop  (see fig. 2.5, p.15 of "Templates...")
+    DO
+       it = it+1
+       IF( PRESENT(nssor) ) THEN
+          CALL psolve(mat, wz, wr, omega, nssor)
+       ELSE
+          wz = wr
+       END IF
+       rho1 = DOT_PRODUCT(wr,wz)
+       IF( it .EQ. 1 ) THEN
+          wp = wz
+       ELSE
+          beta = rho1/rho0
+          wp = wz + beta*wp
+       END IF
+       wq = vmx(mat,wp)
+       alpha = rho1 / DOT_PRODUCT(wp,wq)
+       sol = sol + alpha*wp
+       wr = wr - alpha*wq
+       residue = SQRT(DOT_PRODUCT(wr,wr)) / bnrm2
+       IF( PRESENT(verbose) ) THEN
+          IF(verbose) WRITE(*,'(a,i8,1pe12.3)') 'it, resid', it, residue
+       END IF
+       IF( residue .LE. rtolmx .OR. it .GE. nitmx) EXIT
+       rho0 = rho1
+    END DO
+    IF(PRESENT(resid)) resid = residue
+    IF(PRESENT(nit)) nit = it
+  END SUBROUTINE cg
+!===========================================================================
+  SUBROUTINE psolve(mat, x, b, omega, niter_in)
+!
+!   Preconditionners
+!
+    USE cds
+    TYPE(cds_mat)                 :: mat
+    DOUBLE PRECISION, INTENT(out) :: x(:)
+    DOUBLE PRECISION, INTENT(in)  :: b(:)
+    DOUBLE PRECISION, INTENT(in)  :: omega
+    INTEGER, OPTIONAL, INTENT(in) :: niter_in
+!
+    INTEGER          :: niter
+    DOUBLE PRECISION :: rtolmx
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+!                        1.   No-preconditionning
+!
+    IF( omega .LT. 0.0d0 ) THEN
+       x = b
+       RETURN
+    END IF
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+!                        2.   SSOR Preconditionner
+!
+    niter = 1
+    rtolmx = 1.d-6
+    IF(PRESENT(niter_in)) THEN
+       niter = niter_in
+    END IF
+    CALL ssor(mat, b, omega, niter, rtolmx, x)
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  END SUBROUTINE psolve
+!
+!===========================================================================
+  SUBROUTINE ssor(mat, b, omega, nitmx, rtolmx, x, resid, nit, verbose)
+!
+!   Solve Ax = b using SSOR method
+!
+    USE cds
+    TYPE(cds_mat)                           :: mat
+    DOUBLE PRECISION, INTENT(out)           :: x(:)
+    DOUBLE PRECISION, INTENT(in)            :: b(:)
+    DOUBLE PRECISION, INTENT(in)            :: omega, rtolmx
+    INTEGER, INTENT(in)                     :: nitmx
+    DOUBLE PRECISION, OPTIONAL, INTENT(out) :: resid
+    INTEGER, OPTIONAL, INTENT(out)          :: nit
+    LOGICAL, OPTIONAL, INTENT(in)           :: verbose
+!
+    INTEGER          :: n, iter
+    INTEGER          :: k, i, j, d, bw0, ny
+    DOUBLE PRECISION :: omega1, bnrm2, residue
+    DOUBLE PRECISION :: rhs(SIZE(x))
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+!                            1. Initialization
+    n = SIZE(x)
+    bw0 = SIZE(mat%rowv)
+    ny = mat%ny
+    bnrm2 = norm2(b)  ! Euclidian norm of RHS
+    omega1 = 1.0d0-omega
+    iter = 0
+    DO
+       iter = iter+1
+!
+!                            2. Forward SOR
+!  Set RHS
+       rhs = omega*b  ! rhs <- omega*b
+       IF( iter .GT. 1 ) THEN
+          rhs = rhs + omega1*x ! rhs <- rhs + (1-omega)*x0
+          DO k=1,mat%ku        ! rhs <- rhs - omega*U*x0
+             d = mat%dists(k)
+             DO i=MAX(1,1-d),MIN(n,n-d)
+                rhs(i) = rhs(i) - omega*mat%val(i,k)*x(i+d)
+             END DO
+          END DO
+          IF( ny .NE. 0 ) THEN  ! Contributions from unicity BC
+             rhs(ny) = rhs(ny) - omega*DOT_PRODUCT(mat%rowv(ny+1:bw0),x(ny+1:bw0))
+          END IF
+       END IF
+!
+!  Solve (1+omega*L) x = rhs
+       x = rhs
+       IF( ny .NE. 0 ) THEN  ! Contributions from unicity BC
+          rhs(ny+1:bw0) = rhs(ny+1:bw0) - omega*mat%colh(ny:bw0)*x(ny)
+       END IF
+       DO i=ny+1,n
+          DO k=-1,-mat%kl,-1
+             d = mat%dists(k)
+             j=i+d
+             IF( j.LE.0 ) EXIT
+             x(i) = x(i) - omega*mat%val(i,k)*x(j)
+          END DO
+       END DO
+!
+!                            3. Backward SOR
+!  Set RHS
+       rhs = omega*b + omega1*x  ! rhs <- omega*b + (1-omega)*x0
+       IF( ny .NE. 0 ) THEN  ! Contributions from unicity BC
+          rhs(ny+1:bw0) = rhs(ny+1:bw0) - omega*mat%colh(ny:bw0)*x(ny)
+       END IF
+       DO k=-mat%kl,-1           ! rhs <- rhs - omega*L*x0
+          d = mat%dists(k)
+          DO i=MAX(1,1-d),MIN(n,n-d)
+             rhs(i) = rhs(i) - omega*mat%val(i,k)*x(i+d)
+          END DO
+       END DO
+!
+!  Solve (1+omega*U) x = rhs
+       x = rhs
+       DO i=n-1,ny+1,-1
+          DO k=1,mat%ku
+             d = mat%dists(k)
+             j = i+d
+             IF( j.GT.n ) EXIT
+             x(i) = x(i) - omega*mat%val(i,k)*x(j)
+          END DO
+       END DO
+       IF( ny .NE. 0 ) THEN  ! Contributions from unicity BC
+          x(ny) = x(ny) - omega*DOT_PRODUCT(mat%rowv(ny+1:bw0),x(ny+1:bw0))
+       END IF
+!
+!                            4. Compute residue
+!
+       IF( PRESENT(resid) ) THEN
+          residue = norm2(b-vmx(mat,x)) / bnrm2
+          IF(PRESENT(verbose)) THEN
+             IF(verbose) WRITE(*,'(a,i8,1pe12.3)') 'it, resid', iter, residue
+          END IF
+          IF( residue .LT. rtolmx ) EXIT
+       END IF
+!
+       IF( iter .GE. nitmx ) EXIT
+    END DO  ! End of SSOR iterations
+    IF(PRESENT(nit))  nit = iter
+    IF(PRESENT(resid)) resid = residue
+  END SUBROUTINE ssor
+!===========================================================================
+  SUBROUTINE diagbal(mat)
+!
+!   Diagonal matrix balancing: store D^(-1/2) in mat%bal
+!
+    USE cds
+    TYPE(cds_mat)    :: mat
+    INTEGER          :: n, bw0, ny, d, i, k
+    DOUBLE PRECISION :: diag(mat%rank)
+! 
+    n = mat%rank
+    ny = mat%ny
+    bw0 = SIZE(mat%colh)
+    diag(1:n) = mat%val(1:n,0)
+    IF( MINVAL(diag) .LE. 0.0d0 ) THEN
+       WRITE(*,'(a)') 'Diagonal elements of matrix are not stricly positive!'
+       STOP
+    END IF
+    diag(1:n) = 1.0d0/SQRT(diag(1:n))
+!
+!     Scale the matrix
+
+!$OMP parallel do private (k,d,i)
+    DO k=-mat%kl,mat%ku
+       d = mat%dists(k)
+       DO i=MAX(1,1-d),MIN(n,n-d)
+          mat%val(i,k) = diag(i)*diag(i+d)*mat%val(i,k)
+       END DO
+    END DO
+!$OMP end parallel do
+!
+!     The ny^th column and row
+    IF( ny.NE.0 ) THEN
+       mat%rowv(1:bw0) = diag(1:bw0)*diag(ny)*mat%rowv(1:bw0)
+       mat%colh(1:bw0) = diag(ny)*diag(1:bw0)*mat%colh(1:bw0)
+    END IF
+!
+!     Save D^(-1/2)
+    mat%bal(:) = diag(:)
+  END SUBROUTINE diagbal
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  FUNCTION norm2_1d(x)
+!
+!  Compute the 2-norm of 1d array
+!
+    DOUBLE PRECISION :: x(:)
+    DOUBLE PRECISION :: norm2_1d
+    norm2_1d = SQRT(SUM(x*x))
+  END FUNCTION norm2_1d
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  FUNCTION norm2_2d(x)
+!
+!  Compute the 2-norm of 2d array
+!
+    DOUBLE PRECISION :: x(:,:)
+    DOUBLE PRECISION :: norm2_2d
+    norm2_2d = SQRT(SUM(x*x))
+  END FUNCTION norm2_2d
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+END MODULE tcdsmat_mod
diff --git a/examples/test_kron.f90 b/examples/test_kron.f90
new file mode 100644
index 0000000..ee9d5fe
--- /dev/null
+++ b/examples/test_kron.f90
@@ -0,0 +1,125 @@
+!>
+!> @file test_kron.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   Test Kronecker product for both GE and CSR versions.
+!
+  USE iso_fortran_env, ONLY : rkind => real64
+  USE matrix, ONLY : gemat, init, kron, vmx
+  USE csr, ONLY    : csr_mat, init, full_to_csr, kron, vmx
+  IMPLICIT NONE
+!
+  TYPE(gemat)   :: mata, matb, matc
+  TYPE(gemat)   :: matu1
+  TYPE(csr_mat) :: mata_csr, matb_csr, matc_csr
+  INTEGER, PARAMETER :: m=3, n=2
+  REAL(rkind) :: a(m*m)=[ 1.1, 0.0, 0.0, 0.2, 1.5, 1.0, 0.5, 0.0, 1.0 ]
+  REAL(rkind) :: b(n*n)=[ 2.0, 1.0, 0.0, 3.0 ]
+  REAL(rkind), TARGET :: u(m,n), uu(m,n), v(m,n)
+  REAL(rkind), POINTER :: u1d(:), uu1d(:)
+  INTEGER :: i, s, e
+!
+  CALL init(m, 0, mata)
+  CALL init(n, 0, matb)
+  mata%val(1:m,1:m) = RESHAPE(a, [m,m])
+  matb%val(1:n,1:n) = RESHAPE(b, [n,n])
+!
+  CALL printmat_ge('Matrix A', mata)
+  CALL printmat_ge('Matrix B', matb)
+!
+  u1d(1:m*n) => u   !  u1d = vec(u)
+  u1d = [ (REAL(i,rkind), i=1,m*n) ]
+  CALL printmat('Array U', u)
+!
+!   Compute (A.U).B^T
+!
+  CALL init(n, 0, matu1, mrows=m)
+  v = TRANSPOSE(vmx(matb, TRANSPOSE(vmx(mata, u))))
+  CALL printmat('(A.U).B^T', v)
+!
+!  Compute (BxA).vec(U)
+!
+  CALL kron(matb, mata, matc)
+  uu1d(1:m*n) => uu
+  uu1d = vmx(matc, u1d)
+  CALL printmat_ge('Matrix C', matc)
+  CALL printmat('(BxA).vec(U)', uu)
+!----------------------------------------------------------------------
+!
+!   Using CSR matrices
+!
+  CALL full_to_csr(mata%val, mata_csr)
+  CALL full_to_csr(matb%val, matb_csr)
+  CALL printmat_csr('Matrix A', mata_csr)
+  CALL printmat_csr('Matrix B', matb_csr)
+!
+  CALL kron(matb_csr, mata_csr, matc_csr)
+  uu1d = vmx(matc_csr, u1d)
+!
+  CALL printmat_csr('Matrix C', matc_csr)
+  CALL printmat('(BxA).vec(U)', uu)
+!  
+CONTAINS
+  SUBROUTINE printmat(str, a)
+    CHARACTER(len=*) :: str
+    REAL(rkind), INTENT(in)  :: a(:,:)
+    INTEGER :: i,m,n
+    WRITE(*,'(/a)')  TRIM(str)
+    m=SIZE(a,1)
+    n=SIZE(a,2)
+    DO i=1,m
+       WRITE(*,'(12f8.3)') a(i,:)
+    END DO
+  END SUBROUTINE printmat
+  SUBROUTINE printmat_ge(str, a)
+    CHARACTER(len=*) :: str
+    TYPE(gemat) :: a
+    INTEGER :: i
+    WRITE(*,'(/a)')  TRIM(str)
+    DO i=1,a%mrows
+       WRITE(*,'(12f8.3)') a%val(i,:)
+    END DO
+  END SUBROUTINE printmat_ge
+  SUBROUTINE printmat_csr(str, a)
+    CHARACTER(len=*) :: str
+    TYPE(csr_mat) :: a
+    INTEGER :: i, s, e
+    REAL(rkind) :: arow(a%ncols)
+    WRITE(*,'(/a,a,3i4)') TRIM(str),  ': m, n, nnz', a%mrows, a%ncols, a%nnz
+    DO i=1,a%mrows
+       arow = 0.0_rkind
+       s = a%irow(i)
+       e = a%irow(i+1)-1
+       arow(a%cols(s:e)) = a%val(s:e)
+       WRITE(*,'(12f8.3)') arow
+    END DO
+    IF(SIZE(a%idiag) .GT. 0) THEN
+       WRITE(*,'(a,(20i4))') 'idiag =', a%idiag
+       WRITE(*,'(a,12f8.3)') 'diag = ', a%val(a%idiag)
+    END IF
+!
+  END SUBROUTINE printmat_csr
+END PROGRAM main
diff --git a/examples/test_pwsmp.f90 b/examples/test_pwsmp.f90
new file mode 100644
index 0000000..0537296
--- /dev/null
+++ b/examples/test_pwsmp.f90
@@ -0,0 +1,287 @@
+!>
+!> @file test_pwsmp.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!!$  USE futils
+  IMPLICIT NONE
+  INCLUDE 'mpif.h'
+!
+  INTEGER :: npes, me, ierr, comm=MPI_COMM_WORLD
+  INTEGER :: l, i, lun=99
+  INTEGER :: nrank, nnz, s, e, nrank_loc, nnz_loc, nnz_sum
+  INTEGER :: istart, iend
+  INTEGER, ALLOCATABLE :: irow(:), cols(:)
+  INTEGER, ALLOCATABLE :: irow_loc(:), cols_loc(:)
+  DOUBLE PRECISION, ALLOCATABLE :: val(:), val_loc(:)
+  DOUBLE PRECISION, ALLOCATABLE :: rhs(:), rhs_loc(:)
+  DOUBLE PRECISION, ALLOCATABLE :: sol(:), sol_loc(:)
+  DOUBLE PRECISION :: mem
+  CHARACTER(len=128) :: fname = "mat.dat"
+  DOUBLE PRECISION :: mem_loc, mem_min, mem_max
+  DOUBLE PRECISION :: err, err_max, err_norm
+  DOUBLE PRECISION :: t0, tfact, tsolv
+  INTEGER :: it, nits=100
+!
+!  PWSMP vars
+!
+  DOUBLE PRECISION :: dparm(64)
+  INTEGER          :: iparm(64)
+  INTEGER, ALLOCATABLE :: perm(:), invp(:)
+!
+  INTEGER :: mrp                     ! just a placeholder in this program
+  DOUBLE PRECISION :: aux, diag      ! just placeholders in this program
+  INTEGER :: naux=0, nrhs=1
+!===========================================================================
+!                             1.0  Prologue
+!
+  CALL mpi_init(ierr)
+  CALL mpi_comm_size(comm, npes, ierr)
+  CALL mpi_comm_rank(comm, me, ierr)
+!===========================================================================
+!                             2.0  Read matrix
+!
+!     File header
+  IF( command_argument_count() > 0 ) THEN
+     CALL get_command_argument(1, fname, l, ierr)
+  END IF
+  OPEN(unit=lun, file=fname, form="unformatted")
+  READ(lun) nrank, nnz
+  IF(me.EQ.0) WRITE(*,'(a,3i16)') 'npes, nrank, nnz', npes, nrank, nnz
+!
+!    Matrix partition
+  CALL dist1d(comm, 1, nrank, istart, nrank_loc)
+  iend = istart+nrank_loc-1
+  WRITE(*,'(a,i3.3,a,2i12)') 'PE', me, ':istart, iend', istart, iend
+  ALLOCATE(irow_loc(nrank_loc+1))
+!
+!   Read irow
+  ALLOCATE(irow(nrank+1))
+  READ(lun) irow
+  nnz_loc = irow(iend+1)-irow(istart)
+  CALL mpi_reduce(nnz_loc, nnz_sum, 1, MPI_INTEGER, MPI_SUM, 0, comm, ierr)
+  IF(me.EQ.0) THEN
+     PRINT*, 'nnz_sum', nnz_sum
+  END IF
+  irow_loc(:) = irow(istart:iend+1)   ! Still unshifted
+  DEALLOCATE(irow)
+!
+  ALLOCATE(cols_loc(nnz_loc))
+  ALLOCATE(val_loc(nnz_loc))
+  ALLOCATE(rhs_loc(nrank_loc))
+  ALLOCATE(sol_loc(nrank_loc))
+!
+  s = irow_loc(1)
+  e = irow_loc(nrank_loc+1)-1
+  irow_loc(:) = irow_loc(:)-s+1     ! Shifted relative irow
+  WRITE(*,'(a,i3.3,a,3i12)') 'PE', me, ':s, e, nnz_loc', s, e, nnz_loc
+!
+!   Read cols
+  ALLOCATE(cols(nnz))
+  READ(lun) cols
+  cols_loc(:) = cols(s:e)
+  DEALLOCATE(cols)
+!
+!   Read vals
+  ALLOCATE(val(nnz))
+  READ(lun) val
+  val_loc(:) = val(s:e)
+  DEALLOCATE(val)
+!
+!   Read RHS
+  ALLOCATE(rhs(nrank))
+  READ(lun) rhs
+  rhs_loc(:) = rhs(istart:iend)
+  DEALLOCATE(rhs)
+!
+!!$  mem_loc = mem()
+!!$  CALL minmax_r(mem_loc, comm, 'mem used (MB) after matrix read')
+!===========================================================================
+!                             3.0  Call PWSMP
+!
+!   Initializing of PWSMP.
+!
+!!$  CALL pwsmp_initialize
+  ALLOCATE(invp(nrank), perm(nrank))
+!
+!   Fill 'iparm' and 'dparm' arrays with default values.
+  iparm(1:3) = 0
+  CALL pwssmp (nrank_loc, irow_loc, cols_loc, val_loc, diag, perm, invp, &
+       &       rhs_loc, nrank_loc, nrhs, &
+       &       aux, naux, mrp, iparm, dparm)
+  IF(iparm(64).NE.0) THEN
+     PRINT*, 'WSMP init failed with iparm(64) =', iparm(64)
+     CALL mpi_abort(comm, iparm(64), ierr)
+  ELSE
+     IF(me.EQ.0) PRINT*, 'WSMP init ok'
+  END IF
+!
+!    Ordering
+  iparm(2) = 1
+  iparm(3) = 1
+  CALL pwssmp (nrank_loc, irow_loc, cols_loc, val_loc, diag, perm, invp, &
+       &       rhs_loc, nrank_loc, nrhs, &
+       &       aux, naux, mrp, iparm, dparm)
+  IF(iparm(64).NE.0) THEN
+     PRINT*, 'WSMP ordering failed with iparm(64) =', iparm(64)
+     CALL mpi_abort(comm, iparm(64), ierr)
+  ELSE
+     IF(me.EQ.0) PRINT*, 'WSMP ordering ok'
+  END IF
+!
+!    Symbolic factorization
+  iparm(2) = 2
+  iparm(3) = 2
+  CALL pwssmp (nrank_loc, irow_loc, cols_loc, val_loc, diag, perm, invp, &
+       &       rhs_loc, nrank_loc, nrhs, &
+       &       aux, naux, mrp, iparm, dparm)
+  IF(iparm(64).NE.0) THEN
+     PRINT*, 'WSMP symbolic failed with iparm(64) =', iparm(64)
+     CALL mpi_abort(comm, iparm(64), ierr)
+  ELSE
+     IF(me.EQ.0) PRINT*, 'WSMP symbolic ok'
+  END IF
+  IF(me.EQ.0) THEN
+     PRINT *,'Number of nonzeros in factor L = 1000 X ',iparm(24)
+     PRINT *,'Number of FLOPS in factorization = ',dparm(23)
+     PRINT *,'Double words needed to factor on 0 = 1000 X ',iparm(23)
+  END IF
+!
+!    Cholesky factorizarion
+  iparm(2) = 3
+  iparm(3) = 3
+  t0 = mpi_wtime()
+  CALL pwssmp (nrank_loc, irow_loc, cols_loc, val_loc, diag, perm, invp, &
+       &       rhs_loc, nrank_loc, nrhs, &
+       &       aux, naux, mrp, iparm, dparm)
+  tfact = mpi_wtime()-t0
+  IF(iparm(64).NE.0) THEN
+     PRINT*, 'WSMP Choleski failed with iparm(64) =', iparm(64)
+     CALL mpi_abort(comm, iparm(64), ierr)
+  ELSE
+     IF(me.EQ.0) PRINT*, 'WSMP Choleski ok'
+  END IF
+!
+!    Backsolve
+  t0 = mpi_wtime()
+  DO it=1,nits
+     sol_loc=rhs_loc
+     iparm(2) = 4
+     iparm(3) = 4
+     CALL pwssmp (nrank_loc, irow_loc, cols_loc, val_loc, diag, perm, invp, &
+          &       sol_loc, nrank_loc, nrhs, &
+          &       aux, naux, mrp, iparm, dparm)
+  END DO
+  rhs_loc=sol_loc
+  tsolv = (mpi_wtime()-t0)/REAL(nits,8)
+  IF(iparm(64).NE.0) THEN
+     PRINT*, 'WSMP backsolve failed with iparm(64) =', iparm(64)
+     CALL mpi_abort(comm, iparm(64), ierr)
+  ELSE
+     IF(me.EQ.0) PRINT*, 'WSMP backsolve ok'
+  END IF
+!
+!   Iterative refinement
+  iparm(2) = 5
+  iparm(3) = 5
+  CALL pwssmp (nrank_loc, irow_loc, cols_loc, val_loc, diag, perm, invp, &
+       &       rhs_loc, nrank_loc, nrhs, &
+       &       aux, naux, mrp, iparm, dparm)
+  IF(iparm(64).NE.0) THEN
+     PRINT*, 'WSMP refinement failed with iparm(64) =', iparm(64)
+     CALL mpi_abort(comm, iparm(64), ierr)
+  ELSE
+     IF(me.EQ.0) PRINT*, 'WSMP refinement ok'
+  END IF
+!
+!!$  mem_loc = mem()
+!!$  CALL minmax_r(mem_loc, comm, 'mem used (MB) after PWSMP')
+!===========================================================================
+!                             4.0  Check SOL
+!
+!   Read SOL
+  ALLOCATE(sol(nrank))
+  READ(lun) sol
+  sol_loc(:) = sol(istart:iend)
+  DEALLOCATE(sol)
+  PRINT*, 'Comp. sol', SUM(rhs_loc)
+  PRINT*, 'Exact sol', SUM(sol_loc)
+!
+  err=MAXVAL(ABS(sol_loc-rhs_loc))
+  CALL mpi_reduce(err, err_max, 1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierr)
+  IF(me.EQ.0) THEN
+     PRINT*, 'Max. error', err_max
+  END IF
+  rhs_loc = rhs_loc-sol_loc
+  err = DOT_PRODUCT(rhs_loc,rhs_loc)
+  CALL mpi_reduce(err, err_norm, 1, MPI_DOUBLE_PRECISION, MPI_SUM, 0, comm, ierr)
+  IF(me.EQ.0) THEN
+     PRINT*, 'Norm of error', SQRT(err_norm)
+  END IF
+!
+!!$  mem_loc = mem()
+!!$  CALL minmax_r(mem_loc, comm, 'mem used (MB)')
+!===========================================================================
+!                             9.0  Epilogue
+!
+  CALL minmax_r(tfact, comm, 'Factorisation time(s)')
+  CALL minmax_r(tsolv, comm, '    Backsolve time(s)')
+  CALL mpi_finalize(ierr)
+!
+CONTAINS
+  SUBROUTINE dist1d(comm, s0, ntot, s, nloc)
+!
+!  1d distribute ntot elements, returns offset s and local number of 
+!  elements nloc.
+! 
+    IMPLICIT NONE
+    INCLUDE 'mpif.h'
+    INTEGER, INTENT(in) :: s0, ntot
+    INTEGER, INTENT(out) :: s, nloc
+    INTEGER :: comm, me, npes, ierr, naver, rem
+!
+    CALL MPI_COMM_SIZE(comm, npes, ierr)
+    CALL MPI_COMM_RANK(comm, me, ierr)
+    naver = ntot/npes
+    rem = MODULO(ntot,npes)
+    s = s0 + MIN(rem,me) + me*naver
+    nloc = naver
+    IF( me.LT.rem ) nloc = nloc+1
+  END SUBROUTINE dist1d
+!
+  SUBROUTINE minmax_r(x, comm, str)
+    CHARACTER(len=*), INTENT(in) :: str
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(in)          :: comm
+    INTEGER :: me, ierr
+    DOUBLE PRECISION ::  xmin, xmax
+    CALL mpi_comm_rank(comm, me, ierr)
+    CALL mpi_reduce(x, xmin, 1, MPI_DOUBLE_PRECISION, MPI_MIN, 0, comm, ierr)
+    CALL mpi_reduce(x, xmax, 1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm, ierr)
+    IF( me.EQ.0 ) THEN
+       WRITE(*,'(a,2(1pe12.4))') 'Minmax of ' // TRIM(str), xmin, xmax
+    END IF
+  END SUBROUTINE minmax_r
+!
+END PROGRAM main
diff --git a/examples/tlocintv.f90 b/examples/tlocintv.f90
new file mode 100644
index 0000000..0208d29
--- /dev/null
+++ b/examples/tlocintv.f90
@@ -0,0 +1,140 @@
+!>
+!> @file tlocintv.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   Optimization of locintv
+!
+  USE bsplines
+  IMPLICIT NONE
+  INTEGER :: nx, nidbas, ngauss, np, nits
+  DOUBLE PRECISION :: a, b, coefs(5)
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid
+  TYPE(spline1d) :: splx
+!
+  INTEGER :: i, nerrs, it
+  DOUBLE PRECISION :: t0, t1, seconds, tscal, tscal_new, tvec, tvec_new
+  DOUBLE PRECISION, ALLOCATABLE :: xp(:)
+  INTEGER, ALLOCATABLE :: left(:)
+!
+  INTERFACE
+     SUBROUTINE meshdist(coefs, x, nx)
+       DOUBLE PRECISION, INTENT(in) :: coefs(5)
+       INTEGER, INTENT(iN) :: nx
+       DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+     END SUBROUTINE meshdist
+  END INTERFACE
+!
+  NAMELIST /newrun/ nx, nidbas, ngauss, np, nits, a, b, coefs
+!===========================================================================
+!   Read in data
+!
+  nx = 8       ! Number oh intevals in x
+  nidbas = 3   ! Degree of splines
+  ngauss = 4   ! Number of Gauss points/interval
+  np = 10 ! Number of random points in [a,b]
+  nits = 1000000
+  a = 0.0
+  b = 1.0
+  coefs(1:5) = (/0.2d0, 1.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+                                                 ! see function FDIST in MESHDIST
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Define grid on x axis
+!
+  ALLOCATE(xgrid(0:nx))
+  xgrid(0) = a
+  xgrid(nx) = b
+  CALL meshdist(coefs, xgrid, nx)
+  WRITE(*,'(/a/(10f8.3))') 'XGRID', xgrid(0:nx)
+  WRITE(*,'(a/(10f8.3))') 'Diff XGRID', (xgrid(i)-xgrid(i-1), i=1,nx)
+!
+!   Set up spline
+!
+  CALL set_spline(nidbas, ngauss, xgrid, splx)
+  WRITE(*,'(a,l1)') 'Is mesh equidistant? ', splx%nlequid
+!
+!   Test locintv
+!
+  ALLOCATE(xp(np))
+  ALLOCATE(left(np))
+  xp(:) = (b-a)*xp(:) + a
+  tscal = 0.0d0
+  tscal_new = 0.0d0
+  tvec = 0.0d0
+  tvec_new = 0.0d0
+!
+  nerrs = 0
+  DO it=1,nits
+     CALL RANDOM_NUMBER(xp)
+     t0 = seconds()
+     DO i=1,np
+        CALL locintv_old(splx, xp(i), left(i))
+     END DO
+     tscal = tscal + seconds()-t0
+     nerrs = nerrs + COUNT(.NOT.in_interv(xp, left))
+!
+     t0 = seconds()
+     DO i=1,np
+        CALL locintv(splx, xp(i), left(i))
+     END DO
+     tscal_new = tscal_new + seconds()-t0
+     nerrs = nerrs + COUNT(.NOT.in_interv(xp, left))
+!
+     t0 = seconds()
+     CALL locintv_old(splx, xp, left)
+     tvec = tvec + seconds()-t0
+     nerrs = nerrs + COUNT(.NOT.in_interv(xp, left))
+!
+     t0 = seconds()
+     CALL locintv(splx, xp, left)
+     tvec_new = tvec_new + seconds()-t0
+     nerrs = nerrs + COUNT(.NOT.in_interv(xp, left))
+  END DO
+  PRINT*, 'nerrs =', nerrs
+!
+  tscal = tscal/(REAL(nits*np,8))
+  tscal_new = tscal_new/(REAL(nits*np,8))
+  tvec = tvec/(REAL(nits*np,8))
+  tvec_new = tvec_new/(REAL(nits*np,8))
+  WRITE(*,'(4a12)') 'scalar', 'scalar new', 'vector', 'vector new'
+  WRITE(*,'(4(1pe12.3))') tscal, tscal_new, tvec, tvec_new
+!
+!   Clean up
+!
+  DEALLOCATE(xp)
+  DEALLOCATE(left)
+  DEALLOCATE(xgrid)
+  CALL destroy_sp(splx)
+!
+CONTAINS
+  LOGICAL ELEMENTAL FUNCTION in_interv(x, l)
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(in) :: l
+    in_interv = x.GE.xgrid(l) .AND. x.LT.xgrid(l+1)
+  END FUNCTION in_interv  
+END PROGRAM main
diff --git a/examples/tmassmat.f90 b/examples/tmassmat.f90
new file mode 100644
index 0000000..679f965
--- /dev/null
+++ b/examples/tmassmat.f90
@@ -0,0 +1,189 @@
+!>
+!> @file tmassmat.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   FT of mass matrix
+!
+  USE bsplines
+  USE matrix
+!
+  IMPLICIT NONE
+  INTEGER :: nx, nidbas
+  INTEGER :: ngauss, nrank, kl, ku
+  INTEGER :: i, k, kmin, kmax
+  DOUBLE PRECISION :: pi, xlenght, dx, arg0, arg
+  DOUBLE PRECISION, ALLOCATABLE :: xgrid(:), arow(:)
+  DOUBLE PRECISION, ALLOCATABLE :: fftmassm1(:), fftmassm2(:), fftmassm3(:)
+  DOUBLE PRECISION, ALLOCATABLE :: fftmass_shifted(:)
+  TYPE(spline1d) :: splx
+  TYPE(periodic_mat) :: massm
+!
+  NAMELIST /newrun/ nx, nidbas, xlenght
+!================================================================================
+!              1.0 Prologue
+!
+  pi = 4.0d0*ATAN(1.0d0)
+!
+  nx = 8
+  nidbas = 3
+  xlenght = 2.0d0*pi
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+  ngauss = nidbas+1  ! Exact integration for polynomials of degree 2*nidbas
+!
+  ALLOCATE(xgrid(0:nx))
+  dx = xlenght/REAL(nx)
+  xgrid = (/ (i*dx,i=0,nx) /)
+  WRITE(*,'(a/(10f8.3))') 'xgrid', xgrid
+!
+  CALL set_spline(nidbas, ngauss, xgrid, splx, .TRUE.)
+!===========================================================================
+!              2.0  Mass matrix
+  nrank = nx
+  kl = nidbas
+  ku = kl
+  CALL init(kl, ku, nrank, 1, massm)
+  CALL dismat(splx, massm)
+!
+  ALLOCATE(arow(nrank))
+!!$  WRITE(*,'(/a)') 'Mass matrix'
+!!$  DO i=1,nrank
+!!$     CALL getrow(massm, i, arow)
+!!$     WRITE(*,'(10(1pe12.4))') arow
+!!$  END DO
+!===========================================================================
+!              3.0  Fourier transform of Mass matrix
+!
+  ALLOCATE(fftmassm1(0:nx-1))
+  ALLOCATE(fftmassm2(0:nx-1))
+  ALLOCATE(fftmassm3(0:nx-1))
+  IF(nidbas.LE.3) THEN
+     CALL analytic(nidbas, fftmassm1)
+     fftmassm1 = dx*fftmassm1
+     WRITE(*,'(/a/(10(1pe12.4)))') 'Analytic', fftmassm1
+  END IF
+!
+  CALL calc_fftmass_old(splx, fftmassm2)
+  WRITE(*,'(/a/(10(1pe12.4)))') 'Old version', fftmassm2
+  IF(nidbas.LE.3) THEN
+     WRITE(*,'(a,1pe12.4)') 'error =', MAXVAL(ABS(fftmassm2-fftmassm1))
+  END IF
+!
+!   Init DFT
+  kmin = -nx/2
+  kmax = nx/2-1
+  CALL init_dft(splx, kmin, kmax)
+!
+  ALLOCATE(fftmass_shifted(kmin:kmax))
+  CALL calc_fftmass(splx, fftmass_shifted)
+  DO k=kmin, kmax
+     fftmassm3(MODULO(k+nx,nx))=fftmass_shifted(k)
+  END DO
+  WRITE(*,'(/a/(10(1pe12.4)))') 'New version', fftmassm3
+  WRITE(*,'(a,1pe12.4)') 'error =', MAXVAL(ABS(fftmassm3-fftmassm2))
+!
+!   Check dftcoefs
+  WRITE(*,'(/a)') 'Check DFT of splines'
+  PRINT*, 'dims of dftcoefs', SHAPE(splx%dft%coefs)
+  DO i=0,nidbas
+     WRITE(*,'(a,i3,2(1pe12.4))') 'Sum of coefs for spline', i, &
+          &                        SUM(splx%dft%coefs(:,i))
+  END DO
+!===========================================================================
+!              9.0  Clean up
+!
+  DEALLOCATE(xgrid)
+  DEALLOCATE(fftmassm1)
+  DEALLOCATE(fftmassm2)
+  DEALLOCATE(fftmassm3)
+  DEALLOCATE(fftmass_shifted)
+  DEALLOCATE(arow)
+  CALL destroy(massm)
+  CALL destroy_sp(splx)
+CONTAINS
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE dismat(spl, mat)
+    TYPE(spline1d) :: spl
+    TYPE(periodic_mat) :: mat
+    INTEGER :: dim, nx, nidbas, ngauss
+    DOUBLE PRECISION, ALLOCATABLE :: fun(:,:), xgauss(:), wgauss(:)
+    INTEGER :: i, igauss, iw, jt, irow, jcol
+    DOUBLE PRECISION :: contrib
+!
+    CALL get_dim(spl, dim, nx, nidbas)
+    ALLOCATE(fun(0:nidbas,1)) ! Spline
+    CALL get_gauss(spl, ngauss)
+    ALLOCATE(xgauss(ngauss), wgauss(ngauss))
+!
+    DO i=1,nx
+       CALL get_gauss(spl, ngauss, i, xgauss, wgauss)
+       DO igauss=1,ngauss
+          CALL basfun(xgauss(igauss), spl, fun, i)
+          DO jt=0,nidbas
+             DO iw=0,nidbas
+                contrib = fun(jt,1) * fun(iw,1) * wgauss(igauss)
+                irow=MODULO(i+iw-1,nx) + 1   ! Periodic BC
+                jcol=MODULO(i+jt-1,nx) + 1
+                CALL updtmat(mat, irow, jcol, contrib)
+             END DO
+          END DO
+       END DO
+    END DO
+!
+    DEALLOCATE(fun)
+    DEALLOCATE(xgauss, wgauss)
+  END SUBROUTINE dismat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE analytic(nidbas, mat)
+!
+!   Analytic form for nidbas .le. 3
+!
+    INTEGER, INTENT(in) :: nidbas
+    DOUBLE PRECISION, INTENT(out) :: mat(0:)
+    DOUBLE PRECISION :: arg0, arg, cosk
+    INTEGER :: n, k
+!
+    n = SIZE(mat)
+    arg0 = 2.0d0*pi/REAL(n,8)
+    DO k=0,n-1
+       arg = k*arg0
+       cosk = COS(arg)
+       SELECT CASE (nidbas)
+       CASE (1)
+          mat(k) = (2.0d0 + cosk)/3.0d0
+       CASE (2)
+          mat(k) = (16.0d0 + cosk*(13.0d0+cosk))/30.0d0
+       CASE (3)
+          mat(k) = (272.0d0 + cosk*(297.0d0+cosk*(60.0d0+cosk)))/630.0d0
+       CASE default
+          WRITE(*,'(a,i4,a)') 'ANALYTIC: nidbas =', nidbas, ' is not implemented!'
+          STOP
+       END SELECT
+    END DO
+  END SUBROUTINE analytic
+END PROGRAM main
diff --git a/examples/tmatrix_gb.f90 b/examples/tmatrix_gb.f90
new file mode 100644
index 0000000..6183335
--- /dev/null
+++ b/examples/tmatrix_gb.f90
@@ -0,0 +1,114 @@
+!>
+!> @file tmatrix_gb.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   Test some routines of module matrix
+!
+  USE matrix
+  IMPLICIT NONE
+  TYPE(gbmat) :: mata
+  INTEGER, PARAMETER :: n=5, ku=3, kl=3
+  DOUBLE PRECISION :: arr(n), fulla(n,n), base
+  DOUBLE PRECISION, DIMENSION(:,:), POINTER :: p
+  INTEGER :: i, j, iaway, pow
+  CHARACTER(len=32) :: str
+!
+  CALL init(ku, ku, n, 0, mata)
+  CALL getvalp(mata, p)
+  PRINT*, 'shape of A: ', SHAPE(p)
+!
+!   Test updtmat
+  p = 0.0d0
+  DO j=1,n
+     DO i=1,n
+        arr(i) = 10*i + j
+        IF( ABS(j-i) .LT. ku+1 ) CALL updtmat(mata, i, j, arr(i))
+     END DO
+  END DO
+  CALL prntmat('Test of UPDTMAT', p)
+!
+!   Test PUTCOL
+  p = 0.0d0
+  DO j=1,n
+     DO i=1,n
+        arr(i) = 10*i + j
+     END DO
+     CALL putcol(mata, j, arr)
+  END DO
+  CALL prntmat('Test of PUTCOL', p)
+!
+!
+!   Test PUTROW
+  p = 0.0d0
+  DO i=1,n
+     DO j=1,n
+        arr(j) = 10*i + j
+     END DO
+     CALL putrow(mata, i, arr)
+  END DO
+  CALL prntmat('Test of PUTROW', p)
+!
+!   Test GETCOL
+  fulla = 0.0
+  DO j=1,n
+     CALL getcol(mata, j, fulla(:,j))
+  END DO
+  CALL prntmat('Test of GETCOL', fulla)
+!
+  iaway=4
+  arr = 0.0d0
+  arr(iaway) =1.0
+  CALL putrow(mata, iaway, arr)
+  CALL putcol(mata, iaway, arr)
+  WRITE(str,'(a,i3)') 'Away on i = j =',iaway
+  CALL prntmat(TRIM(str), p)
+!
+!   Test GETCOL
+  fulla = 0.0
+  DO j=1,n
+     CALL getcol(mata, j, fulla(:,j))
+  END DO
+  CALL prntmat('Matrix full', fulla)
+!
+!   Test of determinant
+  CALL determinant(mata, base, pow)
+  CALL prntmat('Factored A (gb)', p)
+  PRINT*, 'Prod. of factored A diagnonals', PRODUCT(p(kl+ku+1,:))
+  WRITE(*,'(a,1pe15.6,i6)') 'Determinant(bas,power) =', base, pow
+  PRINT*, 'Pivots ', mata%piv
+!
+  call destroy(mata)
+CONTAINS
+  SUBROUTINE prntmat(str, a)
+    DOUBLE PRECISION, DIMENSION(:,:) :: a
+    CHARACTER(len=*) :: str
+    INTEGER :: i
+    WRITE(*,'(a)') TRIM(str)
+    DO i=1,SIZE(a,1)
+       WRITE(*,'(10f8.1)') a(i,:)
+    END DO
+  END SUBROUTINE prntmat
+END PROGRAM main
diff --git a/examples/tmatrix_pb.f90 b/examples/tmatrix_pb.f90
new file mode 100644
index 0000000..eae0fd4
--- /dev/null
+++ b/examples/tmatrix_pb.f90
@@ -0,0 +1,143 @@
+!>
+!> @file tmatrix_pb.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   Test some routines of module matrix
+!
+  USE matrix
+  IMPLICIT NONE
+  TYPE(pbmat) :: mata, matb
+  INTEGER, PARAMETER :: n=5, ku=3
+  DOUBLE PRECISION :: arr(n), fulla(n,n), fullb(n,n), base
+  DOUBLE PRECISION, DIMENSION(:,:), POINTER :: p, pb
+  INTEGER :: i, j, info, pow
+!
+  CALL init(ku, n, 0, mata)
+  CALL init(1, n, 0, matb)
+  CALL getvalp(mata, p)
+  CALL getvalp(matb, pb)
+  PRINT*, 'shape of A: ', SHAPE(p)
+!
+!   Test updtmat
+  p = 0.0d0
+  DO i=1,n
+     DO j=i,n
+        arr(j) = 10*i + j
+        IF( ABS(j-i) .LT. ku+1 ) CALL updtmat(mata, i, j, arr(j))
+     END DO
+  END DO
+  CALL prntmat('Test of UPDTMAT', p)
+!
+!   Test GETCOL
+  fulla = 0.0
+  DO j=1,n
+     CALL getcol(mata, j, fulla(:,j))
+  END DO
+  CALL prntmat('Full matrix from GETCOL', fulla)
+!
+!   Test GETROW
+  fulla = 0.0
+  DO i=1,n
+     CALL getrow(mata, i, fulla(i,:))
+  END DO
+  CALL prntmat('Full matrix from GETROW', fulla)
+!
+!   Test PUTCOL
+  p = 0.0d0
+  DO j=1,n
+     DO i=1,n
+        arr(i) = 10*i + j
+     END DO
+     CALL putcol(mata, j, arr)
+  END DO
+  CALL prntmat('Test of PUTCOL', p)
+!
+!   Test PUTROW
+  p = 0.0d0
+  DO i=1,n
+     DO j=1,n
+        arr(j) = 10*i + j
+     END DO
+     CALL putrow(mata, i, arr)
+  END DO
+  CALL prntmat('Test of PUTROW', p)
+!
+  arr = 0.0d0
+  arr(2) =1.0
+  CALL putrow(mata, 2, arr)
+  CALL prntmat('Away on i=2, j=2', p)
+!
+!   Test GETCOL
+  fulla = 0.0
+  DO j=1,n
+     CALL getcol(mata, j, fulla(:,j))
+  END DO
+  CALL prntmat('Full matrix from GETCOL', fulla)
+!
+!   Test GETROW
+  fulla = 0.0
+  DO i=1,n
+     CALL getrow(mata, i, fulla(i,:))
+  END DO
+  CALL prntmat('Full matrix from GETROW', fulla)
+!
+!  Test GETELE
+  fulla = 0.0
+  DO i=1,n
+     DO j=1,n
+        IF(ABS(j-i).LT.ku+1) CALL getele(mata,i,j,fulla(i,j))
+     END DO
+  END DO
+  CALL prntmat('Full matrix from GETELE', fulla)
+!
+!   Test of determinant
+  fullb = 0.0
+  DO i=1,n
+     fullb(i,i) = 2.0d0
+     IF(i.LT.n) fullb(i,i+1)=-1.0d0
+     IF(i.GT.1) fullb(i,i-1)=-1.0d0
+  END DO
+  DO j=1,n
+     CALL putcol(matb, j, fullb(:,j))
+  END DO
+  CALL prntmat('Mat. A (full)', fullb)
+  CALL prntmat('Mat. A (pb)', pb)
+  CALL determinant(matb, base, pow)
+  WRITE(*,'(a,1pe15.6,i6)') 'Determinant(bas,power) =', base, pow
+!
+  CALL destroy(mata)
+  CALL destroy(matb)
+CONTAINS
+  SUBROUTINE prntmat(str, a)
+    DOUBLE PRECISION, DIMENSION(:,:) :: a
+    CHARACTER(len=*) :: str
+    INTEGER :: i
+    WRITE(*,'(a)') TRIM(str)
+    DO i=1,SIZE(a,1)
+       WRITE(*,'(10f8.1)') a(i,:)
+    END DO
+  END SUBROUTINE prntmat
+END PROGRAM main
diff --git a/examples/tmatrix_zpb.f90 b/examples/tmatrix_zpb.f90
new file mode 100644
index 0000000..ba7c30d
--- /dev/null
+++ b/examples/tmatrix_zpb.f90
@@ -0,0 +1,137 @@
+!>
+!> @file tmatrix_zpb.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   Test some routines of module matrix
+!
+  USE matrix
+  IMPLICIT NONE
+  TYPE(zpbmat) :: mata, matb
+  INTEGER, PARAMETER :: n=5, ku=3
+  DOUBLE COMPLEX :: arr(n), fulla(n,n), fullb(n,n), base
+  DOUBLE COMPLEX, DIMENSION(:,:), POINTER :: p, pb
+  INTEGER :: i, j, pow
+!
+  CALL init(ku, n, 0, mata)
+  CALL init(1, n, 0, matb)
+  CALL getvalp(mata, p)
+  CALL getvalp(matb, pb)
+  PRINT*, 'shape of A: ', SHAPE(p)
+!
+!   Test updtmat
+  p = 0.0d0
+  DO i=1,n
+     DO j=i,n
+        arr(j) = CMPLX(10*i + j, j-i)
+        IF( ABS(j-i) .LT. ku+1 ) CALL updtmat(mata, i, j, arr(j))
+     END DO
+  END DO
+  CALL prntmat('Test of UPDTMAT', p)
+!
+!   Test GETCOL
+  fulla = 0.0
+  DO j=1,n
+     CALL getcol(mata, j, fulla(:,j))
+  END DO
+  CALL prntmat('Full matrix from GETCOL', fulla)
+!
+!   Test GETROW
+  fulla = 0.0
+  DO i=1,n
+     CALL getrow(mata, i, fulla(i,:))
+  END DO
+  CALL prntmat('Full matrix from GETROW', fulla)
+!
+!   Test PUTCOL
+  p = 0.0d0
+  DO j=1,n
+     CALL putcol(mata, j, fulla(:,j))
+  END DO
+  CALL prntmat('Test of PUTCOL', p)
+!
+!   Test PUTROW
+  p = 0.0d0
+  DO i=1,n
+     CALL putrow(mata, i, fulla(i,:))
+  END DO
+  CALL prntmat('Test of PUTROW', p)
+!
+  arr = 0.0d0
+  arr(2) =1.0
+  CALL putrow(mata, 2, arr)
+  CALL prntmat('Away on i=2, j=2', p)
+!
+!   Test GETCOL
+  fulla = 0.0
+  DO j=1,n
+     CALL getcol(mata, j, fulla(:,j))
+  END DO
+  CALL prntmat('Full matrix from GETCOL', fulla)
+!
+!   Test GETROW
+  fulla = 0.0
+  DO i=1,n
+     CALL getrow(mata, i, fulla(i,:))
+  END DO
+  CALL prntmat('Full matrix from GETROW', fulla)
+!
+!  Test GETELE
+  fulla = 0.0
+  DO i=1,n
+     DO j=1,n
+        IF(ABS(j-i).LT.ku+1) CALL getele(mata,i,j,fulla(i,j))
+     END DO
+  END DO
+  CALL prntmat('Full matrix from GETELE', fulla)
+!
+!   Test of determinant
+  fullb = 0.0
+  DO i=1,n
+     fullb(i,i) = 2.0d0
+     IF(i.LT.n) fullb(i,i+1)=-1.0d0
+     IF(i.GT.1) fullb(i,i-1)=-1.0d0
+  END DO
+  DO j=1,n
+     CALL putcol(matb, j, fullb(:,j))
+  END DO
+  CALL prntmat('Mat. A (full)', fullb)
+  CALL prntmat('Mat. A (pb)', pb)
+  CALL determinant(matb, base, pow)
+  WRITE(*,'(a,2f8.5,i3)') 'Determinant(base,power) = ', base, pow
+!
+  CALL destroy(mata)
+  CALL destroy(matb)
+CONTAINS
+  SUBROUTINE prntmat(str, a)
+    DOUBLE COMPLEX, DIMENSION(:,:) :: a
+    CHARACTER(len=*) :: str
+    INTEGER :: i
+    WRITE(*,'(a)') TRIM(str)
+    DO i=1,SIZE(a,1)
+       WRITE(*,'(5(5x,"(",f5.1,",",f5.1,")"))') a(i,:)
+    END DO
+  END SUBROUTINE prntmat
+END PROGRAM main
diff --git a/examples/tp2p_mat.f90 b/examples/tp2p_mat.f90
new file mode 100644
index 0000000..d5084fb
--- /dev/null
+++ b/examples/tp2p_mat.f90
@@ -0,0 +1,108 @@
+!>
+!> @file tp2p_mat.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+  USE pardiso_bsplines
+  IMPLICIT NONE
+  INCLUDE 'mpif.h'
+!
+  INTEGER :: me, npes, ierr
+  INTEGER :: next
+  INTEGER :: i, j, rank
+  DOUBLE PRECISION :: val
+  DOUBLE PRECISION, ALLOCATABLE :: arow(:)
+  TYPE(pardiso_mat) :: mat
+!
+  CALL mpi_init(ierr)
+  CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
+  CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
+  IF(npes.NE.2) THEN
+     PRINT*, 'Should run with 2 procs!'
+     CALL mpi_abort(MPI_COMM_WORLD, -1, ierr)
+  END IF
+!
+!   Define local matrix
+!
+  rank = npes
+  CALL init(npes, 0, mat)
+  DO i=1,rank  ! Fill row me+1
+     val = me+1
+     j = me+1
+     CALL updtmat(mat, i, me+1, val)
+  END DO
+!
+!  Exchange matrix 
+!
+  CALL disp_mat('Original matrix')
+  next=MODULO(me+1,2)
+!
+  IF(me.EQ.0) THEN
+     CALL p2p_mat(mat, 1, 'send', 'put', MPI_COMM_WORLD)
+  ELSE
+     CALL p2p_mat(mat, 0, 'recv', 'put', MPI_COMM_WORLD)
+  END IF
+  CALL disp_mat('Matrix after 0->1/put')
+!
+  CALL p2p_mat(mat, next, 'sendrecv', 'put', MPI_COMM_WORLD)
+  CALL disp_mat('Matrix after sendrev/put')
+!
+  CALL p2p_mat(mat, next, 'sendrecv', 'updt', MPI_COMM_WORLD)
+  CALL disp_mat('Matrix after sendrev/updt')
+!
+  IF(me.EQ.1) THEN
+     CALL p2p_mat(mat, 0, 'send', 'updt', MPI_COMM_WORLD)
+  ELSE
+     CALL p2p_mat(mat, 1, 'recv', 'updt', MPI_COMM_WORLD)
+  END IF
+  CALL disp_mat('Matrix after 1->0/updt')
+!
+  IF(me.EQ.1) THEN
+     CALL p2p_mat(mat, 0, 'send', 'put', MPI_COMM_WORLD)
+  ELSE
+     CALL p2p_mat(mat, 1, 'recv', 'put', MPI_COMM_WORLD)
+  END IF
+  CALL disp_mat('Matrix after 1->0/put')
+!
+  CALL mpi_finalize(ierr)
+CONTAINS
+  SUBROUTINE disp_mat(str)
+    CHARACTER(len=*), INTENT(in) :: str
+    INTEGER :: p
+    DO p=0,npes-1
+       IF(me.EQ.p) THEN
+          WRITE(*,'(a,i3.3)') str//' on PE', me
+          CALL to_mat(mat, nlkeep=.TRUE.)
+          ALLOCATE(arow(mat%rank))
+          DO i=1,mat%rank
+             CALL getrow(mat, i, arow)
+             WRITE(*,'(10f8.2)') arow
+          END DO
+          DEALLOCATE(arow)
+          CALL FLUSH(6)
+       END IF
+       CALL mpi_barrier(MPI_COMM_WORLD, ierr)
+    END DO
+  END SUBROUTINE disp_mat
+END PROGRAM main
diff --git a/examples/tpsum_mat.f90 b/examples/tpsum_mat.f90
new file mode 100644
index 0000000..5f01441
--- /dev/null
+++ b/examples/tpsum_mat.f90
@@ -0,0 +1,77 @@
+!>
+!> @file tpsum_mat.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!!$  USE pardiso_bsplines
+!!$  USE wsmp_bsplines
+  USE mumps_bsplines
+  IMPLICIT NONE
+  INCLUDE 'mpif.h'
+!
+  INTEGER :: me, npes, ierr
+  INTEGER :: i, j, rank
+  DOUBLE PRECISION :: val
+  DOUBLE PRECISION, ALLOCATABLE :: arow(:)
+!!$  TYPE(pardiso_mat) :: mat
+!!$  TYPE(wsmp_mat) :: mat
+  TYPE(mumps_mat) :: mat
+!
+  CALL mpi_init(ierr)
+  CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
+  CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
+!
+  rank = npes
+  CALL init(npes, 1, mat)
+  DO i=1,rank  ! Fill row me+1
+     val = me+1
+     j = me+1
+     CALL updtmat(mat, i, me+1, val)
+  END DO
+!
+!!$  CALL disp_mat('Original matrix')
+  CALL psum_mat(mat, MPI_COMM_WORLD)
+  CALL disp_mat('Global sum of matrix')
+!
+  CALL mpi_finalize(ierr)
+CONTAINS
+  SUBROUTINE disp_mat(str)
+    CHARACTER(len=*), INTENT(in) :: str
+    INTEGER :: p
+    DO p=0,npes-1
+       IF(me.EQ.p) THEN
+          CALL to_mat(mat, nlkeep=.TRUE.)
+          WRITE(*,'(a,i3.3,a,2i6)') str//' on PE', me, ': rank, nnz', mat%rank, mat%nnz
+          ALLOCATE(arow(mat%rank))
+          DO i=1,mat%rank
+             CALL getrow(mat, i, arow)
+             WRITE(*,'(10f8.2)') arow
+          END DO
+          DEALLOCATE(arow)
+          CALL FLUSH(6)
+       END IF
+       CALL mpi_barrier(MPI_COMM_WORLD, ierr)
+    END DO
+  END SUBROUTINE disp_mat
+END PROGRAM main
diff --git a/examples/tsparse1.f90 b/examples/tsparse1.f90
new file mode 100644
index 0000000..c22a07f
--- /dev/null
+++ b/examples/tsparse1.f90
@@ -0,0 +1,117 @@
+!>
+!> @file tsparse1.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! MAIN !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+PROGRAM main
+  USE sparse
+  IMPLICIT none
+!  
+  TYPE(zspmat) :: amat
+  TYPE(zsprow) :: arow
+!
+  LOGICAL          :: found
+  INTEGER          :: n=10
+  INTEGER          :: jcol, nnz, i
+  DOUBLE PRECISION :: valr, vali
+  DOUBLE COMPLEX :: val, zero=(0.,0.)
+  DOUBLE COMPLEX, ALLOCATABLE :: arr(:), farr(:)
+  INTEGER, ALLOCATABLE :: col(:), newcol(:)
+!
+!   Initialize the sparse matrix amat
+!
+  CALL init(n, amat)
+!
+!   Use UPDT_SP to update a sparse row
+!
+  WRITE(*,*) 'Enter a list of positive indices, terminate with a zero'
+  DO
+     READ(*,*) jcol
+     IF(jcol .LE. 0) EXIT
+     CALL RANDOM_NUMBER(valr)
+     vali = jcol
+     val = CMPLX(valr, vali)
+     CALL updtmat(arow, jcol, val)
+  END DO
+!
+!   Convert a sparse row to a sequential row
+!
+  nnz = arow%nnz
+  WRITE(*,'(a,i5)') 'nnz =', nnz
+!
+  ALLOCATE(arr(nnz), col(nnz), newcol(nnz))
+  CALL getrow(arow, arr, col)
+  WRITE(*, '(a/(10i8))') 'col', col
+  WRITE(*, '(a/(10f8.4))') 'arr', arr
+!
+  ALLOCATE(farr(MAXVAL(col)))
+  CALL getrow(arow, farr)
+  WRITE(*, '(/a/(10f8.4))') 'farr', farr
+!
+!   Clear element by element of row
+!
+  DO i=1,nnz
+     CALL putele(arow, col(i), zero)
+     CALL getrow(arow, arr, newcol)
+     WRITE(*, '(/a,i6/(10i8))') 'col', arow%nnz, newcol(1:arow%nnz)
+  END DO
+!
+!   Re-create row using PUTROW and full row
+!
+  CALL putrow(arow, farr)
+  CALL getrow(arow, arr, col)
+  WRITE(*,'(/a,i5)') 'nnz =', arow%nnz
+  WRITE(*, '(a/(10i8))') 'col', col
+  WRITE(*, '(a/(10f8.4))') 'arr', arr
+!
+!   Clear row using DESTROY
+!
+  CALL destroy(arow)
+  CALL getrow(arow, arr, newcol)
+  nnz = arow%nnz
+  WRITE(*, '(/a,i6/(10i8))') 'col', nnz, newcol(1:nnz)
+!
+!   Re-create row using PUTROW and sparse row
+!
+  CALL putrow(arow, arr, col)
+  CALL getrow(arow, arr, newcol, nnz)
+  WRITE(*, '(/a,i6/(10i8))') 'col', nnz, newcol(1:nnz)
+  WRITE(*, '(a/(10f8.4))') 'arr', arr
+!
+!   Test GETELE
+!
+  i=111;val=0
+  CALL getele(arow, i, val, found)
+  WRITE(*,'(/i8,2f8.4,l3)') i, val, found
+  DO i=1,nnz
+     CALL getele(arow, col(i), val, found)
+     WRITE(*,'(i8,2f8.4,l3)') col(i), val, found
+  END DO
+!
+!   Test destroy_spmat
+!
+  CALL destroy(amat)
+END PROGRAM main
diff --git a/examples/tsparse2.f90 b/examples/tsparse2.f90
new file mode 100644
index 0000000..23d3fcf
--- /dev/null
+++ b/examples/tsparse2.f90
@@ -0,0 +1,151 @@
+!>
+!> @file tsparse2.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   Simple 2D Poisson using 5 points FD
+!
+  USE pardiso_bsplines
+  IMPLICIT NONE
+!
+  TYPE(pardiso_mat) :: amat, bmat
+  DOUBLE PRECISION, ALLOCATABLE :: arow(:), rhs(:), sol(:)
+  DOUBLE PRECISION, ALLOCATABLE :: arown(:,:), rhsn(:,:), soln(:,:)
+  INTEGER :: nx=5, ny=4, n
+  INTEGER :: i, j, irow, jcol
+!
+  DOUBLE PRECISION   :: mem, seconds
+!
+  n = nx*ny
+  CALL init(n, 1, amat)                ! Non-symmetric matrix
+  CALL init(n, 1, bmat, nlsym=.TRUE.)  ! Symmetric matrix
+  ALLOCATE(arow(n), arown(n,2))
+  ALLOCATE(rhs(n), rhsn(n,2))
+  ALLOCATE(sol(n), soln(n,2))
+!
+!  Construct the FD matrix amat, using sparse rows (linked lists)
+!
+  DO j=1,ny
+     DO i=1,nx
+        arow = 0.0d0
+        irow = numb(i,j)
+        arow(irow) = 4.0d0
+        IF(i.GT.1)  arow(numb(i-1,j)) = -1.0d0
+        IF(i.LT.nx) arow(numb(i+1,j)) = -1.0d0
+        IF(j.GT.1)  arow(numb(i,j-1)) = -1.0d0
+        IF(j.LT.ny) arow(numb(i,j+1)) = -1.0d0
+        rhs(irow) = SUM(arow)
+        CALL putrow(amat, irow, arow) ! General matrix
+        CALL putrow(bmat, irow, arow) ! Symmetric matrix
+     END DO
+  END DO
+!
+!   Print the matrices
+!
+  WRITE(*,'(/a)') 'Matrix A'
+  DO i=1,n
+     CALL getrow(amat, i, arow)
+     WRITE(*,'(30f4.0)') arow
+  END DO
+  PRINT*, 'nnz from get_count', get_count(amat)
+!
+  WRITE(*,'(/a)') 'Matrix B'
+  DO i=1,n
+     CALL getrow(bmat, i, arow)
+     WRITE(*,'(30f4.0)') arow
+  END DO
+  PRINT*, 'nnz from get_count', get_count(bmat)
+!
+!   Factor the matrix using Pardiso
+!
+  CALL factor(amat, nlmetis=.TRUE.)
+  WRITE(*,'(/a,i5)') 'Number of nonzeros in factors of A  = ',amat%p%iparm(18)
+  WRITE(*,'(a,i5)')  'Number of factorization MFLOPS      = ',amat%p%iparm(19)
+!
+  CALL factor(bmat, nlmetis=.TRUE.)
+  WRITE(*,'(/a,i5)') 'Number of nonzeros in factors of B  = ',bmat%p%iparm(18)
+  WRITE(*,'(a,i5)')  'Number of factorization MFLOPS      = ',bmat%p%iparm(19)
+!
+  WRITE(*,'(/a/(10f8.4))') 'rhs', rhs
+!
+!   Backsolve Ax = b, using Pardiso
+!
+  sol = rhs
+  CALL bsolve(amat, sol, debug=.FALSE.)
+  WRITE(*,'(/a/(10f8.4))') 'sol (non-sym)', sol
+  WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(sol-1.0d0))
+!
+!   Backsolve Bx = b, using Pardiso
+!
+  sol = rhs
+  CALL bsolve(bmat, sol, debug=.FALSE.)
+  WRITE(*,'(/a/(10f8.4))') 'sol (sym)', sol
+  WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(sol-1.0d0))
+!
+  arow = vmx(amat, sol)
+  WRITE(*,'(/a/(10f8.4))') 'A*x (non-sym)', arow
+  WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(arow-rhs))
+!
+  arow = vmx(bmat, sol)
+  WRITE(*,'(/a/(10f8.4))') 'B*x (sym)', arow
+  WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(arow-rhs))
+!
+!   Multiple RHS
+!
+  rhsn(:,1) = -rhs(:)
+  rhsn(:,2) = 2.d0*rhs(:)
+  CALL bsolve(amat, rhsn, soln)
+  WRITE(*,'(/a/(10f8.4))') 'soln (non-sym)', soln
+  WRITE(*,'(a,2(1pe12.3))') 'Error', MAXVAL(ABS(soln(:,1)+1.0d0)), &
+       &                  MAXVAL(ABS(soln(:,2)-2.0d0))
+!
+  CALL bsolve(bmat, rhsn, soln)
+  WRITE(*,'(/a/(10f8.4))') 'soln (sym)', soln
+  WRITE(*,'(a,2(1pe12.3))') 'Error', MAXVAL(ABS(soln(:,1)+1.0d0)), &
+       &                  MAXVAL(ABS(soln(:,2)-2.0d0))
+!
+  arown = vmx(amat, soln)
+  WRITE(*,'(/a/(10f8.4))') 'A*x (non-sym)', arown
+  WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(arown-rhsn))
+!
+  arown = vmx(bmat, soln)
+  WRITE(*,'(/a/(10f8.4))') 'A*x (sym)', arown
+  WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(arown-rhsn))
+!
+!   Clean up
+!
+  DEALLOCATE(arow,arown)
+  DEALLOCATE(rhs,rhsn)
+  DEALLOCATE(sol,soln)
+  CALL destroy(amat)
+  CALL destroy(bmat)
+CONTAINS
+  INTEGER FUNCTION numb(i,j)
+    INTEGER, INTENT(in) :: i, j
+    numb = (j-1)*nx + i
+  END FUNCTION numb
+!
+END PROGRAM main
+
diff --git a/examples/zpardiso_ex1.f b/examples/zpardiso_ex1.f
new file mode 100644
index 0000000..7979d88
--- /dev/null
+++ b/examples/zpardiso_ex1.f
@@ -0,0 +1,96 @@
+!>
+!> @file zpardiso_ex1.f
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+      PROGRAM main
+      USE wsmp_bsplines
+      USE pardiso_bsplines
+      IMPLICIT NONE
+c
+      INTEGER :: n=9
+      INTEGER ia(10)
+      INTEGER ja(29)
+      COMPLEX*16 avals(29)
+      COMPLEX*16 b(9), sol(9), arow(9)
+c
+c$$$      type(zwsmp_mat) :: mat
+      type(zpardiso_mat) :: mat
+      integer :: i, k
+c
+      DATA ia /1,5,9,13,17,21,25,27,29,30/
+      data ja 
+     1        /1,          3,                      7,    8,
+     2	             2,    3,                            8,    9,
+     3                     3,                      7,    8,    9,
+     4                           4,          6,    7,    8,
+     5                                 5,    6,          8,    9,
+     6                                       6,    7,    8,    9,
+     7                                             7,    8,
+     8                                                   8,    9,
+     9                                                         9/
+      data avals 
+     1 /(14.d0,0.d0), (-1.d0,-5.d-2), (-1.d0,-5.0d-2), (-3.d0,-1.5d-1),
+     2 (14.d0,0.d0), (-1.d0,-5.d-2), (-3.d0,-1.5d-1), (-1.d0,-5.0d-2),
+     3 (16.d0,0.d0), (-2.d0,-1.d-1), (-4.d0,-2.0d-1), (-2.d0,-1.0d-1),
+     4 (14.d0,0.d0), (-1.d0,-5.d-2), (-1.d0,-5.0d-2), (-3.d0,-1.5d-1),
+     5 (14.d0,0.d0), (-1.d0,-5.d-2), (-3.d0,-1.5d-1), (-1.d0,-5.0d-2),
+     6 (16.d0,0.d0), (-2.d0,-1.d-1), (-4.d0,-2.0d-1), (-2.d0,-1.0d-1),
+     7 (16.d0,0.d0), (-4.d0,-2.d-1),
+     8 (71.d0,0.d0), (-4.d0,-2.d-1),
+     9 (16.d0,0.d0)/ 
+c
+c$$$      call init(n, 1, mat, nlherm=.false., nlsym=.true., nlpos=.true.)
+      call init(n, 1, mat, nlherm=.true., nlpos=.true.)
+      do i=1,n
+         do k=ia(i),ia(i+1)-1
+            call putele(mat, i, ja(k), avals(k))
+         end do
+      end do
+c
+      call factor(mat)
+c
+      print*, 'diff of val', cnorm2(avals-mat%val)
+      print*, 'diff of ia', ia-mat%irow
+      print*,' diff ja', ja-mat%cols
+c
+      print*, 'The RHS:'
+      do i = 1, n
+         call getrow(mat,i, arow)
+         b(i) = sum(arow)
+         print *,i,' : ',b(i)
+      end do
+      call bsolve(mat,b,sol)
+      print *,'The solution of the system is as follows:'
+      do i = 1, n
+         print *,i,' : ',sol(i)
+      end do
+      print*, 'Residue =', cnorm2(vmx(mat,sol)-b)
+      contains
+      FUNCTION cnorm2(x)
+      DOUBLE COMPLEX, INTENT(in) :: x(:)
+      DOUBLE PRECISION :: cnorm2
+      cnorm2 = SQRT(DOT_PRODUCT(x,x))    
+      END FUNCTION cnorm2
+c
+      END PROGRAM main
diff --git a/examples/zssmp_ex1.f b/examples/zssmp_ex1.f
new file mode 100644
index 0000000..cae1c08
--- /dev/null
+++ b/examples/zssmp_ex1.f
@@ -0,0 +1,111 @@
+!>
+!> @file zssmp_ex1.f
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+      PROGRAM main
+      USE wsmp_bsplines
+      USE pardiso_bsplines
+      IMPLICIT NONE
+c
+      INTEGER :: n=9
+      INTEGER ia(10)
+      INTEGER ja(29)
+      COMPLEX*16 avals(29)
+      COMPLEX*16 b(9), sol(9), arow(9)
+c
+      type(zwsmp_mat) :: mat
+      integer :: i, k
+c
+      DATA ia /1,5,9,13,17,21,25,27,29,30/
+      data ja 
+     1        /1,          3,                      7,    8,
+     2	             2,    3,                            8,    9,
+     3                     3,                      7,    8,    9,
+     4                           4,          6,    7,    8,
+     5                                 5,    6,          8,    9,
+     6                                       6,    7,    8,    9,
+     7                                             7,    8,
+     8                                                   8,    9,
+     9                                                         9/
+      data avals 
+     1 /(14.d0,0.d0), (-1.d0,-5.d-2), (-1.d0,-5.0d-2), (-3.d0,-1.5d-1),
+     2 (14.d0,0.d0), (-1.d0,-5.d-2), (-3.d0,-1.5d-1), (-1.d0,-5.0d-2),
+     3 (16.d0,0.d0), (-2.d0,-1.d-1), (-4.d0,-2.0d-1), (-2.d0,-1.0d-1),
+     4 (14.d0,0.d0), (-1.d0,-5.d-2), (-1.d0,-5.0d-2), (-3.d0,-1.5d-1),
+     5 (14.d0,0.d0), (-1.d0,-5.d-2), (-3.d0,-1.5d-1), (-1.d0,-5.0d-2),
+     6 (16.d0,0.d0), (-2.d0,-1.d-1), (-4.d0,-2.0d-1), (-2.d0,-1.0d-1),
+     7 (16.d0,0.d0), (-4.d0,-2.d-1),
+     8 (71.d0,0.d0), (-4.d0,-2.d-1),
+     9 (16.d0,0.d0)/ 
+c
+      call init(n, 1, mat, nlherm=.true., nlpos=.true.)
+      do i=1,n
+         do k=ia(i),ia(i+1)-1
+            call putele(mat, i, ja(k), avals(k))
+         end do
+      end do
+c
+      print*, 'The RHS before tomat:'
+      do i = 1, n
+         call getrow(mat,i, arow)
+         b(i) = sum(arow)
+         print *,i,' : ',b(i)
+      end do
+c
+      call factor(mat)
+c
+      write(*,'(a/(20f6.2))') 'avals', avals
+      write(*,'(a/(20f6.2))') 'mat%val', mat%val
+      print*, 'diff of val', cnorm2(avals-mat%val)
+      print*, 'diff of ia', ia-mat%irow
+      print*,' diff ja', ja-mat%cols
+c
+      print*, 'Check getrow'
+      do i = 1, n
+         call getrow(mat,i, arow)
+         write(*,'(i3,": ",(20f6.2))') i, arow(i:n)
+      end do
+c
+      print*, 'The RHS:'
+      do i = 1, n
+         call getrow(mat,i, arow)
+         b(i) = sum(arow)
+         print *,i,' : ',b(i)
+      end do
+      call bsolve(mat,b,sol)
+      print *,'Norm of Residual = ',mat%p%dparm(7)
+      print *,'The solution of the system is as follows:'
+      do i = 1, n
+         print *,i,' : ',sol(i)
+      end do
+      print*, 'Residue =', cnorm2(vmx(mat,sol)-b)
+c
+      contains
+      FUNCTION cnorm2(x)
+      DOUBLE COMPLEX, INTENT(in) :: x(:)
+      DOUBLE PRECISION :: cnorm2
+      cnorm2 = SQRT(DOT_PRODUCT(x,x))    
+      END FUNCTION cnorm2
+c
+      END PROGRAM main
diff --git a/fft/CMakeLists.txt b/fft/CMakeLists.txt
new file mode 100644
index 0000000..ca0d58b
--- /dev/null
+++ b/fft/CMakeLists.txt
@@ -0,0 +1,69 @@
+/**
+ * @file CMakeLists.txt
+ *
+ * @brief Principal CMake configuration file for the fft library
+ *
+ * @copyright
+ * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+ * SPC (Swiss Plasma Center)
+ *
+ * spclibs is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * @authors
+ * (in alphabetical order)
+ * @author Nicolas Richart <nicolas.richart@epfl.ch>
+ * @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+ */
+project(fft)
+
+find_package(FFTW REQUIRED)
+set(fft_w $ENV{fft_w})
+
+set(SRCS
+  fft_fftw.F90
+)
+
+set(EXAMPLES tfft.f90)
+
+add_library(fft STATIC ${SRCS})
+
+target_include_directories(fft
+  PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
+          ${FFTW_INCLUDES}
+  INTERFACE $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
+            $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
+            ${FFTW_INCLUDES}
+  )
+
+if (${fft_w} MATCHES "fft_w2")
+  target_compile_options(fft PRIVATE "-Dfft_w2")
+else()
+  target_compile_options(fft PRIVATE "-Dfft_w3")
+endif()
+
+target_link_libraries(fft PUBLIC ${FFTW_LIBRARY} ${MPI_Fortran_LIBRARIES})
+#
+set_property(TARGET fft
+  PROPERTY PUBLIC_HEADER ${CMAKE_CURRENT_BINARY_DIR}/modules/fft.mod)
+
+add_executable(tfft tfft.f90)
+target_link_libraries(tfft fft ${MPI_Fortran_LIBRARIES})
+
+#add_test(tfft ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 1
+#  ${CMAKE_CURRENT_BINARY_DIR}/tfft < ${fft_SOURCE_DIR}/in)
+
+install(TARGETS fft
+  EXPORT ${BSPLINES_EXPORT_TARGETS}
+  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+  PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
+)
diff --git a/fft/Makefile b/fft/Makefile
new file mode 100644
index 0000000..bb7a818
--- /dev/null
+++ b/fft/Makefile
@@ -0,0 +1,68 @@
+#
+# @file Makefile
+#
+# @brief Makefile for the fft library
+#
+# @copyright
+# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+# SPC (Swiss Plasma Center)
+#
+# spclibs is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# @authors
+# (in alphabetical order)
+# @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+#
+FFTW=$(FFTW_HOME)
+F90 = ifort
+LD = ifort
+
+debug = -g -traceback -CB
+optim = -O3
+
+OPT=$(debug)
+#OPT=$(optim)
+
+F90FLAGS = $(OPT) -Dfft_w3 -I$(FFTW)/include
+LDFLAGS = $(OPT) -L. -L$(FFTW)/lib
+
+LIBS = -lfft -lfftw3
+
+.SUFFIXES:
+.SUFFIXES: .o .c .F90 .f90 .f
+
+.f90.o:
+	$(F90) $(F90FLAGS) -c $<
+.F90.o:
+	$(F90) $(F90FLAGS) -c $<
+.f.o:
+	$(F90) $(F90FLAGS) -c $<
+
+all:	tfft
+
+lib:    libfft.a
+
+libfft.a: fft_fftw.o
+	xiar r $@ $?
+	ranlib $@
+
+tfft:	tfft.o
+	$(LD) $(LDFLAGS) -o $@ tfft.o fft_fftw.o $(LIBS)
+
+tfft.o:	lib
+
+clean:
+	rm -f *.o *.mod *~ a.out
+
+distclean: clean
+	rm -f tfft
diff --git a/fft/fft_fftw.F90 b/fft/fft_fftw.F90
new file mode 100644
index 0000000..ff9e7ca
--- /dev/null
+++ b/fft/fft_fftw.F90
@@ -0,0 +1,1376 @@
+!>
+!> @file fft_fftw.F90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+#if defined(fft_w2)
+MODULE fft
+!
+  IMPLICIT NONE
+!
+  PRIVATE
+!
+  PUBLIC four1, fourcol, fourrow
+!
+! Global parameters
+!
+  INTEGER, PARAMETER :: MXPLAN=8
+!
+! Global variables
+!
+  INTEGER ::n1d_saved=0
+  INTEGER*8, DIMENSION(MXPLAN)             :: plan1d
+  INTEGER,   DIMENSION(MXPLAN)             :: n1d
+  REAL,      DIMENSION(:),     ALLOCATABLE :: scr1_real
+  DOUBLE COMPLEX,   DIMENSION(:),     ALLOCATABLE :: scr1
+
+  INTERFACE fourcol
+     MODULE PROCEDURE four1, fourcol_ra, fourcol_raa
+  END INTERFACE
+
+CONTAINS
+!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!
+  SUBROUTINE four1(arr, isign)
+!
+!  A single 1D complex FFT
+!
+    INCLUDE 'fftw_f77.h'
+!
+! Dummy arguments
+!
+    DOUBLE COMPLEX, DIMENSION(:), INTENT(INOUT) :: arr
+    INTEGER,               INTENT(IN)    :: isign
+!
+! Local variables
+!
+    INTEGER :: n, id
+!
+    n = SIZE(arr)
+    IF( .NOT. ALLOCATED(scr1) ) THEN
+       ALLOCATE(scr1(n))
+    ELSE
+       IF ( SIZE(scr1) < n ) THEN
+          DEALLOCATE(scr1)
+          ALLOCATE(scr1(n))
+       END IF
+    END IF
+!
+    CALL getplan(n, isign, id, 1)
+    CALL fftw_f77_one(plan1d(id), arr, scr1)
+  END SUBROUTINE four1
+!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!
+  SUBROUTINE fourcol_ra(arr, isign)
+!
+!  1D complex FFT of columns of arr(1:N,1:howmany)
+!
+     INCLUDE 'fftw_f77.h'
+!
+! Dummy arguments
+!
+    DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr
+    INTEGER,                 INTENT(IN)    :: isign
+!
+! Local variables
+!
+    INTEGER :: n, howmany, id
+!
+    n = SIZE(arr,1)
+    howmany = SIZE(arr,2)
+!
+    IF( .NOT. ALLOCATED(scr1) ) THEN
+       ALLOCATE(scr1(n))
+    ELSE
+       IF ( SIZE(scr1) < n ) THEN
+          DEALLOCATE(scr1)
+          ALLOCATE(scr1(n))
+       END IF
+    END IF
+!
+    CALL getplan(n, isign, id,1)
+    CALL fftw_f77(plan1d(id), howmany, arr, 1, n, scr1, 1, n)
+  END SUBROUTINE fourcol_ra
+!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!
+  SUBROUTINE fourcol_raa(arr, isign)
+!
+!  1D complex FFT of columns of arr(1:N,1:howmany)
+!
+     INCLUDE 'fftw_f77.h'
+!
+! Dummy arguments
+!
+    DOUBLE COMPLEX, DIMENSION(:,:,:), INTENT(INOUT) :: arr
+    INTEGER,                   INTENT(IN)    :: isign
+!
+! Local variables
+!
+    INTEGER :: n, howmany, id
+!
+    n = SIZE(arr,1)
+    howmany = SIZE(arr,2)*SIZE(arr,3)
+!
+    IF( .NOT. ALLOCATED(scr1) ) THEN
+       ALLOCATE(scr1(n))
+    ELSE
+       IF ( SIZE(scr1) < n ) THEN
+          DEALLOCATE(scr1)
+          ALLOCATE(scr1(n))
+       END IF
+    END IF
+!
+    CALL getplan(n, isign, id, 1)
+    CALL fftw_f77(plan1d(id), howmany, arr, 1, n, scr1, 1, n)
+  END SUBROUTINE fourcol_raa
+!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!
+  SUBROUTINE fourrow(arr, isign)
+!
+!  1D complex FFT of rows of arr(1:howmany,1:N)
+!
+    INCLUDE 'fftw_f77.h'
+!
+! Dummy arguments
+!
+    DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr
+    INTEGER,                 INTENT(IN)    :: isign
+!
+! Local variables
+!
+    INTEGER :: n, howmany, id
+!
+    n = SIZE(arr,2)
+    howmany = SIZE(arr,1)
+!
+    IF( .NOT. ALLOCATED(scr1) ) THEN
+       ALLOCATE(scr1(n))
+    ELSE
+       IF ( SIZE(scr1) < n ) THEN
+          DEALLOCATE(scr1)
+          ALLOCATE(scr1(n))
+       END IF
+    END IF
+!
+    CALL getplan(n, isign, id, 1)
+    CALL fftw_f77(plan1d(id), howmany, arr, howmany, 1, &
+         & scr1, howmany, 1)
+  END SUBROUTINE fourrow
+!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!
+  SUBROUTINE getplan(n, sign, id, complex_fftw)
+!
+!  Create or get an already created FFT plan (depends only on N.)
+!
+    INCLUDE 'fftw_f77.h'
+!
+! Dummy arguments
+!
+    INTEGER, INTENT(IN)  :: n            ! size of transform
+    INTEGER, INTENT(IN)  :: sign         ! dir. of transform -1=>FORWARD, +1=>BACKWARD
+    INTEGER, INTENT(OUT) :: id           ! id of FFT plan
+    INTEGER, INTENT(IN)  :: complex_fftw ! Create complex<->complex transform if =1
+!
+! Local variables
+!
+    INTEGER :: k, i, dir
+!
+    k = sign*(2*n+complex_fftw)
+    DO i = 1,n1d_saved
+       IF( k == n1d(i)) THEN
+          id = i
+          RETURN
+       END IF
+    END DO
+    IF( n1d_saved == MXPLAN) THEN
+       PRINT*, 'Module fft: MXPLAN too small! Increase it and recompile'
+       STOP
+    END IF
+    n1d_saved = n1d_saved+1
+    n1d(n1d_saved) = k
+    id = n1d_saved
+    dir = FFTW_FORWARD
+    IF( sign == +1 ) dir = FFTW_BACKWARD
+    IF (complex_fftw == 1) THEN
+       CALL fftw_f77_create_plan(plan1d(id), n, dir, FFTW_ESTIMATE + FFTW_IN_PLACE)
+!!$    ELSE
+!!$       CALL rfftw_f77_create_plan(plan1d(id), n, dir, FFTW_ESTIMATE + FFTW_IN_PLACE)
+    END IF
+  END SUBROUTINE getplan
+END MODULE fft
+#endif
+!
+#if defined(fft_w3)
+MODULE fft
+!
+  IMPLICIT NONE
+!
+  PRIVATE
+  PUBLIC :: four1, fourcol, fourrow
+!
+  INCLUDE 'fftw3.f'
+!
+  TYPE int_para
+     INTEGER, DIMENSION(2) :: par ! size of transform
+  END TYPE int_para
+!
+! Global parameters
+!
+  INTEGER, PARAMETER :: MXPLAN=16 ! define the maximum number of plans.
+!
+! Global variables
+!
+  INTEGER*8,      DIMENSION(MXPLAN,4), SAVE :: plan1d      ! plans for 1-dim FFT
+  TYPE(int_para), DIMENSION(MXPLAN,4), SAVE :: n1d_par
+  INTEGER,        DIMENSION(4),        SAVE :: n1d_saved=0 ! number of plans saved
+!
+  INTERFACE fourcol
+     MODULE PROCEDURE four1, fourcol_ra, fourcol_raa
+  END INTERFACE
+!
+CONTAINS
+!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!
+  SUBROUTINE four1(vec, isign)
+!
+! Dummy arguments
+!
+    DOUBLE COMPLEX, DIMENSION(:), INTENT(INOUT) :: vec
+    INTEGER,               INTENT(IN)    :: isign
+!
+! Local parameters
+!
+    INTEGER, PARAMETER :: NUM=1
+!
+! Local variables
+!
+    INTEGER :: k
+    INTEGER :: dim1, i, id, istat, n
+    DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: vec_tmp
+!
+!
+    n = SIZE(vec)
+!
+! test if a plan that fits is already created.
+!
+    id = -1
+    k = isign*n
+    DO i = 1,n1d_saved(NUM)
+       IF (k == n1d_par(i,NUM)%par(1)) THEN
+          id = i
+          EXIT
+       END IF
+    END DO
+!
+    SELECT CASE (id)
+    CASE (-1)
+!
+! test if the maximal number of plans is alredy reached.
+!
+       IF (n1d_saved(NUM) == MXPLAN) THEN
+           WRITE(*,*) 'FOUR1: MXPLAN too small! Increase it and recompile'
+          STOP
+       END IF
+!
+       dim1 = SIZE(vec)
+       ALLOCATE(vec_tmp(dim1), stat=istat)
+       IF (istat /= 0) THEN
+          WRITE(*,*) 'FOUR1: Allocation of  vec_tmp  failed!'
+          STOP
+       END IF
+!
+       n1d_saved(NUM) = n1d_saved(NUM)+1
+       n1d_par(n1d_saved(NUM),NUM)%par(1) = k
+       id = n1d_saved(NUM)
+!
+       SELECT CASE (isign)
+       CASE (-1)
+          CALL dfftw_plan_dft_1d(plan1d(id,NUM), n, vec_tmp(1), vec_tmp(1), &
+               FFTW_FORWARD, FFTW_ESTIMATE)
+       CASE (1)
+          CALL dfftw_plan_dft_1d(plan1d(id,NUM), n, vec_tmp(1), vec_tmp(1), &
+               FFTW_BACKWARD, FFTW_ESTIMATE)
+       END SELECT
+!
+       DEALLOCATE(vec_tmp, stat=istat)
+       IF (istat /= 0) THEN
+          WRITE(*,*) 'FOUR1: Dellocation of  vec_tmp  failed!'
+          STOP
+       END IF
+!
+    END SELECT
+!
+    CALL dfftw_execute_dft(plan1d(id,NUM), vec(1), vec(1))
+!
+  END SUBROUTINE four1
+!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!
+  SUBROUTINE fourcol_ra(arr, isign)
+!
+! Dummy arguments
+!
+    DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr
+    INTEGER,                 INTENT(IN)    :: isign
+!
+! Local parameters
+!
+    INTEGER, PARAMETER :: NUM=2, RANK=1
+!
+! Local variables
+!
+    INTEGER :: k
+    INTEGER :: dim1, dim2, howmany, i, id, istat, n
+    INTEGER, DIMENSION(RANK)             :: n_arr, nembed
+    DOUBLE COMPLEX, DIMENSION(:,:), ALLOCATABLE :: arr_tmp
+!
+    dim1 = SIZE(arr,1)
+    dim2 = SIZE(arr,2)
+    howmany = SIZE(arr,2)
+!
+! test if a plan that fits is already created.
+!
+    id = -1
+    k = isign*dim1
+    DO i = 1,n1d_saved(NUM)
+       IF (k == n1d_par(i,NUM)%par(1) .AND. &
+            howmany == n1d_par(i,NUM)%par(2)) THEN
+          id = i
+          EXIT
+       END IF
+    END DO
+!
+    SELECT CASE (id)
+    CASE (-1)
+!
+! test if the maximal number of plans is alredy reached.
+!
+       IF (n1d_saved(NUM) == MXPLAN) THEN
+          WRITE(*,*) 'FOURCOL_RA: MXPLAN too small! Increase it and recompile'
+          STOP
+       END IF
+!
+       ALLOCATE(arr_tmp(dim1, dim2), stat=istat)
+       IF (istat /= 0) THEN
+          WRITE(*,*) 'FOURCOL_RA: Allocation of  arr_tmp  failed!'
+          STOP
+       END IF
+!
+       nembed(1) = SIZE(arr)
+       n_arr(1)  = dim1
+       n         = n_arr(1)
+!
+       n1d_saved(NUM) = n1d_saved(NUM)+1
+       n1d_par(n1d_saved(NUM),NUM)%par(1) = k
+       n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany
+       id = n1d_saved(NUM)
+!
+       SELECT CASE (isign)
+       CASE (-1)
+          CALL dfftw_plan_many_dft(plan1d(id,NUM), RANK, n_arr, howmany, &
+               arr_tmp(1,1), nembed, 1, n, arr_tmp(1,1), nembed, 1, n, &
+               FFTW_FORWARD, FFTW_ESTIMATE)
+       CASE (1)
+          CALL dfftw_plan_many_dft(plan1d(id,NUM), RANK, n_arr, howmany, &
+               arr_tmp(1,1), nembed, 1, n, arr_tmp(1,1), nembed, 1, n, &
+               FFTW_BACKWARD, FFTW_ESTIMATE)
+       END SELECT
+!
+       DEALLOCATE(arr_tmp, stat=istat)
+       IF (istat /= 0) THEN
+          WRITE(*,*) 'FOURCOL_RA: Dellocation of  arr_tmp  failed!'
+          STOP
+       END IF
+!
+    END SELECT
+!
+    CALL dfftw_execute_dft(plan1d(id,NUM), arr(1,1), arr(1,1))
+!
+  END SUBROUTINE fourcol_ra
+!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!
+  SUBROUTINE fourcol_raa(arr, isign)
+!
+! Dummy arguments
+!
+    DOUBLE COMPLEX, DIMENSION(:,:,:), INTENT(INOUT) :: arr
+    INTEGER,                   INTENT(IN)    :: isign
+!
+! Local parameters
+!
+    INTEGER, PARAMETER :: NUM=3, RANK=1
+!
+! Local variables
+!
+    INTEGER :: k
+    INTEGER :: dim1, dim2, dim3, howmany, i, id, istat, n
+    INTEGER, DIMENSION(RANK)               :: n_arr, nembed
+    DOUBLE COMPLEX, DIMENSION(:,:,:), ALLOCATABLE :: arr_tmp
+!
+    dim1 = SIZE(arr,1)
+    dim2 = SIZE(arr,2)
+    dim3 = SIZE(arr,3)
+    howmany = dim2*dim3
+!
+! test if a plan that fits is already created.
+!
+    id = -1
+    k = isign*dim1
+    DO i = 1,n1d_saved(NUM)
+       IF (k == n1d_par(i,NUM)%par(1) .AND. &
+            howmany == n1d_par(i,NUM)%par(2)) THEN
+          id = i
+          EXIT
+       END IF
+    END DO
+!
+    SELECT CASE (id)
+    CASE (-1)
+!
+! test if the maximal number of plans is alredy reached.
+!
+       IF (n1d_saved(NUM) == MXPLAN) THEN
+          WRITE(*,*) 'FOURCOL_RAA: MXPLAN too small! Increase it and recompile'
+          STOP
+       END IF
+!
+       ALLOCATE(arr_tmp(dim1, dim2, dim3), stat=istat)
+       IF (istat /= 0) THEN
+          WRITE(*,*) 'FOURCOL_RAA: Allocation of  arr_tmp  failed!'
+          STOP
+       END IF
+!
+       nembed(1) = SIZE(arr)
+       n_arr(1)  = dim1
+       n         = n_arr(1)
+!
+       n1d_saved(NUM) = n1d_saved(NUM)+1
+       n1d_par(n1d_saved(NUM),NUM)%par(1) = k
+       n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany
+       id = n1d_saved(NUM)
+!
+       SELECT CASE (isign)
+       CASE (-1)
+          CALL dfftw_plan_many_dft(plan1d(id,NUM), RANK, n_arr, howmany, &
+               arr_tmp(1,1,1), nembed, 1, n, arr_tmp(1,1,1), nembed, 1, n, &
+               FFTW_FORWARD, FFTW_ESTIMATE)
+       CASE (1)
+          CALL dfftw_plan_many_dft(plan1d(id,NUM), RANK, n_arr, howmany, &
+               arr_tmp(1,1,1), nembed, 1, n, arr_tmp(1,1,1), nembed, 1, n, &
+               FFTW_BACKWARD, FFTW_ESTIMATE)
+       END SELECT
+!
+       DEALLOCATE(arr_tmp, stat=istat)
+       IF (istat /= 0) THEN
+          WRITE(*,*) 'FOURCOL_RAA: Dellocation of  arr_tmp  failed!'
+          STOP
+       END IF
+!
+    END SELECT
+!
+    CALL dfftw_execute_dft(plan1d(id,NUM), arr(1,1,1), arr(1,1,1))
+!
+  END SUBROUTINE fourcol_raa
+!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!
+  SUBROUTINE fourrow(arr, isign)
+!
+! Dummy arguments
+!
+    DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr
+    INTEGER,                 INTENT(IN)    :: isign
+!
+! Local parameters
+!
+    INTEGER, PARAMETER :: NUM=4, RANK=1
+!
+! Local variables
+!
+    INTEGER :: k
+    INTEGER :: dim1, dim2, howmany, i, id, istat, n
+    INTEGER, DIMENSION(RANK)             :: n_arr, nembed
+    DOUBLE COMPLEX, DIMENSION(:,:), ALLOCATABLE :: arr_tmp
+!
+    dim1 = SIZE(arr,1)
+    dim2 = SIZE(arr,2)
+    howmany = dim1
+!
+! test if a plan that fits is already created.
+!
+    id = -1
+    k = isign*dim2
+    DO i = 1,n1d_saved(NUM)
+       IF (k == n1d_par(i,NUM)%par(1) .AND. &
+            howmany == n1d_par(i,NUM)%par(2)) THEN
+          id = i
+          EXIT
+       END IF
+    END DO
+!
+    SELECT CASE (id)
+    CASE (-1)
+!
+! test if the maximal number of plans is alredy reached.
+!
+       IF (n1d_saved(NUM) == MXPLAN) THEN
+          WRITE(*,*) 'FOURROW: MXPLAN too small! Increase it and recompile'
+          STOP
+       END IF
+!
+       ALLOCATE(arr_tmp(dim1, dim2), stat=istat)
+       IF (istat /= 0) THEN
+          WRITE(*,*) 'FOURROW: Allocation of  arr_tmp  failed!'
+          STOP
+       END IF
+!
+       nembed(1) = SIZE(arr)
+       n_arr(1)  = SIZE(arr,2)
+       n         = n_arr(1)
+       howmany   = SIZE(arr,1)
+!
+       n1d_saved(NUM) = n1d_saved(NUM)+1
+       n1d_par(n1d_saved(NUM),NUM)%par(1) = k
+       n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany
+       id = n1d_saved(NUM)
+!
+       SELECT CASE (isign)
+       CASE (-1)
+          CALL dfftw_plan_many_dft(plan1d(id,NUM), RANK, n_arr, howmany, &
+               arr_tmp(1,1), nembed, howmany, 1, arr_tmp(1,1), nembed, howmany, 1, &
+               FFTW_FORWARD, FFTW_ESTIMATE)
+       CASE (1)
+          CALL dfftw_plan_many_dft(plan1d(id,NUM), RANK, n_arr, howmany, &
+               arr_tmp(1,1), nembed, howmany, 1, arr_tmp(1,1), nembed, howmany, 1, &
+               FFTW_BACKWARD, FFTW_ESTIMATE)
+       END SELECT
+!
+       DEALLOCATE(arr_tmp, stat=istat)
+       IF (istat /= 0) THEN
+          WRITE(*,*) 'FOUR1: Dellocation of  arr_tmp  failed!'
+          STOP
+       END IF
+!
+    END SELECT
+!
+    CALL dfftw_execute_dft(plan1d(id,NUM), arr(1,1), arr(1,1))
+!
+  END SUBROUTINE fourrow
+END MODULE fft
+#endif
+!
+#if defined(fft_essl)
+MODULE fft
+!
+  IMPLICIT NONE
+!
+  PRIVATE
+!
+  PUBLIC :: four1, fourcol, fourrow
+!
+  TYPE pointer_ra
+     REAL, DIMENSION(:), POINTER :: poi_ra
+  END TYPE pointer_ra
+!
+  TYPE int_para
+     INTEGER, DIMENSION(2) :: par
+  END TYPE int_para
+!
+! Global parameters
+!
+  INTEGER, PARAMETER :: MXPLAN=16 ! define the maximum number of work arrays.
+!
+! Global variables
+!
+  EXTERNAL :: ENOTRM
+  LOGICAL,                                            SAVE :: initflag=.TRUE.    ! initialization of the module
+  CHARACTER (len=8),                                  SAVE :: S2015              ! string to copy error list entry
+  REAL,             DIMENSION(8)                           :: aux1               ! auxilary array
+  REAL,             DIMENSION(1)                           :: aux2               ! auxilary array
+  TYPE(pointer_ra), DIMENSION(:,:),      ALLOCATABLE, SAVE :: aux1_poi, aux2_poi ! work arrays for the ESSL routine
+  TYPE(int_para),   DIMENSION(MXPLAN,4),              SAVE :: n1d_par            ! size of transform
+  INTEGER,          DIMENSION(4),                     SAVE :: n1d_saved=0        ! number of plans saved
+!
+  INTERFACE fourcol
+     MODULE PROCEDURE fourcol_ra, fourcol_raa
+  END INTERFACE
+!
+CONTAINS
+!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!
+  SUBROUTINE four1(vec, isign)
+!
+! Dummy arguments
+!
+    DOUBLE COMPLEX, DIMENSION(:), INTENT(INOUT) :: vec
+    INTEGER,               INTENT(IN)    :: isign
+!
+! Local parameters
+!
+    INTEGER, PARAMETER :: NUM=1
+!
+! Local variables
+!
+    INTEGER :: i, id, istat, k, n, naux1, naux2
+!
+    IF (initflag) THEN
+       initflag = .FALSE.
+!
+       CALL EINFO(0)
+       CALL ERRSAV(2015,S2015)
+!
+       ALLOCATE(aux1_poi(MXPLAN,4), stat=istat)
+       IF (istat /= 0) THEN
+          WRITE(*,*) 'FOUR1: 1. Allocation of  aux1_poi  failed!'
+          STOP
+       END IF
+!
+       ALLOCATE(aux2_poi(MXPLAN,4), stat=istat)
+       IF (istat /= 0) THEN
+          WRITE(*,*) 'FOUR1: 1. Allocation of  aux2_poi  failed!'
+          STOP
+       END IF
+!
+    END IF
+!
+    n = SIZE(vec)
+!
+! test if a plan that fits is already created.
+!
+    id = -1
+    k = isign*n
+    DO i = 1,n1d_saved(NUM)
+       IF (k == n1d_par(i,NUM)%par(1)) THEN
+          id = i
+          EXIT
+       END IF
+    END DO
+!
+    SELECT CASE (id)
+    CASE (-1)
+!
+       n1d_saved(NUM) = n1d_saved(NUM)+1
+       n1d_par(n1d_saved(NUM),NUM)%par(1) = k
+       id = n1d_saved(NUM)
+!
+       CALL ERRSET(2015,0,-1,1,ENOTRM,0)
+!
+       naux1 = SIZE(aux1)
+       naux2 = SIZE(aux2)
+!
+       CALL dcft(1, vec(1), 1, n,  vec(1), 1, n, &
+            n, 1, -isign, 1.0, aux1, naux1, aux2, naux2)
+!
+       CALL ERRSTR(2015,S2015)
+!
+! dynamic allocation of the work arrays.
+!
+       ALLOCATE(aux1_poi(id,NUM)%poi_ra(naux1), stat=istat)
+       IF (istat /= 0) THEN
+          WRITE(*,*) 'FOUR1: 2. Allocation of  aux1_poi  failed!'
+          STOP
+       ENDIF
+!
+       ALLOCATE(aux2_poi(id,NUM)%poi_ra(naux2), stat=istat)
+       IF (istat /= 0) THEN
+          WRITE(*,*) 'FOUR1: 2. Allocation of  aux2_poi  failed!'
+          STOP
+       ENDIF
+!
+       CALL dcft(1, vec(1), 1, n, vec(1), 1, n, &
+            n, 1, -isign, 1.0, &
+            aux1_poi(id,NUM)%poi_ra(1), naux1, &
+            aux2_poi(id,NUM)%poi_ra(1), naux2)
+!
+    END SELECT
+!
+    CALL dcft(0, vec(1), 1, n, vec(1), 1, n, &
+         n, 1, -isign, 1.0,  &
+         aux1_poi(id,NUM)%poi_ra(1), SIZE(aux1_poi(id,NUM)%poi_ra), &
+         aux2_poi(id,NUM)%poi_ra(1), SIZE(aux2_poi(id,NUM)%poi_ra))
+!
+  END SUBROUTINE four1
+!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!
+  SUBROUTINE fourcol_ra(arr, isign)
+!
+! Dummy arguments
+!
+    DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr
+    INTEGER,                 INTENT(IN)    :: isign
+!
+! Local parameters
+!
+    INTEGER, PARAMETER :: NUM=2
+!
+! Local variables
+!
+    INTEGER :: dim1, howmany, i, id, istat, k, naux1, naux2
+!
+    IF (initflag) THEN
+       initflag = .FALSE.
+!
+       CALL EINFO(0)
+       CALL ERRSAV(2015,S2015)
+!
+       ALLOCATE(aux1_poi(MXPLAN,4), stat=istat)
+       IF (istat /= 0) THEN
+          WRITE(*,*) 'FOURCOL_RA: 1. Allocation of  aux1_poi  failed!'
+          STOP
+       END IF
+!
+       ALLOCATE(aux2_poi(MXPLAN,4), stat=istat)
+       IF (istat /= 0) THEN
+          WRITE(*,*) 'FOURCOL_RA: 1. Allocation of  aux2_poi  failed!'
+          STOP
+       END IF
+!
+    END IF
+!
+    dim1    = SIZE(arr,1)
+    howmany = SIZE(arr,2)
+!
+! test if a plan that fits is already created.
+!
+    id = -1
+    k = isign*dim1
+    DO i = 1,n1d_saved(NUM)
+       IF (k == n1d_par(i,NUM)%par(1) .AND. &
+            howmany == n1d_par(i,NUM)%par(2)) THEN
+          id = i
+          EXIT
+       END IF
+    END DO
+!
+    SELECT CASE (id)
+    CASE (-1)
+!
+       n1d_saved(NUM) = n1d_saved(NUM)+1
+       n1d_par(n1d_saved(NUM),NUM)%par(1) = k
+       n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany
+       id = n1d_saved(NUM)
+!
+       CALL ERRSET(2015,0,-1,1,ENOTRM,0)
+!
+       naux1 = SIZE(aux1)
+       naux2 = SIZE(aux2)
+!
+       CALL dcft(1, arr(1,1), 1, dim1,  arr(1,1), 1, dim1, &
+            dim1, howmany, -isign, 1.0, aux1, naux1, aux2, naux2)
+!
+       CALL ERRSTR(2015,S2015)
+!
+! dynamic allocation of the work arrays.
+!
+       ALLOCATE(aux1_poi(id,NUM)%poi_ra(naux1), stat=istat)
+       IF (istat /= 0) THEN
+          WRITE(*,*) 'FOURCOL_RA: 2. Allocation of  aux1_poi  failed!'
+          STOP
+       ENDIF
+!
+       ALLOCATE(aux2_poi(id,NUM)%poi_ra(naux2), stat=istat)
+       IF (istat /= 0) THEN
+          WRITE(*,*) 'FOURCOL_RA: 2. Allocation of  aux2_poi  failed!'
+          STOP
+       ENDIF
+!
+       CALL dcft(1, arr(1,1), 1, dim1, arr(1,1), 1, dim1, &
+            dim1, howmany, -isign, 1.0, &
+            aux1_poi(id,NUM)%poi_ra(1), naux1, &
+            aux2_poi(id,NUM)%poi_ra(1), naux2)
+!
+    END SELECT
+!
+    CALL dcft(0, arr(1,1), 1, dim1, arr(1,1), 1, dim1, &
+         dim1, howmany, -isign, 1.0, &
+         aux1_poi(id,NUM)%poi_ra(1), SIZE(aux1_poi(id,NUM)%poi_ra), &
+         aux2_poi(id,NUM)%poi_ra(1), SIZE(aux2_poi(id,NUM)%poi_ra))
+!
+  END SUBROUTINE fourcol_ra
+!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!
+  SUBROUTINE fourcol_raa(arr, isign)
+!
+! Dummy arguments
+!
+    DOUBLE COMPLEX, DIMENSION(:,:,:), INTENT(INOUT) :: arr
+    INTEGER,                   INTENT(IN)    :: isign
+!
+! Local parameters
+!
+    INTEGER, PARAMETER :: NUM=3
+!
+! Local variables
+!
+    INTEGER :: dim1, howmany, i, id, istat, k, naux1, naux2
+!
+    IF (initflag) THEN
+       initflag = .FALSE.
+!
+       CALL EINFO(0)
+       CALL ERRSAV(2015,S2015)
+!
+       ALLOCATE(aux1_poi(MXPLAN,NUM), stat=istat)
+       IF (istat /= 0) THEN
+          WRITE(*,*) 'FOURCOL_RAA: 1. Allocation of  aux1_poi  failed!'
+          STOP
+       END IF
+!
+       ALLOCATE(aux2_poi(MXPLAN,NUM), stat=istat)
+       IF (istat /= 0) THEN
+          WRITE(*,*) 'FOURCOL_RAA: 1. Allocation of  aux2_poi  failed!'
+          STOP
+       END IF
+!
+    END IF
+!
+    dim1    = SIZE(arr,1)
+    howmany = SIZE(arr,2)*SIZE(arr,3)
+!
+! test if a plan that fits is already created.
+!
+    id = -1
+    k = isign*dim1
+    DO i = 1,n1d_saved(NUM)
+       IF (k == n1d_par(i,NUM)%par(1) .AND. &
+            howmany == n1d_par(i,NUM)%par(2)) THEN
+          id = i
+          EXIT
+       END IF
+    END DO
+!
+    SELECT CASE (id)
+    CASE (-1)
+!
+       n1d_saved(NUM) = n1d_saved(NUM)+1
+       n1d_par(n1d_saved(NUM),NUM)%par(1) = k
+       n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany
+       id = n1d_saved(NUM)
+!
+       CALL ERRSET(2015,0,-1,1,ENOTRM,0)
+!
+       naux1 = SIZE(aux1)
+       naux2 = SIZE(aux2)
+!
+       CALL dcft(1, arr(1,1,1), 1, dim1, arr(1,1,1), 1, dim1, &
+            dim1, howmany, -isign, 1.0, aux1, naux1, aux2, naux2)
+!
+       CALL ERRSTR(2015,S2015)
+!
+! dynamic allocation of the work arrays.
+!
+       ALLOCATE(aux1_poi(id,NUM)%poi_ra(naux1), stat=istat)
+       IF (istat /= 0) THEN
+          WRITE(*,*) 'FOURCOL_RAA: 2. Allocation of  aux1_poi  failed!'
+          STOP
+       ENDIF
+!
+       ALLOCATE(aux2_poi(id,NUM)%poi_ra(naux2), stat=istat)
+       IF (istat /= 0) THEN
+          WRITE(*,*) 'FOURCOL_RAA: 2. Allocation of  aux2_poi  failed!'
+          STOP
+       ENDIF
+!
+       CALL dcft(1, arr(1,1,1), 1, dim1, arr(1,1,1), 1, dim1, &
+            dim1, howmany, -isign, 1.0, &
+            aux1_poi(id,NUM)%poi_ra(1), naux1, &
+            aux2_poi(id,NUM)%poi_ra(1), naux2)
+!
+    END SELECT
+!
+    CALL dcft(0, arr(1,1,1), 1, dim1, arr(1,1,1), 1, dim1, &
+         dim1, howmany, -isign, 1.0, &
+            aux1_poi(id,NUM)%poi_ra(1), SIZE(aux1_poi(id,NUM)%poi_ra), &
+            aux2_poi(id,NUM)%poi_ra(1), SIZE(aux2_poi(id,NUM)%poi_ra))
+!
+  END SUBROUTINE fourcol_raa
+!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!
+  SUBROUTINE fourrow(arr, isign)
+!
+! Dummy arguments
+!
+    DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr
+    INTEGER,                 INTENT(IN)    :: isign
+!
+! Local parameters
+!
+    INTEGER, PARAMETER :: NUM=4
+!
+! Local variables
+!
+    INTEGER :: dim1, dim2, i, id, istat, k, naux1, naux2
+!
+    IF (initflag) THEN
+       initflag = .FALSE.
+!
+       CALL EINFO(0)
+       CALL ERRSAV(2015,S2015)
+!
+       ALLOCATE(aux1_poi(MXPLAN,NUM), stat=istat)
+       IF (istat /= 0) THEN
+          WRITE(*,*) 'FOURROW: 1. Allocation of  aux1_poi  failed!'
+          STOP
+       END IF
+!
+       ALLOCATE(aux2_poi(MXPLAN,NUM), stat=istat)
+       IF (istat /= 0) THEN
+          WRITE(*,*) 'FOURROW: 1. Allocation of  aux2_poi  failed!'
+          STOP
+       END IF
+!
+    END IF
+!
+    dim1 = SIZE(arr,1)
+    dim2 = SIZE(arr,2)
+!
+! test if a plan that fits is already created.
+!
+    id = -1
+    k = isign*dim1
+    DO i = 1,n1d_saved(NUM)
+       IF (k == n1d_par(i,NUM)%par(1) .AND. &
+            dim2 == n1d_par(i,NUM)%par(2)) THEN
+          id = i
+          EXIT
+       END IF
+    END DO
+!
+    SELECT CASE (id)
+    CASE (-1)
+!
+       n1d_saved(NUM) = n1d_saved(NUM)+1
+       n1d_par(n1d_saved(NUM),NUM)%par(1) = k
+       n1d_par(n1d_saved(NUM),NUM)%par(2) = dim2
+       id = n1d_saved(NUM)
+!
+       CALL ERRSET(2015,0,-1,1,ENOTRM,0)
+!
+       naux1 = SIZE(aux1)
+       naux2 = SIZE(aux2)
+!
+       CALL dcft(1, arr(1,1), dim2, 1,  arr(1,1), dim2, 1, &
+            dim1, dim2, -isign, 1.0, aux1, naux1, aux2, naux2)
+!
+       CALL ERRSTR(2015,S2015)
+!
+! dynamic allocation of the work arrays.
+!
+       ALLOCATE(aux1_poi(id,NUM)%poi_ra(naux1), stat=istat)
+       IF (istat /= 0) THEN
+          WRITE(*,*) 'FOURROW: 2. Allocation of  aux1_poi  failed!'
+          STOP
+       ENDIF
+!
+       ALLOCATE(aux2_poi(id,NUM)%poi_ra(naux2), stat=istat)
+       IF (istat /= 0) THEN
+          WRITE(*,*) 'FOURROW: 2. Allocation of  aux2_poi  failed!'
+          STOP
+       ENDIF
+!
+       CALL dcft(1, arr(1,1), dim1, 1, arr(1,1), dim1, 1, &
+            dim1, dim2, -isign, 1.0, &
+            aux1_poi(id,NUM)%poi_ra(1), naux1, &
+            aux2_poi(id,NUM)%poi_ra(1), naux2)
+!
+    END SELECT
+!
+    CALL dcft(0, arr(1,1), dim1, 1, arr(1,1), dim1, 1, &
+         dim1, dim2, -isign, 1.0,  &
+         aux1_poi(id,NUM)%poi_ra(1), SIZE(aux1_poi(id,NUM)%poi_ra), &
+         aux2_poi(id,NUM)%poi_ra(1), SIZE(aux2_poi(id,NUM)%poi_ra))
+!
+  END SUBROUTINE fourrow
+END MODULE fft
+#endif
+!
+#if defined(fft_mkl)
+MODULE fft
+!
+  USE mkl_dfti
+!
+  IMPLICIT NONE
+!
+  PRIVATE
+!
+  PUBLIC :: pointer_r, handle1d
+  PUBLIC :: four1, fourcol, fourrow
+!
+  TYPE pointer_r
+     TYPE(DFTI_DESCRIPTOR), POINTER :: desc_handle
+  END TYPE pointer_r
+!
+  TYPE int_para
+     INTEGER, DIMENSION(2) :: par
+  END TYPE int_para
+!
+! Global parameters
+!
+  INTEGER, PARAMETER :: MXPLAN=16 ! define the maximum number of plans.
+!
+! Global variables
+!
+  TYPE(DFTI_DESCRIPTOR),                      POINTER       :: desc_handle
+  TYPE(pointer_r),       DIMENSION(MXPLAN,4),          SAVE :: handle1d    ! descriptor handles for 1-dim FFT
+  TYPE(int_para),        DIMENSION(MXPLAN,4),          SAVE :: n1d_par     ! size of transform
+  INTEGER,               DIMENSION(4),                 SAVE :: n1d_saved=0 ! number of descriptor handles saved
+!
+  INTERFACE fourcol
+     MODULE PROCEDURE fourcol_ra, fourcol_raa
+  END INTERFACE
+!
+CONTAINS
+!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!
+  SUBROUTINE four1(vec, isign)
+!
+! Dummy arguments
+!
+    INTEGER,               INTENT(IN)    :: isign
+    DOUBLE COMPLEX, DIMENSION(:), INTENT(INOUT) :: vec
+!
+! Local parameters
+!
+    INTEGER, PARAMETER :: NUM=1
+!
+! Local variables
+!
+    INTEGER :: dim1, id, i, k, status
+    LOGICAL :: init_flag
+!
+! test if a plan that fits is already created.
+!
+    id = -1
+    k = isign*SIZE(vec)
+    DO i = 1,n1d_saved(NUM)
+       IF (k == n1d_par(i,NUM)%par(1)) THEN
+          id = i
+          EXIT
+       END IF
+    END DO
+!
+    SELECT CASE (id)
+    CASE (-1)
+       init_flag = .TRUE.
+       n1d_saved(NUM) = n1d_saved(NUM)+1
+       n1d_par(n1d_saved(NUM),NUM)%par(1) = k
+       id = n1d_saved(NUM)
+!
+    CASE default
+       init_flag = .FALSE.
+    END SELECT
+!
+    dim1 = SIZE(vec,1)
+!
+    CALL fourcol_mkl(vec(1), dim1, 1, isign, init_flag, id, NUM)
+!
+  END SUBROUTINE four1
+!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!
+  SUBROUTINE fourcol_ra(arr, isign)
+!
+! Dummy arguments
+!
+    INTEGER,                 INTENT(IN)    :: isign
+    DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr
+!
+! Local parameters
+!
+    INTEGER, PARAMETER :: NUM=2
+!
+! Local variables
+!
+    INTEGER :: dim1, howmany, id, i, k, status
+    LOGICAL :: init_flag
+!
+    dim1 = SIZE(arr,1)
+    howmany = SIZE(arr,2)
+!
+! test if a plan that fits is already created.
+!
+    id = -1
+    k = isign*SIZE(arr)
+    DO i = 1,n1d_saved(NUM)
+       IF (k == n1d_par(i,NUM)%par(1) .AND. &
+            howmany == n1d_par(i,NUM)%par(2)) THEN
+          id = i
+          EXIT
+       END IF
+    END DO
+!
+    SELECT CASE (id)
+    CASE (-1)
+       init_flag = .TRUE.
+       n1d_saved(NUM) = n1d_saved(NUM)+1
+       n1d_par(n1d_saved(NUM),NUM)%par(1) = k
+       n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany
+       id = n1d_saved(NUM)
+!
+    CASE default
+       init_flag = .FALSE.
+    END SELECT
+!
+    CALL fourcol_mkl(arr(1,1), dim1, howmany, isign, init_flag, id, NUM)
+!
+  END SUBROUTINE fourcol_ra
+!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!
+  SUBROUTINE fourcol_raa(arr, isign)
+!
+! Dummy arguments
+!
+    INTEGER,                   INTENT(IN)    :: isign
+    DOUBLE COMPLEX, DIMENSION(:,:,:), INTENT(INOUT) :: arr
+!
+! Local parameters
+!
+    INTEGER, PARAMETER :: NUM=3
+!
+! Local variables
+!
+    INTEGER :: dim1, howmany, id, i, k, status
+    LOGICAL :: init_flag
+!
+    dim1 = SIZE(arr,1)
+    howmany = SIZE(arr,2)*SIZE(arr,3)
+!
+! test if a plan that fits is already created.
+!
+    id = -1
+    k = isign*SIZE(arr)
+    DO i = 1,n1d_saved(NUM)
+       IF (k == n1d_par(i,NUM)%par(1) .AND. &
+            howmany == n1d_par(i,NUM)%par(2)) THEN
+          id = i
+          EXIT
+       END IF
+    END DO
+!
+    SELECT CASE (id)
+    CASE (-1)
+       init_flag = .TRUE.
+       n1d_saved(NUM) = n1d_saved(NUM)+1
+       n1d_par(n1d_saved(NUM),NUM)%par(1) = k
+       n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany
+       id = n1d_saved(NUM)
+!
+    CASE default
+       init_flag = .FALSE.
+    END SELECT
+!
+    CALL fourcol_mkl(arr(1,1,1), dim1, howmany, isign, init_flag, id, NUM)
+!
+  END SUBROUTINE fourcol_raa
+!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!
+  SUBROUTINE fourrow(arr, isign)
+!
+! Dummy arguments
+!
+    INTEGER,                 INTENT(IN)    :: isign
+    DOUBLE COMPLEX, DIMENSION(:,:), INTENT(INOUT) :: arr
+!
+! Local parameters
+!
+    INTEGER, PARAMETER :: NUM=4
+!
+! Local variables
+!
+    INTEGER :: dim2, howmany, id, i, k, status
+    LOGICAL :: init_flag
+!
+    howmany = SIZE(arr,1)
+    dim2    = SIZE(arr,2)
+!
+! test if a plan that fits is already created.
+!
+    id = -1
+    k = isign*SIZE(arr)
+    DO i = 1,n1d_saved(NUM)
+       IF (k == n1d_par(i,NUM)%par(1) .AND. &
+            howmany == n1d_par(i,NUM)%par(2)) THEN
+          id = i
+          EXIT
+       END IF
+    END DO
+!
+    SELECT CASE (id)
+    CASE (-1)
+       init_flag = .TRUE.
+       n1d_saved(NUM) = n1d_saved(NUM)+1
+       n1d_par(n1d_saved(NUM),NUM)%par(1) = k
+       n1d_par(n1d_saved(NUM),NUM)%par(2) = howmany
+       id = n1d_saved(NUM)
+!
+    CASE default
+       init_flag = .FALSE.
+    END SELECT
+!
+    CALL fourrow_mkl(arr(1,1), howmany, dim2, isign, init_flag, id, NUM)
+!
+  END SUBROUTINE fourrow
+END MODULE fft
+!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!
+SUBROUTINE fourcol_mkl(arr, dim1, howmany, isign, init_flag, id, num)
+!
+! COMMENT: This subroutine is necessary to prevent the Lahey/Fujitsu
+!          compiler from making a copy of array arr when passing
+!          arguments.
+  USE fft, ONLY: handle1d
+!
+  USE mkl_dfti
+!
+  IMPLICIT NONE
+!
+! Dummy arguments
+!
+  DOUBLE COMPLEX, DIMENSION(*), INTENT(INOUT) :: arr
+  INTEGER,               INTENT(IN)    :: dim1
+  INTEGER,               INTENT(IN)    :: howmany
+  INTEGER,               INTENT(IN)    :: isign
+  LOGICAL,               INTENT(IN)    :: init_flag
+  INTEGER,               INTENT(IN)    :: id
+  INTEGER,               INTENT(IN)    :: num
+!
+! Local variables
+!
+  INTEGER :: i, status
+!
+  IF (init_flag) THEN
+!
+     status = DftiCreateDescriptor(handle1d(id,num)%desc_handle, &
+          DFTI_DOUBLE, DFTI_COMPLEX, 1, dim1)
+     IF (status /= 0) WRITE(*,*) 'FOURCOL_MKL 0:', DftiErrorMessage(status)
+
+     status = DftiSetValue(handle1d(id,num)%desc_handle, &
+          DFTI_NUMBER_OF_TRANSFORMS, howmany)
+     IF (status /= 0) WRITE(*,*) 'FOURCOL_MKL 1:', DftiErrorMessage(status)
+     status = DftiSetValue(handle1d(id,num)%desc_handle, &
+          DFTI_INPUT_DISTANCE, dim1)
+     IF (status /= 0) WRITE(*,*) 'FOURCOL_MKL 2:', DftiErrorMessage(status)
+     status = DftiSetValue(handle1d(id,num)%desc_handle, &
+          DFTI_OUTPUT_DISTANCE, dim1)
+     IF (status /= 0) WRITE(*,*) 'FOURCOL_MKL 3:', DftiErrorMessage(status)
+!
+     status = DftiCommitDescriptor(handle1d(id,num)%desc_handle)
+     IF (status /= 0) WRITE(*,*) 'FOURCOL_MKL 4:', DftiErrorMessage(status)
+!
+  END IF
+!
+  SELECT CASE (isign)
+  CASE (-1)
+     status = DftiComputeForward(handle1d(id,num)%desc_handle, arr)
+  CASE (1)
+     status = DftiComputeBackward(handle1d(id,num)%desc_handle, arr)
+  END SELECT
+!
+END SUBROUTINE fourcol_mkl
+!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!
+SUBROUTINE fourrow_mkl(arr, howmany, dim2, isign, init_flag, id, num)
+!
+! COMMENT: This subroutine is necessary to prevent the Lahey/Fujitsu
+!          compiler from making a copy of array arr when passing
+!          arguments.
+!
+  USE fft, ONLY: handle1d
+!
+  USE mkl_dfti
+!
+  IMPLICIT NONE
+!
+! Dummy arguments
+!
+  DOUBLE COMPLEX, DIMENSION(*), INTENT(INOUT) :: arr
+  INTEGER,               INTENT(IN)    :: howmany
+  INTEGER,               INTENT(IN)    :: dim2
+  INTEGER,               INTENT(IN)    :: isign
+  LOGICAL,               INTENT(IN)    :: init_flag
+  INTEGER,               INTENT(IN)    :: id
+  INTEGER,               INTENT(IN)    :: num
+!
+! Local variables
+!
+  INTEGER :: i, status
+  INTEGER, DIMENSION(2) :: stride
+!
+  IF (init_flag) THEN
+!
+     stride(1) = 0
+     stride(2) = howmany
+!
+     status = DftiCreateDescriptor(handle1d(id,num)%desc_handle, &
+          DFTI_DOUBLE, DFTI_COMPLEX, 1, dim2)
+     IF (status /= 0) WRITE(*,*) 'FOURROW_MKL 0:', DftiErrorMessage(status)
+
+     status = DftiSetValue(handle1d(id,num)%desc_handle, &
+          DFTI_NUMBER_OF_TRANSFORMS, dim2)
+     IF (status /= 0) WRITE(*,*) 'FOURROW_MKL 1:', DftiErrorMessage(status)
+!
+     status = DftiSetValue(handle1d(id,num)%desc_handle, &
+          DFTI_INPUT_DISTANCE, 1)
+     IF (status /= 0) WRITE(*,*) 'FOURROW_MKL 2:', DftiErrorMessage(status)
+     status = DftiSetValue(handle1d(id,num)%desc_handle, &
+          DFTI_INPUT_STRIDES, stride)
+     IF (status /= 0) WRITE(*,*) 'FOURROW_MKL 3:', DftiErrorMessage(status)
+!
+     status = DftiSetValue(handle1d(id,num)%desc_handle, &
+          DFTI_OUTPUT_DISTANCE, 1)
+     IF (status /= 0) WRITE(*,*) 'FOURROW_MKL 4:', DftiErrorMessage(status)
+     status = DftiSetValue(handle1d(id,num)%desc_handle, &
+          DFTI_OUTPUT_STRIDES, stride)
+     IF (status /= 0) WRITE(*,*) 'FOURROW_MKL 5:', DftiErrorMessage(status)
+!
+     status = DftiCommitDescriptor(handle1d(id,num)%desc_handle)
+     IF (status /= 0) WRITE(*,*) 'FOURROW_MKL 6:', DftiErrorMessage(status)
+!
+  END IF
+!
+  SELECT CASE (isign)
+  CASE (-1)
+     status = DftiComputeForward(handle1d(id,num)%desc_handle, arr)
+  CASE (1)
+     status = DftiComputeBackward(handle1d(id,num)%desc_handle, arr)
+  END SELECT
+!
+END SUBROUTINE fourrow_mkl
+#endif
+!
diff --git a/fft/fftw_f77.h b/fft/fftw_f77.h
new file mode 100644
index 0000000..b825f23
--- /dev/null
+++ b/fft/fftw_f77.h
@@ -0,0 +1,53 @@
+!>
+!> @file fftw_f77.h
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+!     This file contains PARAMETER statements for various constants
+!     that can be passed to FFTW routines.  You should include
+!     this file in any FORTRAN program that calls the fftw_f77
+!     routines (either directly or with an #include statement
+!     if you use the C preprocessor).
+
+      integer FFTW_FORWARD,FFTW_BACKWARD
+      parameter (FFTW_FORWARD=-1,FFTW_BACKWARD=1)
+
+      integer FFTW_REAL_TO_COMPLEX,FFTW_COMPLEX_TO_REAL
+      parameter (FFTW_REAL_TO_COMPLEX=-1,FFTW_COMPLEX_TO_REAL=1)
+
+      integer FFTW_ESTIMATE,FFTW_MEASURE
+      parameter (FFTW_ESTIMATE=0,FFTW_MEASURE=1)
+
+      integer FFTW_OUT_OF_PLACE,FFTW_IN_PLACE,FFTW_USE_WISDOM
+      parameter (FFTW_OUT_OF_PLACE=0)
+      parameter (FFTW_IN_PLACE=8,FFTW_USE_WISDOM=16)
+
+      integer FFTW_THREADSAFE
+      parameter (FFTW_THREADSAFE=128)
+
+!     Constants for the MPI wrappers:
+      integer FFTW_TRANSPOSED_ORDER, FFTW_NORMAL_ORDER
+      integer FFTW_SCRAMBLED_INPUT, FFTW_SCRAMBLED_OUTPUT
+      parameter(FFTW_TRANSPOSED_ORDER=1, FFTW_NORMAL_ORDER=0)
+      parameter(FFTW_SCRAMBLED_INPUT=8192)
+      parameter(FFTW_SCRAMBLED_OUTPUT=16384)
diff --git a/fft/tfft.f90 b/fft/tfft.f90
new file mode 100644
index 0000000..866a66b
--- /dev/null
+++ b/fft/tfft.f90
@@ -0,0 +1,141 @@
+!>
+!> @file tfft.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!  Test the FFT routines exported by module fft
+!
+  USE fft
+  IMPLICIT NONE
+  INTEGER :: nx=8
+  DOUBLE COMPLEX, DIMENSION(:,:), ALLOCATABLE :: a,b,c
+  DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: rn1, rn2
+  DOUBLE PRECISION :: pi, argx
+  INTEGER :: ix, nx0=2
+!
+  PRINT*, 'Enter array dim. nx'
+  READ(*,*) nx
+  ALLOCATE(a(nx,4), b(nx,4), c(4,nx))
+  ALLOCATE(rn1(nx,4), rn2(nx,4))
+!
+!  Create initial array
+  pi = 4.0d0*ATAN(1.0d0)
+  WRITE(*,*) 'Enter mode nx0'
+  READ(*,*) nx0
+  DO ix=0,nx-1
+     argx = 2.0d0*pi/nx*nx0*ix
+     a(ix+1,:) = COS(argx)
+  END DO
+!
+  WRITE(*,*) '-----------------------'
+  WRITE(*,*) 'Reals of original array'
+  WRITE(*,*) '-----------------------'
+  WRITE(*,'(10f10.4)') REAL(a)
+!________________________________________________________________________________
+!
+  WRITE(*,*) '-----------------'
+  WRITE(*,*) 'testing four1 ...'
+  WRITE(*,*) '-----------------'
+!
+!  Forward transform
+  b=a
+  CALL four1(b(:,1), -1)
+!
+!  Check
+  WRITE(*,'(a/(10f10.4))') 'Reals of FFT', REAL(b(:,1))
+  WRITE(*,'(a/(10f10.4))') 'Imag of FFT', AIMAG(b(:,1))
+!
+!  Backward transform
+  CALL four1(b(:,1), 1)
+  b = b/REAL(nx)
+!
+!  Check
+  WRITE(*,'(a/(10f10.4))') 'Reals of FFT', REAL(b(:,1))
+  WRITE(*,'(a/(10f10.4))') 'Imag of FFT', AIMAG(b(:,1))
+!
+  b(:,1) = b(:,1)-a(:,1)
+  rn1(:,1) = REAL(b(:,1),8)
+  rn2(:,1) = AIMAG(b(:,1))
+  PRINT *, 'Min. max err of real', MINVAL(rn1(:,1)), MAXVAL(rn1(:,1))
+  PRINT *, 'Min. max err of imag', MINVAL(rn2(:,1)), MAXVAL(rn2(:,1))
+!________________________________________________________________________________
+!
+  WRITE(*,*) '-------------------'
+  WRITE(*,*) 'testing fourcol ...'
+  WRITE(*,*) '-------------------'
+!
+!  Forward transform
+  b=a
+  CALL fourcol(b(:,:), -1)
+!
+!  Check
+  WRITE(*,'(a/(10f10.4))') 'Reals of FFT', REAL(b(:,1))
+  WRITE(*,'(a/(10f10.4))') 'Imag of FFT', AIMAG(b(:,1))
+!
+!  Backward transform
+  CALL fourcol(b(:,:), 1)
+  b = b/REAL(nx)
+!
+!  Check
+  WRITE(*,'(a/(10f10.4))') 'Reals of FFT', REAL(b(:,1))
+  WRITE(*,'(a/(10f10.4))') 'Imag of FFT', AIMAG(b(:,1))
+!
+  b = b-a
+  rn1 = REAL(b,8)
+  rn2 = AIMAG(b)
+  PRINT *, 'Min. max err of real', MINVAL(rn1), MAXVAL(rn1)
+  PRINT *, 'Min. max err of imag', MINVAL(rn2), MAXVAL(rn2)
+!________________________________________________________________________________
+!
+  WRITE(*,*) '-------------------'
+  WRITE(*,*) 'testing fourrow ...'
+  WRITE(*,*) '-------------------'
+!
+!  Forward transform
+  b=a
+  c = TRANSPOSE(b)
+  CALL fourrow(c, -1)
+!
+!  Check
+  WRITE(*,'(a/(10f10.4))') 'Reals of FFT', REAL(c(1,:))
+  WRITE(*,'(a/(10f10.4))') 'Imag of FFT', AIMAG(c(1,:))
+!
+!  Backward transform
+  CALL fourrow(c, 1)
+  c = c/REAL(nx)
+!
+!  Check
+  WRITE(*,'(a/(10f10.4))') 'Reals of FFT', REAL(c(1,:))
+  WRITE(*,'(a/(10f10.4))') 'Imag of FFT', AIMAG(c(1,:))
+!
+  b = TRANSPOSE(c)-a
+  rn1 = REAL(b,8)
+  rn2 = AIMAG(b)
+  PRINT *, 'Min. max err of real', MINVAL(rn1), MAXVAL(rn1)
+  PRINT *, 'Min. max err of imag', MINVAL(rn2), MAXVAL(rn2)
+!
+!  Clean up
+  DEALLOCATE(a,b,c, rn1,rn2)
+END PROGRAM main
diff --git a/matlab/cds_mat.m b/matlab/cds_mat.m
new file mode 100644
index 0000000..e2a79d4
--- /dev/null
+++ b/matlab/cds_mat.m
@@ -0,0 +1,44 @@
+%
+% @file cds_mat.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+function [mata, diag] = cds_mat(file, dset)
+    n=double(hdf5read(file,dset, 'RANK'));
+    dists=double(hdf5read(file, strcat(dset,'/dists'))); 
+    val=hdf5read(file, strcat(dset,'/vals'));
+    
+    %%    Shift the off-diagonals  %%
+    for k=1:length(dists)
+        d=dists(k);
+        if d < 0
+            val(1:n+d,k) = val(1-d:n,k);
+        elseif d > 0
+            val(n:-1:d+1,k) = val(n-d:-1:1,k);
+        end
+    end
+    mata = spdiags(val, dists, n,n);
+    if nargout == 2
+        idiag = find(dists==0);
+        diag = val(:,idiag);
+    end
diff --git a/matlab/csr_mat.m b/matlab/csr_mat.m
new file mode 100644
index 0000000..ddfa679
--- /dev/null
+++ b/matlab/csr_mat.m
@@ -0,0 +1,45 @@
+%
+% @file csr_mat.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+function [mata, diag] = csr_mat(file, dset)
+    n=hdf5read(file,dset, 'RANK');
+    nnz=hdf5read(file,dset, 'NNZ');
+    cols=hdf5read(file, strcat(dset,'/cols')); 
+    irow=hdf5read(file, strcat(dset,'/irow')); 
+    val=hdf5read(file, strcat(dset,'/val')); 
+    idiag=hdf5read(file, strcat(dset,'/idiag')); 
+    for i=1:n
+        s = irow(i);
+        e = irow(i+1)-1;
+        rows(s:e) = i;    
+    end
+    cols=double(cols);
+    rows=double(rows);
+    vals = double(val);
+    mata = sparse(rows,cols,vals);
+    if nargout == 2
+        diag = val(idiag);
+    end
+    
diff --git a/matlab/driv1.m b/matlab/driv1.m
new file mode 100644
index 0000000..2d28ea7
--- /dev/null
+++ b/matlab/driv1.m
@@ -0,0 +1,68 @@
+%
+% @file driv1.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+file='driv1.h5';
+
+x = hdf5read(file, '/X');
+knotsx = hdf5read(file, 'KNOTSX');
+splinesx = hdf5read(file, '/splinesx','V71Dimensions', true);
+y = hdf5read(file, '/Y');
+knotsy = hdf5read(file, 'KNOTSY');
+splinesy = hdf5read(file, '/splinesy','V71Dimensions', true);
+
+c=['b','g','r','c','m','y','k'];
+nc=size(c,2);
+
+figure
+%subplot(211)
+hold on
+ns = size(splinesx,1);
+attr=hdf5read(file,'/splinesx/title'); title_ann=attr.Data;
+for i = 1:ns
+    cc = mod(i-1,nc)+1;
+    plot(x,splinesx(i,:),c(cc))
+end
+yk=zeros(size(knotsx));
+plot(knotsx,yk,'ro');
+grid on
+title(title_ann)
+ylabel('Splines')
+
+figure
+%subplot(212)
+hold on
+ns = size(splinesy,1);
+attr=hdf5read(file,'/splinesy/title'); title_ann=attr.Data;
+for i = 1:ns
+    cc = mod(i-1,nc)+1;
+    plot(y,splinesy(i,:),c(cc))
+end
+yk=zeros(size(knotsy));
+i1=find(knotsy==y(1));
+i2=find(knotsy>=y(size(y,1)),1);
+plot(knotsy(i1:i2),yk(i1:i2),'ro');
+grid on
+title(title_ann)
+ylabel('Splines')
diff --git a/matlab/fit.m b/matlab/fit.m
new file mode 100644
index 0000000..64b90e8
--- /dev/null
+++ b/matlab/fit.m
@@ -0,0 +1,38 @@
+%
+% @file fit.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+P1=polyfit(log(n),log(err1),1)
+P2=polyfit(log(n),log(err2),1)
+P3=polyfit(log(n),log(err3),1)
+
+figure
+loglog(n,err1,'o', n, exp(P1(2)).*n.^P1(1), 'b')
+hold on
+loglog(n,err2,'rh', n, exp(P2(2)).*n.^P2(1),'r')
+loglog(n,err3,'*k', n, exp(P3(2)).*n.^P3(1), 'k')
+grid on
+xlabel('Number of intervals N');
+ylabel('Discretization Error')
+title('2D Cylindrical problem with m=1, s=10')
diff --git a/matlab/fit1d.m b/matlab/fit1d.m
new file mode 100644
index 0000000..6475e77
--- /dev/null
+++ b/matlab/fit1d.m
@@ -0,0 +1,94 @@
+%
+% @file fit1d.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+file='fit1d.h5';
+
+x = hdf5read(file, '/X');
+f =  hdf5read(file, '/FCALC');
+fexact =  hdf5read(file, '/FEXACT');
+error =  hdf5read(file, '/ERROR');
+
+f1 =  hdf5read(file, '/FCALC1');
+fexact1 =  hdf5read(file, '/FEXACT1');
+error1 =  hdf5read(file, '/ERROR1');
+
+splines = hdf5read(file, '/SPLINES'); splines = splines';
+%
+%     Attributes
+%
+nidbas = hdf5read(file,'/NIDBAS');
+nx = hdf5read(file,'/NX');
+attr=hdf5read(file,'/FEXACT/title'); fexact_ann=attr.Data;
+attr=hdf5read(file,'/FCALC/title'); f_ann=attr.Data;
+attr=hdf5read(file,'/ERROR/title'); error_ann=attr.Data;
+
+attr=hdf5read(file,'/FEXACT1/title'); fexact1_ann=attr.Data;
+attr=hdf5read(file,'/FCALC1/title'); f1_ann=attr.Data;
+attr=hdf5read(file,'/ERROR1/title'); error1_ann=attr.Data;
+
+label=sprintf('Splines of degree %d, NX =%d', nidbas, nx);
+
+ns = size(splines,1);
+
+
+c=['b','g','r','c','m','y','k'];
+nc=size(c,2);
+
+figure
+subplot(511)
+hold on
+for i = 1:ns
+    cc = mod(i-1,nc)+1;
+    plot(x,splines(i,:),c(cc))
+end
+grid on
+ylabel('Splines')
+xlabel('X')
+title(label);
+hold off
+
+subplot(512)
+plot(x, f, 'o', x, fexact)
+legend(f_ann, fexact_ann)
+xlabel('X')
+grid on
+
+subplot(513)
+plot(x, error)
+ylabel(error_ann)
+xlabel('X')
+grid on
+
+subplot(514)
+plot(x, f1, 'h', x, fexact1)
+legend(f1_ann, fexact1_ann)
+xlabel('X')
+grid on
+
+subplot(515)
+plot(x, error1)
+ylabel(error1_ann)
+xlabel('X')
+grid on
diff --git a/matlab/fit2d.m b/matlab/fit2d.m
new file mode 100644
index 0000000..f225459
--- /dev/null
+++ b/matlab/fit2d.m
@@ -0,0 +1,80 @@
+%
+% @file fit2d.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+file='fit2d.h5';
+%
+%     Get data from data sets
+%
+r=hdf5read(file,'/xpt');
+t=hdf5read(file,'/ypt');
+
+fcalc=hdf5read(file,'/fcalc');
+fexact=hdf5read(file,'/fexact');
+errs=hdf5read(file,'/errs');
+%
+%     Attributes
+%
+NR=hdf5read(file,'/NX'); NTH=hdf5read(file,'/NY');
+NIDBAS1=hdf5read(file,'/NIDBAS1');
+NIDBAS2=hdf5read(file,'/NIDBAS2');
+MBESS=hdf5read(file,'/MBESS');
+LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), mbess = %d', NR, NTH, ...
+              NIDBAS1, NIDBAS2, MBESS);
+
+attr=hdf5read(file,'/xpt/title'); x_ann=attr.Data;
+attr=hdf5read(file,'/ypt/title'); y_ann=attr.Data;
+attr=hdf5read(file,'/fcalc/title'); fcalc_ann=attr.Data;
+attr=hdf5read(file,'/fexact/title'); fexact_ann=attr.Data;
+attr=hdf5read(file,'/errs/title');errs_ann=attr.Data;
+
+[R,T]=meshgrid(r,t);
+x = R.*cos(T); y= R.*sin(T);
+
+figure
+subplot(221)
+pcolor(double(x),double(y),double(fcalc));
+shading interp
+xlabel('X'); ylabel('Y')
+title(LABEL)
+colorbar
+
+subplot(222)
+pcolor(double(x),double(y),double(fexact))
+shading interp
+axis image
+xlabel('X'); ylabel('Y')
+title('X-Y plane')
+colorbar
+
+subplot(223)
+surfc(double(x),double(y),double(errs))
+xlabel('X'); ylabel('Y');
+title(errs_ann)
+
+%% Plot error at theta ~ pi/4
+k = max(find(t<pi/4));
+err=fcalc(k,:)-fexact(k,:);
+figure
+plot(r, err)
diff --git a/matlab/fitlog.m b/matlab/fitlog.m
new file mode 100644
index 0000000..29fe33f
--- /dev/null
+++ b/matlab/fitlog.m
@@ -0,0 +1,28 @@
+%
+% @file fitlog.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+function [yfit, P] = fitlog(x,y)
+P=polyfit(log(x),log(y),1);
+yfit = exp(P(2)).*x.^P(1);
diff --git a/matlab/fourier_gs.m b/matlab/fourier_gs.m
new file mode 100644
index 0000000..09bbb05
--- /dev/null
+++ b/matlab/fourier_gs.m
@@ -0,0 +1,69 @@
+%
+% @file fourier_gs.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+clear all
+tau=0;
+alpha=0.5;
+
+theta=-pi:0.02:pi;
+[x,y] = meshgrid(theta,theta);
+n1=length(theta);
+n2=n1;
+
+%%%%
+%%%% Gauss-Seidel relaxations
+%%%%
+str_title=sprintf('tau = %.1f, alpha = %.2f',tau, alpha);
+ee = exp(i.*theta);
+eep= conj(ee);
+csin= alpha.*complex(alpha, (tau/2).*imag(ee));
+G=zeros(n1,n2);
+for ii=1:n1
+    for jj=1:n2
+        num = ee(ii) + csin(ii)*ee(jj);
+        G(ii,jj) = num / (2*(1+alpha^2) - conj(num));
+    end
+end
+
+
+figure
+hold off
+G0=(ee+csin)./(2*(1+alpha^2)-(eep+conj(csin)));
+plot(theta, abs(G(:,1)), 'r', 'LineWidth', 2)
+hold on
+plot(theta, abs(G0), 'g', 'LineWidth', 2)
+for jj=1:20:n2
+    plot(theta, abs(G(:,jj)), 'b')
+end
+xlabel('\theta_1'); ylabel('Amplification Factor for Gauss-Seidel')
+title(str_title)
+
+% $$$ figure
+% $$$ mesh(x,y,abs(G))
+% $$$ xlabel('\theta_1'); ylabel('\theta_2')
+% $$$ title(str_title);
+% $$$ view(-120,25)
+
+max(max(abs(G)))
diff --git a/matlab/fourier_jac.m b/matlab/fourier_jac.m
new file mode 100644
index 0000000..4e9da71
--- /dev/null
+++ b/matlab/fourier_jac.m
@@ -0,0 +1,69 @@
+%
+% @file fourier_jac.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+clear all
+omega=0.8;
+tau=-2;
+alpha=1;
+
+c=2*omega/(1+alpha^2);
+str_title=sprintf('omega =  %.1f, tau = %.1f, alpha = %.2f', omega, ...
+                  tau, alpha) 
+
+theta1=-pi:0.01:pi;
+theta2=-pi:0.01:pi;
+[x,y] = meshgrid(theta1,theta2);
+
+n1=length(theta1);
+n2=length(theta2);
+
+%%%%
+%%%% Damped Jacobi relaxations
+%%%%
+G=zeros(n1,n2);
+for ii=1:n1
+    for jj=1:n2
+        G(ii,jj) = 1-c.*( sin(theta1(ii)/2)^2 + alpha^2*sin(theta2(jj)/2)^2 ...
+                 + 0.25*alpha*tau*sin(theta1(ii))*sin(theta2(jj)) );
+    end
+end
+
+figure
+hold off
+G0 = 1-c.*sin(theta1./2).^2;
+plot(theta1, G(:,1), 'r', 'LineWidth', 2)
+hold on
+plot(theta1, G0, 'g', 'LineWidth', 2)
+for jj=1:20:n2
+    plot(theta1, G(:,jj), 'b')
+end
+xlabel('\theta_1'); ylabel('Amplification Factor for Jacobi')
+title(str_title)
+% $$$ figure
+% $$$ mesh(x,y,G)
+% $$$ xlabel('\theta_1'); ylabel('\theta_2')
+% $$$ title(str_title);
+
+max(max(abs(G)))
diff --git a/matlab/fourier_smooth.m b/matlab/fourier_smooth.m
new file mode 100644
index 0000000..45e8a7d
--- /dev/null
+++ b/matlab/fourier_smooth.m
@@ -0,0 +1,106 @@
+%
+% @file fourier_smooth.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+%   Brute force computation of \mu = MAX |G| for
+%   damped Jacobi and Gauss-Seidel relaxation
+%
+%   Find optimal damping factor \omega for Jacobi relaxations
+%
+if ~exist('tau','var'), tau=0; end
+if ~exist('alpha','var'), alpha=1.0; end
+dth=0.01;
+
+theta1=-pi+dth:dth:pi;
+theta2=-pi+dth:dth:pi;
+nth1=length(theta1);
+nth2=length(theta2);
+
+sint12=sin(theta1./2).^2;
+sint22=sin(theta2./2).^2;
+
+sint1=sin(theta1);
+sint2=sin(theta2);
+[S1,S2]=meshgrid(sint1,sint2);
+ctau=0.25*alpha*tau;
+
+omega=0.5:0.002:1;
+n=length(omega);
+G = zeros(nth1,nth2);
+for i=1:n
+  c=2*omega(i)/(1+alpha^2);
+  for i1=1:nth1
+      for i2=1:nth2
+          if or(abs(theta1(i1))>= pi/2, abs(theta2(i2)) >= pi/2);
+              G(i1,i2) = abs(1-c*(sint12(i1)+alpha^2*sint22(i2) + ...
+                                 ctau*sint1(i1)*sint2(i2)));
+          end
+      end
+  end
+  [gmax,imax]=max(G);
+  [mu(i),jmax]=max(gmax);
+  theta1_opt(i) = theta1(imax(jmax));
+  theta2_opt(i) = theta2(jmax);
+end
+[mu_min,i_min]=min(mu);
+omega_opt=omega(i_min);
+str_title=sprintf(['omega = %.3f, mu = %.3f, alpha = %.2f, tau = ' ...
+'%.1f'], omega_opt, mu_min, alpha, tau);
+
+figure
+subplot(211)
+plot(omega,mu,'LineWidth',2);
+xlabel('\omega'); ylabel('\mu')
+grid on
+title(str_title);
+subplot(212)
+plot(omega, theta1_opt, omega, theta2_opt,'LineWidth',2);
+legend('\theta_{1opt}', '\theta_{2opt}')
+xlabel('\omega'); ylabel('optimum \theta')
+grid on
+%
+%   \mu for Gauss-Seidel relaxation
+%
+Ggs = zeros(nth1,nth2);
+exp1=complex(cos(theta1),sin(theta1));
+exp2=complex(cos(theta2),sin(theta2));
+c=2*(1+alpha^2);
+ctau=complex(alpha^2, (0.5*alpha*tau).*sint1);
+for i1=1:nth1
+    for i2=1:nth2
+        if or(abs(theta1(i1))>= pi/2, abs(theta2(i2)) >= pi/2);
+            Num = exp1(i1) + ctau(i1)*exp2(i2);
+            Ggs(i1,i2) = abs( Num/(c-conj(Num)) );
+        end
+    end
+end
+[gmax,imax]=max(Ggs);
+[mugs,jmax]=max(gmax);
+theta1_gs_opt = theta1(imax(jmax));
+theta2_gs_opt = theta2(jmax);
+fprintf('alpha = %.2f, tau = %.1f, Ggs = %.4f, theta1 = %.4f, theta2 = %.4f\n', alpha, tau, ...
+        mugs, theta1_gs_opt, theta2_gs_opt);
+subplot(211)
+hold on
+plot(omega,repmat(mugs,1,length(omega)),'r--','LineWidth',2)
diff --git a/matlab/gb_mat.m b/matlab/gb_mat.m
new file mode 100644
index 0000000..a15b31d
--- /dev/null
+++ b/matlab/gb_mat.m
@@ -0,0 +1,43 @@
+%
+% @file gb_mat.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+function [mata] = gb_mat(file, dset)
+rank=h5readatt(file, dset, 'RANK');
+ku=h5readatt(file, dset, 'KU');
+kl=h5readatt(file, dset, 'KL');
+gbmat=h5read(file, dset);
+m=rank; n=rank;
+mata = zeros(m,n);
+for i=1:m
+    jmin = max(1,i-kl);
+    jmax = min(n,i+ku);
+    for j=jmin:jmax
+        ib = kl+ku+i-j+1;
+        mata(i,j)=gbmat(ib,j);
+    end
+end
+clear gbmat;
+
+    
\ No newline at end of file
diff --git a/matlab/gbmat.m b/matlab/gbmat.m
new file mode 100644
index 0000000..3bb7c5e
--- /dev/null
+++ b/matlab/gbmat.m
@@ -0,0 +1,60 @@
+%
+% @file gbmat.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+file='pde2d.h5';
+%
+%  Attributes of GB matrix
+%
+NR=hdf5read(file,'/NX'); NTH=hdf5read(file,'/NY');
+NIDBAS1=hdf5read(file,'/NIDBAS1');
+NIDBAS2=hdf5read(file,'/NIDBAS2');
+
+%mat='/MAT1';
+rank=hdf5read(file,strcat(mat,'/RANK'));
+ku=hdf5read(file,strcat(mat,'/KU'));
+kl=hdf5read(file,strcat(mat,'/KL'));
+gb_mat=hdf5read(file,mat);
+rhs0=hdf5read(file,'/RHS');
+sol0=hdf5read(file,'/SOL');
+%
+m=rank; n=rank;
+a = zeros(m,n);
+for i=1:m
+    jmin = max(1,i-kl);
+    jmax = min(n,i+ku);
+    for j=jmin:jmax
+        ib = kl+ku+i-j+1;
+        a(i,j)=gb_mat(ib,j);
+    end
+end
+%clear gb_mat;
+S = sparse(a);
+%clear a;
+figure
+spy(S);
+LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), rank = %d', NR, NTH, ...
+              NIDBAS1, NIDBAS2, rank);
+title(LABEL)
+
diff --git a/matlab/gs_fd.m b/matlab/gs_fd.m
new file mode 100644
index 0000000..8e8c1b6
--- /dev/null
+++ b/matlab/gs_fd.m
@@ -0,0 +1,66 @@
+%
+% @file gs_fd.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+clear all
+
+N=1024; 
+L=100;
+n0=1;
+Narr=[128 256 512 1024];
+
+for i=1:length(Narr);
+    N=Narr(i);
+    h=L/N; h2=h^2;
+    title_str=sprintf(['N=%d, L=%.1f, n0=%.1f'], N, L, n0);
+    v0=-2.+n0*h2;
+    u0=1.;
+    dom(i)= 2*abs(u0)/abs(v0);
+    v=v0.*ones(N,1);
+    u=u0*ones(N-1,1);
+    mata=diag(u,1) + diag(u,-1) + diag(v);
+    matl= tril(mata,0); % D+L
+    lambda = eig(-triu(mata,1),matl);
+    rho(i)=max(abs(lambda));
+    
+% $$$     figure
+% $$$     plot(lambda,'o')
+% $$$     xlabel('Real eigenvalue'), ylabel('Imag eigenvalue')
+% $$$     grid on
+% $$$     axis equal
+% $$$     title_str=sprintf(['N=%d, L=%.1f, n0=%.1f, Spec. Radius=%.4f, dom=%.4f'], N, L, ...
+% $$$                       n0, rho(i),dom(i));
+% $$$     title(title_str);
+    fprintf(1, 'Spectral Radius of GS relaxation matrix = %.4f, dom=%.4f\n', ...
+            rho(i),dom(i))
+end
+
+figure
+plot(Narr, rho,'o-')
+xlabel('N'); ylabel('GS spectral radius')
+title_str=sprintf(['FD scheme, L=%.1f, n0=%.1f'], L, n0);
+title(title_str);
+grid on
+
+
diff --git a/matlab/gs_fe.m b/matlab/gs_fe.m
new file mode 100644
index 0000000..7dfaa33
--- /dev/null
+++ b/matlab/gs_fe.m
@@ -0,0 +1,66 @@
+%
+% @file gs_fe.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+clear all
+
+N=1024; 
+L=100;
+n0=1;
+Narr=[128 256 512 1024];
+
+for i=1:length(Narr);
+    N=Narr(i);
+    h=L/N; h2=h^2;
+    title_str=sprintf(['N=%d, L=%.1f, n0=%.1f'], N, L, n0);
+    v0=-2.+2.0*n0*h2/6.0;
+    u0=1.+n0*h2/6.0;
+    dom(i)= 2*abs(u0)/abs(v0);
+    v=v0.*ones(N,1);
+    u=u0*ones(N-1,1);
+    mata=diag(u,1) + diag(u,-1) + diag(v);
+    matl= tril(mata,0); % D+L
+    lambda = eig(-triu(mata,1),matl);
+    rho(i)=max(abs(lambda));
+    
+% $$$     figure
+% $$$     plot(lambda,'o')
+% $$$     xlabel('Real eigenvalue'), ylabel('Imag eigenvalue')
+% $$$     grid on
+% $$$     axis equal
+% $$$     title_str=sprintf(['N=%d, L=%.1f, n0=%.1f, Spec. Radius=%.4f, dom=%.4f'], N, L, ...
+% $$$                       n0, rho(i),dom(i));
+% $$$     title(title_str);
+    fprintf(1, 'Spectral Radius of GS relaxation matrix = %.4f, dom=%.4f\n', ...
+            rho(i),dom(i))
+end
+
+figure
+plot(Narr, rho,'o-')
+xlabel('N'); ylabel('GS spectral radius')
+title_str=sprintf(['FE scheme, L=%.1f, n0=%.1f'], L, n0);
+title(title_str);
+grid on
+
+
diff --git a/matlab/h5Complex.m b/matlab/h5Complex.m
new file mode 100644
index 0000000..ab338f9
--- /dev/null
+++ b/matlab/h5Complex.m
@@ -0,0 +1,50 @@
+%
+% @file h5Complex.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+function z = h5Complex(file, dset)
+    data = hdf5read(file, dset);
+    dims = size(data);
+    rank = size(dims,2);
+    switch rank
+      case {1}
+        for i=1:dims(1)
+              z(i)=complex(cell2mat(data(i,1).Data(1)), cell2mat(data(i,1).Data(2)));
+        end
+      case {2}
+        for i=1:dims(1)
+            for j=1:dims(2)
+                z(i,j)=complex(cell2mat(data(i,j).Data(1)), cell2mat(data(i,j).Data(2)));
+            end
+        end
+      case {3}
+        for i=1:dims(1)
+            for j=1:dims(2)
+                for k=1:dims(3)
+                    z(i,j,k)=complex(cell2mat(data(i,j,k).Data(1)), cell2mat(data(i,j,k).Data(2)));
+                end
+            end
+        
+        end
+    end
diff --git a/matlab/h5Complex_ll.m b/matlab/h5Complex_ll.m
new file mode 100644
index 0000000..ad43007
--- /dev/null
+++ b/matlab/h5Complex_ll.m
@@ -0,0 +1,33 @@
+%
+% @file h5Complex_ll.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+function z = h5Complex_ll(file, dset)
+    fid=H5F.open(file, 'H5F_ACC_RDONLY', 'H5P_DEFAULT');
+    dset_id=H5D.open(fid, dset);
+    dxpl = 'H5P_DEFAULT';
+    data = H5D.read(dset_id,'H5ML_DEFAULT','H5S_ALL','H5S_ALL', dxpl);
+    z = complex(data.real, data.imaginary);
+    H5D.close(dset_id);
+    H5F.close(fid);
diff --git a/matlab/jac_opt.m b/matlab/jac_opt.m
new file mode 100644
index 0000000..82d6f70
--- /dev/null
+++ b/matlab/jac_opt.m
@@ -0,0 +1,78 @@
+%
+% @file jac_opt.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+%
+%   Find optimal damping factor \omega for Jacobi relaxations
+%
+clear all
+alpha=0.5;
+
+theta2=pi/2:0.01:pi;
+theta1=0:0.01:pi;
+[x,y]=meshgrid(theta1,theta1);
+
+sint12=sin(theta1./2).^2;
+sint22=sin(theta2./2).^2;
+[ksi,eta]=meshgrid(sint12,sint22);
+
+nth1=length(theta1);
+nth2=length(theta2);
+
+omega=0.1:0.01:1;
+n=length(omega);
+for i=1:n
+  c=2*omega(i)/(1+alpha^2);
+  G=abs(1 - c.*(ksi+(alpha^2).*eta ));
+  [gmax,imax]=max(G);
+  [mu(i),jmax]=max(gmax);
+  eta_opt(i)=eta(imax(jmax),jmax);
+  ksi_opt(i)=ksi(imax(jmax),jmax);
+end
+[mu_min,i_min]=min(mu);
+omega_opt=omega(i_min);
+str_title=sprintf('omega = %.3f, mu = %.3f, alpha = %.2f', omega_opt, ...
+                  mu_min, alpha); 
+
+figure
+plot(omega,mu,'o-', omega, ksi_opt, '*-', omega, eta_opt, '^-');
+legend('\mu', '\xi_{opt}', '\eta_{opt}')
+xlabel('\omega'); ylabel('\mu')
+grid on
+title(str_title);
+
+% $$$ 
+% $$$ c=2*omega_opt/(1+alpha^2);
+% $$$ G=1-c.*(ksi+(alpha^2).*eta);
+% $$$ figure
+% $$$ hold off
+% $$$ plot(theta1, G(1,:), 'r', 'LineWidth', 2)
+% $$$ hold on
+% $$$ plot(theta1, G(nth2,:), 'g', 'LineWidth', 2)
+% $$$ for jj=1:20:nth2
+% $$$     plot(theta1, G(jj,:), 'b')
+% $$$ end
+% $$$ xlabel('\theta_1'); ylabel('Amplification Factor for Jacobi')
+% $$$ title(str_title)
+% $$$ grid on
diff --git a/matlab/modes.m b/matlab/modes.m
new file mode 100644
index 0000000..ccb5c06
--- /dev/null
+++ b/matlab/modes.m
@@ -0,0 +1,84 @@
+%
+% @file modes.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+N=8;
+kmodes=N-1;
+
+%%  modes on fine grid %%
+xh=(1/N).*(0:1:N);
+wh=zeros(N+1,kmodes);
+for k=1:kmodes
+    wh(:,k) = sin((k*pi).*xh);
+end
+
+%%  Restriction %%
+R=[
+1 0 0 0 0 0 0 0 0
+0 0.5 1 0.5 0 0 0 0 0
+0 0 0 0.5 1 0.5 0 0 0
+0 0 0 0 0 0.5 1 0.5 0
+0 0 0 0 0 0 0 0 1
+];
+
+%% Null space of Restriction %%
+ns = [
+     0     0     0     0
+     2     0     0     0
+    -1    -1     0     0
+     0     2     0     0
+     0    -1    -1     0
+     0     0     2     0
+     0     0    -1    -1
+     0     0     0     2
+     0     0     0     0];
+
+figure
+subplot(211)
+plot(ns,'o-');
+title('Basis of Null space of Restriction')
+subplot(212)
+plot(R','o-');
+title('Basis of Range of Prolongation')
+
+
+%%  modes on coarse grid %%
+N2h = N/2;
+x2h=(1/N2h).*(0:1:N2h);
+w2h = R*wh;
+
+x=0:0.01:1.;
+figure
+for k=1:kmodes
+    subplot(3,3,k)
+    plot(xh,wh(:,k),'o', x, sin((k*pi).*x),'b-', x2h, w2h(:,k), 'ro-');
+    grid on
+end
+
+figure
+for k=1:N/2
+    subplot(2,2,k)
+    plot(xh,wh(:,k),'o-', xh,wh(:,N-k),'r*-')
+    grid on
+end
diff --git a/matlab/pde1d.m b/matlab/pde1d.m
new file mode 100644
index 0000000..86b6642
--- /dev/null
+++ b/matlab/pde1d.m
@@ -0,0 +1,72 @@
+%
+% @file pde1d.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+file='pde1d.h5';
+fprintf(1,'NX     = %d\n', hdf5read(file,'/NX'));
+fprintf(1,'NIDBAS = %d\n', hdf5read(file,'/NIDBAS'));
+fprintf(1,'NGAUSS = %d\n', hdf5read(file,'/NGAUSS'));
+fprintf(1,'KDIFF  = %d\n', hdf5read(file,'/KDIFF'));
+
+x = hdf5read(file, '/XGRID');
+f= hdf5read(file, '/SOLCAL');
+fexact= hdf5read(file, '/SOLANA');
+err=hdf5read(file, '/ERR'); 
+f=f';
+fexact=fexact';
+err=err';
+
+figure
+subplot(311)
+plot(x,f(1,:),'o',x,fexact(1,:))
+xlabel('X')
+ylabel('Function')
+grid on
+subplot(312)
+plot(x,f(2,:),'o',x,fexact(2,:))
+xlabel('X')
+ylabel('1st Derivative')
+grid on
+subplot(313)
+plot(x,f(3,:),'o',x,fexact(3,:))
+xlabel('X')
+ylabel('2nd Derivative')
+grid on
+
+figure
+subplot(311)
+plot(x,err(1,:),'o-')
+xlabel('X');
+ylabel('Errors on function');
+grid on
+subplot(312)
+plot(x,err(2,:),'o-')
+xlabel('X');
+ylabel('Errors on 1st derivative');
+grid on
+subplot(313)
+plot(x,err(3,:),'o-')
+xlabel('X');
+ylabel('Errors on 2nd derivative');
+grid on
diff --git a/matlab/pde1d_eig.m b/matlab/pde1d_eig.m
new file mode 100644
index 0000000..8b39b43
--- /dev/null
+++ b/matlab/pde1d_eig.m
@@ -0,0 +1,30 @@
+%
+% @file pde1d_eig.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+file='pde1d_eig.h5';
+
+%%% get sparse matrix and its diagonal elelments in dig %%%
+[mata,diag]=zcsr_mat(file,'/MAT');
+spy(mata,12);
diff --git a/matlab/pde1d_eig_zcsr.m b/matlab/pde1d_eig_zcsr.m
new file mode 100644
index 0000000..c6fa3e7
--- /dev/null
+++ b/matlab/pde1d_eig_zcsr.m
@@ -0,0 +1,30 @@
+%
+% @file pde1d_eig_zcsr.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+file='pde1d_eig.h5';
+
+%%% get sparse matrix and its diagonal elelments in dig %%%
+[mata,diag]=zcsr_mat(file,'/MAT');
+spy(mata,12);
diff --git a/matlab/pde1d_eig_zmumps.m b/matlab/pde1d_eig_zmumps.m
new file mode 100644
index 0000000..2a431bd
--- /dev/null
+++ b/matlab/pde1d_eig_zmumps.m
@@ -0,0 +1,55 @@
+%
+% @file pde1d_eig_zmumps.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+file='pde1d_eig_zmumps.h5';
+
+%%% Read mumps matrix and convert to Matlab sparse format
+[mata,diag_ele]=zmumps_mat(file,'/MAT');
+n=size(mata,1);
+
+%spy(mata,12);
+arpack_vals=h5Complex_ll(file,'/eig_vals');
+nev=size(arpack_vals,1);
+arpack_vecs = h5Complex_ll(file, '/eig_vecs');
+
+%%% Compute eigen values and vectors, using EIGS
+[V,D,FLAG]=eigs(mata,nev,'SM');
+[eigs_vals,perm]=sort(diag(D));
+eigs_vecs=V(:,perm);
+
+fprintf('Eigenvalues from Arpack and Matlab eigs\n');
+for i=1:nev
+   fprintf('%i (%.5e %.5e), %.5e\n',i,real(arpack_vals(i)),imag(arpack_vals(i)),eigs_vals(i));
+end
+fprintf('Norm of difference %.3e\n', norm(arpack_vals-eigs_vals,Inf));
+
+%%% Renormalize EIGS %%%
+fprintf('\n\nDiff of Eigenvectors from Arpack and Matlab eigs\n');
+for i=1:nev
+    nrm=arpack_vecs(1,i);
+    eigs_vecs(:,i) = (eigs_vecs(:,i)./eigs_vecs(1,i)).*nrm;
+    diff_vecs = norm(arpack_vecs(:,i)-eigs_vecs(:,i),Inf);
+    fprintf('%i %10.3e\n', i, diff_vecs);
+end
diff --git a/matlab/pde1dp.m b/matlab/pde1dp.m
new file mode 100644
index 0000000..97b7a6e
--- /dev/null
+++ b/matlab/pde1dp.m
@@ -0,0 +1,41 @@
+%
+% @file pde1dp.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+file='pde1dp.h5';
+fprintf(1,'NX     = %d\n', hdf5read(file,'/NX'));
+fprintf(1,'NIDBAS = %d\n', hdf5read(file,'/NIDBAS'));
+fprintf(1,'NGAUSS = %d\n', hdf5read(file,'/NGAUSS'));
+
+mata=hdf5read(file,'/mata');
+
+xpts = hdf5read(file, '/rhs/x');
+frhs = hdf5read(file, '/rhs/f');
+
+figure
+plot(xpts,frhs);
+xlabel('X')
+ylabel('RHS')
+grid on
+
diff --git a/matlab/pde1dp_cmpl.m b/matlab/pde1dp_cmpl.m
new file mode 100644
index 0000000..e07fc31
--- /dev/null
+++ b/matlab/pde1dp_cmpl.m
@@ -0,0 +1,49 @@
+%
+% @file pde1dp_cmpl.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+file='pde1dp_cmpl.h5';
+
+xgrid=hdf5read(file,'/xgrid');
+nx = size(xgrid)-1;
+
+rhs=h5Complex(file, '/rhs');
+sol=h5Complex(file, '/sol');
+%mat=h5Complex(file, '/mat');
+
+x=hdf5read(file,'/x');
+solana=h5Complex(file,'/solana');
+solcal=h5Complex(file,'/solcal');
+err=hdf5read(file, '/err');
+
+figure
+subplot(211)
+plot(x, real(solana),x,imag(solana),x,real(solcal),'o',x, ...
+     imag(solcal), '*')
+legend('Exact Real', 'Exact Imag', 'Calc. Real', 'Calc. Imag')
+xlabel('X'); ylabel('SOL');
+subplot(212)
+plot(x, err, 'o-');
+xlabel('X'); ylabel('|Error|')
+grid on
diff --git a/matlab/pde1dp_cmpl_dft.m b/matlab/pde1dp_cmpl_dft.m
new file mode 100644
index 0000000..10aa61a
--- /dev/null
+++ b/matlab/pde1dp_cmpl_dft.m
@@ -0,0 +1,70 @@
+%
+% @file pde1dp_cmpl_dft.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+file='pde1dp_cmpl_dft.h5';
+
+xgrid=hdf5read(file,'/xgrid');
+nx = size(xgrid)-1;
+mode=0:nx-1;
+
+rhs=h5Complex(file, '/rhs');
+sol=h5Complex(file, '/sol');
+rhs_fft=h5Complex(file, '/rhs_fft');
+sol_fft=h5Complex(file, '/sol_fft');
+mat=h5Complex(file, '/mat');
+
+x=hdf5read(file,'/x');
+solana=h5Complex(file,'/solana');
+solcal=h5Complex(file,'/solcal');
+err=hdf5read(file, '/err');
+
+figure
+subplot(211)
+plot(x, real(solana),x,imag(solana),x,real(solcal),'o',x, ...
+     imag(solcal), '*')
+legend('Exact Real', 'Exact Imag', 'Calc. Real', 'Calc. Imag')
+xlabel('X'); ylabel('SOL');
+subplot(212)
+plot(x, err, 'o-');
+xlabel('X'); ylabel('|Error|')
+grid on
+
+figure
+subplot(311)
+plot(mode, real(mat), 'o', mode, imag(mat), '*')
+xlabel('mode'); ylabel('DFT of MAT')
+legend('Real', 'Imag')
+grid on
+
+subplot(312)
+plot(mode, real(rhs_fft), 'o', mode, imag(rhs_fft), '*')
+xlabel('mode'); ylabel('DFT of RHS')
+legend('Real', 'Imag')
+grid on
+subplot(313)
+plot(mode, real(sol_fft), 'o', mode, imag(sol_fft), '*')
+xlabel('mode'); ylabel('DFT of SOL')
+legend('Real', 'Imag')
+grid on
diff --git a/matlab/pde1dp_cmpl_pardiso.m b/matlab/pde1dp_cmpl_pardiso.m
new file mode 100644
index 0000000..1878e8e
--- /dev/null
+++ b/matlab/pde1dp_cmpl_pardiso.m
@@ -0,0 +1,72 @@
+%
+% @file pde1dp_cmpl_pardiso.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+file='pde1dp_cmpl_pardiso.h5';
+
+xgrid=hdf5read(file,'/xgrid');
+nx = size(xgrid)-1;
+
+rhs=h5Complex(file, '/rhs');
+sol=h5Complex(file, '/sol');
+
+cols=hdf5read(file, '/MAT/cols');
+irow=hdf5read(file, '/MAT/irow');
+val=h5Complex(file, '/MAT/val');
+perm=hdf5read(file, '/MAT/perm');
+
+cols=double(cols);
+irow=double(irow);
+perm=double(perm);
+n = size(perm,1);
+nnz=size(val,1);
+
+rows = zeros(nnz,1);
+for i=1:n
+    s = irow(i);
+    e = irow(i+1)-1;
+    rows(s:e) = i;
+end
+mat = sparse(rows,cols,val);
+figure
+subplot(121);
+spy(mat);
+subplot(122);
+spy(mat(perm,perm));
+
+x=hdf5read(file,'/x');
+solana=h5Complex(file,'/solana');
+solcal=h5Complex(file,'/solcal');
+err=hdf5read(file, '/err');
+
+figure
+subplot(211)
+plot(x, real(solana),x,imag(solana),x,real(solcal),'o',x, ...
+     imag(solcal), '*')
+legend('Exact Real', 'Exact Imag', 'Calc. Real', 'Calc. Imag')
+xlabel('X'); ylabel('SOL');
+subplot(212)
+plot(x, err, 'o-');
+xlabel('X'); ylabel('|Error|')
+grid on
diff --git a/matlab/pde2d.m b/matlab/pde2d.m
new file mode 100644
index 0000000..c375cef
--- /dev/null
+++ b/matlab/pde2d.m
@@ -0,0 +1,85 @@
+%
+% @file pde2d.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+file='pde2d.h5';
+%
+%     Get data from data sets
+%
+r=hdf5read(file,'/xgrid');
+t=hdf5read(file,'/ygrid');
+sol=hdf5read(file,'/sol')';
+solexact=hdf5read(file,'/solana')';
+err=hdf5read(file,'/errors')';
+solr=hdf5read(file,'/derivx')';
+solt=hdf5read(file,'/derivy')';
+%
+%     Attributes
+%
+NR=hdf5read(file,'/NX'); NTH=hdf5read(file,'/NY');
+NIDBAS1=hdf5read(file,'/NIDBAS1');
+NIDBAS2=hdf5read(file,'/NIDBAS2');
+MBESS=hdf5read(file,'/MBESS');
+LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), mbess = %d', NR, NTH, ...
+              NIDBAS1, NIDBAS2, MBESS);
+
+attr=hdf5read(file,'/xgrid/title'); x_ann=attr.Data;
+attr=hdf5read(file,'/ygrid/title'); y_ann=attr.Data;
+attr=hdf5read(file,'/sol/title'); sol_ann=attr.Data;
+attr=hdf5read(file,'/solana/title'); solexact_ann=attr.Data;
+attr=hdf5read(file,'/errors/title');err_ann=attr.Data;
+
+[R,T]=meshgrid(r,t);
+x = R.*cos(T); y= R.*sin(T);
+solx = cos(T).*solr - sin(T)./R.*solt;
+soly = sin(T).*solr + cos(T)./R.*solt;
+
+figure
+subplot(221)
+pcolor(double(r),double(t),double(sol));
+shading interp
+hold on, quiver(r,t,solr,solt)
+xlabel(x_ann); ylabel(y_ann)
+title(LABEL)
+colorbar
+
+subplot(222)
+pcolor(double(x),double(y),double(sol))
+shading interp
+hold on, quiver(x,y,solx,soly)
+hold off, axis image
+xlabel('X'); ylabel('Y')
+title('X-Y plane')
+colorbar
+
+subplot(223)
+surfc(double(x),double(y),double(sol))
+xlabel('X'); ylabel('Y');
+title(sol_ann)
+
+subplot(224)
+surfc(double(x),double(y),double(err))
+xlabel('X'); ylabel('Y');
+title(err_ann)
+
diff --git a/matlab/pde2d_mumps.m b/matlab/pde2d_mumps.m
new file mode 100644
index 0000000..268f2ed
--- /dev/null
+++ b/matlab/pde2d_mumps.m
@@ -0,0 +1,97 @@
+%
+% @file pde2d_mumps.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+file='pde2d_mumps.h5';
+
+nr=hdf5read(file,'/', 'NX'); 
+nth=hdf5read(file,'/', 'NY');
+NIDBAS1=hdf5read(file,'/','NIDBAS1');
+NIDBAS2=hdf5read(file,'/','NIDBAS2');
+MBESS=hdf5read(file,'/','MBESS');
+
+r=hdf5read(file,'/xgrid');
+t=hdf5read(file,'/ygrid');
+sol=hdf5read(file,'/sol')';
+solexact=hdf5read(file,'/solana')';
+err=hdf5read(file,'/errors')';
+solr=hdf5read(file,'/derivx')';
+solt=hdf5read(file,'/derivy')';
+LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), mbess = %d', nr, nth, ...
+              NIDBAS1, NIDBAS2, MBESS);
+
+figure
+subplot(211)
+plot(r, sol(nth/2,:), 'o', r, solexact(nth/2,:))
+xlabel('r')
+grid on
+title(LABEL)
+subplot(212)
+if MBESS == 0
+    plot(t, sol(:,1), 'o', t, solexact(:,1))
+else
+    plot(t, sol(:,nr/2), 'o', t, solexact(:,nr/2))
+end
+xlabel('\theta')
+grid on
+
+
+
+
+% $$$ if verLessThan('matlab', '7.9');
+% $$$     n = hdf5read(file,'/MAT/RANK');
+% $$$     nnz = hdf5read(file,'/MAT/NNZ');
+% $$$     nlsym = hdf5read(file,'/MAT/NLSYM');
+% $$$ else
+% $$$     n = hdf5read(file,'/MAT/', 'RANK');
+% $$$     nnz = hdf5read(file,'/MAT/', 'NNZ');
+% $$$     nlsym = hdf5read(file,'/MAT/', 'NLSYM');
+% $$$ end
+% $$$ 
+% $$$ cols=hdf5read(file, '/MAT/cols');
+% $$$ irow=hdf5read(file, '/MAT/irow');
+% $$$ val=hdf5read(file, '/MAT/val');
+% $$$ perm=hdf5read(file, '/MAT/perm');
+% $$$ 
+% $$$ rows = zeros(nnz,1);
+% $$$ cols=double(cols);
+% $$$ irow=double(irow);
+% $$$ perm=double(perm);
+% $$$ 
+% $$$ for i=1:n
+% $$$     s = irow(i);
+% $$$     e = irow(i+1)-1;
+% $$$     rows(s:e) = i;    
+% $$$ end
+% $$$ 
+% $$$ mat = sparse(rows,cols,val);
+% $$$ figure
+% $$$ subplot(121);
+% $$$ spy(mat(perm,perm));
+% $$$ title('Matrix structure')
+% $$$ subplot(122);
+% $$$ spy(chol(mat(perm,perm)));
+% $$$ title('Factor L^T')
+
+
diff --git a/matlab/pde2d_nh.m b/matlab/pde2d_nh.m
new file mode 100644
index 0000000..cd12593
--- /dev/null
+++ b/matlab/pde2d_nh.m
@@ -0,0 +1,101 @@
+%
+% @file pde2d_nh.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+file='pde2d_nh.h5';
+%
+%     Get data from data sets
+%
+r=hdf5read(file,'/xgrid');
+t=hdf5read(file,'/ygrid');
+sol=hdf5read(file,'/sol')';
+solexact=hdf5read(file,'/solana')';
+err=hdf5read(file,'/errors')';
+errx=hdf5read(file,'/errors_x')';
+erry=hdf5read(file,'/errors_y')';
+solr=hdf5read(file,'/derivx')';
+solt=hdf5read(file,'/derivy')';
+%
+%     Attributes
+%
+NR=hdf5read(file,'/NX'); NTH=hdf5read(file,'/NY');
+NIDBAS1=hdf5read(file,'/NIDBAS1');
+NIDBAS2=hdf5read(file,'/NIDBAS2');
+MBESS=hdf5read(file,'/MBESS');
+LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), mbess = %d', NR, NTH, ...
+              NIDBAS1, NIDBAS2, MBESS);
+
+attr=hdf5read(file,'/xgrid/title'); x_ann=attr.Data;
+attr=hdf5read(file,'/ygrid/title'); y_ann=attr.Data;
+attr=hdf5read(file,'/sol/title'); sol_ann=attr.Data;
+attr=hdf5read(file,'/solana/title'); solexact_ann=attr.Data;
+attr=hdf5read(file,'/errors/title');err_ann=attr.Data;
+
+[R,T]=meshgrid(r,t);
+x = R.*cos(T); y= R.*sin(T);
+solx = cos(T).*solr - sin(T)./R.*solt;
+soly = sin(T).*solr + cos(T)./R.*solt;
+
+figure
+subplot(221)
+pcolor(double(r),double(t),double(sol));
+shading interp
+hold on, quiver(r,t,solr,solt)
+xlabel(x_ann); ylabel(y_ann)
+title(LABEL)
+colorbar
+
+subplot(222)
+pcolor(double(x),double(y),double(sol))
+shading interp
+hold on, quiver(x,y,solx,soly)
+hold off, axis image
+xlabel('X'); ylabel('Y')
+title('X-Y plane')
+colorbar
+
+subplot(223)
+surfc(double(x),double(y),double(sol))
+xlabel('X'); ylabel('Y');
+title(sol_ann)
+
+subplot(224)
+surfc(double(x),double(y),double(err))
+xlabel('X'); ylabel('Y');
+title(err_ann)
+
+figure
+subplot(311)
+plot(t,err(:,NR+1),'o-')
+xlabel('\theta'); ylabel('Error on solution')
+grid on
+title('Error on Boundary r=1');
+subplot(312)
+plot(t,errx(:,NR+1),'o-')
+xlabel('\theta'); ylabel('Error on x-derivative')
+grid on
+subplot(313)
+plot(t,erry(:,NR+1),'o-')
+xlabel('\theta'); ylabel('Error on y-derivative')
+grid on
diff --git a/matlab/pde2d_pardiso.m b/matlab/pde2d_pardiso.m
new file mode 100644
index 0000000..1ec3ae3
--- /dev/null
+++ b/matlab/pde2d_pardiso.m
@@ -0,0 +1,63 @@
+%
+% @file pde2d_pardiso.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+file='pde2d_pardiso.h5';
+
+if verLessThan('matlab', '7.9');
+    n = hdf5read(file,'/MAT/RANK');
+    nnz = hdf5read(file,'/MAT/NNZ');
+    nlsym = hdf5read(file,'/MAT/NLSYM');
+else
+    n = hdf5read(file,'/MAT/', 'RANK');
+    nnz = hdf5read(file,'/MAT/', 'NNZ');
+    nlsym = hdf5read(file,'/MAT/', 'NLSYM');
+end
+
+cols=hdf5read(file, '/MAT/cols');
+irow=hdf5read(file, '/MAT/irow');
+val=hdf5read(file, '/MAT/val');
+perm=hdf5read(file, '/MAT/perm');
+
+rows = zeros(nnz,1);
+cols=double(cols);
+irow=double(irow);
+perm=double(perm);
+
+for i=1:n
+    s = irow(i);
+    e = irow(i+1)-1;
+    rows(s:e) = i;    
+end
+
+mat = sparse(rows,cols,val);
+figure
+subplot(121);
+spy(mat(perm,perm));
+title('Matrix structure')
+subplot(122);
+spy(chol(mat(perm,perm)));
+title('Factor L^T')
+
+
diff --git a/matlab/pde2d_sym_pardiso.m b/matlab/pde2d_sym_pardiso.m
new file mode 100644
index 0000000..7eb2c8f
--- /dev/null
+++ b/matlab/pde2d_sym_pardiso.m
@@ -0,0 +1,136 @@
+%
+% @file pde2d_sym_pardiso.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+file='pde2d_sym_pardiso.h5';
+%
+%     Get data from data sets
+%
+r=hdf5read(file,'/xgrid');
+t=hdf5read(file,'/ygrid');
+sol=hdf5read(file,'/sol')';
+solexact=hdf5read(file,'/solana')';
+err=hdf5read(file,'/errors')';
+solr=hdf5read(file,'/derivx')';
+solt=hdf5read(file,'/derivy')';
+%
+%     Attributes
+%
+if verLessThan('matlab', '7.9');
+    NR=hdf5read(file,'/NX'); NTH=hdf5read(file,'/NY');
+    NIDBAS1=hdf5read(file,'/NIDBAS1');
+    NIDBAS2=hdf5read(file,'/NIDBAS2');
+    MBESS=hdf5read(file,'/MBESS');
+    EPSI=hdf5read(file,'/EPSI');
+    attr=hdf5read(file,'/xgrid/title'); x_ann=attr.Data;
+    attr=hdf5read(file,'/ygrid/title'); y_ann=attr.Data;
+    attr=hdf5read(file,'/sol/title'); sol_ann=attr.Data;
+    attr=hdf5read(file,'/solana/title'); solexact_ann=attr.Data;
+    attr=hdf5read(file,'/errors/title');err_ann=attr.Data;
+else
+    NR=hdf5read(file,'/','NX'); NTH=hdf5read(file,'/','NY');
+    NIDBAS1=hdf5read(file,'/','NIDBAS1');
+    NIDBAS2=hdf5read(file,'/','NIDBAS2');
+    MBESS=hdf5read(file,'/','MBESS');
+    EPSI=hdf5read(file,'/','EPSI');
+    attr=hdf5read(file,'/xgrid/','title'); x_ann=attr.Data;
+    attr=hdf5read(file,'/ygrid/','title'); y_ann=attr.Data;
+    attr=hdf5read(file,'/sol/','title'); sol_ann=attr.Data;
+    attr=hdf5read(file,'/solana/','title'); solexact_ann=attr.Data;
+    attr=hdf5read(file,'/errors/','title');err_ann=attr.Data;
+end
+LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), mbess = %d, epsi = %3.2f', ...
+              NR, NTH, NIDBAS1, NIDBAS2, MBESS, EPSI);
+
+
+[R,T]=meshgrid(r,t);
+x = R.*cos(T); y= R.*sin(T);
+solx = cos(T).*solr - sin(T)./R.*solt;
+soly = sin(T).*solr + cos(T)./R.*solt;
+
+figure
+subplot(221)
+pcolor(double(r),double(t),double(sol));
+shading interp
+hold on, quiver(r,t,solr,solt)
+xlabel(x_ann); ylabel(y_ann)
+title(LABEL)
+colorbar
+
+subplot(222)
+pcolor(double(x),double(y),double(sol))
+shading interp
+hold on, quiver(x,y,solx,soly)
+hold off, axis image
+xlabel('X'); ylabel('Y')
+title('X-Y plane')
+colorbar
+
+subplot(223)
+surfc(double(x),double(y),double(sol))
+xlabel('X'); ylabel('Y');
+title(sol_ann)
+
+subplot(224)
+surfc(double(x),double(y),double(err))
+xlabel('X'); ylabel('Y');
+title(err_ann)
+
+
+
+if verLessThan('matlab', '7.9');
+    n = hdf5read(file,'/MAT/RANK');
+    nnz = hdf5read(file,'/MAT/NNZ');
+    nlsym = hdf5read(file,'/MAT/NLSYM');
+else
+    n = hdf5read(file,'/MAT/', 'RANK');
+    nnz = hdf5read(file,'/MAT/', 'NNZ');
+    nlsym = hdf5read(file,'/MAT/', 'NLSYM');
+end
+
+cols=hdf5read(file, '/MAT/cols');
+irow=hdf5read(file, '/MAT/irow');
+val=hdf5read(file, '/MAT/val');
+perm=hdf5read(file, '/MAT/perm');
+
+rows = zeros(nnz,1);
+cols=double(cols);
+irow=double(irow);
+perm=double(perm);
+
+for i=1:n
+    s = irow(i);
+    e = irow(i+1)-1;
+    rows(s:e) = i;    
+end
+
+mat = sparse(rows,cols,val);
+figure
+   subplot(121)
+   spy(mat)
+   title('Original Matrix structure')
+   subplot(122)
+   spy(mat(perm,perm))
+   title('Permuted Matrix structure')
+
diff --git a/matlab/pde2d_sym_pardiso_dft.m b/matlab/pde2d_sym_pardiso_dft.m
new file mode 100644
index 0000000..5b0f948
--- /dev/null
+++ b/matlab/pde2d_sym_pardiso_dft.m
@@ -0,0 +1,171 @@
+%
+% @file pde2d_sym_pardiso_dft.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+mat_disp=1;
+
+file='pde2d_sym_pardiso_dft.h5';
+
+if verLessThan('matlab', '7.9');
+    n = hdf5read(file,'/MAT1/RANK');
+    nnz = hdf5read(file,'/MAT1/NNZ');
+    nlsym = hdf5read(file,'/MAT1/NLSYM');
+    NR=hdf5read(file,'/NX'); NTH=hdf5read(file,'/NY');
+    NIDBAS1=hdf5read(file,'/NIDBAS1');
+    NIDBAS2=hdf5read(file,'/NIDBAS2');
+    MBESS=hdf5read(file,'/MBESS');
+    EPSI=hdf5read(file,'/EPSI');
+    KMIN=hdf5read(file,'/KMIN');
+    KMAX=hdf5read(file,'/KMAX');
+else
+    n = hdf5read(file,'/MAT1/', 'RANK');
+    nnz = hdf5read(file,'/MAT1/', 'NNZ');
+    nlsym = hdf5read(file,'/MAT1/', 'NLSYM');
+    NR=hdf5read(file,'/', 'NX'); NTH=hdf5read(file,'/', 'NY');
+    NIDBAS1=hdf5read(file,'/', 'NIDBAS1');
+    NIDBAS2=hdf5read(file,'/', 'NIDBAS2');
+    MBESS=hdf5read(file,'/', 'MBESS');
+    EPSI=hdf5read(file,'/','EPSI');
+    KMIN=hdf5read(file,'/','KMIN');
+    KMAX=hdf5read(file,'/','KMAX');
+end
+LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), mbess = %d, epsi = %3.2f', NR, NTH, ...
+              NIDBAS1, NIDBAS2, MBESS, EPSI);
+DK = KMAX-KMIN+1;
+
+attr=hdf5read(file,'/xgrid/','title'); x_ann=attr.Data;
+attr=hdf5read(file,'/ygrid/','title'); y_ann=attr.Data;
+attr=hdf5read(file,'/sol/','title'); sol_ann=attr.Data;
+attr=hdf5read(file,'/solana/','title'); solexact_ann=attr.Data;
+attr=hdf5read(file,'/errors/','title');err_ann=attr.Data;
+
+if mat_disp == 1
+    cols=hdf5read(file, '/MAT1/cols');
+    irow=hdf5read(file, '/MAT1/irow');
+    val=h5Complex(file, '/MAT1/val');
+    perm=hdf5read(file, '/MAT1/perm');
+
+    rows = zeros(nnz,1);
+    cols=double(cols);
+    irow=double(irow);
+    perm=double(perm);
+
+    for i=1:n
+        s = irow(i);
+        e = irow(i+1)-1;
+        rows(s:e) = i;    
+    end
+
+    valr=real(val); vali=imag(val);
+    mat = sparse(rows,cols,valr);
+    figure
+    subplot(121)
+    spy(mat,8)
+    title('Original Matrix structure')
+    subplot(122)
+    spy(mat(perm,perm),8)
+    title('Permuted Matrix structure')
+end
+
+r=hdf5read(file,'/xgrid');
+t=hdf5read(file,'/ygrid');
+sol=hdf5read(file,'/sol')';
+solexact=hdf5read(file,'/solana')';
+err=hdf5read(file,'/errors')';
+solr=hdf5read(file,'/derivx')';
+solt=hdf5read(file,'/derivy')';
+
+[R,T]=meshgrid(r,t);
+x = R.*cos(T); y= R.*sin(T);
+solx = cos(T).*solr - sin(T)./R.*solt;
+soly = sin(T).*solr + cos(T)./R.*solt;
+
+figure
+subplot(221)
+pcolor(r,t,sol);
+shading interp
+hold on, quiver(r,t,solr,solt)
+xlabel(x_ann); ylabel(y_ann)
+title(LABEL)
+colorbar
+
+subplot(222)
+pcolor(x,y,sol)
+shading interp
+hold on, quiver(x,y,solx,soly)
+hold off, axis image
+xlabel('X'); ylabel('Y')
+title('X-Y plane')
+colorbar
+
+subplot(223)
+surfc(x,y,sol)
+xlabel('X'); ylabel('Y');
+title(sol_ann)
+
+subplot(224)
+surfc(x,y,err)
+xlabel('X'); ylabel('Y');
+title(err_ann)
+
+figure
+ft_sol=h5Complex(file,'/FT_SOL');
+ft_sol=reshape(ft_sol,DK,[]);
+m=[KMIN:KMAX]; sp=1:NR+NIDBAS1;
+subplot(121)
+  stem3(sp,m, real(ft_sol), 'filled')
+  shading interp
+  xlabel('Radial spline number'); ylabel('m')
+  title('Real(\phi)')
+subplot(122)
+  stem3(sp,m, imag(ft_sol),'filled')
+  shading interp
+  xlabel('Radial spline number'); ylabel('m')
+  title('Imag(\phi)')
+
+figure
+ft_rhs=h5Complex(file,'/FT_RHS');
+ft_rhs=reshape(ft_rhs,DK,[]);
+m=[KMIN:KMAX]; sp=1:NR+NIDBAS1;
+subplot(121)
+  stem3(sp,m, real(ft_rhs), 'filled')
+  shading interp
+  xlabel('Radial spline number'); ylabel('m')
+  title('Real(\rho)')
+subplot(122)
+  stem3(sp,m, imag(ft_rhs),'filled')
+  shading interp
+  xlabel('Radial spline number'); ylabel('m')
+  title('Imag(\rho)')
+
+figure
+energy_k = h5Complex(file,'/ENERGY_K');
+subplot(211)
+stem(m, real(energy_k));
+xlabel('m'); ylabel('Real(\phi)'); title('Spectral energy')
+subplot(212)
+stem(m, imag(energy_k));
+xlabel('m'); ylabel('Imag(\phi)');  title('Spectral energy')
+
+  
diff --git a/matlab/pde3d.m b/matlab/pde3d.m
new file mode 100644
index 0000000..f8cb948
--- /dev/null
+++ b/matlab/pde3d.m
@@ -0,0 +1,124 @@
+%
+% @file pde3d.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+file='pde3d.h5';
+%
+%     Get data from data sets
+%
+x=hdf5read(file,'/xgrid');
+y=hdf5read(file,'/ygrid');
+z=hdf5read(file,'/zgrid');
+%
+nx=size(x);
+ny=size(y);
+nz=size(z);
+%
+rhs=hdf5read(file,'/RHS');
+coefs=hdf5read(file,'/SOL');
+bcoef=hdf5read(file,'/BCOEF');
+sol=hdf5read(file,'/sol');
+solexact=hdf5read(file,'/solana');
+solx=hdf5read(file,'/derivx');
+soly=hdf5read(file,'/derivy');
+solz=hdf5read(file,'/derivz');
+solx_exact=hdf5read(file,'/derivx_exact');
+soly_exact=hdf5read(file,'/derivy_exact');
+solz_exact=hdf5read(file,'/derivz_exact');
+
+figure
+k=ceil(nz(1)/2);
+subplot(211);
+pcolor(x,y,sol(:,:,k)');
+shading interp
+xlabel('x'); ylabel('y')
+title('Caculated solution')
+colorbar
+
+subplot(212);
+pcolor(x,y,solexact(:,:,k)');
+shading interp
+xlabel('x'); ylabel('y')
+title('Analytical solution')
+colorbar
+
+figure
+err=sol-solexact;
+pcolor(x,y,err(:,:,k)');
+shading interp
+xlabel('x'); ylabel('y')
+title('Discretization error')
+colorbar
+
+figure
+subplot(211);
+pcolor(x,y,solx(:,:,k)');
+shading interp
+xlabel('x'); ylabel('y')
+title('Caculated d/dx')
+colorbar
+
+subplot(212);
+pcolor(x,y,solx_exact(:,:,k)');
+shading interp
+xlabel('x'); ylabel('y')
+title('Analytical d/dx')
+colorbar
+
+figure
+k=ceil(nz(1)/2);
+subplot(211);
+pcolor(x,y,soly(:,:,k)');
+shading interp
+xlabel('x'); ylabel('y')
+title('Caculated d/dy')
+colorbar
+
+subplot(212);
+pcolor(x,y,soly_exact(:,:,k)');
+shading interp
+xlabel('x'); ylabel('y')
+title('Analytical d/dy')
+colorbar
+
+figure
+k=ceil(nz(1)/6);
+subplot(211);
+pcolor(x,y,solz(:,:,k)');
+shading interp
+xlabel('x'); ylabel('y')
+title('Caculated d/dz')
+colorbar
+
+subplot(212);
+pcolor(x,y,solz_exact(:,:,k)');
+shading interp
+xlabel('x'); ylabel('y')
+title('Analytical d/dz')
+colorbar
+
+figure
+i=ceil(nx(1)/2);
+plot(z, squeeze(sol(i,1,:)), 'o', z, squeeze(solexact(i,1,:)),'r')
+xlabel('z');
diff --git a/matlab/poisson_fe.m b/matlab/poisson_fe.m
new file mode 100644
index 0000000..92012f6
--- /dev/null
+++ b/matlab/poisson_fe.m
@@ -0,0 +1,123 @@
+%
+% @file poisson_fe.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+file='poisson_fe.h5';
+%
+nx=h5readatt(file,'/','NX');
+ny=h5readatt(file,'/','NY');
+kx=h5readatt(file,'/','KX');
+ky=h5readatt(file,'/','KY');
+nidbas1=h5readatt(file,'/','NIDBAS1');
+nidbas2=h5readatt(file,'/','NIDBAS2');
+relax=h5readatt(file,'/','RELAX');
+nlevels=h5readatt(file,'/','LEVELS');
+nu1=h5readatt(file,'/','NU1');
+nu2=h5readatt(file,'/','NU2');
+mu=h5readatt(file,'/','MU');
+title_str=sprintf(['N=(%d,%d), NIDBAS=(%d,%d), relax=%s, nu1=%d, ' ...
+                   'nu2=%d, mu=%d, LEVELS=%d, KX=%d, KY=%d'], ...
+                  nx,ny,nidbas1,nidbas2,relax,nu1,nu2,mu,nlevels,kx,ky);
+
+
+%
+%   Prolongation matrices at the coarsest grid
+%
+levels=nlevels;
+mglevel=sprintf('/mglevels/level.%.2d', levels);
+dset=strcat(mglevel,'/matpx');
+matpx=h5read(file,dset);
+dset=strcat(mglevel,'/matpy');
+matpy=h5read(file,dset);
+
+%
+%   FE matrix at the finest grid
+%
+levels=1;
+mglevel=sprintf('/mglevels/level.%.2d', levels);
+dset=strcat(mglevel,'/mata');
+[mata,diag]=csr_mat(file,dset);
+
+f=h5read(file,strcat(mglevel,'/f'));
+v=h5read(file,strcat(mglevel,'/v'));
+f1d=h5read(file,strcat(mglevel,'/f1d'));
+v1d=h5read(file,strcat(mglevel,'/v1d'));
+% $$$ figure
+% $$$ spy(mata)
+
+%
+%    Solutions at the finest grid
+%
+x=h5read(file,'/solutions/xg');
+y=h5read(file,'/solutions/yg');
+dense=h5read(file,'/solutions/dense');
+sol_anal=h5read(file,'/solutions/anal');
+sol_calc=h5read(file,'/solutions/calc');
+sol_direct=h5read(file,'/solutions/direct');
+nx=int32(size(x,1));
+ny=int32(size(y,1));
+
+figure
+surf(x,y,sol_direct'-sol_anal')
+xlabel('X'); ylabel('Y');
+title('Error on the finest grid')
+
+figure
+subplot(211)
+[yy,iy] = max(abs(sol_anal),[],2);
+[xx,ix] = max(yy);
+iy0=iy(ix);
+str=sprintf('Solution at y = %.4f', y(iy0));
+plot(x, sol_anal(:,iy0),x, sol_direct(:,iy0),'o')
+xlabel('x'); ylabel(str);
+grid on
+legend('Analytic Solution', 'Direct Solution')
+title(title_str)
+subplot(212)
+[xx,ix] = max(abs(sol_anal));
+[yy,iy] = max(xx);
+ix0=ix(iy);
+str=sprintf('Solution at x = %.4f', x(ix0));
+plot(y, sol_anal(ix0,:),y, sol_direct(ix0,:),'o')
+xlabel('y'); ylabel(str);
+grid on
+title(title_str)
+
+%
+%   Iterations
+%
+dset='/Iterations/';
+disc_err=h5read(file, strcat(dset,'disc_errors'));
+resid=h5read(file, strcat(dset,'residues'));
+its=0:1:size(resid,1)-1;
+figure
+subplot(211)
+semilogy(its,resid,'o-')
+grid on
+xlabel('Iterations'); ylabel('Norm of residue');
+title(title_str);
+subplot(212)
+semilogy(its,disc_err,'h-')
+grid on
+xlabel('Iterations'); ylabel('Norm of error');
diff --git a/matlab/poisson_mg.m b/matlab/poisson_mg.m
new file mode 100644
index 0000000..e407dd6
--- /dev/null
+++ b/matlab/poisson_mg.m
@@ -0,0 +1,140 @@
+%
+% @file poisson_mg.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+if ~exist('file'), file='poisson_mg.h5'; end
+%
+nx=h5readatt(file,'/','NX');
+ny=h5readatt(file,'/','NY');
+lx=h5readatt(file,'/','LX');
+ly=h5readatt(file,'/','LY');
+kx=h5readatt(file,'/','KX');
+ky=h5readatt(file,'/','KY');
+beta=h5readatt(file,'/','BETA');
+omega=h5readatt(file,'/','OMEGA');
+relax=h5readatt(file,'/','RELAX');
+mat_type=h5readatt(file,'/','MAT_TYPE');
+nlevels=h5readatt(file,'/','LEVELS');
+mu=h5readatt(file,'/','MU');
+nnu=h5readatt(file,'/','NNU');
+nu1=h5read(file,'/nu1');
+nu2=h5read(file,'/nu2');
+title_str=sprintf(['N=(%d,%d), Lx=%d, Ly=%d, beta=%.4f, relax=%s, V(%d,%d), ' ...
+                   'LEVELS=%d, KX=%d, KY=%d'], ...
+                  nx,ny,lx,ly,beta,relax,nu1(nnu),nu2(nnu),nlevels,kx,ky);
+
+
+%
+%   Prolongation matrices at the coarsest grid
+%
+levels=nlevels;
+mglevel=sprintf('/mglevels/level.%.2d', levels);
+dset=strcat(mglevel,'/matpx');
+matpx=csr_mat(file,dset);
+dset=strcat(mglevel,'/matpy');
+matpy=csr_mat(file,dset);
+
+%
+%   FE matrix at the finest grid
+%
+levels=1;
+mglevel=sprintf('/mglevels/level.%.2d', levels);
+dset=strcat(mglevel,'/mata');
+if mat_type == 'csr'
+    [mata,diag]=csr_mat(file,dset);
+else
+    [mata,diag]=cds_mat(file,dset);
+end
+n=size(diag,1);
+
+% $$$ figure
+% $$$ spy(mata)
+%
+%    Spectral radius of GS Iteration Matrix
+%         Rg = -(D+L)^(-1) * U
+%
+% $$$ matl= tril(mata,0); % D+L
+% $$$ lambda = eigs(-triu(mata,1),matl)
+% $$$ fprintf(1, 'Spectral Radius of GS relaxation matrix = %g\n', max(abs(lambda)))
+% $$$ figure
+% $$$ plot(lambda, 'o')  
+% $$$ axis equal
+% $$$ grid on
+% $$$ xlabel('Real of eigenvalues'); ylabel('Imag of eigenvalues')
+% $$$ title(title_str)
+%
+%    Solutions at the finest grid
+%
+dense=h5read(file,'/dense');
+x=h5read(file,'/solutions/xg');
+y=h5read(file,'/solutions/yg');
+sol_anal=h5read(file,'/solutions/anal');
+%sol_direct=h5read(file,'/solutions/direct');
+sol_calc=h5read(file,'/solutions/calc');
+
+figure
+% $$$ surf(x,y,sol_calc'-sol_anal')
+pcolor(x,y,sol_calc'-sol_anal')
+shading interp
+colorbar
+xlabel('X'); ylabel('Y'); zlabel('Error');
+title(title_str)
+
+figure
+subplot(211)
+[yy,iy] = max(abs(sol_anal),[],2);
+[xx,ix] = max(yy);
+iy0=iy(ix);
+str=sprintf('Solution at y = %.4f', y(iy0));
+plot(x, sol_anal(:,iy0),x, sol_calc(:,iy0),'o')
+xlabel('x'); ylabel(str);
+grid on
+legend('Analytic Solution', 'MG Solution')
+title(title_str)
+subplot(212)
+[xx,ix] = max(abs(sol_anal));
+[yy,iy] = max(xx);
+ix0=ix(iy);
+str=sprintf('Solution at x = %.4f', x(ix0));
+plot(y, sol_anal(ix0,:),y, sol_calc(ix0,:),'o')
+xlabel('y'); ylabel(str);
+grid on
+title(title_str)
+%
+%   Iterations
+%
+dset='/Iterations/';
+disc_err=h5read(file, strcat(dset,'disc_errors'));
+resid=h5read(file, strcat(dset,'residues'));
+its=0:1:size(resid,1)-1;
+figure
+subplot(211)
+semilogy(its,resid,'o-')
+grid on
+xlabel('Iterations'); ylabel('Norm of residue');
+title(title_str);
+subplot(212)
+semilogy(its,disc_err,'h-')
+grid on
+xlabel('Iterations'); ylabel('Norm of error');
diff --git a/matlab/ppde3d.m b/matlab/ppde3d.m
new file mode 100644
index 0000000..ed333f2
--- /dev/null
+++ b/matlab/ppde3d.m
@@ -0,0 +1,145 @@
+%
+% @file ppde3d.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+file='ppde3d.h5';
+%
+%     Get data from data sets
+%
+x=hdf5read(file,'/xgrid');
+y=hdf5read(file,'/ygrid');
+z=hdf5read(file,'/zgrid');
+%
+nx=size(x);
+ny=size(y);
+nz=size(z);
+%
+sol=hdf5read(file,'/sol');
+solexact=hdf5read(file,'/solana');
+solx=hdf5read(file,'/derivx');
+soly=hdf5read(file,'/derivy');
+solz=hdf5read(file,'/derivz');
+solx_exact=hdf5read(file,'/derivx_exact');
+soly_exact=hdf5read(file,'/derivy_exact');
+solz_exact=hdf5read(file,'/derivz_exact');
+
+figure
+k=ceil(nz(1)/2);
+subplot(311);
+pcolor(x,y,sol(:,:,k)');
+shading interp
+xlabel('x'); ylabel('y')
+title('Caculated solution')
+colorbar
+
+subplot(312);
+pcolor(x,y,solexact(:,:,k)');
+shading interp
+xlabel('x'); ylabel('y')
+title('Analytical solution')
+colorbar
+
+subplot(313);
+err=sol-solexact;
+pcolor(x,y,err(:,:,k)');
+shading interp
+xlabel('x'); ylabel('y')
+title('Discretization error')
+colorbar
+
+figure
+subplot(311);
+pcolor(x,y,solx(:,:,k)');
+shading interp
+xlabel('x'); ylabel('y')
+title('Caculated d/dx')
+colorbar
+
+subplot(312);
+pcolor(x,y,solx_exact(:,:,k)');
+shading interp
+xlabel('x'); ylabel('y')
+title('Analytical d/dx')
+colorbar
+
+subplot(313);
+err=solx-solx_exact;
+pcolor(x,y,err(:,:,k)');
+shading interp
+xlabel('x'); ylabel('y')
+title('Discretization error')
+colorbar
+
+figure
+k=ceil(nz(1)/2);
+subplot(311);
+pcolor(x,y,soly(:,:,k)');
+shading interp
+xlabel('x'); ylabel('y')
+title('Caculated d/dy')
+colorbar
+
+subplot(312);
+pcolor(x,y,soly_exact(:,:,k)');
+shading interp
+xlabel('x'); ylabel('y')
+title('Analytical d/dy')
+colorbar
+
+subplot(313);
+err=soly-soly_exact;
+pcolor(x,y,err(:,:,k)');
+shading interp
+xlabel('x'); ylabel('y')
+title('Discretization error')
+colorbar
+
+figure
+k=ceil(nz(1)/6);
+subplot(311);
+pcolor(x,y,solz(:,:,k)');
+shading interp
+xlabel('x'); ylabel('y')
+title('Caculated d/dz')
+colorbar
+
+subplot(312);
+pcolor(x,y,solz_exact(:,:,k)');
+shading interp
+xlabel('x'); ylabel('y')
+title('Analytical d/dz')
+colorbar
+
+subplot(313);
+err=solz-solz_exact;
+pcolor(x,y,err(:,:,k)');
+shading interp
+xlabel('x'); ylabel('y')
+title('Discretization error')
+colorbar
+
+figure
+i=ceil(nx(1)/2);
+plot(z, squeeze(sol(i,1,:)), 'o', z, squeeze(solexact(i,1,:)),'r')
+xlabel('z');
diff --git a/matlab/ppde3d_pb.m b/matlab/ppde3d_pb.m
new file mode 100644
index 0000000..0702db8
--- /dev/null
+++ b/matlab/ppde3d_pb.m
@@ -0,0 +1,145 @@
+%
+% @file ppde3d_pb.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+file='ppde3d_pb.h5';
+%
+%     Get data from data sets
+%
+x=hdf5read(file,'/xgrid');
+y=hdf5read(file,'/ygrid');
+z=hdf5read(file,'/zgrid');
+%
+nx=size(x);
+ny=size(y);
+nz=size(z);
+%
+sol=hdf5read(file,'/sol');
+solexact=hdf5read(file,'/solana');
+solx=hdf5read(file,'/derivx');
+soly=hdf5read(file,'/derivy');
+solz=hdf5read(file,'/derivz');
+solx_exact=hdf5read(file,'/derivx_exact');
+soly_exact=hdf5read(file,'/derivy_exact');
+solz_exact=hdf5read(file,'/derivz_exact');
+
+figure
+k=ceil(nz(1)/2);
+subplot(311);
+pcolor(x,y,sol(:,:,k)');
+shading interp
+xlabel('x'); ylabel('y')
+title('Caculated solution')
+colorbar
+
+subplot(312);
+pcolor(x,y,solexact(:,:,k)');
+shading interp
+xlabel('x'); ylabel('y')
+title('Analytical solution')
+colorbar
+
+subplot(313);
+err=sol-solexact;
+pcolor(x,y,err(:,:,k)');
+shading interp
+xlabel('x'); ylabel('y')
+title('Discretization error')
+colorbar
+
+figure
+subplot(311);
+pcolor(x,y,solx(:,:,k)');
+shading interp
+xlabel('x'); ylabel('y')
+title('Caculated d/dx')
+colorbar
+
+subplot(312);
+pcolor(x,y,solx_exact(:,:,k)');
+shading interp
+xlabel('x'); ylabel('y')
+title('Analytical d/dx')
+colorbar
+
+subplot(313);
+err=solx-solx_exact;
+pcolor(x,y,err(:,:,k)');
+shading interp
+xlabel('x'); ylabel('y')
+title('Discretization error')
+colorbar
+
+figure
+k=ceil(nz(1)/2);
+subplot(311);
+pcolor(x,y,soly(:,:,k)');
+shading interp
+xlabel('x'); ylabel('y')
+title('Caculated d/dy')
+colorbar
+
+subplot(312);
+pcolor(x,y,soly_exact(:,:,k)');
+shading interp
+xlabel('x'); ylabel('y')
+title('Analytical d/dy')
+colorbar
+
+subplot(313);
+err=soly-soly_exact;
+pcolor(x,y,err(:,:,k)');
+shading interp
+xlabel('x'); ylabel('y')
+title('Discretization error')
+colorbar
+
+figure
+k=ceil(nz(1)/6);
+subplot(311);
+pcolor(x,y,solz(:,:,k)');
+shading interp
+xlabel('x'); ylabel('y')
+title('Caculated d/dz')
+colorbar
+
+subplot(312);
+pcolor(x,y,solz_exact(:,:,k)');
+shading interp
+xlabel('x'); ylabel('y')
+title('Analytical d/dz')
+colorbar
+
+subplot(313);
+err=solz-solz_exact;
+pcolor(x,y,err(:,:,k)');
+shading interp
+xlabel('x'); ylabel('y')
+title('Discretization error')
+colorbar
+
+figure
+i=ceil(nx(1)/2);
+plot(z, squeeze(sol(i,1,:)), 'o', z, squeeze(solexact(i,1,:)),'r')
+xlabel('z');
diff --git a/matlab/ppoisson_fd.m b/matlab/ppoisson_fd.m
new file mode 100644
index 0000000..a573749
--- /dev/null
+++ b/matlab/ppoisson_fd.m
@@ -0,0 +1,110 @@
+%
+% @file ppoisson_fd.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+if ~exist('file'), file='ppoisson_fd.h5'; end
+
+prb=h5readatt(file,'/','PRB');
+nx=h5readatt(file,'/','NX');
+ny=h5readatt(file,'/','NY');
+lx=h5readatt(file,'/','LX');
+ly=h5readatt(file,'/','LY');
+kx=h5readatt(file,'/','KX');
+ky=h5readatt(file,'/','KY');
+beta=h5readatt(file,'/','BETA');
+omega=h5readatt(file,'/','OMEGA');
+relax=h5readatt(file,'/','RELAX');
+nlevels=h5readatt(file,'/','LEVELS');
+mu=h5readatt(file,'/','MU');
+nu1=h5readatt(file,'/','NU1');
+nu2=h5readatt(file,'/','NU2');
+direct_solve_nits=h5readatt(file,'/','DIRECT_SOLVE_NITS');
+
+title_str=sprintf(['PRB=%s, N=(%d,%d), relax=%s, V(%d,%d), LEVELS=%d, DIRECT SOLVE=%d'], ...
+                  prb, nx, ny, relax, nu1, nu2, nlevels, direct_solve_nits);
+
+x = h5read(file, '/xgrid');
+y = h5read(file, '/ygrid');
+[X,Y]=meshgrid(y,x);
+n1=size(x,1);
+n2=size(y,1);
+n=n1*n2
+
+mat = stencil_mat(file, '/MAT');
+% $$$ figure
+% $$$ spy(mat)
+
+f = h5read(file,'/f'); f1d=reshape(f,n,1);
+v = h5read(file,'/v'); v1d=reshape(v,n,1);
+u = h5read(file,'/u'); u1d=reshape(u,n,1);
+
+% $$$ udirect1d = mat\f1d;
+% $$$ udirect=reshape(udirect1d,n1,n2);
+% $$$ fprintf('Residual of direct solution = %.3e\n', norm(mat*udirect1d-f1d));
+% $$$ fprintf('Error of direct solution    = %.3e\n', norm(udirect1d- ...
+% $$$                                                   v1d));
+figure
+subplot(221)
+pcolor(x,y,v'-u')
+xlabel('X'); ylabel('Y');
+shading interp
+colorbar
+title('Error on the finest grid')
+
+subplot(222)
+[yy,iy] = max(abs(v),[],2);
+[xx,ix] = max(yy);
+iy0=iy(ix);
+str=sprintf('Solution at y = %.4f', y(iy0));
+plot(x, v(:,iy0),x, u(:,iy0),'o')
+xlabel('x'); ylabel(str);
+grid on
+legend('Analytic Solution', 'Computed Solution')
+title(title_str)
+
+subplot(223)
+[xx,ix] = max(abs(v));
+[yy,iy] = max(xx);
+ix0=ix(iy);
+str=sprintf('Solution at x = %.4f', x(ix0));
+plot(y, v(ix0,:),y, u(ix0,:),'o')
+xlabel('y'); ylabel(str);
+grid on
+
+figure
+resid_it=h5read(file, '/resid');
+err_it=h5read(file, '/error');
+nits = size(resid_it,1)-1;
+it=0:nits;
+subplot(211)
+semilogy(it,resid_it,'o-')
+xlabel('Iterations')
+ylabel('Residual norm')
+grid on
+title(title_str)
+subplot(212)
+semilogy(it, err_it, 'o-')
+xlabel('Iterations')
+ylabel('Norm of Discretization Error')
+grid on
diff --git a/matlab/relax.m b/matlab/relax.m
new file mode 100644
index 0000000..37d3fca
--- /dev/null
+++ b/matlab/relax.m
@@ -0,0 +1,55 @@
+%
+% @file relax.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+load relax.mat
+
+figure
+subplot(221)
+semilogy(jac_1(:,1),jac_1(:,2),gs_1(:,1),gs_1(:,2))
+grid on
+legend('Jacobi', 'GS')
+xlabel('Iterations'); ylabel('Error')
+title('NX=32, P=1')
+
+subplot(222)
+semilogy(jac_3(:,1),jac_3(:,2),gs_3(:,1),gs_3(:,2))
+grid on
+legend('Jacobi', 'GS')
+xlabel('Iterations'); ylabel('Error')
+title('NX=32, P=3')
+
+subplot(223)
+semilogy(jac_1(:,1),jac_1(:,4),gs_1(:,1),gs_1(:,4))
+grid on
+legend('Jacobi', 'GS')
+xlabel('Iterations'); ylabel('Discretization error')
+title('NX=32, P=1')
+
+subplot(224)
+semilogy(jac_3(:,1),jac_3(:,4),gs_3(:,1),gs_3(:,4))
+grid on
+legend('Jacobi', 'GS')
+xlabel('Iterations'); ylabel('Discretization error')
+title('NX=32, P=3')
diff --git a/matlab/stencil_mat.m b/matlab/stencil_mat.m
new file mode 100644
index 0000000..4283768
--- /dev/null
+++ b/matlab/stencil_mat.m
@@ -0,0 +1,49 @@
+%
+% @file stencil_mat.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+function [mata, diag] = stencil_mat(file, dset)
+    id = double(h5read(file, strcat(dset,'/dists')));
+    val = h5read(file, strcat(dset,'/val'));
+    n1 = size(val,1);
+    n2 = size(val,2);
+    n = n1*n2;
+    ndiag = size(val,3);
+    dists = id(:,1) + n1*id(:,2);
+    val = reshape(val,n,ndiag);
+    
+    %%    Shift the off-diagonals  %%
+    for k=1:length(dists)
+        d=dists(k);
+        if d < 0
+            val(1:n+d,k) = val(1-d:n,k);
+        elseif d > 0
+            val(n:-1:d+1,k) = val(n-d:-1:1,k);
+        end
+    end
+    mata = spdiags(val, dists, n,n);
+    if nargout == 2
+        idiag = find(dists==0);
+        diag = val(:,idiag);
+    end
diff --git a/matlab/tcdsmat.m b/matlab/tcdsmat.m
new file mode 100644
index 0000000..5331df7
--- /dev/null
+++ b/matlab/tcdsmat.m
@@ -0,0 +1,91 @@
+%
+% @file tcdsmat.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+mat='/MAT1';
+gbmat;
+clear S gb_mat;
+
+file='tcdsmat.h5';
+nx=hdf5read(file,'/NX');
+ny=hdf5read(file,'/NY');
+dists=hdf5read(file,strcat(mat,'/dists'));
+vals=hdf5read(file,strcat(mat,'/vals'));
+rowv=hdf5read(file,strcat(mat,'/rowv'));
+colh=hdf5read(file,strcat(mat,'/colh'));
+n=hdf5read(file,strcat(mat,'/RANK'));
+nd=hdf5read(file,strcat(mat,'/NDIAGS'));
+
+err=zeros(n,nd);
+
+% Diagonal balancing of matrix
+dbal = 1./sqrt(diag(a));
+a = diag(dbal)*a*diag(dbal);
+
+% Check CDS mat except row ny and column ny
+for k=1:nd
+    d=dists(k);
+    i1=max(1,1-d); i2=min(n,n-d);
+    fprintf(1,'%8d %8d %8d\n',d,i1,i2);
+    for i=i1:i2
+        j=i+d;
+        if (i~=ny && j~=ny)
+            err(i,k) = a(i,j)-vals(i,k);
+        end
+    end
+end
+fprintf(1,'min/max of err: %8.4e, %8.4e\n',min(min(err)), max(max(err)));
+
+% Check row ny and j .ne. ny
+i=ny;
+bw0=size(rowv,1);
+for k=1:nd
+    d=dists(k);
+    j=i+d;
+    if ((j >= ny+1) && (j <= bw0))
+        err(i,k)=a(i,j)-rowv(j);
+    end
+end
+fprintf(1,'min/max of err: %8.4e, %8.4e\n',min(min(err)), max(max(err)));
+
+
+% Check column ny
+j=ny;
+for k=1:nd
+    d=dists(k);
+    i=j-d;
+    if ((i >= ny+1) && (i <= bw0))
+        err(i,k)=a(i,j)-colh(i);
+    end
+end
+fprintf(1,'min/max of err: %8.4e, %8.4e\n',min(min(err)), max(max(err)));
+
+% Check RHS
+rhs=hdf5read(file,'/RHS');
+fprintf('Err in RHS: %8.3e\n', max(max(abs(rhs-rhs0))))
+
+% Check SOL
+sol=hdf5read(file,'/SOL');
+err= sol-sol0;
+fprintf('Err SOL:  %8.3e\n', max(max(abs(err))));
diff --git a/matlab/tcdsmat_plot_sol.m b/matlab/tcdsmat_plot_sol.m
new file mode 100644
index 0000000..0754929
--- /dev/null
+++ b/matlab/tcdsmat_plot_sol.m
@@ -0,0 +1,79 @@
+%
+% @file tcdsmat_plot_sol.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+file='tcdsmat.h5'
+%
+%     Get data from data sets
+%
+r=hdf5read(file,'/xgrid');
+t=hdf5read(file,'/ygrid');
+sol=hdf5read(file,'/sol')';
+solexact=hdf5read(file,'/solana')';
+err=hdf5read(file,'/errors')';
+%
+%     Attributes
+%
+NR=hdf5read(file,'/NX'); NTH=hdf5read(file,'/NY');
+NIDBAS1=hdf5read(file,'/NIDBAS1');
+NIDBAS2=hdf5read(file,'/NIDBAS2');
+MBESS=hdf5read(file,'/MBESS');
+LABEL=sprintf('nr = %d, ntheta = %d, nidbas = (%d,%d), mbess = %d', NR, NTH, ...
+              NIDBAS1, NIDBAS2, MBESS);
+
+attr=hdf5read(file,'/xgrid/title'); x_ann=attr.Data;
+attr=hdf5read(file,'/ygrid/title'); y_ann=attr.Data;
+attr=hdf5read(file,'/sol/title'); sol_ann=attr.Data;
+attr=hdf5read(file,'/solana/title'); solexact_ann=attr.Data;
+attr=hdf5read(file,'/errors/title');err_ann=attr.Data;
+
+[R,T]=meshgrid(r,t);
+x = R.*cos(T); y= R.*sin(T);
+
+figure
+subplot(221)
+pcolor(double(r),double(t),double(sol));
+shading interp
+xlabel(x_ann); ylabel(y_ann)
+title(LABEL)
+colorbar
+
+subplot(222)
+pcolor(double(x),double(y),double(sol))
+shading interp
+hold off, axis image
+xlabel('X'); ylabel('Y')
+title('X-Y plane')
+colorbar
+
+subplot(223)
+surfc(double(x),double(y),double(sol))
+xlabel('X'); ylabel('Y');
+title(sol_ann)
+
+subplot(224)
+surfc(double(x),double(y),double(err))
+xlabel('X'); ylabel('Y');
+title(err_ann)
+
diff --git a/matlab/test_intergrid.m b/matlab/test_intergrid.m
new file mode 100644
index 0000000..4d89561
--- /dev/null
+++ b/matlab/test_intergrid.m
@@ -0,0 +1,102 @@
+%
+% @file test_intergrid.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+if ~exist('file'), file='test_intergrid0.h5'; end
+%
+nx=h5readatt(file,'/','NX');
+ny=h5readatt(file,'/','NY');
+lx=h5readatt(file,'/','LX');
+ly=h5readatt(file,'/','LY');
+kx=h5readatt(file,'/','KX');
+ky=h5readatt(file,'/','KY');
+nlevels=h5readatt(file,'/','LEVELS');
+title_str=sprintf(['N=(%d,%d), Lx=%d, Ly=%d, LEVELS=%d, KX=%d, KY=%d'], ...
+                  nx,ny,lx,ly,nlevels,kx,ky);
+
+if nlevels ~= 2
+    disp 'levels should be 2!'
+    return
+end
+
+%
+%   Prolongation matrices at the coarsest grid
+%
+levels=nlevels;
+mglevel=sprintf('/mglevels/level.%.2d', levels);
+dset=strcat(mglevel,'/matpx');
+matpx=csr_mat(file,dset);
+dset=strcat(mglevel,'/matpy');
+matpy=csr_mat(file,dset);
+
+for l=1:2
+    mglevel=sprintf('/mglevels/level.%.2d', l);
+    x=h5read(file,strcat(mglevel,'/x'));
+    y=h5read(file,strcat(mglevel,'/y'));
+    f=h5read(file,strcat(mglevel,'/f'));
+    v=h5read(file,strcat(mglevel,'/v'));
+    figure
+    subplot(221)
+    [yy,iy] = max(abs(f),[],2); [xx,ix] = max(yy); iy0=iy(ix);
+    str=sprintf('f at y = %.4f', y(iy0));
+    plot(x, f(:,iy0),'o-')
+    xlabel('x'); ylabel(str);
+    grid on
+    title(title_str)
+    subplot(222)
+    [xx,ix] = max(abs(f)); [yy,iy] = max(xx); ix0=ix(iy);
+    str=sprintf('f at x = %.4f', x(ix0));
+    plot(y, f(ix0,:),'o-')
+    xlabel('y'); ylabel(str);
+    grid on
+    title(title_str)
+    subplot(223)
+    [yy,iy] = max(abs(v),[],2); [xx,ix] = max(yy); iy0=iy(ix);
+    str=sprintf('v at y = %.4f', y(iy0));
+    plot(x, v(:,iy0),'ro-')
+    xlabel('x'); ylabel(str);
+    grid on
+    title(title_str)
+    subplot(224)
+    [xx,ix] = max(abs(v)); [yy,iy] = max(xx); ix0=ix(iy);
+    str=sprintf('v at x = %.4f', x(ix0));
+    plot(y, v(ix0,:),'ro-')
+    xlabel('y'); ylabel(str);
+    grid on
+    title(title_str)
+    if l==1
+        ffine=f; vfine=v;
+    else
+        fcoarse=f; vcoarse=v;
+    end
+end
+
+%% Check
+err_restriction = matpx'*ffine*matpy./4 - fcoarse;
+err_prolong = matpx*vcoarse*matpy' - vfine;
+
+
+
+max(max(abs(err_restriction)))
+max(max(abs(err_prolong)))
diff --git a/matlab/test_jacobi.m b/matlab/test_jacobi.m
new file mode 100644
index 0000000..2a71117
--- /dev/null
+++ b/matlab/test_jacobi.m
@@ -0,0 +1,96 @@
+%
+% @file test_jacobi.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+if ~exist('file'), file='test_jacobi.h5'; end
+
+x = h5read(file, '/xgrid');
+y = h5read(file, '/ygrid');
+[X,Y]=meshgrid(y,x);
+n1=size(x,1);
+n2=size(y,1);
+n=n1*n2
+
+mat = stencil_mat(file, '/MAT');
+% $$$ figure
+% $$$ spy(mat)
+
+f = h5read(file,'/f'); f1d=reshape(f,n,1);
+v = h5read(file,'/v'); v1d=reshape(v,n,1);
+u = h5read(file,'/u'); u1d=reshape(u,n,1);
+resids = h5read(file,'/resids');
+errs = h5read(file,'/errs');
+
+udirect1d = mat\f1d;
+udirect=reshape(udirect1d,n1,n2);
+fprintf('Residual of direct solution = %.3e\n', norm(mat*udirect1d-f1d));
+fprintf('Error of direct solution    = %.3e\n', norm(udirect1d-v1d));
+
+figure
+subplot(211)
+[yy,iy] = max(abs(v),[],2);
+[xx,ix] = max(yy);
+iy0=iy(ix);
+str=sprintf('Solution at y = %.4f', y(iy0));
+plot(x, v(:,iy0),x, u(:,iy0),'o')
+xlabel('x'); ylabel(str);
+grid on
+legend('Analytic Solution', 'Computed Solution')
+subplot(212)
+[xx,ix] = max(abs(v));
+[yy,iy] = max(xx);
+ix0=ix(iy);
+str=sprintf('Solution at x = %.4f', x(ix0));
+plot(y, v(ix0,:),y, u(ix0,:),'o')
+xlabel('y'); ylabel(str);
+grid on
+
+% $$$ figure
+% $$$ subplot(321)
+% $$$ surf(x,y,v'); title('Exact solution')
+% $$$ xlabel('X'); ylabel('Y')
+% $$$ subplot(322)
+% $$$ surf(x,y,f'); title('RHS')
+% $$$ xlabel('X'); ylabel('Y')
+% $$$ subplot(323)
+% $$$ surf(x,y,u'); title('Num. solution')
+% $$$ subplot(324)
+% $$$ surf(x,y,udirect'); title('Direct. solution')
+% $$$ xlabel('X'); ylabel('Y')
+% $$$ subplot(325)
+% $$$ surf(x,y,resids'); title('Residuals')
+% $$$ xlabel('X'); ylabel('Y')
+% $$$ subplot(326)
+% $$$ surf(x,y,errs'); title('Errors')
+% $$$ xlabel('X'); ylabel('Y')
+
+resid_it=h5read(file, '/resid');
+err_it=h5read(file, '/error');
+nits = size(resid_it,1)-1;
+it=0:nits;
+figure
+semilogy(it,resid_it, it, err_it)
+legend('Residual norm', 'Discretization error')
+xlabel('Iterations')
+grid on
diff --git a/matlab/test_jacobig.m b/matlab/test_jacobig.m
new file mode 100644
index 0000000..a0e31d2
--- /dev/null
+++ b/matlab/test_jacobig.m
@@ -0,0 +1,84 @@
+%
+% @file test_jacobig.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+if ~exist('file'), file='test_jacobig.h5'; end
+
+x = h5read(file, '/xgrid');
+y = h5read(file, '/ygrid');
+[X,Y]=meshgrid(y,x);
+n1=size(x,1);
+n2=size(y,1);
+n=n1*n2
+
+mat = stencil_mat(file, '/MAT');
+% $$$ figure
+% $$$ spy(mat)
+
+f = h5read(file,'/f'); f1d=reshape(f,n,1);
+v = h5read(file,'/v'); v1d=reshape(v,n,1);
+u = h5read(file,'/u'); u1d=reshape(u,n,1);
+
+udirect1d = mat\f1d;
+udirect=reshape(udirect1d,n1,n2);
+fprintf('Residual of direct solution = %.3e\n', norm(mat*udirect1d-f1d));
+fprintf('Error of direct solution    = %.3e\n', norm(udirect1d- ...
+                                                  v1d));
+figure
+subplot(221)
+pcolor(x,y,v'-u')
+xlabel('X'); ylabel('Y');
+shading interp
+colorbar
+title('Error')
+
+subplot(223)
+[yy,iy] = max(abs(v),[],2);
+[xx,ix] = max(yy);
+iy0=iy(ix);
+str=sprintf('Solution at y = %.4f', y(iy0));
+plot(x, v(:,iy0),x, u(:,iy0),'o')
+xlabel('x'); ylabel(str);
+grid on
+legend('Analytic Solution', 'Computed Solution')
+
+subplot(224)
+[xx,ix] = max(abs(v));
+[yy,iy] = max(xx);
+ix0=ix(iy);
+str=sprintf('Solution at x = %.4f', x(ix0));
+plot(y, v(ix0,:),y, u(ix0,:),'o')
+xlabel('y'); ylabel(str);
+grid on
+
+resid_it=h5read(file, '/resid');
+err_it=h5read(file, '/error');
+nits = size(resid_it,1)-1;
+it=0:nits;
+
+subplot(222)
+semilogy(it,resid_it, it, err_it)
+legend('Residual norm', 'Discretization error')
+xlabel('Iterations')
+grid on
diff --git a/matlab/test_mg.m b/matlab/test_mg.m
new file mode 100644
index 0000000..d0ed10b
--- /dev/null
+++ b/matlab/test_mg.m
@@ -0,0 +1,164 @@
+%
+% @file test_mg.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+file='test_mg.h5';
+%
+nx=h5readatt(file,'/','NX');
+nidbas=h5readatt(file,'/','NIDBAS');
+relax=h5readatt(file,'/','RELAX');
+levels=h5readatt(file,'/','LEVELS');
+nu1=h5readatt(file,'/','NU1');
+nu2=h5readatt(file,'/','NU2');
+alpha=h5readatt(file,'/','ALPHA');
+omega=h5readatt(file,'/','OMEGA');
+
+if alpha == 0
+    kx=h5readatt(file,'/','KMODE');
+    title_str=sprintf('N=%d, NIDBAS=%d, KX=%d, relax=%s, omega=%.3f, levels = %d, nu1 = %d, nu2 = %d', ...
+                  nx,nidbas,kx,relax, omega, levels, nu1, nu2);
+else
+    modem=h5readatt(file,'/','MODEM');
+    modep=h5readatt(file,'/','MODEP');
+    title_str=sprintf('N=%d, NIDBAS=%d, modem=%d, modep=%d, relax=%s, omega=%.3f, levels = %d, nu1 = %d, nu2 = %d', ...
+                  nx,nidbas,modem,modep,relax, omega, levels, nu1, nu2);
+end
+
+
+%
+% Read matrices at coarset grid
+%
+for lev=2:levels
+    %
+    %  FE mat at fine grid
+    mglevel=sprintf('/mglevels/level.%.2d', lev-1);
+    dset=strcat(mglevel,'/mata');
+    mata_f = gb_mat(file, dset);
+
+    %
+    %  FE mat at coarse grid
+    mglevel=sprintf('/mglevels/level.%.2d', lev);
+    dset=strcat(mglevel,'/mata');
+    mata_c = gb_mat(file, dset);
+
+    %
+    %  Prolong mat
+    dset=strcat(mglevel,'/matp');
+    matp=h5read(file,dset);
+    %
+    %   Check
+    fprintf(1,'Level %d: ||A_coarse - P''*A_fine*P|| = %g\n', lev, norm(matp'*mata_f*matp ...
+                                                      - mata_c))
+end
+%
+%   Iterations
+dset='/Iterations/';
+err=h5read(file, strcat(dset,'errors'));
+disc_err=h5read(file, strcat(dset,'disc_errors'));
+resid=h5read(file, strcat(dset,'residues'));
+its=0:1:size(err,1)-1;
+
+figure
+subplot(212)
+semilogy(its,resid,'o-', its, disc_err,'h-')
+legend('Residue', 'Error')
+grid on
+xlabel('Iterations'); ylabel('Norm od residue and error');
+title(title_str);
+
+%
+%   Plot grid values at the last iteration
+xgrid=h5read(file, '/Iterations/xgrid');
+u_calc=h5read(file, '/Iterations/u_calc');
+u_exact=h5read(file, '/Iterations/u_exact');
+u_direct=h5read(file, '/Iterations/u_direct');
+subplot(211)
+plot(xgrid, u_exact, xgrid,u_calc,'o')
+legend('Analytic', 'Calculated')
+xlabel('X');ylabel('Grid values of solution')
+grid on
+title(title_str);
+
+%
+% 
+% $$$ mglevel=sprintf('/mglevels/level.%.2d', 1);
+% $$$ dset=strcat(mglevel,'/mata');
+% $$$ A = gb_mat(file, dset);
+% $$$ D = diag(diag(A),0);
+% $$$ n=rank(A);
+% $$$ k=1:1:n;
+% $$$ if relax(1:2) == 'ja'
+% $$$ %
+% $$$ %  Compute eigenvalues of Rj = D^(-1)*A
+% $$$ %
+% $$$     [V, l] = eig(A,D);
+% $$$     [lambda, iss] = sort(diag(l));
+% $$$     V = V(1:end,iss);
+% $$$ %
+% $$$ %   Spectral radius of Jacobi iteration matrix
+% $$$ %         R(omega) = max |1-omega*lambda|
+% $$$ %
+% $$$     om=0:0.01:1;
+% $$$     for i=1:size(om,2)
+% $$$         rho(i) = max(abs(1-om(i).*lambda));
+% $$$     end
+% $$$     fprintf(1, 'Spectral Radius of relaxation matrix = %g\n', max(abs(1-omega*lambda)))
+% $$$ 
+% $$$     figure
+% $$$     subplot(211)
+% $$$     plot(k, 1-omega*lambda, 'o-')
+% $$$     xlabel('mode k'); ylabel('Eigen value of inv(D)*A')
+% $$$     grid on
+% $$$     title(title_str)
+% $$$     subplot(212)
+% $$$     plot(om, rho)
+% $$$     xlabel('\omega'); ylabel('Spectral Radius')
+% $$$     grid on
+% $$$     
+% $$$     for i=1:n
+% $$$         k=mod(i-1,4*5)+1;
+% $$$         if k==1
+% $$$             figure
+% $$$             title(title_str)
+% $$$         end
+% $$$         subplot(4,5,k)
+% $$$         str = sprintf('Mode = %d, ||R|| = %.3f', i, 1-omega*lambda(i));
+% $$$         plot(V(:,i)); grid on
+% $$$         title(str)
+% $$$     end
+% $$$ elseif relax(1:2) == 'gs'
+% $$$ %
+% $$$ %    Spectral radius of GS Iteration Matrix
+% $$$ %         Rg = (D-L)^(-1) * U
+% $$$ %
+% $$$     B = tril(A,0); % D-L
+% $$$     lambda = eig(-triu(A,1),B);
+% $$$     fprintf(1, 'Spectral Radius of relaxation matrix = %g\n', max(abs(lambda)))
+% $$$     figure
+% $$$     plot(lambda, 'o', 'MarkerSize', 6)  
+% $$$     axis equal
+% $$$     grid on
+% $$$     xlabel('Real of eigenvalues'); ylabel('Imag of eigenvalues')
+% $$$     title(title_str)
+% $$$ end
diff --git a/matlab/test_mg2d.m b/matlab/test_mg2d.m
new file mode 100644
index 0000000..cac6b9d
--- /dev/null
+++ b/matlab/test_mg2d.m
@@ -0,0 +1,102 @@
+%
+% @file test_mg2d.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+file='test_mg2d.h5';
+%
+nx=h5readatt(file,'/','NX');
+ny=h5readatt(file,'/','NY');
+kx=h5readatt(file,'/','KX');
+ky=h5readatt(file,'/','KY');
+nidbas1=h5readatt(file,'/','NIDBAS1');
+nidbas2=h5readatt(file,'/','NIDBAS2');
+relax=h5readatt(file,'/','RELAX');
+nlevels=h5readatt(file,'/','LEVELS');
+nu1=h5readatt(file,'/','NU1');
+nu2=h5readatt(file,'/','NU2');
+mu=h5readatt(file,'/','MU');
+title_str=sprintf(['N=(%d,%d), NIDBAS=(%d,%d), relax=%s, nu1=%d, ' ...
+                   'nu2=%d, mu=%d, LEVELS=%d, KX=%d, KY=%d'], ...
+                  nx,ny,nidbas1,nidbas2,relax,nu1,nu2,mu,nlevels,kx,ky);
+
+
+%
+%   Prolongation matrices at the coarsest grid
+%
+levels=nlevels;
+mglevel=sprintf('/mglevels/level.%.2d', levels);
+dset=strcat(mglevel,'/matpx');
+matpx=csr_mat(file,dset);
+dset=strcat(mglevel,'/matpy');
+matpy=csr_mat(file,dset);
+
+%
+%   FE matrix at the finest grid
+%
+levels=1;
+mglevel=sprintf('/mglevels/level.%.2d', levels);
+dset=strcat(mglevel,'/mata');
+[mata,diag]=csr_mat(file,dset);
+
+f=h5read(file,strcat(mglevel,'/f'));
+v=h5read(file,strcat(mglevel,'/v'));
+f1d=h5read(file,strcat(mglevel,'/f1d'));
+v1d=h5read(file,strcat(mglevel,'/v1d'));
+% $$$ figure
+% $$$ spy(mata)
+
+%
+%    Solutions at the finest grid
+%
+x=h5read(file,'/solutions/xg');
+y=h5read(file,'/solutions/yg');
+sol_anal=h5read(file,'/solutions/anal');
+sol_calc=h5read(file,'/solutions/calc');
+% $$$ figure
+% $$$ subplot(211)
+% $$$ surf(x,y,sol_anal')
+% $$$ xlabel('X'); ylabel('Y');
+% $$$ title('Analytical solution on the finest grid')
+% $$$ subplot(212)
+% $$$ surf(x,y,sol_calc')
+% $$$ xlabel('X'); ylabel('Y');
+% $$$ title('Calculated solution on the finest grid')
+
+%
+%   Iterations
+%
+dset='/Iterations/';
+disc_err=h5read(file, strcat(dset,'disc_errors'));
+resid=h5read(file, strcat(dset,'residues'));
+its=0:1:size(resid,1)-1;
+figure
+subplot(211)
+semilogy(its,resid,'o-')
+grid on
+xlabel('Iterations'); ylabel('Norm of residue');
+title(title_str);
+subplot(212)
+semilogy(its,disc_err,'h-')
+grid on
+xlabel('Iterations'); ylabel('Norm of error');
diff --git a/matlab/test_mg2d_cyl.m b/matlab/test_mg2d_cyl.m
new file mode 100644
index 0000000..3258da6
--- /dev/null
+++ b/matlab/test_mg2d_cyl.m
@@ -0,0 +1,141 @@
+%
+% @file test_mg2d_cyl.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+file='test_mg2d_cyl.h5';
+%
+nx=h5readatt(file,'/','NX');
+ny=h5readatt(file,'/','NY');
+nidbas1=h5readatt(file,'/','NIDBAS1');
+nidbas2=h5readatt(file,'/','NIDBAS2');
+modem=h5readatt(file,'/','MODEM');
+modep=h5readatt(file,'/','MODEP');
+relax=h5readatt(file,'/','RELAX');
+nlevels=h5readatt(file,'/','LEVELS');
+nu1=h5readatt(file,'/','NU1');
+nu2=h5readatt(file,'/','NU2');
+mu=h5readatt(file,'/','MU');
+title_str=sprintf(['N=(%d,%d), NIDBAS=(%d,%d), relax=%s, nu1=%d, ' ...
+                   'nu2=%d, mu=%d, LEVELS=%d, MODEM=%d, MODEP=%d'], ...
+                  nx,ny,nidbas1,nidbas2,relax,nu1,nu2,mu,nlevels,modem,modep);
+
+
+%
+%   Prolongation matrices at the coarsest grid
+%
+levels=nlevels;
+mglevel=sprintf('/mglevels/level.%.2d', levels);
+dset=strcat(mglevel,'/matpx');
+matpx=csr_mat(file,dset);
+dset=strcat(mglevel,'/matpy');
+matpy=csr_mat(file,dset);
+
+%
+%   FE matrix at the finest grid
+%
+levels=1;
+mglevel=sprintf('/mglevels/level.%.2d', levels);
+dset=strcat(mglevel,'/mata');
+[mata,diag]=csr_mat(file,dset);
+
+f=h5read(file,strcat(mglevel,'/f'));
+v=h5read(file,strcat(mglevel,'/v'));
+f1d=h5read(file,strcat(mglevel,'/f1d'));
+v1d=h5read(file,strcat(mglevel,'/v1d'));
+% $$$ figure
+% $$$ spy(mata)
+
+%
+%    Solutions at the finest grid
+%
+x=h5read(file,'/solutions/xg');
+y=h5read(file,'/solutions/yg');
+sol_anal=h5read(file,'/solutions/anal');
+sol_calc=h5read(file,'/solutions/calc');
+sol_direct=h5read(file,'/solutions/direct');
+nx=int32(size(x,1));
+ny=int32(size(y,1));
+
+figure
+subplot(211)
+surf(x,y,sol_anal')
+xlabel('X'); ylabel('Y');
+title('Analytical solution on the finest grid')
+subplot(212)
+surf(x,y,sol_calc')
+% $$$ surf(x,y, (abs(sol_calc'-sol_anal')))
+xlabel('X'); ylabel('Y');
+title('Calculated solution on the finest grid')
+
+figure
+subplot(211)
+plot(x, sol_anal(:,ny/2),x, sol_calc(:,ny/2),'o')
+xlabel('r');
+grid on
+legend('Analytic Solution', 'MG Solution')
+title(title_str)
+subplot(212)
+if modem == 0
+    plot(y, sol_anal(1,:),y, sol_calc(1,:),'o')
+else
+    plot(y, sol_anal(nx/2,:),y, sol_calc(nx/2,:),'o')
+end
+xlabel('\theta');
+grid on
+title(title_str)
+
+% $$$ figure
+% $$$ subplot(211)
+% $$$ semilogy(x, abs(sol_anal(:,ny/2)-sol_calc(:,ny/2)),'o')
+% $$$ xlabel('r'); ylabel('Error')
+% $$$ grid on
+% $$$ title(title_str)
+% $$$ subplot(212)
+% $$$ if modem == 0
+% $$$     semilogy(y, abs(sol_anal(1,:)-sol_calc(1,:)),'o')
+% $$$ else
+% $$$     semilogy(y, abs(sol_anal(nx/2,:)-sol_calc(nx/2,:)),'o')
+% $$$ end
+% $$$ xlabel('\theta'); ylabel('Error');
+% $$$ grid on
+% $$$ title(title_str)
+% $$$ 
+
+%
+%   Iterations
+%
+dset='/Iterations/';
+disc_err=h5read(file, strcat(dset,'disc_errors'));
+resid=h5read(file, strcat(dset,'residues'));
+its=0:1:size(resid,1)-1;
+figure
+subplot(211)
+semilogy(its,resid,'o-')
+grid on
+xlabel('Iterations'); ylabel('Norm of residue');
+title(title_str);
+subplot(212)
+semilogy(its,disc_err,'h-')
+grid on
+xlabel('Iterations'); ylabel('Norm of error');
diff --git a/matlab/test_mgp.m b/matlab/test_mgp.m
new file mode 100644
index 0000000..4f5fb1b
--- /dev/null
+++ b/matlab/test_mgp.m
@@ -0,0 +1,98 @@
+%
+% @file test_mgp.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+file='test_mgp.h5';
+%
+nx=h5readatt(file,'/','NX');
+nidbas=h5readatt(file,'/','NIDBAS');
+relax=h5readatt(file,'/','RELAX');
+levels=h5readatt(file,'/','LEVELS');
+nu1=h5readatt(file,'/','NU1');
+nu2=h5readatt(file,'/','NU2');
+title_str=sprintf('NX = %d, NIDBAS = %d, levels = %d, nu1 = %d, nu2 = %d', nx, nidbas, levels, nu1, nu2);
+%
+% Read matrices at coarset grid
+%
+for lev=2:levels
+%
+%  FE mat at fine grid
+mglevel=sprintf('/mglevels/level.%.2d', lev-1);
+dset=strcat(mglevel,'/mata');
+mata_f=h5read(file,dset);
+n=size(mata_f,1);
+%
+%  FE mat at coarse grid
+mglevel=sprintf('/mglevels/level.%.2d', lev);
+dset=strcat(mglevel,'/mata');
+mata_c=h5read(file,dset);
+n=size(mata_c,1);
+%
+%  Prolong mat
+dset=strcat(mglevel,'/matp');
+matp=h5read(file,dset);
+%
+%   Check
+fprintf(1,'Level %d: ||A_coarse - P''*A_fine*P|| = %g\n', lev, norm(matp'*mata_f*matp ...
+                                                  - mata_c))
+end
+%
+%   Iterations
+dset='/Iterations/';
+err=h5read(file, strcat(dset,'errors'));
+disc_err=h5read(file, strcat(dset,'disc_errors'));
+resid=h5read(file, strcat(dset,'residues'));
+its=0:1:size(err,1)-1;
+
+figure
+subplot(221)
+semilogy(its,resid,'o-', its, disc_err,'h-')
+legend('Residue', 'Error')
+grid on
+xlabel('Iterations'); ylabel('Norm od residue and error');
+title(title_str);
+
+%
+%   Plot grid values at the last iteration
+xgrid=h5read(file, '/Iterations/xgrid');
+u_calc=h5read(file, '/Iterations/u_calc');
+u_exact=h5read(file, '/Iterations/u_exact');
+u_direct=h5read(file, '/Iterations/u_direct');
+subplot(222)
+plot(xgrid,u_exact, xgrid,u_calc,'o')
+xlabel('X');ylabel('Grid values of solution')
+grid on
+title(title_str);
+
+subplot(223)
+semilogy(xgrid,abs(u_calc-u_direct))
+xlabel('X');ylabel('Diff with direct solution')
+grid on
+title(title_str);
+
+subplot(224)
+semilogy(xgrid,abs(u_calc-u_exact))
+xlabel('X');ylabel('Diff with exact solution')
+grid on
+title(title_str);
diff --git a/matlab/test_relax.m b/matlab/test_relax.m
new file mode 100644
index 0000000..938b605
--- /dev/null
+++ b/matlab/test_relax.m
@@ -0,0 +1,157 @@
+%
+% @file test_relax.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+file='test_relax.h5';
+%
+nx=h5readatt(file,'/','NX');
+alpha=h5readatt(file,'/','ALPHA');
+nidbas=h5readatt(file,'/','NIDBAS');
+relax=h5readatt(file,'/','RELAX');
+omega=h5readatt(file,'/','OMEGA');
+if alpha == 0
+    kx=h5readatt(file,'/','KX');
+    title_str=sprintf('N=%d, NIDBAS=%d, KX=%d, relax=%s, omega=%.3f', ...
+                  nx,nidbas,kx,relax, omega);
+else
+    modem=h5readatt(file,'/','MODEM');
+    modep=h5readatt(file,'/','MODEP');
+    title_str=sprintf('N=%d, NIDBAS=%d, modem=%d, modep=%d, relax=%s, omega=%.3f', ...
+                  nx,nidbas,modem,modep,relax, omega);
+end
+
+
+%
+%    Solutions at the finest grid
+%
+x=h5read(file,'/solutions/xg');
+sol_direct=h5read(file,'/solutions/direct');
+sol_anal=h5read(file,'/solutions/anal');
+sol_calc=h5read(file,'/solutions/calc');
+figure
+subplot(211)
+plot(x, sol_anal, x, sol_calc, 'o')
+legend('Analytic', 'Calculated')
+xlabel('X')
+grid on
+title(title_str);
+
+%
+%    Relaxations
+%
+errdisc=h5read(file,'/relaxation/errdisc');
+resid=h5read(file,'/relaxation/resid');
+its=0:1:size(errdisc)-1;
+subplot(212)
+semilogy(its,errdisc,its,resid)
+legend('Discretisation Error', 'Residual Norm')
+xlabel('Iterations')
+grid on
+title(title_str)
+
+%
+%  FE Matrix
+%
+dset = '/MATA/';
+A = gb_mat(file, dset);
+D = diag(diag(A),0);
+n=rank(A);
+k=1:1:n;
+if relax(1:2) == 'ja'
+%
+%  Compute eigenvalues of Rj = D^(-1)*A
+%
+    [V, l] = eig(A,D);
+    [lambda, iss] = sort(diag(l));
+    V = V(1:end,iss);
+%
+%   Spectral radius of Jacobi iteration matrix
+%         R(omega) = max |1-omega*lambda|
+%
+    om=0:0.01:1;
+    for i=1:size(om,2)
+        rho(i) = max(abs(1-om(i).*lambda));
+    end
+    fprintf(1, 'Spectral Radius of relaxation matrix = %g\n', max(abs(1-omega*lambda)))
+
+    figure
+    subplot(211)
+    plot(k, 1-omega*lambda, 'o-')
+    xlabel('mode k'); ylabel('Eigen value of inv(D)*A')
+    grid on
+    title(title_str)
+    subplot(212)
+    plot(om, rho)
+    omega_c = 2.0/max(lambda);
+    str = sprintf('Critical omega = %.3f', omega_c)
+    title(str)
+    xlabel('\omega'); ylabel('Spectral Radius')
+    grid on
+    
+elseif relax(1:2) == 'gs'
+%
+%    Spectral radius of GS Iteration Matrix
+%         Rg = -(D+L)^(-1) * U
+%
+    B = tril(A,0); % D+L
+% $$$     [V, l] = eig(-triu(A,1),B); lambda=diag(l);
+% $$$     [V, l] = eig(B,A); lambda = 1 - 1./diag(l);
+    [V, l] = eig(A,B); lambda = 1 - diag(l);
+    [lambda, iss] = sort(lambda, 'descend');
+    V = V(1:end,iss);
+    
+    fprintf(1, 'Spectral Radius of relaxation matrix = %g\n', max(abs(lambda)))
+    figure
+    subplot(211)
+    plot(real(lambda), imag(lambda), 'o')  
+    xlabel('Real of eigenvalues'); ylabel(['Imag of ' ...
+                            'eigenvalues'])
+    axis equal
+    title(title_str)
+    grid on
+    subplot(212)
+    plot(k, abs(lambda), 'o-')  
+    xlabel('Mode'); ylabel('eigenvalues')
+    grid on
+end
+%    
+%   Plot eigenvectors
+neig=size(lambda,1);
+for i=1:neig
+    k=mod(i-1,4*5)+1;
+    if k==1
+        figure
+        title(title_str)
+    end
+    subplot(4,5,k)
+    if relax(1:2) == 'ja'
+        str = sprintf('Mode = %d, ||R|| = %.4f', i, 1-omega* ...
+                      lambda(i));
+    else
+        str = sprintf('Mode = %d, ||R|| = %.4f', i, ...
+                      lambda(i));
+    end
+    plot(V(:,i)); grid on
+    title(str)
+end
diff --git a/matlab/test_relax2d.m b/matlab/test_relax2d.m
new file mode 100644
index 0000000..3e5f778
--- /dev/null
+++ b/matlab/test_relax2d.m
@@ -0,0 +1,86 @@
+%
+% @file test_relax2d.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+file='test_relax2d.h5';
+%
+nx=h5readatt(file,'/','NX');
+ny=h5readatt(file,'/','NY');
+kx=h5readatt(file,'/','KX');
+ky=h5readatt(file,'/','KY');
+nidbas1=h5readatt(file,'/','NIDBAS1');
+nidbas2=h5readatt(file,'/','NIDBAS2');
+levels=h5readatt(file,'/','LEVELS');
+relax=h5readatt(file,'/','RELAX');
+title_str=sprintf('N=(%d,%d), NIDBAS=(%d,%d), KX=%d, KY=%d, relax=%s', ...
+                  nx,ny,nidbas1,nidbas2,kx,ky,relax);
+
+
+%
+%   Prolongation matrices at the coarsest grid
+%
+mglevel=sprintf('/mglevels/level.%.2d', levels);
+dset=strcat(mglevel,'/matpx');
+matpx=csr_mat(file,dset);
+dset=strcat(mglevel,'/matpy');
+matpy=csr_mat(file,dset);
+
+%
+%   FE matrix at the finest grid
+%
+levels=1;
+mglevel=sprintf('/mglevels/level.%.2d', levels);
+dset=strcat(mglevel,'/mata');
+[mata,diag]=csr_mat(file,dset);
+
+x=h5read(file,strcat(mglevel,'/x'));
+y=h5read(file,strcat(mglevel,'/y'));
+f=h5read(file,strcat(mglevel,'/f'));
+v=h5read(file,strcat(mglevel,'/v'));
+f1d=h5read(file,strcat(mglevel,'/f1d'));
+v1d=h5read(file,strcat(mglevel,'/v1d'));
+figure
+spy(mata)
+
+%
+%    Solutions at te finest grid
+%
+sol_direct=h5read(file,'/solutions/direct');
+sol_anal=h5read(file,'/solutions/anal');
+figure
+surf(x,y,sol_direct')
+xlabel('X'); ylabel('Y');
+
+%
+%    Relaxations
+%
+errdisc=h5read(file,'/relaxation/errdisc');
+resid=h5read(file,'/relaxation/resid');
+its=0:1:size(errdisc)-1;
+figure
+semilogy(its,errdisc,its,resid)
+legend('Discretisation Error', 'Residual Norm')
+xlabel('Iterations')
+grid on
+title(title_str)
diff --git a/matlab/test_relax2d_cyl.m b/matlab/test_relax2d_cyl.m
new file mode 100644
index 0000000..e01c146
--- /dev/null
+++ b/matlab/test_relax2d_cyl.m
@@ -0,0 +1,183 @@
+%
+% @file test_relax2d_cyl.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+file='test_relax2d_cyl.h5';
+%
+nx=h5readatt(file,'/','NX');
+ny=h5readatt(file,'/','NY');
+modem=h5readatt(file,'/','MODEM');
+modep=h5readatt(file,'/','MODEP');
+nidbas1=h5readatt(file,'/','NIDBAS1');
+nidbas2=h5readatt(file,'/','NIDBAS2');
+levels=h5readatt(file,'/','LEVELS');
+omega=h5readatt(file,'/','OMEGA');
+relax=h5readatt(file,'/','RELAX');
+if relax(1:2) == 'ja'
+    title_str=sprintf('N=(%d,%d), NIDBAS=(%d,%d), MODEM=%d, MODEP=%d, relax=%s, omega=%.3f', ...
+                  nx,ny,nidbas1,nidbas2,modem,modep,relax, omega);
+else
+    title_str=sprintf('N=(%d,%d), NIDBAS=(%d,%d), MODEM=%d, MODEP=%d, relax=%s,', ...
+                  nx,ny,nidbas1,nidbas2,modem,modep,relax);
+end
+
+%
+%   Prolongation matrices at the coarsest grid
+%
+mglevel=sprintf('/mglevels/level.%.2d', levels);
+dset=strcat(mglevel,'/matpx');
+matpx=csr_mat(file,dset);
+dset=strcat(mglevel,'/matpy');
+matpy=csr_mat(file,dset);
+
+%
+%    Solutions at the finest grid
+%
+mglevel=sprintf('/mglevels/level.%.2d', 1);
+x=h5read(file,strcat(mglevel,'/x'));
+y=h5read(file,strcat(mglevel,'/y'));
+f=h5read(file,strcat(mglevel,'/f'));
+v=h5read(file,strcat(mglevel,'/v'));
+f1d=h5read(file,strcat(mglevel,'/f1d'));
+v1d=h5read(file,strcat(mglevel,'/v1d'));
+sol_direct=h5read(file,'/solutions/direct');
+sol_relax=h5read(file,'/solutions/relax');
+sol_anal=h5read(file,'/solutions/anal');
+
+% $$$ figure
+% $$$ surf(x,y,sol_direct')
+% $$$ xlabel('r'); ylabel('\theta');
+% $$$ title(title_str)
+
+% $$$ figure
+% $$$ subplot(211)
+% $$$ plot(x, sol_anal(:,ny/2),x, sol_direct(:,ny/2),'o')
+% $$$ xlabel('r'); ylabel('Direct solution')
+% $$$ grid on
+% $$$ title(title_str)
+% $$$ subplot(212)
+% $$$ if modem == 0
+% $$$     plot(x, sol_anal(1,:),x, sol_direct(1,:),'o')
+% $$$ else
+% $$$     plot(x, sol_anal(nx/2,:),x, sol_direct(nx/2,:),'o')
+% $$$ end
+% $$$ xlabel('\theta'); ylabel('Direct solution')
+% $$$ grid on
+% $$$ title(title_str)
+
+%
+%    Relaxations
+%
+errdisc=h5read(file,'/relaxation/errdisc');
+resid=h5read(file,'/relaxation/resid');
+its=0:1:size(errdisc)-1;
+figure
+semilogy(its,errdisc,its,resid)
+legend('Discretisation Error', 'Residual Norm')
+xlabel('Iterations')
+grid on
+title(title_str)
+
+% $$$ figure
+% $$$ subplot(211)
+% $$$ plot(x, sol_anal(:,ny/2),x, sol_relax(:,ny/2),'o')
+% $$$ xlabel('r'); ; ylabel('Relaxed solution')
+% $$$ grid on
+% $$$ title(title_str)
+% $$$ subplot(212)
+% $$$ if modem == 0
+% $$$     plot(x, sol_anal(1,:),x, sol_relax(1,:),'o')
+% $$$ else
+% $$$     plot(x, sol_anal(nx/2,:),x, sol_relax(nx/2,:),'o')
+% $$$ end
+% $$$ xlabel('\theta'); ylabel('Relaxed solution')
+% $$$ grid on
+% $$$ title(title_str)
+
+figure
+subplot(211)
+plot(x, sol_anal(:,ny/2),x, sol_direct(:,ny/2),'o')
+xlabel('r'); ; ylabel('Direct solution')
+grid on
+title(title_str)
+subplot(212)
+plot(x, sol_anal(1,:),x, sol_direct(1,:),'o')
+xlabel('\theta'); ylabel('Direct solution at the axis')
+grid on
+title(title_str)
+
+%
+%   FE matrix at the finest grid
+%
+levels=1;
+mglevel=sprintf('/mglevels/level.%.2d', levels);
+dset=strcat(mglevel,'/mata');
+[mata,diag]=csr_mat(file,dset);
+n=size(diag,1);
+% $$$ figure
+% $$$ spy(mata)
+% $$$ title(title_str)
+
+if relax(1:2) == 'ja'
+    %
+    %   Eigenvalues of inv(D)*A
+    %
+    matd=spdiags(diag,0,n,n);
+    lambda = eigs(mata, matd, n);
+    om=0:0.01:0.8;
+    clear rho;
+    for i=1:size(om,2)
+        rho(i) = max(abs(1-om(i).*lambda));
+    end
+    figure
+    subplot(211)
+    plot(lambda,'o')
+    ylabel('Eigenvalue of inv(D)*A')
+    grid on
+    title(title_str)
+    subplot(212)
+    plot(om,rho)
+    xlabel('\omega'); ylabel('\rho')
+    grid on
+    omega_c = 2.0/max(lambda);
+    lambda_min = eigs(mata, matd, 1, 'SM');
+    fprintf(1, 'Spectral Radius of relaxation matrix = %g\n', abs(1-omega*lambda_min))
+    str = sprintf('Critical omega = %.3f', omega_c)
+    title(str)
+else
+%
+%    Spectral radius of GS Iteration Matrix
+%         Rg = -(D+L)^(-1) * U
+%
+    matl= tril(mata,0); % D+L
+    lambda = eigs(-triu(mata,1),matl);
+    fprintf(1, 'Spectral Radius of relaxation matrix = %g\n', max(abs(lambda)))
+    figure
+    plot(lambda, 'o')  
+    axis equal
+    grid on
+    xlabel('Real of eigenvalues'); ylabel('Imag of eigenvalues')
+    title(title_str)
+end
+
diff --git a/matlab/test_stencil.m b/matlab/test_stencil.m
new file mode 100644
index 0000000..db11cc9
--- /dev/null
+++ b/matlab/test_stencil.m
@@ -0,0 +1,62 @@
+%
+% @file test_stencil.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+if ~exist('file'), file='test_stencil.h5'; end
+
+x = h5read(file, '/xgrid');
+y = h5read(file, '/ygrid');
+b1 = h5read(file, '/barr1');
+b2 = h5read(file, '/barr2');
+b3 = h5read(file, '/barr3');
+n1=size(x,1);
+n2=size(y,1);
+n=n1*n2
+
+mat = stencil_mat(file, '/MAT');
+% $$$ figure
+% $$$ spy(mat)
+
+fprintf('||B1|| = %e\n', norm(reshape(b1,n,1)));
+fprintf('||B2|| = %e\n', norm(reshape(b2,n,1)));
+fprintf('||B3|| = %e\n', norm(reshape(b3,n,1)));
+
+% $$$ figure
+% $$$ subplot(321)
+% $$$ surf(x,y,v'); title('Exact solution')
+% $$$ xlabel('X'); ylabel('Y')
+% $$$ subplot(322)
+% $$$ surf(x,y,f'); title('RHS')
+% $$$ xlabel('X'); ylabel('Y')
+% $$$ subplot(323)
+% $$$ surf(x,y,u'); title('Num. solution')
+% $$$ subplot(324)
+% $$$ surf(x,y,udirect'); title('Direct. solution')
+% $$$ xlabel('X'); ylabel('Y')
+% $$$ subplot(325)
+% $$$ surf(x,y,resids'); title('Residuals')
+% $$$ xlabel('X'); ylabel('Y')
+% $$$ subplot(326)
+% $$$ surf(x,y,errs'); title('Errors')
+% $$$ xlabel('X'); ylabel('Y')
diff --git a/matlab/test_stencilg.m b/matlab/test_stencilg.m
new file mode 100644
index 0000000..fb7dccd
--- /dev/null
+++ b/matlab/test_stencilg.m
@@ -0,0 +1,63 @@
+%
+% @file test_stencilg.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+if ~exist('file'), file='test_stencilg.h5'; end
+
+x = h5read(file, '/xgrid');
+y = h5read(file, '/ygrid');
+a = h5read(file, '/arr');
+b1 = h5read(file, '/barr1');
+b2 = h5read(file, '/barr2');
+b3 = h5read(file, '/barr3');
+n1=size(x,1);
+n2=size(y,1);
+n=n1*n2
+
+mat = stencil_mat(file, '/MAT');
+% $$$ figure
+% $$$ spy(mat)
+
+fprintf('||B1|| = %e\n', norm(reshape(b1,n,1)));
+fprintf('||B2|| = %e\n', norm(reshape(b2,n,1)));
+fprintf('||B3|| = %e\n', norm(reshape(b3,n,1)));
+
+% $$$ figure
+% $$$ subplot(321)
+% $$$ surf(x,y,v'); title('Exact solution')
+% $$$ xlabel('X'); ylabel('Y')
+% $$$ subplot(322)
+% $$$ surf(x,y,f'); title('RHS')
+% $$$ xlabel('X'); ylabel('Y')
+% $$$ subplot(323)
+% $$$ surf(x,y,u'); title('Num. solution')
+% $$$ subplot(324)
+% $$$ surf(x,y,udirect'); title('Direct. solution')
+% $$$ xlabel('X'); ylabel('Y')
+% $$$ subplot(325)
+% $$$ surf(x,y,resids'); title('Residuals')
+% $$$ xlabel('X'); ylabel('Y')
+% $$$ subplot(326)
+% $$$ surf(x,y,errs'); title('Errors')
+% $$$ xlabel('X'); ylabel('Y')
diff --git a/matlab/test_transf2d.m b/matlab/test_transf2d.m
new file mode 100644
index 0000000..506c5ad
--- /dev/null
+++ b/matlab/test_transf2d.m
@@ -0,0 +1,105 @@
+%
+% @file test_transf2d.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+file='test_transf2d.h5';
+%
+nx(1)=h5readatt(file,'/','NX');
+ny(1)=h5readatt(file,'/','NY');
+kx=h5readatt(file,'/','KX');
+ky=h5readatt(file,'/','KY');
+nidbas1=h5readatt(file,'/','NIDBAS1');
+nidbas2=h5readatt(file,'/','NIDBAS2');
+nlevels=h5readatt(file,'/','LEVELS');
+title_str=sprintf('N=(%d,%d), NIDBAS=(%d,%d), KX=%d, KY=%d', ...
+                  nx(1),ny(1),nidbas1,nidbas2,kx,ky);
+%
+% Grid sizes on each levels
+%
+for l=2:nlevels
+    nx(l) = nx(l-1)/2;
+    ny(l) = ny(l-1)/2;
+end
+%
+%   Prolongation matrices at the coarsest grid
+%
+levels=nlevels;
+mglevel=sprintf('/mglevels/level.%.2d', levels);
+dset=strcat(mglevel,'/matpx');
+matpx=csr_mat(file,dset);
+dset=strcat(mglevel,'/matpy');
+matpy=csr_mat(file,dset);
+
+%
+%   FE matrix at the finest grid
+%
+levels=1;
+mglevel=sprintf('/mglevels/level.%.2d', levels);
+dset=strcat(mglevel,'/mata');
+[mata,diag]=csr_mat(file,dset);
+
+x=h5read(file,strcat(mglevel,'/x'));
+y=h5read(file,strcat(mglevel,'/y'));
+f=h5read(file,strcat(mglevel,'/f'));
+v=h5read(file,strcat(mglevel,'/v'));
+f1d=h5read(file,strcat(mglevel,'/f1d'));
+v1d=h5read(file,strcat(mglevel,'/v1d'));
+figure
+spy(mata)
+
+%
+%    Solutions at the finest grid
+%
+sol_direct=h5read(file,'/solutions/direct');
+sol_anal=h5read(file,'/solutions/anal');
+vfine=h5read(file,'/solutions/vfine');
+figure
+subplot(211)
+surf(x,y,sol_direct')
+xlabel('X'); ylabel('Y');
+title('Direct Solve on the finest grid')
+subplot(212)
+surf(x,y,vfine'-sol_direct')
+xlabel('X'); ylabel('Y');
+title('Prolongation solution on the finest grid')
+%
+%   Errors
+%
+errdisc = h5read(file,'/errors/errdisc');
+resid = h5read(file,'/errors/resid');
+restrict = h5read(file,'/errors/restrict');
+prolong = h5read(file,'/errors/prolong');
+errdisc_prolong = h5read(file,'/errors/disc_err_prolong');
+figure
+subplot(211)
+loglog(nx, errdisc, 'o-', nx(1:end-1), errdisc_prolong, 'h-')
+grid on;
+legend('Direct Solution', 'Prolonged Solution')
+xlabel('N'); ylabel('Discretization Errors')
+title(title_str);
+subplot(212)
+loglog(nx(2:end), restrict, 'o-', nx(1:end-1), prolong, 'h-')
+grid on;
+legend('Restricted RHS', 'Prolonged Solution')
+xlabel('N'); ylabel('Discretization Errors')
diff --git a/matlab/test_transf2d_cyl.m b/matlab/test_transf2d_cyl.m
new file mode 100644
index 0000000..1466f02
--- /dev/null
+++ b/matlab/test_transf2d_cyl.m
@@ -0,0 +1,117 @@
+%
+% @file test_transf2d_cyl.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+file='test_transf2d_cyl.h5';
+%
+nx(1)=h5readatt(file,'/','NX');
+ny(1)=h5readatt(file,'/','NY');
+modem=h5readatt(file,'/','MODEM');
+modep=h5readatt(file,'/','MODEP');
+nidbas1=h5readatt(file,'/','NIDBAS1');
+nidbas2=h5readatt(file,'/','NIDBAS2');
+nlevels=h5readatt(file,'/','LEVELS');
+title_str=sprintf('N=(%d,%d), NIDBAS=(%d,%d), MODEM=%d, MODEP=%d', ...
+                  nx(1),ny(1),nidbas1,nidbas2,modem,modep);
+%
+% Grid sizes on each levels
+%
+for l=2:nlevels
+    nx(l) = nx(l-1)/2;
+    ny(l) = ny(l-1)/2;
+end
+%
+%   Prolongation matrices at the coarsest grid
+%
+levels=nlevels;
+mglevel=sprintf('/mglevels/level.%.2d', levels);
+dset=strcat(mglevel,'/matpx');
+matpx=csr_mat(file,dset);
+dset=strcat(mglevel,'/matpy');
+matpy=csr_mat(file,dset);
+
+%
+%   FE matrix at the finest grid
+%
+levels=1;
+mglevel=sprintf('/mglevels/level.%.2d', levels);
+dset=strcat(mglevel,'/mata');
+[mata,diag]=csr_mat(file,dset);
+
+x=h5read(file,strcat(mglevel,'/x'));
+y=h5read(file,strcat(mglevel,'/y'));
+f=h5read(file,strcat(mglevel,'/f'));
+v=h5read(file,strcat(mglevel,'/v'));
+f1d=h5read(file,strcat(mglevel,'/f1d'));
+v1d=h5read(file,strcat(mglevel,'/v1d'));
+% $$$ figure
+% $$$ spy(mata)
+
+%
+%    Solutions at the finest grid
+%
+sol_direct=h5read(file,'/solutions/direct');
+sol_anal=h5read(file,'/solutions/anal');
+vfine=h5read(file,'/solutions/vfine');
+figure
+subplot(211)
+surf(x,y,sol_direct')
+xlabel('X'); ylabel('Y');
+title('Direct Solve on the finest grid')
+subplot(212)
+surf(x,y,vfine')
+xlabel('X'); ylabel('Y');
+title('Prolongation solution on the finest grid')
+
+figure
+subplot(311)
+plot(x, sol_direct(:,ny(1)/2),x, vfine(:,ny(1)/2),'o')
+xlabel('r');
+grid on
+legend('Direct Solution', 'Prolonged Solution')
+title(title_str)
+subplot(313)
+plot(y, sol_direct(1,:),y, vfine(1,:),'o')
+xlabel('\theta'); ylabel('On axis')
+grid on
+title(title_str)
+subplot(312)
+plot(y, sol_direct(nx(1)/2,:),y, vfine(nx(1)/2,:),'o')
+xlabel('\theta'); ylabel('Off axis')
+grid on
+title(title_str)
+%
+%   Errors
+%
+errdisc = h5read(file,'/errors/errdisc');
+resid = h5read(file,'/errors/resid');
+restrict = h5read(file,'/errors/restrict');
+prolong = h5read(file,'/errors/prolong');
+errdisc_prolong = h5read(file,'/errors/disc_err_prolong');
+figure
+loglog(nx, errdisc, 'o-', nx(2:end), errdisc_prolong, 'h-')
+grid on;
+legend('Direct Solution', 'Prolonged Solution')
+xlabel('N'); ylabel('Discretization Errors')
+title(title_str);
diff --git a/matlab/tpardiso.m b/matlab/tpardiso.m
new file mode 100644
index 0000000..5efb0a1
--- /dev/null
+++ b/matlab/tpardiso.m
@@ -0,0 +1,72 @@
+%
+% @file tpardiso.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+mat='/MAT1';
+gbmat;
+clear S gb_mat;
+
+% Diagonal balancing of matrix
+dbal = 1./sqrt(diag(a));
+a = diag(dbal)*a*diag(dbal);
+
+file='tpardiso.h5';
+mat='/MAT';
+n=hdf5read(file,strcat(mat,'/N'));
+nz=hdf5read(file,strcat(mat,'/NZ'));
+
+irow=hdf5read(file,strcat(mat,'/irow'));
+cols=hdf5read(file,strcat(mat,'/cols'));
+val=hdf5read(file,strcat(mat,'/val'));
+perm=hdf5read(file,strcat(mat,'/perm'));
+
+amat=zeros(n,n);
+
+% Check PARDISO mat
+for i=1:n
+    for k=irow(i):irow(i+1)-1
+        j=cols(k);
+        amat(i,j) = val(k);
+        amat(j,i) = val(k);
+    end
+end
+err = a-amat;
+errmx = max(max(abs(err)));
+fprintf(1,'Max. error = %e\n', errmx);
+
+figure
+spy(sparse(amat(perm,perm)),'r.');
+LABEL=sprintf('n = %d, nz =%d', n, nz);
+title(LABEL)
+
+% $$$ pmat=zeros(n);
+% $$$ for i=1:n
+% $$$     pmat(i,perm(i))=1;
+% $$$ end
+% $$$ amod=pmat*amat*pmat';
+% $$$ S=sparse(amod);
+% $$$ figure
+% $$$ spy(S,'r.');
+% $$$ LABEL=sprintf('n = %d', n);
+% $$$ title(LABEL)
diff --git a/matlab/two_grid.m b/matlab/two_grid.m
new file mode 100644
index 0000000..3be72de
--- /dev/null
+++ b/matlab/two_grid.m
@@ -0,0 +1,92 @@
+%
+% @file two_grid.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+file='two_grid.h5';
+%
+nx=h5readatt(file,'/','NX');
+nidbas=h5readatt(file,'/','NIDBAS');
+levels=h5readatt(file,'/','LEVELS');
+title_str=sprintf('NX = %d, NIDBAS = %d, levels = %d', nx, nidbas, levels);
+%
+% Read matrices at coarset grid
+%
+for lev=2:levels
+%
+%  FE mat at fine grid
+mglevel=sprintf('/mglevels/level.%.2d', lev-1);
+dset=strcat(mglevel,'/mata');
+ku=h5readatt(file,dset,'KU'); kl=ku;
+n=h5readatt(file,dset,'RANK');
+gbmat=h5read(file,dset);
+mata_f=zeros(n,n);
+for i=1:n
+    jmin = max(1,i-kl);
+    jmax = min(n,i+ku);
+    for j=jmin:jmax
+        ib = kl+ku+i-j+1;
+        mata_f(i,j)=gbmat(ib,j);
+    end
+end
+dset=strcat(mglevel,'/f');
+f_fine = h5read(file,dset);
+dset=strcat(mglevel,'/v');
+v_fine = h5read(file,dset);
+%
+%  FE mat at coarse grid
+mglevel=sprintf('/mglevels/level.%.2d', lev);
+dset=strcat(mglevel,'/mata');
+ku=h5readatt(file,dset,'KU'); kl=ku;
+n=h5readatt(file,dset,'RANK');
+gbmat=h5read(file,dset);
+mata_c=zeros(n,n);
+for i=1:n
+    jmin = max(1,i-kl);
+    jmax = min(n,i+ku);
+    for j=jmin:jmax
+        ib = kl+ku+i-j+1;
+        mata_c(i,j)=gbmat(ib,j);
+    end
+end
+dset=strcat(mglevel,'/f');
+f_coarse = h5read(file,dset);
+dset=strcat(mglevel,'/v');
+v_coarse = h5read(file,dset);
+%
+%  Prolong mat
+dset=strcat(mglevel,'/matp');
+matp=h5read(file,dset);
+%
+%   Check
+fprintf(1,'Level %d: ||A_coarse - P''*A_fine*P|| = %g\n', lev, norm(matp'*mata_f*matp ...
+                                                  - mata_c))
+end
+%
+
+v_prolong = h5read(file,'/v_prolong');
+
+figure
+plot(v_fine)
+hold on
+plot(v_prolong, 'r')
diff --git a/matlab/zcsr_mat.m b/matlab/zcsr_mat.m
new file mode 100644
index 0000000..0b1b491
--- /dev/null
+++ b/matlab/zcsr_mat.m
@@ -0,0 +1,44 @@
+%
+% @file zcsr_mat.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+function [mata, diag] = zcsr_mat(file, dset)
+    n=hdf5read(file,dset, 'RANK');
+    nnz=hdf5read(file,dset, 'NNZ');
+    cols=hdf5read(file, strcat(dset,'/cols')); 
+    irow=hdf5read(file, strcat(dset,'/irow')); 
+    val=h5Complex_ll(file, strcat(dset,'/val')); 
+    idiag=hdf5read(file, strcat(dset,'/idiag')); 
+    for i=1:n
+        s = irow(i);
+        e = irow(i+1)-1;
+        rows(s:e) = i;    
+    end
+    cols=double(cols);
+    rows=double(rows);
+    mata = sparse(rows,cols,val);
+    if nargout == 2
+        diag = val(idiag);
+    end
+    
diff --git a/matlab/zmumps_mat.m b/matlab/zmumps_mat.m
new file mode 100644
index 0000000..2db15b7
--- /dev/null
+++ b/matlab/zmumps_mat.m
@@ -0,0 +1,44 @@
+%
+% @file zmumps_mat.m
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+function [mata, diag] = zmumps_mat(file, dset)
+    n=hdf5read(file,dset, 'RANK');
+    Nnz=hdf5read(file,dset, 'NNZ');
+    cols=hdf5read(file, strcat(dset,'/cols')); 
+    irow=hdf5read(file, strcat(dset,'/irow')); 
+    irn=hdf5read(file,  strcat(dset,'/mumps_par/IRN'));
+    val=h5Complex_ll(file, strcat(dset,'/val')); 
+    idiag=int32(find((irn-cols)==0));
+    for i=1:n
+        s = irow(i);
+        e = irow(i+1)-1;
+        rows(s:e) = i;
+    end
+    cols=double(cols);
+    rows=double(rows);
+    mata = sparse(rows,cols,val);
+    if nargout == 2
+        diag = val(idiag);
+    end
diff --git a/multigrid/CMakeLists.txt b/multigrid/CMakeLists.txt
new file mode 100644
index 0000000..cec61e0
--- /dev/null
+++ b/multigrid/CMakeLists.txt
@@ -0,0 +1,32 @@
+/**
+ * @file CMakeLists.txt
+ *
+ * @brief
+ *
+ * @copyright
+ * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+ * SPC (Swiss Plasma Center)
+ *
+ * spclibs is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * @authors
+ * (in alphabetical order)
+ * @author Nicolas Richart <nicolas.richart@epfl.ch>
+ * @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+ */
+add_subdirectory(src)
+if(BSPLINES_EXAMPLES)
+  add_subdirectory(wk)
+endif()
+
+#add_subdirectory(halpern)
diff --git a/multigrid/docs/Makefile b/multigrid/docs/Makefile
new file mode 100644
index 0000000..e144969
--- /dev/null
+++ b/multigrid/docs/Makefile
@@ -0,0 +1,52 @@
+#
+# @file Makefile
+#
+# @brief
+#
+# @copyright
+# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+# SPC (Swiss Plasma Center)
+#
+# spclibs is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# @authors
+# (in alphabetical order)
+# @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+#
+all:	multigrid.pdf multigrid_2d.pdf mg_gbs.pdf
+
+grid.eps: grid.tex
+	tex grid.tex
+	dvips -o grid.eps grid.dvi
+
+mg_gbs.dvi: grid.eps
+
+clean:
+	rm -f grid.eps *.dvi *.log *.aux *~ *.toc *.flc *.bbl *.blg *.out *~
+
+distclean: clean
+
+.SUFFIXES:
+.SUFFIXES: .tex .dvi .pdf
+
+%.dvi: %.tex
+	latex $<
+	@while ( grep "Rerun to get cross-references" \
+	         ${<:tex=log} > /dev/null ); do \
+	         latex $<; \
+	done
+	latex $<
+
+%.pdf: %.dvi
+	dvipdf $<
+
diff --git a/multigrid/docs/cubic_mg2d.eps b/multigrid/docs/cubic_mg2d.eps
new file mode 100644
index 0000000..34ee918
--- /dev/null
+++ b/multigrid/docs/cubic_mg2d.eps
@@ -0,0 +1,1625 @@
+%!PS-Adobe-3.0 EPSF-3.0
+%%Creator: MATLAB, The MathWorks, Inc. Version 7.14.0.739 (R2012a). Operating System: Linux 3.4.6-2.10-desktop #1 SMP PREEMPT Thu Jul 26 09:36:26 UTC 2012 (641c197) x86_64.
+%%Title: /home/ttran/HLST/2012/report-4/cubic_mg2d.eps
+%%CreationDate: 12/21/2012  09:47:59
+%%DocumentNeededFonts: Helvetica
+%%DocumentProcessColors: Cyan Magenta Yellow Black
+%%LanguageLevel: 2
+%%Pages: 1
+%%BoundingBox:    16    73   576   769
+%%EndComments
+
+%%BeginProlog
+% MathWorks dictionary
+/MathWorks 160 dict begin
+% definition operators
+/bdef {bind def} bind def
+/ldef {load def} bind def
+/xdef {exch def} bdef
+/xstore {exch store} bdef
+% operator abbreviations
+/c  /clip ldef
+/cc /concat ldef
+/cp /closepath ldef
+/gr /grestore ldef
+/gs /gsave ldef
+/mt /moveto ldef
+/np /newpath ldef
+/cm /currentmatrix ldef
+/sm /setmatrix ldef
+/rm /rmoveto ldef
+/rl /rlineto ldef
+/s {show newpath} bdef
+/sc {setcmykcolor} bdef
+/sr /setrgbcolor ldef
+/sg /setgray ldef
+/w /setlinewidth ldef
+/j /setlinejoin ldef
+/cap /setlinecap ldef
+/rc {rectclip} bdef
+/rf {rectfill} bdef
+% page state control
+/pgsv () def
+/bpage {/pgsv save def} bdef
+/epage {pgsv restore} bdef
+/bplot /gsave ldef
+/eplot {stroke grestore} bdef
+% orientation switch
+/portraitMode 0 def /landscapeMode 1 def /rotateMode 2 def
+% coordinate system mappings
+/dpi2point 0 def
+% font control
+/FontSize 0 def
+/FMS {/FontSize xstore findfont [FontSize 0 0 FontSize neg 0 0]
+  makefont setfont} bdef
+/reencode {exch dup where {pop load} {pop StandardEncoding} ifelse
+  exch dup 3 1 roll findfont dup length dict begin
+  { 1 index /FID ne {def}{pop pop} ifelse } forall
+  /Encoding exch def currentdict end definefont pop} bdef
+/isroman {findfont /CharStrings get /Agrave known} bdef
+/FMSR {3 1 roll 1 index dup isroman {reencode} {pop pop} ifelse
+  exch FMS} bdef
+/csm {1 dpi2point div -1 dpi2point div scale neg translate
+ dup landscapeMode eq {pop -90 rotate}
+  {rotateMode eq {90 rotate} if} ifelse} bdef
+% line types: solid, dotted, dashed, dotdash
+/SO { [] 0 setdash } bdef
+/DO { [.5 dpi2point mul 4 dpi2point mul] 0 setdash } bdef
+/DA { [6 dpi2point mul] 0 setdash } bdef
+/DD { [.5 dpi2point mul 4 dpi2point mul 6 dpi2point mul 4
+  dpi2point mul] 0 setdash } bdef
+% macros for lines and objects
+/L {lineto stroke} bdef
+/MP {3 1 roll moveto 1 sub {rlineto} repeat} bdef
+/AP {{rlineto} repeat} bdef
+/PDlw -1 def
+/W {/PDlw currentlinewidth def setlinewidth} def
+/PP {closepath eofill} bdef
+/DP {closepath stroke} bdef
+/MR {4 -2 roll moveto dup  0 exch rlineto exch 0 rlineto
+  neg 0 exch rlineto closepath} bdef
+/FR {MR stroke} bdef
+/PR {MR fill} bdef
+/L1i {{currentfile picstr readhexstring pop} image} bdef
+/tMatrix matrix def
+/MakeOval {newpath tMatrix currentmatrix pop translate scale
+0 0 1 0 360 arc tMatrix setmatrix} bdef
+/FO {MakeOval stroke} bdef
+/PO {MakeOval fill} bdef
+/PD {currentlinewidth 2 div 0 360 arc fill
+   PDlw -1 eq not {PDlw w /PDlw -1 def} if} def
+/FA {newpath tMatrix currentmatrix pop translate scale
+  0 0 1 5 -2 roll arc tMatrix setmatrix stroke} bdef
+/PA {newpath tMatrix currentmatrix pop	translate 0 0 moveto scale
+  0 0 1 5 -2 roll arc closepath tMatrix setmatrix fill} bdef
+/FAn {newpath tMatrix currentmatrix pop translate scale
+  0 0 1 5 -2 roll arcn tMatrix setmatrix stroke} bdef
+/PAn {newpath tMatrix currentmatrix pop translate 0 0 moveto scale
+  0 0 1 5 -2 roll arcn closepath tMatrix setmatrix fill} bdef
+/vradius 0 def /hradius 0 def /lry 0 def
+/lrx 0 def /uly 0 def /ulx 0 def /rad 0 def
+/MRR {/vradius xdef /hradius xdef /lry xdef /lrx xdef /uly xdef
+  /ulx xdef newpath tMatrix currentmatrix pop ulx hradius add uly
+  vradius add translate hradius vradius scale 0 0 1 180 270 arc 
+  tMatrix setmatrix lrx hradius sub uly vradius add translate
+  hradius vradius scale 0 0 1 270 360 arc tMatrix setmatrix
+  lrx hradius sub lry vradius sub translate hradius vradius scale
+  0 0 1 0 90 arc tMatrix setmatrix ulx hradius add lry vradius sub
+  translate hradius vradius scale 0 0 1 90 180 arc tMatrix setmatrix
+  closepath} bdef
+/FRR {MRR stroke } bdef
+/PRR {MRR fill } bdef
+/MlrRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lry uly sub 2 div def
+  newpath tMatrix currentmatrix pop ulx rad add uly rad add translate
+  rad rad scale 0 0 1 90 270 arc tMatrix setmatrix lrx rad sub lry rad
+  sub translate rad rad scale 0 0 1 270 90 arc tMatrix setmatrix
+  closepath} bdef
+/FlrRR {MlrRR stroke } bdef
+/PlrRR {MlrRR fill } bdef
+/MtbRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lrx ulx sub 2 div def
+  newpath tMatrix currentmatrix pop ulx rad add uly rad add translate
+  rad rad scale 0 0 1 180 360 arc tMatrix setmatrix lrx rad sub lry rad
+  sub translate rad rad scale 0 0 1 0 180 arc tMatrix setmatrix
+  closepath} bdef
+/FtbRR {MtbRR stroke } bdef
+/PtbRR {MtbRR fill } bdef
+/stri 6 array def /dtri 6 array def
+/smat 6 array def /dmat 6 array def
+/tmat1 6 array def /tmat2 6 array def /dif 3 array def
+/asub {/ind2 exch def /ind1 exch def dup dup
+  ind1 get exch ind2 get sub exch } bdef
+/tri_to_matrix {
+  2 0 asub 3 1 asub 4 0 asub 5 1 asub
+  dup 0 get exch 1 get 7 -1 roll astore } bdef
+/compute_transform {
+  dmat dtri tri_to_matrix tmat1 invertmatrix 
+  smat stri tri_to_matrix tmat2 concatmatrix } bdef
+/ds {stri astore pop} bdef
+/dt {dtri astore pop} bdef
+/db {2 copy /cols xdef /rows xdef mul dup 3 mul string
+  currentfile 
+  3 index 0 eq {/ASCIIHexDecode filter}
+  {/ASCII85Decode filter 3 index 2 eq {/RunLengthDecode filter} if }
+  ifelse exch readstring pop
+  dup 0 3 index getinterval /rbmap xdef
+  dup 2 index dup getinterval /gbmap xdef
+  1 index dup 2 mul exch getinterval /bbmap xdef pop pop}bdef
+/it {gs np dtri aload pop moveto lineto lineto cp c
+  cols rows 8 compute_transform 
+  rbmap gbmap bbmap true 3 colorimage gr}bdef
+/il {newpath moveto lineto stroke}bdef
+currentdict end def
+%%EndProlog
+
+%%BeginSetup
+MathWorks begin
+
+0 cap
+
+end
+%%EndSetup
+
+%%Page: 1 1
+%%BeginPageSetup
+%%PageBoundingBox:    16    73   576   769
+MathWorks begin
+bpage
+%%EndPageSetup
+
+%%BeginObject: obj1
+bplot
+
+/dpi2point 12 def
+portraitMode 0192 9228 csm
+
+    0     0  6731  8346 rc
+87 dict begin %Colortable dictionary
+/c0 { 0.000000 0.000000 0.000000 sr} bdef
+/c1 { 1.000000 1.000000 1.000000 sr} bdef
+/c2 { 0.900000 0.000000 0.000000 sr} bdef
+/c3 { 0.000000 0.820000 0.000000 sr} bdef
+/c4 { 0.000000 0.000000 0.800000 sr} bdef
+/c5 { 0.910000 0.820000 0.320000 sr} bdef
+/c6 { 1.000000 0.260000 0.820000 sr} bdef
+/c7 { 0.000000 0.820000 0.820000 sr} bdef
+c0
+1 j
+1 sg
+   0    0 6732 8347 rf
+6 w
+0 2847 5217 0 0 -2847 875 3473 4 MP
+PP
+-5217 0 0 2847 5217 0 0 -2847 875 3473 5 MP stroke
+4 w
+DO
+0 sg
+ 875 3473 mt  875  626 L
+ 875  626 mt  875  626 L
+1396 3473 mt 1396  626 L
+1396  626 mt 1396  626 L
+1918 3473 mt 1918  626 L
+1918  626 mt 1918  626 L
+2440 3473 mt 2440  626 L
+2440  626 mt 2440  626 L
+2961 3473 mt 2961  626 L
+2961  626 mt 2961  626 L
+3483 3473 mt 3483  626 L
+3483  626 mt 3483  626 L
+4005 3473 mt 4005  626 L
+4005  626 mt 4005  626 L
+4526 3473 mt 4526  626 L
+4526  626 mt 4526  626 L
+5048 3473 mt 5048  626 L
+5048  626 mt 5048  626 L
+5570 3473 mt 5570  626 L
+5570  626 mt 5570  626 L
+6092 3473 mt 6092  626 L
+6092  626 mt 6092  626 L
+ 875 3473 mt 6092 3473 L
+6092 3473 mt 6092 3473 L
+ 875 2998 mt 6092 2998 L
+6092 2998 mt 6092 2998 L
+ 875 2524 mt 6092 2524 L
+6092 2524 mt 6092 2524 L
+ 875 2049 mt 6092 2049 L
+6092 2049 mt 6092 2049 L
+ 875 1575 mt 6092 1575 L
+6092 1575 mt 6092 1575 L
+ 875 1100 mt 6092 1100 L
+6092 1100 mt 6092 1100 L
+ 875  626 mt 6092  626 L
+6092  626 mt 6092  626 L
+SO
+6 w
+ 875 3473 mt 6092 3473 L
+ 875  626 mt 6092  626 L
+ 875 3473 mt  875  626 L
+6092 3473 mt 6092  626 L
+ 875 3473 mt 6092 3473 L
+ 875 3473 mt  875  626 L
+ 875 3473 mt  875 3420 L
+ 875  626 mt  875  678 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 829 3663 mt 
+(0) s
+1396 3473 mt 1396 3420 L
+1396  626 mt 1396  678 L
+1350 3663 mt 
+(1) s
+1918 3473 mt 1918 3420 L
+1918  626 mt 1918  678 L
+1872 3663 mt 
+(2) s
+2440 3473 mt 2440 3420 L
+2440  626 mt 2440  678 L
+2394 3663 mt 
+(3) s
+2961 3473 mt 2961 3420 L
+2961  626 mt 2961  678 L
+2915 3663 mt 
+(4) s
+3483 3473 mt 3483 3420 L
+3483  626 mt 3483  678 L
+3437 3663 mt 
+(5) s
+4005 3473 mt 4005 3420 L
+4005  626 mt 4005  678 L
+3959 3663 mt 
+(6) s
+4526 3473 mt 4526 3420 L
+4526  626 mt 4526  678 L
+4480 3663 mt 
+(7) s
+5048 3473 mt 5048 3420 L
+5048  626 mt 5048  678 L
+5002 3663 mt 
+(8) s
+5570 3473 mt 5570 3420 L
+5570  626 mt 5570  678 L
+5524 3663 mt 
+(9) s
+6092 3473 mt 6092 3420 L
+6092  626 mt 6092  678 L
+5999 3663 mt 
+(10) s
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  927 3473 L
+6092 3473 mt 6039 3473 L
+ 465 3535 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 651 3431 mt 
+(-12) s
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 2998 mt  927 2998 L
+6092 2998 mt 6039 2998 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 465 3060 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 651 2956 mt 
+(-10) s
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 2524 mt  927 2524 L
+6092 2524 mt 6039 2524 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 465 2586 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 651 2482 mt 
+(-8) s
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 2049 mt  927 2049 L
+6092 2049 mt 6039 2049 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 465 2111 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 651 2007 mt 
+(-6) s
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 1575 mt  927 1575 L
+6092 1575 mt 6039 1575 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 465 1637 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 651 1533 mt 
+(-4) s
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 1100 mt  927 1100 L
+6092 1100 mt 6039 1100 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 465 1162 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 651 1058 mt 
+(-2) s
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875  626 mt  927  626 L
+6092  626 mt 6039  626 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 465  688 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 651  584 mt 
+(0) s
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+DO
+ 875 3473 mt  875 3473 L
+ 875 3473 mt  875 3473 L
+SO
+ 875 3473 mt 6092 3473 L
+ 875  626 mt 6092  626 L
+ 875 3473 mt  875  626 L
+6092 3473 mt 6092  626 L
+gs 875 626 5218 2848 rc
+/c8 { 0.000000 0.000000 1.000000 sr} bdef
+c8
+522 65 522 67 522 67 521 69 522 71 522 74 521 83 522 285 
+522 474 521 804 875 1197 11 MP stroke
+gr
+
+c8
+gs 802 1124 5364 2206 rc
+  36   36  875 1197 FO
+  36   36 1396 2001 FO
+  36   36 1918 2475 FO
+  36   36 2440 2760 FO
+  36   36 2961 2843 FO
+  36   36 3483 2917 FO
+  36   36 4005 2988 FO
+  36   36 4526 3057 FO
+  36   36 5048 3124 FO
+  36   36 5570 3191 FO
+  36   36 6092 3256 FO
+gr
+
+gs 875 626 5218 2848 rc
+gr
+
+0 sg
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+3137 3841 mt 
+(Iterations) s
+ 373 2668 mt  -90 rotate
+(Norm of residual) s
+90 rotate
+1825  494 mt 
+(Cubic Splines, V\(2,1\), relax=gs, KX=3, KY=3) s
+gs 875 626 5218 2848 rc
+/c9 { 0.000000 0.498039 0.000000 sr} bdef
+c9
+522 66 522 66 522 68 521 69 522 72 522 75 521 81 522 195 
+522 442 521 731 875 1126 11 MP stroke
+gr
+
+c9
+gs 802 1053 5364 2012 rc
+  36   36  875 1126 FO
+  36   36 1396 1857 FO
+  36   36 1918 2299 FO
+  36   36 2440 2494 FO
+  36   36 2961 2575 FO
+  36   36 3483 2650 FO
+  36   36 4005 2722 FO
+  36   36 4526 2791 FO
+  36   36 5048 2859 FO
+  36   36 5570 2925 FO
+  36   36 6092 2991 FO
+gr
+
+gs 875 626 5218 2848 rc
+/c10 { 0.847059 0.160784 0.000000 sr} bdef
+c10
+522 65 522 67 522 68 521 70 522 73 522 77 521 84 522 119 
+522 425 521 660 875 1057 11 MP stroke
+gr
+
+c10
+gs 802 984 5364 1855 rc
+  36   36  875 1057 FO
+  36   36 1396 1717 FO
+  36   36 1918 2142 FO
+  36   36 2440 2261 FO
+  36   36 2961 2345 FO
+  36   36 3483 2422 FO
+  36   36 4005 2495 FO
+  36   36 4526 2565 FO
+  36   36 5048 2633 FO
+  36   36 5570 2700 FO
+  36   36 6092 2765 FO
+gr
+
+gs 875 626 5218 2848 rc
+/c11 { 0.000000 0.749020 0.749020 sr} bdef
+c11
+522 65 522 67 522 69 521 70 522 74 522 78 521 86 522 115 
+522 350 521 627 875 995 11 MP stroke
+gr
+
+c11
+gs 802 922 5364 1748 rc
+  36   36  875  995 FO
+  36   36 1396 1622 FO
+  36   36 1918 1972 FO
+  36   36 2440 2087 FO
+  36   36 2961 2173 FO
+  36   36 3483 2251 FO
+  36   36 4005 2325 FO
+  36   36 4526 2395 FO
+  36   36 5048 2464 FO
+  36   36 5570 2531 FO
+  36   36 6092 2596 FO
+gr
+
+gs 875 626 5218 2848 rc
+gr
+
+1 sg
+0 2847 5217 0 0 -2847 875 7428 4 MP
+PP
+-5217 0 0 2847 5217 0 0 -2847 875 7428 5 MP stroke
+4 w
+DO
+0 sg
+ 875 7428 mt  875 4581 L
+ 875 4581 mt  875 4581 L
+1396 7428 mt 1396 4581 L
+1396 4581 mt 1396 4581 L
+1918 7428 mt 1918 4581 L
+1918 4581 mt 1918 4581 L
+2440 7428 mt 2440 4581 L
+2440 4581 mt 2440 4581 L
+2961 7428 mt 2961 4581 L
+2961 4581 mt 2961 4581 L
+3483 7428 mt 3483 4581 L
+3483 4581 mt 3483 4581 L
+4005 7428 mt 4005 4581 L
+4005 4581 mt 4005 4581 L
+4526 7428 mt 4526 4581 L
+4526 4581 mt 4526 4581 L
+5048 7428 mt 5048 4581 L
+5048 4581 mt 5048 4581 L
+5570 7428 mt 5570 4581 L
+5570 4581 mt 5570 4581 L
+6092 7428 mt 6092 4581 L
+6092 4581 mt 6092 4581 L
+ 875 7428 mt 6092 7428 L
+6092 7428 mt 6092 7428 L
+ 875 6716 mt 6092 6716 L
+6092 6716 mt 6092 6716 L
+ 875 6004 mt 6092 6004 L
+6092 6004 mt 6092 6004 L
+ 875 5292 mt 6092 5292 L
+6092 5292 mt 6092 5292 L
+ 875 4581 mt 6092 4581 L
+6092 4581 mt 6092 4581 L
+SO
+6 w
+ 875 7428 mt 6092 7428 L
+ 875 4581 mt 6092 4581 L
+ 875 7428 mt  875 4581 L
+6092 7428 mt 6092 4581 L
+ 875 7428 mt 6092 7428 L
+ 875 7428 mt  875 4581 L
+ 875 7428 mt  875 7375 L
+ 875 4581 mt  875 4633 L
+ 829 7618 mt 
+(0) s
+1396 7428 mt 1396 7375 L
+1396 4581 mt 1396 4633 L
+1350 7618 mt 
+(1) s
+1918 7428 mt 1918 7375 L
+1918 4581 mt 1918 4633 L
+1872 7618 mt 
+(2) s
+2440 7428 mt 2440 7375 L
+2440 4581 mt 2440 4633 L
+2394 7618 mt 
+(3) s
+2961 7428 mt 2961 7375 L
+2961 4581 mt 2961 4633 L
+2915 7618 mt 
+(4) s
+3483 7428 mt 3483 7375 L
+3483 4581 mt 3483 4633 L
+3437 7618 mt 
+(5) s
+4005 7428 mt 4005 7375 L
+4005 4581 mt 4005 4633 L
+3959 7618 mt 
+(6) s
+4526 7428 mt 4526 7375 L
+4526 4581 mt 4526 4633 L
+4480 7618 mt 
+(7) s
+5048 7428 mt 5048 7375 L
+5048 4581 mt 5048 4633 L
+5002 7618 mt 
+(8) s
+5570 7428 mt 5570 7375 L
+5570 4581 mt 5570 4633 L
+5524 7618 mt 
+(9) s
+6092 7428 mt 6092 7375 L
+6092 4581 mt 6092 4633 L
+5999 7618 mt 
+(10) s
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  927 7428 L
+6092 7428 mt 6039 7428 L
+ 465 7490 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 651 7386 mt 
+(-10) s
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 6716 mt  927 6716 L
+6092 6716 mt 6039 6716 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 465 6778 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 651 6674 mt 
+(-8) s
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 6004 mt  927 6004 L
+6092 6004 mt 6039 6004 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 465 6066 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 651 5962 mt 
+(-6) s
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 5292 mt  927 5292 L
+6092 5292 mt 6039 5292 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 465 5354 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 651 5250 mt 
+(-4) s
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 4581 mt  927 4581 L
+6092 4581 mt 6039 4581 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 465 4643 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 651 4539 mt 
+(-2) s
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+DO
+ 875 7428 mt  875 7428 L
+ 875 7428 mt  875 7428 L
+SO
+ 875 7428 mt 6092 7428 L
+ 875 4581 mt 6092 4581 L
+ 875 7428 mt  875 4581 L
+6092 7428 mt 6092 4581 L
+gs 875 4581 5218 2848 rc
+c8
+522 0 522 0 522 0 521 0 522 0 522 0 521 0 522 22 
+522 937 521 1686 875 4776 11 MP stroke
+gr
+
+c8
+gs 802 4703 5364 2792 rc
+0 j
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 844 4758 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 1365 6444 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 1887 7381 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 2409 7403 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 2930 7403 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 3452 7403 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 3974 7403 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 4495 7403 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 5017 7403 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 5539 7403 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 6061 7403 13 MP
+DP
+gr
+
+gs 875 4581 5218 2848 rc
+gr
+
+0 sg
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+3137 7796 mt 
+(Iterations) s
+ 373 6506 mt  -90 rotate
+(Norm of error) s
+90 rotate
+gs 875 4581 5218 2848 rc
+c9
+522 0 522 0 522 0 521 0 522 0 522 0 521 1 522 10 
+522 594 521 1611 875 4776 11 MP stroke
+gr
+
+c9
+gs 802 4703 5364 2363 rc
+0 j
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 844 4758 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 1365 6369 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 1887 6963 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 2409 6973 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 2930 6974 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 3452 6974 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 3974 6974 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 4495 6974 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 5017 6974 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 5539 6974 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 6061 6974 13 MP
+DP
+gr
+
+gs 875 4581 5218 2848 rc
+c10
+522 0 522 0 522 0 521 0 522 0 522 0 521 0 522 4 
+522 390 521 1388 875 4776 11 MP stroke
+gr
+
+c10
+gs 802 4703 5364 1929 rc
+0 j
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 844 4758 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 1365 6146 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 1887 6536 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 2409 6540 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 2930 6540 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 3452 6540 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 3974 6540 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 4495 6540 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 5017 6540 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 5539 6540 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 6061 6540 13 MP
+DP
+gr
+
+gs 875 4581 5218 2848 rc
+c11
+522 0 522 0 522 0 521 0 522 1 522 0 521 0 522 1 
+522 200 521 1135 875 4776 11 MP stroke
+gr
+
+c11
+gs 802 4703 5364 1484 rc
+0 j
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 844 4758 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 1365 5893 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 1887 6093 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 2409 6094 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 2930 6094 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 3452 6094 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 3974 6095 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 4495 6095 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 5017 6095 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 5539 6095 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 6061 6095 13 MP
+DP
+gr
+
+gs 875 4581 5218 2848 rc
+gr
+
+0 sg
+-225 213 3020 5810 2 MP stroke
+57 -53 -119 49 2857 6027 3 MP
+PP
+62 4 57 -53 -119 49 2857 6027 4 MP stroke
+0 62 57 -115 2738 6076 3 MP
+PP
+-57 53 0 62 57 -115 2738 6076 4 MP stroke
+3022 5773 mt 
+(16 X 16) s
+-151 154 3312 6300 2 MP stroke
+55 -56 -116 55 3222 6455 3 MP
+PP
+61 1 55 -56 -116 55 3222 6455 4 MP stroke
+3 62 52 -118 3106 6510 3 MP
+PP
+-55 56 3 62 52 -118 3106 6510 4 MP stroke
+3314 6263 mt 
+(32 X 32) s
+-161 158 3777 6730 2 MP stroke
+55 -55 -117 52 3678 6891 3 MP
+PP
+62 3 55 -55 -117 52 3678 6891 4 MP stroke
+1 61 54 -116 3561 6943 3 MP
+PP
+-55 55 1 61 54 -116 3561 6943 4 MP stroke
+3779 6693 mt 
+(64 X 64) s
+-171 163 4244 7151 2 MP stroke
+57 -54 -118 50 4134 7318 3 MP
+PP
+61 4 57 -54 -118 50 4134 7318 4 MP stroke
+1 62 56 -116 4016 7368 3 MP
+PP
+-57 54 1 62 56 -116 4016 7368 4 MP stroke
+4246 7114 mt 
+(128 X 128) s
+
+end %%Color Dict
+
+eplot
+%%EndObject
+
+epage
+end
+
+showpage
+
+%%Trailer
+%%EOF
diff --git a/multigrid/docs/cubic_mg2d.fig b/multigrid/docs/cubic_mg2d.fig
new file mode 100644
index 0000000..df7519e
Binary files /dev/null and b/multigrid/docs/cubic_mg2d.fig differ
diff --git a/multigrid/docs/cubic_mg2d_levels.eps b/multigrid/docs/cubic_mg2d_levels.eps
new file mode 100644
index 0000000..65352b0
--- /dev/null
+++ b/multigrid/docs/cubic_mg2d_levels.eps
@@ -0,0 +1,1559 @@
+%!PS-Adobe-3.0 EPSF-3.0
+%%Creator: MATLAB, The MathWorks, Inc. Version 7.14.0.739 (R2012a). Operating System: Linux 3.4.6-2.10-desktop #1 SMP PREEMPT Thu Jul 26 09:36:26 UTC 2012 (641c197) x86_64.
+%%Title: /home/ttran/HLST/2012/report-4/cubic_mg2d_levels.eps
+%%CreationDate: 12/21/2012  09:48:21
+%%DocumentNeededFonts: Helvetica
+%%DocumentProcessColors: Cyan Magenta Yellow Black
+%%LanguageLevel: 2
+%%Pages: 1
+%%BoundingBox:    17    88   576   753
+%%EndComments
+
+%%BeginProlog
+% MathWorks dictionary
+/MathWorks 160 dict begin
+% definition operators
+/bdef {bind def} bind def
+/ldef {load def} bind def
+/xdef {exch def} bdef
+/xstore {exch store} bdef
+% operator abbreviations
+/c  /clip ldef
+/cc /concat ldef
+/cp /closepath ldef
+/gr /grestore ldef
+/gs /gsave ldef
+/mt /moveto ldef
+/np /newpath ldef
+/cm /currentmatrix ldef
+/sm /setmatrix ldef
+/rm /rmoveto ldef
+/rl /rlineto ldef
+/s {show newpath} bdef
+/sc {setcmykcolor} bdef
+/sr /setrgbcolor ldef
+/sg /setgray ldef
+/w /setlinewidth ldef
+/j /setlinejoin ldef
+/cap /setlinecap ldef
+/rc {rectclip} bdef
+/rf {rectfill} bdef
+% page state control
+/pgsv () def
+/bpage {/pgsv save def} bdef
+/epage {pgsv restore} bdef
+/bplot /gsave ldef
+/eplot {stroke grestore} bdef
+% orientation switch
+/portraitMode 0 def /landscapeMode 1 def /rotateMode 2 def
+% coordinate system mappings
+/dpi2point 0 def
+% font control
+/FontSize 0 def
+/FMS {/FontSize xstore findfont [FontSize 0 0 FontSize neg 0 0]
+  makefont setfont} bdef
+/reencode {exch dup where {pop load} {pop StandardEncoding} ifelse
+  exch dup 3 1 roll findfont dup length dict begin
+  { 1 index /FID ne {def}{pop pop} ifelse } forall
+  /Encoding exch def currentdict end definefont pop} bdef
+/isroman {findfont /CharStrings get /Agrave known} bdef
+/FMSR {3 1 roll 1 index dup isroman {reencode} {pop pop} ifelse
+  exch FMS} bdef
+/csm {1 dpi2point div -1 dpi2point div scale neg translate
+ dup landscapeMode eq {pop -90 rotate}
+  {rotateMode eq {90 rotate} if} ifelse} bdef
+% line types: solid, dotted, dashed, dotdash
+/SO { [] 0 setdash } bdef
+/DO { [.5 dpi2point mul 4 dpi2point mul] 0 setdash } bdef
+/DA { [6 dpi2point mul] 0 setdash } bdef
+/DD { [.5 dpi2point mul 4 dpi2point mul 6 dpi2point mul 4
+  dpi2point mul] 0 setdash } bdef
+% macros for lines and objects
+/L {lineto stroke} bdef
+/MP {3 1 roll moveto 1 sub {rlineto} repeat} bdef
+/AP {{rlineto} repeat} bdef
+/PDlw -1 def
+/W {/PDlw currentlinewidth def setlinewidth} def
+/PP {closepath eofill} bdef
+/DP {closepath stroke} bdef
+/MR {4 -2 roll moveto dup  0 exch rlineto exch 0 rlineto
+  neg 0 exch rlineto closepath} bdef
+/FR {MR stroke} bdef
+/PR {MR fill} bdef
+/L1i {{currentfile picstr readhexstring pop} image} bdef
+/tMatrix matrix def
+/MakeOval {newpath tMatrix currentmatrix pop translate scale
+0 0 1 0 360 arc tMatrix setmatrix} bdef
+/FO {MakeOval stroke} bdef
+/PO {MakeOval fill} bdef
+/PD {currentlinewidth 2 div 0 360 arc fill
+   PDlw -1 eq not {PDlw w /PDlw -1 def} if} def
+/FA {newpath tMatrix currentmatrix pop translate scale
+  0 0 1 5 -2 roll arc tMatrix setmatrix stroke} bdef
+/PA {newpath tMatrix currentmatrix pop	translate 0 0 moveto scale
+  0 0 1 5 -2 roll arc closepath tMatrix setmatrix fill} bdef
+/FAn {newpath tMatrix currentmatrix pop translate scale
+  0 0 1 5 -2 roll arcn tMatrix setmatrix stroke} bdef
+/PAn {newpath tMatrix currentmatrix pop translate 0 0 moveto scale
+  0 0 1 5 -2 roll arcn closepath tMatrix setmatrix fill} bdef
+/vradius 0 def /hradius 0 def /lry 0 def
+/lrx 0 def /uly 0 def /ulx 0 def /rad 0 def
+/MRR {/vradius xdef /hradius xdef /lry xdef /lrx xdef /uly xdef
+  /ulx xdef newpath tMatrix currentmatrix pop ulx hradius add uly
+  vradius add translate hradius vradius scale 0 0 1 180 270 arc 
+  tMatrix setmatrix lrx hradius sub uly vradius add translate
+  hradius vradius scale 0 0 1 270 360 arc tMatrix setmatrix
+  lrx hradius sub lry vradius sub translate hradius vradius scale
+  0 0 1 0 90 arc tMatrix setmatrix ulx hradius add lry vradius sub
+  translate hradius vradius scale 0 0 1 90 180 arc tMatrix setmatrix
+  closepath} bdef
+/FRR {MRR stroke } bdef
+/PRR {MRR fill } bdef
+/MlrRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lry uly sub 2 div def
+  newpath tMatrix currentmatrix pop ulx rad add uly rad add translate
+  rad rad scale 0 0 1 90 270 arc tMatrix setmatrix lrx rad sub lry rad
+  sub translate rad rad scale 0 0 1 270 90 arc tMatrix setmatrix
+  closepath} bdef
+/FlrRR {MlrRR stroke } bdef
+/PlrRR {MlrRR fill } bdef
+/MtbRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lrx ulx sub 2 div def
+  newpath tMatrix currentmatrix pop ulx rad add uly rad add translate
+  rad rad scale 0 0 1 180 360 arc tMatrix setmatrix lrx rad sub lry rad
+  sub translate rad rad scale 0 0 1 0 180 arc tMatrix setmatrix
+  closepath} bdef
+/FtbRR {MtbRR stroke } bdef
+/PtbRR {MtbRR fill } bdef
+/stri 6 array def /dtri 6 array def
+/smat 6 array def /dmat 6 array def
+/tmat1 6 array def /tmat2 6 array def /dif 3 array def
+/asub {/ind2 exch def /ind1 exch def dup dup
+  ind1 get exch ind2 get sub exch } bdef
+/tri_to_matrix {
+  2 0 asub 3 1 asub 4 0 asub 5 1 asub
+  dup 0 get exch 1 get 7 -1 roll astore } bdef
+/compute_transform {
+  dmat dtri tri_to_matrix tmat1 invertmatrix 
+  smat stri tri_to_matrix tmat2 concatmatrix } bdef
+/ds {stri astore pop} bdef
+/dt {dtri astore pop} bdef
+/db {2 copy /cols xdef /rows xdef mul dup 3 mul string
+  currentfile 
+  3 index 0 eq {/ASCIIHexDecode filter}
+  {/ASCII85Decode filter 3 index 2 eq {/RunLengthDecode filter} if }
+  ifelse exch readstring pop
+  dup 0 3 index getinterval /rbmap xdef
+  dup 2 index dup getinterval /gbmap xdef
+  1 index dup 2 mul exch getinterval /bbmap xdef pop pop}bdef
+/it {gs np dtri aload pop moveto lineto lineto cp c
+  cols rows 8 compute_transform 
+  rbmap gbmap bbmap true 3 colorimage gr}bdef
+/il {newpath moveto lineto stroke}bdef
+currentdict end def
+%%EndProlog
+
+%%BeginSetup
+MathWorks begin
+
+0 cap
+
+end
+%%EndSetup
+
+%%Page: 1 1
+%%BeginPageSetup
+%%PageBoundingBox:    17    88   576   753
+MathWorks begin
+bpage
+%%EndPageSetup
+
+%%BeginObject: obj1
+bplot
+
+/dpi2point 12 def
+portraitMode 0204 9036 csm
+
+    0     0  6711  7980 rc
+87 dict begin %Colortable dictionary
+/c0 { 0.000000 0.000000 0.000000 sr} bdef
+/c1 { 1.000000 1.000000 1.000000 sr} bdef
+/c2 { 0.900000 0.000000 0.000000 sr} bdef
+/c3 { 0.000000 0.820000 0.000000 sr} bdef
+/c4 { 0.000000 0.000000 0.800000 sr} bdef
+/c5 { 0.910000 0.820000 0.320000 sr} bdef
+/c6 { 1.000000 0.260000 0.820000 sr} bdef
+/c7 { 0.000000 0.820000 0.820000 sr} bdef
+c0
+1 j
+1 sg
+   0    0 6712 7981 rf
+6 w
+0 2722 5201 0 0 -2722 872 3321 4 MP
+PP
+-5201 0 0 2722 5201 0 0 -2722 872 3321 5 MP stroke
+4 w
+DO
+0 sg
+ 872 3321 mt  872  599 L
+ 872  599 mt  872  599 L
+1392 3321 mt 1392  599 L
+1392  599 mt 1392  599 L
+1912 3321 mt 1912  599 L
+1912  599 mt 1912  599 L
+2432 3321 mt 2432  599 L
+2432  599 mt 2432  599 L
+2952 3321 mt 2952  599 L
+2952  599 mt 2952  599 L
+3472 3321 mt 3472  599 L
+3472  599 mt 3472  599 L
+3992 3321 mt 3992  599 L
+3992  599 mt 3992  599 L
+4512 3321 mt 4512  599 L
+4512  599 mt 4512  599 L
+5032 3321 mt 5032  599 L
+5032  599 mt 5032  599 L
+5552 3321 mt 5552  599 L
+5552  599 mt 5552  599 L
+6073 3321 mt 6073  599 L
+6073  599 mt 6073  599 L
+ 872 3321 mt 6073 3321 L
+6073 3321 mt 6073 3321 L
+ 872 2776 mt 6073 2776 L
+6073 2776 mt 6073 2776 L
+ 872 2232 mt 6073 2232 L
+6073 2232 mt 6073 2232 L
+ 872 1687 mt 6073 1687 L
+6073 1687 mt 6073 1687 L
+ 872 1143 mt 6073 1143 L
+6073 1143 mt 6073 1143 L
+ 872  599 mt 6073  599 L
+6073  599 mt 6073  599 L
+SO
+6 w
+ 872 3321 mt 6073 3321 L
+ 872  599 mt 6073  599 L
+ 872 3321 mt  872  599 L
+6073 3321 mt 6073  599 L
+ 872 3321 mt 6073 3321 L
+ 872 3321 mt  872  599 L
+ 872 3321 mt  872 3268 L
+ 872  599 mt  872  651 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 826 3511 mt 
+(0) s
+1392 3321 mt 1392 3268 L
+1392  599 mt 1392  651 L
+1346 3511 mt 
+(1) s
+1912 3321 mt 1912 3268 L
+1912  599 mt 1912  651 L
+1866 3511 mt 
+(2) s
+2432 3321 mt 2432 3268 L
+2432  599 mt 2432  651 L
+2386 3511 mt 
+(3) s
+2952 3321 mt 2952 3268 L
+2952  599 mt 2952  651 L
+2906 3511 mt 
+(4) s
+3472 3321 mt 3472 3268 L
+3472  599 mt 3472  651 L
+3426 3511 mt 
+(5) s
+3992 3321 mt 3992 3268 L
+3992  599 mt 3992  651 L
+3946 3511 mt 
+(6) s
+4512 3321 mt 4512 3268 L
+4512  599 mt 4512  651 L
+4466 3511 mt 
+(7) s
+5032 3321 mt 5032 3268 L
+5032  599 mt 5032  651 L
+4986 3511 mt 
+(8) s
+5552 3321 mt 5552 3268 L
+5552  599 mt 5552  651 L
+5506 3511 mt 
+(9) s
+6073 3321 mt 6073 3268 L
+6073  599 mt 6073  651 L
+5980 3511 mt 
+(10) s
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  924 3321 L
+6073 3321 mt 6020 3321 L
+ 462 3383 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 648 3279 mt 
+(-12) s
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 2776 mt  924 2776 L
+6073 2776 mt 6020 2776 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 462 2838 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 648 2734 mt 
+(-10) s
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 2232 mt  924 2232 L
+6073 2232 mt 6020 2232 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 462 2294 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 648 2190 mt 
+(-8) s
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 1687 mt  924 1687 L
+6073 1687 mt 6020 1687 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 462 1749 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 648 1645 mt 
+(-6) s
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 1143 mt  924 1143 L
+6073 1143 mt 6020 1143 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 462 1205 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 648 1101 mt 
+(-4) s
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872  599 mt  924  599 L
+6073  599 mt 6020  599 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 462  661 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 648  557 mt 
+(-2) s
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+DO
+ 872 3321 mt  872 3321 L
+ 872 3321 mt  872 3321 L
+SO
+ 872 3321 mt 6073 3321 L
+ 872  599 mt 6073  599 L
+ 872 3321 mt  872  599 L
+6073 3321 mt 6073  599 L
+gs 872 599 5202 2723 rc
+/c8 { 0.000000 0.000000 1.000000 sr} bdef
+c8
+1 0 521 75 520 76 520 77 520 78 520 81 520 85 520 92 
+520 254 520 558 520 988 872 710 12 MP stroke
+gr
+
+c8
+gs 799 637 5349 2511 rc
+  36   36  872  710 FO
+  36   36 1392 1698 FO
+  36   36 1912 2256 FO
+  36   36 2432 2510 FO
+  36   36 2952 2602 FO
+  36   36 3472 2687 FO
+  36   36 3992 2768 FO
+  36   36 4512 2846 FO
+  36   36 5032 2923 FO
+  36   36 5552 2999 FO
+  36   36 6073 3074 FO
+gr
+
+gs 872 599 5202 2723 rc
+gr
+
+0 sg
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+3126 3689 mt 
+(Iterations) s
+ 370 2579 mt  -90 rotate
+(Norm of residual) s
+90 rotate
+gs 872 599 5202 2723 rc
+/c9 { 0.847059 0.160784 0.000000 sr} bdef
+c9
+1 0 521 75 520 76 520 77 520 79 520 82 520 85 520 95 
+520 366 520 511 520 917 872 710 12 MP stroke
+gr
+
+c9
+gs 799 637 5349 2510 rc
+  36   36  872  710 FO
+  36   36 1392 1627 FO
+  36   36 1912 2138 FO
+  36   36 2432 2504 FO
+  36   36 2952 2599 FO
+  36   36 3472 2684 FO
+  36   36 3992 2766 FO
+  36   36 4512 2845 FO
+  36   36 5032 2922 FO
+  36   36 5552 2998 FO
+  36   36 6073 3073 FO
+gr
+
+gs 872 599 5202 2723 rc
+/c10 { 0.000000 0.498039 0.000000 sr} bdef
+c10
+1 1 521 75 520 75 520 78 520 79 520 81 520 86 520 95 
+520 331 520 538 520 923 872 710 12 MP stroke
+gr
+
+c10
+gs 799 637 5349 2509 rc
+  36   36  872  710 FO
+  36   36 1392 1633 FO
+  36   36 1912 2171 FO
+  36   36 2432 2502 FO
+  36   36 2952 2597 FO
+  36   36 3472 2683 FO
+  36   36 3992 2764 FO
+  36   36 4512 2843 FO
+  36   36 5032 2921 FO
+  36   36 5552 2996 FO
+  36   36 6073 3071 FO
+gr
+
+gs 872 599 5202 2723 rc
+/c11 { 0.000000 0.749020 0.749020 sr} bdef
+c11
+1 0 521 75 520 76 520 77 520 79 520 81 520 86 520 95 
+520 327 520 543 520 923 872 710 12 MP stroke
+gr
+
+c11
+gs 799 637 5349 2509 rc
+  36   36  872  710 FO
+  36   36 1392 1633 FO
+  36   36 1912 2176 FO
+  36   36 2432 2503 FO
+  36   36 2952 2598 FO
+  36   36 3472 2684 FO
+  36   36 3992 2765 FO
+  36   36 4512 2844 FO
+  36   36 5032 2921 FO
+  36   36 5552 2997 FO
+  36   36 6073 3072 FO
+gr
+
+gs 872 599 5202 2723 rc
+gr
+
+0 sg
+1041  467 mt 
+(Cubic Splines, 128 x 128 Problem, V\(2,1\), relax=gs,  KX=3, \
+KY=3) s
+1 sg
+0 2722 5201 0 0 -2722 872 7102 4 MP
+PP
+-5201 0 0 2722 5201 0 0 -2722 872 7102 5 MP stroke
+4 w
+DO
+0 sg
+ 872 7102 mt  872 4380 L
+ 872 4380 mt  872 4380 L
+1392 7102 mt 1392 4380 L
+1392 4380 mt 1392 4380 L
+1912 7102 mt 1912 4380 L
+1912 4380 mt 1912 4380 L
+2432 7102 mt 2432 4380 L
+2432 4380 mt 2432 4380 L
+2952 7102 mt 2952 4380 L
+2952 4380 mt 2952 4380 L
+3472 7102 mt 3472 4380 L
+3472 4380 mt 3472 4380 L
+3992 7102 mt 3992 4380 L
+3992 4380 mt 3992 4380 L
+4512 7102 mt 4512 4380 L
+4512 4380 mt 4512 4380 L
+5032 7102 mt 5032 4380 L
+5032 4380 mt 5032 4380 L
+5552 7102 mt 5552 4380 L
+5552 4380 mt 5552 4380 L
+6073 7102 mt 6073 4380 L
+6073 4380 mt 6073 4380 L
+ 872 7102 mt 6073 7102 L
+6073 7102 mt 6073 7102 L
+ 872 6421 mt 6073 6421 L
+6073 6421 mt 6073 6421 L
+ 872 5741 mt 6073 5741 L
+6073 5741 mt 6073 5741 L
+ 872 5060 mt 6073 5060 L
+6073 5060 mt 6073 5060 L
+ 872 4380 mt 6073 4380 L
+6073 4380 mt 6073 4380 L
+SO
+6 w
+ 872 7102 mt 6073 7102 L
+ 872 4380 mt 6073 4380 L
+ 872 7102 mt  872 4380 L
+6073 7102 mt 6073 4380 L
+ 872 7102 mt 6073 7102 L
+ 872 7102 mt  872 4380 L
+ 872 7102 mt  872 7049 L
+ 872 4380 mt  872 4432 L
+ 826 7292 mt 
+(0) s
+1392 7102 mt 1392 7049 L
+1392 4380 mt 1392 4432 L
+1346 7292 mt 
+(1) s
+1912 7102 mt 1912 7049 L
+1912 4380 mt 1912 4432 L
+1866 7292 mt 
+(2) s
+2432 7102 mt 2432 7049 L
+2432 4380 mt 2432 4432 L
+2386 7292 mt 
+(3) s
+2952 7102 mt 2952 7049 L
+2952 4380 mt 2952 4432 L
+2906 7292 mt 
+(4) s
+3472 7102 mt 3472 7049 L
+3472 4380 mt 3472 4432 L
+3426 7292 mt 
+(5) s
+3992 7102 mt 3992 7049 L
+3992 4380 mt 3992 4432 L
+3946 7292 mt 
+(6) s
+4512 7102 mt 4512 7049 L
+4512 4380 mt 4512 4432 L
+4466 7292 mt 
+(7) s
+5032 7102 mt 5032 7049 L
+5032 4380 mt 5032 4432 L
+4986 7292 mt 
+(8) s
+5552 7102 mt 5552 7049 L
+5552 4380 mt 5552 4432 L
+5506 7292 mt 
+(9) s
+6073 7102 mt 6073 7049 L
+6073 4380 mt 6073 4432 L
+5980 7292 mt 
+(10) s
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  924 7102 L
+6073 7102 mt 6020 7102 L
+ 462 7164 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 648 7060 mt 
+(-10) s
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 6421 mt  924 6421 L
+6073 6421 mt 6020 6421 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 462 6483 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 648 6379 mt 
+(-8) s
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 5741 mt  924 5741 L
+6073 5741 mt 6020 5741 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 462 5803 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 648 5699 mt 
+(-6) s
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 5060 mt  924 5060 L
+6073 5060 mt 6020 5060 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 462 5122 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 648 5018 mt 
+(-4) s
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 4380 mt  924 4380 L
+6073 4380 mt 6020 4380 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 462 4442 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 648 4338 mt 
+(-2) s
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+DO
+ 872 7102 mt  872 7102 L
+ 872 7102 mt  872 7102 L
+SO
+ 872 7102 mt 6073 7102 L
+ 872 4380 mt 6073 4380 L
+ 872 7102 mt  872 4380 L
+6073 7102 mt 6073 4380 L
+gs 872 4380 5202 2723 rc
+c8
+1 0 521 0 520 0 520 0 520 0 520 0 520 0 520 1 
+520 6 520 495 520 2027 872 4567 12 MP stroke
+gr
+
+c8
+gs 799 4494 5349 2676 rc
+0 j
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 841 4549 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 1361 6576 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 1881 7071 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 2401 7077 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 2921 7078 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 3441 7078 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 3961 7078 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 4481 7078 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 5001 7078 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 5521 7078 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 6042 7078 13 MP
+DP
+gr
+
+gs 872 4380 5202 2723 rc
+gr
+
+0 sg
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+3126 7470 mt 
+(Iterations) s
+ 370 6243 mt  -90 rotate
+(Norm of error) s
+90 rotate
+gs 872 4380 5202 2723 rc
+c9
+1 0 521 0 520 0 520 0 520 0 520 0 520 0 520 1 
+520 33 520 624 520 1871 872 4567 12 MP stroke
+gr
+
+c9
+gs 799 4494 5349 2676 rc
+0 j
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 841 4549 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 1361 6420 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 1881 7044 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 2401 7077 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 2921 7078 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 3441 7078 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 3961 7078 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 4481 7078 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 5001 7078 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 5521 7078 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 6042 7078 13 MP
+DP
+gr
+
+gs 872 4380 5202 2723 rc
+c10
+1 0 521 0 520 0 520 0 520 0 520 0 520 0 520 1 
+520 22 520 814 520 1692 872 4567 12 MP stroke
+gr
+
+c10
+gs 799 4494 5349 2676 rc
+0 j
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 841 4549 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 1361 6241 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 1881 7055 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 2401 7077 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 2921 7078 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 3441 7078 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 3961 7078 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 4481 7078 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 5001 7078 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 5521 7078 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 6042 7078 13 MP
+DP
+gr
+
+gs 872 4380 5202 2723 rc
+c11
+1 0 521 0 520 0 520 0 520 0 520 0 520 0 520 1 
+520 20 520 897 520 1611 872 4567 12 MP stroke
+gr
+
+c11
+gs 799 4494 5349 2676 rc
+0 j
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 841 4549 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 1361 6160 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 1881 7057 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 2401 7077 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 2921 7078 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 3441 7078 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 3961 7078 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 4481 7078 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 5001 7078 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 5521 7078 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 6042 7078 13 MP
+DP
+gr
+
+gs 872 4380 5202 2723 rc
+gr
+
+0 sg
+-290 198 1737 5827 2 MP stroke
+65 -44 -125 31 1507 6038 3 MP
+PP
+60 13 65 -44 -125 31 1507 6038 4 MP stroke
+-9 61 74 -105 1382 6069 3 MP
+PP
+-65 44 -9 61 74 -105 1382 6069 4 MP stroke
+1739 5790 mt 
+(levels=7) s
+-290 197 1871 6300 2 MP stroke
+65 -44 -125 30 1641 6511 3 MP
+PP
+60 14 65 -44 -125 30 1641 6511 4 MP stroke
+-9 60 74 -104 1516 6541 3 MP
+PP
+-65 44 -9 60 74 -104 1516 6541 4 MP stroke
+1873 6263 mt 
+(levels=3) s
+-290 198 1938 6536 2 MP stroke
+65 -44 -125 31 1708 6747 3 MP
+PP
+60 13 65 -44 -125 31 1708 6747 4 MP stroke
+-9 61 74 -105 1583 6778 3 MP
+PP
+-65 44 -9 61 74 -105 1583 6778 4 MP stroke
+1940 6499 mt 
+(levels=2) s
+-291 198 1795 6111 2 MP stroke
+65 -44 -125 30 1564 6323 3 MP
+PP
+60 14 65 -44 -125 30 1564 6323 4 MP stroke
+-9 61 74 -105 1439 6353 3 MP
+PP
+-65 44 -9 61 74 -105 1439 6353 4 MP stroke
+1797 6074 mt 
+(levels=4) s
+
+end %%Color Dict
+
+eplot
+%%EndObject
+
+epage
+end
+
+showpage
+
+%%Trailer
+%%EOF
diff --git a/multigrid/docs/cubic_mg2d_levels.fig b/multigrid/docs/cubic_mg2d_levels.fig
new file mode 100644
index 0000000..8c823c7
Binary files /dev/null and b/multigrid/docs/cubic_mg2d_levels.fig differ
diff --git a/multigrid/docs/cubic_mg2d_relax.eps b/multigrid/docs/cubic_mg2d_relax.eps
new file mode 100644
index 0000000..55c592f
--- /dev/null
+++ b/multigrid/docs/cubic_mg2d_relax.eps
@@ -0,0 +1,1482 @@
+%!PS-Adobe-3.0 EPSF-3.0
+%%Creator: MATLAB, The MathWorks, Inc. Version 7.14.0.739 (R2012a). Operating System: Linux 3.4.6-2.10-desktop #1 SMP PREEMPT Thu Jul 26 09:36:26 UTC 2012 (641c197) x86_64.
+%%Title: /home/ttran/HLST/2012/report-4/cubic_mg2d_relax.eps
+%%CreationDate: 12/21/2012  09:48:11
+%%DocumentNeededFonts: Helvetica
+%%DocumentProcessColors: Cyan Magenta Yellow Black
+%%LanguageLevel: 2
+%%Pages: 1
+%%BoundingBox:     4    72   589   769
+%%EndComments
+
+%%BeginProlog
+% MathWorks dictionary
+/MathWorks 160 dict begin
+% definition operators
+/bdef {bind def} bind def
+/ldef {load def} bind def
+/xdef {exch def} bdef
+/xstore {exch store} bdef
+% operator abbreviations
+/c  /clip ldef
+/cc /concat ldef
+/cp /closepath ldef
+/gr /grestore ldef
+/gs /gsave ldef
+/mt /moveto ldef
+/np /newpath ldef
+/cm /currentmatrix ldef
+/sm /setmatrix ldef
+/rm /rmoveto ldef
+/rl /rlineto ldef
+/s {show newpath} bdef
+/sc {setcmykcolor} bdef
+/sr /setrgbcolor ldef
+/sg /setgray ldef
+/w /setlinewidth ldef
+/j /setlinejoin ldef
+/cap /setlinecap ldef
+/rc {rectclip} bdef
+/rf {rectfill} bdef
+% page state control
+/pgsv () def
+/bpage {/pgsv save def} bdef
+/epage {pgsv restore} bdef
+/bplot /gsave ldef
+/eplot {stroke grestore} bdef
+% orientation switch
+/portraitMode 0 def /landscapeMode 1 def /rotateMode 2 def
+% coordinate system mappings
+/dpi2point 0 def
+% font control
+/FontSize 0 def
+/FMS {/FontSize xstore findfont [FontSize 0 0 FontSize neg 0 0]
+  makefont setfont} bdef
+/reencode {exch dup where {pop load} {pop StandardEncoding} ifelse
+  exch dup 3 1 roll findfont dup length dict begin
+  { 1 index /FID ne {def}{pop pop} ifelse } forall
+  /Encoding exch def currentdict end definefont pop} bdef
+/isroman {findfont /CharStrings get /Agrave known} bdef
+/FMSR {3 1 roll 1 index dup isroman {reencode} {pop pop} ifelse
+  exch FMS} bdef
+/csm {1 dpi2point div -1 dpi2point div scale neg translate
+ dup landscapeMode eq {pop -90 rotate}
+  {rotateMode eq {90 rotate} if} ifelse} bdef
+% line types: solid, dotted, dashed, dotdash
+/SO { [] 0 setdash } bdef
+/DO { [.5 dpi2point mul 4 dpi2point mul] 0 setdash } bdef
+/DA { [6 dpi2point mul] 0 setdash } bdef
+/DD { [.5 dpi2point mul 4 dpi2point mul 6 dpi2point mul 4
+  dpi2point mul] 0 setdash } bdef
+% macros for lines and objects
+/L {lineto stroke} bdef
+/MP {3 1 roll moveto 1 sub {rlineto} repeat} bdef
+/AP {{rlineto} repeat} bdef
+/PDlw -1 def
+/W {/PDlw currentlinewidth def setlinewidth} def
+/PP {closepath eofill} bdef
+/DP {closepath stroke} bdef
+/MR {4 -2 roll moveto dup  0 exch rlineto exch 0 rlineto
+  neg 0 exch rlineto closepath} bdef
+/FR {MR stroke} bdef
+/PR {MR fill} bdef
+/L1i {{currentfile picstr readhexstring pop} image} bdef
+/tMatrix matrix def
+/MakeOval {newpath tMatrix currentmatrix pop translate scale
+0 0 1 0 360 arc tMatrix setmatrix} bdef
+/FO {MakeOval stroke} bdef
+/PO {MakeOval fill} bdef
+/PD {currentlinewidth 2 div 0 360 arc fill
+   PDlw -1 eq not {PDlw w /PDlw -1 def} if} def
+/FA {newpath tMatrix currentmatrix pop translate scale
+  0 0 1 5 -2 roll arc tMatrix setmatrix stroke} bdef
+/PA {newpath tMatrix currentmatrix pop	translate 0 0 moveto scale
+  0 0 1 5 -2 roll arc closepath tMatrix setmatrix fill} bdef
+/FAn {newpath tMatrix currentmatrix pop translate scale
+  0 0 1 5 -2 roll arcn tMatrix setmatrix stroke} bdef
+/PAn {newpath tMatrix currentmatrix pop translate 0 0 moveto scale
+  0 0 1 5 -2 roll arcn closepath tMatrix setmatrix fill} bdef
+/vradius 0 def /hradius 0 def /lry 0 def
+/lrx 0 def /uly 0 def /ulx 0 def /rad 0 def
+/MRR {/vradius xdef /hradius xdef /lry xdef /lrx xdef /uly xdef
+  /ulx xdef newpath tMatrix currentmatrix pop ulx hradius add uly
+  vradius add translate hradius vradius scale 0 0 1 180 270 arc 
+  tMatrix setmatrix lrx hradius sub uly vradius add translate
+  hradius vradius scale 0 0 1 270 360 arc tMatrix setmatrix
+  lrx hradius sub lry vradius sub translate hradius vradius scale
+  0 0 1 0 90 arc tMatrix setmatrix ulx hradius add lry vradius sub
+  translate hradius vradius scale 0 0 1 90 180 arc tMatrix setmatrix
+  closepath} bdef
+/FRR {MRR stroke } bdef
+/PRR {MRR fill } bdef
+/MlrRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lry uly sub 2 div def
+  newpath tMatrix currentmatrix pop ulx rad add uly rad add translate
+  rad rad scale 0 0 1 90 270 arc tMatrix setmatrix lrx rad sub lry rad
+  sub translate rad rad scale 0 0 1 270 90 arc tMatrix setmatrix
+  closepath} bdef
+/FlrRR {MlrRR stroke } bdef
+/PlrRR {MlrRR fill } bdef
+/MtbRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lrx ulx sub 2 div def
+  newpath tMatrix currentmatrix pop ulx rad add uly rad add translate
+  rad rad scale 0 0 1 180 360 arc tMatrix setmatrix lrx rad sub lry rad
+  sub translate rad rad scale 0 0 1 0 180 arc tMatrix setmatrix
+  closepath} bdef
+/FtbRR {MtbRR stroke } bdef
+/PtbRR {MtbRR fill } bdef
+/stri 6 array def /dtri 6 array def
+/smat 6 array def /dmat 6 array def
+/tmat1 6 array def /tmat2 6 array def /dif 3 array def
+/asub {/ind2 exch def /ind1 exch def dup dup
+  ind1 get exch ind2 get sub exch } bdef
+/tri_to_matrix {
+  2 0 asub 3 1 asub 4 0 asub 5 1 asub
+  dup 0 get exch 1 get 7 -1 roll astore } bdef
+/compute_transform {
+  dmat dtri tri_to_matrix tmat1 invertmatrix 
+  smat stri tri_to_matrix tmat2 concatmatrix } bdef
+/ds {stri astore pop} bdef
+/dt {dtri astore pop} bdef
+/db {2 copy /cols xdef /rows xdef mul dup 3 mul string
+  currentfile 
+  3 index 0 eq {/ASCIIHexDecode filter}
+  {/ASCII85Decode filter 3 index 2 eq {/RunLengthDecode filter} if }
+  ifelse exch readstring pop
+  dup 0 3 index getinterval /rbmap xdef
+  dup 2 index dup getinterval /gbmap xdef
+  1 index dup 2 mul exch getinterval /bbmap xdef pop pop}bdef
+/it {gs np dtri aload pop moveto lineto lineto cp c
+  cols rows 8 compute_transform 
+  rbmap gbmap bbmap true 3 colorimage gr}bdef
+/il {newpath moveto lineto stroke}bdef
+currentdict end def
+%%EndProlog
+
+%%BeginSetup
+MathWorks begin
+
+0 cap
+
+end
+%%EndSetup
+
+%%Page: 1 1
+%%BeginPageSetup
+%%PageBoundingBox:     4    72   589   769
+MathWorks begin
+bpage
+%%EndPageSetup
+
+%%BeginObject: obj1
+bplot
+
+/dpi2point 12 def
+portraitMode 0048 9228 csm
+
+    0     0  7028  8364 rc
+87 dict begin %Colortable dictionary
+/c0 { 0.000000 0.000000 0.000000 sr} bdef
+/c1 { 1.000000 1.000000 1.000000 sr} bdef
+/c2 { 0.900000 0.000000 0.000000 sr} bdef
+/c3 { 0.000000 0.820000 0.000000 sr} bdef
+/c4 { 0.000000 0.000000 0.800000 sr} bdef
+/c5 { 0.910000 0.820000 0.320000 sr} bdef
+/c6 { 1.000000 0.260000 0.820000 sr} bdef
+/c7 { 0.000000 0.820000 0.820000 sr} bdef
+c0
+1 j
+1 sg
+   0    0 7029 8365 rf
+6 w
+0 2855 5446 0 0 -2855 914 3481 4 MP
+PP
+-5446 0 0 2855 5446 0 0 -2855 914 3481 5 MP stroke
+4 w
+DO
+0 sg
+ 914 3481 mt  914  627 L
+ 914  627 mt  914  627 L
+1458 3481 mt 1458  627 L
+1458  627 mt 1458  627 L
+2003 3481 mt 2003  627 L
+2003  627 mt 2003  627 L
+2547 3481 mt 2547  627 L
+2547  627 mt 2547  627 L
+3092 3481 mt 3092  627 L
+3092  627 mt 3092  627 L
+3637 3481 mt 3637  627 L
+3637  627 mt 3637  627 L
+4181 3481 mt 4181  627 L
+4181  627 mt 4181  627 L
+4726 3481 mt 4726  627 L
+4726  627 mt 4726  627 L
+5270 3481 mt 5270  627 L
+5270  627 mt 5270  627 L
+5815 3481 mt 5815  627 L
+5815  627 mt 5815  627 L
+6360 3481 mt 6360  627 L
+6360  627 mt 6360  627 L
+ 914 3481 mt 6360 3481 L
+6360 3481 mt 6360 3481 L
+ 914 2910 mt 6360 2910 L
+6360 2910 mt 6360 2910 L
+ 914 2339 mt 6360 2339 L
+6360 2339 mt 6360 2339 L
+ 914 1768 mt 6360 1768 L
+6360 1768 mt 6360 1768 L
+ 914 1197 mt 6360 1197 L
+6360 1197 mt 6360 1197 L
+ 914  627 mt 6360  627 L
+6360  627 mt 6360  627 L
+SO
+6 w
+ 914 3481 mt 6360 3481 L
+ 914  626 mt 6360  626 L
+ 914 3481 mt  914  626 L
+6360 3481 mt 6360  626 L
+ 914 3481 mt 6360 3481 L
+ 914 3481 mt  914  626 L
+ 914 3481 mt  914 3426 L
+ 914  627 mt  914  681 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 868 3671 mt 
+(0) s
+1458 3481 mt 1458 3426 L
+1458  627 mt 1458  681 L
+1412 3671 mt 
+(1) s
+2003 3481 mt 2003 3426 L
+2003  627 mt 2003  681 L
+1957 3671 mt 
+(2) s
+2547 3481 mt 2547 3426 L
+2547  627 mt 2547  681 L
+2501 3671 mt 
+(3) s
+3092 3481 mt 3092 3426 L
+3092  627 mt 3092  681 L
+3046 3671 mt 
+(4) s
+3637 3481 mt 3637 3426 L
+3637  627 mt 3637  681 L
+3591 3671 mt 
+(5) s
+4181 3481 mt 4181 3426 L
+4181  627 mt 4181  681 L
+4135 3671 mt 
+(6) s
+4726 3481 mt 4726 3426 L
+4726  627 mt 4726  681 L
+4680 3671 mt 
+(7) s
+5270 3481 mt 5270 3426 L
+5270  627 mt 5270  681 L
+5224 3671 mt 
+(8) s
+5815 3481 mt 5815 3426 L
+5815  627 mt 5815  681 L
+5769 3671 mt 
+(9) s
+6360 3481 mt 6360 3426 L
+6360  627 mt 6360  681 L
+6267 3671 mt 
+(10) s
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  968 3481 L
+6360 3481 mt 6305 3481 L
+ 504 3543 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 690 3439 mt 
+(-12) s
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 2910 mt  968 2910 L
+6360 2910 mt 6305 2910 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 504 2972 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 690 2868 mt 
+(-10) s
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 2339 mt  968 2339 L
+6360 2339 mt 6305 2339 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 504 2401 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 690 2297 mt 
+(-8) s
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 1768 mt  968 1768 L
+6360 1768 mt 6305 1768 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 504 1830 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 690 1726 mt 
+(-6) s
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 1197 mt  968 1197 L
+6360 1197 mt 6305 1197 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 504 1259 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 690 1155 mt 
+(-4) s
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914  627 mt  968  627 L
+6360  627 mt 6305  627 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 504  689 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 690  585 mt 
+(-2) s
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+DO
+ 914 3481 mt  914 3481 L
+ 914 3481 mt  914 3481 L
+SO
+ 914 3481 mt 6360 3481 L
+ 914  626 mt 6360  626 L
+ 914 3481 mt  914  626 L
+6360 3481 mt 6360  626 L
+gs 914 627 5447 2855 rc
+/c8 { 0.000000 0.000000 1.000000 sr} bdef
+c8
+545 79 545 79 544 81 545 83 544 85 545 90 545 100 544 342 
+545 570 544 968 914 743 11 MP stroke
+gr
+
+c8
+gs 841 670 5593 2624 rc
+  36   36  914  743 FO
+  36   36 1458 1711 FO
+  36   36 2003 2281 FO
+  36   36 2547 2623 FO
+  36   36 3092 2723 FO
+  36   36 3637 2813 FO
+  36   36 4181 2898 FO
+  36   36 4726 2981 FO
+  36   36 5270 3062 FO
+  36   36 5815 3141 FO
+  36   36 6360 3220 FO
+gr
+
+gs 914 627 5447 2855 rc
+gr
+
+0 sg
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+3291 3849 mt 
+(Iterations) s
+ 412 2673 mt  -90 rotate
+(Norm of residual) s
+90 rotate
+1509  495 mt 
+(Cubic Splines, 128 x 128 problem, relax=gs, KX=3, KY=3) s
+gs 914 627 5447 2855 rc
+/c9 { 0.847059 0.160784 0.000000 sr} bdef
+c9
+545 54 545 55 544 55 545 58 544 61 545 83 545 275 544 390 
+545 456 544 831 914 743 11 MP stroke
+gr
+
+c9
+gs 841 670 5593 2465 rc
+  36   36  914  743 FO
+  36   36 1458 1574 FO
+  36   36 2003 2030 FO
+  36   36 2547 2420 FO
+  36   36 3092 2695 FO
+  36   36 3637 2778 FO
+  36   36 4181 2839 FO
+  36   36 4726 2897 FO
+  36   36 5270 2952 FO
+  36   36 5815 3007 FO
+  36   36 6360 3061 FO
+gr
+
+gs 914 627 5447 2855 rc
+/c10 { 0.000000 0.498039 0.000000 sr} bdef
+c10
+545 47 545 56 544 82 545 158 544 225 545 236 545 229 544 262 
+545 338 544 275 914 743 11 MP stroke
+gr
+
+c10
+gs 841 670 5593 2055 rc
+  36   36  914  743 FO
+  36   36 1458 1018 FO
+  36   36 2003 1356 FO
+  36   36 2547 1618 FO
+  36   36 3092 1847 FO
+  36   36 3637 2083 FO
+  36   36 4181 2308 FO
+  36   36 4726 2466 FO
+  36   36 5270 2548 FO
+  36   36 5815 2604 FO
+  36   36 6360 2651 FO
+gr
+
+gs 914 627 5447 2855 rc
+gr
+
+1 sg
+0 2854 5446 0 0 -2854 914 7444 4 MP
+PP
+-5446 0 0 2854 5446 0 0 -2854 914 7444 5 MP stroke
+4 w
+DO
+0 sg
+ 914 7444 mt  914 4590 L
+ 914 4590 mt  914 4590 L
+1458 7444 mt 1458 4590 L
+1458 4590 mt 1458 4590 L
+2003 7444 mt 2003 4590 L
+2003 4590 mt 2003 4590 L
+2547 7444 mt 2547 4590 L
+2547 4590 mt 2547 4590 L
+3092 7444 mt 3092 4590 L
+3092 4590 mt 3092 4590 L
+3637 7444 mt 3637 4590 L
+3637 4590 mt 3637 4590 L
+4181 7444 mt 4181 4590 L
+4181 4590 mt 4181 4590 L
+4726 7444 mt 4726 4590 L
+4726 4590 mt 4726 4590 L
+5270 7444 mt 5270 4590 L
+5270 4590 mt 5270 4590 L
+5815 7444 mt 5815 4590 L
+5815 4590 mt 5815 4590 L
+6360 7444 mt 6360 4590 L
+6360 4590 mt 6360 4590 L
+ 914 7444 mt 6360 7444 L
+6360 7444 mt 6360 7444 L
+ 914 6730 mt 6360 6730 L
+6360 6730 mt 6360 6730 L
+ 914 6017 mt 6360 6017 L
+6360 6017 mt 6360 6017 L
+ 914 5303 mt 6360 5303 L
+6360 5303 mt 6360 5303 L
+ 914 4590 mt 6360 4590 L
+6360 4590 mt 6360 4590 L
+SO
+6 w
+ 914 7444 mt 6360 7444 L
+ 914 4590 mt 6360 4590 L
+ 914 7444 mt  914 4590 L
+6360 7444 mt 6360 4590 L
+ 914 7444 mt 6360 7444 L
+ 914 7444 mt  914 4590 L
+ 914 7444 mt  914 7389 L
+ 914 4590 mt  914 4644 L
+ 868 7634 mt 
+(0) s
+1458 7444 mt 1458 7389 L
+1458 4590 mt 1458 4644 L
+1412 7634 mt 
+(1) s
+2003 7444 mt 2003 7389 L
+2003 4590 mt 2003 4644 L
+1957 7634 mt 
+(2) s
+2547 7444 mt 2547 7389 L
+2547 4590 mt 2547 4644 L
+2501 7634 mt 
+(3) s
+3092 7444 mt 3092 7389 L
+3092 4590 mt 3092 4644 L
+3046 7634 mt 
+(4) s
+3637 7444 mt 3637 7389 L
+3637 4590 mt 3637 4644 L
+3591 7634 mt 
+(5) s
+4181 7444 mt 4181 7389 L
+4181 4590 mt 4181 4644 L
+4135 7634 mt 
+(6) s
+4726 7444 mt 4726 7389 L
+4726 4590 mt 4726 4644 L
+4680 7634 mt 
+(7) s
+5270 7444 mt 5270 7389 L
+5270 4590 mt 5270 4644 L
+5224 7634 mt 
+(8) s
+5815 7444 mt 5815 7389 L
+5815 4590 mt 5815 4644 L
+5769 7634 mt 
+(9) s
+6360 7444 mt 6360 7389 L
+6360 4590 mt 6360 4644 L
+6267 7634 mt 
+(10) s
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  968 7444 L
+6360 7444 mt 6305 7444 L
+ 504 7506 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 690 7402 mt 
+(-10) s
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 6730 mt  968 6730 L
+6360 6730 mt 6305 6730 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 504 6792 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 690 6688 mt 
+(-8) s
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 6017 mt  968 6017 L
+6360 6017 mt 6305 6017 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 504 6079 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 690 5975 mt 
+(-6) s
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 5303 mt  968 5303 L
+6360 5303 mt 6305 5303 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 504 5365 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 690 5261 mt 
+(-4) s
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 4590 mt  968 4590 L
+6360 4590 mt 6305 4590 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 504 4652 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 690 4548 mt 
+(-2) s
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+DO
+ 914 7444 mt  914 7444 L
+ 914 7444 mt  914 7444 L
+SO
+ 914 7444 mt 6360 7444 L
+ 914 4590 mt 6360 4590 L
+ 914 7444 mt  914 4590 L
+6360 7444 mt 6360 4590 L
+gs 914 4590 5447 2855 rc
+c8
+545 0 545 0 544 0 545 0 544 0 545 0 545 0 544 22 
+545 939 544 1690 914 4786 11 MP stroke
+gr
+
+c8
+gs 841 4713 5593 2798 rc
+0 j
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 883 4768 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 1427 6458 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 1972 7397 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 2516 7419 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 3061 7419 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 3606 7419 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 4150 7419 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 4695 7419 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 5239 7419 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 5784 7419 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 6329 7419 13 MP
+DP
+gr
+
+gs 914 4590 5447 2855 rc
+gr
+
+0 sg
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+3291 7812 mt 
+(Iterations) s
+ 412 6519 mt  -90 rotate
+(Norm of error) s
+90 rotate
+gs 914 4590 5447 2855 rc
+c9
+545 0 545 0 544 0 545 0 544 0 545 0 545 3 544 305 
+545 1174 544 1169 914 4786 11 MP stroke
+gr
+
+c9
+gs 841 4713 5593 2798 rc
+0 j
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 883 4768 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 1427 5937 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 1972 7111 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 2516 7416 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 3061 7419 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 3606 7419 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 4150 7419 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 4695 7419 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 5239 7419 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 5784 7419 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 6329 7419 13 MP
+DP
+gr
+
+gs 914 4590 5447 2855 rc
+c10
+545 0 545 0 544 1 545 11 544 124 545 290 545 388 544 604 
+545 608 544 625 914 4786 11 MP stroke
+gr
+
+c10
+gs 841 4713 5593 2798 rc
+0 j
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 883 4768 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 1427 5393 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 1972 6001 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 2516 6605 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 3061 6993 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 3606 7283 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 4150 7407 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 4695 7418 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 5239 7419 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 5784 7419 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 6329 7419 13 MP
+DP
+gr
+
+gs 914 4590 5447 2855 rc
+gr
+
+0 sg
+-199 130 2409 5908 2 MP stroke
+65 -43 -125 28 2270 6053 3 MP
+PP
+60 15 65 -43 -125 28 2270 6053 4 MP stroke
+-11 61 76 -104 2145 6081 3 MP
+PP
+-65 43 -11 61 76 -104 2145 6081 4 MP stroke
+2411 5871 mt 
+(V\(1,0\)) s
+-216 174 2419 6894 2 MP stroke
+61 -49 -122 40 2264 7077 3 MP
+PP
+61 9 61 -49 -122 40 2264 7077 4 MP stroke
+-5 61 66 -110 2142 7117 3 MP
+PP
+-61 49 -5 61 66 -110 2142 7117 4 MP stroke
+2421 6857 mt 
+(V\(1,1\)) s
+167 -222 1372 7134 2 MP stroke
+-47 62 108 -69 1478 6919 3 MP
+PP
+-61 7 -47 62 108 -69 1478 6919 4 MP stroke
+-11 -61 -36 123 1586 6850 3 MP
+PP
+47 -62 -11 -61 -36 123 1586 6850 4 MP stroke
+ 915 7290 mt 
+(V\(2,1\)) s
+
+end %%Color Dict
+
+eplot
+%%EndObject
+
+epage
+end
+
+showpage
+
+%%Trailer
+%%EOF
diff --git a/multigrid/docs/cubic_mg2d_relax.fig b/multigrid/docs/cubic_mg2d_relax.fig
new file mode 100644
index 0000000..8252dd5
Binary files /dev/null and b/multigrid/docs/cubic_mg2d_relax.fig differ
diff --git a/multigrid/docs/cyl_conv.eps b/multigrid/docs/cyl_conv.eps
new file mode 100644
index 0000000..f0326c7
--- /dev/null
+++ b/multigrid/docs/cyl_conv.eps
@@ -0,0 +1,991 @@
+%!PS-Adobe-3.0 EPSF-3.0
+%%Creator: MATLAB, The MathWorks, Inc. Version 7.14.0.739 (R2012a). Operating System: Linux 2.6.27.56-0.1-default #1 SMP 2010-12-01 16:57:58 +0100 x86_64.
+%%Title: /home/ttran/multigrid/bsplines/cyl_conv.eps
+%%CreationDate: 09/03/2012  14:52:17
+%%DocumentNeededFonts: Helvetica
+%%DocumentProcessColors: Cyan Magenta Yellow Black
+%%LanguageLevel: 2
+%%Pages: 1
+%%BoundingBox:    73   252   521   589
+%%EndComments
+
+%%BeginProlog
+% MathWorks dictionary
+/MathWorks 160 dict begin
+% definition operators
+/bdef {bind def} bind def
+/ldef {load def} bind def
+/xdef {exch def} bdef
+/xstore {exch store} bdef
+% operator abbreviations
+/c  /clip ldef
+/cc /concat ldef
+/cp /closepath ldef
+/gr /grestore ldef
+/gs /gsave ldef
+/mt /moveto ldef
+/np /newpath ldef
+/cm /currentmatrix ldef
+/sm /setmatrix ldef
+/rm /rmoveto ldef
+/rl /rlineto ldef
+/s {show newpath} bdef
+/sc {setcmykcolor} bdef
+/sr /setrgbcolor ldef
+/sg /setgray ldef
+/w /setlinewidth ldef
+/j /setlinejoin ldef
+/cap /setlinecap ldef
+/rc {rectclip} bdef
+/rf {rectfill} bdef
+% page state control
+/pgsv () def
+/bpage {/pgsv save def} bdef
+/epage {pgsv restore} bdef
+/bplot /gsave ldef
+/eplot {stroke grestore} bdef
+% orientation switch
+/portraitMode 0 def /landscapeMode 1 def /rotateMode 2 def
+% coordinate system mappings
+/dpi2point 0 def
+% font control
+/FontSize 0 def
+/FMS {/FontSize xstore findfont [FontSize 0 0 FontSize neg 0 0]
+  makefont setfont} bdef
+/reencode {exch dup where {pop load} {pop StandardEncoding} ifelse
+  exch dup 3 1 roll findfont dup length dict begin
+  { 1 index /FID ne {def}{pop pop} ifelse } forall
+  /Encoding exch def currentdict end definefont pop} bdef
+/isroman {findfont /CharStrings get /Agrave known} bdef
+/FMSR {3 1 roll 1 index dup isroman {reencode} {pop pop} ifelse
+  exch FMS} bdef
+/csm {1 dpi2point div -1 dpi2point div scale neg translate
+ dup landscapeMode eq {pop -90 rotate}
+  {rotateMode eq {90 rotate} if} ifelse} bdef
+% line types: solid, dotted, dashed, dotdash
+/SO { [] 0 setdash } bdef
+/DO { [.5 dpi2point mul 4 dpi2point mul] 0 setdash } bdef
+/DA { [6 dpi2point mul] 0 setdash } bdef
+/DD { [.5 dpi2point mul 4 dpi2point mul 6 dpi2point mul 4
+  dpi2point mul] 0 setdash } bdef
+% macros for lines and objects
+/L {lineto stroke} bdef
+/MP {3 1 roll moveto 1 sub {rlineto} repeat} bdef
+/AP {{rlineto} repeat} bdef
+/PDlw -1 def
+/W {/PDlw currentlinewidth def setlinewidth} def
+/PP {closepath eofill} bdef
+/DP {closepath stroke} bdef
+/MR {4 -2 roll moveto dup  0 exch rlineto exch 0 rlineto
+  neg 0 exch rlineto closepath} bdef
+/FR {MR stroke} bdef
+/PR {MR fill} bdef
+/L1i {{currentfile picstr readhexstring pop} image} bdef
+/tMatrix matrix def
+/MakeOval {newpath tMatrix currentmatrix pop translate scale
+0 0 1 0 360 arc tMatrix setmatrix} bdef
+/FO {MakeOval stroke} bdef
+/PO {MakeOval fill} bdef
+/PD {currentlinewidth 2 div 0 360 arc fill
+   PDlw -1 eq not {PDlw w /PDlw -1 def} if} def
+/FA {newpath tMatrix currentmatrix pop translate scale
+  0 0 1 5 -2 roll arc tMatrix setmatrix stroke} bdef
+/PA {newpath tMatrix currentmatrix pop	translate 0 0 moveto scale
+  0 0 1 5 -2 roll arc closepath tMatrix setmatrix fill} bdef
+/FAn {newpath tMatrix currentmatrix pop translate scale
+  0 0 1 5 -2 roll arcn tMatrix setmatrix stroke} bdef
+/PAn {newpath tMatrix currentmatrix pop translate 0 0 moveto scale
+  0 0 1 5 -2 roll arcn closepath tMatrix setmatrix fill} bdef
+/vradius 0 def /hradius 0 def /lry 0 def
+/lrx 0 def /uly 0 def /ulx 0 def /rad 0 def
+/MRR {/vradius xdef /hradius xdef /lry xdef /lrx xdef /uly xdef
+  /ulx xdef newpath tMatrix currentmatrix pop ulx hradius add uly
+  vradius add translate hradius vradius scale 0 0 1 180 270 arc 
+  tMatrix setmatrix lrx hradius sub uly vradius add translate
+  hradius vradius scale 0 0 1 270 360 arc tMatrix setmatrix
+  lrx hradius sub lry vradius sub translate hradius vradius scale
+  0 0 1 0 90 arc tMatrix setmatrix ulx hradius add lry vradius sub
+  translate hradius vradius scale 0 0 1 90 180 arc tMatrix setmatrix
+  closepath} bdef
+/FRR {MRR stroke } bdef
+/PRR {MRR fill } bdef
+/MlrRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lry uly sub 2 div def
+  newpath tMatrix currentmatrix pop ulx rad add uly rad add translate
+  rad rad scale 0 0 1 90 270 arc tMatrix setmatrix lrx rad sub lry rad
+  sub translate rad rad scale 0 0 1 270 90 arc tMatrix setmatrix
+  closepath} bdef
+/FlrRR {MlrRR stroke } bdef
+/PlrRR {MlrRR fill } bdef
+/MtbRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lrx ulx sub 2 div def
+  newpath tMatrix currentmatrix pop ulx rad add uly rad add translate
+  rad rad scale 0 0 1 180 360 arc tMatrix setmatrix lrx rad sub lry rad
+  sub translate rad rad scale 0 0 1 0 180 arc tMatrix setmatrix
+  closepath} bdef
+/FtbRR {MtbRR stroke } bdef
+/PtbRR {MtbRR fill } bdef
+/stri 6 array def /dtri 6 array def
+/smat 6 array def /dmat 6 array def
+/tmat1 6 array def /tmat2 6 array def /dif 3 array def
+/asub {/ind2 exch def /ind1 exch def dup dup
+  ind1 get exch ind2 get sub exch } bdef
+/tri_to_matrix {
+  2 0 asub 3 1 asub 4 0 asub 5 1 asub
+  dup 0 get exch 1 get 7 -1 roll astore } bdef
+/compute_transform {
+  dmat dtri tri_to_matrix tmat1 invertmatrix 
+  smat stri tri_to_matrix tmat2 concatmatrix } bdef
+/ds {stri astore pop} bdef
+/dt {dtri astore pop} bdef
+/db {2 copy /cols xdef /rows xdef mul dup 3 mul string
+  currentfile 
+  3 index 0 eq {/ASCIIHexDecode filter}
+  {/ASCII85Decode filter 3 index 2 eq {/RunLengthDecode filter} if }
+  ifelse exch readstring pop
+  dup 0 3 index getinterval /rbmap xdef
+  dup 2 index dup getinterval /gbmap xdef
+  1 index dup 2 mul exch getinterval /bbmap xdef pop pop}bdef
+/it {gs np dtri aload pop moveto lineto lineto cp c
+  cols rows 8 compute_transform 
+  rbmap gbmap bbmap true 3 colorimage gr}bdef
+/il {newpath moveto lineto stroke}bdef
+currentdict end def
+%%EndProlog
+
+%%BeginSetup
+MathWorks begin
+
+0 cap
+
+end
+%%EndSetup
+
+%%Page: 1 1
+%%BeginPageSetup
+%%PageBoundingBox:    73   252   521   589
+MathWorks begin
+bpage
+%%EndPageSetup
+
+%%BeginObject: obj1
+bplot
+
+/dpi2point 12 def
+portraitMode 0876 7068 csm
+
+    0     0  5376  4034 rc
+85 dict begin %Colortable dictionary
+/c0 { 0.000000 0.000000 0.000000 sr} bdef
+/c1 { 1.000000 1.000000 1.000000 sr} bdef
+/c2 { 0.900000 0.000000 0.000000 sr} bdef
+/c3 { 0.000000 0.820000 0.000000 sr} bdef
+/c4 { 0.000000 0.000000 0.800000 sr} bdef
+/c5 { 0.910000 0.820000 0.320000 sr} bdef
+/c6 { 1.000000 0.260000 0.820000 sr} bdef
+/c7 { 0.000000 0.820000 0.820000 sr} bdef
+c0
+1 j
+1 sg
+   0    0 5377 4035 rf
+6 w
+0 3231 4166 0 0 -3231 699 3575 4 MP
+PP
+-4166 0 0 3231 4166 0 0 -3231 699 3575 5 MP stroke
+4 w
+DO
+0 sg
+ 699 3575 mt  699  344 L
+ 699  344 mt  699  344 L
+2087 3575 mt 2087  344 L
+2087  344 mt 2087  344 L
+3476 3575 mt 3476  344 L
+3476  344 mt 3476  344 L
+4865 3575 mt 4865  344 L
+4865  344 mt 4865  344 L
+ 699 3575 mt 4865 3575 L
+4865 3575 mt 4865 3575 L
+ 699 2928 mt 4865 2928 L
+4865 2928 mt 4865 2928 L
+ 699 2282 mt 4865 2282 L
+4865 2282 mt 4865 2282 L
+ 699 1636 mt 4865 1636 L
+4865 1636 mt 4865 1636 L
+ 699  990 mt 4865  990 L
+4865  990 mt 4865  990 L
+ 699  344 mt 4865  344 L
+4865  344 mt 4865  344 L
+SO
+6 w
+ 699 3575 mt 4865 3575 L
+ 699  344 mt 4865  344 L
+ 699 3575 mt  699  344 L
+4865 3575 mt 4865  344 L
+ 699 3575 mt 4865 3575 L
+ 699 3575 mt  699  344 L
+ 699 3575 mt  699 3554 L
+ 699  344 mt  699  364 L
+DO
+ 699 3575 mt  699  344 L
+ 699  344 mt  699  344 L
+SO
+ 699 3575 mt  699 3533 L
+ 699  344 mt  699  385 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 575 3817 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 761 3713 mt 
+(1) s
+1117 3575 mt 1117 3554 L
+1117  344 mt 1117  364 L
+DO
+1117 3575 mt 1117  344 L
+1117  344 mt 1117  344 L
+SO
+1361 3575 mt 1361 3554 L
+1361  344 mt 1361  364 L
+DO
+1361 3575 mt 1361  344 L
+1361  344 mt 1361  344 L
+SO
+1535 3575 mt 1535 3554 L
+1535  344 mt 1535  364 L
+DO
+1535 3575 mt 1535  344 L
+1535  344 mt 1535  344 L
+SO
+1669 3575 mt 1669 3554 L
+1669  344 mt 1669  364 L
+DO
+1669 3575 mt 1669  344 L
+1669  344 mt 1669  344 L
+SO
+1779 3575 mt 1779 3554 L
+1779  344 mt 1779  364 L
+DO
+1779 3575 mt 1779  344 L
+1779  344 mt 1779  344 L
+SO
+1872 3575 mt 1872 3554 L
+1872  344 mt 1872  364 L
+DO
+1872 3575 mt 1872  344 L
+1872  344 mt 1872  344 L
+SO
+1953 3575 mt 1953 3554 L
+1953  344 mt 1953  364 L
+DO
+1953 3575 mt 1953  344 L
+1953  344 mt 1953  344 L
+SO
+2024 3575 mt 2024 3554 L
+2024  344 mt 2024  364 L
+DO
+2024 3575 mt 2024  344 L
+2024  344 mt 2024  344 L
+SO
+2087 3575 mt 2087 3554 L
+2087  344 mt 2087  364 L
+DO
+2087 3575 mt 2087  344 L
+2087  344 mt 2087  344 L
+SO
+2087 3575 mt 2087 3533 L
+2087  344 mt 2087  385 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+1963 3817 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+2149 3713 mt 
+(2) s
+2505 3575 mt 2505 3554 L
+2505  344 mt 2505  364 L
+DO
+2505 3575 mt 2505  344 L
+2505  344 mt 2505  344 L
+SO
+2750 3575 mt 2750 3554 L
+2750  344 mt 2750  364 L
+DO
+2750 3575 mt 2750  344 L
+2750  344 mt 2750  344 L
+SO
+2923 3575 mt 2923 3554 L
+2923  344 mt 2923  364 L
+DO
+2923 3575 mt 2923  344 L
+2923  344 mt 2923  344 L
+SO
+3058 3575 mt 3058 3554 L
+3058  344 mt 3058  364 L
+DO
+3058 3575 mt 3058  344 L
+3058  344 mt 3058  344 L
+SO
+3168 3575 mt 3168 3554 L
+3168  344 mt 3168  364 L
+DO
+3168 3575 mt 3168  344 L
+3168  344 mt 3168  344 L
+SO
+3261 3575 mt 3261 3554 L
+3261  344 mt 3261  364 L
+DO
+3261 3575 mt 3261  344 L
+3261  344 mt 3261  344 L
+SO
+3341 3575 mt 3341 3554 L
+3341  344 mt 3341  364 L
+DO
+3341 3575 mt 3341  344 L
+3341  344 mt 3341  344 L
+SO
+3412 3575 mt 3412 3554 L
+3412  344 mt 3412  364 L
+DO
+3412 3575 mt 3412  344 L
+3412  344 mt 3412  344 L
+SO
+3476 3575 mt 3476 3554 L
+3476  344 mt 3476  364 L
+DO
+3476 3575 mt 3476  344 L
+3476  344 mt 3476  344 L
+SO
+3476 3575 mt 3476 3533 L
+3476  344 mt 3476  385 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+3352 3817 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+3538 3713 mt 
+(3) s
+3894 3575 mt 3894 3554 L
+3894  344 mt 3894  364 L
+DO
+3894 3575 mt 3894  344 L
+3894  344 mt 3894  344 L
+SO
+4138 3575 mt 4138 3554 L
+4138  344 mt 4138  364 L
+DO
+4138 3575 mt 4138  344 L
+4138  344 mt 4138  344 L
+SO
+4312 3575 mt 4312 3554 L
+4312  344 mt 4312  364 L
+DO
+4312 3575 mt 4312  344 L
+4312  344 mt 4312  344 L
+SO
+4446 3575 mt 4446 3554 L
+4446  344 mt 4446  364 L
+DO
+4446 3575 mt 4446  344 L
+4446  344 mt 4446  344 L
+SO
+4556 3575 mt 4556 3554 L
+4556  344 mt 4556  364 L
+DO
+4556 3575 mt 4556  344 L
+4556  344 mt 4556  344 L
+SO
+4649 3575 mt 4649 3554 L
+4649  344 mt 4649  364 L
+DO
+4649 3575 mt 4649  344 L
+4649  344 mt 4649  344 L
+SO
+4730 3575 mt 4730 3554 L
+4730  344 mt 4730  364 L
+DO
+4730 3575 mt 4730  344 L
+4730  344 mt 4730  344 L
+SO
+4801 3575 mt 4801 3554 L
+4801  344 mt 4801  364 L
+DO
+4801 3575 mt 4801  344 L
+4801  344 mt 4801  344 L
+SO
+4865 3575 mt 4865 3554 L
+4865  344 mt 4865  364 L
+DO
+4865 3575 mt 4865  344 L
+4865  344 mt 4865  344 L
+SO
+4865 3575 mt 4865 3533 L
+4865  344 mt 4865  385 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+4741 3817 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+4927 3713 mt 
+(4) s
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  740 3575 L
+4865 3575 mt 4823 3575 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 289 3637 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 475 3533 mt 
+(-10) s
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 2928 mt  740 2928 L
+4865 2928 mt 4823 2928 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 289 2990 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 475 2886 mt 
+(-8) s
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 2282 mt  740 2282 L
+4865 2282 mt 4823 2282 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 289 2344 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 475 2240 mt 
+(-6) s
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 1636 mt  740 1636 L
+4865 1636 mt 4823 1636 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 289 1698 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 475 1594 mt 
+(-4) s
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699  990 mt  740  990 L
+4865  990 mt 4823  990 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 289 1052 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 475  948 mt 
+(-2) s
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699  344 mt  740  344 L
+4865  344 mt 4823  344 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 289  406 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 475  302 mt 
+(0) s
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+DO
+ 699 3575 mt  699 3575 L
+ 699 3575 mt  699 3575 L
+SO
+ 699 3575 mt 4865 3575 L
+ 699  344 mt 4865  344 L
+ 699 3575 mt  699  344 L
+4865 3575 mt 4865  344 L
+gs 699 344 4167 3232 rc
+gr
+
+/c8 { 0.000000 0.000000 1.000000 sr} bdef
+c8
+  36   36 1400 1303 FO
+  36   36 1818 1778 FO
+  36   36 2236 2196 FO
+  36   36 2654 2593 FO
+  36   36 3072 2984 FO
+  36   36 3490 3373 FO
+gs 699 344 4167 3232 rc
+/c9 { 0.000000 0.500000 0.000000 sr} bdef
+c9
+418 410 418 411 418 410 418 411 418 410 1400 1345 6 MP stroke
+gr
+
+c9
+/c10 { 1.000000 0.000000 0.000000 sr} bdef
+c10
+  36   36 1400  951 FO
+  36   36 1818 1148 FO
+  36   36 2236 1343 FO
+  36   36 2654 1538 FO
+  36   36 3072 1732 FO
+  36   36 3490 1927 FO
+gs 699 344 4167 3232 rc
+418 195 418 195 418 195 418 195 418 195 1400 952 6 MP stroke
+gr
+
+0 sg
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+1968 3995 mt 
+(Number of intervals N) s
+ 197 2674 mt  -90 rotate
+(Discretization Error) s
+90 rotate
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 216 FMSR
+
+1251  201 mt 
+(m=22, s=10 Cylindrical Problem) s
+-279 260 2841 1060 2 MP stroke
+57 -53 -119 49 2624 1324 3 MP
+PP
+62 4 57 -53 -119 49 2624 1324 4 MP stroke
+0 62 57 -115 2505 1373 3 MP
+PP
+-57 53 0 62 57 -115 2505 1373 4 MP stroke
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+2843  947 mt 
+(p=1, N) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 132 FMSR
+
+3342 1031 mt 
+(G) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+3444  947 mt 
+(=4) s
+466 -203 1872 2654 2 MP stroke
+-71 31 128 -7 2281 2427 3 MP
+PP
+-57 -24 -71 31 128 -7 2281 2427 4 MP stroke
+21 -58 -92 89 2409 2420 3 MP
+PP
+71 -31 21 -58 -92 89 2409 2420 4 MP stroke
+1082 2642 mt 
+(p=3, N) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 132 FMSR
+
+1581 2726 mt 
+(G) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+1683 2642 mt 
+(=6) s
+
+end %%Color Dict
+
+eplot
+%%EndObject
+
+epage
+end
+
+showpage
+
+%%Trailer
+%%EOF
diff --git a/multigrid/docs/grid.tex b/multigrid/docs/grid.tex
new file mode 100644
index 0000000..13b90a0
--- /dev/null
+++ b/multigrid/docs/grid.tex
@@ -0,0 +1,70 @@
+%
+% @file grid.tex
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+
+\hoffset=-2truecm
+\voffset=-2truecm
+\special{papersize=9cm,5cm}
+\hsize=9truecm
+\vsize=5truecm
+
+\parindent=0pt
+\nopagenumbers
+
+\input pstricks
+
+\pspicture(4,4)
+  \psline[linestyle=solid](0,0)(8,0)
+  \psline[linestyle=dotted](0,1)(8,1)
+  \psline[linestyle=solid](0,2)(8,2)
+  \psline[linestyle=dotted](0,3)(8,3)
+  \psline[linestyle=solid](0,4)(8,4)
+
+  \psline[linestyle=solid](0,0)(0,4)
+  \psline[linestyle=dotted](1,0)(1,4)
+  \psline[linestyle=solid](2,0)(2,4)
+  \psline[linestyle=dotted](3,0)(3,4)
+  \psline[linestyle=solid](4,0)(4,4)
+  \psline[linestyle=dotted](5,0)(5,4)
+  \psline[linestyle=solid](6,0)(6,4)
+  \psline[linestyle=dotted](7,0)(7,4)
+  \psline[linestyle=solid](8,0)(8,4)
+
+  \psdots[dotstyle=square,dotscale=2](0,0)(2,0)(4,0)(6,0)(8,0)
+  \psdots[dotstyle=square,dotscale=2](0,2)(2,2)(4,2)(6,2)(8,2)
+  \psdots[dotstyle=square,dotscale=2](0,4)(2,4)(4,4)(6,4)(8,4)
+
+  \psdots[dotstyle=*](0,0)(1,0)(2,0)(3,0)(4,0)(5,0)(6,0)(7,0)(8,0)
+  \psdots[dotstyle=*](0,1)(1,1)(2,1)(3,1)(4,1)(5,1)(6,1)(7,1)(8,1)
+  \psdots[dotstyle=*](0,2)(1,2)(2,2)(3,2)(4,2)(5,2)(6,2)(7,2)(8,2)
+  \psdots[dotstyle=*](0,3)(1,3)(2,3)(3,3)(4,3)(5,3)(6,3)(7,3)(8,3)
+  \psdots[dotstyle=*](0,4)(1,4)(2,4)(3,4)(4,4)(5,4)(6,4)(7,4)(8,4)
+
+\endpspicture
+
+
+\bye
+
+
diff --git a/multigrid/docs/linear_mg2d.eps b/multigrid/docs/linear_mg2d.eps
new file mode 100644
index 0000000..43e69dd
--- /dev/null
+++ b/multigrid/docs/linear_mg2d.eps
@@ -0,0 +1,1387 @@
+%!PS-Adobe-3.0 EPSF-3.0
+%%Creator: MATLAB, The MathWorks, Inc. Version 7.14.0.739 (R2012a). Operating System: Linux 3.4.6-2.10-desktop #1 SMP PREEMPT Thu Jul 26 09:36:26 UTC 2012 (641c197) x86_64.
+%%Title: /home/ttran/HLST/2012/report-4/linear_mg2d.eps
+%%CreationDate: 12/21/2012  09:47:45
+%%DocumentNeededFonts: Helvetica
+%%DocumentProcessColors: Cyan Magenta Yellow Black
+%%LanguageLevel: 2
+%%Pages: 1
+%%BoundingBox:    24    70   568   771
+%%EndComments
+
+%%BeginProlog
+% MathWorks dictionary
+/MathWorks 160 dict begin
+% definition operators
+/bdef {bind def} bind def
+/ldef {load def} bind def
+/xdef {exch def} bdef
+/xstore {exch store} bdef
+% operator abbreviations
+/c  /clip ldef
+/cc /concat ldef
+/cp /closepath ldef
+/gr /grestore ldef
+/gs /gsave ldef
+/mt /moveto ldef
+/np /newpath ldef
+/cm /currentmatrix ldef
+/sm /setmatrix ldef
+/rm /rmoveto ldef
+/rl /rlineto ldef
+/s {show newpath} bdef
+/sc {setcmykcolor} bdef
+/sr /setrgbcolor ldef
+/sg /setgray ldef
+/w /setlinewidth ldef
+/j /setlinejoin ldef
+/cap /setlinecap ldef
+/rc {rectclip} bdef
+/rf {rectfill} bdef
+% page state control
+/pgsv () def
+/bpage {/pgsv save def} bdef
+/epage {pgsv restore} bdef
+/bplot /gsave ldef
+/eplot {stroke grestore} bdef
+% orientation switch
+/portraitMode 0 def /landscapeMode 1 def /rotateMode 2 def
+% coordinate system mappings
+/dpi2point 0 def
+% font control
+/FontSize 0 def
+/FMS {/FontSize xstore findfont [FontSize 0 0 FontSize neg 0 0]
+  makefont setfont} bdef
+/reencode {exch dup where {pop load} {pop StandardEncoding} ifelse
+  exch dup 3 1 roll findfont dup length dict begin
+  { 1 index /FID ne {def}{pop pop} ifelse } forall
+  /Encoding exch def currentdict end definefont pop} bdef
+/isroman {findfont /CharStrings get /Agrave known} bdef
+/FMSR {3 1 roll 1 index dup isroman {reencode} {pop pop} ifelse
+  exch FMS} bdef
+/csm {1 dpi2point div -1 dpi2point div scale neg translate
+ dup landscapeMode eq {pop -90 rotate}
+  {rotateMode eq {90 rotate} if} ifelse} bdef
+% line types: solid, dotted, dashed, dotdash
+/SO { [] 0 setdash } bdef
+/DO { [.5 dpi2point mul 4 dpi2point mul] 0 setdash } bdef
+/DA { [6 dpi2point mul] 0 setdash } bdef
+/DD { [.5 dpi2point mul 4 dpi2point mul 6 dpi2point mul 4
+  dpi2point mul] 0 setdash } bdef
+% macros for lines and objects
+/L {lineto stroke} bdef
+/MP {3 1 roll moveto 1 sub {rlineto} repeat} bdef
+/AP {{rlineto} repeat} bdef
+/PDlw -1 def
+/W {/PDlw currentlinewidth def setlinewidth} def
+/PP {closepath eofill} bdef
+/DP {closepath stroke} bdef
+/MR {4 -2 roll moveto dup  0 exch rlineto exch 0 rlineto
+  neg 0 exch rlineto closepath} bdef
+/FR {MR stroke} bdef
+/PR {MR fill} bdef
+/L1i {{currentfile picstr readhexstring pop} image} bdef
+/tMatrix matrix def
+/MakeOval {newpath tMatrix currentmatrix pop translate scale
+0 0 1 0 360 arc tMatrix setmatrix} bdef
+/FO {MakeOval stroke} bdef
+/PO {MakeOval fill} bdef
+/PD {currentlinewidth 2 div 0 360 arc fill
+   PDlw -1 eq not {PDlw w /PDlw -1 def} if} def
+/FA {newpath tMatrix currentmatrix pop translate scale
+  0 0 1 5 -2 roll arc tMatrix setmatrix stroke} bdef
+/PA {newpath tMatrix currentmatrix pop	translate 0 0 moveto scale
+  0 0 1 5 -2 roll arc closepath tMatrix setmatrix fill} bdef
+/FAn {newpath tMatrix currentmatrix pop translate scale
+  0 0 1 5 -2 roll arcn tMatrix setmatrix stroke} bdef
+/PAn {newpath tMatrix currentmatrix pop translate 0 0 moveto scale
+  0 0 1 5 -2 roll arcn closepath tMatrix setmatrix fill} bdef
+/vradius 0 def /hradius 0 def /lry 0 def
+/lrx 0 def /uly 0 def /ulx 0 def /rad 0 def
+/MRR {/vradius xdef /hradius xdef /lry xdef /lrx xdef /uly xdef
+  /ulx xdef newpath tMatrix currentmatrix pop ulx hradius add uly
+  vradius add translate hradius vradius scale 0 0 1 180 270 arc 
+  tMatrix setmatrix lrx hradius sub uly vradius add translate
+  hradius vradius scale 0 0 1 270 360 arc tMatrix setmatrix
+  lrx hradius sub lry vradius sub translate hradius vradius scale
+  0 0 1 0 90 arc tMatrix setmatrix ulx hradius add lry vradius sub
+  translate hradius vradius scale 0 0 1 90 180 arc tMatrix setmatrix
+  closepath} bdef
+/FRR {MRR stroke } bdef
+/PRR {MRR fill } bdef
+/MlrRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lry uly sub 2 div def
+  newpath tMatrix currentmatrix pop ulx rad add uly rad add translate
+  rad rad scale 0 0 1 90 270 arc tMatrix setmatrix lrx rad sub lry rad
+  sub translate rad rad scale 0 0 1 270 90 arc tMatrix setmatrix
+  closepath} bdef
+/FlrRR {MlrRR stroke } bdef
+/PlrRR {MlrRR fill } bdef
+/MtbRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lrx ulx sub 2 div def
+  newpath tMatrix currentmatrix pop ulx rad add uly rad add translate
+  rad rad scale 0 0 1 180 360 arc tMatrix setmatrix lrx rad sub lry rad
+  sub translate rad rad scale 0 0 1 0 180 arc tMatrix setmatrix
+  closepath} bdef
+/FtbRR {MtbRR stroke } bdef
+/PtbRR {MtbRR fill } bdef
+/stri 6 array def /dtri 6 array def
+/smat 6 array def /dmat 6 array def
+/tmat1 6 array def /tmat2 6 array def /dif 3 array def
+/asub {/ind2 exch def /ind1 exch def dup dup
+  ind1 get exch ind2 get sub exch } bdef
+/tri_to_matrix {
+  2 0 asub 3 1 asub 4 0 asub 5 1 asub
+  dup 0 get exch 1 get 7 -1 roll astore } bdef
+/compute_transform {
+  dmat dtri tri_to_matrix tmat1 invertmatrix 
+  smat stri tri_to_matrix tmat2 concatmatrix } bdef
+/ds {stri astore pop} bdef
+/dt {dtri astore pop} bdef
+/db {2 copy /cols xdef /rows xdef mul dup 3 mul string
+  currentfile 
+  3 index 0 eq {/ASCIIHexDecode filter}
+  {/ASCII85Decode filter 3 index 2 eq {/RunLengthDecode filter} if }
+  ifelse exch readstring pop
+  dup 0 3 index getinterval /rbmap xdef
+  dup 2 index dup getinterval /gbmap xdef
+  1 index dup 2 mul exch getinterval /bbmap xdef pop pop}bdef
+/it {gs np dtri aload pop moveto lineto lineto cp c
+  cols rows 8 compute_transform 
+  rbmap gbmap bbmap true 3 colorimage gr}bdef
+/il {newpath moveto lineto stroke}bdef
+currentdict end def
+%%EndProlog
+
+%%BeginSetup
+MathWorks begin
+
+0 cap
+
+end
+%%EndSetup
+
+%%Page: 1 1
+%%BeginPageSetup
+%%PageBoundingBox:    24    70   568   771
+MathWorks begin
+bpage
+%%EndPageSetup
+
+%%BeginObject: obj1
+bplot
+
+/dpi2point 12 def
+portraitMode 0288 9252 csm
+
+    0     0  6539  8402 rc
+87 dict begin %Colortable dictionary
+/c0 { 0.000000 0.000000 0.000000 sr} bdef
+/c1 { 1.000000 1.000000 1.000000 sr} bdef
+/c2 { 0.900000 0.000000 0.000000 sr} bdef
+/c3 { 0.000000 0.820000 0.000000 sr} bdef
+/c4 { 0.000000 0.000000 0.800000 sr} bdef
+/c5 { 0.910000 0.820000 0.320000 sr} bdef
+/c6 { 1.000000 0.260000 0.820000 sr} bdef
+/c7 { 0.000000 0.820000 0.820000 sr} bdef
+c0
+1 j
+1 sg
+   0    0 6540 8403 rf
+6 w
+0 2867 5068 0 0 -2867 850 3497 4 MP
+PP
+-5068 0 0 2867 5068 0 0 -2867 850 3497 5 MP stroke
+4 w
+DO
+0 sg
+ 850 3497 mt  850  630 L
+ 850  630 mt  850  630 L
+1356 3497 mt 1356  630 L
+1356  630 mt 1356  630 L
+1863 3497 mt 1863  630 L
+1863  630 mt 1863  630 L
+2370 3497 mt 2370  630 L
+2370  630 mt 2370  630 L
+2877 3497 mt 2877  630 L
+2877  630 mt 2877  630 L
+3384 3497 mt 3384  630 L
+3384  630 mt 3384  630 L
+3890 3497 mt 3890  630 L
+3890  630 mt 3890  630 L
+4397 3497 mt 4397  630 L
+4397  630 mt 4397  630 L
+4904 3497 mt 4904  630 L
+4904  630 mt 4904  630 L
+5411 3497 mt 5411  630 L
+5411  630 mt 5411  630 L
+5918 3497 mt 5918  630 L
+5918  630 mt 5918  630 L
+ 850 3497 mt 5918 3497 L
+5918 3497 mt 5918 3497 L
+ 850 2780 mt 5918 2780 L
+5918 2780 mt 5918 2780 L
+ 850 2063 mt 5918 2063 L
+5918 2063 mt 5918 2063 L
+ 850 1346 mt 5918 1346 L
+5918 1346 mt 5918 1346 L
+ 850  630 mt 5918  630 L
+5918  630 mt 5918  630 L
+SO
+6 w
+ 850 3497 mt 5918 3497 L
+ 850  630 mt 5918  630 L
+ 850 3497 mt  850  630 L
+5918 3497 mt 5918  630 L
+ 850 3497 mt 5918 3497 L
+ 850 3497 mt  850  630 L
+ 850 3497 mt  850 3446 L
+ 850  630 mt  850  680 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 804 3687 mt 
+(0) s
+1356 3497 mt 1356 3446 L
+1356  630 mt 1356  680 L
+1310 3687 mt 
+(1) s
+1863 3497 mt 1863 3446 L
+1863  630 mt 1863  680 L
+1817 3687 mt 
+(2) s
+2370 3497 mt 2370 3446 L
+2370  630 mt 2370  680 L
+2324 3687 mt 
+(3) s
+2877 3497 mt 2877 3446 L
+2877  630 mt 2877  680 L
+2831 3687 mt 
+(4) s
+3384 3497 mt 3384 3446 L
+3384  630 mt 3384  680 L
+3338 3687 mt 
+(5) s
+3890 3497 mt 3890 3446 L
+3890  630 mt 3890  680 L
+3844 3687 mt 
+(6) s
+4397 3497 mt 4397 3446 L
+4397  630 mt 4397  680 L
+4351 3687 mt 
+(7) s
+4904 3497 mt 4904 3446 L
+4904  630 mt 4904  680 L
+4858 3687 mt 
+(8) s
+5411 3497 mt 5411 3446 L
+5411  630 mt 5411  680 L
+5365 3687 mt 
+(9) s
+5918 3497 mt 5918 3446 L
+5918  630 mt 5918  680 L
+5825 3687 mt 
+(10) s
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  900 3497 L
+5918 3497 mt 5867 3497 L
+ 440 3559 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 626 3455 mt 
+(-20) s
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 2780 mt  900 2780 L
+5918 2780 mt 5867 2780 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 440 2842 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 626 2738 mt 
+(-15) s
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 2063 mt  900 2063 L
+5918 2063 mt 5867 2063 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 440 2125 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 626 2021 mt 
+(-10) s
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 1346 mt  900 1346 L
+5918 1346 mt 5867 1346 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 440 1408 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 626 1304 mt 
+(-5) s
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850  630 mt  900  630 L
+5918  630 mt 5867  630 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 440  692 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 626  588 mt 
+(0) s
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+DO
+ 850 3497 mt  850 3497 L
+ 850 3497 mt  850 3497 L
+SO
+ 850 3497 mt 5918 3497 L
+ 850  630 mt 5918  630 L
+ 850 3497 mt  850  630 L
+5918 3497 mt 5918  630 L
+gs 850 630 5069 2868 rc
+/c8 { 0.000000 0.000000 1.000000 sr} bdef
+c8
+507 178 507 177 507 171 507 176 506 181 507 182 507 175 507 174 
+507 198 506 116 850 975 11 MP stroke
+gr
+
+c8
+gs 777 902 5215 1875 rc
+  36   36  850  975 FO
+  36   36 1356 1091 FO
+  36   36 1863 1289 FO
+  36   36 2370 1463 FO
+  36   36 2877 1638 FO
+  36   36 3384 1820 FO
+  36   36 3890 2001 FO
+  36   36 4397 2177 FO
+  36   36 4904 2348 FO
+  36   36 5411 2525 FO
+  36   36 5918 2703 FO
+gr
+
+gs 850 630 5069 2868 rc
+gr
+
+0 sg
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+3038 3865 mt 
+(Iterations) s
+ 348 2682 mt  -90 rotate
+(Norm of residual) s
+90 rotate
+1707  498 mt 
+(Linear Splines, V\(2,1\), relax=gs, KX=3, KY=3) s
+gs 850 630 5069 2868 rc
+/c9 { 0.000000 0.498039 0.000000 sr} bdef
+c9
+507 178 507 182 507 175 507 177 506 183 507 191 507 185 507 198 
+507 215 506 122 850 932 11 MP stroke
+gr
+
+c9
+gs 777 859 5215 1953 rc
+  36   36  850  932 FO
+  36   36 1356 1054 FO
+  36   36 1863 1269 FO
+  36   36 2370 1467 FO
+  36   36 2877 1652 FO
+  36   36 3384 1843 FO
+  36   36 3890 2026 FO
+  36   36 4397 2203 FO
+  36   36 4904 2378 FO
+  36   36 5411 2560 FO
+  36   36 5918 2738 FO
+gr
+
+gs 850 630 5069 2868 rc
+/c10 { 0.847059 0.160784 0.000000 sr} bdef
+c10
+507 186 507 185 507 189 507 185 506 185 507 193 507 205 507 214 
+507 222 506 137 850 889 11 MP stroke
+gr
+
+c10
+gs 777 816 5215 2048 rc
+  36   36  850  889 FO
+  36   36 1356 1026 FO
+  36   36 1863 1248 FO
+  36   36 2370 1462 FO
+  36   36 2877 1667 FO
+  36   36 3384 1860 FO
+  36   36 3890 2045 FO
+  36   36 4397 2230 FO
+  36   36 4904 2419 FO
+  36   36 5411 2604 FO
+  36   36 5918 2790 FO
+gr
+
+gs 850 630 5069 2868 rc
+/c11 { 0.000000 0.749020 0.749020 sr} bdef
+c11
+507 216 507 214 507 231 507 224 506 253 507 210 507 205 507 215 
+507 223 506 179 850 849 11 MP stroke
+gr
+
+c11
+gs 777 776 5215 2317 rc
+  36   36  850  849 FO
+  36   36 1356 1028 FO
+  36   36 1863 1251 FO
+  36   36 2370 1466 FO
+  36   36 2877 1671 FO
+  36   36 3384 1881 FO
+  36   36 3890 2134 FO
+  36   36 4397 2358 FO
+  36   36 4904 2589 FO
+  36   36 5411 2803 FO
+  36   36 5918 3019 FO
+gr
+
+gs 850 630 5069 2868 rc
+gr
+
+1 sg
+0 2867 5068 0 0 -2867 850 7478 4 MP
+PP
+-5068 0 0 2867 5068 0 0 -2867 850 7478 5 MP stroke
+4 w
+DO
+0 sg
+ 850 7478 mt  850 4611 L
+ 850 4611 mt  850 4611 L
+1356 7478 mt 1356 4611 L
+1356 4611 mt 1356 4611 L
+1863 7478 mt 1863 4611 L
+1863 4611 mt 1863 4611 L
+2370 7478 mt 2370 4611 L
+2370 4611 mt 2370 4611 L
+2877 7478 mt 2877 4611 L
+2877 4611 mt 2877 4611 L
+3384 7478 mt 3384 4611 L
+3384 4611 mt 3384 4611 L
+3890 7478 mt 3890 4611 L
+3890 4611 mt 3890 4611 L
+4397 7478 mt 4397 4611 L
+4397 4611 mt 4397 4611 L
+4904 7478 mt 4904 4611 L
+4904 4611 mt 4904 4611 L
+5411 7478 mt 5411 4611 L
+5411 4611 mt 5411 4611 L
+5918 7478 mt 5918 4611 L
+5918 4611 mt 5918 4611 L
+ 850 7478 mt 5918 7478 L
+5918 7478 mt 5918 7478 L
+ 850 6761 mt 5918 6761 L
+5918 6761 mt 5918 6761 L
+ 850 6044 mt 5918 6044 L
+5918 6044 mt 5918 6044 L
+ 850 5327 mt 5918 5327 L
+5918 5327 mt 5918 5327 L
+ 850 4611 mt 5918 4611 L
+5918 4611 mt 5918 4611 L
+SO
+6 w
+ 850 7478 mt 5918 7478 L
+ 850 4611 mt 5918 4611 L
+ 850 7478 mt  850 4611 L
+5918 7478 mt 5918 4611 L
+ 850 7478 mt 5918 7478 L
+ 850 7478 mt  850 4611 L
+ 850 7478 mt  850 7427 L
+ 850 4611 mt  850 4661 L
+ 804 7668 mt 
+(0) s
+1356 7478 mt 1356 7427 L
+1356 4611 mt 1356 4661 L
+1310 7668 mt 
+(1) s
+1863 7478 mt 1863 7427 L
+1863 4611 mt 1863 4661 L
+1817 7668 mt 
+(2) s
+2370 7478 mt 2370 7427 L
+2370 4611 mt 2370 4661 L
+2324 7668 mt 
+(3) s
+2877 7478 mt 2877 7427 L
+2877 4611 mt 2877 4661 L
+2831 7668 mt 
+(4) s
+3384 7478 mt 3384 7427 L
+3384 4611 mt 3384 4661 L
+3338 7668 mt 
+(5) s
+3890 7478 mt 3890 7427 L
+3890 4611 mt 3890 4661 L
+3844 7668 mt 
+(6) s
+4397 7478 mt 4397 7427 L
+4397 4611 mt 4397 4661 L
+4351 7668 mt 
+(7) s
+4904 7478 mt 4904 7427 L
+4904 4611 mt 4904 4661 L
+4858 7668 mt 
+(8) s
+5411 7478 mt 5411 7427 L
+5411 4611 mt 5411 4661 L
+5365 7668 mt 
+(9) s
+5918 7478 mt 5918 7427 L
+5918 4611 mt 5918 4661 L
+5825 7668 mt 
+(10) s
+ 850 7478 mt  875 7478 L
+5918 7478 mt 5892 7478 L
+DO
+ 850 7478 mt 5918 7478 L
+5918 7478 mt 5918 7478 L
+SO
+ 850 7478 mt  900 7478 L
+5918 7478 mt 5867 7478 L
+ 502 7540 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 688 7436 mt 
+(-6) s
+ 850 7262 mt  875 7262 L
+5918 7262 mt 5892 7262 L
+DO
+ 850 7262 mt 5918 7262 L
+5918 7262 mt 5918 7262 L
+SO
+ 850 7136 mt  875 7136 L
+5918 7136 mt 5892 7136 L
+DO
+ 850 7136 mt 5918 7136 L
+5918 7136 mt 5918 7136 L
+SO
+ 850 7046 mt  875 7046 L
+5918 7046 mt 5892 7046 L
+DO
+ 850 7046 mt 5918 7046 L
+5918 7046 mt 5918 7046 L
+SO
+ 850 6977 mt  875 6977 L
+5918 6977 mt 5892 6977 L
+DO
+ 850 6977 mt 5918 6977 L
+5918 6977 mt 5918 6977 L
+SO
+ 850 6920 mt  875 6920 L
+5918 6920 mt 5892 6920 L
+DO
+ 850 6920 mt 5918 6920 L
+5918 6920 mt 5918 6920 L
+SO
+ 850 6872 mt  875 6872 L
+5918 6872 mt 5892 6872 L
+DO
+ 850 6872 mt 5918 6872 L
+5918 6872 mt 5918 6872 L
+SO
+ 850 6830 mt  875 6830 L
+5918 6830 mt 5892 6830 L
+DO
+ 850 6830 mt 5918 6830 L
+5918 6830 mt 5918 6830 L
+SO
+ 850 6794 mt  875 6794 L
+5918 6794 mt 5892 6794 L
+DO
+ 850 6794 mt 5918 6794 L
+5918 6794 mt 5918 6794 L
+SO
+ 850 6761 mt  875 6761 L
+5918 6761 mt 5892 6761 L
+DO
+ 850 6761 mt 5918 6761 L
+5918 6761 mt 5918 6761 L
+SO
+ 850 6761 mt  900 6761 L
+5918 6761 mt 5867 6761 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 502 6823 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 688 6719 mt 
+(-5) s
+ 850 6545 mt  875 6545 L
+5918 6545 mt 5892 6545 L
+DO
+ 850 6545 mt 5918 6545 L
+5918 6545 mt 5918 6545 L
+SO
+ 850 6419 mt  875 6419 L
+5918 6419 mt 5892 6419 L
+DO
+ 850 6419 mt 5918 6419 L
+5918 6419 mt 5918 6419 L
+SO
+ 850 6329 mt  875 6329 L
+5918 6329 mt 5892 6329 L
+DO
+ 850 6329 mt 5918 6329 L
+5918 6329 mt 5918 6329 L
+SO
+ 850 6260 mt  875 6260 L
+5918 6260 mt 5892 6260 L
+DO
+ 850 6260 mt 5918 6260 L
+5918 6260 mt 5918 6260 L
+SO
+ 850 6203 mt  875 6203 L
+5918 6203 mt 5892 6203 L
+DO
+ 850 6203 mt 5918 6203 L
+5918 6203 mt 5918 6203 L
+SO
+ 850 6155 mt  875 6155 L
+5918 6155 mt 5892 6155 L
+DO
+ 850 6155 mt 5918 6155 L
+5918 6155 mt 5918 6155 L
+SO
+ 850 6113 mt  875 6113 L
+5918 6113 mt 5892 6113 L
+DO
+ 850 6113 mt 5918 6113 L
+5918 6113 mt 5918 6113 L
+SO
+ 850 6077 mt  875 6077 L
+5918 6077 mt 5892 6077 L
+DO
+ 850 6077 mt 5918 6077 L
+5918 6077 mt 5918 6077 L
+SO
+ 850 6044 mt  875 6044 L
+5918 6044 mt 5892 6044 L
+DO
+ 850 6044 mt 5918 6044 L
+5918 6044 mt 5918 6044 L
+SO
+ 850 6044 mt  900 6044 L
+5918 6044 mt 5867 6044 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 502 6106 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 688 6002 mt 
+(-4) s
+ 850 5828 mt  875 5828 L
+5918 5828 mt 5892 5828 L
+DO
+ 850 5828 mt 5918 5828 L
+5918 5828 mt 5918 5828 L
+SO
+ 850 5702 mt  875 5702 L
+5918 5702 mt 5892 5702 L
+DO
+ 850 5702 mt 5918 5702 L
+5918 5702 mt 5918 5702 L
+SO
+ 850 5612 mt  875 5612 L
+5918 5612 mt 5892 5612 L
+DO
+ 850 5612 mt 5918 5612 L
+5918 5612 mt 5918 5612 L
+SO
+ 850 5543 mt  875 5543 L
+5918 5543 mt 5892 5543 L
+DO
+ 850 5543 mt 5918 5543 L
+5918 5543 mt 5918 5543 L
+SO
+ 850 5486 mt  875 5486 L
+5918 5486 mt 5892 5486 L
+DO
+ 850 5486 mt 5918 5486 L
+5918 5486 mt 5918 5486 L
+SO
+ 850 5438 mt  875 5438 L
+5918 5438 mt 5892 5438 L
+DO
+ 850 5438 mt 5918 5438 L
+5918 5438 mt 5918 5438 L
+SO
+ 850 5397 mt  875 5397 L
+5918 5397 mt 5892 5397 L
+DO
+ 850 5397 mt 5918 5397 L
+5918 5397 mt 5918 5397 L
+SO
+ 850 5360 mt  875 5360 L
+5918 5360 mt 5892 5360 L
+DO
+ 850 5360 mt 5918 5360 L
+5918 5360 mt 5918 5360 L
+SO
+ 850 5327 mt  875 5327 L
+5918 5327 mt 5892 5327 L
+DO
+ 850 5327 mt 5918 5327 L
+5918 5327 mt 5918 5327 L
+SO
+ 850 5327 mt  900 5327 L
+5918 5327 mt 5867 5327 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 502 5389 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 688 5285 mt 
+(-3) s
+ 850 5111 mt  875 5111 L
+5918 5111 mt 5892 5111 L
+DO
+ 850 5111 mt 5918 5111 L
+5918 5111 mt 5918 5111 L
+SO
+ 850 4985 mt  875 4985 L
+5918 4985 mt 5892 4985 L
+DO
+ 850 4985 mt 5918 4985 L
+5918 4985 mt 5918 4985 L
+SO
+ 850 4896 mt  875 4896 L
+5918 4896 mt 5892 4896 L
+DO
+ 850 4896 mt 5918 4896 L
+5918 4896 mt 5918 4896 L
+SO
+ 850 4826 mt  875 4826 L
+5918 4826 mt 5892 4826 L
+DO
+ 850 4826 mt 5918 4826 L
+5918 4826 mt 5918 4826 L
+SO
+ 850 4770 mt  875 4770 L
+5918 4770 mt 5892 4770 L
+DO
+ 850 4770 mt 5918 4770 L
+5918 4770 mt 5918 4770 L
+SO
+ 850 4722 mt  875 4722 L
+5918 4722 mt 5892 4722 L
+DO
+ 850 4722 mt 5918 4722 L
+5918 4722 mt 5918 4722 L
+SO
+ 850 4680 mt  875 4680 L
+5918 4680 mt 5892 4680 L
+DO
+ 850 4680 mt 5918 4680 L
+5918 4680 mt 5918 4680 L
+SO
+ 850 4643 mt  875 4643 L
+5918 4643 mt 5892 4643 L
+DO
+ 850 4643 mt 5918 4643 L
+5918 4643 mt 5918 4643 L
+SO
+ 850 4611 mt  875 4611 L
+5918 4611 mt 5892 4611 L
+DO
+ 850 4611 mt 5918 4611 L
+5918 4611 mt 5918 4611 L
+SO
+ 850 4611 mt  900 4611 L
+5918 4611 mt 5867 4611 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+ 502 4673 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 112 FMSR
+
+ 688 4569 mt 
+(-2) s
+ 850 7478 mt 5918 7478 L
+ 850 4611 mt 5918 4611 L
+ 850 7478 mt  850 4611 L
+5918 7478 mt 5918 4611 L
+gs 850 4611 5069 2868 rc
+c8
+507 0 507 0 507 0 507 0 506 0 507 0 507 11 507 327 
+507 978 506 1082 850 5005 11 MP stroke
+gr
+
+c8
+gs 777 4932 5215 2545 rc
+0 j
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 819 4987 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 1325 6069 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 1832 7047 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 2339 7374 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 2846 7385 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 3353 7385 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 3859 7385 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 4366 7385 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 4873 7385 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 5380 7385 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 5887 7385 13 MP
+DP
+gr
+
+gs 850 4611 5069 2868 rc
+gr
+
+0 sg
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+3038 7846 mt 
+(Iterations) s
+ 410 6546 mt  -90 rotate
+(Norm of error) s
+90 rotate
+gs 850 4611 5069 2868 rc
+c9
+507 0 507 0 507 0 507 0 506 0 507 0 507 2 507 105 
+507 785 506 1074 850 5005 11 MP stroke
+gr
+
+c9
+gs 777 4932 5215 2113 rc
+0 j
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 819 4987 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 1325 6061 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 1832 6846 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 2339 6951 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 2846 6953 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 3353 6953 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 3859 6953 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 4366 6953 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 4873 6953 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 5380 6953 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 5887 6953 13 MP
+DP
+gr
+
+gs 850 4611 5069 2868 rc
+c10
+507 0 507 0 507 0 507 0 506 0 507 0 507 0 507 23 
+507 467 506 1045 850 5005 11 MP stroke
+gr
+
+c10
+gs 777 4932 5215 1682 rc
+0 j
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 819 4987 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 1325 6032 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 1832 6499 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 2339 6522 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 2846 6522 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 3353 6522 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 3859 6522 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 4366 6522 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 4873 6522 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 5380 6522 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 5887 6522 13 MP
+DP
+gr
+
+gs 850 4611 5069 2868 rc
+c11
+507 0 507 0 507 0 507 0 506 0 507 0 507 0 507 2 
+507 155 506 947 850 5005 11 MP stroke
+gr
+
+c11
+gs 777 4932 5215 1251 rc
+0 j
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 819 4987 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 1325 5934 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 1832 6089 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 2339 6091 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 2846 6091 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 3353 6091 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 3859 6091 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 4366 6091 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 4873 6091 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 5380 6091 13 MP
+DP
+-13 -18 13 -18 -21 0 -10 -18 -10 18 -21 0 13 18 -13 18 
+21 0 10 18 10 -18 21 0 5887 6091 13 MP
+DP
+gr
+
+gs 850 4611 5069 2868 rc
+gr
+
+0 sg
+-217 214 2943 5814 2 MP stroke
+56 -55 -117 52 2787 6031 3 MP
+PP
+61 3 56 -55 -117 52 2787 6031 4 MP stroke
+2 62 54 -117 2670 6083 3 MP
+PP
+-56 55 2 62 54 -117 2670 6083 4 MP stroke
+2945 5777 mt 
+(16 X 16) s
+-146 155 3217 6316 2 MP stroke
+53 -56 -115 56 3133 6471 3 MP
+PP
+62 0 53 -56 -115 56 3133 6471 4 MP stroke
+4 62 49 -118 3018 6527 3 MP
+PP
+-53 56 4 62 49 -118 3018 6527 4 MP stroke
+3219 6279 mt 
+(32 X 32) s
+-156 159 3906 6740 2 MP stroke
+54 -56 -116 54 3812 6901 3 MP
+PP
+62 2 54 -56 -116 54 3812 6901 4 MP stroke
+3 61 51 -117 3696 6955 3 MP
+PP
+-54 56 3 61 51 -117 3696 6955 4 MP stroke
+3908 6703 mt 
+(64 X 64) s
+-166 164 4208 7181 2 MP stroke
+55 -55 -117 52 4104 7348 3 MP
+PP
+62 3 55 -55 -117 52 4104 7348 4 MP stroke
+1 61 54 -116 3987 7400 3 MP
+PP
+-55 55 1 61 54 -116 3987 7400 4 MP stroke
+4210 7144 mt 
+(128 X 128) s
+
+end %%Color Dict
+
+eplot
+%%EndObject
+
+epage
+end
+
+showpage
+
+%%Trailer
+%%EOF
diff --git a/multigrid/docs/linear_mg2d.fig b/multigrid/docs/linear_mg2d.fig
new file mode 100644
index 0000000..f613e20
Binary files /dev/null and b/multigrid/docs/linear_mg2d.fig differ
diff --git a/multigrid/docs/mg_gbs.pdf b/multigrid/docs/mg_gbs.pdf
new file mode 100644
index 0000000..8b77358
Binary files /dev/null and b/multigrid/docs/mg_gbs.pdf differ
diff --git a/multigrid/docs/mg_gbs.tex b/multigrid/docs/mg_gbs.tex
new file mode 100644
index 0000000..4c1eec0
--- /dev/null
+++ b/multigrid/docs/mg_gbs.tex
@@ -0,0 +1,1623 @@
+%
+% @file mg_gbs.tex
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+\documentclass[a4paper]{article}
+\usepackage{linuxdoc-sgml}
+\usepackage{graphicx}
+\usepackage{hyperref}
+%\usepackage{amsmath}
+\usepackage{amssymb}
+\usepackage{mathtools}
+\usepackage{placeins}
+\usepackage{multirow}
+\usepackage{latexsym}
+\usepackage{listings}
+\usepackage{xcolor}
+\usepackage{rotating}
+\def\RepFigures{FIGURES_mg_gbs}
+
+
+\title{\tt Multigrid Solver for GBS}
+\author{Trach-Minh Tran, Federico Halpern\\CRPP/EPFL}
+\date{v1.0, June 2015}
+
+\begin{document}
+\maketitle
+\tableofcontents
+
+\section{The PDE}
+The PDE considered is
+\begin{equation}
+\label{eq:pde}
+  \left[\frac{\partial^2}{\partial x^2} + \tau\frac{\partial^2}{\partial
+  x\partial y} + \frac{\partial^2}{\partial y^2} - a(x,y)\right] u(x,y)  =
+  f(x,y), \qquad 0\le x \le L_x, \; 0\le y \le L_y.
+\end{equation}
+On the four boundaries, homogeneous Dirichlet boundary condition
+$u=0$ as well as Neumann boundary condition
+$\partial u/\partial n=0$  can be applied.
+
+\section{Discretization}
+The grid points $(x_i,y_j)$ are defined by
+\begin{equation}
+  \begin{split}
+    x_i &= ih_x = i\frac{L_x}{N_x}, \quad i=0,\ldots, N_x \\
+    y_j &= jh_y = j\frac{L_y}{N_y}, \quad j=0,\ldots, N_y \\
+  \end{split}
+\end{equation}
+
+Second order Finite Difference discretization of Eq.\ref{eq:pde} leads
+to the following 9-point stencil
+
+\begin{equation}
+\label{eq:stencil}
+  S_{ij} = \frac{1}{h_x^2}
+  \begin{bmatrix}
+    -\tau\alpha/4   & \alpha^2                  & \tau\alpha/4 \\
+          1         & -2(1+\alpha^2)-h_x^2a_{ij} & 1 \\
+    \tau\alpha/4    & \alpha^2                  &-\tau\alpha/4 \\
+  \end{bmatrix}
+, \qquad \mbox{where $\alpha=h_x/h_y$}.
+\end{equation}
+
+Note that the mesh aspect ratio $\alpha$ results in the same stencil
+for the \emph{anisotropic} Poisson equation with $h_x=h_y$:
+\begin{equation}
+   \frac{\partial^2 u}{\partial x^2} + \alpha^2
+     \frac{\partial^2 u}{\partial y^2} = f.
+\end{equation}
+
+It is shown in \cite[p.~119]{Briggs} that this anisotropy can degrade
+the performance of multigrid using  standard relaxations such as
+Gauss-Seidel or damped Jacobi can be strongly degraded.
+
+With the \emph{lexicographic} numbering
+\begin{equation}
+  I = j(N_x+1) + i+1, 
+\end{equation}
+for the $(N_x+1)(N_y+1)$ nodes, the discretized problem can be
+expressed as a matrix problem
+\begin{equation}
+\label{eq:matrix}
+  \mathbf{Au} = \mathbf{f},
+\end{equation}
+where $\mathbf{A}$ is a 9-diagonal matrix,  assembled using the stencil defined
+above. Homogeneous Dirichlet boundary condition can be imposed, for example, on
+the face $j=0$ simply by \emph{clearing} the matrix rows and columns
+$1,2,\ldots, N_x+1$, and setting the diagonal terms to 1.
+
+Neumann boundary condition $\partial u/\partial x=0$ at
+the face $i=0$, can be simply implemented by imposing
+$u_{-1j}=u_{1j}$. The stencil for the boundary nodes $(0,j)$
+can thus be modified as
+
+\begin{equation}
+  S_{0j} = \frac{1}{h_x^2}
+  \begin{bmatrix}
+     0 &\alpha^2                  & 0 \\
+     0 &-2(1+\alpha^2)-h_x^2a_{0j} & 2 \\
+     0 &\alpha^2                  & 0 \\
+  \end{bmatrix}
+.
+\end{equation}
+
+Two model problems are considered in this report:
+\begin{description}
+\item[\texttt{\textbf{DDDD}} problem:] Homogeneous Dirichlet BC
+  at all the 4 boundaries. The \emph{analytic solution} is
+  \begin{equation}
+    u(x,y) = \sin\frac{2\pi k_xx}{L_x}\sin\frac{2\pi k_yy}{L_y},
+    \qquad \mbox{where $k_x$, $k_y$ are positive integers}.
+  \end{equation}
+\item[\texttt{\textbf{NNDD}} problem:] Neumann boundary conditions
+  $\partial u/\partial x=0$ at $x=0$ and $x=L_x$, homogeneous
+  Dirichlet BC at $y=0$ and $y=L_y$. The \emph{analytic solution} is
+  \begin{equation}
+    u(x,y) = \cos\frac{2\pi k_xx}{L_x}\sin\frac{2\pi k_yy}{L_y},
+    \qquad \mbox{where $k_x$, $k_y$ are positive integers}.
+  \end{equation}
+\end{description}
+In both problems, $a$ depends only on $x$:
+\begin{equation}
+  \label{eq:density}
+  a(x,y)= \exp\left[-\frac{(x-L_x/3)^2}{(L_x/2)^2}\right].
+\end{equation}
+
+The sparse direct solver MUMPS \cite{MUMPS} is used to solve (\ref{eq:matrix}) in
+order to check the convergence of the schema described
+above. Fig.\ref{fig:convergence} shows the expected \emph{quadratic}
+convergence of the error with respect to $h_x$ with fixed $\alpha=h_x/h_y=0.5$ for
+both problems, when the grid size is varied from $16\times 64$ to $512\times 2048$.
+
+\begin{figure}[hbt]
+  \centering
+  \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/convergence}
+  \caption{Convergence of the error
+    $\| u_{calc}- u_{anal}\|_\infty$ wrt the number of intervals
+  in the $x$ direction $N_x$ for $L_x=100$, $L_y=800$, $k_x=k_y=4$, $\tau=1$ and $N_y=4N_x$.}
+  \label{fig:convergence}
+\end{figure}
+
+\section{Multigrid $V$-cycle}
+\label{sec-mgProc}
+Given an approximate  $\mathbf{u}^h$ and right hand side $\mathbf{f}^h$ 
+defined at some grid level  represented by the grid spacing $h$, the
+following MG $V$-cycle procedure
+\begin{equation*}
+  \boxed{\mathbf{u}^h \leftarrow MG^h(\mathbf{u}^h,\mathbf{f}^h)}
+\end{equation*}
+will compute a \emph{new} $\mathbf{u}^h$. It is defined recursively by the
+following steps: 
+
+\begin{enumerate}
+\item If $h$ is the coarsest mesh size, 
+  \begin{itemize}
+  \item  Direct solve $\mathbf{A}^h\mathbf{u}^h=\mathbf{f}^h$ 
+  \item Goto 3.
+  \end{itemize}
+\item Else 
+  \begin{itemize}
+    \item Relax $\mathbf{u}^h$ $\nu_1$ times.
+    \item $\mathbf{f}^{2h} \leftarrow {\mathbf{R}}(\mathbf{f}^h-\mathbf{A}^h\mathbf{u}^h)$.
+    \item $\mathbf{u}^{2h} \leftarrow MG^{2h}(\mathbf{u}^{2h},\mathbf{f}^{2h})$ $\mu$ times.
+    \item $\mathbf{u}^h\leftarrow
+      \mathbf{u}^h+{\mathbf{P}}\mathbf{u}^{2h}$.
+    \item Relax $\mathbf{u}^h$ $\nu_2$ times.
+    \end{itemize} 
+\item Return
+\end{enumerate}
+
+In the procedure above, the operators $\mathbf{R}$ and $\mathbf{P}$
+denote respectively the \emph{restriction} (from \emph{fine} to
+\emph{coarse} grid) and the \emph{prolongation} (from \emph{coarse} to
+\emph{fine} grid). Notice that in this multigrid procedure,
+$\mathbf{R}$ applies only to the \emph{right hand side} while $\mathbf{P}$
+applies only to the \emph{solution}. The standard $V(\nu_1,\nu_2)$ cycle is obtained by
+calling this $MG^h$ procedure with $\mathbf{f}^h$ defined at the
+\emph{finest} grid level, a guess $\mathbf{u}^h=0$ and $\mu=1$, while 
+$\mu=2$ results in the $W(\nu_1,\nu_2)$ cycle.
+
+Details on the grid coarsening, the inter-grid transfers and methods
+of relaxation are given in the following.
+
+\subsection{Grid coarsening}
+Let start with the one-dimensional \emph{fine} grid defined
+by $x_i,\, i=0,\ldots,N$, assuming that $N$ is even. The next coarse grid
+(with $N/2$ intervals) is obtained by simply discarding the grid
+points with \emph{odd} indices. 
+
+In order to get a \emph{smallest coarsest} grid (so that it is possible
+to solve \emph{cheaply} the problem with a \emph{direct} method), $N$ should be
+$N=N_c2^{L-1}$ where $L$ the total number of grid levels and $N_c$ is either
+2 or a \emph{small odd} integer. As an example, the fine grid with $N=768$ can have
+up to 9 grid levels, and a coarsest grid with 3 intervals, see
+Table~\ref{tab:level}.
+
+\begin{table}[hbt]
+\centering
+\begin{tabular}{|l||r|r|r|r|r|}\hline
+$L$ & \multicolumn{5}{c|}{$N$} \\ \hline
+  1 &    2 &    3 &    5 &    7 &    9\\
+  2 &    4 &    6 &   10 &   14 &   18\\
+  3 &    8 &   12 &   20 &   28 &   36\\
+  4 &   16 &   24 &   40 &   56 &   72\\
+  5 &   32 &   48 &   80 &  112 &  144\\
+  6 &   64 &   96 &  160 &  224 &  288\\
+  7 &  128 &  192 &  320 &  448 &  576\\
+  8 &  256 &  384 &  640 &  896 & 1152\\
+  9 &  512 &  768 & 1280 & 1792 & 2304\\
+ 10 & 1024 & 1536 & 2560 & 3584 & 4608\\
+\hline
+\end{tabular}
+\caption{Set of values of the \emph{fine} grid number of intervals $N$
+  to obtain a \emph{coarsest} grid size at most equal to $9$ with
+  at most $10$ grid levels.}
+\label{tab:level}
+\end{table}
+
+For a two-dimensional grid, the same procedure is applied to both
+dimensions. The result of such procedure is illustrated in
+Fig.~\ref{fig:2d_coarsening}, for a $8\times4$ fine grid.
+
+\begin{figure}[htb]
+  \centering
+  \includegraphics[angle=0,width=0.6\hsize]{grid}
+  \caption{A \emph{coarse} $4\times 2$ grid ($\Box$) obtained from a
+    $8\times4$ fine grid ($\bullet$).}
+  \label{fig:2d_coarsening}
+\end{figure}
+
+
+\subsection{Inter-grid transfers}
+The one-dimensional \emph{prolongation} operator for the second-order FD
+discretization is chosen the same as the one obtained with the
+\emph{linear Finite Elements} \cite{MG1D}. For a $N=8$ grid, it can be
+represented as a $9\times 5$ matrix given by
+\begin{equation}
+  \label{eq:1dprolongation}
+  \mathbf{P} =
+  \left(
+  \begin{matrix}
+    1   & 0   & 0   & 0   & 0  \\
+    1/2 & 1/2 & 0   & 0   & 0  \\
+    0   & 1   & 0   & 0   & 0  \\
+    0   & 1/2 & 1/2 & 0   & 0  \\
+    0   & 0   & 1   & 0   & 0  \\
+    0   & 0   & 1/2 & 1/2 & 0  \\
+    0   & 0   & 0   & 1   & 0  \\
+    0   & 0   & 0   & 1/2 & 1/2\\
+    0   & 0   & 0   & 0   & 1   \\
+  \end{matrix}\right)
+\end{equation}
+The \emph{restriction} matrix $\mathbf{R}$ is simply related to $\mathbf{P}$ by
+\begin{equation}
+  \label{eq:1drestriction}
+  \mathbf{R} = \frac{1}{2}\mathbf{P}^{T}=\frac{1}{2}\left(
+  \begin{matrix}
+    1&1/2&0&0&0&0&0&0&0\\
+    0&1/2&1&1/2&0&0&0&0&0\\
+    0&0&0&1/2&1&1/2&0&0&0\\
+    0&0&0&0&0&1/2&1&1/2&0\\
+    0&0&0&0&0&0&0&1/2&1\\
+  \end{matrix}
+  \right).
+\end{equation}
+For Dirichlet BC imposed on the \emph{left} boundary one has to set
+${P}_{21}=R_{12}=0$, while for Dirichlet BC imposed on the \emph{right}
+boundary, ${P}_{N,N/2+1}=R_{N/2+1,N}=0$. Notice that these inter-grid operators are
+identical to the standard \emph{linear interpolation} and \emph{full
+  weighting} operators.
+
+For a two-dimensional problem, using the property that the grid is a
+\emph{tensor product} of two one-dimensional grids, the
+restriction of the right hand side $f^{h}_{ij}$ and the prolongation of
+the solution $u^{2h}_{ij}$ can be computed as
+\begin{equation}
+  \label{eq:2dintergrid}
+  \begin{split}
+    \mathbf{f}^{2h} &= \mathbf{R}^x \cdot \mathbf{f}^{h}\cdot (\mathbf{R}^y)^T \\     
+    \mathbf{u}^{h}  &= \mathbf{\mathbf{P}}^x \cdot \mathbf{u}^{2h}\cdot
+    (\mathbf{\mathbf{P}}^y)^T \\     
+  \end{split}
+\end{equation}
+
+\subsection{Relaxations}
+Gauss-Seidel and damped Jacobi iterations are used as relaxation
+methods in the multigrid $V$ cycle. In general, Gauss-Seidel
+method is  more efficient but much more difficult to
+\emph{parallelize} than the Jacobi method.
+
+It should be noted that if $a(x,y)$ in Eq.~\ref{eq:pde} is
+non-positive, both relaxations diverge! This can be seen by
+considering the following one-dimensional FD equation with uniform
+$a$:
+
+\begin{equation}
+  u_{j-1} -(2+ah^2)u_j + u_{j+1} = h^2f_j.
+\end{equation}
+
+Using the damped Jacobi relaxation, the error
+$\epsilon^{(m)}_j\equiv u_{anal}(x_j)-u_j^{(m)}$ at iteration $m+1$ is given by
+
+\begin{equation}
+  \epsilon^{(m+1)}_j =
+  \frac{\omega}{2+h^2a}(\epsilon^{(m)}_{j-1}+\epsilon^{(m)}_{j+1})
+    +(1-\omega)\epsilon^{(m)}_j .
+\end{equation}
+Performing a \emph{local mode analysis} (or Fourier analysis) (see
+\cite[p.~48]{Briggs}), assuming that
+$\epsilon^{(m)}_j=A(m)e^{ij\theta}$, where $\theta$ is related to the
+mode number $k$ by $\theta=2\pi k/N$, the 
+\emph{amplification factor} $G(\theta)$ is obtained as
+\begin{equation}
+  \begin{split}
+    G(\theta) &= \frac{A(m+1)}{A(m)} =
+    \frac{2\omega}{2+h^2a}\cos\theta + (1-\omega) \\
+    &= G_0(\theta) -\frac{ \omega h^2a}{2+h^2a}\cos\theta 
+    \simeq  G_0(\theta) -\frac{ \omega h^2a}{2}\cos\theta, \\
+ G_0(\theta) &=1-2\omega\sin^2\frac{\theta}{2},  \\
+  \end{split}
+\end{equation}
+where $G_0(\theta)$ is the amplification factor for
+$a=0$. Note that $|G_0(\theta)|< 1$ for \emph{all} $\theta$ and $0<\omega< 1$
+but $\displaystyle{\max_{|\theta|<\pi}|G(\theta)|>1}$ if $a<0$.
+
+In Gauss-Seidel relaxation method, the errors evolve as:
+\begin{equation}
+  \epsilon^{(m+1)}_j = \frac{\epsilon^{(m+1)}_{j-1}+\epsilon^{(m)}_{j+1}}{2+h^2a}.
+\end{equation}
+Applying again the same Fourier analysis yields the
+following complex amplification factor:
+\begin{equation}
+  \begin{split}
+    G(\theta) &\simeq G_0(\theta)\left(1-\frac{h^2a}{2-e^{-i\theta}}\right) \\
+    G_0(\theta) &=\frac{e^{i\theta}}{2-e^{-i\theta}}, \quad
+    |G_0(\theta)| < 1
+  \end{split}
+\end{equation}
+which show that the Gauss-Seidel relaxations \emph{diverge} if $a<0$.
+
+Notice that when $a>0$, the effect of $a$ on the amplification is
+negligible and is thus ignored in the following two-dimensional
+analysis. Applying the damped Jacobi scheme on the
+stencil~(\ref{eq:stencil}), the error at the iteration $m+1$ is given
+by:
+\begin{equation}
+  \begin{split}
+  \epsilon^{(m+1)}_{ij} = & \frac{\omega}{2(1+\alpha^2)} \left[
+    \epsilon^{(m)}_{i-1,j} + \epsilon^{(m)}_{i+1,j} + \alpha^2(
+    \epsilon^{(m)}_{i,j-1} + \epsilon^{(m)}_{i,j+1}) + \frac{\tau\alpha}{4}(
+    \epsilon^{(m)}_{i+1,j+1}+\epsilon^{(m)}_{i-1,j-1} -
+    \epsilon^{(m)}_{i-1,j+1}-\epsilon^{(m)}_{i+1,j-1})
+    \right] \\
+  & + (1-\omega)\epsilon^{(m)}_{ij}. \\
+  \end{split}
+\end{equation}
+Using the two-dimensional Fourier mode expression
+\begin{equation}
+  \epsilon^{(m)}_{ij} = A(m)e^{i(\theta_1+\theta_2)}, \quad -\pi <\theta_1,
+  \theta_2 \le \pi,
+\end{equation}
+the amplification factor $G=A(m+1)/A(m)$ is given
+by
+\begin{equation}
+\label{eq:amp_jac}
+  G(\theta_1,\theta_2;\omega,\alpha,\tau)=1 - \frac{2\omega}{1+\alpha^2} \left(
+    \sin^2\frac{\theta_1}{2} +  \alpha^2\sin^2\frac{\theta_2}{2} +
+    \frac{\tau\alpha}{4}\sin\theta_1\,\sin\theta_2
+    \right).
+\end{equation}
+
+The errors in Gauss-Seidel method, assuming a \emph{lexicographic} ordering
+for the unknowns (increasing first $i$ then $j$), are updated according
+to
+\begin{equation}
+  \epsilon^{(m+1)}_{ij} =  \frac{1}{2(1+\alpha^2)} \left[
+    \epsilon^{(m+1)}_{i-1,j} + \epsilon^{(m)}_{i+1,j} + \alpha^2(
+    \epsilon^{(m+1)}_{i,j-1} + \epsilon^{(m)}_{i,j+1}) + \frac{\tau\alpha}{4}(
+    \epsilon^{(m)}_{i+1,j+1}+\epsilon^{(m+1)}_{i-1,j-1} -
+    \epsilon^{(m)}_{i-1,j+1}-\epsilon^{(m+1)}_{i+1,j-1})
+    \right].
+\end{equation}
+The Fourier mode analysis then leads to the following complex
+amplification factor
+\begin{equation}
+\label{eq:amp_gs}
+   G(\theta_1,\theta_2;\alpha,\tau) =
+   \frac{e^{i\theta_1} + 
+     \left(\alpha^2+i\dfrac{\tau\alpha}{2}\sin\theta_1\right)e^{i\theta_2}}
+   {2(1+\alpha^2) - \left[e^{-i\theta_1}+\left(\alpha^2 -
+     i\dfrac{\tau\alpha}{2}\sin\theta_1\right)e^{-i\theta_2}\right]}.
+\end{equation}
+
+Curves of $G$ for \emph{fixed} $\theta_2$ are plotted in
+Fig.~\ref{fig:fourier_jac} showing \emph{convergence} ($\max|G|<1$) for
+$\tau=-1,0,1,2$, using the damped Jacobi method. The 
+same conclusions are obtained for Gauss-Seidel relaxations as shown
+in Fig.~\ref{fig:fourier_gs} where the absolute values 
+of the complex amplification factor $G$ are plotted. However, for larger
+$|\tau|>2$, both methods diverge as can be seen in
+Fig.~\ref{fig:relax_diverge}.Notice however that the PDE
+(\ref{eq:pde}) is \emph{elliptic} only when $|\tau|<2$ is satisfied!
+
+\begin{figure}[htb]
+  \centering
+  \includegraphics[angle=0,width=0.85\hsize]{\RepFigures/fourier_jac}
+  \caption{Amplification factor for damped Jacobi relaxations with
+    $\omega=0.8$ and $\alpha=h_x/h_y=1$ and $\tau=-1,0,1,2$ displayed as curves of
+    constant $\theta_2$. $\theta_2=0$ on the \emph{green} curve and $\pi$ on
+  the \emph{red} curve.}
+  \label{fig:fourier_jac}
+\end{figure}
+
+\begin{figure}[hbt]
+  \centering
+  \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/fourier_gs}
+  \caption{Absolute value of the amplification factor for Gauss-Seidel
+    relaxations with $\alpha=h_x/h_y=1$ and $\tau=-1,0,1,2$, displayed as curves of
+    constant $\theta_2$. $\theta_2=0$ on the \emph{green} curve and $\pi$ on
+  the \emph{red} curve.}
+  \label{fig:fourier_gs}
+\end{figure}
+
+\begin{figure}[htb]
+  \centering
+  \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/relax_diverge}
+  \caption{Amplification factor for Jacobi (left) and Gauss-Seidel (right)
+    relaxations for $|\tau|=3,5$, $\alpha=h_x/h_y=1$, displayed as curves of
+    constant $\theta_2$. $\theta_2=0$ on the \emph{green} curve and $\pi$ on
+  the \emph{red} curve.}
+  \label{fig:relax_diverge}
+\end{figure}
+
+In summary, the local mode analysis predicts that
+\begin{itemize}
+\item Negative values of the coefficient $a$ and large
+  mixed derivative ($|\tau| > 2$) can make both damped Jacobi and
+  Gauss-Seidel relaxations diverge.
+\item Positive values of $a$ can decrease the amplification factor
+  (improving thus the convergence rate) but
+  its contributions $h^2a$ decrease for increasing grid resolution.
+\end{itemize}
+These predictions will be checked against numerical experiments in the
+next section.
+
+\FloatBarrier
+\section{Numerical Experiments}
+\label{sec:NumExp1}
+In the following numerical experiments, we look at the convergence
+rate of the residual norm and the error norm which are defined at the
+iteration $m$ by
+\begin{equation}
+  \begin{split}
+    r^{(m)} &= \|\mathbf{f}-\mathbf{A}\mathbf{u}^{(m)}\|_\infty, \\
+    e^{(m)} &= \|\mathbf{u}^{(m)}-\mathbf{u}_{anal}\|_\infty.\\
+  \end{split}
+\end{equation}
+
+The iterations are stopped when the number of iterations reach an
+user supplied \emph{maximum} of iterations or when the residual norm
+is smaller than either a given \emph{relative} tolerance {\tt rtol}
+\cite[p.~51]{TEMPL} or \emph{absolute} tolerance {\tt atol}:
+\begin{equation}
+  \begin{split}
+    r^{(m)} &< \mbox{\tt rtol}
+      \cdot(\|\mathbf{A}\|_\infty\cdot\|\mathbf{u}^{(m)}\|_\infty + 
+      \|\mathbf{f}\|_\infty), \\
+    r^{(m)} &<  \mbox{\tt atol}.\\
+  \end{split}
+\end{equation}
+
+An additional stopping criteria consists of stopping the
+iterations when the change of the discretization error between
+successive iteration is small enough:
+\begin{equation}
+  \frac{e^{(m)}-e^{(m-1)}}{e^{(m-1)}}<    \mbox{\tt etol}.
+\end{equation}
+
+\subsection{$V$-cycle performances}
+Table~\ref{tab:iternum} shows the numbers of $V$-cycles required to
+reach the \emph{relative tolerance} 
+$\mbox{\tt rtol}=10^{-8}$. In these runs where $\alpha=0.5$, $\tau=1$
+and $a(x,y)$ given by Eq.~\ref{eq:density}, we observe that the
+biggest improvement is obtained at $\nu_1=\nu_2=2$. For larger
+$\nu_1,\nu_2$, the number of required iterations is relatively insensitive 
+to the grid sizes. As can be seen in Fig.~\ref{fig:mg_iterations}
+which plots the evolution of the error $e^{(m)}$, it
+is clear that the level of discretization error has been largely
+reached. Finally the times used by these runs are shown in
+Fig.~\ref{fig:dddd_pc220} and Fig.~\ref{fig:nndd_pc220} where the
+times spent in the direct solver using MUMPS \cite{MUMPS} are included
+for comparison. For the $512\times 2048$ grid (the largest
+case using the direct solver), the multigrid $V(3,3)$ is about $30$
+times faster!
+
+\begin{table}[htb]
+\centering
+\begin{tabular}{|l||c|c|c|c||c|c|c|c|}\hline
+   & \multicolumn{4}{c||}{\texttt{\textbf{DDDD}} problem}          
+   & \multicolumn{4}{c|}{\texttt{\textbf{NNDD}} problem} \\ \cline{2-9}
+Grid size  & $V(1,1)$ & $V(2,2)$ & $V(3,3)$ & $V(4,4)$
+           & $V(1,1)$ & $V(2,2)$ & $V(3,3)$ & $V(4,4)$ \\ \hline
+    $16\times 64$  &  3 &  2 &   2 &  1 &   4 &   2 &   2 &   1\\
+    $32\times 128$ &  5 &  3 &   2 &  2 &   5 &   3 &   2 &   2\\
+    $64\times 256$ &  7 &  4 &   3 &  3 &   7 &   4 &   3 &   3\\
+   $128\times 512$ & 10 &  6 &   4 &  4 &  10 &   6 &   5 &   4\\
+  $256\times 1024$ & 11 &  6 &   5 &  4 &  11 &   6 &   5 &   4\\
+  $512\times 2048$ & 11 &  6 &   5 &  4 &  11 &   6 &   5 &   4\\
+ $1024\times 4096$ & 10 &  6 &   4 &  4 &  10 &   6 &   4 &   4\\
+ $1536\times 6144$ &  9 &  6 &   4 &  4 &   9 &   5 &   4 &   3\\
+\hline
+\end{tabular}
+\caption{Multigrid $V$-cycle results for the {\tt DDDD} and {\tt NNDD}
+  model problems with $k_x=k_y=4$, $L_x=100$, $L_y=800$, $\tau=1$ and
+  $a(x,y)$ given by Eq.~\ref{eq:density}. Shown are the numbers of
+  multigrid  $V$-cycles required to reduce the \emph{relative}
+  residual norm to less than $10^{-8}$ for different
+  grid sizes and numbers of pre and post relaxation
+  sweeps. Gauss-Seidel relaxation is used. The coarsest grid size of the
+  $1536\times 6144$ case is $3\times 12$ while all the others have a coarsest
+  grid of size $2\times 8$.} 
+\label{tab:iternum}
+\end{table}
+ 
+\begin{figure}[htb!]
+  \centering
+  \includegraphics[width=\textwidth]{\RepFigures/mg_iterations}
+  \caption{Performance of the $V(2,2)$-cycle using the Gauss-Seidel
+    relaxation scheme for the {\tt DDDD} (upper curve) and {\tt NNDD}
+    (lower curve) problem. The relative tolerance {\tt rtol} is
+    set to $10^{-8}$ the coarsest grid size for all the problem size
+    is fixed to $2\times 8$.}
+  \label{fig:mg_iterations}
+\end{figure}
+
+The fittings of the obtained data show that the multigrid
+$V$ cycle cost scales almost \emph{linearly} with the number of
+unknowns $N=(N_x+1)(N_y+1)$ (as does the backsolve stage of MUMPS)
+while the \emph{total} direct solve time scales as $N^{1.4}$.
+
+\begin{figure}[htb]
+  \centering
+  \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/dddd_pc220}
+  \caption{Times used by the multigrid $V$ cycles for the runs reported
+    in Table~\ref{tab:iternum} for the \texttt{\textbf{DDDD}}
+    problem. The last 6 $V(3,3)$ data points are used for the multigrid fit. The
+    MUMPS direct solver's cost is included for comparison. All the
+    timing results are obtained on an Intel Nehalem i7 processor, using the
+    Intel compiler version 13.0.1 and MUMPS-4.10.0. } 
+  \label{fig:dddd_pc220}
+\end{figure}
+
+
+\begin{figure}[htb]
+  \centering
+  \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/nndd_pc220}
+  \caption{As in Fig.~\ref{fig:dddd_pc220} for the
+    \texttt{\textbf{NNDD}} problem.}
+  \label{fig:nndd_pc220}
+\end{figure}
+
+\FloatBarrier
+\subsection{Effects of the mesh aspect ratio $\alpha$}
+From Table~\ref{tab:anisotropy}, one can observe that the required number of
+$V(2,2)$ cycles increase quickly when $\alpha<0.5$ and $\alpha>2$. Advanced
+\emph{relaxation} methods and \emph{coarsening} strategies
+\cite[chap. 7]{Wesseling} can solve this performance degradation
+but are generally more difficult to parallelize.
+
+\begin{table}[htb]
+\centering
+\begin{tabular}{|l||c|c|c|}\hline
+  $\alpha$  & \texttt{\textbf{DDDD}} & \texttt{\textbf{NNDD}} \\ \hline
+   0.125    & 19  & 22 \\
+   0.25     & 12  & 12 \\
+   0.5      &  6  &  6 \\
+   1.0      &  5  &  5 \\
+   2.0      &  7  &  7 \\
+   4.0      & 20  & 19 \\
+\hline
+\end{tabular}
+\caption{Effects of the \emph{mesh aspect ratio} $\alpha=h_x/h_y$ on the
+  number of $V(2,2)$ cycles required to reach $\mbox{\tt
+    rtol}=10^{-8}$ for {\tt DDDD} and {\tt NNDD}
+  model problems. The listed $\alpha$'s are obtained by fixing
+  $N_x=256$, $N_y=1024$, $L_x=100$ and varying $L_y$.}
+\label{tab:anisotropy}
+\end{table}
+
+\subsection{Effects of the mixed partial derivative}
+When $|\tau|>2$, Table~\ref{tab:mixedterm} shows that the multigrid
+$V$-cycle  diverge, as predicted from the local mode analysis based on
+the amplification factor given in Eq.~\ref{eq:amp_gs}. Although, a
+non-negative coefficient $a$ has a \emph{stabilizing} effect, the
+latter disappears already for a $256\times 1024$ grid.
+
+\begin{table}[htb]
+\centering
+\begin{tabular}{|l|l|c|c|c|c|c|c|c|c|}\hline
+  &Grid size  & $\tau=-3$ & $\tau=-2$ & $\tau=-1$ & $\tau=0$ & $\tau=1$
+  & $\tau=2$ & $\tau=3$  \\ \hline
+\multirow{4}{*}{\texttt{\textbf{DDDD}}}
+&$128\times 512(a=0)$ & - & 39 &  7 & 5 & 7 & 38 & -  \\
+&    $128\times 512$  &16 &  6 &  5 & 4 & 4 &  6 & 17 \\
+&   $256\times 1024$  &-  &  8 &  5 & 4 & 5 &  7 &  - \\
+&   $512\times 2048$  &-  &  9 &  5 & 4 & 5 &  9 &  - \\ 
+\hline\hline
+\multirow{4}{*}{\texttt{\textbf{NNDD}}}
+&$128\times 512(a=0)$ & - & 42 &  7 & 5 & 7 & 41 & -  \\
+&    $128\times 512$  &13 &  6 &  5 & 4 & 5 &  5 & 13 \\
+&   $256\times 1024$  &-  &  7 &  5 & 4 & 5 &  7 &  - \\
+&   $512\times 2048$  &-  &  7 &  5 & 4 & 5 &  7 &  - \\ 
+\hline
+\end{tabular}
+\caption{Effects of the mixed derivative term $\tau$ on the
+  performances of the  $V(3,3)$ cycle. The dashes
+  indicate that the $V$-cycle diverges. In theses runs, $a(x,y)$ is given by
+  Eq.~\ref{eq:density} except for the cases where it is set to
+  0. Notice the  \emph{stabilizing} effect of $a\ne 0$ for the
+  $128\times 512$ grid at $\tau = \pm 3$.}
+\label{tab:mixedterm}
+\end{table}
+
+\subsection{Using the damped Jacobi relaxation}
+The optimum Jacobi damping factor $\omega$ can be determined by minimizing
+the \emph{smoothing factor} defined as the maximum amplification
+coefficient (\ref{eq:amp_jac}) restricted to the \emph{oscillatory modes}:
+\begin{equation}
+  \label{eq:mu_jac}
+  \mu(\omega,\alpha,\tau) = \max_{(\theta_1,\theta_2)\in\Omega}
+  |G(\theta_1,\theta_2,\omega,\alpha,\tau)|, \qquad \Omega =
+  [|\theta_1|>\pi/2]\,\bigcup\,[|\theta_2|>\pi/2].
+\end{equation}
+Results from numerical computation of (\ref{eq:mu_jac} are shown
+in Fig.~\ref{fig:jac_opt}. An analytic expression for $\tau=0$
+assuming $\alpha\le 1$ is derived in \cite[p.~119]{salpha}:
+\begin{gather}
+  \mu(\omega,\alpha,\tau=0) =
+  \max\left(\left|1-2\omega\right|,\;\left|1-\frac{\alpha^2}{1+\alpha^2}\omega\right|\right),
+  \nonumber\\    
+\mu_{\mbox{opt}} = \frac{2+\alpha^2}{2+3\alpha^2} \quad
+\mbox{at} \quad \omega_{\mbox{opt}} = \frac{2+2\alpha^2}{2+3\alpha^2}.
+\end{gather}
+Notice that the smoothing factor increases as $\alpha$ departs from 1
+and for increasing $\tau$.
+
+For Gauss-Seidel relaxation, the same numerical procedure applied to
+(\ref{eq:amp_gs}) yields a smoothing factor $\mu$ equal to
+respectively $0.5$, $ 0.68$ and $0.70$ for the three cases shown in
+Fig.~\ref{fig:jac_opt}, which result in a better smoothing property
+than the damped Jacobi relaxation.
+
+\begin{figure}[htb]
+  \centering
+  \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/jac_opt}
+  \caption{The smoothing factor for damped Jacobi relaxation
+    for different values of $\alpha$ and $\tau$.}
+  \label{fig:jac_opt}
+\end{figure}
+
+Numerical experiments with the reference case ($\alpha=0.5$, $\tau=1$,
+$a(x,y)$ given by Eq.~\ref{eq:density}) and the $128\times 512$ grid
+using damped Jacobi relaxation, are shown in Table~\ref{tab:jac_opt}
+and confirm that $\omega=0.9$ is the optimum damping factor and that
+it is less efficient than the Gauss-Seidel relaxation, in agreement
+with the Fourier analysis.
+
+\begin{table}[htb]
+\centering
+\begin{tabular}{l c c c c c c}\hline
+&$\omega=0.5$ &$\omega=0.6$ &$\omega=0.7$ &$\omega=0.8$ &$\omega=0.9$
+  &$\omega=1.0$ \\ \cline{2-7}
+\texttt{\textbf{DDDD}}& 12 & 10 & 9 & 8 & 7 & 15 \\
+\texttt{\textbf{NNDD}}& 12 & 11 & 9 & 8 & 7 & 18 \\
+\hline
+\end{tabular}
+\caption{The number of $V(3,3)$ cycles required to obtain
+  $\mbox{\texttt{rtol}}=10^{-8}$ versus the Jacobi \emph{damped
+    factor} $\omega$. The grid size is $128\times 512$ with
+  $\alpha=0.5$, $\tau=1$ and $a(x,y)$ given by Eq.~\ref{eq:density}.}
+\label{tab:jac_opt}
+\end{table}
+
+\subsection{Matrix storage}
+Initially the \emph{Compressed Sparse Row} storage format (CSR or CRS) (see
+\cite[p.~58--59]{TEMPL}) was used to store the discretized finite
+difference matrix. With this choice, the CPU time used by the matrix
+construction (and boundary condition setting) is found to be always larger
+than the multigrid solver time as shown in
+Fig.~\ref{fig:matcon_time}. Fortunately, switching to the \emph{Compressed
+  Diagonal Storage} (CDS), where the 9 diagonal structure of the
+matrix is fully exploited, the matrix construction time is considerably
+reduced as shown in the same figure. On the other hand, no
+difference in the multigrid solver performance is noticeable
+between the two matrix storage.
+
+\begin{figure}[htb]
+  \centering
+  \includegraphics[angle=0,width=\hsize]{\RepFigures/matcon_time}
+  \caption{CPU time used by the matrix construction for CSR and CDS
+    matrix storage compared to the multigrid $V(3,3)$ cycle time for
+    the \textbf{DDDD} and  \textbf{NNDD} model problems. The timing is
+  obtained using the same conditions as in Fig.~\ref{fig:dddd_pc220}.}
+  \label{fig:matcon_time}
+\end{figure}
+
+\FloatBarrier
+
+\section{Modified PDE}
+Here, the following modified PDE is considered:
+\begin{equation}
+\label{eq:new_pde}
+  \left[\frac{\partial^2}{\partial x^2} + \tau\frac{\partial^2}{\partial
+  x\partial y} + (1+\tau^2/4)\frac{\partial^2}{\partial y^2} - a(x,y)\right] u(x,y)  =
+  f(x,y), \qquad 0\le x \le L_x, \; 0\le y \le L_y.
+\end{equation}
+This PDE which is obtained from the $\hat s-\alpha$ model
+\cite[Eq.10]{salpha} is
+\emph{elliptic} for any value of $\tau$. The resulting stencil is
+changed from the previous stencil \ref{eq:stencil} to
+\begin{equation}
+\label{eq:new_stencil}
+  S_{ij} = \frac{1}{h_x^2}
+  \begin{bmatrix}
+    -\tau\alpha/4   & \alpha^2(1+\tau^2/4)                  & \tau\alpha/4 \\
+          1         & -2\left[1+\alpha^2(1+\tau^2/4)\right]-h_x^2a_{ij} & 1 \\
+    \tau\alpha/4    & \alpha^2(1+\tau^2/4)                   &-\tau\alpha/4 \\
+  \end{bmatrix}
+.
+\end{equation}
+
+Note that the
+\emph{anisotropy} of the resulting Finite Difference discretization
+is now $\alpha^2(1+\tau^2/4)$ and could be controlled by adjusting both
+the mesh aspect ratio $\alpha$ and the \emph{shear} term $\tau$.
+
+Numerical calculations show that the multigrid $V$-cycles do always
+converge, as shown in Table~\ref{tab:new_mixedterm}.
+
+\begin{table}[htb]
+\centering
+\begin{tabular}{|l|l|c|c|c|c|c|c|c|}\hline
+  &Grid size  & $\tau=0$ & $\tau=1$ & $\tau=2$ & $\tau=4$ & $\tau=8$
+  & $\tau=16$  \\ \hline
+\multirow{3}{*}{\texttt{\textbf{DDDD}}}
+&    $128\times 512$  &4 & 4 &  5 & 6 & 9  & 20 (6) \\
+&   $256\times 1024$  &4 & 4 &  5 & 6 & 11 & 25 (8)\\
+&   $512\times 2048$  &4 & 4 &  5 & 7 & 12 & 29 (8)\\ 
+\hline\hline
+\multirow{3}{*}{\texttt{\textbf{NNDD}}}
+&    $128\times 512$  &4 &   4 &  4 & 5 & 8 &  17 (5) \\
+&   $256\times 1024$  &4  &  5 &  5 & 5 & 8 &  19 (6) \\
+&   $512\times 2048$  &4  &  4 &  4 & 5 & 7 &  18 (6) \\ 
+\hline
+\end{tabular}
+\caption{Effects of the mixed derivative term $\tau$ on the
+  performances of the  $V(3,3)$ cycle. In theses runs, $a(x,y)$ is
+  given by Eq.~\ref{eq:density}. The mesh aspect ratio $\alpha=0.5$
+  was used. On the last column and shown in parenthesis are the
+  numbers of $V(3,3)$ cycles when $\alpha$ is reduced to 0.125 by
+  increasing the length $L_y$ while keeping all the other values fixed.}
+\label{tab:new_mixedterm}
+\end{table}
+
+\section{Parallel Multigrid}
+In order to maximize the parallel efficiency and the flexibility of
+utilization, a two-dimensional domain partition scheme is chosen to
+parallelize the multigrid solver. As shown below, generalization of this
+procedure for higher dimensions is straightforward.
+
+\subsection{Distributed grid coarsening}
+The coarsening algorithm can be summarized as follow:
+\begin{itemize}
+\item Partition the grid points on each dimension at the
+  \emph{finest} grid level, as evenly as possible.
+\item The range for each sub-grid, using \emph{global indexing} is
+  thus specified by $[s,e]$, with $s=0$ for the first sub-grid and
+  $e=N$ for the last sub-grid, $N$ being the number of grid intervals.
+\item The next coarse sub-grid is thus obtained by discarding all the
+  \emph{odd} indexed grid points, as in the serial case.
+\item This process can continue (as long as the total of number of
+  intervals is even) until there exists a prescribed \emph{minimum}
+  number of grid points on any sub-grid is reached.
+\end{itemize}
+
+\subsection{Matrix-free formulation}
+Using standard \emph{matrix} to represent the discretized 2D (or higher
+dimensional) operators imply an \emph{one-dimensional numbering} of
+the grid nodes. For example on a 2D $N_x\times N_y$ grid, the 1D numbering
+of the node $(x_{i_1},y_{i_2})$ could be defined as
+\[
+ k=i_1+i_2\times N_x, \quad i_1=0:N_x,\;i_2=0:N_y.
+\]
+However, using 2D domain partition defined by
+\begin{equation}
+  \label{eq:2dnumber}
+  i_1=s_1:e_1,\;i_2=s_2:e_2,
+\end{equation}
+with $s=(s_1,s_2)$ and $e=(e_1,e_2)$ denoting respectively the
+\emph{starting} and \emph{ending} indices of a rectangular sub-domain,
+result in a \emph{non-contiguous} set of the indices ${k}$ and in a
+complicate structure of the partitioned matrix for the linear
+operator.
+
+On the other hand, using the \emph{stencil notation}
+introduced in \cite[chap. 5.2]{Wesseling} based on the
+\emph{multidimensional} node labeling as defined by
+(\ref{eq:2dnumber}) for a 2D problem, one can define a simple data
+structure for the partitioned operator, $A(i,\delta)$,
+where the $d$-tuple $i=(i_1,\ldots,i_d)$ represents a node of the
+$d$-dimensional grid and the $d$-tuple
+$\delta=(\delta_1,\ldots,\delta_d)$, the 
+\emph{distance} between the connected nodes. The result of
+$\mathbf{A}u$ can thus be defined as 
+\begin{equation}
+  \label{eq:vmx}
+  (\mathbf{A}u)_i = \sum_{\delta\in\mathbb{Z}^d}
+  A(i,\delta)u_{i+\delta}, \quad i=s:e. 
+\end{equation}
+In (\ref{eq:vmx}), the sum is performed over all
+indices $\delta$ such that $A(i,\delta)$ is non-zero. For the 2D 
+nine-point stencil defined in (\ref{eq:stencil}), the 2-tuple
+$\delta$ can be specified as the 9 columns of the following
+\emph{structure} matrix
+\begin{equation}
+  \label{5points}
+  S_\delta = \left(
+  \begin{array}{rrrrrrrrr}
+    0  & -1 & 0 & 1 & -1 & 1 & -1& 0 & 1 \\
+    0  & -1 & -1&-1 &  0 & 0 & 1 & 1 & 1 \\
+  \end{array} \right).
+\end{equation}
+In the general case of a $d$-dimensional grid and $\mathcal{N}$ point stencil,
+$S_\delta$ is a $d\times\mathcal{N}$
+matrix. By noting that the subscript $i+\delta$ of $u$ on the right hand side of
+(\ref{eq:vmx}) should be in the range $[0,N]$ \emph{only} for sub-domains which are
+\emph{adjacent} to the boundary, one can deduce that for
+a \emph{fixed} $\delta$, the lower and upper bounds of the indices $i$
+should be 
+\begin{equation}
+  \begin{split}
+  i_{\mbox{min}} &= \max (0, -\delta, s), \\
+  i_{\mbox{max}} &= \min (N, N-\delta, e) \\
+  \end{split}
+\end{equation}
+where $N=(N_1,N_2,\ldots,N_d)$ specify the number of intervals,
+since, for sub-domains \emph{not adjacent} to the boundary, $u$ should
+include values at the \emph{ghost} cells $s-g$ and $e+g$ where $g$ is
+given by 
+\begin{equation}
+  g = \max|S_\delta|
+\end{equation}
+with the operator max taken along the \emph{rows} of the matrix.
+The formula defined in (\ref{eq:vmx}) can then be implemented as in
+the \emph{pseudo} Fortran code
+
+\par
+\addvspace{\medskipamount}
+\nopagebreak\hrule
+\begin{lstlisting}[mathescape]
+do k=1,SIZE($S_{\delta}$,2)   ! loop over the stencil points
+  $\delta$ = $S_{\delta}$(:,k)
+  lb = MAX(0,-$\delta$,$s$)
+  ub = MIN($N$,$N-\delta$,e)
+  do i=lb,ub
+    Au(i) = Au(i) + A(i,$\delta$)*u(i+$\delta$)
+  enddo
+enddo
+\end{lstlisting} 
+\nopagebreak\hrule 
+\addvspace{\medskipamount}
+
+On the other hand, if the values of $u$ at the ghost cells of the
+sub-domains \emph{adjacent} to the boundary are set to 0
+\begin{equation*}
+  u_{-g} = u_{N+g} = 0,
+\end{equation*}
+the lower and upper bounds of the
+inner loop can be simply set to $lb=s$ and $ub=e$. Note that the inner
+loop should be interpreted as $d$ nested loops over the $d$-tuple
+$i=(i_1,\ldots,i_d)$ for a $d$-dimensional problem.
+
+\subsection{Inter-grid transfers}
+
+\subsubsection{Restriction}
+Using the definition in the first equation of (\ref{eq:2dintergrid})
+together with (\ref{eq:1drestriction}), the 2D restriction operator
+can be represented by the following 9-point stencil:
+\begin{equation}
+  \label{eq:2drestriction}
+  \mathbf{R}_i = \frac{1}{16}
+  \begin{pmatrix}
+    1 & 2 & 1 \\
+    2 & 4 & 2 \\
+    1 & 2 & 1 \\
+  \end{pmatrix},
+\end{equation}
+and the restriction of $f$ can be computed as
+\begin{equation}
+  \bar{f}_i = (\mathbf{R}f)_i = \sum_{\delta\in\mathbb{Z}^2}
+  R(i,\delta)f_{2i+\delta}, \quad i=\bar{s}:\bar{e},  
+\end{equation}
+where $\bar{s},\bar{e}$ denote the partitioned domain boundary indices on the
+\emph{coarse} grid, using the same algorithm described previously.
+
+\subsubsection{BC for the restriction operator}
+\label{sec:restrict_bc}
+Dirichlel boundary conditions can be imposed by modifying the
+\emph{restriction stencil} on each of the four boundaries as follow:
+\begin{equation}
+  \mathbf{R}_{0,.} = \frac{1}{16}\begin{pmatrix}
+    1 & 2 & 0 \\
+    2 & 4 & 0 \\
+    1 & 2 & 0 \\
+  \end{pmatrix},\quad
+  \mathbf{R}_{N_x,.} =  \frac{1}{16}\begin{pmatrix}
+    0 & 2 & 1 \\
+    0 & 4 & 2 \\
+    0 & 2 & 1 \\
+  \end{pmatrix},\quad
+  \mathbf{R}_{.,0} =  \frac{1}{16}\begin{pmatrix}
+    0 & 0 & 0 \\
+    2 & 4 & 2 \\
+    1 & 2 & 1 \\
+  \end{pmatrix},\quad
+  \mathbf{R}_{.,N_y} = \frac{1}{16}\begin{pmatrix}
+    1 & 2 & 1 \\
+    2 & 4 & 2 \\
+    0 & 0 & 0 \\
+  \end{pmatrix}.
+\end{equation}
+With the natural Neumann BC, no change of the restriction operator is needed.
+
+\subsubsection{Prolongation}
+Stencil notation for \emph{prolongation} operators is less obvious to
+formulate, see \cite[chap. 5.2]{Wesseling}. A more straightforward
+implementation is obtained in the 2D case, by simply applying
+\emph{bilinear interpolation} on the \emph{coarse grid}:
+\begin{equation}
+  \label{eq:2dprolongation}
+  \begin{split}
+   (\mathbf{P}\bar{u})_{2i}   &= \bar{u}_{i}, \\
+   (\mathbf{P}\bar{u})_{2i+e_1} &= (\bar{u}_{i} + \bar{u}_{i+e_1})/2, \quad
+   (\mathbf{P}\bar{u})_{2i+e_2} = (\bar{u}_{i} + \bar{u}_{i+e_2})/2, \\
+   (\mathbf{P}\bar{u})_{2i+e_1+e_2} &= (\bar{u}_{i} +
+      \bar{u}_{i+e_1} + \bar{u}_{i+e_2} + \bar{u}_{i+e_1+e_2})/4, \\
+  \end{split}
+\end{equation}
+
+\subsection{Relaxations}
+While the Gauss-Seidel proves to be more efficient, the damped Jacobi
+method, at least for a first version of the parallel multigrid solver,
+is used because it is straightforward to \emph{parallelize}. The same
+undamped Jacobi (with $\omega=1$) with a \emph{few}  number of 
+iterations is also used to solve the linear system at the coarsest
+mesh as prescribed by the multigrid $V$-cycle procedure defined in
+section \ref{sec-mgProc}. 
+
+\subsection{Local vectors and stencils}
+All local vectors (used to represent solutions or
+right-hand-sides) contain \emph{ghost cells} and are implemented
+using 2D arrays, for example
+\[\mbox{\texttt{sol(s(1)-1:e(1)+1,s(2)-1:e(2)+1)}}\]
+for the solution vector.
+
+The partitioned stencils are defined
+only for the \emph{local} grid points, without the ghost cells.
+Thus, before each operation on the local vectors, an exchange (or
+update) of the values on the ghost cells is performed.
+
+As a result, all the memory required by the solver is completely
+partitioned, except for the space used by the ghost cells.
+
+\subsection{Numerical Experiments}
+In this section, all the numerical experiments are conducted on
+\texttt{helios.iferc-csc.org}, using the Intel compiler version 13.1.3
+and bullxpmi-1.2.4.3. The \emph{stopping criteria} for the $V$-cycles
+is based on the absolute and relative residual norms as well as the
+discretization error norm as defined in section \ref{sec:NumExp1}. In
+cases where the analytic solution is not known, the latter can be
+replaced by some norm of the solution.
+
+\subsubsection{Strong scaling}
+\begin{figure}[htb]
+  \centering
+  \includegraphics[angle=0,width=\hsize]{\RepFigures/strong_256x1024_DDD}
+  \caption{DDDD problem for a $256\times 1024$ size, using multigrid
+    $V(3,3)$ cycles. Different times for a given number of
+    processes are obtained with different combinations of processes in
+    each dimension. The number of grid levels are fixed to 6. Five Jacobi
+    iterations are used at the coarsest grid.}
+  \label{fig:strong_scal_small}
+\end{figure}
+
+\begin{figure}[htb]
+  \centering
+  \includegraphics[angle=0,width=\hsize]{\RepFigures/strong_512x2048_DDD}
+  \caption{DDDD problem for a $512\times 2048$ size  using multigrid
+    $V(3,3)$ cycles. The \textcolor{red}{red marker} on the left shows
+    the time for the serial multigrid solver. Different times for a
+    given number of processes are obtained with different combinations
+    of processes in each dimension. The number of grid levels are fixed
+    to 6. Five Jacobi iterations are used at the coarsest grid.}
+  \label{fig:strong_scal}
+\end{figure}
+
+Here 2 \emph{fixed} problem sizes are considered:
+\begin{itemize}
+  \item A small size with the (fine) grid of $256\times 1024$
+    shown in Fig.~\ref{fig:strong_scal_small} and
+  \item a larger size of $512\times 2048$ in Fig.~\ref{fig:strong_scal}.
+\end{itemize}
+
+In both cases $\mbox{\tt rtol}=10^{-8}$ and $\mbox{\tt
+  etol}=10^{-3}$. It was checked that the results do not change when more
+than 5 Jacobi iterations are used at the coarsest mesh. Notice that
+for the small problem, the parallel efficiency starts to degrade at 32
+MPI processes while for the larger case, this happens after 64 MPI
+processes. This can be explained by 
+the ghost cell exchange communication overhead: denoting $N_1$ and
+$N_2$, the number of grid points in each direction and $P_1$ and $P_2$
+the number of MPI processes in each direction, the ratio $S/V$ between the
+number of ghost points and interior grid points for each local
+subdomains can be estimated as 
+\begin{equation}
+\label{eq:comm_overhead}
+  S/V\simeq\frac{2(N_1/P_1+N_2/P_2)}{N_1N_2/P_1P_2}  = 2\left(P_1/N_1+P_2/N_2\right). 
+\end{equation}
+This ratio increases as the number MPI processes increases while
+keeping the problem size fixed. On very coarse grids, this communication
+cost can become prohibitive. For this reason, in all the runs shown
+here, the number of grid points on each direction for the coarsest
+grid is limited to 2.
+
+\subsubsection{Weak Scaling}
+\begin{figure}[htb]
+  \centering
+  \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/weak_DDDD}
+  \caption{Weak scaling for a DDDD problem, using multigrid $V(3,3)$
+    cycles. The number of grid levels are fixed to 7. The solver for the
+    coarsest grid uses 5 Jacobi iterations except for the 2 largest cases
+    which require respectively 20 and 100 iterations to  converge. The 2
+    sets of curves on the right figure show respectively the timings with
+    and without the calculations of the residual norm and discretization
+    error which require both a \emph{global reduction}.}
+  \label{fig:weak_scal_DDDD}  
+\end{figure}
+
+\begin{figure}[htb]
+  \centering
+  \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/weak_NNDD}
+  \caption{Weak scaling for a NNDD problem, using multigrid $V(3,3)$
+    cycles. The number of grid levels are fixed to 7. The solver for the
+    coarsest grid uses 5 Jacobi iterations except for the 2 largest cases
+    which require respectively 20 and 100 iterations to  converge. The 2
+    sets of curves on the right figure show respectively the timings with
+    and without the calculations of the residual norm and discretization
+    error which require both a \emph{global reduction}.}
+  \label{fig:weak_scal_NNDD}  
+\end{figure}
+
+According to Eq.~\ref{eq:comm_overhead}, varying the problem size
+together with the number of MPI processes by keeping $N_1/P_1$ and
+$N_2/P_2$ constant should yield a \emph{constant scaling},
+provided that the convergence rate does not depend on the problem sizes.
+The results for the \texttt{DDDD} and \texttt{NNDD} problems are shown
+in Fig.~\ref{fig:weak_scal_DDDD} and
+Fig.~\ref{fig:weak_scal_NNDD}. The left part of the figures shows that
+the convergence rate depends only weakly on the problem sizes, which
+leads indeed to a (almost) constant time obtained for numbers of MPI
+processes $P$ between 16 and 1024 . The reason for the good timings
+for smaller $P$ is simply that there are only 2 ghost cell exchanges
+for $P=2\times 2$ (instead of 4 for $P\ge 16$) and that there is no
+exchange for $P=0$.
+
+\section{Non-homogeneous Boundary Conditions}
+
+\subsection{Non-homogeneous Dirichlet Conditions}
+Non-homogeneous Dirichlet boundary conditions can be imposed on all the
+Dirichlet faces simply by \emph{clearing}, as for the 
+\emph{homogeneous case}, the matrice rows and columns and setting its
+diagonal term to 1. Moreover, the corresponding corresponding
+\emph{right-hand-side} should be set to:
+\begin{equation}
+  \begin{split}
+    f_{0,j} &= D^W(y_j), \quad f_{N_x,j}=D^E(y_j), \qquad j=0,\ldots,N_y, \\
+    f_{i,0} &= D^S(x_i), \quad f_{j,N_y}=D^N(x_i), \qquad i=0,\ldots,N_x, \\
+  \end{split}
+\end{equation}
+where $D^W, D^E, D^S, D^N$ are the values of $u$ at the 4 Dirichlet
+faces. As for the homogeneous Dirichlet BC, the \emph{restriction}
+operator should be changed as described in section
+\ref{sec:restrict_bc} while the \emph{prolongation} defined
+in (\ref{eq:2dprolongation}) remains unchanged.
+
+\subsection{Non-homogeneous Neumann Conditions}
+The non-homogeneous Neumann conditions at the 4 faces $x=0$ can be
+defined as
+\begin{equation}
+  \begin{split}
+  \left.\frac{\partial u}{\partial x}\right|_{x=0} &= N^W(y), \quad
+  \left.\frac{\partial u}{\partial x}\right|_{x=L_x} = N^E(y), \\
+  \left.\frac{\partial u}{\partial y}\right|_{y=0} &= N^S(x), \quad
+  \left.\frac{\partial u}{\partial y}\right|_{y=L_y} = N^N(x). \\
+  \end{split}
+\end{equation}
+Discretization of the BC defined above, using the \emph{central
+difference} yields on the 4 faces
+\begin{equation}
+  \begin{split}
+  u_{-1,j}   &= u_{1,j} -2h_xN^W(y_j), \quad
+  u_{N_x+1,j} = u_{N_x-1,j} + 2h_xN^E(y_j), \qquad j=0,\ldots,N_y, \\
+  u_{i,-1}   &= u_{i,1} -2h_yN^S(x_i), \quad
+  u_{i,N_y+1} = u_{i,N_y-1} + 2h_yN^N(x_i), \qquad i=0,\ldots,N_x. \\
+  \end{split}
+\end{equation}
+
+With these relations, the stencil (\ref{eq:new_stencil}) on the 4
+boundaries is modified as follow 
+
+\begin{equation}
+  \begin{split}
+  S^W &= \frac{1}{h_x^2}
+  \begin{bmatrix}
+     0 &\alpha^2(1+\tau^2/4)                  & 0 \\
+     0 &-2(1+\alpha^2(1+\tau^2/4))-h_x^2a_{0,j} & 2 \\
+     0 &\alpha^2(1+\tau^2/4)                  & 0 \\
+  \end{bmatrix}
+,\quad
+  S^E = \frac{1}{h_x^2}
+  \begin{bmatrix}
+     0 &\alpha^2(1+\tau^2/4)                     & 0 \\
+     2 &-2(1+\alpha^2(1+\tau^2/4))-h_x^2a_{N_x,j} & 0 \\
+     0 &\alpha^2(1+\tau^2/4)                     & 0 \\
+  \end{bmatrix}
+, \\
+  S^S &= \frac{1}{h_x^2}
+  \begin{bmatrix}
+     0 &                2\alpha^2(1+\tau^2/4)  & 0 \\
+     1 &-2(1+\alpha^2(1+\tau^2/4))-h_x^2a_{i,0} & 1 \\
+     0 & 0 & 0 \\
+  \end{bmatrix}
+,\quad   
+  S^N  = \frac{1}{h_x^2}
+  \begin{bmatrix}
+     0 &  0  & 0 \\
+     1 &-2(1+\alpha^2(1+\tau^2/4))-h_x^2a_{i,N_y} & 1 \\
+     0 &                2\alpha^2(1+\tau^2/4)    & 0 \\
+  \end{bmatrix}
+, \\
+  \end{split}
+\end{equation}
+while the right-hand-side  should be changed according to
+\begin{equation}
+  \begin{split}
+    f_{0,j} &\longleftarrow f_{0,j} + \frac{2}{h_x}\left[\frac{\tau\alpha}{4}\,N^W(y_{j-1})
+  + N^W(y_j) - \frac{\tau\alpha}{4}\,N^W(y_{j+1})\right], \\
+    f_{N_x,j} &\longleftarrow f_{N_x,j} + \frac{2}{h_x}\left[\frac{\tau\alpha}{4}\,N^E(y_{j-1})
+  - N^E(y_j) - \frac{\tau\alpha}{4}\,N^E(y_{j+1})\right], \\
+    f_{i,0} &\longleftarrow f_{i,0} + \frac{2}{h_y}\left[\frac{\tau}{4\alpha}\,N^S(x_{i-1})
+  + (1+\tau^2/4)N^S(x_i) - \frac{\tau}{4\alpha}\,N^S(x_{i+1})\right], \\
+    f_{i,N_y} &\longleftarrow f_{i,N_y} + \frac{2}{h_y}\left[\frac{\tau}{4\alpha}\,N^N(x_{i-1})
+  - (1+\tau^2/4)N^N(x_i)  - \frac{\tau}{4\alpha}\,N^N(x_{i+1})\right]. \\
+  \end{split}
+\end{equation}
+
+\subsection{The NNDD test problem}
+In order to test the discretization of the non-homogeneous boundary
+conditions as formulated above, a test problem with the prescribed
+\emph{exact} solution 
+\begin{equation}
+  u(x,y) = 1 + \sin\frac{2\pi k_xx}{L_x}\sin\frac{2\pi k_yy}{L_y},
+    \qquad \mbox{where $k_x$, $k_y$ are positive integers}
+\end{equation}
+and the following non-homogeneous boundary conditions
+
+\begin{equation}
+  \begin{split}
+    \left.\frac{\partial u}{\partial x}\right|_{x=0} =
+    \left.\frac{\partial u}{\partial x}\right|_{x=L_x} &=
+    k_x\sin\frac{2\pi k_yy}{L_y}, \\
+    u(x,0) = u(x,L_y) &= 1, \\
+  \end{split}
+\end{equation}
+
+is solved with varying grid spacing. The discretization errors versus
+the number of grid intervals $N_x$ displayed in Fig(\ref{fig:conv_nh_bc} shows
+a \emph{quadratic} convergence as expected from the second order
+finite differences used in both the PDE and the Neumann boundary condition
+discretization.
+
+\begin{figure}[htb]
+  \centering
+  \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/conv_nh_bc}
+  \caption{Convergence of the error
+    $\| u_{calc}- u_{anal}\|_\infty$ wrt the number of intervals in
+    the $x$ direction $N_x$ for the non-homogeneous NNDD
+    problem. Here, $L_x=100$, $L_y=800$, $k_x=k_y=4$, $\tau=1$ and $N_y=4N_x$.}
+  \label{fig:conv_nh_bc}
+\end{figure}
+
+As shown in Fig.(\ref{fig:nndd_nh}), the multigrid $V$-cycles for the
+\emph{non-homogeneous} problem converge with a slightly smaller efficiency,
+than the \emph{homogeneous} problem shown in Fig.(\ref{fig:weak_scal_NNDD}).
+
+\begin{figure}[htb]
+  \centering
+  \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/nndd_nh}
+  \caption{Performances of the $V(3,3)$-cycle for the non-homogeneous
+    NNDD problem. The same parameters in Fig.(\ref{fig:conv_nh_bc})
+    are used here.}
+  \label{fig:nndd_nh}
+\end{figure}
+
+\FloatBarrier
+
+\subsection{Local relaxation methods}
+In addition to the damped Jacobi, three methods of relaxations are
+added in this parallel multigrid solver:
+\begin{enumerate}
+\item The 4 color Gauss-Seidel method (RBGS).
+\item The Gauss-Seidel method (GS).
+\item The successive over-relaxation method (SOR).
+\end{enumerate}
+In order to apply correctly the parallel 4 color Gauss-Seidel, a
+complicated ghost cell exchange has to be performed for each sweep
+for each color. Here we simply apply the method \emph{locally} on each
+subdomain with only one ghost exchange performed at the beginning of
+each relaxation.
+
+The same procedure is also used for the other 2 methods which are
+inherently \emph{serial}. All these 3 relaxations are
+thus only correct if there is only one subdomain. As a 
+consequence, while the damped Jacobi does not depend on the
+partition of the subdomains, results from these 3 methods do depend on
+how the domain is partitioned.
+
+Table~\ref{tab:advrelax} show
+however that all of the 3 \emph{approximated} relaxation methods produce
+a much \emph{faster} convergence rate than the damped Jacobi relaxations for the NNDD test
+problem considered here. The performance of the implemented solver
+using the 4 relaxation methods on HELIOS is compared in
+Fig,(\ref{fig:weakhelios}. The bad performance of the 4 color
+Gauss-Seidel relaxations (RBGS) can be explained by the 4 nested loops
+required to sweep each of the 4 colors.
+
+\begin{table}[hbt]
+\centering
+\begin{tabular}{|l||r|r|r|r|r|r|}\hline
+ Grid Sizes          & $256\times 1024$ & $512\times 2048$ & $1024\times 4096$ & $2048\times 8192$ & $4096\times 16384$ & $8192\times 32768$ \\
+\hline
+ Process topology    &
+ \multicolumn{1}{c|} {$1\times 1$} &
+ \multicolumn{1}{c|} {$2\times 2$} &
+ \multicolumn{1}{c|} {$4\times 4$} &
+ \multicolumn{1}{c|} {$8\times 8$} &
+ \multicolumn{1}{c|} {$16\times 16$} &
+ \multicolumn{1}{c|} {$32\times 32$}  \\
+\hline
+ Jacobi $\omega=0.9$ &     0.22 &     0.24 &      0.24 &      0.24 &       0.24 &       0.25 \\
+ RBGS                &     0.05 &     0.07 &      0.10 &      0.10 &       0.12 &       0.12 \\
+ GS                  &     0.07 &     0.08 &      0.10 &      0.11 &       0.11 &       0.12 \\
+ SOR $\omega=1.2$    &     0.04 &     0.05 &      0.07 &      0.07 &       0.07 &       0.08 \\
+\hline
+\end{tabular}
+\caption{Reduction factor for the residuals (obtained as the \emph{geometric mean} of
+  all its values except the first 2 values) for the non-homogeneous
+  NNDD test problem. The same parameters as in Fig.(\ref{fig:conv_nh_bc}) are used here.}
+\label{tab:advrelax}
+\end{table}
+
+\begin{figure}[htb]
+  \centering
+  \includegraphics[angle=0,width=0.9\hsize]{\RepFigures/weak_helios}
+  \caption{Performance of the 4 relaxations on the non-homogeneous
+    NNDD problem. The same parameters in Fig.(\ref{fig:conv_nh_bc})
+    are used here. The grid sizes used in this \emph{weak scaling}
+    run are shown in Table~\ref{tab:advrelax}.}
+  \label{fig:weakhelios}
+\end{figure}
+
+\FloatBarrier
+
+\section{Performance of the Stencil Kernel on different platform}
+To get a feeling on the performances gained on the different
+platforms and how well the compilers (with their
+auto-vectorization capability) support these platforms, 
+the following Fortran \emph{9-point stencil} kernel has been used. The
+OpenMP directives are used for parallelization on both Xeon and Xeon
+Phi while offload to GPU card is done via the high level OpenACC
+directives. \emph{First touch} is applied in the initialization of
+\texttt{x} and \texttt{mat}.
+
+\begin{lstlisting}[language=Fortran,numbers=left,commentstyle=\color{blue},keywordstyle=\color{red},frame=single]
+!$omp parallel do private(ix,iy)
+!$acc parallel loop present(mat,x,y) private(ix,iy)
+    DO iy=0,ny
+       DO ix=0,nx
+          y(ix,iy) =  mat(ix,iy,1)*x(ix-1,iy-1) &
+               &    + mat(ix,iy,2)*x(ix,  iy-1) &
+               &    + mat(ix,iy,3)*x(ix+1,iy-1) &
+               &    + mat(ix,iy,4)*x(ix-1,iy)   &
+               &    + mat(ix,iy,0)*x(ix,iy)     &
+               &    + mat(ix,iy,5)*x(ix+1,iy)   &
+               &    + mat(ix,iy,6)*x(ix-1,iy+1) &
+               &    + mat(ix,iy,7)*x(ix,  iy+1) &
+               &    + mat(ix,iy,8)*x(ix+1,iy+1)
+       END DO
+    END DO
+!$acc end parallel loop
+!$omp end parallel do
+\end{lstlisting}
+
+The performances on a Helios dual processor node and its attached Xeon
+Phi co-processor are shown in Fig.~\ref{fig:cpu_mic} while the
+performances on a Cray XC30 CPU and its attached NVIDIA graphics card are
+shown in Fig.~\ref{fig:cpu_gpu}. In these figures, Intel optimization
+flag \texttt{-O3} and default Cray optimization were applied. In
+Fig.~\ref{fig:cpu_mic_O1_O3}, the speedup by vectorization is shown by
+comparing performances obtained with \texttt{-O3} and \texttt{-O1}.
+Several observations can be drawn from these results.
+\begin{itemize}
+\item The parallel scaling, using OpenMP is linear for both Intel and
+  Cray compilers, when the problem sizes fit into the 20MB cache of
+  the Sandybridge processor. For grid sizes smaller than $32\times8$, the
+  overhead of thread creation dominates. When the memory footprint is
+  larger than the cache, 4 threads per socket already saturate the memory
+  bandwidth.
+\item On the MIC, the parallel speedup scales linearly up to 60
+  cores with 1 thread per core. Using 2 or 3 threads per core does not help
+  while with 4 threads, the performance even degrades.
+\item The MIC, using the Intel \emph{mic native} mode, does not
+  perform better than 8 cores of the Sandybridge processor.
+\item Since the benefit from \emph{vectorization} is quite large for
+  the MIC (see Fig.~\ref{fig:cpu_mic_O1_O3}), the poor parallel
+  scalability may be explained by the low flop intensity per thread
+  coupled with the high overhead of the (many) thread creation and
+  thread synchronization.
+\item The NIVIDIA card, using the high level OpenACC programming
+  style is more than 3 times faster than 8 Sandybridge cores, for grid
+  sizes larger than $1024\times256$. For smaller sizes, there are not
+  enough flops to keep the GPU threads busy.
+\end{itemize}
+
+
+\begin{figure}[htb]
+  \centering
+  \includegraphics[angle=0,width=\hsize]{\RepFigures/cpu_mic}
+  \caption{Performance on the Helios dual processor (left) using the
+    \texttt{-O3} compiler option and on the MIC (right), 
+    using the native mode \texttt{-mmic}.}
+  \label{fig:cpu_mic}
+\end{figure}
+
+\begin{figure}[htb]
+  \centering
+  \includegraphics[angle=0,width=\hsize]{\RepFigures/cpu_gpu}
+  \caption{Performance on a Cray XC30 single 8 core processor node
+    (left) and the NVIDIA card (right) using OpenACC. Default Cray
+    Fortran compiler optimization has been used on both runs.}
+  \label{fig:cpu_gpu}
+\end{figure}
+
+\begin{figure}[htb]
+  \centering
+  \includegraphics[angle=0,width=\hsize]{\RepFigures/cpu_mic_O1_O3}
+  \caption{Performance comparison between using \texttt{-O3} and
+    \texttt{-O1}, on the Helios dual processor (left) and on the MIC (right).}
+  \label{fig:cpu_mic_O1_O3}
+\end{figure}
+
+\FloatBarrier
+
+\section{Hybrid MPI+OpenMP \texttt{PARMG (r599})}
+In this version, a \emph{straightforward} parallelization is
+done in the subroutines \texttt{jacobi, residue, prolong, restrict} and
+\texttt{norm\_vec}, using the OpenMP work sharing directives. The
+ghost cell exchange is executed by the \emph{master} thread.
+All the 2D arrays (solutions, RHS, etc.) are allocated and initialized
+\emph{once} by the \emph{master} thread. Dynamic array allocations 
+during the multigrid $V$-cycles are thus avoided.
+
+To help further optimization, timings are introduced for each of the 4
+multigrid components \texttt{jacobi, residue, prolong, restrict} and
+the ghost cell \texttt{exchange} as well as on the \emph{recursive}
+subroutine \texttt{mg}. Since the timings of the 4 MG components
+include already calls to \texttt{exchange}, the time obtained for
+\texttt{mg} should be equal to the sum of the 4 MG components and 
+the \emph{extras} time which includes operations in \texttt{mg} but
+not in the 4 components:
+\begin{equation}
+  \label{eq:timings}
+  t_\text{mg} = t_\text{jacobi} + t_\text{residue} + t_\text{prolong}
+  + t_\text{restrict} + t_\text{extras}.
+\end{equation}
+We will see in the following sections that, in addition to these 5
+contributions to $t_\text{mg}$,
+there is \emph{overhead} probably due to the \emph{recursive}
+calls of \texttt{mg}.
+
+\subsection{Parallel efficiency on single node}
+The comparison in Fig.~\ref{fig:single_node} shows that the pure
+OpenMP version is at most $30\%$ slower than the pure MPI version when all the
+16 cores are used but less than $10\%$ when only one socket is
+used. The degradation of the OpenMP version can be explained by the
+\emph{numa} effects when 2 sockets are used. It is also observed
+that the performance level off at 4 cores, due to the
+saturation of the socket memory bandwidth.
+\begin{figure}[htb]
+  \centering
+  \includegraphics[angle=0,width=0.8\hsize]{\RepFigures/single_node}
+  \caption{Parallel performance of the 7 level $V(3,3)$-cycle on a
+    dual socket Helios node ($2\times 8$ cores) for pure OpenMP and pure
+    MPI. The non-homogeneous NNDD problem with the same parameters in
+    Fig.~\ref{fig:conv_nh_bc} is considered here. The OpenMP threads and MPI
+    tasks are placed first on the first socket before filling the
+    second socket, using the \texttt{srun} option
+    ``\texttt{--cpu\_bind=cores -m block:block}'' and the environment
+    variable \texttt{OMP\_PROC\_BIND=true}.}
+  \label{fig:single_node}
+\end{figure}
+
+\subsection{Hybrid efficiency on multi-nodes}
+In the following multi-node experiments, all the 16 cores on each
+Helios node are
+utilized. The numbers of OpenMP threads \emph{per} MPI \emph{process} NT, the
+number of MPI processes \emph{per node} NP, the number of nodes NNODES
+and the \emph{total} number of MPI processes $\text{NP}_{tot}$ verify
+thus the following relations:
+\begin{equation}
+  \begin{gathered}
+    1\le\text{NT} \le 16, \qquad 1\le\text{NP} \le 16 \\
+    \text{NT}\times\text{NP} = 16 \\
+    \text{NP}_{tot} = 16\times\text{NNODES}/\text{NT} \\
+  \end{gathered}
+\end{equation}
+
+The times of the different MG components and the relative
+contributions for the \emph{strong scaling}
+experiments using a $1024\times 4096$ grid size, are shown in 
+Fig.~\ref{fig:hybrid_strong} and Fig.~\ref{fig:hybrid_strong_contrib}
+respectively. The following observations can be made:  
+\begin{enumerate}
+  \item The \texttt{exchange} time increases strongly with increasing
+  NNODES, due to smaller partitioned subdomains and thus their larger
+  surface/volume ratio.
+  \item The pure MPI (NT=1) \texttt{exchange} time is on the other
+    hand reduced with
+  $\text{NT}>1$ since the local partitioned grid becomes larger.
+  \item The less efficient OpenMP parallelization (numa effects,
+  Amdahl's law) tends to limit however this advantage.
+  \item As a result, there is an optimal NT for a given NNODES:
+    2 for 4 and 16 nodes, 8 for 64 nodes.
+  \item The \texttt{jacobi} and \texttt{residue} contributions dominate
+    largely with $0.63\le t_\text{jacobi}/t_\text{mg}\le 0.83$ and
+    $0.09\le t_\text{residue}/t_\text{mg}\le 0.18$.
+  \item The \emph{overhead} (see Eq.~\ref{eq:timings}) times increase
+    with NNODES but decrease slightly for increasing NT.
+\end{enumerate}
+
+The times of the different MG components and the relative
+contributions for the \emph{weak scaling}
+experiments are shown in Fig.~\ref{fig:hybrid_weak} and
+Fig.~\ref{fig:hybrid_weak_contrib} respectively. The following
+observations can be made: 
+\begin{enumerate}
+\item A steady increase of MG times with the number of nodes can
+  be attributed to the increase of ghost cells \texttt{exchange} time,
+  even though the amount of communications between nodes does not
+  change.
+\item The MG performance is improved slightly when NT=2 but drop
+  drastically for NT=16. This seems to indicate that
+  \emph{numa} effects are important here, since the array initialization
+  is not done locally on each thread.
+\item The \emph{overhead} (see Eq.~\ref{eq:timings}) times are much
+  smaller than in the \emph{strong scaling} runs.
+\end{enumerate}
+
+\begin{sidewaysfigure}
+  \centering
+  \includegraphics[angle=0,width=\hsize]{\RepFigures/hybrid_strong}
+  \caption{Detailed timings for strong scaling experiments using the same
+    problem parameters as in Fig.~\ref{fig:single_node}, except that 5
+    levels are chosen to be able to run the runs with 64 nodes.}
+  \label{fig:hybrid_strong}
+\end{sidewaysfigure}
+
+\begin{sidewaysfigure}
+%\begin{figure}[htb]
+  \centering
+  \includegraphics[angle=0,width=\hsize]{\RepFigures/hybrid_strong_contrib}
+  \caption{Relative contributions of each of the MG components for the
+    strong scaling experiments using the same
+    problem parameters as in Fig.~\ref{fig:single_node}.}
+  \label{fig:hybrid_strong_contrib}
+%\end{figure}
+\end{sidewaysfigure}
+
+\begin{sidewaysfigure}
+%\begin{figure}[htb]
+  \centering
+  \includegraphics[angle=0,width=\hsize]{\RepFigures/hybrid_weak}
+  \caption{Detailed timings for weak  scaling experiments using the same
+    problem parameters as in Fig.~\ref{fig:single_node}.}
+  \label{fig:hybrid_weak}
+%\end{figure}
+\end{sidewaysfigure}
+
+\begin{sidewaysfigure}
+%\begin{figure}[htb]
+  \centering
+  \includegraphics[angle=0,width=\hsize]{\RepFigures/hybrid_weak_contrib}
+  \caption{Relative contributions of each of the MG components for the
+    weak scaling experiments using the same
+    problem parameters as in Fig.~\ref{fig:single_node}.} 
+  \label{fig:hybrid_weak_contrib}
+%\end{figure}
+\end{sidewaysfigure}
+
+Finally, Table~\ref{tab:memory} shows that using $\text{NT}>1$
+decreases the memory needed by the multigrid procedure for both strong
+and weak scaling runs.
+
+\begin{table}[htb]
+\begin{center}
+\begin{tabular}{|l|c||r|r|r|r|r|}
+\hline
+ &\texttt{\textbf{NNODES}}& NT=1 & NT=2 & NT=4 & NT=8 & NT=16  \\
+\hline
+\multirow{4}{*}{\texttt{\textbf{Strong Scaling}}}
+&1    & 57.01 & 53.70 & 52.04 & 51.06 & 48.57  \\
+&4   & 21.87 & 21.49 & 15.75 & 13.87 & 13.11  \\
+&16  & 13.82 & 8.52 & 5.75 & 5.69 & 4.00  \\
+&64  & 13.93 & 6.80 & 3.73 & 2.32 & 1.48  \\
+\hline\hline
+\multirow{4}{*}{\texttt{\textbf{Weak Scaling}}}
+&1   & 57.03 & 53.71 & 52.04 & 51.08 & 48.61 \\
+&4   & 59.24 & 59.18 & 53.39 & 51.52 & 48.69 \\
+&16  & 60.48 & 55.33 & 52.69 & 52.74 & 49.06 \\
+&64  & 63.30 & 56.13 & 53.30 & 51.58 & 48.79 \\
+\hline
+\end{tabular}
+\end{center}
+  \caption{Memory footprint \emph{per core} (MB/core) for the strong scaling and weak
+    scaling experiments.}
+  \label{tab:memory}
+\end{table}
+
+\subsection{Summary and conclusions}
+The \emph{strong scaling} and  \emph{weak scaling} wrt NT and NNODES
+are summarized in  Fig.~\ref{fig:scaling}. The speed up for the strong
+scaling experiments shows a good efficiency up to 16 nodes
+for all NT but degrades at 64 nodes (1024 cores) due the partitioned
+grid becoming too small. A good \emph{weak scaling} is also
+obtained with an increase in $t_\text{mg}$ of
+less than $10\%$ when NNODES vary from 4 to 64. However, for NT=16, the
+efficiency drops significantly, due to the non-local memory access
+when the OpenMP threads are placed on both sockets (\emph{numa} effect).
+
+In order to improve the hybrid MPI+OpenMP multigrid, especially for large number
+of threads per MPI process NT, the following optimizations should be done:
+\begin{itemize}
+  \item \emph{First touch} array initialization in order to avoid
+    \emph{numa} effects.
+  \item OpenMP parallelization of some remaining \emph{serial} loops.
+  \item Better vectorization of inner loops.
+\end{itemize}
+The outcome of these optimization steps is important in order to run
+efficiently on upcoming \emph{multicore} processors and
+\emph{manycore} (MIC) devices.
+
+\begin{figure}[htb]
+  \centering
+  \includegraphics[angle=0,width=0.9\hsize]{\RepFigures/scaling}
+  \caption{Strong scaling with a $1024\times 4096$ grid size (left) and
+  weak scaling (right) with grid sizes $1024\times 4096,10248\times 8192,
+  4096\times 16384$ and $8192\times32768$ respectively for 1, 4, 16
+  and 64 nodes.}
+  \label{fig:scaling}
+\end{figure}
+
+\FloatBarrier
+
+\pagebreak
+\begin{thebibliography}{99}
+  \bibitem{Briggs} {W.L.~Briggs, V.E.~Henson and S.F.~McCormick, A
+    Multigrid Tutorial, Second Edition, SIAM (2000)}.
+  \bibitem{MUMPS} \url{http://graal.ens-lyon.fr/MUMPS/}.
+  \bibitem{MG1D} {\tt Multigrid Formulation for Finite Elements},\\ 
+    \url{https://crppsvn.epfl.ch/repos/bsplines/trunk/multigrid/docs/multigrid.pdf}
+  \bibitem{TEMPL} {R. Barrett, M. Berry, T. F. Chan,
+    J. Demmel, J. Donato, J. Dongarra, V. Eijkhout,
+    R. Pozo, C. Romine and H. Van der Vorst,
+    Templates for the Solution of Linear Systems: Building Blocks for
+    Iterative Methods, 2nd Edition , SIAM, (1994)}.
+    \bibitem{Wesseling} {P.~Wesseling, An Introduction to Multigrid
+      Methods, Edwards, 2004}.
+    \bibitem{salpha} {X. Lapillonne, S. Brunner, T. Dannert, S. Jolliet,
+      A. Marinoni et al., Phys. Plasmas 16, 032308 (2009)}.
+\end{thebibliography}
+
+\end{document}
diff --git a/multigrid/docs/multigrid.pdf b/multigrid/docs/multigrid.pdf
new file mode 100644
index 0000000..165662e
Binary files /dev/null and b/multigrid/docs/multigrid.pdf differ
diff --git a/multigrid/docs/multigrid.tex b/multigrid/docs/multigrid.tex
new file mode 100644
index 0000000..a8f694d
--- /dev/null
+++ b/multigrid/docs/multigrid.tex
@@ -0,0 +1,949 @@
+%
+% @file multigrid.tex
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+\documentclass[a4paper]{article}
+\usepackage{linuxdoc-sgml}
+\usepackage{graphicx}
+\usepackage{hyperref}
+\usepackage{amsmath}
+%\usepackage{verbatim}
+%\usepackage[notref]{showkeys}
+
+\title{\tt Multigrid Formulation for Finite Elements.}
+\author{Trach-Minh Tran, Stephan Brunner}
+\date{v0.2, October 2012}
+\abstract{A multigrid formulation for finite elements is
+  derived, using variational principles. More specifically the grid
+  transfer operators will be derived and tested in 1D Cartesian,
+  cylindrical and spherical geometry for arbitrary order B-Splines.}
+
+\begin{document}
+\maketitle
+\tableofcontents
+
+\section{The discretized problem}
+Consider the one-dimensional linear integro-differential problem
+\begin{equation}
+\label{eq:oned_prob}
+  \mathcal{L}(u) = f, \qquad 0\le x\le L,
+\end{equation}
+with suitable boundary conditions. On an \emph{equidistant} mesh with
+interval $h=L/N$ and using the \emph{weak form} of Eq.~(\ref{eq:oned_prob}),
+the linear system to be solved on this grid (which will be
+referred as the \emph{fine} grid) can be written as (see
+\cite{SOLVERS}, \cite{BSPLINES}):
+  \begin{equation}
+    \label{eq:fine}
+    \sum_{i'=1}^{N+p}A_{ii'}^hu^h_{i'} = b^h_i, \qquad 
+      A^h_{ii'}=\int_{0}^{L}\Lambda^h_i
+      \mathcal{L}(\Lambda^h_{i'})\,x^{\alpha}dx, \qquad
+      b^h_i = \int_{0}^{L}f\Lambda^h_i\,x^{\alpha}dx,
+  \end{equation}
+where $p$ is the order the Splines $\Lambda^h_i$ and
+$\alpha=0,1,2$ for Cartesian, cylindrical and spherical coordinates
+respectively. It should be noted that the unknowns $u^h_i$ of this
+linear system are the \emph{expansion coefficients} of the discretized
+solution of the problem $u^h(x)$
+\begin{equation}
+  u^h(x) = \sum_{i'=1}^{N+p}u^h_{i'}\Lambda^h_{i'}(x)
+\end{equation}
+and the right hand sides $b^h_i$ are defined as the \emph{projection} of
+$f(x)$ on the same basis functions, in contrast with the Finite
+Differences (FD) or Finite Volume (FV) formulations where $u^h_i$ and
+$b^h_i$ are the \emph{nodal values} of $u$ and $f$.
+
+On the \emph{coarser} mesh with interval $2h=2L/N$, the discretized
+linear system can be written as
+  \begin{equation}
+    \label{eq:coarse}
+    \sum_{i'=1}^{N/2+p}A_{ii'}^{2h}u^{2h}_{i'} = b^{2h}_i, \qquad 
+      A^{2h}_{ii'}=\int_{0}^{L}\Lambda^{2h}_i
+      \mathcal{L}(\Lambda^{2h}_{i'})\,x^{\alpha}dx, \qquad
+      b^{2h}_i = \int_{0}^{L}f\Lambda^{2h}_i\,x^{\alpha}dx.
+  \end{equation}
+
+\section{Transfer operators}
+\label{sec:twogrid}
+For simplicity let consider the two-grid procedure \cite{Briggs} which can be
+summarized as follow:
+\begin{enumerate}
+\item Obtain an approximation $\mathbf{u}^h$ on the \emph{fine} grid, using a
+  Gauss-Seidel (GS) or a weighted Jacobi scheme. This procedure is
+  also called \emph{smoothing} or \emph{relaxation}.
+\item Compute the \emph{residuals}:
+    $\mathbf{r}^h=\mathbf{b}^h-\mathbf{A}^h\mathbf{u}^h$. 
+\item Obtain the residuals on the coarse mesh $\mathbf{r^{2h}}$ by
+  \emph{restriction} of $\mathbf{r}^h$.
+\item Direct solve $\mathbf{A}^{2h}\mathbf{e}^{2h}=\mathbf{r^{2h}}$ to obtain
+  the error $\mathbf{e}^{2h}=\mathbf{u}-\mathbf{u}^{2h}$
+\item Interpolate (\emph{prolong}) the error to obtain $\mathbf{e}^h$.
+\item Correct the approximation obtained on the fine grid: 
+   $\mathbf{u}^h\leftarrow \mathbf{u}^h+\mathbf{e}^h$.
+\item Relax on $\mathbf{A}^{h}\mathbf{u}^{h}=\mathbf{f^{h}}$, using
+  the previously computed $\mathbf{u}^h$ as a guess.
+\end{enumerate}
+
+Steps 3 and 5 are called \emph{grid transfers} and are detailed in the
+following. It should be noted that the fine to coarse transfer
+(restriction) applies to the right hand side $\mathbf{b}^h$ while the
+prolongation applies to the expansion coefficients $\mathbf{u}^{2h}$.
+
+\subsection{Fine to coarse grid transfer (restriction)}
+The right hand side on the fine and coarse grid can be written as
+\begin{equation*}
+  \begin{split}
+    b^{h}_i &= \int_0^Lf\Lambda^{h}_i\,x^{\alpha}dx = 
+      \sum_{i'=1}^{N+p}f^h_{i'}\underbrace{\int_0^L\Lambda^{h}_{i}\Lambda^{h}_{i'}\,x^{\alpha}dx}_
+         {M^{h,h}_{ii'}}, \\
+    b^{2h}_i &= \int_0^Lf\Lambda^{2h}_i\,x^{\alpha}dx = \sum_{i'=1}^{N+p}f^h_{i'}
+    \underbrace{\int_0^L\Lambda^{2h}_{i}\Lambda^{h}_{i'}\,x^{\alpha}dx}_{M^{2h,h}_{ii'}} \\
+  \end{split}
+\end{equation*}
+where the expansion $f(x)=\sum_{i=1}^{N+p}f^h_i\Lambda^h_i(x)$ has been used.
+Elimination of  $\mathbf{f^h}$ leads to the definition of the
+\emph{restriction} matrix:
+\begin{equation}
+\label{eq:restriction}
+  \mathbf{b}^{2h} = \mathbf{R}^{2h}_{h}\mathbf{b}^h, 
+  \qquad \boxed{\mathbf{R}^{2h}_{h}=\mathbf{M}^{2h,h}(\mathbf{M}^{h,h})^{-1}}.
+\end{equation}
+
+Note that the computation of the \emph{mass matrices}
+$\mathbf{M}^{h,h}$ and $\mathbf{M}^{2h,h}$ can be done \emph{exactly}
+using a Gauss integration with $N_G=\lceil p+(\alpha+1)/2 \rceil$ points.
+
+Another way to derive the restriction operator $\mathbf{R}^{2h}_{h}$
+is by noting that the basis functions $\Lambda^{2h}_{i}$ are
+\emph{piecewise} $C^{p-1}_h$ \emph{polynomials} with \emph{breaks} on
+the fine grid points $x_i=ih$,  and thus can be expressed
+\emph{uniquely} as 
+\begin{equation}
+  \Lambda^{2h}_{i}(x) = \sum_{i'=1}^{N+p}c_{ii'}\Lambda^{h}_{i'}(x),
+  \quad i=1\ldots N/2+p.
+\end{equation}
+Projecting this equation on the basis $\Lambda^{h}_j$ then leads to
+\begin{equation*}
+  \begin{split}
+    \sum_{i'=1}^{N+p}c_{ii'}\int_0^L\Lambda^{h}_{i'}\Lambda^{h}_{j}\,x^{\alpha}dx
+    &= \int_0^L\Lambda^{2h}_{i}\Lambda^{h}_{j}\,x^{\alpha}dx, \qquad
+    i=1\ldots N/2+p, \quad j=1,\ldots N+p \\
+    \Longrightarrow \mathbf{c}\cdot\mathbf{M}^{h,h} &=
+    \mathbf{M}^{2h,h} \Longrightarrow \mathbf{c} =
+    \mathbf{M}^{2h,h}(\mathbf{M}^{h,h})^{-1} = \mathbf{R}^{2h}_{h} \\
+  \end{split}
+\end{equation*}
+and finally
+\begin{equation}
+\label{eq:restrict_gen}
+  \boxed{\Lambda^{2h}_{i}(x) =
+      \sum_{i'=1}^{N+p}\left(\mathbf{R}^{2h}_{h}\right)_{ii'}\Lambda^{h}_{i'}(x), 
+  \quad i=1\ldots N/2+p}
+\end{equation}
+Because the expansion coefficients $c_{ii'}$ of $\Lambda^{2h}_{i}(x)$
+(rows of the restriction matrix $\mathbf{R}^{2h}_{h}$) on the
+fine mesh basis are \emph{unique},  $\mathbf{R}^{2h}_{h}$ should be
+independent of the geometry exponent $\alpha$ or more generally, of the
+definition of the \emph{projection} (or scalar product) used to
+calculate the restriction matrix. Furthermore, since the supports of
+both $\Lambda^{h}_i$ and  $\Lambda^{2h}_i$ are \emph{compact}, the
+matrix $\mathbf{R}^{2h}_{h}$ should be \emph{sparse}.
+
+One can show that, using (\ref{eq:restrict_gen}), the
+restriction of the fine mesh FE matrix $\mathbf{A}^h$ is given by 
+\begin{equation}
+\label{eq_coarse_mat}
+  \mathbf{A}^{2h} = \mathbf{R}^{2h}_h\mathbf{A}^{h}\left(\mathbf{R}^{2h}_h\right)^{T}.
+\end{equation}
+
+\subsection{Coarse to fine grid transfer (prolongation)}
+Let denote the discretized solution on the coarse mesh of
+$\mathbf{A}^{2h}\mathbf{u}^{2h}=\mathbf{R}^{2h}_{h}\mathbf{b}^h$
+by
+\begin{equation*}
+  u^{2h}(x) = \sum_{i=1}^{N/2+p}u^{2h}_{i}\Lambda^{2h}_{i}(x),
+\end{equation*}
+and seek for an approximated solution on the fine mesh $\mathbf{u}^{h}$
+\begin{equation*}
+  u^{h}(x) = \sum_{i=1}^{N+p}u^{h}_{i}\Lambda^{h}_{i}(x).
+\end{equation*}
+by \emph{prolongation} of $\mathbf{u}^{2h}$ (instead of solving
+$\mathbf{A}^{h}\mathbf{u}^{h}=\mathbf{b}^h$). A reasonable solution is 
+to \emph{minimize} the square of the error norm defined as
+\begin{equation*}
+  \begin{split}
+    \epsilon^2 &= \|u^{h}(x)-u^{2h}(x)\|^2 \equiv \int_0^L
+    [u^{h}(x)-u^{2h}(x)]^2\,x^\alpha dx, \\
+    \frac{\partial\epsilon^2}{\partial u^h_i}   &=0 \Longrightarrow
+    \sum_{i'=1}^{N+p}u^{h}_{i}\int_0^L
+    \Lambda^{h}_{i}\Lambda^{h}_{i'}\,x^\alpha dx =
+    \sum_{i'=1}^{N/2+p}u^{2h}_{i}\int_0^L
+    \Lambda^{h}_{i}\Lambda^{2h}_{i'}\,x^\alpha dx. \\
+  \end{split}
+\end{equation*}
+
+This yields the prolonged (or interpolated) \emph{coarse grid}
+  solution on the \emph{fine grid}
+\begin{equation}
+\label{eq:prolongation}
+  \mathbf{u}^h = \mathbf{P}^h_{2h}\mathbf{u}^{2h}, \qquad
+  \boxed{\mathbf{P}^h_{2h} =
+    (\mathbf{M}^{h,h})^{-1}\mathbf{M}^{h,2h}=(\mathbf{R}^{2h}_{h})^T}
+\end{equation}
+and the coarse FE matrix can be finally expressed as\begin{equation}
+  \label{eq:coarse_mat}
+  \boxed{\mathbf{A}^{2h} = \mathbf{R}^{2h}_{h}\mathbf{A}^{h}\mathbf{P}^h_{2h}}
+\end{equation}
+
+\subsection{An alternative derivation of grid transfer operators}
+Starting from the inter grid transformation of the basis functions
+Eq.(\ref{eq:restrict_gen}), the
+restriction of $\mathbf{b}^h$ and the prolongation of
+$\mathbf{u}^{2h}$ can be derived as follow
+\begin{gather*}
+  b^{2h}_i = \int_0^L f\Lambda^{2h}_i\,x^{\alpha}dx = \sum_{i'=1}^{N+p}
+    \left(\mathbf{R}^{2h}_{h}\right)_{ii'}\int_0^Lf\Lambda^{h}_{i'}\,x^{\alpha}dx
+      =\sum_{i'=1}^{N+p} \left(\mathbf{R}^{2h}_{h}\right)_{ii'}b^h_{i'}, \\
+      u^{2h}(x) = \sum_{i=1}^{N/2+p} u^{2h}_{i} \Lambda^{2h}_{i}=
+  \sum_{i'=1}^{N+p}\underbrace{\left[\sum_{i=1}^{N/2+p}
+  \left(\mathbf{R}^{2h}_{h}\right)_{ii'}
+  u^{2h}_{i}\right]}_{u^h_{i'}}\Lambda^{h}_{i'}(x) \Longrightarrow 
+  \mathbf{u}^h = \left(\mathbf{R}^{2h}_{h}\right)^T\mathbf{u}^{2h} =
+  \mathbf{P}^h_{2h}\mathbf{u}^{2h}.\\ 
+\end{gather*}
+
+\section{Numerical results for the transfer operators}
+The prolongation matrix as defined in Eq.~(\ref{eq:prolongation}) was
+calculated using the BSPLINES module. A Gauss integration with
+$N_G=\lceil p+(\alpha+1)/2 \rceil$ points is used to carry out the
+numerical integrations.
+In the following, the results are presented
+for linear, quadratic and cubic Splines. Since the
+restriction matrix is just the transpose of the prolongation matrix,
+only the latter is shown. As expected, all the obtained matrices
+are found to be \emph{independent} of $\alpha$ and \emph{sparse}.
+
+During the calculations, it was checked that
+\begin{itemize}
+\item The coarse matrix computed using Eq.~(\ref{eq:coarse_mat}) and
+  the transfer matrix, is identical to the matrix assembled directly
+  on the coarse grid. 
+\item The sum of each row of the prolongation matrix is 1, since a
+  constant function ($\mathbf{u}^{2h}=1$) should remain constant after
+  the grid transfer.
+
+\end{itemize}
+
+\subsection{Linear Splines}
+For $N=8$, the prolongation is a $9\times 5$ matrix given by
+\begin{equation}
+  \mathbf{P}^{h}_{2h} =
+  \left(
+  \begin{matrix}
+    1   & 0   & 0   & 0   & 0  \\
+    1/2 & 1/2 & 0   & 0   & 0  \\
+    0   & 1   & 0   & 0   & 0  \\
+    0   & 1/2 & 1/2 & 0   & 0  \\
+    0   & 0   & 1   & 0   & 0  \\
+    0   & 0   & 1/2 & 1/2 & 0  \\
+    0   & 0   & 0   & 1   & 0  \\
+    0   & 0   & 0   & 1/2 & 1/2\\
+    0   & 0   & 0   & 0   & 1   \\
+  \end{matrix}\right)
+\end{equation}
+As expected, the prolongation matrix for linear Splines is
+identical to the one obtained for first order FD discretization, where
+a linear interpolation is used. One can easily check that
+\begin{equation*}
+  \begin{split}
+    \Lambda^{2h}_1(x) &= \Lambda^h_1(x) + \frac{1}{2}\Lambda^h_2(x), \\
+    \Lambda^{2h}_2(x) &= \frac{1}{2}\Lambda^h_2(x) + \Lambda^h_3(x) +
+    \frac{1}{2}\Lambda^h_4(x), \\ 
+  \end{split}
+\end{equation*}
+as expected from (\ref{eq:restrict_gen}).
+
+\subsection{Quadratic Splines}
+For $N=8$, the prolongation is a $10\times 6$ matrix given by
+\begin{equation}
+  \mathbf{P}^{h}_{2h} =
+  \left(
+  \begin{matrix}
+    1   & 0   & 0   & 0   & 0   & 0  \\
+    1/2 & 1/2 & 0   & 0   & 0   & 0  \\
+    0   & 3/4 & 1/4 & 0   & 0   & 0  \\
+    0   & 1/4 & 3/4 & 0   & 0   & 0  \\
+    0   & 0   & 3/4 & 1/4 & 0   & 0  \\
+    0   & 0   & 1/4 & 3/4 & 0   & 0  \\
+    0   & 0   & 0   & 3/4 & 1/4 & 0  \\
+    0   & 0   & 0   & 1/4 & 3/4 & 0  \\
+    0   & 0   & 0   & 0   & 1/2 & 1/2\\
+    0   & 0   & 0   & 0   & 0   & 1  \\
+  \end{matrix}\right)
+\end{equation}
+
+\subsection{Cubic Splines}
+For $N=10$, the prolongation is a $13\times 8$ matrix given by
+\begin{equation}
+  \mathbf{P}^{h}_{2h} =
+  \left(
+  \begin{matrix}
+    1   & 0    & 0     & 0   & 0   & 0     & 0    & 0  \\
+    1/2 & 1/2  & 0     & 0   & 0   & 0     & 0    & 0  \\
+    0   & 3/4  & 1/4   & 0   & 0   & 0     & 0    & 0  \\
+    0   & 3/16 & 11/16 & 1/8 & 0   & 0     & 0    & 0  \\
+    0   & 0    & 1/2   & 1/2 & 0   & 0     & 0    & 0  \\
+    0   & 0    & 1/8   & 3/4 & 1/8 & 0     & 0    & 0  \\
+    0   & 0    & 0     & 1/2 & 1/2 & 0     & 0    & 0  \\
+    0   & 0    & 0     & 1/8 & 3/4 & 1/8   & 0    & 0  \\
+    0   & 0    & 0     & 0   & 1/2 & 1/2   & 0    & 0  \\
+    0   & 0    & 0     & 0   & 1/8 & 11/16 & 3/16 & 0  \\
+    0   & 0    & 0     & 0   & 0   & 1/4   & 3/4  & 0  \\
+    0   & 0    & 0     & 0   & 0   & 0     & 1/2  & 1/2\\
+    0   & 0    & 0     & 0   & 0   & 0     & 0    & 1  \\
+  \end{matrix}\right)
+\end{equation}
+
+Note that from the results shown above, it is straightforward to derive the
+prolongation matrix for other number of intervals $N$.
+
+\section{Practical Considerations}
+\subsection{Boundary conditions}
+The \emph{essential Dirichlet boundary conditions} are imposed by zeroing the column
+and row (first column and first row for the left boundary and last column
+and last row for the right boundary) of the FE matrix $\mathbf{A}^h$ and
+putting 1 on the diagonal. The same operation should be also performed
+on the prolongation matrix, preserving thus the relation
+(\ref{eq:coarse_mat}). For non-homogeneous Dirichlet boundary
+conditions, the elements of the column should be saved before the
+zeroing operation (for example $A^h_{21}, A^h_{31}, \ldots$ for the left
+boundary condition). They will be used later to modify the right hand
+side:
+\begin{equation*}
+  b^h_i \leftarrow b^h_i-A^h_{i1}u^h_1, \quad i=2,\ldots
+\end{equation*}
+
+Nothing has to be done for \emph{natural boundary conditions}.
+
+\subsection{Residual norm and error}
+The residual norm is simply defined as the Euclidean norm of the
+residue:
+
+\begin{equation}
+\label{eq:resid}
+\|r\|_2 = \|\mathbf{b}-\mathbf{A}\mathbf{u}\|_2 =
+\sqrt{\sum_i\left(b_i-\sum_{i'}A_{ii'}u_{i'}\right)^2}.
+\end{equation}
+When the \emph{exact} solution $u(x)$ is known, the \emph{discretization error}
+can defined as
+\begin{equation}
+\label{eq:discerr}
+  \|e\|_2 = \sqrt{\int x^\alpha dx\left[\sum_{i}u_{i}\Lambda_i(x)-u(x)\right]^2}
+\end{equation}
+and computed using a Gauss quadrature. Note that for Splines of order
+$p$, $\|e\|_2(h)$ converges to zero as $O(h^{p+1})$. 
+
+\section{The Model Problems}
+\subsection{Cartesian geometry}
+The following second-order boundary value problem is considered:
+\begin{equation}
+\label{eq:cartesian_problem}
+  \begin{split}
+  -\frac{d^2}{dx^2} u(x) + \sigma u(x) &= \sin (\pi kx), \qquad 0\le x\le 1 \\
+    u(0)=u(1) &= 0 \\
+    \Rightarrow u(x) = \frac{\sin(\pi kx)}{\pi^2k^2+\sigma}.& \\
+  \end{split}
+\end{equation}
+Using the weak form, the FE discretized matrix and right hand side can
+be computed as 
+\begin{equation}
+  A_{ii'} = \int_0^1dx\left[\Lambda'_i(x)\Lambda'_{i'}(x) +
+    \sigma\Lambda_i(x)\Lambda_{i'}(x)\right], \qquad
+  b_i =  \int_0^1dx \sin (\pi kx)\Lambda_i(x).
+\end{equation}
+For Splines of order $p$, the integration is done with a $\lceil
+p+1/2\rceil$ point Gauss quadrature which is \emph{exact}
+for the matrix $\mathbf{A}$ if $\sigma$ is constant.
+
+The boundary conditions are simply imposed by setting
+\begin{equation*}
+  A_{ki}=A_{ik}=\delta_{ik} \qquad\mbox{and} \qquad b_k=0
+\end{equation*}
+for $k=1$ (the first equation) and $N+p$ (the last equation).
+
+\subsection{Cylindrical geometry}
+The following second-order boundary value problem is considered:
+\begin{equation}
+  \begin{split}
+  -\frac{1}{r}\frac{d}{dr}r\frac{d}{dr}u(r) + \frac{m^2}{r^2}u(r) &= 
+  j^2_{ms}J_{m}(j_{ms}r), \qquad 0\le r\le 1, \quad j_{ms} =
+  s^{th}\mbox{ zero of }J_{m}, \\
+    u(1) &= 0 \\
+    \Rightarrow u(r) = J_{m}(j_{ms}r).& \\
+  \end{split}
+\end{equation}
+Using the weak form, the FE discretized matrix and right hand side can
+be computed as 
+\begin{equation}
+  A_{ii'} = \int_0^1rdr\left[\Lambda'_i(r)\Lambda'_{i'}(r) +
+    \frac{m^2}{r^2}\Lambda_i(r)\Lambda_{i'}(r)\right], \qquad
+  b_i =  \int_0^1rdr j^2_{ms}\,J_{m}(j_{ms}r)\Lambda_i(r).
+\end{equation}
+The boundary condition has only to be imposed on the last equation,
+using the same procedure described for the Cartesian geometry.
+
+It should be noted here that for $m\neq 0$, the matrix elements
+$A_{1i}$ and $A_{i1}$ \emph{diverge} since $\Lambda_1(r)$ is not
+equal to zero at $r=0$. However, using a \emph{direct solver}, one can observe
+that the resulting \emph{discretization errors} as defined by
+Eq.(\ref{eq:discerr}) converge for number of Gauss points $N_G$ slightly
+larger than $p+1$, as shown in Table~\ref{tab:gauss_conv}. Then, using
+$N_G=4$ and $6$ for the linear and cubic splines respectively, the
+discretization error as a function of the number of grid intervals
+(Fig~\ref{fig:cyl_conv}) show the expected quadratic and quartic
+scaling respectively for the linear and cubic Splines.
+
+\begin{figure}
+  \centering
+  \includegraphics[angle=0,width=0.8\hsize]{cyl_conv}
+  \caption{Discretization errors $\|e\|_2$ obtained by a \emph{direct
+      solver} versus the number of grid intervals $N$. A linear fit
+    yields a quadratic scaling ($\sim N^{-2.0}$) for the linear Splines
+  and a quartic convergence ($\sim N^{-4.3}$) for the cubic Splines.}
+  \label{fig:cyl_conv}
+\end{figure}
+
+\begin{table}
+  \centering
+  \begin{tabular}{|c|c|c|}\hline
+Number of Gauss points & $p=1$     & $p=3$     \\ \hline
+  2                    & 8.319E-04 &           \\
+  4                    & 9.277E-04 & 5.799E-07 \\
+  6                    & 9.276E-04 & 5.936E-07 \\
+  8                    & 9.276E-04 & 5.936E-07 \\\hline
+  \end{tabular}
+  \caption{Convergence of the \emph{discretization error} with respect
+  to the number of Gauss points for the cylindrical problem with $m=1$,
+  $s=10$ on a $128$ interval grid.}
+  \label{tab:gauss_conv}
+\end{table}
+
+\section{The Multigrid Schemes}
+The two grid procedure described in section (\ref{sec:twogrid}) can be
+generalized as follow.
+Let $\nu_1$, $\nu_2$ and $\mu$ be three iteration parameters. 
+Given a guess $\mathbf{u}^h$ and right hand side $\mathbf{b}^h$ at the
+\emph{finest} level, a MG cycle represented by 
+\begin{equation*}
+  \boxed{\mathbf{u}^h \leftarrow MG^h(\mathbf{u}^h,\mathbf{b}^h)}
+\end{equation*}
+will compute a \emph{new} $\mathbf{u}^h$ and is defined recursively by the
+following steps: 
+
+\begin{enumerate}
+\item If $h$ is the coarsest mesh size, direct solve
+  $\mathbf{A}^h\mathbf{u}^h=\mathbf{b}^h$ and return.
+  \item Else 
+    \begin{itemize}
+    \item Relax $\mathbf{u}^h$ $\nu_1$ times.
+    \item $\mathbf{b}^{2h} \leftarrow
+      \mathbf{R}^{2h}_h(\mathbf{b}^h-\mathbf{A}^h\mathbf{u}^h), \quad
+      \mathbf{u}^{2h}\leftarrow 0$.
+    \item $\mathbf{u}^{2h} \leftarrow MG^{2h}(\mathbf{u}^{2h},\mathbf{b}^{2h})$ $\mu$ times.
+    \item $\mathbf{u}^h\leftarrow
+      \mathbf{u}^h+\mathbf{P}^{h}_{2h}\mathbf{u}^{2h}$.
+    \item Relax $\mathbf{u}^h$ $\nu_2$ times.
+    \end{itemize} 
+\end{enumerate}
+
+The standard $V$-cycle is obtained for $\mu=1$ while $\mu=2$ results in
+the $W$-cycle. Usually the number of \emph{pre-smooth} and
+\emph{post-smooth} sweeps $\nu_1$ and $\nu_2$ is limited to 1 or 2. In the
+following a $V$-cycle will be denoted by $V(\nu_1,\nu_2)$.
+
+Another multigrid algorithm called \emph{Full Multigrid} or FMG does
+not require an input guess $\mathbf{u}^h$ but solves first the
+problem on coarser grids and uses one or many MG cycles to obtain the problem
+solution. It can be represented by
+\begin{equation*}
+  \boxed{\mathbf{u}^h \leftarrow FMG^h(\mathbf{b}^h)}
+\end{equation*}
+and defined recursively by the following steps:
+
+\begin{enumerate}
+\item If $h$ is the coarsest mesh size, direct solve
+  $\mathbf{A}^h\mathbf{u}^h=\mathbf{b}^h$ and return.
+  \item Else 
+    \begin{itemize}
+    \item $\mathbf{b}^{2h} \leftarrow  \mathbf{R}^{2h}_h(\mathbf{b}^h)$.
+    \item $\mathbf{u}^{2h} \leftarrow FMG^{2h}(\mathbf{b}^{2h})$.
+    \item $\mathbf{u}^h \leftarrow \mathbf{P}^{h}_{2h}\mathbf{u}^{2h}$.
+    \item $\mathbf{u}^h \leftarrow MG^h(\mathbf{u}^h,\mathbf{b}^h)$ $\nu_0$ times.
+    \end{itemize} 
+\end{enumerate}
+
+Note that while the MG process is an iterative process (started by
+setting for example the initial guess $\mathbf{u}^h=0$), the FMG is
+more like a \emph{direct solver} with appropriate values of $\nu_1$,
+$\nu_2$ and  $\nu_0$ determined experimentally.
+
+\section{Numerical Experiments}
+The residual norm $\|r\|_2$ and error $\|e\|_2$ defined
+previously are reported after each $V$-cycle in
+Table~\ref{tab:cartesian1} for the Cartesian model problem and in
+Table~\ref{tab:cylindrical1} for the cylindrical one . The ratio
+between successive cycle $\|r\|_2$ 
+and $\|e\|_2$ are shown in columns labeled \emph{ratio} and measure
+the rate of iteration convergence. The \emph{asymptotic} ratio of
+$\|r\|_2$ is called the \emph{convergence factor}.
+
+In all the cases shown, one can note that $\|e\|_2$ level off quickly to the
+discretization error obtained by using the \emph{direct solver} on the
+finest grid, while the residual norms $\|r\|_2$ continue to  decrease until
+the machine zero is eventually reached. One can also verify that the \emph{final} 
+discretization errors scale approximately as $8^2$ and $8^4$
+respectively for linear and cubic Splines, as $N$ is increased from
+$128$ to $1024$.
+
+Most interestingly, the \emph{iterative performance} depends very weakly
+on the problem size $N$, for both the Cartesian and the cylindrical
+cases. Moreover, the multigrid  seems to be less efficient when linear Splines are used for
+the problem discretization. This iterative performance can be further improved by
+increasing the \emph{iteration parameters} $\nu_1$, $\nu_2$ and $\mu$,
+as shown in Table~\ref{tab:improv}. One can also observe in the same
+table that the Jacobi relaxation is systematically less efficient than Gauss
+Seidel relaxation.
+
+\begin{table}
+\centering
+\begin{tabular}{|c|c|c|c|c|c|c|c|c|}\hline
+ \multicolumn{9}{|c|}{ Linear B-Splines $p=1$} \\ \hline
+   & \multicolumn{4}{c|}{ $N=128$}            
+   & \multicolumn{4}{c|}{ $N=1024$} \\ \cline{2-9}
+$V$-cycle  & $\|r\|_2$ & ratio & $\|e\|_2$ & ratio & 
+           $\|r\|_2$ & ratio & $\|e\|_2$ & ratio \\ \hline
+   0 & 6.219E-02 &       & 7.164E-04 &       & 2.210E-02 &       & 7.164E-04 &       \\
+   1 & 2.169E-02 &  0.35 & 5.880E-05 &  0.08 & 9.699E-03 &  0.44 & 3.622E-05 &  0.05 \\
+   2 & 3.801E-03 &  0.18 & 7.806E-06 &  0.13 & 1.790E-03 &  0.18 & 1.965E-06 &  0.05 \\
+   3 & 5.061E-04 &  0.13 & 3.666E-06 &  0.47 & 2.923E-04 &  0.16 & 1.583E-07 &  0.08 \\
+   4 & 6.762E-05 &  0.13 & 3.564E-06 &  0.97 & 4.055E-05 &  0.14 & 6.197E-08 &  0.39 \\
+   5 & 8.902E-06 &  0.13 & 3.585E-06 &  1.01 & 5.586E-06 &  0.14 & 5.655E-08 &  0.91 \\
+   6 & 1.199E-06 &  0.13 & 3.589E-06 &  1.00 & 7.122E-07 &  0.13 & 5.622E-08 &  0.99 \\
+   7 & 1.585E-07 &  0.13 & 3.590E-06 &  1.00 & 9.815E-08 &  0.14 & 5.620E-08 &  1.00 \\
+   8 & 2.089E-08 &  0.13 & 3.590E-06 &  1.00 & 1.320E-08 &  0.13 & 5.619E-08 &  1.00 \\
+   9 & 2.746E-09 &  0.13 & 3.590E-06 &  1.00 & 1.887E-09 &  0.14 & 5.619E-08 &  1.00 \\
+  10 & 3.741E-10 &  0.14 & 3.590E-06 &  1.00 & 2.533E-10 &  0.13 & 5.619E-08 &  1.00 \\
+\hline \hline
+\multicolumn{9}{|c|}{ Cubic B-Splines $p=3$} \\ \hline
+   & \multicolumn{4}{c|}{ $N=128$}            
+   & \multicolumn{4}{c|}{ $N=1024$} \\ \cline{2-9}
+$V$-cycle & $\|r\|_2$ & ratio & $\|e\|_2$ & ratio & 
+           $\|r\|_2$ & ratio & $\|e\|_2$ & ratio \\ \hline
+   0 & 6.187E-02 &       & 7.164E-04 &       &  2.209E-02 &       & 7.164E-04 &       \\
+   1 & 1.948E-04 &  0.00 & 1.893E-06 &  0.00 &  1.685E-05 &  0.00 & 4.292E-08 &  0.00 \\
+   2 & 4.316E-06 &  0.02 & 3.927E-09 &  0.00 &  1.241E-07 &  0.01 & 7.156E-11 &  0.00 \\
+   3 & 1.554E-07 &  0.04 & 2.374E-09 &  0.60 &  4.184E-09 &  0.03 & 6.198E-13 &  0.01 \\
+   4 & 5.750E-09 &  0.04 & 2.373E-09 &  1.00 &  1.560E-10 &  0.04 & 5.635E-13 &  0.91 \\
+   5 & 2.153E-10 &  0.04 & 2.373E-09 &  1.00 &  5.912E-12 &  0.04 & 5.635E-13 &  1.00 \\
+   6 & 8.122E-12 &  0.04 & 2.373E-09 &  1.00 &  2.258E-13 &  0.04 & 5.635E-13 &  1.00 \\
+   7 & 3.079E-13 &  0.04 & 2.373E-09 &  1.00 &  8.777E-15 &  0.04 & 5.635E-13 &  1.00 \\
+   8 & 1.173E-14 &  0.04 & 2.373E-09 &  1.00 &  1.758E-15 &  0.20 & 5.635E-13 &  1.00 \\
+   9 & 4.489E-16 &  0.04 & 2.373E-09 &  1.00 &  1.709E-15 &  0.97 & 5.635E-13 &  1.00 \\
+  10 & 9.571E-17 &  0.21 & 2.373E-09 &  1.00 &  1.761E-15 &  1.03 &
+  5.635E-13 &  1.00 \\ \hline
+\end{tabular}
+\caption{The multigrid $V(1,1)$ performance with Gauss-Seidel relation
+  for a \emph{Cartesian} problem with $k=10$ and $\sigma=0$,
+  discretized on a grid with $N=128$ and $1024$ intervals, using
+  linear and cubic B-splines. For 
+  both grid sizes, a total of 6 grid levels were considered.}
+\label{tab:cartesian1}
+\end{table}
+
+\begin{table}
+\centering
+\begin{tabular}{|c|c|c|c|c|c|c|c|c|}\hline
+ \multicolumn{9}{|c|}{ Linear B-Splines $p=1$} \\ \hline
+   & \multicolumn{4}{c|}{ $N=128$}            
+   & \multicolumn{4}{c|}{ $N=1024$} \\ \cline{2-9}
+$V$-cycle  & $\|r\|_2$ & ratio & $\|e\|_2$ & ratio & 
+           $\|r\|_2$ & ratio & $\|e\|_2$ & ratio \\ \hline
+   0 & 1.789E+01 &           & 9.354E-02 &      & 6.400E+00 &      & 9.354E-02 &       \\
+   1 & 3.373E+00 &      0.19 & 3.068E-03 & 0.03 & 1.826E+00 & 0.29 & 3.036E-03 & 0.03  \\
+   2 & 4.895E-01 &      0.15 & 8.064E-04 & 0.26 & 3.133E-01 & 0.17 & 1.624E-04 & 0.05  \\
+   3 & 6.160E-02 &      0.13 & 6.704E-04 & 0.83 & 4.581E-02 & 0.15 & 1.411E-05 & 0.09  \\
+   4 & 8.013E-03 &      0.13 & 6.811E-04 & 1.02 & 5.959E-03 & 0.13 & 1.062E-05 & 0.75  \\
+   5 & 9.871E-04 &      0.12 & 6.844E-04 & 1.00 & 8.098E-04 & 0.14 & 1.069E-05 & 1.01  \\
+   6 & 1.283E-04 &      0.13 & 6.847E-04 & 1.00 & 1.048E-04 & 0.13 & 1.070E-05 & 1.00  \\
+   7 & 1.613E-05 &      0.13 & 6.847E-04 & 1.00 & 1.504E-05 & 0.14 & 1.070E-05 & 1.00  \\
+   8 & 2.097E-06 &      0.13 & 6.847E-04 & 1.00 & 2.050E-06 & 0.14 & 1.070E-05 & 1.00  \\
+   9 & 2.639E-07 &      0.13 & 6.847E-04 & 1.00 & 3.008E-07 & 0.15 & 1.070E-05 & 1.00  \\
+  10 & 3.500E-08 &      0.13 & 6.847E-04 & 1.00 & 4.074E-08 & 0.14 & 1.070E-05 & 1.00  \\
+\hline \hline
+\multicolumn{9}{|c|}{ Cubic B-Splines $p=3$} \\ \hline
+   & \multicolumn{4}{c|}{ $N=128$}            
+   & \multicolumn{4}{c|}{ $N=1024$} \\ \cline{2-9}
+$V$-cycle & $\|r\|_2$ & ratio & $\|e\|_2$ & ratio & 
+           $\|r\|_2$ & ratio & $\|e\|_2$ & ratio \\ \hline
+   0 & 1.768E+01 &      & 9.354E-02 &      & 6.399E+00 &      & 9.354E-02 &       \\
+   1 & 4.243E-02 & 0.00 & 4.727E-05 & 0.00 & 4.975E-03 & 0.00 & 6.588E-06 & 0.00  \\
+   2 & 1.378E-03 & 0.03 & 1.897E-06 & 0.04 & 7.835E-05 & 0.02 & 6.578E-09 & 0.00  \\
+   3 & 4.773E-05 & 0.03 & 1.814E-06 & 0.96 & 2.797E-06 & 0.04 & 4.125E-10 & 0.06  \\
+   4 & 2.174E-06 & 0.05 & 1.814E-06 & 1.00 & 1.041E-07 & 0.04 & 4.092E-10 & 0.99  \\
+   5 & 4.816E-07 & 0.22 & 1.814E-06 & 1.00 & 3.935E-09 & 0.04 & 4.092E-10 & 1.00  \\
+   6 & 1.942E-07 & 0.40 & 1.814E-06 & 1.00 & 1.499E-10 & 0.04 & 4.092E-10 & 1.00  \\
+   7 & 8.887E-08 & 0.46 & 1.814E-06 & 1.00 & 5.757E-12 & 0.04 & 4.092E-10 & 1.00  \\
+   8 & 4.449E-08 & 0.50 & 1.814E-06 & 1.00 & 2.517E-13 & 0.04 & 4.092E-10 & 1.00  \\
+   9 & 2.377E-08 & 0.53 & 1.814E-06 & 1.00 & 1.360E-13 & 0.54 & 4.092E-10 & 1.00  \\
+  10 & 1.328E-08 & 0.56 & 1.814E-06 & 1.00 & 1.384E-13 & 1.02 & 4.092E-10 & 1.00  \\ \hline
+\end{tabular}
+\caption{The multigrid $V(1,1)$ performance with Gauss-Seidel relation
+  for a one-dimensional \emph{cylindrical} problem with $m=22$ and
+  $s=10$, discretized on a grid with
+  $N=128$ and $1024$ intervals, using linear and cubic B-splines. For
+  both grid sizes, a total of 6 grid levels were considered.}
+\label{tab:cylindrical1}
+\end{table}
+
+\begin{table}
+\centering
+\begin{tabular}{|c|c|c|c|c|}\hline
+   & \multicolumn{2}{c|}{ Cartesian problem}            
+   & \multicolumn{2}{c|}{ Cylindrical problem}  \\ \cline{2-5}
+                       & $N=128$ &  $N=1024$ & $N=128$  &  $N=1024$\\ \hline
+$\nu_1=1, \nu_2=1, \mu=1$ & 0.13 &  0.14 &  0.13 &  0.14 \\
+$\nu_1=1, \nu_2=2, \mu=1$ & 0.08 &  0.08 (\emph{0.10}) &  0.08 &  0.08 (\emph{0.09}) \\
+$\nu_1=2, \nu_2=1, \mu=1$ & 0.08 &  0.08 &  0.08 &  0.08 \\
+$\nu_1=2, \nu_2=2, \mu=1$ & 0.04 &  0.04 (\emph{0.08}) &  0.02 &  0.03 (\emph{0.08})\\
+$\nu_1=1, \nu_2=1, \mu=2$ & 0.12 &  0.11 &  0.12 &  0.11 \\ \hline
+\end{tabular}
+\caption{The \emph{convergence factor} (averaged over
+  the last 5 cycles) for different iteration parameters $\nu_1$, $\nu_2$ and
+  $\mu$, using the linear Splines  for both Cartesian ($k=10$,
+  $\sigma=0$) and cylindrical ($m=22$, $s=10$) problems. The last
+  entry is usually called  
+  a $W$-cycle while the first four designate a $V(\nu_1,\nu_2)$
+  cycle. Gauss Seidel relaxation is used except for the results enclosed
+  in parenthesis which are obtained with the Jacobi
+  (weighted  with $\omega=2/3$) relaxation.}
+\label{tab:improv}
+\end{table}
+
+The next experiment is shown on Table~\ref{tab:fmg}, where two FMG$(\nu_1,\nu_2)$
+schemes are applied to the $m=22$, $s=10$ cylindrical problem with
+grid sizes up to $N=2048$. Note that the problem is solved to the level of
+discretization for $N\ge 128$ with FMG$(2,1)$ but not with FMG$(1,1)$. Solving the same
+problem with the $V(2,1)$ cycle required 3 iterations for all the values of
+$N$ shown. Since the cost of one FMG(2,1) is $\sim 2$ the cost
+of one $V(2,1)$ (see Appendix \ref{sec:cost}), it appears that FMG is
+more efficient for $N\ge 128$. 
+
+Finally, in all the cases shown here, the equality (\ref{eq:coarse_mat})
+is verified numerically, except for the cylindrical case with $m\neq 0$. This is
+expected since as noted earlier, the matrix elements $A_{i1}$ and
+$A_{1i}$ diverge unless $m=0$ in the cylindrical problem.
+
+\begin{table}
+\centering
+\begin{tabular}{|c|c|c|c|c|}\hline
+& \multicolumn{2}{c|}{FMG(1,1)}
+& \multicolumn{2}{c|}{FMG(2,1)} \\  \cline{2-5}
+ $N$  & $\|e\|_2$  & $\|e\|_2/\|e\|_d$ & $\|e\|_2$  & $\|e\|_2/\|e\|_d$ \\ \hline
+    4 & 1.011E-01 & 0.968 & 1.012E-01 & 0.969 \\
+    8 & 7.781E-02 & 1.031 & 7.679E-02 & 1.018 \\
+   16 & 3.332E-02 & 1.310 & 2.808E-02 & 1.104 \\
+   32 & 1.516E-03 & 1.421 & 1.098E-03 & 1.030 \\
+   64 & 5.168E-05 & 1.443 & 3.652E-05 & 1.019 \\
+  128 & 2.012E-06 & 1.109 & 1.818E-06 & 1.002 \\
+  256 & 1.125E-07 & 1.053 & 1.069E-07 & 1.001 \\
+  512 & 6.819E-09 & 1.037 & 6.576E-09 & 1.000 \\
+ 1024 & 4.224E-10 & 1.032 & 4.093E-10 & 1.000 \\
+ 2048 & 2.634E-11 & 1.031 & 2.556E-11 & 1.000 \\
+\hline
+\end{tabular}
+\caption{The discretization errors $\|e\|_2$ obtained from a 
+  FMG$(\nu_1,\nu_2)$  sweep with $\nu_0=1$ for different grid sizes $N$. The columns
+  $\|e\|_2/\|e\|_d$ display their ratio with the discretization errors
+  obtained from a \emph{direct} solver. The cylindrical problem with
+  $m=22$ and $s=10$ using cubic Splines is considered here.} 
+\label{tab:fmg}
+\end{table}
+
+\section{Periodic Case}
+\subsection{Transfer operators}
+For periodic problems, we use \emph{periodic} Splines \cite{BSPLINES} which
+satisfy the periodic boundary condition
+$\Lambda^h_{i+N}(x)=\Lambda^h_i(x-Nh)$. As a result, both the expansion
+coefficients and the right hand sides are periodic
+with periodicity $N$ ($u^h_{i+N}=u^h_i$, $b^h_{i+N}=b^h_i$) and he rank
+of all matrices should be $N$ instead of $N+p$ as in the non-periodic case.
+
+The \emph{prolongation} matrix $\mathbf{P}^h_{2h}$ as given by
+(\ref{eq:prolongation}) are computed numerically and the results
+for $N=8$ are given below for linear, quadratic and cubic Splines.
+
+\begin{itemize}
+\item Linear Splines
+  \begin{equation}
+    \mathbf{P}^{h}_{2h} =
+    \left(
+    \begin{matrix}
+      1   & 0   & 0   & 0   \\
+      1/2 & 1/2 & 0   & 0   \\
+      0   & 1   & 0   & 0   \\
+      0   & 1/2 & 1/2 & 0   \\
+      0   & 0   & 1   & 0   \\
+      0   & 0   & 1/2 & 1/2 \\
+      0   & 0   & 0   & 1   \\
+      1/2 & 0   & 0   & 1/2 \\
+    \end{matrix}\right)
+  \end{equation}
+\item Quadratic Splines
+  \begin{equation}
+    \mathbf{P}^{h}_{2h} =
+    \left(
+    \begin{matrix}
+      3/4 & 1/4 & 0   & 0   \\
+      1/4 & 3/4 & 0   & 0   \\
+      0   & 3/4 & 1/4 & 0   \\
+      0   & 1/4 & 3/4 & 0   \\
+      0   & 0   & 3/4 & 1/4 \\
+      0   & 0   & 1/4 & 3/4 \\
+      1/4 & 0   & 0   & 3/4 \\
+      3/4 & 0   & 0   & 1/4 \\
+    \end{matrix}\right)
+  \end{equation}
+\item Cubic Splines
+  \begin{equation}
+    \mathbf{P}^{h}_{2h} =
+    \left(
+    \begin{matrix}
+      1/2 & 1/2 & 0   & 0   \\
+      1/8 & 3/4 & 1/8 & 0   \\
+      0   & 1/2 & 1/2 & 0   \\
+      0   & 1/8 & 3/4 & 1/8 \\
+      0   & 0   & 1/2 & 1/2 \\
+      1/8 & 0   & 1/8 & 3/4 \\
+      1/2 & 0   & 0   & 1/2 \\
+      3/4 & 1/8 & 0   & 1/8 \\
+    \end{matrix}\right)
+  \end{equation}
+\end{itemize}
+The restriction matrix is simply $\mathbf{R}^{2h}_h=
+(\mathbf{P}^{h}_{2h})^T$. Generalization for any other number of intervals $N$ should be
+straightforward.
+
+\subsection{Numerical Experiments}
+In order to test the grid transfer operators obtained above, the same
+second-order problem (\ref{eq:cartesian_problem}) but with the
+periodic boundary condition $u(x+1)=u(x)$ is considered. It should be
+noted that in that case, if $\sigma=0$, the problem is singular since
+the solution is not \emph{unique}! But we have observed that this
+problem can be avoided for a slightly non zero $\sigma$, 
+
+With $\sigma=0.01$ and $k=10$ and using linear and cubic Splines, we
+recover the same multigrid iterative performances shown in Table
+\ref{tab:cartesian1} obtained previously for non-periodic Dirichlet
+boundary conditions. Table \ref{tab:quad_splines} also shows similar
+iterative efficiencies for \emph{quadratic} non-periodic and periodic problems.
+
+The identity (\ref{eq:coarse_mat}) is numerically verified in all the
+cases considered.
+
+\begin{table}
+\centering
+\begin{tabular}{|c|c|c|c|c|c|c|c|c|}\hline
+ \multicolumn{9}{|c|}{ Cartesian problem with quadratic splines} \\ \hline
+   & \multicolumn{4}{c|}{ $N=128$}            
+   & \multicolumn{4}{c|}{ $N=1024$} \\ \cline{2-9}
+$V$-cycle  & $\|r\|_2$ & ratio & $\|e\|_2$ & ratio & 
+           $\|r\|_2$ & ratio & $\|e\|_2$ & ratio \\ \hline
+  0 & 6.203E-02 &      & 7.164E-04 &      & 2.209E-02 &      & 7.164E-04 &      \\
+  1 & 8.114E-04 & 0.01 & 6.375E-06 & 0.01 & 1.003E-04 & 0.00 & 4.509E-07 & 0.00 \\
+  2 & 1.891E-05 & 0.02 & 6.079E-08 & 0.01 & 1.769E-06 & 0.02 & 8.061E-10 & 0.00 \\
+  3 & 1.103E-06 & 0.06 & 5.220E-08 & 0.86 & 7.018E-08 & 0.04 & 9.970E-11 & 0.12 \\
+  4 & 8.148E-08 & 0.07 & 5.220E-08 & 1.00 & 5.620E-09 & 0.08 & 9.958E-11 & 1.00 \\
+  5 & 6.368E-09 & 0.08 & 5.220E-08 & 1.00 & 4.772E-10 & 0.08 & 9.958E-11 & 1.00 \\
+  6 & 4.969E-10 & 0.08 & 5.220E-08 & 1.00 & 4.101E-11 & 0.09 & 9.958E-11 & 1.00 \\
+  7 & 3.874E-11 & 0.08 & 5.220E-08 & 1.00 & 3.548E-12 & 0.09 & 9.958E-11 & 1.00 \\
+  8 & 3.081E-12 & 0.08 & 5.220E-08 & 1.00 & 3.081E-13 & 0.09 & 9.958E-11 & 1.00 \\
+  9 & 2.489E-13 & 0.08 & 5.220E-08 & 1.00 & 2.690E-14 & 0.09 & 9.958E-11 & 1.00 \\
+ 10 & 1.986E-14 & 0.08 & 5.220E-08 & 1.00 & 3.212E-15 & 0.12 & 9.958E-11 & 1.00 \\
+\hline \hline
+\multicolumn{9}{|c|}{ Periodic problem with quadratic splines} \\ \hline
+   & \multicolumn{4}{c|}{ $N=128$}            
+   & \multicolumn{4}{c|}{ $N=1024$} \\ \cline{2-9}
+$V$-cycle & $\|r\|_2$ & ratio & $\|e\|_2$ & ratio & 
+           $\|r\|_2$ & ratio & $\|e\|_2$ & ratio \\ \hline
+  0 & 6.203E-02 &      & 7.164E-04 &      & 2.209E-02 &      & 7.164E-04 &      \\
+  1 & 1.285E-03 & 0.02 & 1.294E-05 & 0.02 & 3.116E-04 & 0.01 & 5.862E-06 & 0.01 \\
+  2 & 7.878E-05 & 0.06 & 6.569E-07 & 0.05 & 2.893E-05 & 0.09 & 1.626E-07 & 0.03 \\
+  3 & 6.573E-06 & 0.08 & 6.511E-08 & 0.10 & 2.691E-06 & 0.09 & 5.631E-09 & 0.03 \\
+  4 & 5.681E-07 & 0.09 & 5.224E-08 & 0.80 & 2.385E-07 & 0.09 & 2.400E-10 & 0.04 \\
+  5 & 4.890E-08 & 0.09 & 5.219E-08 & 1.00 & 2.097E-08 & 0.09 & 9.997E-11 & 0.42 \\
+  6 & 4.198E-09 & 0.09 & 5.219E-08 & 1.00 & 1.828E-09 & 0.09 & 9.958E-11 & 1.00 \\
+  7 & 3.607E-10 & 0.09 & 5.219E-08 & 1.00 & 1.584E-10 & 0.09 & 9.958E-11 & 1.00 \\
+  8 & 3.103E-11 & 0.09 & 5.219E-08 & 1.00 & 1.370E-11 & 0.09 & 9.958E-11 & 1.00 \\
+  9 & 2.674E-12 & 0.09 & 5.219E-08 & 1.00 & 1.184E-12 & 0.09 & 9.958E-11 & 1.00 \\
+ 10 & 2.307E-13 & 0.09 & 5.219E-08 & 1.00 & 1.025E-13 & 0.09 & 9.958E-11 & 1.00 \\\hline
+\end{tabular}
+\caption{The multigrid $V(1,1)$ performance with Gauss-Seidel relation
+  for \emph{Cartesian} problem ($k=10$, $\sigma=0$) and
+  \emph{periodic} problem ($k=10$, $\sigma=0.01$), discretized on a grid with
+  $N=128$ and $1024$ intervals, using quadratic B-splines. For
+  both grid sizes, a total of 6 grid levels were considered.}
+\label{tab:quad_splines}
+\end{table}
+
+\section{Conclusion}
+Using the variational principle, we have derived the expressions of
+the grid transfer matrices for Finite Elements using Splines of any
+order. It is found that:
+
+\begin{itemize}
+\item The grid transfer matrices do not depend of the geometries
+  characterized by the Jacobian as defined in  $dV = x^\alpha dx$.
+\item The standard grid transfer operator used for first order finite
+  difference (FD) discretization for Cartesian geometry is recovered when
+  linear Spline finite elements (FE) are used.
+\item Applying these transfer matrices, we have solved Cartesian, 
+  cylindrical as well as periodic one dimensional
+  problems, and obtained essentially the same multigrid iterative performances
+  as found for standard first order FD Cartesian problems. 
+\item No performance \emph{degradation} is observed when the order of Splines FE
+  is increased from 1 to 3, or when the cylindrical geometry is considered.
+\end{itemize}
+
+For two dimensional problems, notice that for both Cartesian ($dV=dxdy$) and
+standard curvilinear geometries ($dV=r^\alpha drd\theta$), the
+Jacobian is \emph{separable}. Using this property, one can show that
+the two dimensional grid transfer consists of simply applying
+successively one dimensional grid transfer on each of the $x$ and
+$y$ (or $r$ and $\theta$) grids. With the solution
+$\mathbf{u}^h=[u^h_{ij}]$  and right hand side
+$\mathbf{b}^h=[b^h_{ij}]$ defined by
+\begin{equation}
+  u(x,y) = \sum_{ij} u^h_{ij}\Lambda^h_i(x)\Lambda^h_j(y), \qquad
+  b^h_{ij} = \int dx\Lambda^h_i(x)\int dy\Lambda^h_j(y)f(x,y), 
+\end{equation}
+the two dimension grid transfers can be expressed as (see Appendix \ref{sec:twod})
+\begin{equation}
+  \begin{split}
+  \mathbf{u}^h &= {_{x}\mathbf{P}^h_{2h}}\; \mathbf{u}^{2h}
+                  \left(_{y}\mathbf{P}^h_{2h}\right)^T, \\    
+  \mathbf{b}^{2h} &={_{x}\mathbf{R}^{2h}_{h}}\; \mathbf{b}^{h}
+                  \left(_{y}\mathbf{R}^{2h}_{h}\right)^T. \\    
+  \end{split}
+\end{equation}
+
+For more general curvilinear coordinates such as found in tokamak
+magnetic coordinates defined by $dV=J(s,\theta)dsd\theta$, we will
+assume that the grid transfer operators derived above are still
+applicable. The validity of this assumption will be the object of the
+next task.
+
+
+\appendix
+\section{Multigrid Cost Estimation}
+\label{sec:cost}
+Assuming that the \emph{coarsest} grid is fixed to $2$, the total
+number of grid levels $L$ is given by $N/2^{L-1}=2$ or $L=\log_2(N)$, where $N$
+is the number of intervals in the \emph{finest grid}. Since both
+\emph{relaxation} and intergrid transfer are proportional to the
+number of problem unknowns, the cost of the
+$V$-cycle can be estimated as:
+\begin{equation}
+  \begin{split}
+   \mbox{MG}(N) &= c\left[ (N+p)+(N/2+p) +\ldots + (N/2^{L-2}+p) \right] \\
+       &= c \left[ 2N-4 +(L-1)p  \right] ,\\
+  \end{split}
+\end{equation}
+where $p$ is the order of Splines used for the discretization. The FMG
+can then be deduced, assuming $\nu_0=1$ as
+\begin{equation}
+  \begin{split}
+    \mbox{FMG}(N) &= \mbox{MG}(N) +\mbox{MG}(N/2) + \ldots + \mbox{MG}(N/2^{L-2})\\
+                  &= c\left[ 4N-8 +(L-1)(pL/2-4) \right] .\\
+  \end{split}
+\end{equation}
+As expected a single FMG cycle (with $\nu_0=1$) costs about two  $V$-cycles.
+
+\section{Two dimensional Grid Transfer}
+\label{sec:twod}
+On the fine and the coarse grids, the problem solution $u(x,y)$ can be written
+as:
+\begin{equation*}
+  u(x,y) = \sum_{i'j'} u^h_{i'j'}\Lambda^h_{i'}(x)\Lambda^h_{j'}(y) 
+  = \sum_{i'j'} u^{2h}_{i'j'}\Lambda^{2h}_{i'}(x)\Lambda^{2h}_{j'}(y).
+\end{equation*}
+Projecting these two expansions on the two dimensional basis
+functions $\Lambda^h_{i}(x)\Lambda^h_{j}(y)$ yields
+
+  \begin{gather*}
+    \sum_{i'j'} u^h_{i'j'}
+    \underbrace{\int dx\Lambda^h_{i}(x)\Lambda^h_{i'}(x)}_{M^{h,h}_{ii'}}
+    \underbrace{\int dy\Lambda^h_{j}(x)\Lambda^h_{j'}(y)}_{N^{h,h}_{jj'}}
+    =  \sum_{i'j'} u^{2h}_{i'j'}
+    \underbrace{\int dx\Lambda^h_{i}(x)\Lambda^{2h}_{i'}(x)}_{M^{h,2h}_{ii'}}
+    \underbrace{\int
+      dy\Lambda^h_{j}(x)\Lambda^{2h}_{j'}(y)}_{N^{h,2h}_{jj'}} \\
+     \Longrightarrow \quad
+    \mathbf{M}^{h,h}\;\mathbf{u}^h\;\left(\mathbf{N}^{h,h}\right)^T =
+    \mathbf{M}^{h,2h}\mathbf{u}^{2h}\left(\mathbf{N}^{h,2h}\right)^T \\
+     \Longrightarrow \quad
+    \mathbf{u}^h = \left(\mathbf{M}^{h,h}\right)^{-1}
+    \mathbf{M}^{h,2h}\;\mathbf{u}^{2h}\;\left[\left(\mathbf{N}^{h,h}\right)^{-1}\mathbf{N}^{h,2h}\right]^T.
+  \end{gather*}
+
+The right hand side can be written on the fine and coarse grids as
+\begin{equation*}
+  \begin{split}
+  b^{h}_{ij} = \int dx\Lambda^{h}_i(x)\int dy\Lambda^{h}_j(y)\;f(x,y)
+  = \sum_{i'j'} M^{h,h}_{ii'}f^{h}_{i'j'}N^{h,h}_{jj'} \quad\Longrightarrow\quad
+  \mathbf{b}^h &= \mathbf{M}^{h,h}\;\mathbf{f}^h\left(\mathbf{N}^{h,h}\right)^{T}, \\
+  b^{2h}_{ij} = \int dx\Lambda^{2h}_i(x)\int dy\Lambda^{2h}_j(y)\;f(x,y)
+  = \sum_{i'j'} M^{2h,h}_{ii'}f^{h}_{i'j'}N^{2h,h}_{jj'} \quad\Longrightarrow\quad
+  \mathbf{b}^{2h} &= \mathbf{M}^{2h,h}\;\mathbf{f}^h\left(\mathbf{N}^{2h,h}\right)^{T}, \\
+  \end{split}
+\end{equation*}
+where the expansion of $f(x,y)$ on the \emph{fine} mesh has been
+used. Elimination of $\mathbf{f}^h$ then yields
+\begin{equation*}
+   \mathbf{b}^{2h} =
+   \mathbf{M}^{2h,h}\left(\mathbf{M}^{h,h}\right)^{-1} \;
+   \mathbf{b}^h\;
+   \left[\mathbf{N}^{2h,h}\left(\mathbf{N}^{h,h}\right)^{-1} \right]^T.
+\end{equation*}
+
+\begin{thebibliography}{99}
+  \bibitem{SOLVERS} {\tt The Solvers in BSPLINES}, 
+    \url{https://crppsvn.epfl.ch/repos/bsplines/trunk/docs/solvers.pdf}
+  \bibitem{BSPLINES} {\tt BSPLINES} Reference Guide, 
+    \url{https://crppsvn.epfl.ch/repos/bsplines/trunk/docs/bsplines.pdf}
+  \bibitem{Briggs} {W.L.~Briggs, V.E.~Henson and S.F.~McCormick, A
+    Multigrid Tutorial, Second Edition, Siam (2000)}.
+\end{thebibliography}
+\end{document}
+
+
diff --git a/multigrid/docs/multigrid_2d.pdf b/multigrid/docs/multigrid_2d.pdf
new file mode 100644
index 0000000..77333fc
Binary files /dev/null and b/multigrid/docs/multigrid_2d.pdf differ
diff --git a/multigrid/docs/multigrid_2d.tex b/multigrid/docs/multigrid_2d.tex
new file mode 100644
index 0000000..a373364
--- /dev/null
+++ b/multigrid/docs/multigrid_2d.tex
@@ -0,0 +1,425 @@
+%
+% @file multigrid_2d.tex
+%
+% @brief
+%
+% @copyright
+% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+% SPC (Swiss Plasma Center)
+%
+% spclibs is free software: you can redistribute it and/or modify it under
+% the terms of the GNU Lesser General Public License as published by the Free
+% Software Foundation, either version 3 of the License, or (at your option)
+% any later version.
+%
+% spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+%
+% You should have received a copy of the GNU Lesser General Public License
+% along with this program. If not, see <https://www.gnu.org/licenses/>.
+%
+% @authors
+% (in alphabetical order)
+% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+%
+\documentclass[a4paper]{article}
+\usepackage{linuxdoc-sgml}
+\usepackage{graphicx}
+\usepackage{hyperref}
+\usepackage{amsmath}
+
+\title{Multigrid for Finite Elements using Splines.}
+\author{Trach-Minh Tran, Stephan Brunner}
+\date{v0.1, January 2013}
+\abstract{A multigrid formulation for finite elements is
+  derived, using variational principles. More specifically the grid
+  transfer operators will be derived and tested in 2D Cartesian and
+  cylindrical geometry for arbitrary order B-Splines.}
+
+\begin{document}
+\maketitle
+\tableofcontents
+
+\section{The Model Problems}
+\subsection{Cartesian Geometry}
+The following second-order boundary value problem is considered
+
+\begin{equation}
+  \label{eq:cartesian_problem}
+  \begin{split}
+  -\left[\frac{\partial^2}{\partial x^2} 
+  +\frac{\partial^2}{\partial y^2} \right] u(x,y)  &=  f(x,y) \qquad 0\le x\le 1,\quad 0\le y\le 1 \\
+  u(0,y) = u(1,y) &= u(x,0) = u(x,1) = 0.
+  \end{split}
+\end{equation}
+By choosing
+\begin{equation*}
+  f(x,y) = \sin (\pi k_xx + \pi k_yy),
+\end{equation*}
+where $k_x$ and $k_y$ are integers, the solution of the BVP is simply
+\begin{equation*}
+  u(x,y) = \frac{\sin(\pi k_xx+\pi k_yy)}{\pi^2(k_x^2+k_y^2)}.
+\end{equation*}
+Using a weak formulation on Eq.(\ref{eq:cartesian_problem}) and a grid
+of $N_x\times N_y$ intervals, one obtains the following discretized
+linear system
+\begin{equation}
+  \sum_{i'=1}^{N_x+p}\sum_{j'=1}^{N_y+p}A_{iji'j'}u_{i'j'} = b_{ij},
+  \qquad i=1,\ldots,N_x+p,\quad j=1,\ldots,N_y+p,
+\end{equation}
+where the unknowns $u_{ij}$ are the Spline (of order $p$) expansion
+coefficients of the solution
+\begin{equation}
+  u(x,y) = \sum_{i=1}^{N_x+p}\sum_{j=1}^{N_y+p}u_{ij}\Lambda_i(x)\Lambda_j(y),
+\end{equation}
+and the matrix $A$ and right hand side $b$ are determined from
+\begin{align}
+  A_{iji'j'} &= \int^1_0\int^1_0 dxdy
+  \left[\Lambda'_{i'}(x)\Lambda_{j'}(y)\Lambda'_i(x)\Lambda_j(y) + 
+    \Lambda_{i'}(x)\Lambda'_{j'}(y)\Lambda_i(x)\Lambda'_j(y) \right],  \\
+  b_{ij} &=  \int^1_0\int^1_0 dxdy\Lambda_i(x)\Lambda_j(y)f(x,y).
+\end{align}
+Note that using a Gauss quadrature with $\lceil(2p+1)/2\rceil$ points
+per interval to calculate the matrix $A$ would yield an exact integration.
+
+\subsection{Cylindrical Geometry}
+The following second-order boundary value problem is considered:
+\begin{equation}
+  \label{eq:cylindrical_problem}
+  \begin{split}
+  -\left[\frac{1}{r}\frac{\partial}{\partial r}r\frac{\partial}{\partial r} 
+  + \frac{1}{r^2} \frac{\partial^2}{\partial\theta^2}
+   \right]u(r,\theta) &= f(r,\theta) \qquad 0\le r\le 1,\quad
+  0 \le \theta < 2\pi \\
+    u(1,\theta) &= 0, \\
+  \end{split}
+\end{equation}
+By choosing
+\begin{equation*}
+  f(r,\theta) = j^2_{ms} J_{m}(j_{ms}r)\cos(m\theta),
+\end{equation*}
+where $m$ is an integer and $j_{ms}$, the $s^{th}$ zero of $J_{m}$,
+the solution of this BVP is
+\begin{equation*}
+u(r,\theta) = J_{m}(j_{ms}r)\cos(m\theta).
+\end{equation*}
+Using a weak formulation on Eq.(\ref{eq:cylindrical_problem})and a grid
+of $N_r\times N_\theta$ intervals, one obtains the following discretized
+linear system
+\begin{equation}
+  \sum_{i'=1}^{N_r+p}\sum_{j'=1}^{N_\theta}A_{iji'j'}u_{i'j'} = b_{ij},
+  \qquad i=1,\ldots,N_r+p,\quad j=1,\ldots,N_\theta,
+\end{equation}
+where the unknowns $u_{ij}$ are the Spline (of order $p$) expansion
+coefficients of the solution
+\begin{equation}
+  u(r,\theta) = \sum_{i=1}^{N_r+p}\sum_{j=1}^{N_\theta}u_{ij}\Lambda_i(r)\Lambda_j(\theta),
+\end{equation}
+and the matrix $A$ and right hand side $b$ are determined from
+\begin{align}
+  A_{iji'j'} &= \int^1_0\int^{2\pi}_0 rdrd\theta
+  \left[\Lambda'_{i'}(r)\Lambda_{j'}(\theta)\Lambda'_i(r)\Lambda_j(\theta) + 
+    \frac{1}{r^2}
+    \Lambda_{i'}(r)\Lambda'_{j'}(\theta)\Lambda_i(r)\Lambda'_j(\theta)
+    \right],  \\
+  b_{ij} &=  \int^1_0\int^{2\pi}_0 rdrd\theta\Lambda_i(r)\Lambda_j(\theta)f(r,\theta).
+\end{align}
+Note that $A$ has an $1/r$ singularity in the
+integrand. For $m\neq0$, this should not be problematic since the
+converged solution behaves as $\sim r^m$ near $r=0$. The case $m=0$
+will be investigated numerically latter in this report, together withe
+the $m\neq 0$ case.
+
+\section{Restriction Operator}
+In the following, let us use the superscripts $h$ and $2h$ to denote
+quantities defined respectively on a \emph{fine} ($N_x\times N_y$
+or $N_r\times N_\theta$) and a \emph{coarser} ($N_x/2\times N_y/2$
+or $N_r/2\times N_\theta/2$) grid.
+
+The two grid transfers required in the standard \emph{multigrid}
+\cite{MG1D,Briggs} are:
+\begin{enumerate}
+\item the \emph{restriction} of the right hand side: $\mathbf{b}^{h}
+  \longrightarrow \mathbf{b}^{2h}$ and 
+\item the \emph{prolongation} of the solution: $\mathbf{u}^{2h}
+  \longrightarrow \mathbf{u}^{h}$.
+\end{enumerate}
+
+Noting that the basis functions $\Lambda^{2h}_i(x)$, which are \emph{piecewise}
+$C^{p-1}$ polynomials with \emph{breaks} on the \emph{coarse} grid
+points $x^{2h}_k=(2h)k$ can be also considered as \emph{piecewise}
+$C^{p-1}$ polynomials with \emph{breaks} on the \emph{fine} grid
+$x^h_k=kh$, they can be expressed \emph{uniquely} as a linear
+combination of the \emph{fine} grid basis functions:
+
+\begin{equation}
+  \label{eq:basis_transf}
+  \Lambda^{2h}_i(x) = \sum^{N+p}_{i'=1}c_{ii'}\Lambda^h_{i'}(x), \quad
+  i=1,\ldots,N/2+p.
+\end{equation}
+The (rectangular) matrix $c_{ii'}$ can be identified as the
+one-dimensional \emph{restriction}  $\mathbf{R}$ since
+\begin{equation*}
+  b^{2h}_i = \int_0^1 dx f(x)\Lambda^{2h}_i(x) =
+  \sum^{N+p}_{i'=1}c_{ii'}\;b^h_{i'} =
+  \sum^{N+p}_{i'=1}R_{ii'}\;b^h_{i'}.
+\end{equation*}
+
+It can be computed by simply projecting Eq.(\ref{eq:basis_transf})
+on the fine grid basis function $\Lambda^h_{j}(x)$ \cite{MG1D}:
+\begin{equation}
+  \sum^{N+p}_{i'=1}R_{ii'}\underbrace{\int_0^1 dx
+  \Lambda^h_{i'}(x)\Lambda^h_{j}(x)}_{M^h_{i'j}} = \underbrace{\int_0^1 dx
+  \Lambda^{2h}_i(x)\Lambda^h_{j}(x)}_{M^{2h,h}_{i'j}} \Longrightarrow
+  \mathbf{R}=\mathbf{M}^{2h,h}\cdot(\mathbf{M}^{h})^{-1}.
+\end{equation}
+
+It should be stressed that the representation for $\Lambda^{2h}_i(x)$
+in  Eq.(\ref{eq:basis_transf}) is \emph {unique}. This is
+checked by verifying that the same matrix $R_{ii'}$ is obtained using
+for example the \emph{collocation} methods. One such method, which is used for
+this check is detailed in Appendix \ref{sec:colloc}. The calculated
+grid transfer matrices for linear, quadratic and cubic periodic and
+non-periodic Splines are given in \cite{MG1D}. 
+
+Denoting the restriction on $x$ and $y$ respectively
+by $\mathbf{R}^x$ and $\mathbf{R}^y$, the two-dimensional restriction of $b^h_{ij}$ is
+defined as
+\begin{equation*}
+   b^{2h}_{ij} = \int_0^1\int_0^1 dxdy f(x,y)\Lambda^{2h}_i(x)\Lambda^{2h}_j(y) =
+  \sum^{N+p}_{i'=1}\sum^{N+p}_{j'=1}R^x_{ii'}R^y_{jj'}b^h_{i'j'},
+\end{equation*}
+and thus
+\begin{equation}
+  \label{eq:restriction}
+  \boxed{\mathbf{b}^{2h} = \mathbf{R}^x \cdot \mathbf{b}^{h}
+  \cdot (\mathbf{R}^y)^T.}
+\end{equation}
+
+\section{Prolongation Operator}
+Using Eq.(\ref{eq:basis_transf}) (with $c_{ii'}=R_{ii'}$), the
+solution at the coarse grid can be expressed as
+\begin{equation*}
+  u^{2h}(x) = \sum_{i=1}^{N/2+p}u^{2h}_{i}\Lambda^{2h}_{i}(x) = 
+  \sum_{i'=1}^{N+p}\left[\sum_{i=1}^{N/2+p} R_{ii'}u^{2h}_{i}\right]
+  \Lambda^h_{i'}(x) =
+  \sum_{i'=1}^{N+p}\underbrace{\left[\sum_{i=1}^{N/2+p} (R)^T_{i'i}u^{2h}_{i}\right]}_{\tilde{u}^h_{i'}}
+  \Lambda^h_{i'}(x),
+\end{equation*}
+from which one obvious choice for the \emph{prolongation} operator would be
+\begin{equation}
+  \mathbf{P} = \mathbf{R}^T = (\mathbf{M}^{h})^{-1}\cdot\mathbf{M}^{h,2h}.
+\end{equation}
+
+Generalization to a two-dimensional prolongation is obtained as
+follows, where summation over repeated indices is assumed:
+\begin{equation*}
+  u^{2h}(x,y) = u^{2h}_{ij}\Lambda^{2h}_{i}(x)\Lambda^{2h}_{j}(y) =
+  \left[R^x_{ii'}u^{2h}_{ij}R^y_{jj'}\right]\Lambda^{h}_{i'}(x)\Lambda^{h}_{j'}(y)
+\end{equation*}
+which leads to the prolonged solution $\tilde{\mathbf{u}}^{h}$ given by
+\begin{equation}
+  \label{eq:prolongation}
+  \boxed{\tilde{\mathbf{u}}^{h} = \mathbf{P}^x \cdot \mathbf{u}^{2h} \cdot
+  (\mathbf{P}^y)^T .}
+\end{equation}
+
+It should be noted here that, while the restricted right hand side
+$\mathbf{b}^{2h}$ as defined in Eq.(\ref{eq:restriction}) is
+\emph{exactly identical} to the assembled right hand side, the
+prolonged solution $\tilde{\mathbf{u}}^{h}$ defined in
+Eq.(\ref{eq:prolongation}) is just a
+representation of $u^{2h}(x,y)$ on the fine mesh and \emph{not} the
+solution $u^h(x,y)$ which can only be obtained by solving the problem
+on the fine mesh!
+
+\section{Numerical Experiments}
+The multigrid performance can be characterized by looking at the
+convergence of the residual Euclidean norm for the linear system 
+$\mathbf{A}\mathbf{u}=\mathbf{b}$:
+
+\begin{equation}
+\label{eq:resid}
+\|\mathbf{r}\|_2 = \|\mathbf{b}-\mathbf{A}\mathbf{u}\|_2.
+\end{equation}
+When the \emph{exact} solution $u(x,y)$ is known, the \emph{discretization error}
+can defined as
+\begin{equation}
+\label{eq:discerr}
+  \|e\|_2 = \sqrt{\int dV\left[\sum_{ij}u_{ij}\Lambda_{ij}(x,y)-u(x,y)\right]^2}
+\end{equation}
+and computed using a Gauss quadrature. Note that for Splines of order
+$p$, $\|e(x,y)\|_2(h)$ converges to zero as $O(h^{p+1})$. 
+
+\subsection{Cartesian Geometry}
+The multigrid performances for varying problem sizes are displayed in
+Fig.(\ref{fig:linear_mg2d}) for linear Splines and
+Fig.(\ref{fig:cubic_mg2d}) for cubic Splines. They show that the number
+of iterations required for convergence (abount 3 for both linear and cubic
+Splines) is insensitive to the problem sizes. Compared to direct
+methods, the multigrid should scale much better for large problem
+sizes, as indicated in Table~\ref{tab:comparison1}. For this model
+problem, using cubic Splines seems to converge 
+slightly faster than linear Splines!
+
+\begin{figure}
+  \centering
+  \includegraphics[angle=0,width=\hsize]{linear_mg2d}
+  \caption{Performance of the multigrid $V(2,1)$ scheme using a
+    Gauss-Seidel relaxation and \emph{linear Splines} for different
+    problem sizes. The size of the \emph{coarsest} grid  is $2\times 2$.}
+  \label{fig:linear_mg2d}
+\end{figure}
+
+\begin{figure}
+  \centering
+  \includegraphics[angle=0,width=\hsize]{cubic_mg2d}
+  \caption{Performance of the multigrid $V(2,1)$ scheme using a
+    Gauss-Seidel relaxation and \emph{cubic Splines} for different
+    problem sizes. The size of the \emph{coarsest} grid  is $2\times 2$.}
+  \label{fig:cubic_mg2d}
+\end{figure}
+
+\begin{table}[htb]
+  \centering
+  \begin{tabular}{|c|c|c|c|c|c|}\hline
+    & \multicolumn{2}{c|}{Linear Splines}
+    & \multicolumn{2}{c|}{Cubic Splines} \\ \hline
+$N$ & $V(2,1)$ & Direct & $V(2,1)$ & Direct \\ \hline
+ 16 & 8.844E-04 & 2.051E-03 & 2.653E-03 & 3.970E-03 \\
+ 32 & 1.661E-03 & 5.345E-03 & 4.983E-03 & 1.540E-02 \\
+ 64 & 5.766E-03 & 2.054E-02 & 1.730E-02 & 7.492E-02 \\
+128 & 2.347E-02 & 3.288E-01 & 7.042E-02 & 1.060E+00 \\
+\hline
+  \end{tabular}
+  \caption{Times (in seconds) used by a the \emph{direct sparse} solver MUMPS-4.10.0
+    for different problem sizes versus the times used by \emph{three}
+    multigrid $V(2,1)$ cycles. The Intel Fortran-13.0 compiler is used on an Intel
+    i7 platform.}
+\label{tab:comparison1}
+\end{table}
+
+The effects of the relaxation parameters $\nu_1,\nu_2$ on the
+multigrid performnace (Fig.(\ref{fig:cubic_mg2d_relax})) indicates
+that only a few relaxations are sufficient to achieve a good multigrid
+performance. Further analysis of the computational cost is required
+however to determine the \emph{optimal} $\nu_1,\nu_2$.
+
+Finally, the effects of the number of grid levels are analyzed in
+Fig.(\ref{fig:cubic_mg2d_levels}). In addition to the computational
+cost (see Table~\ref{tab:comparison2}), the memory required for the
+\emph{direct  solver} at the coarsest grid level should be taken into
+account for  the choice of the optimal number of grid levels,
+especially for very large problems.
+
+\begin{figure}
+  \centering
+  \includegraphics[angle=0,width=\hsize]{cubic_mg2d_relax}
+  \caption{Effect of the number of the relaxation sweeps $\nu_1,\nu_2$
+    on the performance of the multigrid $V(\nu_1,\nu_2)$-cycle for
+    \emph{Cubic Splines}. The finest grid has $128\times 128$
+    intervals.}
+  \label{fig:cubic_mg2d_relax}
+\end{figure}
+
+\begin{figure}
+  \centering
+  \includegraphics[angle=0,width=\hsize]{cubic_mg2d_levels}
+  \caption{Effect of the number grid levels
+    on the performance of the multigrid $V(2,1)$-cycle for \emph{Cubic
+      Splines}. The finest grid has $128\times 128$ intervals.}
+  \label{fig:cubic_mg2d_levels}
+\end{figure}
+
+\begin{table}[htb]
+  \centering
+  \begin{tabular}{|c|c|c|c|}\hline
+Number of levels & $V(1,0)$ &  $V(1,1)$ & $V(2,1)$ \\ \hline
+  2 & 3.386E-02 & 3.881E-02 & 4.031E-02 \\
+  3 & 2.923E-02 & 3.398E-02 & 3.605E-02 \\
+  4 & 2.880E-02 & 3.275E-02 & 3.595E-02 \\
+  7 & 2.912E-02 & 3.236E-02 & 3.566E-02 \\
+\hline
+  \end{tabular}
+  \caption{Effects of the times in seconds used per $V$-cyclefor
+    different number of grid levels and relaxation paramters for a
+    $128\times 128$ problem. The Intel Fortran-13.0 compiler is used on an Intel
+    i7 platform.}
+\label{tab:comparison2}
+\end{table}
+
+
+
+\subsection{Cylindrical Geometry}
+
+\newpage
+\appendix
+
+\section{Grid transfer matrix by collocation}
+\label{sec:colloc}
+Let first consider the \emph{periodic case}. Denoting $N$ as the
+number of intervals of the fine grid, the \emph{periodic} Spline basis
+functions on the \emph{coarse} grid $\Lambda^{2h}_i$ can be expressed
+as linear combinations of the \emph{fine} grid Spline basis functions
+as:
+\begin{equation}
+  \Lambda^{2h}_i(x) = \sum^{N}_{i'=1}R_{ii'}\Lambda^h_{i'}(x), \quad
+  i=1,\ldots,N/2.
+\end{equation}
+For any given $i$, the coefficients $R_{ii'}$ can be calculated by
+expressing the relation above on exactly $N$ points on the $x$-grid. For
+\emph{odd} Spline order $p$, these \emph{collocation} (or
+interpolating) points can be chosen as the \emph{break} points of the fine
+grid $x^h_k,\quad k=0,\ldots,N-1$. For \emph{even} values of $p$, the
+collocation points should be $x^h_{k+1/2}=(x^h_{k}+x^h_{k+1})/2$ in
+order to obtain a non-singular linear system of equations 
+\cite{BSPLINES}. The resulting system of equations to solve for
+$R_{ii'}$ are given below: 
+\begin{equation}
+  \begin{split}
+    p\mbox{ odd}: \qquad &\sum^{N}_{i'=1}\Lambda^h_{i'}(x^h_k)\,R_{ii'} =
+    \Lambda^{2h}_i(x^h_k), \qquad k=0,\ldots,N-1,\quad i=1,\ldots,N/2, \\
+    p\mbox{ even}: \qquad &\sum^{N}_{i'=1}\Lambda^h_{i'}(x^h_{k+1/2})\,R_{ii'} =
+    \Lambda^{2h}_i(x^h_{k+1/2}), \qquad k=0,\ldots,N-1,\quad i=1,\ldots,N/2.\\
+  \end{split}
+\end{equation}
+
+For \emph{non-periodic} Splines, there are $N+p$ and
+$N/2+p$ basis functions respectively on the fine and coarse grid:
+\begin{equation}
+  \Lambda^{2h}_i(x) = \sum^{N+p}_{i'=1}R_{ii'}\Lambda^h_{i'}(x), \quad
+  i=1,\ldots,N/2+p.
+\end{equation}
+This implies that for any given $\Lambda^{2h}_i$, $N+p$ conditions
+are required to determined the $N+p$ terms of row $i$ of the matrix $R_{ii'}$. For odd $p$,
+$N+1$ collocation points $x_k,\quad k=0,\ldots,N$ can be used with the
+missing $p-1$ equations obtained by expressing all the $(p-1)/2$ derivatives
+of $\Lambda^{2h}_i(x)$ at the end points $x_0$ and $x_N$:
+\begin{equation}
+ \frac{d^\alpha}{dx^\alpha}\Lambda^{2h}_i(x) =
+ \sum^{N+p}_{i'=1}R_{ii'}\frac{d^\alpha}{dx^\alpha} \Lambda^h_{i'}(x)
+ , \quad \alpha=1,\dots,\frac{p-1}{2} \quad (\mbox{$p$ odd}).
+\end{equation}
+For \emph{even} $p$, in addition to the $N$ relations obtained with
+the collocation points $x_{k+1/2}$ (as in the \emph{periodic} case),
+the missing $p$ conditions can be obtained by expressing $\Lambda^{2h}_i$
+and its derivatives up to $p/2-1$ at the end points $x_0$ and $x_N$: 
+\begin{equation}
+ \frac{d^\alpha}{dx^\alpha}\Lambda^{2h}_i(x) =
+ \sum^{N+p}_{i'=1}R_{ii'}\frac{d^\alpha}{dx^\alpha} \Lambda^h_{i'}(x)
+ , \quad \alpha=0,\dots,\frac{p}{2}-1 \quad (\mbox{$p$ even}).
+\end{equation}
+
+\begin{thebibliography}{99}
+  \bibitem{MG1D} {\tt Multigrid Formulation for Finite Elements},\\ 
+    \url{https://crppsvn.epfl.ch/repos/bsplines/trunk/multigrid/docs/multigrid.pdf}
+  \bibitem{Briggs} {W.L.~Briggs, V.E.~Henson and S.F.~McCormick, A
+    Multigrid Tutorial, Second Edition, Siam (2000)}.
+  \bibitem{BSPLINES} {\tt BSPLINES} Reference Guide, 
+    \url{https://crppsvn.epfl.ch/repos/bsplines/trunk/docs/bsplines.pdf}
+  \bibitem{SOLVERS} {\tt The Solvers in BSPLINES}, 
+    \url{https://crppsvn.epfl.ch/repos/bsplines/trunk/docs/solvers.pdf}
+\end{thebibliography}
+
+\end{document}
diff --git a/multigrid/docs/quad_splines.eps b/multigrid/docs/quad_splines.eps
new file mode 100644
index 0000000..1cc1190
--- /dev/null
+++ b/multigrid/docs/quad_splines.eps
@@ -0,0 +1,1081 @@
+%!PS-Adobe-3.0 EPSF-3.0
+%%Creator: MATLAB, The MathWorks, Inc. Version 7.14.0.739 (R2012a). Operating System: Linux 2.6.27.56-0.1-default #1 SMP 2010-12-01 16:57:58 +0100 x86_64.
+%%Title: /home/ttran/multigrid/quad_splines.eps
+%%CreationDate: 09/18/2012  13:09:10
+%%DocumentNeededFonts: Helvetica
+%%DocumentProcessColors: Cyan Magenta Yellow Black
+%%LanguageLevel: 2
+%%Pages: 1
+%%BoundingBox:    44   189   549   653
+%%EndComments
+
+%%BeginProlog
+% MathWorks dictionary
+/MathWorks 160 dict begin
+% definition operators
+/bdef {bind def} bind def
+/ldef {load def} bind def
+/xdef {exch def} bdef
+/xstore {exch store} bdef
+% operator abbreviations
+/c  /clip ldef
+/cc /concat ldef
+/cp /closepath ldef
+/gr /grestore ldef
+/gs /gsave ldef
+/mt /moveto ldef
+/np /newpath ldef
+/cm /currentmatrix ldef
+/sm /setmatrix ldef
+/rm /rmoveto ldef
+/rl /rlineto ldef
+/s {show newpath} bdef
+/sc {setcmykcolor} bdef
+/sr /setrgbcolor ldef
+/sg /setgray ldef
+/w /setlinewidth ldef
+/j /setlinejoin ldef
+/cap /setlinecap ldef
+/rc {rectclip} bdef
+/rf {rectfill} bdef
+% page state control
+/pgsv () def
+/bpage {/pgsv save def} bdef
+/epage {pgsv restore} bdef
+/bplot /gsave ldef
+/eplot {stroke grestore} bdef
+% orientation switch
+/portraitMode 0 def /landscapeMode 1 def /rotateMode 2 def
+% coordinate system mappings
+/dpi2point 0 def
+% font control
+/FontSize 0 def
+/FMS {/FontSize xstore findfont [FontSize 0 0 FontSize neg 0 0]
+  makefont setfont} bdef
+/reencode {exch dup where {pop load} {pop StandardEncoding} ifelse
+  exch dup 3 1 roll findfont dup length dict begin
+  { 1 index /FID ne {def}{pop pop} ifelse } forall
+  /Encoding exch def currentdict end definefont pop} bdef
+/isroman {findfont /CharStrings get /Agrave known} bdef
+/FMSR {3 1 roll 1 index dup isroman {reencode} {pop pop} ifelse
+  exch FMS} bdef
+/csm {1 dpi2point div -1 dpi2point div scale neg translate
+ dup landscapeMode eq {pop -90 rotate}
+  {rotateMode eq {90 rotate} if} ifelse} bdef
+% line types: solid, dotted, dashed, dotdash
+/SO { [] 0 setdash } bdef
+/DO { [.5 dpi2point mul 4 dpi2point mul] 0 setdash } bdef
+/DA { [6 dpi2point mul] 0 setdash } bdef
+/DD { [.5 dpi2point mul 4 dpi2point mul 6 dpi2point mul 4
+  dpi2point mul] 0 setdash } bdef
+% macros for lines and objects
+/L {lineto stroke} bdef
+/MP {3 1 roll moveto 1 sub {rlineto} repeat} bdef
+/AP {{rlineto} repeat} bdef
+/PDlw -1 def
+/W {/PDlw currentlinewidth def setlinewidth} def
+/PP {closepath eofill} bdef
+/DP {closepath stroke} bdef
+/MR {4 -2 roll moveto dup  0 exch rlineto exch 0 rlineto
+  neg 0 exch rlineto closepath} bdef
+/FR {MR stroke} bdef
+/PR {MR fill} bdef
+/L1i {{currentfile picstr readhexstring pop} image} bdef
+/tMatrix matrix def
+/MakeOval {newpath tMatrix currentmatrix pop translate scale
+0 0 1 0 360 arc tMatrix setmatrix} bdef
+/FO {MakeOval stroke} bdef
+/PO {MakeOval fill} bdef
+/PD {currentlinewidth 2 div 0 360 arc fill
+   PDlw -1 eq not {PDlw w /PDlw -1 def} if} def
+/FA {newpath tMatrix currentmatrix pop translate scale
+  0 0 1 5 -2 roll arc tMatrix setmatrix stroke} bdef
+/PA {newpath tMatrix currentmatrix pop	translate 0 0 moveto scale
+  0 0 1 5 -2 roll arc closepath tMatrix setmatrix fill} bdef
+/FAn {newpath tMatrix currentmatrix pop translate scale
+  0 0 1 5 -2 roll arcn tMatrix setmatrix stroke} bdef
+/PAn {newpath tMatrix currentmatrix pop translate 0 0 moveto scale
+  0 0 1 5 -2 roll arcn closepath tMatrix setmatrix fill} bdef
+/vradius 0 def /hradius 0 def /lry 0 def
+/lrx 0 def /uly 0 def /ulx 0 def /rad 0 def
+/MRR {/vradius xdef /hradius xdef /lry xdef /lrx xdef /uly xdef
+  /ulx xdef newpath tMatrix currentmatrix pop ulx hradius add uly
+  vradius add translate hradius vradius scale 0 0 1 180 270 arc 
+  tMatrix setmatrix lrx hradius sub uly vradius add translate
+  hradius vradius scale 0 0 1 270 360 arc tMatrix setmatrix
+  lrx hradius sub lry vradius sub translate hradius vradius scale
+  0 0 1 0 90 arc tMatrix setmatrix ulx hradius add lry vradius sub
+  translate hradius vradius scale 0 0 1 90 180 arc tMatrix setmatrix
+  closepath} bdef
+/FRR {MRR stroke } bdef
+/PRR {MRR fill } bdef
+/MlrRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lry uly sub 2 div def
+  newpath tMatrix currentmatrix pop ulx rad add uly rad add translate
+  rad rad scale 0 0 1 90 270 arc tMatrix setmatrix lrx rad sub lry rad
+  sub translate rad rad scale 0 0 1 270 90 arc tMatrix setmatrix
+  closepath} bdef
+/FlrRR {MlrRR stroke } bdef
+/PlrRR {MlrRR fill } bdef
+/MtbRR {/lry xdef /lrx xdef /uly xdef /ulx xdef /rad lrx ulx sub 2 div def
+  newpath tMatrix currentmatrix pop ulx rad add uly rad add translate
+  rad rad scale 0 0 1 180 360 arc tMatrix setmatrix lrx rad sub lry rad
+  sub translate rad rad scale 0 0 1 0 180 arc tMatrix setmatrix
+  closepath} bdef
+/FtbRR {MtbRR stroke } bdef
+/PtbRR {MtbRR fill } bdef
+/stri 6 array def /dtri 6 array def
+/smat 6 array def /dmat 6 array def
+/tmat1 6 array def /tmat2 6 array def /dif 3 array def
+/asub {/ind2 exch def /ind1 exch def dup dup
+  ind1 get exch ind2 get sub exch } bdef
+/tri_to_matrix {
+  2 0 asub 3 1 asub 4 0 asub 5 1 asub
+  dup 0 get exch 1 get 7 -1 roll astore } bdef
+/compute_transform {
+  dmat dtri tri_to_matrix tmat1 invertmatrix 
+  smat stri tri_to_matrix tmat2 concatmatrix } bdef
+/ds {stri astore pop} bdef
+/dt {dtri astore pop} bdef
+/db {2 copy /cols xdef /rows xdef mul dup 3 mul string
+  currentfile 
+  3 index 0 eq {/ASCIIHexDecode filter}
+  {/ASCII85Decode filter 3 index 2 eq {/RunLengthDecode filter} if }
+  ifelse exch readstring pop
+  dup 0 3 index getinterval /rbmap xdef
+  dup 2 index dup getinterval /gbmap xdef
+  1 index dup 2 mul exch getinterval /bbmap xdef pop pop}bdef
+/it {gs np dtri aload pop moveto lineto lineto cp c
+  cols rows 8 compute_transform 
+  rbmap gbmap bbmap true 3 colorimage gr}bdef
+/il {newpath moveto lineto stroke}bdef
+currentdict end def
+%%EndProlog
+
+%%BeginSetup
+MathWorks begin
+
+0 cap
+
+end
+%%EndSetup
+
+%%Page: 1 1
+%%BeginPageSetup
+%%PageBoundingBox:    44   189   549   653
+MathWorks begin
+bpage
+%%EndPageSetup
+
+%%BeginObject: obj1
+bplot
+
+/dpi2point 12 def
+portraitMode 0528 7836 csm
+
+    0     0  6068  5560 rc
+86 dict begin %Colortable dictionary
+/c0 { 0.000000 0.000000 0.000000 sr} bdef
+/c1 { 1.000000 1.000000 1.000000 sr} bdef
+/c2 { 0.900000 0.000000 0.000000 sr} bdef
+/c3 { 0.000000 0.820000 0.000000 sr} bdef
+/c4 { 0.000000 0.000000 0.800000 sr} bdef
+/c5 { 0.910000 0.820000 0.320000 sr} bdef
+/c6 { 1.000000 0.260000 0.820000 sr} bdef
+/c7 { 0.000000 0.820000 0.820000 sr} bdef
+c0
+1 j
+1 sg
+   0    0 6069 5561 rf
+6 w
+0 4531 4703 0 0 -4531 789 4948 4 MP
+PP
+-4703 0 0 4531 4703 0 0 -4531 789 4948 5 MP stroke
+4 w
+DO
+0 sg
+ 789 4948 mt  789  417 L
+ 789  417 mt  789  417 L
+2356 4948 mt 2356  417 L
+2356  417 mt 2356  417 L
+3924 4948 mt 3924  417 L
+3924  417 mt 3924  417 L
+5492 4948 mt 5492  417 L
+5492  417 mt 5492  417 L
+ 789 4948 mt 5492 4948 L
+5492 4948 mt 5492 4948 L
+ 789 4381 mt 5492 4381 L
+5492 4381 mt 5492 4381 L
+ 789 3815 mt 5492 3815 L
+5492 3815 mt 5492 3815 L
+ 789 3248 mt 5492 3248 L
+5492 3248 mt 5492 3248 L
+ 789 2682 mt 5492 2682 L
+5492 2682 mt 5492 2682 L
+ 789 2116 mt 5492 2116 L
+5492 2116 mt 5492 2116 L
+ 789 1549 mt 5492 1549 L
+5492 1549 mt 5492 1549 L
+ 789  983 mt 5492  983 L
+5492  983 mt 5492  983 L
+ 789  417 mt 5492  417 L
+5492  417 mt 5492  417 L
+SO
+6 w
+ 789 4948 mt 5492 4948 L
+ 789  417 mt 5492  417 L
+ 789 4948 mt  789  417 L
+5492 4948 mt 5492  417 L
+ 789 4948 mt 5492 4948 L
+ 789 4948 mt  789  417 L
+ 789 4948 mt  789 4924 L
+ 789  417 mt  789  440 L
+DO
+ 789 4948 mt  789  417 L
+ 789  417 mt  789  417 L
+SO
+ 789 4948 mt  789 4900 L
+ 789  417 mt  789  464 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+ 683 5160 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 96 FMSR
+
+ 843 5071 mt 
+(1) s
+1260 4948 mt 1260 4924 L
+1260  417 mt 1260  440 L
+DO
+1260 4948 mt 1260  417 L
+1260  417 mt 1260  417 L
+SO
+1536 4948 mt 1536 4924 L
+1536  417 mt 1536  440 L
+DO
+1536 4948 mt 1536  417 L
+1536  417 mt 1536  417 L
+SO
+1732 4948 mt 1732 4924 L
+1732  417 mt 1732  440 L
+DO
+1732 4948 mt 1732  417 L
+1732  417 mt 1732  417 L
+SO
+1884 4948 mt 1884 4924 L
+1884  417 mt 1884  440 L
+DO
+1884 4948 mt 1884  417 L
+1884  417 mt 1884  417 L
+SO
+2008 4948 mt 2008 4924 L
+2008  417 mt 2008  440 L
+DO
+2008 4948 mt 2008  417 L
+2008  417 mt 2008  417 L
+SO
+2113 4948 mt 2113 4924 L
+2113  417 mt 2113  440 L
+DO
+2113 4948 mt 2113  417 L
+2113  417 mt 2113  417 L
+SO
+2204 4948 mt 2204 4924 L
+2204  417 mt 2204  440 L
+DO
+2204 4948 mt 2204  417 L
+2204  417 mt 2204  417 L
+SO
+2284 4948 mt 2284 4924 L
+2284  417 mt 2284  440 L
+DO
+2284 4948 mt 2284  417 L
+2284  417 mt 2284  417 L
+SO
+2356 4948 mt 2356 4924 L
+2356  417 mt 2356  440 L
+DO
+2356 4948 mt 2356  417 L
+2356  417 mt 2356  417 L
+SO
+2356 4948 mt 2356 4900 L
+2356  417 mt 2356  464 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+2250 5160 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 96 FMSR
+
+2410 5071 mt 
+(2) s
+2828 4948 mt 2828 4924 L
+2828  417 mt 2828  440 L
+DO
+2828 4948 mt 2828  417 L
+2828  417 mt 2828  417 L
+SO
+3104 4948 mt 3104 4924 L
+3104  417 mt 3104  440 L
+DO
+3104 4948 mt 3104  417 L
+3104  417 mt 3104  417 L
+SO
+3300 4948 mt 3300 4924 L
+3300  417 mt 3300  440 L
+DO
+3300 4948 mt 3300  417 L
+3300  417 mt 3300  417 L
+SO
+3452 4948 mt 3452 4924 L
+3452  417 mt 3452  440 L
+DO
+3452 4948 mt 3452  417 L
+3452  417 mt 3452  417 L
+SO
+3576 4948 mt 3576 4924 L
+3576  417 mt 3576  440 L
+DO
+3576 4948 mt 3576  417 L
+3576  417 mt 3576  417 L
+SO
+3681 4948 mt 3681 4924 L
+3681  417 mt 3681  440 L
+DO
+3681 4948 mt 3681  417 L
+3681  417 mt 3681  417 L
+SO
+3772 4948 mt 3772 4924 L
+3772  417 mt 3772  440 L
+DO
+3772 4948 mt 3772  417 L
+3772  417 mt 3772  417 L
+SO
+3852 4948 mt 3852 4924 L
+3852  417 mt 3852  440 L
+DO
+3852 4948 mt 3852  417 L
+3852  417 mt 3852  417 L
+SO
+3924 4948 mt 3924 4924 L
+3924  417 mt 3924  440 L
+DO
+3924 4948 mt 3924  417 L
+3924  417 mt 3924  417 L
+SO
+3924 4948 mt 3924 4900 L
+3924  417 mt 3924  464 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+3818 5160 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 96 FMSR
+
+3978 5071 mt 
+(3) s
+4396 4948 mt 4396 4924 L
+4396  417 mt 4396  440 L
+DO
+4396 4948 mt 4396  417 L
+4396  417 mt 4396  417 L
+SO
+4672 4948 mt 4672 4924 L
+4672  417 mt 4672  440 L
+DO
+4672 4948 mt 4672  417 L
+4672  417 mt 4672  417 L
+SO
+4868 4948 mt 4868 4924 L
+4868  417 mt 4868  440 L
+DO
+4868 4948 mt 4868  417 L
+4868  417 mt 4868  417 L
+SO
+5020 4948 mt 5020 4924 L
+5020  417 mt 5020  440 L
+DO
+5020 4948 mt 5020  417 L
+5020  417 mt 5020  417 L
+SO
+5144 4948 mt 5144 4924 L
+5144  417 mt 5144  440 L
+DO
+5144 4948 mt 5144  417 L
+5144  417 mt 5144  417 L
+SO
+5249 4948 mt 5249 4924 L
+5249  417 mt 5249  440 L
+DO
+5249 4948 mt 5249  417 L
+5249  417 mt 5249  417 L
+SO
+5340 4948 mt 5340 4924 L
+5340  417 mt 5340  440 L
+DO
+5340 4948 mt 5340  417 L
+5340  417 mt 5340  417 L
+SO
+5420 4948 mt 5420 4924 L
+5420  417 mt 5420  440 L
+DO
+5420 4948 mt 5420  417 L
+5420  417 mt 5420  417 L
+SO
+5492 4948 mt 5492 4924 L
+5492  417 mt 5492  440 L
+DO
+5492 4948 mt 5492  417 L
+5492  417 mt 5492  417 L
+SO
+5492 4948 mt 5492 4900 L
+5492  417 mt 5492  464 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+5386 5160 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 96 FMSR
+
+5546 5071 mt 
+(4) s
+ 789 4948 mt  812 4948 L
+5492 4948 mt 5468 4948 L
+DO
+ 789 4948 mt 5492 4948 L
+5492 4948 mt 5492 4948 L
+SO
+ 789 4948 mt  836 4948 L
+5492 4948 mt 5444 4948 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+ 432 5001 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 96 FMSR
+
+ 592 4912 mt 
+(-11) s
+ 789 4777 mt  812 4777 L
+5492 4777 mt 5468 4777 L
+DO
+ 789 4777 mt 5492 4777 L
+5492 4777 mt 5492 4777 L
+SO
+ 789 4677 mt  812 4677 L
+5492 4677 mt 5468 4677 L
+DO
+ 789 4677 mt 5492 4677 L
+5492 4677 mt 5492 4677 L
+SO
+ 789 4607 mt  812 4607 L
+5492 4607 mt 5468 4607 L
+DO
+ 789 4607 mt 5492 4607 L
+5492 4607 mt 5492 4607 L
+SO
+ 789 4552 mt  812 4552 L
+5492 4552 mt 5468 4552 L
+DO
+ 789 4552 mt 5492 4552 L
+5492 4552 mt 5492 4552 L
+SO
+ 789 4507 mt  812 4507 L
+5492 4507 mt 5468 4507 L
+DO
+ 789 4507 mt 5492 4507 L
+5492 4507 mt 5492 4507 L
+SO
+ 789 4469 mt  812 4469 L
+5492 4469 mt 5468 4469 L
+DO
+ 789 4469 mt 5492 4469 L
+5492 4469 mt 5492 4469 L
+SO
+ 789 4436 mt  812 4436 L
+5492 4436 mt 5468 4436 L
+DO
+ 789 4436 mt 5492 4436 L
+5492 4436 mt 5492 4436 L
+SO
+ 789 4407 mt  812 4407 L
+5492 4407 mt 5468 4407 L
+DO
+ 789 4407 mt 5492 4407 L
+5492 4407 mt 5492 4407 L
+SO
+ 789 4381 mt  812 4381 L
+5492 4381 mt 5468 4381 L
+DO
+ 789 4381 mt 5492 4381 L
+5492 4381 mt 5492 4381 L
+SO
+ 789 4381 mt  836 4381 L
+5492 4381 mt 5444 4381 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+ 432 4434 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 96 FMSR
+
+ 592 4345 mt 
+(-10) s
+ 789 4211 mt  812 4211 L
+5492 4211 mt 5468 4211 L
+DO
+ 789 4211 mt 5492 4211 L
+5492 4211 mt 5492 4211 L
+SO
+ 789 4111 mt  812 4111 L
+5492 4111 mt 5468 4111 L
+DO
+ 789 4111 mt 5492 4111 L
+5492 4111 mt 5492 4111 L
+SO
+ 789 4040 mt  812 4040 L
+5492 4040 mt 5468 4040 L
+DO
+ 789 4040 mt 5492 4040 L
+5492 4040 mt 5492 4040 L
+SO
+ 789 3985 mt  812 3985 L
+5492 3985 mt 5468 3985 L
+DO
+ 789 3985 mt 5492 3985 L
+5492 3985 mt 5492 3985 L
+SO
+ 789 3940 mt  812 3940 L
+5492 3940 mt 5468 3940 L
+DO
+ 789 3940 mt 5492 3940 L
+5492 3940 mt 5492 3940 L
+SO
+ 789 3902 mt  812 3902 L
+5492 3902 mt 5468 3902 L
+DO
+ 789 3902 mt 5492 3902 L
+5492 3902 mt 5492 3902 L
+SO
+ 789 3870 mt  812 3870 L
+5492 3870 mt 5468 3870 L
+DO
+ 789 3870 mt 5492 3870 L
+5492 3870 mt 5492 3870 L
+SO
+ 789 3841 mt  812 3841 L
+5492 3841 mt 5468 3841 L
+DO
+ 789 3841 mt 5492 3841 L
+5492 3841 mt 5492 3841 L
+SO
+ 789 3815 mt  812 3815 L
+5492 3815 mt 5468 3815 L
+DO
+ 789 3815 mt 5492 3815 L
+5492 3815 mt 5492 3815 L
+SO
+ 789 3815 mt  836 3815 L
+5492 3815 mt 5444 3815 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+ 432 3868 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 96 FMSR
+
+ 592 3779 mt 
+(-9) s
+ 789 3644 mt  812 3644 L
+5492 3644 mt 5468 3644 L
+DO
+ 789 3644 mt 5492 3644 L
+5492 3644 mt 5492 3644 L
+SO
+ 789 3545 mt  812 3545 L
+5492 3545 mt 5468 3545 L
+DO
+ 789 3545 mt 5492 3545 L
+5492 3545 mt 5492 3545 L
+SO
+ 789 3474 mt  812 3474 L
+5492 3474 mt 5468 3474 L
+DO
+ 789 3474 mt 5492 3474 L
+5492 3474 mt 5492 3474 L
+SO
+ 789 3419 mt  812 3419 L
+5492 3419 mt 5468 3419 L
+DO
+ 789 3419 mt 5492 3419 L
+5492 3419 mt 5492 3419 L
+SO
+ 789 3374 mt  812 3374 L
+5492 3374 mt 5468 3374 L
+DO
+ 789 3374 mt 5492 3374 L
+5492 3374 mt 5492 3374 L
+SO
+ 789 3336 mt  812 3336 L
+5492 3336 mt 5468 3336 L
+DO
+ 789 3336 mt 5492 3336 L
+5492 3336 mt 5492 3336 L
+SO
+ 789 3303 mt  812 3303 L
+5492 3303 mt 5468 3303 L
+DO
+ 789 3303 mt 5492 3303 L
+5492 3303 mt 5492 3303 L
+SO
+ 789 3274 mt  812 3274 L
+5492 3274 mt 5468 3274 L
+DO
+ 789 3274 mt 5492 3274 L
+5492 3274 mt 5492 3274 L
+SO
+ 789 3248 mt  812 3248 L
+5492 3248 mt 5468 3248 L
+DO
+ 789 3248 mt 5492 3248 L
+5492 3248 mt 5492 3248 L
+SO
+ 789 3248 mt  836 3248 L
+5492 3248 mt 5444 3248 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+ 432 3301 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 96 FMSR
+
+ 592 3212 mt 
+(-8) s
+ 789 3078 mt  812 3078 L
+5492 3078 mt 5468 3078 L
+DO
+ 789 3078 mt 5492 3078 L
+5492 3078 mt 5492 3078 L
+SO
+ 789 2978 mt  812 2978 L
+5492 2978 mt 5468 2978 L
+DO
+ 789 2978 mt 5492 2978 L
+5492 2978 mt 5492 2978 L
+SO
+ 789 2907 mt  812 2907 L
+5492 2907 mt 5468 2907 L
+DO
+ 789 2907 mt 5492 2907 L
+5492 2907 mt 5492 2907 L
+SO
+ 789 2852 mt  812 2852 L
+5492 2852 mt 5468 2852 L
+DO
+ 789 2852 mt 5492 2852 L
+5492 2852 mt 5492 2852 L
+SO
+ 789 2808 mt  812 2808 L
+5492 2808 mt 5468 2808 L
+DO
+ 789 2808 mt 5492 2808 L
+5492 2808 mt 5492 2808 L
+SO
+ 789 2770 mt  812 2770 L
+5492 2770 mt 5468 2770 L
+DO
+ 789 2770 mt 5492 2770 L
+5492 2770 mt 5492 2770 L
+SO
+ 789 2737 mt  812 2737 L
+5492 2737 mt 5468 2737 L
+DO
+ 789 2737 mt 5492 2737 L
+5492 2737 mt 5492 2737 L
+SO
+ 789 2708 mt  812 2708 L
+5492 2708 mt 5468 2708 L
+DO
+ 789 2708 mt 5492 2708 L
+5492 2708 mt 5492 2708 L
+SO
+ 789 2682 mt  812 2682 L
+5492 2682 mt 5468 2682 L
+DO
+ 789 2682 mt 5492 2682 L
+5492 2682 mt 5492 2682 L
+SO
+ 789 2682 mt  836 2682 L
+5492 2682 mt 5444 2682 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+ 432 2735 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 96 FMSR
+
+ 592 2646 mt 
+(-7) s
+ 789 2512 mt  812 2512 L
+5492 2512 mt 5468 2512 L
+DO
+ 789 2512 mt 5492 2512 L
+5492 2512 mt 5492 2512 L
+SO
+ 789 2412 mt  812 2412 L
+5492 2412 mt 5468 2412 L
+DO
+ 789 2412 mt 5492 2412 L
+5492 2412 mt 5492 2412 L
+SO
+ 789 2341 mt  812 2341 L
+5492 2341 mt 5468 2341 L
+DO
+ 789 2341 mt 5492 2341 L
+5492 2341 mt 5492 2341 L
+SO
+ 789 2286 mt  812 2286 L
+5492 2286 mt 5468 2286 L
+DO
+ 789 2286 mt 5492 2286 L
+5492 2286 mt 5492 2286 L
+SO
+ 789 2241 mt  812 2241 L
+5492 2241 mt 5468 2241 L
+DO
+ 789 2241 mt 5492 2241 L
+5492 2241 mt 5492 2241 L
+SO
+ 789 2203 mt  812 2203 L
+5492 2203 mt 5468 2203 L
+DO
+ 789 2203 mt 5492 2203 L
+5492 2203 mt 5492 2203 L
+SO
+ 789 2171 mt  812 2171 L
+5492 2171 mt 5468 2171 L
+DO
+ 789 2171 mt 5492 2171 L
+5492 2171 mt 5492 2171 L
+SO
+ 789 2142 mt  812 2142 L
+5492 2142 mt 5468 2142 L
+DO
+ 789 2142 mt 5492 2142 L
+5492 2142 mt 5492 2142 L
+SO
+ 789 2116 mt  812 2116 L
+5492 2116 mt 5468 2116 L
+DO
+ 789 2116 mt 5492 2116 L
+5492 2116 mt 5492 2116 L
+SO
+ 789 2116 mt  836 2116 L
+5492 2116 mt 5444 2116 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+ 432 2169 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 96 FMSR
+
+ 592 2080 mt 
+(-6) s
+ 789 1945 mt  812 1945 L
+5492 1945 mt 5468 1945 L
+DO
+ 789 1945 mt 5492 1945 L
+5492 1945 mt 5492 1945 L
+SO
+ 789 1845 mt  812 1845 L
+5492 1845 mt 5468 1845 L
+DO
+ 789 1845 mt 5492 1845 L
+5492 1845 mt 5492 1845 L
+SO
+ 789 1775 mt  812 1775 L
+5492 1775 mt 5468 1775 L
+DO
+ 789 1775 mt 5492 1775 L
+5492 1775 mt 5492 1775 L
+SO
+ 789 1720 mt  812 1720 L
+5492 1720 mt 5468 1720 L
+DO
+ 789 1720 mt 5492 1720 L
+5492 1720 mt 5492 1720 L
+SO
+ 789 1675 mt  812 1675 L
+5492 1675 mt 5468 1675 L
+DO
+ 789 1675 mt 5492 1675 L
+5492 1675 mt 5492 1675 L
+SO
+ 789 1637 mt  812 1637 L
+5492 1637 mt 5468 1637 L
+DO
+ 789 1637 mt 5492 1637 L
+5492 1637 mt 5492 1637 L
+SO
+ 789 1604 mt  812 1604 L
+5492 1604 mt 5468 1604 L
+DO
+ 789 1604 mt 5492 1604 L
+5492 1604 mt 5492 1604 L
+SO
+ 789 1575 mt  812 1575 L
+5492 1575 mt 5468 1575 L
+DO
+ 789 1575 mt 5492 1575 L
+5492 1575 mt 5492 1575 L
+SO
+ 789 1549 mt  812 1549 L
+5492 1549 mt 5468 1549 L
+DO
+ 789 1549 mt 5492 1549 L
+5492 1549 mt 5492 1549 L
+SO
+ 789 1549 mt  836 1549 L
+5492 1549 mt 5444 1549 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+ 432 1602 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 96 FMSR
+
+ 592 1513 mt 
+(-5) s
+ 789 1379 mt  812 1379 L
+5492 1379 mt 5468 1379 L
+DO
+ 789 1379 mt 5492 1379 L
+5492 1379 mt 5492 1379 L
+SO
+ 789 1279 mt  812 1279 L
+5492 1279 mt 5468 1279 L
+DO
+ 789 1279 mt 5492 1279 L
+5492 1279 mt 5492 1279 L
+SO
+ 789 1208 mt  812 1208 L
+5492 1208 mt 5468 1208 L
+DO
+ 789 1208 mt 5492 1208 L
+5492 1208 mt 5492 1208 L
+SO
+ 789 1153 mt  812 1153 L
+5492 1153 mt 5468 1153 L
+DO
+ 789 1153 mt 5492 1153 L
+5492 1153 mt 5492 1153 L
+SO
+ 789 1109 mt  812 1109 L
+5492 1109 mt 5468 1109 L
+DO
+ 789 1109 mt 5492 1109 L
+5492 1109 mt 5492 1109 L
+SO
+ 789 1071 mt  812 1071 L
+5492 1071 mt 5468 1071 L
+DO
+ 789 1071 mt 5492 1071 L
+5492 1071 mt 5492 1071 L
+SO
+ 789 1038 mt  812 1038 L
+5492 1038 mt 5468 1038 L
+DO
+ 789 1038 mt 5492 1038 L
+5492 1038 mt 5492 1038 L
+SO
+ 789 1009 mt  812 1009 L
+5492 1009 mt 5468 1009 L
+DO
+ 789 1009 mt 5492 1009 L
+5492 1009 mt 5492 1009 L
+SO
+ 789  983 mt  812  983 L
+5492  983 mt 5468  983 L
+DO
+ 789  983 mt 5492  983 L
+5492  983 mt 5492  983 L
+SO
+ 789  983 mt  836  983 L
+5492  983 mt 5444  983 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+ 432 1036 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 96 FMSR
+
+ 592  947 mt 
+(-4) s
+ 789  812 mt  812  812 L
+5492  812 mt 5468  812 L
+DO
+ 789  812 mt 5492  812 L
+5492  812 mt 5492  812 L
+SO
+ 789  713 mt  812  713 L
+5492  713 mt 5468  713 L
+DO
+ 789  713 mt 5492  713 L
+5492  713 mt 5492  713 L
+SO
+ 789  642 mt  812  642 L
+5492  642 mt 5468  642 L
+DO
+ 789  642 mt 5492  642 L
+5492  642 mt 5492  642 L
+SO
+ 789  587 mt  812  587 L
+5492  587 mt 5468  587 L
+DO
+ 789  587 mt 5492  587 L
+5492  587 mt 5492  587 L
+SO
+ 789  542 mt  812  542 L
+5492  542 mt 5468  542 L
+DO
+ 789  542 mt 5492  542 L
+5492  542 mt 5492  542 L
+SO
+ 789  504 mt  812  504 L
+5492  504 mt 5468  504 L
+DO
+ 789  504 mt 5492  504 L
+5492  504 mt 5492  504 L
+SO
+ 789  471 mt  812  471 L
+5492  471 mt 5468  471 L
+DO
+ 789  471 mt 5492  471 L
+5492  471 mt 5492  471 L
+SO
+ 789  442 mt  812  442 L
+5492  442 mt 5468  442 L
+DO
+ 789  442 mt 5492  442 L
+5492  442 mt 5492  442 L
+SO
+ 789  417 mt  812  417 L
+5492  417 mt 5468  417 L
+DO
+ 789  417 mt 5492  417 L
+5492  417 mt 5492  417 L
+SO
+ 789  417 mt  836  417 L
+5492  417 mt 5444  417 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 144 FMSR
+
+ 432  470 mt 
+(10) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 96 FMSR
+
+ 592  381 mt 
+(-3) s
+ 789 4948 mt 5492 4948 L
+ 789  417 mt 5492  417 L
+ 789 4948 mt  789  417 L
+5492 4948 mt 5492  417 L
+gs 789 417 4704 4532 rc
+gr
+
+/c8 { 0.000000 0.000000 1.000000 sr} bdef
+c8
+  36   36 1108 1070 FO
+  36   36 1580 1744 FO
+  36   36 2052 2314 FO
+  36   36 2524 2842 FO
+  36   36 2996 3358 FO
+  36   36 3468 3870 FO
+  36   36 3940 4382 FO
+gs 789 417 4704 4532 rc
+/c9 { 0.000000 0.500000 0.000000 sr} bdef
+c9
+472 544 472 544 472 544 472 544 472 544 472 544 1108 1165 7 MP stroke
+gr
+
+c9
+0 sg
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+2322 5338 mt 
+(Number of Intervals N) s
+ 340 3397 mt  -90 rotate
+(Discretization Error) s
+90 rotate
+gs 789 417 4704 4532 rc
+/c10 { 0.847059 0.160784 0.000000 sr} bdef
+c10
+472 230 472 230 472 229 472 230 472 230 472 230 1108 677 7 MP stroke
+gr
+
+c10
+  36   36 1108  647 FO
+  36   36 1580  894 FO
+  36   36 2052 1157 FO
+  36   36 2524 1403 FO
+  36   36 2996 1625 FO
+  36   36 3468 1827 FO
+  36   36 3940 2014 FO
+gs 789 417 4704 4532 rc
+gr
+
+0 sg
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 216 FMSR
+
+2289  287 mt 
+(Quadratic Splines) s
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 120 FMSR
+
+ 772 4991 mt 
+( ) s
+5476  459 mt 
+( ) s
+2112 3946 mt 3053 3946 L
+3053 3946 mt 3053 3619 L
+2112 3946 mt 2112 3619 L
+2112 3619 mt 3053 3619 L
+%%IncludeResource: font Helvetica
+/Helvetica /ISOLatin1Encoding 168 FMSR
+
+2173 3801 mt 
+(Cartesian) s
+3651 1788 mt 4506 1788 L
+4506 1788 mt 4506 1461 L
+3651 1788 mt 3651 1461 L
+3651 1461 mt 4506 1461 L
+3712 1643 mt 
+(Periodic) s
+
+end %%Color Dict
+
+eplot
+%%EndObject
+
+epage
+end
+
+showpage
+
+%%Trailer
+%%EOF
diff --git a/multigrid/src/CMakeLists.txt b/multigrid/src/CMakeLists.txt
new file mode 100644
index 0000000..cacde7a
--- /dev/null
+++ b/multigrid/src/CMakeLists.txt
@@ -0,0 +1,92 @@
+/**
+ * @file CMakeLists.txt
+ *
+ * @brief
+ *
+ * @copyright
+ * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+ * SPC (Swiss Plasma Center)
+ *
+ * spclibs is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * @authors
+ * (in alphabetical order)
+ * @author Nicolas Richart <nicolas.richart@epfl.ch>
+ * @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+ */
+project(multigrid_tests)
+
+if(${CMAKE_Fortran_COMPILER_ID} MATCHES "Intel")
+#  set(CMAKE_Fortran_FLAGS_RELEASE 
+#    "${CMAKE_Fortran_FLAGS_RELEASE} -profile-functions -profile-loops=outer"
+#    )
+  set(CMAKE_Fortran_FLAGS_DEBUG 
+    "${CMAKE_Fortran_FLAGS_DEBUG} -fpe0"
+    )
+endif()
+
+set(MG_TESTS
+  transfer1d
+  test_relax
+  test_mg
+  test_mgp
+  test_csr
+  two_grid
+  test_mg2d
+  test_relax2d
+  test_transf2d
+  transfer1d_col
+  test_relax2d_cyl
+  test_transf2d_cyl 
+  test_mg2d_cyl
+)
+
+foreach(test ${MG_TESTS})
+  add_executable(${test} ${test}.f90)
+  target_link_libraries(${test} bsplines)
+endforeach()
+
+add_executable(poisson_fd poisson_fd.f90 fdmat_mod.f90 stencil_mod.f90 parmg_mod.f90 gvector_mod.f90)
+target_link_libraries(poisson_fd bsplines)
+set(TESTS ${TESTS} poisson_mg)
+
+add_executable(partition partition.f90 stencil_mod.f90 parmg_mod.f90 gvector_mod.f90)
+target_link_libraries(partition bsplines)
+
+# Fail to compile with crayftn 4.0.46 on ROSA/DAINT
+add_executable(test_stencil test_stencil.f90 stencil_mod.f90 gvector_mod.f90)
+target_link_libraries(test_stencil bsplines)
+
+add_executable(test_stencilg test_stencilg.f90 stencil_mod.f90 parmg_mod.f90 gvector_mod.f90)
+target_link_libraries(test_stencilg bsplines)
+
+# Fail to compile with crayftn 4.0.46 on ROSA/DAINT
+add_executable(test_jacobi test_jacobi.f90 stencil_mod.f90 parmg_mod.f90 gvector_mod.f90)
+target_link_libraries(test_jacobi bsplines)
+
+add_executable(test_jacobig test_jacobig.f90  fdmat_mod.f90 stencil_mod.f90 parmg_mod.f90 gvector_mod.f90)
+target_link_libraries(test_jacobig bsplines)
+
+add_executable(ppoisson_fd ppoisson_fd.f90  fdmat_mod.f90 stencil_mod.f90 parmg_mod.f90 gvector_mod.f90)
+target_link_libraries(ppoisson_fd bsplines)
+
+add_executable(test_gvec1d test_gvec1d.f90)
+target_link_libraries(test_gvec1d bsplines)
+
+add_executable(test_intergrid0 test_intergrid0.f90)
+target_link_libraries(test_intergrid0 bsplines)
+
+add_executable(test_intergrid1 test_intergrid1.f90 parmg_mod.f90 gvector_mod.f90 stencil_mod.f90)
+target_link_libraries(test_intergrid1 bsplines)
+
+include_directories(${CMAKE_CURRENT_BINARY_DIR})
diff --git a/multigrid/src/Makefile b/multigrid/src/Makefile
new file mode 100644
index 0000000..5748f81
--- /dev/null
+++ b/multigrid/src/Makefile
@@ -0,0 +1,136 @@
+#
+# @file Makefile
+#
+# @brief
+#
+# @copyright
+# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+# SPC (Swiss Plasma Center)
+#
+# spclibs is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# @authors
+# (in alphabetical order)
+# @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+#
+MPIF90 = mpif90
+LD = $(MPIF90)
+
+# F90FLAGS = -I$(HOME)/include/O -I$(PPUTILS2)
+# LDFLAGS = -L$(HOME)/lib/O -L${HDF5}/lib
+F90FLAGS = -I$(FUTILS)/include -I$(BSPLINES)/include
+LDFLAGS = -mkl=cluster -L$(FUTILS)/lib -L$(BSPLINES)/lib -L${HDF5}/lib
+
+MODS = gvector_mod.o stencil_mod.o
+LIBS = $(MODS) -lbsplines -lpppack -lpputils2 -lfutils \
+       -lhdf5_fortran -lhdf5 -lz
+
+ifdef MKL
+SPBLAS = -DMKL
+endif
+
+ifdef MUMPS
+F90FLAGS += -I$(MUMPS)/include
+LDFLAGS += -L$(MUMPS)/lib
+LIBS += $(MUMPSLIBS)
+endif
+
+all:	 transfer1d test_relax test_mg test_mgp test_csr two_grid \
+         test_mg2d test_relax2d test_transf2d transfer1d_col \
+         test_relax2d_cyl test_transf2d_cyl test_mg2d_cyl poisson_fd
+
+.SUFFIXES:
+.SUFFIXES: .o .f90
+
+.f90.o:
+	$(MPIF90) $(F90FLAGS) -c $<
+
+partition: partition.o
+	$(LD) $(LDFLAGS) -o $@ $< parmg_mod.o $(MODS) -lpputils2 -lfutils \
+       -lhdf5_fortran -lhdf5 -lz
+
+transfer1d:	transfer1d.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
+
+test_relax:	test_relax.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
+
+test_mg:	test_mg.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
+
+test_mgp:	test_mgp.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
+
+test_csr:	test_csr.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
+
+two_grid:	two_grid.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
+
+test_mg2d:	test_mg2d.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
+
+test_relax2d:	test_relax2d.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
+
+test_transf2d:	test_transf2d.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
+
+transfer1d_col:	transfer1d_col.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
+
+test_relax2d_cyl:	test_relax2d_cyl.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
+
+test_transf2d_cyl:	test_transf2d_cyl.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
+
+test_mg2d_cyl:	test_mg2d_cyl.o
+	$(LD) $(LDFLAGS) -o $@ $< $(LIBS)
+
+poisson_fd:	poisson_fd.o
+	$(LD) $(LDFLAGS) -o $@ $< fdmat_mod.o $(LIBS)
+
+csr_mod.o: csr_mod.f90
+	    $(MPIF90) $(FPP) $(SPBLAS) $(F90FLAGS) -c csr_mod.f90
+
+parmg_mod.o: gvector_mod.o stencil_mod.o
+partition.o: parmg_mod.o
+transfer1d.o: $(MODS)
+test_relax.o: $(MODS)
+test_mg.o: $(MODS)
+test_mgp.o: $(MODS)
+test_csr.o: $(MODS)
+two_grid.o: $(MODS)
+test_mg2d.o: $(MODS)
+test_relax2d.o: $(MODS)
+test_transf2d.o: $(MODS)
+transfer1d_col.o: $(MODS)
+test_relax2d_cyl.o: $(MODS)
+test_transf2d_cyl.o: $(MODS)
+test_mg2d_cyl.o: $(MODS)
+poisson_fd.o: fdmat_mod.o
+fdmat_mod.o: stencil_mod.o parmg_mod.o
+stencil_mod.o: gvector_mod.o
+parmg_mod.o: gvector_mod.o stencil_mod.o
+
+clean:
+	rm -f *.o *.mod *~ ../wk/*~ a.out lib
+
+distclean: clean
+	rm -f ../wk/*.h5 ../wk/fort.* *.eps \
+	   transfer1d test_relax test_mg test_mgp test_csr two_grid \
+           test_mg2d test_relax2d test_transf2d transfer1d_col \
+           test_relax2d_cyl test_transf2d_cyl test_mg2d_cyl
+
+#include $(HOST).mk
diff --git a/multigrid/src/README_mod.txt b/multigrid/src/README_mod.txt
new file mode 100644
index 0000000..224e499
--- /dev/null
+++ b/multigrid/src/README_mod.txt
@@ -0,0 +1,91 @@
+/**
+ * @file README_mod.txt
+ *
+ * @brief
+ *
+ * @copyright
+ * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+ * SPC (Swiss Plasma Center)
+ *
+ * spclibs is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * @authors
+ * (in alphabetical order)
+ * @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+ */
+1) Module parmg
+   ============
+  - types:
+      grid2_type
+       INTEGER, DIMENSION(2)    :: s, e, s0, e0, npt_loc, npt
+       REAL(rkind), ALLOCATABLE :: x(:), y(:)
+       TYPE(gvector_2d)         :: f, v
+       TYPE(stencil_2d)         :: fdmat, restrict_mat
+
+   - module procedures:
+       creat_grid
+       coarse (1d, 2d)
+       exchange (gvector)
+       prolong  (gvector)
+       restrict (gvector)
+       jacobi
+       get_resids
+       init_restrict (gvector)
+       disp (0d, 1d array of int)
+       get_lmax
+
+   - Uses
+       gvector
+       stencil
+
+2) Module stencil
+   ==============
+  - types:
+        LOGICAL                  :: nluni
+        INTEGER, DIMENSION(2)    :: ldim, gdim, s0, e0, s, e
+        INTEGER                  :: npoints
+        INTEGER, ALLOCATABLE     :: id(:,:)
+        REAL(rkind), ALLOCATABLE :: val(:,:,:)
+
+  - module procedures:
+      init
+      vmx
+      laplacian
+      putmat
+
+  - operators:
+      *: vmx
+
+   - Uses
+       gvector
+
+3) Module gvector
+   ==============
+  - types:
+      gvector_2d
+        INTEGER, DIMENSION(2)    :: s, e ! vector internal bounds
+        INTEGER, DIMENSION(2)    :: g    ! ghost cell widths
+        REAL(rkind), ALLOCATABLE :: val(:,:)
+
+  - module procedures:
+      constructor (gvector_2d)
+      disp
+      norm2 (serial, mpi)
+      
+  - operators:
+      + : add_scal, add_vec
+      - : minus_vec, substract_vec
+      * : scale_left, scale_right
+
+  - assignment:
+      = : from_scal, from_vec 
diff --git a/multigrid/src/fdmat_mod.f90 b/multigrid/src/fdmat_mod.f90
new file mode 100644
index 0000000..c925169
--- /dev/null
+++ b/multigrid/src/fdmat_mod.f90
@@ -0,0 +1,600 @@
+!>
+!> @file fdmat_mod.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+MODULE fdmat_mod
+!
+  USE multigrid
+  IMPLICIT NONE
+!
+  INTERFACE fdmat
+     MODULE PROCEDURE fdmat_stencil
+     MODULE PROCEDURE fdmat_gen, fdmat_csr, fdmat_cds
+  END INTERFACE fdmat
+  INTERFACE ibc_fdmat
+     MODULE PROCEDURE ibc_fdmat_stencil
+     MODULE PROCEDURE ibc_fdmat_gen, ibc_fdmat_csr, ibc_fdmat_cds
+  END INTERFACE ibc_fdmat
+  INTERFACE ibc_rhs
+     MODULE PROCEDURE ibc_rhs_g
+  END INTERFACE ibc_rhs
+!
+CONTAINS
+!--------------------------------------------------------------------------------
+  SUBROUTINE fdmat_stencil(grid, fdense, icrosst, mat)
+!
+!   Construct model GBS FD partitioned matrix
+!
+    USE iso_fortran_env, ONLY : rkind => real64
+    USE stencil, ONLY : stencil_2d
+    USE parmg, ONLY   : grid2_type
+!
+    TYPE(grid2_type)        :: grid
+    REAL(rkind), INTENT(in) :: icrosst
+    TYPE(stencil_2d)        :: mat
+    INTERFACE
+       FUNCTION fdense(x)
+         USE iso_fortran_env, ONLY : rkind => real64
+         REAL(rkind), INTENT(in) :: x(:)
+         REAL(rkind)             :: fdense(SIZE(x))
+       END FUNCTION fdense
+    END INTERFACE
+!
+    REAL(rkind) :: dx, dy
+    REAL(rkind),ALLOCATABLE :: dense(:)
+    REAL(rkind) :: stencil_arr(-1:1,-1:1), zdiag(-1:1,-1:1), corr
+    INTEGER     :: nx, ny, i, j, k, d(2)
+!
+!   Grid properties
+!
+    nx = grid%npt(1)-1
+    ny = grid%npt(2)-1
+    dx = grid%x(1)-grid%x(0)  ! Assume equidistant grid
+    dy = grid%y(1)-grid%y(0)
+    ALLOCATE(dense(0:nx))     ! electron densoty vary only along x
+!
+!   Stencil array
+!
+    stencil_arr = 0.0d0
+    zdiag   = 0.0d0
+    corr    = 1.d0+icrosst**2/4.0d0
+    stencil_arr(0,0)   = -2.0d0/dx/dx-2.0d0/dy/dy*corr
+    stencil_arr(-1,0)  =  1.0/dx/dx
+    stencil_arr(1,0)   =  1.0/dx/dx
+    stencil_arr(0,-1)  =  1.0/dy/dy*corr
+    stencil_arr(0,1)   =  1.0/dy/dy*corr
+    stencil_arr(-1,-1) =  icrosst*1.0/4.0/dx/dy
+    stencil_arr(1,-1)  =  icrosst*(-1.0/4.0/dx/dy)
+    stencil_arr(-1,1)  =  icrosst*(-1.0/4.0/dx/dy)
+    stencil_arr(1,1)   =  icrosst*(1.0/4.0/dx/dy)
+    zdiag(0,0)     =  1.0d0
+    dense(:) = fdense(grid%x(:))
+!
+!  Assemble the stencil by scanning local grid points
+!
+    DO k=0,mat%npoints-1
+       d(:) = mat%id(k,:)
+       DO j=mat%s(2),mat%e(2)
+          DO i=mat%s(1),mat%e(1)
+             mat%val(i,j,k) = stencil_arr(d(1),d(2)) + &
+                  &           zdiag(d(1),d(2))*dense(i)
+          END DO
+       END DO
+    END DO
+!
+    DEALLOCATE(dense)
+  END SUBROUTINE fdmat_stencil
+!--------------------------------------------------------------------------------
+  SUBROUTINE ibc_rhs_g(f, s0, e0, prb)
+!
+!   Impose BC on rhs
+!
+    USE iso_fortran_env, ONLY : rkind => real64
+    USE gvector, ONLY : gvector_2d
+!
+    TYPE(gvector_2d), INTENT(inout) :: f
+    CHARACTER(len=*), INTENT(in)    :: prb
+    INTEGER, INTENT(in)             :: s0(2), e0(2)
+    INTEGER :: s(2), e(2)
+!
+    s = f%s
+    e = f%e
+!
+    IF(s(1).EQ.s0(1)) THEN
+       IF(prb(1:1).EQ.'d') THEN    ! West face
+          f%val(s(1),s(2):e(2)) = 0.0_rkind
+       ELSE
+          f%val(s(1),s(2):e(2)) = 0.5_rkind*f%val(s(1),s(2):e(2))
+       END IF
+    END IF
+    IF(e(1).EQ.e0(1)) THEN 
+       IF(prb(2:2).EQ.'d') THEN    ! East face
+          f%val(e(1),s(2):e(2)) = 0.0_rkind
+       ELSE
+          f%val(e(1),s(2):e(2)) = 0.5_rkind*f%val(e(1),s(2):e(2))
+       END IF
+    END IF
+    IF(s(2).EQ.s0(2)) THEN
+       IF(prb(3:3).EQ.'d') THEN    ! South face
+          f%val(s(1):e(1),s(2)) = 0.0_rkind
+       ELSE
+          f%val(s(1):e(1),s(2)) = 0.5_rkind*f%val(s(1):e(1),s(2))
+       END IF
+    END IF
+    IF(e(2).EQ.e0(2)) THEN
+       IF(prb(4:4).EQ.'d') THEN    ! North face
+          f%val(s(1):e(1),e(2)) = 0.0_rkind
+       ELSE
+          f%val(s(1):e(1),e(2)) = 0.5_rkind*f%val(s(1):e(1),e(2))
+       END IF
+    END IF
+  END SUBROUTINE ibc_rhs_g
+!--------------------------------------------------------------------------------
+  SUBROUTINE ibc_fdmat_stencil(mat, prb)
+!
+!   Impose BC on matrix
+!
+    USE iso_fortran_env, ONLY : rkind => real64
+    USE stencil, ONLY : stencil_2d
+!
+    TYPE(stencil_2d), INTENT(inout) :: mat
+    CHARACTER(len=*), INTENT(in)    :: prb
+!
+    INTEGER :: s0(2), e0(2), s(2), e(2)
+!
+    s0 = mat%s0
+    e0 = mat%e0
+    s = mat%s
+    e = mat%e
+!
+!   Neumann BC
+!   WARNING: Divide the stencil by 2 => should do the same for RHS!
+!
+!                       N
+!                   6---7---8
+!                   |   |   |
+!                W  4---0---5   E   Numbering of stencil
+!                   |   |   |
+!                   1---2---3
+!                       S
+!
+    IF(s(1).EQ.s0(1) .AND. prb(1:1).EQ.'n') THEN    ! West face
+       mat%val(s(1),s(2):e(2),1) = 0.0_rkind
+       mat%val(s(1),s(2):e(2),3) = 0.0_rkind
+       mat%val(s(1),s(2):e(2),4) = 0.0_rkind
+       mat%val(s(1),s(2):e(2),5) = 2.0d0*mat%val(s(1),s(2):e(2),5)
+       mat%val(s(1),s(2):e(2),6) = 0.0_rkind
+       mat%val(s(1),s(2):e(2),8) = 0.0_rkind
+       mat%val(s(1),s(2):e(2),:) = 0.5_rkind*mat%val(s(1),s(2):e(2),:)
+    END IF
+    IF(e(1).EQ.e0(1) .AND. prb(2:2).EQ.'n') THEN    ! East face
+       mat%val(e(1),s(2):e(2),1) = 0.0_rkind
+       mat%val(e(1),s(2):e(2),3) = 0.0_rkind
+       mat%val(e(1),s(2):e(2),4) = 2.0d0*mat%val(e(1),s(2):e(2),4)
+       mat%val(e(1),s(2):e(2),5) = 0.0_rkind
+       mat%val(e(1),s(2):e(2),6) = 0.0_rkind
+       mat%val(e(1),s(2):e(2),8) = 0.0_rkind
+       mat%val(e(1),s(2):e(2),:) = 0.5_rkind*mat%val(e(1),s(2):e(2),:)
+    END IF
+    IF(s(2).EQ.s0(2) .AND. prb(3:3).EQ.'n') THEN    ! South face
+       mat%val(s(1):e(1),s(2),1) = 0.0_rkind
+       mat%val(s(1):e(1),s(2),2) = 0.0_rkind
+       mat%val(s(1):e(1),s(2),3) = 0.0_rkind
+       mat%val(s(1):e(1),s(2),6) = 0.0_rkind
+       mat%val(s(1):e(1),s(2),7) = 2.0d0*mat%val(s(1):e(1),s(2),7)
+       mat%val(s(1):e(1),s(2),8) = 0.0_rkind
+       mat%val(s(1):e(1),s(2),:) = 0.5_rkind*mat%val(s(1):e(1),s(2),:)
+    END IF
+    IF(e(2).EQ.e0(2) .AND. prb(4:4).EQ.'n') THEN    ! North face
+       mat%val(s(1):e(1),e(2),1) = 0.0_rkind
+       mat%val(s(1):e(1),e(2),2) = 2.0d0*mat%val(s(1):e(1),e(2),2)
+       mat%val(s(1):e(1),e(2),3) = 0.0_rkind
+       mat%val(s(1):e(1),e(2),6) = 0.0_rkind
+       mat%val(s(1):e(1),e(2),7) = 0.0_rkind
+       mat%val(s(1):e(1),e(2),8) = 0.0_rkind
+       mat%val(s(1):e(1),e(2),:) = 0.5_rkind*mat%val(s(1):e(1),e(2),:)
+    END IF
+!
+!   Dirichlet BC
+!
+    IF(s(1).EQ.s0(1) .AND. prb(1:1).EQ.'d') THEN    ! West face
+       mat%val(s(1),s(2):e(2),:) = 0.0_rkind
+       mat%val(s(1),s(2):e(2),0) = 1.0_rkind
+    END IF
+    IF(e(1).EQ.e0(1) .AND. prb(2:2).EQ.'d') THEN    ! East face
+       mat%val(e(1),s(2):e(2),:) = 0.0_rkind
+       mat%val(e(1),s(2):e(2),0) = 1.0_rkind
+    END IF
+    IF(s(2).EQ.s0(2) .AND. prb(3:3).EQ.'d') THEN    ! South face
+       mat%val(s(1):e(1),s(2),:) = 0.0_rkind
+       mat%val(s(1):e(1),s(2),0) = 1.0_rkind
+    END IF
+    IF(e(2).EQ.e0(2) .AND. prb(4:4).EQ.'d') THEN    ! North face
+       mat%val(s(1):e(1),e(2),:) = 0.0_rkind
+       mat%val(s(1):e(1),e(2),0) = 1.0_rkind
+    END IF
+  END SUBROUTINE ibc_fdmat_stencil
+!--------------------------------------------------------------------------------
+  SUBROUTINE fdmat_gen(grid, fdense, icrosst, noinit)
+!
+!   Generic version
+!
+    TYPE(grid2d), INTENT(inout)   :: grid
+    DOUBLE PRECISION, INTENT(in)  :: icrosst
+    LOGICAL, INTENT(in), OPTIONAL :: noinit
+    INTERFACE
+       FUNCTION fdense(x)
+         DOUBLE PRECISION, INTENT(in) :: x(:)
+         DOUBLE PRECISION :: fdense(SIZE(x))
+       END FUNCTION fdense
+    END INTERFACE
+!
+    IF(ALLOCATED(grid%mata)) THEN
+       CALL fdmat_csr(grid, fdense, icrosst, grid%mata, noinit)
+    ELSE
+       CALL fdmat_cds(grid, fdense, icrosst, grid%mata_cds, noinit)
+    END IF
+  END SUBROUTINE fdmat_gen
+!--------------------------------------------------------------------------------
+  SUBROUTINE fdmat_cds(grid, fdense, icrosst, mat, noinit)
+!
+!  Construct FD matrix
+!
+    TYPE(grid2d), INTENT(in)      :: grid
+    DOUBLE PRECISION, INTENT(in)  :: icrosst
+    TYPE(cds_mat), INTENT(inout)  :: mat
+    LOGICAL, INTENT(in), OPTIONAL :: noinit
+    INTERFACE
+       FUNCTION fdense(x)
+         DOUBLE PRECISION, INTENT(in) :: x(:)
+         DOUBLE PRECISION :: fdense(SIZE(x))
+       END FUNCTION fdense
+    END INTERFACE
+!
+    INTEGER :: n, nx, ny
+    INTEGER :: kl, ku, k
+    INTEGER :: ix, ix2, jx, iy, jy, iy2,  irow
+    DOUBLE PRECISION :: lx, ly, dx, dy, mele
+    DOUBLE PRECISION :: dense(0:grid%n(1))
+    DOUBLE PRECISION :: stencil(-1:1,-1:1), zdiag(-1:1,-1:1)
+    DOUBLE PRECISION :: corr
+    LOGICAL :: run_init
+    INTEGER, ALLOCATABLE :: dists(:)
+!--------------------------------------------------------------------------------
+    run_init = .TRUE.
+    IF(PRESENT(noinit)) run_init = .NOT.noinit
+!
+!   Grid properties
+!
+    nx = grid%n(1)
+    ny = grid%n(2)
+    dx = grid%x(1) - grid%x(0)
+    dy = grid%y(1) - grid%y(0)
+    lx = grid%x(nx)
+    ly = grid%y(ny)
+    n = PRODUCT(grid%rank)  ! Rank of matrix
+!
+!   Stencil
+!
+    stencil = 0.0d0
+    zdiag = 0.0d0
+!
+    corr = 1.d0+icrosst**2/4.0d0
+    stencil(0,0)   = -2.0d0/dx/dx-2.0d0/dy/dy*corr
+    stencil(-1,0)  =  1.0/dx/dx
+    stencil(1,0)   =  1.0/dx/dx
+    stencil(0,-1)  =  1.0/dy/dy*corr
+    stencil(0,1)   =  1.0/dy/dy*corr
+    stencil(-1,-1) =  icrosst*1.0/4.0/dx/dy
+    stencil(1,-1)  =  icrosst*(-1.0/4.0/dx/dy)
+    stencil(-1,1)  =  icrosst*(-1.0/4.0/dx/dy)
+    stencil(1,1)   =  icrosst*(1.0/4.0/dx/dy)
+    zdiag(0,0)     = 1.0d0
+!
+!   9-point stencil "diagonal storage"
+!
+    kl=4
+    ku=4
+    ALLOCATE(dists(-kl:ku))
+    DO iy2=-1,1
+       DO ix2=-1,1
+          k=3*iy2+ix2
+          dists(k) = iy2*(nx+1) + ix2
+       END DO
+    END DO
+!
+    IF(run_init) THEN 
+       CALL init(n, dists, 1, mat)
+    END IF
+!
+!   Assemble matrix by scanning all grid points
+!
+    dense(:) = fdense(grid%x(:))
+    DO iy=0,ny
+       DO ix=0,nx
+          irow = iy*(nx+1)+ix+1
+          DO iy2=-1,1
+             jy=iy+iy2
+             IF(jy.GE.0 .AND. jy.LE.ny) THEN
+                DO ix2=-1,1
+                   jx=ix+ix2
+                   IF(jx.GE.0 .AND.jx.LE.nx) THEN
+                      mele = stencil(ix2,iy2) + zdiag(ix2,iy2)*dense(ix)
+                      k=3*iy2+ix2
+                      mat%val(irow,k) = mele
+                   END IF
+                END DO
+             END IF
+          END DO
+       END DO
+    END DO
+!
+    DEALLOCATE(dists)
+  END SUBROUTINE fdmat_cds
+!--------------------------------------------------------------------------------
+  SUBROUTINE fdmat_csr(grid, fdense, icrosst, mat, noinit)
+!
+!  Construct FD matrix
+!
+    TYPE(grid2d), INTENT(in)      :: grid
+    DOUBLE PRECISION, INTENT(in)  :: icrosst
+    TYPE(csr_mat), INTENT(inout)  :: mat
+    LOGICAL, INTENT(in), OPTIONAL :: noinit
+    INTERFACE
+       FUNCTION fdense(x)
+         DOUBLE PRECISION, INTENT(in) :: x(:)
+         DOUBLE PRECISION :: fdense(SIZE(x))
+       END FUNCTION fdense
+    END INTERFACE
+!
+    INTEGER :: n, nx, ny
+    INTEGER :: ix, ix2, jx, iy, jy, iy2,  irow, icol
+    DOUBLE PRECISION :: lx, ly, dx, dy, mele
+    DOUBLE PRECISION :: dense(0:grid%n(1))
+    DOUBLE PRECISION :: stencil(-1:1,-1:1), zdiag(-1:1,-1:1)
+    DOUBLE PRECISION :: corr
+    LOGICAL :: run_init
+!--------------------------------------------------------------------------------
+    run_init = .TRUE.
+    IF(PRESENT(noinit)) run_init = .NOT.noinit
+!
+!   Grid properties
+!
+    nx = grid%n(1)
+    ny = grid%n(2)
+    dx = grid%x(1) - grid%x(0)
+    dy = grid%y(1) - grid%y(0)
+    lx = grid%x(nx)
+    ly = grid%y(ny)
+    n = PRODUCT(grid%rank)  ! Rank of matrix
+!
+!   Stencil
+!
+    stencil = 0.0d0
+    zdiag = 0.0d0
+!
+    corr = 1.d0+icrosst**2/4.0d0
+    stencil(0,0)   = -2.0d0/dx/dx-2.0d0/dy/dy*corr
+    stencil(-1,0)  =  1.0/dx/dx
+    stencil(1,0)   =  1.0/dx/dx
+    stencil(0,-1)  =  1.0/dy/dy*corr
+    stencil(0,1)   =  1.0/dy/dy*corr
+    stencil(-1,-1) =  icrosst*1.0/4.0/dx/dy
+    stencil(1,-1)  =  icrosst*(-1.0/4.0/dx/dy)
+    stencil(-1,1)  =  icrosst*(-1.0/4.0/dx/dy)
+    stencil(1,1)   =  icrosst*(1.0/4.0/dx/dy)
+    zdiag(0,0)     = 1.0d0
+!
+!   Create CSR matrix
+    IF(run_init) THEN 
+       CALL init(n, 1, mat)
+    END IF
+!
+!   Assemble matrix by scanning all grid points
+!
+    dense(:) = fdense(grid%x(:))
+    DO iy=0,ny
+       DO ix=0,nx
+          irow = numb(ix,iy)
+          DO iy2=-1,1
+             jy=iy+iy2
+             IF(jy.GE.0 .AND. jy.LE.ny) THEN
+                DO ix2=-1,1
+                   jx=ix+ix2
+                   IF(jx.GE.0 .AND.jx.LE.nx) THEN
+                      icol=numb(jx,jy)
+                      mele = stencil(ix2,iy2) + zdiag(ix2,iy2)*dense(ix)
+                      CALL putele(mat, irow, icol,mele)
+                   END IF
+                END DO
+             END IF
+          END DO
+       END DO
+    END DO
+!--------------------------------------------------------------------------------
+  CONTAINS
+    INTEGER FUNCTION numb(ix,iy)
+      INTEGER, INTENT(in) :: ix, iy
+      INTEGER :: stride
+      stride = grid%rank(1)
+      numb = iy*stride + (ix+1)
+    END FUNCTION numb
+!--------------------------------------------------------------------------------
+  END SUBROUTINE fdmat_csr
+!++
+  SUBROUTINE ibc_fdmat_gen(grid, prb)
+!
+!   Generic version
+!
+    TYPE(grid2d), INTENT(inout)  :: grid
+    CHARACTER(len=*), INTENT(in) :: prb
+!
+    IF(ALLOCATED(grid%mata)) THEN
+       CALL ibc_fdmat_csr(grid, grid%mata, prb)
+    ELSE
+       CALL ibc_fdmat_cds(grid, grid%mata_cds, prb)
+    END IF
+!
+  END SUBROUTINE ibc_fdmat_gen
+!++
+  SUBROUTINE ibc_fdmat_csr(grid, mat, prb)
+!
+!   Impose BC
+!
+    TYPE(grid2d), INTENT(in)     :: grid
+    TYPE(csr_mat), INTENT(inout) :: mat
+    CHARACTER(len=*), INTENT(in) :: prb
+!
+    DOUBLE PRECISION :: arow(mat%rank)
+    INTEGER :: nx, ny, nx1, ny1, n, iy, irow, irow1
+!--------------------------------------------------------------------------------  
+    nx = grid%n(1)
+    ny = grid%n(2)
+    nx1=nx+1
+    ny1=ny+1
+    n = nx1*ny1
+!
+!  Dirichelt BC on West/East
+!
+    IF(prb.EQ.'dddd') THEN
+       DO irow=1,ny*nx1+1,nx1
+          arow=0.0d0; arow(irow)=1.0d0
+          CALL putrow(mat, irow, arow)
+          irow1=irow+nx
+          arow=0.0d0; arow(irow1)=1.0d0
+          CALL putrow(mat, irow1, arow)
+       END DO
+!
+!   Neumann on West/East
+!   WARNING: Divide the stencil by 2 => should do the same for RHS!
+!
+    ELSE IF(prb.EQ.'nndd') THEN
+       DO irow=1,ny*nx1+1,nx1
+          iy = irow/nx1
+          CALL getrow(mat, irow, arow)
+          arow(irow+1) = 2.0d0*arow(irow+1)
+          IF(iy.GT.0) arow(irow-nx) = 0.0d0
+          IF(iy.LT.ny) arow(irow+nx+2) = 0.0d0
+          arow(:) = 0.5d0*arow(:)
+          CALL putrow(mat, irow, arow)
+       END DO
+       DO irow=nx1,n,nx1
+          iy = irow/nx1
+          CALL getrow(mat, irow, arow)
+          arow(irow-1) = 2.0d0*arow(irow-1)
+          IF(iy.GT.0) arow(irow-nx-2) = 0.0d0
+          IF(iy.LT.ny) arow(irow+nx) = 0.0d0
+          arow(:) = 0.5d0*arow(:)
+          CALL putrow(mat, irow, arow)
+       END DO
+!
+    ELSE
+       WRITE(*,'(a,a4,a)') 'ibc_mat: prb = ', prb, ' NOT IMPLEMENTED!'
+       STOP
+    END IF
+!
+!  Dirichlet BC on South/North sides
+!
+    DO irow=1,nx1
+       arow=0.0d0;  arow(irow)=1.0d0
+       CALL putrow(mat, irow, arow)
+    END DO
+    DO irow1=ny*nx1+1,n
+       arow=0.0d0;  arow(irow1)=1.0d0
+       CALL putrow(mat, irow1, arow)
+    END DO
+!--------------------------------------------------------------------------------
+  END SUBROUTINE ibc_fdmat_csr
+!++
+  SUBROUTINE ibc_fdmat_cds(grid, mat, prb)
+!
+!   Impose BC
+!
+    TYPE(grid2d), INTENT(in)     :: grid
+    TYPE(cds_mat), INTENT(inout) :: mat
+    CHARACTER(len=*), INTENT(in) :: prb
+!
+    INTEGER :: nx, ny, iy, irow
+    INTEGER :: n, nx1, ny1
+!--------------------------------------------------------------------------------  
+    nx = grid%n(1)
+    ny = grid%n(2)
+    nx1=nx+1
+    ny1=ny+1
+    n = nx1*ny1
+!
+!         2  ==   3  ==  4
+!         |       |      |
+!        -1  ==   0  ==  1
+!         |       |      |
+!        -4  ==  -3  == -2
+!
+!  Dirichelt BC on West/East
+!
+    IF(prb.EQ.'dddd') THEN
+       DO irow=1,ny*nx1+1,nx1
+          mat%val(irow,:) = 0.0d0
+          mat%val(irow,0) = 1.0d0
+          mat%val(irow+nx,:) = 0.0d0
+          mat%val(irow+nx,0) = 1.0d0
+       END DO
+!
+!   Neumann on West/East
+!   WARNING: Divide the stencil by 2 => should do the same for RHS!
+!
+    ELSE IF(prb.EQ.'nndd') THEN
+       DO irow=1,ny*nx1+1,nx1
+          iy = irow/nx1
+          IF(iy.GT.0)  mat%val(irow,-2)=0.0d0
+          IF(iy.LT.ny) mat%val(irow,+4)=0.0d0
+          mat%val(irow,+1)=2.0d0*mat%val(irow,+1)
+          mat%val(irow,:)=0.5d0*mat%val(irow,:)
+       END DO
+       DO irow=nx1,n,nx1
+          iy = irow/nx1
+          IF(iy.GT.0)  mat%val(irow,-4)=0.0d0
+          IF(iy.LT.ny) mat%val(irow,+2)=0.0d0
+          mat%val(irow,-1)=2.0d0*mat%val(irow,-1)
+          mat%val(irow,:)=0.5d0*mat%val(irow,:)
+       END DO
+!
+    ELSE
+       WRITE(*,'(a,a4,a)') 'ibc_mat: prb = ', prb, ' NOT IMPLEMENTED!'
+       STOP
+    END IF
+!
+!  Dirichlet BC on South/North sides
+!
+    DO irow=1,nx1
+       mat%val(irow,:) = 0.0d0
+       mat%val(irow,0) = 1.0d0
+    END DO
+    DO irow=ny*nx1+1,n
+       mat%val(irow,:) = 0.0d0
+       mat%val(irow,0) = 1.0d0
+    END DO
+!--------------------------------------------------------------------------------
+  END SUBROUTINE ibc_fdmat_cds
+!++
+END MODULE fdmat_mod
diff --git a/multigrid/src/gvector_mod.f90 b/multigrid/src/gvector_mod.f90
new file mode 100644
index 0000000..2f0162e
--- /dev/null
+++ b/multigrid/src/gvector_mod.f90
@@ -0,0 +1,231 @@
+!>
+!> @file gvector_mod.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+MODULE gvector
+!
+!    Implementation of 2D vectors with arbitrary
+!    vector bounds and ghost cell width.
+!
+!  T.M. Tran, CRPP-EPFL
+!  September 2013
+!
+  USE iso_fortran_env, ONLY : rkind => real64
+  IMPLICIT NONE
+  PRIVATE
+  PUBLIC :: gvector_2d, disp, norm2, &
+       &    OPERATOR(+), OPERATOR(-), OPERATOR(*), &
+       &    ASSIGNMENT(=)
+
+  TYPE gvector_2d
+     INTEGER, DIMENSION(2)    :: s, e ! vector internal bounds
+     INTEGER, DIMENSION(2)    :: g    ! ghost cell widths
+     REAL(rkind), ALLOCATABLE :: val(:,:)
+  END TYPE gvector_2d
+
+  INTERFACE gvector_2d
+     MODULE PROCEDURE constructor
+  END INTERFACE gvector_2d
+  INTERFACE OPERATOR(+)
+     MODULE PROCEDURE add_scal
+     MODULE PROCEDURE add_vec
+  END INTERFACE OPERATOR(+)
+  INTERFACE OPERATOR(-)
+     MODULE PROCEDURE minus_vec
+     MODULE PROCEDURE substract_vec
+  END INTERFACE OPERATOR(-)
+  INTERFACE OPERATOR(*)
+     MODULE PROCEDURE scale_left
+     MODULE PROCEDURE scale_right
+  END INTERFACE OPERATOR(*)
+  INTERFACE ASSIGNMENT(=)
+     MODULE PROCEDURE from_scal
+     MODULE PROCEDURE from_vec
+  END INTERFACE ASSIGNMENT(=)
+  INTERFACE norm2
+     MODULE PROCEDURE norm2_gvector_2d
+     MODULE PROCEDURE norm2_root_g_2d
+     MODULE PROCEDURE norm2_all_g_2d
+  END INTERFACE norm2
+
+CONTAINS
+!=======================================================================
+  FUNCTION constructor(s, e, g) RESULT(res)
+    INTEGER, INTENT(in)           :: s(2), e(2)
+    INTEGER, OPTIONAL, INTENT(in) :: g(2)
+    TYPE(gvector_2d)              :: res
+    INTEGER :: lb(2), ub(2)
+    res%g= 0
+    IF(PRESENT(g)) res%g = g
+    res%s = s
+    res%e = e
+    lb = res%s - res%g
+    ub = res%e + res%g
+    ALLOCATE(res%val(lb(1):ub(1),lb(2):ub(2)))
+!
+! Initialize to 0 on all ghost cells
+    res%val(lb(1):s(1)-1,:) = 0._rkind
+    res%val(e(1)+1:ub(1),:) = 0._rkind
+    res%val(:,lb(2):s(2)-1) = 0._rkind
+    res%val(:,e(2)+1:ub(2)) = 0._rkind
+  END FUNCTION constructor
+!=======================================================================
+  FUNCTION add_vec(lhs, rhs) RESULT(res)
+    TYPE(gvector_2d), INTENT(in) :: lhs, rhs
+    TYPE(gvector_2d)             :: res
+    res = gvector_2d(lhs%s, lhs%e, lhs%g)
+    res%val(res%s(1):res%e(1),res%s(2):res%e(2)) = &
+         &  lhs%val(res%s(1):res%e(1),res%s(2):res%e(2)) + &
+         &  rhs%val(res%s(1):res%e(1),res%s(2):res%e(2))
+  END FUNCTION add_vec
+!=======================================================================
+  FUNCTION add_scal(lhs, rhs) RESULT(res)
+    TYPE(gvector_2d), INTENT(in) :: lhs
+    REAL(rkind), INTENT(in)      :: rhs
+    TYPE(gvector_2d)             :: res
+    res = gvector_2d(lhs%s, lhs%e, lhs%g)
+    res%val(res%s(1):res%e(1),res%s(2):res%e(2)) = &
+         &  lhs%val(res%s(1):res%e(1),res%s(2):res%e(2)) + rhs
+  END FUNCTION add_scal
+!=======================================================================
+  FUNCTION minus_vec(this) RESULT(res)
+    TYPE(gvector_2d), INTENT(in) :: this
+    TYPE(gvector_2d)             :: res
+    res = gvector_2d(this%s, this%e, this%g)
+    res%val(res%s(1):res%e(1),res%s(2):res%e(2)) = &
+         &  -this%val(res%s(1):res%e(1),res%s(2):res%e(2))
+  END FUNCTION minus_vec
+!=======================================================================
+  FUNCTION substract_vec(lhs, rhs) RESULT(res)
+    TYPE(gvector_2d), INTENT(in) :: lhs, rhs
+    TYPE(gvector_2d)             :: res
+    res = gvector_2d(lhs%s, lhs%e, lhs%g)
+    res%val(res%s(1):res%e(1),res%s(2):res%e(2)) = &
+         &  lhs%val(res%s(1):res%e(1),res%s(2):res%e(2)) - &
+         &  rhs%val(res%s(1):res%e(1),res%s(2):res%e(2))
+  END FUNCTION substract_vec
+!=======================================================================
+  FUNCTION scale_left(lhs, rhs) RESULT(res)
+    REAL(rkind), INTENT(in)      :: lhs
+    TYPE(gvector_2d), INTENT(in) :: rhs
+    TYPE(gvector_2d)             :: res
+    res = gvector_2d(rhs%s, rhs%e, rhs%g)
+    res%val(res%s(1):res%e(1),res%s(2):res%e(2)) = &
+         &  lhs * rhs%val(res%s(1):res%e(1),res%s(2):res%e(2))
+  END FUNCTION scale_left
+!=======================================================================
+  FUNCTION scale_right(lhs, rhs) RESULT(res)
+    TYPE(gvector_2d), INTENT(in) :: lhs
+    REAL(rkind), INTENT(in)      :: rhs
+    TYPE(gvector_2d)             :: res
+    res = gvector_2d(lhs%s, lhs%e, lhs%g)
+    res%val(res%s(1):res%e(1),res%s(2):res%e(2)) = &
+         &  lhs%val(res%s(1):res%e(1),res%s(2):res%e(2)) * rhs
+  END FUNCTION scale_right
+!=======================================================================
+
+  SUBROUTINE from_vec(lhs, rhs)
+    TYPE(gvector_2d), INTENT(inout) :: lhs
+    REAL(rkind), INTENT(in)         :: rhs(:,:)
+    INTEGER :: n(2)
+    n = lhs%e - lhs%s + 1
+    IF(SIZE(rhs,1).NE.n(1) .OR. SIZE(rhs,2).NE.n(2)) THEN
+       PRINT*, 'from_vec: sizes of rhs and lhs not equal!'
+       STOP
+    END IF
+    lhs%val(lhs%s(1):lhs%e(1),lhs%s(2):lhs%e(2)) = rhs(:,:)
+  END SUBROUTINE from_vec
+!=======================================================================
+
+  SUBROUTINE from_scal(lhs, rhs)
+    TYPE(gvector_2d), INTENT(inout) :: lhs
+    REAL(rkind), INTENT(in)         :: rhs
+    lhs%val(lhs%s(1):lhs%e(1),lhs%s(2):lhs%e(2)) = rhs
+  END SUBROUTINE from_scal
+!=======================================================================
+  SUBROUTINE disp(str,this)
+    CHARACTER(len=*), INTENT(in) :: str
+    TYPE(gvector_2d), INTENT(in) :: this
+    INTEGER :: i
+    WRITE(*,'(/a,3(" (",i0,",",i0,") "))') str//': s, e, g =',&
+         &   this%s, this%e, this%g
+    DO i=LBOUND(this%val,1),UBOUND(this%val,1)
+       WRITE(*,'(10(1pe11.3))') (this%val(i,:))
+    END DO
+  END SUBROUTINE disp
+!=======================================================================
+
+  FUNCTION norm2_gvector_2d(this) RESULT(res)
+    TYPE(gvector_2d), INTENT(in) :: this
+    REAL(rkind)                  :: res
+    res = NORM2( this%val(this%s(1):this%e(1), &
+         &  this%s(2):this%e(2)) )
+  END FUNCTION norm2_gvector_2d
+!=======================================================================
+  FUNCTION norm2_root_g_2d(x, comm, root) RESULT(res)
+!
+!  Vector norm of 2d distributed array with ghost cells
+!
+    USE mpi
+    TYPE(gvector_2d), INTENT(in) :: x
+    INTEGER, INTENT(in)          :: comm
+    INTEGER, INTENT(in)          :: root
+    REAL(rkind)                  :: res
+    INTEGER, PARAMETER       :: ndim=2
+    INTEGER, DIMENSION(ndim) :: s, e
+    REAL(rkind)              :: res_loc
+    INTEGER                  :: me, ierr
+!
+    CALL mpi_comm_rank(comm, me, ierr)
+    s = x%s
+    e = x%e
+    res_loc = SUM(x%val(s(1):e(1),s(2):e(2))**2)
+    res = 0.0
+    CALL mpi_reduce(res_loc, res, 1, MPI_DOUBLE_PRECISION, MPI_SUM,&
+         &          root, comm, ierr)
+    IF(me.EQ.root) res = SQRT(res)    
+  END FUNCTION norm2_root_g_2d
+!=======================================================================
+  FUNCTION norm2_all_g_2d(x, comm) RESULT(res)
+!
+!  Vector norm of 2d distributed array with ghost cells
+!
+    USE mpi
+    TYPE(gvector_2d), INTENT(in) :: x
+    INTEGER, INTENT(in)          :: comm
+    REAL(rkind)                  :: res
+    INTEGER, PARAMETER       :: ndim=2
+    INTEGER, DIMENSION(ndim) :: s, e
+    REAL(rkind)              :: res_loc
+    INTEGER                  :: ierr
+!
+    s = x%s
+    e = x%e
+    res_loc = SUM(x%val(s(1):e(1),s(2):e(2))**2)
+    CALL mpi_allreduce(res_loc, res, 1, MPI_DOUBLE_PRECISION, MPI_SUM, &
+         &          comm, ierr)
+    res = SQRT(res)    
+  END FUNCTION norm2_all_g_2d
+!=======================================================================
+END MODULE gvector
diff --git a/multigrid/src/parmg_mod.f90 b/multigrid/src/parmg_mod.f90
new file mode 100644
index 0000000..5de78ca
--- /dev/null
+++ b/multigrid/src/parmg_mod.f90
@@ -0,0 +1,722 @@
+!>
+!> @file parmg_mod.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+MODULE parmg
+!
+!   parmg: Utilities for parallel multigrid
+!
+!   T.M. Tran, CRPP-EPFL
+!   December 2013
+!
+  USE mpi
+  USE iso_fortran_env, ONLY : rkind => real64
+  USE gvector, ONLY : gvector_2d
+  USE stencil, ONLY : stencil_2d
+  IMPLICIT NONE
+!
+  PRIVATE
+  PUBLIC :: grid2_type, mg_info, create_grid, mg, get_lmax, coarse, disp, exchange, &
+       &    get_resids, jacobi, prolong, init_restrict, restrict, &
+       &    norm_vec, norm_mat
+!
+  TYPE grid2_type
+     INTEGER, DIMENSION(2)    :: s, e, s0, e0, npt_loc, npt
+     REAL(rkind), ALLOCATABLE :: x(:)
+     REAL(rkind), ALLOCATABLE :: y(:)
+     TYPE(gvector_2d)         :: f
+     TYPE(gvector_2d)         :: v
+     TYPE(stencil_2d)         :: fdmat
+     TYPE(stencil_2d)         :: restrict_mat
+  END TYPE grid2_type
+!
+  TYPE mg_info
+     INTEGER :: comm   ! Communicator
+     INTEGER :: nu1    ! Relaxation down sweeps
+     INTEGER :: nu2    ! Relaxation up sweeps
+     INTEGER :: mu     ! mu-cycle number
+     INTEGER :: nu0    ! Number of FMG cycles
+     INTEGER :: levels ! Number of mg levels
+     INTEGER :: direct_solve_nits ! Jacobit nits for direct_solve
+     CHARACTER(len=4) :: relax    ! Type of relation
+     REAL(rkind)      :: omega    ! for weighted Jacobi relaxation
+  END TYPE mg_info
+!
+  INTERFACE create_grid
+     MODULE PROCEDURE create_grid_2d
+  END INTERFACE create_grid
+  INTERFACE mg
+     MODULE PROCEDURE mg_2d
+  END INTERFACE mg
+  INTERFACE coarse
+     MODULE PROCEDURE coarse_1d, coarse_2d
+  END INTERFACE coarse
+  INTERFACE exchange
+     MODULE PROCEDURE exchange_g_2d, exchange_g_2d_new
+  END INTERFACE exchange
+  INTERFACE prolong
+     MODULE PROCEDURE prolong_g_2d
+  END INTERFACE prolong
+  INTERFACE restrict
+     MODULE PROCEDURE restrict_g_2d
+  END INTERFACE restrict
+  INTERFACE jacobi
+     MODULE PROCEDURE jacobi_stencila_2d
+     MODULE PROCEDURE jacobi_stencilg_2d
+  END INTERFACE jacobi
+  INTERFACE get_resids
+     MODULE PROCEDURE resids_stencila_2d
+     MODULE PROCEDURE resids_stencilg_2d
+  END INTERFACE get_resids
+  INTERFACE disp
+     MODULE PROCEDURE dispi_0, dispi_1
+  END INTERFACE disp
+CONTAINS
+!
+!--------------------------------------------------------------------------------
+  SUBROUTINE create_grid_2d(x, y, s_in, e_in, id, prb, grids, comm)
+!
+!    Create arrays of partitionned grids
+!
+    USE stencil, ONLY : init
+!
+    REAL(rkind), INTENT(in)      :: x(0:), y(0:)     ! Global coordinates
+    INTEGER, INTENT(in)          :: s_in(2), e_in(2) ! Partition of finest grid
+    INTEGER, INTENT(in)          :: id(:,:)          ! Structure of stencil
+    CHARACTER(len=*), INTENT(in) :: prb
+    INTEGER, INTENT(in)          :: comm
+    TYPE(grid2_type)             :: grids(:)
+!
+    INTEGER :: levels
+    INTEGER :: s0(2), e0(2), s(2), e(2)
+    INTEGER :: npt_loc(2), npt_loc_min(2), npt_glob(2)
+    INTEGER :: l, ierr
+!
+    levels = SIZE(grids)
+    s = s_in
+    e = e_in
+!
+    DO l=1,levels
+       IF(l.GT.1) THEN
+          CALL coarse(s,e)
+       END IF
+       npt_loc = e-s+1
+       CALL mpi_allreduce(s, s0, 2, MPI_INTEGER, MPI_MIN, comm, ierr)
+       CALL mpi_allreduce(e, e0, 2, MPI_INTEGER, MPI_MAX, comm, ierr)
+       CALL mpi_allreduce(npt_loc, npt_loc_min, 2, MPI_INTEGER, MPI_MIN, comm, ierr)
+       IF(MINVAL(npt_loc_min) .LT. 2) THEN
+          PRINT*, 'CREATE_GRID: number intervals too small!'
+          STOP
+       END IF
+       npt_glob = e0+1
+       grids(l)%s0 = s0
+       grids(l)%e0 = e0
+       grids(l)%s  = s
+       grids(l)%e  = e
+       grids(l)%npt_loc = npt_loc
+       grids(l)%npt     = npt_glob
+       grids(l)%f = gvector_2d(s, e, [1,1]) ! Arrays with ghost cell
+       grids(l)%v = gvector_2d(s, e, [1,1])
+       ALLOCATE(grids(l)%x(s0(1):e0(1)))    ! Global coords (x,y)
+       ALLOCATE(grids(l)%y(s0(2):e0(2)))
+       IF(l.EQ.1) THEN
+          grids(1)%x = x
+          grids(1)%y = y
+       ELSE
+          grids(l)%x(:) = grids(l-1)%x(0::2)
+          grids(l)%y(:) = grids(l-1)%y(0::2)
+       END IF
+    END DO
+!
+!   Set up FD matrix
+!
+    DO l=1,levels
+       s = grids(l)%s
+       e = grids(l)%e
+       CALL init(s, e, id, .FALSE., grids(l)%fdmat, comm)
+    END DO
+!
+!   Set up restriction stencil
+!
+  DO l=2,levels
+     CALL init_restrict(grids(l), prb, comm)
+  END DO
+!
+  END SUBROUTINE create_grid_2d
+!--------------------------------------------------------------------------------
+  RECURSIVE SUBROUTINE mg_2d(grids, info, l)
+!
+!   Execute a recursive V-cycle
+!
+    USE gvector, ONLY : ASSIGNMENT(=), OPERATOR(+)
+    TYPE(grid2_type), INTENT(inout)  :: grids(:)
+    TYPE(mg_info), INTENT(in)        :: info
+    INTEGER, INTENT(in)              :: l
+!
+    TYPE(gvector_2d)      :: resids, v_prolong
+    INTEGER, DIMENSION(2) :: s0, e0, s, e, g=[1,1]
+    INTEGER :: comm, levels, k
+!
+    comm   = info%comm
+    levels = info%levels
+!
+    s0 = grids(l)%s0; e0 = grids(l)%e0
+    s  = grids(l)%s;  e  = grids(l)%e
+    resids = gvector_2d(s, e, g)
+!
+    IF(l.EQ.levels) THEN
+       CALL direct_solve(grids(l)%fdmat, grids(l)%v, grids(l)%f)
+    ELSE
+       CALL relax(info%nu1)
+       resids = get_resids(comm, grids(l)%fdmat, grids(l)%v, grids(l)%f)
+       CALL exchange(comm, resids)
+       CALL restrict(grids(l+1)%restrict_mat, resids, grids(l+1)%f)
+       grids(l+1)%v = 0.0d0
+!
+!   Only 1 call to the coarsest level
+       DO k=1,MERGE(info%mu, 1, (l+1).NE.levels) 
+          CALL mg(grids, info, l+1)
+       END DO
+!
+       v_prolong = gvector_2d(s, e, g)
+       CALL exchange(comm, grids(l+1)%v)
+       CALL prolong(grids(l+1)%v, v_prolong)
+       grids(l)%v = grids(l)%v + v_prolong
+       CALL relax(info%nu2)
+    END IF
+!
+  CONTAINS
+    SUBROUTINE relax(nu)
+      INTEGER, INTENT(in) :: nu
+      SELECT CASE (TRIM(info%relax))
+      CASE ("jac")
+         CALL jacobi(comm, grids(l)%fdmat, info%omega, nu, grids(l)%v, grids(l)%f)
+      CASE default
+         PRINT*, "relax ", info%relax, " NOT IMPLEMENTED!"
+         STOP
+      END SELECT
+    END SUBROUTINE relax
+    SUBROUTINE direct_solve(mat, v, f)
+      TYPE(stencil_2d), INTENT(in)    :: mat
+      TYPE(gvector_2d), INTENT(inout) :: v
+      TYPE(gvector_2d), INTENT(in)    :: f
+      v = 0.0d0
+      CALL jacobi(comm, mat, 1.0_rkind, info%direct_solve_nits, v, f)
+    END SUBROUTINE direct_solve
+  END SUBROUTINE mg_2d
+!--------------------------------------------------------------------------------
+  FUNCTION get_lmax(s_in, npt_loc, npt_min, comm) RESULT(lmax)
+!
+!   Get max number of levels on all processes
+!
+    INTEGER             :: lmax
+    INTEGER, INTENT(in) :: s_in, npt_loc, npt_min, comm
+    INTEGER :: me, ierr
+    INTEGER :: s, e, kpt_loc, kpt, kpt_loc_min
+!    
+    CALL mpi_comm_rank(comm, me, ierr)
+    s = s_in
+    kpt_loc = npt_loc
+    e = s+npt_loc-1
+    lmax = 1
+    DO
+       CALL mpi_allreduce(kpt_loc, kpt_loc_min, 1, MPI_INTEGER, MPI_MIN, &
+            &             comm, ierr)
+       CALL mpi_allreduce(kpt_loc, kpt, 1, MPI_INTEGER, MPI_SUM, &
+            &             comm, ierr)
+!
+!   Stop if npt-1 not even or when minumum local npt is attained
+       IF(MODULO(kpt-1,2).NE.0 .OR. kpt_loc_min .LE. npt_min) EXIT
+!
+       lmax = lmax+1
+       CALL coarse(s, e)
+       kpt_loc = e-s+1
+    END DO
+!
+  END FUNCTION get_lmax
+!--------------------------------------------------------------------------------
+  SUBROUTINE coarse_1d(s, e)
+!
+!  Compute (s,e) of next coarse grid
+!
+    INTEGER, INTENT(inout)  :: s, e
+    INTEGER :: s0, npt, i
+!
+!   Previous odd indices are discarded
+    s0 = s
+    IF( MODULO(s0,2) .NE. 0 ) THEN 
+       s0 = s+1  
+    END IF
+!
+!   Count local number of points
+    npt = 0
+    DO i=s0,e,2
+       npt = npt+1
+    END DO
+!
+!   Coarse s, e
+    s = s0/2
+    e = s + npt - 1
+  END SUBROUTINE coarse_1d
+!--------------------------------------------------------------------------------
+  SUBROUTINE coarse_2d(s, e)
+!
+!  Compute (s,e) of next coarse grid
+!
+    INTEGER, INTENT(inout)  :: s(2), e(2)
+!
+    CALL coarse_1d(s(1), e(1))
+    CALL coarse_1d(s(2), e(2))
+  END SUBROUTINE coarse_2d
+!--------------------------------------------------------------------------------
+  SUBROUTINE dispi_0(str, a, comm)
+!
+!   Display integer local scalar
+!
+    INTEGER, INTENT(in)          :: a, comm
+    CHARACTER(len=*), INTENT(in) :: str
+    INTEGER :: npes, me, ierr
+    INTEGER, ALLOCATABLE, DIMENSION(:) :: a_gather(:)
+!
+    CALL MPI_COMM_RANK(comm, me, ierr)
+    CALL MPI_COMM_SIZE(comm, npes, ierr)
+    ALLOCATE(a_gather(npes))
+    CALL MPI_GATHER(a, 1, MPI_INTEGER, a_gather, 1, MPI_INTEGER, &
+         &          0, comm, ierr)    
+    IF( me.EQ.0 ) THEN
+       WRITE(*,'(a/(20i6))') str, a_gather
+    END IF
+    DEALLOCATE(a_gather)
+  END SUBROUTINE dispi_0
+!--------------------------------------------------------------------------------
+  SUBROUTINE dispi_1(str, a, comm)
+!
+!   Display integer local array
+!
+    INTEGER, INTENT(in)          :: a(:), comm
+    CHARACTER(len=*), INTENT(in) :: str
+    INTEGER :: npes, me, ierr, n, i
+    INTEGER, ALLOCATABLE, DIMENSION(:) :: a_gather(:,:)
+!
+    n = SIZE(a,1)
+    CALL MPI_COMM_RANK(comm, me, ierr)
+    CALL MPI_COMM_SIZE(comm, npes, ierr)
+    ALLOCATE(a_gather(n,npes))
+    CALL MPI_GATHER(a, n, MPI_INTEGER, a_gather, n, MPI_INTEGER, &
+         &          0, comm, ierr)    
+    IF( me.EQ.0 ) THEN
+       WRITE(*,'(a)') str
+       DO i=1,n
+          WRITE(*,'(20i6)') a_gather(i,:)
+       END DO
+    END IF
+    DEALLOCATE(a_gather)
+  END SUBROUTINE dispi_1
+!--------------------------------------------------------------------------------
+  SUBROUTINE exchange_g_2d_new(comm, u)
+!
+!  Exchange ghost cells with (west,east,south,north) neighbors.
+!  Assume same ghost cells on each dimension:
+!    u%g(1) : number of ghost cells on west and east boundaries
+!    u%g(2) : number of ghost cells on south and north boundaries
+!
+    INTEGER, INTENT(in)             :: comm
+    TYPE(gvector_2d), INTENT(inout) :: u
+    INTEGER  :: neighs(4), ierr
+!
+    CALL mpi_cart_shift(comm, 0, 1, neighs(1), neighs(2), ierr)
+    CALL mpi_cart_shift(comm, 1, 1, neighs(3), neighs(4), ierr)
+    CALL exchange_g_2d(comm, neighs, u)
+  END SUBROUTINE exchange_g_2d_new
+!--------------------------------------------------------------------------------
+  SUBROUTINE exchange_g_2d(comm, neighs, u)
+!
+!  Exchange ghost cells with (west,east,south,north) neighbors.
+!  Assume same ghost cells on each dimension:
+!    u%g(1) : number of ghost cells on west and east boundaries
+!    u%g(2) : number of ghost cells on south and north boundaries
+!
+    INTEGER, INTENT(in)             :: comm
+    INTEGER, INTENT(in)             :: neighs(4)
+    TYPE(gvector_2d), INTENT(inout) :: u
+!
+    INTEGER                  :: cols, rows
+    INTEGER                  :: ierr
+    INTEGER, PARAMETER       :: ndim=2
+    INTEGER, DIMENSION(ndim) :: g, lb, ub, s, e, n
+!
+    s = u%s
+    e = u%e
+    g = u%g
+    lb = s - g
+    ub = e + g
+    n = ub - lb + 1   ! include ghost cells
+!
+!   g(2) matrix full rows with stride n(1)
+    CALL mpi_type_vector(n(2), g(2), n(1), MPI_DOUBLE_PRECISION, rows, ierr)
+    CALL mpi_type_commit(rows, ierr)
+!
+!   g(1) contiguous matrix full columns
+    CALL mpi_type_contiguous(n(1)*g(1), MPI_DOUBLE_PRECISION,  cols, ierr)
+    CALL mpi_type_commit(cols, ierr)
+!
+!  Exchange along first dimension
+    CALL mpi_sendrecv(u%val(s(1),  lb(2)), 1, rows, neighs(1), 0, &
+         &            u%val(e(1)+1,lb(2)), 1, rows, neighs(2), 0, &
+         &                                  comm, MPI_STATUS_IGNORE, ierr)
+    CALL mpi_sendrecv(u%val(e(1)-g(1)+1,lb(2)), 1, rows, neighs(2), 0, &
+         &            u%val(lb(1),      lb(2)), 1, rows, neighs(1), 0, &
+         &                                  comm, MPI_STATUS_IGNORE, ierr)
+!
+!  Exchange along second dimension
+    CALL mpi_sendrecv(u%val(lb(1),s(2)),   1, cols, neighs(3), 0, &
+         &            u%val(lb(1),e(2)+1), 1, cols, neighs(4), 0, &
+         &                                  comm, MPI_STATUS_IGNORE, ierr)
+    CALL mpi_sendrecv(u%val(lb(1),e(2)-g(2)+1),   1, cols, neighs(4), 0, &
+         &            u%val(lb(1),lb(2)),         1, cols, neighs(3), 0, &
+         &                                  comm, MPI_STATUS_IGNORE, ierr)
+  END SUBROUTINE exchange_g_2d
+!--------------------------------------------------------------------------------
+  SUBROUTINE prolong_g_2d(vbar, v)
+!
+!  2D bilinear prolongation
+!
+    TYPE(gvector_2d), INTENT(in)    :: vbar
+    TYPE(gvector_2d), INTENT(inout) :: v
+!
+    INTEGER :: i,j,i1,i2,j1,j2
+!
+    i1 = v%s(1)-MODULO(v%s(1),2); i2 = v%e(1)+MODULO(v%e(1),2)
+    j1 = v%s(2)-MODULO(v%s(2),2); j2 = v%e(2)+MODULO(v%e(2),2)
+!
+!    Even numbered nodes on fine mesh
+!
+    DO j=j1,j2,2
+       DO i=i1,i2,2
+          v%val(i,j) = vbar%val(i/2,j/2)
+       END DO
+    END DO
+!
+!   Linear interpolation on x
+!
+    DO j=j1,j2,2
+       DO i=i1+1,i2-1,2
+          v%val(i,j) = 0.5d0*(v%val(i-1,j)+v%val(i+1,j))
+       END DO
+    END DO
+!
+!   Linear interpolation on y
+!
+    DO j=j1+1,j2-1,2
+       DO i=i1,i2
+          v%val(i,j) = 0.5d0*(v%val(i,j-1)+v%val(i,j+1))
+       END DO
+    END DO
+  END SUBROUTINE prolong_g_2d
+!--------------------------------------------------------------------------------
+  SUBROUTINE init_restrict(grid, prb, comm)
+!
+!  Set up restriction stencil
+!
+    USE stencil, ONLY : init
+    TYPE(grid2_type), INTENT(inout) :: grid
+    CHARACTER(len=*), INTENT(in)    :: prb
+    INTEGER, INTENT(in)             :: comm
+!
+    INTEGER, PARAMETER :: npoints=9, ndim=2
+    INTEGER :: s(2), e(2), n(2), id(9,2)
+    INTEGER :: i, j
+!
+! Stencil structure initialization
+!
+    s = grid%s
+    e = grid%e
+    n = grid%npt-1
+!
+!           N
+!       6---7---8
+!       |   |   |
+!    W  4---0---5   E   Numbering of stencil
+!       |   |   |
+!       1---2---3
+!           S
+!
+    id = RESHAPE([0, -1, 0, 1,-1, 1,-1, 0, 1,  &
+         &        0, -1,-1,-1, 0, 0, 1, 1, 1], &
+         &       [npoints, ndim])
+    CALL init(s, e, id, .FALSE., grid%restrict_mat, comm)
+!
+! Fill in stencil
+!
+    DO j=s(2),e(2)
+       DO i=s(1),e(1)
+          grid%restrict_mat%val(i,j,:) = [4._rkind, &
+         &        1._rkind, 2._rkind, 1._rkind, &
+         &        2._rkind,           2._rkind, &
+         &        1._rkind, 2._rkind, 1._rkind ]&
+         &            / 16._rkind
+       END DO
+    END DO
+!
+! Apply Dirichlet BC
+!
+    IF(s(1).EQ.0 .AND. prb(1:1).EQ.'d') THEN       ! West face
+       grid%restrict_mat%val(s(1),:,3) = 0._rkind
+       grid%restrict_mat%val(s(1),:,5) = 0._rkind
+       grid%restrict_mat%val(s(1),:,8) = 0._rkind
+    END IF
+    IF(e(1).EQ.n(1) .AND. prb(2:2).EQ.'d') THEN    ! East face
+       grid%restrict_mat%val(e(1),:,1) = 0._rkind
+       grid%restrict_mat%val(e(1),:,4) = 0._rkind
+       grid%restrict_mat%val(e(1),:,6) = 0._rkind
+    END IF
+    IF(s(2).EQ.0 .AND. prb(3:3).EQ.'d') THEN       ! South face
+       grid%restrict_mat%val(:,s(2),6) = 0._rkind
+       grid%restrict_mat%val(:,s(2),7) = 0._rkind
+       grid%restrict_mat%val(:,s(2),8) = 0._rkind
+    END IF
+    IF(e(2).EQ.n(2) .AND. prb(4:4).EQ.'d') THEN    ! North face
+       grid%restrict_mat%val(:,e(2),1) = 0._rkind
+       grid%restrict_mat%val(:,e(2),2) = 0._rkind
+       grid%restrict_mat%val(:,e(2),3) = 0._rkind
+    END IF
+  END SUBROUTINE init_restrict
+!--------------------------------------------------------------------------------
+  SUBROUTINE jacobi_stencila_2d(mat, omega, nu, v, f)
+!
+!   Weighted Jacobi relaxation
+!
+    TYPE(stencil_2d),INTENT(in)             :: mat
+    REAL(rkind), INTENT(in)                 :: omega
+    INTEGER, INTENT(in)                     :: nu
+    REAL(rkind), ALLOCATABLE, INTENT(inout) :: v(:,:)
+    REAL(rkind), ALLOCATABLE, INTENT(in)    :: f(:,:)
+!
+    REAL(rkind), ALLOCATABLE :: temp(:,:), inv_diag(:,:)
+    INTEGER, DIMENSION(2)    :: smin, emax, s, e, d, lb, ub
+    INTEGER                  :: it, k, i, j
+!
+    s(:) = mat%s(:)
+    e(:) = mat%e(:)
+    smin(:) = mat%s0(:)
+    emax(:) = mat%e0(:)
+!
+    ALLOCATE(temp(s(1):e(1),s(2):e(2)))
+    ALLOCATE(inv_diag(s(1):e(1),s(2):e(2)))
+!
+    inv_diag(:,:) = omega/mat%val(:,:,0)
+    DO it=1,nu
+       temp(:,:) = f(s(1):e(1),s(2):e(2))
+       DO k=1,mat%npoints-1   ! exclude the diagonal term, f - (L+U)*v
+          d(:) = mat%id(k,:)
+          lb = MAX(smin, smin-d, mat%s)
+          ub = MIN(emax, emax-d, mat%e)
+          DO j=lb(2),ub(2)
+             DO i=lb(1),ub(1)
+                temp(i,j) = temp(i,j) - mat%val(i,j,k)*v(i+d(1),j+d(2))
+             END DO
+          END DO
+       END DO
+       temp = temp *  inv_diag
+       v(s(1):e(1),s(2):e(2)) = (1.d0-omega)*v(s(1):e(1),s(2):e(2)) + temp
+    END DO
+!
+    DEALLOCATE(temp)
+    DEALLOCATE(inv_diag)
+  END SUBROUTINE jacobi_stencila_2d
+!--------------------------------------------------------------------------------
+  SUBROUTINE jacobi_stencilg_2d(comm, mat, omega, nu, v, f)
+!
+!   Weighted Jacobi relaxation
+!
+    USE gvector, ONLY : ASSIGNMENT(=)
+    INTEGER, INTENT(in)             :: comm
+    TYPE(stencil_2d),INTENT(in)     :: mat
+    REAL(rkind), INTENT(in)         :: omega
+    INTEGER, INTENT(in)             :: nu
+    TYPE(gvector_2d), INTENT(inout) :: v
+    TYPE(gvector_2d), INTENT(in)    :: f
+!
+    REAL(rkind), ALLOCATABLE :: temp(:,:), inv_diag(:,:)
+    INTEGER, DIMENSION(2) :: s, e, d
+    INTEGER               :: it, k, i, j
+!
+    s(:) = v%s(:)
+    e(:) = v%e(:)
+!
+    ALLOCATE(temp(s(1):e(1),s(2):e(2)))
+    ALLOCATE(inv_diag(s(1):e(1),s(2):e(2)))
+!
+    inv_diag(:,:) = omega/mat%val(:,:,0)
+    DO it=1,nu
+       CALL exchange(comm, v)
+       temp(:,:) = f%val(s(1):e(1),s(2):e(2))
+       DO k=1,mat%npoints-1   ! exclude the diagonal term, f - (L+U)*v
+          d(:) = mat%id(k,:)
+          DO j=s(2),e(2)
+             DO i=s(1),e(1)
+                temp(i,j) = temp(i,j) - &
+                     &   mat%val(i,j,k) * v%val(i+d(1),j+d(2))
+             END DO
+          END DO
+       END DO
+       temp = temp *  inv_diag
+       v%val(s(1):e(1),s(2):e(2)) = (1.d0-omega) * v%val(s(1):e(1),s(2):e(2)) + &
+            &                       temp
+    END DO
+  END SUBROUTINE jacobi_stencilg_2d
+!--------------------------------------------------------------------------------
+  FUNCTION resids_stencila_2d(mat, xarr, farr) RESULT(res)
+!
+!   Return residuals res = mat*x, where x, farr and res are simple arrays
+!
+    TYPE(stencil_2d), INTENT(in)         :: mat
+    REAL(rkind), ALLOCATABLE, INTENT(in) :: xarr(:,:)
+    REAL(rkind), ALLOCATABLE, INTENT(in) :: farr(:,:)
+    REAL(rkind)           :: res(LBOUND(xarr,1):UBOUND(xarr,1), &
+         &                   LBOUND(xarr,2):UBOUND(xarr,2))
+    INTEGER               :: k, i, j
+    INTEGER, DIMENSION(2) :: smin, emax, d, lb, ub
+!
+    smin(:) = mat%s0(:)
+    emax(:) = mat%e0(:)
+    res = farr
+    DO k=0,mat%npoints-1
+       d(:) = mat%id(k,:)
+       lb = MAX(smin, smin-d, mat%s)
+       ub = MIN(emax, emax-d, mat%e)
+       DO j=lb(2),ub(2)
+          DO i=lb(1),ub(1)
+             res(i,j) = res(i,j) - mat%val(i,j,k)*xarr(i+d(1),j+d(2))
+          END DO
+       END DO
+    END DO
+  END FUNCTION resids_stencila_2d
+!--------------------------------------------------------------------------------
+  FUNCTION resids_stencilg_2d(comm, mat, xarr, farr) RESULT(res)
+!
+!   Return residuals res= f-mat*x, where x, f and res are gvectors
+!
+    INTEGER, INTENT(in)             :: comm
+    TYPE(stencil_2d), INTENT(in)    :: mat
+    TYPE(gvector_2d), INTENT(inout) :: xarr
+    TYPE(gvector_2d), INTENT(in)    :: farr
+    TYPE(gvector_2d)                :: res
+    INTEGER               :: k, i, j
+    INTEGER, DIMENSION(2) :: s, e, d
+!
+    s = xarr%s
+    e = xarr%e
+    res = gvector_2d(xarr%s, xarr%e, xarr%g)
+    res%val = farr%val
+    CALL exchange(comm, xarr)
+    DO k=0,mat%npoints-1
+       d(:) = mat%id(k,:)
+       DO j=s(2),e(2)
+          DO i=s(1),e(1)
+             res%val(i,j) = res%val(i,j) - mat%val(i,j,k)*xarr%val(i+d(1),j+d(2))
+          END DO
+       END DO
+    END DO
+  END FUNCTION resids_stencilg_2d
+!--------------------------------------------------------------------------------
+  SUBROUTINE restrict_g_2d(mat, f, fbar)
+!
+!   2D full weighting restriction
+!
+    TYPE(stencil_2d), INTENT(in)    :: mat
+    TYPE(gvector_2d), INTENT(in)    :: f
+    TYPE(gvector_2d), INTENT(inout) :: fbar
+!
+    INTEGER, DIMENSION(2) :: s, e, d
+    INTEGER               :: k, i, j
+!
+    s = fbar%s
+    e = fbar%e
+!
+! Diagonal contributions: d(0) = (0,0)
+    DO j=s(2),e(2)
+       DO i=s(1),e(1)
+          fbar%val(i,j) = mat%val(i,j,0) * f%val(2*i,2*j)
+       END DO
+    END DO
+!
+    DO k=1,mat%npoints-1
+       d(:) = mat%id(k,:)
+       DO j=s(2),e(2)
+          DO i=s(1),e(1)
+             fbar%val(i,j) = fbar%val(i,j) + &
+               &   mat%val(i,j,k) * f%val(2*i+d(1),2*j+d(2))
+          END DO
+       END DO
+    END DO
+   
+  END SUBROUTINE restrict_g_2d
+!--------------------------------------------------------------------------------
+  REAL(rkind) FUNCTION norm_vec(x, comm, root)
+!
+!  Infinity vector norm
+!
+    TYPE(gvector_2d), INTENT(in)  :: x
+    INTEGER, INTENT(in)           :: comm
+    INTEGER, OPTIONAL, intent(in) :: root
+    REAL(rkind) :: temp
+    INTEGER :: ierr
+    temp = MAXVAL( ABS(x%val(x%s(1):x%e(1),x%s(2):x%e(2))) )
+    IF(PRESENT(root)) THEN
+       CALL mpi_reduce(temp, norm_vec, 1, MPI_DOUBLE_PRECISION, MPI_MAX, &
+            &          root, comm, ierr)
+    ELSE
+       CALL mpi_allreduce(temp, norm_vec, 1, MPI_DOUBLE_PRECISION, MPI_MAX, &
+            &             comm, ierr)
+    END IF
+  END FUNCTION norm_vec
+!--------------------------------------------------------------------------------
+  REAL(rkind) FUNCTION norm_mat(mat, comm, root)
+!
+!  Infinity matrix norm
+!
+    TYPE(stencil_2d), INTENT(in)  :: mat
+    INTEGER, INTENT(in)           :: comm
+    INTEGER, OPTIONAL, intent(in) :: root
+    REAL(rkind) :: arr_temp(mat%s(1):mat%e(1),mat%s(2):mat%e(2))
+    REAL(rkind) :: temp
+    INTEGER :: i, j, s(2), e(2), ierr
+    s = mat%s; e = mat%e
+    DO j=s(2),e(2)
+       DO i=s(1),e(1)
+          arr_temp(i,j) = SUM(ABS(mat%val(i,j,:)))
+       END DO
+    END DO
+    temp = MAXVAL(arr_temp)
+    IF(PRESENT(root)) THEN
+       CALL mpi_reduce(temp, norm_mat, 1, MPI_DOUBLE_PRECISION, MPI_MAX, &
+            &          root, comm, ierr)
+    ELSE
+       CALL mpi_allreduce(temp, norm_mat, 1, MPI_DOUBLE_PRECISION, MPI_MAX, &
+            &             comm, ierr)
+    END IF
+  END FUNCTION norm_mat
+!--------------------------------------------------------------------------------
+END MODULE parmg
diff --git a/multigrid/src/partition.f90 b/multigrid/src/partition.f90
new file mode 100644
index 0000000..1a3b2e6
--- /dev/null
+++ b/multigrid/src/partition.f90
@@ -0,0 +1,77 @@
+!>
+!> @file partition.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+  USE mpi
+  USE pputils2, ONLY : dist1d
+  USE parmg,    ONLY : get_lmax, coarse, disp
+  IMPLICIT NONE
+!
+  INTEGER :: me, npes, ierr
+  INTEGER :: n, npt, npt_loc, s, e
+  INTEGER :: l, lmax
+!
+  CALL MPI_INIT(ierr)
+  CALL MPI_COMM_RANK(MPI_COMM_WORLD, me, ierr)
+  CALL MPI_COMM_SIZE(MPI_COMM_WORLD, npes, ierr)
+!
+  IF(me.EQ.0) THEN
+     WRITE(*,*) 'Enter n'
+     READ(*,*) n
+  END IF
+  CALL mpi_bcast(n,1,MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+!
+!   Partition ot finest grid
+!
+  npt = n+1
+  CALL dist1d(MPI_COMM_WORLD, 0, npt, s, npt_loc)
+  e = s+npt_loc-1
+  CALL disp('start index=', s, MPI_COMM_WORLD)
+!
+!   Max number of levels
+!
+  lmax = get_lmax(s, npt_loc, 1, MPI_COMM_WORLD)
+  IF(me.EQ.0) WRITE(*,'(a,i0)') 'Max number of levels ', lmax
+!
+!   Grid coarsening
+!
+  DO l=1,lmax
+     IF(l.GT.1) THEN
+        CALL coarse(s, e)
+        npt_loc = e-s+1
+        CALL mpi_allreduce(npt_loc, npt, 1, MPI_INTEGER, MPI_SUM, &
+             &             MPI_COMM_WORLD, ierr)
+     END IF
+     IF(me.EQ.0) WRITE(*, '(a,i3)') 'level', l
+     CALL disp('s      ', s, MPI_COMM_WORLD)
+     CALL disp('e      ', e, MPI_COMM_WORLD)
+     CALL disp('npt_loc', npt_loc, MPI_COMM_WORLD)
+     CALL disp('npt    ', npt, MPI_COMM_WORLD)
+  END DO
+!
+  CALL MPI_FINALIZE(ierr)
+!
+!+++++
+END PROGRAM main
diff --git a/multigrid/src/poisson_fd.f90 b/multigrid/src/poisson_fd.f90
new file mode 100644
index 0000000..d44100d
--- /dev/null
+++ b/multigrid/src/poisson_fd.f90
@@ -0,0 +1,485 @@
+!>
+!> @file poisson_fd.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+! Solving the following 2d PDE using finite differences:
+!
+!   d2/dx2 f(x,y) + tau*d2/dxdy f(x,y) + d2/dy2 f(x,y) = w(x,y),  x,y in [0:Lx][0:Ly]
+!      w(x,y) = -4*pi^2 *[(kx^2/Lx^2+ky^2/Ly^2)*cos(2*kx*pi*x/Lx)*sin(2*ky*pi*y/Ly)
+!               -tau*(kx*ky)/(Lx*Ly)*sin(2*kx*pi*x/Lx)*cos(2*ky*pi*y/Lx)]
+!
+!   West, East boundaries:   Neumann
+!   South, North boundaries: Dirichlet
+!
+!   Analytic solution : f(x,y) = cos(2*kx*pi*x/Lx)*sin(2*kx*pi*y/Ly)
+!
+  USE multigrid
+  USE fdmat_mod
+  IMPLICIT NONE
+  INCLUDE 'mpif.h'
+!
+  INTEGER, PARAMETER :: nnumx=32
+!
+  INTEGER :: ierr, np, me
+  DOUBLE PRECISION, PARAMETER :: PI=4.0d0*ATAN(1.0d0)
+  DOUBLE PRECISION :: Lx, Ly, icrosst, beta, miome
+  INTEGER :: n, nx,ny,nz,kx,ky
+  CHARACTER(len=4) :: prb
+  INTEGER :: nits
+  DOUBLE PRECISION :: atol, rtol
+  LOGICAL :: nldirect, nldebug
+!
+  TYPE(mg_info) :: info  ! info for MG
+  INTEGER :: levels, nnu, mu, nu0
+!
+  INTEGER  :: inu, nu1(nnumx), nu2(nnumx), niter(nnumx)
+  DOUBLE PRECISION :: titer(nnumx)
+!
+  LOGICAL :: nlfixed
+  DOUBLE PRECISION :: omega
+  CHARACTER(len=4) :: mat_type, relax
+!
+  DOUBLE PRECISION :: dx, dy
+  INTEGER :: ix, iy, l, its
+  DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:), dense(:)
+  TYPE(grid2d), ALLOCATABLE :: grids(:)
+!
+  DOUBLE PRECISION, ALLOCATABLE, TARGET :: sol_ana2d(:,:), sol_direct2d(:,:)
+  DOUBLE PRECISION, POINTER             :: sol_ana(:), sol_direct(:)
+  DOUBLE PRECISION :: err_direct, resid_direct
+  DOUBLE PRECISION :: norma, normb
+  DOUBLE PRECISION, ALLOCATABLE :: rresid(:), resid(:), errdisc(:)
+  DOUBLE PRECISION :: t0, tsetup, tmat(2), tdirect, tbsolve
+  DOUBLE PRECISION, EXTERNAL ::  mem
+!
+  NAMELIST /parameters/ prb, mat_type,nx, ny, nz, kx, ky, Lx, Ly, icrosst, beta, &
+       &                miome, nldebug, nlfixed, levels, nnu, nu1, nu2, mu, nu0, &
+       &                relax,omega, nldirect, nits, atol, rtol
+!--------------------------------------------------------------------------------
+!                       1.0 Prologue
+!
+  CALL MPI_INIT(ierr)
+  CALL MPI_COMM_RANK(MPI_COMM_WORLD,me,ierr)
+  CALL MPI_COMM_SIZE(MPI_COMM_WORLD,np,ierr)
+!
+!   Default inputs
+!
+  nx=32
+  ny=32
+  nz=1
+  kx=1
+  ky=1
+  icrosst=1.0d0
+  Lx = 1.0D0
+  Ly = 1.0D0
+  beta = 0d0
+  miome = 200d0
+  nldebug = .FALSE.
+  prb = 'dddd'
+  mat_type = 'cds'
+  nldirect = .TRUE.
+!
+  nlfixed = .FALSE.
+  levels = 2
+  nnu = 1
+  nu1 = 1
+  nu2 = 1
+  mu = 1
+  nu0 = 1
+  nits = 10
+  atol = 1.e-8; rtol = 1.e-8
+  relax = 'jac'
+  omega = 0.6667
+!
+  IF(me==0) THEN
+     READ(*,parameters)
+     WRITE(*,parameters)
+  END IF
+!
+! Send input parameters to other processors
+!
+  CALL MPI_BCAST(nx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL MPI_BCAST(ny, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL MPI_BCAST(nz, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL MPI_BCAST(kx, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL MPI_BCAST(ky, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL MPI_BCAST(icrosst, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+  CALL MPI_BCAST(Lx,1,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD, ierr)
+  CALL MPI_BCAST(Ly,1,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD, ierr)
+  CALL MPI_BCAST(beta,1,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD, ierr)
+  CALL MPI_BCAST(miome, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+!
+  CALL mpi_bcast(nldebug, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(nlfixed, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(nldirect, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(nnu, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(nu1, nnumx, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(nu2, nnumx, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(nu0, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(mu, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(nits, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(relax, 4, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(mat_type, 4, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(omega, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(atol, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(rtol, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+  CALL mpi_bcast(prb, LEN(prb), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
+!
+  IF(nnu.GT.nnumx) THEN
+     IF(me.EQ.0) THEN
+        PRINT*, 'Value of nnu larger than', nnumx
+     END IF
+     CALL mpi_finalize(ierr)
+     STOP
+  END IF
+!
+!   Adjust number of levels and fill mg info.
+!
+  levels = MIN(levels, get_lmax(nx), get_lmax(ny))
+  info%nu1 = nu1(1)
+  info%nu2 = nu2(1)
+  info%mu = mu
+  info%nu0 = nu0
+  info%levels = levels
+  info%relax = relax
+  info%omega = omega
+!--------------------------------------------------------------------------------
+!                       2.0 Setup grids
+!
+!   Grid on the finest level
+!
+  dx = lx/REAL(nx,8)
+  dy = ly/REAL(ny,8)
+  ALLOCATE(x(0:nx), y(0:ny))
+  DO ix=0,nx
+     x(ix) = ix*dx
+  END DO
+  DO iy=0,ny
+     y(iy) = iy*dy
+  END DO
+  WRITE(*,'(a,3(1pe12.3))') 'dx, dy, dx/dy =', dx, dy, dx/dy
+!
+  ALLOCATE(dense(0:nx))
+  dense = fdense(x)
+!
+!   Set up grids
+!
+  t0 = mpi_wtime()
+  ALLOCATE(grids(levels))
+  CALL create_grid_fd(x, y, grids, info, mat_type=mat_type, debug=nldebug)
+  WRITE(*,'(5a6)') 'l', 'nx', 'ny', 'rx', 'ry'
+  WRITE(*,'(5i6)') (l, grids(l)%n, grids(l)%rank, l=1,levels)
+  IF(nldebug) THEN
+     CALL printmat('** Prolongation matrix in 1st dim.**', grids(2)%transf(1))
+     CALL printmat('** Prolongation matrix in 2nd dim.**', grids(2)%transf(2))
+  END IF
+!
+!   Set BC on grid transfer matrices
+!
+  IF(prb.EQ.'dddd') CALL ibc_transf(grids,1,3) ! Direction X
+  CALL ibc_transf(grids,2,3)                   ! Direction Y
+  tsetup = mpi_wtime()-t0
+!--------------------------------------------------------------------------------
+!                       3.0 Problem discretization
+!
+!  Construct FD matrix and impose BC on all grids
+!
+  t0=mpi_wtime()
+  DO l=1,levels
+     CALL fdmat(grids(l), fdense, icrosst)
+     IF(mat_type.EQ.'csr') CALL to_mat(grids(l)%mata)
+  END DO
+  tmat(1) = mpi_wtime()-t0
+!
+  t0=mpi_wtime()
+  DO l=1,levels
+     CALL ibc_fdmat(grids(l), prb)
+  END DO
+  tmat(2) = mpi_wtime()-t0
+!
+!   Set RHS and impose BC on the fiest grid
+!
+  grids(1)%f(:,:) = frhs(x,y)
+!
+  IF(prb.EQ.'dddd') THEN
+     grids(1)%f(0,:) = 0.0d0   ! Dirichlet on west and east
+     grids(1)%f(nx,:) = 0.0d0
+  ELSE IF(prb.EQ.'nndd') THEN  ! Neumann on west and east
+     grids(1)%f(0,:) = 0.5d0*grids(1)%f(0,:)
+     grids(1)%f(nx,:) = 0.5d0*grids(1)%f(nx,:)
+  END IF
+  grids(1)%f(:,0) = 0.0d0    ! Dirichlet on south and north
+  grids(1)%f(:,ny) = 0.0d0
+!
+!--------------------------------------------------------------------------------
+!                       4.0 Analytical solutions and RHS at the finest grid (l=1)
+!
+  n = (nx+1)*(ny+1)              ! Number of unknowns
+  ALLOCATE(sol_ana2d(0:nx,0:ny))
+  sol_ana(1:n) => sol_ana2d
+  sol_ana2d(:,:) = fsol(x,y)
+!--------------------------------------------------------------------------------
+!                       5.0 Direct solution at the finest grid (l=1)
+!
+  IF(nldirect) THEN
+     WRITE(*,'(/a)') 'Direct solution for the finest grid problem ...'
+     ALLOCATE(sol_direct2d(0:nx,0:ny))
+     sol_direct(1:n) => sol_direct2d
+!
+     t0 = mpi_wtime()
+     sol_direct = grids(1)%f1d
+     CALL direct_solve(grids(1), sol_direct, debug=nldebug)
+     tdirect = mpi_wtime()-t0
+!
+     t0 = mpi_wtime()
+     sol_direct = grids(1)%f1d
+     CALL direct_solve(grids(1), sol_direct, debug=nldebug)
+     tbsolve = mpi_wtime()-t0
+!
+!  Max norm and residual
+!
+     err_direct = MAXVAL(ABS(sol_direct-sol_ana))
+     resid_direct = residue(grids(1), grids(1)%f1d, sol_direct, 'inf')
+     WRITE(*,'(a,2(1pe12.3))') 'Max norm of error and residual norm', &
+          &                  err_direct, resid_direct
+  END IF
+!--------------------------------------------------------------------------------
+!                       5.0 Iterative solution using MG V-cycle
+!
+  WRITE(*,'(/a)') 'Multigrid MG V-cycles ...'
+  ALLOCATE(errdisc(0:nits))
+  ALLOCATE(resid(0:nits))
+  ALLOCATE(rresid(0:nits))
+!
+!   Norm of A and b
+!
+  IF(mat_type.EQ.'csr') THEN
+     norma = matnorm(grids(1)%mata, 'inf')
+  ELSE
+     norma = matnorm(grids(1)%mata_cds, 'inf')
+  END IF
+  normb = MAXVAL(ABS(grids(1)%f1d))
+  WRITE(*,'(a,2(1pe12.3))') 'Norm A and RHS', norma, normb
+!
+!   Initial guess
+!
+  DO inu=1,nnu
+     info%nu1 = nu1(inu)
+     info%nu2 = nu2(inu)
+     WRITE(*,'(/2(a5,i3,2x))') 'nu1 =', nu1(inu), 'nu2 =', nu2(inu)
+     IF(nlfixed .AND. nldirect) THEN
+        grids(1)%v = sol_direct2d
+     ELSE
+        grids(1)%v = 0.0d0
+     END IF
+!
+     errdisc(0) = MAXVAL(ABS(grids(1)%v1d-sol_ana))
+     resid(0) = residue(grids(1), grids(1)%f1d, grids(1)%v1d, 'inf')
+     rresid(0) = resid(0)  /  ( norma*MAXVAL(ABS(grids(1)%v1d)) + normb )
+     WRITE(*,'(a4,3(a12,a8))') 'its', 'residue', 'ratio', 'disc. err', 'ratio', &
+          &                           'rel. resid', 'ratio'
+     WRITE(*,'(i4,3(1pe12.3,8x))') 0, resid(0), errdisc(0), rresid(0)
+!
+!   Iterations
+!
+     t0 = mpi_wtime()
+     DO its=1,nits
+        CALL mg(grids, info, 1)
+        errdisc(its) = MAXVAL(ABS(grids(1)%v1d-sol_ana))
+        resid(its) = residue(grids(1), grids(1)%f1d, grids(1)%v1d, 'inf')
+        rresid(its) = resid(its)  /  ( norma*MAXVAL(ABS(grids(1)%v1d)) + normb )
+        WRITE(*,'((i4,3(1pe12.3,0pf8.2)))')  its, &
+             &   resid(its),   resid(its)/resid(its-1), &
+             &   errdisc(its), errdisc(its)/errdisc(its-1), &
+             &   rresid(its),  rresid(its)/rresid(its-1)
+        IF(resid(its) .LE. atol .or. rresid(its) .le. rtol) EXIT
+     END DO
+     niter(inu) = MIN(nits,its)
+     titer(inu) = mpi_wtime() - t0
+  END DO
+!--------------------------------------------------------------------------------
+!                       9.0 Epilogue
+!
+!   Display timing
+!
+  WRITE(*,'(/a)') 'Timing ...'
+  WRITE(*,'(a,1pe12.3,i5)') 'Setup time (s)             ', tsetup
+  WRITE(*,'(a,2(1pe12.3))')  'Matrix construction time(s)', tmat
+  WRITE(*,'(a,2(1pe12.3))') 'Direct and bsolve time (s) ', tdirect, tbsolve
+  WRITE(*,'(/3a6,a15)') 'nu1', 'nu2', 'niter', 'Iter time(s)'
+  DO inu=1,nnu
+     WRITE(*,'(3i6,3x,1pe12.3)') nu1(inu), nu2(inu), niter(inu), titer(inu)
+  END DO
+!
+  WRITE(*,'(/a,f12.3)') 'Mem used so far (MB)', mem()
+!
+!   Creata HDF5 file
+!
+  IF(me.EQ.0) CALL h5file
+!
+!   Clean up
+!
+  DEALLOCATE(x)
+  DEALLOCATE(y)
+  DEALLOCATE(dense)
+  DEALLOCATE(grids)
+  DEALLOCATE(sol_ana2d)
+  IF(nldirect)  DEALLOCATE(sol_direct2d)
+  DEALLOCATE(errdisc)
+  DEALLOCATE(resid)
+  DEALLOCATE(rresid)
+!
+  CALL MPI_FINALIZE(ierr)
+!--------------------------------------------------------------------------------
+CONTAINS
+!+++
+  FUNCTION fdense(x)
+!
+!   Return density
+!
+    DOUBLE PRECISION, INTENT(in) :: x(:)
+    DOUBLE PRECISION :: fdense(SIZE(x))
+    fdense(:) = 0.5d0*beta*miome*EXP( -(x(:)-Lx/3d0)**2d0/(Lx/2d0)**2d0 );
+  END FUNCTION fdense
+!+++
+  FUNCTION frhs(x,y)
+!
+!   Return RHS
+!
+    DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
+    DOUBLE PRECISION :: frhs(SIZE(x),SIZE(y))
+    DOUBLE PRECISION :: c, s, d(SIZE(x))
+    DOUBLE PRECISION :: corr
+    INTEGER :: j
+    corr = 1.d0+icrosst**2/4.0d0
+    d(:) = fdense(x(:))
+    IF(prb.EQ.'dddd') THEN
+       DO j=1,SIZE(y)
+          c = COS(2.0d0*pi*ky*y(j)/Ly)
+          s = SIN(2.0d0*pi*ky*y(j)/Ly)
+          frhs(:,j) =  -4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * SIN(2.0d0*pi*kx*x(:)/Lx)*s &
+               &     -icrosst*kx*ky/Lx/Ly * COS(2.0d0*pi*kx*x(:)/Lx)*c ) &
+               &     + d(:) * SIN(2.0d0*pi*kx*x(:)/Lx)*s
+       END DO
+    ELSE IF (prb.EQ.'nndd') THEN
+       DO j=1,SIZE(y)
+          c = COS(2.0d0*pi*ky*y(j)/Ly)
+          s = SIN(2.0d0*pi*ky*y(j)/Ly)
+          frhs(:,j) =  -4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * COS(2.0d0*pi*kx*x(:)/Lx)*s &
+               &     +icrosst*kx*ky/Lx/Ly * SIN(2.0d0*pi*kx*x(:)/Lx)*c ) &
+               &     + d(:) * COS(2.0d0*pi*kx*x(:)/Lx)*s
+       END DO
+    END IF
+!!$    frhs = -frhs
+  END FUNCTION frhs
+!+++
+  FUNCTION fsol(x,y)
+!
+!   Return analytical solution
+!
+    DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
+    DOUBLE PRECISION :: fsol(SIZE(x),SIZE(y))
+    DOUBLE PRECISION :: c
+    INTEGER :: j
+    IF(prb.EQ.'dddd') THEN
+       DO j=1,SIZE(y)
+          c = SIN(2.0d0*pi*ky*y(j)/Ly)
+          fsol(:,j) = SIN(2.0d0*pi*kx*x(:)/Lx)*c
+       END DO
+    ELSE IF (prb.EQ.'nndd') THEN
+       DO j=1,SIZE(y)
+          c = SIN(2.0d0*pi*ky*y(j)/Ly)
+          fsol(:,j) = COS(2.0d0*pi*kx*x(:)/Lx)*c
+       END DO
+    END IF
+  END FUNCTION fsol
+!+++
+  SUBROUTINE h5file
+    USE futils
+    CHARACTER(len=128) :: file='poisson_mg.h5'
+    INTEGER :: fid
+    INTEGER :: l
+    CHARACTER(len=64) :: dsname
+    CALL creatf(file, fid, real_prec='d')
+    CALL attach(fid, '/', 'NX', nx)
+    CALL attach(fid, '/', 'NY', ny)
+    CALL attach(fid, '/', 'KX', kx)
+    CALL attach(fid, '/', 'KY', ky)
+    CALL attach(fid, '/', 'LX', Lx)
+    CALL attach(fid, '/', 'LY', Ly)
+    CALL attach(fid, '/', 'BETA', beta)
+    CALL attach(fid, '/', 'OMEGA', omega)
+    CALL attach(fid, '/', 'RELAX', relax)
+    CALL attach(fid, '/', 'MAT_TYPE', mat_type)
+    CALL attach(fid, '/', 'NITS', nits)
+    CALL attach(fid, '/', 'LEVELS', levels)
+    CALL attach(fid, '/', 'NNU', nnu)
+    CALL attach(fid, '/', 'NU0', nu0)
+    CALL attach(fid, '/', 'MU', mu)
+!
+    CALL putarr(fid, '/nu1', nu1(1:nnu))
+    CALL putarr(fid, '/nu2', nu2(1:nnu))
+    CALL putarr(fid, '/dense', dense)
+    CALL creatg(fid, '/mglevels')
+    DO l=1,levels
+       WRITE(dsname,'("/mglevels/level.",i2.2)') l
+       CALL creatg(fid, TRIM(dsname))
+       IF(mat_type.EQ.'csr') THEN
+          CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata)
+       ELSE
+          CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata_cds)
+       END IF
+       IF(l.GT.1) THEN
+          CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1))
+          CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2))
+       END IF
+       CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x)
+       CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y)
+       CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f)
+       CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v)
+       CALL putarr(fid, TRIM(dsname)//'/f1d', grids(l)%f1d)
+       CALL putarr(fid, TRIM(dsname)//'/v1d', grids(l)%v1d)
+    END DO
+!
+!  Solutions at finest grid
+!
+    CALL creatg(fid, '/solutions')
+    CALL putarr(fid, '/solutions/xg', grids(1)%x)
+    CALL putarr(fid, '/solutions/yg', grids(1)%y)
+    CALL putarr(fid, '/solutions/calc', grids(1)%v)
+    CALL putarr(fid, '/solutions/anal', sol_ana2d)
+    IF(nldirect)  CALL putarr(fid, '/solutions/direct', sol_direct2d)
+!
+    nits=niter(nnu)
+    CALL creatg(fid, '/Iterations')
+    CALL putarr(fid, '/Iterations/residues', resid(0:nits))
+    CALL putarr(fid, '/Iterations/disc_errors', errdisc(0:nits))
+!
+    CALL closef(fid)
+  END SUBROUTINE h5file
+!+++
+
+END PROGRAM main
diff --git a/multigrid/src/ppoisson_fd.f90 b/multigrid/src/ppoisson_fd.f90
new file mode 100644
index 0000000..0fbf675
--- /dev/null
+++ b/multigrid/src/ppoisson_fd.f90
@@ -0,0 +1,418 @@
+!>
+!> @file ppoisson_fd.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+!
+!    Test 2D  parallel multigrid V-cycle
+!
+MODULE mod
+  USE iso_fortran_env, ONLY : rkind => real64
+  IMPLICIT NONE
+!
+  REAL(rkind), PARAMETER :: PI=4.d0*ATAN(1.0d0)
+CONTAINS
+END MODULE mod
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+PROGRAM main
+  USE mpi
+  USE fdmat_mod, ONLY : fdmat, ibc_fdmat, ibc_rhs
+  USE pputils2, ONLY  : dist1d, timera, hostlist
+  USE gvector, ONLY   : gvector_2d, norm2, ASSIGNMENT(=), OPERATOR(-)
+  USE parmg, ONLY     : grid2_type, mg_info, create_grid, mg, exchange, &
+       &                get_resids, disp, norm_vec, norm_mat
+  USE stencil, ONLY   : stencil_2d, putmat
+  USE mod
+  IMPLICIT NONE
+!
+  INTEGER, PARAMETER :: ndims=2
+!
+  INTEGER                   :: me, neighs(4), npes, ierr
+  INTEGER, DIMENSION(ndims) :: coords, comm1d
+  LOGICAL, DIMENSION(ndims) :: periods=[.FALSE.,.FALSE.]
+  LOGICAL                   :: reorder =.FALSE.
+  INTEGER                   :: comm_cart
+  INTEGER, DIMENSION(ndims) :: e0, s0, e, s, npt_glob, npt_loc
+!
+  REAL(rkind), ALLOCATABLE  :: xgrid(:), ygrid(:)
+  INTEGER, ALLOCATABLE      :: id(:,:)
+  REAL(rkind)               :: dx, dy
+  INTEGER                   :: npoints ! Number of points in FD stencil
+  REAL(rkind)               :: t_mg, t_mg0, t_mg_min, t_mg_max
+!
+  TYPE(gvector_2d)          :: v_exact, resids, errs
+  REAL(rkind)               :: norma, normb, normv
+  REAL(rkind), ALLOCATABLE  :: resid_it(:), err_it(:), rresid(:)
+  REAL(rkind)               :: ratio_err, ratio_resid, ratio_rresid
+  INTEGER, DIMENSION(ndims) :: g, npt_loc_min
+  INTEGER                   :: l, i, it
+  CHARACTER(len=64)         :: str
+!
+  TYPE(grid2_type), ALLOCATABLE :: grids(:)
+  TYPE(mg_info)                 :: info    ! info for MG
+!
+!   Input quantities
+!
+  LOGICAL           :: nldebug=.FALSE.
+  CHARACTER(len=64) :: filein = 'ppoisson_fd.in'
+  INTEGER           :: dims(2)=[0,0]
+  CHARACTER(len=4)  :: prb='dddd'
+  CHARACTER(len=4)  :: relax='jac'
+  INTEGER           :: nx=4, ny=4    ! Number of intervals
+  INTEGER           :: kx=1, ky=1
+  REAL(rkind)       :: Lx=1.0, Ly=1.0
+  REAL(rkind)       :: icrosst=1.0, beta=0.0, miome=200.0
+  REAL(rkind)       :: omega=1.0d0
+  INTEGER           :: nits=100, direct_solve_nits=5
+  INTEGER           :: levels=2, nu1=3, nu2=3, mu=1, nu0=1
+  REAL(rkind)       :: rtol=1.e-8, atol=1.e-8, errtol=1.e-3
+!
+  NAMELIST /in/ nldebug, dims, prb, nx, ny, kx, ky, Lx, Ly, icrosst, beta, &
+       &        miome, omega, nits, levels, relax, nu1, nu2, mu, nu0, &
+       &        direct_solve_nits, rtol, atol, errtol
+!================================================================================
+!                        1.0  Prologue
+!
+!  2D process grid
+!
+  CALL mpi_init(ierr)
+  CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
+  CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
+!
+!  Read input filename from commmand line argument
+!
+  IF( command_argument_count() > 0 ) THEN
+     CALL get_command_argument(1, filein)
+  END IF
+  IF(me.EQ.0) WRITE(*,'(a,a)') 'filein = ', TRIM(filein)
+!
+!  Read problem inputs
+!
+  OPEN(unit=99, file=filein, form='formatted')
+  READ(99,in)
+  CLOSE(99)
+!
+  CALL mpi_dims_create(npes, ndims, dims, ierr)
+  CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, comm_cart,&
+       &               ierr)
+  CALL mpi_comm_rank(comm_cart, me, ierr)
+  CALL mpi_cart_coords(comm_cart, me, ndims, coords, ierr)
+  CALL mpi_cart_shift(comm_cart, 0, 1, neighs(1), neighs(2), ierr)
+  CALL mpi_cart_shift(comm_cart, 1, 1, neighs(3), neighs(4), ierr)
+!
+  CALL mpi_cart_sub(comm_cart, [.TRUE.,.FALSE.], comm1d(1), ierr)
+  CALL mpi_cart_sub(comm_cart, [.FALSE.,.TRUE.], comm1d(2), ierr)
+!
+  info%comm              = comm_cart
+  info%nu1               = nu1
+  info%nu2               = nu2
+  info%mu                = mu
+  info%nu0               = nu0
+  info%levels            = levels
+  info%direct_solve_nits = direct_solve_nits
+  info%relax             = relax
+  info%omega             = omega
+!
+  IF(me.EQ.0) THEN
+     WRITE(*, in)
+  END IF
+  IF(nldebug) THEN
+     CALL hostlist(comm_cart)
+  END IF
+!================================================================================
+!                        2.0  2d Grid construction
+!
+!  Partition 2D grid
+!
+  CALL timera(0, 'Grid_construction')
+  npt_glob(1) = nx+1
+  npt_glob(2) = ny+1
+  DO i=1,ndims
+     CALL dist1d(comm1d(i), 0, npt_glob(i), s(i), npt_loc(i))
+     e(i) = s(i) + npt_loc(i) - 1
+  END DO
+  IF(nldebug) THEN
+     WRITE(*,'(a,i3.3,a,2(3i8,", "))') 'PE', me, ' coords, s,e:', &
+          &  (coords(i),s(i),e(i),i=1,ndims)
+  END IF
+!
+!   Global mesh
+!
+  dx = Lx/REAL(nx)
+  dy = Ly/REAL(ny)
+  ALLOCATE(xgrid(0:nx))
+  ALLOCATE(ygrid(0:ny))
+  xgrid = [ (i*dx, i=0,nx) ]
+  ygrid = [ (i*dy, i=0,ny) ]
+!
+!   Create grid structure
+!
+  ALLOCATE(grids(levels))
+  npoints = 9             ! Size of FD stencil
+  ALLOCATE(id(npoints,2))
+  id=RESHAPE([ 0, -1,  0,  1, -1, 1, -1, 0, 1,  & 
+               0, -1, -1, -1,  0, 0,  1, 1, 1], &
+              [npoints,2])
+  CALL create_grid(xgrid, ygrid, s, e, id, prb, grids, comm_cart)
+!
+  IF(nldebug) THEN
+     DO l=1,levels
+        WRITE(str,'(a,i0)') 'Number of local points at level ', l
+        CALL disp(TRIM(str), grids(l)%npt_loc, comm_cart)
+     END DO
+  END IF
+  CALL mpi_reduce(grids(levels)%npt_loc, npt_loc_min, 2, MPI_INTEGER, MPI_MIN, &
+       &          0, comm_cart, ierr)
+  IF(me.EQ.0) THEN
+     WRITE(*,'(a,2i4)') 'Minimum local npt at coarsest grid:', npt_loc_min
+  END IF
+!
+  CALL timera(1, 'Grid_construction')
+!================================================================================
+!                        3.0  FD Operator
+!
+  CALL timera(0, 'FD Operator')
+!
+  DO l=1,levels
+     CALL fdmat(grids(l),  fdense, icrosst, grids(l)%fdmat)
+     CALL ibc_fdmat(grids(l)%fdmat, prb)
+  END DO
+!
+  CALL timera(1, 'FD Operator')
+!================================================================================
+!                        4.0 RHS and exact solution at the finest grid (l=1)
+!
+!    Allocate memory
+!
+  CALL timera(0, 'RHS and exact sol')
+!
+  s0 = grids(1)%s0; e0 = grids(1)%e0
+  s  = grids(1)%s;  e  = grids(1)%e
+  g = [1,1]
+  v_exact = gvector_2d(s, e, g) ! Exact solutions   
+  errs    = gvector_2d(s, e, g) ! Disc. errors
+  resids  = gvector_2d(s, e, g) ! Residuals 
+  ALLOCATE(resid_it(0:nits))
+  ALLOCATE(rresid(0:nits))
+  ALLOCATE(err_it(0:nits))
+!
+!   Set RHS at the finest grid and impose Dirichlet/Neuman BC.
+!
+  grids(1)%f = frhs(xgrid(s(1):e(1)),ygrid(s(2):e(2)))
+  CALL ibc_rhs(grids(1)%f, s0, e0, prb)
+!
+!   Exact solutions
+!
+  v_exact = fexact(xgrid(s(1):e(1)),ygrid(s(2):e(2)))
+!
+  CALL timera(1, 'RHS and exact sol')
+!================================================================================
+!                        5.0  MG V-cycle iteration loop
+!
+!   Norm of A and b
+!
+  norma = norm_mat(grids(1)%fdmat, comm_cart)
+  normb = norm_vec(grids(1)%f, comm_cart)
+  IF(me.EQ.0) THEN
+     WRITE(*,'(a,2(1pe12.3))') 'Norm A and RHS', norma, normb
+  END IF
+!
+  grids(1)%v = 0.0d0
+  resids = get_resids(comm_cart, grids(1)%fdmat, grids(1)%v, grids(1)%f)
+  errs  = grids(1)%v - v_exact
+  err_it(0)   = norm_vec(errs, comm_cart, root=0)
+  resid_it(0) = norm_vec(resids, comm_cart)
+  normv = norm_vec(grids(1)%v, comm_cart)
+  rresid(0) = resid_it(0)  /  ( norma*normv + normb )
+!
+  IF(me.EQ.0) THEN 
+     WRITE(*,'(a4,3(a12,a8))') 'its', 'residue', 'ratio', 'disc. err', 'ratio', &
+          &                           'rel. resid', 'ratio'
+     WRITE(*,'(i4,3(1pe12.3,8X))') 0, resid_it(0), err_it(0),  rresid(0)
+  END IF
+!
+  CALL timera(0, 'MG V-cycle loop')
+  t_mg = 0.0d0
+  DO it=1,nits
+     t_mg0 = mpi_wtime()
+     CALL mg(grids, info, 1)
+     t_mg = t_mg + (mpi_wtime()-t_mg0)
+     resids = get_resids(comm_cart, grids(1)%fdmat, grids(1)%v, grids(1)%f)
+     errs  = grids(1)%v - v_exact
+     err_it(it)   = norm_vec(errs, comm_cart)
+     resid_it(it) = norm_vec(resids, comm_cart)
+     normv = norm_vec(grids(1)%v, comm_cart)
+     rresid(it)  = resid_it(it)  /  ( norma*normv + normb )
+     ratio_err   = err_it(it)/err_it(it-1)
+     ratio_resid = resid_it(it)/resid_it(it-1)
+     ratio_rresid= rresid(it)/ rresid(it-1)
+     IF(me.EQ.0) THEN 
+        WRITE(*,'(i4,3(1pe12.3,0pf8.2))') it, &
+             &   resid_it(it), ratio_resid,&
+             &   err_it(it),   ratio_err, &
+             &   rresid(it),   ratio_rresid
+     END IF
+     IF(resid_it(it) .LE. atol .OR. rresid(it) .LE. rtol .OR. &
+          &  ABS(ratio_err-1._rkind).LT.errtol) THEN 
+        nits = it
+        EXIT
+     END IF
+  END DO
+!
+  CALL timera(1, 'MG V-cycle loop')
+  CALL mpi_reduce(t_mg, t_mg_max, 1, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm_cart, ierr)
+  CALL mpi_reduce(t_mg, t_mg_min, 1, MPI_DOUBLE_PRECISION, MPI_MIN, 0, comm_cart, ierr)
+  IF(me.EQ.0) THEN
+     WRITE(*,'(a,2(1pe10.3)/)') 'Minmax of MG (only) time (s):', t_mg_min, t_mg_max
+  END IF
+!================================================================================
+!                        9.0  Epilogue
+!
+  IF(nldebug) THEN
+     CALL h5file
+  END IF
+!
+  CALL timera(9, '')
+  CALL MPI_FINALIZE(ierr)
+CONTAINS
+!
+!+++
+  FUNCTION fdense(x)
+!
+!   Return density
+!
+    REAL(rkind), INTENT(in) :: x(:)
+    REAL(rkind)              :: fdense(SIZE(x))
+    fdense(:) = 0.5d0*beta*miome*EXP( -(x(:)-Lx/3d0)**2d0/(Lx/2d0)**2d0 );
+  END FUNCTION fdense
+!+++
+  FUNCTION fexact(x,y)
+!
+!   Return analytical solution
+!
+    REAL(rkind), INTENT(in) :: x(:), y(:)
+    REAL(rkind) :: fexact(SIZE(x),SIZE(y))
+    REAL(rkind) :: c
+    INTEGER :: j
+    IF(prb.EQ.'dddd') THEN
+       DO j=1,SIZE(y)
+          c = SIN(2.0d0*pi*ky*y(j)/Ly)
+          fexact(:,j) = SIN(2.0d0*pi*kx*x(:)/Lx)*c
+       END DO
+    ELSE IF (prb.EQ.'nndd') THEN
+       DO j=1,SIZE(y)
+          c = SIN(2.0d0*pi*ky*y(j)/Ly)
+          fexact(:,j) = COS(2.0d0*pi*kx*x(:)/Lx)*c
+       END DO
+    END IF
+  END FUNCTION fexact
+!+++
+  FUNCTION frhs(x,y)
+!
+!   Return RHS
+!
+    REAL(rkind), INTENT(in) :: x(:), y(:)
+    REAL(rkind) :: frhs(SIZE(x),SIZE(y))
+    REAL(rkind) :: c, s, d(SIZE(x))
+    REAL(rkind) :: corr
+    INTEGER :: j
+    corr = 1.d0+icrosst**2/4.0d0
+    d(:) = fdense(x(:))
+    IF(prb.EQ.'dddd') THEN
+       DO j=1,SIZE(y)
+          c = COS(2.0d0*pi*ky*y(j)/Ly)
+          s = SIN(2.0d0*pi*ky*y(j)/Ly)
+          frhs(:,j)=-4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * SIN(2.0d0*pi*kx*x(:)/Lx)*s &
+               &     -icrosst*kx*ky/Lx/Ly * COS(2.0d0*pi*kx*x(:)/Lx)*c ) &
+               &     + d(:) * SIN(2.0d0*pi*kx*x(:)/Lx)*s
+       END DO
+    ELSE IF (prb.EQ.'nndd') THEN
+       DO j=1,SIZE(y)
+          c = COS(2.0d0*pi*ky*y(j)/Ly)
+          s = SIN(2.0d0*pi*ky*y(j)/Ly)
+          frhs(:,j)=-4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * COS(2.0d0*pi*kx*x(:)/Lx)*s &
+               &     +icrosst*kx*ky/Lx/Ly * SIN(2.0d0*pi*kx*x(:)/Lx)*c ) &
+               &     + d(:) * COS(2.0d0*pi*kx*x(:)/Lx)*s
+       END DO
+    END IF
+  END FUNCTION frhs
+!+++!
+  FUNCTION outerprod(x, y) RESULT(r)
+!
+!  outer product
+!
+    REAL(rkind), INTENT(in) :: x(:), y(:)
+    REAL(rkind)             :: r(SIZE(x),SIZE(y))
+    INTEGER :: i, j
+    DO j=1,SIZE(y)
+       DO i=1,SIZE(x)
+          r(i,j) = x(i)*y(j)
+       END DO
+    END DO
+  END FUNCTION outerprod
+!+++
+  SUBROUTINE h5file
+!
+!  Result hdf5 file
+!
+    USE futils
+    CHARACTER(len=128) :: file='ppoisson_fd.h5'
+    INTEGER :: fid
+    CALL creatf(file, fid, real_prec='d', mpicomm=comm_cart)
+    CALL attach(fid, '/', 'PRB', prb)
+    CALL attach(fid, '/', 'RELAX', relax)
+    CALL attach(fid, '/', 'NX', nx)
+    CALL attach(fid, '/', 'NY', ny)
+    CALL attach(fid, '/', 'KX', kx)
+    CALL attach(fid, '/', 'KY', ky)
+    CALL attach(fid, '/', 'LX', Lx)
+    CALL attach(fid, '/', 'LY', Ly)
+    CALL attach(fid, '/', 'ICROSST', icrosst)
+    CALL attach(fid, '/', 'BETA', beta)
+    CALL attach(fid, '/', 'MIOME', miome)
+    CALL attach(fid, '/', 'OMEGA', omega)
+    CALL attach(fid, '/', 'NITS', nits)
+    CALL attach(fid, '/', 'DIRECT_SOLVE_NITS', direct_solve_nits)
+    CALL attach(fid, '/', 'LEVELS', levels)
+    CALL attach(fid, '/', 'NU1', nu1)
+    CALL attach(fid, '/', 'NU2', nu2)
+    CALL attach(fid, '/', 'NU0', nu0)
+    CALL attach(fid, '/', 'MU', mu)
+!
+    CALL putarr(fid, '/xgrid', xgrid, ionode=0)  ! only rank 0 does IO
+    CALL putarr(fid, '/ygrid', ygrid, ionode=0)  ! only rank 0 does IO
+!
+    CALL putarrnd(fid, '/f', grids(1)%f%val, (/1,2/), garea=g)
+    CALL putarrnd(fid, '/v', v_exact%val, (/1,2/), garea=g)
+    CALL putarrnd(fid, '/u', grids(1)%v%val, (/1,2/), garea=g)
+    CALL putarrnd(fid, '/errs',   errs%val,  (/1,2/), garea=(/1,1/))
+    CALL putarrnd(fid, '/resids', resids%val,(/1,2/), garea=(/1,1/))
+!
+    CALL putarr(fid, '/resid', resid_it(0:nits), ionode=0)
+    CALL putarr(fid, '/error', err_it(0:nits), ionode=0)
+!
+    CALL putmat(fid, '/MAT', grids(1)%fdmat)
+    CALL closef(fid)
+  END SUBROUTINE h5file
+!+++
+END PROGRAM main
diff --git a/multigrid/src/stencil_mod.f90 b/multigrid/src/stencil_mod.f90
new file mode 100644
index 0000000..04095c5
--- /dev/null
+++ b/multigrid/src/stencil_mod.f90
@@ -0,0 +1,243 @@
+!>
+!> @file stencil_mod.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+MODULE stencil
+!
+!   stencil_2d: Implement 2D stencil for matrix-less operations
+!
+!   T.M. Tran, CRPP-EPFL
+!   August 2013
+!
+  USE iso_fortran_env, ONLY : rkind => real64
+  IMPLICIT NONE
+!
+  PRIVATE
+  PUBLIC :: stencil_2d, init, vmx, putmat, laplacian, &
+       &    OPERATOR(*)
+!
+  TYPE stencil_2d
+     LOGICAL                  :: nluni
+     INTEGER, DIMENSION(2)    :: ldim, gdim
+     INTEGER, DIMENSION(2)    :: s0, e0, s, e
+     INTEGER                  :: npoints
+     INTEGER, ALLOCATABLE     :: id(:,:)
+     REAL(rkind), ALLOCATABLE :: val(:,:,:)
+  END TYPE stencil_2d
+!
+  INTERFACE init
+     MODULE PROCEDURE init_stencil_2d
+  END INTERFACE init
+  INTERFACE vmx
+     MODULE PROCEDURE vmx_stencila_2d
+     MODULE PROCEDURE vmx_stencilg_2d
+  END INTERFACE vmx
+  INTERFACE putmat
+     module procedure putmat_stencil
+  END INTERFACE putmat
+!
+  INTERFACE OPERATOR(*)
+     MODULE PROCEDURE vmx_stencila_2d
+     MODULE PROCEDURE vmx_stencilg_2d
+  END INTERFACE OPERATOR(*)
+!
+CONTAINS
+!================================================================================
+  SUBROUTINE init_stencil_2d(s, e, id, nluni, mat, comm)
+!
+!  stencil_2d constructor
+!
+    USE mpi
+    INTEGER, INTENT(in)           :: s(2), e(2) ! Bounds in each dim.
+    INTEGER, INTENT(in)           :: id(:,:)    ! Structure of stencil
+    LOGICAL, INTENT(in)           :: nluni      ! Uniform stencil
+    TYPE(stencil_2d), INTENT(out) :: mat
+    INTEGER, INTENT(in)           :: comm
+    INTEGER :: me, ndim=2, ierr
+    INTEGER :: npoints    ! Size of the stencil
+!
+    CALL mpi_comm_rank(comm, me, ierr)
+!
+    IF(id(1,1).NE.0 .AND. id(1,2).NE.0) THEN
+       IF(me.EQ.0) THEN
+          WRITE(*,*) 'INIT_STENCIL: id(1,:) should be (0,0)!'
+          CALL mpi_abort(comm, -1, ierr)
+       END IF
+    END IF
+!
+    npoints = SIZE(id,1)
+    mat%npoints = npoints
+    mat%s = s
+    mat%e = e
+    mat%nluni = nluni
+    IF(nluni) THEN
+       ALLOCATE(mat%val(1,1, 0:npoints-1))
+    ELSE
+       ALLOCATE(mat%val(s(1):e(1), s(2):e(2), 0:npoints-1))
+    END IF
+    ALLOCATE(mat%id(0:npoints-1, ndim))
+    mat%id(:,:) = id(:,:)
+    mat%val(:,:,:) = 0.0
+!
+    mat%ldim = e-s+1
+    CALL mpi_allreduce(mat%s, mat%s0, ndim, MPI_INTEGER, MPI_MIN, comm, ierr)
+    CALL mpi_allreduce(mat%e, mat%e0, ndim, MPI_INTEGER, MPI_MAX, comm, ierr)
+    mat%gdim = mat%e0 - mat%s0 + 1
+!
+  END SUBROUTINE init_stencil_2d
+!================================================================================
+  FUNCTION vmx_stencila_2d(mat, xarr) RESULT(res)
+!
+!   Return product res = mat*x, where x and res are simple arrays
+!
+    TYPE(stencil_2d), INTENT(in)         :: mat
+    REAL(rkind), ALLOCATABLE, INTENT(in) :: xarr(:,:)
+    REAL(rkind)           :: res(LBOUND(xarr,1):UBOUND(xarr,1), &
+         &                   LBOUND(xarr,2):UBOUND(xarr,2))
+    INTEGER               :: k, i, j
+    INTEGER, DIMENSION(2) :: smin, emax, d, lb, ub
+!
+    smin(:) = mat%s0(:)
+    emax(:) = mat%e0(:)
+    res = 0.0
+    DO k=0,mat%npoints-1
+       d(:) = mat%id(k,:)
+       lb = MAX(smin, smin-d, mat%s)
+       ub = MIN(emax, emax-d, mat%e)
+       DO j=lb(2),ub(2)
+          DO i=lb(1),ub(1)
+             res(i,j) = res(i,j) + mat%val(i,j,k)*xarr(i+d(1),j+d(2))
+          END DO
+       END DO
+    END DO
+  END FUNCTION vmx_stencila_2d
+!================================================================================
+  FUNCTION vmx_stencilg_2d(mat, xarr) RESULT(res)
+!
+!   Return product res= mat*x, where x and res are gvectors
+!
+    USE gvector, ONLY : gvector_2d
+    TYPE(stencil_2d), INTENT(in) :: mat
+    TYPE(gvector_2d), INTENT(in) :: xarr
+    TYPE(gvector_2d)             :: res
+    INTEGER               :: k, i, j
+    INTEGER, DIMENSION(2) :: d, s, e
+!
+    s = xarr%s
+    e = xarr%e
+    res = gvector_2d(xarr%s, xarr%e, xarr%g)
+!
+! Diagonal contributions: d(0) = (0,0)
+    DO j=s(2),e(2)
+       DO i=s(1),e(1)
+          res%val(i,j) = mat%val(i,j,0)*xarr%val(i,j)
+       END DO
+    END DO
+!
+    DO k=1,mat%npoints-1
+       d(:) = mat%id(k,:)
+       DO j=s(2),e(2)
+          DO i=s(1),e(1)
+             res%val(i,j) = res%val(i,j) + mat%val(i,j,k)*xarr%val(i+d(1),j+d(2))
+          END DO
+       END DO
+    END DO
+  END FUNCTION vmx_stencilg_2d
+!================================================================================
+  SUBROUTINE putmat_stencil(fid, label, mat, str)
+    USE futils
+    INTEGER, INTENT(in)                    :: fid
+    CHARACTER(len=*), INTENT(in)           :: label
+    TYPE(stencil_2d), INTENT(in)           :: mat
+    CHARACTER(len=*), INTENT(in), OPTIONAL :: str
+!
+    IF(PRESENT(str)) THEN
+       CALL creatg(fid, label, str)
+    ELSE
+       CALL creatg(fid, label)
+    END IF
+!
+    CALL putarr(fid, TRIM(label)//'/dists', mat%id, ionode=0)
+    CALL putarrnd(fid, TRIM(label)//'/val',   mat%val, (/1,2/))
+  END SUBROUTINE putmat_stencil
+!=======================================================================
+  SUBROUTINE laplacian(dx, dy, mat)
+!
+!   Construct a Laplacian using 5-point FD discretization
+!   Assume homegeneous Dirichlet BC on all 4 faces.
+!
+    REAL(rkind), INTENT(in)         :: dx, dy
+    TYPE(stencil_2d), INTENT(inout) :: mat
+!
+    INTEGER               :: i, j, k
+    INTEGER               :: ieast, iwest, jsouth, jnorth
+    INTEGER, DIMENSION(2) :: d
+    REAL(rkind)           :: dx2inv, dy2inv
+!
+!   Assemble the stencil
+!
+    dx2inv = 1.0d0/dx**2
+    dy2inv = 1.0d0/dy**2
+!
+    mat%val(:,:,0) = -2.0d0*(dx2inv+dy2inv) ! Diagonal
+!
+    DO k=1,mat%npoints-1  ! Off diagonal
+       d = mat%id(k,:)
+       DO j=mat%s(2),mat%e(2)
+          DO i=mat%s(1),mat%e(1)
+             IF(d(1).EQ.0) THEN            ! north and south
+                mat%val(i,j,k) = dy2inv
+             ELSE IF(d(2).EQ.0) THEN       ! east and west
+                mat%val(i,j,k) = dx2inv
+             END IF
+          END DO
+       END DO
+    END DO
+!
+!   Impose Dirichlet BC on all 4 boundaries
+!
+    ieast = mat%s0(1)
+    IF(ieast .EQ. mat%s(1)) THEN   ! East boundary
+       mat%val(ieast, mat%s(2):mat%e(2), :) = 0.0
+       mat%val(ieast, mat%s(2):mat%e(2), 0) = 1.0
+    END IF
+    iwest = mat%e0(1)
+    IF(iwest .EQ. mat%e(1)) THEN   ! West boundary
+       mat%val(iwest, mat%s(2):mat%e(2), :) = 0.0
+       mat%val(iwest, mat%s(2):mat%e(2), 0) = 1.0
+    END IF
+    jsouth = mat%s0(2)
+    IF(jsouth .EQ. mat%s(2)) THEN  ! South boundary
+       mat%val(mat%s(1):mat%e(1), jsouth, :) = 0.0
+       mat%val(mat%s(1):mat%e(1), jsouth, 0) = 1.0
+    END IF
+    jnorth = mat%e0(2)
+    IF(jnorth .EQ. mat%e(2)) THEN  ! North boundary
+       mat%val(mat%s(1):mat%e(1), jnorth, :) = 0.0
+       mat%val(mat%s(1):mat%e(1), jnorth, 0) = 1.0
+    END IF
+!
+  END SUBROUTINE laplacian
+!================================================================================
+END MODULE stencil
diff --git a/multigrid/src/test_csr.f90 b/multigrid/src/test_csr.f90
new file mode 100644
index 0000000..02172b2
--- /dev/null
+++ b/multigrid/src/test_csr.f90
@@ -0,0 +1,145 @@
+!>
+!> @file test_csr.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   Test routines of module csr_mod
+!
+  USE multigrid
+  USE csr
+  IMPLICIT NONE
+!
+  INTEGER :: nx=8, nidbas=1, alpha=0, modem=10
+  DOUBLE PRECISION :: sigma=1.0d0, kmode=10.0
+  LOGICAL :: nlper=.FALSE.
+  INTEGER :: ngauss, nterms
+  INTEGER :: i, j
+!
+  TYPE(grid1d)  :: gridx(1)
+  TYPE(csr_mat) :: mata
+!
+  DOUBLE PRECISION, ALLOCATABLE :: arow(:), sum_row(:)
+  DOUBLE PRECISION, ALLOCATABLE :: acol(:), sum_col(:)
+  DOUBLE PRECISION, ALLOCATABLE :: sol(:), rhs(:)
+!
+  NAMELIST /newrun/ nx, nidbas, sigma, kmode, modem, alpha, nlper
+!----------------------------------------------------------------------------
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Set grid
+!
+  ngauss = CEILING(REAL(2*nidbas+alpha+1,8)/2.d0)
+  CALL create_grid(nx, nidbas, ngauss, alpha, gridx, nlper)
+!
+!   Create FE matrice and set BC u(0)=u(1)=0
+!
+  nterms = 3
+  CALL femat(gridx(1)%spl, mata, coefeq, nterms)
+  CALL to_mat(mata)
+  WRITE(*,'(/a,2i6)') 'rank, nnz', mata%rank, mata%nnz
+  WRITE(*,'(a/(12(1pe12.3)))')   'diag', mata%val(mata%idiag)
+  ALLOCATE(arow(mata%rank))
+  ALLOCATE(acol(mata%rank))
+  ALLOCATE(sum_row(mata%rank))
+  ALLOCATE(sum_col(mata%rank))
+  sum_col = 0.0d0
+  DO i=1,mata%rank
+     CALL getrow(mata, i, arow)
+     sum_row(i) = SUM(arow)
+     sum_col = sum_col+arow
+     IF(i.EQ.1) WRITE(*,'(/a)') 'Matrix A'
+     WRITE(*,'(12(1pe12.3))')  arow
+  END DO
+  WRITE(*,'(a/(12(1pe12.3)))')   'sum of row', sum_row
+  WRITE(*,'(a/(12(1pe12.3)))')   'sum of col', sum_col
+  DO j=1,mata%rank
+     CALL getcol(mata, j, acol)
+     sum_col(j) = SUM(acol)
+  END DO
+  WRITE(*,'(a/(12(1pe12.3)))')   'sum of col', sum_col
+!
+!   Clear and rebuild matrix
+!
+  WRITE(*,'(/a)') 'Clear and rebuild matrix ...'
+  CALL clear_mat(mata)
+  CALL femat(gridx(1)%spl, mata, coefeq, nterms)
+  WRITE(*,'(a,2i6)') 'rank, nnz', mata%rank, mata%nnz
+  DO i=1,mata%rank
+     CALL getrow(mata, i, arow)
+     WRITE(*,'(12(1pe12.3))')  arow
+  END DO
+  WRITE(*,'(a/(12(1pe12.3)))')   'diag', mata%val(mata%idiag)
+!
+!   Test VMX
+!
+  ALLOCATE(sol(mata%rank))
+  ALLOCATE(rhs(mata%rank))
+  sol = 1.0d0
+!
+  rhs = vmx(mata, sol)
+  acol = rhs-sum_row
+  WRITE(*,'(/a)') 'Test VMX ...'
+  WRITE(*,'(a/(12(1pe12.3)))') 'amat*sol', rhs  
+  WRITE(*,'(a,1pe12.3)') 'Error norm =', SQRT(DOT_PRODUCT(acol,acol))
+!
+  rhs = vmx(mata, sol, 'T')
+  acol = rhs-sum_col
+  WRITE(*,'(a/(12(1pe12.3)))') "amat'*sol", rhs
+  WRITE(*,'(a,1pe12.3)') 'Error norm =', SQRT(DOT_PRODUCT(acol,acol))
+!
+  CALL destroy(mata)
+CONTAINS
+  SUBROUTINE coefeq(x, idt, idw, c)
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(out) :: idt(:), idw(:)
+    DOUBLE PRECISION, INTENT(out) :: c(:)
+    SELECT CASE (alpha)
+    CASE(0)           ! Cartesian geometry
+       c(1) = 1.0d0
+       idt(1) = 1
+       idw(1) = 1
+       c(2) = sigma
+       idt(2) = 0
+       idw(2) = 0
+       c(3) = 1.0d0
+       idt(3) = 1
+       idw(3) = 0
+    CASE(1)
+       c(1) = x
+       idt(1) = 1
+       idw(1) = 1
+       c(2) = modem**2/x
+       idt(2) = 0
+       idw(2) = 0
+       c(3) = 1.0d0
+       idt(3) = 1
+       idw(3) = 0
+    CASE default
+       WRITE(*,'(a,i0,a)') 'COEFEQ: alpha ', alpha, ' not defined!'
+    END SELECT
+  END SUBROUTINE coefeq
+!----------------------------------------------------------------------------
+END PROGRAM main
diff --git a/multigrid/src/test_gvec1d.f90 b/multigrid/src/test_gvec1d.f90
new file mode 100644
index 0000000..1dd22f5
--- /dev/null
+++ b/multigrid/src/test_gvec1d.f90
@@ -0,0 +1,190 @@
+!>
+!> @file test_gvec1d.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+!
+!    Test implementation of 1D vectors with arbirary
+!    vector bounds and ghost cell width.
+!
+!  T.M. Tran (09/2013)
+!
+MODULE gvector
+  USE iso_fortran_env, ONLY : rkind => real64
+  IMPLICIT NONE
+  PRIVATE
+  PUBLIC :: rkind, gvector_1d, disp, norm2, &
+       &    OPERATOR(+), OPERATOR(-), OPERATOR(*), &
+       &    ASSIGNMENT(=)
+
+  TYPE gvector_1d
+     INTEGER :: s, e, g
+     REAL(rkind), ALLOCATABLE :: val(:)
+  END TYPE gvector_1d
+
+  INTERFACE gvector_1d
+     MODULE PROCEDURE constructor
+  END INTERFACE gvector_1d
+  INTERFACE OPERATOR(+)
+     MODULE PROCEDURE add_scal
+     MODULE PROCEDURE add_vec
+  END INTERFACE OPERATOR(+)
+  INTERFACE OPERATOR(-)
+     MODULE PROCEDURE minus_vec
+     MODULE PROCEDURE substract_vec
+  END INTERFACE OPERATOR(-)
+  INTERFACE OPERATOR(*)
+     MODULE PROCEDURE scale_left
+     MODULE PROCEDURE scale_right
+  END INTERFACE OPERATOR(*)
+  INTERFACE ASSIGNMENT(=)
+     MODULE PROCEDURE from_scal
+     MODULE PROCEDURE from_vec
+  END INTERFACE ASSIGNMENT(=)
+  INTERFACE norm2
+     module procedure norm2_gvector_1d
+  END INTERFACE norm2
+
+CONTAINS
+  FUNCTION constructor(s, e, g) RESULT(res)
+    INTEGER, INTENT(in)           :: s, e
+    INTEGER, OPTIONAL, INTENT(in) :: g
+    TYPE(gvector_1d)              :: res
+    INTEGER :: lb, ub
+    res%g= 0
+    IF(PRESENT(g)) res%g=g
+    res%s=s
+    res%e=e
+    lb = res%s-res%g
+    ub = res%e+res%g
+    ALLOCATE(res%val(lb:ub))
+    res%val = -9999.0
+  END FUNCTION constructor
+
+  FUNCTION add_vec(lhs, rhs) RESULT(res)
+    TYPE(gvector_1d), INTENT(in) :: lhs, rhs
+    TYPE(gvector_1d)             :: res
+    res = gvector_1d(lhs%s, lhs%e, lhs%g)
+    res%val(res%s:res%e) = lhs%val(res%s:res%e) + rhs%val(res%s:res%e)
+  END FUNCTION add_vec
+
+  FUNCTION add_scal(lhs, rhs) RESULT(res)
+    TYPE(gvector_1d), INTENT(in) :: lhs
+    REAL(rkind), INTENT(in)      :: rhs
+    TYPE(gvector_1d)             :: res
+    res = gvector_1d(lhs%s, lhs%e, lhs%g)
+    res%val(res%s:res%e) = lhs%val(res%s:res%e) + rhs
+  END FUNCTION add_scal
+
+  FUNCTION minus_vec(this) RESULT(res)
+    TYPE(gvector_1d), INTENT(in) :: this
+    TYPE(gvector_1d)             :: res
+    res = gvector_1d(this%s, this%e, this%g)
+    res%val(res%s:res%e) = -this%val(res%s:res%e)
+  END FUNCTION minus_vec
+
+  FUNCTION substract_vec(lhs, rhs) RESULT(res)
+    TYPE(gvector_1d), INTENT(in) :: lhs, rhs
+    TYPE(gvector_1d)             :: res
+    res = gvector_1d(lhs%s, lhs%e, lhs%g)
+    res = lhs + (-rhs)
+  END FUNCTION substract_vec
+
+  FUNCTION scale_left(lhs, rhs) RESULT(res)
+    REAL(rkind), INTENT(in)      :: lhs
+    TYPE(gvector_1d), INTENT(in) :: rhs
+    TYPE(gvector_1d)             :: res
+    res = gvector_1d(rhs%s, rhs%e, rhs%g)
+    res%val(res%s:res%e) = lhs * rhs%val(res%s:res%e)
+  END FUNCTION scale_left
+
+  FUNCTION scale_right(lhs, rhs) RESULT(res)
+    TYPE(gvector_1d), INTENT(in) :: lhs
+    REAL(rkind), INTENT(in)      :: rhs
+    TYPE(gvector_1d)             :: res
+    res = gvector_1d(lhs%s, lhs%e, lhs%g)
+    res%val(res%s:res%e) = rhs * lhs%val(res%s:res%e)
+  END FUNCTION scale_right
+
+  SUBROUTINE from_vec(lhs, rhs)
+    TYPE(gvector_1d), INTENT(inout) :: lhs
+    REAL(rkind), INTENT(in)         :: rhs(:)
+    INTEGER :: n
+    n = lhs%e - lhs%s + 1
+    IF(SIZE(rhs) .NE. n) THEN
+       PRINT*, 'from_vec: sizes of rhs and lhs not equal!'
+       STOP
+    END IF
+    lhs%val(lhs%s:lhs%e) = rhs(1:n)
+  END SUBROUTINE from_vec
+
+  SUBROUTINE from_scal(lhs, rhs)
+    TYPE(gvector_1d), INTENT(inout) :: lhs
+    REAL(rkind), INTENT(in)         :: rhs
+    lhs%val(lhs%s:lhs%e) = rhs
+  END SUBROUTINE from_scal
+
+  SUBROUTINE disp(str,this)
+    CHARACTER(len=*), INTENT(in) :: str
+    TYPE(gvector_1d), INTENT(in) :: this
+    WRITE(*,'(/a,3i6)') str//': s, e, g =', this%s, this%e, this%g
+    WRITE(*,'(10(1pe12.3))') this%val
+  END SUBROUTINE disp
+
+  FUNCTION norm2_gvector_1d(this) RESULT(res)
+    TYPE(gvector_1d), INTENT(in) :: this
+    REAL(rkind)                  :: res
+    res = NORM2(this%val(this%s:this%e))
+  END FUNCTION norm2_gvector_1d
+END MODULE gvector
+
+PROGRAM main
+  USE gvector
+  IMPLICIT NONE
+  INTEGER :: s=0, e=5, g=1
+  INTEGER :: i, lb, ub
+  REAL(rkind) :: a=0.1
+  TYPE(gvector_1d) :: v1, v2, v3
+!
+  lb = s-g
+  ub = e+g
+  v1 = gvector_1d(s, e, g)
+  v1%val(s:e) = [ (i, i=s,e) ]
+  CALL disp('v1', v1)
+!
+  v2 = v1 + a*v1
+  CALL disp('v1+a*v1', v2)
+!
+  v3 = v1 - v1*a
+  CALL disp('v1-v1*a', v3)
+!
+  WRITE(*,'(a,1pe12.3)') 'norm of v1      =', NORM2(v1)
+  WRITE(*,'(a,1pe12.3)') 'norm of v1-a*v1 =', NORM2(v1-a*v1)
+!
+  v1 = 0.0d0
+  CALL disp('Should be all zero', v1)
+  v2 = [ 1.d0, 2.d0, 3.d0, 4.d0, 5.d0, 6.d0 ]
+  CALL disp('Should be (1. 2. 3. 4. 5. 6.)', v2)
+
+END PROGRAM main
+
diff --git a/multigrid/src/test_intergrid0.f90 b/multigrid/src/test_intergrid0.f90
new file mode 100644
index 0000000..7a76d32
--- /dev/null
+++ b/multigrid/src/test_intergrid0.f90
@@ -0,0 +1,230 @@
+!>
+!> @file test_intergrid0.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   intergrid transfer using *serial* multigrid module:
+!     - restriction of rhs
+!     - prolongation of sol
+!
+  USE multigrid, ONLY : grid2d, mg_info, &
+       &                get_lmax, create_grid_fd, ibc_transf, &
+       &                prolong, restrict
+  IMPLICIT NONE
+  DOUBLE PRECISION, PARAMETER :: pi=4.0d0*ATAN(1.0d0)
+  DOUBLE PRECISION :: Lx, Ly, kx, ky, icrosst, beta, miome
+  INTEGER :: nx, ny, levels
+  CHARACTER(len=4) :: prb
+  LOGICAL :: nldebug
+!
+  DOUBLE PRECISION :: dx, dy
+  DOUBLE PRECISION, ALLOCATABLE :: x(:),y(:)
+!
+  TYPE(mg_info) :: info  ! info for MG
+  TYPE(grid2d), ALLOCATABLE :: grids(:)
+!
+  INTEGER :: i, l
+!
+  NAMELIST /parameters/ prb, nx, ny, levels, Lx, Ly, kx, ky, icrosst, beta, &
+       &                miome, nldebug
+!--------------------------------------------------------------------------------
+!
+!   Default inputs
+!
+  nx=32
+  ny=32
+  levels = 2
+  kx=1
+  ky=1
+  icrosst=1.0d0
+  Lx = 1.0D0
+  Ly = 1.0D0
+  miome = 200d0
+  beta = 0d0
+  prb = 'dddd'
+  nldebug = .FALSE.
+!
+  READ(*,parameters)
+  WRITE(*,parameters)
+!
+!   Fine grid
+!
+  dx = lx/REAL(nx,8)
+  dy = ly/REAL(ny,8)
+  ALLOCATE(x(0:nx), y(0:ny))
+  x = dx * [(i,i=0,nx)]
+  y = dy * [(i,i=0,ny)]
+  WRITE(*,'(a/10(1pe12.3))') 'x =', x
+  WRITE(*,'(a/10(1pe12.3))') 'y =', y
+!
+!   Create array of grids
+!
+  levels = MIN(levels, get_lmax(nx), get_lmax(ny))
+  WRITE(*,'(a,i4)') 'Number of levels', levels
+  ALLOCATE(grids(levels))
+  info%nu1 = 1
+  info%nu2 = 1
+  info%mu = 1
+  info%nu0 = 1
+  info%levels = levels
+  info%relax = 'jac'
+  info%omega = 1
+  CALL create_grid_fd(x, y, grids, info, mat_type='cds', debug=nldebug)
+!
+!   Set BC on grid transfer matrices
+!
+  IF(prb.EQ.'dddd') CALL ibc_transf(grids,1,3) ! Direction X
+  CALL ibc_transf(grids,2,3)                   ! Direction Y
+!
+!   Define RHS at l=1, compute RHS at l=2,...,levels by "restriction".
+!
+  grids(1)%f(:,:) = frhs(grids(1)%x,grids(1)%y)
+  DO l=2,levels
+     grids(l)%f = restrict(grids(l)%matp, grids(l-1)%f)
+     grids(l)%f = 0.25d0*grids(l)%f  ! Scaling for FD
+  END DO
+!
+!   Define SOL at l=levels, compute SOL at l=levels-1,..,1 by "prolongation"
+!
+  grids(levels)%v(:,:) = fsol(grids(levels)%x,grids(levels)%y)
+  DO l=levels-1,1,-1
+     grids(l)%v = prolong(grids(l+1)%matp, grids(l+1)%v)
+  END DO
+!
+  IF(nldebug) THEN
+     DO l=1,levels
+        WRITE(*,'(a,i3)') '==== Level', l
+        WRITE(*,'(a)') 'f ='
+        DO i=0,grids(l)%n(1)
+           WRITE(*,'(10f8.3)') grids(l)%f(i,:)
+        END DO
+        WRITE(*,'(a)') 'v ='
+        DO i=0,grids(l)%n(1)
+           WRITE(*,'(10f8.3)') grids(l)%v(i,:)
+        END DO
+     END DO
+  END IF
+!
+!   Epilogue
+!
+  CALL h5file
+!--------------------------------------------------------------------------------
+CONTAINS
+!+++
+  FUNCTION fdense(x)
+!
+!   Return density
+!
+    DOUBLE PRECISION, INTENT(in) :: x(:)
+    DOUBLE PRECISION :: fdense(SIZE(x))
+    fdense(:) = 0.5d0*beta*miome*EXP( -(x(:)-Lx/3d0)**2d0/(Lx/2d0)**2d0 );
+  END FUNCTION fdense
+!+++
+  FUNCTION frhs(x,y)
+!
+!   Return RHS
+!
+    DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
+    DOUBLE PRECISION :: frhs(SIZE(x),SIZE(y))
+    DOUBLE PRECISION :: c, s, d(SIZE(x))
+    DOUBLE PRECISION :: corr
+    INTEGER :: j
+    corr = 1.d0+icrosst**2/4.0d0
+    d(:) = fdense(x(:))
+    IF(prb.EQ.'dddd') THEN
+       DO j=1,SIZE(y)
+          c = COS(2.0d0*pi*ky*y(j)/Ly)
+          s = SIN(2.0d0*pi*ky*y(j)/Ly)
+          frhs(:,j) =  -4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * SIN(2.0d0*pi*kx*x(:)/Lx)*s &
+               &     -icrosst*kx*ky/Lx/Ly * COS(2.0d0*pi*kx*x(:)/Lx)*c ) &
+               &     + d(:) * SIN(2.0d0*pi*kx*x(:)/Lx)*s
+       END DO
+    ELSE IF (prb.EQ.'nndd') THEN
+       DO j=1,SIZE(y)
+          c = COS(2.0d0*pi*ky*y(j)/Ly)
+          s = SIN(2.0d0*pi*ky*y(j)/Ly)
+          frhs(:,j) =  -4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * COS(2.0d0*pi*kx*x(:)/Lx)*s &
+               &     +icrosst*kx*ky/Lx/Ly * SIN(2.0d0*pi*kx*x(:)/Lx)*c ) &
+               &     + d(:) * COS(2.0d0*pi*kx*x(:)/Lx)*s
+       END DO
+    END IF
+  END FUNCTION frhs
+!+++
+  FUNCTION fsol(x,y)
+!
+!   Return analytical solution
+!
+    DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
+    DOUBLE PRECISION :: fsol(SIZE(x),SIZE(y))
+    DOUBLE PRECISION :: c
+    INTEGER :: j
+    IF(prb.EQ.'dddd') THEN
+       DO j=1,SIZE(y)
+          c = SIN(2.0d0*pi*ky*y(j)/Ly)
+          fsol(:,j) = SIN(2.0d0*pi*kx*x(:)/Lx)*c
+       END DO
+    ELSE IF (prb.EQ.'nndd') THEN
+       DO j=1,SIZE(y)
+          c = SIN(2.0d0*pi*ky*y(j)/Ly)
+          fsol(:,j) = COS(2.0d0*pi*kx*x(:)/Lx)*c
+       END DO
+    END IF
+  END FUNCTION fsol
+!+++
+  SUBROUTINE h5file
+    USE futils
+    USE csr, ONLY : putmat
+    CHARACTER(len=128) :: file='test_intergrid0.h5'
+    INTEGER :: fid
+    INTEGER :: l
+    CHARACTER(len=64) :: dsname
+    CALL creatf(file, fid, real_prec='d')
+    CALL attach(fid, '/', 'NX', nx)
+    CALL attach(fid, '/', 'NY', ny)
+    CALL attach(fid, '/', 'KX', kx)
+    CALL attach(fid, '/', 'KY', ky)
+    CALL attach(fid, '/', 'LX', Lx)
+    CALL attach(fid, '/', 'LY', Ly)
+    CALL attach(fid, '/', 'BETA', beta)
+    CALL attach(fid, '/', 'LEVELS', levels)
+    CALL attach(fid, '/', 'PRB', prb)
+    CALL attach(fid, '/', 'NLDEBUG', nldebug)
+    CALL creatg(fid, '/mglevels')
+    DO l=1,levels
+       WRITE(dsname,'("/mglevels/level.",i2.2)') l
+       CALL creatg(fid, TRIM(dsname))
+       IF(l.GT.1) THEN
+          CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1))
+          CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2))
+       END IF
+       CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x)
+       CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y)
+       CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f)
+       CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v)
+    END DO
+    CALL closef(fid)
+  END SUBROUTINE h5file
+!+++
+END PROGRAM main
diff --git a/multigrid/src/test_intergrid1.f90 b/multigrid/src/test_intergrid1.f90
new file mode 100644
index 0000000..0d0a23b
--- /dev/null
+++ b/multigrid/src/test_intergrid1.f90
@@ -0,0 +1,240 @@
+!>
+!> @file test_intergrid1.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   Test implementation of (parallel) matrix-free
+!
+  USE iso_fortran_env, ONLY : rkind => real64
+  USE parmg, ONLY    : grid2_type, init_restrict, coarse, get_lmax, &
+       &               exchange, prolong, restrict, disp, norm_vec
+  USE pputils2, ONLY : dist1d
+  USE gvector, ONLY  : gvector_2d,OPERATOR(-)
+  USE futils
+  USE mpi
+  IMPLICIT NONE
+!
+  INTEGER, PARAMETER :: ndims=2
+  INTEGER                   :: ierr, me, npes
+  INTEGER, DIMENSION(ndims) :: dims=[0,0]
+  INTEGER, DIMENSION(ndims) :: lmax, coords, comm1d
+  LOGICAL, DIMENSION(ndims) :: periods=[.FALSE.,.FALSE.]
+  LOGICAL                   :: reorder =.FALSE.
+  INTEGER                   :: comm_cart, comm_futils
+!
+  INTEGER           :: fin
+  CHARACTER(len=64) :: filein = 'test_intergrid0.h5'
+  CHARACTER(len=64) :: dsname
+  CHARACTER(len=4)  :: prb
+  LOGICAL           :: nldebug
+!
+  INTEGER                       :: nx, ny, levels
+  TYPE(grid2_type), ALLOCATABLE :: grids(:), new_grids(:)
+  INTEGER, DIMENSION(ndims)     :: e, s, npt_glob, npt_loc, npt_loc_min
+!
+  CHARACTER(len=64) :: str
+  REAL(rkind)       :: err
+  INTEGER           :: i, k, l
+!--------------------------------------------------------------------------------
+!                              1.0 Prologue
+!
+!   Init MPI and setup 2D grid topology
+!
+  CALL mpi_init(ierr)
+  CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
+  CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
+  CALL mpi_dims_create(npes, ndims, dims, ierr)
+  CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, comm_cart,&
+       &               ierr)
+  CALL mpi_cart_coords(comm_cart, me, ndims, coords, ierr)
+  CALL mpi_cart_sub(comm_cart, [.TRUE.,.FALSE.], comm1d(1), ierr)
+  CALL mpi_cart_sub(comm_cart, [.FALSE.,.TRUE.], comm1d(2), ierr)
+!
+  IF( me .EQ. 0 ) WRITE(*,'(a,i3,i3)') '2d processor grid', dims
+!
+!   Get nx, ny, levels from h5 file created by test_intergrid0
+!
+  IF( command_argument_count() > 0 ) THEN
+     CALL get_command_argument(1, filein)
+  END IF
+  IF(me.EQ.0) WRITE(*,'(a,a)') 'filein = ', TRIM(filein)
+!
+  CALL mpi_comm_dup(comm_cart, comm_futils, ierr)
+  CALL openf(filein, fin, mpicomm=comm_futils)
+  CALL getatt(fin, '/', 'NX', nx, ierr)
+  CALL getatt(fin, '/', 'NY', ny, ierr)
+  CALL getatt(fin, '/', 'LEVELS', levels, ierr)
+  CALL getatt(fin, '/', 'PRB', prb, ierr)
+  CALL getatt(fin, '/', 'NLDEBUG', nldebug, ierr)
+  IF(me.EQ.0) WRITE(*,'(a,a,3i5,l3)') 'prb, nx, ny, levels: ', prb, nx, ny, &
+       &                              levels, nldebug
+!--------------------------------------------------------------------------------
+!                              2.0 Read (f,v) from h5 file
+!
+  ALLOCATE(grids(levels))
+!
+!    Partition on finest grid
+!
+  npt_glob(1) = nx+1
+  npt_glob(2) = ny+1
+  DO i=1,ndims
+     CALL dist1d(comm1d(i), 0, npt_glob(i), s(i), npt_loc(i))
+     e(i) = s(i) + npt_loc(i) - 1
+     lmax(i) = get_lmax(s(i), npt_loc(i), 1, comm1d(i))
+  END DO
+  npt_loc = e-s+1
+  IF(me.EQ.0) WRITE(*,'(a,2i4)') 'lmax', lmax
+!
+!    Partition on coaser grids
+!
+  DO l=1,levels
+     IF(l.GT.1) THEN
+        CALL coarse(s,e)
+        npt_loc = e-s+1
+        CALL mpi_allreduce(npt_loc, npt_loc_min, 2, MPI_INTEGER, &
+             &             MPI_MIN, comm_cart, ierr)
+        CALL mpi_allreduce(e, npt_glob, 2, MPI_INTEGER, MPI_MAX, &
+             &             comm_cart, ierr)
+        npt_glob = npt_glob+1
+     END IF
+     WRITE(str,'(a,i3,a)') 'Partition at level', l, ': start. index ='
+     CALL disp(TRIM(str), s, comm_cart)
+     IF(me.EQ.0) THEN 
+        WRITE(*,'(a,2i6)') 'npt_glob   ', npt_glob
+        WRITE(*,'(a,2i6)') 'npt_loc_min', npt_loc_min
+     END IF
+     grids(l)%s = s
+     grids(l)%e = e
+     grids(l)%npt = npt_glob
+     grids(l)%f = gvector_2d(s, e, [1,1])
+     grids(l)%v = gvector_2d(s, e, [1,1])
+     ALLOCATE(grids(l)%x(0:npt_glob(1)-1))  ! Global coords (x,y)
+     ALLOCATE(grids(l)%y(0:npt_glob(2)-1))
+     WRITE(dsname,'("/mglevels/level.",i2.2)') l
+     CALL getarr(fin, TRIM(dsname)//"/x", grids(l)%x)
+     CALL getarr(fin, TRIM(dsname)//"/y", grids(l)%y)
+     CALL getarrnd(fin, TRIM(dsname)//"/v", grids(l)%v%val, [1,2], garea=[1,1])
+     CALL getarrnd(fin, TRIM(dsname)//"/f", grids(l)%f%val, [1,2], garea=[1,1])
+  END DO
+!--------------------------------------------------------------------------------
+!                              3.0 Parallel intergrid transfer
+!
+  ALLOCATE(new_grids(levels))
+  CALL copy_grids(grids, new_grids)
+!
+!   Set up restriction stencil
+!
+  DO l=2,levels
+     CALL init_restrict(new_grids(l), prb, comm_cart)
+  END DO
+!
+!   Prolongation of v
+!
+  DO l=levels-1,1,-1
+     CALL exchange(comm_cart, grids(l+1)%v)
+     CALL prolong(grids(l+1)%v, new_grids(l)%v)
+     IF(nldebug) THEN
+        IF(me.EQ.0) WRITE(*,'(a)') '====='
+        DO k=0,npes
+           IF(me.EQ.k) THEN
+              s = grids(l+1)%f%s
+              e = grids(l+1)%f%e
+              WRITE(*,'(a,i2)') 'reference vbar on proc.', me
+              DO i=s(1),e(1)
+                 WRITE(*,'(10f8.3)') grids(l+1)%v%val(i,s(2):e(2))
+              END DO
+              s = grids(l)%f%s
+              e = grids(l)%f%e
+              WRITE(*,'(a,i2)') 'reference v on proc.', me
+              DO i=s(1),e(1)
+                 WRITE(*,'(10f8.3)') grids(l)%v%val(i,s(2):e(2))
+              END DO
+              WRITE(*,'(a,i2)') 'compute v on proc.', me
+              DO i=s(1),e(1)
+                 WRITE(*,'(10f8.3)') new_grids(l)%v%val(i,s(2):e(2))
+              END DO
+           END IF
+           CALL mpi_barrier(comm_cart, ierr)
+        END DO
+     END IF
+     err = norm_vec(new_grids(l)%v-grids(l)%v, comm_cart, 0)
+     IF(me.EQ.0) WRITE(*,'(a,i3,1pe12.3)') 'Error of prolongation: ', l, err
+  END DO
+!
+!   Restriction of f
+!
+  DO l=2,levels
+     CALL exchange(comm_cart, grids(l-1)%f)
+     CALL restrict(new_grids(l)%restrict_mat, grids(l-1)%f, new_grids(l)%f)
+     IF(nldebug) THEN
+        IF(me.EQ.0)  WRITE(*,'(a)') '====='
+        DO k=0,npes
+           IF(me.EQ.k) THEN
+              s = grids(l-1)%f%s
+              e = grids(l-1)%f%e
+              WRITE(*,'(a,i2)') 'reference f on proc.', me
+              DO i=s(1),e(1)
+                 WRITE(*,'(10f8.3)') grids(l-1)%f%val(i,s(2):e(2))
+              END DO
+              s = grids(l)%f%s
+              e = grids(l)%f%e
+              WRITE(*,'(a,i2)') 'reference fbar on proc.', me
+              DO i=s(1),e(1)
+                 WRITE(*,'(10f8.3)') grids(l)%f%val(i,s(2):e(2))
+              END DO
+              WRITE(*,'(a,i2)') 'compute fbar on proc.', me
+              DO i=s(1),e(1)
+                 WRITE(*,'(10f8.3)') new_grids(l)%f%val(i,s(2):e(2))
+              END DO
+           END IF
+           CALL mpi_barrier(comm_cart, ierr)
+        END DO
+     END IF
+     err = norm_vec(new_grids(l)%f-grids(l)%f, comm_cart, 0)
+     IF(me.EQ.0) WRITE(*,'(a,i3,1pe12.3)') 'Error of restriction: ', l, err
+  END DO
+!--------------------------------------------------------------------------------
+!                              9.0 Epilogue
+!
+  CALL closef(fin)  
+  CALL mpi_finalize(ierr)
+!
+CONTAINS
+  SUBROUTINE copy_grids(g1, g2)
+    TYPE(grid2_type), INTENT(in)  :: g1(:)
+    TYPE(grid2_type), INTENT(out) :: g2(:)
+    INTEGER :: l
+    DO l=1,SIZE(g1)
+       g2(l)%s = g1(l)%s
+       g2(l)%e = g1(l)%e
+       g2(l)%npt_loc = g1(l)%npt_loc
+       g2(l)%npt = g1(l)%npt
+       ALLOCATE(g2(l)%x(0:g2(l)%npt(1)-1)); g2(l)%x = g1(l)%x
+       ALLOCATE(g2(l)%y(0:g2(l)%npt(2)-1)); g2(l)%y = g1(l)%y
+       g2(l)%v = gvector_2d(g1(l)%v%s, g1(l)%v%e, g1(l)%v%g); g2(l)%v%val = g1(l)%f%val
+       g2(l)%f = gvector_2d(g1(l)%f%s, g1(l)%f%e, g1(l)%f%g); g2(l)%f%val = g1(l)%f%val
+    END DO
+  END SUBROUTINE copy_grids
+END PROGRAM main
diff --git a/multigrid/src/test_jacobi.f90 b/multigrid/src/test_jacobi.f90
new file mode 100644
index 0000000..f914535
--- /dev/null
+++ b/multigrid/src/test_jacobi.f90
@@ -0,0 +1,254 @@
+!>
+!> @file test_jacobi.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+!
+!    Test 2D  parallel Jacobi using STENCIL_2D matrix-free structure.
+!
+MODULE mod
+  USE iso_fortran_env, ONLY : rkind => real64
+  IMPLICIT NONE
+!
+  LOGICAL, PARAMETER     :: nldebug=.FALSE.
+  REAL(rkind), PARAMETER :: PI=4.d0*ATAN(1.0d0)
+CONTAINS
+END MODULE mod
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+PROGRAM main
+  USE mpi
+  USE pputils2, ONLY : dist1d, exchange, norm2_vec=>ppnorm2, timera, hostlist
+  USE parmg, ONLY    : jacobi, get_resids
+  USE stencil, ONLY  : stencil_2d, init, laplacian, putmat
+  USE mod
+  IMPLICIT NONE
+!
+  INTEGER, PARAMETER :: ndims=2
+!
+  INTEGER                   :: me, neighs(4), npes, ierr
+  INTEGER, DIMENSION(ndims) :: dims=[0,0]
+  INTEGER, DIMENSION(ndims) :: coords, comm1d
+  LOGICAL, DIMENSION(ndims) :: periods=[.FALSE.,.FALSE.]
+  LOGICAL                   :: reorder =.FALSE.
+  INTEGER                   :: comm_cart
+!
+  INTEGER                   :: nx=4, ny=4    ! Number of intervals
+  INTEGER, DIMENSION(ndims) :: e, s, lb, ub, npt_glob, npt_loc
+!
+  REAL(rkind), ALLOCATABLE  :: xgrid(:), ygrid(:)
+  REAL(rkind)               :: dx, dy
+  INTEGER, DIMENSION(5,2)   :: id     ! 5-point stencil 
+  INTEGER                   :: npoints
+  TYPE(stencil_2d)          :: mat
+  INTEGER                   :: i
+!
+  REAL(rkind), ALLOCATABLE  :: f(:,:), v(:,:), u(:,:)
+  REAL(rkind), ALLOCATABLE  :: resids(:,:), errs(:,:)
+  REAL(rkind), ALLOCATABLE  :: resid_it(:), err_it(:)
+  REAL(rkind)               :: omega=1.0d0, resid
+  INTEGER                   :: it, it_skip, nits=100
+!
+  NAMELIST /in/ nx, ny, omega, nits
+!================================================================================
+!                        1.0  Prologue
+!
+!  2D process grid
+  CALL mpi_init(ierr)
+  CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
+  CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
+  CALL mpi_dims_create(npes, ndims, dims, ierr)
+  CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, comm_cart,&
+       &               ierr)
+!
+  CALL mpi_comm_rank(comm_cart, me, ierr)
+  CALL mpi_cart_coords(comm_cart, me, ndims, coords, ierr)
+  CALL mpi_cart_shift(comm_cart, 0, 1, neighs(1), neighs(2), ierr)
+  CALL mpi_cart_shift(comm_cart, 1, 1, neighs(3), neighs(4), ierr)
+!
+  CALL mpi_cart_sub(comm_cart, [.TRUE.,.FALSE.], comm1d(1), ierr)
+  CALL mpi_cart_sub(comm_cart, [.FALSE.,.TRUE.], comm1d(2), ierr)
+!
+  CALL hostlist(comm_cart)
+  IF(me.EQ.0) THEN
+     WRITE(*,'(a,i0,a,i0/)') "Process grid: ", dims(1), " X ", dims(2)
+  END IF
+!
+!  Read problem inputs
+  IF(me.EQ.0) THEN
+     READ(*,in)
+     WRITE(*,in)
+  END IF
+!
+  CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, comm_cart, ierr)
+  CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, comm_cart, ierr)
+  CALL mpi_bcast(nits, 1, MPI_INTEGER, 0, comm_cart, ierr)
+  CALL mpi_bcast(omega, 1, MPI_DOUBLE_PRECISION, 0, comm_cart, ierr)
+!================================================================================
+!                        2.0  2d Grid construction
+!
+!  Partition 2D grid
+  CALL timera(0, 'Grid_construction')
+  npt_glob(1) = nx+1
+  npt_glob(2) = ny+1
+  DO i=1,ndims
+     CALL dist1d(comm1d(i), 0, npt_glob(i), s(i), npt_loc(i))
+     e(i) = s(i) + npt_loc(i) - 1
+  END DO
+  WRITE(*,'(a,i3.3,a,2(3i4,", "))') 'PE', me, ' coords, s,e:', &
+       &  (coords(i),s(i),e(i),i=1,ndims)
+!
+!   Global mesh
+  dx = 1.0d0/REAL(nx)
+  dy = 1.0d0/REAL(ny)
+  ALLOCATE(xgrid(0:nx))
+  ALLOCATE(ygrid(0:ny))
+  xgrid = [ (i*dx, i=0,nx) ]
+  ygrid = [ (i*dy, i=0,ny) ]
+  CALL timera(1, 'Grid_construction')
+!================================================================================
+!                        3.0  FD Laplacian
+!
+  CALL timera(0, 'Laplacian')
+  id=RESHAPE([ 0, -1, 0, 1, 0,  & 
+               0,  0,-1, 0, 1], &
+              [5,2])
+  npoints = 5
+  CALL init(s, e, id, .FALSE., mat, comm_cart)
+!
+  CALL laplacian(dx, dy, mat)
+  CALL timera(1, 'Laplacian')
+!================================================================================
+!                        4.0 Test parallel Jacobi with \nabla u(x,y) = f(x,y)
+!
+!   Problem definition
+!
+  s = mat%s
+  e = mat%e
+  lb = s-1
+  ub = e+1
+  ALLOCATE(f(lb(1):ub(1),lb(2):ub(2)))       ! RHS
+  ALLOCATE(v(lb(1):ub(1),lb(2):ub(2)))       ! Exact solutions
+  ALLOCATE(u(lb(1):ub(1),lb(2):ub(2)))       ! Computed solutions
+  ALLOCATE(resids(lb(1):ub(1),lb(2):ub(2)))  ! Residuals
+  ALLOCATE(errs(lb(1):ub(1),lb(2):ub(2)))    ! Errors
+  ALLOCATE(resid_it(0:nits))
+  ALLOCATE(err_it(0:nits))
+!
+  f(s(1):e(1),s(2):e(2)) = rhs(xgrid(s(1):e(1)),ygrid(s(2):e(2)))
+  v(s(1):e(1),s(2):e(2)) = exact(xgrid(s(1):e(1)),ygrid(s(2):e(2)))
+  CALL exchange(comm_cart, f)
+  CALL exchange(comm_cart, v)
+!
+!   Residuals of exact solutions
+  resids = get_resids(mat,v,f)
+  resid = norm2_vec(resids, comm_cart)
+!
+!   Jacobi iteration loop
+!
+  IF(me.EQ.0) WRITE(*,'(/a6,t14,a,t34,a)') 'it', 'residual norm', 'discretization error'
+  u = 0.0d0
+  CALL exchange(comm_cart, u)
+  resids = get_resids(mat,u,f)
+  errs  = u-v
+  resid_it(0) = norm2_vec(resids, comm_cart)
+  err_it(0)   = norm2_vec(errs, comm_cart)
+  it_skip = MAX(1,nits/10)
+!
+  CALL timera(0, 'Jacobi')
+  DO it=1,nits
+     CALL jacobi(mat, omega, 1, u, f)
+     CALL exchange(comm_cart, u)
+     resids = get_resids(mat,u,f)
+     errs  = u-v
+     resid_it(it) = norm2_vec(resids, comm_cart)
+     err_it(it)   = norm2_vec(errs, comm_cart)
+     IF(me.EQ.0 .AND. MODULO(it,it_skip).EQ.0 ) THEN 
+        WRITE(*,'(i6,4(1pe12.3))') it, resid_it(it), resid_it(it)/resid_it(it-1),&
+          &    err_it(it), err_it(it)/err_it(it-1)
+     END IF
+  END DO
+  CALL timera(1, 'Jacobi')
+!================================================================================
+!                        9.0  Epilogue
+  CALL h5file
+!
+  CALL timera(9, '')
+  CALL MPI_FINALIZE(ierr)
+CONTAINS
+  SUBROUTINE disp(str, arr)
+    CHARACTER(len=*), INTENT(in) :: str
+    REAL(rkind), INTENT(in)      :: arr(:,:)
+    INTEGER :: j
+    WRITE(*,'(/a)') str
+    DO j=1,SIZE(arr,2)
+       WRITE(*,'(10f8.3)') arr(:,j)
+    END DO
+  END SUBROUTINE disp
+!
+  SUBROUTINE h5file
+    USE futils
+    CHARACTER(len=128) :: file='test_jacobi.h5'
+    INTEGER :: fid
+    CALL creatf(file, fid, real_prec='d', mpicomm=comm_cart)
+    CALL putarr(fid, '/xgrid', xgrid, ionode=0)  ! only rank 0 does IO
+    CALL putarr(fid, '/ygrid', ygrid, ionode=0)  ! only rank 0 does IO
+!
+    CALL putarrnd(fid, '/f',      f,     (/1,2/), garea=(/1,1/))
+    CALL putarrnd(fid, '/v',      v,     (/1,2/), garea=(/1,1/))
+    CALL putarrnd(fid, '/u',      u,     (/1,2/), garea=(/1,1/))
+    CALL putarrnd(fid, '/errs',   errs,  (/1,2/), garea=(/1,1/))
+    CALL putarrnd(fid, '/resids', resids,(/1,2/), garea=(/1,1/))
+!
+    CALL putarr(fid, '/resid', resid_it, ionode=0)
+    CALL putarr(fid, '/error', err_it, ionode=0)
+!
+    CALL putmat(fid, '/MAT', mat)
+    CALL closef(fid)
+  END SUBROUTINE h5file
+!
+  FUNCTION outerprod(x, y) RESULT(r)
+!
+!  outer product
+!
+    REAL(rkind), INTENT(in) :: x(:), y(:)
+    REAL(rkind)             :: r(SIZE(x),SIZE(y))
+    INTEGER :: i, j
+    DO j=1,SIZE(y)
+       DO i=1,SIZE(x)
+          r(i,j) = x(i)*y(j)
+       END DO
+    END DO
+  END FUNCTION outerprod
+!
+  FUNCTION rhs(x,y)
+    REAL(rkind), INTENT(in) :: x(:), y(:)
+    REAL(rkind)             :: rhs(SIZE(x),SIZE(y))
+    rhs = -10.d0*pi**2 * outerprod(SIN(pi*x), SIN(3.d0*pi*y))
+  END FUNCTION rhs
+!
+  FUNCTION exact(x,y)
+    REAL(rkind), INTENT(in) :: x(:), y(:)
+    REAL(rkind)             :: exact(SIZE(x),SIZE(y))
+    exact = outerprod(SIN(pi*x), SIN(3.d0*pi*y))
+  END FUNCTION exact
+END PROGRAM main
diff --git a/multigrid/src/test_jacobig.f90 b/multigrid/src/test_jacobig.f90
new file mode 100644
index 0000000..1fdca83
--- /dev/null
+++ b/multigrid/src/test_jacobig.f90
@@ -0,0 +1,331 @@
+!>
+!> @file test_jacobig.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+!
+!    Test 2D  parallel Jacobi using STENCIL_2D matrix-free structure.
+!
+MODULE mod
+  USE iso_fortran_env, ONLY : rkind => real64
+  IMPLICIT NONE
+!
+  LOGICAL, PARAMETER     :: nldebug=.FALSE.
+  REAL(rkind), PARAMETER :: PI=4.d0*ATAN(1.0d0)
+CONTAINS
+END MODULE mod
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+PROGRAM main
+  USE mpi
+  USE fdmat_mod, ONLY : fdmat, ibc_fdmat, ibc_rhs
+  USE pputils2, ONLY  : dist1d, timera, hostlist
+  USE gvector, ONLY   : gvector_2d, ASSIGNMENT(=), OPERATOR(-)
+  USE parmg, ONLY     : grid2_type, create_grid, jacobi, exchange, get_resids, norm_vec
+  USE stencil, ONLY   : stencil_2d, putmat
+  USE mod
+  IMPLICIT NONE
+!
+  INTEGER, PARAMETER :: ndims=2
+!
+  INTEGER                   :: me, neighs(4), npes, ierr
+  INTEGER, DIMENSION(ndims) :: dims=[0,0]
+  INTEGER, DIMENSION(ndims) :: coords, comm1d
+  LOGICAL, DIMENSION(ndims) :: periods=[.FALSE.,.FALSE.]
+  LOGICAL                   :: reorder =.FALSE.
+  INTEGER                   :: comm_cart
+  INTEGER, DIMENSION(ndims) :: e0, s0, e, s, npt_glob, npt_loc
+!
+  REAL(rkind), ALLOCATABLE  :: xgrid(:), ygrid(:)
+  INTEGER, ALLOCATABLE      :: id(:,:)
+  REAL(rkind)               :: dx, dy
+  INTEGER                   :: npoints ! Number of points in FD stencil
+!
+  TYPE(gvector_2d)          :: v_exact, resids, errs
+  REAL(rkind), ALLOCATABLE  :: resid_it(:), err_it(:)
+  INTEGER, DIMENSION(ndims) :: g
+  INTEGER                   :: i, it, it_skip
+!
+  INTEGER :: levels=1
+  TYPE(grid2_type), ALLOCATABLE :: grids(:)
+!
+!   Input quantities
+!
+  CHARACTER(len=4) :: prb='dddd'
+  INTEGER          :: nx=4, ny=4    ! Number of intervals
+  INTEGER          :: kx=1, ky=1
+  REAL(rkind)      :: Lx=1.0, Ly=1.0
+  REAL(rkind)      :: icrosst=1.0, beta=0.0, miome=200.0
+  REAL(rkind)      :: omega=1.0d0
+  INTEGER          :: nits=100, nu=1
+!
+  NAMELIST /in/ prb, nx, ny, kx, ky, Lx, Ly, icrosst, beta, &
+       &        miome, omega, nits, nu
+!================================================================================
+!                        1.0  Prologue
+!
+!  2D process grid
+!
+  CALL mpi_init(ierr)
+  CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
+  CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
+  CALL mpi_dims_create(npes, ndims, dims, ierr)
+  CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, comm_cart,&
+       &               ierr)
+!
+  CALL mpi_comm_rank(comm_cart, me, ierr)
+  CALL mpi_cart_coords(comm_cart, me, ndims, coords, ierr)
+  CALL mpi_cart_shift(comm_cart, 0, 1, neighs(1), neighs(2), ierr)
+  CALL mpi_cart_shift(comm_cart, 1, 1, neighs(3), neighs(4), ierr)
+!
+  CALL mpi_cart_sub(comm_cart, [.TRUE.,.FALSE.], comm1d(1), ierr)
+  CALL mpi_cart_sub(comm_cart, [.FALSE.,.TRUE.], comm1d(2), ierr)
+!
+  CALL hostlist(comm_cart)
+  IF(me.EQ.0) THEN
+     WRITE(*,'(a,i0,a,i0/)') "Process grid: ", dims(1), " X ", dims(2)
+  END IF
+!
+!  Read problem inputs
+!
+  IF(me.EQ.0) THEN
+     READ(*,in)
+     WRITE(*,in)
+  END IF
+  CALL mpi_bcast(prb, LEN(prb), MPI_CHARACTER, 0, comm_cart, ierr)
+  CALL mpi_bcast(kx, 1, MPI_INTEGER, 0, comm_cart, ierr)
+  call mpi_bcast(ky, 1, MPI_INTEGER, 0, comm_cart, ierr)
+  CALL mpi_bcast(icrosst, 1, MPI_DOUBLE_PRECISION, 0, comm_cart, ierr)
+  CALL mpi_bcast(Lx,1,MPI_DOUBLE_PRECISION,0,comm_cart, ierr)
+  CALL mpi_bcast(Ly,1,MPI_DOUBLE_PRECISION,0,comm_cart, ierr)
+  CALL mpi_bcast(beta,1,MPI_DOUBLE_PRECISION,0,comm_cart, ierr)
+  CALL mpi_bcast(miome, 1, MPI_DOUBLE_PRECISION, 0, comm_cart, ierr)
+  CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, comm_cart, ierr)
+  CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, comm_cart, ierr)
+  CALL mpi_bcast(nits, 1, MPI_INTEGER, 0, comm_cart, ierr)
+  CALL mpi_bcast(nu, 1, MPI_INTEGER, 0, comm_cart, ierr)
+  CALL mpi_bcast(omega, 1, MPI_DOUBLE_PRECISION, 0, comm_cart, ierr)
+!================================================================================
+!                        2.0  2d Grid construction
+!
+!  Partition 2D grid
+!
+  CALL timera(0, 'Grid_construction')
+  npt_glob(1) = nx+1
+  npt_glob(2) = ny+1
+  DO i=1,ndims
+     CALL dist1d(comm1d(i), 0, npt_glob(i), s(i), npt_loc(i))
+     e(i) = s(i) + npt_loc(i) - 1
+  END DO
+  WRITE(*,'(a,i3.3,a,2(3i4,", "))') 'PE', me, ' coords, s,e:', &
+       &  (coords(i),s(i),e(i),i=1,ndims)
+!
+!   Global mesh
+!
+  dx = Lx/REAL(nx)
+  dy = Ly/REAL(ny)
+  ALLOCATE(xgrid(0:nx))
+  ALLOCATE(ygrid(0:ny))
+  xgrid = [ (i*dx, i=0,nx) ]
+  ygrid = [ (i*dy, i=0,ny) ]
+  CALL timera(1, 'Grid_construction')
+!
+!   Create grid structure
+!
+  ALLOCATE(grids(levels))
+  npoints = 9             ! Size of FD stencil
+  ALLOCATE(id(npoints,2))
+  id=RESHAPE([ 0, -1,  0,  1, -1, 1, -1, 0, 1,  & 
+               0, -1, -1, -1,  0, 0,  1, 1, 1], &
+              [npoints,2])
+  CALL create_grid(xgrid, ygrid, s, e, id, prb, grids, comm_cart)
+!================================================================================
+!                        3.0  FD Operator
+!
+  CALL timera(0, 'Laplacian')
+!
+  CALL fdmat(grids(1),  fdense, icrosst, grids(1)%fdmat)
+  CALL ibc_fdmat(grids(1)%fdmat, prb)
+!
+  CALL timera(1, 'Laplacian')
+!================================================================================
+!                        4.0 RHS and exact solution
+!
+!   Allocate memory
+!
+  s0 = grids(1)%s0; e0 = grids(1)%e0
+  s  = grids(1)%s;  e  = grids(1)%e
+  g = [1,1]
+  v_exact = gvector_2d(s, e, g) ! Exact solutions   
+  errs    = gvector_2d(s, e, g) ! Disc. errors
+  resids  = gvector_2d(s, e, g) ! Residuals 
+  ALLOCATE(resid_it(0:nits))
+  ALLOCATE(err_it(0:nits))
+!
+!   Set RHS at the finest grid. Impose Dirichlet BC.
+!
+  grids(1)%f = frhs(xgrid(s(1):e(1)),ygrid(s(2):e(2)))
+  CALL ibc_rhs(grids(1)%f, s0, e0, prb)
+!
+!   Exact solutions
+!
+  v_exact = fexact(xgrid(s(1):e(1)),ygrid(s(2):e(2)))
+!================================================================================
+!                        5.0  Jacobi iteration loop
+!
+  IF(me.EQ.0) WRITE(*,'(/a6,t14,a,t34,a)') 'it', 'residual norm', 'discretization error'
+  grids(1)%v = 0.0d0
+  resids = get_resids(comm_cart, grids(1)%fdmat, grids(1)%v, grids(1)%f)
+  errs  = grids(1)%v - v_exact
+  resid_it(0) = norm_vec(resids, comm_cart, root=0)
+  err_it(0)   = norm_vec(errs, comm_cart, root=0)
+  it_skip = MAX(1,nits/10)
+!
+  CALL timera(0, 'Jacobi')
+  DO it=1,nits
+     CALL jacobi(comm_cart, grids(1)%fdmat, omega, nu, grids(1)%v, grids(1)%f)
+     resids = get_resids(comm_cart, grids(1)%fdmat, grids(1)%v, grids(1)%f)
+     errs  = grids(1)%v - v_exact
+     resid_it(it) = norm_vec(resids, comm_cart, root=0)
+     err_it(it)   = norm_vec(errs, comm_cart, root=0)
+     IF(me.EQ.0 .AND. MODULO(it,it_skip).EQ.0 ) THEN 
+        WRITE(*,'(i6,4(1pe12.3))') it, resid_it(it), resid_it(it)/resid_it(it-1),&
+          &    err_it(it), err_it(it)/err_it(it-1)
+     END IF
+  END DO
+  CALL timera(1, 'Jacobi')
+!================================================================================
+!                        9.0  Epilogue
+  CALL h5file
+!
+  CALL timera(9, '')
+  CALL MPI_FINALIZE(ierr)
+CONTAINS
+!
+!+++
+  FUNCTION fdense(x)
+!
+!   Return density
+!
+    REAL(rkind), INTENT(in) :: x(:)
+    REAL(rkind)              :: fdense(SIZE(x))
+    fdense(:) = 0.5d0*beta*miome*EXP( -(x(:)-Lx/3d0)**2d0/(Lx/2d0)**2d0 );
+  END FUNCTION fdense
+!+++
+  FUNCTION fexact(x,y)
+!
+!   Return analytical solution
+!
+    REAL(rkind), INTENT(in) :: x(:), y(:)
+    REAL(rkind) :: fexact(SIZE(x),SIZE(y))
+    REAL(rkind) :: c
+    INTEGER :: j
+    IF(prb.EQ.'dddd') THEN
+       DO j=1,SIZE(y)
+          c = SIN(2.0d0*pi*ky*y(j)/Ly)
+          fexact(:,j) = SIN(2.0d0*pi*kx*x(:)/Lx)*c
+       END DO
+    ELSE IF (prb.EQ.'nndd') THEN
+       DO j=1,SIZE(y)
+          c = SIN(2.0d0*pi*ky*y(j)/Ly)
+          fexact(:,j) = COS(2.0d0*pi*kx*x(:)/Lx)*c
+       END DO
+    END IF
+  END FUNCTION fexact
+!+++
+  SUBROUTINE h5file
+    USE futils
+    CHARACTER(len=128) :: file='test_jacobig.h5'
+    INTEGER :: fid
+    CALL creatf(file, fid, real_prec='d', mpicomm=comm_cart)
+    CALL putarr(fid, '/xgrid', xgrid, ionode=0)  ! only rank 0 does IO
+    CALL putarr(fid, '/ygrid', ygrid, ionode=0)  ! only rank 0 does IO
+!
+    CALL putarrnd(fid, '/f', grids(1)%f%val, (/1,2/), garea=g)
+    CALL putarrnd(fid, '/v', v_exact%val, (/1,2/), garea=g)
+    CALL putarrnd(fid, '/u', grids(1)%v%val, (/1,2/), garea=g)
+    CALL putarrnd(fid, '/errs',   errs%val,  (/1,2/), garea=(/1,1/))
+    CALL putarrnd(fid, '/resids', resids%val,(/1,2/), garea=(/1,1/))
+!
+    CALL putarr(fid, '/resid', resid_it, ionode=0)
+    CALL putarr(fid, '/error', err_it, ionode=0)
+!
+    CALL putmat(fid, '/MAT', grids(1)%fdmat)
+    CALL closef(fid)
+  END SUBROUTINE h5file
+!+++
+  FUNCTION frhs(x,y)
+!
+!   Return RHS
+!
+    REAL(rkind), INTENT(in) :: x(:), y(:)
+    REAL(rkind) :: frhs(SIZE(x),SIZE(y))
+    REAL(rkind) :: c, s, d(SIZE(x))
+    REAL(rkind) :: corr
+    INTEGER :: j
+    corr = 1.d0+icrosst**2/4.0d0
+    d(:) = fdense(x(:))
+    IF(prb.EQ.'dddd') THEN
+       DO j=1,SIZE(y)
+          c = COS(2.0d0*pi*ky*y(j)/Ly)
+          s = SIN(2.0d0*pi*ky*y(j)/Ly)
+          frhs(:,j)=-4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * SIN(2.0d0*pi*kx*x(:)/Lx)*s &
+               &     -icrosst*kx*ky/Lx/Ly * COS(2.0d0*pi*kx*x(:)/Lx)*c ) &
+               &     + d(:) * SIN(2.0d0*pi*kx*x(:)/Lx)*s
+       END DO
+    ELSE IF (prb.EQ.'nndd') THEN
+       DO j=1,SIZE(y)
+          c = COS(2.0d0*pi*ky*y(j)/Ly)
+          s = SIN(2.0d0*pi*ky*y(j)/Ly)
+          frhs(:,j)=-4.0d0*pi*pi*( (kx*kx/Lx/Lx+corr*ky*ky/Ly/Ly) * COS(2.0d0*pi*kx*x(:)/Lx)*s &
+               &     +icrosst*kx*ky/Lx/Ly * SIN(2.0d0*pi*kx*x(:)/Lx)*c ) &
+               &     + d(:) * COS(2.0d0*pi*kx*x(:)/Lx)*s
+       END DO
+    END IF
+  END FUNCTION frhs
+!+++!
+  FUNCTION outerprod(x, y) RESULT(r)
+!
+!  outer product
+!
+    REAL(rkind), INTENT(in) :: x(:), y(:)
+    REAL(rkind)             :: r(SIZE(x),SIZE(y))
+    INTEGER :: i, j
+    DO j=1,SIZE(y)
+       DO i=1,SIZE(x)
+          r(i,j) = x(i)*y(j)
+       END DO
+    END DO
+  END FUNCTION outerprod
+!
+  FUNCTION rhs(x,y)
+    REAL(rkind), INTENT(in) :: x(:), y(:)
+    REAL(rkind)             :: rhs(SIZE(x),SIZE(y))
+    rhs = -10.d0*pi**2 * outerprod(SIN(pi*x), SIN(3.d0*pi*y))
+  END FUNCTION rhs
+!
+  FUNCTION exact(x,y)
+    REAL(rkind), INTENT(in) :: x(:), y(:)
+    REAL(rkind)             :: exact(SIZE(x),SIZE(y))
+    exact = outerprod(SIN(pi*x), SIN(3.d0*pi*y))
+  END FUNCTION exact
+END PROGRAM main
diff --git a/multigrid/src/test_mg.f90 b/multigrid/src/test_mg.f90
new file mode 100644
index 0000000..8275f7a
--- /dev/null
+++ b/multigrid/src/test_mg.f90
@@ -0,0 +1,279 @@
+!>
+!> @file test_mg.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   Test multigrid V-cycle
+!
+  USE multigrid
+  USE math_util, ONLY : root_bessj
+  IMPLICIT NONE
+!
+  INTEGER          :: nx=8, nidbas=1, ngauss=2, alpha=0, nits=40
+  INTEGER          :: modem=22, modep=10
+  INTEGER          :: levels=2, nu1=1, nu2=1, mu=1, nu0=1
+  CHARACTER(len=4) :: relax='jac '
+  LOGICAL          :: nlfixed = .FALSE.
+  DOUBLE PRECISION :: sigma=1.0d0, kmode=10.0, pi=4.0d0*ATAN(1.0d0)
+  DOUBLE PRECISION :: omega=2.0d0/3.0d0
+  INTEGER          :: l, nrank, its
+  DOUBLE PRECISION :: errdisc_dir
+  DOUBLE PRECISION, ALLOCATABLE :: u_direct(:), u_exact(:), u_calc(:)
+  DOUBLE PRECISION, ALLOCATABLE :: sol_direct(:), sol_calc(:)
+  DOUBLE PRECISION, ALLOCATABLE :: err(:), resid(:), errdisc(:)
+  DOUBLE PRECISION, ALLOCATABLE :: errdisc_fmg(:)
+!
+  TYPE(grid1d), ALLOCATABLE :: gridx(:)
+  TYPE(mg_info)             :: info
+!
+  NAMELIST /newrun/ nx, nidbas, ngauss, sigma, kmode, modem, modep, alpha, &
+       &            relax, omega, nits, nlfixed, levels, nu1, nu2, mu, nu0
+!--------------------------------------------------------------------------------
+!                   1.  Prologue
+!   Inputs
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+  levels = MIN(levels, get_lmax(nx))
+!
+  info%nu1 = nu1
+  info%nu2 = nu2
+  info%mu = mu
+  info%nu0 = nu0
+  info%levels = levels
+  info%relax = relax
+  info%omega = omega
+!
+!   Create grids
+!
+  ALLOCATE(gridx(levels))
+  CALL create_grid(nx, nidbas, ngauss, alpha, gridx)
+  WRITE(*,'(a/(20i6))') 'Number of intervals in grids', (gridx(l)%n, l=1,levels)
+!
+!   Create FE matrice and set BC u(0)=u(1)=0
+!
+  DO l=1,levels
+     CALL femat(gridx(l)%spl, gridx(l)%mata, coefeq)
+!
+!     Left Dirichlet BC (only for Cartesian geometry)
+     IF(alpha .EQ. 0) THEN
+        CALL ibcmat(1, gridx(l)%mata)
+     END IF
+!
+!     Right Dirichlet BC
+     CALL ibcmat(gridx(l)%mata%rank, gridx(l)%mata)
+!
+!     BC on grid transfer operator
+     IF(l.GT.1) THEN
+        WHERE( ABS(gridx(l)%transf%val) < 1.d-8) gridx(l)%transf%val=0.0d0
+        IF(alpha .EQ. 0) gridx(l)%transf%val(2:,1)=0.0d0
+        gridx(l)%transf%val(1:gridx(l-1)%rank-1,gridx(l)%rank)=0.0d0
+     END IF
+  END DO
+  CALL printdiag_gb('Diagonal of coarsest A', gridx(levels)%mata)
+!
+!   Construct RHS and set BC only on the finest grid
+!
+  nrank = gridx(1)%rank
+  CALL disrhs(gridx(1)%spl, gridx(1)%f, rhs)
+!
+!     Left Dirichlet BC (only for Cartesian geometry)
+  IF(alpha .EQ. 0) THEN
+     gridx(1)%f(1) = 0.0d0
+  END IF
+!
+!     Right Dirichlet BC
+  gridx(1)%f(nrank) = 0.0d0
+!--------------------------------------------------------------------------------
+!                   2.  Direct solution
+!
+  WRITE(*,'(//a)') 'Direct solution for the finest grid problem'
+  ALLOCATE(u_direct(0:nx))
+  ALLOCATE(sol_direct(nrank))
+  CALL direct_solve(gridx(1), sol_direct)
+  CALL gridval(gridx(1)%spl, gridx(1)%x, u_direct, 0, sol_direct)
+  errdisc_dir = disc_err(gridx(1)%spl, sol_direct, sol)
+  WRITE(*,'(a,1pe12.3)') 'Discretization error', errdisc_dir
+!--------------------------------------------------------------------------------
+!                   3.  Solution from MG V-cycles
+!
+  WRITE(*,'(//a)') 'Multigrid MG V-cycles'
+  ALLOCATE(sol_calc(nrank))
+  ALLOCATE(err(0:nits))
+  ALLOCATE(errdisc(0:nits))
+  ALLOCATE(resid(0:nits))
+!
+!   Initial guess
+!
+  sol_calc(:) = 0.0d0
+  IF(nlfixed) THEN
+     sol_calc(:) = sol_direct(:)
+  END IF
+  gridx(1)%v(:) = sol_calc(:)
+  err(0) = normf(gridx(1)%matm, sol_calc-sol_direct)
+  errdisc(0) = disc_err(gridx(1)%spl, sol_calc, sol)
+  resid(0) = residue(gridx(1)%mata, gridx(1)%f, sol_calc)
+!
+!   Iterations
+!
+  DO its=1,nits
+     CALL mg(gridx, info, 1)
+     sol_calc(:) = gridx(1)%v(:)
+     err(its) = normf(gridx(1)%matm, sol_calc-sol_direct)
+     errdisc(its) = disc_err(gridx(1)%spl, sol_calc, sol)
+     resid(its) = residue(gridx(1)%mata, gridx(1)%f, sol_calc)
+  END DO
+!
+  WRITE(*,'(a4,3(a12,a8))') 'its', 'error', 'ratio', 'residue', 'ratio', &
+       &                'disc. err', 'ratio'
+  WRITE(*,'(i4,3(1pe12.3,8x))') 0, err(0), resid(0), errdisc(0)
+  WRITE(*,'((i4,3(1pe12.3,0pf8.2)))') (its, err(its), err(its)/err(its-1), &
+       &   resid(its), resid(its)/resid(its-1), &
+       &   errdisc(its), errdisc(its)/errdisc(its-1), its=1,nits)
+!--------------------------------------------------------------------------------
+!                   4.  Solution from FMG
+!
+  WRITE(*,'(//a)') 'Full Multigrid'
+
+  ALLOCATE(errdisc_fmg(nits))
+  DO its=1,nits
+     info%nu0 = its
+     CALL fmg(gridx, info, 1)
+     sol_calc(:) = gridx(1)%v(:)
+     errdisc_fmg(its) = disc_err(gridx(1)%spl, sol_calc, sol)
+     resid(its) = residue(gridx(1)%mata, gridx(1)%f, sol_calc)
+  END DO
+  WRITE(*,'(a4,2(a12,a8))') 'nu0', 'residue', 'ratio','disc. err', 'ratio'
+  WRITE(*,'((i4,2(1pe12.3,0pf8.3)))') (its, resid(its), resid(its)/resid(its-1), &
+       &     errdisc_fmg(its),errdisc_fmg(its)/errdisc_dir, its=1,nits)
+!
+!   Grid values at final iteration
+!
+  ALLOCATE(u_exact(0:nx))
+  ALLOCATE(u_calc(0:nx))
+  u_exact = sol(gridx(1)%x)
+  CALL gridval(gridx(1)%spl, gridx(1)%x, u_calc, 0, sol_calc)
+!--------------------------------------------------------------------------------
+!                   9.  Epilogue
+!
+!   Creata HDF5 file
+!
+  CALL h5file
+!--------------------------------------------------------------------------------
+CONTAINS
+  FUNCTION rhs(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    DOUBLE PRECISION :: rhs
+    DOUBLE PRECISION :: nump
+    SELECT CASE (alpha)
+    CASE(0)                  ! Cartesian geometry
+       rhs = SIN(pi*kmode*x)
+    CASE(1)                  ! Cylindrical
+       nump = root_bessj(modem, modep)
+       rhs = x * nump**2 * bessel_jn(modem, nump*x)
+    END SELECT
+  END FUNCTION rhs
+  FUNCTION sol(x)
+    DOUBLE PRECISION, INTENT(in) :: x(:)
+    DOUBLE PRECISION :: sol(SIZE(x))
+    DOUBLE PRECISION :: nump
+    SELECT CASE (alpha)
+    CASE(0)                  ! Cartesian geometry
+       sol(:) = 1.0d0/((pi*kmode)**2+sigma)*SIN(pi*kmode*x(:))
+    CASE(1)                  ! Cylindrical
+       nump = root_bessj(modem, modep)
+       sol(:) = bessel_jn(modem, nump*x(:))
+    END SELECT
+  END FUNCTION sol
+  SUBROUTINE coefeq(x, idt, idw, c)
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(out) :: idt(:), idw(:)
+    DOUBLE PRECISION, INTENT(out) :: c(:)
+    SELECT CASE (alpha)
+    CASE(0)           ! Cartesian geometry
+       c(1) = 1.0d0
+       idt(1) = 1
+       idw(1) = 1
+       c(2) = sigma
+       idt(2) = 0
+       idw(2) = 0
+    CASE(1)           ! Cylindrical
+       c(1) = x
+       idt(1) = 1
+       idw(1) = 1
+       c(2) = REAL(modem,8)**2/x
+       idt(2) = 0
+       idw(2) = 0
+    END SELECT
+  END SUBROUTINE coefeq
+  SUBROUTINE h5file
+    USE futils
+    CHARACTER(len=128) :: file='test_mg.h5'
+    INTEGER :: fid
+    INTEGER :: l
+    CHARACTER(len=64) :: dsname
+    CALL creatf(file, fid, real_prec='d')
+    CALL attach(fid, '/', 'NX', nx)
+    CALL attach(fid, '/', 'NIDBAS', nidbas)
+    CALL attach(fid, '/', 'SIGMA', sigma)
+    CALL attach(fid, '/', 'KMODE', kmode)
+    CALL attach(fid, '/', 'MODEM', modem)
+    CALL attach(fid, '/', 'MODEP', modep)
+    CALL attach(fid, '/', 'ALPHA', alpha)
+    CALL attach(fid, '/', 'RELAX', relax)
+    CALL attach(fid, '/', 'OMEGA', omega)
+    CALL attach(fid, '/', 'NITS', nits)
+    CALL attach(fid, '/', 'NLFIXED', nlfixed)
+    CALL attach(fid, '/', 'LEVELS', levels)
+    CALL attach(fid, '/', 'NU1', nu1)
+    CALL attach(fid, '/', 'NU2', nu2)
+    CALL attach(fid, '/', 'NU0', nu0)
+    CALL attach(fid, '/', 'MU', mu)
+    CALL creatg(fid, '/mglevels')
+    DO l=1,levels
+       WRITE(dsname,'("/mglevels/level.",i2.2)') l
+       CALL creatg(fid, TRIM(dsname))
+       CALL putmat(fid, TRIM(dsname)//'/mata', gridx(l)%mata)
+       IF(l.GT.1) THEN
+          CALL putarr(fid, TRIM(dsname)//'/matp', gridx(l)%transf%val)
+          CALL attach(fid, TRIM(dsname)//'/matp', 'M', gridx(l)%transf%mrows)
+          CALL attach(fid, TRIM(dsname)//'/matp', 'N', gridx(l)%transf%ncols)
+       END IF
+       CALL putarr(fid, TRIM(dsname)//'/f', gridx(l)%f)
+       CALL putarr(fid, TRIM(dsname)//'/v', gridx(l)%v)
+    END DO
+    CALL creatg(fid, '/Iterations')
+    CALL putarr(fid, '/Iterations/errors', err)
+    CALL putarr(fid, '/Iterations/residues', resid)
+    CALL putarr(fid, '/Iterations/disc_errors', errdisc)
+    CALL putarr(fid, '/Iterations/disc_errors_fmg', errdisc_fmg)
+    CALL putarr(fid, '/Iterations/xgrid', gridx(1)%x)
+    CALL putarr(fid, '/Iterations/u_direct', u_direct)
+    CALL putarr(fid, '/Iterations/u_exact', u_exact)
+    CALL putarr(fid, '/Iterations/u_calc', u_calc)
+    CALL closef(fid)
+  END SUBROUTINE h5file
+END PROGRAM main
diff --git a/multigrid/src/test_mg2d.f90 b/multigrid/src/test_mg2d.f90
new file mode 100644
index 0000000..4730a65
--- /dev/null
+++ b/multigrid/src/test_mg2d.f90
@@ -0,0 +1,413 @@
+!>
+!> @file test_mg2d.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   Test 2d multigrid: Cartesian case
+!
+  USE multigrid
+  USE csr
+  IMPLICIT NONE
+  INCLUDE 'mpif.h'
+!
+  DOUBLE PRECISION, PARAMETER :: PI=4.0d0*ATAN(1.0d0)
+  INTEGER, DIMENSION(2) :: n, nidbas, ngauss, alpha
+  DOUBLE PRECISION :: kx=4.d0, ky=3.d0, sigma=10.0d0
+  CHARACTER(len=4) :: prb='poly'
+  INTEGER :: levels=1, nu1=1, nu2=1, mu=1, nu0=1, nits
+  CHARACTER(len=4) :: relax='jac '
+  DOUBLE PRECISION :: omega=2.0d0/3.0d0, tol
+  LOGICAL :: nlfixed=.FALSE.
+  DOUBLE PRECISION :: t0, tsetup(2), tdirect, tbsolve, titer, titer_per_step
+  DOUBLE PRECISION :: resid_direct, errdisc_direct
+  DOUBLE PRECISION :: norma, normb
+!
+  DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:)
+  DOUBLE PRECISION :: dx, dy
+  INTEGER :: ix, iy
+!
+  DOUBLE PRECISION, ALLOCATABLE :: sol_anal_grid(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: sol_calc_grid(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: rresid(:), resid(:), errdisc(:)
+!
+  DOUBLE PRECISION, ALLOCATABLE, TARGET :: sol_direct(:,:)
+  DOUBLE PRECISION, POINTER     :: sol_direct_1d(:)
+  DOUBLE PRECISION, ALLOCATABLE :: sol_direct_grid(:,:)
+!
+  INTEGER :: ierr, me
+  INTEGER :: l, nterms
+  INTEGER :: its
+!
+  TYPE(grid2d), ALLOCATABLE :: grids(:)
+  TYPE(mg_info)             :: info
+!
+  NAMELIST /newrun/ n, nidbas, ngauss, kx, ky, sigma, alpha, levels, prb, &
+       &            nu1, nu2, mu, nu0, relax, nits, tol, nlfixed, omega
+!--------------------------------------------------------------------------------
+!                    1.   Prologue
+!
+  CALL mpi_init(ierr)
+  CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
+!
+!   Inputs
+!
+  n = (/8, 8/)
+  nidbas=(/3,3/)
+  ngauss=(/2,2/)
+  alpha = (/0,0/)
+  kx=4
+  ky=3
+  sigma=10.0d0
+  levels=2
+  prb='poly'
+  nu1 = 1
+  nu2 = 1
+  mu = 1
+  nu0 = 1
+  nits = 10
+  tol = 1.e-8
+  relax = 'jac'
+  nlfixed= .FALSE.
+!
+  IF(me.EQ.0) THEN
+     READ(*,newrun)
+     WRITE(*,newrun)
+  END IF
+     CALL mpi_bcast(n, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(alpha, 2, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(kx, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(ky, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(sigma, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(levels, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(prb, LEN(prb), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(nu1, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(nu2, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(nu0, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(mu, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(nits, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(relax, 4, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(nlfixed, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(omega, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(tol, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+!
+!   Adjust number of levels and fill mg info.
+!
+  levels = MIN(levels, get_lmax(n(1)), get_lmax(n(2)))
+  info%nu1 = nu1
+  info%nu2 = nu2
+  info%mu = mu
+  info%nu0 = nu0
+  info%levels = levels
+  info%relax = relax
+  info%omega = omega
+!
+!   Create grids
+!
+  t0 = mpi_wtime()
+!
+  dx = 1.0d0/REAL(n(1),8)
+  dy = 1.0d0/REAL(n(2),8)
+  ALLOCATE(x(0:n(1)), y(0:n(2)))
+  DO ix=0,n(1)
+     x(ix) = ix*dx
+  END DO
+  DO iy=0,n(2)
+     y(iy) = iy*dy
+  END DO
+!
+  ALLOCATE(grids(levels))
+  CALL create_grid(x, y, nidbas, ngauss, alpha, grids)
+  WRITE(*,'(5a6)') 'l', 'nx', 'ny', 'rx', 'ry'
+  WRITE(*,'(5i6)') (l, grids(l)%n, grids(l)%rank, l=1,levels)
+!
+!   Construct RHS and set BC only on the finest grid
+!
+  CALL disrhs(grids(1)%spl, grids(1)%f, rhs)
+  CALL ibcrhs(grids(1), grids(1)%f)
+!
+!   Build FE matrices and set BC
+!
+  nterms = 3
+  DO l=1,levels
+     CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms)
+     CALL ibcmat(grids(l), grids(l)%mata)
+     CALL to_mat(grids(l)%mata)
+  END DO
+!
+!   Set BC on grid transfer matrices
+!
+  CALL ibc_transf(grids,1,3)
+  CALL ibc_transf(grids,2,3)
+!
+  tsetup(1) = mpi_wtime()-t0
+!
+!   Clear and rebuild FE matrices and set BC
+!
+  t0 = mpi_wtime()
+  nterms = 2
+  DO l=1,levels
+     CALL clear_mat(grids(l)%mata)
+     CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms, noinit=.TRUE.)
+     CALL ibcmat(grids(l), grids(l)%mata)
+  END DO
+  tsetup(2) = mpi_wtime()-t0
+!--------------------------------------------------------------------------------
+!                   1.  Analytical solution (at the finest grid, l=1)
+!
+  ALLOCATE(sol_anal_grid(0:n(1),0:n(2)))
+  sol_anal_grid = sol(grids(1)%x, grids(1)%y)
+!--------------------------------------------------------------------------------
+!                   2.  Direct solution (at the finest grid, l=1)
+!
+  WRITE(*,'(//a)') 'Direct solution for the finest grid problem'
+!
+  ALLOCATE(sol_direct(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)), &
+       &         source=grids(1)%f)
+  ALLOCATE(sol_direct_grid(0:n(1),0:n(2)))
+!
+  sol_direct_1d(1:SIZE(grids(1)%v1d)) => sol_direct
+!
+!
+  PRINT*, 'shape of sol_direct', SHAPE(sol_direct)
+  PRINT*, 'shape of sol_direct_1d', SHAPE(sol_direct_1d)
+!
+  t0 = mpi_wtime()
+  sol_direct = grids(1)%f
+  CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.)
+  tdirect = mpi_wtime()-t0
+!
+  t0 = mpi_wtime()
+  sol_direct = grids(1)%f
+  CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.)
+  resid_direct = residue(grids(1)%mata, grids(1)%f1d, sol_direct_1d)
+  errdisc_direct = disc_err(grids(1)%spl, sol_direct, sol)
+!
+  tbsolve = mpi_wtime()-t0
+!
+  CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_direct_grid, &
+       &       [0,0], sol_direct)
+!
+  WRITE(*,'(a,2(1pe12.3))') 'Discretization error and residue =', &
+       &        errdisc_direct, resid_direct
+  WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
+       &    NORM2(sol_anal_grid-sol_direct_grid) / NORM2(sol_anal_grid)
+!
+  WRITE(*, '(a,1pe12.3)') 'Frobenius norm of A', matnorm(grids(1)%mata)
+  WRITE(*, '(a,1pe12.3)') 'Infinity norm of A ', matnorm(grids(1)%mata, 'inf')
+  WRITE(*, '(a,1pe12.3)') '1 norm of A        ', matnorm(grids(1)%mata, '1')
+!--------------------------------------------------------------------------------
+!                   3.  Test multigrid V-cycle
+!
+  WRITE(*,'(/a)') 'Multigrid MG V-cycles ...'
+  ALLOCATE(sol_calc_grid(0:n(1),0:n(2)))
+  ALLOCATE(errdisc(0:nits))
+  ALLOCATE(resid(0:nits))
+  ALLOCATE(rresid(0:nits))
+!
+!   Norm of A and b
+!
+  norma = matnorm(grids(1)%mata)
+  normb = NORM2(grids(1)%f1d)
+!
+!   Initial guess
+!
+  IF(nlfixed) THEN
+     grids(1)%v = sol_direct
+  ELSE
+     grids(1)%v = 0.0d0
+  END IF
+!
+  errdisc(0) = disc_err(grids(1)%spl, grids(1)%v, sol)
+  resid(0) = residue(grids(1)%mata, grids(1)%f1d, grids(1)%v1d)
+  rresid(0) = resid(0)  /  ( norma*NORM2(grids(1)%v1d) + normb )
+  WRITE(*,'(a4,3(a12,a8))') 'its', 'residue', 'ratio', 'disc. err', 'ratio', &
+       &                           'rel. resid', 'ratio'
+  WRITE(*,'(i4,3(1pe12.3,8x))') 0, resid(0), errdisc(0), rresid(0)
+!
+!   Iterations
+!
+  t0 = mpi_wtime()
+  DO its=1,nits
+     CALL mg(grids, info, 1)
+     errdisc(its) = disc_err(grids(1)%spl, grids(1)%v, sol)
+     resid(its) = residue(grids(1)%mata, grids(1)%f1d, grids(1)%v1d)
+     rresid(its) = resid(its)  / ( norma*NORM2(grids(1)%v1d) + normb )
+     WRITE(*,'((i4,3(1pe12.3,0pf8.2)))')  its, &
+          &   resid(its),   resid(its)/resid(its-1), &
+          &   errdisc(its), errdisc(its)/errdisc(its-1), &
+          &   rresid(its),  rresid(its)/rresid(its-1)
+     IF(resid(its) .LE. tol) EXIT
+  END DO
+  nits = MIN(nits,its)
+  titer = mpi_wtime() - t0
+  titer_per_step = titer/REAL(its,8)
+!
+  CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_calc_grid, &
+       &       [0,0], grids(1)%v)
+!--------------------------------------------------------------------------------
+!                   9.  Epilogue
+!
+!   Display timing
+!
+  WRITE(*,'(a,2(1pe12.3))') 'Set up time (s)            ', tsetup
+  WRITE(*,'(a,2(1pe12.3))') 'Direct and solve time (s)  ', tdirect, tbsolve
+  WRITE(*,'(a,1pe12.3,i5)') 'Iter time (s)              ', titer, nits
+!
+!   Creata HDF5 file
+!
+  IF(me.EQ.0) CALL h5file
+!
+  CALL mpi_finalize(ierr)
+!--------------------------------------------------------------------------------
+CONTAINS
+!+++
+  FUNCTION rhs(x, y)
+!
+! Return problem RHS
+!
+    DOUBLE PRECISION, INTENT(in) :: x, y
+    DOUBLE PRECISION :: rhs
+    DOUBLE PRECISION :: x2, y2
+    SELECT CASE(TRIM(prb))
+    CASE('poly')
+       x2 = x*x; y2 = y*y;
+       rhs = 2.d0 * ( (1.0d0-6.d0*x2)*y2*(1.d0-y2) + &
+            &         (1.0d0-6.d0*y2)*x2*(1.d0-x2)  )
+    CASE('trig')
+       rhs = SIN(PI*kx*x)*SIN(PI*ky*y)
+    END SELECT
+  END FUNCTION rhs
+!+++
+  FUNCTION sol(x, y)
+!
+! Return exact problem solution
+!
+    DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
+    DOUBLE PRECISION :: sol(SIZE(x),SIZE(y))
+    DOUBLE PRECISION :: c
+    DOUBLE PRECISION  :: x2(SIZE(x)), y2(SIZE(y))
+    INTEGER :: j
+    SELECT CASE(TRIM(prb))
+    CASE('poly')
+       x2 = x*x; y2 = y*y;
+       DO j=1,SIZE(y)
+          c = y2(j)*(y2(j)-1.d0)
+          sol(:,j) = c*x2(:)*(1.0d0-x2(:))
+       END DO
+    CASE('trig')
+       DO j=1,SIZE(y)
+          c = SIN(PI*ky*y(j)) / (PI**2*(kx**2+ky**2) + sigma**2)
+          sol(:,j) = c * SIN(PI*kx*x(:))
+       END DO
+    END SELECT
+  END FUNCTION sol
+!+++
+  SUBROUTINE coefeq(x, y, idt, idw, c)
+!
+! Weak form = Int( \nabla(w).\nabla(t) + \sigma.t.w) dV)
+!
+    DOUBLE PRECISION, INTENT(in)  :: x, y
+    INTEGER, INTENT(out)           :: idt(:,:), idw(:,:)
+    DOUBLE PRECISION, INTENT(out) :: c(:)
+!
+    c(1) = 1.0d0
+    idt(1,1) = 1
+    idt(1,2) = 0
+    idw(1,1) = 1
+    idw(1,2) = 0
+!
+    c(2) = 1.0d0
+    idt(2,1) = 0
+    idt(2,2) = 1
+    idw(2,1) = 0
+    idw(2,2) = 1
+!
+    c(3) = sigma
+    idt(3,1) = 0
+    idt(3,2) = 0
+    idw(3,1) = 0
+    idw(3,2) = 0
+
+  END SUBROUTINE coefeq
+!+++
+  SUBROUTINE h5file
+    USE futils
+    CHARACTER(len=128) :: file='test_mg2d.h5'
+    INTEGER :: fid
+    INTEGER :: l
+    CHARACTER(len=64) :: dsname
+    CALL creatf(file, fid, real_prec='d')
+    CALL attach(fid, '/', 'NX', n(1))
+    CALL attach(fid, '/', 'NY', n(2))
+    CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
+    CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
+    CALL attach(fid, '/', 'KX', kx)
+    CALL attach(fid, '/', 'KY', ky)
+    CALL attach(fid, '/', 'SIGMA', sigma)
+    CALL attach(fid, '/', 'ALPHA1', alpha(1))
+    CALL attach(fid, '/', 'ALPHA2', alpha(2))
+    CALL attach(fid, '/', 'RELAX', relax)
+    CALL attach(fid, '/', 'NITS', nits)
+    CALL attach(fid, '/', 'LEVELS', levels)
+    CALL attach(fid, '/', 'NU1', nu1)
+    CALL attach(fid, '/', 'NU2', nu2)
+    CALL attach(fid, '/', 'NU0', nu0)
+    CALL attach(fid, '/', 'MU', mu)
+    CALL creatg(fid, '/mglevels')
+    DO l=1,levels
+       WRITE(dsname,'("/mglevels/level.",i2.2)') l
+       CALL creatg(fid, TRIM(dsname))
+       CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata)
+       IF(l.GT.1) THEN
+          CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1))
+          CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2))
+       END IF
+       CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x)
+       CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y)
+       CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f)
+       CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v)
+       CALL putarr(fid, TRIM(dsname)//'/f1d', grids(l)%f1d)
+       CALL putarr(fid, TRIM(dsname)//'/v1d', grids(l)%v1d)
+    END DO
+!
+!  Solutions at finest grid
+!
+    CALL creatg(fid, '/solutions')
+    CALL putarr(fid, '/solutions/xg', grids(1)%x)
+    CALL putarr(fid, '/solutions/yg', grids(1)%y)
+    CALL putarr(fid, '/solutions/anal', sol_anal_grid)
+    CALL putarr(fid, '/solutions/calc', sol_calc_grid)
+    CALL putarr(fid, '/solutions/direct', sol_direct_grid)
+!
+    CALL creatg(fid, '/Iterations')
+    CALL putarr(fid, '/Iterations/residues', resid(0:nits))
+    CALL putarr(fid, '/Iterations/disc_errors', errdisc(0:nits))
+!
+    CALL closef(fid)
+  END SUBROUTINE h5file
+!+++
+END PROGRAM
diff --git a/multigrid/src/test_mg2d_cyl.f90 b/multigrid/src/test_mg2d_cyl.f90
new file mode 100644
index 0000000..73644ec
--- /dev/null
+++ b/multigrid/src/test_mg2d_cyl.f90
@@ -0,0 +1,427 @@
+!>
+!> @file test_mg2d_cyl.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   Test 2d multigrid 
+!   Cylindrical case
+!
+  USE multigrid
+  USE csr
+  IMPLICIT NONE
+  INCLUDE 'mpif.h'
+!
+  DOUBLE PRECISION, PARAMETER :: PI=4.0d0*ATAN(1.0d0)
+  INTEGER, DIMENSION(2) :: n, nidbas, ngauss
+  INTEGER :: modem=22, modep=10
+  CHARACTER(len=4) :: prb='poly'
+  INTEGER :: levels=1, nu1=1, nu2=1, mu=1, nu0=1, nits
+  CHARACTER(len=4) :: relax='jac '
+  DOUBLE PRECISION :: omega=2.0d0/3.0d0, tol, rtol
+  LOGICAL :: nluniq=.TRUE.
+  LOGICAL :: nlfixed=.FALSE.
+  DOUBLE PRECISION :: t0, tsetup(2), tdirect, tbsolve, titer, titer_per_step
+  DOUBLE PRECISION :: resid_direct, errdisc_direct
+  DOUBLE PRECISION :: norma, normb
+!
+  DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:)
+  DOUBLE PRECISION :: dx, dy
+  INTEGER :: ix, iy
+!
+  DOUBLE PRECISION, ALLOCATABLE :: sol_anal_grid(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: sol_calc_grid(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: rresid(:), resid(:), errdisc(:)
+!
+  DOUBLE PRECISION, ALLOCATABLE, TARGET :: sol_direct(:,:)
+  DOUBLE PRECISION, POINTER     :: sol_direct_1d(:)
+!
+  DOUBLE PRECISION, ALLOCATABLE :: sol_direct_orig(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: sol_direct_grid(:,:)
+!
+  DOUBLE PRECISION, ALLOCATABLE :: sol_orig(:,:)
+!
+  INTEGER :: ierr, me
+  INTEGER :: l, nterms
+  INTEGER :: its
+!
+  TYPE(grid2d), ALLOCATABLE :: grids(:)
+  TYPE(mg_info)             :: info
+!
+  NAMELIST /newrun/ n, nidbas, ngauss, modem, modep, levels, prb, &
+       &            nu1, nu2, mu, nu0, relax, nits, tol, rtol, nlfixed, &
+       &            nluniq, omega
+!--------------------------------------------------------------------------------
+!                    1.   Prologue
+!
+  CALL mpi_init(ierr)
+  CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
+!
+!   Inputs
+!
+  n = (/8, 8/)
+  nidbas=(/3,3/)
+  ngauss=(/2,2/)
+  modem = 22
+  modep = 10
+  prb='poly'
+  levels=2
+  nu1 = 1
+  nu2 = 1
+  mu = 1
+  nu0 = 1
+  nits = 10
+  tol = 1.e-8
+  rtol = 1.e-10
+  relax = 'jac'
+  nlfixed= .FALSE.
+  nluniq = .TRUE.
+!
+  IF(me.EQ.0) THEN
+     READ(*,newrun)
+     WRITE(*,newrun)
+  END IF
+     CALL mpi_bcast(n, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(modem, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(modep, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(levels, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(nu1, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(nu2, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(nu0, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(mu, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(nits, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(relax, 4, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(prb, LEN(prb), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(nlfixed, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(nluniq, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(omega, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(tol, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(rtol, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+!
+!   Adjust number of levels and fill mg info.
+!
+  levels = MIN(levels, get_lmax(n(1)), get_lmax(n(2)))
+  info%nu1 = nu1
+  info%nu2 = nu2
+  info%mu = mu
+  info%nu0 = nu0
+  info%levels = levels
+  info%relax = relax
+  info%omega = omega
+!
+!   Create grids
+!
+  t0 = mpi_wtime()
+!
+  dx = 1.0d0/REAL(n(1),8)
+  dy = 2.0d0*pi/REAL(n(2),8)
+  ALLOCATE(x(0:n(1)), y(0:n(2)))
+  DO ix=0,n(1)
+     x(ix) = ix*dx
+  END DO
+  DO iy=0,n(2)
+     y(iy) = iy*dy
+  END DO
+!
+  ALLOCATE(grids(levels))
+  CALL create_grid(x, y, nidbas, ngauss, [1, 0], grids, period=[.FALSE., .TRUE.], &
+       &           debug_in=.FALSE.)
+  WRITE(*,'(5a6)') 'l', 'nx', 'ny', 'rx', 'ry'
+  WRITE(*,'(5i6)') (l, grids(l)%n, grids(l)%rank, l=1,levels)
+!!$  CALL printmat('** Prolongation matrix in 1st dim.**', grids(2)%transf(1))
+!!$  CALL printmat('** Prolongation matrix in 2nd dim.**', grids(2)%transf(2))
+!
+!   Construct RHS and set BC only on the finest grid
+!
+  CALL disrhs(grids(1)%spl, grids(1)%f, rhs)
+!!$  WRITE(*,'(a/(8(1pe12.3)))') 'Orig RHS at the axis',  grids(1)%f(1,1:n(2))
+  CALL ibcrhs(grids(1), grids(1)%f, nluniq_in=nluniq)
+!!$  WRITE(*,'(a/(8(1pe12.3)))') 'RHS at the axis',  grids(1)%f(1,1:n(2))
+!
+!   Build FE matrices and set BC
+!
+  nterms = 2
+  DO l=1,levels
+     CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms)
+     CALL ibcmat(grids(l), grids(l)%mata, nluniq_in=nluniq)
+     CALL to_mat(grids(l)%mata)
+  END DO
+!
+!   Set BC on grid transfer matrices
+!
+  CALL ibc_transf(grids, 1, 2)   ! Only right boundary on r (1st dim.)
+  tsetup(1) = mpi_wtime()-t0
+!
+!   Clear and rebuild FE matrices and set BC
+!
+  t0 = mpi_wtime()
+  nterms = 2
+  DO l=1,levels
+     CALL clear_mat(grids(l)%mata)
+     CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms, noinit=.TRUE.)
+     CALL ibcmat(grids(l), grids(l)%mata, nluniq_in=nluniq)
+  END DO
+  tsetup(2) = mpi_wtime()-t0
+!--------------------------------------------------------------------------------
+!                   1.  Analytical solution (at the finest grid, l=1)
+!
+  ALLOCATE(sol_anal_grid(0:n(1),0:n(2)))
+  sol_anal_grid = sol(grids(1)%x, grids(1)%y)
+!--------------------------------------------------------------------------------
+!                   2.  Direct solution (at the finest grid, l=1)
+!
+  WRITE(*,'(//a)') 'Direct solution for the finest grid problem'
+!
+  ALLOCATE(sol_direct(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)), &
+       &         source=grids(1)%f)
+  ALLOCATE(sol_direct_orig(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)))
+  ALLOCATE(sol_direct_grid(0:n(1),0:n(2)))
+!
+  sol_direct_1d(1:SIZE(grids(1)%v1d)) => sol_direct
+!
+!
+  PRINT*, 'shape of sol_direct', SHAPE(sol_direct)
+  PRINT*, 'shape of sol_direct_1d', SHAPE(sol_direct_1d)
+!
+  t0 = mpi_wtime()
+  sol_direct = grids(1)%f
+  CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.)
+  tdirect = mpi_wtime()-t0
+!
+  t0 = mpi_wtime()
+  sol_direct = grids(1)%f
+  CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.)
+  resid_direct = residue(grids(1)%mata, grids(1)%f1d, sol_direct_1d)
+!
+  sol_direct_orig = sol_direct
+  CALL back_transf(grids(1), sol_direct_orig, nluniq_in=nluniq)
+  errdisc_direct = disc_err(grids(1)%spl, sol_direct_orig, sol)
+!
+  tbsolve = mpi_wtime()-t0
+!
+  CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_direct_grid, &
+       &       [0,0], sol_direct_orig)
+!
+  WRITE(*,'(a,2(1pe12.3))') 'Discretization error and residue =', &
+       &        errdisc_direct, resid_direct
+  WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
+       &    NORM2(sol_anal_grid-sol_direct_grid) / NORM2(sol_anal_grid)
+!--------------------------------------------------------------------------------
+!                   3.  Test multigrid V-cycle
+!
+  WRITE(*,'(/a)') 'Multigrid MG V-cycles ...'
+  ALLOCATE(sol_calc_grid(0:n(1),0:n(2)))
+  ALLOCATE(errdisc(0:nits))
+  ALLOCATE(resid(0:nits))
+  ALLOCATE(rresid(0:nits))
+  ALLOCATE(sol_orig(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)))
+!
+!   Norm of A and b
+!
+  norma = matnorm(grids(1)%mata)
+  normb = NORM2(grids(1)%f1d)
+!
+  WRITE(*, '(a,1pe12.3)') 'Frobenius norm of A', norma
+  WRITE(*, '(a,1pe12.3)') 'Infinity norm of A ', matnorm(grids(1)%mata, 'inf')
+  WRITE(*, '(a,1pe12.3)') '1 norm of A        ', matnorm(grids(1)%mata, '1')
+!
+!   Initial guess
+!
+  IF(nlfixed) THEN
+     grids(1)%v = sol_direct
+  ELSE
+     grids(1)%v = 0.0d0
+  END IF
+!
+  sol_orig(:,:) = grids(1)%v(:,:)
+  CALL back_transf(grids(1), sol_orig, nluniq_in=nluniq)
+  errdisc(0) = disc_err(grids(1)%spl, sol_orig, sol)
+!
+  resid(0) = residue(grids(1)%mata, grids(1)%f1d, grids(1)%v1d)
+  rresid(0) = resid(0)  /  ( norma*NORM2(grids(1)%v1d) + normb )
+  WRITE(*,'(a4,3(a12,a8),a12)') 'its', 'residue', 'ratio', 'disc. err', &
+       &         'ratio', 'rel. resid', 'ratio', '||v||'
+  WRITE(*,'(i4,3(1pe12.3,8x),1pe12.3)') 0, resid(0), errdisc(0), rresid(0), NORM2(grids(1)%v1d)
+!
+!   Iterations
+!
+  t0 = mpi_wtime()
+  DO its=1,nits
+     CALL mg_cyl(grids, info, 1, nluniq_in=nluniq)
+     resid(its) = residue(grids(1)%mata, grids(1)%f1d, grids(1)%v1d)
+     rresid(its) = resid(its)  / ( norma*NORM2(grids(1)%v1d) + normb )
+!
+     sol_orig(:,:) = grids(1)%v(:,:)
+     CALL back_transf(grids(1), sol_orig, nluniq_in=nluniq)
+     errdisc(its) = disc_err(grids(1)%spl, sol_orig, sol)
+!
+     WRITE(*,'((i4,3(1pe12.3,0pf8.2)),1pe12.3)')  its, &
+          &   resid(its),   resid(its)/resid(its-1), &
+          &   errdisc(its), errdisc(its)/errdisc(its-1), &
+          &   rresid(its),  rresid(its)/rresid(its-1), &
+          &   NORM2(grids(1)%v1d)
+     IF(resid(its) .LE. tol .OR. rresid(its).LE. rtol ) EXIT
+  END DO
+  nits = MIN(nits,its)
+  titer = mpi_wtime() - t0
+  titer_per_step = titer/REAL(its,8)
+!
+  CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_calc_grid, &
+       &       [0,0], sol_orig)
+!--------------------------------------------------------------------------------
+!                   9.  Epilogue
+!
+!   Display timing
+!
+  WRITE(*,'(a,2(1pe12.3))') 'Set up time (s)            ', tsetup
+  WRITE(*,'(a,2(1pe12.3))') 'Direct and solve time (s)  ', tdirect, tbsolve
+  WRITE(*,'(a,1pe12.3,i5)') 'Iter time (s)              ', titer, nits
+!
+!   Creata HDF5 file
+!
+  IF(me.EQ.0) CALL h5file
+!
+  CALL mpi_finalize(ierr)
+!--------------------------------------------------------------------------------
+CONTAINS
+!+++
+  FUNCTION rhs(r, theta)
+!
+! Return problem RHS
+!
+    USE math_util, ONLY : root_bessj
+    DOUBLE PRECISION, INTENT(in) :: r, theta
+    DOUBLE PRECISION :: rhs
+    DOUBLE PRECISION :: nump
+!
+    SELECT CASE(TRIM(prb))
+    CASE('poly')
+       rhs = REAL(4*(modem+1),8)*r**(modem+1)*COS(REAL(modem,8)*theta)
+    CASE('bess')
+       nump = root_bessj(modem, modep)
+       rhs = r * nump**2 * BESSEL_JN(modem, nump*r) * COS(modem*theta)
+    END SELECT
+  END FUNCTION rhs
+!+++
+  FUNCTION sol(r, theta)
+!
+! Return exact problem solution
+!
+    USE math_util, ONLY : root_bessj
+    DOUBLE PRECISION, INTENT(in) :: r(:), theta(:)
+    DOUBLE PRECISION :: sol(SIZE(r),SIZE(theta))
+    DOUBLE PRECISION :: nump
+    INTEGER :: j
+!
+    SELECT CASE(TRIM(prb))
+    CASE('poly')
+       DO j=1,SIZE(theta)
+          sol(:,j) = (1-r(:)**2) * r(:)**modem * COS(modem*theta(j))
+       END DO
+    CASE('bess')
+       nump = root_bessj(modem, modep)
+       DO j=1,SIZE(theta)
+          sol(:,j) = BESSEL_JN(modem, nump*r(:)) * COS(modem*theta(j))
+       END DO
+    END SELECT
+  END FUNCTION sol
+!+++
+  SUBROUTINE coefeq(r, theta, idt, idw, c)
+!
+! Weak form = Int( \nabla(w).\nabla(t) + \sigma.t.w) dV)
+!
+    DOUBLE PRECISION, INTENT(in)  :: r, theta
+    INTEGER, INTENT(out)           :: idt(:,:), idw(:,:)
+    DOUBLE PRECISION, INTENT(out) :: c(:)
+!
+    c(1) = r
+    idt(1,1) = 1
+    idt(1,2) = 0
+    idw(1,1) = 1
+    idw(1,2) = 0
+!
+    c(2) = 1.0d0/r
+    idt(2,1) = 0
+    idt(2,2) = 1
+    idw(2,1) = 0
+    idw(2,2) = 1
+  END SUBROUTINE coefeq
+!+++
+  SUBROUTINE h5file
+    USE futils
+    CHARACTER(len=128) :: file='test_mg2d_cyl.h5'
+    INTEGER :: fid
+    INTEGER :: l
+    CHARACTER(len=64) :: dsname
+    CALL creatf(file, fid, real_prec='d')
+    CALL attach(fid, '/', 'NX', n(1))
+    CALL attach(fid, '/', 'NY', n(2))
+    CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
+    CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
+    CALL attach(fid, '/', 'MODEM', modem)
+    CALL attach(fid, '/', 'MODEP', modep)
+    CALL attach(fid, '/', 'RELAX', relax)
+    CALL attach(fid, '/', 'NITS', nits)
+    CALL attach(fid, '/', 'LEVELS', levels)
+    CALL attach(fid, '/', 'NU1', nu1)
+    CALL attach(fid, '/', 'NU2', nu2)
+    CALL attach(fid, '/', 'NU0', nu0)
+    CALL attach(fid, '/', 'MU', mu)
+    CALL creatg(fid, '/mglevels')
+    DO l=1,levels
+       WRITE(dsname,'("/mglevels/level.",i2.2)') l
+       CALL creatg(fid, TRIM(dsname))
+       CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata)
+       IF(l.GT.1) THEN
+          CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1))
+          CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2))
+       END IF
+       CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x)
+       CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y)
+       CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f)
+       CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v)
+       CALL putarr(fid, TRIM(dsname)//'/f1d', grids(l)%f1d)
+       CALL putarr(fid, TRIM(dsname)//'/v1d', grids(l)%v1d)
+    END DO
+!
+!  Solutions at finest grid
+!
+    CALL creatg(fid, '/solutions')
+    CALL putarr(fid, '/solutions/xg', grids(1)%x)
+    CALL putarr(fid, '/solutions/yg', grids(1)%y)
+    CALL putarr(fid, '/solutions/anal', sol_anal_grid)
+    CALL putarr(fid, '/solutions/calc', sol_calc_grid)
+    CALL putarr(fid, '/solutions/direct', sol_direct_grid)
+!
+    CALL creatg(fid, '/Iterations')
+    CALL putarr(fid, '/Iterations/residues', resid(0:nits))
+    CALL putarr(fid, '/Iterations/rresidues', rresid(0:nits))
+    CALL putarr(fid, '/Iterations/disc_errors', errdisc(0:nits))
+!
+    CALL closef(fid)
+  END SUBROUTINE h5file
+!+++
+END PROGRAM
diff --git a/multigrid/src/test_mgp.f90 b/multigrid/src/test_mgp.f90
new file mode 100644
index 0000000..26f3f24
--- /dev/null
+++ b/multigrid/src/test_mgp.f90
@@ -0,0 +1,242 @@
+!>
+!> @file test_mgp.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   Test multigrid V-cycle for periodic problems
+!
+  USE multigrid
+  IMPLICIT NONE
+!
+  INTEGER          :: nx=8, nidbas=1, ngauss=2, nits=40
+  INTEGER          :: levels=2, nu1=1, nu2=1, mu=1, nu0=1
+  CHARACTER(len=4) :: relax='jac '
+  LOGICAL          :: nlfixed = .FALSE.
+  DOUBLE PRECISION :: sigma=1.0d0, kmode=10.0, pi=4.0d0*ATAN(1.0d0)
+  DOUBLE PRECISION :: omega=2.0d0/3.0d0
+  INTEGER          :: l, nrank, dim, its
+  DOUBLE PRECISION :: errdisc_dir
+  DOUBLE PRECISION, ALLOCATABLE :: u_direct(:), u_exact(:), u_calc(:)
+  DOUBLE PRECISION, ALLOCATABLE :: sol_direct(:), sol_calc(:), sol_grid(:)
+  DOUBLE PRECISION, ALLOCATABLE :: err(:), resid(:), errdisc(:)
+  DOUBLE PRECISION, ALLOCATABLE :: errdisc_fmg(:)
+!
+  TYPE(grid1d), ALLOCATABLE :: gridx(:)
+  TYPE(mg_info)             :: info
+!
+  NAMELIST /newrun/ nx, nidbas, ngauss, sigma, kmode, &
+       &            relax, nits, nlfixed, levels, nu1, nu2, mu, nu0
+!--------------------------------------------------------------------------------
+!                   1.  Prologue
+!   Inputs
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+  levels = MIN(levels, get_lmax(nx))
+!
+  info%nu1 = nu1
+  info%nu2 = nu2
+  info%mu = mu
+  info%nu0 = nu0
+  info%levels = levels
+  info%relax = relax
+  info%omega = omega
+!
+!   Create grids
+!
+  ALLOCATE(gridx(levels))
+  CALL create_grid(nx, nidbas, ngauss, 0, gridx, period=.TRUE.)
+  WRITE(*,'(a/(20i6))') 'Number of intervals in grids', (gridx(l)%n, l=1,levels)
+!
+!   Create FE matrice and set BC u(0)=u(1)=0
+!
+  DO l=1,levels
+     CALL femat(gridx(l)%spl, gridx(l)%matap, coefeq)
+  END DO
+!
+!   Construct RHS only on the finest grid
+!
+  nrank = gridx(1)%rank    !  Rank of the system (number of unknowns)
+  dim = nrank+nidbas       !  Dimension of Splines space
+  CALL disrhs(gridx(1)%spl, gridx(1)%f, rhs)
+!--------------------------------------------------------------------------------
+!                   2.  Direct solution
+!
+  WRITE(*,'(//a)') 'Direct solution for the finest grid problem'
+  ALLOCATE(u_direct(0:nx))
+  ALLOCATE(sol_direct(nrank))
+  ALLOCATE(sol_grid(dim))      ! Required by GRIDVAL
+!
+  CALL direct_solve(gridx(1), sol_direct)
+  sol_grid(1:nrank) = sol_direct(1:nrank)
+  sol_grid(nrank+1:dim) = sol_direct(1:nidbas)
+  CALL gridval(gridx(1)%spl, gridx(1)%x, u_direct, 0, sol_grid)
+!
+  errdisc_dir = disc_err(gridx(1)%spl, sol_grid, sol)
+  WRITE(*,'(a,1pe12.3)') 'Discretization error', errdisc_dir
+!--------------------------------------------------------------------------------
+!                   3.  Solution from MG V-cycles
+!
+  WRITE(*,'(//a)') 'Multigrid MG V-cycles'
+  ALLOCATE(sol_calc(nrank))
+  ALLOCATE(err(0:nits))
+  ALLOCATE(errdisc(0:nits))
+  ALLOCATE(resid(0:nits))
+!
+!   Initial guess
+!
+  sol_calc(:) = 0.0d0
+  sol_grid(:) = 0.0d0
+  IF(nlfixed) THEN
+     sol_calc(:) = sol_direct(:)
+     sol_grid(1:nrank) = sol_calc(1:nrank)
+     sol_grid(nrank+1:dim) = sol_calc(1:nidbas)
+  END IF
+  gridx(1)%v(:) = sol_calc(:)
+  err(0) = normf(gridx(1)%matmp, sol_calc-sol_direct)
+  errdisc(0) = disc_err(gridx(1)%spl, sol_grid, sol)
+  resid(0) = residue(gridx(1)%matap, gridx(1)%f, sol_calc)
+!
+!   Iterations
+!
+  DO its=1,nits
+!
+     CALL mg(gridx, info, 1)
+     sol_calc(:) = gridx(1)%v(:)
+     sol_grid(1:nrank) = sol_calc(1:nrank)
+     sol_grid(nrank+1:dim) = sol_calc(1:nidbas)
+!
+     err(its) = normf(gridx(1)%matmp, sol_calc-sol_direct)
+     errdisc(its) = disc_err(gridx(1)%spl, sol_grid, sol)  ! will call GRIDVAL
+     resid(its) = residue(gridx(1)%matap, gridx(1)%f, sol_calc)
+  END DO
+!
+  WRITE(*,'(a4,3(a12,a8))') 'its', 'error', 'ratio', 'residue', 'ratio', &
+       &                'disc. err', 'ratio'
+  WRITE(*,'(i4,3(1pe12.3,8x))') 0, err(0), resid(0), errdisc(0)
+  WRITE(*,'((i4,3(1pe12.3,0pf8.2)))') (its, err(its), err(its)/err(its-1), &
+       &   resid(its), resid(its)/resid(its-1), &
+       &   errdisc(its), errdisc(its)/errdisc(its-1), its=1,nits)
+!--------------------------------------------------------------------------------
+!                   4.  Solution from FMG
+!
+  WRITE(*,'(//a)') 'Full Multigrid'
+
+  ALLOCATE(errdisc_fmg(nits))
+  DO its=1,nits
+     info%nu0 = its
+!
+     CALL fmg(gridx, info, 1)
+     sol_calc(:) = gridx(1)%v(:)
+     sol_grid(1:nrank) = sol_calc(1:nrank)
+     sol_grid(nrank+1:dim) = sol_calc(1:nidbas)
+!
+     errdisc_fmg(its) = disc_err(gridx(1)%spl, sol_grid, sol)  ! will call GRIDVAL
+     resid(its) = residue(gridx(1)%matap, gridx(1)%f, sol_calc)
+  END DO
+  WRITE(*,'(a4,2(a12,a8))') 'nu0', 'residue', 'ratio','disc. err', 'ratio'
+  WRITE(*,'((i4,2(1pe12.3,0pf8.3)))') (its, resid(its), resid(its)/resid(its-1), &
+       &     errdisc_fmg(its),errdisc_fmg(its)/errdisc_dir, its=1,nits)
+!
+!   Grid values at final iteration
+!
+  ALLOCATE(u_exact(0:nx))
+  ALLOCATE(u_calc(0:nx))
+  u_exact = sol(gridx(1)%x)
+  CALL gridval(gridx(1)%spl, gridx(1)%x, u_calc, 0, sol_grid)
+!--------------------------------------------------------------------------------
+!                   9.  Epilogue
+!
+!   Creata HDF5 file
+!
+  CALL h5file
+!--------------------------------------------------------------------------------
+CONTAINS
+  FUNCTION rhs(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    DOUBLE PRECISION :: rhs
+    rhs = SIN(pi*kmode*x)
+  END FUNCTION rhs
+  FUNCTION sol(x)
+    DOUBLE PRECISION, INTENT(in) :: x(:)
+    DOUBLE PRECISION :: sol(SIZE(x))
+    sol(:) = 1.0d0/((pi*kmode)**2+sigma)*SIN(pi*kmode*x(:))
+  END FUNCTION sol
+  SUBROUTINE coefeq(x, idt, idw, c)
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(out) :: idt(:), idw(:)
+    DOUBLE PRECISION, INTENT(out) :: c(:)
+       c(1) = 1.0d0
+       idt(1) = 1
+       idw(1) = 1
+       c(2) = sigma
+       idt(2) = 0
+       idw(2) = 0
+  END SUBROUTINE coefeq
+  SUBROUTINE h5file
+    USE futils
+    CHARACTER(len=128) :: file='test_mgp.h5'
+    INTEGER :: fid
+    INTEGER :: l
+    CHARACTER(len=64) :: dsname
+    CALL creatf(file, fid, real_prec='d')
+    CALL attach(fid, '/', 'NX', nx)
+    CALL attach(fid, '/', 'NIDBAS', nidbas)
+    CALL attach(fid, '/', 'SIGMA', sigma)
+    CALL attach(fid, '/', 'KMODE', kmode)
+    CALL attach(fid, '/', 'RELAX', relax)
+    CALL attach(fid, '/', 'NITS', nits)
+    CALL attach(fid, '/', 'NLFIXED', nlfixed)
+    CALL attach(fid, '/', 'LEVELS', levels)
+    CALL attach(fid, '/', 'NU1', nu1)
+    CALL attach(fid, '/', 'NU2', nu2)
+    CALL attach(fid, '/', 'NU0', nu0)
+    CALL attach(fid, '/', 'MU', mu)
+    CALL creatg(fid, '/mglevels')
+    DO l=1,levels
+       WRITE(dsname,'("/mglevels/level.",i2.2)') l
+       CALL creatg(fid, TRIM(dsname))
+       CALL putarr(fid, TRIM(dsname)//'/mata', gridx(l)%matap%val)
+       IF(l.GT.1) THEN
+          CALL putarr(fid, TRIM(dsname)//'/matp', gridx(l)%transf%val)
+          CALL attach(fid, TRIM(dsname)//'/matp', 'M', gridx(l)%transf%mrows)
+          CALL attach(fid, TRIM(dsname)//'/matp', 'N', gridx(l)%transf%ncols)
+       END IF
+       CALL putarr(fid, TRIM(dsname)//'/f', gridx(l)%f)
+       CALL putarr(fid, TRIM(dsname)//'/v', gridx(l)%v)
+    END DO
+    CALL creatg(fid, '/Iterations')
+    CALL putarr(fid, '/Iterations/errors', err)
+    CALL putarr(fid, '/Iterations/residues', resid)
+    CALL putarr(fid, '/Iterations/disc_errors', errdisc)
+    CALL putarr(fid, '/Iterations/disc_errors_fmg', errdisc_fmg)
+    CALL putarr(fid, '/Iterations/xgrid', gridx(1)%x)
+    CALL putarr(fid, '/Iterations/u_direct', u_direct)
+    CALL putarr(fid, '/Iterations/u_exact', u_exact)
+    CALL putarr(fid, '/Iterations/u_calc', u_calc)
+    CALL closef(fid)
+  END SUBROUTINE h5file
+END PROGRAM main
diff --git a/multigrid/src/test_relax.f90 b/multigrid/src/test_relax.f90
new file mode 100644
index 0000000..9d00c66
--- /dev/null
+++ b/multigrid/src/test_relax.f90
@@ -0,0 +1,227 @@
+!>
+!> @file test_relax.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   Test different relaxations
+!
+  USE multigrid
+  USE math_util, ONLY : root_bessj
+  IMPLICIT NONE
+!
+  INTEGER          :: nx=8, nidbas=1, alpha=0, nits=40
+  INTEGER          :: modem=22, modep=10
+  CHARACTER(len=4) :: relax='jac '
+  LOGICAL          :: nlfixed = .FALSE.
+  DOUBLE PRECISION :: sigma=1.0d0, kmode=10.0, pi=4.0d0*ATAN(1.0d0)
+  DOUBLE PRECISION :: omega=2.0d0/3.0d0
+  INTEGER          :: ngauss, i, nrank, its
+  DOUBLE PRECISION, ALLOCATABLE :: u_exact(:), u_direct(:), u_calc(:)
+  DOUBLE PRECISION, ALLOCATABLE :: sol_direct(:)
+  DOUBLE PRECISION, ALLOCATABLE :: sol_calc(:), err(:), resid(:), errdisc(:)
+  DOUBLE PRECISION :: errdisc_dir
+!
+  TYPE(grid1d) :: gridx(1)
+!
+  NAMELIST /newrun/ nx, nidbas, ngauss, sigma, kmode, modem, modep, &
+       &            alpha, relax, omega, nits, nlfixed
+!--------------------------------------------------------------------------------
+!                   1.  Prologue: read input, construct matrix and RHS
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Set grid
+!
+  CALL create_grid(nx, nidbas, ngauss, alpha, gridx)
+!
+!   Create FE matrice and set BC u(0)=u(1)=0
+!
+  CALL femat(gridx(1)%spl, gridx(1)%mata, coefeq)
+  nrank = gridx(1)%rank
+!
+!     Left Dirichlet BC (only for Cartesian geometry)
+  IF(alpha .EQ. 0) THEN
+     CALL ibcmat(1, gridx(1)%mata)
+  END IF
+!
+!     Right Dirichlet BC
+  CALL ibcmat(nrank, gridx(1)%mata)
+!
+!   Construct RHS
+!
+  CALL disrhs(gridx(1)%spl, gridx(1)%f, rhs)
+!
+!     Left Dirichlet BC (only for Cartesian geometry)
+  IF(alpha .EQ. 0) THEN
+     gridx(1)%f(1) = 0.0d0
+  END IF
+!
+!     Right Dirichlet BC
+  gridx(1)%f(nrank) = 0.0d0
+!--------------------------------------------------------------------------------
+!                   2.  Direct solution
+!
+!   Direct solutions
+!
+  ALLOCATE(sol_direct(nrank))
+  CALL direct_solve(gridx(1), sol_direct)
+!
+!  Grid values
+!
+  ALLOCATE(u_exact(0:nx))
+  ALLOCATE(u_direct(0:nx))
+  ALLOCATE(u_calc(0:nx))
+!
+  u_exact = sol(gridx(1)%x)
+  CALL gridval(gridx(1)%spl, gridx(1)%x, u_direct, 0, sol_direct)
+  errdisc_dir = disc_err(gridx(1)%spl, sol_direct, sol)
+  WRITE(*,'(a,1pe12.3)') 'Discretization error', errdisc_dir
+!--------------------------------------------------------------------------------
+!                   3.  Relaxation
+!
+  ALLOCATE(sol_calc(nrank))
+  ALLOCATE(err(0:nits))
+  ALLOCATE(errdisc(0:nits))
+  ALLOCATE(resid(0:nits))
+!
+!   Initial guess
+  sol_calc(:) = 0.0d0
+  IF(nlfixed) THEN
+     sol_calc(:) = sol_direct(:)
+  END IF
+  err(0) = normf(gridx(1)%matm, sol_calc-sol_direct)
+  errdisc(0) = disc_err(gridx(1)%spl, sol_calc, sol)
+  resid(0) = residue(gridx(1)%mata, gridx(1)%f, sol_calc)
+!
+!   Iterations
+  DO its=1,nits
+     SELECT CASE (TRIM(relax))
+     CASE('jac')
+        CALL jacobi(gridx(1)%mata, omega, 1, sol_calc, gridx(1)%f)
+     CASE('gs')
+        CALL gs(gridx(1)%mata, 1, sol_calc, gridx(1)%f)
+     END SELECT
+     err(its) = normf(gridx(1)%matm, sol_calc-sol_direct)
+     errdisc(its) = disc_err(gridx(1)%spl, sol_calc, sol)
+     resid(its) = residue(gridx(1)%mata, gridx(1)%f, sol_calc)
+  END DO
+  CALL gridval(gridx(1)%spl, gridx(1)%x, u_calc, 0, sol_calc)
+!
+  WRITE(*,'(/a4,3a12)') 'its', 'error', 'residue', 'disc. err'
+  WRITE(*,'(i4,3(1pe12.3))') 0, err(0), resid(0), errdisc(0)
+  WRITE(*,'((i4,6(1pe12.3)))') (its, err(its), resid(its), errdisc(its), &
+       &   err(its)/err(its-1), resid(its)/resid(its-1), &
+       &   errdisc(its)/errdisc(its-1), its=1,nits,MAX(1,nits/10))
+!
+  CALL h5file
+!--------------------------------------------------------------------------------
+CONTAINS
+!+++
+  FUNCTION rhs(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    DOUBLE PRECISION :: rhs
+    DOUBLE PRECISION :: nump
+    SELECT CASE (alpha)
+    CASE(0)                  ! Cartesian geometry
+       rhs = SIN(pi*kmode*x)
+    CASE(1)                  ! Cylindrical
+       nump = root_bessj(modem, modep)
+       rhs = x * nump**2 * bessel_jn(modem, nump*x)
+    END SELECT
+  END FUNCTION rhs
+!+++
+  FUNCTION sol(x)
+    DOUBLE PRECISION, INTENT(in) :: x(:)
+    DOUBLE PRECISION :: sol(SIZE(x))
+    DOUBLE PRECISION :: nump
+    SELECT CASE (alpha)
+    CASE(0)                  ! Cartesian geometry
+       sol(:) = 1.0d0/((pi*kmode)**2+sigma)*SIN(pi*kmode*x(:))
+    CASE(1)                  ! Cylindrical
+       nump = root_bessj(modem, modep)
+       sol(:) = bessel_jn(modem, nump*x(:))
+    END SELECT
+  END FUNCTION sol
+!+++
+  SUBROUTINE coefeq(x, idt, idw, c)
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(out) :: idt(:), idw(:)
+    DOUBLE PRECISION, INTENT(out) :: c(:)
+    SELECT CASE (alpha)
+    CASE(0)           ! Cartesian geometry
+       c(1) = 1.0d0
+       idt(1) = 1
+       idw(1) = 1
+       c(2) = sigma
+       idt(2) = 0
+       idw(2) = 0
+    CASE(1)           ! Cylindrical
+       c(1) = x
+       idt(1) = 1
+       idw(1) = 1
+       c(2) = REAL(modem,8)**2/x
+       idt(2) = 0
+       idw(2) = 0
+    END SELECT
+  END SUBROUTINE coefeq
+!+++
+  SUBROUTINE h5file
+    USE futils
+    CHARACTER(len=128) :: file='test_relax.h5'
+    INTEGER :: fid
+    INTEGER :: l
+    CHARACTER(len=64) :: dsname
+    CALL creatf(file, fid, real_prec='d')
+    CALL attach(fid, '/', 'NX', nx)
+    CALL attach(fid, '/', 'NIDBAS', nidbas)
+    CALL attach(fid, '/', 'KX', kmode)
+    CALL attach(fid, '/', 'SIGMA', sigma)
+    CALL attach(fid, '/', 'ALPHA', alpha)
+    CALL attach(fid, '/', 'RELAX', relax)
+    CALL attach(fid, '/', 'OMEGA', omega)
+    CALL attach(fid, '/', 'NITS', nits)
+    CALL attach(fid, '/', 'MODEM', modem)
+    CALL attach(fid, '/', 'MODEP', modep)
+!
+!  Solutions at finest grid
+!
+    CALL creatg(fid, '/solutions')
+    CALL putarr(fid, '/solutions/xg', gridx(1)%x)
+    CALL putarr(fid, '/solutions/direct', u_direct)
+    CALL putarr(fid, '/solutions/anal', u_exact)
+    CALL putarr(fid, '/solutions/calc', u_calc)
+!
+    CALL creatg(fid, '/relaxation')
+    CALL putarr(fid, '/relaxation/errdisc', errdisc)
+    CALL putarr(fid, '/relaxation/resid', resid)
+!
+!  Store FE matrix
+!
+    CALL putmat(fid, '/MATA', gridx(1)%mata)
+!
+    CALL closef(fid)
+  END SUBROUTINE h5file
+!+++
+END PROGRAM main
diff --git a/multigrid/src/test_relax2d.f90 b/multigrid/src/test_relax2d.f90
new file mode 100644
index 0000000..09583c5
--- /dev/null
+++ b/multigrid/src/test_relax2d.f90
@@ -0,0 +1,334 @@
+!>
+!> @file test_relax2d.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   Test 2d direcxt solve and relaxation methods
+!
+  USE multigrid
+  USE csr
+  IMPLICIT NONE
+  INCLUDE 'mpif.h'
+!
+  DOUBLE PRECISION, PARAMETER :: PI=4.0d0*ATAN(1.0d0)
+  INTEGER, DIMENSION(2) :: n, nidbas, ngauss, alpha
+  DOUBLE PRECISION :: kx=4.d0, ky=3.d0, sigma=10.0d0
+  INTEGER :: levels=1, nits=1000
+  CHARACTER(len=4) :: relax='jac '
+  DOUBLE PRECISION :: omega=2.0d0/3.0d0
+  DOUBLE PRECISION :: t0
+!
+  DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:)
+  DOUBLE PRECISION :: dx, dy
+  INTEGER :: ix, iy
+!
+  DOUBLE PRECISION, ALLOCATABLE, TARGET :: sol_direct(:,:), sol_relax(:,:)
+  DOUBLE PRECISION, POINTER     :: sol_direct_1d(:), sol_relax_1d(:)
+  DOUBLE PRECISION, ALLOCATABLE :: sol_direct_grid(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: sol_anal_grid(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: resid(:), errdisc(:)
+!
+  INTEGER :: ierr, me
+  INTEGER :: l, nterms
+  INTEGER :: its
+!
+  TYPE(grid2d), ALLOCATABLE :: grids(:)
+!
+  NAMELIST /newrun/ n, nidbas, ngauss, kx, ky, sigma, alpha, levels, nits, relax
+!--------------------------------------------------------------------------------
+!                    1.   Prologue
+!
+  CALL mpi_init(ierr)
+  CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
+!
+!   Inputs
+!
+  n = (/8, 8/)
+  nidbas=(/3,3/)
+  ngauss=(/2,2/)
+  alpha = (/0,0/)
+  kx=4
+  ky=3
+  sigma=10.0d0
+  levels=2
+  relax='jac'
+  nits=100
+!
+  IF(me.EQ.0) THEN
+     READ(*,newrun)
+     WRITE(*,newrun)
+  END IF
+     CALL mpi_bcast(n, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(alpha, 2, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(kx, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(ky, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(sigma, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(levels, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(nits, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(relax, LEN(relax), MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+!
+!   Adjust number of levels
+!
+  levels = MIN(levels, get_lmax(n(1)), get_lmax(n(2)))
+!
+!   Create grids
+!
+  dx = 1.0d0/REAL(n(1),8)
+  dy = 1.0d0/REAL(n(2),8)
+  ALLOCATE(x(0:n(1)), y(0:n(2)))
+  DO ix=0,n(1)
+     x(ix) = ix*dx
+  END DO
+  DO iy=0,n(2)
+     y(iy) = iy*dy
+  END DO
+!
+  ALLOCATE(grids(levels))
+  CALL create_grid(x, y, nidbas, ngauss, alpha, grids)
+  WRITE(*,'(5a6)') 'l', 'nx', 'ny', 'rx', 'ry'
+  WRITE(*,'(5i6)') (l, grids(l)%n, grids(l)%rank, l=1,levels)
+!
+!   Construct RHS and set BC only on the finest grid
+!
+  CALL disrhs(grids(1)%spl, grids(1)%f, rhs)
+  CALL ibcrhs(grids(1), grids(1)%f)
+!!$  CALL printmat('** RHS **', grids(1)%f)
+!
+!   Build FE matrices and set BC
+!
+  nterms = 3
+  DO l=1,levels
+     CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms)
+     CALL ibcmat(grids(l), grids(l)%mata)
+     CALL to_mat(grids(l)%mata)
+  END DO
+!--------------------------------------------------------------------------------
+!                   1.  Direct solution (at the finest grid, l=1)
+!
+  WRITE(*,'(//a)') 'Direct solution for the finest grid problem'
+  ALLOCATE(sol_direct(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)), &
+       &         source=grids(1)%f)
+  sol_direct_1d(1:SIZE(grids(1)%v1d)) => sol_direct
+  PRINT*, 'shape of sol_direct', SHAPE(sol_direct)
+  PRINT*, 'shape of sol_direct_1d', SHAPE(sol_direct_1d)
+!
+  t0 = mpi_wtime()
+  CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.)
+  WRITE(*,'(a,1pe12.3)') 'Fact. + solve time (s) =', mpi_wtime()-t0
+!
+  sol_direct = grids(1)%f
+  t0 = mpi_wtime()
+  CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.)
+  WRITE(*,'(a,1pe12.3)') 'Solve time (s) =', mpi_wtime()-t0
+!
+  ALLOCATE(sol_direct_grid(0:n(1),0:n(2)))
+  ALLOCATE(sol_anal_grid(0:n(1),0:n(2)))
+  CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_direct_grid, &
+       &       [0,0], sol_direct)
+  sol_anal_grid = sol(grids(1)%x, grids(1)%y)
+  WRITE(*,'(a,2(1pe12.3))') 'Discretization error and residue =', &
+       &        disc_err(grids(1)%spl, sol_direct, sol), &
+       &        residue(grids(1)%mata, grids(1)%f1d, sol_direct_1d)
+!--------------------------------------------------------------------------------
+!                   2.  Relaxation (at the finest grid, l=1)
+!
+  ALLOCATE(errdisc(0:nits))
+  ALLOCATE(resid(0:nits))
+  ALLOCATE(sol_relax(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)))
+  sol_relax_1d(1:SIZE(grids(1)%v1d)) => sol_relax
+!
+  sol_relax_1d = 0.0d0
+  errdisc(0) = disc_err(grids(1)%spl, sol_relax, sol)
+  resid(0) = residue(grids(1)%mata, grids(1)%f1d, sol_relax_1d)
+  t0 = mpi_wtime()
+  DO its=1,nits
+     SELECT CASE (TRIM(relax))
+     CASE('jac')
+        CALL jacobi(grids(1)%mata, omega, 1, sol_relax_1d, grids(1)%f1d)
+     CASE('gs')
+        CALL gs(grids(1)%mata, 1, sol_relax_1d, grids(1)%f1d)
+     END SELECT
+     errdisc(its) = disc_err(grids(1)%spl, sol_relax, sol)
+     resid(its) = residue(grids(1)%mata, grids(1)%f1d, sol_relax_1d)
+  END DO
+  WRITE(*,'(a,1pe12.3)') 'Iterative solve time (s/iteration) =', (mpi_wtime()-t0)/REAL(nits,8)
+!
+  WRITE(*,'(/a4,3a12)') 'its', 'residue', 'disc. err'
+  WRITE(*,'(i4,3(1pe12.3))') 0, resid(0), errdisc(0)
+  WRITE(*,'((i4,4(1pe12.3)))') (its, resid(its), errdisc(its), &
+       &   resid(its)/resid(its-1), &
+       &   errdisc(its)/errdisc(its-1), its=1,nits,MAX(1,nits/10))
+!--------------------------------------------------------------------------------
+!                   9.  Epilogue
+!
+!   Creata HDF5 file
+!
+  IF(me.EQ.0) CALL h5file
+!
+  CALL mpi_finalize(ierr)
+!--------------------------------------------------------------------------------
+CONTAINS
+!+++
+  FUNCTION rhs(x, y)
+!
+! Return problem RHS
+!
+    DOUBLE PRECISION, INTENT(in) :: x, y
+    DOUBLE PRECISION :: rhs
+    rhs = SIN(PI*kx*x)*SIN(PI*ky*y)
+  END FUNCTION rhs
+!+++
+  FUNCTION sol(x, y)
+!
+! Return exact problem solution
+!
+    DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
+    DOUBLE PRECISION :: sol(SIZE(x),SIZE(y))
+    DOUBLE PRECISION :: c
+    INTEGER :: j
+    DO j=1,SIZE(y)
+       c = SIN(PI*ky*y(j)) / (PI**2*(kx**2+ky**2) + sigma**2)
+       sol(:,j) = c * SIN(PI*kx*x(:))
+    END DO
+  END FUNCTION sol
+!+++
+  SUBROUTINE coefeq(x, y, idt, idw, c)
+!
+! Weak form = Int( \nabla(w).\nabla(t) + \sigma.t.w) dV)
+!
+    DOUBLE PRECISION, INTENT(in)  :: x, y
+    INTEGER, INTENT(out)           :: idt(:,:), idw(:,:)
+    DOUBLE PRECISION, INTENT(out) :: c(:)
+!
+    c(1) = 1.0d0
+    idt(1,1) = 1
+    idt(1,2) = 0
+    idw(1,1) = 1
+    idw(1,2) = 0
+!
+    c(2) = 1.0d0
+    idt(2,1) = 0
+    idt(2,2) = 1
+    idw(2,1) = 0
+    idw(2,2) = 1
+!
+    c(3) = sigma
+    idt(3,1) = 0
+    idt(3,2) = 0
+    idw(3,1) = 0
+    idw(3,2) = 0
+
+  END SUBROUTINE coefeq
+!+++
+  SUBROUTINE h5file
+    USE futils
+    CHARACTER(len=128) :: file='test_relax2d.h5'
+    INTEGER :: fid
+    INTEGER :: l
+    CHARACTER(len=64) :: dsname
+    CALL creatf(file, fid, real_prec='d')
+    CALL attach(fid, '/', 'NX', n(1))
+    CALL attach(fid, '/', 'NY', n(2))
+    CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
+    CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
+    CALL attach(fid, '/', 'KX', kx)
+    CALL attach(fid, '/', 'KY', ky)
+    CALL attach(fid, '/', 'SIGMA', sigma)
+    CALL attach(fid, '/', 'ALPHA1', alpha(1))
+    CALL attach(fid, '/', 'ALPHA2', alpha(2))
+    CALL attach(fid, '/', 'LEVELS', levels)
+    CALL attach(fid, '/', 'RELAX', relax)
+    CALL attach(fid, '/', 'NITS', nits)
+    CALL creatg(fid, '/mglevels')
+    DO l=1,levels
+       WRITE(dsname,'("/mglevels/level.",i2.2)') l
+       CALL creatg(fid, TRIM(dsname))
+       CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata)
+       IF(l.GT.1) THEN
+          CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1))
+          CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2))
+       END IF
+       CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x)
+       CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y)
+       CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f)
+       CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v)
+       CALL putarr(fid, TRIM(dsname)//'/f1d', grids(l)%f1d)
+       CALL putarr(fid, TRIM(dsname)//'/v1d', grids(l)%v1d)
+    END DO
+!
+!  Solutions at finest grid
+!
+    CALL creatg(fid, '/solutions')
+    CALL putarr(fid, '/solutions/xg', grids(1)%x)
+    CALL putarr(fid, '/solutions/yg', grids(1)%y)
+    CALL putarr(fid, '/solutions/direct', sol_direct_grid)
+    CALL putarr(fid, '/solutions/anal', sol_anal_grid)
+!
+    CALL creatg(fid, '/relaxation')
+    CALL putarr(fid, '/relaxation/errdisc', errdisc)
+    CALL putarr(fid, '/relaxation/resid', resid)
+!
+    IF(ALLOCATED(grids(1)%mata%mumps)) THEN
+       CALL myputmat(fid, '/MUMPS', grids(1)%mata%mumps)
+    END IF
+!
+    CALL closef(fid)
+  END SUBROUTINE h5file
+!+++
+  SUBROUTINE myputmat(fid, label, mat, str)
+!
+!   Write matrix to hdf5 file
+!
+    USE futils
+!
+    INTEGER, INTENT(in)                    :: fid
+    CHARACTER(len=*), INTENT(in)           :: label
+    TYPE(mumps_mat)                        :: mat
+    CHARACTER(len=*), OPTIONAL, INTENT(in) :: str
+    CHARACTER(len=128) :: mumps_grp
+!
+    IF(PRESENT(str)) THEN
+       CALL creatg(fid, label, str)
+    ELSE
+       CALL creatg(fid, label)
+    END IF
+    CALL attach(fid, label, 'RANK', mat%rank)
+    CALL attach(fid, label, 'NNZ',  mat%nnz)
+    CALL attach(fid, label, 'NLSYM', mat%nlsym)
+    CALL attach(fid, label, 'NLPOS', mat%nlpos)
+    CALL putarr(fid, TRIM(label)//'/irow', mat%irow)
+    CALL putarr(fid, TRIM(label)//'/cols', mat%mumps_par%JCN_loc)
+    CALL putarr(fid, TRIM(label)//'/val', mat%mumps_par%A_loc)
+!
+    mumps_grp = TRIM(label)//'/mumps_par'
+    CALL creatg(fid, mumps_grp)
+    CALL attach(fid, mumps_grp, 'PAR', mat%mumps_par%PAR)
+    CALL attach(fid, mumps_grp, 'SYM', mat%mumps_par%SYM)
+    CALL putarr(fid, TRIM(mumps_grp)//'/IRN', mat%mumps_par%IRN_loc)
+!
+  END SUBROUTINE myputmat
+END PROGRAM
diff --git a/multigrid/src/test_relax2d_cyl.f90 b/multigrid/src/test_relax2d_cyl.f90
new file mode 100644
index 0000000..c02b199
--- /dev/null
+++ b/multigrid/src/test_relax2d_cyl.f90
@@ -0,0 +1,369 @@
+!>
+!> @file test_relax2d_cyl.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   Test 2d direcxt solve and relaxation methods
+!   Cylindrical case
+!
+  USE multigrid
+  USE csr
+  IMPLICIT NONE
+  INCLUDE 'mpif.h'
+!
+  DOUBLE PRECISION, PARAMETER :: PI=4.0d0*ATAN(1.0d0)
+  INTEGER, DIMENSION(2) :: n, nidbas, ngauss
+  INTEGER :: modem=22, modep=10
+  INTEGER :: levels=1, nits=1000
+  CHARACTER(len=4) :: relax='jac ', prb='poly'
+  DOUBLE PRECISION :: omega=2.0d0/3.0d0
+  LOGICAL :: nluniq=.TRUE.
+  LOGICAL :: nlfixed=.FALSE.
+  DOUBLE PRECISION :: t0
+  DOUBLE PRECISION :: resid_direct, errdisc_direct
+!
+  DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:)
+  DOUBLE PRECISION :: dx, dy
+  INTEGER :: ix, iy
+!
+  DOUBLE PRECISION, ALLOCATABLE, TARGET :: sol_direct(:,:), sol_relax(:,:)
+  DOUBLE PRECISION, POINTER     :: sol_direct_1d(:), sol_relax_1d(:)
+  DOUBLE PRECISION, ALLOCATABLE :: sol_direct_orig(:,:), sol_relax_orig(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: sol_direct_grid(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: sol_relax_grid(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: sol_anal_grid(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: resid(:), errdisc(:)
+!
+  INTEGER :: ierr, me
+  INTEGER :: l, nterms, j
+  INTEGER :: its
+!
+  TYPE(grid2d), ALLOCATABLE :: grids(:)
+!
+  NAMELIST /newrun/ n, nidbas, ngauss, modem, modep, levels, omega, nits, &
+       &            relax, prb, nlfixed, nluniq
+!--------------------------------------------------------------------------------
+!                    1.   Prologue
+!
+  CALL mpi_init(ierr)
+  CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
+!
+!   Inputs
+!
+  n = (/8, 8/)
+  nidbas=(/3,3/)
+  ngauss=(/2,2/)
+  modem = 22
+  modep = 10
+  levels=2
+  relax='jac'
+  prb='poly'
+  nits=100
+  nluniq = .TRUE.
+!
+  IF(me.EQ.0) THEN
+     READ(*,newrun)
+     WRITE(*,newrun)
+  END IF
+     CALL mpi_bcast(n, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(modem, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(modep, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(levels, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(nits, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(omega, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(nlfixed, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(nluniq, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(relax, LEN(relax), MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(prb, LEN(prb), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
+!
+!   Adjust number of levels
+!
+  levels = MIN(levels, get_lmax(n(1)), get_lmax(n(2)))
+!
+!   Create grids
+!
+  dx = 1.0d0/REAL(n(1),8)
+  dy = 2.0d0*pi/REAL(n(2),8)
+  ALLOCATE(x(0:n(1)), y(0:n(2)))
+  DO ix=0,n(1)
+     x(ix) = ix*dx
+  END DO
+  DO iy=0,n(2)
+     y(iy) = iy*dy
+  END DO
+!
+  ALLOCATE(grids(levels))
+  CALL create_grid(x, y, nidbas, ngauss, [1, 0], grids, period=[.FALSE., .TRUE.], &
+       &         debug_in=.FALSE.)
+  WRITE(*,'(5a6)') 'l', 'nx', 'ny', 'rx', 'ry'
+  WRITE(*,'(5i6)') (l, grids(l)%n, grids(l)%rank, l=1,levels)
+!
+!   Construct RHS and set BC only on the finest grid
+!
+  CALL disrhs(grids(1)%spl, grids(1)%f, rhs)
+  CALL ibcrhs(grids(1), grids(1)%f, nluniq_in=nluniq)
+!!$  CALL printmat('** RHS **', grids(1)%f)
+!
+!   Build FE matrices and set BC
+!
+  nterms = 2
+  DO l=1,levels
+     CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms)
+     CALL ibcmat(grids(l), grids(l)%mata, nluniq_in=nluniq)
+     CALL to_mat(grids(l)%mata)
+  END DO
+!--------------------------------------------------------------------------------
+!                   1.  Direct solution (at the finest grid, l=1)
+!
+  WRITE(*,'(//a)') 'Direct solution for the finest grid problem'
+!
+  ALLOCATE(sol_direct(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)), &
+       &         source=grids(1)%f)
+  sol_direct_1d(1:SIZE(grids(1)%v1d)) => sol_direct
+!
+  ALLOCATE(sol_direct_orig(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)))
+!
+  PRINT*, 'shape of sol_direct', SHAPE(sol_direct)
+  PRINT*, 'shape of sol_direct_1d', SHAPE(sol_direct_1d)
+!
+  t0 = mpi_wtime()
+  CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.)
+  WRITE(*,'(a,1pe12.3)') 'Fact. + solve time (s) =', mpi_wtime()-t0
+!
+  sol_direct = grids(1)%f
+  t0 = mpi_wtime()
+  CALL direct_solve(grids(1), sol_direct_1d, debug=.FALSE.)
+  resid_direct = residue(grids(1)%mata, grids(1)%f1d, sol_direct_1d)
+!
+  sol_direct_orig = sol_direct
+  CALL back_transf(grids(1), sol_direct_orig, nluniq_in=nluniq)
+  errdisc_direct = disc_err(grids(1)%spl, sol_direct_orig, sol)
+!
+  WRITE(*,'(a,1pe12.3)') 'Solve time (s) =', mpi_wtime()-t0
+!
+  ALLOCATE(sol_direct_grid(0:n(1),0:n(2)))
+  ALLOCATE(sol_anal_grid(0:n(1),0:n(2)))
+!
+  CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_direct_grid, &
+       &       [0,0], sol_direct_orig)
+!
+  sol_anal_grid = sol(grids(1)%x, grids(1)%y)
+  WRITE(*,'(a,2(1pe12.3))') 'Discretization error and residue =', &
+       &        errdisc_direct, resid_direct
+  WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
+       &    NORM2(sol_anal_grid-sol_direct_grid) / NORM2(sol_anal_grid)
+!--------------------------------------------------------------------------------
+!                   2.  Relaxation (at the finest grid, l=1)
+!
+  ALLOCATE(errdisc(0:nits))
+  ALLOCATE(resid(0:nits))
+  ALLOCATE(sol_relax(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)))
+  ALLOCATE(sol_relax_orig(SIZE(grids(1)%v,1), SIZE(grids(1)%v,2)))
+  sol_relax_1d(1:SIZE(grids(1)%v1d)) => sol_relax
+!
+!   Initial guess
+!
+  IF(nlfixed) THEN
+     sol_relax = sol_direct    ! Test fixed point\
+  ELSE
+     sol_relax = 0.0d0
+  END IF
+!
+  sol_relax_orig = sol_relax
+  CALL back_transf(grids(1), sol_relax_orig, nluniq_in=nluniq)
+  errdisc(0) = disc_err(grids(1)%spl, sol_relax_orig, sol)
+  resid(0) = residue(grids(1)%mata, grids(1)%f1d, sol_relax_1d)
+!
+  t0 = mpi_wtime()
+  DO its=1,nits
+     SELECT CASE (TRIM(relax))
+     CASE('jac')
+        CALL jacobi(grids(1)%mata, omega, 1, sol_relax_1d, grids(1)%f1d)
+     CASE('gs')
+        CALL gs(grids(1)%mata, 1, sol_relax_1d, grids(1)%f1d)
+     END SELECT
+     resid(its) = residue(grids(1)%mata, grids(1)%f1d, sol_relax_1d)
+!
+     sol_relax_orig = sol_relax
+     CALL back_transf(grids(1), sol_relax_orig, nluniq_in=nluniq)
+!
+     errdisc(its) = disc_err(grids(1)%spl, sol_relax_orig, sol)
+  END DO
+  WRITE(*,'(a,1pe12.3)') 'Iterative solve time (s/iteration) =', (mpi_wtime()-t0)/REAL(nits,8)
+!
+  ALLOCATE(sol_relax_grid(0:n(1),0:n(2)))
+  CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_relax_grid, &
+       &       [0,0], sol_relax_orig)
+!
+  WRITE(*,'(/a4,3a12)') 'its', 'residue', 'disc. err'
+  WRITE(*,'(i4,3(1pe12.3))') 0, resid(0), errdisc(0)
+  WRITE(*,'((i4,4(1pe12.3)))') (its, resid(its), errdisc(its), &
+       &   resid(its)/resid(its-1), &
+       &   errdisc(its)/errdisc(its-1), its=1,nits,MAX(1,nits/10))
+!--------------------------------------------------------------------------------
+!                   9.  Epilogue
+!
+!   Creata HDF5 file
+!
+  IF(me.EQ.0) CALL h5file
+!
+  CALL mpi_finalize(ierr)
+!--------------------------------------------------------------------------------
+CONTAINS
+!+++
+  FUNCTION norm2(x)
+!
+!  Compute the 2-norm of array x
+!
+    IMPLICIT NONE
+    DOUBLE PRECISION :: norm2
+    DOUBLE PRECISION, INTENT(in) :: x(:,:)
+    DOUBLE PRECISION :: sum2
+    INTEGER :: i, j
+!
+    sum2 = 0.0d0
+    DO i=1,SIZE(x,1)
+       DO j=1,SIZE(x,2)
+          sum2 = sum2 + x(i,j)**2
+       END DO
+    END DO
+    norm2 = SQRT(sum2)
+  END FUNCTION norm2
+!+++
+  FUNCTION rhs(r, theta)
+!
+! Return problem RHS
+!
+    USE math_util, ONLY : root_bessj
+    DOUBLE PRECISION, INTENT(in) :: r, theta
+    DOUBLE PRECISION :: rhs
+    DOUBLE PRECISION :: nump
+!
+    SELECT CASE(TRIM(prb))
+    CASE('poly')
+       rhs = REAL(4*(modem+1),8)*r**(modem+1)*COS(REAL(modem,8)*theta)
+    CASE('bess')
+       nump = root_bessj(modem, modep)
+       rhs = r * nump**2 * BESSEL_JN(modem, nump*r) * COS(modem*theta)
+    END SELECT
+  END FUNCTION rhs
+!+++
+  FUNCTION sol(r, theta)
+!
+! Return exact problem solution
+!
+    USE math_util, ONLY : root_bessj
+    DOUBLE PRECISION, INTENT(in) :: r(:), theta(:)
+    DOUBLE PRECISION :: sol(SIZE(r),SIZE(theta))
+    DOUBLE PRECISION :: nump
+    INTEGER :: j
+!
+    SELECT CASE(TRIM(prb))
+    CASE('poly')
+       DO j=1,SIZE(theta)
+          sol(:,j) = (1-r(:)**2) * r(:)**modem * COS(modem*theta(j))
+       END DO
+    CASE('bess')
+       nump = root_bessj(modem, modep)
+       DO j=1,SIZE(theta)
+          sol(:,j) = BESSEL_JN(modem, nump*r(:)) * COS(modem*theta(j))
+       END DO
+    END SELECT
+  END FUNCTION sol
+!+++
+  SUBROUTINE coefeq(r, theta, idt, idw, c)
+!
+! Weak form = Int( \nabla(w).\nabla(t) + \sigma.t.w) dV)
+!
+    DOUBLE PRECISION, INTENT(in)  :: r, theta
+    INTEGER, INTENT(out)           :: idt(:,:), idw(:,:)
+    DOUBLE PRECISION, INTENT(out) :: c(:)
+!
+    c(1) = r
+    idt(1,1) = 1
+    idt(1,2) = 0
+    idw(1,1) = 1
+    idw(1,2) = 0
+!
+    c(2) = 1.0d0/r
+    idt(2,1) = 0
+    idt(2,2) = 1
+    idw(2,1) = 0
+    idw(2,2) = 1
+  END SUBROUTINE coefeq
+!+++
+  SUBROUTINE h5file
+    USE futils
+    CHARACTER(len=128) :: file='test_relax2d_cyl.h5'
+    INTEGER :: fid
+    INTEGER :: l
+    CHARACTER(len=64) :: dsname
+    CALL creatf(file, fid, real_prec='d')
+    CALL attach(fid, '/', 'NX', n(1))
+    CALL attach(fid, '/', 'NY', n(2))
+    CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
+    CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
+    CALL attach(fid, '/', 'MODEM', modem)
+    CALL attach(fid, '/', 'MODEP', modep)
+    CALL attach(fid, '/', 'LEVELS', levels)
+    CALL attach(fid, '/', 'RELAX', relax)
+    CALL attach(fid, '/', 'NITS', nits)
+    CALL attach(fid, '/', 'OMEGA', omega)
+    CALL creatg(fid, '/mglevels')
+    DO l=1,levels
+       WRITE(dsname,'("/mglevels/level.",i2.2)') l
+       CALL creatg(fid, TRIM(dsname))
+       CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata)
+       IF(l.GT.1) THEN
+          CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1))
+          CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2))
+       END IF
+       CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x)
+       CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y)
+       CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f)
+       CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v)
+       CALL putarr(fid, TRIM(dsname)//'/f1d', grids(l)%f1d)
+       CALL putarr(fid, TRIM(dsname)//'/v1d', grids(l)%v1d)
+    END DO
+!
+!  Solutions at finest grid
+!
+    CALL creatg(fid, '/solutions')
+    CALL putarr(fid, '/solutions/xg', grids(1)%x)
+    CALL putarr(fid, '/solutions/yg', grids(1)%y)
+    CALL putarr(fid, '/solutions/direct', sol_direct_grid)
+    CALL putarr(fid, '/solutions/relax', sol_relax_grid)
+    CALL putarr(fid, '/solutions/anal', sol_anal_grid)
+!
+    CALL creatg(fid, '/relaxation')
+    CALL putarr(fid, '/relaxation/errdisc', errdisc)
+    CALL putarr(fid, '/relaxation/resid', resid)
+!
+    CALL closef(fid)
+  END SUBROUTINE h5file
+!+++
+END PROGRAM
diff --git a/multigrid/src/test_stencil.f90 b/multigrid/src/test_stencil.f90
new file mode 100644
index 0000000..e1a0dc0
--- /dev/null
+++ b/multigrid/src/test_stencil.f90
@@ -0,0 +1,238 @@
+!>
+!> @file test_stencil.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+MODULE mod
+  USE iso_fortran_env, ONLY : real64
+  IMPLICIT NONE
+!
+  INTEGER, PARAMETER     :: rkind = real64
+  LOGICAL, PARAMETER     :: nldebug=.FALSE.
+  REAL(rkind), PARAMETER :: PI=4.d0*ATAN(1.0d0)
+CONTAINS
+END MODULE mod
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+PROGRAM main
+  USE mpi
+  USE pputils2, ONLY : dist1d, exchange, norm2_vec=>ppnorm2
+  USE stencil, ONLY  : stencil_2d, init, laplacian, vmx, putmat
+  USE mod
+  IMPLICIT NONE
+!
+  INTEGER, PARAMETER :: ndims=2
+!
+  INTEGER                   :: me, neighs(4), npes, ierr
+  INTEGER, DIMENSION(ndims) :: dims=[0,0]
+  INTEGER, DIMENSION(ndims) :: coords, comm1d
+  LOGICAL, DIMENSION(ndims) :: periods=[.FALSE.,.FALSE.]
+  LOGICAL                   :: reorder =.FALSE.
+  INTEGER                   :: comm_cart
+!
+  INTEGER                   :: nx=4, ny=4    ! Number of intervals
+  INTEGER, DIMENSION(ndims) :: e, s, lb, ub, npt_glob, npt_loc
+!
+  REAL(rkind), ALLOCATABLE  :: xgrid(:), ygrid(:)
+  REAL(rkind), ALLOCATABLE  :: arr(:,:), fexact(:,:)
+  REAL(rkind), ALLOCATABLE  :: barr1(:,:), barr2(:,:), barr3(:,:)
+  REAL(rkind)               :: dx, dy
+  REAL(rkind)               :: err
+  INTEGER, DIMENSION(5,2)   :: id       ! 5-point stencil 
+  INTEGER                   :: npoints
+  TYPE(stencil_2d)          :: mat
+  INTEGER                   :: i, j
+!
+  NAMELIST /in/ nx, ny
+!================================================================================
+!                        1.0  Prologue
+!
+!  2D process grid
+  CALL mpi_init(ierr)
+  CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
+  CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
+  CALL mpi_dims_create(npes, ndims, dims, ierr)
+  CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, comm_cart,&
+       &               ierr)
+!
+  CALL mpi_comm_rank(comm_cart, me, ierr)
+  CALL mpi_cart_coords(comm_cart, me, ndims, coords, ierr)
+  CALL mpi_cart_shift(comm_cart, 0, 1, neighs(1), neighs(2), ierr)
+  CALL mpi_cart_shift(comm_cart, 1, 1, neighs(3), neighs(4), ierr)
+!
+  CALL mpi_cart_sub(comm_cart, [.TRUE.,.FALSE.], comm1d(1), ierr)
+  CALL mpi_cart_sub(comm_cart, [.FALSE.,.TRUE.], comm1d(2), ierr)
+!
+!  Read problem inputs
+  IF(me.EQ.0) THEN
+     READ(*,in)
+     WRITE(*,in)
+  END IF
+  CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, comm_cart, ierr)
+  CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, comm_cart, ierr)
+!================================================================================
+!                        2.0  2d Grid construction
+!
+!  Partition 2D grid
+  npt_glob(1) = nx+1
+  npt_glob(2) = ny+1
+  DO i=1,ndims
+     CALL dist1d(comm1d(i), 0, npt_glob(i), s(i), npt_loc(i))
+     e(i) = s(i) + npt_loc(i) - 1
+  END DO
+  WRITE(*,'(a,i3.3,a,2(3i4,", "))') 'PE', me, ' coords, s,e:', &
+       &  (coords(i),s(i),e(i),i=1,ndims)
+!
+!   Global mesh
+  dx = 1.0d0/REAL(nx)
+  dy = 1.0d0/REAL(ny)
+  ALLOCATE(xgrid(0:nx))
+  ALLOCATE(ygrid(0:ny))
+  xgrid = [ (i*dx, i=0,nx) ]
+  ygrid = [ (i*dy, i=0,ny) ]
+!================================================================================
+!                        3.0  FD Laplacian
+!
+  id=RESHAPE([ 0, -1, 0, 1, 0,  & 
+               0,  0,-1, 0, 1], &
+              [5,2])
+  npoints = 5
+  CALL init(s, e, id, .FALSE., mat, comm_cart)
+!
+  CALL laplacian(dx, dy, mat)
+!================================================================================
+!                       4.0 Check matrice-vector product
+!
+!  Local arrays with ghost cells
+  lb = mat%s-1
+  ub = mat%e+1
+  ALLOCATE(arr(lb(1):ub(1),lb(2):ub(2)))
+  ALLOCATE(fexact(lb(1):ub(1),lb(2):ub(2)))
+  ALLOCATE(barr1(lb(1):ub(1),lb(2):ub(2)))
+  ALLOCATE(barr2(lb(1):ub(1),lb(2):ub(2)))
+  ALLOCATE(barr3(lb(1):ub(1),lb(2):ub(2)))
+!
+!   Constant vector => Laplacian = 0
+  barr1 = 0
+  arr = 1.0
+  barr1 = vmx(mat,arr)
+  IF(mat%s(1).EQ.0)  barr1(0,:)  = 0.0  ! discard boundary values
+  IF(mat%e(1).EQ.nx) barr1(nx,:) = 0.0
+  IF(mat%s(2).EQ.0)  barr1(:,0)  = 0.0
+  IF(mat%e(2).EQ.ny) barr1(:,ny) = 0.0
+  err = norm2_vec(barr1,comm_cart,root=0,garea=[1,1])
+  IF(me.EQ.0) THEN
+     WRITE(*,'(/a,1pe12.3)') 'Constant vector: ||B1|| =', err
+  END IF
+!
+!  Bilinear vector => Laplacian = 0
+  arr =0.0d0
+  barr2=0.0d0
+  DO j=mat%s(2),mat%e(2)
+     DO i=mat%s(1),mat%e(1)
+        arr(i,j) = xgrid(i)*ygrid(j)
+     END DO
+  END DO
+  CALL exchange(comm_cart, arr)
+  barr2 = vmx(mat,arr)
+  IF(mat%s(1).EQ.0)  barr2(0,:)  = 0.0  ! discard boundary values
+  IF(mat%e(1).EQ.nx) barr2(nx,:) = 0.0
+  IF(mat%s(2).EQ.0)  barr2(:,0)  = 0.0
+  IF(mat%e(2).EQ.ny) barr2(:,ny) = 0.0
+  err = norm2_vec(barr2, comm_cart,root=0,garea=[1,1])
+  IF(me.EQ.0) THEN
+     WRITE(*,'(a,1pe12.3)') 'Bilinear vector: ||B2|| =', err
+  END IF
+!
+!  Biquadratic vector => Laplacian = fexact
+  DO j=mat%s(2),mat%e(2)
+     DO i=mat%s(1),mat%e(1)
+        arr(i,j) = (xgrid(i)*ygrid(j))**2/4.0d0
+        fexact(i,j) = (xgrid(i)**2 + ygrid(j)**2)/2.0d0
+     END DO
+  END DO
+  CALL exchange(comm_cart, arr)
+  CALL exchange(comm_cart, fexact)
+  barr3 = vmx(mat,arr) - fexact
+  IF(mat%s(1).EQ.0)  barr3(0,:)  = 0.0  ! discard boundary values
+  IF(mat%e(1).EQ.nx) barr3(nx,:) = 0.0
+  IF(mat%s(2).EQ.0)  barr3(:,0)  = 0.0
+  IF(mat%e(2).EQ.ny) barr3(:,ny) = 0.0
+  err = norm2_vec(barr3,comm_cart,root=0,garea=[1,1])
+  IF(me.EQ.0) THEN
+     WRITE(*,'(a,1pe12.3)') 'Biquadratic vector: ||B3|| =', err
+  END IF
+!================================================================================
+!                        9.0  Epilogue
+  CALL h5file
+  CALL MPI_FINALIZE(ierr)
+CONTAINS
+  SUBROUTINE disp(str, arr)
+    CHARACTER(len=*), INTENT(in) :: str
+    REAL(rkind), INTENT(in)      :: arr(:,:)
+    INTEGER :: j
+    WRITE(*,'(/a)') str
+    DO j=1,SIZE(arr,2)
+       WRITE(*,'(10f8.3)') arr(:,j)
+    END DO
+  END SUBROUTINE disp
+!
+  SUBROUTINE h5file
+    USE futils
+    CHARACTER(len=128) :: file='test_stencil.h5'
+    INTEGER :: fid
+    CALL creatf(file, fid, real_prec='d', mpicomm=comm_cart)
+    CALL putarr(fid, '/xgrid', xgrid, ionode=0)  ! only rank 0 does IO
+    CALL putarr(fid, '/ygrid', ygrid, ionode=0)  ! only rank 0 does IO
+    CALL putarrnd(fid, '/barr1', barr1,(/1,2/), garea=(/1,1/))
+    CALL putarrnd(fid, '/barr2', barr2,(/1,2/), garea=(/1,1/))
+    CALL putarrnd(fid, '/barr3', barr3,(/1,2/), garea=(/1,1/))
+    CALL putmat(fid, '/MAT', mat)
+    CALL closef(fid)
+  END SUBROUTINE h5file
+!
+  FUNCTION outerprod(x, y) RESULT(r)
+!
+!  outer product
+!
+    REAL(rkind), INTENT(in) :: x(:), y(:)
+    REAL(rkind)             :: r(SIZE(x),SIZE(y))
+    INTEGER :: i, j
+    DO j=1,SIZE(y)
+       DO i=1,SIZE(x)
+          r(i,j) = x(i)*y(j)
+       END DO
+    END DO
+  END FUNCTION outerprod
+!
+  FUNCTION rhs(x,y)
+    REAL(rkind), INTENT(in) :: x(:), y(:)
+    REAL(rkind)             :: rhs(SIZE(x),SIZE(y))
+    rhs = -10.d0*pi**2 * outerprod(SIN(pi*x), SIN(3.d0*pi*y))
+  END FUNCTION rhs
+!
+  FUNCTION exact(x,y)
+    REAL(rkind), INTENT(in) :: x(:), y(:)
+    REAL(rkind)             :: exact(SIZE(x),SIZE(y))
+    exact = outerprod(SIN(pi*x), SIN(3.d0*pi*y))
+  END FUNCTION exact
+END PROGRAM main
diff --git a/multigrid/src/test_stencilg.f90 b/multigrid/src/test_stencilg.f90
new file mode 100644
index 0000000..0b920cc
--- /dev/null
+++ b/multigrid/src/test_stencilg.f90
@@ -0,0 +1,203 @@
+!>
+!> @file test_stencilg.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+MODULE mod
+  USE iso_fortran_env, ONLY : rkind => real64
+  IMPLICIT NONE
+!
+  LOGICAL, PARAMETER     :: nldebug=.FALSE.
+  REAL(rkind), PARAMETER :: PI=4.d0*ATAN(1.0d0)
+CONTAINS
+END MODULE mod
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+PROGRAM main
+  USE mpi
+  USE pputils2, ONLY : dist1d
+  USE gvector, ONLY  : gvector_2d, ASSIGNMENT(=), OPERATOR(-)
+  USE parmg, ONLY    : exchange, norm_vec
+  USE stencil, ONLY  : stencil_2d, init, laplacian, putmat, OPERATOR(*)
+  USE mod
+  IMPLICIT NONE
+!
+  INTEGER, PARAMETER :: ndims=2
+!
+  INTEGER                   :: me, neighs(4), npes, ierr
+  INTEGER, DIMENSION(ndims) :: dims=[0,0]
+  INTEGER, DIMENSION(ndims) :: coords, comm1d
+  LOGICAL, DIMENSION(ndims) :: periods=[.FALSE.,.FALSE.]
+  LOGICAL                   :: reorder =.FALSE.
+  INTEGER                   :: comm_cart
+!
+  INTEGER                   :: nx=4, ny=4    ! Number of intervals
+  INTEGER, DIMENSION(ndims) :: e, s, g, npt_glob, npt_loc
+!
+  REAL(rkind), ALLOCATABLE  :: xgrid(:), ygrid(:)
+  TYPE(gvector_2d)          :: arr, fexact
+  TYPE(gvector_2d)          :: barr1, barr2, barr3
+  REAL(rkind)               :: dx, dy
+  REAL(rkind)               :: err
+  INTEGER, DIMENSION(5,2)   :: id       ! 5-point stencil 
+  INTEGER                   :: npoints
+  TYPE(stencil_2d)          :: mat
+  INTEGER                   :: i, j
+!
+  NAMELIST /in/ nx, ny
+!================================================================================
+!                        1.0  Prologue
+!
+!  2D process grid
+  CALL mpi_init(ierr)
+  CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
+  CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
+  CALL mpi_dims_create(npes, ndims, dims, ierr)
+  CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, comm_cart,&
+       &               ierr)
+!
+  CALL mpi_comm_rank(comm_cart, me, ierr)
+  CALL mpi_cart_coords(comm_cart, me, ndims, coords, ierr)
+  CALL mpi_cart_shift(comm_cart, 0, 1, neighs(1), neighs(2), ierr)
+  CALL mpi_cart_shift(comm_cart, 1, 1, neighs(3), neighs(4), ierr)
+!
+  CALL mpi_cart_sub(comm_cart, [.TRUE.,.FALSE.], comm1d(1), ierr)
+  CALL mpi_cart_sub(comm_cart, [.FALSE.,.TRUE.], comm1d(2), ierr)
+!
+!  Read problem inputs
+  IF(me.EQ.0) THEN
+     READ(*,in)
+     WRITE(*,in)
+  END IF
+  CALL mpi_bcast(nx, 1, MPI_INTEGER, 0, comm_cart, ierr)
+  CALL mpi_bcast(ny, 1, MPI_INTEGER, 0, comm_cart, ierr)
+!================================================================================
+!                        2.0  2d Grid construction
+!
+!  Partition 2D grid
+  npt_glob(1) = nx+1
+  npt_glob(2) = ny+1
+  DO i=1,ndims
+     CALL dist1d(comm1d(i), 0, npt_glob(i), s(i), npt_loc(i))
+     e(i) = s(i) + npt_loc(i) - 1
+  END DO
+  WRITE(*,'(a,i3.3,a,2(3i4,", "))') 'PE', me, ' coords, s,e:', &
+       &  (coords(i),s(i),e(i),i=1,ndims)
+!
+!   Global mesh
+  dx = 1.0d0/REAL(nx)
+  dy = 1.0d0/REAL(ny)
+  ALLOCATE(xgrid(0:nx))
+  ALLOCATE(ygrid(0:ny))
+  xgrid = [ (i*dx, i=0,nx) ]
+  ygrid = [ (i*dy, i=0,ny) ]
+!================================================================================
+!                        3.0  FD Laplacian
+!
+  id=RESHAPE([ 0, -1, 0, 1, 0,  & 
+               0,  0,-1, 0, 1], &
+              [5,2])
+  npoints = 5
+  CALL init(s, e, id, .FALSE., mat, comm_cart)
+!
+  CALL laplacian(dx, dy, mat)
+!================================================================================
+!                       4.0 Check matrice-vector product
+!
+!  Local arrays with ghost cells
+  g = [1,1]
+  arr   = gvector_2d(s, e, g)
+  barr1 = gvector_2d(s, e, g)
+  barr2 = gvector_2d(s, e, g)
+  barr3 = gvector_2d(s, e, g)
+  fexact = gvector_2d(s, e, g)
+!
+!   Constant vector => Laplacian = 0
+  arr = 1.0d0
+  CALL exchange(comm_cart, arr)
+  barr1 = mat*arr
+  IF(s(1).EQ.0)  barr1%val(0,:)  = 0.0  ! discard boundary values
+  IF(e(1).EQ.nx) barr1%val(nx,:) = 0.0
+  IF(s(2).EQ.0)  barr1%val(:,0)  = 0.0
+  IF(e(2).EQ.ny) barr1%val(:,ny) = 0.0
+  err = norm_vec(barr1, comm_cart, root=0)
+  IF(me.EQ.0) THEN
+     WRITE(*,'(/a,1pe12.3)') 'Constant vector: ||B1|| =', err
+  END IF
+!
+!  Bilinear vector => Laplacian = 0
+  DO j=s(2),e(2)
+     DO i=s(1),e(1)
+        arr%val(i,j) = xgrid(i)*ygrid(j)
+     END DO
+  END DO
+  CALL exchange(comm_cart, arr)
+  barr2 = mat*arr
+  IF(s(1).EQ.0)  barr2%val(0,:)  = 0.0  ! discard boundary values
+  IF(e(1).EQ.nx) barr2%val(nx,:) = 0.0
+  IF(s(2).EQ.0)  barr2%val(:,0)  = 0.0
+  IF(e(2).EQ.ny) barr2%val(:,ny) = 0.0
+  err = norm_vec(barr2, comm_cart, root=0)
+  IF(me.EQ.0) THEN
+     WRITE(*,'(a,1pe12.3)') 'Bilinear vector: ||B2|| =', err
+  END IF
+!
+!  Biquadratic vector => Laplacian = fexact
+  DO j=s(2),e(2)
+     DO i=s(1),e(1)
+        arr%val(i,j) = (xgrid(i)*ygrid(j))**2/4.0d0
+        fexact%val(i,j) = (xgrid(i)**2 + ygrid(j)**2)/2.0d0
+     END DO
+  END DO
+  CALL exchange(comm_cart, arr)
+  CALL exchange(comm_cart, fexact)
+  barr3 = mat*arr - fexact
+  IF(s(1).EQ.0)  barr3%val(0,:)  = 0.0  ! discard boundary values
+  IF(e(1).EQ.nx) barr3%val(nx,:) = 0.0
+  IF(s(2).EQ.0)  barr3%val(:,0)  = 0.0
+  IF(e(2).EQ.ny) barr3%val(:,ny) = 0.0
+  err = norm_vec(barr3, comm_cart, root=0)
+  IF(me.EQ.0) THEN
+     WRITE(*,'(a,1pe12.3)') 'Biquadratic vector: ||B3|| =', err
+  END IF
+!================================================================================
+!                        9.0  Epilogue
+  CALL h5file
+  CALL MPI_FINALIZE(ierr)
+!
+CONTAINS
+  SUBROUTINE h5file
+    USE futils
+    CHARACTER(len=128) :: file='test_stencilg.h5'
+    INTEGER :: fid
+    CALL creatf(file, fid, real_prec='d', mpicomm=comm_cart)
+    CALL putarr(fid, '/xgrid', xgrid, ionode=0)  ! only rank 0 does IO
+    CALL putarr(fid, '/ygrid', ygrid, ionode=0)  ! only rank 0 does IO
+    CALL putarrnd(fid, '/arr',   arr%val,(/1,2/), garea=g)
+    CALL putarrnd(fid, '/barr1', barr1%val,(/1,2/), garea=g)
+    CALL putarrnd(fid, '/barr2', barr2%val,(/1,2/), garea=g)
+    CALL putarrnd(fid, '/barr3', barr3%val,(/1,2/), garea=g)
+    CALL putmat(fid, '/MAT', mat)
+    CALL closef(fid)
+  END SUBROUTINE h5file
+!
+END PROGRAM main
diff --git a/multigrid/src/test_transf2d.f90 b/multigrid/src/test_transf2d.f90
new file mode 100644
index 0000000..a126b7e
--- /dev/null
+++ b/multigrid/src/test_transf2d.f90
@@ -0,0 +1,301 @@
+!>
+!> @file test_transf2d.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   Test 2d multigrid 
+!
+  USE multigrid
+  USE csr
+  IMPLICIT NONE
+  INCLUDE 'mpif.h'
+!
+  DOUBLE PRECISION, PARAMETER :: PI=4.0d0*ATAN(1.0d0)
+  INTEGER, DIMENSION(2) :: n, nidbas, ngauss, alpha
+  DOUBLE PRECISION :: kx=4.d0, ky=3.d0, sigma=10.0d0
+  INTEGER :: levels=1
+  DOUBLE PRECISION :: omega=2.0d0/3.0d0
+!
+  DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:)
+  DOUBLE PRECISION :: dx, dy
+  INTEGER :: ix, iy
+!
+  DOUBLE PRECISION, ALLOCATABLE :: sol_direct_grid(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: sol_anal_grid(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: errdisc(:), resid(:)
+!
+  DOUBLE PRECISION, ALLOCATABLE, TARGET :: fcoarse(:,:)
+  DOUBLE PRECISION, POINTER :: fcoarse_1d(:)
+  DOUBLE PRECISION, ALLOCATABLE, TARGET :: vfine(:,:)
+  DOUBLE PRECISION, POINTER :: vfine_1d(:)
+  DOUBLE PRECISION, ALLOCATABLE :: vfine_grid(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: err_restrict(:), err_prolong(:), &
+       &                           disc_err_prolong(:)
+!
+  INTEGER :: ierr, me
+  INTEGER :: l, nterms
+  INTEGER :: its
+!
+  TYPE(grid2d), ALLOCATABLE :: grids(:)
+!
+  NAMELIST /newrun/ n, nidbas, ngauss, kx, ky, sigma, alpha, levels
+!--------------------------------------------------------------------------------
+!                    1.   Prologue
+!
+  CALL mpi_init(ierr)
+  CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
+!
+!   Inputs
+!
+  n = (/8, 8/)
+  nidbas=(/3,3/)
+  ngauss=(/2,2/)
+  alpha = (/0,0/)
+  kx=4
+  ky=3
+  sigma=10.0d0
+  levels=2
+!
+  IF(me.EQ.0) THEN
+     READ(*,newrun)
+     WRITE(*,newrun)
+  END IF
+     CALL mpi_bcast(n, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(alpha, 2, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(kx, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(ky, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(sigma, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(levels, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+!
+!   Adjust number of levels
+!
+  levels = MIN(levels, get_lmax(n(1)), get_lmax(n(2)))
+!
+!   Create grids
+!
+  dx = 1.0d0/REAL(n(1),8)
+  dy = 1.0d0/REAL(n(2),8)
+  ALLOCATE(x(0:n(1)), y(0:n(2)))
+  DO ix=0,n(1)
+     x(ix) = ix*dx
+  END DO
+  DO iy=0,n(2)
+     y(iy) = iy*dy
+  END DO
+!
+  ALLOCATE(grids(levels))
+  CALL create_grid(x, y, nidbas, ngauss, alpha, grids)
+  WRITE(*,'(5a6)') 'l', 'nx', 'ny', 'rx', 'ry'
+  WRITE(*,'(5i6)') (l, grids(l)%n, grids(l)%rank, l=1,levels)
+!
+!   Build FE matrices and set BC
+!
+  nterms = 3
+  DO l=1,levels
+     CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms)
+     CALL ibcmat(grids(l), grids(l)%mata)
+     CALL to_mat(grids(l)%mata)
+  END DO
+!
+!   Set BC on grid transfer matrices
+!
+  CALL ibc_transf(grids,1,3)
+  CALL ibc_transf(grids,2,3)
+!--------------------------------------------------------------------------------
+!                   1.  Direct solutions
+!
+  WRITE(*,'(/a)') 'Direct solutions for all levels ...'
+  ALLOCATE(errdisc(levels))
+  ALLOCATE(resid(levels))
+  WRITE(*,'(3a5,2a12)') 'l', 'nx', 'ny', 'err', 'resid'
+  DO l=1,levels
+     CALL disrhs(grids(l)%spl, grids(l)%f, rhs)
+     CALL ibcrhs(grids(l), grids(l)%f)
+     grids(l)%v = grids(l)%f
+     CALL direct_solve(grids(l), grids(l)%v1d, debug=.FALSE.)
+     errdisc(l) = disc_err(grids(l)%spl, grids(l)%v, sol)
+     resid(l) = residue(grids(l)%mata, grids(l)%f1d, grids(l)%v1d)
+     WRITE(*,'(3i5,2(1pe12.3))') l, grids(l)%n, Errdisc(l), resid(l)
+  END DO
+!
+!  Grid values of direct solutions at the finest levels
+  ALLOCATE(sol_direct_grid(0:n(1),0:n(2)))
+  ALLOCATE(sol_anal_grid(0:n(1),0:n(2)))
+  CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_direct_grid, &
+       &       [0,0], grids(1)%v)
+  sol_anal_grid = sol(grids(1)%x, grids(1)%y)
+!--------------------------------------------------------------------------------
+!                   2.  Test restrict and prolong
+!
+  WRITE(*,'(/a)') 'Testing restrict and prolong...'
+  WRITE(*,'(3a5,3a12)') 'l', 'nx', 'ny', 'rhs', 'sol', 'disc_err'
+  ALLOCATE(err_restrict(2:levels))
+  ALLOCATE(err_prolong(2:levels))
+  ALLOCATE(disc_err_prolong(2:levels))
+  ALLOCATE(vfine_grid(0:n(1),0:n(2)))
+  DO l=2,levels
+     ALLOCATE(fcoarse(SIZE(grids(l)%f,1),SIZE(grids(l)%f,2)))
+     fcoarse_1d(1:SIZE(grids(l)%f1d)) => fcoarse
+     ALLOCATE(vfine(SIZE(grids(l-1)%v,1),SIZE(grids(l-1)%v,2)))
+     vfine_1d(1:SIZE(grids(l-1)%v1d)) => vfine
+!
+     fcoarse = restrict(grids(l)%matp,  grids(l-1)%f)
+     err_restrict(l) =  MAXVAL(ABS(fcoarse_1d-grids(l)%f1d))
+!
+     CALL direct_solve(grids(l), fcoarse_1d)
+     vfine = prolong(grids(l)%matp, fcoarse)
+     disc_err_prolong(l) = disc_err(grids(l-1)%spl, vfine, sol)
+     err_prolong(l) = MAXVAL(ABS(vfine_1d-grids(l-1)%v1d))
+!
+     IF(l.EQ.2) THEN   ! Grid val on finest grid
+        CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, vfine_grid, &
+             &       [0,0], vfine)
+     END IF
+!
+     WRITE(*,'(3i5,3(1pe12.3))') l, grids(l)%n, err_restrict(l), err_prolong(l), &
+          &                      disc_err_prolong(l)
+     DEALLOCATE(fcoarse)
+     DEALLOCATE(vfine)
+  END DO
+!--------------------------------------------------------------------------------
+!                   9.  Epilogue
+!
+!   Creata HDF5 file
+!
+  IF(me.EQ.0) CALL h5file
+!
+  CALL mpi_finalize(ierr)
+!--------------------------------------------------------------------------------
+CONTAINS
+!+++
+  FUNCTION rhs(x, y)
+!
+! Return problem RHS
+!
+    DOUBLE PRECISION, INTENT(in) :: x, y
+    DOUBLE PRECISION :: rhs
+    rhs = SIN(PI*kx*x)*SIN(PI*ky*y)
+  END FUNCTION rhs
+!+++
+  FUNCTION sol(x, y)
+!
+! Return exact problem solution
+!
+    DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
+    DOUBLE PRECISION :: sol(SIZE(x),SIZE(y))
+    DOUBLE PRECISION :: c
+    INTEGER :: j
+    DO j=1,SIZE(y)
+       c = SIN(PI*ky*y(j)) / (PI**2*(kx**2+ky**2) + sigma**2)
+       sol(:,j) = c * SIN(PI*kx*x(:))
+    END DO
+  END FUNCTION sol
+!+++
+  SUBROUTINE coefeq(x, y, idt, idw, c)
+!
+! Weak form = Int( \nabla(w).\nabla(t) + \sigma.t.w) dV)
+!
+    DOUBLE PRECISION, INTENT(in)  :: x, y
+    INTEGER, INTENT(out)           :: idt(:,:), idw(:,:)
+    DOUBLE PRECISION, INTENT(out) :: c(:)
+!
+    c(1) = 1.0d0
+    idt(1,1) = 1
+    idt(1,2) = 0
+    idw(1,1) = 1
+    idw(1,2) = 0
+!
+    c(2) = 1.0d0
+    idt(2,1) = 0
+    idt(2,2) = 1
+    idw(2,1) = 0
+    idw(2,2) = 1
+!
+    c(3) = sigma
+    idt(3,1) = 0
+    idt(3,2) = 0
+    idw(3,1) = 0
+    idw(3,2) = 0
+
+  END SUBROUTINE coefeq
+!+++
+  SUBROUTINE h5file
+    USE futils
+    CHARACTER(len=128) :: file='test_transf2d.h5'
+    INTEGER :: fid
+    INTEGER :: l
+    CHARACTER(len=64) :: dsname
+    CALL creatf(file, fid, real_prec='d')
+    CALL attach(fid, '/', 'NX', n(1))
+    CALL attach(fid, '/', 'NY', n(2))
+    CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
+    CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
+    CALL attach(fid, '/', 'KX', kx)
+    CALL attach(fid, '/', 'KY', ky)
+    CALL attach(fid, '/', 'SIGMA', sigma)
+    CALL attach(fid, '/', 'ALPHA1', alpha(1))
+    CALL attach(fid, '/', 'ALPHA2', alpha(2))
+    CALL attach(fid, '/', 'LEVELS', levels)
+    CALL creatg(fid, '/mglevels')
+    DO l=1,levels
+       WRITE(dsname,'("/mglevels/level.",i2.2)') l
+       CALL creatg(fid, TRIM(dsname))
+       CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata)
+       IF(l.GT.1) THEN
+          CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1))
+          CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2))
+       END IF
+       CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x)
+       CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y)
+       CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f)
+       CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v)
+       CALL putarr(fid, TRIM(dsname)//'/f1d', grids(l)%f1d)
+       CALL putarr(fid, TRIM(dsname)//'/v1d', grids(l)%v1d)
+    END DO
+!
+!  Solutions at finest grid
+!
+    CALL creatg(fid, '/solutions')
+    CALL putarr(fid, '/solutions/xg', grids(1)%x)
+    CALL putarr(fid, '/solutions/yg', grids(1)%y)
+    CALL putarr(fid, '/solutions/direct', sol_direct_grid)
+    CALL putarr(fid, '/solutions/anal', sol_anal_grid)
+    CALL putarr(fid, '/solutions/vfine', vfine_grid)
+!
+!   Some errors
+!
+    CALL creatg(fid, '/errors')
+    CALL putarr(fid, '/errors/errdisc', errdisc)
+    CALL putarr(fid, '/errors/resid', resid)
+    CALL putarr(fid, '/errors/restrict', err_restrict)
+    CALL putarr(fid, '/errors/prolong', err_prolong)
+    CALL putarr(fid, '/errors/disc_err_prolong', disc_err_prolong)
+!
+    CALL closef(fid)
+  END SUBROUTINE h5file
+!+++
+END PROGRAM
diff --git a/multigrid/src/test_transf2d_cyl.f90 b/multigrid/src/test_transf2d_cyl.f90
new file mode 100644
index 0000000..66427e6
--- /dev/null
+++ b/multigrid/src/test_transf2d_cyl.f90
@@ -0,0 +1,321 @@
+!>
+!> @file test_transf2d_cyl.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   Test 2d multigrid 
+!   Cylindrical case
+!
+  USE multigrid
+  USE csr
+  IMPLICIT NONE
+  INCLUDE 'mpif.h'
+!
+  DOUBLE PRECISION, PARAMETER :: PI=4.0d0*ATAN(1.0d0)
+  INTEGER, DIMENSION(2) :: n, nidbas, ngauss
+  INTEGER :: modem=22, modep=10
+  INTEGER :: levels=1
+  CHARACTER(len=4) :: prb='poly'
+  LOGICAL :: nluniq=.TRUE.
+!
+  DOUBLE PRECISION, ALLOCATABLE :: x(:), y(:)
+  DOUBLE PRECISION :: dx, dy
+  INTEGER :: ix, iy
+!
+  DOUBLE PRECISION, ALLOCATABLE :: sol_direct_grid(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: sol_anal_grid(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: errdisc(:), resid(:)
+!
+  DOUBLE PRECISION, ALLOCATABLE, TARGET :: fcoarse(:,:)
+  DOUBLE PRECISION, POINTER :: fcoarse_1d(:)
+  DOUBLE PRECISION, ALLOCATABLE, TARGET :: vfine(:,:)
+  DOUBLE PRECISION, POINTER :: vfine_1d(:)
+  DOUBLE PRECISION, ALLOCATABLE :: vfine_grid(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: err_restrict(:), err_prolong(:), &
+       &                           disc_err_prolong(:)
+!
+  INTEGER :: ierr, me
+  INTEGER :: l, nterms
+  INTEGER :: its
+  INTEGER :: n2
+!
+  TYPE(grid2d), ALLOCATABLE :: grids(:)
+!
+  NAMELIST /newrun/ n, nidbas, ngauss, modem, modep, levels, prb, nluniq
+!--------------------------------------------------------------------------------
+!                    1.   Prologue
+!
+  CALL mpi_init(ierr)
+  CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
+!
+!   Inputs
+!
+  n = (/8, 8/)
+  nidbas=(/3,3/)
+  ngauss=(/2,2/)
+  modem = 22
+  modep = 10
+  prb='poly'
+  levels=2
+  nluniq = .TRUE.
+!
+  IF(me.EQ.0) THEN
+     READ(*,newrun)
+     WRITE(*,newrun)
+  END IF
+     CALL mpi_bcast(n, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(nidbas, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(ngauss, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(modem, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(modep, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(levels, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(prb, LEN(prb), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
+     CALL mpi_bcast(nluniq, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+!
+!   Adjust number of levels
+!
+  levels = MIN(levels, get_lmax(n(1)), get_lmax(n(2)))
+!
+!   Create grids
+!
+  dx = 1.0d0/REAL(n(1),8)
+  dy = 2.0d0*pi/REAL(n(2),8)
+  ALLOCATE(x(0:n(1)), y(0:n(2)))
+  DO ix=0,n(1)
+     x(ix) = ix*dx
+  END DO
+  DO iy=0,n(2)
+     y(iy) = iy*dy
+  END DO
+!
+  ALLOCATE(grids(levels))
+  CALL create_grid(x, y, nidbas, ngauss, [1, 0], grids, period=[.FALSE., .TRUE.], &
+       &           debug_in=.FALSE.)
+  WRITE(*,'(5a6,a12)') 'l', 'nx', 'ny', 'rx', 'ry', 'shape of v'
+  WRITE(*,'(7i6)') (l, grids(l)%n, grids(l)%rank, SHAPE(grids(l)%v), l=1,levels)
+!
+!   Build FE matrices and set BC
+!
+  nterms = 2
+  DO l=1,levels
+     CALL femat(grids(l)%spl, grids(l)%mata, coefeq, nterms)
+     CALL ibcmat(grids(l), grids(l)%mata, nluniq_in=nluniq)
+     CALL to_mat(grids(l)%mata)
+  END DO
+!
+!   Set BC on grid transfer matrices
+!
+  CALL ibc_transf(grids, 1, 2)   ! Only right boundary on r (1st dim.)
+!--------------------------------------------------------------------------------
+!                   1.  Direct solutions
+!
+  WRITE(*,'(/a)') 'Direct solutions for all levels ...'
+  WRITE(*,'(3a5,2a12)') 'l', 'nx', 'ny', 'err', 'resid'
+!
+  ALLOCATE(errdisc(levels))
+  ALLOCATE(resid(levels))
+!
+  DO l=1,levels
+     CALL disrhs(grids(l)%spl, grids(l)%f, rhs)
+     CALL ibcrhs(grids(l), grids(l)%f, nluniq_in=nluniq)
+!
+     grids(l)%v = grids(l)%f
+     CALL direct_solve(grids(l), grids(l)%v1d, debug=.FALSE.)
+!
+     resid(l) = residue(grids(l)%mata, grids(l)%f1d, grids(l)%v1d)
+     CALL back_transf(grids(l), grids(l)%v, nluniq_in=nluniq)
+     errdisc(l) = disc_err(grids(l)%spl, grids(l)%v, sol)
+     WRITE(*,'(3i5,2(1pe12.3))') l, grids(l)%n, Errdisc(l), resid(l)
+  END DO
+!
+!  Grid values of direct solutions at the finest levels
+  ALLOCATE(sol_direct_grid(0:n(1),0:n(2)))
+  ALLOCATE(sol_anal_grid(0:n(1),0:n(2)))
+  CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, sol_direct_grid, &
+       &       [0,0], grids(1)%v)
+  sol_anal_grid = sol(grids(1)%x, grids(1)%y)
+!--------------------------------------------------------------------------------
+!                   2.  Test restrict and prolong
+!
+  WRITE(*,'(/a)') 'Testing restrict and prolong...'
+  WRITE(*,'(3a5,3a12)') 'l', 'nx', 'ny', 'rhs', 'sol', 'disc_err'
+  ALLOCATE(err_restrict(2:levels))
+  ALLOCATE(err_prolong(2:levels))
+  ALLOCATE(disc_err_prolong(2:levels))
+  ALLOCATE(vfine_grid(0:n(1),0:n(2)))
+  DO l=2,levels
+     ALLOCATE(fcoarse(SIZE(grids(l)%f,1),SIZE(grids(l)%f,2)))
+     fcoarse_1d(1:SIZE(grids(l)%f1d)) => fcoarse
+     ALLOCATE(vfine(SIZE(grids(l-1)%v,1),SIZE(grids(l-1)%v,2)))
+     vfine_1d(1:SIZE(grids(l-1)%v1d)) => vfine
+!
+     fcoarse(:,:) = restrict_cyl(grids(l),  grids(l-1)%f, nluniq)
+!
+     err_restrict(l) =  MAXVAL(ABS(fcoarse_1d-grids(l)%f1d))
+!
+     CALL direct_solve(grids(l), fcoarse_1d)
+!
+     vfine(:,:) = prolong_cyl(grids(l), fcoarse, nluniq)
+!
+     CALL back_transf(grids(l-1), vfine, nluniq_in=nluniq)
+     disc_err_prolong(l) = disc_err(grids(l-1)%spl, vfine, sol)
+     err_prolong(l) = MAXVAL(ABS(vfine_1d-grids(l-1)%v1d))
+!
+     IF(l.EQ.2) THEN   ! Grid val on finest grid
+        CALL gridval(grids(1)%spl, grids(1)%x, grids(1)%y, vfine_grid, &
+             &       [0,0], vfine)
+     END IF
+!
+     WRITE(*,'(3i5,3(1pe12.3))') l, grids(l)%n, err_restrict(l), err_prolong(l), &
+          &                      disc_err_prolong(l)
+     DEALLOCATE(fcoarse)
+     DEALLOCATE(vfine)
+  END DO
+!--------------------------------------------------------------------------------
+!                   9.  Epilogue
+!
+!   Creata HDF5 file
+!
+  IF(me.EQ.0) CALL h5file
+!
+  CALL mpi_finalize(ierr)
+!--------------------------------------------------------------------------------
+CONTAINS
+!+++
+  FUNCTION rhs(r, theta)
+!
+! Return problem RHS
+!
+    USE math_util, ONLY : root_bessj
+    DOUBLE PRECISION, INTENT(in) :: r, theta
+    DOUBLE PRECISION :: rhs
+    DOUBLE PRECISION :: nump
+!
+    SELECT CASE(TRIM(prb))
+    CASE('poly')
+       rhs = REAL(4*(modem+1),8)*r**(modem+1)*COS(REAL(modem,8)*theta)
+    CASE('bess')
+       nump = root_bessj(modem, modep)
+       rhs = r * nump**2 * BESSEL_JN(modem, nump*r) * COS(modem*theta)
+    END SELECT
+  END FUNCTION rhs
+!+++
+  FUNCTION sol(r, theta)
+!
+! Return exact problem solution
+!
+    USE math_util, ONLY : root_bessj
+    DOUBLE PRECISION, INTENT(in) :: r(:), theta(:)
+    DOUBLE PRECISION :: sol(SIZE(r),SIZE(theta))
+    DOUBLE PRECISION :: nump
+    INTEGER :: j
+!
+    SELECT CASE(TRIM(prb))
+    CASE('poly')
+       DO j=1,SIZE(theta)
+          sol(:,j) = (1-r(:)**2) * r(:)**modem * COS(modem*theta(j))
+       END DO
+    CASE('bess')
+       nump = root_bessj(modem, modep)
+       DO j=1,SIZE(theta)
+          sol(:,j) = BESSEL_JN(modem, nump*r(:)) * COS(modem*theta(j))
+       END DO
+    END SELECT
+  END FUNCTION sol
+!+++
+  SUBROUTINE coefeq(r, theta, idt, idw, c)
+!
+! Weak form = Int( \nabla(w).\nabla(t) + \sigma.t.w) dV)
+!
+    DOUBLE PRECISION, INTENT(in)  :: r, theta
+    INTEGER, INTENT(out)           :: idt(:,:), idw(:,:)
+    DOUBLE PRECISION, INTENT(out) :: c(:)
+!
+    c(1) = r
+    idt(1,1) = 1
+    idt(1,2) = 0
+    idw(1,1) = 1
+    idw(1,2) = 0
+!
+    c(2) = 1.0d0/r
+    idt(2,1) = 0
+    idt(2,2) = 1
+    idw(2,1) = 0
+    idw(2,2) = 1
+  END SUBROUTINE coefeq
+!+++
+  SUBROUTINE h5file
+    USE futils
+    CHARACTER(len=128) :: file='test_transf2d_cyl.h5'
+    INTEGER :: fid
+    INTEGER :: l
+    CHARACTER(len=64) :: dsname
+    CALL creatf(file, fid, real_prec='d')
+    CALL attach(fid, '/', 'NX', n(1))
+    CALL attach(fid, '/', 'NY', n(2))
+    CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
+    CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
+    CALL attach(fid, '/', 'MODEM', modem)
+    CALL attach(fid, '/', 'MODEP', modep)
+    CALL attach(fid, '/', 'LEVELS', levels)
+    CALL creatg(fid, '/mglevels')
+    DO l=1,levels
+       WRITE(dsname,'("/mglevels/level.",i2.2)') l
+       CALL creatg(fid, TRIM(dsname))
+       CALL putmat(fid, TRIM(dsname)//'/mata', grids(l)%mata)
+       IF(l.GT.1) THEN
+          CALL putmat(fid, TRIM(dsname)//'/matpx', grids(l)%matp(1))
+          CALL putmat(fid, TRIM(dsname)//'/matpy', grids(l)%matp(2))
+       END IF
+       CALL putarr(fid, TRIM(dsname)//'/x', grids(l)%x)
+       CALL putarr(fid, TRIM(dsname)//'/y', grids(l)%y)
+       CALL putarr(fid, TRIM(dsname)//'/f', grids(l)%f)
+       CALL putarr(fid, TRIM(dsname)//'/v', grids(l)%v)
+       CALL putarr(fid, TRIM(dsname)//'/f1d', grids(l)%f1d)
+       CALL putarr(fid, TRIM(dsname)//'/v1d', grids(l)%v1d)
+    END DO
+!
+!  Solutions at finest grid
+!
+    CALL creatg(fid, '/solutions')
+    CALL putarr(fid, '/solutions/xg', grids(1)%x)
+    CALL putarr(fid, '/solutions/yg', grids(1)%y)
+    CALL putarr(fid, '/solutions/direct', sol_direct_grid)
+    CALL putarr(fid, '/solutions/anal', sol_anal_grid)
+    CALL putarr(fid, '/solutions/vfine', vfine_grid)
+!
+!   Some errors
+!
+    CALL creatg(fid, '/errors')
+    CALL putarr(fid, '/errors/errdisc', errdisc)
+    CALL putarr(fid, '/errors/resid', resid)
+    CALL putarr(fid, '/errors/restrict', err_restrict)
+    CALL putarr(fid, '/errors/prolong', err_prolong)
+    CALL putarr(fid, '/errors/disc_err_prolong', disc_err_prolong)
+!
+    CALL closef(fid)
+  END SUBROUTINE h5file
+!+++
+END PROGRAM
diff --git a/multigrid/src/transfer1d.f90 b/multigrid/src/transfer1d.f90
new file mode 100644
index 0000000..c245f09
--- /dev/null
+++ b/multigrid/src/transfer1d.f90
@@ -0,0 +1,126 @@
+!>
+!> @file transfer1d.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+!--------------------------------------------------------------------------------
+!--------------------------------------------------------------------------------
+!--------------------------------------------------------------------------------
+PROGRAM main
+  USE multigrid
+  IMPLICIT NONE
+!
+  INTEGER          :: nx=8, nidbas=1, ngauss=4, alpha=0, modem=0
+  DOUBLE PRECISION :: sigma=1.0d0
+  LOGICAL          :: nlper=.FALSE.
+  INTEGER          :: j
+!
+  TYPE(grid1d) :: gridx(2)
+  TYPE(gemat)  :: prolong_mat, restrict_mat, coarse_mat
+  DOUBLE PRECISION, ALLOCATABLE :: arow(:), temp(:,:)
+!
+  NAMELIST /newrun/ nx, nidbas, ngauss, sigma, alpha, modem, nlper
+!--------------------------------------------------------------------------------
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!    Set up fine and coarse grids
+!
+  CALL create_grid(nx, nidbas, ngauss, alpha, gridx, period=nlper)
+  CALL printmat('** Prolongation matrix **', gridx(2)%transf)
+!
+!   Restriction matrix = transpose of prolongation matrix
+!
+  CALL mcopy(gridx(2)%transf, prolong_mat)
+  CALL init(prolong_mat%mrows, 1, restrict_mat, mrows=prolong_mat%ncols)
+  restrict_mat%val = TRANSPOSE(prolong_mat%val)
+!
+!   Compute femat on fine and coarse grids
+!
+  IF(nlper) THEN
+     CALL femat(gridx(1)%spl, gridx(1)%matap, coefeq)
+     CALL printmat('** FE matrix on fine mesh **', gridx(1)%matap)
+     CALL femat(gridx(2)%spl, gridx(2)%matap, coefeq)
+     CALL printmat('** FE matrix on coarse mesh **', gridx(2)%matap)
+  ELSE
+     CALL femat(gridx(1)%spl, gridx(1)%mata, coefeq)
+     CALL printmat('** FE matrix on fine mesh **', gridx(1)%mata)
+     CALL femat(gridx(2)%spl, gridx(2)%mata, coefeq)
+     CALL printmat('** FE matrix on coarse mesh **', gridx(2)%mata)
+  END IF
+!
+!   Compute coarse FE matrix using transfer matrix
+!
+  IF(nlper) THEN
+     CALL init(gridx(2)%matap%rank, 1, coarse_mat)
+     ALLOCATE(temp(gridx(1)%matap%rank,gridx(2)%matap%rank))
+     DO j=1,gridx(2)%matap%rank
+        temp(:,j) = vmx(gridx(1)%matap,prolong_mat%val(:,j))
+     END DO
+     coarse_mat%val = vmx(restrict_mat,temp)
+     DEALLOCATE(temp)
+  ELSE
+     CALL init(gridx(2)%mata%rank, 1, coarse_mat)
+     coarse_mat%val = vmx(restrict_mat,vmx(gridx(1)%mata,prolong_mat%val))
+  END IF
+  CALL printmat('** Coarse FE matrix using transfer operators **', coarse_mat)
+!
+!   Compute the diff of Ac - R*Af*P
+!
+  IF(nlper) THEN
+     coarse_mat%val = coarse_mat%val - gridx(2)%matap%val
+  ELSE
+     ALLOCATE(arow(gridx(2)%mata%rank))
+     DO j=1,gridx(2)%mata%rank
+        CALL getcol(gridx(2)%mata, j, arow)
+        coarse_mat%val(:,j) = coarse_mat%val(:,j)-arow(:)
+     END DO
+     DEALLOCATE(arow)
+  END IF
+  WRITE(*,'(a,1pe12.3)') 'Diff =', MAXVAL(ABS(coarse_mat%val))
+CONTAINS
+  SUBROUTINE coefeq(x, idt, idw, c)
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(out) :: idt(:), idw(:)
+    DOUBLE PRECISION, INTENT(out) :: c(:)
+    SELECT CASE (alpha)
+    CASE(0)           ! Cartesian geometry
+       c(1) = 1.0d0
+       idt(1) = 1
+       idw(1) = 1
+       c(2) = sigma
+       idt(2) = 0
+       idw(2) = 0
+    CASE(1)
+       c(1) = x
+       idt(1) = 1
+       idw(1) = 1
+       c(2) = modem**2/x
+       idt(2) = 0
+       idw(2) = 0
+    CASE default
+       WRITE(*,'(a,i0,a)') 'COEFEQ: alpha ', alpha, ' not defined!'
+    END SELECT
+  END SUBROUTINE coefeq
+!--------------------------------------------------------------------------------
+END PROGRAM main
diff --git a/multigrid/src/transfer1d_col.f90 b/multigrid/src/transfer1d_col.f90
new file mode 100644
index 0000000..d0724d5
--- /dev/null
+++ b/multigrid/src/transfer1d_col.f90
@@ -0,0 +1,53 @@
+!>
+!> @file transfer1d_col.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!    Obtain grid transfer by collocation
+!
+  USE multigrid
+  IMPLICIT NONE
+!
+  INTEGER :: nx=8, nidbas=1
+  LOGICAL :: nlper=.TRUE.
+!
+  TYPE(grid1d) :: gridx(2)
+  TYPE(gemat)  :: pmat
+!
+  NAMELIST /newrun/ nx, nidbas, nlper
+!--------------------------------------------------------------------------------
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+  CALL create_grid(nx, nidbas, 1, 0, gridx, period=nlper)
+  CALL printmat('** Prolongation matrix (using mass matrix) **', gridx(2)%transf)
+!
+  CALL calc_pmat(gridx(1), gridx(2), pmat, .TRUE.)
+  CALL printmat('** Prolongation matrix (by collocation) **', pmat)
+!
+  WRITE(*,'(/a,1pe12.3)') 'Max diff =', MAXVAL(ABS(pmat%val-gridx(2)%transf%val))
+!
+END PROGRAM main
+
diff --git a/multigrid/src/two_grid.f90 b/multigrid/src/two_grid.f90
new file mode 100644
index 0000000..db513a8
--- /dev/null
+++ b/multigrid/src/two_grid.f90
@@ -0,0 +1,189 @@
+!>
+!> @file two_grid.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   Check some properties of grid transfer
+!
+  USE multigrid
+  USE math_util, ONLY : root_bessj
+  IMPLICIT NONE
+!
+  INTEGER          :: nx=8, nidbas=1, ngauss=2, alpha=0
+  INTEGER          :: modem=22, modep=10
+  INTEGER          :: levels=2
+  INTEGER          :: l, nrank
+  DOUBLE PRECISION :: sigma=1.0d0, kmode=10.0, pi=4.0d0*ATAN(1.0d0)
+  DOUBLE PRECISION, ALLOCATABLE :: v_prolong(:)
+!
+  TYPE(grid1d)  :: gridx(2)
+!
+  NAMELIST /newrun/ nx, nidbas, ngauss, sigma, kmode, modem, modep, alpha
+!--------------------------------------------------------------------------------
+!                   1.  Prologue
+!   Inputs
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Create grids
+!
+  CALL create_grid(nx, nidbas, ngauss, alpha, gridx)
+  WRITE(*,'(a/(20i6))') 'Number of intervals in grids', (gridx(l)%n, l=1,levels)
+!
+!   Create FE matrice and set BC u(0)=u(1)=0
+!
+  DO l=1,levels
+     CALL femat(gridx(l)%spl, gridx(l)%mata, coefeq)
+!
+!     Left Dirichlet BC (only for Cartesian geometry)
+     IF(alpha .EQ. 0) THEN
+        CALL ibcmat(1, gridx(l)%mata)
+     END IF
+!
+!     Right Dirichlet BC
+     CALL ibcmat(gridx(l)%mata%rank, gridx(l)%mata)
+!
+!     BC on grid transfer operator
+     IF(l.GT.1) THEN
+        WHERE( ABS(gridx(l)%transf%val) < 1.d-8) gridx(l)%transf%val=0.0d0
+        IF(alpha .EQ. 0) gridx(l)%transf%val(2:,1)=0.0d0
+        gridx(l)%transf%val(1:gridx(l-1)%rank-1,gridx(l)%rank)=0.0d0
+     END IF
+  END DO
+!
+!   Construct RHS and set BC only on the finest grid
+!
+  nrank = gridx(1)%rank
+  CALL disrhs(gridx(1)%spl, gridx(1)%f, rhs)
+!
+!     Left Dirichlet BC (only for Cartesian geometry)
+  IF(alpha .EQ. 0) THEN
+     gridx(1)%f(1) = 0.0d0
+  END IF
+!
+!     Right Dirichlet BC
+  gridx(1)%f(nrank) = 0.0d0
+!
+!   RHS on coarse grid by restriction
+!
+  gridx(2)%f = restrict(gridx(2)%transf,gridx(1)%f)
+!--------------------------------------------------------------------------------
+!                   2.  Direct solutions
+!
+  DO l=1,levels
+     CALL direct_solve(gridx(l), gridx(l)%v)
+     WRITE(*,'(a,i3/(10(1pe12.3)))') 'Sol at level', l, gridx(l)%v
+  END DO
+!
+!   Prolongation of coarse solution  
+!
+  ALLOCATE(v_prolong(SIZE(gridx(1)%v)))
+!
+  v_prolong = prolong(gridx(2)%transf, gridx(2)%v)
+  WRITE(*,'(a,i3/(10(1pe12.3)))') 'Prolong. sol.', l, v_prolong
+  WRITE(*,'(a,1pe12.3)') 'Error ||V_prolong-V_fine||', normf(gridx(1)%matm, v_prolong-gridx(1)%v)
+!--------------------------------------------------------------------------------
+!                   9.  Epilogue
+!
+!   Creata HDF5 file
+!
+  CALL h5file
+!--------------------------------------------------------------------------------
+CONTAINS
+  SUBROUTINE h5file
+    USE futils
+    CHARACTER(len=128) :: file='two_grid.h5'
+    INTEGER :: fid
+    INTEGER :: l
+    CHARACTER(len=64) :: dsname
+    CALL creatf(file, fid, real_prec='d')
+    CALL attach(fid, '/', 'NX', nx)
+    CALL attach(fid, '/', 'NIDBAS', nidbas)
+    CALL attach(fid, '/', 'SIGMA', sigma)
+    CALL attach(fid, '/', 'KMODE', kmode)
+    CALL attach(fid, '/', 'ALPHA', alpha)
+    CALL attach(fid, '/', 'LEVELS', levels)
+    CALL creatg(fid, '/mglevels')
+    DO l=1,levels
+       WRITE(dsname,'("/mglevels/level.",i2.2)') l
+       CALL creatg(fid, TRIM(dsname))
+       CALL putmat(fid, TRIM(dsname)//'/mata', gridx(l)%mata)
+       IF(l.GT.1) THEN
+          CALL putarr(fid, TRIM(dsname)//'/matp', gridx(l)%transf%val)
+          CALL attach(fid, TRIM(dsname)//'/matp', 'M', gridx(l)%transf%mrows)
+          CALL attach(fid, TRIM(dsname)//'/matp', 'N', gridx(l)%transf%ncols)
+       END IF
+       CALL putarr(fid, TRIM(dsname)//'/f', gridx(l)%f)
+       CALL putarr(fid, TRIM(dsname)//'/v', gridx(l)%v)
+    END DO
+    CALL putarr(fid, '/v_prolong', v_prolong)
+  END SUBROUTINE h5file
+  FUNCTION rhs(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    DOUBLE PRECISION :: rhs
+    DOUBLE PRECISION :: nump
+    SELECT CASE (alpha)
+    CASE(0)                  ! Cartesian geometry
+       rhs = SIN(pi*kmode*x)
+    CASE(1)                  ! Cylindrical
+       nump = root_bessj(modem, modep)
+       rhs = x * nump**2 * bessel_jn(modem, nump*x)
+    END SELECT
+  END FUNCTION rhs
+  FUNCTION sol(x)
+    DOUBLE PRECISION, INTENT(in) :: x(:)
+    DOUBLE PRECISION :: sol(SIZE(x))
+    DOUBLE PRECISION :: nump
+    SELECT CASE (alpha)
+    CASE(0)                  ! Cartesian geometry
+       sol(:) = 1.0d0/((pi*kmode)**2+sigma)*SIN(pi*kmode*x(:))
+    CASE(1)                  ! Cylindrical
+       nump = root_bessj(modem, modep)
+       sol(:) = bessel_jn(modem, nump*x(:))
+    END SELECT
+  END FUNCTION sol
+  SUBROUTINE coefeq(x, idt, idw, c)
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(out) :: idt(:), idw(:)
+    DOUBLE PRECISION, INTENT(out) :: c(:)
+    SELECT CASE (alpha)
+    CASE(0)           ! Cartesian geometry
+       c(1) = 1.0d0
+       idt(1) = 1
+       idw(1) = 1
+       c(2) = sigma
+       idt(2) = 0
+       idw(2) = 0
+    CASE(1)           ! Cylindrical
+       c(1) = x
+       idt(1) = 1
+       idw(1) = 1
+       c(2) = REAL(modem,8)**2/x
+       idt(2) = 0
+       idw(2) = 0
+    END SELECT
+  END SUBROUTINE coefeq
+END PROGRAM main
diff --git a/multigrid/wk/CMakeLists.txt b/multigrid/wk/CMakeLists.txt
new file mode 100644
index 0000000..b784517
--- /dev/null
+++ b/multigrid/wk/CMakeLists.txt
@@ -0,0 +1,53 @@
+/**
+ * @file CMakeLists.txt
+ *
+ * @brief
+ *
+ * @copyright
+ * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+ * SPC (Swiss Plasma Center)
+ *
+ * spclibs is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * @authors
+ * (in alphabetical order)
+ * @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+ */
+project(multigrid_wk)
+
+set(MG_TESTS
+  transfer1d
+  test_relax
+  test_mg
+  test_mgp
+  test_csr
+  two_grid
+  test_mg2d
+  test_relax2d
+  test_transf2d
+  transfer1d_col
+  test_relax2d_cyl
+  test_transf2d_cyl 
+  test_mg2d_cyl
+  poisson_fd
+)
+
+set(RUNTESTS "${CMAKE_CURRENT_SOURCE_DIR}/runtest.sh")
+set(BIN_DIR "${multigrid_tests_BINARY_DIR}")
+set(INPUT_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
+
+foreach(prog ${MG_TESTS})
+  add_test(${prog} ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 1 
+    ${RUNTESTS} ${BIN_DIR}/${prog} ${INPUT_DIR}
+    )
+endforeach()
diff --git a/multigrid/wk/poisson_fd.in b/multigrid/wk/poisson_fd.in
new file mode 100644
index 0000000..931e881
--- /dev/null
+++ b/multigrid/wk/poisson_fd.in
@@ -0,0 +1,28 @@
+&parameters
+ prb='dddd'
+ prb='nndd'
+ mat_type='cds'
+ nx=16, ny=64
+ nx=72, ny=224,
+ nx=64, ny=256
+ nx=256, ny=1024
+ nx=1024,ny=4096
+ nx=1536, ny=6144
+ nx=512, ny=2048
+ nx=32, ny=128
+ nx=128, ny=512
+ nz=1,
+ kx=4, ky=4,
+ Lx=100.d00, Ly=800.d00,
+ nldebug=f
+ nldirect=t,
+ icrosst=1,
+ beta=-1E-2
+ levels=5
+ nnu=1
+ nu1= 3,1,2,3,4,5
+ nu2= 3,1,2,3,4,5
+ mu=1, nu0=1,
+ relax= 'jac', omega=0.9
+ nits=15, atol=0., rtol=1.e-8
+/
diff --git a/multigrid/wk/ppoisson_fd.in b/multigrid/wk/ppoisson_fd.in
new file mode 100644
index 0000000..731a35d
--- /dev/null
+++ b/multigrid/wk/ppoisson_fd.in
@@ -0,0 +1,19 @@
+&in
+ nldebug=t
+ prb='nndd'
+ prb='dddd'
+ nx=32, ny=128
+ nx=16, ny=64
+ nx=512, ny=2048
+ nx=1024,ny=4096
+ nx=256, ny=1024,
+ nx=64, ny=256
+ nx=128, ny=512
+ kx=4, ky=4,
+ Lx=100.d00, Ly=800.d00,
+ icrosst=1, beta=-0.01, miome = 200.0,
+ levels=5, nu1=3, nu2=3, mu=1, nu0=1
+ relax='jac', omega=0.9, 
+ nits=20, rtol=1.e-8, atol=0., errtol=1.e-6
+ direct_solve_nits=5,
+/
diff --git a/multigrid/wk/run.sh b/multigrid/wk/run.sh
new file mode 100644
index 0000000..152144a
--- /dev/null
+++ b/multigrid/wk/run.sh
@@ -0,0 +1,59 @@
+#
+# @file run.sh
+#
+# @brief
+#
+# @copyright
+# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+# SPC (Swiss Plasma Center)
+#
+# spclibs is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# @authors
+# (in alphabetical order)
+# @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+#
+#!/bin/bash
+
+EXE=/home/ttran/bsplines/multigrid/src/poisson_mg
+TMP=/misc/multigrid
+[ -e $TMP ] || mkdir -p $TMP
+
+cat > in0 <<EOF
+&parameters
+ prb='dddd'
+ mat_type='cds'
+ nx=xxx, ny=yyy
+ nz=1,
+ kx=4, ky=4,
+ Lx=100.d00, Ly=800.d00,
+ nldebug=f
+ nldirect=f,
+ icrosst=16,
+ beta=-1E-2
+ levels=12
+ nnu=1
+ nu1= 3,4,5
+ nu2= 3,4,5
+ mu=1, nu0=1,
+ relax= 'gs', omega=0.9
+ nits=15, atol=0., rtol=1.e-8
+/
+EOF
+for x in 32 64 128 256 512; do
+    y=$(echo "4 * $x" | bc)
+    echo -n "Nx = $x, Ny = $y: "
+    sed "s/xxx/$x/g" in0 | sed "s/yyy/$y/g" | ${EXE} | grep '     3     3 '
+    mv poisson_mg.h5 $TMP/dddd_${x}x${y}.h5
+done
+
diff --git a/multigrid/wk/runtest.sh b/multigrid/wk/runtest.sh
new file mode 100644
index 0000000..810d2bb
--- /dev/null
+++ b/multigrid/wk/runtest.sh
@@ -0,0 +1,37 @@
+#
+# @file runtest.sh
+#
+# @brief
+#
+# @copyright
+# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+# SPC (Swiss Plasma Center)
+#
+# spclibs is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# @authors
+# (in alphabetical order)
+# @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+#
+#!/bin/sh
+
+progname=$1
+input_dir=$2
+
+prog=$(basename ${progname})
+input_file=${input_dir}/${prog}.in
+
+${progname} < $input_file
+
+exit $?
+
diff --git a/multigrid/wk/test_csr.in b/multigrid/wk/test_csr.in
new file mode 100644
index 0000000..fcbe78a
--- /dev/null
+++ b/multigrid/wk/test_csr.in
@@ -0,0 +1,8 @@
+&newrun
+ nx=4
+ nidbas=2,
+ sigma = 10.0d0, kmode=10, 
+ modem=0,	
+ alpha = 0,
+ nlper = f,
+/
diff --git a/multigrid/wk/test_intergrid.in b/multigrid/wk/test_intergrid.in
new file mode 100644
index 0000000..28db575
--- /dev/null
+++ b/multigrid/wk/test_intergrid.in
@@ -0,0 +1,9 @@
+&parameters
+ prb='nndd'
+ prb='dddd'
+ nx=8,  ny=6,    kx=1, ky=1, Lx=1.d00,   Ly=1.d00,  nldebug=t
+ nx=32, ny=128,  kx=1, ky=4, Lx=100.d00, Ly=800.d00,  nldebug=f
+ icrosst=1,
+ beta=-1E-2
+ levels=5
+/
diff --git a/multigrid/wk/test_jacobi.in b/multigrid/wk/test_jacobi.in
new file mode 100644
index 0000000..d5ddd5f
--- /dev/null
+++ b/multigrid/wk/test_jacobi.in
@@ -0,0 +1,4 @@
+&in
+ nx=8, ny=24
+ omega=1.0,  nits=400,
+/
diff --git a/multigrid/wk/test_jacobig.in b/multigrid/wk/test_jacobig.in
new file mode 100644
index 0000000..c93ecc3
--- /dev/null
+++ b/multigrid/wk/test_jacobig.in
@@ -0,0 +1,13 @@
+&in
+ prb='nndd'
+ prb='dddd'
+ nx=256, ny=1024,
+ nx=32, ny=128
+ nx=64, ny=256
+ nx=16, ny=64
+ nx=128, ny=512
+ kx=4, ky=4,
+ Lx=100.d00, Ly=800.d00,
+ icrosst=1, beta=-0.01, miome = 200.0,
+ omega=1.0,  nits=40, nu=10,
+/
diff --git a/multigrid/wk/test_mg.in b/multigrid/wk/test_mg.in
new file mode 100644
index 0000000..c1e2de2
--- /dev/null
+++ b/multigrid/wk/test_mg.in
@@ -0,0 +1,13 @@
+&newrun
+ nx=128,
+ nidbas=3, ngauss=2,
+ sigma = 0.0d0, kmode=10,
+ modem=1, modep=10,
+ modem=22, modep=10,
+ alpha = 1,
+ relax='gs', nits=10
+ omega=0.6667
+ nlfixed=f,
+ levels=6,
+ nu1=1 nu2=1, mu=1, nu0=1
+/
diff --git a/multigrid/wk/test_mg2d.in b/multigrid/wk/test_mg2d.in
new file mode 100644
index 0000000..56cff89
--- /dev/null
+++ b/multigrid/wk/test_mg2d.in
@@ -0,0 +1,15 @@
+&newrun
+ n=2*128
+ nidbas=2*3
+ ngauss=2*1
+ kx=3, ky=40,
+ kx=3, ky=3,
+ alpha=0,0
+ sigma=0.
+ levels=12
+ nu1=2, nu2=1, mu=1, nu0=1
+ relax='gs',  omega=0.6667,
+ nits=20, tol=1.e-10,
+ nlfixed=f
+ prb = 'poly'
+/
diff --git a/multigrid/wk/test_mg2d_cyl.in b/multigrid/wk/test_mg2d_cyl.in
new file mode 100644
index 0000000..5ce28da
--- /dev/null
+++ b/multigrid/wk/test_mg2d_cyl.in
@@ -0,0 +1,14 @@
+&newrun
+ n=2*128
+ nidbas=2*3
+ ngauss=2*6,
+ modem=22, modep=10
+ prb='poly',
+ levels=12,
+ nu1=7, nu2=7, mu=1, nu0=1
+ omega=0.65,
+ relax='gs',
+ nits=60, tol=1.e-8, rtol=0.0
+ nlfixed=f
+ nluniq=t
+/
diff --git a/multigrid/wk/test_mgp.in b/multigrid/wk/test_mgp.in
new file mode 100644
index 0000000..80a305b
--- /dev/null
+++ b/multigrid/wk/test_mgp.in
@@ -0,0 +1,9 @@
+&newrun
+ nx=1024,
+ nidbas=2, ngauss=2,
+ sigma = 0.01, kmode=10,
+ relax='gs', nits=10
+ nlfixed=f,
+ levels=6,
+ nu1=1, nu2=1, mu=1, nu0=1
+/
diff --git a/multigrid/wk/test_relax.in b/multigrid/wk/test_relax.in
new file mode 100644
index 0000000..fe5319c
--- /dev/null
+++ b/multigrid/wk/test_relax.in
@@ -0,0 +1,11 @@
+&newrun
+ nx=32
+ nidbas=1,
+ ngauss = 6
+ sigma = 0.0d0, kmode=10, 
+ modem=22, modep=10,
+ alpha = 0,
+ relax='gs', nits=200
+ omega = 0.6667
+ nlfixed=f,
+/
diff --git a/multigrid/wk/test_relax2d.in b/multigrid/wk/test_relax2d.in
new file mode 100644
index 0000000..42892a9
--- /dev/null
+++ b/multigrid/wk/test_relax2d.in
@@ -0,0 +1,11 @@
+&newrun
+ n=2*128
+ nidbas=2*3
+ ngauss=2*4
+ kx=3, ky=3,
+ sigma=0.0,
+ alpha=0,0
+ levels=4
+ nits=1000
+ relax='gs'
+/
diff --git a/multigrid/wk/test_relax2d_cyl.in b/multigrid/wk/test_relax2d_cyl.in
new file mode 100644
index 0000000..7a96c0e
--- /dev/null
+++ b/multigrid/wk/test_relax2d_cyl.in
@@ -0,0 +1,14 @@
+&newrun
+ n=2*16
+ nidbas=2*3
+ ngauss=2*6
+ prb='bess',
+ modem=3, modep=10,
+ modem=0, modep=5,
+ levels=4
+ nits=500,
+ omega = 0.6667,
+ relax='gs'
+ nlfixed = f
+ nluniq = t
+/
diff --git a/multigrid/wk/test_stencil.in b/multigrid/wk/test_stencil.in
new file mode 100644
index 0000000..a8bffc0
--- /dev/null
+++ b/multigrid/wk/test_stencil.in
@@ -0,0 +1,4 @@
+&in
+ nx=12,
+ ny=10,
+/
diff --git a/multigrid/wk/test_stencilg.in b/multigrid/wk/test_stencilg.in
new file mode 100644
index 0000000..a8bffc0
--- /dev/null
+++ b/multigrid/wk/test_stencilg.in
@@ -0,0 +1,4 @@
+&in
+ nx=12,
+ ny=10,
+/
diff --git a/multigrid/wk/test_transf2d.in b/multigrid/wk/test_transf2d.in
new file mode 100644
index 0000000..7c75afb
--- /dev/null
+++ b/multigrid/wk/test_transf2d.in
@@ -0,0 +1,9 @@
+&newrun
+ n=2*128
+ nidbas=2*3
+ ngauss=2*8
+ kx=3, ky=2,
+ sigma=0.0,
+ alpha=0,0
+ levels=6
+/
diff --git a/multigrid/wk/test_transf2d_cyl.in b/multigrid/wk/test_transf2d_cyl.in
new file mode 100644
index 0000000..b8c00b3
--- /dev/null
+++ b/multigrid/wk/test_transf2d_cyl.in
@@ -0,0 +1,10 @@
+&newrun
+ n=2*128
+ nidbas=2*1
+ ngauss=2*12
+ modem=3, modep=10,
+ modem=0, modep=5,
+ levels=7,
+ prb='bess'
+ nluniq = f,
+/
diff --git a/multigrid/wk/transfer1d.in b/multigrid/wk/transfer1d.in
new file mode 100644
index 0000000..8c6ded9
--- /dev/null
+++ b/multigrid/wk/transfer1d.in
@@ -0,0 +1,9 @@
+&newrun
+ nx=8,
+ nidbas=2,
+ ngauss = 3,
+ sigma = 1.0d0
+ alpha = 1,
+ modem=0,
+ nlper=f,
+/
diff --git a/multigrid/wk/transfer1d_col.in b/multigrid/wk/transfer1d_col.in
new file mode 100644
index 0000000..ef91365
--- /dev/null
+++ b/multigrid/wk/transfer1d_col.in
@@ -0,0 +1,5 @@
+&newrun
+ nx=8,
+ nidbas=3,
+ nlper=f,
+/
diff --git a/multigrid/wk/two_grid.in b/multigrid/wk/two_grid.in
new file mode 100644
index 0000000..6f862d1
--- /dev/null
+++ b/multigrid/wk/two_grid.in
@@ -0,0 +1,8 @@
+&newrun
+ nx=128,
+ nidbas=3, ngauss=2,
+ sigma = 0.0d0, kmode=10,
+ modem=1, modep=10,
+ modem=22, modep=10,
+ alpha = 1,
+/
diff --git a/pppack/CMakeLists.txt b/pppack/CMakeLists.txt
new file mode 100644
index 0000000..a6796da
--- /dev/null
+++ b/pppack/CMakeLists.txt
@@ -0,0 +1,37 @@
+/**
+ * @file CMakeLists.txt
+ *
+ * @brief
+ *
+ * @copyright
+ * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+ * SPC (Swiss Plasma Center)
+ *
+ * spclibs is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * @authors
+ * (in alphabetical order)
+ * @author Nicolas Richart <nicolas.richart@epfl.ch>
+ * @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+ */
+set(SRCS
+  bvalue.f90
+  interv.f90
+)
+
+add_library(pppack STATIC ${SRCS})
+
+install(TARGETS pppack
+  EXPORT ${BSPLINES_EXPORT_TARGETS}
+  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+)
diff --git a/pppack/Makefile b/pppack/Makefile
new file mode 100644
index 0000000..0a5cbcf
--- /dev/null
+++ b/pppack/Makefile
@@ -0,0 +1,72 @@
+#
+# @file Makefile
+#
+# @brief
+#
+# @copyright
+# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+# SPC (Swiss Plasma Center)
+#
+# spclibs is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# @authors
+# (in alphabetical order)
+# @author Sébastien Jolliet <sebastien.jolliet@epfl.ch>
+# @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+#
+SRCS =	banfac.f90 banslv.f90 bchfac.f90 bchslv.f90 bsplpp.f90 bsplvb.f90 \
+	bsplvd.f90 bspp2d.f90 bvalue.f90 chol1d.f90 colloc.f90 colpnt.f90 \
+	cspint.f90 cubset.f90 cubslo.f90 cubspl.f90 cwidth.f90 difequ.f90 \
+	dtblok.f90 eqblok.f90 evnnot.f90 factrb.f90 fcblok.f90 interv.f90 \
+	knots.f90 l2appr.f90 l2err.f90 l2knts.f90 newnot.f90 ppvalu.f90 \
+	putit.f90 rvec_print.f90 sbblok.f90 setupq.f90 shiftb.f90 slvblk.f90 \
+	smooth.f90 spli2d.f90 spline_hermite_set.f90 spline_hermite_val.f90 \
+	splint.f90 splopt.f90 subbak.f90 subfor.f90 tautsp.f90 titanium.f90
+
+OBJS =	banfac.o banslv.o bchfac.o bchslv.o bsplpp.o bsplvb.o bsplvd.o \
+	bspp2d.o bvalue.o chol1d.o colloc.o colpnt.o cspint.o cubset.o \
+	cubslo.o cubspl.o cwidth.o difequ.o dtblok.o eqblok.o evnnot.o \
+	factrb.o fcblok.o interv.o knots.o l2appr.o l2err.o l2knts.o newnot.o \
+	ppvalu.o putit.o rvec_print.o sbblok.o setupq.o shiftb.o slvblk.o \
+	smooth.o spli2d.o spline_hermite_set.o spline_hermite_val.o splint.o \
+	splopt.o subbak.o subfor.o tautsp.o titanium.o
+
+OBJS = interv.o bvalue.o
+
+LIBS =
+
+CC = cc
+CFLAGS = -g
+FC = ifort
+FFLAGS = $(OPT)
+F90 = $(FC)
+F90FLAGS = $(FFLAGS)
+LDFLAGS =
+
+lib: libpppack.a
+
+libpppack.a: $(OBJS)
+	xiar r $@ $?
+	ranlib $@
+
+clean:
+	rm -f *.o *.mod *~ core
+
+distclean: clean
+	rm -f libpppack.a  a.out
+
+.SUFFIXES:
+.SUFFIXES: .o .c .f90
+
+.f90.o:
+	$(F90) $(F90FLAGS) -c $<
diff --git a/pppack/banfac.f90 b/pppack/banfac.f90
new file mode 100644
index 0000000..50ecb50
--- /dev/null
+++ b/pppack/banfac.f90
@@ -0,0 +1,234 @@
+!>
+!> @file banfac.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine banfac ( w, nroww, nrow, nbandl, nbandu, iflag )
+
+!*************************************************************************
+!
+!! BANFAC factors a banded matrix without pivoting.
+!
+!  Discussion:
+!
+!    BANFAC returns in W the LU-factorization, without pivoting, of
+!    the banded matrix A of order NROW with (NBANDL+1+NBANDU) bands
+!    or diagonals in the work array W.
+!
+!    Gauss elimination without pivoting is used.  The routine is
+!    intended for use with matrices A which do not require row
+!    interchanges during factorization, especially for the totally
+!    positive matrices which occur in spline calculations.
+!
+!    The matrix storage mode used is the same one used by LINPACK
+!    and LAPACK, and results in efficient innermost loops.
+!
+!    Explicitly, A has
+!
+!      NBANDL bands below the diagonal
+!      1     main diagonal
+!      NBANDU bands above the diagonal
+!
+!    and thus, with MIDDLE=NBANDU+1,
+!    A(I+J,J) is in W(I+MIDDLE,J) for I=-NBANDU,...,NBANDL, J=1,...,NROW.
+!
+!    For example, the interesting entries of a banded matrix
+!    matrix of order 9, with NBANDL=1, NBANDU=2:
+!
+!      11 12 13  0  0  0  0  0  0
+!      21 22 23 24  0  0  0  0  0
+!       0 32 33 34 35  0  0  0  0
+!       0  0 43 44 45 46  0  0  0
+!       0  0  0 54 55 56 57  0  0
+!       0  0  0  0 65 66 67 68  0
+!       0  0  0  0  0 76 77 78 79
+!       0  0  0  0  0  0 87 88 89
+!       0  0  0  0  0  0  0 98 99
+!
+!    would appear in the first 1+1+2=4 rows of W as follows:
+!
+!       0  0 13 24 35 46 57 68 79
+!       0 12 23 34 45 56 67 78 89
+!      11 22 33 44 55 66 77 88 99
+!      21 32 43 54 65 76 87 98  0
+!
+!    All other entries of W not identified in this way with an
+!    entry of A are never referenced.
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!    Input/output, real ( kind = 8 ) W(NROWW,NROW).
+!    On input, W contains the "interesting" part of a banded
+!    matrix A, with the diagonals or bands of A stored in the
+!    rows of W, while columns of A correspond to columns of W.
+!    On output, W contains the LU-factorization of A into a unit
+!    lower triangular matrix L and an upper triangular matrix U
+!    (both banded) and stored in customary fashion over the
+!    corresponding entries of A.
+!
+!    This makes it possible to solve any particular linear system A*X=B
+!    for X by the call
+!
+!      call banslv ( w, nroww, nrow, nbandl, nbandu, b )
+!
+!    with the solution X contained in B on return.
+!
+!    If IFLAG=2, then one of NROW-1, NBANDL, NBANDU failed to be nonnegative,
+!    or else one of the potential pivots was found to be zero
+!    indicating that A does not have an LU-factorization.  This
+!    implies that A is singular in case it is totally positive.
+!
+!    Input, integer NROWW, the row dimension of the work array W.
+!    NROWW must be at least NBANDL+1 + NBANDU.
+!
+!    Input, integer NROW, the number of rows in A.
+!
+!    Input, integer NBANDL, the number of bands of A below the main diagonal.
+!
+!    Input, integer NBANDU, the number of bands of A above the main diagonal.
+!
+!    Output, integer IFLAG, error flag.
+!    1, success.
+!    2, failure, the matrix was not factored.
+!
+  implicit none
+
+  integer nrow
+  integer nroww
+
+  real ( kind = 8 ) factor
+  integer i
+  integer iflag
+  integer j
+  integer k
+  integer middle
+  integer nbandl
+  integer nbandu
+  real ( kind = 8 ) pivot
+  real ( kind = 8 ) w(nroww,nrow)
+
+  iflag = 1
+
+  if ( nrow < 1 ) then
+    iflag = 2
+    return
+  end if
+!
+!  W(MIDDLE,*) contains the main diagonal of A.
+!
+  middle = nbandu + 1
+
+  if ( nrow == 1 ) then
+    if ( w(middle,nrow) == 0.0D+00 ) then
+      iflag = 2
+    end if
+    return
+  end if
+!
+!  A is upper triangular.  Check that the diagonal is nonzero.
+!
+  if ( nbandl <= 0 ) then
+
+    do i = 1, nrow-1
+      if ( w(middle,i) == 0.0D+00 ) then
+        iflag = 2
+        return
+      end if
+    end do
+
+    if ( w(middle,nrow) == 0.0D+00 ) then
+      iflag = 2
+    end if
+
+    return
+!
+!  A is lower triangular.  Check that the diagonal is nonzero and
+!  divide each column by its diagonal.
+!
+  else if ( nbandu <= 0 ) then
+
+    do i = 1, nrow-1
+
+      pivot = w(middle,i)
+
+      if ( pivot == 0.0D+00 ) then
+        iflag = 2
+        return
+      end if
+
+      do j = 1, min ( nbandl, nrow-i )
+        w(middle+j,i) = w(middle+j,i) / pivot
+      end do
+
+    end do
+
+    return
+
+  end if
+!
+!  A is not just a triangular matrix.
+!  Construct the LU factorization.
+!
+  do i = 1, nrow-1
+!
+!  W(MIDDLE,I) is the pivot for the I-th step.
+!
+    if ( w(middle,i) == 0.0D+00 ) then
+      iflag = 2
+      write ( *, '(a)' ) ' '
+      write ( *, '(a)' ) 'BANFAC - Fatal error!'
+      write ( *, '(a,i6)' ) '  Zero pivot encountered in column ', i
+      stop
+    end if
+!
+!  Divide each entry in column I below the diagonal by PIVOT.
+!
+    do j = 1, min ( nbandl, nrow-i )
+      w(middle+j,i) = w(middle+j,i) / w(middle,i)
+    end do
+!
+!  Subtract A(I,I+K)*(I-th column) from (I+K)-th column (below row I).
+!
+    do k = 1, min ( nbandu, nrow-i )
+      factor = w(middle-k,i+k)
+      do j = 1, min ( nbandl, nrow-i )
+        w(middle-k+j,i+k) = w(middle-k+j,i+k) - w(middle+j,i) * factor
+      end do
+    end do
+
+  end do
+!
+!  Check the last diagonal entry.
+!
+  if ( w(middle,nrow) == 0.0D+00 ) then
+    iflag = 2
+  end if
+
+  return
+end
diff --git a/pppack/banslv.f90 b/pppack/banslv.f90
new file mode 100644
index 0000000..a8159de
--- /dev/null
+++ b/pppack/banslv.f90
@@ -0,0 +1,112 @@
+!>
+!> @file banslv.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine banslv ( w, nroww, nrow, nbandl, nbandu, b )
+
+!*************************************************************************
+!
+!! BANSLV solves a banded linear system X * X = B factored by BANFAC.
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!    Input, real ( kind = 8 ) W(NROWW,NROW).  W contains the banded matrix,
+!    after it has been factored by BANFAC.
+!
+!    Input, integer NROWW, the row dimension of the work array W.
+!    NROWW must be at least NBANDL+1 + NBANDU.
+!
+!    Input, integer NROW, the number of rows in A.
+!
+!    Input, integer NBANDL, the number of bands of A below the
+!    main diagonal.
+!
+!    Input, integer NBANDU, the number of bands of A above the
+!    main diagonal.
+!
+!    Input/output, real ( kind = 8 ) B(NROW).
+!    On input, B contains the right hand side of the system to be solved.
+!    On output, B contains the solution, X.
+!
+  implicit none
+
+  integer nrow
+  integer nroww
+
+  real ( kind = 8 ) b(nrow)
+  integer i
+  integer j
+  integer jmax
+  integer middle
+  integer nbandl
+  integer nbandu
+  real ( kind = 8 ) w(nroww,nrow)
+
+  middle = nbandu + 1
+
+  if ( nrow == 1 ) then
+    b(1) = b(1) / w(middle,1)
+    return
+  end if
+!
+!  Forward pass
+!
+!  For I = 1, 2, ..., NROW-1, subtract RHS(I)*(I-th column of L)
+!  from the right side, below the I-th row.
+!
+  if ( 0 < nbandl ) then
+    do i = 1, nrow-1
+      jmax = min ( nbandl, nrow-i )
+      do j = 1, jmax
+        b(i+j) = b(i+j) - b(i) * w(middle+j,i)
+      end do
+    end do
+  end if
+!
+!  Backward pass
+!
+!  For I=NROW, NROW-1,...,1, divide RHS(I) by
+!  the I-th diagonal entry of U, then subtract
+!  RHS(I)*(I-th column of U) from right side, above the I-th row.
+!
+  do i = nrow, 2, -1
+
+    b(i) = b(i) / w(middle,i)
+
+    do j = 1, min ( nbandu, i-1 )
+      b(i-j) = b(i-j) - b(i) * w(middle-j,i)
+    end do
+
+  end do
+
+  b(1) = b(1) / w(middle,1)
+
+  return
+end
diff --git a/pppack/bchfac.f90 b/pppack/bchfac.f90
new file mode 100644
index 0000000..c582ad0
--- /dev/null
+++ b/pppack/bchfac.f90
@@ -0,0 +1,168 @@
+!>
+!> @file bchfac.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine bchfac ( w, nbands, nrow, diag )
+
+!*************************************************************************
+!
+!! BCHFAC constructs a Cholesky factorization of a matrix.
+!
+!  Discussion:
+!
+!    The factorization has the form
+!
+!      C = L * D * L'
+!
+!    with L unit lower triangular and D diagonal, for a given matrix C of
+!    order NROW, where C is symmetric positive semidefinite and banded,
+!    having NBANDS diagonals at and below the main diagonal.
+!
+!    Gauss elimination is used, adapted to the symmetry and bandedness of C.
+!
+!    Near-zero pivots are handled in a special way.  The diagonal
+!    element C(N,N)=W(1,N) is saved initially in DIAG(N), all N.
+!
+!    At the N-th elimination step, the current pivot element, W(1,N),
+!    is compared with its original value, DIAG(N).  If, as the result
+!    of prior elimination steps, this element has been reduced by about
+!    a word length, (i.e., if W(1,N)+DIAG(N) <= DIAG(N)), then the pivot
+!    is declared to be zero, and the entire N-th row is declared to
+!    be linearly dependent on the preceding rows.  This has the effect
+!    of producing X(N) = 0 when solving C*X = B for X, regardless of B.
+!
+!    Justification for this is as follows.  In contemplated applications
+!    of this program, the given equations are the normal equations for
+!    some least-squares approximation problem, DIAG(N) = C(N,N) gives
+!    the norm-square of the N-th basis function, and, at this point,
+!    W(1,N) contains the norm-square of the error in the least-squares
+!    approximation to the N-th basis function by linear combinations
+!    of the first N-1.
+!
+!    Having W(1,N)+DIAG(N) <= DIAG(N) signifies that the N-th function
+!    is linearly dependent to machine accuracy on the first N-1
+!    functions, therefore can safely be left out from the basis of
+!    approximating functions.
+!
+!    The solution of a linear system C*X=B is effected by the
+!    succession of the following two calls:
+!
+!      CALL BCHFAC(W,NBANDS,NROW,DIAG)
+!
+!      CALL BCHSLV(W,NBANDS,NROW,B,X)
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!    Input/output, real ( kind = 8 ) W(NBANDS,NROW).
+!
+!    On input, W contains the NBANDS diagonals in its rows,
+!    with the main diagonal in row 1.  Precisely, W(I,J)
+!    contains C(I+J-1,J), I=1,...,NBANDS, J=1,...,NROW.
+!
+!    For example, the interesting entries of a seven diagonal
+!    symmetric matrix C of order 9 would be stored in W as
+!
+!      11 22 33 44 55 66 77 88 99
+!      21 32 43 54 65 76 87 98  *
+!      31 42 53 64 75 86 97  *  *
+!      41 52 63 74 85 96  *  *  *
+!
+!    Entries of the array not associated with an
+!    entry of C are never referenced.
+!
+!    On output, W contains the Cholesky factorization
+!    C = L*D*L-transp, with W(1,I) containing 1/D(I,I) and W(I,J)
+!    containing L(I-1+J,J), I=2,...,NBANDS.
+!
+!    Input, integer NBANDS, indicates the bandwidth of the
+!    matrix C, i.e., C(I,J) = 0 for NBANDS < ABS(I-J).
+!
+!    Input, integer NROW, is the order of the matrix C.
+!
+!    Work array, real ( kind = 8 ) DIAG(NROW).
+!
+  implicit none
+
+  integer nbands
+  integer nrow
+
+  real ( kind = 8 ) diag(nrow)
+  integer i
+  integer imax
+  integer j
+  integer jmax
+  integer n
+  real ( kind = 8 ) ratio
+  real ( kind = 8 ) w(nbands,nrow)
+
+  if ( nrow <= 1 ) then
+    if ( 0.0D+00 < w(1,1) ) then
+      w(1,1) = 1.0D+00 / w(1,1)
+    end if
+    return
+  end if
+!
+!  Store the diagonal.
+!
+  diag(1:nrow) = w(1,1:nrow)
+!
+!  Factorization.
+!
+  do n = 1, nrow
+
+    if ( w(1,n) + diag(n) <= diag(n) ) then
+      w(1:nbands,n) = 0.0D+00
+    else
+
+      w(1,n) = 1.0D+00 / w(1,n)
+
+      imax = min ( nbands-1, nrow-n )
+
+      jmax = imax
+
+      do i = 1, imax
+
+        ratio = w(i+1,n) * w(1,n)
+
+        do j = 1, jmax
+          w(j,n+i) = w(j,n+i) - w(j+i,n) * ratio
+        end do
+
+        jmax = jmax-1
+        w(i+1,n) = ratio
+
+      end do
+
+    end if
+
+  end do
+
+  return
+end
diff --git a/pppack/bchslv.f90 b/pppack/bchslv.f90
new file mode 100644
index 0000000..503e3e1
--- /dev/null
+++ b/pppack/bchslv.f90
@@ -0,0 +1,114 @@
+!>
+!> @file bchslv.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine bchslv ( w, nbands, nrow, b )
+
+!*************************************************************************
+!
+!! BCHSLV solves a banded symmetric positive definite system.
+!
+!  Discussion:
+!
+!    The system is of the form:
+!
+!      C * X = B
+!
+!    and the Cholesky factorization of C has been constructed
+!    by BCHFAC.
+!
+!    With the factorization
+!
+!      C = L * D * L'
+!
+!    available, where L is unit lower triangular and D is diagonal,
+!    the triangular system
+!
+!      L * Y = B
+!
+!    is solved for Y (forward substitution), Y is stored in B, the
+!    vector D**(-1)*Y is computed and stored in B, then the
+!    triangular system L'*X = D**(-1)*Y is solved for X
+!    (backsubstitution).
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!    Input, real ( kind = 8 ) W(NBANDS,NROW), the Cholesky factorization for C,
+!    as computed by BCHFAC.
+!
+!    Input, integer NBANDS, the bandwidth of C.
+!
+!    Input, integer NROW, the order of the matrix C.
+!
+!    Input/output, real ( kind = 8 ) B(NROW).
+!    On input, the right hand side.
+!    On output, the solution.
+!
+  implicit none
+
+  integer nbands
+  integer nrow
+
+  real ( kind = 8 ) b(nrow)
+  integer j
+  integer n
+  real ( kind = 8 ) w(nbands,nrow)
+
+  if ( nrow <= 1 ) then
+    b(1) = b(1) * w(1,1)
+    return
+  end if
+!
+!  Forward substitution.
+!  Solve L*Y=B.
+!
+  do n = 1, nrow
+
+    do j = 1, min(nbands-1,nrow-n)
+      b(j+n) = b(j+n) - w(j+1,n) * b(n)
+    end do
+
+  end do
+!
+!  Backsubstitution.
+!  Solve L'*X=D**(-1)*Y.
+!
+  do n = nrow, 1, -1
+
+    b(n) = b(n)*w(1,n)
+
+    do j = 1, min(nbands-1,nrow-n)
+      b(n) = b(n) - w(j+1,n) * b(j+n)
+    end do
+
+  end do
+
+  return
+end
diff --git a/pppack/bsplpp.f90 b/pppack/bsplpp.f90
new file mode 100644
index 0000000..41b76bd
--- /dev/null
+++ b/pppack/bsplpp.f90
@@ -0,0 +1,165 @@
+!>
+!> @file bsplpp.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine bsplpp ( t, bcoef, n, k, scrtch, break, coef, l )
+
+!*************************************************************************
+!
+!! BSPLPP converts from B-spline to piecewise polynomial form.
+!
+!  Discussion:
+!
+!    The B-spline representation of a spline is ( T, BCOEF, N, K ),
+!    while the piecewise polynomial representation is
+!    ( BREAK, COEF, L, K ).
+!
+!    For each breakpoint interval, the K relevant B-spline coefficients
+!    of the spline are found and then differenced repeatedly to get the
+!    B-spline coefficients of all the derivatives of the spline on that
+!    interval.
+!
+!    The spline and its first K-1 derivatives are then evaluated at the
+!    left end point of that interval, using BSPLVB repeatedly to obtain
+!    the values of all B-splines of the appropriate order at that point.
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!    Input, real ( kind = 8 ) T(N+K), the knot sequence.
+!
+!    Input, real ( kind = 8 ) BCOEF(N), the B spline coefficient sequence.
+!
+!    Input, integer N, the number of B spline coefficients.
+!
+!    Input, integer K, the order of the spline.
+!
+!    Work array, real ( kind = 8 ) SCRTCH(K,K).
+!
+!    Output, real ( kind = 8 ) BREAK(L+1), the piecewise polynomial breakpoint
+!    sequence.  BREAK contains the distinct points in the
+!    sequence T(K),...,T(N+1)
+!
+!    Output, real ( kind = 8 ) COEF(K,N), with COEF(I,J) = (I-1)st derivative
+!    of the spline at BREAK(J) from the right.
+!
+!    Output, integer L, the number of polynomial pieces which
+!    make up the spline in the interval (T(K),T(N+1)).
+!
+  implicit none
+
+  integer k
+  integer l
+  integer n
+
+  real ( kind = 8 ) bcoef(n)
+  real ( kind = 8 ) biatx(k)
+  real ( kind = 8 ) break(*)
+  real ( kind = 8 ) coef(k,n)
+  real ( kind = 8 ) diff
+  integer i
+  integer j
+  integer jp1
+  integer left
+  integer lsofar
+  real ( kind = 8 ) scrtch(k,k)
+  real ( kind = 8 ) sum1
+  real ( kind = 8 ) t(n+k)
+
+  lsofar = 0
+  break(1) = t(k)
+
+  do left = k, n
+!
+!  Find the next nontrivial knot interval.
+!
+    if ( t(left+1) == t(left) ) then
+      cycle
+    end if
+
+    lsofar = lsofar + 1
+    break(lsofar+1) = t(left+1)
+
+    if ( k <= 1 ) then
+      coef(1,lsofar) = bcoef(left)
+      cycle
+    end if
+!
+!  Store the K B-spline coefficients relevant to current knot
+!  interval in SCRTCH(*,1).
+!
+    do i = 1, k
+      scrtch(i,1) = bcoef(left-k+i)
+    end do
+!
+!  For j=1,...,k-1, compute the  k-j  b-spline coefficients relevant to
+!  current knot interval for the j-th derivative by differencing
+!  those for the (j-1)st derivative, and store in scrtch(.,j+1) .
+!
+    do jp1 = 2, k
+      j = jp1-1
+      do i = 1, k-j
+        diff = t(left+i)-t(left+i-(k-j))
+        if ( 0.0D+00 < diff ) then
+          scrtch(i,jp1)=((scrtch(i+1,j)-scrtch(i,j)) / diff ) &
+            * real ( k - j, kind = 8 )
+        end if
+      end do
+    end do
+!
+!  For J=0, ..., K-1, find the values at T(left)  of the  j+1
+!  B-splines of order J+1 whose support contains the current
+!  knot interval from those of order J (in  biatx ), then comb-
+!  ine with the B-spline coefficients (in scrtch(.,k-j) ) found earlier
+!  to compute the (k-j-1)st derivative at  t(left)  of the given
+!  spline.
+!
+    call bsplvb ( t, 1, 1, t(left), left, biatx )
+
+    coef(k,lsofar) = scrtch(1,k)
+
+    do jp1 = 2, k
+
+      call bsplvb ( t, jp1, 2, t(left), left, biatx )
+
+      sum1 = 0.0D+00
+      do i = 1, jp1
+        sum1 = sum1 + biatx(i) * scrtch(i,k+1-jp1)
+      end do
+
+      coef(k+1-jp1,lsofar) = sum1
+
+    end do
+
+  end do
+
+  l = lsofar
+
+  return
+end
diff --git a/pppack/bsplvb.f90 b/pppack/bsplvb.f90
new file mode 100644
index 0000000..89d9578
--- /dev/null
+++ b/pppack/bsplvb.f90
@@ -0,0 +1,170 @@
+!>
+!> @file bsplvb.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine bsplvb ( t, jhigh, index, x, left, biatx )
+
+!*************************************************************************
+!
+!! BSPLVB evaluates B-splines at a point X with a given knot sequence.
+!
+!  Discusion:
+!
+!    BSPLVB evaluates all possibly nonzero B-splines at X of order
+!
+!      JOUT = MAX ( JHIGH, (J+1)*(INDEX-1) )
+!
+!    with knot sequence T.
+!
+!    The recurrence relation
+!
+!                     X - T(I)               T(I+J+1) - X
+!    B(I,J+1)(X) = ----------- * B(I,J)(X) + --------------- * B(I+1,J)(X)
+!                  T(I+J)-T(I)               T(I+J+1)-T(I+1)
+!
+!    is used to generate B(LEFT-J:LEFT,J+1)(X) from B(LEFT-J+1:LEFT,J)(X)
+!    storing the new values in BIATX over the old.
+!
+!    The facts that
+!
+!      B(I,1)(X) = 1  if  T(I) <= X < T(I+1)
+!
+!    and that
+!
+!      B(I,J)(X) = 0  unless  T(I) <= X < T(I+J)
+!
+!    are used.
+!
+!    The particular organization of the calculations follows
+!    algorithm 8 in chapter X of the text.
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!    Input, real ( kind = 8 ) T(LEFT+JOUT), the knot sequence.  T is assumed to
+!    be nondecreasing, and also, T(LEFT) must be strictly less than
+!    T(LEFT+1).
+!
+!    Input, integer JHIGH, INDEX, determine the order
+!    JOUT = MAX(JHIGH,(J+1)*(INDEX-1))
+!    of the B-splines whose values at X are to be returned.
+!    INDEX is used to avoid recalculations when several
+!    columns of the triangular array of B-spline values are
+!    needed, for example, in BVALUE or in BSPLVD.
+!
+!    If INDEX = 1, the calculation starts from scratch and the entire
+!    triangular array of B-spline values of orders
+!    1, 2, ...,JHIGH is generated order by order, i.e.,
+!    column by column.
+!
+!    If INDEX = 2, only the B-spline values of order J+1, J+2, ..., JOUT
+!    are generated, the assumption being that BIATX, J,
+!    DELTAL, DELTAR are, on entry, as they were on exit
+!    at the previous call.  In particular, if JHIGH = 0,
+!    then JOUT = J+1, i.e., just the next column of B-spline
+!    values is generated.
+!
+!    WARNING: the restriction  JOUT <= JMAX (= 20) is
+!    imposed arbitrarily by the dimension statement for DELTAL
+!    and DELTAR, but is nowhere checked for.
+!
+!    Input, real ( kind = 8 ) X, the point at which the B-splines
+!    are to be evaluated.
+!
+!    Input, integer LEFT, an integer chosen so that
+!    T(LEFT) <= X <= T(LEFT+1).
+!
+!    Output, real ( kind = 8 ) BIATX(JOUT), with BIATX(I) containing the
+!    value at X of the polynomial of order JOUT which agrees
+!    with the B-spline B(LEFT-JOUT+I,JOUT,T) on the interval
+!    (T(LEFT),T(LEFT+1)).
+!
+  implicit none
+
+  integer, parameter :: jmax = 20
+
+  integer jhigh
+
+  real ( kind = 8 ) biatx(jhigh)
+!!$  real ( kind = 8 ), save, dimension ( jmax ) :: deltal
+!!$  real ( kind = 8 ), save, dimension ( jmax ) :: deltar
+  real ( kind = 8 ), dimension ( jmax ) :: deltal
+  real ( kind = 8 ), dimension ( jmax ) :: deltar
+  integer i
+  integer index
+!!$  integer, save :: j = 1
+  integer :: j
+  integer left
+  real ( kind = 8 ) saved
+  real ( kind = 8 ) t(left+jhigh)
+  real ( kind = 8 ) term
+  real ( kind = 8 ) x
+
+! Forces starting always from scratch!
+!!$  if ( index == 1 ) then
+    j = 1
+    biatx(1) = 1.0D+00
+    if ( jhigh <= j ) then
+      return
+    end if
+!!$  end if
+
+  if ( t(left+1) <= t(left) ) then
+    write ( *, '(a)' ) ' '
+    write ( *, '(a)' ) 'BSPLVB - Fatal error!'
+    write ( *, '(a)' ) '  It is required that T(LEFT) < T(LEFT+1).'
+    write ( *, '(a,i6)' ) '  But LEFT = ', left
+    write ( *, '(a,g14.6)' ) '  T(LEFT) =   ', t(left)
+    write ( *, '(a,g14.6)' ) '  T(LEFT+1) = ', t(left+1)
+    stop
+  end if
+
+  do
+
+    deltar(j) = t(left+j) - x
+    deltal(j) = x - t(left+1-j)
+
+    saved = 0.0D+00
+    do i = 1, j
+      term = biatx(i) / ( deltar(i) + deltal(j+1-i) )
+      biatx(i) = saved + deltar(i) * term
+      saved = deltal(j+1-i) * term
+    end do
+
+    biatx(j+1) = saved
+    j = j + 1
+
+    if ( jhigh <= j ) then
+      exit
+    end if
+
+  end do
+
+  return
+end
diff --git a/pppack/bsplvd.f90 b/pppack/bsplvd.f90
new file mode 100644
index 0000000..82d203c
--- /dev/null
+++ b/pppack/bsplvd.f90
@@ -0,0 +1,189 @@
+!>
+!> @file bsplvd.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine bsplvd ( t, k, x, left, a, dbiatx, nderiv )
+
+!*************************************************************************
+!
+!! BSPLVD calculates the nonvanishing B-splines and derivatives at X.
+!
+!  Discussion:
+!
+!    Values at X of all the relevant B-splines of order K, K-1,..., K+1-NDERIV
+!    are generated via BSPLVB and stored temporarily in DBIATX.
+!
+!    Then, the B-spline coefficients of the required derivatives
+!    of the B-splines of interest are generated by differencing,
+!    each from the preceding one of lower order, and combined with
+!    the values of B-splines of corresponding order in DBIATX
+!    to produce the desired values.
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!    Input, real ( kind = 8 ) T(LEFT+K), the knot sequence.  It is assumed that
+!    T(LEFT) < T(LEFT+1).  Also, the output is correct only if
+!    T(LEFT) <= X <= T(LEFT+1) .
+!
+!    Input, integer K, the order of the B-splines to be evaluated.
+!
+!    Input, real ( kind = 8 ) X, the point at which these values are sought.
+!
+!    Input, integer LEFT, indicates the left endpoint of the interval of
+!    interest.  The K B-splines whose support contains the interval
+!    (T(LEFT), T(LEFT+1)) are to be considered.
+!
+!    Workspace, real ( kind = 8 ) A(K,K).
+!
+!    Output, real ( kind = 8 ) DBIATX(K,NDERIV).  DBIATX(I,M) contains
+!    the value of the (M-1)st derivative of the (LEFT-K+I)-th B-spline
+!    of order K for knot sequence T, I=M,...,K, M=1,...,NDERIV.
+!
+!    Input, integer NDERIV, indicates that values of
+!    B-splines and their derivatives up to but not
+!    including the NDERIV-th are asked for.
+!
+  implicit none
+
+  integer k
+  integer left
+  integer nderiv
+
+  real ( kind = 8 ) a(k,k)
+  real ( kind = 8 ) dbiatx(k,nderiv)
+  real ( kind = 8 ) factor
+  real ( kind = 8 ) fkp1mm
+  integer i
+  integer ideriv
+  integer il
+  integer j
+  integer jlow
+  integer jp1mid
+  integer ldummy
+  integer m
+  integer mhigh
+  real ( kind = 8 ) sum1
+  real ( kind = 8 ) t(left+k)
+  real ( kind = 8 ) x
+
+  mhigh = max ( min ( nderiv, k ), 1 )
+!
+!  MHIGH is usually equal to nderiv.
+!
+  call bsplvb ( t, k+1-mhigh, 1, x, left, dbiatx )
+
+  if ( mhigh == 1 ) then
+    return
+  end if
+!
+!  The first column of DBIATX always contains the B-spline values
+!  for the current order.  These are stored in column K+1-current
+!  order  before BSPLVB is called to put values for the next
+!  higher order on top of it.
+!
+  ideriv = mhigh
+  do m = 2, mhigh
+    jp1mid = 1
+    do j = ideriv, k
+      dbiatx(j,ideriv) = dbiatx(jp1mid,1)
+      jp1mid = jp1mid+1
+    end do
+    ideriv = ideriv-1
+    call bsplvb(t,k+1-ideriv,2,x,left,dbiatx)
+  end do
+!
+!  At this point,  b(left-k+i, k+1-j)(x) is in  dbiatx(i,j) for
+!  i=j,...,k and j=1,...,mhigh ('=' nderiv). in particular, the
+!  first column of  dbiatx  is already in final form. to obtain cor-
+!  ???  LOST A LINE ???
+!  rate their b-repr. by differencing, then evaluate at  x.
+!
+  jlow = 1
+  do i = 1, k
+    do j = jlow,k
+      a(j,i) = 0.0D+00
+    end do
+    jlow = i
+    a(i,i) = 1.0D+00
+  end do
+!
+!  At this point, a(.,j) contains the b-coefficients for the J-th of the
+!  k  b-splines of interest here.
+!
+  do m = 2, mhigh
+
+    fkp1mm = real ( k + 1 - m, kind = 8 )
+    il = left
+    i = k
+!
+!  For j=1,...,k, construct b-coefficients of  (m-1)st  derivative of
+!  b-splines from those for preceding derivative by differencing
+!  and store again in  a(.,j) .  The fact that  a(i,j)=0  for
+!  i < j  is used.
+!
+    do ldummy = 1, k+1-m
+
+      factor = fkp1mm/(t(il+k+1-m)-t(il))
+!
+!  The assumption that t(left) < t(left+1) makes denominator
+!  in  factor  nonzero.
+!
+      do j = 1, i
+        a(i,j) = (a(i,j)-a(i-1,j))*factor
+      end do
+
+      il = il-1
+      i = i-1
+
+    end do
+!
+!  For i=1,...,k, combine b-coefficients a(.,i) with B-spline values
+!  stored in dbiatx(.,m) to get value of  (m-1)st  derivative of
+!  i-th b-spline (of interest here) at  x , and store in
+!  dbiatx(i,m). storage of this value over the value of a b-spline
+!  of order m there is safe since the remaining b-spline derivat-
+!  ives of the same order do not use this value due to the fact
+!  that  a(j,i)=0  for j < i.
+!
+    do i = 1, k
+
+      sum1 = 0.0D+00
+      jlow = max(i,m)
+      do j = jlow,k
+        sum1 = sum1 + a(j,i) * dbiatx(j,m)
+      end do
+
+      dbiatx(i,m) = sum1
+    end do
+
+  end do
+
+  return
+end
diff --git a/pppack/bspp2d.f90 b/pppack/bspp2d.f90
new file mode 100644
index 0000000..59db260
--- /dev/null
+++ b/pppack/bspp2d.f90
@@ -0,0 +1,203 @@
+!>
+!> @file bspp2d.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine bspp2d ( t, bcoef, n, k, m, scrtch, break, coef, l )
+
+!*************************************************************************
+!
+!! BSPP2D converts from B-spline to piecewise polynomial representation.
+!
+!  Discussion:
+!
+!    The B-spline representation
+!
+!      T, BCOEF(.,J), N, K
+!
+!    is converted to its piecewise polynomial representation
+!
+!      BREAK, COEF(J,.,.), L, K, J=1, ..., M.
+!
+!    This is an extended version of BSPLPP for use with tensor products.
+!
+!    For each breakpoint interval, the K relevant B-spline
+!    coefficients of the spline are found and then differenced
+!    repeatedly to get the B-spline coefficients of all the
+!    derivatives of the spline on that interval.
+!
+!    The spline and its first K-1 derivatives are then evaluated
+!    at the left endpoint of that interval, using BSPLVB
+!    repeatedly to obtain the values of all B-splines of the
+!    appropriate order at that point.
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!    Input, real ( kind = 8 ) T(N+K), the knot sequence.
+!
+!    Input, real ( kind = 8 ) BCOEF(N,M).  For each J, B(*,J) is the
+!    B-spline coefficient sequence, of length N.
+!
+!    Input, integer N, the length of BCOEF.
+!
+!    Input, integer K, the order of the spline.
+!
+!    Input, integer M, the number of data sets.
+!
+!    Work array, real ( kind = 8 ) SCRTCH(K,K,M).
+!
+!    Output, real ( kind = 8 ) BREAK(L+1), the breakpoint sequence
+!    containing the distinct points in the sequence T(K),...,T(N+1)
+!
+!    Output, real ( kind = 8 ) COEF(M,K,N), with COEF(MM,I,J) = the (I-1)st
+!    derivative of the MM-th spline at BREAK(J) from the right, MM=1, ..., M.
+!
+!    Output, integer L, the number of polynomial pieces which make up the
+!    spline in the interval (T(K), T(N+1)).
+!
+  implicit none
+
+  integer k
+  integer m
+  integer n
+
+  real ( kind = 8 ) bcoef(n,m)
+  real ( kind = 8 ) biatx(k)
+  real ( kind = 8 ) break(*)
+  real ( kind = 8 ) coef(m,k,*)
+  real ( kind = 8 ) diff
+  real ( kind = 8 ) fkmj
+  integer i
+  integer j
+  integer jp1
+  integer kmj
+  integer l
+  integer left
+  integer lsofar
+  integer mm
+  real ( kind = 8 ) scrtch(k,k,m)
+  real ( kind = 8 ) sum1
+  real ( kind = 8 ) t(n+k)
+
+  lsofar = 0
+  break(1) = t(k)
+
+  do left = k, n
+!
+!  Find the next nontrivial knot interval.
+!
+    if ( t(left+1) == t(left) ) then
+      cycle
+    end if
+
+    lsofar = lsofar+1
+    break(lsofar+1) = t(left+1)
+
+    if ( k <= 1 ) then
+
+      do mm = 1, m
+        coef(mm,1,lsofar) = bcoef(left,mm)
+      end do
+
+      cycle
+
+    end if
+!
+!  Store the K b-spline coefficients relevant to current knot interval
+!  in  scrtch(.,1) .
+!
+    do i = 1, k
+      do mm = 1, m
+        scrtch(i,1,mm) = bcoef(left-k+i,mm)
+      end do
+    end do
+!
+!  for j=1,...,k-1, compute the  k-j  b-spline coefficients relevant to
+!  current knot interval for the j-th derivative by differencing
+!  those for the (j-1)st derivative, and store in scrtch(.,j+1) .
+!
+    do jp1 = 2, k
+
+      j = jp1-1
+      kmj = k-j
+      fkmj = real ( k - j, kind = 8 )
+
+      do i = 1, k-j
+
+        diff = (t(left+i)-t(left+i-kmj))/fkmj
+
+        if ( 0.0D+00 < diff ) then
+
+          do mm = 1, m
+            scrtch(i,jp1,mm)=(scrtch(i+1,j,mm)-scrtch(i,j,mm))/diff
+          end do
+
+        end if
+
+      end do
+
+    end do
+!
+!  For  j=0, ..., k-1, find the values at T(left)  of the  j+1
+!  b-splines of order  j+1  whose support contains the current
+!  knot interval from those of order  j  (in  biatx ), then comb-
+!  ine with the b-spline coefficients (in scrtch(.,k-j) ) found earlier
+!  to compute the (k-j-1)st derivative at  t(left)  of the given
+!  spline.
+!
+    call bsplvb ( t, 1, 1, t(left), left, biatx )
+
+    do mm = 1, m
+      coef(mm,k,lsofar) = scrtch(1,k,mm)
+    end do
+
+    do jp1 = 2, k
+
+      call bsplvb (t,jp1,2,t(left),left,biatx)
+      kmj = k+1-jp1
+
+      do mm = 1, m
+
+        sum1 = 0.0D+00
+        do i = 1, jp1
+          sum1 = sum1 + biatx(i) * scrtch(i,kmj,mm)
+        end do
+
+        coef(mm,kmj,lsofar) = sum1
+
+      end do
+
+    end do
+
+  end do
+
+  l = lsofar
+
+  return
+end
diff --git a/pppack/bvalue.f90 b/pppack/bvalue.f90
new file mode 100644
index 0000000..ead864f
--- /dev/null
+++ b/pppack/bvalue.f90
@@ -0,0 +1,226 @@
+!>
+!> @file bvalue.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+function bvalue ( t, bcoef, n, k, x, jderiv )
+
+!*************************************************************************
+!
+!! BVALUE evaluates a derivative of a spline from its B-spline representation.
+!
+!  Discussion:
+!
+!    The spline is taken to be continuous from the right.
+!
+!    The nontrivial knot interval (T(I),T(I+1)) containing X is
+!    located with the aid of INTERV.  The K B-spline coefficients
+!    of F relevant for this interval are then obtained from BCOEF,
+!    or are taken to be zero if not explicitly available, and are
+!    then differenced JDERIV times to obtain the B-spline
+!    coefficients of (D**JDERIV)F relevant for that interval.
+!
+!    Precisely, with J = JDERIV, we have from X.(12) of the text that:
+!
+!      (D**J)F = sum ( BCOEF(.,J)*B(.,K-J,T) )
+!
+!    where
+!                      / BCOEF(.),                    ,  J == 0
+!                     /
+!       BCOEF(.,J) = / BCOEF(.,J-1) - BCOEF(.-1,J-1)
+!                   / -----------------------------,  0 < J
+!                  /    (T(.+K-J) - T(.))/(K-J)
+!
+!    Then, we use repeatedly the fact that
+!
+!      sum ( A(.)*B(.,M,T)(X) ) = sum ( A(.,X)*B(.,M-1,T)(X) )
+!
+!    with
+!                   (X - T(.))*A(.) + (T(.+M-1) - X)*A(.-1)
+!      A(.,X) =   ---------------------------------------
+!                   (X - T(.))      + (T(.+M-1) - X)
+!
+!    to write (D**J)F(X) eventually as a linear combination of
+!    B-splines of order 1, and the coefficient for B(I,1,T)(X)
+!    must then be the desired number (D**J)F(X).
+!    See x.(17)-(19) of text.
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!    Input, real ( kind = 8 ) T(N+K), the knot sequence.  T is assumed
+!    to be nondecreasing.
+!
+!    Input, real ( kind = 8 ) BCOEF(N), B-spline coefficient sequence.
+!
+!    Input, integer N, the length of BCOEF.
+!
+!    Input, integer K, the order of the spline.
+!
+!    Input, real ( kind = 8 ) X, the point at which to evaluate.
+!
+!    Input, integer JDERIV, the order of the derivative to
+!    be evaluated.  JDERIV is assumed to be zero or positive.
+!
+!    Output, real ( kind = 8 ) BVALUE, the value of the (JDERIV)-th
+!    derivative of the spline at X.
+!
+  implicit none
+
+  integer k
+  integer n
+
+  real ( kind = 8 ) aj(k)
+  real ( kind = 8 ) bcoef(n)
+  real ( kind = 8 ) bvalue
+  real ( kind = 8 ) dl(k)
+  real ( kind = 8 ) dr(k)
+  integer i
+  integer ilo
+  integer j
+  integer jc
+  integer jcmax
+  integer jcmin
+  integer jderiv
+  integer jj
+  integer mflag
+  real ( kind = 8 ) t(n+k)
+  real ( kind = 8 ) x
+
+  bvalue = 0.0D+00
+
+  if ( k <= jderiv ) then
+    return
+  end if
+!
+!  Find I so that 1 <= i < n+k and t(i) < t(i+1) and t(i) <= x < t(i+1).
+!
+!  If no such i can be found, X lies
+!  outside the support of  the spline F and  bvalue=0.
+!  (the asymmetry in this choice of  i  makes F rightcontinuous)
+!
+  call interv ( t, n+k, x, i, mflag )
+
+  if ( mflag /= 0 ) then
+    return
+  end if
+!
+!  If K=1 (and jderiv = 0), bvalue = bcoef(i).
+!
+  if ( k <= 1 ) then
+    bvalue = bcoef(i)
+    return
+  end if
+!
+!  Store the K b-spline coefficients relevant for the knot interval
+!  (T(i),T(i+1)) in aj(1),...,aj(k) and compute dl(j)=x-t(i+1-j),
+!  dr(j)=T(i+j)-x, j=1,...,k-1 . set any of the aj not obtainable
+!  from input to zero.  Set any T's not obtainable equal to T(1) or
+!  to T(n+k) appropriately.
+!
+  jcmin = 1
+
+  if ( k <= i ) then
+
+    do j = 1, k-1
+      dl(j) = x-t(i+1-j)
+    end do
+
+  else
+
+    jcmin = 1-(i-k)
+
+    do j = 1, i
+      dl(j) = x-t(i+1-j)
+    end do
+
+    do j = i, k-1
+      aj(k-j) = 0.0D+00
+      dl(j) = dl(i)
+    end do
+
+  end if
+
+  jcmax = k
+
+  if ( i <= n ) then
+    go to 90
+  end if
+
+  jcmax = k + n - i
+  do j = 1, k+n-i
+    dr(j) = t(i+j)-x
+  end do
+
+  do j = k+n-i, k-1
+    aj(j+1) = 0.0D+00
+    dr(j) = dr(k+n-i)
+  end do
+
+  go to 110
+
+   90 continue
+
+  do j = 1, k-1
+    dr(j) = t(i+j)-x
+  end do
+
+  110 continue
+
+  do jc = jcmin, jcmax
+    aj(jc) = bcoef(i-k+jc)
+  end do
+!
+!  Difference the coefficients JDERIV times.
+!
+  do j = 1, jderiv
+
+    ilo = k-j
+    do jj = 1, k-j
+      aj(jj) = ((aj(jj+1)-aj(jj))/(dl(ilo)+dr(jj))) * real ( k - j, kind = 8 )
+      ilo = ilo-1
+    end do
+
+  end do
+!
+!  Compute value at X in (t(i),t(i+1)) of jderiv-th derivative,
+!  given its relevant b-spline coefficients in aj(1),...,aj(k-jderiv).
+!
+  do j = jderiv+1, k-1
+    ilo = k-j
+    do jj = 1, k-j
+      aj(jj) = ( aj(jj+1) * dl(ilo) + aj(jj) * dr(jj) ) &
+        / ( dl(ilo) + dr(jj) )
+      ilo = ilo-1
+    end do
+  end do
+
+  bvalue = aj(1)
+
+  return
+end
diff --git a/pppack/chol1d.f90 b/pppack/chol1d.f90
new file mode 100644
index 0000000..8a0e19b
--- /dev/null
+++ b/pppack/chol1d.f90
@@ -0,0 +1,146 @@
+!>
+!> @file chol1d.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine chol1d ( p, v, qty, npoint, u, qu )
+
+!*************************************************************************
+!
+!! CHOL1D sets up and solves linear systems needed by SMOOTH.
+!
+!  Discussion:
+!
+!    This routine constructs the upper three diagonals of
+!
+!      V(I,J), I=2 to NPOINT-1, J=1,3,
+!
+!    of the matrix
+!
+!      6*(1-P)*Q-transpose*(D**2)*Q + P*R.
+!
+!    It then computes its L*L' decomposition and stores it also
+!    in V, then applies forward and backsubstitution to the right side
+!
+!      Q'*Y
+!
+!    in QTY to obtain the solution in U.
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!    Input, real ( kind = 8 ) P, ?
+!
+!    ?put, real ( kind = 8 ) V(NPOINT,7), ?
+!
+!    ?put, real ( kind = 8 ) QTY(NPOINT), ?
+!
+!    Input, integer NPOINT, ?
+!
+!    Output, real ( kind = 8 ) U(NPOINT), the solution.
+!
+!    Output, real ( kind = 8 ) QU(NPOINT), the value of Q * U.
+!
+  implicit none
+
+  integer npoint
+
+  integer i
+  real ( kind = 8 ) p
+  real ( kind = 8 ) qty(npoint)
+  real ( kind = 8 ) qu(npoint)
+  real ( kind = 8 ) u(npoint)
+  real ( kind = 8 ) v(npoint,7)
+  real ( kind = 8 ) prev
+  real ( kind = 8 ) ratio
+  real ( kind = 8 ) six1mp
+  real ( kind = 8 ) twop
+!
+!  Construct 6*(1-p)*q'*(d**2)*q + p*r
+!
+  six1mp = 6.0D+00 * ( 1.0D+00 - p )
+  twop = 2.0D+00 * p
+
+  do i = 2, npoint-1
+    v(i,1) = six1mp * v(i,5)+twop*(v(i-1,4)+v(i,4))
+    v(i,2) = six1mp * v(i,6)+p*v(i,4)
+    v(i,3) = six1mp * v(i,7)
+  end do
+
+  if ( npoint < 4 ) then
+    u(1) = 0.0D+00
+    u(2) = qty(2) / v(2,1)
+    u(3) = 0.0D+00
+!
+!  Factorization
+!
+  else
+
+    do i = 2, npoint-2
+      ratio = v(i,2)/v(i,1)
+      v(i+1,1) = v(i+1,1)-ratio*v(i,2)
+      v(i+1,2) = v(i+1,2)-ratio*v(i,3)
+      v(i,2) = ratio
+      ratio = v(i,3)/v(i,1)
+      v(i+2,1) = v(i+2,1)-ratio*v(i,3)
+      v(i,3) = ratio
+    end do
+!
+!  Forward substitution
+!
+    u(1) = 0.0D+00
+    v(1,3) = 0.0D+00
+    u(2) = qty(2)
+    do i = 2, npoint-2
+      u(i+1) = qty(i+1)-v(i,2)*u(i)-v(i-1,3)*u(i-1)
+    end do
+!
+!  Back substitution.
+!
+    u(npoint) = 0.0D+00
+    u(npoint-1) = u(npoint-1) / v(npoint-1,1)
+
+    do i = npoint-2, 2, -1
+      u(i) = u(i)/v(i,1)-u(i+1)*v(i,2)-u(i+2)*v(i,3)
+    end do
+
+  end if
+!
+!  Construct Q*U.
+!
+  prev = 0.0D+00
+  do i = 2, npoint
+    qu(i) = (u(i)-u(i-1))/v(i-1,4)
+    qu(i-1) = qu(i)-prev
+    prev = qu(i)
+  end do
+
+  qu(npoint) = -qu(npoint)
+
+  return
+end
diff --git a/pppack/colloc.f90 b/pppack/colloc.f90
new file mode 100644
index 0000000..df60a0e
--- /dev/null
+++ b/pppack/colloc.f90
@@ -0,0 +1,275 @@
+!>
+!> @file colloc.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine colloc ( aleft, aright, lbegin, iorder, ntimes, addbrk, &
+  relerr )
+
+!*************************************************************************
+!
+!! COLLOC solves an ordinary differential equation by collocation.
+!
+!  Method:
+!
+!    The M-th order ordinary differential equation with M side
+!    conditions, to be specified in subroutine DIFEQU, is solved
+!    approximately by collocation.
+!
+!    The approximation F to the solution G is piecewise polynomial of order
+!    k+m with L pieces and M-1 continuous derivatives.   F is determined by
+!    the requirement that it satisfy the differential equation at K points
+!    per interval (to be specified in COLPNT ) and the M side conditions.
+!
+!    This usually nonlinear system of equations for f is solved by
+!    Newton's method. the resulting linear system for the b-coefficients of an
+!    iterate is constructed appropriately in eqblok and then solved
+!    in slvblk, a program designed to solve almost block
+!    diagonal linear systems efficiently.
+!
+!    There is an opportunity to attempt improvement of the breakpoint
+!    sequence (both in number and location) through use of NEWNOT.
+!
+!    Printed output consists of the pp-representation of the approximate
+!    solution, and of the error at selected points.
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!    Input, real ( kind = 8 ) ALEFT, ARIGHT, the endpoints of the interval.
+!
+!    Input, integer LBEGIN, the initial number of polynomial pieces
+!    in the approximation.  A uniform breakpoint sequence will be chosen.
+!
+!    Input, integer IORDER, the order of the polynomial pieces to be
+!    used in the approximation
+!
+!    Input, integer NTIMES, the number of passes to be made through NEWNOT.
+!
+!  addbrk   the number (possibly fractional) of breaks to be added per
+!           pass through newnot. e.g., if addbrk=.33334, then a break-
+!           point will be added at every third pass through newnot.
+!
+!  relerr   a tolerance.  Newton iteration is stopped if the difference
+!           between the b-coefficients of two successive iterates is no more
+!           than  relerr*(absolute largest b-coefficient).
+!
+  implicit none
+
+  integer, parameter :: npiece = 100
+  integer, parameter :: ndim = 200
+  integer, parameter :: ncoef = 2000
+  integer, parameter :: lenblk = 2000
+
+  real ( kind = 8 ) a(ndim)
+  real ( kind = 8 ) addbrk
+  real ( kind = 8 ) aleft
+  real ( kind = 8 ) amax
+  real ( kind = 8 ) aright
+  real ( kind = 8 ) asave(ndim)
+  real ( kind = 8 ) b(ndim)
+  real ( kind = 8 ) bloks(lenblk)
+  real ( kind = 8 ) break
+  real ( kind = 8 ) coef
+  real ( kind = 8 ) dx
+  real ( kind = 8 ) err
+  integer i
+  integer iflag
+  integer ii
+  integer integs(3,npiece)
+  integer iorder
+  integer iside
+  integer itemps(ndim)
+  integer iter
+  integer itermx
+  integer j
+  integer k
+  integer kpm
+  integer l
+  integer lbegin
+  integer lnew
+  integer m
+  integer n
+  integer nbloks
+  integer nt
+  integer ntimes
+  real ( kind = 8 ) relerr
+  real ( kind = 8 ) rho
+  real ( kind = 8 ) t(ndim)
+  real ( kind = 8 ) templ(lenblk)
+  real ( kind = 8 ) temps(ndim)
+  real ( kind = 8 ) xside
+
+  equivalence (bloks,templ)
+
+  common /approx/ break(npiece),coef(ncoef),l,kpm
+  common /side/ m,iside,xside(10)
+  common /other/ itermx,k,rho(19)
+
+  kpm = iorder
+
+  if ( ncoef < lbegin * kpm ) then
+    go to 120
+  end if
+!
+!  Set the various parameters concerning the particular dif.equ.
+!  including a first approximation in case the de is to be solved by
+!  iteration ( 0 < itermx ).
+!
+  call difequ ( 1, temps(1), temps )
+!
+!  Obtain the K collocation points for the standard interval.
+!
+  k = kpm-m
+  call colpnt(k,rho)
+!
+!  The following five statements could be replaced by a read in or-
+!  der to obtain a specific (nonuniform) spacing of the breakpnts.
+!
+  dx = (aright-aleft) / real ( lbegin, kind = 8 )
+
+  temps(1) = aleft
+  do i = 2, lbegin
+    temps(i) = temps(i-1)+dx
+  end do
+  temps(lbegin+1) = aright
+!
+!  Generate the required knots t(1),...,t(n+kpm).
+!
+  call knots ( temps, lbegin, kpm, t, n )
+  nt = 1
+!
+!  Generate the almost block diagonal coefficient matrix  bloks  and
+!  right side  b  from collocation equations and side conditions.
+!  then solve via  slvblk , obtaining the b-representation of the
+!  approximation in T, A, N, KPM.
+!
+20    continue
+
+  call eqblok ( t, n, kpm, temps, a, bloks, lenblk, integs, nbloks, b )
+
+  call slvblk ( bloks, integs, nbloks, b, itemps, a, iflag )
+
+  iter = 1
+  if ( itermx <= 1 ) then
+    go to 60
+  end if
+!
+!  Save b-spline coefficients of current approx. in  asave , then get new
+!  approx. and compare with old. if coefficients are more than  relerr
+!  apart (relatively) or if number of iterations is less than  itermx ,
+!  continue iterating.
+!
+   30 continue
+
+  call bsplpp(t,a,n,kpm,templ,break,coef,l)
+
+  do i = 1, n
+    asave(i) = a(i)
+  end do
+
+  call eqblok ( t, n, kpm, temps, a, bloks, lenblk, integs, nbloks, b )
+
+  call slvblk(bloks,integs,nbloks,b,itemps,a,iflag)
+
+  err = 0.0D+00
+  amax = 0.0D+00
+  do i = 1, n
+    amax = max ( amax, abs ( a(i) ) )
+    err = max ( err, abs ( a(i)-asave(i) ) )
+  end do
+
+  if ( err <= relerr*amax ) then
+    go to 60
+  end if
+
+  iter = iter + 1
+
+  if ( iter < itermx ) then
+    go to 30
+  end if
+!
+!  Iteration (if any) completed. print out approx. based on current
+!  breakpoint sequence, then try to improve the sequence.
+!
+   60 continue
+
+  write(*,70)kpm,l,n,(break(i),i=2,l)
+   70 format (' approximation from a space of splines of order',i3, &
+     ' on ',i3,' intervals,'/' of dimension',i4,'.  breakpoints -'/ &
+     (5e20.10))
+
+  if ( 0 < itermx ) then
+    write(*,*)' '
+    write(*,*)'Results on interation ',iter
+  end if
+
+  call bsplpp(t,a,n,kpm,templ,break,coef,l)
+
+  write ( *, * ) ' '
+  write ( *, * ) 'The piecewise polynomial representation of the approximation:'
+  write ( *, * ) ' '
+
+  do i = 1, l
+    ii = ( i - 1 ) * kpm
+    write(*,'(f9.3,e13.6,10e11.3)')break(i),(coef(ii+j),j=1,kpm)
+  end do
+!
+!  The following call is provided here for possible further analysis
+!  of the approximation specific to the problem being solved.
+!  it is, of course, easily omitted.
+!
+  call difequ ( 4, temps(1), temps )
+
+  if ( ntimes < nt ) then
+    return
+  end if
+!
+!  From the pp-rep. of the current approx., obtain in NEWNOT a new
+!  (and possibly better) sequence of breakpoints, adding (on the
+!  average) ADDBRK breakpoints per pass through NEWNOT.
+!
+  lnew = lbegin + int ( real ( nt, kind = 8 ) * addbrk )
+
+  if ( ncoef < lnew * kpm ) then
+    go to 120
+  end if
+
+  call newnot(break,coef,l,kpm,temps,lnew,templ)
+
+  call knots ( temps, lnew, kpm, t, n )
+  nt = nt+1
+  go to 20
+
+  120 continue
+  write(*,*)' '
+  write(*,*)'COLLOC - Fatal error!'
+  write(*,*)'  The assigned dimension for COEF is ',ncoef
+  write(*,*)'  but this is too small.'
+  stop
+end
diff --git a/pppack/colpnt.f90 b/pppack/colpnt.f90
new file mode 100644
index 0000000..8bfb5ad
--- /dev/null
+++ b/pppack/colpnt.f90
@@ -0,0 +1,117 @@
+!>
+!> @file colpnt.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine colpnt ( k, rho )
+
+!*************************************************************************
+!
+!! COLPNT supplies collocation points.
+!
+!  Discussion:
+!
+!    The collocation points are for the standard interval (-1,1) as the
+!    zeros of the Legendre polynomial of degree K, provided K <= 8.
+!
+!    Otherwise, uniformly spaced points are given.
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!    Input, integer K, the number of collocation points desired.
+!
+!    Output, real ( kind = 8 ) RHO(K), the collocation points.
+!
+  implicit none
+
+  integer k
+
+  integer j
+  real ( kind = 8 ) rho(k)
+
+  if ( k == 1 ) then
+    rho(1) = 0.0D+00
+  else if ( k == 2 ) then
+    rho(1) = -0.577350269189626D+00
+    rho(2) =  0.577350269189626D+00
+  else if ( k == 3 ) then
+    rho(1) = -0.774596669241483D+00
+    rho(2) =  0.0
+    rho(3) =  0.774596669241483D+00
+  else if ( k == 4 ) then
+    rho(1) = -0.861136311594053D+00
+    rho(2) = -0.339981043584856D+00
+    rho(3) =  0.339981043584856D+00
+    rho(4) =  0.861136311594053D+00
+  else if ( k == 5 ) then
+    rho(1) = -0.906179845938664D+00
+    rho(2) = -0.538469310105683D+00
+    rho(3) =  0.0D+00
+    rho(4) =  0.538469310105683D+00
+    rho(5) =  0.906179845938664D+00
+  else if ( k == 6 ) then
+    rho(1) = -0.932469514203152D+00
+    rho(2) = -0.661209386466265D+00
+    rho(3) = -0.238619186083197D+00
+    rho(4) =  0.238619186083197D+00
+    rho(5) =  0.661209386466265D+00
+    rho(6) =  0.932469514203152D+00
+  else if ( k == 7 ) then
+    rho(5) = 0.405845151377397D+00
+    rho(3) = -rho(5)
+    rho(6) = 0.741531185599394D+00
+    rho(2) = -rho(6)
+    rho(7) = 0.949107912342759D+00
+    rho(1) = -rho(7)
+    rho(4) = 0.0
+  else if ( k == 8 ) then
+    rho(5) = 0.183434642495650D+00
+    rho(4) = -rho(5)
+    rho(6) = 0.525532409916329D+00
+    rho(3) = -rho(6)
+    rho(7) = 0.796666477413627D+00
+    rho(2) = -rho(7)
+    rho(8) = 0.960289856497536D+00
+    rho(1) = -rho(8)
+  else
+
+    write ( *, * )' '
+    write ( *, * )'ColPnt - Warning!'
+    write ( *, * )'  Equispaced collocation points will be used,'
+    write ( *, * )'  because K =',k,' which is greater than 8.'
+
+    do j = 1, k
+      rho(j) = -1.0D+00 + 2.0D+00 * real ( j - 1, kind = 8 ) &
+                                  / real ( k - 1, kind = 8 )
+    end do
+
+  end if
+
+  return
+end
diff --git a/pppack/cspint.f90 b/pppack/cspint.f90
new file mode 100644
index 0000000..fb201fb
--- /dev/null
+++ b/pppack/cspint.f90
@@ -0,0 +1,214 @@
+!>
+!> @file cspint.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine cspint ( ftab, xtab, ntab, a, b, y, e, work, result, ind )
+
+!*************************************************************************
+!
+!! CSPINT estimates an integral using a spline interpolant.
+!
+!  Discussion:
+!
+!    CSPINT estimates the integral from A to B of F(X) by
+!    computing the natural spline S(X) that interpolates to F
+!    and integrating that exactly.
+!
+!    F is supplied to the routine in the form of tabulated data.
+!
+!    Other output from the program includes the definite integral
+!    from X(1) to X(I) of the spline, and the coefficients
+!    necessary for the user to evaluate the spline outside of
+!    this routine.
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!    Input, real ( kind = 8 ) FTAB(NTAB), contains the tabulated values
+!    of the functions, FTAB(I)=F(XTAB(I)).
+!
+!    Input, real ( kind = 8 ) XTAB(NTAB), contains the points at
+!    which the function was evaluated.  The XTAB's must be
+!    distinct and in ascending order.
+!
+!    Input, integer NTAB, the number of entries in FTAB
+!    and XTAB.  NTAB must be at least 3.
+!
+!    Input, real ( kind = 8 ) A, lower limit of integration.
+!
+!    Input, real ( kind = 8 ) B, upper limit of integration.
+!
+!    Output, real ( kind = 8 ) Y(3,NTAB), will contain the coefficients
+!    of the interpolating natural spline over each subinterval.
+!
+!    For XTAB(I) < = X  <= XTAB(I+1),
+!
+!      S(X) = FTAB(I) + Y(1,I)*(X-XTAB(I)) + Y(2,I)*(X-XTAB(I))**2
+!                 + Y(3,I)*(X-XTAB(I))**3
+!
+!    Output, real ( kind = 8 ) E(NTAB), E(I)=the definite integral
+!    from XTAB(1) to XTAB(I) of S(X).
+!
+!    Workspace, real ( kind = 8 ) WORK(NTAB).
+!
+!    Output, real ( kind = 8 ) RESULT, the estimated value of the integral.
+!
+!    Output, integer IND, error flag.
+!    IND=0 if NTAB < 3 or the XTAB's are not distinct and in
+!    ascending order.
+!    IND=1 otherwise.
+!
+  implicit none
+
+  integer ntab
+
+  real ( kind = 8 ) a
+  real ( kind = 8 ) b
+  real ( kind = 8 ) e(ntab)
+  real ( kind = 8 ) ftab(ntab)
+  integer i
+  integer ind
+  integer j
+  real ( kind = 8 ) r
+  real ( kind = 8 ) result
+  real ( kind = 8 ) s
+  real ( kind = 8 ) term
+  real ( kind = 8 ) u
+  real ( kind = 8 ) work(ntab)
+  real ( kind = 8 ) xtab(ntab)
+  real ( kind = 8 ) y(3,ntab)
+
+  ind = 0
+
+  if ( ntab < 3 ) then
+    write(*,*)' '
+    write(*,*)'CSPINT - Fatal error!'
+    write(*,*)'  NTAB must be at least 3,'
+    write(*,*)'  but your value was NTAB = ',ntab
+    stop
+  end if
+
+  do i = 1, ntab-1
+
+    if ( xtab(i+1) <= xtab(i) ) then
+      write(*,*)' '
+      write(*,*)'CSPINT - Fatal error!'
+      write(*,*)'  Interval ',i,' is illegal.'
+      write(*,*)'  XTAB(I) =',xtab(i)
+      write(*,*)'  XTAB(I+1)=',xtab(i+1)
+      stop
+    end if
+
+  end do
+
+  s = 0.0D+00
+  do i = 1, ntab-1
+    r = ( ftab(i+1) - ftab(i) ) / ( xtab(i+1) - xtab(i) )
+    y(2,i) = r - s
+    s = r
+  end do
+
+  result = 0.0D+00
+  s = 0.0D+00
+  r = 0.0D+00
+  y(2,1) = 0.0D+00
+  y(2,ntab) = 0.0D+00
+
+  do i = 2, ntab-1
+    y(2,i) = y(2,i) + r * y(2,i-1)
+    work(i) = 2.0D+00 * ( xtab(i-1) - xtab(i+1) ) - r * s
+    s = xtab(i+1) - xtab(i)
+    r = s / work(i)
+  end do
+
+  do j = 2, ntab-1
+    i = ntab+1-j
+    y(2,i) = ((xtab(i+1)-xtab(i))*y(2,i+1)-y(2,i))/work(i)
+  end do
+
+  do i = 1, ntab-1
+    s = xtab(i+1) - xtab(i)
+    r = y(2,i+1) - y(2,i)
+    y(3,i) = r / s
+    y(2,i) = 3.0D+00 * y(2,i)
+    y(1,i) = ( ftab(i+1) - ftab(i) ) / s - ( y(2,i) + r ) * s
+  end do
+
+  e(1) = 0.0D+00
+
+  do i = 1, ntab-1
+
+    s = xtab(i+1) - xtab(i)
+
+    term = ( ( ( y(3,i) * 0.25D+00  * s &
+               + y(2,i) / 3.0D+00 ) * s &
+               + y(1,i) * 0.5D+00 ) * s + ftab(i) ) * s
+
+    e(i+1) = e(i) + term
+
+  end do
+!
+!  Determine where the endpoints A and B lie in the mesh of XTAB's.
+!
+  r = a
+  u = 1.0D+00
+
+  do j = 1, 2
+
+    if ( r <= xtab(1) ) then
+      result = result-u*((r-xtab(1))*y(1,1)* 0.5D+00 + ftab(1))*(r-xtab(1))
+    else if ( xtab(ntab) <= r ) then
+      result = result-u*(e(ntab)+(r-xtab(ntab))*(ftab(ntab) + 0.5D+00 * &
+        (ftab(ntab-1)+(xtab(ntab)-xtab(ntab-1))*y(1,ntab-1))*(r- &
+        xtab(ntab))))
+    else
+      do i = 1, ntab-1
+
+        if ( r <= xtab(i+1) ) then
+          r = r - xtab(i)
+          result = result-u*(e(i)+(((y(3,i)*0.25D+00*r+y(2,i)/3.0D+00)*r &
+            +y(1,i) * 0.5D+00 )*r+ftab(i))*r)
+          go to 100
+        end if
+
+      end do
+
+    end if
+
+  100   continue
+
+    u = -1.0D+00
+    r = b
+
+  end do
+
+  ind = 1
+
+  return
+end
diff --git a/pppack/cubset.f90 b/pppack/cubset.f90
new file mode 100644
index 0000000..567b9de
--- /dev/null
+++ b/pppack/cubset.f90
@@ -0,0 +1,106 @@
+!>
+!> @file cubset.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine cubset ( tau, c, n, ibcbeg, ibcend )
+
+!*******************************************************************************
+!
+!! CUBSET sets up a simple cubic spline interpolant.
+!
+!  WARNING: IBCBEG and IBCEND are not set up yet.
+!
+!  A tridiagonal linear system for the unknown slopes S(I) of
+!  F at TAU(I), I=1,..., N, is generated and then solved by Gauss
+!  elimination, with S(I) ending up in C(2,I), for all I.
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!    Input, real ( kind = 8 ) TAU(N), the abscissas or X values of
+!    the data points.  The entries of TAU are assumed to be
+!    strictly increasing.
+!
+!    Input, integer N, the number of data points.  N is
+!    assumed to be at least 2.
+!
+!    Input/output, real ( kind = 8 ) C(4,N).
+!    On input, if IBCBEG or IBCBEG is 1 or 2, then C(2,1)
+!    or C(2,N) should have been set to the desired derivative
+!    values, as described further under IBCBEG and IBCEND.
+!    On output, C contains the polynomial coefficients of
+!    the cubic interpolating spline with interior knots
+!    TAU(2) through TAU(N-1).
+!    In the interval interval (TAU(I), TAU(I+1)), the spline
+!    F is given by F(X) =
+!      C(1,I) +
+!      C(2,I) * ( X - TAU(I) ) +
+!      C(3,I) * ( X - TAU(I) )**2 +
+!      C(4,I) * ( X - TAU(I) )**3
+!
+!  IBCBEG,
+!  IBCEND Input, integer IBCBEG, IBCEND, boundary condition
+!         indicators.
+!
+!         IBCBEG=0 means no boundary condition at TAU(1) is given.
+!         In this case, the "not-a-knot condition" is used.  That
+!         is, the jump in the third derivative across TAU(2) is
+!         forced to zero.  Thus the first and the second cubic
+!         polynomial pieces are made to coincide.
+!
+!         IBCBEG=1 means that the slope at TAU(1) is to equal the
+!         input value C(2,1).
+!
+!         IBCBEG=2 means that the second derivative at TAU(1) is
+!         to equal C(2,1).
+!
+!         IBCEND=0, 1, or 2 has analogous meaning concerning the
+!         boundary condition at TAU(N), with the additional
+!         information taken from C(2,N).
+!
+  implicit none
+
+  integer n
+
+  real ( kind = 8 ) c(4,n)
+  integer ibcbeg
+  integer ibcend
+  real ( kind = 8 ) tau(n)
+!
+!  Solve for the slopes at internal nodes.
+!
+  call cubslo ( tau, c, n )
+!
+!  Now compute the quadratic and cubic coefficients used in the
+!  piecewise polynomial representation.
+!
+  call spline_hermite_set ( n, tau, c )
+
+  return
+end
diff --git a/pppack/cubslo.f90 b/pppack/cubslo.f90
new file mode 100644
index 0000000..659792d
--- /dev/null
+++ b/pppack/cubslo.f90
@@ -0,0 +1,111 @@
+!>
+!> @file cubslo.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine cubslo ( tau, c, n )
+
+!*******************************************************************************
+!
+!! CUBSLO solves for slopes defining a cubic spline.
+!
+!  Discussion:
+!
+!    A tridiagonal linear system for the unknown slopes S(I) of
+!    F at TAU(I), I=1,..., N, is generated and then solved by Gauss
+!    elimination, with S(I) ending up in C(2,I), for all I.
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!    Input, real ( kind = 8 ) TAU(N), the abscissas or X values of
+!    the data points.  The entries of TAU are assumed to be
+!    strictly increasing.
+!
+!    Input, integer N, the number of data points.  N is
+!    assumed to be at least 2.
+!
+!    Input/output, real ( kind = 8 ) C(4,N).
+!    On input, C(1,I) contains the function value at TAU(I),
+!    for I = 1 to N.
+!    C(2,1) contains the slope at TAU(1) and C(2,N) contains
+!    the slope at TAU(N).
+!    On output, the intermediate slopes at TAU(I) have been
+!    stored in C(2,I), for I = 2 to N-1.
+!
+  implicit none
+
+  integer n
+
+  real ( kind = 8 ) c(4,n)
+  real ( kind = 8 ) g
+  integer i
+  integer ibcbeg
+  integer ibcend
+  real ( kind = 8 ) tau(n)
+!
+!  Set up the right hand side of the linear system.
+!  C(2,1) and C(2,N) are presumably already set.
+!
+  do i = 2, n-1
+    c(2,i) = 3.0D+00 * ( &
+      ( tau(i) - tau(i-1) ) * ( c(1,i+1) - c(1,i) ) / ( tau(i+1) - tau(i) ) + &
+      ( tau(i+1) - tau(i) ) * ( c(1,i) - c(1,i-1) ) / ( tau(i) - tau(i-1) ) )
+  end do
+!
+!  Set the diagonal coefficients.
+!
+  c(4,1) = 1.0D+00
+  do i = 2, n-1
+    c(4,i) = 2.0D+00 * ( tau(i+1) - tau(i-1) )
+  end do
+  c(4,n) = 1.0D+00
+!
+!  Set the off-diagonal coefficients.
+!
+  c(3,1) = 0.0D+00
+  do i = 2, n
+    c(3,i) = tau(i) - tau(i-1)
+  end do
+!
+!  Forward elimination.
+!
+  do i = 2, n-1
+    g = -c(3,i+1) / c(4,i-1)
+    c(4,i) = c(4,i) + g * c(3,i-1)
+    c(2,i) = c(2,i) + g * c(2,i-1)
+  end do
+!
+!  Back substitution for the interior slopes.
+!
+  do i = n-1, 2, -1
+    c(2,i) = ( c(2,i) - c(3,i) * c(2,i+1) ) / c(4,i)
+  end do
+
+  return
+end
diff --git a/pppack/cubspl.f90 b/pppack/cubspl.f90
new file mode 100644
index 0000000..ffbc652
--- /dev/null
+++ b/pppack/cubspl.f90
@@ -0,0 +1,282 @@
+!>
+!> @file cubspl.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine cubspl ( tau, c, n, ibcbeg, ibcend )
+
+!*******************************************************************************
+!
+!! CUBSPL defines an interpolatory cubic spline.
+!
+!  Discussion:
+!
+!    A tridiagonal linear system for the unknown slopes S(I) of
+!    F at TAU(I), I=1,..., N, is generated and then solved by Gauss
+!    elimination, with S(I) ending up in C(2,I), for all I.
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!    Input, real ( kind = 8 ) TAU(N), the abscissas or X values of
+!    the data points.  The entries of TAU are assumed to be
+!    strictly increasing.
+!
+!    Input, integer N, the number of data points.  N is
+!    assumed to be at least 2.
+!
+!    Input/output, real ( kind = 8 ) C(4,N).
+!    On input, if IBCBEG or IBCBEG is 1 or 2, then C(2,1)
+!    or C(2,N) should have been set to the desired derivative
+!    values, as described further under IBCBEG and IBCEND.
+!    On output, C contains the polynomial coefficients of
+!    the cubic interpolating spline with interior knots
+!    TAU(2) through TAU(N-1).
+!
+!    In the interval interval (TAU(I), TAU(I+1)), the spline
+!    F is given by
+!
+!      F(X) =
+!        C(1,I) +
+!        C(2,I) * H +
+!        C(3,I) * H**2 / 2 +
+!        C(4,I) * H**3 / 6.
+!
+!    where H=X-TAU(I).  The routine PPVALU may be used to
+!    evaluate F or its derivatives from TAU, C, L=N-1,
+!    and K=4.
+!
+!    Input, integer IBCBEG, IBCEND, boundary condition indicators.
+!
+!    IBCBEG=0 means no boundary condition at TAU(1) is given.
+!    In this case, the "not-a-knot condition" is used.  That
+!    is, the jump in the third derivative across TAU(2) is
+!    forced to zero.  Thus the first and the second cubic
+!    polynomial pieces are made to coincide.
+!
+!    IBCBEG=1 means the slope at TAU(1) is to equal the
+!    input value C(2,1).
+!
+!    IBCBEG=2 means the second derivative at TAU(1) is
+!    to equal C(2,1).
+!
+!    IBCEND=0, 1, or 2 has analogous meaning concerning the
+!    boundary condition at TAU(N), with the additional
+!    information taken from C(2,N).
+!
+  implicit none
+
+  integer n
+
+  real ( kind = 8 ) c(4,n)
+  real ( kind = 8 ) divdf1
+  real ( kind = 8 ) divdf3
+  real ( kind = 8 ) dtau
+  real ( kind = 8 ) g
+  integer i
+  integer ibcbeg
+  integer ibcend
+  real ( kind = 8 ) tau(n)
+!
+!  C(3,*) and C(4,*) are used initially for temporary storage.
+!
+!  Store first differences of the TAU sequence in C(3,*).
+!
+!  Store first divided difference of data in C(4,*).
+!
+  do i = 2, n
+    c(3,i) = tau(i) - tau(i-1)
+  end do
+
+  do i = 2, n
+    c(4,i) = ( c(1,i) - c(1,i-1) ) / ( tau(i) - tau(i-1) )
+  end do
+!
+!  Construct the first equation from the boundary condition
+!  at the left endpoint, of the form:
+!
+!    C(4,1)*S(1) + C(3,1)*S(2) = C(2,1)
+!
+!  IBCBEG = 0: Not-a-knot
+!
+  if ( ibcbeg == 0 ) then
+
+    if ( n <= 2 ) then
+      c(4,1) = 1.0D+00
+      c(3,1) = 1.0D+00
+      c(2,1) = 2.0D+00 * c(4,2)
+      go to 120
+    end if
+
+    c(4,1) = c(3,3)
+    c(3,1) = c(3,2) + c(3,3)
+    c(2,1) = ( ( c(3,2) + 2.0D+00 * c(3,1) ) * c(4,2) * c(3,3) &
+      + c(3,2)**2 * c(4,3) ) / c(3,1)
+!
+!  IBCBEG = 1: derivative specified.
+!
+  else if ( ibcbeg == 1 ) then
+
+    c(4,1) = 1.0D+00
+    c(3,1) = 0.0D+00
+
+    if ( n == 2 ) then
+      go to 120
+    end if
+!
+!  Second derivative prescribed at left end.
+!
+  else
+
+    c(4,1) = 2.0D+00
+    c(3,1) = 1.0D+00
+    c(2,1) = 3.0D+00 * c(4,2) - c(3,2) / 2.0D+00 * c(2,1)
+
+    if ( n == 2 ) then
+      go to 120
+    end if
+
+  end if
+!
+!  If there are interior knots, generate the corresponding
+!  equations and carry out the forward pass of Gauss elimination,
+!  after which the I-th equation reads:
+!
+!    C(4,I) * S(I) + C(3,I) * S(I+1) = C(2,I).
+!
+  do i = 2, n-1
+    g = -c(3,i+1) / c(4,i-1)
+    c(2,i) = g * c(2,i-1) + 3.0D+00 * ( c(3,i) * c(4,i+1) + c(3,i+1) * c(4,i) )
+    c(4,i) = g * c(3,i-1) + 2.0D+00 * ( c(3,i) + c(3,i+1))
+  end do
+!
+!  Construct the last equation from the second boundary condition, of
+!  the form
+!
+!    -G * C(4,N-1) * S(N-1) + C(4,N) * S(N) = C(2,N)
+!
+!  If slope is prescribed at right end, one can go directly to
+!  back-substitution, since the C array happens to be set up just
+!  right for it at this point.
+!
+  if ( ibcend == 1 ) then
+    go to 160
+  end if
+
+  if ( 1 < ibcend ) then
+    go to 110
+  end if
+
+90    continue
+!
+!  Not-a-knot and 3 <= N, and either 3 < N or also not-a-knot
+!  at left end point.
+!
+  if ( n /= 3 .or. ibcbeg /= 0 ) then
+    g = c(3,n-1) + c(3,n)
+    c(2,n) = ( ( c(3,n) + 2.0D+00 * g ) * c(4,n) * c(3,n-1) + c(3,n)**2 &
+      * ( c(1,n-1) - c(1,n-2) ) / c(3,n-1) ) / g
+    g = - g / c(4,n-1)
+    c(4,n) = c(3,n-1)
+    c(4,n) = c(4,n) + g * c(3,n-1)
+    c(2,n) = ( g * c(2,n-1) + c(2,n) ) / c(4,n)
+    go to 160
+  end if
+!
+!  N=3 and not-a-knot also at left.
+!
+100   continue
+
+  c(2,n) = 2.0D+00 * c(4,n)
+  c(4,n) = 1.0D+00
+  g = -1.0D+00 / c(4,n-1)
+  c(4,n) = c(4,n) - c(3,n-1) / c(4,n-1)
+  c(2,n) = ( g * c(2,n-1) + c(2,n) ) / c(4,n)
+  go to 160
+!
+!  IBCEND = 2: Second derivative prescribed at right endpoint.
+!
+110   continue
+
+  c(2,n) = 3.0D+00 * c(4,n) + c(3,n) / 2.0D+00 * c(2,n)
+  c(4,n) = 2.0D+00
+  g = -1.0D+00 / c(4,n-1)
+  c(4,n) = c(4,n) - c(3,n-1) / c(4,n-1)
+  c(2,n) = ( g * c(2,n-1)+c(2,n))/c(4,n)
+  go to 160
+!
+!  N = 2.
+!
+120   continue
+
+  if ( ibcend == 2  ) then
+
+    c(2,n) = 3.0D+00 * c(4,n) + c(3,n) / 2.0D+00 * c(2,n)
+    c(4,n) = 2.0D+00
+    g = -1.0D+00 / c(4,n-1)
+    c(4,n) = c(4,n) - c(3,n-1) / c(4,n-1)
+    c(2,n) = (g*c(2,n-1)+c(2,n)) / c(4,n)
+
+  else if ( ibcend == 0 .and. ibcbeg /= 0 ) then
+
+    c(2,n) = 2.0D+00 * c(4,n)
+    c(4,n) = 1.0D+00
+    g = -1.0D+00 / c(4,n-1)
+    c(4,n) = c(4,n) - c(3,n-1) / c(4,n-1)
+    c(2,n) = (g*c(2,n-1)+c(2,n))/c(4,n)
+
+  else if ( ibcend == 0 .and. ibcbeg == 0 ) then
+
+    c(2,n) = c(4,n)
+
+  end if
+!
+!  Back solve the upper triangular system
+!    C(4,I) * S(I) + C(3,I) * S(I+1) = B(I)
+!  for the slopes C(2,I), given that S(N) is already known.
+!
+160   continue
+
+  do i = n-1, 1, -1
+    c(2,i) = ( c(2,i) - c(3,i) * c(2,i+1) ) / c(4,i)
+  end do
+!
+!  Generate cubic coefficients in each interval, that is, the
+!  derivatives at its left endpoint, from value and slope at its
+!  endpoints.
+!
+  do i = 2, n
+    dtau = c(3,i)
+    divdf1 = ( c(1,i) - c(1,i-1) ) / dtau
+    divdf3 = c(2,i-1) + c(2,i) - 2.0D+00 * divdf1
+    c(3,i-1) = 2.0D+00 * ( divdf1 - c(2,i-1) - divdf3 ) / dtau
+    c(4,i-1) = 6.0D+00 * divdf3 / dtau**2
+  end do
+
+  return
+end
diff --git a/pppack/cwidth.f90 b/pppack/cwidth.f90
new file mode 100644
index 0000000..cdca694
--- /dev/null
+++ b/pppack/cwidth.f90
@@ -0,0 +1,351 @@
+!>
+!> @file cwidth.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine cwidth ( w, b, nequ, ncols, integs, nbloks, d, x, iflag )
+
+!*************************************************************************
+!
+!! CWIDTH solves an almost block diagonal linear system.
+!
+!  Discussion:
+!
+!    This routine is a variation of the theme in the algorithm bandet1
+!    by Martin and Wilkinson (numer.math. 9(1976)279-307).  It solves
+!    the linear system
+!      A*X = B
+!    of NEQU equations in case A is almost block diagonal with all
+!    blocks having NCOLS columns using no more storage than it takes to
+!    store the interesting part of A.  Such systems occur in the determ-
+!    ination of the b-spline coefficients of a spline approximation.
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!  w     on input, a two-dimensional array of size (nequ,ncols) contain-
+!        ing the interesting part of the almost block diagonal coeffici-
+!        ent matrix  a (see description and example below). the array
+!        integs  describes the storage scheme.
+!        on output, w  contains the upper triangular factor  u  of the
+!        lu factorization of a possibly permuted version of  a . in par-
+!        ticular, the determinant of  a  could now be found as
+!            iflag*w(1,1)*w(2,1)* ... * w(nequ,1)  .
+!
+!  b     on input, the right side of the linear system, of length  nequ.
+!        the contents of  b  are changed during execution.
+!
+!    Input, integer NEQU, the number of equations.
+!
+!    Input, integer NCOLS, the block width, that is, the number of
+!    columns in each block.
+!
+!  integs integer array, of size (2,nequ), describing the block
+!         structure of  a .
+!         integs(1,i)=no. of rows in block i              = nrow
+!         integs(2,i)=no. of elimination steps in block i
+!                     =overhang over next block             = last
+!  nbloks number of blocks
+!
+!  d      work array, to contain row sizes . if storage is scarce, the
+!         array  x  could be used in the calling sequence for  d .
+!
+!  x      on output, contains computed solution (if iflag /= 0), of
+!         length  nequ .
+!
+!  iflag  on output, integer
+!        =(-1)**(no.of interchanges during elimination)
+!                if  a  is invertible
+!        = 0   if  a  is singular
+!
+!  block structure of  a
+!
+!  the interesting part of  a  is taken to consist of  nbloks  con-
+!  secutive blocks, with the i-th block made up of  nrowi=integs(1,i)
+!  consecutive rows and  ncols  consecutive columns of  a , and with
+!  the first  lasti=integs(2,i) columns to the left of the next block.
+!  these blocks are stored consecutively in the workarray  w .
+!
+!  for example, here is an 11th order matrix and its arrangement in
+!  the workarray  w . (the interesting entries of  a  are indicated by
+!  their row and column index modulo 10.)
+!
+!                  ---   a   ---                          ---   w   ---
+!
+!                     nrow1=3
+!          11 12 13 14                                     11 12 13 14
+!          21 22 23 24                                     21 22 23 24
+!          31 32 33 34      nrow2=2                        31 32 33 34
+!   last1=2      43 44 45 46                               43 44 45 46
+!                53 54 55 56         nrow3=3               53 54 55 56
+!         last2=3         66 67 68 69                      66 67 68 69
+!                         76 77 78 79                      76 77 78 79
+!                         86 87 88 89   nrow4=1            86 87 88 89
+!                  last3=1   97 98 99 90   nrow5=2         97 98 99 90
+!                     last4=1   08 09 00 01                08 09 00 01
+!                               18 19 10 11                18 19 10 11
+!                        last5=4
+!
+!  for this interpretation of  a  as an almost block diagonal matrix,
+!  we have  nbloks=5 , and the integs array is
+!
+!                        i= 1   2   3   4   5
+!                  k=
+!  integs(k,i)=      1      3   2   3   1   2
+!                     2      2   3   1   1   4
+!
+!
+!  Method:
+!
+!  gauss elimination with scaled partial pivoting is used, but mult-
+!  ipliers are  n o t  s a v e d  in order to save storage. rather, the
+!  right side is operated on during elimination.  the two parameters
+!                  i p v t e q   and  l a s t e q
+!  are used to keep track of the action.  ipvteq is the index of the
+!  variable to be eliminated next, from equations  ipvteq+1,...,lasteq,
+!  using equation  ipvteq (possibly after an interchange) as the pivot
+!  equation. the entries in the pivot column are  a l w a y s  in column
+!  1 of  w . this is accomplished by putting the entries in rows
+!  ipvteq+1,...,lasteq  revised by the elimination of the  ipvteq-th
+!  variable one to the left in  w . in this way, the columns of the
+!  equations in a given block (as stored in  w ) will be aligned with
+!  those of the next block at the moment when these next equations be-
+!  come involved in the elimination process.
+!
+!  thus, for the above example, the first elimination steps proceed
+!  as follows.
+!
+!  *11 12 13 14    11 12 13 14    11 12 13 14    11 12 13 14
+!  *21 22 23 24   *22 23 24       22 23 24       22 23 24
+!  *31 32 33 34   *32 33 34      *33 34          33 34
+!   43 44 45 46    43 44 45 46   *43 44 45 46   *44 45 46        etc.
+!   53 54 55 56    53 54 55 56   *53 54 55 56   *54 55 56
+!   66 67 68 69    66 67 68 69    66 67 68 69    66 67 68 69
+!        .              .              .              .
+!
+!  In all other respects, the procedure is standard, including the
+!  scaled partial pivoting.
+!
+  implicit none
+
+  integer nbloks
+  integer ncols
+  integer nequ
+
+  real ( kind = 8 ) awi1od
+  real ( kind = 8 ) b(nequ)
+  real ( kind = 8 ) colmax
+  real ( kind = 8 ) d(nequ)
+  integer i
+  integer icount
+  integer iflag
+  integer ii
+  integer integs(2,nbloks)
+  integer ipvteq
+  integer ipvtp1
+  integer istar
+  integer j
+  integer jmax
+  integer lastcl
+  integer lasteq
+  integer lasti
+  integer nexteq
+  integer nrowad
+  real ( kind = 8 ) ratio
+  real ( kind = 8 ) rowmax
+  real ( kind = 8 ) sum1
+  real ( kind = 8 ) temp
+  real ( kind = 8 ) w(nequ,ncols)
+  real ( kind = 8 ) x(nequ)
+
+  iflag = 1
+  ipvteq = 0
+  lasteq = 0
+!
+!  The I loop runs over the blocks.
+!
+  do i = 1, nbloks
+!
+!  The equations for the current block are added to those current-
+!  ly involved in the elimination process, by increasing  lasteq
+!  by  integs(1,i) after the rowsize of these equations has been
+!  recorded in the array D.
+!
+    nrowad = integs(1,i)
+
+    do icount = 1, nrowad
+
+      nexteq = lasteq + icount
+
+      rowmax = 0.0D+00
+      do j = 1, ncols
+        rowmax = max ( rowmax, abs ( w(nexteq,j) ) )
+      end do
+
+      if ( rowmax == 0.0D+00 ) then
+        go to 150
+      end if
+
+      d(nexteq) = rowmax
+
+    end do
+
+    lasteq = lasteq + nrowad
+!
+!  There will be  lasti=integs(2,i)  elimination steps before
+!  the equations in the next block become involved. further,
+!  l a s t c l  records the number of columns involved in the cur-
+!  rent elimination step. it starts equal to  ncols  when a block
+!  first becomes involved and then drops by one after each elim-
+!  ination step.
+!
+    lastcl = ncols
+    lasti = integs(2,i)
+
+    do icount = 1, lasti
+
+      ipvteq = ipvteq+1
+
+      if ( ipvteq < lasteq ) then
+        go to 30
+      end if
+
+      if ( d(ipvteq) < abs ( w(ipvteq,1)) + d(ipvteq) ) then
+        go to 100
+      end if
+
+      go to 150
+!
+!  Determine the smallest ISTAR in  (ipvteq,lasteq)  for
+!  which  abs(w(istar,1))/d(istar)  is as large as possible, and
+!  interchange equations  ipvteq  and  istar  in case  ipvteq
+!  < istar .
+!
+   30     continue
+
+      colmax = abs(w(ipvteq,1)) / d(ipvteq)
+      istar = ipvteq
+      ipvtp1 = ipvteq+1
+
+      do ii = ipvtp1, lasteq
+        awi1od = abs(w(ii,1)) / d(ii)
+        if ( colmax < awi1od ) then
+          colmax = awi1od
+          istar = ii
+        end if
+      end do
+
+      if ( abs(w(istar,1))+d(istar) == d(istar) ) then
+        go to 150
+      end if
+
+      if ( istar == ipvteq ) then
+        go to 60
+      end if
+
+      iflag = -iflag
+
+      temp = d(istar)
+      d(istar) = d(ipvteq)
+      d(ipvteq) = temp
+
+      temp = b(istar)
+      b(istar) = b(ipvteq)
+      b(ipvteq) = temp
+
+      do j = 1, lastcl
+        temp = w(istar,j)
+        w(istar,j) = w(ipvteq,j)
+        w(ipvteq,j) = temp
+      end do
+!
+!  Subtract the appropriate multiple of equation  ipvteq  from
+!  equations  ipvteq+1,...,lasteq to make the coefficient of the
+!  ipvteq-th unknown (presently in column 1 of  w ) zero, but
+!  store the new coefficients in  w  one to the left from the old.
+!
+   60     continue
+
+      do ii = ipvtp1, lasteq
+
+        ratio = w(ii,1)/w(ipvteq,1)
+        do j = 2, lastcl
+          w(ii,j-1) = w(ii,j)-ratio*w(ipvteq,j)
+        end do
+        w(ii,lastcl) = 0.0D+00
+        b(ii) = b(ii)-ratio*b(ipvteq)
+
+      end do
+
+      lastcl = lastcl-1
+
+    end do
+
+100     continue
+
+  end do
+!
+!  At this point, W and B contain an upper triangular linear system
+!  equivalent to the original one, with  w(i,j) containing entry
+!  (i, i-1+j ) of the coefficient matrix. solve this system by backsub-
+!  stitution, taking into account its block structure.
+!
+!  i-loop over the blocks, in reverse order
+!
+  i = nbloks
+
+  110 continue
+
+  lasti = integs(2,i)
+  jmax = ncols-lasti
+
+  do icount = 1, lasti
+
+    sum1 = 0.0D+00
+    do j = 1, jmax
+      sum1 = sum1 + x(ipvteq+j) * w(ipvteq,j+1)
+    end do
+
+    x(ipvteq) = ( b(ipvteq) - sum1 ) / w(ipvteq,1)
+    jmax = jmax+1
+    ipvteq = ipvteq-1
+
+  end do
+
+  i = i-1
+  if ( 0 < i ) then
+    go to 110
+  end if
+
+  return
+
+  150 continue
+
+  iflag = 0
+  return
+end
diff --git a/pppack/difequ.f90 b/pppack/difequ.f90
new file mode 100644
index 0000000..51a61f3
--- /dev/null
+++ b/pppack/difequ.f90
@@ -0,0 +1,185 @@
+!>
+!> @file difequ.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine difequ ( mode, xx, v )
+
+!*************************************************************************
+!
+!! DIFEQU returns information about a differential equation.
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!    Input, integer MODE, an integer indicating the task to be performed.
+!    1, initialization
+!    2, evaluate  de  at  xx
+!    3, specify the next side condition
+!    4, analyze the approximation
+!
+!    Input, real ( kind = 8 ) XX, a point at which information is wanted
+!
+!    Output, real ( kind = 8 ) V, depends on the MODE.  See comments below
+!
+  implicit none
+
+  integer, parameter :: npiece = 100
+  integer, parameter :: ncoef = 2000
+
+  real ( kind = 8 ) break
+  real ( kind = 8 ) coef
+  real ( kind = 8 ) eps
+  real ( kind = 8 ) ep1
+  real ( kind = 8 ) ep2
+  real ( kind = 8 ) error
+  real ( kind = 8 ) factor
+  integer i
+  integer iside
+  integer itermx
+  integer k
+  integer kpm
+  integer l
+  integer m
+  integer mode
+  real ( kind = 8 ) rho
+  real ( kind = 8 ) s2ovep
+  real ( kind = 8 ) solutn
+  real ( kind = 8 ) un
+  real ( kind = 8 ) v(20)
+  real ( kind = 8 ) value
+  real ( kind = 8 ) x
+  real ( kind = 8 ) xside
+  real ( kind = 8 ) xx
+
+  common /approx/ break(npiece),coef(ncoef),l,kpm
+  common /side/ m,iside,xside(10)
+  common /other/ itermx,k,rho(19)
+!
+!  This sample of DIFEQU is for the example in chapter xv.  It is a
+!  nonlinear second order two point boundary value problem.
+!
+  go to (10,50,60,110), mode
+!
+!  Initialize everything,  Set the order M of the differential equation,
+!  the nondecreasing sequence xside(i),i=1,...,m, of points at which side
+!  conditions are given and anything else necessary.
+!
+   10 continue
+
+  m = 2
+  xside(1) = 0.0D+00
+  xside(2) = 1.0D+00
+!
+!  Print out heading.
+!
+  write ( *, * ) ' '
+  write ( *, * ) '  Carrier''s nonlinear perturb. problem'
+  write ( *, * ) ' '
+
+  eps = 0.005D+00
+  write(*,*)'EPS = ',eps
+!
+!  Set constants used in formula for solution below.
+!
+  factor = ( sqrt ( 2.0D+00 ) + sqrt ( 3.0D+00 ) )**2
+  s2ovep = sqrt ( 2.0D+00 / eps )
+!
+!  Initial guess for Newton iteration. un(x)=x*x-1.
+!
+  l = 1
+  break(1) = 0.0D+00
+  do i = 1, kpm
+    coef(i) = 0.0D+00
+  end do
+  coef(1) = -1.0D+00
+  coef(3) = 2.0D+00
+  itermx = 10
+  return
+!
+!  Provide value of left side coefficients and right side at  xx .
+!  specifically, at  xx  the dif.equ. reads:
+!
+!    v(m+1)d**m+v(m)d**(m-1) + ... + v(1)d**0 = v(m+2)
+!
+!  in terms of the quantities v(i),i=1,...,m+2, to be computed here.
+!
+   50 continue
+
+  v(3) = eps
+  v(2) = 0.0D+00
+  call ppvalu(break,coef,l,kpm,xx,0,un)
+  v(1) = 2.0D+00 * un
+  v(4) = un**2 + 1.0D+00
+  return
+!
+!  provide the M side conditions. these conditions are of the form
+!        v(m+1)d**m+v(m)d**(m-1) + ... + v(1)d**0 = v(m+2)
+!  in terms of the quantities v(i),i=1,...,m+2, to be specified here.
+!  note that v(m+1)=0  for customary side conditions.
+!
+   60 continue
+
+  v(m+1) = 0.0D+00
+  if ( iside == 1 ) then
+    v(2) = 1.0D+00
+    v(1) = 0.0D+00
+    v(4) = 0.0D+00
+    iside = iside+1
+  else if ( iside == 2 ) then
+    v(2) = 0.0D+00
+    v(1) = 1.0D+00
+    v(4) = 0.0D+00
+    iside = iside + 1
+  end if
+
+  return
+!
+!  Calculate the error near the boundary layer at 1.
+!
+  110 continue
+
+  write(*,*)' '
+  write(*,*)' X, G(X) and G(X)-F(X) at selected points:'
+  write(*,*)' '
+
+  x = 0.75D+00
+
+  do i = 1, 9
+    ep1 = exp ( s2ovep * ( 1.0D+00 - x ) ) * factor
+    ep2 = exp ( s2ovep * ( 1.0D+00 + x ) ) * factor
+    solutn = 12.0D+00 / ( 1.0D+00 + ep1 )**2 * ep1 &
+            +12.0D+00 / ( 1.0D+00 + ep2 )**2 * ep2 - 1.0D+00
+    call ppvalu(break,coef,l,kpm,x,0,value)
+    error = solutn-value
+    write ( *, '(1x,3g14.6)' ) x, solutn, error
+    x = x+0.03125
+  end do
+
+  return
+end
diff --git a/pppack/dtblok.f90 b/pppack/dtblok.f90
new file mode 100644
index 0000000..03d5727
--- /dev/null
+++ b/pppack/dtblok.f90
@@ -0,0 +1,103 @@
+!>
+!> @file dtblok.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine dtblok ( bloks, integs, nbloks, ipivot, iflag, detsgn, detlog )
+
+!*************************************************************************
+!
+!! DTBLOK gets the determinant of an almost block diagonal matrix.
+!
+!  Discussion:
+!
+!    The matrix's PLU factorization must have been obtained
+!    previously by FCBLOK.
+!
+!    The logarithm of the determinant is computed instead of the
+!    determinant itself to avoid the danger of overflow or underflow
+!    inherent in this calculation.
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!  bloks, integs, nbloks, ipivot, iflag  are as on return from fcblok.
+!            in particular, iflag=(-1)**(number of interchanges dur-
+!            ing factorization) if successful, otherwise iflag=0.
+!
+!  detsgn  on output, contains the sign of the determinant.
+!
+!  detlog  on output, contains the natural logarithm of the determi-
+!            nant if determinant is not zero. otherwise contains 0.
+!
+  implicit none
+
+  integer nbloks
+
+  real ( kind = 8 ) bloks(1)
+  real ( kind = 8 ) detlog
+  real ( kind = 8 ) detsgn
+  integer i
+  integer iflag
+  integer index
+  integer indexp
+  integer integs(3,nbloks)
+  integer ip
+  integer ipivot(1)
+  integer k
+  integer last
+  integer nrow
+
+  detsgn = iflag
+  detlog = 0.0D+00
+
+  if ( iflag == 0 ) then
+    return
+  end if
+
+  index = 0
+  indexp = 0
+
+  do i = 1, nbloks
+
+    nrow = integs(1,i)
+    last = integs(3,i)
+
+    do k = 1, last
+      ip = index + nrow * (k-1) + ipivot(indexp+k)
+      detlog = detlog + log ( abs ( bloks(ip) ) )
+      detsgn = detsgn * sign ( 1.0D+00, bloks(ip) )
+    end do
+
+    index = nrow*integs(2,i)+index
+    indexp = indexp+nrow
+
+  end do
+
+  return
+end
diff --git a/pppack/eqblok.f90 b/pppack/eqblok.f90
new file mode 100644
index 0000000..411f1de
--- /dev/null
+++ b/pppack/eqblok.f90
@@ -0,0 +1,192 @@
+!>
+!> @file eqblok.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine eqblok ( t, n, kpm, work1, work2, bloks, lenblk, integs, &
+  nbloks, b )
+
+!*************************************************************************
+!
+!! EQBLOK is to be called in COLLOC.
+!
+!  Method:
+!
+!    Each breakpoint interval gives rise to a block in the linear system.
+!    this block is determined by the K collocation equations in the interval
+!    with the side conditions (if any) in the interval interspersed ap-
+!    propriately, and involves the  kpm  b-splines having the interval in
+!    their support. correspondingly, such a block has  nrow=k+isidel
+!    rows, with  isidel=number of side conditions in this and the prev-
+!    ious intervals, and  ncol=kpm  columns.
+!
+!    Further, because the interior knots have multiplicity  k, we can
+!    carry out (in slvblk)  k  elimination steps in a block before pivot-
+!    ing might involve an equation from the next block. in the last block,
+!    of course, all kpm elimination steps will be carried out (in slvblk).
+!
+!    see the detailed comments in the solveblok package for further in-
+!    formation about the almost block diagonal form used here.
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!  input
+!
+!    Input, real ( kind = 8 ) T(N+KPM), the knot sequence.
+!
+!    Input, integer N, the dimension of the approximating spline space,
+!    that is, the order of the linear system to be constructed.
+!
+!    Input, integer KPM, = K + M, the order of the approximating spline.
+!
+!    Input, integer LENBLK, the maximum length of the array BLOKS,
+!    as allowed by the dimension statement in COLLOC.
+!
+!  work  areas
+!
+!  work1    used in  putit, of size (kpm,kpm)
+!  work2    used in  putit, of size (kpm,m+1)
+!
+!  output
+!
+!  bloks    the coefficient matrix of the linear system, stored in al-
+!           most block diagonal form, of size
+!              kpm*sum(integs(1,i) , i=1,...,nbloks)
+!
+!  integs   an integer array, of size (3,nbloks), describing the block
+!           structure.
+!           integs(1,i) = number of rows in block  i
+!           integs(2,i) = number of columns in block  i
+!           integs(3,i) = number of elimination steps which can be
+!                       carried out in block  i  before pivoting might
+!                       bring in an equation from the next block.
+!
+!  nbloks number of blocks, equals number of polynomial pieces
+!
+!  b      the right side of the linear system, stored corresponding to the
+!         almost block diagonal form, of size sum(integs(1,i) , i=1,...,
+!         nbloks).
+!
+  implicit none
+
+  integer kpm
+  integer n
+
+  real ( kind = 8 ) b(*)
+  real ( kind = 8 ) bloks(*)
+  integer i
+  integer index
+  integer indexb
+  integer integs(3,*)
+  integer iside
+  integer isidel
+  integer itermx
+  integer k
+  integer left
+  integer lenblk
+  integer m
+  integer nbloks
+  integer nrow
+  real ( kind = 8 ) rho
+  real ( kind = 8 ) t(n+kpm)
+  real ( kind = 8 ) work1(kpm,kpm)
+  real ( kind = 8 ) work2(kpm,*)
+  real ( kind = 8 ) xside
+
+  common /side/ m,iside,xside(10)
+  common /other/ itermx,k,rho(19)
+
+  index = 1
+  indexb = 1
+  i = 0
+  iside = 1
+
+  do left = kpm, n, k
+
+    i = i + 1
+!
+!  Determine integs(.,i)
+!
+    integs(2,i) = kpm
+
+    if ( n <= left ) then
+      integs(3,i) = kpm
+      isidel = m
+      go to 30
+    end if
+
+    integs(3,i) = k
+!
+!  At this point,  iside-1  gives the number of side conditions
+!  incorporated so far. adding to this the side conditions in the
+!  current interval gives the number  isidel .
+!
+    isidel = iside - 1
+
+    do
+
+      if ( isidel == m ) then
+        exit
+      end if
+
+      if ( t(left+1) <= xside(isidel+1) ) then
+        exit
+      end if
+
+      isidel = isidel + 1
+
+    end do
+
+30  continue
+
+    nrow = k + isidel
+    integs(1,i) = nrow
+!
+!  The detailed equations for this block are generated and put
+!  together in PUTIT.
+!
+    if ( lenblk < index + nrow * kpm - 1 ) then
+      write ( *, * ) ' '
+      write ( *, * ) 'EQBLOK - Fatal error!'
+      write ( *, * ) '  The dimension of BLOKS is too small.'
+      write ( *, * ) '  LENBLK = ', lenblk
+      stop
+    end if
+
+    call putit ( t, kpm, left, work1, work2, bloks(index), nrow, b(indexb) )
+
+    index = index + nrow * kpm
+    indexb = indexb + nrow
+
+  end do
+
+  nbloks = i
+
+  return
+end
diff --git a/pppack/evnnot.f90 b/pppack/evnnot.f90
new file mode 100644
index 0000000..518bf00
--- /dev/null
+++ b/pppack/evnnot.f90
@@ -0,0 +1,85 @@
+!>
+!> @file evnnot.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine evnnot ( break, coef, l, k, brknew, lnew, coefg )
+
+!*************************************************************************
+!
+!! EVNNOT is a "fake" version of NEWNOT.
+!
+!  Discussion:
+!
+!    EVNNOT returns LNEW+1 knots in BRKNEW which are
+!    evenly spaced between BREAK(1) and BREAK(L+1).
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!    Input, real ( kind = 8 ) BREAK(L+1), coef, l, k.....contains the
+!    pp-representation of a certain function F of order K.  Specifically,
+!    d**(k-1)f(x)=coef(k,i)  for  break(i) <= x < break(i+1)
+!
+!    Input, integer LNEW, the number of subintervals into which the interval
+!    (a,b) is to be sectioned by the new breakpoint sequence  brknew .
+!
+!    Output, real ( kind = 8 ) BRKNEW(LNEW+1), the new breakpoints.
+!
+!    Output, coefg  the coefficient part of the pp-repr.  break, coefg, l, 2
+!    for the monotone p.linear function G with respect to which  brknew  will
+!    be equidistributed.
+!
+  implicit none
+
+  integer k
+  integer l
+  integer lnew
+
+  real ( kind = 8 ) break(l+1)
+  real ( kind = 8 ) brknew(lnew+1)
+  real ( kind = 8 ) coef(k,l)
+  real ( kind = 8 ) coefg(2,l)
+  integer i
+
+  if ( lnew == 0 ) then
+
+    brknew(1) = 0.5D+00 * ( break(1) + break(l+1) )
+
+  else
+
+    do i = 1, lnew+1
+      brknew(i) = ( real ( lnew - i + 1, kind = 8 ) * break(1) &
+                  + real (        i - 1, kind = 8 ) * break(l+1) ) &
+                  / real ( lnew,         kind = 8 )
+    end do
+
+  end if
+
+  return
+end
diff --git a/pppack/factrb.f90 b/pppack/factrb.f90
new file mode 100644
index 0000000..298312b
--- /dev/null
+++ b/pppack/factrb.f90
@@ -0,0 +1,188 @@
+!>
+!> @file factrb.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine factrb ( w, ipivot, d, nrow, ncol, last, iflag )
+
+!*************************************************************************
+!
+!! FACTRB constructs a partial PLU factorization.
+!
+!  Discussion:
+!
+!    This factorization corresponds to steps 1 through LAST in Gauss
+!    elimination for the matrix W of order ( NROW, NCOL ), using
+!    pivoting of scaled rows.
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!    Input/output, real ( kind = 8 ) W(NROW,NCOL); on input, contains the
+!    matrix to be partially factored; on output, the partial factorization.
+!
+!    Output, integer IPIVOT(NROW), contains a record of the pivoting
+!    strategy used; row IPIVOT(I) is used during the I-th elimination step,
+!    for I = 1, ..., LAST.
+!
+!    Workspace, real ( kind = 8 ) D(NROW), used to store the maximum entry
+!    in each row.
+!
+!    Input, integer NROW, the number of rows of W.
+!
+!    Input, integer NCOL, the number of columns of W.
+!
+!    Input, integer LAST, the number of elimination steps to be carried out.
+!
+!    Input/output, integer IFLAG.  On output, equals the input value
+!    times (-1)**(number of row interchanges during the factorization
+!    process), in case no zero pivot was encountered.
+!    Otherwise, iflag=0 on output.
+!
+  implicit none
+
+  integer ncol
+  integer nrow
+
+  real ( kind = 8 ) awikdi
+  real ( kind = 8 ) colmax
+  real ( kind = 8 ) d(nrow)
+  integer i
+  integer iflag
+  integer ipivi
+  integer ipivk
+  integer ipivot(nrow)
+  integer j
+  integer k
+  integer kp1
+  integer last
+  real ( kind = 8 ) ratio
+  real ( kind = 8 ) rowmax
+  real ( kind = 8 ) w(nrow,ncol)
+!
+!  Initialize IPIVOT and D.
+!
+  do i = 1, nrow
+    ipivot(i) = i
+  end do
+
+  do i = 1, nrow
+
+    rowmax = 0.0D+00
+    do j = 1, ncol
+      rowmax = max ( rowmax, abs ( w(i,j) ) )
+    end do
+
+    if ( rowmax == 0.0D+00 ) then
+      iflag = 0
+      return
+    end if
+
+    d(i) = rowmax
+
+  end do
+!
+!  Gauss elimination with pivoting of scaled rows, loop over k=1,.,last
+!
+  k = 1
+!
+!  As pivot row for k-th step, pick among the rows not yet used,
+!  i.e., from rows ipivot(k),...,ipivot(nrow), the one whose k-th
+!  entry (compared to the row size) is largest. then, if this row
+!  does not turn out to be row ipivot(k), redefine ipivot(k) ap-
+!  propriately and record this interchange by changing the sign
+!  of IFLAG.
+!
+   30 continue
+
+  ipivk = ipivot(k)
+
+  if ( k == nrow ) then
+    if ( abs(w(ipivk,nrow))+d(ipivk) <= d(ipivk) ) then
+      iflag = 0
+    end if
+    return
+  end if
+
+  j = k
+  kp1 = k+1
+  colmax = abs(w(ipivk,k))/d(ipivk)
+!
+!  Find the largest pivot
+!
+  do i = kp1, nrow
+    ipivi = ipivot(i)
+    awikdi = abs(w(ipivi,k)) / d(ipivi)
+    if ( colmax < awikdi ) then
+      colmax = awikdi
+      j = i
+    end if
+  end do
+
+  if ( j /= k ) then
+    ipivk = ipivot(j)
+    ipivot(j) = ipivot(k)
+    ipivot(k) = ipivk
+    iflag = -iflag
+  end if
+!
+!  If pivot element is too small in absolute value, declare
+!  matrix to be noninvertible and quit.
+!
+  if ( abs(w(ipivk,k))+d(ipivk) <= d(ipivk) ) then
+    iflag = 0
+    return
+  end if
+!
+!  Otherwise, subtract the appropriate multiple of the pivot
+!  row from remaining rows, i.e., the rows ipivot(k+1),...,
+!  ipivot(nrow), to make k-th entry zero. save the multiplier in
+!  its place.
+!
+  do i = kp1, nrow
+
+    ipivi = ipivot(i)
+    w(ipivi,k) = w(ipivi,k)/w(ipivk,k)
+
+    ratio = -w(ipivi,k)
+    do j = kp1, ncol
+      w(ipivi,j) = ratio*w(ipivk,j)+w(ipivi,j)
+    end do
+
+  end do
+
+  k = kp1
+!
+!  Check for having reached the next block.
+!
+  if ( k <= last ) then
+    go to 30
+  end if
+
+  return
+end
diff --git a/pppack/fcblok.f90 b/pppack/fcblok.f90
new file mode 100644
index 0000000..72f11d8
--- /dev/null
+++ b/pppack/fcblok.f90
@@ -0,0 +1,126 @@
+!>
+!> @file fcblok.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine fcblok ( bloks, integs, nbloks, ipivot, scrtch, iflag )
+
+!*************************************************************************
+!
+!! FCBLOK supervises the PLU factorization with pivoting of
+!  scaled rows of the almost block diagonal matrix.
+!
+!    The almost block diagonal matrix is stored in the arrays
+!    BLOKS and INTEGS.
+!
+!    The FACTRB routine carries out steps 1,...,last of gauss
+!    elimination (with pivoting) for an individual block.
+!
+!    The SHIFTB routine shifts the remaining rows to the top of
+!    the next block
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!  bloks   an array that initially contains the almost block diagonal
+!            matrix  a  to be factored, and on return contains the com-
+!            puted factorization of  a .
+!
+!  integs  an integer array describing the block structure of  a .
+!
+!  nbloks  the number of blocks in  a .
+!
+!  ipivot  an integer array of dimension  sum (integs(1,n) ; n=1,
+!            ...,nbloks) which, on return, contains the pivoting stra-
+!            tegy used.
+!
+!  scrtch  work area required, of length  max (integs(1,n) ; n=1,
+!            ...,nbloks).
+!
+!  iflag   output parameter;
+!          =0  in case matrix was found to be singular.
+!            otherwise,
+!          =(-1)**(number of row interchanges during factorization)
+!
+  implicit none
+
+  integer nbloks
+
+  real ( kind = 8 ) bloks(*)
+  integer i
+  integer iflag
+  integer index
+  integer indexb
+  integer indexn
+  integer integs(3,nbloks)
+  integer ipivot(*)
+  integer last
+  integer ncol
+  integer nrow
+  real ( kind = 8 ) scrtch(*)
+
+  iflag = 1
+  indexb = 1
+  indexn = 1
+  i = 1
+!
+!  Loop over the blocks.  i  is loop index
+!
+  do
+
+    index = indexn
+    nrow = integs(1,i)
+    ncol = integs(2,i)
+    last = integs(3,i)
+!
+!  Carry out elimination on the I-th block until next block
+!  enters, for columns 1 through LAST of I-th block.
+!
+    call factrb ( bloks(index), ipivot(indexb), scrtch, nrow, ncol, &
+      last, iflag )
+!
+!  Check for having reached a singular block or the last block.
+!
+    if ( iflag == 0 .or. i == nbloks ) then
+      exit
+    end if
+
+    i = i + 1
+    indexn = nrow * ncol + index
+!
+!  Put the rest of the I-th block onto the next block.
+!
+    call shiftb ( bloks(index), ipivot(indexb), nrow, ncol, last, &
+      bloks(indexn), integs(1,i), integs(2,i) )
+
+    indexb = indexb + nrow
+
+  end do
+
+  return
+end
diff --git a/pppack/interv.f90 b/pppack/interv.f90
new file mode 100644
index 0000000..60b9cb1
--- /dev/null
+++ b/pppack/interv.f90
@@ -0,0 +1,223 @@
+!>
+!> @file interv.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine interv ( xt, lxt, x, left, mflag )
+
+!*******************************************************************************
+!
+!! INTERV brackets a real value in an ascending vector of values.
+!
+!  Discussion:
+!
+!    The XT array is a set of increasing values.  The goal of the routine
+!    is to determine the largest index I so that XT(I) <= X.
+!
+!    The routine is designed to be efficient in the common situation
+!    that it is called repeatedly, with X taken from an increasing
+!    or decreasing sequence.
+!
+!    This will happen when a piecewise polynomial is to be graphed.
+!    The first guess for LEFT is therefore taken to be the value
+!    returned at the previous call and stored in the local variable ILO.
+!
+!    A first check ascertains that ILO < LXT.  This is necessary
+!    since the present call may have nothing to do with the previous
+!    call.  Then, if XT(ILO) < = X < XT(ILO+1), we set LEFT=ILO
+!    and are done after just three comparisons.
+!
+!    Otherwise, we repeatedly double the difference ISTEP=IHI-ILO
+!    while also moving ILO and IHI in the direction of X, until
+!      XT(ILO) < = X < XT(IHI)
+!    after which we use bisection to get, in addition, ILO+1=IHI.
+!    LEFT=ILO is then returned.
+!
+!  Modified:
+!
+!    05 February 2004
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!    Input, real ( kind = 8 ) XT(LXT), a nondecreasing sequence of values.
+!
+!    Input, integer LXT, the dimension of XT.
+!
+!    Input, real ( kind = 8 ) X, the point whose location with
+!    respect to the sequence XT is to be determined.
+!
+!    Output, integer LEFT, the index of the bracketing value:
+!      1     if             X  <  XT(1)
+!      I     if   XT(I)  <= X  < XT(I+1)
+!      LXT   if  XT(LXT) <= X
+!
+!    Output, integer MFLAG, indicates whether X lies within the
+!    range of the data.
+!    -1:            X  <  XT(1)
+!     0: XT(I)   <= X  < XT(I+1)
+!    +1: XT(LXT) <= X
+!
+  implicit none
+
+  integer lxt
+
+  integer left
+  integer mflag
+  integer ihi
+  integer, save :: ilo = 1
+  integer istep
+  integer middle
+  real ( kind = 8 ) x
+  real ( kind = 8 ) xt(lxt)
+
+!$omp threadprivate(ilo)
+
+  ihi = ilo + 1
+
+  if ( lxt <= ihi ) then
+
+    if ( xt(lxt) <= x ) then
+      go to 110
+    end if
+
+    if ( lxt <= 1 ) then
+      mflag = -1
+      left = 1
+      return
+    end if
+
+    ilo = lxt - 1
+    ihi = lxt
+
+  end if
+
+  if ( xt(ihi) <= x ) then
+    go to 40
+  end if
+
+  if ( xt(ilo) <= x ) then
+    mflag = 0
+    left = ilo
+    return
+  end if
+!
+!  Now X < XT(ILO).  Decrease ILO to capture X.
+!
+  istep = 1
+
+   31 continue
+
+  ihi = ilo
+  ilo = ihi - istep
+
+  if ( 1 < ilo ) then
+    if ( xt(ilo) <= x ) then
+      go to 50
+    end if
+    istep = istep * 2
+    go to 31
+  end if
+
+  ilo = 1
+
+  if ( x < xt(1) ) then
+    mflag = -1
+    left = 1
+    return
+  end if
+
+  go to 50
+!
+!  Now XT(IHI) <= X.  Increase IHI to capture X.
+!
+   40 continue
+
+  istep = 1
+
+   41 continue
+
+  ilo = ihi
+  ihi = ilo + istep
+
+  if ( ihi < lxt ) then
+    if ( x < xt(ihi) ) then
+      go to 50
+    end if
+    istep = istep * 2
+    go to 41
+  end if
+
+  if ( xt(lxt) <= x ) then
+    go to 110
+  end if
+!
+!  Now XT(ILO) < = X < XT(IHI).  Narrow the interval.
+!
+  ihi = lxt
+
+50 continue
+
+  do
+
+    middle = ( ilo + ihi ) / 2
+
+    if ( middle == ilo ) then
+      mflag = 0
+      left = ilo
+      return
+    end if
+!
+!  It is assumed that MIDDLE = ILO in case IHI = ILO+1.
+!
+    if ( xt(middle) <= x ) then
+      ilo = middle
+    else
+      ihi = middle
+    end if
+
+  end do
+!
+!  Set output and return.
+!
+  110 continue
+
+  mflag = 1
+
+  if ( x == xt(lxt) ) then
+    mflag = 0
+  end if
+
+  do left = lxt, 1, -1
+    if ( xt(left) < xt(lxt) ) then
+      return
+    end if
+  end do
+
+  return
+end
diff --git a/pppack/knots.f90 b/pppack/knots.f90
new file mode 100644
index 0000000..c7be744
--- /dev/null
+++ b/pppack/knots.f90
@@ -0,0 +1,103 @@
+!>
+!> @file knots.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine knots ( break, l, kpm, t, n )
+
+!*************************************************************************
+!
+!! KNOTS is to be called in COLLOC.
+!
+!  Discussion:
+!
+!    From the given breakpoint sequence BREAK the routine constructs the
+!    knot sequence T so that
+!
+!      SPLINE(K+M,T) = PP(K+M,BREAK)
+!
+!    with M-1 continuous derivatives.  This means that
+!
+!      t(1),...,t(n+kpm) = break(1) kpm times, then break(2),...,
+!        break(l) each  k  times, then, finally, break(l+1) kpm times.
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!    Input, real ( kind = 8 ) BREAK(L+1), the breakpoint sequence.
+!
+!    Input, integer L, the number of intervals or pieces.
+!
+!    Input, integer KPM, = K+M, the order of the piecewise polynomial
+!    function or spline.
+!
+!    Output, real ( kind = 8 ) T(N+KPM), the knot sequence.
+!
+!    Output, integer N, = L*K+M = the dimension of SPLINE(K+M,T).
+!
+  implicit none
+
+  integer kpm
+  integer l
+  integer n
+
+  real ( kind = 8 ) break(l+1)
+  integer iside
+  integer j
+  integer jj
+  integer jjj
+  integer k
+  integer ll
+  integer m
+  real ( kind = 8 ) t(*)
+  real ( kind = 8 ) xside
+
+  common /side/ m,iside,xside(10)
+
+  k = kpm-m
+  n = l*k+m
+  jj = n+kpm
+  jjj = l+1
+
+  do ll = 1, kpm
+    t(jj) = break(jjj)
+    jj = jj-1
+  end do
+
+  do j = 1, l
+    jjj = jjj-1
+    do ll = 1, k
+      t(jj) = break(jjj)
+      jj = jj-1
+    end do
+  end do
+
+  t(1:kpm) = break(1)
+
+  return
+end
diff --git a/pppack/l2appr.f90 b/pppack/l2appr.f90
new file mode 100644
index 0000000..861d3bd
--- /dev/null
+++ b/pppack/l2appr.f90
@@ -0,0 +1,196 @@
+!>
+!> @file l2appr.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine l2appr ( t, n, k, q, diag, bcoef )
+
+!*************************************************************************
+!
+!! L2APPR constructs a weighted L2 spline approximation to given data.
+!
+!  Discussion:
+!
+!    The routine constructs the weighted discrete L2-approximation by
+!    splines of order K with knot sequence T(1), ..., T(n+k) to
+!    given data points ( TAU(1:NTAU), GTAU(1:NTAU) ).
+!
+!    The B-spline coefficients BCOEF of the approximating spline are
+!    determined from the normal equations using Cholesky's method.
+!
+!  Method:
+!
+!    The B-spline coefficients of the L2-approximation are determined as the
+!    solution of the normal equations
+!
+!      sum ( (b(i), b(j) ) * bcoef(j) : j=1,...,n) =(b(i),g),
+!                                               i=1, ..., n .
+!    Here, b(i) denotes the i-th B-spline, G denotes the function to
+!    be approximated, and the inner product of two functions F and G
+!    is given by
+!
+!      (f,g)  := sum ( f(tau(i))*g(tau(i))*weight(i) : i=1,...,ntau) .
+!
+!    The arrays TAU and WEIGHT are given in common block
+!    DATA, as is the array GTAU containing the sequence
+!    g(tau(i)), i=1,..., NTAU.
+!
+!    The relevant function values of the B-splines  b(i), i=1,...,n, are
+!    supplied by the subprogram BSPLVB.
+!
+!    The coefficient matrix C, with
+!           c(i,j)  := (b(i), b(j)), i,j=1,...,n,
+!    of the normal equations is symmetric and (2*k-1)-banded, therefore
+!    can be specified by giving its K bands at or below the diagonal.
+!    For i=1,...,n,  we store
+!    (b(i),b(j)) = c(i,j)  in  q(i-j+1,j), j=i,...,min(i+k-1,n)
+!    and the right side
+!    (b(i), g )  in  bcoef(i).
+!
+!    Since B-spline values are most efficiently generated by finding sim-
+!    ultaneously the value of every nonzero B-spline at one point,
+!    the entries of C (i.e., of  Q ), are generated by computing, for
+!    each ll, all the terms involving  tau(ll)  simultaneously and adding
+!    them to all relevant entries.
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!    Input, real ( kind = 8 ) T(N+K), the knot sequence.
+!
+!    Input, integer N, the dimension of the space of splines of order K
+!    with knots t.
+!
+!    Input, integer K, the order.
+!
+!  work arrays
+!
+!    q....a work array of size (at least) k*n. its first  k  rows are used
+!       for the  k  lower diagonals of the gramian matrix  c.
+!
+!    diag.....a work array of length  n  used in bchfac .
+!
+!  input  via  c o m m o n  /data/
+!
+!  ntau.....number of data points
+!  (tau(i),gtau(i)), i=1,...,ntau     are the  ntau  data points to be
+!        fitted .
+!  weight(i), i=1,...,ntau    are the corresponding weights .
+!
+!  output
+!  bcoef(1), ..., bcoef(n)  the b-spline coefficients of the l2-appr.
+!
+  implicit none
+
+  integer k
+  integer n
+  integer, parameter :: ntmax = 200
+
+  real ( kind = 8 ) bcoef(n)
+  real ( kind = 8 ) biatx(k)
+  real ( kind = 8 ) diag(n)
+  real ( kind = 8 ) dw
+  real ( kind = 8 ) gtau
+  integer i
+  integer j
+  integer jj
+  integer left
+  integer leftmk
+  integer ll
+  integer mm
+  integer ntau
+  real ( kind = 8 ) q(k,n)
+  real ( kind = 8 ) t(n+k)
+  real ( kind = 8 ) tau
+  real ( kind = 8 ) totalw
+  real ( kind = 8 ) weight
+
+  COMMON /DATA/ tau(ntmax),gtau(ntmax),weight(ntmax),totalw,ntau
+
+  bcoef(1:n) = 0.0D+00
+  q(1:k,1:n) = 0.0D+00
+
+  left = k
+  leftmk = 0
+
+  do ll = 1, ntau
+!
+!  Locate LEFT such that tau(ll) in (t(left),t(left+1)).
+!
+    do
+
+      if ( left == n ) then
+        exit
+      end if
+
+      if ( tau(ll) < t(left+1) ) then
+        exit
+      end if
+
+      left = left + 1
+      leftmk = leftmk + 1
+
+    end do
+
+    call bsplvb ( t, k, 1, tau(ll), left, biatx )
+!
+!  biatx(mm) contains the value of b(left-k+mm) at tau(ll).
+!  hence, with  dw := biatx(mm)*weight(ll), the number dw*gtau(ll)
+!  is a summand in the inner product
+!     (b(left-k+mm), g)  which goes into  bcoef(left-k+mm)
+!  and the number biatx(jj)*dw is a summand in the inner product
+!     (b(left-k+jj), b(left-k+mm)), into  q(jj-mm+1,left-k+mm)
+!  since  (left-k+jj)-(left-k+mm)+1 = jj - mm + 1 .
+!
+    do mm = 1, k
+
+      dw = biatx(mm)*weight(ll)
+      j = leftmk+mm
+      bcoef(j) = dw*gtau(ll)+bcoef(j)
+      i = 1
+
+      do jj = mm, k
+        q(i,j) = biatx(jj)*dw+q(i,j)
+        i = i+1
+      end do
+
+    end do
+
+  end do
+!
+!  Construct the Cholesky factorization for C in  q , then
+!  use it to solve the normal equations
+!    c*x = bcoef
+!  for X, and store X in BCOEF.
+!
+  call bchfac ( q, k, n, diag )
+
+  call bchslv ( q, k, n, bcoef )
+
+  return
+end
diff --git a/pppack/l2err.f90 b/pppack/l2err.f90
new file mode 100644
index 0000000..7449380
--- /dev/null
+++ b/pppack/l2err.f90
@@ -0,0 +1,146 @@
+!>
+!> @file l2err.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine l2err ( iprfun, ftau, error )
+
+!*************************************************************************
+!
+!! L2ERR computes the errors of an L2 approximation.
+!
+!  Discussion:
+!
+!    This routine computes various errors of the current L2-approximation,
+!    whose piecewise polynomial representation is contained in common
+!    block APPROX, to the given data contained in common block  data.
+!
+!    It prints out the average error errl1, the l2-error errl2, and the
+!    maximum error errmax.
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!    Input, integer IPRFUN.  If iprfun= 1, the routine prints out
+!    the value of the approximation as well as its error at
+!    every data point.
+!
+!    Output, real ( kind = 8 ) FTAU(NTAU), contains the value of the computed
+!    approximation at each value TAU(1:NTAU).
+!
+!    Output, error(1), ..., error(ntau),  with  error(i)=scale*(g-f)
+!    at tau(i), all i. here, SCALE equals 1. in case
+!    iprfun /= 1 , or the absolute error is greater than 100 some-
+!    where. otherwise, SCALE is such that the maximum of
+!    abs(error))  over all I lies between 10 and 100.  This
+!    makes the printed output more illustrative.
+!
+  implicit none
+
+  integer, parameter :: lpkmax = 100
+  integer, parameter :: ntmax = 200
+  integer, parameter :: ltkmax = 2000
+
+  integer ntau
+
+  real ( kind = 8 ) break
+  real ( kind = 8 ) coef
+  real ( kind = 8 ) err
+  real ( kind = 8 ) errl1
+  real ( kind = 8 ) errl2
+  real ( kind = 8 ) errmax
+  real ( kind = 8 ) error(ntau)
+  real ( kind = 8 ) ftau(ntau)
+  real ( kind = 8 ) gtau
+  integer ie
+  integer iprfun
+  integer k
+  integer l
+  integer ll
+  real ( kind = 8 ) scale
+  real ( kind = 8 ) tau
+  real ( kind = 8 ) totalw
+  real ( kind = 8 ) weight
+
+  COMMON /DATA/ tau(ntmax),gtau(ntmax),weight(ntmax),totalw,ntau
+  common /approx/ break(lpkmax),coef(ltkmax),l,k
+
+  errl1 = 0.0D+00
+  errl2 = 0.0D+00
+  errmax = 0.0D+00
+
+  do ll = 1, ntau
+    call ppvalu(break,coef,l,k,tau(ll),0,ftau(ll))
+    error(ll) = gtau(ll)-ftau(ll)
+    err = abs(error(ll))
+    if ( errmax < err ) then
+      errmax = err
+    end if
+    errl1 = errl1 + err * weight(ll)
+    errl2 = errl2 + err**2 * weight(ll)
+  end do
+
+  errl1 = errl1 / totalw
+  errl2 = sqrt ( errl2 / totalw )
+
+  write ( *, * ) ' '
+  write ( *, * ) ' Least square error =',errl2
+  write ( *, * ) ' Average error     =',errl1
+  write ( *, * ) ' Maximum error     =',errmax
+  write ( *, * ) ' '
+
+  if ( iprfun /= 1 ) then
+    return
+  end if
+!
+!  Scale error curve and print
+!
+  ie = 0
+  scale = 1.0D+00
+
+  if ( errmax < 10.0D+00 ) then
+
+    do ie = 1, 9
+      scale = scale * 10.0D+00
+      if ( 10.0D+00 <= errmax * scale ) then
+        exit
+      end if
+    end do
+
+  end if
+
+  error(1:ntau) = error(1:ntau) * scale
+
+  write(*,60) ie, (ll,tau(ll),ftau(ll),error(ll),ll=1,ntau)
+
+   60 format (///14x,'approximation and scaled error curve'/ &
+       7x,'data point',7x,'approximation',3x,'deviation x 10**',i1/ &
+       (i4, f16.8,f16.8,f17.6))
+
+  return
+end
diff --git a/pppack/l2knts.f90 b/pppack/l2knts.f90
new file mode 100644
index 0000000..6c064a6
--- /dev/null
+++ b/pppack/l2knts.f90
@@ -0,0 +1,83 @@
+!>
+!> @file l2knts.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine l2knts ( break, l, k, t, n )
+
+!*************************************************************************
+!
+!! L2KNTS converts breakpoints to knots.
+!
+!  Discussion:
+!
+!    The breakpoint sequence BREAK is converted into a corresponding
+!    knot sequence T to allow the representation of a piecewise
+!    polynomial function of order K with K-2 continuous derivatives
+!    as a spline of order K with knot sequence T.
+!
+!    This means that
+!    t(1), ..., t(n+k)= break(1) k times, then break(i), i=2,...,l, each
+!    once, then break(l+1) k times.  Therefore,  n=k-1+l.
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!    Input, integer K, the order.
+!
+!    Input, integer L, the number of polynomial pieces.
+!
+!    Input, real ( kind = 8 ) BREAK(L+1), the breakpoint sequence.
+!
+!    Output, real ( kind = 8 ) T(N+K), the knot sequence.
+!
+!    Output, integer N, the dimension of the corresponding spline space
+!    of order K.
+!
+  implicit none
+
+  integer k
+  integer l
+  integer n
+
+  real ( kind = 8 ) break(l+1)
+  integer i
+  real ( kind = 8 ) t(k-1+l+k)
+
+  t(1:k-1) = break(1)
+
+  do i = 1, l
+    t(k-1+i) = break(i)
+  end do
+
+  n = k-1+l
+
+  t(n+1:n+k) = break(l+1)
+
+  return
+end
diff --git a/pppack/newnot.f90 b/pppack/newnot.f90
new file mode 100644
index 0000000..f3c556a
--- /dev/null
+++ b/pppack/newnot.f90
@@ -0,0 +1,206 @@
+!>
+!> @file newnot.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine newnot ( break, coef, l, k, brknew, lnew, coefg )
+
+!*************************************************************************
+!
+!! NEWNOT returns LNEW+1 knots which are equidistributed on (A,B).
+!
+!  Discussion:
+!
+!    (a,b) = (break(1),break(l+1)) with respect to a certain monotone
+!    function G related to the K-th root of the K-th derivative of the
+!    piecewise polynomial function F whose piecewise polynomial
+!    representation is contained in  break, coef, l, k .
+!
+!  method
+!
+!    The K-th derivative of the given piecewise polynomial function F does
+!    not exist, except perhaps as a linear combination of delta functions.
+!    Nevertheless, we construct a piecewise constant function H with
+!    breakpoint sequence BREAK which is approximately proportional
+!    to abs(d**k(f)).
+!
+!  Specifically, on  (break(i), break(i+1)),
+!
+!     abs(jump at break(i) of pc)    abs(jump at break(i+1) of pc)
+!  h=-------------- + ----------------------------
+!       break(i+1)-break(i-1)         break(i+2) - break(i)
+!
+!  with  pc  the p.constant (k-1)st derivative of  f .
+!      then, the p.linear function  g  is constructed as
+!
+!    g(x) = integral of  h(y)**(1/k)  for  y  from  a  to  x
+!
+!  and its pp coefficients stored in  coefg .
+!
+!  then  brknew  is determined by
+!
+!        brknew(i) = a+g**(-1)((i-1)*step) , i=1,...,lnew+1
+!
+!  where  step=g(b)/lnew  and  (a,b) = (break(1),break(l+1)).
+!
+!  In the event that  pc=d**(k-1)(f) is constant in  (a,b)  and
+!  therefore  h=0 identically,  brknew  is chosen uniformly spaced.
+!
+!  optional  printed  output
+!  coefg.....the pp coefficients of  g  are printed out if  iprint  is set
+!        positive  in data statement below.
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!    Input, break, coef, l, k.....contains the pp-representation of a certain
+!        function  f  of order  k . specifically,
+!        d**(k-1)f(x)=coef(k,i)  for  break(i) <= x < break(i+1)
+!
+!    Input, lnew.....number of intervals into which the interval (a,b) is to be
+!        sectioned by the new breakpoint sequence  brknew .
+!
+!    Output, real ( kind = 8 ) BRKNEW(LNEW+1), the new breakpoint sequence.
+!
+!    Output, coefg.....the coefficient part of the pp-repr.  break, coefg, l, 2
+!        for the monotone p.linear function g with respect to which brknew  will
+!        be equidistributed.
+!
+  implicit none
+
+  integer k
+  integer l
+  integer lnew
+
+  real ( kind = 8 ) break(l+1)
+  real ( kind = 8 ) brknew(lnew+1)
+  real ( kind = 8 ) coef(k,l)
+  real ( kind = 8 ) coefg(2,l)
+  real ( kind = 8 ) dif
+  real ( kind = 8 ) difprv
+  integer i
+  integer, save :: iprint = 0
+  integer j
+  real ( kind = 8 ) oneovk
+  real ( kind = 8 ) step
+  real ( kind = 8 ) stepi
+
+  brknew(1) = break(1)
+  brknew(lnew+1) = break(l+1)
+!
+!  If G is constant, BRKNEW is uniform.
+!
+  if ( l <= 1) then
+
+    step = (break(l+1)-break(1))/ real ( lnew, kind = 8 )
+
+    do i = 2, lnew
+      brknew(i) = break(1) + real ( i - 1, kind = 8 ) * step
+    end do
+
+    return
+
+  end if
+!
+!  Construct the continuous piecewise linear function G.
+!
+  oneovk = 1.0D+00 / real ( k, kind = 8 )
+  coefg(1,1) = 0.0D+00
+  difprv = abs(coef(k,2)-coef(k,1))/(break(3)-break(1))
+
+  do i = 2, l
+    dif = abs(coef(k,i)-coef(k,i-1))/(break(i+1)-break(i-1))
+    coefg(2,i-1) = (dif+difprv)**oneovk
+    coefg(1,i) = coefg(1,i-1)+coefg(2,i-1)*(break(i)-break(i-1))
+    difprv = dif
+  end do
+
+  coefg(2,l) = ( 2.0D+00 * difprv )**oneovk
+!
+!  step = g(b)/lnew
+!
+  step=(coefg(1,l)+coefg(2,l)*(break(l+1)-break(l))) / real ( lnew, kind = 8 )
+
+  if ( 0 < iprint ) then
+    write(*,20)step,(i,coefg(1,i),coefg(2,i),i=1,l)
+  end if
+
+   20 format (' step =',e16.7/(i5,2e16.5))
+!
+!  if G is constant, BRKNEW is uniform.
+!
+  if ( step <= 0.0D+00 ) then
+
+    step = (break(l+1)-break(1)) / real ( lnew, kind = 8 )
+
+    do i = 2, lnew
+      brknew(i) = break(1) + real ( i - 1, kind = 8 ) * step
+    end do
+
+    return
+
+  end if
+!
+!  for i=2,...,lnew, construct  brknew(i)=a+g**(-1)(stepi),
+!  with  stepi=(i-1)*step .  this requires inversion of the p.lin-
+!  ear function  g .  for this,  j  is found so that
+!    g(break(j)) <= stepi .le. g(break(j+1))
+!  and then
+!    brknew(i) = break(j)+(stepi-g(break(j)))/dg(break(j)) .
+!  the midpoint is chosen if  dg(break(j))=0 .
+!
+  j = 1
+
+  do i = 2, lnew
+
+    stepi = real ( i - 1, kind = 8 ) * step
+
+    do
+
+      if ( j == l ) then
+        exit
+      end if
+
+      if ( stepi <= coefg(1,j+1) ) then
+        exit
+      end if
+
+      j = j + 1
+
+    end do
+
+    if ( coefg(2,j) /= 0.0D+00 ) then
+      brknew(i) = break(j)+(stepi-coefg(1,j))/coefg(2,j)
+    else
+      brknew(i) = ( break(j) + break(j+1) ) / 2.0D+00
+    end if
+
+  end do
+
+  return
+end
diff --git a/pppack/ppvalu.f90 b/pppack/ppvalu.f90
new file mode 100644
index 0000000..b392355
--- /dev/null
+++ b/pppack/ppvalu.f90
@@ -0,0 +1,134 @@
+!>
+!> @file ppvalu.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine ppvalu ( break, coef, l, k, x, jderiv, value )
+
+!*******************************************************************************
+!
+!! PPVALU evaluates a piecewise polynomial function or its derivative.
+!
+!  Discussion:
+!
+!    PPVALU calculates the value at X of the JDERIV-th derivative of
+!    the piecewise polynomial function F from its piecewise
+!    polynomial representation.
+!
+!    The interval index I, appropriate for X, is found through a
+!    call to INTERV.  The formula for the JDERIV-th derivative
+!    of F is then evaluated by nested multiplication.
+!
+!    The J-th derivative of F is given by:
+!
+!      (d**j)f(x) =
+!        coef(j+1,i) + h * (
+!        coef(j+2,i) + h * (
+!        ...
+!        coef(k-1,i) + h * (
+!        coef(k,i) / (k-j-1) ) / (k-j-2) ... ) / 2 ) / 1
+!
+!    with
+!
+!      H=X-BREAK(I)
+!
+!    and
+!
+!      i = max( 1 , max( j ,  break(j) <= x , 1 <= j <= l ) ).
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!    Input, real ( kind = 8 ) BREAK(L+1), real COEF(*), integer L, for
+!    piecewise polynomial representation of the function F to
+!    be evaluated.
+!
+!    Input, integer K, the order of the polynomial pieces
+!    that make up the function F.  The usual value for
+!    K is 4, signifying a piecewise cubic polynomial.
+!
+!    Input, real ( kind = 8 ) X, the point at which to evaluate F or
+!    of its derivatives.
+!
+!    Input, integer JDERIV, the order of the derivative to be
+!    evaluated.  If JDERIV is 0, then F itself is evaluated,
+!    which is actually the most common case.  It is assumed
+!    that JDERIV is zero or positive.
+!
+!    Output, real ( kind = 8 ) VALUE, the value of the JDERIV-th
+!    derivative of F at X.
+!
+  implicit none
+
+  integer k
+  integer l
+
+  real ( kind = 8 ) break(l+1)
+  real ( kind = 8 ) coef(k,l)
+  real ( kind = 8 ) fmmjdr
+  real ( kind = 8 ) h
+  integer i
+  integer jderiv
+  integer m
+  integer ndummy
+  real ( kind = 8 ) value
+  real ( kind = 8 ) x
+
+  value = 0.0D+00
+
+  fmmjdr = k - jderiv
+!
+!  Derivatives of order K or higher are identically zero.
+!
+  if ( k <= jderiv ) then
+    return
+  end if
+!
+!  Find the index I of the largest breakpoint to the left of X.
+!
+  call interv ( break, l+1, x, i, ndummy )
+!
+!  Evaluate the JDERIV-th derivative of the I-th polynomial piece at X.
+!
+  h = x - break(i)
+  m = k
+
+  do
+
+    value = ( value / fmmjdr ) * h + coef(m,i)
+    m = m - 1
+    fmmjdr = fmmjdr - 1.0D+00
+
+    if ( fmmjdr <= 0.0D+00 ) then
+      exit
+    end if
+
+  end do
+
+  return
+end
diff --git a/pppack/putit.f90 b/pppack/putit.f90
new file mode 100644
index 0000000..28886c2
--- /dev/null
+++ b/pppack/putit.f90
@@ -0,0 +1,178 @@
+!>
+!> @file putit.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine putit ( t, kpm, left, scrtch, dbiatx, q, nrow, b )
+
+!*************************************************************************
+!
+!! PUTIT puts together one block of the collocation equation system.
+!
+!  Method:
+!
+!    The K collocation equations for the interval  (t(left),t(left+1))
+!    are constructed with the aid of the subroutine DIFEQU( 2, .,
+!    . ) and interspersed (in order) with the side conditions (if any) in
+!    this interval, using DIFEQU ( 3, ., . )  for the information.
+!
+!    The block Q has  kpm  columns, corresponding to the  kpm  b-
+!    splines of order  kpm  which have the interval (t(left),t(left+1))
+!    in their support. the block's diagonal is part of the diagonal of the
+!    total system.  The first equation in this block not overlapped by the
+!    preceding block is therefore equation LOWROW, with lowrow =
+!    number of side conditions in preceding intervals (or blocks).
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!    Input, real ( kind = 8 ) T(LEFT+KPM), the knot sequence.
+!
+!    Input, integer KPM, the order of the spline.
+!
+!    Input, integer LEFT, indicates the interval of interest, viz the interval
+!    (t(left), t(left+1)).
+!
+!    Input, integer NROW, number of rows in block to be put together
+!
+!    Workspace, scrtch used in bsplvd, of size (kpm,kpm)
+!
+!    Workspace, real ( kind = 8 ) DBIATX(KPM,M+1), derivatives of b-splines,
+!    with dbiatx(j,i+1) containing the i-th derivative of the
+!    j-th b-spline of interest
+!
+!    Output, Q  the block, of size (nrow,kpm).
+!
+!    Output, B  the corresponding piece of the right side, of size (nrow)
+!
+  implicit none
+
+  integer kpm
+  integer nrow
+
+  real ( kind = 8 ) b(*)
+  real ( kind = 8 ) dbiatx(kpm,*)
+  real ( kind = 8 ) dx
+  integer i
+  integer irow
+  integer iside
+  integer itermx
+  integer j
+  integer k
+  integer left
+  integer ll
+  integer lowrow
+  integer m
+  integer mode
+  integer mp1
+  real ( kind = 8 ) q(nrow,kpm)
+  real ( kind = 8 ) rho
+  real ( kind = 8 ) scrtch(*)
+  real ( kind = 8 ) sum1
+  real ( kind = 8 ) t(*)
+  real ( kind = 8 ) v(20)
+  real ( kind = 8 ) xm
+  real ( kind = 8 ) xside
+  real ( kind = 8 ) xx
+
+  common /side/ m,iside,xside(10)
+  common /other/ itermx,k,rho(19)
+
+  mp1 = m + 1
+
+  q(1:nrow,1:kpm) = 0.0D+00
+
+  xm = ( t(left+1) + t(left) ) / 2.0D+00
+  dx = ( t(left+1) - t(left) ) / 2.0D+00
+
+  ll = 1
+  lowrow = iside
+
+  do irow = lowrow, nrow
+
+    if ( k < ll ) then
+      go to 20
+    end if
+
+    mode = 2
+!
+!  next collocation point is ...
+!
+    xx = xm+dx*rho(ll)
+    ll = ll+1
+!
+!  The corresponding collocation equation is next unless the next side
+!  condition occurs at a point at, or to the left of, the next
+!  collocation point.
+!
+    if ( m < iside ) then
+      go to 30
+    end if
+
+    if ( xx < xside(iside) ) then
+      go to 30
+    end if
+
+    ll = ll-1
+
+   20   continue
+
+    mode = 3
+    xx = xside(iside)
+
+   30   continue
+
+    call difequ(mode,xx,v)
+!
+!  The next equation, a collocation equation (mode=2) or a side
+!  condition (mode=3), reads
+!    (*)   (v(m+1)*d**m+v(m)*d**(m-1) +...+ v(1)*d**0)f(xx)=v(m+2)
+!  in terms of the info supplied by  difequ . the corresponding
+!  equation for the b-coefficients of  f  therefore has the left side of
+!  (*), evaluated at each of the  kpm  b-splines having  xx  in
+!  their support, as its  kpm  possibly nonzero coefficients.
+!
+    call bsplvd(t,kpm,xx,left,scrtch,dbiatx,mp1)
+
+    do j = 1, kpm
+
+      sum1 = 0.0D+00
+      do i = 1, mp1
+        sum1 = sum1 + v(i) * dbiatx(j,i)
+      end do
+
+      q(irow,j) = sum1
+
+    end do
+
+    b(irow) = v(m+2)
+
+  end do
+
+  return
+end
diff --git a/pppack/rvec_print.f90 b/pppack/rvec_print.f90
new file mode 100644
index 0000000..e482e0a
--- /dev/null
+++ b/pppack/rvec_print.f90
@@ -0,0 +1,83 @@
+!>
+!> @file rvec_print.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine rvec_print ( n, a, title )
+
+!*******************************************************************************
+!
+!! RVEC_PRINT prints a real vector.
+!
+!  Discussion:
+!
+!    If all the entries are integers, the data is printed
+!    in integer format.
+!
+!  Modified:
+!
+!    19 November 2002
+!
+!  Author:
+!
+!    John Burkardt
+!
+!  Parameters:
+!
+!    Input, integer N, the number of components of the vector.
+!
+!    Input, real ( kind = 8 ) A(N), the vector to be printed.
+!
+!    Input, character ( len = * ) TITLE, a title to be printed.
+!
+  implicit none
+
+  integer n
+
+  real ( kind = 8 ) a(n)
+  integer i
+  character ( len = * ) title
+
+  if ( 0 < len_trim ( title ) ) then
+    write ( *, '(a)' ) ' '
+    write ( *, '(a)' ) trim ( title )
+  end if
+
+  write ( *, '(a)' ) ' '
+
+  if ( all ( a(1:n) == aint ( a(1:n) ) ) ) then
+    do i = 1, n
+      write ( *, '(i6,i6)' ) i, int ( a(i) )
+    end do
+  else if ( all ( abs ( a(1:n) ) < 1000000.0D+00 ) ) then
+    do i = 1, n
+      write ( *, '(i6,f14.6)' ) i, a(i)
+    end do
+  else
+    do i = 1, n
+      write ( *, '(i6,g14.6)' ) i, a(i)
+    end do
+  end if
+
+  return
+end
diff --git a/pppack/sbblok.f90 b/pppack/sbblok.f90
new file mode 100644
index 0000000..e490a42
--- /dev/null
+++ b/pppack/sbblok.f90
@@ -0,0 +1,106 @@
+!>
+!> @file sbblok.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine sbblok ( bloks, integs, nbloks, ipivot, b, x )
+
+!*************************************************************************
+!
+!! SBBLOK solves a linear system that was factored by FCBLOK.
+!
+!  Discussion:
+!
+!    The routine supervises the solution, by forward and backward
+!    substitution, of the linear system
+!
+!      A * x = b
+!
+!    for X, with the PLU factorization of A already generated in FCBLOK.
+!    Individual blocks of equations are solved via SUBFOR and SUBBAK.
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!    bloks, integs, nbloks, ipivot    are as on return from fcblok.
+!
+!    b       the right side, stored corresponding to the storage of
+!            the equations. see comments in SLVBLK for details.
+!
+!    Output, real ( kind = 8 ) X(*), the solution vector.
+!
+  implicit none
+
+  integer nbloks
+
+  real ( kind = 8 ) b(*)
+  real ( kind = 8 ) bloks(*)
+  integer i
+  integer index
+  integer indexb
+  integer indexx
+  integer integs(3,nbloks)
+  integer ipivot(*)
+  integer j
+  integer last
+  integer nbp1
+  integer ncol
+  integer nrow
+  real ( kind = 8 ) x(*)
+!
+!  Forward substitution pass:
+!
+  index = 1
+  indexb = 1
+  indexx = 1
+  do i = 1, nbloks
+    nrow = integs(1,i)
+    last = integs(3,i)
+    call subfor(bloks(index),ipivot(indexb),nrow,last,b(indexb),x(indexx))
+    index = nrow*integs(2,i)+index
+    indexb = indexb+nrow
+    indexx = indexx+last
+  end do
+!
+!  Back substitution pass.
+!
+  nbp1 = nbloks + 1
+
+  do j = 1, nbloks
+    i = nbp1 - j
+    nrow = integs(1,i)
+    ncol = integs(2,i)
+    last = integs(3,i)
+    index = index - nrow * ncol
+    indexb = indexb - nrow
+    indexx = indexx - last
+    call subbak ( bloks(index), ipivot(indexb), nrow, ncol, last, x(indexx) )
+  end do
+
+  return
+end
diff --git a/pppack/setupq.f90 b/pppack/setupq.f90
new file mode 100644
index 0000000..56aa439
--- /dev/null
+++ b/pppack/setupq.f90
@@ -0,0 +1,101 @@
+!>
+!> @file setupq.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine setupq ( x, dx, y, npoint, v, qty )
+
+!*************************************************************************
+!
+!! SETUPQ is to be called in SMOOTH.
+!
+!  Discussion:
+!
+!    put  delx=x(.+1)-x(.)  into  v(.,4),
+!    put  the three bands of  q-transp*d  into  v(.,1-3), and
+!    put the three bands of  (d*q)-transp*(d*q)  at and above the diagonal
+!    into  v(.,5-7) .
+!
+!    here,  q is  the tridiagonal matrix of order (npoint-2,npoint)
+!    with general row  1/delx(i) , -1/delx(i)-1/delx(i+1) , 1/delx(i+1)
+!    and   d  is the diagonal matrix  with general row  dx(i) .
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+  implicit none
+
+  integer npoint
+
+  real ( kind = 8 ) diff
+  real ( kind = 8 ) dx(npoint)
+  integer i
+  real ( kind = 8 ) prev
+  real ( kind = 8 ) qty(npoint)
+  real ( kind = 8 ) v(npoint,7)
+  real ( kind = 8 ) x(npoint)
+  real ( kind = 8 ) y(npoint)
+
+  v(1,4) = x(2)-x(1)
+
+  do i = 2, npoint-1
+    v(i,4) = x(i+1) - x(i)
+    v(i,1) = dx(i-1) / v(i-1,4)
+    v(i,2) = -dx(i) / v(i,4) - dx(i) / v(i-1,4)
+    v(i,3) = dx(i+1) / v(i,4)
+  end do
+
+  v(npoint,1) = 0.0D+00
+  do i = 2, npoint-1
+    v(i,5) = v(i,1)**2 + v(i,2)**2 + v(i,3)**2
+  end do
+
+  do i = 3, npoint-1
+    v(i-1,6) = v(i-1,2)*v(i,1)+v(i-1,3)*v(i,2)
+  end do
+
+  v(npoint-1,6) = 0.0D+00
+
+  do i = 4, npoint-1
+    v(i-2,7) = v(i-2,3) * v(i,1)
+  end do
+
+  v(npoint-2,7) = 0.0D+00
+  v(npoint-1,7) = 0.0D+00
+!
+!  Construct  q-transp. * y  in QTY.
+!
+  prev = (y(2)-y(1)) / v(1,4)
+  do i = 2, npoint-1
+    diff = (y(i+1)-y(i)) / v(i,4)
+    qty(i) = diff-prev
+    prev = diff
+  end do
+
+  return
+end
diff --git a/pppack/shiftb.f90 b/pppack/shiftb.f90
new file mode 100644
index 0000000..0ddcd79
--- /dev/null
+++ b/pppack/shiftb.f90
@@ -0,0 +1,107 @@
+!>
+!> @file shiftb.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine shiftb ( ai, ipivot, nrowi, ncoli, last, ai1, nrowi1, ncoli1 )
+
+!*************************************************************************
+!
+!! SHIFTB shifts the rows in current block, ai, not used as pivot
+!  rows, if any, i.e., rows ipivot(last+1),...,ipivot(nrowi),
+!  onto the first mmax=nrow-last rows of the next block, ai1,
+!  with column last+j of ai  going to column j ,
+!  for j=1,...,jmax=ncoli-last. the remaining columns of these
+!  rows of ai1 are zeroed out.
+!
+!                             picture
+!
+!       original situation after         results in a new block i+1
+!       last=2 columns have been       created and ready to be
+!       done in factrb (assuming no      factored by next factrb call.
+!       interchanges of rows)
+!                   1
+!              x  x 1x  x  x           x  x  x  x  x
+!                   1
+!              0  x 1x  x  x           0  x  x  x  x
+!  block i          1                       ---
+!  nrowi=4     0  0 1x  x  x           0  0 1x  x  x  0  01
+!  ncoli=5          1                       1             1
+!  last=2      0  0 1x  x  x           0  0 1x  x  x  0  01
+!              -------------------          1             1   new
+!                   1x  x  x  x  x          1x  x  x  x  x1  block
+!                   1                       1             1   i+1
+!  block i+1        1x  x  x  x  x          1x  x  x  x  x1
+!  nrowi1= 5        1                       1             1
+!  ncoli1= 5        1x  x  x  x  x          1x  x  x  x  x1
+!              -------------------          1-------------1
+!                   1
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+  implicit none
+
+  integer ncoli
+  integer ncoli1
+  integer nrowi1
+  integer nrowi
+
+  real ( kind = 8 ) ai(nrowi,ncoli)
+  real ( kind = 8 ) ai1(nrowi1,ncoli1)
+  integer ip
+  integer ipivot(nrowi)
+  integer j
+  integer last
+  integer m
+
+  if ( nrowi-last < 1 ) then
+    return
+  end if
+
+  if ( ncoli-last < 1 ) then
+    return
+  end if
+!
+!  Put the remainder of block I into AI1.
+!
+  do m = 1, nrowi-last
+    ip = ipivot(last+m)
+    do j = 1, ncoli-last
+      ai1(m,j) = ai(ip,last+j)
+    end do
+  end do
+!
+!  Zero out the upper right corner of ai1.
+!
+  do j = ncoli+1-last, ncoli1
+    do m = 1, nrowi-last
+      ai1(m,j) = 0.0D+00
+    end do
+  end do
+
+  return
+end
diff --git a/pppack/slvblk.f90 b/pppack/slvblk.f90
new file mode 100644
index 0000000..0ce546c
--- /dev/null
+++ b/pppack/slvblk.f90
@@ -0,0 +1,180 @@
+!>
+!> @file slvblk.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine slvblk ( bloks, integs, nbloks, b, ipivot, x, iflag )
+
+!*************************************************************************
+!
+!! SLVBLK solves the almost block diagonal linear system A*x=b.
+!
+!  Discussion:
+!
+!    Such almost block diagonal matrices arise naturally in piecewise
+!    polynomial interpolation or approximation and in finite element
+!    methods for two-point boundary value problems.  The PLU factorization
+!    method is implemented here to take advantage of the special structure
+!    of such systems for savings in computing time and storage requirements.
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!  bloks  a one-dimenional array, of length
+!                   sum( integs(1,i)*integs(2,i) ; i=1,nbloks )
+!         on input, contains the blocks of the almost block diagonal
+!         matrix  a  .  the array integs (see below and the example)
+!         describes the block structure.
+!         on output, contains correspondingly the plu factorization
+!         of  a  (if iflag /= 0).  certain of the entries into bloks
+!         are arbitrary (where the blocks overlap).
+!
+!  integs integer array description of the block structure of  a .
+!         integs(1,i)=no. of rows of block i       = nrow
+!         integs(2,i)=no. of colums of block i     = ncol
+!         integs(3,i)=no. of elim. steps in block i = last
+!                          i =1,2,...,nbloks
+!         the linear system is of order
+!         n = sum ( integs(3,i) , i=1,...,nbloks ),
+!         but the total number of rows in the blocks is
+!         nbrows=sum( integs(1,i) ; i = 1,...,nbloks)
+!
+!  nbloks number of blocks
+!  b       right side of the linear system, array of length nbrows.
+!          certain of the entries are arbitrary, corresponding to
+!          rows of the blocks which overlap (see block structure and
+!          the example below).
+!  ipivot  on output, integer array containing the pivoting sequence
+!          used. length is nbrows
+!  x       on output, contains the computed solution (if iflag /= 0)
+!          length is n.
+!  iflag   on output, integer
+!          =(-1)**(no. of interchanges during factorization)
+!                   if  a  is invertible
+!          =0    if  a  is singular
+!
+!                   auxiliary programs
+!  fcblok (bloks,integs,nbloks,ipivot,scrtch,iflag)  factors the matrix
+!           a , and is used for this purpose in slvblk. its arguments
+!          are as in slvblk, except for
+!              scrtch=a work array of length max(integs(1,i)).
+!
+!  sbblok (bloks,integs,nbloks,ipivot,b,x)  solves the system a*x=b
+!          once  a  is factored. this is done automatically by slvblk
+!          for one right side b, but subsequent solutions may be
+!          obtained for additional b-vectors. the arguments are all
+!          as in slvblk.
+!
+!  dtblok (bloks,integs,nbloks,ipivot,iflag,detsgn,detlog) computes the
+!          determinant of  a  once slvblk or fcblok has done the fact-
+!          orization.the first five arguments are as in slvblk.
+!              detsgn =sign of the determinant
+!              detlog =natural log of the determinant
+!
+!              block structure of  a
+!  the nbloks blocks are stored consecutively in the array  bloks .
+!  the first block has its (1,1)-entry at bloks(1), and, if the i-th
+!  block has its (1,1)-entry at bloks(index(i)), then
+!         index(i+1)=index(i) + nrow(i)*ncol(i) .
+!    the blocks are pieced together to give the interesting part of  a
+!  as follows.  for i=1,2,...,nbloks-1, the (1,1)-entry of the next
+!  block (the (i+1)st block ) corresponds to the (last+1,last+1)-entry
+!  of the current i-th block.  recall last=integs(3,i) and note that
+!  this means that
+!      a. every block starts on the diagonal of  a .
+!      b. the blocks overlap (usually). the rows of the (i+1)st block
+!         which are overlapped by the i-th block may be arbitrarily de-
+!         fined initially. they are overwritten during elimination.
+!    the right side for the equations in the i-th block are stored cor-
+!  respondingly as the last entries of a piece of  b  of length  nrow
+!  (= integs(1,i)) and following immediately in  b  the corresponding
+!  piece for the right side of the preceding block, with the right side
+!  for the first block starting at  b(1) . in this, the right side for
+!  an equation need only be specified once on input, in the first block
+!  in which the equation appears.
+!
+!              example and test driver
+!    the test driver for this package contains an example, a linear
+!  system of order 11, whose nonzero entries are indicated in the fol-
+!  lowing schema by their row and column index modulo 10. next to it
+!  are the contents of the  integs  arrray when the matrix is taken to
+!  be almost block diagonal with  nbloks=5, and below it are the five
+!  blocks.
+!
+!                      nrow1=3, ncol1 = 4
+!           11 12 13 14
+!           21 22 23 24   nrow2=3, ncol2 = 3
+!           31 32 33 34
+!  last1=2      43 44 45
+!                 53 54 55            nrow3=3, ncol3 = 4
+!        last2=3         66 67 68 69   nrow4 = 3, ncol4 = 4
+!                          76 77 78 79      nrow5=4, ncol5 = 4
+!                          86 87 88 89
+!                 last3=1   97 98 99 90
+!                    last4=1   08 09 00 01
+!                                18 19 10 11
+!                       last5=4
+!
+!         actual input to bloks shown by rows of blocks of  a .
+!      (the ** items are arbitrary, this storage is used by slvblk)
+!
+!  11 12 13 14  / ** ** **  / 66 67 68 69  / ** ** ** **  / ** ** ** **
+!  21 22 23 24 /  43 44 45 /  76 77 78 79 /  ** ** ** ** /  ** ** ** **
+!  31 32 33 34/   53 54 55/   86 87 88 89/   97 98 99 90/   08 09 00 01
+!                                                           18 19 10 11
+!
+!  index=1      index = 13  index = 22     index = 34     index = 46
+!
+!         actual right side values with ** for arbitrary values
+!  b1 b2 b3 ** b4 b5 b6 b7 b8 ** ** b9 ** ** b10 b11
+!
+!  (it would have been more efficient to combine block 3 with block 4)
+!
+  implicit none
+
+  integer nbloks
+
+  real ( kind = 8 ) b(*)
+  real ( kind = 8 ) bloks(*)
+  integer iflag
+  integer integs(3,nbloks)
+  integer ipivot(*)
+  real ( kind = 8 ) x(*)
+!
+!  In the call to FCBLOK, X is used for temporary storage.
+!
+  call fcblok ( bloks, integs, nbloks, ipivot, x, iflag )
+
+  if ( iflag == 0 ) then
+    return
+  end if
+
+  call sbblok ( bloks, integs, nbloks, ipivot, b, x )
+
+  return
+end
diff --git a/pppack/smooth.f90 b/pppack/smooth.f90
new file mode 100644
index 0000000..cab3d0b
--- /dev/null
+++ b/pppack/smooth.f90
@@ -0,0 +1,218 @@
+!>
+!> @file smooth.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine smooth ( x, y, dy, npoint, s, v, a, sfp )
+
+!*************************************************************************
+!
+!! SMOOTH constructs the cubic smoothing spline to given data.
+!
+!  Discussion:
+!
+!    The data is of the form
+!
+!      (x(i),y(i)), i=1,...,npoint,
+!
+!    The cubic smoothing spline has as small a second derivative as
+!    possible while
+!
+!      s(f)=sum( ((y(i)-f(x(i)))/dy(i))**2 , i=1,...,npoint ) <= s .
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!  input
+!
+!    Input, real ( kind = 8 ) X(NPOINT), the abscissas, assumed to be strictly
+!    increasing .
+!
+!    Input, real ( kind = 8 ) Y(NPOINT), the corresponding ordinates.
+!
+!  dy(1),...,dy(npoint)     estimate of uncertainty in data, assumed
+!  to be positive .
+!
+!  npoint.....number of data points,  assumed greater than 1
+!
+!  s.....upper bound on the discrete weighted mean square distance of
+!        the approximation  f  from the data .
+!
+!  work arrays:
+!
+!  v      of size (npoint,7)
+!  a      of size (npoint,4)
+!
+!  output
+!
+!  a(.,1).....contains the sequence of smoothed ordinates .
+!  a(i,j)=d**(j-1)f(x(i)), j=2,3,4, i=1,...,npoint-1 ,  i.e., the
+!        first three derivatives of the smoothing spline  f  at the
+!        left end of each of the data intervals .
+!     Warning . . .   a  would have to be transposed before it
+!        could be used in  ppvalu .
+!
+!  Method:
+!
+!    the matrices  q-transp*d  and  q-transp*d**2*q  are constructed in
+!    SETUPQ from  x  and  dy , as is the vector  qty=q-transp*y .
+!    then, for given  p , the vector U is determined in CHOL1D as
+!    the solution of the linear system
+!               (6(1-p)q-transp*d**2*q+p*r)u =qty  .
+!  from  u , the smoothing spline  f  (for this choice of smoothing par-
+!  ameter  p ) is obtained in the sense that
+!                        f(x(.)) = y-6(1-p)d**2*q*u        and
+!                  (d**2)f(x(.)) = 6*p*u                      .
+!
+!  the smoothing parameter  p  is found (if possible) so that
+!                sf(p) = s ,
+!  with  sf(p)=s(f) , where  f  is the smoothing spline as it depends
+!  on  p .  if  s=0, then p = 1 . if  sf(0) <= s , then p = 0 .
+!  otherwise, the secant method is used to locate an appropriate  p  in
+!  the open interval  (0,1) . specifically,
+!                p(0)=0,  p(1) = (s-sf(0))/dsf
+!  with  dsf=-24*u-transp*r*u  a good approximation to  d(sf(0)) = dsf
+!  +60*(d*q*u)-transp*(d*q*u) , and  u  as obtained for  p=0 .
+!  after that, for n=1,2,...  until sf(p(n)) <= 1.01*s, do....
+!  determine  p(n+1)  as the point at which the secant to  sf  at the
+!  points  p(n)  and  p(n-1)  takes on the value  s .
+!  if  p(n+1) >= 1 , choose instead  p(n+1)  as the point at which
+!  the parabola  sf(p(n))*((1-.)/(1-p(n)))**2  takes on the value  s.
+!
+!  Note that, in exact arithmetic, always  p(n+1) < p(n) , hence
+!  sf(p(n+1)) < sf(p(n)) . therefore, also stop the iteration,
+!  with final  p=1 , in case  sf(p(n+1)) >= sf(p(n)) .
+!
+  implicit none
+
+  integer npoint
+
+  real ( kind = 8 ) a(npoint,4)
+  real ( kind = 8 ) change
+  real ( kind = 8 ) dy(npoint)
+  integer i
+  real ( kind = 8 ) p
+  real ( kind = 8 ) prevp
+  real ( kind = 8 ) prevsf
+  real ( kind = 8 ) s
+  real ( kind = 8 ) sfp
+  real ( kind = 8 ) utru
+  real ( kind = 8 ) v(npoint,7)
+  real ( kind = 8 ) x(npoint)
+  real ( kind = 8 ) y(npoint)
+
+  call setupq ( x, dy, y, npoint, v, a(1,4) )
+
+  if ( 0.0D+00 < s ) then
+    go to 20
+  end if
+
+10    continue
+
+  p = 1.0D+00
+  call chol1d ( p, v, a(1,4), npoint, a(1,3), a(1,1) )
+  sfp = 0.0D+00
+  go to 70
+
+20    continue
+
+  p = 0.0D+00
+  call chol1d ( p, v, a(1,4), npoint, a(1,3), a(1,1) )
+
+  sfp = 0.0D+00
+  do i = 1, npoint
+    sfp = sfp + ( a(i,1) * dy(i) )**2
+  end do
+  sfp = sfp * 36.0D+00
+
+  if ( sfp <= s ) then
+    go to 70
+  end if
+
+  prevp = 0.0D+00
+  prevsf = sfp
+
+  utru = 0.0D+00
+  do i = 2, npoint
+    utru = utru + v(i-1,4) * ( a(i-1,3) * ( a(i-1,3) + a(i,3) ) + a(i,3)**2 )
+  end do
+
+  p = ( sfp - s ) / ( 24.0D+00 * utru )
+!
+!  Secant iteration for the determination of p starts here.
+!
+   50 continue
+
+  call chol1d ( p, v, a(1,4), npoint, a(1,3), a(1,1) )
+
+  sfp = 0.0D+00
+  do i = 1, npoint
+    sfp = sfp + ( a(i,1) * dy(i) )**2
+  end do
+  sfp = sfp * 36.0D+00 * ( 1.0D+00 - p )**2
+
+  if ( sfp <= 1.01D+00 * s ) then
+    go to 70
+  end if
+
+  if ( prevsf <= sfp ) then
+    go to 10
+  end if
+
+  change = ( p - prevp ) / ( sfp - prevsf ) * ( sfp - s )
+  prevp = p
+  p = p - change
+  prevsf = sfp
+
+  if ( 1.0D+00 <= p ) then
+    p = 1.0D+00 - sqrt ( s / prevsf ) * ( 1.0D+00 - prevp )
+  end if
+
+  go to 50
+!
+!  The correct value of p has been found.
+!  Compute polynomial coefficients from  q*u (in a(.,1)).
+!
+   70 continue
+
+  do i = 1, npoint
+    a(i,1) = y(i) - 6.0D+00 * ( 1.0D+00 - p ) * dy(i)**2 * a(i,1)
+  end do
+
+  do i = 1, npoint
+    a(i,3) = 6.0D+00 * a(i,3) * p
+  end do
+
+  do i = 1, npoint-1
+    a(i,4) = ( a(i+1,3) - a(i,3) ) / v(i,4)
+    a(i,2) = ( a(i+1,1) - a(i,1) ) / v(i,4) &
+      - ( a(i,3) + a(i,4) / 3.0D+00 * v(i,4) ) / 2.0D+00 * v(i,4)
+  end do
+
+  return
+end
diff --git a/pppack/spli2d.f90 b/pppack/spli2d.f90
new file mode 100644
index 0000000..348d253
--- /dev/null
+++ b/pppack/spli2d.f90
@@ -0,0 +1,241 @@
+!>
+!> @file spli2d.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine spli2d ( tau, gtau, t, n, k, m, work, q, bcoef, iflag )
+
+!*************************************************************************
+!
+!! SPLI2D produces a interpolatory tensor product spline.
+!
+!  Discussion:
+!
+!    SPLI2D is an extended version of SPLINT.
+!
+!    SPLI2D produces the B-spline coefficients BCOEF(J,.) of the
+!    spline of order K with knots T(I), I=1,..., N+K, which takes on
+!    the value GTAU(I,J) at TAU(I), I=1,..., N, J=1,...,M.
+!
+!    The I-th equation of the linear system
+!
+!      A * BCOEF = B
+!
+!    for the B-spline coefficients of the interpolant enforces
+!    interpolation at TAU(I), I=1,...,N.  Hence,  B(I)=GTAU(I),
+!    all I, and A is a band matrix with 2K-1 bands, if it is
+!    invertible.
+!
+!    The matrix A is generated row by row and stored, diagonal by
+!    diagonal, in the rows of the array Q, with the main diagonal
+!    going into row K.
+!
+!    The banded system is then solved by a call to BANFAC, which
+!    constructs the triangular factorization for A and stores it
+!    again in Q, followed by a call to BANSLV, which then obtains
+!    the solution BCOEF by substitution.
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!    Input, real ( kind = 8 ) TAU(N), contains the data point abscissas.
+!    TAU must be strictly increasing
+!
+!    Input, real ( kind = 8 ) GTAU(N), contains the data point ordinates,
+!    J=1,...,M.
+!
+!    Input, real ( kind = 8 ) T(N+K), the knot sequence.
+!
+!    Input, integer N,  the number of data points and the
+!    dimension of the spline space SPLINE(K,T)
+!
+!    Input, integer K, the order of the spline.
+!
+!    Input, integer M, the number of data sets.
+!
+!    Work space, real ( kind = 8 ) WORK(N).
+!
+!    Output, real ( kind = 8 ) Q(2*K-1)*N, containing the triangular
+!    factorization of the coefficient matrix of the linear
+!    system for the B-spline coefficients of the spline interpolant.
+!
+!    The B-spline coefficients for the interpolant of an additional
+!    data set (TAU(I),HTAU(I)), I=1,...,N  with the same data
+!    abscissae can be obtained without going through all the
+!    calculations in this routine, simply by loading HTAU into
+!    BCOEF and then using the statement
+!
+!      CALL BANSLV(Q,2*K-1,N,K-1,K-1,BCOEF)
+!
+!    Output, real ( kind = 8 ) BCOEF(N), the B-spline coefficients of
+!    the interpolant.
+!
+!    Output, integer IFLAG, error indicator.
+!    1, no error.
+!    2, an error occurred, which may have been caused by
+!       singularity of the linear system.
+!
+!       The linear system to be solved is theoretically invertible if
+!       and only if
+!
+!         T(I) < TAU(I) < TAU(I+K), for all I.
+!
+!       Violation of this condition is certain to lead to IFLAG=2.
+!
+  implicit none
+
+  integer m
+  integer n
+
+  real ( kind = 8 ) bcoef(m,n)
+  real ( kind = 8 ) gtau(n,m)
+  integer i
+  integer iflag
+  integer ilp1mx
+  integer j
+  integer jj
+  integer k
+  integer left
+  real ( kind = 8 ) q((2*k-1)*n)
+  real ( kind = 8 ) t(n+k)
+  real ( kind = 8 ) tau(n)
+  real ( kind = 8 ) taui
+  real ( kind = 8 ) work(n)
+
+  left = k
+
+  do i = 1, (2*k-1)*n
+    q(i) = 0.0
+  end do
+!
+!  Construct the N interpolation equations.
+!
+  do i = 1, n
+
+    taui = tau(i)
+    ilp1mx = min(i+k,n+1)
+!
+!  Find the index LEFT in the closed interval (I,I+K-1) such
+!  that:
+!
+!    T(LEFT) < = TAU(I) < T(LEFT+1)
+!
+!  The matrix will be singular if this is not possible.
+!
+    left = max(left,i)
+
+    if ( taui < t(left) ) then
+      iflag = 2
+      write(*,*)' '
+      write(*,*)'SPLI2D - Fatal error!'
+      write(*,*)'  The TAU array is not strictly increasing.'
+      stop
+    end if
+
+   20   continue
+
+    if ( t(left+1) <= taui ) then
+
+      left = left+1
+      if ( left < ilp1mx ) then
+        go to 20
+      end if
+
+      left = left-1
+
+      if ( t(left+1) < taui ) then
+        iflag = 2
+        write(*,*)' '
+        write(*,*)'SPLI2D - Fatal error!'
+        write(*,*)'  The TAU array is not strictly increasing.'
+        stop
+      end if
+
+    end if
+!
+!  The I-th equation enforces interpolation at TAUI, hence
+!
+!    A(I,J)=B(J,K,T)(TAUI), for all J.
+!
+!  Only the K entries with J=LEFT-K+1, ..., LEFT actually might be
+!  nonzero.  These K numbers are returned, in WORK (used for
+!  temporary storage here), by the following call:
+!
+    call bsplvb(t,k,1,taui,left,work)
+!
+!  We therefore want
+!
+!    WORK(J)=B(LEFT-K+J)(TAUI)
+!
+!  to go into
+!
+!        a(i,left-k+j), i.e., into  q(i-(left+j)+2*k,(left+j)-k) since
+!        a(i+j,j)  is to go into  q(i+k,j), all i,j,  if we consider  q
+!        as a two-dim. array , with  2*k-1  rows (see comments in
+!        banfac). in the present program, we treat  q  as an equivalent
+!        one-dimensional array (because of fortran restrictions on
+!  ??  LOST LINE ??
+!        entry
+!            i -(left+j)+2*k + ((left+j)-k-1)*(2*k-1)
+!                 = i-left+1+(left -k)*(2*k-1) + (2*k-2)*j
+!  of  q .
+!
+    jj = i-left+1+(left-k)*(k+k-1)
+
+    do j = 1, k
+      jj = jj+k+k-2
+      q(jj) = work(j)
+    end do
+
+  end do
+!
+!  Factor A, stored again in Q.
+!
+  call banfac(q,k+k-1,n,k-1,k-1,iflag)
+
+  if ( iflag == 2 ) then
+    write(*,*)' '
+    write(*,*)'SPLI2D - Fatal error!'
+    write(*,*)'  BANFAC reports that the matrix is singular.'
+    stop
+  end if
+!
+!  Solve A*BCOEF=GTAU by backsubstitution.
+!
+  do j = 1, m
+
+    work(1:n) = gtau(1:n,j)
+
+    call banslv ( q, k+k-1, n, k-1, k-1, work )
+
+    bcoef(j,1:n) = work(1:n)
+
+  end do
+
+  return
+end
diff --git a/pppack/spline_hermite_set.f90 b/pppack/spline_hermite_set.f90
new file mode 100644
index 0000000..3b7e28e
--- /dev/null
+++ b/pppack/spline_hermite_set.f90
@@ -0,0 +1,90 @@
+!>
+!> @file spline_hermite_set.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine spline_hermite_set ( ndata, tdata, c )
+
+!*************************************************************************
+!
+!! SPLINE_HERMITE_SET sets up a piecewise cubic Hermite interpolant.
+!
+!  Modified:
+!
+!    06 April 1999
+!
+!  Reference:
+!
+!    Conte and de Boor,
+!    Algorithm CALCCF,
+!    Elementary Numerical Analysis,
+!    1973, page 235.
+!
+!  Parameters:
+!
+!    Input, integer NDATA, the number of data points.
+!    NDATA must be at least 2.
+!
+!    Input, real ( kind = 8 ) TDATA(NDATA), the abscissas of the data points.
+!    The entries of TDATA are assumed to be strictly increasing.
+!
+!    Input/output, real ( kind = 8 ) C(4,NDATA).
+!    On input, C(1,I) and C(2,I) should contain the value of the
+!    function and its derivative at TDATA(I), for I = 1 to NDATA.
+!    These values will not be changed by this routine.
+!    On output, C(3,I) and C(4,I) contain the quadratic
+!    and cubic coefficients of the Hermite polynomial
+!    in the interval (TDATA(I), TDATA(I+1)), for I=1 to NDATA-1.
+!    C(3,NDATA) and C(4,NDATA) are set to 0.
+!    In the interval (TDATA(I), TDATA(I+1)), the interpolating Hermite
+!    polynomial is given by
+!
+!    SVAL(TVAL) =                C(1,I)
+!       + ( TVAL - TDATA(I) ) * ( C(2,I)
+!       + ( TVAL - TDATA(I) ) * ( C(3,I)
+!       + ( TVAL - TDATA(I) ) *   C(4,I) ) )
+!
+  implicit none
+
+  integer ndata
+
+  real ( kind = 8 ) c(4,ndata)
+  real ( kind = 8 ) divdif1
+  real ( kind = 8 ) divdif3
+  real ( kind = 8 ) dt
+  integer i
+  real ( kind = 8 ) tdata(ndata)
+
+  do i = 1, ndata-1
+    dt = tdata(i+1) - tdata(i)
+    divdif1 = ( c(1,i+1) - c(1,i) ) / dt
+    divdif3 = c(2,i) + c(2,i+1) - 2.0D+00 * divdif1
+    c(3,i) = ( divdif1 - c(2,i) - divdif3 ) / dt
+    c(4,i) = divdif3 / ( dt * dt )
+  end do
+
+  c(3,ndata) = 0.0D+00
+  c(4,ndata) = 0.0D+00
+
+  return
+end
diff --git a/pppack/spline_hermite_val.f90 b/pppack/spline_hermite_val.f90
new file mode 100644
index 0000000..12bdcc9
--- /dev/null
+++ b/pppack/spline_hermite_val.f90
@@ -0,0 +1,97 @@
+!>
+!> @file spline_hermite_val.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine spline_hermite_val ( ndata, tdata, c, tval, sval )
+
+!*************************************************************************
+!
+!! SPLINE_HERMITE_VAL evaluates a piecewise cubic Hermite interpolant.
+!
+!  Discussion:
+!
+!    SPLINE_HERMITE_SET must be called first, to set up the
+!    spline data from the raw function and derivative data.
+!
+!  Modified:
+!
+!    06 April 1999
+!
+!  Reference:
+!
+!    Conte and de Boor,
+!    Algorithm PCUBIC,
+!    Elementary Numerical Analysis,
+!    1973, page 234.
+!
+!  Parameters:
+!
+!    Input, integer NDATA, the number of data points.
+!    NDATA is assumed to be at least 2.
+!
+!    Input, real ( kind = 8 ) TDATA(NDATA), the abscissas of the data points.
+!    The entries of TDATA are assumed to be strictly increasing.
+!
+!    Input, real ( kind = 8 ) C(4,NDATA), contains the data computed by
+!    SPLINE_HERMITE_SET.
+!
+!    Input, real ( kind = 8 ) TVAL, the point where the interpolant is to
+!    be evaluated.
+!
+!    Output, real ( kind = 8 ) SVAL, the value of the interpolant at TVAL.
+!
+  implicit none
+
+  integer ndata
+
+  real ( kind = 8 ) c(4,ndata)
+  real ( kind = 8 ) dt
+  integer i
+  integer j
+  real ( kind = 8 ) sval
+  real ( kind = 8 ) tdata(ndata)
+  real ( kind = 8 ) tval
+!
+!  Find the interval J = [ TDATA(J), TDATA(J+1) ] that contains
+!  or is nearest to TVAL.
+!
+  j = ndata - 1
+
+  do i = 1, ndata-2
+
+    if ( tval < tdata(i+1) ) then
+      j = i
+      exit
+    end if
+
+  end do
+!
+!  Evaluate the cubic polynomial.
+!
+  dt = tval - tdata(j)
+
+  sval = c(1,j) + dt * ( c(2,j) + dt * ( c(3,j) + dt * c(4,j) ) )
+
+  return
+end
diff --git a/pppack/splint.f90 b/pppack/splint.f90
new file mode 100644
index 0000000..10ef1f8
--- /dev/null
+++ b/pppack/splint.f90
@@ -0,0 +1,208 @@
+!>
+!> @file splint.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine splint ( tau, gtau, t, n, k, q, bcoef, iflag )
+
+!*************************************************************************
+!
+!! SPLINT produces the B-spline coefficients BCOEF of an interpolating spline.
+!
+!  Discussion:
+!
+!    The spline is of order K with knots T(1:N+K), and takes on the
+!    value GTAU(I) at TAU(I), for I = 1 to N.
+!
+!    The I-th equation of the linear system
+!      A * BCOEF = B
+!    for the b-coefficients of the interpolant enforces interpolation
+!    at TAU(1:N).
+!
+!    Hence,  b(i)=gtau(i), all i, and  a  is a band matrix with  2k-1
+!    bands (if it is invertible).
+!
+!    The matrix A is generated row by row and stored, diagonal by di-
+!    agonal, in the rows of the array  q , with the main diagonal go-
+!    ing into row K.  see comments in the program below.
+!
+!    The banded system is then solved by a call to  banfac (which con-
+!    structs the triangular factorization for  a  and stores it again in
+!    q ), followed by a call to  banslv (which then obtains the solution
+!    bcoef  by substitution).
+!
+!    BANFAC does no pivoting, since the total positivity of the matrix
+!    A makes this unnecessary.
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!    Input, real ( kind = 8 ) TAU(N), the data point abscissas.  The entries in
+!    TAU should be strictly increasing.
+!
+!    Input, real ( kind = 8 ) GTAU(N), the data ordinates.
+!
+!    Input, real ( kind = 8 ) T(N+K), the knot sequence.
+!
+!    Input, integer N, the number of data points.
+!
+!    Input, integer K, the order of the spline.
+!
+!  output
+!
+!  q, array of size  (2*k-1)*n , containing the triangular factoriz-
+!        ation of the coefficient matrix of the linear system for the b-
+!        coefficients of the spline interpolant.
+!        the b-coefficients for the interpolant of an additional data set can
+!        be obtained without going through all the calculations in this
+!        routine, simply by loading  htau  into  bcoef  and then execut-
+!        ing the    call banslv ( q, 2*k-1, n, k-1, k-1, bcoef )
+!
+!  bcoef, the b-coefficients of the interpolant, of length  n.
+!
+!  iflag, an integer indicating success (= 1)  or failure (= 2)
+!        the linear system to be solved is (theoretically) invertible if
+!        and only if
+!              t(i) < tau(i) < tau(i+k),    all i.
+!        violation of this condition is certain to lead to  iflag=2 .
+!
+  implicit none
+
+  integer n
+
+  real ( kind = 8 ) bcoef(n)
+  real ( kind = 8 ) gtau(n)
+  integer i
+  integer iflag
+  integer ilp1mx
+  integer j
+  integer jj
+  integer k
+  integer kpkm2
+  integer left
+  real ( kind = 8 ) q((2*k-1)*n)
+  real ( kind = 8 ) t(n+k)
+  real ( kind = 8 ) tau(n)
+  real ( kind = 8 ) taui
+
+  kpkm2 = 2*(k-1)
+  left = k
+
+  do i = 1, (2*k-1)*n
+    q(i) = 0.0D+00
+  end do
+!
+!  loop over i to construct the  n  interpolation equations
+!
+  do i = 1, n
+
+    taui = tau(i)
+    ilp1mx = min(i+k,n+1)
+!
+!  find  left  in the closed interval (i,i+k-1) such that
+!    t(left) <= tau(i)  < t(left+1)
+!  matrix is singular if this is not possible
+!
+    left = max(left,i)
+
+    if ( taui < t(left)) then
+      go to 70
+    end if
+
+   20   continue
+
+    if ( taui < t(left+1)) then
+      go to 30
+    end if
+
+    left = left+1
+    if ( left < ilp1mx) then
+      go to 20
+    end if
+
+    left = left-1
+    if ( t(left+1) < taui ) then
+      go to 70
+    end if
+!
+!  The i-th equation enforces interpolation at taui, hence
+!  a(i,j)=b(j,k,t)(taui), all j. only the  k  entries with  j =
+!  left-k+1,...,left actually might be nonzero. these  k  numbers
+!  are returned, in  bcoef (used for temp.storage here), by the
+!  following
+!
+   30   continue
+
+    call bsplvb(t,k,1,taui,left,bcoef)
+!
+!  We therefore want  bcoef(j)=b(left-k+j)(taui) to go into
+!  a(i,left-k+j), i.e., into  q(i-(left+j)+2*k,(left+j)-k) since
+!  a(i+j,j)  is to go into  q(i+k,j), all i,j,  if we consider  q
+!  as a two-dim. array , with  2*k-1  rows (see comments in
+!  banfac). in the present program, we treat  q  as an equivalent
+!  one-dimensional array (because of fortran restrictions on
+!  dimension statements) . we therefore want  bcoef(j) to go into
+!  entry
+!    i -(left+j)+2*k + ((left+j)-k-1)*(2*k-1)
+!   = i-left+1+(left -k)*(2*k-1) + (2*k-2)*j
+!  of  q .
+!
+    jj = i-left+1+(left-k)*(k+k-1)
+    do j = 1,k
+      jj = jj+kpkm2
+      q(jj) = bcoef(j)
+    end do
+
+  end do
+!
+!  Obtain factorization of A, stored again in Q.
+!
+  call banfac ( q, k+k-1, n, k-1, k-1, iflag )
+
+  if ( iflag == 2 ) then
+    write(*,*)' '
+    write(*,*)'SPLINT - Fatal Error!'
+    write(*,*)'  The linear system is not invertible!'
+    return
+  end if
+!
+!  Solve  a*bcoef=gtau  by backsubstitution
+!
+  bcoef(1:n) = gtau(1:n)
+
+  call banslv ( q, k+k-1, n, k-1, k-1, bcoef )
+  return
+
+   70 iflag=2
+
+  write ( *, * ) ' '
+  write ( *, * ) 'SPLINT - Fatal Error!'
+  write ( *, * ) '  The linear system is not invertible!'
+
+  return
+end
diff --git a/pppack/splopt.f90 b/pppack/splopt.f90
new file mode 100644
index 0000000..e27065d
--- /dev/null
+++ b/pppack/splopt.f90
@@ -0,0 +1,371 @@
+!>
+!> @file splopt.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine splopt ( tau, n, k, scrtch, t, iflag )
+
+!*************************************************************************
+!
+!! SPLOPT computes the knots for an optimal recovery scheme.
+!
+!  Discussion:
+!
+!    The optimal recovery scheme is of order K for data at TAU(1:N).
+!
+!    The interior knots T(K+1:N) are determined by Newton's method in
+!    such a way that the signum function which changes sign at
+!      T(K+1), ..., T(N)  and nowhere else in ( TAU(1), TAU(n) ) is
+!    orthogonal to the spline space SPLINE ( K, TAU ) on that interval.
+!
+!    Let XI(J) be the current guess for T(K+J), j=1,...,n-k.  Then
+!    the next Newton iterate is of the form
+!      xi(j) + (-)**(n-k-j)*x(j)  ,  j=1,...,n-k,
+!    with X the solution of the linear system
+!      C * X = D.
+!
+!    Here,  c(i,j)=b(i)(xi(j)), all j, with  b(i)  the i-th b-spline of
+!    order K for the knot sequence TAU, all i, and D is the vector
+!    given by  d(i)=sum( -a(j) , j=i,...,n )*(TAU(i+k)-TAU(i))/k, all i,
+!    with  a(i)=sum ( (-)**(n-k-j)*b(i,k+1,tau)(xi(j)) , j=1,...,n-k )
+!    for i=1,...,n-1, and  a(n)=-.5 .
+!
+!    See chapter XIII of text and references there for a derivation.
+!
+!    The first guess for  t(k+j)  is  (TAU(j+1)+...+TAU(j+k-1))/(k-1) .
+!    iteration terminates if  max(abs(x(j))) < t o l  , with
+!      TOL = t o l r t e *(TAU(n)-TAU(1))/(n-k) ,
+!    or else after NEWTMX iterations , currently,
+!      newtmx, tolrte / 10, .000001
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!    Input, real ( kind = 8 ) TAU(N), the interpolation points.
+!    assumed to be nondecreasing, with tau(i) < tau(i+k),all i.
+!
+!    Input, integer N, the number of data points.
+!
+!    Input, integer K, the order of the optimal recovery scheme to be used.
+!
+!    Workspace, real ( kind = 8 ) SCRTCH((N-K)*(2*K+3)+5*K+3).  The various
+!    contents are specified in the text below .
+!
+!    Output, real ( kind = 8 ) T(N+K), the optimal knots ready for
+!    use in optimal recovery. specifically,  t(1)=... = t(k) =
+!    tau(1)  and  t(n+1)=... = t(n+k) = tau(n) , while the  n-k
+!    interior knots  t(k+1), ..., t(n)  are calculated.
+!
+!    Output, integer IFLAG, error indicator.
+!    = 1, success.  T contains the optimal knots.
+!    = 2, failure.  K < 3 or N < K or the linear system was singular.
+!
+  implicit none
+
+  integer k
+  integer n
+
+  real ( kind = 8 ) del
+  real ( kind = 8 ) delmax
+  real ( kind = 8 ) floatk
+  integer i
+  integer id
+  integer iflag
+  integer index
+  integer j
+  integer kp1
+  integer kpkm1
+  integer kpn
+  integer l
+  integer left
+  integer leftmk
+  integer lenw
+  integer ll
+  integer llmax
+  integer llmin
+  integer na
+  integer nb
+  integer nc
+  integer nd
+  integer, parameter :: newtmx = 10
+  integer newton
+  integer nmk
+  integer nx
+  real ( kind = 8 ) scrtch((n-k)*(2*k+3)+5*k+3)
+  real ( kind = 8 ) t(n+k)
+  real ( kind = 8 ) tau(n)
+  real ( kind = 8 ) sign
+  real ( kind = 8 ) signst
+  real ( kind = 8 ) sum1
+  real ( kind = 8 ) tol
+  real ( kind = 8 ), parameter :: tolrte = 0.000001D+00
+  real ( kind = 8 ) xij
+
+  nmk = n - k
+
+  if ( n < k ) then
+    write ( *, * ) ' '
+    write ( *, * ) 'SPLOPT - Fatal error!'
+    write ( *, * ) '  N < K, N = ',n,' K = ',k
+    iflag = 2
+    return
+  end if
+
+  if ( n == k ) then
+    do i = 1, k
+      t(i) = tau(1)
+      t(n+i) = tau(n)
+    end do
+    return
+  end if
+
+  if ( k <= 2 ) then
+    write(*,*)' '
+    write(*,*)'SPLOPT - Fatal error!'
+    write(*,*)'  K < 2, K = ',k
+    iflag = 2
+    stop
+  end if
+
+  floatk = k
+  kp1 = k+1
+  kpkm1 = k+k-1
+  kpn = k+n
+
+  signst = -1.0D+00
+  if ( (nmk/2) * 2 < nmk ) then
+    signst = 1.0D+00
+  end if
+!
+!  scrtch(i)=tau-extended(i), i=1,...,n+k+k
+!
+  nx = n + k + k + 1
+!
+!  scrtch(i+nx)=xi(i),i=0,...,n-k+1
+!
+  na = nx + nmk + 1
+!
+!  scrtch(i+na)=-a(i), i=1,...,n
+!
+  nd = na + n
+!
+!  scrtch(i+nd)=x(i) or d(i), i=1,...,n-k
+!
+  nb = nd+nmk
+!
+!  scrtch(i+nb)=biatx(i),i=1,...,k+1
+!
+  nc = nb+kp1
+!
+!  scrtch(i+(j-1)*(2k-1)+nc)=w(i,j) = c(i-k+j,j), i=j-k,...,j+k,
+!                                                     j=1,...,n-k.
+!
+  lenw = kpkm1*nmk
+!
+!  Extend TAU to a knot sequence and store in scrtch.
+!
+  do j = 1, k
+    scrtch(j) = tau(1)
+    scrtch(kpn+j) = tau(n)
+  end do
+
+  do j = 1, n
+    scrtch(k+j) = tau(j)
+  end do
+!
+!  First guess for  scrtch (.+nx) = xi .
+!
+  scrtch(nx) = tau(1)
+  scrtch(nmk+1+nx) = tau(n)
+
+  do j = 1, nmk
+
+    sum1 = 0.0D+00
+    do l = 1, k-1
+      sum1 = sum1 + tau(j+l)
+    end do
+
+    scrtch(j+nx) = sum1 / real ( k - 1, kind = 8 )
+
+  end do
+!
+!  last entry of  scrtch (.+na) =-a  is always ...
+!
+  scrtch(n+na) = 0.5D+00
+!
+!  Start the Newton iteration.
+!
+  newton = 1
+  tol = tolrte * ( tau(n) - tau(1) ) / real ( nmk, kind = 8 )
+!
+!  Start the Newton step.
+!  compute the 2k-1 bands of the matrix c and store in scrtch(.+nc),
+!  and compute the vector  scrtch(.+na)=-a.
+!
+  100 continue
+
+  do i = 1, lenw
+    scrtch(i+nc) = 0.0D+00
+  end do
+
+  do i = 2, n
+    scrtch(i-1+na) = 0.0D+00
+  end do
+
+  sign = signst
+  left = kp1
+
+  do j = 1, nmk
+
+    xij = scrtch(j+nx)
+
+  130   continue
+
+    if ( xij < scrtch(left+1) ) then
+      go to 140
+    end if
+
+    left = left+1
+    if ( left < kpn ) then
+      go to 130
+    end if
+    left = left-1
+
+  140   continue
+
+    call bsplvb(scrtch,k,1,xij,left,scrtch(1+nb))
+!
+!  The TAU sequence in scrtch is preceded by  k  additional knots
+!  therefore,  scrtch(ll+nb)  now contains  b(left-2k+ll)(xij)
+!  which is destined for  c(left-2k+ll,j), and therefore for
+!    w(left-k-j+ll,j)= scrtch(left-k-j+ll+(j-1)*kpkm1 + nc)
+!  since we store the 2k-1 bands of  c  in the 2k-1  r o w s  of
+!  the work array w, and  w  in turn is stored in  s c r t c h ,
+!  with  w(1,1)=scrtch(1+nc).
+!
+!  also, c  being of order  n-k, we would want
+!    1 <= left-2k+ll .le. n-k  or
+!    llmin=2k-left  <=  ll  .le.  n-left+k = llmax .
+!
+    leftmk = left-k
+    index = leftmk-j+(j-1)*kpkm1+nc
+    llmin = max(1,k-leftmk)
+    llmax = min(k,n-leftmk)
+    do ll = llmin, llmax
+      scrtch(ll+index)=scrtch(ll+nb)
+    end do
+
+    call bsplvb (scrtch,kp1,2,xij,left,scrtch(1+nb))
+    id=max(0,leftmk-kp1)
+    llmin=1-min(0,leftmk-kp1)
+    do ll=llmin, kp1
+      id=id+1
+      scrtch(id+na)=scrtch(id+na)-sign*scrtch(ll+nb)
+    end do
+
+    sign=-sign
+
+  end do
+
+  call banfac(scrtch(1+nc),kpkm1,nmk,k-1,k-1,iflag)
+
+  if ( iflag == 2 ) then
+    write ( *, * ) ' '
+    write ( *, * ) 'SPLOPT - Fatal error!'
+    write ( *, * ) '  Matrix C is not invertible.'
+    stop
+  end if
+!
+!  compute  scrtch (.+nd)= d  from  scrtch (.+na) =-a .
+!
+  do i=n,2,-1
+    scrtch(i-1+na)=scrtch(i-1+na)+scrtch(i+na)
+  end do
+
+  do i=1,nmk
+    scrtch(i+nd)=scrtch(i+na)*(tau(i+k)-tau(i))/floatk
+  end do
+!
+!  Compute  scrtch (.+nd)= x .
+!
+  call banslv(scrtch(1+nc),kpkm1,nmk,k-1,k-1,scrtch(1+nd))
+!
+!  Compute  scrtch (.+nd)=change in  xi . modify, if necessary, to
+!  prevent new  xi  from moving more than 1/3 of the way to its
+!  neighbors. then add to  xi  to obtain new  xi  in scrtch(.+nx).
+!
+  delmax = 0.0D+00
+  sign = signst
+  do i = 1, nmk
+    del = sign * scrtch(i+nd)
+    delmax = max ( delmax, abs ( del ) )
+    if ( 0.0D+00 < del ) then
+      go to 230
+    end if
+    del = max ( del, ( scrtch(i-1+nx) - scrtch(i+nx) ) / 3.0D+00 )
+    go to 240
+  230   del = min (del,(scrtch(i+1+nx)-scrtch(i+nx))/3.0D+00 )
+  240   sign = -sign
+    scrtch(i+nx) = scrtch(i+nx)+del
+  end do
+!
+!  Call it a day in case change in  xi  was small enough or too many
+!  steps were taken.
+!
+  if ( delmax < tol ) then
+    go to 270
+  end if
+
+  newton = newton + 1
+  if ( newton <= newtmx ) then
+    go to 100
+  end if
+
+  write ( *, * ) ' '
+  write ( *, * ) 'SPLOPT - Warning!'
+  write ( *, * ) '  No convergence.  Number of Newton steps was ', newtmx
+
+  270 continue
+
+  do i = 1, nmk
+    t(k+i) = scrtch(i+nx)
+  end do
+
+  290 continue
+
+  do i=1,k
+    t(i)=tau(1)
+    t(n+i)=tau(n)
+  end do
+
+  return
+
+! 310 iflag=2
+!
+!     return
+end
diff --git a/pppack/subbak.f90 b/pppack/subbak.f90
new file mode 100644
index 0000000..c3df5e9
--- /dev/null
+++ b/pppack/subbak.f90
@@ -0,0 +1,80 @@
+!>
+!> @file subbak.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine subbak ( w, ipivot, nrow, ncol, last, x )
+
+!*************************************************************************
+!
+!! SUBBAK carries out backsubstitution for the current block.
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!    w, ipivot, nrow, ncol, last  are as on return from factrb.
+!
+!    x(1),...,x(ncol)  contains, on input, the right side for the
+!            equations in this block after backsubstitution has been
+!            carried up to but not including equation ipivot(last).
+!            means that x(j) contains the right side of equation ipi-
+!            vot(j) as modified during elimination, j=1,...,last, while
+!            for j > last, x(j) is already a component of the solut-
+!            ion vector.
+!
+!    x(1),...,x(ncol) contains, on output, the components of the solut-
+!            ion corresponding to the present block.
+!
+  implicit none
+
+  integer ncol
+  integer nrow
+
+  integer ip
+  integer ipivot(nrow)
+  integer j
+  integer k
+  integer last
+  real ( kind = 8 ) s
+  real ( kind = 8 ) w(nrow,ncol)
+  real ( kind = 8 ) x(ncol)
+
+  do k = last, 1, -1
+
+    ip = ipivot(k)
+
+    s = 0.0D+00
+    do j = k+1, ncol
+      s = s + w(ip,j) * x(j)
+    end do
+
+    x(k) = ( x(k) - s ) / w(ip,k)
+
+  end do
+
+end
diff --git a/pppack/subfor.f90 b/pppack/subfor.f90
new file mode 100644
index 0000000..58f7afe
--- /dev/null
+++ b/pppack/subfor.f90
@@ -0,0 +1,100 @@
+!>
+!> @file subfor.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine subfor ( w, ipivot, nrow, last, b, x )
+
+!*************************************************************************
+!
+!! SUBFOR carries out the forward pass of substitution for the current block.
+!
+!  Discussion:
+!
+!    The forward pass is the action on the right side corresponding to the
+!    elimination carried out in FACTRB for this block.
+!
+!    At the end, x(j) contains the right side of the transformed
+!    ipivot(j)-th equation in this block, j=1,...,nrow. then, since
+!    for i=1,...,nrow-last, b(nrow+i) is going to be used as the right
+!    side of equation I in the next block (shifted over there from
+!    this block during factorization), it is set equal to x(last+i) here.
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!    w, ipivot, nrow, last  are as on return from factrb.
+!
+!    b(j)   is expected to contain, on input, the right side of j-th
+!           equation for this block, j=1,...,nrow.
+!    b(nrow+j)   contains, on output, the appropriately modified right
+!           side for equation j in next block, j=1,...,nrow-last.
+!
+!    x(j)   contains, on output, the appropriately modified right
+!           side of equation ipivot(j) in this block, j=1,...,last (and
+!           even for j=last+1,...,nrow).
+!
+  implicit none
+
+  integer last
+  integer nrow
+
+  real ( kind = 8 ) b(nrow+nrow-last)
+  integer ip
+  integer ipivot(nrow)
+  integer j
+  integer k
+  real ( kind = 8 ) s
+  real ( kind = 8 ) w(nrow,last)
+  real ( kind = 8 ) x(nrow)
+
+  ip = ipivot(1)
+  x(1) = b(ip)
+
+  do k = 2, nrow
+
+    ip = ipivot(k)
+
+    s = 0.0D+00
+    do j = 1, min ( k-1, last )
+      s = s + w(ip,j) * x(j)
+    end do
+
+    x(k) = b(ip) - s
+
+  end do
+!
+!  Transfer modified right sides of equations ipivot(last+1),...,
+!  ipivot(nrow) to next block.
+!
+  do k = last+1, nrow
+    b(nrow-last+k) = x(k)
+  end do
+
+  return
+end
diff --git a/pppack/tautsp.f90 b/pppack/tautsp.f90
new file mode 100644
index 0000000..d585f07
--- /dev/null
+++ b/pppack/tautsp.f90
@@ -0,0 +1,530 @@
+!>
+!> @file tautsp.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine tautsp ( tau, gtau, ntau, gamma, s, break, coef, l, k, iflag )
+
+!*************************************************************************
+!
+!! TAUTSP constructs a cubic spline interpolant to given data.
+!
+!  Discussion:
+!
+!    If 0 < GAMMA, additional knots are introduced where needed to
+!    make the interpolant more flexible locally.  This avoids extraneous
+!    inflection points typical of cubic spline interpolation at knots to
+!    rapidly changing data.
+!
+!  Method:
+!
+!    On the I-th interval, (TAU(I), TAU(I+1)), the interpolant is of the
+!    form:
+!
+!    (*)  f(u(x))=a+b*u + c*h(u,z) + d*h(1-u,1-z) ,
+!
+!    with
+!
+!      U = U(X) = ( X - TAU(I) ) / DTAU(I).
+!
+!    Here,
+!       z=z(i) = addg(i+1)/(addg(i)+addg(i+1))
+!  (= .5, in case the denominator vanishes). with
+!       addg(j)=abs(ddg(j)), ddg(j) = dg(j+1)-dg(j),
+!       dg(j)=divdif ( j) = (gtau(j+1)-gtau(j))/dtau(j)
+!  and
+!       h(u,z)=alpha*u**3+(1-alpha)*(max(((u-zeta)/(1-zeta)),0)**3
+!  with
+!       alpha(z)=(1-gamma/3)/zeta
+!       zeta(z)=1-gamma*min((1 - z), 1/3)
+!  thus, for 1/3 <= z .le. 2/3,  f  is just a cubic polynomial on
+!  the interval i. otherwise, it has one additional knot, at
+!         tau(i)+zeta*dtau(i) .
+!  as  z  approaches  1, h(.,z) has an increasingly sharp bend  near 1,
+!  thus allowing  f  to turn rapidly near the additional knot.
+!     in terms of f(j)=gtau(j) and
+!       fsecnd(j)= second derivative of  f  at  tau(j),
+!  the coefficients for (*) are given as
+!       a=f(i)-d
+!       b=(f(i+1)-f(i)) - (c - d)
+!       c=fsecnd(i+1)*dtau(i)**2/hsecnd(1,z)
+!       d=fsecnd(i)*dtau(i)**2/hsecnd(1,1-z)
+!  hence can be computed once fsecnd(i),i=1,...,ntau, is fixed.
+!
+!    F is automatically continuous and has a continuous second derivative
+!    (except when z=0 or 1 for some i). we determine fscnd(.) from
+!    the requirement that also the first derivative of F be continuous.
+!
+!    In addition, we require that the third derivative be continuous
+!    across TAU(2) and across TAU(NTAU-1).  This leads to a strictly
+!    diagonally dominant tridiagonal linear system for the fsecnd(i)
+!    which we solve by Gauss elimination without pivoting.
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!    Input, real ( kind = 8 ) TAU(NTAU), the sequence of data points.
+!    TAU must be strictly increasing.
+!
+!    Input, real ( kind = 8 ) GTAU(NTAU), the corresponding sequence of
+!    function values.
+!
+!    Input, integer NTAU, the number of data points.  NTAU must be at least 4.
+!
+!    Input, gamma  indicates whether additional flexibility is desired.
+!        =0., no additional knots
+!         in (0.,3.), under certain conditions on the given data at
+!                points i-1, i, i+1, and i+2, a knot is added in the
+!                i-th interval, i=2,...,ntau-2. see description of meth-
+!                od below. the interpolant gets rounded with increasing
+!                gamma. a value of  2.5  for gamma is typical.
+!          in (3.,6.), same , except that knots might also be added in
+!                intervals in which an inflection point would be permit-
+!                ted.  a value of  5.5  for gamma is typical.
+!
+!    Output, break, coef, l, k  give the pp-representation of the interpolant.
+!          specifically, for break(i) <= x .le. break(i+1), the
+!        interpolant has the form
+!  f(x)=coef(1,i) +dx(coef(2,i) +(dx/2)(coef(3,i) +(dx/3)coef(4,i)))
+!        with  dx=x-break(i) and i=1,...,l .
+!
+!    Output, iflag =1, ok
+!        =2, input was incorrect. a printout specifying the mistake
+!            was made.
+!            workspace
+!
+!    Output, s     is required, of size (ntau,6). the individual columns of this
+!        array contain the following quantities mentioned in the write-
+!        up and below.
+!     s(.,1)=dtau = tau(.+1)-tau
+!     s(.,2)=diag = diagonal in linear system
+!     s(.,3)=u = upper diagonal in linear system
+!     s(.,4)=r = right side for linear system (initially)
+!          = fsecnd = solution of linear system , namely the second
+!                       derivatives of interpolant at  tau
+!     s(.,5)=z = indicator of additional knots
+!     s(.,6)=1/hsecnd(1,x) with x = z or = 1-z. see below.
+!
+  implicit none
+
+  integer ntau
+
+  real ( kind = 8 ) alph
+  real ( kind = 8 ) alpha
+  real ( kind = 8 ) break(*)
+  real ( kind = 8 ) c
+  real ( kind = 8 ) coef(4,*)
+  real ( kind = 8 ) d
+  real ( kind = 8 ) del
+  real ( kind = 8 ) denom
+  real ( kind = 8 ) divdif
+  real ( kind = 8 ) entry
+  real ( kind = 8 ) entry3
+  real ( kind = 8 ) factor
+  real ( kind = 8 ) factr2
+  real ( kind = 8 ) gam
+  real ( kind = 8 ) gamma
+  real ( kind = 8 ) gtau(ntau)
+  integer i
+  integer iflag
+  integer k
+  integer l
+  integer method
+  real ( kind = 8 ) onemg3
+  real ( kind = 8 ) onemzt
+  real ( kind = 8 ) ratio
+  real ( kind = 8 ) s(ntau,6)
+  real ( kind = 8 ) sixth
+  real ( kind = 8 ) tau(ntau)
+  real ( kind = 8 ) temp
+  real ( kind = 8 ) x
+  real ( kind = 8 ) z
+  real ( kind = 8 ) zeta
+  real ( kind = 8 ) zt2
+
+  alph(x) = min ( 1.0D+00, onemg3 / x )
+!
+!  There must be at least 4 interpolation points.
+!
+  if ( ntau < 4 ) then
+    write ( *, '(a)' ) ' '
+    write ( *, '(a)' ) 'TAUTSP - Fatal error!'
+    write ( *, '(a)' ) '  Input NTAU must be at least 4.'
+    write ( *, '(a,i6)' ) '  NTAU = ', ntau
+    iflag = 2
+    stop
+  end if
+!
+!  Construct delta tau and first and second (divided) differences of data.
+!
+  do i = 1, ntau-1
+
+    s(i,1) = tau(i+1)-tau(i)
+
+    if ( s(i,1) <= 0.0D+00 ) then
+      write(*,30)i,tau(i),tau(i+1)
+   30     format (' point ',i3,' and the next',2e15.6,' are disordered')
+      iflag=2
+      return
+    end if
+
+    s(i+1,4) = ( gtau(i+1) - gtau(i) ) / s(i,1)
+  end do
+
+  do i = 2, ntau-1
+    s(i,4) = s(i+1,4)-s(i,4)
+  end do
+!
+!  Construct system of equations for second derivatives at  tau. at each
+!  interior data point, there is one continuity equation, at the first
+!  and the last interior data point there is an additional one for a
+!  total of NTAU equations in  ntau  unknowns.
+!
+  i = 2
+  s(2,2) = s(1,1) / 3.0D+00
+  sixth = 1.0D+00 / 6.0D+00
+  method = 2
+  gam = gamma
+
+  if ( gam <= 0.0D+00 ) then
+    method=1
+  end if
+
+  if ( 3.0D+00 < gam ) then
+    method = 3
+    gam = gam - 3.0D+00
+  end if
+
+  onemg3 = 1.0D+00 - gam / 3.0D+00
+!
+!  loop over i
+!
+   70 continue
+!
+!  Construct z(i) and zeta(i)
+!
+  z = 0.5D+00
+
+  if ( method == 1) then
+    go to 100
+  end if
+
+  if ( method == 3) then
+    go to 90
+  end if
+
+  if ( s(i,4)*s(i+1,4) < 0.0D+00 ) then
+    go to 100
+  end if
+
+   90 continue
+
+  temp = abs ( s(i+1,4) )
+  denom = abs ( s(i,4) ) +temp
+
+  if ( denom /= 0.0D+00 ) then
+    z = temp/denom
+    if ( abs ( z - 0.5D+00 ) <= sixth ) then
+      z=0.5D+00
+    end if
+  end if
+
+  100 continue
+
+  s(i,5) = z
+!
+!  Set up part of the i-th equation which depends on the i-th interval.
+!
+  if ( z < 0.5D+00 ) then
+
+    zeta = gam*z
+    onemzt = 1.0D+00 - zeta
+    zt2 = zeta**2
+    alpha = alph(onemzt)
+    factor = zeta/(alpha*(zt2-1.0D+00 ) + 1.0D+00 )
+    s(i,6) = zeta*factor / 6.0D+00
+    s(i,2) = s(i,2) + s(i,1) &
+      * ( ( 1.0D+00 - alpha * onemzt ) * factor / 2.0D+00-s(i,6))
+!
+!  If z=0 and the previous z = 1, then d(i) = 0. since then
+!  also u(i-1)=l(i+1) = 0, its value does not matter. reset
+!  d(i)=1 to insure nonzero pivot in elimination.
+!
+    if ( s(i,2) <= 0.0D+00 ) then
+      s(i,2) = 1.0D+00
+    end if
+
+    s(i,3)=s(i,1) / 6.0D+00
+  else if ( z - 0.5D+00 == 0.0D+00 ) then
+    s(i,2)=s(i,2)+s(i,1) / 3.0D+00
+    s(i,3)=s(i,1) / 6.0D+00
+  else if ( 0.0D+00 < z - 0.5D+00 ) then
+    onemzt = gam*(1.0D+00 - z)
+    zeta = 1.0D+00 - onemzt
+    alpha = alph(zeta)
+    factor = onemzt/(1.0D+00 - alpha * zeta * ( 1.0D+00 + onemzt ) )
+    s(i,6) = onemzt*factor / 6.0D+00
+    s(i,2) = s(i,2)+s(i,1) / 3.0D+00
+    s(i,3) = s(i,6)*s(i,1)
+  end if
+
+  if ( 2 < i ) then
+    go to 190
+  end if
+
+  s(1,5) = 0.5D+00
+!
+!  The first two equations enforce continuity of the first and of
+!  the third derivative across tau(2).
+!
+  s(1,2)=s(1,1) / 6.0D+00
+  s(1,3)=s(2,2)
+  entry3=s(2,3)
+  if ( z-0.5D+00) 150, 160, 170
+
+  150 continue
+
+  factr2 = zeta * ( alpha * ( zt2 - 1.0D+00 ) + 1.0D+00 ) &
+    / ( alpha * ( zeta * zt2 - 1.0D+00 ) + 1.0D+00 )
+
+  ratio=factr2*s(2,1)/s(1,2)
+  s(2,2)=factr2*s(2,1)+s(1,1)
+  s(2,3)=-factr2*s(1,1)
+  go to 180
+
+  160 continue
+
+  ratio=s(2,1)/s(1,2)
+  s(2,2)=s(2,1)+s(1,1)
+  s(2,3)=-s(1,1)
+  go to 180
+
+  170 continue
+
+  ratio=s(2,1)/s(1,2)
+  s(2,2)=s(2,1)+s(1,1)
+  s(2,3)=-s(1,1)*6.0D+00 * alpha * s(2,6)
+!
+!  At this point, the first two equations read
+!              diag(1)*x1+u(1)*x2 + entry3*x3=r(2)
+!       -ratio*diag(1)*x1+diag(2)*x2 + u(2)*x3=0.0
+!  Eliminate first unknown from second equation
+!
+  180 continue
+
+  s(2,2)=ratio*s(1,3)+s(2,2)
+  s(2,3)=ratio*entry3+s(2,3)
+  s(1,4)=s(2,4)
+  s(2,4)=ratio*s(1,4)
+  go to 200
+
+  190 continue
+!
+!  The i-th equation enforces continuity of the first derivative
+!  across tau(i). it has been set up in statements 35 up to 40
+!  and 21 up to 25 and reads now
+!    -ratio*diag(i-1)*xi-1+diag(i)*xi + u(i)*xi+1=r(i) .
+!  eliminate (i-1)st unknown from this equation
+!
+  s(i,2)=ratio*s(i-1,3)+s(i,2)
+  s(i,4)=ratio*s(i-1,4)+s(i,4)
+!
+!  Set up the part of the next equation which depends on the
+!  i-th interval.
+!
+  200 continue
+
+  if ( z- 0.5D+00 ) 210, 220, 230
+
+  210 continue
+  ratio = -s(i,6) * s(i,1) / s(i,2)
+  s(i+1,2)=s(i,1) / 3.0D+00
+  go to 240
+
+  220 continue
+  ratio=-(s(i,1) / 6.0D+00 ) / s(i,2)
+  s(i+1,2)=s(i,1) / 3.0D+00
+  go to 240
+
+  230 continue
+  ratio=-( s(i,1) / 6.0D+00 )/s(i,2)
+  s(i+1,2)=s(i,1)*((1.0D+00-zeta*alpha) * factor / 2.0D+00 - s(i,6) )
+!
+!  end of i loop
+!
+  240 continue
+
+  i=i+1
+  if ( i < ntau-1) then
+    go to 70
+  end if
+
+  s(i,5) = 0.5D+00
+!
+!  The last two equations enforce continuity of third derivative and
+!  of first derivative across  tau(ntau-1).
+!
+  entry=ratio*s(i-1,3)+s(i,2)+s(i,1)/3.0D+00
+  s(i+1,2)=s(i,1)/6.0D+00
+  s(i+1,4)=ratio*s(i-1,4)+s(i,4)
+  if ( z- 0.5D+00 ) 250, 260, 270
+
+  250 continue
+
+  ratio = s(i,1) * 6.0D+00 * s(i-1,6) * alpha / s(i-1,2)
+  s(i,2)=ratio*s(i-1,3)+s(i,1)+s(i-1,1)
+  s(i,3)=-s(i-1,1)
+  go to 280
+
+  260 continue
+
+  ratio=s(i,1)/s(i-1,2)
+  s(i,2)=ratio*s(i-1,3)+s(i,1)+s(i-1,1)
+  s(i,3)=-s(i-1,1)
+  go to 280
+
+  270 continue
+
+  factr2=onemzt*(alpha*(onemzt**2-1.0D+00)+1.0D+00) &
+    /(alpha*(onemzt**3-1.0D+00)+1.0D+00)
+
+  ratio = factr2*s(i,1) / s(i-1,2)
+  s(i,2)=ratio*s(i-1,3)+factr2*s(i-1,1)+s(i,1)
+  s(i,3)=-factr2*s(i-1,1)
+!
+!  At this point, the last two equations read:
+!
+!           diag(i)*xi+     u(i)*xi+1=r(i)
+!    -ratio*diag(i)*xi+diag(i+1)*xi+1=r(i+1)
+!
+!  Eliminate XI from the last equation.
+!
+  280 continue
+
+  s(i,4)=ratio*s(i-1,4)
+  ratio=-entry/s(i,2)
+  s(i+1,2)=ratio*s(i,3)+s(i+1,2)
+  s(i+1,4)=ratio*s(i,4)+s(i+1,4)
+!
+!  Back substitution.
+!
+  s(ntau,4) = s(ntau,4) / s(ntau,2)
+
+  290 continue
+
+  s(i,4)=(s(i,4)-s(i,3)*s(i+1,4))/s(i,2)
+  i=i-1
+  if ( 1 < i ) then
+    go to 290
+  end if
+
+  s(1,4)=(s(1,4)-s(1,3)*s(2,4)-entry3*s(3,4))/s(1,2)
+!
+!  Construct polynomial pieces.
+!
+  break(1)=tau(1)
+  l=1
+
+  do i=1, ntau-1
+    coef(1,l)=gtau(i)
+    coef(3,l)=s(i,4)
+    divdif=(gtau(i+1)-gtau(i))/s(i,1)
+    z=s(i,5)
+    if ( z- 0.5D+00 ) 300, 310, 320
+
+  300   continue
+
+    if ( z == 0.0D+00 ) go to 330
+    zeta=gam*z
+    onemzt=1.0D+00-zeta
+    c=s(i+1,4) / 6.0D+00
+    d=s(i,4)*s(i,6)
+    l=l+1
+    del=zeta*s(i,1)
+    break(l)=tau(i)+del
+    zt2=zeta**2
+    alpha=alph(onemzt)
+    factor=onemzt**2*alpha
+    coef(1,l)=gtau(i)+divdif*del+s(i,1)**2*(d*onemzt*(factor-1.0D+00) &
+      +c*zeta*(zt2-1.0D+00))
+    coef(2,l)=divdif+s(i,1)*(d*(1.0D+00-3.0D+00*factor)+c*(3.0D+00*zt2-1.0D+00))
+    coef(3,l)=6.0D+00*(d*alpha*onemzt+c*zeta)
+    coef(4,l)=6.0D+00*(c-d*alpha)/s(i,1)
+    coef(4,l-1)=coef(4,l)-6.0D+00*d*(1.0D+00-alpha)/(del*zt2)
+    coef(2,l-1)=coef(2,l)-del*(coef(3,l)-(del/2.0D+00)*coef(4,l-1))
+    go to 340
+
+  310   continue
+
+    coef(2,l) = divdif - s(i,1) * ( 2.0D+00 * s(i,4) + s(i+1,4) ) / 6.0D+00
+    coef(4,l)=(s(i+1,4)-s(i,4))/s(i,1)
+    go to 340
+
+  320 continue
+
+    onemzt=gam*(1.0D+00-z)
+
+    if ( onemzt == 0.0D+00 ) then
+      go to 330
+    end if
+
+    zeta = 1.0D+00 - onemzt
+    alpha=alph(zeta)
+    c=s(i+1,4)*s(i,6)
+    d=s(i,4)/6.0D+00
+    del=zeta*s(i,1)
+    break(l+1)=tau(i)+del
+    coef(2,l)=divdif-s(i,1)*(2.0D+00*d+c)
+    coef(4,l)=6.0D+00*(c*alpha-d)/s(i,1)
+    l=l+1
+    coef(4,l)=coef(4,l-1)+6.0D+00*(1.0D+00-alpha)*c/(s(i,1)*onemzt**3)
+    coef(3,l)=coef(3,l-1)+del*coef(4,l-1)
+    coef(2,l)=coef(2,l-1)+del*(coef(3,l-1)+(del/2.0D+00)*coef(4,l-1))
+    coef(1,l)=coef(1,l-1)+del*(coef(2,l-1)+(del/2.0D+00)*(coef(3,l-1) &
+      +(del/3.0D+00)*coef(4,l-1)))
+    go to 340
+
+  330   continue
+
+    coef(2,l) = divdif
+    coef(3,l) = 0D+00
+    coef(4,l) = 0.0D+00
+
+  340   continue
+
+    l = l + 1
+    break(l) = tau(i+1)
+
+  end do
+
+  l = l - 1
+  k = 4
+  iflag = 1
+
+  return
+end
diff --git a/pppack/titanium.f90 b/pppack/titanium.f90
new file mode 100644
index 0000000..9879a5f
--- /dev/null
+++ b/pppack/titanium.f90
@@ -0,0 +1,88 @@
+!>
+!> @file titanium.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+subroutine titanium ( n, t, g )
+
+!***********************************************************************
+!
+!! TITANIUM represents a temperature dependent property of titanium.
+!
+!  Discussion:
+!
+!    The data has been used extensively as an example in spline
+!    approximation with variable knots.
+!
+!  Modified:
+!
+!    20 November 2000
+!
+!  Reference:
+!
+!    Carl DeBoor,
+!    A Practical Guide to Splines,
+!    Springer Verlag.
+!
+!  Parameters:
+!
+!    Output, integer N, the number of data points, which is 49.
+!
+!    Output, real ( kind = 8 ) T(N), the location of the data points.
+!
+!    Output, real ( kind = 8 ) G(N), the value associated with the data points.
+!
+  implicit none
+
+  real ( kind = 8 ) g(*)
+  integer n
+  real ( kind = 8 ) t(*)
+
+  n = 49
+
+  t(1:49) = (/ &
+     595.0D+00,  605.0D+00,  615.0D+00,  625.0D+00,  635.0D+00, &
+     645.0D+00,  655.0D+00,  665.0D+00,  675.0D+00,  685.0D+00, &
+     695.0D+00,  705.0D+00,  715.0D+00,  725.0D+00,  735.0D+00, &
+     745.0D+00,  755.0D+00,  765.0D+00,  775.0D+00,  785.0D+00, &
+     795.0D+00,  805.0D+00,  815.0D+00,  825.0D+00,  835.0D+00, &
+     845.0D+00,  855.0D+00,  865.0D+00,  875.0D+00,  885.0D+00, &
+     895.0D+00,  905.0D+00,  915.0D+00,  925.0D+00,  935.0D+00, &
+     945.0D+00,  955.0D+00,  965.0D+00,  975.0D+00,  985.0D+00, &
+     995.0D+00, 1005.0D+00, 1015.0D+00, 1025.0D+00, 1035.0D+00, &
+    1045.0D+00, 1055.0D+00, 1065.0D+00, 1075.0D+00 /)
+
+  g(1:49) = (/ &
+    0.644D+00, 0.622D+00, 0.638D+00, 0.649D+00, 0.652D+00, &
+    0.639D+00, 0.646D+00, 0.657D+00, 0.652D+00, 0.655D+00, &
+    0.644D+00, 0.663D+00, 0.663D+00, 0.668D+00, 0.676D+00, &
+    0.676D+00, 0.686D+00, 0.679D+00, 0.678D+00, 0.683D+00, &
+    0.694D+00, 0.699D+00, 0.710D+00, 0.730D+00, 0.763D+00, &
+    0.812D+00, 0.907D+00, 1.044D+00, 1.336D+00, 1.881D+00, &
+    2.169D+00, 2.075D+00, 1.598D+00, 1.211D+00, 0.916D+00, &
+    0.746D+00, 0.672D+00, 0.627D+00, 0.615D+00, 0.607D+00, &
+    0.606D+00, 0.609D+00, 0.603D+00, 0.601D+00, 0.603D+00, &
+    0.601D+00, 0.611D+00, 0.601D+00, 0.608D+00 /)
+
+  return
+end
diff --git a/pputils2/CMakeLists.txt b/pputils2/CMakeLists.txt
new file mode 100644
index 0000000..d0e4592
--- /dev/null
+++ b/pputils2/CMakeLists.txt
@@ -0,0 +1,74 @@
+/**
+ * @file CMakeLists.txt
+ *
+ * @brief
+ *
+ * @copyright
+ * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+ * SPC (Swiss Plasma Center)
+ *
+ * spclibs is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * @authors
+ * (in alphabetical order)
+ * @author Nicolas Richart <nicolas.richart@epfl.ch>
+ * @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+ */
+project(pputils2)
+
+set(SRCS
+  pputils2.f90
+)
+
+set(CMAKE_Fortran_MODULE_DIRECTORY
+  ${CMAKE_CURRENT_BINARY_DIR}/modules
+  )
+
+add_library(pputils2 STATIC ${SRCS})
+target_include_directories(pputils2
+  PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/modules>
+          ${MPI_Fortran_INCLUDE_PATH}
+  INTERFACE $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/modules>
+            $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
+            ${MPI_Fortran_INCLUDE_PATH}
+  )
+
+target_compile_options(pputils2 PUBLIC ${MPI_Fortran_COMPILE_FLAGS})
+target_link_libraries(pputils2 PUBLIC ${MPI_Fortran_LIBRARIES})
+
+set_property(TARGET pputils2
+  PROPERTY PUBLIC_HEADER ${CMAKE_CURRENT_BINARY_DIR}/modules/pputils.mod)
+
+include(GNUInstallDirs)
+install(TARGETS pputils2
+  EXPORT ${BSPLINES_EXPORT_TARGETS}
+  LIBRARY DESTINATION ${CNAKE_INSTALL_LIBDIR}
+  ARCHIVE DESTINATION ${CNAKE_INSTALL_LIBDIR}
+  PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
+  )
+
+if(BSPLINES_EXAMPLES)
+  set(EXAMPLES ex1 ex2 ex3 ex4 ex5 ex6 ex7)
+  foreach(ex ${EXAMPLES})
+    add_executable(pputils2_${ex} ${ex}.f90)
+    target_link_libraries(pputils2_${ex} pputils2 futils)
+  endforeach()
+
+  add_test(ex1 ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 4 ${CMAKE_CURRENT_BINARY_DIR}/pputils2_ex1)
+  add_test(ex2 ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 9 ${CMAKE_CURRENT_BINARY_DIR}/pputils2_ex2)
+  add_test(ex3 ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 5 ${CMAKE_CURRENT_BINARY_DIR}/pputils2_ex3)
+  add_test(ex4 ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 12 ${CMAKE_CURRENT_BINARY_DIR}/pputils2_ex4)
+  add_test(ex5 ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 8 ${CMAKE_CURRENT_BINARY_DIR}/pputils2_ex5)
+  add_test(ex6 ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 12 ${CMAKE_CURRENT_BINARY_DIR}/pputils2_ex6)
+  add_test(ex7 ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 6 ${CMAKE_CURRENT_BINARY_DIR}/pputils2_ex7)
+endif()
diff --git a/pputils2/Makefile b/pputils2/Makefile
new file mode 100644
index 0000000..b0f3ddd
--- /dev/null
+++ b/pputils2/Makefile
@@ -0,0 +1,105 @@
+#
+# @file Makefile
+#
+# @brief
+#
+# @copyright
+# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+# SPC (Swiss Plasma Center)
+#
+# spclibs is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# @authors
+# (in alphabetical order)
+# @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+#
+F90 = mpiifort
+CC = cc
+
+debug = -g -traceback -CB
+optim = -O3 -xSSE4.2
+
+#OPT=$(debug)
+OPT=$(optim)
+
+F90FLAGS =  $(OPT) -I. -I$(FUTILS)/include -I${HDF5}/lib
+CFLAGS = -O2
+LDFLAGS = $(OPT) -fPIC -L. -L$(FUTILS)/lib -L${HDF5}/lib
+LIBS =  -lfutils pputils2.o -lhdf5_fortran -lhdf5 -lz
+
+.SUFFIXES:
+.SUFFIXES: .o .c .f90
+
+.f90.o:
+	$(F90) $(F90FLAGS) -c $<
+
+all:	ex1 ex2 ex3 ex4 ex5 ex6 ex7
+
+lib:    libpputils2.a
+
+libpputils2.a: pputils2.o
+	xiar r $@ $?
+	ranlib $@
+
+ex1:	ex1.o
+	$(F90) $(LDFLAGS) -o $@ $<  $(LIBS)
+
+ex2:	ex2.o
+	$(F90) $(LDFLAGS) -o $@ $<  $(LIBS)
+
+ex3:	ex3.o
+	$(F90) $(LDFLAGS) -o $@ $<  $(LIBS)
+
+ex4:	ex4.o
+	$(F90) $(LDFLAGS) -o $@ $<  $(LIBS)
+
+ex5:	ex5.o
+	$(F90) $(LDFLAGS) -o $@ $<  $(LIBS)
+
+ex6:	ex6.o
+	$(F90) $(LDFLAGS) -o $@ $<  $(LIBS)
+
+ex7:	ex7.o
+	$(F90) $(LDFLAGS) -o $@ $<  $(LIBS)
+
+tests:	ex1 ex2 ex3 ex4 ex5 ex6 ex7
+	@echo ==== Running ex1 ======
+	@mpiexec -n 4 ./ex1
+	@echo ==== Running ex2 ======
+	@mpiexec -n 9 ./ex2
+	@echo ==== Running ex3 ======
+	@mpiexec -n 5 ./ex3
+	@echo ==== Running ex4 ======
+	@mpiexec -n 12 ./ex4
+	@echo ==== Running ex5 ======
+	@mpiexec -n 8 ./ex5
+	@echo ==== Running ex6 ======
+	@mpiexec -n 12 ./ex6
+	@echo ==== Running ex7 ======
+	@mpiexec -n 6 ./ex7
+
+ex1.o: pputils2.o
+ex2.o: pputils2.o
+ex3.o: pputils2.o
+ex4.o: pputils2.o
+ex5.o: pputils2.o
+ex6.o: pputils2.o
+ex7.o: pputils2.o
+
+tags:
+	etags *.f90 $(FUTILS)/futils.f90
+
+clean:
+	rm -f *.o *~ a.out
+distclean: clean
+	rm -f ex1 ex2 ex3 ex4 ex5 ex6 ex7 *.h5 *.a *.mod
diff --git a/pputils2/ex1.f90 b/pputils2/ex1.f90
new file mode 100644
index 0000000..46dcca6
--- /dev/null
+++ b/pputils2/ex1.f90
@@ -0,0 +1,113 @@
+!>
+!> @file ex1.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   Tranpsose of 2d matrix partitionned on a 1d proc grid:
+!     - A(n1,n2/P1) -> AT(n2,n1/P1) -> B(n1,n2/P1)
+!
+  USE pputils2
+  USE futils
+  IMPLICIT NONE
+  INCLUDE "mpif.h"
+  CHARACTER(len=32) :: file='ex1.h5'
+  INTEGER :: fid
+!
+  INTEGER, PARAMETER :: ndims=1              ! N. of dims of proc. grid
+  INTEGER :: ierr, me, npes
+  INTEGER, DIMENSION(ndims) :: dims, coords
+  LOGICAL :: periods(ndims), reorder
+  INTEGER :: cart
+!
+  INTEGER ::  n1, n2, n1p, n2p
+  DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: a, atr, b
+  DOUBLE PRECISION :: x
+  INTEGER :: i, j, iglob, jglob, kerrors, nerrors
+!================================================================================
+!
+!   Init MPI
+  CALL mpi_init(ierr)
+  CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
+  CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
+!
+!   Create cartesian topololy
+  dims    = npes
+  periods = (/.FALSE./)
+  reorder = .FALSE.
+  IF( PRODUCT(dims) .NE. npes ) THEN
+     IF( me .EQ. 0 ) THEN
+        PRINT*,  PRODUCT(dims), " processors required!"
+        CALL mpi_abort(MPI_COMM_WORLD, -1, ierr)
+     END IF
+  END IF
+  CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, cart, ierr)
+  CALL mpi_cart_coords(cart, me, ndims, coords, ierr)
+!
+!   Define local array
+  n1p=2; n1=n1p*dims(1)
+  n2p=2; n2=n2p*dims(1)
+  ALLOCATE( a(n1,n2p), atr(n2,n1p), b(n1,n2p) )
+  a = 0
+  atr = 0
+  b   = 0
+  DO i=1,n1
+     DO j=1,n2p
+        jglob = coords(1)*n2p + j
+        a(i,j) = 10*i + jglob
+     END DO
+  END DO
+  IF( me.EQ. 0 ) THEN
+     WRITE(*,'(a,3i4)') 'Global dimension of matrix a', n1, n2
+  END IF
+!
+!   Tranpose A(n1,n2/P1) -> AT(n2,n1/P1) -> B(n1,n2/P1)
+  CALL pptransp(cart, a, atr)
+  CALL pptransp(cart, atr, b)
+!
+!   Check ATR
+  kerrors = 0
+  DO i=1,n1p
+     iglob = coords(1)*n1p + i
+     DO j=1,n2
+        x = 10*iglob + j
+        IF( x .NE. atr(j,i) ) kerrors = kerrors+1
+     END DO
+  END DO
+  CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, &
+       &          MPI_COMM_WORLD, ierr)
+  IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking ATR', nerrors
+!
+!   Write to file
+!
+  CALL creatf(file, fid, mpicomm=cart)
+  CALL putarrnd(fid, '/arraya', a, (/2/) )
+  CALL putarrnd(fid, '/arrayat', atr, (/2/) )
+  CALL putarrnd(fid, '/arrayb', b, (/2/) )
+!
+!   Clean up and quit
+  DEALLOCATE(a, atr)
+  CALL closef(fid)
+  CALL mpi_finalize(ierr)
+END PROGRAM main
diff --git a/pputils2/ex2.f90 b/pputils2/ex2.f90
new file mode 100644
index 0000000..8b8d1c2
--- /dev/null
+++ b/pputils2/ex2.f90
@@ -0,0 +1,170 @@
+!>
+!> @file ex2.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   Tranpsose of matrices partitionned on a 2d proc grid:
+!     - A(n1,n2/P1,n3/P2)    -> AT(n3,n2/P1,n1/P2)
+!     - B(n1,n2,n3/P1,n4/P2) -> BT(n4,n2,n3/P1,n1/P2)
+!
+  USE pputils2
+  USE futils
+  IMPLICIT NONE
+  INCLUDE "mpif.h"
+  CHARACTER(len=32) :: file='ex2.h5'
+  INTEGER :: fid
+!
+  INTEGER, PARAMETER :: ndims=2              ! N. of dims of proc. grid
+  INTEGER :: ierr, me, npes
+  INTEGER, DIMENSION(ndims) :: dims, coords
+  LOGICAL :: periods(ndims), reorder
+  INTEGER :: cart, cartcol, cartrow
+!
+  INTEGER ::  n1, n2, n3, n4, n1p, n2p, n3p, n4p
+  DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: a3, a3t
+  DOUBLE PRECISION, DIMENSION(:,:,:,:), ALLOCATABLE :: b4, b4t
+  DOUBLE PRECISION :: x
+  INTEGER :: i, j, k, l, iglob, jglob, kglob, lglob, kerrors, nerrors
+!================================================================================
+!
+!   Init MPI
+  CALL mpi_init(ierr)
+  CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
+  CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
+!
+!   Create cartesian topololy
+  dims    = (/3, 3/)
+  periods = (/.FALSE., .TRUE./)
+  reorder = .FALSE.
+  IF( PRODUCT(dims) .NE. npes ) THEN
+     IF( me .EQ. 0 ) THEN
+        PRINT*,  PRODUCT(dims), " processors required!"
+        CALL mpi_abort(MPI_COMM_WORLD, -1, ierr)
+     END IF
+  END IF
+  CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, cart, ierr)
+  CALL mpi_cart_coords(cart, me, ndims, coords, ierr)
+  CALL mpi_cart_sub(cart, (/.TRUE., .FALSE. /), cartcol, ierr)
+  CALL mpi_cart_sub(cart, (/.FALSE., .TRUE. /), cartrow, ierr)
+!
+!   Define local array A3
+  n1p=2; n1=n1p*dims(2)
+  n2p=4; n2=n2p*dims(1)
+  n3p=3; n3=n3p*dims(2)
+  ALLOCATE( a3(n1,n2p,n3p), a3t(n3,n2p,n1p) )
+  a3 = 0
+  a3t = 0
+  DO i=1,n1
+     DO j=1,n2p
+        jglob = coords(1)*n2p + j
+        DO k=1,n3p
+           kglob = coords(2)*n3p + k
+           a3(i,j,k) = 10000*i + 100*jglob + kglob
+        END DO
+     END DO
+  END DO
+  IF( me .EQ. 0 ) THEN
+     WRITE(*,'(a,3i4)') 'Global dimension of matrix a', n1, n2, n3
+  END IF
+!
+!   Tranpose A3(n1,n2/P1,n3/P2) -> A3T(n3,n2/P1,n1/P2)
+  CALL pptransp(cartrow, a3, a3t, 1, 3)
+!
+!   Check A3T
+  kerrors = 0
+  DO i=1,n1p
+     iglob = coords(2)*n1p + i
+     DO j=1,n2p
+        jglob = coords(1)*n2p + j
+        DO k=1,n3
+           x = 10000*iglob + 100*jglob + k
+           IF( x .NE. a3t(k,j,i) ) kerrors = kerrors+1
+        END DO
+     END DO
+  END DO
+  CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, &
+       &          MPI_COMM_WORLD, ierr)
+  IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking a3', nerrors
+!
+!   Define local array B4
+  n1p=2; n1=n1p*dims(2)
+  n2p=4; n2=n2p*dims(1)
+  n3p=3; n3=n3p*dims(1)
+  n4p=3; n4=n4p*dims(2)
+  ALLOCATE( b4(n1,n2,n3p,n4p), b4t(n4,n2,n3p,n1p) )
+  b4 = 0
+  b4t = 0
+  DO i=1,n1
+     DO j=1,n2
+        DO k=1,n3p
+           kglob = coords(1)*n3p + k
+           DO l=1,n4p
+              lglob = coords(2)*n4p + l
+              b4(i,j,k,l) = 1000000*i + 10000*j + 100*kglob +lglob
+           END DO
+        END DO
+     END DO
+  END DO
+  IF( me.EQ. 0 ) THEN
+     WRITE(*,'(a,4i4)') 'Global dimension of matrix b', n1, n2, n3, n4
+  END IF
+!
+!   Tranpose B4(n1,n2,n3/P1,n4/P2) -> B4T(n4,n2,n3/P1,n1/P2)
+  CALL pptransp(cartrow, b4, b4t, 1, 4)
+!
+  CALL mpi_barrier(MPI_COMM_WORLD, ierr)
+!
+!   Check B4T
+  kerrors = 0
+  DO i=1,n1p
+     iglob = coords(2)*n1p + i
+     DO j=1,n2
+        DO k=1,n3p
+           kglob = coords(1)*n3p + k
+           DO l=1,n4
+              x = 1000000*iglob + 10000*j + 100*kglob + l
+              IF( x .NE. b4t(l,j,k,i) ) kerrors = kerrors+1
+           END DO
+        END DO
+     END DO
+  END DO
+  CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, &
+       &          MPI_COMM_WORLD, ierr)
+  IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking b4', nerrors
+!
+!   Write to file
+!
+  CALL creatf(file, fid, mpicomm=cart)
+  CALL putarrnd(fid, '/a3' , a3, (/2,3/) )
+  CALL putarrnd(fid, '/a3t', a3t,(/2,3/) )
+  CALL putarrnd(fid, '/b4' , b4, (/3,4/) )
+  CALL putarrnd(fid, '/b4t', b4t,(/3,4/) )
+
+!   Clean up and quit
+  DEALLOCATE(a3, a3t)
+  DEALLOCATE(b4, b4t)
+  CALL closef(fid)
+  CALL mpi_finalize(ierr)
+END PROGRAM main
diff --git a/pputils2/ex3.f90 b/pputils2/ex3.f90
new file mode 100644
index 0000000..dabf46b
--- /dev/null
+++ b/pputils2/ex3.f90
@@ -0,0 +1,111 @@
+!>
+!> @file ex3.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   Tranpsose of 2d matrix partitionned on a 1d proc grid
+!     - A(n1,n2/P1) -> AT(n2,n1/P1)
+!   n1, n2 NOT REQUIRED to be divided evenly by NPES
+!
+  USE pputils2
+  USE futils
+  IMPLICIT NONE
+  INCLUDE "mpif.h"
+  CHARACTER(len=32) :: file='ex3.h5'
+  INTEGER :: fid
+!
+  INTEGER, PARAMETER :: ndims=1              ! N. of dims of proc. grid
+  INTEGER :: ierr, me, npes
+  INTEGER, DIMENSION(ndims) :: dims, coords
+  LOGICAL :: periods(ndims), reorder
+  INTEGER :: cart
+!
+  INTEGER ::  n1=9, n2=8, s1, s2, n1p, n2p
+  DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: a, atr
+  DOUBLE PRECISION :: x
+  INTEGER :: i, j, iglob, jglob, kerrors, nerrors
+!================================================================================
+!
+!   Init MPI
+  CALL mpi_init(ierr)
+  CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
+  CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
+!
+!   Create cartesian topololy
+  dims    = npes
+  periods = (/.FALSE./)
+  reorder = .FALSE.
+  IF( PRODUCT(dims) .NE. npes ) THEN
+     IF( me .EQ. 0 ) THEN
+        PRINT*,  PRODUCT(dims), " processors required!"
+        CALL mpi_abort(MPI_COMM_WORLD, -1, ierr)
+     END IF
+  END IF
+  CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, cart, ierr)
+  CALL mpi_cart_coords(cart, me, ndims, coords, ierr)
+!
+!   Partition array
+  CALL dist1d(cart, 0, n1, s1, n1p)
+  CALL dist1d(cart, 0, n2, s2, n2p)
+  ALLOCATE( a(n1,n2p), atr(n2,n1p) )
+  a = 0
+  atr = 0
+  DO i=1,n1
+     DO j=1,n2p
+        jglob = s2 + j
+        a(i,j) = 10*i + jglob
+     END DO
+  END DO
+  IF( me.EQ. 0 ) THEN
+     WRITE(*,'(a,3i4)') 'Global dimension of matrix a', n1, n2
+  END IF
+!
+!   Tranpose A(n1,n2/P1) -> ATR(n2,n1/P1)
+  CALL pptransp(cart, a, atr)
+!
+!   Check ATR
+  kerrors = 0
+  DO i=1,n1p
+     iglob = s1 + i
+     DO j=1,n2
+        x = 10*iglob + j
+        IF( x .NE. atr(j,i) ) kerrors = kerrors+1
+     END DO
+  END DO
+  CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, &
+       &          MPI_COMM_WORLD, ierr)
+  IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking ATR', nerrors
+!
+!   Write to file
+!
+  CALL creatf(file, fid, mpicomm=cart)
+  CALL putarrnd(fid, '/arraya', a, (/2/) )
+  CALL putarrnd(fid, '/arrayat', atr, (/2/) )
+!
+!   Clean up and quit
+  DEALLOCATE(a, atr)
+  CALL closef(fid)
+  CALL mpi_finalize(ierr)
+END PROGRAM main
diff --git a/pputils2/ex4.f90 b/pputils2/ex4.f90
new file mode 100644
index 0000000..1f75216
--- /dev/null
+++ b/pputils2/ex4.f90
@@ -0,0 +1,171 @@
+!>
+!> @file ex4.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   Tranpsose of matrices partitionned on a 2d proc grid:
+!     - A(n1,n2/P1,n3/P2)    -> AT(n3,n2/P1,n1/P2)
+!     - B(n1,n2,n3/P1,n4/P2) -> BT(n4,n2,n3/P1,n1/P2)
+!   n1, n2, n3, n4 NOT REQUIRED to be divided evenly by NPES
+!
+  USE pputils2
+  USE futils
+  IMPLICIT NONE
+  INCLUDE "mpif.h"
+  CHARACTER(len=32) :: file='ex4.h5'
+  INTEGER :: fid
+!
+  INTEGER, PARAMETER :: ndims=2              ! N. of dims of proc. grid
+  INTEGER :: ierr, me, npes
+  INTEGER, DIMENSION(ndims) :: dims, coords
+  LOGICAL :: periods(ndims), reorder
+  INTEGER :: cart, cartcol, cartrow
+!
+  INTEGER ::  n1=15, n2=10, n3=9, n4=8, n1p, n2p, n3p, n4p, s1, s2, s3, s4
+  DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: a3, a3t
+  DOUBLE PRECISION, DIMENSION(:,:,:,:), ALLOCATABLE :: b4, b4t
+  DOUBLE PRECISION :: x
+  INTEGER :: i, j, k, l, iglob, jglob, kglob, lglob, kerrors, nerrors
+!================================================================================
+!
+!   Init MPI
+  CALL mpi_init(ierr)
+  CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
+  CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
+!
+!   Create cartesian topololy
+  dims    = (/4, 3/)
+  periods = (/.FALSE., .TRUE./)
+  reorder = .FALSE.
+  IF( PRODUCT(dims) .NE. npes ) THEN
+     IF( me .EQ. 0 ) THEN
+        PRINT*,  PRODUCT(dims), " processors required!"
+        CALL mpi_abort(MPI_COMM_WORLD, -1, ierr)
+     END IF
+  END IF
+  CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, cart, ierr)
+  CALL mpi_cart_coords(cart, me, ndims, coords, ierr)
+  CALL mpi_cart_sub(cart, (/.TRUE., .FALSE. /), cartcol, ierr)
+  CALL mpi_cart_sub(cart, (/.FALSE., .TRUE. /), cartrow, ierr)
+!
+!   Define local array A3
+  CALL dist1d(cartrow, 0, n1, s1, n1p)
+  CALL dist1d(cartcol, 0, n2, s2, n2p)
+  CALL dist1d(cartrow, 0, n3, s3, n3p)
+  ALLOCATE( a3(n1,n2p,n3p), a3t(n3,n2p,n1p) )
+  a3 = 0
+  a3t = 0
+  DO i=1,n1
+     DO j=1,n2p
+        jglob = s2 + j
+        DO k=1,n3p
+           kglob = s3 + k
+           a3(i,j,k) = 10000*i + 100*jglob + kglob
+        END DO
+     END DO
+  END DO
+  IF( me.EQ. 0 ) THEN
+     WRITE(*,'(a,3i4)') 'Global dimension of matrix a', n1, n2, n3
+  END IF
+!
+!   Tranpose A3(n1,n2/P1,n3/P2) -> A3T(n3,n2/P1,n1/P2)
+  CALL pptransp(cartrow, a3, a3t, 1, 3)
+!
+!   Check A3T
+  kerrors = 0
+  DO i=1,n1p
+     iglob = s1 + i
+     DO j=1,n2p
+        jglob = s2 + j
+        DO k=1,n3
+           x = 10000*iglob + 100*jglob + k
+           IF( x .NE. a3t(k,j,i) ) kerrors = kerrors+1
+        END DO
+     END DO
+  END DO
+  CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, &
+       &          MPI_COMM_WORLD, ierr)
+  IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking a3', nerrors
+!
+!   Define local array B4
+  CALL dist1d(cartrow, 0, n1, s1, n1p)
+  CALL dist1d(cartcol, 0, n3, s3, n3p)
+  CALL dist1d(cartrow, 0, n4, s4, n4p)
+  ALLOCATE( b4(n1,n2,n3p,n4p), b4t(n4,n2,n3p,n1p) )
+  b4 = 0
+  b4t = 0
+  DO i=1,n1
+     DO j=1,n2
+        DO k=1,n3p
+           kglob = s3 + k
+           DO l=1,n4p
+              lglob = s4 + l
+              b4(i,j,k,l) = 1000000*i + 10000*j + 100*kglob +lglob
+           END DO
+        END DO
+     END DO
+  END DO
+  IF( me.EQ. 0 ) THEN
+     WRITE(*,'(a,4i4)') 'Global dimension of matrix b', n1, n2, n3, n4
+  END IF
+!
+!   Tranpose B4(n1,n2,n3/P1,n4/P2) -> B4T(n4,n2,n3/P1,n1/P2)
+!!$  CALL pptransp(cartrow, b4, b4t)
+  CALL pptransp(cartrow, b4, b4t, 1, 4)
+!
+  CALL mpi_barrier(MPI_COMM_WORLD, ierr)
+!
+!   Check B4T
+  kerrors = 0
+  DO i=1,n1p
+     iglob = s1 + i
+     DO j=1,n2
+        DO k=1,n3p
+           kglob = s3 + k
+           DO l=1,n4
+              x = 1000000*iglob + 10000*j + 100*kglob + l
+              IF( x .NE. b4t(l,j,k,i) ) kerrors = kerrors+1
+           END DO
+        END DO
+     END DO
+  END DO
+  CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, &
+       &          MPI_COMM_WORLD, ierr)
+  IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking b4', nerrors
+!
+!   Write to file
+!
+  CALL creatf(file, fid, mpicomm=cart)
+  CALL putarrnd(fid, '/a3' , a3, (/2,3/) )
+  CALL putarrnd(fid, '/a3t', a3t,(/2,3/) )
+  CALL putarrnd(fid, '/b4' , b4, (/3,4/) )
+  CALL putarrnd(fid, '/b4t', b4t,(/3,4/) )
+
+!   Clean up and quit
+  DEALLOCATE(a3, a3t)
+  DEALLOCATE(b4, b4t)
+  CALL closef(fid)
+  CALL mpi_finalize(ierr)
+END PROGRAM main
diff --git a/pputils2/ex5.f90 b/pputils2/ex5.f90
new file mode 100644
index 0000000..181a2dc
--- /dev/null
+++ b/pputils2/ex5.f90
@@ -0,0 +1,221 @@
+!>
+!> @file ex5.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   Tranpsose of matrices partitionned on a 2d proc grid:
+!     - A(n1,n2/P1,n3/P2)    -> AT(n2,n1/P1,n3/P2)
+!     - B(n1,n2,n3/P1,n4/P2) -> BT(n3,n2,n1/P1,n4/P2)
+!     - C(n1,n2/P1,n3,n4/P2) -> CT(n2,n1/P1,n3,n4/P2)
+!   n1, n2, n3, n4 NOT REQUIRED to be divided evenly by NPES
+!
+  USE pputils2
+  USE futils
+  IMPLICIT NONE
+  INCLUDE "mpif.h"
+  CHARACTER(len=32) :: file='ex4.h5'
+  INTEGER :: fid
+!
+  INTEGER, PARAMETER :: ndims=2              ! N. of dims of proc. grid
+  INTEGER :: ierr, me, npes
+  INTEGER, DIMENSION(ndims) :: dims, coords
+  LOGICAL :: periods(ndims), reorder
+  INTEGER :: cart, cartcol, cartrow
+!
+  INTEGER ::  n1=8, n2=10, n3=6, n4=5, n1p, n2p, n3p, n4p, s1, s2, s3, s4
+  DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: a3, a3t
+  DOUBLE PRECISION, DIMENSION(:,:,:,:), ALLOCATABLE :: b4, b4t, c4, c4t
+  DOUBLE PRECISION :: x
+  INTEGER :: i, j, k, l, iglob, jglob, kglob, lglob, kerrors, nerrors
+!================================================================================
+!
+!   Init MPI
+  CALL mpi_init(ierr)
+  CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
+  CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
+!
+!   Create cartesian topololy
+  dims    = (/4, 2/)
+  periods = (/.FALSE., .TRUE./)
+  reorder = .FALSE.
+  IF( PRODUCT(dims) .NE. npes ) THEN
+     IF( me .EQ. 0 ) THEN
+        PRINT*,  PRODUCT(dims), " processors required!"
+        CALL mpi_abort(MPI_COMM_WORLD, -1, ierr)
+     END IF
+  END IF
+  CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, cart, ierr)
+  CALL mpi_cart_coords(cart, me, ndims, coords, ierr)
+  CALL mpi_cart_sub(cart, (/.TRUE., .FALSE. /), cartcol, ierr)
+  CALL mpi_cart_sub(cart, (/.FALSE., .TRUE. /), cartrow, ierr)
+!
+!   Define local array A3
+  CALL dist1d(cartcol, 0, n1, s1, n1p)
+  CALL dist1d(cartcol, 0, n2, s2, n2p)
+  CALL dist1d(cartrow, 0, n3, s3, n3p)
+  ALLOCATE( a3(n1,n2p,n3p), a3t(n2,n1p,n3p) )
+  a3 = 0
+  a3t = 0
+  DO i=1,n1
+     DO j=1,n2p
+        jglob = s2 + j
+        DO k=1,n3p
+           kglob = s3 + k
+           a3(i,j,k) = 10000*i + 100*jglob + kglob
+        END DO
+     END DO
+  END DO
+  IF( me .EQ. 0 ) THEN
+     WRITE(*,'(a,4i4)') 'Global dimension of matrix a', n1, n2, n3
+  END IF
+!
+!   Tranpose A(n1,n2/P1,n3/P2) -> AT(n2,n1/P1,n3/P2)
+  CALL pptransp(cartcol, a3, a3t, 1, 2)
+!
+  CALL mpi_barrier(MPI_COMM_WORLD, ierr)
+!
+!   Check A3T
+  kerrors = 0
+  DO i=1,n1p
+     iglob = s1 + i
+     DO j=1,n2
+        DO k=1,n3p
+           kglob = s3 + k
+           x = 10000*iglob + 100*j + kglob
+           IF( x .NE. a3t(j,i,k) ) kerrors = kerrors+1
+        END DO
+     END DO
+  END DO
+  CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, &
+       &          MPI_COMM_WORLD, ierr)
+  IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking a3', nerrors
+!
+!   Define local array B4
+  CALL dist1d(cartcol, 0, n1, s1, n1p)
+  CALL dist1d(cartcol, 0, n3, s3, n3p)
+  CALL dist1d(cartrow, 0, n4, s4, n4p)
+  ALLOCATE( b4(n1,n2,n3p,n4p), b4t(n3,n2,n1p,n4p) )
+  b4 = 0
+  b4t = 0
+  DO i=1,n1
+     DO j=1,n2
+        DO k=1,n3p
+           kglob = s3 + k
+           DO l=1,n4p
+              lglob = s4 + l
+              b4(i,j,k,l) = 1000000*i + 10000*j + 100*kglob +lglob
+           END DO
+        END DO
+     END DO
+  END DO
+  IF( me .EQ. 0 ) THEN
+     WRITE(*,'(a,4i4)') 'Global dimension of matrix b', n1, n2, n3, n4
+  END IF
+!
+!   Tranpose B(n1,n2,n3/P1,n4/P2) -> BT(n3,n2,n1/P1,n4/P2)
+  CALL pptransp(cartcol, b4, b4t, 1, 3)
+!
+  CALL mpi_barrier(MPI_COMM_WORLD, ierr)
+!
+!   Check B4T
+  kerrors = 0
+  DO i=1,n1p
+     iglob = s1 + i
+     DO j=1,n2
+        DO k=1,n3
+           DO l=1,n4p
+              lglob = s4 + l
+              x = 1000000*iglob + 10000*j + 100*k + lglob
+              IF( x .NE. b4t(k,j,i,l) ) kerrors = kerrors+1
+           END DO
+        END DO
+     END DO
+  END DO
+  CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, &
+       &          MPI_COMM_WORLD, ierr)
+  IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking b4', nerrors
+!
+!   Define local array C4
+  CALL dist1d(cartcol, 0, n1, s1, n1p)
+  CALL dist1d(cartcol, 0, n2, s2, n2p)
+  CALL dist1d(cartrow, 0, n4, s4, n4p)
+  ALLOCATE( c4(n1,n2p,n3,n4p), c4t(n2,n1p,n3,n4p) )
+  c4 = 0
+  c4t = 0
+  DO i=1,n1
+     DO j=1,n2p
+        jglob = s2 + j
+        DO k=1,n3
+           DO l=1,n4p
+              lglob = s4 + l
+              c4(i,j,k,l) = 1000000*i + 10000*jglob + 100*k +lglob
+           END DO
+        END DO
+     END DO
+  END DO
+  IF( me.EQ. 0 ) THEN
+     WRITE(*,'(a,4i4)') 'Global dimension of matrix c', n1, n2, n3, n4
+  END IF
+!
+!   Tranpose C(n1,n2/P1,n3,n4/P2) -> CT(n2,n1/P1,n3,n4/P2)
+  CALL pptransp(cartcol, c4, c4t, 1, 2)
+!
+  CALL mpi_barrier(MPI_COMM_WORLD, ierr)
+!
+!   Check C4T
+  kerrors = 0
+  DO i=1,n1p
+     iglob = s1 + i
+     DO j=1,n2
+        DO k=1,n3
+           DO l=1,n4p
+              lglob = s4 + l
+              x = 1000000*iglob + 10000*j + 100*k + lglob
+              IF( x .NE. c4t(j,i,k,l) ) kerrors = kerrors+1
+           END DO
+        END DO
+     END DO
+  END DO
+  CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, &
+       &          MPI_COMM_WORLD, ierr)
+  IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking c4', nerrors
+!
+!   Write to file
+!
+  CALL creatf(file, fid, mpicomm=cart)
+  CALL putarrnd(fid, '/a3' , a3, (/2,3/) )
+  CALL putarrnd(fid, '/a3t', a3t,(/2,3/) )
+  CALL putarrnd(fid, '/b4' , b4, (/3,4/) )
+  CALL putarrnd(fid, '/b4t', b4t,(/3,4/) )
+  CALL putarrnd(fid, '/c4' , c4, (/2,4/) )
+  CALL putarrnd(fid, '/c4t', c4t,(/2,4/) )
+
+!   Clean up and quit
+  DEALLOCATE(a3, a3t)
+  DEALLOCATE(b4, b4t)
+  DEALLOCATE(c4, c4t)
+  CALL closef(fid)
+  CALL mpi_finalize(ierr)
+END PROGRAM main
diff --git a/pputils2/ex6.f90 b/pputils2/ex6.f90
new file mode 100644
index 0000000..defba7c
--- /dev/null
+++ b/pputils2/ex6.f90
@@ -0,0 +1,270 @@
+!>
+!> @file ex6.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   Tranpsose of matrices partitionned on a 2d proc grid:
+!     - A(n1/P1,n2,n3/P2)    -> AT(n1/P1,n3,n2/P2)
+!     - B(n1,n2,n3/P1,n4/P2) -> BT(n1,n3,n2/P1,n4/P2)
+!     - C(n1,n2,n3/P1,n4/P2) -> CT(n1,n4,n3/P1,n2/P2)
+!     - D(n1,n2/P1,n3,n4/P2) -> DT(n1,n2/P1,n4,n3/P2)
+!   n1, n2, n3, n4 NOT REQUIRED to be divided evenly by NPES
+!
+  USE pputils2
+  USE futils
+  IMPLICIT NONE
+  INCLUDE "mpif.h"
+  CHARACTER(len=32) :: file='ex4.h5'
+  INTEGER :: fid
+!
+  INTEGER, PARAMETER :: ndims=2              ! N. of dims of proc. grid
+  INTEGER :: ierr, me, npes
+  INTEGER, DIMENSION(ndims) :: dims, coords
+  LOGICAL :: periods(ndims), reorder
+  INTEGER :: cart, cartcol, cartrow
+!
+  INTEGER ::  n1=8, n2=10, n3=6, n4=5, n1p, n2p, n3p, n4p, s1, s2, s3, s4
+  DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: a3, a3t
+  DOUBLE PRECISION, DIMENSION(:,:,:,:), ALLOCATABLE :: b4, b4t, c4, c4t, d4, d4t
+  DOUBLE PRECISION :: x
+  INTEGER :: i, j, k, l, iglob, jglob, kglob, lglob, kerrors, nerrors
+!================================================================================
+!
+!   Init MPI
+  CALL mpi_init(ierr)
+  CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr)
+  CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr)
+!
+!   Create cartesian topololy
+  dims    = (/4, 3/)
+  periods = (/.FALSE., .TRUE./)
+  reorder = .FALSE.
+  IF( PRODUCT(dims) .NE. npes ) THEN
+     IF( me .EQ. 0 ) THEN
+        PRINT*,  PRODUCT(dims), " processors required!"
+        CALL mpi_abort(MPI_COMM_WORLD, -1, ierr)
+     END IF
+  END IF
+  CALL mpi_cart_create(MPI_COMM_WORLD, ndims, dims, periods, reorder, cart, ierr)
+  CALL mpi_cart_coords(cart, me, ndims, coords, ierr)
+  CALL mpi_cart_sub(cart, (/.TRUE., .FALSE. /), cartcol, ierr)
+  CALL mpi_cart_sub(cart, (/.FALSE., .TRUE. /), cartrow, ierr)
+!
+!   Define local array A3
+  CALL dist1d(cartcol, 0, n1, s1, n1p)
+  CALL dist1d(cartrow, 0, n2, s2, n2p)
+  CALL dist1d(cartrow, 0, n3, s3, n3p)
+  ALLOCATE( a3(n1p,n2,n3p), a3t(n1p,n3,n2p) )
+  a3 = 0
+  a3t = 0
+  DO i=1,n1p
+     iglob = s1 + i
+     DO j=1,n2
+        DO k=1,n3p
+           kglob = s3 + k
+           a3(i,j,k) = 10000*iglob + 100*j + kglob
+        END DO
+     END DO
+  END DO
+  IF( me .EQ. 0 ) THEN
+     WRITE(*,'(a,4i4)') 'Global dimension of matrix a', n1, n2, n3
+  END IF
+!
+!   Tranpose A(n1/P1,n2,n3/P2) -> AT(n1/P1,n3,n2/P2)
+  CALL pptransp(cartrow, a3, a3t, 2, 3)
+!
+  CALL mpi_barrier(MPI_COMM_WORLD, ierr)
+!
+!   Check A3T
+  kerrors = 0
+  DO i=1,n1p
+     iglob = s1 + i
+     DO j=1,n2p
+        jglob = s2 + j
+        DO k=1,n3
+           x = 10000*iglob + 100*jglob + k
+           IF( x .NE. a3t(i,k,j) ) kerrors = kerrors+1
+        END DO
+     END DO
+  END DO
+  CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, &
+       &          MPI_COMM_WORLD, ierr)
+  IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking a3', nerrors
+!
+!   Define local array B4
+  CALL dist1d(cartcol, 0, n2, s2, n2p)
+  CALL dist1d(cartcol, 0, n3, s3, n3p)
+  CALL dist1d(cartrow, 0, n4, s4, n4p)
+  ALLOCATE( b4(n1,n2,n3p,n4p), b4t(n1,n3,n2p,n4p) )
+  b4 = 0
+  b4t = 0
+  DO i=1,n1
+     DO j=1,n2
+        DO k=1,n3p
+           kglob = s3 + k
+           DO l=1,n4p
+              lglob = s4 + l
+              b4(i,j,k,l) = 1000000*i + 10000*j + 100*kglob +lglob
+           END DO
+        END DO
+     END DO
+  END DO
+  IF( me .EQ. 0 ) THEN
+     WRITE(*,'(a,4i4)') 'Global dimension of matrix b', n1, n2, n3, n4
+  END IF
+!
+!   Tranpose B(n1,n2,n3/P1,n4/P2) -> BT(n1,n3,n2/P1,n4/P2)
+  CALL pptransp(cartcol, b4, b4t, 2, 3)
+!
+  CALL mpi_barrier(MPI_COMM_WORLD, ierr)
+!
+!   Check B4T
+  kerrors = 0
+  DO i=1,n1
+     DO j=1,n2p
+        jglob = s2 + j
+        DO k=1,n3
+           DO l=1,n4p
+              lglob = s4 + l
+              x = 1000000*i + 10000*jglob + 100*k + lglob
+              IF( x .NE. b4t(i,k,j,l) ) kerrors = kerrors+1
+           END DO
+        END DO
+     END DO
+  END DO
+  CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, &
+       &          MPI_COMM_WORLD, ierr)
+  IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking b4', nerrors
+!
+!   Define local array C4
+  CALL dist1d(cartrow, 0, n2, s2, n2p)
+  CALL dist1d(cartcol, 0, n3, s3, n3p)
+  CALL dist1d(cartrow, 0, n4, s4, n4p)
+  ALLOCATE( c4(n1,n2,n3p,n4p), c4t(n1,n4,n3p,n2p) )
+  c4 = 0
+  c4t = 0
+  DO i=1,n1
+     DO j=1,n2
+        DO k=1,n3p
+           kglob = s3 + k
+           DO l=1,n4p
+              lglob = s4 + l
+              c4(i,j,k,l) = 1000000*i + 10000*j + 100*kglob +lglob
+           END DO
+        END DO
+     END DO
+  END DO
+  IF( me.EQ. 0 ) THEN
+     WRITE(*,'(a,4i4)') 'Global dimension of matrix c', n1, n2, n3, n4
+  END IF
+!
+!   Tranpose C(n1,n2,n3/P1,n4/P2) -> CT(n1,n4,n3/P1,n2/P2)
+  CALL pptransp(cartrow, c4, c4t, 2, 4)
+!
+  CALL mpi_barrier(MPI_COMM_WORLD, ierr)
+!
+!   Check C4T
+  kerrors = 0
+  DO i=1,n1
+     DO j=1,n2p
+        jglob = s2 + j
+        DO k=1,n3p
+           kglob = s3 + k
+           DO l=1,n4
+              x = 1000000*i + 10000*jglob + 100*kglob + l
+              IF( x .NE. c4t(i,l,k,j) ) kerrors = kerrors+1
+           END DO
+        END DO
+     END DO
+  END DO
+  CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, &
+       &          MPI_COMM_WORLD, ierr)
+  IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking c4', nerrors
+!
+!   Define local array D4
+  CALL dist1d(cartcol, 0, n2, s2, n2p)
+  CALL dist1d(cartrow, 0, n3, s3, n3p)
+  CALL dist1d(cartrow, 0, n4, s4, n4p)
+  ALLOCATE( d4(n1,n2p,n3,n4p), d4t(n1,n2p,n4,n3p) )
+  d4 = 0
+  d4t = 0
+  DO i=1,n1
+     DO j=1,n2p
+        jglob = s2 + j
+        DO k=1,n3
+           DO l=1,n4p
+              lglob = s4 + l
+              d4(i,j,k,l) = 1000000*i + 10000*jglob + 100*k +lglob
+           END DO
+        END DO
+     END DO
+  END DO
+  IF( me.EQ. 0 ) THEN
+     WRITE(*,'(a,4i4)') 'Global dimension of matrix d', n1, n2, n3, n4
+  END IF
+!
+!   Tranpose D(n1,n2/P1,n3,n4/P2) -> DT(n1,n2/P1,n4,n3/P2)
+  CALL pptransp(cartrow, d4, d4t, 3, 4)
+!
+  CALL mpi_barrier(MPI_COMM_WORLD, ierr)
+!
+!   Check D4T
+  kerrors = 0
+  DO i=1,n1
+     DO j=1,n2p
+        jglob = s2 + j
+        DO k=1,n3p
+           kglob = s3 + k
+           DO l=1,n4
+              x = 1000000*i + 10000*jglob + 100*kglob + l
+              IF( x .NE. d4t(i,j,l,k) ) kerrors = kerrors+1
+           END DO
+        END DO
+     END DO
+  END DO
+  CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, &
+       &          MPI_COMM_WORLD, ierr)
+  IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking d4', nerrors
+!
+!   Write to file
+!
+  CALL creatf(file, fid, mpicomm=cart)
+  CALL putarrnd(fid, '/a3' , a3, (/1,3/) )
+  CALL putarrnd(fid, '/a3t', a3t,(/1,3/) )
+  CALL putarrnd(fid, '/b4' , b4, (/3,4/) )
+  CALL putarrnd(fid, '/b4t', b4t,(/3,4/) )
+  CALL putarrnd(fid, '/c4' , c4, (/3,4/) )
+  CALL putarrnd(fid, '/c4t', c4t,(/3,4/) )
+  CALL putarrnd(fid, '/d4' , d4, (/2,4/) )
+  CALL putarrnd(fid, '/d4t', d4t,(/2,4/) )
+!
+!   Clean up and quit
+  DEALLOCATE(a3, a3t)
+  DEALLOCATE(b4, b4t)
+  DEALLOCATE(c4, c4t)
+  DEALLOCATE(d4, d4t)
+  CALL closef(fid)
+  CALL mpi_finalize(ierr)
+END PROGRAM main
diff --git a/pputils2/ex7.f90 b/pputils2/ex7.f90
new file mode 100644
index 0000000..0247599
--- /dev/null
+++ b/pputils2/ex7.f90
@@ -0,0 +1,160 @@
+!>
+!> @file ex7.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+PROGRAM main
+!
+!   Tranpsose of 3d matrices partitionned in 1 and 2 proc grid:
+!     - A(n1,n2,n3/P)      -> AT(n3,n2,n1/P)
+!     - B(n1,n2/P1,n3/P2)  -> BT(n3,n2/P1,n1/P2)
+!   n1, n2, n3 NOT REQUIRED to be divided evenly by P
+!
+USE pputils2
+  IMPLICIT NONE
+  INCLUDE "mpif.h"
+  INTEGER :: ierr, me, npes, comm=MPI_COMM_WORLD
+  INTEGER ::  n1=15, n2=10, n3=20, n1p, n2p, n3p, s1, s2,s3
+  DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: a3, a3t
+  DOUBLE PRECISION :: x
+  INTEGER :: i, j, k, iglob, jglob, kglob, kerrors, nerrors
+!
+  INTEGER, PARAMETER :: ndims=2              ! N. of dims of proc. grid
+  INTEGER, DIMENSION(ndims) :: dims, coords
+  LOGICAL :: periods(ndims), reorder
+  INTEGER :: cart, cartcol, cartrow
+!================================================================================
+!
+!   Init MPI
+  CALL mpi_init(ierr)
+  CALL mpi_comm_size(comm, npes, ierr)
+  CALL mpi_comm_rank(comm, me, ierr)
+!
+!--------------------------------------------------------------------------------
+!
+! 1D partition:
+!
+!   Define local array A3
+  CALL dist1d(comm, 0, n1, s1, n1p)
+  CALL dist1d(comm, 0, n3, s3, n3p)
+  ALLOCATE( a3(n1,n2,n3p), a3t(n3,n2,n1p) )
+  a3 = 0
+  a3t = 0
+  DO i=1,n1
+     DO j=1,n2
+        DO k=1,n3p
+           kglob = s3 + k
+           a3(i,j,k) = 10000*i + 100*j + kglob
+        END DO
+     END DO
+  END DO
+  IF( me.EQ. 0 ) THEN
+     WRITE(*,'(a)') '*** 1D partition ***'
+     WRITE(*,'(a,3i4)') 'Global dimensions of matrix a', n1, n2, n3
+  END IF
+!
+!   Tranpose A3(n1,n2/P1,n3/P2) -> A3T(n3,n2/P1,n1/P2)
+  CALL pptransp(comm, a3, a3t, 1, 3)
+!
+!   Check A3T
+  kerrors = 0
+  DO i=1,n1p
+     iglob = s1 + i
+     DO j=1,n2
+        DO k=1,n3
+           x = 10000*iglob + 100*j + k
+           IF( x .NE. a3t(k,j,i) ) kerrors = kerrors+1
+        END DO
+     END DO
+  END DO
+  CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, &
+       &          comm, ierr)
+  IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking a3', nerrors
+  DEALLOCATE(a3, a3t)
+!--------------------------------------------------------------------------------
+!
+! 2D partition:
+!
+!   Create cartesian topololy
+  dims    = (/2, 3/)
+  periods = (/.FALSE., .FALSE./)
+  reorder = .FALSE.
+  IF( PRODUCT(dims) .NE. npes ) THEN
+     IF( me .EQ. 0 ) THEN
+        PRINT*,  PRODUCT(dims), " processors required!"
+        CALL mpi_abort(comm, -1, ierr)
+     END IF
+  END IF
+  CALL mpi_barrier(comm, ierr)
+!
+  CALL mpi_cart_create(comm, ndims, dims, periods, reorder, cart, ierr)
+  CALL mpi_cart_coords(cart, me, ndims, coords, ierr)
+  CALL mpi_cart_sub(cart, (/.TRUE., .FALSE. /), cartcol, ierr)
+  CALL mpi_cart_sub(cart, (/.FALSE., .TRUE. /), cartrow, ierr)
+!
+!   Define local array A3
+  CALL dist1d(cartrow, 0, n1, s1, n1p)
+  CALL dist1d(cartcol, 0, n2, s2, n2p)
+  CALL dist1d(cartrow, 0, n3, s3, n3p)
+  ALLOCATE( a3(n1,n2p,n3p), a3t(n3,n2p,n1p) )
+  a3 = 0
+  a3t = 0
+  DO i=1,n1
+     DO j=1,n2p
+        jglob = s2 + j
+        DO k=1,n3p
+           kglob = s3 + k
+           a3(i,j,k) = 10000*i + 100*jglob + kglob
+        END DO
+     END DO
+  END DO
+  IF( me.EQ. 0 ) THEN
+     WRITE(*,'(a)') '*** 2D partition ***'
+     WRITE(*,'(a,3i4)') 'Global dimensions of matrix a', n1, n2, n3
+  END IF
+!
+!   Tranpose A3(n1,n2/P1,n3/P2) -> A3T(n3,n2/P1,n1/P2)
+  CALL pptransp(cartrow, a3, a3t, 1, 3)
+!
+!   Check A3T
+  kerrors = 0
+  DO i=1,n1p
+     iglob = s1 + i
+     DO j=1,n2p
+        jglob = s2 + j
+        DO k=1,n3
+           x = 10000*iglob + 100*jglob + k
+           IF( x .NE. a3t(k,j,i) ) kerrors = kerrors+1
+        END DO
+     END DO
+  END DO
+  CALL mpi_reduce(kerrors, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, &
+       &          comm, ierr)
+  IF( me .EQ. 0 ) WRITE(*,'(a,i6)') 'nerrors checking a3', nerrors
+  DEALLOCATE(a3, a3t)
+!--------------------------------------------------------------------------------
+! Epilogue
+!
+  CALL mpi_finalize(ierr)
+END PROGRAM main
+
diff --git a/pputils2/pptransp2.tpl b/pputils2/pptransp2.tpl
new file mode 100644
index 0000000..ada1f71
--- /dev/null
+++ b/pputils2/pptransp2.tpl
@@ -0,0 +1,89 @@
+!>
+!> @file pptransp2.tpl
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+ !
+    INTEGER :: me, npes, i, j, istr, iend, ierr
+    INTEGER, DIMENSION(:), ALLOCATABLE :: ids, idr
+    INTEGER, DIMENSION(:,:), ALLOCATABLE :: ndists
+    INTEGER, DIMENSION(:,:), ALLOCATABLE :: offsets
+    INTEGER :: dims(lastdim), np(2), npmx(2)
+    INTEGER :: n1p, nlp, n1pmx, nlpmx, bufsiz, scount
+    INTEGER :: status(MPI_STATUS_SIZE)
+!----------------------------------------------------------------------
+!              0.   Prologue
+!
+    CALL mpi_comm_rank(comm, me, ierr)
+    CALL mpi_comm_size(comm, npes, ierr)
+!
+!  Determine send/receive proc. id
+    ALLOCATE(ids(npes), idr(npes))
+    CALL partners(comm, ids, idr)
+!----------------------------------------------------------------------
+!              1.   Send/receive buffers
+!
+!  Distribution of first and last partitionned dimensions
+    ALLOCATE(ndists(2,npes))
+    ALLOCATE(offsets(2,0:npes))
+    np(1) = SIZE(b,lastdim)  ! Local first
+    np(2) = SIZE(a,lastdim)  ! and last dimension
+    CALL mpi_allgather(np, 2, MPI_INTEGER, ndists, 2, MPI_INTEGER, comm, ierr)
+    offsets = 0
+    DO i=1,npes
+       offsets(:,i) = offsets(:,i-1) + ndists(:,i)
+    END DO
+!
+!  Allocate send and receive 1d buffers
+    npmx = MAXVAL(ndists,2)
+    bufsiz = npmx(1)*npmx(2)   ! Maximum size of send/receive buffers
+    DO i=2,lastdim-1
+       bufsiz = bufsiz * SIZE(a,i)
+    END DO
+    ALLOCATE(s_buf(bufsiz), r_buf(bufsiz) )
+!----------------------------------------------------------------------
+!              2.   Exchange blocks
+!
+    DO i=1,npes
+       istr = offsets(1,ids(i)) + 1  ! Partition a along first dim
+       iend = offsets(1,ids(i)+1)
+       dims = SHAPE(a)
+       dims(1) = iend-istr+1
+       scount = PRODUCT(dims)
+       s_buf(1:scount) = RESHAPE(a(istr:iend,:), (/scount/)) !*** dim dependant ***!
+       CALL MPI_SENDRECV(s_buf, scount, mpitype, ids(i), i,&
+            & r_buf, bufsiz, mpitype, idr(i), i,&
+            & comm, status, ierr)
+       istr = offsets(2,idr(i)) + 1   ! Partition b along first dim
+       iend = offsets(2,idr(i)+1)
+       dims = SHAPE(b)
+       dims(1) = iend-istr+1
+       b(istr:iend,:) = RESHAPE(r_buf, dims, order=(/lastdim, 1/)) !*** dim dependant ***!
+   END DO
+!----------------------------------------------------------------------
+!              9.   Epilogue
+!
+    DEALLOCATE(ids, idr)
+    DEALLOCATE(ndists, offsets)
+    DEALLOCATE(s_buf, r_buf)
+!
diff --git a/pputils2/pptransp3.tpl b/pputils2/pptransp3.tpl
new file mode 100644
index 0000000..ce817af
--- /dev/null
+++ b/pputils2/pptransp3.tpl
@@ -0,0 +1,113 @@
+!>
+!> @file pptransp3.tpl
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+    INTEGER :: me, npes, i, j, istr, iend, ierr
+    INTEGER, DIMENSION(:), ALLOCATABLE :: ids, idr
+    INTEGER, DIMENSION(:,:), ALLOCATABLE :: ndists
+    INTEGER, DIMENSION(:,:), ALLOCATABLE :: offsets
+    INTEGER :: dims(lastdim), np(2), npmx(2)
+    INTEGER :: n1p, nlp, n1pmx, nlpmx, bufsiz, scount
+    INTEGER :: status(MPI_STATUS_SIZE)
+!----------------------------------------------------------------------
+!              0.   Prologue
+!
+    CALL mpi_comm_rank(comm, me, ierr)
+    CALL mpi_comm_size(comm, npes, ierr)
+!
+!  Determine send/receive proc. id
+    ALLOCATE(ids(npes), idr(npes))
+    CALL partners(comm, ids, idr)
+!----------------------------------------------------------------------
+!              1.   Send/receive buffers
+!
+!  Distribution of dim1 and dim2 partitionned dimensions
+    ALLOCATE(ndists(2,npes))
+    ALLOCATE(offsets(2,0:npes))
+    np(1) = SIZE(b, dim2)  ! Local first
+    np(2) = SIZE(a, dim2)  ! and second dimension
+    CALL mpi_allgather(np, 2, MPI_INTEGER, ndists, 2, MPI_INTEGER, comm, ierr)
+    offsets = 0
+    DO i=1,npes
+       offsets(:,i) = offsets(:,i-1) + ndists(:,i)
+    END DO
+!
+!  Allocate send and receive 1d buffers
+    npmx = MAXVAL(ndists,2)
+    bufsiz = npmx(1)*npmx(2)   ! Maximum size of send/receive buffers
+    DO i=1,lastdim
+       IF ( (i .NE. dim1) .AND. (i .NE. dim2) ) bufsiz = bufsiz * SIZE(a,i)
+    END DO
+    ALLOCATE(s_buf(bufsiz), r_buf(bufsiz) )
+!----------------------------------------------------------------------
+!              2.   Exchange blocks
+!
+    IF ( (dim1 .EQ. 1) .AND. ( dim2 .EQ. 2 ) ) THEN      !*** dim dependant ***!
+       recv_order = (/2,1,3/)                            !*** dim dependant ***!           
+    ELSE IF ( (dim1 .EQ. 1) .AND. ( dim2 .eq. 3 ) ) THEN !*** dim dependant ***!
+       recv_order = (/3,2,1/)                            !*** dim dependant ***!
+    ELSE IF ( (dim1 .EQ. 2) .AND. ( dim2 .eq. 3 ) ) THEN !*** dim dependant ***!
+       recv_order = (/1,3,2/)                            !*** dim dependant ***!
+    ELSE
+       IF ( me .EQ. 0 ) THEN
+          WRITE(*, '(a,i4,a,i4,a)') 'pptransp3: Cannot handle case dim1 = ', dim1, ', dim2 = ', dim2, '!'
+          CALL mpi_abort(MPI_COMM_WORLD, -1, ierr)
+       END IF
+    END IF
+!
+    DO i=1,npes
+       istr = offsets(1,ids(i)) + 1  ! Partition a along dimension dim1
+       iend = offsets(1,ids(i)+1)
+       dims = SHAPE(a)
+       dims(dim1) = iend-istr+1
+       scount = PRODUCT(dims)
+
+       IF (dim1 .EQ. 1) THEN                                      !*** dim dependant ***!
+          s_buf(1:scount) = RESHAPE(a(istr:iend,:,:), (/scount/)) !*** dim dependant ***!
+       ELSE IF (dim1 .EQ. 2) THEN                                 !*** dim dependant ***!
+          s_buf(1:scount) = RESHAPE(a(:,istr:iend,:), (/scount/)) !*** dim dependant ***!
+       END IF                                                     !*** dim dependant ***!
+
+       CALL MPI_SENDRECV(s_buf, scount, mpitype, ids(i), i,&
+            & r_buf, bufsiz, mpitype, idr(i), i,&
+            & comm, status, ierr)
+       istr = offsets(2,idr(i)) + 1   ! Partition b along dimension dim1
+       iend = offsets(2,idr(i)+1)
+       dims = SHAPE(b)
+       dims(dim1) = iend-istr+1
+
+       IF (dim1 .EQ. 1) THEN                                         !*** dim dependant ***!
+          b(istr:iend,:,:) = RESHAPE(r_buf, dims, order=recv_order)  !*** dim dependant ***!
+       ELSE IF (dim1 .EQ. 2) THEN                                    !*** dim dependant ***!
+          b(:,istr:iend,:) = RESHAPE(r_buf, dims, order=recv_order)  !*** dim dependant ***!
+       END IF                                                        !*** dim dependant ***!
+
+   END DO
+!----------------------------------------------------------------------
+!              9.   Epilogue
+!
+    DEALLOCATE(ids, idr)
+    DEALLOCATE(ndists, offsets)
+    DEALLOCATE(s_buf, r_buf)
+!
diff --git a/pputils2/pptransp4.tpl b/pputils2/pptransp4.tpl
new file mode 100644
index 0000000..cec2004
--- /dev/null
+++ b/pputils2/pptransp4.tpl
@@ -0,0 +1,122 @@
+!>
+!> @file pptransp4.tpl
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+    INTEGER :: me, npes, i, j, istr, iend, ierr
+    INTEGER, DIMENSION(:), ALLOCATABLE :: ids, idr
+    INTEGER, DIMENSION(:,:), ALLOCATABLE :: ndists
+    INTEGER, DIMENSION(:,:), ALLOCATABLE :: offsets
+    INTEGER :: dims(lastdim), np(2), npmx(2)
+    INTEGER :: n1p, nlp, n1pmx, nlpmx, bufsiz, scount
+    INTEGER :: status(MPI_STATUS_SIZE)
+!----------------------------------------------------------------------
+!              0.   Prologue
+!
+    CALL mpi_comm_rank(comm, me, ierr)
+    CALL mpi_comm_size(comm, npes, ierr)
+!
+!  Determine send/receive proc. id
+    ALLOCATE(ids(npes), idr(npes))
+    CALL partners(comm, ids, idr)
+!----------------------------------------------------------------------
+!              1.   Send/receive buffers
+!
+!  Distribution of dim1 and dim2 partitionned dimensions
+    ALLOCATE(ndists(2,npes))
+    ALLOCATE(offsets(2,0:npes))
+    np(1) = SIZE(b, dim2)  ! Local first
+    np(2) = SIZE(a, dim2)  ! and second dimension
+    CALL mpi_allgather(np, 2, MPI_INTEGER, ndists, 2, MPI_INTEGER, comm, ierr)
+    offsets = 0
+    DO i=1,npes
+       offsets(:,i) = offsets(:,i-1) + ndists(:,i)
+    END DO
+!
+!  Allocate send and receive 1d buffers
+    npmx = MAXVAL(ndists,2)
+    bufsiz = npmx(1)*npmx(2)   ! Maximum size of send/receive buffers
+    DO i=1,lastdim
+       IF ( (i .NE. dim1) .AND. (i .NE. dim2) ) bufsiz = bufsiz * SIZE(a,i)
+    END DO
+    ALLOCATE(s_buf(bufsiz), r_buf(bufsiz) )
+!----------------------------------------------------------------------
+!              2.   Exchange blocks
+!
+    IF ( (dim1 .EQ. 1) .AND. (dim2 .EQ. 2) ) THEN        !*** dim dependant ***!
+       recv_order = (/2,1,3,4/)                          !*** dim dependant ***!
+    ELSE IF ( (dim1 .EQ. 1) .AND. ( dim2 .eq. 3 ) ) THEN !*** dim dependant ***!
+       recv_order = (/3,2,1,4/)                          !*** dim dependant ***!
+    ELSE IF ( (dim1 .EQ. 1) .AND. ( dim2 .eq. 4 ) ) THEN !*** dim dependant ***!
+       recv_order = (/4,2,3,1/)                          !*** dim dependant ***!
+    ELSE IF ( (dim1 .EQ. 2) .AND. ( dim2 .eq. 3 ) ) THEN !*** dim dependant ***!
+       recv_order = (/1,3,2,4/)                          !*** dim dependant ***!
+    ELSE IF ( (dim1 .EQ. 2) .AND. ( dim2 .eq. 4 ) ) THEN !*** dim dependant ***!
+       recv_order = (/1,4,3,2/)                          !*** dim dependant ***!
+    ELSE IF ( (dim1 .EQ. 3) .AND. ( dim2 .eq. 4 ) ) THEN !*** dim dependant ***!
+       recv_order = (/1,2,4,3/)                          !*** dim dependant ***!
+    ELSE
+       IF ( me .EQ. 0 ) THEN
+          WRITE(*, '(a,i4,a,i4,a)') 'pptransp4: Cannot handle case dim1 = ', dim1, ', dim2 = ', dim2, '!'
+          CALL mpi_abort(MPI_COMM_WORLD, -1, ierr)
+       END IF
+    END IF
+!
+    DO i=1,npes
+       istr = offsets(1,ids(i)) + 1  ! Partition a along dimension dim1
+       iend = offsets(1,ids(i)+1)
+       dims = SHAPE(a)
+       dims(dim1) = iend-istr+1
+       scount = PRODUCT(dims)
+
+       IF (dim1 .EQ. 1) THEN                                        !*** dim dependant ***!
+          s_buf(1:scount) = RESHAPE(a(istr:iend,:,:,:), (/scount/)) !*** dim dependant ***!
+       ELSE IF (dim1 .EQ. 2) THEN                                   !*** dim dependant ***!
+          s_buf(1:scount) = RESHAPE(a(:,istr:iend,:,:), (/scount/)) !*** dim dependant ***!
+       ELSE IF (dim1 .EQ. 3) THEN                                   !*** dim dependant ***!
+          s_buf(1:scount) = RESHAPE(a(:,:,istr:iend,:), (/scount/)) !*** dim dependant ***!
+       END IF                                                       !*** dim dependant ***!
+
+       CALL MPI_SENDRECV(s_buf, scount, mpitype, ids(i), i,&
+            & r_buf, bufsiz, mpitype, idr(i), i,&
+            & comm, status, ierr)
+       istr = offsets(2,idr(i)) + 1   ! Partition b along dimension dim1
+       iend = offsets(2,idr(i)+1)
+       dims = SHAPE(b)
+       dims(dim1) = iend-istr+1
+
+       IF (dim1 .EQ. 1) THEN                                          !*** dim dependant ***!
+          b(istr:iend,:,:,:) = RESHAPE(r_buf, dims, order=recv_order) !*** dim dependant ***!
+       ELSE IF (dim1 .EQ. 2) THEN                                     !*** dim dependant ***!
+          b(:,istr:iend,:,:) = RESHAPE(r_buf, dims, order=recv_order) !*** dim dependant ***!
+       ELSE IF (dim1 .EQ. 3) THEN                                     !*** dim dependant ***!
+          b(:,:,istr:iend,:) = RESHAPE(r_buf, dims, order=recv_order) !*** dim dependant ***!
+       END IF                                                         !*** dim dependant ***!
+   END DO
+!----------------------------------------------------------------------
+!              9.   Epilogue
+!
+    DEALLOCATE(ids, idr)
+    DEALLOCATE(ndists, offsets)
+    DEALLOCATE(s_buf, r_buf)
+!
diff --git a/pputils2/pputils2.f90 b/pputils2/pputils2.f90
new file mode 100644
index 0000000..348a14a
--- /dev/null
+++ b/pputils2/pputils2.f90
@@ -0,0 +1,456 @@
+!>
+!> @file pputils2.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+MODULE pputils2
+!
+!    PPUTILS2: Some MPI utilities.
+!
+!    T.M. Tran, CRPP-EPFL
+!    September 2010
+!    September 2013: add exchange, norm2
+!    November 2013: add timera, hostlist
+!
+  USE iso_fortran_env, ONLY : rkind => real64
+  USE mpi
+  IMPLICIT NONE
+  PRIVATE
+  PUBLIC :: pptransp, dist1d, exchange, ppnorm2, timera, hostlist
+!
+  INTERFACE pptransp
+     MODULE PROCEDURE pptransp2_r, pptransp3_r, pptransp4_r
+     MODULE PROCEDURE pptransp2_c, pptransp3_c, pptransp4_c
+  END INTERFACE
+  INTERFACE exchange
+     MODULE PROCEDURE exchange_2d, exchange_2d_new
+  END INTERFACE exchange
+  INTERFACE ppnorm2
+     MODULE PROCEDURE norm2_para_2d
+  END INTERFACE ppnorm2
+!
+CONTAINS
+!=======================================================================
+  SUBROUTINE pptransp2_r(comm, a, b)
+!
+! Handles double precision-type matrices.
+!
+! Transpose of rank 2 matrix A:
+!    A(n1,n2/P) -> B(n2,n1/P)
+!
+    INTEGER, INTENT(in)                      :: comm
+    REAL(rkind), DIMENSION(:,:), INTENT(in)  :: a               !*** dim dependant ***!
+    REAL(rkind), DIMENSION(:,:), INTENT(out) :: b              !*** dim dependant ***!
+    INTEGER, PARAMETER :: lastdim = 2, mpitype=MPI_DOUBLE_PRECISION !*** dim dependant ***!
+    REAL(rkind), DIMENSION(:), ALLOCATABLE :: s_buf, r_buf
+!
+    INCLUDE 'pptransp2.tpl'
+!
+  END SUBROUTINE pptransp2_r
+!======================================================================= 
+  SUBROUTINE pptransp2_c(comm, a, b)
+!
+! Same as pptransp2_r, but for double complex-type matrices.
+!
+    INTEGER, INTENT(in)                         :: comm
+    COMPLEX(rkind), DIMENSION(:,:), INTENT(in)  :: a               !*** dim dependant ***!
+    COMPLEX(rkind), DIMENSION(:,:), INTENT(out) :: b              !*** dim dependant ***!
+    INTEGER, PARAMETER :: lastdim = 2, mpitype=MPI_DOUBLE_COMPLEX !*** dim dependant ***!
+    COMPLEX(rkind), DIMENSION(:), ALLOCATABLE :: s_buf, r_buf
+!
+    INCLUDE 'pptransp2.tpl'
+!
+  END SUBROUTINE pptransp2_c
+!=======================================================================
+  SUBROUTINE pptransp3_r(comm, a, b, dim1, dim2)
+!
+! Handles double precision-type matrices.
+!
+! Transpose dimensions dim1 and dim2 of rank 3 matrix A. 
+! dim1 and dim2 are such that 1 <= dim1 < dim2 <= 3.
+! At input, matrix A is partitioned along dimension dim2 of matrix A.
+! At exit, B = transpose(A), and B is partitioned along dimension dim1 of matrix A.
+!
+! For example:
+!    dim1 = 1, dim2 = 2 : A(n1,n2/P,n3) -> B(n2,n1/P,n3)
+!    dim1 = 1, dim2 = 3 : A(n1,n2,n3/P) -> B(n3,n2,n1/P)
+!    dim1 = 2, dim2 = 3 : A(n1,n2,n3/P) -> B(n1,n3,n2/P)
+!
+    INTEGER, INTENT(in)                        :: comm
+    REAL(rkind), DIMENSION(:,:,:), INTENT(in)  :: a             !*** dim dependant ***!
+    REAL(rkind), DIMENSION(:,:,:), INTENT(out) :: b            !*** dim dependant ***!
+    INTEGER, INTENT(in) :: dim1, dim2
+    INTEGER, PARAMETER :: lastdim = 3, mpitype=MPI_DOUBLE_PRECISION !*** dim dependant ***!
+    REAL(rkind), DIMENSION(:), ALLOCATABLE :: s_buf, r_buf
+    INTEGER :: recv_order(lastdim)
+ ! 
+    INCLUDE 'pptransp3.tpl'
+ ! 
+  END SUBROUTINE pptransp3_r
+!=======================================================================
+  SUBROUTINE pptransp3_c(comm, a, b, dim1, dim2)
+!
+! Same as pptransp3_r, but for double complex-type matrices.
+!
+    INTEGER, INTENT(in)                           :: comm
+    COMPLEX(rkind), DIMENSION(:,:,:), INTENT(in)  :: a             !*** dim dependant ***!
+    COMPLEX(rkind), DIMENSION(:,:,:), INTENT(out) :: b            !*** dim dependant ***!
+    INTEGER, INTENT(in) :: dim1, dim2
+    INTEGER, PARAMETER :: lastdim = 3, mpitype=MPI_DOUBLE_COMPLEX !*** dim dependant ***!
+    COMPLEX(rkind), DIMENSION(:), ALLOCATABLE :: s_buf, r_buf
+    INTEGER :: recv_order(lastdim)
+ ! 
+    INCLUDE 'pptransp3.tpl'
+ ! 
+  END SUBROUTINE pptransp3_c
+!=======================================================================
+  SUBROUTINE pptransp4_r(comm, a, b, dim1, dim2)
+!
+! Handles double precision-type matrices.
+!
+! Transpose dimensions dim1 and dim2 of rank 4 matrix A. 
+! dim1 and dim2 are such that 1 <= dim1 < dim2 <= 4.
+! At input, matrix A is partitioned along dimension dim2 of matrix A.
+! At exit, B = transpose(A), and B is partitioned along dimension dim1 of matrix A.
+!
+! For example:
+!    dim1 = 1, dim2 = 2 : A(n1,n2/P,n3  ,n4  ) -> B(n2,n1/P,n3  ,n4  )
+!    dim1 = 1, dim2 = 3 : A(n1,n2  ,n3/P,n4  ) -> B(n3,n2  ,n1/P,n4  )
+!    dim1 = 1, dim2 = 4 : A(n1,n2  ,n3  ,n4/P) -> B(n4,n2  ,n3  ,n1/P)
+!    dim1 = 2, dim2 = 3 : A(n1,n2  ,n3/P,n4  ) -> B(n1,n3  ,n2/P,n4  )
+!    dim1 = 2, dim2 = 4 : A(n1,n2  ,n3  ,n4/P) -> B(n1,n4  ,n3  ,n2/P)
+!    dim1 = 3, dim2 = 4 : A(n1,n2  ,n3  ,n4/P) -> B(n1,n2  ,n4  ,n3/P)
+!
+    INTEGER, INTENT(in)                          :: comm
+    REAL(rkind), DIMENSION(:,:,:,:), INTENT(in ) :: a           !*** dim dependant ***!
+    REAL(rkind), DIMENSION(:,:,:,:), INTENT(out) :: b          !*** dim dependant ***!
+    INTEGER, INTENT(in) :: dim1, dim2
+    INTEGER, PARAMETER :: lastdim = 4, mpitype=MPI_DOUBLE_PRECISION !*** dim dependant ***!
+    REAL(rkind), DIMENSION(:), ALLOCATABLE :: s_buf, r_buf
+    INTEGER :: recv_order(lastdim)
+!
+    INCLUDE 'pptransp4.tpl'
+!
+  END SUBROUTINE pptransp4_r
+!=======================================================================
+  SUBROUTINE pptransp4_c(comm, a, b, dim1, dim2)
+!
+! Same as pptransp4_r, but for double complex-type matrices
+!
+    INTEGER, INTENT(in)                             :: comm
+    COMPLEX(rkind), DIMENSION(:,:,:,:), INTENT(in)  :: a           !*** dim dependant ***!
+    COMPLEX(rkind), DIMENSION(:,:,:,:), INTENT(out) :: b          !*** dim dependant ***!
+    INTEGER, INTENT(in)                             :: dim1, dim2
+    INTEGER, PARAMETER :: lastdim = 4, mpitype=MPI_DOUBLE_COMPLEX !*** dim dependant ***!
+    COMPLEX(rkind), DIMENSION(:), ALLOCATABLE :: s_buf, r_buf
+    INTEGER :: recv_order(lastdim)
+!
+    INCLUDE 'pptransp4.tpl'
+!
+  END SUBROUTINE pptransp4_c
+!=======================================================================
+  SUBROUTINE dist1d(comm, s0, ntot, s, nloc)
+!
+!  1d distribute ntot elements, returns offset s and local number of 
+!  elements nloc.
+! 
+    INTEGER, INTENT(in)  :: s0, ntot
+    INTEGER, INTENT(out) :: s, nloc
+    INTEGER :: comm, me, npes, ierr, naver, rem
+!
+    CALL MPI_COMM_SIZE(comm, npes, ierr)
+    CALL MPI_COMM_RANK(comm, me, ierr)
+    naver = ntot/npes
+    rem = MODULO(ntot,npes)
+    s = s0 + MIN(rem,me) + me*naver
+    nloc = naver
+    IF( me.LT.rem ) nloc = nloc+1
+!
+  END SUBROUTINE dist1d
+!=======================================================================
+  SUBROUTINE exchange_2d_new(comm, u, garea)
+!
+!  Exhange ghost cells with (west,east,south,north) neighbors.
+!  Assume same ghost cells on each dimension:
+!    garea(1) : number of ghost cells on west and east boundaries
+!    garea(2) : number of ghost cells on south and north boundaries
+!  Both are equal to 1 by default.
+!
+    INTEGER, INTENT(in)                     :: comm
+    REAL(rkind), ALLOCATABLE, INTENT(inout) :: u(:,:)
+    INTEGER, OPTIONAL, INTENT(in)           :: garea(2)
+    INTEGER :: neighs(4), ierr
+!
+    CALL mpi_cart_shift(comm, 0, 1, neighs(1), neighs(2), ierr)
+    CALL mpi_cart_shift(comm, 1, 1, neighs(3), neighs(4), ierr)
+    CALL exchange_2d(comm, neighs, u, garea)
+  END SUBROUTINE exchange_2d_new
+!=======================================================================
+  SUBROUTINE exchange_2d(comm, neighs, u, garea)
+!
+!  Exhange ghost cells with (west,east,south,north) neighbors.
+!  Assume same ghost cells on each dimension:
+!    garea(1) : number of ghost cells on west and east boundaries
+!    garea(2) : number of ghost cells on south and north boundaries
+!  Both are equal to 1 by default.
+!
+    INTEGER, INTENT(in)                     :: comm
+    INTEGER, INTENT(in)                     :: neighs(4)
+    REAL(rkind), ALLOCATABLE, INTENT(inout) :: u(:,:)
+    INTEGER, OPTIONAL, INTENT(in)           :: garea(2)
+!
+    INTEGER                  :: cols, rows
+    INTEGER                  :: ierr
+    INTEGER, PARAMETER       :: ndim=2
+    INTEGER, DIMENSION(ndim) :: g, lb, ub, s, e, n
+!
+    g = [1,1]
+    IF(PRESENT(garea)) g = garea
+    lb = LBOUND(u)
+    ub = UBOUND(u)
+    s = lb + g
+    e = ub - g
+    n = ub - lb + 1   ! include ghost cells
+!
+!   g(2) matrix full rows with stride n(1)
+    CALL mpi_type_vector(n(2), g(2), n(1), MPI_DOUBLE_PRECISION, rows, ierr)
+    CALL mpi_type_commit(rows, ierr)
+!
+!   g(1) contiguous matrix full columns
+    CALL mpi_type_contiguous(n(1)*g(1), MPI_DOUBLE_PRECISION,  cols, ierr)
+    CALL mpi_type_commit(cols, ierr)
+!
+!  Exchange along first dimension
+    CALL mpi_sendrecv(u(s(1),  lb(2)), 1, rows, neighs(1), 0, &
+         &            u(e(1)+1,lb(2)), 1, rows, neighs(2), 0, &
+         &                                  comm, MPI_STATUS_IGNORE, ierr)
+    CALL mpi_sendrecv(u(e(1)-g(1)+1,lb(2)), 1, rows, neighs(2), 0, &
+         &            u(lb(1),      lb(2)), 1, rows, neighs(1), 0, &
+         &                                  comm, MPI_STATUS_IGNORE, ierr)
+!
+!  Exchange along second dimension
+    CALL mpi_sendrecv(u(lb(1),s(2)),   1, cols, neighs(3), 0, &
+         &            u(lb(1),e(2)+1), 1, cols, neighs(4), 0, &
+         &                                  comm, MPI_STATUS_IGNORE, ierr)
+    CALL mpi_sendrecv(u(lb(1),e(2)-g(2)+1),   1, cols, neighs(4), 0, &
+         &            u(lb(1),lb(2)),         1, cols, neighs(3), 0, &
+         &                                  comm, MPI_STATUS_IGNORE, ierr)
+  END SUBROUTINE exchange_2d
+!=======================================================================
+  FUNCTION norm2_para_2d(x, comm, root, garea) RESULT(res)
+!
+!  Vector norm of 2d distributed array with ghost cells
+!
+    USE mpi
+    REAL(rkind), ALLOCATABLE, INTENT(in) :: x(:,:)
+    INTEGER, INTENT(in)                  :: comm
+    INTEGER, INTENT(in), OPTIONAL        :: root
+    INTEGER, INTENT(in), OPTIONAL        :: garea(:)
+    REAL(rkind)                          :: res
+    INTEGER, PARAMETER       :: ndim=2
+    INTEGER, DIMENSION(ndim) :: g, s, e
+    REAL(rkind)              :: res_loc
+    INTEGER                  :: r, me, ierr
+!
+    CALL mpi_comm_rank(comm, me, ierr)
+    g = [1,1]
+    IF(PRESENT(garea)) g = garea
+    r = 0
+    IF(PRESENT(root)) r = root
+    s = LBOUND(x) + g
+    e = UBOUND(x) - g
+    res_loc = SUM(x(s(1):e(1),s(2):e(2))**2)
+    CALL mpi_reduce(res_loc, res, 1, MPI_DOUBLE_PRECISION, MPI_SUM, r, comm, ierr)
+    if(me.eq.r) res = SQRT(res)    
+  END FUNCTION norm2_para_2d
+!=======================================================================
+  SUBROUTINE timera(cntrl, str, eltime, comm)
+!
+!   Timers (cntrl=0/1 to Init/Update)
+!
+    USE mpi
+    INTEGER, INTENT(in)                     :: cntrl
+    CHARACTER(len=*), INTENT(in)            :: str
+    DOUBLE PRECISION, OPTIONAL, INTENT(out) :: eltime
+    INTEGER, OPTIONAL, INTENT(in)           :: comm
+!
+    INTEGER, PARAMETER :: ncmax=128, maxlen=32
+!
+    INTEGER, SAVE                            :: icall=0, nc=0
+    DOUBLE PRECISION, SAVE                   :: startt0=0.0
+    DOUBLE PRECISION, DIMENSION(ncmax), SAVE :: startt = 0.0, endt = 0.0
+    CHARACTER(len=maxlen), SAVE              :: which(ncmax)
+!
+    DOUBLE PRECISION, DIMENSION(ncmax) :: endtmin, endtmax
+    INTEGER :: comm0, me, lstr, found, i, ierr
+!________________________________________________________________________________
+    IF(PRESENT(comm)) THEN
+       comm0 = comm
+    ELSE
+       comm0 = MPI_COMM_WORLD
+    END IF
+    CALL mpi_comm_rank(comm0, me, ierr)
+    CALL mpi_barrier(comm0, ierr)
+!________________________________________________________________________________
+!
+    IF( icall .EQ. 0 ) THEN
+       icall = icall+1
+       startt0 = mpi_wtime()
+    END IF
+
+    lstr = MIN(LEN_TRIM(str),maxlen)
+    IF( lstr .GT. 0 ) found = loc(str)
+!________________________________________________________________________________
+!
+    SELECT CASE (cntrl)
+!
+    CASE(-1)    !  Current wall time
+       IF( PRESENT(eltime) ) THEN
+          eltime = mpi_wtime() - startt0
+       ELSE IF (me .EQ. 0 ) THEN
+          WRITE(*,'(/a,a,1pe10.3/)') "++ ", ' Wall time used so far = ', &
+               &                     mpi_wtime() - startt0
+       END IF
+!
+    CASE(0)    !  Init Timer
+       IF( found .EQ. 0 ) THEN  !  Called for the 1st time for 'str'
+          nc = nc+1
+          which(nc) = str(1:lstr)
+          found = nc
+       END IF
+       startt(found) = mpi_wtime()
+!
+    CASE(1)   !  Update timer
+       endt(found) = mpi_wtime() - startt(found)
+       IF( PRESENT(eltime) ) THEN
+          eltime = endt(found)
+       ELSE IF (me .EQ. 0 ) THEN
+          WRITE(*,'(/a,a,1pe10.3/)') "++ "//str, ' wall clock time = ', &
+               &                     endt(found)
+       END IF
+!
+    CASE(2)   !  Update and reset timer
+       endt(found) = endt(found) + mpi_wtime() - startt(found)
+       startt(found) = mpi_wtime()
+       IF( PRESENT(eltime) ) THEN
+          eltime = endt(found)
+       END IF
+!
+    CASE(9)   !  Display all timers
+       IF( nc .GT. 0 ) THEN
+          CALL mpi_reduce(endt, endtmin, nc, MPI_DOUBLE_PRECISION, MPI_MIN, 0, comm0, ierr)
+          CALL mpi_reduce(endt, endtmax, nc, MPI_DOUBLE_PRECISION, MPI_MAX, 0, comm0, ierr)
+          IF( me .EQ. 0 ) THEN
+             WRITE(*,'(a)') "Minmax Timer Summary"
+             WRITE(*,'(a)') "===================="
+             DO i=1,nc
+                WRITE(*,'(a20,2x,2(1pe12.3))') TRIM(which(i))//":", endtmin(i), endtmax(i)
+             END DO
+          END IF
+       END IF
+!
+    END SELECT
+!
+  CONTAINS
+    INTEGER FUNCTION loc(str)
+      CHARACTER(len=*), INTENT(in) :: str
+      INTEGER :: i, ind
+      loc = 0
+      DO i=1,nc
+         ind = INDEX(which(i), str(1:lstr))
+         IF( ind .GT. 0 .AND. LEN_TRIM(which(i)) .EQ. lstr ) THEN
+            loc = i
+            EXIT
+         END IF
+      END DO
+    END FUNCTION loc
+  END SUBROUTINE timera
+!=======================================================================
+  SUBROUTINE hostlist(comm)
+!
+!   Print list of hostnames in comm
+!
+    USE mpi
+    INTEGER, OPTIONAL, INTENT(in) :: comm
+!
+    CHARACTER(len=MPI_MAX_PROCESSOR_NAME) :: procname
+    CHARACTER(len=MPI_MAX_PROCESSOR_NAME), ALLOCATABLE :: procnames(:)
+    INTEGER :: comm0, me, nprocs, ierr, i, l
+!
+    IF(PRESENT(comm)) THEN
+       comm0 = comm
+    ELSE
+       comm0 = mpi_comm_world
+    END IF
+    CALL MPI_COMM_RANK(comm0, me, ierr)
+    CALL MPI_COMM_SIZE(comm0, nprocs, ierr)
+    CALL MPI_GET_PROCESSOR_NAME(procname, l, ierr)
+    ALLOCATE(procnames(0:nprocs-1))
+    CALL mpi_gather(procname,MPI_MAX_PROCESSOR_NAME,mpi_character, &
+       &          procnames,MPI_MAX_PROCESSOR_NAME,mpi_character,0, &
+       &          comm0,ierr)
+    IF(me.EQ.0) THEN
+       WRITE(*,'(a/(10(1x,a)))') 'Host list:', &
+            &                   (TRIM(procnames(i)),i=0,nprocs-1)
+    END IF
+  END SUBROUTINE hostlist
+!=======================================================================
+!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!                     Private routines/functions                       !
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+  SUBROUTINE partners(comm, ids, idr)
+!
+!   Compute ranks of send and receive procs.
+!
+    IMPLICIT NONE
+    INTEGER, INTENT(in)  :: comm
+    INTEGER, INTENT(out) :: ids(:), idr(:)
+    INTEGER :: me, npes, ierr, i
+!
+    CALL mpi_comm_rank(comm, me, ierr)
+    CALL mpi_comm_size(comm, npes, ierr)
+    IF( ispower2(npes) ) THEN
+       DO i=0,npes-1
+          ids(i+1) = IEOR(me, i)
+          idr(i+1) = ids(i+1)
+      END DO
+    ELSE
+       DO i=0,npes-1
+          ids(i+1) = MODULO(me+i, npes)
+          idr(i+1) = MODULO(me-i, npes)
+       END DO
+    END IF
+  END SUBROUTINE partners
+!======================================================================= 
+  LOGICAL FUNCTION ispower2(n)
+    INTEGER, INTENT(in) :: n
+    INTEGER :: l
+    l=2
+    DO WHILE ( l .LT. n ) 
+       l = 2*l
+    END DO
+    ispower2 = l .EQ. n    
+  END FUNCTION ispower2
+!======================================================================= 
+END MODULE pputils2
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
new file mode 100644
index 0000000..6c6dac0
--- /dev/null
+++ b/src/CMakeLists.txt
@@ -0,0 +1,111 @@
+/**
+ * @file CMakeLists.txt
+ *
+ * @brief
+ *
+ * @copyright
+ * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+ * SPC (Swiss Plasma Center)
+ *
+ * spclibs is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * @authors
+ * (in alphabetical order)
+ * @author Nicolas Richart <nicolas.richart@epfl.ch>
+ * @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+ */
+project(bsplines_src)
+
+set(SRCS
+  bsplines.f90
+  matrix.f90
+  sparse_mod.f90
+  lapack_extra.f
+  math_util.f90
+)
+
+set(SRCS_PP
+  conmat.f90
+  )
+
+set(PUBLIC_MODULES
+  bsplines.mod
+  matrix.mod
+  math_util.mod
+  conmat_mod.mod
+  sparse.mod
+  )
+
+if(HAS_PARDISO)
+  list(APPEND ${SRCS_PP} pardiso_mod.f90)
+endif()
+
+set_property(SOURCE conmat.f90 APPEND PROPERTY COMPILE_OPTIONS -DWSMP ${MKL_DEFINITIONS})
+
+if(HAS_MUMPS)
+  list(APPEND SRCS
+    multigrid_mod.f90
+    )
+
+  list(APPEND SRCS_PP
+    mumps_mod.f90
+    csr_mod.f90
+    cds_mod.f90
+  )
+
+  list(APPEND PUBLIC_MODULES
+    cds.mod
+    csr.mod
+    mumps_bsplines.mod)
+
+  set_property(SOURCE conmat.f90 APPEND PROPERTY COMPILE_OPTIONS -DMUMPS)
+endif()
+
+set(_public_headers)
+foreach(_modules ${PUBLIC_MODULES})
+  list(APPEND _public_headers ${CMAKE_CURRENT_BINARY_DIR}/${_modules})
+endforeach()
+set_property(SOURCE ${SRCS_PP} APPEND PROPERTY COMPILE_OPTIONS -cpp)
+
+include(GNUInstallDirs)
+
+add_library(bsplines STATIC ${SRCS} ${SRCS_PP})
+target_include_directories(bsplines
+  PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
+          ${MUMPS_INCLUDE_DIR}
+  INTERFACE $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
+            $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
+  )
+
+set_property(TARGET bsplines
+  PROPERTY PUBLIC_HEADER ${_public_headers})
+
+target_link_libraries(bsplines
+  PUBLIC futils pppack pputils2 fft
+         ${BLAS_LIBRARIES}
+         ${MUMPS_LIBRARIES}
+         ${LAPACK_LIBRARIES}
+  )
+
+
+if(MKL_Fortran_FLAGS)
+  separate_arguments(MKL_Fortran_FLAGS)
+  target_compile_options(bsplines PUBLIC ${MKL_Fortran_FLAGS})
+  target_link_options(bsplines PUBLIC ${MKL_Fortran_FLAGS})
+endif()
+
+install(TARGETS bsplines
+  EXPORT ${BSPLINES_EXPORT_TARGETS}
+  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+  PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
+)
diff --git a/src/Makefile b/src/Makefile
new file mode 100644
index 0000000..229a18e
--- /dev/null
+++ b/src/Makefile
@@ -0,0 +1,176 @@
+#
+# @file Makefile
+#
+# @brief
+#
+# @copyright
+# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+# SPC (Swiss Plasma Center)
+#
+# spclibs is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# @authors
+# (in alphabetical order)
+# @author Stephan Brunner <stephan.brunner@epfl.ch>
+# @author Sébastien Jolliet <sebastien.jolliet@epfl.ch>
+# @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+#
+PREFIX=/usr/local/crpp
+FUTILS=$(PREFIX)/futils
+PPPACK=../pppack
+PPUTILS2=../pputils2
+
+MPIF90 = mpif90
+F90 = mpif90
+LD = $(MPIF90)
+
+debug = -g -traceback -check bounds -fpe0 -warn alignments -warn unused
+debug = -g -traceback -check bounds -fpe0 -warn alignments
+optim = -O3 -xHOST
+
+#OPT=$(debug)
+OPT=$(optim)
+
+F90FLAGS = $(OPT) -fPIC -I. -I$(FUTILS)/include
+
+CC = cc
+CFLAGS = -O2
+
+SPL_OBJS = bsplines.o matrix.o sparse_mod.o pardiso_mod.o \
+           lapack_extra.o conmat.o math_util.o
+
+ifdef MKL
+SPBLAS = -DMKL
+endif
+
+ifdef MUMPS
+SPL_OBJS += mumps_mod.o csr_mod.o cds_mod.o multigrid_mod.o
+F90FLAGS += -I$(MUMPS)/include
+endif
+
+ifdef WSMP
+SPL_OBJS += wsmp_mod.o pwsmp_mod.o
+endif
+
+ifdef PETSC_DIR
+SPL_OBJS += petsc_mod.o
+FCCPFLAGS = -I$(PETSC_DIR)/include -I$(PETSC_DIR)/$(PETSC_ARCH)/include
+endif
+
+.SUFFIXES:
+.SUFFIXES: .o .c .f90 .f .F90
+
+.f90.o:
+	$(MPIF90) $(F90FLAGS) -c $<
+.F90.o:
+	$(MPIF90) $(F90FLAGS) $(FCCPFLAGS) -c $<
+.f.o:
+	$(F90) $(F90FLAGS) -c $<
+
+SUBDIRS = pputils2 pppack fft
+subdirs: $(SUBDIRS)
+.PHONY: subdirs $(SUBSDIRS) $(PPUTILS2)
+
+$(SUBDIRS):
+	$(MAKE) "OPT=$(OPT)" -C ../$@ lib
+
+lib:	subdirs libbsplines.a
+	cp -p $(PPPACK)/libpppack.a ./
+	touch lib
+	cp -p lib ../examples
+
+libbsplines.a: $(SPL_OBJS)
+	xiar r $@ $?
+	ranlib $@
+
+debug:
+	make clean
+	make "OPT=$(debug)" lib
+	mkdir -p .g
+	cp -p libbsplines.a $(PPPACK)/libpppack.a *.mod .g/
+
+opt:
+	make clean
+	make "OPT=$(optim)" lib
+	mkdir -p $(PREFIX)/{lib,include}/O
+	mkdir -p .O
+	cp -p libbsplines.a $(PPPACK)/libpppack.a *.mod .O/
+
+install: debug opt
+	mkdir -p $(PREFIX)/{lib,include}/g
+	mv .g/*.a $(PREFIX)/lib/g/
+	mv .g/*.mod $(PREFIX)/include/g/
+	mkdir -p $(PREFIX)/{lib,include}/O
+	mv .O/*.a $(PREFIX)/lib/O/
+	mv .O/*.mod $(PREFIX)/include/O/
+
+
+uninstall:
+	rm -f $(PREFIX)/include/{O,g}/bsplines.mod \
+              $(PREFIX)/include/{O,g}/cds.mod \
+              $(PREFIX)/include/{O,g}/conmat_mod.mod \
+              $(PREFIX)/include/{O,g}/csr.mod \
+              $(PREFIX)/include/{O,g}/math_util.mod \
+              $(PREFIX)/include/{O,g}/matrix.mod \
+              $(PREFIX)/include/{O,g}/multigrid.mod \
+              $(PREFIX)/include/{O,g}/mumps_bsplines.mod \
+              $(PREFIX)/include/{O,g}/pardiso_bsplines.mod \
+              $(PREFIX)/include/{O,g}/petsc_bsplines.mod \
+              $(PREFIX)/include/{O,g}/sparse.mod \
+              $(PREFIX)/include/{O,g}/wsmp_bsplines.mod \
+              $(PREFIX)/lib/{O,g}/libbsplines.a \
+              $(PREFIX)/lib/{O,g}/libpppack.a
+
+matrix.o: matrix.f90
+sparse_mod.o: sparse_mod.f90
+bsplines.o: bsplines.f90 matrix.o
+multigrid_mod.o: bsplines.o matrix.o conmat.o csr_mod.o cds_mod.o
+
+conmat.o: conmat.f90 conmat.tpl conmat_1d.tpl zconmat.tpl zconmat_1d.tpl conrhs.tpl
+	    $(F90) -fpp -DMKL -DWSMP -DMUMPS $(F90FLAGS) -c conmat.f90
+
+cds_mod.o: cds_mod.f90
+	    $(F90) -fpp $(SPBLAS) $(F90FLAGS) -c cds_mod.f90
+
+pardiso_mod.o: pardiso_mod.f90 sparse_mod.o psum_mat.tpl p2p_mat.tpl
+	    $(F90) -fpp $(SPBLAS) $(F90FLAGS) -c pardiso_mod.f90
+
+mumps_mod.o:mumps_mod.f90  sparse_mod.o psum_mat.tpl p2p_mat.tpl
+	    $(F90) -fpp $(SPBLAS) $(F90FLAGS) -I$(PPUTILS2) -c mumps_mod.f90
+
+wsmp_mod.o: wsmp_mod.f90 sparse_mod.o psum_mat.tpl p2p_mat.tpl
+	    $(F90) -fpp $(SPBLAS) $(F90FLAGS) -I$(PPUTILS2) -c wsmp_mod.f90
+
+pwsmp_mod.o: pwsmp_mod.f90 sparse_mod.o wsmp_mod.o psum_mat.tpl p2p_mat.tpl
+	$(F90) -fpp $(SPBLAS) $(F90FLAGS) -I$(PPUTILS2) -c pwsmp_mod.f90
+
+petsc_mod.o: petsc_mod.F90 sparse_mod.o
+	$(F90) -fpp $(FCCPFLAGS) $(SPBLAS) $(F90FLAGS) -I$(PPUTILS2) -c petsc_mod.F90
+
+csr_mod.o: csr_mod.f90 sparse_mod.o mumps_mod.o
+	    $(F90) -fpp $(SPBLAS) $(F90FLAGS) -I$(PPUTILS2) -c csr_mod.f90
+
+tags:
+	etags *.f *.f90 $(PPPACK)/*.f90
+
+clean:
+	$(MAKE) -C $(PPPACK) clean
+	$(MAKE) -C ../fft clean
+	rm -f *.o *.mod *~ a.out
+
+distclean: clean
+	$(MAKE) -C $(PPPACK) distclean
+	$(MAKE) -C ../fft distclean
+	$(MAKE) -C $(PPUTILS2) distclean
+	rm -f lib *.a *.mod ../bin/*
+	rm -rf .O .g
diff --git a/src/bsplines.f90 b/src/bsplines.f90
new file mode 100644
index 0000000..1feff79
--- /dev/null
+++ b/src/bsplines.f90
@@ -0,0 +1,4285 @@
+!>
+!> @file bsplines.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Stephan Brunner <stephan.brunner@epfl.ch>
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+MODULE bsplines
+!
+!    BSPLINES: A module to construct B-Splines of any order on
+!              non-equidistant mesh. Can be used for interpolation and
+!              Finite Element discretization.
+!
+!    T.M. Tran, S. Brunner, CRPP-EPFL
+!    February 2007
+!
+  USE matrix
+  IMPLICIT NONE
+  PRIVATE
+  PUBLIC :: spline1d, set_spline, get_dim, get_gauss, gridval
+  PUBLIC :: spline2d, spline2d1d, def_knots, allsplines
+  PUBLIC :: set_splcoef, get_splcoef
+  PUBLIC :: fintg, calc_integ, destroy_sp
+  PUBLIC :: gauleg, CompMassMatrix
+  PUBLIC :: basfun_recur, basfun, def_basfun, is_equid, locintv_old, locintv
+  PUBLIC :: calc_fftmass, calc_fftmass_old
+  PUBLIC :: init_dft, ft_basfun
+  PUBLIC :: getgrad
+  PUBLIC :: dftmap
+!
+  DOUBLE PRECISION, PARAMETER :: pi=3.1415926535897931d0
+!
+  TYPE dftmap
+     INTEGER :: n                                    ! Total number of modes
+     INTEGER :: kmin, kmax                           ! Define the Foyrier window
+     INTEGER :: dk                                   ! Number of modes in window
+     DOUBLE PRECISION :: dx                          ! Interval in real space
+     INTEGER, POINTER :: mode_couplings(:) => NULL() ! Table of mode couplings
+     DOUBLE COMPLEX, POINTER :: coefs(:,:) => NULL() ! The restricted Fourier coefs
+  END TYPE dftmap
+!
+  TYPE spline1d
+     INTEGER :: order                 ! Spline order = spline degree + 1
+     INTEGER :: nints                 ! Number of knot intervals
+     INTEGER :: nsites                ! Number of interpolation sites
+     INTEGER :: dim                   ! Dimension of spline space
+     INTEGER :: left=0                ! Save value used by routine LOCATE
+     LOGICAL :: period                ! is the grid periodic ?
+     LOGICAL :: nlequid               ! is the grid equidistant ?
+     LOGICAL :: nlppform              ! Construct and build PPFORM in GRIDVAL if .TRUE.
+     DOUBLE PRECISION :: lperiod      ! Periodicity
+     DOUBLE PRECISION :: hinv         ! Inverse of mesh size, when nlequid=T
+     INTEGER, POINTER ::&
+          & fmap(:) => NULL()         ! Mapping of fine to coarse mesh
+     DOUBLE PRECISION, POINTER :: &
+          & knots(:) => NULL(), &     ! Spline knots (-p:n+p)
+          & val0(:,:,:) => NULL(), &  ! Values and deriv. at left boundary of all splines 
+          & valc(:,:) => NULL(), &    ! Values and deriv. at left boundary of equid. per. splines 
+          & gausx(:,:) => NULL(), &   ! Gauss abscissas
+          & gausw(:,:) => NULL(), &   ! Gauss weights
+          & intspl(:) => NULL(), &    ! Integral of splines
+          & ppform(:,:) => NULL(), &  ! PPFORM coefs
+          & bcoefs(:) => NULL()       ! Spline coefs
+     DOUBLE COMPLEX, POINTER :: &
+          & ppformz(:,:) => NULL(), & ! PPFORM coefs for complex function
+          & bcoefsc(:) => NULL()      ! Spline coefs for complex function
+     TYPE(GBMAT) :: mat               ! Interpolation matrix
+     TYPE(periodic_mat) :: matp       ! Interpolation matrix (periodic case)
+     TYPE(dftmap) :: dft                ! Define DFT mapping
+  END TYPE spline1d
+!
+  TYPE spline2d
+     TYPE(spline1d) :: sp1                                  ! Spline in direction 1
+     TYPE(spline1d) :: sp2                                  ! Spline in direction 2
+     DOUBLE PRECISION, POINTER :: ppform(:,:,:,:) => NULL() ! 2d PPFORM coefs
+     DOUBLE PRECISION, POINTER :: bcoefs(:,:) => NULL()     ! Spline coefs
+     DOUBLE COMPLEX, POINTER :: ppformz(:,:,:,:) => NULL()  ! PPFORM coefs for complex function
+     DOUBLE COMPLEX, POINTER :: bcoefsc(:,:) => NULL()      ! Spline coefs for complex function
+  END TYPE spline2d
+!
+  TYPE spline2d1d
+     TYPE(spline2d) :: sp12  ! 2D spline for dir. 1 and 2
+     TYPE(spline1d) :: sp3   ! 1D spline for dir. 3
+     DOUBLE PRECISION, POINTER :: ppform(:,:,:,:,:,:) => NULL() ! PPFORM coefs
+     DOUBLE PRECISION, POINTER :: bcoefs(:,:,:) => NULL()       ! Spline coefs
+     DOUBLE PRECISION, POINTER :: ppformz(:,:,:,:,:,:) => NULL()! PPFORM coefs for complex function
+     DOUBLE COMPLEX, POINTER :: bcoefsc(:,:,:) => NULL()        ! Spline coefs for complex function
+  END TYPE spline2d1d
+!
+  INTERFACE set_spline
+     MODULE PROCEDURE set_spline1d, set_spline2d, set_spline2d1d
+  END INTERFACE
+  INTERFACE get_dim
+     MODULE PROCEDURE get_dim1, get_dim2
+  END INTERFACE
+  INTERFACE gridval
+     MODULE PROCEDURE gridval1d, gridval1dz, &
+          &           gridval2d, gridval2dz, &
+          &           gridval2d_1d, gridval2d_1dz, &
+          &           gridval2d_2d, gridval2d_2dz, &
+          &           gridval2d1d_3d, gridval2d1d_1d 
+  END INTERFACE
+  INTERFACE set_splcoef
+     MODULE PROCEDURE set_splcoef1d, set_splcoef2d
+  END INTERFACE
+  INTERFACE get_splcoef
+     MODULE PROCEDURE get_splcoef1, get_splcoef1z, get_splcoefn, &
+          &           get_splcoef2d, get_splcoef2dz
+  END INTERFACE
+  INTERFACE fintg
+     MODULE PROCEDURE fintg1, fintg2
+  END INTERFACE
+  INTERFACE destroy_sp
+     MODULE PROCEDURE destroy_sp1d, destroy_sp2d, destroy_sp2d1d
+  END INTERFACE
+  INTERFACE CompMassMatrix
+     MODULE PROCEDURE CompMassMatrix1, CompMassMatrix_gb, CompMassMatrix_zgb
+  END INTERFACE
+  INTERFACE calc_integ
+     MODULE PROCEDURE calc_integ0,calc_integn 
+  END INTERFACE
+  INTERFACE locintv
+     MODULE PROCEDURE locintv0, locintv1
+  END INTERFACE locintv
+  INTERFACE locintv_old
+     MODULE PROCEDURE locintv0_old, locintv1_old
+  END INTERFACE locintv_old
+  INTERFACE ppval
+     MODULE PROCEDURE ppval0,  ppval1,  ppval2, &
+          &           ppval0_n, &
+          &           ppval0z, ppval1z, ppval2z, &
+          &           ppval0z_n
+  END INTERFACE ppval
+  INTERFACE basfun
+     MODULE PROCEDURE basfun0, basfun1
+  END INTERFACE basfun
+  INTERFACE ft_basfun
+     MODULE PROCEDURE ft_basfun0, ft_basfun1
+  END INTERFACE ft_basfun
+  INTERFACE def_basfun
+     MODULE PROCEDURE def_basfun0, def_basfun1
+  END INTERFACE def_basfun
+  INTERFACE getgrad
+     MODULE PROCEDURE getgradr, getgradz
+  END INTERFACE getgrad
+!
+CONTAINS
+!===========================================================================
+  SUBROUTINE set_spline1d(p, ngauss, grid, sp, period, nlppform, nlequid)
+!
+!  Setup a spline
+!
+    INTEGER, INTENT(in) :: p, ngauss
+    DOUBLE PRECISION, INTENT(in)  :: grid(:)
+    TYPE(spline1d), INTENT(out)   :: sp
+    LOGICAL, OPTIONAL, INTENT(in) :: period
+    LOGICAL, OPTIONAL, INTENT(in) :: nlppform
+    LOGICAL, OPTIONAL, INTENT(in) :: nlequid
+!
+    DOUBLE COMPLEX :: zc
+    DOUBLE PRECISION :: leng, xp, factinv, h
+    INTEGER :: order, nints, i, k
+    DOUBLE PRECISION :: temp(1:p+1,0:p)
+!
+!  Order of splines
+    order = p+1
+    sp%order = order
+!
+!  Dimension of spline space
+    nints = SIZE(grid)-1
+    sp%nints = nints
+    sp%period = .FALSE.
+    IF( PRESENT(period) ) THEN 
+       sp%period = period
+       sp%lperiod = grid(nints+1) - grid(1)
+    END IF
+    sp%dim = nints+p
+!
+!  Use or not PPFORM
+    sp%nlppform = .TRUE.
+    IF( PRESENT(nlppform) ) THEN
+       sp%nlppform = nlppform
+    END IF
+!
+!  Determine sequence of knots
+    IF( ASSOCIATED(sp%knots) ) DEALLOCATE(sp%knots)
+    ALLOCATE( sp%knots(-p:nints+p) )
+    sp%knots(0:nints) = grid(:)
+!
+!  Is the grid equidistant ?
+    IF( PRESENT(nlequid) ) THEN
+       sp%nlequid = nlequid
+    ELSE
+       sp%nlequid = is_equid(grid)
+    END IF
+!
+!  Coarse to fine mesh mapping for non-equidistant mesh
+    IF(sp%nlequid) THEN
+       sp%hinv = 1.0d0/(sp%knots(1)-sp%knots(0))
+    ELSE
+       IF(ASSOCIATED(sp%fmap)) DEALLOCATE(sp%fmap)
+       CALL create_fine(sp%knots(0:nints), h, sp%fmap)
+       sp%hinv = 1.0d0/h
+    END IF
+!
+!  Extend knots at both sides of given grid points
+    IF( sp%period ) THEN
+       leng = sp%knots(nints) - sp%knots(0)
+       DO i=-1,-p,-1
+          sp%knots(i) = sp%knots(nints+i) - leng
+       END DO
+       DO i=1,p
+          sp%knots(nints+i) = sp%knots(i) + leng
+       END DO
+!!$       sp%knots(-p:-1) = sp%knots(nints-p:nints-1) - leng
+!!$       sp%knots(nints+1:nints+p) = leng + sp%knots(1:p)
+    ELSE
+       sp%knots(-p:-1) = sp%knots(0)
+       sp%knots(nints+1:nints+p) = sp%knots(nints)
+    END IF
+!
+!  Precalculated values of all splines and their derivatives at left boundaries 
+    IF( ASSOCIATED(sp%val0) ) DEALLOCATE(sp%val0)
+    ALLOCATE( sp%val0(0:p, p+1, 1:nints) )
+    sp%val0 = 0.0d0
+    DO i=1,nints
+       xp = sp%knots(i-1) + EPSILON(1.0d0)*ABS(sp%knots(i-1))
+       CALL basfun_recur(xp, sp, temp, i)
+       sp%val0(:,:,i) = TRANSPOSE(temp)
+    END DO
+!
+    factinv = 1.0d0
+    DO k=2,p   !   Divide by k! for use in PPFORM_ALT
+       factinv = factinv/k
+       sp%val0(k,:,:) = sp%val0(k,:,:)*factinv
+    END DO
+!
+! Case of periodic equidistant splines (translational invariance)
+    IF(sp%period .AND. sp%nlequid) THEN
+       IF( ASSOCIATED(sp%valc) ) DEALLOCATE(sp%valc)
+       ALLOCATE(sp%valc(0:p, p+1))
+       sp%valc = sp%val0(:,:,1)
+    END IF
+!
+!  Gauss abscissas and weights
+    IF( ngauss .GT. 0 ) THEN
+       IF( ASSOCIATED(sp%gausx) ) DEALLOCATE(sp%gausx)
+       IF( ASSOCIATED(sp%gausw) ) DEALLOCATE(sp%gausw)
+       ALLOCATE(sp%gausx(ngauss,nints))
+       ALLOCATE(sp%gausw(ngauss,nints))
+       DO i=1,nints
+          CALL gauleg(sp%knots(i-1), sp%knots(i),  &
+               &      sp%gausx(1:ngauss,i), sp%gausw(1:ngauss,i), ngauss)
+       END DO
+    END IF
+!
+!  Compute integral of each splines
+    IF( ASSOCIATED(sp%intspl) ) DEALLOCATE(sp%intspl)
+    ALLOCATE(sp%intspl(0:sp%dim-1))
+    CALL calc_integ(sp, sp%intspl)
+!
+  END SUBROUTINE set_spline1d
+!===========================================================================
+  SUBROUTINE init_dft(sp, kmin, kmax, couplings)
+!
+!   Initialize DFT
+!
+    TYPE(spline1d)      :: sp
+    INTEGER, INTENT(in) :: kmin, kmax
+    INTEGER, INTENT(in), OPTIONAL :: couplings(:)
+!
+    INTEGER :: n, p, dk, k, j, nc
+    DOUBLE COMPLEX :: zc
+!
+    n = sp%nints
+    p = sp%order-1
+    dk = kmax-kmin+1
+!
+! Check that -N/2 .LE. Kmin .LE. Kmax .LT. N/2
+!
+    IF(kmin.GT.kmax .OR. kmin.LT.-n/2 .OR. kmax.GE.n/2) THEN
+       WRITE(*,'(a,2i6,a)') 'kmin, kmax =', kmin, kmax, ' erroneous!'
+       STOP
+    END IF
+!
+! The Fourier window
+!
+    sp%dft%n = n
+    sp%dft%kmin = kmin
+    sp%dft%kmax = kmax
+    sp%dft%dk = dk
+    sp%dft%dx = sp%knots(1) - sp%knots(0)
+!
+! Precalculate the DFT coefs  exp( i(2*pi/N)jk ), k=kmin,kmax, j=0,p
+!
+    IF( ASSOCIATED(sp%dft%coefs) ) DEALLOCATE(sp%dft%coefs)
+    ALLOCATE(sp%dft%coefs(kmin:kmax,0:p))
+    zc = EXP( CMPLX(0.0d0, 2.0d0*pi/REAL(n,8),8) )
+    sp%dft%coefs(:,0) = 1.0d0       ! j=0
+    sp%dft%coefs(kmin,1) = zc**kmin ! j=1 
+    DO k=kmin+1,kmax
+       sp%dft%coefs(k,1) = sp%dft%coefs(k-1,1)*zc
+    END DO
+    DO j=2,p
+       sp%dft%coefs(:,j) = sp%dft%coefs(:,1)*sp%dft%coefs(:,j-1)
+    END DO
+!
+!   Mode couplings: by default use the whole window
+!
+    nc = dk
+    IF( PRESENT(couplings)) nc = SIZE(couplings)
+!
+    IF(ASSOCIATED(sp%dft%mode_couplings)) DEALLOCATE(sp%dft%mode_couplings)
+    ALLOCATE(sp%dft%mode_couplings(nc))
+!
+    IF(PRESENT(couplings)) THEN
+       sp%dft%mode_couplings = couplings
+    ELSE
+       sp%dft%mode_couplings = (/ (k,k=kmin,kmax) /)
+    END IF
+  END SUBROUTINE init_dft
+!===========================================================================
+  SUBROUTINE set_spline2d(p, ngauss, grid1, grid2, sp, period, nlppform,&
+       &                  nlequid)
+!
+!  Setup a 2d spline
+!
+    INTEGER, INTENT(in) :: p(2), ngauss(2)
+    DOUBLE PRECISION, INTENT(in)  :: grid1(:)
+    DOUBLE PRECISION, INTENT(in)  :: grid2(:)
+    TYPE(spline2d), INTENT(out)   :: sp
+    LOGICAL, OPTIONAL, INTENT(in) :: period(2)
+    LOGICAL, OPTIONAL, INTENT(in) :: nlppform
+    LOGICAL, OPTIONAL, INTENT(in) :: nlequid(2)
+!
+    IF(PRESENT(period).AND.PRESENT(nlppform).AND.PRESENT(nlequid)) THEN
+       CALL set_spline1d(p(1), ngauss(1), grid1, sp%sp1, period(1), nlppform, &
+            &            nlequid(1))
+       CALL set_spline1d(p(2), ngauss(2), grid2, sp%sp2, period(2), nlppform, &
+            &            nlequid(2))
+    ELSE IF(PRESENT(period).AND.PRESENT(nlppform)) THEN
+       CALL set_spline1d(p(1), ngauss(1), grid1, sp%sp1, period(1), nlppform)
+       CALL set_spline1d(p(2), ngauss(2), grid2, sp%sp2, period(2), nlppform)
+    ELSE IF(PRESENT(period).AND.PRESENT(nlequid)) THEN
+       CALL set_spline1d(p(1), ngauss(1), grid1, sp%sp1, period=period(1), nlequid=nlequid(1))
+       CALL set_spline1d(p(2), ngauss(2), grid2, sp%sp2, period=period(2), nlequid=nlequid(2))
+    ELSE IF(PRESENT(nlppform).AND.PRESENT(nlequid)) THEN
+       CALL set_spline1d(p(1), ngauss(1), grid1, sp%sp1, nlppform=nlppform, nlequid=nlequid(1))
+       CALL set_spline1d(p(2), ngauss(2), grid2, sp%sp2, nlppform=nlppform, nlequid=nlequid(2))
+    ELSE IF(PRESENT(period)) THEN
+       CALL set_spline1d(p(1), ngauss(1), grid1, sp%sp1, period(1))
+       CALL set_spline1d(p(2), ngauss(2), grid2, sp%sp2, period(2))
+    ELSE IF(PRESENT(nlppform)) THEN
+       CALL set_spline1d(p(1), ngauss(1), grid1, sp%sp1, nlppform=nlppform)
+       CALL set_spline1d(p(2), ngauss(2), grid2, sp%sp2, nlppform=nlppform)
+    ELSE IF(PRESENT(nlequid)) THEN
+       CALL set_spline1d(p(1), ngauss(1), grid1, sp%sp1, nlequid=nlequid(1))
+       CALL set_spline1d(p(2), ngauss(2), grid2, sp%sp2, nlequid=nlequid(2))
+    ELSE
+       CALL set_spline1d(p(1), ngauss(1), grid1, sp%sp1)
+       CALL set_spline1d(p(2), ngauss(2), grid2, sp%sp2)
+    END IF
+  END SUBROUTINE set_spline2d
+!===========================================================================
+  SUBROUTINE set_spline2d1d(p, ngauss, grid1, grid2, grid3, sp, period, &
+       &                    nlppform, nlequid)
+!
+!  Setup a 2d1d spline (for axisymmetric problems)
+!
+    INTEGER, INTENT(in) :: p(3), ngauss(3)
+    DOUBLE PRECISION, INTENT(in)  :: grid1(:)
+    DOUBLE PRECISION, INTENT(in)  :: grid2(:)
+    DOUBLE PRECISION, INTENT(in)  :: grid3(:)
+    TYPE(spline2d1d), INTENT(out) :: sp
+    LOGICAL, OPTIONAL, INTENT(in) :: period(3)
+    LOGICAL, OPTIONAL, INTENT(in) :: nlppform
+    LOGICAL, OPTIONAL, INTENT(in) :: nlequid(3)
+!
+    IF(PRESENT(period).AND.PRESENT(nlppform).AND.PRESENT(nlequid)) THEN
+       CALL set_spline2d(p(1:2), ngauss(1:2), grid1, grid2, sp%sp12, period(1:2),&
+            &            nlppform, nlequid(1:2))
+       CALL set_spline1d(p(3), ngauss(3), grid3, sp%sp3, period(3), nlppform, &
+            &            nlequid(3))
+    ELSE IF(PRESENT(period).AND.PRESENT(nlppform)) THEN
+       CALL set_spline2d(p(1:2), ngauss(1:2), grid1, grid2, sp%sp12, period(1:2),&
+            &            nlppform)
+       CALL set_spline1d(p(3), ngauss(3), grid3, sp%sp3, period(3), nlppform)
+    ELSE IF(PRESENT(period).AND.PRESENT(nlequid)) THEN
+       CALL set_spline2d(p(1:2), ngauss(1:2), grid1, grid2, sp%sp12, period=period(1:2),&
+            &            nlequid=nlequid(1:2))
+       CALL set_spline1d(p(3), ngauss(3), grid3, sp%sp3, period=period(3), &
+            &            nlequid=nlequid(3))
+    ELSE IF(PRESENT(nlppform).AND.PRESENT(nlequid)) THEN
+       CALL set_spline2d(p(1:2), ngauss(1:2), grid1, grid2, sp%sp12, nlppform=nlppform,&
+            &            nlequid=nlequid(1:2))
+       CALL set_spline1d(p(3), ngauss(3), grid3, sp%sp3, nlppform=nlppform,  &
+            &            nlequid=nlequid(3))
+    ELSE IF(PRESENT(period)) THEN
+       CALL set_spline2d(p(1:2), ngauss(1:2), grid1, grid2, sp%sp12, period(1:2))
+       CALL set_spline1d(p(3), ngauss(3), grid3, sp%sp3, period(3))
+    ELSE IF(PRESENT(nlppform)) THEN
+       CALL set_spline2d(p(1:2), ngauss(1:2), grid1, grid2, sp%sp12, nlppform=nlppform)
+       CALL set_spline1d(p(3), ngauss(3), grid3, sp%sp3, nlppform=nlppform)
+    ELSE IF(PRESENT(nlequid)) THEN
+       CALL set_spline2d(p(1:2), ngauss(1:2), grid1, grid2, sp%sp12, nlequid=nlequid(1:2))
+       CALL set_spline1d(p(3), ngauss(3), grid3, sp%sp3, nlequid=nlequid(3))
+    ELSE
+       CALL set_spline2d(p(1:2), ngauss(1:2), grid1, grid2, sp%sp12)
+       CALL set_spline1d(p(3), ngauss(3), grid3, sp%sp3)
+    END IF
+  END SUBROUTINE set_spline2d1d
+!===========================================================================
+  SUBROUTINE get_dim1(sp, dim, nx, nidbas)
+!
+!  Return spline dimension of 1d spline sp and optionally
+!  number of knot intervals nx and spline degree nidbas
+!
+    TYPE(spline1d), INTENT(in) :: sp
+    INTEGER, INTENT(out) :: dim
+    INTEGER, OPTIONAL, INTENT(out) :: nx, nidbas
+    dim = sp%dim
+    IF( PRESENT(nx) ) nx = sp%nints
+    IF( PRESENT(nidbas) ) nidbas = sp%order - 1
+  END SUBROUTINE get_dim1
+!===========================================================================
+  SUBROUTINE get_dim2(sp, dim, nx, nidbas)
+!
+!  Return spline dimension of 2d spline sp and optionally
+!  number of knot intervals nx and spline degree nidbas
+!
+    TYPE(spline2d), INTENT(in) :: sp
+    INTEGER, INTENT(out) :: dim(2)
+    INTEGER, OPTIONAL, INTENT(out) :: nx(2), nidbas(2)
+!
+    dim(1) = sp%sp1%dim
+    IF( PRESENT(nx) ) nx(1) = sp%sp1%nints
+    IF( PRESENT(nidbas) ) nidbas(1) = sp%sp1%order - 1
+!
+    dim(2) = sp%sp2%dim
+    IF( PRESENT(nx) ) nx(2) = sp%sp2%nints
+    IF( PRESENT(nidbas) ) nidbas(2) = sp%sp2%order - 1
+  END SUBROUTINE get_dim2
+!===========================================================================
+  SUBROUTINE get_gauss(sp, n, i, x, w)
+!
+!   Get Gauss points and weights from spline sp
+!
+    TYPE(spline1d), INTENT(in) :: sp
+    INTEGER, INTENT(out) :: n
+    INTEGER, INTENT(in), OPTIONAL :: i
+    DOUBLE PRECISION, DIMENSION(:), OPTIONAL, INTENT(out) :: x, w
+!
+    n = SIZE(sp%gausx, 1)
+    IF( PRESENT(i) ) THEN 
+       x(:) = sp%gausx(:,i)
+       w(:) = sp%gausw(:,i)
+    END IF
+  END SUBROUTINE get_gauss
+!===========================================================================
+  SUBROUTINE def_basfun0(xp, sp, fun, left)
+!
+!  Define the basis function and its derivatives at x
+!  fun(i,j) = (j-1)th derivative of ith basis function.
+!
+    DOUBLE PRECISION, INTENT(in) :: xp
+    TYPE(spline1d), INTENT(in) :: sp
+    DOUBLE PRECISION, INTENT(out) :: fun(:,:)
+    INTEGER, OPTIONAL, INTENT(out) :: left
+    DOUBLE PRECISION :: x
+    INTEGER :: p, n, kleft
+    INTEGER :: ierr, j, k
+!
+    CALL locintv(sp, xp, kleft)
+    CALL basfun(xp, sp, fun, kleft+1)
+    IF(PRESENT(left)) THEN
+       left = kleft
+    END IF
+  END SUBROUTINE def_basfun0
+!===========================================================================
+  SUBROUTINE basfun0(xp, sp, f, left)
+!
+!  Define the basis function and its derivatives at x, in interval
+!  [left,left+1], using PPFORM defined by sp%val0., left=1,..,nints
+!    f(i,j) = jth derivative of ith basis function.
+!
+    DOUBLE PRECISION, INTENT(in)   :: xp
+    DOUBLE PRECISION, INTENT(out)  :: f(:,0:)
+    INTEGER, INTENT(in)            :: left   ! =1,2,...,nints
+    TYPE(spline1d) :: sp
+!
+    INTEGER          :: p, n, jdermx, i, jder
+    DOUBLE PRECISION :: x, h
+!
+    p = sp%order - 1
+    n = sp%nints
+    jdermx = SIZE(f,2)-1
+!
+    h = xp-sp%knots(left-1)  ! knots are numbered from 0
+!
+    IF(sp%period .AND. sp%nlequid) THEN
+       DO jder=0,jdermx   ! Derivative jder
+          CALL my_ppval(p, h, sp%valc, jder, f(:,jder))
+       END DO
+    ELSE
+       DO jder=0,jdermx   ! Derivative jder
+          CALL my_ppval(p, h, sp%val0(:,:,left), jder, f(:,jder))
+       END DO
+    END IF
+  CONTAINS
+    SUBROUTINE my_ppval(p, x, ppform, jder, f)
+      INTEGER, INTENT(in) :: p
+      DOUBLE PRECISION, INTENT(in) :: x, ppform(:,:)
+      INTEGER, INTENT(in) :: jder
+      DOUBLE PRECISION, INTENT(out) :: f(:)
+      DOUBLE PRECISION :: fact
+      INTEGER :: j
+      SELECT CASE (jder)
+      CASE(0)            ! function value
+         SELECT CASE(p)
+         CASE(1)
+            f(1) = ppform(1,1) + x*ppform(2,1)
+         CASE(2)
+            f(1) = ppform(1,1) + x*(ppform(2,1)+x*ppform(3,1))
+            f(2) = ppform(1,2) + x*(ppform(2,2)+x*ppform(3,2))
+         CASE(3)
+            f(1) = ppform(1,1) + x*(ppform(2,1)+x*(ppform(3,1)+x*ppform(4,1))) 
+            f(2) = ppform(1,2) + x*(ppform(2,2)+x*(ppform(3,2)+x*ppform(4,2)))
+            f(3) = ppform(1,3) + x*(ppform(2,3)+x*(ppform(3,3)+x*ppform(4,3)))
+        CASE(4:)
+            f(1:p) = ppform(p+1,1:p)
+            DO j=p,1,-1
+               f(1:p) = f(1:p)*x + ppform(j,1:p)
+            END DO
+         END SELECT
+         f(p+1) = 1.0d0 - SUM(f(1:p))
+      CASE(1)            ! 1st derivative
+         SELECT CASE(p)
+         CASE(1)
+            f(1) = ppform(2,1)
+         CASE(2)
+            f(1) = ppform(2,1) + x*2.d0*ppform(3,1)
+            f(2) = ppform(2,2) + x*2.d0*ppform(3,2)
+         CASE(3)
+            f(1) = ppform(2,1) + x*(2.d0*ppform(3,1)+x*3.0d0*ppform(4,1))
+            f(2) = ppform(2,2) + x*(2.d0*ppform(3,2)+x*3.0d0*ppform(4,2))
+            f(3) = ppform(2,3) + x*(2.d0*ppform(3,3)+x*3.0d0*ppform(4,3))
+         CASE(4:)
+            f(1:p) = p*ppform(p+1,1:p)
+            DO j=p-1,1,-1
+               f(1:p) = f(1:p)*x + j*ppform(j+1,1:p)
+            END DO
+         END SELECT
+         f(p+1) = -SUM(f(1:p))
+      CASE default       ! 2nd and higher derivatives
+         fact = p-jder
+         f(1:p) = ppform(p+1,1:p)
+         DO j=p,jder+1,-1
+            f(1:p) = f(1:p)/fact*j*x + ppform(j,1:p)
+            fact = fact-1.0d0
+         END DO
+         DO j=2,jder
+            f(1:p) = f(1:p)*j
+         END DO
+         f(p+1) = -SUM(f(1:p))
+      END SELECT
+    END SUBROUTINE my_ppval
+  END SUBROUTINE basfun0
+!===========================================================================
+  SUBROUTINE basfun1(xp, sp, f, left)
+!
+!  Define the basis function and its derivatives at x, in interval i=1,2,
+!  using PPFORM defined by sp%val0.
+!    f(i,j,p) = jth derivative of ith basis function at coordinate xp
+!
+    DOUBLE precision, INTENT(in)   :: xp(:)
+    DOUBLE PRECISION, INTENT(out)  :: f(0:,0:,:)
+    INTEGER, INTENT(in)            :: left(:)  ! =1,2,...,nints
+    TYPE(spline1d) :: sp
+!
+    INTEGER          :: p, n, kleft, i, j, jder, ierr
+    INTEGER :: npt, jdermx
+    DOUBLE PRECISION :: h(SIZE(xp)), temp(SIZE(xp))
+    DOUBLE PRECISION :: ppform(SIZE(xp),sp%order)
+!
+    p = sp%order - 1
+    n = sp%nints
+    npt = SIZE(xp)
+    jdermx = SIZE(f,2)-1
+!
+    h = xp - sp%knots(left-1)  ! knots are numbered from 0
+!
+    IF( sp%period .AND. sp%nlequid) THEN
+       DO jder=0,jdermx
+          CALL my_ppval_same(p, h, sp%valc, jder, f(:,jder,1:npt))
+       END DO
+    ELSE
+       DO i=0,p                  ! Spline i
+          DO j=1,npt
+             ppform(j,:) = sp%val0(:,i+1,left(j))
+          END DO
+          DO jder=0,jdermx       ! Derivative jder
+             CALL my_ppval(p, h, ppform, jder, temp)
+             f(i,jder,1:npt) = temp
+          END DO
+       END DO
+    END IF
+  CONTAINS
+!+++
+    SUBROUTINE my_ppval(p, x, ppform, jder, f)
+!
+!   Compute function and derivatives from the PP representation
+!   for many points x(:)
+      INTEGER, INTENT(in) :: p
+      DOUBLE PRECISION, INTENT(in) :: x(:)
+      DOUBLE PRECISION, INTENT(in) :: ppform(:,:)
+      INTEGER, INTENT(in) :: jder
+      DOUBLE PRECISION, INTENT(out) :: f(:)
+      DOUBLE PRECISION :: fact
+      INTEGER :: j
+      SELECT CASE (jder)
+      CASE(0)            ! function value
+         SELECT CASE(p)
+         CASE(1)
+            f(:) = ppform(:,1) + x(:)*ppform(:,2)
+         CASE(2)
+            f(:) = ppform(:,1) + x(:)*(ppform(:,2)+x(:)*ppform(:,3))
+         CASE(3)
+            f(:) = ppform(:,1) + x(:)*(ppform(:,2)+x(:)*(ppform(:,3)+x(:)*ppform(:,4)))
+         CASE(4:)
+            f(:) = ppform(:,p+1)
+            DO j=p,1,-1
+               f(:) = ppform(:,j) + f(:)*x(:)
+            END DO
+         END SELECT
+      CASE(1)            ! 1st derivative
+         SELECT CASE(p)
+         CASE(1)
+            f(:) = ppform(:,2)
+         CASE(2)
+            f(:) = ppform(:,2) + x(:)*2.d0*ppform(:,3)
+         CASE(3)
+            f(:) = ppform(:,2) + x(:)*(2.d0*ppform(:,3)+x(:)*3.0d0*ppform(:,4))
+         CASE(4:)
+            f(:) = p*ppform(:,p+1)
+            DO j=p-1,1,-1
+               f(:) = f(:)*x(:) + j*ppform(:,j+1)
+            END DO
+         END SELECT
+      CASE default       ! 2nd and higher derivatives
+         f(:) = ppform(:,p+1)
+         fact = p-jder
+         DO j=p,jder+1,-1
+            f(:) = f(:)/fact*j*x(:) + ppform(:,j)
+            fact = fact-1.0d0
+         END DO
+         DO j=2,jder
+            f(:) = f(:)*j
+         END DO
+      END SELECT
+    END SUBROUTINE my_ppval
+!+++
+    SUBROUTINE my_ppval_same(p, x, ppform, jder, f)
+!
+!   Compute function and derivatives from the PP representation
+!   for many points x(:), same ppform (translationnal invariant spline)
+    INTEGER, INTENT(in) :: p
+    DOUBLE PRECISION, INTENT(in) :: x(:)
+    DOUBLE PRECISION, INTENT(in) :: ppform(:,:)
+    INTEGER, INTENT(in) :: jder
+    DOUBLE PRECISION, INTENT(out) :: f(:,:)
+    DOUBLE PRECISION :: fact
+    INTEGER :: j,k
+    SELECT CASE (jder)
+!
+! function value
+    CASE(0)            
+       SELECT CASE(p)
+       CASE(1)
+          f(1,:) = ppform(1,1) + x(:)*ppform(2,1)
+       CASE(2)
+          f(1,:) = ppform(1,1) + x(:)*(ppform(2,1)+x(:)*ppform(3,1))
+          f(2,:) = ppform(1,2) + x(:)*(ppform(2,2)+x(:)*ppform(3,2))
+       CASE(3)
+          f(1,:) = ppform(1,1) + x(:)*(ppform(2,1)+x(:)*(ppform(3,1)+x(:)*ppform(4,1)))
+          f(2,:) = ppform(1,2) + x(:)*(ppform(2,2)+x(:)*(ppform(3,2)+x(:)*ppform(4,2)))
+          f(3,:) = ppform(1,3) + x(:)*(ppform(2,3)+x(:)*(ppform(3,3)+x(:)*ppform(4,3)))
+       CASE(4:)
+          DO k=1,p
+             f(k,:) = ppform(p+1,k)
+             DO j=p,1,-1
+                f(k,:) = ppform(j,k) + f(k,:)*x(:)
+             END DO
+          END DO
+       END SELECT
+       f(p+1,:) = 1.0d0 - SUM(f(1:p,:),DIM=1)
+!
+! 1st derivative
+    CASE(1)         
+       SELECT CASE(p)
+       CASE(1)
+          f(1,:) = ppform(2,1)
+       CASE(2)
+          f(1,:) = ppform(2,1) + x(:)*2.d0*ppform(3,1)
+          f(2,:) = ppform(2,2) + x(:)*2.d0*ppform(3,2)
+       CASE(3)
+          f(1,:) = ppform(2,1) + x(:)*(2.d0*ppform(3,1)+x(:)*3.0d0*ppform(4,1))
+          f(2,:) = ppform(2,2) + x(:)*(2.d0*ppform(3,2)+x(:)*3.0d0*ppform(4,2))
+          f(3,:) = ppform(2,3) + x(:)*(2.d0*ppform(3,3)+x(:)*3.0d0*ppform(4,3))
+       CASE(4:)
+          DO k=1,p
+             f(k,:) = p*ppform(p+1,k)
+             DO j=p-1,1,-1
+                f(k,:) = f(k,:)*x(:) + j*ppform(j+1,k)
+             END DO
+          END DO
+       END SELECT
+       f(p+1,:) = -SUM(f(1:p,:),DIM=1)
+!
+! 2nd and higher derivatives
+       CASE(2:)        
+          DO k=1,p
+             f(k,:) = ppform(p+1,k)
+             fact = p-jder
+             DO j=p,jder+1,-1
+                f(k,:) = f(k,:)/fact*j*x(:) + ppform(j,k)
+                fact = fact-1.0d0
+             END DO
+             DO j=2,jder
+                f(k,:) = f(k,:)*j
+             END DO
+          END DO
+          f(p+1,:) = -SUM(f(1:p,:),DIM=1)
+       END SELECT
+     END SUBROUTINE my_ppval_same
+!+++
+  END SUBROUTINE basfun1
+!===========================================================================
+  SUBROUTINE def_basfun1(xp, sp, fun, left)
+!
+!  Define the basis function and its derivatives at x
+!  fun(i,j) = (j-1)th derivative of ith basis function.
+!
+    DOUBLE PRECISION, INTENT(in) :: xp(:)
+    TYPE(spline1d), INTENT(in) :: sp
+    DOUBLE PRECISION, INTENT(out) :: fun(:,:,:)
+    INTEGER, OPTIONAL, INTENT(out) :: left(:)
+    DOUBLE PRECISION :: x(SIZE(xp))
+    INTEGER :: kleft(SIZE(xp))
+    INTEGER :: p, n
+    INTEGER :: ierr, j, k
+!
+    CALL locintv(sp, xp, kleft)
+    CALL basfun(xp, sp, fun, kleft+1)
+    IF(PRESENT(left)) THEN
+       left = kleft
+    END IF
+  END SUBROUTINE def_basfun1
+!===========================================================================
+  SUBROUTINE ft_basfun0(xp, sp, ft_f, left)
+!
+!  DFT of basis functions: ft_f(k,j), k=sp%dft%kmin, sp$dft%kmax (modes)
+!                                     j=0, p-1 (order of derivative))
+!
+    DOUBLE PRECISION, INTENT(in)   :: xp
+    DOUBLE COMPLEX, INTENT(out)    :: ft_f(:,:)
+    INTEGER, INTENT(in)            :: left
+    TYPE(spline1d)                 :: sp
+    DOUBLE PRECISION :: f(sp%order,SIZE(ft_f,2))
+!
+!  Construct all splines on interval [left,left+1] at coordinate  xp
+    CALL basfun(xp, sp, f, left)
+!
+!  DFT of splines
+    ft_f = MATMUL(sp%dft%coefs, f)    
+  END SUBROUTINE ft_basfun0
+!===========================================================================
+  SUBROUTINE ft_basfun1(xp, sp, ft_f, left)
+!
+!  DFT of basis functions: ft_f(k,j), k=sp%dft%kmin, sp$dft%kmax (modes)
+!                                     j=0, p-1 (order of derivative))
+!                          at xp(i)
+!
+    DOUBLE PRECISION, INTENT(in)   :: xp(:)
+    DOUBLE COMPLEX, INTENT(out)    :: ft_f(:,:,:)
+    INTEGER, INTENT(in)            :: left(:)
+    TYPE(spline1d)                 :: sp
+!
+    INTEGER :: i, n3
+    DOUBLE PRECISION :: f(sp%order,SIZE(ft_f,2),SIZE(ft_f,3))
+!
+!  Construct all splines on interval [left,left+1] at coordinate  xp
+    CALL basfun(xp, sp, f, left)
+!
+!  DFT of splines
+    n3 = SIZE(xp)
+    DO i=1,n3
+       ft_f(:,:,i) = MATMUL(sp%dft%coefs, f(:,:,i))
+    END DO
+  END SUBROUTINE ft_basfun1
+!===========================================================================
+  SUBROUTINE basfun_recur(xp, sp, fun, left)
+!
+!  Define the basis function and its derivatives at x, in interval i=1,2,
+!  using recurrence construct in function BVALUE
+!  fun(i,j) = (j-1)th derivative of ith basis function.
+!
+    DOUBLE PRECISION, INTENT(in) :: xp
+    TYPE(spline1d), INTENT(in) :: sp
+    DOUBLE PRECISION, INTENT(out) :: fun(:,:)
+    INTEGER, INTENT(in) :: left
+    DOUBLE PRECISION :: bcoef(1)=1.0d0, bvalue, x
+    INTEGER :: p, n, kleft
+    INTEGER :: ierr, j, k
+!
+    p = sp%order - 1
+    n = sp%nints
+    fun = 0.0d0
+!
+    IF( sp%period ) THEN   !  ** Applly periodicity **
+       x =sp%knots(0) + MODULO(xp-sp%knots(0), sp%lperiod)
+    ELSE
+       x = xp
+    END IF
+!
+    kleft = left-1
+    DO j=kleft-p, kleft
+       DO k=0, SIZE(fun,2)-1
+          fun(j-kleft+p+1, k+1) = bvalue(sp%knots(j), bcoef, 1, p+1, x, k) 
+       END DO
+    END DO
+  END SUBROUTINE basfun_recur
+!===========================================================================
+  SUBROUTINE gauleg(x1,x2,x,w,n)
+!
+!   Compute Gauss-Legendre abscissas and weights in interval [x1, x2]
+!
+    INTEGER, INTENT(in) :: n
+    DOUBLE PRECISION, INTENT(in) :: x1,x2
+    DOUBLE PRECISION, INTENT(out) :: x(n),w(n)
+    DOUBLE PRECISION :: EPS
+    INTEGER i,j,m
+    DOUBLE PRECISION p1,p2,p3,pp,xl,xm,z,z1
+!
+    eps=EPSILON(eps)
+    m=(n+1)/2
+    xm=0.5d0*(x2+x1)
+    xl=0.5d0*(x2-x1)
+    DO i=1,m
+       z=COS(3.141592654d0*(i-.25d0)/(n+.5d0))
+       DO
+          p1=1.d0; p2=0.d0
+          DO j=1,n
+             p3=p2; p2=p1
+             p1=((2.d0*j-1.d0)*z*p2-(j-1.d0)*p3)/j
+          END DO
+          pp=n*(z*p1-p2)/(z*z-1.d0)
+          z1=z
+          z=z1-p1/pp
+          IF( ABS(z-z1) .LE. EPS ) EXIT
+       END DO
+       x(i)=xm-xl*z
+       x(n+1-i)=xm+xl*z
+       w(i)=2.d0*xl/((1.d0-z*z)*pp*pp)
+       w(n+1-i)=w(i)
+    END DO
+  END SUBROUTINE gauleg
+!===========================================================================
+  SUBROUTINE gridval1dz(sp, xp, f, jder, c, ppformz)
+!
+!  Compute values or jder-th dervivative of f(x) from ppform 
+!  of spline sp. Recompute the ppform if the optional spline 
+!  coefficients are given.
+!
+    TYPE(spline1d) :: sp
+    DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: xp
+    DOUBLE COMPLEX, DIMENSION(:), INTENT(out) :: f
+    INTEGER, INTENT(in) :: jder
+    DOUBLE COMPLEX, DIMENSION(:), OPTIONAL, INTENT(in) :: c
+    DOUBLE COMPLEX, DIMENSION(:,:), OPTIONAL :: ppformz
+    DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
+    DOUBLE PRECISION :: x(SIZE(xp)), h, fact
+    INTEGER :: order, nints, i, j, nidbas
+    INTEGER :: leftx(SIZE(xp))
+!
+    order = sp%order
+    nints = sp%nints
+    nidbas = order-1
+!
+!   Compute PPFORM/BCOEFS if spline coefs are passed
+!
+    IF (PRESENT(c)) THEN
+       IF (sp%nlppform) THEN
+          IF( PRESENT(ppformz) ) THEN
+             CALL topp0z(sp, c, ppformz)
+          ELSE
+             IF( ASSOCIATED(sp%ppformz) ) DEALLOCATE(sp%ppformz)
+             ALLOCATE(sp%ppformz(order,nints))
+             CALL topp0z(sp, c, sp%ppformz)
+          END IF
+       ELSE
+          IF( ASSOCIATED(sp%bcoefsc) ) DEALLOCATE(sp%bcoefsc)
+          ALLOCATE(sp%bcoefsc(SIZE(c)))
+          sp%bcoefsc = c
+       END IF
+    END IF
+!
+!  Applly periodicity if required
+!
+    IF( sp%period ) THEN
+       x =sp%knots(0) + MODULO(xp-sp%knots(0), sp%lperiod)
+    ELSE
+       x = xp
+    END IF
+!
+!  Locate the intervals containing x
+!
+    CALL locintv(sp, x, leftx)
+!
+!  Compute function/derivatives
+!
+    IF( sp%nlppform ) THEN   ! using PP form
+       DO i=1,SIZE(x)
+          IF( PRESENT(ppformz) ) THEN
+             CALL ppval(sp, x(i), ppformz(:,leftx(i)+1), leftx(i), jder, f(i))
+          ELSE
+             CALL ppval(sp, x(i), sp%ppformz(:,leftx(i)+1), leftx(i), jder, f(i))
+          END IF
+       END DO
+    ELSE                     ! using spline expansion
+       ALLOCATE(fun(0:nidbas,0:jder))
+       f = 0.0d0
+       DO i=1,SIZE(x)
+          CALL basfun(x(i), sp, fun, leftx(i)+1)
+          DO j=0,nidbas
+             f(i) = f(i) + sp%bcoefsc(leftx(i)+j+1)*fun(j,jder)
+          END DO
+       END DO
+       DEALLOCATE(fun)
+    END IF
+!
+  END SUBROUTINE gridval1dz
+!===========================================================================
+  SUBROUTINE gridval1d(sp, xp, f, jder, c, ppform)
+!
+!  Compute values or jder-th dervivative of f(x) from ppform 
+!  of spline sp. Recompute the ppform if the optional spline 
+!  coefficients are given.
+!
+    TYPE(spline1d) :: sp
+    DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: xp
+    DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: f
+    INTEGER, INTENT(in) :: jder
+    DOUBLE PRECISION, DIMENSION(:), OPTIONAL, INTENT(in) :: c
+    DOUBLE PRECISION, DIMENSION(:,:), OPTIONAL :: ppform
+    DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
+    DOUBLE PRECISION :: x(SIZE(xp)), h, fact
+    INTEGER :: order, nints, i, j, nidbas
+    INTEGER :: leftx(SIZE(xp))
+!
+    order = sp%order
+    nints = sp%nints
+    nidbas = order-1
+!
+!   Compute PPFORM/BCOEFS if spline coefs are passed
+!
+    IF (PRESENT(c)) THEN
+       IF (sp%nlppform) THEN
+          IF( PRESENT(ppform) ) THEN
+             CALL topp0(sp, c, ppform)
+          ELSE
+             IF( ASSOCIATED(sp%ppform) ) DEALLOCATE(sp%ppform)
+             ALLOCATE(sp%ppform(order,nints))
+             CALL topp0(sp, c, sp%ppform)
+          END IF
+       ELSE
+          IF( ASSOCIATED(sp%bcoefs) ) DEALLOCATE(sp%bcoefs)
+          ALLOCATE(sp%bcoefs(SIZE(c)))
+          sp%bcoefs = c
+       END IF
+    END IF
+!
+!  Applly periodicity if required
+!
+    IF( sp%period ) THEN
+       x =sp%knots(0) + MODULO(xp-sp%knots(0), sp%lperiod)
+    ELSE
+       x = xp
+    END IF
+!
+!  Locate the intervals containing x
+!
+    CALL locintv(sp, x, leftx)
+!
+!  Compute function/derivatives
+!
+    IF( sp%nlppform ) THEN   ! using PP form
+       DO i=1,SIZE(x)
+          IF( PRESENT(ppform) ) THEN
+             CALL ppval(sp, x(i), ppform(:,leftx(i)+1), leftx(i), jder, f(i))
+          ELSE
+             CALL ppval(sp, x(i), sp%ppform(:,leftx(i)+1), leftx(i), jder, f(i))
+          END IF
+       END DO
+    ELSE                     ! using spline expansion
+       ALLOCATE(fun(0:nidbas,0:jder))
+       f = 0.0d0
+       DO i=1,SIZE(x)
+          CALL basfun(x(i), sp, fun, leftx(i)+1)
+          DO j=0,nidbas
+             f(i) = f(i) + sp%bcoefs(leftx(i)+j+1)*fun(j,jder)
+          END DO
+       END DO
+       DEALLOCATE(fun)
+    END IF
+!
+  END SUBROUTINE gridval1d
+!===========================================================================
+  SUBROUTINE def_knots(p, xg, knots, period, nlskip)
+!
+!   Define spline knots for interpolating at sites given by xg
+!
+    INTEGER, INTENT(in) :: p
+    DOUBLE PRECISION, INTENT(in) :: xg(0:)
+    DOUBLE PRECISION, POINTER :: knots(:)
+    LOGICAL, OPTIONAL, INTENT(in) :: period, nlskip
+    LOGICAL :: kperiod, mlskip
+    INTEGER :: npt, dim, nx, i, ii
+!
+    kperiod=.FALSE.
+    mlskip = .TRUE.
+    IF( PRESENT(period) ) kperiod=period
+    IF( PRESENT(nlskip) ) mlskip = nlskip
+!
+!    Periodic splines
+!
+    IF( kperiod ) THEN
+       nx = SIZE(xg) -1
+       IF( ASSOCIATED(knots) ) DEALLOCATE(knots)
+       ALLOCATE(knots(0:nx))
+       IF( MODULO(p,2) .NE. 0 ) THEN  ! Odd degree
+          knots(0:nx) = xg(0:nx)
+       ELSE                           ! Even degree
+          DO i=1,nx
+             knots(i) = 0.5d0*(xg(i-1)+xg(i))
+          END DO
+          knots(0) = knots(nx) - (xg(nx)-xg(0))
+       END IF
+       RETURN
+    END IF
+!
+!    Non-periodic splines
+!
+    npt = SIZE(xg)
+    dim = npt
+    IF( .NOT. mlskip ) THEN
+       dim = dim + 2*(p/2) ! Add BC on derivatives
+    END IF
+    nx = dim-p
+    IF( ASSOCIATED(knots) ) DEALLOCATE(knots)
+    ALLOCATE(knots(0:nx))
+!
+    knots(0) = xg(0)
+    knots(nx) = xg(npt-1)
+!
+    IF( MODULO(p,2) .EQ. 0 ) THEN
+       ii = 0
+       IF( mlskip ) ii = p/2     ! skip first p/2 intervals
+       DO i=1,nx-1
+          ii = ii+1
+          knots(i) = (xg(ii)+xg(ii-1))/2
+       END DO
+    ELSE
+       ii = 0
+       IF( mlskip ) ii = (p-1)/2  ! skip (p-1)/2 points after the first point ii=0
+       DO i=1,nx-1
+          ii = ii+1
+          knots(i) = xg(ii)
+       END DO
+    END IF
+!
+  END SUBROUTINE def_knots
+!===========================================================================
+  SUBROUTINE allsplines(sp, xpt, splines)
+!
+!   Return all splines defined on points xpt
+!
+    TYPE(spline1d) :: sp
+    DOUBLE PRECISION, INTENT(in) :: xpt(:)
+    DOUBLE PRECISION, POINTER :: splines(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
+    INTEGER :: i, n, left, dim, p
+!
+    p = sp%order - 1
+    dim = sp%dim
+    n = SIZE(xpt)
+!
+    IF( ASSOCIATED(splines) ) DEALLOCATE(splines)
+    ALLOCATE(splines(n,dim), fun(p+1,1))
+    splines = 0.0d0
+    DO i=1,n
+       CALL locintv(sp, xpt(i), left)
+       CALL basfun(xpt(i), sp, fun, left+1)
+       splines(i,left+1:left+p+1) = fun(1:p+1,1)
+    END DO
+    DEALLOCATE(fun)
+  END SUBROUTINE allsplines
+!===========================================================================
+  SUBROUTINE set_splcoef1d(p, x, sp, period, ibc)
+!
+!   Setup 1d interpolation matrix for spline of degree p
+!
+    INTEGER, INTENT(in) :: p
+    DOUBLE PRECISION, INTENT(in) :: x(:)
+    TYPE(spline1d), INTENT(out) :: sp
+    LOGICAL, OPTIONAL, INTENT(in) :: period
+    INTEGER, OPTIONAL :: ibc(:,:)
+!
+    LOGICAL :: kperiod
+!
+    kperiod = .FALSE.
+    IF( PRESENT(period) )  kperiod = period
+!
+    IF( kperiod ) THEN
+       CALL splcoefp_setup(p, x, sp)
+    ELSE
+       IF( PRESENT(ibc) ) THEN 
+          CALL splcoef_setup(p, x, sp, ibc)
+       ELSE
+          CALL splcoef_setup(p, x, sp)
+       END IF
+    END IF
+  END SUBROUTINE set_splcoef1d
+!===========================================================================
+  SUBROUTINE set_splcoef2d(p, x1, x2, sp, period, ibc1, ibc2)
+!
+!   Setup 2d interpolation matrix for spline of degree p
+!
+    INTEGER, INTENT(in) :: p(2)
+    DOUBLE PRECISION, INTENT(in) :: x1(:), x2(:)
+    TYPE(spline2d), INTENT(out) :: sp
+    LOGICAL, OPTIONAL, INTENT(in) :: period(2)
+    INTEGER, OPTIONAL :: ibc1(:,:),ibc2(:,:) 
+!
+    LOGICAL :: kperiod(2)
+!
+    kperiod = .FALSE.
+    IF( PRESENT(period) )  kperiod = period
+!
+!  Direction 1
+    IF( kperiod(1) ) THEN
+       CALL splcoefp_setup(p(1), x1, sp%sp1)
+    ELSE
+       IF( PRESENT(ibc1) ) THEN 
+          CALL splcoef_setup(p(1), x1, sp%sp1, ibc1)
+       ELSE
+          CALL splcoef_setup(p(1), x1, sp%sp1)
+       END IF
+    END IF
+!
+!  Direction 2
+    IF( kperiod(2) ) THEN
+       CALL splcoefp_setup(p(2), x2, sp%sp2)
+    ELSE
+       IF( PRESENT(ibc2) ) THEN 
+          CALL splcoef_setup(p(2), x2, sp%sp2, ibc2)
+       ELSE
+          CALL splcoef_setup(p(2), x2, sp%sp2)
+       END IF
+    END IF
+  END SUBROUTINE set_splcoef2d
+!===========================================================================
+  SUBROUTINE get_splcoef1(sp, f, c, fbc)
+!
+!   Compute the spline coefficients c from grid values f
+!
+    TYPE(spline1d) :: sp
+    DOUBLE PRECISION, INTENT(in) :: f(:)
+    DOUBLE PRECISION, INTENT(out) :: c(:)
+    DOUBLE PRECISION, INTENT(in), OPTIONAL :: fbc(:,:)
+!
+    IF( sp%period ) THEN
+       CALL splcoefp1(sp, f, c)
+    ELSE
+       IF( PRESENT(fbc) ) THEN
+          CALL splcoef1(sp, f, c, fbc)
+       ELSE
+          CALL splcoef1(sp, f, c)
+       END IF
+    END IF
+  END SUBROUTINE get_splcoef1
+!===========================================================================
+  SUBROUTINE get_splcoef2d(sp, f, c, fbc1, fbc2)
+!
+!   Compute the spline coefficients c from 2d grid values f
+!
+    TYPE(spline2d) :: sp
+    DOUBLE PRECISION, INTENT(in) :: f(:,:)
+    DOUBLE PRECISION, INTENT(out) :: c(:,:)
+    DOUBLE PRECISION :: ctr(SIZE(c,2), SIZE(f,1))
+    DOUBLE PRECISION, INTENT(in), OPTIONAL :: fbc1(:,:,:), fbc2(:,:,:)
+
+    DOUBLE PRECISION, DIMENSION(:, :)   , ALLOCATABLE :: c_fbc1_left, c_fbc1_right
+    DOUBLE PRECISION, DIMENSION(:, :, :), ALLOCATABLE :: c_fbc1_all
+!
+!  Along direction 2
+!
+    IF( PRESENT(fbc2) ) THEN
+       CALL get_splcoefn(sp%sp2, TRANSPOSE(f), ctr, fbc2)
+    ELSE
+       CALL get_splcoefn(sp%sp2, TRANSPOSE(f), ctr)
+    END IF
+!
+!  Along direction 1
+!
+    IF( PRESENT(fbc1) ) THEN
+       ALLOCATE( c_fbc1_left(SIZE(c, 2), SIZE(fbc1, 2)))
+       ALLOCATE(c_fbc1_right(SIZE(c, 2), SIZE(fbc1, 2)))
+       ALLOCATE(c_fbc1_all(2, SIZE(fbc1, 2), SIZE(c, 2)))
+
+       CALL get_splcoefn(sp%sp2, TRANSPOSE(fbc1(1, :, :)), c_fbc1_left )
+       CALL get_splcoefn(sp%sp2, TRANSPOSE(fbc1(2, :, :)), c_fbc1_right)
+
+       c_fbc1_all(1, :, :) = TRANSPOSE(c_fbc1_left ) 
+       c_fbc1_all(2, :, :) = TRANSPOSE(c_fbc1_right) 
+
+       CALL get_splcoefn(sp%sp1, TRANSPOSE(ctr), c, c_fbc1_all)
+
+       DEALLOCATE(c_fbc1_left, c_fbc1_right, c_fbc1_all)
+    ELSE
+       CALL get_splcoefn(sp%sp1, TRANSPOSE(ctr), c)
+    END IF
+!
+  END SUBROUTINE get_splcoef2d
+!===========================================================================
+  SUBROUTINE get_splcoef2dz(sp, f, c)
+!
+!   Compute the spline coefficients c from 2d grid values f
+!
+    TYPE(spline2d) :: sp
+    DOUBLE COMPLEX, INTENT(in) :: f(:,:)
+    DOUBLE COMPLEX, INTENT(out) :: c(:,:)
+    DOUBLE PRECISION, DIMENSION(SIZE(c,1), SIZE(c,2),2) :: pc
+!
+    CALL get_splcoef2d(sp, REAL(f), pc(:,:,1))
+    CALL get_splcoef2d(sp, AIMAG(f), pc(:,:,2))
+    c(:,:) = CMPLX(pc(:,:,1),pc(:,:,2))
+  END SUBROUTINE get_splcoef2dz
+!===========================================================================
+  SUBROUTINE get_splcoef1z(sp, f, c, fbc)
+!
+!   Compute the spline coefficients c from grid values f
+!
+    TYPE(spline1d) :: sp
+    DOUBLE COMPLEX, INTENT(in) :: f(:)
+    DOUBLE COMPLEX, INTENT(out) :: c(:)
+    DOUBLE COMPLEX, INTENT(in), OPTIONAL :: fbc(:,:)
+    DOUBLE PRECISION :: pf(SIZE(f),2), pc(SIZE(c),2)
+    DOUBLE PRECISION, ALLOCATABLE ::  pfbc(:,:,:)
+!
+    pf(:,1) = REAL(f(:))
+    pf(:,2) = AIMAG(f(:))
+    IF(PRESENT(fbc)) THEN
+       ALLOCATE(pfbc(SIZE(fbc,1),SIZE(fbc,2),2))
+       pfbc(:,:,1) = REAL(fbc(:,:))
+       pfbc(:,:,2) = AIMAG(fbc(:,:))
+       CALL get_splcoefn(sp, pf, pc, pfbc)
+       DEALLOCATE(pfbc)
+    ELSE
+       CALL get_splcoefn(sp, pf, pc)       
+    END IF
+    c(:) = CMPLX(pc(:,1), pc(:,2))
+!
+  END SUBROUTINE get_splcoef1z
+!===========================================================================
+  SUBROUTINE get_splcoefn(sp, f, c, fbc)
+!
+!   Compute the spline coefficients c from grid values f
+!
+    TYPE(spline1d) :: sp
+    DOUBLE PRECISION, INTENT(in) :: f(:,:)
+    DOUBLE PRECISION, INTENT(out) :: c(:,:)
+    DOUBLE PRECISION, INTENT(in), OPTIONAL :: fbc(:,:,:)
+!
+    IF( sp%period ) THEN
+       CALL splcoefpn(sp, f, c)
+    ELSE
+       IF( PRESENT(fbc) ) THEN
+          CALL splcoefn(sp, f, c, fbc)
+       ELSE
+          CALL splcoefn(sp, f, c)
+       END IF
+    END IF
+  END SUBROUTINE get_splcoefn
+!===========================================================================
+   SUBROUTINE splcoef_setup(p, x, sp, ibc)
+!
+!   Setup the interpolation matrix
+!   for spline of degree p
+!
+    INTEGER, INTENT(in) :: p
+    DOUBLE PRECISION, INTENT(in) :: x(:)
+    TYPE(spline1d), INTENT(out) :: sp
+    INTEGER, OPTIONAL :: ibc(:,:)
+    DOUBLE PRECISION, POINTER  :: knots(:)=>NULL(), arow(:)=>NULL(), &
+         &                        fun(:,:)=>NULL()
+    INTEGER :: nx, dim, kl, ku, rank
+    INTEGER :: i, left, ishift
+    LOGICAL :: nlskip
+!
+!    Type of Boundary Conditions
+    nlskip = .TRUE.
+    ishift = 0
+    IF( PRESENT(ibc) ) THEN
+       nlskip = .FALSE.
+       ishift = p/2
+    END IF
+!
+!    Set up spline
+    nx = SIZE(x) - 1   ! X is the interpolation sites
+    CALL def_knots(p, x, knots, nlskip=nlskip)
+    CALL set_spline(p, 0, knots, sp)
+    sp%nsites = nx + 1 ! Store away the number of interpolation sites
+    DEALLOCATE(knots)
+!
+!    Set up interpolation matrix
+    dim = sp%dim
+    kl = MAX(p-1,0)
+    ku = MAX(p-1,0)
+    rank = dim
+    CALL init(kl, ku, rank, 0, sp%mat)
+!!$    WRITE(*,'(a,3i6)') 'Interpolation matrix:, kl, ku, rank ', kl, ku, rank
+!
+!    COMPUTE matrix row by row
+    ALLOCATE(arow(dim), fun(p+1,0:p))
+    DO i=1,SIZE(x)
+       arow = 0.0d0
+       CALL locintv(sp, x(i), left)
+       CALL basfun(x(i), sp, fun(:,0:0), left+1)
+       arow(left+1:left+p+1) = fun(1:p+1,0)
+       CALL putrow(sp%mat, i+ishift, arow)
+!!$       WRITE(*,'(i5,13f8.3)') i+ishift, arow
+    END DO
+!
+!    Add BC if specified
+    IF( PRESENT(ibc) ) THEN
+       CALL locintv(sp, x(1), left)
+       CALL basfun(x(1), sp, fun, left+1)      ! BC at the left side
+       DO i=1,p/2
+          arow = 0.0d0
+          arow(left+1:left+p+1) = fun(1:p+1,ibc(1,i))
+          CALL putrow(sp%mat, i, arow)
+!!$          WRITE(*,'(i5,13f8.3)') i, arow
+       END DO
+       CALL locintv(sp, x(SIZE(x)), left)
+       CALL basfun(x(SIZE(x)), sp, fun, left+1) ! BC at the right side
+       DO i=1,p/2
+          arow = 0.0d0
+          arow(left+1:left+p+1) = fun(1:p+1,ibc(2,i))
+          CALL putrow(sp%mat, dim-i+1, arow)
+!!$          WRITE(*,'(i5,13f8.3)') dim-i+1, arow
+       END DO
+    END IF
+    DEALLOCATE(arow, fun)
+!
+!    Factor the matrix
+    CALL factor(sp%mat)
+!
+  END SUBROUTINE splcoef_setup
+!===========================================================================
+  SUBROUTINE splcoefp_setup(p, x, sp)
+!
+!   Set up the interpolation matrix
+!   for periodic case
+!
+    USE matrix
+    INTEGER, INTENT(in) :: p
+    DOUBLE PRECISION, INTENT(in) :: x(:)
+    TYPE(spline1d), INTENT(out) :: sp
+!
+    TYPE(gemat) :: hmat
+    DOUBLE PRECISION, POINTER :: knots(:)=>NULL(), arow(:)=>NULL(), &
+         &                       fun(:,:)=>NULL()
+!!$    DOUBLE PRECISION, POINTER :: arr2d(:,:)=>null()
+    INTEGER :: nx, kl, ku, rank, mr, nc
+    INTEGER :: i, left, j, jj
+    DOUBLE PRECISION, PARAMETER :: zero=0.0d0, one=1.0d0
+!________________________________________________________________________________
+!
+!    Set up spline
+!
+    nx = SIZE(x) - 1   ! X is the interpolation sites
+    CALL def_knots(p, x, knots, period=.TRUE.)
+    CALL set_spline(p, 0, knots, sp, period=.TRUE.)
+    sp%nsites = nx + 1 ! Store away the number of interpolation sites
+    DEALLOCATE(knots)
+!________________________________________________________________________________
+!
+!    Set up interpolation matrix sp%matp
+!
+    kl = MAX(p/2,0)
+    ku = kl
+    rank = nx
+!!$    WRITE(*,'(a,3i6)') 'Interpolation matrix:, kl, ku, rank ', kl, ku, rank
+!
+    CALL init(kl, ku, rank, 0, sp%matp%mat)    ! matp%mat is a GB matrix
+    ALLOCATE(sp%matp%matu(rank, kl+ku), sp%matp%matvt(kl+ku,rank))
+!
+    sp%matp%matu = zero
+    sp%matp%matvt = zero             !    kl = ku = 2
+    DO j=1,kl                        !  [ 1 0 0 . . . . ]
+       sp%matp%matu(j,j) = one       !  [ 0 1 0 . . . . ]
+    END DO                           !  [ . 0 . . . . . ]
+    DO j=1,ku                        !  [ . . . . . 0 . ]
+       i=rank-ku+j                   !  [ . . . . 0 1 0 ]
+       sp%matp%matu(i,kl+j) = one    !  [ . . . . 0 0 1 ] 
+    END DO
+!________________________________________________________________________________
+!
+!    COMPUTE matrix row by row
+!
+    ALLOCATE(arow(rank), fun(p+1,1))
+    DO i=1,rank
+       arow = zero
+       CALL locintv(sp, x(i), left)
+       CALL basfun(x(i), sp, fun, left+1)
+       left = left-p/2
+       DO j=0,p
+          jj = MODULO(left+j, rank) + 1
+          arow(jj) = fun(j+1,1)
+       END DO
+       CALL putrow(sp%matp%mat, i, arow)
+       IF( i .LE. kl ) THEN 
+          sp%matp%matvt(i,rank-kl+1:rank) = arow(rank-kl+1:rank)
+       ELSE IF ( i .GE. rank-ku+1 ) THEN
+          j = i-(rank-ku+1) + 1
+          sp%matp%matvt(kl+j,1:ku) = arow(1:ku)
+       END IF
+!!$       WRITE(8, '(i5, 12(1pe12.3))') left, x(i), fun
+!!$       WRITE(*,'(i5, 12(1pe12.3))') i, arow
+    END DO
+    DEALLOCATE(arow, fun)
+!
+!!$    PRINT*, 'Matrix U, V'
+!!$    DO i=1,rank
+!!$       WRITE(*, '(i5,12(1pe12.3))') i, sp%matp%matu(i,:), sp%matp%matvt(:,i)
+!!$    END DO
+!!$    ALLOCATE(arr2d(rank,rank))
+!!$    arr2d = MATMUL(sp%matp%matu, sp%matp%matvt)
+!!$    PRINT*, 'Product U*V^T'
+!!$    DO i=1,rank
+!!$       WRITE(*, '(i5,12(1pe12.3))') i, arr2d(i,:)
+!!$    END DO
+!!$    DEALLOCATE(arr2d)
+!________________________________________________________________________________
+!
+!     Factorisation
+!
+! Factor A
+    CALL factor(sp%matp%mat)
+!
+!  For constant and linear splines, A is diagnonal!
+!  Should skip the rest
+!
+    IF( kl.EQ.0 .OR. ku.EQ.0 ) THEN 
+       RETURN 
+    END IF
+!
+! U <-- A^(-1) * U
+    CALL bsolve(sp%matp%mat, sp%matp%matu)
+!
+! H <-- 1 + V^T * U
+    mr = SIZE(sp%matp%matvt, 1)
+    nc = SIZE(sp%matp%matvt, 2)
+    CALL init(mr, 0, hmat)  ! hmat is initialized to 0!
+    DO i=1,mr
+       hmat%val(i,i) = one
+    END DO
+    CALL dgemm('N', 'N', mr, mr, nc, one, sp%matp%matvt, mr, &
+         &      sp%matp%matu, nc, one, hmat%val, mr)
+!
+!!$    hmat%val = MATMUL(sp%matp%matvt, sp%matp%matu)
+!!$    DO i=1,kl+ku
+!!$       hmat%val(i,i) = 1.0d0 + hmat%val(i,i)
+!!$    END DO
+!
+! V^T <-- H^(-1) V^T
+    CALL factor(hmat)
+    CALL bsolve(hmat, sp%matp%matvt)
+    CALL destroy(hmat)
+!
+  END SUBROUTINE splcoefp_setup
+!===========================================================================
+  SUBROUTINE splcoef1(sp, f, c, fbc)
+!
+!   Compute the spline coefficients c from grid values f and BC fbc
+!
+    TYPE(spline1d) :: sp
+    DOUBLE PRECISION, INTENT(in)  :: f(:)
+    DOUBLE PRECISION, INTENT(out) :: c(:)
+    DOUBLE PRECISION, INTENT(in), OPTIONAL :: fbc(:,:)
+    INTEGER :: p, dim, i, ishift
+!
+    p = sp%order-1
+    dim = sp%dim
+!
+!    BC at left and right boundary
+    ishift = 0
+    IF( PRESENT(fbc) ) THEN
+       DO i=1,p/2
+          c(i)       = fbc(1,i)     ! Left boundary
+          c(dim-i+1) = fbc(2,i)     ! Right boundary
+       END DO
+       ishift = p/2
+    END IF
+!
+!    Interior points
+    DO i=1,sp%nsites
+       c(i+ishift) = f(i)
+    END DO
+!    WRITE(*,'(a/(13f8.3))') 'RHS', c
+!
+!    Solve for the interpolation coefs. using the factored sp%mat
+    CALL bsolve(sp%mat, c)
+!
+  END SUBROUTINE splcoef1
+!===========================================================================
+  SUBROUTINE splcoefn(sp, f, c, fbc)
+!
+!   Compute the spline coefficients c from grid values f and BC fbc
+!
+    TYPE(spline1d) :: sp
+    DOUBLE PRECISION, INTENT(in)  :: f(:,:)
+    DOUBLE PRECISION, INTENT(out) :: c(:,:)
+    DOUBLE PRECISION, INTENT(in), OPTIONAL :: fbc(:,:,:)
+    INTEGER :: p, dim, i, ishift
+!
+    p = sp%order-1
+    dim = sp%dim
+!
+!    BC at left and right boundary
+    ishift = 0
+    IF( PRESENT(fbc) ) THEN
+       DO i=1,p/2
+          c(i,:)       = fbc(1,i,:)     ! Left boundary
+          c(dim-i+1,:) = fbc(2,i,:)     ! Right boundary
+       END DO
+       ishift = p/2
+    END IF
+!
+!    Interior points   ! c(:,j) for j>SIZE(f,2) could be anything 
+    DO i=1,sp%nsites   ! (periodicity in the 2nd dimension)!
+       c(i+ishift,1:SIZE(f,2)) = f(i,:)
+    END DO
+!!$    WRITE(*,'(a/(13f8.3))') 'RHS', c
+!
+!    Solve for the interpolation coefs. using the factored sp%mat
+    CALL bsolve(sp%mat, c)
+!
+  END SUBROUTINE splcoefn
+!===========================================================================
+  SUBROUTINE splcoefp1(sp, f, c)
+!
+!   Compute the spline coefficient c from grid values f
+!   f(x) is periodic
+!
+    USE matrix
+!
+    TYPE(spline1d) :: sp
+    DOUBLE PRECISION, INTENT(in) :: f(:)
+    DOUBLE PRECISION, INTENT(out) :: c(:)
+!
+    DOUBLE PRECISION, POINTER  :: arow(:), brow(:)
+    DOUBLE PRECISION, PARAMETER :: zero=0.0d0, one=1.0d0, minus1=-1.0d0
+    INTEGER :: dim, p, rank, bandw
+    INTEGER :: i, j
+!________________________________________________________________________________
+!
+    p     = sp%order-1
+    rank  = sp%nints
+    bandw = SIZE(sp%matp%matvt,1)
+!
+!    Solve the interpolation system
+!
+!  Solve Ay = f
+    ALLOCATE(arow(rank), brow(bandw))
+!
+    arow(1:rank) = f(1:rank)
+    CALL bsolve(sp%matp%mat, arow)
+!
+!  For constant and linear splines, A is diagnonal!
+!  Should skip the rest
+!
+    IF( p.LE.1 ) GOTO 100
+!
+!
+!  t = V^T*y
+    CALL dgemv('N', bandw, rank, one, sp%matp%matvt, bandw, arow, 1, zero, &
+         &      brow, 1)
+!
+!  y = y - Ut
+    CALL dgemv('N', rank, bandw, minus1, sp%matp%matu, rank, brow, 1, one, &
+         &      arow, 1)
+!
+100 CONTINUE
+!
+!  Interpolation coefficients
+    dim   = sp%dim
+    DO i=1,dim
+       j = MODULO(i-1-p/2, rank) + 1
+       c(i) = arow(j)
+    END DO
+!
+    DEALLOCATE(arow,brow)
+!
+  END SUBROUTINE splcoefp1
+!===========================================================================
+  SUBROUTINE splcoefpn(sp, f, c)
+!
+!   Compute the spline coefficient c from grid values f
+!   f(x) is periodic
+!
+    USE matrix
+!
+    TYPE(spline1d) :: sp
+    DOUBLE PRECISION, INTENT(in) :: f(:,:)
+    DOUBLE PRECISION, INTENT(out) :: c(:,:)
+!
+    DOUBLE PRECISION, POINTER  :: arow(:,:), brow(:,:)
+    DOUBLE PRECISION, PARAMETER :: zero=0.0d0, one=1.0d0, minus1=-1.0d0
+    INTEGER :: p, dim, rank, bandw, nrhs
+    INTEGER :: i, j, k
+!________________________________________________________________________________
+!
+    p     = sp%order-1
+    rank  = sp%nints
+    bandw = SIZE(sp%matp%matvt,1)
+    nrhs = SIZE(f,2)
+!
+!
+!    Solve the interpolation system
+!
+!  Solve Ay = f
+    ALLOCATE(arow(rank,nrhs), brow(bandw,nrhs))
+!
+    arow(1:rank,1:nrhs) = f(1:rank,1:nrhs)
+    CALL bsolve(sp%matp%mat, arow)
+!
+!  For constant and linear splines, A is diagnonal!
+!  Should skip the rest
+!
+    IF( p.LE.1 ) GOTO 100
+!
+!
+!  t = V^T*y
+    CALL dgemm('N', 'N', bandw, nrhs, rank, one, sp%matp%matvt, bandw, arow, &
+         &      rank, zero, brow, bandw)
+!
+!  y = y - Ut
+    CALL dgemm('N', 'N', rank, nrhs, bandw, minus1, sp%matp%matu, rank, brow, &
+         &      bandw, one, arow, rank)
+!
+100 CONTINUE
+!
+!  Interpolation coefficients
+    dim   = sp%dim
+    DO k=1,nrhs
+       DO i=1,dim
+          j = MODULO(i-1-p/2, rank) + 1
+          c(i,k) = arow(j,k)
+       END DO
+    END DO
+!
+    DEALLOCATE(arow,brow)
+!
+  END SUBROUTINE splcoefpn
+!
+!===========================================================================
+  SUBROUTINE topp0(sp, c, ppform)
+!
+!   Compute PPFORM of a fuction defined by the spline SP
+!   and spline coefficients C(1:d)
+!
+    TYPE(spline1d), INTENT(in) :: sp
+    DOUBLE PRECISION, INTENT(in) :: c(:)
+    DOUBLE PRECISION, INTENT(out) :: ppform(0:,:)
+    INTEGER :: p, nints, i, j, k
+!
+    p = sp%order - 1
+    nints = sp%nints
+!
+    ppform = 0.0d0
+    DO i=1,nints       ! on each knot interval
+       DO j=1,p+1      ! all spline in interval i
+          DO k=0,p     ! k_th derivatives
+             ppform(k,i) = ppform(k,i) + sp%val0(k,j,i)*c(j+i-1)
+          END DO
+       END DO
+    END DO
+!
+  END SUBROUTINE topp0
+!===========================================================================
+  SUBROUTINE topp0z(sp, c, ppform)
+!
+!   Compute PPFORM of a fuction defined by the spline SP
+!   and spline coefficients C(1:d)
+!
+    TYPE(spline1d), INTENT(in) :: sp
+    DOUBLE COMPLEX, INTENT(in) :: c(:)
+    DOUBLE COMPLEX, INTENT(out) :: ppform(0:,:)
+    INTEGER :: p, nints, i, j, k
+!
+    p = sp%order - 1
+    nints = sp%nints
+!
+    ppform = (0.0d0, 0.0d0)
+    DO i=1,nints       ! on each knot interval
+       DO j=1,p+1      ! all spline in interval i
+          DO k=0,p     ! k_th derivatives
+             ppform(k,i) = ppform(k,i) + sp%val0(k,j,i)*c(j+i-1)
+          END DO
+       END DO
+    END DO
+!
+  END SUBROUTINE topp0z
+!===========================================================================
+  SUBROUTINE topp1(sp, c, ppform)
+!
+!   Compute PPFORM of a fuction defined by the spline SP
+!   and spline coefficients C(1:d,:)
+!
+    TYPE(spline1d), INTENT(in) :: sp
+    DOUBLE PRECISION, INTENT(in) :: c(:,:)
+    DOUBLE PRECISION, INTENT(out) :: ppform(:,:,:)
+    INTEGER :: m
+!
+    DO m=1,SIZE(c,2)
+       CALL topp0(sp, c(:,m), ppform(m,:,:))
+    END DO
+!
+  END SUBROUTINE topp1
+!===========================================================================
+  SUBROUTINE topp1z(sp, c, ppform)
+!
+!   Compute PPFORM of a fuction defined by the spline SP
+!   and spline coefficients C(1:d,:)
+!
+    TYPE(spline1d), INTENT(in) :: sp
+    DOUBLE COMPLEX, INTENT(in) :: c(:,:)
+    DOUBLE COMPLEX, INTENT(out) :: ppform(:,:,:)
+    INTEGER :: m
+!
+    DO m=1,SIZE(c,2)
+       CALL topp0z(sp, c(:,m), ppform(m,:,:))
+    END DO
+!
+  END SUBROUTINE topp1z
+!===========================================================================
+  SUBROUTINE topp2(sp, c, ppform)
+!
+!   Compute PPFORM of a fuction defined by the spline SP
+!   and spline coefficients C(1:d,:,:)
+!
+    TYPE(spline1d), INTENT(in) :: sp
+    DOUBLE PRECISION, INTENT(in) :: c(:,:,:)
+    DOUBLE PRECISION, INTENT(out) :: ppform(:,:,:,:)
+    INTEGER :: m, mm
+!
+    DO mm=1,SIZE(c,3)
+       DO m=1,SIZE(c,2)
+          CALL topp0(sp, c(:,m,mm), ppform(m,mm,:,:))
+       END DO
+    END DO
+!
+  END SUBROUTINE topp2
+!===========================================================================
+  SUBROUTINE topp2z(sp, c, ppform)
+!
+!   Compute PPFORM of a fuction defined by the spline SP
+!   and spline coefficients C(1:d,:,:)
+!
+    TYPE(spline1d), INTENT(in) :: sp
+    DOUBLE COMPLEX, INTENT(in) :: c(:,:,:)
+    DOUBLE COMPLEX, INTENT(out) :: ppform(:,:,:,:)
+    INTEGER :: m, mm
+!
+    DO mm=1,SIZE(c,3)
+       DO m=1,SIZE(c,2)
+          CALL topp0z(sp, c(:,m,mm), ppform(m,mm,:,:))
+       END DO
+    END DO
+!
+  END SUBROUTINE topp2z
+!===========================================================================
+  SUBROUTINE topp3(sp, c, ppform)
+!
+!   Compute PPFORM of a fuction defined by the spline SP
+!   and spline coefficients C(1:d,:,:,:)
+!
+    TYPE(spline1d), INTENT(in) :: sp
+    DOUBLE PRECISION, INTENT(in) :: c(:,:,:,:)
+    DOUBLE PRECISION, INTENT(out) :: ppform(:,:,:,:,:)
+    INTEGER :: m, mm, mmm
+!
+    DO mmm=1,SIZE(c,4)
+       DO mm=1,SIZE(c,3)
+          DO m=1,SIZE(c,2)
+                 CALL topp0(sp, c(:,m,mm,mmm), ppform(m,mm,mmm,:,:))
+          END DO
+       END DO
+    END DO
+!
+  END SUBROUTINE topp3
+!===========================================================================
+  SUBROUTINE topp3z(sp, c, ppform)
+!
+!   Compute PPFORM of a fuction defined by the spline SP
+!   and spline coefficients C(1:d,:,:,:)
+!
+    TYPE(spline1d), INTENT(in) :: sp
+    DOUBLE COMPLEX, INTENT(in) :: c(:,:,:,:)
+    DOUBLE COMPLEX, INTENT(out) :: ppform(:,:,:,:,:)
+    INTEGER :: m, mm, mmm
+!
+    DO mmm=1,SIZE(c,4)
+       DO mm=1,SIZE(c,3)
+          DO m=1,SIZE(c,2)
+                 CALL topp0z(sp, c(:,m,mm,mmm), ppform(m,mm,mmm,:,:))
+          END DO
+       END DO
+    END DO
+!
+  END SUBROUTINE topp3z
+!===========================================================================
+  SUBROUTINE topp4(sp, c, ppform)
+!
+!   Compute PPFORM of a fuction defined by the spline SP
+!   and spline coefficients C(1:d,:,:)
+!
+    TYPE(spline1d), INTENT(in) :: sp
+    DOUBLE PRECISION, INTENT(in) :: c(:,:,:,:,:)
+    DOUBLE PRECISION, INTENT(out) :: ppform(:,:,:,:,:,:)
+    INTEGER :: m, mm, mmm, mmmm
+!
+    DO mmmm=1,SIZE(c,5)
+       DO mmm=1,SIZE(c,4)
+          DO mm=1,SIZE(c,3)
+             DO m=1,SIZE(c,2)
+                CALL topp0(sp, c(:,m,mm,mmm,mmmm), ppform(m,mm,mmm,mmmm,:,:))
+             END DO
+          END DO
+       END DO
+    END DO
+!
+  END SUBROUTINE topp4
+!===========================================================================
+  SUBROUTINE topp4z(sp, c, ppform)
+!
+!   Compute PPFORM of a fuction defined by the spline SP
+!   and spline coefficients C(1:d,:,:)
+!
+    TYPE(spline1d), INTENT(in) :: sp
+    DOUBLE COMPLEX, INTENT(in) :: c(:,:,:,:,:)
+    DOUBLE COMPLEX, INTENT(out) :: ppform(:,:,:,:,:,:)
+    INTEGER :: m, mm, mmm, mmmm
+!
+    DO mmmm=1,SIZE(c,5)
+       DO mmm=1,SIZE(c,4)
+          DO mm=1,SIZE(c,3)
+             DO m=1,SIZE(c,2)
+                CALL topp0z(sp, c(:,m,mm,mmm,mmmm), ppform(m,mm,mmm,mmmm,:,:))
+             END DO
+          END DO
+       END DO
+    END DO
+!
+  END SUBROUTINE topp4z
+!===========================================================================
+  SUBROUTINE ppval0(sp, x, ppform, left, jder, f)
+!
+!   Compute function and derivatives from the PP representation
+!    f(x) = \sum_{k=0}^p ppform(k)*(x-t_{left})^k
+!
+    TYPE(spline1d), INTENT(in) :: sp
+    DOUBLE PRECISION, INTENT(in) :: x, ppform(:)
+    INTEGER, INTENT(in) :: left, jder
+    DOUBLE PRECISION, INTENT(out) :: f
+    DOUBLE PRECISION :: h, fact
+    INTEGER :: j, order
+!
+    order = sp%order   ! Polynomial degree p + 1
+!
+    h = x-sp%knots(left)
+    f = 0.0d0
+    IF( jder .LT. 0 .OR. jder .GE. order ) RETURN
+!
+    SELECT CASE (jder)
+    CASE(0)            ! function value
+       DO j=order,1,-1
+          f = f*h + ppform(j)
+       END DO
+    CASE(1)            ! 1st derivative
+       DO j=order-1,1,-1
+          f = f*h + j*ppform(j+1)
+       END DO
+    CASE default       ! 2nd and higher derivatives
+       fact = order-jder
+       DO j=order,jder+1,-1
+          f = f/fact*j*h + ppform(j)
+          fact = fact-1.0d0
+       END DO
+       DO j=2,jder
+          f = f*j
+       END DO
+    END SELECT
+  END SUBROUTINE ppval0
+!===========================================================================
+  SUBROUTINE ppval0z(sp, x, ppform, left, jder, f)
+!
+!   Compute function and derivatives from the PP representation
+!    f(x) = \sum_{k=0}^p ppform(k)*(x-t_{left})^k
+!
+    TYPE(spline1d), INTENT(in) :: sp
+    DOUBLE PRECISION, INTENT(in) :: x
+    DOUBLE COMPLEX, INTENT(in) :: ppform(:)
+    INTEGER, INTENT(in) :: left, jder
+    DOUBLE COMPLEX, INTENT(out) :: f
+    DOUBLE PRECISION :: h, fact
+    INTEGER :: j, order
+!
+    order = sp%order   ! Polynomial degree p + 1
+!
+    h = x-sp%knots(left)
+    f = (0.0d0,0.0d0)
+    IF( jder .LT. 0 .OR. jder .GE. order ) RETURN
+!
+    SELECT CASE (jder)
+    CASE(0)            ! function value
+       DO j=order,1,-1
+          f = f*h + ppform(j)
+       END DO
+    CASE(1)            ! 1st derivative
+       DO j=order-1,1,-1
+          f = f*h + j*ppform(j+1)
+       END DO
+    CASE default       ! 2nd and higher derivatives
+       fact = order-jder
+       DO j=order,jder+1,-1
+          f = f/fact*j*h + ppform(j)
+          fact = fact-1.0d0
+       END DO
+       DO j=2,jder
+          f = f*j
+       END DO
+    END SELECT
+  END SUBROUTINE ppval0z
+!===========================================================================
+  SUBROUTINE ppval0z_n(sp, x, ppform, left, jder, f)
+!
+!   PPVAL0Z for many points x(:)
+!
+    TYPE(spline1d), INTENT(in) :: sp
+    DOUBLE PRECISION, INTENT(in) :: x(:)
+    DOUBLE COMPLEX, INTENT(in) :: ppform(:,:)
+    INTEGER, INTENT(in) :: left(:), jder
+    DOUBLE COMPLEX, INTENT(out) :: f(:)
+    DOUBLE PRECISION :: h(SIZE(x)), fact
+    INTEGER :: j, order
+!
+    order = sp%order   ! Polynomial degree p + 1
+!
+    h(:) = x(:)-sp%knots(left(:))
+    f(:) = 0.0d0
+    IF( jder .LT. 0 .OR. jder .GE. order ) RETURN
+!
+    SELECT CASE (jder)
+    CASE(0)            ! function value
+       DO j=order,1,-1
+          f(:) = f(:)*h(:) + ppform(:,j)
+       END DO
+    CASE(1)            ! 1st derivative
+       DO j=order-1,1,-1
+          f(:) = f(:)*h(:) + j*ppform(:,j+1)
+       END DO
+    CASE default       ! 2nd and higher derivatives
+       fact = order-jder
+       DO j=order,jder+1,-1
+          f(:) = f(:)/fact*j*h(:) + ppform(:,j)
+          fact = fact-1.0d0
+       END DO
+       DO j=2,jder
+          f(:) = f(:)*j
+       END DO
+    END SELECT
+  END SUBROUTINE ppval0z_n
+!===========================================================================
+  SUBROUTINE ppval0_n(sp, x, ppform, left, jder, f)
+!
+!   PPVAL0 for many points x(:)
+!
+    TYPE(spline1d), INTENT(in) :: sp
+    DOUBLE PRECISION, INTENT(in) :: x(:), ppform(:,:)
+    INTEGER, INTENT(in) :: left(:), jder
+    DOUBLE PRECISION, INTENT(out) :: f(:)
+    DOUBLE PRECISION :: h(SIZE(x)), fact
+    INTEGER :: j, order
+!
+    order = sp%order   ! Polynomial degree p + 1
+!
+    h(:) = x(:)-sp%knots(left(:))
+    f(:) = 0.0d0
+    IF( jder .LT. 0 .OR. jder .GE. order ) RETURN
+!
+    SELECT CASE (jder)
+    CASE(0)            ! function value
+       DO j=order,1,-1
+          f(:) = f(:)*h(:) + ppform(:,j)
+       END DO
+    CASE(1)            ! 1st derivative
+       DO j=order-1,1,-1
+          f(:) = f(:)*h(:) + j*ppform(:,j+1)
+       END DO
+    CASE default       ! 2nd and higher derivatives
+       fact = order-jder
+       DO j=order,jder+1,-1
+          f(:) = f(:)/fact*j*h(:) + ppform(:,j)
+          fact = fact-1.0d0
+       END DO
+       DO j=2,jder
+          f(:) = f(:)*j
+       END DO
+    END SELECT
+  END SUBROUTINE ppval0_n
+!===========================================================================
+  SUBROUTINE ppval1(sp, x, ppform, left, jder, f)
+!
+!   Compute function and derivatives from the PP representation
+!    f(x) = \sum_{k=0}^p ppform(k)*(x-t_{left})^k
+!
+    TYPE(spline1d), INTENT(in) :: sp
+    DOUBLE PRECISION, INTENT(in) :: x, ppform(:,:)
+    INTEGER, INTENT(in) :: left, jder
+    DOUBLE PRECISION, INTENT(out) :: f(:)
+    DOUBLE PRECISION :: h, fact
+    INTEGER :: j, order
+!
+    order = sp%order   ! Polynomial degree p + 1
+!
+    h = x-sp%knots(left)
+    f = 0.0d0
+    IF( jder .LT. 0 .OR. jder .GE. order ) RETURN
+!
+    SELECT CASE (jder)
+    CASE(0)            ! function value
+       DO j=order,1,-1
+          f(:) = f(:)*h + ppform(j,:)
+       END DO
+    CASE(1)            ! 1st derivative
+       DO j=order-1,1,-1
+          f(:) = f(:)*h + j*ppform(j+1,:)
+       END DO
+    CASE default       ! 2nd and higher derivatives
+       fact = order-jder
+       DO j=order,jder+1,-1
+          f(:) = f(:)/fact*j*h + ppform(j,:)
+          fact = fact-1.0d0
+       END DO
+       DO j=2,jder
+          f(:) = f(:)*j
+       END DO
+    END SELECT
+  END SUBROUTINE ppval1
+!===========================================================================
+  SUBROUTINE ppval1z(sp, x, ppform, left, jder, f)
+!
+!   Compute function and derivatives from the PP representation
+!    f(x) = \sum_{k=0}^p ppform(k)*(x-t_{left})^k
+!
+    TYPE(spline1d), INTENT(in) :: sp
+    DOUBLE PRECISION, INTENT(in) :: x
+    DOUBLE COMPLEX, INTENT(in) :: ppform(:,:)
+    INTEGER, INTENT(in) :: left, jder
+    DOUBLE COMPLEX, INTENT(out) :: f(:)
+    DOUBLE PRECISION :: h, fact
+    INTEGER :: j, order
+!
+    order = sp%order   ! Polynomial degree p + 1
+!
+    h = x-sp%knots(left)
+    f = (0.0d0,0.0d0)
+    IF( jder .LT. 0 .OR. jder .GE. order ) RETURN
+!
+    SELECT CASE (jder)
+    CASE(0)            ! function value
+       DO j=order,1,-1
+          f(:) = f(:)*h + ppform(j,:)
+       END DO
+    CASE(1)            ! 1st derivative
+       DO j=order-1,1,-1
+          f(:) = f(:)*h + j*ppform(j+1,:)
+       END DO
+    CASE default       ! 2nd and higher derivatives
+       fact = order-jder
+       DO j=order,jder+1,-1
+          f(:) = f(:)/fact*j*h + ppform(j,:)
+          fact = fact-1.0d0
+       END DO
+       DO j=2,jder
+          f(:) = f(:)*j
+       END DO
+    END SELECT
+  END SUBROUTINE ppval1z
+!===========================================================================
+  SUBROUTINE ppval2(sp, x, ppform, left, jder, f)
+!
+!   Compute function and derivatives from the PP representation
+!    f(x) = \sum_{k=0}^p ppform(k)*(x-t_{left})^k
+!
+    TYPE(spline1d), INTENT(in) :: sp
+    DOUBLE PRECISION, INTENT(in) :: x, ppform(:,:,:)
+    INTEGER, INTENT(in) :: left, jder
+    DOUBLE PRECISION, INTENT(out) :: f(:,:)
+    DOUBLE PRECISION :: h, fact
+    INTEGER :: j, order
+!
+    order = sp%order   ! Polynomial degree p + 1
+!
+    h = x-sp%knots(left)
+    f = 0.0d0
+    IF( jder .LT. 0 .OR. jder .GE. order ) RETURN
+!
+    SELECT CASE (jder)
+    CASE(0)            ! function value
+       DO j=order,1,-1
+          f(:,:) = f(:,:)*h + ppform(j,:,:)
+       END DO
+    CASE(1)            ! 1st derivative
+       DO j=order-1,1,-1
+          f(:,:) = f(:,:)*h + j*ppform(j+1,:,:)
+       END DO
+    CASE default       ! 2nd and higher derivatives
+       fact = order-jder
+       DO j=order,jder+1,-1
+          f(:,:) = f(:,:)/fact*j*h + ppform(j,:,:)
+          fact = fact-1.0d0
+       END DO
+       DO j=2,jder
+          f(:,:) = f(:,:)*j
+       END DO
+    END SELECT
+  END SUBROUTINE ppval2
+!===========================================================================
+  SUBROUTINE ppval2z(sp, x, ppform, left, jder, f)
+!
+!   Compute function and derivatives from the PP representation
+!    f(x) = \sum_{k=0}^p ppform(k)*(x-t_{left})^k
+!
+    TYPE(spline1d), INTENT(in) :: sp
+    DOUBLE PRECISION, INTENT(in) :: x
+    DOUBLE COMPLEX, INTENT(in) :: ppform(:,:,:)
+    INTEGER, INTENT(in) :: left, jder
+    DOUBLE COMPLEX, INTENT(out) :: f(:,:)
+    DOUBLE PRECISION :: h, fact
+    INTEGER :: j, order
+!
+    order = sp%order   ! Polynomial degree p + 1
+!
+    h = x-sp%knots(left)
+    f = (0.0d0,0.0d0)
+    IF( jder .LT. 0 .OR. jder .GE. order ) RETURN
+!
+    SELECT CASE (jder)
+    CASE(0)            ! function value
+       DO j=order,1,-1
+          f(:,:) = f(:,:)*h + ppform(j,:,:)
+       END DO
+    CASE(1)            ! 1st derivative
+       DO j=order-1,1,-1
+          f(:,:) = f(:,:)*h + j*ppform(j+1,:,:)
+       END DO
+    CASE default       ! 2nd and higher derivatives
+       fact = order-jder
+       DO j=order,jder+1,-1
+          f(:,:) = f(:,:)/fact*j*h + ppform(j,:,:)
+          fact = fact-1.0d0
+       END DO
+       DO j=2,jder
+          f(:,:) = f(:,:)*j
+       END DO
+    END SELECT
+  END SUBROUTINE ppval2z
+!===========================================================================
+  SUBROUTINE locintv0_old(sp, x, left)
+!
+!   Locate the interval containing x
+!   Should be in [0, nints-1]
+!
+    TYPE(spline1d) :: sp
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(out) :: left
+    DOUBLE PRECISION :: hinv
+    INTEGER :: nints
+!
+    nints = sp%nints
+!
+!  Case of equidistant mesh
+    IF( sp%nlequid) THEN
+       hinv = sp%hinv
+       left = MAX(0,MIN(FLOOR((x-sp%knots(0))*hinv),nints-1)) 
+       RETURN
+    END IF
+!
+!  Non-equistant mesh
+    left = sp%left
+    DO
+       IF( left .EQ. nints ) THEN 
+          left = nints-1
+          EXIT
+       END IF
+       IF( left .LT. 0 ) THEN 
+          left = 0
+          EXIT
+       END IF
+       IF( x .LT. sp%knots(left+1) ) THEN
+          IF( x .GE. sp%knots(left) ) THEN
+             EXIT
+          ELSE
+             left = left-1
+          END IF
+       ELSE
+          left = left+1
+       END IF
+    END DO
+    IF(left .GT. 0 .AND. left .LT. nints) THEN
+       sp%left = left
+    END IF
+  END SUBROUTINE locintv0_old
+!
+!===========================================================================
+  SUBROUTINE locintv1_old(sp, x, left)
+!
+!   Locate the intervals left(:) containing x(:)
+!   Should be in [0, nints-1]
+!
+    TYPE(spline1d) :: sp
+    DOUBLE PRECISION, INTENT(in) :: x(:)
+    INTEGER, INTENT(out) :: left(:)
+    DOUBLE PRECISION :: hinv
+    INTEGER :: nints, i
+!
+!  Case of equidistant mesh
+    nints = sp%nints
+    IF( sp%nlequid) THEN
+       hinv = sp%hinv
+       left(:) = MAX(0,MIN(FLOOR((x(:)-sp%knots(0))*hinv),nints-1))
+       RETURN
+    END IF
+!
+!  Non-equistant mesh
+    DO i=1,SIZE(x)
+       CALL locintv0_old(sp, x(i), left(i))
+    END DO
+  END SUBROUTINE locintv1_old
+!
+!===========================================================================
+  SUBROUTINE locintv0(sp, x, left)
+!
+!   Locate the interval containing x
+!   Should be in [0, nints-1]
+!
+    TYPE(spline1d) :: sp
+    DOUBLE PRECISION, INTENT(in) :: x
+    INTEGER, INTENT(out) :: left
+    DOUBLE PRECISION :: hinv
+    INTEGER :: l, nf, nints
+!
+    nints = sp%nints
+!
+!  Case of equidistant mesh
+    IF( sp%nlequid) THEN
+       hinv = sp%hinv
+       left = MAX(0,MIN(FLOOR((x-sp%knots(0))*hinv),nints-1)) 
+       RETURN
+    END IF
+!
+!  Non-equistant mesh
+    hinv = sp%hinv
+    nf = SIZE(sp%fmap) - 1
+    l = MAX(0,MIN(FLOOR((x-sp%knots(0))*hinv),nf-1))   ! left on fine mesh
+    left = sp%fmap(l)
+    IF( x.GE.sp%knots(left+1) ) left = MIN(left+1,nints-1)
+  END SUBROUTINE locintv0
+!
+!===========================================================================
+  SUBROUTINE locintv1(sp, x, left)
+!
+!   Locate the intervals left(:) containing x(:)
+!   Should be in [0, nints-1]
+!
+    TYPE(spline1d) :: sp
+    DOUBLE PRECISION, INTENT(in) :: x(:)
+    INTEGER, INTENT(out) :: left(:)
+    INTEGER :: l(SIZE(x))
+    DOUBLE PRECISION :: hinv
+    INTEGER :: npt, nf, nints, i
+!
+    npt = SIZE(x)
+!
+!  Case of equidistant mesh
+    nints = sp%nints
+    IF( sp%nlequid) THEN
+       hinv = sp%hinv
+       left(1:npt) = MAX(0,MIN(FLOOR((x(1:npt)-sp%knots(0))*hinv),nints-1))
+       RETURN
+    END IF
+!
+!  Non-equistant mesh
+    hinv = sp%hinv
+    nf = SIZE(sp%fmap) - 1
+    l(:) = MAX(0,MIN(FLOOR((x(:)-sp%knots(0))*hinv),nf-1))   ! left on fine mesh
+    left(1:npt) = sp%fmap(l(1:npt))
+    WHERE( x.GE.sp%knots(left+1) ) left = MIN(left+1,nints-1)
+  END SUBROUTINE locintv1
+!
+!===========================================================================
+  SUBROUTINE destroy_sp1d(sp)
+!
+!  Clean up 1d spline object
+!
+    TYPE(spline1d) :: sp
+!
+    IF( ASSOCIATED(sp%knots) ) DEALLOCATE (sp%knots)
+    IF( ASSOCIATED(sp%val0) ) DEALLOCATE (sp%val0)
+    IF( ASSOCIATED(sp%valc) ) DEALLOCATE (sp%valc)
+    IF( ASSOCIATED(sp%gausx) ) DEALLOCATE (sp%gausx)
+    IF( ASSOCIATED(sp%gausw) ) DEALLOCATE (sp%gausw)
+    IF( ASSOCIATED(sp%intspl) ) DEALLOCATE (sp%intspl)
+    IF( ASSOCIATED(sp%ppform) ) DEALLOCATE (sp%ppform)
+    IF( ASSOCIATED(sp%ppformz) ) DEALLOCATE (sp%ppformz)
+    IF( ASSOCIATED(sp%bcoefs) ) DEALLOCATE(sp%bcoefs)
+    IF( ASSOCIATED(sp%bcoefsc) ) DEALLOCATE(sp%bcoefsc)
+    IF( ASSOCIATED(sp%fmap) ) DEALLOCATE(sp%fmap)
+!
+    CALL destroy(sp%mat)
+    CALL destroy(sp%matp)
+    CALL destroy_dftmap(sp%dft)
+  END SUBROUTINE destroy_sp1d
+!
+!===========================================================================
+  SUBROUTINE destroy_dftmap(m)
+!
+!   Clean up DFTMAP
+!
+    TYPE(dftmap) :: m
+!
+    IF(ASSOCIATED(m%coefs)) DEALLOCATE(m%coefs)
+  END SUBROUTINE destroy_dftmap
+!===========================================================================
+  SUBROUTINE destroy_sp2d(sp)
+!
+!  Clean up 2d spline object
+!
+    TYPE(spline2d) :: sp
+!
+    IF( ASSOCIATED(sp%ppform) ) DEALLOCATE(sp%ppform)
+    IF( ASSOCIATED(sp%ppformz) ) DEALLOCATE(sp%ppformz)
+    IF( ASSOCIATED(sp%bcoefs) ) DEALLOCATE(sp%bcoefs)
+    IF( ASSOCIATED(sp%bcoefsc) ) DEALLOCATE(sp%bcoefsc)
+    CALL destroy_sp1d(sp%sp1)
+    CALL destroy_sp1d(sp%sp2)
+  END SUBROUTINE destroy_sp2d
+!===========================================================================
+  SUBROUTINE destroy_sp2d1d(sp)
+!
+!  Clean up 2d1d spline object
+!
+    TYPE(spline2d1d) :: sp
+!
+    IF( ASSOCIATED(sp%ppform) ) DEALLOCATE(sp%ppform)
+    IF( ASSOCIATED(sp%ppformz) ) DEALLOCATE (sp%ppformz)
+    IF( ASSOCIATED(sp%bcoefs) ) DEALLOCATE(sp%bcoefs)
+    IF( ASSOCIATED(sp%bcoefsc) ) DEALLOCATE(sp%bcoefsc)
+    CALL destroy_sp2d(sp%sp12)
+    CALL destroy_sp1d(sp%sp3)
+  END SUBROUTINE destroy_sp2d1d
+!
+!===========================================================================
+  SUBROUTINE calc_integ0(sp, finteg)
+!
+!   Compute integral of splines
+!
+    TYPE(spline1d) :: sp
+    DOUBLE PRECISION, INTENT(out) :: finteg(0:)
+    DOUBLE PRECISION, ALLOCATABLE :: xg(:), wg(:), fun(:,:)
+    DOUBLE PRECISION :: x1, x2
+    INTEGER :: dim, nx, nidbas, ng, i, ig, j, jj, left
+!
+    CALL get_dim(sp, dim, nx, nidbas)
+    ng = MAX(2, (nidbas+2)/2)
+    ALLOCATE(xg(ng), wg(ng), fun(0:nidbas,1))
+    fun = 0.0d0
+    finteg = 0.0d0
+    DO i=1,nx             ! Loop thru the intervals
+       left = i
+       x1 = sp%knots(i-1)
+       x2 = sp%knots(i)
+       CALL gauleg(x1, x2, xg, wg, ng)
+       DO ig=1,ng         ! Loop thru Gauss points
+          CALL basfun(xg(ig), sp, fun, i)
+          left = i-1
+          DO j=0,nidbas   ! Loop thru the splines [left:left+nidbas]
+             jj = left+j  ! in this interval
+             IF( sp%period ) jj = MODULO(left+j, nx)
+             finteg(jj) = finteg(jj) + wg(ig)*fun(j,1)
+          END DO
+       END DO
+    END DO
+    DEALLOCATE(xg, wg, fun)
+  END SUBROUTINE calc_integ0
+!===========================================================================
+  SUBROUTINE calc_integn(sp, finteg)
+!
+!   Compute integrals = Int( x^a \Lambda_j(x) ), a=0,1,... n
+!
+    TYPE(spline1d) :: sp
+    DOUBLE PRECISION, INTENT(out) :: finteg(0:,0:)
+    DOUBLE PRECISION, ALLOCATABLE :: xg(:), wg(:), fun(:,:)
+    DOUBLE PRECISION :: x1, x2, xpow
+    INTEGER :: dim, nx, nidbas, ng, i, ig, j, k, jj, left
+    INTEGER :: nord
+!
+    nord = SIZE(finteg,2)-1
+    CALL get_dim(sp, dim, nx, nidbas)
+    ng = MAX(2, (nidbas+nord+2)/2)
+    ALLOCATE(xg(ng), wg(ng), fun(0:nidbas,1))
+    fun = 0.0d0
+    finteg = 0.0d0
+    DO i=1,nx             ! Loop thru the intervals
+       left = i
+       x1 = sp%knots(i-1)
+       x2 = sp%knots(i)
+       CALL gauleg(x1, x2, xg, wg, ng)
+       DO ig=1,ng         ! Loop thru Gauss points
+          CALL basfun(xg(ig), sp, fun, i)
+          left=i-1
+          DO j=0,nidbas   ! Loop thru the splines [left:left+nidbas]
+             jj = left+j  ! in this interval
+             IF( sp%period ) jj = MODULO(left+j, nx)
+             xpow = wg(ig)*fun(j,1)
+             DO k=0,nord
+                finteg(jj,k) = finteg(jj,k) + xpow
+                xpow = xpow*xg(ig)
+             END DO
+          END DO
+       END DO
+    END DO
+    DEALLOCATE(xg, wg, fun)
+  END SUBROUTINE calc_integn
+!===========================================================================
+!
+  DOUBLE PRECISION FUNCTION fintg1(sp, c)
+!
+!   Integral of 1d function from its spline coefs c.
+!
+    TYPE(spline1d) :: sp
+    DOUBLE PRECISION, INTENT(in) :: c(0:)
+    INTEGER :: dim
+    dim = sp%dim
+    fintg1 = DOT_PRODUCT(sp%intspl(0:dim-1), c(0:dim-1))
+  END FUNCTION fintg1
+!===========================================================================
+  DOUBLE PRECISION FUNCTION fintg2(sp, c)
+!
+!   Integral of 2d function from its spline coefs c.
+!
+    TYPE(spline2d) :: sp
+    DOUBLE PRECISION, INTENT(in) :: c(0:,0:)
+    INTEGER :: dim1, dim2, i, j
+    dim1 = sp%sp1%dim
+    dim2 = sp%sp2%dim
+    fintg2 = 0.0d0
+    DO j=0,dim2-1
+       DO i=0,dim1-1
+          fintg2 = fintg2 + c(i,j)*sp%sp1%intspl(i)*sp%sp2%intspl(j)
+       END DO
+    END DO
+  END FUNCTION fintg2
+!===========================================================================
+  SUBROUTINE gridval2d_2d(sp, xp, yp, fp, jder, c, ppform)
+!
+!  Compute values or jder-th dervivative of f(x,y) from ppform 
+!  of spline sp. Recompute the ppform if the optional spline 
+!  coefficients c are given.
+!
+!              F(I,J) = F(X(I), Y(J))
+!
+    TYPE(spline2d), INTENT(inout) :: sp
+    DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: xp, yp
+    DOUBLE PRECISION, DIMENSION(:,:), INTENT(out) :: fp
+    INTEGER, INTENT(in) :: jder(2)
+    DOUBLE PRECISION, DIMENSION(:,:), OPTIONAL, INTENT(in) :: c
+    DOUBLE PRECISION, DIMENSION(:,:,:,:), OPTIONAL :: ppform
+!
+    INTEGER :: d1, d2, k1, k2, n1, n2
+    DOUBLE PRECISION, ALLOCATABLE :: work(:,:,:), temp(:)
+    DOUBLE PRECISION, ALLOCATABLE :: funx(:,:), funy(:,:)
+    DOUBLE PRECISION :: x(SIZE(xp)), y(SIZE(yp))
+    INTEGER :: leftx(SIZE(xp)), lefty(SIZE(yp))
+    INTEGER :: i, j, k, ii, jj
+    LOGICAL :: nlppform
+!
+    d1 = sp%sp1%dim
+    d2 = sp%sp2%dim
+    k1 = sp%sp1%order
+    k2 = sp%sp2%order
+    n1 = sp%sp1%nints
+    n2 = sp%sp2%nints
+    nlppform = sp%sp1%nlppform .OR. sp%sp2%nlppform
+!
+!   Compute PPFORMM/BCOEFS if spline coefs are passed
+!
+    IF( PRESENT(c) ) THEN
+       IF( nlppform ) THEN
+          ALLOCATE(work(d2,k1,n1))
+          CALL topp1(sp%sp1, c , work)
+          IF(PRESENT(ppform)) THEN
+             CALL topp2(sp%sp2, work, ppform)
+          ELSE
+             IF( ASSOCIATED(sp%ppform) ) DEALLOCATE(sp%ppform)
+             ALLOCATE(sp%ppform(k1,n1,k2,n2))
+             CALL topp2(sp%sp2, work, sp%ppform)
+          END IF
+          DEALLOCATE(work)
+       ELSE
+          IF( ASSOCIATED(sp%bcoefs) ) DEALLOCATE(sp%bcoefs)
+          ALLOCATE(sp%bcoefs(SIZE(c,1),SIZE(c,2)))
+          sp%bcoefs = c
+       END IF
+    END IF
+!
+!   Applly periodicity if required
+!
+    IF( sp%sp1%period ) THEN   !  ** Applly periodicity **
+       x = sp%sp1%knots(0) + MODULO(xp-sp%sp1%knots(0), sp%sp1%lperiod)
+    ELSE
+       x = xp
+    END IF
+    IF( sp%sp2%period ) THEN   !  ** Applly periodicity **
+       y = sp%sp2%knots(0) + MODULO(yp-sp%sp2%knots(0), sp%sp2%lperiod)
+    ELSE
+       y = yp
+    END IF
+!
+!   Locate interval containing (x,y)
+!
+    CALL locintv(sp%sp1, x, leftx)
+    CALL locintv(sp%sp2, y, lefty)
+!
+!  Compute function/derivatives
+!
+    IF( nlppform ) THEN   ! using PP form
+       ALLOCATE(temp(k2))
+       DO j=1,SIZE(y)
+          DO i=1,SIZE(x)
+             IF(PRESENT(ppform)) THEN
+                CALL ppval(sp%sp1, x(i), ppform(:,leftx(i)+1,:,lefty(j)+1),&
+                     &     leftx(i), jder(1), temp)
+             ELSE
+                CALL ppval(sp%sp1, x(i), sp%ppform(:,leftx(i)+1,:,lefty(j)+1),&
+                     &     leftx(i), jder(1), temp)
+             END IF
+             CALL ppval(sp%sp2, y(j), temp, lefty(j), jder(2), fp(i,j))
+          END DO
+       END DO
+       DEALLOCATE(temp)
+    ELSE                   ! using spline expansion
+       ALLOCATE(funx(1:k1,0:jder(1)), funy(1:k2,0:jder(2)))
+       fp = 0.0d0
+       DO j=1,SIZE(y)
+          CALL basfun(y(j), sp%sp2, funy, lefty(j)+1)
+          DO i=1,SIZE(x)
+             CALL basfun(x(i), sp%sp1, funx, leftx(i)+1)
+             DO jj=1,k2
+                DO ii=1,k1
+                   fp(i,j) = fp(i,j) + sp%bcoefs(leftx(i)+ii,lefty(j)+jj) * &
+                        &   funx(ii,jder(1))*funy(jj,jder(2))
+                END DO
+             END DO
+          END DO
+       END DO
+       DEALLOCATE(funx, funy)
+    END IF
+  END SUBROUTINE gridval2d_2d
+!===========================================================================
+  SUBROUTINE gridval2d1d_3d(sp, xp, yp, zp, fp, jder, c, ppform)
+!
+!  Compute values or jder-th dervivative of f(x,y,z) from spline
+!  coefficients (nlppform=.false.) or ppform (nlppform=.true.)
+!
+!              F(I,J,K) = F(X(I), Y(J), Z(K))
+!
+    TYPE(spline2d1d), TARGET :: sp
+    DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: xp, yp, zp
+    DOUBLE PRECISION, DIMENSION(:,:,:), INTENT(out) :: fp
+    INTEGER, INTENT(in) :: jder(3)
+    DOUBLE PRECISION, DIMENSION(:,:,:), OPTIONAL, INTENT(in) :: c
+    DOUBLE PRECISION, DIMENSION(:,:,:,:,:,:), OPTIONAL :: ppform
+!
+    TYPE(spline2d), POINTER :: sp2
+    DOUBLE PRECISION, ALLOCATABLE :: work1(:,:,:,:), work2(:,:,:,:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: temp1(:,:), temp2(:)
+    DOUBLE PRECISION, ALLOCATABLE :: funx(:,:), funy(:,:), funz(:,:)
+    DOUBLE PRECISION :: x(SIZE(xp)), y(SIZE(yp)), z(SIZE(zp))
+    INTEGER          :: leftx(SIZE(xp)), lefty(SIZE(yp)), leftz(SIZE(zp))
+    INTEGER :: d1, d2, d3, k1, k2, k3, n1, n2, n3
+    INTEGER :: ipx, ipy, ipz, k, ii, jj, kk
+    LOGICAL :: nlppform
+!--------------------------------------------------------------------------------
+!                               1. Prologue
+!
+    sp2 => sp%sp12
+    d1 = sp2%sp1%dim
+    d2 = sp2%sp2%dim
+    d3 = sp%sp3%dim
+    k1 = sp2%sp1%order
+    k2 = sp2%sp2%order
+    k3 = sp%sp3%order
+    n1 = sp2%sp1%nints
+    n2 = sp2%sp2%nints
+    n3 = sp%sp3%nints
+    nlppform = sp2%sp1%nlppform .OR. sp2%sp2%nlppform .OR. sp%sp3%nlppform
+!
+!   Applly periodicity if required
+    IF( sp2%sp1%period ) THEN
+       x = sp2%sp1%knots(0) + MODULO(xp-sp2%sp1%knots(0), sp2%sp1%lperiod)
+    ELSE
+       x = xp
+    END IF
+    IF( sp2%sp2%period ) THEN
+       y = sp2%sp2%knots(0) + MODULO(yp-sp2%sp2%knots(0), sp2%sp2%lperiod)
+    ELSE
+       y = yp
+    END IF
+    IF( sp%sp3%period ) THEN
+       z = sp%sp3%knots(0) + MODULO(zp-sp%sp3%knots(0), sp%sp3%lperiod)
+    ELSE
+       z = zp
+    END IF
+!
+!   Locate interval containing (x,y,z)
+    CALL locintv(sp2%sp1, x, leftx)
+    CALL locintv(sp2%sp2, y, lefty)
+    CALL locintv(sp%sp3,  z, leftz)
+!--------------------------------------------------------------------------------
+!                               2. Using PPFORM
+!
+    IF( nlppform ) THEN
+!
+!   Compute PPFORM from BCOEF
+       IF( PRESENT(c) ) THEN
+          ALLOCATE(work2(d3,k1,n1,k2,n2))
+          ALLOCATE(work1(d2,d3,k1,n1))
+          CALL topp2(sp2%sp1, c, work1)
+          CALL topp3(sp2%sp2, work1, work2)
+          DEALLOCATE(work1)
+          IF( PRESENT(ppform) )THEN
+             CALL topp4(sp%sp3, work2, ppform)
+          ELSE
+             IF( ASSOCIATED(sp%ppform) ) DEALLOCATE(sp%ppform)
+             ALLOCATE(sp%ppform(k1,n1,k2,n2,k3,n3))
+             CALL topp4(sp%sp3, work2, sp%ppform)
+          END IF
+          DEALLOCATE(work2)
+       END IF
+!
+!   Compute function/derivatives
+       ALLOCATE(temp1(k2,k3))
+       ALLOCATE(temp2(k3))
+       DO ipz=1,SIZE(z)
+          DO ipy=1,SIZE(y)
+             DO ipx=1,SIZE(x)
+                IF(PRESENT(ppform)) THEN
+                   CALL ppval(sp2%sp1, x(ipx), &
+                        &  ppform(:,leftx(ipx)+1,:,lefty(ipy)+1,:,leftz(ipz)+1),&
+                        &  leftx(ipx), jder(1), temp1)
+                ELSE
+                   CALL ppval(sp2%sp1, x(ipx), &
+                        &  sp%ppform(:,leftx(ipx)+1,:,lefty(ipy)+1,:,leftz(ipz)+1),&
+                        &  leftx(ipx), jder(1), temp1)
+                END IF
+                CALL ppval(sp2%sp2, y(ipy), temp1, lefty(ipy), jder(2), &
+                     &     temp2)
+                CALL ppval(sp%sp3, z(ipz), temp2, leftz(ipz), jder(3), &
+                     &     fp(ipx,ipy,ipz))
+             END DO
+          END DO
+       END DO
+       DEALLOCATE(temp1)
+       DEALLOCATE(temp2)
+!--------------------------------------------------------------------------------
+!                               3. Using spline expansion
+!
+    ELSE
+       IF( PRESENT(c) ) THEN
+          IF( ASSOCIATED(sp%bcoefs) ) DEALLOCATE(sp%bcoefs)
+          ALLOCATE(sp%bcoefs(SIZE(c,1),SIZE(c,2),SIZE(c,3)))
+          sp%bcoefs = c
+       END IF
+!
+!   Compute function/derivatives
+       ALLOCATE(funx(1:k1,0:jder(1)))
+       ALLOCATE(funy(1:k2,0:jder(2)))
+       ALLOCATE(funz(1:k3,0:jder(3)))
+       fp = 0.0d0
+       DO ipz=1,SIZE(z)
+          CALL basfun(z(ipz), sp%sp3, funz, leftz(ipz)+1)
+          DO ipy=1,SIZE(y)
+             CALL basfun(y(ipy), sp2%sp2, funy, lefty(ipy)+1)
+             DO ipx=1,SIZE(x)
+                CALL basfun(x(ipx), sp2%sp1, funx, leftx(ipx)+1)
+                DO kk=1,k3
+                   DO jj=1,k2
+                      DO ii=1,k1
+                         fp(ipx,ipy,ipz) = fp(ipx,ipy,ipz) + &
+                           &  sp%bcoefs(leftx(ipx)+ii,lefty(ipy)+jj,leftz(ipz)+kk) * &
+                           &  funx(ii,jder(1)) * funy(jj,jder(2)) * funz(kk,jder(3))
+                      END DO
+                   END DO
+                END DO
+             END DO
+          END DO
+       END DO
+       DEALLOCATE(funx, funy, funz)
+    END IF
+  END SUBROUTINE gridval2d1d_3d
+!===========================================================================
+  SUBROUTINE gridval2d1d_1d(sp, xp, yp, zp, fp, jder, c, ppform)
+!
+!  Compute values or jder-th dervivative of f(x,y,z) from spline
+!  coefficients (nlppform=.false.) or ppform (nlppform=.true.)
+!
+!              F(I) = F(X(I),Y(I),Z(I))
+!
+    TYPE(spline2d1d), TARGET :: sp
+    DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: xp, yp, zp
+    DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: fp
+    INTEGER, INTENT(in) :: jder(3)
+    DOUBLE PRECISION, DIMENSION(:,:,:), OPTIONAL, INTENT(in) :: c
+    DOUBLE PRECISION, DIMENSION(:,:,:,:,:,:), OPTIONAL :: ppform
+!
+    TYPE(spline2d), POINTER :: sp2
+    DOUBLE PRECISION, ALLOCATABLE :: work1(:,:,:,:), work2(:,:,:,:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: temp1(:,:), temp2(:)
+    DOUBLE PRECISION, ALLOCATABLE :: funx(:,:), funy(:,:), funz(:,:)
+    DOUBLE PRECISION :: x(SIZE(xp)), y(SIZE(yp)), z(SIZE(zp))
+    INTEGER          :: leftx(SIZE(xp)), lefty(SIZE(yp)), leftz(SIZE(zp))
+    INTEGER :: d1, d2, d3, k1, k2, k3, n1, n2, n3
+    INTEGER :: np, ip, ii, jj, kk
+    LOGICAL :: nlppform
+!--------------------------------------------------------------------------------
+!                               1. Prologue
+!
+    sp2 => sp%sp12
+    d1 = sp2%sp1%dim
+    d2 = sp2%sp2%dim
+    d3 = sp%sp3%dim
+    k1 = sp2%sp1%order
+    k2 = sp2%sp2%order
+    k3 = sp%sp3%order
+    n1 = sp2%sp1%nints
+    n2 = sp2%sp2%nints
+    n3 = sp%sp3%nints
+    np = SIZE(xp)
+    nlppform = sp2%sp1%nlppform .OR. sp2%sp2%nlppform .OR. sp%sp3%nlppform
+!
+!   Applly periodicity if required
+    IF( sp2%sp1%period ) THEN
+       x = sp2%sp1%knots(0) + MODULO(xp-sp2%sp1%knots(0), sp2%sp1%lperiod)
+    ELSE
+       x = xp
+    END IF
+    IF( sp2%sp2%period ) THEN
+       y = sp2%sp2%knots(0) + MODULO(yp-sp2%sp2%knots(0), sp2%sp2%lperiod)
+    ELSE
+       y = yp
+    END IF
+    IF( sp%sp3%period ) THEN
+       z = sp%sp3%knots(0) + MODULO(zp-sp%sp3%knots(0), sp%sp3%lperiod)
+    ELSE
+       z = zp
+    END IF
+!
+!   Locate interval containing (x,y,z)
+    CALL locintv(sp2%sp1, x, leftx)
+    CALL locintv(sp2%sp2, y, lefty)
+    CALL locintv(sp%sp3,  z, leftz)
+!--------------------------------------------------------------------------------
+!                               2. Using PPFORM
+!
+    IF( nlppform ) THEN
+!
+!   Compute PPFORM from BCOEF
+       IF( PRESENT(c) ) THEN
+          ALLOCATE(work2(d3,k1,n1,k2,n2))
+          ALLOCATE(work1(d2,d3,k1,n1))
+          CALL topp2(sp2%sp1, c, work1)
+          CALL topp3(sp2%sp2, work1, work2)
+          DEALLOCATE(work1)
+          IF( PRESENT(ppform) )THEN
+             CALL topp4(sp%sp3, work2, ppform)
+          ELSE
+             IF( ASSOCIATED(sp%ppform) ) DEALLOCATE(sp%ppform)
+             ALLOCATE(sp%ppform(k1,n1,k2,n2,k3,n3))
+             CALL topp4(sp%sp3, work2, sp%ppform)
+          END IF
+          DEALLOCATE(work2)
+       END IF
+!
+!   Compute function/derivatives
+       ALLOCATE(temp1(k2,k3))
+       ALLOCATE(temp2(k3))
+       DO ip=1,np
+          IF(PRESENT(ppform)) THEN
+             CALL ppval(sp2%sp1, x(ip), &
+                  &  ppform(:,leftx(ip)+1,:,lefty(ip)+1,:,leftz(ip)+1),&
+                  &  leftx(ip), jder(1), temp1)
+          ELSE
+             CALL ppval(sp2%sp1, x(ip), &
+                  &  sp%ppform(:,leftx(ip)+1,:,lefty(ip)+1,:,leftz(ip)+1),&
+                  &  leftx(ip), jder(1), temp1)
+          END IF
+          CALL ppval(sp2%sp2, y(ip), temp1, lefty(ip), jder(2), temp2)
+          CALL ppval(sp%sp3, z(ip), temp2, leftz(ip), jder(3), fp(ip))
+       END DO
+       DEALLOCATE(temp1)
+       DEALLOCATE(temp2)
+!--------------------------------------------------------------------------------
+!                               3. Using spline expansion
+!
+    ELSE
+       IF( PRESENT(c) ) THEN
+          IF( ASSOCIATED(sp%bcoefs) ) DEALLOCATE(sp%bcoefs)
+          ALLOCATE(sp%bcoefs(SIZE(c,1),SIZE(c,2),SIZE(c,3)))
+          sp%bcoefs = c
+       END IF
+!
+!   Compute function/derivatives
+       ALLOCATE(funx(1:k1,0:jder(1)))
+       ALLOCATE(funy(1:k2,0:jder(2)))
+       ALLOCATE(funz(1:k3,0:jder(3)))
+       fp = 0.0d0
+       DO ip=1,np
+          CALL basfun(x(ip), sp2%sp1, funx, leftx(ip)+1)
+          CALL basfun(y(ip), sp2%sp2, funy, lefty(ip)+1)
+          CALL basfun(z(ip), sp%sp3,  funz, leftz(ip)+1)
+          DO kk=1,k3
+             DO jj=1,k2
+                DO ii=1,k1
+                   fp(ip) = fp(ip) + &
+                           &  sp%bcoefs(leftx(ip)+ii,lefty(ip)+jj,leftz(ip)+kk) * &
+                           &  funx(ii,jder(1))*funy(jj,jder(2))*funz(kk,jder(3))
+                END DO
+             END DO
+          END DO
+       END DO
+       DEALLOCATE(funx, funy, funz)
+    END IF
+  END SUBROUTINE gridval2d1d_1d
+!===========================================================================
+  SUBROUTINE gridval2d(sp, xp, yp, fp, jder, c, ppform)
+!
+!  Compute values or jder-th dervivative of f(x,y) from ppform 
+!  of spline sp. Recompute the ppform if the optional spline 
+!  coefficients c are given.
+!
+!              F = F(X, Y)
+!
+    TYPE(spline2d), INTENT(inout) :: sp
+    DOUBLE PRECISION, INTENT(in) :: xp, yp
+    DOUBLE PRECISION, INTENT(out) :: fp
+    INTEGER, INTENT(in) :: jder(2)
+    DOUBLE PRECISION, DIMENSION(:,:), OPTIONAL, INTENT(in) :: c
+    DOUBLE PRECISION, DIMENSION(:,:,:,:), OPTIONAL :: ppform
+!
+    INTEGER :: d1, d2, k1, k2, n1, n2
+    DOUBLE PRECISION, ALLOCATABLE :: work(:,:,:), temp(:)
+    DOUBLE PRECISION, ALLOCATABLE :: funx(:,:), funy(:,:)
+    DOUBLE PRECISION :: x, y
+    INTEGER :: leftx, lefty
+    INTEGER :: i, j, k, ii, jj
+    LOGICAL :: nlppform
+!
+    d1 = sp%sp1%dim
+    d2 = sp%sp2%dim
+    k1 = sp%sp1%order
+    k2 = sp%sp2%order
+    n1 = sp%sp1%nints
+    n2 = sp%sp2%nints
+    nlppform = sp%sp1%nlppform .OR. sp%sp2%nlppform
+!
+!   Compute PPFORM/BCOEFS if spline coefs are passed
+!
+    IF( PRESENT(c)) THEN
+       IF( nlppform ) THEN
+          ALLOCATE(work(d2,k1,n1))
+          CALL topp1(sp%sp1, c , work)
+          IF(PRESENT(ppform)) THEN
+             CALL topp2(sp%sp2, work, ppform)
+          ELSE
+             IF( ASSOCIATED(sp%ppform) ) DEALLOCATE(sp%ppform)
+             ALLOCATE(sp%ppform(k1,n1,k2,n2))
+             CALL topp2(sp%sp2, work, sp%ppform)
+          END IF
+          DEALLOCATE(work)
+       ELSE
+          IF( ASSOCIATED(sp%bcoefs) ) DEALLOCATE(sp%bcoefs)
+          ALLOCATE(sp%bcoefs(SIZE(c,1),SIZE(c,2)))
+          sp%bcoefs = c
+       END IF
+    END IF
+!
+!   Applly periodicity if required
+!
+    IF( sp%sp1%period ) THEN   !  ** Applly periodicity **
+       x = sp%sp1%knots(0) + MODULO(xp-sp%sp1%knots(0), sp%sp1%lperiod)
+    ELSE
+       x = xp
+    END IF
+    IF( sp%sp2%period ) THEN   !  ** Applly periodicity **
+       y = sp%sp2%knots(0) + MODULO(yp-sp%sp2%knots(0), sp%sp2%lperiod)
+    ELSE
+       y = yp
+    END IF
+!
+!  Locate the interval containing x, y
+!
+    CALL locintv(sp%sp1, x, leftx)
+    CALL locintv(sp%sp2, y, lefty)
+!
+!  Compute function/derivatives
+!
+    IF( nlppform ) THEN   ! using PP form
+       ALLOCATE(temp(k2))
+       IF(PRESENT(ppform)) THEN
+          CALL ppval(sp%sp1, x, ppform(:,leftx+1,:,lefty+1),&
+               &  leftx, jder(1), temp)
+       ELSE
+          CALL ppval(sp%sp1, x, sp%ppform(:,leftx+1,:,lefty+1),&
+               &  leftx, jder(1), temp)
+       END IF
+       CALL ppval(sp%sp2, y, temp, lefty, jder(2), fp)
+       DEALLOCATE(temp)
+    ELSE                   ! using spline expansion
+       ALLOCATE(funx(1:k1,0:jder(1)), funy(1:k2,0:jder(2)))
+       fp = 0.0d0
+       CALL basfun(x, sp%sp1, funx, leftx+1)
+       CALL basfun(y, sp%sp2, funy, lefty+1)
+       DO jj=1,k2
+          DO ii=1,k1
+             fp = fp + &
+                  &   funy(jj,jder(2))*sp%bcoefs(leftx+ii,lefty+jj)* &
+                  &   funx(ii,jder(1))
+          END DO
+       END DO
+       DEALLOCATE(funx, funy)
+    END IF
+  END SUBROUTINE gridval2d
+!===========================================================================
+  SUBROUTINE gridval2d_1d(sp, xp, yp, fp, jder, c, ppform)
+!
+!  Compute values or jder-th dervivative of f(x,y) from ppform 
+!  of spline sp. Recompute the ppform if the optional spline 
+!  coefficients c are given.
+!
+!              F(I) = F(X(I), Y(I))
+!
+    TYPE(spline2d), INTENT(inout) :: sp
+    DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: xp, yp
+    DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: fp
+    INTEGER, INTENT(in) :: jder(2)
+    DOUBLE PRECISION, DIMENSION(:,:), OPTIONAL, INTENT(in) :: c
+    DOUBLE PRECISION, DIMENSION(:,:,:,:), OPTIONAL :: ppform
+!
+    INTEGER :: d1, d2, k1, k2, n1, n2, np
+    DOUBLE PRECISION, ALLOCATABLE :: work(:,:,:), temp(:)
+    DOUBLE PRECISION, ALLOCATABLE :: funx(:,:), funy(:,:)
+    DOUBLE PRECISION :: x(SIZE(xp)), y(SIZE(yp))
+    INTEGER :: leftx(SIZE(xp)), lefty(SIZE(yp))
+    INTEGER :: i, j, k, ii, jj
+    LOGICAL :: nlppform
+!
+    d1 = sp%sp1%dim
+    d2 = sp%sp2%dim
+    k1 = sp%sp1%order
+    k2 = sp%sp2%order
+    n1 = sp%sp1%nints
+    n2 = sp%sp2%nints
+    nlppform = sp%sp1%nlppform .OR. sp%sp2%nlppform
+!
+!   Compute PPFORM/BCOEFS if spline coefs are passed
+!
+    IF( PRESENT(c)) THEN
+       IF( nlppform ) THEN
+          ALLOCATE(work(d2,k1,n1))
+          CALL topp1(sp%sp1, c , work)
+          IF(PRESENT(ppform)) THEN
+             CALL topp2(sp%sp2, work, ppform)
+          ELSE
+             IF( ASSOCIATED(sp%ppform) ) DEALLOCATE(sp%ppform)
+             ALLOCATE(sp%ppform(k1,n1,k2,n2))
+             CALL topp2(sp%sp2, work, sp%ppform)
+          END IF
+          DEALLOCATE(work)
+       ELSE
+          IF( ASSOCIATED(sp%bcoefs) ) DEALLOCATE(sp%bcoefs)
+          ALLOCATE(sp%bcoefs(SIZE(c,1),SIZE(c,2)))
+          sp%bcoefs = c
+       END IF
+    END IF
+!
+!   Applly periodicity if required
+!
+    np = SIZE(xp)
+    IF( sp%sp1%period ) THEN   !  ** Applly periodicity **
+       x = sp%sp1%knots(0) + MODULO(xp-sp%sp1%knots(0), sp%sp1%lperiod)
+    ELSE
+       x = xp
+    END IF
+    IF( sp%sp2%period ) THEN   !  ** Applly periodicity **
+       y = sp%sp2%knots(0) + MODULO(yp-sp%sp2%knots(0), sp%sp2%lperiod)
+    ELSE
+       y = yp
+    END IF
+!
+!  Locate the interval containing x, y
+!
+    CALL locintv(sp%sp1, x, leftx)
+    CALL locintv(sp%sp2, y, lefty)
+!
+!  Compute function/derivatives
+!
+    IF( nlppform ) THEN   ! using PP form
+       ALLOCATE(temp(k2))
+       DO i=1,np
+          IF(PRESENT(ppform)) THEN
+             CALL ppval(sp%sp1, x(i), ppform(:,leftx(i)+1,:,lefty(i)+1),&
+                  &  leftx(i), jder(1), temp)
+          ELSE
+             CALL ppval(sp%sp1, x(i), sp%ppform(:,leftx(i)+1,:,lefty(i)+1),&
+                  &  leftx(i), jder(1), temp)
+          END IF
+          CALL ppval(sp%sp2, y(i), temp, lefty(i), jder(2), fp(i))
+       END DO
+       DEALLOCATE(temp)
+    ELSE                   ! using spline expansion
+       ALLOCATE(funx(1:k1,0:jder(1)), funy(1:k2,0:jder(2)))
+       fp = 0.0d0
+       DO i=1,np
+          CALL basfun(x(i), sp%sp1, funx, leftx(i)+1)
+          CALL basfun(y(i), sp%sp2, funy, lefty(i)+1)
+          DO jj=1,k2
+             DO ii=1,k1
+                fp(i) = fp(i) + &
+                   &   funy(jj,jder(2))*sp%bcoefs(leftx(i)+ii,lefty(i)+jj)* &
+                   &   funx(ii,jder(1))
+             END DO
+          END DO
+       END DO
+       DEALLOCATE(funx, funy)
+    END IF
+  END SUBROUTINE gridval2d_1d
+!===========================================================================
+  SUBROUTINE gridval2dz(sp, xp, yp, fp, jder, c, ppformz)
+!
+!  Compute values or jder-th dervivative of f(x,y) from ppform 
+!  of spline sp. Recompute the ppform if the optional spline 
+!  coefficients c are given.
+!
+!              F = F(X, Y)
+!
+    TYPE(spline2d), INTENT(inout) :: sp
+    DOUBLE PRECISION, INTENT(in) :: xp, yp
+    DOUBLE COMPLEX, INTENT(out) :: fp
+    INTEGER, INTENT(in) :: jder(2)
+    DOUBLE COMPLEX, DIMENSION(:,:), OPTIONAL, INTENT(in) :: c
+    DOUBLE COMPLEX, DIMENSION(:,:,:,:), OPTIONAL :: ppformz
+!
+    INTEGER :: d1, d2, k1, k2, n1, n2
+    DOUBLE COMPLEX, ALLOCATABLE :: work(:,:,:), temp(:)
+    DOUBLE PRECISION, ALLOCATABLE :: funx(:,:), funy(:,:)
+    DOUBLE PRECISION :: x, y
+    INTEGER :: leftx, lefty
+    INTEGER :: i, j, k, ii, jj
+    LOGICAL :: nlppform
+!
+    d1 = sp%sp1%dim
+    d2 = sp%sp2%dim
+    k1 = sp%sp1%order
+    k2 = sp%sp2%order
+    n1 = sp%sp1%nints
+    n2 = sp%sp2%nints
+    nlppform = sp%sp1%nlppform .OR. sp%sp2%nlppform
+!
+!   Compute PPFORM/BCOEFS if spline coefs are passed
+!
+    IF( PRESENT(c)) THEN
+       IF( nlppform ) THEN
+          ALLOCATE(work(d2,k1,n1))
+          CALL topp1z(sp%sp1, c , work)
+          IF(PRESENT(ppformz)) THEN
+             CALL topp2z(sp%sp2, work, ppformz)
+          ELSE
+             IF( ASSOCIATED(sp%ppformz) ) DEALLOCATE(sp%ppformz)
+             ALLOCATE(sp%ppformz(k1,n1,k2,n2))
+             CALL topp2z(sp%sp2, work, sp%ppformz)
+          END IF
+          DEALLOCATE(work)
+       ELSE
+          IF( ASSOCIATED(sp%bcoefsc) ) DEALLOCATE(sp%bcoefsc)
+          ALLOCATE(sp%bcoefsc(SIZE(c,1),SIZE(c,2)))
+          sp%bcoefsc = c
+       END IF
+    END IF
+!
+!   Applly periodicity if required
+!
+    IF( sp%sp1%period ) THEN   !  ** Applly periodicity **
+       x = sp%sp1%knots(0) + MODULO(xp-sp%sp1%knots(0), sp%sp1%lperiod)
+    ELSE
+       x = xp
+    END IF
+    IF( sp%sp2%period ) THEN   !  ** Applly periodicity **
+       y = sp%sp2%knots(0) + MODULO(yp-sp%sp2%knots(0), sp%sp2%lperiod)
+    ELSE
+       y = yp
+    END IF
+!
+!  Locate the interval containing x, y
+!
+    CALL locintv(sp%sp1, x, leftx)
+    CALL locintv(sp%sp2, y, lefty)
+!
+!  Compute function/derivatives
+!
+    IF( nlppform ) THEN   ! using PP form
+       ALLOCATE(temp(k2))
+       IF(PRESENT(ppformz)) THEN
+          CALL ppval(sp%sp1, x, ppformz(:,leftx+1,:,lefty+1),&
+               &  leftx, jder(1), temp)
+       ELSE
+          CALL ppval(sp%sp1, x, sp%ppformz(:,leftx+1,:,lefty+1),&
+               &  leftx, jder(1), temp)
+       END IF
+       CALL ppval(sp%sp2, y, temp, lefty, jder(2), fp)
+       DEALLOCATE(temp)
+    ELSE                   ! using spline expansion
+       ALLOCATE(funx(1:k1,0:jder(1)), funy(1:k2,0:jder(2)))
+       fp = (0.0d0,0.0d0)
+       CALL basfun(x, sp%sp1, funx, leftx+1)
+       CALL basfun(y, sp%sp2, funy, lefty+1)
+       DO jj=1,k2
+          DO ii=1,k1
+             fp = fp + &
+                  &   funy(jj,jder(2))*sp%bcoefsc(leftx+ii,lefty+jj)* &
+                  &   funx(ii,jder(1))
+          END DO
+       END DO
+       DEALLOCATE(funx, funy)
+    END IF
+  END SUBROUTINE gridval2dz
+!===========================================================================
+  SUBROUTINE gridval2d_1dz(sp, xp, yp, fp, jder, c, ppformz)
+!
+!  Compute values or jder-th dervivative of f(x,y) from ppform 
+!  of spline sp. Recompute the ppform if the optional spline 
+!  coefficients c are given.
+!
+!              F(I) = F(X(I), Y(I))
+!
+    TYPE(spline2d), INTENT(inout) :: sp
+    DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: xp, yp
+    DOUBLE COMPLEX, DIMENSION(:), INTENT(out) :: fp
+    INTEGER, INTENT(in) :: jder(2)
+    DOUBLE COMPLEX, DIMENSION(:,:), OPTIONAL, INTENT(in) :: c
+    DOUBLE COMPLEX, DIMENSION(:,:,:,:), OPTIONAL :: ppformz
+!
+    INTEGER :: d1, d2, k1, k2, n1, n2, np
+    DOUBLE COMPLEX, ALLOCATABLE :: work(:,:,:), temp(:)
+    DOUBLE PRECISION, ALLOCATABLE :: funx(:,:), funy(:,:)
+    DOUBLE PRECISION :: x(SIZE(xp)), y(SIZE(yp))
+    INTEGER :: leftx(SIZE(xp)), lefty(SIZE(yp))
+    INTEGER :: i, j, k, ii, jj
+    LOGICAL :: nlppform
+!
+    d1 = sp%sp1%dim
+    d2 = sp%sp2%dim
+    k1 = sp%sp1%order
+    k2 = sp%sp2%order
+    n1 = sp%sp1%nints
+    n2 = sp%sp2%nints
+    nlppform = sp%sp1%nlppform .OR. sp%sp2%nlppform
+!
+!   Compute PPFORM/BCOEFS if spline coefs are passed
+!
+    IF( PRESENT(c)) THEN
+       IF( nlppform ) THEN
+          ALLOCATE(work(d2,k1,n1))
+          CALL topp1z(sp%sp1, c , work)
+          IF(PRESENT(ppformz)) THEN
+             CALL topp2z(sp%sp2, work, ppformz)
+          ELSE
+             IF( ASSOCIATED(sp%ppformz) ) DEALLOCATE(sp%ppformz)
+             ALLOCATE(sp%ppformz(k1,n1,k2,n2))
+             CALL topp2z(sp%sp2, work, sp%ppformz)
+          END IF
+          DEALLOCATE(work)
+       ELSE
+          IF( ASSOCIATED(sp%bcoefsc) ) DEALLOCATE(sp%bcoefsc)
+          ALLOCATE(sp%bcoefsc(SIZE(c,1),SIZE(c,2)))
+          sp%bcoefsc = c
+       END IF
+    END IF
+!
+!   Applly periodicity if required
+!
+    np = SIZE(xp)
+    IF( sp%sp1%period ) THEN   !  ** Applly periodicity **
+       x = sp%sp1%knots(0) + MODULO(xp-sp%sp1%knots(0), sp%sp1%lperiod)
+    ELSE
+       x = xp
+    END IF
+    IF( sp%sp2%period ) THEN   !  ** Applly periodicity **
+       y = sp%sp2%knots(0) + MODULO(yp-sp%sp2%knots(0), sp%sp2%lperiod)
+    ELSE
+       y = yp
+    END IF
+!
+!  Locate the interval containing x, y
+!
+    CALL locintv(sp%sp1, x, leftx)
+    CALL locintv(sp%sp2, y, lefty)
+!
+!  Compute function/derivatives
+!
+    IF( nlppform ) THEN   ! using PP form
+       ALLOCATE(temp(k2))
+       DO i=1,np
+          IF(PRESENT(ppformz)) THEN
+             CALL ppval(sp%sp1, x(i), ppformz(:,leftx(i)+1,:,lefty(i)+1),&
+                  &  leftx(i), jder(1), temp)
+          ELSE
+             CALL ppval(sp%sp1, x(i), sp%ppformz(:,leftx(i)+1,:,lefty(i)+1),&
+                  &  leftx(i), jder(1), temp)
+          END IF
+          CALL ppval(sp%sp2, y(i), temp, lefty(i), jder(2), fp(i))
+       END DO
+       DEALLOCATE(temp)
+    ELSE                   ! using spline expansion
+       ALLOCATE(funx(1:k1,0:jder(1)), funy(1:k2,0:jder(2)))
+       fp = (0.0d0,0.0d0)
+       DO i=1,np
+          CALL basfun(x(i), sp%sp1, funx, leftx(i)+1)
+          CALL basfun(y(i), sp%sp2, funy, lefty(i)+1)
+          DO jj=1,k2
+             DO ii=1,k1
+                fp(i) = fp(i) + &
+                   &   funy(jj,jder(2))*sp%bcoefsc(leftx(i)+ii,lefty(i)+jj)* &
+                   &   funx(ii,jder(1))
+             END DO
+          END DO
+       END DO
+       DEALLOCATE(funx, funy)
+    END IF
+  END SUBROUTINE gridval2d_1dz
+!===========================================================================
+  SUBROUTINE gridval2d_2dz(sp, xp, yp, fp, jder, c, ppformz)
+!
+!  Compute values or jder-th dervivative of f(x,y) from ppform 
+!  of spline sp. Recompute the ppform if the optional spline 
+!  coefficients c are given.
+!
+!              F(I,J) = F(X(I), Y(J))
+!
+    TYPE(spline2d), INTENT(inout) :: sp
+    DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: xp, yp
+    DOUBLE COMPLEX, DIMENSION(:,:), INTENT(out) :: fp
+    INTEGER, INTENT(in) :: jder(2)
+    DOUBLE COMPLEX, DIMENSION(:,:), OPTIONAL, INTENT(in) :: c
+    DOUBLE COMPLEX, DIMENSION(:,:,:,:), OPTIONAL :: ppformz
+!
+    INTEGER :: d1, d2, k1, k2, n1, n2
+    DOUBLE COMPLEX, ALLOCATABLE :: work(:,:,:), temp(:)
+    DOUBLE PRECISION, ALLOCATABLE :: funx(:,:), funy(:,:)
+    DOUBLE PRECISION :: x(SIZE(xp)), y(SIZE(yp))
+    INTEGER :: leftx(SIZE(xp)), lefty(SIZE(yp))
+    INTEGER :: i, j, k, ii, jj
+    LOGICAL :: nlppform
+!
+    d1 = sp%sp1%dim
+    d2 = sp%sp2%dim
+    k1 = sp%sp1%order
+    k2 = sp%sp2%order
+    n1 = sp%sp1%nints
+    n2 = sp%sp2%nints
+    nlppform = sp%sp1%nlppform .OR. sp%sp2%nlppform
+!
+!   Compute PPFORMM/BCOEFS if spline coefs are passed
+!
+    IF( PRESENT(c) ) THEN
+       IF( nlppform ) THEN
+          ALLOCATE(work(d2,k1,n1))
+          CALL topp1z(sp%sp1, c , work)
+          IF(PRESENT(ppformz)) THEN
+             CALL topp2z(sp%sp2, work, ppformz)
+          ELSE
+             IF( ASSOCIATED(sp%ppformz) ) DEALLOCATE(sp%ppformz)
+             ALLOCATE(sp%ppformz(k1,n1,k2,n2))
+             CALL topp2z(sp%sp2, work, sp%ppformz)
+          END IF
+          DEALLOCATE(work)
+       ELSE
+          IF( ASSOCIATED(sp%bcoefsc) ) DEALLOCATE(sp%bcoefsc)
+          ALLOCATE(sp%bcoefsc(SIZE(c,1),SIZE(c,2)))
+          sp%bcoefsc = c
+       END IF
+    END IF
+!
+!   Applly periodicity if required
+!
+    IF( sp%sp1%period ) THEN   !  ** Applly periodicity **
+       x = sp%sp1%knots(0) + MODULO(xp-sp%sp1%knots(0), sp%sp1%lperiod)
+    ELSE
+       x = xp
+    END IF
+    IF( sp%sp2%period ) THEN   !  ** Applly periodicity **
+       y = sp%sp2%knots(0) + MODULO(yp-sp%sp2%knots(0), sp%sp2%lperiod)
+    ELSE
+       y = yp
+    END IF
+!
+!   Locate interval containing (x,y)
+!
+    CALL locintv(sp%sp1, x, leftx)
+    CALL locintv(sp%sp2, y, lefty)
+!
+!  Compute function/derivatives
+!
+    IF( nlppform ) THEN   ! using PP form
+       ALLOCATE(temp(k2))
+       DO j=1,SIZE(y)
+          DO i=1,SIZE(x)
+             IF(PRESENT(ppformz)) THEN
+                CALL ppval(sp%sp1, x(i), ppformz(:,leftx(i)+1,:,lefty(j)+1),&
+                     &     leftx(i), jder(1), temp)
+             ELSE
+                CALL ppval(sp%sp1, x(i), sp%ppformz(:,leftx(i)+1,:,lefty(j)+1),&
+                     &     leftx(i), jder(1), temp)
+             END IF
+             CALL ppval(sp%sp2, y(j), temp, lefty(j), jder(2), fp(i,j))
+          END DO
+       END DO
+       DEALLOCATE(temp)
+    ELSE                   ! using spline expansion
+       ALLOCATE(funx(1:k1,0:jder(1)), funy(1:k2,0:jder(2)))
+       fp = 0.0d0
+       DO j=1,SIZE(y)
+          CALL basfun(y(j), sp%sp2, funy, lefty(j)+1)
+          DO i=1,SIZE(x)
+             CALL basfun(x(i), sp%sp1, funx, leftx(i)+1)
+             DO jj=1,k2
+                DO ii=1,k1
+                   fp(i,j) = fp(i,j) + sp%bcoefsc(leftx(i)+ii,lefty(j)+jj) * &
+                        &   funx(ii,jder(1))*funy(jj,jder(2))
+                END DO
+             END DO
+          END DO
+       END DO
+       DEALLOCATE(funx, funy)
+    END IF
+  END SUBROUTINE gridval2d_2dz
+!===========================================================================
+  SUBROUTINE calc_fftmass(spl, fftmat)
+!
+!  Compute FT of mass matrix for periodic spline on equidistant mesh
+!
+    TYPE(spline1d)                :: spl
+    DOUBLE PRECISION, INTENT(out) :: fftmat(0:)
+!
+    INTEGER :: dim, nx, nidbas, ngauss
+    DOUBLE PRECISION, ALLOCATABLE :: xgauss(:), wgauss(:)
+    DOUBLE COMPLEX, ALLOCATABLE   :: ft_fun(:,:)
+    INTEGER :: igauss, intv
+!
+    CALL get_dim(spl, dim, nx, nidbas)
+    CALL get_gauss(spl, ngauss)
+    ALLOCATE(xgauss(ngauss), wgauss(ngauss))
+    ALLOCATE(ft_fun(0:nx-1,1))
+!
+!  Integrate on first interval
+    intv = 1
+    CALL get_gauss(spl, ngauss, intv, xgauss, wgauss)
+    fftmat = 0.0d0
+    DO igauss=1,ngauss
+       CALL ft_basfun(xgauss(igauss), spl, ft_fun, intv)
+       fftmat(:) = fftmat(:) + wgauss(igauss)*ft_fun(:,1)*CONJG(ft_fun(:,1))
+    END DO
+!
+    DEALLOCATE(ft_fun)
+    DEALLOCATE(xgauss, wgauss)
+  END SUBROUTINE calc_fftmass
+!===========================================================================
+  SUBROUTINE calc_fftmass_old(spl, fftmat)
+!
+!  Compute FT of mass matrix for periodic spline on equidistant mesh
+!
+    TYPE(spline1d) :: spl
+    DOUBLE PRECISION, INTENT(out) :: fftmat(0:)
+    INTEGER :: dim, nx, nidbas, ngauss
+    DOUBLE PRECISION, ALLOCATABLE :: fun(:,:), xgauss(:), wgauss(:), arow(:)
+    INTEGER :: i, j, k, igauss, intv
+    DOUBLE PRECISION :: pi, arg0, arg
+!
+    CALL get_dim(spl, dim, nx, nidbas)
+    CALL get_gauss(spl, ngauss)
+    ALLOCATE(fun(0:nidbas,1)) ! Spline
+    ALLOCATE(xgauss(ngauss), wgauss(ngauss))
+    ALLOCATE(arow(0:nidbas))
+!
+!  Assemble the first row of the upper mass matrix
+    arow = 0.0d0
+    intv = 1   ! Get splines on Gauss points in first interval
+    CALL get_gauss(spl, ngauss, intv, xgauss, wgauss)
+    DO igauss=1,ngauss
+       CALL basfun(xgauss(igauss), spl, fun, intv)
+       DO i=0,nidbas
+          DO j=0,nidbas-i
+             arow(i)=arow(i)+fun(j,1)*fun(i+j,1)*wgauss(igauss)
+          END DO
+       END DO
+    END DO
+!
+!  Fourier transform
+    pi = 4.0d0*ATAN(1.0d0)
+    arg0 = 2.0d0*pi/REAL(nx,8)
+    DO k=0,nx-1
+       fftmat(k) = arow(0)
+       arg = k*arg0
+       DO i=1,nidbas
+          fftmat(k) = fftmat(k) + 2.0d0*arow(i)*COS(i*arg)
+       END DO
+    END DO
+!
+    DEALLOCATE(arow)
+    DEALLOCATE(fun)
+    DEALLOCATE(xgauss, wgauss)
+  END SUBROUTINE calc_fftmass_old
+!===========================================================================
+  SUBROUTINE CompMassMatrix1(sp1, sp2, a, b, MassMatrix)
+  
+! Compute cross mass matrix MassMatrix between splines sp1 and sp2 over
+! interval [a, b]
+  
+    IMPLICIT NONE
+  
+    TYPE(spline1d), INTENT(IN) :: sp1, sp2
+    DOUBLE PRECISION, INTENT(IN) :: a, b
+    DOUBLE PRECISION, DIMENSION(:, :), POINTER :: MassMatrix
+    
+    INTEGER :: ndim1, n1, nidbas1
+    INTEGER :: ndim2, n2, nidbas2
+    INTEGER :: nint, int, ngauss, ig, k1, k2, i1, j2, left1, left2
+    DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: allknots, xg, wg 
+    DOUBLE PRECISION, DIMENSION(:, :), ALLOCATABLE :: fun1, fun2
+  
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    CALL get_dim(sp1, ndim1, n1, nidbas1)
+    CALL get_dim(sp2, ndim2, n2, nidbas2)
+    
+!    PRINT "('In CompMassMatrix1')"
+!    PRINT "('sp1: dim, #intervals, degree', I, I, I)", ndim1, n1, nidbas1 
+!    PRINT "('sp2: dim, #intervals, degree', I, I, I)", ndim2, n2, nidbas2 
+    
+    ALLOCATE(fun1(0:nidbas1, 1)) ! needs only basis functions (no derivatives)
+    ALLOCATE(fun2(0:nidbas2, 1)) ! needs only basis functions (no derivatives)
+  
+    IF (sp1%period) THEN
+       IF (sp2%period) THEN
+          ALLOCATE(MassMatrix(n1, n2))
+       ELSE
+          ALLOCATE(MassMatrix(n1, ndim2))
+       END IF
+    ELSE
+       IF (sp2%period) THEN
+          ALLOCATE(MassMatrix(ndim1, n2))
+       ELSE
+          ALLOCATE(MassMatrix(ndim1, ndim2))
+       END IF
+    END IF
+  
+!
+!   Gauss quadature
+!
+    ALLOCATE(allknots(0:n1+n2+3))
+    CALL sorted_merge(sp1%knots(0:n1), n1+1, sp2%knots(0:n2), n2+1, a, b, allknots, nint)
+    nint = nint-1
+  
+    ngauss = CEILING(REAL(nidbas1 + nidbas2 + 1, 8)/2.D0)
+    ALLOCATE(xg(ngauss), wg(ngauss))
+!===========================================================================
+!              2.0 Assembly loop
+!
+    MassMatrix = 0.d0
+    DO int = 1, nint
+       ! Get gauss abscissas and weights for current interval
+       CALL gauleg(allknots(int-1), allknots(int), xg, wg, ngauss)
+       DO ig = 1, ngauss
+          CALL locintv(sp1, xg(ig), left1)
+          CALL locintv(sp2, xg(ig), left2)
+          CALL basfun(xg(ig), sp1, fun1, left1+1)
+          CALL basfun(xg(ig), sp2, fun2, left2+1)
+          DO k1 = 0, nidbas1
+             IF (sp1%period) THEN
+                i1 = modulo(left1+1 + k1 -1, n1) +1
+             ELSE
+                i1 = left1+1 + k1
+             END IF
+             
+             DO k2 = 0, nidbas2
+                IF (sp2%period) THEN
+                   j2 = modulo(left2+1 + k2 -1, n2) +1
+                ELSE
+                   j2 = left2+1 + k2
+                END IF
+                
+                MassMatrix(i1, j2) = MassMatrix(i1, j2) + wg(ig)*fun1(k1, 1)*fun2(k2, 1)
+             END DO
+          END DO
+       END DO
+    END DO
+  
+!===========================================================================
+!              3.0  Epilogue
+!
+    DEALLOCATE(xg, wg)
+    DEALLOCATE(fun1, fun2)
+    DEALLOCATE(allknots)
+    
+  END SUBROUTINE CompMassMatrix1
+!===========================================================================
+  SUBROUTINE CompMassMatrix_gb(sp1, sp2, a, b, MassMatrix)
+  
+! Compute cross mass matrix MassMatrix between splines sp1 and sp2 over
+! interval [a, b]
+  
+    IMPLICIT NONE
+  
+    TYPE(spline1d), INTENT(IN)   :: sp1, sp2
+    DOUBLE PRECISION, INTENT(IN) :: a, b
+    TYPE(gbmat)                  :: MassMatrix
+    
+    INTEGER :: ndim1, n1, nidbas1
+    INTEGER :: ndim2, n2, nidbas2
+    INTEGER :: nint, int, ngauss, ig, k1, k2, i1, j2, left1, left2
+    DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: allknots, xg, wg 
+    DOUBLE PRECISION, DIMENSION(:, :), ALLOCATABLE :: fun1, fun2
+    DOUBLE PRECISION :: val  
+    !===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    CALL get_dim(sp1, ndim1, n1, nidbas1)
+    CALL get_dim(sp2, ndim2, n2, nidbas2)
+    ALLOCATE(fun1(0:nidbas1, 1)) ! needs only basis functions (no derivatives)
+    ALLOCATE(fun2(0:nidbas2, 1)) ! needs only basis functions (no derivatives)
+!
+!   Gauss quadature
+!
+    ALLOCATE(allknots(0:n1+n2+3))
+    CALL sorted_merge(sp1%knots(0:n1), n1+1, sp2%knots(0:n2), n2+1, a, b, allknots, nint)
+    nint = nint-1
+  
+    ngauss = CEILING(REAL(nidbas1 + nidbas2 + 1, 8)/2.D0)
+    ALLOCATE(xg(ngauss), wg(ngauss))
+!===========================================================================
+!              2.0 Assembly loop
+!
+    DO int = 1, nint
+       ! Get gauss abscissas and weights for current interval
+       CALL gauleg(allknots(int-1), allknots(int), xg, wg, ngauss)
+       DO ig = 1, ngauss
+          CALL locintv(sp1, xg(ig), left1)
+          CALL locintv(sp2, xg(ig), left2)
+          CALL basfun(xg(ig), sp1, fun1, left1+1)
+          CALL basfun(xg(ig), sp2, fun2, left2+1)
+          DO k1 = 0, nidbas1
+             IF (sp1%period) THEN
+                i1 = modulo(left1+1 + k1 -1, n1) +1
+             ELSE
+                i1 = left1+1 + k1
+             END IF
+             
+             DO k2 = 0, nidbas2
+                IF (sp2%period) THEN
+                   j2 = modulo(left2+1 + k2 -1, n2) +1
+                ELSE
+                   j2 = left2+1 + k2
+                END IF
+                val = wg(ig)*fun1(k1, 1)*fun2(k2, 1)
+                CALL updtmat(MassMatrix, i1, j2, val)
+             END DO
+          END DO
+       END DO
+    END DO
+  
+!===========================================================================
+!              3.0  Epilogue
+!
+    DEALLOCATE(xg, wg)
+    DEALLOCATE(fun1, fun2)
+    DEALLOCATE(allknots)
+    
+  END SUBROUTINE CompMassMatrix_gb
+!===========================================================================
+  SUBROUTINE CompMassMatrix_zgb(sp1, sp2, a, b, MassMatrix)
+  
+! Compute cross mass matrix MassMatrix between splines sp1 and sp2 over
+! interval [a, b]
+  
+    IMPLICIT NONE
+  
+    TYPE(spline1d), INTENT(IN)   :: sp1, sp2
+    DOUBLE PRECISION, INTENT(IN) :: a, b
+    TYPE(zgbmat)                 :: MassMatrix
+    
+    INTEGER :: ndim1, n1, nidbas1
+    INTEGER :: ndim2, n2, nidbas2
+    INTEGER :: nint, int, ngauss, ig, k1, k2, i1, j2, left1, left2
+    DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: allknots, xg, wg 
+    DOUBLE PRECISION, DIMENSION(:, :), ALLOCATABLE :: fun1, fun2
+    DOUBLE COMPLEX :: val  
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    CALL get_dim(sp1, ndim1, n1, nidbas1)
+    CALL get_dim(sp2, ndim2, n2, nidbas2)
+    ALLOCATE(fun1(0:nidbas1, 1)) ! needs only basis functions (no derivatives)
+    ALLOCATE(fun2(0:nidbas2, 1)) ! needs only basis functions (no derivatives)
+!
+!   Gauss quadature
+!
+    ALLOCATE(allknots(0:n1+n2+3))
+    CALL sorted_merge(sp1%knots(0:n1), n1+1, sp2%knots(0:n2), n2+1, a, b, allknots, nint)
+    nint = nint-1
+  
+    ngauss = CEILING(REAL(nidbas1 + nidbas2 + 1, 8)/2.D0)
+    ALLOCATE(xg(ngauss), wg(ngauss))
+!===========================================================================
+!              2.0 Assembly loop
+!
+    DO int = 1, nint
+       ! Get gauss abscissas and weights for current interval
+       CALL gauleg(allknots(int-1), allknots(int), xg, wg, ngauss)
+       DO ig = 1, ngauss
+          CALL locintv(sp1, xg(ig), left1)
+          CALL locintv(sp2, xg(ig), left2)
+          CALL basfun(xg(ig), sp1, fun1, left1+1)
+          CALL basfun(xg(ig), sp2, fun2, left2+1)
+          DO k1 = 0, nidbas1
+             IF (sp1%period) THEN
+                i1 = modulo(left1+1 + k1 -1, n1) +1
+             ELSE
+                i1 = left1+1 + k1
+             END IF
+             
+             DO k2 = 0, nidbas2
+                IF (sp2%period) THEN
+                   j2 = modulo(left2+1 + k2 -1, n2) +1
+                ELSE
+                   j2 = left2+1 + k2
+                END IF
+                val = wg(ig)*fun1(k1, 1)*fun2(k2, 1)
+                CALL updtmat(MassMatrix, i1, j2, val)
+             END DO
+          END DO
+       END DO
+    END DO
+  
+!===========================================================================
+!              3.0  Epilogue
+!
+    DEALLOCATE(xg, wg)
+    DEALLOCATE(fun1, fun2)
+    DEALLOCATE(allknots)
+    
+  END SUBROUTINE CompMassMatrix_zgb
+!===========================================================================  
+  SUBROUTINE sorted_merge(arr1, n1, arr2, n2, a, b, arrm, nm) 
+  
+    IMPLICIT NONE
+  
+! Peforms:
+! 1) Merge of arrays arr1 & arr2 including boundary values a & b
+! 2) Sorts the merged arrays keeping only values in [a, b]
+! 3) Removes duplicates
+  
+    INTEGER, INTENT(IN)  :: n1, n2
+    INTEGER, INTENT(OUT) :: nm
+    
+    DOUBLE PRECISION, INTENT(IN) :: a, b
+    DOUBLE PRECISION, INTENT(IN) :: arr1(n1), arr2(n2)
+    DOUBLE PRECISION, DIMENSION(*), INTENT(OUT) :: arrm
+  
+    INTEGER :: i, j
+  
+! Merge the two arrays including a & b
+    nm = n1 + n2 + 2
+    arrm(1:nm) = (/ a, arr1(1:n1), b, arr2(1:n2) /)
+  
+! Sort
+    CALL sort(arrm, nm)
+  
+! Remove duplicates 
+    j = 1
+    DO i = 2, nm
+       IF(arrm(i) .GT. arrm(j)) THEN
+          j = j + 1
+          arrm(j) = arrm(i)
+       END IF
+    END DO
+    nm = j
+  
+! Remove values outside [a, b]
+    j = 0
+    DO i = 1, nm
+       IF((arrm(i) .GE. a) .AND. (arrm(i) .LE. b)) THEN
+          j = j + 1
+          arrm(j) = arrm(i)
+       END IF
+    END DO
+    nm = j
+  
+  END SUBROUTINE sorted_merge
+!===========================================================================
+  SUBROUTINE sort(arr, n)
+  
+! Sorts array ARR of length N into ascending numerical order by the 
+! Shell-Mezgar algorithm. 
+! See Sec. 8.1 of Numerical Recipes
+  
+    IMPLICIT NONE
+    
+    INTEGER, INTENT(IN) :: n 
+    DOUBLE PRECISION, DIMENSION(n), INTENT(INOUT) :: arr
+   
+    INTEGER :: nsort, is, i, j, l, m
+    DOUBLE PRECISION :: tmp
+    DOUBLE PRECISION, PARAMETER :: tiny = 1D-5
+    
+    nsort = INT(LOG(REAL(n, 8))/LOG(2.D0) + tiny)
+  
+    m = n
+    DO is = 1, nsort
+       m = m/2
+       DO j = 1, n-m
+          i = j
+          DO 
+             l = i+m
+             IF (arr(l) .LT. arr(i)) THEN
+                tmp = arr(i)
+                arr(i) = arr(l)
+                arr(l) = tmp
+                i = i-m
+                IF (i .LT. 1) EXIT
+             ELSE
+                EXIT
+             END IF
+          END DO
+       END DO
+    END DO
+  
+  END SUBROUTINE sort
+!===========================================================================
+  LOGICAL FUNCTION is_equid(x, dev)
+!
+!   Check whether mesh is euidistant or not
+!
+    DOUBLE PRECISION, INTENT(in)            :: x(0:)
+    DOUBLE PRECISION, INTENT(out), OPTIONAL :: dev
+!
+    DOUBLE PRECISION :: dx(SIZE(x)-1), dxmin, dxmax, dxaver, e
+    DOUBLE PRECISION, PARAMETER :: tol=1.d-6
+    INTEGER :: n, i
+    n=SIZE(x)-1
+    dx = (/ (x(i)-x(i-1),i=1,n) /)
+    dxmin = MINVAL(dx)
+    dxmax = MAXVAL(dx)
+    dxaver = (x(n)-x(0))/REAL(n,8)
+    e = (dxmax-dxmin)/dxaver
+!!$    e = (dxmax-dxmin)/(SUM(x)/REAL(n+1))
+    is_equid = e.LT.tol
+    IF(PRESENT(dev)) dev = e
+  END FUNCTION is_equid
+!===========================================================================
+  SUBROUTINE create_fine(cmesh, h, fmap)
+!
+!   Create a fine mesh from a coarse mesh and returns its mapping 
+!
+    DOUBLE PRECISION, INTENT(in)  :: cmesh(0:)
+    DOUBLE PRECISION, INTENT(out) :: h
+    INTEGER, POINTER, INTENT(out) :: fmap(:)
+!
+    DOUBLE PRECISION, ALLOCATABLE :: fmesh(:)
+    DOUBLE PRECISION :: xlen, hmin
+    INTEGER :: n, nfine, i, ic
+!
+    n = SIZE(cmesh)-1
+    xlen = cmesh(n)-cmesh(0)
+!
+!     Minimum interval size
+    hmin = xlen
+    DO i=1,n
+       hmin = MIN(hmin, cmesh(i)-cmesh(i-1))
+    END DO
+!
+!     Create the fine mesh
+    nfine = CEILING(xlen/hmin) 
+    h = xlen / REAL(nfine,8)
+    ALLOCATE(fmap(0:nfine))
+    ALLOCATE(fmesh(0:nfine))
+    fmesh = cmesh(0) + (/ (i*h, i=0,nfine) /)
+    fmesh(nfine) = cmesh(n)
+!
+!    Map fine to coarse mesh
+    ic = 0
+    fmap(0) = ic
+    DO i=1,nfine-1
+       DO
+          IF(fmesh(i).GE.cmesh(ic+1)) THEN 
+             ic = ic+1
+          ELSE
+             EXIT
+          END IF
+       END DO
+       fmap(i) = ic
+    END DO
+    fmap(nfine) = n-1
+!
+    DEALLOCATE(fmesh)
+  END SUBROUTINE create_fine
+!===========================================================================
+  SUBROUTINE getgradr(sp, xp, yp, f00, f10, f01)
+!
+!   Compute the function f00 and its derivatives
+!     f10 = d/dx f
+!     f01 = d/dy f
+!   assuming that its PPFORM/BCOEFSC was already computed!
+!
+    TYPE(spline2d), INTENT(inout)      :: sp
+    DOUBLE PRECISION, DIMENSION(:), INTENT(in)     :: xp, yp
+    DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: f00, f10, f01
+!
+    INTEGER :: np
+    DOUBLE PRECISION :: x(SIZE(xp)), y(SIZE(yp))
+    INTEGER :: leftx(SIZE(xp)), lefty(SIZE(yp))
+    INTEGER :: i, ip, ii, jj, nidbas(2)
+    DOUBLE PRECISION :: temp0(SIZE(xp),sp%sp2%order), temp1(SIZE(xp),sp%sp2%order)
+    DOUBLE PRECISION, ALLOCATABLE, SAVE :: funx(:,:), funy(:,:)
+    DOUBLE PRECISION, ALLOCATABLE, SAVE :: ftemp0(:), ftemp1(:)
+    LOGICAL :: nlppform
+!
+!   Apply periodicity if required
+!
+    np = SIZE(xp)
+    nidbas(1) = sp%sp1%order-1
+    nidbas(2) = sp%sp2%order-1
+    nlppform = sp%sp1%nlppform .OR. sp%sp2%nlppform
+!
+!  Locate the interval containing x, y
+!
+    CALL locintv(sp%sp1, xp, leftx)
+    CALL locintv(sp%sp2, yp, lefty)
+    x(:) = xp(:) - sp%sp1%knots(leftx(:))
+    y(:) = yp(:) - sp%sp2%knots(lefty(:))
+!
+!  Compute function/derivatives
+!
+    IF(nlppform) THEN
+!
+!    Using PPFORM
+!----------
+       DO i=1,np
+          CALL my_ppval1(nidbas(1), x(i), sp%ppform(:,leftx(i)+1,:,lefty(i)+1), &
+               & temp0(i,:), temp1(i,:))
+       END DO
+!
+       CALL my_ppval0(nidbas(2), y, temp0, 0, f00)
+       CALL my_ppval0(nidbas(2), y, temp0, 1, f01)
+       CALL my_ppval0(nidbas(2), y, temp1, 0, f10)
+    ELSE
+!
+!    Using spline expansion with sp%bcoefsc
+!----------
+       IF(.NOT.ALLOCATED(funx)) THEN
+          ALLOCATE(funx(0:nidbas(1),0:1))  ! Spline and its first derivative
+          ALLOCATE(funy(0:nidbas(2),0:1))
+          ALLOCATE(ftemp0(0:nidbas(1)))
+          ALLOCATE(ftemp1(0:nidbas(1)))
+       END IF
+!
+       DO ip=1,np
+          CALL my_splines(nidbas(1), x(ip), sp%sp1%val0(:,:,leftx(ip)+1), funx)
+          CALL my_splines(nidbas(2), y(ip), sp%sp2%val0(:,:,lefty(ip)+1), funy)
+          DO ii=0,nidbas(1)
+             ftemp0(ii) = (0.d0,0.d0) 
+             ftemp1(ii) = (0.d0,0.d0) 
+             DO jj=0,nidbas(2)
+                ftemp0(ii) = ftemp0(ii) + sp%bcoefs(leftx(ip)+1+ii,lefty(ip)+1+jj)*funy(jj,0)
+                ftemp1(ii) = ftemp1(ii) + sp%bcoefs(leftx(ip)+1+ii,lefty(ip)+1+jj)*funy(jj,1)
+             END DO
+          END DO
+          f00(ip) = SUM(funx(:,0)*ftemp0(:))
+          f01(ip) = SUM(funx(:,0)*ftemp1(:))
+          f10(ip) = SUM(funx(:,1)*ftemp0(:))
+       END DO
+!-----------
+    END IF
+  CONTAINS
+!+++
+    SUBROUTINE my_ppval0(p, x, ppform, jder, f)
+!
+!   Compute function and derivatives from the PP representation
+!   for many points x(:)
+      INTEGER, INTENT(in) :: p
+      DOUBLE PRECISION, INTENT(in) :: x(:)
+      DOUBLE PRECISION, INTENT(in) :: ppform(:,:)
+      INTEGER, INTENT(in) :: jder
+      DOUBLE PRECISION, INTENT(out) :: f(:)
+      DOUBLE PRECISION :: fact
+      INTEGER :: j
+      SELECT CASE (jder)
+      CASE(0)            ! function value
+         SELECT CASE(p)
+         CASE(1)
+            f(:) = ppform(:,1) + x(:)*ppform(:,2)
+         CASE(2)
+            f(:) = ppform(:,1) + x(:)*(ppform(:,2)+x(:)*ppform(:,3))
+!!$         CASE(3)
+!!$            f(:) = ppform(:,1) + x(:)*(ppform(:,2)+x(:)*(ppform(:,3)+x(:)*ppform(:,4)))
+         CASE(3:)
+            f(:) = ppform(:,p+1)
+            DO j=p,1,-1
+               f(:) = f(:)*x(:) + ppform(:,j)
+            END DO
+         END SELECT
+      CASE(1)            ! 1st derivative
+         SELECT CASE(p)
+         CASE(1)
+            f(:) = ppform(:,2)
+         CASE(2)
+            f(:) = ppform(:,2) + x(:)*2.d0*ppform(:,3)
+!!$         CASE(3)
+!!$            f(:) = ppform(:,2) + x(:)*(2.d0*ppform(:,3)+x(:)*3.0d0*ppform(:,4))
+         CASE(3:)
+            f(:) = p*ppform(:,p+1)
+            DO j=p-1,1,-1
+               f(:) = f(:)*x(:) + j*ppform(:,j+1)
+            END DO
+         END SELECT
+      CASE default       ! 2nd and higher derivatives
+         f(:) = ppform(:,p+1)
+         fact = p-jder
+         DO j=p,jder+1,-1
+            f(:) = f(:)/fact*j*x(:) + ppform(:,j)
+            fact = fact-1.0d0
+         END DO
+         DO j=2,jder
+            f(:) = f(:)*j
+         END DO
+      END SELECT
+    END SUBROUTINE my_ppval0
+!+++
+    SUBROUTINE my_ppval1(p, x, ppform, f0, f1)
+!
+!   Compute function and first derivative from the PP representation
+      INTEGER, INTENT(in) :: p
+      DOUBLE PRECISION, INTENT(in) :: x
+      DOUBLE PRECISION, INTENT(in) :: ppform(:,:)
+      DOUBLE PRECISION, INTENT(out) :: f0(:)
+      DOUBLE PRECISION, INTENT(out) :: f1(:)
+      DOUBLE PRECISION :: fact
+      INTEGER :: j
+      SELECT CASE(p)
+      CASE(1)
+         f0(:) = ppform(1,:) + x*ppform(2,:)
+         f1(:) = ppform(2,:)
+      CASE(2)
+         f0(:) = ppform(1,:) + x*(ppform(2,:)+x*ppform(3,:))
+         f1(:) = ppform(2,:) + x*2.d0*ppform(3,:)
+      CASE(3)
+         f0(:) = ppform(1,:) + x*(ppform(2,:)+x*(ppform(3,:)+x*ppform(4,:)))
+         f1(:) = ppform(2,:) + x*(2.d0*ppform(3,:)+x*3.0d0*ppform(4,:))
+      CASE(4:)
+         f0 = ppform(p+1,:)
+         f1 = f0
+         DO j=p,2,-1
+            f0(:) = ppform(j,:) + x*f0(:)
+            f1(:) = f0(:) + x*f1(:)
+         END DO
+         f0(:) = ppform(1,:) + x*f0(:)
+      END SELECT
+    END SUBROUTINE my_ppval1
+!+++
+    SUBROUTINE my_splines(p, x, ppform, f)
+      INTEGER, INTENT(in) :: p
+      DOUBLE PRECISION, INTENT(in) :: x
+      DOUBLE PRECISION, INTENT(in) :: ppform(0:p,0:p)
+      DOUBLE PRECISION, INTENT(out) :: f(0:p,0:1)
+      INTEGER :: i
+      DOUBLE PRECISION :: powerx(0:p)
+      SELECT CASE(p)
+      CASE(1)
+         f(0,0) = ppform(0,0) + x*ppform(1,0)
+         f(0,1) = ppform(1,0)
+         f(1,0) = 1.0-f(0,0)
+         f(1,1) = -f(0,1)
+      CASE(2)
+         f(0,0) = ppform(0,0) + x*(ppform(1,0)+x*ppform(2,0))
+         f(0,1) = ppform(1,0) + 2.d0*x*ppform(2,0)
+         f(1,0) = ppform(0,1) + x*(ppform(1,1)+x*ppform(2,1))
+         f(1,1) = ppform(1,1) + 2.d0*x*ppform(2,1)
+         f(2,0) = 1.0 - f(0,0) - f(1,0)
+         f(2,1) = - f(0,1) - f(1,1)
+      CASE(3)
+         f(0,0) = ppform(0,0) + x*(ppform(1,0)+x*(ppform(2,0)+x*ppform(3,0)))
+         f(0,1) = ppform(1,0) + x*(2.d0*ppform(2,0)+3.d0*x*ppform(3,0))
+         f(1,0) = ppform(0,1) + x*(ppform(1,1)+x*(ppform(2,1)+x*ppform(3,1)))
+         f(1,1) = ppform(1,1) + x*(2.d0*ppform(2,1)+3.d0*x*ppform(3,1))
+         f(2,0) = ppform(0,2) + x*(ppform(1,2)+x*(ppform(2,2)+x*ppform(3,2)))
+         f(2,1) = ppform(1,2) + x*(2.d0*ppform(2,2)+3.d0*x*ppform(3,2))
+         f(3,0) = 1.0 - f(0,0) - f(1,0) - f(2,0)
+         f(3,1) = - f(0,1) - f(1,1) - f(2,1)
+      CASE(4:)
+         powerx(0) = 1.d0
+         DO i=1,p
+            powerx(i) = powerx(i-1)*x
+         END DO
+         DO i=0,p-1
+            f(i,0) = DOT_PRODUCT(ppform(:,i),powerx(:))
+         END DO
+         f(p,0) = 1.d0 - SUM(f(0:p-1,0))
+         f(p,1) = - SUM(f(0:p-1,1))
+      END SELECT
+    END SUBROUTINE my_splines
+!+++
+  END SUBROUTINE getgradr
+!===========================================================================
+  SUBROUTINE getgradz(sp, xp, yp, f00, f10, f01)
+!
+!   Compute the function f00 and its derivatives
+!     f10 = d/dx f
+!     f01 = d/dy f
+!   assuming that its PPFORM/BCOEFSC was already computed!
+!
+    TYPE(spline2d), INTENT(inout)      :: sp
+    DOUBLE PRECISION, DIMENSION(:), INTENT(in)     :: xp, yp
+    DOUBLE COMPLEX, DIMENSION(:), INTENT(out) :: f00, f10, f01
+!
+    INTEGER :: np
+    DOUBLE PRECISION :: x(SIZE(xp)), y(SIZE(yp))
+    INTEGER :: leftx(SIZE(xp)), lefty(SIZE(yp))
+    INTEGER :: i, ip, ii, jj, nidbas(2)
+    DOUBLE COMPLEX :: temp0(SIZE(xp),sp%sp2%order), temp1(SIZE(xp),sp%sp2%order)
+    DOUBLE PRECISION, ALLOCATABLE, SAVE :: funx(:,:), funy(:,:)
+    DOUBLE COMPLEX, ALLOCATABLE, SAVE :: ftemp0(:), ftemp1(:)
+    LOGICAL :: nlppform
+!
+!   Apply periodicity if required
+!
+    np = SIZE(xp)
+    nidbas(1) = sp%sp1%order-1
+    nidbas(2) = sp%sp2%order-1
+    nlppform = sp%sp1%nlppform .OR. sp%sp2%nlppform
+!
+!  Locate the interval containing x, y
+!
+    CALL locintv(sp%sp1, xp, leftx)
+    CALL locintv(sp%sp2, yp, lefty)
+    x(:) = xp(:) - sp%sp1%knots(leftx(:))
+    y(:) = yp(:) - sp%sp2%knots(lefty(:))
+!
+!  Compute function/derivatives
+!
+    IF(nlppform) THEN
+!
+!    Using PPFORM
+!----------
+       DO i=1,np
+          CALL my_ppval1(nidbas(1), x(i), sp%ppformz(:,leftx(i)+1,:,lefty(i)+1), &
+               & temp0(i,:), temp1(i,:))
+       END DO
+!
+       CALL my_ppval0(nidbas(2), y, temp0, 0, f00)
+       CALL my_ppval0(nidbas(2), y, temp0, 1, f01)
+       CALL my_ppval0(nidbas(2), y, temp1, 0, f10)
+    ELSE
+!
+!    Using spline expansion with sp%bcoefsc
+!----------
+       IF(.NOT.ALLOCATED(funx)) THEN
+          ALLOCATE(funx(0:nidbas(1),0:1))  ! Spline and its first derivative
+          ALLOCATE(funy(0:nidbas(2),0:1))
+          ALLOCATE(ftemp0(0:nidbas(1)))
+          ALLOCATE(ftemp1(0:nidbas(1)))
+       END IF
+!
+       DO ip=1,np
+          CALL my_splines(nidbas(1), x(ip), sp%sp1%val0(:,:,leftx(ip)+1), funx)
+          CALL my_splines(nidbas(2), y(ip), sp%sp2%val0(:,:,lefty(ip)+1), funy)
+          DO ii=0,nidbas(1)
+             ftemp0(ii) = (0.d0,0.d0) 
+             ftemp1(ii) = (0.d0,0.d0) 
+             DO jj=0,nidbas(2)
+                ftemp0(ii) = ftemp0(ii) + sp%bcoefsc(leftx(ip)+1+ii,lefty(ip)+1+jj)*funy(jj,0)
+                ftemp1(ii) = ftemp1(ii) + sp%bcoefsc(leftx(ip)+1+ii,lefty(ip)+1+jj)*funy(jj,1)
+             END DO
+          END DO
+          f00(ip) = SUM(funx(:,0)*ftemp0(:))
+          f01(ip) = SUM(funx(:,0)*ftemp1(:))
+          f10(ip) = SUM(funx(:,1)*ftemp0(:))
+       END DO
+!-----------
+    END IF
+  CONTAINS
+!+++
+    SUBROUTINE my_ppval0(p, x, ppform, jder, f)
+!
+!   Compute function and derivatives from the PP representation
+!   for many points x(:)
+      INTEGER, INTENT(in) :: p
+      DOUBLE PRECISION, INTENT(in) :: x(:)
+      DOUBLE COMPLEX, INTENT(in) :: ppform(:,:)
+      INTEGER, INTENT(in) :: jder
+      DOUBLE COMPLEX, INTENT(out) :: f(:)
+      DOUBLE PRECISION :: fact
+      INTEGER :: j
+      SELECT CASE (jder)
+      CASE(0)            ! function value
+         SELECT CASE(p)
+         CASE(1)
+            f(:) = ppform(:,1) + x(:)*ppform(:,2)
+         CASE(2)
+            f(:) = ppform(:,1) + x(:)*(ppform(:,2)+x(:)*ppform(:,3))
+         CASE(3)
+            f(:) = ppform(:,1) + x(:)*(ppform(:,2)+x(:)*(ppform(:,3)+x(:)*ppform(:,4)))
+         CASE(4:)
+            DO j=p+1,1,-1
+               f(:) = f(:)*x(:) + ppform(:,j)
+            END DO
+         END SELECT
+      CASE(1)            ! 1st derivative
+         SELECT CASE(p)
+         CASE(1)
+            f(:) = ppform(:,2)
+         CASE(2)
+            f(:) = ppform(:,2) + x(:)*2.d0*ppform(:,3)
+         CASE(3)
+            f(:) = ppform(:,2) + x(:)*(2.d0*ppform(:,3)+x(:)*3.0d0*ppform(:,4))
+         CASE(4:)
+            DO j=p,1,-1
+               f(:) = f(:)*x(:) + j*ppform(:,j+1)
+            END DO
+         END SELECT
+      CASE default       ! 2nd and higher derivatives
+         f(:) = ppform(:,p+1)
+         fact = p-jder
+         DO j=p,jder+1,-1
+            f(:) = f(:)/fact*j*x(:) + ppform(:,j)
+            fact = fact-1.0d0
+         END DO
+         DO j=2,jder
+            f(:) = f(:)*j
+         END DO
+      END SELECT
+    END SUBROUTINE my_ppval0
+!+++
+    SUBROUTINE my_ppval1(p, x, ppform, f0, f1)
+!
+!   Compute function and first derivative from the PP representation
+      INTEGER, INTENT(in) :: p
+      DOUBLE PRECISION, INTENT(in) :: x
+      DOUBLE COMPLEX, INTENT(in) :: ppform(:,:)
+      DOUBLE COMPLEX, INTENT(out) :: f0(:)
+      DOUBLE COMPLEX, INTENT(out) :: f1(:)
+      DOUBLE PRECISION :: fact
+      INTEGER :: j
+      SELECT CASE(p)
+      CASE(1)
+         f0(:) = ppform(1,:) + x*ppform(2,:)
+         f1(:) = ppform(2,:)
+      CASE(2)
+         f0(:) = ppform(1,:) + x*(ppform(2,:)+x*ppform(3,:))
+         f1(:) = ppform(2,:) + x*2.d0*ppform(3,:)
+      CASE(3)
+         f0(:) = ppform(1,:) + x*(ppform(2,:)+x*(ppform(3,:)+x*ppform(4,:)))
+         f1(:) = ppform(2,:) + x*(2.d0*ppform(3,:)+x*3.0d0*ppform(4,:))
+      CASE(4:)
+         f0 = ppform(p+1,:)
+         f1 = f0
+         DO j=p,2,-1
+            f0(:) = ppform(j,:) + x*f0(:)
+            f1(:) = f0(:) + x*f1(:)
+         END DO
+         f0(:) = ppform(1,:) + x*f0(:)
+      END SELECT
+    END SUBROUTINE my_ppval1
+!+++
+    SUBROUTINE my_splines(p, x, ppform, f)
+      INTEGER, INTENT(in) :: p
+      DOUBLE PRECISION, INTENT(in) :: x
+      DOUBLE PRECISION, INTENT(in) :: ppform(0:p,0:p)
+      DOUBLE PRECISION, INTENT(out) :: f(0:p,0:1)
+      INTEGER :: i
+      DOUBLE PRECISION :: powerx(0:p)
+      SELECT CASE(p)
+      CASE(1)
+         f(0,0) = ppform(0,0) + x*ppform(1,0)
+         f(0,1) = ppform(1,0)
+         f(1,0) = 1.0-f(0,0)
+         f(1,1) = -f(0,1)
+      CASE(2)
+         f(0,0) = ppform(0,0) + x*(ppform(1,0)+x*ppform(2,0))
+         f(0,1) = ppform(1,0) + 2.d0*x*ppform(2,0)
+         f(1,0) = ppform(0,1) + x*(ppform(1,1)+x*ppform(2,1))
+         f(1,1) = ppform(1,1) + 2.d0*x*ppform(2,1)
+         f(2,0) = 1.0 - f(0,0) - f(1,0)
+         f(2,1) = - f(0,1) - f(1,1)
+      CASE(3)
+         f(0,0) = ppform(0,0) + x*(ppform(1,0)+x*(ppform(2,0)+x*ppform(3,0)))
+         f(0,1) = ppform(1,0) + x*(2.d0*ppform(2,0)+3.d0*x*ppform(3,0))
+         f(1,0) = ppform(0,1) + x*(ppform(1,1)+x*(ppform(2,1)+x*ppform(3,1)))
+         f(1,1) = ppform(1,1) + x*(2.d0*ppform(2,1)+3.d0*x*ppform(3,1))
+         f(2,0) = ppform(0,2) + x*(ppform(1,2)+x*(ppform(2,2)+x*ppform(3,2)))
+         f(2,1) = ppform(1,2) + x*(2.d0*ppform(2,2)+3.d0*x*ppform(3,2))
+         f(3,0) = 1.0 - f(0,0) - f(1,0) - f(2,0)
+         f(3,1) = - f(0,1) - f(1,1) - f(2,1)
+      CASE(4:)
+         powerx(0) = 1.d0
+         DO i=1,p
+            powerx(i) = powerx(i-1)*x
+         END DO
+         DO i=0,p-1
+            f(i,0) = DOT_PRODUCT(ppform(:,i),powerx(:))
+         END DO
+         f(p,0) = 1.d0 - SUM(f(0:p-1,0))
+         f(p,1) = - SUM(f(0:p-1,1))
+      END SELECT
+    END SUBROUTINE my_splines
+!+++
+  END SUBROUTINE getgradz
+END MODULE bsplines
diff --git a/src/cds_mod.f90 b/src/cds_mod.f90
new file mode 100644
index 0000000..b0ca9d6
--- /dev/null
+++ b/src/cds_mod.f90
@@ -0,0 +1,626 @@
+!>
+!> @file cds_mod.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+MODULE cds
+!
+!    CDSMAT: Implement sparse matrix using Compressed
+!            Diagonal Storage.
+!
+!    T.M. Tran, CRPP-EPFL
+!    November 2010
+!
+  USE mumps_bsplines
+  IMPLICIT NONE
+!
+  TYPE cds_mat  ! Compressed Diagonal Storage
+     INTEGER :: rank
+     INTEGER :: kl, ku, ndiags
+     INTEGER :: nterms, kmat
+     INTEGER :: ny
+     INTEGER, DIMENSION(:), POINTER            :: dists => NULL()
+     DOUBLE PRECISION, DIMENSION(:), POINTER   :: rowv => NULL()
+     DOUBLE PRECISION, DIMENSION(:), POINTER   :: colh => NULL()
+     DOUBLE PRECISION, DIMENSION(:), POINTER   :: bal  => NULL()
+     DOUBLE PRECISION, DIMENSION(:,:), POINTER :: val => NULL()
+     TYPE(mumps_mat), ALLOCATABLE :: mumps
+  END TYPE cds_mat
+!
+!--------------------------------------------------------------------------------
+  INTERFACE init
+     MODULE PROCEDURE init_cds_mat
+  END INTERFACE init
+  INTERFACE clear_mat
+     MODULE PROCEDURE clear_cds_mat
+  END INTERFACE clear_mat
+  INTERFACE destroy
+     MODULE PROCEDURE destroy_cds_mat
+  END INTERFACE destroy
+  INTERFACE updtmat
+     MODULE PROCEDURE updt_cds
+  END INTERFACE updtmat
+  INTERFACE getele
+     MODULE PROCEDURE getele_cds
+  END INTERFACE getele
+  INTERFACE putele
+     MODULE PROCEDURE putele_cds
+  END INTERFACE putele
+  INTERFACE getcol
+     MODULE PROCEDURE getcol_cds
+  END INTERFACE getcol
+  INTERFACE getrow
+     MODULE PROCEDURE getrow_cds
+  END INTERFACE getrow
+  INTERFACE putcol
+     MODULE PROCEDURE putcol_cds
+  END INTERFACE putcol
+  INTERFACE putrow
+     MODULE PROCEDURE putrow_cds
+  END INTERFACE putrow
+  INTERFACE getdiag
+     MODULE PROCEDURE getdiag_cds
+  END INTERFACE getdiag
+  INTERFACE vmx
+     MODULE PROCEDURE vmx_cds, vmxn_cds
+  END INTERFACE
+  INTERFACE putmat
+     MODULE PROCEDURE putmat_cds
+  END INTERFACE
+  INTERFACE  getmat
+     MODULE PROCEDURE getmat_cds
+  END INTERFACE
+  INTERFACE flops
+     MODULE PROCEDURE flops_cds
+  END INTERFACE flops
+  INTERFACE matnorm
+     MODULE PROCEDURE matnorm_cds
+  END INTERFACE matnorm
+!
+CONTAINS
+ !===========================================================================
+  SUBROUTINE init_cds_mat(rank, dists, nterms, mat, bw0, kmat)
+!
+!  Initialize a CDS matrix obtained for a 2d FE discretization
+!  using Splines of orders p(1) and p(2).
+!  Number first the 2nd (periodic) dimension.
+!
+    INTEGER, INTENT(in)              :: rank
+    INTEGER, ALLOCATABLE, INTENT(in) :: dists(:)
+    INTEGER, INTENT(in)              :: nterms
+    TYPE(cds_mat)                    :: mat
+    INTEGER, OPTIONAL, INTENT(in)    :: bw0, kmat
+!
+    INTEGER :: kl, ku
+!
+    mat%rank = rank
+    mat%nterms = nterms
+    mat%ny = 0           ! Used for unicity condition in cyl. geometry.
+    IF(PRESENT(kmat)) mat%kmat = kmat
+!
+    kl = -LBOUND(dists,1)
+    ku =  UBOUND(dists,1)
+!
+    mat%kl = kl
+    mat%ku = ku
+    mat%ndiags = ku + kl + 1
+    IF(ASSOCIATED(mat%dists)) DEALLOCATE(mat%dists)
+    ALLOCATE(mat%dists(-kl:ku))
+    mat%dists = dists
+!
+    IF(ASSOCIATED(mat%rowv)) DEALLOCATE(mat%rowv)
+    IF(ASSOCIATED(mat%colh)) DEALLOCATE(mat%colh)
+    IF(PRESENT(bw0)) THEN
+       ALLOCATE(mat%rowv(bw0), mat%colh(bw0))
+       mat%rowv = 0.0d0
+       mat%colh = 0.0d0
+    ELSE
+       ALLOCATE(mat%rowv(0))
+       ALLOCATE(mat%colh(0))
+    END IF
+!
+    IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
+    ALLOCATE(mat%val(rank, -kl:ku))
+    mat%val = 0.0d0
+!
+    IF(ASSOCIATED(mat%bal)) DEALLOCATE(mat%bal)
+    ALLOCATE(mat%bal(rank))
+    mat%bal = 0.0d0
+  END SUBROUTINE init_cds_mat
+!===========================================================================
+  SUBROUTINE clear_cds_mat(mat)
+!
+!   Clear matrix, keeping its sparse structure unchanged
+!
+    TYPE(cds_mat) :: mat
+!
+    mat%val = 0.0d0
+  END SUBROUTINE clear_cds_mat
+!===========================================================================
+  SUBROUTINE destroy_cds_mat(mat)
+!
+!   Deallocate pointers in mat
+!
+    TYPE(cds_mat) :: mat
+    IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
+    IF( ASSOCIATED(mat%dists)) DEALLOCATE(mat%dists)
+    IF( ASSOCIATED(mat%rowv)) DEALLOCATE(mat%rowv)
+    IF( ASSOCIATED(mat%colh)) DEALLOCATE(mat%colh)
+  END SUBROUTINE destroy_cds_mat
+!===========================================================================
+  SUBROUTINE updt_cds(mat, i, j, val)
+!
+!  Update element Aij into sparse CDS matrix
+!
+    TYPE(cds_mat), INTENT(inout) :: mat
+    INTEGER, INTENT(in)          :: i, j
+    DOUBLE PRECISION, INTENT(in) :: val
+    INTEGER :: d, k
+!
+    d = j-i
+    DO k = -mat%kl, mat%ku
+       IF( d .EQ. mat%dists(k) ) THEN
+          mat%val(i,k) =  mat%val(i,k)+val
+          RETURN
+       END IF
+    END DO
+    WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j
+    WRITE(*,'(a/(10i6))') 'Valid distances', mat%dists
+    STOP '*** Abnormal EXIT in MODULE matrix ***'
+  END SUBROUTINE updt_cds
+!===========================================================================
+  SUBROUTINE getele_cds(mat, i, j, val)
+!
+!  Get element Aij of sparse CDS matrix
+!
+    TYPE(cds_mat), INTENT(in)     :: mat
+    INTEGER, INTENT(in)           :: i, j
+    DOUBLE PRECISION, INTENT(out) :: val
+    INTEGER :: d, k
+!
+    d = j-i
+    DO k = -mat%kl, mat%ku
+       IF( d .EQ. mat%dists(k) ) THEN
+          val = mat%val(i,k)
+          RETURN
+       END IF
+    END DO
+    WRITE(*,'(a,2i6)') 'GETELE: i, j out of range ', i, j
+    WRITE(*,'(a/(10i6))') 'Valid distances', mat%dists
+    STOP '*** Abnormal EXIT in MODULE matrix ***'
+  END SUBROUTINE getele_cds
+!===========================================================================
+  SUBROUTINE putele_cds(mat, i, j, val)
+!
+!  Update element Aij into sparse CDS matrix
+!
+    TYPE(cds_mat), INTENT(inout) :: mat
+    INTEGER, INTENT(in)          :: i, j
+    DOUBLE PRECISION, INTENT(in) :: val
+    INTEGER :: d, k
+!
+    d = j-i
+    DO k = -mat%kl, mat%ku
+       IF( d .EQ. mat%dists(k) ) THEN
+          mat%val(i,k) =  val
+          RETURN
+       END IF
+    END DO
+    WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j
+    WRITE(*,'(a/(10i6))') 'Valid distances', mat%dists
+    STOP '*** Abnormal EXIT in MODULE matrix ***'
+  END SUBROUTINE putele_cds
+!===========================================================================
+  SUBROUTINE getcol_cds(mat, j, arr)
+!
+!   Get a column from matrix
+!
+    TYPE(cds_mat), INTENT(in)     :: mat
+    INTEGER, INTENT(in)           :: j
+    DOUBLE PRECISION, INTENT(out) :: arr(:)
+    INTEGER :: n,i,  k
+!
+    n = mat%rank
+    IF( SIZE(arr) .LT. n ) THEN
+       WRITE(*,*) 'GETCOL: size of arr too small'
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+    arr(1:n) = 0.0d0
+    DO k=-mat%kl, mat%ku
+       i = j-mat%dists(k)
+       IF( i.GE.1 .AND. i.LE.n ) arr(i) = mat%val(i,k)
+    END DO
+  END SUBROUTINE getcol_cds
+!===========================================================================
+  SUBROUTINE getrow_cds(mat, i, arr)
+!
+!   Get a row from matrix
+!
+    TYPE(cds_mat), INTENT(in)      :: mat
+    INTEGER, INTENT(in)           :: i
+    DOUBLE PRECISION, INTENT(out) :: arr(:)
+    INTEGER :: n, j,  k
+!
+    n = mat%rank
+    IF( SIZE(arr) .LT. n ) THEN
+       WRITE(*,*) 'GETROW: size of arr too small'
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+    arr(1:n) = 0.0d0
+    DO k=-mat%kl, mat%ku
+       j = i+mat%dists(k)
+       IF( j.GE.1 .AND. j.LE.n ) arr(j) = mat%val(i,k)
+    END DO
+  END SUBROUTINE getrow_cds
+!===========================================================================
+  SUBROUTINE putcol_cds(mat, j, arr)
+!
+!   Put a column to matrix
+!
+    TYPE(cds_mat)                :: mat
+    INTEGER, INTENT(in)          :: j
+    DOUBLE PRECISION, INTENT(in) :: arr(:)
+    INTEGER :: n,i,  k
+!
+    n = mat%rank
+    IF( SIZE(arr) .LT. n ) THEN
+       WRITE(*,*) 'PUTCOL: size of arr too small'
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+    DO k=-mat%kl, mat%ku
+       i = j-mat%dists(k)
+       IF( i.GE. 1 .AND. i.LE.n ) mat%val(i,k) = arr(i)
+    END DO
+  END SUBROUTINE putcol_cds
+!===========================================================================
+  SUBROUTINE putrow_cds(mat, i, arr)
+!
+!   Put a row from matrix
+!
+    TYPE(cds_mat)                :: mat
+    INTEGER, INTENT(in)          :: i
+    DOUBLE PRECISION, INTENT(in) :: arr(:)
+    INTEGER :: n, j,  k
+!
+    n = mat%rank
+    IF( SIZE(arr) .LT. n ) THEN
+       WRITE(*,*) 'PUTROW: size of arr too small'
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+    DO k=-mat%kl, mat%ku
+       j = i+mat%dists(k)
+       IF( j.GE.1 .AND. j.LE.n )  mat%val(i,k) = arr(j)
+    END DO
+  END SUBROUTINE putrow_cds
+!===========================================================================
+  SUBROUTINE getdiag_cds(mat, d)
+!
+!   Returns diagonal of matrix
+!
+    TYPE(cds_mat)    :: mat
+    DOUBLE PRECISION :: d(:)
+    INTEGER          :: ny
+!
+    d(:) = mat%val(:,0)
+!
+! The extra row and column implied by periodic BC
+!!$    ny = mat%ny
+!!$    IF( ny .NE. 0 ) THEN
+!!$       d(ny) = mat%rowv(ny) + mat%colh(ny)
+!!$    END IF
+!!$    WRITE(*,'(a/(8(1pe12.3)))') 'd', d
+  END SUBROUTINE getdiag_cds
+!===========================================================================
+  FUNCTION vmx_cds(mat, xarr)
+!
+!   Return product mat*x
+!
+    TYPE(cds_mat)                 :: mat
+    DOUBLE PRECISION, INTENT(in)  :: xarr(:)
+    DOUBLE PRECISION              :: vmx_cds(SIZE(xarr))
+!
+    DOUBLE PRECISION :: alpha=1.0d0, beta=1.0d0
+    INTEGER :: m, bw0, ny, k, d, i, i1, i2
+!
+    m = mat%rank
+    bw0 = SIZE(mat%rowv)
+    ny = mat%ny
+    vmx_cds = 0.0d0
+!
+    IF( ny .NE. 0 ) THEN  ! Contributions from unicity BC
+       vmx_cds(ny:bw0) = mat%colh(ny:bw0)*xarr(ny)
+       vmx_cds(ny) = vmx_cds(ny) + DOT_PRODUCT(mat%rowv(ny:bw0), xarr(ny:bw0))
+    END IF
+!
+#ifdef MKL
+    CALL mkl_ddiamv('n', m, m, alpha, 'g', mat%val, m, mat%dists, &
+          &         mat%ndiags, xarr, beta, vmx_cds)
+#else
+    DO k=-mat%kl,mat%ku
+       d = mat%dists(k)
+       i1 = MAX(1,1-d)
+       i2 = MIN(mat%rank,mat%rank-d)
+       DO i=i1,i2
+          vmx_cds(i) = vmx_cds(i) + mat%val(i,k)*xarr(i+d)
+       END DO
+    END DO
+#endif
+  END FUNCTION vmx_cds
+!===========================================================================
+  FUNCTION vmxn_cds(mat, xarr)
+!
+!   Return product mat*x
+!
+    TYPE(cds_mat)                 :: mat
+    DOUBLE PRECISION, INTENT(in)  :: xarr(:,:)
+    DOUBLE PRECISION              :: vmxn_cds(SIZE(xarr,1),SIZE(xarr,2))
+!
+    DOUBLE PRECISION :: alpha=1.0d0, beta=1.0d0
+    INTEGER :: m, nrhs, bw0, ny, k, d, i, j, i1, i2
+!
+    m = mat%rank
+    nrhs = SIZE(xarr,2)
+    bw0 = SIZE(mat%rowv)
+    ny = mat%ny
+    vmxn_cds = 0.0d0
+!
+    IF( ny .NE. 0 ) THEN  ! Contributions from unicity BC
+       DO j=1,nrhs
+          vmxn_cds(ny:bw0,j) =  mat%colh(ny:bw0)*xarr(ny,j)
+          vmxn_cds(ny,j) = vmxn_cds(ny,j) + &
+               &           DOT_PRODUCT(mat%rowv(ny:bw0), xarr(ny:bw0,j))
+       END DO
+    END IF
+!
+#ifdef MKL
+    CALL mkl_ddiamm('n', m, nrhs, m, alpha, 'g', mat%val, m, &
+         &          mat%dists, mat%ndiags, xarr, m, beta, vmxn_cds, m)
+#else
+    DO k=-mat%kl,mat%ku
+       d = mat%dists(k)
+       i1 = MAX(1,1-d)
+       i2 = MIN(mat%rank,mat%rank-d)
+       DO j=1,nrhs
+          DO i=i1,i2
+             vmxn_cds(i,j) = vmxn_cds(i,j) + mat%val(i,k)*xarr(i+d,j)
+          END DO
+       END DO
+    END DO
+#endif
+  END FUNCTION vmxn_cds
+!===========================================================================
+  SUBROUTINE getmat_cds(fid, label, mat)
+!
+!   Read in CDS matrix from hdf5 file
+!
+    USE futils
+!
+    INTEGER, INTENT(in)                     :: fid
+    CHARACTER(len=*), INTENT(in)            :: label
+    TYPE(cds_mat)                           :: mat
+!
+    CALL getatt(fid, label, 'RANK', mat%rank)
+    CALL getatt(fid, label, 'KL', mat%kl)
+    CALL getatt(fid, label, 'KU', mat%ku)
+    CALL getatt(fid, label, 'NDIAGS', mat%ndiags)
+    CALL getatt(fid, label, 'NTERMS', mat%nterms)
+    CALL getatt(fid, label, 'KMAT', mat%kmat)
+    CALL getatt(fid, label, 'NY', mat%ny)
+    IF( ASSOCIATED(mat%dists) ) THEN
+       CALL getarr(fid, TRIM(label)//'/dists', mat%dists)
+    END IF
+    IF(ASSOCIATED(mat%bal)) THEN
+       CALL getarr(fid, TRIM(label)//'/bal', mat%bal)
+    END IF
+    CALL getarr(fid, TRIM(label)//'/vals', mat%val)
+    IF(ASSOCIATED(mat%rowv)) THEN
+       CALL getarr(fid, TRIM(label)//'/rowv', mat%rowv)
+       CALL getarr(fid, TRIM(label)//'/colh', mat%colh)
+    END IF
+  END SUBROUTINE getmat_cds
+!===========================================================================
+  SUBROUTINE putmat_cds(fid, label, mat, str)
+!
+!   Write CDS matrix in hdf5 file
+!
+    USE futils
+!
+    INTEGER, INTENT(in)                    :: fid
+    CHARACTER(len=*), INTENT(in)           :: label
+    TYPE(cds_mat)                          :: mat
+    CHARACTER(len=*), OPTIONAL, INTENT(in) :: str
+!
+    IF(PRESENT(str)) THEN
+       CALL creatg(fid, label, str)
+    ELSE
+       CALL creatg(fid, label)
+    END IF
+    CALL attach(fid, label, 'RANK', mat%rank)
+    CALL attach(fid, label, 'KL', mat%kl)
+    CALL attach(fid, label, 'KU', mat%ku)
+    CALL attach(fid, label, 'NDIAGS', mat%ndiags)
+    CALL attach(fid, label, 'NTERMS', mat%nterms)
+    CALL attach(fid, label, 'KMAT', mat%kmat)
+    CALL attach(fid, label, 'NY', mat%ny)
+    IF( ASSOCIATED(mat%dists) ) THEN
+       CALL putarr(fid, TRIM(label)//'/dists', mat%dists)
+    END IF
+    IF(ASSOCIATED(mat%bal)) THEN
+       CALL putarr(fid, TRIM(label)//'/bal', mat%bal)
+    END IF
+    CALL putarr(fid, TRIM(label)//'/vals', mat%val)
+    IF(ASSOCIATED(mat%rowv)) THEN
+       CALL putarr(fid, TRIM(label)//'/rowv', mat%rowv)
+       CALL putarr(fid, TRIM(label)//'/colh', mat%colh)
+    END IF
+  END SUBROUTINE putmat_cds
+!===========================================================================
+  FUNCTION flops_cds(mat, xarr, ny)
+!
+!   Return FLOPS in product mat*x
+!
+    TYPE(cds_mat)                 :: mat
+    DOUBLE PRECISION, INTENT(in)  :: xarr(:)
+    DOUBLE PRECISION              :: flops_cds
+    INTEGER, OPTIONAL, INTENT(in) :: ny
+!
+    INTEGER :: k, d, i, i1, i2
+!
+    flops_cds = 0.0d0
+    IF( PRESENT(ny) ) THEN  ! Contributions from unicity BC
+       flops_cds = 4.0d0*(SIZE(mat%rowv)-ny+1)
+    END IF
+    DO k=-mat%kl,mat%ku
+       d = mat%dists(k)
+       i1 = MAX(1,1-d)
+       i2 = MIN(mat%rank,mat%rank-d)
+       flops_cds = flops_cds + 2.0d0*(i2-i1+1)
+    END DO
+  END FUNCTION flops_cds
+!===========================================================================
+  SUBROUTINE cds2mumps(mat, mat_mumps)
+!
+!   Fill mumps structure (based on routine to_mumps_mat)
+!
+    INCLUDE 'mpif.h'
+    TYPE(cds_mat)   :: mat
+    TYPE(mumps_mat) :: mat_mumps
+!
+    INTEGER :: i, ii, i1, i2, j, k, rank, d, bw0, s, e
+    INTEGER :: comm, ierr, nnz_loc
+!
+    CALL init(mat%rank, mat%nterms, mat_mumps)
+!
+    comm = mat_mumps%mumps_par%COMM
+    mat_mumps%mumps_par%ICNTL(18) = 3 ! Distributed assembled matrix
+!
+!    Compute nnz_loc
+!
+    rank = mat_mumps%rank
+    s = mat_mumps%istart
+    e = mat_mumps%iend
+!
+    nnz_loc=0
+    DO k=-mat%kl,mat%ku
+       d = mat%dists(k)
+       i1 = MAX(s,1-d)
+       i2 = MIN(e,rank-d)
+       nnz_loc = nnz_loc + (i2-i1+1)
+    END DO
+!
+!    Extra col and row from unicity conditions
+!
+    bw0 = SIZE(mat%rowv)
+    IF(bw0.GT.0) THEN
+       IF(mat%ny.GE.s .AND. mat%ny.LE.e) THEN
+          nnz_loc = nnz_loc + bw0-mat%ny ! rowh(ny+1:bw0) 
+       END IF
+       nnz_loc = nnz_loc + (MIN(bw0,e)-MAX(mat%ny,s)) ! colh(ny+1:bw0)
+    END IF
+!
+    mat_mumps%nnz_start = 0
+    CALL mpi_exscan(nnz_loc, mat_mumps%nnz_start, 1, MPI_INTEGER, MPI_SUM, comm, ierr)
+    mat_mumps%nnz_start = mat_mumps%nnz_start + 1
+    mat_mumps%nnz_end = mat_mumps%nnz_start + nnz_loc - 1
+    mat_mumps%nnz_loc = nnz_loc
+    CALL mpi_allreduce(nnz_loc, mat_mumps%nnz, 1, MPI_INTEGER, MPI_SUM, comm, ierr)
+!
+    mat_mumps%mumps_par%N = rank
+    mat_mumps%mumps_par%NZ_loc = nnz_loc
+!
+!     Construct MUMPS (IRN, JCN, A)
+!
+    ALLOCATE(mat_mumps%mumps_par%IRN_loc(nnz_loc))
+    ALLOCATE(mat_mumps%mumps_par%JCN_loc(nnz_loc))
+    ALLOCATE(mat_mumps%mumps_par%A_loc(nnz_loc))
+!
+    ii=0
+    DO k=-mat%kl,mat%ku
+       d = mat%dists(k)
+       i1 = MAX(s,1-d)
+       i2 = MIN(e,rank-d)
+       DO i=i1,i2
+          ii = ii+1
+          mat_mumps%mumps_par%IRN_loc(ii) = i
+          mat_mumps%mumps_par%JCN_loc(ii) = i+d
+          mat_mumps%mumps_par%A_loc(ii)   = mat%val(i,k)
+       END DO
+    END DO
+!
+    IF(bw0.GT.0) THEN
+       IF(mat%ny.GE.s .AND. mat%ny.LE.e) THEN
+          DO j=mat%ny+1,bw0    ! rowh(ny+1:bw0)
+             ii = ii+1
+             mat_mumps%mumps_par%IRN_loc(ii) = mat%ny
+             mat_mumps%mumps_par%JCN_loc(ii) = j
+             mat_mumps%mumps_par%A_loc(ii)   = mat%rowv(j)
+          END DO
+       END IF
+       DO i=MAX(mat%ny,s)+1,MIN(bw0,e) ! colh(ny+1:bw0)
+          ii = ii+1
+          mat_mumps%mumps_par%IRN_loc(ii) = i
+          mat_mumps%mumps_par%JCN_loc(ii) = mat%ny
+          mat_mumps%mumps_par%A_loc(ii)   = mat%colh(i)
+       END DO
+    END IF
+!    
+    CALL destroy(mat_mumps%mat)
+    NULLIFY(mat_mumps%mat)
+  END SUBROUTINE cds2mumps
+!===========================================================================
+  DOUBLE PRECISION FUNCTION matnorm_cds(mat, p)
+!
+!   Compute matrix norm
+!
+    TYPE(cds_mat), INTENT(in)              :: mat
+    CHARACTER(len=*), OPTIONAL, INTENT(in) :: p
+!
+    CHARACTER(len=4) :: norm_type
+    INTEGER          :: i, j, k, d
+    DOUBLE PRECISION :: temp(mat%rank)
+!
+    norm_type = 'fro'
+    IF(PRESENT(p)) norm_type = p
+!
+    SELECT CASE (norm_type)
+    CASE ('inf')
+       DO i=1,mat%rank
+          temp(i) = SUM(ABS(mat%val(i,:)))
+       END DO
+       matnorm_cds = MAXVAL(temp)
+    CASE ('1')
+       temp = 0.0d0
+       DO k=-mat%kl,mat%ku
+          d = mat%dists(k)
+          DO i=MAX(1,1-d),MIN(mat%rank,mat%rank-d)
+             temp(i+d) = temp(i+d) + ABS(mat%val(i,k))
+          END DO
+       END DO
+       matnorm_cds = MAXVAL(temp)
+    CASE('fro')
+       matnorm_cds = SQRT(SUM(mat%val**2))
+    END SELECT
+  END FUNCTION matnorm_cds
+!===========================================================================
+!
+END MODULE cds
diff --git a/src/conmat.f90 b/src/conmat.f90
new file mode 100644
index 0000000..3a489e5
--- /dev/null
+++ b/src/conmat.f90
@@ -0,0 +1,257 @@
+!>
+!> @file conmat.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+MODULE conmat_mod
+!
+!    CONMAT: Matrix construction for FE discretization.
+!
+!    T.M. Tran, CRPP-EPFL
+!    November 2011
+!
+  USE bsplines
+  USE matrix
+#ifdef MKL
+  USE pardiso_bsplines
+#endif
+  IMPLICIT NONE
+!
+  INTERFACE conrhs
+     MODULE PROCEDURE conrhs_r, conrhs_z
+  END INTERFACE conrhs
+  INTERFACE conmat
+     MODULE PROCEDURE conmat_1d_gb, conmat_1d_ge, conmat_1d_pb, conmat_1d_periodic, &
+          &           conmat_1d_zgb, conmat_1d_zpb, conmat_1d_zperiodic, &
+          &           conmat_gb, conmat_pb, &
+          &           conmat_zgb, conmat_zpb
+  END INTERFACE conmat
+#ifdef MKL
+  INTERFACE conmat
+     MODULE PROCEDURE conmat_1d_pardiso,  conmat_1d_zpardiso, &
+          &            conmat_pardiso, conmat_zpardiso
+  END INTERFACE conmat
+#endif
+!
+CONTAINS
+!===========================================================================
+  SUBROUTINE conmat_1d_gb(spl, mat, coefeq, maxder)
+!
+!   Construction of FE matrix mat for 1D differential operator
+!   using spline spl
+!
+    TYPE(gbmat)                :: mat
+    TYPE(spline1d), INTENT(in) :: spl
+!
+    INCLUDE 'conmat_1d.tpl'
+  END SUBROUTINE conmat_1d_gb
+!===========================================================================
+  SUBROUTINE conmat_1d_ge(spl, mat, coefeq, maxder)
+!
+!   Construction of FE matrix mat for 1D differential operator
+!   using spline spl
+!
+    TYPE(gemat)                :: mat
+    TYPE(spline1d), INTENT(in) :: spl
+!
+    INCLUDE 'conmat_1d.tpl'
+  END SUBROUTINE conmat_1d_ge
+!===========================================================================
+  SUBROUTINE conmat_1d_pb(spl, mat, coefeq, maxder)
+!
+!   Construction of FE matrix mat for 1D differential operator
+!   using spline spl
+!
+    TYPE(pbmat)                :: mat
+    TYPE(spline1d), INTENT(in) :: spl
+!
+    INCLUDE 'conmat_1d.tpl'
+  END SUBROUTINE conmat_1d_pb
+!===========================================================================
+  SUBROUTINE conmat_1d_periodic(spl, mat, coefeq, maxder)
+!
+!   Construction of FE matrix mat for 1D differential operator
+!   using spline spl
+!
+    TYPE(periodic_mat)         :: mat
+    TYPE(spline1d), INTENT(in) :: spl
+!
+    INCLUDE 'conmat_1d.tpl'
+  END SUBROUTINE conmat_1d_periodic
+!===========================================================================
+  SUBROUTINE conmat_1d_zgb(spl, mat, coefeq, maxder)
+!
+!   Construction of FE matrix mat for 1D differential operator
+!   using spline spl
+!
+    TYPE(zgbmat)               :: mat
+    TYPE(spline1d), INTENT(in) :: spl
+!
+    INCLUDE 'zconmat_1d.tpl'
+  END SUBROUTINE conmat_1d_zgb
+!===========================================================================
+  SUBROUTINE conmat_1d_zpb(spl, mat, coefeq, maxder)
+!
+!   Construction of FE matrix mat for 1D differential operator
+!   using spline spl
+!
+    TYPE(zpbmat)               :: mat
+    TYPE(spline1d), INTENT(in) :: spl
+!
+    INCLUDE 'zconmat_1d.tpl'
+  END SUBROUTINE conmat_1d_zpb
+!===========================================================================
+  SUBROUTINE conmat_1d_zperiodic(spl, mat, coefeq, maxder)
+!
+!   Construction of FE matrix mat for 1D differential operator
+!   using spline spl
+!
+    TYPE(zperiodic_mat)        :: mat
+    TYPE(spline1d), INTENT(in) :: spl
+!
+    INCLUDE 'zconmat_1d.tpl'
+  END SUBROUTINE conmat_1d_zperiodic
+!===========================================================================
+  SUBROUTINE conmat_gb(spl, mat, coefeq, maxder, nat_order)
+!
+!   Construction of FE matrix mat for 2D differential operator
+!   using spline spl
+!
+    TYPE(gbmat)                :: mat
+    TYPE(spline2d), INTENT(in) :: spl
+!
+    INCLUDE 'conmat.tpl'
+  END SUBROUTINE conmat_gb
+!===========================================================================
+  SUBROUTINE conmat_pb(spl, mat, coefeq, maxder, nat_order)
+!
+!   Construction of FE matrix mat for 2D differential operator
+!   using spline spl
+!
+    TYPE(pbmat)                :: mat
+    TYPE(spline2d), INTENT(in) :: spl
+!
+    INCLUDE 'conmat.tpl'
+  END SUBROUTINE conmat_pb
+!===========================================================================
+  SUBROUTINE conmat_zgb(spl, mat, coefeq, maxder, nat_order)
+!
+!   Construction of FE matrix mat for 2D differential operator
+!   using spline spl
+!
+    TYPE(zgbmat)               :: mat
+    TYPE(spline2d), INTENT(in) :: spl
+!
+    INCLUDE 'zconmat.tpl'
+  END SUBROUTINE conmat_zgb
+!===========================================================================
+  SUBROUTINE conmat_zpb(spl, mat, coefeq, maxder, nat_order)
+!
+!   Construction of FE matrix mat for 2D differential operator
+!   using spline spl
+!
+    TYPE(zpbmat)               :: mat
+    TYPE(spline2d), INTENT(in) :: spl
+!
+    INCLUDE 'zconmat.tpl'
+  END SUBROUTINE conmat_zpb
+!===========================================================================
+  SUBROUTINE conrhs_r(spl, farr, frhs)
+!
+!  Projection of RHS on spline basis functions
+!
+    TYPE(spline1d)                :: spl
+    DOUBLE PRECISION, INTENT(out) :: farr(:)
+    INTERFACE
+       DOUBLE PRECISION FUNCTION frhs(x)
+         DOUBLE PRECISION, INTENT(in) :: x
+       END FUNCTION frhs
+    END INTERFACE
+    DOUBLE PRECISION :: contrib
+!
+    INCLUDE 'conrhs.tpl'
+  END SUBROUTINE conrhs_r
+!===========================================================================
+  SUBROUTINE conrhs_z(spl, farr, frhs)
+!
+!  Projection of RHS on spline basis functions
+!
+    TYPE(spline1d)              :: spl
+    DOUBLE COMPLEX, INTENT(out) :: farr(:)
+    INTERFACE
+       DOUBLE COMPLEX FUNCTION frhs(x)
+         DOUBLE PRECISION, INTENT(in) :: x
+       END FUNCTION frhs
+    END INTERFACE
+    DOUBLE COMPLEX :: contrib
+!
+    INCLUDE 'conrhs.tpl'
+  END SUBROUTINE conrhs_z
+!===========================================================================
+#ifdef MKL
+  SUBROUTINE conmat_1d_pardiso(spl, mat, coefeq, maxder)
+!
+!   Construction of FE matrix mat for 1D differential operator
+!   using spline spl
+!
+    TYPE(pardiso_mat)          :: mat
+    TYPE(spline1d), INTENT(in) :: spl
+!
+    INCLUDE 'conmat_1d.tpl'
+  END SUBROUTINE conmat_1d_pardiso
+!===========================================================================
+  SUBROUTINE conmat_1d_zpardiso(spl, mat, coefeq, maxder)
+!
+!   Construction of FE matrix mat for 1D differential operator
+!   using spline spl
+!
+    TYPE(zpardiso_mat)         :: mat
+    TYPE(spline1d), INTENT(in) :: spl
+!
+    INCLUDE 'zconmat_1d.tpl'
+  END SUBROUTINE conmat_1d_zpardiso
+!===========================================================================
+  SUBROUTINE conmat_pardiso(spl, mat, coefeq, maxder, nat_order)
+!
+!   Construction of FE matrix mat for 2D differential operator
+!   using spline spl
+!
+    TYPE(pardiso_mat)          :: mat
+    TYPE(spline2d), INTENT(in) :: spl
+!
+    INCLUDE 'conmat.tpl'
+  END SUBROUTINE conmat_pardiso
+!===========================================================================
+  SUBROUTINE conmat_zpardiso(spl, mat, coefeq, maxder, nat_order)
+!
+!   Construction of FE matrix mat for 2D differential operator
+!   using spline spl
+!
+    TYPE(zpardiso_mat)         :: mat
+    TYPE(spline2d), INTENT(in) :: spl
+!
+    INCLUDE 'zconmat.tpl'
+  END SUBROUTINE conmat_zpardiso
+!===========================================================================
+#endif
+END MODULE conmat_mod
diff --git a/src/conmat.tpl b/src/conmat.tpl
new file mode 100644
index 0000000..09f96c6
--- /dev/null
+++ b/src/conmat.tpl
@@ -0,0 +1,213 @@
+!>
+!> @file conmat.tpl
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+!
+!   In this version s[lines are precalculted
+!   (on all n1/n2 intervals
+!
+  INTERFACE
+     SUBROUTINE coefeq(x, y, idt, idw, c)
+       DOUBLE PRECISION, INTENT(in) :: x, y
+       INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
+       DOUBLE PRECISION, INTENT(out) :: c(:)
+     END SUBROUTINE coefeq
+  END INTERFACE
+  INTEGER, OPTIONAL :: maxder(2)   ! maximum oder of derivatives
+  LOGICAL, OPTIONAL :: nat_order   ! Natural ordering for 2d-1d mapping
+!
+  INTEGER :: n1, nidbas1, ndim1, n1e
+  INTEGER :: n2, nidbas2, ndim2, n2e
+  INTEGER :: ng1, ng2
+  INTEGER :: i1, i2, ig1, ig2
+  INTEGER :: igt1, igt2, igw1, igw2, irow, jcol
+  INTEGER, ALLOCATABLE :: left1(:), left2(:)
+!
+  LOGICAL :: nlper1, nlper2, nlnat
+!
+  INTEGER :: kterms         ! Number of terms in weak form
+  INTEGER :: k, kmaxder, it1, iw1, it2, iw2
+  INTEGER, ALLOCATABLE :: idert(:,:), iderw(:,:)  ! Derivative order
+  DOUBLE PRECISION, ALLOCATABLE  :: coefs(:,:,:)  ! Terms in weak form
+!
+  DOUBLE PRECISION, POINTER :: xg1(:,:), xg2(:,:), wg1(:,:), wg2(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: fun1(:,:,:,:), fun2(:,:,:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: mata(:,:,:,:), matc(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: matg(:,:,:), matf(:,:,:), matcg(:,:,:)
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+  CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+  CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
+  nlper1 = spl%sp1%period
+  nlper2 = spl%sp2%period
+!
+  n1e = n1+nidbas1  ! Number of elements in 1st coordinate
+  n2e = n2+nidbas2  ! Number of elements in 2nd coordinate
+  iF(nlper2) n2e = n2
+!
+!   Gauss points and weights on all intervals
+!
+  xg1 => spl%sp1%gausx  ! xg1(ng1,n1)
+  wg1 => spl%sp1%gausw  ! wg1(ng1,n1)
+  ng1 = SIZE(xg1,1)
+  xg2 => spl%sp2%gausx
+  wg2 => spl%sp2%gausw
+  ng2 = SIZE(xg2,1)
+!
+!   Splines on all intervals
+!
+  kmaxder = 1
+  IF(PRESENT(maxder)) kmaxder = maxder(1)
+  ALLOCATE(fun1(0:nidbas1,0:kmaxder,ng1,n1))
+  ALLOCATE(left1(ng1))
+  DO i1=1,n1
+     left1 = i1
+     CALL basfun(xg1(:,i1), spl%sp1, fun1(:,:,:,i1), left1)
+  END DO
+  DEALLOCATE(left1)
+!
+  kmaxder = 1
+  IF(PRESENT(maxder)) kmaxder = maxder(2)
+  ALLOCATE(fun2(0:nidbas2,0:kmaxder,ng2,n2))
+  ALLOCATE(left2(ng2))
+  DO i2=1,n2
+     left2 = i2
+     CALL basfun(xg2(:,i2), spl%sp2, fun2(:,:,:,i2), left2)
+  END DO
+  DEALLOCATE(left2)
+!
+!   Ordering in local to global matrix mapping
+!
+  nlnat = .FALSE.
+  IF(PRESENT(nat_order)) nlnat = nat_order
+!===========================================================================
+!              2.0 Assembly loop
+!
+!   Weak form
+!
+  kterms = mat%nterms
+  ALLOCATE(idert(kterms,2))
+  ALLOCATE(iderw(kterms,2))
+  ALLOCATE(coefs(kterms,ng1,ng2))
+!
+!   Allocate local matrices
+!
+  ALLOCATE(mata(0:nidbas1,0:nidbas1,0:nidbas2,0:nidbas2))
+  ALLOCATE(matc(ng1,ng2))
+  ALLOCATE(matg(0:nidbas2,0:nidbas2,ng2))
+  ALLOCATE(matf(0:nidbas1,0:nidbas1,ng1))
+  ALLOCATE(matcg(ng1,0:nidbas2,0:nidbas2))
+!
+  DO i1=1,n1
+     DO i2=1,n2
+!
+!   Coefficients of the weak form
+!
+        DO ig1=1,ng1
+           DO ig2=1,ng2
+              CALL coefeq(xg1(ig1,i1), xg2(ig2,i2), &
+                   &      idert, iderw, coefs(:,ig1,ig2))
+           END DO
+        END DO
+!
+!   Compute local matrix: A <- E*(C*D^T) + A
+!
+        mata = 0.0d0
+        DO k=1,kterms
+!
+           matc(1:ng1,1:ng2) = coefs(k,1:ng1,1:ng2)
+!
+           DO it1=0,nidbas1
+              DO iw1=0,nidbas1
+                 DO ig1=1,ng1
+                    matf(it1,iw1,ig1) = wg1(ig1,i1) * &
+                         &  fun1(it1,idert(k,1),ig1,i1) * &
+                         &  fun1(iw1,iderw(k,1),ig1,i1)
+                 END DO
+              END DO
+           END DO
+!
+           DO it2=0,nidbas2
+              DO iw2=0,nidbas2
+                 DO ig2=1,ng2
+                    matg(it2,iw2,ig2) = wg2(ig2,i2) * &
+                         &  fun2(it2,idert(k,2),ig2,i2) * &
+                         &  fun2(iw2,iderw(k,2),ig2,i2)
+                 END DO
+              END DO
+           END DO
+!
+           CALL dgemm('N', 'T', ng1, (nidbas2+1)*(nidbas2+1), ng2, 1.0d0, &
+                &      matc, ng1, matg, (nidbas2+1)*(nidbas2+1), 0.0d0, &
+                &      matcg, ng1)
+           CALL dgemm('N', 'N', (nidbas1+1)*(nidbas1+1), (nidbas2+1)*(nidbas2+1), &
+                &      ng1, 1.0d0, matf, (nidbas1+1)*(nidbas1+1), matcg, ng1, 1.0d0, &
+                &      mata, (nidbas1+1)*(nidbas1+1))
+!
+        END DO
+!
+!   Map local matrix A to global matrix
+!
+        DO it1=0,nidbas1
+           igt1 = i1+it1; IF(nlper1) igt1 = MODULO(igt1-1,n1) + 1
+           DO it2=0,nidbas2
+              igt2 = i2+it2; IF(nlper2) igt2 = MODULO(igt2-1, n2) + 1
+              irow = glmap(igt1, igt2, n1e, n2e)
+              DO iw1=0,nidbas1
+                 igw1 = i1+iw1; IF(nlper1) igw1 = MODULO(igw1-1,n1) + 1
+                 DO iw2=0,nidbas2
+                    igw2 = i2+iw2; IF(nlper2) igw2 = MODULO(igw2-1, n2) + 1
+                    jcol = glmap(igw1, igw2, n1e, n2e)
+                    CALL updtmat(mat, irow, jcol, mata(it1,iw1,it2,iw2))
+                 END DO
+              END DO
+           END DO
+        END DO
+!
+     END DO
+  END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(fun1)
+  DEALLOCATE(fun2)
+  DEALLOCATE(idert, iderw, coefs)
+  DEALLOCATE(mata)
+  DEALLOCATE(matc)
+  DEALLOCATE(matg)
+  DEALLOCATE(matcg)
+  DEALLOCATE(matf)
+!
+CONTAINS
+  INTEGER FUNCTION glmap(i,j,n1,n2)
+    INTEGER, INTENT(in) :: i,j,n1,n2
+    IF(nlnat) THEN
+       glmap = (j-1)*n1 + i
+    ELSE
+       glmap = (i-1)*n2 + j
+    END IF
+  END FUNCTION glmap
diff --git a/src/conmat2.tpl b/src/conmat2.tpl
new file mode 100644
index 0000000..27f7e4d
--- /dev/null
+++ b/src/conmat2.tpl
@@ -0,0 +1,202 @@
+!>
+!> @file conmat2.tpl
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+!
+!   In this version local matrices E and D are precalculted
+!   (on all n1/n2 intervals and nterms weak-form terms
+!
+  INTERFACE
+     SUBROUTINE coefeq(x, y, idt, idw, c)
+       DOUBLE PRECISION, INTENT(in) :: x, y
+       INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
+       DOUBLE PRECISION, INTENT(out) :: c(:)
+     END SUBROUTINE coefeq
+  END INTERFACE
+  INTEGER, OPTIONAL :: maxder(2)   ! maximum oder of derivatives
+!
+  INTEGER :: n1, nidbas1, ndim1
+  INTEGER :: n2, nidbas2, ndim2
+  INTEGER :: ng1, ng2
+  INTEGER :: i1, i2, ig1, ig2
+  INTEGER :: igt1, igt2, igw1, igw2, irow, jcol
+  INTEGER, ALLOCATABLE :: left1(:), left2(:)
+!
+  LOGICAL :: nlper1, nlper2
+!
+  INTEGER :: kterms         ! Number of terms in weak form
+  INTEGER :: k, kmaxder, it1, iw1, it2, iw2
+  INTEGER, ALLOCATABLE :: idert(:,:), iderw(:,:)  ! Derivative order
+  DOUBLE PRECISION, ALLOCATABLE  :: coefs(:,:,:)  ! Terms in weak form
+  DOUBLE PRECISION :: dummy(mat%nterms)
+!
+  DOUBLE PRECISION, POINTER :: xg1(:,:), xg2(:,:), wg1(:,:), wg2(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: fun1(:,:,:), fun2(:,:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: mata(:,:,:,:), matc(:,:), matcd(:,:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: matd(:,:,:,:,:), mate(:,:,:,:,:)
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+  CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+  CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
+  nlper1 = spl%sp1%period
+  nlper2 = spl%sp2%period
+!
+!   Gauss points and weights on all intervals
+!
+  xg1 => spl%sp1%gausx  ! xg1(ng1,n1)
+  wg1 => spl%sp1%gausw  ! wg1(ng1,n1)
+  ng1 = SIZE(xg1,1)
+  xg2 => spl%sp2%gausx
+  wg2 => spl%sp2%gausw
+  ng2 = SIZE(xg2,1)
+!
+!   Derivative orders in the weak form
+!
+  kterms = mat%nterms
+  ALLOCATE(idert(kterms,2))
+  ALLOCATE(iderw(kterms,2))
+  CALL coefeq(xg1(1,1), xg2(1,1), idert, iderw, dummy)
+!
+!   Precalc matrix E  in dimension 1
+!
+  kmaxder = 1
+  IF(PRESENT(maxder)) kmaxder = maxder(1)
+  ALLOCATE(fun1(0:nidbas1,0:kmaxder,ng1))
+  ALLOCATE(left1(ng1))
+  ALLOCATE(mate(0:nidbas1,0:nidbas1,ng1,kterms,n1))
+  DO i1=1,n1
+     left1 = i1
+     CALL basfun(xg1(:,i1), spl%sp1, fun1, left1)
+     DO k=1,kterms
+        DO ig1=1,ng1
+           DO iw1=0,nidbas1
+              DO it1=0,nidbas1
+                 mate(it1,iw1,ig1,k,i1) = wg1(ig1,i1) * &
+                      &  fun1(it1,idert(k,1),ig1) * &
+                      &  fun1(iw1,iderw(k,1),ig1)
+              END DO
+           END DO
+        END DO
+     END DO
+  END DO
+  DEALLOCATE(fun1)
+  DEALLOCATE(left1)
+!
+!   Precalc matrix D  in dimension 2
+!
+  kmaxder = 1
+  IF(PRESENT(maxder)) kmaxder = maxder(2)
+  ALLOCATE(fun2(0:nidbas2,0:kmaxder,ng2))
+  ALLOCATE(left2(ng2))
+  ALLOCATE(matd(0:nidbas2,0:nidbas2,ng2,kterms,n2))
+  DO i2=1,n2
+     left2 = i2
+     CALL basfun(xg2(:,i2), spl%sp2, fun2, left2)
+     DO k=1,kterms
+        DO ig2=1,ng2
+           DO iw2=0,nidbas2
+              DO it2=0,nidbas2
+                 matd(it2,iw2,ig2,k,i2) = wg2(ig2,i2) * &
+                      &  fun2(it2,idert(k,2),ig2) * &
+                      &  fun2(iw2,iderw(k,2),ig2)
+              END DO
+           END DO
+        END DO
+     END DO
+  END DO
+  DEALLOCATE(fun2)
+  DEALLOCATE(left2)
+!===========================================================================
+!              2.0 Assembly loop
+!
+!   Physical coefficients in Weak form
+!
+  ALLOCATE(coefs(kterms,ng1,ng2))
+  ALLOCATE(matc(ng1,ng2))
+!
+!   Allocate local matrix A
+!
+  ALLOCATE(mata(0:nidbas1,0:nidbas1,0:nidbas2,0:nidbas2))
+  ALLOCATE(matcd(ng1,0:nidbas2,0:nidbas2))
+!
+  DO i1=1,n1
+     DO i2=1,n2
+!
+!   Coefficients of the weak form
+!
+        DO ig1=1,ng1
+           DO ig2=1,ng2
+              CALL coefeq(xg1(ig1,i1), xg2(ig2,i2), &
+                   &      idert, iderw, coefs(:,ig1,ig2))
+           END DO
+        END DO
+!
+!   Compute local matrix: A <- E*(C*D^T) + A
+!
+        mata = 0.0d0
+        DO k=1,kterms
+!
+           matc(1:ng1,1:ng2) = coefs(k,1:ng1,1:ng2)
+!
+           CALL dgemm('N', 'T', ng1, (nidbas2+1)*(nidbas2+1), ng2, 1.0d0, &
+                &      matc, ng1, matd(0,0,1,k,i2), (nidbas2+1)*(nidbas2+1), 0.0d0, &
+                &      matcd, ng1)
+           CALL dgemm('N', 'N', (nidbas1+1)*(nidbas1+1), (nidbas2+1)*(nidbas2+1), &
+                &      ng1, 1.0d0, mate(0,0,1,k,i1), (nidbas1+1)*(nidbas1+1), matcd, ng1, 1.0d0, &
+                &      mata, (nidbas1+1)*(nidbas1+1))
+!
+        END DO
+!
+!   Map local matrix A to global matrix
+!
+        DO it1=0,nidbas1
+           igt1 = i1+it1; IF(nlper1) igt1 = MODULO(igt1-1,n1) + 1
+           DO it2=0,nidbas2
+              igt2 = i2+it2; IF(nlper2) igt2 = MODULO(igt2-1, n2) + 1
+              irow = igt2 + (igt1-1)*n2
+              DO iw1=0,nidbas1
+                 igw1 = i1+iw1; IF(nlper1) igw1 = MODULO(igw1-1,n1) + 1
+                 DO iw2=0,nidbas2
+                    igw2 = i2+iw2; IF(nlper2) igw2 = MODULO(igw2-1, n2) + 1
+                    jcol = igw2 + (igw1-1)*n2
+                    CALL updtmat(mat, irow, jcol, mata(it1,iw1,it2,iw2))
+                 END DO
+              END DO
+           END DO
+        END DO
+!
+     END DO
+  END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(idert, iderw, coefs)
+  DEALLOCATE(mata)
+  DEALLOCATE(matc)
+  DEALLOCATE(matd)
+  DEALLOCATE(matcd)
+  DEALLOCATE(mate)
diff --git a/src/conmat_1d.tpl b/src/conmat_1d.tpl
new file mode 100644
index 0000000..ce06d1d
--- /dev/null
+++ b/src/conmat_1d.tpl
@@ -0,0 +1,156 @@
+!>
+!> @file conmat_1d.tpl
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+!
+!   In this version s[lines are precalculted
+!   (on all n1/n2 intervals
+!
+  INTERFACE
+     SUBROUTINE coefeq(x, idt, idw, c)
+       DOUBLE PRECISION, INTENT(in) :: x
+       INTEGER, INTENT(out) :: idt(:), idw(:)
+       DOUBLE PRECISION, INTENT(out) :: c(:)
+     END SUBROUTINE coefeq
+  END INTERFACE
+  INTEGER, OPTIONAL :: maxder   ! maximum oder of derivatives
+!
+  INTEGER :: n1, nidbas1, ndim1, ng1
+  INTEGER :: i1, ig1
+  INTEGER :: irow, jcol
+  INTEGER, ALLOCATABLE :: left1(:)
+!
+  LOGICAL :: nlper1
+!
+  INTEGER :: kterms         ! Number of terms in weak form
+  INTEGER :: k, kmaxder, it1, iw1
+  INTEGER, ALLOCATABLE :: idert(:), iderw(:)  ! Derivative order
+  DOUBLE PRECISION, ALLOCATABLE  :: coefs(:,:)  ! Terms in weak form
+!
+  DOUBLE PRECISION, POINTER :: xg1(:,:), wg1(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: fun1(:,:,:,:), fun2(:,:,:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: mata(:,:), matc(:)
+  DOUBLE PRECISION, ALLOCATABLE :: matf(:,:,:)
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+  CALL get_dim(spl, ndim1, n1, nidbas1)
+  nlper1 = spl%period
+!
+!   Gauss points and weights on all intervals
+!
+  xg1 => spl%gausx  ! xg1(ng1,n1)
+  wg1 => spl%gausw  ! wg1(ng1,n1)
+  ng1 = SIZE(xg1,1)
+!
+!   Splines on all intervals
+!
+  kmaxder = 1
+  IF(PRESENT(maxder)) kmaxder = maxder
+  ALLOCATE(fun1(0:nidbas1,0:kmaxder,ng1,n1))
+  ALLOCATE(left1(ng1))
+  DO i1=1,n1
+     left1 = i1
+!!$     DO ig1=1,ng1
+!!$        CALL basfun(xg1(ig1,i1), spl, fun1(:,:,ig1,i1), left1(ig1))
+!!$     END DO
+     CALL basfun(xg1(:,i1), spl, fun1(:,:,:,i1), left1)
+  END DO
+  DEALLOCATE(left1)
+!===========================================================================
+!              2.0 Assembly loop
+!
+!   Weak form
+!
+  kterms = mat%nterms
+  ALLOCATE(idert(kterms))
+  ALLOCATE(iderw(kterms))
+  ALLOCATE(coefs(kterms,ng1))
+!
+!   Allocate local matrices
+!
+  ALLOCATE(mata(0:nidbas1,0:nidbas1))
+  ALLOCATE(matc(ng1))
+  ALLOCATE(matf(0:nidbas1,0:nidbas1,ng1))
+!
+  DO i1=1,n1
+!
+!   Coefficients of the weak form
+!
+     DO ig1=1,ng1
+        CALL coefeq(xg1(ig1,i1), idert, iderw, coefs(:,ig1))
+     END DO
+!
+!   Compute local matrix: A <- F*c + A
+!
+     mata = 0.0d0
+     DO k=1,kterms
+!
+        matc(1:ng1) = coefs(k,1:ng1)
+!
+        DO it1=0,nidbas1
+           DO iw1=0,nidbas1
+              DO ig1=1,ng1
+                 matf(it1,iw1,ig1) = wg1(ig1,i1) * &
+                      &  fun1(it1,idert(k),ig1,i1) * &
+                      &  fun1(iw1,iderw(k),ig1,i1)
+              END DO
+           END DO
+        END DO
+!
+        CALL dgemv('N', (nidbas1+1)*(nidbas1+1), ng1, 1.0d0, matf, &
+             &          (nidbas1+1)*(nidbas1+1), matc, 1, 1.0d0, mata, 1)
+     END DO
+!
+!   Map local matrix A to global matrix
+!
+!!$     WRITE(*,'(/a,i3)') "Lambda, i =", i1
+!!$     DO ig1=1,ng1
+!!$        WRITE(*,'(10(1pe12.3))') fun1(:,0,ig1,i1)
+!!$     END DO
+!!$     WRITE(*,'(a,i3)') "Lambda', i =", i1
+!!$     DO ig1=1,ng1
+!!$        WRITE(*,'(10(1pe12.3))') fun1(:,1,ig1,i1)
+!!$     END DO
+!!$     WRITE(*,'(/a)') 'local matrix'
+     DO it1=0,nidbas1
+        irow = i1+it1; IF(nlper1) irow = MODULO(irow-1,n1) + 1
+        DO iw1=0,nidbas1
+           jcol = i1+iw1; IF(nlper1) jcol = MODULO(jcol-1,n1) + 1
+           CALL updtmat(mat, irow, jcol, mata(it1,iw1))
+        END DO
+!!$        WRITE(*,'(10(1pe12.3))') mata(it1,:)
+     END DO
+!
+  END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(fun1)
+  DEALLOCATE(idert, iderw, coefs)
+  DEALLOCATE(mata)
+  DEALLOCATE(matc)
+  DEALLOCATE(matf)
diff --git a/src/conrhs.tpl b/src/conrhs.tpl
new file mode 100644
index 0000000..5278381
--- /dev/null
+++ b/src/conrhs.tpl
@@ -0,0 +1,52 @@
+!>
+!> @file conrhs.tpl
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+    DOUBLE PRECISION, POINTER :: xg(:,:), wg(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
+    INTEGER :: ndim, n, nidbas, ng
+    INTEGER :: i, ig, it, irow
+    LOGICAL :: nlper
+!
+    CALL get_dim(spl, ndim, n, nidbas)
+    nlper = spl%period
+    xg => spl%gausx  ! xg(ng,n)
+    wg => spl%gausw  ! wg(ng,n)
+    ng = SIZE(xg,1)
+    ALLOCATE(fun(0:nidbas,1))
+!
+    farr = 0.0d0
+    DO i=1,n
+       DO ig=1,ng
+          CALL basfun(xg(ig,i), spl, fun, i)
+          contrib = wg(ig,i)*frhs(xg(ig,i))
+          DO it=0,nidbas
+             irow = i+it
+             IF(nlper) irow = MODULO(irow-1,n) +1
+             farr(irow) = farr(irow)+contrib*fun(it,1)
+          END DO
+       END DO
+    END DO
+!
+    DEALLOCATE(fun)
diff --git a/src/csr_mod.f90 b/src/csr_mod.f90
new file mode 100644
index 0000000..ca67f83
--- /dev/null
+++ b/src/csr_mod.f90
@@ -0,0 +1,1255 @@
+!>
+!> @file csr_mod.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+MODULE csr
+!
+!   CSR: Implement CSR (Compressed Sparse Row) matrice
+!
+!    T.M. Tran, CRPP-EPFL
+!    October 2012
+!
+  USE sparse
+  USE mumps_bsplines
+  IMPLICIT NONE
+!
+  TYPE, EXTENDS(spmat) :: csr_mat
+     INTEGER                   :: mrows, ncols
+     INTEGER                   :: nnz = 0            ! Number of non-zeros
+     INTEGER                   :: nterms             ! Number of terms in weak form
+     LOGICAL                   :: nlforce_zero       ! Keep exixting nodes with zero value if .true.
+     INTEGER, POINTER          :: irow(:) => NULL()  ! points to start of row
+     INTEGER, POINTER          :: idiag(:) => NULL() ! points to diagonal element
+     INTEGER, POINTER          :: cols(:) => NULL()  ! Column indices
+     DOUBLE PRECISION, POINTER :: val(:) => NULL()   ! Elelement values
+     TYPE(mumps_mat), ALLOCATABLE :: mumps
+  END TYPE csr_mat
+!
+  TYPE, EXTENDS(zspmat) :: zcsr_mat
+     INTEGER                   :: mrows, ncols
+     INTEGER                   :: nnz = 0            ! Number of non-zeros
+     INTEGER                   :: nterms             ! Number of terms in weak form
+     LOGICAL                   :: nlforce_zero       ! Keep exixting nodes with zero value if .true.
+     INTEGER, POINTER          :: irow(:) => NULL()  ! points to start of row
+     INTEGER, POINTER          :: idiag(:) => NULL() ! points to diagonal element
+     INTEGER, POINTER          :: cols(:) => NULL()  ! Column indices
+     DOUBLE COMPLEX, POINTER :: val(:) => NULL()   ! Elelement values
+!
+     TYPE(zmumps_mat), ALLOCATABLE :: mumps
+  END TYPE zcsr_mat
+!
+  INTERFACE init
+     MODULE PROCEDURE init_csr_mat, init_zcsr_mat
+  END INTERFACE init
+  INTERFACE clear_mat
+     MODULE PROCEDURE clear_csr_mat, clear_zcsr_mat
+  END INTERFACE clear_mat
+  INTERFACE updtmat
+     MODULE PROCEDURE updt_csr_mat, updt_zcsr_mat
+  END INTERFACE updtmat
+  INTERFACE putele
+     MODULE PROCEDURE putele_csr_mat, putele_zcsr_mat
+  END INTERFACE putele
+  INTERFACE getele
+     MODULE PROCEDURE getele_csr_mat, getele_zcsr_mat
+  END INTERFACE getele
+  INTERFACE putrow
+     MODULE PROCEDURE putrow_csr_mat, putrow_zcsr_mat
+  END INTERFACE putrow
+  INTERFACE getrow
+     MODULE PROCEDURE getrow_csr_mat, getrow_zcsr_mat
+  END INTERFACE getrow
+  INTERFACE getdiag
+     MODULE PROCEDURE getdiag_csr_mat, getdiag_zcsr_mat
+  END INTERFACE getdiag
+  INTERFACE putcol
+     MODULE PROCEDURE putcol_csr_mat, putcol_zcsr_mat
+  END INTERFACE putcol
+  INTERFACE getcol
+     MODULE PROCEDURE getcol_csr_mat, getcol_zcsr_mat
+  END INTERFACE getcol
+  INTERFACE to_mat
+     MODULE PROCEDURE to_csr_mat, to_zcsr_mat
+  END INTERFACE to_mat
+  INTERFACE vmx
+     MODULE PROCEDURE vmx_csr_mat,  vmx_csr_matn, vmx_zcsr_mat,  vmx_zcsr_matn
+  END INTERFACE vmx
+  INTERFACE destroy
+     MODULE PROCEDURE destroy_csr_mat, destroy_zcsr_mat
+  END INTERFACE destroy
+  INTERFACE putmat
+     MODULE PROCEDURE put_csr_mat, put_zcsr_mat
+  END INTERFACE putmat
+!>>>>>
+!>>>>> CONMAT
+!>>>>
+  INTERFACE conmat
+     MODULE PROCEDURE conmat_1d_csr, conmat_2d_csr, conmat_1d_zcsr, conmat_2d_zcsr
+  END INTERFACE conmat
+!>>>>
+!>>>> MULTIGRID_MOD
+!>>>>
+  INTERFACE femat
+     MODULE PROCEDURE femat_csr
+  END INTERFACE femat
+  INTERFACE matnorm
+     MODULE PROCEDURE matnorm_csr
+  END INTERFACE matnorm
+  INTERFACE kron
+     MODULE PROCEDURE kron_csr
+  END INTERFACE kron
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+CONTAINS
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE init_csr_mat(n, nterms, mat, nlforce_zero, ncols)
+!
+!   Initialize an empty CSR matrix
+!
+    INTEGER, INTENT(in)           :: n, nterms
+    TYPE(csr_mat)                 :: mat
+    LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero
+    INTEGER, OPTIONAL, intent(in) :: ncols
+!
+    CALL init(n, mat%spmat)
+    mat%mrows = n
+    mat%ncols = n
+    IF(PRESENT(ncols)) mat%ncols = ncols
+    mat%nterms = nterms
+    mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val
+    IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero
+!
+  END SUBROUTINE init_csr_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE init_zcsr_mat(n, nterms, mat, nlforce_zero, ncols)
+!
+!   Initialize an empty CSR matrix
+!
+    INTEGER, INTENT(in)           :: n, nterms
+    TYPE(zcsr_mat)                :: mat
+    LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero
+    INTEGER, OPTIONAL, intent(in) :: ncols
+!
+    CALL init(n, mat%zspmat)
+    mat%mrows = n
+    mat%ncols = n
+    IF(PRESENT(ncols)) mat%ncols = ncols
+    mat%nterms = nterms
+    mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val
+    IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero
+!
+  END SUBROUTINE init_zcsr_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE clear_zcsr_mat(mat)
+!
+!   Clear matrix, keeping its sparse structure unchanged
+!
+    TYPE(zcsr_mat) :: mat
+!
+    mat%val = (0.0d0,0.0d0)
+  END SUBROUTINE clear_zcsr_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE clear_csr_mat(mat)
+!
+!   Clear matrix, keeping its sparse structure unchanged
+!
+    TYPE(csr_mat) :: mat
+!
+    mat%val = 0.0d0
+  END SUBROUTINE clear_csr_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE updt_csr_mat(mat, i, j, val)
+!
+!   Update element Aij of csr  matrix
+!
+    TYPE(csr_mat)                :: mat
+    INTEGER, INTENT(in)          :: i, j
+    DOUBLE PRECISION, INTENT(in) :: val
+    INTEGER :: k, s, e
+!
+    IF(mat%nnz.EQ.0) THEN   ! Still using linked lists
+       CALL updtmat(mat%spmat, i, j, val)
+    ELSE
+       s = mat%irow(i)
+       e = mat%irow(i+1)-1
+       k = isearch(mat%cols(s:e), j)
+       IF( k.GE.0 ) THEN
+          mat%val(s+k) =  mat%val(s+k)+val
+       ELSE
+          WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j
+          STOP '*** Abnormal EXIT in MODULE csr_mod ***'
+       END IF
+    END IF
+  END SUBROUTINE updt_csr_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE updt_zcsr_mat(mat, i, j, val)
+!
+!   Update element Aij of csr  matrix
+!
+    TYPE(zcsr_mat)               :: mat
+    INTEGER, INTENT(in)          :: i, j
+    DOUBLE COMPLEX, INTENT(in)   :: val
+    INTEGER :: k, s, e
+!
+    IF(mat%nnz.EQ.0) THEN   ! Still using linked lists
+       CALL updtmat(mat%zspmat, i, j, val)
+    ELSE
+       s = mat%irow(i)
+       e = mat%irow(i+1)-1
+       k = isearch(mat%cols(s:e), j)
+       IF( k.GE.0 ) THEN
+          mat%val(s+k) =  mat%val(s+k)+val
+       ELSE
+          WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j
+          STOP '*** Abnormal EXIT in MODULE csr_mod ***'
+       END IF
+    END IF
+  END SUBROUTINE updt_zcsr_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putele_csr_mat(mat, i, j, val)
+!
+!  Put element (i,j) of matrix
+!
+    TYPE(csr_mat)                 :: mat
+    INTEGER, INTENT(in)          :: i, j
+    DOUBLE PRECISION, INTENT(in) :: val
+    INTEGER :: iput, jput
+    INTEGER :: k, s, e
+!
+    iput = i
+    jput = j
+!
+    IF(mat%nnz.EQ.0) THEN   ! Still using linked lists
+       CALL putele(mat%spmat, iput, jput, val, &
+            &      nlforce_zero=mat%nlforce_zero)
+    ELSE
+       s = mat%irow(iput)
+       e = mat%irow(iput+1)-1
+       k = isearch(mat%cols(s:e), jput)
+       IF( k.GE.0 ) THEN
+          mat%val(s+k) =  val
+       ELSE
+          IF(ABS(val) .GT. EPSILON(0.0d0)) THEN   ! Ok for zero val
+             WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j
+             PRINT*, 'val', val
+             PRINT*, 'matrix m, n', mat%mrows, mat%ncols
+             STOP '*** Abnormal EXIT in MODULE csr_mod ***'
+          END IF
+       END IF
+   END IF
+  END SUBROUTINE putele_csr_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putele_zcsr_mat(mat, i, j, val)
+!
+!  Put element (i,j) of matrix
+!
+    TYPE(zcsr_mat)                :: mat
+    INTEGER, INTENT(in)          :: i, j
+    DOUBLE COMPLEX, INTENT(in)  :: val
+    INTEGER :: iput, jput
+    INTEGER :: k, s, e
+!
+    iput = i
+    jput = j
+!
+    IF(mat%nnz.EQ.0) THEN   ! Still using linked lists
+       CALL putele(mat%zspmat, iput, jput, val, &
+            &      nlforce_zero=mat%nlforce_zero)
+    ELSE
+       s = mat%irow(iput)
+       e = mat%irow(iput+1)-1
+       k = isearch(mat%cols(s:e), jput)
+       IF( k.GE.0 ) THEN
+          mat%val(s+k) =  val
+       ELSE
+          IF(ABS(val) .GT. EPSILON(0.0d0)) THEN   ! Ok for zero val
+             WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j
+             PRINT*, 'val', val
+             PRINT*, 'matrix m, n', mat%mrows, mat%ncols
+             STOP '*** Abnormal EXIT in MODULE csr_mod ***'
+          END IF
+       END IF
+   END IF
+ END SUBROUTINE putele_zcsr_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getele_csr_mat(mat, i, j, val)
+!
+!   Get element (i,j) of sparse matrix
+!
+    TYPE(csr_mat)                 :: mat
+    INTEGER, INTENT(in)           :: i, j
+    DOUBLE PRECISION, INTENT(out) :: val
+    INTEGER :: iget, jget
+    INTEGER :: k, s, e
+!
+    iget = i
+    jget = j
+    IF(mat%nnz.EQ.0) THEN   ! Still using linked lists
+       CALL getele(mat%spmat, iget, jget, val)
+    ELSE
+       s = mat%irow(iget)
+       e = mat%irow(iget+1)-1
+       k = isearch(mat%cols(s:e), jget)
+       IF( k.GE.0 ) THEN
+          val =mat%val(s+k) 
+       ELSE
+          val = 0.0d0   ! Assume zero val if not found
+       END IF
+    END IF
+  END SUBROUTINE getele_csr_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getele_zcsr_mat(mat, i, j, val)
+!
+!   Get element (i,j) of sparse matrix
+!
+    TYPE(zcsr_mat)                :: mat
+    INTEGER, INTENT(in)           :: i, j
+    DOUBLE COMPLEX, INTENT(out)   :: val
+    INTEGER :: iget, jget
+    INTEGER :: k, s, e
+!
+    iget = i
+    jget = j
+    IF(mat%nnz.EQ.0) THEN   ! Still using linked lists
+       CALL getele(mat%zspmat, iget, jget, val)
+    ELSE
+       s = mat%irow(iget)
+       e = mat%irow(iget+1)-1
+       k = isearch(mat%cols(s:e), jget)
+       IF( k.GE.0 ) THEN
+          val =mat%val(s+k) 
+       ELSE
+          val = 0.0d0   ! Assume zero val if not found
+       END IF
+    END IF
+  END SUBROUTINE getele_zcsr_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putrow_csr_mat(amat, i, arr)
+!
+! Put a row into sparse matrix
+!
+    TYPE(csr_mat), INTENT(inout) :: amat
+    INTEGER, INTENT(in)          :: i
+    DOUBLE PRECISION, INTENT(in) :: arr(:)
+    INTEGER :: s, e, j
+!
+    IF(amat%nnz.EQ.0) THEN   ! Still using linked lists
+       DO j=1,amat%ncols
+          CALL putele(amat, i, j, arr(j))
+       END DO
+    ELSE
+       s = amat%irow(i)
+       e = amat%irow(i+1)-1
+       DO j=s,e
+          amat%val(j) = arr(amat%cols(j))
+       END DO
+    END IF
+  END SUBROUTINE putrow_csr_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putrow_zcsr_mat(amat, i, arr)
+!
+! Put a row into sparse matrix
+!
+    TYPE(zcsr_mat), INTENT(inout) :: amat
+    INTEGER, INTENT(in)           :: i
+    DOUBLE COMPLEX, INTENT(in)    :: arr(:)
+    INTEGER :: s, e, j
+!
+    IF(amat%nnz.EQ.0) THEN   ! Still using linked lists
+       DO j=1,amat%ncols
+          CALL putele(amat, i, j, arr(j))
+       END DO
+    ELSE
+       s = amat%irow(i)
+       e = amat%irow(i+1)-1
+       DO j=s,e
+          amat%val(j) = arr(amat%cols(j))
+       END DO
+    END IF
+  END SUBROUTINE putrow_zcsr_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getrow_csr_mat(amat, i, arr)
+!
+!   Get a row from sparse matrix
+!
+    TYPE(csr_mat), INTENT(in)     :: amat
+    INTEGER, INTENT(in)           :: i
+    DOUBLE PRECISION, INTENT(out) :: arr(:)
+    INTEGER :: s, e, j
+!
+    arr = 0.0d0
+    IF(amat%nnz.EQ.0) THEN   ! Still using linked lists
+       DO j=1,amat%ncols
+          CALL getele(amat, i, j, arr(j))
+       END DO
+    ELSE
+       s = amat%irow(i)
+       e = amat%irow(i+1)-1
+       DO j=s,e
+          arr(amat%cols(j)) = amat%val(j)
+       END DO
+    END IF
+  END SUBROUTINE getrow_csr_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getrow_zcsr_mat(amat, i, arr)
+!
+!   Get a row from sparse matrix
+!
+    TYPE(zcsr_mat), INTENT(in)    :: amat
+    INTEGER, INTENT(in)           :: i
+    DOUBLE COMPLEX, INTENT(out)   :: arr(:)
+    INTEGER :: s, e, j
+!
+    arr = 0.0d0
+    IF(amat%nnz.EQ.0) THEN   ! Still using linked lists
+       DO j=1,amat%ncols
+          CALL getele(amat, i, j, arr(j))
+       END DO
+    ELSE
+       s = amat%irow(i)
+       e = amat%irow(i+1)-1
+       DO j=s,e
+          arr(amat%cols(j)) = amat%val(j)
+       END DO
+    END IF
+  END SUBROUTINE getrow_zcsr_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getdiag_csr_mat(amat, arr)
+!
+!   Get the diagonal from matrix
+!
+    TYPE(csr_mat), INTENT(in)     :: amat
+    DOUBLE PRECISION, INTENT(out) :: arr(:)
+!
+!  WARNING: assume that CSR matrix has been converted from linked lists
+!
+    arr(:) = amat%val(amat%idiag(:))
+  END SUBROUTINE getdiag_csr_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getdiag_zcsr_mat(amat, arr)
+!
+!   Get the diagonal from matrix
+!
+    TYPE(zcsr_mat), INTENT(in)    :: amat
+    DOUBLE COMPLEX, INTENT(out)   :: arr(:)
+!
+!  WARNING: assume that CSR matrix has been converted from linked lists
+!
+    arr(:) = amat%val(amat%idiag(:))
+  END SUBROUTINE getdiag_zcsr_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putcol_csr_mat(amat, j, arr)
+!
+! Put a column into sparse matrix
+!
+    TYPE(csr_mat), INTENT(inout)     :: amat
+    INTEGER, INTENT(in)              :: j
+    DOUBLE PRECISION, INTENT(in)     :: arr(:)
+    INTEGER :: i
+!
+    DO i=1,amat%mrows
+       CALL putele(amat, i, j, arr(i))
+    END DO
+  END SUBROUTINE putcol_csr_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putcol_zcsr_mat(amat, j, arr)
+!
+! Put a column into sparse matrix
+!
+    TYPE(zcsr_mat), INTENT(inout)    :: amat
+    INTEGER, INTENT(in)              :: j
+    DOUBLE COMPLEX, INTENT(in)       :: arr(:)
+    INTEGER :: i
+!
+    DO i=1,amat%mrows
+       CALL putele(amat, i, j, arr(i))
+    END DO
+  END SUBROUTINE putcol_zcsr_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getcol_csr_mat(amat, j, arr)
+!
+!   Get a column from sparse matrix
+!
+    TYPE(csr_mat), INTENT(in)     :: amat
+    INTEGER, INTENT(in)           :: j
+    DOUBLE PRECISION, INTENT(out) :: arr(:)
+    INTEGER :: i
+!
+    DO i=1,amat%mrows
+       CALL getele(amat, i, j, arr(i))
+    END DO
+  END SUBROUTINE getcol_csr_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getcol_zcsr_mat(amat, j, arr)
+!
+!   Get a column from sparse matrix
+!
+    TYPE(zcsr_mat), INTENT(in)    :: amat
+    INTEGER, INTENT(in)           :: j
+    DOUBLE COMPLEX, INTENT(out)   :: arr(:)
+    INTEGER :: i
+!
+    DO i=1,amat%mrows
+       CALL getele(amat, i, j, arr(i))
+    END DO
+  END SUBROUTINE getcol_zcsr_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE to_csr_mat(mat, nlkeep)
+!
+!   Convert linked list spmat to csr matrice structure
+!
+    TYPE(csr_mat)             :: mat
+    LOGICAL, INTENT(in), OPTIONAL :: nlkeep
+    INTEGER :: nnz_arr(mat%rank)
+    INTEGER :: i, nnz, rank, s, e
+    LOGICAL :: nlclean
+!
+    nlclean = .TRUE.
+    IF(PRESENT(nlkeep)) THEN
+       nlclean = .NOT. nlkeep
+    END IF
+!
+!    Allocate the csr matrix structure
+!
+    nnz = get_count(mat%spmat, nnz_arr)
+    rank = mat%rank
+    mat%nnz = nnz
+    IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
+    IF(ASSOCIATED(mat%idiag)) DEALLOCATE(mat%idiag)
+    IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
+    IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
+    ALLOCATE(mat%irow(rank+1))
+    ALLOCATE(mat%idiag(rank))
+    ALLOCATE(mat%cols(nnz))
+    ALLOCATE(mat%val(nnz))
+!
+!    Fill csr structure and optionally deallocate the sparse rows
+!
+    mat%irow = 1
+    DO i=1,rank
+       mat%irow(i+1) = mat%irow(i) + nnz_arr(i)
+       s = mat%irow(i)
+       e = mat%irow(i+1)-1
+       CALL getrow(mat%spmat%row(i), mat%val(s:e), mat%cols(s:e))
+       mat%idiag(i) = isearch(mat%cols(s:e), i) + s
+       IF(nlclean) CALL destroy(mat%spmat%row(i))
+    END DO
+!!$!
+!!$!    MUMPS mat for direct solver
+!!$!
+!!$    ALLOCATE(mat%mumps)
+!!$    CALL csr2mumps(mat, mat%mumps)
+  END SUBROUTINE to_csr_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE to_zcsr_mat(mat, nlkeep)
+!
+!   Convert linked list spmat to csr matrice structure
+!
+    TYPE(zcsr_mat)                :: mat
+    LOGICAL, INTENT(in), OPTIONAL :: nlkeep
+    INTEGER :: nnz_arr(mat%rank)
+    INTEGER :: i, nnz, rank, s, e
+    LOGICAL :: nlclean
+!
+    nlclean = .TRUE.
+    IF(PRESENT(nlkeep)) THEN
+       nlclean = .NOT. nlkeep
+    END IF
+!
+!    Allocate the csr matrix structure
+!
+    nnz = get_count(mat%zspmat, nnz_arr)
+    rank = mat%rank
+    mat%nnz = nnz
+    IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
+    IF(ASSOCIATED(mat%idiag)) DEALLOCATE(mat%idiag)
+    IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
+    IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
+    ALLOCATE(mat%irow(rank+1))
+    ALLOCATE(mat%idiag(rank))
+    ALLOCATE(mat%cols(nnz))
+    ALLOCATE(mat%val(nnz))
+!
+!    Fill csr structure and optionally deallocate the sparse rows
+!
+    mat%irow = 1
+    DO i=1,rank
+       mat%irow(i+1) = mat%irow(i) + nnz_arr(i)
+       s = mat%irow(i)
+       e = mat%irow(i+1)-1
+       CALL getrow(mat%zspmat%row(i), mat%val(s:e), mat%cols(s:e))
+       mat%idiag(i) = isearch(mat%cols(s:e), i) + s
+       IF(nlclean) CALL destroy(mat%zspmat%row(i))
+    END DO
+!!$!
+!!$!    MUMPS mat for direct solver
+!!$!
+!!$    ALLOCATE(mat%mumps)
+!!$    CALL csr2mumps(mat, mat%mumps)
+  END SUBROUTINE to_zcsr_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE csr2mumps(mat, mat_mumps)
+!
+!   Fill mumps structure (based on routine to_mumps_mat)
+!
+    INCLUDE 'mpif.h'
+    TYPE(csr_mat)   :: mat
+    TYPE(mumps_mat) :: mat_mumps
+!
+    INTEGER :: i, rank, s, e
+    INTEGER :: comm, ierr, nnz_loc
+!
+    CALL init(mat%rank, mat%nterms, mat_mumps)
+!
+    comm = mat_mumps%mumps_par%COMM
+    mat_mumps%mumps_par%ICNTL(18) = 3 ! Distributed assembled matrix
+!
+!    Allocate the Mumps matrix structure
+!      CSR format: (cols, irow, val) or (JCN, irow, A)
+!      COO format: (IRN, JCN, A)  or (IRN, cols, val)
+!
+    rank = mat_mumps%rank
+    nnz_loc = mat%nnz
+    mat_mumps%nnz_start = 0
+    CALL mpi_exscan(nnz_loc, mat_mumps%nnz_start, 1, MPI_INTEGER, MPI_SUM, comm, ierr)
+    mat_mumps%nnz_start = mat_mumps%nnz_start + 1
+    mat_mumps%nnz_end = mat_mumps%nnz_start + nnz_loc - 1
+    mat_mumps%nnz_loc = nnz_loc
+    CALL mpi_allreduce(nnz_loc, mat_mumps%nnz, 1, MPI_INTEGER, MPI_SUM, comm, ierr)
+!
+    mat_mumps%mumps_par%N = rank
+    mat_mumps%mumps_par%NZ_loc = nnz_loc
+!
+    mat_mumps%cols => mat%cols
+    mat_mumps%irow => mat%irow
+    mat_mumps%val => mat%val
+!
+!     (A,JCN) picked from CSR mat
+    mat_mumps%mumps_par%A_loc => mat_mumps%val
+    mat_mumps%mumps_par%JCN_loc => mat_mumps%cols
+!
+!    Determine IRN array
+    IF(ASSOCIATED(mat_mumps%mumps_par%IRN_loc)) DEALLOCATE(mat_mumps%mumps_par%IRN_loc)
+    ALLOCATE(mat_mumps%mumps_par%IRN_loc(nnz_loc))
+    DO i=mat_mumps%istart,mat_mumps%iend
+       s = mat_mumps%irow(i) - mat_mumps%nnz_start + 1
+       e = mat_mumps%irow(i+1) - mat_mumps%nnz_start
+       mat_mumps%mumps_par%IRN_loc(s:e) = i
+    END DO
+    CALL destroy(mat_mumps%mat)
+    NULLIFY(mat_mumps%mat)
+!    
+  END SUBROUTINE csr2mumps
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE zcsr2mumps(mat, mat_mumps)
+!
+!   Fill mumps structure (based on routine to_mumps_mat)
+!
+    INCLUDE 'mpif.h'
+    TYPE(zcsr_mat)   :: mat
+    TYPE(zmumps_mat) :: mat_mumps
+!
+    INTEGER :: i, rank, s, e
+    INTEGER :: comm, ierr, nnz_loc
+!
+    CALL init(mat%rank, mat%nterms, mat_mumps)
+!
+    comm = mat_mumps%mumps_par%COMM
+    mat_mumps%mumps_par%ICNTL(18) = 3 ! Distributed assembled matrix
+!
+!    Allocate the Mumps matrix structure
+!      CSR format: (cols, irow, val) or (JCN, irow, A)
+!      COO format: (IRN, JCN, A)  or (IRN, cols, val)
+!
+    rank = mat_mumps%rank
+    nnz_loc = mat%nnz
+    mat_mumps%nnz_start = 0
+    CALL mpi_exscan(nnz_loc, mat_mumps%nnz_start, 1, MPI_INTEGER, MPI_SUM, comm, ierr)
+    mat_mumps%nnz_start = mat_mumps%nnz_start + 1
+    mat_mumps%nnz_end = mat_mumps%nnz_start + nnz_loc - 1
+    mat_mumps%nnz_loc = nnz_loc
+    CALL mpi_allreduce(nnz_loc, mat_mumps%nnz, 1, MPI_INTEGER, MPI_SUM, comm, ierr)
+!
+    mat_mumps%mumps_par%N = rank
+    mat_mumps%mumps_par%NZ_loc = nnz_loc
+!
+    mat_mumps%cols => mat%cols
+    mat_mumps%irow => mat%irow
+    mat_mumps%val => mat%val
+!
+!     (A,JCN) picked from CSR mat
+    mat_mumps%mumps_par%A_loc => mat_mumps%val
+    mat_mumps%mumps_par%JCN_loc => mat_mumps%cols
+!
+!    Determine IRN array
+    IF(ASSOCIATED(mat_mumps%mumps_par%IRN_loc)) DEALLOCATE(mat_mumps%mumps_par%IRN_loc)
+    ALLOCATE(mat_mumps%mumps_par%IRN_loc(nnz_loc))
+    DO i=mat_mumps%istart,mat_mumps%iend
+       s = mat_mumps%irow(i) - mat_mumps%nnz_start + 1
+       e = mat_mumps%irow(i+1) - mat_mumps%nnz_start
+       mat_mumps%mumps_par%IRN_loc(s:e) = i
+    END DO
+    CALL destroy(mat_mumps%mat)
+    NULLIFY(mat_mumps%mat)
+!    
+  END SUBROUTINE zcsr2mumps
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE destroy_csr_mat(mat)
+!
+!   Deallocate csr mat
+!
+    TYPE(csr_mat) :: mat
+!
+    CALL destroy(mat%spmat)
+    IF(mat%nnz.GT.0) THEN
+       DEALLOCATE(mat%irow)
+       DEALLOCATE(mat%idiag)
+       DEALLOCATE(mat%cols)
+       DEALLOCATE(mat%val)
+    END IF
+    mat%nnz = 0
+  END SUBROUTINE destroy_csr_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE destroy_zcsr_mat(mat)
+!
+!   Deallocate csr mat
+!
+    TYPE(zcsr_mat) :: mat
+!
+    CALL destroy(mat%zspmat)
+    IF(mat%nnz.GT.0) THEN
+       DEALLOCATE(mat%irow)
+       DEALLOCATE(mat%idiag)
+       DEALLOCATE(mat%cols)
+       DEALLOCATE(mat%val)
+    END IF
+    mat%nnz = 0
+  END SUBROUTINE destroy_zcsr_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  FUNCTION vmx_csr_mat(mat, xarr, transa_in) RESULT(yarr)
+!
+!   Return product mat*x
+!
+    TYPE(csr_mat)                          :: mat
+    DOUBLE PRECISION, INTENT(in)           :: xarr(:)
+    CHARACTER(len=1), OPTIONAL, INTENT(in) :: transa_in
+    DOUBLE PRECISION                       :: yarr(SIZE(xarr))
+!
+    DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0
+    CHARACTER(len=1) :: transa
+    CHARACTER(len=6) :: matdescra
+    INTEGER :: n, i, j
+!
+    n = mat%rank
+    transa = 'N'
+    IF(PRESENT(transa_in)) transa = transa_in
+!
+#ifdef MKL
+    matdescra = 'g'
+    CALL mkl_dcsrmv(transa, n, n, alpha, matdescra, mat%val, &
+         &          mat%cols, mat%irow(1), mat%irow(2), xarr, &
+         &          beta, yarr)
+#else
+    yarr = 0.0d0
+    DO i=1,mat%rank
+       DO j=mat%irow(i), mat%irow(i+1)-1
+          IF(transa .EQ. 'N') THEN
+             yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j))
+          ELSE
+             yarr(mat%cols(j)) = yarr(mat%cols(j)) + mat%val(j)*xarr(i)
+          END IF
+       END DO
+    END DO
+#endif
+!
+  END FUNCTION vmx_csr_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  FUNCTION vmx_zcsr_mat(mat, xarr, transa_in) RESULT(yarr)
+!
+!   Return product mat*x
+!
+    TYPE(zcsr_mat)                         :: mat
+    DOUBLE COMPLEX, INTENT(in)             :: xarr(:)
+    CHARACTER(len=1), OPTIONAL, INTENT(in) :: transa_in
+    DOUBLE COMPLEX                         :: yarr(SIZE(xarr))
+!
+    DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0)
+    CHARACTER(len=1) :: transa
+    CHARACTER(len=6) :: matdescra
+    INTEGER :: n, i, j
+!
+    n = mat%rank
+    transa = 'N'
+    IF(PRESENT(transa_in)) transa = transa_in
+!
+#ifdef MKL
+    matdescra = 'g'
+    CALL mkl_zcsrmv(transa, n, n, alpha, matdescra, mat%val, &
+         &          mat%cols, mat%irow(1), mat%irow(2), xarr, &
+         &          beta, yarr)
+#else
+    yarr = 0.0d0
+    DO i=1,mat%rank
+       DO j=mat%irow(i), mat%irow(i+1)-1
+          IF(transa .EQ. 'N') THEN
+             yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j))
+          ELSE
+             yarr(mat%cols(j)) = yarr(mat%cols(j)) + mat%val(j)*xarr(i)
+          END IF
+       END DO
+    END DO
+#endif
+!
+  END FUNCTION vmx_zcsr_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  FUNCTION vmx_csr_matn(mat, xarr, transa_in) RESULT(yarr)
+!
+!   Return product mat*x
+!
+    TYPE(csr_mat)                          :: mat
+    DOUBLE PRECISION, INTENT(in)           :: xarr(:,:)
+    CHARACTER(len=1), OPTIONAL, INTENT(in) :: transa_in
+    DOUBLE PRECISION                       :: yarr(SIZE(xarr,1),SIZE(xarr,2))
+!
+    DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0
+    CHARACTER(len=1) :: transa
+    INTEGER :: n, nrhs, i, j
+    CHARACTER(len=6) :: matdescra
+!
+    n = mat%rank
+    nrhs = SIZE(xarr,2)
+    transa = 'N'
+    IF(PRESENT(transa_in)) transa = transa_in
+!
+#ifdef MKL
+    matdescra = 'g'
+    CALL mkl_dcsrmm(transa, n, nrhs, n, alpha, matdescra, mat%val,&
+         &           mat%cols, mat%irow(1), mat%irow(2), xarr, &
+         &           n, beta, yarr, n)
+#else
+    yarr = 0.0d0
+    DO i=1,n
+       DO j=mat%irow(i), mat%irow(i+1)-1
+          IF(transa .EQ. 'N') THEN
+             yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:)
+          ELSE
+             yarr(mat%cols(j),:) = yarr(mat%cols(j),:) + mat%val(j)*xarr(i,:)
+          END IF
+       END DO
+    END DO
+#endif
+!
+  END FUNCTION vmx_csr_matn
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  FUNCTION vmx_zcsr_matn(mat, xarr, transa_in) RESULT(yarr)
+!
+!   Return product mat*x
+!
+    TYPE(zcsr_mat)                         :: mat
+    DOUBLE COMPLEX, INTENT(in)             :: xarr(:,:)
+    CHARACTER(len=1), OPTIONAL, INTENT(in) :: transa_in
+    DOUBLE COMPLEX                          :: yarr(SIZE(xarr,1),SIZE(xarr,2))
+!
+    DOUBLE COMPLEX   :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0)
+    CHARACTER(len=1) :: transa
+    INTEGER :: n, nrhs, i, j
+    CHARACTER(len=6) :: matdescra
+!
+    n = mat%rank
+    nrhs = SIZE(xarr,2)
+    transa = 'N'
+    IF(PRESENT(transa_in)) transa = transa_in
+!
+#ifdef MKL
+    matdescra = 'g'
+    CALL mkl_zcsrmm(transa, n, nrhs, n, alpha, matdescra, mat%val,&
+         &           mat%cols, mat%irow(1), mat%irow(2), xarr, &
+         &           n, beta, yarr, n)
+#else
+    yarr = (0.0d0,0.0d0)
+    DO i=1,n
+       DO j=mat%irow(i), mat%irow(i+1)-1
+          IF(transa .EQ. 'N') THEN
+             yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:)
+          ELSE
+             yarr(mat%cols(j),:) = yarr(mat%cols(j),:) + mat%val(j)*xarr(i,:)
+          END IF
+       END DO
+    END DO
+#endif
+!
+  END FUNCTION vmx_zcsr_matn
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE conmat_1d_csr(spl, mat, coefeq, maxder)
+!
+!   Construction of FE matrix mat for 1D differential operator
+!   using spline spl
+!
+    USE bsplines
+    TYPE(csr_mat)              :: mat
+    TYPE(spline1d), INTENT(in) :: spl
+!
+    INCLUDE '../../bsplines/src/conmat_1d.tpl'
+  END SUBROUTINE conmat_1d_csr
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE conmat_1d_zcsr(spl, mat, coefeq, maxder)
+!
+!   Construction of FE matrix mat for 1D differential operator
+!   using spline spl
+!
+    USE bsplines
+    TYPE(zcsr_mat)              :: mat
+    TYPE(spline1d), INTENT(in) :: spl
+!
+    INCLUDE '../../bsplines/src/zconmat_1d.tpl'
+  END SUBROUTINE conmat_1d_zcsr
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE conmat_2d_csr(spl, mat, coefeq, maxder, nat_order)
+!
+!   Construction of FE matrix mat for 2D differential operator
+!   using spline spl
+!
+    USE bsplines
+    TYPE(spline2d), INTENT(in) :: spl
+    TYPE(csr_mat)              :: mat
+!
+    INCLUDE '../../bsplines/src/conmat.tpl'
+  END SUBROUTINE conmat_2d_csr
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE conmat_2d_zcsr(spl, mat, coefeq, maxder, nat_order)
+!
+!   Construction of FE matrix mat for 2D differential operator
+!   using spline spl
+!
+    USE bsplines
+    TYPE(spline2d), INTENT(in) :: spl
+    TYPE(zcsr_mat)             :: mat
+!
+    INCLUDE '../../bsplines/src/zconmat.tpl'
+  END SUBROUTINE conmat_2d_zcsr
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE femat_csr(spl, mat, coefeq, nterms)
+!
+!   Compute fe matrix
+!
+    USE bsplines
+    TYPE(spline1d), INTENT(in)   :: spl
+    TYPE(csr_mat), INTENT(inout) :: mat
+    INTEGER, INTENT(in)          :: nterms
+    INTERFACE
+       SUBROUTINE coefeq(x, idt, idw, c)
+         DOUBLE PRECISION, INTENT(in) :: x
+         INTEGER, INTENT(out) :: idt(:), idw(:)
+         DOUBLE PRECISION, INTENT(out) :: c(:)
+       END SUBROUTINE coefeq
+    END INTERFACE
+!
+    INTEGER :: nrank, nx, nidbas
+!
+    CALL get_dim(spl, nrank, nx, nidbas)
+    IF(spl%period) nrank = nx
+    IF(mat%nnz.EQ.0) THEN
+       WRITE(*,'(a,i0,a)') 'FEMAT: Initialize mat with ', &
+            &    nterms, ' terms ...'
+       CALL init(nrank, nterms, mat)
+    END IF
+    CALL conmat(spl, mat, coefeq)
+  END SUBROUTINE femat_csr
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE put_csr_mat(fid, label, mat, str)
+!
+!   Write matrix to hdf5 file
+!
+    USE futils
+!
+    INTEGER, INTENT(in)                    :: fid
+    CHARACTER(len=*), INTENT(in)           :: label
+    TYPE(csr_mat)                          :: mat
+    CHARACTER(len=*), OPTIONAL, INTENT(in) :: str
+!
+    IF(PRESENT(str)) THEN
+       CALL creatg(fid, label, str)
+    ELSE
+       CALL creatg(fid, label)
+    END IF
+    CALL attach(fid, label, 'RANK', mat%rank)
+    CALL attach(fid, label, 'NNZ',  mat%nnz)
+    CALL putarr(fid, TRIM(label)//'/irow', mat%irow)
+    CALL putarr(fid, TRIM(label)//'/cols', mat%cols)
+    CALL putarr(fid, TRIM(label)//'/idiag', mat%idiag)
+    CALL putarr(fid, TRIM(label)//'/val', mat%val)
+  END SUBROUTINE put_csr_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE put_zcsr_mat(fid, label, mat, str)
+!
+!   Write matrix to hdf5 file
+!
+    USE futils
+!
+    INTEGER, INTENT(in)                    :: fid
+    CHARACTER(len=*), INTENT(in)           :: label
+    TYPE(zcsr_mat)                         :: mat
+    CHARACTER(len=*), OPTIONAL, INTENT(in) :: str
+!
+    IF(PRESENT(str)) THEN
+       CALL creatg(fid, label, str)
+    ELSE
+       CALL creatg(fid, label)
+    END IF
+    CALL attach(fid, label, 'RANK', mat%rank)
+    CALL attach(fid, label, 'NNZ',  mat%nnz)
+    CALL putarr(fid, TRIM(label)//'/irow', mat%irow)
+    CALL putarr(fid, TRIM(label)//'/cols', mat%cols)
+    CALL putarr(fid, TRIM(label)//'/idiag', mat%idiag)
+    CALL putarr(fid, TRIM(label)//'/val', mat%val)
+  END SUBROUTINE put_zcsr_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  DOUBLE PRECISION FUNCTION matnorm_csr(mat, p)
+!
+!   Compute matrix norm
+!
+    TYPE(csr_mat), INTENT(in)              :: mat
+    CHARACTER(len=*), OPTIONAL, INTENT(in) :: p
+!
+    CHARACTER(len=4) :: norm_type
+    INTEGER          :: i, j
+    DOUBLE PRECISION :: temp(mat%rank)
+!
+    norm_type = 'fro'
+    IF(PRESENT(p)) norm_type = p
+!
+    SELECT CASE (norm_type)
+    CASE ('inf')
+       DO i=1,mat%rank
+          temp(i) = SUM(ABS(mat%val(mat%irow(i):mat%irow(i+1)-1)))
+       END DO
+       matnorm_csr = MAXVAL(temp)
+    CASE ('1')
+       temp = 0.0d0
+       DO i=1,mat%rank
+          DO j=mat%irow(i), mat%irow(i+1)-1
+             temp(mat%cols(j)) = temp(mat%cols(j)) + ABS(mat%val(j))
+          END DO
+       END DO
+       matnorm_csr = MAXVAL(temp)
+    CASE('fro')
+       matnorm_csr = SQRT(DOT_PRODUCT(mat%val, mat%val))
+    END SELECT
+  END FUNCTION matnorm_csr
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE full_to_csr(fullmat, mat)
+!
+!   Convert full rectangular matrix to csr mat
+!
+    DOUBLE PRECISION, INTENT(inout) :: fullmat(:,:)
+    TYPE(csr_mat), INTENT(out)      :: mat
+!
+    INTEGER :: m, n, nnz
+    INTEGER :: i, j, k
+!
+    m = SIZE(fullmat,1)
+    n = SIZE(fullmat,2)
+    CALL init(m, 0, mat, ncols=n)
+!
+!   Determine nnz of matrix
+!
+    IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
+    IF(ASSOCIATED(mat%idiag)) DEALLOCATE(mat%idiag)
+    IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
+    IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
+!
+    ALLOCATE(mat%irow(m+1))
+    ALLOCATE(mat%idiag(m))
+!
+! Clear matrix small elements of fullmat
+    WHERE( ABS(fullmat) < 1.d-8) fullmat=0.0d0
+!
+    mat%irow(1) = 1
+    nnz = 0
+    DO i=1,m
+       DO j=1,n
+          IF(ABS(fullmat(i,j)).GT.EPSILON(0.0d0)) THEN 
+             nnz = nnz+1
+             IF(m.EQ.n .AND. i.EQ.j) THEN ! Only for square matrix
+                mat%idiag(i) = nnz
+             END IF
+          END IF
+       END DO
+       mat%irow(i+1) = nnz+1
+    END DO
+!
+!    Allocate and fill the csr matrix structure
+!
+    mat%nnz = nnz
+    ALLOCATE(mat%cols(nnz))
+    ALLOCATE(mat%val(nnz))
+    k=0
+    DO i=1,m
+       DO j=1,n
+          IF(ABS(fullmat(i,j)).GT.EPSILON(0.0d0)) THEN 
+             k=k+1
+             mat%cols(k) = j
+             mat%val(k) = fullmat(i,j)
+          END IF
+       END DO
+    END DO
+  END SUBROUTINE full_to_csr
+!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE full_to_zcsr(fullmat, mat)
+!
+!   Convert full rectangular matrix to csr mat
+!
+    DOUBLE COMPLEX, INTENT(inout) :: fullmat(:,:)
+    TYPE(zcsr_mat), INTENT(out)   :: mat
+!
+    INTEGER :: m, n, nnz
+    INTEGER :: i, j, k
+!
+    m = SIZE(fullmat,1)
+    n = SIZE(fullmat,2)
+    CALL init(m, 0, mat, ncols=n)
+!
+!   Determine nnz of matrix
+!
+    IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
+    IF(ASSOCIATED(mat%idiag)) DEALLOCATE(mat%idiag)
+    IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
+    IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
+!
+    ALLOCATE(mat%irow(m+1))
+    ALLOCATE(mat%idiag(m))
+!
+! Clear matrix small elements of fullmat
+    WHERE( ABS(fullmat) < 1.d-8) fullmat=(0.0d0,0.0d0)
+!
+    mat%irow(1) = 1
+    nnz = 0
+    DO i=1,m
+       DO j=1,n
+          IF(ABS(fullmat(i,j)).GT.EPSILON(0.0d0)) THEN 
+             nnz = nnz+1
+             IF(m.EQ.n .AND. i.EQ.j) THEN ! Only for square matrix
+                mat%idiag(i) = nnz
+             END IF
+          END IF
+       END DO
+       mat%irow(i+1) = nnz+1
+    END DO
+!
+!    Allocate and fill the csr matrix structure
+!
+    mat%nnz = nnz
+    ALLOCATE(mat%cols(nnz))
+    ALLOCATE(mat%val(nnz))
+    k=0
+    DO i=1,m
+       DO j=1,n
+          IF(ABS(fullmat(i,j)).GT.EPSILON(0.0d0)) THEN 
+             k=k+1
+             mat%cols(k) = j
+             mat%val(k) = fullmat(i,j)
+          END IF
+       END DO
+    END DO
+  END SUBROUTINE full_to_zcsr
+!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE check_dom(mat)
+!
+!  Check whether mat is strict diagonal dominabce.
+! 
+    TYPE(csr_mat), INTENT(in) :: mat
+    DOUBLE PRECISION :: arow(mat%rank), asum(mat%rank)
+    INTEGER :: n, i, j1, j2, jdiag
+!
+    n = mat%rank
+    DO i=1,n
+       j1 = mat%irow(i)
+       jdiag = mat%idiag(i)
+       j2 = mat%irow(i+1)-1
+       asum(i) = SUM(ABS(mat%val(j1:j2))) / ABS(mat%val(jdiag)) - 1.0d0
+    END DO
+    WRITE(*,'(/a,1pe12.3)') 'Max of sum of off-diag', MAXVAL(asum)
+!
+  END SUBROUTINE check_dom
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE kron_csr(mata, matb, matc)
+!
+!   Kronecker product of 2 CSR matrices
+!
+    USE sparse, ONLY : isearch
+    TYPE(csr_mat), INTENT(in)  :: mata, matb
+    TYPE(csr_mat), INTENT(out) :: matc
+!
+    INTEGER :: m1, n1, nnz1, m2, n2, nnz2, m, n, nnz
+    INTEGER :: i,i1,i2,j1,s,s1,s2,e,e1,e2,k,nc2
+!
+    m1 = mata%mrows
+    n1 = mata%ncols
+    nnz1 = mata%nnz
+    m2 = matb%mrows
+    n2 = matb%ncols
+    nnz2 = matb%nnz
+    m = m1*m2
+    n = n1*n2
+    nnz = nnz1*nnz2
+!
+    CALL init(m, 0, matc, ncols=n)
+    matc%nnz = nnz
+    IF(ASSOCIATED(matc%irow)) DEALLOCATE(matc%irow)
+    IF(ASSOCIATED(matc%idiag)) DEALLOCATE(matc%idiag)
+    IF(ASSOCIATED(matc%cols)) DEALLOCATE(matc%cols)
+    IF(ASSOCIATED(matc%val)) DEALLOCATE(matc%val)
+    ALLOCATE(matc%irow(m+1))
+    IF(m.EQ.n) THEN
+       ALLOCATE(matc%idiag(m))     ! Only for square matrices
+    END IF
+    ALLOCATE(matc%cols(nnz))
+    ALLOCATE(matc%val(nnz))
+!
+    k = 0
+    matc%irow(1) = 1
+    DO i1=1,m1
+       s1=mata%irow(i1)
+       e1=mata%irow(i1+1)-1
+       DO i2=1,m2
+          s2=matb%irow(i2)
+          e2=matb%irow(i2+1)-1
+          nc2=e2-s2+1
+          DO j1=s1,e1
+             matc%val(k+1:k+nc2) = mata%val(j1)*matb%val(s2:e2)
+             matc%cols(k+1:k+nc2) = (mata%cols(j1)-1)*n2 + matb%cols(s2:e2)
+             k = k+nc2
+             matc%irow((i1-1)*m2+i2+1) = k+1  ! Points to next row
+          END DO
+       END DO
+    END DO
+!
+!  Search the diagonals
+    DO i=1,matc%mrows
+       s = matc%irow(i)
+       e = matc%irow(i+1)-1
+       matc%idiag(i) = isearch(matc%cols(s:e),i) + s
+    END DO
+  END SUBROUTINE kron_csr
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+END MODULE csr
diff --git a/src/lapack_extra.f b/src/lapack_extra.f
new file mode 100644
index 0000000..8f5248f
--- /dev/null
+++ b/src/lapack_extra.f
@@ -0,0 +1,718 @@
+!>
+!> @file lapack_extra.f
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+      DOUBLE PRECISION FUNCTION DOPGB( SUBNAM, M, N, KL, KU, IPIV )
+*
+*  -- LAPACK timing routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER*6        SUBNAM
+      INTEGER            KL, KU, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DOPGB counts operations for the LU factorization of a band matrix
+*  xGBTRF.
+*
+*  Arguments
+*  =========
+*
+*  SUBNAM  (input) CHARACTER*6
+*          The name of the subroutine.
+*
+*  M       (input) INTEGER
+*          The number of rows of the coefficient matrix.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the coefficient matrix.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals of the matrix.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals of the matrix.  KU >= 0.
+*
+*  IPIV    (input)  INTEGER array, dimension (min(M,N))
+*          The vector of pivot indices from DGBTRF or ZGBTRF.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            CORZ, SORD
+      CHARACTER          C1
+      CHARACTER*2        C2
+      CHARACTER*3        C3
+      INTEGER            I, J, JP, JU, KM
+      DOUBLE PRECISION   ADDFAC, ADDS, MULFAC, MULTS
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME, LSAMEN
+      EXTERNAL           LSAME, LSAMEN
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      DOPGB = 0
+      MULTS = 0
+      ADDS = 0
+      C1 = SUBNAM( 1: 1 )
+      C2 = SUBNAM( 2: 3 )
+      C3 = SUBNAM( 4: 6 )
+      SORD = LSAME( C1, 'S' ) .OR. LSAME( C1, 'D' )
+      CORZ = LSAME( C1, 'C' ) .OR. LSAME( C1, 'Z' )
+      IF( .NOT.( SORD .OR. CORZ ) )
+     $   RETURN
+      IF( LSAME( C1, 'S' ) .OR. LSAME( C1, 'D' ) ) THEN
+         ADDFAC = 1
+         MULFAC = 1
+      ELSE
+         ADDFAC = 2
+         MULFAC = 6
+      END IF
+*
+*     --------------------------
+*     GB:  General Band matrices
+*     --------------------------
+*
+      IF( LSAMEN( 2, C2, 'GB' ) ) THEN
+*
+*        xGBTRF:  M, N, KL, KU  =>  M, N, KL, KU
+*
+         IF( LSAMEN( 3, C3, 'TRF' ) ) THEN
+            JU = 1
+            DO 10 J = 1, MIN( M, N )
+               KM = MIN( KL, M-J )
+               JP = IPIV( J )
+               JU = MAX( JU, MIN( JP+KU, N ) )
+               IF( KM.GT.0 ) THEN
+                  MULTS = MULTS + KM*( 1+JU-J )
+                  ADDS = ADDS + KM*( JU-J )
+               END IF
+   10       CONTINUE
+         END IF
+*
+*     ---------------------------------
+*     GT:  General Tridiagonal matrices
+*     ---------------------------------
+*
+      ELSE IF( LSAMEN( 2, C2, 'GT' ) ) THEN
+*
+*        xGTTRF:  N  =>  M
+*
+         IF( LSAMEN( 3, C3, 'TRF' ) ) THEN
+            MULTS = 2*( M-1 )
+            ADDS = M - 1
+            DO 20 I = 1, M - 2
+               IF( IPIV( I ).NE.I )
+     $            MULTS = MULTS + 1
+   20       CONTINUE
+*
+*        xGTTRS:  N, NRHS  =>  M, N
+*
+         ELSE IF( LSAMEN( 3, C3, 'TRS' ) ) THEN
+            MULTS = 4*N*( M-1 )
+            ADDS = 3*N*( M-1 )
+*
+*        xGTSV:   N, NRHS  =>  M, N
+*
+         ELSE IF( LSAMEN( 3, C3, 'SV ' ) ) THEN
+            MULTS = ( 4*N+2 )*( M-1 )
+            ADDS = ( 3*N+1 )*( M-1 )
+            DO 30 I = 1, M - 2
+               IF( IPIV( I ).NE.I )
+     $            MULTS = MULTS + 1
+   30       CONTINUE
+         END IF
+      END IF
+*
+      DOPGB = MULFAC*MULTS + ADDFAC*ADDS
+      RETURN
+*
+*     End of DOPGB
+*
+      END
+      DOUBLE PRECISION FUNCTION DOPLA( SUBNAM, M, N, KL, KU, NB )
+*
+*  -- LAPACK timing routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER*6        SUBNAM
+      INTEGER            KL, KU, M, N, NB
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DOPLA computes an approximation of the number of floating point
+*  operations used by the subroutine SUBNAM with the given values
+*  of the parameters M, N, KL, KU, and NB.
+*
+*  This version counts operations for the LAPACK subroutines.
+*
+*  Arguments
+*  =========
+*
+*  SUBNAM  (input) CHARACTER*6
+*          The name of the subroutine.
+*
+*  M       (input) INTEGER
+*          The number of rows of the coefficient matrix.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the coefficient matrix.
+*          For solve routine when the matrix is square,
+*          N is the number of right hand sides.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The lower band width of the coefficient matrix.
+*          If needed, 0 <= KL <= M-1.
+*          For xGEQRS, KL is the number of right hand sides.
+*
+*  KU      (input) INTEGER
+*          The upper band width of the coefficient matrix.
+*          If needed, 0 <= KU <= N-1.
+*
+*  NB      (input) INTEGER
+*          The block size.  If needed, NB >= 1.
+*
+*  Notes
+*  =====
+*
+*  In the comments below, the association is given between arguments
+*  in the requested subroutine and local arguments.  For example,
+*
+*  xGETRS:  N, NRHS  =>  M, N
+*
+*  means that arguments N and NRHS in DGETRS are passed to arguments
+*  M and N in this procedure.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            CORZ, SORD
+      CHARACTER          C1
+      CHARACTER*2        C2
+      CHARACTER*3        C3
+      INTEGER            I
+      DOUBLE PRECISION   ADDFAC, ADDS, EK, EM, EMN, EN, MULFAC, MULTS,
+     $                   WL, WU
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME, LSAMEN
+      EXTERNAL           LSAME, LSAMEN
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     --------------------------------------------------------
+*     Initialize DOPLA to 0 and do a quick return if possible.
+*     --------------------------------------------------------
+*
+      DOPLA = 0
+      MULTS = 0
+      ADDS = 0
+      C1 = SUBNAM( 1: 1 )
+      C2 = SUBNAM( 2: 3 )
+      C3 = SUBNAM( 4: 6 )
+      SORD = LSAME( C1, 'S' ) .OR. LSAME( C1, 'D' )
+      CORZ = LSAME( C1, 'C' ) .OR. LSAME( C1, 'Z' )
+      IF( M.LE.0 .OR. .NOT.( SORD .OR. CORZ ) )
+     $   RETURN
+*
+*     ---------------------------------------------------------
+*     If the coefficient matrix is real, count each add as 1
+*     operation and each multiply as 1 operation.
+*     If the coefficient matrix is complex, count each add as 2
+*     operations and each multiply as 6 operations.
+*     ---------------------------------------------------------
+*
+      IF( LSAME( C1, 'S' ) .OR. LSAME( C1, 'D' ) ) THEN
+         ADDFAC = 1
+         MULFAC = 1
+      ELSE
+         ADDFAC = 2
+         MULFAC = 6
+      END IF
+      EM = M
+      EN = N
+      EK = KL
+*
+*     ---------------------------------
+*     GE:  GEneral rectangular matrices
+*     ---------------------------------
+*
+      IF( LSAMEN( 2, C2, 'GE' ) ) THEN
+*
+*        xGETRF:  M, N  =>  M, N
+*
+         IF( LSAMEN( 3, C3, 'TRF' ) ) THEN
+            EMN = MIN( M, N )
+            ADDS = EMN*( EM*EN-( EM+EN )*( EMN+1.D0 ) / 2.D0+
+     $             ( EMN+1.D0 )*( 2.D0*EMN+1.D0 ) / 6.D0 )
+            MULTS = ADDS + EMN*( EM-( EMN+1.D0 ) / 2.D0 )
+*
+*        xGETRS:  N, NRHS  =>  M, N
+*
+         ELSE IF( LSAMEN( 3, C3, 'TRS' ) ) THEN
+            MULTS = EN*EM*EM
+            ADDS = EN*( EM*( EM-1.D0 ) )
+*
+*        xGETRI:  N  =>  M
+*
+         ELSE IF( LSAMEN( 3, C3, 'TRI' ) ) THEN
+            MULTS = EM*( 5.D0 / 6.D0+EM*( 1.D0 / 2.D0+EM*( 2.D0 /
+     $              3.D0 ) ) )
+            ADDS = EM*( 5.D0 / 6.D0+EM*( -3.D0 / 2.D0+EM*( 2.D0 /
+     $             3.D0 ) ) )
+*
+*        xGEQRF or xGEQLF:  M, N  =>  M, N
+*
+         ELSE IF( LSAMEN( 3, C3, 'QRF' ) .OR.
+     $            LSAMEN( 3, C3, 'QR2' ) .OR.
+     $            LSAMEN( 3, C3, 'QLF' ) .OR. LSAMEN( 3, C3, 'QL2' ) )
+     $             THEN
+            IF( M.GE.N ) THEN
+               MULTS = EN*( ( ( 23.D0 / 6.D0 )+EM+EN / 2.D0 )+EN*
+     $                 ( EM-EN / 3.D0 ) )
+               ADDS = EN*( ( 5.D0 / 6.D0 )+EN*
+     $                ( 1.D0 / 2.D0+( EM-EN / 3.D0 ) ) )
+            ELSE
+               MULTS = EM*( ( ( 23.D0 / 6.D0 )+2.D0*EN-EM / 2.D0 )+EM*
+     $                 ( EN-EM / 3.D0 ) )
+               ADDS = EM*( ( 5.D0 / 6.D0 )+EN-EM / 2.D0+EM*
+     $                ( EN-EM / 3.D0 ) )
+            END IF
+*
+*        xGERQF or xGELQF:  M, N  =>  M, N
+*
+         ELSE IF( LSAMEN( 3, C3, 'RQF' ) .OR.
+     $            LSAMEN( 3, C3, 'RQ2' ) .OR.
+     $            LSAMEN( 3, C3, 'LQF' ) .OR. LSAMEN( 3, C3, 'LQ2' ) )
+     $             THEN
+            IF( M.GE.N ) THEN
+               MULTS = EN*( ( ( 29.D0 / 6.D0 )+EM+EN / 2.D0 )+EN*
+     $                 ( EM-EN / 3.D0 ) )
+               ADDS = EN*( ( 5.D0 / 6.D0 )+EM+EN*
+     $                ( -1.D0 / 2.D0+( EM-EN / 3.D0 ) ) )
+            ELSE
+               MULTS = EM*( ( ( 29.D0 / 6.D0 )+2.D0*EN-EM / 2.D0 )+EM*
+     $                 ( EN-EM / 3.D0 ) )
+               ADDS = EM*( ( 5.D0 / 6.D0 )+EM / 2.D0+EM*
+     $                ( EN-EM / 3.D0 ) )
+            END IF
+*
+*        xGEQPF: M, N => M, N
+*
+         ELSE IF( LSAMEN( 3, C3, 'QPF' ) ) THEN
+            EMN = MIN( M, N )
+            MULTS = 2*EN*EN + EMN*( 3*EM+5*EN+2*EM*EN-( EMN+1 )*
+     $              ( 4+EN+EM-( 2*EMN+1 ) / 3 ) )
+            ADDS = EN*EN + EMN*( 2*EM+EN+2*EM*EN-( EMN+1 )*
+     $             ( 2+EN+EM-( 2*EMN+1 ) / 3 ) )
+*
+*        xGEQRS or xGERQS:  M, N, NRHS  =>  M, N, KL
+*
+         ELSE IF( LSAMEN( 3, C3, 'QRS' ) .OR. LSAMEN( 3, C3, 'RQS' ) )
+     $             THEN
+            MULTS = EK*( EN*( 2.D0-EK )+EM*
+     $              ( 2.D0*EN+( EM+1.D0 ) / 2.D0 ) )
+            ADDS = EK*( EN*( 1.D0-EK )+EM*
+     $             ( 2.D0*EN+( EM-1.D0 ) / 2.D0 ) )
+*
+*        xGELQS or xGEQLS:  M, N, NRHS  =>  M, N, KL
+*
+         ELSE IF( LSAMEN( 3, C3, 'LQS' ) .OR. LSAMEN( 3, C3, 'QLS' ) )
+     $             THEN
+            MULTS = EK*( EM*( 2.D0-EK )+EN*
+     $              ( 2.D0*EM+( EN+1.D0 ) / 2.D0 ) )
+            ADDS = EK*( EM*( 1.D0-EK )+EN*
+     $             ( 2.D0*EM+( EN-1.D0 ) / 2.D0 ) )
+*
+*        xGEBRD:  M, N  =>  M, N
+*
+         ELSE IF( LSAMEN( 3, C3, 'BRD' ) ) THEN
+            IF( M.GE.N ) THEN
+               MULTS = EN*( 20.D0 / 3.D0+EN*
+     $                 ( 2.D0+( 2.D0*EM-( 2.D0 / 3.D0 )*EN ) ) )
+               ADDS = EN*( 5.D0 / 3.D0+( EN-EM )+EN*
+     $                ( 2.D0*EM-( 2.D0 / 3.D0 )*EN ) )
+            ELSE
+               MULTS = EM*( 20.D0 / 3.D0+EM*
+     $                 ( 2.D0+( 2.D0*EN-( 2.D0 / 3.D0 )*EM ) ) )
+               ADDS = EM*( 5.D0 / 3.D0+( EM-EN )+EM*
+     $                ( 2.D0*EN-( 2.D0 / 3.D0 )*EM ) )
+            END IF
+*
+*        xGEHRD:  N  =>  M
+*
+         ELSE IF( LSAMEN( 3, C3, 'HRD' ) ) THEN
+            IF( M.EQ.1 ) THEN
+               MULTS = 0.D0
+               ADDS = 0.D0
+            ELSE
+               MULTS = -13.D0 + EM*( -7.D0 / 6.D0+EM*
+     $                 ( 0.5D0+EM*( 5.D0 / 3.D0 ) ) )
+               ADDS = -8.D0 + EM*( -2.D0 / 3.D0+EM*
+     $                ( -1.D0+EM*( 5.D0 / 3.D0 ) ) )
+            END IF
+*
+         END IF
+*
+*     ----------------------------
+*     GB:  General Banded matrices
+*     ----------------------------
+*        Note:  The operation count is overestimated because
+*        it is assumed that the factor U fills in to the maximum
+*        extent, i.e., that its bandwidth goes from KU to KL + KU.
+*
+      ELSE IF( LSAMEN( 2, C2, 'GB' ) ) THEN
+*
+*        xGBTRF:  M, N, KL, KU  =>  M, N, KL, KU
+*
+         IF( LSAMEN( 3, C3, 'TRF' ) ) THEN
+            DO 10 I = MIN( M, N ), 1, -1
+               WL = MAX( 0, MIN( KL, M-I ) )
+               WU = MAX( 0, MIN( KL+KU, N-I ) )
+               MULTS = MULTS + WL*( 1.D0+WU )
+               ADDS = ADDS + WL*WU
+   10       CONTINUE
+*
+*        xGBTRS:  N, NRHS, KL, KU  =>  M, N, KL, KU
+*
+         ELSE IF( LSAMEN( 3, C3, 'TRS' ) ) THEN
+            WL = MAX( 0, MIN( KL, M-1 ) )
+            WU = MAX( 0, MIN( KL+KU, M-1 ) )
+            MULTS = EN*( EM*( WL+1.D0+WU )-0.5D0*
+     $              ( WL*( WL+1.D0 )+WU*( WU+1.D0 ) ) )
+            ADDS = EN*( EM*( WL+WU )-0.5D0*
+     $             ( WL*( WL+1.D0 )+WU*( WU+1.D0 ) ) )
+*
+         END IF
+*
+*     --------------------------------------
+*     PO:  POsitive definite matrices
+*     PP:  Positive definite Packed matrices
+*     --------------------------------------
+*
+      ELSE IF( LSAMEN( 2, C2, 'PO' ) .OR. LSAMEN( 2, C2, 'PP' ) ) THEN
+*
+*        xPOTRF:  N  =>  M
+*
+         IF( LSAMEN( 3, C3, 'TRF' ) ) THEN
+            MULTS = EM*( 1.D0 / 3.D0+EM*( 1.D0 / 2.D0+EM*( 1.D0 /
+     $              6.D0 ) ) )
+            ADDS = ( 1.D0 / 6.D0 )*EM*( -1.D0+EM*EM )
+*
+*        xPOTRS:  N, NRHS  =>  M, N
+*
+         ELSE IF( LSAMEN( 3, C3, 'TRS' ) ) THEN
+            MULTS = EN*( EM*( EM+1.D0 ) )
+            ADDS = EN*( EM*( EM-1.D0 ) )
+*
+*        xPOTRI:  N  =>  M
+*
+         ELSE IF( LSAMEN( 3, C3, 'TRI' ) ) THEN
+            MULTS = EM*( 2.D0 / 3.D0+EM*( 1.D0+EM*( 1.D0 / 3.D0 ) ) )
+            ADDS = EM*( 1.D0 / 6.D0+EM*( -1.D0 / 2.D0+EM*( 1.D0 /
+     $             3.D0 ) ) )
+*
+         END IF
+*
+*     ------------------------------------
+*     PB:  Positive definite Band matrices
+*     ------------------------------------
+*
+      ELSE IF( LSAMEN( 2, C2, 'PB' ) ) THEN
+*
+*        xPBTRF:  N, K  =>  M, KL
+*
+         IF( LSAMEN( 3, C3, 'TRF' ) ) THEN
+            MULTS = EK*( -2.D0 / 3.D0+EK*( -1.D0+EK*( -1.D0 / 3.D0 ) ) )
+     $               + EM*( 1.D0+EK*( 3.D0 / 2.D0+EK*( 1.D0 / 2.D0 ) ) )
+            ADDS = EK*( -1.D0 / 6.D0+EK*( -1.D0 / 2.D0+EK*( -1.D0 /
+     $             3.D0 ) ) ) + EM*( EK / 2.D0*( 1.D0+EK ) )
+*
+*        xPBTRS:  N, NRHS, K  =>  M, N, KL
+*
+         ELSE IF( LSAMEN( 3, C3, 'TRS' ) ) THEN
+            MULTS = EN*( ( 2*EM-EK )*( EK+1.D0 ) )
+            ADDS = EN*( EK*( 2*EM-( EK+1.D0 ) ) )
+*
+         END IF
+*
+*     ----------------------------------
+*     PT:  Positive definite Tridiagonal
+*     ----------------------------------
+*
+      ELSE IF( LSAMEN( 2, C2, 'PT' ) ) THEN
+*
+*        xPTTRF:  N  =>  M
+*
+         IF( LSAMEN( 3, C3, 'TRF' ) ) THEN
+            MULTS = 2*( EM-1 )
+            ADDS = EM - 1
+*
+*        xPTTRS:  N, NRHS  =>  M, N
+*
+         ELSE IF( LSAMEN( 3, C3, 'TRS' ) ) THEN
+            MULTS = EN*( 3*EM-2 )
+            ADDS = EN*( 2*( EM-1 ) )
+*
+*        xPTSV:  N, NRHS  =>  M, N
+*
+         ELSE IF( LSAMEN( 3, C3, 'SV ' ) ) THEN
+            MULTS = 2*( EM-1 ) + EN*( 3*EM-2 )
+            ADDS = EM - 1 + EN*( 2*( EM-1 ) )
+         END IF
+*
+*     --------------------------------------------------------
+*     SY:  SYmmetric indefinite matrices
+*     SP:  Symmetric indefinite Packed matrices
+*     HE:  HErmitian indefinite matrices (complex only)
+*     HP:  Hermitian indefinite Packed matrices (complex only)
+*     --------------------------------------------------------
+*
+      ELSE IF( LSAMEN( 2, C2, 'SY' ) .OR. LSAMEN( 2, C2, 'SP' ) .OR.
+     $         LSAMEN( 3, SUBNAM, 'CHE' ) .OR.
+     $         LSAMEN( 3, SUBNAM, 'ZHE' ) .OR.
+     $         LSAMEN( 3, SUBNAM, 'CHP' ) .OR.
+     $         LSAMEN( 3, SUBNAM, 'ZHP' ) ) THEN
+*
+*        xSYTRF:  N  =>  M
+*
+         IF( LSAMEN( 3, C3, 'TRF' ) ) THEN
+            MULTS = EM*( 10.D0 / 3.D0+EM*
+     $              ( 1.D0 / 2.D0+EM*( 1.D0 / 6.D0 ) ) )
+            ADDS = EM / 6.D0*( -1.D0+EM*EM )
+*
+*        xSYTRS:  N, NRHS  =>  M, N
+*
+         ELSE IF( LSAMEN( 3, C3, 'TRS' ) ) THEN
+            MULTS = EN*EM*EM
+            ADDS = EN*( EM*( EM-1.D0 ) )
+*
+*        xSYTRI:  N  =>  M
+*
+         ELSE IF( LSAMEN( 3, C3, 'TRI' ) ) THEN
+            MULTS = EM*( 2.D0 / 3.D0+EM*EM*( 1.D0 / 3.D0 ) )
+            ADDS = EM*( -1.D0 / 3.D0+EM*EM*( 1.D0 / 3.D0 ) )
+*
+*        xSYTRD, xSYTD2:  N  =>  M
+*
+         ELSE IF( LSAMEN( 3, C3, 'TRD' ) .OR. LSAMEN( 3, C3, 'TD2' ) )
+     $             THEN
+            IF( M.EQ.1 ) THEN
+               MULTS = 0.D0
+               ADDS = 0.D0
+            ELSE
+               MULTS = -15.D0 + EM*( -1.D0 / 6.D0+EM*
+     $                 ( 5.D0 / 2.D0+EM*( 2.D0 / 3.D0 ) ) )
+               ADDS = -4.D0 + EM*( -8.D0 / 3.D0+EM*
+     $                ( 1.D0+EM*( 2.D0 / 3.D0 ) ) )
+            END IF
+         END IF
+*
+*     -------------------
+*     Triangular matrices
+*     -------------------
+*
+      ELSE IF( LSAMEN( 2, C2, 'TR' ) .OR. LSAMEN( 2, C2, 'TP' ) ) THEN
+*
+*        xTRTRS:  N, NRHS  =>  M, N
+*
+         IF( LSAMEN( 3, C3, 'TRS' ) ) THEN
+            MULTS = EN*EM*( EM+1.D0 ) / 2.D0
+            ADDS = EN*EM*( EM-1.D0 ) / 2.D0
+*
+*        xTRTRI:  N  =>  M
+*
+         ELSE IF( LSAMEN( 3, C3, 'TRI' ) ) THEN
+            MULTS = EM*( 1.D0 / 3.D0+EM*( 1.D0 / 2.D0+EM*( 1.D0 /
+     $              6.D0 ) ) )
+            ADDS = EM*( 1.D0 / 3.D0+EM*( -1.D0 / 2.D0+EM*( 1.D0 /
+     $             6.D0 ) ) )
+*
+         END IF
+*
+      ELSE IF( LSAMEN( 2, C2, 'TB' ) ) THEN
+*
+*        xTBTRS:  N, NRHS, K  =>  M, N, KL
+*
+         IF( LSAMEN( 3, C3, 'TRS' ) ) THEN
+            MULTS = EN*( EM*( EM+1.D0 ) / 2.D0-( EM-EK-1.D0 )*
+     $              ( EM-EK ) / 2.D0 )
+            ADDS = EN*( EM*( EM-1.D0 ) / 2.D0-( EM-EK-1.D0 )*( EM-EK ) /
+     $             2.D0 )
+         END IF
+*
+*     --------------------
+*     Trapezoidal matrices
+*     --------------------
+*
+      ELSE IF( LSAMEN( 2, C2, 'TZ' ) ) THEN
+*
+*        xTZRQF:  M, N => M, N
+*
+         IF( LSAMEN( 3, C3, 'RQF' ) ) THEN
+            EMN = MIN( M, N )
+            MULTS = 3*EM*( EN-EM+1 ) + ( 2*EN-2*EM+3 )*
+     $              ( EM*EM-EMN*( EMN+1 ) / 2 )
+            ADDS = ( EN-EM+1 )*( EM+2*EM*EM-EMN*( EMN+1 ) )
+         END IF
+*
+*     -------------------
+*     Orthogonal matrices
+*     -------------------
+*
+      ELSE IF( ( SORD .AND. LSAMEN( 2, C2, 'OR' ) ) .OR.
+     $         ( CORZ .AND. LSAMEN( 2, C2, 'UN' ) ) ) THEN
+*
+*        -MQR, -MLQ, -MQL, or -MRQ:  M, N, K, SIDE  =>  M, N, KL, KU
+*           where KU<= 0 indicates SIDE = 'L'
+*           and   KU> 0  indicates SIDE = 'R'
+*
+         IF( LSAMEN( 3, C3, 'MQR' ) .OR. LSAMEN( 3, C3, 'MLQ' ) .OR.
+     $       LSAMEN( 3, C3, 'MQL' ) .OR. LSAMEN( 3, C3, 'MRQ' ) ) THEN
+            IF( KU.LE.0 ) THEN
+               MULTS = EK*EN*( 2.D0*EM+2.D0-EK )
+               ADDS = EK*EN*( 2.D0*EM+1.D0-EK )
+            ELSE
+               MULTS = EK*( EM*( 2.D0*EN-EK )+
+     $                 ( EM+EN+( 1.D0-EK ) / 2.D0 ) )
+               ADDS = EK*EM*( 2.D0*EN+1.D0-EK )
+            END IF
+*
+*        -GQR or -GQL:  M, N, K  =>  M, N, KL
+*
+         ELSE IF( LSAMEN( 3, C3, 'GQR' ) .OR. LSAMEN( 3, C3, 'GQL' ) )
+     $             THEN
+            MULTS = EK*( -5.D0 / 3.D0+( 2.D0*EN-EK )+
+     $              ( 2.D0*EM*EN+EK*( ( 2.D0 / 3.D0 )*EK-EM-EN ) ) )
+            ADDS = EK*( 1.D0 / 3.D0+( EN-EM )+
+     $             ( 2.D0*EM*EN+EK*( ( 2.D0 / 3.D0 )*EK-EM-EN ) ) )
+*
+*        -GLQ or -GRQ:  M, N, K  =>  M, N, KL
+*
+         ELSE IF( LSAMEN( 3, C3, 'GLQ' ) .OR. LSAMEN( 3, C3, 'GRQ' ) )
+     $             THEN
+            MULTS = EK*( -2.D0 / 3.D0+( EM+EN-EK )+
+     $              ( 2.D0*EM*EN+EK*( ( 2.D0 / 3.D0 )*EK-EM-EN ) ) )
+            ADDS = EK*( 1.D0 / 3.D0+( EM-EN )+
+     $             ( 2.D0*EM*EN+EK*( ( 2.D0 / 3.D0 )*EK-EM-EN ) ) )
+*
+         END IF
+*
+      END IF
+*
+      DOPLA = MULFAC*MULTS + ADDFAC*ADDS
+*
+      RETURN
+*
+*     End of DOPLA
+*
+      END
+      LOGICAL          FUNCTION LSAMEN( N, CA, CB )
+*
+*  -- LAPACK auxiliary routine (version 2.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER*( * )    CA, CB
+      INTEGER            N
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  LSAMEN  tests if the first N letters of CA are the same as the
+*  first N letters of CB, regardless of case.
+*  LSAMEN returns .TRUE. if CA and CB are equivalent except for case
+*  and .FALSE. otherwise.  LSAMEN also returns .FALSE. if LEN( CA )
+*  or LEN( CB ) is less than N.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of characters in CA and CB to be compared.
+*
+*  CA      (input) CHARACTER*(*)
+*  CB      (input) CHARACTER*(*)
+*          CA and CB specify two character strings of length at least N.
+*          Only the first N characters of each string will be accessed.
+*
+* =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          LEN
+*     ..
+*     .. Executable Statements ..
+*
+      LSAMEN = .FALSE.
+      IF( LEN( CA ).LT.N .OR. LEN( CB ).LT.N )
+     $   GO TO 20
+*
+*     Do for each character in the two strings.
+*
+      DO 10 I = 1, N
+*
+*        Test if the characters are equal using LSAME.
+*
+         IF( .NOT.LSAME( CA( I: I ), CB( I: I ) ) )
+     $      GO TO 20
+*
+   10 CONTINUE
+      LSAMEN = .TRUE.
+*
+   20 CONTINUE
+      RETURN
+*
+*     End of LSAMEN
+*
+      END
diff --git a/src/math_util.f90 b/src/math_util.f90
new file mode 100644
index 0000000..60817ff
--- /dev/null
+++ b/src/math_util.f90
@@ -0,0 +1,291 @@
+!>
+!> @file math_util.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+MODULE math_util
+!
+!    MATH_UTIL: Some math utilities.
+!
+!    T.M. Tran, CRPP-EPFL
+!    December 2012
+!
+!
+!   Notes:
+!     - Assume the Fortran 2008 intrinsic BESSEL_JN(n,x) exists!
+!
+  IMPLICIT NONE
+  DOUBLE PRECISION, PARAMETER :: pi=4.0d0*ATAN(1.0d0)
+!
+CONTAINS
+  ELEMENTAL FUNCTION bessjp(n,x)
+!
+!    Derivative of J_n
+!
+    DOUBLE PRECISION :: bessjp
+    INTEGER, INTENT(in) :: n
+    DOUBLE PRECISION, INTENT(in) :: x
+!
+    IF(n.EQ.0) THEN
+       bessjp = -bessel_jn(1,x)
+    ELSE
+       bessjp = 0.5d0*(bessel_jn(n-1,x)-bessel_jn(n+1,x))
+    END IF
+  END FUNCTION bessjp
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  FUNCTION root_bessj(n, s, info)
+!
+!   s^th root of j_n
+!
+    DOUBLE PRECISION :: root_bessj
+    INTEGER, INTENT(in) :: n
+    INTEGER, INTENT(in) :: s
+    INTEGER, OPTIONAL, INTENT(out) :: info
+!
+    DOUBLE PRECISION :: b0,b1,b2,b3,b5,b7,t0,t1,t3,t5,t7,fn,fk,f1,f2,f3
+    DOUBLE PRECISION :: c1=1.8557571d0, c2=1.033150d0, c3=.00397d0, c4=.0908d0,&
+         &              c5=.043d0
+    DOUBLE PRECISION :: zero
+    INTEGER :: iter
+!
+    fn = REAL(ABS(n),8)
+    IF(s.EQ.1) THEN ! first zero
+       IF(n.EQ.0) THEN
+          zero = c1+c2-c3-c4+c5
+       ELSE
+          f1 = fn**(1.d0/3.d0)
+          f2 = f1*f1*fn
+          f3 = f1*fn*fn
+          zero = fn+c1*f1+(c2/f1)-(c3/fn)-(c4/f2)+(c5/f3)
+       END IF
+    ELSE           ! Other zeros
+       t0 = 4.d0*fn*fn
+       t1 = t0-1.d0
+       t3 = 4.d0*t1*(7.d0*t0-31.d0)
+       t5 = 32.d0*t1*((83.d0*t0-982.d0)*t0+3779.d0)
+       t7 = 64.d0*t1*(((6949.d0*t0-153855.d0)*t0+1585743.d0)*t0  &
+            -6277237.d0)
+       fk = REAL(s,8)
+!
+       b0 = (fk+.5d0*fn-.25d0)*pi!    mac mahon's series for k>>n
+       b1 = 8.d0*b0
+       b2 = b1*b1
+       b3 = 3.d0*b1*b2
+       b5 = 5.d0*b3*b2
+       b7 = 7.d0*b5*b2
+       zero = b0-(t1/b1)-(t3/b3)-(t5/b5)-(t7/b7)
+    END IF
+    CALL newton(iter)
+    IF(PRESENT(info)) info = iter
+    root_bessj = zero
+  CONTAINS
+    SUBROUTINE newton(iter)
+      INTEGER, INTENT(out) :: iter
+      INTEGER :: itermx = 20
+      DOUBLE PRECISION :: dx, tol
+      tol = EPSILON(1.0d0)*zero
+      iter = 0
+      DO
+         iter = iter+1
+         dx = -bessel_jn(n,zero)/bessjp(n,zero)
+         zero = zero+dx
+         IF(iter.GE.itermx .OR. ABS(dx).LT.tol) EXIT
+      END DO
+    END SUBROUTINE newton
+  END FUNCTION root_bessj
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  FUNCTION root_bessjp(n, s, info)
+!
+!   s^th root of derivative of j_n
+!
+    DOUBLE PRECISION :: root_bessjp
+    INTEGER, INTENT(in) :: n
+    INTEGER, INTENT(in) :: s
+    INTEGER, OPTIONAL, INTENT(out) :: info
+!
+    DOUBLE PRECISION :: c1=0.8086165D0, c2=0.072490D0, c3=.05097D0, c4=.0094D0
+    DOUBLE PRECISION :: b0,b1,b2,b3,b5,b7,t0,t1,t3,t5,t7,fn,fk,f1,f2
+    INTEGER :: iter
+    DOUBLE PRECISION :: zero
+!
+    IF(n.EQ.0 .AND. s.EQ.1) THEN
+       root_bessjp = 0.0d0
+       IF(PRESENT(info)) info = 0
+       RETURN
+    END IF
+!
+    fn = REAL(ABS(n),8)
+    fk = REAL(s,8)
+!
+    IF(s.GT.1) THEN
+!
+!        McMahon's series for s >> n
+       b0 = (fk+.5d0*fn-.75d0)*pi
+       b1 = 8.d0*b0
+       b2 = b1*b1
+       b3 = 3.d0*b1*b2
+       b5 = 5.d0*b3*b2
+       b7 = 7.d0*b5*b2
+       t0 = 4.d0*fn*fn
+       t1 = t0+3.d0
+       t3 = 4.d0*((7.d0*t0+82.d0)*t0-9.d0)
+       t5 = 32.d0*(((83.d0*t0+2075.d0)*t0-3039.d0)*t0+3537.d0)
+       t7 = 64.d0*((((6949.d0*t0+296492.d0)*t0-1248002.d0)*t0  &
+            +7414380.d0)*t0-5853627.d0)
+       zero = b0-(t1/b1)-(t3/b3)-(t5/b5)-(t7/b7)
+    ELSE              
+!
+!        Tchebychev's series for s <= n
+       f1 = fn**(1.d0/3.d0)
+       f2 = f1*f1*fn
+       zero = fn+c1*f1+(c2/f1)-(c3/fn)+(c4/f2)
+    END IF
+!
+    CALL newton(iter)
+    root_bessjp = zero
+    IF(PRESENT(info)) info = iter
+  CONTAINS
+    SUBROUTINE newton(iter)
+      INTEGER, INTENT(out) :: iter
+      INTEGER :: itermx = 20
+      DOUBLE PRECISION :: dx, tol
+      tol = EPSILON(1.0d0)*zero
+      iter = 0
+      DO
+         iter = iter+1
+         dx = -bessel_jn(n,zero)/bessjp(n,zero)
+         dx = -2.0d0 * (bessel_jn(n-1,zero)-bessel_jn(n+1,zero)) / &
+              &  (bessel_jn(n-2,zero)-2.d0*bessel_jn(n,zero)+&
+              &   bessel_jn(n+2,zero))
+         zero = zero+dx
+         IF(iter.GE.itermx .OR. ABS(dx).LT.tol) EXIT
+      END DO
+    END SUBROUTINE newton
+  END FUNCTION root_bessjp
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+!!$  PURE FUNCTION BESSEL_JN(n,x) RESULT(bessj)
+!!$    DOUBLE PRECISION, INTENT(in) :: x
+!!$    DOUBLE PRECISION ::  bessj
+!!$    DOUBLE PRECISION BIGNO,BIGNI
+!!$    INTEGER n,IACC
+!!$    PARAMETER (IACC=40,BIGNO=1.d10,BIGNI=1.d-10)
+!!$    INTEGER j,jsum,m
+!!$    DOUBLE PRECISION ax,bj,bjm,bjp,sum,tox,bessj0,bessj1
+!!$    IF( n.EQ.0 ) THEN 
+!!$       bessj = bessj0(x)
+!!$       RETURN
+!!$    ELSE IF( n.EQ.1 ) THEN 
+!!$       bessj = bessj1(x)
+!!$       RETURN
+!!$    ENDIF
+!!$    ax=ABS(x)
+!!$    IF(ax.EQ.0.d0)THEN
+!!$       bessj=0.d0
+!!$    ELSE IF(ax.GT.float(n))THEN
+!!$       tox=2./ax
+!!$       bjm=bessj0(ax)
+!!$       bj=bessj1(ax)
+!!$       DO j=1,n-1
+!!$          bjp=j*tox*bj-bjm
+!!$          bjm=bj
+!!$          bj=bjp
+!!$       END DO
+!!$       bessj=bj
+!!$    ELSE
+!!$       tox=2./ax
+!!$       m=2*((n+INT(SQRT(float(IACC*n))))/2)
+!!$       bessj=0.d0
+!!$       jsum=0
+!!$       sum=0.d0
+!!$       bjp=0.d0
+!!$       bj=1.
+!!$       DO j=m,1,-1
+!!$          bjm=j*tox*bj-bjp
+!!$          bjp=bj
+!!$          bj=bjm
+!!$          IF(ABS(bj).GT.BIGNO)THEN
+!!$             bj=bj*BIGNI
+!!$             bjp=bjp*BIGNI
+!!$             bessj=bessj*BIGNI
+!!$             sum=sum*BIGNI
+!!$          ENDIF
+!!$          IF(jsum.NE.0)sum=sum+bj
+!!$          jsum=1-jsum
+!!$          IF(j.EQ.n)bessj=bjp
+!!$       END DO
+!!$       sum=2.*sum-bj
+!!$       bessj=bessj/sum
+!!$    ENDIF
+!!$    IF(x.LT.0.d0.AND.MOD(n,2).EQ.1)bessj=-bessj
+!!$    RETURN
+!!$  END FUNCTION bessel_jn
+!!$
+!!$  PURE FUNCTION bessj0(x)
+!!$    DOUBLE PRECISION, INTENT(in) :: x
+!!$    DOUBLE PRECISION bessj0
+!!$    DOUBLE PRECISION ax,xx,z
+!!$    DOUBLE PRECISION p1,p2,p3,p4,p5,q1,q2,q3,q4,q5,r1,r2,r3,r4,r5,r6,s1,s2,s3,s4,s5,s6,y
+!!$    SAVE p1,p2,p3,p4,p5,q1,q2,q3,q4,q5,r1,r2,r3,r4,r5,r6,s1,s2,s3,s4,s5,s6
+!!$    DATA p1,p2,p3,p4,p5/1.d0,-.1098628627d-2,.2734510407d-4,-.2073370639d-5,.2093887211d-6/
+!!$    DATA q1,q2,q3,q4,q5/-.1562499995d-1,.1430488765d-3,-.6911147651d-5,.7621095161d-6,-.934945152d-7/
+!!$    DATA r1,r2,r3,r4,r5,r6/57568490574.d0,-13362590354.d0,651619640.7d0,-11214424.18d0,&
+!!$         &   77392.33017d0,-184.9052456d0/
+!!$    DATA s1,s2,s3,s4,s5,s6/57568490411.d0,1029532985.d0,9494680.718d0,59272.64853d0,267.8532712d0,1.d0/
+!!$    IF(ABS(x).LT.8.)THEN
+!!$       y=x**2
+!!$       bessj0=(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6)))))/(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6)))))
+!!$    ELSE
+!!$       ax=ABS(x)
+!!$       z=8./ax
+!!$       y=z**2
+!!$       xx=ax-.785398164
+!!$       bessj0=SQRT(.636619772/ax)*(COS(xx)*(p1+y*(p2+y*(p3+y*(p4+y*p5))))-z*SIN(xx)*(q1+y*(q2+y*(q3+y*(q4+y*q5)))))
+!!$    ENDIF
+!!$    RETURN
+!!$  END FUNCTION bessj0
+!!$
+!!$  PURE FUNCTION bessj1(x)
+!!$    DOUBLE PRECISION, INTENT(in) :: x
+!!$    DOUBLE PRECISION bessj1
+!!$    DOUBLE PRECISION ax,xx,z
+!!$    DOUBLE PRECISION p1,p2,p3,p4,p5,q1,q2,q3,q4,q5,r1,r2,r3,r4,r5,r6,s1,s2,s3,s4,s5,s6,y
+!!$    SAVE p1,p2,p3,p4,p5,q1,q2,q3,q4,q5,r1,r2,r3,r4,r5,r6,s1,s2,s3,s4,s5,s6
+!!$    DATA r1,r2,r3,r4,r5,r6/72362614232.d0,-7895059235.d0,242396853.1d0,-2972611.439d0,15704.48260d0,-30.16036606d0/
+!!$    DATA s1,s2,s3,s4,s5,s6/144725228442.d0,2300535178.d0,18583304.74d0,99447.43394d0,376.9991397d0,1.d0/
+!!$    DATA p1,p2,p3,p4,p5/1.d0,.183105d-2,-.3516396496d-4,.2457520174d-5,-.240337019d-6/
+!!$    DATA q1,q2,q3,q4,q5/.04687499995d0,-.2002690873d-3,.8449199096d-5,-.88228987d-6,.105787412d-6/
+!!$    IF(ABS(x).LT.8.)THEN
+!!$       y=x**2
+!!$       bessj1=x*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6)))))/(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6)))))
+!!$    ELSE
+!!$       ax=ABS(x)
+!!$       z=8.d0/ax
+!!$       y=z**2
+!!$       xx=ax-2.356194491d0
+!!$       bessj1=SQRT(.636619772d0/ax)*(COS(xx)*(p1+y*(p2+y*(p3+y*(p4+y*p5))))- &
+!!$            & z*SIN(xx)*(q1+y*(q2+y*(q3+y*(q4+y*q5)))))*SIGN(1.d0,x)
+!!$    ENDIF
+!!$    RETURN
+!!$  END FUNCTION bessj1
+
+END MODULE math_util
diff --git a/src/matrix.f90 b/src/matrix.f90
new file mode 100644
index 0000000..22ea822
--- /dev/null
+++ b/src/matrix.f90
@@ -0,0 +1,3295 @@
+!>
+!> @file matrix.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+MODULE matrix
+!
+!    MATRIX: Simple interface to the direct solver LAPACK.
+!
+!    T.M. Tran, CRPP-EPFL
+!    February 2007
+!
+  IMPLICIT NONE
+!
+  TYPE gbmat   ! Lapack General Band matrix storage
+     INTEGER :: kl, ku, rank
+     INTEGER :: mrows, ncols
+     INTEGER :: nterms, kmat
+     INTEGER, DIMENSION(:), POINTER :: piv => null()
+     DOUBLE PRECISION, DIMENSION(:,:), POINTER :: val => null()
+  END TYPE gbmat
+!
+  TYPE gemat   ! Lapack General DENSE matrix storage
+     INTEGER :: rank
+     INTEGER :: mrows, ncols
+     INTEGER :: nterms, kmat
+     INTEGER, DIMENSION(:), POINTER :: piv => null()
+     DOUBLE PRECISION, DIMENSION(:,:), POINTER :: val => null()
+  END TYPE gemat
+!
+  TYPE pbmat   ! Lapack Pack Band matrix storage (super-diagonals)
+     INTEGER :: ku, rank
+     INTEGER :: nterms, kmat
+     DOUBLE PRECISION, DIMENSION(:,:), POINTER :: val => null()
+  END TYPE pbmat
+!
+  TYPE zgbmat   ! Lapack General Band matrix storage
+     INTEGER :: kl, ku, rank
+     INTEGER :: mrows, ncols
+     INTEGER :: nterms, kmat
+     INTEGER, DIMENSION(:), POINTER :: piv => null()
+     DOUBLE COMPLEX, DIMENSION(:,:), POINTER :: val => NULL()
+  END TYPE zgbmat
+!
+  TYPE zgemat   ! Lapack General DENSE matrix storage
+     INTEGER :: rank
+     INTEGER :: mrows, ncols
+     INTEGER :: nterms, kmat
+     INTEGER, DIMENSION(:), POINTER :: piv => null()
+     DOUBLE COMPLEX, DIMENSION(:,:), POINTER :: val => null()
+  END TYPE zgemat
+!
+  TYPE zpbmat   ! Lapack Pack Band matrix storage (super-diagonals)
+     INTEGER :: ku, rank
+     INTEGER :: nterms, kmat
+     DOUBLE COMPLEX, DIMENSION(:,:), POINTER :: val => null()
+  END TYPE zpbmat
+!
+  TYPE periodic_mat
+    TYPE(gbmat) :: mat
+    INTEGER :: nterms
+    DOUBLE PRECISION, DIMENSION(:,:), POINTER :: &
+         &  matu => null(), &
+         &  matvt => null()
+  END TYPE periodic_mat
+!
+  TYPE zperiodic_mat
+    TYPE(zgbmat) :: mat
+    INTEGER :: nterms
+    DOUBLE COMPLEX, DIMENSION(:,:), POINTER :: &
+         &  matu => null(), &
+         &  matvt => null()
+ END TYPE zperiodic_mat
+!
+!--------------------------------------------------------------------------------
+  INTERFACE init
+     MODULE PROCEDURE init_gb,  init_ge,  init_pb, &
+          &           init_zgb, init_zge, init_zpb, &
+          &           init_periodic, init_zperiodic
+  END INTERFACE
+  INTERFACE getvalp
+     MODULE PROCEDURE getvalp_gb,  getvalp_ge,  getvalp_pb, &
+          &           getvalp_zgb, getvalp_zge, getvalp_zpb
+  END INTERFACE
+  INTERFACE mcopy
+     MODULE PROCEDURE mcopy_gb,  mcopy_ge,  mcopy_pb, &
+          &           mcopy_zgb, mcopy_zge, mcopy_zpb, &
+          &           mcopy_periodic, mcopy_zperiodic
+  END INTERFACE
+  INTERFACE maddto
+     MODULE PROCEDURE maddto_gb,  maddto_ge,  maddto_pb, &
+          &           maddto_zgb, maddto_zge, maddto_zpb, &
+          &           maddto_periodic, maddto_zperiodic
+  END INTERFACE
+  INTERFACE destroy
+     MODULE PROCEDURE destroy_gb,  destroy_ge,  destroy_pb,  &
+          &           destroy_zgb, destroy_zge, destroy_zpb, &
+          &           destroy_periodic, destroy_zperiodic
+  END INTERFACE
+  INTERFACE updtmat
+     MODULE PROCEDURE updt_gb,  updt_ge,  updt_pb, &
+          &           updt_zgb, updt_zpb, &
+          &           updt_periodic, updt_zperiodic
+  END INTERFACE
+  INTERFACE getele
+     MODULE PROCEDURE getele_gb,  getele_pb, &
+          &           getele_zgb, getele_zpb, &
+          &           getele_periodic, getele_zperiodic
+  END INTERFACE
+  INTERFACE putele
+     MODULE PROCEDURE putele_gb,  putele_pb, &
+          &           putele_zgb, putele_zpb, &
+          &           putele_periodic, putele_zperiodic
+  END INTERFACE
+  INTERFACE getcol
+     MODULE PROCEDURE getcol_gb,  getcol_pb, &
+          &           getcol_zgb, getcol_zpb, &
+          &           getcol_periodic, getcol_zperiodic
+  END INTERFACE
+  INTERFACE getrow
+     MODULE PROCEDURE getrow_gb,  getrow_ge,  getrow_pb, &
+          &           getrow_zgb, getrow_zpb, &
+          &           getrow_periodic, getrow_zperiodic
+  END INTERFACE
+  INTERFACE putcol
+     MODULE PROCEDURE putcol_gb,  putcol_ge,  putcol_pb, &
+          &           putcol_zgb, putcol_zpb, &
+          &           putcol_periodic, putcol_zperiodic
+  END INTERFACE
+  INTERFACE putrow
+     MODULE PROCEDURE putrow_gb,   putrow_ge,  putrow_pb, &
+          &           putrow_zgb, putrow_zpb, &
+          &           putrow_periodic, putrow_zperiodic
+  END INTERFACE
+  INTERFACE factor
+     MODULE PROCEDURE factor_gb,  factor_ge,  factor_pb, &
+          &           factor_zgb, factor_zge, factor_zpb, &
+          &           factor_periodic, factor_zperiodic
+  END INTERFACE
+  INTERFACE bsolve
+     MODULE PROCEDURE bsolve_gb1,  bsolve_gbn,  bsolve_ge1,  bsolve_gen, &
+          &           bsolve_pb1,  bsolve_pbn, &
+          &           bsolve_periodic1, bsolve_periodicn, &
+          &           bsolve_zperiodic1, bsolve_zperiodicn, &
+          &           bsolve_zgb1, bsolve_zgbn, bsolve_zge1, bsolve_zgen, &
+          &           bsolve_zpb1, bsolve_zpbn
+  END INTERFACE
+  INTERFACE vmx
+     MODULE PROCEDURE vmx_gb,  vmx_gbn,  vmx_pb,  vmx_pbn, &
+          &           vmx_zgb, vmx_zgbn, vmx_zpb, vmx_zpbn, &
+          &           vmx_ge,  vmx_gen,  vmx_zge, vmx_zgen, &
+          &           vmx_periodic, vmx_zperiodic
+  END INTERFACE
+  INTERFACE determinant
+     MODULE PROCEDURE determinant_ge,  determinant_gb,  determinant_pb, &
+          &           determinant_zge, determinant_zgb, determinant_zpb
+  END INTERFACE
+  INTERFACE putmat
+     MODULE PROCEDURE putmat_gb
+  END INTERFACE
+  INTERFACE  getmat
+     MODULE PROCEDURE getmat_gb
+  END INTERFACE
+  INTERFACE kron
+     MODULE PROCEDURE kron_ge
+  END INTERFACE kron
+!
+CONTAINS
+!===========================================================================
+  SUBROUTINE init_ge(n, nterms, mat, kmat, mrows)
+!
+!  Initialize Lapack General Dense matrice
+!
+    INTEGER, INTENT(in) :: n, nterms
+    INTEGER, OPTIONAL :: kmat
+    INTEGER, OPTIONAL :: mrows
+    TYPE(gemat) :: mat
+!
+    mat%ncols = n
+    mat%mrows = n
+    IF(PRESENT(mrows)) THEN
+       mat%mrows = mrows
+    END IF
+    mat%rank = n   ! Warning: ok if square matrix
+    mat%nterms = nterms
+    IF(PRESENT(kmat)) mat%kmat = kmat
+    IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
+    IF( ASSOCIATED(mat%piv)) DEALLOCATE(mat%piv)
+    ALLOCATE(mat%val(mat%mrows,mat%ncols))
+    ALLOCATE(mat%piv(MIN(mat%mrows,mat%ncols)))
+    mat%val = 0.0d0
+    mat%piv = 0
+  END SUBROUTINE init_ge
+!===========================================================================
+  SUBROUTINE init_gb(kl, ku, n, nterms, mat, kmat, mrows)
+!
+!  Initialize Lapack General Banded matrice
+!
+    INTEGER, INTENT(in) :: kl, ku, n, nterms
+    INTEGER, OPTIONAL :: kmat
+    INTEGER, OPTIONAL :: mrows
+    TYPE(gbmat) :: mat
+    INTEGER :: lda
+!
+    mat%kl = kl
+    mat%ku = ku
+    mat%ncols = n
+    mat%mrows = n
+    IF(PRESENT(mrows)) THEN
+       mat%mrows = mrows
+    END IF
+    mat%rank = n   ! Warning: ok if square matrix
+    mat%nterms = nterms
+    IF(PRESENT(kmat)) mat%kmat = kmat
+    lda = 2*kl + ku + 1
+    IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
+    IF( ASSOCIATED(mat%piv)) DEALLOCATE(mat%piv)
+    ALLOCATE(mat%val(lda,n))
+    ALLOCATE(mat%piv(n))
+    mat%val = 0.0d0
+    mat%piv = 0
+  END SUBROUTINE init_gb
+!===========================================================================
+  SUBROUTINE init_periodic(kl, ku, n, nterms, mat, kmat)
+!
+!  Initialize Lapack Periodic General Banded matrice
+!
+    INTEGER, INTENT(in) :: kl, ku, n, nterms
+    INTEGER, OPTIONAL :: kmat
+    TYPE(periodic_mat) :: mat
+    INTEGER :: i,j
+    DOUBLE PRECISION, PARAMETER :: zero=0.0d0, one=1.0d0
+!
+! In band matrix matp%mat is a GB matrix
+    IF( PRESENT(kmat))  THEN
+       CALL init(kl, ku, n, nterms, mat%mat, kmat)
+    ELSE
+       CALL init(kl, ku, n, nterms, mat%mat)
+    END IF
+    mat%nterms = nterms
+!
+!  Off band matrices
+    IF( ASSOCIATED(mat%matu)) DEALLOCATE(mat%matu)
+    IF( ASSOCIATED(mat%matvt)) DEALLOCATE(mat%matvt)
+    ALLOCATE(mat%matu(n, kl+ku))
+    ALLOCATE(mat%matvt(kl+ku,n))
+!
+    mat%matu = zero
+    mat%matvt = zero                 !    kl=3,  ku=2
+    DO j=1,kl                        !   [ 1 0 0 . . ]
+       mat%matu(j,j) = one           !   [ 0 1 0 . . ]
+    END DO                           !   [ 0 0 1 . . ]
+!                                    !   [ 0 . . . . ]
+    DO j=1,ku                        !   [ . . . . . ]
+       i=n-ku+j                      !   [ . . . 1 0 ]
+       mat%matu(i,ku+j) = one        !   [ . . . 0 1 ] 
+    END DO
+  END SUBROUTINE init_periodic
+!===========================================================================
+  SUBROUTINE init_zperiodic(kl, ku, n, nterms, mat, kmat)
+!
+!  Initialize Lapack Periodic General Banded matrice
+!
+    INTEGER, INTENT(in) :: kl, ku, n, nterms
+    INTEGER, OPTIONAL :: kmat
+    TYPE(zperiodic_mat) :: mat
+    INTEGER :: i,j
+    DOUBLE PRECISION, PARAMETER :: zero=0.0d0, one=1.0d0
+!
+! In band matrix matp%mat is a GB matrix
+    IF( PRESENT(kmat))  THEN
+       CALL init(kl, ku, n, nterms, mat%mat, kmat)
+    ELSE
+       CALL init(kl, ku, n, nterms, mat%mat)
+    END IF
+    mat%nterms = nterms
+!
+!  Off band matrices
+    IF( ASSOCIATED(mat%matu)) DEALLOCATE(mat%matu)
+    IF( ASSOCIATED(mat%matvt)) DEALLOCATE(mat%matvt)
+    ALLOCATE(mat%matu(n, kl+ku))
+    ALLOCATE(mat%matvt(kl+ku,n))
+!
+    mat%matu = zero
+    mat%matvt = zero                 !    kl=3,  ku=2
+    DO j=1,kl                        !   [ 1 0 0 . . ]
+       mat%matu(j,j) = one           !   [ 0 1 0 . . ]
+    END DO                           !   [ 0 0 1 . . ]
+!                                    !   [ 0 . . . . ]
+    DO j=1,ku                        !   [ . . . . . ]
+       i=n-ku+j                      !   [ . . . 1 0 ]
+       mat%matu(i,ku+j) = one        !   [ . . . 0 1 ] 
+    END DO
+  END SUBROUTINE init_zperiodic
+!===========================================================================
+  SUBROUTINE init_pb(ku, n, nterms, mat, kmat)
+!
+!  Initialize Lapack Packed Banded matrice
+!
+    INTEGER, INTENT(in) :: ku, n, nterms
+    INTEGER, OPTIONAL :: kmat
+    TYPE(pbmat) :: mat
+    INTEGER :: lda
+!
+    mat%ku = ku
+    mat%rank = n
+    mat%nterms = nterms
+    IF(PRESENT(kmat)) mat%kmat = kmat
+    lda = ku + 1
+    IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
+    ALLOCATE(mat%val(lda,n))
+    mat%val = 0.0d0
+  END SUBROUTINE init_pb
+!===========================================================================
+  SUBROUTINE init_zge(n, nterms, mat, kmat, mrows)
+!
+!  Initialize Lapack General Dense matrice
+!
+    INTEGER, INTENT(in) :: n, nterms
+    INTEGER, OPTIONAL :: kmat
+    INTEGER, OPTIONAL :: mrows
+    TYPE(zgemat) :: mat
+!
+    mat%ncols = n
+    mat%mrows = n
+    IF(PRESENT(mrows)) THEN
+       mat%mrows = mrows
+    END IF
+    mat%rank = n
+    mat%nterms = nterms
+    IF(PRESENT(kmat)) mat%kmat = kmat
+    IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
+    IF( ASSOCIATED(mat%piv)) DEALLOCATE(mat%piv)
+    ALLOCATE(mat%val(mat%mrows,mat%ncols))
+    ALLOCATE(mat%piv(MIN(mat%mrows,mat%ncols)))
+    mat%val = 0.0d0
+    mat%piv = 0
+  END SUBROUTINE init_zge
+!===========================================================================
+  SUBROUTINE init_zgb(kl, ku, n, nterms, mat, kmat, mrows)
+!
+!  Initialize Lapack General Banded matrice
+!
+    INTEGER, INTENT(in) :: kl, ku, n, nterms
+    INTEGER, OPTIONAL :: kmat
+    INTEGER, OPTIONAL :: mrows
+    TYPE(zgbmat) :: mat
+    INTEGER :: lda
+!
+    mat%kl = kl
+    mat%ku = ku
+    mat%ncols = n
+    mat%mrows = n
+    IF(PRESENT(mrows)) THEN
+       mat%mrows = mrows
+    END IF
+    mat%rank = n   ! Warning: ok if square matrix
+    mat%nterms = nterms
+    IF(PRESENT(kmat)) mat%kmat = kmat
+    lda = 2*kl + ku + 1
+    IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
+    IF( ASSOCIATED(mat%piv)) DEALLOCATE(mat%piv)
+    ALLOCATE(mat%val(lda,n))
+    ALLOCATE(mat%piv(n))
+    mat%val = 0.0d0
+    mat%piv = 0
+  END SUBROUTINE init_zgb
+!===========================================================================
+  SUBROUTINE init_zpb(ku, n, nterms, mat, kmat)
+!
+!  Initialize Lapack Packed Banded matrice
+!
+    INTEGER, INTENT(in) :: ku, n, nterms
+     INTEGER, OPTIONAL :: kmat
+   TYPE(zpbmat) :: mat
+    INTEGER :: lda
+!
+    mat%ku = ku
+    mat%rank = n
+    mat%nterms = nterms
+    IF(PRESENT(kmat)) mat%kmat = kmat
+    lda = ku + 1
+    IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
+    ALLOCATE(mat%val(lda,n))
+    mat%val = 0.0d0
+  END SUBROUTINE init_zpb
+!===========================================================================
+  SUBROUTINE mcopy_ge(mata, matb)
+!
+!  Matrix copy: B = A
+!
+    TYPE(gemat) :: mata, matb
+!
+    matb%rank   = mata%rank
+    matb%mrows  = mata%mrows
+    matb%ncols  = mata%ncols
+    matb%nterms = mata%nterms
+    matb%kmat   = mata%kmat
+    IF( ASSOCIATED(matb%val)) DEALLOCATE(matb%val)
+    IF( ASSOCIATED(matb%piv)) DEALLOCATE(matb%piv)
+    ALLOCATE(matb%val(matb%mrows,matb%ncols))
+    ALLOCATE(matb%piv(MIN(matb%mrows,matb%ncols)))
+    matb%val = mata%val
+    matb%piv = mata%piv
+  END SUBROUTINE mcopy_ge
+!===========================================================================
+  SUBROUTINE mcopy_gb(mata, matb)
+!
+!  Matrix copy: B = A
+!
+    TYPE(gbmat) :: mata, matb
+    INTEGER :: n, lda
+!
+    n = mata%rank
+    matb%kl = mata%kl
+    matb%ku = mata%ku
+    matb%rank   = mata%rank
+    matb%mrows  = mata%mrows
+    matb%ncols  = mata%ncols
+    matb%nterms = mata%nterms
+    matb%kmat   = mata%kmat
+    lda = 2*mata%kl + mata%ku + 1
+    IF( ASSOCIATED(matb%val)) DEALLOCATE(matb%val)
+    IF( ASSOCIATED(matb%piv)) DEALLOCATE(matb%piv)
+    ALLOCATE(matb%val(lda,n))
+    ALLOCATE(matb%piv(n))
+    matb%val = mata%val
+    matb%piv = mata%piv
+  END SUBROUTINE mcopy_gb
+!===========================================================================
+  SUBROUTINE mcopy_periodic(mata, matb)
+!
+!  Matrix copy: B = A
+!
+    TYPE(periodic_mat) :: mata, matb
+    INTEGER :: n, kl, ku
+!
+    kl = mata%mat%kl
+    ku = mata%mat%ku
+    n  = mata%mat%rank
+!
+    CALL mcopy(mata%mat, matb%mat)
+    IF( ASSOCIATED(matb%matu)) DEALLOCATE(matb%matu)
+    IF( ASSOCIATED(matb%matvt)) DEALLOCATE(matb%matvt)
+    ALLOCATE(matb%matu(n,kl+ku))
+    ALLOCATE(matb%matvt(kl+ku,n))
+    matb%matu = mata%matu
+    matb%matvt = mata%matvt
+  END SUBROUTINE mcopy_periodic
+!===========================================================================
+  SUBROUTINE mcopy_zperiodic(mata, matb)
+!
+!  Matrix copy: B = A
+!
+    TYPE(zperiodic_mat) :: mata, matb
+    INTEGER :: n, kl, ku
+!
+    kl = mata%mat%kl
+    ku = mata%mat%ku
+    n  = mata%mat%rank
+!
+    CALL mcopy(mata%mat, matb%mat)
+    IF( ASSOCIATED(matb%matu)) DEALLOCATE(matb%matu)
+    IF( ASSOCIATED(matb%matvt)) DEALLOCATE(matb%matvt)
+    ALLOCATE(matb%matu(n,kl+ku))
+    ALLOCATE(matb%matvt(kl+ku,n))
+    matb%matu = mata%matu
+    matb%matvt = mata%matvt
+  END SUBROUTINE mcopy_zperiodic
+!===========================================================================
+  SUBROUTINE mcopy_pb(mata, matb)
+!
+!  Matrix copy: B = A
+!
+    TYPE(pbmat) :: mata, matb
+    INTEGER :: n, lda
+!
+    n = mata%rank
+    matb%ku = mata%ku
+    matb%rank = mata%rank
+    matb%nterms = mata%nterms
+    lda = mata%ku + 1
+    IF( ASSOCIATED(matb%val)) DEALLOCATE(matb%val)
+    ALLOCATE(matb%val(lda,n))
+    matb%val = mata%val
+  END SUBROUTINE mcopy_pb
+!===========================================================================
+  SUBROUTINE mcopy_zge(mata, matb)
+!
+!  Matrix copy: B = A
+!
+    TYPE(zgemat) :: mata, matb
+    INTEGER :: n
+!
+    n = mata%rank
+    matb%rank = n
+    IF( ASSOCIATED(matb%val)) DEALLOCATE(matb%val)
+    IF( ASSOCIATED(matb%piv)) DEALLOCATE(matb%piv)
+    ALLOCATE(matb%val(n,n))
+    ALLOCATE(matb%piv(n))
+    matb%val = mata%val
+    matb%piv = mata%piv
+  END SUBROUTINE mcopy_zge
+!===========================================================================
+  SUBROUTINE mcopy_zgb(mata, matb)
+!
+!  Matrix copy: B = A
+!
+    TYPE(zgbmat) :: mata, matb
+    INTEGER :: n, lda
+!
+    n = mata%rank
+    matb%kl = mata%kl
+    matb%ku = mata%ku
+    matb%rank = mata%rank
+    matb%nterms = mata%nterms
+    lda = 2*mata%kl + mata%ku + 1
+    IF( ASSOCIATED(matb%val)) DEALLOCATE(matb%val)
+    IF( ASSOCIATED(matb%piv)) DEALLOCATE(matb%piv)
+    ALLOCATE(matb%val(lda,n))
+    ALLOCATE(matb%piv(n))
+    matb%val = mata%val
+    matb%piv = mata%piv
+  END SUBROUTINE mcopy_zgb
+!===========================================================================
+  SUBROUTINE mcopy_zpb(mata, matb)
+!
+!  Matrix copy: B = A
+!
+    TYPE(zpbmat) :: mata, matb
+    INTEGER :: n, lda
+!
+    n = mata%rank
+    matb%ku = mata%ku
+    matb%rank = mata%rank
+    matb%nterms = mata%nterms
+    lda = mata%ku + 1
+    IF( ASSOCIATED(matb%val)) DEALLOCATE(matb%val)
+    ALLOCATE(matb%val(lda,n))
+    matb%val = mata%val
+  END SUBROUTINE mcopy_zpb
+!===========================================================================
+  SUBROUTINE maddto_ge(mata, alpha, matb)
+!
+!   A <- A + alpha*B
+!
+    TYPE(gemat) :: mata, matb
+    DOUBLE PRECISION :: alpha
+!
+    mata%val = mata%val + alpha*matb%val
+  END SUBROUTINE maddto_ge
+!===========================================================================
+  SUBROUTINE maddto_gb(mata, alpha, matb)
+!
+!   A <- A + alpha*B
+!
+    TYPE(gbmat) :: mata, matb
+    DOUBLE PRECISION :: alpha
+!
+    mata%val = mata%val + alpha*matb%val
+  END SUBROUTINE maddto_gb
+!===========================================================================
+  SUBROUTINE maddto_periodic(mata, alpha, matb)
+!
+!   A <- A + alpha*B
+!
+    TYPE(periodic_mat) :: mata, matb
+    DOUBLE PRECISION :: alpha
+!
+    mata%mat%val = mata%mat%val + alpha*matb%mat%val
+    mata%matvt   = mata%matvt + alpha*matb%matvt
+  END SUBROUTINE maddto_periodic
+!===========================================================================
+  SUBROUTINE maddto_zperiodic(mata, alpha, matb)
+!
+!   A <- A + alpha*B
+!
+    TYPE(zperiodic_mat) :: mata, matb
+    DOUBLE COMPLEX :: alpha
+!
+    mata%mat%val = mata%mat%val + alpha*matb%mat%val
+    mata%matvt   = mata%matvt + alpha*matb%matvt
+  END SUBROUTINE maddto_zperiodic
+!===========================================================================
+  SUBROUTINE maddto_pb(mata, alpha, matb)
+!
+!   A <- A + alpha*B
+!
+    TYPE(pbmat) :: mata, matb
+    DOUBLE PRECISION :: alpha
+!
+    mata%val = mata%val + alpha*matb%val
+  END SUBROUTINE maddto_pb
+!===========================================================================
+  SUBROUTINE maddto_zge(mata, alpha, matb)
+!
+!   A <- A + alpha*B
+!
+    TYPE(zgemat) :: mata, matb
+    DOUBLE COMPLEX :: alpha
+!
+    mata%val = mata%val + alpha*matb%val
+  END SUBROUTINE maddto_zge
+!===========================================================================
+  SUBROUTINE maddto_zgb(mata, alpha, matb)
+!
+!   A <- A + alpha*B
+!
+    TYPE(zgbmat) :: mata, matb
+    DOUBLE COMPLEX :: alpha
+!
+    mata%val = mata%val + alpha*matb%val
+  END SUBROUTINE maddto_zgb
+!===========================================================================
+  SUBROUTINE maddto_zpb(mata, alpha, matb)
+!
+!   A <- A + alpha*B
+!
+    TYPE(zpbmat) :: mata, matb
+    DOUBLE COMPLEX :: alpha
+!
+    mata%val = mata%val + alpha*matb%val
+  END SUBROUTINE maddto_zpb
+!===========================================================================
+  SUBROUTINE getvalp_ge(mat, p)
+!
+!  Get pointer to matrix coefficients
+!
+    TYPE(gemat) :: mat
+    DOUBLE PRECISION, DIMENSION(:,:), POINTER :: p
+!
+    p => mat%val
+  END SUBROUTINE getvalp_ge
+!===========================================================================
+  SUBROUTINE getvalp_gb(mat, p)
+!
+!  Get pointer to matrix coefficients
+!
+    TYPE(gbmat) :: mat
+    DOUBLE PRECISION, DIMENSION(:,:), POINTER :: p
+!
+    p => mat%val
+  END SUBROUTINE getvalp_gb
+!===========================================================================
+  SUBROUTINE getvalp_pb(mat, p)
+!
+!  Get pointer to matrix coefficients
+!
+    TYPE(pbmat) :: mat
+    DOUBLE PRECISION, DIMENSION(:,:), POINTER :: p
+!
+    p => mat%val
+  END SUBROUTINE getvalp_pb
+!===========================================================================
+  SUBROUTINE getvalp_zge(mat, p)
+!
+!  Get pointer to matrix coefficients
+!
+    TYPE(zgemat) :: mat
+    DOUBLE COMPLEX, DIMENSION(:,:), POINTER :: p
+!
+    p => mat%val
+  END SUBROUTINE getvalp_zge
+!===========================================================================
+  SUBROUTINE getvalp_zgb(mat, p)
+!
+!  Get pointer to matrix coefficients
+!
+    TYPE(zgbmat) :: mat
+    DOUBLE COMPLEX, DIMENSION(:,:), POINTER :: p
+!
+    p => mat%val
+  END SUBROUTINE getvalp_zgb
+!===========================================================================
+  SUBROUTINE getvalp_zpb(mat, p)
+!
+!  Get pointer to matrix coefficients
+!
+    TYPE(zpbmat) :: mat
+    DOUBLE COMPLEX, DIMENSION(:,:), POINTER :: p
+!
+    p => mat%val
+  END SUBROUTINE getvalp_zpb
+!===========================================================================
+  SUBROUTINE destroy_gb(mat)
+!
+!   Deallocate pointers in mat
+!
+    TYPE(gbmat) :: mat
+    IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
+    IF( ASSOCIATED(mat%piv)) DEALLOCATE(mat%piv)    
+  END SUBROUTINE destroy_gb
+!===========================================================================
+  SUBROUTINE destroy_ge(mat)
+!
+!   Deallocate pointers in mat
+!
+    TYPE(gemat) :: mat
+    IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
+    IF( ASSOCIATED(mat%piv)) DEALLOCATE(mat%piv)    
+  END SUBROUTINE destroy_ge
+!===========================================================================
+  SUBROUTINE destroy_pb(mat)
+!
+!   Deallocate pointers in mat
+!
+    TYPE(pbmat) :: mat
+    IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
+  END SUBROUTINE destroy_pb
+!===========================================================================
+  SUBROUTINE destroy_zgb(mat)
+!
+!   Deallocate pointers in mat
+!
+    TYPE(zgbmat) :: mat
+    IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
+    IF( ASSOCIATED(mat%piv)) DEALLOCATE(mat%piv)    
+  END SUBROUTINE destroy_zgb
+!===========================================================================
+  SUBROUTINE destroy_zge(mat)
+!
+!   Deallocate pointers in mat
+!
+    TYPE(zgemat) :: mat
+    IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
+    IF( ASSOCIATED(mat%piv)) DEALLOCATE(mat%piv)    
+  END SUBROUTINE destroy_zge
+!===========================================================================
+  SUBROUTINE destroy_zpb(mat)
+!
+!   Deallocate pointers in mat
+!
+    TYPE(zpbmat) :: mat
+    IF( ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
+  END SUBROUTINE destroy_zpb
+!===========================================================================
+  SUBROUTINE destroy_periodic(mat)
+!
+!   Deallocate pointers in mat
+!
+    TYPE(periodic_mat) :: mat
+    IF( ASSOCIATED(mat%matu)) DEALLOCATE(mat%matu)
+    IF( ASSOCIATED(mat%matvt)) DEALLOCATE(mat%matvt)
+    CALL destroy(mat%mat)
+  END SUBROUTINE destroy_periodic
+!===========================================================================
+  SUBROUTINE destroy_zperiodic(mat)
+!
+!   Deallocate pointers in mat
+!
+    TYPE(zperiodic_mat) :: mat
+    IF( ASSOCIATED(mat%matu)) DEALLOCATE(mat%matu)
+    IF( ASSOCIATED(mat%matvt)) DEALLOCATE(mat%matvt)
+    CALL destroy(mat%mat)
+  END SUBROUTINE destroy_zperiodic
+!===========================================================================
+  SUBROUTINE updt_gb(mat, i, j, val)
+!
+!  Update element Aij into banded matrix
+!
+    TYPE(gbmat), INTENT(inout) :: mat
+    INTEGER, INTENT(in) :: i, j
+    DOUBLE PRECISION, INTENT(in) :: val
+    INTEGER :: lda, n, ib
+!
+    lda = SIZE(mat%val, 1)
+    n = mat%rank
+    ib = mat%kl + mat%ku + i - j + 1
+    IF( (ib .GT. lda) .OR. (j .GT. n) .OR. (j .LT. 1)) THEN
+       WRITE(*,*) 'UPDT: i, j out of range ', i, j, lda, n
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+    mat%val(ib,j) = mat%val(ib,j) + val    
+  END SUBROUTINE updt_gb
+!===========================================================================
+  SUBROUTINE updt_ge(mat, i, j, val)
+!
+!  Update element Aij into banded matrix
+!
+    TYPE(gemat), INTENT(inout) :: mat
+    INTEGER, INTENT(in) :: i, j
+    DOUBLE PRECISION, INTENT(in) :: val
+!
+    IF( (i .GT. mat%mrows) .OR. (j .GT. mat%ncols) .OR. (j .LT. 1) .OR. (i.LT.1)) THEN
+       WRITE(*,*) 'UPDT: i, j out of range ', i, j, mat%mrows, mat%ncols
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+    mat%val(i,j) = mat%val(i,j) + val    
+  END SUBROUTINE updt_ge
+!===========================================================================
+  SUBROUTINE updt_periodic(mat, i, j, val)
+!
+!  Update element Aij into periodic banded matrix
+!
+    TYPE(periodic_mat), INTENT(inout) :: mat
+    INTEGER, INTENT(in) :: i, j
+    DOUBLE PRECISION, INTENT(in) :: val
+    INTEGER :: n, kl, ku
+!
+    n = mat%mat%rank
+    kl = mat%mat%kl
+    ku = mat%mat%ku
+!
+    IF( i.LE.kl .AND. j.GE.n-kl+1 ) THEN
+! 
+! Put into C(1:kl, 1:kl) = V^T(1:kl, n-kl+1:n)
+       mat%matvt(i,j) = mat%matvt(i,j) + val
+!
+    ELSE IF( i.GE.n-ku+1 .AND. j.LE.ku ) THEN
+!
+! Put into D(1:ku,1:ku) = V^T(kl+1:kl+ku, 1:ku)
+       mat%matvt(i-n+kl+ku,j) = mat%matvt(i-n+kl+ku,j) + val
+!
+    ELSE
+!
+! Put into the banded matrix
+       CALL updtmat(mat%mat, i, j, val)
+!
+    END IF
+  END SUBROUTINE updt_periodic
+!===========================================================================
+  SUBROUTINE updt_zperiodic(mat, i, j, val)
+!
+!  Update element Aij into periodic banded matrix
+!
+    TYPE(zperiodic_mat), INTENT(inout) :: mat
+    INTEGER, INTENT(in) :: i, j
+    DOUBLE COMPLEX, INTENT(in) :: val
+    INTEGER :: n, kl, ku
+!
+    n = mat%mat%rank
+    kl = mat%mat%kl
+    ku = mat%mat%ku
+!
+    IF( i.LE.kl .AND. j.GE.n-kl+1 ) THEN
+! 
+! Put into C(1:kl, 1:kl) = V^T(1:kl, n-kl+1:n)
+       mat%matvt(i,j) = mat%matvt(i,j) + val
+!
+    ELSE IF( i.GE.n-ku+1 .AND. j.LE.ku ) THEN
+!
+! Put into D(1:ku,1:ku) = V^T(kl+1:kl+ku, 1:ku)
+       mat%matvt(i-n+kl+ku,j) = mat%matvt(i-n+kl+ku,j) + val
+!
+    ELSE
+!
+! Put into the banded matrix
+       CALL updtmat(mat%mat, i, j, val)
+!
+    END IF
+  END SUBROUTINE updt_zperiodic
+!===========================================================================
+  SUBROUTINE updt_pb(mat, i, j, val)
+!
+!  Update element Aij into banded matrix
+!
+    TYPE(pbmat), INTENT(inout) :: mat
+    INTEGER, INTENT(in) :: i, j
+    DOUBLE PRECISION, INTENT(in) :: val
+    INTEGER :: lda, n, ib
+!
+    lda = SIZE(mat%val, 1)
+    n = mat%rank
+    IF( i .LE. j ) THEN
+       ib = mat%ku + i - j + 1
+       IF( (ib .GT. lda) .OR. (j .GT. n) .OR. (j .LT. 1)) THEN
+          WRITE(*,*) 'UPDT: i, j out of range ', i, j, lda, n
+          STOP '*** Abnormal EXIT in MODULE matrix ***'
+       END IF
+       mat%val(ib,j) = mat%val(ib,j) + val
+    END IF
+  END SUBROUTINE updt_pb
+!===========================================================================
+  SUBROUTINE updt_zgb(mat, i, j, val)
+!
+!  Update element Aij into banded matrix
+!
+    TYPE(zgbmat), INTENT(inout) :: mat
+    INTEGER, INTENT(in) :: i, j
+    DOUBLE COMPLEX, INTENT(in) :: val
+    INTEGER :: lda, n, ib
+!
+    lda = SIZE(mat%val, 1)
+    n = mat%rank
+    ib = mat%kl + mat%ku + i - j + 1
+    IF( (ib .GT. lda) .OR. (j .GT. n) .OR. (j .LT. 1)) THEN
+       WRITE(*,*) 'UPDT: i, j out of range ', i, j, lda, n
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+    mat%val(ib,j) = mat%val(ib,j) + val    
+  END SUBROUTINE updt_zgb
+!===========================================================================
+  SUBROUTINE updt_zpb(mat, i, j, val)
+!
+!  Update element Aij into banded matrix
+!
+    TYPE(zpbmat), INTENT(inout) :: mat
+    INTEGER, INTENT(in) :: i, j
+    DOUBLE COMPLEX, INTENT(in) :: val
+    INTEGER :: lda, n, ib
+!
+    lda = SIZE(mat%val, 1)
+    n = mat%rank
+    IF( i .LE. j ) THEN
+       ib = mat%ku + i - j + 1
+       IF( (ib .GT. lda) .OR. (j .GT. n) .OR. (j .LT. 1)) THEN
+          WRITE(*,*) 'UPDT: i, j out of range ', i, j, lda, n
+          STOP '*** Abnormal EXIT in MODULE matrix ***'
+       END IF
+       mat%val(ib,j) = mat%val(ib,j) + val
+    END IF
+  END SUBROUTINE updt_zpb
+!===========================================================================
+  SUBROUTINE getele_gb(mat, i, j, val)
+!
+!  Get element (i,j) of matrix
+!
+    TYPE(gbmat), INTENT(in) :: mat
+    DOUBLE PRECISION, INTENT (OUT) :: val
+    INTEGER, INTENT (IN) :: i, j
+    INTEGER :: lda, n, ib
+!
+    lda = SIZE(mat%val, 1)
+    n = mat%rank
+    ib = mat%kl + mat%ku + i - j + 1
+    IF( (ib .GT. lda) .OR. (j .GT. n)) THEN
+       WRITE(*,*) 'GETELE: i, j out of range ', i, j
+       STOP '*** Abnormal EXIT in MODULE matrix***'
+    END IF
+    val = mat%val(ib,j)
+  END SUBROUTINE getele_gb
+!===========================================================================
+  SUBROUTINE getele_periodic(mat, i, j, val)
+!
+!  Get element Aij of periodic banded matrix
+!
+    TYPE(periodic_mat), INTENT(in) :: mat
+    INTEGER, INTENT(in) :: i, j
+    DOUBLE PRECISION, INTENT(out) :: val
+    INTEGER :: n, kl, ku
+!
+    n = mat%mat%rank
+    kl = mat%mat%kl
+    ku = mat%mat%ku
+!
+    IF( i.LE.kl .AND. j.GE.n-kl+1 ) THEN
+! 
+! Put into C(1:kl, 1:kl) = V^T(1:kl, n-kl+1:n)
+       val =mat%matvt(i,j)
+!
+    ELSE IF( i.GE.n-ku+1 .AND. j.LE.ku ) THEN
+!
+! Put into D(1:ku,1:ku) = V^T(kl+1:kl+ku, 1:ku)
+       val = mat%matvt(i-n+kl+ku,j)
+!
+    ELSE
+!
+! Put into the banded matrix
+       CALL getele(mat%mat, i, j, val)
+!
+    END IF
+  END SUBROUTINE getele_periodic
+!===========================================================================
+  SUBROUTINE getele_zperiodic(mat, i, j, val)
+!
+!  Get element Aij of periodic banded matrix
+!
+    TYPE(zperiodic_mat), INTENT(in) :: mat
+    INTEGER, INTENT(in) :: i, j
+    DOUBLE COMPLEX, INTENT(out) :: val
+    INTEGER :: n, kl, ku
+!
+    n = mat%mat%rank
+    kl = mat%mat%kl
+    ku = mat%mat%ku
+!
+    IF( i.LE.kl .AND. j.GE.n-kl+1 ) THEN
+! 
+! Put into C(1:kl, 1:kl) = V^T(1:kl, n-kl+1:n)
+       val =mat%matvt(i,j)
+!
+    ELSE IF( i.GE.n-ku+1 .AND. j.LE.ku ) THEN
+!
+! Put into D(1:ku,1:ku) = V^T(kl+1:kl+ku, 1:ku)
+       val = mat%matvt(i-n+kl+ku,j)
+!
+    ELSE
+!
+! Put into the banded matrix
+       CALL getele(mat%mat, i, j, val)
+!
+    END IF
+  END SUBROUTINE getele_zperiodic
+!===========================================================================
+  SUBROUTINE getele_pb(mat, i, j, val)
+!
+!  Get element (i,j) of matrix
+!
+    TYPE(pbmat), INTENT(in) :: mat
+    DOUBLE PRECISION, INTENT (OUT) :: val
+    INTEGER, INTENT (IN) :: i, j
+    INTEGER :: lda, n, ib, irow, jcol
+!
+    lda = SIZE(mat%val, 1)
+    n = mat%rank
+    IF( i .LE. j ) THEN   ! Upper triangular matrix
+       irow = i; jcol = j
+    ELSE                  ! Lower triangular matrix
+       irow = j; jcol = i
+    END IF 
+    ib = mat%ku + irow - jcol + 1
+    IF( (ib .GT. lda) .OR. (jcol .GT. n)) THEN
+       WRITE(*,*) 'GETELE: i, j out of range ', i, j
+       STOP '*** Abnormal EXIT in MODULE matrix***'
+    END IF
+    val = mat%val(ib,jcol)
+  END SUBROUTINE getele_pb
+!===========================================================================
+  SUBROUTINE getele_zgb(mat, i, j, val)
+!
+!  Get element (i,j) of matrix
+!
+    TYPE(zgbmat), INTENT(in) :: mat
+    DOUBLE COMPLEX, INTENT (OUT) :: val
+    INTEGER, INTENT (IN) :: i, j
+    INTEGER :: lda, n, ib
+!
+    lda = SIZE(mat%val, 1)
+    n = mat%rank
+    ib = mat%kl + mat%ku + i - j + 1
+    IF( (ib .GT. lda) .OR. (j .GT. n)) THEN
+       WRITE(*,*) 'GETELE: i, j out of range ', i, j
+       STOP '*** Abnormal EXIT in MODULE matrix***'
+    END IF
+    val = mat%val(ib,j)
+  END SUBROUTINE getele_zgb
+!===========================================================================
+  SUBROUTINE getele_zpb(mat, i, j, val)
+!
+!  Get element (i,j) of matrix
+!
+    TYPE(zpbmat), INTENT(in) :: mat
+    DOUBLE COMPLEX, INTENT (OUT) :: val
+    INTEGER, INTENT (IN) :: i, j
+    INTEGER :: lda, n, ib, irow, jcol
+!
+    lda = SIZE(mat%val, 1)
+    n = mat%rank
+!
+    IF(  i .LE. j ) THEN   ! Upper triangular matrix
+       irow = i; jcol = j
+       ib = mat%ku + irow - jcol + 1
+       IF( (ib .GT. lda) .OR. (jcol .GT. n)) THEN
+          WRITE(*,*) 'GETELE: i, j out of range ', i, j
+          STOP '*** Abnormal EXIT in MODULE matrix***'
+       END IF
+       val = mat%val(ib,jcol)
+       RETURN
+    ELSE                  ! Lower triangular matrix
+       irow = j; jcol = i
+       ib = mat%ku + irow - jcol + 1
+       IF( (ib .GT. lda) .OR. (jcol .GT. n)) THEN
+          WRITE(*,*) 'GETELE: i, j out of range ', i, j
+          STOP '*** Abnormal EXIT in MODULE matrix***'
+       END IF
+       val = CONJG(mat%val(ib,jcol))
+    END IF 
+  END SUBROUTINE getele_zpb
+!===========================================================================
+  SUBROUTINE putele_gb(mat, i, j, val)
+!
+!  Put element (i,j) of matrix
+!
+    TYPE(gbmat), INTENT(inout) :: mat
+    DOUBLE PRECISION, INTENT (in) :: val
+    INTEGER, INTENT (in) :: i, j
+    INTEGER :: lda, n, ib
+!
+    lda = SIZE(mat%val, 1)
+    n = mat%rank
+    ib = mat%kl + mat%ku + i - j + 1
+    IF( (ib .GT. lda) .OR. (j .GT. n)) THEN
+       WRITE(*,*) 'GETELE: i, j out of range ', i, j
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+    mat%val(ib,j) = val
+  END SUBROUTINE putele_gb
+!===========================================================================
+  SUBROUTINE putele_periodic(mat, i, j, val)
+!
+!  Put element Aij into periodic banded matrix
+!
+    TYPE(periodic_mat), INTENT(inout) :: mat
+    INTEGER, INTENT(in) :: i, j
+    DOUBLE PRECISION, INTENT(in) :: val
+    INTEGER :: n, kl, ku
+!
+    n = mat%mat%rank
+    kl = mat%mat%kl
+    ku = mat%mat%ku
+!
+    IF( i.LE.kl .AND. j.GE.n-kl+1 ) THEN
+! 
+! Put into C(1:kl, 1:kl) = V^T(1:kl, n-kl+1:n)
+       mat%matvt(i,j) = val
+!
+    ELSE IF( i.GE.n-ku+1 .AND. j.LE.ku ) THEN
+!
+! Put into D(1:ku,1:ku) = V^T(kl+1:kl+ku, 1:ku)
+       mat%matvt(i-n+kl+ku,j) = val
+!
+    ELSE
+!
+! Put into the banded matrix
+       CALL putele(mat%mat, i, j, val)
+!
+    END IF
+  END SUBROUTINE putele_periodic
+!===========================================================================
+  SUBROUTINE putele_zperiodic(mat, i, j, val)
+!
+!  Put element Aij into periodic banded matrix
+!
+    TYPE(zperiodic_mat), INTENT(inout) :: mat
+    INTEGER, INTENT(in) :: i, j
+    DOUBLE COMPLEX, INTENT(in) :: val
+    INTEGER :: n, kl, ku
+!
+    n = mat%mat%rank
+    kl = mat%mat%kl
+    ku = mat%mat%ku
+!
+    IF( i.LE.kl .AND. j.GE.n-kl+1 ) THEN
+! 
+! Put into C(1:kl, 1:kl) = V^T(1:kl, n-kl+1:n)
+       mat%matvt(i,j) = val
+!
+    ELSE IF( i.GE.n-ku+1 .AND. j.LE.ku ) THEN
+!
+! Put into D(1:ku,1:ku) = V^T(kl+1:kl+ku, 1:ku)
+       mat%matvt(i-n+kl+ku,j) = val
+!
+    ELSE
+!
+! Put into the banded matrix
+       CALL putele(mat%mat, i, j, val)
+!
+    END IF
+  END SUBROUTINE putele_zperiodic
+!===========================================================================
+  SUBROUTINE putele_pb(mat, i, j, val)
+!
+!  Put element (i,j) of matrix
+!
+    TYPE(pbmat), INTENT(inout) :: mat
+    DOUBLE PRECISION, INTENT (in) :: val
+    INTEGER, INTENT (IN) :: i, j
+    INTEGER :: lda, n, ib, irow, jcol
+!
+    lda = SIZE(mat%val, 1)
+    n = mat%rank
+    IF( i .LE. j ) THEN   ! Upper triangular matrix
+       irow = i; jcol = j
+    ELSE                  ! Lower triangular matrix
+       irow = j; jcol = i
+    END IF 
+    ib = mat%ku + irow - jcol + 1
+    IF( (ib .GT. lda) .OR. (jcol .GT. n)) THEN
+       WRITE(*,*) 'PUTELE: i, j out of range ', i, j
+       STOP '*** Abnormal EXIT in MODULE matrix***'
+    END IF
+    mat%val(ib,jcol) = val
+  END SUBROUTINE putele_pb
+!===========================================================================
+  SUBROUTINE putele_zgb(mat, i, j, val)
+!
+!  Put element (i,j) of matrix
+!
+    TYPE(zgbmat), INTENT(inout) :: mat
+    DOUBLE COMPLEX, INTENT (in) :: val
+    INTEGER, INTENT (in) :: i, j
+    INTEGER :: lda, n, ib
+!
+    lda = SIZE(mat%val, 1)
+    n = mat%rank
+    ib = mat%kl + mat%ku + i - j + 1
+    IF( (ib .GT. lda) .OR. (j .GT. n)) THEN
+       WRITE(*,*) 'GETELE: i, j out of range ', i, j
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+    mat%val(ib,j) = val
+  END SUBROUTINE putele_zgb
+!===========================================================================
+  SUBROUTINE putele_zpb(mat, i, j, val)
+!
+!  Put element (i,j) of matrix
+!
+    TYPE(zpbmat), INTENT(inout) :: mat
+    DOUBLE COMPLEX, INTENT (in) :: val
+    INTEGER, INTENT (IN) :: i, j
+    INTEGER :: lda, n, ib, irow, jcol
+!
+    lda = SIZE(mat%val, 1)
+    n = mat%rank
+    IF( i .LE. j ) THEN   ! Upper triangular matrix
+       irow = i; jcol = j
+       ib = mat%ku + irow - jcol + 1
+       IF( (ib .GT. lda) .OR. (jcol .GT. n)) THEN
+          WRITE(*,*) 'GETELE: i, j out of range ', i, j
+          STOP '*** Abnormal EXIT in MODULE matrix***'
+       END IF
+       mat%val(ib,jcol) = val
+    ELSE                  ! Lower triangular matrix
+       irow = j; jcol = i
+       ib = mat%ku + irow - jcol + 1
+       IF( (ib .GT. lda) .OR. (jcol .GT. n)) THEN
+          WRITE(*,*) 'PUTELE: i, j out of range ', i, j
+          STOP '*** Abnormal EXIT in MODULE matrix***'
+       END IF
+       mat%val(ib,jcol) = CONJG(val)
+    END IF 
+  END SUBROUTINE putele_zpb
+!===========================================================================
+  SUBROUTINE getcol_gb(mat, j, arr)
+!
+!   Get a column from matrix
+!
+    TYPE(gbmat), INTENT(in) :: mat
+    INTEGER, INTENT(in) :: j
+    DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: arr
+    INTEGER :: m, kl, ku
+    INTEGER :: ibmin, ibmax, imin, imax
+!
+    kl = mat%kl
+    ku = mat%ku
+    m = mat%mrows
+    IF( SIZE(arr) .LT. m ) THEN
+       WRITE(*,*) 'GETCOL: size of arr too small'
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+    arr(1:m) = 0.0d0
+    imin = MAX(1,j-ku)
+    imax = MIN(m, j+kl)
+    ibmin =  kl+ku+imin-j+1
+    ibmax =  kl+ku+imax-j+1
+    arr(imin:imax) = mat%val(ibmin:ibmax,j)
+  END SUBROUTINE getcol_gb
+!===========================================================================
+  SUBROUTINE getcol_periodic(mat, j, arr)
+!
+!   Get a column from matrix
+!
+    TYPE(periodic_mat), INTENT(in) :: mat
+    INTEGER, INTENT(in) :: j
+    DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: arr
+    INTEGER :: n, kl, ku
+!
+    kl = mat%mat%kl
+    ku = mat%mat%ku
+    n  = mat%mat%rank
+!
+    CALL getcol(mat%mat, j, arr)
+!
+    IF( j.GE.n-kl+1 ) THEN
+       arr(1:kl) = mat%matvt(1:kl,j)
+    ELSE IF( j.LE.ku ) THEN
+       arr(n-ku+1:n) = mat%matvt(kl+1:kl+ku,j)
+    END IF
+  END SUBROUTINE getcol_periodic
+!===========================================================================
+  SUBROUTINE getcol_zperiodic(mat, j, arr)
+!
+!   Get a column from matrix
+!
+    TYPE(zperiodic_mat), INTENT(in) :: mat
+    INTEGER, INTENT(in) :: j
+    DOUBLE COMPLEX, DIMENSION(:), INTENT(out) :: arr
+    INTEGER :: n, kl, ku
+!
+    kl = mat%mat%kl
+    ku = mat%mat%ku
+    n  = mat%mat%rank
+!
+    CALL getcol(mat%mat, j, arr)
+!
+    IF( j.GE.n-kl+1 ) THEN
+       arr(1:kl) = mat%matvt(1:kl,j)
+    ELSE IF( j.LE.ku ) THEN
+       arr(n-ku+1:n) = mat%matvt(kl+1:kl+ku,j)
+    END IF
+  END SUBROUTINE getcol_zperiodic
+!===========================================================================
+  SUBROUTINE getcol_pb(mat, j, arr)
+!
+!   Get a column from matrix
+!
+    TYPE(pbmat), INTENT(in) :: mat
+    INTEGER, INTENT(in) :: j
+    DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: arr
+    INTEGER :: n, ku
+    INTEGER :: i, ib, ibmin, ibmax, imin, imax
+!
+    ku = mat%ku
+    n = mat%rank
+    IF( SIZE(arr) .LT. n ) THEN
+       WRITE(*,*) 'GETCOL: size of arr too small'
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+    arr(1:n) = 0.0d0
+!
+    imin=MAX(1,j-ku); imax=j  ! The column in the upper diagonal part
+    ibmin=ku+1+imin-j  ; ibmax=ku+1+imax-j
+    arr(imin:imax) = mat%val(ibmin:ibmax,j)
+!
+    imin=j+1; imax = MIN(n,j+ku)  ! The column in the lower diagonal part
+    DO i=imin,imax
+       ib = ku+1+j-i
+       arr(i) = mat%val(ib,i)
+    END DO
+  END SUBROUTINE getcol_pb
+!===========================================================================
+  SUBROUTINE getcol_zgb(mat, j, arr)
+!
+!   Get a column from matrix
+!
+    TYPE(zgbmat), INTENT(in) :: mat
+    INTEGER, INTENT(in) :: j
+    DOUBLE COMPLEX, DIMENSION(:), INTENT(out) :: arr
+    INTEGER :: m, kl, ku
+    INTEGER :: ibmin, ibmax, imin, imax
+!
+    kl = mat%kl
+    ku = mat%ku
+    m = mat%mrows
+    IF( SIZE(arr) .LT. m ) THEN
+       WRITE(*,*) 'GETCOL: size of arr too small'
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+    arr(1:m) = 0.0d0
+    imin = MAX(1,j-ku)
+    imax = MIN(m, j+kl)
+    ibmin =  kl+ku+imin-j+1
+    ibmax =  kl+ku+imax-j+1
+    arr(imin:imax) = mat%val(ibmin:ibmax,j)
+  END SUBROUTINE getcol_zgb
+!===========================================================================
+  SUBROUTINE getcol_zpb(mat, j, arr)
+!
+!   Get a column from matrix
+!
+    TYPE(zpbmat), INTENT(in) :: mat
+    INTEGER, INTENT(in) :: j
+    DOUBLE COMPLEX, DIMENSION(:), INTENT(out) :: arr
+    INTEGER :: n, ku
+    INTEGER :: i, ib, ibmin, ibmax, imin, imax
+!
+    ku = mat%ku
+    n = mat%rank
+    IF( SIZE(arr) .LT. n ) THEN
+       WRITE(*,*) 'GETCOL: size of arr too small'
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+    arr(1:n) = 0.0d0
+!
+    imin=MAX(1,j-ku); imax=j  ! The column in the upper diagonal part
+    ibmin=ku+1+imin-j  ; ibmax=ku+1+imax-j
+    arr(imin:imax) = mat%val(ibmin:ibmax,j)
+!
+    imin=j+1; imax = MIN(n,j+ku)  ! The column in the lower diagonal part
+    DO i=imin,imax
+       ib = ku+1+j-i
+       arr(i) = CONJG(mat%val(ib,i))
+    END DO
+  END SUBROUTINE getcol_zpb
+!===========================================================================
+  SUBROUTINE getrow_gb(mat, i, arr)
+!
+!   Get a row from matrix
+!
+    TYPE(gbmat), INTENT(in) :: mat
+    INTEGER, INTENT(in) :: i
+    DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: arr
+    INTEGER :: n, kl, ku
+    INTEGER :: j, ib, jmin, jmax
+!
+    kl = mat%kl
+    ku = mat%ku
+    n = mat%rank
+    IF( SIZE(arr) .LT. n ) THEN
+       WRITE(*,*) 'GETROW: size of arr too small'
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+    arr(1:n) = 0.0d0
+    jmin = MAX(1,i-kl)
+    jmax = MIN(n, i+ku)
+    DO j=jmin,jmax
+       ib =  kl+ku+i-j+1
+       arr(j) = mat%val(ib,j)
+    END DO
+  END SUBROUTINE getrow_gb
+!===========================================================================
+  SUBROUTINE getrow_ge(mat, i, arr)
+!
+!   Get a row from matrix
+!
+    TYPE(gemat), INTENT(in) :: mat
+    INTEGER, INTENT(in) :: i
+    DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: arr
+    INTEGER :: n
+!
+    n = mat%ncols
+    IF( SIZE(arr) .LT. n ) THEN
+       WRITE(*,*) 'GETROW: size of arr too small'
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+    arr(1:n) = mat%val(i,1:n)
+  END SUBROUTINE getrow_ge
+!===========================================================================
+  SUBROUTINE getrow_periodic(mat, i, arr)
+!
+!   Get a row from matrix
+!
+    TYPE(periodic_mat), INTENT(in) :: mat
+    INTEGER, INTENT(in) :: i
+    DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: arr
+    INTEGER :: n, kl, ku
+!
+    kl = mat%mat%kl
+    ku = mat%mat%ku
+    n  = mat%mat%rank
+!
+    CALL getrow(mat%mat, i, arr)
+!
+    IF( i.LE.kl ) THEN
+       arr(n-kl+1:n) = mat%matvt(i,n-kl+1:n)
+    ELSE IF( i.GE.n-ku+1 ) THEN
+       arr(1:ku) = mat%matvt(i-n+kl+ku,1:ku)
+    END IF
+  END SUBROUTINE getrow_periodic
+!===========================================================================
+  SUBROUTINE getrow_zperiodic(mat, i, arr)
+!
+!   Get a row from matrix
+!
+    TYPE(zperiodic_mat), INTENT(in) :: mat
+    INTEGER, INTENT(in) :: i
+    DOUBLE COMPLEX, DIMENSION(:), INTENT(out) :: arr
+    INTEGER :: n, kl, ku
+!
+    kl = mat%mat%kl
+    ku = mat%mat%ku
+    n  = mat%mat%rank
+!
+    CALL getrow(mat%mat, i, arr)
+!
+    IF( i.LE.kl ) THEN
+       arr(n-kl+1:n) = mat%matvt(i,n-kl+1:n)
+    ELSE IF( i.GE.n-ku+1 ) THEN
+       arr(1:ku) = mat%matvt(i-n+kl+ku,1:ku)
+    END IF
+  END SUBROUTINE getrow_zperiodic
+!===========================================================================
+  SUBROUTINE getrow_pb(mat, i, arr)
+!
+!   Get a row from matrix
+!
+    TYPE(pbmat), INTENT(in) :: mat
+    INTEGER, INTENT(in) :: i
+    DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: arr
+    INTEGER :: n, ku
+    INTEGER :: j, ib, ibmin, ibmax, jmin, jmax
+!
+    ku = mat%ku
+    n = mat%rank
+    IF( SIZE(arr) .LT. n ) THEN
+       WRITE(*,*) 'GETROW: size of arr too small'
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+    arr(1:n) = 0.0d0
+!
+    jmin=i; jmax=MIN(n,i+ku)
+    DO j=jmin,jmax
+       ib=ku+1+i-j
+       arr(j) = mat%val(ib,j)
+    END DO
+!
+    jmin=MAX(1,i-ku); jmax=i-1
+    DO j=jmin,jmax
+       ib=ku+1+j-i
+       arr(j) = mat%val(ib,i)
+    END DO
+  END SUBROUTINE getrow_pb
+!===========================================================================
+  SUBROUTINE getrow_zgb(mat, i, arr)
+!
+!   Get a row from matrix
+!
+    TYPE(zgbmat), INTENT(in) :: mat
+    INTEGER, INTENT(in) :: i
+    DOUBLE COMPLEX, DIMENSION(:), INTENT(out) :: arr
+    INTEGER :: n, kl, ku
+    INTEGER :: j, ib, jmin, jmax
+!
+    kl = mat%kl
+    ku = mat%ku
+    n = mat%rank
+    IF( SIZE(arr) .LT. n ) THEN
+       WRITE(*,*) 'GETROW: size of arr too small'
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+    arr(1:n) = 0.0d0
+    jmin = MAX(1,i-kl)
+    jmax = MIN(n, i+ku)
+    DO j=jmin,jmax
+       ib =  kl+ku+i-j+1
+       arr(j) = mat%val(ib,j)
+    END DO
+  END SUBROUTINE getrow_zgb
+!===========================================================================
+  SUBROUTINE getrow_zpb(mat, i, arr)
+!
+!   Get a row from matrix
+!
+    TYPE(zpbmat), INTENT(in) :: mat
+    INTEGER, INTENT(in) :: i
+    DOUBLE COMPLEX, DIMENSION(:), INTENT(out) :: arr
+    INTEGER :: n, ku
+    INTEGER :: j, ib, ibmin, ibmax, jmin, jmax
+!
+    ku = mat%ku
+    n = mat%rank
+    IF( SIZE(arr) .LT. n ) THEN
+       WRITE(*,*) 'GETROW: size of arr too small'
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+    arr(1:n) = 0.0d0
+!
+    jmin=i; jmax=MIN(n,i+ku)
+    DO j=jmin,jmax
+       ib=ku+1+i-j
+       arr(j) = mat%val(ib,j)
+    END DO
+!
+    jmin=MAX(1,i-ku); jmax=i-1
+    DO j=jmin,jmax
+       ib=ku+1+j-i
+       arr(j) = CONJG(mat%val(ib,i))
+    END DO
+  END SUBROUTINE getrow_zpb
+!===========================================================================
+  SUBROUTINE putcol_gb(mat, j, arr)
+!
+!   Put a column from matrix
+!
+    TYPE(gbmat), INTENT(inout) :: mat
+    INTEGER, INTENT(in) :: j
+    DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: arr
+    INTEGER :: m, kl, ku
+    INTEGER :: ibmin, ibmax, imin, imax
+!
+    kl = mat%kl
+    ku = mat%ku
+    m = mat%mrows
+    IF( SIZE(arr) .LT. m ) THEN
+       WRITE(*,*) 'PUTCOL: size of arr too small'
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+    imin = MAX(1,j-ku)
+    imax = MIN(m, j+kl)
+    ibmin =  kl+ku+imin-j+1
+    ibmax =  kl+ku+imax-j+1
+    mat%val(ibmin:ibmax,j) = arr(imin:imax)
+  END SUBROUTINE putcol_gb
+!===========================================================================
+  SUBROUTINE putcol_ge(mat, j, arr)
+!
+!   Put a column from matrix
+!
+    TYPE(gemat), INTENT(inout) :: mat
+    INTEGER, INTENT(in) :: j
+    DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: arr
+    INTEGER :: m
+!
+    m = mat%mrows
+    IF( SIZE(arr) .LT. m ) THEN
+       WRITE(*,*) 'PUTCOL: size of arr too small'
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+    mat%val(:,j) = arr(:)
+  END SUBROUTINE putcol_ge
+!===========================================================================
+  SUBROUTINE putrow_periodic(mat, i, arr)
+!
+!   Put a row to matrix
+!
+    TYPE(periodic_mat), INTENT(inout) :: mat
+    INTEGER, INTENT(in) :: i
+    DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: arr
+    INTEGER :: n, kl, ku
+!
+    kl = mat%mat%kl
+    ku = mat%mat%ku
+    n  = mat%mat%rank
+!
+    CALL putrow(mat%mat, i, arr)
+!
+    IF( i.LE.kl ) THEN
+       mat%matvt(i,n-kl+1:n) = arr(n-kl+1:n)
+    ELSE IF( i.GE.n-ku+1 ) THEN
+       mat%matvt(i-n+kl+ku,1:ku) = arr(1:ku)
+    END IF
+  END SUBROUTINE putrow_periodic
+!===========================================================================
+  SUBROUTINE putrow_zperiodic(mat, i, arr)
+!
+!   Put a row to matrix
+!
+    TYPE(zperiodic_mat), INTENT(inout) :: mat
+    INTEGER, INTENT(in) :: i
+    DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: arr
+    INTEGER :: n, kl, ku
+!
+    kl = mat%mat%kl
+    ku = mat%mat%ku
+    n  = mat%mat%rank
+!
+    CALL putrow(mat%mat, i, arr)
+!
+    IF( i.LE.kl ) THEN
+       mat%matvt(i,n-kl+1:n) = arr(n-kl+1:n)
+    ELSE IF( i.GE.n-ku+1 ) THEN
+       mat%matvt(i-n+kl+ku,1:ku) = arr(1:ku)
+    END IF
+  END SUBROUTINE putrow_zperiodic
+!===========================================================================
+  SUBROUTINE putcol_periodic(mat, j, arr)
+!
+!   Put a column into matrix
+!
+    TYPE(periodic_mat), INTENT(inout) :: mat
+    INTEGER, INTENT(in) :: j
+    DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: arr
+    INTEGER :: n, kl, ku
+!
+    kl = mat%mat%kl
+    ku = mat%mat%ku
+    n  = mat%mat%rank
+!
+    CALL putcol(mat%mat, j, arr)
+!
+    IF( j.GE.n-kl+1 ) THEN
+       mat%matvt(1:kl,j) = arr(1:kl)
+    ELSE IF( j.LE.ku ) THEN
+       mat%matvt(kl+1:kl+ku,j) = arr(n-ku+1:n)
+    END IF
+  END SUBROUTINE putcol_periodic
+!===========================================================================
+  SUBROUTINE putcol_zperiodic(mat, j, arr)
+!
+!   Put a column into matrix
+!
+    TYPE(zperiodic_mat), INTENT(inout) :: mat
+    INTEGER, INTENT(in) :: j
+    DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: arr
+    INTEGER :: n, kl, ku
+!
+    kl = mat%mat%kl
+    ku = mat%mat%ku
+    n  = mat%mat%rank
+!
+    CALL putcol(mat%mat, j, arr)
+!
+    IF( j.GE.n-kl+1 ) THEN
+       mat%matvt(1:kl,j) = arr(1:kl)
+    ELSE IF( j.LE.ku ) THEN
+       mat%matvt(kl+1:kl+ku,j) = arr(n-ku+1:n)
+    END IF
+  END SUBROUTINE putcol_zperiodic
+!===========================================================================
+  SUBROUTINE putcol_pb(mat, j, arr)
+!
+!   Put a column from matrix
+!
+    TYPE(pbmat), INTENT(inout) :: mat
+    INTEGER, INTENT(in) :: j
+    DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: arr
+    INTEGER :: n, ku
+    INTEGER :: i, ib, ibmin, ibmax, imin, imax
+!
+    ku = mat%ku
+    n = mat%rank
+    IF( SIZE(arr) .LT. n ) THEN
+       WRITE(*,*) 'PUTCOL: size of arr too small'
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+    imin = MAX(1,j-ku); imax = j  ! The column in the upper diagonal part
+    ibmin =  ku+imin-j+1; ibmax =  ku+imax-j+1
+    mat%val(ibmin:ibmax,j) = arr(imin:imax)
+ !
+    imin=j+1; imax = MIN(n,j+ku)  ! The column in the lower diagonal part
+    DO i=imin,imax
+       ib = ku+1+j-i
+       mat%val(ib,i) = arr(i)
+    END DO
+ END SUBROUTINE putcol_pb
+!===========================================================================
+  SUBROUTINE putcol_zgb(mat, j, arr)
+!
+!   Put a column from matrix
+!
+    TYPE(zgbmat), INTENT(inout) :: mat
+    INTEGER, INTENT(in) :: j
+    DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: arr
+    INTEGER :: m, kl, ku
+    INTEGER :: ibmin, ibmax, imin, imax
+!
+    kl = mat%kl
+    ku = mat%ku
+    m = mat%mrows
+    IF( SIZE(arr) .LT. m ) THEN
+       WRITE(*,*) 'PUTCOL: size of arr too small'
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+    imin = MAX(1,j-ku)
+    imax = MIN(m, j+kl)
+    ibmin =  kl+ku+imin-j+1
+    ibmax =  kl+ku+imax-j+1
+    mat%val(ibmin:ibmax,j) = arr(imin:imax)
+  END SUBROUTINE putcol_zgb
+!===========================================================================
+  SUBROUTINE putcol_zpb(mat, j, arr)
+!
+!   Put a column from matrix
+!
+    TYPE(zpbmat), INTENT(inout) :: mat
+    INTEGER, INTENT(in) :: j
+    DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: arr
+    INTEGER :: n, ku, i, ib
+    INTEGER :: ibmin, ibmax, imin, imax
+!
+    ku = mat%ku
+    n = mat%rank
+    IF( SIZE(arr) .LT. n ) THEN
+       WRITE(*,*) 'PUTCOL: size of arr too small'
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+!
+    imin=MAX(1,j-ku); imax=j  ! The column in the upper diagonal part
+    ibmin=ku+1+imin-j  ; ibmax=ku+1+imax-j
+    mat%val(ibmin:ibmax,j) = arr(imin:imax)
+!
+    imin=j+1; imax = MIN(n,j+ku)  ! The column in the lower diagonal part
+    DO i=imin,imax
+       ib = ku+1+j-i
+       mat%val(ib,i) = CONJG(arr(i))
+    END DO
+  END SUBROUTINE putcol_zpb
+!===========================================================================
+  SUBROUTINE putrow_gb(mat, i, arr)
+!
+!   Put a row from matrix
+!
+    TYPE(gbmat), INTENT(inout) :: mat
+    INTEGER, INTENT(in) :: i
+    DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: arr
+    INTEGER :: n, kl, ku
+    INTEGER :: j, ib, jmin, jmax
+!
+    kl = mat%kl
+    ku = mat%ku
+    n = mat%rank
+    IF( SIZE(arr) .LT. n ) THEN
+       WRITE(*,*) 'GETCOL: size of arr too small'
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+    jmin = MAX(1,i-kl)
+    jmax = MIN(n, i+ku)
+    DO j=jmin,jmax
+       ib =  kl+ku+i-j+1
+       mat%val(ib,j) = arr(j)
+    END DO
+  END SUBROUTINE putrow_gb
+!===========================================================================
+  SUBROUTINE putrow_ge(mat, i, arr)
+!
+!   Put a row from matrix
+!
+    TYPE(gemat), INTENT(inout) :: mat
+    INTEGER, INTENT(in) :: i
+    DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: arr
+    INTEGER :: n, kl, ku
+    INTEGER :: j, ib, jmin, jmax
+!
+    n = mat%rank
+    IF( SIZE(arr) .LT. n ) THEN
+       WRITE(*,*) 'PUTROW: size of arr too small'
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+    mat%val(i,:) = arr(:)
+  END SUBROUTINE putrow_ge
+!===========================================================================
+  SUBROUTINE putrow_pb(mat, i, arr)
+!
+!   Put a row from matrix
+!
+    TYPE(pbmat), INTENT(inout) :: mat
+    INTEGER, INTENT(in) :: i
+    DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: arr
+    INTEGER :: n, ku
+    INTEGER :: j, ib, jmin, jmax
+!
+    ku = mat%ku
+    n = mat%rank
+    IF( SIZE(arr) .LT. n ) THEN
+       WRITE(*,*) 'PUTROW: size of arr too small'
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+    jmin = i
+    jmax = MIN(n, i+ku)
+    DO j=jmin,jmax
+       ib =  ku+i-j+1
+       mat%val(ib,j) = arr(j)
+    END DO
+!
+    jmin=MAX(1,i-ku); jmax=i-1
+    DO j=jmin,jmax
+       ib=ku+1+j-i
+       mat%val(ib,i) = arr(j)
+    END DO
+  END SUBROUTINE putrow_pb
+!===========================================================================
+  SUBROUTINE putrow_zgb(mat, i, arr)
+!
+!   Put a row from matrix
+!
+    TYPE(zgbmat), INTENT(inout) :: mat
+    INTEGER, INTENT(in) :: i
+    DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: arr
+    INTEGER :: n, kl, ku
+    INTEGER :: j, ib, jmin, jmax
+!
+    kl = mat%kl
+    ku = mat%ku
+    n = mat%rank
+    IF( SIZE(arr) .LT. n ) THEN
+       WRITE(*,*) 'PUTROW: size of arr too small'
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+    jmin = MAX(1,i-kl)
+    jmax = MIN(n, i+ku)
+    DO j=jmin,jmax
+       ib =  kl+ku+i-j+1
+       mat%val(ib,j) = arr(j)
+    END DO
+  END SUBROUTINE putrow_zgb
+!===========================================================================
+  SUBROUTINE putrow_zpb(mat, i, arr)
+!
+!   Put a row from matrix
+!
+    TYPE(zpbmat), INTENT(inout) :: mat
+    INTEGER, INTENT(in) :: i
+    DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: arr
+    INTEGER :: n, ku
+    INTEGER :: j, ib, jmin, jmax
+!
+    ku = mat%ku
+    n = mat%rank
+    IF( SIZE(arr) .LT. n ) THEN
+       WRITE(*,*) 'PUTROW: size of arr too small'
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+    jmin = i
+    jmax = MIN(n, i+ku)
+    DO j=jmin,jmax
+       ib =  ku+i-j+1
+       mat%val(ib,j) = arr(j)
+    END DO
+!
+    jmin=MAX(1,i-ku); jmax=i-1
+    DO j=jmin,jmax
+       ib=ku+1+j-i
+       mat%val(ib,i) = CONJG(arr(j))
+    END DO
+  END SUBROUTINE putrow_zpb
+!===========================================================================
+  SUBROUTINE factor_gb(mat,flops)
+!
+!  Factor the matrix, using Lapack
+!
+    TYPE(gbmat), INTENT(inout) :: mat
+    DOUBLE PRECISION, OPTIONAL, INTENT (OUT)  :: flops
+    INTEGER :: lda, n, m, kl, ku
+    INTEGER :: info
+    DOUBLE PRECISION :: dopgb
+    EXTERNAL dopgb
+!
+    lda = SIZE(mat%val, 1)
+    kl = mat%kl
+    ku = mat%ku
+    m = mat%mrows
+    n = mat%ncols
+    CALL dgbtrf(m, n, kl, ku, mat%val, lda, mat%piv, info)
+    IF( info .NE. 0) THEN
+       WRITE(*,*) 'FACTOR: info from GBTRF ', info
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+    IF( PRESENT(flops) ) THEN
+       flops = dopgb('DGBTRF',m, n, kl, ku, mat%piv)
+    END IF
+  END SUBROUTINE factor_gb
+!===========================================================================
+  SUBROUTINE factor_periodic(mat)
+!
+!  Factor the periodic GB matrix, using the 
+!  Sherman-Morrisson-Woodburry formula
+!
+    TYPE(periodic_mat), INTENT(inout) :: mat
+    TYPE(gemat) :: hmat
+    DOUBLE PRECISION :: one=1.0d0
+    INTEGER :: bandw, mr, nc, i
+!
+    bandw = SIZE(mat%matvt,1)
+!
+! Factor A
+    CALL factor(mat%mat)
+!
+    IF(bandw .EQ. 0 ) RETURN   !  No off band terms
+!
+! U <-- A^(-1) * U
+    CALL bsolve(mat%mat, mat%matu)
+!
+! H <-- 1 + V^T * U
+    mr = SIZE(mat%matvt, 1)
+    nc = SIZE(mat%matvt, 2)
+    CALL init(mr, 0, hmat)  ! hmat is initialized to 0!
+    DO i=1,mr
+       hmat%val(i,i) = one
+    END DO
+    CALL dgemm('N', 'N', mr, mr, nc, one, mat%matvt, mr, &
+         &      mat%matu, nc, one, hmat%val, mr)
+!
+! V^T <-- H^(-1) V^T
+    CALL factor(hmat)
+    CALL bsolve(hmat, mat%matvt)
+    CALL destroy(hmat)
+!
+  END SUBROUTINE factor_periodic
+!===========================================================================
+  SUBROUTINE factor_zperiodic(mat)
+!
+!  Factor the periodic GB matrix, using the 
+!  Sherman-Morrisson-Woodburry formula
+!
+    TYPE(zperiodic_mat), INTENT(inout) :: mat
+    TYPE(zgemat) :: hmat
+    DOUBLE COMPLEX :: one=1.0d0
+    INTEGER :: bandw, mr, nc, i
+!
+    bandw = SIZE(mat%matvt,1)
+!
+! Factor A
+    CALL factor(mat%mat)
+!
+    IF(bandw .EQ. 0 ) RETURN   !  No off band terms
+!
+! U <-- A^(-1) * U
+    CALL bsolve(mat%mat, mat%matu)
+!
+! H <-- 1 + V^T * U
+    mr = SIZE(mat%matvt, 1)
+    nc = SIZE(mat%matvt, 2)
+    CALL init(mr, 0, hmat)  ! hmat is initialized to 0!
+    DO i=1,mr
+       hmat%val(i,i) = one
+    END DO
+    CALL zgemm('N', 'N', mr, mr, nc, one, mat%matvt, mr, &
+         &      mat%matu, nc, one, hmat%val, mr)
+!
+! V^T <-- H^(-1) V^T
+    CALL factor(hmat)
+    CALL bsolve(hmat, mat%matvt)
+    CALL destroy(hmat)
+!
+  END SUBROUTINE factor_zperiodic
+!===========================================================================
+  SUBROUTINE factor_pb(mat,flops)
+!
+!  Factor the matrix, using Lapack
+!
+    TYPE(pbmat), INTENT(inout) :: mat
+    DOUBLE PRECISION, OPTIONAL, INTENT (OUT)  :: flops
+    INTEGER :: lda, n, ku
+    INTEGER :: info
+    DOUBLE PRECISION :: dopla
+    EXTERNAL dopla
+!
+    lda = SIZE(mat%val, 1)
+    ku = mat%ku
+    n = mat%rank
+    CALL dpbtrf('U', n, ku, mat%val, lda, info)
+    IF( info .NE. 0) THEN
+       WRITE(*,*) 'FACTOR: info from PBTRF ', info
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+    IF( PRESENT(flops) ) THEN
+       flops = dopla('DPBTRF', n, n, ku, ku, 1)
+    END IF
+  END SUBROUTINE factor_pb
+!===========================================================================
+  SUBROUTINE factor_ge(mat,flops)
+!
+!  Factor the matrix, using Lapack
+!
+    TYPE(gemat), INTENT(inout) :: mat
+    DOUBLE PRECISION, OPTIONAL, INTENT (OUT)  :: flops
+    INTEGER :: n, m
+    INTEGER :: info
+    DOUBLE PRECISION :: dopla
+    EXTERNAL dopla
+!
+    m = mat%mrows
+    n = mat%ncols
+    CALL dgetrf(m, n, mat%val, m, mat%piv, info)
+    IF( info .NE. 0) THEN
+       WRITE(*,*) 'FACTOR: info from GETRF ', info
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+    IF( PRESENT(flops) ) THEN
+       flops = dopla('DGETRF',m, n, 0, 0, 0)
+    END IF
+  END SUBROUTINE factor_ge
+!===========================================================================
+  SUBROUTINE factor_zgb(mat,flops)
+!
+!  Factor the matrix, using Lapack
+!
+    TYPE(zgbmat), INTENT(inout) :: mat
+    DOUBLE PRECISION, OPTIONAL, INTENT (OUT)  :: flops
+    INTEGER :: lda, n, m, kl, ku
+    INTEGER :: info
+    DOUBLE PRECISION :: dopgb
+    EXTERNAL dopgb
+!
+    lda = SIZE(mat%val, 1)
+    kl = mat%kl
+    ku = mat%ku
+    m = mat%mrows
+    n = mat%ncols
+    CALL zgbtrf(m, n, kl, ku, mat%val, lda, mat%piv, info)
+    IF( info .NE. 0) THEN
+       WRITE(*,*) 'FACTOR: info from GBTRF ', info
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+    IF( PRESENT(flops) ) THEN
+       flops = dopgb('ZGBTRF',m, n, kl, ku, mat%piv)
+    END IF
+  END SUBROUTINE factor_zgb
+!===========================================================================
+  SUBROUTINE factor_zpb(mat,flops)
+!
+!  Factor the matrix, using Lapack
+!
+    TYPE(zpbmat), INTENT(inout) :: mat
+    DOUBLE PRECISION, OPTIONAL, INTENT (OUT)  :: flops
+    INTEGER :: lda, n, ku
+    INTEGER :: info
+    DOUBLE PRECISION :: dopla
+    EXTERNAL dopla
+!
+    lda = SIZE(mat%val, 1)
+    ku = mat%ku
+    n = mat%rank
+    CALL zpbtrf('U', n, ku, mat%val, lda, info)
+    IF( info .NE. 0) THEN
+       WRITE(*,*) 'FACTOR: info from PBTRF ', info
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+    IF( PRESENT(flops) ) THEN
+       flops = dopla('ZPBTRF', n, n, ku, ku, 1)
+    END IF
+  END SUBROUTINE factor_zpb
+!===========================================================================
+  SUBROUTINE factor_zge(mat,flops)
+!
+!  Factor the matrix, using Lapack
+!
+    TYPE(zgemat), INTENT(inout) :: mat
+    DOUBLE PRECISION, OPTIONAL, INTENT (OUT)  :: flops
+    INTEGER :: n, m
+    INTEGER :: info
+    DOUBLE PRECISION :: dopla
+    EXTERNAL dopla
+!
+    m = mat%mrows
+    n = mat%ncols
+    CALL zgetrf(m, n, mat%val, m, mat%piv, info)
+    IF( info .NE. 0) THEN
+       WRITE(*,*) 'FACTOR: info from GETRF ', info
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+    IF( PRESENT(flops) ) THEN
+       flops = dopla('ZGETRF',m, n, 0, 0, 0)
+    END IF
+  END SUBROUTINE factor_zge
+!===========================================================================
+  SUBROUTINE bsolve_gb1(mat, rhs, sol)
+!
+!   Backsolve, using Lapack
+!
+    TYPE(gbmat), INTENT(inout) :: mat
+    DOUBLE PRECISION, DIMENSION (:) :: rhs
+    DOUBLE PRECISION, DIMENSION (:), OPTIONAL, INTENT (out) :: sol
+    INTEGER :: lda, n, kl, ku
+    INTEGER :: info
+!----------------------------------------------------------------------
+    lda = SIZE(mat%val, 1)
+    kl = mat%kl
+    ku = mat%ku
+    n = mat%rank
+!
+    IF( PRESENT(sol) ) THEN
+       sol = rhs
+       CALL dgbtrs('N', n, kl, ku, 1, mat%val, lda, mat%piv, sol, n, info)
+    ELSE
+       CALL dgbtrs('N', n, kl, ku, 1, mat%val, lda, mat%piv, rhs, n, info)
+    END IF
+    IF( info .NE. 0) THEN
+       WRITE(*,*) 'FACTOR: info from GBTRS ', info
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+  END SUBROUTINE bsolve_gb1
+!===========================================================================
+  SUBROUTINE bsolve_periodic1(mat, rhs, sol)
+!
+!   Backsolve, using the Sherman-Morrison-Woodburry formula
+!
+    TYPE(periodic_mat), INTENT(inout) :: mat
+    DOUBLE PRECISION, DIMENSION (:) :: rhs
+    DOUBLE PRECISION, DIMENSION (:), OPTIONAL, INTENT (out) :: sol
+    DOUBLE PRECISION :: one=1.0d0, zero=0.0d0, minus1=-1.0d0
+    DOUBLE PRECISION, ALLOCATABLE :: tarr(:,:)
+    INTEGER :: rank, bandw, nrhs
+    INTEGER :: info
+!----------------------------------------------------------------------
+    rank = mat%mat%rank
+    bandw = SIZE(mat%matvt,1)
+    nrhs = 1
+!
+!  Solve Ay = f
+    IF( PRESENT(sol) ) THEN
+       CALL bsolve(mat%mat, rhs, sol)
+    ELSE
+       CALL bsolve(mat%mat, rhs)
+    END IF
+!
+    IF(bandw .EQ. 0 ) RETURN   !  No off band terms
+!
+!  t = V^T*y ( = W^T*y )
+    ALLOCATE(tarr(bandw,nrhs))
+    IF( PRESENT(sol) ) THEN
+       CALL dgemm('N', 'N', bandw, nrhs, rank, one, mat%matvt, bandw, sol, &
+            &      rank, zero, tarr, bandw)
+    ELSE
+       CALL dgemm('N', 'N', bandw, nrhs, rank, one, mat%matvt, bandw, rhs, &
+            &      rank, zero, tarr, bandw)
+    END IF
+!
+!  y = y - U*t ( = y-Z*t)
+   IF( PRESENT(sol) ) THEN
+      CALL dgemm('N', 'N', rank, nrhs, bandw, minus1, mat%matu, rank, tarr, &
+           &      bandw, one, sol, rank)
+   ELSE
+      CALL dgemm('N', 'N', rank, nrhs, bandw, minus1, mat%matu, rank, tarr, &
+           &      bandw, one, rhs, rank)
+   END IF
+!
+    DEALLOCATE(tarr)
+  END SUBROUTINE bsolve_periodic1
+!===========================================================================
+  SUBROUTINE bsolve_zperiodic1(mat, rhs, sol)
+!
+!   Backsolve, using the Sherman-Morrison-Woodburry formula
+!
+    TYPE(zperiodic_mat), INTENT(inout) :: mat
+    DOUBLE COMPLEX, DIMENSION (:) :: rhs
+    DOUBLE COMPLEX, DIMENSION (:), OPTIONAL, INTENT (out) :: sol
+    DOUBLE COMPLEX :: one=1.0d0, zero=0.0d0, minus1=-1.0d0
+    DOUBLE COMPLEX, ALLOCATABLE :: tarr(:,:)
+    INTEGER :: rank, bandw, nrhs
+    INTEGER :: info
+!----------------------------------------------------------------------
+    rank = mat%mat%rank
+    bandw = SIZE(mat%matvt,1)
+    nrhs = 1
+!
+!  Solve Ay = f
+    IF( PRESENT(sol) ) THEN
+       CALL bsolve(mat%mat, rhs, sol)
+    ELSE
+       CALL bsolve(mat%mat, rhs)
+    END IF
+!
+    IF(bandw .EQ. 0 ) RETURN   !  No off band terms
+!
+!  t = V^T*y ( = W^T*y )
+    ALLOCATE(tarr(bandw,nrhs))
+    IF( PRESENT(sol) ) THEN
+       CALL zgemm('N', 'N', bandw, nrhs, rank, one, mat%matvt, bandw, sol, &
+            &      rank, zero, tarr, bandw)
+    ELSE
+       CALL zgemm('N', 'N', bandw, nrhs, rank, one, mat%matvt, bandw, rhs, &
+            &      rank, zero, tarr, bandw)
+    END IF
+!
+!  y = y - U*t ( = y-Z*t)
+   IF( PRESENT(sol) ) THEN
+      CALL zgemm('N', 'N', rank, nrhs, bandw, minus1, mat%matu, rank, tarr, &
+           &      bandw, one, sol, rank)
+   ELSE
+      CALL zgemm('N', 'N', rank, nrhs, bandw, minus1, mat%matu, rank, tarr, &
+           &      bandw, one, rhs, rank)
+   END IF
+!
+    DEALLOCATE(tarr)
+  END SUBROUTINE bsolve_zperiodic1
+!===========================================================================
+  SUBROUTINE bsolve_pb1(mat, rhs, sol)
+!
+!   Backsolve, using Lapack
+!
+    TYPE(pbmat), INTENT(inout) :: mat
+    DOUBLE PRECISION, DIMENSION (:) :: rhs
+    DOUBLE PRECISION, DIMENSION (:), OPTIONAL, INTENT (out) :: sol
+    INTEGER :: lda, n, ku
+    INTEGER :: info
+!----------------------------------------------------------------------
+    lda = SIZE(mat%val, 1)
+    ku = mat%ku
+    n = mat%rank
+!
+    IF( PRESENT(sol) ) THEN
+       sol = rhs
+       CALL dpbtrs('U', n, ku, 1, mat%val, lda, sol, n, info)
+    ELSE
+       CALL dpbtrs('U', n, ku, 1, mat%val, lda, rhs, n, info)
+    END IF
+    IF( info .NE. 0) THEN
+       WRITE(*,*) 'FACTOR: info from PBTRS ', info
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+  END SUBROUTINE bsolve_pb1
+!===========================================================================
+  SUBROUTINE bsolve_ge1(mat, rhs, sol)
+!
+!   Backsolve, using Lapack
+!
+    TYPE(gemat), INTENT(inout) :: mat
+    DOUBLE PRECISION, DIMENSION (:) :: rhs
+    DOUBLE PRECISION, DIMENSION (:), OPTIONAL, INTENT (out) :: sol
+    INTEGER :: n
+    INTEGER :: info
+!----------------------------------------------------------------------
+    n = mat%rank
+!
+    IF( PRESENT(sol) ) THEN
+       sol = rhs
+       CALL dgetrs('N', n, 1, mat%val, n, mat%piv, sol, n, info)
+    ELSE
+       CALL dgetrs('N', n, 1, mat%val, n, mat%piv, rhs, n, info)
+    END IF
+    IF( info .NE. 0) THEN
+       WRITE(*,*) 'FACTOR: info from GETRS ', info
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+  END SUBROUTINE bsolve_ge1
+!===========================================================================
+  SUBROUTINE bsolve_gbn(mat, rhs, sol)
+!
+!   Backsolve, using Lapack
+!
+    TYPE(gbmat), INTENT(inout) :: mat
+    DOUBLE PRECISION, DIMENSION (:,:) :: rhs
+    DOUBLE PRECISION, DIMENSION (:,:), OPTIONAL, INTENT (out) :: sol
+    INTEGER :: lda, n, nrhs, kl, ku
+    INTEGER :: info
+!----------------------------------------------------------------------
+    lda = SIZE(mat%val, 1)
+    kl = mat%kl
+    ku = mat%ku
+    n = mat%rank
+    nrhs = SIZE(rhs,2)
+!
+    IF( PRESENT(sol) ) THEN
+       sol(:,1:nrhs) = rhs(:,1:nrhs)
+       CALL dgbtrs('N', n, kl, ku, nrhs, mat%val, lda, mat%piv, sol, n, info)
+    ELSE
+       CALL dgbtrs('N', n, kl, ku, nrhs, mat%val, lda, mat%piv, rhs, n, info)
+    END IF
+    IF( info .NE. 0) THEN
+       WRITE(*,*) 'FACTOR: info from GBTRS ', info
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+  END SUBROUTINE bsolve_gbn
+!===========================================================================
+  SUBROUTINE bsolve_periodicn(mat, rhs, sol)
+!
+!   Backsolve, using the Sherman-Morrison-Woodburry formula
+!
+    TYPE(periodic_mat), INTENT(inout) :: mat
+    DOUBLE PRECISION, DIMENSION (:,:) :: rhs
+    DOUBLE PRECISION, DIMENSION (:,:), OPTIONAL, INTENT (out) :: sol
+    DOUBLE PRECISION :: one=1.0d0, zero=0.0d0, minus1=-1.0d0
+    DOUBLE PRECISION, ALLOCATABLE :: tarr(:,:)
+    INTEGER :: rank, bandw, nrhs
+    INTEGER :: info
+!----------------------------------------------------------------------
+    rank = mat%mat%rank
+    bandw = SIZE(mat%matvt,1)
+    nrhs = SIZE(rhs,2)
+!
+!  Solve Ay = f
+    IF( PRESENT(sol) ) THEN
+       CALL bsolve(mat%mat, rhs, sol)
+    ELSE
+       CALL bsolve(mat%mat, rhs)
+    END IF
+!
+    IF(bandw .EQ. 0 ) RETURN   !  No off band terms
+!
+!  t = V^T*y ( = W^T*y )
+    ALLOCATE(tarr(bandw,nrhs))
+    IF( PRESENT(sol) ) THEN
+       CALL dgemm('N', 'N', bandw, nrhs, rank, one, mat%matvt, bandw, sol, &
+            &      rank, zero, tarr, bandw)
+    ELSE
+       CALL dgemm('N', 'N', bandw, nrhs, rank, one, mat%matvt, bandw, rhs, &
+            &      rank, zero, tarr, bandw)
+    END IF
+!
+!  y = y - U*t ( = y-Z*t)
+   IF( PRESENT(sol) ) THEN
+      CALL dgemm('N', 'N', rank, nrhs, bandw, minus1, mat%matu, rank, tarr, &
+           &      bandw, one, sol, rank)
+   ELSE
+      CALL dgemm('N', 'N', rank, nrhs, bandw, minus1, mat%matu, rank, tarr, &
+           &      bandw, one, rhs, rank)
+   END IF
+!
+    DEALLOCATE(tarr)
+  END SUBROUTINE bsolve_periodicn
+!===========================================================================
+  SUBROUTINE bsolve_zperiodicn(mat, rhs, sol)
+!
+!   Backsolve, using the Sherman-Morrison-Woodburry formula
+!
+    TYPE(zperiodic_mat), INTENT(inout) :: mat
+    DOUBLE COMPLEX, DIMENSION (:,:) :: rhs
+    DOUBLE COMPLEX, DIMENSION (:,:), OPTIONAL, INTENT (out) :: sol
+    DOUBLE COMPLEX :: one=1.0d0, zero=0.0d0, minus1=-1.0d0
+    DOUBLE COMPLEX, ALLOCATABLE :: tarr(:,:)
+    INTEGER :: rank, bandw, nrhs
+    INTEGER :: info
+!----------------------------------------------------------------------
+    rank = mat%mat%rank
+    bandw = SIZE(mat%matvt,1)
+    nrhs = SIZE(rhs,2)
+!
+!  Solve Ay = f
+    IF( PRESENT(sol) ) THEN
+       CALL bsolve(mat%mat, rhs, sol)
+    ELSE
+       CALL bsolve(mat%mat, rhs)
+    END IF
+!
+    IF(bandw .EQ. 0 ) RETURN   !  No off band terms
+!
+!  t = V^T*y ( = W^T*y )
+    ALLOCATE(tarr(bandw,nrhs))
+    IF( PRESENT(sol) ) THEN
+       CALL zgemm('N', 'N', bandw, nrhs, rank, one, mat%matvt, bandw, sol, &
+            &      rank, zero, tarr, bandw)
+    ELSE
+       CALL zgemm('N', 'N', bandw, nrhs, rank, one, mat%matvt, bandw, rhs, &
+            &      rank, zero, tarr, bandw)
+    END IF
+!
+!  y = y - U*t ( = y-Z*t)
+   IF( PRESENT(sol) ) THEN
+      CALL zgemm('N', 'N', rank, nrhs, bandw, minus1, mat%matu, rank, tarr, &
+           &      bandw, one, sol, rank)
+   ELSE
+      CALL zgemm('N', 'N', rank, nrhs, bandw, minus1, mat%matu, rank, tarr, &
+           &      bandw, one, rhs, rank)
+   END IF
+!
+    DEALLOCATE(tarr)
+  END SUBROUTINE bsolve_zperiodicn
+!===========================================================================
+  SUBROUTINE bsolve_pbn(mat, rhs, sol)
+!
+!   Backsolve, using Lapack
+!
+    TYPE(pbmat), INTENT(inout) :: mat
+    DOUBLE PRECISION, DIMENSION (:,:) :: rhs
+    DOUBLE PRECISION, DIMENSION (:,:), OPTIONAL, INTENT (out) :: sol
+    INTEGER :: lda, n, nrhs, ku
+    INTEGER :: info
+!----------------------------------------------------------------------
+    lda = SIZE(mat%val, 1)
+    ku = mat%ku
+    n = mat%rank
+    nrhs = SIZE(rhs,2)
+!
+    IF( PRESENT(sol) ) THEN
+       sol(:,1:nrhs) = rhs(:,1:nrhs)
+       CALL dpbtrs('U', n, ku, nrhs, mat%val, lda, sol, n, info)
+    ELSE
+       CALL dpbtrs('U', n, ku, nrhs, mat%val, lda, rhs, n, info)
+    END IF
+    IF( info .NE. 0) THEN
+       WRITE(*,*) 'FACTOR: info from GBTRS ', info
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+  END SUBROUTINE bsolve_pbn
+!===========================================================================
+  SUBROUTINE bsolve_gen(mat, rhs, sol)
+!
+!   Backsolve, using Lapack
+!
+    TYPE(gemat), INTENT(inout) :: mat
+    DOUBLE PRECISION, DIMENSION (:,:) :: rhs
+    DOUBLE PRECISION, DIMENSION (:,:), OPTIONAL, INTENT (out) :: sol
+    INTEGER :: n, nrhs
+    INTEGER :: info
+!----------------------------------------------------------------------
+    n = mat%rank
+    nrhs = SIZE(rhs,2)
+!
+    IF( PRESENT(sol) ) THEN
+       sol(:,1:nrhs) = rhs
+       CALL dgetrs('N', n, nrhs, mat%val, n, mat%piv, sol, n, info)
+    ELSE
+       CALL dgetrs('N', n, nrhs, mat%val, n, mat%piv, rhs, n, info)
+    END IF
+    IF( info .NE. 0) THEN
+       WRITE(*,*) 'FACTOR: info from GETRS ', info
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+  END SUBROUTINE bsolve_gen
+!===========================================================================
+  SUBROUTINE bsolve_zgb1(mat, rhs, sol)
+!
+!   Backsolve, using Lapack
+!
+    TYPE(zgbmat), INTENT(inout) :: mat
+    DOUBLE COMPLEX, DIMENSION (:) :: rhs
+    DOUBLE COMPLEX, DIMENSION (:), OPTIONAL, INTENT (out) :: sol
+    INTEGER :: lda, n, kl, ku
+    INTEGER :: info
+!----------------------------------------------------------------------
+    lda = SIZE(mat%val, 1)
+    kl = mat%kl
+    ku = mat%ku
+    n = mat%rank
+!
+    IF( PRESENT(sol) ) THEN
+       sol = rhs
+       CALL zgbtrs('N', n, kl, ku, 1, mat%val, lda, mat%piv, sol, n, info)
+    ELSE
+       CALL zgbtrs('N', n, kl, ku, 1, mat%val, lda, mat%piv, rhs, n, info)
+    END IF
+    IF( info .NE. 0) THEN
+       WRITE(*,*) 'FACTOR: info from GBTRS ', info
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+  END SUBROUTINE bsolve_zgb1
+!===========================================================================
+  SUBROUTINE bsolve_zpb1(mat, rhs, sol)
+!
+!   Backsolve, using Lapack
+!
+    TYPE(zpbmat), INTENT(inout) :: mat
+    DOUBLE COMPLEX, DIMENSION (:) :: rhs
+    DOUBLE COMPLEX, DIMENSION (:), OPTIONAL, INTENT (out) :: sol
+    INTEGER :: lda, n, ku
+    INTEGER :: info
+!----------------------------------------------------------------------
+    lda = SIZE(mat%val, 1)
+    ku = mat%ku
+    n = mat%rank
+!
+    IF( PRESENT(sol) ) THEN
+       sol = rhs
+       CALL zpbtrs('U', n, ku, 1, mat%val, lda, sol, n, info)
+    ELSE
+       CALL zpbtrs('U', n, ku, 1, mat%val, lda, rhs, n, info)
+    END IF
+    IF( info .NE. 0) THEN
+       WRITE(*,*) 'FACTOR: info from PBTRS ', info
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+  END SUBROUTINE bsolve_zpb1
+!===========================================================================
+  SUBROUTINE bsolve_zge1(mat, rhs, sol)
+!
+!   Backsolve, using Lapack
+!
+    TYPE(zgemat), INTENT(inout) :: mat
+    DOUBLE COMPLEX, DIMENSION (:) :: rhs
+    DOUBLE COMPLEX, DIMENSION (:), OPTIONAL, INTENT (out) :: sol
+    INTEGER :: n
+    INTEGER :: info
+!----------------------------------------------------------------------
+    n = mat%rank
+!
+    IF( PRESENT(sol) ) THEN
+       sol = rhs
+       CALL zgetrs('N', n, 1, mat%val, n, mat%piv, sol, n, info)
+    ELSE
+       CALL zgetrs('N', n, 1, mat%val, n, mat%piv, rhs, n, info)
+    END IF
+    IF( info .NE. 0) THEN
+       WRITE(*,*) 'FACTOR: info from GETRS ', info
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+  END SUBROUTINE bsolve_zge1
+!===========================================================================
+  SUBROUTINE bsolve_zgbn(mat, rhs, sol)
+!
+!   Backsolve, using Lapack
+!
+    TYPE(zgbmat), INTENT(inout) :: mat
+    DOUBLE COMPLEX, DIMENSION (:,:) :: rhs
+    DOUBLE COMPLEX, DIMENSION (:,:), OPTIONAL, INTENT (out) :: sol
+    INTEGER :: lda, n, nrhs, kl, ku
+    INTEGER :: info
+!----------------------------------------------------------------------
+    lda = SIZE(mat%val, 1)
+    kl = mat%kl
+    ku = mat%ku
+    n = mat%rank
+    nrhs = SIZE(rhs,2)
+!
+    IF( PRESENT(sol) ) THEN
+       sol(:,1:nrhs) = rhs(:,1:nrhs)
+       CALL zgbtrs('N', n, kl, ku, nrhs, mat%val, lda, mat%piv, sol, n, info)
+    ELSE
+       CALL zgbtrs('N', n, kl, ku, nrhs, mat%val, lda, mat%piv, rhs, n, info)
+    END IF
+    IF( info .NE. 0) THEN
+       WRITE(*,*) 'FACTOR: info from GBTRS ', info
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+  END SUBROUTINE bsolve_zgbn
+!===========================================================================
+  SUBROUTINE bsolve_zpbn(mat, rhs, sol)
+!
+!   Backsolve, using Lapack
+!
+    TYPE(zpbmat), INTENT(inout) :: mat
+    DOUBLE COMPLEX, DIMENSION (:,:) :: rhs
+    DOUBLE COMPLEX, DIMENSION (:,:), OPTIONAL, INTENT (out) :: sol
+    INTEGER :: lda, n, nrhs, ku
+    INTEGER :: info
+!----------------------------------------------------------------------
+    lda = SIZE(mat%val, 1)
+    ku = mat%ku
+    n = mat%rank
+    nrhs = SIZE(rhs,2)
+!
+    IF( PRESENT(sol) ) THEN
+       sol(:,1:nrhs) = rhs(:,1:nrhs)
+       CALL zpbtrs('U', n, ku, nrhs, mat%val, lda, sol, n, info)
+    ELSE
+       CALL zpbtrs('U', n, ku, nrhs, mat%val, lda, rhs, n, info)
+    END IF
+    IF( info .NE. 0) THEN
+       WRITE(*,*) 'FACTOR: info from GBTRS ', info
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+  END SUBROUTINE bsolve_zpbn
+!===========================================================================
+  SUBROUTINE bsolve_zgen(mat, rhs, sol)
+!
+!   Backsolve, using Lapack
+!
+    TYPE(zgemat), INTENT(inout) :: mat
+    DOUBLE COMPLEX, DIMENSION (:,:) :: rhs
+    DOUBLE COMPLEX, DIMENSION (:,:), OPTIONAL, INTENT (out) :: sol
+    INTEGER :: n, nrhs
+    INTEGER :: info
+!----------------------------------------------------------------------
+    n = mat%rank
+    nrhs = SIZE(rhs,2)
+!
+    IF( PRESENT(sol) ) THEN
+       sol(:,1:nrhs) = rhs
+       CALL zgetrs('N', n, nrhs, mat%val, n, mat%piv, sol, n, info)
+    ELSE
+       CALL zgetrs('N', n, nrhs, mat%val, n, mat%piv, rhs, n, info)
+    END IF
+    IF( info .NE. 0) THEN
+       WRITE(*,*) 'FACTOR: info from GETRS ', info
+       STOP '*** Abnormal EXIT in MODULE matrix ***'
+    END IF
+  END SUBROUTINE bsolve_zgen
+!===========================================================================
+  FUNCTION vmx_gb(mat, x, trans) RESULT(vmx)
+!
+!   Return product mat*x
+!
+    TYPE(gbmat), INTENT(in) :: mat
+    DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: x
+    CHARACTER(len=1), OPTIONAL :: trans
+    DOUBLE PRECISION, ALLOCATABLE :: vmx(:)
+    DOUBLE PRECISION :: one=1.0d0, zero=0.0d0
+    INTEGER :: lda, kl, ku, m, n, j, imin, imax, ibmin, ibmax
+    CHARACTER(len=1) :: trans_loc
+!
+    lda = SIZE(mat%val, 1)
+    kl = mat%kl
+    ku = mat%ku
+    m = mat%mrows
+    n = mat%ncols
+    trans_loc = 'N'
+    IF(PRESENT(trans)) trans_loc = trans
+!
+    IF(trans_loc.EQ.'N') THEN
+       ALLOCATE(vmx(m))
+    ELSE
+       ALLOCATE(vmx(n))
+    END IF
+!
+    CALL dgbmv(trans_loc, m, n, kl, ku, one, mat%val(kl+1,1), lda, x, 1, zero,&
+         &      vmx, 1)
+  END FUNCTION vmx_gb
+!===========================================================================
+  FUNCTION vmx_ge(mat, x, trans) RESULT(vmx)
+!
+!   Return product mat*x
+!
+    TYPE(gemat), INTENT(in) :: mat
+    DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: x
+    CHARACTER(len=1), OPTIONAL :: trans
+    DOUBLE PRECISION, ALLOCATABLE :: vmx(:)
+    DOUBLE PRECISION :: one=1.0d0, zero=0.0d0
+    INTEGER :: lda, m, n
+    CHARACTER(len=1) :: trans_loc
+!
+    lda = SIZE(mat%val, 1)
+    m = mat%mrows
+    n = mat%ncols
+    trans_loc = 'N'
+    IF(PRESENT(trans)) trans_loc = trans
+!
+    IF(trans_loc.EQ.'N') THEN
+       ALLOCATE(vmx(m))
+    ELSE
+       ALLOCATE(vmx(n))
+    END IF
+!
+    CALL dgemv(trans_loc, m, n, one, mat%val, lda, x, 1, zero, vmx, 1)
+  END FUNCTION vmx_ge
+!===========================================================================
+  FUNCTION vmx_gen(mat, x, trans) RESULT(vmx)
+!
+!   Return product mat*x
+!
+    TYPE(gemat), INTENT(in) :: mat
+    DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: x
+    CHARACTER(len=1), OPTIONAL :: trans
+    DOUBLE PRECISION, ALLOCATABLE :: vmx(:,:)
+    DOUBLE PRECISION :: one=1.0d0, zero=0.0d0
+    INTEGER :: lda, ldb, m, n, k
+    CHARACTER(len=1) :: trans_loc
+!
+    lda = SIZE(mat%val, 1)
+    ldb = SIZE(x,1)
+    trans_loc = 'N'
+    IF(PRESENT(trans)) trans_loc = trans
+!
+    IF(trans_loc.EQ.'N') THEN
+       m = mat%mrows
+       n = SIZE(x,2)
+       k = mat%ncols
+       ALLOCATE(vmx(m,n))
+    ELSE
+       m = mat%ncols
+       n = SIZE(x,2)
+       k = mat%mrows
+       ALLOCATE(vmx(m,n))
+    END IF
+!
+    CALL dgemm(trans_loc, 'N', m, n, k, one, mat%val, lda, x, ldb, zero, vmx, &
+         &     lda)
+!
+  END FUNCTION vmx_gen
+!===========================================================================
+  FUNCTION vmx_zge(mat, x, trans) RESULT(vmx)
+!
+!   Return product mat*x
+!
+    TYPE(zgemat), INTENT(in) :: mat
+    DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: x
+    CHARACTER(len=1), OPTIONAL :: trans
+    DOUBLE COMPLEX, ALLOCATABLE :: vmx(:)
+    DOUBLE COMPLEX :: one=1.0d0, zero=0.0d0
+    INTEGER :: lda, m, n
+    CHARACTER(len=1) :: trans_loc
+!
+    lda = SIZE(mat%val, 1)
+    m = mat%mrows
+    n = mat%ncols
+    trans_loc = 'N'
+    IF(PRESENT(trans)) trans_loc = trans
+!
+    IF(trans_loc.EQ.'N') THEN
+       ALLOCATE(vmx(m))
+    ELSE
+       ALLOCATE(vmx(n))
+    END IF
+!
+    CALL zgemv(trans_loc, m, n, one, mat%val, lda, x, 1, zero, vmx, 1)
+  END FUNCTION vmx_zge
+!===========================================================================
+  FUNCTION vmx_zgen(mat, x, trans) RESULT(vmx)
+!
+!   Return product mat*x
+!
+    TYPE(zgemat), INTENT(in) :: mat
+    DOUBLE COMPLEX, DIMENSION(:,:), INTENT(in) :: x
+    CHARACTER(len=1), OPTIONAL :: trans
+    DOUBLE COMPLEX, ALLOCATABLE :: vmx(:,:)
+    DOUBLE COMPLEX :: one=1.0d0, zero=0.0d0
+    INTEGER :: lda, ldb, m, n, k
+    CHARACTER(len=1) :: trans_loc
+!
+    lda = SIZE(mat%val, 1)
+    ldb = SIZE(x,1)
+    trans_loc = 'N'
+    IF(PRESENT(trans)) trans_loc = trans
+!
+    IF(trans_loc.EQ.'N') THEN
+       m = mat%mrows
+       n = SIZE(x,2)
+       k = mat%ncols
+       ALLOCATE(vmx(m,n))
+    ELSE
+       m = mat%ncols
+       n = SIZE(x,2)
+       k = mat%mrows
+       ALLOCATE(vmx(m,n))
+    END IF
+!
+    CALL zgemm(trans_loc, 'N', m, n, k, one, mat%val, lda, x, ldb, zero, vmx, &
+         &     lda)
+!
+  END FUNCTION vmx_zgen
+!===========================================================================
+  FUNCTION vmx_periodic(mat, x)
+!
+!   Return product mat*x
+!
+    TYPE(periodic_mat), INTENT(in) :: mat
+    DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: x
+    DOUBLE PRECISION, DIMENSION(SIZE(x)) :: vmx_periodic
+    INTEGER :: kl, ku, n, i, ii
+!
+    kl = mat%mat%kl
+    ku = mat%mat%ku
+    n  = mat%mat%rank
+!
+    vmx_periodic = vmx(mat%mat, x)
+!
+    DO i=1,kl
+       vmx_periodic(i) =  vmx_periodic(i) + &
+            &             DOT_PRODUCT(mat%matvt(i,n-kl+1:n), x(n-kl+1:n))
+    END DO
+!
+    DO i=n-ku+1,n
+       ii = i-n+ku+kl
+       vmx_periodic(i) =  vmx_periodic(i) + &
+            &             DOT_PRODUCT(mat%matvt(ii,1:ku), x(1:ku))
+    END DO
+  END FUNCTION vmx_periodic
+!===========================================================================
+  FUNCTION vmx_zperiodic(mat, x)
+!
+!   Return product mat*x
+!
+    TYPE(zperiodic_mat), INTENT(in) :: mat
+    DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: x
+    DOUBLE COMPLEX, DIMENSION(SIZE(x)) :: vmx_zperiodic
+    INTEGER :: kl, ku, n, i, ii
+!
+    kl = mat%mat%kl
+    ku = mat%mat%ku
+    n  = mat%mat%rank
+!
+    vmx_zperiodic = vmx(mat%mat, x)
+!
+    DO i=1,kl
+       vmx_zperiodic(i) =  vmx_zperiodic(i) + &
+            &             DOT_PRODUCT(mat%matvt(i,n-kl+1:n), x(n-kl+1:n))
+    END DO
+!
+    DO i=n-ku+1,n
+       ii = i-n+ku+kl
+       vmx_zperiodic(i) =  vmx_zperiodic(i) + &
+            &             DOT_PRODUCT(mat%matvt(ii,1:ku), x(1:ku))
+    END DO
+  END FUNCTION vmx_zperiodic
+!===========================================================================
+  FUNCTION vmx_gbn(mat, x, trans) RESULT(vmx)
+!
+!   Return product mat*x
+!
+    TYPE(gbmat), INTENT(in) :: mat
+    DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: x
+    CHARACTER(len=1), OPTIONAL :: trans
+    DOUBLE PRECISION, ALLOCATABLE :: vmx(:,:)
+    DOUBLE PRECISION :: one=1.0d0, zero=0.0d0
+    INTEGER :: lda, kl, ku, m, n, j, k, imin, imax, ibmin, ibmax
+    CHARACTER(len=1) :: trans_loc
+!
+    lda = SIZE(mat%val, 1)
+    kl = mat%kl
+    ku = mat%ku
+    m = mat%mrows
+    n = mat%ncols
+    trans_loc = 'N'
+    IF(PRESENT(trans)) trans_loc = trans
+!
+    IF(trans_loc.EQ.'N') THEN
+       ALLOCATE(vmx(m,SIZE(x,2)))
+    ELSE
+       ALLOCATE(vmx(n,SIZE(x,2)))
+    END IF
+!
+    DO k=1,SIZE(x,2)
+       CALL dgbmv(trans_loc, m, n, kl, ku, one, mat%val(kl+1,1), lda, &
+               &      x(1,k), 1, zero, vmx(1,k), 1)
+    END DO
+  END FUNCTION vmx_gbn
+!===========================================================================
+  FUNCTION vmx_pb(mat, x)
+!
+!   Return product mat*x
+!
+    TYPE(pbmat), INTENT(in) :: mat
+    DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: x
+    DOUBLE PRECISION, DIMENSION(SIZE(x)) :: vmx_pb
+    INTEGER :: lda, ku, n, i, j, imin, imax, ib, ibmin, ibmax
+!
+    lda = SIZE(mat%val, 1)
+    ku = mat%ku
+    n = mat%rank
+!
+    vmx_pb = 0.0d0
+    DO j=1,n
+       imin = MAX(1,j-ku); imax = j  ! The column in the upper diagonal part
+       ibmin =  ku+imin-j+1; ibmax =  ku+imax-j+1
+       vmx_pb(imin:imax) =  vmx_pb(imin:imax) + mat%val(ibmin:ibmax,j)*x(j)
+!
+       imin=j+1; imax = MIN(n,j+ku)  ! The column in the lower diagonal part
+       DO i=imin,imax
+          ib = ku+1+j-i
+          vmx_pb(i) = vmx_pb(i) + mat%val(ib,i)*x(j)
+       END DO
+    END DO
+  END FUNCTION vmx_pb
+!===========================================================================
+  FUNCTION vmx_pbn(mat, x)
+!
+!   Return product mat*x
+!
+    TYPE(pbmat), INTENT(in) :: mat
+    DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: x
+    DOUBLE PRECISION, DIMENSION(SIZE(x,1),SIZE(x,2)) :: vmx_pbn
+    INTEGER :: lda, ku, n, i, j, k, imin, imax, ib, ibmin, ibmax
+!
+    lda = SIZE(mat%val, 1)
+    ku = mat%ku
+    n = mat%rank
+!
+    vmx_pbn = 0.0d0
+    DO j=1,n
+       imin = MAX(1,j-ku); imax = j  ! The column in the upper diagonal part
+       ibmin =  ku+imin-j+1; ibmax =  ku+imax-j+1
+       DO k=1,SIZE(x,2)
+          vmx_pbn(imin:imax,k) =  vmx_pbn(imin:imax,k) + &
+               &                  mat%val(ibmin:ibmax,j)*x(j,k)
+       END DO
+!
+       imin=j+1; imax = MIN(n,j+ku)  ! The column in the lower diagonal part
+       DO i=imin,imax
+          ib = ku+1+j-i
+          vmx_pbn(i,:) = vmx_pbn(i,:) + mat%val(ib,i)*x(j,:)
+       END DO
+    END DO
+  END FUNCTION vmx_pbn
+!===========================================================================
+  FUNCTION vmx_zgb(mat, x, trans) RESULT(vmx)
+!
+!   Return product mat*x
+!
+    TYPE(zgbmat), INTENT(in) :: mat
+    DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: x
+    CHARACTER(len=1), OPTIONAL :: trans
+    DOUBLE COMPLEX, ALLOCATABLE :: vmx(:)
+    DOUBLE COMPLEX :: one=1.0d0, zero=0.0d0
+    INTEGER :: lda, kl, ku, m, n, j, imin, imax, ibmin, ibmax
+    CHARACTER(len=1) :: trans_loc
+!
+    lda = SIZE(mat%val, 1)
+    kl = mat%kl
+    ku = mat%ku
+    m = mat%mrows
+    n = mat%ncols
+    trans_loc = 'N'
+    IF(PRESENT(trans)) trans_loc = trans
+!
+    IF(trans_loc.EQ.'N') THEN
+       ALLOCATE(vmx(m))
+    ELSE
+       ALLOCATE(vmx(n))
+    END IF
+!
+    CALL zgbmv(trans_loc, m, n, kl, ku, one, mat%val(kl+1,1), lda, x, 1, zero,&
+         &      vmx, 1)
+  END FUNCTION vmx_zgb
+!===========================================================================
+  FUNCTION vmx_zgbn(mat, x, trans) RESULT(vmx)
+!
+!   Return product mat*x
+!
+    TYPE(zgbmat), INTENT(in) :: mat
+    DOUBLE COMPLEX, DIMENSION(:,:), INTENT(in) :: x
+    CHARACTER(len=1), OPTIONAL :: trans
+    DOUBLE COMPLEX, ALLOCATABLE :: vmx(:,:)
+    DOUBLE COMPLEX :: one=1.0d0, zero=0.0d0
+    INTEGER :: lda, kl, ku, m, n, j, k, imin, imax, ibmin, ibmax
+    CHARACTER(len=1) :: trans_loc
+!
+    lda = SIZE(mat%val, 1)
+    kl = mat%kl
+    ku = mat%ku
+    m = mat%mrows
+    n = mat%ncols
+    trans_loc = 'N'
+    IF(PRESENT(trans)) trans_loc = trans
+!
+    IF(trans_loc.EQ.'N') THEN
+       ALLOCATE(vmx(m,SIZE(x,2)))
+    ELSE
+       ALLOCATE(vmx(n,SIZE(x,2)))
+    END IF
+!
+    DO k=1,SIZE(x,2)
+       CALL zgbmv(trans_loc, m, n, kl, ku, one, mat%val(kl+1,1), lda, &
+               &      x(1,k), 1, zero, vmx(1,k), 1)
+    END DO
+  END FUNCTION vmx_zgbn
+!===========================================================================
+  FUNCTION vmx_zpb(mat, x)
+!
+!   Return product mat*x
+!
+    TYPE(zpbmat), INTENT(in) :: mat
+    DOUBLE COMPLEX, DIMENSION(:), INTENT(in) :: x
+    DOUBLE COMPLEX, DIMENSION(SIZE(x)) :: vmx_zpb
+    INTEGER :: lda, ku, n, i, j, imin, imax, ib, ibmin, ibmax
+!
+    lda = SIZE(mat%val, 1)
+    ku = mat%ku
+    n = mat%rank
+!
+    vmx_zpb = 0.0d0
+    DO j=1,n
+       imin = MAX(1,j-ku); imax = j  ! The column in the upper diagonal part
+       ibmin =  ku+imin-j+1; ibmax =  ku+imax-j+1
+       vmx_zpb(imin:imax) =  vmx_zpb(imin:imax) + mat%val(ibmin:ibmax,j)*x(j)
+!
+       imin=j+1; imax = MIN(n,j+ku)  ! The column in the lower diagonal part
+       DO i=imin,imax
+          ib = ku+1+j-i
+          vmx_zpb(i) = vmx_zpb(i) + CONJG(mat%val(ib,i))*x(j)
+       END DO
+    END DO
+  END FUNCTION vmx_zpb
+!===========================================================================
+  FUNCTION vmx_zpbn(mat, x)
+!
+!   Return product mat*x
+!
+    TYPE(zpbmat), INTENT(in) :: mat
+    DOUBLE COMPLEX, DIMENSION(:,:), INTENT(in) :: x
+    DOUBLE COMPLEX, DIMENSION(SIZE(x,1),SIZE(x,2)) :: vmx_zpbn
+    INTEGER :: lda, ku, n, i, j, k, imin, imax, ib, ibmin, ibmax
+!
+    lda = SIZE(mat%val, 1)
+    ku = mat%ku
+    n = mat%rank
+!
+    vmx_zpbn = 0.0d0
+    DO j=1,n
+       imin = MAX(1,j-ku); imax = j  ! The column in the upper diagonal part
+       ibmin =  ku+imin-j+1; ibmax =  ku+imax-j+1
+       DO k=1,SIZE(x,2)
+          vmx_zpbn(imin:imax,k) = vmx_zpbn(imin:imax,k) + &
+               &                  mat%val(ibmin:ibmax,j)*x(j,k)
+       END DO
+!
+       imin=j+1; imax = MIN(n,j+ku)  ! The column in the lower diagonal part
+       DO i=imin,imax
+          ib = ku+1+j-i
+          vmx_zpbn(i,:) = vmx_zpbn(i,:) + CONJG(mat%val(ib,i))*x(j,:)
+       END DO
+    END DO
+  END FUNCTION vmx_zpbn
+!===========================================================================
+  SUBROUTINE determinant_ge(mat, base, pow)
+!
+!   Return the determinant of mat
+!
+    TYPE(gemat) :: mat
+    INTEGER :: pow, i
+    DOUBLE PRECISION :: base
+!
+    CALL factor(mat)
+    base = 1.0d0
+    pow = 0
+    DO i=1,mat%rank
+       IF( mat%piv(i) .NE. i) base = -base
+       base = mat%val(i,i)*base
+       IF( base .EQ. 0.0d0 ) THEN
+          WRITE(*,*) 'DETERMINANT_GE: matrix is singular'
+          STOP '*** Abnormal EXIT in MODULE matrix ***'
+       END IF
+       DO
+          IF( ABS(base) .GE. 1.0d0 ) EXIT
+          base = 10.0d0*base
+          pow = pow - 1
+       END DO
+       DO
+          IF( ABS(base) .LT. 10.0d0 ) EXIT
+          base = base/10.0d0
+          pow = pow + 1
+       END DO
+    END DO
+  END SUBROUTINE determinant_ge
+!===========================================================================
+  SUBROUTINE determinant_gb(mat, base, pow)
+!
+!   Return the determinant of mat
+!
+    TYPE(gbmat) :: mat
+    INTEGER :: pow, i, ib
+    DOUBLE PRECISION :: base
+!
+    CALL factor(mat)
+    base = 1.0d0
+    pow = 0
+    ib=mat%kl + mat%ku + 1
+    DO i=1,mat%rank
+       IF( mat%piv(i) .NE. i) base = -base
+       base = mat%val(ib,i)*base
+       IF( base .EQ. 0.0d0 ) THEN
+          WRITE(*,*) 'DETERMINANT_GB: matrix is singular'
+          STOP '*** Abnormal EXIT in MODULE matrix ***'
+       END IF
+       DO
+          IF( ABS(base) .GE. 1.0d0 ) EXIT
+          base = 10.0d0*base
+          pow = pow - 1
+       END DO
+       DO
+          IF( ABS(base) .LT. 10.0d0 ) EXIT
+          base = base/10.0d0
+          pow = pow + 1
+       END DO
+    END DO
+  END SUBROUTINE determinant_gb
+!===========================================================================
+  SUBROUTINE determinant_pb(mat, base, pow)
+!
+!   Return the determinant of mat
+!
+    TYPE(pbmat) :: mat
+    INTEGER :: pow, i, ib
+    DOUBLE PRECISION :: base
+!
+    CALL factor(mat)
+    base = 1.0d0
+    pow = 0
+    ib = mat%ku + 1
+    DO i=1,mat%rank
+       base = mat%val(ib,i)*base
+       IF( base .EQ. 0.0d0 ) THEN
+          WRITE(*,*) 'DETERMINANT_PB: matrix is singular'
+          STOP '*** Abnormal EXIT in MODULE matrix ***'
+       END IF
+       DO
+          IF( ABS(base) .GE. 1.0d0 ) EXIT
+          base = 10.0d0*base
+          pow = pow - 1
+       END DO
+       DO
+          IF( ABS(base) .LT. 10.0d0 ) EXIT
+          base = base/10.0d0
+          pow = pow + 1
+       END DO
+    END DO
+    base=base**2
+    pow=pow*2
+  END SUBROUTINE determinant_pb
+!===========================================================================
+  SUBROUTINE determinant_zge(mat, base, pow)
+!
+!   Return the determinant of mat
+!
+    TYPE(zgemat) :: mat
+    INTEGER :: pow, i
+    DOUBLE COMPLEX :: base
+!
+    CALL factor(mat)
+    base = 1.0d0
+    pow = 0
+    DO i=1,mat%rank
+       IF( mat%piv(i) .NE. i) base = -base
+       base = mat%val(i,i)*base
+       IF( base .EQ. 0.0d0 ) THEN
+          WRITE(*,*) 'DETERMINANT_ZGE: matrix is singular'
+          STOP '*** Abnormal EXIT in MODULE matrix ***'
+       END IF
+       DO
+          IF( ABS(base) .GE. 1.0d0 ) EXIT
+          base = 10.0d0*base
+          pow = pow - 1
+       END DO
+       DO
+          IF( ABS(base) .LT. 10.0d0 ) EXIT
+          base = base/10.0d0
+          pow = pow + 1
+       END DO
+    END DO
+  END SUBROUTINE determinant_zge
+!===========================================================================
+  SUBROUTINE determinant_zgb(mat, base, pow)
+!
+!   Return the determinant of mat
+!
+    TYPE(zgbmat) :: mat
+    INTEGER :: pow, i, ib
+    DOUBLE COMPLEX :: base
+!
+    CALL factor(mat)
+    base = 1.0d0
+    pow = 0
+    ib=mat%kl + mat%ku + 1
+    DO i=1,mat%rank
+       IF( mat%piv(i) .NE. i) base = -base
+       base = mat%val(ib,i)*base
+       IF( base .EQ. 0.0d0 ) THEN
+          WRITE(*,*) 'DETERMINANT_GB: matrix is singular'
+          STOP '*** Abnormal EXIT in MODULE matrix ***'
+       END IF
+       DO
+          IF( ABS(base) .GE. 1.0d0 ) EXIT
+          base = 10.0d0*base
+          pow = pow - 1
+       END DO
+       DO
+          IF( ABS(base) .LT. 10.0d0 ) EXIT
+          base = base/10.0d0
+          pow = pow + 1
+       END DO
+    END DO
+  END SUBROUTINE determinant_zgb
+!===========================================================================
+  SUBROUTINE determinant_zpb(mat, base, pow)
+!
+!   Return the determinant of mat
+!
+    TYPE(zpbmat) :: mat
+    INTEGER :: pow, i, ib
+    DOUBLE COMPLEX :: base
+!
+    CALL factor(mat)
+    base = 1.0d0
+    pow = 0
+    ib = mat%ku + 1
+    DO i=1,mat%rank
+       base = mat%val(ib,i)*base
+       IF( base .EQ. 0.0d0 ) THEN
+          WRITE(*,*) 'DETERMINANT_PB: matrix is singular'
+          STOP '*** Abnormal EXIT in MODULE matrix ***'
+       END IF
+       DO
+          IF( ABS(base) .GE. 1.0d0 ) EXIT
+          base = 10.0d0*base
+          pow = pow - 1
+       END DO
+       DO
+          IF( ABS(base) .LT. 10.0d0 ) EXIT
+          base = base/10.0d0
+          pow = pow + 1
+       END DO
+    END DO
+    base=base**2
+    pow=pow*2
+  END SUBROUTINE determinant_zpb
+!===========================================================================
+  SUBROUTINE putmat_gb(fid, label, mat, str)
+!
+!   Write GB matrix in hdf5 file
+!
+    USE futils
+    INTEGER, INTENT(in)                    :: fid
+    CHARACTER(len=*), INTENT(in)           :: label
+    TYPE(gbmat)                            :: mat
+    CHARACTER(len=*), OPTIONAL, INTENT(in) :: str
+    IF(PRESENT(str)) THEN
+       CALL putarr(fid, label, mat%val, str)
+    ELSE
+       CALL putarr(fid, label, mat%val)
+    END IF
+    CALL attach(fid, label, 'KL', mat%kl)
+    CALL attach(fid, label, 'KU', mat%ku)
+    CALL attach(fid, label, 'RANK', mat%rank)
+  END SUBROUTINE putmat_gb
+!===========================================================================
+  SUBROUTINE getmat_gb(fid, label, mat, str)
+!
+!   Read in GB matrix from hdf5 file
+!
+    USE futils
+    INTEGER, INTENT(in)                    :: fid
+    CHARACTER(len=*), INTENT(in)           :: label
+    TYPE(gbmat)                            :: mat
+    CHARACTER(len=*), OPTIONAL, INTENT(in) :: str
+    CALL getatt(fid, label, 'KL', mat%kl)
+    CALL getatt(fid, label, 'KU', mat%ku)
+    CALL getatt(fid, label, 'RANK', mat%rank)
+    CALL getarr(fid, label, mat%val)
+  END SUBROUTINE getmat_gb
+!===========================================================================
+  SUBROUTINE kron_ge(mata, matb, matc)
+!
+!  Krocnecker product of 2 dense matrices
+!
+    TYPE(gemat), INTENT(in)  :: mata, matb
+    TYPE(gemat), INTENT(out) :: matc
+!
+    INTEGER :: i1, j1, i3, j3, m1, n1, m2, n2, m3, n3
+!
+    m1 = mata%mrows
+    n1 = mata%ncols
+    m2 = matb%mrows
+    n2 = matb%ncols
+    m3 = m1*m2
+    n3 = n1*n2
+!
+    CALL init(n3, 0, matc, mrows=m3)
+    DO i1=1,m1
+       i3 = (i1-1)*m2
+       DO j1=1,n1
+          j3 = (j1-1)*n2
+          matc%val(i3+1:i3+m2,j3+1:j3+n2) = mata%val(i1,j1)*matb%val(1:m2,1:n2)
+       END DO
+    END DO
+  END SUBROUTINE kron_ge
+!===========================================================================
+END MODULE matrix
diff --git a/src/multigrid_mod.f90 b/src/multigrid_mod.f90
new file mode 100644
index 0000000..192c76c
--- /dev/null
+++ b/src/multigrid_mod.f90
@@ -0,0 +1,2373 @@
+!>
+!> @file multigrid_mod.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+MODULE multigrid
+!
+!    MULTIGRID: Implement Multigrid solver for Finite Elements
+!               and Fiinite Differences.
+!
+!    T.M. Tran, CRPP-EPFL
+!    September 2012
+!
+  USE bsplines
+  USE matrix
+  USE conmat_mod
+  USE csr
+  USE cds
+  IMPLICIT NONE
+!
+  TYPE grid1d
+     INTEGER                         :: n    ! Number of intervals
+     INTEGER                         :: rank ! Dimension of FE space
+     DOUBLE PRECISION                :: h
+     DOUBLE PRECISION, ALLOCATABLE   :: x(:)
+     DOUBLE PRECISION, ALLOCATABLE   :: v(:)
+     DOUBLE PRECISION, ALLOCATABLE   :: f(:)
+     TYPE(spline1d)                  :: spl
+     TYPE(gemat)                     :: transf    ! Coarse to fine transfer matrix
+     TYPE(gbmat), ALLOCATABLE        :: mata      ! FE matrix
+     TYPE(gbmat), ALLOCATABLE        :: matm      ! mass matrix
+     TYPE(gbmat), ALLOCATABLE        :: mata_copy ! Used for direct_solve
+     TYPE(gemat), ALLOCATABLE        :: matap     ! FE matrix
+     TYPE(gemat), ALLOCATABLE        :: matmp     ! mass matrix
+     TYPE(gemat), ALLOCATABLE        :: matap_copy! Used for direct_solve
+  END TYPE grid1d
+!
+  TYPE grid2d
+     INTEGER                         :: n(2)    ! Number of intervals
+     INTEGER                         :: rank(2) ! Dimension of FE space
+     DOUBLE PRECISION                :: h(2)
+     DOUBLE PRECISION, ALLOCATABLE   :: x(:), y(:)
+!
+     DOUBLE PRECISION, ALLOCATABLE :: v(:,:) ! sol
+     DOUBLE PRECISION, ALLOCATABLE :: f(:,:) ! rhs
+     DOUBLE PRECISION, POINTER     :: v1d(:) ! flatten sol
+     DOUBLE PRECISION, POINTER     :: f1d(:) ! flatten rhs
+!
+     TYPE(csr_mat),ALLOCATABLE  :: mata
+     TYPE(cds_mat),ALLOCATABLE  :: mata_cds
+     TYPE(spline2d) :: spl
+     TYPE(gemat)    :: transf(2)
+!
+     TYPE(csr_mat)  :: matp(2)
+  END TYPE grid2d
+!
+  TYPE mg_info
+     INTEGER :: nu1    ! Relaxation down sweeps
+     INTEGER :: nu2    ! Relaxation up sweeps
+     INTEGER :: mu     ! mu-cycle number
+     INTEGER :: nu0    ! Number of FMG cycles
+     INTEGER :: levels ! Number of mg levels
+     CHARACTER(len=4) :: relax ! Type of relation
+     DOUBLE PRECISION :: omega ! for weighted Jacobi relaxation
+     LOGICAL          :: nlscale=.FALSE. ! Scale restriction if .TRUE.
+  END TYPE mg_info
+!
+  INTERFACE create_grid
+     MODULE PROCEDURE create_grid_1d, create_grid_2d
+  END INTERFACE create_grid
+  INTERFACE disrhs
+     MODULE PROCEDURE disrhs_1d, disrhs_2d
+  END INTERFACE disrhs
+  INTERFACE direct_solve
+     MODULE PROCEDURE direct_solve_1d, direct_solve_2d
+  END INTERFACE direct_solve
+  INTERFACE mg
+     MODULE PROCEDURE mg_1d, mg_2d
+  END INTERFACE mg
+  INTERFACE disc_err
+     MODULE PROCEDURE disc_err_1d, disc_err_2d
+  END INTERFACE disc_err
+  INTERFACE jacobi
+     MODULE PROCEDURE jacobi_cds, jacobi_csr, jacobi_gb, jacobi_ge
+  END INTERFACE jacobi
+  INTERFACE gs
+     MODULE PROCEDURE gs_cds, gs_csr, gs_gb, gs_ge
+  END INTERFACE gs
+  INTERFACE restrict
+     MODULE PROCEDURE restrict_1d, restrict_2d, restrict_2d_csr
+  END INTERFACE restrict
+  INTERFACE prolong
+     MODULE PROCEDURE prolong_1d, prolong_2d, prolong_2d_csr
+  END INTERFACE prolong
+  INTERFACE printmat
+     MODULE PROCEDURE printmat_mat, printmat_ge, printmat_gb, printmat_periodic
+  END INTERFACE printmat
+  INTERFACE massmat
+     MODULE PROCEDURE massmat_ge, massmat_gb, massmat_periodic
+  END INTERFACE massmat
+  INTERFACE femat
+     MODULE PROCEDURE femat_2d_csr, femat_ge, femat_gb, femat_periodic
+  END INTERFACE femat
+  INTERFACE ibcmat
+     MODULE PROCEDURE ibcmat_1d, ibcmat_2d
+  END INTERFACE ibcmat
+  INTERFACE mod_transf
+     MODULE PROCEDURE mod_transf_full, mod_transf_csr
+  END INTERFACE mod_transf
+  INTERFACE normf
+     MODULE PROCEDURE normf_gb, normf_ge
+  END INTERFACE normf
+  INTERFACE residue
+     MODULE PROCEDURE residue_gen, residue_csr, residue_cds, residue_gb, residue_ge
+  END INTERFACE residue
+!
+CONTAINS
+!--------------------------------------------------------------------------------
+  SUBROUTINE create_grid_1d(n, nidbas, ng_in, alpha, grids, period)
+!
+!    Create an array of levels  grids
+!    Compute mass matrix and prolongation matrices.
+!
+    INTEGER, INTENT(in) :: n      ! Number of intervals in the finest grid
+    INTEGER, INTENT(in) :: nidbas ! Order of splines
+    INTEGER, INTENT(in) :: ng_in  ! Number of proposed Gauss points
+    INTEGER, INTENT(in) :: alpha  ! geometric exponent
+    TYPE(grid1d), INTENT(out)     :: grids(:)
+    LOGICAL, INTENT(in), OPTIONAL :: period
+!
+    LOGICAL :: nlper
+    INTEGER :: n_current, nrank, ngauss
+    INTEGER :: levels, l, i
+    DOUBLE PRECISION :: h_current
+    TYPE(gbmat) :: matm
+    TYPE(gemat) :: matmp
+!
+    levels = SIZE(grids)
+    nlper = .FALSE.
+    IF(PRESENT(period)) nlper = period
+!    
+    ngauss = CEILING(REAL(2*nidbas+alpha+1,8)/2.d0)
+    ngauss = MAX(ng_in, ngauss)
+    WRITE(*,'(a,i0)') 'ngauss = ', ngauss
+!
+!    Allocate some matrices
+!
+    DO l=1,levels
+       IF(nlper) THEN
+          ALLOCATE(grids(l)%matmp)
+          ALLOCATE(grids(l)%matap)
+       ELSE
+          ALLOCATE(grids(l)%matm)
+          ALLOCATE(grids(l)%mata)
+       END IF
+    END DO
+!
+    n_current = n
+    h_current = 1.0d0/REAL(n_current,8)
+    DO l=1,levels
+       IF(n_current .LT. 2 ) THEN
+          PRINT*, 'CREATE_GRID: number intervals too small!'
+          STOP
+       END IF
+       grids(l)%n = n_current
+       grids(l)%h = h_current
+       ALLOCATE(grids(l)%x(0:n_current))
+       grids(l)%x(0:n_current) = (/ (REAL(i,8)*h_current, i=0,n_current) /)
+       CALL set_spline(nidbas, ngauss, grids(l)%x, grids(l)%spl, period=nlper)
+       CALL get_dim(grids(l)%spl, nrank)
+       IF(nlper) nrank = n_current
+       grids(l)%rank = nrank
+       ALLOCATE(grids(l)%v(nrank))
+       ALLOCATE(grids(l)%f(nrank))
+       IF(nlper) THEN
+          CALL massmat(grids(l)%spl, alpha, grids(l)%matmp)
+       ELSE
+          CALL massmat(grids(l)%spl, alpha, grids(l)%matm)
+       END IF
+       IF(l.GT.1) THEN 
+          CALL ctof_massmat(grids(l-1)%spl, grids(l)%spl, alpha, grids(l)%transf)
+          IF(nlper) THEN
+             CALL mcopy(grids(l-1)%matmp, matmp)
+             CALL factor(matmp)
+             CALL bsolve(matmp, grids(l)%transf%val)
+             CALL destroy(matmp)
+          ELSE
+             CALL mcopy(grids(l-1)%matm, matm)
+             CALL factor(matm)
+             CALL bsolve(matm, grids(l)%transf%val)
+             CALL destroy(matm)
+          END IF
+      END IF
+       n_current = n_current/2
+       h_current = 2.0d0*h_current
+    END DO
+  END SUBROUTINE create_grid_1d
+!--------------------------------------------------------------------------------
+  SUBROUTINE create_grid_2d(x, y, nidbas, ng_in, alpha, grids, mat_type, period, &
+       &                   debug_in)
+!
+!    Create an array of levels  grids
+!    Compute mass matrix and prolongation matrices.
+!
+    DOUBLE PRECISION, INTENT(in) :: x(0:), y(0:) ! Finest grid points
+    INTEGER, INTENT(in)          :: nidbas(2)    ! Order of splines
+    INTEGER, INTENT(in)          :: alpha(2)     ! geometric exponent
+    INTEGER, INTENT(in)          :: ng_in(2)     ! Number of proposed Gauss points
+    TYPE(grid2d), INTENT(out), TARGET :: grids(:)
+    CHARACTER(*), INTENT(in), OPTIONAL :: mat_type ! csr (default) or cds
+    LOGICAL, INTENT(in), OPTIONAL     :: period(2)
+    LOGICAL, INTENT(in), OPTIONAL     :: debug_in
+!
+    LOGICAL, DIMENSION(2) :: nlper
+    INTEGER, DIMENSION(2) :: n, sp_dim, ngauss
+    LOGICAL     :: nlcds
+    LOGICAL     :: debug
+    INTEGER     :: levels, l, rank2d
+    TYPE(gemat) :: matm
+!
+    DOUBLE PRECISION, PARAMETER :: pi = 4.0d0*ATAN(1.0d0)
+!
+!  Process input args
+!
+    n(1) = SIZE(x)-1
+    n(2) = SIZE(y)-1
+    levels = SIZE(grids)
+    nlper = .FALSE.
+    IF(PRESENT(period)) THEN 
+       nlper = period
+    END IF
+    IF(PRESENT(debug_in)) THEN
+       debug = debug_in
+    ELSE
+       debug = .FALSE.
+    END IF
+    IF(PRESENT(mat_type)) THEN  ! CSR matrix by default
+       nlcds = mat_type.EQ.'cds'
+    ELSE
+       nlcds = .FALSE.
+    END IF
+!
+!  WARNING: Assume that only 2nd dim can be periodic!!!
+    IF(nlper(1)) THEN
+       WRITE(*,'(A)') 'CREATE_GRID: First dimension could not be periodic!'
+       STOP
+    END IF
+!
+    ngauss = CEILING(REAL(2*nidbas+1,8)/2.d0)
+    ngauss = MAX(ng_in, ngauss)
+    WRITE(*,'(a,2i4)') 'ngauss = ', ngauss
+!
+    DO l=1,levels
+!
+!  Create mesh from finest grid mesh
+       IF(MINVAL(n) .LT. 2 ) THEN
+          PRINT*, 'CREATE_GRID: number intervals too small!'
+          STOP
+       END IF
+       grids(l)%n = n
+       ALLOCATE(grids(l)%x(0:n(1)))
+       ALLOCATE(grids(l)%y(0:n(2)))
+       IF(l.EQ.1) THEN
+          grids(1)%x = x
+          grids(1)%y = y
+       ELSE
+          grids(l)%x(:) = grids(l-1)%x(0::2)
+          grids(l)%y(:) = grids(l-1)%y(0::2)
+       END IF
+       IF(debug) THEN
+          WRITE(*,'(/a,i4,a,2l2)') 'l =', l, '  nlper =', nlper
+          WRITE(*,'(a/(10(1pe12.3)))') 'x', grids(l)%x
+          WRITE(*,'(a/(10(1pe12.3)))') 'y', grids(l)%y
+       END IF
+!
+!  Allocate mem for solution v and RHS f
+       CALL set_spline(nidbas, ngauss, grids(l)%x, grids(l)%y, grids(l)%spl, period=nlper)
+       CALL get_dim(grids(l)%spl%sp1, sp_dim(1))
+       CALL get_dim(grids(l)%spl%sp2, sp_dim(2))
+       ALLOCATE(grids(l)%v(sp_dim(1), sp_dim(2)))
+       ALLOCATE(grids(l)%f(sp_dim(1), sp_dim(2)))
+!
+!  WARNING: Assume that only 2nd dim can be periodic!!!
+       grids(l)%rank = sp_dim
+       IF(nlper(2)) THEN
+          grids(l)%rank(2) = n(2)
+       END IF
+       IF(debug) THEN
+          WRITE(*,'(a,2i6)') 'Grid ranks', grids(l)%rank
+       END IF
+!
+!  Flatten version of sol and rhs
+       rank2d = PRODUCT(grids(l)%rank)
+       grids(l)%v1d(1:rank2d) => grids(l)%v
+       grids(l)%f1d(1:rank2d) => grids(l)%f
+!
+!  Matrix format for FE matrix
+       IF(nlcds) THEN
+          ALLOCATE(grids(l)%mata_cds)
+       ELSE
+          ALLOCATE(grids(l)%mata)
+       END IF
+!
+!  Coarse to fine mesh transfers for l>1
+       IF(l.GT.1) THEN
+          CALL ctof_massmat(grids(l-1)%spl%sp1, grids(l)%spl%sp1, alpha(1), grids(l)%transf(1))
+          CALL ctof_massmat(grids(l-1)%spl%sp2, grids(l)%spl%sp2, alpha(2), grids(l)%transf(2))
+!
+          CALL massmat(grids(l-1)%spl%sp1, alpha(1), matm)
+          CALL factor(matm)
+          CALL bsolve(matm, grids(l)%transf(1)%val)
+          CALL full_to_csr(grids(l)%transf(1)%val, grids(l)%matp(1))
+          CALL destroy(matm)
+          CALL destroy(grids(l)%transf(1))
+!
+          CALL massmat(grids(l-1)%spl%sp2, alpha(2), matm)
+          CALL factor(matm)
+          CALL bsolve(matm, grids(l)%transf(2)%val)
+          CALL full_to_csr(grids(l)%transf(2)%val, grids(l)%matp(2))
+          CALL destroy(matm)
+          CALL destroy(grids(l)%transf(2))
+       END IF
+!
+!   Next coarse grid
+       n = n/2
+    END DO
+  END SUBROUTINE create_grid_2d
+!--------------------------------------------------------------------------------
+  SUBROUTINE create_grid_fd(x, y, grids, info, mat_type, period, debug)
+!
+!    FD version of create_grid
+!
+    DOUBLE PRECISION, INTENT(in)       :: x(0:), y(0:)
+    TYPE(grid2d), INTENT(out), TARGET  :: grids(:)
+    TYPE(mg_info), INTENT(inout)       :: info      ! info for MG
+    CHARACTER(*), INTENT(in), OPTIONAL :: mat_type  ! csr (default) or cds
+    LOGICAL, INTENT(in), OPTIONAL      :: period(2)
+    LOGICAL, INTENT(in), OPTIONAL      :: debug
+!
+    INTEGER :: nidbas(2)=1, ngauss(2)=4    ! Linear Splines \equiv 1st FD
+    INTEGER :: alpha(2) = 1                ! Cartesian coordinate
+    LOGICAL, DIMENSION(2) :: nlper
+    LOGICAL :: nldebug
+    LOGICAL :: nlcds
+    INTEGER :: levels, n(2), sp_dim(2)
+    INTEGER :: l, rank2d    
+    TYPE(gemat)  :: matm
+!--------------------------------------------------------------------------------    
+!
+!  Process input args
+!
+    n(1) = SIZE(x)-1
+    n(2) = SIZE(y)-1
+    levels = SIZE(grids)
+    info%nlscale = .TRUE.   ! Restriction should be scaled for FD
+    nlper = .FALSE.
+    IF(PRESENT(period)) nlper = period
+    nldebug = .FALSE.
+    IF(PRESENT(debug))  nldebug = debug
+    IF(PRESENT(mat_type)) THEN  ! CSR matrix by default
+       nlcds = mat_type.EQ.'cds'
+    ELSE
+       nlcds = .FALSE.
+    END IF
+!
+    DO l=1,levels
+!
+!  Create mesh from finest grid mesh
+       IF(MINVAL(n) .LT. 2 ) THEN
+          PRINT*, 'CREATE_GRID: number intervals too small!'
+          STOP
+       END IF
+       grids(l)%n = n
+       ALLOCATE(grids(l)%x(0:n(1)))
+       ALLOCATE(grids(l)%y(0:n(2)))
+       IF(l.EQ.1) THEN
+          grids(1)%x = x
+          grids(1)%y = y
+       ELSE
+          grids(l)%x(:) = grids(l-1)%x(0::2)
+          grids(l)%y(:) = grids(l-1)%y(0::2)
+       END IF
+       IF(nldebug) THEN
+          WRITE(*,'(/a,i4,a,2l2)') 'l =', l, '  nlper =', nlper
+          WRITE(*,'(a/(10(1pe12.3)))') 'x', grids(l)%x
+          WRITE(*,'(a/(10(1pe12.3)))') 'y', grids(l)%y
+       END IF
+!
+!  Allocate mem for solution v and RHS f
+       CALL set_spline(nidbas, ngauss, grids(l)%x, grids(l)%y, grids(l)%spl, period=nlper)
+       CALL get_dim(grids(l)%spl%sp1, sp_dim(1))
+       CALL get_dim(grids(l)%spl%sp2, sp_dim(2))
+       ALLOCATE(grids(l)%v(0:sp_dim(1)-1, 0:sp_dim(2)-1))
+       ALLOCATE(grids(l)%f(0:sp_dim(1)-1, 0:sp_dim(2)-1))
+!
+!  WARNING: Assume that only 2nd dim can be periodic!!!
+       grids(l)%rank = sp_dim
+       IF(nlper(2)) THEN
+          grids(l)%rank(2) = n(2)
+       END IF
+       IF(nldebug) THEN
+          WRITE(*,'(a,2i6)') 'Grid ranks', grids(l)%rank
+       END IF
+!
+!  Flatten version of sol and rhs
+       rank2d = PRODUCT(grids(l)%rank)
+       grids(l)%v1d(1:rank2d) => grids(l)%v
+       grids(l)%f1d(1:rank2d) => grids(l)%f
+!
+!  Matrix format for FD matrix
+       IF(nlcds) THEN
+          ALLOCATE(grids(l)%mata_cds)
+       ELSE
+          ALLOCATE(grids(l)%mata)
+       END IF
+!
+!  Coarse to fine mesh transfers for l>1
+       IF(l.GT.1) THEN
+          CALL ctof_massmat(grids(l-1)%spl%sp1, grids(l)%spl%sp1, alpha(1), grids(l)%transf(1))
+          CALL ctof_massmat(grids(l-1)%spl%sp2, grids(l)%spl%sp2, alpha(2), grids(l)%transf(2))
+!
+          CALL massmat(grids(l-1)%spl%sp1, alpha(1), matm)
+          CALL factor(matm)
+          CALL bsolve(matm, grids(l)%transf(1)%val)
+          CALL full_to_csr(grids(l)%transf(1)%val, grids(l)%matp(1))
+          CALL destroy(matm)
+          CALL destroy(grids(l)%transf(1))
+!
+          CALL massmat(grids(l-1)%spl%sp2, alpha(2), matm)
+          CALL factor(matm)
+          CALL bsolve(matm, grids(l)%transf(2)%val)
+          CALL full_to_csr(grids(l)%transf(2)%val, grids(l)%matp(2))
+          CALL destroy(matm)
+          CALL destroy(grids(l)%transf(2))
+       END IF
+!
+!   Next coarse grid
+       n = n/2
+    END DO
+  END SUBROUTINE create_grid_fd
+!--------------------------------------------------------------------------------
+  RECURSIVE SUBROUTINE fmg(grids, info, l)
+!
+!   Execute a full multigrid V-cycle
+!
+    TYPE(grid1d), INTENT(inout) :: grids(:)
+    TYPE(mg_info), INTENT(in)    :: info
+    INTEGER, INTENT(in)          :: l
+    INTEGER :: levels, k
+    levels = info%levels
+!
+    IF(l.EQ.levels) THEN
+       CALL direct_solve(grids(levels), grids(levels)%v)
+    ELSE
+       grids(l+1)%f = restrict(grids(l+1)%transf,grids(l)%f)
+       CALL fmg(grids, info, l+1)       
+       grids(l)%v = prolong(grids(l+1)%transf,grids(l+1)%v)
+       DO k=1,info%nu0
+          CALL mg(grids, info, l)
+       END DO
+    END IF
+  END SUBROUTINE fmg
+!--------------------------------------------------------------------------------
+  RECURSIVE SUBROUTINE mg_1d(grids, info, l)
+!
+!   Execute a recursive V-cycle
+!
+    TYPE(grid1d), INTENT(inout)  :: grids(:)
+    TYPE(mg_info), INTENT(in)    :: info
+    INTEGER, INTENT(in)          :: l
+    INTEGER :: levels, k
+    LOGICAL :: nlper
+!
+    levels = info%levels
+    nlper = grids(1)%spl%period
+!
+    IF(l.EQ.levels) THEN
+       CALL direct_solve(grids(levels), grids(levels)%v)
+    ELSE
+       CALL relax(info%nu1)
+       IF(nlper) THEN
+          grids(l+1)%f = restrict(grids(l+1)%transf, &
+               &   grids(l)%f-vmx(grids(l)%matap, grids(l)%v))
+       ELSE
+          grids(l+1)%f = restrict(grids(l+1)%transf, &
+               &   grids(l)%f-vmx(grids(l)%mata, grids(l)%v))
+       END IF
+       grids(l+1)%v = 0.0d0
+!
+!   Only 1 call to the coarsest level
+       DO k=1,MERGE(info%mu, 1, (l+1).NE.levels) 
+          CALL mg(grids, info, l+1)
+       END DO
+!
+       grids(l)%v = grids(l)%v + prolong(grids(l+1)%transf,grids(l+1)%v)
+       CALL relax(info%nu2)
+    END IF
+!
+  CONTAINS
+    SUBROUTINE relax(nu)
+      INTEGER, INTENT(in) :: nu
+      SELECT CASE (TRIM(info%relax))
+      CASE ("jac")
+         IF(nlper) THEN
+            CALL jacobi(grids(l)%matap, info%omega, nu, grids(l)%v, grids(l)%f)
+         ELSE
+            CALL jacobi(grids(l)%mata, info%omega, nu, grids(l)%v, grids(l)%f)
+         END IF
+      CASE ("gs")
+         IF(nlper) THEN
+            CALL gs(grids(l)%matap, nu, grids(l)%v, grids(l)%f)
+         ELSE
+            CALL gs(grids(l)%mata, nu, grids(l)%v, grids(l)%f)
+         END IF
+      CASE default
+         PRINT*, "relax ", info%relax, " NOT IMPLEMENTED!"
+         STOP
+      END SELECT
+    END SUBROUTINE relax
+  END SUBROUTINE mg_1d
+!--------------------------------------------------------------------------------
+  RECURSIVE SUBROUTINE mg_2d(grids, info, l)
+!
+!   Execute a recursive V-cycle
+!
+    TYPE(grid2d), INTENT(inout)  :: grids(:)
+    TYPE(mg_info), INTENT(in)    :: info
+    INTEGER, INTENT(in)          :: l
+!
+    DOUBLE PRECISION, ALLOCATABLE, TARGET :: resid(:,:)
+    DOUBLE PRECISION, POINTER             :: resid1d(:)
+    INTEGER :: levels, k, m1, m2
+!
+    levels = info%levels
+    m1 = SIZE(grids(l)%v,1)
+    m2 = SIZE(grids(l)%v,2)
+!
+    IF(l.EQ.levels) THEN
+       grids(levels)%v = grids(levels)%f
+       CALL direct_solve(grids(levels), grids(levels)%v1d)
+    ELSE
+       CALL relax(info%nu1)
+       ALLOCATE(resid(m1,m2)); resid1d(1:m1*m2) => resid
+       IF(ALLOCATED(grids(l)%mata)) THEN
+          resid1d = grids(l)%f1d - vmx(grids(l)%mata, grids(l)%v1d)
+       ELSE
+          resid1d = grids(l)%f1d - vmx(grids(l)%mata_cds, grids(l)%v1d)
+       END IF
+       grids(l+1)%f = restrict(grids(l+1)%matp, resid)
+       IF(info%nlscale) grids(l+1)%f = 0.25d0*grids(l+1)%f
+       DEALLOCATE(resid)
+       grids(l+1)%v = 0.0d0
+!
+!   Only 1 call to the coarsest level
+       DO k=1,MERGE(info%mu, 1, (l+1).NE.levels) 
+          CALL mg(grids, info, l+1)
+       END DO
+!
+       grids(l)%v = grids(l)%v + prolong(grids(l+1)%matp,grids(l+1)%v)
+       CALL relax(info%nu2)
+    END IF
+!
+  CONTAINS
+    SUBROUTINE relax(nu)
+      INTEGER, INTENT(in) :: nu
+      SELECT CASE (TRIM(info%relax))
+      CASE ("jac")
+         IF(ALLOCATED(grids(l)%mata)) THEN
+            CALL jacobi(grids(l)%mata, info%omega, nu, grids(l)%v1d, grids(l)%f1d)
+         ELSE
+            CALL jacobi(grids(l)%mata_cds, info%omega, nu, grids(l)%v1d, grids(l)%f1d)
+         END IF
+      CASE ("gs")
+         IF(ALLOCATED(grids(l)%mata)) THEN
+            CALL gs(grids(l)%mata, nu, grids(l)%v1d, grids(l)%f1d)
+         ELSE
+            CALL gs(grids(l)%mata_cds, nu, grids(l)%v1d, grids(l)%f1d)
+         END IF
+      CASE default
+         PRINT*, "relax ", info%relax, " NOT IMPLEMENTED!"
+         STOP
+      END SELECT
+    END SUBROUTINE relax
+  END SUBROUTINE mg_2d
+!--------------------------------------------------------------------------------
+  RECURSIVE SUBROUTINE mg_cyl(grids, info, l, nluniq_in)
+!
+!   Execute a recursive V-cycle
+!
+    TYPE(grid2d), INTENT(inout)   :: grids(:)
+    TYPE(mg_info), INTENT(in)     :: info
+    INTEGER, INTENT(in)           :: l
+    LOGICAL, INTENT(in), OPTIONAL :: nluniq_in
+!
+    DOUBLE PRECISION, ALLOCATABLE, TARGET :: resid(:,:)
+    DOUBLE PRECISION, POINTER             :: resid1d(:)
+    INTEGER :: levels, k, m1, m2, r1, r2
+    LOGICAL :: nluniq
+!
+    levels = info%levels
+    m1 = SIZE(grids(l)%v,1)
+    m2 = SIZE(grids(l)%v,2)
+    r1 = grids(l)%rank(1)     ! r1 = m1
+    r2 = grids(l)%rank(2)     ! r2 = m2-1
+    IF(PRESENT(nluniq_in)) THEN
+       nluniq = nluniq_in
+    ELSE
+       nluniq = .TRUE.
+    END IF
+!
+    IF(l.EQ.levels) THEN
+       grids(levels)%v1d = grids(levels)%f1d
+       CALL direct_solve(grids(levels), grids(levels)%v1d, debug=.FALSE.)
+    ELSE
+       CALL relax(info%nu1)
+       ALLOCATE(resid(m1,m2)); resid1d(1:r1*r2) => resid
+       IF(ALLOCATED(grids(l)%mata)) THEN
+          resid1d(:) = grids(l)%f1d(:) - vmx(grids(l)%mata, grids(l)%v1d)
+       ELSE
+          resid1d(:) = grids(l)%f1d(:) - vmx(grids(l)%mata_cds, grids(l)%v1d)
+       END IF
+!
+       grids(l+1)%f(:,:) = restrict_cyl(grids(l+1), resid, nluniq)
+!
+       DEALLOCATE(resid)
+       grids(l+1)%v1d = 0.0d0
+!
+!   Only 1 call to the coarsest level
+       DO k=1,MERGE(info%mu, 1, (l+1).NE.levels) 
+          CALL mg_cyl(grids, info, l+1, nluniq)
+       END DO
+!
+       grids(l)%v(:,1:r2) = grids(l)%v(:,1:r2) + &
+            &               prolong_cyl(grids(l+1),grids(l+1)%v, nluniq)
+!
+       CALL relax(info%nu2)
+    END IF
+!
+  CONTAINS
+    SUBROUTINE relax(nu)
+      INTEGER, INTENT(in) :: nu
+      SELECT CASE (TRIM(info%relax))
+      CASE ("jac")
+         IF(ALLOCATED(grids(l)%mata)) THEN
+            CALL jacobi(grids(l)%mata, info%omega, nu, grids(l)%v1d, grids(l)%f1d)
+         ELSE
+            CALL jacobi(grids(l)%mata_cds, info%omega, nu, grids(l)%v1d, grids(l)%f1d)
+         END IF
+      CASE ("gs")
+         IF(ALLOCATED(grids(l)%mata)) THEN
+            CALL gs(grids(l)%mata, nu, grids(l)%v1d, grids(l)%f1d)
+         ELSE
+            CALL gs(grids(l)%mata_cds, nu, grids(l)%v1d, grids(l)%f1d)
+        END IF
+      CASE default
+         PRINT*, "relax ", info%relax, " NOT IMPLEMENTED!"
+         STOP
+      END SELECT
+    END SUBROUTINE relax
+  END SUBROUTINE mg_cyl
+!--------------------------------------------------------------------------------
+  FUNCTION prolong_1d(matp,vcoarse) RESULT(vfine)
+!
+!   Prolongation
+!
+    TYPE(gemat), INTENT(in) :: matp
+    DOUBLE PRECISION, INTENT(in) :: vcoarse(:)
+    DOUBLE PRECISION             :: vfine(matp%mrows)
+!
+    vfine = vmx(matp,vcoarse)
+  END FUNCTION prolong_1d
+!--------------------------------------------------------------------------------
+  FUNCTION restrict_1d(matp,vfine) RESULT(vcoarse)
+!
+!   Restriction
+!
+    TYPE(gemat), INTENT(in) :: matp
+    DOUBLE PRECISION, INTENT(in) :: vfine(:)
+    DOUBLE PRECISION             :: vcoarse(matp%ncols)
+!
+    vcoarse = vmx(matp,vfine,'T')
+  END FUNCTION restrict_1d
+!--------------------------------------------------------------------------------
+  FUNCTION prolong_2d(matp,vcoarse) RESULT(vfine)
+!
+!   Prolongation
+!
+    TYPE(gemat), INTENT(in) :: matp(2)
+    DOUBLE PRECISION, INTENT(in)  :: vcoarse(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: vfine(:,:)
+!
+    DOUBLE PRECISION, POINTER     :: pmat1(:,:), pmat2(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: temp(:,:)
+    DOUBLE PRECISION :: one=1.0d0, zero=0.0d0
+    INTEGER :: m1, m1p, m2, m2p
+!
+    pmat1 => matp(1)%val
+    pmat2 => matp(2)%val
+    m1 = SIZE(pmat1,1); m1p = SIZE(pmat1,2)
+    m2 = SIZE(pmat2,1); m2p = SIZE(pmat2,2)
+    ALLOCATE(vfine(m1,m2))
+    ALLOCATE(temp(m1,m2p))
+!
+!    Compute (P1) * V
+!
+    CALL dgemm('N', 'N', m1, m2p, m1p, one, pmat1, m1, vcoarse, m1p, zero, &
+         &     temp, m1)
+!
+!    Compute (P1) * V * (P2)^T
+!
+    CALL dgemm('N', 'T', m1, m2, m2p, one, temp, m1, pmat2, m2, zero, &
+         &     vfine, m1)
+!
+    DEALLOCATE(temp)
+  END FUNCTION prolong_2d
+!--------------------------------------------------------------------------------
+  FUNCTION prolong_2d_csr(matp,vcoarse) RESULT(vfine)
+!
+!   Prolongation using CSR prolongation matrix
+!
+    TYPE(csr_mat), INTENT(in)     :: matp(2)
+    DOUBLE PRECISION, INTENT(in)  :: vcoarse(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: vfine(:,:)
+!
+    DOUBLE PRECISION, ALLOCATABLE :: temp(:,:)
+    INTEGER :: m1, m1p, m2, m2p
+    INTEGER :: i, j, k, kk
+!
+    m1 = matp(1)%mrows; m1p = matp(1)%ncols
+    m2 = matp(2)%mrows; m2p = matp(2)%ncols
+    ALLOCATE(vfine(m1,m2))
+    ALLOCATE(temp(m1p,m2))
+    temp = 0.0d0
+    vfine = 0.0d0
+!
+!    Compute temp = V * (P2)^T
+!     t_ij = sum_{k=1}^{m2p} V_ik (P2)_jk, i=1:m1p, j=1:m2
+!
+    DO j=1,m2
+       DO kk=matp(2)%irow(j),matp(2)%irow(j+1)-1
+          k = matp(2)%cols(kk)
+          temp(:,j) = temp(:,j) + vcoarse(:,k)*matp(2)%val(kk)
+       END DO
+    END DO
+!
+!    Compute (P1) * V * (P2)^T
+!     V_ij = sum_{k=1}^{m1p} (P1)_ik t_kj, i=1:m1, j=1:m2
+!
+    DO i=1,m1
+       DO kk=matp(1)%irow(i),matp(1)%irow(i+1)-1
+          k = matp(1)%cols(kk)
+          vfine(i,:) = vfine(i,:) + matp(1)%val(kk)*temp(k,:)
+       END DO
+    END DO
+!
+    DEALLOCATE(temp)
+  END FUNCTION prolong_2d_csr
+!--------------------------------------------------------------------------------
+  FUNCTION restrict_2d(matp,vfine) RESULT(vcoarse)
+!
+!   Restriction
+!
+    TYPE(gemat), INTENT(in) :: matp(2)
+    DOUBLE PRECISION, INTENT(in)  :: vfine(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: vcoarse(:,:)
+!
+    DOUBLE PRECISION, POINTER     :: pmat1(:,:), pmat2(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: temp(:,:)
+    DOUBLE PRECISION :: one=1.0d0, zero=0.0d0
+    INTEGER :: m1, m1p, m2, m2p
+!
+    pmat1 => matp(1)%val
+    pmat2 => matp(2)%val
+    m1 = SIZE(pmat1,1); m1p = SIZE(pmat1,2)
+    m2 = SIZE(pmat2,1); m2p = SIZE(pmat2,2)
+    ALLOCATE(vcoarse(m1p,m2p))
+    ALLOCATE(temp(m1p,m2))
+!
+!    Compute (P1)^T * V
+!
+    CALL dgemm('T', 'N', m1p, m2, m1, one, pmat1, m1, vfine, m1, zero, &
+         &     temp, m1p)
+!
+!    Compute (P1)^T * V * P2
+!
+    CALL dgemm('N', 'N', m1p, m2p, m2, one, temp, m1p, pmat2, m2, zero, &
+         &     vcoarse, m1p)
+!
+    DEALLOCATE(temp)
+  END FUNCTION restrict_2d
+!--------------------------------------------------------------------------------
+  FUNCTION restrict_2d_csr(matp,vfine) RESULT(vcoarse)
+!
+!   Restriction using CSR prolongation matrix
+!
+    TYPE(csr_mat), INTENT(in)     :: matp(2)
+    DOUBLE PRECISION, INTENT(in)  :: vfine(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: vcoarse(:,:)
+!
+    DOUBLE PRECISION, ALLOCATABLE :: temp(:,:)
+    INTEGER :: m1, m1p, m2, m2p
+    INTEGER :: i, ii, j, jj, k
+!
+    m1 = matp(1)%mrows; m1p = matp(1)%ncols
+    m2 = matp(2)%mrows; m2p = matp(2)%ncols
+    ALLOCATE(vcoarse(m1p,m2p))
+    ALLOCATE(temp(m1,m2p))
+    temp = 0.0d0
+    vcoarse = 0.0d0
+!
+!    Compute temp = V * (R2)^T = V * (P2)
+!     t_ij = sum_{k=1}^{m2} V_ik (P2)_kj, i=1:m1, j=1:m2p
+!
+    DO k=1,m2
+       DO jj=matp(2)%irow(k),matp(2)%irow(k+1)-1
+          j = matp(2)%cols(jj)
+          temp(:,j) = temp(:,j) + vfine(:,k)*matp(2)%val(jj)
+       END DO
+    END DO
+!
+!    Compute (R1) * V * (R2)^T = (P1)^T) * V * (P2)
+!     V_ij = sum_{k=1}^{m1p} (P1)_ki t_kj, i=1:m1p, j=1:m2p
+!
+    DO k=1,m1
+       DO ii=matp(1)%irow(k),matp(1)%irow(k+1)-1
+          i = matp(1)%cols(ii)
+          vcoarse(i,:) = vcoarse(i,:) + matp(1)%val(ii)*temp(k,:)
+       END DO
+    END DO
+!
+    DEALLOCATE(temp)
+  END FUNCTION restrict_2d_csr
+!--------------------------------------------------------------------------------
+  FUNCTION prolong_cyl(grid, vcoarse, nluniq) RESULT(vfine)
+!
+!   Prolongation (cylindrical case)
+!
+    TYPE(grid2d)                    :: grid
+    DOUBLE PRECISION, INTENT(inout) :: vcoarse(:,:)
+    DOUBLE PRECISION, ALLOCATABLE   :: vfine(:,:)
+    LOGICAL, INTENT(in)             :: nluniq
+!
+    DOUBLE PRECISION, ALLOCATABLE :: temp(:,:)
+    INTEGER :: m1, m1p, m2, m2p
+    INTEGER :: i, j, k, kk
+!
+    m1 = grid%matp(1)%mrows; m1p = grid%matp(1)%ncols
+    m2 = grid%matp(2)%mrows; m2p = grid%matp(2)%ncols
+    ALLOCATE(vfine(m1,m2))
+    ALLOCATE(temp(m1p,m2))
+    temp = 0.0d0
+    vfine = 0.0d0
+!
+    IF(nluniq) vcoarse(1,1:m2p-1) = vcoarse(1,m2p)
+!
+!    Compute temp = V * (P2)^T
+!     t_ij = sum_{k=1}^{m2p} V_ik (P2)_jk, i=1:m1p, j=1:m2
+!
+    DO j=1,m2
+       DO kk=grid%matp(2)%irow(j),grid%matp(2)%irow(j+1)-1
+          k = grid%matp(2)%cols(kk)
+          temp(:,j) = temp(:,j) + vcoarse(:,k)*grid%matp(2)%val(kk)
+       END DO
+    END DO
+!
+!    Compute (P1) * V * (P2)^T
+!     V_ij = sum_{k=1}^{m1p} (P1)_ik t_kj, i=1:m1, j=1:m2
+!
+    DO i=1,m1
+       DO kk=grid%matp(1)%irow(i),grid%matp(1)%irow(i+1)-1
+          k = grid%matp(1)%cols(kk)
+          vfine(i,:) = vfine(i,:) + grid%matp(1)%val(kk)*temp(k,:)
+       END DO
+    END DO
+!!$!
+!!$!    Compute (P1) * V
+!!$!
+!!$    CALL dgemm('N', 'N', m1, m2p, m1p, one, pmat1, m1, vcoarse, m1p, zero, &
+!!$         &     temp, m1)
+!!$!
+!!$!    Compute (P1) * V * (P2)^T
+!!$!
+!!$    CALL dgemm('N', 'T', m1, m2, m2p, one, temp, m1, pmat2, m2, zero, &
+!!$         &     vfine, m1)
+!
+    IF(nluniq) THEN
+       vcoarse(1,1:m2p-1) = vcoarse(1,1:m2p-1) - vcoarse(1,m2p)
+       vfine(1,1:m2-1) =  vfine(1,1:m2-1) - vfine(1,m2)
+    END IF
+!
+    DEALLOCATE(temp)
+  END FUNCTION prolong_cyl
+!--------------------------------------------------------------------------------
+  FUNCTION restrict_cyl(grid, vfine, nluniq) RESULT(vcoarse)
+!
+!   Restriction (cylindrical case)
+!
+    TYPE(grid2d)                    :: grid
+    DOUBLE PRECISION, INTENT(inout) :: vfine(:,:)
+    DOUBLE PRECISION, ALLOCATABLE   :: vcoarse(:,:)
+    LOGICAL, INTENT(in)             :: nluniq
+!
+    DOUBLE PRECISION, ALLOCATABLE :: temp(:,:)
+    INTEGER :: m1, m1p, m2, m2p
+    INTEGER :: i, ii, j, jj, k
+!
+    m1 = grid%matp(1)%mrows; m1p = grid%matp(1)%ncols
+    m2 = grid%matp(2)%mrows; m2p = grid%matp(2)%ncols
+    ALLOCATE(vcoarse(m1p,m2p))
+    ALLOCATE(temp(m1,m2p))
+    temp = 0.0d0
+    vcoarse = 0.0d0
+!
+    IF(nluniq) vfine(1,1:m2) = vfine(1,m2)/REAL(m2,8)
+!
+!    Compute temp = V * (R2)^T = V * (P2)
+!     t_ij = sum_{k=1}^{m2} V_ik (P2)_kj, i=1:m1, j=1:m2p
+!
+    DO k=1,m2
+       DO jj=grid%matp(2)%irow(k),grid%matp(2)%irow(k+1)-1
+          j = grid%matp(2)%cols(jj)
+          temp(:,j) = temp(:,j) + vfine(:,k)*grid%matp(2)%val(jj)
+       END DO
+    END DO
+!
+!    Compute (R1) * V * (R2)^T = (P1)^T) * V * (P2)
+!     V_ij = sum_{k=1}^{m1p} (P1)_ki t_kj, i=1:m1p, j=1:m2p
+!
+    DO k=1,m1
+       DO ii=grid%matp(1)%irow(k),grid%matp(1)%irow(k+1)-1
+          i = grid%matp(1)%cols(ii)
+          vcoarse(i,:) = vcoarse(i,:) + grid%matp(1)%val(ii)*temp(k,:)
+       END DO
+    END DO
+!
+!!$!    Compute (P1)^T * V
+!!$!
+!!$    CALL dgemm('T', 'N', m1p, m2, m1, one, pmat1, m1, vfine, m1, zero, &
+!!$         &     temp, m1p)
+!!$!
+!!$!    Compute (P1)^T * V * P2
+!!$!
+!!$    CALL dgemm('N', 'N', m1p, m2p, m2, one, temp, m1p, pmat2, m2, zero, &
+!!$         &     vcoarse, m1p)
+!
+    IF(nluniq) THEN
+       vfine(1,m2) = SUM(vfine(1,1:m2)); vfine(1,1:m2-1) = 0.0d0
+       vcoarse(1,m2p) =  SUM(vcoarse(1,1:m2p)); vcoarse(1,m2p-1) = 0.0d0
+    END IF
+!
+    DEALLOCATE(temp)
+!
+  END FUNCTION restrict_cyl
+!--------------------------------------------------------------------------------
+  SUBROUTINE massmat_ge(spl, alpha, matm)
+!
+!   Compute mass matrix
+!
+    TYPE(spline1d), INTENT(in)   :: spl
+    INTEGER, INTENT(in)          :: alpha
+    TYPE(gemat), INTENT(out)     :: matm
+!
+    INTEGER :: nrank, nx, nidbas, kl, ku
+!
+    CALL get_dim(spl, nrank, nx, nidbas)
+    kl=nidbas; ku=kl
+    IF(spl%period) nrank = nx
+    CALL init(nrank, 1, matm)
+    CALL conmat(spl, matm, coefeq)
+  CONTAINS
+    SUBROUTINE coefeq(x, idt, idw, c)
+      DOUBLE PRECISION, INTENT(in) :: x
+      INTEGER, INTENT(out) :: idt(:), idw(:)
+      DOUBLE PRECISION, INTENT(out) :: c(:)
+      c(1) = x**alpha
+      idt(1) = 0
+      idw(1) = 0
+    END SUBROUTINE coefeq
+  END SUBROUTINE massmat_ge
+!--------------------------------------------------------------------------------
+  SUBROUTINE massmat_gb(spl, alpha, matm)
+!
+!   Compute mass matrix
+!
+    TYPE(spline1d), INTENT(in)   :: spl
+    INTEGER, INTENT(in)          :: alpha
+    TYPE(gbmat), INTENT(out)     :: matm
+!
+    INTEGER :: nrank, nx, nidbas, kl, ku
+!
+    CALL get_dim(spl, nrank, nx, nidbas)
+    kl=nidbas; ku=kl
+    CALL init(kl, ku, nrank, 1, matm)
+    CALL conmat(spl, matm, coefeq)
+  CONTAINS
+    SUBROUTINE coefeq(x, idt, idw, c)
+      DOUBLE PRECISION, INTENT(in) :: x
+      INTEGER, INTENT(out) :: idt(:), idw(:)
+      DOUBLE PRECISION, INTENT(out) :: c(:)
+      c(1) = x**alpha
+      idt(1) = 0
+      idw(1) = 0
+    END SUBROUTINE coefeq
+  END SUBROUTINE massmat_gb
+!--------------------------------------------------------------------------------
+  SUBROUTINE massmat_periodic(spl, alpha, matm)
+!
+!   Compute mass matrix (periodic case)
+!
+    TYPE(spline1d), INTENT(in)      :: spl
+    INTEGER, INTENT(in)             :: alpha
+    TYPE(periodic_mat), INTENT(out) :: matm
+!
+    INTEGER :: dim, nrank, nx, nidbas, kl, ku
+!
+    CALL get_dim(spl, dim, nx, nidbas)
+    kl=nidbas; ku=kl
+    nrank = nx
+    CALL init(kl, ku, nrank, 1, matm)
+    CALL conmat(spl, matm, coefeq)
+  CONTAINS
+    SUBROUTINE coefeq(x, idt, idw, c)
+      DOUBLE PRECISION, INTENT(in) :: x
+      INTEGER, INTENT(out) :: idt(:), idw(:)
+      DOUBLE PRECISION, INTENT(out) :: c(:)
+      c(1) = x**alpha
+      idt(1) = 0
+      idw(1) = 0
+    END SUBROUTINE coefeq
+  END SUBROUTINE massmat_periodic
+!--------------------------------------------------------------------------------
+  SUBROUTINE femat_2d_csr(spl, mat, coefeq, nterms, maxder_in, nat_order_in, &
+       &                  noinit)
+!
+!   Compute 2d fe CSR matrix
+!
+    TYPE(spline2d), INTENT(in)    :: spl
+    TYPE(csr_mat), INTENT(inout)  :: mat
+    INTEGER, INTENT(in)           :: nterms
+    INTEGER, INTENT(in), OPTIONAL :: maxder_in(2)
+    LOGICAL, INTENT(in), OPTIONAL :: nat_order_in
+    LOGICAL, INTENT(in), OPTIONAL :: noinit
+    INTERFACE
+       SUBROUTINE coefeq(x, y, idt, idw, c)
+         DOUBLE PRECISION, INTENT(in) :: x, y
+         INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
+         DOUBLE PRECISION, INTENT(out) :: c(:)
+       END SUBROUTINE coefeq
+    END INTERFACE
+!
+    INTEGER :: nrank, ndim(2), nints(2), maxder(2)
+    LOGICAL :: nat_order, run_init
+!
+    CALL get_dim(spl, ndim, nints)
+    IF(spl%sp2%period) THEN
+       nrank = ndim(1)*nints(2)
+    ELSE
+       nrank = PRODUCT(ndim)
+    END IF
+!
+    maxder = 1;         IF(PRESENT(maxder_in)) maxder = maxder_in
+    nat_order = .TRUE.; IF(PRESENT(nat_order_in)) nat_order = nat_order_in
+!
+    run_init = .TRUE.
+    IF(PRESENT(noinit)) run_init = .NOT.noinit
+    IF(run_init)       CALL init(nrank, nterms, mat)
+!
+    CALL conmat(spl, mat, coefeq, maxder, nat_order)
+  END SUBROUTINE femat_2d_csr
+!--------------------------------------------------------------------------------
+  SUBROUTINE femat_ge(spl, mat, coefeq)
+!
+!   Compute fe matrix
+!
+    TYPE(spline1d), INTENT(in)   :: spl
+    TYPE(gemat), INTENT(out)     :: mat
+    INTERFACE
+       SUBROUTINE coefeq(x, idt, idw, c)
+         DOUBLE PRECISION, INTENT(in) :: x
+         INTEGER, INTENT(out) :: idt(:), idw(:)
+         DOUBLE PRECISION, INTENT(out) :: c(:)
+       END SUBROUTINE coefeq
+    END INTERFACE
+!
+    INTEGER :: nrank, nx, nidbas, kl, ku
+!
+    CALL get_dim(spl, nrank, nx, nidbas)
+    kl=nidbas; ku=kl
+    IF(spl%period) nrank = nx
+    CALL init(nrank, 2, mat)
+    CALL conmat(spl, mat, coefeq)
+  END SUBROUTINE femat_ge
+!--------------------------------------------------------------------------------
+  SUBROUTINE femat_gb(spl, mat, coefeq)
+!
+!   Compute fe matrix
+!
+    TYPE(spline1d), INTENT(in)   :: spl
+    TYPE(gbmat), INTENT(out)     :: mat
+    INTERFACE
+       SUBROUTINE coefeq(x, idt, idw, c)
+         DOUBLE PRECISION, INTENT(in) :: x
+         INTEGER, INTENT(out) :: idt(:), idw(:)
+         DOUBLE PRECISION, INTENT(out) :: c(:)
+       END SUBROUTINE coefeq
+    END INTERFACE
+!
+    INTEGER :: nrank, nx, nidbas, kl, ku
+!
+    CALL get_dim(spl, nrank, nx, nidbas)
+    kl=nidbas; ku=kl
+    CALL init(kl, ku, nrank, 2, mat)
+    CALL conmat(spl, mat, coefeq)
+  END SUBROUTINE femat_gb
+!--------------------------------------------------------------------------------
+  SUBROUTINE femat_periodic(spl, mat, coefeq)
+!
+!   Compute fe matrix
+!
+    TYPE(spline1d), INTENT(in)      :: spl
+    TYPE(periodic_mat), INTENT(out) :: mat
+    INTERFACE
+       SUBROUTINE coefeq(x, idt, idw, c)
+         DOUBLE PRECISION, INTENT(in) :: x
+         INTEGER, INTENT(out) :: idt(:), idw(:)
+         DOUBLE PRECISION, INTENT(out) :: c(:)
+       END SUBROUTINE coefeq
+    END INTERFACE
+!
+    INTEGER :: nrank, dim, nx, nidbas, kl, ku
+!
+    CALL get_dim(spl, dim, nx, nidbas)
+    kl=nidbas; ku=kl
+    nrank = nx
+    CALL init(kl, ku, nrank, 2, mat)
+    CALL conmat(spl, mat, coefeq)
+  END SUBROUTINE femat_periodic
+!--------------------------------------------------------------------------------
+  SUBROUTINE ibcmat_1d(irow, mat)
+!
+!   Set BC at row irow to 0
+!
+    INTEGER, INTENT(in)        :: irow
+    TYPE(gbmat), INTENT(inout) :: mat
+!
+    DOUBLE PRECISION :: a(mat%rank)
+!
+    a(:)=0.0d0; a(irow)=1.0d0
+    CALL putrow(mat, irow, a)
+    CALL putcol(mat, irow, a)
+  END SUBROUTINE ibcmat_1d
+!--------------------------------------------------------------------------------
+ SUBROUTINE ibcmat_2d(grid, mat, nluniq_in)
+!
+!   Impose BC on matrix (asume natural ordering)
+!      I = (j-1)*N1 + i, i=1:N1, j=1:N2
+!
+    TYPE(grid2d), INTENT(in)      :: grid
+    TYPE(csr_mat), INTENT(inout)  :: mat
+    LOGICAL, INTENT(in), OPTIONAL :: nluniq_in
+!
+    DOUBLE PRECISION :: temp(mat%rank)
+    INTEGER :: n1e, n2e, nrank, i, j, irow, jcol
+    LOGICAL :: nlper1, nlper2, nlcart, nluniq
+!
+    n1e = grid%rank(1)
+    n2e = grid%rank(2)
+    nrank = mat%rank
+    nlper1 = grid%spl%sp1%period
+    nlper2 = grid%spl%sp2%period
+    nlcart = (.NOT.nlper1) .AND. (.NOT.nlper2)
+    IF(PRESENT(nluniq_in)) THEN
+       nluniq = nluniq_in
+    ELSE
+       nluniq = .TRUE.
+    END IF
+!
+! BC at x=0   ! Dirichlet for Cartesian, unicity for cylindrical problem
+    IF(nlcart) THEN
+       i=1
+       DO j=1,n2e
+          irow = (j-1)*n1e + i
+          temp = 0.0d0; temp(irow) = 1.0d0
+          CALL putrow(mat, irow, temp)
+          CALL putcol(mat, irow, temp)
+       END DO
+    ELSE
+       i=1
+       IF(nluniq) THEN
+          CALL unicity
+       END IF
+    END IF
+!
+! BC at x=1  ! For both Cartesian and cylindrical
+    i=n1e
+    DO j=1,n2e
+       irow = (j-1)*n1e + i
+       temp = 0.0d0; temp(irow) = 1.0d0
+       CALL putrow(mat, irow, temp)
+       CALL putcol(mat, irow, temp)
+    END DO
+!
+! BC at y=0   ! Only for Cartesian problem
+    IF(nlcart) THEN
+       j=1
+       DO i=1,n1e
+          irow = (j-1)*n1e + i
+          temp = 0.0d0; temp(irow) = 1.0d0
+          CALL putrow(mat, irow, temp)
+          CALL putcol(mat, irow, temp)
+       END DO
+    END IF
+!
+! BC at y=1   ! Only for Cartesian problem
+    IF(nlcart) THEN
+       j=n2e
+       DO i=1,n1e
+          irow = (j-1)*n1e + i
+          temp = 0.0d0; temp(irow) = 1.0d0
+          CALL putrow(mat, irow, temp)
+          CALL putcol(mat, irow, temp)
+       END DO
+    END IF
+!
+  CONTAINS
+    SUBROUTINE unicity
+      INTEGER          :: irow0, jcol0
+      DOUBLE PRECISION :: temp_sum(mat%rank) 
+!
+      irow0 = (n2e-1)*n1e + i
+      jcol0 = irow0
+!
+!   Vertical sum
+      temp_sum(:) = 0.0d0
+      DO j=1,n2e
+         irow = (j-1)*n1e + i
+         temp = 0.0d0
+         CALL getrow(mat, irow, temp)
+         temp_sum(:) = temp_sum(:) + temp(:)
+      END DO
+      CALL putrow(mat, irow0, temp_sum)
+!
+!  Horizontal sum
+      temp_sum(:) = 0.0d0
+      DO j=1,n2e
+         jcol = (j-1)*n1e + i
+         temp = 0.0d0
+         CALL getcol(mat, jcol, temp)
+         temp_sum(:) = temp_sum(:) + temp(:)
+      END DO
+      CALL putcol(mat, jcol0, temp_sum)
+!
+!  The away operator
+      DO j=1,n2e-1
+         irow = (j-1)*n1e + i
+         temp = 0.0d0; temp(irow) = 1.0d0
+         CALL putrow(mat, irow, temp)
+         CALL putcol(mat, irow, temp)
+      END DO
+    END SUBROUTINE unicity
+  END SUBROUTINE ibcmat_2d
+!--------------------------------------------------------------------------------
+  SUBROUTINE mod_transf_full(mat,k)
+!
+!    Modify grid transfer matrix.
+!
+    DOUBLE PRECISION, INTENT(inout) :: mat(:,:)
+    INTEGER, INTENT(in) :: k
+    INTEGER :: m, n
+!
+    m=SIZE(mat,1)
+    n=SIZE(mat,2)
+!
+! Clear matrix small elements.
+    WHERE( ABS(mat) < 1.d-8) mat=0.0d0
+!
+! Left boundary
+    IF(k.EQ.1 .OR. k.EQ.3) THEN
+       mat(2:m,1) = 0.0d0
+    END IF
+!
+! Right boundary
+    IF(k.EQ.2 .OR. k.EQ.3) THEN
+       mat(1:m-1,n) = 0.0d0
+    END IF
+  END SUBROUTINE mod_transf_full
+!--------------------------------------------------------------------------------
+  SUBROUTINE mod_transf_csr(mat,k)
+!
+!    Modify grid transfer matrix.
+!
+    TYPE(csr_mat), INTENT(inout) :: mat
+    INTEGER, INTENT(in)          :: k
+!
+    DOUBLE PRECISION :: acol(mat%mrows)
+    INTEGER :: m, n
+!
+    m=mat%mrows
+    n=mat%ncols
+!
+! Left boundary
+    acol = 0.0d0
+    IF(k.EQ.1 .OR. k.EQ.3) THEN
+       CALL getele(mat, 1, 1, acol(1))
+       CALL putcol(mat, 1, acol)
+    END IF
+!
+! Right boundary
+    acol = 0.0d0
+    IF(k.EQ.2 .OR. k.EQ.3) THEN
+       CALL getele(mat, m, n, acol(m))
+       CALL putcol(mat, n, acol)
+    END IF
+  END SUBROUTINE mod_transf_csr
+!--------------------------------------------------------------------------------
+  SUBROUTINE calc_pmat(grid1, grid2, pmat, debug_in)
+!
+!   Compute prolongation matrix by collocation
+!
+    TYPE(grid1d), INTENT(in)      :: grid1, grid2
+    TYPE(gemat), INTENT(out)      :: pmat
+    LOGICAL, OPTIONAL, INTENT(in) :: debug_in
+!
+    TYPE(gemat) :: mat_interp
+    DOUBLE PRECISION, ALLOCATABLE :: fun(:,:), fun2(:,:)
+    DOUBLE PRECISION :: xinter
+    INTEGER, ALLOCATABLE :: jcol(:)
+    INTEGER :: nidbas, nfine, ncoarse, nderv
+    LOGICAL :: nlper, debug
+    INTEGER :: i, i0, ii, k, irow
+!================================================================================
+!                       0. Prologue
+!
+    debug = .FALSE.
+    IF(PRESENT(debug_in)) debug = debug_in
+!
+    nfine = grid1%n
+    ncoarse = grid2%n
+    nidbas = grid1%spl%order - 1
+    nlper = grid1%spl%period
+!
+    IF(nlper) THEN
+       IF(ncoarse .LT. nidbas+1) THEN
+          WRITE(*,'(/a)') '** NX/2 should be larger or equal to NIDBAS+1 **'
+          STOP
+       END IF
+   END IF
+!
+    IF(debug) THEN
+       WRITE(*,'(/2(a,i0))') 'nfine = ', nfine, ', ncoarse = ', ncoarse
+       IF(nlper) WRITE(*,'(a)') 'Grids are periodic!'
+    END IF
+!
+    ALLOCATE(jcol(0:nidbas))
+    ALLOCATE(fun(0:nidbas,1))
+    IF(nlper) THEN
+       CALL init(ncoarse, 1, pmat, mrows=nfine)
+       CALL init(nfine, 1, mat_interp)
+    ELSE
+       CALL init(ncoarse+nidbas, 1, pmat, mrows=nfine+nidbas)
+       CALL init(nfine+nidbas, 1, mat_interp)
+    END IF
+!================================================================================
+!                       1. Interpolation matrix
+!
+    irow = 0
+    i0 = 1
+!
+!    Left bound
+    IF(.NOT.nlper) THEN 
+       IF(MODULO(nidbas,2).EQ.1) THEN
+          nderv = (nidbas-1)/2   ! ndidbas = 1, 3, 5, ...
+          ALLOCATE(fun2(0:nidbas,nderv+1))
+          CALL basfun(grid1%x(0), grid1%spl, fun2, 1)
+          jcol = 1 + (/ (i, i=0,nidbas) /)
+          DO k=1,nderv+1
+             irow = irow+1
+             mat_interp%val(irow,jcol) = fun2(0:nidbas,k)
+          END DO
+          i0 = 2  ! Skip the first grid point
+       ELSE
+          nderv = nidbas/2-1   ! ndidbas = 2, 4, ...
+          ALLOCATE(fun2(0:nidbas,nderv+1))
+          CALL basfun(grid1%x(0), grid1%spl, fun2, 1)
+          jcol = 1 + (/ (i, i=0,nidbas) /)
+          DO k=1,nderv+1
+             irow = irow+1
+             mat_interp%val(irow,jcol) = fun2(0:nidbas,k)
+          END DO
+       END IF
+    END IF
+    DO i=i0,nfine
+       IF(MODULO(nidbas,2).EQ.0) THEN
+          xinter = (grid1%x(i-1)+grid1%x(i))/2.0d0  ! Left bound of interval
+       ELSE
+          xinter = grid1%x(i-1)                     ! Left bound of interval
+       END IF
+       CALL basfun(xinter, grid1%spl, fun, i)
+       irow = irow+1
+       DO k=0,nidbas
+          jcol(k) = i+k
+       END DO
+       IF(nlper) jcol = MODULO(jcol-1,nfine)+1
+       mat_interp%val(irow,jcol) = fun(0:nidbas,1)
+    END DO
+!
+!   Right bound
+    IF(.NOT.nlper) THEN
+       CALL basfun(grid1%x(nfine), grid1%spl, fun2, nfine)
+       jcol = nfine + (/ (i, i=0,nidbas) /)
+       DO k=nderv+1,1,-1
+          irow = irow+1
+          mat_interp%val(irow,jcol) = fun2(0:nidbas,k)
+       END DO
+    END IF
+    IF(debug) CALL printmat('** Interpolation matrix **', mat_interp)
+!================================================================================
+!                       2. RHS matrix
+!
+    irow = 0
+    i0 = 1
+    DO i=1,ncoarse
+       ii = 2*i-1
+       CALL comp_rhs(ii)
+       CALL comp_rhs(ii+1)
+    END DO
+    IF(debug) CALL printmat('** RHS matrix **', pmat)
+!================================================================================
+!                       3. Compute prolongation matrix
+!
+    CALL factor(mat_interp)
+    CALL bsolve(mat_interp, pmat%val)
+!================================================================================
+!                       9. Epilogue
+!
+    CALL destroy(mat_interp)
+    DEALLOCATE(jcol)
+    DEALLOCATE(fun)
+    IF(ALLOCATED(fun2)) DEALLOCATE(fun2)
+!
+  CONTAINS
+    SUBROUTINE comp_rhs(ii)
+      INTEGER, INTENT(in) :: ii
+      INTEGER :: k
+!
+!    Left  bounds for non-periodic cases
+      IF(.NOT.nlper .AND. ii.EQ.1) THEN
+         CALL basfun(grid1%x(0), grid2%spl, fun2, 1)
+         jcol = 1 + (/ (k, k=0,nidbas) /)
+         DO k=1,nderv+1
+            irow = irow+1
+            pmat%val(irow,jcol) = fun2(0:nidbas,k)
+         END DO
+         IF(MODULO(nidbas,2).EQ.1) RETURN   ! Skip
+      END IF
+!
+      IF(MODULO(nidbas,2).EQ.0) THEN
+         xinter = (grid1%x(ii-1)+grid1%x(ii))/2.0d0 ! Left bound of interval
+      ELSE
+         xinter = grid1%x(ii-1)                     ! Left bound of interval
+      END IF
+      CALL basfun(xinter, grid2%spl, fun, i)
+      irow = irow+1
+      DO k=0,nidbas
+         jcol(k) = i+k
+      END DO
+      IF(nlper) jcol = MODULO(jcol-1,ncoarse)+1
+      pmat%val(irow,jcol) = fun(0:nidbas,1)
+!
+!    Right  bounds for non-periodic cases
+      IF(.NOT.nlper .AND. ii.EQ.nfine) THEN
+         CALL basfun(grid1%x(nfine), grid2%spl, fun2, ncoarse)
+         jcol = ncoarse + (/ (k, k=0,nidbas) /)
+         DO k=nderv+1,1,-1
+            irow = irow+1
+            pmat%val(irow,jcol) = fun2(0:nidbas,k)
+         END DO
+      END IF
+    END SUBROUTINE comp_rhs
+  END SUBROUTINE calc_pmat
+!--------------------------------------------------------------------------------
+  SUBROUTINE disrhs_1d(spl, farr, frhs)
+!
+!  Projection of RHS on spline basis functions
+!
+    TYPE(spline1d)                :: spl
+    DOUBLE PRECISION, INTENT(out) :: farr(:)
+    INTERFACE
+       DOUBLE PRECISION FUNCTION frhs(x)
+         DOUBLE PRECISION, INTENT(in) :: x
+       END FUNCTION frhs
+    END INTERFACE
+    DOUBLE PRECISION :: contrib
+!
+    DOUBLE PRECISION, POINTER :: xg(:,:), wg(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: fun(:,:)
+    INTEGER :: ndim, n, nidbas, ng
+    INTEGER :: i, ig, it, irow
+    LOGICAL :: nlper
+!
+    CALL get_dim(spl, ndim, n, nidbas)
+    nlper = spl%period
+    xg => spl%gausx ! xg(ng,n)
+    wg => spl%gausw ! wg(ng,n)
+    ng = SIZE(xg,1)
+    ALLOCATE(fun(0:nidbas,1))
+!
+    farr = 0.0d0
+    DO i=1,n
+       DO ig=1,ng
+          CALL basfun(xg(ig,i), spl, fun, i)
+          contrib = wg(ig,i)*frhs(xg(ig,i))
+          DO it=0,nidbas
+             irow = i+it
+             IF(nlper) irow = MODULO(irow-1,n) +1
+             farr(irow) = farr(irow)+contrib*fun(it,1)
+          END DO
+       END DO
+    END DO
+!
+    DEALLOCATE(fun)
+  END SUBROUTINE disrhs_1d
+!--------------------------------------------------------------------------------
+  SUBROUTINE disrhs_2d(spl, farr, frhs)
+!
+!  Projection of RHS on spline basis functions
+!
+    TYPE(spline2d)                :: spl
+    DOUBLE PRECISION, INTENT(out) :: farr(:,:)
+    INTERFACE
+       DOUBLE PRECISION FUNCTION frhs(x,y)
+         DOUBLE PRECISION, INTENT(in) :: x,y
+       END FUNCTION frhs
+    END INTERFACE
+    DOUBLE PRECISION :: contrib
+!
+    DOUBLE PRECISION, POINTER :: xg1(:,:), wg1(:,:)
+    DOUBLE PRECISION, POINTER :: xg2(:,:), wg2(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: fun1(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: fun2(:,:)
+    INTEGER :: ndim1, n1, nidbas1, ng1
+    INTEGER :: ndim2, n2, nidbas2, ng2
+    INTEGER :: i1, ig1, it1, irow
+    INTEGER :: i2, ig2, it2, jcol
+!
+    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
+!
+    xg1 => spl%sp1%gausx ! xg(ng,n)
+    wg1 => spl%sp1%gausw ! wg(ng,n)
+    ng1 = SIZE(xg1,1)
+    xg2 => spl%sp2%gausx ! xg(ng,n)
+    wg2 => spl%sp2%gausw ! wg(ng,n)
+    ng2 = SIZE(xg2,1)
+!
+    ALLOCATE(fun1(0:nidbas1,1))
+    ALLOCATE(fun2(0:nidbas2,1))
+!
+    farr = 0.0d0
+    DO i1=1,n1
+       DO ig1=1,ng1
+          CALL basfun(xg1(ig1,i1), spl%sp1, fun1, i1)
+          DO i2=1,n2
+             DO ig2=1,ng2
+                CALL basfun(xg2(ig2,i2), spl%sp2, fun2, i2)
+                contrib = wg1(ig1,i1)*wg2(ig2,i2)* &
+                     & frhs(xg1(ig1,i1), xg2(ig2,i2))
+                DO it1=0,nidbas1
+                   irow = i1+it1
+                   DO it2=0,nidbas2
+                      jcol = i2+it2
+                      farr(irow,jcol) = farr(irow,jcol) + &
+                           &   contrib*fun1(it1,1)*fun2(it2,1)
+                   END DO
+                END DO
+             END DO
+          END DO
+       END DO
+    END DO
+!
+!  Case of periodic BC (only in 2nd dimension!)
+!
+    IF(spl%sp2%period) THEN
+       DO jcol=1,nidbas2
+          farr(:,jcol) = farr(:,jcol)+farr(:,jcol+n2)
+          farr(:,jcol+n2) = 0.0d0
+       END DO
+    END IF
+    DEALLOCATE(fun1)
+    DEALLOCATE(fun2)
+  END SUBROUTINE disrhs_2d
+!--------------------------------------------------------------------------------
+  SUBROUTINE ibcrhs(grid, f, nluniq_in)
+!
+! Impose BC on RHS
+!
+    TYPE(grid2d)                    :: grid
+    DOUBLE PRECISION, INTENT(inout) :: f(:,:)
+    LOGICAL, INTENT(in), OPTIONAL   :: nluniq_in
+!
+    DOUBLE PRECISION :: temp
+    INTEGER :: n1, n2
+    LOGICAL :: nlper1, nlper2, nlcyl, nluniq
+!
+    n1 = grid%rank(1)
+    n2 = grid%rank(2)
+    nlper1 = grid%spl%sp1%period
+    nlper2 = grid%spl%sp2%period
+    nlcyl = (.NOT.nlper1) .AND. (nlper2)
+    IF(PRESENT(nluniq_in)) THEN 
+       nluniq = nluniq_in
+    ELSE
+       nluniq=.TRUE.
+    END IF
+!
+!  Cylindrical case, unicity at the axis, 0 at the right side
+    IF(nlcyl) THEN  ! 
+       IF(nluniq) THEN
+          temp = SUM(f(1,1:n2))
+          f(1,n2) = temp
+          f(1,1:n2-1) = 0.0d0
+       END IF
+       f(n1,1:n2) = 0.0d0
+!
+!  Cartesian case: 0 on all 4 boundaries
+    ELSE
+       f(1,:) = 0.0d0; f(n1,:) = 0.0d0
+       f(:,1) = 0.0d0; f(:,n2) = 0.0d0
+    END IF
+  END SUBROUTINE ibcrhs
+!--------------------------------------------------------------------------------
+  FUNCTION disc_err_1d(spl, f, fexact) RESULT(disc_err)
+!
+!  L2 norm of discretization error
+!
+    TYPE(spline1d)               :: spl
+    DOUBLE PRECISION, INTENT(in) :: f(:)
+    DOUBLE PRECISION             :: disc_err
+    INTERFACE
+       FUNCTION fexact(x)
+         DOUBLE PRECISION, INTENT(in) :: x(:)
+         DOUBLE PRECISION :: fexact(SIZE(x))
+       END FUNCTION fexact
+    END INTERFACE
+!
+    DOUBLE PRECISION, ALLOCATABLE :: err(:,:)
+    DOUBLE PRECISION, POINTER :: xg(:,:), wg(:,:)
+    INTEGER :: ndim, n, nidbas, ng
+    INTEGER :: ig
+!
+    CALL get_dim(spl, ndim, n, nidbas)
+    xg => spl%gausx ! xg(ng,n)
+    wg => spl%gausw ! wg(ng,n)
+    ng = SIZE(xg,1)
+!
+    ALLOCATE(err(ng,n))
+    CALL gridval(spl, xg(1,:), err(1,:), 0, f)
+    err(1,:) = (err(1,:) - fexact(xg(1,:)))**2
+    DO ig=2,ng
+       CALL gridval(spl, xg(ig,:), err(ig,:), 0)
+       err(ig,:) = (err(ig,:) - fexact(xg(ig,:)))**2
+    END DO
+!
+    disc_err = SQRT(SUM(wg*err))
+!
+    DEALLOCATE(err)
+  END FUNCTION disc_err_1d
+!--------------------------------------------------------------------------------
+  FUNCTION disc_err_2d(spl, f, fexact) RESULT(disc_err)
+!
+!  L2 norm of discretization error
+!
+    TYPE(spline2d)               :: spl
+    DOUBLE PRECISION, INTENT(in) :: f(:,:)
+    DOUBLE PRECISION             :: disc_err
+    INTERFACE
+       FUNCTION fexact(x,y)
+         DOUBLE PRECISION, INTENT(in) :: x(:), y(:)
+         DOUBLE PRECISION :: fexact(SIZE(x),SIZE(y))
+       END FUNCTION fexact
+    END INTERFACE
+!
+    DOUBLE PRECISION, ALLOCATABLE :: err(:,:)
+    DOUBLE PRECISION, POINTER :: xg(:,:), wg1(:,:), yg(:,:), wg2(:,:)
+    INTEGER, DIMENSION(2) :: ndim, n, ng
+    INTEGER :: i, j, ig, jg
+    LOGICAL :: nlper1, nlper2, nlcyl
+!
+    CALL get_dim(spl, ndim, n)
+    xg => spl%sp1%gausx ! xg(ng,n)
+    wg1 => spl%sp1%gausw ! wg(ng,n)
+    ng(1) = SIZE(xg,1)
+    yg => spl%sp2%gausx ! xg(ng,n)
+    wg2 => spl%sp2%gausw ! wg(ng,n)
+    ng(2) = SIZE(yg,1)
+!
+    nlper1 = spl%sp1%period
+    nlper2 = spl%sp2%period
+    nlcyl = (.NOT.nlper1) .AND. (nlper2)
+!
+    disc_err = 0.0d0
+    ALLOCATE(err(n(1),n(2)))
+    DO ig=1,ng(1)
+       DO jg=1,ng(2)
+          IF(ig.EQ.1.AND.jg.EQ.1) THEN
+             CALL gridval(spl, xg(ig,:), yg(jg,:), err, [0,0], f)
+          ELSE
+             CALL gridval(spl, xg(ig,:), yg(jg,:), err, [0,0])
+          END IF
+          err = (err - fexact(xg(ig,:), yg(jg,:)))**2
+          DO i=1,n(1)
+             DO j=1,n(2)
+                IF(nlcyl) THEN
+                   disc_err = disc_err + xg(ig,i)*wg1(ig,i)*wg2(jg,j)*err(i,j)
+                ELSE
+                   disc_err = disc_err + wg1(ig,i)*wg2(jg,j)*err(i,j)
+                END IF
+             END DO
+          END DO
+       END DO
+    END DO
+    disc_err = SQRT(disc_err)
+!
+    DEALLOCATE(err)
+  END FUNCTION disc_err_2d
+!--------------------------------------------------------------------------------
+  SUBROUTINE back_transf(grid, u, nluniq_in)
+!
+!   Back transform solution and use  periodicity (cylindrical problem)
+!
+    TYPE(grid2d), INTENT(in)        :: grid
+    DOUBLE PRECISION, INTENT(inout) :: u(:,:)
+    LOGICAL, INTENT(in), OPTIONAL   :: nluniq_in
+!
+    LOGICAL :: nluniq
+    INTEGER :: n, nidbas, j
+!
+    n = grid%n(2)
+    nidbas = grid%spl%sp2%order-1
+    IF(PRESENT(nluniq_in)) THEN
+       nluniq = nluniq_in
+    ELSE
+       nluniq = .TRUE.
+    END IF
+!
+!  Back transform
+    IF(nluniq) THEN
+       u(1,1:n-1) = u(1,n)
+    END IF
+!
+!  Periodicity
+    DO j=1,nidbas
+       u(:,j+n) = u(:,j)
+    END DO
+  END SUBROUTINE back_transf
+!--------------------------------------------------------------------------------
+  DOUBLE PRECISION FUNCTION normf_gb(matm, f)
+!
+!    L2 norm of f represented by its expansion coefficients.
+!
+    TYPE(gbmat), INTENT(in)      :: matm
+    DOUBLE PRECISION, INTENT(in) :: f(:)
+    normf_gb = SQRT(DOT_PRODUCT(f, vmx(matm,f)))
+  END FUNCTION normf_gb
+!--------------------------------------------------------------------------------
+  DOUBLE PRECISION FUNCTION normf_ge(matm, f)
+!
+!    L2 norm of f represented by its expansion coefficients.
+!
+    TYPE(gemat), INTENT(in)      :: matm
+    DOUBLE PRECISION, INTENT(in) :: f(:)
+    normf_ge = SQRT(DOT_PRODUCT(f, vmx(matm,f)))
+  END FUNCTION normf_ge
+!--------------------------------------------------------------------------------
+  DOUBLE PRECISION FUNCTION residue_gen(grid, f, u, p)
+!
+!   Generic version of residue
+!
+    TYPE(grid2d) :: grid
+    DOUBLE PRECISION, INTENT(in) :: f(:), u(:)
+    DOUBLE PRECISION :: r(SIZE(f))
+    CHARACTER(len=*), OPTIONAL, INTENT(in) :: p
+!
+    IF(ALLOCATED(grid%mata)) THEN
+       residue_gen = residue_csr(grid%mata, f, u, p)
+    ELSE
+       residue_gen = residue_cds(grid%mata_cds, f, u, p)
+    END IF
+  END FUNCTION residue_gen
+!--------------------------------------------------------------------------------
+  DOUBLE PRECISION FUNCTION residue_csr(mat, f, u, p)
+!
+!    L2 norm of residue ||f-Av||
+!
+    TYPE(csr_mat), INTENT(in)    :: mat
+    DOUBLE PRECISION, INTENT(in) :: f(:), u(:)
+    DOUBLE PRECISION :: r(SIZE(f))
+    CHARACTER(len=*), OPTIONAL, INTENT(in) :: p
+!
+    CHARACTER(len=4) :: norm_type
+    norm_type = '2'
+    IF(PRESENT(p)) norm_type = p
+!
+    r = f-vmx(mat,u)
+    SELECT CASE (norm_type)
+    CASE('1') 
+       residue_csr = SUM(ABS(r))
+    CASE ('2')
+       residue_csr = SQRT(DOT_PRODUCT(r,r))
+    CASE ('inf')
+       residue_csr = MAXVAL(ABS(r))
+    END SELECT
+  END FUNCTION residue_csr
+!--------------------------------------------------------------------------------
+  DOUBLE PRECISION FUNCTION residue_cds(mat, f, u, p)
+!
+!    L2 norm of residue ||f-Av||
+!
+    TYPE(cds_mat), INTENT(in)    :: mat
+    DOUBLE PRECISION, INTENT(in) :: f(:), u(:)
+    DOUBLE PRECISION :: r(SIZE(f))
+    CHARACTER(len=*), OPTIONAL, INTENT(in) :: p
+!
+    CHARACTER(len=4) :: norm_type
+    norm_type = '2'
+    IF(PRESENT(p)) norm_type = p
+!
+    r = f-vmx(mat,u)
+    SELECT CASE (norm_type)
+    CASE('1') 
+       residue_cds = SUM(ABS(r))
+    CASE ('2')
+       residue_cds = SQRT(DOT_PRODUCT(r,r))
+    CASE ('inf')
+       residue_cds = MAXVAL(ABS(r))
+    END SELECT
+  END FUNCTION residue_cds
+!--------------------------------------------------------------------------------
+  DOUBLE PRECISION FUNCTION residue_ge(mat, f, u)
+!
+!    L2 norm of residue ||f-Av||
+!
+    TYPE(gemat), INTENT(in)      :: mat
+    DOUBLE PRECISION, INTENT(in) :: f(:), u(:)
+    DOUBLE PRECISION :: r(SIZE(f))
+    r = f-vmx(mat,u)
+    residue_ge = SQRT(DOT_PRODUCT(r,r))
+  END FUNCTION residue_ge
+!--------------------------------------------------------------------------------
+  DOUBLE PRECISION FUNCTION residue_gb(mat, f, u)
+!
+!    L2 norm of residue ||f-Av||
+!
+    TYPE(gbmat), INTENT(in)      :: mat
+    DOUBLE PRECISION, INTENT(in) :: f(:), u(:)
+    DOUBLE PRECISION :: r(SIZE(f))
+    r = f-vmx(mat,u)
+    residue_gb = SQRT(DOT_PRODUCT(r,r))
+  END FUNCTION residue_gb
+!--------------------------------------------------------------------------------
+  SUBROUTINE ctof_massmat(splf, splc, alpha, matm)
+!
+!   Compute coarse to fine mass matrix M(h,2h)
+!
+    TYPE(spline1d), INTENT(in)   :: splf ! Spline on fine mesh
+    TYPE(spline1d), INTENT(in)   :: splc ! Spline on coarse mesh
+    INTEGER, INTENT(in)          :: alpha
+    TYPE(gemat), INTENT(out)     :: matm
+!
+    LOGICAL :: nlper
+    INTEGER :: nf, nxf, nc, nxc, nidbas, kl, ku
+    INTEGER :: ig, ngauss
+    INTEGER :: i, ic, it, jw, irow, jcol
+!
+    DOUBLE PRECISION              :: contrib
+    DOUBLE PRECISION, ALLOCATABLE :: xg(:), wg(:)
+    DOUBLE PRECISION, ALLOCATABLE :: funf(:,:), func(:,:)
+!
+    nlper = splf%period .OR. splc%period
+    CALL get_dim(splf, nf, nxf, nidbas)
+    CALL get_gauss(splf, ngauss)
+    CALL get_dim(splc, nc, nxc, nidbas)
+    kl=nidbas; ku=kl
+    IF(nlper) THEN
+       nf = nxf
+       nc = nxc
+    END IF
+    CALL init(nc, 1, matm, mrows=nf)  ! Defne nf x nc matrix
+!
+    ALLOCATE(xg(ngauss),wg(ngauss))
+    ALLOCATE(funf(0:nidbas,1))
+    ALLOCATE(func(0:nidbas,1))
+    DO i=1,nxf
+       ic = (i-1)/2+1
+       CALL get_gauss(splf, ngauss, i, xg, wg)
+       DO ig=1,ngauss
+          CALL basfun(xg(ig), splf, funf, i)
+          CALL basfun(xg(ig), splc, func, ic)
+          DO it=0,nidbas
+             DO jw=0,nidbas
+                contrib = wg(ig)*funf(it,1)*func(jw,1)*xg(ig)**alpha
+                irow = i+it;  IF(nlper) irow=MODULO(irow-1,nxf)+1
+                jcol = ic+jw; IF(nlper) jcol=MODULO(jcol-1,nxc)+1
+                CALL updtmat(matm, irow, jcol, contrib)
+             END DO
+          END DO
+       END DO
+    END DO
+!
+    DEALLOCATE(xg,wg)
+    DEALLOCATE(funf,func)
+  END SUBROUTINE ctof_massmat
+!--------------------------------------------------------------------------------
+  SUBROUTINE direct_solve_1d(grid, v)
+!
+!   1D direct solver
+!
+    TYPE(grid1d), INTENT(inout)   :: grid
+    DOUBLE PRECISION, INTENT(out) :: v(:)
+    LOGICAL :: nlper
+!
+    nlper = grid%spl%period
+    IF(nlper) THEN
+       IF(.NOT.ALLOCATED(grid%matap_copy)) THEN 
+          ALLOCATE(grid%matap_copy)
+          CALL mcopy(grid%matap, grid%matap_copy)
+          CALL factor(grid%matap_copy)
+       END IF
+       CALL bsolve(grid%matap_copy, grid%f, v)
+    ELSE
+       IF(.NOT.ALLOCATED(grid%mata_copy)) THEN 
+          ALLOCATE(grid%mata_copy)
+          CALL mcopy(grid%mata, grid%mata_copy)
+          CALL factor(grid%mata_copy)
+       END IF
+       CALL bsolve(grid%mata_copy, grid%f, v)
+    END IF
+  END SUBROUTINE direct_solve_1d
+!--------------------------------------------------------------------------------
+  SUBROUTINE direct_solve_2d(grid, v, debug)
+!
+!   2D direct solver
+!
+    TYPE(grid2d), INTENT(inout)     :: grid
+    DOUBLE PRECISION, INTENT(inout) :: v(:)
+    LOGICAL, INTENT(in), OPTIONAL   :: debug
+    LOGICAL :: dbg
+!
+    dbg = .FALSE.
+    IF(PRESENT(debug)) dbg=debug
+!
+    IF(ALLOCATED(grid%mata)) THEN
+       IF(.NOT.ALLOCATED(grid%mata%mumps)) THEN
+          ALLOCATE(grid%mata%mumps)
+          CALL csr2mumps(grid%mata, grid%mata%mumps)
+          CALL factor(grid%mata%mumps, debug=dbg)
+       END IF
+       CALL bsolve(grid%mata%mumps, v, debug=dbg)
+    ELSE
+       IF(.NOT.ALLOCATED(grid%mata_cds%mumps)) THEN
+          ALLOCATE(grid%mata_cds%mumps)
+          CALL cds2mumps(grid%mata_cds, grid%mata_cds%mumps)
+          CALL factor(grid%mata_cds%mumps, debug=dbg)
+       END IF
+       CALL bsolve(grid%mata_cds%mumps, v, debug=dbg)
+    END IF
+!
+!  Only cylindrical case
+    IF(.NOT.grid%spl%sp1%period .AND. grid%spl%sp2%period) THEN
+    END IF
+  END SUBROUTINE direct_solve_2d
+!--------------------------------------------------------------------------------
+  SUBROUTINE jacobi_gb(mat, omega, nu, v, f)
+!
+!   Weighted Jacobi relaxation
+!
+    TYPE(gbmat),INTENT(in) :: mat
+    DOUBLE PRECISION, INTENT(in)    :: omega
+    INTEGER, INTENT(in)             :: nu
+    DOUBLE PRECISION, INTENT(inout) :: v(:)
+    DOUBLE PRECISION, INTENT(in)    :: f(:)
+!
+    DOUBLE PRECISION :: temp(SIZE(v))
+    DOUBLE PRECISION :: inv_diag(SIZE(v))
+    INTEGER          :: k, kl, ku, n, i, j, jmin, jmax
+!
+    kl = mat%kl
+    ku = mat%ku
+    n = mat%rank
+!
+    inv_diag(:) = omega/mat%val(kl+ku+1,:)
+    DO k=1,nu
+       DO i=1,n
+          jmin = MAX(1,i-kl)
+          jmax = MIN(n, i+ku)
+          temp(i) = f(i)
+          DO j=jmin,i-1
+             temp(i) = temp(i) - mat%val(kl+ku+i-j+1,j)*v(j)
+          END DO
+          DO j=i+1,jmax
+             temp(i) = temp(i) - mat%val(kl+ku+i-j+1,j)*v(j)
+          END DO
+          temp(i) = temp(i)*inv_diag(i)
+       END DO
+       v(:) = (1.d0-omega)*v(:) + temp(:)   
+    END DO
+  END SUBROUTINE jacobi_gb
+!--------------------------------------------------------------------------------
+  SUBROUTINE jacobi_ge(mat, omega, nu, v, f)
+!
+!   Weighted Jacobi relaxation
+!
+    TYPE(gemat),INTENT(in) :: mat
+    DOUBLE PRECISION, INTENT(in)    :: omega
+    INTEGER, INTENT(in)             :: nu
+    DOUBLE PRECISION, INTENT(inout) :: v(:)
+    DOUBLE PRECISION, INTENT(in)    :: f(:)
+!
+    DOUBLE PRECISION :: temp(SIZE(v))
+    DOUBLE PRECISION :: inv_diag(SIZE(v))
+    INTEGER          :: k, n, i, j
+!
+    n = mat%rank
+!
+    DO i=1,n
+       inv_diag(i) = omega/mat%val(i,i)
+    END DO
+!
+    DO k=1,nu
+       DO i=1,n
+          temp(i) = f(i)
+          DO j=1,i-1
+             temp(i) = temp(i) - mat%val(i,j)*v(j)
+          END DO
+          DO j=i+1,n
+             temp(i) = temp(i) - mat%val(i-j+1,j)*v(j)
+          END DO
+          temp(i) = temp(i)*inv_diag(i)
+       END DO
+       v(:) = (1.d0-omega)*v(:) + temp(:)   
+    END DO
+  END SUBROUTINE jacobi_ge
+!--------------------------------------------------------------------------------
+  SUBROUTINE jacobi_csr(mat, omega, nu, v, f)
+!
+!   Weighted Jacobi relaxation
+!
+    TYPE(csr_mat),INTENT(in)        :: mat
+    DOUBLE PRECISION, INTENT(in)    :: omega
+    INTEGER, INTENT(in)             :: nu
+    DOUBLE PRECISION, INTENT(inout) :: v(:)
+    DOUBLE PRECISION, INTENT(in)    :: f(:)
+!
+    DOUBLE PRECISION :: temp(SIZE(v))
+    DOUBLE PRECISION :: inv_diag(SIZE(v))
+    INTEGER          :: k, n, i, j, jcol
+!
+    n = mat%rank
+!
+    inv_diag(:) = omega/mat%val(mat%idiag)
+    DO k=1,nu
+       temp(:) = f(:)
+       DO i=1,n
+          DO j = mat%irow(i), mat%irow(i+1)-1
+             jcol = mat%cols(j)
+             IF(jcol.NE.i) THEN   ! The diagonal
+                temp(i) = temp(i) - mat%val(j)*v(jcol)
+             END IF
+          END DO
+       END DO
+       temp(:) = temp(:)*inv_diag(:)
+       v(:)    = (1.d0-omega)*v(:) + temp(:)   
+    END DO
+  END SUBROUTINE jacobi_csr
+!--------------------------------------------------------------------------------
+  SUBROUTINE jacobi_cds(mat, omega, nu, v, f)
+!
+!   Weighted Jacobi relaxation
+!
+    TYPE(cds_mat),INTENT(in)        :: mat
+    DOUBLE PRECISION, INTENT(in)    :: omega
+    INTEGER, INTENT(in)             :: nu
+    DOUBLE PRECISION, INTENT(inout) :: v(:)
+    DOUBLE PRECISION, INTENT(in)    :: f(:)
+!
+    DOUBLE PRECISION :: temp(SIZE(v))
+    DOUBLE PRECISION :: inv_diag(SIZE(v))
+    INTEGER          :: k, n, i, id, d
+!
+    n = mat%rank
+!
+    inv_diag(:) = omega/mat%val(:,mat%dists(0))
+    DO k=1,nu
+       temp(:) = f(:)
+       DO id=-mat%kl,mat%ku  ! f - (L+U)*v
+          IF(id.EQ.0) CYCLE
+          d = mat%dists(id)
+          DO i=MAX(1,1-d), MIN(n,mat%rank-d)
+             temp(i) = temp(i) - mat%val(i,id)*v(i+d)
+          END DO
+       END DO
+       temp(:) = temp(:)*inv_diag(:)
+       v(:)    = (1.d0-omega)*v(:) + temp(:)   
+    END DO
+  END SUBROUTINE jacobi_cds
+!--------------------------------------------------------------------------------
+  SUBROUTINE gs_gb(mat, nu, v, f)
+!
+!   Gauss-Seidel relaxation
+!
+    TYPE(gbmat),INTENT(in) :: mat
+    INTEGER, INTENT(in)    :: nu
+    DOUBLE PRECISION, INTENT(inout) :: v(:)
+    DOUBLE PRECISION, INTENT(in)    :: f(:)
+!
+    INTEGER          :: k, kl, ku, n, i, j, jmin, jmax
+    DOUBLE PRECISION :: inv_diag(SIZE(v))
+!
+    kl = mat%kl
+    ku = mat%ku
+    n = mat%rank
+!
+    inv_diag(:) = 1.d0/mat%val(kl+ku+1,:)
+    DO k=1,nu
+       DO i=1,n
+          jmin = MAX(1,i-kl)
+          jmax = MIN(n, i+ku)
+          v(i) = f(i)
+          DO j=jmin,i-1
+             v(i) = v(i) - mat%val(kl+ku+i-j+1,j)*v(j)
+          END DO
+          DO j=i+1,jmax
+             v(i) = v(i) - mat%val(kl+ku+i-j+1,j)*v(j)
+          END DO
+          v(i) = inv_diag(i)*v(i)
+       END DO
+    END DO
+  END SUBROUTINE gs_gb
+!--------------------------------------------------------------------------------
+  SUBROUTINE gs_ge(mat, nu, v, f)
+!
+!   Gauss-Seidel relaxation
+!
+    TYPE(gemat),INTENT(in) :: mat
+    INTEGER, INTENT(in)    :: nu
+    DOUBLE PRECISION, INTENT(inout) :: v(:)
+    DOUBLE PRECISION, INTENT(in)    :: f(:)
+!
+    INTEGER          :: k, n, i, j
+    DOUBLE PRECISION :: inv_diag(SIZE(v))
+!
+    n = mat%rank
+!
+    DO i=1,n
+       inv_diag(i) = 1.d0/mat%val(i,i)
+    END DO
+    DO k=1,nu
+       DO i=1,n
+          v(i) = f(i)
+          DO j=1,i-1
+             v(i) = v(i) - mat%val(i,j)*v(j)
+          END DO
+          DO j=i+1,n
+             v(i) = v(i) - mat%val(i,j)*v(j)
+          END DO
+          v(i) = inv_diag(i)*v(i)
+       END DO
+    END DO
+  END SUBROUTINE gs_ge
+!--------------------------------------------------------------------------------
+  SUBROUTINE gs_csr(mat, nu, v, f)
+!
+!   Gauss-Seidel relaxation
+!
+    TYPE(csr_mat),INTENT(in)        :: mat
+    INTEGER, INTENT(in)             :: nu
+    DOUBLE PRECISION, INTENT(inout) :: v(:)
+    DOUBLE PRECISION, INTENT(in)    :: f(:)
+!
+    DOUBLE PRECISION :: inv_diag(SIZE(v))
+    INTEGER          :: k, n, i, j, jcol
+!
+    n = mat%rank
+!
+    inv_diag(:) = 1.0d0/mat%val(mat%idiag)
+    DO k=1,nu
+       DO i=1,n
+          v(i) = f(i)
+          DO j = mat%irow(i), mat%irow(i+1)-1
+             jcol = mat%cols(j)
+             IF(jcol.NE.i) THEN   ! The diagonal
+                v(i) = v(i) - mat%val(j)*v(jcol)
+             END IF
+          END DO
+          v(i) = v(i)*inv_diag(i)
+       END DO
+    END DO
+  END SUBROUTINE gs_csr
+!--------------------------------------------------------------------------------
+  SUBROUTINE gs_cds(mat, nu, v, f)
+!
+!   Gauss-Seidel relaxation
+!
+    TYPE(cds_mat),INTENT(in)        :: mat
+    INTEGER, INTENT(in)             :: nu
+    DOUBLE PRECISION, INTENT(inout) :: v(:)
+    DOUBLE PRECISION, INTENT(in)    :: f(:)
+!
+    DOUBLE PRECISION :: temp(SIZE(v))
+    DOUBLE PRECISION :: inv_diag(SIZE(v))
+    INTEGER          :: k, n, i, id, d
+!
+    n = mat%rank
+!
+    inv_diag(:) = 1.0d0/mat%val(:,mat%dists(0))
+    DO k=1,nu
+!
+       temp(:) = f(:)
+       DO id=1,mat%ku  ! t <- f - U*v
+          d = mat%dists(id)
+          DO i=MAX(1,1-d), MIN(n,n-d)
+             temp(i) = temp(i) - mat%val(i,id)*v(i+d)
+          END DO
+       END DO
+!
+       DO i=1,n        ! Solve (L+D)v=t
+          v(i) = temp(i)
+          DO id=-1,-mat%kl,-1
+             d = mat%dists(id)
+             IF(i+d.LT.1) EXIT
+             v(i) = v(i) - mat%val(i,id)*v(i+d)
+          END DO
+          v(i) = v(i)*inv_diag(i)
+       END DO
+    END DO
+  END SUBROUTINE gs_cds
+!--------------------------------------------------------------------------------
+  SUBROUTINE printmat_mat(str, val)
+!
+    CHARACTER(len=*), INTENT(in) :: str
+    DOUBLE PRECISION, INTENT(in)  :: val(:,:)
+    INTEGER :: mrows, ncols,i
+    mrows=SIZE(val,1)
+    ncols=SIZE(val,2)
+    WRITE(*,'(/a)') TRIM(str)
+    WRITE(*,'(2(a,i6))') 'M =', mrows, ', N =', ncols
+    DO i=1,mrows
+       WRITE(*,'(12(1pe12.3))') val(i,:)
+    END DO
+    WRITE(*,'(a/(12(1pe12.3)))') 'Sum or rows', SUM(val,2)
+    WRITE(*,'(a/(12(1pe12.3)))') 'Sum or cols', SUM(val,1)
+  END SUBROUTINE printmat_mat
+!--------------------------------------------------------------------------------
+  SUBROUTINE printmat_ge(str, mat)
+!
+    CHARACTER(len=*), INTENT(in) :: str
+    TYPE(gemat), INTENT(in)      :: mat
+    INTEGER :: i
+    DOUBLE PRECISION :: arow(mat%ncols)
+    DOUBLE PRECISION :: sum_cols(mat%ncols), sum_rows(mat%mrows)
+    sum_cols = 0.0d0
+    arow = 0.0d0
+    WRITE(*,'(/a)') TRIM(str)
+    WRITE(*,'(2(a,i6))') 'M =', mat%mrows, ', N =', mat%ncols
+    DO i=1,mat%mrows
+       CALL getrow(mat,i,arow)
+       sum_rows(i) = SUM(arow)
+       sum_cols(:) = sum_cols(:) + arow(:)
+       WRITE(*,'(12(1pe12.3))') arow
+    END DO
+    WRITE(*,'(a/(12(1pe12.3)))') 'Sum or rows', sum_rows
+    WRITE(*,'(a/(12(1pe12.3)))') 'Sum or cols', sum_cols
+  END SUBROUTINE printmat_ge
+!--------------------------------------------------------------------------------
+  SUBROUTINE printmat_gb(str, mat)
+!
+    CHARACTER(len=*), INTENT(in) :: str
+    TYPE(gbmat), INTENT(in)      :: mat
+    INTEGER :: i
+    DOUBLE PRECISION :: arow(mat%ncols)
+    DOUBLE PRECISION :: sum_cols(mat%ncols), sum_rows(mat%mrows)
+    sum_cols = 0.0d0
+    WRITE(*,'(/a)') TRIM(str)
+    WRITE(*,'(2(a,i6))') 'M =', mat%mrows, ', N =', mat%ncols
+    DO i=1,mat%mrows
+       CALL getrow(mat,i,arow)
+       sum_rows(i) = SUM(arow)
+       sum_cols(:) = sum_cols(:) + arow(:)
+       WRITE(*,'(8(1pe12.3))') arow
+    END DO
+    WRITE(*,'(a/(12(1pe12.3)))') 'Sum or rows', sum_rows
+    WRITE(*,'(a/(12(1pe12.3)))') 'Sum or cols', sum_cols
+  END SUBROUTINE printmat_gb
+!--------------------------------------------------------------------------------
+  SUBROUTINE printmat_periodic(str, mat)
+!
+    CHARACTER(len=*), INTENT(in) :: str
+    TYPE(periodic_mat), INTENT(in)      :: mat
+    INTEGER :: i
+    DOUBLE PRECISION :: arow(mat%mat%ncols)
+    DOUBLE PRECISION :: sum_cols(mat%mat%ncols), sum_rows(mat%mat%mrows)
+    sum_cols = 0.0d0
+    WRITE(*,'(/a)') TRIM(str)
+    WRITE(*,'(2(a,i6))') 'M =', mat%mat%mrows, ', N =', mat%mat%ncols
+    DO i=1,mat%mat%mrows
+       CALL getrow(mat,i,arow)
+       sum_rows(i) = SUM(arow)
+       sum_cols(:) = sum_cols(:) + arow(:)
+       WRITE(*,'(8(1pe12.3))') arow
+    END DO
+    WRITE(*,'(a/(8(1pe12.3)))') 'Sum or rows', sum_rows
+    WRITE(*,'(a/(8(1pe12.3)))') 'Sum or cols', sum_cols
+  END SUBROUTINE printmat_periodic
+!--------------------------------------------------------------------------------
+  SUBROUTINE printdiag_gb(str, mat)
+!
+    CHARACTER(len=*), INTENT(in) :: str
+    TYPE(gbmat), INTENT(in)      :: mat
+    INTEGER :: ku, kl
+    kl = mat%kl
+    ku = mat%ku
+    WRITE(*,'(a/(8(1pe12.3)))') str, mat%val(kl+ku+1,:)
+  END SUBROUTINE printdiag_gb
+!--------------------------------------------------------------------------------
+  INTEGER FUNCTION get_lmax(n)
+    INTEGER, INTENT(in) :: n
+    INTEGER :: l, ncur
+    l=1
+    ncur = n
+    DO
+       IF(ncur.EQ.2 .OR. MODULO(ncur,2).NE.0) EXIT  ! Minimum N is 2 or odd.
+       l=l+1
+       ncur = ncur/2
+    END DO
+    get_lmax = l
+  END FUNCTION get_lmax
+!--------------------------------------------------------------------------------
+  SUBROUTINE ibc_transf(grids, dir, k)
+!
+!   Impose BC on transfer matrix
+!
+    TYPE(grid2d), INTENT(inout) :: grids(:)
+    INTEGER, INTENT(in)         :: dir
+    INTEGER, INTENT(in)         :: k
+!
+    INTEGER :: levels, l
+    levels = SIZE(grids)
+    DO l=2,levels
+     CALL mod_transf(grids(l)%matp(dir),k)
+    END DO
+  END SUBROUTINE ibc_transf
+!--------------------------------------------------------------------------------
+END MODULE multigrid
diff --git a/src/mumps_mod.f90 b/src/mumps_mod.f90
new file mode 100644
index 0000000..fe23546
--- /dev/null
+++ b/src/mumps_mod.f90
@@ -0,0 +1,1728 @@
+!>
+!> @file mumps_mod.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+MODULE mumps_bsplines
+!
+!    MUMPS_BSPLINES: Simple interface to the sparse direct solver MUMPS
+!
+!    T.M. Tran, CRPP-EPFL
+!    June 2011
+!
+  USE sparse
+  IMPLICIT NONE
+  INCLUDE 'dmumps_struc.h'
+  INCLUDE 'zmumps_struc.h'
+!
+  TYPE mumps_mat
+     INTEGER :: rank, nnz
+     INTEGER :: nterms, kmat
+     INTEGER :: istart, iend
+     INTEGER :: nnz_start, nnz_end, nnz_loc
+     LOGICAL :: nlsym
+     LOGICAL :: nlpos
+     LOGICAL :: nlforce_zero
+     TYPE(spmat), POINTER      :: mat => NULL()
+     INTEGER, POINTER          :: cols(:) => NULL()
+     INTEGER, POINTER          :: irow(:) => NULL()
+     INTEGER, POINTER          :: perm(:) => NULL()
+     DOUBLE PRECISION, POINTER :: val(:) => NULL()
+     TYPE(dmumps_struc)        :: mumps_par
+  END TYPE mumps_mat
+!
+  TYPE zmumps_mat
+     INTEGER :: rank, nnz
+     INTEGER :: nterms, kmat
+     INTEGER :: istart, iend
+     INTEGER :: nnz_start, nnz_end, nnz_loc
+     LOGICAL :: nlsym
+     LOGICAL :: nlherm
+     LOGICAL :: nlpos
+     LOGICAL :: nlforce_zero
+     TYPE(zspmat), POINTER   :: mat => NULL()
+     INTEGER, POINTER        :: cols(:) => NULL()
+     INTEGER, POINTER        :: irow(:) => NULL()
+     INTEGER, POINTER        :: perm(:) => NULL()
+     DOUBLE COMPLEX, POINTER :: val(:) => NULL()
+     TYPE(zmumps_struc)      :: mumps_par
+  END TYPE zmumps_mat
+!
+  INTERFACE init
+     MODULE PROCEDURE init_mumps_mat, init_zmumps_mat
+  END INTERFACE init
+!
+  INTERFACE clear_mat
+     MODULE PROCEDURE clear_mumps_mat, clear_zmumps_mat
+  END INTERFACE clear_mat
+!
+  INTERFACE updtmat
+     MODULE PROCEDURE updt_mumps_mat, updt_zmumps_mat
+  END INTERFACE updtmat
+!
+  INTERFACE putele
+     MODULE PROCEDURE putele_mumps_mat, putele_zmumps_mat
+  END INTERFACE putele
+!
+  INTERFACE getele
+     MODULE PROCEDURE getele_mumps_mat, getele_zmumps_mat
+  END INTERFACE getele
+!
+  INTERFACE putrow
+     MODULE PROCEDURE putrow_mumps_mat, putrow_zmumps_mat
+  END INTERFACE putrow
+!
+  INTERFACE getrow
+     MODULE PROCEDURE getrow_mumps_mat, getrow_zmumps_mat
+  END INTERFACE getrow
+!
+  INTERFACE putcol
+     MODULE PROCEDURE putcol_mumps_mat, putcol_zmumps_mat
+  END INTERFACE putcol
+!
+  INTERFACE getcol
+     MODULE PROCEDURE getcol_mumps_mat, getcol_zmumps_mat
+  END INTERFACE getcol
+!
+  INTERFACE get_count
+     MODULE PROCEDURE get_count_mumps_mat, get_count_zmumps_mat
+  END INTERFACE get_count
+!
+  INTERFACE to_mat
+     MODULE PROCEDURE to_mumps_mat, to_zmumps_mat
+  END INTERFACE to_mat
+!
+  INTERFACE reord_mat
+     MODULE PROCEDURE reord_mumps_mat, reord_zmumps_mat
+  END INTERFACE reord_mat
+!
+  INTERFACE numfact
+     MODULE PROCEDURE numfact_mumps_mat, numfact_zmumps_mat
+  END INTERFACE numfact
+!
+  INTERFACE factor
+     MODULE PROCEDURE factor_mumps_mat, factor_zmumps_mat
+  END INTERFACE factor
+!
+  INTERFACE bsolve
+     MODULE PROCEDURE bsolve_mumps_mat1,  bsolve_mumps_matn, &
+          &           bsolve_zmumps_mat1, bsolve_zmumps_matn
+  END INTERFACE bsolve
+!
+  INTERFACE vmx
+     MODULE PROCEDURE vmx_mumps_mat,  vmx_mumps_matn, &
+          &           vmx_zmumps_mat, vmx_zmumps_matn
+  END INTERFACE vmx
+!
+  INTERFACE destroy
+     MODULE PROCEDURE destroy_mumps_mat, destroy_zmumps_mat
+ END INTERFACE destroy
+!
+ INTERFACE putmat
+    MODULE PROCEDURE put_mumps_mat, put_zmumps_mat
+ END INTERFACE putmat
+!
+ INTERFACE getmat
+    MODULE PROCEDURE get_mumps_mat, get_zmumps_mat
+ END INTERFACE getmat
+!
+ INTERFACE mcopy
+    MODULE PROCEDURE mcopy_mumps_mat, mcopy_zmumps_mat
+ END INTERFACE mcopy
+!
+ INTERFACE maddto
+    MODULE PROCEDURE maddto_mumps_mat, maddto_zmumps_mat
+ END INTERFACE maddto
+!
+ INTERFACE psum_mat
+    MODULE PROCEDURE psum_mumps_mat, psum_zmumps_mat
+ END INTERFACE psum_mat
+!
+ INTERFACE p2p_mat
+    MODULE PROCEDURE p2p_mumps_mat, p2p_zmumps_mat
+ END INTERFACE p2p_mat
+!
+CONTAINS
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE init_mumps_mat(n, nterms, mat, kmat, nlsym, nlpos, &
+       &                      nlforce_zero, comm_in)
+!
+!   Initialize an empty sparse mumps matrix
+!
+    USE pputils2
+    INCLUDE 'mpif.h'
+    INTEGER, INTENT(in)           :: n, nterms
+    TYPE(mumps_mat)               :: mat
+    INTEGER, OPTIONAL, INTENT(in) :: kmat
+    LOGICAL, OPTIONAL, INTENT(in) :: nlsym
+    LOGICAL, OPTIONAL, INTENT(in) :: nlpos
+    LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero
+    INTEGER, OPTIONAL, INTENT(in) :: comm_in
+!
+    INTEGER :: comm, nloc
+!
+    comm = MPI_COMM_SELF    ! Default is serial!
+    IF(PRESENT(comm_in)) comm = comm_in
+!
+    mat%rank = n
+    mat%nterms = nterms
+    mat%nnz = 0
+    mat%nlsym = .FALSE.
+    mat%nlpos = .TRUE.
+    mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val
+    IF(PRESENT(kmat)) mat%kmat = kmat
+    IF(PRESENT(nlsym)) mat%nlsym = nlsym
+    IF(PRESENT(nlpos)) mat%nlpos = nlpos
+    IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero
+!
+!    Matrix partition
+!
+    CALL dist1d(comm, 1, n, mat%istart, nloc)
+    mat%iend = mat%istart + nloc - 1
+!
+    IF(ASSOCIATED(mat%mat)) DEALLOCATE(mat%mat)
+    ALLOCATE(mat%mat)
+    CALL init(n, mat%mat, mat%istart, mat%iend)
+!
+!    Initialize a MUMPS instance
+!
+    mat%mumps_par%N = n
+    mat%mumps_par%NZ = 0
+    mat%mumps_par%COMM = comm
+    mat%mumps_par%PAR = 1        ! Host involved in calculations
+    IF(mat%nlsym) THEN
+       IF(mat%nlpos) THEN
+          mat%mumps_par%SYM = 1  ! symmetric, positive definite
+       ELSE
+          mat%mumps_par%SYM = 2  ! symmetric, indefinite
+       END IF
+    ELSE
+       mat%mumps_par%SYM = 0     ! unsymmetric
+    END IF
+!
+    mat%mumps_par%JOB = -1        ! Init phase
+    CALL dmumps(mat%mumps_par)
+!
+!    Nullify MUMPS pointers for distributed matrix
+!
+    NULLIFY(mat%mumps_par%A_loc)
+    NULLIFY(mat%mumps_par%IRN_loc)
+    NULLIFY(mat%mumps_par%JCN_loc)
+    NULLIFY(mat%mumps_par%RHS)
+!
+  END SUBROUTINE init_mumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE init_zmumps_mat(n, nterms, mat, kmat, nlsym, nlherm, &
+       &                      nlpos, nlforce_zero, comm_in)
+!
+!   Initialize an empty sparse mumps matrix
+!
+    USE pputils2
+    INCLUDE 'mpif.h'
+    INTEGER, INTENT(in)           :: n, nterms
+    TYPE(zmumps_mat)              :: mat
+    INTEGER, OPTIONAL, INTENT(in) :: kmat
+    LOGICAL, OPTIONAL, INTENT(in) :: nlsym
+    LOGICAL, OPTIONAL, INTENT(in) :: nlherm
+    LOGICAL, OPTIONAL, INTENT(in) :: nlpos
+    LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero
+    INTEGER, OPTIONAL, INTENT(in) :: comm_in
+!
+    INTEGER :: comm, nloc
+!
+    comm = MPI_COMM_SELF    ! Default is serial!
+    IF(PRESENT(comm_in)) comm = comm_in
+!
+    mat%rank = n
+    mat%nterms = nterms
+    mat%nnz = 0
+    mat%nlsym = .FALSE.
+    mat%nlherm = .FALSE.
+    mat%nlpos = .TRUE.
+    mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val
+    IF(PRESENT(kmat)) mat%kmat = kmat
+    IF(PRESENT(nlsym)) mat%nlsym = nlsym
+    IF(PRESENT(nlpos)) mat%nlpos = nlpos
+    IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero
+!
+!    Matrix partition
+!
+    CALL dist1d(comm, 1, n, mat%istart, nloc)
+    mat%iend = mat%istart + nloc - 1
+!
+    IF(ASSOCIATED(mat%mat)) DEALLOCATE(mat%mat)
+    ALLOCATE(mat%mat)
+    CALL init(n, mat%mat, mat%istart, mat%iend)
+!
+!    Initialize a MUMPS instance
+!
+    mat%mumps_par%N = n
+    mat%mumps_par%NZ = 0
+    mat%mumps_par%COMM = comm
+    mat%mumps_par%PAR = 1        ! Host involved in calculations
+    mat%mumps_par%SYM = 0        ! General unsymmetric
+    IF(mat%nlsym) THEN
+       IF(mat%nlpos) THEN
+          mat%mumps_par%SYM = 1  ! symmetric, positive definite
+       ELSE
+          mat%mumps_par%SYM = 2  ! symmetric, indefinite
+       END IF
+    END IF
+!
+    mat%mumps_par%JOB = -1       ! Init phase
+    CALL zmumps(mat%mumps_par)
+!
+! WARNING: SYM=1 is currently (version 4.10.0) is treated as SYM=2.
+! The Hermitian case is not implemented yet!
+!
+!    Nullify MUMPS pointers for distributed matrix
+!
+    NULLIFY(mat%mumps_par%A_loc)
+    NULLIFY(mat%mumps_par%IRN_loc)
+    NULLIFY(mat%mumps_par%JCN_loc)
+    NULLIFY(mat%mumps_par%RHS)
+!
+  END SUBROUTINE init_zmumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE clear_mumps_mat(mat)
+!
+!   Clear matrix, keeping its sparse structure unchanged
+!
+    TYPE(mumps_mat) :: mat
+!
+    mat%val = 0.0d0
+  END SUBROUTINE clear_mumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE clear_zmumps_mat(mat)
+!
+!   Clear matrix, keeping its sparse structure unchanged
+!
+    TYPE(zmumps_mat) :: mat
+!
+    mat%val = (0.0d0, 0.0d0)
+  END SUBROUTINE clear_zmumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE updt_mumps_mat(mat, i, j, val)
+!
+!   Update element Aij of mumps  matrix
+!
+    TYPE(mumps_mat)              :: mat
+    INTEGER, INTENT(in)          :: i, j
+    DOUBLE PRECISION, INTENT(in) :: val
+    INTEGER :: k, s, e
+!
+    IF(mat%nlsym) THEN   ! Store only upper part for symmetric matrices
+       IF(i.GT.j) RETURN
+    END IF
+    IF(i.LT.mat%istart .OR. i.GT.mat%iend) THEN
+       WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j
+       WRITE(*,'(a,2i6)') '      istart, iend      ', mat%istart, mat%iend
+       STOP '*** Abnormal EXIT in MODULE mumps_mod ***'
+    END IF
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL updtmat(mat%mat, i, j, val)
+    ELSE
+       s = mat%irow(i) - mat%nnz_start + 1
+       e = mat%irow(i+1) - mat%nnz_start
+       k = isearch(mat%cols(s:e), j)
+       IF( k.GE.0 ) THEN
+          mat%val(s+k) =  mat%val(s+k)+val
+       ELSE
+          WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j
+          STOP '*** Abnormal EXIT in MODULE mumps_mod ***'
+       END IF
+    END IF
+  END SUBROUTINE updt_mumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE updt_zmumps_mat(mat, i, j, val)
+!
+!   Update element Aij of mumps  matrix
+!
+    TYPE(zmumps_mat)           :: mat
+    INTEGER, INTENT(in)        :: i, j
+    DOUBLE COMPLEX, INTENT(in) :: val
+    INTEGER :: k, s, e
+!
+    IF(mat%nlherm .OR. mat%nlsym) THEN   ! Store only upper part
+       IF(i.GT.j) RETURN
+    END IF
+    IF(i.LT.mat%istart .OR. i.GT.mat%iend) THEN
+       WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j
+       WRITE(*,'(a,2i6)') '      istart, iend      ', mat%istart, mat%iend
+       STOP '*** Abnormal EXIT in MODULE mumps_mod ***'
+    END IF
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL updtmat(mat%mat, i, j, val)
+    ELSE
+       s = mat%irow(i) - mat%nnz_start + 1
+       e = mat%irow(i+1) - mat%nnz_start
+       k = isearch(mat%cols(s:e), j)
+       IF( k.GE.0 ) THEN
+          mat%val(s+k) =  mat%val(s+k)+val
+       ELSE
+          WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j
+          STOP '*** Abnormal EXIT in MODULE mumps_mod ***'
+       END IF
+    END IF
+  END SUBROUTINE updt_zmumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putele_mumps_mat(mat, i, j, val)
+!
+!  Put element (i,j) of matrix
+!
+    TYPE(mumps_mat)            :: mat
+    INTEGER, INTENT(in)          :: i, j
+    DOUBLE PRECISION, INTENT(in) :: val
+    INTEGER :: iput, jput
+    INTEGER :: k, s, e
+!
+    iput = i
+    jput = j
+    IF(mat%nlsym) THEN
+       IF( i.GT.j ) THEN    ! Lower triangular part
+          iput = j
+          jput = i
+       END IF
+    END IF
+!
+!    Do nothing if outside
+    IF(iput.LT.mat%istart .OR. iput.GT.mat%iend) RETURN
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL putele(mat%mat, iput, jput, val, &
+            &      nlforce_zero=mat%nlforce_zero)
+    ELSE
+       s = mat%irow(iput) - mat%nnz_start + 1
+       e = mat%irow(iput+1) - mat%nnz_start
+       k = isearch(mat%cols(s:e), jput)
+       IF( k.GE.0 ) THEN
+          mat%val(s+k) =  val
+       ELSE
+          IF(ABS(val) .GT. EPSILON(0.0d0)) THEN   ! Ok for zero val
+             WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j
+             STOP '*** Abnormal EXIT in MODULE mumps_mod ***'
+          END IF
+       END IF
+   END IF
+  END SUBROUTINE putele_mumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putele_zmumps_mat(mat, i, j, val)
+!
+!  Put element (i,j) of matrix
+!
+    TYPE(zmumps_mat)         :: mat
+    INTEGER, INTENT(in)        :: i, j
+    DOUBLE COMPLEX, INTENT(in) :: val
+    DOUBLE COMPLEX :: valput
+    INTEGER :: iput, jput
+    INTEGER :: k, s, e
+!
+    iput = i
+    jput = j
+    valput = val
+    IF(mat%nlherm .OR. mat%nlsym) THEN
+       IF( i.GT.j ) THEN    ! Lower triangular part
+          iput = j
+          jput = i
+          IF(mat%nlherm) THEN
+             valput = CONJG(val)
+          ELSE
+             valput = val
+          END IF
+       END IF
+    END IF
+!
+!    Do nothing if outside
+    IF(iput.LT.mat%istart .OR. iput.GT.mat%iend) RETURN
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL putele(mat%mat, iput, jput, valput, &
+            &      nlforce_zero=mat%nlforce_zero)
+    ELSE
+       s = mat%irow(iput) - mat%nnz_start + 1
+       e = mat%irow(iput+1) - mat%nnz_start
+       k = isearch(mat%cols(s:e), jput)
+       IF( k.GE.0 ) THEN
+          mat%val(s+k) =  valput
+       ELSE
+          IF(ABS(val) .GT. EPSILON(0.0d0)) THEN   ! Ok for zero val
+             WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j
+             STOP '*** Abnormal EXIT in MODULE mumps_mod ***'
+          END IF
+       END IF
+   END IF
+ END SUBROUTINE putele_zmumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getele_mumps_mat(mat, i, j, val)
+!
+!   Get element (i,j) of sparse matrix
+!
+    TYPE(mumps_mat)               :: mat
+    INTEGER, INTENT(in)           :: i, j
+    DOUBLE PRECISION, INTENT(out) :: val
+    INTEGER :: iget, jget
+    INTEGER :: k, s, e
+!
+    iget = i
+    jget = j
+    IF(mat%nlsym) THEN
+       IF( i.GT.j ) THEN    ! Lower triangular part
+          iget = j
+          jget = i
+       END IF
+    END IF
+!
+    val = 0.0d0   ! Assume zero val if outside
+    IF( iget.LT.mat%istart .OR. iget.GT.mat%iend ) RETURN
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL getele(mat%mat, iget, jget, val)
+    ELSE
+       s = mat%irow(iget) - mat%nnz_start + 1
+       e = mat%irow(iget+1) - mat%nnz_start
+       k = isearch(mat%cols(s:e), jget)
+       IF( k.GE.0 ) THEN
+          val =mat%val(s+k) 
+       ELSE
+          val = 0.0d0   ! Assume zero val if not found
+       END IF
+    END IF
+  END SUBROUTINE getele_mumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getele_zmumps_mat(mat, i, j, val)
+!
+!   Get element (i,j) of sparse matrix
+!
+    TYPE(zmumps_mat)            :: mat
+    INTEGER, INTENT(in)         :: i, j
+    DOUBLE COMPLEX, INTENT(out) :: val
+    DOUBLE COMPLEX :: valget
+    INTEGER :: iget, jget
+    INTEGER :: k, s, e
+!
+    iget = i
+    jget = j
+    IF(mat%nlherm .OR. mat%nlsym) THEN
+       IF( i.GT.j ) THEN    ! Lower triangular part
+          iget = j
+          jget = i
+       END IF
+    END IF
+!
+    val = (0.0d0, 0.0d0)   ! Assume zero val if outside
+    IF( iget.LT.mat%istart .OR. iget.GT.mat%iend ) RETURN
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL getele(mat%mat, iget, jget, valget)
+    ELSE
+       s = mat%irow(iget) - mat%nnz_start + 1
+       e = mat%irow(iget+1) - mat%nnz_start
+       k = isearch(mat%cols(s:e), jget)
+       IF( k.GE.0 ) THEN
+          valget =mat%val(s+k) 
+       ELSE
+          valget = (0.0d0,0.0d0)   ! Assume zero val if not found
+       END IF
+    END IF
+    val = valget
+    IF( i.GT.j ) THEN
+       IF(mat%nlherm) THEN 
+          val = CONJG(valget)
+       END IF
+    END IF
+  END SUBROUTINE getele_zmumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putrow_mumps_mat(amat, i, arr)
+!
+! Put a row into sparse matrix
+!
+    TYPE(mumps_mat), INTENT(inout)   :: amat
+    INTEGER, INTENT(in)              :: i
+    DOUBLE PRECISION, INTENT(in)     :: arr(:)
+    INTEGER :: j
+!
+    DO j=1,amat%rank
+       CALL putele(amat, i, j, arr(j))
+    END DO
+  END SUBROUTINE putrow_mumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putrow_zmumps_mat(amat, i, arr)
+!
+! Put a row into sparse matrix
+!
+    TYPE(zmumps_mat), INTENT(inout)   :: amat
+    INTEGER, INTENT(in)               :: i
+    DOUBLE COMPLEX, INTENT(in)        :: arr(:)
+    INTEGER :: j
+!
+    DO j=1,amat%rank
+       CALL putele(amat, i, j, arr(j))
+    END DO
+  END SUBROUTINE putrow_zmumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getrow_mumps_mat(amat, i, arr)
+!
+!   Get a row from sparse matrix
+!
+    TYPE(mumps_mat), INTENT(in)    :: amat
+    INTEGER, INTENT(in)            :: i
+    DOUBLE PRECISION, INTENT(out)  :: arr(:)
+    INTEGER :: j
+!
+    DO j=1,amat%rank
+       CALL getele(amat, i, j, arr(j))
+    END DO
+  END SUBROUTINE getrow_mumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getrow_zmumps_mat(amat, i, arr)
+!
+!   Get a row from sparse matrix
+!
+    TYPE(zmumps_mat), INTENT(in)    :: amat
+    INTEGER, INTENT(in)             :: i
+    DOUBLE COMPLEX, INTENT(out)     :: arr(:)
+    INTEGER :: j
+!
+    DO j=1,amat%rank
+       CALL getele(amat, i, j, arr(j))
+    END DO
+  END SUBROUTINE getrow_zmumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putcol_mumps_mat(amat, j, arr)
+!
+! Put a column into sparse matrix
+!
+    TYPE(mumps_mat), INTENT(inout)   :: amat
+    INTEGER, INTENT(in)              :: j
+    DOUBLE PRECISION, INTENT(in)     :: arr(:)
+    INTEGER :: i
+!
+    DO i=amat%istart,amat%iend
+       CALL putele(amat, i, j, arr(i))
+    END DO
+  END SUBROUTINE putcol_mumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putcol_zmumps_mat(amat, j, arr)
+!
+! Put a column into sparse matrix
+!
+    TYPE(zmumps_mat), INTENT(inout)   :: amat
+    INTEGER, INTENT(in)               :: j
+    DOUBLE COMPLEX, INTENT(in)        :: arr(:)
+    INTEGER :: i
+!
+    DO i=amat%istart,amat%iend
+       CALL putele(amat, i, j, arr(i))
+    END DO
+  END SUBROUTINE putcol_zmumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getcol_mumps_mat(amat, j, arr)
+!
+!   Get a column from sparse matrix
+!
+    TYPE(mumps_mat), INTENT(in)    :: amat
+    INTEGER, INTENT(in)            :: j
+    DOUBLE PRECISION, INTENT(out)  :: arr(:)
+    INTEGER :: i
+!
+    DO i=amat%istart,amat%iend
+       CALL getele(amat, i, j, arr(i))
+    END DO
+  END SUBROUTINE getcol_mumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getcol_zmumps_mat(amat, j, arr)
+!
+!   Get a column from sparse matrix
+!
+    TYPE(zmumps_mat), INTENT(in)    :: amat
+    INTEGER, INTENT(in)             :: j
+    DOUBLE COMPLEX, INTENT(out)     :: arr(:)
+    INTEGER :: i
+!
+    DO i=amat%istart,amat%iend
+       CALL getele(amat, i, j, arr(i))
+    END DO
+  END SUBROUTINE getcol_zmumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  INTEGER FUNCTION get_count_mumps_mat(mat, nnz)
+!
+!   Number of non-zeros in sparse matrix
+!
+    TYPE(mumps_mat)                :: mat
+    INTEGER, INTENT(out), OPTIONAL :: nnz(:)
+    INTEGER :: i
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       get_count_mumps_mat = get_count(mat%mat, nnz)
+    ELSE
+       get_count_mumps_mat = mat%nnz
+       IF(PRESENT(nnz)) THEN
+          DO i=mat%istart,mat%iend
+             nnz(i) = mat%irow(i+1)-mat%irow(i)
+          END DO
+       END IF
+    END IF
+  END FUNCTION get_count_mumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  INTEGER FUNCTION get_count_zmumps_mat(mat, nnz)
+!
+!   Number of non-zeros in sparse matrix
+!
+    TYPE(zmumps_mat)               :: mat
+    INTEGER, INTENT(out), OPTIONAL :: nnz(:)
+    INTEGER :: i
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       get_count_zmumps_mat = get_count(mat%mat, nnz)
+    ELSE
+       get_count_zmumps_mat = mat%nnz
+       IF(PRESENT(nnz)) THEN
+          DO i=mat%istart,mat%iend
+             nnz(i) = mat%irow(i+1)-mat%irow(i)
+          END DO
+       END IF
+    END IF
+  END FUNCTION get_count_zmumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE to_mumps_mat(mat, nlkeep)
+!
+!   Convert linked list spmat to mumps matrice structure
+!
+    INCLUDE 'mpif.h'
+    TYPE(mumps_mat)               :: mat
+    LOGICAL, INTENT(in), OPTIONAL :: nlkeep
+    INTEGER :: i, nnz, rank, s, e
+    INTEGER :: comm, ierr, nnz_loc
+    LOGICAL :: nlclean
+!
+    nlclean = .TRUE.
+    IF(PRESENT(nlkeep)) THEN
+       nlclean = .NOT. nlkeep
+    END IF
+!
+    comm = mat%mumps_par%COMM
+    mat%mumps_par%ICNTL(18) = 3 ! Distributed assembled matrix
+!
+!    Allocate the Mumps matrix structure
+!      CSR format: (cols, irow, val) or (JCN, irow, A)
+!      COO format: (IRN, JCN, A)  or (IRN, cols, val)
+!
+    rank = mat%rank
+    nnz_loc = get_count(mat)
+    mat%nnz_start = 0
+    CALL mpi_exscan(nnz_loc, mat%nnz_start, 1, MPI_INTEGER, MPI_SUM, comm, ierr)
+    mat%nnz_start = mat%nnz_start + 1
+    mat%nnz_end = mat%nnz_start + nnz_loc - 1
+    mat%nnz_loc = nnz_loc
+    CALL mpi_allreduce(nnz_loc, mat%nnz, 1, MPI_INTEGER, MPI_SUM, comm, ierr)
+!
+    mat%mumps_par%N = rank
+    mat%mumps_par%NZ_loc = nnz_loc
+!
+    IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
+    IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
+    IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm)
+    IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
+    ALLOCATE(mat%val(nnz_loc))
+    ALLOCATE(mat%cols(nnz_loc))
+    ALLOCATE(mat%irow(mat%istart:mat%iend+1))
+!
+    IF(ASSOCIATED(mat%mumps_par%IRN_loc)) DEALLOCATE(mat%mumps_par%IRN_loc)
+    ALLOCATE(mat%mumps_par%IRN_loc(nnz_loc))
+    mat%mumps_par%A_loc => mat%val
+    mat%mumps_par%JCN_loc => mat%cols
+!
+!    Fill Mumps structure and deallocate the sparse rows
+!
+    mat%irow(mat%istart) = mat%nnz_start
+    DO i=mat%istart,mat%iend
+       mat%irow(i+1) = mat%irow(i) + mat%mat%row(i)%nnz
+       s = mat%irow(i) - mat%nnz_start + 1
+       e = mat%irow(i+1) - mat%nnz_start
+       CALL getrow(mat%mat%row(i), mat%val(s:e), mat%cols(s:e)) 
+       mat%mumps_par%IRN_loc(s:e) = i
+       IF(nlclean) CALL destroy(mat%mat%row(i))
+    END DO
+    IF(nlclean) DEALLOCATE(mat%mat)
+  END SUBROUTINE to_mumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE to_zmumps_mat(mat, nlkeep)
+!
+!   Convert linked list spmat to mumps matrice structure
+!
+    INCLUDE 'mpif.h'
+    TYPE(zmumps_mat)              :: mat
+    LOGICAL, INTENT(in), OPTIONAL :: nlkeep
+    INTEGER :: i, nnz, rank, s, e
+    INTEGER :: comm, ierr, nnz_loc
+    LOGICAL :: nlclean
+!
+    nlclean = .TRUE.
+    IF(PRESENT(nlkeep)) THEN
+       nlclean = .NOT. nlkeep
+    END IF
+!
+    comm = mat%mumps_par%COMM
+    mat%mumps_par%ICNTL(18) = 3 ! Distributed assembled matrix
+!
+!    Allocate the Mumps matrix structure
+!      CSR format: (cols, irow, val) or (JCN, irow, A)
+!      COO format: (IRN, JCN, A)  or (IRN, cols, val)
+!
+    rank = mat%rank
+    nnz_loc = get_count(mat)
+    mat%nnz_start = 0
+    CALL mpi_exscan(nnz_loc, mat%nnz_start, 1, MPI_INTEGER, MPI_SUM, comm, ierr)
+    mat%nnz_start = mat%nnz_start + 1
+    mat%nnz_end = mat%nnz_start + nnz_loc - 1
+    mat%nnz_loc = nnz_loc
+    CALL mpi_allreduce(nnz_loc, mat%nnz, 1, MPI_INTEGER, MPI_SUM, comm, ierr)
+!
+    mat%mumps_par%N = rank
+    mat%mumps_par%NZ_loc = nnz_loc
+!
+    IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
+    IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
+    IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm)
+    IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
+    ALLOCATE(mat%val(nnz_loc))
+    ALLOCATE(mat%cols(nnz_loc))
+    ALLOCATE(mat%irow(mat%istart:mat%iend+1))
+!
+    IF(ASSOCIATED(mat%mumps_par%IRN_loc)) DEALLOCATE(mat%mumps_par%IRN_loc)
+    ALLOCATE(mat%mumps_par%IRN_loc(nnz_loc))
+    mat%mumps_par%A_loc => mat%val
+    mat%mumps_par%JCN_loc => mat%cols
+!
+!    Fill Mumps structure and deallocate the sparse rows
+!
+    mat%irow(mat%istart) = mat%nnz_start
+    DO i=mat%istart,mat%iend
+       mat%irow(i+1) = mat%irow(i) + mat%mat%row(i)%nnz
+       s = mat%irow(i) - mat%nnz_start + 1
+       e = mat%irow(i+1) - mat%nnz_start
+       CALL getrow(mat%mat%row(i), mat%val(s:e), mat%cols(s:e)) 
+       mat%mumps_par%IRN_loc(s:e) = i
+       IF(nlclean) CALL destroy(mat%mat%row(i))
+    END DO
+    IF(nlclean) DEALLOCATE(mat%mat)
+  END SUBROUTINE to_zmumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE reord_mumps_mat(mat, nlmetis, debug)
+!
+!   Reordering and symbolic factorization
+!
+    TYPE(mumps_mat)               :: mat
+    LOGICAL, OPTIONAL, INTENT(in) :: nlmetis
+    LOGICAL, OPTIONAL, INTENT(in) :: debug
+!
+!    Verbose messages
+!
+    mat%mumps_par%ICNTL(3) = 0
+    IF(PRESENT(debug)) THEN
+       IF(debug) mat%mumps_par%ICNTL(3) = 6
+    END IF
+!
+!    Ordering
+!
+    mat%mumps_par%ICNTL(7) = 7  ! Automatic choice
+    IF(PRESENT(nlmetis)) THEN
+       IF(nlmetis) mat%mumps_par%ICNTL(7) = 5 ! use METIS nested dissection
+    END IF
+!
+    mat%mumps_par%JOB = 1
+    CALL dmumps(mat%mumps_par)
+    mat%perm => mat%mumps_par%SYM_PERM
+
+    IF(mat%mumps_par%INFOG(1).NE.0) THEN
+       WRITE(*,'(a,2i12)') 'REORD: Reordering failed with error', &
+            &             mat%mumps_par%INFOG(1:2)
+       STOP
+    END IF
+  END SUBROUTINE reord_mumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE reord_zmumps_mat(mat, nlmetis, debug)
+!
+!   Reordering and symbolic factorization
+!
+    TYPE(zmumps_mat)              :: mat
+    LOGICAL, OPTIONAL, INTENT(in) :: nlmetis
+    LOGICAL, OPTIONAL, INTENT(in) :: debug
+!
+!    Verbose messages
+!
+    mat%mumps_par%ICNTL(3) = 0
+    IF(PRESENT(debug)) THEN
+       IF(debug) mat%mumps_par%ICNTL(3) = 6
+    END IF
+!
+!    Ordering
+!
+    mat%mumps_par%ICNTL(7) = 7  ! Automatic choice
+    IF(PRESENT(nlmetis)) THEN
+       IF(nlmetis) mat%mumps_par%ICNTL(7) = 5 ! use METIS nested dissection
+    END IF
+!
+    mat%mumps_par%JOB = 1
+    CALL zmumps(mat%mumps_par)
+
+    IF(mat%mumps_par%INFOG(1).NE.0) THEN
+       WRITE(*,'(a,2i12)') 'REORD: Reordering failed with error', &
+            &             mat%mumps_par%INFOG(1:2)
+       STOP
+    END IF
+  END SUBROUTINE reord_zmumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE numfact_mumps_mat(mat, debug)
+!
+!   Numerical factorization
+!
+    TYPE(mumps_mat)               :: mat
+    LOGICAL, OPTIONAL, INTENT(in) :: debug
+!
+!    Verbose messages
+!
+    mat%mumps_par%ICNTL(3) = 0
+    IF(PRESENT(debug)) THEN
+       IF(debug) mat%mumps_par%ICNTL(3) = 6
+    END IF
+!
+    mat%mumps_par%JOB = 2
+    CALL dmumps(mat%mumps_par)
+
+    IF(mat%mumps_par%INFOG(1).NE.0) THEN
+       WRITE(*,'(a,2i12)') 'FACTOR: Reordering failed with error', &
+            &             mat%mumps_par%INFOG(1:2)
+       STOP
+    END IF
+!
+  END SUBROUTINE numfact_mumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE numfact_zmumps_mat(mat, debug)
+!
+!   Numerical factorization
+!
+    TYPE(zmumps_mat)              :: mat
+    LOGICAL, OPTIONAL, INTENT(in) :: debug
+!
+!    Verbose messages
+!
+    mat%mumps_par%ICNTL(3) = 0
+    IF(PRESENT(debug)) THEN
+       IF(debug) mat%mumps_par%ICNTL(3) = 6
+    END IF
+!
+    mat%mumps_par%JOB = 2
+    CALL zmumps(mat%mumps_par)
+
+    IF(mat%mumps_par%INFOG(1).NE.0) THEN
+       WRITE(*,'(a,2i12)') 'FACTOR: Reordering failed with error', &
+            &             mat%mumps_par%INFOG(1:2)
+       STOP
+    END IF
+!
+  END SUBROUTINE numfact_zmumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE factor_mumps_mat(mat, nlreord, nlmetis, debug)
+!
+!  Factor (create  +reorder + factor) a mumps_mat matrix
+!
+    TYPE(mumps_mat)             :: mat
+    LOGICAL, OPTIONAL, INTENT(in) :: nlreord
+    LOGICAL, OPTIONAL, INTENT(in) :: nlmetis
+    LOGICAL, OPTIONAL, INTENT(in) :: debug
+    LOGICAL :: mlreord
+!----------------------------------------------------------------------
+!               1.0  Creation from the sparse rows
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL to_mat(mat)
+    END IF
+!----------------------------------------------------------------------
+!               2.0  Reordering and symbolic factorization phase
+!
+    mlreord = .TRUE.
+    IF(PRESENT(nlreord)) mlreord = nlreord
+    IF(mlreord) THEN
+       CALL reord_mat(mat, nlmetis, debug)
+    END IF
+!----------------------------------------------------------------------
+!               3.0  Numerical factorization
+!
+    CALL numfact(mat, debug)
+  END SUBROUTINE factor_mumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE factor_zmumps_mat(mat, nlreord, nlmetis, debug)
+!
+!  Factor (create  +reorder + factor) a mumps_mat matrix
+!
+    TYPE(zmumps_mat)            :: mat
+    LOGICAL, OPTIONAL, INTENT(in) :: nlreord
+    LOGICAL, OPTIONAL, INTENT(in) :: nlmetis
+    LOGICAL, OPTIONAL, INTENT(in) :: debug
+    LOGICAL :: mlreord
+!----------------------------------------------------------------------
+!               1.0  Creation from the sparse rows
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL to_mat(mat)
+    END IF
+!----------------------------------------------------------------------
+!               2.0  Reordering and symbolic factorization phase
+!
+    mlreord = .TRUE.
+    IF(PRESENT(nlreord)) mlreord = nlreord
+    IF(mlreord) THEN
+       CALL reord_mat(mat, nlmetis, debug)
+    END IF
+!----------------------------------------------------------------------
+!               3.0  Numerical factorization
+!
+    CALL numfact(mat, debug)
+  END SUBROUTINE factor_zmumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE bsolve_mumps_mat1(mat, rhs, sol, nref, debug)
+!
+!   Backsolve, using Mumps
+!
+    INCLUDE 'mpif.h'
+    TYPE(mumps_mat)                           :: mat
+    DOUBLE PRECISION, INTENT(inout)           :: rhs(:)
+    DOUBLE PRECISION, OPTIONAL, INTENT(inout) :: sol(:)
+    INTEGER, OPTIONAL, INTENT(in)             :: nref
+    LOGICAL, OPTIONAL, INTENT(in)             :: debug
+!
+    INTEGER :: nrank, ierr
+!
+    nrank = SIZE(rhs,1)
+!
+!    Verbose messages
+!
+    mat%mumps_par%ICNTL(3) = 0
+    IF(PRESENT(debug)) THEN
+       IF(debug) mat%mumps_par%ICNTL(3) = 6
+    END IF
+!
+    IF(mat%mumps_par%MYID .EQ. 0) THEN
+       mat%mumps_par%NRHS = 1
+       mat%mumps_par%LRHS = nrank
+       mat%mumps_par%ICNTL(10) = 0 ! Max numbers of iterative refinement steps
+       IF(PRESENT(nref)) mat%mumps_par%ICNTL(10) = nref
+!
+       ALLOCATE(mat%mumps_par%RHS(nrank))
+       mat%mumps_par%RHS = rhs
+    END IF
+!
+    mat%mumps_par%JOB = 3
+    CALL dmumps(mat%mumps_par)
+!
+!    The solution will be broadcasted to everyone
+!
+    IF(PRESENT(sol)) THEN
+       IF(mat%mumps_par%MYID .EQ. 0) sol=mat%mumps_par%RHS
+       CALL mpi_bcast(sol, nrank, MPI_DOUBLE_PRECISION, &
+            &         0, mat%mumps_par%COMM, ierr)
+    ELSE
+       IF(mat%mumps_par%MYID .EQ. 0) rhs=mat%mumps_par%RHS
+       CALL mpi_bcast(rhs, nrank, MPI_DOUBLE_PRECISION, &
+            &         0, mat%mumps_par%COMM, ierr)
+    END IF
+!
+    IF(mat%mumps_par%MYID .EQ. 0) THEN
+       DEALLOCATE(mat%mumps_par%RHS)
+       IF(mat%mumps_par%INFOG(1).NE.0) THEN
+          WRITE(*,'(a,2i12)') 'BSOLVE: Reordering failed with error', &
+            &             mat%mumps_par%INFOG(1:2)
+          STOP
+       END IF
+    END IF
+  END SUBROUTINE bsolve_mumps_mat1
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE bsolve_zmumps_mat1(mat, rhs, sol, nref, debug)
+!
+!   Backsolve, using Mumps
+!
+    INCLUDE 'mpif.h'
+    TYPE(zmumps_mat)                        :: mat
+    DOUBLE COMPLEX, INTENT(inout)           :: rhs(:)
+    DOUBLE COMPLEX, OPTIONAL, INTENT(inout) :: sol(:)
+    INTEGER, OPTIONAL, INTENT(in)           :: nref
+    LOGICAL, OPTIONAL, INTENT(in)           :: debug
+!
+    INTEGER ::  nrank, ierr
+!
+    nrank = SIZE(rhs,1)
+!
+!    Verbose messages
+!
+    mat%mumps_par%ICNTL(3) = 0
+    IF(PRESENT(debug)) THEN
+       IF(debug) mat%mumps_par%ICNTL(3) = 6
+    END IF
+!
+    IF(mat%mumps_par%MYID .EQ. 0) THEN
+       mat%mumps_par%NRHS = 1
+       mat%mumps_par%LRHS = nrank
+       mat%mumps_par%ICNTL(10) = 0 ! Max numbers of iterative refinement steps
+       IF(PRESENT(nref)) mat%mumps_par%ICNTL(10) = nref
+!
+       ALLOCATE(mat%mumps_par%RHS(nrank))
+       mat%mumps_par%RHS = rhs
+    END IF
+!
+    mat%mumps_par%JOB = 3
+    CALL zmumps(mat%mumps_par)
+!
+!    The solution will be broadcasted to everyone
+!
+    IF(PRESENT(sol)) THEN
+       IF(mat%mumps_par%MYID .EQ. 0) sol=mat%mumps_par%RHS
+       CALL mpi_bcast(sol, SIZE(rhs), MPI_DOUBLE_COMPLEX, &
+            &         0, mat%mumps_par%COMM, ierr)
+    ELSE
+       IF(mat%mumps_par%MYID .EQ. 0) rhs=mat%mumps_par%RHS
+       CALL mpi_bcast(rhs, SIZE(rhs), MPI_DOUBLE_COMPLEX, &
+            &         0, mat%mumps_par%COMM, ierr)
+    END IF
+!
+    IF(mat%mumps_par%MYID .EQ. 0) THEN
+       DEALLOCATE(mat%mumps_par%RHS)
+       IF(mat%mumps_par%INFOG(1).NE.0) THEN
+          WRITE(*,'(a,2i12)') 'BSOLVE: Reordering failed with error', &
+            &             mat%mumps_par%INFOG(1:2)
+          STOP
+       END IF
+    END IF
+  END SUBROUTINE bsolve_zmumps_mat1
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE bsolve_mumps_matn(mat, rhs, sol, nref, debug)
+!
+!   Backsolve, using Mumps
+!
+    INCLUDE 'mpif.h'
+    TYPE(mumps_mat)                           :: mat
+    DOUBLE PRECISION, INTENT(inout)           :: rhs(:,:)
+    DOUBLE PRECISION, OPTIONAL, INTENT(inout) :: sol(:,:)
+    INTEGER, OPTIONAL, INTENT(in)             :: nref
+    LOGICAL, OPTIONAL, INTENT(in)             :: debug
+!
+    INTEGER :: nrank, nrhs, ierr
+!
+    nrank = SIZE(rhs,1)
+    nrhs = SIZE(rhs,2)
+!
+!    Verbose messages
+!
+    mat%mumps_par%ICNTL(3) = 0
+    IF(PRESENT(debug)) THEN
+       IF(debug) mat%mumps_par%ICNTL(3) = 6
+    END IF
+!
+    IF(mat%mumps_par%MYID .EQ. 0) THEN
+       mat%mumps_par%NRHS = nrhs
+       mat%mumps_par%LRHS = nrank
+       mat%mumps_par%ICNTL(10) = 0 ! Max numbers of iterative refinement steps
+       IF(PRESENT(nref)) mat%mumps_par%ICNTL(10) = nref
+!
+       ALLOCATE(mat%mumps_par%RHS(nrhs*nrank))
+       mat%mumps_par%RHS = RESHAPE(rhs, SHAPE(mat%mumps_par%RHS))
+    END IF
+!
+    mat%mumps_par%JOB = 3
+    CALL dmumps(mat%mumps_par)
+!
+!    The solution will be broadcasted to everyone
+!
+    IF(PRESENT(sol)) THEN
+       IF(mat%mumps_par%MYID .EQ. 0) sol=RESHAPE(mat%mumps_par%RHS, SHAPE(sol))
+       CALL mpi_bcast(sol, PRODUCT(SHAPE(rhs)), MPI_DOUBLE_PRECISION, &
+            &         0, mat%mumps_par%COMM, ierr)
+    ELSE
+       IF(mat%mumps_par%MYID .EQ. 0) rhs=RESHAPE(mat%mumps_par%RHS, SHAPE(rhs))
+       CALL mpi_bcast(rhs, PRODUCT(SHAPE(rhs)), MPI_DOUBLE_PRECISION, &
+            &         0, mat%mumps_par%COMM, ierr)
+    END IF
+!
+    IF(mat%mumps_par%MYID .EQ. 0) THEN
+       DEALLOCATE(mat%mumps_par%RHS)
+       IF(mat%mumps_par%INFOG(1).NE.0) THEN
+          WRITE(*,'(a,2i12)') 'BSOLVE: Reordering failed with error', &
+            &             mat%mumps_par%INFOG(1:2)
+          STOP
+       END IF
+    END IF
+  END SUBROUTINE bsolve_mumps_matn
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE bsolve_zmumps_matn(mat, rhs, sol, nref, debug)
+!
+!   Backsolve, using Mumps
+!
+    INCLUDE 'mpif.h'
+    TYPE(zmumps_mat)                        :: mat
+    DOUBLE COMPLEX, INTENT(inout)           :: rhs(:,:)
+    DOUBLE COMPLEX, OPTIONAL, INTENT(inout) :: sol(:,:)
+    INTEGER, OPTIONAL, INTENT(in)           :: nref
+    LOGICAL, OPTIONAL, INTENT(in)           :: debug
+!
+    INTEGER :: nrank, nrhs, ierr
+!
+    nrank = SIZE(rhs,1)
+    nrhs = SIZE(rhs,2)
+!
+!    Verbose messages
+!
+    mat%mumps_par%ICNTL(3) = 0
+    IF(PRESENT(debug)) THEN
+       IF(debug) mat%mumps_par%ICNTL(3) = 6
+    END IF
+!
+    IF(mat%mumps_par%MYID .EQ. 0) THEN
+       mat%mumps_par%NRHS = nrhs
+       mat%mumps_par%LRHS = nrank
+       mat%mumps_par%ICNTL(10) = 0 ! Max numbers of iterative refinement steps
+       IF(PRESENT(nref)) mat%mumps_par%ICNTL(10) = nref
+!
+       ALLOCATE(mat%mumps_par%RHS(nrhs*nrank))
+       mat%mumps_par%RHS = RESHAPE(rhs, SHAPE(mat%mumps_par%RHS))
+    END IF
+!
+    mat%mumps_par%JOB = 3
+    CALL zmumps(mat%mumps_par)
+!
+!    The solution will be broadcasted to everyone
+!
+    IF(PRESENT(sol)) THEN
+       IF(mat%mumps_par%MYID .EQ. 0) sol=RESHAPE(mat%mumps_par%RHS, SHAPE(sol))
+       CALL mpi_bcast(sol, PRODUCT(SHAPE(rhs)), MPI_DOUBLE_COMPLEX, &
+            &         0, mat%mumps_par%COMM, ierr)
+    ELSE
+       IF(mat%mumps_par%MYID .EQ. 0) rhs=RESHAPE(mat%mumps_par%RHS, SHAPE(rhs))
+       CALL mpi_bcast(rhs, PRODUCT(SHAPE(rhs)), MPI_DOUBLE_COMPLEX, &
+            &         0, mat%mumps_par%COMM, ierr)
+    END IF
+!
+    IF(mat%mumps_par%MYID .EQ. 0) THEN
+       DEALLOCATE(mat%mumps_par%RHS)
+       IF(mat%mumps_par%INFOG(1).NE.0) THEN
+          WRITE(*,'(a,2i12)') 'BSOLVE: Reordering failed with error', &
+            &             mat%mumps_par%INFOG(1:2)
+          STOP
+       END IF
+    END IF
+  END SUBROUTINE bsolve_zmumps_matn
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  FUNCTION vmx_mumps_mat(mat, xarr) RESULT(yarr)
+!
+!   Return product mat*x
+!
+    TYPE(mumps_mat)               :: mat
+    DOUBLE PRECISION, INTENT(in)  :: xarr(:)
+    DOUBLE PRECISION              :: yarr(SIZE(xarr))
+    DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0
+    CHARACTER(len=6) :: matdescra
+    INTEGER :: n, i, j
+!
+    n = mat%rank
+!
+#ifdef MKL
+    IF(mat%nlsym) THEN
+       matdescra = 'sun'
+    ELSE
+       matdescra = 'g'
+    END IF
+!
+    CALL mkl_dcsrmv('N', n, n, alpha, matdescra, mat%val, &
+         &          mat%cols, mat%irow(1), mat%irow(2), xarr, &
+         &          beta, yarr)
+#else
+    yarr = 0.0d0
+    DO i=1,n
+       DO j=mat%irow(i), mat%irow(i+1)-1
+          yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j))
+       END DO
+       IF(mat%nlsym) THEN
+          DO j=mat%irow(i)+1, mat%irow(i+1)-1
+             yarr(mat%cols(j)) = yarr(mat%cols(j)) &
+                  &              + mat%val(j)*xarr(i)
+          END DO
+       END IF
+    END DO
+#endif
+!
+  END FUNCTION vmx_mumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  FUNCTION vmx_zmumps_mat(mat, xarr) RESULT(yarr)
+!
+!   Return product mat*x
+!
+    TYPE(zmumps_mat)            :: mat
+    DOUBLE COMPLEX, INTENT(in)  :: xarr(:)
+    DOUBLE COMPLEX              :: yarr(SIZE(xarr))
+    DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0)
+    INTEGER :: n, i, j
+    CHARACTER(len=6) :: matdescra
+!
+    n = mat%rank
+!
+#ifdef MKL
+    IF(mat%nlsym) THEN
+       matdescra = 'sun'
+    ELSE IF(mat%nlherm) THEN
+       matdescra = 'hun'
+    ELSE
+       matdescra = 'g'
+    END IF
+    CALL mkl_zcsrmv('N', n, n, alpha, matdescra, mat%val, &
+         &          mat%cols, mat%irow(1), mat%irow(2), xarr, &
+         &          beta, yarr)
+#else
+    yarr = (0.0d0,0.0d0)
+    DO i=1,n
+       DO j=mat%irow(i), mat%irow(i+1)-1
+          yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j))
+       END DO
+       IF(mat%nlsym) THEN
+          DO j=mat%irow(i)+1, mat%irow(i+1)-1
+             yarr(mat%cols(j)) = yarr(mat%cols(j)) &
+                  &              + mat%val(j)*xarr(i)
+          END DO
+       ELSE IF(mat%nlherm) THEN
+          DO j=mat%irow(i)+1, mat%irow(i+1)-1
+             yarr(mat%cols(j)) = yarr(mat%cols(j)) &
+                  &              + CONJG(mat%val(j))*xarr(i)
+          END DO
+       END IF
+    END DO
+#endif
+!
+  END FUNCTION vmx_zmumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  FUNCTION vmx_mumps_matn(mat, xarr) RESULT(yarr)
+!
+!   Return product mat*x
+!
+    TYPE(mumps_mat)               :: mat
+    DOUBLE PRECISION, INTENT(in)  :: xarr(:,:)
+    DOUBLE PRECISION              :: yarr(SIZE(xarr,1),SIZE(xarr,2))
+!
+    DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0
+    INTEGER :: n, nrhs, i, j
+    CHARACTER(len=6) :: matdescra
+!
+    n = mat%rank
+    nrhs = SIZE(xarr,2)
+!
+#ifdef MKL
+    IF(mat%nlsym) THEN
+       matdescra = 'sun'
+    ELSE
+       matdescra = 'g'
+    END IF
+!
+    CALL mkl_dcsrmm('N', n, nrhs, n, alpha, matdescra, mat%val,&
+         &           mat%cols, mat%irow(1), mat%irow(2), xarr, &
+         &           n, beta, yarr, n)
+#else
+    yarr = 0.0d0
+    DO i=1,n
+       DO j=mat%irow(i), mat%irow(i+1)-1
+          yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:)
+       END DO
+       IF(mat%nlsym) THEN
+          DO j=mat%irow(i)+1, mat%irow(i+1)-1
+             yarr(mat%cols(j),:) = yarr(mat%cols(j),:) &
+                  &                + mat%val(j)*xarr(i,:)
+          END DO
+       END IF
+    END DO
+#endif
+!
+  END FUNCTION vmx_mumps_matn
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  FUNCTION vmx_zmumps_matn(mat, xarr) RESULT(yarr)
+!
+!   Return product mat*x
+!
+    TYPE(zmumps_mat)            :: mat
+    DOUBLE COMPLEX, INTENT(in)  :: xarr(:,:)
+    DOUBLE COMPLEX              :: yarr(SIZE(xarr,1),SIZE(xarr,2))
+!
+    DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0)
+    INTEGER :: n, nrhs, i, j
+    CHARACTER(len=6) :: matdescra
+!
+    n = mat%rank
+    nrhs = SIZE(xarr,2)
+!
+#ifdef MKL
+    IF(mat%nlsym) THEN
+       matdescra = 'sun'
+    ELSE IF(mat%nlherm) THEN
+       matdescra = 'hun'
+    ELSE
+       matdescra = 'g'
+    END IF
+!
+    CALL mkl_zcsrmm('N', n, nrhs, n, alpha, matdescra, mat%val, &
+         &          mat%cols, mat%irow(1), mat%irow(2), xarr, n, &
+         &          beta, yarr, n)
+#else
+    yarr = (0.0d0,0.0d0)
+    DO i=1,n
+       DO j=mat%irow(i), mat%irow(i+1)-1
+          yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:)
+       END DO
+       IF(mat%nlsym) THEN
+          DO j=mat%irow(i)+1, mat%irow(i+1)-1
+             yarr(mat%cols(j),:) = yarr(mat%cols(j),:) &
+                  &                + mat%val(j)*xarr(i,:)
+          END DO
+       ELSE IF(mat%nlherm) THEN
+          DO j=mat%irow(i)+1, mat%irow(i+1)-1
+             yarr(mat%cols(j),:) = yarr(mat%cols(j),:) &
+                  &               + CONJG(mat%val(j))*xarr(i,:)
+          END DO
+       END IF
+    END DO
+#endif
+!
+  END FUNCTION vmx_zmumps_matn
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE destroy_mumps_mat(mat)
+!
+!  Deallocate the sparse matrix mat
+!
+    TYPE(mumps_mat)  :: mat
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL destroy(mat%mat)
+       DEALLOCATE(mat%mat)
+    END IF
+!
+    IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
+    IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
+    IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
+!
+    IF(ASSOCIATED(mat%mumps_par%IRN_loc)) DEALLOCATE(mat%mumps_par%IRN_loc)
+    mat%mumps_par%JOB = -2
+    CALL dmumps(mat%mumps_par)
+    IF(mat%mumps_par%INFOG(1).NE.0) THEN
+       WRITE(*,'(a,2i12)') 'DESTROY: Reordering failed with error', &
+            &             mat%mumps_par%INFOG(1:2)
+       STOP
+    END IF
+  END SUBROUTINE destroy_mumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE destroy_zmumps_mat(mat)
+!
+!  Deallocate the sparse matrix mat
+!
+    TYPE(zmumps_mat) :: mat
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL destroy(mat%mat)
+       DEALLOCATE(mat%mat)
+    END IF
+!
+    IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
+    IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
+    IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
+!
+    IF(ASSOCIATED(mat%mumps_par%IRN_loc)) DEALLOCATE(mat%mumps_par%IRN_loc)
+    mat%mumps_par%JOB = -2
+    CALL zmumps(mat%mumps_par)
+    IF(mat%mumps_par%INFOG(1).NE.0) THEN
+       WRITE(*,'(a,2i12)') 'DESTROY: Reordering failed with error', &
+            &             mat%mumps_par%INFOG(1:2)
+       STOP
+    END IF
+  END SUBROUTINE destroy_zmumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE put_mumps_mat(fid, label, mat, str)
+!
+!   Write matrix to hdf5 file
+!
+    USE futils
+!
+    INTEGER, INTENT(in)                    :: fid
+    CHARACTER(len=*), INTENT(in)           :: label
+    TYPE(mumps_mat)                        :: mat
+    CHARACTER(len=*), OPTIONAL, INTENT(in) :: str
+    CHARACTER(len=128) :: mumps_grp
+!
+    IF(PRESENT(str)) THEN
+       CALL creatg(fid, label, str)
+    ELSE
+       CALL creatg(fid, label)
+    END IF
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL to_mat(mat)
+    END IF
+    CALL attach(fid, label, 'RANK', mat%rank)
+    CALL attach(fid, label, 'NNZ',  mat%nnz)
+    CALL attach(fid, label, 'NLSYM', mat%nlsym)
+    CALL attach(fid, label, 'NLPOS', mat%nlpos)
+    CALL putarr(fid, TRIM(label)//'/irow', mat%irow)
+    CALL putarr(fid, TRIM(label)//'/cols', mat%cols)
+    CALL putarr(fid, TRIM(label)//'/perm', mat%perm)
+    CALL putarr(fid, TRIM(label)//'/val', mat%val)
+!
+    mumps_grp = TRIM(label)//'/mumps_par'
+    CALL creatg(fid, mumps_grp)
+    CALL attach(fid, mumps_grp, 'PAR', mat%mumps_par%PAR)
+    CALL attach(fid, mumps_grp, 'SYM', mat%mumps_par%SYM)
+    CALL putarr(fid, TRIM(mumps_grp)//'/IRN', mat%mumps_par%IRN)
+!
+  END SUBROUTINE put_mumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE put_zmumps_mat(fid, label, mat, str)
+!
+!   Write matrix to hdf5 file
+!
+    USE futils
+!
+    INTEGER, INTENT(in)                    :: fid
+    CHARACTER(len=*), INTENT(in)           :: label
+    TYPE(zmumps_mat)                       :: mat
+    CHARACTER(len=*), OPTIONAL, INTENT(in) :: str
+    CHARACTER(len=128) :: mumps_grp
+!
+    IF(PRESENT(str)) THEN
+       CALL creatg(fid, label, str)
+    ELSE
+       CALL creatg(fid, label)
+    END IF
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL to_mat(mat)
+    END IF
+    CALL attach(fid, label, 'RANK', mat%rank)
+    CALL attach(fid, label, 'NNZ',  mat%nnz)
+    CALL attach(fid, label, 'NLSYM', mat%nlsym)
+    CALL attach(fid, label, 'NLPOS', mat%nlpos)
+    CALL putarr(fid, TRIM(label)//'/irow', mat%irow)
+    CALL putarr(fid, TRIM(label)//'/cols', mat%cols)
+    CALL putarr(fid, TRIM(label)//'/val', mat%val)
+!
+    mumps_grp = TRIM(label)//'/mumps_par'
+    CALL creatg(fid, mumps_grp)
+    CALL attach(fid, mumps_grp, 'PAR', mat%mumps_par%PAR)
+    CALL attach(fid, mumps_grp, 'SYM', mat%mumps_par%SYM)
+    CALL putarr(fid, TRIM(mumps_grp)//'/IRN', mat%mumps_par%IRN)
+!
+  END SUBROUTINE put_zmumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE get_mumps_mat(fid, label, mat)
+!
+!   Read matrix from hdf5 file
+!
+    USE futils
+!
+    INTEGER, INTENT(in)                    :: fid
+    CHARACTER(len=*), INTENT(in)           :: label
+    TYPE(mumps_mat)                        :: mat
+    CHARACTER(len=128) :: mumps_grp
+!
+    CALL getatt(fid, label, 'RANK', mat%rank)
+    CALL getatt(fid, label, 'NNZ',  mat%nnz)
+    CALL getatt(fid, label, 'NLSYM', mat%nlsym)
+    CALL getarr(fid, TRIM(label)//'/irow', mat%irow)
+    CALL getarr(fid, TRIM(label)//'/cols', mat%cols)
+    CALL getarr(fid, TRIM(label)//'/perm', mat%perm)
+    CALL getarr(fid, TRIM(label)//'/val', mat%val)
+!
+    mumps_grp = TRIM(label)//'/mumps_par'
+    CALL getatt(fid, mumps_grp, 'PAR', mat%mumps_par%PAR)
+    CALL getatt(fid, mumps_grp, 'SYM', mat%mumps_par%SYM)
+    CALL getarr(fid, TRIM(mumps_grp)//'/IRN', mat%mumps_par%IRN)
+!
+  END SUBROUTINE get_mumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE get_zmumps_mat(fid, label, mat)
+!
+!   Read matrix from hdf5 file
+!
+    USE futils
+!
+    INTEGER, INTENT(in)                    :: fid
+    CHARACTER(len=*), INTENT(in)           :: label
+    TYPE(zmumps_mat)                       :: mat
+    CHARACTER(len=128) :: mumps_grp
+!
+    CALL getatt(fid, label, 'RANK', mat%rank)
+    CALL getatt(fid, label, 'NNZ',  mat%nnz)
+    CALL getatt(fid, label, 'NLSYM', mat%nlsym)
+    CALL getarr(fid, TRIM(label)//'/irow', mat%irow)
+    CALL getarr(fid, TRIM(label)//'/cols', mat%cols)
+    CALL getarr(fid, TRIM(label)//'/perm', mat%perm)
+    CALL getarr(fid, TRIM(label)//'/val', mat%val)
+!
+    mumps_grp = TRIM(label)//'/mumps_par'
+    CALL getatt(fid, mumps_grp, 'PAR', mat%mumps_par%PAR)
+    CALL getatt(fid, mumps_grp, 'SYM', mat%mumps_par%SYM)
+    CALL getarr(fid, TRIM(mumps_grp)//'/IRN', mat%mumps_par%IRN)
+!
+  END SUBROUTINE get_zmumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE mcopy_mumps_mat(mata, matb)
+!
+!   Matrix copy: B = A (assume that B is already initialize)
+!
+    TYPE(mumps_mat) :: mata, matb
+    INTEGER :: n, nnz, nnz_loc
+!
+    IF(ASSOCIATED(matb%mat)) THEN  ! Sparse linled list not needed
+       CALL destroy(matb%mat)
+       DEALLOCATE(matb%mat)
+    END IF
+!
+    n = mata%rank
+    nnz = mata%nnz
+    nnz_loc = mata%nnz_loc
+    matb%nnz = nnz
+    matb%nnz_loc = nnz_loc
+    matb%nnz_start = mata%nnz_start
+    matb%nnz_end = mata%nnz_end
+    matb%istart = mata%istart
+    matb%iend = mata%iend
+!
+    matb%mumps_par%NZ_loc = mata%mumps_par%NZ_loc
+!
+    IF(ASSOCIATED(matb%val)) DEALLOCATE(matb%val)
+    IF(ASSOCIATED(matb%cols)) DEALLOCATE(matb%cols)
+    IF(ASSOCIATED(matb%irow)) DEALLOCATE(matb%irow)
+    ALLOCATE(matb%val(nnz_loc)); matb%val = mata%val
+    ALLOCATE(matb%cols(nnz_loc)); matb%cols = mata%cols
+    ALLOCATE(matb%irow(matb%istart:matb%iend+1)); matb%irow = mata%irow
+!
+    ALLOCATE(matb%mumps_par%IRN_loc(nnz_loc))
+    matb%mumps_par%IRN_loc = mata%mumps_par%IRN_loc
+    matb%mumps_par%A_loc => matb%val
+    matb%mumps_par%JCN_loc => matb%cols
+  END SUBROUTINE mcopy_mumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE mcopy_zmumps_mat(mata, matb)
+!
+!   Matrix copy: B = A (assume that B is already initialize)
+!
+    TYPE(zmumps_mat) :: mata, matb
+    INTEGER :: n, nnz, nnz_loc
+!
+    IF(ASSOCIATED(matb%mat)) THEN  ! Sparse linled list not needed
+       CALL destroy(matb%mat)
+       DEALLOCATE(matb%mat)
+    END IF
+!
+    n = mata%rank
+    nnz = mata%nnz
+    nnz_loc = mata%nnz_loc
+    matb%nnz = nnz
+    matb%nnz_loc = nnz_loc
+    matb%nnz_start = mata%nnz_start
+    matb%nnz_end = mata%nnz_end
+    matb%istart = mata%istart
+    matb%iend = mata%iend
+!
+    matb%mumps_par%NZ_loc = mata%mumps_par%NZ_loc
+!
+    IF(ASSOCIATED(matb%val)) DEALLOCATE(matb%val)
+    IF(ASSOCIATED(matb%cols)) DEALLOCATE(matb%cols)
+    IF(ASSOCIATED(matb%irow)) DEALLOCATE(matb%irow)
+    ALLOCATE(matb%val(nnz_loc)); matb%val = mata%val
+    ALLOCATE(matb%cols(nnz_loc)); matb%cols = mata%cols
+    ALLOCATE(matb%irow(matb%istart:matb%iend+1)); matb%irow = mata%irow
+!
+    ALLOCATE(matb%mumps_par%IRN_loc(nnz_loc))
+    matb%mumps_par%IRN_loc = mata%mumps_par%IRN_loc
+    matb%mumps_par%A_loc => matb%val
+    matb%mumps_par%JCN_loc => matb%cols
+  END SUBROUTINE mcopy_zmumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE maddto_mumps_mat(mata, alpha, matb)
+!
+!   A <- A + alpha*B
+!
+    TYPE(mumps_mat) :: mata, matb
+    DOUBLE PRECISION  :: alpha
+!
+    mata%val = mata%val + alpha*matb%val
+  END SUBROUTINE maddto_mumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE maddto_zmumps_mat(mata, alpha, matb)
+!
+!   A <- A + alpha*B
+!
+    TYPE(zmumps_mat) :: mata, matb
+    DOUBLE COMPLEX     :: alpha
+!
+    mata%val = mata%val + alpha*matb%val
+  END SUBROUTINE maddto_zmumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE psum_mumps_mat(mat, comm)
+!
+!   Parallel sum of sparse matrices
+!
+    INCLUDE "mpif.h"
+!
+    TYPE(mumps_mat)   :: mat
+    INCLUDE 'psum_mat.tpl'
+  END SUBROUTINE psum_mumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE psum_zmumps_mat(mat, comm)
+!
+!   Parallel sum of sparse matrices
+!
+    INCLUDE "mpif.h"
+!
+    TYPE(zmumps_mat)   :: mat
+    INCLUDE 'psum_mat.tpl'
+  END SUBROUTINE psum_zmumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE p2p_mumps_mat(mat, dest, extyp, op, comm)
+!
+!   Point-to-point combine sparse matrix between 2 processes
+!
+    INCLUDE "mpif.h"
+!
+    TYPE(mumps_mat)            :: mat
+    DOUBLE PRECISION, ALLOCATABLE :: val(:)
+    INTEGER :: mpi_type=MPI_DOUBLE_PRECISION
+!
+    INCLUDE "p2p_mat.tpl"
+  END SUBROUTINE p2p_mumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE p2p_zmumps_mat(mat, dest, extyp, op, comm)
+!
+!   Point-to-point combine sparse matrix between 2 processes
+!
+    INCLUDE "mpif.h"
+!
+    TYPE(zmumps_mat)           :: mat
+    DOUBLE COMPLEX, ALLOCATABLE :: val(:)
+    INTEGER :: mpi_type=MPI_DOUBLE_COMPLEX
+!
+    INCLUDE "p2p_mat.tpl"
+  END SUBROUTINE p2p_zmumps_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+END MODULE mumps_bsplines
diff --git a/src/p2p_mat.tpl b/src/p2p_mat.tpl
new file mode 100644
index 0000000..c39e729
--- /dev/null
+++ b/src/p2p_mat.tpl
@@ -0,0 +1,119 @@
+!>
+!> @file p2p_mat.tpl
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+    INTEGER, INTENT(in)          :: dest
+    CHARACTER(len=*), INTENT(in) :: extyp ! ('send', 'recv', 'sendrecv')
+    CHARACTER(len=*), INTENT(in) :: op    ! ('put', 'updt')
+    INTEGER, INTENT(in)          :: comm
+!
+    INTEGER :: ierr
+    INTEGER :: nrank, nnz, nnz_rem
+    INTEGER :: i, s, idx, bufsize, position
+    CHARACTER(len=1), ALLOCATABLE :: sbuf(:), rbuf(:)
+    INTEGER, ALLOCATABLE :: irow(:), cols(:)
+!--------------------------------------------------------------------------
+!                    1.0  Prologue
+!
+    nrank = mat%rank
+    nnz = get_count(mat)
+    CALL mpi_sendrecv(nnz, 1, MPI_INTEGER, dest, 0, &
+         &            nnz_rem, 1, MPI_INTEGER, dest, 0, &
+         &            comm, MPI_STATUS_IGNORE, ierr)
+!--------------------------------------------------------------------------
+!                    2.0  Send or sendrecv
+!
+    IF(extyp.EQ.'send' .OR. extyp.EQ.'sendrecv') THEN
+!
+!    Allocate packed send buffer
+       bufsize = 0
+       CALL mpi_pack_size(nrank+1, MPI_INTEGER, comm, s, ierr); bufsize=bufsize+s
+       CALL mpi_pack_size(nnz, MPI_INTEGER, comm, s, ierr); bufsize=bufsize+s
+       CALL mpi_pack_size(nnz, mpi_type, comm, s, ierr); bufsize=bufsize+s
+       ALLOCATE(sbuf(bufsize))
+!
+!    Obtain matrix CSR arrays and pack
+       CALL to_mat(mat, nlkeep=.TRUE.)
+       position = 0
+       CALL mpi_pack(mat%irow, nrank+1, MPI_INTEGER, sbuf, bufsize, position, comm, ierr)
+       CALL mpi_pack(mat%cols, nnz, MPI_INTEGER, sbuf, bufsize, position, comm, ierr)
+       CALL mpi_pack(mat%val, nnz, mpi_type, sbuf, bufsize, position, comm, ierr)
+       DEALLOCATE(mat%irow)
+       DEALLOCATE(mat%cols)
+       DEALLOCATE(mat%val)
+!
+!   Communicate packed buffer
+       IF(extyp.EQ.'send') THEN
+          CALL mpi_send(sbuf, position, MPI_PACKED, dest, 0, comm, ierr)
+          DEALLOCATE(sbuf)
+       END IF
+    END IF
+!--------------------------------------------------------------------------
+!                    3.0  Sendrecv or recv
+!
+    IF(extyp.EQ.'recv' .OR. extyp.EQ.'sendrecv') THEN
+!
+!    Allocate unpacked received buffer
+       bufsize = 0
+       CALL mpi_pack_size(nrank+1, MPI_INTEGER, comm, s, ierr); bufsize=bufsize+s
+       CALL mpi_pack_size(nnz_rem, MPI_INTEGER, comm, s, ierr); bufsize=bufsize+s
+       CALL mpi_pack_size(nnz_rem, mpi_type, comm, s, ierr); bufsize=bufsize+s
+       ALLOCATE(rbuf(bufsize))
+!
+!    Communicate packed buffer
+       IF(extyp.EQ.'recv') THEN
+          CALL mpi_recv(rbuf, bufsize, MPI_PACKED, dest, 0, comm, MPI_STATUS_IGNORE, ierr)
+       ELSE IF(extyp.EQ.'sendrecv') THEN
+          CALL mpi_sendrecv(sbuf, position, MPI_PACKED, dest, 0, &
+               &            rbuf, bufsize, MPI_PACKED, dest, 0, &
+               &            comm, MPI_STATUS_IGNORE, ierr)
+          DEALLOCATE(sbuf)
+       END IF
+!
+!    Unpacked rbuf
+       ALLOCATE(irow(nrank+1))
+       ALLOCATE(cols(nnz_rem))
+       ALLOCATE(val(nnz_rem))
+       position = 0
+       CALL mpi_unpack(rbuf, bufsize, position, irow, nrank+1, MPI_INTEGER, comm, ierr)
+       CALL mpi_unpack(rbuf, bufsize, position, cols, nnz_rem, MPI_INTEGER, comm, ierr)
+       CALL mpi_unpack(rbuf, bufsize, position, val, nnz_rem, mpi_type, comm, ierr)
+       DEALLOCATE(rbuf)
+!
+!    Update/replace sparse matrix
+       DO i=1,nrank
+          DO idx=irow(i),irow(i+1)-1
+             IF(op.EQ.'updt') THEN
+                CALL updtmat(mat, i, cols(idx), val(idx))
+             ELSE IF(op.EQ.'put') THEN
+                CALL putele(mat, i, cols(idx), val(idx))
+             END IF
+          END DO
+       END DO
+       DEALLOCATE(irow)
+       DEALLOCATE(cols)
+       DEALLOCATE(val)
+!      
+    END IF
+!--------------------------------------------------------------------------
diff --git a/src/pardiso_mod.f90 b/src/pardiso_mod.f90
new file mode 100644
index 0000000..4491b9c
--- /dev/null
+++ b/src/pardiso_mod.f90
@@ -0,0 +1,1605 @@
+!>
+!> @file pardiso_mod.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+MODULE pardiso_bsplines
+!
+!    PARDISO_BSPLINES: Simple interface to the sparse direct solver PARDISO
+!                      (MKL version).
+!
+!    T.M. Tran, CRPP-EPFL
+!    November 2010
+!
+  USE sparse
+  IMPLICIT NONE
+!
+  TYPE pardiso_param
+     INTEGER   :: error, mtype, msglvl, phase, maxfct, mnum, nrhs
+     INTEGER   :: iparm(64)
+     INTEGER*8 :: pt(64)
+  END TYPE pardiso_param
+!
+  TYPE pardiso_mat
+     INTEGER :: rank, nnz
+     INTEGER :: nterms, kmat
+     LOGICAL :: nlsym
+     LOGICAL :: nlpos
+     LOGICAL :: nlforce_zero
+     TYPE(spmat), POINTER      :: mat => NULL()
+     INTEGER, POINTER          :: cols(:) => NULL()
+     INTEGER, POINTER          :: irow(:) => NULL()
+     INTEGER, POINTER          :: perm(:) => NULL()
+     DOUBLE PRECISION, POINTER :: val(:) => NULL()
+     TYPE(pardiso_param)       :: p
+  END TYPE pardiso_mat
+!
+  TYPE zpardiso_mat
+     INTEGER :: rank, nnz
+     INTEGER :: nterms, kmat
+     LOGICAL :: nlsym
+     LOGICAL :: nlherm
+     LOGICAL :: nlpos
+     LOGICAL :: nlforce_zero
+     TYPE(zspmat), POINTER   :: mat => NULL()
+     INTEGER, POINTER        :: cols(:) => NULL()
+     INTEGER, POINTER        :: irow(:) => NULL()
+     INTEGER, POINTER        :: perm(:) => NULL()
+     DOUBLE COMPLEX, POINTER :: val(:) => NULL()
+     TYPE(pardiso_param)     :: p
+  END TYPE zpardiso_mat
+!
+  INTERFACE init
+     MODULE PROCEDURE init_pardiso_mat, init_zpardiso_mat
+  END INTERFACE init
+!
+  INTERFACE clear_mat
+     MODULE PROCEDURE clear_pardiso_mat, clear_zpardiso_mat
+  END INTERFACE clear_mat
+!
+  INTERFACE updtmat
+     MODULE PROCEDURE updt_pardiso_mat, updt_zpardiso_mat
+  END INTERFACE updtmat
+!
+  INTERFACE putele
+     MODULE PROCEDURE putele_pardiso_mat, putele_zpardiso_mat
+  END INTERFACE putele
+!
+  INTERFACE getele
+     MODULE PROCEDURE getele_pardiso_mat, getele_zpardiso_mat
+  END INTERFACE getele
+!
+  INTERFACE putrow
+     MODULE PROCEDURE putrow_pardiso_mat, putrow_zpardiso_mat
+  END INTERFACE putrow
+!
+  INTERFACE getrow
+     MODULE PROCEDURE getrow_pardiso_mat, getrow_zpardiso_mat
+  END INTERFACE getrow
+!
+  INTERFACE putcol
+     MODULE PROCEDURE putcol_pardiso_mat, putcol_zpardiso_mat
+  END INTERFACE putcol
+!
+  INTERFACE getcol
+     MODULE PROCEDURE getcol_pardiso_mat, getcol_zpardiso_mat
+  END INTERFACE getcol
+!
+  INTERFACE get_count
+     MODULE PROCEDURE get_count_pardiso_mat, get_count_zpardiso_mat
+  END INTERFACE get_count
+!
+  INTERFACE to_mat
+     MODULE PROCEDURE to_pardiso_mat, to_zpardiso_mat
+  END INTERFACE to_mat
+!
+  INTERFACE reord_mat
+     MODULE PROCEDURE reord_pardiso_mat, reord_zpardiso_mat
+  END INTERFACE reord_mat
+!
+  INTERFACE numfact
+     MODULE PROCEDURE numfact_pardiso_mat, numfact_zpardiso_mat
+  END INTERFACE numfact
+!
+  INTERFACE factor
+     MODULE PROCEDURE factor_pardiso_mat, factor_zpardiso_mat
+  END INTERFACE factor
+!
+  INTERFACE bsolve
+     MODULE PROCEDURE bsolve_pardiso_mat1,  bsolve_pardiso_matn, &
+          &           bsolve_zpardiso_mat1, bsolve_zpardiso_matn
+  END INTERFACE bsolve
+!
+  INTERFACE vmx
+     MODULE PROCEDURE vmx_pardiso_mat,  vmx_pardiso_matn, &
+          &           vmx_zpardiso_mat, vmx_zpardiso_matn
+  END INTERFACE vmx
+!
+  INTERFACE destroy
+     MODULE PROCEDURE destroy_pardiso_mat, destroy_zpardiso_mat
+ END INTERFACE destroy
+!
+ INTERFACE putmat
+    MODULE PROCEDURE put_pardiso_mat, put_zpardiso_mat
+ END INTERFACE putmat
+!
+ INTERFACE getmat
+    MODULE PROCEDURE get_pardiso_mat, get_zpardiso_mat
+ END INTERFACE getmat
+!
+ INTERFACE mcopy
+    MODULE PROCEDURE mcopy_pardiso_mat, mcopy_zpardiso_mat
+ END INTERFACE mcopy
+!
+ INTERFACE maddto
+    MODULE PROCEDURE maddto_pardiso_mat, maddto_zpardiso_mat
+ END INTERFACE maddto
+!
+ INTERFACE psum_mat
+    MODULE PROCEDURE psum_pardiso_mat, psum_zpardiso_mat
+ END INTERFACE psum_mat
+!
+ INTERFACE p2p_mat
+    MODULE PROCEDURE p2p_pardiso_mat, p2p_zpardiso_mat
+ END INTERFACE p2p_mat
+!
+CONTAINS
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE init_pardiso_mat(n, nterms, mat, kmat, nlsym, nlpos, &
+       &                      nlforce_zero)
+!
+!   Initialize an empty sparse pardiso matrix
+!
+    INTEGER, INTENT(in)           :: n, nterms
+    TYPE(pardiso_mat)             :: mat
+    INTEGER, OPTIONAL, INTENT(in) :: kmat
+    LOGICAL, OPTIONAL, INTENT(in) :: nlsym
+    LOGICAL, OPTIONAL, INTENT(in) :: nlpos
+    LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero
+!
+    mat%rank = n
+    mat%nterms = nterms
+    mat%nnz = 0
+    mat%nlsym = .FALSE.
+    mat%nlpos = .TRUE.
+    mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val
+    IF(PRESENT(kmat)) mat%kmat = kmat
+    IF(PRESENT(nlsym)) mat%nlsym = nlsym
+    IF(PRESENT(nlpos)) mat%nlpos = nlpos
+    IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero
+!
+    IF(ASSOCIATED(mat%mat)) DEALLOCATE(mat%mat)
+    ALLOCATE(mat%mat)
+    CALL init(n, mat%mat)
+!
+    mat%p%iparm = 0
+    CALL setup_pardiso(mat%p%iparm)
+    mat%p%maxfct = 1  ! Max number of factorizations
+    mat%p%mnum   = 1  ! Actual matrix, shoild be 1<= num <= maxfct
+    mat%p%error  = 0  ! initialize error flag
+    mat%p%msglvl = 1  ! print statistical information (0: no stat)
+    IF(mat%nlsym) THEN
+       IF(mat%nlpos) THEN
+          mat%p%mtype = 2   ! symmetric, positive definite
+       ELSE
+          mat%p%mtype = -2  ! symmetric, indefinite
+       END IF
+    ELSE
+       mat%p%mtype = 11  ! unsymmetric
+    END IF
+    mat%p%nrhs    = 1  ! number of RHSs
+    mat%p%pt(1:64) = 0 ! Initialize Pardiso address pointer (handle)
+!
+  CONTAINS
+    SUBROUTINE setup_pardiso(iparm)
+      INTEGER :: iparm(:)
+      iparm(1) = 1 ! no solver default
+!!$      iparm(2) = 2 ! fill-in reordering from METIS
+      iparm(2) = 0 ! Minimum degree fill-in reordering
+      iparm(3) = 1 ! numbers of processors
+      iparm(4) = 0 ! no iterative-direct algorithm
+      iparm(5) = 0 ! no user fill-in reducing permutation
+      iparm(6) = 0 ! =0 solution on the first n compoments of x
+      iparm(7) = 0 ! not in use
+      iparm(8) = 9 ! numbers of iterative refinement steps
+      iparm(9) = 0 ! not in use
+      iparm(10) = 13 ! perturbe the pivot elements with 1E-13
+      iparm(11) = 1 ! use nonsymmetric permutation and scaling MPS
+      iparm(12) = 0 ! not in use
+      iparm(13) = 0 ! not in use
+      iparm(14) = 0 ! Output: number of perturbed pivots
+      iparm(15) = 0 ! not in use
+      iparm(16) = 0 ! not in use
+      iparm(17) = 0 ! not in use
+      iparm(18) = -1 ! Output: number of nonzeros in the factor LU
+      iparm(19) = -1 ! Output: Mflops for LU factorization
+      iparm(20) = 0 ! Output: Numbers of CG Iterations
+    END SUBROUTINE setup_pardiso
+  END SUBROUTINE init_pardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE init_zpardiso_mat(n, nterms, mat, kmat, nlsym, nlherm, &
+       &                       nlpos, nlforce_zero)
+!
+!   Initialize an empty sparse pardiso matrix
+!
+    INTEGER, INTENT(in)           :: n, nterms
+    TYPE(zpardiso_mat)            :: mat
+    INTEGER, OPTIONAL, INTENT(in) :: kmat
+    LOGICAL, OPTIONAL, INTENT(in) :: nlsym
+    LOGICAL, OPTIONAL, INTENT(in) :: nlherm
+    LOGICAL, OPTIONAL, INTENT(in) :: nlpos
+    LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero
+!
+    mat%rank = n
+    mat%nterms = nterms
+    mat%nnz = 0
+    mat%nlsym = .FALSE.
+    mat%nlherm = .FALSE.
+    mat%nlpos = .TRUE.
+    mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val
+    IF(PRESENT(kmat)) mat%kmat = kmat
+    IF(PRESENT(nlsym)) mat%nlsym = nlsym
+    IF(PRESENT(nlherm)) mat%nlherm = nlherm
+    IF(PRESENT(nlpos)) mat%nlpos = nlpos
+    IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero
+!
+    IF(ASSOCIATED(mat%mat)) DEALLOCATE(mat%mat)
+    ALLOCATE(mat%mat)
+    CALL init(n, mat%mat)
+!
+    mat%p%iparm = 0
+    CALL setup_pardiso(mat%p%iparm)
+    mat%p%maxfct = 1  ! Max number of factorizations
+    mat%p%mnum   = 1  ! Actual matrix, shoild be 1<= num <= maxfct
+    mat%p%error  = 0  ! initialize error flag
+    mat%p%msglvl = 1  ! print statistical information (0: no stat)
+    IF(mat%nlherm) THEN
+       IF(mat%nlpos) THEN
+          mat%p%mtype = 4   ! hermitian, positive definite
+       ELSE
+          mat%p%mtype = -4  ! hermitian, indefinite
+       END IF
+    ELSE IF(mat%nlsym) THEN
+       mat%p%mtype = 6   ! symmetric
+    ELSE
+       mat%p%mtype = 13  ! unsymmetric
+    END IF
+    mat%p%nrhs    = 1  ! number of RHSs
+    mat%p%pt(1:64) = 0 ! Initialize Pardiso address pointer (handle)
+!
+  CONTAINS
+    SUBROUTINE setup_pardiso(iparm)
+      INTEGER :: iparm(:)
+      iparm(1) = 1 ! no solver default
+!!$      iparm(2) = 2 ! fill-in reordering from METIS
+      iparm(2) = 0 ! Minimum degree fill-in reordering
+      iparm(3) = 1 ! numbers of processors
+      iparm(4) = 0 ! no iterative-direct algorithm
+      iparm(5) = 0 ! no user fill-in reducing permutation
+      iparm(6) = 0 ! =0 solution on the first n compoments of x
+      iparm(7) = 0 ! not in use
+      iparm(8) = 9 ! numbers of iterative refinement steps
+      iparm(9) = 0 ! not in use
+      iparm(10) = 13 ! perturbe the pivot elements with 1E-13
+      iparm(11) = 1 ! use nonsymmetric permutation and scaling MPS
+      iparm(12) = 0 ! not in use
+      iparm(13) = 0 ! not in use
+      iparm(14) = 0 ! Output: number of perturbed pivots
+      iparm(15) = 0 ! not in use
+      iparm(16) = 0 ! not in use
+      iparm(17) = 0 ! not in use
+      iparm(18) = -1 ! Output: number of nonzeros in the factor LU
+      iparm(19) = -1 ! Output: Mflops for LU factorization
+      iparm(20) = 0 ! Output: Numbers of CG Iterations
+    END SUBROUTINE setup_pardiso
+  END SUBROUTINE init_zpardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE clear_pardiso_mat(mat)
+!
+!   Clear matrix, keeping its sparse structure unchanged
+!
+    TYPE(pardiso_mat) :: mat
+!
+    mat%val = 0.0d0
+    mat%perm = 0
+  END SUBROUTINE clear_pardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE clear_zpardiso_mat(mat)
+!
+!   Clear matrix, keeping its sparse structure unchanged
+!
+    TYPE(zpardiso_mat) :: mat
+!
+    mat%val = (0.0d0, 0.0d0)
+    mat%perm = 0
+  END SUBROUTINE clear_zpardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE updt_pardiso_mat(mat, i, j, val)
+!
+!   Update element Aij of pardiso  matrix
+!
+    TYPE(pardiso_mat)            :: mat
+    INTEGER, INTENT(in)          :: i, j
+    DOUBLE PRECISION, INTENT(in) :: val
+    INTEGER :: k, s, e
+!
+    IF(mat%nlsym) THEN   ! Store only upper part for symmetric matrices
+       IF(i.GT.j) RETURN
+    END IF
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL updtmat(mat%mat, i, j, val)
+    ELSE
+       s = mat%irow(i)
+       e = mat%irow(i+1)-1
+       k = isearch(mat%cols(s:e), j)
+       IF( k.GE.0 ) THEN
+          mat%val(s+k) =  mat%val(s+k)+val
+       ELSE
+          WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j
+          STOP '*** Abnormal EXIT in MODULE pardiso_mod ***'
+       END IF
+    END IF
+  END SUBROUTINE updt_pardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE updt_zpardiso_mat(mat, i, j, val)
+!
+!   Update element Aij of pardiso  matrix
+!
+    TYPE(zpardiso_mat)         :: mat
+    INTEGER, INTENT(in)        :: i, j
+    DOUBLE COMPLEX, INTENT(in) :: val
+    INTEGER :: k, s, e
+!
+    IF(mat%nlherm .OR. mat%nlsym) THEN   ! Store only upper part
+       IF(i.GT.j) RETURN
+    END IF
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL updtmat(mat%mat, i, j, val)
+    ELSE
+       s = mat%irow(i)
+       e = mat%irow(i+1)-1
+       k = isearch(mat%cols(s:e), j)
+       IF( k.GE.0 ) THEN
+          mat%val(s+k) =  mat%val(s+k)+val
+       ELSE
+          WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j
+          STOP '*** Abnormal EXIT in MODULE pardiso_mod ***'
+       END IF
+    END IF
+  END SUBROUTINE updt_zpardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putele_pardiso_mat(mat, i, j, val)
+!
+!  Put element (i,j) of matrix
+!
+    TYPE(pardiso_mat)            :: mat
+    INTEGER, INTENT(in)          :: i, j
+    DOUBLE PRECISION, INTENT(in) :: val
+    INTEGER :: iput, jput
+    INTEGER :: k, s, e
+!
+    iput = i
+    jput = j
+    IF(mat%nlsym) THEN
+       IF( i.GT.j ) THEN    ! Lower triangular part
+          iput = j
+          jput = i
+       END IF
+    END IF
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL putele(mat%mat, iput, jput, val, &
+            &      nlforce_zero=mat%nlforce_zero)
+    ELSE
+       s = mat%irow(iput)
+       e = mat%irow(iput+1)-1
+       k = isearch(mat%cols(s:e), jput)
+       IF( k.GE.0 ) THEN
+          mat%val(s+k) =  val
+       ELSE
+          IF(ABS(val) .GT. EPSILON(0.0d0)) THEN   ! Ok for zero val
+             WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j
+             STOP '*** Abnormal EXIT in MODULE pardiso_mod ***'
+          END IF
+       END IF
+   END IF
+  END SUBROUTINE putele_pardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putele_zpardiso_mat(mat, i, j, val)
+!
+!  Put element (i,j) of matrix
+!
+    TYPE(zpardiso_mat)         :: mat
+    INTEGER, INTENT(in)        :: i, j
+    DOUBLE COMPLEX, INTENT(in) :: val
+    DOUBLE COMPLEX :: valput
+    INTEGER :: iput, jput
+    INTEGER :: k, s, e
+!
+    iput = i
+    jput = j
+    valput = val
+    IF(mat%nlherm .OR. mat%nlsym) THEN
+       IF( i.GT.j ) THEN    ! Lower triangular part
+          iput = j
+          jput = i
+          IF(mat%nlherm) THEN
+             valput = CONJG(val)
+          ELSE
+             valput = val
+          END IF
+       END IF
+    END IF
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL putele(mat%mat, iput, jput, valput, &
+            &      nlforce_zero=mat%nlforce_zero)
+    ELSE
+       s = mat%irow(iput)
+       e = mat%irow(iput+1)-1
+       k = isearch(mat%cols(s:e), jput)
+       IF( k.GE.0 ) THEN
+          mat%val(s+k) =  valput
+       ELSE
+          IF(ABS(val) .GT. EPSILON(0.0d0)) THEN   ! Ok for zero val
+             WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j
+             STOP '*** Abnormal EXIT in MODULE pardiso_mod ***'
+          END IF
+       END IF
+   END IF
+ END SUBROUTINE putele_zpardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getele_pardiso_mat(mat, i, j, val)
+!
+!   Get element (i,j) of sparse matrix
+!
+    TYPE(pardiso_mat)             :: mat
+    INTEGER, INTENT(in)           :: i, j
+    DOUBLE PRECISION, INTENT(out) :: val
+    INTEGER :: iget, jget
+    INTEGER :: k, s, e
+!
+    iget = i
+    jget = j
+    IF(mat%nlsym) THEN
+       IF( i.GT.j ) THEN    ! Lower triangular part
+          iget = j
+          jget = i
+       END IF
+    END IF
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL getele(mat%mat, iget, jget, val)
+    ELSE
+       s = mat%irow(iget)
+       e = mat%irow(iget+1)-1
+       k = isearch(mat%cols(s:e), jget)
+       IF( k.GE.0 ) THEN
+          val =mat%val(s+k) 
+       ELSE
+          val = 0.0d0   ! Assume zero val if not found
+       END IF
+    END IF
+  END SUBROUTINE getele_pardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getele_zpardiso_mat(mat, i, j, val)
+!
+!   Get element (i,j) of sparse matrix
+!
+    TYPE(zpardiso_mat)          :: mat
+    INTEGER, INTENT(in)         :: i, j
+    DOUBLE COMPLEX, INTENT(out) :: val
+    DOUBLE COMPLEX :: valget
+    INTEGER :: iget, jget
+    INTEGER :: k, s, e
+!
+    iget = i
+    jget = j
+    IF(mat%nlherm .OR. mat%nlsym) THEN
+       IF( i.GT.j ) THEN    ! Lower triangular part
+          iget = j
+          jget = i
+       END IF
+    END IF
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL getele(mat%mat, iget, jget, valget)
+    ELSE
+       s = mat%irow(iget)
+       e = mat%irow(iget+1)-1
+       k = isearch(mat%cols(s:e), jget)
+       IF( k.GE.0 ) THEN
+          valget =mat%val(s+k) 
+       ELSE
+          valget = (0.0d0,0.0d0)   ! Assume zero val if not found
+       END IF
+    END IF
+    val = valget
+    IF( i.GT.j ) THEN
+       IF(mat%nlherm) THEN 
+          val = CONJG(valget)
+       END IF
+    END IF
+  END SUBROUTINE getele_zpardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putrow_pardiso_mat(amat, i, arr)
+!
+! Put a row into sparse matrix
+!
+    TYPE(pardiso_mat), INTENT(inout) :: amat
+    INTEGER, INTENT(in)              :: i
+    DOUBLE PRECISION, INTENT(in)     :: arr(:)
+    INTEGER :: j
+!
+    DO j=1,amat%rank
+       CALL putele(amat, i, j, arr(j))
+    END DO
+  END SUBROUTINE putrow_pardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putrow_zpardiso_mat(amat, i, arr)
+!
+! Put a row into sparse matrix
+!
+    TYPE(zpardiso_mat), INTENT(inout) :: amat
+    INTEGER, INTENT(in)               :: i
+    DOUBLE COMPLEX, INTENT(in)        :: arr(:)
+    INTEGER :: j
+!
+    DO j=1,amat%rank
+       CALL putele(amat, i, j, arr(j))
+    END DO
+  END SUBROUTINE putrow_zpardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getrow_pardiso_mat(amat, i, arr)
+!
+!   Get a row from sparse matrix
+!
+    TYPE(pardiso_mat), INTENT(in)  :: amat
+    INTEGER, INTENT(in)            :: i
+    DOUBLE PRECISION, INTENT(out)  :: arr(:)
+    INTEGER :: j
+!
+    DO j=1,amat%rank
+       CALL getele(amat, i, j, arr(j))
+    END DO
+  END SUBROUTINE getrow_pardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getrow_zpardiso_mat(amat, i, arr)
+!
+!   Get a row from sparse matrix
+!
+    TYPE(zpardiso_mat), INTENT(in)  :: amat
+    INTEGER, INTENT(in)             :: i
+    DOUBLE COMPLEX, INTENT(out)     :: arr(:)
+    INTEGER :: j
+!
+    DO j=1,amat%rank
+       CALL getele(amat, i, j, arr(j))
+    END DO
+  END SUBROUTINE getrow_zpardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putcol_pardiso_mat(amat, j, arr)
+!
+! Put a column into sparse matrix
+!
+    TYPE(pardiso_mat), INTENT(inout) :: amat
+    INTEGER, INTENT(in)              :: j
+    DOUBLE PRECISION, INTENT(in)     :: arr(:)
+    INTEGER :: i
+!
+    DO i=1,amat%rank
+       CALL putele(amat, i, j, arr(i))
+    END DO
+  END SUBROUTINE putcol_pardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putcol_zpardiso_mat(amat, j, arr)
+!
+! Put a column into sparse matrix
+!
+    TYPE(zpardiso_mat), INTENT(inout) :: amat
+    INTEGER, INTENT(in)               :: j
+    DOUBLE COMPLEX, INTENT(in)        :: arr(:)
+    INTEGER :: i
+!
+    DO i=1,amat%rank
+       CALL putele(amat, i, j, arr(i))
+    END DO
+  END SUBROUTINE putcol_zpardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getcol_pardiso_mat(amat, j, arr)
+!
+!   Get a column from sparse matrix
+!
+    TYPE(pardiso_mat), INTENT(in)  :: amat
+    INTEGER, INTENT(in)            :: j
+    DOUBLE PRECISION, INTENT(out)  :: arr(:)
+    INTEGER :: i
+!
+    DO i=1,amat%rank
+       CALL getele(amat, i, j, arr(i))
+    END DO
+  END SUBROUTINE getcol_pardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getcol_zpardiso_mat(amat, j, arr)
+!
+!   Get a column from sparse matrix
+!
+    TYPE(zpardiso_mat), INTENT(in)  :: amat
+    INTEGER, INTENT(in)             :: j
+    DOUBLE COMPLEX, INTENT(out)     :: arr(:)
+    INTEGER :: i
+!
+    DO i=1,amat%rank
+       CALL getele(amat, i, j, arr(i))
+    END DO
+  END SUBROUTINE getcol_zpardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  INTEGER FUNCTION get_count_pardiso_mat(mat, nnz)
+!
+!   Number of non-zeros in sparse matrix
+!
+    TYPE(pardiso_mat)              :: mat
+    INTEGER, INTENT(out), OPTIONAL :: nnz(:)
+    INTEGER :: i
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       get_count_pardiso_mat = get_count(mat%mat, nnz)
+    ELSE
+       get_count_pardiso_mat = mat%nnz
+       IF(PRESENT(nnz)) THEN
+          DO i=1,mat%rank
+             nnz(i) = mat%irow(i+1)-mat%irow(i)
+          END DO
+       END IF
+    END IF
+  END FUNCTION get_count_pardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  INTEGER FUNCTION get_count_zpardiso_mat(mat, nnz)
+!
+!   Number of non-zeros in sparse matrix
+!
+    TYPE(zpardiso_mat)             :: mat
+    INTEGER, INTENT(out), OPTIONAL :: nnz(:)
+    INTEGER :: i
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       get_count_zpardiso_mat = get_count(mat%mat, nnz)
+    ELSE
+       get_count_zpardiso_mat = mat%nnz
+       IF(PRESENT(nnz)) THEN
+          DO i=1,mat%rank
+             nnz(i) = mat%irow(i+1)-mat%irow(i)
+          END DO
+       END IF
+    END IF
+  END FUNCTION get_count_zpardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE to_pardiso_mat(mat, nlkeep)
+!
+!   Convert linked list spmat to pardiso matrice structure
+!
+    TYPE(pardiso_mat)             :: mat
+    LOGICAL, INTENT(in), OPTIONAL :: nlkeep
+    INTEGER :: i, nnz, rank, s, e
+    LOGICAL :: nlclean
+!
+    nlclean = .TRUE.
+    IF(PRESENT(nlkeep)) THEN
+       nlclean = .NOT. nlkeep
+    END IF
+!
+!    Allocate the Pardiso matrix structure
+!
+    nnz = get_count(mat)
+    rank = mat%rank
+    mat%nnz = nnz
+    IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
+    IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
+    IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm)
+    IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
+    ALLOCATE(mat%val(nnz))
+    ALLOCATE(mat%perm(rank))
+    ALLOCATE(mat%irow(rank+1))
+    ALLOCATE(mat%cols(nnz))
+!
+!    Fill Pardiso structure and optionnaly deallocate the sparse rows
+!
+    mat%irow = 1
+    DO i=1,rank
+       mat%irow(i+1) = mat%irow(i) + mat%mat%row(i)%nnz
+       s = mat%irow(i)
+       e = mat%irow(i+1)-1
+       CALL getrow(mat%mat%row(i), mat%val(s:e), mat%cols(s:e))
+       IF(nlclean) CALL destroy(mat%mat%row(i))
+    END DO
+    IF(nlclean) DEALLOCATE(mat%mat)
+  END SUBROUTINE to_pardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE to_zpardiso_mat(mat, nlkeep)
+!
+!   Convert linked list spmat to pardiso matrice structure
+!
+    TYPE(zpardiso_mat) :: mat
+    LOGICAL, INTENT(in), OPTIONAL :: nlkeep
+    INTEGER :: i, nnz, rank, s, e
+    LOGICAL :: nlclean
+!
+    nlclean = .TRUE.
+    IF(PRESENT(nlkeep)) THEN
+       nlclean = .NOT. nlkeep
+    END IF
+!
+!    Allocate the Pardiso matrix structure
+!
+    nnz = get_count(mat)
+    rank = mat%rank
+    mat%nnz = nnz
+    IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
+    IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
+    IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm)
+    IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
+    ALLOCATE(mat%val(nnz))
+    ALLOCATE(mat%perm(rank))
+    ALLOCATE(mat%irow(rank+1))
+    ALLOCATE(mat%cols(nnz))
+!
+!    Fill Pardiso structure and deallocate the sparse rows
+!
+    mat%irow = 1
+    DO i=1,rank
+       mat%irow(i+1) = mat%irow(i) + mat%mat%row(i)%nnz
+       s = mat%irow(i)
+       e = mat%irow(i+1)-1
+       CALL getrow(mat%mat%row(i), mat%val(s:e), mat%cols(s:e))
+       IF(nlclean) CALL destroy(mat%mat%row(i))
+    END DO
+    IF(nlclean) DEALLOCATE(mat%mat)
+  END SUBROUTINE to_zpardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE reord_pardiso_mat(mat, nlmetis, debug)
+!
+!   Reordering and symbolic factorization
+!
+    TYPE(pardiso_mat)             :: mat
+    LOGICAL, OPTIONAL, INTENT(in) :: nlmetis
+    LOGICAL, OPTIONAL, INTENT(in) :: debug
+    DOUBLE PRECISION :: dummy
+!
+    mat%p%iparm(2) = 0                ! use minimum degree algorithm
+    IF(PRESENT(nlmetis)) THEN
+       IF(nlmetis) mat%p%iparm(2) = 2 ! use METIS nested dissection
+    END IF
+    mat%p%iparm(5)= 2   ! return the permutation vector in mat%perm
+    mat%p%phase   = 11  ! Reordering and symbolic factorization
+    mat%p%msglvl  = 0
+    IF(PRESENT(debug)) THEN
+       IF(debug) mat%p%msglvl = 1
+    END IF
+    CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, &
+         &       mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, &
+         &       mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, &
+         &       dummy, dummy, mat%p%error)
+    IF(mat%p%error.NE.0) THEN
+       WRITE(*,'(a,i4)') 'REORD: Reordering failed with error', mat%p%error
+    END IF
+  END SUBROUTINE reord_pardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE reord_zpardiso_mat(mat, nlmetis, debug)
+!
+!   Reordering and symbolic factorization
+!
+    TYPE(zpardiso_mat)            :: mat
+    LOGICAL, OPTIONAL, INTENT(in) :: nlmetis
+    LOGICAL, OPTIONAL, INTENT(in) :: debug
+    DOUBLE COMPLEX :: dummy
+!
+    mat%p%iparm(2) = 0                ! use minimum degree algorithm
+    IF(PRESENT(nlmetis)) THEN
+       IF(nlmetis) mat%p%iparm(2) = 2 ! use METIS nested dissection
+    END IF
+    mat%p%iparm(5)= 2   ! return the permutation vector in mat%perm
+    mat%p%phase   = 11  ! Reordering and symbolic factorization
+    mat%p%msglvl  = 0
+    IF(PRESENT(debug)) THEN
+       IF(debug) mat%p%msglvl = 1
+    END IF
+    CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, &
+         &       mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, &
+         &       mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, &
+         &       dummy, dummy, mat%p%error)
+    IF(mat%p%error.NE.0) THEN
+       WRITE(*,'(a,i4)') 'REORD: Reordering failed with error', mat%p%error
+    END IF
+  END SUBROUTINE reord_zpardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE numfact_pardiso_mat(mat, debug)
+!
+!   Numerical factorization
+!
+    TYPE(pardiso_mat)             :: mat
+    LOGICAL, OPTIONAL, INTENT(in) :: debug
+    DOUBLE PRECISION :: dummy
+!
+    mat%p%phase  = 22 ! Numerical factorization
+    mat%p%msglvl = 0
+    IF(PRESENT(debug)) THEN
+       IF(debug) mat%p%msglvl = 1
+    END IF
+    CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, &
+         &       mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, &
+         &       mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, &
+         &       dummy, dummy, mat%p%error)
+    IF(mat%p%error.NE.0) THEN
+       WRITE(*,'(a,i4)') 'FACTOR: Factorization failed with error', mat%p%error
+    END IF
+  END SUBROUTINE numfact_pardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE numfact_zpardiso_mat(mat, debug)
+!
+!   Numerical factorization
+!
+    TYPE(zpardiso_mat)            :: mat
+    LOGICAL, OPTIONAL, INTENT(in) :: debug
+    DOUBLE COMPLEX :: dummy
+!
+    mat%p%phase  = 22 ! Numerical factorization
+    mat%p%msglvl = 0
+    IF(PRESENT(debug)) THEN
+       IF(debug) mat%p%msglvl = 1
+    END IF
+    CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, &
+         &       mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, &
+         &       mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, &
+         &       dummy, dummy, mat%p%error)
+    IF(mat%p%error.NE.0) THEN
+       WRITE(*,'(a,i4)') 'FACTOR: Factorization failed with error', mat%p%error
+    END IF
+  END SUBROUTINE numfact_zpardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE factor_pardiso_mat(mat, nlreord, nlmetis, debug)
+!
+!  Factor (create  +reorder + factor) a pardiso_mat matrix
+!
+    TYPE(pardiso_mat)             :: mat
+    LOGICAL, OPTIONAL, INTENT(in) :: nlreord
+    LOGICAL, OPTIONAL, INTENT(in) :: nlmetis
+    LOGICAL, OPTIONAL, INTENT(in) :: debug
+    LOGICAL :: mlreord
+!----------------------------------------------------------------------
+!               1.0  Creation from the sparse rows
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL to_mat(mat)
+    END IF
+!----------------------------------------------------------------------
+!               2.0  Reordering and symbolic factorization phase
+!
+    mlreord = .TRUE.
+    IF(PRESENT(nlreord)) mlreord = nlreord
+    IF(mlreord) THEN
+       CALL reord_mat(mat, nlmetis, debug)
+    END IF
+!----------------------------------------------------------------------
+!               3.0  Numerical factorization
+!
+    CALL numfact(mat, debug)
+  END SUBROUTINE factor_pardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE factor_zpardiso_mat(mat, nlreord, nlmetis, debug)
+!
+!  Factor (create  +reorder + factor) a pardiso_mat matrix
+!
+    TYPE(zpardiso_mat)            :: mat
+    LOGICAL, OPTIONAL, INTENT(in) :: nlreord
+    LOGICAL, OPTIONAL, INTENT(in) :: nlmetis
+    LOGICAL, OPTIONAL, INTENT(in) :: debug
+    LOGICAL :: mlreord
+!----------------------------------------------------------------------
+!               1.0  Creation from the sparse rows
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL to_mat(mat)
+    END IF
+!----------------------------------------------------------------------
+!               2.0  Reordering and symbolic factorization phase
+!
+    mlreord = .TRUE.
+    IF(PRESENT(nlreord)) mlreord = nlreord
+    IF(mlreord) THEN
+       CALL reord_mat(mat, nlmetis, debug)
+    END IF
+!----------------------------------------------------------------------
+!               3.0  Numerical factorization
+!
+    CALL numfact(mat, debug)
+  END SUBROUTINE factor_zpardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE bsolve_pardiso_mat1(mat, rhs, sol, nref, debug)
+!
+!   Backsolve, using Pardiso
+!
+    TYPE(pardiso_mat)             :: mat
+    DOUBLE PRECISION              :: rhs(:)
+    DOUBLE PRECISION, OPTIONAL    :: sol(:)
+    INTEGER, OPTIONAL             :: nref
+    LOGICAL, OPTIONAL, INTENT(in) :: debug
+    DOUBLE PRECISION :: dummy(SIZE(rhs))
+!
+    mat%p%iparm(8) = 0 ! Max numbers of iterative refinement steps
+    IF(PRESENT(nref)) mat%p%iparm(8) = nref
+    mat%p%phase = 33   ! Backsolve
+    mat%p%nrhs = 1
+    mat%p%msglvl = 0
+    IF(PRESENT(debug)) THEN
+       IF(debug) THEN 
+          mat%p%msglvl = 1
+       END IF
+    END IF
+    IF(PRESENT(sol)) THEN
+       mat%p%iparm(6) = 0
+       CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, &
+            &       mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, &
+            &       mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, &
+            &       rhs, sol, mat%p%error)
+    ELSE
+       mat%p%iparm(6) = 1
+       CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, &
+            &       mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, &
+            &       mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, &
+            &       rhs, dummy, mat%p%error)
+    END IF
+    IF(mat%p%error.NE.0) THEN
+       WRITE(*,'(a,i4)') 'BSOLVE: Failed with error', mat%p%error
+    END IF
+   END SUBROUTINE bsolve_pardiso_mat1
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE bsolve_zpardiso_mat1(mat, rhs, sol, nref, debug)
+!
+!   Backsolve, using Pardiso
+!
+    TYPE(zpardiso_mat)            :: mat
+    DOUBLE COMPLEX                :: rhs(:)
+    DOUBLE COMPLEX, OPTIONAL      :: sol(:)
+    INTEGER, OPTIONAL             :: nref
+    LOGICAL, OPTIONAL, INTENT(in) :: debug
+    DOUBLE COMPLEX :: dummy(SIZE(rhs))
+!
+    mat%p%iparm(8) = 0 ! Max numbers of iterative refinement steps
+    IF(PRESENT(nref)) mat%p%iparm(8) = nref
+    mat%p%phase = 33   ! Backsolve
+    mat%p%nrhs = 1
+    mat%p%msglvl = 0
+    IF(PRESENT(debug)) THEN
+       IF(debug) THEN 
+          mat%p%msglvl = 1
+       END IF
+    END IF
+    IF(PRESENT(sol)) THEN
+       mat%p%iparm(6) = 0
+       CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, &
+            &       mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, &
+            &       mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, &
+            &       rhs, sol, mat%p%error)
+    ELSE
+       mat%p%iparm(6) = 1
+       CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, &
+            &       mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, &
+            &       mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, &
+            &       rhs, dummy, mat%p%error)
+    END IF
+    IF(mat%p%error.NE.0) THEN
+       WRITE(*,'(a,i4)') 'BSOLVE: Failed with error', mat%p%error
+    END IF
+  END SUBROUTINE bsolve_zpardiso_mat1
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE bsolve_pardiso_matn(mat, rhs, sol, nref, debug)
+!
+!   Backsolve, using Pardiso, multiple RHS
+!
+    TYPE(pardiso_mat)             :: mat
+    DOUBLE PRECISION              :: rhs(:,:)
+    DOUBLE PRECISION, OPTIONAL    :: sol(:,:)
+    INTEGER, OPTIONAL             :: nref
+    LOGICAL, OPTIONAL, INTENT(in) :: debug
+    DOUBLE PRECISION :: dummy(SIZE(rhs,1),SIZE(rhs,2))
+!
+    mat%p%iparm(8) = 0 ! Max numbers of iterative refinement steps
+    IF(PRESENT(nref)) mat%p%iparm(8) = nref
+    mat%p%phase = 33   ! Backsolve
+    mat%p%nrhs = SIZE(rhs,2)
+    mat%p%msglvl = 0
+    IF(PRESENT(debug)) THEN
+       IF(debug) THEN 
+          mat%p%msglvl = 1
+       END IF
+    END IF
+    IF(PRESENT(sol)) THEN
+       mat%p%iparm(6) = 0
+       CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, &
+            &       mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, &
+            &       mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, &
+            &       rhs, sol, mat%p%error)
+    ELSE
+       mat%p%iparm(6) = 1
+       CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, &
+            &       mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, &
+            &       mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, &
+            &       rhs, dummy, mat%p%error)
+    END IF
+    IF(mat%p%error.NE.0) THEN
+       WRITE(*,'(a,i4)') 'BSOLVE: Failed with error', mat%p%error
+    END IF
+  END SUBROUTINE bsolve_pardiso_matn
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE bsolve_zpardiso_matn(mat, rhs, sol, nref, debug)
+!
+!   Backsolve, using Pardiso, multiple RHS
+!
+    TYPE(zpardiso_mat)            :: mat
+    DOUBLE COMPLEX                :: rhs(:,:)
+    DOUBLE COMPLEX, OPTIONAL      :: sol(:,:)
+    INTEGER, OPTIONAL             :: nref
+    LOGICAL, OPTIONAL, INTENT(in) :: debug
+    DOUBLE COMPLEX :: dummy(SIZE(rhs,1),SIZE(rhs,2))
+!
+    mat%p%iparm(8) = 0 ! Max numbers of iterative refinement steps
+    IF(PRESENT(nref)) mat%p%iparm(8) = nref
+    mat%p%phase = 33   ! Backsolve
+    mat%p%nrhs = SIZE(rhs,2)
+    mat%p%msglvl = 0
+    IF(PRESENT(debug)) THEN
+       IF(debug) THEN 
+          mat%p%msglvl = 1
+       END IF
+    END IF
+    IF(PRESENT(sol)) THEN
+       mat%p%iparm(6) = 0
+       CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, &
+            &       mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, &
+            &       mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, &
+            &       rhs, sol, mat%p%error)
+    ELSE
+       mat%p%iparm(6) = 1
+       CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, &
+            &       mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, &
+            &       mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, &
+            &       rhs, dummy, mat%p%error)
+    END IF
+    IF(mat%p%error.NE.0) THEN
+       WRITE(*,'(a,i4)') 'BSOLVE: Failed with error', mat%p%error
+    END IF
+  END SUBROUTINE bsolve_zpardiso_matn
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  FUNCTION vmx_pardiso_mat(mat, xarr) RESULT(yarr)
+!
+!   Return product mat*x
+!
+    TYPE(pardiso_mat)             :: mat
+    DOUBLE PRECISION, INTENT(in)  :: xarr(:)
+    DOUBLE PRECISION              :: yarr(SIZE(xarr))
+    DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0
+    CHARACTER(len=6) :: matdescra
+    INTEGER :: n, i, j
+!
+    n = mat%rank
+!
+#ifdef MKL
+    IF(mat%nlsym) THEN
+       matdescra = 'sun'
+    ELSE
+       matdescra = 'g'
+    END IF
+!
+    CALL mkl_dcsrmv('N', n, n, alpha, matdescra, mat%val, &
+         &          mat%cols, mat%irow(1), mat%irow(2), xarr, &
+         &          beta, yarr)
+#else
+    yarr = 0.0d0
+    DO i=1,n
+       DO j=mat%irow(i), mat%irow(i+1)-1
+          yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j))
+       END DO
+       IF(mat%nlsym) THEN
+          DO j=mat%irow(i)+1, mat%irow(i+1)-1
+             yarr(mat%cols(j)) = yarr(mat%cols(j)) &
+                  &              + mat%val(j)*xarr(i)
+          END DO
+       END IF
+    END DO
+#endif
+!
+  END FUNCTION vmx_pardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  FUNCTION vmx_zpardiso_mat(mat, xarr) RESULT(yarr)
+!
+!   Return product mat*x
+!
+    TYPE(zpardiso_mat)          :: mat
+    DOUBLE COMPLEX, INTENT(in)  :: xarr(:)
+    DOUBLE COMPLEX              :: yarr(SIZE(xarr))
+    DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0)
+    INTEGER :: n, i, j
+    CHARACTER(len=6) :: matdescra
+!
+    n = mat%rank
+!
+#ifdef MKL
+    IF(mat%nlsym) THEN
+       matdescra = 'sun'
+    ELSE IF(mat%nlherm) THEN
+       matdescra = 'hun'
+    ELSE
+       matdescra = 'g'
+    END IF
+    CALL mkl_zcsrmv('N', n, n, alpha, matdescra, mat%val, &
+         &          mat%cols, mat%irow(1), mat%irow(2), xarr, &
+         &          beta, yarr)
+#else
+    yarr = (0.0d0,0.0d0)
+    DO i=1,n
+       DO j=mat%irow(i), mat%irow(i+1)-1
+          yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j))
+       END DO
+       IF(mat%nlsym) THEN
+          DO j=mat%irow(i)+1, mat%irow(i+1)-1
+             yarr(mat%cols(j)) = yarr(mat%cols(j)) &
+                  &              + mat%val(j)*xarr(i)
+          END DO
+       ELSE IF(mat%nlherm) THEN
+          DO j=mat%irow(i)+1, mat%irow(i+1)-1
+             yarr(mat%cols(j)) = yarr(mat%cols(j)) &
+                  &              + CONJG(mat%val(j))*xarr(i)
+          END DO
+       END IF
+    END DO
+#endif
+!
+  END FUNCTION vmx_zpardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  FUNCTION vmx_pardiso_matn(mat, xarr) RESULT(yarr)
+!
+!   Return product mat*x
+!
+    TYPE(pardiso_mat)             :: mat
+    DOUBLE PRECISION, INTENT(in)  :: xarr(:,:)
+    DOUBLE PRECISION              :: yarr(SIZE(xarr,1),SIZE(xarr,2))
+!
+    DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0
+    INTEGER :: n, nrhs, i, j
+    CHARACTER(len=6) :: matdescra
+!
+    n = mat%rank
+    nrhs = SIZE(xarr,2)
+!
+#ifdef MKL
+    IF(mat%nlsym) THEN
+       matdescra = 'sun'
+    ELSE
+       matdescra = 'g'
+    END IF
+!
+    CALL mkl_dcsrmm('N', n, nrhs, n, alpha, matdescra, mat%val,&
+         &           mat%cols, mat%irow(1), mat%irow(2), xarr, &
+         &           n, beta, yarr, n)
+#else
+    yarr = 0.0d0
+    DO i=1,n
+       DO j=mat%irow(i), mat%irow(i+1)-1
+          yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:)
+       END DO
+       IF(mat%nlsym) THEN
+          DO j=mat%irow(i)+1, mat%irow(i+1)-1
+             yarr(mat%cols(j),:) = yarr(mat%cols(j),:) &
+                  &                + mat%val(j)*xarr(i,:)
+          END DO
+       END IF
+    END DO
+#endif
+!
+  END FUNCTION vmx_pardiso_matn
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  FUNCTION vmx_zpardiso_matn(mat, xarr) RESULT(yarr)
+!
+!   Return product mat*x
+!
+    TYPE(zpardiso_mat)          :: mat
+    DOUBLE COMPLEX, INTENT(in)  :: xarr(:,:)
+    DOUBLE COMPLEX              :: yarr(SIZE(xarr,1),SIZE(xarr,2))
+!
+    DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0)
+    INTEGER :: n, nrhs, i, j
+    CHARACTER(len=6) :: matdescra
+!
+    n = mat%rank
+    nrhs = SIZE(xarr,2)
+!
+#ifdef MKL
+    IF(mat%nlsym) THEN
+       matdescra = 'sun'
+    ELSE IF(mat%nlherm) THEN
+       matdescra = 'hun'
+    ELSE
+       matdescra = 'g'
+    END IF
+!
+    CALL mkl_zcsrmm('N', n, nrhs, n, alpha, matdescra, mat%val, &
+         &          mat%cols, mat%irow(1), mat%irow(2), xarr, n, &
+         &          beta, yarr, n)
+#else
+    yarr = (0.0d0,0.0d0)
+    DO i=1,n
+       DO j=mat%irow(i), mat%irow(i+1)-1
+          yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:)
+       END DO
+       IF(mat%nlsym) THEN
+          DO j=mat%irow(i)+1, mat%irow(i+1)-1
+             yarr(mat%cols(j),:) = yarr(mat%cols(j),:) &
+                  &                + mat%val(j)*xarr(i,:)
+          END DO
+       ELSE IF(mat%nlherm) THEN
+          DO j=mat%irow(i)+1, mat%irow(i+1)-1
+             yarr(mat%cols(j),:) = yarr(mat%cols(j),:) &
+                  &               + CONJG(mat%val(j))*xarr(i,:)
+          END DO
+       END IF
+    END DO
+#endif
+!
+  END FUNCTION vmx_zpardiso_matn
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE destroy_pardiso_mat(mat)
+!
+!  Deallocate the sparse matrix mat
+!
+    TYPE(pardiso_mat) :: mat
+    DOUBLE PRECISION :: dummy
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL destroy(mat%mat)
+       DEALLOCATE(mat%mat)
+    END IF
+!
+    IF(mat%p%phase .GT. 0) THEN
+       mat%p%phase   = 0  ! Release memory for factors
+       CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, &
+            &       mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, &
+            &       mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, &
+            &       dummy, dummy, mat%p%error)
+       IF(mat%p%error.NE.0) THEN
+          WRITE(*,'(a,i4)') 'DESTROY: Mem release failed with error', mat%p%error
+       END IF
+    END IF
+!
+    IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
+    IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
+    IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm)
+    IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
+  END SUBROUTINE destroy_pardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE destroy_zpardiso_mat(mat)
+!
+!  Deallocate the sparse matrix mat
+!
+    TYPE(zpardiso_mat) :: mat
+    DOUBLE COMPLEX :: dummy
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL destroy(mat%mat)
+       DEALLOCATE(mat%mat)
+    END IF
+!
+    IF(mat%p%phase .GT. 0) THEN
+       mat%p%phase   = 0  ! Release memory for factors
+       CALL pardiso(mat%p%pt, mat%p%maxfct, mat%p%mnum, mat%p%mtype, &
+            &       mat%p%phase, mat%rank, mat%val, mat%irow, mat%cols, &
+            &       mat%perm, mat%p%nrhs, mat%p%iparm, mat%p%msglvl, &
+            &       dummy, dummy, mat%p%error)
+       IF(mat%p%error.NE.0) THEN
+          WRITE(*,'(a,i4)') 'DESTROY: Mem release failed with error', mat%p%error
+       END IF
+    END IF
+!
+    IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
+    IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
+    IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm)
+    IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
+  END SUBROUTINE destroy_zpardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE put_pardiso_mat(fid, label, mat, str)
+!
+!   Write matrix to hdf5 file
+!
+    USE futils
+!
+    INTEGER, INTENT(in)                    :: fid
+    CHARACTER(len=*), INTENT(in)           :: label
+    TYPE(pardiso_mat)                      :: mat
+    CHARACTER(len=*), OPTIONAL, INTENT(in) :: str
+!
+    IF(PRESENT(str)) THEN
+       CALL creatg(fid, label, str)
+    ELSE
+       CALL creatg(fid, label)
+    END IF
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL to_mat(mat)
+    END IF
+    CALL attach(fid, label, 'RANK', mat%rank)
+    CALL attach(fid, label, 'NNZ',  mat%nnz)
+    CALL attach(fid, label, 'NLSYM', mat%nlsym)
+    CALL attach(fid, label, 'NLPOS', mat%nlpos)
+    CALL putarr(fid, TRIM(label)//'/irow', mat%irow)
+    CALL putarr(fid, TRIM(label)//'/cols', mat%cols)
+    CALL putarr(fid, TRIM(label)//'/perm', mat%perm)
+    CALL putarr(fid, TRIM(label)//'/val', mat%val)
+!
+    CALL creatg(fid, TRIM(label)//'/p')
+    CALL attach(fid, TRIM(label)//'/p', 'error', mat%p%error)
+    CALL attach(fid, TRIM(label)//'/p', 'mtype', mat%p%mtype)
+    CALL attach(fid, TRIM(label)//'/p', 'phase', mat%p%phase)
+    CALL attach(fid, TRIM(label)//'/p', 'msglv', mat%p%msglvl)
+    CALL attach(fid, TRIM(label)//'/p', 'maxfct', mat%p%maxfct)
+    CALL attach(fid, TRIM(label)//'/p', 'mnum', mat%p%mnum)
+    CALL attach(fid, TRIM(label)//'/p', 'nrhs', mat%p%nrhs)
+    CALL putarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm)
+  END SUBROUTINE put_pardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE put_zpardiso_mat(fid, label, mat, str)
+!
+!   Write matrix to hdf5 file
+!
+    USE futils
+!
+    INTEGER, INTENT(in)                    :: fid
+    CHARACTER(len=*), INTENT(in)           :: label
+    TYPE(zpardiso_mat)                     :: mat
+    CHARACTER(len=*), OPTIONAL, INTENT(in) :: str
+!
+    IF(PRESENT(str)) THEN
+       CALL creatg(fid, label, str)
+    ELSE
+       CALL creatg(fid, label)
+    END IF
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL to_mat(mat)
+    END IF
+    CALL attach(fid, label, 'RANK', mat%rank)
+    CALL attach(fid, label, 'NNZ',  mat%nnz)
+    CALL attach(fid, label, 'NLSYM', mat%nlsym)
+    CALL attach(fid, label, 'NLPOS', mat%nlpos)
+    CALL attach(fid, label, 'NLHERM', mat%nlherm)
+    CALL putarr(fid, TRIM(label)//'/irow', mat%irow)
+    CALL putarr(fid, TRIM(label)//'/cols', mat%cols)
+    CALL putarr(fid, TRIM(label)//'/perm', mat%perm)
+    CALL putarr(fid, TRIM(label)//'/val', mat%val)
+!
+    CALL creatg(fid, TRIM(label)//'/p')
+    CALL attach(fid, TRIM(label)//'/p', 'error', mat%p%error)
+    CALL attach(fid, TRIM(label)//'/p', 'mtype', mat%p%mtype)
+    CALL attach(fid, TRIM(label)//'/p', 'phase', mat%p%phase)
+    CALL attach(fid, TRIM(label)//'/p', 'msglv', mat%p%msglvl)
+    CALL attach(fid, TRIM(label)//'/p', 'maxfct', mat%p%maxfct)
+    CALL attach(fid, TRIM(label)//'/p', 'mnum', mat%p%mnum)
+    CALL attach(fid, TRIM(label)//'/p', 'nrhs', mat%p%nrhs)
+    CALL putarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm)
+  END SUBROUTINE put_zpardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE get_pardiso_mat(fid, label, mat)
+!
+!   Read matrix from hdf5 file
+!
+    USE futils
+!
+    INTEGER, INTENT(in)                    :: fid
+    CHARACTER(len=*), INTENT(in)           :: label
+    TYPE(pardiso_mat)                      :: mat
+!
+    CALL getatt(fid, label, 'RANK', mat%rank)
+    CALL getatt(fid, label, 'NNZ',  mat%nnz)
+    CALL getatt(fid, label, 'NLSYM', mat%nlsym)
+    CALL getarr(fid, TRIM(label)//'/irow', mat%irow)
+    CALL getarr(fid, TRIM(label)//'/cols', mat%cols)
+    CALL getarr(fid, TRIM(label)//'/perm', mat%perm)
+    CALL getarr(fid, TRIM(label)//'/val', mat%val)
+!
+    CALL getatt(fid, TRIM(label)//'/p', 'error', mat%p%error)
+    CALL getatt(fid, TRIM(label)//'/p', 'mtype', mat%p%mtype)
+    CALL getatt(fid, TRIM(label)//'/p', 'phase', mat%p%phase)
+    CALL getatt(fid, TRIM(label)//'/p', 'msglv', mat%p%msglvl)
+    CALL getatt(fid, TRIM(label)//'/p', 'maxfct', mat%p%maxfct)
+    CALL getatt(fid, TRIM(label)//'/p', 'mnum', mat%p%mnum)
+    CALL getatt(fid, TRIM(label)//'/p', 'nrhs', mat%p%nrhs)
+    CALL getarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm)
+  END SUBROUTINE get_pardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE get_zpardiso_mat(fid, label, mat)
+!
+!   Read matrix from hdf5 file
+!
+    USE futils
+!
+    INTEGER, INTENT(in)                    :: fid
+    CHARACTER(len=*), INTENT(in)           :: label
+    TYPE(zpardiso_mat)                      :: mat
+!
+    CALL getatt(fid, label, 'RANK', mat%rank)
+    CALL getatt(fid, label, 'NNZ',  mat%nnz)
+    CALL getatt(fid, label, 'SYM', mat%nlsym)
+    CALL getatt(fid, label, 'HERM', mat%nlherm)
+    CALL getarr(fid, TRIM(label)//'/irow', mat%irow)
+    CALL getarr(fid, TRIM(label)//'/cols', mat%cols)
+    CALL getarr(fid, TRIM(label)//'/perm', mat%perm)
+    CALL getarr(fid, TRIM(label)//'/val', mat%val)
+!
+    CALL getatt(fid, TRIM(label)//'/p', 'error', mat%p%error)
+    CALL getatt(fid, TRIM(label)//'/p', 'mtype', mat%p%mtype)
+    CALL getatt(fid, TRIM(label)//'/p', 'phase', mat%p%phase)
+    CALL getatt(fid, TRIM(label)//'/p', 'msglv', mat%p%msglvl)
+    CALL getatt(fid, TRIM(label)//'/p', 'maxfct', mat%p%maxfct)
+    CALL getatt(fid, TRIM(label)//'/p', 'mnum', mat%p%mnum)
+    CALL getatt(fid, TRIM(label)//'/p', 'nrhs', mat%p%nrhs)
+    CALL getarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm)
+  END SUBROUTINE get_zpardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE mcopy_pardiso_mat(mata, matb)
+!
+!   Matrix copy: B = A
+!
+    TYPE(pardiso_mat) :: mata, matb
+    INTEGER :: n, nnz
+!
+! Assume that matb was already initialized by init_wsmp_mat.
+    IF(matb%rank.LE.0) THEN
+       WRITE(*,'(a)') 'MCOPY: Mat B is not initialized with INIT'
+       STOP '*** Abnormal EXIT in MODULE pardiso_mod ***'
+    END IF
+!
+    IF(ASSOCIATED(matb%mat)) THEN 
+       CALL destroy(matb%mat)
+       DEALLOCATE(matb%mat)
+    END IF
+!
+    n = mata%rank
+    nnz = mata%nnz
+    matb%rank = n
+    matb%nnz = nnz
+    matb%nlsym = mata%nlsym
+    matb%nlpos = mata%nlpos
+    matb%nlforce_zero = mata%nlforce_zero
+!
+    IF(ASSOCIATED(matb%val)) DEALLOCATE(matb%val)
+    IF(ASSOCIATED(matb%cols)) DEALLOCATE(matb%cols)
+    IF(ASSOCIATED(matb%irow)) DEALLOCATE(matb%irow)
+    IF(ASSOCIATED(matb%perm)) DEALLOCATE(matb%perm)
+    ALLOCATE(matb%val(nnz)); matb%val = mata%val
+    ALLOCATE(matb%cols(nnz)); matb%cols = mata%cols
+    ALLOCATE(matb%irow(n+1)); matb%irow = mata%irow
+    ALLOCATE(matb%perm(n))
+  END SUBROUTINE mcopy_pardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE mcopy_zpardiso_mat(mata, matb)
+!
+!   Matrix copy: B = A
+!
+    TYPE(zpardiso_mat) :: mata, matb
+    INTEGER :: n, nnz
+!
+! Assume that matb was already initialized by init_wsmp_mat.
+    IF(matb%rank.LE.0) THEN
+       WRITE(*,'(a)') 'MCOPY: Mat B is not initialized with INIT'
+       STOP '*** Abnormal EXIT in MODULE pardiso_mod ***'
+    END IF
+!
+    IF(ASSOCIATED(matb%mat)) THEN 
+       CALL destroy(matb%mat)
+       DEALLOCATE(matb%mat)
+    END IF
+!
+    n = mata%rank
+    nnz = mata%nnz
+    matb%rank = n
+    matb%nnz = nnz
+    matb%nlsym = mata%nlsym
+    matb%nlherm = mata%nlherm
+    matb%nlpos = mata%nlpos
+    matb%nlforce_zero = mata%nlforce_zero
+!
+    IF(ASSOCIATED(matb%val)) DEALLOCATE(matb%val)
+    IF(ASSOCIATED(matb%cols)) DEALLOCATE(matb%cols)
+    IF(ASSOCIATED(matb%irow)) DEALLOCATE(matb%irow)
+    IF(ASSOCIATED(matb%perm)) DEALLOCATE(matb%perm)
+    ALLOCATE(matb%val(nnz)); matb%val = mata%val
+    ALLOCATE(matb%cols(nnz)); matb%cols = mata%cols
+    ALLOCATE(matb%irow(n+1)); matb%irow = mata%irow
+    ALLOCATE(matb%perm(n))
+  END SUBROUTINE mcopy_zpardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE maddto_pardiso_mat(mata, alpha, matb)
+!
+!   A <- A + alpha*B
+!
+    TYPE(pardiso_mat) :: mata, matb
+    DOUBLE PRECISION  :: alpha
+!
+    mata%val = mata%val + alpha*matb%val
+  END SUBROUTINE maddto_pardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE maddto_zpardiso_mat(mata, alpha, matb)
+!
+!   A <- A + alpha*B
+!
+    TYPE(zpardiso_mat) :: mata, matb
+    DOUBLE COMPLEX     :: alpha
+!
+    mata%val = mata%val + alpha*matb%val
+  END SUBROUTINE maddto_zpardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE psum_pardiso_mat(mat, comm)
+!
+!   Parallel sum of sparse matrices
+!
+    INCLUDE "mpif.h"
+!
+    TYPE(pardiso_mat)   :: mat
+    INCLUDE 'psum_mat.tpl'
+  END SUBROUTINE psum_pardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE psum_zpardiso_mat(mat, comm)
+!
+!   Parallel sum of sparse matrices
+!
+    INCLUDE "mpif.h"
+!
+    TYPE(zpardiso_mat)   :: mat
+    INCLUDE 'psum_mat.tpl'
+  END SUBROUTINE psum_zpardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE p2p_pardiso_mat(mat, dest, extyp, op, comm)
+!
+!   Point-to-point combine sparse matrix between 2 processes
+!
+    INCLUDE "mpif.h"
+!
+    TYPE(pardiso_mat)            :: mat
+    DOUBLE PRECISION, ALLOCATABLE :: val(:)
+    INTEGER :: mpi_type=MPI_DOUBLE_PRECISION
+!
+    INCLUDE "p2p_mat.tpl"
+  END SUBROUTINE p2p_pardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE p2p_zpardiso_mat(mat, dest, extyp, op, comm)
+!
+!   Point-to-point combine sparse matrix between 2 processes
+!
+    INCLUDE "mpif.h"
+!
+    TYPE(zpardiso_mat)           :: mat
+    DOUBLE COMPLEX, ALLOCATABLE :: val(:)
+    INTEGER :: mpi_type=MPI_DOUBLE_COMPLEX
+!
+    INCLUDE "p2p_mat.tpl"
+  END SUBROUTINE p2p_zpardiso_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+END MODULE pardiso_bsplines
diff --git a/src/petsc_mod.F90 b/src/petsc_mod.F90
new file mode 100644
index 0000000..9909f17
--- /dev/null
+++ b/src/petsc_mod.F90
@@ -0,0 +1,873 @@
+!>
+!> @file petsc_mod.F90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+MODULE petsc_bsplines
+!
+!    PETSC_BSPLINES: Simple interface to the parallel iterative
+!                    solver PETSC
+!
+!    T.M. Tran, CRPP-EPFL
+!    June 2011
+!
+  USE sparse
+  IMPLICIT NONE
+
+#include "finclude/petsc.h90"
+
+!
+  TYPE petsc_mat
+     INTEGER :: rank
+     INTEGER(8) :: nnz, nnz_loc
+     INTEGER :: nterms, kmat
+     INTEGER :: istart, iend
+     INTEGER, POINTER :: rcounts(:) => NULL()
+     INTEGER, POINTER :: rdispls(:) => NULL()
+     INTEGER :: comm
+     LOGICAL :: nlsym
+     LOGICAL :: nlforce_zero
+     TYPE(spmat), POINTER      :: mat => NULL()
+     INTEGER, POINTER          :: cols(:) => NULL()
+     INTEGER, POINTER          :: irow(:) => NULL()
+     DOUBLE PRECISION, POINTER :: val(:) => NULL()
+!
+     Mat :: AMAT
+     KSP :: SOLVER
+  END TYPE petsc_mat
+!
+  INTERFACE init
+     MODULE PROCEDURE init_petsc_mat
+  END INTERFACE init
+!
+  INTERFACE clear_mat
+     MODULE PROCEDURE clear_petsc_mat
+  END INTERFACE clear_mat
+!
+  INTERFACE updtmat
+     MODULE PROCEDURE updt_petsc_mat
+  END INTERFACE updtmat
+!
+  INTERFACE putele
+     MODULE PROCEDURE putele_petsc_mat
+  END INTERFACE putele
+!
+  INTERFACE getele
+     MODULE PROCEDURE getele_petsc_mat
+  END INTERFACE getele
+!
+  INTERFACE putrow
+     MODULE PROCEDURE putrow_petsc_mat
+  END INTERFACE putrow
+!
+  INTERFACE getrow
+     MODULE PROCEDURE getrow_petsc_mat
+  END INTERFACE getrow
+!
+  INTERFACE putcol
+     MODULE PROCEDURE putcol_petsc_mat
+  END INTERFACE putcol
+!
+  INTERFACE getcol
+     MODULE PROCEDURE getcol_petsc_mat
+  END INTERFACE getcol
+!
+  INTERFACE get_count
+     MODULE PROCEDURE get_count_petsc_mat
+  END INTERFACE get_count
+!
+  INTERFACE to_mat
+     MODULE PROCEDURE to_petsc_mat
+  END INTERFACE to_mat
+!
+  INTERFACE save_mat
+     MODULE PROCEDURE save_petsc_mat
+  END INTERFACE save_mat
+!
+  INTERFACE load_mat
+     MODULE PROCEDURE load_petsc_mat
+  END INTERFACE load_mat
+!
+  INTERFACE bsolve
+     MODULE PROCEDURE bsolve_petsc_mat1,  bsolve_petsc_matn
+  END INTERFACE bsolve
+!
+  INTERFACE vmx
+     MODULE PROCEDURE vmx_petsc_mat,  vmx_petsc_matn
+  END INTERFACE vmx
+!
+  INTERFACE destroy
+     MODULE PROCEDURE destroy_petsc_mat
+ END INTERFACE destroy
+!
+ INTERFACE mcopy
+    MODULE PROCEDURE mcopy_petsc_mat
+ END INTERFACE mcopy
+!
+ INTERFACE maddto
+    MODULE PROCEDURE maddto_petsc_mat
+ END INTERFACE maddto
+!
+CONTAINS
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE init_petsc_mat(n, nterms, mat, kmat, nlsym, &
+       &                      nlforce_zero, comm)
+!
+!   Initialize an empty sparse petsc matrix
+!
+    USE pputils2
+    INCLUDE 'mpif.h'
+!
+    INTEGER, INTENT(in)           :: n, nterms
+    TYPE(petsc_mat)               :: mat
+    INTEGER, OPTIONAL, INTENT(in) :: kmat
+    LOGICAL, OPTIONAL, INTENT(in) :: nlsym
+    LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero
+    INTEGER, OPTIONAL, INTENT(in) :: comm
+!
+    INTEGER :: me, npes
+    INTEGER :: i, ierr, nloc
+    PetscBool  :: flg
+!!$    PetscTruth  :: flg ! Petsc version before 3.2
+!
+!    Prologue
+!
+    CALL mpi_comm_size(comm, npes, ierr)
+    CALL mpi_comm_rank(comm, me, ierr)
+!
+    mat%rank = n
+    mat%nterms = nterms
+    mat%nnz = 0
+    mat%nlsym = .FALSE.
+    mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val
+    IF(PRESENT(kmat)) mat%kmat = kmat
+    IF(PRESENT(nlsym)) mat%nlsym = nlsym
+    IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero
+!
+!    Inititialize the PETSC environment
+!
+    IF(PRESENT(comm)) THEN
+       PETSC_COMM_WORLD = comm
+    ELSE  !  Single process Petsc
+       PETSC_COMM_WORLD = MPI_COMM_SELF 
+    END IF
+    CALL PetscInitialize(PETSC_NULL_CHARACTER, ierr)
+    mat%comm = PETSC_COMM_WORLD
+!
+!    Matrix partition
+!
+    CALL dist1d(mat%comm, 1, n, mat%istart, nloc)
+    mat%iend = mat%istart + nloc - 1
+!
+    IF(ASSOCIATED(mat%rcounts)) DEALLOCATE(mat%rcounts)
+    IF(ASSOCIATED(mat%rdispls)) DEALLOCATE(mat%rdispls)
+    ALLOCATE(mat%rcounts(0:npes-1))
+    ALLOCATE(mat%rdispls(0:npes-1))
+    CALL mpi_allgather(nloc, 1, MPI_INTEGER, mat%rcounts, 1, MPI_INTEGER, &
+       &          mat%comm, ierr)
+    mat%rdispls(0) = 0
+    DO i=1,npes-1
+       mat%rdispls(i) = mat%rdispls(i-1)+mat%rcounts(i-1)
+    END DO
+!
+!    Initialize linked list for sparse matrix
+!
+    IF(ASSOCIATED(mat%mat)) DEALLOCATE(mat%mat)
+    ALLOCATE(mat%mat)
+    CALL init(n, mat%mat, mat%istart, mat%iend)
+!
+!    Create PETSC matrix
+!
+    CALL MatCreate(mat%comm, mat%AMAT, ierr)
+    CALL MatSetSizes(mat%AMAT, nloc, nloc, n, n, ierr)
+    CALL MatSetFromOptions(mat%AMAT, ierr)
+!
+!   Create PETSC SOLVER
+!
+    CALL KSPCreate(mat%comm, mat%SOLVER, ierr)
+!
+  END SUBROUTINE init_petsc_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE clear_petsc_mat(mat)
+!
+!   Clear matrix, keeping its sparse structure unchanged
+!
+    TYPE(petsc_mat) :: mat
+!
+    IF(ASSOCIATED(mat%val)) THEN
+       mat%val = 0.0d0
+    ELSE
+       CALL MatZeroEntries(mat%AMAT)
+    END IF
+  END SUBROUTINE clear_petsc_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE updt_petsc_mat(mat, i, j, val)
+!
+!   Update element Aij of petsc  matrix
+!
+    TYPE(petsc_mat)            :: mat
+    INTEGER, INTENT(in)          :: i, j
+    DOUBLE PRECISION, INTENT(in) :: val
+    INTEGER :: ierr
+!
+    IF(mat%nlsym) THEN   ! Store only upper part for symmetric matrices
+       IF(i.GT.j) RETURN
+    END IF
+    IF(i.LT.mat%istart .OR. i.GT.mat%iend) THEN
+       WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j
+       WRITE(*,'(a,2i6)') '      istart, iend      ', mat%istart, mat%iend
+       STOP '*** Abnormal EXIT in MODULE mumps_mod ***'
+    END IF
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL updtmat(mat%mat, i, j, val)
+    ELSE
+       CALL MatSetValue(mat%AMAT, i-1, j-1, ADD_VALUES, ierr)
+    END IF
+  END SUBROUTINE updt_petsc_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putele_petsc_mat(mat, i, j, val)
+!
+!  Put element (i,j) of matrix
+!
+    TYPE(petsc_mat)            :: mat
+    INTEGER, INTENT(in)          :: i, j
+    DOUBLE PRECISION, INTENT(in) :: val
+    INTEGER :: iput, jput
+    INTEGER :: ierr
+!
+    iput = i
+    jput = j
+    IF(mat%nlsym) THEN
+       IF( i.GT.j ) THEN    ! Lower triangular part
+          iput = j
+          jput = i
+       END IF
+    END IF
+!
+!    Do nothing if outside
+    IF(iput.LT.mat%istart .OR. iput.GT.mat%iend) RETURN
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL putele(mat%mat, iput, jput, val, &
+            &      nlforce_zero=mat%nlforce_zero)
+    ELSE
+       CALL MatSetValue(mat%AMAT, iput-1, jput-1, val, INSERT_VALUES, ierr)
+    END IF
+  END SUBROUTINE putele_petsc_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getele_petsc_mat(mat, i, j, val)
+!
+!   Get element (i,j) of sparse matrix
+!
+    TYPE(petsc_mat)             :: mat
+    INTEGER, INTENT(in)           :: i, j
+    DOUBLE PRECISION, INTENT(out) :: val
+    INTEGER :: iget, jget
+    INTEGER :: ierr
+!
+    iget = i
+    jget = j
+    IF(mat%nlsym) THEN
+       IF( i.GT.j ) THEN    ! Lower triangular part
+          iget = j
+          jget = i
+       END IF
+    END IF
+!
+    val = 0.0d0   ! Assume zero val if outside
+    IF( iget.LT.mat%istart .OR. iget.GT.mat%iend ) RETURN
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL getele(mat%mat, iget, jget, val)
+    ELSE
+       CALL MatGetValues(mat%AMAT, 1, iget-1, 1, jget-1, val, ierr)
+    END IF
+  END SUBROUTINE getele_petsc_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putrow_petsc_mat(amat, i, arr, cols)
+!
+! Put a row into sparse matrix
+!
+    TYPE(petsc_mat), INTENT(inout) :: amat
+    INTEGER, INTENT(in)            :: i
+    DOUBLE PRECISION, INTENT(in)   :: arr(:)
+    INTEGER, INTENT(in), OPTIONAL  :: cols(:)
+    INTEGER :: j
+!
+    IF(i.GT.amat%iend .OR. i.LT.amat%istart) RETURN ! Do nothing
+!
+    IF(PRESENT(cols)) THEN
+       DO j=1,SIZE(cols)
+          CALL putele(amat, i, cols(j), arr(j))
+       END DO
+    ELSE
+       DO j=1,amat%rank
+          CALL putele(amat, i, j, arr(j))
+       END DO
+    END IF
+  END SUBROUTINE putrow_petsc_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getrow_petsc_mat(amat, i, arr)
+!
+!   Get a row from sparse matrix
+!
+    TYPE(petsc_mat), INTENT(in)  :: amat
+    INTEGER, INTENT(in)            :: i
+    DOUBLE PRECISION, INTENT(out)  :: arr(:)
+    INTEGER :: j, ierr
+    INTEGER :: ncols, cols(amat%rank)
+    DOUBLE PRECISION :: vals(amat%rank)
+!
+    arr = 0.0d0
+    IF(i.GT.amat%iend .OR. i.LT.amat%istart) RETURN ! return 0 if outside
+    IF(ASSOCIATED(amat%mat)) THEN
+       DO j=1,amat%rank
+          CALL getele(amat%mat, i, j, arr(j))
+       END DO
+    ELSE
+       CALL MatGetRow(amat%AMAT, i-1, ncols, cols, vals,  ierr) ! 0-based array
+       DO j=1,ncols
+          arr(cols(j)+1) = vals(j)
+       END DO
+       CALL MatRestoreRow(amat%AMAT, i-1, ncols, cols, vals,  ierr)
+    END IF
+  END SUBROUTINE getrow_petsc_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putcol_petsc_mat(amat, j, arr)
+!
+! Put a column into sparse matrix
+!
+    TYPE(petsc_mat), INTENT(inout) :: amat
+    INTEGER, INTENT(in)              :: j
+    DOUBLE PRECISION, INTENT(in)     :: arr(:)
+    INTEGER :: i
+!
+    DO i=amat%istart,amat%iend
+       CALL putele(amat, i, j, arr(i))
+    END DO
+  END SUBROUTINE putcol_petsc_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getcol_petsc_mat(amat, j, arr)
+!
+!   Get a column from sparse matrix
+!
+    TYPE(petsc_mat), INTENT(in)  :: amat
+    INTEGER, INTENT(in)            :: j
+    DOUBLE PRECISION, INTENT(out)  :: arr(:)
+    INTEGER :: i
+!
+    arr = 0.0d0
+    DO i=amat%istart,amat%iend
+       CALL getele(amat, i, j, arr(i))
+    END DO
+  END SUBROUTINE getcol_petsc_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  INTEGER FUNCTION get_count_petsc_mat(mat, nnz)
+!
+!   Number of non-zeros in sparse matrix
+!
+    TYPE(petsc_mat)              :: mat
+    INTEGER, INTENT(out), OPTIONAL :: nnz(:)
+    INTEGER :: i, ierr
+    DOUBLE PRECISION :: info(MAT_INFO_SIZE)
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       get_count_petsc_mat = get_count(mat%mat, nnz)
+    ELSE
+       CALL MatGetInfo(mat%AMAT, MAT_LOCAL, info, ierr)
+       get_count_petsc_mat = info(MAT_INFO_NZ_ALLOCATED)
+!!$       IF(PRESENT(nnz)) THEN
+!!$          DO i=1,mat%rank
+!!$             nnz(i) = mat%irow(i+1)-mat%irow(i)
+!!$          END DO
+!!$       END IF
+    END IF
+  END FUNCTION get_count_petsc_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE to_petsc_mat(mat, nlkeep)
+!
+!   Convert linked list spmat to petsc matrice structure
+!
+    TYPE(petsc_mat) :: mat
+    LOGICAL, INTENT(in), OPTIONAL :: nlkeep
+!
+    INTEGER :: me, i, j, jj, nnz, rank, s, e
+    INTEGER :: istart, iend
+    INTEGER :: iloc, k1, k2, ncol
+    INTEGER :: d_nz, d_nnz(mat%istart:mat%iend)
+    INTEGER :: o_nz, o_nnz(mat%istart:mat%iend)
+    INTEGER :: comm, ierr
+    LOGICAL :: nlclean
+!
+    nlclean = .TRUE.
+    IF(PRESENT(nlkeep)) THEN
+       nlclean = .NOT. nlkeep
+    END IF
+!
+    comm = mat%comm
+    CALL mpi_comm_rank(comm, me, ierr)
+!
+!    Allocate the Petsc matrix structure
+!
+    rank = mat%rank
+    mat%nnz_loc = get_count(mat)
+    istart = mat%istart
+    iend = mat%iend
+    CALL mpi_allreduce(mat%nnz_loc, mat%nnz, 1, MPI_INTEGER8, MPI_SUM, comm, ierr)
+!
+    IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
+    IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
+    IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
+    ALLOCATE(mat%val(mat%nnz_loc))
+    ALLOCATE(mat%cols(mat%nnz_loc))
+    ALLOCATE(mat%irow(mat%istart:mat%iend+1))
+!
+!    Get Sparse structure from linked list
+!
+    d_nnz(:) = 0
+    o_nnz(:) = 0
+    mat%irow(istart) = 1
+    DO i=istart,iend
+       mat%irow(i+1) = mat%irow(i) + mat%mat%row(i)%nnz
+       s = mat%irow(i)
+       e = mat%irow(i+1)-1
+       CALL getrow(mat%mat%row(i), mat%val(s:e), mat%cols(s:e))
+       d_nnz(i) = COUNT(mat%cols(s:e) .GE. istart .AND. &
+            &           mat%cols(s:e) .LE. iend)
+       o_nnz(i) = mat%mat%row(i)%nnz - d_nnz(i)
+       IF(nlclean) CALL destroy(mat%mat%row(i))
+    END DO
+    IF(nlclean) DEALLOCATE(mat%mat)
+!
+!    Petsc matrix preallocation
+!
+    CALL MatMPIAIJSetPreallocation(mat%AMAT, PETSC_NULL_INTEGER, &
+         &      d_nnz, PETSC_NULL_INTEGER,  o_nnz, ierr)
+    CALL MatSeqAIJSetPreallocation(mat%AMAT, PETSC_NULL_INTEGER, &
+         &      d_nnz, ierr)
+!
+!    Petsc matrix assembly  
+!
+    mat%cols = mat%cols-1   ! Start column index = 0
+    DO i=istart,iend
+       iloc = i-istart+1
+       k1 = mat%irow(i)
+       k2 = mat%irow(i+1)
+       ncol = k2-k1
+       CALL MatSetValues(mat%AMAT, 1, i-1, ncol, mat%cols(k1:k2-1), &
+          &            mat%val(k1:k2-1), INSERT_VALUES, ierr)
+    END DO
+!
+    CALL MatAssemblyBegin(mat%AMAT, MAT_FINAL_ASSEMBLY ,ierr)
+    CALL MatAssemblyEnd(mat%AMAT, MAT_FINAL_ASSEMBLY, ierr)
+!
+    DEALLOCATE(mat%irow)
+    DEALLOCATE(mat%cols)
+    DEALLOCATE(mat%val)
+!
+  END SUBROUTINE to_petsc_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE save_petsc_mat(mat, file)
+!
+!   Save matrix in PETSC binary format
+!
+    TYPE(petsc_mat) :: mat
+    CHARACTER(len=*), INTENT(in) :: file
+!
+    INTEGER :: ierr
+    PetscViewer ::  viewer
+!
+    CALL PetscViewerBinaryOpen(mat%comm, TRIM(file), FILE_MODE_WRITE,&
+         &     viewer, ierr)
+    CALL MatView(mat%AMAT, viewer, ierr)
+    CALL PetscViewerDestroy(viewer, ierr)
+!    
+  END SUBROUTINE save_petsc_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE load_petsc_mat(mat, file)
+!
+!   Load matrix in PETSC binary format
+!
+    TYPE(petsc_mat) :: mat
+    CHARACTER(len=*), INTENT(in) :: file
+!
+    INTEGER :: nloc, i, npes, ierr
+    PetscViewer ::  viewer
+!
+    CALL mpi_comm_size(mat%comm, npes, ierr)
+!
+!   Clean up unneeded sparse matrix
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL destroy(mat%mat)
+       DEALLOCATE(mat%mat)
+    END IF
+!
+!   Load matrix from file
+!
+    CALL PetscViewerBinaryOpen(mat%comm, TRIM(file), FILE_MODE_READ,&
+         &     viewer, ierr)
+    CALL MatLoad(mat%AMAT, viewer, ierr)
+    CALL PetscViewerDestroy(viewer, ierr)
+!
+!  Some mat info
+!
+    CALL MatGetSize(mat%AMAT, mat%rank, PETSC_NULL_INTEGER, ierr)
+    mat%nnz_loc = get_count(mat)
+    CALL mpi_allreduce(mat%nnz_loc, mat%nnz, 1, MPI_INTEGER8, MPI_SUM, &
+         &    mat%comm, ierr)
+!
+!
+!  Recompute matrix partition from loaded matrix
+!
+    CALL MatGetOwnershipRange(mat%AMAT, mat%istart, mat%iend, ierr)
+    mat%istart = mat%istart+1   ! Convert from Petsc definition
+    nloc = mat%iend - mat%istart + 1
+    CALL mpi_allgather(nloc, 1, MPI_INTEGER, mat%rcounts, 1, MPI_INTEGER, &
+       &          mat%comm, ierr)
+    mat%rdispls(0) = 0
+    DO i=1,npes-1
+       mat%rdispls(i) = mat%rdispls(i-1)+mat%rcounts(i-1)
+    END DO
+!    
+  END SUBROUTINE load_petsc_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE bsolve_petsc_mat1(mat, rhs, sol, rtol_in, nitmax_in, nits)
+!
+!   Backsolve, using Petsc
+!
+    TYPE(petsc_mat) :: mat
+    DOUBLE PRECISION, INTENT(inout) :: rhs(:)
+    DOUBLE PRECISION, INTENT(out), OPTIONAL :: sol(:)
+    DOUBLE PRECISION, INTENT(in), OPTIONAL  :: rtol_in
+    INTEGER, INTENT(in), OPTIONAL :: nitmax_in
+    INTEGER, INTENT(out), OPTIONAL :: nits
+!
+    DOUBLE PRECISION :: rtol=PETSC_DEFAULT_DOUBLE_PRECISION
+    INTEGER :: nitmax=PETSC_DEFAULT_INTEGER
+    INTEGER :: i, istart, iend, nrank_loc, nrank
+    INTEGER :: npes, me, ierr
+    INTEGER :: idx(mat%istart:mat%iend)
+!
+    Vec :: vec_rhs, vec_sol
+    PetscScalar :: scal
+    PetscScalar, POINTER :: psol_loc(:)
+    KSPConvergedReason :: reason
+!
+    CALL mpi_comm_size(mat%comm, npes, ierr)
+    CALL mpi_comm_rank(mat%comm, me, ierr)
+!
+    istart = mat%istart
+    iend = mat%iend
+    nrank_loc = iend-istart+1
+    nrank = mat%rank
+    idx = (/ (i, i=istart,iend) /) - 1  ! 0-based petsc vector
+!
+!      Create Vectors
+!
+    CALL VecCreate(mat%comm, vec_rhs, ierr)
+    CALL VecSetSizes(vec_rhs, nrank_loc, nrank, ierr)
+    CALL VecSetFromOptions(vec_rhs, ierr)
+    CALL VecDuplicate(vec_rhs, vec_sol, ierr)
+!
+!     Set solver
+!
+    IF(PRESENT(rtol_in)) rtol = rtol_in
+    IF(PRESENT(nitmax_in)) nitmax = nitmax_in
+!
+    CALL KSPSetOperators(mat%SOLVER, mat%AMAT, mat%AMAT, SAME_PRECONDITIONER, ierr)
+    CALL KSPSetTolerances(mat%SOLVER, rtol, PETSC_DEFAULT_DOUBLE_PRECISION,&
+         &  PETSC_DEFAULT_DOUBLE_PRECISION, nitmax, ierr)
+    CALL KSPSetFromOptions(mat%SOLVER, ierr)
+!
+!     Set RHS
+!
+    CALL VecSetValues(vec_rhs, nrank_loc, idx, rhs(istart), INSERT_VALUES, ierr)
+    CALL VecAssemblyBegin(vec_rhs, ierr)
+    CALL VecAssemblyEnd(vec_rhs, ierr)
+!
+    CALL KSPSolve(mat%SOLVER, vec_rhs, vec_sol, ierr)
+    CALL KSPGetConvergedReason(mat%SOLVER, reason, ierr)
+    IF(reason .LT. 0) THEN
+       IF(me.EQ.0) WRITE(*,'(a,i4)') 'BSOLVE: diverges with reason', reason
+    END IF
+    IF(PRESENT(nits)) THEN
+       CALL KSPGetIterationNumber(mat%SOLVER, nits, ierr)
+    END IF
+!
+!     Get global solutions on all MPI processes
+!
+    CALL VecGetArrayF90(vec_sol, psol_loc, ierr)
+!
+    IF(PRESENT(sol)) THEN
+       CALL mpi_allgatherv(psol_loc, nrank_loc, MPI_DOUBLE_PRECISION, &
+            &      sol, mat%rcounts, mat%rdispls, MPI_DOUBLE_PRECISION, &
+            &      mat%comm, ierr)
+    ELSE
+       CALL mpi_allgatherv(psol_loc, nrank_loc, MPI_DOUBLE_PRECISION, &
+            &      rhs, mat%rcounts, mat%rdispls, MPI_DOUBLE_PRECISION, &
+            &      mat%comm, ierr)
+    END IF
+!
+    CALL VecRestoreArrayF90(vec_sol, psol_loc, ierr)
+   END SUBROUTINE bsolve_petsc_mat1
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE bsolve_petsc_matn(mat, rhs, sol, rtol_in, nitmax_in, nits)
+!
+!   Backsolve, using Petsc, multiple RHS
+!
+    TYPE(petsc_mat)  :: mat
+    DOUBLE PRECISION :: rhs(:,:)
+    DOUBLE PRECISION, OPTIONAL :: sol(:,:)
+    DOUBLE PRECISION, INTENT(in), OPTIONAL  :: rtol_in
+    INTEGER, INTENT(in), OPTIONAL :: nitmax_in
+    INTEGER, INTENT(out), OPTIONAL :: nits(:)
+!
+    DOUBLE PRECISION :: rtol=PETSC_DEFAULT_DOUBLE_PRECISION
+    INTEGER :: nitmax=PETSC_DEFAULT_INTEGER
+    INTEGER :: j, nrhs
+    INTEGER :: i, istart, iend, nrank_loc, nrank
+    INTEGER :: npes, me, ierr
+    INTEGER :: idx(mat%istart:mat%iend)
+!
+    Vec :: vec_rhs, vec_sol
+    PetscScalar :: scal
+    PetscScalar, POINTER :: psol_loc(:)
+    KSPConvergedReason :: reason
+!
+    CALL mpi_comm_size(mat%comm, npes, ierr)
+    CALL mpi_comm_rank(mat%comm, me, ierr)
+!
+    nrhs = SIZE(rhs,2)
+    istart = mat%istart
+    iend = mat%iend
+    nrank_loc = iend-istart+1
+    nrank = mat%rank
+    idx = (/ (i, i=istart,iend) /) - 1  ! 0-based petsc vector
+!
+!      Create Vectors
+!
+    CALL VecCreate(mat%comm, vec_rhs, ierr)
+    CALL VecSetSizes(vec_rhs, nrank_loc, nrank, ierr)
+    CALL VecSetFromOptions(vec_rhs, ierr)
+    CALL VecDuplicate(vec_rhs, vec_sol, ierr)
+!
+!     Set solver
+!
+    IF(PRESENT(rtol_in)) rtol = rtol_in
+    IF(PRESENT(nitmax_in)) nitmax = nitmax_in
+!
+    CALL KSPSetOperators(mat%SOLVER, mat%AMAT, mat%AMAT, SAME_PRECONDITIONER, ierr)
+    CALL KSPSetTolerances(mat%SOLVER, rtol, PETSC_DEFAULT_DOUBLE_PRECISION,&
+         &  PETSC_DEFAULT_DOUBLE_PRECISION, nitmax, ierr)
+    CALL KSPSetFromOptions(mat%SOLVER, ierr)
+!
+!     Set RHS
+!
+    DO j=1,nrhs
+       CALL VecSetValues(vec_rhs, nrank_loc, idx, rhs(istart,j), INSERT_VALUES, ierr)
+       CALL VecAssemblyBegin(vec_rhs, ierr)
+       CALL VecAssemblyEnd(vec_rhs, ierr)
+!
+       CALL KSPSolve(mat%SOLVER, vec_rhs, vec_sol, ierr)
+       CALL KSPGetConvergedReason(mat%SOLVER, reason, ierr)
+       IF(reason .LT. 0) THEN
+          IF(me.EQ.0) THEN 
+             WRITE(*,'(a,i4,a,i8)') 'BSOLVE: diverges with reason', reason, &
+                  &  ' for j =', j
+          END IF
+       END IF
+       IF(PRESENT(nits)) THEN
+          CALL KSPGetIterationNumber(mat%SOLVER, nits(j), ierr)
+       END IF
+!
+!     Get global solutions on all MPI processes
+!
+       CALL VecGetArrayF90(vec_sol, psol_loc, ierr)
+!
+       IF(PRESENT(sol)) THEN
+          CALL mpi_allgatherv(psol_loc, nrank_loc, MPI_DOUBLE_PRECISION, &
+               &      sol(1,j), mat%rcounts, mat%rdispls, MPI_DOUBLE_PRECISION, &
+               &      mat%comm, ierr)
+       ELSE
+          CALL mpi_allgatherv(psol_loc, nrank_loc, MPI_DOUBLE_PRECISION, &
+               &      rhs(1,j), mat%rcounts, mat%rdispls, MPI_DOUBLE_PRECISION, &
+               &      mat%comm, ierr)
+       END IF
+!
+       CALL VecRestoreArrayF90(vec_sol, psol_loc, ierr)
+    END DO
+!
+  END SUBROUTINE bsolve_petsc_matn
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  FUNCTION vmx_petsc_mat(mat, xarr) RESULT(yarr)
+!
+!   Return product mat*x
+!
+    TYPE(petsc_mat)             :: mat
+    DOUBLE PRECISION, INTENT(in)  :: xarr(:)
+    DOUBLE PRECISION              :: yarr(SIZE(xarr))
+    DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0
+    CHARACTER(len=6) :: matdescra
+    INTEGER :: n, i, j
+!
+    n = mat%rank
+!
+#ifdef MKL
+    IF(mat%nlsym) THEN
+       matdescra = 'sun'
+    ELSE
+       matdescra = 'g'
+    END IF
+!
+    CALL mkl_dcsrmv('N', n, n, alpha, matdescra, mat%val, &
+         &          mat%cols, mat%irow(1), mat%irow(2), xarr, &
+         &          beta, yarr)
+#else
+    yarr = 0.0d0
+    DO i=1,n
+       DO j=mat%irow(i), mat%irow(i+1)-1
+          yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j))
+       END DO
+       IF(mat%nlsym) THEN
+          DO j=mat%irow(i)+1, mat%irow(i+1)-1
+             yarr(mat%cols(j)) = yarr(mat%cols(j)) &
+                  &              + mat%val(j)*xarr(i)
+          END DO
+       END IF
+    END DO
+#endif
+!
+  END FUNCTION vmx_petsc_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  FUNCTION vmx_petsc_matn(mat, xarr) RESULT(yarr)
+!
+!   Return product mat*x
+!
+    TYPE(petsc_mat)             :: mat
+    DOUBLE PRECISION, INTENT(in)  :: xarr(:,:)
+    DOUBLE PRECISION              :: yarr(SIZE(xarr,1),SIZE(xarr,2))
+!
+    DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0
+    INTEGER :: n, nrhs, i, j
+    CHARACTER(len=6) :: matdescra
+!
+    n = mat%rank
+    nrhs = SIZE(xarr,2)
+!
+#ifdef MKL
+    IF(mat%nlsym) THEN
+       matdescra = 'sun'
+    ELSE
+       matdescra = 'g'
+    END IF
+!
+    CALL mkl_dcsrmm('N', n, nrhs, n, alpha, matdescra, mat%val,&
+         &           mat%cols, mat%irow(1), mat%irow(2), xarr, &
+         &           n, beta, yarr, n)
+#else
+    yarr = 0.0d0
+    DO i=1,n
+       DO j=mat%irow(i), mat%irow(i+1)-1
+          yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:)
+       END DO
+       IF(mat%nlsym) THEN
+          DO j=mat%irow(i)+1, mat%irow(i+1)-1
+             yarr(mat%cols(j),:) = yarr(mat%cols(j),:) &
+                  &                + mat%val(j)*xarr(i,:)
+          END DO
+       END IF
+    END DO
+#endif
+!
+  END FUNCTION vmx_petsc_matn
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE destroy_petsc_mat(mat)
+!
+!  Deallocate the sparse matrix mat
+!
+    TYPE(petsc_mat) :: mat
+    INTEGER :: ierr
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL destroy(mat%mat)
+       DEALLOCATE(mat%mat)
+    END IF
+!
+    IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
+    IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
+    IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
+!
+    CALL MatDestroy(mat%AMAT,ierr)
+  END SUBROUTINE destroy_petsc_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE mcopy_petsc_mat(mata, matb)
+!
+!   Matrix copy: B = A
+!
+    TYPE(petsc_mat) :: mata, matb
+    INTEGER :: ierr
+!
+! Assume that matb was already initialized by init_petsc_mat.
+    IF(matb%rank.LE.0) THEN
+       WRITE(*,'(a)') 'MCOPY: Mat B is not initialized with INIT'
+       STOP '*** Abnormal EXIT in MODULE petsc_mod ***'
+    END IF
+!
+    IF(ASSOCIATED(matb%mat)) THEN 
+       CALL destroy(matb%mat)
+       DEALLOCATE(matb%mat)
+    END IF
+!
+    matb%rank =  mata%rank
+    matb%nnz = mata%nnz
+    matb%nnz_loc = mata%nnz_loc
+    matb%istart = mata%istart
+    matb%iend = mata%iend
+    matb%nlsym = mata%nlsym
+    matb%nlforce_zero = mata%nlforce_zero
+!
+    IF(ASSOCIATED(matb%mat)) THEN 
+       CALL destroy(matb%mat)
+       DEALLOCATE(matb%mat)    
+    END IF
+!
+!    Destroy existing PETSC matrix and recreate a new one from scratch
+!
+    CALL MatDestroy(matb%AMAT, ierr)
+    CALL MatConvert(mata%AMAT, MATSAME, MAT_INITIAL_MATRIX, matb%AMAT, ierr)
+  END SUBROUTINE mcopy_petsc_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE maddto_petsc_mat(mata, alpha, matb)
+!
+!   A <- A + alpha*B
+!
+    TYPE(petsc_mat) :: mata, matb
+    DOUBLE PRECISION  :: alpha
+!
+    mata%val = mata%val + alpha*matb%val
+  END SUBROUTINE maddto_petsc_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+!
+END MODULE petsc_bsplines
diff --git a/src/psum_mat.tpl b/src/psum_mat.tpl
new file mode 100644
index 0000000..bf44e63
--- /dev/null
+++ b/src/psum_mat.tpl
@@ -0,0 +1,101 @@
+!>
+!> @file psum_mat.tpl
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+    INTEGER, INTENT(in) :: comm
+!
+    INTEGER :: me, npes, ierr
+    INTEGER :: n, r, i, base
+    INTEGER :: newrank
+!---------------------------------------------------------------------
+!                1.0    Prologue
+!
+    CALL mpi_comm_size(comm, npes, ierr)
+    CALL mpi_comm_rank(comm, me, ierr)
+!
+!    Compute n and r defined by npes = 2**n+r
+    i=1
+    n=0
+    DO WHILE (2*i.LE.npes)
+       n=n+1
+       i=2*i
+    END DO ! i=2**n
+    r = npes-i
+!---------------------------------------------------------------------
+!                2.0    Node partition
+!
+!  I: nodes with ranks < 2*r
+!      . nodes with even ranks receive from rank+1 and sum
+!      . odd ranks sends to rank-1
+! II: nodes with ranks >= 2*r
+!      . do nothing
+!
+  IF( me .LT. 2*r ) THEN
+     IF( MODULO(me,2) .EQ. 0 ) THEN
+        CALL p2p_mat(mat, me+1, 'recv', 'updt', comm)
+     ELSE
+        CALL p2p_mat(mat, me-1, 'send', 'updt', comm)
+     END IF
+  END IF
+!---------------------------------------------------------------------
+!                3.0    Binary tree reduction using new ranks
+!
+!   Define new ranks
+  IF( MODULO(me,2).EQ.0 .AND. me.LT.2*r ) THEN  ! new rank in I
+     newrank = me/2
+  ELSE IF(  me.GE.2*r ) THEN ! new ranks in II
+     newrank = me-r
+  ELSE   ! inactive ranks in I          
+     newrank = -1
+  END IF
+!
+!   Reduction with 2**n (positive) newranks 
+  IF( newrank .GE. 0 ) THEN   ! only for nodes with new rank > 0
+     base = 1
+     DO i=1,n
+        CALL p2p_mat(mat, oldrank(IEOR(newrank,base)), &
+             &       'sendrecv', 'updt', comm)
+        base = base*2
+     END DO
+  END IF
+!---------------------------------------------------------------------
+!                4.0    Final exchanche in I
+!
+  IF( me .LT. 2*r ) THEN
+     IF( MODULO(me,2).EQ.0 ) THEN
+        CALL p2p_mat(mat, me+1, 'send', 'put', comm)
+     ELSE
+        CALL p2p_mat(mat, me-1, 'recv', 'put', comm)
+     END IF
+  END IF
+!---------------------------------------------------------------------
+CONTAINS
+  INTEGER FUNCTION oldrank(rank)
+    INTEGER, INTENT(in) :: rank
+    IF(rank.LT.r) THEN
+       oldrank = 2*rank
+    ELSE
+       oldrank = rank+r
+    END IF
+  END FUNCTION oldrank
diff --git a/src/pwsmp_mod.f90 b/src/pwsmp_mod.f90
new file mode 100644
index 0000000..b981cde
--- /dev/null
+++ b/src/pwsmp_mod.f90
@@ -0,0 +1,2032 @@
+!>
+!> @file pwsmp_mod.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+MODULE pwsmp_bsplines
+!
+!    PWSMP_BSPLINES: Simple interface to the parallel sparse direct 
+!                    solver PWSMP.
+!
+!    T.M. Tran, CRPP-EPFL
+!    December 2011
+!
+  USE sparse
+  IMPLICIT NONE
+!
+  INTEGER, SAVE :: current_matid = -1
+  INTEGER, SAVE :: last_matid = -1
+!
+  TYPE wsmp_param
+     INTEGER          :: iparm(64)
+     DOUBLE PRECISION :: dparm(64)
+  END TYPE wsmp_param
+!
+  TYPE wsmp_mat
+     INTEGER :: matid=-1
+     INTEGER :: rank=0, nnz
+     INTEGER :: nterms, kmat, nrhs
+     INTEGER :: comm
+     INTEGER :: istart, iend, rank_loc
+     INTEGER :: nnz_start, nnz_end, nnz_loc
+     LOGICAL :: nlsym
+     LOGICAL :: nlpos
+     LOGICAL :: nlforce_zero
+     TYPE(spmat), POINTER      :: mat => NULL()
+     INTEGER, POINTER          :: cols(:) => NULL()
+     INTEGER, POINTER          :: irow(:) => NULL()
+     INTEGER, POINTER          :: perm(:) => NULL()
+     INTEGER, POINTER          :: invp(:) => NULL()
+     INTEGER, POINTER          :: mrp(:) => NULL()
+     DOUBLE PRECISION, POINTER :: diag(:) => NULL()
+     DOUBLE PRECISION, POINTER :: val(:) => NULL()
+     DOUBLE PRECISION, POINTER :: aux(:) => NULL()
+     TYPE(wsmp_param)       :: p
+  END TYPE wsmp_mat
+!
+  TYPE zwsmp_mat
+     INTEGER :: matid=-1
+     INTEGER :: rank=0, nnz
+     INTEGER :: nterms, kmat, nrhs
+     INTEGER :: comm
+     INTEGER :: istart, iend, rank_loc
+     INTEGER :: nnz_start, nnz_end, nnz_loc
+     LOGICAL :: nlherm
+     LOGICAL :: nlsym
+     LOGICAL :: nlpos
+     LOGICAL :: nlforce_zero
+     TYPE(zspmat), POINTER     :: mat => NULL()
+     INTEGER, POINTER          :: cols(:) => NULL()
+     INTEGER, POINTER          :: irow(:) => NULL()
+     INTEGER, POINTER          :: perm(:) => NULL()
+     INTEGER, POINTER          :: invp(:) => NULL()
+     INTEGER, POINTER          :: mrp(:) => NULL()
+     DOUBLE COMPLEX, POINTER :: diag(:) => NULL()
+     DOUBLE COMPLEX, POINTER :: val(:) => NULL()
+     DOUBLE COMPLEX, POINTER :: aux(:) => NULL()
+     TYPE(wsmp_param)       :: p
+  END TYPE zwsmp_mat
+!
+  INTERFACE init
+     MODULE PROCEDURE init_wsmp_mat, init_zwsmp_mat
+  END INTERFACE init
+!
+  INTERFACE check_mat
+     MODULE PROCEDURE check_wsmp_mat, check_zwsmp_mat
+  END INTERFACE check_mat
+!
+  INTERFACE clear_mat
+     MODULE PROCEDURE clear_wsmp_mat, clear_zwsmp_mat
+  END INTERFACE clear_mat
+!
+  INTERFACE updtmat
+     MODULE PROCEDURE updt_wsmp_mat, updt_zwsmp_mat
+  END INTERFACE updtmat
+!
+  INTERFACE putele
+     MODULE PROCEDURE putele_wsmp_mat, putele_zwsmp_mat
+  END INTERFACE putele
+!
+  INTERFACE getele
+     MODULE PROCEDURE getele_wsmp_mat, getele_zwsmp_mat
+  END INTERFACE getele
+!
+  INTERFACE putrow
+     MODULE PROCEDURE putrow_wsmp_mat, putrow_zwsmp_mat
+  END INTERFACE putrow
+!
+  INTERFACE getrow
+     MODULE PROCEDURE getrow_wsmp_mat, getrow_zwsmp_mat
+  END INTERFACE getrow
+!
+  INTERFACE putcol
+     MODULE PROCEDURE putcol_wsmp_mat, putcol_zwsmp_mat
+  END INTERFACE putcol
+!
+  INTERFACE getcol
+     MODULE PROCEDURE getcol_wsmp_mat, getcol_zwsmp_mat
+  END INTERFACE getcol
+!
+  INTERFACE get_count
+     MODULE PROCEDURE get_count_wsmp_mat, get_count_zwsmp_mat
+  END INTERFACE get_count
+!
+  INTERFACE to_mat
+     MODULE PROCEDURE to_wsmp_mat, to_zwsmp_mat
+  END INTERFACE to_mat
+!
+  INTERFACE reord_mat
+     MODULE PROCEDURE reord_wsmp_mat, reord_zwsmp_mat
+  END INTERFACE reord_mat
+!
+  INTERFACE numfact
+     MODULE PROCEDURE numfact_wsmp_mat, numfact_zwsmp_mat
+  END INTERFACE numfact
+!
+  INTERFACE factor
+     MODULE PROCEDURE factor_wsmp_mat, factor_zwsmp_mat
+  END INTERFACE factor
+!
+  INTERFACE bsolve
+     MODULE PROCEDURE bsolve_wsmp_mat1,  bsolve_wsmp_matn, &
+          &           bsolve_zwsmp_mat1, bsolve_zwsmp_matn
+  END INTERFACE bsolve
+!
+  INTERFACE vmx
+     MODULE PROCEDURE vmx_wsmp_mat,  vmx_wsmp_matn, &
+          &           vmx_zwsmp_mat, vmx_zwsmp_matn
+  END INTERFACE vmx
+!
+  INTERFACE destroy
+     MODULE PROCEDURE destroy_wsmp_mat, destroy_zwsmp_mat
+ END INTERFACE destroy
+!
+ INTERFACE putmat
+    MODULE PROCEDURE put_wsmp_mat, put_zwsmp_mat
+ END INTERFACE putmat
+!
+ INTERFACE getmat
+    MODULE PROCEDURE get_wsmp_mat, get_zwsmp_mat
+ END INTERFACE getmat
+!
+ INTERFACE mcopy
+    MODULE PROCEDURE mcopy_wsmp_mat,  mcopy_zwsmp_mat
+ END INTERFACE mcopy
+!
+ INTERFACE maddto
+    MODULE PROCEDURE maddto_wsmp_mat, maddto_zwsmp_mat
+ END INTERFACE maddto
+!
+ INTERFACE psum_mat
+    MODULE PROCEDURE psum_wsmp_mat, psum_zwsmp_mat
+ END INTERFACE psum_mat
+!
+ INTERFACE p2p_mat
+    MODULE PROCEDURE p2p_wsmp_mat, p2p_zwsmp_mat
+ END INTERFACE p2p_mat
+!
+CONTAINS
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE init_wsmp_mat(n, nterms, mat, kmat, nlsym, nlpos, &
+       &                   nlforce_zero, comm_in)
+!
+!   Initialize an empty sparse wsmp matrix
+!
+    USE pputils2
+    INCLUDE 'mpif.h'
+    INTEGER, INTENT(in)           :: n, nterms
+    TYPE(wsmp_mat)                :: mat
+    INTEGER, OPTIONAL, INTENT(in) :: kmat
+    LOGICAL, OPTIONAL, INTENT(in) :: nlsym
+    LOGICAL, OPTIONAL, INTENT(in) :: nlpos
+    LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero
+    INTEGER, OPTIONAL, INTENT(in) :: comm_in
+!
+    INTEGER :: comm, nloc
+    INTEGER :: info
+    INTEGER :: idummy = 0
+    DOUBLE PRECISION :: dummy = 0.0d0
+!
+    comm = MPI_COMM_WORLD
+    IF(PRESENT(comm_in)) comm = comm_in
+    mat%comm = comm
+!
+!  Store away (valid) current matrix id
+!
+    IF(current_matid .GE. 0) THEN
+       CALL wstoremat(current_matid, info)
+       IF(info.NE.0) THEN
+          WRITE(*,'(a,i12)') 'INIT: WSTOREMAT failed WITH error', info
+          STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+       END IF
+    END IF
+    last_matid = last_matid+1
+    mat%matid = last_matid
+    current_matid = mat%matid
+!
+!  Initialize sparse matrice structure
+!
+    mat%rank = n
+    mat%nterms = nterms
+    mat%nnz = 0
+    mat%nlsym = .FALSE.
+    mat%nlpos = .TRUE.
+    mat%nrhs = 1
+    mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val
+    IF(PRESENT(kmat)) mat%kmat = kmat
+    IF(PRESENT(nlsym)) mat%nlsym = nlsym
+    IF(PRESENT(nlpos)) mat%nlpos = nlpos
+    IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero
+!
+!    Matrix partition
+!
+    CALL dist1d(comm, 1, n, mat%istart, nloc)
+    mat%iend = mat%istart + nloc - 1
+    mat%rank_loc = nloc
+!
+    IF(ASSOCIATED(mat%mat)) DEALLOCATE(mat%mat)
+    ALLOCATE(mat%mat)
+    CALL init(n, mat%mat, mat%istart, mat%iend)
+!
+!  Fill 'iparm' and 'dparm' with default values
+!
+    mat%p%iparm(1:3) = 0
+    IF(mat%nlsym) THEN
+       CALL pwssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, &
+            &      mat%perm, mat%invp, dummy, mat%rank_loc, mat%nrhs, &
+            &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
+            &      mat%p%dparm)
+       IF(mat%nlpos) THEN
+          mat%p%iparm(31) = 0
+       ELSE
+!!$          mat%p%iparm(31) = 1  ! LDL^T without pivoting
+          mat%p%iparm(31) = 2  ! LDL^T with pivoting
+       END IF
+       CALL pwgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, dummy, mat%rank, &
+            &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
+    END IF
+    IF(mat%p%iparm(64).NE.0) THEN
+       WRITE(*,'(a,i12)') 'INIT: Initialization failed with error', &
+            &             mat%p%iparm(64)
+       STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+    END IF
+!
+    CALL setup_wsmp(mat%p%iparm, mat%p%dparm)
+!
+  CONTAINS
+    SUBROUTINE setup_wsmp(iparm, dparm)
+      INTEGER          :: iparm(:)
+      DOUBLE PRECISION :: dparm(:)
+    END SUBROUTINE setup_wsmp
+  END SUBROUTINE init_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE init_zwsmp_mat(n, nterms, mat, kmat, nlsym, nlherm, &
+       &                   nlpos, nlforce_zero, comm_in)
+!
+!   Initialize an empty sparse wsmp matrix
+!
+    USE pputils2
+    INCLUDE 'mpif.h'
+    INTEGER, INTENT(in)           :: n, nterms
+    TYPE(zwsmp_mat)               :: mat
+    INTEGER, OPTIONAL, INTENT(in) :: kmat
+    LOGICAL, OPTIONAL, INTENT(in) :: nlsym
+    LOGICAL, OPTIONAL, INTENT(in) :: nlherm
+    LOGICAL, OPTIONAL, INTENT(in) :: nlpos
+    LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero
+    INTEGER, OPTIONAL, INTENT(in) :: comm_in
+!
+    INTEGER :: comm, nloc
+    INTEGER :: info
+    INTEGER :: idummy = 0
+    DOUBLE COMPLEX :: dummy = 0.0d0
+!
+    comm = MPI_COMM_WORLD
+    IF(PRESENT(comm_in)) comm = comm_in
+    mat%comm = comm
+!
+!  Store away (valid) current matrix id
+!
+    IF(current_matid .GE. 0) THEN
+       CALL wstoremat(current_matid, info)
+       IF(info.NE.0) THEN
+          WRITE(*,'(a,i12)') 'INIT: WSTOREMAT failed WITH error', info
+          STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+       END IF
+    END IF
+    last_matid = last_matid+1
+    mat%matid = last_matid
+    current_matid = mat%matid
+!
+!  Initialize sparse matrice structure
+!
+    mat%rank = n
+    mat%nterms = nterms
+    mat%nnz = 0
+    mat%nlsym = .FALSE.
+    mat%nlherm = .FALSE.
+    mat%nlpos = .TRUE.
+    mat%nrhs = 1
+    mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val
+    IF(PRESENT(kmat)) mat%kmat = kmat
+    IF(PRESENT(nlsym)) mat%nlsym = nlsym
+    IF(PRESENT(nlherm)) mat%nlherm = nlherm
+    IF(PRESENT(nlpos)) mat%nlpos = nlpos
+    IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero
+!
+!    Matrix partition
+!
+    CALL dist1d(comm, 1, n, mat%istart, nloc)
+    mat%iend = mat%istart + nloc - 1
+    mat%rank_loc = nloc
+!
+    IF(ASSOCIATED(mat%mat)) DEALLOCATE(mat%mat)
+    ALLOCATE(mat%mat)
+    CALL init(n, mat%mat, mat%istart, mat%iend)
+!
+!  Fill 'iparm' and 'dparm' with default values
+!
+    mat%p%iparm(1:3) = 0
+    IF(mat%nlherm .OR. mat%nlsym) THEN
+       CALL pzssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, &
+            &      mat%perm, mat%invp, dummy, mat%rank_loc, mat%nrhs, &
+            &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
+            &      mat%p%dparm)
+       IF(mat%nlherm) THEN
+          IF(mat%nlpos) THEN
+             mat%p%iparm(31) = 0  ! hermitian, positive definite 
+          ELSE
+             mat%p%iparm(31) = 2  ! hermitian, no-definite, LDL^T with pivoting
+          END IF
+       ELSE
+          mat%p%iparm(31) = 3     ! non-hermitian, symmetric
+       END IF
+    ELSE
+       CALL pzgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, dummy, mat%rank, &
+            &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
+    END IF
+    IF(mat%p%iparm(64).NE.0) THEN
+       WRITE(*,'(a,i12)') 'INIT: Initialization failed with error', &
+            &             mat%p%iparm(64)
+       STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+    END IF
+!!$    WRITE(*,'(/a/(10i8))') 'iparm', mat%p%iparm
+!!$    WRITE(*,'(/a/(10(1pe8.1)))') 'dparm', mat%p%dparm
+!
+    CALL setup_wsmp(mat%p%iparm, mat%p%dparm)
+!
+  CONTAINS
+    SUBROUTINE setup_wsmp(iparm, dparm)
+      INTEGER          :: iparm(:)
+      DOUBLE PRECISION :: dparm(:)
+    END SUBROUTINE setup_wsmp
+  END SUBROUTINE init_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE check_wsmp_mat(mat)
+!
+!  Check matrice id and  recall the matrice if not current
+!
+    TYPE(wsmp_mat) :: mat
+    INTEGER :: info
+!
+    IF(.NOT.mat%nlsym) THEN
+       IF( mat%matid.NE.current_matid ) THEN      
+          WRITE(*,'(a)') "Processing multi matrices is not possible "// &
+               &         "for non-symetric matrices."
+          STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+       ELSE
+          RETURN
+       END IF
+    END IF
+!
+    IF( mat%matid.NE.current_matid ) THEN
+       IF(current_matid .GE. 0) THEN
+          CALL wstoremat(current_matid, info)
+          IF(info.NE.0) THEN
+             WRITE(*,'(a,i3,a,i12)') 'Store matrix', current_matid, &
+                  &               ' failed with error', info
+             STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+          END IF
+       END IF
+       CALL wrecallmat(mat%matid, info)
+       IF(info.NE.0) THEN
+          WRITE(*,'(a,i3,a,i12)') 'Recall matrix', mat%matid, &
+               &              ' failed with error', info
+          STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+       END IF
+       current_matid = mat%matid
+    END IF
+  END SUBROUTINE check_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE check_zwsmp_mat(mat)
+!
+!  Check matrice id and  recall the matrice if not current
+!
+    TYPE(zwsmp_mat) :: mat
+    INTEGER :: info
+!
+    IF(.NOT.mat%nlsym .AND. .NOT.mat%nlherm ) THEN
+       IF( mat%matid.NE.current_matid ) THEN      
+          WRITE(*,'(a)') "Processing multi matrices is not possible "// &
+               &         "for non-symetric/non-hermitian matrices."
+          STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+       ELSE
+          RETURN
+       END IF
+    END IF
+!
+    IF( mat%matid.NE.current_matid ) THEN
+       IF(current_matid .GE. 0) THEN
+          CALL wstoremat(current_matid, info)
+          IF(info.NE.0) THEN
+             WRITE(*,'(a,i3,a,i12)') 'Store matrix', current_matid, &
+                  &               ' failed with error', info
+             STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+          END IF
+       END IF
+       CALL wrecallmat(mat%matid, info)
+       IF(info.NE.0) THEN
+          WRITE(*,'(a,i3,a,i12)') 'Recall matrix', mat%matid, &
+               &              ' failed with error', info
+          STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+       END IF
+       current_matid = mat%matid
+    END IF
+  END SUBROUTINE check_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE clear_wsmp_mat(mat)
+!
+!   Clear matrix, keeping its sparse structure unchanged
+!
+    TYPE(wsmp_mat) :: mat
+!
+    mat%val = 0.0d0
+  END SUBROUTINE clear_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE clear_zwsmp_mat(mat)
+!
+!   Clear matrix, keeping its sparse structure unchanged
+!
+    TYPE(zwsmp_mat) :: mat
+!
+    mat%val = (0.0d0, 0.0d0)
+  END SUBROUTINE clear_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE updt_wsmp_mat(mat, i, j, val)
+!
+!   Update element Aij of wsmp  matrix
+!
+    TYPE(wsmp_mat)            :: mat
+    INTEGER, INTENT(in)          :: i, j
+    DOUBLE PRECISION, INTENT(in) :: val
+    INTEGER :: k, s, e
+!
+    IF(mat%nlsym) THEN   ! Store only upper part for symmetric matrices
+       IF(i.GT.j) RETURN
+    END IF
+    IF(i.LT.mat%istart .OR. i.GT.mat%iend) THEN
+       WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j
+       WRITE(*,'(a,2i6)') '      istart, iend      ', mat%istart, mat%iend
+       STOP '*** Abnormal EXIT in MODULE mumps_mod ***'
+    END IF
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL updtmat(mat%mat, i, j, val)
+    ELSE
+       s = mat%irow(i)
+       e = mat%irow(i+1)-1
+       k = isearch(mat%cols(s:e), j)
+       IF( k.GE.0 ) THEN
+          mat%val(s+k) =  mat%val(s+k)+val
+       ELSE
+          WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j
+          STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+       END IF
+    END IF
+  END SUBROUTINE updt_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE updt_zwsmp_mat(mat, i, j, val)
+!
+!   Update element Aij of wsmp  matrix
+!
+    TYPE(zwsmp_mat)            :: mat
+    INTEGER, INTENT(in)        :: i, j
+    DOUBLE COMPLEX, INTENT(in) :: val
+    INTEGER :: k, s, e
+!
+    IF(mat%nlherm .OR. mat%nlsym) THEN   ! Store only upper part
+       IF(i.GT.j) RETURN
+    END IF
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL updtmat(mat%mat, i, j, val)
+    ELSE
+       s = mat%irow(i)
+       e = mat%irow(i+1)-1
+       k = isearch(mat%cols(s:e), j)
+       IF( k.GE.0 ) THEN
+          IF(mat%nlherm) THEN
+             mat%val(s+k) =  mat%val(s+k)+CONJG(val)  ! CSR-UT* = CSC-LT
+          ELSE
+             mat%val(s+k) =  mat%val(s+k)+val
+       END IF
+       ELSE
+          WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j
+          STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+       END IF
+    END IF
+  END SUBROUTINE updt_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putele_wsmp_mat(mat, i, j, val)
+!
+!  Put element (i,j) of matrix
+!
+    TYPE(wsmp_mat)            :: mat
+    INTEGER, INTENT(in)          :: i, j
+    DOUBLE PRECISION, INTENT(in) :: val
+    INTEGER :: iput, jput
+    INTEGER :: k, s, e
+!
+    iput = i
+    jput = j
+    IF(mat%nlsym) THEN
+       IF( i.GT.j ) THEN    ! Lower triangular part
+          iput = j
+          jput = i
+       END IF
+    END IF
+!
+!    Do nothing if outside
+    IF(iput.LT.mat%istart .OR. iput.GT.mat%iend) RETURN
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL putele(mat%mat, iput, jput, val, &
+            &      nlforce_zero=mat%nlforce_zero)
+    ELSE
+       s = mat%irow(iput)
+       e = mat%irow(iput+1) - 1
+       k = isearch(mat%cols(s:e), jput)
+       IF( k.GE.0 ) THEN
+          mat%val(s+k) =  val
+       ELSE
+          IF(ABS(val) .GT. EPSILON(0.0d0)) THEN   ! Ok for zero val
+             WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j
+             STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+          END IF
+       END IF
+   END IF
+  END SUBROUTINE putele_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putele_zwsmp_mat(mat, i, j, val)
+!
+!  Put element (i,j) of matrix
+!
+    TYPE(zwsmp_mat)            :: mat
+    INTEGER, INTENT(in)        :: i, j
+    DOUBLE COMPLEX, INTENT(in) :: val
+    DOUBLE COMPLEX :: valput
+    INTEGER :: iput, jput
+    INTEGER :: k, s, e
+!
+    iput = i
+    jput = j
+    valput = val
+    IF(mat%nlsym .OR. mat%nlherm) THEN
+       IF( i.GT.j ) THEN    ! Lower triangular part
+          iput = j
+          jput = i
+          IF(mat%nlherm) THEN
+             valput = CONJG(val)
+          ELSE
+             valput = val
+          END IF
+       END IF
+    END IF
+!
+!    Do nothing if outside
+    IF(iput.LT.mat%istart .OR. iput.GT.mat%iend) RETURN
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL putele(mat%mat, iput, jput, valput, &
+            &      nlforce_zero=mat%nlforce_zero)
+    ELSE
+       s = mat%irow(iput)
+       e = mat%irow(iput+1) - 1
+       k = isearch(mat%cols(s:e), jput)
+       IF( k.GE.0 ) THEN
+          IF(mat%nlherm) THEN
+             mat%val(s+k) = CONJG(valput)   ! CSR-UT* = CSC-LT
+          ELSE
+             mat%val(s+k) =  valput
+          END IF
+       ELSE
+          IF(ABS(val) .GT. EPSILON(0.0d0)) THEN   ! Ok for zero val
+             WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j
+             STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+          END IF
+       END IF
+   END IF
+ END SUBROUTINE putele_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getele_wsmp_mat(mat, i, j, val)
+!
+!   Get element (i,j) of sparse matrix
+!
+    TYPE(wsmp_mat)             :: mat
+    INTEGER, INTENT(in)           :: i, j
+    DOUBLE PRECISION, INTENT(out) :: val
+    INTEGER :: iget, jget
+    INTEGER :: k, s, e
+!
+    iget = i
+    jget = j
+    IF(mat%nlsym) THEN
+       IF( i.GT.j ) THEN    ! Lower triangular part
+          iget = j
+          jget = i
+       END IF
+    END IF
+!
+    val = 0.0d0   ! Assume zero val if outside
+    IF( iget.LT.mat%istart .OR. iget.GT.mat%iend ) RETURN
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL getele(mat%mat, iget, jget, val)
+    ELSE
+       s = mat%irow(iget)
+       e = mat%irow(iget+1) - 1
+       k = isearch(mat%cols(s:e), jget)
+       IF( k.GE.0 ) THEN
+          val =mat%val(s+k) 
+       ELSE
+          val = 0.0d0   ! Assume zero val if not found
+       END IF
+    END IF
+  END SUBROUTINE getele_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getele_zwsmp_mat(mat, i, j, val)
+!
+!   Get element (i,j) of sparse matrix
+!
+    TYPE(zwsmp_mat)             :: mat
+    INTEGER, INTENT(in)         :: i, j
+    DOUBLE COMPLEX, INTENT(out) :: val
+    DOUBLE COMPLEX :: valget
+    INTEGER :: iget, jget
+    INTEGER :: k, s, e
+!
+    iget = i
+    jget = j
+    IF(mat%nlherm .OR. mat%nlsym) THEN
+       IF( i.GT.j ) THEN    ! Lower triangular part
+          iget = j
+          jget = i
+       END IF
+    END IF
+!
+    val = (0.0d0, 0.0d0)   ! Assume zero val if outside
+    IF( iget.LT.mat%istart .OR. iget.GT.mat%iend ) RETURN
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL getele(mat%mat, iget, jget, valget)
+    ELSE
+       s = mat%irow(iget)
+       e = mat%irow(iget+1) - 1
+       k = isearch(mat%cols(s:e), jget)
+       IF( k.GE.0 ) THEN
+          IF(mat%nlherm) THEN
+             valget = CONJG(mat%val(s+k))   ! CSR-UT* = CSC-LT
+          ELSE
+             valget = mat%val(s+k) 
+          END IF
+       ELSE
+          valget = (0.0d0,0.0d0)   ! Assume zero val if not found
+       END IF
+    END IF
+    val = valget
+    IF( i.GT.j ) THEN
+       IF(mat%nlherm) THEN 
+          val = CONJG(valget)
+       END IF
+    END IF
+  END SUBROUTINE getele_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putrow_wsmp_mat(amat, i, arr)
+!
+! Put a row into sparse matrix
+!
+    TYPE(wsmp_mat), INTENT(inout) :: amat
+    INTEGER, INTENT(in)              :: i
+    DOUBLE PRECISION, INTENT(in)     :: arr(:)
+    INTEGER :: j
+!
+    DO j=1,amat%rank
+       CALL putele(amat, i, j, arr(j))
+    END DO
+  END SUBROUTINE putrow_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putrow_zwsmp_mat(amat, i, arr)
+!
+! Put a row into sparse matrix
+!
+    TYPE(zwsmp_mat), INTENT(inout) :: amat
+    INTEGER, INTENT(in)            :: i
+    DOUBLE COMPLEX, INTENT(in)     :: arr(:)
+    INTEGER :: j
+!
+    DO j=1,amat%rank
+       CALL putele(amat, i, j, arr(j))
+    END DO
+  END SUBROUTINE putrow_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getrow_wsmp_mat(amat, i, arr)
+!
+!   Get a row from sparse matrix
+!
+    TYPE(wsmp_mat), INTENT(in)  :: amat
+    INTEGER, INTENT(in)            :: i
+    DOUBLE PRECISION, INTENT(out)  :: arr(:)
+    INTEGER :: j
+!
+    DO j=1,amat%rank
+       CALL getele(amat, i, j, arr(j))
+    END DO
+  END SUBROUTINE getrow_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getrow_zwsmp_mat(amat, i, arr)
+!
+!   Get a row from sparse matrix
+!
+    TYPE(zwsmp_mat), INTENT(in)  :: amat
+    INTEGER, INTENT(in)          :: i
+    DOUBLE COMPLEX, INTENT(out)  :: arr(:)
+    INTEGER :: j
+!
+    DO j=1,amat%rank
+       CALL getele(amat, i, j, arr(j))
+    END DO
+  END SUBROUTINE getrow_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putcol_wsmp_mat(amat, j, arr)
+!
+! Put a column into sparse matrix
+!
+    TYPE(wsmp_mat), INTENT(inout) :: amat
+    INTEGER, INTENT(in)              :: j
+    DOUBLE PRECISION, INTENT(in)     :: arr(:)
+    INTEGER :: i
+!
+    DO i=amat%istart,amat%iend
+       CALL putele(amat, i, j, arr(i))
+    END DO
+  END SUBROUTINE putcol_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putcol_zwsmp_mat(amat, j, arr)
+!
+! Put a column into sparse matrix
+!
+    TYPE(zwsmp_mat), INTENT(inout) :: amat
+    INTEGER, INTENT(in)            :: j
+    DOUBLE COMPLEX, INTENT(in)     :: arr(:)
+    INTEGER :: i
+!
+    DO i=amat%istart,amat%iend
+       CALL putele(amat, i, j, arr(i))
+    END DO
+  END SUBROUTINE putcol_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getcol_wsmp_mat(amat, j, arr)
+!
+!   Get a column from sparse matrix
+!
+    TYPE(wsmp_mat), INTENT(in)  :: amat
+    INTEGER, INTENT(in)            :: j
+    DOUBLE PRECISION, INTENT(out)  :: arr(:)
+    INTEGER :: i
+!
+    DO i=amat%istart,amat%iend
+       CALL getele(amat, i, j, arr(i))
+    END DO
+  END SUBROUTINE getcol_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getcol_zwsmp_mat(amat, j, arr)
+!
+!   Get a column from sparse matrix
+!
+    TYPE(zwsmp_mat), INTENT(in)  :: amat
+    INTEGER, INTENT(in)          :: j
+    DOUBLE COMPLEX, INTENT(out)  :: arr(:)
+    INTEGER :: i
+!
+    DO i=amat%istart,amat%iend
+       CALL getele(amat, i, j, arr(i))
+    END DO
+  END SUBROUTINE getcol_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  INTEGER FUNCTION get_count_wsmp_mat(mat, nnz)
+!
+!   Number of non-zeros in sparse matrix
+!
+    TYPE(wsmp_mat)                 :: mat
+    INTEGER, INTENT(out), OPTIONAL :: nnz(:)
+    INTEGER :: i
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       get_count_wsmp_mat = get_count(mat%mat, nnz)
+    ELSE
+       get_count_wsmp_mat = mat%nnz
+       IF(PRESENT(nnz)) THEN
+          DO i=mat%istart,mat%iend
+             nnz(i) = mat%irow(i+1)-mat%irow(i)
+          END DO
+       END IF
+    END IF
+  END FUNCTION get_count_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  INTEGER FUNCTION get_count_zwsmp_mat(mat, nnz)
+!
+!   Number of non-zeros in sparse matrix
+!
+    TYPE(zwsmp_mat)                :: mat
+    INTEGER, INTENT(out), OPTIONAL :: nnz(:)
+    INTEGER :: i
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       get_count_zwsmp_mat = get_count(mat%mat, nnz)
+    ELSE
+       get_count_zwsmp_mat = mat%nnz
+       IF(PRESENT(nnz)) THEN
+          DO i=mat%istart,mat%iend
+             nnz(i) = mat%irow(i+1)-mat%irow(i)
+          END DO
+       END IF
+    END IF
+  END FUNCTION get_count_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE to_wsmp_mat(mat, nlkeep)
+!
+!   Convert linked list spmat to wsmp matrice structure
+!
+    INCLUDE 'mpif.h'
+    TYPE(wsmp_mat) :: mat
+    LOGICAL, INTENT(in), OPTIONAL :: nlkeep
+    INTEGER :: i, nnz, rank, s, e
+    INTEGER :: comm, ierr, nnz_loc, rank_loc
+    LOGICAL :: nlclean
+!
+    nlclean = .TRUE.
+    IF(PRESENT(nlkeep)) THEN
+       nlclean = .NOT. nlkeep
+    END IF
+!
+    comm = mat%comm
+!
+!    Allocate the WSMP matrix structure
+!
+    rank = mat%rank
+    rank_loc = mat%rank
+!
+    nnz_loc = get_count(mat)
+    mat%nnz_start = 0
+    CALL mpi_exscan(nnz_loc, mat%nnz_start, 1, MPI_INTEGER, MPI_SUM, comm, ierr)
+    mat%nnz_start = mat%nnz_start + 1
+    mat%nnz_end = mat%nnz_start + nnz_loc - 1
+    mat%nnz_loc = nnz_loc
+    CALL mpi_allreduce(nnz_loc, mat%nnz, 1, MPI_INTEGER, MPI_SUM, comm, ierr)
+!
+    IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
+    IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
+    IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm)
+    IF(ASSOCIATED(mat%invp)) DEALLOCATE(mat%invp)
+    IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
+!
+!    Allocate LOCAL irow, cols and val
+    IF(mat%nlsym) THEN
+       ALLOCATE(mat%perm(rank))
+       ALLOCATE(mat%invp(rank))
+    END IF
+    ALLOCATE(mat%val(nnz_loc))
+    ALLOCATE(mat%cols(nnz_loc))
+    ALLOCATE(mat%irow(mat%istart:mat%iend+1))
+!
+!    Fill WSMP structure and deallocate the sparse rows
+!
+    mat%irow(mat%istart) = 1
+    DO i=mat%istart,mat%iend
+       mat%irow(i+1) = mat%irow(i) + mat%mat%row(i)%nnz
+       s = mat%irow(i)
+       e = mat%irow(i+1)+1
+       CALL getrow(mat%mat%row(i), mat%val(s:e), mat%cols(s:e))
+       IF(nlclean) CALL destroy(mat%mat%row(i))
+    END DO
+    IF(nlclean) DEALLOCATE(mat%mat)
+  END SUBROUTINE to_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE to_zwsmp_mat(mat, nlkeep)
+!
+!   Convert linked list spmat to wsmp matrice structure
+!
+    INCLUDE 'mpif.h'
+    TYPE(zwsmp_mat) :: mat
+    LOGICAL, INTENT(in), OPTIONAL :: nlkeep
+    INTEGER :: i, nnz, rank, s, e
+    INTEGER :: comm, ierr, nnz_loc, rank_loc
+    LOGICAL :: nlclean
+!
+    nlclean = .TRUE.
+    IF(PRESENT(nlkeep)) THEN
+       nlclean = .NOT. nlkeep
+    END IF
+!
+    comm = mat%comm
+!
+!    Allocate the WSMP matrix structure
+!
+    rank = mat%rank
+    rank_loc = mat%rank
+!
+    nnz_loc = get_count(mat)
+    mat%nnz_start = 0
+    CALL mpi_exscan(nnz_loc, mat%nnz_start, 1, MPI_INTEGER, MPI_SUM, comm, ierr)
+    mat%nnz_start = mat%nnz_start + 1
+    mat%nnz_end = mat%nnz_start + nnz_loc - 1
+    mat%nnz_loc = nnz_loc
+    CALL mpi_allreduce(nnz_loc, mat%nnz, 1, MPI_INTEGER, MPI_SUM, comm, ierr)
+!
+    IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
+    IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
+    IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm)
+    IF(ASSOCIATED(mat%invp)) DEALLOCATE(mat%invp)
+    IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
+!
+!    Allocate LOCAL irow, cols and val
+    IF(mat%nlsym) THEN
+       ALLOCATE(mat%perm(rank))
+       ALLOCATE(mat%invp(rank))
+    END IF
+    ALLOCATE(mat%val(nnz_loc))
+    ALLOCATE(mat%cols(nnz_loc))
+    ALLOCATE(mat%irow(mat%istart:mat%iend+1))
+!
+!    Fill WSMP structure and deallocate the sparse rows
+!
+    mat%irow(mat%istart) = 1
+    DO i=mat%istart,mat%iend
+       mat%irow(i+1) = mat%irow(i) + mat%mat%row(i)%nnz
+       s = mat%irow(i)
+       e = mat%irow(i+1)+1
+       CALL getrow(mat%mat%row(i), mat%val(s:e), mat%cols(s:e))
+       IF(nlclean) CALL destroy(mat%mat%row(i))
+    END DO
+    IF(mat%nlherm) THEN
+       mat%val(:) = CONJG(mat%val(:))  ! CSR-UT* = CSC-LT
+    END IF
+    IF(nlclean) DEALLOCATE(mat%mat)
+  END SUBROUTINE to_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE reord_wsmp_mat(mat)
+!
+!   Reordering and symbolic factorization
+!
+    TYPE(wsmp_mat) :: mat
+    DOUBLE PRECISION :: dummy
+!
+!    Recall the matrice if not current
+!
+    CALL check_mat(mat)
+!
+    IF(mat%nlsym) THEN
+       mat%p%iparm(2) = 1  ! Ordering
+       mat%p%iparm(3) = 2  ! Symbolic factorization
+       CALL pwssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, &
+            &      mat%perm, mat%invp, dummy, mat%rank_loc, mat%nrhs, &
+            &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
+            &      mat%p%dparm)
+    ELSE
+       mat%p%iparm(2) = 1  ! Analysis and reordering
+       mat%p%iparm(3) = 1
+       CALL pwgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, dummy, mat%rank_loc, &
+            &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
+    END IF
+    IF(mat%p%iparm(64).NE.0) THEN
+       WRITE(*,'(a,i12)') 'REORD: Reordering failed with error', mat%p%iparm(64)
+       STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+    END IF
+  END SUBROUTINE reord_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE reord_zwsmp_mat(mat)
+!
+!   Reordering and symbolic factorization
+!
+    TYPE(zwsmp_mat)             :: mat
+    DOUBLE COMPLEX :: dummy
+!
+!    Recall the matrice if not current
+!
+    CALL check_mat(mat)
+!
+    IF(mat%nlsym .OR. mat%nlherm) THEN
+       mat%p%iparm(2) = 1  ! Ordering
+       mat%p%iparm(3) = 2  ! Symbolic factorization
+       CALL zssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, &
+            &      mat%perm, mat%invp, dummy, mat%rank_loc, mat%nrhs, &
+            &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
+            &      mat%p%dparm)
+!!$       WRITE(*,'(a,i3/(10i8))') 'REORD: matrice', mat%matid, mat%perm
+    ELSE
+       mat%p%iparm(2) = 1  ! Analysis and reordering
+       mat%p%iparm(3) = 1
+       CALL zgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, dummy, mat%rank_loc, &
+            &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
+    END IF
+    IF(mat%p%iparm(64).NE.0) THEN
+       WRITE(*,'(a,i12)') 'REORD: Reordering failed with error', mat%p%iparm(64)
+       STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+    END IF
+  END SUBROUTINE reord_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE numfact_wsmp_mat(mat)
+!
+!   Numerical factorization
+!
+    TYPE(wsmp_mat) :: mat
+    DOUBLE PRECISION :: dummy
+!
+!    Recall the matrice if not current
+!
+    CALL check_mat(mat)
+!
+    IF(mat%nlsym) THEN
+       mat%p%iparm(2) = 3   ! Numerical factorization
+       mat%p%iparm(3) = 3
+       CALL pwssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, &
+            &      mat%perm, mat%invp, dummy, mat%rank_loc, mat%nrhs, &
+            &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
+            &      mat%p%dparm)
+    ELSE
+       mat%p%iparm(2) = 2  ! Factorization
+       mat%p%iparm(3) = 2
+       CALL pwgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, dummy, mat%rank_loc, &
+            &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
+    END IF
+    IF(mat%p%iparm(64).NE.0) THEN
+       WRITE(*,'(a,i12)') 'NUMFACT: Num. Factor failed with error', mat%p%iparm(64)
+       STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+    END IF
+  END SUBROUTINE numfact_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE numfact_zwsmp_mat(mat)
+!
+!   Numerical factorization
+!
+    TYPE(zwsmp_mat) :: mat
+    DOUBLE COMPLEX :: dummy
+!
+!    Recall the matrice if not current
+!
+    CALL check_mat(mat)
+!
+    IF(mat%nlsym .OR. mat%nlherm) THEN
+       mat%p%iparm(2) = 3   ! Numerical factorization
+       mat%p%iparm(3) = 3
+       CALL zssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, &
+            &      mat%perm, mat%invp, dummy, mat%rank_loc, mat%nrhs, &
+            &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
+            &      mat%p%dparm)
+    ELSE
+       mat%p%iparm(2) = 2  ! Factorization
+       mat%p%iparm(3) = 2
+       CALL zgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, dummy, mat%rank_loc, &
+            &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
+    END IF
+    IF(mat%p%iparm(64).NE.0) THEN
+       WRITE(*,'(a,i12)') 'NUMFACT: Num. Factor failed with error', mat%p%iparm(64)
+       STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+    END IF
+  END SUBROUTINE numfact_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE factor_wsmp_mat(mat, nlreord)
+!
+!  Factor (create  +reorder + factor) a wsmp_mat matrix
+!
+    TYPE(wsmp_mat)                :: mat
+    LOGICAL, OPTIONAL, INTENT(in) :: nlreord
+    LOGICAL :: mlreord
+!----------------------------------------------------------------------
+!               1.0  Creation from the sparse rows
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL to_mat(mat)
+    END IF
+!----------------------------------------------------------------------
+!               2.0  Reordering and symbolic factorization phase
+!
+    mlreord = .TRUE.
+    IF(PRESENT(nlreord)) mlreord = nlreord
+    IF(mlreord) THEN
+       CALL reord_mat(mat)
+    END IF
+!----------------------------------------------------------------------
+!               3.0  Numerical factorization
+!
+    CALL numfact(mat)
+  END SUBROUTINE factor_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE factor_zwsmp_mat(mat, nlreord)
+!
+!  Factor (create  +reorder + factor) a wsmp_mat matrix
+!
+    TYPE(zwsmp_mat)               :: mat
+    LOGICAL, OPTIONAL, INTENT(in) :: nlreord
+    LOGICAL :: mlreord
+!----------------------------------------------------------------------
+!               1.0  Creation from the sparse rows
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL to_mat(mat)
+    END IF
+!----------------------------------------------------------------------
+!               2.0  Reordering and symbolic factorization phase
+!
+    mlreord = .TRUE.
+    IF(PRESENT(nlreord)) mlreord = nlreord
+    IF(mlreord) THEN
+       CALL reord_mat(mat)
+    END IF
+!----------------------------------------------------------------------
+!               3.0  Numerical factorization
+!
+    CALL numfact(mat)
+  END SUBROUTINE factor_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE bsolve_wsmp_mat1(mat, rhs, sol, nref)
+!
+!   Backsolve, using Wsmp
+!
+    INCLUDE 'mpif.h'
+    TYPE(wsmp_mat)             :: mat
+    DOUBLE PRECISION           :: rhs(:)
+    DOUBLE PRECISION, OPTIONAL :: sol(:)
+    INTEGER, OPTIONAL          :: nref
+!
+    DOUBLE PRECISION :: sol_loc(mat%rank_loc)
+    INTEGER :: nloc, me, nprocs, ierr, i
+    INTEGER, ALLOCATABLE :: nlocs(:), displs(:)
+    DOUBLE PRECISION :: dummy
+!
+!    Recall the matrice if not current
+!
+    CALL check_mat(mat)
+!
+    IF(mat%nlsym) THEN
+       mat%p%iparm(2) = 4  ! Back substitution
+       mat%p%iparm(3) = 4
+    ELSE
+       mat%p%iparm(2) = 3  ! Back substitution
+       mat%p%iparm(3) = 3
+    END IF
+    mat%p%iparm(6) = 0  ! Max numbers of iterative refinement steps
+    IF(PRESENT(nref)) THEN
+       IF(mat%nlsym) THEN
+          mat%p%iparm(3) = 5
+       ELSE
+          mat%p%iparm(3) = 4
+       END IF
+       mat%p%iparm(6) = nref
+    END IF
+    mat%nrhs = 1
+!
+!  Extract local rhs from global rhs
+!
+    sol_loc = rhs(mat%istart:mat%iend)
+!
+    IF(mat%nlsym) THEN
+       CALL pwssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, &
+            &      mat%perm, mat%invp, sol_loc, mat%rank_loc, mat%nrhs, &
+            &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
+            &      mat%p%dparm)
+    ELSE
+       CALL pwgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, sol_loc, &
+            &     mat%rank_loc, mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
+    END IF
+!
+    IF(mat%p%iparm(64).NE.0) THEN
+       WRITE(*,'(a,i12)') 'BSOLVE: Failed with error', mat%p%iparm(64)
+       STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+    END IF
+!
+!   Allgatherv local sol
+!
+    CALL mpi_comm_rank(mat%comm, me, ierr)
+    CALL mpi_comm_size(mat%comm, nprocs, ierr)
+!
+    ALLOCATE(displs(0:nprocs))
+    ALLOCATE(nlocs(0:nprocs-1))
+!
+    nloc = mat%rank_loc
+    CALL mpi_allgather(nloc, 1, MPI_INTEGER, nlocs, 1, MPI_INTEGER, &
+         &   mat%comm, ierr)
+!
+    displs(0) = 0
+    DO i=0,nprocs-1
+       displs(i+1) = displs(i)+nlocs(i)
+    END DO
+!
+    IF(PRESENT(sol)) THEN
+       CALL mpi_allgatherv(sol_loc, nloc, MPI_DOUBLE_PRECISION, &
+            &   sol, nlocs, displs, MPI_DOUBLE_PRECISION, &
+            &   mat%comm, ierr)
+    ELSE
+       CALL mpi_allgatherv(sol_loc, nloc, MPI_DOUBLE_PRECISION, &
+            &   rhs, nlocs, displs, MPI_DOUBLE_PRECISION, &
+            &   mat%comm, ierr)
+    END IF
+!
+    DEALLOCATE(nlocs)
+    DEALLOCATE(displs)
+!
+   END SUBROUTINE bsolve_wsmp_mat1
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE bsolve_zwsmp_mat1(mat, rhs, sol, nref)
+!
+!   Backsolve, using Wsmp
+!
+    INCLUDE 'mpif.h'
+    TYPE(zwsmp_mat)          :: mat
+    DOUBLE COMPLEX           :: rhs(:)
+    DOUBLE COMPLEX, OPTIONAL :: sol(:)
+    INTEGER, OPTIONAL        :: nref
+!
+    DOUBLE COMPLEX :: sol_loc(mat%rank_loc)
+    INTEGER :: nloc, me, nprocs, ierr, i
+    INTEGER, ALLOCATABLE :: nlocs(:), displs(:)
+    DOUBLE COMPLEX :: dummy
+!
+!    Recall the matrice if not current
+!
+    CALL check_mat(mat)
+!
+    IF(mat%nlsym .OR. mat%nlherm) THEN
+       mat%p%iparm(2) = 4  ! Back substitution
+       mat%p%iparm(3) = 4
+    ELSE
+       mat%p%iparm(2) = 3  ! Back substitution
+       mat%p%iparm(3) = 3
+    END IF
+    mat%p%iparm(6) = 0  ! Max numbers of iterative refinement steps
+    IF(PRESENT(nref)) THEN
+       IF(mat%nlsym .OR. mat%nlherm) THEN
+          mat%p%iparm(3) = 5
+       ELSE
+          mat%p%iparm(3) = 4
+       END IF
+       mat%p%iparm(6) = nref
+    END IF
+    mat%nrhs = 1
+!
+!  Extract local rhs from global rhs
+!
+    sol_loc = rhs(mat%istart:mat%iend)
+!
+    IF(mat%nlsym .OR. mat%nlherm) THEN
+       CALL pzssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, &
+            &      mat%perm, mat%invp, sol_loc, mat%rank_loc, mat%nrhs, &
+            &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
+            &      mat%p%dparm)
+    ELSE
+       CALL pzgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, sol_loc, &
+            &      mat%rank_loc,  mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
+    END IF
+!
+    IF(mat%p%iparm(64).NE.0) THEN
+       WRITE(*,'(a,i12)') 'BSOLVE: Failed with error', mat%p%iparm(64)
+       STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+    END IF
+!
+!   Allgatherv local sol
+!
+    CALL mpi_comm_rank(mat%comm, me, ierr)
+    CALL mpi_comm_size(mat%comm, nprocs, ierr)
+!
+    ALLOCATE(displs(0:nprocs))
+    ALLOCATE(nlocs(0:nprocs-1))
+!
+    nloc = mat%rank_loc
+    CALL mpi_allgather(nloc, 1, MPI_INTEGER, nlocs, 1, MPI_INTEGER, &
+         &   mat%comm, ierr)
+!
+    displs(0) = 0
+    DO i=0,nprocs-1
+       displs(i+1) = displs(i)+nlocs(i)
+    END DO
+!
+    IF(PRESENT(sol)) THEN
+       CALL mpi_allgatherv(sol_loc, nloc, MPI_DOUBLE_COMPLEX, &
+            &   sol, nlocs, displs, MPI_DOUBLE_COMPLEX, &
+            &   mat%comm, ierr)
+    ELSE
+       CALL mpi_allgatherv(sol_loc, nloc, MPI_DOUBLE_COMPLEX, &
+            &   rhs, nlocs, displs, MPI_DOUBLE_COMPLEX, &
+            &   mat%comm, ierr)
+    END IF
+!
+    DEALLOCATE(nlocs)
+    DEALLOCATE(displs)
+!
+  END SUBROUTINE bsolve_zwsmp_mat1
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE bsolve_wsmp_matn(mat, rhs, sol, nref)
+!
+!   Backsolve, using Wsmp, multiple RHS
+!
+    INCLUDE 'mpif.h'
+    TYPE(wsmp_mat)             :: mat
+    DOUBLE PRECISION           :: rhs(:,:)
+    DOUBLE PRECISION, OPTIONAL :: sol(:,:)
+    INTEGER, OPTIONAL          :: nref
+!
+    DOUBLE PRECISION :: sol_loc(mat%rank_loc,SIZE(rhs,2))
+    INTEGER :: nloc, me, nprocs, ierr, i
+    INTEGER, ALLOCATABLE :: nlocs(:), displs(:)
+    DOUBLE PRECISION :: dummy
+!
+!    Recall the matrice if not current
+!
+    CALL check_mat(mat)
+!
+    IF(mat%nlsym) THEN
+       mat%p%iparm(2) = 4  ! Back substitution
+       mat%p%iparm(3) = 4
+    ELSE
+       mat%p%iparm(2) = 3  ! Back substitution
+       mat%p%iparm(3) = 3
+    END IF
+    mat%p%iparm(6) = 0  ! Max numbers of iterative refinement steps
+    IF(PRESENT(nref)) THEN
+       IF(mat%nlsym) THEN
+          mat%p%iparm(3) = 5
+       ELSE
+          mat%p%iparm(3) = 4
+       END IF
+       mat%p%iparm(6) = nref
+    END IF
+    mat%nrhs = SIZE(rhs,2)
+!
+!  Extract local rhs from global rhs
+!
+    sol_loc(:,:) = rhs(mat%istart:mat%iend,:)
+!
+    IF(mat%nlsym) THEN
+       CALL pwssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, &
+            &      mat%perm, mat%invp, sol_loc, mat%rank_loc, mat%nrhs, &
+            &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
+            &      mat%p%dparm)
+    ELSE
+       CALL pwgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, sol_loc, &
+            &     mat%rank_loc, mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
+    END IF
+!
+    IF(mat%p%iparm(64).NE.0) THEN
+       WRITE(*,'(a,i12)') 'BSOLVE: Failed with error', mat%p%iparm(64)
+       STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+    END IF
+!
+!   Allgatherv local sol
+!
+    CALL mpi_comm_rank(mat%comm, me, ierr)
+    CALL mpi_comm_size(mat%comm, nprocs, ierr)
+!
+    ALLOCATE(displs(0:nprocs))
+    ALLOCATE(nlocs(0:nprocs-1))
+!
+    nloc = mat%rank_loc
+    CALL mpi_allgather(nloc, 1, MPI_INTEGER, nlocs, 1, MPI_INTEGER, &
+         &   mat%comm, ierr)
+!
+    displs(0) = 0
+    DO i=0,nprocs-1
+       displs(i+1) = displs(i)+nlocs(i)
+    END DO
+!
+    DO i=1,mat%nrhs
+       IF(PRESENT(sol)) THEN
+          CALL mpi_allgatherv(sol_loc(1,i), nloc, MPI_DOUBLE_PRECISION, &
+               &   sol(1,i), nlocs, displs, MPI_DOUBLE_PRECISION, &
+               &   mat%comm, ierr)
+       ELSE
+          CALL mpi_allgatherv(sol_loc(1,i), nloc, MPI_DOUBLE_PRECISION, &
+               &   rhs(1,i), nlocs, displs, MPI_DOUBLE_PRECISION, &
+               &   mat%comm, ierr)
+       END IF
+    END DO
+!
+    DEALLOCATE(nlocs)
+    DEALLOCATE(displs)
+!
+  END SUBROUTINE bsolve_wsmp_matn
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE bsolve_zwsmp_matn(mat, rhs, sol, nref)
+!
+!   Backsolve, using Wsmp, multiple RHS
+!
+    INCLUDE 'mpif.h'
+    TYPE(zwsmp_mat)          :: mat
+    DOUBLE COMPLEX           :: rhs(:,:)
+    DOUBLE COMPLEX, OPTIONAL :: sol(:,:)
+    INTEGER, OPTIONAL        :: nref
+!
+    DOUBLE COMPLEX :: sol_loc(mat%rank_loc,SIZE(rhs,2))
+    INTEGER :: nloc, me, nprocs, ierr, i
+    INTEGER, ALLOCATABLE :: nlocs(:), displs(:)
+    DOUBLE COMPLEX :: dummy
+!
+!    Recall the matrice if not current
+!
+    CALL check_mat(mat)
+!
+    IF(mat%nlsym .or. mat%nlherm) THEN
+       mat%p%iparm(2) = 4  ! Back substitution
+       mat%p%iparm(3) = 4
+    ELSE
+       mat%p%iparm(2) = 3  ! Back substitution
+       mat%p%iparm(3) = 3
+    END IF
+    mat%p%iparm(6) = 0  ! Max numbers of iterative refinement steps
+    IF(PRESENT(nref)) THEN
+       IF(mat%nlsym .OR. mat%nlherm) THEN
+          mat%p%iparm(3) = 5
+       ELSE
+          mat%p%iparm(3) = 4
+       END IF
+       mat%p%iparm(6) = nref
+    END IF
+    mat%nrhs = SIZE(rhs,2)
+!
+!  Extract local rhs from global rhs
+!
+    sol_loc(:,:) = rhs(mat%istart:mat%iend,:)
+!
+    IF(mat%nlsym) THEN
+       CALL pzssmp(mat%rank_loc, mat%irow, mat%cols, mat%val, mat%diag, &
+            &      mat%perm, mat%invp, sol_loc, mat%rank_loc, mat%nrhs, &
+            &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
+            &      mat%p%dparm)
+    ELSE
+       CALL pzgsmp(mat%rank_loc, mat%irow, mat%cols, mat%val, sol_loc, &
+            &     mat%rank_loc, mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
+    END IF
+!
+    IF(mat%p%iparm(64).NE.0) THEN
+       WRITE(*,'(a,i12)') 'BSOLVE: Failed with error', mat%p%iparm(64)
+       STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+    END IF
+!
+!   Allgatherv local sol
+!
+    CALL mpi_comm_rank(mat%comm, me, ierr)
+    CALL mpi_comm_size(mat%comm, nprocs, ierr)
+!
+    ALLOCATE(displs(0:nprocs))
+    ALLOCATE(nlocs(0:nprocs-1))
+!
+    nloc = mat%rank_loc
+    CALL mpi_allgather(nloc, 1, MPI_INTEGER, nlocs, 1, MPI_INTEGER, &
+         &   mat%comm, ierr)
+!
+    displs(0) = 0
+    DO i=0,nprocs-1
+       displs(i+1) = displs(i)+nlocs(i)
+    END DO
+!
+    DO i=1,mat%nrhs
+       IF(PRESENT(sol)) THEN
+          CALL mpi_allgatherv(sol_loc(1,i), nloc, MPI_DOUBLE_COMPLEX, &
+               &   sol(1,i), nlocs, displs, MPI_DOUBLE_COMPLEX, &
+               &   mat%comm, ierr)
+       ELSE
+          CALL mpi_allgatherv(sol_loc(1,i), nloc, MPI_DOUBLE_COMPLEX, &
+               &   rhs(1,i), nlocs, displs, MPI_DOUBLE_COMPLEX, &
+               &   mat%comm, ierr)
+       END IF
+    END DO
+!
+    DEALLOCATE(nlocs)
+    DEALLOCATE(displs)
+!
+  END SUBROUTINE bsolve_zwsmp_matn
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  FUNCTION vmx_wsmp_mat(mat, xarr) RESULT(yarr)
+!
+!   Return product mat*x
+!
+    TYPE(wsmp_mat)                :: mat
+    DOUBLE PRECISION, INTENT(in)  :: xarr(:)
+    DOUBLE PRECISION              :: yarr(SIZE(xarr))
+    DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0
+    CHARACTER(len=6) :: matdescra
+    INTEGER :: n, i, j
+!
+    n = mat%rank
+!
+#ifdef MKL
+    IF(mat%nlsym) THEN
+       matdescra = 'sun'
+    ELSE
+       matdescra = 'g'
+    END IF
+!
+    CALL mkl_dcsrmv('N', n, n, alpha, matdescra, mat%val, &
+         &          mat%cols, mat%irow(1), mat%irow(2), xarr, &
+         &          beta, yarr)
+#else
+    yarr = 0.0d0
+    DO i=1,n
+       DO j=mat%irow(i), mat%irow(i+1)-1
+          yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j))
+       END DO
+       IF(mat%nlsym) THEN
+          DO j=mat%irow(i)+1, mat%irow(i+1)-1
+             yarr(mat%cols(j)) = yarr(mat%cols(j)) &
+                  &              + mat%val(j)*xarr(i)
+          END DO
+       END IF
+    END DO
+#endif
+!
+  END FUNCTION vmx_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  FUNCTION vmx_zwsmp_mat(mat, xarr) RESULT(yarr)
+!
+!   Return product mat*x
+!
+    TYPE(zwsmp_mat)             :: mat
+    DOUBLE COMPLEX, INTENT(in)  :: xarr(:)
+    DOUBLE COMPLEX              :: yarr(SIZE(xarr))
+    DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0)
+    INTEGER :: n, i, j
+    CHARACTER(len=6) :: matdescra
+    CHARACTER(len=1) :: transa
+!
+    n = mat%rank
+!
+#ifdef MKL
+    IF(mat%nlsym) THEN
+       matdescra = 'sun'
+    ELSE IF(mat%nlherm) THEN
+       matdescra = 'hun'
+    ELSE
+       matdescra = 'g'
+    END IF
+    transa='N'
+    IF(mat%nlherm) THEN
+       transa='T'   ! Transpose(CSR-UT*) = CSC-LT* = CSR-UT
+    END IF
+    CALL mkl_zcsrmv(transa, n, n, alpha, matdescra, mat%val, &
+         &          mat%cols, mat%irow(1), mat%irow(2), xarr, &
+         &          beta, yarr)
+#else
+    yarr = (0.0d0,0.0d0)
+    DO i=1,n
+       IF(mat%nlherm) THEN ! CSR-UT = (CSR-UT*)*
+          DO j=mat%irow(i), mat%irow(i+1)-1
+             yarr(i) = yarr(i) + CONJG(mat%val(j))*xarr(mat%cols(j))
+          END DO
+       ELSE
+          DO j=mat%irow(i), mat%irow(i+1)-1
+             yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j))
+          END DO
+       END IF
+       IF(mat%nlsym) THEN
+          DO j=mat%irow(i)+1, mat%irow(i+1)-1
+             yarr(mat%cols(j)) = yarr(mat%cols(j)) &
+                  &              + mat%val(j)*xarr(i)
+          END DO
+       ELSE IF(mat%nlherm) THEN ! CSR-UT = (CSR-UT*)*
+          DO j=mat%irow(i)+1, mat%irow(i+1)-1
+             yarr(mat%cols(j)) = yarr(mat%cols(j)) &
+                  &              + mat%val(j)*xarr(i)
+          END DO
+       END IF
+    END DO
+#endif
+!
+  END FUNCTION vmx_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  FUNCTION vmx_wsmp_matn(mat, xarr) RESULT(yarr)
+!
+!   Return product mat*x
+!
+    TYPE(wsmp_mat)                :: mat
+    DOUBLE PRECISION, INTENT(in)  :: xarr(:,:)
+    DOUBLE PRECISION              :: yarr(SIZE(xarr,1),SIZE(xarr,2))
+!
+    DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0
+    INTEGER :: n, nrhs, i, j
+    CHARACTER(len=6) :: matdescra
+!
+    n = mat%rank
+    nrhs = SIZE(xarr,2)
+!
+#ifdef MKL
+    IF(mat%nlsym) THEN
+       matdescra = 'sun'
+    ELSE
+       matdescra = 'g'
+    END IF
+!
+    CALL mkl_dcsrmm('N', n, nrhs, n, alpha, matdescra, mat%val,&
+         &           mat%cols, mat%irow(1), mat%irow(2), xarr, &
+         &           n, beta, yarr, n)
+#else
+    yarr = 0.0d0
+    DO i=1,n
+       DO j=mat%irow(i), mat%irow(i+1)-1
+          yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:)
+       END DO
+       IF(mat%nlsym) THEN
+          DO j=mat%irow(i)+1, mat%irow(i+1)-1
+             yarr(mat%cols(j),:) = yarr(mat%cols(j),:) &
+                  &                + mat%val(j)*xarr(i,:)
+          END DO
+       END IF
+    END DO
+#endif
+!
+  END FUNCTION vmx_wsmp_matn
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  FUNCTION vmx_zwsmp_matn(mat, xarr) RESULT(yarr)
+!
+!   Return product mat*x
+!
+    TYPE(zwsmp_mat)             :: mat
+    DOUBLE COMPLEX, INTENT(in)  :: xarr(:,:)
+    DOUBLE COMPLEX              :: yarr(SIZE(xarr,1),SIZE(xarr,2))
+!
+    DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0)
+    INTEGER :: n, nrhs, i, j
+    CHARACTER(len=6) :: matdescra
+    CHARACTER(len=1) :: transa
+!
+    n = mat%rank
+    nrhs = SIZE(xarr,2)
+!
+#ifdef MKL
+    IF(mat%nlsym) THEN
+       matdescra = 'sun'
+    ELSE IF(mat%nlherm) THEN
+       matdescra = 'hun'
+    ELSE
+       matdescra = 'g'
+    END IF
+    transa='N'
+    IF(mat%nlherm) THEN
+       transa='T' ! Transpose(CSR-UT*) = CSC-LT* = CSR-UT
+    END IF
+!
+    CALL mkl_zcsrmm(transa, n, nrhs, n, alpha, matdescra, mat%val, &
+         &          mat%cols, mat%irow(1), mat%irow(2), xarr, n, &
+         &          beta, yarr, n)
+#else
+    yarr = (0.0d0,0.0d0)
+    DO i=1,n
+       IF(mat%nlherm) THEN ! CSR-UT = (CSR-UT*)*
+          DO j=mat%irow(i), mat%irow(i+1)-1
+             yarr(i,:) = yarr(i,:) + CONJG(mat%val(j))*xarr(mat%cols(j),:)
+          END DO
+       ELSE
+          DO j=mat%irow(i), mat%irow(i+1)-1
+             yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:)
+          END DO
+       END IF
+       IF(mat%nlsym) THEN
+          DO j=mat%irow(i)+1, mat%irow(i+1)-1
+             yarr(mat%cols(j),:) = yarr(mat%cols(j),:) &
+                  &                + mat%val(j)*xarr(i,:)
+          END DO
+       ELSE IF(mat%nlherm) THEN ! CSR-UT = (CSR-UT*)*
+          DO j=mat%irow(i)+1, mat%irow(i+1)-1
+             yarr(mat%cols(j),:) = yarr(mat%cols(j),:) &
+                  &               + mat%val(j)*xarr(i,:)
+          END DO
+       END IF
+    END DO
+#endif
+!
+  END FUNCTION vmx_zwsmp_matn
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE destroy_wsmp_mat(mat)
+!
+!  Deallocate the sparse matrix mat
+!
+    TYPE(wsmp_mat) :: mat
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL destroy(mat%mat)
+       DEALLOCATE(mat%mat)
+    END IF
+!
+!  Release memory for factors for symmetric matrix
+    IF(mat%nlsym) THEN
+       CALL check_mat(mat)
+       CALL wsffree
+    END IF
+!
+    IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
+    IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
+    IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm)
+    IF(ASSOCIATED(mat%invp)) DEALLOCATE(mat%invp)
+    IF(ASSOCIATED(mat%mrp)) DEALLOCATE(mat%mrp)
+    IF(ASSOCIATED(mat%diag)) DEALLOCATE(mat%diag)
+    IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
+    IF(ASSOCIATED(mat%aux)) DEALLOCATE(mat%aux)
+  END SUBROUTINE destroy_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE destroy_zwsmp_mat(mat)
+!
+!  Deallocate the sparse matrix mat
+!
+    TYPE(zwsmp_mat) :: mat
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL destroy(mat%mat)
+       DEALLOCATE(mat%mat)
+    END IF
+!
+!  Release memory for factors for symmetric/hermitian matrix
+    IF(mat%nlsym .OR. mat%nlherm) THEN
+       CALL check_mat(mat)
+       CALL wsffree
+    END IF
+!
+    IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
+    IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
+    IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm)
+    IF(ASSOCIATED(mat%invp)) DEALLOCATE(mat%invp)
+    IF(ASSOCIATED(mat%mrp)) DEALLOCATE(mat%mrp)
+    IF(ASSOCIATED(mat%diag)) DEALLOCATE(mat%diag)
+    IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
+    IF(ASSOCIATED(mat%aux)) DEALLOCATE(mat%aux)
+  END SUBROUTINE destroy_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE put_wsmp_mat(fid, label, mat, str)
+!
+!   Write matrix to hdf5 file
+!
+    USE futils
+!
+    INTEGER, INTENT(in)                    :: fid
+    CHARACTER(len=*), INTENT(in)           :: label
+    TYPE(wsmp_mat)                      :: mat
+    CHARACTER(len=*), OPTIONAL, INTENT(in) :: str
+!
+    IF(PRESENT(str)) THEN
+       CALL creatg(fid, label, str)
+    ELSE
+       CALL creatg(fid, label)
+    END IF
+    CALL attach(fid, label, 'RANK', mat%rank)
+    CALL attach(fid, label, 'NNZ',  mat%nnz)
+    CALL attach(fid, label, 'NLSYM', mat%nlsym)
+    CALL attach(fid, label, 'NLPOS', mat%nlpos)
+    CALL putarr(fid, TRIM(label)//'/irow', mat%irow)
+    CALL putarr(fid, TRIM(label)//'/cols', mat%cols)
+    CALL putarr(fid, TRIM(label)//'/val', mat%val)
+!
+    CALL creatg(fid, TRIM(label)//'/p')
+    CALL putarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm)
+    CALL putarr(fid, TRIM(label)//'/p/dparm', mat%p%dparm)
+  END SUBROUTINE put_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE put_zwsmp_mat(fid, label, mat, str)
+!
+!   Write matrix to hdf5 file
+!
+    USE futils
+!
+    INTEGER, INTENT(in)                    :: fid
+    CHARACTER(len=*), INTENT(in)           :: label
+    TYPE(zwsmp_mat)                        :: mat
+    CHARACTER(len=*), OPTIONAL, INTENT(in) :: str
+!
+    IF(PRESENT(str)) THEN
+       CALL creatg(fid, label, str)
+    ELSE
+       CALL creatg(fid, label)
+    END IF
+    CALL attach(fid, label, 'RANK', mat%rank)
+    CALL attach(fid, label, 'NNZ',  mat%nnz)
+    CALL attach(fid, label, 'NLSYM', mat%nlsym)
+    CALL attach(fid, label, 'NLPOS', mat%nlpos)
+    CALL attach(fid, label, 'NLHERM', mat%nlherm)
+    CALL putarr(fid, TRIM(label)//'/irow', mat%irow)
+    CALL putarr(fid, TRIM(label)//'/cols', mat%cols)
+    CALL putarr(fid, TRIM(label)//'/val', mat%val)
+!
+    CALL creatg(fid, TRIM(label)//'/p')
+    CALL putarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm)
+    CALL putarr(fid, TRIM(label)//'/p/dparm', mat%p%dparm)
+  END SUBROUTINE put_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE get_wsmp_mat(fid, label, mat)
+!
+!   Read matrix from hdf5 file
+!
+    USE futils
+!
+    INTEGER, INTENT(in)          :: fid
+    CHARACTER(len=*), INTENT(in) :: label
+    TYPE(wsmp_mat)               :: mat
+!
+    CALL getatt(fid, label, 'RANK', mat%rank)
+    CALL getatt(fid, label, 'NNZ',  mat%nnz)
+    CALL getatt(fid, label, 'NLSYM', mat%nlsym)
+    CALL getatt(fid, label, 'NLPOS', mat%nlpos)
+    CALL getarr(fid, TRIM(label)//'/irow', mat%irow)
+    CALL getarr(fid, TRIM(label)//'/cols', mat%cols)
+    IF(mat%nlsym) THEN
+       CALL getarr(fid, TRIM(label)//'/perm', mat%perm)
+       CALL getarr(fid, TRIM(label)//'/invp', mat%invp)
+    END IF
+    CALL getarr(fid, TRIM(label)//'/val', mat%val)
+!
+    CALL getarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm)
+    CALL getarr(fid, TRIM(label)//'/p/dparm', mat%p%dparm)
+  END SUBROUTINE get_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE get_zwsmp_mat(fid, label, mat)
+!
+!   Read matrix from hdf5 file
+!
+    USE futils
+!
+    INTEGER, INTENT(in)          :: fid
+    CHARACTER(len=*), INTENT(in) :: label
+    TYPE(zwsmp_mat)              :: mat
+!
+    CALL getatt(fid, label, 'RANK', mat%rank)
+    CALL getatt(fid, label, 'NNZ',  mat%nnz)
+    CALL getatt(fid, label, 'NLSYM', mat%nlsym)
+    CALL getatt(fid, label, 'NLPOS', mat%nlpos)
+    CALL getatt(fid, label, 'NLHERM', mat%nlherm)
+    CALL getarr(fid, TRIM(label)//'/irow', mat%irow)
+    CALL getarr(fid, TRIM(label)//'/cols', mat%cols)
+    IF(mat%nlsym) THEN
+       CALL getarr(fid, TRIM(label)//'/perm', mat%perm)
+       CALL getarr(fid, TRIM(label)//'/invp', mat%invp)
+    END IF
+    CALL getarr(fid, TRIM(label)//'/val', mat%val)
+!
+    CALL getarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm)
+    CALL getarr(fid, TRIM(label)//'/p/dparm', mat%p%dparm)
+  END SUBROUTINE get_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE mcopy_wsmp_mat(mata, matb)
+!
+!   Matrix copy: B = A
+!
+    TYPE(wsmp_mat) :: mata, matb
+    INTEGER :: n, nnz
+!
+! Assume that matb was already initialized by init_wsmp_mat.
+    IF(matb%rank.LE.0) THEN
+       WRITE(*,'(a)') 'MCOPY: Mat B is not initialized with INIT'
+       STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+    END IF
+!
+    IF(ASSOCIATED(matb%mat)) THEN 
+       CALL destroy(matb%mat)
+       DEALLOCATE(matb%mat)
+    END IF
+!
+    n = mata%rank
+    nnz = mata%nnz
+    matb%rank = n
+    matb%nnz = nnz
+    matb%nlsym = mata%nlsym
+    matb%nlpos = mata%nlpos
+    matb%nlforce_zero = mata%nlforce_zero
+!
+    IF(ASSOCIATED(matb%val)) DEALLOCATE(matb%val)
+    IF(ASSOCIATED(matb%cols)) DEALLOCATE(matb%cols)
+    IF(ASSOCIATED(matb%irow)) DEALLOCATE(matb%irow)
+    IF(ASSOCIATED(matb%perm)) DEALLOCATE(matb%perm)
+    IF(ASSOCIATED(matb%invp)) DEALLOCATE(matb%invp)
+    ALLOCATE(matb%val(nnz)); matb%val = mata%val
+    ALLOCATE(matb%cols(nnz)); matb%cols = mata%cols
+    ALLOCATE(matb%irow(n+1)); matb%irow = mata%irow
+    ALLOCATE(matb%perm(n))
+    IF(matb%nlsym) THEN
+       ALLOCATE(matb%perm(n))
+       ALLOCATE(matb%invp(n))
+    END IF
+  END SUBROUTINE mcopy_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE mcopy_zwsmp_mat(mata, matb)
+!
+!   Matrix copy: B = A
+!
+    TYPE(zwsmp_mat) :: mata, matb
+    INTEGER :: n, nnz
+!
+! Assume that matb was already initialized by init_wsmp_mat.
+    IF(matb%rank.LE.0) THEN
+       WRITE(*,'(a)') 'MCOPY: Mat B is not initialized with INIT'
+       STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+    END IF
+!
+    IF(ASSOCIATED(matb%mat)) THEN 
+       CALL destroy(matb%mat)
+       DEALLOCATE(matb%mat)
+    END IF
+!
+    n = mata%rank
+    nnz = mata%nnz
+    matb%rank = n
+    matb%nnz = nnz
+    matb%nlsym = mata%nlsym
+    matb%nlherm = mata%nlherm
+    matb%nlpos = mata%nlpos
+    matb%nlforce_zero = mata%nlforce_zero
+!
+    IF(ASSOCIATED(matb%val)) DEALLOCATE(matb%val)
+    IF(ASSOCIATED(matb%cols)) DEALLOCATE(matb%cols)
+    IF(ASSOCIATED(matb%irow)) DEALLOCATE(matb%irow)
+    IF(ASSOCIATED(matb%perm)) DEALLOCATE(matb%perm)
+    IF(ASSOCIATED(matb%invp)) DEALLOCATE(matb%invp)
+    ALLOCATE(matb%val(nnz)); matb%val = mata%val
+    ALLOCATE(matb%cols(nnz)); matb%cols = mata%cols
+    ALLOCATE(matb%irow(n+1)); matb%irow = mata%irow
+    ALLOCATE(matb%perm(n))
+    IF(matb%nlsym) THEN
+       ALLOCATE(matb%perm(n))
+       ALLOCATE(matb%invp(n))
+    END IF
+  END SUBROUTINE mcopy_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE maddto_wsmp_mat(mata, alpha, matb)
+!
+!   A <- A + alpha*B
+!
+    TYPE(wsmp_mat)   :: mata, matb
+    DOUBLE PRECISION :: alpha
+!
+    mata%val = mata%val + alpha*matb%val
+  END SUBROUTINE maddto_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE maddto_zwsmp_mat(mata, alpha, matb)
+!
+!   A <- A + alpha*B
+!
+    TYPE(zwsmp_mat) :: mata, matb
+    DOUBLE COMPLEX  :: alpha
+!
+    mata%val = mata%val + alpha*matb%val
+  END SUBROUTINE maddto_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE psum_wsmp_mat(mat, comm)
+!
+!   Parallel sum of sparse matrices
+!
+    INCLUDE "mpif.h"
+!
+    TYPE(wsmp_mat)   :: mat
+    INCLUDE 'psum_mat.tpl'
+  END SUBROUTINE psum_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE psum_zwsmp_mat(mat, comm)
+!
+!   Parallel sum of sparse matrices
+!
+    INCLUDE "mpif.h"
+!
+    TYPE(zwsmp_mat)   :: mat
+    INCLUDE 'psum_mat.tpl'
+  END SUBROUTINE psum_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE p2p_wsmp_mat(mat, dest, extyp, op, comm)
+!
+!   Point-to-point combine sparse matrix between 2 processes
+!
+    INCLUDE "mpif.h"
+!
+    TYPE(wsmp_mat)            :: mat
+    DOUBLE PRECISION, ALLOCATABLE :: val(:)
+    INTEGER :: mpi_type=MPI_DOUBLE_PRECISION
+!
+    INCLUDE "p2p_mat.tpl"
+  END SUBROUTINE p2p_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE p2p_zwsmp_mat(mat, dest, extyp, op, comm)
+!
+!   Point-to-point combine sparse matrix between 2 processes
+!
+    INCLUDE "mpif.h"
+!
+    TYPE(zwsmp_mat)           :: mat
+    DOUBLE COMPLEX, ALLOCATABLE :: val(:)
+    INTEGER :: mpi_type=MPI_DOUBLE_COMPLEX
+!
+    INCLUDE "p2p_mat.tpl"
+  END SUBROUTINE p2p_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+END MODULE pwsmp_bsplines
diff --git a/src/sparse_mod.f90 b/src/sparse_mod.f90
new file mode 100644
index 0000000..5bc289e
--- /dev/null
+++ b/src/sparse_mod.f90
@@ -0,0 +1,899 @@
+!>
+!> @file sparse_mod.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+MODULE sparse
+!
+!    SPARSE: Implement sparse matrix using dynamic linked lists
+!            as matrix rows.
+!
+!    T.M. Tran, CRPP-EPFL
+!    October 2010
+!
+  IMPLICIT NONE
+!
+  TYPE elt
+     INTEGER            :: index=0
+     DOUBLE PRECISION   :: val=0.0d0
+     TYPE(elt), POINTER :: next => NULL()
+  END TYPE elt
+!
+  TYPE zelt
+     INTEGER             :: index=0
+     DOUBLE COMPLEX      :: val=(0.0d0, 0.0d0)
+     TYPE(zelt), POINTER :: next => NULL()
+  END TYPE zelt
+!
+  TYPE sprow
+     INTEGER            :: nnz=0           ! Number of non zeros in a row
+     TYPE(elt), POINTER :: row0 => NULL()  ! Points to head of a (sparse) row
+  END TYPE sprow
+!
+  TYPE zsprow
+     INTEGER             :: nnz=0          ! Number of non zeros in a row
+     TYPE(zelt), POINTER :: row0 => NULL() ! Points to head of a (sparse) row
+  END TYPE zsprow
+!
+  TYPE spmat
+     INTEGER :: rank
+     TYPE(sprow), POINTER :: row(:) => NULL()
+  END TYPE spmat
+!
+  TYPE zspmat
+     INTEGER :: rank
+     TYPE(zsprow), POINTER :: row(:) => NULL()
+  END TYPE zspmat
+!
+  INTERFACE init
+     MODULE PROCEDURE init_spmat, init_zspmat
+ END INTERFACE init
+!
+  INTERFACE updtmat
+     MODULE PROCEDURE updt_sp, updt_zsp, updt_spmat, updt_zspmat
+  END INTERFACE updtmat
+!
+  INTERFACE putele
+     MODULE PROCEDURE putele_sp, putele_zsp, putele_spmat, putele_zspmat
+  END INTERFACE putele
+!
+  INTERFACE getele
+     MODULE PROCEDURE getele_sp, getele_zsp, getele_spmat, getele_zspmat
+  END INTERFACE getele
+!
+  INTERFACE putrow
+     MODULE PROCEDURE putrow_csr,  putrow_full,  putrow_spmat, &
+          &           putrow_zcsr, putrow_zfull, putrow_zspmat
+  END INTERFACE putrow
+!
+  INTERFACE getrow
+     MODULE PROCEDURE getrow_csr,  getrow_full,  getrow_spmat, &
+          &           getrow_zcsr, getrow_zfull, getrow_zspmat
+  END INTERFACE getrow
+!
+  INTERFACE putcol
+     MODULE PROCEDURE putcol_spmat, putcol_zspmat
+  END INTERFACE putcol
+!
+  INTERFACE getcol
+     MODULE PROCEDURE getcol_spmat, getcol_zspmat
+  END INTERFACE getcol
+!
+  INTERFACE get_count
+     MODULE PROCEDURE get_count_sp,  get_count_spmat, &
+          &           get_count_zsp, get_count_zspmat
+  END INTERFACE get_count
+!
+  INTERFACE destroy
+     MODULE PROCEDURE destroy_spmat,  destroy_row,  destroy_node, &
+          &           destroy_zspmat, destroy_zrow, destroy_znode
+ END INTERFACE destroy
+!
+CONTAINS
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE init_spmat(n, mat, istart, iend)
+!
+!   Initial an empty  sparse matrix
+!
+    INTEGER, INTENT(in)           :: n
+    INTEGER, INTENT(in), OPTIONAL :: istart, iend
+    TYPE(spmat)                   :: mat
+!
+    mat%rank = n
+    IF(ASSOCIATED(mat%row)) DEALLOCATE(mat%row)
+    IF(PRESENT(istart)) THEN
+       ALLOCATE(mat%row(istart:iend))
+    ELSE
+       ALLOCATE(mat%row(n))
+    END IF
+!
+  END SUBROUTINE init_spmat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE init_zspmat(n, mat, istart, iend)
+!
+!   Initial an empty  sparse matrix
+!
+    INTEGER, INTENT(in)           :: n
+    INTEGER, INTENT(in), OPTIONAL :: istart, iend
+    TYPE(zspmat)                  :: mat
+!
+    mat%rank = n
+    IF(ASSOCIATED(mat%row)) DEALLOCATE(mat%row)
+    IF(PRESENT(istart)) THEN
+       ALLOCATE(mat%row(istart:iend))
+    ELSE
+       ALLOCATE(mat%row(n))
+    END IF
+!
+  END SUBROUTINE init_zspmat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE updt_sp(arow, j, val)
+!
+!   Update element j of row arow or insert it in an increasing "index"
+!
+    TYPE(sprow), TARGET          :: arow
+    INTEGER, INTENT(in)          :: j
+    DOUBLE PRECISION, INTENT(in) :: val
+!
+    TYPE(elt), TARGET :: pre_root
+    TYPE(elt), POINTER :: t, p
+!
+    pre_root%next => arow%row0 ! pre_root is linked to the head of the list.
+    t => pre_root
+    DO WHILE( ASSOCIATED(t%next) )
+       p => t%next
+       IF( p%index .EQ. j ) THEN
+          p%val = p%val+val
+          RETURN
+       END IF
+       IF( p%index .GT. j ) EXIT
+       t => t%next
+    END DO
+    ALLOCATE(p)
+    p = elt(j, val, t%next)
+    t%next => p
+!
+    arow%nnz = arow%nnz+1
+    arow%row0 => pre_root%next ! In case the head is altered
+  END SUBROUTINE updt_sp
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE updt_zsp(arow, j, val)
+!
+!   Update element j of row arow or insert it in an increasing "index"
+!
+    TYPE(zsprow), TARGET       :: arow
+    INTEGER, INTENT(in)        :: j
+    DOUBLE COMPLEX, INTENT(in) :: val
+!
+    TYPE(zelt), TARGET :: pre_root
+    TYPE(zelt), POINTER :: t, p
+!
+    pre_root%next => arow%row0 ! pre_root is linked to the head of the list.
+    t => pre_root
+    DO WHILE( ASSOCIATED(t%next) )
+       p => t%next
+       IF( p%index .EQ. j ) THEN
+          p%val = p%val+val
+          RETURN
+       END IF
+       IF( p%index .GT. j ) EXIT
+       t => t%next
+    END DO
+    ALLOCATE(p)
+    p = zelt(j, val, t%next)
+    t%next => p
+!
+    arow%nnz = arow%nnz+1
+    arow%row0 => pre_root%next ! In case the head is altered
+  END SUBROUTINE updt_zsp
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE updt_spmat(mat, i, j, val)
+!
+!   Update element Aij of sparse matrix
+!
+    TYPE(spmat)                  :: mat
+    INTEGER, INTENT(in)          :: i, j
+    DOUBLE PRECISION, INTENT(in) :: val
+!
+    CALL updt_sp(mat%row(i), j, val)
+  END SUBROUTINE updt_spmat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE updt_zspmat(mat, i, j, val)
+!
+!   Update element Aij of sparse matrix
+!
+    TYPE(zspmat)               :: mat
+    INTEGER, INTENT(in)        :: i, j
+    DOUBLE COMPLEX, INTENT(in) :: val
+!
+    CALL updt_zsp(mat%row(i), j, val)
+  END SUBROUTINE updt_zspmat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getele_sp(arow, j, val, found)
+!
+!   Get element j from row arow
+!
+    TYPE(sprow), TARGET            :: arow
+    INTEGER, INTENT(in)            :: j
+    DOUBLE PRECISION, INTENT(out)  :: val
+    LOGICAL, INTENT(out), OPTIONAL :: found
+!
+    TYPE(elt), POINTER :: t
+    INTEGER :: i
+!
+    val = 0.0d0
+    t => arow%row0  ! Start of a row
+    DO WHILE( ASSOCIATED(t) )
+       IF(t%index .EQ. j) THEN
+          val = t%val
+          IF(PRESENT(found)) found = .TRUE.
+          RETURN
+       END IF
+       t => t%next
+    END DO
+    IF(PRESENT(found)) found = .FALSE.
+  END SUBROUTINE getele_sp
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getele_zsp(arow, j, val, found)
+!
+!   Get element j from row arow
+!
+    TYPE(zsprow), TARGET           :: arow
+    INTEGER, INTENT(in)            :: j
+    DOUBLE COMPLEX, INTENT(out)    :: val
+    LOGICAL, INTENT(out), OPTIONAL :: found
+!
+    TYPE(zelt), POINTER :: t
+    INTEGER :: i
+!
+    val = 0.0d0
+    t => arow%row0  ! Start of a row
+    DO WHILE( ASSOCIATED(t) )
+       IF(t%index .EQ. j) THEN
+          val = t%val
+          IF(PRESENT(found)) found = .TRUE.
+          RETURN
+       END IF
+       t => t%next
+    END DO
+    IF(PRESENT(found)) found = .FALSE.
+  END SUBROUTINE getele_zsp
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getele_spmat(mat, i, j, val)
+!
+!   Get element (i,j) of sparse matrix
+!
+    TYPE(spmat)                   :: mat
+    INTEGER, INTENT(in)           :: i, j
+    DOUBLE PRECISION, INTENT(out) :: val
+!
+    CALL getele(mat%row(i), j, val)
+  END SUBROUTINE getele_spmat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getele_zspmat(mat, i, j, val)
+!
+!   Get element (i,j) of sparse matrix
+!
+    TYPE(zspmat)                :: mat
+    INTEGER, INTENT(in)         :: i, j
+    DOUBLE COMPLEX, INTENT(out) :: val
+!
+    CALL getele(mat%row(i), j, val)
+  END SUBROUTINE getele_zspmat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putele_sp(arow, j, val, nlforce_zero)
+!
+!   Put (overwrite) element j of row arow or insert it in an increasing "index"
+!
+    TYPE(sprow), TARGET           :: arow
+    INTEGER, INTENT(in)           :: j
+    DOUBLE PRECISION, INTENT(in)  :: val
+    LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero
+!
+    TYPE(elt), TARGET :: pre_root
+    TYPE(elt), POINTER :: t, p
+    LOGICAL :: rmnode
+!
+    pre_root%next => arow%row0 ! pre_root is linked to the head of the list.
+    t => pre_root
+!
+!    Remove node which has zero val or not?
+!    But never create new node with zero val
+!
+    rmnode = .TRUE.
+    IF(PRESENT(nlforce_zero)) rmnode = .NOT.nlforce_zero
+!
+    DO WHILE( ASSOCIATED(t%next) )
+       p => t%next
+       IF( p%index .EQ. j ) THEN
+          IF(ABS(val).LE.EPSILON(0.0d0) .AND. rmnode) THEN ! Remove the node for zero val!
+             t%next => p%next
+             arow%nnz = arow%nnz-1
+             arow%row0 => pre_root%next ! In case the head is altered
+             DEALLOCATE(p)
+          ELSE
+             p%val = val
+          END IF
+          RETURN
+       END IF
+       IF( p%index .GT. j ) EXIT
+       t => t%next
+    END DO
+!
+!    Never create new node with zero val
+!
+    IF(ABS(val).GT.EPSILON(0.0d0)) THEN
+       ALLOCATE(p)
+       p = elt(j, val, t%next)
+       t%next => p
+       arow%nnz = arow%nnz+1
+       arow%row0 => pre_root%next
+    END IF
+  END SUBROUTINE putele_sp
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putele_zsp(arow, j, val, nlforce_zero)
+!
+!   Put (overwrite) element j of row arow or insert it in an increasing "index"
+!
+    TYPE(zsprow), TARGET       :: arow
+    INTEGER, INTENT(in)        :: j
+    DOUBLE COMPLEX, INTENT(in) :: val
+    LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero
+!
+    TYPE(zelt), TARGET :: pre_root
+    TYPE(zelt), POINTER :: t, p
+    LOGICAL :: rmnode
+!
+    pre_root%next => arow%row0 ! pre_root is linked to the head of the list.
+    t => pre_root
+!
+!    Remove node which has zero val or not?
+!    But never create new node with zero val
+!
+    rmnode = .TRUE.
+    IF(PRESENT(nlforce_zero)) rmnode = .NOT.nlforce_zero
+!
+    DO WHILE( ASSOCIATED(t%next) )
+       p => t%next
+       IF( p%index .EQ. j ) THEN
+          IF(ABS(val).LE.EPSILON(0.0d0) .AND. rmnode) THEN ! Remove the node for zero val!
+             t%next => p%next
+             arow%nnz = arow%nnz-1
+             arow%row0 => pre_root%next ! In case the head is altered
+             DEALLOCATE(p)
+          ELSE
+             p%val = val
+          END IF
+          RETURN
+       END IF
+       IF( p%index .GT. j ) EXIT
+       t => t%next
+    END DO
+!
+!    Never create new node with zero val
+!
+    IF(ABS(val).GT.EPSILON(0.0d0)) THEN
+       ALLOCATE(p)
+       p = zelt(j, val, t%next)
+       t%next => p
+       arow%nnz = arow%nnz+1
+       arow%row0 => pre_root%next
+    END IF
+  END SUBROUTINE putele_zsp
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putele_spmat(mat, i, j, val, nlforce_zero)
+!
+!  Put element (i,j) of matrix
+!
+    TYPE(spmat)                   :: mat
+    INTEGER, INTENT(in)           :: i, j
+    DOUBLE PRECISION, INTENT(in)  :: val
+    LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero
+!
+    CALL putele(mat%row(i), j, val, nlforce_zero)
+  END SUBROUTINE putele_spmat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putele_zspmat(mat, i, j, val, nlforce_zero)
+!
+!  Put element (i,j) of matrix
+!
+    TYPE(zspmat)                  :: mat
+    INTEGER, INTENT(in)           :: i, j
+    DOUBLE COMPLEX, INTENT(in)    :: val
+    LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero
+!
+    CALL putele(mat%row(i), j, val, nlforce_zero)
+  END SUBROUTINE putele_zspmat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  INTEGER FUNCTION get_count_sp(arow)
+!
+!  Number of elements in arow
+!
+    TYPE(sprow), INTENT(in) :: arow
+    TYPE(elt), POINTER :: t
+    INTEGER :: i
+!
+    t => arow%row0  ! Start of a row
+    i = 0
+    DO WHILE( ASSOCIATED(t) )
+       i=i+1
+       t => t%next
+    END DO
+    get_count_sp = i
+  END FUNCTION get_count_sp
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  INTEGER FUNCTION get_count_zsp(arow)
+!
+!  Number of elements in arow
+!
+    TYPE(zsprow), INTENT(in) :: arow
+    TYPE(zelt), POINTER :: t
+    INTEGER :: i
+!
+    t => arow%row0  ! Start of a row
+    i = 0
+    DO WHILE( ASSOCIATED(t) )
+       i=i+1
+       t => t%next
+    END DO
+    get_count_zsp = i
+  END FUNCTION get_count_zsp
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  INTEGER FUNCTION get_count_spmat(mat, nnz)
+!
+!   Number of non-zeros in sparse matrix
+!
+    TYPE(spmat)                    :: mat
+    INTEGER, INTENT(out), OPTIONAL :: nnz(:)
+!
+    INTEGER :: i, c(LBOUND(mat%row,1):UBOUND(mat%row,1))
+    DO i=LBOUND(mat%row,1),UBOUND(mat%row,1)
+       c(i) = get_count_sp(mat%row(i))
+    END DO
+    IF(PRESENT(nnz)) nnz = c
+    get_count_spmat = SUM(c)
+  END FUNCTION get_count_spmat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  INTEGER FUNCTION get_count_zspmat(mat, nnz)
+!
+!   Number of non-zeros in sparse matrix
+!
+    TYPE(zspmat)                   :: mat
+    INTEGER, INTENT(out), OPTIONAL :: nnz(:)
+!
+    INTEGER :: i, c(LBOUND(mat%row,1):UBOUND(mat%row,1))
+    DO i=LBOUND(mat%row,1),UBOUND(mat%row,1)
+       c(i) = get_count_zsp(mat%row(i))
+    END DO
+    IF(PRESENT(nnz)) nnz = c
+    get_count_zspmat = SUM(c)
+  END FUNCTION get_count_zspmat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getrow_csr(arow, arr, col, count)
+!
+!   Get a row from sparse row arow and put it in a CSR format
+!
+    TYPE(sprow), INTENT(in)        :: arow
+    DOUBLE PRECISION, INTENT(out)  :: arr(:)
+    INTEGER, INTENT(out)           :: col(:)
+    INTEGER, OPTIONAL, INTENT(out) :: count
+!
+    TYPE(elt), POINTER :: t
+    INTEGER :: i
+!
+    t => arow%row0  ! Start of a row
+    i = 0
+    DO WHILE( ASSOCIATED(t) )
+       i=i+1
+       col(i) = t%index
+       arr(i) = t%val
+       t => t%next
+    END DO
+    IF(PRESENT(count)) count = i
+  END SUBROUTINE getrow_csr
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getrow_zcsr(arow, arr, col, count)
+!
+!   Get a row from sparse row arow and put it in a CSR format
+!
+    TYPE(zsprow), INTENT(in)       :: arow
+    DOUBLE COMPLEX, INTENT(out)    :: arr(:)
+    INTEGER, INTENT(out)           :: col(:)
+    INTEGER, OPTIONAL, INTENT(out) :: count
+!
+    TYPE(zelt), POINTER :: t
+    INTEGER :: i
+!
+    t => arow%row0  ! Start of a row
+    i = 0
+    DO WHILE( ASSOCIATED(t) )
+       i=i+1
+       col(i) = t%index
+       arr(i) = t%val
+       t => t%next
+    END DO
+    IF(PRESENT(count)) count = i
+  END SUBROUTINE getrow_zcsr
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getrow_full(arow, arr, count)
+!
+!   Get a row from sparse row arow and put it in an full row
+!
+    TYPE(sprow), INTENT(in)        :: arow
+    DOUBLE PRECISION, INTENT(out)  :: arr(:)
+    INTEGER, OPTIONAL, INTENT(out) :: count
+!
+    TYPE(elt), POINTER :: t
+    INTEGER :: n, i, j
+!
+    n = SIZE(arr)
+    arr = 0.0d0
+    t => arow%row0  ! Start of a row
+    i = 0
+    DO WHILE( ASSOCIATED(t) )
+       i=i+1
+       j = t%index
+       IF(j.LE.n) THEN
+          arr(j) = t%val
+          t => t%next
+       ELSE
+          WRITE(*,'(a)') 'GETROW_FULL: size of input ARR too small!'
+          STOP
+       END IF
+    END DO
+    IF(PRESENT(count)) count = i
+  END SUBROUTINE getrow_full
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getrow_zfull(arow, arr, count)
+!
+!   Get a row from sparse row arow and put it in an full row
+!
+    TYPE(zsprow), INTENT(in)       :: arow
+    DOUBLE COMPLEX, INTENT(out)    :: arr(:)
+    INTEGER, OPTIONAL, INTENT(out) :: count
+!
+    TYPE(zelt), POINTER :: t
+    INTEGER :: n, i, j
+!
+    n = SIZE(arr)
+    arr = 0.0d0
+    t => arow%row0  ! Start of a row
+    i = 0
+    DO WHILE( ASSOCIATED(t) )
+       i=i+1
+       j = t%index
+       IF(j.LE.n) THEN
+          arr(j) = t%val
+          t => t%next
+       ELSE
+          WRITE(*,'(a)') 'GETROW_FULL: size of input ARR too small!'
+          STOP
+       END IF
+    END DO
+    IF(PRESENT(count)) count = i
+  END SUBROUTINE getrow_zfull
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getrow_spmat(mat, i, arr, col)
+!
+!   Get a row from sparse matrix
+!
+    TYPE(spmat), INTENT(in)        :: mat
+    INTEGER, INTENT(in)            :: i
+    DOUBLE PRECISION, INTENT(out)  :: arr(:)
+    INTEGER, INTENT(out), OPTIONAL :: col(:)
+!
+    IF(PRESENT(col)) THEN ! The output row is defined by (col, arr)
+       CALL getrow_csr(mat%row(i), arr, col)
+    ELSE
+       CALL getrow_full(mat%row(i), arr)
+    END IF
+  END SUBROUTINE getrow_spmat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getrow_zspmat(mat, i, arr, col)
+!
+!   Get a row from sparse matrix
+!
+    TYPE(zspmat), INTENT(in)       :: mat
+    INTEGER, INTENT(in)            :: i
+    DOUBLE COMPLEX, INTENT(out)    :: arr(:)
+    INTEGER, INTENT(out), OPTIONAL :: col(:)
+!
+    IF(PRESENT(col)) THEN ! The output row is defined by (col, arr)
+       CALL getrow_zcsr(mat%row(i), arr, col)
+    ELSE
+       CALL getrow_zfull(mat%row(i), arr)
+    END IF
+  END SUBROUTINE getrow_zspmat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putrow_csr(arow, arr, col, nlforce_zero)
+!
+!   Put a row from sparse row arow and put it in a CSR format
+!
+    TYPE(sprow), INTENT(inout)    :: arow
+    DOUBLE PRECISION, INTENT(in)  :: arr(:)
+    INTEGER, INTENT(in)           :: col(:)
+    LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero
+!
+    INTEGER :: n, i
+!
+    n=SIZE(arr)
+    DO i=1,n
+       CALL putele(arow, col(i), arr(i), nlforce_zero)
+    END DO
+  END SUBROUTINE putrow_csr
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putrow_zcsr(arow, arr, col, nlforce_zero)
+!
+!   Put a row from sparse row arow and put it in a CSR format
+!
+    TYPE(zsprow), INTENT(inout)   :: arow
+    DOUBLE COMPLEX, INTENT(in)    :: arr(:)
+    INTEGER, INTENT(in)           :: col(:)
+    LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero
+!
+    INTEGER :: n, i
+!
+    n=SIZE(arr)
+    DO i=1,n
+       CALL putele(arow, col(i), arr(i), nlforce_zero)
+    END DO
+  END SUBROUTINE putrow_zcsr
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putrow_full(arow, arr, nlforce_zero)
+!
+!   Put a row from sparse row arow and put it in a full row
+!
+    TYPE(sprow), INTENT(inout)    :: arow
+    DOUBLE PRECISION, INTENT(in)  :: arr(:)
+    LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero
+!
+    INTEGER :: n, i
+!
+    n=SIZE(arr)
+    DO i=1,n
+       CALL putele(arow, i, arr(i), nlforce_zero)
+    END DO
+  END SUBROUTINE putrow_full
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putrow_zfull(arow, arr, nlforce_zero)
+!
+!   Put a row from sparse row arow and put it in a full row
+!
+    TYPE(zsprow), INTENT(inout)    :: arow
+    DOUBLE COMPLEX, INTENT(in)  :: arr(:)
+    LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero
+!
+    INTEGER :: n, i
+!
+    n=SIZE(arr)
+    DO i=1,n
+       CALL putele(arow, i, arr(i), nlforce_zero)
+    END DO
+  END SUBROUTINE putrow_zfull
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putrow_spmat(mat, i, arr, col, nlforce_zero)
+!
+!   Put a row to matrix
+!
+    TYPE(spmat)                   :: mat
+    INTEGER, intent(in)           :: i
+    DOUBLE PRECISION, INTENT(in)  :: arr(:)
+    INTEGER, INTENT(in), OPTIONAL :: col(:)
+    LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero
+!
+    IF(PRESENT(col)) THEN ! The input row is defined by (col, arr)
+       CALL putrow_csr(mat%row(i), arr, col, nlforce_zero)
+    ELSE
+       CALL putrow_full(mat%row(i), arr, nlforce_zero)
+    END IF
+  END SUBROUTINE putrow_spmat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putrow_zspmat(mat, i, arr, col, nlforce_zero)
+!
+!   Put a row to matrix
+!
+    TYPE(zspmat)                  :: mat
+    INTEGER, intent(in)           :: i
+    DOUBLE COMPLEX, INTENT(in)    :: arr(:)
+    INTEGER, INTENT(in), OPTIONAL :: col(:)
+    LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero
+!
+    IF(PRESENT(col)) THEN ! The input row is defined by (col, arr)
+       CALL putrow_zcsr(mat%row(i), arr, col, nlforce_zero)
+    ELSE
+       CALL putrow_zfull(mat%row(i), arr, nlforce_zero)
+    END IF
+  END SUBROUTINE putrow_zspmat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putcol_spmat(mat, j, arr, nlforce_zero)
+!
+!   Put a column to mtarix
+!
+    TYPE(spmat)                   :: mat
+    INTEGER, INTENT(in)           :: j
+    DOUBLE PRECISION, INTENT(in)  :: arr(:)
+    LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero
+!
+    INTEGER :: i
+    DO i=1,mat%rank
+       CALL putele(mat, i, j, arr(i), nlforce_zero)
+    END DO
+  END SUBROUTINE putcol_spmat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putcol_zspmat(mat, j, arr, nlforce_zero)
+!
+!   Put a column to mtarix
+!
+    TYPE(zspmat)                  :: mat
+    INTEGER, INTENT(in)           :: j
+    DOUBLE COMPLEX, INTENT(in)    :: arr(:)
+    LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero
+!
+    INTEGER :: i
+    DO i=1,mat%rank
+       CALL putele(mat, i, j, arr(i), nlforce_zero)
+    END DO
+  END SUBROUTINE putcol_zspmat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getcol_spmat(mat, j, arr)
+!
+!   Get column j of matrix
+!
+    TYPE(spmat)                   :: mat
+    INTEGER, INTENT(in)           :: j
+    DOUBLE PRECISION, INTENT(out) :: arr(:)
+    INTEGER :: i
+    DO i=1,mat%rank
+       CALL getele(mat, i, j, arr(i))
+    END DO
+  END SUBROUTINE getcol_spmat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getcol_zspmat(mat, j, arr)
+!
+!   Get column j of matrix
+!
+    TYPE(zspmat)                :: mat
+    INTEGER, INTENT(in)         :: j
+    DOUBLE COMPLEX, INTENT(out) :: arr(:)
+    INTEGER :: i
+    DO i=1,mat%rank
+       CALL getele(mat, i, j, arr(i))
+    END DO
+  END SUBROUTINE getcol_zspmat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE destroy_row(arow)
+!
+!  Deallocate the sparse row
+!
+    TYPE(sprow), INTENT(inout) :: arow
+!
+    IF(ASSOCIATED(arow%row0)) CALL destroy_node(arow%row0)
+    arow%nnz = get_count(arow)
+  END SUBROUTINE destroy_row
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE destroy_zrow(arow)
+!
+!  Deallocate the sparse row
+!
+    TYPE(zsprow), INTENT(inout) :: arow
+!
+    IF(ASSOCIATED(arow%row0)) CALL destroy_znode(arow%row0)
+    arow%nnz = get_count(arow)
+  END SUBROUTINE destroy_zrow
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  RECURSIVE SUBROUTINE destroy_node(p)
+!
+!  Deallocate recursively the linked list
+!
+    TYPE(elt), POINTER :: p
+!
+    IF(ASSOCIATED(p%next)) CALL destroy_node(p%next)
+    DEALLOCATE(p)
+  END SUBROUTINE destroy_node
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  RECURSIVE SUBROUTINE destroy_znode(p)
+!
+!  Deallocate recursively the linked list
+!
+    TYPE(zelt), POINTER :: p
+!
+    IF(ASSOCIATED(p%next)) CALL destroy_znode(p%next)
+    DEALLOCATE(p)
+  END SUBROUTINE destroy_znode
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE destroy_spmat(mat)
+!
+!  Deallocate the sparse matrix
+!
+    TYPE(spmat) :: mat
+    INTEGER :: n, i
+!
+    n = mat%rank
+    DO i=LBOUND(mat%row,1),UBOUND(mat%row,1)
+       CALL destroy(mat%row(i))
+    END DO
+    DEALLOCATE(mat%row)
+  END SUBROUTINE destroy_spmat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE destroy_zspmat(mat)
+!
+!  Deallocate the sparse matrix
+!
+    TYPE(zspmat) :: mat
+    INTEGER :: n, i
+!
+    n = mat%rank
+    DO i=LBOUND(mat%row,1),UBOUND(mat%row,1)
+       CALL destroy(mat%row(i))
+    END DO
+    DEALLOCATE(mat%row)
+  END SUBROUTINE destroy_zspmat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  INTEGER FUNCTION isearch(karr, k)
+!
+!   Sequential search an ordered table of integers
+!
+    INTEGER, INTENT(in) :: karr(0:)
+    INTEGER, INTENT(in) :: k
+    INTEGER :: n
+!
+    n=SIZE(karr)
+    isearch = -1   ! Failure
+    IF( k.GT.karr(n-1)) RETURN
+!
+    isearch=0
+    DO 
+       IF( k.LE.karr(isearch)) EXIT
+       isearch = isearch+1
+    END DO
+    IF( k.NE.karr(isearch)) isearch = -1  ! Failure
+  END FUNCTION isearch
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  INTEGER FUNCTION isearch_bin(karr, k)
+!
+!   Binary search an ordered table of integers
+!
+    INTEGER, INTENT(in) :: karr(0:)
+    INTEGER, INTENT(in) :: k
+    INTEGER :: n
+    INTEGER :: l, u
+!
+    n=SIZE(karr)
+    isearch_bin = -1   ! Failure
+    IF( k.LT.karr(0) .OR. k.GT.karr(n-1)) RETURN
+!
+    l=0; u=n-1
+    DO WHILE(l.LE.u)
+       isearch_bin = (l+u)/2
+       IF(k.EQ.karr(isearch_bin)) THEN 
+          RETURN
+       ELSE IF(k.LT.karr(isearch_bin)) THEN
+          u = isearch_bin-1
+       ELSE
+          l = isearch_bin+1
+       END IF
+    END DO
+    isearch_bin = -1   ! Failure
+  END FUNCTION isearch_bin
+!
+END MODULE sparse
diff --git a/src/tsparse3.f90 b/src/tsparse3.f90
new file mode 100644
index 0000000..0e75703
--- /dev/null
+++ b/src/tsparse3.f90
@@ -0,0 +1,705 @@
+!>
+!> @file tsparse3.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+!
+!  Solving the 2d PDE using splines and WSMP non-symmetric matrix
+!
+!    -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0
+!    exact solution: f(x,y) = (1-x^2) x^m cos(my)
+!
+MODULE pde2d_mod
+  USE bsplines
+  USE wsmp_bsplines
+  IMPLICIT NONE
+CONTAINS
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE dismat(spl, mat)
+!
+!   Assembly of FE matrix mat using spline spl
+!
+    TYPE(spline2d), INTENT(in)       :: spl
+    TYPE(wsmp_mat), INTENT(inout)    :: mat
+!
+    INTEGER :: n1, nidbas1, ndim1, ng1
+    INTEGER :: n2, nidbas2, ndim2, ng2
+    INTEGER :: i, j, ig1, ig2
+    INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
+    DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:)
+    DOUBLE PRECISION:: contrib
+!
+    INTEGER :: kterms         ! Number of terms in weak form
+    INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:)  ! Derivative order
+    DOUBLE PRECISION, ALLOCATABLE  :: coefs(:,:,:)          ! Terms in weak form
+    INTEGER, ALLOCATABLE :: left1(:), left2(:)
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
+    WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
+    WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
+!
+!   Gauss quadature
+!
+    CALL get_gauss(spl%sp1, ng1)
+    CALL get_gauss(spl%sp2, ng2)
+    ALLOCATE(xg1(ng1), wg1(ng1))
+    ALLOCATE(xg2(ng1), wg2(ng1))
+!
+!   Weak form
+!
+    kterms = mat%nterms
+    ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
+    ALLOCATE(coefs(kterms,ng1,ng2))
+!
+    ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative
+    ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! 
+!===========================================================================
+!              2.0 Assembly loop
+!
+    ALLOCATE(left1(ng1))
+    ALLOCATE(left2(ng2))
+    DO i=1,n1
+       CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
+       left1 = i
+       CALL basfun(xg1, spl%sp1, fun1, left1)
+       DO j=1,n2
+          CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
+          left2 = j
+          CALL basfun(xg2, spl%sp2, fun2, left2)
+!
+          DO ig1=1,ng1
+             DO ig2=1,ng2
+                CALL coefeq(xg1(ig1), xg2(ig2), &
+                     &      idert(:,:,ig1,ig2), &
+                     &      iderw(:,:,ig1,ig2), &
+                     &      coefs(:,ig1,ig2))
+             END DO
+          END DO
+!
+          DO iw1=0,nidbas1  ! Weight function in dir 1
+             igw1 = i+iw1
+             DO iw2=0,nidbas2  ! Weight function in dir 2
+                igw2 = MODULO(j+iw2-1, n2) + 1
+                irow = igw2 + (igw1-1)*n2
+                DO it1=0,nidbas1  ! Test function in dir 1
+                   igt1 = i+it1
+                   DO it2=0,nidbas2  ! Test function in dir 2
+                      igt2 = MODULO(j+it2-1, n2) + 1
+                      jcol = igt2 + (igt1-1)*n2
+!-------------
+                      contrib = 0.0d0
+                      DO ig1=1,ng1
+                         DO ig2=1,ng2
+                            DO iterm=1,kterms
+                               contrib = contrib + &
+                                    &    fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
+                                    &    fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * &
+                                    &    coefs(iterm,ig1,ig2) *                 &
+                                    &    fun2(it2,idert(iterm,2,ig1,ig2),ig2) * &
+                                    &    fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
+                                    &    wg1(ig1) * wg2(ig2)
+                            END DO
+                         END DO
+                      END DO
+                      CALL updtmat(mat, irow, jcol, contrib)
+!-------------
+                   END DO
+                END DO
+             END DO
+          END DO
+       END DO
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(xg1, wg1, fun1)
+    DEALLOCATE(xg2, wg2, fun2)
+    DEALLOCATE(idert, iderw, coefs)
+    DEALLOCATE(left1,left2)
+!
+  CONTAINS
+    SUBROUTINE coefeq(x, y, idt, idw, c)
+      DOUBLE PRECISION, INTENT(in) :: x, y
+      INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
+      DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
+!
+! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
+!
+      c(1) = x        ! 
+      idt(1,1) = 1
+      idt(1,2) = 0
+      idw(1,1) = 1
+      idw(1,2) = 0
+      !
+      c(2) = 1.d0/x
+      idt(2,1) = 0
+      idt(2,2) = 1
+      idw(2,1) = 0
+      idw(2,2) = 1
+    END SUBROUTINE coefeq
+  END SUBROUTINE dismat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE disrhs(mbess, spl, rhs)
+!
+!   Assembly the RHS using 2d spline spl
+!
+    INTEGER, INTENT(in) :: mbess
+    TYPE(spline2d), INTENT(in) :: spl
+    DOUBLE PRECISION, INTENT(out) :: rhs(:)
+    INTEGER :: n1, nidbas1, ndim1, ng1
+    INTEGER :: n2, nidbas2, ndim2, ng2
+    INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
+    DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
+    DOUBLE PRECISION:: contrib
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, n2, nidbas2) 
+!
+    ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
+    ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
+!
+!   Gauss quadature
+!
+    CALL get_gauss(spl%sp1, ng1)
+    CALL get_gauss(spl%sp2, ng2)
+    ALLOCATE(xg1(ng1), wg1(ng1))
+    ALLOCATE(xg2(ng1), wg2(ng1))
+!===========================================================================
+!              2.0 Assembly loop
+!
+    nrank = SIZE(rhs)
+    rhs(1:nrank) = 0.0d0
+!
+    DO i=1,n1
+       CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
+       DO ig1=1,ng1
+          CALL basfun(xg1(ig1), spl%sp1, fun1, i)
+          DO j=1,n2
+             CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
+             DO ig2=1,ng2
+                CALL basfun(xg2(ig2), spl%sp2, fun2, j)
+                contrib = wg1(ig1)*wg2(ig2) * &
+                     &    rhseq(xg1(ig1),xg2(ig2), mbess)
+                DO k1=0,nidbas1
+                   i1 = i+k1
+                   DO k2=0,nidbas2
+                      j2 = MODULO(j+k2-1,n2) + 1
+                      ij = j2 + (i1-1)*n2
+                      rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
+                   END DO
+                END DO
+             END DO
+          END DO
+       END DO
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(xg1, wg1, fun1)
+    DEALLOCATE(xg2, wg2, fun2)
+!
+  CONTAINS
+    DOUBLE PRECISION FUNCTION rhseq(x1, x2, m)
+      DOUBLE PRECISION, INTENT(in) :: x1, x2
+      INTEGER, INTENT(in) :: m
+      rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)
+    END FUNCTION rhseq
+  END SUBROUTINE disrhs
+!
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE ibcmat(mat, ny)
+!
+!   Apply BC on matrix
+!
+    TYPE(wsmp_mat), INTENT(inout) :: mat
+    INTEGER, INTENT(in) :: ny
+    INTEGER :: nrank, i, j
+    DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
+!===========================================================================
+!              1.0 Prologue
+!
+    nrank = mat%rank
+    ALLOCATE(zsum(nrank), arr(nrank))
+!===========================================================================
+!              2.0 Unicity at the axis
+!
+!   The vertical sum on the NY-th row
+!
+    zsum = 0.0d0
+    DO i=1,ny
+       arr = 0.0d0
+       CALL getrow(mat, i, arr)
+       zsum(:) = zsum(:) + arr(:)
+    END DO
+    CALL putrow(mat, ny, zsum)
+!
+!   The horizontal sum on the NY-th column
+!
+    zsum = 0.0d0
+    DO j=1,ny
+       arr = 0.0d0
+       CALL getcol(mat, j, arr)
+       zsum(ny:) = zsum(ny:) + arr(ny:)
+    END DO
+    CALL putcol(mat, ny, zsum)
+!
+!   The away operator
+!
+    DO j = 1,ny-1
+       arr = 0.0d0; arr(j) = 1.0d0
+       CALL putcol(mat, j, arr)     
+    END DO
+!
+    DO i = 1,ny-1
+       arr = 0.0d0; arr(i) = 1.0d0
+       CALL putrow(mat, i, arr)     
+    END DO
+!  
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+    DO j = nrank, nrank-ny+1, -1
+       arr = 0.0d0; arr(j) = 1.0d0
+       CALL putcol(mat, j, arr)     
+    END DO
+!
+    DO i = nrank, nrank-ny+1, -1
+       arr = 0.0d0; arr(i) = 1.0d0
+       CALL putrow(mat, i, arr)     
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(zsum, arr)
+!
+  END SUBROUTINE ibcmat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE ibcrhs(rhs, ny)
+!
+!   Apply BC on RHS
+!
+    DOUBLE PRECISION, INTENT(inout) :: rhs(:)
+    INTEGER, INTENT(in) :: ny
+    INTEGER :: nrank
+    DOUBLE PRECISION :: zsum
+!===========================================================================
+!              1.0 Prologue
+!
+    nrank = SIZE(rhs,1)
+!===========================================================================
+!              2.0 Unicity at the axis
+!
+!   The vertical sum
+!
+    zsum = SUM(rhs(1:ny))
+    rhs(ny) = zsum
+    rhs(1:ny-1) = 0.0d0
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+    rhs(nrank-ny+1:nrank) = 0.0d0
+  END SUBROUTINE ibcrhs
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+END MODULE pde2d_mod
+PROGRAM main
+  USE pde2d_mod
+  USE futils
+!
+  IMPLICIT NONE
+  INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms
+  LOGICAL :: nlppform
+  INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it
+  DOUBLE PRECISION :: pi, coefx(5), coefy(5)
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol
+  TYPE(spline2d) :: splxy
+  TYPE(wsmp_mat) :: mat
+!
+  CHARACTER(len=128) :: file='pde2d_wsmp.h5'
+  INTEGER :: fid
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
+  DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
+  DOUBLE PRECISION :: seconds, mem, dopla
+  DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
+  DOUBLE PRECISION :: tconv, treord
+  INTEGER :: nits=100
+  LOGICAL :: nlmetis, nlforce_zero
+!
+  NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlmetis, &
+       &            nlforce_zero, coefx, coefy
+!===========================================================================
+!              1.0 Prologue
+!
+!   Read in data specific to run
+!
+  nx = 8              ! Number of intervals in x
+  ny = 8              ! Number of intervals in y
+  nidbas = (/3,3/)    ! Degree of splines
+  ngauss = (/4,4/)    ! Number of Gauss points/interval
+  mbess = 2           ! Exponent of differential problem
+  nterms = 2          ! Number of terms in weak form
+  nlppform = .TRUE.   ! Use PPFORM for gridval or not
+  nlmetis = .FALSE.   ! Use metis ordering or minimum degree
+  nlforce_zero = .FALSE. ! Remove existing nodes with zero val in  putele/row/ele
+  coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+  coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Define grid on x (=radial) & y (=poloidal) axis
+!
+  pi = 4.0d0*ATAN(1.0d0)
+  ALLOCATE(xgrid(0:nx), ygrid(0:ny))
+  xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
+  CALL meshdist(coefx, xgrid, nx)
+  ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
+  CALL meshdist(coefy, ygrid, ny)
+!
+!   Create hdf5 file
+!
+  CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
+  CALL attach(fid, '/', 'NX', nx)
+  CALL attach(fid, '/', 'NY', ny)
+  CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
+  CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
+  CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
+  CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
+  CALL attach(fid, '/', 'MBESS', mbess)
+!===========================================================================
+!              2.0 Discretize the PDE
+!
+!   Set up spline
+!
+  t0 = seconds()
+  CALL set_spline(nidbas, ngauss, &
+       &          xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
+!
+!   FE matrix assembly
+!
+  nrank = (nx+nidbas(1))*ny    ! Rank of the FE matrix
+  WRITE(*,'(a,i8)') 'nrank', nrank
+!
+  CALL init(nrank, nterms, mat, nlforce_zero=nlforce_zero)
+  CALL dismat(splxy, mat)
+  ALLOCATE(arr(nrank))
+  IF(nrank.LT.100) THEN
+     DO i=1,nrank
+        CALL getele(mat, i, i, arr(i))
+     END DO
+     WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', arr
+  END IF
+!
+!   BC on Matrix
+!
+  WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero
+  WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
+  CALL ibcmat(mat, ny)
+  IF(nrank.LT.100) THEN
+     DO i=1,nrank
+        CALL getele(mat, i, i, arr(i))
+     END DO
+     WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', arr
+     WRITE(*,'(a)') 'Last rows'
+     DO i=nrank-ny,nrank
+        CALL getrow(mat, i, arr)
+        WRITE(*,'(10(1pe12.3))') arr
+     END DO
+  END IF
+  tmat = seconds() - t0
+!
+!   RHS assembly
+!
+  ALLOCATE(rhs(nrank), sol(nrank))
+  CALL disrhs(mbess, splxy, rhs)
+!
+!   BC on RHS
+!
+  CALL ibcrhs(rhs, ny)
+!
+  CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
+  WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
+  WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem()
+!===========================================================================
+!              3.0 Solve the dicretized system
+!
+  t0 = seconds()
+  CALL to_mat(mat)
+  tconv = seconds() -t0
+  WRITE(*,'(/a,l4)') 'associated(mat%mat)', ASSOCIATED(mat%mat)
+  WRITE(*,'(a,1pe12.3)') 'Mem used (MB) after TO_MAT', mem()
+!
+  t0 = seconds()
+  CALL reord_mat(mat, nlmetis=nlmetis, debug=.FALSE.)
+  CALL putmat(fid, '/MAT', mat)
+  treord = seconds() - t0
+!
+  t0 = seconds()
+  CALL numfact(mat, debug=.FALSE.)
+  tfact = seconds() - t0
+ 
+  WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem()
+!
+  CALL bsolve(mat, rhs, sol, debug=.FALSE.)
+  t0 = seconds()
+  DO it=1,nits   ! nits iterations for timing
+     CALL bsolve(mat, rhs, sol)
+     sol(1:ny-1) = sol(ny)
+  END DO
+  WRITE(*,'(/a,i6)') 'Number of refinement steps = ',mat%p%iparm(26)
+  WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem()
+  tsolv = (seconds() - t0)/REAL(nits)
+!
+!   Spline coefficients, taking into account of periodicity in y
+!   Note: in SOL, y was numbered first.
+!
+  dimx = splxy%sp1%dim
+  dimy = splxy%sp2%dim
+  ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
+  DO j=0,dimy-1
+     DO i=0,dimx-1
+        ij = MODULO(j,ny) + i*ny + 1
+        bcoef(i,j) = sol(ij)
+     END DO
+  END DO
+  WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef)
+  CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
+!===========================================================================
+!              4.0 Check the solution
+!
+!   Check function values computed with various method
+!
+  ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
+  DO i=0,nx
+     DO j=0,ny
+        solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
+     END DO
+  END DO
+  jder = (/0,0/)
+!
+!   Compute PPFORM/BCOEFS at first call to gridval
+  CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
+!
+  WRITE(*,'(/a)') '*** Checking solutions'
+  t0 = seconds()
+  DO it=1,nits   ! nits iterations for timing
+     CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+  END DO
+  tgrid = (seconds() - t0)/REAL(nits)
+  errsol = solana - solcal
+  IF( SIZE(bcoef,2) .LE. 10 ) THEN
+     CALL prnmat('BCOEF', bcoef)
+     CALL prnmat('SOLANA', solana)
+     CALL prnmat('SOLCAL', solcal)
+     CALL prnmat('ERRSOL', errsol)
+  END IF
+  WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
+       &    norm2(errsol) / norm2(solana)
+  WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s)             ', tgrid
+!
+  CALL putarr(fid, '/xgrid', xgrid, 'r')
+  CALL putarr(fid, '/ygrid', ygrid, '\theta')
+  CALL putarr(fid, '/sol', solcal, 'Solutions')
+  CALL putarr(fid, '/solana', solana,'Exact solutions')
+  CALL putarr(fid, '/errors', errsol, 'Errors')
+!
+!   Check derivatives d/dx and d/dy
+!
+  WRITE(*,'(/a)') '*** Checking gradient'
+  DO i=0,nx
+     DO j=0,ny
+        IF( mbess .EQ. 0 ) THEN
+           solana(i,j) = -2.0d0 * xgrid(i)
+        ELSE
+           solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
+             &           xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
+        END IF
+     END DO
+  END DO
+!
+  jder = (/1,0/)
+  CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+  errsol = solana - solcal
+  CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
+  WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx',  norm2(errsol)
+!
+  DO i=0,nx
+     DO j=0,ny
+        solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
+     END DO
+  END DO
+!
+  jder = (/0,1/)
+  CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+  CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
+  errsol = solana - solcal
+  WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy',  norm2(errsol)
+!
+  WRITE(*,'(/a)') '---'
+  WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
+  WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s)   ', tconv
+  WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s)      ', treord
+  WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
+  WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv
+  WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s)         ', tsolv
+  WRITE(*,'(a,2f10.3)') 'Factor  Gflop/s', gflops1
+!===========================================================================
+!              5.0 Clear the matrix and recompute
+!
+  WRITE(*,'(/a)') 'Recompute the solver ...'
+  t0 = seconds()
+  CALL clear_mat(mat)
+  CALL dismat(splxy, mat)
+  CALL ibcmat(mat, ny)
+  tmat = seconds()-t0
+!
+  t0 = seconds()
+  CALL numfact(mat, debug=.FALSE.)
+  tfact = seconds()-t0
+!
+  t0 = seconds()
+  ALLOCATE(newsol(nrank))
+  CALL bsolve(mat, rhs, newsol)
+  newsol(1:ny-1) = newsol(ny)
+  tsolv = seconds()-t0
+!
+  WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol))
+  WRITE(*,'(/a)') '---'
+  WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
+  WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
+  WRITE(*,'(a,1pe12.3)') 'Backsolve(2) time (s)         ', tsolv
+  WRITE(*,'(a,1pe12.3)') 'Total (s)                     ', tmat+tfact+tsolv
+!
+  DEALLOCATE(newsol)
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(xgrid, rhs, sol)
+  DEALLOCATE(solcal, solana, errsol)
+  DEALLOCATE(bcoef)
+  DEALLOCATE(arr)
+  CALL destroy_sp(splxy)
+  CALL destroy(mat)
+!
+  CALL closef(fid)
+!===========================================================================
+!
+CONTAINS
+  FUNCTION norm2(x)
+!
+!  Compute the 2-norm of array x
+!
+    IMPLICIT NONE
+    DOUBLE PRECISION :: norm2
+    DOUBLE PRECISION, INTENT(in) :: x(:,:)
+    DOUBLE PRECISION :: sum2
+    INTEGER :: i, j
+!
+    sum2 = 0.0d0
+    DO i=1,SIZE(x,1)
+       DO j=1,SIZE(x,2)
+          sum2 = sum2 + x(i,j)**2
+       END DO
+    END DO
+    norm2 = SQRT(sum2)
+  END FUNCTION norm2
+  SUBROUTINE prnmat(label, mat)
+    CHARACTER(len=*) :: label
+    DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
+    INTEGER :: i
+    WRITE(*,'(/a)') TRIM(label)
+    DO i=1,SIZE(mat,1)
+       WRITE(*,'(10(1pe12.3))') mat(i,:)
+    END DO
+  END SUBROUTINE prnmat
+END PROGRAM main
+!
+!+++
+SUBROUTINE meshdist(c, x, nx)
+!
+!   Construct an 1d  non-equidistant mesh given a
+!   mesh distribution function.
+!
+  IMPLICIT NONE
+  DOUBLE PRECISION, INTENT(in) :: c(5)
+  INTEGER, INTENT(iN) :: nx
+  DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+  INTEGER :: nintg
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+  DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+  INTEGER :: i, k
+!
+  a=x(0)
+  b=x(nx)
+  nintg = 10*nx
+  ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+  dx = (b-a)/REAL(nintg)
+  xint(0) = a
+  fint(0) = 0.0d0
+  f1 = fdist(xint(0))
+  DO i=1,nintg
+     f0 = f1
+     xint(i) = xint(i-1) + dx
+     f1 = fdist(xint(i))
+     fint(i) = fint(i-1) + 0.5*(f0+f1)
+  END DO
+!
+!  Normalization
+!
+  scal = REAL(nx) / fint(nintg)
+  fint(0:nintg) = fint(0:nintg) * scal
+!!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+  k = 1
+  DO i=1,nintg-1
+     IF( fint(i) .GE. REAL(k) ) THEN
+        x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+             &   (k-fint(i))
+        k = k+1
+     END IF
+  END DO
+!
+  DEALLOCATE(xint, fint)
+CONTAINS
+  DOUBLE PRECISION FUNCTION fdist(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
+  END FUNCTION fdist
+END SUBROUTINE meshdist
+!+++
diff --git a/src/tsparse4.f90 b/src/tsparse4.f90
new file mode 100644
index 0000000..40b900f
--- /dev/null
+++ b/src/tsparse4.f90
@@ -0,0 +1,722 @@
+!>
+!> @file tsparse4.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+!
+!  Solving the 2d PDE using splines and PARDISO non-symmetric matrix
+!
+!    -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0
+!    exact solution: f(x,y) = (1-x^2) x^m cos(my)
+!
+MODULE pde2d_mod
+  USE bsplines
+  USE pardiso_bsplines
+  IMPLICIT NONE
+CONTAINS
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE dismat(spl, mat)
+!
+!   Assembly of FE matrix mat using spline spl
+!
+    TYPE(spline2d), INTENT(in)       :: spl
+    TYPE(pardiso_mat), INTENT(inout) :: mat
+!
+    INTEGER :: n1, nidbas1, ndim1, ng1
+    INTEGER :: n2, nidbas2, ndim2, ng2
+    INTEGER :: i, j, ig1, ig2
+    INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
+    DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:)
+    DOUBLE PRECISION:: contrib
+!
+    INTEGER :: kterms         ! Number of terms in weak form
+    INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:)  ! Derivative order
+    DOUBLE PRECISION, ALLOCATABLE  :: coefs(:,:,:)          ! Terms in weak form
+    INTEGER, ALLOCATABLE :: left1(:), left2(:)
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
+    WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
+    WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
+!
+!   Gauss quadature
+!
+    CALL get_gauss(spl%sp1, ng1)
+    CALL get_gauss(spl%sp2, ng2)
+    ALLOCATE(xg1(ng1), wg1(ng1))
+    ALLOCATE(xg2(ng1), wg2(ng1))
+!
+!   Weak form
+!
+    kterms = mat%nterms
+    ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
+    ALLOCATE(coefs(kterms,ng1,ng2))
+!
+    ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative
+    ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! 
+!===========================================================================
+!              2.0 Assembly loop
+!
+    ALLOCATE(left1(ng1))
+    ALLOCATE(left2(ng2))
+    DO i=1,n1
+       CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
+       left1 = i
+       CALL basfun(xg1, spl%sp1, fun1, left1)
+       DO j=1,n2
+          CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
+          left2 = j
+          CALL basfun(xg2, spl%sp2, fun2, left2)
+!
+          DO ig1=1,ng1
+             DO ig2=1,ng2
+                CALL coefeq(xg1(ig1), xg2(ig2), &
+                     &      idert(:,:,ig1,ig2), &
+                     &      iderw(:,:,ig1,ig2), &
+                     &      coefs(:,ig1,ig2))
+             END DO
+          END DO
+!
+          DO iw1=0,nidbas1  ! Weight function in dir 1
+             igw1 = i+iw1
+             DO iw2=0,nidbas2  ! Weight function in dir 2
+                igw2 = MODULO(j+iw2-1, n2) + 1
+                irow = igw2 + (igw1-1)*n2
+                DO it1=0,nidbas1  ! Test function in dir 1
+                   igt1 = i+it1
+                   DO it2=0,nidbas2  ! Test function in dir 2
+                      igt2 = MODULO(j+it2-1, n2) + 1
+                      jcol = igt2 + (igt1-1)*n2
+!-------------
+                      contrib = 0.0d0
+                      DO ig1=1,ng1
+                         DO ig2=1,ng2
+                            DO iterm=1,kterms
+                               contrib = contrib + &
+                                    &    fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
+                                    &    fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * &
+                                    &    coefs(iterm,ig1,ig2) *                 &
+                                    &    fun2(it2,idert(iterm,2,ig1,ig2),ig2) * &
+                                    &    fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
+                                    &    wg1(ig1) * wg2(ig2)
+                            END DO
+                         END DO
+                      END DO
+                      CALL updtmat(mat, irow, jcol, contrib)
+!-------------
+                   END DO
+                END DO
+             END DO
+          END DO
+       END DO
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(xg1, wg1, fun1)
+    DEALLOCATE(xg2, wg2, fun2)
+    DEALLOCATE(idert, iderw, coefs)
+    DEALLOCATE(left1,left2)
+!
+  CONTAINS
+    SUBROUTINE coefeq(x, y, idt, idw, c)
+      DOUBLE PRECISION, INTENT(in) :: x, y
+      INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
+      DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
+!
+! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
+!
+      c(1) = x        ! 
+      idt(1,1) = 1
+      idt(1,2) = 0
+      idw(1,1) = 1
+      idw(1,2) = 0
+      !
+      c(2) = 1.d0/x
+      idt(2,1) = 0
+      idt(2,2) = 1
+      idw(2,1) = 0
+      idw(2,2) = 1
+    END SUBROUTINE coefeq
+  END SUBROUTINE dismat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE disrhs(mbess, spl, rhs)
+!
+!   Assembly the RHS using 2d spline spl
+!
+    INTEGER, INTENT(in) :: mbess
+    TYPE(spline2d), INTENT(in) :: spl
+    DOUBLE PRECISION, INTENT(out) :: rhs(:)
+    INTEGER :: n1, nidbas1, ndim1, ng1
+    INTEGER :: n2, nidbas2, ndim2, ng2
+    INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
+    DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
+    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
+    DOUBLE PRECISION:: contrib
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+    CALL get_dim(spl%sp2, ndim2, n2, nidbas2) 
+!
+    ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
+    ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
+!
+!   Gauss quadature
+!
+    CALL get_gauss(spl%sp1, ng1)
+    CALL get_gauss(spl%sp2, ng2)
+    ALLOCATE(xg1(ng1), wg1(ng1))
+    ALLOCATE(xg2(ng1), wg2(ng1))
+!===========================================================================
+!              2.0 Assembly loop
+!
+    nrank = SIZE(rhs)
+    rhs(1:nrank) = 0.0d0
+!
+    DO i=1,n1
+       CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
+       DO ig1=1,ng1
+          CALL basfun(xg1(ig1), spl%sp1, fun1, i)
+          DO j=1,n2
+             CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
+             DO ig2=1,ng2
+                CALL basfun(xg2(ig2), spl%sp2, fun2, j)
+                contrib = wg1(ig1)*wg2(ig2) * &
+                     &    rhseq(xg1(ig1),xg2(ig2), mbess)
+                DO k1=0,nidbas1
+                   i1 = i+k1
+                   DO k2=0,nidbas2
+                      j2 = MODULO(j+k2-1,n2) + 1
+                      ij = j2 + (i1-1)*n2
+                      rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
+                   END DO
+                END DO
+             END DO
+          END DO
+       END DO
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(xg1, wg1, fun1)
+    DEALLOCATE(xg2, wg2, fun2)
+!
+  CONTAINS
+    DOUBLE PRECISION FUNCTION rhseq(x1, x2, m)
+      DOUBLE PRECISION, INTENT(in) :: x1, x2
+      INTEGER, INTENT(in) :: m
+      rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)
+    END FUNCTION rhseq
+  END SUBROUTINE disrhs
+!
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE ibcmat(mat, ny)
+!
+!   Apply BC on matrix
+!
+    TYPE(pardiso_mat), INTENT(inout) :: mat
+    INTEGER, INTENT(in) :: ny
+    INTEGER :: nrank, i, j
+    DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
+!===========================================================================
+!              1.0 Prologue
+!
+    nrank = mat%rank
+    ALLOCATE(zsum(nrank), arr(nrank))
+!===========================================================================
+!              2.0 Unicity at the axis
+!
+!   The vertical sum on the NY-th row
+!
+    zsum = 0.0d0
+    DO i=1,ny
+       arr = 0.0d0
+       CALL getrow(mat, i, arr)
+       zsum(:) = zsum(:) + arr(:)
+    END DO
+    zsum(ny) = SUM(zsum(1:ny))   ! using symmetry
+    CALL putrow(mat, ny, zsum)
+!
+!   The away operator
+!
+    DO i = 1,ny-1
+       arr = 0.0d0; arr(i) = 1.0d0
+       CALL putrow(mat, i, arr)     
+    END DO
+!  
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+    DO i = nrank, nrank-ny+1, -1
+       arr = 0.0d0; arr(i) = 1.0d0
+       CALL putrow(mat, i, arr)     
+    END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+    DEALLOCATE(zsum, arr)
+!
+  END SUBROUTINE ibcmat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE ibcrhs(rhs, ny)
+!
+!   Apply BC on RHS
+!
+    DOUBLE PRECISION, INTENT(inout) :: rhs(:)
+    INTEGER, INTENT(in) :: ny
+    INTEGER :: nrank
+    DOUBLE PRECISION :: zsum
+!===========================================================================
+!              1.0 Prologue
+!
+    nrank = SIZE(rhs,1)
+!===========================================================================
+!              2.0 Unicity at the axis
+!
+!   The vertical sum
+!
+    zsum = SUM(rhs(1:ny))
+    rhs(ny) = zsum
+    rhs(1:ny-1) = 0.0d0
+!===========================================================================
+!              3.0 Dirichlet on right boundary
+!
+    rhs(nrank-ny+1:nrank) = 0.0d0
+  END SUBROUTINE ibcrhs
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+END MODULE pde2d_mod
+PROGRAM main
+  USE pde2d_mod
+  USE futils
+!
+  IMPLICIT NONE
+  INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms
+  LOGICAL :: nlppform
+  INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it
+  DOUBLE PRECISION :: pi, coefx(5), coefy(5)
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol
+  TYPE(spline2d) :: splxy
+  TYPE(pardiso_mat) :: mat, newmat
+!
+  CHARACTER(len=128) :: file='pde2d_sym_pardiso.h5'
+  INTEGER :: fid
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
+  DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
+  DOUBLE PRECISION :: seconds, mem, dopla
+  DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
+  DOUBLE PRECISION :: tconv, treord
+  INTEGER :: nits=100
+  LOGICAL :: nlmetis, nlforce_zero, nlpos
+!
+  NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, nlmetis, &
+       &            nlpos, nlforce_zero, coefx, coefy
+!===========================================================================
+!              1.0 Prologue
+!
+!   Read in data specific to run
+!
+  nx = 8              ! Number of intervals in x
+  ny = 8              ! Number of intervals in y
+  nidbas = (/3,3/)    ! Degree of splines
+  ngauss = (/4,4/)    ! Number of Gauss points/interval
+  mbess = 2           ! Exponent of differential problem
+  nterms = 2          ! Number of terms in weak form
+  nlppform = .TRUE.   ! Use PPFORM for gridval or not
+  nlmetis = .FALSE.   ! Use metis ordering or minimum degree
+  nlforce_zero = .TRUE. ! Remove existing nodes with zero val in  putele/row/ele
+  nlpos = .TRUE.        ! Matrix is positive definite
+  coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+  coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
+!
+  READ(*,newrun)
+  WRITE(*,newrun)
+!
+!   Define grid on x (=radial) & y (=poloidal) axis
+!
+  pi = 4.0d0*ATAN(1.0d0)
+  ALLOCATE(xgrid(0:nx), ygrid(0:ny))
+  xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
+  CALL meshdist(coefx, xgrid, nx)
+  ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
+  CALL meshdist(coefy, ygrid, ny)
+!
+!   Create hdf5 file
+!
+  CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
+  CALL attach(fid, '/', 'NX', nx)
+  CALL attach(fid, '/', 'NY', ny)
+  CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
+  CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
+  CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
+  CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
+  CALL attach(fid, '/', 'MBESS', mbess)
+!===========================================================================
+!              2.0 Discretize the PDE
+!
+!   Set up spline
+!
+  t0 = seconds()
+  CALL set_spline(nidbas, ngauss, &
+       &          xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
+!
+!   FE matrix assembly
+!
+  nrank = (nx+nidbas(1))*ny    ! Rank of the FE matrix
+  WRITE(*,'(a,i8)') 'nrank', nrank
+!
+  CALL init(nrank, nterms, mat, nlsym=.FALSE., nlpos=nlpos, &
+       &    nlforce_zero=nlforce_zero)
+  CALL dismat(splxy, mat)
+  ALLOCATE(arr(nrank))
+  IF(nrank.LT.100) THEN
+     DO i=1,nrank
+        CALL getele(mat, i, i, arr(i))
+     END DO
+     WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', arr
+  END IF
+!
+!   BC on Matrix
+!
+  WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero
+  WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
+  CALL ibcmat(mat, ny)
+  IF(nrank.LT.100) THEN
+     DO i=1,nrank
+        CALL getele(mat, i, i, arr(i))
+     END DO
+     WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', arr
+     WRITE(*,'(a)') 'Last rows'
+     DO i=nrank-ny,nrank
+        CALL getrow(mat, i, arr)
+        WRITE(*,'(10(1pe12.3))') arr
+     END DO
+  END IF
+  tmat = seconds() - t0
+!
+!   RHS assembly
+!
+  ALLOCATE(rhs(nrank), sol(nrank))
+  CALL disrhs(mbess, splxy, rhs)
+!
+!   BC on RHS
+!
+  CALL ibcrhs(rhs, ny)
+!
+  CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
+  WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
+  WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem()
+!===========================================================================
+!              3.0 Solve the dicretized system
+!
+  t0 = seconds()
+  CALL to_mat(mat)
+  tconv = seconds() -t0
+  WRITE(*,'(/a,l4)') 'associated(mat%mat)', ASSOCIATED(mat%mat)
+  WRITE(*,'(a,1pe12.3)') 'Mem used (MB) after TO_MAT', mem()
+!
+  t0 = seconds()
+  CALL reord_mat(mat)
+  CALL putmat(fid, '/MAT', mat)
+  treord = seconds() - t0
+!
+  t0 = seconds()
+  CALL numfact(mat)
+  tfact = seconds() - t0
+ 
+  WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem()
+  WRITE(*,'(/a,i12)') 'Number of nonzeros in factors of A  = ',mat%p%iparm(18)
+  WRITE(*,'(a,i12)')  'Number of factorization MFLOPS      = ',mat%p%iparm(19)
+  gflops1 = mat%p%iparm(19) / tfact / 1.d3
+!
+  CALL bsolve(mat, rhs, sol)
+  WRITE(*,'(/a, 1pe16.8)') 'Norm of sol =', SQRT(DOT_PRODUCT(sol,sol))
+  t0 = seconds()
+  DO it=1,nits   ! nits iterations for timing
+     CALL bsolve(mat, rhs, sol)
+     sol(1:ny-1) = sol(ny)
+  END DO
+  WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem()
+  tsolv = (seconds() - t0)/REAL(nits)
+!
+!   Spline coefficients, taking into account of periodicity in y
+!   Note: in SOL, y was numbered first.
+!
+  dimx = splxy%sp1%dim
+  dimy = splxy%sp2%dim
+  ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
+  DO j=0,dimy-1
+     DO i=0,dimx-1
+        ij = MODULO(j,ny) + i*ny + 1
+        bcoef(i,j) = sol(ij)
+     END DO
+  END DO
+  WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef)
+  CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
+!===========================================================================
+!              4.0 Check the solution
+!
+!   Check function values computed with various method
+!
+  ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
+  DO i=0,nx
+     DO j=0,ny
+        solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
+     END DO
+  END DO
+  jder = (/0,0/)
+!
+!   Compute PPFORM/BCOEFS at first call to gridval
+  CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
+!
+  WRITE(*,'(/a)') '*** Checking solutions'
+  t0 = seconds()
+  DO it=1,nits   ! nits iterations for timing
+     CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+  END DO
+  tgrid = (seconds() - t0)/REAL(nits)
+  errsol = solana - solcal
+  IF( SIZE(bcoef,2) .LE. 10 ) THEN
+     CALL prnmat('BCOEF', bcoef)
+     CALL prnmat('SOLANA', solana)
+     CALL prnmat('SOLCAL', solcal)
+     CALL prnmat('ERRSOL', errsol)
+  END IF
+  WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
+       &    norm2(errsol) / norm2(solana)
+  WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s)             ', tgrid
+!
+  CALL putarr(fid, '/xgrid', xgrid, 'r')
+  CALL putarr(fid, '/ygrid', ygrid, '\theta')
+  CALL putarr(fid, '/sol', solcal, 'Solutions')
+  CALL putarr(fid, '/solana', solana,'Exact solutions')
+  CALL putarr(fid, '/errors', errsol, 'Errors')
+!
+!   Check derivatives d/dx and d/dy
+!
+  WRITE(*,'(/a)') '*** Checking gradient'
+  DO i=0,nx
+     DO j=0,ny
+        IF( mbess .EQ. 0 ) THEN
+           solana(i,j) = -2.0d0 * xgrid(i)
+        ELSE
+           solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
+             &           xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
+        END IF
+     END DO
+  END DO
+!
+  jder = (/1,0/)
+  CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+  errsol = solana - solcal
+  CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
+  WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx',  norm2(errsol)
+!
+  DO i=0,nx
+     DO j=0,ny
+        solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
+     END DO
+  END DO
+!
+  jder = (/0,1/)
+  CALL gridval(splxy, xgrid, ygrid, solcal, jder)
+  CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
+  errsol = solana - solcal
+  WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy',  norm2(errsol)
+!
+  WRITE(*,'(/a)') '---'
+  WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
+  WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s)   ', tconv
+  WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s)      ', treord
+  WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
+  WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv
+  WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s)         ', tsolv
+  WRITE(*,'(a,2f10.3)') 'Factor  Gflop/s', gflops1
+!===========================================================================
+!              5.0 Clear the matrix and recompute
+!
+  WRITE(*,'(/a)') 'Recompute the solver ...'
+  t0 = seconds()
+  CALL clear_mat(mat)
+  CALL dismat(splxy, mat)
+  CALL ibcmat(mat, ny)
+  tmat = seconds()-t0
+!
+  t0 = seconds()
+  CALL numfact(mat)
+  tfact = seconds()-t0
+  gflops1 = mat%p%iparm(19) / tfact / 1.d3
+!
+  t0 = seconds()
+  ALLOCATE(newsol(nrank))
+  CALL bsolve(mat, rhs, newsol)
+  newsol(1:ny-1) = newsol(ny)
+  tsolv = seconds()-t0
+!
+  WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol))
+  WRITE(*,'(/a)') '---'
+  WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
+  WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
+  WRITE(*,'(a,1pe12.3)') 'Backsolve(2) time (s)         ', tsolv
+  WRITE(*,'(a,1pe12.3)') 'Total (s)                     ', tmat+tfact+tsolv
+  WRITE(*,'(a,2f10.3)') 'Factor  Gflop/s', gflops1
+!
+!===========================================================================
+!              6.0 Another matrix to solve
+!
+  WRITE(*,'(/a, 1pe16.8)') 'Norm of sol =', SQRT(DOT_PRODUCT(sol,sol))
+!!$  PRINT*, 'current/last matid, matid', current_matid, last_matid, mat%matid
+!
+  WRITE(*,'(/a)') ' Another solver ...'
+!
+  CALL init(nrank, nterms, newmat, nlsym=.FALSE., nlpos=nlpos, &
+       &    nlforce_zero=nlforce_zero)
+  CALL mcopy(mat, newmat)
+!!$  CALL clear_mat(newmat)
+!!$  CALL maddto(newmat, 1000.0d0, mat)
+  CALL factor(newmat)
+!!$  CALL dismat(splxy, newmat)
+!!$  CALL ibcmat(newmat, ny)
+!!$  CALL to_mat(newmat)
+!!$  CALL reord_mat(newmat)
+!!$  CALL numfact(newmat)
+  CALL bsolve(newmat, rhs, newsol)
+  WRITE(*,'(/a, 1pe16.8)') 'Norm of newsol =', SQRT(DOT_PRODUCT(newsol,newsol))
+!!$  PRINT*, 'current/last matid, matid', current_matid, last_matid, newmat%matid
+!
+  CALL bsolve(mat, rhs, sol)
+  WRITE(*,'(/a, 1pe16.8)') 'Norm of sol =', SQRT(DOT_PRODUCT(sol,sol))
+!!$  PRINT*, 'current/last matid, matid', current_matid, last_matid, mat%matid
+!
+!===========================================================================
+
+!              9.0  Epilogue
+!
+  DEALLOCATE(xgrid, rhs, sol)
+  DEALLOCATE(solcal, solana, errsol)
+  DEALLOCATE(bcoef)
+  DEALLOCATE(arr)
+  DEALLOCATE(newsol)
+  CALL destroy_sp(splxy)
+  CALL destroy(mat)
+  CALL destroy(newmat)
+!
+  CALL closef(fid)
+!===========================================================================
+!
+CONTAINS
+  FUNCTION norm2(x)
+!
+!  Compute the 2-norm of array x
+!
+    IMPLICIT NONE
+    DOUBLE PRECISION :: norm2
+    DOUBLE PRECISION, INTENT(in) :: x(:,:)
+    DOUBLE PRECISION :: sum2
+    INTEGER :: i, j
+!
+    sum2 = 0.0d0
+    DO i=1,SIZE(x,1)
+       DO j=1,SIZE(x,2)
+          sum2 = sum2 + x(i,j)**2
+       END DO
+    END DO
+    norm2 = SQRT(sum2)
+  END FUNCTION norm2
+  SUBROUTINE prnmat(label, mat)
+    CHARACTER(len=*) :: label
+    DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
+    INTEGER :: i
+    WRITE(*,'(/a)') TRIM(label)
+    DO i=1,SIZE(mat,1)
+       WRITE(*,'(10(1pe12.3))') mat(i,:)
+    END DO
+  END SUBROUTINE prnmat
+END PROGRAM main
+!
+!+++
+SUBROUTINE meshdist(c, x, nx)
+!
+!   Construct an 1d  non-equidistant mesh given a
+!   mesh distribution function.
+!
+  IMPLICIT NONE
+  DOUBLE PRECISION, INTENT(in) :: c(5)
+  INTEGER, INTENT(iN) :: nx
+  DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
+  INTEGER :: nintg
+  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
+  DOUBLE PRECISION :: a, b, dx, f0, f1, scal
+  INTEGER :: i, k
+!
+  a=x(0)
+  b=x(nx)
+  nintg = 10*nx
+  ALLOCATE(xint(0:nintg), fint(0:nintg))
+!
+!  Mesh distribution
+!
+  dx = (b-a)/REAL(nintg)
+  xint(0) = a
+  fint(0) = 0.0d0
+  f1 = fdist(xint(0))
+  DO i=1,nintg
+     f0 = f1
+     xint(i) = xint(i-1) + dx
+     f1 = fdist(xint(i))
+     fint(i) = fint(i-1) + 0.5*(f0+f1)
+  END DO
+!
+!  Normalization
+!
+  scal = REAL(nx) / fint(nintg)
+  fint(0:nintg) = fint(0:nintg) * scal
+!!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
+!
+!  Obtain mesh point by (inverse) interpolation
+!
+  k = 1
+  DO i=1,nintg-1
+     IF( fint(i) .GE. REAL(k) ) THEN
+        x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
+             &   (k-fint(i))
+        k = k+1
+     END IF
+  END DO
+!
+  DEALLOCATE(xint, fint)
+CONTAINS
+  DOUBLE PRECISION FUNCTION fdist(x)
+    DOUBLE PRECISION, INTENT(in) :: x
+    fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
+  END FUNCTION fdist
+END SUBROUTINE meshdist
+!+++
diff --git a/src/wsmp_mod.f90 b/src/wsmp_mod.f90
new file mode 100644
index 0000000..8026640
--- /dev/null
+++ b/src/wsmp_mod.f90
@@ -0,0 +1,1835 @@
+!>
+!> @file wsmp_mod.f90
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+MODULE wsmp_bsplines
+!
+!    WSMP_BSPLINES: Simple interface to the sparse direct solver WSMP.
+!
+!    T.M. Tran, CRPP-EPFL
+!    November 2011
+!
+  USE sparse
+  IMPLICIT NONE
+!
+  INTEGER, SAVE :: current_matid = -1
+  INTEGER, SAVE :: last_matid = -1
+!
+  TYPE wsmp_param
+     INTEGER          :: iparm(64)
+     DOUBLE PRECISION :: dparm(64)
+  END TYPE wsmp_param
+!
+  TYPE wsmp_mat
+     INTEGER :: matid=-1
+     INTEGER :: rank=0, nnz
+     INTEGER :: nterms, kmat, nrhs
+     LOGICAL :: nlsym
+     LOGICAL :: nlpos
+     LOGICAL :: nlforce_zero
+     TYPE(spmat), POINTER      :: mat => NULL()
+     INTEGER, POINTER          :: cols(:) => NULL()
+     INTEGER, POINTER          :: irow(:) => NULL()
+     INTEGER, POINTER          :: perm(:) => NULL()
+     INTEGER, POINTER          :: invp(:) => NULL()
+     INTEGER, POINTER          :: mrp(:) => NULL()
+     DOUBLE PRECISION, POINTER :: diag(:) => NULL()
+     DOUBLE PRECISION, POINTER :: val(:) => NULL()
+     DOUBLE PRECISION, POINTER :: aux(:) => NULL()
+     TYPE(wsmp_param)       :: p
+  END TYPE wsmp_mat
+!
+  TYPE zwsmp_mat
+     INTEGER :: matid=-1
+     INTEGER :: rank=0, nnz
+     INTEGER :: nterms, kmat, nrhs
+     LOGICAL :: nlherm
+     LOGICAL :: nlsym
+     LOGICAL :: nlpos
+     LOGICAL :: nlforce_zero
+     TYPE(zspmat), POINTER     :: mat => NULL()
+     INTEGER, POINTER          :: cols(:) => NULL()
+     INTEGER, POINTER          :: irow(:) => NULL()
+     INTEGER, POINTER          :: perm(:) => NULL()
+     INTEGER, POINTER          :: invp(:) => NULL()
+     INTEGER, POINTER          :: mrp(:) => NULL()
+     DOUBLE COMPLEX, POINTER :: diag(:) => NULL()
+     DOUBLE COMPLEX, POINTER :: val(:) => NULL()
+     DOUBLE COMPLEX, POINTER :: aux(:) => NULL()
+     TYPE(wsmp_param)       :: p
+  END TYPE zwsmp_mat
+!
+  INTERFACE init
+     MODULE PROCEDURE init_wsmp_mat, init_zwsmp_mat
+  END INTERFACE init
+!
+  INTERFACE check_mat
+     MODULE PROCEDURE check_wsmp_mat, check_zwsmp_mat
+  END INTERFACE check_mat
+!
+  INTERFACE clear_mat
+     MODULE PROCEDURE clear_wsmp_mat, clear_zwsmp_mat
+  END INTERFACE clear_mat
+!
+  INTERFACE updtmat
+     MODULE PROCEDURE updt_wsmp_mat, updt_zwsmp_mat
+  END INTERFACE updtmat
+!
+  INTERFACE putele
+     MODULE PROCEDURE putele_wsmp_mat, putele_zwsmp_mat
+  END INTERFACE putele
+!
+  INTERFACE getele
+     MODULE PROCEDURE getele_wsmp_mat, getele_zwsmp_mat
+  END INTERFACE getele
+!
+  INTERFACE putrow
+     MODULE PROCEDURE putrow_wsmp_mat, putrow_zwsmp_mat
+  END INTERFACE putrow
+!
+  INTERFACE getrow
+     MODULE PROCEDURE getrow_wsmp_mat, getrow_zwsmp_mat
+  END INTERFACE getrow
+!
+  INTERFACE putcol
+     MODULE PROCEDURE putcol_wsmp_mat, putcol_zwsmp_mat
+  END INTERFACE putcol
+!
+  INTERFACE getcol
+     MODULE PROCEDURE getcol_wsmp_mat, getcol_zwsmp_mat
+  END INTERFACE getcol
+!
+  INTERFACE get_count
+     MODULE PROCEDURE get_count_wsmp_mat, get_count_zwsmp_mat
+  END INTERFACE get_count
+!
+  INTERFACE to_mat
+     MODULE PROCEDURE to_wsmp_mat, to_zwsmp_mat
+  END INTERFACE to_mat
+!
+  INTERFACE reord_mat
+     MODULE PROCEDURE reord_wsmp_mat, reord_zwsmp_mat
+  END INTERFACE reord_mat
+!
+  INTERFACE numfact
+     MODULE PROCEDURE numfact_wsmp_mat, numfact_zwsmp_mat
+  END INTERFACE numfact
+!
+  INTERFACE factor
+     MODULE PROCEDURE factor_wsmp_mat, factor_zwsmp_mat
+  END INTERFACE factor
+!
+  INTERFACE bsolve
+     MODULE PROCEDURE bsolve_wsmp_mat1,  bsolve_wsmp_matn, &
+          &           bsolve_zwsmp_mat1, bsolve_zwsmp_matn
+  END INTERFACE bsolve
+!
+  INTERFACE vmx
+     MODULE PROCEDURE vmx_wsmp_mat,  vmx_wsmp_matn, &
+          &           vmx_zwsmp_mat, vmx_zwsmp_matn
+  END INTERFACE vmx
+!
+  INTERFACE destroy
+     MODULE PROCEDURE destroy_wsmp_mat, destroy_zwsmp_mat
+ END INTERFACE destroy
+!
+ INTERFACE putmat
+    MODULE PROCEDURE put_wsmp_mat, put_zwsmp_mat
+ END INTERFACE putmat
+!
+ INTERFACE getmat
+    MODULE PROCEDURE get_wsmp_mat, get_zwsmp_mat
+ END INTERFACE getmat
+!
+ INTERFACE mcopy
+    MODULE PROCEDURE mcopy_wsmp_mat,  mcopy_zwsmp_mat
+ END INTERFACE mcopy
+!
+ INTERFACE maddto
+    MODULE PROCEDURE maddto_wsmp_mat, maddto_zwsmp_mat
+ END INTERFACE maddto
+!
+ INTERFACE psum_mat
+    MODULE PROCEDURE psum_wsmp_mat, psum_zwsmp_mat
+ END INTERFACE psum_mat
+!
+ INTERFACE p2p_mat
+    MODULE PROCEDURE p2p_wsmp_mat, p2p_zwsmp_mat
+ END INTERFACE p2p_mat
+!
+CONTAINS
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE init_wsmp_mat(n, nterms, mat, kmat, nlsym, nlpos, &
+       &                   nlforce_zero)
+!
+!   Initialize an empty sparse wsmp matrix
+!
+    INTEGER, INTENT(in)           :: n, nterms
+    TYPE(wsmp_mat)                :: mat
+    INTEGER, OPTIONAL, INTENT(in) :: kmat
+    LOGICAL, OPTIONAL, INTENT(in) :: nlsym
+    LOGICAL, OPTIONAL, INTENT(in) :: nlpos
+    LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero
+    INTEGER :: info
+    INTEGER :: idummy = 0
+    DOUBLE PRECISION :: dummy = 0.0d0
+!
+!  Store away (valid) current matrix id
+!
+    IF(current_matid .GE. 0) THEN
+       CALL wstoremat(current_matid, info)
+       IF(info.NE.0) THEN
+          WRITE(*,'(a,i4)') 'INIT: WSTOREMAT failed WITH error', info
+          STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+       END IF
+    END IF
+    last_matid = last_matid+1
+    mat%matid = last_matid
+    current_matid = mat%matid
+!
+!  Initialize sparse matrice structure
+!
+    mat%rank = n
+    mat%nterms = nterms
+    mat%nnz = 0
+    mat%nlsym = .FALSE.
+    mat%nlpos = .TRUE.
+    mat%nrhs = 1
+    mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val
+    IF(PRESENT(kmat)) mat%kmat = kmat
+    IF(PRESENT(nlsym)) mat%nlsym = nlsym
+    IF(PRESENT(nlpos)) mat%nlpos = nlpos
+    IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero
+    IF(ASSOCIATED(mat%mat)) DEALLOCATE(mat%mat)
+    ALLOCATE(mat%mat)
+    CALL init(n, mat%mat)
+!
+!  Fill 'iparm' and 'dparm' with default values
+!
+    mat%p%iparm(1:3) = 0
+    IF(mat%nlsym) THEN
+       CALL wssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, &
+            &      mat%perm, mat%invp, dummy, mat%rank, mat%nrhs, &
+            &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
+            &      mat%p%dparm)
+       IF(mat%nlpos) THEN
+          mat%p%iparm(31) = 0
+       ELSE
+!!$          mat%p%iparm(31) = 1  ! LDL^T without pivoting
+          mat%p%iparm(31) = 2  ! LDL^T with pivoting
+       END IF
+    ELSE
+       CALL wgsmp(mat%rank, mat%irow, mat%cols, mat%val, dummy, mat%rank, &
+            &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
+   END IF
+    IF(mat%p%iparm(64).NE.0) THEN
+       WRITE(*,'(a,i4)') 'INIT: Initialization failed with error', &
+            &             mat%p%iparm(64)
+       STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+    END IF
+!!$    WRITE(*,'(/a/(10i8))') 'iparm', mat%p%iparm
+!!$    WRITE(*,'(/a/(10(1pe8.1)))') 'dparm', mat%p%dparm
+!
+    CALL setup_wsmp(mat%p%iparm, mat%p%dparm)
+!
+  CONTAINS
+    SUBROUTINE setup_wsmp(iparm, dparm)
+      INTEGER          :: iparm(:)
+      DOUBLE PRECISION :: dparm(:)
+    END SUBROUTINE setup_wsmp
+  END SUBROUTINE init_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE init_zwsmp_mat(n, nterms, mat, kmat, nlsym, nlherm, &
+       &                   nlpos, nlforce_zero)
+!
+!   Initialize an empty sparse wsmp matrix
+!
+    INTEGER, INTENT(in)           :: n, nterms
+    TYPE(zwsmp_mat)               :: mat
+    INTEGER, OPTIONAL, INTENT(in) :: kmat
+    LOGICAL, OPTIONAL, INTENT(in) :: nlsym
+    LOGICAL, OPTIONAL, INTENT(in) :: nlherm
+    LOGICAL, OPTIONAL, INTENT(in) :: nlpos
+    LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero
+    INTEGER :: info
+    INTEGER :: idummy = 0
+    DOUBLE COMPLEX :: dummy = 0.0d0
+!
+!  Store away (valid) current matrix id
+!
+    IF(current_matid .GE. 0) THEN
+       CALL wstoremat(current_matid, info)
+       IF(info.NE.0) THEN
+          WRITE(*,'(a,i4)') 'INIT: WSTOREMAT failed WITH error', info
+          STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+       END IF
+    END IF
+    last_matid = last_matid+1
+    mat%matid = last_matid
+    current_matid = mat%matid
+!
+!  Initialize sparse matrice structure
+!
+    mat%rank = n
+    mat%nterms = nterms
+    mat%nnz = 0
+    mat%nlsym = .FALSE.
+    mat%nlherm = .FALSE.
+    mat%nlpos = .TRUE.
+    mat%nrhs = 1
+    mat%nlforce_zero = .TRUE. ! Do not remove existing nodes with zero val
+    IF(PRESENT(kmat)) mat%kmat = kmat
+    IF(PRESENT(nlsym)) mat%nlsym = nlsym
+    IF(PRESENT(nlherm)) mat%nlherm = nlherm
+    IF(PRESENT(nlpos)) mat%nlpos = nlpos
+    IF(PRESENT(nlforce_zero)) mat%nlforce_zero = nlforce_zero
+    IF(ASSOCIATED(mat%mat)) DEALLOCATE(mat%mat)
+    ALLOCATE(mat%mat)
+    CALL init(n, mat%mat)
+!
+!  Fill 'iparm' and 'dparm' with default values
+!
+    mat%p%iparm(1:3) = 0
+    IF(mat%nlherm .OR. mat%nlsym) THEN
+       CALL zssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, &
+            &      mat%perm, mat%invp, dummy, mat%rank, mat%nrhs, &
+            &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
+            &      mat%p%dparm)
+       IF(mat%nlherm) THEN
+          IF(mat%nlpos) THEN
+             mat%p%iparm(31) = 0  ! hermitian, positive definite 
+          ELSE
+             mat%p%iparm(31) = 2  ! hermitian, no-definite, LDL^T with pivoting
+          END IF
+       ELSE
+          mat%p%iparm(31) = 3     ! non-hermitian, symmetric
+       END IF
+    ELSE
+       CALL zgsmp(mat%rank, mat%irow, mat%cols, mat%val, dummy, mat%rank, &
+            &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
+   END IF
+    IF(mat%p%iparm(64).NE.0) THEN
+       WRITE(*,'(a,i4)') 'INIT: Initialization failed with error', &
+            &             mat%p%iparm(64)
+       STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+    END IF
+!!$    WRITE(*,'(/a/(10i8))') 'iparm', mat%p%iparm
+!!$    WRITE(*,'(/a/(10(1pe8.1)))') 'dparm', mat%p%dparm
+!
+    CALL setup_wsmp(mat%p%iparm, mat%p%dparm)
+!
+  CONTAINS
+    SUBROUTINE setup_wsmp(iparm, dparm)
+      INTEGER          :: iparm(:)
+      DOUBLE PRECISION :: dparm(:)
+    END SUBROUTINE setup_wsmp
+  END SUBROUTINE init_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE check_wsmp_mat(mat)
+!
+!  Check matrice id and  recall the matrice if not current
+!
+    TYPE(wsmp_mat) :: mat
+    INTEGER :: info
+!
+    IF(.NOT.mat%nlsym) THEN
+       IF( mat%matid.NE.current_matid ) THEN      
+          WRITE(*,'(a)') "Processing multi matrices is not possible "// &
+               &         "for non-symetric matrices."
+          STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+       ELSE
+          RETURN
+       END IF
+    END IF
+!
+    IF( mat%matid.NE.current_matid ) THEN
+       IF(current_matid .GE. 0) THEN
+          CALL wstoremat(current_matid, info)
+          IF(info.NE.0) THEN
+             WRITE(*,'(a,i3,a,i4)') 'Store matrix', current_matid, &
+                  &               ' failed with error', info
+             STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+          END IF
+       END IF
+       CALL wrecallmat(mat%matid, info)
+       IF(info.NE.0) THEN
+          WRITE(*,'(a,i3,a,i4)') 'Recall matrix', mat%matid, &
+               &              ' failed with error', info
+          STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+       END IF
+       current_matid = mat%matid
+    END IF
+  END SUBROUTINE check_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE check_zwsmp_mat(mat)
+!
+!  Check matrice id and  recall the matrice if not current
+!
+    TYPE(zwsmp_mat) :: mat
+    INTEGER :: info
+!
+    IF(.NOT.mat%nlsym .AND. .NOT.mat%nlherm ) THEN
+       IF( mat%matid.NE.current_matid ) THEN      
+          WRITE(*,'(a)') "Processing multi matrices is not possible "// &
+               &         "for non-symetric/non-hermitian matrices."
+          STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+       ELSE
+          RETURN
+       END IF
+    END IF
+!
+    IF( mat%matid.NE.current_matid ) THEN
+       IF(current_matid .GE. 0) THEN
+          CALL wstoremat(current_matid, info)
+          IF(info.NE.0) THEN
+             WRITE(*,'(a,i3,a,i4)') 'Store matrix', current_matid, &
+                  &               ' failed with error', info
+             STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+          END IF
+       END IF
+       CALL wrecallmat(mat%matid, info)
+       IF(info.NE.0) THEN
+          WRITE(*,'(a,i3,a,i4)') 'Recall matrix', mat%matid, &
+               &              ' failed with error', info
+          STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+       END IF
+       current_matid = mat%matid
+    END IF
+  END SUBROUTINE check_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE clear_wsmp_mat(mat)
+!
+!   Clear matrix, keeping its sparse structure unchanged
+!
+    TYPE(wsmp_mat) :: mat
+!
+    mat%val = 0.0d0
+  END SUBROUTINE clear_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE clear_zwsmp_mat(mat)
+!
+!   Clear matrix, keeping its sparse structure unchanged
+!
+    TYPE(zwsmp_mat) :: mat
+!
+    mat%val = (0.0d0, 0.0d0)
+  END SUBROUTINE clear_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE updt_wsmp_mat(mat, i, j, val)
+!
+!   Update element Aij of wsmp  matrix
+!
+    TYPE(wsmp_mat)            :: mat
+    INTEGER, INTENT(in)          :: i, j
+    DOUBLE PRECISION, INTENT(in) :: val
+    INTEGER :: k, s, e
+!
+    IF(mat%nlsym) THEN   ! Store only upper part for symmetric matrices
+       IF(i.GT.j) RETURN
+    END IF
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL updtmat(mat%mat, i, j, val)
+    ELSE
+       s = mat%irow(i)
+       e = mat%irow(i+1)-1
+       k = isearch(mat%cols(s:e), j)
+       IF( k.GE.0 ) THEN
+          mat%val(s+k) =  mat%val(s+k)+val
+       ELSE
+          WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j
+          STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+       END IF
+    END IF
+  END SUBROUTINE updt_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE updt_zwsmp_mat(mat, i, j, val)
+!
+!   Update element Aij of wsmp  matrix
+!
+    TYPE(zwsmp_mat)            :: mat
+    INTEGER, INTENT(in)        :: i, j
+    DOUBLE COMPLEX, INTENT(in) :: val
+    INTEGER :: k, s, e
+!
+    IF(mat%nlherm .OR. mat%nlsym) THEN   ! Store only upper part
+       IF(i.GT.j) RETURN
+    END IF
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL updtmat(mat%mat, i, j, val)
+    ELSE
+       s = mat%irow(i)
+       e = mat%irow(i+1)-1
+       k = isearch(mat%cols(s:e), j)
+       IF( k.GE.0 ) THEN
+          IF(mat%nlherm) THEN
+             mat%val(s+k) =  mat%val(s+k)+CONJG(val)  ! CSR-UT* = CSC-LT
+          ELSE
+             mat%val(s+k) =  mat%val(s+k)+val
+       END IF
+       ELSE
+          WRITE(*,'(a,2i6)') 'UPDT: i, j out of range ', i, j
+          STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+       END IF
+    END IF
+  END SUBROUTINE updt_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putele_wsmp_mat(mat, i, j, val)
+!
+!  Put element (i,j) of matrix
+!
+    TYPE(wsmp_mat)            :: mat
+    INTEGER, INTENT(in)          :: i, j
+    DOUBLE PRECISION, INTENT(in) :: val
+    INTEGER :: iput, jput
+    INTEGER :: k, s, e
+!
+    iput = i
+    jput = j
+    IF(mat%nlsym) THEN
+       IF( i.GT.j ) THEN    ! Lower triangular part
+          iput = j
+          jput = i
+       END IF
+    END IF
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL putele(mat%mat, iput, jput, val, &
+            &      nlforce_zero=mat%nlforce_zero)
+    ELSE
+       s = mat%irow(iput)
+       e = mat%irow(iput+1)-1
+       k = isearch(mat%cols(s:e), jput)
+       IF( k.GE.0 ) THEN
+          mat%val(s+k) =  val
+       ELSE
+          IF(ABS(val) .GT. EPSILON(0.0d0)) THEN   ! Ok for zero val
+             WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j
+             STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+          END IF
+       END IF
+   END IF
+  END SUBROUTINE putele_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putele_zwsmp_mat(mat, i, j, val)
+!
+!  Put element (i,j) of matrix
+!
+    TYPE(zwsmp_mat)            :: mat
+    INTEGER, INTENT(in)        :: i, j
+    DOUBLE COMPLEX, INTENT(in) :: val
+    DOUBLE COMPLEX :: valput
+    INTEGER :: iput, jput
+    INTEGER :: k, s, e
+!
+    iput = i
+    jput = j
+    valput = val
+    IF(mat%nlsym .OR. mat%nlherm) THEN
+       IF( i.GT.j ) THEN    ! Lower triangular part
+          iput = j
+          jput = i
+          IF(mat%nlherm) THEN
+             valput = CONJG(val)
+          ELSE
+             valput = val
+          END IF
+       END IF
+    END IF
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL putele(mat%mat, iput, jput, valput, &
+            &      nlforce_zero=mat%nlforce_zero)
+    ELSE
+       s = mat%irow(iput)
+       e = mat%irow(iput+1)-1
+       k = isearch(mat%cols(s:e), jput)
+       IF( k.GE.0 ) THEN
+          IF(mat%nlherm) THEN
+             mat%val(s+k) = CONJG(valput)   ! CSR-UT* = CSC-LT
+          ELSE
+             mat%val(s+k) =  valput
+          END IF
+       ELSE
+          IF(ABS(val) .GT. EPSILON(0.0d0)) THEN   ! Ok for zero val
+             WRITE(*,'(a,2i6)') 'PUTELE: i, j out of range ', i, j
+             STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+          END IF
+       END IF
+   END IF
+ END SUBROUTINE putele_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getele_wsmp_mat(mat, i, j, val)
+!
+!   Get element (i,j) of sparse matrix
+!
+    TYPE(wsmp_mat)             :: mat
+    INTEGER, INTENT(in)           :: i, j
+    DOUBLE PRECISION, INTENT(out) :: val
+    INTEGER :: iget, jget
+    INTEGER :: k, s, e
+!
+    iget = i
+    jget = j
+    IF(mat%nlsym) THEN
+       IF( i.GT.j ) THEN    ! Lower triangular part
+          iget = j
+          jget = i
+       END IF
+    END IF
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL getele(mat%mat, iget, jget, val)
+    ELSE
+       s = mat%irow(iget)
+       e = mat%irow(iget+1)-1
+       k = isearch(mat%cols(s:e), jget)
+       IF( k.GE.0 ) THEN
+          val =mat%val(s+k) 
+       ELSE
+          val = 0.0d0   ! Assume zero val if not found
+       END IF
+    END IF
+  END SUBROUTINE getele_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getele_zwsmp_mat(mat, i, j, val)
+!
+!   Get element (i,j) of sparse matrix
+!
+    TYPE(zwsmp_mat)             :: mat
+    INTEGER, INTENT(in)         :: i, j
+    DOUBLE COMPLEX, INTENT(out) :: val
+    DOUBLE COMPLEX :: valget
+    INTEGER :: iget, jget
+    INTEGER :: k, s, e
+!
+    iget = i
+    jget = j
+    IF(mat%nlherm .OR. mat%nlsym) THEN
+       IF( i.GT.j ) THEN    ! Lower triangular part
+          iget = j
+          jget = i
+       END IF
+    END IF
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL getele(mat%mat, iget, jget, valget)
+    ELSE
+       s = mat%irow(iget)
+       e = mat%irow(iget+1)-1
+       k = isearch(mat%cols(s:e), jget)
+       IF( k.GE.0 ) THEN
+          IF(mat%nlherm) THEN
+             valget = CONJG(mat%val(s+k))   ! CSR-UT* = CSC-LT
+          ELSE
+             valget = mat%val(s+k) 
+          END IF
+       ELSE
+          valget = (0.0d0,0.0d0)   ! Assume zero val if not found
+       END IF
+    END IF
+    val = valget
+    IF( i.GT.j ) THEN
+       IF(mat%nlherm) THEN 
+          val = CONJG(valget)
+       END IF
+    END IF
+  END SUBROUTINE getele_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putrow_wsmp_mat(amat, i, arr)
+!
+! Put a row into sparse matrix
+!
+    TYPE(wsmp_mat), INTENT(inout) :: amat
+    INTEGER, INTENT(in)              :: i
+    DOUBLE PRECISION, INTENT(in)     :: arr(:)
+    INTEGER :: j
+!
+    DO j=1,amat%rank
+       CALL putele(amat, i, j, arr(j))
+    END DO
+  END SUBROUTINE putrow_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putrow_zwsmp_mat(amat, i, arr)
+!
+! Put a row into sparse matrix
+!
+    TYPE(zwsmp_mat), INTENT(inout) :: amat
+    INTEGER, INTENT(in)            :: i
+    DOUBLE COMPLEX, INTENT(in)     :: arr(:)
+    INTEGER :: j
+!
+    DO j=1,amat%rank
+       CALL putele(amat, i, j, arr(j))
+    END DO
+  END SUBROUTINE putrow_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getrow_wsmp_mat(amat, i, arr)
+!
+!   Get a row from sparse matrix
+!
+    TYPE(wsmp_mat), INTENT(in)  :: amat
+    INTEGER, INTENT(in)            :: i
+    DOUBLE PRECISION, INTENT(out)  :: arr(:)
+    INTEGER :: j
+!
+    DO j=1,amat%rank
+       CALL getele(amat, i, j, arr(j))
+    END DO
+  END SUBROUTINE getrow_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getrow_zwsmp_mat(amat, i, arr)
+!
+!   Get a row from sparse matrix
+!
+    TYPE(zwsmp_mat), INTENT(in)  :: amat
+    INTEGER, INTENT(in)          :: i
+    DOUBLE COMPLEX, INTENT(out)  :: arr(:)
+    INTEGER :: j
+!
+    DO j=1,amat%rank
+       CALL getele(amat, i, j, arr(j))
+    END DO
+  END SUBROUTINE getrow_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putcol_wsmp_mat(amat, j, arr)
+!
+! Put a column into sparse matrix
+!
+    TYPE(wsmp_mat), INTENT(inout) :: amat
+    INTEGER, INTENT(in)              :: j
+    DOUBLE PRECISION, INTENT(in)     :: arr(:)
+    INTEGER :: i
+!
+    DO i=1,amat%rank
+       CALL putele(amat, i, j, arr(i))
+    END DO
+  END SUBROUTINE putcol_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE putcol_zwsmp_mat(amat, j, arr)
+!
+! Put a column into sparse matrix
+!
+    TYPE(zwsmp_mat), INTENT(inout) :: amat
+    INTEGER, INTENT(in)            :: j
+    DOUBLE COMPLEX, INTENT(in)     :: arr(:)
+    INTEGER :: i
+!
+    DO i=1,amat%rank
+       CALL putele(amat, i, j, arr(i))
+    END DO
+  END SUBROUTINE putcol_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getcol_wsmp_mat(amat, j, arr)
+!
+!   Get a column from sparse matrix
+!
+    TYPE(wsmp_mat), INTENT(in)  :: amat
+    INTEGER, INTENT(in)            :: j
+    DOUBLE PRECISION, INTENT(out)  :: arr(:)
+    INTEGER :: i
+!
+    DO i=1,amat%rank
+       CALL getele(amat, i, j, arr(i))
+    END DO
+  END SUBROUTINE getcol_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE getcol_zwsmp_mat(amat, j, arr)
+!
+!   Get a column from sparse matrix
+!
+    TYPE(zwsmp_mat), INTENT(in)  :: amat
+    INTEGER, INTENT(in)          :: j
+    DOUBLE COMPLEX, INTENT(out)  :: arr(:)
+    INTEGER :: i
+!
+    DO i=1,amat%rank
+       CALL getele(amat, i, j, arr(i))
+    END DO
+  END SUBROUTINE getcol_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  INTEGER FUNCTION get_count_wsmp_mat(mat, nnz)
+!
+!   Number of non-zeros in sparse matrix
+!
+    TYPE(wsmp_mat)                 :: mat
+    INTEGER, INTENT(out), OPTIONAL :: nnz(:)
+    INTEGER :: i
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       get_count_wsmp_mat = get_count(mat%mat, nnz)
+    ELSE
+       get_count_wsmp_mat = mat%nnz
+       IF(PRESENT(nnz)) THEN
+          DO i=1,mat%rank
+             nnz(i) = mat%irow(i+1)-mat%irow(i)
+          END DO
+       END IF
+    END IF
+  END FUNCTION get_count_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  INTEGER FUNCTION get_count_zwsmp_mat(mat, nnz)
+!
+!   Number of non-zeros in sparse matrix
+!
+    TYPE(zwsmp_mat)                :: mat
+    INTEGER, INTENT(out), OPTIONAL :: nnz(:)
+    INTEGER :: i
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       get_count_zwsmp_mat = get_count(mat%mat, nnz)
+    ELSE
+       get_count_zwsmp_mat = mat%nnz
+       IF(PRESENT(nnz)) THEN
+          DO i=1,mat%rank
+             nnz(i) = mat%irow(i+1)-mat%irow(i)
+          END DO
+       END IF
+    END IF
+  END FUNCTION get_count_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE to_wsmp_mat(mat, nlkeep)
+!
+!   Convert linked list spmat to wsmp matrice structure
+!
+    TYPE(wsmp_mat) :: mat
+    LOGICAL, INTENT(in), OPTIONAL :: nlkeep
+    INTEGER :: i, nnz, rank, s, e
+    LOGICAL :: nlclean
+!
+    nlclean = .TRUE.
+    IF(PRESENT(nlkeep)) THEN
+       nlclean = .NOT. nlkeep
+    END IF
+!
+!    Allocate the WSMP matrix structure
+!
+    nnz = get_count(mat)
+    rank = mat%rank
+    mat%nnz = nnz
+    IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
+    IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
+    IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm)
+    IF(ASSOCIATED(mat%invp)) DEALLOCATE(mat%invp)
+    IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
+    ALLOCATE(mat%val(nnz))
+    IF(mat%nlsym) THEN
+       ALLOCATE(mat%perm(rank))
+       ALLOCATE(mat%invp(rank))
+    END IF
+    ALLOCATE(mat%irow(rank+1))
+    ALLOCATE(mat%cols(nnz))
+!
+!    Fill WSMP structure and deallocate the sparse rows
+!
+    mat%irow = 1
+    DO i=1,rank
+       mat%irow(i+1) = mat%irow(i) + mat%mat%row(i)%nnz
+       s = mat%irow(i)
+       e = mat%irow(i+1)-1
+       CALL getrow(mat%mat%row(i), mat%val(s:e), mat%cols(s:e))
+       IF(nlclean) CALL destroy(mat%mat%row(i))
+    END DO
+    IF(nlclean) DEALLOCATE(mat%mat)
+  END SUBROUTINE to_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE to_zwsmp_mat(mat, nlkeep)
+!
+!   Convert linked list spmat to wsmp matrice structure
+!
+    TYPE(zwsmp_mat) :: mat
+    LOGICAL, INTENT(in), OPTIONAL :: nlkeep
+    INTEGER :: i, nnz, rank, s, e
+    LOGICAL :: nlclean
+!
+    nlclean = .TRUE.
+    IF(PRESENT(nlkeep)) THEN
+       nlclean = .NOT. nlkeep
+    END IF
+!
+!    Allocate the WSMP matrix structure
+!
+    nnz = get_count(mat)
+    rank = mat%rank
+    mat%nnz = nnz
+    IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
+    IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
+    IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm)
+    IF(ASSOCIATED(mat%invp)) DEALLOCATE(mat%invp)
+    IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
+    ALLOCATE(mat%val(nnz))
+    IF(mat%nlsym) THEN
+       ALLOCATE(mat%perm(rank))
+       ALLOCATE(mat%invp(rank))
+    END IF
+    ALLOCATE(mat%irow(rank+1))
+    ALLOCATE(mat%cols(nnz))
+!
+!    Fill WSMP structure and deallocate the sparse rows
+!
+    mat%irow = 1
+    DO i=1,rank
+       mat%irow(i+1) = mat%irow(i) + mat%mat%row(i)%nnz
+       s = mat%irow(i)
+       e = mat%irow(i+1)-1
+       CALL getrow(mat%mat%row(i), mat%val(s:e), mat%cols(s:e))
+       IF(nlclean) CALL destroy(mat%mat%row(i))
+    END DO
+    IF(mat%nlherm) THEN
+       mat%val(:) = CONJG(mat%val(:))  ! CSR-UT* = CSC-LT
+    END IF
+    IF(nlclean) DEALLOCATE(mat%mat)
+  END SUBROUTINE to_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE reord_wsmp_mat(mat)
+!
+!   Reordering and symbolic factorization
+!
+    TYPE(wsmp_mat) :: mat
+    DOUBLE PRECISION :: dummy
+!
+!    Recall the matrice if not current
+!
+    CALL check_mat(mat)
+!
+    IF(mat%nlsym) THEN
+       mat%p%iparm(2) = 1  ! Ordering
+       mat%p%iparm(3) = 2  ! Symbolic factorization
+       CALL wssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, &
+            &      mat%perm, mat%invp, dummy, mat%rank, mat%nrhs, &
+            &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
+            &      mat%p%dparm)
+    ELSE
+       mat%p%iparm(2) = 1  ! Analysis and reordering
+       mat%p%iparm(3) = 1
+       CALL wgsmp(mat%rank, mat%irow, mat%cols, mat%val, dummy, mat%rank, &
+            &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
+    END IF
+    IF(mat%p%iparm(64).NE.0) THEN
+       WRITE(*,'(a,i4)') 'REORD: Reordering failed with error', mat%p%iparm(64)
+       STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+    END IF
+  END SUBROUTINE reord_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE reord_zwsmp_mat(mat)
+!
+!   Reordering and symbolic factorization
+!
+    TYPE(zwsmp_mat)             :: mat
+    DOUBLE COMPLEX :: dummy
+!
+!    Recall the matrice if not current
+!
+    CALL check_mat(mat)
+!
+    IF(mat%nlsym .OR. mat%nlherm) THEN
+       mat%p%iparm(2) = 1  ! Ordering
+       mat%p%iparm(3) = 2  ! Symbolic factorization
+       CALL zssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, &
+            &      mat%perm, mat%invp, dummy, mat%rank, mat%nrhs, &
+            &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
+            &      mat%p%dparm)
+!!$       WRITE(*,'(a,i3/(10i8))') 'REORD: matrice', mat%matid, mat%perm
+    ELSE
+       mat%p%iparm(2) = 1  ! Analysis and reordering
+       mat%p%iparm(3) = 1
+       CALL zgsmp(mat%rank, mat%irow, mat%cols, mat%val, dummy, mat%rank, &
+            &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
+    END IF
+    IF(mat%p%iparm(64).NE.0) THEN
+       WRITE(*,'(a,i4)') 'REORD: Reordering failed with error', mat%p%iparm(64)
+       STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+    END IF
+  END SUBROUTINE reord_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE numfact_wsmp_mat(mat)
+!
+!   Numerical factorization
+!
+    TYPE(wsmp_mat) :: mat
+    DOUBLE PRECISION :: dummy
+!
+!    Recall the matrice if not current
+!
+    CALL check_mat(mat)
+!
+    IF(mat%nlsym) THEN
+       mat%p%iparm(2) = 3   ! Numerical factorization
+       mat%p%iparm(3) = 3
+       CALL wssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, &
+            &      mat%perm, mat%invp, dummy, mat%rank, mat%nrhs, &
+            &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
+            &      mat%p%dparm)
+    ELSE
+       mat%p%iparm(2) = 2  ! Factorization
+       mat%p%iparm(3) = 2
+       CALL wgsmp(mat%rank, mat%irow, mat%cols, mat%val, dummy, mat%rank, &
+            &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
+    END IF
+    IF(mat%p%iparm(64).NE.0) THEN
+       WRITE(*,'(a,i4)') 'NUMFACT: Num. Factor failed with error', mat%p%iparm(64)
+       STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+    END IF
+  END SUBROUTINE numfact_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE numfact_zwsmp_mat(mat)
+!
+!   Numerical factorization
+!
+    TYPE(zwsmp_mat) :: mat
+    DOUBLE COMPLEX :: dummy
+!
+!    Recall the matrice if not current
+!
+    CALL check_mat(mat)
+!
+    IF(mat%nlsym .OR. mat%nlherm) THEN
+       mat%p%iparm(2) = 3   ! Numerical factorization
+       mat%p%iparm(3) = 3
+       CALL zssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, &
+            &      mat%perm, mat%invp, dummy, mat%rank, mat%nrhs, &
+            &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
+            &      mat%p%dparm)
+    ELSE
+       mat%p%iparm(2) = 2  ! Factorization
+       mat%p%iparm(3) = 2
+       CALL zgsmp(mat%rank, mat%irow, mat%cols, mat%val, dummy, mat%rank, &
+            &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
+    END IF
+    IF(mat%p%iparm(64).NE.0) THEN
+       WRITE(*,'(a,i4)') 'NUMFACT: Num. Factor failed with error', mat%p%iparm(64)
+       STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+    END IF
+  END SUBROUTINE numfact_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE factor_wsmp_mat(mat, nlreord)
+!
+!  Factor (create  +reorder + factor) a wsmp_mat matrix
+!
+    TYPE(wsmp_mat)                :: mat
+    LOGICAL, OPTIONAL, INTENT(in) :: nlreord
+    LOGICAL :: mlreord
+!----------------------------------------------------------------------
+!               1.0  Creation from the sparse rows
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL to_mat(mat)
+    END IF
+!----------------------------------------------------------------------
+!               2.0  Reordering and symbolic factorization phase
+!
+    mlreord = .TRUE.
+    IF(PRESENT(nlreord)) mlreord = nlreord
+    IF(mlreord) THEN
+       CALL reord_mat(mat)
+    END IF
+!----------------------------------------------------------------------
+!               3.0  Numerical factorization
+!
+    CALL numfact(mat)
+  END SUBROUTINE factor_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE factor_zwsmp_mat(mat, nlreord)
+!
+!  Factor (create  +reorder + factor) a wsmp_mat matrix
+!
+    TYPE(zwsmp_mat)               :: mat
+    LOGICAL, OPTIONAL, INTENT(in) :: nlreord
+    LOGICAL :: mlreord
+!----------------------------------------------------------------------
+!               1.0  Creation from the sparse rows
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL to_mat(mat)
+    END IF
+!----------------------------------------------------------------------
+!               2.0  Reordering and symbolic factorization phase
+!
+    mlreord = .TRUE.
+    IF(PRESENT(nlreord)) mlreord = nlreord
+    IF(mlreord) THEN
+       CALL reord_mat(mat)
+    END IF
+!----------------------------------------------------------------------
+!               3.0  Numerical factorization
+!
+    CALL numfact(mat)
+  END SUBROUTINE factor_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE bsolve_wsmp_mat1(mat, rhs, sol, nref)
+!
+!   Backsolve, using Wsmp
+!
+    TYPE(wsmp_mat)             :: mat
+    DOUBLE PRECISION           :: rhs(:)
+    DOUBLE PRECISION, OPTIONAL :: sol(:)
+    INTEGER, OPTIONAL          :: nref
+    DOUBLE PRECISION :: dummy
+!
+!    Recall the matrice if not current
+!
+    CALL check_mat(mat)
+!
+    IF(mat%nlsym) THEN
+       mat%p%iparm(2) = 4  ! Back substitution
+       mat%p%iparm(3) = 4
+    ELSE
+       mat%p%iparm(2) = 3  ! Back substitution
+       mat%p%iparm(3) = 3
+    END IF
+    mat%p%iparm(6) = 0  ! Max numbers of iterative refinement steps
+    IF(PRESENT(nref)) THEN
+       IF(mat%nlsym) THEN
+          mat%p%iparm(3) = 5
+       ELSE
+          mat%p%iparm(3) = 4
+       END IF
+       mat%p%iparm(6) = nref
+    END IF
+    mat%nrhs = 1
+    IF(PRESENT(sol)) THEN
+       sol = rhs
+       IF(mat%nlsym) THEN
+          CALL wssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, &
+               &      mat%perm, mat%invp, sol, mat%rank, mat%nrhs, &
+               &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
+               &      mat%p%dparm)
+       ELSE
+          CALL wgsmp(mat%rank, mat%irow, mat%cols, mat%val, sol, mat%rank, &
+               &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
+       END IF
+    ELSE
+       IF(mat%nlsym) THEN
+          CALL wssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, &
+               &      mat%perm, mat%invp, rhs, mat%rank, mat%nrhs, &
+               &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
+               &      mat%p%dparm)
+       ELSE
+          CALL wgsmp(mat%rank, mat%irow, mat%cols, mat%val, rhs, mat%rank, &
+               &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
+       END IF
+    END IF
+    IF(mat%p%iparm(64).NE.0) THEN
+       WRITE(*,'(a,i4)') 'BSOLVE: Failed with error', mat%p%iparm(64)
+       STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+    END IF
+!
+   END SUBROUTINE bsolve_wsmp_mat1
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE bsolve_zwsmp_mat1(mat, rhs, sol, nref)
+!
+!   Backsolve, using Wsmp
+!
+    TYPE(zwsmp_mat)          :: mat
+    DOUBLE COMPLEX           :: rhs(:)
+    DOUBLE COMPLEX, OPTIONAL :: sol(:)
+    INTEGER, OPTIONAL        :: nref
+    DOUBLE COMPLEX :: dummy
+!
+!    Recall the matrice if not current
+!
+    CALL check_mat(mat)
+!
+    IF(mat%nlsym .OR. mat%nlherm) THEN
+       mat%p%iparm(2) = 4  ! Back substitution
+       mat%p%iparm(3) = 4
+    ELSE
+       mat%p%iparm(2) = 3  ! Back substitution
+       mat%p%iparm(3) = 3
+    END IF
+    mat%p%iparm(6) = 0  ! Max numbers of iterative refinement steps
+    IF(PRESENT(nref)) THEN
+       IF(mat%nlsym .OR. mat%nlherm) THEN
+          mat%p%iparm(3) = 5
+       ELSE
+          mat%p%iparm(3) = 4
+       END IF
+       mat%p%iparm(6) = nref
+    END IF
+    mat%nrhs = 1
+    IF(PRESENT(sol)) THEN
+       sol = rhs
+       IF(mat%nlsym .OR. mat%nlherm) THEN
+          CALL zssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, &
+               &      mat%perm, mat%invp, sol, mat%rank, mat%nrhs, &
+               &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
+               &      mat%p%dparm)
+       ELSE
+          CALL zgsmp(mat%rank, mat%irow, mat%cols, mat%val, sol, mat%rank, &
+               &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
+       END IF
+    ELSE
+       IF(mat%nlsym .OR. mat%nlherm) THEN
+          CALL zssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, &
+               &      mat%perm, mat%invp, rhs, mat%rank, mat%nrhs, &
+               &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
+               &      mat%p%dparm)
+       ELSE
+          CALL zgsmp(mat%rank, mat%irow, mat%cols, mat%val, rhs, mat%rank, &
+               &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
+       END IF
+    END IF
+    IF(mat%p%iparm(64).NE.0) THEN
+       WRITE(*,'(a,i4)') 'BSOLVE: Failed with error', mat%p%iparm(64)
+       STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+    END IF
+!
+  END SUBROUTINE bsolve_zwsmp_mat1
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE bsolve_wsmp_matn(mat, rhs, sol, nref)
+!
+!   Backsolve, using Wsmp, multiple RHS
+!
+    TYPE(wsmp_mat)             :: mat
+    DOUBLE PRECISION           :: rhs(:,:)
+    DOUBLE PRECISION, OPTIONAL :: sol(:,:)
+    INTEGER, OPTIONAL          :: nref
+    DOUBLE PRECISION :: dummy
+!
+!    Recall the matrice if not current
+!
+    CALL check_mat(mat)
+!
+    IF(mat%nlsym) THEN
+       mat%p%iparm(2) = 4  ! Back substitution
+       mat%p%iparm(3) = 4
+    ELSE
+       mat%p%iparm(2) = 3  ! Back substitution
+       mat%p%iparm(3) = 3
+    END IF
+    mat%p%iparm(6) = 0  ! Max numbers of iterative refinement steps
+    IF(PRESENT(nref)) THEN
+       IF(mat%nlsym) THEN
+          mat%p%iparm(3) = 5
+       ELSE
+          mat%p%iparm(3) = 4
+       END IF
+       mat%p%iparm(6) = nref
+    END IF
+    mat%nrhs = SIZE(rhs,2)
+    IF(PRESENT(sol)) THEN
+       sol = rhs
+       IF(mat%nlsym) THEN
+          CALL wssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, &
+               &      mat%perm, mat%invp, sol, mat%rank, mat%nrhs, &
+               &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
+               &      mat%p%dparm)
+       ELSE
+          CALL wgsmp(mat%rank, mat%irow, mat%cols, mat%val, sol, mat%rank, &
+               &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
+       END IF
+    ELSE
+       IF(mat%nlsym) THEN
+          CALL wssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, &
+               &      mat%perm, mat%invp, rhs, mat%rank, mat%nrhs, &
+               &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
+               &      mat%p%dparm)
+       ELSE
+          CALL wgsmp(mat%rank, mat%irow, mat%cols, mat%val, rhs, mat%rank, &
+               &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
+       END IF
+    END IF
+    IF(mat%p%iparm(64).NE.0) THEN
+       WRITE(*,'(a,i4)') 'BSOLVE: Failed with error', mat%p%iparm(64)
+       STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+    END IF
+!
+  END SUBROUTINE bsolve_wsmp_matn
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE bsolve_zwsmp_matn(mat, rhs, sol, nref)
+!
+!   Backsolve, using Wsmp, multiple RHS
+!
+    TYPE(zwsmp_mat)          :: mat
+    DOUBLE COMPLEX           :: rhs(:,:)
+    DOUBLE COMPLEX, OPTIONAL :: sol(:,:)
+    INTEGER, OPTIONAL        :: nref
+    DOUBLE COMPLEX :: dummy
+!
+!    Recall the matrice if not current
+!
+    CALL check_mat(mat)
+!
+    IF(mat%nlsym .or. mat%nlherm) THEN
+       mat%p%iparm(2) = 4  ! Back substitution
+       mat%p%iparm(3) = 4
+    ELSE
+       mat%p%iparm(2) = 3  ! Back substitution
+       mat%p%iparm(3) = 3
+    END IF
+    mat%p%iparm(6) = 0  ! Max numbers of iterative refinement steps
+    IF(PRESENT(nref)) THEN
+       IF(mat%nlsym .OR. mat%nlherm) THEN
+          mat%p%iparm(3) = 5
+       ELSE
+          mat%p%iparm(3) = 4
+       END IF
+       mat%p%iparm(6) = nref
+    END IF
+    mat%nrhs = SIZE(rhs,2)
+    IF(PRESENT(sol)) THEN
+       sol = rhs
+       IF(mat%nlsym .OR. mat%nlherm) THEN
+          CALL zssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, &
+               &      mat%perm, mat%invp, sol, mat%rank, mat%nrhs, &
+               &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
+               &      mat%p%dparm)
+       ELSE
+          CALL zgsmp(mat%rank, mat%irow, mat%cols, mat%val, sol, mat%rank, &
+               &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
+       END IF
+    ELSE
+       IF(mat%nlsym .OR. mat%nlherm) THEN
+          CALL zssmp(mat%rank, mat%irow, mat%cols, mat%val, mat%diag, &
+               &      mat%perm, mat%invp, rhs, mat%rank, mat%nrhs, &
+               &      mat%aux, SIZE(mat%aux), mat%mrp, mat%p%iparm, &
+               &      mat%p%dparm)
+       ELSE
+          CALL zgsmp(mat%rank, mat%irow, mat%cols, mat%val, rhs, mat%rank, &
+               &     mat%nrhs, dummy, mat%p%iparm, mat%p%dparm)
+       END IF
+    END IF
+    IF(mat%p%iparm(64).NE.0) THEN
+       WRITE(*,'(a,i4)') 'BSOLVE: Failed with error', mat%p%iparm(64)
+       STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+    END IF
+!
+  END SUBROUTINE bsolve_zwsmp_matn
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  FUNCTION vmx_wsmp_mat(mat, xarr) RESULT(yarr)
+!
+!   Return product mat*x
+!
+    TYPE(wsmp_mat)                :: mat
+    DOUBLE PRECISION, INTENT(in)  :: xarr(:)
+    DOUBLE PRECISION              :: yarr(SIZE(xarr))
+    DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0
+    CHARACTER(len=6) :: matdescra
+    INTEGER :: n, i, j
+!
+    n = mat%rank
+!
+#ifdef MKL
+    IF(mat%nlsym) THEN
+       matdescra = 'sun'
+    ELSE
+       matdescra = 'g'
+    END IF
+!
+    CALL mkl_dcsrmv('N', n, n, alpha, matdescra, mat%val, &
+         &          mat%cols, mat%irow(1), mat%irow(2), xarr, &
+         &          beta, yarr)
+#else
+    yarr = 0.0d0
+    DO i=1,n
+       DO j=mat%irow(i), mat%irow(i+1)-1
+          yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j))
+       END DO
+       IF(mat%nlsym) THEN
+          DO j=mat%irow(i)+1, mat%irow(i+1)-1
+             yarr(mat%cols(j)) = yarr(mat%cols(j)) &
+                  &              + mat%val(j)*xarr(i)
+          END DO
+       END IF
+    END DO
+#endif
+!
+  END FUNCTION vmx_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  FUNCTION vmx_zwsmp_mat(mat, xarr) RESULT(yarr)
+!
+!   Return product mat*x
+!
+    TYPE(zwsmp_mat)             :: mat
+    DOUBLE COMPLEX, INTENT(in)  :: xarr(:)
+    DOUBLE COMPLEX              :: yarr(SIZE(xarr))
+    DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0)
+    INTEGER :: n, i, j
+    CHARACTER(len=6) :: matdescra
+    CHARACTER(len=1) :: transa
+!
+    n = mat%rank
+!
+#ifdef MKL
+    IF(mat%nlsym) THEN
+       matdescra = 'sun'
+    ELSE IF(mat%nlherm) THEN
+       matdescra = 'hun'
+    ELSE
+       matdescra = 'g'
+    END IF
+    transa='N'
+    IF(mat%nlherm) THEN
+       transa='T'   ! Transpose(CSR-UT*) = CSC-LT* = CSR-UT
+    END IF
+    CALL mkl_zcsrmv(transa, n, n, alpha, matdescra, mat%val, &
+         &          mat%cols, mat%irow(1), mat%irow(2), xarr, &
+         &          beta, yarr)
+#else
+    yarr = (0.0d0,0.0d0)
+    DO i=1,n
+       IF(mat%nlherm) THEN ! CSR-UT = (CSR-UT*)*
+          DO j=mat%irow(i), mat%irow(i+1)-1
+             yarr(i) = yarr(i) + CONJG(mat%val(j))*xarr(mat%cols(j))
+          END DO
+       ELSE
+          DO j=mat%irow(i), mat%irow(i+1)-1
+             yarr(i) = yarr(i) + mat%val(j)*xarr(mat%cols(j))
+          END DO
+       END IF
+       IF(mat%nlsym) THEN
+          DO j=mat%irow(i)+1, mat%irow(i+1)-1
+             yarr(mat%cols(j)) = yarr(mat%cols(j)) &
+                  &              + mat%val(j)*xarr(i)
+          END DO
+       ELSE IF(mat%nlherm) THEN ! CSR-UT = (CSR-UT*)*
+          DO j=mat%irow(i)+1, mat%irow(i+1)-1
+             yarr(mat%cols(j)) = yarr(mat%cols(j)) &
+                  &              + mat%val(j)*xarr(i)
+          END DO
+       END IF
+    END DO
+#endif
+!
+  END FUNCTION vmx_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  FUNCTION vmx_wsmp_matn(mat, xarr) RESULT(yarr)
+!
+!   Return product mat*x
+!
+    TYPE(wsmp_mat)                :: mat
+    DOUBLE PRECISION, INTENT(in)  :: xarr(:,:)
+    DOUBLE PRECISION              :: yarr(SIZE(xarr,1),SIZE(xarr,2))
+!
+    DOUBLE PRECISION :: alpha=1.0d0, beta=0.0d0
+    INTEGER :: n, nrhs, i, j
+    CHARACTER(len=6) :: matdescra
+!
+    n = mat%rank
+    nrhs = SIZE(xarr,2)
+!
+#ifdef MKL
+    IF(mat%nlsym) THEN
+       matdescra = 'sun'
+    ELSE
+       matdescra = 'g'
+    END IF
+!
+    CALL mkl_dcsrmm('N', n, nrhs, n, alpha, matdescra, mat%val,&
+         &           mat%cols, mat%irow(1), mat%irow(2), xarr, &
+         &           n, beta, yarr, n)
+#else
+    yarr = 0.0d0
+    DO i=1,n
+       DO j=mat%irow(i), mat%irow(i+1)-1
+          yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:)
+       END DO
+       IF(mat%nlsym) THEN
+          DO j=mat%irow(i)+1, mat%irow(i+1)-1
+             yarr(mat%cols(j),:) = yarr(mat%cols(j),:) &
+                  &                + mat%val(j)*xarr(i,:)
+          END DO
+       END IF
+    END DO
+#endif
+!
+  END FUNCTION vmx_wsmp_matn
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  FUNCTION vmx_zwsmp_matn(mat, xarr) RESULT(yarr)
+!
+!   Return product mat*x
+!
+    TYPE(zwsmp_mat)             :: mat
+    DOUBLE COMPLEX, INTENT(in)  :: xarr(:,:)
+    DOUBLE COMPLEX              :: yarr(SIZE(xarr,1),SIZE(xarr,2))
+!
+    DOUBLE COMPLEX :: alpha=(1.0d0,0.0d0), beta=(0.0d0,0.0d0)
+    INTEGER :: n, nrhs, i, j
+    CHARACTER(len=6) :: matdescra
+    CHARACTER(len=1) :: transa
+!
+    n = mat%rank
+    nrhs = SIZE(xarr,2)
+!
+#ifdef MKL
+    IF(mat%nlsym) THEN
+       matdescra = 'sun'
+    ELSE IF(mat%nlherm) THEN
+       matdescra = 'hun'
+    ELSE
+       matdescra = 'g'
+    END IF
+    transa='N'
+    IF(mat%nlherm) THEN
+       transa='T' ! Transpose(CSR-UT*) = CSC-LT* = CSR-UT
+    END IF
+!
+    CALL mkl_zcsrmm(transa, n, nrhs, n, alpha, matdescra, mat%val, &
+         &          mat%cols, mat%irow(1), mat%irow(2), xarr, n, &
+         &          beta, yarr, n)
+#else
+    yarr = (0.0d0,0.0d0)
+    DO i=1,n
+       IF(mat%nlherm) THEN ! CSR-UT = (CSR-UT*)*
+          DO j=mat%irow(i), mat%irow(i+1)-1
+             yarr(i,:) = yarr(i,:) + CONJG(mat%val(j))*xarr(mat%cols(j),:)
+          END DO
+       ELSE
+          DO j=mat%irow(i), mat%irow(i+1)-1
+             yarr(i,:) = yarr(i,:) + mat%val(j)*xarr(mat%cols(j),:)
+          END DO
+       END IF
+       IF(mat%nlsym) THEN
+          DO j=mat%irow(i)+1, mat%irow(i+1)-1
+             yarr(mat%cols(j),:) = yarr(mat%cols(j),:) &
+                  &                + mat%val(j)*xarr(i,:)
+          END DO
+       ELSE IF(mat%nlherm) THEN ! CSR-UT = (CSR-UT*)*
+          DO j=mat%irow(i)+1, mat%irow(i+1)-1
+             yarr(mat%cols(j),:) = yarr(mat%cols(j),:) &
+                  &               + mat%val(j)*xarr(i,:)
+          END DO
+       END IF
+    END DO
+#endif
+!
+  END FUNCTION vmx_zwsmp_matn
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE destroy_wsmp_mat(mat)
+!
+!  Deallocate the sparse matrix mat
+!
+    TYPE(wsmp_mat) :: mat
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL destroy(mat%mat)
+       DEALLOCATE(mat%mat)
+    END IF
+!
+!  Release memory for factors for symmetric matrix
+    IF(mat%nlsym) THEN
+       CALL check_mat(mat)
+       CALL wsffree
+    END IF
+!
+    IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
+    IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
+    IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm)
+    IF(ASSOCIATED(mat%invp)) DEALLOCATE(mat%invp)
+    IF(ASSOCIATED(mat%mrp)) DEALLOCATE(mat%mrp)
+    IF(ASSOCIATED(mat%diag)) DEALLOCATE(mat%diag)
+    IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
+    IF(ASSOCIATED(mat%aux)) DEALLOCATE(mat%aux)
+  END SUBROUTINE destroy_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE destroy_zwsmp_mat(mat)
+!
+!  Deallocate the sparse matrix mat
+!
+    TYPE(zwsmp_mat) :: mat
+!
+    IF(ASSOCIATED(mat%mat)) THEN
+       CALL destroy(mat%mat)
+       DEALLOCATE(mat%mat)
+    END IF
+!
+!  Release memory for factors for symmetric/hermitian matrix
+    IF(mat%nlsym .OR. mat%nlherm) THEN
+       CALL check_mat(mat)
+       CALL wsffree
+    END IF
+!
+    IF(ASSOCIATED(mat%cols)) DEALLOCATE(mat%cols)
+    IF(ASSOCIATED(mat%irow)) DEALLOCATE(mat%irow)
+    IF(ASSOCIATED(mat%perm)) DEALLOCATE(mat%perm)
+    IF(ASSOCIATED(mat%invp)) DEALLOCATE(mat%invp)
+    IF(ASSOCIATED(mat%mrp)) DEALLOCATE(mat%mrp)
+    IF(ASSOCIATED(mat%diag)) DEALLOCATE(mat%diag)
+    IF(ASSOCIATED(mat%val)) DEALLOCATE(mat%val)
+    IF(ASSOCIATED(mat%aux)) DEALLOCATE(mat%aux)
+  END SUBROUTINE destroy_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE put_wsmp_mat(fid, label, mat, str)
+!
+!   Write matrix to hdf5 file
+!
+    USE futils
+!
+    INTEGER, INTENT(in)                    :: fid
+    CHARACTER(len=*), INTENT(in)           :: label
+    TYPE(wsmp_mat)                      :: mat
+    CHARACTER(len=*), OPTIONAL, INTENT(in) :: str
+!
+    IF(PRESENT(str)) THEN
+       CALL creatg(fid, label, str)
+    ELSE
+       CALL creatg(fid, label)
+    END IF
+    CALL attach(fid, label, 'RANK', mat%rank)
+    CALL attach(fid, label, 'NNZ',  mat%nnz)
+    CALL attach(fid, label, 'NLSYM', mat%nlsym)
+    CALL attach(fid, label, 'NLPOS', mat%nlpos)
+    CALL putarr(fid, TRIM(label)//'/irow', mat%irow)
+    CALL putarr(fid, TRIM(label)//'/cols', mat%cols)
+    CALL putarr(fid, TRIM(label)//'/val', mat%val)
+!
+    CALL creatg(fid, TRIM(label)//'/p')
+    CALL putarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm)
+    CALL putarr(fid, TRIM(label)//'/p/dparm', mat%p%dparm)
+  END SUBROUTINE put_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE put_zwsmp_mat(fid, label, mat, str)
+!
+!   Write matrix to hdf5 file
+!
+    USE futils
+!
+    INTEGER, INTENT(in)                    :: fid
+    CHARACTER(len=*), INTENT(in)           :: label
+    TYPE(zwsmp_mat)                        :: mat
+    CHARACTER(len=*), OPTIONAL, INTENT(in) :: str
+!
+    IF(PRESENT(str)) THEN
+       CALL creatg(fid, label, str)
+    ELSE
+       CALL creatg(fid, label)
+    END IF
+    CALL attach(fid, label, 'RANK', mat%rank)
+    CALL attach(fid, label, 'NNZ',  mat%nnz)
+    CALL attach(fid, label, 'NLSYM', mat%nlsym)
+    CALL attach(fid, label, 'NLPOS', mat%nlpos)
+    CALL attach(fid, label, 'NLHERM', mat%nlherm)
+    CALL putarr(fid, TRIM(label)//'/irow', mat%irow)
+    CALL putarr(fid, TRIM(label)//'/cols', mat%cols)
+    CALL putarr(fid, TRIM(label)//'/val', mat%val)
+!
+    CALL creatg(fid, TRIM(label)//'/p')
+    CALL putarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm)
+    CALL putarr(fid, TRIM(label)//'/p/dparm', mat%p%dparm)
+  END SUBROUTINE put_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE get_wsmp_mat(fid, label, mat)
+!
+!   Read matrix from hdf5 file
+!
+    USE futils
+!
+    INTEGER, INTENT(in)          :: fid
+    CHARACTER(len=*), INTENT(in) :: label
+    TYPE(wsmp_mat)               :: mat
+!
+    CALL getatt(fid, label, 'RANK', mat%rank)
+    CALL getatt(fid, label, 'NNZ',  mat%nnz)
+    CALL getatt(fid, label, 'NLSYM', mat%nlsym)
+    CALL getatt(fid, label, 'NLPOS', mat%nlpos)
+    CALL getarr(fid, TRIM(label)//'/irow', mat%irow)
+    CALL getarr(fid, TRIM(label)//'/cols', mat%cols)
+    IF(mat%nlsym) THEN
+       CALL getarr(fid, TRIM(label)//'/perm', mat%perm)
+       CALL getarr(fid, TRIM(label)//'/invp', mat%invp)
+    END IF
+    CALL getarr(fid, TRIM(label)//'/val', mat%val)
+!
+    CALL getarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm)
+    CALL getarr(fid, TRIM(label)//'/p/dparm', mat%p%dparm)
+  END SUBROUTINE get_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE get_zwsmp_mat(fid, label, mat)
+!
+!   Read matrix from hdf5 file
+!
+    USE futils
+!
+    INTEGER, INTENT(in)          :: fid
+    CHARACTER(len=*), INTENT(in) :: label
+    TYPE(zwsmp_mat)              :: mat
+!
+    CALL getatt(fid, label, 'RANK', mat%rank)
+    CALL getatt(fid, label, 'NNZ',  mat%nnz)
+    CALL getatt(fid, label, 'NLSYM', mat%nlsym)
+    CALL getatt(fid, label, 'NLPOS', mat%nlpos)
+    CALL getatt(fid, label, 'NLHERM', mat%nlherm)
+    CALL getarr(fid, TRIM(label)//'/irow', mat%irow)
+    CALL getarr(fid, TRIM(label)//'/cols', mat%cols)
+    IF(mat%nlsym) THEN
+       CALL getarr(fid, TRIM(label)//'/perm', mat%perm)
+       CALL getarr(fid, TRIM(label)//'/invp', mat%invp)
+    END IF
+    CALL getarr(fid, TRIM(label)//'/val', mat%val)
+!
+    CALL getarr(fid, TRIM(label)//'/p/iparm', mat%p%iparm)
+    CALL getarr(fid, TRIM(label)//'/p/dparm', mat%p%dparm)
+  END SUBROUTINE get_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE mcopy_wsmp_mat(mata, matb)
+!
+!   Matrix copy: B = A
+!
+    TYPE(wsmp_mat) :: mata, matb
+    INTEGER :: n, nnz
+!
+! Assume that matb was already initialized by init_wsmp_mat.
+    IF(matb%rank.LE.0) THEN
+       WRITE(*,'(a)') 'MCOPY: Mat B is not initialized with INIT'
+       STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+    END IF
+!
+    IF(ASSOCIATED(matb%mat)) THEN 
+       CALL destroy(matb%mat)
+       DEALLOCATE(matb%mat)
+    END IF
+!
+    n = mata%rank
+    nnz = mata%nnz
+    matb%rank = n
+    matb%nnz = nnz
+    matb%nlsym = mata%nlsym
+    matb%nlpos = mata%nlpos
+    matb%nlforce_zero = mata%nlforce_zero
+!
+    IF(ASSOCIATED(matb%val)) DEALLOCATE(matb%val)
+    IF(ASSOCIATED(matb%cols)) DEALLOCATE(matb%cols)
+    IF(ASSOCIATED(matb%irow)) DEALLOCATE(matb%irow)
+    IF(ASSOCIATED(matb%perm)) DEALLOCATE(matb%perm)
+    IF(ASSOCIATED(matb%invp)) DEALLOCATE(matb%invp)
+    ALLOCATE(matb%val(nnz)); matb%val = mata%val
+    ALLOCATE(matb%cols(nnz)); matb%cols = mata%cols
+    ALLOCATE(matb%irow(n+1)); matb%irow = mata%irow
+    ALLOCATE(matb%perm(n))
+    IF(matb%nlsym) THEN
+       ALLOCATE(matb%perm(n))
+       ALLOCATE(matb%invp(n))
+    END IF
+  END SUBROUTINE mcopy_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE mcopy_zwsmp_mat(mata, matb)
+!
+!   Matrix copy: B = A
+!
+    TYPE(zwsmp_mat) :: mata, matb
+    INTEGER :: n, nnz
+!
+! Assume that matb was already initialized by init_wsmp_mat.
+    IF(matb%rank.LE.0) THEN
+       WRITE(*,'(a)') 'MCOPY: Mat B is not initialized with INIT'
+       STOP '*** Abnormal EXIT in MODULE wsmp_mod ***'
+    END IF
+!
+    IF(ASSOCIATED(matb%mat)) THEN 
+       CALL destroy(matb%mat)
+       DEALLOCATE(matb%mat)
+    END IF
+!
+    n = mata%rank
+    nnz = mata%nnz
+    matb%rank = n
+    matb%nnz = nnz
+    matb%nlsym = mata%nlsym
+    matb%nlherm = mata%nlherm
+    matb%nlpos = mata%nlpos
+    matb%nlforce_zero = mata%nlforce_zero
+!
+    IF(ASSOCIATED(matb%val)) DEALLOCATE(matb%val)
+    IF(ASSOCIATED(matb%cols)) DEALLOCATE(matb%cols)
+    IF(ASSOCIATED(matb%irow)) DEALLOCATE(matb%irow)
+    IF(ASSOCIATED(matb%perm)) DEALLOCATE(matb%perm)
+    IF(ASSOCIATED(matb%invp)) DEALLOCATE(matb%invp)
+    ALLOCATE(matb%val(nnz)); matb%val = mata%val
+    ALLOCATE(matb%cols(nnz)); matb%cols = mata%cols
+    ALLOCATE(matb%irow(n+1)); matb%irow = mata%irow
+    ALLOCATE(matb%perm(n))
+    IF(matb%nlsym) THEN
+       ALLOCATE(matb%perm(n))
+       ALLOCATE(matb%invp(n))
+    END IF
+  END SUBROUTINE mcopy_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE maddto_wsmp_mat(mata, alpha, matb)
+!
+!   A <- A + alpha*B
+!
+    TYPE(wsmp_mat)   :: mata, matb
+    DOUBLE PRECISION :: alpha
+!
+    mata%val = mata%val + alpha*matb%val
+  END SUBROUTINE maddto_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE maddto_zwsmp_mat(mata, alpha, matb)
+!
+!   A <- A + alpha*B
+!
+    TYPE(zwsmp_mat) :: mata, matb
+    DOUBLE COMPLEX  :: alpha
+!
+    mata%val = mata%val + alpha*matb%val
+  END SUBROUTINE maddto_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE psum_wsmp_mat(mat, comm)
+!
+!   Parallel sum of sparse matrices
+!
+    INCLUDE "mpif.h"
+!
+    TYPE(wsmp_mat)   :: mat
+    INCLUDE 'psum_mat.tpl'
+  END SUBROUTINE psum_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE psum_zwsmp_mat(mat, comm)
+!
+!   Parallel sum of sparse matrices
+!
+    INCLUDE "mpif.h"
+!
+    TYPE(zwsmp_mat)   :: mat
+    INCLUDE 'psum_mat.tpl'
+  END SUBROUTINE psum_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE p2p_wsmp_mat(mat, dest, extyp, op, comm)
+!
+!   Point-to-point combine sparse matrix between 2 processes
+!
+    INCLUDE "mpif.h"
+!
+    TYPE(wsmp_mat)            :: mat
+    DOUBLE PRECISION, ALLOCATABLE :: val(:)
+    INTEGER :: mpi_type=MPI_DOUBLE_PRECISION
+!
+    INCLUDE "p2p_mat.tpl"
+  END SUBROUTINE p2p_wsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  SUBROUTINE p2p_zwsmp_mat(mat, dest, extyp, op, comm)
+!
+!   Point-to-point combine sparse matrix between 2 processes
+!
+    INCLUDE "mpif.h"
+!
+    TYPE(zwsmp_mat)           :: mat
+    DOUBLE COMPLEX, ALLOCATABLE :: val(:)
+    INTEGER :: mpi_type=MPI_DOUBLE_COMPLEX
+!
+    INCLUDE "p2p_mat.tpl"
+  END SUBROUTINE p2p_zwsmp_mat
+!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+END MODULE wsmp_bsplines
diff --git a/src/zconmat.tpl b/src/zconmat.tpl
new file mode 100644
index 0000000..2d40218
--- /dev/null
+++ b/src/zconmat.tpl
@@ -0,0 +1,214 @@
+!>
+!> @file zconmat.tpl
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+!
+!   In this version s[lines are precalculted
+!   (on all n1/n2 intervals
+!
+  INTERFACE
+     SUBROUTINE coefeq(x, y, idt, idw, c)
+       DOUBLE PRECISION, INTENT(in) :: x, y
+       INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
+       DOUBLE COMPLEX, INTENT(out) :: c(:)
+     END SUBROUTINE coefeq
+  END INTERFACE
+  INTEGER, OPTIONAL :: maxder(2)   ! maximum oder of derivatives
+  LOGICAL, OPTIONAL :: nat_order   ! Natural ordering for 2d-1d mapping
+!
+  INTEGER :: n1, nidbas1, ndim1, n1e
+  INTEGER :: n2, nidbas2, ndim2, n2e
+  INTEGER :: ng1, ng2
+  INTEGER :: i1, i2, ig1, ig2
+  INTEGER :: igt1, igt2, igw1, igw2, irow, jcol
+  INTEGER, ALLOCATABLE :: left1(:), left2(:)
+!
+  LOGICAL :: nlper1, nlper2, nlnat
+!
+  INTEGER :: kterms         ! Number of terms in weak form
+  INTEGER :: k, kmaxder, it1, iw1, it2, iw2
+  INTEGER, ALLOCATABLE :: idert(:,:), iderw(:,:)  ! Derivative order
+  DOUBLE COMPLEX :: one=(1.0d0,0.0d0), zero=(0.0d0,0.0d0)
+  DOUBLE COMPLEX, ALLOCATABLE  :: coefs(:,:,:)  ! Terms in weak form
+!
+  DOUBLE PRECISION, POINTER :: xg1(:,:), xg2(:,:), wg1(:,:), wg2(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: fun1(:,:,:,:), fun2(:,:,:,:)
+  DOUBLE COMPLEX, ALLOCATABLE :: mata(:,:,:,:), matc(:,:)
+  DOUBLE COMPLEX, ALLOCATABLE :: matg(:,:,:), matf(:,:,:), matcg(:,:,:)
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+  CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
+  CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
+  nlper1 = spl%sp1%period
+  nlper2 = spl%sp2%period
+!
+  n1e = n1+nidbas1  ! Number of elements in 1st coordinate
+  n2e = n2+nidbas2  ! Number of elements in 2nd coordinate
+  IF(nlper2) n2e = n2
+!
+!   Gauss points and weights on all intervals
+!
+  xg1 => spl%sp1%gausx  ! xg1(ng1,n1)
+  wg1 => spl%sp1%gausw  ! wg1(ng1,n1)
+  ng1 = SIZE(xg1,1)
+  xg2 => spl%sp2%gausx
+  wg2 => spl%sp2%gausw
+  ng2 = SIZE(xg2,1)
+!
+!   Splines on all intervals
+!
+  kmaxder = 1
+  IF(PRESENT(maxder)) kmaxder = maxder(1)
+  ALLOCATE(fun1(0:nidbas1,0:kmaxder,ng1,n1))
+  ALLOCATE(left1(ng1))
+  DO i1=1,n1
+     left1 = i1
+     CALL basfun(xg1(:,i1), spl%sp1, fun1(:,:,:,i1), left1)
+  END DO
+  DEALLOCATE(left1)
+!
+  kmaxder = 1
+  IF(PRESENT(maxder)) kmaxder = maxder(2)
+  ALLOCATE(fun2(0:nidbas2,0:kmaxder,ng2,n2))
+  ALLOCATE(left2(ng2))
+  DO i2=1,n2
+     left2 = i2
+     CALL basfun(xg2(:,i2), spl%sp2, fun2(:,:,:,i2), left2)
+  END DO
+  DEALLOCATE(left2)
+!
+!   Ordering in local to global matrix mapping
+!
+  nlnat = .FALSE.
+  IF(PRESENT(nat_order)) nlnat = nat_order
+!===========================================================================
+!              2.0 Assembly loop
+!
+!   Weak form
+!
+  kterms = mat%nterms
+  ALLOCATE(idert(kterms,2))
+  ALLOCATE(iderw(kterms,2))
+  ALLOCATE(coefs(kterms,ng1,ng2))
+!
+!   Allocate local matrices
+!
+  ALLOCATE(mata(0:nidbas1,0:nidbas1,0:nidbas2,0:nidbas2))
+  ALLOCATE(matc(ng1,ng2))
+  ALLOCATE(matg(0:nidbas2,0:nidbas2,ng2))
+  ALLOCATE(matf(0:nidbas1,0:nidbas1,ng1))
+  ALLOCATE(matcg(ng1,0:nidbas2,0:nidbas2))
+!
+  DO i1=1,n1
+     DO i2=1,n2
+!
+!   Coefficients of the weak form
+!
+        DO ig1=1,ng1
+           DO ig2=1,ng2
+              CALL coefeq(xg1(ig1,i1), xg2(ig2,i2), &
+                   &      idert, iderw, coefs(:,ig1,ig2))
+           END DO
+        END DO
+!
+!   Compute local matrix: A <- E*(C*D^T) + A
+!
+        mata = 0.0d0
+        DO k=1,kterms
+!
+           matc(1:ng1,1:ng2) = coefs(k,1:ng1,1:ng2)
+!
+           DO it1=0,nidbas1
+              DO iw1=0,nidbas1
+                 DO ig1=1,ng1
+                    matf(it1,iw1,ig1) = wg1(ig1,i1) * &
+                         &  fun1(it1,idert(k,1),ig1,i1) * &
+                         &  fun1(iw1,iderw(k,1),ig1,i1)
+                 END DO
+              END DO
+           END DO
+!
+           DO it2=0,nidbas2
+              DO iw2=0,nidbas2
+                 DO ig2=1,ng2
+                    matg(it2,iw2,ig2) = wg2(ig2,i2) * &
+                         &  fun2(it2,idert(k,2),ig2,i2) * &
+                         &  fun2(iw2,iderw(k,2),ig2,i2)
+                 END DO
+              END DO
+           END DO
+!
+           CALL zgemm('N', 'T', ng1, (nidbas2+1)*(nidbas2+1), ng2, one, &
+                &      matc, ng1, matg, (nidbas2+1)*(nidbas2+1), zero, &
+                &      matcg, ng1)
+           CALL zgemm('N', 'N', (nidbas1+1)*(nidbas1+1), (nidbas2+1)*(nidbas2+1), &
+                &      ng1, one, matf, (nidbas1+1)*(nidbas1+1), matcg, ng1, one, &
+                &      mata, (nidbas1+1)*(nidbas1+1))
+!
+        END DO
+!
+!   Map local matrix A to global matrix
+!
+        DO it1=0,nidbas1
+           igt1 = i1+it1; IF(nlper1) igt1 = MODULO(igt1-1,n1) + 1
+           DO it2=0,nidbas2
+              igt2 = i2+it2; IF(nlper2) igt2 = MODULO(igt2-1, n2) + 1
+              irow = glmap(igt1, igt2, n1e, n2e)
+              DO iw1=0,nidbas1
+                 igw1 = i1+iw1; IF(nlper1) igw1 = MODULO(igw1-1,n1) + 1
+                 DO iw2=0,nidbas2
+                    igw2 = i2+iw2; IF(nlper2) igw2 = MODULO(igw2-1, n2) + 1
+                    jcol = glmap(igw1, igw2, n1e, n2e)
+                    CALL updtmat(mat, irow, jcol, mata(it1,iw1,it2,iw2))
+                 END DO
+              END DO
+           END DO
+        END DO
+!
+     END DO
+  END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(fun1)
+  DEALLOCATE(fun2)
+  DEALLOCATE(idert, iderw, coefs)
+  DEALLOCATE(mata)
+  DEALLOCATE(matc)
+  DEALLOCATE(matg)
+  DEALLOCATE(matcg)
+  DEALLOCATE(matf)
+!
+CONTAINS
+  INTEGER FUNCTION glmap(i,j,n1,n2)
+    INTEGER, INTENT(in) :: i,j,n1,n2
+    IF(nlnat) THEN
+       glmap = (j-1)*n1 + i
+    ELSE
+       glmap = (i-1)*n2 + j
+    END IF
+  END FUNCTION glmap
diff --git a/src/zconmat_1d.tpl b/src/zconmat_1d.tpl
new file mode 100644
index 0000000..d066822
--- /dev/null
+++ b/src/zconmat_1d.tpl
@@ -0,0 +1,144 @@
+!>
+!> @file zconmat_1d.tpl
+!>
+!> @brief
+!>
+!> @copyright
+!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+!> SPC (Swiss Plasma Center)
+!>
+!> spclibs is free software: you can redistribute it and/or modify it under
+!> the terms of the GNU Lesser General Public License as published by the Free
+!> Software Foundation, either version 3 of the License, or (at your option)
+!> any later version.
+!>
+!> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+!>
+!> You should have received a copy of the GNU Lesser General Public License
+!> along with this program. If not, see <https://www.gnu.org/licenses/>.
+!>
+!> @authors
+!> (in alphabetical order)
+!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+!>
+!
+!   In this version s[lines are precalculted
+!   (on all n1/n2 intervals
+!
+  INTERFACE
+     SUBROUTINE coefeq(x, idt, idw, c)
+       DOUBLE PRECISION, INTENT(in) :: x
+       INTEGER, INTENT(out) :: idt(:), idw(:)
+       DOUBLE COMPLEX, INTENT(out) :: c(:)
+     END SUBROUTINE coefeq
+  END INTERFACE
+  INTEGER, OPTIONAL :: maxder   ! maximum oder of derivatives
+!
+  INTEGER :: n1, nidbas1, ndim1, ng1
+  INTEGER :: i1, ig1
+  INTEGER :: irow, jcol
+  INTEGER, ALLOCATABLE :: left1(:)
+!
+  LOGICAL :: nlper1
+!
+  INTEGER :: kterms         ! Number of terms in weak form
+  INTEGER :: k, kmaxder, it1, iw1
+  INTEGER, ALLOCATABLE :: idert(:), iderw(:)  ! Derivative order
+  DOUBLE COMPLEX :: one=(1.0d0,0.0d0), zero=(0.0d0,0.0d0)
+  DOUBLE COMPLEX, ALLOCATABLE  :: coefs(:,:)  ! Terms in weak form
+!
+  DOUBLE PRECISION, POINTER :: xg1(:,:), wg1(:,:)
+  DOUBLE PRECISION, ALLOCATABLE :: fun1(:,:,:,:), fun2(:,:,:,:)
+  DOUBLE COMPLEX, ALLOCATABLE :: mata(:,:), matc(:)
+  DOUBLE COMPLEX, ALLOCATABLE :: matf(:,:,:)
+!===========================================================================
+!              1.0 Prologue
+!
+!   Properties of spline space
+!
+  CALL get_dim(spl, ndim1, n1, nidbas1)
+  nlper1 = spl%period
+!
+!   Gauss points and weights on all intervals
+!
+  xg1 => spl%gausx  ! xg1(ng1,n1)
+  wg1 => spl%gausw  ! wg1(ng1,n1)
+  ng1 = SIZE(xg1,1)
+!
+!   Splines on all intervals
+!
+  kmaxder = 1
+  IF(PRESENT(maxder)) kmaxder = maxder
+  ALLOCATE(fun1(0:nidbas1,0:kmaxder,ng1,n1))
+  ALLOCATE(left1(ng1))
+  DO i1=1,n1
+     left1 = i1
+     CALL basfun(xg1(:,i1), spl, fun1(:,:,:,i1), left1)
+  END DO
+  DEALLOCATE(left1)
+!===========================================================================
+!              2.0 Assembly loop
+!
+!   Weak form
+!
+  kterms = mat%nterms
+  ALLOCATE(idert(kterms))
+  ALLOCATE(iderw(kterms))
+  ALLOCATE(coefs(kterms,ng1))
+!
+!   Allocate local matrices
+!
+  ALLOCATE(mata(0:nidbas1,0:nidbas1))
+  ALLOCATE(matc(ng1))
+  ALLOCATE(matf(0:nidbas1,0:nidbas1,ng1))
+!
+  DO i1=1,n1
+!
+!   Coefficients of the weak form
+!
+     DO ig1=1,ng1
+        CALL coefeq(xg1(ig1,i1), idert, iderw, coefs(:,ig1))
+     END DO
+!
+!   Compute local matrix: A <- F*c + A
+!
+     mata = 0.0d0
+     DO k=1,kterms
+!
+        matc(1:ng1) = coefs(k,1:ng1)
+!
+        DO it1=0,nidbas1
+           DO iw1=0,nidbas1
+              DO ig1=1,ng1
+                 matf(it1,iw1,ig1) = wg1(ig1,i1) * &
+                      &  fun1(it1,idert(k),ig1,i1) * &
+                      &  fun1(iw1,iderw(k),ig1,i1)
+              END DO
+           END DO
+        END DO
+!
+        CALL zgemv('N', (nidbas1+1)*(nidbas1+1), ng1, one, matf, &
+             &          (nidbas1+1)*(nidbas1+1), matc, 1, one, mata, 1)
+     END DO
+!
+!   Map local matrix A to global matrix
+!
+     DO it1=0,nidbas1
+        irow = i1+it1; IF(nlper1) irow = MODULO(irow-1,n1) + 1
+        DO iw1=0,nidbas1
+           jcol = i1+iw1; IF(nlper1) jcol = MODULO(jcol-1,n1) + 1
+           CALL updtmat(mat, irow, jcol, mata(it1,iw1))
+        END DO
+     END DO
+!
+  END DO
+!===========================================================================
+!              9.0  Epilogue
+!
+  DEALLOCATE(fun1)
+  DEALLOCATE(idert, iderw, coefs)
+  DEALLOCATE(mata)
+  DEALLOCATE(matc)
+  DEALLOCATE(matf)
diff --git a/wk/CMakeLists.txt b/wk/CMakeLists.txt
new file mode 100644
index 0000000..e24be22
--- /dev/null
+++ b/wk/CMakeLists.txt
@@ -0,0 +1,79 @@
+/**
+ * @file CMakeLists.txt
+ *
+ * @brief
+ *
+ * @copyright
+ * Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+ * SPC (Swiss Plasma Center)
+ *
+ * spclibs is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * @authors
+ * (in alphabetical order)
+ * @author Nicolas Richart <nicolas.richart@epfl.ch>
+ * @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+ */
+set(BS_TESTS
+  driv1 driv2 driv3 driv4
+  pde1d pde1dp pde1dp_cmpl 
+  pde2d pde2d_pb
+  pde1dp_cmpl_dft 
+  pde3d
+  fit1d fit1dbc fit1dp 
+  fit2d fit2d1d fit2d_cmpl fit2dbc fit2dbc_x fit2dbc_y
+  moments optim1 optim2 optim3
+  tcdsmat tmassmat tbasfun tsparse1 test_kron
+  )
+
+if(HAS_PARDISO)
+  set(BS_TESTS ${BS_TESTS} 
+    pde1dp_cmpl_pardiso 
+    pde2d_pardiso
+    pde2d_sym_pardiso
+    pde2d_sym_pardiso_dft
+    )
+endif()
+
+if(HAS_MUMPS)
+  set(BS_TESTS ${BS_TESTS}
+    pde2d_mumps
+    pde1dp_cmpl_mumps
+    )
+endif()
+
+set(RUNTESTS "${CMAKE_CURRENT_SOURCE_DIR}/runtest.sh")
+set(BIN_DIR "${bsplines_tests_BINARY_DIR}")
+set(INPUT_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
+
+foreach(prog ${BS_TESTS})
+  add_test(${prog} ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 1 
+    ${RUNTESTS} ${BIN_DIR}/${prog} ${INPUT_DIR}
+    )
+endforeach()
+
+# Special cases!
+if(HAS_PARDISO)
+  add_test(tsparse2  ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 1 
+    ${BIN_DIR}/tsparse2
+    )
+endif()
+
+add_test(ppde3d  ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 1
+  ${BIN_DIR}/ppde3d ${INPUT_DIR}/ppde3d.in
+  )
+
+add_test(ppde3d_pb ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 1
+  ${BIN_DIR}/ppde3d ${INPUT_DIR}/ppde3d_pb.in
+  )
+
diff --git a/wk/adv.in b/wk/adv.in
new file mode 100644
index 0000000..25a6abb
--- /dev/null
+++ b/wk/adv.in
@@ -0,0 +1,11 @@
+&newrun
+ nx = 100,
+ nidbas = 2,
+ a = 0.0,
+ b = 100.0,
+ dt = 0.3    ! Time step
+ u = -1.0     ! Velocity
+ w = 0.1    ! Shape of initial function
+ coefx = 0., 1., 0., 0., 1. ! fdist = x
+ coefx = 1., 0., 0., 0., 1. ! Equidistant mesh
+/
diff --git a/wk/basfun_perf.in b/wk/basfun_perf.in
new file mode 100644
index 0000000..99b075e
--- /dev/null
+++ b/wk/basfun_perf.in
@@ -0,0 +1,17 @@
+&newrun
+ nx = 64,
+ nidbas = 3,
+ nits = 1000
+ npt = 100000
+ ngroup=4,
+ jdermx = 0, 
+ nlperiod = f,
+/
+
+
+
+
+
+
+
+
diff --git a/wk/driv1.in b/wk/driv1.in
new file mode 100644
index 0000000..7d70ec2
--- /dev/null
+++ b/wk/driv1.in
@@ -0,0 +1,11 @@
+&newrun
+ nx = 10, ny = 8,
+ nidbas = 4,
+ ngauss = 4,
+ a = 0.0
+ b = 1.0
+ coefy = 1., 0., 0., 0., 1.    ! Equidistant mesh
+ coefx = 1., 0., 0., 0., 1.    ! Equidistant mesh
+ coefx = 0., 1., 0., 0., 1.    ! fdist = x
+ coefx = 1., 0., 10., 0.5, 0.2 ! Gaussian peaks at 0.5
+/
diff --git a/wk/driv2.in b/wk/driv2.in
new file mode 100644
index 0000000..30cbdb9
--- /dev/null
+++ b/wk/driv2.in
@@ -0,0 +1,10 @@
+&newrun
+ periodic = f,
+ nx = 10,
+ nidbas = 3,
+ ngauss = 3,
+ a = 0.0
+ b = 1.0
+ coefx = 1., 0., 0., 0., 1. ! Equidistant mesh
+ coefx = 0., 1., 0., 0., 1. ! fdist = x
+/
diff --git a/wk/driv3.in b/wk/driv3.in
new file mode 100644
index 0000000..62fef5a
--- /dev/null
+++ b/wk/driv3.in
@@ -0,0 +1,2 @@
+&newrun
+/
diff --git a/wk/driv4.in b/wk/driv4.in
new file mode 100644
index 0000000..a9548e1
--- /dev/null
+++ b/wk/driv4.in
@@ -0,0 +1,5 @@
+&newrun
+  nx=10
+  a=0, b=1.0,
+  nidbas1=3 nidbas2=1,
+/
diff --git a/wk/fit1d.in b/wk/fit1d.in
new file mode 100644
index 0000000..ee6d020
--- /dev/null
+++ b/wk/fit1d.in
@@ -0,0 +1,8 @@
+&newrun
+ nx = 10,
+ nidbas = 3,
+ a = 0.0,
+ b = 1.0,
+ coefx = 0., 1., 0., 0., 1. ! fdist = x
+ coefx = 1., 0., 0., 0., 1. ! Equidistant mesh
+/
diff --git a/wk/fit1dbc.in b/wk/fit1dbc.in
new file mode 100644
index 0000000..45187d5
--- /dev/null
+++ b/wk/fit1dbc.in
@@ -0,0 +1,10 @@
+&newrun
+ nx = 10,
+ nidbas = 3,
+ a = 0.0,
+ b = 1.0,
+ coefx = 0., 1., 0., 0., 1. ! fdist = x
+ coefx = 1., 0., 0., 0., 1. ! Equidistant mesh
+ ibc = 2,2, 2,2, 3,3
+ fbc = 0.0
+/
diff --git a/wk/fit1dp.in b/wk/fit1dp.in
new file mode 100644
index 0000000..ee6d020
--- /dev/null
+++ b/wk/fit1dp.in
@@ -0,0 +1,8 @@
+&newrun
+ nx = 10,
+ nidbas = 3,
+ a = 0.0,
+ b = 1.0,
+ coefx = 0., 1., 0., 0., 1. ! fdist = x
+ coefx = 1., 0., 0., 0., 1. ! Equidistant mesh
+/
diff --git a/wk/fit2d.in b/wk/fit2d.in
new file mode 100644
index 0000000..b4b46ca
--- /dev/null
+++ b/wk/fit2d.in
@@ -0,0 +1,10 @@
+&newrun
+ nx = 10,
+ ny = 10,
+ nidbas = 3,3
+ mbes = 2,
+ coefx = 0., 1., 0., 0., 1. ! fdist = x
+ coefy = 0., 1., 0., 0., 1. ! fdist = y
+ coefx = 1., 0., 0., 0., 1. ! Equidistant mesh
+ coefy = 1., 0., 0., 0., 1. ! Equidistant mesh
+/
diff --git a/wk/fit2d1d.in b/wk/fit2d1d.in
new file mode 100644
index 0000000..b345a49
--- /dev/null
+++ b/wk/fit2d1d.in
@@ -0,0 +1,10 @@
+&newrun
+ nx = 80,
+ ny = 80,
+ nidbas = 3,3
+ mbes = 2,
+ coefx = 0., 1., 0., 0., 1. ! fdist = x
+ coefy = 0., 1., 0., 0., 1. ! fdist = y
+ coefx = 1., 0., 0., 0., 1. ! Equidistant mesh
+ coefy = 1., 0., 0., 0., 1. ! Equidistant mesh
+/
diff --git a/wk/fit2d_cmpl.in b/wk/fit2d_cmpl.in
new file mode 100644
index 0000000..b345a49
--- /dev/null
+++ b/wk/fit2d_cmpl.in
@@ -0,0 +1,10 @@
+&newrun
+ nx = 80,
+ ny = 80,
+ nidbas = 3,3
+ mbes = 2,
+ coefx = 0., 1., 0., 0., 1. ! fdist = x
+ coefy = 0., 1., 0., 0., 1. ! fdist = y
+ coefx = 1., 0., 0., 0., 1. ! Equidistant mesh
+ coefy = 1., 0., 0., 0., 1. ! Equidistant mesh
+/
diff --git a/wk/fit2dbc.in b/wk/fit2dbc.in
new file mode 100644
index 0000000..b4b46ca
--- /dev/null
+++ b/wk/fit2dbc.in
@@ -0,0 +1,10 @@
+&newrun
+ nx = 10,
+ ny = 10,
+ nidbas = 3,3
+ mbes = 2,
+ coefx = 0., 1., 0., 0., 1. ! fdist = x
+ coefy = 0., 1., 0., 0., 1. ! fdist = y
+ coefx = 1., 0., 0., 0., 1. ! Equidistant mesh
+ coefy = 1., 0., 0., 0., 1. ! Equidistant mesh
+/
diff --git a/wk/fit2dbc_x.in b/wk/fit2dbc_x.in
new file mode 100644
index 0000000..b4b46ca
--- /dev/null
+++ b/wk/fit2dbc_x.in
@@ -0,0 +1,10 @@
+&newrun
+ nx = 10,
+ ny = 10,
+ nidbas = 3,3
+ mbes = 2,
+ coefx = 0., 1., 0., 0., 1. ! fdist = x
+ coefy = 0., 1., 0., 0., 1. ! fdist = y
+ coefx = 1., 0., 0., 0., 1. ! Equidistant mesh
+ coefy = 1., 0., 0., 0., 1. ! Equidistant mesh
+/
diff --git a/wk/fit2dbc_y.in b/wk/fit2dbc_y.in
new file mode 100644
index 0000000..b4b46ca
--- /dev/null
+++ b/wk/fit2dbc_y.in
@@ -0,0 +1,10 @@
+&newrun
+ nx = 10,
+ ny = 10,
+ nidbas = 3,3
+ mbes = 2,
+ coefx = 0., 1., 0., 0., 1. ! fdist = x
+ coefy = 0., 1., 0., 0., 1. ! fdist = y
+ coefx = 1., 0., 0., 0., 1. ! Equidistant mesh
+ coefy = 1., 0., 0., 0., 1. ! Equidistant mesh
+/
diff --git a/wk/getgrad_perf.in b/wk/getgrad_perf.in
new file mode 100644
index 0000000..0777787
--- /dev/null
+++ b/wk/getgrad_perf.in
@@ -0,0 +1,6 @@
+&newrun
+ nx = 64,
+ ny = 64, 
+ nidbas = 3,3
+ npt = 100000, nits=100
+/
diff --git a/wk/gridval_perf.in b/wk/gridval_perf.in
new file mode 100644
index 0000000..7e18794
--- /dev/null
+++ b/wk/gridval_perf.in
@@ -0,0 +1,6 @@
+&newrun
+ nx = 64,
+ ny = 64, 
+ nidbas = 3, 3
+ npt = 100000, nits=100
+/
diff --git a/wk/mesh.in b/wk/mesh.in
new file mode 100644
index 0000000..1af5979
--- /dev/null
+++ b/wk/mesh.in
@@ -0,0 +1,5 @@
+&newrun
+ nx = 2000,
+ coefs = 0., 1., 0., 0., 1. ! fdist = x
+ coefs = 1., 0., 0., 0., 1. ! Equidistant mesh
+/
diff --git a/wk/moments.in b/wk/moments.in
new file mode 100644
index 0000000..4deccb5
--- /dev/null
+++ b/wk/moments.in
@@ -0,0 +1 @@
+1, 0.0, 7.0, 16, 2
diff --git a/wk/optim1.in b/wk/optim1.in
new file mode 100644
index 0000000..0b327ec
--- /dev/null
+++ b/wk/optim1.in
@@ -0,0 +1,5 @@
+&newrun
+ nx = 64,
+ nidbas = 3,
+ npt = 10000000
+/
diff --git a/wk/optim2.in b/wk/optim2.in
new file mode 100644
index 0000000..b5ae359
--- /dev/null
+++ b/wk/optim2.in
@@ -0,0 +1,6 @@
+&newrun
+ nx = 64,
+ ny = 64, 
+ nidbas = 2, 2
+ npt = 10000000
+/
diff --git a/wk/optim3.in b/wk/optim3.in
new file mode 100644
index 0000000..060c954
--- /dev/null
+++ b/wk/optim3.in
@@ -0,0 +1,7 @@
+&newrun
+ nx = 64,
+ ny = 64, 
+ nz = 64, 
+ nidbas = 3*1,
+ npt = 10000000
+/
diff --git a/wk/pde1d.in b/wk/pde1d.in
new file mode 100644
index 0000000..4503514
--- /dev/null
+++ b/wk/pde1d.in
@@ -0,0 +1,9 @@
+&newrun
+ nx = 32,
+ nidbas = 3,
+ ngauss = 4,
+ kdiff = 10,
+ nlppform = t,
+ coefs = 1., 0., 0., 0., 1. ! Equidistant mesh
+ coefs = 0., 1., 0., 0., 1. ! fdist = x
+/
diff --git a/wk/pde1d_eig.in b/wk/pde1d_eig.in
new file mode 100644
index 0000000..4e790dc
--- /dev/null
+++ b/wk/pde1d_eig.in
@@ -0,0 +1,11 @@
+&newrun
+ nx = 32,
+ nidbas = 3,
+ ngauss = 4,
+ kdiff = 10,
+ nlppform = t,
+ coefs = 1., 0., 0., 0., 1. ! Equidistant mesh
+ coefs = 0., 1., 0., 0., 1. ! fdist = x
+ nev=20, ncv=25, which='SM'
+/
+
diff --git a/wk/pde1d_eig_zmumps.in b/wk/pde1d_eig_zmumps.in
new file mode 100644
index 0000000..1a1321f
--- /dev/null
+++ b/wk/pde1d_eig_zmumps.in
@@ -0,0 +1,12 @@
+&newrun
+ nx = 32,
+ nidbas = 3,
+ ngauss = 4,
+ kdiff = 10,
+ nlppform = t,
+ coefs = 1., 0., 0., 0., 1. ! Equidistant mesh
+ coefs = 0., 1., 0., 0., 1. ! fdist = x
+ nlinv=t, nev=20, ncv=25, which='LM'
+ nlinv=f, nev=20, ncv=25, which='SM'
+ tol=1.e-6
+/
diff --git a/wk/pde1dp.in b/wk/pde1dp.in
new file mode 100644
index 0000000..1500aaa
--- /dev/null
+++ b/wk/pde1dp.in
@@ -0,0 +1,8 @@
+&newrun
+ nx = 10,
+ nidbas = 3,
+ ngauss = 4,
+ ibcoef = 1,
+ coefs = 0., 1., 0., 0., 1. ! fdist = x
+ coefs = 1., 0., 0., 0., 1. ! Equidistant mesh
+/
diff --git a/wk/pde1dp_cmpl.in b/wk/pde1dp_cmpl.in
new file mode 100644
index 0000000..32035be
--- /dev/null
+++ b/wk/pde1dp_cmpl.in
@@ -0,0 +1,10 @@
+&newrun
+ nx = 8,
+ nidbas = 3,
+ ngauss = 4,
+ nlequid = f,
+ alpha = (1.0,1.0),
+ beta = (0.2,0.0),
+ mmode=3,
+ npt = 100,
+/
diff --git a/wk/pde1dp_cmpl_dft.in b/wk/pde1dp_cmpl_dft.in
new file mode 100644
index 0000000..c47df20
--- /dev/null
+++ b/wk/pde1dp_cmpl_dft.in
@@ -0,0 +1,9 @@
+&newrun
+ nx = 8,
+ nidbas = 3,
+ ngauss = 4,
+ alpha = (1.0,1.0),
+ beta = (0.2,0.0),
+ mmode=3,
+ npt = 100,
+/
diff --git a/wk/pde1dp_cmpl_mumps.in b/wk/pde1dp_cmpl_mumps.in
new file mode 100644
index 0000000..f6fb39a
--- /dev/null
+++ b/wk/pde1dp_cmpl_mumps.in
@@ -0,0 +1,13 @@
+&newrun
+ nx = 128,
+ nidbas = 3,
+ ngauss = 4,
+ nlequid = t,
+ alpha = (1.0,1.0),
+ beta = (0.2,0.0),
+ mmode=3,
+ npt = 100,
+ nlsym = t,
+ nlherm = f,
+ nlpos = f,
+/
diff --git a/wk/pde1dp_cmpl_pardiso.in b/wk/pde1dp_cmpl_pardiso.in
new file mode 100644
index 0000000..f32f54c
--- /dev/null
+++ b/wk/pde1dp_cmpl_pardiso.in
@@ -0,0 +1,13 @@
+&newrun
+ nx = 128,
+ nidbas = 3,
+ ngauss = 4,
+ nlequid = t,
+ alpha = (1.0,1.0),
+ beta = (0.2,0.0),
+ mmode=3,
+ npt = 100,
+ nlsym = f,
+ nlherm = f,
+ nlpos = f,
+/
diff --git a/wk/pde1dp_cmpl_wsmp.in b/wk/pde1dp_cmpl_wsmp.in
new file mode 100644
index 0000000..2155ffe
--- /dev/null
+++ b/wk/pde1dp_cmpl_wsmp.in
@@ -0,0 +1,13 @@
+&newrun
+ nx = 32,
+ nidbas = 3,
+ ngauss = 4,
+ nlequid = t,
+ alpha = (1.0,1.0),
+ beta = (0.2,0.0),
+ mmode=3,
+ npt = 100,
+ nlsym = t,
+ nlherm = f,
+ nlpos = f,
+/
diff --git a/wk/pde2d.in b/wk/pde2d.in
new file mode 100644
index 0000000..9e665ca
--- /dev/null
+++ b/wk/pde2d.in
@@ -0,0 +1,10 @@
+&newrun
+ nx = 32, ny = 32,
+ nidbas = 3,3,
+ ngauss = 4,4,
+ mbess = 3,
+ nlppform = t,
+ nlconmat  = t,
+ coefx = 1., 0., 0., 0., 1. ! Equidistant mesh
+ coefy = 1., 0., 0., 0., 1. ! Equidistant mesh
+/
diff --git a/wk/pde2d_mumps.in b/wk/pde2d_mumps.in
new file mode 100644
index 0000000..33d4553
--- /dev/null
+++ b/wk/pde2d_mumps.in
@@ -0,0 +1,16 @@
+&newrun
+ debug_mumps=t
+ nx = 32, ny = 32,
+ nx =512, ny=256
+ nidbas = 3,3,
+ ngauss = 4,4,
+ mbess = 3,
+ nlppform = t,
+ nlsym = f, nlpos=t,
+ nlmetis = f,
+ nlforce_zero = t,
+ nlserial = f,
+ coefx = 1., 0., 0., 0., 1. ! Equidistant mesh
+ coefy = 1., 0., 0., 0., 1. ! Equidistant mesh
+ matfile = ''
+/
diff --git a/wk/pde2d_nh.in b/wk/pde2d_nh.in
new file mode 100644
index 0000000..daf02ef
--- /dev/null
+++ b/wk/pde2d_nh.in
@@ -0,0 +1,10 @@
+&newrun
+ nx =32, ny = 32,
+ nidbas = 3,4,
+ ngauss = 4,5,
+ nlfix = f, 
+ mbess = 3,
+ nlppform = t,
+ coefx = 1., 0., 0., 0., 1. ! Equidistant mesh
+ coefy = 1., 0., 0., 0., 1. ! Equidistant mesh
+/
diff --git a/wk/pde2d_pardiso.in b/wk/pde2d_pardiso.in
new file mode 100644
index 0000000..e146f9d
--- /dev/null
+++ b/wk/pde2d_pardiso.in
@@ -0,0 +1,12 @@
+&newrun
+ nx = 32, ny = 32,
+ nidbas = 3,3,
+ ngauss = 4,4,
+ mbess = 3,
+ nlppform = t,
+ nlmetis = f,
+ nlforce_zero = t,
+ nlconmat = t,
+ coefx = 1., 0., 0., 0., 1. ! Equidistant mesh
+ coefy = 1., 0., 0., 0., 1. ! Equidistant mesh
+/
diff --git a/wk/pde2d_pb.in b/wk/pde2d_pb.in
new file mode 120000
index 0000000..1ae3c0a
--- /dev/null
+++ b/wk/pde2d_pb.in
@@ -0,0 +1 @@
+pde2d.in
\ No newline at end of file
diff --git a/wk/pde2d_petsc.in b/wk/pde2d_petsc.in
new file mode 100644
index 0000000..5a53680
--- /dev/null
+++ b/wk/pde2d_petsc.in
@@ -0,0 +1,13 @@
+&newrun
+ nx = 32, ny = 32,
+ nidbas = 3,3,
+ ngauss = 4,4,
+ mbess = 3,
+ nlppform = t,
+ nlsym = f,
+ nlforce_zero = t,
+ coefx = 1., 0., 0., 0., 1. ! Equidistant mesh
+ coefy = 1., 0., 0., 0., 1. ! Equidistant mesh
+ nitmax=10000, rtol=1.e-9,
+ matfile = 'mat_32.dat'
+/
diff --git a/wk/pde2d_pwsmp.in b/wk/pde2d_pwsmp.in
new file mode 100644
index 0000000..d85f248
--- /dev/null
+++ b/wk/pde2d_pwsmp.in
@@ -0,0 +1,11 @@
+&newrun
+ nx = 32, ny = 32,
+ nidbas = 3,3,
+ ngauss = 4,4,
+ mbess = 3,
+ nlppform = t,
+ nlforce_zero = t,
+ nlsym = f,
+ coefx = 1., 0., 0., 0., 1. ! Equidistant mesh
+ coefy = 1., 0., 0., 0., 1. ! Equidistant mesh
+/
diff --git a/wk/pde2d_sym_pardiso.in b/wk/pde2d_sym_pardiso.in
new file mode 100644
index 0000000..de9eb4f
--- /dev/null
+++ b/wk/pde2d_sym_pardiso.in
@@ -0,0 +1,13 @@
+&newrun
+ nx = 32, ny = 32,
+ nidbas = 3,3,
+ ngauss = 4,4,
+ mbess = 3,
+ epsi = 0.0, 
+ nlppform = t,
+ nlmetis = f,
+ nlforce_zero = t,
+ nlpos = t,
+ coefx = 1., 0., 0., 0., 1. ! Equidistant mesh
+ coefy = 1., 0., 0., 0., 1. ! Equidistant mesh
+/
diff --git a/wk/pde2d_sym_pardiso_dft.in b/wk/pde2d_sym_pardiso_dft.in
new file mode 100644
index 0000000..6065900
--- /dev/null
+++ b/wk/pde2d_sym_pardiso_dft.in
@@ -0,0 +1,16 @@
+&newrun
+ nx = 32, ny = 32,
+ nidbas = 3,3,
+ ngauss = 4,4,
+ kmin=-4, kmax=4,
+ mbess = 3,
+ epsi = 0.9, 
+ nlppform = t,
+ nlmetis = f,
+ nlforce_zero = t,
+ nlpos = t,
+ coefx = 1., 0., 0., 0., 1. ! Equidistant mesh
+ coefy = 1., 0., 0., 0., 1. ! Equidistant mesh
+/
+3
+-1 0 1
diff --git a/wk/pde2d_sym_wsmp.in b/wk/pde2d_sym_wsmp.in
new file mode 100644
index 0000000..d1694bb
--- /dev/null
+++ b/wk/pde2d_sym_wsmp.in
@@ -0,0 +1,11 @@
+&newrun
+ nx = 32, ny = 32,
+ nidbas = 3,3,
+ ngauss = 4,4,
+ mbess = 3,
+ nlppform = t,
+ nlforce_zero = t,
+ nlpos = t,
+ coefx = 1., 0., 0., 0., 1. ! Equidistant mesh
+ coefy = 1., 0., 0., 0., 1. ! Equidistant mesh
+/
diff --git a/wk/pde2d_sym_wsmp_dft.in b/wk/pde2d_sym_wsmp_dft.in
new file mode 100644
index 0000000..1b01d91
--- /dev/null
+++ b/wk/pde2d_sym_wsmp_dft.in
@@ -0,0 +1,15 @@
+&newrun
+ nx = 32, ny = 32,
+ nidbas = 3,3,
+ ngauss = 4,4,
+ kmin=-4, kmax=4,
+ mbess = 3,
+ epsi = 0.9, 
+ nlppform = t,
+ nlforce_zero = t,
+ nlpos = t,
+ coefx = 1., 0., 0., 0., 1. ! Equidistant mesh
+ coefy = 1., 0., 0., 0., 1. ! Equidistant mesh
+/
+3
+-1 0 1
diff --git a/wk/pde2d_wsmp.in b/wk/pde2d_wsmp.in
new file mode 100644
index 0000000..9cd0dca
--- /dev/null
+++ b/wk/pde2d_wsmp.in
@@ -0,0 +1,10 @@
+&newrun
+ nx = 32, ny = 32,
+ nidbas = 3,3,
+ ngauss = 4,4,
+ mbess = 3,
+ nlppform = t,
+ nlforce_zero = t,
+ coefx = 1., 0., 0., 0., 1. ! Equidistant mesh
+ coefy = 1., 0., 0., 0., 1. ! Equidistant mesh
+/
diff --git a/wk/pde3d.in b/wk/pde3d.in
new file mode 100644
index 0000000..76f58d3
--- /dev/null
+++ b/wk/pde3d.in
@@ -0,0 +1,10 @@
+&newrun
+ nx = 32, ny = 16, nz=16
+ nx = 64, ny = 64, nz=32
+ nx = 32, ny = 16, nz=8
+ nidbas = 3,3,3
+ ngauss = 4,4,4
+ mbess = 3, npow=2,
+ nlppform = f,
+ coefx = 1., 0., 0., 0., 1. ! Equidistant mesh
+/
diff --git a/wk/poisson_petsc.in b/wk/poisson_petsc.in
new file mode 100644
index 0000000..af12ee2
--- /dev/null
+++ b/wk/poisson_petsc.in
@@ -0,0 +1,6 @@
+&newrun
+ nx = 256, ny = 256,
+ nitmax=10000, rtol=1.e-9
+ matfile='mat_256x256.dat'
+ rhsfile='rhs_256x256.dat'
+/
diff --git a/wk/ppde3d.in b/wk/ppde3d.in
new file mode 100644
index 0000000..76f58d3
--- /dev/null
+++ b/wk/ppde3d.in
@@ -0,0 +1,10 @@
+&newrun
+ nx = 32, ny = 16, nz=16
+ nx = 64, ny = 64, nz=32
+ nx = 32, ny = 16, nz=8
+ nidbas = 3,3,3
+ ngauss = 4,4,4
+ mbess = 3, npow=2,
+ nlppform = f,
+ coefx = 1., 0., 0., 0., 1. ! Equidistant mesh
+/
diff --git a/wk/ppde3d_pb.in b/wk/ppde3d_pb.in
new file mode 100644
index 0000000..76f58d3
--- /dev/null
+++ b/wk/ppde3d_pb.in
@@ -0,0 +1,10 @@
+&newrun
+ nx = 32, ny = 16, nz=16
+ nx = 64, ny = 64, nz=32
+ nx = 32, ny = 16, nz=8
+ nidbas = 3,3,3
+ ngauss = 4,4,4
+ mbess = 3, npow=2,
+ nlppform = f,
+ coefx = 1., 0., 0., 0., 1. ! Equidistant mesh
+/
diff --git a/wk/runtest.sh b/wk/runtest.sh
new file mode 100644
index 0000000..810d2bb
--- /dev/null
+++ b/wk/runtest.sh
@@ -0,0 +1,37 @@
+#
+# @file runtest.sh
+#
+# @brief
+#
+# @copyright
+# Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
+# SPC (Swiss Plasma Center)
+#
+# spclibs is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# spclibs is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# @authors
+# (in alphabetical order)
+# @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
+#
+#!/bin/sh
+
+progname=$1
+input_dir=$2
+
+prog=$(basename ${progname})
+input_file=${input_dir}/${prog}.in
+
+${progname} < $input_file
+
+exit $?
+
diff --git a/wk/runtests b/wk/runtests
new file mode 100755
index 0000000..3ae622e
--- /dev/null
+++ b/wk/runtests
@@ -0,0 +1,23 @@
+#!/bin/sh
+for e in ../bin/*; do
+    c=$(basename $e);
+    if [ -f $c.out ]; then #run only if reference output file exists.
+	echo -ne "\n*** Running $c ... "
+	temp=$c.$$
+	if [ -f $c.in ]; then
+	    $e < $c.in | grep -v 'time (s)' | grep -v 'Memory used' > $temp
+	else
+	    $e | grep -v 'time (s)' | grep -v 'Memory used' > $temp
+	fi
+	diff -w  $c.out $temp >/dev/null
+	stat=$?
+	if [ $stat -eq 1 ]; then
+	    echo "test failed! ***"
+	    echo "*** Diff of $c.out $temp ***"
+	    diff -w  $c.out $temp    
+	else
+	    echo "test passed! ***"
+	fi
+	rm $temp
+    fi
+done
diff --git a/wk/runtests.bgp b/wk/runtests.bgp
new file mode 100755
index 0000000..4559b03
--- /dev/null
+++ b/wk/runtests.bgp
@@ -0,0 +1,37 @@
+#!/bin/sh
+part=$1
+part=${part:?"missing (R00-M0-00 for example)"}
+
+d=`pwd`
+b=$d/../bin
+w=/bgscratch/$USER
+
+cd $w
+rm -f *.h5
+cp -p $d/*.out ./
+cp -p $d/*.in ./
+
+EXEC="mpirun -nofree -mode VN  -np 1 -cwd $w -partition $part -exe"
+
+for e in $b/*; do
+    c=$(basename $e);
+    if [ -f $c.out ]; then #run only if reference output file exists.
+	echo -ne "\n*** Running $c ... "
+	temp=$c.$$
+	if [ -f $c.in ]; then
+	    $EXEC $e < $c.in | grep -v 'time (s)' | grep -v 'Memory used' > $temp
+	else
+	    $EXEC $e | grep -v 'time (s)' | grep -v 'Memory used' > $temp
+	fi
+	diff -w  $c.out $temp >/dev/null
+	stat=$?
+	if [ $stat -eq 1 ]; then
+	    echo "test failed! ***"
+	    echo "*** Diff of $c.out $temp ***"
+	    diff -w  $c.out $temp    
+	else
+	    echo "test passed! ***"
+	fi
+	rm $temp
+    fi
+done
diff --git a/wk/runtests.mac b/wk/runtests.mac
new file mode 100644
index 0000000..77abbac
--- /dev/null
+++ b/wk/runtests.mac
@@ -0,0 +1,487 @@
+
+*** Running driv1 ... test failed! ***
+*** Diff of driv1.out driv1.4071 ***
+6,10c6,10
+<  A       =  0.000000000000000E+000,
+<  B       =   1.00000000000000     ,
+<  COEFX   =   1.00000000000000     ,  0.000000000000000E+000,   10.0000000000000     ,  0.500000000000000     ,
+<    0.200000000000000     ,
+<  COEFY   =   1.00000000000000     , 3*0.000000000000000E+000  ,   1.00000000000000     
+---
+>  A=  0.0000000000000000     ,
+>  B=  1.0000000000000000     ,
+>  COEFX=  1.0000000000000000     ,  0.0000000000000000     ,  10.000000000000000     , 0.50000000000000000     , 0.20000000000000001     ,
+>  
+>  COEFY=  1.0000000000000000     , 3*0.0000000000000000       ,  1.0000000000000000     ,
+
+*** Running driv2 ... test failed! ***
+*** Diff of driv2.out driv2.4071 ***
+6,8c6,8
+<  A       =  0.000000000000000E+000,
+<  B       =   1.00000000000000     ,
+<  COEFX   =  0.000000000000000E+000,   1.00000000000000     , 2*0.000000000000000E+000  ,   1.00000000000000     
+---
+>  A=  0.0000000000000000     ,
+>  B=  1.0000000000000000     ,
+>  COEFX=  0.0000000000000000     ,  1.0000000000000000     , 2*0.0000000000000000       ,  1.0000000000000000     ,
+25c25
+<  Sum of finteg   1.00000000000000     
+---
+>  Sum of finteg  0.99999999999999967     
+29c29
+<  Sum of finteg   1.00000000000000     
+---
+>  Sum of finteg  0.99999999999999967     
+
+*** Running driv3 ... test failed! ***
+*** Diff of driv3.out driv3.4071 ***
+3,5c3,5
+<  A       =  0.000000000000000E+000,
+<  B       =   1.00000000000000     ,
+<  COEFX   =   1.00000000000000     , 3*0.000000000000000E+000  ,   1.00000000000000     ,
+---
+>  A=  0.0000000000000000     ,
+>  B=  1.0000000000000000     ,
+>  COEFX=  1.0000000000000000     , 3*0.0000000000000000       ,  1.0000000000000000     ,
+9c9
+<  PERIODIC2       = F
+---
+>  PERIODIC2=F,
+19c19
+<    0.000   0.000   0.000   0.188   0.313   0.438   0.562   0.687   0.812   1.000
+---
+>    0.000   0.000   0.000   0.188   0.313   0.438   0.563   0.687   0.812   1.000
+
+*** Running fit1d ... test failed! ***
+*** Diff of fit1d.out fit1d.4071 ***
+4,6c4,6
+<  A       =  0.000000000000000E+000,
+<  B       =   1.00000000000000     ,
+<  COEFX   =   1.00000000000000     , 3*0.000000000000000E+000  ,   1.00000000000000     
+---
+>  A=  0.0000000000000000     ,
+>  B=  1.0000000000000000     ,
+>  COEFX=  1.0000000000000000     , 3*0.0000000000000000       ,  1.0000000000000000     ,
+
+*** Running fit1dbc ... test failed! ***
+*** Diff of fit1dbc.out fit1dbc.4071 ***
+4,8c4,9
+<  A       =  0.000000000000000E+000,
+<  B       =   1.00000000000000     ,
+<  COEFX   =   1.00000000000000     , 3*0.000000000000000E+000  ,   1.00000000000000     ,
+<  IBC     = 4*2, 2*3, 2*5, 2*6, 2*7, 2*8, 2*9, 2*10, 2*11,
+<  FBC     = 20*0.000000000000000E+000  
+---
+>  A=  0.0000000000000000     ,
+>  B=  1.0000000000000000     ,
+>  COEFX=  1.0000000000000000     , 3*0.0000000000000000       ,  1.0000000000000000     ,
+>  IBC= 4*2          , 2*3          , 2*5          , 2*6          , 2*7          ,
+>   2*8          , 2*9          , 2*10         , 2*11         ,
+>  FBC= 20*0.0000000000000000       ,
+
+*** Running fit1dp ... test failed! ***
+*** Diff of fit1dp.out fit1dp.4071 ***
+4,6c4,6
+<  A       =  0.000000000000000E+000,
+<  B       =   1.00000000000000     ,
+<  COEFX   =   1.00000000000000     , 3*0.000000000000000E+000  ,   1.00000000000000     
+---
+>  A=  0.0000000000000000     ,
+>  B=  1.0000000000000000     ,
+>  COEFX=  1.0000000000000000     , 3*0.0000000000000000       ,  1.0000000000000000     ,
+
+*** Running fit2d ... test failed! ***
+*** Diff of fit2d.out fit2d.4071 ***
+6,7c6,7
+<  COEFX   =   1.00000000000000     , 3*0.000000000000000E+000  ,   1.00000000000000     ,
+<  COEFY   =   1.00000000000000     , 3*0.000000000000000E+000  ,   1.00000000000000     
+---
+>  COEFX=  1.0000000000000000     , 3*0.0000000000000000       ,  1.0000000000000000     ,
+>  COEFY=  1.0000000000000000     , 3*0.0000000000000000       ,  1.0000000000000000     ,
+14,16c14,16
+<    0.000   0.000   0.000   0.000   0.000   0.000   0.000   0.000   0.000   0.000   0.000
+<    0.000  -0.019  -0.073  -0.156  -0.256  -0.357  -0.438  -0.475  -0.438  -0.293   0.000
+<    0.000  -0.012  -0.045  -0.096  -0.158  -0.220  -0.271  -0.294  -0.271  -0.181   0.000
+---
+>   -0.000  -0.000  -0.000  -0.000  -0.000  -0.000  -0.000  -0.000  -0.000  -0.000  -0.000
+>   -0.000  -0.019  -0.073  -0.156  -0.256  -0.357  -0.438  -0.475  -0.438  -0.293  -0.000
+>   -0.000  -0.012  -0.045  -0.096  -0.158  -0.220  -0.271  -0.294  -0.271  -0.181  -0.000
+19,21c19,21
+<    0.000   0.000   0.000   0.000   0.000   0.000   0.000   0.000   0.000   0.000   0.000
+<    0.000  -0.019  -0.073  -0.156  -0.256  -0.357  -0.438  -0.475  -0.438  -0.293   0.000
+<    0.000  -0.012  -0.045  -0.096  -0.158  -0.220  -0.271  -0.294  -0.271  -0.181   0.000
+---
+>   -0.000  -0.000  -0.000  -0.000  -0.000  -0.000  -0.000  -0.000  -0.000  -0.000  -0.000
+>   -0.000  -0.019  -0.073  -0.156  -0.256  -0.357  -0.438  -0.475  -0.438  -0.293  -0.000
+>   -0.000  -0.012  -0.045  -0.096  -0.158  -0.220  -0.271  -0.294  -0.271  -0.181  -0.000
+
+*** Running fit2d1d ... test failed! ***
+*** Diff of fit2d1d.out fit2d1d.4071 ***
+6,7c6,7
+<  COEFX   =   1.00000000000000     , 3*0.000000000000000E+000  ,   1.00000000000000     ,
+<  COEFY   =   1.00000000000000     , 3*0.000000000000000E+000  ,   1.00000000000000     
+---
+>  COEFX=  1.0000000000000000     , 3*0.0000000000000000       ,  1.0000000000000000     ,
+>  COEFY=  1.0000000000000000     , 3*0.0000000000000000       ,  1.0000000000000000     ,
+13c13
+< Min max or errors  -7.903E-07   7.937E-07
+---
+> Min max or errors  -7.947E-07   7.760E-07
+
+*** Running fit2d_cmpl ... test failed! ***
+*** Diff of fit2d_cmpl.out fit2d_cmpl.4071 ***
+6,7c6,7
+<  COEFX   =   1.00000000000000     , 3*0.000000000000000E+000  ,   1.00000000000000     ,
+<  COEFY   =   1.00000000000000     , 3*0.000000000000000E+000  ,   1.00000000000000     
+---
+>  COEFX=  1.0000000000000000     , 3*0.0000000000000000       ,  1.0000000000000000     ,
+>  COEFY=  1.0000000000000000     , 3*0.0000000000000000       ,  1.0000000000000000     ,
+10c10
+< Max errors (on random points)   8.273E-07
+---
+> Max errors (on random points)   8.274E-07
+
+*** Running fit2dbc ... test failed! ***
+*** Diff of fit2dbc.out fit2dbc.4071 ***
+6,7c6,7
+<  COEFX   =   1.00000000000000     , 3*0.000000000000000E+000  ,   1.00000000000000     ,
+<  COEFY   =   1.00000000000000     , 3*0.000000000000000E+000  ,   1.00000000000000     
+---
+>  COEFX=  1.0000000000000000     , 3*0.0000000000000000       ,  1.0000000000000000     ,
+>  COEFY=  1.0000000000000000     , 3*0.0000000000000000       ,  1.0000000000000000     ,
+14c14
+<    0.000  -0.014  -0.052  -0.108  -0.176  -0.250  -0.324  -0.392  -0.448  -0.486  -0.500
+---
+>   -0.000  -0.014  -0.052  -0.108  -0.176  -0.250  -0.324  -0.392  -0.448  -0.486  -0.500
+25,26d24
+< Memory used so far (MB) =   6.797
+< Memory used so far (MB) =  10.465
+34c32
+< Min max or errors  -3.331E-16   5.551E-16
+---
+> Min max or errors  -6.106E-16   5.551E-16
+
+*** Running fit2dbc_x ... test failed! ***
+*** Diff of fit2dbc_x.out fit2dbc_x.4071 ***
+6,7c6,7
+<  COEFX   =   1.00000000000000     , 3*0.000000000000000E+000  ,   1.00000000000000     ,
+<  COEFY   =   1.00000000000000     , 3*0.000000000000000E+000  ,   1.00000000000000     
+---
+>  COEFX=  1.0000000000000000     , 3*0.0000000000000000       ,  1.0000000000000000     ,
+>  COEFY=  1.0000000000000000     , 3*0.0000000000000000       ,  1.0000000000000000     ,
+14,16c14,16
+<    0.000   0.000   0.000   0.000   0.000   0.000   0.000   0.000   0.000   0.000   0.000
+<    0.000  -0.019  -0.073  -0.156  -0.256  -0.357  -0.438  -0.475  -0.438  -0.293   0.000
+<    0.000  -0.012  -0.045  -0.096  -0.158  -0.220  -0.271  -0.294  -0.271  -0.181   0.000
+---
+>   -0.000  -0.000  -0.000  -0.000  -0.000  -0.000  -0.000  -0.000  -0.000  -0.000  -0.000
+>   -0.000  -0.019  -0.073  -0.156  -0.256  -0.357  -0.438  -0.475  -0.438  -0.293  -0.000
+>   -0.000  -0.012  -0.045  -0.096  -0.158  -0.220  -0.271  -0.294  -0.271  -0.181  -0.000
+19,21c19,21
+<    0.000   0.000   0.000   0.000   0.000   0.000   0.000   0.000   0.000   0.000   0.000
+<    0.000  -0.019  -0.073  -0.156  -0.256  -0.357  -0.438  -0.475  -0.438  -0.293   0.000
+<    0.000  -0.012  -0.045  -0.096  -0.158  -0.220  -0.271  -0.294  -0.271  -0.181   0.000
+---
+>   -0.000  -0.000  -0.000  -0.000  -0.000  -0.000  -0.000  -0.000  -0.000  -0.000  -0.000
+>   -0.000  -0.019  -0.073  -0.156  -0.256  -0.357  -0.438  -0.475  -0.438  -0.293  -0.000
+>   -0.000  -0.012  -0.045  -0.096  -0.158  -0.220  -0.271  -0.294  -0.271  -0.181  -0.000
+
+*** Running fit2dbc_y ... test failed! ***
+*** Diff of fit2dbc_y.out fit2dbc_y.4071 ***
+6,7c6,7
+<  COEFX   =   1.00000000000000     , 3*0.000000000000000E+000  ,   1.00000000000000     ,
+<  COEFY   =   1.00000000000000     , 3*0.000000000000000E+000  ,   1.00000000000000     
+---
+>  COEFX=  1.0000000000000000     , 3*0.0000000000000000       ,  1.0000000000000000     ,
+>  COEFY=  1.0000000000000000     , 3*0.0000000000000000       ,  1.0000000000000000     ,
+14,16c14,16
+<    0.000   0.000   0.000   0.000   0.000   0.000   0.000   0.000   0.000   0.000   0.000
+<    0.000  -0.019  -0.073  -0.156  -0.256  -0.357  -0.438  -0.475  -0.438  -0.293   0.000
+<    0.000  -0.012  -0.045  -0.096  -0.158  -0.220  -0.271  -0.294  -0.271  -0.181   0.000
+---
+>   -0.000  -0.000  -0.000  -0.000  -0.000  -0.000  -0.000  -0.000  -0.000  -0.000  -0.000
+>   -0.000  -0.019  -0.073  -0.156  -0.256  -0.357  -0.438  -0.475  -0.438  -0.293  -0.000
+>   -0.000  -0.012  -0.045  -0.096  -0.158  -0.220  -0.271  -0.294  -0.271  -0.181  -0.000
+19,21c19,21
+<    0.000   0.000   0.000   0.000   0.000   0.000   0.000   0.000   0.000   0.000   0.000
+<    0.000  -0.019  -0.073  -0.156  -0.256  -0.357  -0.438  -0.475  -0.438  -0.293   0.000
+<    0.000  -0.012  -0.045  -0.096  -0.158  -0.220  -0.271  -0.294  -0.271  -0.181   0.000
+---
+>   -0.000  -0.000  -0.000  -0.000  -0.000  -0.000  -0.000  -0.000  -0.000  -0.000  -0.000
+>   -0.000  -0.019  -0.073  -0.156  -0.256  -0.357  -0.438  -0.475  -0.438  -0.293  -0.000
+>   -0.000  -0.012  -0.045  -0.096  -0.158  -0.220  -0.271  -0.294  -0.271  -0.181  -0.000
+
+*** Running moments ... test passed! ***
+
+*** Running pde1d ... test failed! ***
+*** Diff of pde1d.out pde1d.4071 ***
+7c7
+<  COEFS   =  0.000000000000000E+000,   1.00000000000000     , 2*0.000000000000000E+000  ,   1.00000000000000     
+---
+>  COEFS=  0.0000000000000000     ,  1.0000000000000000     , 2*0.0000000000000000       ,  1.0000000000000000     ,
+29,31d28
+< Matrice construction time (s)    1.034E-03
+< Matrice factorisation time (s)   4.665E-03
+< Backsolve time (s)               5.889E-05
+
+*** Running pde1dp ... test failed! ***
+*** Diff of pde1dp.out pde1dp.4071 ***
+6c6
+<  COEFS   =   1.00000000000000     , 3*0.000000000000000E+000  ,   1.00000000000000     
+---
+>  COEFS=  1.0000000000000000     , 3*0.0000000000000000       ,  1.0000000000000000     ,
+33,39c33,39
+<      1   1.000E+00   1.000E+00   0.000E+00
+<      2   1.908E-17   0.000E+00   1.908E-17
+<      3  -1.041E-17   0.000E+00  -1.041E-17
+<      4   6.939E-18   0.000E+00   6.939E-18
+<      5  -6.939E-18   0.000E+00  -6.939E-18
+<      6   1.388E-17   0.000E+00   1.388E-17
+<      7  -2.776E-17   0.000E+00  -2.776E-17
+---
+>      1   1.000E+00   1.000E+00   2.220E-16
+>      2  -2.099E-16   0.000E+00  -2.099E-16
+>      3   1.180E-16   0.000E+00   1.180E-16
+>      4  -6.245E-17   0.000E+00  -6.245E-17
+>      5   3.469E-17   0.000E+00   3.469E-17
+>      6  -1.388E-17   0.000E+00  -1.388E-17
+>      7  -0.000E+00   0.000E+00  -0.000E+00
+41c41
+<      9   0.000E+00   0.000E+00   0.000E+00
+---
+>      9  -0.000E+00   0.000E+00  -0.000E+00
+43c43
+< Max. error =   1.110E-16
+---
+> Max. error =   2.220E-16
+
+*** Running pde1dp_cmpl ... test failed! ***
+*** Diff of pde1dp_cmpl.out pde1dp_cmpl.4071 ***
+6,7c6,7
+<  ALPHA   = (1.00000000000000,1.00000000000000),
+<  BETA    = (0.200000000000000,0.000000000000000E+000),
+---
+>  ALPHA=(  1.0000000000000000     ,  1.0000000000000000     ),
+>  BETA=( 0.20000000000000001     ,  0.0000000000000000     ),
+9c9
+<  NPT     =         100
+---
+>  NPT=        100,
+
+*** Running pde2d ... test failed! ***
+*** Diff of pde2d.out pde2d.4071 ***
+8,9c8,9
+<  COEFX   =   1.00000000000000     , 3*0.000000000000000E+000  ,   1.00000000000000     ,
+<  COEFY   =   1.00000000000000     , 3*0.000000000000000E+000  ,   1.00000000000000     
+---
+>  COEFX=  1.0000000000000000     , 3*0.0000000000000000       ,  1.0000000000000000     ,
+>  COEFY=  1.0000000000000000     , 3*0.0000000000000000       ,  1.0000000000000000     ,
+35,36c35,36
+< Mem used so far (MB)   1.104E+01
+<  Integral of sol  -8.174E-16
+---
+> Mem used so far (MB)  -1.000E+00
+>  Integral of sol  -8.877E-16
+40d39
+< GRIDVAL2 time (s)                2.297E-04
+47,50c46
+< Matrice construction time (s)    8.840E-01
+< Matrice factorisation time (s)   2.261E-02
+< Backsolve time (s)               3.321E-04
+< Factor/solve Gflop/s     1.344     1.153
+---
+> Factor/solve Gflop/s     2.406     0.556
+
+*** Running pde2d_pb ... test failed! ***
+*** Diff of pde2d_pb.out pde2d_pb.4071 ***
+8,9c8,9
+<  COEFX   =   1.00000000000000     , 3*0.000000000000000E+000  ,   1.00000000000000     ,
+<  COEFY   =   1.00000000000000     , 3*0.000000000000000E+000  ,   1.00000000000000     
+---
+>  COEFX=  1.0000000000000000     , 3*0.0000000000000000       ,  1.0000000000000000     ,
+>  COEFY=  1.0000000000000000     , 3*0.0000000000000000       ,  1.0000000000000000     ,
+35,36c35,36
+< Mem used so far (MB)   7.594E+00
+<  Integral of sol  -7.480E-16
+---
+> Mem used so far (MB)  -1.000E+00
+>  Integral of sol  -8.704E-16
+40d39
+< GRIDVAL2 time (s)                2.228E-04
+47,50c46
+< Matrice construction time (s)    7.885E-01
+< Matrice factorisation time (s)   4.689E-03
+< Backsolve time (s)               1.800E-04
+< Factor/solve Gflop/s     1.828     1.542
+---
+> Factor/solve Gflop/s     0.914     0.557
+
+*** Running pde3d ... test failed! ***
+*** Diff of pde3d.out pde3d.4071 ***
+10c10
+<  COEFX   =   1.00000000000000     , 3*0.000000000000000E+000  ,   1.00000000000000     
+---
+>  COEFX=  1.0000000000000000     , 3*0.0000000000000000       ,  1.0000000000000000     ,
+42c42
+< Mem used so far (MB)   1.032E+01
+---
+> Mem used so far (MB)  -1.000E+00
+46a47,57
+>            X        CALC        ANAL       ERROR
+>    9.976E-01   4.892E-03   4.839E-03   5.282E-05
+>    5.668E-01   1.249E-01   1.236E-01   1.340E-03
+>    9.659E-01   6.105E-02   6.039E-02   6.594E-04
+>    7.479E-01   1.864E-01   1.843E-01   2.007E-03
+>    3.674E-01   4.336E-02   4.290E-02   4.618E-04
+>    4.806E-01   8.631E-02   8.538E-02   9.235E-04
+>    7.375E-02   4.032E-04   3.990E-04   4.163E-06
+>    5.355E-03   1.562E-07   1.536E-07   2.578E-09
+>    3.471E-01   3.717E-02   3.677E-02   3.955E-04
+>    3.422E-01   3.577E-02   3.539E-02   3.805E-04
+53,57c64,65
+< Matrice construction time (s)    5.769E-01
+< Matrice factorisation time (s)   4.341E-03
+< Backsolve time (s)               2.255E-03
+< Factor/solve Gflop/s     1.804     0.085
+< Mem used so far (MB)   1.245E+01
+---
+> Factor/solve Gflop/s     1.842     0.092
+> Mem used so far (MB)  -1.000E+00
+
+*** Running tbasfun ... test failed! ***
+*** Diff of tbasfun.out tbasfun.4071 ***
+4c4
+<  NPT     =          10
+---
+>  NPT=         10,
+7c7
+<    0.000   0.016   0.031   0.047   0.062   0.078   0.094   0.109   0.125   0.141
+---
+>    0.000   0.016   0.031   0.047   0.063   0.078   0.094   0.109   0.125   0.141
+9,10c9,10
+<    0.312   0.328   0.344   0.359   0.375   0.391   0.406   0.422   0.438   0.453
+<    0.469   0.484   0.500   0.516   0.531   0.547   0.562   0.578   0.594   0.609
+---
+>    0.313   0.328   0.344   0.359   0.375   0.391   0.406   0.422   0.438   0.453
+>    0.469   0.484   0.500   0.516   0.531   0.547   0.563   0.578   0.594   0.609
+12c12
+<    0.781   0.797   0.812   0.828   0.844   0.859   0.875   0.891   0.906   0.922
+---
+>    0.781   0.797   0.813   0.828   0.844   0.859   0.875   0.891   0.906   0.922
+16c16
+<    0.000   0.000   0.000   0.000   0.016   0.031   0.047   0.062   0.078   0.094
+---
+>    0.000   0.000   0.000   0.000   0.016   0.031   0.047   0.063   0.078   0.094
+18,19c18,19
+<    0.266   0.281   0.297   0.312   0.328   0.344   0.359   0.375   0.391   0.406
+<    0.422   0.438   0.453   0.469   0.484   0.500   0.516   0.531   0.547   0.562
+---
+>    0.266   0.281   0.297   0.313   0.328   0.344   0.359   0.375   0.391   0.406
+>    0.422   0.438   0.453   0.469   0.484   0.500   0.516   0.531   0.547   0.563
+21c21
+<    0.734   0.750   0.766   0.781   0.797   0.812   0.828   0.844   0.859   0.875
+---
+>    0.734   0.750   0.766   0.781   0.797   0.813   0.828   0.844   0.859   0.875
+27,36c27,36
+<      1  3.9209E-07     0     0  0.0000E+00
+<      2  2.5480E-02     1     1  0.0000E+00
+<      3  3.5252E-01    22    22  0.0000E+00
+<      4  6.6691E-01    42    42  0.0000E+00
+<      5  9.6306E-01    61    61  0.0000E+00
+<      6  8.3829E-01    53    53  0.0000E+00
+<      7  3.3536E-01    21    21  0.0000E+00
+<      8  9.1533E-01    58    58  0.0000E+00
+<      9  7.9586E-01    50    50  0.0000E+00
+<     10  8.3269E-01    53    53  0.0000E+00
+---
+>      1  9.9756E-01    63    63  0.0000E+00
+>      2  5.6682E-01    36    36  0.0000E+00
+>      3  9.6592E-01    61    61  0.0000E+00
+>      4  7.4793E-01    47    47  0.0000E+00
+>      5  3.6739E-01    23    23  0.0000E+00
+>      6  4.8064E-01    30    30  0.0000E+00
+>      7  7.3754E-02     4     4  0.0000E+00
+>      8  5.3552E-03     0     0  0.0000E+00
+>      9  3.4708E-01    22    22  0.0000E+00
+>     10  3.4224E-01    21    21  0.0000E+00
+40,49c40,49
+<      1  3.9209E-07  0.0000E+00
+<      2  2.5480E-02  0.0000E+00
+<      3  3.5252E-01  0.0000E+00
+<      4  6.6691E-01  0.0000E+00
+<      5  9.6306E-01  0.0000E+00
+<      6  8.3829E-01  0.0000E+00
+<      7  3.3536E-01  0.0000E+00
+<      8  9.1533E-01  0.0000E+00
+<      9  7.9586E-01  0.0000E+00
+<     10  8.3269E-01  0.0000E+00
+---
+>      1  9.9756E-01  0.0000E+00
+>      2  5.6682E-01  0.0000E+00
+>      3  9.6592E-01  0.0000E+00
+>      4  7.4793E-01  0.0000E+00
+>      5  3.6739E-01  0.0000E+00
+>      6  4.8064E-01  0.0000E+00
+>      7  7.3754E-02  0.0000E+00
+>      8  5.3552E-03  0.0000E+00
+>      9  3.4708E-01  0.0000E+00
+>     10  3.4224E-01  0.0000E+00
+
+*** Running tmassmat ... test failed! ***
+*** Diff of tmassmat.out tmassmat.4071 ***
+4c4
+<  XLENGHT =   1.00000000000000     
+---
+>  XLENGHT=  1.0000000000000000     ,
+
+*** Running tmatrix_gb ... test failed! ***
+*** Diff of tmatrix_gb.out tmatrix_gb.4071 ***
+69c69
+<  Prod. of factored A diagnonals   7650.00000000003     
+---
+>  Prod. of factored A diagnonals   7650.0000000000291     
+
+*** Running tmatrix_pb ... test passed! ***
+
+*** Running tmatrix_zpb ... test failed! ***
+*** Diff of tmatrix_zpb.out tmatrix_zpb.4071 ***
+32c32
+<      (  0.0,  0.0)     (  0.0,  0.0)     (  0.0,  0.0)     ( 34.0,  1.0)     ( 45.0,  1.0)
+---
+>      (  0.0,  0.0)     (  0.0, -0.0)     (  0.0,  0.0)     ( 34.0,  1.0)     ( 45.0,  1.0)
+35c35
+<      ( 11.0,  0.0)     (  0.0,  0.0)     ( 13.0,  2.0)     ( 14.0,  3.0)     (  0.0,  0.0)
+---
+>      ( 11.0,  0.0)     (  0.0, -0.0)     ( 13.0,  2.0)     ( 14.0,  3.0)     (  0.0,  0.0)
+37,39c37,39
+<      ( 13.0, -2.0)     (  0.0,  0.0)     ( 33.0,  0.0)     ( 34.0,  1.0)     ( 35.0,  2.0)
+<      ( 14.0, -3.0)     (  0.0,  0.0)     ( 34.0, -1.0)     ( 44.0,  0.0)     ( 45.0,  1.0)
+<      (  0.0,  0.0)     (  0.0,  0.0)     ( 35.0, -2.0)     ( 45.0, -1.0)     ( 55.0,  0.0)
+---
+>      ( 13.0, -2.0)     (  0.0, -0.0)     ( 33.0,  0.0)     ( 34.0,  1.0)     ( 35.0,  2.0)
+>      ( 14.0, -3.0)     (  0.0, -0.0)     ( 34.0, -1.0)     ( 44.0,  0.0)     ( 45.0,  1.0)
+>      (  0.0,  0.0)     (  0.0, -0.0)     ( 35.0, -2.0)     ( 45.0, -1.0)     ( 55.0,  0.0)
+41c41
+<      ( 11.0,  0.0)     (  0.0,  0.0)     ( 13.0,  2.0)     ( 14.0,  3.0)     (  0.0,  0.0)
+---
+>      ( 11.0,  0.0)     (  0.0, -0.0)     ( 13.0,  2.0)     ( 14.0,  3.0)     (  0.0,  0.0)
+43,45c43,45
+<      ( 13.0, -2.0)     (  0.0,  0.0)     ( 33.0,  0.0)     ( 34.0,  1.0)     ( 35.0,  2.0)
+<      ( 14.0, -3.0)     (  0.0,  0.0)     ( 34.0, -1.0)     ( 44.0,  0.0)     ( 45.0,  1.0)
+<      (  0.0,  0.0)     (  0.0,  0.0)     ( 35.0, -2.0)     ( 45.0, -1.0)     ( 55.0,  0.0)
+---
+>      ( 13.0, -2.0)     (  0.0, -0.0)     ( 33.0,  0.0)     ( 34.0,  1.0)     ( 35.0,  2.0)
+>      ( 14.0, -3.0)     (  0.0, -0.0)     ( 34.0, -1.0)     ( 44.0,  0.0)     ( 45.0,  1.0)
+>      (  0.0,  0.0)     (  0.0, -0.0)     ( 35.0, -2.0)     ( 45.0, -1.0)     ( 55.0,  0.0)
+47c47
+<      ( 11.0,  0.0)     (  0.0,  0.0)     ( 13.0,  2.0)     ( 14.0,  3.0)     (  0.0,  0.0)
+---
+>      ( 11.0,  0.0)     (  0.0, -0.0)     ( 13.0,  2.0)     ( 14.0,  3.0)     (  0.0,  0.0)
+49,51c49,51
+<      ( 13.0, -2.0)     (  0.0,  0.0)     ( 33.0,  0.0)     ( 34.0,  1.0)     ( 35.0,  2.0)
+<      ( 14.0, -3.0)     (  0.0,  0.0)     ( 34.0, -1.0)     ( 44.0,  0.0)     ( 45.0,  1.0)
+<      (  0.0,  0.0)     (  0.0,  0.0)     ( 35.0, -2.0)     ( 45.0, -1.0)     ( 55.0,  0.0)
+---
+>      ( 13.0, -2.0)     (  0.0, -0.0)     ( 33.0,  0.0)     ( 34.0,  1.0)     ( 35.0,  2.0)
+>      ( 14.0, -3.0)     (  0.0, -0.0)     ( 34.0, -1.0)     ( 44.0,  0.0)     ( 45.0,  1.0)
+>      (  0.0,  0.0)     (  0.0, -0.0)     ( 35.0, -2.0)     ( 45.0, -1.0)     ( 55.0,  0.0)
diff --git a/wk/tbasfun.in b/wk/tbasfun.in
new file mode 100644
index 0000000..222b86e
--- /dev/null
+++ b/wk/tbasfun.in
@@ -0,0 +1,7 @@
+&newrun
+ nx = 64,
+ nidbas = 2,
+ npt = 10
+ jdermx = 1, 
+ nlper = t,
+/
diff --git a/wk/tcdsmat.in b/wk/tcdsmat.in
new file mode 100644
index 0000000..e753a37
--- /dev/null
+++ b/wk/tcdsmat.in
@@ -0,0 +1,13 @@
+&newrun
+ nints = 32,32
+ nidbas = 3,3,
+ ngauss = 4,4,
+ mbess = 3,
+ coefy = 0., 1., 0., 0., 1. ! fdist = y
+ coefx = 0., 1., 0., 0., 1. ! fdist = x
+ coefx = 1., 0., 0., 0., 1. ! Equidistant mesh
+ coefy = 1., 0., 0., 0., 1. ! Equidistant mesh
+ nitmx=100000, rtolmx=1.e-12, nssor=0, omega=1.6,
+ readmat=f, verbose=f,
+ filein="../solver/mat4096.h5",
+/
diff --git a/wk/test_kron.in b/wk/test_kron.in
new file mode 100644
index 0000000..e69de29
diff --git a/wk/tlocintv.in b/wk/tlocintv.in
new file mode 100644
index 0000000..ad5473e
--- /dev/null
+++ b/wk/tlocintv.in
@@ -0,0 +1,11 @@
+&newrun
+ nx = 64,
+ nidbas = 3,
+ ngauss = 4,
+ a = 0., b=1.0
+ np = 20,
+ nits=10000,
+ coefs = 0., 1., 0., 0., 1.    ! fdist = x
+ coefs = 1., 0., 1., 0.5, 0.2  ! Gaussian
+ coefs = 1., 0., 0., 0., 1.    ! Equidistant mesh
+/
diff --git a/wk/tmassmat.in b/wk/tmassmat.in
new file mode 100644
index 0000000..e0ef279
--- /dev/null
+++ b/wk/tmassmat.in
@@ -0,0 +1,5 @@
+&newrun
+ nx = 8,
+ nidbas = 3,
+ xlenght = 6.283185307179586,
+/
diff --git a/wk/tpardiso.in b/wk/tpardiso.in
new file mode 100644
index 0000000..fcd6eea
--- /dev/null
+++ b/wk/tpardiso.in
@@ -0,0 +1,9 @@
+&newrun
+ nints = 32,32
+ nidbas = 3,3,
+ ngauss = 4,4,
+ mbess = 3,
+ verbose = f,
+ readmat = f,
+ filein="../solver/mat32.h5",
+/
diff --git a/wk/tsparse1.in b/wk/tsparse1.in
new file mode 100644
index 0000000..a6abc66
--- /dev/null
+++ b/wk/tsparse1.in
@@ -0,0 +1,11 @@
+2
+19
+13
+5
+20
+1
+13
+10
+4
+2
+0