Page MenuHomec4science

solvers.tex
No OneTemporary

File Metadata

Created
Wed, May 29, 01:22

solvers.tex

%
% @file solvers.tex
%
% @brief
%
% @copyright
% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
% SPC (Swiss Plasma Center)
%
% SPClibs is free software: you can redistribute it and/or modify it under
% the terms of the GNU Lesser General Public License as published by the Free
% Software Foundation, either version 3 of the License, or (at your option)
% any later version.
%
% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License
% along with this program. If not, see <https://www.gnu.org/licenses/>.
%
% @author
% (in alphabetical order)
% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
%
\documentclass[a4paper]{article}
\usepackage{linuxdoc-sgml}
\usepackage{graphicx}
\usepackage{hyperref}
\usepackage{amsmath}
%\usepackage{verbatim}
%\usepackage[notref]{showkeys}
\title{\tt The Solvers in BSPLINES}
\author{Trach-Minh Tran}
\date{v0.6, December 2011}
\abstract{Implementation of a common simple interface to popular
solver packages (LAPACK, PARDISO, WSMP, PETSc, etc.). The main goal is to
provide an easy access to these packages in order to solve elliptic and
parabolic as well as some types of integro-differential
equations.}
\begin{document}
\maketitle
\tableofcontents
\section{Matrix form of the problem}
\subsection{Getting started}
\subsubsection{A one-dimensional problem}
Let us start with the one-dimensional Sturm-Liouville differential
equation:
\begin{equation*}
-\frac{d}{dx} \left[C_1(x) \frac{d\phi}{dx}\right] + C_2(x)\phi = \rho,
\end{equation*}
on the domain $0\leq x \leq L$ with suitable boundary conditions.
On a grid with $N$ intervals, the discretized solution $\phi$, using
the splines $\Lambda_i(x)$ of order $p$ can be written as
\begin{equation}
\label{sol1d}
\phi(x) = \sum_{i=0}^{d-1} \phi_i\Lambda_i(x),
\end{equation}
where \cite{BSPLINES}
\begin{equation*}
d =
\begin{cases}
N & \text{if $\phi$ is periodic}, \\
N+p & \text{otherwise},
\end{cases}
\end{equation*}
and $\phi_i$ are the unknowns of the following matrix equation:
\begin{equation}
\label{matEq1d}
\sum_{i'=0}^{d-1} A_{ii'}\phi_{i'} = \rho_i, \qquad i=0,\ldots, d-1.
\end{equation}
Here the matrix $A_{ii'}$ and the right-hand-side $\rho_i$ are
respectively given by:
\begin{equation}
\begin{split}
\label{matCoef1d}
A_{ii'} =& \int_{0}^{L}\!dx\,C_1\Lambda_{i}^{'}\Lambda_{i'}^{'}
+ \int_{0}^{L}\!dx\,C_2\Lambda_{i}\Lambda_{i'}, \\
\rho_i =& \int_{0}^{L}dx \rho\Lambda_i.\\
\end{split}
\end{equation}
For more general differential operators, the matrix coefficients $A_{ii'}$ can be
written as a sum of contributing matrices of the form
\begin{equation}
\label{mat1d}
A_{ii'} = \int_{0}^{L}\!dx\,C\Lambda_{i}^{\alpha}\Lambda_{i'}^{\alpha'},
\end{equation}
where $\Lambda_{i}^{\alpha}$ denotes the $\alpha^\text{th}$
derivative of $\Lambda_{i}$.
As the splines $\Lambda_i$ have a support of $p+1$ intervals, the
matrix is sparse and its
coefficients $A_{ii'}$ are non-zero only for $|i-i'| \leq p$: hence the
matrix has a band structure of bandwidth equal to $2p+1$
if the operator is purely differential. For an
integral equation such as
\begin{equation*}
\int_{0}^{L}\!dx' K(x,x')\phi(x') = \rho(x),
\end{equation*}
the discretization results in a \emph{dense} matrix of the form
\begin{equation}
\label{matIntg1d}
A_{ii'} = \int_{0}^{L}\!dx \Lambda_{i}(x) \int_{0}^{L}\!dx'
K(x,x')\Lambda_{i'}(x').
\end{equation}
Note that when the kernel is separable $K(x,x') = U(x)V(x')$, the
matrix $A_{ii'}$ is a \emph{dyadic}:
\begin{equation}
A_{ii'} = \int_{0}^{L}\!dx U(x) \Lambda_{i}(x) \int_{0}^{L}\!dx
V(x)\Lambda_{i'}(x) = U_iV_{i'}.
\end{equation}
\subsubsection{Periodic boundary conditions}
The splines $\Lambda_i$ are $N$-periodic ($\Lambda_{i+N}(x)=\Lambda_i(x-L)$). This
property can be easily enforced while constructing both $\rho_i$
and the matrix $A_{ii'}$. This results in a solution
$\phi_i$ which is also $N$-periodic.
\subsubsection{Non-periodic boundary conditions}
In {\tt BSPLINES} \cite{BSPLINES}, the constructed non-periodic splines are such that at the
boundaries $x=0$ and $x=L$:
\begin{equation}
\Lambda_i(0) = \delta_{i,0}, \qquad \Lambda_i(L) = \delta_{i,N+p-1},
\end{equation}
which imply that, using (\ref{sol1d})
\begin{equation}
\phi(0) = \phi_0, \qquad \phi(L) = \phi_{N+p-1}.
\end{equation}
It is thus possible to impose the Dirichlet boundary conditions
by a simple modification of the matrix $A_{ii'}$ as shown in
Appendix~\ref{DirichletCond}.
\subsection{Problems in more dimensions}
\subsubsection{Two-dimensional equations}
The results obtained above can be extended in a
straightforward manner. Assuming, for example a
\emph{polar like} $(r,\theta)$ coordinate system,
with the discretized solution and the right-hand side
written as:
\begin{equation}
\label{discreteEq2d}
\begin{split}
\phi(r,\theta) &= \sum_{i=0}^{N_r+p_r-1}\sum_{j=0}^{N_\theta-1}
\phi_{ij}\Lambda_i(r) \Lambda_j(\theta) \\
\rho_{ij} &= \int_{0}^{R}\!dr \int_{0}^{2\pi} \!d\theta J(r,\theta) \rho(r,\theta)
\Lambda_i(r) \Lambda_j(\theta),\\
\end{split}
\end{equation}
where $J(r,\theta)$ is the Jacobian, the matrix equation to solve is
\begin{equation}
\sum_{i'=0}^{N_r+p_r-1}\sum_{j'=0}^{N_\theta-1}
A_{iji'j'}\phi_{i'j'} = \rho_{ij},
\end{equation}
with the matrix $A_{iji'j'}$ expressed as a sum of matrices of the
form:
\begin{equation}
\label{mat2d}
A_{iji'j'} = \int_{0}^{R}\!dr \int_{0}^{2\pi}\!d\theta\,
C(r,\theta)\,\Lambda_{i}^{\alpha}(r)\Lambda_{i'}^{\alpha'}(r)\,
\Lambda_{j}^{\beta}(\theta)\Lambda_{j'}^{\beta'}(\theta).
\end{equation}
\subsubsection{Three-dimensional equations}
Likewise, for the three-dimension case, assuming for example a
\emph{toroidal like} $(r,\theta, \varphi)$ coordinate system,
with the discretized solution and the right-hand side
written as:
\begin{equation}
\label{discreteEq3d}
\begin{split}
\phi(r,\theta, \varphi) &=
\sum_{i=0}^{N_r+p_r-1}\sum_{j=0}^{N_\theta-1}
\sum_{k=0}^{N_\varphi-1}
\phi_{ijk}\Lambda_i(r) \Lambda_j(\theta) \Lambda_k(\varphi) \\
\rho_{ijk} &= \int_{0}^{R}\!dr \int_{0}^{2\pi} \!d\theta
\int_{0}^{2\pi} \!d\varphi J(r,\theta,\varphi) \rho(r,\theta,\varphi)
\Lambda_i(r) \Lambda_j(\theta) \Lambda_k(\varphi),\\
\end{split}
\end{equation}
where $J(r,\theta,\varphi)$ is the Jacobian, the matrix equation to solve is
\begin{equation}
\sum_{i'=0}^{N_r+p_r-1}\sum_{j'=0}^{N_\theta-1}\sum_{k'=0}^{N_\varphi-1}
A_{ijki'j'k'}\phi_{i'j'k'} = \rho_{ijk},
\end{equation}
with the matrix $A_{ijki'j'k'}$ expressed as a sum of matrices of the
form:
\begin{equation}
\label{mat3d}
A_{ijki'j'k'} = \int_{0}^{R}\!dr
\int_{0}^{2\pi}\!d\theta\ \int_{0}^{2\pi}\!d\varphi\,
C(r,\theta,\varphi)\,\Lambda_{i}^{\alpha}(r)\Lambda_{i'}^{\alpha'}(r)\,
\Lambda_{j}^{\beta}(\theta)\Lambda_{j'}^{\beta'}(\theta)
\Lambda_{k}^{\gamma}(\varphi)\Lambda_{k'}^{\gamma'}(\varphi).
\end{equation}
\subsubsection{Unicity condition}
In the case of the polar coordinates $(r,\theta)$ considered above,
the unicity condition on the axis $r=0$ should be imposed. It can be
enforced by modifications of the matrix $A$ as described in
Appendix~\ref{unicityCond}.
\subsubsection{One-dimensional numbering}
For two-dimensional (three-dimensional) problems, the solution
$\phi_{ij}$ ($\phi_{ijk}$) as well as the right-hand-side $\rho_{ij}$
($\rho_{ijk}$) can be conveniently casted into one-dimensional
arrays. As an example, by numbering first the last index, we obtain the
following mappings:
\begin{equation}
\label{map1d}
\mu =
\begin{cases}
j + iN_\theta & \text{two-dimensional case} \\
k + (j + iN_\theta)N_\varphi & \text{three-dimensional case} \\
\end{cases}
\end{equation}
Using such a one-dimensional numbering, the matrix equation for the two and
three dimensional cases takes a more conventional form:
\begin{equation}
\sum_{\mu'=0}^{r-1} A_{\mu\mu'} \phi_{\mu'} = \rho_\mu,
\end{equation}
with the respective matrix ranks $r=(N_r+p_r)N_\theta$ and
$r=(N_r+p_r)N_\theta N_\varphi$. For a pure differential operator, the
matrix $A_{\mu\mu'}$ has a band structure of bandwidth
$b=2(p_r+1)N_\theta-1$ and $b=2(p_r+1)N_\theta N_\varphi-1$ respectively. It is
important to note that, except for the one-dimensional problem, there
are \emph{many} zeros inside the matrix band!
\section{The module {\tt MATRIX}}
\subsection{Interface}
The Fortran module {\tt MATRIX} contains easy-to-use routines to solve the
matrix equation formulated in the previous section, using the direct
solvers of LAPACK. The different matrix storage formats are defined,
using the Fortran derived datatypes. The different types in the
present version are listed in Table~\ref{matTypes}.
\begin{table}[h]
\centering
\begin{tabular}{|l|l|}\hline
{\tt gemat} & General dense matrix \\
{\tt gbmat} & General band matrix \\
{\tt pbmat} & Symmetric positive-definite band matrix \\
{\tt periodic\_mat} & Matrix obtained for example in \\
& one-dimensional periodic problems\\
\hline
\end{tabular}
\caption{The matrix types}
\label{matTypes}
\end{table}
These types define {\tt DOUBLE PRECISION} real matrices.
{\tt DOUBLE COMPLEX} matrices are declared by prefixing
these types with the letter ``{\tt z}'', for example {\tt zgbmat}.
Note that {\tt zpbmat} defines a \emph{hermitian} positive-definite
complex matrix.
The \emph{generic} routines are defined for each of these types in Table~\ref{matRoutines}.
Note that the routine {\tt updtmat} is mainly used for the matrix assembly
while {\tt getxxx} and {\tt putxxx} are rather used to modify the
matrix, for example to impose boundary conditions.
\begin{table}[h]
\centering
\begin{tabular}{|l|l|}
\hline
{\tt init} & Initializes the data structure \\
{\tt destroy} & Free the data structure memory \\
\hline
{\tt updtmat} & Accumulates a value to the element $A_{ij}$ \\
{\tt putele, putrow, putcol} & Overwrites the matrix element $(i,j)$,
row $i$, column $j$ \\
{\tt getele, getrow, getcol} & Returns the matrix element $(i,j)$,
row $i$, column $j$ \\
{\tt vmx} & Returns the matrix-vector product \\
{\tt mcopy} & Copy a matrix to another matrix \\
{\tt maddto} & Adds 2 matrices: $A \leftarrow A+\alpha B$ \\
{\tt determinant} & Returns the matrix determinant \\
\hline
{\tt factor} & Computes the LU (Cholesky for symmetric/hermitian \\
& matrix) factorization \\
{\tt bsolve} & Solves the linear system using the factorized matrix \\
\hline
\end{tabular}
\caption{The generic routines in the {\tt MATRIX} module}
\label{matRoutines}
\end{table}
The complete description of each routine is given in
Appendix~\ref{matRef}. More information on how to use it can be obtained by
{\tt greping} its name on the examples found in the {\tt examples/} directory.
\subsection{A two-dimensional example}
\label{twodEx}
Let's consider the Poisson equation using the cylindrical coordinates
$(r,\theta)$:
\begin{equation}
-\frac{1}{r}\frac{\partial}{\partial r}
\left[r\frac{\partial\phi}{\partial r}\right]
-\frac{1}{r^2}\frac{\partial^2\phi}{\partial\theta^2} = \rho,
\qquad \phi(r=1,\theta) = 0.
\end{equation}
Assuming the exact solution
\begin{equation*}
\phi(r,\theta) = (1-r^2)r^m\cos m\theta,
\end{equation*}
the right-hand-side becomes
\begin{equation*}
\rho=4(m+1)r^{m}\cos m\theta.
\end{equation*}
The matrix and the right hand-side of the discretized problem are computed as
\begin{equation}
\begin{split}
A_{iji'j'} &= \int_{0}^1\!dr \int_{0}^{2\pi}\!d\theta\,\left[
r\,\Lambda'_{i}(r)\Lambda'_{i'}(r)\,
\Lambda_{j}(\theta)\Lambda_{j'}(\theta) +
\frac{1}{r}\,\Lambda_{i}(r)\Lambda_{i'}(r)\,
\Lambda'_{j}(\theta)\Lambda'_{j'}(\theta) \right] \\
\rho_{ij} &= \int_{0}^1\!dr \int_{0}^{2\pi} \!d\theta\,\,r \rho(r,\theta)
\,\,\Lambda_i(r) \Lambda_j(\theta).
\end{split}
\end{equation}
In the example {\tt pde2d.f90} this problem is treated in detail.
In the following, only the calls to the {\tt MATRIX} routines
are reviewed to show how the matrix problem is solved using the {\tt
MATRIX} module.
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
!
! Declare a General Band matrix
!
USE matrix
USE conmat_mod
TYPE(gbmat) :: mat
!
! Rank and bandwidth. nidbas(1) is the spline order in
! the first dimension r.
!
nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix
kl = (nidbas(1)+1)*ny -1 ! Number of sub-diagnonals
ku = kl ! Number of super-diagnonals
nterms = 2 ! Number of terms in the weak form
!
! Initialize the matrix data structure
!
CALL init(kl, ku, nrank, nterms, mat)
!
! Construct the matrix, using 2D spline splxy
! and impose boundary conditions
!
CALL conmat(splxy, mat, coefeq_poisson)
CALL ibcmat(mat, ny)
!
! Compute the RHS, using the 2D spline splxy
! and impose boundary conditions
!
CALL disrhs(mbess, splxy, rhs)
CALL ibcrhs(rhs, ny)
!
! Factor the matrix and solve
!
CALL factor(mat)
CALL bsolve(mat, rhs, sol)
!
...
CONTAINS
SUBROUTINE coefeq_poisson(x, y, idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x, y
INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
DOUBLE PRECISION, INTENT(out) :: c(:)
!
c(1) = x
idt(1,1) = 1; idt(1,2) = 0
idw(1,1) = 1; idw(1,2) = 0
!
c(2) = 1.d0/x
idt(2,1) = 0; idt(2,2) = 1
idw(2,1) = 0; idw(2,2) = 1
END SUBROUTINE coefeq_poisson
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
Some explanations and remarks:
\begin{itemize}
\item The matrix construction is performed by {\tt conmat} which will
be described later in section \ref{secCONMAT}. The
\emph{weak form} is defined in the \emph{internal}
procedure {\tt coefeq\_poisson} and passed as an argument to
{\tt conmat}. See section \ref{secCONMAT} for a detailed
description of the variables {\tt c, idt, idw} returned by
{\tt coefeq\_poisson}.
\item Boundary conditions are imposed by modifications
of the matrix in subroutine {\tt ibcmat} (see file {\tt the
ibcmat.f90}), using the {\tt MATRIX}
routines {\tt getrow, putrow, getcol, putcol}.
\item The construction of the right-hand-side in {\tt disrhs} (see the
file {\tt disrhs.f90}) is computed using a Gauss quadrature..
\item Using the {\tt pbmat} type instead of {\tt gbmat} (the matrix in
this example is symmetric and positive-definite!) requires only a few
modifications of the program (see the complete example {\tt
pde2d\_pb.f90}):
\begin{itemize}
\item Change the type {\tt gbmat} to {\tt pbmat} in all matrix declarations
\item Change the list of arguments in the routine {\tt init} to
({\tt ku, nrank, nterms, mat})
\item Small changes in the boundary conditions ({\tt ibcmat} and {\tt
ibcrhs}) to take into account the symmetry.
\end{itemize}
\item The module {\tt MATRIX} can be used independently of {\tt
BSPLINES} (which is used here only to compute the matrix and
right-hand-side), for example in a problem discretized using
Finite Differences.
\end{itemize}
\section{Sparse matrix storage}
Using the \emph{band matrix format} for a pure differential operator
requires to store a full bandwidth $b=2(p_r+1)N_\theta-1$ for the
two-dimensional problem as shown in section 1, while there are only
$(2p_r+1)^2$ non-zero elements per matrix row. In three-dimensional
problem, it is much worse since $b=2(p_r+1)N_\theta N\varphi-1$ for
$(2p_r+1)^3$ non-zero elements.
In order to reduce the matrix storage, a solution consists of just
storing the matrix non-zero elements and use the \emph{Sparse Direct
Solvers}. With an optimal \emph{renumbering} strategy (or
\emph{fill-in reducing ordering}), the
size of the factored matrix can be expected to be smaller than the
corresponding band matrix.
An alternative is to use \emph{iterative} methods which usually need
less memory.
Such a sparse matrix is implemented in the {\tt SPARSE} module where
each matrix row is represented by a \emph{\tt linked} list of elements
with sorted column index. The data structure of this sparse matrix is
wrapped up in a the Fortran data type {\tt spmat} for a real matrix and
{\tt zspmat} for a complex matrix. Most of the generic routines which
are already defined in the {\tt MATRIX} module are overloaded for these
matrix types. They are listed in Table~\ref{spmatRoutines}. The complete
documentation of these routines can be found in Appendix~\ref{spmatRef}.
\begin{table}[h]
\centering
\begin{tabular}{|l|l|}
\hline
\emph{Matrix types} & {\tt spmat, zspmat} \\
\hline\hline
{\tt init} & Initializes the data structure \\
{\tt destroy} & Free the data structure memory \\
\hline
{\tt updtmat} & Accumulates a value to the element $A_{ij}$ \\
{\tt putele, putrow, putcol} & Overwrites the matrix element $(i,j)$,
row $i$, column $j$ \\
{\tt getele, getrow, getcol} & Returns the matrix element $(i,j)$,
row $i$, column $j$ \\
\hline
{\tt get\_count} & Get the number of non-zero elements in matrix \\
\hline
\end{tabular}
\caption{The generic routines in the {\tt SPARSE} module}
\label{spmatRoutines}
\end{table}
It should be noted that this module is \emph{not} used
directly in solver problems. One usually uses instead modules which
are specific to a type of (direct or iterative) solver. As will be
shown in the next section, it is the routines in this solver module which
directly calls the routines defined in the {\tt SPARSE} module during the matrix
assembly.
\section{Solvers using the module {\tt SPARSE}}
All the solvers discussed in this section use initially the module
{\tt SPARSE} to construct the sparse matrix. Once this construction
procedure is complete, this matrix is converted to the (usually more
efficient) format used by the solver.
In a time-dependent simulation where the problem matrix
changes but not the sparsity pattern, the subsequent matrix assembly
will be performed directly on this solver's format.
Thus for example, the first time {\tt updtmat} is called
on a new matrix, it is the version from {\tt SPARSE}. Next, if
{\tt updtmat} is called again to modify the matrix, it will be the
solver's version, unless the matrix is re-initialized by a call to
{\tt destroy} followed by {\tt init}. This switch is completely
transparent for the user as shown through an example in the next
section.
\subsection{The PARDISO direct solver}
The interface to PARDISO~\cite{PARDISO} is implemented in the
module {\tt PARDISO\_BSPLINES}.
The matrix type (symmetric, hermitian, positive-definite) is set
by the arguments {\tt nlsym, nlherm} and {\tt nlpos} passed to the
generic routine {\tt init}. All the other generic routines defined in
the module {MATRIX}, plus routines specific to the sparse solver,
are listed in Table~\ref{pardisoRoutines}. The complete documentation
of these routines is given in Appendix~\ref{pardisoRef}.
\begin{table}[h]
\centering
\begin{tabular}{|l|l|}
\hline
\emph{Matrix types} & {\tt pardiso\_mat, zpardiso\_mat} \\
\hline\hline
{\tt init} & Initializes the data structure \\
{\tt destroy} & Free the data structure memory \\
\hline
{\tt updtmat} & Accumulates a value to the element $A_{ij}$ \\
{\tt putele, putrow, putcol} & Overwrites the matrix element $(i,j)$,
row $i$, column $j$ \\
{\tt getele, getrow, getcol} & Returns the matrix element $(i,j)$,
row $i$, column $j$ \\
{\tt vmx} & Returns the matrix-vector product \\
{\tt mcopy} & Copy a matrix to another matrix \\
{\tt maddto} & Adds 2 matrices: $A \leftarrow A+\alpha B$ \\
{\tt clear\_mat}& Set the matrix elements to zero.\\
{\tt psum\_mat} & Parallel sum of matrices \\
{\tt p2p\_mat} & Point-to-point combine sparse matrix between 2 processes\\
{\tt get\_count}& Get the number of non-zero elements in matrix \\
\hline
{\tt factor} & Factorization \\
{\tt bsolve} & Solves the linear system using the factorized matrix \\
{\tt to\_mat} & Convert to PARDISO CSR matrix format \\
{\tt reord\_mat}& Reordering and symbolic factorization \\
{\tt numfact} & Numerical factorization \\
\hline
\end{tabular}
\caption{The generic routines in the {\tt PARDISO\_BSPLINES} module}
\label{pardisoRoutines}
\end{table}
Below, a complete example solving a simple
two-dimensional Poisson discretized by the 5 point Finite Difference
method illustrates how to use the {\tt PARDISO\_BSPLINES} module.
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
PROGRAM main
USE pardiso_bsplines
IMPLICIT NONE
TYPE(pardiso_mat) :: amat
DOUBLE PRECISION, ALLOCATABLE :: rhs(:), sol(:), arow(:)
INTEGER :: nx=5, ny=4
INTEGER :: n, nnz
INTEGER :: i, j, irow, jcol
!
WRITE(*,'(a)', advance='no') 'Enter nx, ny: '
READ(*,*) nx, ny
n = nx*ny ! Rank of the matrix
ALLOCATE(rhs(n))
ALLOCATE(sol(n))
ALLOCATE(arow(n))
!
CALL init(n, 1, amat, nlsym=.TRUE.) ! Pardiso mat, symmetric case
!
! Construct the matrix and RHS
!
DO j=1,ny
DO i=1,nx
arow = 0.0d0
irow = numb(i,j)
arow(irow) = 4.0d0
IF(i.GT.1) arow(numb(i-1,j)) = -1.0d0
IF(i.LT.nx) arow(numb(i+1,j)) = -1.0d0
IF(j.GT.1) arow(numb(i,j-1)) = -1.0d0
IF(j.LT.ny) arow(numb(i,j+1)) = -1.0d0
CALL putrow(amat, irow, arow)
rhs(irow) = SUM(arow) ! => the exact solution is 1
END DO
END DO
!
WRITE(*,'(a,i6)') 'Number of non-zeros of matrix', get_count(amat)
!
! Factor the amat matrix (Reordering, symbolic and numerical factorization)
!
CALL factor(amat)
!
! Back solve
!
CALL bsolve(amat, rhs, sol)
!
! Check solutions
!
WRITE(*,'(/a/(10f8.4))') 'Computed sol', sol
WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(sol-1.0d0))
!
! Clean up
!
DEALLOCATE(rhs)
DEALLOCATE(sol)
DEALLOCATE(arow)
CALL destroy(amat)
CONTAINS
INTEGER FUNCTION numb(i,j)
!
! One-dimensional numbering
! Number first x then y
!
INTEGER, INTENT(in) :: i, j
numb = (j-1)*nx + i
END FUNCTION numb
END PROGRAM main
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
It should be noted that
\begin{itemize}
\item The routine {\tt putrow} in the matrix construction loop uses the
version from the {\tt SPARSE} module to create dynamically the
matrix row using the linked list.
\item The routine {\tt factor} calls successively the matrix conversion
{\tt to\_mat}, the reordering and symbolic factorization routine
{\tt reord\_mat} and finally the numerical factorization {\tt
numfact}. One could indeed call these three routines separately
instead of the single call to {\tt factor},
\item After solving the linear system, if the matrix is modified by
calling for example {\tt putrow} again, it will modify
directly the converted matrix and not on the {\tt spmat} matrix
which is anyway \emph{destroyed} at the end of {\tt to\_mat}.
\item If the matrix sparsity changes, the matrix should be
re-initialized by calling the {\tt destroy} and {\tt init} routines.
\end{itemize}
Other examples can be found by running ``{\tt grep pardiso\_mat}''
on the F90 files in the directory {\tt examples/}.
\subsection{The WSMP direct solver}
The interface to WSMP~\cite{WSMP} is implemented in the
module {\tt WSMP\_BSPLINES}.
The matrix type (symmetric, hermitian, positive-definite) is set
by the arguments {\tt nlsym, nlherm} and {\tt nlpos} passed to the
generic routine {\tt init}. All the other generic routines defined in
the module {MATRIX}, plus routines specific to the sparse solver,
are listed in Table~\ref{wsmpRoutines}. The complete documentation
of these routines is given in Appendix~\ref{wsmpRef}.
\begin{table}[h]
\centering
\begin{tabular}{|l|l|}
\hline
\emph{Matrix types} & {\tt wsmp\_mat, zwsmp\_mat} \\
\hline\hline
{\tt init} & Initializes the data structure \\
{\tt destroy} & Free the data structure memory \\
\hline
{\tt updtmat} & Accumulates a value to the element $A_{ij}$ \\
{\tt putele, putrow, putcol} & Overwrites the matrix element $(i,j)$,
row $i$, column $j$ \\
{\tt getele, getrow, getcol} & Returns the matrix element $(i,j)$,
row $i$, column $j$ \\
{\tt vmx} & Returns the matrix-vector product \\
{\tt mcopy} & Copy a matrix to another matrix \\
{\tt maddto} & Adds 2 matrices: $A \leftarrow A+\alpha B$ \\
{\tt clear\_mat}& Set the matrix elements to zero.\\
{\tt psum\_mat} & Parallel sum of matrices \\
{\tt p2p\_mat} & Point-to-point combine sparse matrix between 2 processes\\
{\tt get\_count}& Get the number of non-zero elements in matrix \\
\hline
{\tt factor} & Factorization \\
{\tt bsolve} & Solves the linear system using the factorized matrix \\
{\tt to\_mat} & Convert to WSMP CSR matrix format \\
{\tt reord\_mat}& Reordering and symbolic factorization \\
{\tt numfact} & Numerical factorization \\
\hline
\end{tabular}
\caption{The generic routines in the {\tt WSMP\_BSPLINES} module}
\label{wsmpRoutines}
\end{table}
The simple Poisson example using the {\tt PARDISO\_BSPLINES} module
shown in the previous section can be easily adapted to the WSMP
interface since there are only two lines to change: the {\tt USE} and
the matrix {\tt TYPE} lines.
Other examples of how to use this interface can be found by running
``{\tt grep wsmp\_mat}'' on the F90 files the directory {\tt examples/}.
The same solver functionality can be found in both the PARDISO and
WSMP solvers as one can verify by comparing Table~\ref{pardisoRoutines}
and Table~\ref{wsmpRoutines} or the description of routines in
Appendix~\ref{pardisoRef} and Appendix~\ref{wsmpRef}.
However, there
is an important difference. While in PARDISO (and indeed also in LAPACK),
it is possible to define several matrices to solve simultaneously, it
appears that in WSMP, this is possible \emph{only} for symmetric and
hermitian matrices: in the present 10.9 version, the
routines to store and recall the solver context
{\tt WSTOREMAT/WRECALLMAT} which are present in the symmetric version of
the library are missing in the general version!
A separate module named {\tt PWSMP\_BSPLINES} added the MPI
\emph{parallelization} capability provided by WSMP. This parallel version
implements the same user interface as shown in Table~\ref{wsmpRoutines}.
The following considerations should be however taken in to account:
\begin{enumerate}
\item The coefficient matrix {\tt amat} is partitioned into blocks of
\emph{contiguous} rows, with their indices defined in
the interval [{\tt amat\%istart,amat\%iend}] which is defined after the call to
{\tt init}.
\item Calls to the routine {\tt updtmat} to update the matrix coefficients
should not specify a row index \emph{outside} this interval.
On the other hand, {\tt getxxx} will return 0 and {\tt putxxx} will
ignore it if a row index \emph{not} in the range [{\tt amat\%istart,amat\%iend}]
is passed to them.
\item An \emph{optional} MPI communicator can be given to {\tt init}
using the keyword {\tt comm\_in}. By default, the communicator
{\tt MPI\_COMM\_WORLD} is used.
\end{enumerate}
A complete example using {\tt PWSMP\_BSPLINES} can be found in
{\tt examples/pde2d\_pwsmp.f90}.
\subsection{The MUMPS direct solver}
{\tt MUMPS}~\cite{MUMPS} is a \emph{parallel sparse direct solver}
using {\tt MPI} and is implemented in the module
{\tt MUMPS\_BSPLINES}. User program using this module
should be compiled and linked with {\tt MPI} even if only the
serial version of the solver is needed, in which case the
{\tt MPI\_COMM\_SELF} is passed to the initialization routine
{\tt init} as an \emph{optional} argument with the keyword
{\tt comm\_in}. Otherwise a valid {\tt MPI} communicator should be
passed. By default {\tt comm\_in=MPI\_COMM\_SELF}. Note that it
is possible to use both serial and parallel solvers in the same
program to solve different matrix problems.
As for {\tt PARDISO} and {\tt WSMP}, the same user interface to the
{\tt MUMPS} solver is used and summarized in
Table~\ref{mumpsRoutines}. The complete documentation
of these routines is given in Appendix~\ref{mumpsRef}.
\begin{table}[h]
\centering
\begin{tabular}{|l|l|}
\hline
\emph{Matrix types} & {\tt mumps\_mat, zmumps\_mat} \\
\hline\hline
{\tt init} & Initializes the data structure \\
{\tt destroy} & Free the data structure memory \\
\hline
{\tt updtmat} & Accumulates a value to the element $A_{ij}$ \\
{\tt putele, putrow, putcol} & Overwrites the matrix element $(i,j)$,
row $i$, column $j$ \\
{\tt getele, getrow, getcol} & Returns the matrix element $(i,j)$,
row $i$, column $j$ \\
{\tt vmx} & Returns the matrix-vector product \\
{\tt mcopy} & Copy a matrix to another matrix \\
{\tt maddto} & Adds 2 matrices: $A \leftarrow A+\alpha B$ \\
{\tt clear\_mat}& Set the matrix elements to zero.\\
{\tt psum\_mat} & Parallel sum of matrices \\
{\tt p2p\_mat} & Point-to-point combine sparse matrix between 2 processes\\
{\tt get\_count}& Get the number of non-zero elements in matrix \\
\hline
{\tt factor} & Factorization \\
{\tt bsolve} & Solves the linear system using the factorized matrix \\
{\tt to\_mat} & Convert to WSMP CSR matrix format \\
{\tt reord\_mat}& Reordering and symbolic factorization \\
{\tt numfact} & Numerical factorization \\
\hline
\end{tabular}
\caption{The generic routines in the {\tt MUMPS\_BSPLINES} module}
\label{mumpsRoutines}
\end{table}
\section{Fourier solver \cite{McMillan}}
\subsection{The matrix equation in Fourier space}
For a periodic one-dimensional problem, the solution $\phi_i$ and
the right-hand-side $\rho_i$ in (\ref{matEq1d}) are
$N$-periodic. Their Discrete Fourier Transform (DFT) can be defined as
\begin{equation}
\begin{split}
\hat{\phi}_k = \sum_{j=0}^{N-1} \phi_j e^{i\frac{2\pi}{N}kj}, &\qquad
\hat{\rho}_k = \sum_{j=0}^{N-1} \rho_j e^{i\frac{2\pi}{N}kj}, \\
\phi_j = \frac{1}{N}\sum_{k=0}^{N-1} \hat{\phi}_k e^{-i\frac{2\pi}{N}kj}, &\qquad
\rho_j = \frac{1}{N}\sum_{k=0}^{N-1} \hat{\rho}_k e^{-i\frac{2\pi}{N}kj}.
\end{split}
\end{equation}
Taking the DFT of Eq.~(\ref{matEq1d}), we obtain the following matrix equation
in Fourier space:
\begin{equation}
\label{Fourier1d}
\sum_{k'=0}^{N-1} \hat{A}_{kk'}\hat{\phi}_{k'} = \hat{\rho}_{k},
\end{equation}
where $\hat{A}_{kk'}$ is the DFT of the original matrix. Following the
notations in Eq.~(\ref{mat1d}) and assuming an \emph{equidistant} mesh
with the interval $\Delta=L/N$, each of the DFT matrices of the
weak form can be written as
\begin{equation}
\begin{split}
\hat{A}_{kk'}
&= \frac{1}{N}\sum_{j=0}^{N-1} e^{i\frac{2\pi}{N}kj}
\sum_{j'=0}^{N-1} A_{jj'}e^{-i\frac{2\pi}{N}k'j'} \\
&= \frac{1}{N}\int_{0}^L\!\!dx\,C(x) \,
\sum_{j=0}^{N-1} e^{i\frac{2\pi}{N}kj} \Lambda_j^\alpha (x)\,
\sum_{j'=0}^{N-1} e^{-i\frac{2\pi}{N}k'j'}
\Lambda_{j'}^{\alpha'} (x) \\
&= \frac{1}{N}\sum_{J=0}^{N-1}\int_{J\Delta}^{(J+1)\Delta}\!\!dx\,C(x) \,
\sum_{j=0}^{N-1} e^{i\frac{2\pi}{N}kj} \Lambda_j^\alpha (x)\,
\sum_{j'=0}^{N-1} e^{-i\frac{2\pi}{N}k'j'}
\Lambda_{j'}^{\alpha'} (x)
\end{split}
\end{equation}
Note that each of the last two sums is over the splines which are non-zero
at a given $x$. Using the translational symmetry of the periodic splines:
\begin{equation*}
\sum_j e^{i\frac{2\pi}{N}kj} \Lambda_j^\alpha (x) =
\sum_j e^{i\frac{2\pi}{N}kj}
\Lambda_{j-J}^\alpha(x-J\Delta) =
e^{i\frac{2\pi}{N}kJ}\, \hat{\Lambda}_{k}^\alpha(x-J\Delta),
\end{equation*}
where we have defined the DFT of splines $\hat{\Lambda}_{k}(x)$ as
\begin{equation}
\hat{\Lambda}_{k}^\alpha(x) = \sum_j\Lambda_j^\alpha(x) e^{i\frac{2\pi}{N}kj},
\end{equation}
which are computed by the routine {\tt ft\_basfun} in the module
{\tt BSPLINES} for any spline order $p$ and derivative $\alpha \le
p$. The DFT matrices can now be written as:
\begin{equation*}
\hat{A}_{kk'} =
\frac{1}{N}\sum_{J=0}^{N-1}\int_{J\Delta}^{(J+1)\Delta}\!\!dx\,C(x) \,
e^{i\frac{2\pi}{N}J(k-k')} \hat{\Lambda}_k^\alpha (x-J\Delta)\,
\left[\hat{\Lambda}_{k'}^{\alpha'} (x-J\Delta)\right]^{*}.
\end{equation*}
Finally, making the variable transform $x\rightarrow x+J\Delta$ and
defining the DFT of the weak-form coefficient $C$ as
\begin{equation}
\hat{C}_{k}(x) = \sum_{J=0}^{N-1}C(x+J\Delta)\,e^{i\frac{2\pi}{N}Jk},
\end{equation}
the DFT of the matrix $\hat{A}_{kk'}$ can be calculated as an
integration over the first interval:
\begin{equation}
\hat{A}_{kk'} = \frac{1}{N}\int_0^\Delta\!\!dx\,
\hat{C}_{k-k'}(x) \,\hat{\Lambda}_{k}^\alpha(x)
\left[\hat{\Lambda}_{k'}^{\alpha'}(x)\right]^{*},
\end{equation}
which can be computed using again the same Gauss formula as before.
For uniform $C$, $\hat A_{kk'}$ is diagonal and the matrix equation
(\ref{Fourier1d}) reduces to a system of equations for the
uncoupled Fourier modes.
When $C$ is non-uniform, $\hat A_{kk'}$ is \emph{dense}.
However in problems where the solution is expected to be
``smooth'', one can keep only a small number of Fourier modes, reducing
thus the rank of $\hat A_{kk'}$. Furthermore, if the coefficients
$C(x)$ of the differential equations are very smooth, peaked at a few (low
order) modes, the DFT matrix can become \emph{sparse}!
The generalization to the two-dimensional problem (\ref{mat2d}) is
straightforward:
\begin{gather}
\hat{A}_{im,i'm'} =
\frac{1}{N_\theta}\int_0^R\!\!dr\left\{\int_0^{\Delta\theta}\!\!d\theta \,
\hat{C}_{m-m'}(r,\theta) \,\hat{\Lambda}_{m}^\beta(\theta)
\left[\hat{\Lambda}_{m'}^{\beta'}(\theta)\right]^{*}\right\}
\Lambda_{i}^\alpha(r)\Lambda_{i'}^{\alpha'}(r) \\
\hat{C}_{m}(r,\theta) =
\sum_{j=0}^{N_\theta-1}C(r,\theta+j\Delta\theta)\, e^{i\frac{2\pi}{N_\theta}jm}.
\end{gather}
Likewise, for the three-dimensional problem (\ref{mat3d}), we obtain
\begin{gather}
\hat{A}_{imn,i'm'n'} =
\frac{1}{N_\theta N_\varphi}\int_0^R\!\!dr\left\{\int_0^{\Delta\theta}\!\!d\theta
\int_0^{\Delta\varphi}\!\!d\varphi \,
\hat{C}_{m-m',n-n'}(r,\theta,\varphi) \,\hat{\Lambda}_{m}^\beta(\theta)
\left[\hat{\Lambda}_{m'}^{\beta'}(\theta)\right]^{*}\,\hat{\Lambda}_{n}^\gamma(\varphi)
\left[\hat{\Lambda}_{n'}^{\gamma'}(\varphi)\right]^{*}\right\}
\Lambda_{i}^\alpha(r)\Lambda_{i'}^{\alpha}(r) \\
\hat{C}_{mn}(r,\theta,\varphi) =
\sum_{j=0}^{N_\theta-1}\sum_{k=0}^{N_\varphi-1}C(r,\theta+j\Delta\theta,\varphi+
k\Delta\varphi)\, e^{i\frac{2\pi}{N_\theta}jm}\,e^{i\frac{2\pi}{N_\varphi}kn}.
\end{gather}
Note that for axi-symmetric systems where the coefficients $C$ do not
depend on $\varphi$
\begin{equation}
\hat{C}_{mn} = \hat{C}_{mn}\delta_{n,0}
\end{equation}
and thus the three-dimensional problem reduces to a set of independent
two-dimensional problems with
\begin{equation}
\begin{split}
\hat{A}^n_{im.i'm'} &= M_n \hat{A}_{im.i'm'} \\
M_n &= \int_0^{\Delta\varphi}\!\!d\varphi \left|\hat{\Lambda}_n(\varphi)\right|^2.
\end{split}
\end{equation}
\subsection{Integral equation}
The DFT matrices for differential operators derived above can be
extended to an integral operator of the following form:
\begin{equation}
\int_{0}^{L}\!dx' K(x,x')\,\phi(x') = \rho(x),
\end{equation}
where $\phi(x)$ is $L$-periodic. Using the same FE discretization as
above results in the following matrix in \emph{real} space:
\begin{equation}
A_{jj'} = \int_{0}^{L}\!dx\,\Lambda_j(x)\,\int_{0}^{L}\!dx'\,
K(x,x')\,\Lambda_{j'}(x'),
\end{equation}
and its DFT
\begin{equation}
\hat{A}_{kk'} = \frac{1}{N}
\sum_{J=0}^{N-1}\int_{J\Delta}^{(J+1)\Delta}\!dx\,
\sum_{J'=0}^{N-1}\int_{J'\Delta}^{(J'+1)\Delta}\!dx'\,K(x,x')\,
e^{i\frac{2\pi}{N}kJ}\hat{\Lambda}_k(x-J\Delta)\,
e^{-i\frac{2\pi}{N}k'J'}\left[\hat{\Lambda}_{k'}(x-J'\Delta)\right]^{*},
\end{equation}
Now, defining the DFT of the kernel as
\begin{equation}
\hat{K}_{kk'}(x,x') =
\sum_{J=0}^{N-1}\sum_{J'=0}^{N-1}K(x+J\Delta,x'+J'\Delta)\,
e^{i\frac{2\pi}{N}kJ}\,e^{-i\frac{2\pi}{N}k'J'},
\end{equation}
the final expression for the DFT of the matrix $\hat{A}_{kk'}$ reduces to
\begin{equation}
\hat{A}_{kk'} = \frac{1}{N}
\int_0^\Delta\!dx\int_0^\Delta\!dx'\,\hat{K}_{kk'}(x,x')\,
\hat{\Lambda}_k (x)\,
\left[\hat{\Lambda}_{k'}(x')\right]^{*}.
\end{equation}
Again, notice that the dense matrix $\hat{A}$ can become \emph{sparse}
if only a limited number of Fourier modes are retained in the DFT of
the kernel $\hat{K}$.
\subsection{A two-dimensional example with a non-uniform coefficient}
As a check, we considered here a two-dimensional example similar to the example in
section \ref{twodEx} but with a non-uniform coefficient:
\begin{equation}
-\frac{1}{r}\frac{\partial}{\partial r}
\left[rC\frac{\partial\phi}{\partial r}\right]
-\frac{1}{r^2}\frac{\partial}{\partial \theta}
\left[C\frac{\partial\phi}{\partial \theta}\right] = \rho.
\end{equation}
With $C(r,\theta) = 1+\epsilon r\cos\theta$, assuming the same exact solution as in
section (\ref{twodEx})
\begin{equation}
\phi(r,\theta) = (1-r^2)r^m\cos m\theta,
\end{equation}
the right-hand side becomes
\begin{equation}
\begin{split}
\rho(r,\theta) = 4(m+1)r^m\cos m\theta & +
\frac{\epsilon r^m}{2}(4+5m-m/r^2)\cos(m-1)\theta \\
&+ \frac{\epsilon r^m}{2}(4+3m+m/r^2)\cos(m+1)\theta.
\end{split}
\end{equation}
This problem is solved in real space and Fourier space respectively in example
{\tt pde2d\_sym\_pardiso.f90} and example {\tt pde2d\_sym\_pardiso\_dft.f90}.
Both use the {\tt PARDISO\_BSPLINES} module to solve the sparse
matrix equation. It should be noted that the Fourier method should yield the
\emph{same solution} as found with the solver in real space if \emph{all} the
$N_\theta$ Fourier modes are kept.
For the problem defined above with {\tt m=3}, by keeping only the seven Fourier
modes in $[-3,3]$ and the three mode couplings $[-1,0,1]$ in the
Fourier solver, we found that both methods yield the same (up to 5
digits) \emph{relative discretization error}. Furthermore, increasing
the number of Fourier modes to $[-4,4]$ (note that the $m=\pm 4$
Fourier components of the right hand side $\rho$ are not null) does
not increase the accuracy of the computed solution!
In this example, the matrix in Fourier space has a rank which is
$N_\theta/7$ times smaller than in the solver in real space. The number of
non-zeros is also reduced by a factor of $(2p+1)/3$ since only 3
Fourier mode coupling terms are considered.
In general, the efficiency of such a \emph{matrix filter} is expected
to be problem-dependent. The Fourier solver should be tested in real simulations.
\section{The matrix construction module {\tt CONMAT\_MOD}}
\label{secCONMAT}
The module implements the generic matrix construction subroutine
{\tt conmat}, using the algorithm detailed in Appendix~\ref{matAssembly},
for 1D and 2D differential equations. The computed matrix is returned in the
argument {\tt mat} which can be a Lapack band matrix as well as a
PARDISO, WSMP or MUMPS sparse matrix.
The complete interface of the subroutine is given below.
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE conmat(spl, mat, coefeq, maxder)
TYPE(spline1d|spline2d) :: spl
TYPE([z]gbmat|[z]pbmat|[z]periodic_mat|[z]pardiso|...) :: mat
INTEGER, INTENT(in), OPTIONAL :: maxder[(2)]
INTERFACE
SUBROUTINE coefeq(x, [y], idt, idw, c)
DOUBLE PRECISION, INTENT(in) :: x, [y]
INTEGER, INTENT(out) :: idt(:), idw(:)
DOUBLE PRECISION, INTENT(out) :: c(:)
END SUBROUTINE coefeq
END INTERFACE
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Construct the FE matrix for 1D or 2D differential operator.
\item[Arguments:] \mbox{}
\begin{verbatim}
spl : 1D or 2D spline
mat : matrix object
coefeq : user provided subroutine (see below)
maxder : Maximum order of the derivatives in the weak form.
Equal to 1 (first derivative) by default.
\end{verbatim}
\end{description}
The subroutine {\tt conmat} includes, in addition to the arguments
{\tt spl, mat} and {\tt maxder} described above,
an user provided subroutine as the third
argument {\tt coefeq} which computes all the weak
form coefficients defined in Eq.(\ref{locMat1d}) and Eq.(\ref{locMat2d})
for a given point ($x$ for 1D case or $(x,y)$ for 2D case). The output
array {\tt c} will contain all the computed $C$ with its corresponding
derivative orders $(d,d')$ returned in {\tt idt, idw} respectively.
Other quantities required to calculate the coefficients $C$ could be
communicated to {\tt coefeq}, using for example a {\tt COMMON} block or
a {\tt MODULE}.
An example of using this module can be found in section \ref{twodEx}.
\appendix
\section{Matrix assembly for differential operators}
\label{matAssembly}
\subsection{1D case}
\subsubsection{Local matrix}
The contribution to the discretized weak-form from the interval
$[x_i, x_{i+1}]$ where $i=1,\ldots,N$, is a sum of the \emph{local matrices}
\begin{equation}
\label{locMat1d}
\begin{split}
A^i_{\alpha\alpha'} &= \int_{x_i}^{x_{i+1}}\!\!dx \;C(x)
\Lambda^d_{\alpha}(x)\Lambda^{d'}_{\alpha'}(x) \\
&\simeq \sum_{g=1}^{G}\,
\underbrace{w_g\Lambda^d_{\alpha}(x_g)\Lambda^{d'}_{\alpha'}(x_g)}_{F_{\alpha\alpha'g}}
\,\underbrace{C(x_g)}_{c_g},
\end{split}
\end{equation}
where a $G$ point Gauss quadrature over the interval $[x_i, x_{i+1}]$
is used to approximate the integral and
$\Lambda^d_{\alpha}$ denotes the $d^{th}$ derivative of splines
which are non zero in the interval $[x_i, x_{i+1}]$. For splines of
degree $p$, $\alpha=0,\ldots,p$. Note that the matrix can be written as a
\emph{matrix-vector product}:
\begin{equation}
\mathbf{A}= \mathbf{F} \cdot \mathbf{c}.
\end{equation}
\subsubsection{Mapping to global matrix}
For $N$ intervals, the number of spline elements of degree $p$, is
$N_e=N+p$, or $N_e=N$ if the system is \emph{periodic}.
Once the local matrix $A_{\alpha\alpha'}$ is formed, it can be added
to the \emph{global} matrix using the mapping:
\begin{equation}
\begin{split}
A^g_{II'} &\leftarrow A^g_{II'} + A^i_{\alpha\alpha'} \\
I = i+\alpha, & \qquad I' = i+\alpha'
\end{split}
\end{equation}
For periodic problems, the indices $I,I'$ are further transformed
by taking into account the periodicity $N$, using for example the
following {\tt FORTRAN} statement
\begin{center} \tt
I = MODULO(I-1,N) + 1 \\
\end{center}
\subsection{2D case}
\subsubsection{Local matrix}
In this case, the local matrix obtained for the grid cell
$[x_i, x_{i+1}]\times[y_j, y_{j+1}]$ takes the form:
\begin{equation}
\label{locMat2d}
\begin{split}
A_{\alpha\alpha'\beta\beta'} &= \int_{x_i}^{x_{i+1}}\!\!dx
\int_{y_j}^{y_{j+1}}\!\!dy\,\Lambda^{d_1}_{\alpha}(x)\Lambda^{d_1'}_{\alpha'}(x)\;C(x,y)
\,\Lambda^{d_2}_{\beta}(y)\Lambda^{d_2'}_{\beta'}(y) \\
&\simeq \sum_{g_1=1}^{G_1}\,
\underbrace{w_{g_1}\Lambda^{d_1}_{\alpha}(x_{g_1})\Lambda^{{d_1}'}_{\alpha'}(x_{g_1})}_{F_{\alpha\alpha'g_1}}
\;\sum_{g_2=1}^{G_2}\,\underbrace{C(x_{g_1},y_{g_2})}_{C_{g_1g_2}}
\underbrace{w_{g_2}\Lambda^{d_2}_{\beta}(y_{g_2})\Lambda^{{d_2}'}_{\beta'}(y_{g_2})}_{G_{\beta\beta'g_2}},
\end{split}
\end{equation}
which can be computed efficiently as \emph{matrix-matrix products}
\begin{equation}
\mathbf{A} = \mathbf{F}\cdot\mathbf{C}\cdot\mathbf{G^{T}}
\end{equation}
\subsubsection{Mapping to global matrix}
The local to global element indices mapping on each of the two
dimensions can be defined as previously as
\begin{equation}
\begin{split}
I = i+\alpha, & \qquad I' = i+\alpha' \\
J = j+\beta, & \qquad J' = j+\beta'
\end{split}
\end{equation}
If any of the 2 dimensions is periodic, the periodic condition have
to be applied to the corresponding global element index as explained
above.
Furthermore, in order to reduce the 4 dimension array $A^g_{II'JJ'}$
to the standard 2 dimension matrix, we number first the elements in
$y$ coordinate and obtain the following index transformation:
\begin{equation}
\mu = J + N^y_e(I-1), \qquad \mu' = J' + N^y_e(I'-1),
\end{equation}
where $N^y_e$ is the number of elements along the $y$ coordinate. The
\emph{global} matrix is then constructed from
\begin{equation}
A^g_{\mu\mu'} \leftarrow A^g_{\mu\mu'} + A_{\alpha\alpha'\beta\beta'}
\end{equation}
\section{The boundary conditions}
\subsection{Dirichlet condition}
\label{DirichletCond}
\subsubsection{1D case}
Let us consider the boundary condition
$u(0)=c$. Since all the splines are 0
at $x=0$, except for the first spline which is equal to 1,
$\Lambda_i(0)=\delta_{i,1}$, we have simply
\begin{equation}
c=u(0) = \sum_{i=1}^N u_i \Lambda_i(0) \quad \Longrightarrow
\quad u_{1} = c.
\end{equation}
The discretized linear system of equations, taking into account of
this BC, can thus be written as
\begin{equation}
\begin{split}
u_1 &= c\\
\sum_{j=2}^N A_{ij}u_j &= f_i - A_{i1}c, \quad i=2,\ldots, N
\end{split}
\end{equation}
or in the following matrix form:
\begin{equation}
\left(\begin{matrix}
1 & 0 & \cdots \\
0 & A_{22} & \cdots \\
\vdots & \vdots & \ddots \\
\end{matrix}\right)
\left(\begin{matrix}
u_{1} \\
u_{2} \\
\vdots\\
u_{N} \\
\end{matrix}\right) =
\left(\begin{matrix}
c \\
f_{2} -cA_{21}\\
\vdots\\
f_{N} -cA_{N1}\\
\end{matrix}\right)
\end{equation}
Note that (1) the transformed matrix preserves any symmetry or
positivity of the original matrix, (2) the first column of the
original matrix has to be saved in order to modify the RHS $f_i$
but only for non zero $c$ and (3) in that case, one needs to save
only $[A_{i1}]_{i=2}^{p+1}$, where $p$ is the spline order.
In summary, the procedure for imposing the Dirichlet BC $u_1=c$ can be
summarized as follows:
\begin{enumerate}
\item Matrix transformation
\begin{enumerate}
\item Clear the matrix row $i=1$ and set its diagonal term
$A_{11}$ to 1.
\item Get the matrix column $A_{j1}, \quad j=2,\ldots,p+1$ and save it.
\item Clear the matrix column $j=1$ and set its diagonal term
$A_{11}$ to 1.
\end{enumerate}
\item RHS transformation
\begin{enumerate}
\item Set $f_1\leftarrow c$.
\item Modify the RHS: $f_i\leftarrow f_i-A_{i1}c, \quad i=2,\ldots,p+1$.
\end{enumerate}
\end{enumerate}
If the original matrix \emph{is not symmetric}, only the steps (1a)
and (2a) are required, since the other steps are only necessary to
preserve the symmetry of the original matrix.
\subsubsection{2D case}
In that case, let us write the solution $u(x,y)$ as
\begin{equation}
u(x,y) = \sum_{i=1}^{N_1}\sum_{j=1}^{N_2} u_{ij} \Lambda_i(x)\Lambda_j(y),
\end{equation}
where $N_1, N_2$ are the number of elements in each
dimension. Assuming the BC $u(0,y) = g(y)$, and since
$\Lambda_i(0)=\delta_{i1}$, the solutions $u_{ij}$
should satisfy
\begin{equation}
\label{dirich_2d}
\sum_{j=1}^{N_2} u_{1j} \Lambda_j(y) = g(y).
\end{equation}
If $g(y)$ is constant $g(y)=c$, we obtain the trivial solution
$u_{1,j}=c$ since $\sum_{j=1}^{N_2} \Lambda_j(y)=1$ \cite{BSPLINES}.
For non-uniform $g$,
at least 2 methods can be used to obtain the $N_2$ unknowns $u_{1j}$
satisfying the equation above:
\begin{enumerate}
\item By \emph{collocating} Eq.(\ref{dirich_2d}) on a \emph{suitable}
set of points $[y_k]_{ k=1}^{N_2}$, the problem is reduced to an
\emph{interpolation} one (see section ``Spline
Interpolation'' in \cite{BSPLINES}).
\item By \emph{minimizing} the residual norm of Eq.(\ref{dirich_2d})
defined as follows:
\begin{gather}
R = \left\|\sum_{j=1}^{N_2} c_{j} \Lambda_j(y)-g(y)\right\|^2 =
\int\!\!dy\left\{\left[\sum_{j=1}^{N_2} c_{j} \Lambda_j(y)\right]^2
- 2g(y)\sum_{j=1}^{N_2} c_{j} \Lambda_j(y) +g^2(y)\right\}\\
\frac{\partial R}{\partial c_k} = 2 \int\!\!dy\left[
\sum_{j=1}^{N_2} c_{j} \Lambda_j(y)\Lambda_k(y)
-g(y)\Lambda_k(y)\right] = 0, \quad k=1,\ldots,N_2,
\end{gather}
the boundary solutions $c_j$ can be calculated by solving the following
\emph{weak-form} of Eq.(\ref{dirich_2d}):
\begin{equation}
\sum_{j=1}^{N_2} c_{j} \int\!\!dy\Lambda_j(y)\Lambda_k(y) =
\int\!\!dy\Lambda_k(y) g(y), \qquad k=1,\ldots,N_2.
\end{equation}
\end{enumerate}
Once the values of $c_j$ known, the procedure described for the 1D case
above can be applied to satisfy each of the $N_2$ conditions $u_{1j}=c_j$.
A full example for solving the cylindrical Laplace equation in
cylindrical coordinates:
\begin{equation}
\begin{split}
\frac{1}{r}\frac{\partial}{\partial r}
\left(r\frac{\partial\phi}{\partial r}\right) &+\frac{1}{r^2}
\frac{\partial^2\phi}{\partial \theta^2} = 0 \\
\phi(r=1,\theta) &= \cos m\theta.
\end{split}
\end{equation}
is given in {\tt bpslines/examples/dirichlet/poisson.f90}.
\subsection{Unicity on the axis}
\label{unicityCond}
Denoting the $N$ solutions at the axis by $(u_1, \ldots, u_N)$ , and
their transforms by $(\hat u_1, \ldots, \hat u_N)$ defined by
\begin{equation} \begin{array}{ccc}
u_1-u_N = \hat u_1 & & u_1 = \hat u_1 + \hat u_N \\
u_2-u_N = \hat u_2 & & u_2 = \hat u_2 + \hat u_N \\
\vdots & \Longrightarrow & \vdots \\
u_{N-1}-u_N = \hat u_{N-1} & & u_{N-1} = \hat u_{N-1} + \hat u_N \\
u_N = \hat u_N & & u_N = \hat u_N,
\end{array} \label{eq:unicity1} \end{equation}
the unicity condition can be specified by simply imposing
\begin{equation}
\hat u_1=\hat u_2=\ldots=\hat u_{N-1}=0. \label{eq:unicity2}
\end{equation}
From (\ref{eq:unicity1}), the \emph{transformation matrix} \(\mathbf U\) is defined
as
\begin{equation}
\mathbf{u} = \mathbf{ U \cdot\hat u}, \qquad \mathbf{U} =
\left(\begin{matrix}
1 & 0 & \dots & 0 & 1 \\
0 & 1 & \dots & 0 & 1 \\
& & \ddots& & \vdots \\
0 & 0 & \dots & 1 & 1 \\
0 & 0 & \dots & 0 & 1
\end{matrix}\right), \quad \mathbf{U^{T}} =
\left(\begin{matrix}
1 & 0 & \dots & 0 & 0 \\
0 & 1 & \dots & 0 & 0 \\
& & \ddots& & \vdots \\
0 & 0 & \dots & 1 & 0 \\
1 & 1 & \dots & 1 & 1
\end{matrix}\right).
\end{equation}
\paragraph{Matrix product \( \mathbf{A\cdot U}\)}
\begin{equation}
\mathbf{ A\cdot U} =
\left(\begin{array}{lllll}
A_{1,1} & A_{1,2} & \dots & A_{1,N-1} & \sum_{j} A_{1,j} \\
A_{2,1} & A_{2,2} & \dots & A_{2,N-1} & \sum_{j} A_{2,j} \\
& & \ddots& & \vdots \\
A_{N-1,1} & A_{N-1,2} & \dots & A_{N-1,N-1} & \sum_{j}A_{N-1,j} \\
A_{N,1} & A_{N,2} & \dots & A_{N,N-1} & \sum_{j}A_{N,j}
\end{array}\right).
\end{equation}
Thus \emph{right multiply by \(\mathbf{U}\)} is equivalent to put the
\emph{the sum of each row on the last column}.
\paragraph{Matrix product \( \mathbf{ U^T \cdot A}\)}
\begin{equation}
\mathbf{ U^T \cdot A} =
\left(\begin{array}{lllll}
A_{1,1} & A_{1,2} & \dots & A_{1,N-1} & A_{1,N} \\
A_{2,1} & A_{2,2} & \dots & A_{2,N-1} & A_{2,N} \\
& & \ddots& & \vdots \\
A_{N-1,1} & A_{N-1,2} & \dots & A_{N-1,N-1} & A_{N-1,N} \\
\sum_{i}A_{i,1} & \sum_{i}A_{i,2} & \dots & \sum_{i}A_{i,N-1} &
\sum_{i}A_{i,N}
\end{array}\right).
\end{equation}
Thus \emph{left multiply by \(\mathbf{\hat U}\)} is equivalent to put the
\emph{the sum of each column on the last row}.
\paragraph{Product \( \mathbf{\hat U \cdot b}\)}
\begin{equation}
\mathbf{\hat b} = \mathbf{U^T\cdot b} =
\left(\begin{array}{l}
b_1 \\
b_2 \\
\vdots \\
b_{N-1} \\
\sum_{i} b_{i}
\end{array}\right),
\end{equation}
\paragraph{Transformation of the original matrix equation}
The full original linear system, obtained from the discretization of the
2D \(r,\theta\) polar coordinates can be written as:
\begin{equation}
\left(\begin{array}{ll}
\mathbf{A} & \mathbf{B} \\
\mathbf{C} & \mathbf{D}
\end{array}\right)
\left(\begin{array}{l}
\mathbf{u} \\
\mathbf{v}
\end{array}\right) =
\left(\begin{array}{l}
\mathbf{b} \\
\mathbf{c}
\end{array}\right), \label{eq:orig_matrix_eq}
\end{equation}
where the solution array is split into the solutions \(\mathbf{u}\) at \(r=0\) and
the solutions \(\mathbf{v}\) on the remaining domain. The transformed system can
thus be written as
\begin{equation*}
\left(\begin{array}{ll}
\mathbf{U^T} & 0 \\
0 & \mathbf{I}
\end{array}\right)
\left(\begin{array}{ll}
\mathbf{A} & \mathbf{B} \\
\mathbf{C} & \mathbf{D}
\end{array}\right)
\left(\begin{array}{ll}
\mathbf{U} & 0 \\
0 & \mathbf{I}
\end{array}\right)
\left(\begin{array}{l}
\mathbf{\hat u} \\
\mathbf{v}
\end{array}\right) =
\left(\begin{array}{ll}
\mathbf{U^T} &0 \\
0 & \mathbf{I}
\end{array}\right)
\left(\begin{array}{l}
\mathbf{b} \\
\mathbf{c}
\end{array}\right),
\end{equation*}
\begin{equation}
\Longrightarrow
\left(\begin{array}{cc}
\mathbf{U^TAU} & \mathbf{U^TB} \\
\mathbf{CU} & \mathbf{D}
\end{array}\right)
\left(\begin{array}{l}
\mathbf{\hat u} \\
\mathbf{v}
\end{array}\right) =
\left(\begin{array}{c}
\mathbf{U^Tb} \\
\mathbf{c}
\end{array}\right),
\end{equation}
Notice that the transformation preserves any symmetry existing in the original system
(\ref{eq:orig_matrix_eq}). The transformed matrix is finally given in the following where
only the modified elements are shown and the sum is only over the first \(N\)
points in \(\theta\) direction. The \(\times\) symbol denotes unmodified elements.
\begin{equation}
\left(\begin{array}{lllllll}
\times & \times & \times & \times & \sum_{j} A_{1,j} & \times & \times \\
\times & \times & \times & \times & \sum_{j} A_{2,j} & \times & \times \\
\times & \times & \times & \times & \vdots & \times & \times \\
\times & \times & \times & \times & \sum_{j} A_{N-1,j} & \times & \times \\
\sum_{i}A_{i,1} & \sum_{i}A_{i,2} & \dots & \sum_{i}A_{i,N-1} &
\sum_{i,j}A_{i,j} & \sum_{i}A_{i,N+1} & \dots \\
\times & \times & \times & \times & \sum_{j} A_{N+1,j} & \times & \times \\
\times & \times & \times & \times & \vdots & \times & \times
\end{array}\right)
\end{equation}
Only the \(N^{th}\) column and the \(N^{th}\) row are affected by the transformation.
Applying now the unicity condition (\ref{eq:unicity2}) the final transformed system
reads:
\begin{equation}
\left(\begin{array}{lllllll}
1 & 0 & \dots & 0 & 0 & 0 & 0 \\
0 & 1 & \dots & 0 & 0 & 0 & 0 \\
0 & 0 & \ddots & 0 & \vdots & 0 & 0 \\
0 & 0 & \dots & 1 & 0 & 0 & 0 \\
0 & 0 & \dots & 0 & \sum_{i,j}A_{i,j} & \sum_{i}A_{i,N+1} & \dots \\
0 & 0 & \dots & 0 & \sum_{j} A_{N+1,j} & \times & \times \\
0 & 0 & \dots & 0 & \vdots & \times & \times
\end{array}\right)
\left(\begin{array}{l}
\hat u_1 \\
\hat u_2 \\
\vdots\\
\hat u_{N-1}\\
\hat u_{N} \\
u_{N+1} \\
\vdots
\end{array}\right) =
\left(\begin{array}{l}
0 \\
0 \\
\vdots\\
0 \\
\sum_{i} b_{i} \\
b_{N+1} \\
\vdots
\end{array}\right).
\end{equation}
\section{{\tt MATRIX} Reference}
\label{matRef}
The following conventions are adopted in the routine descriptions:
\begin{itemize}
\item {\tt [z]} means optional: for example {\tt TYPE([z]gemat)}
declares a variable which can be of type {\tt gemat} or {\tt zgemat}.
\item The symbol ``$|$'' is the logical {\tt OR} operator. Thus
\begin{verbatim}
TYPE([z]gemat|[z]gbmat) :: mat
\end{verbatim}
declares that {\tt mat} can be either of type {\tt gemat}, {\tt
zgemat}, {\tt pbmat} or {\tt zpbmat}.
\item In a same declaration block, if a scalar or array of type {\tt
DOUBLE PRECISION|COMPLEX} is declared together with a matrix object
which can be also complex, both should be either real
or complex. For example in the routine {\tt updtmat}, if {\tt mat}
of type {\tt zgbmat}, {\tt val} should be complex.
\end{itemize}
\subsection{init}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
TYPE([z]gemat) :: mat
SUBROUTINE init(n, nterms, mat ,kmat)
TYPE([z]gbmat) :: mat
SUBROUTINE init(kl, ku, n, nterms, mat, kmat)
TYPE([z]pbmat) :: mat
SUBROUTINE init(ku, n, nterms, mat, kmat)
TYPE([z]periodic_mat) :: mat
SUBROUTINE init(kl, ku, n, nterms, mat, kmat)
INTEGER, INTENT(in) :: kl, ku, n, nterms
INTEGER, OPTIONAL :: kmat
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Initialize data structure for matrix
\item[Arguments:] \mbox{}
\begin{verbatim}
n : rank of matrix
kl, ku : number of sub and super diagonals
nterms : number of terms in weak form
kmat : matrix id
mat : matrix object
\end{verbatim}
\end{description}
\subsection{destroy}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE destroy(mat)
TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Free matrix memory
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
\end{verbatim}
\end{description}
\subsection{updmat}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE updtmat(mat, i, j, val)
TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
INTEGER, INTENT(IN) :: i, j
DOUBLE PRECISION|COMPLEX, INTENT(IN) :: val
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Update (accumulate) element $A_{ij}$
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
i : row index
j : column index
val : input value
\end{verbatim}
\end{description}
\subsection{putele}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE putele(mat, i, j, val)
TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
INTEGER, INTENT(IN) :: i, j
DOUBLE PRECISION|COMPLEX, INTENT(IN) :: val
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Overwrite element $A_{ij}$
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
i : row index
j : column index
val : input value
\end{verbatim}
\end{description}
\subsection{putrow}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE putrow(mat, i, arr)
TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
INTEGER, INTENT(IN) :: i
DOUBLE PRECISION|COMPLEX, INTENT(IN) :: arr(:)
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Overwrite a matrix row
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
i : row index
arr : input array
\end{verbatim}
\end{description}
\subsection{putcol}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE putcol(mat, j, arr)
TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
INTEGER, INTENT(IN) :: j
DOUBLE PRECISION|COMPLEX, INTENT(IN) :: arr(:)
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Overwrite a matrix row
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
j : column index
arr : input array
\end{verbatim}
\end{description}
\subsection{getele}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE getele(mat, i, j, val)
TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
INTEGER, INTENT(IN) :: i, j
DOUBLE PRECISION|COMPLEX, INTENT(OUT) :: val
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Get element $A_{ij}$
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
i : row index
j : column index
val : output value
\end{verbatim}
\end{description}
\subsection{getrow}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE getrow(mat, i, arr)
TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
INTEGER, INTENT(IN) :: i
DOUBLE PRECISION|COMPLEX, INTENT(OUT) :: arr(:)
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Get a matrix row
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
i : row index
arr : output array
\end{verbatim}
\end{description}
\subsection{getcol}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE getcol(mat, j, arr)
TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
INTEGER, INTENT(IN) :: j
DOUBLE PRECISION|COMPLEX, INTENT(OUT) :: arr(:)
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Get a matrix column
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
i : column index
arr : output array
\end{verbatim}
\end{description}
\subsection{vmx}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
FUNCTION vmx(mat, x)
TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
DOUBLE PRECISION|COMPLEX, DIMENSION(:), INTENT(in) :: x
DOUBLE PRECISION|COMPLEX, DIMENSION(SIZE(x)) :: vmx
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Matrix-vector product $Ax$
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
x : input array
vmx : output array
\end{verbatim}
\end{description}
\subsection{mcopy}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE mcopy(mata, matb)
TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mata, matb
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Matrix copy: $B = A$
\item[Arguments:] \mbox{}
\begin{verbatim}
mata : input matrix object
matb : output matrix object
\end{verbatim}
\end{description}
\subsection{maddto}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE maddto(mata, alpha, matb)
TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mata, matb
DOUBLE PRECISION|COMPLEX : alpha
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Matrix addition: $A \leftarrow A+\alpha B$
\item[Arguments:] \mbox{}
\begin{verbatim}
mata : input matrix object
matb : output matrix object
alpha : input scalar
\end{verbatim}
\end{description}
\subsection{determinant}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE determinant(mat, base, pow)
TYPE([z]gemat|[z]gbmat|[z]pbmat) :: mat
INTEGER, INTENT(out) :: pow
DOUBLE PRECISION|COMPLEX : base
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Returns the determinant of matrix as $D = \text{base}\times 10^{\text{pow}}$
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : input matrix object
base : mantissa of determinant
pow : exponent of determinant
\end{verbatim}
\end{description}
\subsection{factor}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE factor(mat)
TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
LU (Cholesky for symmetric/hermitian matrix) factorization
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : inout matrix object
\end{verbatim}
\end{description}
\subsection{bsolve}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE bsolve(mat)
TYPE([z]gemat|[z]gbmat|[z]pbmat|[z]periodic_mat) :: mat
DOUBLE PRECISION|COMPLEX, DIMENSION [(:)] :: rhs
DOUBLE PRECISION|COMPLEX, DIMENSION [(:),] OPTIONAL, INTENT (out) :: sol
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Solve the linear system using the factored matrix, for a single or
multiple right-hand-side
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : input factored matrix object
rhs : input right-hand-side, overwriten by the solution if sol is not present
sol : contains solution
\end{verbatim}
\end{description}
\section{{\tt SPMAT} Reference}
\label{spmatRef}
The following conventions are adopted in the routine descriptions:
\begin{itemize}
\item {\tt [z]} means optional: for example {\tt TYPE([z]gemat)}
declares a variable which can be of type {\tt gemat} or {\tt zgemat}.
\item The symbol ``$|$'' is the logical {\tt OR} operator. Thus
\begin{verbatim}
TYPE([z]gemat|[z]gbmat) :: mat
\end{verbatim}
declares that {\tt mat} can be either of type {\tt gemat}, {\tt
zgemat}, {\tt pbmat} or {\tt zpbmat}.
\item In a same declaration block, if a scalar or array of type {\tt
DOUBLE PRECISION|COMPLEX} is declared together with a matrix object
which can be also complex, both should be either real
or complex. For example in the routine {\tt updtmat}, if {\tt mat}
of type {\tt zgbmat}, {\tt val} should be complex.
\end{itemize}
\subsection{init}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE init(n, mat, istart, iend)
TYPE([z]spmat) :: mat
INTEGER, INTENT(in), OPTIONAL :: istart, iend
INTEGER, INTENT(in) :: n
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Initialize an empty sparse matrix of $n$ rows.
\item[Arguments:] \mbox{}
\begin{verbatim}
n : rank of matrix
mat : matrix object
istart, iend : range of row indices. By default istart=1, iend=n
\end{verbatim}
\end{description}
\subsection{destroy}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE destroy(mat)
TYPE([z]spmat) :: mat
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Free matrix memory
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
\end{verbatim}
\end{description}
\subsection{updmat}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE updtmat(mat, i, j, val)
TYPE([z]spmat) :: mat
INTEGER, INTENT(IN) :: i, j
DOUBLE PRECISION|COMPLEX, INTENT(IN) :: val
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Update (accumulate) an existing element $A_{ij}$ or insert it in the
linked list in an increasing order in the column index j.
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
i : row index
j : column index
val : input value
\end{verbatim}
\end{description}
\subsection{putele}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE putele(mat, i, j, val, nlforce_zero)
TYPE([z]pbmat) :: mat
INTEGER, INTENT(IN) :: i, j
DOUBLE PRECISION|COMPLEX, INTENT(IN) :: val
LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Overwrite an existing element $A_{ij}$ or insert it in the
linked list in an increasing order in the column index j.
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
i : row index
j : column index
val : input value
nlforce_zero : Never remove an existing element when input is zero if TRUE
FALSE by default
\end{verbatim}
\end{description}
\subsection{putrow}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE putrow(mat, i, arr, col, nlforce_zero)
TYPE([z]spmat) :: mat
INTEGER, INTENT(IN) :: i
DOUBLE PRECISION|COMPLEX, INTENT(IN) :: arr(:)
INTEGER, INTENT(in), OPTIONAL :: col(:)
LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Overwrite a matrix row
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
i : row index
arr : input array
col : input array containing column indices
nlforce_zero : Never remove an existing element when input is zero if TRUE
FALSE by default
\end{verbatim}
\end{description}
\subsection{putcol}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE putcol(mat, j, arr, nlforce_zero)
TYPE([z]spmat) :: mat
INTEGER, INTENT(IN) :: j
DOUBLE PRECISION|COMPLEX, INTENT(IN) :: arr(:)
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Overwrite a matrix row
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
j : column index
arr : input array
nlforce_zero : Never remove an existing non-zero element if .TRUE.
.FALSE. by default
\end{verbatim}
\end{description}
\subsection{getele}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE getele(mat, i, j, val)
TYPE([z]spmat) :: mat
INTEGER, INTENT(IN) :: i, j
DOUBLE PRECISION|COMPLEX, INTENT(OUT) :: val
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Get element $A_{ij}$
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
i : row index
j : column index
val : output value
\end{verbatim}
\end{description}
\subsection{getrow}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE getrow(mat, i, arr, col)
TYPE([z]spmat) :: mat
INTEGER, INTENT(IN) :: i
DOUBLE PRECISION|COMPLEX, INTENT(OUT) :: arr(:)
INTEGER, INTENT(out), OPTIONAL :: col(:)
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Get a matrix row and optionally the column indices
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
i : row index
arr : output array
col : output array containing column indices
\end{verbatim}
\end{description}
\subsection{getcol}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE getcol(mat, j, arr)
TYPE([z]spmat) :: mat
INTEGER, INTENT(IN) :: j
DOUBLE PRECISION|COMPLEX, INTENT(OUT) :: arr(:)
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Get a matrix column
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
i : column index
arr : output array
\end{verbatim}
\end{description}
\subsection{get\_count}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
INTEGER FUNCTION get_count(mat, nnz)
TYPE([z]spmat) :: mat
INTEGER, INTENT(out), OPTIONAL :: nnz(:)
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Returns the number of non-zeros and optionally an array of numbers
of non-zeros on each row
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
nnz : array containing numbers of non-zeros on each row.
\end{verbatim}
\end{description}
\section{{\tt PARDISO\_BSPLINES} Reference}
\label{pardisoRef}
The subroutines {\tt updmat, putele, putrow, putcol, getele, getrow,
getcol, vmx, mcopy, maddto} and {\tt destroy}
have \emph{exactly} the same list of arguments as
those from the {\tt MATRIX} module (as documented in
Appendix~\ref{matRef}), except for the matrix types. Below, we show only the routines
that have different arguments. The same conventions as before are used
for the routine description.
\subsection{init}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE init(n, nterms, mat, kmat, nlsym, [nlherm,] nlpos, &
& nlforce_zero)
INTEGER, INTENT(in) :: n, nterms
TYPE([z]pardiso_mat) :: mat
INTEGER, OPTIONAL, INTENT(in) :: kmat
LOGICAL, OPTIONAL, INTENT(in) :: nlsym
LOGICAL, OPTIONAL, INTENT(in) :: nlpos
LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Initialize the PARDISO solver. A SPMAT matrix of $n$ empty rows is initialized.
\item[Arguments:] \mbox{}
\begin{verbatim}
n : rank of matrix
nterms : number of terms in weak form
kmat : matrix id
mat : matrix object
nlsym : symmetric or not. Default is .FALSE.
nlherm : Hermitian or not for complex matrix . Default is .FALSE.
nlpos : Positive-definite or not. Default is .TRUE.
nlforce_zero : Never remove an existing non-zero element if .TRUE.
.TRUE. by default
\end{verbatim}
\end{description}
\subsection{clear\_mat}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE clear_mat(mat)
TYPE([z]pardiso_mat) :: mat
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Clear matrix, keeping its sparse structure unchanged
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
\end{verbatim}
\end{description}
\subsection{psum\_mat}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE sum_mat(mat, comm)
TYPE([z]pardiso_mat) :: mat
INTEGER, INTENT(in) :: comm
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Parallel sum of matrices. Result matrix is placed in the sparse
matrix mat\%mat on all processes of comm.
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
comm : communicator
\end{verbatim}
\end{description}
\subsection{p2p\_mat}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE p2p_mat(mat, dest, extyp, op, comm)
TYPE([z]pardiso_mat) :: mat
INTEGER, INTENT(in) :: dest
CHARACTER(len=*), INTENT(in) :: extyp ! ('send', 'recv', 'sendrecv')
CHARACTER(len=*), INTENT(in) :: op ! ('put', 'updt')
INTEGER, INTENT(in) :: comm
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Point-to-point combine sparse matrix between 2 processes.
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
dest : rank of remote process
extyp : exchange type ('send', 'recv', 'sendrecv')
op : operation type ('put', 'updt')
comm : communicator
\end{verbatim}
\end{description}
\subsection{get\_count}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
INTEGER FUNCTION get_count(mat, nnz)
TYPE([z]pardiso_mat) :: mat
INTEGER, INTENT(out), OPTIONAL :: nnz(:)
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Returns the number of non-zeros and optionally an array of numbers
of non-zeros on each row
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
nnz : array containing numbers of non-zeros on each row.
\end{verbatim}
\end{description}
\subsection{factor}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE factor(mat, nlreord, nlmetis, debug)
TYPE([z]pardiso_mat) :: mat
LOGICAL, OPTIONAL, INTENT(in) :: nlreord
LOGICAL, OPTIONAL, INTENT(in) :: nlmetis
LOGICAL, OPTIONAL, INTENT(in) :: debug
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Wrapper of to\_mat, reord\_mat and numfact
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
nlreord : call reord_mat if .TRUE. (default is .TRUE.)
nlmetis : use METIS nested dissection for reoredering. Default
is minimum degree alogorithm.
debug : verbose output from PARDISO if .TRUE. Default is .FALSE.
\end{verbatim}
\end{description}
\subsection{bsolve}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE bsolve_pardiso_mat1(mat, rhs, sol, nref, debug)
TYPE([z]pardiso_mat) :: mat
DOUBLE PRECISION|COMPLEX :: rhs(:)
DOUBLE PRECISION|COMPLEX, OPTIONAL :: sol(:)
INTEGER, OPTIONAL :: nref
LOGICAL, OPTIONAL, INTENT(in) :: debug
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Wrapper of to\_mat, reord\_mat and numfact
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
rhs : input right-hand-side, overwriten by the solution if sol is not present
sol : contains solution
ref : maximum number of refinement steps. Default is 0 (no refinement).
debug : verbose output from PARDISO if .TRUE. Default is .FALSE.
\end{verbatim}
\end{description}
\subsection{to\_mat}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE to_mat(mat)
TYPE([z]pardiso_mat) :: mat
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Convert linked list spmat to pardiso matrix structure
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
\end{verbatim}
\end{description}
\subsection{reord\_mat}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE reord_mat(mat, nlmetis, debug)
TYPE([z]pardiso_mat) :: mat
LOGICAL, OPTIONAL, INTENT(in) :: nlmetis
LOGICAL, OPTIONAL, INTENT(in) :: debug
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Reordering and symbolic factorization
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
nlmetis : use METIS nested dissection for reoredering. Default
is minimum degree alogorithm.
debug : verbose output from PARDISO if .TRUE. Default is .FALSE.
\end{verbatim}
\end{description}
\subsection{numfact}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE numfact(mat, debug)
TYPE([z]pardiso_mat) :: mat
LOGICAL, OPTIONAL, INTENT(in) :: debug
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Numerical factorization
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
debug : verbose output from PARDISO if .TRUE. Default is .FALSE.
\end{verbatim}
\end{description}
\section{{\tt [P]WSMP\_BSPLINES} Reference}
\label{wsmpRef}
The subroutines {\tt updmat, putele, putrow, putcol, getele, getrow,
getcol, vmx, mcopy, maddto} and {\tt destroy}
have \emph{exactly} the same list of arguments as
those from the {\tt MATRIX} module (as documented in
Appendix~\ref{matRef}), except for the matrix types. Below, we show only the routines
that have different arguments. The same conventions as before are used
for the routine description.
\subsection{init}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE init(n, nterms, mat, kmat, nlsym, [nlherm,] nlpos, &
& nlforce_zero, [comm_in])
INTEGER, INTENT(in) :: n, nterms
TYPE([z]wsmp_mat) :: mat
INTEGER, OPTIONAL, INTENT(in) :: kmat
LOGICAL, OPTIONAL, INTENT(in) :: nlsym
LOGICAL, OPTIONAL, INTENT(in) :: nlpos
LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero
INTEGER, OPTIONAL, INTENT(in) :: comm_in
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Initialize the WSMP solver. A SPMAT matrix of $n$ empty rows is initialized.
\item[Arguments:] \mbox{}
\begin{verbatim}
n : rank of matrix
nterms : number of terms in weak form
kmat : matrix id
mat : matrix object
nlsym : symmetric or not. Default is .FALSE.
nlherm : Hermitian or not for complex matrix . Default is .FALSE.
nlpos : Positive-definite or not. Default is .TRUE.
nlforce_zero : Never remove an existing non-zero element if .TRUE.
.TRUE. by default
comm_in : MPI communicator. By default MPI_COMM_WORLD (only in PWSMP_BSPLINES)
\end{verbatim}
\end{description}
\subsection{clear\_mat}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE clear_mat(mat)
TYPE([z]wsmp_mat) :: mat
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Clear matrix, keeping its sparse structure unchanged
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
\end{verbatim}
\end{description}
\subsection{psum\_mat}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE sum_mat(mat, comm)
TYPE([z]wsmp_mat) :: mat
INTEGER, INTENT(in) :: comm
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Parallel sum of matrices. Result matrix is placed in the sparse
matrix mat\%mat on all processes of comm.
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
comm : communicator
\end{verbatim}
\end{description}
\subsection{p2p\_mat}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE p2p_mat(mat, dest, extyp, op, comm)
TYPE([z]wsmp_mat) :: mat
INTEGER, INTENT(in) :: dest
CHARACTER(len=*), INTENT(in) :: extyp ! ('send', 'recv', 'sendrecv')
CHARACTER(len=*), INTENT(in) :: op ! ('put', 'updt')
INTEGER, INTENT(in) :: comm
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Point-to-point combine sparse matrix between 2 processes.
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
dest : rank of remote process
extyp : exchange type ('send', 'recv', 'sendrecv')
op : operation type ('put', 'updt')
comm : communicator
\end{verbatim}
\end{description}
\subsection{get\_count}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
INTEGER FUNCTION get_count(mat, nnz)
TYPE([z]wsmp_mat) :: mat
INTEGER, INTENT(out), OPTIONAL :: nnz(:)
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Returns the number of non-zeros and optionally an array of numbers
of non-zeros on each row
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
nnz : array containing numbers of non-zeros on each row.
\end{verbatim}
\end{description}
\subsection{factor}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE factor(mat, nlreord)
TYPE([z]wsmp_mat) :: mat
LOGICAL, OPTIONAL, INTENT(in) :: nlreord
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Wrapper of to\_mat, reord\_mat and numfact
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
nlreord : call reord_mat if .TRUE. (default is .TRUE.)
\end{verbatim}
\end{description}
\subsection{bsolve}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE bsolve_wsmp_mat1(mat, rhs, sol, nref)
TYPE([z]wsmp_mat) :: mat
DOUBLE PRECISION|COMPLEX :: rhs(:)
DOUBLE PRECISION|COMPLEX, OPTIONAL :: sol(:)
INTEGER, OPTIONAL :: nref
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Wrapper of to\_mat, reord\_mat and numfact
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
rhs : input right-hand-side, overwriten by the solution if sol is not present
sol : contains solution
ref : maximum number of refinement steps. Default is 0 (no refinement).
debug : verbose output from WSMP if .TRUE. Default is .FALSE.
\end{verbatim}
\end{description}
\subsection{to\_mat}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE to_mat(mat)
TYPE([z]wsmp_mat) :: mat
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Convert linked list spmat to wsmp matrix structure
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
\end{verbatim}
\end{description}
\subsection{reord\_mat}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE reord_mat(mat)
TYPE([z]wsmp_mat) :: mat
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Reordering and symbolic factorization
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
\end{verbatim}
\end{description}
\subsection{numfact}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE numfact(mat)
TYPE([z]wsmp_mat) :: mat
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Numerical factorization
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
\end{verbatim}
\end{description}
\section{{\tt MUMPS\_BSPLINES} Reference}
\label{mumpsRef}
The subroutines {\tt updmat, putele, putrow, putcol, getele, getrow,
getcol, vmx, mcopy, maddto} and {\tt destroy}
have \emph{exactly} the same list of arguments as
those from the {\tt MATRIX} module (as documented in
Appendix~\ref{matRef}), except for the matrix types. Below, we show only the routines
that have different arguments. The same conventions as before are used
for the routine description.
\subsection{init}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE init(n, nterms, mat, kmat, nlsym, [nlherm,] nlpos, &
& nlforce_zero, comm_in)
INTEGER, INTENT(in) :: n, nterms
TYPE([z]mumps_mat) :: mat
INTEGER, OPTIONAL, INTENT(in) :: kmat
LOGICAL, OPTIONAL, INTENT(in) :: nlsym
LOGICAL, OPTIONAL, INTENT(in) :: nlpos
LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero
INTEGER, OPTIONAL, INTENT(in) :: comm_in
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Initialize the MUMPS solver. A SPMAT matrix of $n$ empty rows is initialized.
\item[Arguments:] \mbox{}
\begin{verbatim}
n : rank of matrix
nterms : number of terms in weak form
kmat : matrix id
mat : matrix object
nlsym : symmetric or not. Default is .FALSE.
nlherm : Hermitian or not for complex matrix . Default is .FALSE.
nlpos : Positive-definite or not. Default is .TRUE.
nlforce_zero : Never remove an existing non-zero element if .TRUE.
.TRUE. by default
comm_in : MPI communicator. By default MPI_COMM_SELF (serial mode).
\end{verbatim}
\end{description}
\subsection{clear\_mat}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE clear_mat(mat)
TYPE([z]mumps_mat) :: mat
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Clear matrix, keeping its sparse structure unchanged
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
\end{verbatim}
\end{description}
\subsection{psum\_mat}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE sum_mat(mat, comm)
TYPE([z]mumps_mat) :: mat
INTEGER, INTENT(in) :: comm
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Parallel sum of matrices. Result matrix is placed in the sparse
matrix mat\%mat on all processes of comm.
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
comm : communicator
\end{verbatim}
\end{description}
\subsection{p2p\_mat}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE p2p_mat(mat, dest, extyp, op, comm)
TYPE([z]mumps_mat) :: mat
INTEGER, INTENT(in) :: dest
CHARACTER(len=*), INTENT(in) :: extyp ! ('send', 'recv', 'sendrecv')
CHARACTER(len=*), INTENT(in) :: op ! ('put', 'updt')
INTEGER, INTENT(in) :: comm
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Point-to-point combine sparse matrix between 2 processes.
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
dest : rank of remote process
extyp : exchange type ('send', 'recv', 'sendrecv')
op : operation type ('put', 'updt')
comm : communicator
\end{verbatim}
\end{description}
\subsection{get\_count}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
INTEGER FUNCTION get_count(mat, nnz)
TYPE([z]mumps_mat) :: mat
INTEGER, INTENT(out), OPTIONAL :: nnz(:)
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Returns the number of non-zeros and optionally an array of numbers
of non-zeros on each row
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
nnz : array containing numbers of non-zeros on each row.
\end{verbatim}
\end{description}
\subsection{factor}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE factor(mat, nlreord)
TYPE([z]mumps_mat) :: mat
LOGICAL, OPTIONAL, INTENT(in) :: nlreord
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Wrapper of to\_mat, reord\_mat and numfact
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
nlreord : call reord_mat if .TRUE. (default is .TRUE.)
\end{verbatim}
\end{description}
\subsection{bsolve}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE bsolve_mumps_mat1(mat, rhs, sol, nref)
TYPE([z]mumps_mat) :: mat
DOUBLE PRECISION|COMPLEX :: rhs(:)
DOUBLE PRECISION|COMPLEX, OPTIONAL :: sol(:)
INTEGER, OPTIONAL :: nref
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Wrapper of to\_mat, reord\_mat and numfact
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
rhs : input right-hand-side, overwriten by the solution if sol is not present
sol : contains solution
ref : maximum number of refinement steps. Default is 0 (no refinement).
debug : verbose output from MUMPS if .TRUE. Default is .FALSE.
\end{verbatim}
\end{description}
\subsection{to\_mat}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE to_mat(mat)
TYPE([z]mumps_mat) :: mat
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Convert linked list spmat to mumps matrix structure
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
\end{verbatim}
\end{description}
\subsection{reord\_mat}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE reord_mat(mat)
TYPE([z]mumps_mat) :: mat
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Reordering and symbolic factorization
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
\end{verbatim}
\end{description}
\subsection{numfact}
\par
\addvspace{\medskipamount}
\nopagebreak\hrule
\begin{verbatim}
SUBROUTINE numfact(mat)
TYPE([z]mumps_mat) :: mat
\end{verbatim}
\nopagebreak\hrule
\addvspace{\medskipamount}
\begin{description}
\item[Purpose:] \mbox{}
Numerical factorization
\item[Arguments:] \mbox{}
\begin{verbatim}
mat : matrix object
\end{verbatim}
\end{description}
\begin{thebibliography}{99}
\bibitem{BSPLINES} {\tt BSPLINES} Reference Guide.
\bibitem{PARDISO} \url{http://www.pardiso-project.org/}
\bibitem{WSMP} \url{http://www-users.cs.umn.edu/~agupta/wsmp.html}
\bibitem{MUMPS} \url{http://graal.ens-lyon.fr/MUMPS/}
\bibitem{McMillan} B. F. McMillan, et. al. \emph{Rapid Fourier space
solution of linear partial integro-differential equations in
toroidal magnetic confinement geometries}, Computer Physics
Communications 181(4),
715-719 (2010)
\end{thebibliography}
\end{document}

Event Timeline