solvers.tex
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Wed, May 29, 01:22

solvers.tex
View Options

	%
	% @file solvers.tex
	%
	% @brief
	%
	% @copyright
	% Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
	% SPC (Swiss Plasma Center)
	%
	% SPClibs is free software: you can redistribute it and/or modify it under
	% the terms of the GNU Lesser General Public License as published by the Free
	% Software Foundation, either version 3 of the License, or (at your option)
	% any later version.
	%
	% SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
	% WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
	% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
	%
	% You should have received a copy of the GNU Lesser General Public License
	% along with this program. If not, see <https://www.gnu.org/licenses/>.
	%
	% @author
	% (in alphabetical order)
	% @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
	%
	\documentclass[a4paper]{article}
	\usepackage{linuxdoc-sgml}
	\usepackage{graphicx}
	\usepackage{hyperref}
	\usepackage{amsmath}
	%\usepackage{verbatim}
	%\usepackage[notref]{showkeys}

	\title{\tt The Solvers in BSPLINES}
	\author{Trach-Minh Tran}
	\date{v0.6, December 2011}
	\abstract{Implementation of a common simple interface to popular
	solver packages (LAPACK, PARDISO, WSMP, PETSc, etc.). The main goal is to
	provide an easy access to these packages in order to solve elliptic and
	parabolic as well as some types of integro-differential
	equations.}

	\begin{document}
	\maketitle
	\tableofcontents

	\section{Matrix form of the problem}
	\subsection{Getting started}
	\subsubsection{A one-dimensional problem}

	Let us start with the one-dimensional Sturm-Liouville differential
	equation:

	\begin{equation*}
	-\frac{d}{dx} \left[C_1(x) \frac{d\phi}{dx}\right] + C_2(x)\phi = \rho,
	\end{equation*}
	on the domain $0\leq x \leq L$ with suitable boundary conditions.
	On a grid with $N$ intervals, the discretized solution $\phi$, using
	the splines $\Lambda_i(x)$ of order $p$ can be written as
	\begin{equation}
	\label{sol1d}
	\phi(x) = \sum_{i=0}^{d-1} \phi_i\Lambda_i(x),
	\end{equation}
	where \cite{BSPLINES}
	\begin{equation*}
	d =
	\begin{cases}
	N & \text{if $\phi$ is periodic}, \\
	N+p & \text{otherwise},
	\end{cases}
	\end{equation*}
	and $\phi_i$ are the unknowns of the following matrix equation:
	\begin{equation}
	\label{matEq1d}
	\sum_{i'=0}^{d-1} A_{ii'}\phi_{i'} = \rho_i, \qquad i=0,\ldots, d-1.
	\end{equation}
	Here the matrix $A_{ii'}$ and the right-hand-side $\rho_i$ are
	respectively given by:
	\begin{equation}
	\begin{split}
	\label{matCoef1d}
	A_{ii'} =& \int_{0}^{L}\!dx\,C_1\Lambda_{i}^{'}\Lambda_{i'}^{'}
	+ \int_{0}^{L}\!dx\,C_2\Lambda_{i}\Lambda_{i'}, \\
	\rho_i =& \int_{0}^{L}dx \rho\Lambda_i.\\
	\end{split}
	\end{equation}
	For more general differential operators, the matrix coefficients $A_{ii'}$ can be
	written as a sum of contributing matrices of the form
	\begin{equation}
	\label{mat1d}
	A_{ii'} = \int_{0}^{L}\!dx\,C\Lambda_{i}^{\alpha}\Lambda_{i'}^{\alpha'},
	\end{equation}
	where $\Lambda_{i}^{\alpha}$ denotes the $\alpha^\text{th}$
	derivative of $\Lambda_{i}$.
	As the splines $\Lambda_i$ have a support of $p+1$ intervals, the
	matrix is sparse and its
	coefficients $A_{ii'}$ are non-zero only for $\|i-i'\| \leq p$: hence the
	matrix has a band structure of bandwidth equal to $2p+1$
	if the operator is purely differential. For an
	integral equation such as
	\begin{equation*}
	\int_{0}^{L}\!dx' K(x,x')\phi(x') = \rho(x),
	\end{equation*}
	the discretization results in a \emph{dense} matrix of the form
	\begin{equation}
	\label{matIntg1d}
	A_{ii'} = \int_{0}^{L}\!dx \Lambda_{i}(x) \int_{0}^{L}\!dx'
	K(x,x')\Lambda_{i'}(x').
	\end{equation}
	Note that when the kernel is separable $K(x,x') = U(x)V(x')$, the
	matrix $A_{ii'}$ is a \emph{dyadic}:
	\begin{equation}
	A_{ii'} = \int_{0}^{L}\!dx U(x) \Lambda_{i}(x) \int_{0}^{L}\!dx
	V(x)\Lambda_{i'}(x) = U_iV_{i'}.
	\end{equation}

	\subsubsection{Periodic boundary conditions}
	The splines $\Lambda_i$ are $N$-periodic ($\Lambda_{i+N}(x)=\Lambda_i(x-L)$). This
	property can be easily enforced while constructing both $\rho_i$
	and the matrix $A_{ii'}$. This results in a solution
	$\phi_i$ which is also $N$-periodic.

	\subsubsection{Non-periodic boundary conditions}
	In {\tt BSPLINES} \cite{BSPLINES}, the constructed non-periodic splines are such that at the
	boundaries $x=0$ and $x=L$:
	\begin{equation}
	\Lambda_i(0) = \delta_{i,0}, \qquad \Lambda_i(L) = \delta_{i,N+p-1},
	\end{equation}
	which imply that, using (\ref{sol1d})
	\begin{equation}
	\phi(0) = \phi_0, \qquad \phi(L) = \phi_{N+p-1}.
	\end{equation}
	It is thus possible to impose the Dirichlet boundary conditions
	by a simple modification of the matrix $A_{ii'}$ as shown in
	Appendix~\ref{DirichletCond}.


	\subsection{Problems in more dimensions}
	\subsubsection{Two-dimensional equations}
	The results obtained above can be extended in a
	straightforward manner. Assuming, for example a
	\emph{polar like} $(r,\theta)$ coordinate system,
	with the discretized solution and the right-hand side
	written as:
	\begin{equation}
	\label{discreteEq2d}
	\begin{split}
	\phi(r,\theta) &= \sum_{i=0}^{N_r+p_r-1}\sum_{j=0}^{N_\theta-1}
	\phi_{ij}\Lambda_i(r) \Lambda_j(\theta) \\
	\rho_{ij} &= \int_{0}^{R}\!dr \int_{0}^{2\pi} \!d\theta J(r,\theta) \rho(r,\theta)
	\Lambda_i(r) \Lambda_j(\theta),\\
	\end{split}
	\end{equation}
	where $J(r,\theta)$ is the Jacobian, the matrix equation to solve is
	\begin{equation}
	\sum_{i'=0}^{N_r+p_r-1}\sum_{j'=0}^{N_\theta-1}
	A_{iji'j'}\phi_{i'j'} = \rho_{ij},
	\end{equation}
	with the matrix $A_{iji'j'}$ expressed as a sum of matrices of the
	form:
	\begin{equation}
	\label{mat2d}
	A_{iji'j'} = \int_{0}^{R}\!dr \int_{0}^{2\pi}\!d\theta\,
	C(r,\theta)\,\Lambda_{i}^{\alpha}(r)\Lambda_{i'}^{\alpha'}(r)\,
	\Lambda_{j}^{\beta}(\theta)\Lambda_{j'}^{\beta'}(\theta).
	\end{equation}

	\subsubsection{Three-dimensional equations}
	Likewise, for the three-dimension case, assuming for example a
	\emph{toroidal like} $(r,\theta, \varphi)$ coordinate system,
	with the discretized solution and the right-hand side
	written as:
	\begin{equation}
	\label{discreteEq3d}
	\begin{split}
	\phi(r,\theta, \varphi) &=
	\sum_{i=0}^{N_r+p_r-1}\sum_{j=0}^{N_\theta-1}
	\sum_{k=0}^{N_\varphi-1}
	\phi_{ijk}\Lambda_i(r) \Lambda_j(\theta) \Lambda_k(\varphi) \\
	\rho_{ijk} &= \int_{0}^{R}\!dr \int_{0}^{2\pi} \!d\theta
	\int_{0}^{2\pi} \!d\varphi J(r,\theta,\varphi) \rho(r,\theta,\varphi)
	\Lambda_i(r) \Lambda_j(\theta) \Lambda_k(\varphi),\\
	\end{split}
	\end{equation}
	where $J(r,\theta,\varphi)$ is the Jacobian, the matrix equation to solve is
	\begin{equation}
	\sum_{i'=0}^{N_r+p_r-1}\sum_{j'=0}^{N_\theta-1}\sum_{k'=0}^{N_\varphi-1}
	A_{ijki'j'k'}\phi_{i'j'k'} = \rho_{ijk},
	\end{equation}
	with the matrix $A_{ijki'j'k'}$ expressed as a sum of matrices of the
	form:
	\begin{equation}
	\label{mat3d}
	A_{ijki'j'k'} = \int_{0}^{R}\!dr
	\int_{0}^{2\pi}\!d\theta\ \int_{0}^{2\pi}\!d\varphi\,
	C(r,\theta,\varphi)\,\Lambda_{i}^{\alpha}(r)\Lambda_{i'}^{\alpha'}(r)\,
	\Lambda_{j}^{\beta}(\theta)\Lambda_{j'}^{\beta'}(\theta)
	\Lambda_{k}^{\gamma}(\varphi)\Lambda_{k'}^{\gamma'}(\varphi).
	\end{equation}

	\subsubsection{Unicity condition}
	In the case of the polar coordinates $(r,\theta)$ considered above,
	the unicity condition on the axis $r=0$ should be imposed. It can be
	enforced by modifications of the matrix $A$ as described in
	Appendix~\ref{unicityCond}.

	\subsubsection{One-dimensional numbering}
	For two-dimensional (three-dimensional) problems, the solution
	$\phi_{ij}$ ($\phi_{ijk}$) as well as the right-hand-side $\rho_{ij}$
	($\rho_{ijk}$) can be conveniently casted into one-dimensional
	arrays. As an example, by numbering first the last index, we obtain the
	following mappings:
	\begin{equation}
	\label{map1d}
	\mu =
	\begin{cases}
	j + iN_\theta & \text{two-dimensional case} \\
	k + (j + iN_\theta)N_\varphi & \text{three-dimensional case} \\
	\end{cases}
	\end{equation}
	Using such a one-dimensional numbering, the matrix equation for the two and
	three dimensional cases takes a more conventional form:
	\begin{equation}
	\sum_{\mu'=0}^{r-1} A_{\mu\mu'} \phi_{\mu'} = \rho_\mu,
	\end{equation}
	with the respective matrix ranks $r=(N_r+p_r)N_\theta$ and
	$r=(N_r+p_r)N_\theta N_\varphi$. For a pure differential operator, the
	matrix $A_{\mu\mu'}$ has a band structure of bandwidth
	$b=2(p_r+1)N_\theta-1$ and $b=2(p_r+1)N_\theta N_\varphi-1$ respectively. It is
	important to note that, except for the one-dimensional problem, there
	are \emph{many} zeros inside the matrix band!

	\section{The module {\tt MATRIX}}
	\subsection{Interface}

	The Fortran module {\tt MATRIX} contains easy-to-use routines to solve the
	matrix equation formulated in the previous section, using the direct
	solvers of LAPACK. The different matrix storage formats are defined,
	using the Fortran derived datatypes. The different types in the
	present version are listed in Table~\ref{matTypes}.

	\begin{table}[h]
	\centering
	\begin{tabular}{\|l\|l\|}\hline
	{\tt gemat} & General dense matrix \\
	{\tt gbmat} & General band matrix \\
	{\tt pbmat} & Symmetric positive-definite band matrix \\
	{\tt periodic\_mat} & Matrix obtained for example in \\
	& one-dimensional periodic problems\\
	\hline
	\end{tabular}
	\caption{The matrix types}
	\label{matTypes}
	\end{table}

	These types define {\tt DOUBLE PRECISION} real matrices.
	{\tt DOUBLE COMPLEX} matrices are declared by prefixing
	these types with the letter ``{\tt z}'', for example {\tt zgbmat}.
	Note that {\tt zpbmat} defines a \emph{hermitian} positive-definite
	complex matrix.
	The \emph{generic} routines are defined for each of these types in Table~\ref{matRoutines}.
	Note that the routine {\tt updtmat} is mainly used for the matrix assembly
	while {\tt getxxx} and {\tt putxxx} are rather used to modify the
	matrix, for example to impose boundary conditions.

	\begin{table}[h]
	\centering
	\begin{tabular}{\|l\|l\|}
	\hline
	{\tt init} & Initializes the data structure \\
	{\tt destroy} & Free the data structure memory \\
	\hline
	{\tt updtmat} & Accumulates a value to the element $A_{ij}$ \\
	{\tt putele, putrow, putcol} & Overwrites the matrix element $(i,j)$,
	row $i$, column $j$ \\
	{\tt getele, getrow, getcol} & Returns the matrix element $(i,j)$,
	row $i$, column $j$ \\
	{\tt vmx} & Returns the matrix-vector product \\
	{\tt mcopy} & Copy a matrix to another matrix \\
	{\tt maddto} & Adds 2 matrices: $A \leftarrow A+\alpha B$ \\
	{\tt determinant} & Returns the matrix determinant \\
	\hline
	{\tt factor} & Computes the LU (Cholesky for symmetric/hermitian \\
	& matrix) factorization \\
	{\tt bsolve} & Solves the linear system using the factorized matrix \\
	\hline
	\end{tabular}
	\caption{The generic routines in the {\tt MATRIX} module}
	\label{matRoutines}
	\end{table}
	The complete description of each routine is given in
	Appendix~\ref{matRef}. More information on how to use it can be obtained by
	{\tt greping} its name on the examples found in the {\tt examples/} directory.

	\subsection{A two-dimensional example}
	\label{twodEx}
	Let's consider the Poisson equation using the cylindrical coordinates
	$(r,\theta)$:
	\begin{equation}
	-\frac{1}{r}\frac{\partial}{\partial r}
	\left[r\frac{\partial\phi}{\partial r}\right]
	-\frac{1}{r^2}\frac{\partial^2\phi}{\partial\theta^2} = \rho,
	\qquad \phi(r=1,\theta) = 0.
	\end{equation}

	Assuming the exact solution
	\begin{equation*}
	\phi(r,\theta) = (1-r^2)r^m\cos m\theta,
	\end{equation*}
	the right-hand-side becomes
	\begin{equation*}
	\rho=4(m+1)r^{m}\cos m\theta.
	\end{equation*}
	The matrix and the right hand-side of the discretized problem are computed as

	\begin{equation}
	\begin{split}
	A_{iji'j'} &= \int_{0}^1\!dr \int_{0}^{2\pi}\!d\theta\,\left[
	r\,\Lambda'_{i}(r)\Lambda'_{i'}(r)\,
	\Lambda_{j}(\theta)\Lambda_{j'}(\theta) +
	\frac{1}{r}\,\Lambda_{i}(r)\Lambda_{i'}(r)\,
	\Lambda'_{j}(\theta)\Lambda'_{j'}(\theta) \right] \\
	\rho_{ij} &= \int_{0}^1\!dr \int_{0}^{2\pi} \!d\theta\,\,r \rho(r,\theta)
	\,\,\Lambda_i(r) \Lambda_j(\theta).
	\end{split}
	\end{equation}
	In the example {\tt pde2d.f90} this problem is treated in detail.
	In the following, only the calls to the {\tt MATRIX} routines
	are reviewed to show how the matrix problem is solved using the {\tt
	MATRIX} module.
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	!
	! Declare a General Band matrix
	!
	USE matrix
	USE conmat_mod
	TYPE(gbmat) :: mat
	!
	! Rank and bandwidth. nidbas(1) is the spline order in
	! the first dimension r.
	!
	nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix
	kl = (nidbas(1)+1)*ny -1 ! Number of sub-diagnonals
	ku = kl ! Number of super-diagnonals
	nterms = 2 ! Number of terms in the weak form
	!
	! Initialize the matrix data structure
	!
	CALL init(kl, ku, nrank, nterms, mat)
	!
	! Construct the matrix, using 2D spline splxy
	! and impose boundary conditions
	!
	CALL conmat(splxy, mat, coefeq_poisson)
	CALL ibcmat(mat, ny)
	!
	! Compute the RHS, using the 2D spline splxy
	! and impose boundary conditions
	!
	CALL disrhs(mbess, splxy, rhs)
	CALL ibcrhs(rhs, ny)
	!
	! Factor the matrix and solve
	!
	CALL factor(mat)
	CALL bsolve(mat, rhs, sol)
	!
	...
	CONTAINS
	SUBROUTINE coefeq_poisson(x, y, idt, idw, c)
	DOUBLE PRECISION, INTENT(in) :: x, y
	INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
	DOUBLE PRECISION, INTENT(out) :: c(:)
	!
	c(1) = x
	idt(1,1) = 1; idt(1,2) = 0
	idw(1,1) = 1; idw(1,2) = 0
	!
	c(2) = 1.d0/x
	idt(2,1) = 0; idt(2,2) = 1
	idw(2,1) = 0; idw(2,2) = 1
	END SUBROUTINE coefeq_poisson
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}

	Some explanations and remarks:
	\begin{itemize}
	\item The matrix construction is performed by {\tt conmat} which will
	be described later in section \ref{secCONMAT}. The
	\emph{weak form} is defined in the \emph{internal}
	procedure {\tt coefeq\_poisson} and passed as an argument to
	{\tt conmat}. See section \ref{secCONMAT} for a detailed
	description of the variables {\tt c, idt, idw} returned by
	{\tt coefeq\_poisson}.
	\item Boundary conditions are imposed by modifications
	of the matrix in subroutine {\tt ibcmat} (see file {\tt the
	ibcmat.f90}), using the {\tt MATRIX}
	routines {\tt getrow, putrow, getcol, putcol}.
	\item The construction of the right-hand-side in {\tt disrhs} (see the
	file {\tt disrhs.f90}) is computed using a Gauss quadrature..
	\item Using the {\tt pbmat} type instead of {\tt gbmat} (the matrix in
	this example is symmetric and positive-definite!) requires only a few
	modifications of the program (see the complete example {\tt
	pde2d\_pb.f90}):
	\begin{itemize}
	\item Change the type {\tt gbmat} to {\tt pbmat} in all matrix declarations
	\item Change the list of arguments in the routine {\tt init} to
	({\tt ku, nrank, nterms, mat})
	\item Small changes in the boundary conditions ({\tt ibcmat} and {\tt
	ibcrhs}) to take into account the symmetry.
	\end{itemize}
	\item The module {\tt MATRIX} can be used independently of {\tt
	BSPLINES} (which is used here only to compute the matrix and
	right-hand-side), for example in a problem discretized using
	Finite Differences.
	\end{itemize}

	\section{Sparse matrix storage}
	Using the \emph{band matrix format} for a pure differential operator
	requires to store a full bandwidth $b=2(p_r+1)N_\theta-1$ for the
	two-dimensional problem as shown in section 1, while there are only
	$(2p_r+1)^2$ non-zero elements per matrix row. In three-dimensional
	problem, it is much worse since $b=2(p_r+1)N_\theta N\varphi-1$ for
	$(2p_r+1)^3$ non-zero elements.

	In order to reduce the matrix storage, a solution consists of just
	storing the matrix non-zero elements and use the \emph{Sparse Direct
	Solvers}. With an optimal \emph{renumbering} strategy (or
	\emph{fill-in reducing ordering}), the
	size of the factored matrix can be expected to be smaller than the
	corresponding band matrix.

	An alternative is to use \emph{iterative} methods which usually need
	less memory.

	Such a sparse matrix is implemented in the {\tt SPARSE} module where
	each matrix row is represented by a \emph{\tt linked} list of elements
	with sorted column index. The data structure of this sparse matrix is
	wrapped up in a the Fortran data type {\tt spmat} for a real matrix and
	{\tt zspmat} for a complex matrix. Most of the generic routines which
	are already defined in the {\tt MATRIX} module are overloaded for these
	matrix types. They are listed in Table~\ref{spmatRoutines}. The complete
	documentation of these routines can be found in Appendix~\ref{spmatRef}.

	\begin{table}[h]
	\centering
	\begin{tabular}{\|l\|l\|}
	\hline
	\emph{Matrix types} & {\tt spmat, zspmat} \\
	\hline\hline
	{\tt init} & Initializes the data structure \\
	{\tt destroy} & Free the data structure memory \\
	\hline
	{\tt updtmat} & Accumulates a value to the element $A_{ij}$ \\
	{\tt putele, putrow, putcol} & Overwrites the matrix element $(i,j)$,
	row $i$, column $j$ \\
	{\tt getele, getrow, getcol} & Returns the matrix element $(i,j)$,
	row $i$, column $j$ \\
	\hline
	{\tt get\_count} & Get the number of non-zero elements in matrix \\
	\hline
	\end{tabular}
	\caption{The generic routines in the {\tt SPARSE} module}
	\label{spmatRoutines}
	\end{table}

	It should be noted that this module is \emph{not} used
	directly in solver problems. One usually uses instead modules which
	are specific to a type of (direct or iterative) solver. As will be
	shown in the next section, it is the routines in this solver module which
	directly calls the routines defined in the {\tt SPARSE} module during the matrix
	assembly.

	\section{Solvers using the module {\tt SPARSE}}
	All the solvers discussed in this section use initially the module
	{\tt SPARSE} to construct the sparse matrix. Once this construction
	procedure is complete, this matrix is converted to the (usually more
	efficient) format used by the solver.
	In a time-dependent simulation where the problem matrix
	changes but not the sparsity pattern, the subsequent matrix assembly
	will be performed directly on this solver's format.

	Thus for example, the first time {\tt updtmat} is called
	on a new matrix, it is the version from {\tt SPARSE}. Next, if
	{\tt updtmat} is called again to modify the matrix, it will be the
	solver's version, unless the matrix is re-initialized by a call to
	{\tt destroy} followed by {\tt init}. This switch is completely
	transparent for the user as shown through an example in the next
	section.

	\subsection{The PARDISO direct solver}
	The interface to PARDISO~\cite{PARDISO} is implemented in the
	module {\tt PARDISO\_BSPLINES}.
	The matrix type (symmetric, hermitian, positive-definite) is set
	by the arguments {\tt nlsym, nlherm} and {\tt nlpos} passed to the
	generic routine {\tt init}. All the other generic routines defined in
	the module {MATRIX}, plus routines specific to the sparse solver,
	are listed in Table~\ref{pardisoRoutines}. The complete documentation
	of these routines is given in Appendix~\ref{pardisoRef}.


	\begin{table}[h]
	\centering
	\begin{tabular}{\|l\|l\|}
	\hline
	\emph{Matrix types} & {\tt pardiso\_mat, zpardiso\_mat} \\
	\hline\hline
	{\tt init} & Initializes the data structure \\
	{\tt destroy} & Free the data structure memory \\
	\hline
	{\tt updtmat} & Accumulates a value to the element $A_{ij}$ \\
	{\tt putele, putrow, putcol} & Overwrites the matrix element $(i,j)$,
	row $i$, column $j$ \\
	{\tt getele, getrow, getcol} & Returns the matrix element $(i,j)$,
	row $i$, column $j$ \\
	{\tt vmx} & Returns the matrix-vector product \\
	{\tt mcopy} & Copy a matrix to another matrix \\
	{\tt maddto} & Adds 2 matrices: $A \leftarrow A+\alpha B$ \\
	{\tt clear\_mat}& Set the matrix elements to zero.\\
	{\tt psum\_mat} & Parallel sum of matrices \\
	{\tt p2p\_mat} & Point-to-point combine sparse matrix between 2 processes\\
	{\tt get\_count}& Get the number of non-zero elements in matrix \\
	\hline
	{\tt factor} & Factorization \\
	{\tt bsolve} & Solves the linear system using the factorized matrix \\
	{\tt to\_mat} & Convert to PARDISO CSR matrix format \\
	{\tt reord\_mat}& Reordering and symbolic factorization \\
	{\tt numfact} & Numerical factorization \\
	\hline
	\end{tabular}
	\caption{The generic routines in the {\tt PARDISO\_BSPLINES} module}
	\label{pardisoRoutines}
	\end{table}

	Below, a complete example solving a simple
	two-dimensional Poisson discretized by the 5 point Finite Difference
	method illustrates how to use the {\tt PARDISO\_BSPLINES} module.
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	PROGRAM main
	USE pardiso_bsplines
	IMPLICIT NONE
	TYPE(pardiso_mat) :: amat
	DOUBLE PRECISION, ALLOCATABLE :: rhs(:), sol(:), arow(:)
	INTEGER :: nx=5, ny=4
	INTEGER :: n, nnz
	INTEGER :: i, j, irow, jcol
	!
	WRITE(*,'(a)', advance='no') 'Enter nx, ny: '
	READ(,) nx, ny
	n = nx*ny ! Rank of the matrix
	ALLOCATE(rhs(n))
	ALLOCATE(sol(n))
	ALLOCATE(arow(n))
	!
	CALL init(n, 1, amat, nlsym=.TRUE.) ! Pardiso mat, symmetric case
	!
	! Construct the matrix and RHS
	!
	DO j=1,ny
	DO i=1,nx
	arow = 0.0d0
	irow = numb(i,j)
	arow(irow) = 4.0d0
	IF(i.GT.1) arow(numb(i-1,j)) = -1.0d0
	IF(i.LT.nx) arow(numb(i+1,j)) = -1.0d0
	IF(j.GT.1) arow(numb(i,j-1)) = -1.0d0
	IF(j.LT.ny) arow(numb(i,j+1)) = -1.0d0
	CALL putrow(amat, irow, arow)
	rhs(irow) = SUM(arow) ! => the exact solution is 1
	END DO
	END DO
	!
	WRITE(*,'(a,i6)') 'Number of non-zeros of matrix', get_count(amat)
	!
	! Factor the amat matrix (Reordering, symbolic and numerical factorization)
	!
	CALL factor(amat)
	!
	! Back solve
	!
	CALL bsolve(amat, rhs, sol)
	!
	! Check solutions
	!
	WRITE(*,'(/a/(10f8.4))') 'Computed sol', sol
	WRITE(*,'(a,1pe12.3)') 'Error', MAXVAL(ABS(sol-1.0d0))
	!
	! Clean up
	!
	DEALLOCATE(rhs)
	DEALLOCATE(sol)
	DEALLOCATE(arow)
	CALL destroy(amat)
	CONTAINS
	INTEGER FUNCTION numb(i,j)
	!
	! One-dimensional numbering
	! Number first x then y
	!
	INTEGER, INTENT(in) :: i, j
	numb = (j-1)*nx + i
	END FUNCTION numb
	END PROGRAM main
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}

	It should be noted that
	\begin{itemize}
	\item The routine {\tt putrow} in the matrix construction loop uses the
	version from the {\tt SPARSE} module to create dynamically the
	matrix row using the linked list.
	\item The routine {\tt factor} calls successively the matrix conversion
	{\tt to\_mat}, the reordering and symbolic factorization routine
	{\tt reord\_mat} and finally the numerical factorization {\tt
	numfact}. One could indeed call these three routines separately
	instead of the single call to {\tt factor},
	\item After solving the linear system, if the matrix is modified by
	calling for example {\tt putrow} again, it will modify
	directly the converted matrix and not on the {\tt spmat} matrix
	which is anyway \emph{destroyed} at the end of {\tt to\_mat}.
	\item If the matrix sparsity changes, the matrix should be
	re-initialized by calling the {\tt destroy} and {\tt init} routines.
	\end{itemize}

	Other examples can be found by running ``{\tt grep pardiso\_mat}''
	on the F90 files in the directory {\tt examples/}.

	\subsection{The WSMP direct solver}
	The interface to WSMP~\cite{WSMP} is implemented in the
	module {\tt WSMP\_BSPLINES}.
	The matrix type (symmetric, hermitian, positive-definite) is set
	by the arguments {\tt nlsym, nlherm} and {\tt nlpos} passed to the
	generic routine {\tt init}. All the other generic routines defined in
	the module {MATRIX}, plus routines specific to the sparse solver,
	are listed in Table~\ref{wsmpRoutines}. The complete documentation
	of these routines is given in Appendix~\ref{wsmpRef}.

	\begin{table}[h]
	\centering
	\begin{tabular}{\|l\|l\|}
	\hline
	\emph{Matrix types} & {\tt wsmp\_mat, zwsmp\_mat} \\
	\hline\hline
	{\tt init} & Initializes the data structure \\
	{\tt destroy} & Free the data structure memory \\
	\hline
	{\tt updtmat} & Accumulates a value to the element $A_{ij}$ \\
	{\tt putele, putrow, putcol} & Overwrites the matrix element $(i,j)$,
	row $i$, column $j$ \\
	{\tt getele, getrow, getcol} & Returns the matrix element $(i,j)$,
	row $i$, column $j$ \\
	{\tt vmx} & Returns the matrix-vector product \\
	{\tt mcopy} & Copy a matrix to another matrix \\
	{\tt maddto} & Adds 2 matrices: $A \leftarrow A+\alpha B$ \\
	{\tt clear\_mat}& Set the matrix elements to zero.\\
	{\tt psum\_mat} & Parallel sum of matrices \\
	{\tt p2p\_mat} & Point-to-point combine sparse matrix between 2 processes\\
	{\tt get\_count}& Get the number of non-zero elements in matrix \\
	\hline
	{\tt factor} & Factorization \\
	{\tt bsolve} & Solves the linear system using the factorized matrix \\
	{\tt to\_mat} & Convert to WSMP CSR matrix format \\
	{\tt reord\_mat}& Reordering and symbolic factorization \\
	{\tt numfact} & Numerical factorization \\
	\hline
	\end{tabular}
	\caption{The generic routines in the {\tt WSMP\_BSPLINES} module}
	\label{wsmpRoutines}
	\end{table}

	The simple Poisson example using the {\tt PARDISO\_BSPLINES} module
	shown in the previous section can be easily adapted to the WSMP
	interface since there are only two lines to change: the {\tt USE} and
	the matrix {\tt TYPE} lines.

	Other examples of how to use this interface can be found by running
	``{\tt grep wsmp\_mat}'' on the F90 files the directory {\tt examples/}.

	The same solver functionality can be found in both the PARDISO and
	WSMP solvers as one can verify by comparing Table~\ref{pardisoRoutines}
	and Table~\ref{wsmpRoutines} or the description of routines in
	Appendix~\ref{pardisoRef} and Appendix~\ref{wsmpRef}.
	However, there
	is an important difference. While in PARDISO (and indeed also in LAPACK),
	it is possible to define several matrices to solve simultaneously, it
	appears that in WSMP, this is possible \emph{only} for symmetric and
	hermitian matrices: in the present 10.9 version, the
	routines to store and recall the solver context
	{\tt WSTOREMAT/WRECALLMAT} which are present in the symmetric version of
	the library are missing in the general version!

	A separate module named {\tt PWSMP\_BSPLINES} added the MPI
	\emph{parallelization} capability provided by WSMP. This parallel version
	implements the same user interface as shown in Table~\ref{wsmpRoutines}.
	The following considerations should be however taken in to account:
	\begin{enumerate}
	\item The coefficient matrix {\tt amat} is partitioned into blocks of
	\emph{contiguous} rows, with their indices defined in
	the interval [{\tt amat\%istart,amat\%iend}] which is defined after the call to
	{\tt init}.
	\item Calls to the routine {\tt updtmat} to update the matrix coefficients
	should not specify a row index \emph{outside} this interval.
	On the other hand, {\tt getxxx} will return 0 and {\tt putxxx} will
	ignore it if a row index \emph{not} in the range [{\tt amat\%istart,amat\%iend}]
	is passed to them.
	\item An \emph{optional} MPI communicator can be given to {\tt init}
	using the keyword {\tt comm\_in}. By default, the communicator
	{\tt MPI\_COMM\_WORLD} is used.
	\end{enumerate}

	A complete example using {\tt PWSMP\_BSPLINES} can be found in
	{\tt examples/pde2d\_pwsmp.f90}.

	\subsection{The MUMPS direct solver}
	{\tt MUMPS}~\cite{MUMPS} is a \emph{parallel sparse direct solver}
	using {\tt MPI} and is implemented in the module
	{\tt MUMPS\_BSPLINES}. User program using this module
	should be compiled and linked with {\tt MPI} even if only the
	serial version of the solver is needed, in which case the
	{\tt MPI\_COMM\_SELF} is passed to the initialization routine
	{\tt init} as an \emph{optional} argument with the keyword
	{\tt comm\_in}. Otherwise a valid {\tt MPI} communicator should be
	passed. By default {\tt comm\_in=MPI\_COMM\_SELF}. Note that it
	is possible to use both serial and parallel solvers in the same
	program to solve different matrix problems.

	As for {\tt PARDISO} and {\tt WSMP}, the same user interface to the
	{\tt MUMPS} solver is used and summarized in
	Table~\ref{mumpsRoutines}. The complete documentation
	of these routines is given in Appendix~\ref{mumpsRef}.

	\begin{table}[h]
	\centering
	\begin{tabular}{\|l\|l\|}
	\hline
	\emph{Matrix types} & {\tt mumps\_mat, zmumps\_mat} \\
	\hline\hline
	{\tt init} & Initializes the data structure \\
	{\tt destroy} & Free the data structure memory \\
	\hline
	{\tt updtmat} & Accumulates a value to the element $A_{ij}$ \\
	{\tt putele, putrow, putcol} & Overwrites the matrix element $(i,j)$,
	row $i$, column $j$ \\
	{\tt getele, getrow, getcol} & Returns the matrix element $(i,j)$,
	row $i$, column $j$ \\
	{\tt vmx} & Returns the matrix-vector product \\
	{\tt mcopy} & Copy a matrix to another matrix \\
	{\tt maddto} & Adds 2 matrices: $A \leftarrow A+\alpha B$ \\
	{\tt clear\_mat}& Set the matrix elements to zero.\\
	{\tt psum\_mat} & Parallel sum of matrices \\
	{\tt p2p\_mat} & Point-to-point combine sparse matrix between 2 processes\\
	{\tt get\_count}& Get the number of non-zero elements in matrix \\
	\hline
	{\tt factor} & Factorization \\
	{\tt bsolve} & Solves the linear system using the factorized matrix \\
	{\tt to\_mat} & Convert to WSMP CSR matrix format \\
	{\tt reord\_mat}& Reordering and symbolic factorization \\
	{\tt numfact} & Numerical factorization \\
	\hline
	\end{tabular}
	\caption{The generic routines in the {\tt MUMPS\_BSPLINES} module}
	\label{mumpsRoutines}
	\end{table}


	\section{Fourier solver \cite{McMillan}}

	\subsection{The matrix equation in Fourier space}
	For a periodic one-dimensional problem, the solution $\phi_i$ and
	the right-hand-side $\rho_i$ in (\ref{matEq1d}) are
	$N$-periodic. Their Discrete Fourier Transform (DFT) can be defined as
	\begin{equation}
	\begin{split}
	\hat{\phi}_k = \sum_{j=0}^{N-1} \phi_j e^{i\frac{2\pi}{N}kj}, &\qquad
	\hat{\rho}_k = \sum_{j=0}^{N-1} \rho_j e^{i\frac{2\pi}{N}kj}, \\
	\phi_j = \frac{1}{N}\sum_{k=0}^{N-1} \hat{\phi}_k e^{-i\frac{2\pi}{N}kj}, &\qquad
	\rho_j = \frac{1}{N}\sum_{k=0}^{N-1} \hat{\rho}_k e^{-i\frac{2\pi}{N}kj}.
	\end{split}
	\end{equation}
	Taking the DFT of Eq.~(\ref{matEq1d}), we obtain the following matrix equation
	in Fourier space:
	\begin{equation}
	\label{Fourier1d}
	\sum_{k'=0}^{N-1} \hat{A}_{kk'}\hat{\phi}_{k'} = \hat{\rho}_{k},
	\end{equation}
	where $\hat{A}_{kk'}$ is the DFT of the original matrix. Following the
	notations in Eq.~(\ref{mat1d}) and assuming an \emph{equidistant} mesh
	with the interval $\Delta=L/N$, each of the DFT matrices of the
	weak form can be written as
	\begin{equation}
	\begin{split}
	\hat{A}_{kk'}
	&= \frac{1}{N}\sum_{j=0}^{N-1} e^{i\frac{2\pi}{N}kj}
	\sum_{j'=0}^{N-1} A_{jj'}e^{-i\frac{2\pi}{N}k'j'} \\
	&= \frac{1}{N}\int_{0}^L\!\!dx\,C(x) \,
	\sum_{j=0}^{N-1} e^{i\frac{2\pi}{N}kj} \Lambda_j^\alpha (x)\,
	\sum_{j'=0}^{N-1} e^{-i\frac{2\pi}{N}k'j'}
	\Lambda_{j'}^{\alpha'} (x) \\
	&= \frac{1}{N}\sum_{J=0}^{N-1}\int_{J\Delta}^{(J+1)\Delta}\!\!dx\,C(x) \,
	\sum_{j=0}^{N-1} e^{i\frac{2\pi}{N}kj} \Lambda_j^\alpha (x)\,
	\sum_{j'=0}^{N-1} e^{-i\frac{2\pi}{N}k'j'}
	\Lambda_{j'}^{\alpha'} (x)
	\end{split}
	\end{equation}
	Note that each of the last two sums is over the splines which are non-zero
	at a given $x$. Using the translational symmetry of the periodic splines:
	\begin{equation*}
	\sum_j e^{i\frac{2\pi}{N}kj} \Lambda_j^\alpha (x) =
	\sum_j e^{i\frac{2\pi}{N}kj}
	\Lambda_{j-J}^\alpha(x-J\Delta) =
	e^{i\frac{2\pi}{N}kJ}\, \hat{\Lambda}_{k}^\alpha(x-J\Delta),
	\end{equation*}
	where we have defined the DFT of splines $\hat{\Lambda}_{k}(x)$ as
	\begin{equation}
	\hat{\Lambda}_{k}^\alpha(x) = \sum_j\Lambda_j^\alpha(x) e^{i\frac{2\pi}{N}kj},
	\end{equation}
	which are computed by the routine {\tt ft\_basfun} in the module
	{\tt BSPLINES} for any spline order $p$ and derivative $\alpha \le
	p$. The DFT matrices can now be written as:
	\begin{equation*}
	\hat{A}_{kk'} =
	\frac{1}{N}\sum_{J=0}^{N-1}\int_{J\Delta}^{(J+1)\Delta}\!\!dx\,C(x) \,
	e^{i\frac{2\pi}{N}J(k-k')} \hat{\Lambda}_k^\alpha (x-J\Delta)\,
	\left[\hat{\Lambda}_{k'}^{\alpha'} (x-J\Delta)\right]^{*}.
	\end{equation*}

	Finally, making the variable transform $x\rightarrow x+J\Delta$ and
	defining the DFT of the weak-form coefficient $C$ as
	\begin{equation}
	\hat{C}_{k}(x) = \sum_{J=0}^{N-1}C(x+J\Delta)\,e^{i\frac{2\pi}{N}Jk},
	\end{equation}
	the DFT of the matrix $\hat{A}_{kk'}$ can be calculated as an
	integration over the first interval:
	\begin{equation}
	\hat{A}_{kk'} = \frac{1}{N}\int_0^\Delta\!\!dx\,
	\hat{C}_{k-k'}(x) \,\hat{\Lambda}_{k}^\alpha(x)
	\left[\hat{\Lambda}_{k'}^{\alpha'}(x)\right]^{*},
	\end{equation}
	which can be computed using again the same Gauss formula as before.
	For uniform $C$, $\hat A_{kk'}$ is diagonal and the matrix equation
	(\ref{Fourier1d}) reduces to a system of equations for the
	uncoupled Fourier modes.

	When $C$ is non-uniform, $\hat A_{kk'}$ is \emph{dense}.
	However in problems where the solution is expected to be
	``smooth'', one can keep only a small number of Fourier modes, reducing
	thus the rank of $\hat A_{kk'}$. Furthermore, if the coefficients
	$C(x)$ of the differential equations are very smooth, peaked at a few (low
	order) modes, the DFT matrix can become \emph{sparse}!

	The generalization to the two-dimensional problem (\ref{mat2d}) is
	straightforward:

	\begin{gather}
	\hat{A}_{im,i'm'} =
	\frac{1}{N_\theta}\int_0^R\!\!dr\left\{\int_0^{\Delta\theta}\!\!d\theta \,
	\hat{C}_{m-m'}(r,\theta) \,\hat{\Lambda}_{m}^\beta(\theta)
	\left[\hat{\Lambda}_{m'}^{\beta'}(\theta)\right]^{*}\right\}
	\Lambda_{i}^\alpha(r)\Lambda_{i'}^{\alpha'}(r) \\
	\hat{C}_{m}(r,\theta) =
	\sum_{j=0}^{N_\theta-1}C(r,\theta+j\Delta\theta)\, e^{i\frac{2\pi}{N_\theta}jm}.
	\end{gather}

	Likewise, for the three-dimensional problem (\ref{mat3d}), we obtain

	\begin{gather}
	\hat{A}_{imn,i'm'n'} =
	\frac{1}{N_\theta N_\varphi}\int_0^R\!\!dr\left\{\int_0^{\Delta\theta}\!\!d\theta
	\int_0^{\Delta\varphi}\!\!d\varphi \,
	\hat{C}_{m-m',n-n'}(r,\theta,\varphi) \,\hat{\Lambda}_{m}^\beta(\theta)
	\left[\hat{\Lambda}_{m'}^{\beta'}(\theta)\right]^{*}\,\hat{\Lambda}_{n}^\gamma(\varphi)
	\left[\hat{\Lambda}_{n'}^{\gamma'}(\varphi)\right]^{*}\right\}
	\Lambda_{i}^\alpha(r)\Lambda_{i'}^{\alpha}(r) \\
	\hat{C}_{mn}(r,\theta,\varphi) =
	\sum_{j=0}^{N_\theta-1}\sum_{k=0}^{N_\varphi-1}C(r,\theta+j\Delta\theta,\varphi+
	k\Delta\varphi)\, e^{i\frac{2\pi}{N_\theta}jm}\,e^{i\frac{2\pi}{N_\varphi}kn}.
	\end{gather}

	Note that for axi-symmetric systems where the coefficients $C$ do not
	depend on $\varphi$
	\begin{equation}
	\hat{C}_{mn} = \hat{C}_{mn}\delta_{n,0}
	\end{equation}
	and thus the three-dimensional problem reduces to a set of independent
	two-dimensional problems with
	\begin{equation}
	\begin{split}
	\hat{A}^n_{im.i'm'} &= M_n \hat{A}_{im.i'm'} \\
	M_n &= \int_0^{\Delta\varphi}\!\!d\varphi \left\|\hat{\Lambda}_n(\varphi)\right\|^2.
	\end{split}
	\end{equation}

	\subsection{Integral equation}
	The DFT matrices for differential operators derived above can be
	extended to an integral operator of the following form:
	\begin{equation}
	\int_{0}^{L}\!dx' K(x,x')\,\phi(x') = \rho(x),
	\end{equation}
	where $\phi(x)$ is $L$-periodic. Using the same FE discretization as
	above results in the following matrix in \emph{real} space:
	\begin{equation}
	A_{jj'} = \int_{0}^{L}\!dx\,\Lambda_j(x)\,\int_{0}^{L}\!dx'\,
	K(x,x')\,\Lambda_{j'}(x'),
	\end{equation}
	and its DFT
	\begin{equation}
	\hat{A}_{kk'} = \frac{1}{N}
	\sum_{J=0}^{N-1}\int_{J\Delta}^{(J+1)\Delta}\!dx\,
	\sum_{J'=0}^{N-1}\int_{J'\Delta}^{(J'+1)\Delta}\!dx'\,K(x,x')\,
	e^{i\frac{2\pi}{N}kJ}\hat{\Lambda}_k(x-J\Delta)\,
	e^{-i\frac{2\pi}{N}k'J'}\left[\hat{\Lambda}_{k'}(x-J'\Delta)\right]^{*},
	\end{equation}
	Now, defining the DFT of the kernel as
	\begin{equation}
	\hat{K}_{kk'}(x,x') =
	\sum_{J=0}^{N-1}\sum_{J'=0}^{N-1}K(x+J\Delta,x'+J'\Delta)\,
	e^{i\frac{2\pi}{N}kJ}\,e^{-i\frac{2\pi}{N}k'J'},
	\end{equation}
	the final expression for the DFT of the matrix $\hat{A}_{kk'}$ reduces to
	\begin{equation}
	\hat{A}_{kk'} = \frac{1}{N}
	\int_0^\Delta\!dx\int_0^\Delta\!dx'\,\hat{K}_{kk'}(x,x')\,
	\hat{\Lambda}_k (x)\,
	\left[\hat{\Lambda}_{k'}(x')\right]^{*}.
	\end{equation}
	Again, notice that the dense matrix $\hat{A}$ can become \emph{sparse}
	if only a limited number of Fourier modes are retained in the DFT of
	the kernel $\hat{K}$.

	\subsection{A two-dimensional example with a non-uniform coefficient}
	As a check, we considered here a two-dimensional example similar to the example in
	section \ref{twodEx} but with a non-uniform coefficient:

	\begin{equation}
	-\frac{1}{r}\frac{\partial}{\partial r}
	\left[rC\frac{\partial\phi}{\partial r}\right]
	-\frac{1}{r^2}\frac{\partial}{\partial \theta}
	\left[C\frac{\partial\phi}{\partial \theta}\right] = \rho.
	\end{equation}

	With $C(r,\theta) = 1+\epsilon r\cos\theta$, assuming the same exact solution as in
	section (\ref{twodEx})
	\begin{equation}
	\phi(r,\theta) = (1-r^2)r^m\cos m\theta,
	\end{equation}
	the right-hand side becomes
	\begin{equation}
	\begin{split}
	\rho(r,\theta) = 4(m+1)r^m\cos m\theta & +
	\frac{\epsilon r^m}{2}(4+5m-m/r^2)\cos(m-1)\theta \\
	&+ \frac{\epsilon r^m}{2}(4+3m+m/r^2)\cos(m+1)\theta.
	\end{split}
	\end{equation}

	This problem is solved in real space and Fourier space respectively in example
	{\tt pde2d\_sym\_pardiso.f90} and example {\tt pde2d\_sym\_pardiso\_dft.f90}.
	Both use the {\tt PARDISO\_BSPLINES} module to solve the sparse
	matrix equation. It should be noted that the Fourier method should yield the
	\emph{same solution} as found with the solver in real space if \emph{all} the
	$N_\theta$ Fourier modes are kept.

	For the problem defined above with {\tt m=3}, by keeping only the seven Fourier
	modes in $[-3,3]$ and the three mode couplings $[-1,0,1]$ in the
	Fourier solver, we found that both methods yield the same (up to 5
	digits) \emph{relative discretization error}. Furthermore, increasing
	the number of Fourier modes to $[-4,4]$ (note that the $m=\pm 4$
	Fourier components of the right hand side $\rho$ are not null) does
	not increase the accuracy of the computed solution!

	In this example, the matrix in Fourier space has a rank which is
	$N_\theta/7$ times smaller than in the solver in real space. The number of
	non-zeros is also reduced by a factor of $(2p+1)/3$ since only 3
	Fourier mode coupling terms are considered.

	In general, the efficiency of such a \emph{matrix filter} is expected
	to be problem-dependent. The Fourier solver should be tested in real simulations.

	\section{The matrix construction module {\tt CONMAT\_MOD}}
	\label{secCONMAT}
	The module implements the generic matrix construction subroutine
	{\tt conmat}, using the algorithm detailed in Appendix~\ref{matAssembly},
	for 1D and 2D differential equations. The computed matrix is returned in the
	argument {\tt mat} which can be a Lapack band matrix as well as a
	PARDISO, WSMP or MUMPS sparse matrix.
	The complete interface of the subroutine is given below.

	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE conmat(spl, mat, coefeq, maxder)
	TYPE(spline1d\|spline2d) :: spl
	TYPE([z]gbmat\|[z]pbmat\|[z]periodic_mat\|[z]pardiso\|...) :: mat
	INTEGER, INTENT(in), OPTIONAL :: maxder[(2)]
	INTERFACE
	SUBROUTINE coefeq(x, [y], idt, idw, c)
	DOUBLE PRECISION, INTENT(in) :: x, [y]
	INTEGER, INTENT(out) :: idt(:), idw(:)
	DOUBLE PRECISION, INTENT(out) :: c(:)
	END SUBROUTINE coefeq
	END INTERFACE
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Construct the FE matrix for 1D or 2D differential operator.
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	spl : 1D or 2D spline
	mat : matrix object
	coefeq : user provided subroutine (see below)
	maxder : Maximum order of the derivatives in the weak form.
	Equal to 1 (first derivative) by default.
	\end{verbatim}
	\end{description}

	The subroutine {\tt conmat} includes, in addition to the arguments
	{\tt spl, mat} and {\tt maxder} described above,
	an user provided subroutine as the third
	argument {\tt coefeq} which computes all the weak
	form coefficients defined in Eq.(\ref{locMat1d}) and Eq.(\ref{locMat2d})
	for a given point ($x$ for 1D case or $(x,y)$ for 2D case). The output
	array {\tt c} will contain all the computed $C$ with its corresponding
	derivative orders $(d,d')$ returned in {\tt idt, idw} respectively.
	Other quantities required to calculate the coefficients $C$ could be
	communicated to {\tt coefeq}, using for example a {\tt COMMON} block or
	a {\tt MODULE}.

	An example of using this module can be found in section \ref{twodEx}.

	\appendix

	\section{Matrix assembly for differential operators}
	\label{matAssembly}
	\subsection{1D case}
	\subsubsection{Local matrix}
	The contribution to the discretized weak-form from the interval
	$[x_i, x_{i+1}]$ where $i=1,\ldots,N$, is a sum of the \emph{local matrices}
	\begin{equation}
	\label{locMat1d}
	\begin{split}
	A^i_{\alpha\alpha'} &= \int_{x_i}^{x_{i+1}}\!\!dx \;C(x)
	\Lambda^d_{\alpha}(x)\Lambda^{d'}_{\alpha'}(x) \\
	&\simeq \sum_{g=1}^{G}\,
	\underbrace{w_g\Lambda^d_{\alpha}(x_g)\Lambda^{d'}_{\alpha'}(x_g)}_{F_{\alpha\alpha'g}}
	\,\underbrace{C(x_g)}_{c_g},
	\end{split}
	\end{equation}
	where a $G$ point Gauss quadrature over the interval $[x_i, x_{i+1}]$
	is used to approximate the integral and
	$\Lambda^d_{\alpha}$ denotes the $d^{th}$ derivative of splines
	which are non zero in the interval $[x_i, x_{i+1}]$. For splines of
	degree $p$, $\alpha=0,\ldots,p$. Note that the matrix can be written as a
	\emph{matrix-vector product}:
	\begin{equation}
	\mathbf{A}= \mathbf{F} \cdot \mathbf{c}.
	\end{equation}

	\subsubsection{Mapping to global matrix}
	For $N$ intervals, the number of spline elements of degree $p$, is
	$N_e=N+p$, or $N_e=N$ if the system is \emph{periodic}.
	Once the local matrix $A_{\alpha\alpha'}$ is formed, it can be added
	to the \emph{global} matrix using the mapping:
	\begin{equation}
	\begin{split}
	A^g_{II'} &\leftarrow A^g_{II'} + A^i_{\alpha\alpha'} \\
	I = i+\alpha, & \qquad I' = i+\alpha'
	\end{split}
	\end{equation}
	For periodic problems, the indices $I,I'$ are further transformed
	by taking into account the periodicity $N$, using for example the
	following {\tt FORTRAN} statement

	\begin{center} \tt
	I = MODULO(I-1,N) + 1 \\
	\end{center}

	\subsection{2D case}
	\subsubsection{Local matrix}
	In this case, the local matrix obtained for the grid cell
	$[x_i, x_{i+1}]\times[y_j, y_{j+1}]$ takes the form:
	\begin{equation}
	\label{locMat2d}
	\begin{split}
	A_{\alpha\alpha'\beta\beta'} &= \int_{x_i}^{x_{i+1}}\!\!dx
	\int_{y_j}^{y_{j+1}}\!\!dy\,\Lambda^{d_1}_{\alpha}(x)\Lambda^{d_1'}_{\alpha'}(x)\;C(x,y)
	\,\Lambda^{d_2}_{\beta}(y)\Lambda^{d_2'}_{\beta'}(y) \\
	&\simeq \sum_{g_1=1}^{G_1}\,
	\underbrace{w_{g_1}\Lambda^{d_1}_{\alpha}(x_{g_1})\Lambda^{{d_1}'}_{\alpha'}(x_{g_1})}_{F_{\alpha\alpha'g_1}}
	\;\sum_{g_2=1}^{G_2}\,\underbrace{C(x_{g_1},y_{g_2})}_{C_{g_1g_2}}
	\underbrace{w_{g_2}\Lambda^{d_2}_{\beta}(y_{g_2})\Lambda^{{d_2}'}_{\beta'}(y_{g_2})}_{G_{\beta\beta'g_2}},
	\end{split}
	\end{equation}
	which can be computed efficiently as \emph{matrix-matrix products}
	\begin{equation}
	\mathbf{A} = \mathbf{F}\cdot\mathbf{C}\cdot\mathbf{G^{T}}
	\end{equation}
	\subsubsection{Mapping to global matrix}
	The local to global element indices mapping on each of the two
	dimensions can be defined as previously as
	\begin{equation}
	\begin{split}
	I = i+\alpha, & \qquad I' = i+\alpha' \\
	J = j+\beta, & \qquad J' = j+\beta'
	\end{split}
	\end{equation}
	If any of the 2 dimensions is periodic, the periodic condition have
	to be applied to the corresponding global element index as explained
	above.

	Furthermore, in order to reduce the 4 dimension array $A^g_{II'JJ'}$
	to the standard 2 dimension matrix, we number first the elements in
	$y$ coordinate and obtain the following index transformation:
	\begin{equation}
	\mu = J + N^y_e(I-1), \qquad \mu' = J' + N^y_e(I'-1),
	\end{equation}
	where $N^y_e$ is the number of elements along the $y$ coordinate. The
	\emph{global} matrix is then constructed from
	\begin{equation}
	A^g_{\mu\mu'} \leftarrow A^g_{\mu\mu'} + A_{\alpha\alpha'\beta\beta'}
	\end{equation}

	\section{The boundary conditions}
	\subsection{Dirichlet condition}
	\label{DirichletCond}
	\subsubsection{1D case}
	Let us consider the boundary condition
	$u(0)=c$. Since all the splines are 0
	at $x=0$, except for the first spline which is equal to 1,
	$\Lambda_i(0)=\delta_{i,1}$, we have simply
	\begin{equation}
	c=u(0) = \sum_{i=1}^N u_i \Lambda_i(0) \quad \Longrightarrow
	\quad u_{1} = c.
	\end{equation}
	The discretized linear system of equations, taking into account of
	this BC, can thus be written as
	\begin{equation}
	\begin{split}
	u_1 &= c\\
	\sum_{j=2}^N A_{ij}u_j &= f_i - A_{i1}c, \quad i=2,\ldots, N
	\end{split}
	\end{equation}
	or in the following matrix form:
	\begin{equation}
	\left(\begin{matrix}
	1 & 0 & \cdots \\
	0 & A_{22} & \cdots \\
	\vdots & \vdots & \ddots \\
	\end{matrix}\right)
	\left(\begin{matrix}
	u_{1} \\
	u_{2} \\
	\vdots\\
	u_{N} \\
	\end{matrix}\right) =
	\left(\begin{matrix}
	c \\
	f_{2} -cA_{21}\\
	\vdots\\
	f_{N} -cA_{N1}\\
	\end{matrix}\right)
	\end{equation}
	Note that (1) the transformed matrix preserves any symmetry or
	positivity of the original matrix, (2) the first column of the
	original matrix has to be saved in order to modify the RHS $f_i$
	but only for non zero $c$ and (3) in that case, one needs to save
	only $[A_{i1}]_{i=2}^{p+1}$, where $p$ is the spline order.

	In summary, the procedure for imposing the Dirichlet BC $u_1=c$ can be
	summarized as follows:
	\begin{enumerate}
	\item Matrix transformation
	\begin{enumerate}
	\item Clear the matrix row $i=1$ and set its diagonal term
	$A_{11}$ to 1.
	\item Get the matrix column $A_{j1}, \quad j=2,\ldots,p+1$ and save it.
	\item Clear the matrix column $j=1$ and set its diagonal term
	$A_{11}$ to 1.
	\end{enumerate}
	\item RHS transformation
	\begin{enumerate}
	\item Set $f_1\leftarrow c$.
	\item Modify the RHS: $f_i\leftarrow f_i-A_{i1}c, \quad i=2,\ldots,p+1$.
	\end{enumerate}
	\end{enumerate}
	If the original matrix \emph{is not symmetric}, only the steps (1a)
	and (2a) are required, since the other steps are only necessary to
	preserve the symmetry of the original matrix.

	\subsubsection{2D case}
	In that case, let us write the solution $u(x,y)$ as
	\begin{equation}
	u(x,y) = \sum_{i=1}^{N_1}\sum_{j=1}^{N_2} u_{ij} \Lambda_i(x)\Lambda_j(y),
	\end{equation}
	where $N_1, N_2$ are the number of elements in each
	dimension. Assuming the BC $u(0,y) = g(y)$, and since
	$\Lambda_i(0)=\delta_{i1}$, the solutions $u_{ij}$
	should satisfy
	\begin{equation}
	\label{dirich_2d}
	\sum_{j=1}^{N_2} u_{1j} \Lambda_j(y) = g(y).
	\end{equation}
	If $g(y)$ is constant $g(y)=c$, we obtain the trivial solution
	$u_{1,j}=c$ since $\sum_{j=1}^{N_2} \Lambda_j(y)=1$ \cite{BSPLINES}.
	For non-uniform $g$,
	at least 2 methods can be used to obtain the $N_2$ unknowns $u_{1j}$
	satisfying the equation above:
	\begin{enumerate}
	\item By \emph{collocating} Eq.(\ref{dirich_2d}) on a \emph{suitable}
	set of points $[y_k]_{ k=1}^{N_2}$, the problem is reduced to an
	\emph{interpolation} one (see section ``Spline
	Interpolation'' in \cite{BSPLINES}).
	\item By \emph{minimizing} the residual norm of Eq.(\ref{dirich_2d})
	defined as follows:
	\begin{gather}
	R = \left\\|\sum_{j=1}^{N_2} c_{j} \Lambda_j(y)-g(y)\right\\|^2 =
	\int\!\!dy\left\{\left[\sum_{j=1}^{N_2} c_{j} \Lambda_j(y)\right]^2
	- 2g(y)\sum_{j=1}^{N_2} c_{j} \Lambda_j(y) +g^2(y)\right\}\\
	\frac{\partial R}{\partial c_k} = 2 \int\!\!dy\left[
	\sum_{j=1}^{N_2} c_{j} \Lambda_j(y)\Lambda_k(y)
	-g(y)\Lambda_k(y)\right] = 0, \quad k=1,\ldots,N_2,
	\end{gather}
	the boundary solutions $c_j$ can be calculated by solving the following
	\emph{weak-form} of Eq.(\ref{dirich_2d}):
	\begin{equation}
	\sum_{j=1}^{N_2} c_{j} \int\!\!dy\Lambda_j(y)\Lambda_k(y) =
	\int\!\!dy\Lambda_k(y) g(y), \qquad k=1,\ldots,N_2.
	\end{equation}
	\end{enumerate}
	Once the values of $c_j$ known, the procedure described for the 1D case
	above can be applied to satisfy each of the $N_2$ conditions $u_{1j}=c_j$.

	A full example for solving the cylindrical Laplace equation in
	cylindrical coordinates:
	\begin{equation}
	\begin{split}
	\frac{1}{r}\frac{\partial}{\partial r}
	\left(r\frac{\partial\phi}{\partial r}\right) &+\frac{1}{r^2}
	\frac{\partial^2\phi}{\partial \theta^2} = 0 \\
	\phi(r=1,\theta) &= \cos m\theta.
	\end{split}
	\end{equation}
	is given in {\tt bpslines/examples/dirichlet/poisson.f90}.

	\subsection{Unicity on the axis}
	\label{unicityCond}
	Denoting the $N$ solutions at the axis by $(u_1, \ldots, u_N)$ , and
	their transforms by $(\hat u_1, \ldots, \hat u_N)$ defined by

	\begin{equation} \begin{array}{ccc}
	u_1-u_N = \hat u_1 & & u_1 = \hat u_1 + \hat u_N \\
	u_2-u_N = \hat u_2 & & u_2 = \hat u_2 + \hat u_N \\
	\vdots & \Longrightarrow & \vdots \\
	u_{N-1}-u_N = \hat u_{N-1} & & u_{N-1} = \hat u_{N-1} + \hat u_N \\
	u_N = \hat u_N & & u_N = \hat u_N,
	\end{array} \label{eq:unicity1} \end{equation}
	the unicity condition can be specified by simply imposing

	\begin{equation}
	\hat u_1=\hat u_2=\ldots=\hat u_{N-1}=0. \label{eq:unicity2}
	\end{equation}
	From (\ref{eq:unicity1}), the \emph{transformation matrix} $\mathbf U$ is defined
	as

	\begin{equation}
	\mathbf{u} = \mathbf{ U \cdot\hat u}, \qquad \mathbf{U} =
	\left(\begin{matrix}
	1 & 0 & \dots & 0 & 1 \\
	0 & 1 & \dots & 0 & 1 \\
	& & \ddots& & \vdots \\
	0 & 0 & \dots & 1 & 1 \\
	0 & 0 & \dots & 0 & 1
	\end{matrix}\right), \quad \mathbf{U^{T}} =
	\left(\begin{matrix}
	1 & 0 & \dots & 0 & 0 \\
	0 & 1 & \dots & 0 & 0 \\
	& & \ddots& & \vdots \\
	0 & 0 & \dots & 1 & 0 \\
	1 & 1 & \dots & 1 & 1
	\end{matrix}\right).
	\end{equation}


	\paragraph{Matrix product $ \mathbf{A\cdot U}$}
	\begin{equation}
	\mathbf{ A\cdot U} =
	\left(\begin{array}{lllll}
	A_{1,1} & A_{1,2} & \dots & A_{1,N-1} & \sum_{j} A_{1,j} \\
	A_{2,1} & A_{2,2} & \dots & A_{2,N-1} & \sum_{j} A_{2,j} \\
	& & \ddots& & \vdots \\
	A_{N-1,1} & A_{N-1,2} & \dots & A_{N-1,N-1} & \sum_{j}A_{N-1,j} \\
	A_{N,1} & A_{N,2} & \dots & A_{N,N-1} & \sum_{j}A_{N,j}
	\end{array}\right).
	\end{equation}
	Thus \emph{right multiply by $\mathbf{U}$} is equivalent to put the
	\emph{the sum of each row on the last column}.

	\paragraph{Matrix product $ \mathbf{ U^T \cdot A}$}
	\begin{equation}
	\mathbf{ U^T \cdot A} =
	\left(\begin{array}{lllll}
	A_{1,1} & A_{1,2} & \dots & A_{1,N-1} & A_{1,N} \\
	A_{2,1} & A_{2,2} & \dots & A_{2,N-1} & A_{2,N} \\
	& & \ddots& & \vdots \\
	A_{N-1,1} & A_{N-1,2} & \dots & A_{N-1,N-1} & A_{N-1,N} \\
	\sum_{i}A_{i,1} & \sum_{i}A_{i,2} & \dots & \sum_{i}A_{i,N-1} &
	\sum_{i}A_{i,N}
	\end{array}\right).
	\end{equation}
	Thus \emph{left multiply by $\mathbf{\hat U}$} is equivalent to put the
	\emph{the sum of each column on the last row}.

	\paragraph{Product $ \mathbf{\hat U \cdot b}$}
	\begin{equation}
	\mathbf{\hat b} = \mathbf{U^T\cdot b} =
	\left(\begin{array}{l}
	b_1 \\
	b_2 \\
	\vdots \\
	b_{N-1} \\
	\sum_{i} b_{i}
	\end{array}\right),
	\end{equation}

	\paragraph{Transformation of the original matrix equation}
	The full original linear system, obtained from the discretization of the
	2D $r,\theta$ polar coordinates can be written as:

	\begin{equation}
	\left(\begin{array}{ll}
	\mathbf{A} & \mathbf{B} \\
	\mathbf{C} & \mathbf{D}
	\end{array}\right)
	\left(\begin{array}{l}
	\mathbf{u} \\
	\mathbf{v}
	\end{array}\right) =
	\left(\begin{array}{l}
	\mathbf{b} \\
	\mathbf{c}
	\end{array}\right), \label{eq:orig_matrix_eq}
	\end{equation}
	where the solution array is split into the solutions $\mathbf{u}$ at $r=0$ and
	the solutions $\mathbf{v}$ on the remaining domain. The transformed system can
	thus be written as

	\begin{equation*}
	\left(\begin{array}{ll}
	\mathbf{U^T} & 0 \\
	0 & \mathbf{I}
	\end{array}\right)
	\left(\begin{array}{ll}
	\mathbf{A} & \mathbf{B} \\
	\mathbf{C} & \mathbf{D}
	\end{array}\right)
	\left(\begin{array}{ll}
	\mathbf{U} & 0 \\
	0 & \mathbf{I}
	\end{array}\right)
	\left(\begin{array}{l}
	\mathbf{\hat u} \\
	\mathbf{v}
	\end{array}\right) =
	\left(\begin{array}{ll}
	\mathbf{U^T} &0 \\
	0 & \mathbf{I}
	\end{array}\right)
	\left(\begin{array}{l}
	\mathbf{b} \\
	\mathbf{c}
	\end{array}\right),
	\end{equation*}

	\begin{equation}
	\Longrightarrow
	\left(\begin{array}{cc}
	\mathbf{U^TAU} & \mathbf{U^TB} \\
	\mathbf{CU} & \mathbf{D}
	\end{array}\right)
	\left(\begin{array}{l}
	\mathbf{\hat u} \\
	\mathbf{v}
	\end{array}\right) =
	\left(\begin{array}{c}
	\mathbf{U^Tb} \\
	\mathbf{c}
	\end{array}\right),
	\end{equation}
	Notice that the transformation preserves any symmetry existing in the original system
	(\ref{eq:orig_matrix_eq}). The transformed matrix is finally given in the following where
	only the modified elements are shown and the sum is only over the first $N$
	points in $\theta$ direction. The $\times$ symbol denotes unmodified elements.

	\begin{equation}
	\left(\begin{array}{lllllll}
	\times & \times & \times & \times & \sum_{j} A_{1,j} & \times & \times \\
	\times & \times & \times & \times & \sum_{j} A_{2,j} & \times & \times \\
	\times & \times & \times & \times & \vdots & \times & \times \\
	\times & \times & \times & \times & \sum_{j} A_{N-1,j} & \times & \times \\
	\sum_{i}A_{i,1} & \sum_{i}A_{i,2} & \dots & \sum_{i}A_{i,N-1} &
	\sum_{i,j}A_{i,j} & \sum_{i}A_{i,N+1} & \dots \\
	\times & \times & \times & \times & \sum_{j} A_{N+1,j} & \times & \times \\
	\times & \times & \times & \times & \vdots & \times & \times
	\end{array}\right)
	\end{equation}
	Only the $N^{th}$ column and the $N^{th}$ row are affected by the transformation.
	Applying now the unicity condition (\ref{eq:unicity2}) the final transformed system
	reads:

	\begin{equation}
	\left(\begin{array}{lllllll}
	1 & 0 & \dots & 0 & 0 & 0 & 0 \\
	0 & 1 & \dots & 0 & 0 & 0 & 0 \\
	0 & 0 & \ddots & 0 & \vdots & 0 & 0 \\
	0 & 0 & \dots & 1 & 0 & 0 & 0 \\
	0 & 0 & \dots & 0 & \sum_{i,j}A_{i,j} & \sum_{i}A_{i,N+1} & \dots \\
	0 & 0 & \dots & 0 & \sum_{j} A_{N+1,j} & \times & \times \\
	0 & 0 & \dots & 0 & \vdots & \times & \times
	\end{array}\right)
	\left(\begin{array}{l}
	\hat u_1 \\
	\hat u_2 \\
	\vdots\\
	\hat u_{N-1}\\
	\hat u_{N} \\
	u_{N+1} \\
	\vdots
	\end{array}\right) =
	\left(\begin{array}{l}
	0 \\
	0 \\
	\vdots\\
	0 \\
	\sum_{i} b_{i} \\
	b_{N+1} \\
	\vdots
	\end{array}\right).
	\end{equation}


	\section{{\tt MATRIX} Reference}
	\label{matRef}

	The following conventions are adopted in the routine descriptions:
	\begin{itemize}
	\item {\tt [z]} means optional: for example {\tt TYPE([z]gemat)}
	declares a variable which can be of type {\tt gemat} or {\tt zgemat}.
	\item The symbol ``$\|$'' is the logical {\tt OR} operator. Thus
	\begin{verbatim}
	TYPE([z]gemat\|[z]gbmat) :: mat
	\end{verbatim}
	declares that {\tt mat} can be either of type {\tt gemat}, {\tt
	zgemat}, {\tt pbmat} or {\tt zpbmat}.
	\item In a same declaration block, if a scalar or array of type {\tt
	DOUBLE PRECISION\|COMPLEX} is declared together with a matrix object
	which can be also complex, both should be either real
	or complex. For example in the routine {\tt updtmat}, if {\tt mat}
	of type {\tt zgbmat}, {\tt val} should be complex.
	\end{itemize}

	\subsection{init}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	TYPE([z]gemat) :: mat
	SUBROUTINE init(n, nterms, mat ,kmat)
	TYPE([z]gbmat) :: mat
	SUBROUTINE init(kl, ku, n, nterms, mat, kmat)
	TYPE([z]pbmat) :: mat
	SUBROUTINE init(ku, n, nterms, mat, kmat)
	TYPE([z]periodic_mat) :: mat
	SUBROUTINE init(kl, ku, n, nterms, mat, kmat)
	INTEGER, INTENT(in) :: kl, ku, n, nterms
	INTEGER, OPTIONAL :: kmat
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Initialize data structure for matrix
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	n : rank of matrix
	kl, ku : number of sub and super diagonals
	nterms : number of terms in weak form
	kmat : matrix id
	mat : matrix object
	\end{verbatim}
	\end{description}

	\subsection{destroy}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE destroy(mat)
	TYPE([z]gemat\|[z]gbmat\|[z]pbmat\|[z]periodic_mat) :: mat
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Free matrix memory
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	\end{verbatim}
	\end{description}

	\subsection{updmat}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE updtmat(mat, i, j, val)
	TYPE([z]gemat\|[z]gbmat\|[z]pbmat\|[z]periodic_mat) :: mat
	INTEGER, INTENT(IN) :: i, j
	DOUBLE PRECISION\|COMPLEX, INTENT(IN) :: val
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Update (accumulate) element $A_{ij}$
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	i : row index
	j : column index
	val : input value
	\end{verbatim}
	\end{description}

	\subsection{putele}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE putele(mat, i, j, val)
	TYPE([z]gemat\|[z]gbmat\|[z]pbmat\|[z]periodic_mat) :: mat
	INTEGER, INTENT(IN) :: i, j
	DOUBLE PRECISION\|COMPLEX, INTENT(IN) :: val
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Overwrite element $A_{ij}$
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	i : row index
	j : column index
	val : input value
	\end{verbatim}
	\end{description}

	\subsection{putrow}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE putrow(mat, i, arr)
	TYPE([z]gemat\|[z]gbmat\|[z]pbmat\|[z]periodic_mat) :: mat
	INTEGER, INTENT(IN) :: i
	DOUBLE PRECISION\|COMPLEX, INTENT(IN) :: arr(:)
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Overwrite a matrix row
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	i : row index
	arr : input array
	\end{verbatim}
	\end{description}

	\subsection{putcol}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE putcol(mat, j, arr)
	TYPE([z]gemat\|[z]gbmat\|[z]pbmat\|[z]periodic_mat) :: mat
	INTEGER, INTENT(IN) :: j
	DOUBLE PRECISION\|COMPLEX, INTENT(IN) :: arr(:)
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Overwrite a matrix row
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	j : column index
	arr : input array
	\end{verbatim}
	\end{description}

	\subsection{getele}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE getele(mat, i, j, val)
	TYPE([z]gemat\|[z]gbmat\|[z]pbmat\|[z]periodic_mat) :: mat
	INTEGER, INTENT(IN) :: i, j
	DOUBLE PRECISION\|COMPLEX, INTENT(OUT) :: val
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Get element $A_{ij}$
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	i : row index
	j : column index
	val : output value
	\end{verbatim}
	\end{description}

	\subsection{getrow}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE getrow(mat, i, arr)
	TYPE([z]gemat\|[z]gbmat\|[z]pbmat\|[z]periodic_mat) :: mat
	INTEGER, INTENT(IN) :: i
	DOUBLE PRECISION\|COMPLEX, INTENT(OUT) :: arr(:)
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Get a matrix row
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	i : row index
	arr : output array
	\end{verbatim}
	\end{description}

	\subsection{getcol}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE getcol(mat, j, arr)
	TYPE([z]gemat\|[z]gbmat\|[z]pbmat\|[z]periodic_mat) :: mat
	INTEGER, INTENT(IN) :: j
	DOUBLE PRECISION\|COMPLEX, INTENT(OUT) :: arr(:)
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Get a matrix column
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	i : column index
	arr : output array
	\end{verbatim}
	\end{description}

	\subsection{vmx}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	FUNCTION vmx(mat, x)
	TYPE([z]gemat\|[z]gbmat\|[z]pbmat\|[z]periodic_mat) :: mat
	DOUBLE PRECISION\|COMPLEX, DIMENSION(:), INTENT(in) :: x
	DOUBLE PRECISION\|COMPLEX, DIMENSION(SIZE(x)) :: vmx
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Matrix-vector product $Ax$
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	x : input array
	vmx : output array
	\end{verbatim}
	\end{description}

	\subsection{mcopy}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE mcopy(mata, matb)
	TYPE([z]gemat\|[z]gbmat\|[z]pbmat\|[z]periodic_mat) :: mata, matb
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Matrix copy: $B = A$
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mata : input matrix object
	matb : output matrix object
	\end{verbatim}
	\end{description}

	\subsection{maddto}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE maddto(mata, alpha, matb)
	TYPE([z]gemat\|[z]gbmat\|[z]pbmat\|[z]periodic_mat) :: mata, matb
	DOUBLE PRECISION\|COMPLEX : alpha
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Matrix addition: $A \leftarrow A+\alpha B$
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mata : input matrix object
	matb : output matrix object
	alpha : input scalar
	\end{verbatim}
	\end{description}

	\subsection{determinant}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE determinant(mat, base, pow)
	TYPE([z]gemat\|[z]gbmat\|[z]pbmat) :: mat
	INTEGER, INTENT(out) :: pow
	DOUBLE PRECISION\|COMPLEX : base
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Returns the determinant of matrix as $D = \text{base}\times 10^{\text{pow}}$
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : input matrix object
	base : mantissa of determinant
	pow : exponent of determinant
	\end{verbatim}
	\end{description}

	\subsection{factor}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE factor(mat)
	TYPE([z]gemat\|[z]gbmat\|[z]pbmat\|[z]periodic_mat) :: mat
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	LU (Cholesky for symmetric/hermitian matrix) factorization
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : inout matrix object
	\end{verbatim}
	\end{description}

	\subsection{bsolve}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE bsolve(mat)
	TYPE([z]gemat\|[z]gbmat\|[z]pbmat\|[z]periodic_mat) :: mat
	DOUBLE PRECISION\|COMPLEX, DIMENSION [(:)] :: rhs
	DOUBLE PRECISION\|COMPLEX, DIMENSION [(:),] OPTIONAL, INTENT (out) :: sol
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Solve the linear system using the factored matrix, for a single or
	multiple right-hand-side
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : input factored matrix object
	rhs : input right-hand-side, overwriten by the solution if sol is not present
	sol : contains solution
	\end{verbatim}
	\end{description}

	\section{{\tt SPMAT} Reference}
	\label{spmatRef}

	The following conventions are adopted in the routine descriptions:
	\begin{itemize}
	\item {\tt [z]} means optional: for example {\tt TYPE([z]gemat)}
	declares a variable which can be of type {\tt gemat} or {\tt zgemat}.
	\item The symbol ``$\|$'' is the logical {\tt OR} operator. Thus
	\begin{verbatim}
	TYPE([z]gemat\|[z]gbmat) :: mat
	\end{verbatim}
	declares that {\tt mat} can be either of type {\tt gemat}, {\tt
	zgemat}, {\tt pbmat} or {\tt zpbmat}.
	\item In a same declaration block, if a scalar or array of type {\tt
	DOUBLE PRECISION\|COMPLEX} is declared together with a matrix object
	which can be also complex, both should be either real
	or complex. For example in the routine {\tt updtmat}, if {\tt mat}
	of type {\tt zgbmat}, {\tt val} should be complex.
	\end{itemize}

	\subsection{init}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE init(n, mat, istart, iend)
	TYPE([z]spmat) :: mat
	INTEGER, INTENT(in), OPTIONAL :: istart, iend
	INTEGER, INTENT(in) :: n
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Initialize an empty sparse matrix of $n$ rows.
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	n : rank of matrix
	mat : matrix object
	istart, iend : range of row indices. By default istart=1, iend=n
	\end{verbatim}
	\end{description}

	\subsection{destroy}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE destroy(mat)
	TYPE([z]spmat) :: mat
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Free matrix memory
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	\end{verbatim}
	\end{description}

	\subsection{updmat}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE updtmat(mat, i, j, val)
	TYPE([z]spmat) :: mat
	INTEGER, INTENT(IN) :: i, j
	DOUBLE PRECISION\|COMPLEX, INTENT(IN) :: val
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Update (accumulate) an existing element $A_{ij}$ or insert it in the
	linked list in an increasing order in the column index j.
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	i : row index
	j : column index
	val : input value
	\end{verbatim}
	\end{description}

	\subsection{putele}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE putele(mat, i, j, val, nlforce_zero)
	TYPE([z]pbmat) :: mat
	INTEGER, INTENT(IN) :: i, j
	DOUBLE PRECISION\|COMPLEX, INTENT(IN) :: val
	LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Overwrite an existing element $A_{ij}$ or insert it in the
	linked list in an increasing order in the column index j.
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	i : row index
	j : column index
	val : input value
	nlforce_zero : Never remove an existing element when input is zero if TRUE
	FALSE by default
	\end{verbatim}
	\end{description}

	\subsection{putrow}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE putrow(mat, i, arr, col, nlforce_zero)
	TYPE([z]spmat) :: mat
	INTEGER, INTENT(IN) :: i
	DOUBLE PRECISION\|COMPLEX, INTENT(IN) :: arr(:)
	INTEGER, INTENT(in), OPTIONAL :: col(:)
	LOGICAL, INTENT(in), OPTIONAL :: nlforce_zero
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Overwrite a matrix row
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	i : row index
	arr : input array
	col : input array containing column indices
	nlforce_zero : Never remove an existing element when input is zero if TRUE
	FALSE by default
	\end{verbatim}
	\end{description}

	\subsection{putcol}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE putcol(mat, j, arr, nlforce_zero)
	TYPE([z]spmat) :: mat
	INTEGER, INTENT(IN) :: j
	DOUBLE PRECISION\|COMPLEX, INTENT(IN) :: arr(:)
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Overwrite a matrix row
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	j : column index
	arr : input array
	nlforce_zero : Never remove an existing non-zero element if .TRUE.
	.FALSE. by default
	\end{verbatim}
	\end{description}

	\subsection{getele}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE getele(mat, i, j, val)
	TYPE([z]spmat) :: mat
	INTEGER, INTENT(IN) :: i, j
	DOUBLE PRECISION\|COMPLEX, INTENT(OUT) :: val
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Get element $A_{ij}$
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	i : row index
	j : column index
	val : output value
	\end{verbatim}
	\end{description}

	\subsection{getrow}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE getrow(mat, i, arr, col)
	TYPE([z]spmat) :: mat
	INTEGER, INTENT(IN) :: i
	DOUBLE PRECISION\|COMPLEX, INTENT(OUT) :: arr(:)
	INTEGER, INTENT(out), OPTIONAL :: col(:)
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Get a matrix row and optionally the column indices
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	i : row index
	arr : output array
	col : output array containing column indices
	\end{verbatim}
	\end{description}

	\subsection{getcol}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE getcol(mat, j, arr)
	TYPE([z]spmat) :: mat
	INTEGER, INTENT(IN) :: j
	DOUBLE PRECISION\|COMPLEX, INTENT(OUT) :: arr(:)
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Get a matrix column
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	i : column index
	arr : output array
	\end{verbatim}
	\end{description}

	\subsection{get\_count}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	INTEGER FUNCTION get_count(mat, nnz)
	TYPE([z]spmat) :: mat
	INTEGER, INTENT(out), OPTIONAL :: nnz(:)
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Returns the number of non-zeros and optionally an array of numbers
	of non-zeros on each row
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	nnz : array containing numbers of non-zeros on each row.
	\end{verbatim}
	\end{description}

	\section{{\tt PARDISO\_BSPLINES} Reference}
	\label{pardisoRef}
	The subroutines {\tt updmat, putele, putrow, putcol, getele, getrow,
	getcol, vmx, mcopy, maddto} and {\tt destroy}
	have \emph{exactly} the same list of arguments as
	those from the {\tt MATRIX} module (as documented in
	Appendix~\ref{matRef}), except for the matrix types. Below, we show only the routines
	that have different arguments. The same conventions as before are used
	for the routine description.

	\subsection{init}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE init(n, nterms, mat, kmat, nlsym, [nlherm,] nlpos, &
	& nlforce_zero)
	INTEGER, INTENT(in) :: n, nterms
	TYPE([z]pardiso_mat) :: mat
	INTEGER, OPTIONAL, INTENT(in) :: kmat
	LOGICAL, OPTIONAL, INTENT(in) :: nlsym
	LOGICAL, OPTIONAL, INTENT(in) :: nlpos
	LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Initialize the PARDISO solver. A SPMAT matrix of $n$ empty rows is initialized.
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	n : rank of matrix
	nterms : number of terms in weak form
	kmat : matrix id
	mat : matrix object
	nlsym : symmetric or not. Default is .FALSE.
	nlherm : Hermitian or not for complex matrix . Default is .FALSE.
	nlpos : Positive-definite or not. Default is .TRUE.
	nlforce_zero : Never remove an existing non-zero element if .TRUE.
	.TRUE. by default
	\end{verbatim}
	\end{description}

	\subsection{clear\_mat}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE clear_mat(mat)
	TYPE([z]pardiso_mat) :: mat
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Clear matrix, keeping its sparse structure unchanged
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	\end{verbatim}
	\end{description}

	\subsection{psum\_mat}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE sum_mat(mat, comm)
	TYPE([z]pardiso_mat) :: mat
	INTEGER, INTENT(in) :: comm
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Parallel sum of matrices. Result matrix is placed in the sparse
	matrix mat\%mat on all processes of comm.
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	comm : communicator
	\end{verbatim}
	\end{description}

	\subsection{p2p\_mat}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE p2p_mat(mat, dest, extyp, op, comm)
	TYPE([z]pardiso_mat) :: mat
	INTEGER, INTENT(in) :: dest
	CHARACTER(len=*), INTENT(in) :: extyp ! ('send', 'recv', 'sendrecv')
	CHARACTER(len=*), INTENT(in) :: op ! ('put', 'updt')
	INTEGER, INTENT(in) :: comm
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Point-to-point combine sparse matrix between 2 processes.
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	dest : rank of remote process
	extyp : exchange type ('send', 'recv', 'sendrecv')
	op : operation type ('put', 'updt')
	comm : communicator
	\end{verbatim}
	\end{description}

	\subsection{get\_count}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	INTEGER FUNCTION get_count(mat, nnz)
	TYPE([z]pardiso_mat) :: mat
	INTEGER, INTENT(out), OPTIONAL :: nnz(:)
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Returns the number of non-zeros and optionally an array of numbers
	of non-zeros on each row
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	nnz : array containing numbers of non-zeros on each row.
	\end{verbatim}
	\end{description}

	\subsection{factor}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE factor(mat, nlreord, nlmetis, debug)
	TYPE([z]pardiso_mat) :: mat
	LOGICAL, OPTIONAL, INTENT(in) :: nlreord
	LOGICAL, OPTIONAL, INTENT(in) :: nlmetis
	LOGICAL, OPTIONAL, INTENT(in) :: debug
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Wrapper of to\_mat, reord\_mat and numfact
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	nlreord : call reord_mat if .TRUE. (default is .TRUE.)
	nlmetis : use METIS nested dissection for reoredering. Default
	is minimum degree alogorithm.
	debug : verbose output from PARDISO if .TRUE. Default is .FALSE.
	\end{verbatim}
	\end{description}

	\subsection{bsolve}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE bsolve_pardiso_mat1(mat, rhs, sol, nref, debug)
	TYPE([z]pardiso_mat) :: mat
	DOUBLE PRECISION\|COMPLEX :: rhs(:)
	DOUBLE PRECISION\|COMPLEX, OPTIONAL :: sol(:)
	INTEGER, OPTIONAL :: nref
	LOGICAL, OPTIONAL, INTENT(in) :: debug
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Wrapper of to\_mat, reord\_mat and numfact
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	rhs : input right-hand-side, overwriten by the solution if sol is not present
	sol : contains solution
	ref : maximum number of refinement steps. Default is 0 (no refinement).
	debug : verbose output from PARDISO if .TRUE. Default is .FALSE.
	\end{verbatim}
	\end{description}

	\subsection{to\_mat}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE to_mat(mat)
	TYPE([z]pardiso_mat) :: mat
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Convert linked list spmat to pardiso matrix structure
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	\end{verbatim}
	\end{description}

	\subsection{reord\_mat}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE reord_mat(mat, nlmetis, debug)
	TYPE([z]pardiso_mat) :: mat
	LOGICAL, OPTIONAL, INTENT(in) :: nlmetis
	LOGICAL, OPTIONAL, INTENT(in) :: debug
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Reordering and symbolic factorization
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	nlmetis : use METIS nested dissection for reoredering. Default
	is minimum degree alogorithm.
	debug : verbose output from PARDISO if .TRUE. Default is .FALSE.
	\end{verbatim}
	\end{description}

	\subsection{numfact}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE numfact(mat, debug)
	TYPE([z]pardiso_mat) :: mat
	LOGICAL, OPTIONAL, INTENT(in) :: debug
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Numerical factorization
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	debug : verbose output from PARDISO if .TRUE. Default is .FALSE.
	\end{verbatim}
	\end{description}

	\section{{\tt [P]WSMP\_BSPLINES} Reference}
	\label{wsmpRef}
	The subroutines {\tt updmat, putele, putrow, putcol, getele, getrow,
	getcol, vmx, mcopy, maddto} and {\tt destroy}
	have \emph{exactly} the same list of arguments as
	those from the {\tt MATRIX} module (as documented in
	Appendix~\ref{matRef}), except for the matrix types. Below, we show only the routines
	that have different arguments. The same conventions as before are used
	for the routine description.

	\subsection{init}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE init(n, nterms, mat, kmat, nlsym, [nlherm,] nlpos, &
	& nlforce_zero, [comm_in])
	INTEGER, INTENT(in) :: n, nterms
	TYPE([z]wsmp_mat) :: mat
	INTEGER, OPTIONAL, INTENT(in) :: kmat
	LOGICAL, OPTIONAL, INTENT(in) :: nlsym
	LOGICAL, OPTIONAL, INTENT(in) :: nlpos
	LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero
	INTEGER, OPTIONAL, INTENT(in) :: comm_in
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Initialize the WSMP solver. A SPMAT matrix of $n$ empty rows is initialized.
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	n : rank of matrix
	nterms : number of terms in weak form
	kmat : matrix id
	mat : matrix object
	nlsym : symmetric or not. Default is .FALSE.
	nlherm : Hermitian or not for complex matrix . Default is .FALSE.
	nlpos : Positive-definite or not. Default is .TRUE.
	nlforce_zero : Never remove an existing non-zero element if .TRUE.
	.TRUE. by default
	comm_in : MPI communicator. By default MPI_COMM_WORLD (only in PWSMP_BSPLINES)
	\end{verbatim}
	\end{description}

	\subsection{clear\_mat}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE clear_mat(mat)
	TYPE([z]wsmp_mat) :: mat
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Clear matrix, keeping its sparse structure unchanged
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	\end{verbatim}
	\end{description}

	\subsection{psum\_mat}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE sum_mat(mat, comm)
	TYPE([z]wsmp_mat) :: mat
	INTEGER, INTENT(in) :: comm
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Parallel sum of matrices. Result matrix is placed in the sparse
	matrix mat\%mat on all processes of comm.
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	comm : communicator
	\end{verbatim}
	\end{description}

	\subsection{p2p\_mat}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE p2p_mat(mat, dest, extyp, op, comm)
	TYPE([z]wsmp_mat) :: mat
	INTEGER, INTENT(in) :: dest
	CHARACTER(len=*), INTENT(in) :: extyp ! ('send', 'recv', 'sendrecv')
	CHARACTER(len=*), INTENT(in) :: op ! ('put', 'updt')
	INTEGER, INTENT(in) :: comm
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Point-to-point combine sparse matrix between 2 processes.
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	dest : rank of remote process
	extyp : exchange type ('send', 'recv', 'sendrecv')
	op : operation type ('put', 'updt')
	comm : communicator
	\end{verbatim}
	\end{description}

	\subsection{get\_count}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	INTEGER FUNCTION get_count(mat, nnz)
	TYPE([z]wsmp_mat) :: mat
	INTEGER, INTENT(out), OPTIONAL :: nnz(:)
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Returns the number of non-zeros and optionally an array of numbers
	of non-zeros on each row
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	nnz : array containing numbers of non-zeros on each row.
	\end{verbatim}
	\end{description}

	\subsection{factor}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE factor(mat, nlreord)
	TYPE([z]wsmp_mat) :: mat
	LOGICAL, OPTIONAL, INTENT(in) :: nlreord
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Wrapper of to\_mat, reord\_mat and numfact
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	nlreord : call reord_mat if .TRUE. (default is .TRUE.)
	\end{verbatim}
	\end{description}

	\subsection{bsolve}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE bsolve_wsmp_mat1(mat, rhs, sol, nref)
	TYPE([z]wsmp_mat) :: mat
	DOUBLE PRECISION\|COMPLEX :: rhs(:)
	DOUBLE PRECISION\|COMPLEX, OPTIONAL :: sol(:)
	INTEGER, OPTIONAL :: nref
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Wrapper of to\_mat, reord\_mat and numfact
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	rhs : input right-hand-side, overwriten by the solution if sol is not present
	sol : contains solution
	ref : maximum number of refinement steps. Default is 0 (no refinement).
	debug : verbose output from WSMP if .TRUE. Default is .FALSE.
	\end{verbatim}
	\end{description}

	\subsection{to\_mat}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE to_mat(mat)
	TYPE([z]wsmp_mat) :: mat
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Convert linked list spmat to wsmp matrix structure
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	\end{verbatim}
	\end{description}

	\subsection{reord\_mat}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE reord_mat(mat)
	TYPE([z]wsmp_mat) :: mat
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Reordering and symbolic factorization
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	\end{verbatim}
	\end{description}

	\subsection{numfact}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE numfact(mat)
	TYPE([z]wsmp_mat) :: mat
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Numerical factorization
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	\end{verbatim}
	\end{description}

	\section{{\tt MUMPS\_BSPLINES} Reference}
	\label{mumpsRef}
	The subroutines {\tt updmat, putele, putrow, putcol, getele, getrow,
	getcol, vmx, mcopy, maddto} and {\tt destroy}
	have \emph{exactly} the same list of arguments as
	those from the {\tt MATRIX} module (as documented in
	Appendix~\ref{matRef}), except for the matrix types. Below, we show only the routines
	that have different arguments. The same conventions as before are used
	for the routine description.

	\subsection{init}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE init(n, nterms, mat, kmat, nlsym, [nlherm,] nlpos, &
	& nlforce_zero, comm_in)
	INTEGER, INTENT(in) :: n, nterms
	TYPE([z]mumps_mat) :: mat
	INTEGER, OPTIONAL, INTENT(in) :: kmat
	LOGICAL, OPTIONAL, INTENT(in) :: nlsym
	LOGICAL, OPTIONAL, INTENT(in) :: nlpos
	LOGICAL, OPTIONAL, INTENT(in) :: nlforce_zero
	INTEGER, OPTIONAL, INTENT(in) :: comm_in
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Initialize the MUMPS solver. A SPMAT matrix of $n$ empty rows is initialized.
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	n : rank of matrix
	nterms : number of terms in weak form
	kmat : matrix id
	mat : matrix object
	nlsym : symmetric or not. Default is .FALSE.
	nlherm : Hermitian or not for complex matrix . Default is .FALSE.
	nlpos : Positive-definite or not. Default is .TRUE.
	nlforce_zero : Never remove an existing non-zero element if .TRUE.
	.TRUE. by default
	comm_in : MPI communicator. By default MPI_COMM_SELF (serial mode).
	\end{verbatim}
	\end{description}

	\subsection{clear\_mat}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE clear_mat(mat)
	TYPE([z]mumps_mat) :: mat
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Clear matrix, keeping its sparse structure unchanged
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	\end{verbatim}
	\end{description}

	\subsection{psum\_mat}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE sum_mat(mat, comm)
	TYPE([z]mumps_mat) :: mat
	INTEGER, INTENT(in) :: comm
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Parallel sum of matrices. Result matrix is placed in the sparse
	matrix mat\%mat on all processes of comm.
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	comm : communicator
	\end{verbatim}
	\end{description}

	\subsection{p2p\_mat}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE p2p_mat(mat, dest, extyp, op, comm)
	TYPE([z]mumps_mat) :: mat
	INTEGER, INTENT(in) :: dest
	CHARACTER(len=*), INTENT(in) :: extyp ! ('send', 'recv', 'sendrecv')
	CHARACTER(len=*), INTENT(in) :: op ! ('put', 'updt')
	INTEGER, INTENT(in) :: comm
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Point-to-point combine sparse matrix between 2 processes.
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	dest : rank of remote process
	extyp : exchange type ('send', 'recv', 'sendrecv')
	op : operation type ('put', 'updt')
	comm : communicator
	\end{verbatim}
	\end{description}

	\subsection{get\_count}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	INTEGER FUNCTION get_count(mat, nnz)
	TYPE([z]mumps_mat) :: mat
	INTEGER, INTENT(out), OPTIONAL :: nnz(:)
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Returns the number of non-zeros and optionally an array of numbers
	of non-zeros on each row
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	nnz : array containing numbers of non-zeros on each row.
	\end{verbatim}
	\end{description}

	\subsection{factor}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE factor(mat, nlreord)
	TYPE([z]mumps_mat) :: mat
	LOGICAL, OPTIONAL, INTENT(in) :: nlreord
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Wrapper of to\_mat, reord\_mat and numfact
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	nlreord : call reord_mat if .TRUE. (default is .TRUE.)
	\end{verbatim}
	\end{description}

	\subsection{bsolve}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE bsolve_mumps_mat1(mat, rhs, sol, nref)
	TYPE([z]mumps_mat) :: mat
	DOUBLE PRECISION\|COMPLEX :: rhs(:)
	DOUBLE PRECISION\|COMPLEX, OPTIONAL :: sol(:)
	INTEGER, OPTIONAL :: nref
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Wrapper of to\_mat, reord\_mat and numfact
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	rhs : input right-hand-side, overwriten by the solution if sol is not present
	sol : contains solution
	ref : maximum number of refinement steps. Default is 0 (no refinement).
	debug : verbose output from MUMPS if .TRUE. Default is .FALSE.
	\end{verbatim}
	\end{description}

	\subsection{to\_mat}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE to_mat(mat)
	TYPE([z]mumps_mat) :: mat
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Convert linked list spmat to mumps matrix structure
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	\end{verbatim}
	\end{description}

	\subsection{reord\_mat}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE reord_mat(mat)
	TYPE([z]mumps_mat) :: mat
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Reordering and symbolic factorization
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	\end{verbatim}
	\end{description}

	\subsection{numfact}
	\par
	\addvspace{\medskipamount}
	\nopagebreak\hrule
	\begin{verbatim}
	SUBROUTINE numfact(mat)
	TYPE([z]mumps_mat) :: mat
	\end{verbatim}
	\nopagebreak\hrule
	\addvspace{\medskipamount}
	\begin{description}
	\item[Purpose:] \mbox{}
	Numerical factorization
	\item[Arguments:] \mbox{}
	\begin{verbatim}
	mat : matrix object
	\end{verbatim}
	\end{description}


	\begin{thebibliography}{99}
	\bibitem{BSPLINES} {\tt BSPLINES} Reference Guide.
	\bibitem{PARDISO} \url{http://www.pardiso-project.org/}
	\bibitem{WSMP} \url{http://www-users.cs.umn.edu/~agupta/wsmp.html}
	\bibitem{MUMPS} \url{http://graal.ens-lyon.fr/MUMPS/}
	\bibitem{McMillan} B. F. McMillan, et. al. \emph{Rapid Fourier space
	solution of linear partial integro-differential equations in
	toroidal magnetic confinement geometries}, Computer Physics
	Communications 181(4),
	715-719 (2010)
	\end{thebibliography}

	\end{document}

solvers.texNo OneTemporaryActions

File Metadata

solvers.texView Options

Event Timeline

solvers.tex
No OneTemporary
Actions

solvers.tex
View Options