!>
!> @file pde2d_wsmp.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see <https://www.gnu.org/licenses/>.
!>
!> @authors
!> (in alphabetical order)
!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
!>
!
!  Solving the 2d PDE using splines and WSMP non-symmetric matrix
!
!    -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my), with f(x=1,y) = 0
!    exact solution: f(x,y) = (1-x^2) x^m cos(my)
!
MODULE pde2d_wsmp_mod
  USE bsplines
  USE wsmp_bsplines
  IMPLICIT NONE
CONTAINS
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  SUBROUTINE dismat(spl, mat)
!
!   Assembly of FE matrix mat using spline spl
!
    TYPE(spline2d), INTENT(in)       :: spl
    TYPE(wsmp_mat), INTENT(inout)    :: mat
!
    INTEGER :: n1, nidbas1, ndim1, ng1
    INTEGER :: n2, nidbas2, ndim2, ng2
    INTEGER :: i, j, ig1, ig2
    INTEGER :: iterm, iw1, iw2, igw1, igw2, it1, it2, igt1, igt2, irow, jcol
    DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:,:)
    DOUBLE PRECISION:: contrib
!
    INTEGER :: kterms         ! Number of terms in weak form
    INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:)  ! Derivative order
    DOUBLE PRECISION, ALLOCATABLE  :: coefs(:,:,:)          ! Terms in weak form
    INTEGER, ALLOCATABLE :: left1(:), left2(:)
!===========================================================================
!              1.0 Prologue
!
!   Properties of spline space
!
    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
    CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
    WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
    WRITE(*,'(a, 5i6)') 'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
!
!   Gauss quadature
!
    CALL get_gauss(spl%sp1, ng1)
    CALL get_gauss(spl%sp2, ng2)
    ALLOCATE(xg1(ng1), wg1(ng1))
    ALLOCATE(xg2(ng1), wg2(ng1))
!
!   Weak form
!
    kterms = mat%nterms
    ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
    ALLOCATE(coefs(kterms,ng1,ng2))
!
    ALLOCATE(fun1(0:nidbas1,0:1,ng1)) ! Spline and 1st derivative
    ALLOCATE(fun2(0:nidbas2,0:1,ng2)) ! 
!===========================================================================
!              2.0 Assembly loop
!
    ALLOCATE(left1(ng1))
    ALLOCATE(left2(ng2))
    DO i=1,n1
       CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
       left1 = i
       CALL basfun(xg1, spl%sp1, fun1, left1)
       DO j=1,n2
          CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
          left2 = j
          CALL basfun(xg2, spl%sp2, fun2, left2)
!
          DO ig1=1,ng1
             DO ig2=1,ng2
                CALL coefeq(xg1(ig1), xg2(ig2), &
                     &      idert(:,:,ig1,ig2), &
                     &      iderw(:,:,ig1,ig2), &
                     &      coefs(:,ig1,ig2))
             END DO
          END DO
!
          DO iw1=0,nidbas1  ! Weight function in dir 1
             igw1 = i+iw1
             DO iw2=0,nidbas2  ! Weight function in dir 2
                igw2 = MODULO(j+iw2-1, n2) + 1
                irow = igw2 + (igw1-1)*n2
                DO it1=0,nidbas1  ! Test function in dir 1
                   igt1 = i+it1
                   DO it2=0,nidbas2  ! Test function in dir 2
                      igt2 = MODULO(j+it2-1, n2) + 1
                      jcol = igt2 + (igt1-1)*n2
!-------------
                      contrib = 0.0d0
                      DO ig1=1,ng1
                         DO ig2=1,ng2
                            DO iterm=1,kterms
                               contrib = contrib + &
                                    &    fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
                                    &    fun2(iw2,iderw(iterm,2,ig1,ig2),ig2) * &
                                    &    coefs(iterm,ig1,ig2) *                 &
                                    &    fun2(it2,idert(iterm,2,ig1,ig2),ig2) * &
                                    &    fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
                                    &    wg1(ig1) * wg2(ig2)
                            END DO
                         END DO
                      END DO
                      CALL updtmat(mat, irow, jcol, contrib)
!-------------
                   END DO
                END DO
             END DO
          END DO
       END DO
    END DO
!===========================================================================
!              9.0  Epilogue
!
    DEALLOCATE(xg1, wg1, fun1)
    DEALLOCATE(xg2, wg2, fun2)
    DEALLOCATE(idert, iderw, coefs)
    DEALLOCATE(left1,left2)
!
  CONTAINS
    SUBROUTINE coefeq(x, y, idt, idw, c)
      DOUBLE PRECISION, INTENT(in) :: x, y
      INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
      DOUBLE PRECISION, INTENT(out) :: c(SIZE(idt,1))
!
! Weak form = Int(x*dw/dx*dt/dx + 1/x*dw/dy*dt/dy)dxdy
!
      c(1) = x        ! 
      idt(1,1) = 1
      idt(1,2) = 0
      idw(1,1) = 1
      idw(1,2) = 0
      !
      c(2) = 1.d0/x
      idt(2,1) = 0
      idt(2,2) = 1
      idw(2,1) = 0
      idw(2,2) = 1
    END SUBROUTINE coefeq
  END SUBROUTINE dismat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  SUBROUTINE disrhs(mbess, spl, rhs)
!
!   Assembly the RHS using 2d spline spl
!
    INTEGER, INTENT(in) :: mbess
    TYPE(spline2d), INTENT(in) :: spl
    DOUBLE PRECISION, INTENT(out) :: rhs(:)
    INTEGER :: n1, nidbas1, ndim1, ng1
    INTEGER :: n2, nidbas2, ndim2, ng2
    INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
    DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
    DOUBLE PRECISION:: contrib
!===========================================================================
!              1.0 Prologue
!
!   Properties of spline space
!
    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
    CALL get_dim(spl%sp2, ndim2, n2, nidbas2) 
!
    ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
    ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
!
!   Gauss quadature
!
    CALL get_gauss(spl%sp1, ng1)
    CALL get_gauss(spl%sp2, ng2)
    ALLOCATE(xg1(ng1), wg1(ng1))
    ALLOCATE(xg2(ng1), wg2(ng1))
!===========================================================================
!              2.0 Assembly loop
!
    nrank = SIZE(rhs)
    rhs(1:nrank) = 0.0d0
!
    DO i=1,n1
       CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
       DO ig1=1,ng1
          CALL basfun(xg1(ig1), spl%sp1, fun1, i)
          DO j=1,n2
             CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
             DO ig2=1,ng2
                CALL basfun(xg2(ig2), spl%sp2, fun2, j)
                contrib = wg1(ig1)*wg2(ig2) * &
                     &    rhseq(xg1(ig1),xg2(ig2), mbess)
                DO k1=0,nidbas1
                   i1 = i+k1
                   DO k2=0,nidbas2
                      j2 = MODULO(j+k2-1,n2) + 1
                      ij = j2 + (i1-1)*n2
                      rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
                   END DO
                END DO
             END DO
          END DO
       END DO
    END DO
!===========================================================================
!              9.0  Epilogue
!
    DEALLOCATE(xg1, wg1, fun1)
    DEALLOCATE(xg2, wg2, fun2)
!
  CONTAINS
    DOUBLE PRECISION FUNCTION rhseq(x1, x2, m)
      DOUBLE PRECISION, INTENT(in) :: x1, x2
      INTEGER, INTENT(in) :: m
      rhseq = REAL(4*(m+1),8)*x1**(m+1)*COS(REAL(m,8)*x2)
    END FUNCTION rhseq
  END SUBROUTINE disrhs
!
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  SUBROUTINE ibcmat(mat, ny)
!
!   Apply BC on matrix
!
    TYPE(wsmp_mat), INTENT(inout) :: mat
    INTEGER, INTENT(in) :: ny
    INTEGER :: nrank, i, j
    DOUBLE PRECISION, ALLOCATABLE :: zsum(:), arr(:)
!===========================================================================
!              1.0 Prologue
!
    nrank = mat%rank
    ALLOCATE(zsum(nrank), arr(nrank))
!===========================================================================
!              2.0 Unicity at the axis
!
!   The vertical sum on the NY-th row
!
    zsum = 0.0d0
    DO i=1,ny
       arr = 0.0d0
       CALL getrow(mat, i, arr)
       zsum(:) = zsum(:) + arr(:)
    END DO
    CALL putrow(mat, ny, zsum)
!
!   The horizontal sum on the NY-th column
!
    zsum = 0.0d0
    DO j=1,ny
       arr = 0.0d0
       CALL getcol(mat, j, arr)
       zsum(ny:) = zsum(ny:) + arr(ny:)
    END DO
    CALL putcol(mat, ny, zsum)
!
!   The away operator
!
    DO j = 1,ny-1
       arr = 0.0d0; arr(j) = 1.0d0
       CALL putcol(mat, j, arr)     
    END DO
!
    DO i = 1,ny-1
       arr = 0.0d0; arr(i) = 1.0d0
       CALL putrow(mat, i, arr)     
    END DO
!  
!===========================================================================
!              3.0 Dirichlet on right boundary
!
    DO j = nrank, nrank-ny+1, -1
       arr = 0.0d0; arr(j) = 1.0d0
       CALL putcol(mat, j, arr)     
    END DO
!
    DO i = nrank, nrank-ny+1, -1
       arr = 0.0d0; arr(i) = 1.0d0
       CALL putrow(mat, i, arr)     
    END DO
!===========================================================================
!              9.0  Epilogue
!
    DEALLOCATE(zsum, arr)
!
  END SUBROUTINE ibcmat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  SUBROUTINE ibcrhs(rhs, ny)
!
!   Apply BC on RHS
!
    DOUBLE PRECISION, INTENT(inout) :: rhs(:)
    INTEGER, INTENT(in) :: ny
    INTEGER :: nrank
    DOUBLE PRECISION :: zsum
!===========================================================================
!              1.0 Prologue
!
    nrank = SIZE(rhs,1)
!===========================================================================
!              2.0 Unicity at the axis
!
!   The vertical sum
!
    zsum = SUM(rhs(1:ny))
    rhs(ny) = zsum
    rhs(1:ny-1) = 0.0d0
!===========================================================================
!              3.0 Dirichlet on right boundary
!
    rhs(nrank-ny+1:nrank) = 0.0d0
  END SUBROUTINE ibcrhs
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
END MODULE pde2d_wsmp_mod
PROGRAM main
  USE pde2d_wsmp_mod
  USE futils
!
  IMPLICIT NONE
  INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms
  LOGICAL :: nlppform
  INTEGER :: i, j, ij, dimx, dimy, nrank, jder(2), it
  DOUBLE PRECISION :: pi, coefx(5), coefy(5)
  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: newsol
  TYPE(spline2d) :: splxy
  TYPE(wsmp_mat) :: mat
!
  CHARACTER(len=128) :: file='pde2d_wsmp.h5'
  INTEGER :: fid
  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr
  DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
  DOUBLE PRECISION :: seconds, mem, dopla
  DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2
  DOUBLE PRECISION :: tconv, treord
  INTEGER :: nits=100
  LOGICAL :: nlforce_zero
  LOGICAL :: nlserial
!
  NAMELIST /newrun/ nx, ny, nidbas, ngauss, mbess, nlppform, &
       &            nlforce_zero, nlserial, coefx, coefy
!===========================================================================
!              1.0 Prologue
!
!   Read in data specific to run
!
  nx = 8              ! Number of intervals in x
  ny = 8              ! Number of intervals in y
  nidbas = (/3,3/)    ! Degree of splines
  ngauss = (/4,4/)    ! Number of Gauss points/interval
  mbess = 2           ! Exponent of differential problem
  nterms = 2          ! Number of terms in weak form
  nlppform = .TRUE.   ! Use PPFORM for gridval or not
  nlforce_zero = .FALSE. ! Remove existing nodes with zero val in  putele/row/ele
  coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
  coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
!
  READ(*,newrun)
  WRITE(*,newrun)
!
!   Define grid on x (=radial) & y (=poloidal) axis
!
  pi = 4.0d0*ATAN(1.0d0)
  ALLOCATE(xgrid(0:nx), ygrid(0:ny))
  xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
  CALL meshdist(coefx, xgrid, nx)
  ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
  CALL meshdist(coefy, ygrid, ny)
!
!   Create hdf5 file
!
  CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
  CALL attach(fid, '/', 'NX', nx)
  CALL attach(fid, '/', 'NY', ny)
  CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
  CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
  CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
  CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
  CALL attach(fid, '/', 'MBESS', mbess)
!===========================================================================
!              2.0 Discretize the PDE
!
!   Set up spline
!
  t0 = seconds()
  CALL set_spline(nidbas, ngauss, &
       &          xgrid, ygrid, splxy, (/.FALSE., .TRUE./), nlppform=nlppform)
!
!   FE matrix assembly
!
  nrank = (nx+nidbas(1))*ny    ! Rank of the FE matrix
  WRITE(*,'(a,i8)') 'nrank', nrank
!
  CALL init(nrank, nterms, mat, nlforce_zero=nlforce_zero)
  CALL dismat(splxy, mat)
  ALLOCATE(arr(nrank))
  IF(nrank.LT.100) THEN
     DO i=1,nrank
        CALL getele(mat, i, i, arr(i))
     END DO
     WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix before BC', arr
  END IF
!
!   BC on Matrix
!
  WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero
  WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
  CALL ibcmat(mat, ny)
  IF(nrank.LT.100) THEN
     DO i=1,nrank
        CALL getele(mat, i, i, arr(i))
     END DO
     WRITE(*,'(a/(10(1pe12.3)))') 'Diag. of matrix after BC', arr
     WRITE(*,'(a)') 'Last rows'
     DO i=nrank-ny,nrank
        CALL getrow(mat, i, arr)
        WRITE(*,'(10(1pe12.3))') arr
     END DO
  END IF
  tmat = seconds() - t0
!
!   RHS assembly
!
  ALLOCATE(rhs(nrank), sol(nrank))
  CALL disrhs(mbess, splxy, rhs)
!
!   BC on RHS
!
  CALL ibcrhs(rhs, ny)
!
  CALL putarr(fid, '/RHS', rhs, 'RHS of linear system')
  WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
  WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem()
!===========================================================================
!              3.0 Solve the dicretized system
!
  t0 = seconds()
  CALL to_mat(mat)
  tconv = seconds() -t0
  WRITE(*,'(/a,l4)') 'associated(mat%mat)', ASSOCIATED(mat%mat)
  WRITE(*,'(a,1pe12.3)') 'Mem used (MB) after TO_MAT', mem()
!
  t0 = seconds()
  CALL reord_mat(mat)
  CALL putmat(fid, '/MAT', mat)
  treord = seconds() - t0
!
  t0 = seconds()
  CALL numfact(mat)
  tfact = seconds() - t0
 
  WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem()
  WRITE(*,'(/a,i6)') 'Number of nonzeros in factor (/1000) = ',mat%p%iparm(24)
  WRITE(*,'(a,1pe12.3)')  'Number of factorization GFLOPS      = ',mat%p%dparm(23)/1.d9
  gflops1 = mat%p%dparm(23) / tfact / 1.d9
!
  CALL bsolve(mat, rhs, sol)
  t0 = seconds()
  DO it=1,nits   ! nits iterations for timing
     CALL bsolve(mat, rhs, sol)
     sol(1:ny-1) = sol(ny)
  END DO
  WRITE(*,'(/a,i6)') 'Number of refinement steps = ',mat%p%iparm(26)
  WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem()
  tsolv = (seconds() - t0)/REAL(nits)
!
!   Spline coefficients, taking into account of periodicity in y
!   Note: in SOL, y was numbered first.
!
  dimx = splxy%sp1%dim
  dimy = splxy%sp2%dim
  ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
  DO j=0,dimy-1
     DO i=0,dimx-1
        ij = MODULO(j,ny) + i*ny + 1
        bcoef(i,j) = sol(ij)
     END DO
  END DO
  WRITE(*,'(a,2(1pe12.3))') ' Integral of sol', fintg(splxy, bcoef)
  CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
!===========================================================================
!              4.0 Check the solution
!
!   Check function values computed with various method
!
  ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
  DO i=0,nx
     DO j=0,ny
        solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
     END DO
  END DO
  jder = (/0,0/)
!
!   Compute PPFORM/BCOEFS at first call to gridval
  CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
!
  WRITE(*,'(/a)') '*** Checking solutions'
  t0 = seconds()
  DO it=1,nits   ! nits iterations for timing
     CALL gridval(splxy, xgrid, ygrid, solcal, jder)
  END DO
  tgrid = (seconds() - t0)/REAL(nits)
  errsol = solana - solcal
  IF( SIZE(bcoef,2) .LE. 10 ) THEN
     CALL prnmat('BCOEF', bcoef)
     CALL prnmat('SOLANA', solana)
     CALL prnmat('SOLCAL', solcal)
     CALL prnmat('ERRSOL', errsol)
  END IF
  WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
       &    norm2(errsol) / norm2(solana)
  WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s)             ', tgrid
!
  CALL putarr(fid, '/xgrid', xgrid, 'r')
  CALL putarr(fid, '/ygrid', ygrid, '\theta')
  CALL putarr(fid, '/sol', solcal, 'Solutions')
  CALL putarr(fid, '/solana', solana,'Exact solutions')
  CALL putarr(fid, '/errors', errsol, 'Errors')
!
!   Check derivatives d/dx and d/dy
!
  WRITE(*,'(/a)') '*** Checking gradient'
  DO i=0,nx
     DO j=0,ny
        IF( mbess .EQ. 0 ) THEN
           solana(i,j) = -2.0d0 * xgrid(i)
        ELSE
           solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
             &           xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
        END IF
     END DO
  END DO
!
  jder = (/1,0/)
  CALL gridval(splxy, xgrid, ygrid, solcal, jder)
  errsol = solana - solcal
  CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
  WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx',  norm2(errsol)
!
  DO i=0,nx
     DO j=0,ny
        solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
     END DO
  END DO
!
  jder = (/0,1/)
  CALL gridval(splxy, xgrid, ygrid, solcal, jder)
  CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
  errsol = solana - solcal
  WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy',  norm2(errsol)
!
  WRITE(*,'(/a)') '---'
  WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
  WRITE(*,'(a,1pe12.3)') 'Matrice conversion time (s)   ', tconv
  WRITE(*,'(a,1pe12.3)') 'Matrice reorder time (s)      ', treord
  WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
  WRITE(*,'(a,1pe12.3)') 'conver. +reorder + factor (s) ', tfact+treord+tconv
  WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s)         ', tsolv
  WRITE(*,'(a,2f10.3)') 'Factor  Gflop/s', gflops1
!===========================================================================
!              5.0 Clear the matrix and recompute
!
  WRITE(*,'(/a)') 'Recompute the solver ...'
  t0 = seconds()
  CALL clear_mat(mat)
  CALL dismat(splxy, mat)
  CALL ibcmat(mat, ny)
  tmat = seconds()-t0
!
  t0 = seconds()
!!$  CALL numfact(mat)
  CALL factor(mat, nlreord=.FALSE.)
  tfact = seconds()-t0
  gflops1 = mat%p%dparm(23) / tfact / 1.d9
!
  t0 = seconds()
  ALLOCATE(newsol(nrank))
  CALL bsolve(mat, rhs, newsol)
  newsol(1:ny-1) = newsol(ny)
  tsolv = seconds()-t0
!
  WRITE(*,'(/a, 1pe12.3)') 'Error =', SQRT(DOT_PRODUCT(newsol-sol,newsol-sol))
  WRITE(*,'(/a)') '---'
  WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
  WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
  WRITE(*,'(a,1pe12.3)') 'Backsolve(2) time (s)         ', tsolv
  WRITE(*,'(a,1pe12.3)') 'Total (s)                     ', tmat+tfact+tsolv
  WRITE(*,'(a,2f10.3)') 'Factor  Gflop/s', gflops1
!
  DEALLOCATE(newsol)
!===========================================================================
!              9.0  Epilogue
!
  DEALLOCATE(xgrid, rhs, sol)
  DEALLOCATE(solcal, solana, errsol)
  DEALLOCATE(bcoef)
  DEALLOCATE(arr)
  CALL destroy_sp(splxy)
  CALL destroy(mat)
!
  CALL closef(fid)
!===========================================================================
!
CONTAINS
  FUNCTION norm2(x)
!
!  Compute the 2-norm of array x
!
    IMPLICIT NONE
    DOUBLE PRECISION :: norm2
    DOUBLE PRECISION, INTENT(in) :: x(:,:)
    DOUBLE PRECISION :: sum2
    INTEGER :: i, j
!
    sum2 = 0.0d0
    DO i=1,SIZE(x,1)
       DO j=1,SIZE(x,2)
          sum2 = sum2 + x(i,j)**2
       END DO
    END DO
    norm2 = SQRT(sum2)
  END FUNCTION norm2
  SUBROUTINE prnmat(label, mat)
    CHARACTER(len=*) :: label
    DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
    INTEGER :: i
    WRITE(*,'(/a)') TRIM(label)
    DO i=1,SIZE(mat,1)
       WRITE(*,'(10(1pe12.3))') mat(i,:)
    END DO
  END SUBROUTINE prnmat
END PROGRAM main
!
!+++
SUBROUTINE meshdist(c, x, nx)
!
!   Construct an 1d  non-equidistant mesh given a
!   mesh distribution function.
!
  IMPLICIT NONE
  DOUBLE PRECISION, INTENT(in) :: c(5)
  INTEGER, INTENT(iN) :: nx
  DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
  INTEGER :: nintg
  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
  DOUBLE PRECISION :: a, b, dx, f0, f1, scal
  INTEGER :: i, k
!
  a=x(0)
  b=x(nx)
  nintg = 10*nx
  ALLOCATE(xint(0:nintg), fint(0:nintg))
!
!  Mesh distribution
!
  dx = (b-a)/REAL(nintg)
  xint(0) = a
  fint(0) = 0.0d0
  f1 = fdist(xint(0))
  DO i=1,nintg
     f0 = f1
     xint(i) = xint(i-1) + dx
     f1 = fdist(xint(i))
     fint(i) = fint(i-1) + 0.5*(f0+f1)
  END DO
!
!  Normalization
!
  scal = REAL(nx) / fint(nintg)
  fint(0:nintg) = fint(0:nintg) * scal
!!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
!
!  Obtain mesh point by (inverse) interpolation
!
  k = 1
  DO i=1,nintg-1
     IF( fint(i) .GE. REAL(k) ) THEN
        x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
             &   (k-fint(i))
        k = k+1
     END IF
  END DO
!
  DEALLOCATE(xint, fint)
CONTAINS
  DOUBLE PRECISION FUNCTION fdist(x)
    DOUBLE PRECISION, INTENT(in) :: x
    fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
  END FUNCTION fdist
END SUBROUTINE meshdist
!+++