!>
!> @file pde2d_sym_pardiso_dft.f90
!>
!> @brief
!>
!> @copyright
!> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne)
!> SPC (Swiss Plasma Center)
!>
!> SPClibs is free software: you can redistribute it and/or modify it under
!> the terms of the GNU Lesser General Public License as published by the Free
!> Software Foundation, either version 3 of the License, or (at your option)
!> any later version.
!>
!> SPClibs is distributed in the hope that it will be useful, but WITHOUT ANY
!> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
!> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
!>
!> You should have received a copy of the GNU Lesser General Public License
!> along with this program. If not, see <https://www.gnu.org/licenses/>.
!>
!> @author
!> (in alphabetical order)
!> @author Trach-Minh Tran <trach-minh.tran@epfl.ch>
!>
!
!  Solving the 2d PDE using splines and PARDISO symmetric matrix.
!  The periodic coordinate y is discrete Fourier transformed.
!
!    -d/dx[x C d/dx]f - 1x/d/dy[Cd/dy] f = \rho, with f(x=1,y) = 0
!     C(x,y) = 1 + \epsilon x cos(y)
!     exact solution: f(x,y) = (1-x^2) x^m cos(my)
!
MODULE pde2d_sym_pardiso_dft_mod
  USE bsplines
  USE pardiso_bsplines
  IMPLICIT NONE
!
CONTAINS
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  SUBROUTINE dismat(spl, epsi, mat)
!
!   Assembly of FE matrix mat using spline spl
!
    TYPE(spline2d), INTENT(in)       :: spl
    DOUBLE PRECISION, INTENT(in)     :: epsi
    TYPE(zpardiso_mat), INTENT(inout) :: mat
!
    INTEGER :: n1, nidbas1, ndim1, ng1
    INTEGER :: n2, nidbas2, ndim2, ng2
    INTEGER :: kmin, kmax, dk
    INTEGER :: i, j, ig1, ig2, kc
    INTEGER :: iterm, iw1, mw, igw1, it1, mt, igt1, irow, jcol
    DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:,:)
    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:)
    DOUBLE COMPLEX, ALLOCATABLE   :: ft_fun2(:,:,:), fft_temp(:)
    DOUBLE COMPLEX :: contrib
!
    INTEGER :: kterms         ! Number of terms in weak form
    INTEGER :: kcoupl         ! Number of mode couplings
    INTEGER, ALLOCATABLE :: idert(:,:,:,:), iderw(:,:,:,:)  ! Derivative order
    DOUBLE COMPLEX, ALLOCATABLE  :: coefs(:,:,:,:)          ! Terms in weak form
    INTEGER, ALLOCATABLE :: left1(:), left2(:)
!===========================================================================
!              1.0 Prologue
!
!   Properties of spline space
!
    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
    CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
    kmin = spl%sp2%dft%kmin
    kmax = spl%sp2%dft%kmax
    dk   = spl%sp2%dft%dk
    WRITE(*,'(/a, 5i6)') 'n1, nidbas1, ndim1 =', n1, nidbas1, ndim1
    WRITE(*,'(a, 5i6)')  'n2, nidbas2, ndim2 =', n2, nidbas2, ndim2
    WRITE(*,'(a, 5i6)')  'kmin, kmax, dk     =', kmin, kmax, dk
!
!   Gauss quadature
!
    CALL get_gauss(spl%sp1, ng1)
    CALL get_gauss(spl%sp2, ng2)
    ALLOCATE(xg1(ng1), wg1(ng1))
    ALLOCATE(xg2(ng2), wg2(ng2))
!
!   Weak form
!
    kterms = mat%nterms
    kcoupl = SIZE(spl%sp2%dft%mode_couplings)
    ALLOCATE(idert(kterms,2,ng1,ng2), iderw(kterms,2,ng1,ng2))
    ALLOCATE(coefs(kterms,kcoupl,ng1,ng2))
!
!   Splines and derivatives at all Gauss points
!
    ALLOCATE(fun1(0:nidbas1,0:1,ng1))    ! Spline and 1st derivative
    ALLOCATE(ft_fun2(kmin:kmax,0:1,ng2)) ! DFT of splines and 1st derivative
    ALLOCATE(fft_temp(0:n2-1))           ! Used in coefeq
!===========================================================================
!              2.0 Assembly loop
!
    ALLOCATE(left1(ng1))
    ALLOCATE(left2(ng2))
!
! First interval in 2nd (periodic) coordinate
!
    j = 1
    CALL get_gauss(spl%sp2, ng2, 1, xg2, wg2)
    left2 = j
    CALL ft_basfun(xg2, spl%sp2, ft_fun2, left2)

    DO i=1,n1
       CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
       left1 = i
       CALL basfun(xg1, spl%sp1, fun1, left1)
       DO ig1=1,ng1
          DO ig2=1,ng2
             CALL coefeq(xg1(ig1), xg2(ig2), &
                  &      idert(:,:,ig1,ig2), &
                  &      iderw(:,:,ig1,ig2), &
                  &      coefs(:,:,ig1,ig2))
          END DO
       END DO
!
       DO iw1=0,nidbas1     ! Weight function in dir 1
          igw1 = i+iw1
          DO it1=0,nidbas1  ! Test function in dir 1
             igt1 = i+it1
             DO mt=kmin,kmax ! Test Fourier mode
                DO kc=1,kcoupl
                   mw = mt + spl%sp2%dft%mode_couplings(kc)
                   IF(mw.LT.kmin .OR. mw.GT.kmax) CYCLE
!-------------
                   contrib = (0.0d0, 0.0d0)
                   DO ig1=1,ng1
                      DO ig2=1,ng2
                         DO iterm=1,kterms
                            contrib = contrib + &
                              &    fun1(iw1,iderw(iterm,1,ig1,ig2),ig1) * &
                              &    ft_fun2(mw,iderw(iterm,2,ig1,ig2),ig2) * &
                              &    coefs(iterm,kc,ig1,ig2) *                 &
                              &    CONJG(ft_fun2(mt,idert(iterm,2,ig1,ig2),ig2)) * &
                              &    fun1(it1,idert(iterm,1,ig1,ig2),ig1) * &
                              &    wg1(ig1) * wg2(ig2) /REAL(n2,8)
                         END DO
                      END DO
                   END DO
                   irow = (igw1-1)*dk + (mw-kmin)+1  ! Number first mode m then radial coord.
                   jcol = (igt1-1)*dk + (mt-kmin)+1
                   CALL updtmat(mat, irow, jcol, contrib)
!-------------
                END DO
             END DO
          END DO
       END DO
    END DO
!===========================================================================
!              9.0  Epilogue
!
    DEALLOCATE(xg1, wg1, fun1)
    DEALLOCATE(xg2, wg2, ft_fun2)
    DEALLOCATE(idert, iderw, coefs)
    DEALLOCATE(left1,left2)
    DEALLOCATE(fft_temp)
!
  CONTAINS
    SUBROUTINE coefeq(x, y, idt, idw, c)
      USE fft
      DOUBLE PRECISION, INTENT(in) :: x, y
      INTEGER, INTENT(out) :: idt(:,:), idw(:,:)
      DOUBLE COMPLEX, INTENT(out) :: c(:,:)
!
      DOUBLE PRECISION :: zcoef, dy
      INTEGER :: j, k, kc, kp
!
! Weak form = Int(x*C*dw/dx*dt/dx + C/x*dw/dy*dt/dy)dxdy
!      C(x,y) = 1 + epsilon*x*cos(y)
!
      dy = spl%sp2%dft%dx
      kc = SIZE(spl%sp2%dft%mode_couplings)
      DO j=0,n2-1
         fft_temp(j) = 1.0d0+epsi*x*COS(y+j*dy)
      END DO
      CALL fourcol(fft_temp,1)
      DO k=1,kc
         kp = spl%sp2%dft%mode_couplings(k)
         IF(kp.LT.0) kp=kp+n2
         c(1,k) = x*fft_temp(kp)
         c(2,k) = fft_temp(kp)/x
      END DO
!!$      WRITE(*,'(a/(10(1pe12.4)))') 'fft_temp', ABS(fft_temp)
!!$      WRITE(*,'(a/(10(1pe12.4)))') 'c1', ABS(c(1,:))
!!$      WRITE(*,'(a/(10(1pe12.4)))') 'c2', ABS(c(2,:))
!
      idt(1,1) = 1
      idt(1,2) = 0
      idw(1,1) = 1
      idw(1,2) = 0
!
      idt(2,1) = 0
      idt(2,2) = 1
      idw(2,1) = 0
      idw(2,2) = 1
    END SUBROUTINE coefeq
  END SUBROUTINE dismat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  SUBROUTINE disrhs(mbess, epsi, spl, rhs)
!
!   Assembly the RHS using 2d spline spl
!
    INTEGER, INTENT(in)           :: mbess
    DOUBLE PRECISION, INTENT(in)  :: epsi
    TYPE(spline2d), INTENT(in)    :: spl
    DOUBLE PRECISION, INTENT(out) :: rhs(:)
!
    INTEGER :: n1, nidbas1, ndim1, ng1
    INTEGER :: n2, nidbas2, ndim2, ng2
    INTEGER :: i, j, ig1, ig2, k1, k2, i1, j2, ij, nrank
    DOUBLE PRECISION, ALLOCATABLE :: xg1(:), wg1(:), fun1(:,:)
    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:), fun2(:,:)
    DOUBLE PRECISION:: contrib
!===========================================================================
!              1.0 Prologue
!
!   Properties of spline space
!
    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
    CALL get_dim(spl%sp2, ndim2, n2, nidbas2) 
!
    ALLOCATE(fun1(0:nidbas1,1)) ! needs only basis functions (no derivatives)
    ALLOCATE(fun2(0:nidbas2,1)) ! needs only basis functions (no derivatives)
!
!   Gauss quadature
!
    CALL get_gauss(spl%sp1, ng1)
    CALL get_gauss(spl%sp2, ng2)
    ALLOCATE(xg1(ng1), wg1(ng1))
    ALLOCATE(xg2(ng1), wg2(ng1))
!===========================================================================
!              2.0 Assembly loop
!
    nrank = SIZE(rhs)
    rhs(1:nrank) = 0.0d0
!
    DO i=1,n1
       CALL get_gauss(spl%sp1, ng1, i, xg1, wg1)
       DO ig1=1,ng1
          CALL basfun(xg1(ig1), spl%sp1, fun1, i)
          DO j=1,n2
             CALL get_gauss(spl%sp2, ng2, j, xg2, wg2)
             DO ig2=1,ng2
                CALL basfun(xg2(ig2), spl%sp2, fun2, j)
                contrib = wg1(ig1)*wg2(ig2) * &
                     &    rhseq(xg1(ig1),xg2(ig2), mbess)
                DO k1=0,nidbas1
                   i1 = i+k1
                   DO k2=0,nidbas2
                      j2 = MODULO(j+k2-1,n2) + 1
                      ij = j2 + (i1-1)*n2
                      rhs(ij) = rhs(ij) + contrib*fun1(k1,1)*fun2(k2,1)
                   END DO
                END DO
             END DO
          END DO
       END DO
    END DO
!===========================================================================
!              9.0  Epilogue
!
    DEALLOCATE(xg1, wg1, fun1)
    DEALLOCATE(xg2, wg2, fun2)
!
  CONTAINS
    DOUBLE PRECISION FUNCTION rhseq(x, y, m)
      DOUBLE PRECISION, INTENT(in) :: x, y
      INTEGER, INTENT(in)          :: m
!
      DOUBLE PRECISION :: xm
      xm = REAL(m,8)
      rhseq = x**(m+1) * ( 4.0d0*(xm+1.0d0)*COS(xm*y) + &
           &  epsi*x*( &
           &    ( (3.0d0*(xm+1.0d0) - xm/x**2)*COS((xm-1.0d0)*y) + &
           &     (3.0d0+2.0d0*xm)*COS((xm+1.0d0)*y) ) &
           &    ))
    END FUNCTION rhseq
  END SUBROUTINE disrhs
!
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  SUBROUTINE ibcmat(mat, dft)
!
!   Apply BC on matrix
!
    TYPE(zpardiso_mat), INTENT(inout) :: mat
    TYPE(dftmap), INTENT(in)          :: dft
    INTEGER :: nrank, k, kmin, kmax, dk, i
    DOUBLE COMPLEX :: arr(mat%rank)
!===========================================================================
!              1.0 Prologue
!
    nrank = mat%rank
    kmin = dft%kmin
    kmax = dft%kmax
    dk   = dft%dk
!===========================================================================
!              2.0 BC at the axis
!
!   zero for non-zero modes
!
    DO k=kmin,kmax
       IF(k.NE.0) THEN
          i = k-kmin+1
          arr = 0.0d0; arr(i) = 1.0d0
          CALL putrow(mat, i, arr)
       END IF
    END DO
!===========================================================================
!              3.0 Dirichlet on right boundary
!
    DO i = nrank, nrank-dk+1, -1
       arr = 0.0d0; arr(i) = 1.0d0
       CALL putrow(mat, i, arr)     
    END DO
!
  END SUBROUTINE ibcmat
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  SUBROUTINE ibcrhs(rhs, dft)
!
!   Apply BC on RHS
!
    DOUBLE COMPLEX, INTENT(inout) :: rhs(:)
    TYPE(dftmap), INTENT(in)      :: dft
    INTEGER :: nrank, kmin, kmax, dk, k
!===========================================================================
!              1.0 Prologue
!
    nrank = SIZE(rhs,1)
    kmin = dft%kmin
    kmax = dft%kmax
    dk = dft%dk
!===========================================================================
!              2.0 BC at the axis
!
!   zero for non-zero modes
!
    DO k=kmin,kmax
       IF(k.NE.0) rhs(k-kmin+1) = 0.0
    END DO
!===========================================================================
!              3.0 Dirichlet on right boundary
!
    rhs(nrank-dk+1:nrank) = 0.0d0
  END SUBROUTINE ibcrhs
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  SUBROUTINE spectrum0(spl, carr, xpt, cspec)
!
!  DFT modes at xpt (integration on the first interval)
!
    DOUBLE COMPLEX, PARAMETER   :: ci = (0.0d0,1.0d0)
    DOUBLE PRECISION, PARAMETER :: pi = 3.141592653589793d0
    TYPE(spline2d), INTENT(in)   :: spl
    DOUBLE COMPLEX, INTENT(in)   :: carr(:)
    DOUBLE PRECISION, INTENT(in) :: xpt
    DOUBLE COMPLEX, INTENT(out)  :: cspec(:)
!
    INTEGER :: ndim1, n1, nidbas1, ndim2, n2, nidbas2
    INTEGER :: k,  kmin, kmax, dk, kk
    INTEGER :: ng2, ig2
    INTEGER, ALLOCATABLE :: left2(:)
    DOUBLE PRECISION :: temp(1)
    DOUBLE PRECISION, ALLOCATABLE :: xg2(:), wg2(:)
    DOUBLE COMPLEX, ALLOCATABLE :: ft_fun2(:,:,:), psi(:), coefs(:,:)
!
    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
    CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
    kmin = spl%sp2%dft%kmin
    kmax = spl%sp2%dft%kmax
    dk   = spl%sp2%dft%dk
!
    CALL get_gauss(spl%sp2, ng2)
    ALLOCATE(left2(ng2))
    ALLOCATE(xg2(ng2), wg2(ng2))
    ALLOCATE(ft_fun2(kmin:kmax,1,ng2)) ! DFT of splines
    ALLOCATE(psi(kmin:kmax))
!
!  Integration over first interval
!
    left2 = 1
    CALL get_gauss(spl%sp2, ng2, 1, xg2, wg2)
    CALL ft_basfun(xg2, spl%sp2, ft_fun2, left2)
    psi = (0.0d0,0.0d0)
    DO k=kmin,kmax
       DO ig2=1,ng2
          psi(k)  = psi(k) + wg2(ig2)*EXP(k*ci*xg2(ig2))*CONJG(ft_fun2(k,1,ig2))
       END DO
    END DO
!
    ALLOCATE(coefs(dk,ndim1))
    coefs = RESHAPE(carr, SHAPE(coefs))
    temp = xpt
    DO kk=kmin,kmax
       k=kk-kmin+1
       coefs(k,:) = psi(kk)*coefs(k,:)
       CALL gridval(spl%sp1, temp, cspec(k:k), 0, coefs(k,:))
    END DO
    cspec = cspec/(2.0d0*pi)
!
    DEALLOCATE(left2)
    DEALLOCATE(xg2, wg2)
    DEALLOCATE(ft_fun2)
    DEALLOCATE(psi)
    DEALLOCATE(coefs)    
  END SUBROUTINE spectrum0
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  SUBROUTINE spectrum1(spl, carr, xpt, ypt0, cspec)
!
!  DFT modes at xpt (at the initial ypt0)
!
    DOUBLE COMPLEX, PARAMETER   :: ci = (0.0d0,1.0d0)
!
    TYPE(spline2d), INTENT(in)   :: spl
    DOUBLE COMPLEX, INTENT(in)   :: carr(:)
    DOUBLE PRECISION, INTENT(in) :: xpt, ypt0
    DOUBLE COMPLEX, INTENT(out)  :: cspec(:)
!
    INTEGER :: ndim1, n1, nidbas1, ndim2, n2, nidbas2
    INTEGER :: k,  kmin, kmax, dk
    DOUBLE PRECISION :: temp(1)
    DOUBLE COMPLEX :: ctemp(1)
    DOUBLE COMPLEX, ALLOCATABLE :: ft_fun2(:,:), coefs(:,:)
!
    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
    CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
    kmin = spl%sp2%dft%kmin
    kmax = spl%sp2%dft%kmax
    dk   = spl%sp2%dft%dk
!
! DFT of splines at ypt0
    ALLOCATE(ft_fun2(kmin:kmax,1)) 
    ALLOCATE(coefs(kmin:kmax,ndim1))
    CALL ft_basfun(ypt0, spl%sp2, ft_fun2, 1)
    coefs = RESHAPE(carr, SHAPE(coefs))
!
    temp = xpt
    DO k=kmin,kmax
       CALL gridval(spl%sp1, temp, ctemp, 0, coefs(k,:))
       cspec(k-kmin+1) = CONJG(ft_fun2(k,1))*ctemp(1)*EXP(k*ci*ypt0)
    END DO
    cspec = cspec/REAL(n2,8)
!
    DEALLOCATE(ft_fun2)
    DEALLOCATE(coefs)    
  END SUBROUTINE spectrum1
!!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  SUBROUTINE spectrum2(spl, xpt, ypt0, cspec)
!
!  DFT modes at xpt (at the initial ypt0)
!
    USE fft
    USE bsplines
!
    DOUBLE COMPLEX, PARAMETER   :: ci = (0.0d0,1.0d0)
!
    TYPE(spline2d)               :: spl
    DOUBLE PRECISION, INTENT(in) :: xpt, ypt0
    DOUBLE COMPLEX, INTENT(out)  :: cspec(:)
!
    INTEGER :: ndim1, n1, nidbas1, ndim2, n2, nidbas2
    INTEGER :: k,  kmin, kmax, dk
    DOUBLE PRECISION, ALLOCATABLE :: ypt(:), fun(:,:)
    DOUBLE COMPLEX, ALLOCATABLE ::   ft_fun(:)
    DOUBLE PRECISION :: temp(1)
!
    CALL get_dim(spl%sp1, ndim1, n1, nidbas1)
    CALL get_dim(spl%sp2, ndim2, n2, nidbas2)
    kmin = spl%sp2%dft%kmin
    kmax = spl%sp2%dft%kmax
    dk   = spl%sp2%dft%dk
!
    ALLOCATE(ypt(0:n2-1))
    ALLOCATE(fun(1, 0:n2-1))
    ALLOCATE(ft_fun(0:n2-1))
!
!  Function values at points ypt
!
    ypt(0:n2-1) = ypt0 + spl%sp2%knots(0:n2-1)
    temp = xpt
    CALL gridval(spl, temp, ypt, fun, (/0,0/))
    ft_fun = fun(1,:)
!
!  Discrete Fourier Transform
!
    CALL fourcol(ft_fun, 1)
    DO k=kmin,kmax
       IF(k.LT.0) THEN
          cspec(k-kmin+1) = ft_fun(k+n2)*EXP(k*ci*ypt0)
       ELSE
          cspec(k-kmin+1) = ft_fun(k)*EXP(k*ci*ypt0)
       END IF
    END DO
    cspec = cspec/REAL(n2,8)
!
    DEALLOCATE(ypt)
    DEALLOCATE(fun)
    DEALLOCATE(ft_fun)
  END SUBROUTINE spectrum2
!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
END MODULE pde2d_sym_pardiso_dft_mod
PROGRAM main
  USE pde2d_sym_pardiso_dft_mod
  USE futils
  USE fft
!
  IMPLICIT NONE
  INTEGER :: nx, ny, nidbas(2), ngauss(2), mbess, nterms, kmin, kmax, dk
  INTEGER :: n_mode_couplings
  INTEGER, ALLOCATABLE :: mode_couplings(:)
  LOGICAL :: nlppform
  INTEGER :: i, j, ij, dimx, dimy, nrank, nrank_full, jder(2), it, i0, i0_r
  INTEGER :: k, kp, ik
  DOUBLE PRECISION :: pi, epsi, coefx(5), coefy(5)
  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, rhs, sol
  DOUBLE COMPLEX, ALLOCATABLE   ::  crhs(:), crhs_r(:), csol(:), csol_r(:)
  TYPE(spline2d) :: splxy
  TYPE(zpardiso_mat) :: mat
!
  CHARACTER(len=128) :: file='pde2d_sym_pardiso_dft.h5'
  INTEGER :: fid
  DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: arr, srow
  DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol
  DOUBLE PRECISION :: seconds, mem, dopla
  DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tfour, tfour0, tgrid, gflops1
  INTEGER :: nits=100
  LOGICAL :: nlmetis, nlforce_zero, nlpos
!
  DOUBLE PRECISION :: xpt, ypt0
  DOUBLE COMPLEX, ALLOCATABLE :: cspec0(:), cspec(:), energy_k(:)
  DOUBLE COMPLEX :: energy, energy_exact
!
  NAMELIST /newrun/ nx, ny, nidbas, ngauss, kmin, kmax, mbess, epsi, &
       &            nlppform, nlmetis, nlforce_zero, nlpos, coefx, coefy
!===========================================================================
!              1.0 Prologue
!
!   Read in data specific to run
!
  nx = 8              ! Number of intervals in x
  ny = 8              ! Number of intervals in y
  nidbas = (/3,3/)    ! Degree of splines
  ngauss = (/4,4/)    ! Number of Gauss points/interval
  kmin = -3           ! Minimum Fourier mode number
  kmax = 3            ! Maximum Fourier mode number
  mbess = 2           ! Exponent of differential problem
  epsi = 0.5          ! Non-uniformity in the Laplacian coefficicient
  nterms = 2          ! Number of terms in weak form
  nlppform = .TRUE.   ! Use PPFORM for gridval or not
  nlmetis = .FALSE.   ! Use metis ordering or minimum degree
  nlforce_zero = .FALSE. ! Remove existing nodes with zero val in  putele/row/ele
  nlpos = .TRUE.         ! Matrix is positive definite
  coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
  coefy(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function
!
  READ(*,newrun)
  WRITE(*,newrun)
!!
!   Read table of mode couplings
!
  READ(*,*) n_mode_couplings
  ALLOCATE(mode_couplings(n_mode_couplings))
  READ(*,*) mode_couplings
  WRITE(*,'(/a/(20i4))') 'Mode couplings', mode_couplings
!
!   Define grid on x (=radial) & y (=poloidal) axis
!
  pi = 4.0d0*ATAN(1.0d0)
  ALLOCATE(xgrid(0:nx), ygrid(0:ny))
  xgrid(0) = 0.0d0;   xgrid(nx) = 1.0d0
  CALL meshdist(coefx, xgrid, nx)
  ygrid(0) = 0.0d0;   ygrid(ny) = 2.d0*pi
  CALL meshdist(coefy, ygrid, ny)
!
!   Exact energy
!
  energy_exact = 2.0d0*pi/REAL(2+mbess,8)
!
!   Create hdf5 file
!
  CALL creatf(file, fid, 'PDE2D Result File', real_prec='d')
  CALL attach(fid, '/', 'NX', nx)
  CALL attach(fid, '/', 'NY', ny)
  CALL attach(fid, '/', 'NIDBAS1', nidbas(1))
  CALL attach(fid, '/', 'NIDBAS2', nidbas(2))
  CALL attach(fid, '/', 'NGAUSS1', ngauss(1))
  CALL attach(fid, '/', 'NGAUSS2', ngauss(2))
  CALL attach(fid, '/', 'MBESS', mbess)
  CALL attach(fid, '/', 'EPSI', epsi)
  CALL attach(fid, '/', 'KMIN', kmin)
  CALL attach(fid, '/', 'KMAX', kmax)
!===========================================================================
!              2.0 Discretize the PDE
!
!   Set up spline
!
  CALL set_spline(nidbas, ngauss, xgrid, ygrid, splxy, &
       &          (/.FALSE., .TRUE./))
!
!   Init DFT for spline in 2nd direction
!
  CALL init_dft(splxy%sp2, kmin, kmax, mode_couplings)
  dk = splxy%sp2%dft%dk
!
!   FE matrix assembly
!
  dimx = splxy%sp1%dim
  dimy = splxy%sp2%dim
  nrank = (nx+nidbas(1))*dk       ! Rank of restricted matrix
  nrank_full = (nx+nidbas(1))*ny  ! Rank of full matrix
!
  ALLOCATE(rhs(nrank_full), sol(nrank_full))
  ALLOCATE(crhs(nrank_full), csol(nrank_full))
  ALLOCATE(crhs_r(nrank), csol_r(nrank))
!
  WRITE(*,'(a,i8)') 'nrank_full', nrank_full
  WRITE(*,'(a,i8)') 'nrank     ', nrank
!
  t0 = seconds()
  CALL init(nrank, nterms, mat, nlherm=.TRUE.)
  CALL dismat(splxy, epsi, mat)
  ALLOCATE(arr(nrank))
  ALLOCATE(srow(nrank))
  DO i=1,nrank
     CALL getrow(mat, i, arr)
     srow(i) = SUM(arr)
  END DO
!!$  WRITE(*,'(a/(10(1pe12.3)))') 'Real of Sum of matrix rows before BC', REAL(srow)
!!$  WRITE(*,'(a/(10(1pe12.3)))') 'Imag of Sum of matrix rows before BC', AIMAG(srow)
  PRINT*, 'Sum of mat before BC', SUM(srow)
!
!   BC on Matrix
!
  WRITE(*,'(/a,l4)') 'nlforce_zero = ', mat%nlforce_zero
  WRITE(*,'(a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
  CALL ibcmat(mat, splxy%sp2%dft)
  tmat = seconds() - t0
  DO i=1,nrank
     CALL getrow(mat, i, arr)
     srow(i) = SUM(arr)
  END DO
!!$  WRITE(*,'(a/(10(1pe12.3)))') 'Real of Sum of matrix rows after BC', REAL(srow)
!!$  WRITE(*,'(a/(10(1pe12.3)))') 'Imag of Sum of matrix rows after BC', AIMAG(srow)
  PRINT*, 'Sum of mat after BC', SUM(srow)
!
  WRITE(*,'(/a,i8)') 'Number of non-zeros of matrix = ', get_count(mat)
  WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after DISMAT', mem()
!
!   RHS assembly
!
  CALL disrhs(mbess, epsi, splxy, rhs)
!
!   Init FFT
!
  t0 = seconds()
  CALL fourcol(crhs(1:ny),1)
  CALL fourcol(crhs(1:ny),-1)
  tfour0 = seconds()-t0
  crhs = crhs/REAL(ny,8)
!
!  DFT of RHS
!
  t0 = seconds()
  crhs = rhs
  DO i=1,nx+nidbas(1)
     i0 = (i-1)*ny
     CALL fourcol(crhs(i0+1:i0+ny), 1)
  END DO
  tfour = seconds()-t0
!
!   Restriction in Fourier space
!     k  = kmin:kmax  (restricted)
!     kp = 0:ny-1     (full)
!
  DO i=1,nx+nidbas(1)
     i0 = (i-1)*ny
     i0_r = (i-1)*dk
     DO k=kmin,kmax
        kp = k
        IF(kp.LT.0) kp = kp+ny
        crhs_r(i0_r+k-kmin+1) = crhs(i0+kp+1)
     END DO
  END DO
!
!   BC on RHS
!
  CALL ibcrhs(crhs_r, splxy%sp2%dft)
!
  IF(nrank.LT.100) THEN
     WRITE(*,'(a/(10(1pe12.3)))') 'Real of crhs', REAL(crhs)
     WRITE(*,'(a/(10(1pe12.3)))') 'Imag of crhs', AIMAG(crhs)
  END IF
!===========================================================================
!              3.0 Solve the dicretized system
!
!   Matrix factorization
!
  t0 = seconds()
!!$  CALL factor(mat, nlmetis=nlmetis)
  CALL to_mat(mat)
  CALL reord_mat(mat, nlmetis=nlmetis);  CALL putmat(fid, '/MAT1', mat)
  CALL numfact(mat)
  tfact = seconds() - t0
  DO i=1,nrank
     CALL getrow(mat, i, arr)
     srow(i) = SUM(arr)
  END DO
  PRINT*, 'Sum of mat after factor', SUM(srow)
 
  WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after FACTOR', mem()
  WRITE(*,'(/a,i12)') 'Number of nonzeros in factors of A  = ',mat%p%iparm(18)
  WRITE(*,'(a,i12)')  'Number of factorization MFLOPS      = ',mat%p%iparm(19)
  gflops1 = mat%p%iparm(19) / tfact / 1.d3
!
!   Backsolve
!
  t0 = seconds()
  PRINT*, 'SUM of crhs_r', SUM(crhs_r)
  CALL bsolve(mat, crhs_r, csol_r, debug=.FALSE.)
  WRITE(*,'(a,1pe12.4)') 'Residue =', cnorm2(vmx(mat,csol_r)-crhs_r)
  tsolv = seconds() - t0
  PRINT*, 'SUM of csol_r', SUM(csol_r)
!
  CALL putarr(fid, '/FT_RHS', crhs_r, 'DFT of RHS')
  CALL putarr(fid, '/FT_SOL', csol_r, 'DFT of Spline coefficients')
!===========================================================================
!              4.0 Perform some diagnostics in Fourier space
!
!   Fourier spectrum at xpt
!
  xpt = SQRT(REAL(mbess,8)/REAL(mbess+2,8))
  ALLOCATE(cspec0(dk))
  CALL spectrum0(splxy, csol_r, xpt, cspec0)
  WRITE(*,'(/a,f10.5)') 'DFT spectrum (by integration) at x = ', xpt
  DO k=kmin,kmax
     WRITE(*,'(i5,3(1pe15.3))') k, cspec0(k-kmin+1), ABS(cspec0(k-kmin+1))
  END DO
!
  ypt0 = 0.0d0
  WRITE(*,'(/a,2f10.5)') 'DFT spectrum1 at x, y0 = ', xpt, ypt0
  CALL spectrum1(splxy, csol_r, xpt, ypt0, cspec0)
  DO k=kmin,kmax
     WRITE(*,'(i5,3(1pe15.3))') k, cspec0(k-kmin+1), ABS(cspec0(k-kmin+1))
  END DO
  ypt0 = splxy%sp2%dft%dx/2.0d0   ! Center of first interval
  WRITE(*,'(/a,2f10.5)') 'DFT spectrum1 at x, y0 = ', xpt, ypt0
  CALL spectrum1(splxy, csol_r, xpt, ypt0, cspec0)
  DO k=kmin,kmax
     WRITE(*,'(i5,3(1pe15.3))') k, cspec0(k-kmin+1), ABS(cspec0(k-kmin+1))
  END DO
!
!   Spectral energy
!
  WRITE(*,'(/a)') 'Spectral energies'
  ALLOCATE(energy_k(kmin:kmax))
  energy_k = (0.0d0,0.0d0)
  DO i=1,dimx
     i0_r = (i-1)*dk
     DO k=kmin,kmax
        ik = i0_r+k-kmin+1
        energy_k(k) = energy_k(k) + csol_r(ik)*CONJG(crhs_r(ik))
     END DO
  END DO
  energy_k = energy_k/REAL(ny,8)
  energy = SUM(energy_k)
  DO k=kmin,kmax
     WRITE(*,'(i5,3(1pe15.3))') k, energy_k(k), ABS(energy_k(k))
  END DO
  WRITE(*,'(a5,4(1pe15.3))') 'Sum', energy, ABS(energy), REAL(energy-energy_exact)
!
  CALL putarr(fid, '/ENERGY_K', energy_k, 'Spectral energies')
!===========================================================================
!              5.0 Transform back to real space
!
!   Expand to full Fourier space
!     k  = kmin:kmax  (restricted)
!     kp = 0:ny-1     (full)
!
  crhs = (0.0d0,0.0d0)
  DO i=1,nx+nidbas(1)
     i0 = (i-1)*ny
     i0_r = (i-1)*dk
     DO k=kmin,kmax
        kp = k
        IF(kp.LT.0) kp = kp+ny
        csol(i0+kp+1) = csol_r(i0_r+k-kmin+1)
     END DO
  END DO
!
!   Fourier transform back to real space
!
  t0 = seconds()
  DO i=1,nx+nidbas(1)
     i0 = (i-1)*ny
     CALL fourcol(csol(i0+1:i0+ny),-1)
  END DO
  sol = REAL(csol)/REAL(ny,8)
  tfour = tfour + seconds()-t0
!
!
!   Spline coefficients, taking into account of periodicity in y
!   Note: in SOL, y was numbered first.
!
  ALLOCATE(bcoef(0:dimx-1, 0:dimy-1))
  DO j=0,dimy-1
     DO i=0,dimx-1
        ij = MODULO(j,ny) + i*ny + 1
        bcoef(i,j) = sol(ij)
     END DO
  END DO
  CALL putarr(fid, '/SOL', sol, 'Spline coefficients of solution')
!
!   Total energy
!
  WRITE(*,'(/a, 2(1pe15.3))') 'Total energy and error(real space)', &
       &     DOT_PRODUCT(rhs,sol), &
       &     DOT_PRODUCT(rhs,sol)-REAL(energy_exact)               
  WRITE(*,'(/a,1pe12.3)') 'Mem used (MB) after BSOLVE', mem()
!===========================================================================
!              6.0 Check the solution
!
!   Check function values computed with various method
!
  ALLOCATE(solcal(0:nx,0:ny), solana(0:nx,0:ny), errsol(0:nx,0:ny))
  DO i=0,nx
     DO j=0,ny
        solana(i,j) = (1-xgrid(i)**2) * xgrid(i)**mbess * COS(mbess*ygrid(j))
     END DO
  END DO
  jder = (/0,0/)
!
!   Compute PPFORM/BCOEFS at first call to gridval
!
  CALL gridval(splxy, xgrid, ygrid, solcal, jder, bcoef)
!
!    Fourier spectrum at xpt
!
  xpt = SQRT(REAL(mbess,8)/REAL(mbess+2,8))
  ALLOCATE(cspec(dk))
!
  ypt0 = 0.0d0
  WRITE(*,'(/a,2f10.5)') 'DFT spectrum2 at x, y0 = ', xpt, ypt0
  CALL spectrum2(splxy, xpt, ypt0, cspec)
  DO k=kmin,kmax
     WRITE(*,'(i5,3(1pe15.3))') k, cspec(k-kmin+1), ABS(cspec(k-kmin+1))
  END DO
!
  ypt0 = splxy%sp2%dft%dx/2.0d0   ! Center of first interval
  WRITE(*,'(/a,2f10.5)') 'DFT spectrum2 at x, y0 = ', xpt, ypt0
  CALL spectrum2(splxy, xpt, ypt0, cspec)
  DO k=kmin,kmax
     WRITE(*,'(i5,3(1pe15.3))') k, cspec(k-kmin+1), ABS(cspec(k-kmin+1))
  END DO
!
  WRITE(*,'(/a)') '*** Checking solutions'
  t0 = seconds()
  DO it=1,nits   ! nits iterations for timing
     CALL gridval(splxy, xgrid, ygrid, solcal, jder)
  END DO
  tgrid = (seconds() - t0)/REAL(nits)
  errsol = solana - solcal
  IF( SIZE(bcoef,2) .LE. 10 ) THEN
     CALL prnmat('BCOEF', bcoef)
     CALL prnmat('SOLANA', solana)
     CALL prnmat('SOLCAL', solcal)
     CALL prnmat('ERRSOL', errsol)
  END IF
  WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', &
       &    norm2(errsol) / norm2(solana)
  WRITE(*,'(a,1pe12.3)') 'GRIDVAL2 time (s)             ', tgrid
!
  CALL putarr(fid, '/xgrid', xgrid, 'r')
  CALL putarr(fid, '/ygrid', ygrid, '\theta')
  CALL putarr(fid, '/sol', solcal, 'Solutions')
  CALL putarr(fid, '/solana', solana,'Exact solutions')
  CALL putarr(fid, '/errors', errsol, 'Errors')
!
!   Check derivatives d/dx and d/dy
!
  WRITE(*,'(/a)') '*** Checking gradient'
  DO i=0,nx
     DO j=0,ny
        IF( mbess .EQ. 0 ) THEN
           solana(i,j) = -2.0d0 * xgrid(i)
        ELSE
           solana(i,j) = (-(mbess+2)*xgrid(i)**2 + mbess) * &
             &           xgrid(i)**(mbess-1) * COS(mbess*ygrid(j))
        END IF
     END DO
  END DO
!
  jder = (/1,0/)
  CALL gridval(splxy, xgrid, ygrid, solcal, jder)
  errsol = solana - solcal
  CALL putarr(fid, '/derivx', solcal, 'd/dx of solutions')
  WRITE(*,'(a,2(1pe12.3))') 'Error in d/dx',  norm2(errsol)
!
  DO i=0,nx
     DO j=0,ny
        solana(i,j) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * SIN(mbess*ygrid(j))
     END DO
  END DO
!
  jder = (/0,1/)
  CALL gridval(splxy, xgrid, ygrid, solcal, jder)
  CALL putarr(fid, '/derivy', solcal, 'd/dy of solutions')
  errsol = solana - solcal
  WRITE(*,'(a,2(1pe12.3))') 'Error in d/dy',  norm2(errsol)
!
  WRITE(*,'(/a)') '---'
  WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat
  WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact
  WRITE(*,'(a,1pe12.3)') 'Backsolve(1) time (s)         ', tsolv
  WRITE(*,'(a,1pe12.3)') 'Init FFT time (s)             ', tfour0
  WRITE(*,'(a,1pe12.3)') 'FFT time (s)                  ', tfour
  WRITE(*,'(a,2f10.3)')  'Factor  Gflop/s', gflops1
!===========================================================================
!              9.0  Epilogue
!
  DEALLOCATE(cspec0, cspec)
  DEALLOCATE(mode_couplings)
  DEALLOCATE(xgrid, ygrid, rhs, sol)
  DEALLOCATE(crhs, csol)
  DEALLOCATE(crhs_r, csol_r)
  DEALLOCATE(solcal, solana, errsol)
  DEALLOCATE(bcoef)
  DEALLOCATE(arr)
  DEALLOCATE(srow)
  DEALLOCATE(energy_k)
  CALL destroy_sp(splxy)
  CALL destroy(mat)
!
  CALL closef(fid)
!===========================================================================
!
CONTAINS
  FUNCTION norm2(x)
!
!  Compute the 2-norm of array x
!
    IMPLICIT NONE
    DOUBLE PRECISION :: norm2
    DOUBLE PRECISION, INTENT(in) :: x(:,:)
    DOUBLE PRECISION :: sum2
    INTEGER :: i, j
!
    sum2 = 0.0d0
    DO i=1,SIZE(x,1)
       DO j=1,SIZE(x,2)
          sum2 = sum2 + x(i,j)**2
       END DO
    END DO
    norm2 = SQRT(sum2)
  END FUNCTION norm2
!
  FUNCTION cnorm2(x)
    DOUBLE COMPLEX, INTENT(in) :: x(:)
    DOUBLE PRECISION :: cnorm2
    cnorm2 = SQRT(DOT_PRODUCT(x,x))    
  END FUNCTION cnorm2
!
  SUBROUTINE prnmat(label, mat)
    CHARACTER(len=*) :: label
    DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat
    INTEGER :: i
    WRITE(*,'(/a)') TRIM(label)
    DO i=1,SIZE(mat,1)
       WRITE(*,'(10(1pe12.3))') mat(i,:)
    END DO
  END SUBROUTINE prnmat
END PROGRAM main
!
!+++
SUBROUTINE meshdist(c, x, nx)
!
!   Construct an 1d  non-equidistant mesh given a
!   mesh distribution function.
!
  IMPLICIT NONE
  DOUBLE PRECISION, INTENT(in) :: c(5)
  INTEGER, INTENT(iN) :: nx
  DOUBLE PRECISION, INTENT(inout) :: x(0:nx)
  INTEGER :: nintg
  DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xint, fint
  DOUBLE PRECISION :: a, b, dx, f0, f1, scal
  INTEGER :: i, k
!
  a=x(0)
  b=x(nx)
  nintg = 10*nx
  ALLOCATE(xint(0:nintg), fint(0:nintg))
!
!  Mesh distribution
!
  dx = (b-a)/REAL(nintg)
  xint(0) = a
  fint(0) = 0.0d0
  f1 = fdist(xint(0))
  DO i=1,nintg
     f0 = f1
     xint(i) = xint(i-1) + dx
     f1 = fdist(xint(i))
     fint(i) = fint(i-1) + 0.5*(f0+f1)
  END DO
!
!  Normalization
!
  scal = REAL(nx) / fint(nintg)
  fint(0:nintg) = fint(0:nintg) * scal
!!$  WRITE(*,'(a/(10f8.3))') 'FINT', fint
!
!  Obtain mesh point by (inverse) interpolation
!
  k = 1
  DO i=1,nintg-1
     IF( fint(i) .GE. REAL(k) ) THEN
        x(k) = xint(i) + (xint(i+1)-xint(i))/(fint(i+1)-fint(i)) * &
             &   (k-fint(i))
        k = k+1
     END IF
  END DO
!
  DEALLOCATE(xint, fint)
CONTAINS
  DOUBLE PRECISION FUNCTION fdist(x)
    DOUBLE PRECISION, INTENT(in) :: x
    fdist = c(1) + c(2)*x + c(3)*EXP(-((x-c(4))/c(5))**2)
  END FUNCTION fdist
END SUBROUTINE meshdist
!+++