!> !> @file ppde3d_pb.f90 !> !> @brief !> !> @copyright !> Copyright (©) 2021 EPFL (Ecole Polytechnique Fédérale de Lausanne) !> SPC (Swiss Plasma Center) !> !> spclibs is free software: you can redistribute it and/or modify it under !> the terms of the GNU Lesser General Public License as published by the Free !> Software Foundation, either version 3 of the License, or (at your option) !> any later version. !> !> spclibs is distributed in the hope that it will be useful, but WITHOUT ANY !> WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS !> FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. !> !> You should have received a copy of the GNU Lesser General Public License !> along with this program. If not, see . !> !> @authors !> (in alphabetical order) !> @author Trach-Minh Tran !> PROGRAM main ! ! Solving the following 3d PDE using splines: ! ! -d/dx[x d/dx]f - 1/x[d/dy]^2 f = 4(m+1)x^(m+1)cos(my)cos(z)^n, with f(x=1,y) = 0 ! exact solution: f(x,y) = (1-x^2) x^m cos(my)cos(z)^n ! USE futils USE fft USE pputils2, ONLY : pptransp USE ppde3d_pb_mod ! IMPLICIT NONE ! CHARACTER(len=128) :: infile="ppde3d_pb.in" INTEGER :: l INTEGER :: nx, ny, nz, nidbas(3), ngauss(3), mbess, npow, nterms INTEGER :: startz, endz, nzloc INTEGER :: start_rank, end_rank, nrank_loc LOGICAL :: nlppform INTEGER :: i, j, k, kk, ij, dimx, dimy, dimz, nrank, kl, ku INTEGER :: jder(3), it DOUBLE PRECISION :: pi, coefx(5) DOUBLE PRECISION :: dy, dz DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: xgrid, ygrid, zgrid DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: fftmass DOUBLE PRECISION, DIMENSION(:,:), ALLOCATABLE :: rhs, sol, rhs_t DOUBLE COMPLEX, DIMENSION(:,:), ALLOCATABLE :: crhs_t ! TYPE(spline2d1d), TARGET :: splxyz TYPE(spline2d), POINTER :: splxy TYPE(spline1d) :: splz TYPE(pbmat) :: mat ! CHARACTER(len=128) :: file='ppde3d_pb.h5' INTEGER :: fid DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: arr DOUBLE PRECISION, DIMENSION(:,:,:), ALLOCATABLE :: bcoef, solcal, solana, errsol DOUBLE PRECISION :: seconds, mem, dopla DOUBLE PRECISION :: t0, tmat, tfact, tsolv, tgrid, gflops1, gflops2 INTEGER :: nits=500 ! INTEGER, PARAMETER :: npart=10000000 INTEGER :: nploc DOUBLE PRECISION, DIMENSION(npart) :: xp, yp, zp, fp_calc, fp_anal DOUBLE PRECISION zsuml, zsumg, errnorm2 ! INTEGER :: kmin, kmax DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: fftmass_shifted ! NAMELIST /newrun/ nx, ny, nz, nidbas, ngauss, mbess, npow, nlppform, coefx !=========================================================================== ! 1.0 Prologue ! ! Init MPI ! CALL mpi_init(ierr) CALL mpi_comm_size(MPI_COMM_WORLD, npes, ierr) CALL mpi_comm_rank(MPI_COMM_WORLD, me, ierr) ! ! Get input file name from command argument ! IF( COMMAND_ARGUMENT_COUNT() .EQ. 1 ) THEN CALL GET_COMMAND_ARGUMENT(1, infile, l, ierr) END IF ! ! Read in data specific to run ! nx = 8 ! Number of intervals in x ny = 8 ! Number of intervals in y nz = 8 ! Number of intervals in z nidbas = (/3,3,3/) ! Degree of splines ngauss = (/4,4, 4/) ! Number of Gauss points/interval mbess = 2 ! Exponent of differential problem npow = 2 ! Exponent of differential problem nterms = 2 ! Number of terms in weak form nlppform = .TRUE. ! Use PPFORM for gridval or not coefx(1:5) = (/1.0d0, 0.d0, 0.d0, 0.d0, 1.d0/) ! Mesh point distribution function ! OPEN(unit=99, file=TRIM(infile), status='old', action='read') READ(99,newrun) IF( me.EQ.0) THEN WRITE(*,newrun) END IF CLOSE(99) ! ! Define grid on x (=radial) & y (=poloidal) axis ! pi = 4.0d0*ATAN(1.0d0) ALLOCATE(xgrid(0:nx), ygrid(0:ny), zgrid(0:nz)) ! xgrid(0) = 0.0d0; xgrid(nx) = 1.0d0 CALL meshdist(coefx, xgrid, nx) ! dy = 2.d0*pi/REAL(ny,8) ! Equidistant in y ygrid = (/ (j*dy, j=0,ny) /) ! ! Partitionned toroidal grid z ! dz = 2.0d0*pi/REAL(nz,8) ! Equidistant in z zgrid = (/ (k*dz, k=0,nz) /) CALL dist1d(0, nz, startz, nzloc) endz = startz+nzloc !!$ PRINT*, 'PE', me, ' startz, endz, nzloc', startz, endz, nzloc ! IF( me.EQ.0) THEN WRITE(*,'(a/(10f8.3))') 'XGRID', xgrid(0:nx) WRITE(*,'(a/(10f8.3))') 'YGRID', ygrid(0:ny) WRITE(*,'(a/(10f8.3))') 'ZGRID', zgrid(0:nz) END IF ! ! Create hdf5 file ! CALL creatf(file, fid, 'PDE2D Result File', real_prec='d', & & mpicomm=MPI_COMM_WORLD) CALL attach(fid, '/', 'NX', nx) CALL attach(fid, '/', 'NY', ny) CALL attach(fid, '/', 'NZ', nz) CALL attach(fid, '/', 'NIDBAS1', nidbas(1)) CALL attach(fid, '/', 'NIDBAS2', nidbas(2)) CALL attach(fid, '/', 'NIDBAS3', nidbas(3)) CALL attach(fid, '/', 'NGAUSS1', ngauss(1)) CALL attach(fid, '/', 'NGAUSS2', ngauss(2)) CALL attach(fid, '/', 'NGAUSS2', ngauss(3)) CALL attach(fid, '/', 'MBESS', mbess) CALL putarr(fid, '/xgrid', xgrid, 'r') CALL putarr(fid, '/ygrid', ygrid, '\theta') CALL putarr(fid, '/zgrid', zgrid(0:nz-1), '\phi') !=========================================================================== ! 2.0 Discretize the PDE ! ! Set up spline ! t0 = seconds() CALL set_spline(nidbas, ngauss, xgrid, ygrid, zgrid(startz:endz), & & splxyz, (/.FALSE., .TRUE., .TRUE./), nlppform=nlppform) splxy => splxyz%sp12 ! IF( me.EQ.0) THEN WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in X', splxy%sp1%knots WRITE(*,'(/a,/(12(f8.3)))') 'KNOTS in Y', splxy%sp2%knots END IF CALL disp(splxyz%sp3%knots, 'KNOTS in Z', MPI_COMM_WORLD) ! ! 2D FE matrix assembly (in plane x-y) ! nrank = (nx+nidbas(1))*ny ! Rank of the FE matrix kl = (nidbas(1)+1)*ny -1 ! Number of sub-diagnonals ku = kl ! Number of super-diagnonals IF(me.EQ.0) THEN WRITE(*,'(a,5i6)') 'nrank, kl, ku', nrank, kl, ku END IF ! CALL init(ku, nrank, nterms, mat) CALL dismat(splxy, mat) ALLOCATE(arr(nrank)) ! ! BC on Matrix ! CALL ibcmat(mat, ny) tmat = seconds() - t0 ! ! 3D RHS assembly ! ALLOCATE(rhs(nrank,0:nzloc+nidbas(3)-1)) ! With right guard cells nzloc:nzloc+nidbas3-1 ALLOCATE(sol(nrank,0:nzloc-1)) CALL disrhs3(mbess, npow, splxyz, rhs) ! zsuml = SUM(ABS(rhs(:,0:nzloc-1))) CALL mpi_reduce(zsuml, zsumg, 1, MPI_DOUBLE_PRECISION, MPI_SUM , 0, & & MPI_COMM_WORLD, ierr) IF(me.EQ.0) PRINT*, 'sum of rhs after DISRHS3', zsumg ! ! FFT in z of RHS ! CALL dist1d(1, nrank, start_rank, nrank_loc) end_rank = start_rank+nrank_loc-1 ALLOCATE(rhs_t(0:nz-1,nrank_loc), crhs_t(0:nz-1,nrank_loc)) ! CALL pptransp(MPI_COMM_WORLD, rhs(:,0:nzloc-1), rhs_t) crhs_t = rhs_t CALL fourcol(crhs_t,1) crhs_t = crhs_t/REAL(nz,8) ! ! Apply Mass matrix to crhs ! CALL set_spline(nidbas(3), ngauss(3), zgrid, splz, .TRUE.) kmin =-nz/2 kmax = nz/2-1 CALL init_dft(splz, kmin, kmax) ALLOCATE(fftmass_shifted(kmin:kmax)) ALLOCATE(fftmass(0:nz-1)) CALL calc_fftmass_old(splz, fftmass_shifted) DO k=kmin,kmax fftmass(MODULO(k+nz,nz)) = fftmass_shifted(k) END DO IF(me.EQ.0) THEN WRITE(*,'(/a/(10(1pe12.3)))') 'Mass matrix', fftmass END IF DO k=0,nz-1 crhs_t(k,:) = crhs_t(k,:)/fftmass(k) END DO ! ! Fourier transform back crhs to real space in z ! CALL fourcol(crhs_t, -1) rhs_t(:,:) = REAL(crhs_t(:,:),8) CALL pptransp(MPI_COMM_WORLD, rhs_t, sol) ! Put the final RHS in SOL ! ! BC on RHS ! CALL ibcrhs3(sol, ny) ! IF(me.EQ.0) THEN WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem() END IF !=========================================================================== ! 3.0 Solve the dicretized system ! t0 = seconds() CALL factor(mat) tfact = seconds() - t0 gflops1 = dopla('DPBTRF',nrank,nrank,kl,ku,0) / tfact / 1.d9 t0 = seconds() CALL bsolve(mat, sol) ! ! Backtransform of solution ! DO k=0,nzloc-1 sol(1:ny-1,k) = sol(ny,k) END DO ! zsuml = SUM(ABS(sol)) CALL mpi_reduce(zsuml, zsumg, 1, MPI_DOUBLE_PRECISION, MPI_SUM , 0, & & MPI_COMM_WORLD, ierr) IF(me.EQ.0) PRINT*, 'sum of sol', zsumg ! tsolv = seconds() - t0 gflops2 = dopla('DPBTRS',nrank,1,kl,ku,0) / tsolv / 1.d9 ! ! Spline coefficients, taking into account of periodicity in y and z ! Note: in SOL, y was numbered first. ! dimx = splxy%sp1%dim dimy = splxy%sp2%dim dimz = splxyz%sp3%dim ALLOCATE(bcoef(0:dimx-1, 0:dimy-1, 0:dimz-1)) ! ! Get 3D array of spline coefs. ! DO j=0,dimy-1 DO i=0,dimx-1 ij = MODULO(j,ny) + i*ny + 1 DO k=0,nzloc-1 bcoef(i,j,k) = sol(ij,k) END DO END DO END DO ! ! Get missing coefs from remote guard cells ! prev = MODULO(me-1,npes) next = MODULO(me+1,npes) count = dimx*dimy DO i=0,nidbas(3)-1 CALL mpi_sendrecv(bcoef(0,0,i), count, MPI_DOUBLE_PRECISION, prev, 0, & & bcoef(0,0,nzloc+i), count, MPI_DOUBLE_PRECISION, next, 0, & & MPI_COMM_WORLD, status, ierr) END DO ! IF(me.EQ.0) THEN WRITE(*,'(/a,3i6)') 'dimx, dimy, dimz =', dimx, dimy, dimz END IF !=========================================================================== ! 4.0 Check the solution ! ! Check function values computed with various method ! CALL RANDOM_NUMBER(xp) CALL RANDOM_NUMBER(yp); yp = 2.d0*pi*yp CALL RANDOM_NUMBER(zp); zp = 2.d0*pi*zp nploc = 0 DO i=1,npart IF(zp(i).GE.zgrid(startz) .AND. zp(i).LT.zgrid(endz)) THEN nploc = nploc+1 xp(nploc) = xp(i) yp(nploc) = yp(i) zp(nploc) = zp(i) END IF END DO jder = (/0,0,0/) CALL gridval(splxyz, xp(1:nploc), yp(1:nploc), zp(1:nploc), fp_calc(1:nploc), jder, bcoef) DO i=1,nploc fp_anal(i) = (1-xp(i)**2) * xp(i)**mbess & & * COS(mbess*yp(i)) * COS(zp(i))**npow END DO errnorm2 = norm21(fp_calc(1:nploc)-fp_anal(1:nploc))/norm21(fp_calc(1:nploc)) IF(me.EQ.0) THEN WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors, using random points', & & errnorm2 END IF ! ALLOCATE(solcal(0:nx,0:ny,0:nzloc-1)) ALLOCATE(solana(0:nx,0:ny,0:nzloc-1)) ALLOCATE(errsol(0:nx,0:ny,0:nzloc-1)) ! DO i=0,nx DO j=0,ny DO k=0,nzloc-1 kk=startz+k solana(i,j,k) = (1-xgrid(i)**2) * xgrid(i)**mbess & & * COS(mbess*ygrid(j)) * COS(zgrid(kk))**npow END DO END DO END DO ! jder = (/0,0,0/) CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder, bcoef) t0 = seconds() CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder) tgrid = seconds()-t0 errsol = solana - solcal ! errnorm2 = norm2(errsol) / norm2(solana) IF(me.EQ.0) THEN WRITE(*,'(a,2(1pe12.3))') 'Relative discretization errors', & & errnorm2 END IF CALL putarr(fid, '/sol', solcal,pardim=3) CALL putarr(fid, '/solana', solana,pardim=3) ! ! Check derivative d/dx ! DO i=0,nx DO j=0,ny DO k=0,nzloc-1 IF( mbess .EQ. 0 ) THEN solana(i,j,k) = -2.0d0 * xgrid(i) * COS(zgrid(k+startz))**npow ELSE solana(i,j,k) = (-(mbess+2)*xgrid(i)**2 + mbess) * & & xgrid(i)**(mbess-1) * COS(mbess*ygrid(j)) * & & COS(zgrid(k+startz))**npow END IF END DO END DO END DO ! jder = (/1,0,0/) t0 = seconds() CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder) tgrid = tgrid + seconds()-t0 errsol = solana - solcal errnorm2 = norm2(errsol) / norm2(solana) IF(me.EQ.0) THEN WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dx', errnorm2 END IF CALL putarr(fid, '/derivx', solcal, pardim=3) CALL putarr(fid, '/derivx_exact', solana,pardim=3) ! ! Check derivative d/dy ! DO i=0,nx DO j=0,ny DO k=0,nzloc-1 solana(i,j,k) = -mbess * (1-xgrid(i)**2) * xgrid(i)**mbess * & & SIN(mbess*ygrid(j))* COS(zgrid(k+startz))**npow END DO END DO END DO ! jder = (/0,1,0/) t0 = seconds() CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder) tgrid = tgrid + seconds()-t0 errsol = solana - solcal errnorm2 = norm2(errsol) / norm2(solana) IF(me.EQ.0) THEN WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dy', errnorm2 END IF CALL putarr(fid, '/derivy', solcal, pardim=3) CALL putarr(fid, '/derivy_exact', solana,pardim=3) ! ! Check derivative d/dz ! DO i=0,nx DO j=0,ny DO k=0,nzloc-1 solana(i,j,k) = -npow*(1-xgrid(i)**2) * xgrid(i)**mbess & & * COS(mbess*ygrid(j)) * COS(zgrid(k+startz))**(npow-1) & & * SIN(zgrid(k+startz)) END DO END DO END DO ! jder = (/0,0,1/) t0 = seconds() IF(nlppform) THEN CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder) ELSE CALL gridval(splxyz, xgrid, ygrid, zgrid(startz:endz-1), solcal, jder, bcoef) END IF tgrid = tgrid + seconds()-t0 errsol = solana - solcal errnorm2 = norm2(errsol) / norm2(solana) IF(me.EQ.0) THEN WRITE(*,'(a,2(1pe12.3))') 'Relative error in d/dz', errnorm2 END IF CALL putarr(fid, '/derivz', solcal, pardim=3) CALL putarr(fid, '/derivz_exact', solana,pardim=3) !=========================================================================== ! 9.0 Epilogue ! IF(me.EQ.0) THEN WRITE(*,'(/a)') '---' WRITE(*,'(a,1pe12.3)') 'Matrice construction time (s) ', tmat WRITE(*,'(a,1pe12.3)') 'Matrice factorisation time (s)', tfact WRITE(*,'(a,1pe12.3)') 'Backsolve time (s) ', tsolv WRITE(*,'(a,1pe12.3)') 'gridval time (s) ', tgrid WRITE(*,'(a,2f10.3)') 'Factor/solve Gflop/s', gflops1, gflops2 WRITE(*,'(a,1pe12.3)') 'Mem used so far (MB)', mem() END IF ! DEALLOCATE(xgrid, ygrid, zgrid) DEALLOCATE(fftmass) DEALLOCATE(fftmass_shifted) DEALLOCATE(rhs) DEALLOCATE(sol) DEALLOCATE(rhs_t) DEALLOCATE(crhs_t) DEALLOCATE(bcoef) DEALLOCATE(solcal, solana, errsol) DEALLOCATE(arr) CALL destroy_sp(splxyz) CALL destroy_sp(splz) CALL destroy(mat) ! CALL closef(fid) ! CALL mpi_finalize(ierr) !=========================================================================== ! CONTAINS FUNCTION norm2(x) ! ! Compute the 2-norm of array x ! IMPLICIT NONE DOUBLE PRECISION :: norm2 DOUBLE PRECISION, INTENT(in) :: x(:,:,:) DOUBLE PRECISION :: sum2, sum2g INTEGER :: i, j ! sum2 = 0.0d0 DO i=1,SIZE(x,1) DO j=1,SIZE(x,2) DO k=1,SIZE(x,3) sum2 = sum2 + x(i,j,k)**2 END DO END DO END DO CALL mpi_allreduce(sum2, sum2g, 1, MPI_DOUBLE_PRECISION, MPI_SUM, & & MPI_COMM_WORLD, ierr) norm2 = SQRT(sum2g) END FUNCTION norm2 ! FUNCTION norm21(x) ! ! Compute the 2-norm of array x ! IMPLICIT NONE DOUBLE PRECISION :: norm21 DOUBLE PRECISION, INTENT(in) :: x(:) DOUBLE PRECISION :: sum2, sum2g INTEGER :: i, j ! sum2 = DOT_PRODUCT(x,x) CALL mpi_allreduce(sum2, sum2g, 1, MPI_DOUBLE_PRECISION, MPI_SUM, & & MPI_COMM_WORLD, ierr) norm21 = SQRT(sum2g) END FUNCTION norm21 SUBROUTINE prnmat(label, mat) CHARACTER(len=*) :: label DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: mat INTEGER :: i WRITE(*,'(/a)') TRIM(label) DO i=1,SIZE(mat,1) WRITE(*,'(10(1pe12.3))') mat(i,:) END DO END SUBROUTINE prnmat END PROGRAM main ! !+++