diff --git a/src/fields_mod.f90 b/src/fields_mod.f90
index 875497f..dd86396 100644
--- a/src/fields_mod.f90
+++ b/src/fields_mod.f90
@@ -1,1548 +1,1548 @@
 !------------------------------------------------------------------------------
 ! EPFL/Swiss Plasma Center
 !------------------------------------------------------------------------------
 !
-! MODULE: beam
+! MODULE: fields
 !
 !> @author
 !> Patryk Kaminski   EPFL/SPC
 !> Guillaume Le Bars EPFL/SPC
 !
 ! DESCRIPTION:
 !> Module responsible for initializing the magnetic field, solving the Poisson equation and computing the moments of the particles distribution function
 !------------------------------------------------------------------------------
 MODULE fields
    USE constants
    USE basic, ONLY: nr, nz, zgrid, rgrid, Br, Bz, Er, Ez, femorder, ngauss, nlppform, pot, Athet, &
    & splrz, splrz_ext, nlperiod, phinorm, nlPhis, nrank, mpirank, mpisize, step, it2d, timera, potxt, erxt, ezxt
    USE beam, ONLY: partslist
    USE bsplines
    USE mumps_bsplines
    use mpi
    Use omp_lib
    Use mpihelper, ONLY: db_type
    USE particletypes
    IMPLICIT NONE
 
    REAL(kind=db), allocatable, SAVE :: matcoef(:, :), phi_spline(:), vec1(:), vec2(:)
    REAL(kind=db), allocatable, SAVE :: loc_moments(:, :), loc_rhs(:), gradgtilde(:), fverif(:), ppformwork(:,:,:)
    INTEGER, SAVE:: loc_zspan
    TYPE(mumps_mat), SAVE :: femat          !< Finite Element Method matrix for the full domain
    TYPE(mumps_mat), SAVE :: reduccedmat    !< Finite Element Method matrix in the redduced web-spline sub-space
    !TYPE(mumps_mat), SAVE :: fematmpi       !< Finite Element Method matrix prepared for mpi parallelism
    INTEGER :: nbmoments = 10                !< number of moments to be calculated and stored
    INTEGER(kind=omp_lock_kind), Allocatable:: mu_lock(:) !< Stores the lock for fields parallelism
 
 CONTAINS
 
    SUBROUTINE mag_init
       USE basic, ONLY: magnetfile, nr, nz
       USE bsplines
       USE mumps_bsplines
       USE mpihelper
       USE geometry
 
       ALLOCATE (Br((nr + 1)*(nz + 1)), Bz((nr + 1)*(nz + 1)))
       ALLOCATE (Athet((nr + 1)*(nz + 1)))
 
       ! Calculate magnetic field mirror components in grid points (Davidson analytical formula employed)
       ! or load it from magnetfile if present
       CALL magnet(magnetfile)
 
    end subroutine mag_init
 !---------------------------------------------------------------------------
 !> @author
 !> Patryk kaminski   EPFL/SPC
 !> Guillaume Le Bars EPFL/SPC
 !
 ! DESCRIPTION:
 !>
 !> @brief
 !> Set-up the necessary variables for solving Poisson and computes the magnetic field on the grid
 !
 !---------------------------------------------------------------------------
    SUBROUTINE fields_init
       USE basic, ONLY: pot, nlperiod, nrank, rhs, volume, rgrid
       USE bsplines
       USE geometry
       USE mumps_bsplines
       USE mpihelper
       INTEGER :: nrz(2), i, d2, k1, n1
 
       ! Auxiliary vectors
       ALLOCATE(vec1((nz+1)*(nr+1)),vec2((nr+1)*(nz+1)))
       DO i=0,nr
          vec1(i*(nz+1)+1:(i+1)*(nz+1))=zgrid!(0:nz)
          vec2(i*(nz+1)+1:(i+1)*(nz+1))=rgrid(i)
       END DO
 
       ! Set up 2d spline splrz used in the FEM
       CALL set_spline(femorder, ngauss, zgrid, rgrid, splrz, nlppform=nlppform, period=nlperiod)
       ! Set up 2d spline splrz_ext used in the FEM to calculate the external electric field and potential
       CALL set_spline(femorder, ngauss, zgrid, rgrid, splrz_ext, nlppform=nlppform, period=nlperiod)
 
       !Allocate the work buffer to calculate the ppform
       d2 = splrz%sp2%dim
       k1 = splrz%sp1%order
       n1 = splrz%sp1%nints
       ALLOCATE(ppformwork(d2,k1,n1))
 
       ! Calculate dimension of splines
       nrz(1) = nz
       nrz(2) = nr
       CALL get_dim(splrz, nrank, nrz, femorder)
 
       ! Allocate necessary variables
       ALLOCATE (matcoef(nrank(1), nrank(2)))
       ALLOCATE (pot((nr + 1)*(nz + 1)))
       ALLOCATE (potxt((nr + 1)*(nz + 1)))
       ALLOCATE (Erxt((nr + 1)*(nz + 1)))
       ALLOCATE (Ezxt((nr + 1)*(nz + 1)))
       ALLOCATE (rhs(nrank(1)*nrank(2)))
       ALLOCATE (gradgtilde(nrank(1)*nrank(2)))
       gradgtilde = 0
       ALLOCATE (phi_spline(nrank(1)*nrank(2)))
       ALLOCATE (volume(nrank(1)*nrank(2)))
       volume = 0
 
       ALLOCATE (Er((nr + 1)*(nz + 1)), Ez((nr + 1)*(nz + 1)))
       ALLOCATE (mu_lock(nrank(1)*nrank(2)))
       do i = 1, nrank(1)*nrank(2)
          call omp_init_lock(mu_lock(i))
       end do
 
    end SUBROUTINE fields_init
 
 
    !---------------------------------------------------------------------------
 !> @author
 !> Patryk kaminski   EPFL/SPC
 !> Guillaume Le Bars EPFL/SPC
 !
 ! DESCRIPTION:
 !>
 !> @brief
 !> Set-up the geometry definition and read it from the standard input
-!> Precomputes the LHS matrix to solve Poisson abd the RHS effect of the dirichlet boundaries
+!> Precomputes the LHS matrix to solve Poisson and the RHS effect of the Dirichlet boundaries
 !
 !---------------------------------------------------------------------------
 
    SUBROUTINE fields_start
       USE geometry
       USE basic, ONLY: nrank
       implicit none
       INTEGER:: i,j, ierr
       DOUBLE PRECISION:: val
       ! set up the geometry module for setting up non-conforming boundary conditions
       call timera(0, "geom_init")
       call geom_init(splrz, vec1, vec2)
       call timera(1, "geom_init")
 
       ! Initialisation of FEM matrix
       CALL init(nrank(1)*nrank(2), 2, femat)
 
       ! Calculate and factorise FEM matrix (depends only on mesh)
       CALL fematrix(femat)
       
 
       If (walltype .lt. 0) then
          allocate (fverif(nrank(1)*nrank(2)))
          fverif = 0
       end if
       
       ! Compute the volume of the splines and gtilde for solving E using web-splines
       CALL comp_volume
       !$OMP PARALLEL 
       Call comp_gradgtilde
       !$OMP END PARALLEL
 
       if (nlweb) then
          ! Calculate reduced matrix for use of web splines
          call timera(0, "reduce femat")
          call Reducematrix(femat, reduccedmat)
          call timera(1, "reduce femat")   
          call factor(reduccedmat)
       else
          call factor(femat)
       end if
       
       
 
       !WRITE(*,*) "Copy and to_mat worked"
       !CALL MPI_abort(MPI_COMM_WORLD,-1,ierr)
       !$OMP PARALLEL
       call vacuum_field
       !$OMP END PARALLEL
    END SUBROUTINE fields_start
 
 !---------------------------------------------------------------------------
 !> @author
 !> Guillaume Le Bars EPFL/SPC
 !
 ! DESCRIPTION:
 !>
 !> @brief
 !> Recomputes the vacuum electric field
 !
 !---------------------------------------------------------------------------
    subroutine vacuum_field
       Use geometry
       USE basic, ONLY: pot, rhs
       implicit none
       INTEGER:: i, iend
       ! Computes the externally imposed electric field
       !$OMP DO SIMD
       do i=1,nrank(1)*nrank(2)
          rhs(i)=-gradgtilde(i)
          !rhs = -gradgtilde
          if (walltype .lt. 0) rhs(i) = rhs(i) + fverif(i)
       end do
       !$OMP END DO SIMD
 
       !$OMP BARRIER
 
       call poisson(splrz_ext)
 
       !$OMP BARRIER
       !$OMP DO 
          ! On the root process, compute the electric field for diagnostic purposes
       DO i=1,size(pot),16
          iend=min(size(pot),i+15)
          potxt(i:iend) = pot(i:iend)
          erxt(i:iend) = Er(i:iend)
          Ezxt(i:iend) = Ez(i:iend)
       END DO
       !$OMP END DO NOWAIT
    end subroutine
 
 
 !---------------------------------------------------------------------------
 !> @author
 !> Patryk kaminski   EPFL/SPC
 !> Guillaume Le Bars EPFL/SPC
 !
 ! DESCRIPTION:
 !>
 !> @brief
 !> Set-up the necessary variables for the communication of moments and rhs grid
 !
 !---------------------------------------------------------------------------
    SUBROUTINE fields_comm_init(Zbounds)
       USE basic, ONLY: nrank
       USE mpihelper
       INTEGER:: Zbounds(0:)
       loc_zspan = Zbounds(mpirank + 1) - Zbounds(mpirank) + femorder(1)
       if (allocated(loc_moments)) deallocate (loc_moments)
       ALLOCATE (loc_moments(nbmoments, loc_zspan*nrank(2)))
       if (allocated(loc_rhs)) deallocate (loc_rhs)
       ALLOCATE (loc_rhs(loc_zspan*nrank(2)))
       IF (mpisize .gt. 1) THEN
          CALL init_overlaps(nrank, femorder, Zbounds(mpirank), Zbounds(mpirank + 1), nbmoments)
       END IF
 
    END SUBROUTINE fields_comm_init
 
 !---------------------------------------------------------------------------
 !> @author
 !> Patryk kaminski   EPFL/SPC
 !> Guillaume Le Bars EPFL/SPC
 !
 ! DESCRIPTION:
 !>
 !> @brief
 !> Construct the right hand side vector used in the FEM Poisson solver
 !
 !> @param[in] plist list of the particles type storing the desired specie parameters
 !
 !---------------------------------------------------------------------------
    SUBROUTINE rhscon(plist)
       USE bsplines
       use mpi
       USE basic, ONLY: rhs, Zbounds
       USE beam, ONLY: particles
       USE mpihelper
       Use geometry
       Use omp_lib
       type(particles), INTENT(INOUT):: plist(:)
       INTEGER:: i,j,k
 
       IF (nlphis) then ! We calculate the self-consistent field
 
          !$OMP DO SIMD
          Do i=1,size(loc_rhs)
             loc_rhs(i)=0
          end do
          !$OMP END DO SIMD
 
 ! Assemble rhs for each specie
          Do i = 1, size(plist, 1)
             if (plist(i)%is_field) CALL deposit_charge(plist(i), loc_rhs)
          END Do
          !$OMP BARRIER
 
          !Communicate the overlaps
          if(mpisize .gt. 1) call rhs_overlap
 
-         ! Add gradgtilde
+         ! Add gradgtilde: local 0-based i -> (row j, column k); global index = j*nrank(1) + k + 1 + Zbounds(mpirank)
          !$OMP DO SIMD
-         Do i=1,size(loc_rhs)
+         Do i=0,size(loc_rhs)-1 ! 0-based so that j and k split cleanly at the end of each local row
             j=i/loc_zspan
             k=mod(i,loc_zspan)
-            loc_rhs(i)=loc_rhs(i)-gradgtilde((j)*nrank(1)+(k+Zbounds(mpirank)))
+            loc_rhs(i+1)=loc_rhs(i+1)-gradgtilde((j)*nrank(1)+(k+1+Zbounds(mpirank)))
          end do
          !$OMP END DO SIMD
 
 
 
          !add the fverif source for test cases
          if (walltype .lt. 0)then
             !$OMP DO
-            Do i=1,size(loc_rhs)
+            Do i=0,size(loc_rhs)-1 ! same 0-based local -> 1-based global mapping as for gradgtilde
                j=i/loc_zspan
                k=mod(i,loc_zspan)
-               loc_rhs(i)=loc_rhs(i)+fverif((j)*nrank(1)+(k+Zbounds(mpirank)))
+               loc_rhs(i+1)=loc_rhs(i+1)+fverif((j)*nrank(1)+(k+1+Zbounds(mpirank)))
             end do
             !$OMP END DO
          end if
 
          ! If we are using MPI parallelism, reduce the rhs on the root process
          IF (mpisize .gt. 1) THEN
             CALL rhs_gather(rhs)
          ELSE
             !$OMP DO
             Do i=1,size(loc_rhs)
                rhs(i)=loc_rhs(i)
             end do
             !$OMP END DO
          END IF
       ELSE ! We only consider the externally imposed field
          !$OMP DO
          Do i=1,size(rhs)
             rhs(i)=0
          end do
          !$OMP END DO
       END IF
    END SUBROUTINE rhscon
 
    !---------------------------------------------------------------------------
 !> @author
 !> Patryk kaminski   EPFL/SPC
 !> Guillaume Le Bars EPFL/SPC
 !
 ! DESCRIPTION:
 !>
 !> @brief
 !> Calculate the 0th 1st and 2nd order moments of the particle p and stores it in moment
 !
 !> @param[in] p the particles type storing the desired specie parameters
 !> @param[out] moment the 2d array storing the calculated moments
 !
 !---------------------------------------------------------------------------
    SUBROUTINE momentsdiag(p)
       USE bsplines
       use mpi
       USE beam, ONLY: particles
       USE mpihelper
       Use geometry
       type(particles), INTENT(INOUT):: p
       !REAL(kind=db), INTENT(INOUT):: moment(:, :)
 
       !$OMP SINGLE
       loc_moments = 0 ! Reset the moments matrix
 ! Assemble rhs
       !$OMP END SINGLE
 
       IF (p%Nploc .ne. 0) THEN
          CALL deposit_moments(p, loc_moments)
       END IF
 
       !$OMP SINGLE
       if(.not. allocated(p%moments))THEN
          if(mpirank.eq.0)THEN
             Allocate(p%moments(nbmoments,nrank(1)*nrank(2)))
          else
             Allocate(p%moments(0,0))
          end if
       end if
 
       !$OMP END SINGLE
 
 ! If we are using MPI parallelism, reduce the rhs on the root process
       IF (mpisize .gt. 1) THEN
          CALL moments_gather(p%moments)
       ELSE
          !$OMP SINGLE
          p%moments = loc_moments
          !$OMP END SINGLE NOWAIT
       END IF
 
    END SUBROUTINE momentsdiag
 
 !---------------------------------------------------------------------------
 !> @author
 !> Patryk kaminski   EPFL/SPC
 !> Guillaume Le Bars EPFL/SPC
 !
 ! DESCRIPTION:
 !>
 !> @brief
 !> Deposit the particles moments (n,v,v^2) from p on the grid
 !
 !> @param[in] p the particles type storing the desired specie parameters
 !> @param[in] p_loc_moments local tensor used to store the moments of the given specie
 !---------------------------------------------------------------------------
 
    SUBROUTINE deposit_moments(p, p_loc_moments)
       USE bsplines
       use mpi
       USE basic, ONLY: Zbounds
       USE beam, ONLY: particles
       USE mpihelper
       USE geometry
       USE omp_lib
 
       TYPE(particles), INTENT(IN):: p
       REAL(kind=db), DIMENSION(:, :), INTENT(INOUT):: p_loc_moments
       REAL(kind=db), DIMENSION(:, :), Allocatable:: omp_loc_moments
       INTEGER ::irow, jcol, it, jw, mu, i, k, iend, nbunch
       INTEGER, DIMENSION(:), ALLOCATABLE::zleft, rleft
       REAL(kind=db) :: vr, vthet, vz, coeff
       REAL(kind=db), ALLOCATABLE :: fun(:, :, :), fun2(:, :, :)
       INTEGER:: num_threads
 
       num_threads = omp_get_max_threads()
       nbunch = p%Nploc/num_threads ! Particle bunch size used when calling basfun
       nbunch = max(nbunch, 1) ! Particle bunch size used when calling basfun
       nbunch = min(nbunch, 64) ! Particle bunch size used when calling basfun
 
         
 ! Assemble rhs
       IF (p%Nploc .gt. 0) THEN
 !!$OMP PARALLEL DEFAULT(SHARED), PRIVATE(zleft,rleft,jw,it,iend,irow,jcol,mu,k,vr,vz,vthet,coeff,fun,fun2)
       ALLOCATE (zleft(nbunch), rleft(nbunch))
       ALLOCATE (fun(1:femorder(1) + 1, 0:0, nbunch), fun2(1:femorder(2) + 1, 0:0, nbunch)) ! Arrays keeping values of b-splines at gauss node
 
          allocate(omp_loc_moments(size(p_loc_moments,1),size(p_loc_moments,2)))
          omp_loc_moments=0
          !$OMP DO
          DO i = 1, p%Nploc, nbunch
             ! Avoid segmentation fault by accessing non relevant data
             iend = min(i + nbunch - 1, p%Nploc)
             k = iend - i + 1
             ! Localize the particle
             !CALL locintv(splrz%sp2, p%R(i:iend), rleft(1:k))
             !CALL locintv(splrz%sp1, p%Z(i:iend), zleft(1:k))
             rleft(1:k) = p%rindex(i:iend)
             zleft(1:k) = p%zindex(i:iend)
             ! Compute the value of the splines at the particles positions
             CALL basfun(p%pos(3,i:iend), splrz%sp1, fun(:, :, 1:k), zleft(1:k) + 1)
             CALL basfun(p%pos(1,i:iend), splrz%sp2, fun2(:, :, 1:k), rleft(1:k) + 1)
             DO k = 1, (iend - i + 1)
                DO jw = 1, (femorder(2) + 1)
                   DO it = 1, (femorder(1) + 1)
                      irow = zleft(k) + it - Zbounds(mpirank)
                      jcol = rleft(k) + jw
                      mu = irow + (jcol - 1)*(loc_zspan)
                      coeff = p%weight*fun(it, 0, k)*fun2(jw, 0, k)
                      ! Add contribution of particle nbunch to rhs grid point mu
                      vr = 0.5*(p%U(1,i + k - 1)/p%Gamma(i + k - 1) + p%Uold(1,i + k - 1)/p%Gammaold(i + k - 1))
                      vz = 0.5*(p%U(3,i + k - 1)/p%Gamma(i + k - 1) + p%Uold(3,i + k - 1)/p%Gammaold(i + k - 1))
                      vthet = 0.5*(p%U(2,i + k - 1)/p%Gamma(i + k - 1) + p%Uold(2,i + k - 1)/p%Gammaold(i + k - 1))
                      !call omp_set_lock(mu_lock(mu))
                      !!$OMP ATOMIC UPDATE
                      omp_loc_moments(1, mu) = omp_loc_moments(1, mu) + coeff
                      !!$OMP END ATOMIC
                      !!$OMP ATOMIC UPDATE
                      omp_loc_moments(2, mu) = omp_loc_moments(2, mu) + coeff*vr
                      !!$OMP END ATOMIC
                      !!$OMP ATOMIC UPDATE
                      omp_loc_moments(3, mu) = omp_loc_moments(3, mu) + coeff*vthet
                      !!$OMP END ATOMIC
                      !!$OMP ATOMIC UPDATE
                      omp_loc_moments(4, mu) = omp_loc_moments(4, mu) + coeff*vz
                      !!$OMP END ATOMIC
                      !!$OMP ATOMIC UPDATE
                      omp_loc_moments(5, mu) = omp_loc_moments(5, mu) + coeff*vr*vr
                      !!$OMP END ATOMIC
                      !!$OMP ATOMIC UPDATE
                      omp_loc_moments(6, mu) = omp_loc_moments(6, mu) + coeff*vr*vthet
                      !!$OMP END ATOMIC
                      !!$OMP ATOMIC UPDATE
                      omp_loc_moments(7, mu) = omp_loc_moments(7, mu) + coeff*vr*vz
                      !!$OMP END ATOMIC
                      !!$OMP ATOMIC UPDATE
                      omp_loc_moments(8, mu) = omp_loc_moments(8, mu) + coeff*vthet*vthet
                      !!$OMP END ATOMIC
                      !!$OMP ATOMIC UPDATE
                      omp_loc_moments(9, mu) = omp_loc_moments(9, mu) + coeff*vthet*vz
                      !!$OMP END ATOMIC
                      !!$OMP ATOMIC UPDATE
                      omp_loc_moments(10, mu) = omp_loc_moments(10, mu) + coeff*vz*vz
                      !!$OMP END ATOMIC
                      !call omp_unset_lock(mu_lock(mu))
                   END DO
                END DO
             END DO
          END DO
       !!$OMP END PARALLEL DO
       !$OMP END DO NOWAIT
       Do i=1,size(p_loc_moments,2)
          call omp_set_lock(mu_lock(i))
          p_loc_moments(:,i)=p_loc_moments(:,i)+omp_loc_moments(:,i)
          call omp_unset_lock(mu_lock(i))
       end do
       !!$OMP END CRITICAL(loc_moments_reduce)
       DEALLOCATE (fun, fun2, zleft, rleft)
       END IF
 
    END subroutine deposit_moments
 
 !---------------------------------------------------------------------------
 !> @author
 !> Patryk kaminski   EPFL/SPC
 !> Guillaume Le Bars EPFL/SPC
 !
 ! DESCRIPTION:
 !>
 !> @brief
 !> Deposit the particles charges (q) from p on the grid
 !
 !> @param[in] p the particles type storing the desired specie parameters
 !> @param[in] p_loc_moments local tensor used to store the moments of the given specie
 !---------------------------------------------------------------------------
 
    SUBROUTINE deposit_charge(p, p_loc_moments)
       USE bsplines
       use mpi
       USE constants
       USE basic, ONLY: Zbounds, rnorm, phinorm
       USE beam, ONLY: particles
       USE mpihelper
       USE geometry
       USE omp_lib
 
       TYPE(particles), INTENT(IN):: p
       REAL(kind=db), DIMENSION(:), INTENT(INOUT):: p_loc_moments
       REAL(kind=db), DIMENSION(:), allocatable:: omp_loc_moments
       INTEGER ::irow, jcol, it, jw, mu, i, k, iend, nbunch
       INTEGER, DIMENSION(:), ALLOCATABLE::zleft, rleft
       REAL(kind=db), ALLOCATABLE :: fun(:, :, :), fun2(:, :, :)
       INTEGER:: num_threads, curr_thread
       real(kind=db):: contrib, chargecoeff
 
       num_threads = omp_get_max_threads()
       nbunch = p%Nploc/num_threads ! Particle bunch size used when calling basfun
       nbunch = max(nbunch, 1) ! Particle bunch size used when calling basfun
       nbunch = min(nbunch, 16) ! Particle bunch size used when calling basfun
       chargecoeff = p%weight*p%q/(2*pi*eps_0*phinorm*rnorm) ! Normalized charge density simulated by each macro particle
 
 ! Assemble rhs
       IF (p%Nploc .ne. 0) THEN
          !!!$OMP PARALLEL DEFAULT(SHARED), PRIVATE(i,zleft, rleft, jw, it, iend, irow, jcol, mu, k, fun, fun2, contrib)
          ALLOCATE (zleft(nbunch), rleft(nbunch))
          ALLOCATE (fun(1:femorder(1) + 1, 0:0, nbunch), fun2(1:femorder(2) + 1, 0:0, nbunch)) ! Arrays keeping values of b-splines at gauss node
          allocate(omp_loc_moments(size(p_loc_moments)))
          omp_loc_moments=0
          zleft=0
          rleft=0
           curr_thread=omp_get_thread_num()
          !$OMP DO
          DO i = 1, p%Nploc, nbunch
             ! Avoid segmentation fault by accessing non relevant data
             iend = min(i + nbunch - 1, p%Nploc)
             k = iend - i + 1
             ! Localize the particle
             rleft(1:k) = p%rindex(i:iend)
             zleft(1:k) = p%zindex(i:iend)
             ! Compute the value of the splines at the particles positions
             CALL basfun(p%pos(3,i:iend), splrz%sp1, fun, zleft(1:k) + 1)
             CALL basfun(p%pos(1,i:iend), splrz%sp2, fun2, rleft(1:k) + 1)
             !CALL geom_weight(p%Z(i:iend),p%R(i:iend),wgeom)
             DO k = 1, (iend - i + 1)
                DO jw = 1, (femorder(2) + 1)
                   DO it = 1, (femorder(1) + 1)
                      irow = zleft(k) + it - Zbounds(mpirank)
                      jcol = rleft(k) + jw
                      mu = irow + (jcol - 1)*(loc_zspan)
                      ! Add contribution of particle k to rhs grid point mu
                      contrib = fun(it, 0, k)*fun2(jw, 0, k)*p%geomweight(0,i + k - 1)*chargecoeff
                      omp_loc_moments(mu) = omp_loc_moments(mu) + contrib
                   END DO
                END DO
             END DO
          END DO
          !$OMP END DO NOWAIT
          DEALLOCATE (fun, fun2, zleft, rleft)
       Do i=1,size(p_loc_moments)
          !$OMP ATOMIC
             p_loc_moments(i)=p_loc_moments(i)+omp_loc_moments(i)
          !$OMP END ATOMIC
       end do
       END IF
 
 
    END subroutine deposit_charge
 
 
 !---------------------------------------------------------------------------
 !> @author
 !> Guillaume Le Bars EPFL/SPC
 !
 ! DESCRIPTION:
 !>
 !> @brief
 !> Do the communication of the local moment matrices between mpi workers for the overlap grid points
 !> 
 !
 !---------------------------------------------------------------------------
 
    SUBROUTINE rhs_overlap
       USE mpihelper
       USE Basic, ONLY: Zbounds, mpirank, leftproc, rightproc
       INTEGER:: ierr, i, j
 
       !$OMP MASTER
       !WRITE(*,*) mpirank, "wE communicate overlap rhs"
       CALL rhsoverlapcomm(mpirank, leftproc, rightproc, loc_rhs, nrank, femorder, loc_zspan - femorder(1))
       !$OMP END MASTER
       !$OMP BARRIER
 
       IF (mpirank .gt. 0) THEN
          !$OMP DO SIMD collapse(2)
          DO j = 1, femorder(1)
             DO i = 1, nrank(2)
                loc_rhs((i - 1)*loc_zspan + j) = loc_rhs((i - 1)*loc_zspan + j)&
                & + rhsoverlap_buffer(nrank(2)*(j - 1) + i)
             END DO
          END DO
          !$OMP END DO SIMD
       END IF
       !$OMP BARRIER
    END SUBROUTINE rhs_overlap
 
 
 !---------------------------------------------------------------------------
 !> @author
 !> Guillaume Le Bars EPFL/SPC
 !
 ! DESCRIPTION:
 !>
 !> @brief
 !> Do the communication of the local moment matrices between mpi workers to reduce the result on the host
 !
 !---------------------------------------------------------------------------
 
    SUBROUTINE rhs_gather(rhs)
       USE mpihelper
       USE Basic, ONLY: Zbounds, mpirank, leftproc, rightproc
       REAL(kind=db), DIMENSION(:), INTENT(INOUT):: rhs
       INTEGER:: ierr, i, j
       INTEGER:: displs(mpisize), counts(mpisize)
       INTEGER:: overlap_type
       INTEGER:: rcvoverlap_type
 
       displs = Zbounds(0:mpisize - 1)
       counts = Zbounds(1:mpisize) - Zbounds(0:mpisize - 1)
       counts(mpisize) = counts(mpisize) + femorder(1)
       
       ! Set communication vector type
       overlap_type = rhsoverlap_type
       rcvoverlap_type = rcvrhsoverlap_type
 
       !$OMP MASTER
       IF (mpirank .eq. 0) THEN
          rhs = 0
       END IF
       CALL MPI_GATHERV(loc_rhs, counts(mpirank + 1), rhsoverlap_type, &
       &                rhs, counts, displs, rcvrhsoverlap_type, 0, MPI_COMM_WORLD, ierr)
       !$OMP END MASTER
       !$OMP BARRIER
    END SUBROUTINE rhs_gather
 
    !---------------------------------------------------------------------------
 !> @author
 !> Guillaume Le Bars EPFL/SPC
 !
 ! DESCRIPTION:
 !>
 !> @brief
 !> Do the communication of the local moment matrices between mpi workers for the overlap grid points
 !> and reduce the result on the host
 !
 !---------------------------------------------------------------------------
 
    SUBROUTINE moments_gather(moment)
       USE mpihelper
       USE Basic, ONLY: Zbounds, mpirank, leftproc, rightproc
       REAL(kind=db), DIMENSION(:, :), INTENT(INOUT):: moment
       INTEGER:: ierr, i, j
       INTEGER:: displs(mpisize), counts(mpisize)
 
       displs = Zbounds(0:mpisize - 1)
       counts = Zbounds(1:mpisize) - Zbounds(0:mpisize - 1)
       counts(mpisize) = counts(mpisize) + femorder(1)
       !$OMP MASTER
       CALL momentsoverlapcomm(mpirank, leftproc, rightproc, loc_moments, nrank, femorder, loc_zspan - femorder(1))
       !$OMP END MASTER
       !$OMP BARRIER
 
       IF (mpirank .gt. 0) THEN
          !!$OMP PARALLEL DO SIMD DEFAULT(SHARED) private(i)
          !$OMP DO SIMD collapse(2)
          DO j = 1, femorder(1)
             DO i = 1, nrank(2)
                loc_moments(1:nbmoments, (i - 1)*loc_zspan + j) = loc_moments(1:nbmoments, (i - 1)*loc_zspan + j)&
                & + momentsoverlap_buffer(nbmoments*(nrank(2)*(j - 1) + i - 1) + 1:nbmoments*(nrank(2)*(j - 1) + i))
             END DO
          END DO
          !$OMP END DO SIMD
       END IF
 
       !$OMP MASTER
       ! Set communication vector type
       IF (mpirank .eq. 0) THEN
          moment = 0
       END IF
       CALL MPI_GATHERV(loc_moments, counts(mpirank + 1), momentsoverlap_type, &
       &                moment, counts, displs, rcvmomentsoverlap_type, 0, MPI_COMM_WORLD, ierr)
       !$OMP END MASTER
       !$OMP BARRIER
 
    END SUBROUTINE moments_gather
 
 !---------------------------------------------------------------------------
 !> @author
 !> Patryk kaminski   EPFL/SPC
 !> Guillaume Le Bars EPFL/SPC
 !
 ! DESCRIPTION:
 !>
 !> @brief
 !> Solves Poisson equation using FEM. Distributes the result on all MPI workers and interpolate the electric forces
 !> for each particle.
 !
 !---------------------------------------------------------------------------
    SUBROUTINE poisson(splinevar)
       USE basic, ONLY: rhs, nrank, pot, nlend
       USE bsplines, ONLY: spline2d, gridval
       USE mumps_bsplines, ONLY: bsolve, vmx
       USE futils
       Use geometry
       type(spline2d):: splinevar
       INTEGER:: ierr, i, j, iend
       real(kind=db), allocatable::reducedrhs(:)
       real(kind=db), allocatable:: reducedsol(:), tempcol(:)
       allocate (reducedrhs(nrank(1)*nrank(2)))
       allocate (reducedsol(nbreducedspline))
       allocate (tempcol(nrank(1)*nrank(2)))
 
       !$OMP MASTER
          if (nlweb) then ! we use the web-spline reduction for stability
             if(mpirank.eq.0) then       ! Only the root process solves Poisson
                reducedrhs = vmx(etilde, rhs)
                Call bsolve(reduccedmat, reducedrhs(1:nbreducedspline), reducedsol)
             end if
             CALL MPI_Bcast(reducedsol, nbreducedspline, db_type, 0, MPI_COMM_WORLD, ierr)
     
             tempcol = 0
             tempcol(1:nbreducedspline) = reducedsol
             !phi_spline = 0
             phi_spline = vmx(etildet, tempcol)
          else
             if(mpirank.eq.0) then
                CALL bsolve(femat, rhs, phi_spline)
             end if
             CALL MPI_Bcast(phi_spline, nrank(1)*nrank(2), db_type, 0, MPI_COMM_WORLD, ierr)
          end if
       !$OMP END MASTER
       !$OMP BARRIER
       
       !$OMP DO SIMD collapse(2)
       Do i=1,nrank(1)
          Do j=1,nrank(2)
-            matcoef(i,j) = phi_spline((i-1*nrank(1)+j))
+            matcoef(i,j) = phi_spline((j-1)*nrank(1)+i) ! first (z) index runs fastest in phi_spline
          END DO
       END DO
       !$OMP END DO SIMD
 
       ! update the ppform coefficients
       CALL updt_ppform2d(splinevar, matcoef)
 
 
 !$OMP BARRIER
 
       IF (mpirank .eq. 0 .and. (modulo(step, it2d) .eq. 0 .or. nlend)) THEN
          !$OMP DO 
          ! On the root process, compute the electric field for diagnostic purposes
          DO i=1,size(pot),16
             iend=min(size(pot),i+15)
          CALL gridval(splinevar, vec1(i:iend), vec2(i:iend), pot(i:iend), (/0, 0/))
          CALL gridval(splinevar, vec1(i:iend), vec2(i:iend), Ez(i:iend), (/1, 0/))
          CALL gridval(splinevar, vec1(i:iend), vec2(i:iend), Er(i:iend), (/0, 1/))
-         Ez(i:iend) = -pot(i:iend)*gridwdir(1,i:iend) - Ez*gridwdir(0,i:iend) - gtilde(1,i:iend)
-         Er(i:iend) = -pot(i:iend)*gridwdir(2,i:iend) - Er*gridwdir(0,i:iend) - gtilde(2,i:iend)
+         Ez(i:iend) = -pot(i:iend)*gridwdir(1,i:iend) - Ez(i:iend)*gridwdir(0,i:iend) - gtilde(1,i:iend)
+         Er(i:iend) = -pot(i:iend)*gridwdir(2,i:iend) - Er(i:iend)*gridwdir(0,i:iend) - gtilde(2,i:iend)
          pot(i:iend) = pot(i:iend)*gridwdir(0,i:iend) + gtilde(0,i:iend)
          END DO
          !$OMP END DO NOWAIT
       END IF
    END SUBROUTINE poisson
 
 !---------------------------------------------------------------------------
 !> @author
 !> Guillaume Le Bars EPFL/SPC
 !
 ! DESCRIPTION:
 !>
 !> @brief
 !> Computes the electric fields and potential at the particles position for particles
 !> between positions nstart and nend in the list
 !
 !> @param[in] p the particles type storing the desired specie parameters
 !> @param[in] nstart starting index for the particle list
 !> @param[in] nend ending index for the particle list
 !---------------------------------------------------------------------------
 
    SUBROUTINE EFieldscompatparts(p, nstart, nend)
       Use beam, ONLY: particles
       Use geometry
       Use splinebound
       TYPE(particles), INTENT(INOUT):: p
       INTEGER, OPTIONAL::nstart, nend
       INTEGER:: i, iend, nst, nnd
       INTEGER:: nbunch
       INTEGER:: num_threads
       Real(kind=db), ALLOCATABLE:: erext(:), ezext(:), gtildeloc(:, :)
 
-      if (.not. present(nstart)) nst = 1
-      if (.not. present(nend)) nnd = p%Nploc
+      nst = 1; if (present(nstart)) nst = nstart   ! defaults to the first particle when nstart is absent
+      nnd = p%Nploc; if (present(nend)) nnd = nend ! defaults to the last local particle when nend is absent
       !num_threads = omp_get_max_threads()
       !nbunch = (nnd - nst + 1)/num_threads ! Particle bunch size used when calling basfun
       !nbunch = max(nbunch, 1) ! Particle bunch size used when calling basfun
       nbunch =  64 ! Particle bunch size used when calling basfun
       Allocate (erext(nbunch), ezext(nbunch), gtildeloc(0:2,0:nbunch - 1))
 
       ! Evaluate the electric potential and field at the particles position
 
       !$OMP DO SIMD
       DO i = nst, nnd, nbunch
          ! Avoid segmentation fault by accessing non relevant data
          iend = min(i + nbunch - 1, nnd)
 
          CALL speval(splrz, p%pos(3,i:iend), p%pos(1,i:iend),p%Zindex(i:iend),p%Rindex(i:iend), p%pot(i:iend), p%E(2,i:iend), p%E(1,i:iend))
          CALL speval(splrz_ext, p%pos(3,i:iend), p%pos(1,i:iend),p%Zindex(i:iend),p%Rindex(i:iend), p%potxt(i:iend))
 
          Call total_gtilde(p%pos(3,i:iend), p%pos(1,i:iend), gtildeloc(:,0:iend - i),p%geomweight(:,i:iend))
 
          p%E(2,i:iend) = -p%E(2,i:iend)*p%geomweight(0,i:iend) - p%pot(i:iend)*p%geomweight(1,i:iend) - gtildeloc(1,0:iend - i)
          p%E(1,i:iend) = -p%E(1,i:iend)*p%geomweight(0,i:iend) - p%pot(i:iend)*p%geomweight(2,i:iend) - gtildeloc(2,0:iend - i)
          p%pot(i:iend) = p%geomweight(0,i:iend)*p%pot(i:iend) + gtildeloc(0,0:iend - i)
          p%potxt(i:iend) = p%geomweight(0,i:iend)*p%potxt(i:iend) + gtildeloc(0,0:iend - i)
       END DO
       !$OMP END DO SIMD NOWAIT
 
    END SUBROUTINE EFieldscompatparts
 
 !---------------------------------------------------------------------------
 !> @author
 !> Patryk kaminski   EPFL/SPC
 !> Guillaume Le Bars EPFL/SPC
 !
 ! DESCRIPTION:
 !>
 !> @brief
 !> Constucts the FEM matrix using bsplines initialized in fields_init
 !---------------------------------------------------------------------------
    SUBROUTINE fematrix(mat)
       ! Assembles the finite element matrix into the sparse rows of mat (mat%mat%row).
       !
       ! For every grid cell (i,j) the gauss points returned by calc_gauss are used to
       ! integrate, for each pair of local basis functions (jt,iw), the kterms products
       ! described by coefeq. The integral is added to element (mu,mu2) of the matrix,
       ! where mu and mu2 are the global indices of the two basis functions.
       !
       ! mat: matrix receiving the contributions; its rows are updated through updt_sploc.
       USE bsplines
       USE geometry
       USE omp_lib
       USE sparse
       type(mumps_mat):: mat
       REAL(kind=db), ALLOCATABLE :: xgauss(:, :), wgauss(:), wgeom(:, :)
       INTEGER, ALLOCATABLE :: f(:, :), aux(:)
       REAL(kind=db), ALLOCATABLE :: coefs(:)
       REAL(kind=db), ALLOCATABLE :: fun(:, :, :), fun2(:, :, :)
       REAL(kind=db)  :: contrib
       INTEGER, ALLOCATABLE :: idert(:, :), iderw(:, :), iderg(:, :)
       integer,allocatable:: iid(:),jid(:)
       INTEGER :: i, j, jt, iw, irow, jcol, mu, igauss, iterm, irow2, jcol2, mu2, kterms, gausssize
       ! Number of terms in the integrand; coefeq also sets kterms to 8 on every call
       kterms=8
       
       ! Work arrays are sized for at most 3*ngauss(1)*ngauss(2) gauss points per cell
       ! NOTE(review): assumes calc_gauss never returns more points than that - confirm
       If (allocated(fun)) deallocate (fun)
       If (allocated(fun2)) deallocate (fun2)
       ALLOCATE (fun(1:femorder(1) + 1, 0:1,3*ngauss(1)*ngauss(2)), fun2(1:femorder(2) + 1, 0:1,3*ngauss(1)*ngauss(2)))
       If (allocated(wgeom)) deallocate (wgeom)
       ALLOCATE (wgeom(0:2,3*ngauss(1)*ngauss(2)))!Output of geom_weight at the gauss nodes (rows 0:2 are selected through iderg)

       !ALLOCATE(xgauss(ngauss(1)*ngauss(2),2), wgauss(ngauss(1)*ngauss(2)),zg(ngauss(1)),rg(ngauss(2)), wzg(ngauss(1)), wrg(ngauss(2)))   !Gaussian nodes and weights arrays
       ALLOCATE (f((femorder(1) + 1)*(femorder(2) + 1), 2), aux(femorder(1) + 1)) !Auxiliary arrays ordering bsplines
       ALLOCATE (idert(kterms, 2), iderw(kterms, 2), coefs(kterms), iderg(kterms, 2))
       ALLOCATE (iid(3*ngauss(1)*ngauss(2)), jid(3*ngauss(1)*ngauss(2)))
       !Pointers on the order of derivatives
       call timera(0, "fematrix")

 ! Construction of the auxiliary array ordering bsplines in a given interval:
 ! f(jt,1) and f(jt,2) are the offsets (1-based) of local basis function jt
 ! along the first and second spline direction
       DO i = 1, (femorder(1) + 1)
          aux(i) = i
       END DO
       DO i = 1, (femorder(2) + 1)
          f((i - 1)*(femorder(1) + 1) + 1:i*(femorder(1) + 1), 1) = aux
          f((i - 1)*(femorder(1) + 1) + 1:i*(femorder(1) + 1), 2) = i
       END DO

 ! Assemble FEM matrix
 ! The (j,i) loop nest is collapsed and shared between threads; every work array
 ! is PRIVATE so that each thread fills its own copy.
 !$OMP PARALLEL DO DEFAULT(SHARED), PRIVATE(j,i,xgauss,wgauss,gausssize,wgeom, igauss, iterm,jt,irow,jcol, mu, iw, irow2,jcol2, mu2, contrib, iderw, idert, iderg, coefs, fun, fun2,iid,jid), collapse(2)
       DO j = 1, nr ! Loop on r position
          DO i = 1, nz        ! Loop on z position
             !! Computation of gauss weight and position in r and z direction for gaussian integration
             Call calc_gauss(splrz, ngauss, i, j, xgauss, wgauss, gausssize)

             ! Cell indices replicated for every gauss point, as expected by the batched basfun calls
             iid=i
             jid=j

             ! NOTE(review): a cell with exactly one gauss point skips these evaluations but
             ! still enters the igauss loop below (comp_volume tests gausssize .gt. 0) - confirm
             ! that calc_gauss never returns gausssize = 1
             if (gausssize .gt. 1) then
                !If (allocated(wgeom)) deallocate (wgeom)
                !ALLOCATE (wgeom(0:2,gausssize))
                CALL geom_weight(xgauss(1:gausssize, 1), xgauss(1:gausssize, 2), wgeom(:,1:gausssize))
                CALL basfun(xgauss(1:gausssize, 1), splrz%sp1, fun(:,:,1:gausssize), iid(1:gausssize))
                CALL basfun(xgauss(1:gausssize, 2), splrz%sp2, fun2(:,:,1:gausssize), jid(1:gausssize))
             End if
             ! Loop on the first basis function of the pair
             DO jt = 1, (1 + femorder(1))*(femorder(2) + 1)
                irow = i + f(jt, 1) - 1; jcol = j + f(jt, 2) - 1
                ! Global index of basis function jt (matrix row)
                mu = irow + (jcol - 1)*nrank(1)
                
                ! Loop on the second basis function of the pair
                DO iw = 1, (1 + femorder(1))*(femorder(2) + 1)
                   irow2 = i + f(iw, 1) - 1; jcol2 = j + f(iw, 2) - 1
                   ! Global index of basis function iw (matrix column)
                   mu2 = irow2 + (jcol2 - 1)*nrank(1)
                   contrib=0.0_db
                   DO igauss = 1, gausssize ! Loop on gaussian weights and positions
                      ! NOTE(review): coefeq only depends on the gauss point, yet it is
                      ! re-evaluated for every (jt,iw) pair
                      CALL coefeq(xgauss(igauss, :), idert, iderw, iderg, coefs, kterms)
                      DO iterm = 1, kterms ! Loop on the terms of the integrand
                         contrib = contrib+wgeom(iderg(iterm, 1),igauss)*wgeom(iderg(iterm, 2),igauss)* &
                         &   fun(f(jt, 1), idert(iterm, 1),igauss)*fun(f(iw, 1), idert(iterm, 2),igauss)* &
                         &   fun2(f(jt, 2), iderw(iterm, 1),igauss)*fun2(f(iw, 2), iderw(iterm, 2),igauss)* &
                         &   wgauss(igauss)*coefs(iterm)
                      END DO
                   end do
                   ! Several cells contribute to the same row: the per-row lock serialises
                   ! the update of the row's linked list
                   call omp_set_lock(mu_lock(mu))
                   CALL updt_sploc(mat%mat%row(mu), mu2, contrib)
                   call omp_unset_lock(mu_lock(mu))
                END DO
                
             END DO
          END DO
       END DO
       !$OMP End parallel do

       ! The remaining local allocatables are not deallocated explicitly here
       DEALLOCATE (f, aux)
       DEALLOCATE (idert, iderw, coefs, fun, fun2)

       call timera(1, "fematrix")
    END SUBROUTINE fematrix
 
 !---------------------------------------------------------------------------
 !> @author
 !> Guillaume Le Bars EPFL/SPC
 !
 ! DESCRIPTION:
 !>
 !> @brief
 !> Computes the volume of the splines cells needed to display the density in post-processing
 !---------------------------------------------------------------------------
 
    SUBROUTINE comp_volume
       ! Accumulates, for every spline of the grid, the integral
       ! 2*pi*spline*xgauss(:,2) over the cells of its support into Volume(mu).
       ! When walltype is negative it also accumulates into fverif(mu) the integral
       ! of ftest weighted by the spline, the geometric weight and xgauss(:,2).

       USE bsplines
       USE geometry
       USE basic, ONLY: Volume

       REAL(kind=db), ALLOCATABLE :: xgauss(:, :), wgauss(:), wgeom(:, :)
       INTEGER, ALLOCATABLE :: f(:, :), aux(:)
       REAL(kind=db), ALLOCATABLE :: coefs(:)
       REAL(kind=db), ALLOCATABLE :: fun(:, :), fun2(:, :), gtildeintegr(:, :), ftestpt(:, :)
       Integer, ALLOCATABLE, Dimension(:) :: idg, idt, idp, idw
       INTEGER :: i, j, jt, irow, jcol, mu, igauss, gausssize, iterm, nterms
       Real(kind=db)::newcontrib

       call timera(0, "comp_volume")

       ALLOCATE (fun(1:femorder(1) + 1, 0:1), fun2(1:femorder(2) + 1, 0:1))!Arrays keeping values of b-splines at gauss node
       !ALLOCATE(xgauss(ngauss(1)*ngauss(2),2), wgauss(ngauss(1)*ngauss(2)),zg(ngauss(1)),rg(ngauss(2)), wzg(ngauss(1)), wrg(ngauss(2)))   !Gaussian nodes and weights arrays
       ALLOCATE (f((femorder(1) + 1)*(femorder(2) + 1), 2), aux(femorder(1) + 1)) !Auxiliary arrays ordering bsplines
       nterms = 4
       Allocate (idg(nterms), idt(nterms), idw(nterms), idp(nterms), coefs(nterms))
 ! Construction of the auxiliary array ordering bsplines in a given interval:
 ! f(jt,1) and f(jt,2) are the offsets (1-based) of local basis function jt
 ! along the first and second spline direction
       DO i = 1, (femorder(1) + 1)
          aux(i) = i
       END DO
       DO i = 1, (femorder(2) + 1)
          f((i - 1)*(femorder(1) + 1) + 1:i*(femorder(1) + 1), 1) = aux
          f((i - 1)*(femorder(1) + 1) + 1:i*(femorder(1) + 1), 2) = i
       END DO

       ! Reset the accumulators before the assembly
       volume = 0
       if (walltype .lt. 0) fverif = 0

 ! Assemble Volume matrix
       ! The (j,i) loop nest is collapsed and shared between threads; work arrays are PRIVATE
       !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(j,i,xgauss,wgauss,gausssize,wgeom, igauss, gtildeintegr, ftestpt, iterm,jt,irow,jcol, mu, idw, idt, idg, idp, coefs, fun, fun2, newcontrib), collapse(2)
       DO j = 1, nr ! Loop on r position
          DO i = 1, nz        ! Loop on z position
             ! Computation of gauss weight and position in r and z direction for gaussian integration
             Call calc_gauss(splrz, ngauss, i, j, xgauss, wgauss, gausssize)
             If (allocated(wgeom)) deallocate (wgeom)
             if (gausssize .gt. 0) then
                ALLOCATE (wgeom(0:2,size(xgauss, 1)))
                CALL geom_weight(xgauss(:, 1), xgauss(:, 2), wgeom)
             End if
             ! NOTE(review): when gausssize is 0, wgeom is left unallocated but is still
             ! passed to total_gtilde below - confirm this case cannot occur or is handled there
             If (allocated(gtildeintegr)) deallocate (gtildeintegr)
             ALLOCATE (gtildeintegr(0:2,size(xgauss, 1)))
             ! NOTE(review): gtildeintegr is computed but not read anywhere else in this routine
             Call total_gtilde(xgauss(:, 1), xgauss(:, 2), gtildeintegr,wgeom)
             if (walltype .lt. 0) then
                If (allocated(ftestpt)) deallocate (ftestpt)
                ALLOCATE (ftestpt(0:0,size(xgauss, 1)))
                CALL ftest(xgauss(:, 1), xgauss(:, 2), ftestpt)
             end if

             DO igauss = 1, gausssize ! Loop on gaussian weights and positions
                CALL basfun(xgauss(igauss, 1), splrz%sp1, fun, i)
                CALL basfun(xgauss(igauss, 2), splrz%sp2, fun2, j)
                ! NOTE(review): the outputs of coefeqext (idt, idw, idg, idp, coefs) are not
                ! used in this routine
                CALL coefeqext(xgauss(igauss, :), idt, idw, idg, idp, coefs)

                DO jt = 1, (1 + femorder(1))*(femorder(2) + 1)
                   irow = i + f(jt, 1) - 1; 
                   jcol = j + f(jt, 2) - 1
                   ! Global index of local basis function jt
                   mu = irow + (jcol - 1)*nrank(1)
                   newcontrib = 2*pi*fun(f(jt, 1), 0)*fun2(f(jt, 2), 0)*wgauss(igauss)*xgauss(igauss, 2)!*wgeom(igauss,0)
                   ! Neighbouring cells share splines, hence the atomic accumulation
                   !$OMP ATOMIC UPDATE
                   volume(mu) = volume(mu) + newcontrib
                   !$OMP END ATOMIC
                   if (walltype .lt. 0) THEN
                      newcontrib = ftestpt(0,igauss)*fun(f(jt, 1), 0)*fun2(f(jt, 2), 0)&
                      &*wgeom(0,igauss)*wgauss(igauss)*xgauss(igauss, 2)
                      !$OMP ATOMIC UPDATE
                      fverif(mu) = fverif(mu) + newcontrib
                      !$OMP END ATOMIC
                   end if
                END DO
             END DO
          END DO
       END DO
       !$OMP END PARALLEL DO

       !DEALLOCATE(xgauss, wgauss,zg,rg, wzg, wrg)
       ! The remaining local allocatables are not deallocated explicitly here
       DEALLOCATE (f, aux)
       DEALLOCATE (fun, fun2)

       call timera(1, "comp_volume")

    END SUBROUTINE comp_volume
 
 !---------------------------------------------------------------------------
 !> @author
 !> Guillaume Le Bars EPFL/SPC
 !
 ! DESCRIPTION:
 !>
 !> @brief
 !> Computes the gradient of the gtilde function for the web-spline method needed to correctly apply the dirichlet boundary conditions
 !---------------------------------------------------------------------------
 
    SUBROUTINE comp_gradgtilde
       ! Fills the module array gradgtilde: for every spline mu it accumulates, over the
       ! cells of its support, the gauss quadrature of the nterms products described by
       ! coefeqext (geometric weight x gtilde x spline values/derivatives).
       !
       ! The routine only contains orphaned worksharing directives (OMP DO), no PARALLEL
       ! construct. NOTE(review): presumably it is called by all threads from inside a
       ! parallel region - confirm against the caller.

       USE bsplines
       USE geometry

       REAL(kind=db), ALLOCATABLE :: xgauss(:, :), wgauss(:), wgeom(:, :)
       INTEGER, ALLOCATABLE :: f(:, :), aux(:)
       REAL(kind=db), ALLOCATABLE :: coefs(:)
       REAL(kind=db), ALLOCATABLE :: fun(:, :,:), fun2(:, :,:), gtildeintegr(:, :)
       Integer, ALLOCATABLE, Dimension(:) :: idg, idt, idp, idw
       integer,allocatable:: iid(:),jid(:)
       INTEGER :: i, j, jt, irow, jcol, mu, igauss, gausssize, iterm, nterms
       Real(kind=db)::newcontrib

       !call timera(0, "comp_gradgtilde")

       ! Work arrays are sized for at most 3*ngauss(1)*ngauss(2) gauss points per cell
       ! NOTE(review): assumes calc_gauss never returns more points than that - confirm
       ALLOCATE (fun(1:femorder(1) + 1, 0:1,3*ngauss(1)*ngauss(2)), fun2(1:femorder(2) + 1, 0:1,3*ngauss(1)*ngauss(2)))!Arrays keeping values of b-splines at gauss node
       !ALLOCATE(xgauss(ngauss(1)*ngauss(2),2), wgauss(ngauss(1)*ngauss(2)),zg(ngauss(1)),rg(ngauss(2)), wzg(ngauss(1)), wrg(ngauss(2)))   !Gaussian nodes and weights arrays
       If (allocated(wgeom)) deallocate (wgeom)
       ALLOCATE (wgeom(0:2,3*ngauss(1)*ngauss(2)))!Output of geom_weight at the gauss nodes (rows 0:2 are selected through idg)
       ALLOCATE (f((femorder(1) + 1)*(femorder(2) + 1), 2), aux(femorder(1) + 1)) !Auxiliary arrays ordering bsplines
       nterms = 4
       Allocate (idg(nterms), idt(nterms), idw(nterms), idp(nterms), coefs(nterms))
       ALLOCATE (iid(3*ngauss(1)*ngauss(2)), jid(3*ngauss(1)*ngauss(2)))

       If (allocated(gtildeintegr)) deallocate (gtildeintegr)
       ALLOCATE (gtildeintegr(0:2,3*ngauss(1)*ngauss(2)))

 ! Construction of the auxiliary array ordering bsplines in a given interval:
 ! f(jt,1) and f(jt,2) are the offsets (1-based) of local basis function jt
 ! along the first and second spline direction
       DO i = 1, (femorder(1) + 1)
          aux(i) = i
       END DO
       DO i = 1, (femorder(2) + 1)
          f((i - 1)*(femorder(1) + 1) + 1:i*(femorder(1) + 1), 1) = aux
          f((i - 1)*(femorder(1) + 1) + 1:i*(femorder(1) + 1), 2) = i
       END DO

       ! Reset the accumulator; no NOWAIT here, so the worksharing loop ends with its
       ! implied barrier before the assembly starts
       !$OMP DO SIMD
       do j=1,size(gradgtilde)
          gradgtilde(j) = 0
       END DO
       !$OMP END DO SIMD

       ! Assemble gradgtilde matrix
       !$OMP DO collapse(2), schedule(dynamic)
       DO j = 1, nr ! Loop on r position
          DO i = 1, nz        ! Loop on z position
             ! Computation of gauss weight and position in r and z direction for gaussian integration
             Call calc_gauss(splrz, ngauss, i, j, xgauss, wgauss, gausssize)
             ! Cell indices replicated for every gauss point, as expected by the batched basfun calls
             iid=i
             jid=j
             ! Cells with at most one gauss point contribute nothing and are skipped
             if (gausssize .gt. 1) then
                !If (allocated(wgeom)) deallocate (wgeom)
                !ALLOCATE (wgeom(0:2,gausssize))
                CALL geom_weight(xgauss(1:gausssize, 1), xgauss(1:gausssize, 2), wgeom(:,1:gausssize))
                CALL basfun(xgauss(1:gausssize, 1), splrz%sp1, fun(:,:,1:gausssize), iid(1:gausssize))
                CALL basfun(xgauss(1:gausssize, 2), splrz%sp2, fun2(:,:,1:gausssize), jid(1:gausssize))
                Call total_gtilde(xgauss(1:gausssize, 1), xgauss(1:gausssize, 2), gtildeintegr(:,1:gausssize),wgeom(:,1:gausssize))
             else 
                cycle
             End if
             DO jt = 1, (1 + femorder(1))*(femorder(2) + 1)
                irow = i + f(jt, 1) - 1; jcol = j + f(jt, 2) - 1
                ! Global index of local basis function jt
                mu = irow + (jcol - 1)*nrank(1)
                newcontrib = 0.0_db
                DO igauss = 1, gausssize ! Loop on gaussian weights and positions
                   ! NOTE(review): coefeqext only depends on the gauss point, yet it is
                   ! re-evaluated for every basis function jt
                   CALL coefeqext(xgauss(igauss, :), idt, idw, idg, idp, coefs)
                ! Loop on the terms of the integrand
                Do iterm = 1, nterms
                   newcontrib = newcontrib + wgeom( idg(iterm),igauss)*gtildeintegr( idp(iterm),igauss)* &
                   &   fun(f(jt, 1), idt(iterm),igauss)*fun2(f(jt, 2), idw(iterm),igauss)* &
                   &   wgauss(igauss)*coefs(iterm)
                   End do
                end do
                ! Neighbouring cells share splines, hence the atomic accumulation
                !$OMP ATOMIC UPDATE
                gradgtilde(mu) = gradgtilde(mu) + newcontrib
                !$OMP END ATOMIC
             END DO
          END DO
       END DO
       !!! $OMP END PARALLEL DO
       !$OMP END DO

       !DEALLOCATE(xgauss, wgauss,zg,rg, wzg, wrg)
       ! The remaining local allocatables are not deallocated explicitly here
       DEALLOCATE (f, aux)
       DEALLOCATE (fun, fun2)

       !call timera(1, "comp_gradgtilde")

    END SUBROUTINE comp_gradgtilde
 
 
 !---------------------------------------------------------------------------
 !> @author
 !> Patryk kaminski   EPFL/SPC
 !> Guillaume Le Bars EPFL/SPC
 !
 ! DESCRIPTION:
 !>
 !> @brief
 !> Imposes the dirichlet boundary conditions on the FEM matrix for the case where we use regular splines ( not web-splines).
 !---------------------------------------------------------------------------
    SUBROUTINE fe_dirichlet
       ! Replaces boundary rows of femat by identity rows (a single 1 on the diagonal).
       ! The last nrank(1) rows are always replaced; the first nrank(1) rows are
       ! replaced only when rgrid(0) is not zero.
       REAL(kind=db), ALLOCATABLE :: unitrow(:)
       INTEGER :: k, ntot, klast
       LOGICAL :: nonzero_r0

       ntot = nrank(1)*nrank(2)
       nonzero_r0 = (rgrid(0) .ne. 0.0_db)
       ALLOCATE (unitrow(ntot))

       DO k = 1, nrank(1)
          ! Row k, counted from the start of the matrix
          IF (nonzero_r0) THEN
             unitrow = 0
             unitrow(k) = 1
             CALL putrow(femat, k, unitrow)
          END IF

          ! Matching row counted from the end of the matrix
          klast = ntot + 1 - k
          unitrow = 0
          unitrow(klast) = 1
          CALL putrow(femat, klast, unitrow)
       END DO

       DEALLOCATE (unitrow)
    END SUBROUTINE fe_dirichlet
 !________________________________________________________________________________
    SUBROUTINE coefeq(x, idt, idw, idg, c, kterms)
       ! Describes the kterms = 8 products summed in the FEM matrix integrand.
       ! For term k, columns 1 and 2 of idt/idw/idg hold the row selected in the
       ! first-direction basis array, the second-direction basis array and the
       ! geometric-weight array, for the two factors of the product.
       !
       ! x      : coordinates of the evaluation point; only x(2) is read
       ! idt    : derivative indices for the first-direction basis (rows 1:8 set)
       ! idw    : derivative indices for the second-direction basis (rows 1:8 set)
       ! idg    : row indices into the geometric-weight array (rows 1:8 set)
       ! c      : coefficient of every term; all elements are set to x(2)
       ! kterms : number of terms, always 8
       REAL(kind=db), INTENT(in) :: x(:)
       INTEGER, INTENT(out) :: idt(:, :), idw(:, :), idg(:, :),kterms
       REAL(kind=db), INTENT(out) :: c(:)

       kterms = 8
       c = x(2)

       !                 term:  1  2  3  4  5  6  7  8
       idt(1:8, 1) = (/0, 0, 1, 1, 0, 0, 0, 0/)
       idt(1:8, 2) = (/0, 1, 0, 1, 0, 0, 0, 0/)

       idw(1:8, 1) = (/0, 0, 0, 0, 0, 0, 1, 1/)
       idw(1:8, 2) = (/0, 0, 0, 0, 0, 1, 0, 1/)

       idg(1:8, 1) = (/1, 1, 0, 0, 2, 2, 0, 0/)
       idg(1:8, 2) = (/1, 0, 1, 0, 2, 0, 2, 0/)
    END SUBROUTINE coefeq
 
    SUBROUTINE coefeqext(x, idt, idw, idg, idp, c)
       ! Describes the 4 products summed in the gtilde-gradient integrand.
       ! For term k: idp(k) selects the row of the gtilde array, idg(k) the row of the
       ! geometric-weight array, idt(k) and idw(k) the derivative index of the
       ! first- and second-direction basis arrays.
       !
       ! x : coordinates of the evaluation point; only x(2) is read
       ! c : coefficient of each term; c(1:4) are set to x(2)
       REAL(kind=db), INTENT(in) :: x(:)
       INTEGER, INTENT(out) :: idp(:), idt(:), idw(:), idg(:)
       REAL(kind=db), INTENT(out) :: c(:)

       c(1:4) = x(2)

       !           term:  1  2  3  4
       idp(1:4) = (/1, 1, 2, 2/)
       idg(1:4) = (/1, 0, 2, 0/)
       idt(1:4) = (/0, 1, 0, 0/)
       idw(1:4) = (/0, 0, 0, 1/)
    END SUBROUTINE coefeqext
 
 !---------------------------------------------------------------------------
 !> @author
 !> Patryk kaminski   EPFL/SPC
 !> Guillaume Le Bars EPFL/SPC
 !
 ! DESCRIPTION:
 !>
 !> @brief
 !> Computes the magnetic field on the grid according to a magnetic mirror,
 !> or according to the linear interpolation of the values on the
 !> grid saved in h5 file stored at magfile.
 !> @param[in] magfile filename of the .h5 file containing the definitions of A and B
 !---------------------------------------------------------------------------
    SUBROUTINE magnet(magfile)
       ! Fills the magnetic field arrays Br, Bz and the vector potential Athet.
       !
       ! If magfile is present and not blank, the field is loaded from that file by
       ! load_mag_from_h5. Otherwise (argument absent or blank) the analytic
       ! magnetic-mirror expressions below are evaluated at the (nr+1)*(nz+1) grid points.
       !
       ! magfile : optional name of the .h5 file holding the field definition
       USE basic, ONLY: B0, Rcurv, rgrid, zgrid, width, rnorm, nr, nz, bnorm
       USE constants, ONLY: Pi
       CHARACTER(LEN=*), INTENT(IN), OPTIONAL:: magfile
       REAL(kind=db) :: rg, zg, halfLz, MirrorRatio
       INTEGER :: i, rindex
       LOGICAL :: use_file

       ! An absent optional argument must not be referenced, and Fortran does not
       ! short-circuit logical expressions: test presence first, then the content.
       use_file = .false.
       IF (PRESENT(magfile)) use_file = (len_trim(magfile) .ge. 1)

       IF (use_file) THEN
          CALL load_mag_from_h5(magfile)
       ELSE
          ! Axial position is measured from the midpoint of the z grid
          halfLz = (zgrid(nz) + zgrid(0))/2
          MirrorRatio = (Rcurv - 1)/(Rcurv + 1)
          DO i = 1, (nr + 1)*(nz + 1)
             ! Linear index i runs over z first: recover the r and z grid indices
             rindex = (i - 1)/(nz + 1)
             rg = rgrid(rindex)
             zg = zgrid(i - rindex*(nz + 1) - 1) - halfLz
             ! Br and Bz are divided by bnorm; Athet is not
             Br(i) = -B0*MirrorRatio*SIN(2*pi*zg/width*rnorm)*bessi1(2*pi*rg/width*rnorm)/bnorm
             Bz(i) = B0*(1 - MirrorRatio*COS(2*pi*zg/width*rnorm)*bessi0(2*pi*rg/width*rnorm))/bnorm
             Athet(i) = 0.5*B0*(rg*rnorm - width/pi*MirrorRatio*bessi1(2*pi*rg/width*rnorm)*COS(2*pi*zg/width*rnorm))
          END DO
       END IF
    END SUBROUTINE magnet
 
 !---------------------------------------------------------------------------
 !> @author
 !> Guillaume Le Bars EPFL/SPC
 !
 ! DESCRIPTION:
 !>
 !> @brief
 !> Loads the magnetic field defined in the .h5 file at location magfile
 !> @param[in] magfile filename of the .h5 file containing the definitions of A and B
 !---------------------------------------------------------------------------
    SUBROUTINE load_mag_from_h5(magfile)
       ! Reads the magnetic vector potential (and, when stored, the magnetic field)
       ! from the h5 file magfile and interpolates it with a 2D spline at the points
       ! vec1, vec2 into the arrays Athet, Br and Bz.
       !
       ! - /mag/r, /mag/z and /mag/Athet are always read; r and z are divided by rnorm.
       ! - If both /mag/Br and /mag/Bz exist they are read and interpolated; with
       !   bscaling > 0 they are first rescaled so that the largest |B| read equals B0.
       ! - Otherwise Br and Bz are derived from the spline of Athet.
       ! - With bscaling < 0 the interpolated field is rescaled so that its largest
       !   magnitude equals B0.
       ! - Br and Bz are finally divided by bnorm.
       !
       ! magfile : name of the .h5 file holding the field definition
       USE basic, ONLY: B0, rnorm, bnorm, bscaling
       USE constants, ONLY: Pi
       USE futils
       USE bsplines
       CHARACTER(LEN=*), INTENT(IN):: magfile
       REAL(kind=db), ALLOCATABLE :: magr(:), magz(:)
       REAL(kind=db), ALLOCATABLE :: tempBr(:, :), tempBz(:, :), tempAthet(:, :)
       real(kind=db), allocatable:: c(:,:)
       type(spline2d):: Maginterpolation
       REAL(kind=db) :: maxB
       INTEGER :: magfid, dims(2)
       LOGICAL:: B_is_saved
       INTEGER :: magn(2), magrank

       CALL openf(trim(magfile), magfid, 'r', real_prec='d')

       ! The stored arrays are dimensioned (number of z points, number of r points)
       CALL getdims(magfid, '/mag/Athet', magrank, magn)

       ALLOCATE (magr(magn(2)), magz(magn(1)))
       ALLOCATE (tempAthet(magn(1), magn(2)), tempBr(magn(1), magn(2)), tempBz(magn(1), magn(2)))

       ! Read r and z coordinates for the definition of A_\thet, and B
       CALL getarr(magfid, '/mag/r', magr)
       CALL getarr(magfid, '/mag/z', magz)
       CALL getarr(magfid, '/mag/Athet', tempAthet)

       B_is_saved = isdataset(magfid, '/mag/Br') .and. isdataset(magfid, '/mag/Bz')
       IF (B_is_saved) THEN
          CALL getarr(magfid, '/mag/Br', tempBr)
          CALL getarr(magfid, '/mag/Bz', tempBz)
          IF(bscaling .gt. 0) then
             ! Rescale the stored field so that its maximum amplitude is B0
             maxB=sqrt(maxval(tempBr**2+tempBz**2))
             tempBr=tempBr/maxB*B0
             tempBz=tempBz/maxB*B0
          end if
       END IF

       ! Normalize the coordinates and build the interpolating spline
       ! NOTE(review): (/3,3/) is presumably the spline degree in each direction - confirm in bsplines
       magz=magz/rnorm
       magr=magr/rnorm
       CALL set_splcoef((/3,3/),magz,magr,Maginterpolation)
       call get_dim(Maginterpolation,dims)

       ! Interpolation of the magnetic potential vector
       allocate(c(dims(1),dims(2)))
       call get_splcoef(Maginterpolation,tempAthet, c)
       CALL gridval(Maginterpolation,vec1,vec2, Athet ,(/0,0/),c)

       ! A logical is tested directly: comparing logicals with == is not standard Fortran
       if (B_is_saved) then
          ! Interpolation of the Axial magnetic field
          call get_splcoef(Maginterpolation,tempBz, c)
          CALL gridval(Maginterpolation,vec1,vec2, Bz ,(/0,0/),c)

          ! Interpolation of the radial magnetic field
          call get_splcoef(Maginterpolation,tempBr, c)
          CALL gridval(Maginterpolation,vec1,vec2, Br ,(/0,0/),c)
       else
          ! B is derived from the spline of Athet.
          ! NOTE(review): gridval is called without coefficients here, so it presumably
          ! reuses those of the Athet interpolation above, and (/1,0/), (/0,1/) are
          ! presumably the derivative orders along z and r - confirm in bsplines
          CALL gridval(Maginterpolation,vec1,vec2, Br,(/1,0/))
          Br=-Br
          CALL gridval(Maginterpolation,vec1,vec2, Bz,(/0,1/))
          Bz=Bz+Athet/vec2
       end if

       if( bscaling .lt. 0 ) then
          ! Rescale the interpolated field so that its maximum amplitude is B0
          maxB = maxval(sqrt(Bz**2 + Br**2))

          Bz = Bz/maxB*B0
          Br = Br/maxB*B0
       end if
       ! We normalize
       Br = Br/bnorm
       Bz = Bz/bnorm

       CALL closef(magfid)
       deallocate(c)
       call destroy_SP(Maginterpolation)
    END SUBROUTINE load_mag_from_h5
 !________________________________________________________________________________
 !Modified Bessel functions of the first kind of the zero order
    FUNCTION bessi0(x)
       ! Polynomial approximation of the modified Bessel function of the first kind
       ! of order zero.
       !
       ! For |x| < 3.75 a polynomial in (x/3.75)**2 is used; otherwise an asymptotic
       ! form exp(|x|)/sqrt(|x|) times a polynomial in 3.75/|x|.
       !
       ! x : argument of the function (any sign)
       REAL(kind=db) :: bessi0
       REAL(kind=db), INTENT(in) :: x
       REAL(kind=db) :: ax, y
       ! Switch point between the two approximations, in the working kind like bessi1
       REAL(kind=db), PARAMETER :: xsplit = 3.75_db
       ! Coefficients of the small-argument polynomial
       REAL(kind=db), PARAMETER :: p1 = 1.0d0, p2 = 3.5156229d0, p3 = 3.0899424d0, p4 = 1.2067492d0, &
       &                           p5 = 0.2659732d0, p6 = 0.360768d-1, p7 = 0.45813d-2
       ! Coefficients of the large-argument polynomial
       REAL(kind=db), PARAMETER :: q1 = 0.39894228d0, q2 = 0.1328592d-1, q3 = 0.225319d-2, &
       &                           q4 = -0.157565d-2, q5 = 0.916281d-2, q6 = -0.2057706d-1, &
       &                           q7 = 0.2635537d-1, q8 = -0.1647633d-1, q9 = 0.392377d-2

       if (abs(x) .lt. xsplit) then
          y = (x/xsplit)**2
          bessi0 = p1 + y*(p2 + y*(p3 + y*(p4 + y*(p5 + y*(p6 + y*p7)))))
       else
          ax = abs(x)
          y = xsplit/ax
          bessi0 = (exp(ax)/sqrt(ax))*(q1 + y*(q2 + y*(q3 + y*(q4 + y*(q5 + y*(q6 + y*(q7 + y*(q8 + y*q9))))))))
       end if
    END FUNCTION bessi0
 !________________________________________________________________________________
 !Modified Bessel functions of the first kind of the first order
    FUNCTION bessi1(x)
       ! Polynomial approximation of the modified Bessel function of the first kind
       ! of order one.
       !
       ! For |x| < 3.75 the result is x times a polynomial in (x/3.75)**2; otherwise
       ! exp(|x|)/sqrt(|x|) times a polynomial in 3.75/|x|, with the sign of x.
       REAL(kind=db) ::  bessi1, x
       REAL(kind=db), PARAMETER :: xsplit = 3.75D0
       ! Small-argument coefficients, lowest order first
       REAL(kind=db), PARAMETER :: csmall(7) = (/0.5d0, 0.87890594d0, 0.51498869d0, 0.15084934d0, &
       &                                         0.2658733d-1, 0.301532d-2, 0.32411d-3/)
       ! Large-argument coefficients, lowest order first
       REAL(kind=db), PARAMETER :: clarge(9) = (/0.39894228d0, -0.3988024d-1, -0.362018d-2, &
       &                                         0.163801d-2, -0.1031555d-1, 0.2282967d-1, &
       &                                         -0.2895312d-1, 0.1787654d-1, -0.420059d-2/)
       REAL(kind=db) :: mag, t, acc
       INTEGER :: k

       mag = abs(x)
       if (mag .lt. xsplit) then
          t = (x/xsplit)**2
          ! Horner evaluation, innermost coefficient first
          acc = csmall(7)
          do k = 6, 1, -1
             acc = csmall(k) + t*acc
          end do
          bessi1 = x*acc
       else
          t = xsplit/mag
          ! Horner evaluation, innermost coefficient first
          acc = clarge(9)
          do k = 8, 1, -1
             acc = clarge(k) + t*acc
          end do
          bessi1 = (exp(mag)/sqrt(mag))*acc
          ! The function is odd in x
          if (x .lt. 0.) bessi1 = -bessi1
       end if
    END FUNCTION bessi1
 
 !---------------------------------------------------------------------------
 !> @author
 !> Patryk kaminski   EPFL/SPC
 !> Guillaume Le Bars EPFL/SPC
 !
 ! DESCRIPTION:
 !>
 !> @brief
 !> Free the memory used by the fields module
 !---------------------------------------------------------------------------
    SUBROUTINE clean_fields
       ! Releases the resources held by the fields module: destroys the
       ! nrank(1)*nrank(2) OpenMP locks, deallocates the work and field arrays
       ! and destroys the two 2D splines.
       Use bsplines
       USE basic, ONLY: rhs
       INTEGER:: ilock, nlocks

       ! One lock per matrix row
       nlocks = nrank(1)*nrank(2)
       do ilock = 1, nlocks
          call omp_destroy_lock(mu_lock(ilock))
       end do

       ! Same deallocation order as before, grouped by statement
       DEALLOCATE (mu_lock, matcoef)
       DEALLOCATE (pot, rhs)
       DEALLOCATE (loc_rhs, loc_moments)
       DEALLOCATE (phi_spline)
       DEALLOCATE (Br, Bz, Er, Ez)
       DEALLOCATE (vec1, vec2)

       Call DESTROY_SP(splrz)
       Call DESTROY_SP(splrz_ext)

    END SUBROUTINE clean_fields
 
    SUBROUTINE updt_sploc(arow, j, val)
       !
       !   Add val to element j of the sparse row arow. If the row has no element
       !   with index j, a new element is inserted so that the linked list stays
       !   sorted by increasing index, and arow%nnz is incremented.
       !
       USE sparse
       TYPE(sprow), TARGET          :: arow
       INTEGER, INTENT(in)          :: j
       DOUBLE PRECISION, INTENT(in) :: val
       !
       TYPE(elt), TARGET  :: sentinel
       TYPE(elt), POINTER :: prev, node
       !
       ! The sentinel sits in front of the head so that an insertion at the head
       ! is handled like any other insertion.
       sentinel%next => arow%row0
       prev => sentinel
       scan: DO
          IF (.NOT. ASSOCIATED(prev%next)) EXIT scan
          node => prev%next
          IF (node%index .EQ. j) THEN
             ! Element already present: accumulate and leave
             node%val = node%val + val
             RETURN
          END IF
          ! First element with a larger index: insert in front of it
          IF (node%index .GT. j) EXIT scan
          prev => node
       END DO scan
       !
       ! Link a new element between prev and its former successor
       ALLOCATE (node)
       node = elt(j, val, prev%next)
       prev%next => node
       !
       arow%nnz = arow%nnz + 1
       arow%row0 => sentinel%next ! In case the head is altered
    END SUBROUTINE updt_sploc
 
 
    SUBROUTINE updt_ppform2d(sp,c)
       ! Recomputes sp%ppform from the 2D spline coefficients c in two passes of
       ! topp0: first along sp%sp1 for every column of c (result stored in the module
       ! array ppformwork), then along sp%sp2 for every slice of ppformwork.
       !
       ! The routine only contains orphaned OpenMP directives (DO, SINGLE).
       ! NOTE(review): presumably it is called by all threads from inside a parallel
       ! region, with ppformwork already allocated - confirm against the caller.
       !
       ! sp : 2D spline whose ppform component is reallocated and filled
       ! c  : spline coefficients, one column per index of the second direction
       use bsplines
       TYPE(spline2d), INTENT(inout) :: sp
       DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: c
       !DOUBLE PRECISION, ALLOCATABLE :: work(:,:,:)
       INTEGER:: m,mm
       INTEGER :: d1, d2, k1, k2, n1, n2

       ! d1 and d2 are only needed by the commented-out allocation of work below
       d1 = sp%sp1%dim
       d2 = sp%sp2%dim
       k1 = sp%sp1%order
       k2 = sp%sp2%order
       n1 = sp%sp1%nints
       n2 = sp%sp2%nints

       !ALLOCATE(work(d2,k1,n1))
       ! First pass: ppform along the first direction for each column m of c
       !$OMP DO
       DO m=1,SIZE(c,2)
          CALL topp0(sp%sp1, c(:,m), ppformwork(m,:,:))
       END DO
       !$OMP END DO NOWAIT

       ! One thread reallocates the result. The first loop has NOWAIT, so the
       ! synchronisation before the second pass relies on the barrier implied at
       ! END SINGLE.
       !$OMP SINGLE
       IF( ASSOCIATED(sp%ppform) ) DEALLOCATE(sp%ppform)
       ALLOCATE(sp%ppform(k1,n1,k2,n2))
       !$OMP END SINGLE

       ! Second pass: ppform along the second direction for each (m,mm) of the first pass
       !$OMP DO 
       DO mm=1,SIZE(ppformwork,3)
          DO m=1,SIZE(ppformwork,2)
           CALL topp0(sp%sp2, ppformwork(:,m,mm), sp%ppform(m,mm,:,:))
          END DO
       END DO
       !$OMP END DO
       !DEALLOCATE(work)

    end subroutine updt_ppform2d
 
    !===========================================================================
   SUBROUTINE topp0(sp, c, ppform)
    !
    !   Compute the PPFORM of a function defined by the spline SP and the
    !   spline coefficients C: for every knot interval i and derivative order
    !   k = 0..order-1, ppform(k,i) = sum over j of sp%val0(k,j,i)*c(j+i-1).
    !
        use bsplines
        TYPE(spline1d), INTENT(in) :: sp
        DOUBLE PRECISION, INTENT(in) :: c(:)
        DOUBLE PRECISION, INTENT(out) :: ppform(0:,:)
        INTEGER :: deg, iint, jspl
    !
        deg = sp%order - 1
    !
        ppform = 0.0d0
        DO iint = 1, sp%nints            ! on each knot interval
           DO jspl = 1, deg + 1          ! all splines in interval iint
              ! all derivative orders 0..deg at once
              ppform(0:deg, iint) = ppform(0:deg, iint) + sp%val0(0:deg, jspl, iint)*c(jspl + iint - 1)
           END DO
        END DO
    !
      END SUBROUTINE topp0
    !+
 END MODULE fields