diff --git a/matlab/@fennecshdf5/guiEkin.m b/matlab/@fennecshdf5/guiEkin.m index 2f6b10e..a5c5a73 100644 --- a/matlab/@fennecshdf5/guiEkin.m +++ b/matlab/@fennecshdf5/guiEkin.m @@ -1,214 +1,214 @@ function f=guiEkin(M) fieldstep=1; f=uifigure('Name','Fluid Energy data'); mf=uipanel(f,'Position',[5 50 f.Position(3)-10 f.Position(4)-55]); mf.AutoResizeChildren='off'; m=uipanel(f,'Position',[5 5 f.Position(3)-10 40]); sgtitle(mf,sprintf('t=%0.5e s',M.t2d(fieldstep))) sld = uislider(m,'Position',[10 30 0.6*m.Position(3) 3]); sld.Value=fieldstep; sld.Limits=[1 length(M.t2d)]; edt = uieditfield(m,'numeric','Limits',[1 length(M.t2d)],'Value',1); edt.Position=[sld.Position(1)+sld.Position(3)+25 5 40 20]; edt.RoundFractionalValues='on'; Printbt=uibutton(m,'Position',[edt.Position(1)+edt.Position(3)+10 5 40 20],'Text', 'Save'); %Playbt=uibutton(m,'Position',[Printbt.Position(1)+Printbt.Position(3)+10 5 40 20],'Text', 'Play/Pause'); sld.ValueChangingFcn={@updatefigdata,edt,mf}; edt.ValueChangedFcn={@updatefigdata,sld,mf}; Printbt.ButtonPushedFcn={@plotGridButtonPushed}; [R,Z]=meshgrid(M.rgrid,M.zgrid); Rinv=1./R; Rinv(:,1)=0; Plotfennecsdata(mf,M,fieldstep); function Plotfennecsdata(fig,M,fieldstep) %Plotfennecsgriddata Plot the 2d data of fennecs at time step fieldstep sgtitle(fig,sprintf('t=%0.5e s',M.t2d(fieldstep))) ax1=subplot(2,2,1,'Parent',fig); surface(ax1,M.zgrid,M.rgrid,M.N(:,:,fieldstep),'edgecolor','none'); hold(ax1,'on') border=contourc(M.zgrid,M.rgrid,M.geomweight(:,:,1),[0 0]); zpos=interp2(M.zgrid, M.rgrid, M.N(:,:,fieldstep), border(1,2:end), border(2,2:end)); plot3(ax1,border(1,2:end),border(2,2:end),zpos,'r-','linewidth',1.5,'Displayname','Boundaries') bfields=contourc(M.zgrid,M.rgrid,M.rAthet,15); zpos=interp2(M.zgrid, M.rgrid, M.N(:,:,fieldstep), bfields(1,2:end), bfields(2,2:end)); plot3(ax1,bfields(1,2:end),bfields(2,2:end),zpos,'m-.','linewidth',1.5,'Displayname','Boundaries') xlim(ax1,[M.zgrid(1) M.zgrid(end)]) ylim(ax1,[M.rgrid(1) M.rgrid(end)]) xlabel(ax1,'z [m]') ylabel(ax1,'r [m]') title(ax1,'Position') c = colorbar(ax1); c.Label.String= 'n[m^{-3}]'; %c.Limits=[0 max(M.N(:))]; %caxis(ax1,[0 max(M.N(:))]); view(ax1,2) %set(ax1,'colorscale','log') UR=M.fluidUR(:,:,fieldstep); UZ=M.fluidUZ(:,:,fieldstep); ax2=subplot(2,2,2,'Parent',fig); Ekr=squeeze(M.fluidEkin(1,:,:,fieldstep))/M.qe; Ekr(Ekr==0)=NaN; surface(ax2,M.zgrid,M.rgrid,Ekr,'edgecolor','none'); %plot(ax2,M.zgrid,data.pot(:,5)) xlabel(ax2,'z [m]') ylabel(ax2,'r [m]') colormap(ax2,'jet') c = colorbar(ax2); c.Label.String= 'E_r [eV]'; hold(ax2,'on') border=contourc(M.zgrid,M.rgrid,M.geomweight(:,:,1),[0 0]); zpos=interp2(M.zgrid, M.rgrid, squeeze(M.fluidEkin(1,:,:,fieldstep))/M.qe, border(1,2:end), border(2,2:end)); plot3(ax2,border(1,2:end),border(2,2:end),zpos,'r-','linewidth',1.5,'Displayname','Boundaries') %c.Limits=[min(M.fluidUR(:,:,:)) max(M.fluidUR(:,:,:))]; grid(ax2, 'on') %caxis(ax2,[-4e5 1e6]) view(ax2,2) ax3=subplot(2,2,3,'Parent',fig); Ekthet=squeeze(M.fluidEkin(2,:,:,fieldstep))/M.qe; Ekthet(Ekthet==0)=NaN; surface(ax3,M.zgrid,M.rgrid,Ekthet,'edgecolor','none') xlabel(ax3,'z [m]') ylabel(ax3,'r [m]') colormap(ax3,'jet') c = colorbar(ax3); c.Label.String= 'E_\theta [eV]'; hold(ax3,'on') border=contourc(M.zgrid,M.rgrid,M.geomweight(:,:,1),[0 0]); zpos=interp2(M.zgrid, M.rgrid, squeeze(M.fluidEkin(2,:,:,fieldstep))/M.qe, border(1,2:end), border(2,2:end)); plot3(ax3,border(1,2:end),border(2,2:end),zpos,'r-','linewidth',1.5,'Displayname','Boundaries') grid(ax3, 'on') view(ax3,2) 
ax4=subplot(2,2,4,'Parent',fig); Ekz=squeeze(M.fluidEkin(3,:,:,fieldstep))/M.qe; Ekz(Ekz==0)=NaN; surface(ax4,M.zgrid,M.rgrid,Ekz,'edgecolor','none') xlabel(ax4,'z [m]') ylabel(ax4,'r [m]') colormap(ax4,'jet') c = colorbar(ax4); c.Label.String= 'E_z [eV]'; titl=''; labl=''; hold(ax4,'on') border=contourc(M.zgrid,M.rgrid,M.geomweight(:,:,1),[0 0]); zpos=interp2(M.zgrid, M.rgrid, squeeze(M.fluidEkin(3,:,:,fieldstep))/M.qe, border(1,2:end), border(2,2:end)); plot3(ax4,border(1,2:end),border(2,2:end),zpos,'r-','linewidth',1.5,'Displayname','Boundaries') grid(ax4, 'on') view(ax4,2) linkaxes([ax1 ax2 ax3 ax4],'xy') end function plotGridButtonPushed(btn,ax) %UNTITLED2 Summary of this function goes here % Detailed explanation goes here f=figure(); Plotfennecsgriddata(f,M,sld.Value); f.PaperOrientation='landscape'; [~, name, ~] = fileparts(M.file); print(f,sprintf('%sfluid%d',name,sld.Value),'-dpdf','-fillpage') end function updatefigdata(control, event, Othercontrol, fig) if strcmp(event.EventName,'ValueChanged') fieldstep=floor(control.Value); control.Value=fieldstep; else fieldstep=floor(event.Value); end Othercontrol.Value=fieldstep; sgtitle(fig,sprintf('t=%0.5e s',double((fieldstep-1)*M.it1)*M.dt)) %% update Position histogram ax1=fig.Children(end); dens=M.N(:,:,fieldstep); dens(M.geomweight(:,:,1)<0)=0; zpos=interp2(M.zgrid, M.rgrid, dens, ax1.Children(end-1).XData, ax1.Children(end-1).YData); ax1.Children(end-1).ZData=zpos; zpos=interp2(M.zgrid, M.rgrid, dens, ax1.Children(end-2).XData, ax1.Children(end-2).YData); ax1.Children(end-2).ZData=zpos; dens(dens<=0)=NaN; ax1.Children(end).ZData=dens; ax1.Children(end).CData=dens; ER=squeeze(M.fluidEkin(1,:,:,fieldstep))/M.qe; EZ=squeeze(M.fluidEkin(3,:,:,fieldstep))/M.qe; view(ax1,2) %% update Radial velocity ax1=fig.Children(end-2); zpos=interp2(M.zgrid, M.rgrid, ER, ax1.Children(end-1).XData, ax1.Children(end-1).YData); ax1.Children(end-1).ZData=zpos; ER(ER<=0)=NaN; fig.Children(end-2).Children(end).CData=ER; fig.Children(end-2).Children(end).ZData=ER; - caxis(ax1,[0 50]); + caxis(ax1,[0 500]); %% update Azimuthal velocity ax1=fig.Children(end-4); Ethet=squeeze(M.fluidEkin(2,:,:,fieldstep))/M.qe; zpos=interp2(M.zgrid, M.rgrid, Ethet, ax1.Children(end-1).XData, ax1.Children(end-1).YData); ax1.Children(end-1).ZData=zpos; Ethet(Ethet<=0)=NaN; fig.Children(end-4).Children(end).CData=Ethet; fig.Children(end-4).Children(end).ZData=Ethet; - caxis(ax1,[0 50]); + caxis(ax1,[0 500]); %% update Axial velocity ax1=fig.Children(end-6); zpos=interp2(M.zgrid, M.rgrid, EZ, ax1.Children(end-1).XData, ax1.Children(end-1).YData); ax1.Children(end-1).ZData=zpos; %drawnow limitrate EZ(EZ<=0)=NaN; fig.Children(end-6).Children(end).CData=EZ; fig.Children(end-6).Children(end).ZData=EZ; - caxis(ax1,[0 50]); + caxis(ax1,[0 500]); end end diff --git a/src/basic_mod.f90 b/src/basic_mod.f90 index 45289c1..f1af4f9 100644 --- a/src/basic_mod.f90 +++ b/src/basic_mod.f90 @@ -1,426 +1,426 @@ MODULE basic ! USE hashtable USE constants USE bsplines USE mumps_bsplines USE futils USE mpihelper use random IMPLICIT NONE ! ! Basic module for time dependent problems ! CHARACTER(len=128) :: label1, label2, label3, label4 ! ! BASIC Namelist ! LOGICAL :: nlres = .FALSE. !< Restart flag LOGICAL :: nlsave = .TRUE. !< Checkpoint (save) flag LOGICAL :: newres=.FALSE. !< New result HDF5 file LOGICAL :: nlxg=.FALSE. !< Show graphical interface Xgrafix LOGICAL :: nlmaxwellsource = .FALSE. 
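Note for orientation while reading the declarations above and below: they are gathered into the BASIC namelist and read by basic_data further down, after four free-form label lines at the top of the input file. A minimal, purely illustrative input block might look as follows; every value here is an assumption made for this sketch, not a recommended setting.

&BASIC
  job_time  = 3500.0              ! wall time allocated to the job [s]
  tmax      = 1.0e-6              ! maximum simulated time [s]
  dt        = 5.0e-12             ! time step [s], normalised internally by tnorm
  nrun      = 100000              ! number of steps to run
  nlres     = .false.             ! not a restart
  nlsave    = .true.              ! write checkpoint/restart files
  B0        = 0.21                ! peak magnetic field [T]
  n0        = 1.0e17              ! reference density [m^-3]
  nplasma   = 1000000             ! macro-particles loaded initially
  lz        = -0.1, 0.1           ! axial limits of the domain [m]
  radii     = 0.06, 0.08          ! inner and outer radius [m]
  plasmadim = -0.05, 0.05, 0.065, 0.075  ! Zmin Zmax Rmin Rmax of the loaded plasma [m]
  nnz       = 128                 ! axial cells in the first sub-domain
  nnr       = 64                  ! radial cells in the first sub-domain
  femorder  = 3, 3                ! spline order in z and r
  ngauss    = 6, 6                ! Gauss points per cell in z and r
  distribtype = 1                 ! gaussian velocity distribution
  it0d      = 10                  ! steps between 0d diagnostics
  it2d      = 100                 ! steps between 2d diagnostics
  itparts   = 1000                ! steps between particle dumps
/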
!< Activate the maxwell source INTEGER :: nrun=1 !< Number of time steps to run REAL(kind=db) :: job_time=3600.0 !< Time allocated to this job in seconds REAL(kind=db) :: tmax=100000.0 !< Maximum simulation time REAL(kind=db) :: extra_time=60.0 !< Extra time allocated REAL(kind=db) :: dt=1 !< Time step REAL(kind=db) :: time=0 !< Current simulation time (Init from restart file) ! ! Other basic global vars and arrays ! INTEGER :: jobnum !< Job number - INTEGER :: step !< Calculation step of this run + INTEGER :: step=0 !< Calculation step of this run INTEGER :: cstep=0 !< Current step number (Init from restart file) - LOGICAL :: nlend !< Signal end of run + LOGICAL :: nlend=.false. !< Signal end of run INTEGER :: ierr !< Integer used for MPI INTEGER :: it0d=1 !< Number of iterations between 0d values writes to hdf5 INTEGER :: it2d=100 !< Number of iterations between 2d values writes to hdf5 INTEGER :: itparts=1000 !< Number of iterations between particles values writes to hdf5 INTEGER :: ittext=10 !< Number of iterations between text outputs in the console INTEGER :: itrestart=10000 !< Number of iterations between save of restart.h5 file INTEGER :: ittracer=100 !< Number of iterations between save of traced particles position and velocity INTEGER :: itcelldiag=100000 !< Number of iterations between save of celldiag diagnostic INTEGER :: nbcelldiag=0 !< Number of celldiagnostics INTEGER :: itgraph !< Number of iterations between graphical interface updates INTEGER :: mpirank !< MPIrank of the current processus INTEGER :: mpisize !< Size of the MPI_COMM_WORLD communicator INTEGER :: rightproc !< Rank of next processor in the z decomposition INTEGER :: leftproc !< Rank of previous processor in the z decomposition ! ! List of logical file units INTEGER :: lu_in = 90 !< File duplicated from STDIN INTEGER :: lu_stop = 91 !< stop file, see subroutine TESEND INTEGER :: lu_partfile = 120 !< particle loading file, see beam::loadpartfile ! ! HDF5 file CHARACTER(len=256) :: resfile = "results.h5" !< Main result file CHARACTER(len=256) :: rstfile = "restart.h5" !< Restart file CHARACTER(len=256) :: magnetfile = "" !< H5 file containing the magnetic field definition where r,z are in m and Br, Bz are in T CHARACTER(len=256) :: partfile(10)="" !< Particle loading file CHARACTER(len=256) :: addedtestspecfile(10)="" !< Particle file list for added particles at restart INTEGER :: fidres !< File ID for resfile INTEGER :: fidrst !< File ID for restart file TYPE(BUFFER_TYPE) :: hbuf0 !< Hashtable for 0d var ! ! Plasma parameters LOGICAL :: nlPhis= .TRUE. !< Calculate self consistent electric field flag LOGICAL :: nlfreezephi= .FALSE. !< Freeze the Poisson solver to the field obtained at (re-)start LOGICAL :: nlclassical= .FALSE. !< If true, solves the equation of motion according to classical !! dynamics LOGICAL :: nlperiod(2)=(/.false.,.false./)!< Set periodic splines on or off LOGICAL :: partperiodic= .TRUE. 
!< Sets if the particles boundary conditions are periodic or open INTEGER :: nbaddtestspecies=0 !< On restart number of files to read to add test particles INTEGER :: nplasma !< Number of macro-particles on initialisation INTEGER :: nbspecies = 1 !< Number of particles species also counting tracing particles INTEGER :: npartsalloc = 0 !< Size of particle memory allocated at the begining of the simulation INTEGER :: nblock !< Number of slices in Z for stable distribution initialisation REAL(kind=db) :: potinn=0 !< Electric potential at the inner metallic wall REAL(kind=db) :: potout=0 !< Electric potential at the outer metallic wall REAL(kind=db) :: B0 !< Max magnitude of magnetic field [T] and normalisation factor for magnetic field REAL(kind=db), allocatable :: Bz(:), Br(:) !< Normalised magnetic field components REAL(kind=db), allocatable :: Athet(:) !< Theta component of the magnetic vector potential Tm TYPE(spline2d), SAVE :: splrz !< Spline at r and z for total electric field TYPE(spline2d), SAVE :: splrz_ext !< Spline at r and z for external electric field REAL(kind=db), allocatable :: Ez(:), Er(:) !< Normalised electric field components ( ext+self ) REAL(kind=db), allocatable :: pot(:) !< Normalised electrostatic potential ( ext+self ) REAL(kind=db), allocatable :: Ezxt(:), Erxt(:) !< Normalised external Electric field components REAL(kind=db), allocatable :: potxt(:) !< Normalised external Electro static potential REAL(kind=db) :: radii(11) !< Inner and outer radius of cylinder and radii of fine mesh region [m] REAL(kind=db) :: plasmadim(4) !< Zmin Zmax Rmin Rmax values for plasma particle loading [m] INTEGER :: distribtype=1 !< Type of distribution function used to load the particles !!1: gaussian, 2: Stable as defined in 4.95 of Davidson, 7 use particle input file REAL(kind=db) :: H0=0 !< Initial value of Hamiltonian for distribution 2 [J] REAL(kind=db) :: P0=0 !< Initial canonical angular momentum for distribution 2 [kg m^2/s] REAL(kind=db) :: temprescale = -1.0 !< Factor used for temperature rescaling in case of a restart (<0 -> no rescaling) INTEGER :: samplefactor =-1 !< Factor used for the up-sampling of the particles number REAL(kind=db) :: lz(11) !< Lower and upper cylinder limits in z direction [m] REAL(kind=db) :: n0 !< Physical plasma density parameter [m-3] used in distribtype=1 and for time scales normlisation !REAL(kind=db), DIMENSION(:,:), ALLOCATABLE, SAVE:: moments !< Moments of the distribution function evaluated every it2d REAL(kind=db), DIMENSION(:), ALLOCATABLE, SAVE:: rhs !< right hand side of the poisson equation solver REAL(kind=db), DIMENSION(:), ALLOCATABLE, SAVE:: volume !< Volume covered by each spline for density calculation INTEGER :: nz !< Total Number of grid intervals in z INTEGER :: nnz(10) !< Number of grid intervals in z INTEGER :: nsubz=10 !< Number of sub-intervals in z INTEGER :: nr !< Total number of grid intervals in r INTEGER :: nnr(10) !< Number of grid intervals in r in each subdomain INTEGER :: nsubr=10 !< Number of sub-intervals in r REAL(kind=db) :: dz(10) !< Cell size in z REAL(kind=db) :: dr(10) !< Cell size in r for each region REAL(kind=db), ALLOCATABLE :: zgrid(:) !< Nodes positions in longitudinal direction REAL(kind=db), ALLOCATABLE :: rgrid(:) !< Nodes positions in radial direction REAL(kind=db) :: bnorm,enorm,vnorm,tnorm,rnorm,phinorm,qnorm !< Normalization constants REAL(kind=db) :: qsim !< Charge of superparticles [C] REAL(kind=db) :: msim !< Mass of superparticles [kg] REAL(kind=db) :: partmass=me !< Mass of physical 
particle [kg] INTEGER :: femorder(2) !< FEM order INTEGER :: ngauss(2) !< Number of gauss points for FEM integration LOGICAL :: nlppform =.TRUE. !< Defines if spline evaluation is done using ppform (faster with true) INTEGER, SAVE :: nrank(2) !< Number of splines in both directions REAL(kind=db) :: omegac !< yclotronic frequency at B0 [1/s] REAL(kind=db) :: omegap !< Plasma frequency at n0 [1/s] REAL(kind=db) :: temp !< Initial temperature of plasma [K] for distribtype=1 ! If magnetfile ='' The magnetic field is one of a magnetic mirror with maximum amplitude on axis of B0 ! and REAL(kind=db) :: Rcurv = 1.0 !< Magnetic field curvature coefficient REAL(kind=db) :: Width = 1.0 !< Distance between two magnetic mirrors REAL(kind=db) :: weights_scale=1.0 !< Scale factor for the particle weights on restart (only for newres=.true.) INTEGER, DIMENSION(:), ALLOCATABLE :: Zbounds !< Index of bounds for local processus in Z direction for MPI decomposition INTEGER :: bscaling = -1 !< if >0 rescale the magnetic field read from h5 file before calculating value at grid points, if <0 rescale after interpolation, if = 0 doesn't rescale REAL(kind=db):: invdz(10), invdr(10) !< inverse of the grid cell step CONTAINS ! !================================================================================ SUBROUTINE basic_data ! ! Define basic data ! use mpihelper USE omp_lib Use random IMPLICIT NONE ! ! Local vars and arrays CHARACTER(len=256) :: inputfilename INTEGER :: i, nbprocs ! NAMELIST /BASIC/ job_time, extra_time, nrun, tmax, dt, nlres, nlsave, newres, nlxg, & & nplasma, potinn, potout, B0, lz, n0, nz, nnz, nnr, femorder, ngauss, & & nlppform, plasmadim, radii, temp, Rcurv, width, it0d, it2d, itparts, ittext, & & resfile, rstfile, itgraph, nlPhis, distribtype, nblock, nlclassical, H0, P0, partperiodic, & & temprescale, samplefactor, nlmaxwellsource, npartsalloc, partfile, partmass, nbspecies, & & ittracer, itcelldiag, nbcelldiag, magnetfile, weights_scale, nlfreezephi, nbaddtestspecies, & & addedtestspecfile, bscaling !________________________________________________________________________________ ! 1. Process Standard Input File ! IF(COMMAND_ARGUMENT_COUNT().NE.1)THEN WRITE(*,*)'ERROR, ONE COMMAND-LINE ARGUMENT REQUIRED, STOPPING' STOP ENDIF CALL GET_COMMAND_ARGUMENT(1,inputfilename) OPEN(UNIT=lu_in,FILE=trim(inputfilename),ACTION='READ') IF(mpirank .eq. 0) THEN !________________________________________________________________________________ ! 1. Label the run ! READ(lu_in,'(a)') label1 READ(lu_in,'(a)') label2 READ(lu_in,'(a)') label3 READ(lu_in,'(a)') label4 ! WRITE(*,'(12x,a/)') label1(1:len_trim(label1)) WRITE(*,'(12x,a/)') label2(1:len_trim(label2)) WRITE(*,'(12x,a/)') label3(1:len_trim(label3)) WRITE(*,'(12x,a/)') label4(1:len_trim(label4)) !________________________________________________________________________________ ! 2. Read in basic data specific to run ! READ(lu_in,basic) WRITE(*,basic) #if _DEBUG==1 WRITE(*,*) "Compiled in debug mode" #endif ELSE READ(lu_in,basic) END IF CALL mpitypes_init ! initialize all mpi types that will be needed in the simulation WRITE(*,'(a,i4.2,a,i4.2,a)')"Running on ",mpisize," tasks with", omp_get_max_threads() ," openMP threads" IF(samplefactor .gt. 1 .and. .not. newres) THEN IF(mpirank.eq.0) WRITE(*,*)"To increase the number of particles, you need to create a new result file (set newres to 1)" CALL MPI_abort(MPI_COMM_WORLD,-1,ierr) END IF IF (npartsalloc .lt. nplasma) THEN npartsalloc=nplasma END IF ! 
Total number of intervals nr=sum(nnr) if (any(nnz.gt.0)) then nz=sum(nnz) else nnz(1)=nz end if ! Normalisation constants if(nplasma .gt. 0) then qsim=pi*(plasmadim(2)-plasmadim(1))*(plasmadim(4)**2-plasmadim(3)**2)*n0*elchar/nplasma else qsim=sign(n0,elchar) end if msim=abs(qsim)/elchar*partmass vnorm=vlight omegac=sign(elchar,qsim)/partmass*B0 omegap=sqrt(elchar**2*abs(n0)/partmass/eps_0) tnorm=min(abs(1/omegac),abs(1/omegap)) rnorm=vnorm*tnorm bnorm=B0 enorm=vlight*bnorm phinorm=enorm*rnorm ! Normalised boundary conditions potinn=potinn/phinorm potout=potout/phinorm ! Normalised dt dt=dt/tnorm ! Characteristic frequencies and normalised volume IF(mpirank .eq. 0) THEN IF(abs(omegap).GT. abs(omegac)) THEN WRITE(*,'(a,3(1pe12.3))') 'omegap, omegac, omegap/omegac', omegap, omegac, omegap/omegac ELSE WRITE(*,'(a,3(1pe12.3))') 'omegap, omegac, omegac/omegap', omegap, omegac, omegac/omegap END IF END IF ! Construction of the mesh rgrid in r and zgrid in z and its normalisation CALL mesh rgrid=rgrid/rnorm zgrid=zgrid/rnorm dz=dz/rnorm dr=dr/rnorm Where(dr.gt.0) invdr=1/dr Where(dz.gt.0) invdz=1/dz !invdz=1/dz ! Initialize random number generator nbprocs = omp_get_max_threads() allocate(seed(ran_s,nbprocs), ran_index(nbprocs), ran_array(ran_k,nbprocs)) IF(.false.) then call date_and_time(time=random_seed_str) CALL MPI_BCAST(random_seed_str,10,MPI_CHARACTER,0,MPI_COMM_WORLD,ierr) write(*,*) "MPI seed:", mpirank, random_seed_str end if Do i=1,nbprocs ! Generate seed from the default seed-string in random module CALL decimal_to_seed(random_seed_str, seed(:,i)) ! Generate a different seed for each processor from the mother seed CALL next_seed(mpirank*nbprocs+i,seed(:,i)) ! Initialize the random array (first hundred numbers) CALL random_init(seed(:,i), ran_index(i), ran_array(:,i)) end do ! END SUBROUTINE basic_data !================================================================================ SUBROUTINE daytim(str) ! ! Print date and time ! IMPLICIT NONE ! CHARACTER(len=*), INTENT(in) :: str ! ! Local vars and arrays CHARACTER(len=16) :: d, t, dat, functime !________________________________________________________________________________ ! CALL DATE_AND_TIME(d,t) dat=d(7:8) // '/' // d(5:6) // '/' // d(1:4) functime=t(1:2) // ':' // t(3:4) // ':' // t(5:10) WRITE(*,'(a,1x,a,1x,a)') str, dat(1:10), functime(1:12) ! END SUBROUTINE daytim !================================================================================ SUBROUTINE timera(cntrl, str, eltime) ! ! Timers (cntrl=0/1 to Init/Update) ! IMPLICIT NONE INTEGER, INTENT(in) :: cntrl CHARACTER(len=*), INTENT(in) :: str REAL(kind=db), OPTIONAL, INTENT(out) :: eltime ! INTEGER, PARAMETER :: ncmax=128 INTEGER, SAVE :: icall=0, nc=0 REAL(kind=db), SAVE :: startt0=0.0 CHARACTER(len=16), SAVE :: which(ncmax) INTEGER :: lstr, found, i REAL(kind=db) :: seconds REAL(kind=db), DIMENSION(ncmax), SAVE :: startt = 0.0, endt = 0.0 !________________________________________________________________________________ IF( icall .EQ. 0 ) THEN icall = icall+1 startt0 = seconds() END IF lstr = LEN_TRIM(str) IF( lstr .GT. 0 ) found = loc(str) !________________________________________________________________________________ ! SELECT CASE (cntrl) ! CASE(-1) ! Current wall time IF( PRESENT(eltime) ) THEN eltime = seconds() - startt0 ELSE WRITE(*,'(/a,a,1pe10.3/)') "++ ", ' Wall time used so far = ', seconds() - startt0 END IF ! CASE(0) ! Init Timer IF( found .EQ. 0 ) THEN ! 
Called for the 1st time for 'str' nc = nc+1 which(nc) = str(1:lstr) found = nc END IF startt(found) = seconds() ! CASE(1) ! Update timer endt(found) = seconds() - startt(found) IF( PRESENT(eltime) ) THEN eltime = endt(found) ELSE WRITE(*,'(/a,a,1pe10.3/)') "++ "//str, ' wall clock time = ', endt(found) END IF ! CASE(2) ! Update and reset timer endt(found) = endt(found) + seconds() - startt(found) startt(found) = seconds() IF( PRESENT(eltime) ) THEN eltime = endt(found) END IF ! CASE(9) ! Display all timers IF( nc .GT. 0 ) THEN WRITE(*,'(a)') "Timer Summary" WRITE(*,'(a)') "=============" DO i=1,nc WRITE(*,'(a20,2x,2(1pe12.3))') TRIM(which(i))//":", endt(i) END DO END IF ! END SELECT ! CONTAINS INTEGER FUNCTION loc(funcstr) CHARACTER(len=*), INTENT(in) :: funcstr INTEGER :: j, ind loc = 0 DO j=1,nc ind = INDEX(which(j), funcstr(1:lstr)) IF( ind .GT. 0 .AND. LEN_TRIM(which(j)) .EQ. lstr ) THEN loc = j EXIT END IF END DO END FUNCTION loc END SUBROUTINE timera !================================================================================ !--------------------------------------------------------------------------- !> @author !> Patryk Kaminski EPFL/SPC ! ! DESCRIPTION: !> !> @brief Creates the mesh in r and z direction for calculating the electric and magnetic fields. !--------------------------------------------------------------------------- SUBROUTINE mesh INTEGER :: j,i,k ALLOCATE(zgrid(0:nz),rgrid(0:nr)) !dz=(lz(2)-lz(1))/nz k=0 nsubz=count(nnz.gt.0) zgrid(0)=lz(1) do i=1,nsubz dz(i)=(lz(i+1)-lz(i))/nnz(i) if (nnz(i).gt.0) then DO j=1,nnz(i) zgrid(j+k)=lz(i)+j*dz(i) END DO end if k=k+nnz(i) end do nsubr=count(nnr.gt.0) k=0 rgrid(0)=radii(1) do i=1,nsubr dr(i)=(radii(i+1)-radii(i))/nnr(i) if (nnr(i).gt.0) then DO j=1,nnr(i) rgrid(j+k)=radii(i)+j*dr(i) END DO end if k=k+nnr(i) end do END SUBROUTINE mesh END MODULE basic diff --git a/src/beam_mod.f90 b/src/beam_mod.f90 index d328e62..8679295 100644 --- a/src/beam_mod.f90 +++ b/src/beam_mod.f90 @@ -1,2088 +1,2148 @@ MODULE beam !------------------------------------------------------------------------------ ! EPFL/Swiss Plasma Center !------------------------------------------------------------------------------ ! ! MODULE: beam ! !> @author !> Guillaume Le Bars EPFL/SPC !> Patryk Kaminski EPFL/SPC !> Trach Minh Tran EPFL/SPC ! ! DESCRIPTION: !> Module responsible for loading, advancing and computing the necessary diagnostics for the simulated particles. !------------------------------------------------------------------------------ ! USE constants use mpi USE mpihelper USE basic, ONLY: mpirank, mpisize USE distrib USE particletypes USE weighttypes IMPLICIT NONE ! !TYPE(particles) :: parts !< Storage for all the particles !SAVE :: parts TYPE(particles), DIMENSION(:), ALLOCATABLE, SAVE :: partslist ! Diagnostics (scalars) REAL(kind=db) :: ekin=0 !< Total kinetic energy (J) REAL(kind=db) :: epot=0 !< Total potential energy (J) REAL(kind=db) :: etot=0 !< Current total energy (J) REAL(kind=db) :: etot0=0 !< Initial total energy (J) REAL(kind=db) :: loc_etot0=0 !< theoretical local total energy (J) REAL(kind=db) :: Energies(4) !< (1) kinetic energy, (2) potential energy, (3) total energy and (4) gained/lossed energy due to gain or loss of particles (J) ! INTEGER, DIMENSION(:), ALLOCATABLE, SAVE :: Nplocs_all !< Array containing the local numbers of particles in each MPI process INTERFACE add_created_part MODULE PROCEDURE add_linked_created_part, add_list_created_part END INTERFACE add_created_part ! 
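To make the normalisation carried out in basic_data (in basic_mod above) more concrete, here is a small self-contained sketch that reproduces the scaling formulas for assumed, purely illustrative values of B0 and n0; the sign handling of qsim and the superparticle weighting are omitted.

program norm_sketch
  implicit none
  integer, parameter :: db = kind(1.0d0)
  real(kind=db), parameter :: elchar = 1.602176634e-19_db   ! elementary charge [C]
  real(kind=db), parameter :: me     = 9.1093837015e-31_db  ! electron mass [kg]
  real(kind=db), parameter :: eps_0  = 8.8541878128e-12_db  ! vacuum permittivity [F/m]
  real(kind=db), parameter :: vlight = 2.99792458e8_db      ! speed of light [m/s]
  real(kind=db) :: B0, n0, omegac, omegap, tnorm, rnorm, bnorm, enorm, phinorm
  B0 = 0.21_db    ! illustrative peak magnetic field [T]
  n0 = 1.0e17_db  ! illustrative reference density [m^-3]
  omegac  = elchar/me*B0                                  ! cyclotron frequency [1/s]
  omegap  = sqrt(elchar**2*n0/(me*eps_0))                 ! plasma frequency [1/s]
  tnorm   = min(abs(1.0_db/omegac), abs(1.0_db/omegap))   ! time scale [s]
  rnorm   = vlight*tnorm                                  ! length scale [m]
  bnorm   = B0                                            ! magnetic field scale [T]
  enorm   = vlight*bnorm                                  ! electric field scale [V/m]
  phinorm = enorm*rnorm                                   ! potential scale [V]
  write(*,'(a,1pe12.4)') ' omegac  [1/s] = ', omegac
  write(*,'(a,1pe12.4)') ' omegap  [1/s] = ', omegap
  write(*,'(a,1pe12.4)') ' tnorm   [s]   = ', tnorm
  write(*,'(a,1pe12.4)') ' rnorm   [m]   = ', rnorm
  write(*,'(a,1pe12.4)') ' phinorm [V]   = ', phinorm
end program norm_sketch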
abstract interface subroutine rloader(nbase,y,rminus,rplus) USE constants REAL(kind=db), INTENT(out) :: y(:) INTEGER, INTENT(in) :: nbase REAL(kind=db), INTENT(in) :: rplus, rminus end subroutine REAL(kind=db) FUNCTION gamma(UZ, UR, UTHET) USE constants REAL(kind=db), INTENT(IN):: UR,UZ,UTHET end FUNCTION end interface CONTAINS !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> @brief Loads the particles at the beginning of the simulation and create the parts variable if necessary !--------------------------------------------------------------------------- SUBROUTINE load_parts USE basic, ONLY: nplasma, mpirank, ierr, distribtype, nlclassical, nbspecies, partfile use mpi INTEGER:: i REAL(kind=db), DIMENSION(:), ALLOCATABLE :: VZ, VR, VTHET ALLOCATE(VZ(nplasma), VR(nplasma), VTHET(nplasma)) ! Select case to define the type of distribution SELECT CASE(distribtype) CASE(1) ! Gaussian distribution in V, uniform in Z and 1/R in R CALL loaduniformRZ(partslist(1), VR, VZ, VTHET) CASE(2) !Stable distribution from Davidson 4.95 p.119 CALL loadDavidson(partslist(1), VR, VZ, VTHET, lodunir) CASE(3) !Stable distribution from Davidson 4.95 p.119 but with constant distribution in R CALL loadDavidson(partslist(1), VR, VZ, VTHET, lodinvr) CASE(4) !Stable distribution from Davidson 4.95 p.119 but with gaussian distribution in R CALL loadDavidson(partslist(1), VR, VZ, VTHET, lodgausr) CASE(5) !Stable distribution from Davidson 4.95 p.119 with gaussian in V computed from v_th given by temp CALL loadDavidson(partslist(1), VR, VZ, VTHET, lodunir) CASE(6) ! Uniform distribution in R and Z and Gaussian distribution in V with Vz @brief Checks for each particle if the z position is outside of the local/global simulation space. !> Depending on the boundary conditions, the leaving particles are sent to the correct neighbouring MPI process !> or deleted. ! !> @param[in] p particles structure ! !> @author Guillaume Le Bars EPFL/SPC !--------------------------------------------------------------------------- SUBROUTINE bound(p) USE basic, ONLY: zgrid, nz, Zbounds, mpirank, step, leftproc, rightproc, partperiodic + use omp_lib IMPLICIT NONE type(particles), INTENT(INOUT):: p - INTEGER :: i, rsendnbparts, lsendnbparts, nblostparts + INTEGER :: i,j, rsendnbparts, lsendnbparts, nblostparts INTEGER :: receivednbparts, partdiff - INTEGER, DIMENSION(p%Nploc) :: sendhole - INTEGER, DIMENSION(p%Nploc) :: losthole LOGICAL:: leftcomm, rightcomm INTEGER, ALLOCATABLE:: partstoremove(:) - - receivednbparts=0 - nblostparts=0 - rsendnbparts=0 - lsendnbparts=0 - + INTEGER,allocatable :: nblost(:) + + allocate(nblost(size(p%nblost,1))) + nblost=0 IF (p%Nploc .gt. 0) THEN - losthole=0 - sendhole=0 - + !$OMP SINGLE + p%losthole=0 + p%sendhole=0 + !$OMP END SINGLE ! We communicate with the left processus leftcomm = leftproc .ne. -1 ! We communicate with the right processus rightcomm = rightproc .ne. -1 - ! Boundary condition at z direction - !$OMP PARALLEL DO DEFAULT(SHARED) + !$OMP DO DO i=1,p%Nploc ! If the particle is to the right of the local simulation space, it is sent to the right MPI process IF (p%pos(3,i) .ge. zgrid(Zbounds(mpirank+1))) THEN IF(partperiodic) THEN DO WHILE (p%pos(3,i) .GT. 
zgrid(nz)) p%pos(3,i) = p%pos(3,i) - zgrid(nz) + zgrid(0) END DO END IF - !$OMP CRITICAL (nbparts) + !!$OMP CRITICAL (nbparts) IF(rightcomm) THEN rsendnbparts=rsendnbparts+1 - sendhole(lsendnbparts+rsendnbparts)=i + p%sendhole(i)=i ELSE nblostparts=nblostparts+1 - losthole(nblostparts)=i - p%nblost(2)=p%nblost(2)+1 + p%losthole(i)=i + nblost(2)=nblost(2)+1 END IF - !$OMP END CRITICAL (nbparts) + !!$OMP END CRITICAL (nbparts) ! If the particle is to the left of the local simulation space, it is sent to the left MPI process ELSE IF (p%pos(3,i) .lt. zgrid(Zbounds(mpirank))) THEN IF(partperiodic) THEN DO WHILE (p%pos(3,i) .LT. zgrid(0)) p%pos(3,i) = p%pos(3,i) + zgrid(nz) - zgrid(0) END DO END IF - !$OMP CRITICAL (nbparts) + !!$OMP CRITICAL (nbparts) IF(leftcomm) THEN ! We send the particle to the left process lsendnbparts=lsendnbparts+1 - sendhole(lsendnbparts+rsendnbparts)=-i + p%sendhole(i)=-i ELSE ! we destroy the particle nblostparts=nblostparts+1 - losthole(nblostparts)=i - p%nblost(1)=p%nblost(1)+1 + p%losthole(i)=i + nblost(1)=nblost(1)+1 END IF - !$OMP END CRITICAL (nbparts) + !!$OMP END CRITICAL (nbparts) END IF END DO - !$OMP END PARALLEL DO + !$OMP END DO NOWAIT END IF + !$OMP critical (lostparts_red) + p%nblost=nblost+p%nblost + !$OMP END CRITICAL (lostparts_red) + + !$OMP BARRIER + + !$OMP MASTER + receivednbparts=0 + + + j=1 + nblostparts=0 + rsendnbparts=0 + lsendnbparts=0 + Do i=1,p%Nploc + if(p%sendhole(i) .eq. 0) cycle + + p%sendhole(j)=p%sendhole(i) + if(p%sendhole(i).gt.0)then + rsendnbparts=rsendnbparts+1 + else + lsendnbparts=lsendnbparts+1 + end if + j=j+1 + end do + + j=1 + nblostparts=0 + Do i=1,p%Nploc + if(p%losthole(i) .eq. 0) cycle + + p%losthole(j)=p%losthole(i) + j=j+1 + nblostparts=nblostparts+1 + end do + IF(mpisize .gt. 1) THEN ! We send the particles leaving the local simulation space to the closest neighbour - CALL particlescommunication(p, lsendnbparts, rsendnbparts, sendhole, receivednbparts, (/leftproc,rightproc/)) + CALL particlescommunication(p, lsendnbparts, rsendnbparts, p%sendhole, receivednbparts, (/leftproc,rightproc/)) END IF ! If the boundary conditions are not periodic, we delete the corresponding particles IF(nblostparts .gt. 0 .and. step .ne. 0) THEN DO i=1,nblostparts - CALL delete_part(p, losthole(i), .false. ) + CALL delete_part(p, p%losthole(i), .false. ) END DO !WRITE(*,'(i8.2,a,i4.2)') nblostparts, " particles lost in z on process: ", mpirank END IF ! computes if we received less particles than we sent partdiff=max(lsendnbparts+rsendnbparts-receivednbparts,0) IF(nblostparts + partdiff .gt. 0) THEN ALLOCATE(partstoremove(nblostparts+partdiff)) - partstoremove(1:partdiff)=abs(sendhole(receivednbparts+1:receivednbparts+partdiff)) - partstoremove(partdiff+1:partdiff+nblostparts)=abs(losthole(1:nblostparts)) + partstoremove(1:partdiff)=abs(p%sendhole(receivednbparts+1:receivednbparts+partdiff)) + partstoremove(partdiff+1:partdiff+nblostparts)=abs(p%losthole(1:nblostparts)) call LSDRADIXSORT(partstoremove,size(partstoremove)) !Write(*,'(a,60i)') "partstoremove: ", partstoremove ! 
If we received less particles than we sent, or lost particles we fill the remaining holes with the particles from the end of the parts arrays DO i=nblostparts+partdiff,1,-1 CALL move_part(p, p%Nploc, partstoremove(i)) p%partindex(p%Nploc)=-1 p%Nploc = p%Nploc-1 END DO + deallocate(partstoremove) END IF + !$OMP END MASTER + deallocate(nblost) END subroutine bound !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> @brief Check if a particle is outside the simulation domain and remove it if needed !> @param[in] p particles structure !--------------------------------------------------------------------------- SUBROUTINE boundary_loss(p) USE basic, ONLY: rgrid, nr Use geometry, ONLY: geom_weight, dom_weight + Use omp_lib + IMPLICIT NONE type(particles), INTENT(INOUT):: p INTEGER :: i,j,isup, nblostparts, iend,nbunch - INTEGER, DIMENSION(p%Nploc) :: losthole INTEGER, DIMENSION(16)::idwall INTEGER,allocatable :: nblost(:) allocate(nblost(size(p%nblost,1))) - nblostparts=0 + nblost=0 nbunch=16 - IF (p%Nploc .le. 0) return - losthole=0 + !$OMP SINGLE + p%losthole=0 + !$OMP END SINGLE + + !!$OMP PARALLEL DEFAULT(SHARED), private(i,iend,j,isup,idwall) - !$OMP PARALLEL DEFAULT(SHARED), private(i,iend,j,isup,idwall) - !$OMP DO reduction(+:nblost) + !$OMP DO DO i=1,p%Nploc,nbunch ! Avoid segmentation fault caused by accessing non relevant data iend=min(i+nbunch-1,p%Nploc) ! calculate the weight do determine if a particle is inside the simulation domain. call dom_weight(p%pos(3,i:iend), p%pos(1,i:iend), p%geomweight(0,i:iend),idwall(1:iend-i+1)) do j=i,iend if(p%geomweight(0,j).le.0 .or. p%pos(1,j) .ge. rgrid(nr) .or. p%pos(1,j) .le. rgrid(0)) then ! If the particle is outside of the simulation space in the r direction, or if it is outside of the vacuum region it is deleted. - !$OMP CRITICAL (lostparts) - nblostparts=nblostparts+1 - losthole(nblostparts)=j - !$OMP END CRITICAL (lostparts) + !!$OMP CRITICAL (lostparts) + p%losthole(j)=j + !!$OMP END CRITICAL (lostparts) isup=0 if(p%pos(1,j) .ge. rgrid(nr) .or. idwall(j-i+1) .gt.0) then isup=1 end if nblost(3+isup+idwall(j-i+1))=nblost(3+isup+idwall(j-i+1))+1 else call p_calc_rzindex(p,j) end if end do call geom_weight(p%pos(3,i:iend), p%pos(1,i:iend), p%geomweight(:,i:iend)) END DO - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO NOWAIT + !!$OMP END PARALLEL + +!$OMP critical (lostparts_red) + p%nblost=nblost+p%nblost +!$OMP END CRITICAL (lostparts_red) + + +!$OMP BARRIER +!$OMP MASTER + nblostparts=0 + j=1 + Do i=1,p%Nploc + if(p%losthole(i) .eq. 0) cycle + p%losthole(j)=p%losthole(i) + j=j+1 + nblostparts=nblostparts+1 + end do IF(nblostparts.gt.0) THEN - p%nblost=nblost+p%nblost !call qsort(losthole,p%Nploc,sizeof(losthole(1)),compare_int) - call LSDRADIXSORT(losthole(1:nblostparts),nblostparts) + call LSDRADIXSORT(p%losthole(1:nblostparts),nblostparts) !Write(*,'(a,60i)') "losthole: ", losthole(1:nblostparts+1) DO i=nblostparts,1,-1 - CALL delete_part(p,losthole(i)) + CALL delete_part(p,p%losthole(i)) END DO END IF +!$OMP END MASTER END SUBROUTINE boundary_loss !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! 
DESCRIPTION: !> @brief Computes the radial and axial cell index of the particle i !> @param[in] p particles structure !> @param[in] i index in p of the particle !--------------------------------------------------------------------------- subroutine p_calc_rzindex(p,i) use basic, only: rgrid,zgrid,invdz,invdr, nnr, nsubr,nsubz, nnz integer::i,j,k type(particles)::p k=0 do j=1,nsubr IF (p%pos(1,i) .GT. rgrid(k) .AND. p%pos(1,i) .LT. rgrid(k+nnr(j))) THEN p%rindex(i)=floor((p%pos(1,i)-rgrid(k))*invdr(j))+k exit end if k=k+nnr(j) end do k=0 do j=1,nsubz IF (p%pos(3,i) .GT. zgrid(k) .AND. p%pos(3,i) .LT. zgrid(k+nnz(j))) THEN p%zindex(i)=floor((p%pos(3,i)-zgrid(k))*invdz(j))+k exit end if k=k+nnz(j) end do !p%zindex(i)=floor((p%Z(i)-zgrid(0))*invdz) end subroutine p_calc_rzindex !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> @brief Computes the magnetic field amplitude at each particle position interpolated from the magnetic field at the closeset grid point !> @param[in] p particles structure !--------------------------------------------------------------------------- SUBROUTINE comp_mag_p(p) USE basic, ONLY: zgrid, rgrid, BZ, BR, nz, invdz type(particles), INTENT(INOUT):: p INTEGER :: i Real(kind=db):: WZ,WR INTEGER:: j1,j2,j3,j4 - !$OMP PARALLEL DO SIMD DEFAULT(SHARED) Private(J1,J2,J3,J4,WZ,WR) + !$OMP DO SIMD Private(J1,J2,J3,J4,WZ,WR) DO i=1,p%Nploc WZ=(p%pos(3,i)-zgrid(p%zindex(i)))/(zgrid(p%zindex(i)+1)-zgrid(p%zindex(i))); WR=(p%pos(1,i)-rgrid(p%rindex(i)))/(rgrid(p%rindex(i)+1)-rgrid(p%rindex(i))); J1=(p%rindex(i))*(nz+1) + p%zindex(i)+1 J2=(p%rindex(i))*(nz+1) + p%zindex(i)+2 J3=(p%rindex(i)+1)*(nz+1)+p%zindex(i)+1 J4=(p%rindex(i)+1)*(nz+1)+p%zindex(i)+2 ! Interpolation for magnetic field p%B(2,i)=(1-WZ)*(1-WR)*Bz(J4) & & +WZ*(1-WR)*Bz(J3) & & +(1-WZ)*WR*Bz(J2) & & +WZ*WR*Bz(J1) p%B(1,i)=(1-WZ)*(1-WR)*Br(J4) & & +WZ*(1-WR)*Br(J3) & & +(1-WZ)*WR*Br(J2) & & +WZ*WR*Br(J1) END DO - !$OMP END PARALLEL DO SIMD + !$OMP END DO SIMD NOWAIT end subroutine comp_mag_p !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief Routine used to compute the lorentz factor \f$\gamma\f$ in the classical simulations. !> This routine systematically returns 1.0 to treat the system according to classical dynamic. ! !> @param[out] gamma the lorentz factor \f$\gamma\f$ !> @param[in] UZ \f$\gamma\beta_z=\gamma v_z/c\f$ the normalized particle longitudinal velocity !> @param[in] UR \f$\gamma\beta_r=\gamma v_r/c\f$ the normalized particle radial velocity !> @param[in] UTHET \f$\gamma\beta_\theta=\gamma v_\theta/c\f$ the normalized particle azimuthal velocity !--------------------------------------------------------------------------- REAL(kind=db) FUNCTION gamma_classical(UZ, UR, UTHET) !!#if __INTEL_COMPILER > 1700 !$OMP declare simd(gamma_classical) !!#endif REAL(kind=db), INTENT(IN):: UR,UZ,UTHET gamma_classical=1.0 END FUNCTION gamma_classical !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> @brief Routine used to compute the lorentz factor \f$\gamma\f$ in the relativistic simulations. !> This routine computes the Lorentz factor \f$\gamma=\sqrt{1+\mathbf{\gamma\beta}^2}\f$ ! 
!> @param[out] gamma the lorentz factor \f$\gamma\f$ !> @param[in] UZ \f$\gamma\beta_z=\gamma v_z/c\f$ the normalized particle longitudinal velocity !> @param[in] UR \f$\gamma\beta_r=\gamma v_r/c\f$ the normalized particle radial velocity !> @param[in] UTHET \f$\gamma\beta_\theta=\gamma v_\theta/c\f$ the normalized particle azimuthal velocity !--------------------------------------------------------------------------- REAL(kind=db) FUNCTION gamma_relativistic(UZ, UR, UTHET) !!#if __INTEL_COMPILER > 1700 !$OMP declare simd(gamma_relativistic) !!#endif REAL(kind=db), INTENT(IN):: UR,UZ,UTHET gamma_relativistic=sqrt(1+UZ**2+UR**2+UTHET**2) END FUNCTION gamma_relativistic !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> @brief General routine to compute the velocities at time t+1. !> This routine allows to treat the classical and relativistic case efficiently from a numerical standpoint, !> by using a pointer to the routine computing gamma. This avoid the nlclassical flag check on each particle. ! !> @param[in] p The particles structure being updated !--------------------------------------------------------------------------- SUBROUTINE comp_velocity(p) ! ! Computes the new velocity of the particles due to Lorentz force ! USE basic, ONLY : nlclassical type(particles), INTENT(INOUT):: p ! Store old Velocities !CALL swappointer(p%UZold, p%UZ) + !$OMP master CALL swappointer2(p%Uold, p%U) !CALL swappointer(p%UTHETold, p%UTHET) CALL swappointer(p%Gammaold, p%Gamma) - + !$OMP end master + !$OMP BARRIER IF (nlclassical) THEN CALL comp_velocity_fun(p, gamma_classical) ELSE CALL comp_velocity_fun(p, gamma_relativistic) END IF END SUBROUTINE comp_velocity !--------------------------------------------------------------------------- !> @author !> Patryk Kaminski EPFL/SPC !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> @brief Routine called by comp_velocity to compute the velocities at time t+1. !> This routine allows to treat the classical and relativistic case efficiently from a numerical standpoint, !> by using the routine computing gamma as an input. This avoid the nlclassical flag check on each particle. ! !> @param[in] gamma the function used to compute the value of the lorentz factor \f$\gamma\f$ !> @param[in] p The particles structure being updated !--------------------------------------------------------------------------- SUBROUTINE comp_velocity_fun(p, gammafun) ! ! Computes the new velocity of the particles due to Lorentz force ! USE basic, ONLY : bnorm, dt, tnorm procedure(gamma)::gammafun type(particles), INTENT(INOUT):: p REAL(kind=db) :: tau REAL(kind=db):: BRZ, BRR, ZBR, ZBZ, ZPR, ZPZ, ZPTHET, SQR, ZBZ2, ZBR2 INTEGER:: J1, J2, J3, J4 INTEGER:: i ! Normalized time increment tau=p%qmRatio*bnorm*0.5*dt*tnorm IF (p%Nploc .NE. 0) THEN - !$OMP PARALLEL DO SIMD DEFAULT(SHARED) PRIVATE(J1,J2,J3,J4,BRZ, BRR, ZBR, ZBZ, ZPR, ZPZ, ZPTHET, SQR, ZBZ2, ZBR2) + !$OMP DO SIMD PRIVATE(J1,J2,J3,J4,BRZ, BRR, ZBR, ZBZ, ZPR, ZPZ, ZPTHET, SQR, ZBZ2, ZBR2) DO i=1,p%Nploc ! First half of electric pulse p%U(3,i)=p%Uold(3,i)+p%E(2,i)*tau p%U(1,i)=p%Uold(1,i)+p%E(1,i)*tau p%Gamma(i)=gammafun(p%U(3,i), p%U(1,i), p%Uold(2,i)) ! 
Rotation along magnetic field ZBZ=tau*p%B(2,i)/p%Gamma(i) ZBR=tau*p%B(1,i)/p%Gamma(i) ZPZ=p%U(3,i)-ZBR*p%Uold(2,i) !u'_{z} ZPR=p%U(1,i)+ZBZ*p%Uold(2,i) !u'_{r} ZPTHET=p%Uold(2,i)+(ZBR*p%U(3,i)-ZBZ*p%U(1,i)) !u'_{theta} SQR=1+ZBZ*ZBZ+ZBR*ZBR ZBZ2=2*ZBZ/SQR ZBR2=2*ZBR/SQR p%U(3,i)=p%U(3,i)-ZBR2*ZPTHET !u+_{z} p%U(1,i)=p%U(1,i)+ZBZ2*ZPTHET !u+_{r} p%U(2,i)=p%Uold(2,i)+(ZBR2*ZPZ-ZBZ2*ZPR) !u+_{theta} ! Second half of acceleration p%U(3,i)=p%U(3,i)+p%E(2,i)*tau p%U(1,i)=p%U(1,i)+p%E(1,i)*tau ! Final computation of the Lorentz factor p%Gamma(i)=gammafun(p%U(3,i), p%U(1,i), p%U(2,i)) END DO - !$OMP END PARALLEL DO SIMD + !$OMP END DO SIMD NOWAIT END IF p%collected=.false. END SUBROUTINE comp_velocity_fun !--------------------------------------------------------------------------- !> @author !> Patryk Kaminski EPFL/SPC !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief Computes the particles position at time t+1 !> This routine computes the particles position at time t+1 according to the Bunemann algorithm. ! !> @param[in] p The particles structure being updated !--------------------------------------------------------------------------- SUBROUTINE push(p) Use basic, ONLY: dt type(particles), INTENT(INOUT):: p REAL(kind=db):: XP, YP, COSA, SINA, U1, U2, ALPHA INTEGER :: i IF (p%Nploc .NE. 0) THEN - !$OMP PARALLEL DO SIMD DEFAULT(SHARED) PRIVATE(XP, YP, COSA, SINA, U1, U2, ALPHA) + !!$OMP PARALLEL DO SIMD DEFAULT(SHARED) PRIVATE(XP, YP, COSA, SINA, U1, U2, ALPHA) + !$OMP DO SIMD DO i=1,p%Nploc ! Local Cartesian coordinates XP=p%pos(1,i)+dt*p%U(1,i)/p%Gamma(i) YP=dt*p%U(2,i)/p%Gamma(i) ! Conversion to cylindrical coordiantes p%pos(3,i)=p%pos(3,i)+dt*p%U(3,i)/p%Gamma(i) p%pos(1,i)=sqrt(XP**2+YP**2) ! Computation of the rotation angle IF (p%pos(1,i) .EQ. 0) THEN COSA=1 SINA=0 ALPHA=0 ELSE COSA=XP/p%pos(1,i) SINA=YP/p%pos(1,i) ALPHA=asin(SINA) END IF ! New azimuthal position p%pos(2,i)=MOD(p%pos(2,i)+ALPHA,2*pi) ! Velocity in rotated reference frame U1=COSA*p%U(1,i)+SINA*p%U(2,i) U2=-SINA*p%U(1,i)+COSA*p%U(2,i) p%U(1,i)=U1 p%U(2,i)=U2 END DO - !$OMP END PARALLEL DO SIMD + !$OMP END DO SIMD NOWAIT END IF + !$OMP SINGLE p%collected=.false. + !$OMP END SINGLE END SUBROUTINE push !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief Computes several diagnostic quantities !> This routine computes the total kinetic and electric potential energy. !> It keeps track of the reference energy and the number of particle per mpi node. ! !--------------------------------------------------------------------------- SUBROUTINE partdiagnostics ! ! Compute energies ! USE constants, ONLY: vlight USE basic, ONLY: phinorm, cstep, nlclassical, ierr, nbspecies INTEGER:: i,j ! Reset the quantities ekin=0 epot=0 etot=0 ! Computation of the kinetic and potential energy as well as fluid velocities and density - !$OMP PARALLEL DO REDUCTION(+:epot, ekin) DEFAULT(SHARED), PRIVATE(i,j) + !!$OMP PARALLEL DO REDUCTION(+:epot, ekin) DEFAULT(SHARED), PRIVATE(i,j) Do j=1,nbspecies if(.not. partslist(j)%is_field) CYCLE DO i=1,partslist(j)%Nploc ! Potential energy epot=epot+(partslist(j)%pot(i)+partslist(j)%potxt(i))*partslist(j)%q*partslist(j)%weight ! Kinetic energy IF(.not. 
nlclassical) THEN ekin=ekin+(0.5*(partslist(j)%Gammaold(i)+partslist(j)%Gamma(i))-1)*partslist(j)%m*partslist(j)%weight ELSE ekin=ekin+0.5*( partslist(j)%U(1,i)*partslist(j)%Uold(1,i) & & + partslist(j)%U(3,i)*partslist(j)%Uold(3,i) & & + partslist(j)%U(2,i)*partslist(j)%Uold(2,i) )*partslist(j)%m*partslist(j)%weight END IF END DO END DO - !$OMP END PARALLEL DO + !!$OMP END PARALLEL DO epot=epot*phinorm*0.5 ekin=ekin*vlight**2 ! Shift to Etot at cstep=1 (not valable yet at cstep=0!) IF(cstep.EQ. 1) THEN ! Compute the local total energy loc_etot0 = epot+ekin etot0=0 END IF !etot=loc_etot0 ! Compute the total energy etot=epot+ekin Energies=(/ekin,epot,etot,loc_etot0/) ! The computed energy is sent to the root process IF(mpisize .gt.1) THEN IF(mpirank .eq.0 ) THEN CALL MPI_REDUCE(MPI_IN_PLACE, Energies, 4, db_type, db_sum_op, & & 0, MPI_COMM_WORLD, ierr) etot0=etot0+Energies(4) ekin=Energies(1) epot=Energies(2) etot=Energies(3) ELSE CALL MPI_REDUCE(Energies, Energies, 4, db_type, db_sum_op, & & 0, MPI_COMM_WORLD, ierr) END IF ELSE etot0=etot0+loc_etot0 END IF loc_etot0=0 ! Send the local number of particles on each node to the root process IF(mpisize .gt. 1) THEN Nplocs_all(mpirank)=partslist(1)%Nploc IF(mpirank .eq.0 ) THEN CALL MPI_gather(MPI_IN_PLACE, 1, MPI_INTEGER, Nplocs_all, 1, MPI_INTEGER,& & 0, MPI_COMM_WORLD, ierr) !CALL MPI_REDUCE(MPI_IN_PLACE,partslist(1)%nudcol,3,db_type,db_sum_op,0,MPI_COMM_WORLD,ierr) partslist(1)%Nptot=sum(Nplocs_all) !partslist(1)%nudcol=partslist(1)%nudcol/partslist(1)%Nptot ELSE CALL MPI_gather(Nplocs_all(mpirank), 1, MPI_INTEGER, Nplocs_all, 1, MPI_INTEGER,& & 0, MPI_COMM_WORLD, ierr) !CALL MPI_REDUCE(partslist(1)%nudcol,partslist(1)%nudcol,3,db_type,db_sum_op,0,MPI_COMM_WORLD,ierr) END IF ELSE partslist(1)%Nptot=partslist(1)%Nploc END IF end subroutine partdiagnostics !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> @brief Collect the particles positions and velocities on the root process. !> If the collection has already been performed at this time step, the routine does nothing. ! !--------------------------------------------------------------------------- SUBROUTINE collectparts(p) USE basic, ONLY: mpirank, mpisize, ierr type(particles), INTENT(INOUT):: p INTEGER, DIMENSION(:), ALLOCATABLE :: displs, Nploc INTEGER:: i INTEGER:: particles_type(mpisize-1) !< Stores the MPI data type used for particles gathering on node 0 and broadcast from node 0 INTEGER :: part_requests(mpisize-1) INTEGER:: stats(MPI_STATUS_SIZE,mpisize-1) part_requests=MPI_REQUEST_NULL particles_type=MPI_DATATYPE_NULL IF(p%collected) RETURN ! exit subroutine if particles have already been collected during this time step ALLOCATE(Nploc(0:mpisize-1)) ALLOCATE(displs(0:mpisize-1)) displs=0 Nploc(mpirank)=p%Nploc CALL MPI_Allgather(MPI_IN_PLACE, 1, MPI_INTEGER, Nploc, 1, MPI_INTEGER,& & MPI_COMM_WORLD, ierr) p%Nptot=sum(Nploc) IF(p%Nptot .eq. 0 ) THEN p%partindex(:)=-1 p%collected=.true. RETURN END IF Do i=1,mpisize-1 displs(i)=displs(i-1)+Nploc(i-1) END DO IF(mpirank.eq.0 .and. p%Nptot .gt. size(p%pos,2)) THEN CALL change_parts_allocation(p,max(p%Nptot-size(P%pos,2),floor(0.5*size(P%pos,2)))) END IF IF(mpirank .ne. 0) THEN if(Nploc(mpirank) .gt. 0) THEN Call init_particles_gather_mpi(p,1,Nploc(mpirank),particles_type(mpirank)) ! 
Send Particles informations to root process CALL MPI_SEND(p, 1, particles_type(mpirank), 0, partsgather_tag, MPI_COMM_WORLD, ierr) CALL MPI_TYPE_FREE(particles_type(mpirank),ierr) END IF ELSE ! Receive particle information from all processes DO i=1,mpisize-1 if(Nploc(i) .lt. 1) cycle Call init_particles_gather_mpi(p,displs(i)+1,Nploc(i),particles_type(i)) CALL MPI_IRECV(p,1,particles_type(i),i,partsgather_tag,MPI_COMM_WORLD, part_requests(i), ierr) END DO CALL MPI_WAITALL(mpisize-1,part_requests, stats, ierr) p%partindex(sum(Nploc)+1:)=-1 Do i=1,mpisize-1 if(Nploc(i) .lt. 1) cycle CALL MPI_TYPE_FREE(particles_type(i),ierr) END DO END IF p%collected=.TRUE. END SUBROUTINE collectparts !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> @brief Computes the velocities at time t-1/2 delta t to keep the second order precision in time on the velocity. !> This should only be used at particle initialisation time, ot in the case of a restart. ! !--------------------------------------------------------------------------- SUBROUTINE adapt_vinit(p) !! Computes the velocity at time -dt/2 from velocities computed at time 0 ! USE basic, ONLY : bnorm, dt, tnorm, nlclassical, phinorm, distribtype, vnorm type(particles), INTENT(INOUT):: p REAL(kind=db) :: tau, BRZ, BRR, ZBR, ZBZ, ZPR, ZPZ, ZPTHET, & & SQR, Vperp, v2 INTEGER :: J1, J2, J3, J4, i REAL(kind=db), DIMENSION(:), ALLOCATABLE :: VZ, VR, VTHET ! In case Davidson distribution is used the longitudinal and radial velocities are adapted to take into account the ! electric potential. IF(distribtype .EQ. 2 .OR. distribtype .EQ. 3 .OR. distribtype .EQ. 4 .or. p%Davidson) THEN ALLOCATE(VR(p%Nploc),VZ(p%Nploc),VTHET(p%Nploc)) CALL loduni(7,VZ) VZ=VZ*2*pi VTHET=p%U(2,:)/p%Gamma*vnorm DO i=1,p%Nploc Vperp=sqrt(MAX(2*p%H0/p%m-2*p%qmRatio*p%pot(i)*phinorm-VTHET(i)**2,0.0_db)) VR(i)=Vperp*sin(VZ(i)) VZ(i)=Vperp*cos(VZ(i)) IF(nlclassical) THEN p%Gamma(i)=1 ELSE v2=VR(i)**2+VZ(i)**2+VTHET(i)**2 p%Gamma(i)=sqrt(1/(1-v2/vnorm**2)) END IF p%U(1,i)=p%Gamma(i)*VR(i)/vnorm p%U(3,i)=p%Gamma(i)*VZ(i)/vnorm p%U(2,i)=p%Gamma(i)*VTHET(i)/vnorm END DO DEALLOCATE(VR,VZ,VTHET) END IF ! Normalised time increment !tau=-omegac/2/omegap*dt/tnorm tau=-p%qmRatio*bnorm*0.5*dt*tnorm ! Store old Velocities CALL swappointer2(p%Uold, p%U) !CALL swappointer(p%URold, p%UR) !CALL swappointer(p%UTHETold, p%UTHET) CALL swappointer(p%Gammaold, p%Gamma) IF (p%Nploc .NE. 0) THEN !$OMP PARALLEL DO SIMD DEFAULT(SHARED) PRIVATE(J1,J2,J3,J4,BRZ, BRR, ZBR, ZBZ, ZPR, ZPZ, ZPTHET, SQR) DO i=1,p%Nploc ! Half inverse Rotation along magnetic field ZBZ=tau*p%B(2,i)/p%Gammaold(i) ZBR=tau*p%B(1,i)/p%Gammaold(i) SQR=1+ZBZ*ZBZ+ZBR*ZBR ZPZ=(p%Uold(3,i)-ZBR*p%Uold(2,i))/SQR !u-_{z} ZPR=(p%Uold(1,i)+ZBZ*p%Uold(2,i))/SQR !u-_{r} ZPTHET=p%Uold(2,i)+(ZBR*p%Uold(3,i)-ZBZ*p%Uold(1,i))/SQR !u-_{theta} p%U(3,i)=ZPZ p%U(1,i)=ZPR p%U(2,i)=ZPTHET ! half of decceleration p%U(3,i)=p%U(3,i)+p%E(2,i)*tau p%U(1,i)=p%U(1,i)+p%E(1,i)*tau IF(.not. nlclassical) THEN p%Gamma(i)=sqrt(1+p%U(3,i)**2+p%U(1,i)**2+p%U(2,i)**2) END IF END DO !$OMP END PARALLEL DO SIMD END IF END SUBROUTINE adapt_vinit !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> @brief Calculates the number of particles per column of the spatial grid ( at fixed axial cell position) !> This facilitate the computation of the axial grid limits for each MPI worker ! 
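A short aside on adapt_vinit above, assuming the standard leapfrog arrangement implied by comp_velocity and push: positions are stored at integer time steps and velocities at half-integer steps,

\[
  u^{\,n+1/2} \;=\; \mathcal{R}\!\left(u^{\,n-1/2} + \tfrac{q}{m}\,E^{\,n}\,\tfrac{\Delta t}{2}\right) + \tfrac{q}{m}\,E^{\,n}\,\tfrac{\Delta t}{2},
  \qquad
  x^{\,n+1} \;=\; x^{\,n} + \frac{u^{\,n+1/2}}{\gamma^{\,n+1/2}}\,\Delta t ,
\]

where \(\mathcal{R}\) denotes the rotation about the local magnetic field performed between the two half electric kicks. Because the particle loaders provide velocities at t = 0, adapt_vinit applies half an inverse rotation and half an inverse kick (hence the negative tau) to recover \(u^{-1/2}\), which is what keeps the velocity update second-order accurate in time.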
!--------------------------------------------------------------------------- SUBROUTINE calcnbperz(p,nbperz) USE basic, only: nz IMPLICIT NONE type(particles):: p INTEGER, INTENT(INOUT):: nbperz(0:) Integer::i, zindex nbperz=0 - !$OMP PARALLEL DO DEFAULT(SHARED) reduction(+:nbperz), private(zindex,i) + !!$OMP PARALLEL DO DEFAULT(SHARED) reduction(+:nbperz), private(zindex,i) Do i=1,p%Nploc ! we make sure zindex is in [0, nz-1] to avoid segmentation faults zindex=min(nz-1,max(p%zindex(i),0)) nbperz(zindex)=nbperz(zindex)+1 END DO - !$OMP END PARALLEL DO + !!$OMP END PARALLEL DO END SUBROUTINE calcnbperz !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> @brief In the case of MPI parallelism, computes the axial limits of the local domain. !--------------------------------------------------------------------------- SUBROUTINE calc_Zbounds(p, Zbounds, norder) ! Computes the start and end indices for the Z boundaries on local processus ! Computes the particle indices from initial particle loading vector, that stay in current process USE basic, ONLY: nz, cstep, mpirank, mpisize,step USE mpihelper TYPE(particles), INTENT(INOUT):: p INTEGER:: Zbounds(0:) INTEGER:: norder(2) INTEGER:: old_Zbounds(0:size(Zbounds,1)-1) INTEGER:: k, i, nbparts REAL(kind=db):: idealnbpartsperproc INTEGER, DIMENSION(0:nz-1):: partspercol ! Vector containing the number of particles between zgrid(n) and zgrid(n+1) INTEGER:: Zmin, Zmax ! Minimum and maximum indices of particles in Z direction INTEGER:: Zperproc, ierr, remparts CHARACTER(12)::fmt ! calculatese the axial disstibution integrated along the radial direction call calcnbperz(p,partspercol) + ! gather this data on all nodes if(step .gt. 0 .and. mpisize .gt. 1) THEN old_Zbounds=Zbounds CALL MPI_ALLREDUCE(MPI_IN_PLACE, partspercol, nz, MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD, ierr) END IF ! estimate the ideal number of particle per MPI worker idealnbpartsperproc = p%Nptot/mpisize ! find the start and end indices where particles are present Zmin=0 Zmax=nz-1 Do k=0,nz-1 if(partspercol(k) .gt.0) then Zmin=k exit end if end do Do k=nz-1,0,-1 if(partspercol(k) .gt.0) then Zmax=k exit end if end do ! Find naive axial limits assuming uniform axial distribution IF(Zmax .le. 0) Zmax=nz-1 IF(Zmin .gt. nz) Zmin=0 Zperproc=(Zmax-Zmin)/mpisize IF (Zperproc .lt. 1 .or. cstep .eq. 0) THEN !! No particles are present initially Zperproc=nz/mpisize Zmin=0 ! Define boundaries using naive guess on start or restart (allow to start with 0 parts) DO k=1,mpisize-1 IF(k .lt. mpisize-1-MODULO(Zmax-Zmin,mpisize)) THEN Zbounds(k)=Zmin+k*Zperproc-1 ELSE Zbounds(k)=Zmin+k*Zperproc-1+k-mpisize+2+MODULO(Zmax-Zmin,mpisize) END IF END DO ELSE i=0 ! Define axial boundaries using the axial distribution information. ! the subdomains are not equal remparts=p%Nptot DO k=1,mpisize-1 nbparts=0 DO WHILE(nbparts<0.98*idealnbpartsperproc .and. i .lt. Zmax .and. (nbparts+partspercol(i)).lt.1.25*idealnbpartsperproc) nbparts=nbparts+partspercol(i) i=i+1 END DO remparts=remparts-nbparts Zbounds(k)=i END DO END IF IF(step .gt. 0 .and. mpirank .eq. 0) THEN Do i=1,mpisize-1 !We check that the new limits will not exceed the old limits of the left and right process ! This avoids particle communications with process >mpirank+2 and < mpirank-2 ! However this should converge over time IF(Zbounds(i) .lt. old_Zbounds(i-1)) Zbounds(i)=old_Zbounds(i-1) if(Zbounds(i) .gt. old_Zbounds(i+1))Zbounds(i)=old_Zbounds(i+1) ! 
If a process would have an axial domain shoter than axial norder, we revert to the old boundaries. IF((Zbounds(i)-Zbounds(i-1)).lt. norder(1) .or. (Zbounds(i+1)-Zbounds(i)).lt. norder(1)) THEN Zbounds=old_Zbounds EXIT END IF END DO END IF ! send the new boundaries to all the workers CALL MPI_Bcast(Zbounds,mpisize+1,MPI_INTEGER,0,MPI_COMM_WORLD, ierr) DO k=0,mpisize-1 Nplocs_all(k)=SUM(partspercol(Zbounds(k):Zbounds(k+1)-1)) END DO if(mpirank .eq. 0) THEN WRITE(fmt,'(a,i3,a)')"(a,",mpisize+1, "i5)" WRITE(*,fmt) "Zbounds: ", Zbounds WRITE(fmt,'(a,i3,a)')"(a,",mpisize, "i8)" WRITE(*,fmt) "Nplocs: ", Nplocs_all END IF END SUBROUTINE calc_Zbounds !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> @brief After a restart keep only the particles in the local domain of the current MPI worker !--------------------------------------------------------------------------- SUBROUTINE keep_mpi_self_parts(p,Zbounds) TYPE(particles),INTENT(INOUT):: p INTEGER,INTENT(in)::Zbounds(0:) INTEGER :: i, partstart, old_sum,ierr partstart=1 p%Nploc=0 Do i=1,p%Nptot IF(p%Zindex(i).ge.Zbounds(mpirank).and.p%Zindex(i).lt.Zbounds(mpirank+1)) THEN p%Nploc=p%Nploc+1 CALL move_part(p,i,p%Nploc) END IF END DO old_sum=p%Nptot CALL MPI_REDUCE(p%Nploc, p%Nptot,1,MPI_INTEGER, MPI_SUM, 0, MPI_COMM_WORLD, ierr) IF(p%Nptot .ne. old_sum) THEN WRITE(*,*) "Error in particle distribution kept: ", p%Nptot, "/",old_sum !call MPI_Abort(MPI_COMM_WORLD, -1, ierr) !stop END IF END SUBROUTINE keep_mpi_self_parts !_______________________________________________________________________________ !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief Manage the particle communication between neighbours. !> This routine is responsible to receive the incoming particles from the MPI neighbours and to send its outgoing !> particles to these neighbours ! !> @param [in] lsendnbparts number of particles to send to the left neighbour (mpirank-1) !> @param [in] rsendnbparts number of particles to send to the right neighbour (mpirank+1) !> @param [in] sendholes array containing the indices of the particle leaving the local domain in ascending order. If the index is positive, the particle goes to the right neigbour, and to the left neighbour if the index is negative !--------------------------------------------------------------------------- SUBROUTINE particlescommunication(p, lsendnbparts, rsendnbparts, sendholes, receivednbparts, procs) USE mpihelper, ONLY: particle_type #ifdef _DEBUG USE basic, ONLY: step #endif type(particles), INTENT(INOUT):: p INTEGER, INTENT(in) :: lsendnbparts, rsendnbparts INTEGER, INTENT(out) :: receivednbparts INTEGER, INTENT(in) :: sendholes(:) INTEGER, INTENT(in) :: procs(2) INTEGER, ASYNCHRONOUS :: rrecvnbparts=0, lrecvnbparts=0 INTEGER, ASYNCHRONOUS :: sendrequest(2), recvrequest(2) INTEGER, ASYNCHRONOUS :: sendstatus(MPI_STATUS_SIZE,2), recvstatus(MPI_STATUS_SIZE,2) TYPE(particle), ALLOCATABLE :: rrecvpartbuff(:), lrecvpartbuff(:), rsendpartbuff(:), lsendpartbuff(:) ! buffers to send and receive particle from left and right processes INTEGER :: lsentnbparts, rsentnbparts INTEGER :: lreceivednbparts, rreceivednbparts, ierr lsentnbparts=lsendnbparts rsentnbparts=rsendnbparts sendrequest=MPI_REQUEST_NULL recvrequest=MPI_REQUEST_NULL lrecvnbparts=0 rrecvnbparts=0 ! 
Send and receive the number of particles to exchange CALL MPI_IRECV(lrecvnbparts, 1, MPI_INTEGER, procs(1), nbpartsexchange_tag, MPI_COMM_WORLD, recvrequest(1), ierr) CALL MPI_IRECV(rrecvnbparts, 1, MPI_INTEGER, procs(2), nbpartsexchange_tag, MPI_COMM_WORLD, recvrequest(2), ierr) CALL MPI_ISEND(lsentnbparts, 1, MPI_INTEGER, procs(1), nbpartsexchange_tag, MPI_COMM_WORLD, sendrequest(1), ierr) CALL MPI_ISEND(rsentnbparts, 1, MPI_INTEGER, procs(2), nbpartsexchange_tag, MPI_COMM_WORLD, sendrequest(2), ierr) CALL MPI_Waitall(2,recvrequest(1:2), recvstatus(:,1:2), ierr) recvrequest=MPI_REQUEST_NULL lreceivednbparts=lrecvnbparts rreceivednbparts=rrecvnbparts ! Re/allocate enough memory to store the incoming particles ALLOCATE(rrecvpartbuff(rreceivednbparts)) ALLOCATE(lrecvpartbuff(lreceivednbparts)) ! Receive particles from left and right processes to the corresponding buffers IF ( lrecvnbparts .gt. 0) THEN CALL MPI_IRECV(lrecvpartbuff, lreceivednbparts, particle_type, procs(1), partsexchange_tag, MPI_COMM_WORLD, recvrequest(1), ierr) END IF IF( rrecvnbparts .gt. 0) THEN CALL MPI_IRECV(rrecvpartbuff, rreceivednbparts, particle_type, procs(2), partsexchange_tag, MPI_COMM_WORLD, recvrequest(2), ierr) END IF ALLOCATE(rsendpartbuff(rsendnbparts)) ALLOCATE(lsendpartbuff(lsendnbparts)) ! Copy the leaving particles to the corresponding send buffers IF ( (lsendnbparts + rsendnbparts) .gt. 0) THEN CALL AddPartSendBuffers(p, lsendnbparts, rsendnbparts, sendholes, lsendpartbuff, rsendpartbuff) END IF CALL MPI_Waitall(2,sendrequest(1:2), sendstatus(:,1:2), ierr) ! Send the particles to the left and right neighbours IF( lsendnbparts .gt. 0) THEN CALL MPI_ISEND(lsendpartbuff, lsendnbparts, particle_type, procs(1), partsexchange_tag, MPI_COMM_WORLD, sendrequest(1), ierr) #ifdef _DEBUG !WRITE(*,*)"snding ", lsendnbparts , " to left at step: ",step #endif END IF IF( rsendnbparts .gt. 0) THEN CALL MPI_ISEND(rsendpartbuff, rsendnbparts, particle_type, procs(2), partsexchange_tag, MPI_COMM_WORLD, sendrequest(2), ierr) #ifdef _DEBUG !WRITE(*,*)"snding ", rsendnbparts , " to right at step: ",step #endif END IF ! Receive the incoming parts in the receive buffers IF ( lreceivednbparts .gt. 0) THEN CALL MPI_Wait(recvrequest(1), recvstatus(:,1), ierr) IF(ierr .ne. MPI_SUCCESS) THEN WRITE(*,*) "Error in particle reception on proc:", mpirank, " error code:", ierr, "status:", recvstatus(:,1) CALL MPI_Abort(MPI_COMM_WORLD, -1, ierr) END IF #ifdef _DEBUG !WRITE(*,*)"rcvd ", lreceivednbparts , " from left at step: ",step #endif END IF IF ( rreceivednbparts .gt. 0) THEN CALL MPI_Wait(recvrequest(2), recvstatus(:,2), ierr) IF(ierr .ne. MPI_SUCCESS) THEN WRITE(*,*) "Error in particle reception on proc:", mpirank, " error code:", ierr, "status:", recvstatus(:,2) CALL MPI_Abort(MPI_COMM_WORLD, -1, ierr) END IF #ifdef _DEBUG !WRITE(*,*)"rcvd ", rreceivednbparts , " from right at step: ",step #endif END IF receivednbparts=rreceivednbparts+lreceivednbparts IF(p%Nploc+receivednbparts-lsendnbparts-rsendnbparts .gt. size(p%pos,2)) THEN CALL change_parts_allocation(p,receivednbparts) END IF ! Copy the incoming particles from the receive buffers to the simulation parts variable CALL Addincomingparts(p, rreceivednbparts, lreceivednbparts, lsendnbparts+rsendnbparts, & & sendholes, lrecvpartbuff, rrecvpartbuff) ! Wait for the outgoing particles to be fully received by the neighbours IF( lsendnbparts .gt. 
0) THEN CALL MPI_Wait(sendrequest(1), sendstatus(:,1), ierr) #ifdef _DEBUG !WRITE(*,*)"sent ", lsentnbparts , " to left at step: ",step #endif END IF IF( rsendnbparts .gt. 0) THEN CALL MPI_Wait(sendrequest(2), sendstatus(:,2), ierr) #ifdef _DEBUG !WRITE(*,*)"sent ", rsentnbparts , " to right at step: ",step #endif END IF ! ! END SUBROUTINE particlescommunication !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief Copy the particles from the receive buffers to the local simulation variable parts. !> The incoming particles will first be stored in the holes left by the outgoing particles, then they !> will be added at the end of the parts variable ! !> @param [in] rrecvnbparts number of particles received from the right neighbour (mpirank+1) !> @param [in] lrecvnbparts number of particles received from the left neighbour (mpirank-1) !> @param [in] sendnbparts total number of particles having left the local domain !> @param [in] sendholes array containing the indices of the particle having left the local domain in ascending order. !--------------------------------------------------------------------------- SUBROUTINE Addincomingparts(p, rrecvnbparts, lrecvnbparts, sendnbparts, sendholes,lrecvpartbuff, rrecvpartbuff) ! USE mpihelper TYPE(particles), INTENT(INOUT):: p INTEGER, INTENT(in) :: rrecvnbparts, lrecvnbparts, sendnbparts INTEGER, INTENT(in) :: sendholes(:) TYPE(particle), INTENT(IN) :: rrecvpartbuff(:), lrecvpartbuff(:) INTEGER k,partpos ! First import the particles coming from the right IF(rrecvnbparts .gt. 0) THEN Do k=1,rrecvnbparts IF(k .le. sendnbparts) THEN ! Fill the holes left by sent parts partpos=abs(sendholes(k)) ELSE ! Add at the end of parts and keep track of number of parts p%Nploc=p%Nploc+1 partpos=p%Nploc END IF CALL Insertincomingpart(p, rrecvpartbuff(k), partpos) END DO END IF ! Then import the particles coming from the left IF(lrecvnbparts .gt. 0) THEN Do k=1,lrecvnbparts IF(k+rrecvnbparts .le. sendnbparts) THEN ! Fill the holes left by sent parts partpos=abs(sendholes(k+rrecvnbparts)) ELSE ! Add at the end of parts and keep track of number of parts p%Nploc=p%Nploc+1 partpos=p%Nploc END IF CALL Insertincomingpart(p, lrecvpartbuff(k), partpos) END DO END IF ! END SUBROUTINE Addincomingparts !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief Copy the particles from the local parts variable to the left and right send buffers. ! !> @param [in] lsendnbparts number of particles to send to the left neighbour (mpirank-1) !> @param [in] rsendnbparts number of particles to send to the right neighbour (mpirank+1) !> @param [in] sendholes array containing the indices of the particle leaving the local domain in ascending order. If the index is positive, the particle goes to the right neigbour, and to the left neighbour if the index is negative !--------------------------------------------------------------------------- SUBROUTINE AddPartSendBuffers(p, lsendnbparts, rsendnbparts, sendholes, lsendpartbuff, rsendpartbuff) ! USE mpihelper TYPE(particles), INTENT(INOUT):: p INTEGER, INTENT(in) :: lsendnbparts, rsendnbparts INTEGER, INTENT(in) :: sendholes(:) TYPE(particle), INTENT(OUT) :: rsendpartbuff(:), lsendpartbuff(:) INTEGER:: partpos, k INTEGER:: lsendpos, rsendpos lsendpos=0 rsendpos=0 ! 
Loop over the outgoing particles and fill the correct send buffer Do k=lsendnbparts+rsendnbparts,1,-1 partpos=abs(sendholes(k)) IF(sendholes(k) .GT. 0) THEN rsendpos=rsendpos+1 CALL Insertsentpart(p, rsendpartbuff, rsendpos, partpos) ELSE IF(sendholes(k) .LT. 0) THEN lsendpos=lsendpos+1 CALL Insertsentpart(p, lsendpartbuff, lsendpos, partpos) END IF END DO ! ! END SUBROUTINE AddPartSendBuffers !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> @brief Add the particles stored in the buffer to the main particle storage p in particles form !> @param[in] p particles structure to add particles to !> @param[in] buffer memory containing the particles to be added !> @param[in] nb_ins number of particles stored in buffer !--------------------------------------------------------------------------- SUBROUTINE add_list_created_part(p, buffer,nb_ins) IMPLICIT NONE TYPE(particles), INTENT(INOUT):: p TYPE(particle), ALLOCATABLE, INTENT(in) :: buffer(:) INTEGER, OPTIONAL:: nb_ins INTEGER:: i, nptotinit, parts_size_increase, nb_added nptotinit=p%Nploc+1 if(present(nb_ins)) THEN nb_added=nb_ins ELSE nb_added=size(buffer,1) end if IF(nb_added .le. 0) RETURN ! No particles to add ! if there is not enough space in the parts simulation buffer, increase the parst size IF(p%Nploc + nb_added .gt. size(p%pos,2)) THEN parts_size_increase=Max(floor(0.1*size(p%pos,2)),nb_added) CALL change_parts_allocation(p, parts_size_increase) END IF DO i=1,nb_added CALL add_created_particle(p,buffer(i)) END DO nb_added=p%Nploc-nptotinit+1 if(p%is_field) then IF(allocated(p%addedlist)) then call change_array_size_int(p%addedlist,2) else allocate(p%addedlist(2)) end if p%addedlist(size(p%addedlist)-1)=nptotinit p%addedlist(size(p%addedlist))=nb_added end if END SUBROUTINE add_list_created_part !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> @brief Add the particles stored in the linked buffer to the main particle storage p in particles form !> @param[in] p particles structure to add particles to !> @param[in] linked_buffer memory containing the particles to be added in linked list format !> @param[in] destroy Indicates if the memory of the linked buffer must be freed after copy to p !> @param[in] zerovelocity Define if the velocity of the particles in p is set to 0 or copied from the buffer !--------------------------------------------------------------------------- SUBROUTINE add_linked_created_part(p, linked_buffer, destroy, zerovelocity) IMPLICIT NONE TYPE(particles), INTENT(INOUT):: p TYPE(linked_part_row), INTENT(in) :: linked_buffer LOGICAL:: destroy, zerovelocity TYPE(linked_part), POINTER:: part INTEGER:: i, nptotinit, parts_size_increase, nb_added nptotinit=p%Nploc+1 nb_added=linked_buffer%n IF(nb_added .le. 0) RETURN ! No particles to add ! if there is not enough space in the parts simulation buffer, increase the parst size IF(p%Nploc + nb_added .gt. 
size(p%pos,2)) THEN parts_size_increase=Max(floor(0.1*size(p%pos,2)),nb_added) CALL change_parts_allocation(p, parts_size_increase) END IF part=>linked_buffer%start DO i=1,nb_added CALL add_created_particle(p,part%p) part=>part%next END DO nb_added=p%Nploc-nptotinit+1 if(p%is_field) then IF(allocated(p%addedlist)) then call change_array_size_int(p%addedlist,2) else allocate(p%addedlist(2)) end if p%addedlist(size(p%addedlist)-1)=nptotinit p%addedlist(size(p%addedlist))=nb_added end if if(zerovelocity)then p%U(:,nptotinit:p%Nploc)=0 !p%UTHET(nptotinit:p%Nploc)=0 !p%UZ(nptotinit:p%Nploc)=0 end if if (destroy) call destroy_linked_parts(linked_buffer%start) if (p%is_field) then ! we keep track of energy by removing the ionisation energy ! with conversion from electronvolt to joules loc_etot0=loc_etot0-sum(p%pot(nptotinit:p%Nploc)*elchar) end if END SUBROUTINE add_linked_created_part !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief Add created particles from a buffer of type particle to the main species storages. ! !> @param [in] p specie memory where we want to add particles !> @param [in] part particle buffer storing the data we want to add to p !--------------------------------------------------------------------------- SUBROUTINE add_created_particle(p,part) USE geometry TYPE(particles):: p TYPE(particle):: part p%Nploc=p%Nploc+1 p%newindex=p%newindex+1 ! add the data to the p structure CALL Insertincomingpart(p, part, p%Nploc) p%partindex(p%Nploc)=p%newindex ! calculate the new domain weight CALL dom_weight(p%pos(3,p%Nploc),p%pos(1,p%Nploc),p%geomweight(0,p%Nploc)) ! delete the particle if it is outside of the computational domain if( .not. is_inside(p,p%Nploc) ) then p%Nploc=p%Nploc-1 p%newindex=p%newindex-1 RETURN end if ! Calculate the geometric weight for the Poisson solver and the grid indices CALL geom_weight(p%pos(3,p%Nploc),p%pos(1,p%Nploc),p%geomweight(:,p%Nploc)) call p_calc_rzindex(p,p%Nploc) END SUBROUTINE add_created_particle !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief Checks if the particle id in p is inside of the simulation domain ! !> @param [in] p specie memory !> @param [in] id index of the particle we want to test !--------------------------------------------------------------------------- function is_inside(p,id) Use basic, ONLY: rgrid,zgrid, nr, nz IMPLICIT NONE logical :: is_inside type(particles) :: p integer :: id is_inside=.true. ! Check if the particle is in the simulation domain if(p%geomweight(0,id).le.0)then is_inside=.false. return end if ! check if the particle is in the simulation grid if(p%pos(1,id).ge.rgrid(nr) .or. p%pos(1,id) .le. rgrid(0))then is_inside=.false. return end if if(p%pos(3,id).ge.zgrid(nz) .or. p%pos(3,id) .le. zgrid(0))then is_inside=.false. return end if end function is_inside !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief Calculate the energy added by new particles to the system for diagnostic purposes ! !> @param [in] p specie memory !--------------------------------------------------------------------------- SUBROUTINE calc_newparts_energy(p) USE basic, ONLY: phinorm, nlclassical type(particles)::p integer::i,n,nptotinit,nbadded, nptotend ! exit if these particles dont participate in the Poisson equation if(.not. 
p%is_field) return if( allocated(p%addedlist)) then n=size(p%addedlist) ! For each set of added particles Do i=1,n,2 nptotinit=p%addedlist(i) nbadded=p%addedlist(i+1) p%nbadded=p%nbadded+nbadded nptotend=nptotinit+nbadded-1 ! Potential energy loc_etot0=loc_etot0+p%q*p%weight*sum(p%pot(nptotinit:nptotend))*phinorm ! Kinetic energy IF(.not. nlclassical) THEN loc_etot0=loc_etot0+p%m*p%weight*vlight**2*sum(0.5*(p%Gamma(nptotinit:nptotend)+p%Gammaold(nptotinit:nptotend))-1) ELSE loc_etot0=loc_etot0+0.5*p%m*p%weight*vlight**2*sum(p%U(1,nptotinit:nptotend)*p%Uold(1,nptotinit:nptotend) & & +p%U(3,nptotinit:nptotend)*p%Uold(3,nptotinit:nptotend) & & +p%U(2,nptotinit:nptotend)*p%Uold(2,nptotinit:nptotend)) END IF end do deallocate(p%addedlist) end if end subroutine calc_newparts_energy !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief Delete particle at given index removing its energy from the diagnosed quantities ! !> @param [in] index index of particle to be deleted !--------------------------------------------------------------------------- SUBROUTINE delete_part(p, index, replace) !! This will destroy particle at the given index USE constants, ONLY: vlight USE bsplines USE geometry USE basic, ONLY: phinorm, nlclassical TYPE(particles), INTENT(INOUT):: p INTEGER, INTENT(IN) :: index LOGICAL, OPTIONAL :: replace LOGICAL:: repl IF(present(replace)) THEN repl=replace ELSE repl=.true. END IF !Computes the potential at the particle position with phi_ext+phi_s IF(index .le. p%Nploc) THEN IF(p%is_field) THEN loc_etot0=loc_etot0-p%q*p%weight*(p%pot(index))*phinorm IF(.not. nlclassical) THEN loc_etot0=loc_etot0-p%m*p%weight*vlight**2*(p%Gamma(index)-1) ELSE loc_etot0=loc_etot0-0.5*p%m*p%weight*vlight**2*(p%U(1,index)**2+p%U(3,index)**2+p%U(2,index)**2) END IF END IF IF(repl) THEN ! We fill the gap CALL move_part(p, p%Nploc, index) p%partindex(p%Nploc)=-1 ! Reduce the total number of simulated parts p%Nploc=p%Nploc-1 END IF END IF END SUBROUTINE delete_part !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief Loads a uniform density of particles on a rectangular annulus qith maxwellian velocities ! !> @param [inout] p particle memory to load into !> @param [inout] VR array of radial velocity for the particles !> @param [inout] VTHET array of azimuthal velocity for the particles !> @param [inout] VZ array of axial velocity for the particles !--------------------------------------------------------------------------- SUBROUTINE loaduniformRZ(p, VR,VZ,VTHET) USE basic, ONLY: plasmadim, rnorm, temp, qsim, msim USE constants, ONLY: me, kb, elchar REAL(kind=db), INTENT(inout) ::VZ(:), VR(:), VTHET(:) TYPE(particles), INTENT(INOUT):: p CALL creat_parts(p, size(VR,1)) p%Nploc=size(VR,1) p%Nptot=size(VR,1) p%q=sign(elchar,qsim) p%weight=msim/me p%m=me p%qmRatio=qsim/msim ! Initial distribution in z with normalisation CALL loduni(1,p%pos(3,1:p%Nploc)) p%pos(3,1:p%Nploc)=(plasmadim(1)+(plasmadim(2)-plasmadim(1))*p%pos(3,1:p%Nploc))/rnorm ! Initial distribution in r with normalisation CALL lodlinr(2,p%pos(1,1:p%Nploc),plasmadim(3),plasmadim(4)) p%pos(1,1:p%Nploc)=p%pos(1,1:p%Nploc)/rnorm ! Initial velocities distribution CALL loadGaussianVelocities(p, VR, VZ, VTHET, temp) END SUBROUTINE loaduniformRZ !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! 
DESCRIPTION: !> !> @brief Loads a cloud of electrons trapped in a magnetic mirror according to Davidsons equilibrium !> p117 of physics of non-neutral plasma book ! !> @param [inout] p particle memory to load into !> @param [inout] VR array of radial velocity for the particles !> @param [inout] VTHET array of azimuthal velocity for the particles !> @param [inout] VZ array of axial velocity for the particles !--------------------------------------------------------------------------- SUBROUTINE loadDavidson(p, VR,VZ,VTHET, lodr) USE constants, ONLY: me, kb, elchar USE basic, ONLY: nplasma, rnorm, plasmadim, distribtype, H0, P0, Rcurv, width, qsim, msim, & & omegac, zgrid, nz, rnorm, n0, nblock, temp procedure(rloader)::lodr TYPE(particles), INTENT(INOUT):: p REAL(kind=db), INTENT(INOUT)::VZ(:), VR(:), VTHET(:) REAL(kind=db), DIMENSION(:), ALLOCATABLE::ra, rb, z REAL(kind=db) :: r0, deltar2, halfLz, Mirrorratio, Le, VOL INTEGER :: j, n, blockstart, blockend, addedpart, remainparts INTEGER, DIMENSION(:), ALLOCATABLE :: blocksize CALL creat_parts(p, size(VR,1)) p%Nploc=size(VR,1) p%Nptot=p%Nploc Allocate(ra(nblock),rb(nblock), z(0:nblock)) !r0=(plasmadim(4)+plasmadim(3))/2 r0=sqrt(4*H0/(me*omegac**2)) halfLz=(zgrid(nz)+zgrid(0))/2 MirrorRatio=(Rcurv-1)/(Rcurv+1) z(0)=plasmadim(1) DO n=1,nblock ! Compute limits in radius and load radii for each part Le=(plasmadim(2)-plasmadim(1))/nblock*(n-0.5)-halfLz*rnorm+plasmadim(1) z(n)=z(0)+n*(plasmadim(2)-plasmadim(1))/nblock deltar2=1-MirrorRatio*cos(2*pi*Le/width) rb(n)=r0/deltar2*sqrt(1-P0*abs(omegac)/2/H0*deltar2+sqrt(1-P0*abs(omegac)/H0*deltar2)) ra(n)=r0/deltar2*sqrt(1-P0*abs(omegac)/2/H0*deltar2-sqrt(1-P0*abs(omegac)/H0*deltar2)) END DO VOL=SUM(2*pi*MINVAL(ra)*(rb-ra)*(plasmadim(2)-plasmadim(1))/nblock) qsim=VOL*n0*elchar/nplasma msim=abs(qsim)/elchar*me p%weight=abs(qsim)/elchar p%m=me p%q=sign(elchar,qsim) p%qmRatio=p%q/p%m blockstart=1 blockend=0 ALLOCATE(blocksize(nblock)) WRITE(*,*) "blocksize: ", size(blocksize), nblock DO n=1,nblock blocksize(n)=nplasma/VOL*2*pi*MINVAL(ra)*(rb(n)-ra(n))*(plasmadim(2)-plasmadim(1))/nblock END DO remainparts=p%Nploc-SUM(blocksize) addedpart=1 n=nblock/2 j=1 DO WHILE(remainparts .GT. 0) blocksize(n)=blocksize(n)+addedpart remainparts=remainparts-addedpart n=n+j j=-1*(j+SIGN(1,j)) END DO CALL loadPartSlices(p, lodr, ra, rb, z, blocksize) IF(distribtype .eq. 5) THEN CALL loadGaussianVelocities(p, VR, VZ, VTHET, temp) VZ=VZ/4 VR=VR*8 VTHET=VTHET*8 ELSE Call loadDavidsonVelocities(p, VR, VZ, VTHET, H0, P0) END IF END SUBROUTINE loadDavidson !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief Computes the velocities for a cloud of electrons trapped in a magnetic mirror according to Davidsons equilibrium !> p117 of physics of non-neutral plasma book. This equilibrium assume mono energy and mono canonical angular momentum ! 
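! Added note (illustrative, derived from the loading code of this routine): for a
! particle at (r,z), the azimuthal velocity loaded here is the mono-energetic,
! mono-canonical-momentum Davidson value
!   vthet(r,z) = P0/(m*r) - (q/m)*Athet(r,z),   clipped so that |vthet| <= sqrt(2*H0/m),
! with the magnetic-mirror vector potential
!   Athet(r,z) = 0.5*B0*( r - (width/pi)*MirrorRatio*bessi1(2*pi*r/width)*cos(2*pi*z/width) )
! and MirrorRatio = (Rcurv-1)/(Rcurv+1). The result is then normalised by vnorm.
! The radial and axial velocities are set to zero here and are loaded later in
! adapt_vinit, once the particle potential is available.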
!> @param [inout] p particle memory to load into !> @param [inout] VR array of radial velocity for the particles !> @param [inout] VTHET array of azimuthal velocity for the particles !> @param [inout] VZ array of axial velocity for the particles !> @param [in] H0 Total energy of each particle !> @param [in] P0 Initial canonical angular momentum of each particle !--------------------------------------------------------------------------- SUBROUTINE loadDavidsonVelocities(p, VR,VZ,VTHET, H0, P0) USE constants, ONLY: me, kb, elchar USE basic, ONLY: rnorm, Rcurv, B0, width, vnorm, zgrid, nz TYPE(particles), INTENT(INOUT):: p REAL(kind=db), INTENT(INOUT)::VZ(:), VR(:), VTHET(:) REAL(kind=db), INTENT(IN):: H0, P0 REAL(kind=db) :: athetpos, rg, zg, halfLz, Mirrorratio, Pcomp, Acomp INTEGER :: i MirrorRatio=(Rcurv-1)/(Rcurv+1) halfLz=(zgrid(nz)+zgrid(0))/2 ! Load velocities theta velocity ! Loading of r and z velocity is done in adapt_vinit to have ! access to parts%pot DO i=1,p%Nploc ! Interpolation for Magnetic potential rg=p%pos(1,i)*rnorm zg=(p%pos(3,i)-halfLz)*rnorm Athetpos=0.5*B0*(rg - width/pi*MirrorRatio*bessi1(2*pi*rg/width)*COS(2*pi*zg/width)) Pcomp=P0/rg/p%m Acomp=-p%qmRatio*Athetpos VTHET(i)=SIGN(MIN(abs(Pcomp+Acomp),sqrt(2*H0/p%m)),Pcomp+Acomp) !VTHET(i)=Pcomp+Acomp END DO VTHET=VTHET/vnorm VZ=0._db VR=0._db p%Davidson=.true. p%H0=H0 p%P0=P0 END SUBROUTINE loadDavidsonvelocities !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief Computes the particles velocities according to a maxwellian distribution of temperature temperature [K] ! !> @param [inout] p particle memory to load into !> @param [inout] VR array of radial velocity for the particles !> @param [inout] VTHET array of azimuthal velocity for the particles !> @param [inout] VZ array of axial velocity for the particles !> @param [in] temperature temperature in [k] of the distribution function !--------------------------------------------------------------------------- SUBROUTINE loadGaussianVelocities(p, VR,VZ,VTHET, temperature) USE basic, ONLY: vnorm USE constants, ONLY: kb REAL(kind=db), INTENT(inout) ::VZ(:), VR(:), VTHET(:) TYPE(particles), INTENT(INOUT):: p REAL(kind=db), INTENT(IN):: temperature REAL(kind=db):: vth ! Initial velocities distribution vth=sqrt(2.0/3.0*kb*temperature/p%m)/vnorm !thermal velocity CALL lodgaus(3,VZ) CALL lodgaus(5,VR) CALL lodgaus(7,VTHET) VZ=VZ*vth VR=VR*vth VTHET=VTHET*vth p%temperature=temperature p%Davidson=.false. END SUBROUTINE loadGaussianVelocities !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief Computes the particles velocities with a uniform distribution centered in meanv and limited by meanv+spanv and meanv-spanv ! 
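! Added note (illustrative): loduni is assumed to return samples U uniformly
! distributed in [0,1], as its use for the axial positions in loadPartslices
! suggests. Each velocity component is then mapped as
!   V = (2*U - 1)*spanv + meanv,
! i.e. a flat-top distribution on [meanv-spanv, meanv+spanv], with meanv and
! spanv first normalised by vnorm.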
!> @param [inout] p particle memory to load into !> @param [inout] VR array of radial velocity for the particles !> @param [inout] VTHET array of azimuthal velocity for the particles !> @param [inout] VZ array of axial velocity for the particles !> @param [in] meanv mean velocity in each direction [m/s] !> @param [in] spanv extent of the velocity in each direction above and below the mean velocity [m/s] !--------------------------------------------------------------------------- SUBROUTINE loadFlatTopVelocities(p, VR,VZ,VTHET, meanv, spanv) USE basic, ONLY: vnorm USE constants, ONLY: kb REAL(kind=db), INTENT(inout) ::VZ(:), VR(:), VTHET(:) TYPE(particles), INTENT(INOUT):: p REAL(kind=db), INTENT(INOUT):: meanv(3), spanv(3) ! Initial velocities distribution meanv=meanv/vnorm !thermal velocity spanv=spanv/vnorm CALL loduni(3,VZ) CALL loduni(5,VR) CALL loduni(7,VTHET) VR=(VR*2-1)*spanv(1)+meanv(1) VTHET=(VTHET*2-1)*spanv(2)+meanv(2) VZ=(VZ*2-1)*spanv(3)+meanv(3) p%Davidson=.false. END SUBROUTINE loadFlatTopVelocities !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief Load slices of particles defined by axial and radial limits ! !> @param [inout] p particle memory to load into !> @param [in] lodr sampling function definig the particle distribution in r !> @param [in] ra lower radial limit of the slice !> @param [in] rb upper radial limit of the slice !> @param [in] z array giving the axial limits of each slice (slice i is betwwen z(i-1) and z(i)) !> @param [in] blocksize array containing the number of particles for each slice !--------------------------------------------------------------------------- SUBROUTINE loadPartslices(p, lodr, ra, rb, z, blocksize) USE basic, ONLY: rnorm TYPE(particles), INTENT(INOUT):: p REAL(kind=db), INTENT(IN)::ra(:), rb(:), z(0:) INTEGER, DIMENSION(:), INTENT(IN) :: blocksize procedure(rloader)::lodr INTEGER :: n, blockstart, blockend, nblock nblock=size(blocksize,1) blockstart=1 blockend=0 DO n=1,nblock blockstart=blockend+1 blockend=MIN(blockstart+blocksize(n)-1,p%Nploc) ! Initial distribution in z with normalisation between magnetic mirrors CALL loduni(1, p%pos(3,blockstart:blockend)) p%pos(3,blockstart:blockend)= (z(n-1)+p%pos(3,blockstart:blockend)*(z(n)-z(n-1)))/rnorm CALL lodr(2, p%pos(1,blockstart:blockend), ra(n), rb(n)) p%pos(1,blockstart:blockend)=p%pos(1,blockstart:blockend)/rnorm END DO END SUBROUTINE loadPartslices !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief Read a particle file format to load a simulated specie in the simulation ! !--------------------------------------------------------------------------- SUBROUTINE read_part_file(p, partfilename, VR, VZ, VTHET) USE basic, ONLY: lu_partfile, rnorm, vnorm implicit None TYPE(particles), INTENT(INOUT):: p REAL(kind=db), DIMENSION(:), ALLOCATABLE, INTENT(INOUT)::VR, VZ, VTHET CHARACTER(len=*)::partfilename INTEGER:: nblock = 0 REAL(kind=db), Dimension(:), ALLOCATABLE:: ra, rb, z INTEGER, Dimension(:), ALLOCATABLE:: npartsslice INTEGER:: velocitytype=1 !< 1) gaussian with temp 2) Davidson with H0, P0 INTEGER:: radialtype=1 !< 1) 1/R 2) uniform 3) 1/R^2 4) gauss INTEGER:: npartsalloc !< initial size of particles arrays REAL(kind=db):: mass=me ![kg] REAL(kind=db):: charge=-elchar ! 
[C] REAL(kind=db):: weight=1.0 REAL(kind=db):: qmratioscale REAL(kind=db):: meanv(3) !< mean velocity in each direction for velocitytype 3 [m/s] REAL(kind=db):: spanv(3) !< pos/neg extent of velocity in each direction for velocitytype 3 [m/s] CHARACTER(len=256) :: header=' ' !< header of csv file section REAL(kind=db):: H0=3.2e-14 !< Total energy [J] REAL(kind=db):: P0=8.66e-25 !< Canonical angula r momentum REAL(kind=db):: temperature=10000 !< temperature in kelvins real(kind=db):: n0 !< density factor LOGICAL :: is_test !< Defines if particle are saved on ittracer or not LOGICAL :: is_field !< Defines if particle contributes to Poisson solver LOGICAL :: calc_moments !< Defines if moments matrix must be calculated each it2d CHARACTER(len=16) :: partformat = 'slices' INTEGER:: i, ierr, openerr NAMELIST /partsload/ nblock, mass, charge, weight, npartsalloc, velocitytype, & & radialtype, temperature, H0, P0, is_test, n0, partformat, meanv, spanv, & & calc_moments, qmratioscale, is_field ! Set defaults qmratioscale=1.0 weight=1.0 meanv=0 spanv=0 mass=me charge=-elchar calc_moments=.false. is_test=.false. is_field=.true. ! Open the paticle file OPEN(UNIT=lu_partfile,FILE=trim(partfilename),ACTION='READ',IOSTAT=openerr) header=' ' IF(openerr .ne. 0) THEN CLOSE(unit=lu_partfile) RETURN END IF READ(lu_partfile,partsload) IF(mpirank .eq.0) THEN WRITE(*,'(a,a)')"reading partfile: ", trim(partfilename) WRITE(*,partsload) END IF ! The plasma cloud is defined as a set of slices IF(trim(partformat).eq.'slices') THEN IF( nblock .ge. 1) THEN ALLOCATE(z(0:nblock),ra(nblock),rb(nblock), npartsslice(nblock)) DO WHILE(header(1:8) .ne. '//slices') READ(lu_partfile,'(a)') header END DO DO i=1,nblock READ(lu_partfile,*) z(i-1),ra(i),rb(i),npartsslice(i) END DO READ(lu_partfile,*) z(nblock) CALL creat_parts(p,max(npartsalloc,sum(npartsslice))) p%Nploc=sum(npartsslice) p%Nptot=p%Nploc IF( allocated(VR) ) THEN DEALLOCATE(VR,VZ,VTHET) end if if(.not. allocated(VR)) THEN ALLOCATE(VR(p%Nploc)) ALLOCATE(VZ(p%Nploc)) ALLOCATE(VTHET(p%Nploc)) END IF p%m=mass p%q=charge p%weight=weight p%qmRatio=charge/mass*qmratioscale p%is_test=is_test p%is_field=is_field p%calc_moments=calc_moments p%Newindex=sum(npartsslice) SELECT CASE(radialtype) CASE(1) ! 1/R distribution in R CALL loadPartslices(p, lodunir, ra, rb, z, npartsslice) CASE(2) ! flat top distribution in R CALL loadPartslices(p, lodlinr, ra, rb, z, npartsslice) CASE(3) ! 1/R^2 distribution in R CALL loadPartslices(p, lodinvr, ra, rb, z, npartsslice) CASE(4) ! gaussian distribution in R CALL loadPartslices(p, lodgausr, ra, rb, z, npartsslice) CASE DEFAULT IF (mpirank .eq. 0) WRITE(*,*) "Unknown type of radial distribution:", radialtype CALL MPI_Abort(MPI_COMM_WORLD, -1, ierr) END SELECT SELECT CASE(velocitytype) CASE(1) ! Gaussian with temperature CALL loadGaussianVelocities(p, VR, VZ, VTHET, temperature) CASE(2) ! Davidson magnetic mirror high wr equilibrium CALL loadDavidsonVelocities(p, VR, VZ, VTHET, H0, P0) CASE(3) ! flat top velocity CALL loadFlatTopVelocities(p, VR, VZ, VTHET, meanv, spanv) CASE DEFAULT IF (mpirank .eq. 0) WRITE(*,*) "Unknown type of velocity distribution:", velocitytype CALL MPI_Abort(MPI_COMM_WORLD, -1, ierr) END SELECT END IF END IF ! The plasma cloud is defined as a set individual particles IF( trim(partformat) .eq. 'parts' ) THEN IF( nblock .ge. 1) THEN !Allocate necessary memory CALL creat_parts(p,max(npartsalloc,nblock)) IF( allocated(VR) ) THEN DEALLOCATE(VR,VZ,VTHET) end if if(.not. 
allocated(VR)) THEN ALLOCATE(VR(nblock)) ALLOCATE(VZ(nblock)) ALLOCATE(VTHET(nblock)) END IF ! Read the particles from the file DO WHILE(header(1:8) .ne. '//parts') READ(lu_partfile,'(a)') header END DO DO i=1,nblock READ(lu_partfile,*) p%pos(1,i),p%pos(2,i),p%pos(3,i), VR(i), VTHET(i), VZ(i) END DO p%Nploc=nblock p%Nptot=p%Nploc p%m=mass p%q=charge p%Newindex=nblock p%weight=weight p%qmRatio=charge/mass*qmratioscale p%is_test=is_test p%is_field=is_field p%calc_moments=calc_moments !normalizations p%pos(1,:)=p%pos(1,:)/rnorm p%pos(3,:)=p%pos(3,:)/rnorm !p%z=p%z/rnorm VR=VR/vnorm VTHET=VTHET/vnorm VZ=VZ/vnorm END IF END IF CLOSE(unit=lu_partfile) END SUBROUTINE !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief Increase the number of macroparticles by separating each previous macroparticles into !> samplefactor new macroparticles of equally divided weight. The new sub particles are distributed !> uniformly in space to maintain the density and other moments. ! !> @param [in] samplefactor multiplicator of the number of macroparticles. !> @param [in] p particles type to increase. !--------------------------------------------------------------------------- SUBROUTINE upsample(p, samplefactor) USE basic, ONLY : nplasma, dr, dz INTEGER, INTENT(IN) ::samplefactor TYPE(particles), INTENT(INOUT):: p INTEGER:: i, j, currentindex REAL(kind=db), DIMENSION(p%Nploc) :: spreaddir ! random direction for the spread of each initial macro particle REAL(kind=db) :: dir ! Direction in which the particle is moved REAL(kind=db) :: dl ! Particle displacement used for ! Load and scale the direction angle for spreading the new particles CALL loduni(2, spreaddir) spreaddir=spreaddir*2*pi/samplefactor dl=min(minval(dz,1,dz.GT.0),minval(dr,1,dr.GT.0))/10 DO i=1,p%Nploc DO j=1,samplefactor-1 currentindex=p%Nploc+(i-1)*(samplefactor-1)+j CALL move_part(p,i,currentindex) p%partindex(currentindex)=currentindex dir = spreaddir(i)+2*pi*j/samplefactor p%pos(1,currentindex)=p%pos(1,currentindex) + dl*cos(dir) p%pos(3,currentindex)=p%pos(3,currentindex) + dl*sin(dir) END DO p%partindex(i)=i p%pos(1,i)=p%pos(1,i) + dl*cos(spreaddir(i)) p%pos(3,i)=p%pos(3,i) + dl*sin(spreaddir(i)) END DO nplasma=nplasma*samplefactor p%weight=p%weight/samplefactor p%Nploc=p%Nploc*samplefactor p%Nptot=p%Nptot*samplefactor END SUBROUTINE upsample ! Taken from https://rosettacode.org/wiki/Sorting_algorithms/Radix_sort#Fortran ! No Copyright is exerted due to considerable prior art in the Public Domain. ! This Fortran version by Peter Kelly ~ peter.kelly@acm.org ! ! Permission is hereby granted, free of charge, to any person obtaining ! a copy of this software and associated documentation files (the ! "Software"), to deal in the Software without restriction, including ! without limitation the rights to use, copy, modify, merge, publish, ! distribute, sublicense, and/or sell copies of the Software, and to ! permit persons to whom the Software is furnished to do so, subject to ! the following conditions: ! The above copyright notice and this permission notice shall be ! included in all copies or substantial portions of the Software. ! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ! EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ! MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ! IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ! 
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ! TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ! SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ! ! Implementation of a classic Radix Sort LSD style :) SUBROUTINE LSDRADIXSORT(A , N) IMPLICIT NONE ! ! Dummy arguments ! INTEGER :: N INTEGER , target, DIMENSION(0:N - 1) :: A ! All arrays based off zero, one day I'll fix it INTENT (IN) N INTENT (INOUT) A ! ! Local variables ! INTEGER , DIMENSION(0:9) :: counts INTEGER :: digitplace INTEGER :: i INTEGER :: j INTEGER :: largestnum INTEGER, DIMENSION(0:N - 1) :: results ! digitplace = 1 ! Count of the keys largestnum = MAXVAL(A) DO WHILE ( (largestnum/digitplace)>0 ) counts = 0 ! Init the count array DO i = 0 , N - 1 , 1 J = (A(i)/digitplace) J = MODULO(j , 10) counts(j) = counts(j) + 1 END DO ! Change count(i) so that count(i) now contains actual position of this digit in result() ! Working similar to the counting sort algorithm DO i = 1 , 9 , 1 counts(i) = counts(i) + counts(i - 1) ! Build up the prefix sum END DO ! DO i = N - 1 , 0 , -1 ! Move from left to right j = (A(i)/digitplace) j = MODULO(j, 10) results(counts(j) - 1) = A(i) ! Need to subtract one as we are zero based but prefix sum is 1 based counts(j) = counts(j) - 1 END DO ! DO i = 0 , N - 1 , 1 ! Copy the semi-sorted data into the input A(i) = results(i) END DO ! digitplace = digitplace*10 END DO ! While loop RETURN END SUBROUTINE LSDRADIXSORT END MODULE beam diff --git a/src/celldiag_mod.f90 b/src/celldiag_mod.f90 index 60882fe..41e2d13 100644 --- a/src/celldiag_mod.f90 +++ b/src/celldiag_mod.f90 @@ -1,184 +1,188 @@ !------------------------------------------------------------------------------ ! EPFL/Swiss Plasma Center !------------------------------------------------------------------------------ ! ! MODULE: celldiag ! !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> Represent a diagnostic positioned at cell indices (rindex,zindex) that saves the individual particles !> position and velocity !------------------------------------------------------------------------------ MODULE celldiag ! USE constants use mpi USE mpihelper USE basic, ONLY: mpirank, mpisize, vnorm, rnorm, Zbounds, zgrid, & & nlclassical, nlmaxwellsource, phinorm, nbcelldiag USE beam USE futils IMPLICIT NONE PRIVATE INTEGER, SAVE, ALLOCATABLE :: specieid(:) !< position of the specie in partslist INTEGER, SAVE, ALLOCATABLE :: rindex(:) !< radial index for the diagnostic position INTEGER, SAVE, ALLOCATABLE :: zindex(:) !< axial index for the diagnostic position TYPE(particles), ALLOCATABLE, SAVE :: diagnosed_parts(:) !< Stores the particles properties at position (rindex,zindex) CHARACTER(len=20), SAVE, ALLOCATABLE :: groupname(:) !< Name of the group in the hdf5 file INTEGER, SAVE , ALLOCATABLE :: h5storelength(:) !< particles capacity of the hdf5 dataset NAMELIST /celldiagparams/ specieid, rindex, zindex PUBLIC:: celldiag_init, celldiag_save contains subroutine celldiag_init(lu_in, diagfile_id) implicit none INTEGER, INTENT(IN) :: lu_in INTEGER, INTENT(IN):: diagfile_id INTEGER:: i ALLOCATE(specieid(nbcelldiag), rindex(nbcelldiag), zindex(nbcelldiag)) ALLOCATE(diagnosed_parts(nbcelldiag), groupname(nbcelldiag), h5storelength(nbcelldiag)) Rewind(lu_in) if(nbcelldiag .gt. 0) then READ(lu_in, celldiagparams) if(mpirank .eq. 0) WRITE(*, celldiagparams) Do i=1,nbcelldiag CALL creat_parts(diagnosed_parts(i), 500) IF(mpirank .eq. 0) THEN WRITE(groupname(i),'(a,i2.2)') "/data/celldiag/",i If(.not. 
isgroup(diagfile_id, "/data/celldiag/")) THEN CALL creatg(diagfile_id, "/data/celldiag") CALL attach(diagfile_id, "/data/celldiag", "nbcelldiag", nbcelldiag) END IF CALL celldiag_createh5group(diagfile_id, groupname(i), rindex(i), zindex(i), specieid(i), diagnosed_parts(i), h5storelength(i)) END IF END DO END IF End subroutine celldiag_init subroutine celldiag_createh5group(diagfile_id, groupname, rindex, zindex, specid, diag_parts, h5strlength) INTEGER, INTENT(IN):: diagfile_id, rindex, zindex, specid CHARACTER(len=*), INTENT(IN):: groupname TYPE(particles), INTENT(IN):: diag_parts INTEGER, INTENT(INOUT):: h5strlength INTEGER:: partsrank, partsdim(2) If(.not. isgroup(diagfile_id, TRIM(groupname))) CALL creatg(diagfile_id, TRIM(groupname)) CALL attach(diagfile_id, TRIM(groupname), "rindex", rindex) CALL attach(diagfile_id, TRIM(groupname), "zindex", zindex) CALL attach(diagfile_id, trim(groupname), "q", partslist(specid)%q) CALL attach(diagfile_id, trim(groupname), "m", partslist(specid)%m) CALL attach(diagfile_id, trim(groupname), "weight", partslist(specid)%weight) If(.not. isdataset(diagfile_id, trim(groupname) // "/time")) CALL creatd(diagfile_id, 0, SHAPE(rindex), trim(groupname) // "/time", "time") If(.not. isdataset(diagfile_id, trim(groupname) // "/Nparts")) CALL creatd(diagfile_id, 0, SHAPE(rindex), trim(groupname) //"/Nparts", "number of remaining parts") If(.not. isdataset(diagfile_id, trim(groupname) // "/R")) CALL creatd(diagfile_id, 1, SHAPE(diag_parts%pos(1,:)), trim(groupname) // "/R", "radial pos") If(.not. isdataset(diagfile_id, trim(groupname) // "/Z")) CALL creatd(diagfile_id, 1, SHAPE(diag_parts%pos(1,:)), trim(groupname) // "/Z", "axial pos") If(.not. isdataset(diagfile_id, trim(groupname) // "/THET")) CALL creatd(diagfile_id, 1, SHAPE(diag_parts%pos(1,:)), trim(groupname) // "/THET", "azimuthal pos") If(.not. isdataset(diagfile_id, trim(groupname) // "/UZ")) CALL creatd(diagfile_id, 1, SHAPE(diag_parts%pos(1,:)), trim(groupname) // "/UZ", "axial beta*gamma") If(.not. isdataset(diagfile_id, trim(groupname) // "/UR")) CALL creatd(diagfile_id, 1, SHAPE(diag_parts%pos(1,:)), trim(groupname) // "/UR", "radial beta*gamma") If(.not. isdataset(diagfile_id, trim(groupname) // "/UTHET")) CALL creatd(diagfile_id, 1, SHAPE(diag_parts%pos(1,:)), trim(groupname) // "/UTHET", "azimuthal beta*gamma") If(.not. isdataset(diagfile_id, trim(groupname) // "/pot")) CALL creatd(diagfile_id, 1, SHAPE(diag_parts%pot), trim(groupname) // "/pot", "electric potential") CALL getdims(diagfile_id, trim(groupname) // '/R', partsrank, partsdim) h5strlength=partsdim(1) END subroutine celldiag_createh5group subroutine celldiag_save(time, diagfile_id) implicit none REAL(kind=db), INTENT(IN) :: time INTEGER, INTENT(IN) :: diagfile_id INTEGER :: Nbtosave, i ! check if source is on IF(.not. celldiag_on(time)) THEN RETURN END IF Do i=1,nbcelldiag CALL celldiag_save_specie(partslist(specieid(i)),rindex(i),zindex(i),diagnosed_parts(i)) END DO + + !$OMP BARRIER + !$OMP MASTER Do i=1,nbcelldiag if(mpisize .gt. 
1) then call collectparts(diagnosed_parts(i)) else diagnosed_parts(i)%Nptot=diagnosed_parts(i)%Nploc end if Nbtosave=min(diagnosed_parts(i)%Nptot,h5storelength(i)) CALL celldiag_write_specie(diagfile_id, diagnosed_parts(i), groupname(i), Nbtosave, time) END DO + !$OMP END MASTER end subroutine celldiag_save SUBROUTINE celldiag_save_specie(p, rindex, zindex, savedp) Type(particles), INTENT(IN) :: p Type(particles), INTENT(INOUT) :: savedp INTEGER, INTENT(IN) :: rindex, zindex INTEGER:: i, destcopyindex savedp%Nploc=0 savedp%collected=.false. IF (p%Nploc .gt. 0 .and. zindex .ge. Zbounds(mpirank) .and. zindex .lt. Zbounds(mpirank+1)) THEN ! Boundary condition at z direction - !$OMP PARALLEL DO DEFAULT(SHARED) + !$OMP DO DO i=1,p%Nploc ! If the particle is in the correct cell, it is saved IF (p%Zindex(i) .eq. zindex.and. p%Rindex(i) .eq. rindex ) THEN !$OMP CRITICAL (diagparts) savedp%Nploc=savedp%Nploc+1 destcopyindex= savedp%Nploc !$OMP END CRITICAL (diagparts) CALL copy_part(p,i,destcopyindex,savedp) END IF END DO - !$OMP END PARALLEL DO + !$OMP END DO NOWAIT END IF END subroutine celldiag_save_specie SUBROUTINE celldiag_write_specie(diagfile_id, savedp, groupname, Nbtosave, time) Type(particles), INTENT(IN) :: savedp INTEGER, INTENT(IN) :: diagfile_id CHARACTER(LEN=*), INTENT(IN) :: groupname INTEGER, INTENT(IN) :: Nbtosave REAL(kind=db), INTENT(IN) :: time IF(mpirank .eq. 0) THEN CALL append(diagfile_id, trim(groupname) // "/time", time) CALL append(diagfile_id, trim(groupname) // "/Nparts", REAL(savedp%Nptot,kind=db)) CALL append(diagfile_id, trim(groupname) // "/R", savedp%pos(1,1:Nbtosave)*rnorm) CALL append(diagfile_id, trim(groupname) // "/Z", savedp%pos(3,1:Nbtosave)*rnorm) CALL append(diagfile_id, trim(groupname) // "/THET", savedp%pos(2,1:Nbtosave)) CALL append(diagfile_id, trim(groupname) // "/UZ", savedp%U(3,1:Nbtosave)/savedp%gamma(1:Nbtosave)) CALL append(diagfile_id, trim(groupname) // "/UR", savedp%U(1,1:Nbtosave)/savedp%gamma(1:Nbtosave)) CALL append(diagfile_id, trim(groupname) // "/UTHET", savedp%U(2,1:Nbtosave)/savedp%gamma(1:Nbtosave)) CALL append(diagfile_id, trim(groupname) // "/pot", savedp%pot(1:Nbtosave)*phinorm) END IF END subroutine celldiag_write_specie logical function celldiag_on(time) REAL(kind=db), intent(in):: time celldiag_on=.true. end function End Module celldiag diff --git a/src/diagnose.f90 b/src/diagnose.f90 index d1fa7e9..7e9a619 100644 --- a/src/diagnose.f90 +++ b/src/diagnose.f90 @@ -1,469 +1,469 @@ SUBROUTINE diagnose(kstep) ! ! Diagnostics ! USE basic USE futils USE hashtable Use maxwsrce Use neutcol USE beam, ONLY : partslist, epot, ekin, etot, etot0, Nplocs_all, collectparts #if USE_X == 1 USE xg, ONLY : initw, updt_xg_var #endif USE fields, ONLY: phi_spline, nbmoments USE celldiag use mpi Use geometry Use splinebound Use weighttypes use psupply use filemanip IMPLICIT NONE ! INTEGER, INTENT(in) :: kstep ! ! Local vars and arrays INTEGER, PARAMETER :: BUFSIZE = 20 CHARACTER(len=128) :: str, fname, grpname CHARACTER(len=12):: charspec INTEGER:: Ntotal ! Total number of simulated particles remaining in the simulation INTEGER:: partsrank, partsdim(2) INTEGER:: Nplocs_all_save(64) INTEGER:: i, nbbounds INTEGER, allocatable, save:: partnbBuffer(:,:) REAL(kind=db), ALLOCATABLE :: magr(:), magz(:) REAL(kind=db), ALLOCATABLE :: tempBr(:, :), tempBz(:, :), tempAthet(:, :) INTEGER :: magn(2), magrank, magfid !________________________________________________________________________________ ! 1. Initial diagnostics IF( kstep .EQ. 0 .and. mpirank .eq. 
0) THEN ! Only process 0 should save on file ! WRITE(*,'(a)') ' Initial diagnostics' ! ! 1.1 Initial run or when NEWRES set to .TRUE. ! IF( .NOT. nlres .OR. newres) THEN CALL creatf(resfile, fidres, 'FENNECS result file', real_prec='d') WRITE(*,'(3x,a,a)') TRIM(resfile), ' created' ! ! Label the run IF( LEN_TRIM(label1).GT.0 ) CALL attach(fidres, "/", "label1", TRIM(label1)) IF( LEN_TRIM(label2).GT.0 ) CALL attach(fidres, "/", "label2", TRIM(label2)) IF( LEN_TRIM(label3).GT.0 ) CALL attach(fidres, "/", "label3", TRIM(label3)) IF( LEN_TRIM(label4).GT.0 ) CALL attach(fidres, "/", "label4", TRIM(label4)) ! ! Job number jobnum = 0 ! ! Data group CALL creatg(fidres, "/data", "data") CALL creatg(fidres, "/data/var0d", "0d history arrays") CALL creatg(fidres, "/data/var1d","1d history arrays") CALL creatg(fidres, "/data/part", "part phase space") CALL creatg(fidres, "/data/fields", "Electro static potential and Er Ez fields") ! ! File group CALL creatg(fidres, "/files", "files") CALL attach(fidres, "/files", "jobnum", jobnum) CALL putarr(fidres, "/data/var1d/rgrid", rgrid) CALL putarr(fidres, "/data/var1d/zgrid", zgrid) CALL creatd(fidres, 1, (/ 64 /), "/data/var0d/Nplocs_all", "Nplocs_all") CALL creatd(fidres, 1, (/3/), "/data/var0d/nudcol", "nudcol") ! Part and fields vectors ! Initialize time-dependent particle variables CALL creatd(fidres, 1, SHAPE(partslist(1)%pos(1,:)), "/data/part/R", "R",compress=.true.,chunking=SHAPE(partslist(1)%pos(1,:))) CALL creatd(fidres, 1, SHAPE(partslist(1)%pos(3,:)), "/data/part/Z", "Z",compress=.true.,chunking=SHAPE(partslist(1)%pos(1,:))) CALL creatd(fidres, 1, SHAPE(partslist(1)%pos(2,:)), "/data/part/THET", "THET",compress=.true.,chunking=SHAPE(partslist(1)%pos(1,:))) CALL creatd(fidres, 1, SHAPE(partslist(1)%U(1,:)), "/data/part/UR", "UR",compress=.true.,chunking=SHAPE(partslist(1)%pos(1,:))) CALL creatd(fidres, 1, SHAPE(partslist(1)%U(3,:)), "/data/part/UZ", "UZ",compress=.true.,chunking=SHAPE(partslist(1)%pos(1,:))) CALL creatd(fidres, 1, SHAPE(partslist(1)%U(2,:)), "/data/part/UTHET", "UTHET",compress=.true.,chunking=SHAPE(partslist(1)%pos(1,:))) CALL creatd(fidres, 1, SHAPE(partslist(1)%Rindex), "/data/part/Rindex", "Rindex",compress=.true.,chunking=SHAPE(partslist(1)%Rindex)) CALL creatd(fidres, 1, SHAPE(partslist(1)%Zindex), "/data/part/Zindex", "Zindex",compress=.true.,chunking=SHAPE(partslist(1)%zindex)) CALL creatd(fidres, 1, SHAPE(partslist(1)%partindex), "/data/part/partindex", "partindex",compress=.true.,chunking=SHAPE(partslist(1)%partindex)) CALL creatd(fidres, 1, SHAPE(partslist(1)%pot), "/data/part/pot", "pot",compress=.true.,chunking=SHAPE(partslist(1)%pot)) CALL creatd(fidres, 0, SHAPE(time), "/data/part/time", "time" ) CALL creatd(fidres, 0, SHAPE(Ntotal), "/data/part/Nparts", "number of remaining parts in the simulation space") CALL creatd(fidres, 1, (/7/), "/data/part/nbchange", "number of added parts, lost parts zm,zp,rm,rp, and collisions per type io, ela") CALL attach(fidres,'/data/part', "q", partslist(1)%q) CALL attach(fidres,'/data/part', "m", partslist(1)%m) CALL attach(fidres,'/data/part', "weight", partslist(1)%weight) CALL creatd(fidres, 1, SHAPE(pot), "/data/fields/pot", "pot") CALL creatd(fidres, 1, SHAPE(Er), "/data/fields/Er", "Er") CALL creatd(fidres, 1, SHAPE(Ez), "/data/fields/Ez", "Ez") CALL creatd(fidres, 1, SHAPE(phi_spline), "/data/fields/phi", "spline form of Phi") CALL creatd(fidres, 2, (/nbmoments,nrank(1)*nrank(2)/), "/data/fields/moments", "moments",compress=.true.,chunking=(/1,nrank(1)*nrank(2)/)) !CALL 
creatd(fidres, 2, SHAPE(moments), "/data/fields/moments", "moments") CALL creatd(fidres, 0, SHAPE(time), "/data/fields/time", "time" ) CALL putarr(fidres, "/data/fields/Br", Br) CALL putarr(fidres, "/data/fields/Bz", Bz) CALL putarr(fidres, "/data/fields/Athet", Athet) CALL putarr(fidres, "/data/fields/volume", Volume) ! We save the magnetic field as defined by the h5 file if(len_trim(magnetfile) .gt. 1) then CALL openf(trim(magnetfile), magfid, 'r', real_prec='d') CALL getdims(magfid, '/mag/Athet', magrank, magn) ALLOCATE (magr(magn(2)), magz(magn(1))) ALLOCATE (tempAthet(magn(1), magn(2)), tempBr(magn(1), magn(2)), tempBz(magn(1), magn(2))) ! Read r and z coordinates for the definition of A_\thet, and B CALL getarr(magfid, '/mag/r', magr) CALL getarr(magfid, '/mag/z', magz) CALL getarr(magfid, '/mag/Athet', tempAthet) IF (isdataset(magfid, '/mag/Br') .and. isdataset(magfid, '/mag/Bz')) THEN CALL getarr(magfid, '/mag/Br', tempBr) CALL getarr(magfid, '/mag/Bz', tempBz) end if CALL creatg(fidres, '/data/inputmag') CALL putarr(fidres, '/data/inputmag/r',magr) CALL putarr(fidres, '/data/inputmag/z',magz) CALL putarr(fidres, '/data/inputmag/Athet',tempAthet) CALL putarr(fidres, '/data/inputmag/Br',tempBr) CALL putarr(fidres, '/data/inputmag/Bz',tempBz) call closef(magfid) end if ! ! 1.2 Restart run ! ELSE CALL cp2bk(resfile) ! backup previous result file CALL openf(resfile, fidres, real_prec='d') WRITE(*,'(3x,a,a)') TRIM(resfile), ' open' CALL getatt(fidres, "/files", "jobnum", jobnum) jobnum = jobnum+1 WRITE(*,'(3x,a,i3)') "Current Job Number =", jobnum CALL attach(fidres, "/files", "jobnum", jobnum) !allocate(partnbBuffer(nbspecies,4+size(partslist(1)%nblost,1))) !partnbBuffer=0 END IF ! ! Add input namelist variables as attributes of /data/input WRITE(str,'(a,i2.2)') "/data/input.",jobnum CALL creatg(fidres, TRIM(str)) CALL attach(fidres, TRIM(str), "job_time", job_time) CALL attach(fidres, TRIM(str), "extra_time", extra_time) CALL attach(fidres, TRIM(str), "dt", dt*tnorm) CALL attach(fidres, TRIM(str), "tmax", tmax) CALL attach(fidres, TRIM(str), "nrun", nrun) CALL attach(fidres, TRIM(str), "nlres", nlres) CALL attach(fidres, TRIM(str), "nlsave", nlsave) CALL attach(fidres, TRIM(str), "newres", newres) CALL attach(fidres, TRIM(str), "nz", nz) CALL attach(fidres, TRIM(str), "nr", nr) CALL putarr(fidres, TRIM(str)//"/lz", lz) CALL attach(fidres, TRIM(str), "nplasma", nplasma) CALL attach(fidres, TRIM(str), "potinn", potinn) CALL attach(fidres, TRIM(str), "potout", potout) CALL attach(fidres, TRIM(str), "B0", B0) CALL attach(fidres, TRIM(str), "Rcurv", Rcurv) CALL attach(fidres, TRIM(str), "width", width) CALL attach(fidres, TRIM(str), "n0", n0) CALL attach(fidres, TRIM(str), "temp", partslist(1)%temperature) CALL attach(fidres, TRIM(str), "it0d", it0d) CALL attach(fidres, TRIM(str), "it2d", it2d) CALL attach(fidres, TRIM(str), "itparts", itparts) CALL attach(fidres, TRIM(str), "nlclassical", nlclassical) CALL attach(fidres, TRIM(str), "nlPhis", nlPhis) CALL attach(fidres, TRIM(str), "qsim", partslist(1)%q*partslist(1)%weight) CALL attach(fidres, TRIM(str), "msim", partslist(1)%m*partslist(1)%weight) CALL attach(fidres, TRIM(str), "startstep", cstep) CALL attach(fidres, TRIM(str), "H0", partslist(1)%H0) CALL attach(fidres, TRIM(str), "P0", partslist(1)%P0) CALL putarr(fidres, TRIM(str)//"/femorder", femorder) CALL putarr(fidres, TRIM(str)//"/ngauss", ngauss) CALL putarr(fidres, TRIM(str)//"/nnr", nnr) CALL putarr(fidres, TRIM(str)//"/nnz", nnz) CALL putarr(fidres, TRIM(str)//"/radii", radii) 
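! Added note (illustrative): the futils calls in this input group follow a simple
! convention: attach() stores scalar namelist values as HDF5 attributes of
! /data/input.XX, putarr() writes fixed-size input arrays once, while creatd()
! (used above for /data/part and /data/fields) creates extendible datasets that
! are grown later with append() at every diagnostic step.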
CALL putarr(fidres, TRIM(str)//"/plasmadim", plasmadim) CALL attach(fidres, TRIM(str), "rawparts", .true.) CALL attach(fidres, TRIM(str), "nbspecies", nbspecies) CALL putarr(fidres, TRIM(str)//"/potxt", potxt) CALL putarr(fidres, TRIM(str)//"/Erxt", Erxt) CALL putarr(fidres, TRIM(str)//"/Ezxt", Ezxt) ! Save geometry parameters for non conforming boundary conditions Call geom_diag(fidres,str,rnorm) ! Save geometry parameters for non conforming boundary conditions using b-spline curves call splinebound_diag(fidres, str, the_domain) ! Save Maxwellsource parameters for the ad-hoc source Call maxwsrce_diag(fidres,str,vnorm) ! Save neutcol parameters for the electron collisions with neutrals Call neutcol_diag(fidres,str,vnorm) if(.not. isdataset(fidres,'/data/var0d/nudcol'))then CALL creatd(fidres, 1, (/3/), "/data/var0d/nudcol", "nudcol") end if ! Save psupply parameters for the simulation of realistic power supplies Call psupply_diag(fidres,str) if(.not. isdataset(fidres,'/data/var0d/biases'))then nbbounds=2 if(the_domain%nbsplines .gt. 0) nbbounds=the_domain%nbsplines CALL creatd(fidres, 1, (/nbbounds/), "/data/var0d/biases", "biases") end if ! Save STDIN of this run WRITE(str,'(a,i2.2)') "/files/STDIN.",jobnum INQUIRE(unit=lu_in, name=fname) CALL putfile(fidres, TRIM(str), TRIM(fname)) ! Prepare hdf5 file for storing test particles DO i=2,nbspecies WRITE(grpname,'(a,i2)')'/data/part/',i call create_parts_group(partslist(i),trim(grpname),time) END DO CALL attach(fidres, "/data/part", "nbspecies", nbspecies) ! ! Initialize buffers for 0d history arrays CALL htable_init(hbuf0, BUFSIZE) CALL set_htable_fileid(hbuf0, fidres, "/data/var0d") ! ! Initialize Xgrafix #if USE_X == 1 IF(nlxg) THEN CALL initw END IF #endif END IF IF(kstep .EQ. 0) THEN ! Initialize particle cell diagnostic CALL celldiag_init(lu_in, fidres) CLOSE(lu_in) allocate(partnbBuffer(nbspecies,4+size(partslist(1)%nblost,1))) partnbBuffer=0 END IF !________________________________________________________________________________ ! 2. Periodic diagnostics IF( kstep .NE. -1) THEN IF(modulo(step,ittracer) .eq. 0 .or. nlend) THEN ! We gather the traced particles on the mpi host DO i=1,nbspecies IF(partslist(i)%is_test) CALL collectparts(partslist(i)) END DO END IF IF(modulo(step,itrestart) .eq. 0 .or. modulo(step,itparts) .eq. 0 .or. nlend) THEN ! We gather the traced particles on the mpi host DO i=1,nbspecies CALL collectparts(partslist(i)) END DO END IF do i=1,nbspecies partnbBuffer(i,1)=partnbBuffer(i,1)+partslist(i)%nbadded partnbBuffer(i,2:3)=partnbBuffer(i,2:3)+partslist(i)%nbcolls partnbBuffer(i,4)=partslist(i)%Nploc partnbBuffer(i,5:)=partnbBuffer(i,5:)+partslist(i)%nblost partslist(i)%nbadded=0 partslist(i)%nblost=0 partslist(i)%nbcolls=0 end do IF(modulo(step,ittext) .eq. 0 .or. nlend) THEN ! We gather the number of gained and lost particles on the mpi host IF(mpirank .eq.0 ) THEN CALL MPI_REDUCE(MPI_IN_PLACE, partnbBuffer, nbspecies*(4+size(partslist(1)%nblost,1)), MPI_INTEGER, MPI_SUM, & & 0, MPI_COMM_WORLD, ierr) ELSE CALL MPI_REDUCE(partnbBuffer, partnbBuffer, nbspecies*(4+size(partslist(1)%nblost,1)), MPI_INTEGER, MPI_SUM, & & 0, MPI_COMM_WORLD, ierr) partnbBuffer=0 END IF end if ! ! Only process 0 should save on file IF(mpirank .ne. 0) RETURN ! IF (mpisize .gt. 1) THEN partslist(1)%Nptot=sum(Nplocs_all) END IF ! IF(modulo(step,ittext).eq. 0 .or. nlend) THEN WRITE(*,'(a,1x,i8.8,a1,i8.8,20x,a,1pe10.3)') '*** Timestep (this run/total) =', & & step, '/', cstep, 'Time =', time if( abs(etot).gt. 
0) then WRITE(*,'(a,6(1pe12.4),1x,i8.8,a1,i8.8)') 'Epot, Ekin, Etot, Etot0, Eerr, Eerr rel, Nbparts/Ntotal', epot, ekin, etot, etot0, etot-etot0,(etot-etot0)/etot, partslist(1)%Nptot,'/',nplasma else WRITE(*,'(a,4(1pe12.4),1x,i8.8,a1,i8.8)') 'Epot, Ekin, Etot, Eerr, Nbparts/Ntotal', epot, ekin, etot, etot-etot0, partslist(1)%Nptot,'/',nplasma end if IF(mpisize .gt. 1 ) then WRITE(*,'(a,64i10.7)') 'Nbparts per proc', Nplocs_all end if Write(*,'(a)')"speci, added, iocoll, elacoll, tot var, tot, Losses (zmin zmax rmin rmax boundaries(i))" write(charspec,'(a,i02,a)') '(i04,',size(partnbBuffer,2)+1,'i9.7)' + do i=1,nbspecies WRITE(*,charspec) i, partnbBuffer(i,1),partnbBuffer(i,2:3), partnbBuffer(i,1)-sum(partnbBuffer(i,5:)), partnbBuffer(i,4),-partnbBuffer(i,5:) partslist(i)%nptot= partnbBuffer(i,4) end do partnbBuffer=0 END IF !________________________________________________________________________________ ! ! 2.1 0d history arrays ! ! if we do a restart, we don't want to save the same data twice IF( kstep .eq. 0 .and. nlres .and. (.not. newres)) return IF(modulo(step,it0d).eq. 0 .or. nlend) THEN CALL add_record(hbuf0, "time", "simulation time", time) CALL add_record(hbuf0, "epot", "potential energy", epot) CALL add_record(hbuf0, "ekin", "kinetic energy", ekin) CALL add_record(hbuf0, "etot", "total energy", etot) CALL add_record(hbuf0, "etot0", "theoretical total energy", etot0) CALL add_record(hbuf0, "nbparts", "number of remaining parts in the simulation space", REAL(partslist(1)%Nptot,kind=db)) CALL add_record(hbuf0,"current", "unscaled current flowing between the electrodes of the power supplies [A]", the_ps%current(1)*qnorm/tnorm) CALL htable_endstep(hbuf0) Nplocs_all_save=0 Nplocs_all_save(1:mpisize)=Nplocs_all(0:mpisize-1) CALL append(fidres, "/data/var0d/Nplocs_all", REAL(Nplocs_all_save,kind=db)) CALL append(fidres, "/data/var0d/nudcol", partslist(1)%nudcol/(dt*tnorm)) CALL append(fidres, "/data/var0d/biases", the_ps%biases*phinorm) - END IF ! ! 2.2 2d profiles IF(modulo(step,it2d).eq. 0 .or. nlend) THEN CALL append(fidres, "/data/fields/time", time) CALL append(fidres, "/data/fields/pot", pot*phinorm) CALL append(fidres, "/data/fields/Er", Er*enorm) CALL append(fidres, "/data/fields/Ez", Ez*enorm) CALL append(fidres, "/data/fields/phi", phi_spline*phinorm) CALL append(fidres, "/data/fields/moments", partslist(1)%moments) DO i=2,nbspecies IF ( .not. partslist(i)%calc_moments) CYCLE WRITE(grpname,'(a,i2,a)')'/data/part/',i,'/' CALL append(fidres, trim(grpname) // "moments", partslist(i)%moments) end DO END IF ! ! 2.3 main specie quantities IF(modulo(step,itparts).eq. 0 .or. 
nlend) THEN !PRINT*, 'write particles to file_____________________' CALL append(fidres, "/data/part/time", time) CALL append(fidres, "/data/part/Nparts", REAL(partslist(1)%Nptot,kind=db)) !CALL append(fidres, "/data/part/nbchange", REAL((/partslist(1)%nbadded,partslist(1)%nblost,partslist(1)%nbcolls/),kind=db)) IF ( isdataset(fidres,'/data/part/R') ) THEN CALL getdims(fidres, '/data/part/R', partsrank, partsdim) partsdim(1)=min(size(partslist(1)%pos,2), partsdim(1)) CALL append(fidres, "/data/part/R", partslist(1)%pos(1,1:partsdim(1))*rnorm) CALL append(fidres, "/data/part/Z", partslist(1)%pos(3,1:partsdim(1))*rnorm) CALL append(fidres, "/data/part/THET", partslist(1)%pos(2,1:partsdim(1))) CALL append(fidres, "/data/part/UZ", 0.5*(partslist(1)%U(3,1:partsdim(1))/partslist(1)%gamma(1:partsdim(1))+partslist(1)%Uold(3,1:partsdim(1))/partslist(1)%gammaold(1:partsdim(1)))) CALL append(fidres, "/data/part/UR", 0.5*(partslist(1)%U(1,1:partsdim(1))/partslist(1)%gamma(1:partsdim(1))+partslist(1)%Uold(1,1:partsdim(1))/partslist(1)%gammaold(1:partsdim(1)))) CALL append(fidres, "/data/part/UTHET", 0.5*(partslist(1)%U(2,1:partsdim(1))/partslist(1)%gamma(1:partsdim(1))+partslist(1)%Uold(2,1:partsdim(1))/partslist(1)%gammaold(1:partsdim(1)))) CALL append(fidres, "/data/part/pot", partslist(1)%pot(1:partsdim(1))*phinorm) CALL append(fidres, "/data/part/Rindex", REAL(partslist(1)%Rindex(1:partsdim(1)),kind=db)) CALL append(fidres, "/data/part/Zindex", REAL(partslist(1)%Zindex(1:partsdim(1)),kind=db)) CALL append(fidres, "/data/part/partindex", REAL(partslist(1)%partindex(1:partsdim(1)),kind=db)) END IF END IF ! ! 2.4 Tracer quantities IF(modulo(step,ittracer).eq. 0 .or. nlend) THEN !PRINT*, 'write particles to file_____________________' DO i=2,nbspecies IF ( .not. partslist(i)%is_test) CYCLE WRITE(grpname,'(a,i2,a)')'/data/part/',i,'/' CALL append(fidres, trim(grpname) // "time", time) CALL append(fidres, trim(grpname) //"Nparts", REAL(partslist(i)%Nptot,kind=db)) !CALL append(fidres, trim(grpname) //"nbchange", REAL((/partslist(i)%nbadded,partslist(i)%nblost,partslist(i)%nbcolls/),kind=db)) IF ( isdataset(fidres,trim(grpname)//'R') ) THEN CALL getdims(fidres, trim(grpname) // 'R', partsrank, partsdim) partsdim(1)=min(size(partslist(i)%pos,2), partsdim(1)) CALL append(fidres, trim(grpname) // "R", partslist(i)%pos(1,1:partsdim(1))*rnorm) CALL append(fidres, trim(grpname) // "Z", partslist(i)%pos(3,1:partsdim(1))*rnorm) CALL append(fidres, trim(grpname) // "THET", partslist(i)%pos(2,1:partsdim(1))) CALL append(fidres, trim(grpname) // "UZ", 0.5*(partslist(i)%U(3,1:partsdim(1))/partslist(i)%gamma(1:partsdim(1)) + partslist(i)%Uold(3,1:partsdim(1))/partslist(i)%gammaold(1:partsdim(1)))) CALL append(fidres, trim(grpname) // "UR", 0.5*(partslist(i)%U(1,1:partsdim(1))/partslist(i)%gamma(1:partsdim(1)) + partslist(i)%Uold(1,1:partsdim(1))/partslist(i)%gammaold(1:partsdim(1)))) CALL append(fidres, trim(grpname) // "UTHET", 0.5*(partslist(i)%U(2,1:partsdim(1))/partslist(i)%gamma(1:partsdim(1)) + partslist(i)%Uold(2,1:partsdim(1))/partslist(i)%gammaold(1:partsdim(1)))) CALL append(fidres, trim(grpname) // "pot", partslist(i)%pot(1:partsdim(1))*phinorm) CALL append(fidres, trim(grpname) // "Rindex", REAL(partslist(i)%Rindex(1:partsdim(1)),kind=db)) CALL append(fidres, trim(grpname) // "Zindex", REAL(partslist(i)%Zindex(1:partsdim(1)),kind=db)) CALL append(fidres, trim(grpname) // "partindex", REAL(partslist(i)%partindex(1:partsdim(1)),kind=db)) END IF END DO ! END IF ! 2.5 3d profiles ! ! ! 2.6 Xgrafix ! 
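! Added note (illustrative): the Xgrafix refresh below is only compiled in when
! the code is built with USE_X=1, and only runs when nlxg is enabled, every
! itgraph steps; the other periodic diagnostics above are controlled
! independently by it0d, it2d, itparts, ittext and ittracer.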
#if USE_X == 1 IF(nlxg .AND. modulo(kstep,itgraph) .eq. 0) THEN call xgevent CALL updt_xg_var CALL xgupdate END IF #endif !________________________________________________________________________________ ! 3. Final diagnostics ELSE ! Only process 0 should save on file IF(mpirank .ne. 0) RETURN ! ! Flush 0d history array buffers CALL htable_hdf5_flush(hbuf0) ! ! Close all diagnostic files CALL closef(fidres) !________________________________________________________________________________ END IF ! CONTAINS SUBROUTINE create_parts_group(p,grpname, time) USE beam,ONLY: particles type(particles):: p real(kind=db):: time character(len=*):: grpname If(isgroup(fidres, trim(grpname))) return CALL creatg(fidres, grpname, "specific specie phase space") CALL creatd(fidres, 0, SHAPE(time), trim(grpname) // "/time", "time") CALL creatd(fidres, 0, SHAPE(time), trim(grpname) //"/Nparts", "number of remaining parts") CALL creatd(fidres, 1, SHAPE(p%pot), trim(grpname) // "/R", "radial pos") CALL creatd(fidres, 1, SHAPE(p%pot), trim(grpname) // "/Z", "axial pos") CALL creatd(fidres, 1, SHAPE(p%pot), trim(grpname) // "/THET", "azimuthal pos") CALL creatd(fidres, 1, SHAPE(p%pot), trim(grpname) // "/UZ", "axial beta*gamma") CALL creatd(fidres, 1, SHAPE(p%pot), trim(grpname) // "/UR", "radial beta*gamma") CALL creatd(fidres, 1, SHAPE(p%pot), trim(grpname) // "/UTHET", "azimuthal beta*gamma") CALL creatd(fidres, 1, SHAPE(p%pot), trim(grpname) // "/pot", "electric potential") CALL creatd(fidres, 1, SHAPE(p%pot), trim(grpname) // "/Rindex", "radial grid index") CALL creatd(fidres, 1, SHAPE(p%pot), trim(grpname) // "/Zindex", "axial grid index") CALL creatd(fidres, 1, SHAPE(p%pot), trim(grpname) // "/partindex", "particle index") CALL creatd(fidres, 1, (/7/), trim(grpname) // "nbchange", "number of added parts, lost parts zm,zp,rm,rp, and collisions per type io, ela") CALL attach(fidres,trim(grpname), "q", p%q) CALL attach(fidres,trim(grpname), "m", p%m) CALL attach(fidres,trim(grpname), "weight", p%weight) CALL creatd(fidres, 2, (/nbmoments,nrank(1)*nrank(2)/), trim(grpname) // "/moments", "moments",compress=.true.,chunking=(/1,nrank(1)*nrank(2)/)) END SUBROUTINE create_parts_group END SUBROUTINE diagnose diff --git a/src/fields_mod.f90 b/src/fields_mod.f90 index d8d3f52..516591e 100644 --- a/src/fields_mod.f90 +++ b/src/fields_mod.f90 @@ -1,1384 +1,1462 @@ !------------------------------------------------------------------------------ ! EPFL/Swiss Plasma Center !------------------------------------------------------------------------------ ! ! MODULE: beam ! !> @author !> Patryk Kaminski EPFL/SPC !> Guillaume Le Bars EPFL/SPC ! ! 
DESCRIPTION: !> Module responsible for initializing the magnetic field, solving the Poisson equation and computing the moments of the particles distribution function !------------------------------------------------------------------------------ MODULE fields USE constants USE basic, ONLY: nr, nz, zgrid, rgrid, Br, Bz, Er, Ez, femorder, ngauss, nlppform, pot, Athet, & & splrz, splrz_ext, nlperiod, phinorm, nlPhis, nrank, mpirank, mpisize, step, it2d, timera, potxt, erxt, ezxt USE beam, ONLY: partslist USE bsplines USE mumps_bsplines use mpi Use omp_lib Use mpihelper, ONLY: db_type USE particletypes IMPLICIT NONE REAL(kind=db), allocatable, SAVE :: matcoef(:, :), phi_spline(:), vec1(:), vec2(:) - REAL(kind=db), allocatable, SAVE :: loc_moments(:, :), loc_rhs(:), gradgtilde(:), fverif(:) + REAL(kind=db), allocatable, SAVE :: loc_moments(:, :), loc_rhs(:), gradgtilde(:), fverif(:), ppformwork(:,:,:) INTEGER, SAVE:: loc_zspan TYPE(mumps_mat), SAVE :: femat !< Finite Element Method matrix for the full domain TYPE(mumps_mat), SAVE :: reduccedmat !< Finite Element Method matrix in the redduced web-spline sub-space !TYPE(mumps_mat), SAVE :: fematmpi !< Finite Element Method matrix prepared for mpi parallelism INTEGER :: nbmoments = 10 !< number of moments to be calculated and stored INTEGER(kind=omp_lock_kind), Allocatable:: mu_lock(:) !< Stores the lock for fields parallelism CONTAINS SUBROUTINE mag_init USE basic, ONLY: magnetfile, nr, nz USE bsplines USE mumps_bsplines USE mpihelper USE geometry ALLOCATE (Br((nr + 1)*(nz + 1)), Bz((nr + 1)*(nz + 1))) ALLOCATE (Athet((nr + 1)*(nz + 1))) ! Calculate magnetic field mirror components in grid points (Davidson analytical formula employed) ! or load it from magnetfile if present CALL magnet(magnetfile) end subroutine mag_init !--------------------------------------------------------------------------- !> @author !> Patryk kaminski EPFL/SPC !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief !> Set-up the necessary variables for solving Poisson and computes the magnetic field on the grid ! !--------------------------------------------------------------------------- SUBROUTINE fields_init USE basic, ONLY: pot, nlperiod, nrank, rhs, volume, rgrid USE bsplines USE geometry USE mumps_bsplines USE mpihelper - INTEGER :: nrz(2), i + INTEGER :: nrz(2), i, d2, k1, n1 ! Auxiliary vectors ALLOCATE(vec1((nz+1)*(nr+1)),vec2((nr+1)*(nz+1))) DO i=0,nr vec1(i*(nz+1)+1:(i+1)*(nz+1))=zgrid!(0:nz) vec2(i*(nz+1)+1:(i+1)*(nz+1))=rgrid(i) END DO ! Set up 2d spline splrz used in the FEM CALL set_spline(femorder, ngauss, zgrid, rgrid, splrz, nlppform=nlppform, period=nlperiod) ! Set up 2d spline splrz_ext used in the FEM to calculate the external electric field and potential CALL set_spline(femorder, ngauss, zgrid, rgrid, splrz_ext, nlppform=nlppform, period=nlperiod) + !Allocate the work buffer to calculate the ppform + d2 = splrz%sp2%dim + k1 = splrz%sp1%order + n1 = splrz%sp1%nints + ALLOCATE(ppformwork(d2,k1,n1)) + ! Calculate dimension of splines nrz(1) = nz nrz(2) = nr CALL get_dim(splrz, nrank, nrz, femorder) ! 
Allocate necessary variables ALLOCATE (matcoef(nrank(1), nrank(2))) ALLOCATE (pot((nr + 1)*(nz + 1))) ALLOCATE (potxt((nr + 1)*(nz + 1))) ALLOCATE (Erxt((nr + 1)*(nz + 1))) ALLOCATE (Ezxt((nr + 1)*(nz + 1))) ALLOCATE (rhs(nrank(1)*nrank(2))) ALLOCATE (gradgtilde(nrank(1)*nrank(2))) gradgtilde = 0 ALLOCATE (phi_spline(nrank(1)*nrank(2))) ALLOCATE (volume(nrank(1)*nrank(2))) volume = 0 ALLOCATE (Er((nr + 1)*(nz + 1)), Ez((nr + 1)*(nz + 1))) ALLOCATE (mu_lock(nrank(1)*nrank(2))) do i = 1, nrank(1)*nrank(2) call omp_init_lock(mu_lock(i)) end do end SUBROUTINE fields_init !--------------------------------------------------------------------------- !> @author !> Patryk kaminski EPFL/SPC !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief !> Set-up the geometry definition and read it from the standard input !> Precomputes the LHS matrix to solve Poisson abd the RHS effect of the dirichlet boundaries ! !--------------------------------------------------------------------------- SUBROUTINE fields_start USE geometry USE basic, ONLY: nrank implicit none INTEGER:: i,j, ierr DOUBLE PRECISION:: val ! set up the geometry module for setting up non-conforming boundary conditions call timera(0, "geom_init") call geom_init(splrz, vec1, vec2) call timera(1, "geom_init") ! Initialisation of FEM matrix CALL init(nrank(1)*nrank(2), 2, femat) ! Calculate and factorise FEM matrix (depends only on mesh) CALL fematrix(femat) If (walltype .lt. 0) then allocate (fverif(nrank(1)*nrank(2))) fverif = 0 end if ! Compute the volume of the splines and gtilde for solving E using web-splines CALL comp_volume + !$OMP PARALLEL Call comp_gradgtilde + !$OMP END PARALLEL if (nlweb) then ! Calculate reduced matrix for use of web splines call timera(0, "reduce femat") call Reducematrix(femat, reduccedmat) call timera(1, "reduce femat") call factor(reduccedmat) else call factor(femat) end if !WRITE(*,*) "Copy and to_mat worked" !CALL MPI_abort(MPI_COMM_WORLD,-1,ierr) call vacuum_field END SUBROUTINE fields_start !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief !> Recomputes the vacuum electric field ! !--------------------------------------------------------------------------- subroutine vacuum_field Use geometry USE basic, ONLY: pot, rhs implicit none ! Computes the externally imposed electric field + !$OMP SINGLE rhs = -gradgtilde if (walltype .lt. 0) rhs = rhs + fverif + !$OMP END SINGLE + + !$OMP BARRIER + call poisson(splrz_ext) + + !$OMP BARRIER + !$OMP SINGLE rhs = 0 potxt = pot erxt = Er Ezxt = Ez + !$OMP END SINGLE NOWAIT end subroutine !--------------------------------------------------------------------------- !> @author !> Patryk kaminski EPFL/SPC !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief !> Set-up the necessary variables for the communication of moments and rhs grid ! !--------------------------------------------------------------------------- SUBROUTINE fields_comm_init(Zbounds) USE basic, ONLY: nrank USE mpihelper INTEGER:: Zbounds(0:) loc_zspan = Zbounds(mpirank + 1) - Zbounds(mpirank) + femorder(1) if (allocated(loc_moments)) deallocate (loc_moments) ALLOCATE (loc_moments(nbmoments, loc_zspan*nrank(2))) if (allocated(loc_rhs)) deallocate (loc_rhs) ALLOCATE (loc_rhs(loc_zspan*nrank(2))) IF (mpisize .gt. 
1) THEN CALL init_overlaps(nrank, femorder, Zbounds(mpirank), Zbounds(mpirank + 1), nbmoments) END IF END SUBROUTINE fields_comm_init !--------------------------------------------------------------------------- !> @author !> Patryk kaminski EPFL/SPC !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief !> Construct the right hand side vector used in the FEM Poisson solver ! !> @param[in] plist list of the particles type storing the desired specie parameters ! !--------------------------------------------------------------------------- SUBROUTINE rhscon(plist) USE bsplines use mpi USE basic, ONLY: rhs USE beam, ONLY: particles USE mpihelper Use geometry type(particles), INTENT(INOUT):: plist(:) INTEGER:: i IF (nlphis) then ! We calculate the self-consistent field + !$OMP SINGLE loc_rhs = 0 ! Reset the moments matrix + !$OMP END SINGLE ! Assemble rhs for each specie Do i = 1, size(plist, 1) if (plist(i)%is_field) CALL deposit_charge(plist(i), loc_rhs) END Do - + !$OMP BARRIER ! If we are using MPI parallelism, reduce the rhs on the root process + IF (mpisize .gt. 1) THEN CALL rhs_gather(rhs) ELSE + !$OMP SINGLE rhs = loc_rhs + !$OMP END SINGLE END IF ELSE ! We only consider the externally imposed field + !$OMP SINGLE rhs = 0 + !$OMP END SINGLE END IF - + !$OMP SINGLE IF (mpirank .eq. 0) THEN rhs = rhs - gradgtilde if (walltype .lt. 0) rhs = rhs + fverif END IF - + !$OMP END SINGLE END SUBROUTINE rhscon !--------------------------------------------------------------------------- !> @author !> Patryk kaminski EPFL/SPC !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief !> Calculate the 0th 1st and 2nd order moments of the particle p and stores it in moment ! !> @param[in] p the particles type storing the desired specie parameters !> @param[out] moment the 2d array storing the calculated moments ! !--------------------------------------------------------------------------- SUBROUTINE momentsdiag(p) USE bsplines use mpi USE beam, ONLY: particles USE mpihelper Use geometry type(particles), INTENT(INOUT):: p !REAL(kind=db), INTENT(INOUT):: moment(:, :) + !$OMP SINGLE loc_moments = 0 ! Reset the moments matrix ! Assemble rhs + !$OMP END SINGLE + IF (p%Nploc .ne. 0) THEN CALL deposit_moments(p, loc_moments) END IF + !$OMP SINGLE if(.not. allocated(p%moments))THEN if(mpirank.eq.0)THEN Allocate(p%moments(nbmoments,nrank(1)*nrank(2))) else Allocate(p%moments(0,0)) end if end if + + !$OMP END SINGLE + ! If we are using MPI parallelism, reduce the rhs on the root process IF (mpisize .gt. 1) THEN CALL moments_gather(p%moments) ELSE + !$OMP SINGLE p%moments = loc_moments + !$OMP END SINGLE NOWAIT END IF END SUBROUTINE momentsdiag !--------------------------------------------------------------------------- !> @author !> Patryk kaminski EPFL/SPC !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief !> Deposit the particles moments (n,v,v^2) from p on the grid ! 
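!> Parallelisation: each OpenMP thread accumulates its contributions into a
!> private copy omp_loc_moments while the orphaned !$OMP DO distributes the
!> particle bunches; the private copies are then summed into p_loc_moments
!> inside the named CRITICAL section, replacing the former per-coefficient
!> omp locks. This assumes the routine is entered from the enclosing
!> !$OMP PARALLEL region opened around the main time loop.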
!> @param[in] p the particles type storing the desired specie parameters !> @param[in] p_loc_moments local tensor used to store the moments of the given specie !--------------------------------------------------------------------------- SUBROUTINE deposit_moments(p, p_loc_moments) USE bsplines use mpi USE basic, ONLY: Zbounds USE beam, ONLY: particles USE mpihelper USE geometry USE omp_lib TYPE(particles), INTENT(IN):: p REAL(kind=db), DIMENSION(:, :), INTENT(INOUT):: p_loc_moments + REAL(kind=db), DIMENSION(:, :), Allocatable:: omp_loc_moments INTEGER ::irow, jcol, it, jw, mu, i, k, iend, nbunch INTEGER, DIMENSION(:), ALLOCATABLE::zleft, rleft REAL(kind=db) :: vr, vthet, vz, coeff - REAL(kind=db), ALLOCATABLE :: fun(:, :, :), fun2(:, :, :), wgeom(:, :) + REAL(kind=db), ALLOCATABLE :: fun(:, :, :), fun2(:, :, :) INTEGER:: num_threads num_threads = omp_get_max_threads() nbunch = p%Nploc/num_threads ! Particle bunch size used when calling basfun nbunch = max(nbunch, 1) ! Particle bunch size used when calling basfun nbunch = min(nbunch, 64) ! Particle bunch size used when calling basfun ALLOCATE (zleft(nbunch), rleft(nbunch)) ALLOCATE (fun(1:femorder(1) + 1, 0:0, nbunch), fun2(1:femorder(2) + 1, 0:0, nbunch)) ! Arrays keeping values of b-splines at gauss node - ALLOCATE (wgeom(nbunch, 0:0)) ! Assemble rhs IF (p%Nploc .ne. 0) THEN -!$OMP PARALLEL DO DEFAULT(SHARED), PRIVATE(zleft, rleft, jw, it, iend, irow, jcol, mu, k, vr, vz, vthet, coeff, fun, fun2) +!!$OMP PARALLEL DO DEFAULT(SHARED), PRIVATE(zleft, rleft, jw, it, iend, irow, jcol, mu, k, vr, vz, vthet, coeff, fun, fun2) + allocate(omp_loc_moments(size(p_loc_moments,1),size(p_loc_moments,2))) + omp_loc_moments=0 + !$OMP DO DO i = 1, p%Nploc, nbunch ! Avoid segmentation fault by accessing non relevant data iend = min(i + nbunch - 1, p%Nploc) k = iend - i + 1 ! Localize the particle !CALL locintv(splrz%sp2, p%R(i:iend), rleft(1:k)) !CALL locintv(splrz%sp1, p%Z(i:iend), zleft(1:k)) rleft(1:k) = p%rindex(i:iend) zleft(1:k) = p%zindex(i:iend) ! Compute the value of the splines at the particles positions CALL basfun(p%pos(3,i:iend), splrz%sp1, fun(:, :, 1:k), zleft(1:k) + 1) CALL basfun(p%pos(1,i:iend), splrz%sp2, fun2(:, :, 1:k), rleft(1:k) + 1) - !CALL geom_weight(p%Z(i:iend),p%R(i:iend),wgeom) DO k = 1, (iend - i + 1) DO jw = 1, (femorder(2) + 1) DO it = 1, (femorder(1) + 1) irow = zleft(k) + it - Zbounds(mpirank) jcol = rleft(k) + jw mu = irow + (jcol - 1)*(loc_zspan) coeff = p%weight*fun(it, 0, k)*fun2(jw, 0, k) ! 
Add contribution of particle nbunch to rhs grid point mu vr = 0.5*(p%U(1,i + k - 1)/p%Gamma(i + k - 1) + p%Uold(1,i + k - 1)/p%Gammaold(i + k - 1)) vz = 0.5*(p%U(3,i + k - 1)/p%Gamma(i + k - 1) + p%Uold(3,i + k - 1)/p%Gammaold(i + k - 1)) vthet = 0.5*(p%U(2,i + k - 1)/p%Gamma(i + k - 1) + p%Uold(2,i + k - 1)/p%Gammaold(i + k - 1)) - call omp_set_lock(mu_lock(mu)) + !call omp_set_lock(mu_lock(mu)) !!$OMP ATOMIC UPDATE - p_loc_moments(1, mu) = p_loc_moments(1, mu) + coeff + omp_loc_moments(1, mu) = omp_loc_moments(1, mu) + coeff !!$OMP END ATOMIC !!$OMP ATOMIC UPDATE - p_loc_moments(2, mu) = p_loc_moments(2, mu) + coeff*vr + omp_loc_moments(2, mu) = omp_loc_moments(2, mu) + coeff*vr !!$OMP END ATOMIC !!$OMP ATOMIC UPDATE - p_loc_moments(3, mu) = p_loc_moments(3, mu) + coeff*vthet + omp_loc_moments(3, mu) = omp_loc_moments(3, mu) + coeff*vthet !!$OMP END ATOMIC !!$OMP ATOMIC UPDATE - p_loc_moments(4, mu) = p_loc_moments(4, mu) + coeff*vz + omp_loc_moments(4, mu) = omp_loc_moments(4, mu) + coeff*vz !!$OMP END ATOMIC !!$OMP ATOMIC UPDATE - p_loc_moments(5, mu) = p_loc_moments(5, mu) + coeff*vr*vr + omp_loc_moments(5, mu) = omp_loc_moments(5, mu) + coeff*vr*vr !!$OMP END ATOMIC !!$OMP ATOMIC UPDATE - p_loc_moments(6, mu) = p_loc_moments(6, mu) + coeff*vr*vthet + omp_loc_moments(6, mu) = omp_loc_moments(6, mu) + coeff*vr*vthet !!$OMP END ATOMIC !!$OMP ATOMIC UPDATE - p_loc_moments(7, mu) = p_loc_moments(7, mu) + coeff*vr*vz + omp_loc_moments(7, mu) = omp_loc_moments(7, mu) + coeff*vr*vz !!$OMP END ATOMIC !!$OMP ATOMIC UPDATE - p_loc_moments(8, mu) = p_loc_moments(8, mu) + coeff*vthet*vthet + omp_loc_moments(8, mu) = omp_loc_moments(8, mu) + coeff*vthet*vthet !!$OMP END ATOMIC !!$OMP ATOMIC UPDATE - p_loc_moments(9, mu) = p_loc_moments(9, mu) + coeff*vthet*vz + omp_loc_moments(9, mu) = omp_loc_moments(9, mu) + coeff*vthet*vz !!$OMP END ATOMIC !!$OMP ATOMIC UPDATE - p_loc_moments(10, mu) = p_loc_moments(10, mu) + coeff*vz*vz + omp_loc_moments(10, mu) = omp_loc_moments(10, mu) + coeff*vz*vz !!$OMP END ATOMIC - call omp_unset_lock(mu_lock(mu)) + !call omp_unset_lock(mu_lock(mu)) END DO END DO END DO END DO -!$OMP END PARALLEL DO +!!$OMP END PARALLEL DO + !$OMP END DO NOWAIT + !$OMP CRITICAL(loc_moments_reduce) + Do i=1,size(p_loc_moments,2) + p_loc_moments(:,i)=p_loc_moments(:,i)+omp_loc_moments(:,i) + end do + !$OMP END CRITICAL(loc_moments_reduce) END IF DEALLOCATE (fun, fun2, zleft, rleft) + END subroutine deposit_moments !--------------------------------------------------------------------------- !> @author !> Patryk kaminski EPFL/SPC !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief !> Deposit the particles charges (q) from p on the grid ! 
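!> Parallelisation: as in deposit_moments, every thread deposits into a private
!> omp_loc_moments vector inside the orphaned !$OMP DO, and the per-thread
!> vectors are reduced into p_loc_moments in a named CRITICAL section, avoiding
!> an !$OMP ATOMIC update for every spline coefficient. chargecoeff is the
!> normalised charge density carried by one macro-particle.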
!> @param[in] p the particles type storing the desired specie parameters !> @param[in] p_loc_moments local tensor used to store the moments of the given specie !--------------------------------------------------------------------------- SUBROUTINE deposit_charge(p, p_loc_moments) USE bsplines use mpi USE constants USE basic, ONLY: Zbounds, rnorm, phinorm USE beam, ONLY: particles USE mpihelper USE geometry USE omp_lib TYPE(particles), INTENT(IN):: p REAL(kind=db), DIMENSION(:), INTENT(INOUT):: p_loc_moments + REAL(kind=db), DIMENSION(:), allocatable:: omp_loc_moments INTEGER ::irow, jcol, it, jw, mu, i, k, iend, nbunch INTEGER, DIMENSION(:), ALLOCATABLE::zleft, rleft REAL(kind=db), ALLOCATABLE :: fun(:, :, :), fun2(:, :, :) INTEGER:: num_threads, curr_thread real(kind=db):: contrib, chargecoeff num_threads = omp_get_max_threads() nbunch = p%Nploc/num_threads ! Particle bunch size used when calling basfun nbunch = max(nbunch, 1) ! Particle bunch size used when calling basfun nbunch = min(nbunch, 64) ! Particle bunch size used when calling basfun chargecoeff = p%weight*p%q/(2*pi*eps_0*phinorm*rnorm) ! Normalized charge density simulated by each macro particle ! Assemble rhs IF (p%Nploc .ne. 0) THEN - !$OMP PARALLEL DEFAULT(SHARED), PRIVATE(i,zleft, rleft, jw, it, iend, irow, jcol, mu, k, fun, fun2, contrib) + !!!$OMP PARALLEL DEFAULT(SHARED), PRIVATE(i,zleft, rleft, jw, it, iend, irow, jcol, mu, k, fun, fun2, contrib) ALLOCATE (zleft(nbunch), rleft(nbunch)) ALLOCATE (fun(1:femorder(1) + 1, 0:0, nbunch), fun2(1:femorder(2) + 1, 0:0, nbunch)) ! Arrays keeping values of b-splines at gauss node + allocate(omp_loc_moments(size(p_loc_moments))) + omp_loc_moments=0 zleft=0 rleft=0 curr_thread=omp_get_thread_num() !$OMP DO DO i = 1, p%Nploc, nbunch ! Avoid segmentation fault by accessing non relevant data iend = min(i + nbunch - 1, p%Nploc) k = iend - i + 1 ! Localize the particle rleft(1:k) = p%rindex(i:iend) zleft(1:k) = p%zindex(i:iend) ! Compute the value of the splines at the particles positions CALL basfun(p%pos(3,i:iend), splrz%sp1, fun, zleft(1:k) + 1) CALL basfun(p%pos(1,i:iend), splrz%sp2, fun2, rleft(1:k) + 1) !CALL geom_weight(p%Z(i:iend),p%R(i:iend),wgeom) DO k = 1, (iend - i + 1) DO jw = 1, (femorder(2) + 1) DO it = 1, (femorder(1) + 1) irow = zleft(k) + it - Zbounds(mpirank) jcol = rleft(k) + jw mu = irow + (jcol - 1)*(loc_zspan) ! Add contribution of particle k to rhs grid point mu contrib = fun(it, 0, k)*fun2(jw, 0, k)*p%geomweight(0,i + k - 1)*chargecoeff - !$OMP ATOMIC UPDATE - p_loc_moments(mu) = p_loc_moments(mu) + contrib - !$OMP END ATOMIC + omp_loc_moments(mu) = omp_loc_moments(mu) + contrib END DO END DO END DO END DO -!$OMP END DO +!$OMP END DO NOWAIT DEALLOCATE (fun, fun2, zleft, rleft) -!$OMP END PARALLEL +!!!$OMP END PARALLEL + !$OMP CRITICAL(loc_charge_reduce) + Do i=1,size(p_loc_moments) + !!$OMP ATOMIC + p_loc_moments(i)=p_loc_moments(i)+omp_loc_moments(i) + !!$OMP END ATOMIC + end do + !$OMP END CRITICAL(loc_charge_reduce) END IF + END subroutine deposit_charge !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief !> Do the communication of the local moment matrices between mpi workers for the overlap grid points !> and reduce the result on the host ! 
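!> Each worker holds loc_rhs on its local z-slice plus femorder(1) overlapping
!> splines. A single thread exchanges the overlap with the neighbouring
!> processes (rhsoverlapcomm), the received buffer is added in with a
!> work-shared loop, and MPI_GATHERV with the dedicated overlap datatypes
!> assembles the full rhs on rank 0.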
!--------------------------------------------------------------------------- SUBROUTINE rhs_gather(rhs) USE mpihelper USE Basic, ONLY: Zbounds, mpirank, leftproc, rightproc REAL(kind=db), DIMENSION(:), INTENT(INOUT):: rhs INTEGER:: ierr, i, j INTEGER:: displs(mpisize), counts(mpisize) INTEGER:: overlap_type INTEGER:: rcvoverlap_type displs = Zbounds(0:mpisize - 1) counts = Zbounds(1:mpisize) - Zbounds(0:mpisize - 1) counts(mpisize) = counts(mpisize) + femorder(1) + !$OMP SINGLE CALL rhsoverlapcomm(mpirank, leftproc, rightproc, loc_rhs, nrank, femorder, loc_zspan - femorder(1)) + !$OMP END SINGLE + + IF (mpirank .gt. 0) THEN - !$OMP PARALLEL DO SIMD DEFAULT(SHARED) private(i) + !$OMP DO SIMD DO j = 1, femorder(1) DO i = 1, nrank(2) loc_rhs((i - 1)*loc_zspan + j) = loc_rhs((i - 1)*loc_zspan + j)& & + rhsoverlap_buffer(nrank(2)*(j - 1) + i) END DO END DO - !$OMP END PARALLEL DO SIMD + !$OMP END DO SIMD END IF ! Set communication vector type overlap_type = rhsoverlap_type rcvoverlap_type = rcvrhsoverlap_type + + !$OMP SINGLE IF (mpirank .eq. 0) THEN rhs = 0 END IF CALL MPI_GATHERV(loc_rhs, counts(mpirank + 1), overlap_type, & & rhs, counts, displs, rcvoverlap_type, 0, MPI_COMM_WORLD, ierr) + !$OMP END SINGLE END SUBROUTINE rhs_gather !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief !> Do the communication of the local moment matrices between mpi workers for the overlap grid points !> and reduce the result on the host ! !--------------------------------------------------------------------------- SUBROUTINE moments_gather(moment) USE mpihelper USE Basic, ONLY: Zbounds, mpirank, leftproc, rightproc REAL(kind=db), DIMENSION(:, :), INTENT(INOUT):: moment INTEGER:: ierr, i, j INTEGER:: displs(mpisize), counts(mpisize) displs = Zbounds(0:mpisize - 1) counts = Zbounds(1:mpisize) - Zbounds(0:mpisize - 1) counts(mpisize) = counts(mpisize) + femorder(1) + !$OMP SINGLE CALL momentsoverlapcomm(mpirank, leftproc, rightproc, loc_moments, nrank, femorder, loc_zspan - femorder(1)) + !$OMP END SINGLE + IF (mpirank .gt. 0) THEN - !$OMP PARALLEL DO SIMD DEFAULT(SHARED) private(i) + !!$OMP PARALLEL DO SIMD DEFAULT(SHARED) private(i) + !$OMP DO SIMD DO j = 1, femorder(1) DO i = 1, nrank(2) loc_moments(1:nbmoments, (i - 1)*loc_zspan + j) = loc_moments(1:nbmoments, (i - 1)*loc_zspan + j)& & + momentsoverlap_buffer(nbmoments*(nrank(2)*(j - 1) + i - 1) + 1:nbmoments*(nrank(2)*(j - 1) + i)) END DO END DO - !$OMP END PARALLEL DO SIMD + !$OMP END DO SIMD END IF + + !$OMP SINGLE ! Set communication vector type IF (mpirank .eq. 0) THEN moment = 0 END IF CALL MPI_GATHERV(loc_moments, counts(mpirank + 1), momentsoverlap_type, & & moment, counts, displs, rcvmomentsoverlap_type, 0, MPI_COMM_WORLD, ierr) + !$OMP END SINGLE NOWAIT + END SUBROUTINE moments_gather !--------------------------------------------------------------------------- !> @author !> Patryk kaminski EPFL/SPC !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief !> Solves Poisson equation using FEM. Distributes the result on all MPI workers and interpolate the electric forces !> for each particle. ! 
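!> When nlweb is set, the rhs is first projected onto the reduced web-spline
!> basis (vmx with etilde), the reduced system is solved with MUMPS on rank 0
!> only, and the solution is broadcast and expanded back with etildet;
!> otherwise the full FEM system femat is solved directly. The spline
!> coefficients are then converted to ppform, and every it2d steps rank 0 also
!> evaluates pot, Er and Ez on the grid for the 2d diagnostics.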
!--------------------------------------------------------------------------- SUBROUTINE poisson(splinevar) USE basic, ONLY: rhs, nrank, pot, nlend USE bsplines, ONLY: spline2d, gridval USE mumps_bsplines, ONLY: bsolve, vmx USE futils Use geometry type(spline2d):: splinevar INTEGER:: ierr, jder(2) real(kind=db), allocatable::reducedrhs(:) real(kind=db), allocatable:: reducedsol(:), tempcol(:) jder(1) = 0 jder(2) = 0 - + !$OMP SINGLE if (nlweb) then ! we use the web-spline reduction for stability allocate (reducedrhs(nrank(1)*nrank(2))) allocate (reducedsol(nbreducedspline)) allocate (tempcol(nrank(1)*nrank(2))) if(mpirank.eq.0) then ! Only the root process solves Poisson reducedrhs = vmx(etilde, rhs) Call bsolve(reduccedmat, reducedrhs(1:nbreducedspline), reducedsol) end if CALL MPI_Bcast(reducedsol, nbreducedspline, db_type, 0, MPI_COMM_WORLD, ierr) tempcol = 0 tempcol(1:nbreducedspline) = reducedsol !phi_spline = 0 phi_spline = vmx(etildet, tempcol) else CALL bsolve(femat, rhs, phi_spline) CALL MPI_Bcast(phi_spline, nrank(1)*nrank(2), db_type, 0, MPI_COMM_WORLD, ierr) end if matcoef = reshape(phi_spline, (/nrank(1), nrank(2)/)) - + !$OMP END SINGLE + ! update the ppform coefficients CALL updt_ppform2d(splinevar, matcoef) + +!$OMP BARRIER + + !$OMP SINGLE IF (mpirank .eq. 0 .and. (modulo(step, it2d) .eq. 0 .or. nlend)) THEN ! On the root process, compute the electric field for diagnostic purposes CALL gridval(splinevar, vec1, vec2, pot, (/0, 0/)) CALL gridval(splinevar, vec1, vec2, Ez, (/1, 0/)) CALL gridval(splinevar, vec1, vec2, Er, (/0, 1/)) Ez = -pot*gridwdir(1,:) - Ez*gridwdir(0,:) - gtilde(1,:) Er = -pot*gridwdir(2,:) - Er*gridwdir(0,:) - gtilde(2,:) pot = pot*gridwdir(0,:) + gtilde(0,:) END IF - + !$OMP END SINGLE END SUBROUTINE poisson !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief !> Computes the electric fields and potential at the particles position for particles !> between positions nstart and nend in the list ! !> @param[in] p the particles type storing the desired specie parameters !> @param[in] nstart starting index for the particle list !> @param[in] nend ending index for the particle list !--------------------------------------------------------------------------- SUBROUTINE EFieldscompatparts(p, nstart, nend) Use beam, ONLY: particles Use geometry Use splinebound TYPE(particles), INTENT(INOUT):: p INTEGER, OPTIONAL::nstart, nend INTEGER:: i, iend, nst, nnd INTEGER:: nbunch INTEGER:: num_threads Real(kind=db), ALLOCATABLE:: erext(:), ezext(:), gtildeloc(:, :) if (.not. present(nstart)) nst = 1 if (.not. present(nend)) nnd = p%Nploc - num_threads = omp_get_max_threads() - nbunch = (nnd - nst + 1)/num_threads ! Particle bunch size used when calling basfun - nbunch = max(nbunch, 1) ! Particle bunch size used when calling basfun - nbunch = min(nbunch, 64) ! Particle bunch size used when calling basfun + !num_threads = omp_get_max_threads() + !nbunch = (nnd - nst + 1)/num_threads ! Particle bunch size used when calling basfun + !nbunch = max(nbunch, 1) ! Particle bunch size used when calling basfun + nbunch = 64 ! Particle bunch size used when calling basfun Allocate (erext(nbunch), ezext(nbunch), gtildeloc(0:2,0:nbunch - 1)) ! Evaluate the electric potential and field at the particles position - !$OMP PARALLEL DO PRIVATE(iend,i) firstprivate(erext,ezext,gtildeloc) + + !$OMP DO DO i = nst, nnd, nbunch ! 
Avoid segmentation fault by accessing non relevant data iend = min(i + nbunch - 1, nnd) - CALL speval(splrz, p%pos(3,i:iend), p%pos(1,i:iend),p%Zindex(i:iend),p%Rindex(i:iend), p%pot(i:iend), p%E(2,i:iend), p%E(1,i:iend)) CALL speval(splrz_ext, p%pos(3,i:iend), p%pos(1,i:iend),p%Zindex(i:iend),p%Rindex(i:iend), p%potxt(i:iend)) Call total_gtilde(p%pos(3,i:iend), p%pos(1,i:iend), gtildeloc(:,0:iend - i),p%geomweight(:,i:iend)) p%E(2,i:iend) = -p%E(2,i:iend)*p%geomweight(0,i:iend) - p%pot(i:iend)*p%geomweight(1,i:iend) - gtildeloc(1,0:iend - i) p%E(1,i:iend) = -p%E(1,i:iend)*p%geomweight(0,i:iend) - p%pot(i:iend)*p%geomweight(2,i:iend) - gtildeloc(2,0:iend - i) p%pot(i:iend) = p%geomweight(0,i:iend)*p%pot(i:iend) + gtildeloc(0,0:iend - i) p%potxt(i:iend) = p%geomweight(0,i:iend)*p%potxt(i:iend) + gtildeloc(0,0:iend - i) END DO - !$OMP END PARALLEL DO + !$OMP END DO NOWAIT END SUBROUTINE EFieldscompatparts !--------------------------------------------------------------------------- !> @author !> Patryk kaminski EPFL/SPC !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief !> Constucts the FEM matrix using bsplines initialized in fields_init !--------------------------------------------------------------------------- SUBROUTINE fematrix(mat) USE bsplines USE geometry USE omp_lib USE sparse type(mumps_mat):: mat REAL(kind=db), ALLOCATABLE :: xgauss(:, :), wgauss(:), wgeom(:, :) INTEGER, ALLOCATABLE :: f(:, :), aux(:) REAL(kind=db), ALLOCATABLE :: coefs(:) REAL(kind=db), ALLOCATABLE :: fun(:, :), fun2(:, :) REAL(kind=db) :: contrib INTEGER, ALLOCATABLE :: idert(:, :), iderw(:, :), iderg(:, :) INTEGER :: i, j, jt, iw, irow, jcol, mu, igauss, iterm, irow2, jcol2, mu2, kterms, gausssize kterms=8 ALLOCATE (fun(1:femorder(1) + 1, 0:1), fun2(1:femorder(2) + 1, 0:1))!Arrays keeping values of b-splines at gauss node !ALLOCATE(xgauss(ngauss(1)*ngauss(2),2), wgauss(ngauss(1)*ngauss(2)),zg(ngauss(1)),rg(ngauss(2)), wzg(ngauss(1)), wrg(ngauss(2))) !Gaussian nodes and weights arrays ALLOCATE (f((femorder(1) + 1)*(femorder(2) + 1), 2), aux(femorder(1) + 1)) !Auxiliary arrays ordering bsplines ALLOCATE (idert(kterms, 2), iderw(kterms, 2), coefs(kterms), iderg(kterms, 2)) !Pointers on the order of derivatives call timera(0, "fematrix") ! Constuction of auxiliary array ordering bsplines in given interval DO i = 1, (femorder(1) + 1) aux(i) = i END DO DO i = 1, (femorder(2) + 1) f((i - 1)*(femorder(1) + 1) + 1:i*(femorder(1) + 1), 1) = aux f((i - 1)*(femorder(1) + 1) + 1:i*(femorder(1) + 1), 2) = i END DO ! Assemble FEM matrix !$OMP PARALLEL DO DEFAULT(SHARED), PRIVATE(j,i,xgauss,wgauss,gausssize,wgeom, igauss, iterm,jt,irow,jcol, mu, iw, irow2,jcol2, mu2, contrib, iderw, idert, iderg, coefs, fun, fun2) DO j = 1, nr ! Loop on r position DO i = 1, nz ! Loop on z position !! Computation of gauss weight and position in r and z direction for gaussian integration Call calc_gauss(splrz, ngauss, i, j, xgauss, wgauss, gausssize) if (gausssize .gt. 0) then If (allocated(wgeom)) deallocate (wgeom) ALLOCATE (wgeom(0:2,gausssize)) CALL geom_weight(xgauss(:, 1), xgauss(:, 2), wgeom) End if DO igauss = 1, gausssize ! 
Loop on gaussian weights and positions CALL basfun(xgauss(igauss, 1), splrz%sp1, fun, i) CALL basfun(xgauss(igauss, 2), splrz%sp2, fun2, j) CALL coefeq(xgauss(igauss, :), idert, iderw, iderg, coefs, kterms) DO jt = 1, (1 + femorder(1))*(femorder(2) + 1) irow = i + f(jt, 1) - 1; jcol = j + f(jt, 2) - 1 mu = irow + (jcol - 1)*nrank(1) call omp_set_lock(mu_lock(mu)) DO iw = 1, (1 + femorder(1))*(femorder(2) + 1) irow2 = i + f(iw, 1) - 1; jcol2 = j + f(iw, 2) - 1 mu2 = irow2 + (jcol2 - 1)*nrank(1) contrib=0.0_db DO iterm = 1, kterms ! Loop on the two integration dimensions contrib = contrib+wgeom(iderg(iterm, 1),igauss)*wgeom(iderg(iterm, 2),igauss)* & & fun(f(jt, 1), idert(iterm, 1))*fun(f(iw, 1), idert(iterm, 2))* & & fun2(f(jt, 2), iderw(iterm, 1))*fun2(f(iw, 2), iderw(iterm, 2))* & & wgauss(igauss)*coefs(iterm) END DO CALL updt_sploc(mat%mat%row(mu), mu2, contrib) END DO call omp_unset_lock(mu_lock(mu)) END DO END DO END DO END DO !$OMP End parallel do DEALLOCATE (f, aux) DEALLOCATE (idert, iderw, coefs, fun, fun2) call timera(1, "fematrix") END SUBROUTINE fematrix !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief !> Computes the volume of the splines cells needed to display the density in post-processing !--------------------------------------------------------------------------- SUBROUTINE comp_volume USE bsplines USE geometry USE basic, ONLY: Volume REAL(kind=db), ALLOCATABLE :: xgauss(:, :), wgauss(:), wgeom(:, :) INTEGER, ALLOCATABLE :: f(:, :), aux(:) REAL(kind=db), ALLOCATABLE :: coefs(:) REAL(kind=db), ALLOCATABLE :: fun(:, :), fun2(:, :), gtildeintegr(:, :), ftestpt(:, :) Integer, ALLOCATABLE, Dimension(:) :: idg, idt, idp, idw INTEGER :: i, j, jt, irow, jcol, mu, igauss, gausssize, iterm, nterms Real(kind=db)::newcontrib call timera(0, "comp_volume") ALLOCATE (fun(1:femorder(1) + 1, 0:1), fun2(1:femorder(2) + 1, 0:1))!Arrays keeping values of b-splines at gauss node !ALLOCATE(xgauss(ngauss(1)*ngauss(2),2), wgauss(ngauss(1)*ngauss(2)),zg(ngauss(1)),rg(ngauss(2)), wzg(ngauss(1)), wrg(ngauss(2))) !Gaussian nodes and weights arrays ALLOCATE (f((femorder(1) + 1)*(femorder(2) + 1), 2), aux(femorder(1) + 1)) !Auxiliary arrays ordering bsplines nterms = 4 Allocate (idg(nterms), idt(nterms), idw(nterms), idp(nterms), coefs(nterms)) ! Constuction of auxiliary array ordering bsplines in given interval DO i = 1, (femorder(1) + 1) aux(i) = i END DO DO i = 1, (femorder(2) + 1) f((i - 1)*(femorder(1) + 1) + 1:i*(femorder(1) + 1), 1) = aux f((i - 1)*(femorder(1) + 1) + 1:i*(femorder(1) + 1), 2) = i END DO volume = 0 if (walltype .lt. 0) fverif = 0 ! Assemble Volume matrix !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(j,i,xgauss,wgauss,gausssize,wgeom, igauss, gtildeintegr, ftestpt, iterm,jt,irow,jcol, mu, idw, idt, idg, idp, coefs, fun, fun2, newcontrib) DO j = 1, nr ! Loop on r position DO i = 1, nz ! Loop on z position ! Computation of gauss weight and position in r and z direction for gaussian integration Call calc_gauss(splrz, ngauss, i, j, xgauss, wgauss, gausssize) If (allocated(wgeom)) deallocate (wgeom) if (gausssize .gt. 0) then ALLOCATE (wgeom(0:2,size(xgauss, 1))) CALL geom_weight(xgauss(:, 1), xgauss(:, 2), wgeom) End if If (allocated(gtildeintegr)) deallocate (gtildeintegr) ALLOCATE (gtildeintegr(0:2,size(xgauss, 1))) Call total_gtilde(xgauss(:, 1), xgauss(:, 2), gtildeintegr,wgeom) if (walltype .lt. 
0) then If (allocated(ftestpt)) deallocate (ftestpt) ALLOCATE (ftestpt(0:0,size(xgauss, 1))) CALL ftest(xgauss(:, 1), xgauss(:, 2), ftestpt) end if DO igauss = 1, gausssize ! Loop on gaussian weights and positions CALL basfun(xgauss(igauss, 1), splrz%sp1, fun, i) CALL basfun(xgauss(igauss, 2), splrz%sp2, fun2, j) CALL coefeqext(xgauss(igauss, :), idt, idw, idg, idp, coefs) DO jt = 1, (1 + femorder(1))*(femorder(2) + 1) irow = i + f(jt, 1) - 1; jcol = j + f(jt, 2) - 1 mu = irow + (jcol - 1)*nrank(1) newcontrib = 2*pi*fun(f(jt, 1), 0)*fun2(f(jt, 2), 0)*wgauss(igauss)*xgauss(igauss, 2)!*wgeom(igauss,0) !$OMP ATOMIC UPDATE volume(mu) = volume(mu) + newcontrib !$OMP END ATOMIC if (walltype .lt. 0) THEN newcontrib = ftestpt(0,igauss)*fun(f(jt, 1), 0)*fun2(f(jt, 2), 0)& &*wgeom(0,igauss)*wgauss(igauss)*xgauss(igauss, 2) !$OMP ATOMIC UPDATE fverif(mu) = fverif(mu) + newcontrib !$OMP END ATOMIC end if END DO END DO END DO END DO !$OMP END PARALLEL DO !DEALLOCATE(xgauss, wgauss,zg,rg, wzg, wrg) DEALLOCATE (f, aux) DEALLOCATE (fun, fun2) call timera(1, "comp_volume") END SUBROUTINE comp_volume !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief !> Computes the gradient of the gtilde function for the web-spline method needed to correctly apply the dirichlet boundary conditions !--------------------------------------------------------------------------- SUBROUTINE comp_gradgtilde USE bsplines USE geometry REAL(kind=db), ALLOCATABLE :: xgauss(:, :), wgauss(:), wgeom(:, :) INTEGER, ALLOCATABLE :: f(:, :), aux(:) REAL(kind=db), ALLOCATABLE :: coefs(:) REAL(kind=db), ALLOCATABLE :: fun(:, :), fun2(:, :), gtildeintegr(:, :), ftestpt(:, :) Integer, ALLOCATABLE, Dimension(:) :: idg, idt, idp, idw INTEGER :: i, j, jt, irow, jcol, mu, igauss, gausssize, iterm, nterms Real(kind=db)::newcontrib !call timera(0, "comp_gradgtilde") ALLOCATE (fun(1:femorder(1) + 1, 0:1), fun2(1:femorder(2) + 1, 0:1))!Arrays keeping values of b-splines at gauss node !ALLOCATE(xgauss(ngauss(1)*ngauss(2),2), wgauss(ngauss(1)*ngauss(2)),zg(ngauss(1)),rg(ngauss(2)), wzg(ngauss(1)), wrg(ngauss(2))) !Gaussian nodes and weights arrays ALLOCATE (f((femorder(1) + 1)*(femorder(2) + 1), 2), aux(femorder(1) + 1)) !Auxiliary arrays ordering bsplines nterms = 4 Allocate (idg(nterms), idt(nterms), idw(nterms), idp(nterms), coefs(nterms)) ! Constuction of auxiliary array ordering bsplines in given interval DO i = 1, (femorder(1) + 1) aux(i) = i END DO DO i = 1, (femorder(2) + 1) f((i - 1)*(femorder(1) + 1) + 1:i*(femorder(1) + 1), 1) = aux f((i - 1)*(femorder(1) + 1) + 1:i*(femorder(1) + 1), 2) = i END DO - gradgtilde = 0 + !$OMP SINGLE + gradgtilde = 0 + !$OMP END SINGLE -! Assemble Volume matrix - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(j,i,xgauss,wgauss,gausssize,wgeom, igauss, gtildeintegr, ftestpt, iterm,jt,irow,jcol, mu, idw, idt, idg, idp, coefs, fun, fun2, newcontrib) + ! Assemble gradgtilde matrix + !! $OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(j,i,xgauss,wgauss,gausssize,wgeom, igauss, gtildeintegr, ftestpt, iterm,jt,irow,jcol, mu, idw, idt, idg, idp, coefs, fun, fun2, newcontrib) + !$OMP DO DO j = 1, nr ! Loop on r position DO i = 1, nz ! Loop on z position ! Computation of gauss weight and position in r and z direction for gaussian integration Call calc_gauss(splrz, ngauss, i, j, xgauss, wgauss, gausssize) If (allocated(wgeom)) deallocate (wgeom) if (gausssize .gt. 
0) then ALLOCATE (wgeom(0:2,size(xgauss, 1))) CALL geom_weight(xgauss(:, 1), xgauss(:, 2), wgeom) End if If (allocated(gtildeintegr)) deallocate (gtildeintegr) ALLOCATE (gtildeintegr(0:2,size(xgauss, 1))) Call total_gtilde(xgauss(:, 1), xgauss(:, 2), gtildeintegr,wgeom) if (walltype .lt. 0) then If (allocated(ftestpt)) deallocate (ftestpt) ALLOCATE (ftestpt(0:0,size(xgauss, 1))) CALL ftest(xgauss(:, 1), xgauss(:, 2), ftestpt) end if DO igauss = 1, gausssize ! Loop on gaussian weights and positions CALL basfun(xgauss(igauss, 1), splrz%sp1, fun, i) CALL basfun(xgauss(igauss, 2), splrz%sp2, fun2, j) CALL coefeqext(xgauss(igauss, :), idt, idw, idg, idp, coefs) DO jt = 1, (1 + femorder(1))*(femorder(2) + 1) irow = i + f(jt, 1) - 1; jcol = j + f(jt, 2) - 1 mu = irow + (jcol - 1)*nrank(1) newcontrib = 0 Do iterm = 1, nterms newcontrib = newcontrib + wgeom( idg(iterm),igauss)*gtildeintegr( idp(iterm),igauss)* & & fun(f(jt, 1), idt(iterm))*fun2(f(jt, 2), idw(iterm))* & & wgauss(igauss)*coefs(iterm) End do !$OMP ATOMIC UPDATE gradgtilde(mu) = gradgtilde(mu) + newcontrib !$OMP END ATOMIC END DO END DO END DO END DO - !$OMP END PARALLEL DO + !!! $OMP END PARALLEL DO + !$OMP END DO !DEALLOCATE(xgauss, wgauss,zg,rg, wzg, wrg) DEALLOCATE (f, aux) DEALLOCATE (fun, fun2) !call timera(1, "comp_gradgtilde") END SUBROUTINE comp_gradgtilde !--------------------------------------------------------------------------- !> @author !> Patryk kaminski EPFL/SPC !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief !> Imposes the dirichlet boundary conditions on the FEM matrix for the case where we use regular splines ( not web-splines). !--------------------------------------------------------------------------- SUBROUTINE fe_dirichlet REAL(kind=db), ALLOCATABLE :: arr(:) INTEGER :: i ALLOCATE (arr(nrank(1)*nrank(2))) DO i = 1, nrank(1) IF (rgrid(0) .ne. 
0.0_db) THEN arr = 0; arr(i) = 1; CALL putrow(femat, i, arr) END IF arr = 0; arr(nrank(1)*nrank(2) + 1 - i) = 1; CALL putrow(femat, nrank(1)*nrank(2) + 1 - i, arr) END DO DEALLOCATE (arr) END SUBROUTINE fe_dirichlet !________________________________________________________________________________ SUBROUTINE coefeq(x, idt, idw, idg, c, kterms) REAL(kind=db), INTENT(in) :: x(:) INTEGER, INTENT(out) :: idt(:, :), idw(:, :), idg(:, :),kterms REAL(kind=db), INTENT(out) :: c(:) kterms=8 c = x(2) idt(1, 1) = 0 idt(1, 2) = 0 idw(1, 1) = 0 idw(1, 2) = 0 idg(1, 1) = 1 idg(1, 2) = 1 idt(2, 1) = 0 idt(2, 2) = 1 idw(2, 1) = 0 idw(2, 2) = 0 idg(2, 1) = 1 idg(2, 2) = 0 idt(3, 1) = 1 idt(3, 2) = 0 idw(3, 1) = 0 idw(3, 2) = 0 idg(3, 1) = 0 idg(3, 2) = 1 idt(4, 1) = 1 idt(4, 2) = 1 idw(4, 1) = 0 idw(4, 2) = 0 idg(4, 1) = 0 idg(4, 2) = 0 idt(5, 1) = 0 idt(5, 2) = 0 idw(5, 1) = 0 idw(5, 2) = 0 idg(5, 1) = 2 idg(5, 2) = 2 idt(6, 1) = 0 idt(6, 2) = 0 idw(6, 1) = 0 idw(6, 2) = 1 idg(6, 1) = 2 idg(6, 2) = 0 idt(7, 1) = 0 idt(7, 2) = 0 idw(7, 1) = 1 idw(7, 2) = 0 idg(7, 1) = 0 idg(7, 2) = 2 idt(8, 1) = 0 idt(8, 2) = 0 idw(8, 1) = 1 idw(8, 2) = 1 idg(8, 1) = 0 idg(8, 2) = 0 END SUBROUTINE coefeq SUBROUTINE coefeqext(x, idt, idw, idg, idp, c) REAL(kind=db), INTENT(in) :: x(:) INTEGER, INTENT(out) :: idp(:), idt(:), idw(:), idg(:) REAL(kind=db), INTENT(out) :: c(:) c(1) = x(2) idp(1) = 1 idg(1) = 1 idt(1) = 0 idw(1) = 0 c(2) = x(2) idp(2) = 1 idg(2) = 0 idt(2) = 1 idw(2) = 0 c(3) = x(2) idp(3) = 2 idg(3) = 2 idt(3) = 0 idw(3) = 0 c(4) = x(2) idp(4) = 2 idg(4) = 0 idt(4) = 0 idw(4) = 1 END SUBROUTINE coefeqext !--------------------------------------------------------------------------- !> @author !> Patryk kaminski EPFL/SPC !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief !> Computes the magnetic field on the grid according to a magnetic mirror, !> or according to the linear interpolation of the values on the !> grid saved in h5 file stored at magfile. !> @param[in] magfile filname of .h5 file containing the definitions of A and B !--------------------------------------------------------------------------- SUBROUTINE magnet(magfile) USE basic, ONLY: B0, Rcurv, rgrid, zgrid, width, rnorm, nr, nz, bnorm USE constants, ONLY: Pi CHARACTER(LEN=*), INTENT(IN), OPTIONAL:: magfile REAL(kind=db) :: rg, zg, halfLz, MirrorRatio INTEGER :: i, rindex IF (len_trim(magfile) .lt. 1) THEN halfLz = (zgrid(nz) + zgrid(0))/2 MirrorRatio = (Rcurv - 1)/(Rcurv + 1) DO i = 1, (nr + 1)*(nz + 1) rindex = (i - 1)/(nz + 1) rg = rgrid(rindex) zg = zgrid(i - rindex*(nz + 1) - 1) - halfLz Br(i) = -B0*MirrorRatio*SIN(2*pi*zg/width*rnorm)*bessi1(2*pi*rg/width*rnorm)/bnorm Bz(i) = B0*(1 - MirrorRatio*COS(2*pi*zg/width*rnorm)*bessi0(2*pi*rg/width*rnorm))/bnorm Athet(i) = 0.5*B0*(rg*rnorm - width/pi*MirrorRatio*bessi1(2*pi*rg/width*rnorm)*COS(2*pi*zg/width*rnorm)) END DO ELSE CALL load_mag_from_h5(magfile) END IF END SUBROUTINE magnet !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! 
DESCRIPTION: !> !> @brief !> Loads the magnetic field defined in the .h5 file at location magfile !> @param[in] magfile filname of .h5 file containing the definitions of A and B !--------------------------------------------------------------------------- SUBROUTINE load_mag_from_h5(magfile) USE basic, ONLY: B0, rnorm, bnorm, bscaling USE constants, ONLY: Pi USE futils USE bsplines CHARACTER(LEN=*), INTENT(IN):: magfile REAL(kind=db), ALLOCATABLE :: magr(:), magz(:) REAL(kind=db), ALLOCATABLE :: tempBr(:, :), tempBz(:, :), tempAthet(:, :) real(kind=db), allocatable:: c(:,:) type(spline2d):: Maginterpolation REAL(kind=db) :: maxB INTEGER :: magfid, dims(2) LOGICAL:: B_is_saved INTEGER :: magn(2), magrank CALL openf(trim(magfile), magfid, 'r', real_prec='d') CALL getdims(magfid, '/mag/Athet', magrank, magn) ALLOCATE (magr(magn(2)), magz(magn(1))) ALLOCATE (tempAthet(magn(1), magn(2)), tempBr(magn(1), magn(2)), tempBz(magn(1), magn(2))) ! Read r and z coordinates for the definition of A_\thet, and B CALL getarr(magfid, '/mag/r', magr) CALL getarr(magfid, '/mag/z', magz) CALL getarr(magfid, '/mag/Athet', tempAthet) IF (isdataset(magfid, '/mag/Br') .and. isdataset(magfid, '/mag/Bz')) THEN CALL getarr(magfid, '/mag/Br', tempBr) CALL getarr(magfid, '/mag/Bz', tempBz) IF(bscaling .gt. 0) then maxB=sqrt(maxval(tempBr**2+tempBz**2)) tempBr=tempBr/maxB*B0 tempBz=tempBz/maxB*B0 end if B_is_saved = .true. ELSE B_is_saved = .false. END IF magz=magz/rnorm magr=magr/rnorm CALL set_splcoef((/3,3/),magz,magr,Maginterpolation) call get_dim(Maginterpolation,dims) ! Interpolation of the magnetic potential vector allocate(c(dims(1),dims(2))) call get_splcoef(Maginterpolation,tempAthet, c) CALL gridval(Maginterpolation,vec1,vec2, Athet ,(/0,0/),c) if(B_is_saved == .true.)then ! Interpolation of the Axial magnetic field call get_splcoef(Maginterpolation,tempBz, c) CALL gridval(Maginterpolation,vec1,vec2, Bz ,(/0,0/),c) ! Interpolation of the radial magnetic field call get_splcoef(Maginterpolation,tempBr, c) CALL gridval(Maginterpolation,vec1,vec2, Br ,(/0,0/),c) else CALL gridval(Maginterpolation,vec1,vec2, Br,(/1,0/)) Br=-Br CALL gridval(Maginterpolation,vec1,vec2, Bz,(/0,1/)) Bz=Bz+Athet/vec2 end if if( bscaling .lt. 0 ) then maxB = maxval(sqrt(Bz**2 + Br**2)) Bz = Bz/maxB*B0 Br = Br/maxB*B0 end if ! We normalize Br = Br/bnorm Bz = Bz/bnorm CALL closef(magfid) deallocate(c) call destroy_SP(Maginterpolation) END SUBROUTINE load_mag_from_h5 !________________________________________________________________________________ !Modified Bessel functions of the first kind of the zero order FUNCTION bessi0(x) REAL(kind=db) :: bessi0, x REAL(kind=db) :: ax REAL(kind=db) p1, p2, p3, p4, p5, p6, p7, q1, q2, q3, q4, q5, q6, q7, q8, q9, y SAVE p1, p2, p3, p4, p5, p6, p7, q1, q2, q3, q4, q5, q6, q7, q8, q9 DATA p1, p2, p3, p4, p5, p6, p7/1.0d0, 3.5156229d0, 3.0899424d0, 1.2067492d0, 0.2659732d0, 0.360768d-1, 0.45813d-2/ DATA q1, q2, q3, q4, q5, q6, q7, q8, q9/0.39894228d0, 0.1328592d-1, 0.225319d-2, -0.157565d-2, 0.916281d-2, & & -0.2057706d-1, 0.2635537d-1, -0.1647633d-1, 0.392377d-2/ if (abs(x) .lt. 
3.75) then y = (x/3.75)**2 bessi0 = p1 + y*(p2 + y*(p3 + y*(p4 + y*(p5 + y*(p6 + y*p7))))) else ax = abs(x) y = 3.75/ax bessi0 = (exp(ax)/sqrt(ax))*(q1 + y*(q2 + y*(q3 + y*(q4 + y*(q5 + y*(q6 + y*(q7 + y*(q8 + y*q9)))))))) end if return END FUNCTION bessi0 !________________________________________________________________________________ !Modified Bessel functions of the first kind of the first order FUNCTION bessi1(x) REAL(kind=db) :: bessi1, x REAL(kind=db) :: ax REAL(kind=db) p1, p2, p3, p4, p5, p6, p7, q1, q2, q3, q4, q5, q6, q7, q8, q9, y SAVE p1, p2, p3, p4, p5, p6, p7, q1, q2, q3, q4, q5, q6, q7, q8, q9 DATA p1, p2, p3, p4, p5, p6, p7/0.5d0, 0.87890594d0, 0.51498869d0, 0.15084934d0, 0.2658733d-1, 0.301532d-2, 0.32411d-3/ DATA q1, q2, q3, q4, q5, q6, q7, q8, q9/0.39894228d0, -0.3988024d-1, -0.362018d-2, 0.163801d-2, -0.1031555d-1, & & 0.2282967d-1, -0.2895312d-1, 0.1787654d-1, -0.420059d-2/ if (abs(x) .lt. 3.75D0) then y = (x/3.75D0)**2 bessi1 = x*(p1 + y*(p2 + y*(p3 + y*(p4 + y*(p5 + y*(p6 + y*p7)))))) else ax = abs(x) y = 3.75D0/ax bessi1 = (exp(ax)/sqrt(ax))*(q1 + y*(q2 + y*(q3 + y*(q4 + y*(q5 + y*(q6 + y*(q7 + y*(q8 + y*q9)))))))) if (x .lt. 0.) bessi1 = -bessi1 end if return END FUNCTION bessi1 !--------------------------------------------------------------------------- !> @author !> Patryk kaminski EPFL/SPC !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief !> Free the memory used by the fields module !--------------------------------------------------------------------------- SUBROUTINE clean_fields Use bsplines USE basic, ONLY: rhs INTEGER:: i do i = 1, nrank(1)*nrank(2) call omp_destroy_lock(mu_lock(i)) end do DEALLOCATE (mu_lock) DEALLOCATE (matcoef) DEALLOCATE (pot) DEALLOCATE (rhs) DEALLOCATE (loc_rhs) DEALLOCATE (loc_moments) DEALLOCATE (phi_spline) DEALLOCATE (Br, Bz) DEALLOCATE (Er, Ez) DEALLOCATE (vec1, vec2) Call DESTROY_SP(splrz) Call DESTROY_SP(splrz_ext) END SUBROUTINE clean_fields SUBROUTINE updt_sploc(arow, j, val) ! ! Update element j of row arow or insert it in an increasing "index" ! USE sparse TYPE(sprow), TARGET :: arow INTEGER, INTENT(in) :: j DOUBLE PRECISION, INTENT(in) :: val ! TYPE(elt), TARGET :: pre_root TYPE(elt), POINTER :: t, p ! pre_root%next => arow%row0 ! pre_root is linked to the head of the list. t => pre_root DO WHILE (ASSOCIATED(t%next)) p => t%next IF (p%index .EQ. j) THEN p%val = p%val + val RETURN END IF IF (p%index .GT. j) EXIT t => t%next END DO ALLOCATE (p) p = elt(j, val, t%next) t%next => p ! arow%nnz = arow%nnz + 1 arow%row0 => pre_root%next ! 
In case the head is altered END SUBROUTINE updt_sploc SUBROUTINE updt_ppform2d(sp,c) use bsplines TYPE(spline2d), INTENT(inout) :: sp DOUBLE PRECISION, DIMENSION(:,:), INTENT(in) :: c - DOUBLE PRECISION, ALLOCATABLE :: work(:,:,:) + !DOUBLE PRECISION, ALLOCATABLE :: work(:,:,:) INTEGER:: m,mm INTEGER :: d1, d2, k1, k2, n1, n2 d1 = sp%sp1%dim d2 = sp%sp2%dim k1 = sp%sp1%order k2 = sp%sp2%order n1 = sp%sp1%nints n2 = sp%sp2%nints - ALLOCATE(work(d2,k1,n1)) - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(m) + !ALLOCATE(work(d2,k1,n1)) + !$OMP DO DO m=1,SIZE(c,2) - CALL topp0(sp%sp1, c(:,m), work(m,:,:)) + CALL topp0(sp%sp1, c(:,m), ppformwork(m,:,:)) END DO - !$OMP END PARALLEL DO + !$OMP END DO NOWAIT + + !$OMP SINGLE IF( ASSOCIATED(sp%ppform) ) DEALLOCATE(sp%ppform) ALLOCATE(sp%ppform(k1,n1,k2,n2)) + !$OMP END SINGLE - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(m,mm) - DO mm=1,SIZE(work,3) - DO m=1,SIZE(work,2) - CALL topp0(sp%sp2, work(:,m,mm), sp%ppform(m,mm,:,:)) + !$OMP DO + DO mm=1,SIZE(ppformwork,3) + DO m=1,SIZE(ppformwork,2) + CALL topp0(sp%sp2, ppformwork(:,m,mm), sp%ppform(m,mm,:,:)) END DO END DO - !$OMP END PARALLEL DO - DEALLOCATE(work) + !$OMP END DO + !DEALLOCATE(work) end subroutine updt_ppform2d !=========================================================================== SUBROUTINE topp0(sp, c, ppform) ! ! Compute PPFORM of a fuction defined by the spline SP ! and spline coefficients C(1:d) ! use bsplines TYPE(spline1d), INTENT(in) :: sp DOUBLE PRECISION, INTENT(in) :: c(:) DOUBLE PRECISION, INTENT(out) :: ppform(0:,:) INTEGER :: p, nints, i, j, k ! p = sp%order - 1 nints = sp%nints ! ppform = 0.0d0 DO i=1,nints ! on each knot interval DO j=1,p+1 ! all spline in interval i DO k=0,p ! k_th derivatives ppform(k,i) = ppform(k,i) + sp%val0(k,j,i)*c(j+i-1) END DO END DO END DO ! END SUBROUTINE topp0 !+ END MODULE fields diff --git a/src/main.f90 b/src/main.f90 index 6d270eb..77fd312 100644 --- a/src/main.f90 +++ b/src/main.f90 @@ -1,89 +1,99 @@ PROGRAM main ! ! Skeleton for a time dependent program ! Note: Even in this sequential version, MPI is required ! because of FUTILS (more specifcally because ! of the HASTABLE module)! ! USE basic use mpi USE bsplines USE mumps_bsplines USE futils + Use omp_lib IMPLICIT NONE INTEGER:: required, provided ! ! - required=MPI_THREAD_FUNNELED + required=MPI_THREAD_SERIALIZED CALL mpi_init_thread(required,provided,ierr) IF(provided .lt. required) CALL MPI_abort(MPI_COMM_WORLD,-1,ierr) IF(ierr .ne. 0) CALL MPI_abort(MPI_COMM_WORLD,-1,ierr) CALL MPI_COMM_RANK(MPI_COMM_WORLD, mpirank, ierr) IF(ierr .ne. 0) CALL MPI_abort(MPI_COMM_WORLD,-1,ierr) CALL MPI_COMM_SIZE(MPI_COMM_WORLD, mpisize, ierr) IF(ierr .ne. 0) CALL MPI_abort(MPI_COMM_WORLD,-1,ierr) !-------------------------------------------------------------------------------- ! 1. Prologue CALL timera(0, 'Prologue') CALL daytim('Start at') ! ! Define data specific to run ! CALL basic_data !Definition of global variables and input paramaters loading step=0 ! IF( .NOT. nlres ) THEN CALL newrun !not implemented yet ELSE CALL restart !not implemented yet END IF ! ! Compute auxilliary values ! CALL auxval !time independent values ! ! Initial conditions ! IF( .NOT. nlres ) THEN CALL inital !plasma initialisation ELSE CALL resume !loads restart.h5 file END IF ! ! Start or restart the run ! CALL start !not implemented yet ! ! Initial diagnostocs ! CALL diagnose(0) CALL timera(1, 'Prologue') !-------------------------------------------------------------------------------- ! 2. 
Time stepping CALL timera(0, 'Main loop') ! +!$OMP PARALLEL DEFAULT(SHARED) DO + !$OMP SINGLE step = step+1 cstep = cstep+1 time = time+dt*tnorm CALL tesend + !$OMP END SINGLE + CALL stepon + + !$OMP MASTER IF(modulo(step,itrestart) .eq. 0 .and. mpirank .eq. 0) CALL chkrst(1) + !$OMP END MASTER IF( nlend ) EXIT END DO + WRITE(*,*) "WE exit", mpirank, omp_get_thread_num() +!$OMP END PARALLEL CALL timera(1, 'Main loop') !-------------------------------------------------------------------------------- ! 9. Epilogue CALL timera(0, 'Epilogue') ! CALL diagnose(-1) CALL endrun IF(mpirank .eq. 0) THEN CALL timera(1, 'Epilogue') CALL timera(9, '') CALL timera(-1, '') CALL daytim('Done at ') END IF CALL mpi_finalize(ierr) END PROGRAM main diff --git a/src/maxwsrce_mod.f90 b/src/maxwsrce_mod.f90 index f7e558e..53231a7 100644 --- a/src/maxwsrce_mod.f90 +++ b/src/maxwsrce_mod.f90 @@ -1,221 +1,224 @@ !------------------------------------------------------------------------------ ! EPFL/Swiss Plasma Center !------------------------------------------------------------------------------ ! ! MODULE: maxwellsource ! !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> Adds particle in the simulation according to a maxwellian distribution in velocity !> and a uniform distribution in space !------------------------------------------------------------------------------ MODULE maxwsrce ! USE constants use mpi USE mpihelper USE basic, ONLY: mpirank, mpisize, vnorm, rnorm, zgrid, & & nlclassical, nlmaxwellsource USE beam USE distrib IMPLICIT NONE PRIVATE REAL(kind=db), SAVE :: frequency = 0 !< Number of macro particles added per second over the spawning region REAL(kind=db), SAVE :: loc_frequency = 0 !< local number of macro particles added per second over the spawning region REAL(kind=db), SAVE :: temperature=11000 !< temperature used for the Maxwellian velocity distribution in Kelvin REAL(kind=db), SAVE :: rlimits(2) !< radial limits in which particles will be spawned REAL(kind=db), SAVE :: zlimits(2) !< axial limits in which particles will be spawned REAL(kind=db), SAVE :: last_t=0 !< last time when a macro-particle was added to the simulation REAL(kind=db), SAVE :: vth = 0 !< Normalized thermal velocity computed from temperature REAL(kind=db), SAVE :: loc_zlimits(2)!< Local axial limits in which particles will be spawned used for current mpi process REAL(kind=db), SAVE :: loc_rlimits(2)!< Local radial limits in which particles will be spawned used for current mpi process REAL(kind=db), SAVE :: time_start=-1.0 !< time at which the source is turned on REAL(kind=db), SAVE :: time_end=-1.0 !< time at which the source is turned off INTEGER, SAVE :: radialtype=2 !< type of radial distribution used for creating particles NAMELIST /maxwellsourceparams/ frequency, temperature, rlimits, zlimits, time_start, time_end, radialtype PUBLIC:: maxwsrce_init, maxwsrce_inject, maxwsrce_diag, maxwsrce_calcfreq contains subroutine maxwsrce_init(lu_in, time, Zbounds) implicit none INTEGER, INTENT(IN)::lu_in INTEGER, INTENT(IN):: Zbounds(0:) REAL(kind=db), INTENT(IN):: time INTEGER:: ierr Rewind(lu_in) READ(lu_in, maxwellsourceparams) IF(mpirank .eq. 0) THEN WRITE(*, maxwellsourceparams) END IF ! compute normalized thermal velocity and source reference time vth=sqrt(kb*temperature/partslist(1)%m)/vnorm last_t=time IF(time_start .gt. last_t) last_t=time_start time_start=last_t CALL maxwsrce_calcfreq(Zbounds) if (loc_frequency .lt. 0) loc_frequency=0 WRITE(*,*) "init local frequency : ", loc_frequency IF(radialtype .gt.4 .or. 
radialtype .lt. 1) THEN IF (mpirank .eq. 0) WRITE(*,*) "Unknown type of radial distribution:", radialtype CALL MPI_Abort(MPI_COMM_WORLD, -1, ierr) END IF End subroutine maxwsrce_init SUBROUTINE maxwsrce_calcfreq(Zbounds) IMPLICIT NONE INTEGER:: Zbounds(0:) REAL(kind=db):: surface, frequencyrem REAL(kind=db):: remsurface frequencyrem=0 ! compute source surface surface=(zlimits(2)-zlimits(1)) loc_rlimits=rlimits/rnorm ! if the source is in the mpi process compute region if(zlimits(2) .gt. zgrid(Zbounds(mpirank))*rnorm .and. zlimits(1) .lt. zgrid(Zbounds(mpirank+1))*rnorm) then ! reduce the frequency and source boundaries to match the volume covered by the mpi process IF (zlimits(1) .lt. zgrid(Zbounds(mpirank))*rnorm) THEN remsurface=(zgrid(Zbounds(mpirank))*rnorm-zlimits(1)) frequencyrem=frequency*remsurface/surface loc_zlimits(1)=zgrid(Zbounds(mpirank)) ELSE loc_zlimits(1)=zlimits(1)/rnorm END IF IF (zlimits(2) .gt. zgrid(Zbounds(mpirank+1))*rnorm) THEN remsurface=(zlimits(2)-zgrid(Zbounds(mpirank+1))*rnorm) frequencyrem=frequencyrem+frequency*remsurface/surface loc_zlimits(2)=zgrid(Zbounds(mpirank+1)) ELSE loc_zlimits(2)=zlimits(2)/rnorm END IF loc_frequency=frequency-frequencyrem else ! otherwise turn off the source for this mpi process loc_frequency=0 loc_zlimits=(/zgrid(Zbounds(mpirank)), zgrid(Zbounds(mpirank+1))/) end if END SUBROUTINE maxwsrce_calcfreq Subroutine maxwsrce_diag(File_handle, str, vnorm) use mpi Use futils Integer:: File_handle Real(kind=db):: vnorm Character(len=*):: str CHARACTER(len=256):: grpname Integer:: ierr, mpirank CALL MPI_COMM_RANK(MPI_COMM_WORLD, mpirank, ierr) IF(mpirank .eq. 0 .and. nlmaxwellsource) THEN Write(grpname,'(a,a)') trim(str),"/maxwellsource" If(.not. isgroup(File_handle, trim(grpname))) THEN CALL creatg(File_handle, trim(grpname)) END IF Call attach(File_handle, trim(grpname), "frequency", frequency) Call attach(File_handle, trim(grpname), "temperature", temperature) Call putarr(File_handle, trim(grpname)//"/zlimits", zlimits) Call putarr(File_handle, trim(grpname)//"/rlimits", rlimits) Call attach(File_handle, trim(grpname), "vth", vth*vnorm) Call attach(File_handle, trim(grpname), "time_start", time_start) Call attach(File_handle, trim(grpname), "time_end",time_end) Call attach(File_handle, trim(grpname), "radialtype", radialtype) END IF End subroutine maxwsrce_diag subroutine maxwsrce_inject(time) ! Inject the particles in the correct species according to the source definition + Use omp_lib + use basic,ONLY: mpirank implicit none REAL(kind=db), INTENT(IN) :: time Type(particle), ALLOCATABLE, Dimension(:):: newparts INTEGER:: npartsadd INTEGER:: i REAL(kind=db), ALLOCATABLE:: VR(:),VZ(:),VTHET(:),R(:),Z(:) ! check if source is on IF(.not. maxwsrce_on(time)) THEN RETURN END IF ! Number of particles to add at this time step npartsadd=floor((time-last_t)*loc_frequency) IF (npartsadd.gt. 0) THEN ALLOCATE(newparts(npartsadd)) ALLOCATE(VR(npartsadd),VZ(npartsadd),VTHET(npartsadd),R(npartsadd),Z(npartsadd)) ! Initial velocities distribution according to gaussian + CALL lodgaus(0,VZ) CALL lodgaus(0,VR) CALL lodgaus(0,VTHET) SELECT CASE(radialtype) CASE(1) ! 1/R distribution in R CALL lodunir(0,R,loc_rlimits(1),loc_rlimits(2)) CASE(2) ! flat top distribution in R CALL lodlinr(0,R,loc_rlimits(1),loc_rlimits(2)) CASE(3) ! 1/R^2 distribution in R CALL lodinvr(0,R,loc_rlimits(1),loc_rlimits(2)) CASE(4) ! gaussian distribution in R CALL lodgausr(0,R,loc_rlimits(1),loc_rlimits(2)) CASE DEFAULT IF (mpirank .eq. 
0) WRITE(*,*) "Unknown type of radial distribution:", radialtype END SELECT CALL loduni (0,Z) ! fill the added particles buffer DO i=1,npartsadd newparts(i)%U=(/VR(i)*vth, VTHET(i)*vth, VZ(i)*vth/) newparts(i)%pos=(/R(i), 0.0_db, Z(i)*(loc_zlimits(2)-loc_zlimits(1)) + loc_zlimits(1)/) IF (nlclassical) THEN newparts(i)%gamma=1.0 ELSE newparts(i)%gamma=sqrt(1/(1-newparts(i)%U(1)**2-newparts(i)%U(2)**2-newparts(i)%U(3)**2)) newparts(i)%U=newparts(i)%U*newparts(i)%gamma END IF END DO ! Move the buffer of created particles to the simulation buffer CALL add_created_part(partslist(1), newparts) last_t=last_t+npartsadd/loc_frequency END IF end subroutine maxwsrce_inject logical function maxwsrce_on(time) REAL(kind=db), intent(in):: time maxwsrce_on=.false. IF (time_start .lt. 0 .and. time_end .lt. 0) THEN maxwsrce_on = .true. ELSE IF (time .gt. time_start .and. (time .lt. time_end .or. time_end .lt. 0) ) THEN maxwsrce_on = .true. END IF maxwsrce_on=nlmaxwellsource .and. maxwsrce_on end function END MODULE maxwsrce diff --git a/src/neutcol_mod.f90 b/src/neutcol_mod.f90 index 05ccbb3..40ffaa7 100644 --- a/src/neutcol_mod.f90 +++ b/src/neutcol_mod.f90 @@ -1,513 +1,514 @@ !------------------------------------------------------------------------------ ! EPFL/Swiss Plasma Center !------------------------------------------------------------------------------ ! ! MODULE: neutcol ! !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> Module responsible for handling the electron-neutral collisions and creating electrons !> by ionisation Based on the paper by Birdsall 1991 and Sengupta et al. !------------------------------------------------------------------------------ module neutcol USE constants IMPLICIT NONE private LOGICAL, SAVE :: nlcol=.false. !< Flag to activate or not electron neutral collisions LOGICAL :: nlmaxwellio=.false. !< Flag to define how ionised electrons are created (physically or according to maxwellian) INTEGER :: itcol = 1 !< number of dt between each evaluation of neutcol_step Real(kind=db) :: neutdens=2.4e16 !< Neutral particle density in m-3 Real(kind=db) :: neuttemp=300 !< Neutral particle temperature in K Real(kind=db) :: neutpressure !< Neutral particle pressure in mbar Real(kind=db) :: scatter_fac = 24.2 !< Energy scattering factor for the considered gas (here for Ne) [eV] see Opal 1971 https://doi.org/10.1063/1.1676707 real(kind=db) :: Eion = 21.56 !< Ionisation energy (eV) (here for Ne) Real(kind=db) :: E0 = 27.21 !< Atomic unit of energy used for calculation of deviation angles [eV] real(kind=db) :: collfactor !< Normalised collision factor (n_n \delta t) INTEGER :: nb_io_cross=0 Real(kind=db), ALLOCATABLE :: io_cross_sec(:,:) !< Ionisation cross-section table Real(kind=db), ALLOCATABLE :: io_growth_cross_sec(:) !< Ionisation exponential fitting factor INTEGER :: nb_ela_cross=0 Real(kind=db), ALLOCATABLE :: ela_cross_sec(:,:) !< Elastic collision cross section table Real(kind=db), ALLOCATABLE :: ela_growth_cross_sec(:) !< Elastic collision exponential fitting factor Real(kind=db) :: Escale !< Energy normalisation factor used to reduce computation costs CHARACTER(len=128) :: io_cross_sec_file='' CHARACTER(len=128) :: ela_cross_sec_file='' Real(kind=db) :: etemp=22000 !< In case of nlmaxwelio, defines the temperature of created electrons [K] Real(kind=db) :: vth !< In case of nlmaxwelio, defines the normalised thermal velocity of created electrons LOGICAL :: nldragio=.true. 
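In maxwsrce_inject (just above the neutcol declarations that follow), the number of macro-particles added per call is floor((time-last_t)*loc_frequency), and last_t is then advanced by npartsadd/loc_frequency, so the fractional remainder carries over to the next call and the long-run injection rate matches the requested frequency. A standalone sketch of that accumulator, with made-up numbers:

```fortran
program injection_rate_demo
   implicit none
   integer, parameter :: db = kind(1.0d0)
   real(kind=db) :: t, dt, last_t, freq
   integer :: it, nadd, ntot

   dt = 1.0e-9_db; freq = 3.3e8_db        ! ~0.33 macro-particles per step on average
   t = 0.0_db; last_t = 0.0_db; ntot = 0

   do it = 1, 1000
      t = t + dt
      nadd = floor((t - last_t)*freq)
      if (nadd > 0) then
         last_t = last_t + nadd/freq      ! keep the fractional remainder for later calls
         ntot   = ntot + nadd
      end if
   end do
   print '(a,i0,a,f6.1)', 'added ', ntot, ' particles, expected ~', t*freq
end program injection_rate_demo
```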
!< Set if inpinging electrons are affected by ionising collisions INTEGER :: species(2) !< species(1) contains the specie index in plist which stores the colliding particles, species(2) stores the specie index for the released ion. LOGICAL :: isotropic = .false. !< is the scattering angle isotropic NAMELIST /neutcolparams/ neutdens, Eion, & & scatter_fac, nlcol, io_cross_sec_file, ela_cross_sec_file, nlmaxwellio, etemp, & & nldragio, itcol, species, isotropic PUBLIC:: neutcol_init, neutcol_step, neutcol_diag, itcol, neutdens PROCEDURE(rotate_vel), POINTER:: change_dir => NULL()!< Function evaluating the weight for Dirichelt boundary conditions ABSTRACT INTERFACE SUBROUTINE rotate_vel(Ur, Uthet, Uz, coschi, thet) use constants real(kind=db), INTENT(INOUT):: Ur, uthet, uz, coschi, thet END SUBROUTINE end interface CONTAINS subroutine neutcol_init(lu_in, p) use mpi Use basic, only: mpirank, dt, nlclassical,rnorm, vnorm Use beam, only: particles Use constants implicit none INTEGER, INTENT(IN) :: lu_in TYPE(particles) :: p INTEGER:: ierr, istat, i character(len=1000) :: line real(kind=db):: xsi species(1)=1 species(2)=-1 Rewind(lu_in) READ(lu_in, neutcolparams, iostat=istat) if (istat.gt.0) then backspace(lu_in) read(lu_in,fmt='(A)') line write(*,'(A)') & 'Invalid line in neutcolparams: '//trim(line) call MPI_Abort(MPI_COMM_WORLD, -1, ierr) stop end if IF(mpirank .eq. 0) THEN WRITE(*, neutcolparams) END IF if(.not. nlcol) return if(nlclassical)THEN Escale=0.5*p%m/elchar*vlight**2 else Escale=p%m*vlight**2/elchar end if if (nlmaxwellio) vth=sqrt(kb*etemp/p%m)/vnorm if(io_cross_sec_file .ne.'') then call read_cross_sec(io_cross_sec_file,io_cross_sec, nb_io_cross) if(nb_io_cross .gt. 0) then allocate(io_growth_cross_sec(nb_io_cross-1)) ! Normalisations io_cross_sec(:,2)=io_cross_sec(:,2)/rnorm**2 ! Precomputing of exponential fitting factor for faster execution io_growth_cross_sec=log(io_cross_sec(2:nb_io_cross,2)/io_cross_sec(1:nb_io_cross-1,2))/ & & log(io_cross_sec(2:nb_io_cross,1)/io_cross_sec(1:nb_io_cross-1,1)) end if end if if(ela_cross_sec_file .ne.'') then call read_cross_sec(ela_cross_sec_file,ela_cross_sec, nb_ela_cross) if(nb_ela_cross .gt. 0) then allocate(ela_growth_cross_sec(nb_ela_cross-1)) ! Normalisations ela_cross_sec(:,2)=ela_cross_sec(:,2)/rnorm**2 if(.not. isotropic) then do i=1,nb_ela_cross xsi=ela_cross_sec(i,1)/(0.25*E0+ela_cross_sec(i,1)) ela_cross_sec(i,2)=ela_cross_sec(i,2)*(2*xsi**2)/((1-xsi)*((1+xsi)*log((1+xsi)/(1-xsi))-2*xsi)) end do end if ! Precomputing of exponential fitting factor for faster execution ela_growth_cross_sec=log(ela_cross_sec(2:nb_ela_cross,2)/ela_cross_sec(1:nb_ela_cross-1,2))/ & & log(ela_cross_sec(2:nb_ela_cross,1)/ela_cross_sec(1:nb_ela_cross-1,1)) end if END IF nlcol=nlcol .and. (allocated(io_cross_sec) .or. allocated(ela_cross_sec)) ! Collision factor depending on neutral gas parameters collfactor=neutdens*dt*rnorm**3*itcol neutpressure=neutdens*kb*300/100 if (.not. isotropic)then change_dir=> rotate else change_dir=> scatter end if end subroutine neutcol_init Subroutine neutcol_diag(File_handle, str, vnorm) use mpi Use futils Integer:: File_handle Real(kind=db):: vnorm Character(len=*):: str CHARACTER(len=256):: grpname Integer:: ierr, mpirank CALL MPI_COMM_RANK(MPI_COMM_WORLD, mpirank, ierr) IF(mpirank .eq. 0 .and. nlcol) THEN Write(grpname,'(a,a)') trim(str),"/neutcol" If(.not. 
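The cross-section tables loaded in neutcol_init are interpolated with a local power law: for each tabulated interval the exponent g_k = log(sigma_{k+1}/sigma_k)/log(E_{k+1}/E_k) is precomputed (the *_growth_cross_sec arrays), and sig_fit later evaluates sigma(E) = sigma_k*(E/E_k)**g_k. A self-contained sketch with made-up table values (not the Ne tables):

```fortran
program powerlaw_fit_demo
   implicit none
   integer, parameter :: db = kind(1.0d0)
   real(kind=db) :: e(4), sig(4), growth(3), ek, sfit
   integer :: k

   e   = [ 30.0_db, 100.0_db, 300.0_db, 1000.0_db ]            ! energies [eV]
   sig = [ 2.0e-21_db, 1.2e-21_db, 6.0e-22_db, 2.5e-22_db ]    ! made-up cross sections [m^2]

   ! precomputed interval exponents, as in neutcol_init
   growth = log(sig(2:4)/sig(1:3)) / log(e(2:4)/e(1:3))

   ek = 180.0_db          ! energy at which the fit is evaluated
   k  = 2                 ! interval containing ek (found by `closest` in the module)
   sfit = sig(k)*(ek/e(k))**growth(k)
   print *, 'sigma(', ek, ' eV) =', sfit
end program powerlaw_fit_demo
```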
isgroup(File_handle, trim(grpname))) THEN CALL creatg(File_handle, trim(grpname)) END IF Call attach(File_handle, trim(grpname), "neutdens", neutdens) Call attach(File_handle, trim(grpname), "neuttemp", neuttemp) Call attach(File_handle, trim(grpname), "neutpressure", neutpressure) Call attach(File_handle, trim(grpname), "scatter_fac", scatter_fac) Call attach(File_handle, trim(grpname), "Eion", Eion) Call attach(File_handle, trim(grpname), "E0", E0) Call attach(File_handle, trim(grpname), "Escale", Escale) Call putarr(File_handle,trim(grpname)//"species", species) if (allocated(io_cross_sec)) Call putarr(File_handle, trim(grpname)//"/io_cross_sec", io_cross_sec) if (allocated(ela_cross_sec)) Call putarr(File_handle, trim(grpname)//"/ela_cross_sec", ela_cross_sec) END IF End subroutine neutcol_diag !------------------------------------------------------------- !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief Simulates the elastic and ionising collisions for each particles in plist(species(1)) ! !> @param [inout] plist list of particle species considered in the code !--------------------------------------------------------------------------- SUBROUTINE neutcol_step(plist) ! USE random USE beam USE omp_lib USE basic, ONLY: nlclassical USE distrib, ONLY: lodgaus type(particles), TARGET::plist(:) type(particles),pointer::p INTEGER:: i, omp_thread, num_threads, j, nbcolls_ela, nbcolls_io real(kind=db):: Rand(5) real(kind=db):: v2, v, ek, Everif, es, cosChi, thet, sig_io, sig_ela, vfact, xsi - type(linked_part_row), ALLOCATABLE:: ins_p(:) + type(linked_part_row):: ins_p type(linked_part), POINTER:: created real(kind=db):: collisionfact,nucol(3),vinit(3),vend(3) p=>plist(species(1)) if(.not. nlcol .or. p%nploc .le. 0) return num_threads=omp_get_max_threads() - Allocate(ins_p(num_threads)) nbcolls_ela=0 nbcolls_io=0 nucol=0 - !$OMP PARALLEL DEFAULT(SHARED), private(collisionfact,i,omp_thread,Rand,v2,ek,sig_io,sig_ela,es,coschi,thet,vfact, created, v, everif,xsi,vinit,vend), reduction(+:nbcolls_ela,nbcolls_io, nucol) + !!$OMP private(collisionfact,i,omp_thread,Rand,v2,ek,sig_io,sig_ela,es,coschi,thet,vfact, created, v, everif,xsi,vinit,vend)!, reduction(+:nbcolls_ela,nbcolls_io, nucol) omp_thread=omp_get_thread_num()+1 - allocate(ins_p(omp_thread)%start) - ins_p(omp_thread)%n=0 - created=>ins_p(omp_thread)%start + !omp_thread=1 + allocate(ins_p%start) + ins_p%n=0 + created=>ins_p%start !$OMP DO DO i=1,p%Nploc !for each particle CALL random_array(Rand,1,ran_index(omp_thread),ran_array(:,omp_thread)) ! we calculate the kinetic energy and norm of the velocity v2=(p%U(1,i)**2+p%U(2,i)**2+p%U(3,i)**2) if(nlclassical) THEN ek=v2*escale v=sqrt(v2) vinit=p%U(:,i) ! (/p%UR(i),p%UTHET(i),p%UZ(i)/) ELSE ek=(p%gamma(i)-1)*escale v=sqrt(v2)/p%gamma(i) vinit=p%U(:,i)/p%gamma(i)!(/p%UR(i),p%UTHET(i),p%UZ(i)/)/p%gamma(i) end if sig_io=0 sig_ela=0 ! computes the ionisation and elastic collision cross-sections at this kinetic energy ! The ionisation event can only occur if the incoming electron energy is above the binding energy if (ek .gt. Eion .and. nb_io_cross .gt. 1) then sig_io=sig_fit(io_cross_sec,io_growth_cross_sec,ek, nb_io_cross) end if if (nb_ela_cross .gt. 1) then sig_ela=sig_fit(ela_cross_sec,ela_growth_cross_sec,ek, nb_ela_cross) end if collisionfact=1-exp(-collfactor*(sig_io+sig_ela)*v) ! 
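With sig_io and sig_ela evaluated at the particle's kinetic energy, the test that follows accepts a collision with probability P = 1 - exp(-collfactor*(sigma_io+sigma_ela)*v), where collfactor already contains n_n*dt*itcol in normalised units. A tiny standalone Monte Carlo check of that acceptance test, with illustrative numbers:

```fortran
program collision_probability_demo
   implicit none
   integer, parameter :: db = kind(1.0d0)
   real(kind=db) :: collfactor, sig_io, sig_ela, v, pcoll, r
   integer :: i, ncoll

   collfactor = 1.0e-4_db                  ! stands for n_n * dt * itcol (normalised)
   sig_io = 0.3_db; sig_ela = 0.7_db; v = 2.0_db
   pcoll = 1.0_db - exp(-collfactor*(sig_io + sig_ela)*v)

   ncoll = 0
   do i = 1, 100000
      call random_number(r)
      if (r < pcoll) ncoll = ncoll + 1     ! a collision event happens
   end do
   print *, 'P_coll =', pcoll, '  measured fraction =', real(ncoll, db)/1.0e5_db
end program collision_probability_demo
```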
If we have a collision event if (Rand(1) .lt.collisionfact) THEN CALL random_array(Rand,1,ran_index(omp_thread),ran_array(:,omp_thread)) ! Check if elastic or ionising event is happening IF(Rand(1).gt. sig_ela/(sig_io+sig_ela)) THEN ! An ionisation collision happened and we create the necessary electron ! prepare the memory for the released electron - ins_p(omp_thread)%n=ins_p(omp_thread)%n+1 + ins_p%n=ins_p%n+1 allocate(created%next) created%next%prev=>created ! Fill created particle new position created%p%pos=p%pos(:,i)!(/p%R(i), p%THET(i), p%Z(i)/) IF( nlmaxwellio ) THEN ! the new electron velocity is defined according to a Maxwellian CALL lodgaus(0, Rand(1:3)) ! get random velocity created%p%U=vth*Rand(1:3) ELSE CALL random_array(Rand,3,ran_index(omp_thread),ran_array(:,omp_thread)) ! Compute created electron energy Es=scatter_fac*tan(Rand(1)*atan((Ek-Eion)/(2*scatter_fac))) ! Compute scattering angles for created electron if (isotropic) then coschi=cos(Rand(2)*pi) else cosChi=1-2*Rand(2)/(1+8*Es/E0*(1-Rand(2))) end if thet=Rand(3)*2*pi if(nlclassical)THEN ! new velocity factor for created particle vfact=sqrt(Es/Ek) ELSE ! new velocity factor for created particle vfact=sqrt(Es*(Es+2*Escale)/(Ek*(Ek+2*Escale))) END IF ! Fill created particle velocity created%p%U=vfact*p%U(:,i)!(/p%UR(i),p%UTHET(i),p%UZ(i)/) ! rotate the velocity vector due to the collision call change_dir(created%p%U(1),created%p%U(2), created%p%U(3), coschi, thet) END IF vend=created%p%U if(nlclassical)THEN ! Lorentz factor for created particle created%p%gamma=1.0 ELSE ! Lorentz factor for created particle created%p%gamma=sqrt(1+created%p%U(1)**2+created%p%U(2)**2+created%p%U(3)**2) vend=vend/created%p%gamma END IF ! We prepare the next created particle - ins_p(omp_thread)%end=>created + ins_p%end=>created created=>created%next ! We keep track of what changed nbcolls_io=nbcolls_io+1 nucol=nucol-vend/vinit ! If we want the incoming electron to be scattered, we need to compute ! its new kinetic energy if (nldragio) THEN ! We store the lossed energy in pot for keeping track of energy conservation created%prev%p%pot=Eion+Es CALL random_array(Rand,2,ran_index(omp_thread),ran_array(:,omp_thread)) Es=Ek-Eion-Es if(nlclassical)THEN ! new velocity factor for scattered particle vfact=sqrt(Es/Ek) ELSE ! new velocity factor for scattered particle vfact=sqrt(Es*(Es+2*Escale)/(Ek*(Ek+2*Escale))) END IF ELSE CYCLE END IF ELSE ! An elastic collision event happens CALL random_array(Rand,2,ran_index(omp_thread),ran_array(:,omp_thread)) Es=Ek vfact=1.0 nbcolls_ela=nbcolls_ela+1 END IF ! We calculate the scattered velocity angle for the scattered electron if (isotropic) then coschi=cos(Rand(1)*pi) else cosChi=1-2*Rand(1)/(1+8*Es/E0*(1-Rand(1))) end if thet=Rand(2)*2*pi ! Change the incident electron velocity direction and amplitude if necessary p%U(:,i)=p%U(:,i)*vfact !p%UTHET(i)=p%UTHET(i)*vfact !p%UZ(i)=p%UZ(i)*vfact call change_dir(p%U(1,i),p%U(2,i), p%U(3,i), coschi, thet) if(nlclassical) THEN vend=p%U(:,i)!(/p%UR(i),p%UTHET(i),p%UZ(i)/) ELSE p%gamma(i)=sqrt(1+p%U(1,i)**2+p%U(2,i)**2+p%U(3,i)**2) vend=p%U(:,i)/p%gamma(i)!(/p%UR(i),p%UTHET(i),p%UZ(i)/)/p%gamma(i) END IF nucol=nucol+1-vend/vinit END IF END DO !$OMP END DO + + !$OMP BARRIER + ! clean up the memory after the loop if(associated(created%prev)) then created=>created%prev - ins_p(omp_thread)%end=>created + ins_p%end=>created deallocate(created%next) else - deallocate(ins_p(omp_thread)%start) + deallocate(ins_p%start) end if - !$OMP END PARALLEL + !!$OMP END PARALLEL ! 
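Inside the ionisation branch above, the secondary-electron energy is drawn from the Opal et al. form Es = w*tan(R*atan((Ek-Eion)/(2w))) with w = scatter_fac, and the anisotropic polar angle from cos(chi) = 1 - 2R/(1 + 8*Es/E0*(1-R)); the azimuth is uniform on [0, 2*pi). A standalone sampling sketch using the Ne constants quoted in the module header and an illustrative incoming energy:

```fortran
program scatter_sampling_demo
   implicit none
   integer, parameter :: db = kind(1.0d0)
   real(kind=db) :: pi, scatter_fac, Eion, E0
   real(kind=db) :: Ek, Es, coschi, thet, r(3)

   pi = 4.0_db*atan(1.0_db)
   scatter_fac = 24.2_db; Eion = 21.56_db; E0 = 27.21_db   ! Ne values from the module
   Ek = 100.0_db                                           ! incoming electron energy [eV]
   call random_number(r)

   Es     = scatter_fac*tan(r(1)*atan((Ek - Eion)/(2.0_db*scatter_fac)))
   coschi = 1.0_db - 2.0_db*r(2)/(1.0_db + 8.0_db*Es/E0*(1.0_db - r(2)))
   thet   = 2.0_db*pi*r(3)

   print *, 'Es [eV] =', Es, '  cos(chi) =', coschi, '  theta =', thet
end program scatter_sampling_demo
```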
We collect all created particules into one linked list for easier insertion in plist - Do i=1,num_threads - if(associated(ins_p(i)%start)) then - created=>ins_p(i)%end - Do j=i+1,num_threads - created%next=>ins_p(j)%start - ins_p(i)%n=ins_p(i)%n+ins_p(j)%n - IF(ASSOCIATED(created%next)) then - created=>ins_p(j)%end - END IF - End Do - if(species(2).gt.0) then - CALL add_created_part(plist(species(2)), ins_p(i), .false.,.true.) - end if - CALL add_created_part(p,ins_p(i),.true.,.false.) - exit - end if - end do - DEALLOCATE(ins_p) + + !Do i=1,num_threads + !$OMP CRITICAL (insertions) + if(species(2).gt.0.and.ins_p%n .gt.0) then + CALL add_created_part(plist(species(2)), ins_p, .false.,.true.) + end if + !$OMP END CRITICAL (insertions) + !$OMP CRITICAL (insertelectrons) + if(ins_p%n .gt.0) then + CALL add_created_part(plist(species(1)),ins_p,.true.,.false.) + !exit + end if + !$OMP END CRITICAL (insertelectrons) + !end do + !$OMP CRITICAL(addcolls) p%nbcolls=p%nbcolls+(/nbcolls_io, nbcolls_ela/) - p%nudcol=nucol + p%nudcol=p%nudcol+nucol + !$OMP END CRITICAL(addcolls) !Write(*,*)"mpirank: ", mpirank, " Nb colls ela, io: ",nbcolls_ela, nbcolls_io ! END SUBROUTINE neutcol_step FUNCTION sig_fit(sig_vec,growth_vec,ek,nb_cross) use distrib, ONLY: closest real(kind=db)::sig_fit, ek real(kind=db):: sig_vec(:,:), growth_vec(:) Integer:: k, nb_cross sig_fit=0 k=closest(sig_vec(:,1),ek, nb_cross-1) if(k.lt.1) return !sig_fit=(sig_vec(k,1)-sig_vec(k-1,1))/(sig_vec(k,2)-sig_vec(k-1,2))*(sig_vec(k,2)-ek)+sig_vec(k-1,1) ! Exponential fitting relevant at high energies sig_fit=sig_vec(k,2)*(ek/sig_vec(k,1))**growth_vec(k) END FUNCTION sig_fit SUBROUTINE rotate(Ur, Uthet, Uz, coschi, thet) real(kind=db), INTENT(INOUT):: Ur, uthet, uz, coschi, thet real(kind=db):: norm, perp(3), U(3), U0(3) real(kind=db):: sinchi, sinthet, costhet Integer :: iperp1,iperp2 U0=(/Ur,Uthet,Uz/) norm=sqrt(sum(U0**2)) U=U0/norm ! Find a vector perpendicular to U for chi rotation ! find the direction with maximum amplitude perp=(/1,1,1/) iperp1=maxloc(abs(U),1) ! find second direction with next max amplitude perp(iperp1)=0 iperp2=maxloc(abs(perp*U),1) perp=0 perp(iperp2)=U(iperp1) perp(iperp1)=-U(iperp2) ! Normalise the rotation vector perp=perp/sqrt(sum(perp**2)) ! Compute sinus and cosinus for rotation sinchi=sqrt(1-coschi**2) costhet=cos(thet) sinthet=sin(thet) ! Rotation of angle chi around perp Ur = (coschi+perp(1)**2*(1-coschi))*U0(1) + (perp(1)*perp(2)*(1-coschi)-perp(3)*sinchi)*U0(2) + (perp(1)*perp(3)*(1-coschi) + perp(2)*sinchi)*U0(3) Uthet = (perp(1)*perp(2)*(1-coschi)+perp(3)*sinchi)*U0(1) + (coschi + perp(2)**2*(1-coschi))*U0(2) +(perp(2)*perp(3)*(1-coschi)-perp(1)*sinchi)*U0(3) Uz = (perp(1)*perp(3)*(1-coschi)-perp(2)*sinchi)*U0(1) +(perp(3)*perp(2)*(1-coschi)+perp(1)*sinchi)*U0(2) +( coschi+perp(3)**2*(1-coschi))*U0(3) U0 =(/Ur,Uthet,Uz/) ! second rotation according to uniform distribution ! Rotation of angle theta around U Ur = (costhet+U(1)**2*(1-costhet))*U0(1) + (U(1)*U(2)*(1-costhet) - U(3)*sinthet)*U0(2) + (U(1)*U(3)*(1-costhet)+U(2)*sinthet)*U0(3) Uthet = (U(2)*U(1)*(1-costhet)+U(3)*sinthet)*U0(1) + (costhet + U(2)**2*(1-costhet))*U0(2) + (U(2)*U(3)*(1-costhet)-U(1)*sinthet)*U0(3) Uz = (U(3)*U(1)*(1-costhet) - U(2)*sinthet)*U0(1) + (U(3)*U(2)*(1-costhet)+U(1)*sinthet)*U0(2) + (costhet +U(3)**2*(1-costhet))*U0(3) !normf=sqrt(Ur**2+Uthet**2+Uz**2) !if(abs(norm-normf)/norm .gt. 
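The change above replaces the shared array of per-thread lists by a thread-private linked_part_row that each thread appends to the target species inside named !$OMP CRITICAL sections, so the serial merge loop is no longer needed. A reduced sketch of that pattern, with an integer counter standing in for the private list and for add_created_part:

```fortran
program merge_private_lists
   use omp_lib
   implicit none
   integer :: total, mine

   total = 0
!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(mine)
   ! "mine" stands for ins_p%n, the length of the list built privately
   ! by this thread during the collision loop
   mine = omp_get_thread_num() + 1

   !$OMP CRITICAL (insertions)
   total = total + mine          ! stands for add_created_part(plist(...), ins_p, ...)
   !$OMP END CRITICAL (insertions)
!$OMP END PARALLEL

   print *, 'particles created by all threads:', total
end program merge_private_lists
```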
1e-14) WRITE(*,*) "Error in rotate the norm of v changed" END SUBROUTINE rotate SUBROUTINE scatter(Ur, Uthet, Uz, coschi, thet) real(kind=db), INTENT(INOUT):: Ur, uthet, uz, coschi, thet real(kind=db):: norm real(kind=db):: sinchi, sinthet, costhet norm=sqrt(Ur**2+Uz**2+Uthet**2) ! Compute sinus and cosinus for rotation sinchi=sqrt(1-coschi**2) costhet=cos(thet) sinthet=sin(thet) Ur=norm*sinchi*costhet Uthet=norm*sinchi*sinthet Uz=norm*coschi END SUBROUTINE scatter SUBROUTINE read_cross_sec(filename,cross_sec, nb_cross) CHARACTER(len=*) ::filename Real(kind=db), ALLOCATABLE :: cross_sec(:,:) INTEGER:: nb_cross INTEGER :: lu_cross_sec=9999 INTEGER:: i, openerr, reason CHARACTER(len=256) :: header real(kind=db):: t1,t2 nb_cross=0 OPEN(UNIT=lu_cross_sec,FILE=trim(filename),ACTION='READ',IOSTAT=openerr) header=' ' IF(openerr .ne. 0) THEN CLOSE(unit=lu_cross_sec) RETURN END IF ! The cross section table is defined as a two column energy and cross_section DO WHILE(.true.) READ(lu_cross_sec,'(a)',IOSTAT=reason) header header=adjustl(header) if(reason .lt. 0 ) exit ! We reached end of file if( header(1:1) .ne. '!') then READ(header,*) t1, t2 if(t1 .ne. 0 .and. t2.ne. 0) nb_cross=nb_cross+1 end if END DO if (allocated(cross_sec)) deallocate(cross_sec) allocate(cross_sec(nb_cross,2)) REWIND(lu_cross_sec) ! The cross section table is defined as a two column energy and cross_section i=1 DO WHILE(i .le. nb_cross) READ(lu_cross_sec,'(a)',IOSTAT=reason) header header=adjustl(header) if(reason .lt. 0 ) exit ! We reached end of file if( header(1:1) .ne. '!') then READ(header,*) cross_sec(i,1), cross_sec(i,2) if(cross_sec(i,1) .ne. 0 .and. cross_sec(i,2).ne. 0) i=i+1 end if END DO CLOSE(unit=lu_cross_sec) END subroutine read_cross_sec end module neutcol diff --git a/src/particletypes_mod.f90 b/src/particletypes_mod.f90 index 1737fbb..e21d321 100644 --- a/src/particletypes_mod.f90 +++ b/src/particletypes_mod.f90 @@ -1,557 +1,576 @@ !------------------------------------------------------------------------------ ! EPFL/Swiss Plasma Center !------------------------------------------------------------------------------ ! ! MODULE: particletypes ! !> @author !> Guillaume Le Bars EPFL/SPC !> Patryk Kaminski EPFL/SPC !> Trach Minh Tran EPFL/SPC ! ! DESCRIPTION: !> Module responsible for defining the particle types and defining some subroutines to change their size, !> initialize them or delete them !------------------------------------------------------------------------------ MODULE particletypes USE constants ! IMPLICIT NONE !> Stores the particles properties for the run. TYPE particles INTEGER :: Nploc !< Local number of simulated particles INTEGER :: Nptot !< Total number of simulated particles INTEGER :: Newindex !< Stores the higher partindex for the creation of new particles REAL(kind=db) :: m !< Particle mass REAL(kind=db) :: q !< Particle charge REAL(kind=db) :: weight !< Number of particles represented by one macro-particle REAL(kind=db) :: qmRatio !< Charge over mass ratio REAL(kind=db) :: nudcol(3) !< Effective momentum drag frequency REAL(kind=db) :: H0 REAL(kind=db) :: P0 REAL(kind=db) :: temperature LOGICAL :: Davidson=.false. LOGICAL :: is_test= .false. !< detremines if particle is saved on ittracer LOGICAL :: is_field= .true. !< detremines if particle contributes to Poisson solver LOGICAL :: calc_moments=.false. 
INTEGER, allocatable :: nblost(:) !< number of particles lost in domain boundaries at current timestep INTEGER :: nbadded !< number of particles added by source since last gather INTEGER, DIMENSION(2) :: nbcolls !< number of particles collisions with neutrals ionisation, elastic) INTEGER, DIMENSION(:), ALLOCATABLE :: Rindex !< Index in the electric potential grid for the R direction INTEGER, DIMENSION(:), ALLOCATABLE :: Zindex !< Index in the electric potential grid for the Z direction INTEGER, DIMENSION(:), ALLOCATABLE :: partindex !< Index of the particle to be able to follow it when it goes from one MPI host to the other !REAL(kind=db), DIMENSION(:), ALLOCATABLE :: Z !< radial coordinates of the particles REAL(kind=db), DIMENSION(:,:), ALLOCATABLE :: pos !< (radial,azimuthal,longitudinal) coordinates of the particles !REAL(kind=db), DIMENSION(:), ALLOCATABLE :: THET !< azimuthal coordinates of the particles REAL(kind=db), DIMENSION(:,:), ALLOCATABLE :: B !< radial, axial Magnetic field REAL(kind=db), DIMENSION(:), ALLOCATABLE :: pot !< Electric potential REAL(kind=db), DIMENSION(:), ALLOCATABLE :: potxt !< External electric potential REAL(kind=db), DIMENSION(:,:), ALLOCATABLE :: E !< Radial Axial Electric field REAL(kind=db), DIMENSION(:,:), CONTIGUOUS, POINTER:: U !< normalized (radial, azimuthal, axial) velocity at the current time step REAL(kind=db), DIMENSION(:,:), CONTIGUOUS, POINTER:: Uold !< normalized (radial, azimuthal, axial) velocity at the previous time step !REAL(kind=db), DIMENSION(:), CONTIGUOUS, POINTER:: UTHET !< normalized azimuthal velocity at the current time step !REAL(kind=db), DIMENSION(:), CONTIGUOUS, POINTER:: UTHETold !< normalized azimuthal velocity at the previous time step !REAL(kind=db), DIMENSION(:), CONTIGUOUS, POINTER:: UZ !< normalized axial velocity at the current time step !REAL(kind=db), DIMENSION(:), CONTIGUOUS, POINTER:: UZold !< normalized axial velocity at the previous time step REAL(kind=db), DIMENSION(:), CONTIGUOUS, POINTER:: Gamma !< Lorentz factor at the current time step REAL(kind=db), DIMENSION(:), CONTIGUOUS, POINTER:: Gammaold !< Lorentz factor at the previous time step Real(kind=db), Dimension(:,:),ALLOCATABLE:: geomweight !< geometric weight at the particle position Real(kind=db), Dimension(:,:),ALLOCATABLE:: moments !< stores the moment matrix + INTEGER, DIMENSION(:), ALLOCATABLE :: losthole + INTEGER, DIMENSION(:), ALLOCATABLE :: sendhole + INTEGER:: nbsendandlost(3)=0 LOGICAL:: collected !< Stores if the particles data have been collected to MPI root process during this timestep INTEGER, DIMENSION(:), ALLOCATABLE:: addedlist END TYPE particles !> Structure containing a single particle position and velocity used in MPI communications. TYPE particle INTEGER :: partindex =0 REAL(kind=db) :: Pos(3) =0 REAL(kind=db) :: U(3) =0 REAL(kind=db) :: Gamma =0 REAL(kind=db) :: pot =0 END TYPE particle TYPE linked_part type(particle) p type(linked_part), POINTER:: next=> NULL() type(linked_part), POINTER:: prev=> NULL() END TYPE linked_part TYPE linked_part_row INTEGER :: n = 0 type(linked_part), POINTER:: start=>NULL() type(linked_part), POINTER:: end=>NULL() END TYPE linked_part_row CONTAINS !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief Allocate the memory for the particles variable storing the particles quantities. ! !> @param[inout] p the particles variable needing to be allocated. 
!> @param[in] nparts the maximum number of particles that will be stored in this variable !--------------------------------------------------------------------------- SUBROUTINE creat_parts(p, nparts) TYPE(particles) :: p INTEGER, INTENT(in) :: nparts IF (.NOT. ALLOCATED(p%pos) ) THEN p%Nploc = nparts p%Nptot = nparts ALLOCATE(p%pos(3,nparts)) !ALLOCATE(p%R(nparts)) !ALLOCATE(p%THET(nparts)) ALLOCATE(p%B(2,nparts)) ALLOCATE(p%U(3,nparts)) !ALLOCATE(p%UZ(nparts)) !ALLOCATE(p%UTHET(nparts)) ALLOCATE(p%Uold(3,nparts)) !ALLOCATE(p%UZold(nparts)) !ALLOCATE(p%UTHETold(nparts)) ALLOCATE(p%Gamma(nparts)) ALLOCATE(p%Rindex(nparts)) ALLOCATE(p%Zindex(nparts)) ALLOCATE(p%partindex(nparts)) ALLOCATE(p%pot(nparts)) ALLOCATE(p%potxt(nparts)) ALLOCATE(p%E(2,nparts)) ALLOCATE(p%GAMMAold(nparts)) Allocate(p%geomweight(0:2,nparts)) + Allocate(p%losthole(nparts)) + Allocate(p%sendhole(nparts)) + if(.not.allocated(p%nblost)) allocate(p%nblost(4)) p%newindex=0 p%nblost=0 p%nbadded=0 p%partindex=-1 p%Uold=0 !p%UZold=0 !p%UTHETold=0 p%rindex=0 p%zindex=0 p%B=0 p%U=0 !p%UZ=0 !p%UTHET=0 p%pos=0 !p%R=0 !p%THET=0 p%Gamma=1 p%E=0 p%pot=0 p%potxt=0 p%gammaold=1 p%collected=.false. p%Davidson=.false. p%is_test=.false. p%is_field=.true. p%calc_moments=.true. p%m=me p%q=-elchar p%qmRatio=p%q/p%m p%weight=1.0_db p%H0=0 p%P0=0 p%temperature=0 p%geomweight=0 + p%losthole=0 + p%sendhole=0 END IF END SUBROUTINE creat_parts !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief Copy one particle from the receive buffers to the local simulation variable parts. ! !> @param [in] part particle parameters to copy from !> @param [in] partsindex destination particle index in the local parts variable !--------------------------------------------------------------------------- SUBROUTINE Insertincomingpart(p, part, partsindex) TYPE(particles), INTENT(INOUT):: p INTEGER, INTENT(in) :: partsindex TYPE(particle), INTENT(in) :: part p%partindex(partsindex) = part%partindex p%pos(:,partsindex) = part%Pos !p%THET(partsindex) = part%Pos(2) !p%Z(partsindex) = part%Pos(3) !p%UZ(partsindex) = part%U(3) p%U(:,partsindex) = part%U !p%UTHET(partsindex) = part%U(2) p%Gamma(partsindex) = part%Gamma p%pot(partsindex) = part%pot ! END SUBROUTINE Insertincomingpart !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief Copy one particle from the local parts variable to the send buffer. ! !> @param [in] buffer send buffer to copy to !> @param [in] bufferindex particle index in the send buffer !> @param [in] partsindex origin particle index in the local parts variable !--------------------------------------------------------------------------- SUBROUTINE Insertsentpart(p, buffer, bufferindex, partsindex) TYPE(particles), INTENT(INOUT):: p INTEGER, INTENT(in) :: bufferindex, partsindex TYPE(particle), DIMENSION(:), INTENT(inout) :: buffer buffer(bufferindex)%partindex = p%partindex(partsindex) buffer(bufferindex)%Pos = p%pos(:,partsindex) !buffer(bufferindex)%Pos(2) = p%THET(partsindex) !buffer(bufferindex)%Pos(3) = p%Z(partsindex) !buffer(bufferindex)%U(3) = p%UZ(partsindex) buffer(bufferindex)%U = p%U(:,partsindex) !buffer(bufferindex)%U(2) = p%UTHET(partsindex) buffer(bufferindex)%Gamma = p%Gamma(partsindex) buffer(bufferindex)%pot = p%pot(partsindex) ! 
END SUBROUTINE Insertsentpart !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> @brief Exchange two particles in the parts variable. ! !> @param [in] index1 index in parts of the first particle to exchange. !> @param [in] index2 index in parts of the second particle to exchange. !--------------------------------------------------------------------------- SUBROUTINE exchange_parts(p, index1, index2) TYPE(particles), INTENT(INOUT):: p INTEGER, INTENT(IN) :: index1, index2 REAL(kind=db):: pos(3), U(3), Gamma, geomweight(0:2),pot INTEGER :: Rindex, Zindex, partindex !! Exchange particle at index1 with particle at index2 ! Store part at index1 in temporary value partindex = p%partindex(index1) Gamma = p%Gamma(index1) pot = p%pot(index1) pos = p%pos(:,index1) !Z = p%Z(index1) !THET = p%THET(index1) U = p%U(:,index1) !UTHET = p%UTHET(index1) !UZ = p%UZ(index1) Rindex = p%Rindex(index1) Zindex = p%Zindex(index1) geomweight = p%geomweight(:,index1) ! Move part at index2 in part at index 1 p%partindex(index1) = p%partindex(index2) p%Gamma(index1) = p%Gamma(index2) p%pot(index1) = p%pot(index2) p%pos(:,index1) = p%pos(:,index2) !p%Z(index1) = p%Z(index2) !p%THET(index1) = p%THET(index2) p%U(:,index1) = p%U(:,index2) !p%UTHET(index1) = p%UTHET(index2) !p%UZ(index1) = p%UZ(index2) p%Rindex(index1) = p%Rindex(index2) p%Zindex(index1) = p%Zindex(index2) p%geomweight(:,index1) = p%geomweight(:,index2) ! Move temporary values from part(index1) to part(index2) p%partindex(index2) = partindex p%Gamma(index2) = Gamma p%pot(index2) = pot p%pos(:,index2) = pos !p%Z(index2) = Z !p%THET(index2) = THET p%U(:,index2) = U !p%UTHET(index2) = UTHET !p%UZ(index2) = UZ p%Rindex(index2) = Rindex p%Zindex(index2) = Zindex p%geomweight(:,index2) = geomweight END SUBROUTINE exchange_parts SUBROUTINE change_parts_allocation(p, sizedifference) implicit none TYPE(particles), INTENT(INOUT):: p INTEGER,INTENT(IN) :: sizedifference CALL change_array_size_int(p%Rindex, sizedifference) CALL change_array_size_int(p%Zindex, sizedifference) CALL change_array_size_int(p%partindex, sizedifference) + CALL change_array_size_int(p%losthole, sizedifference) + CALL change_array_size_int(p%sendhole, sizedifference) CALL change_array_size_dp12(p%E,sizedifference) CALL change_array_size_dp(p%pot,sizedifference) CALL change_array_size_dp(p%potxt,sizedifference) !CALL change_array_size_dp(p%R,sizedifference) CALL change_array_size_dp12(p%pos,sizedifference) !CALL change_array_size_dp(p%THET,sizedifference) CALL change_array_size_dp12(p%B,sizedifference) CALL change_array_size_dp012(p%geomweight,sizedifference) CALL change_array_size_dp_ptr12(p%U,sizedifference) CALL change_array_size_dp_ptr12(p%Uold,sizedifference) !CALL change_array_size_dp_ptr(p%UZ,sizedifference) !CALL change_array_size_dp_ptr(p%UZold,sizedifference) !CALL change_array_size_dp_ptr(p%UTHET,sizedifference) !CALL change_array_size_dp_ptr(p%UTHETold,sizedifference) CALL change_array_size_dp_ptr(p%Gamma,sizedifference) CALL change_array_size_dp_ptr(p%Gammaold,sizedifference) p%Nploc=MIN(p%Nploc,size(p%pos,2)) END SUBROUTINE change_parts_allocation SUBROUTINE change_array_size_dp(arr, sizedifference) implicit none REAL(kind=db), ALLOCATABLE, INTENT(INOUT):: arr(:) INTEGER, INTENT(IN):: sizedifference REAL(kind=db), ALLOCATABLE:: temp(:) INTEGER:: current_size, new_size if(allocated(arr)) THEN current_size=size(arr) new_size=current_size+sizedifference ALLOCATE(temp(new_size)) 
temp(1:min(current_size,new_size))=arr(1:min(current_size,new_size)) DEALLOCATE(arr) CALL move_alloc(temp, arr) END IF END SUBROUTINE change_array_size_dp SUBROUTINE change_array_size_dp2(arr, sizedifference) implicit none REAL(kind=db), ALLOCATABLE, INTENT(INOUT):: arr(:,:) INTEGER, INTENT(IN):: sizedifference REAL(kind=db), ALLOCATABLE:: temp(:,:) INTEGER:: current_size, new_size if(allocated(arr)) THEN current_size=size(arr,1) new_size=current_size+sizedifference ALLOCATE(temp(new_size,0:size(arr,2)-1)) temp(1:min(current_size,new_size),:)=arr(1:min(current_size,new_size),:) DEALLOCATE(arr) CALL move_alloc(temp, arr) END IF END SUBROUTINE change_array_size_dp2 SUBROUTINE change_array_size_dp12(arr, sizedifference) implicit none REAL(kind=db), ALLOCATABLE, INTENT(INOUT):: arr(:,:) INTEGER, INTENT(IN):: sizedifference REAL(kind=db), ALLOCATABLE:: temp(:,:) INTEGER:: current_size, new_size if(allocated(arr)) THEN current_size=size(arr,2) new_size=current_size+sizedifference ALLOCATE(temp(size(arr,1),new_size)) temp(:,1:min(current_size,new_size))=arr(:,1:min(current_size,new_size)) DEALLOCATE(arr) CALL move_alloc(temp, arr) END IF END SUBROUTINE change_array_size_dp12 SUBROUTINE change_array_size_dp012(arr, sizedifference) implicit none REAL(kind=db), ALLOCATABLE, INTENT(INOUT):: arr(:,:) INTEGER, INTENT(IN):: sizedifference REAL(kind=db), ALLOCATABLE:: temp(:,:) INTEGER:: current_size, new_size if(allocated(arr)) THEN current_size=size(arr,2) new_size=current_size+sizedifference ALLOCATE(temp(0:size(arr,1)-1,new_size)) temp(:,1:min(current_size,new_size))=arr(:,1:min(current_size,new_size)) DEALLOCATE(arr) CALL move_alloc(temp, arr) END IF END SUBROUTINE change_array_size_dp012 SUBROUTINE change_array_size_dp_ptr12(arr, sizedifference) implicit none REAL(kind=db), POINTER, INTENT(INOUT):: arr(:,:) INTEGER, INTENT(IN):: sizedifference - REAL(kind=db), CONTIGUOUS, POINTER:: temp(:,:) - INTEGER:: current_size, new_size + REAL(kind=db), POINTER:: temp(:,:) + INTEGER:: current_size, new_size, i if(associated(arr)) THEN current_size=size(arr,2) new_size=current_size+sizedifference ALLOCATE(temp(size(arr,1),new_size)) - temp(:,1:min(current_size,new_size))=arr(:,1:min(current_size,new_size)) + !WRITE(*,*)"increase pointer size: ", current_size, new_size, "temp: ", size(temp,1),size(temp,2), "arr: ", size(arr,1),size(arr,2) + Do i=1,min(current_size,new_size) + temp(:,i)=arr(:,i) + end do + !WRITE(*,*)"copy done" DEALLOCATE(arr) arr=> temp END IF END SUBROUTINE change_array_size_dp_ptr12 SUBROUTINE change_array_size_dp_ptr(arr, sizedifference) implicit none REAL(kind=db), POINTER, INTENT(INOUT):: arr(:) INTEGER, INTENT(IN):: sizedifference REAL(kind=db), CONTIGUOUS, POINTER:: temp(:) - INTEGER:: current_size, new_size + INTEGER:: current_size, new_size,i if(associated(arr)) THEN current_size=size(arr) new_size=current_size+sizedifference ALLOCATE(temp(new_size)) - temp(1:min(current_size,new_size))=arr(1:min(current_size,new_size)) + Do i=1,min(current_size,new_size) + temp(i)=arr(i) + end do + !temp(1:min(current_size,new_size))=arr(1:min(current_size,new_size)) DEALLOCATE(arr) arr=> temp END IF END SUBROUTINE change_array_size_dp_ptr SUBROUTINE change_array_size_int(arr, sizedifference) implicit none INTEGER, ALLOCATABLE, INTENT(INOUT):: arr(:) INTEGER, INTENT(IN):: sizedifference INTEGER, ALLOCATABLE:: temp(:) INTEGER:: current_size, new_size if(allocated(arr)) THEN current_size=size(arr) new_size=current_size+sizedifference ALLOCATE(temp(new_size)) 
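The change_array_size_* helpers in this hunk all follow the same resize idiom: allocate a temporary of the new size, copy the overlapping range, then hand the storage back under the original name. A minimal sketch of the allocatable variant:

```fortran
! Minimal sketch of the resize idiom used by the change_array_size_* helpers
! (allocatable case): copy the overlapping range, then transfer ownership.
subroutine resize_1d(arr, sizedifference)
   implicit none
   integer, parameter :: db = kind(1.0d0)
   real(kind=db), allocatable, intent(inout) :: arr(:)
   integer, intent(in) :: sizedifference
   real(kind=db), allocatable :: temp(:)
   integer :: old_size, new_size

   if (.not. allocated(arr)) return
   old_size = size(arr)
   new_size = old_size + sizedifference
   allocate(temp(new_size))
   temp(1:min(old_size, new_size)) = arr(1:min(old_size, new_size))
   call move_alloc(temp, arr)      ! releases the old storage, no second copy
end subroutine resize_1d
```

For the pointer components (U, Uold, Gamma, Gammaold) move_alloc is not available, which is why the diff switches those helpers to an explicit element-wise copy followed by a pointer reassignment.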
temp(1:min(current_size,new_size))=arr(1:min(current_size,new_size)) DEALLOCATE(arr) CALL move_alloc(temp,arr) END IF END SUBROUTINE change_array_size_int !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief Move particle with index sourceindex to particle with index destindex. !> !WARNING! This will overwrite particle at destindex. ! !> @param [in] sourceindex index in parts of the particle to move. !> @param [in] destindex index in parts of the moved particle destination. !--------------------------------------------------------------------------- SUBROUTINE move_part(p, sourceindex, destindex) !! This will destroy particle at destindex INTEGER, INTENT(IN) :: destindex, sourceindex TYPE(particles), INTENT(INOUT)::p IF(sourceindex .eq. destindex) RETURN IF(sourceindex .le. 0 .or. destindex .le. 0) RETURN ! Move part at sourceindex in part at destindex Call copy_part(p,sourceindex,destindex,p) END SUBROUTINE move_part !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief Copy particle with index sourceindex in particles sourcep to particle with index destindex in particles destp. !> !WARNING! This will overwrite particle at destp(destindex). ! !> @param [inout] sourcep Structure of source particles. !> @param [in] sourceindex index in parts of the particle to move. !> @param [in] destindex index in parts of the moved particle destination. !> @param [inout] destp Structure of source particles. !--------------------------------------------------------------------------- SUBROUTINE copy_part(sourcep, sourceindex, destindex, destp) !! This will destroy particle at destindex INTEGER, INTENT(IN) :: destindex, sourceindex TYPE(particles), INTENT(IN)::sourcep TYPE(particles), INTENT(INOUT)::destp IF(sourceindex .le. 0 .or. destindex .le. 0) RETURN IF( destindex .gt. size(destp%pos,2)) RETURN ! 
Move part at sourceindex in part at destindex destp%partindex(destindex) = sourcep%partindex(sourceindex) destp%Gamma(destindex) = sourcep%Gamma(sourceindex) destp%Gammaold(destindex) = sourcep%Gammaold(sourceindex) destp%pos(:,destindex) = sourcep%pos(:,sourceindex) !destp%Z(destindex) = sourcep%Z(sourceindex) !destp%THET(destindex) = sourcep%THET(sourceindex) destp%U(:,destindex) = sourcep%U(:,sourceindex) !destp%UTHET(destindex) = sourcep%UTHET(sourceindex) !destp%UZ(destindex) = sourcep%UZ(sourceindex) destp%Uold(:,destindex) = sourcep%Uold(:,sourceindex) !destp%UTHETold(destindex) = sourcep%UTHETold(sourceindex) !destp%UZold(destindex) = sourcep%UZold(sourceindex) destp%Rindex(destindex) = sourcep%Rindex(sourceindex) destp%Zindex(destindex) = sourcep%Zindex(sourceindex) destp%geomweight(:,destindex) = sourcep%geomweight(:,sourceindex) destp%pot(destindex) = sourcep%pot(sourceindex) destp%potxt(destindex) = sourcep%potxt(sourceindex) END SUBROUTINE copy_part !________________________________________________________________________________ SUBROUTINE destroy_parts(p) TYPE(particles) :: p p%Nploc=0 IF(ALLOCATED(p%pos)) DEALLOCATE(p%pos) !IF(ALLOCATED(p%R)) DEALLOCATE(p%R) !IF(ALLOCATED(p%THET)) DEALLOCATE(p%THET) IF(ALLOCATED(p%B)) DEALLOCATE(p%B) IF(ALLOCATED(p%E)) DEALLOCATE(p%E) IF(ASSOCIATED(p%U)) DEALLOCATE(p%U) IF(Associated(p%Uold)) DEALLOCATE(p%Uold) !IF(Associated(p%UZ)) DEALLOCATE(p%UZ) !IF(Associated(p%UZold)) DEALLOCATE(p%UZold) !IF(Associated(p%UTHET)) DEALLOCATE(p%UTHET) !IF(Associated(p%UTHETold)) DEALLOCATE(p%UTHETold) IF(Associated(p%Gamma)) DEALLOCATE(p%Gamma) IF(Associated(p%Gammaold)) DEALLOCATE(p%Gammaold) IF(ALLOCATED(p%Rindex)) DEALLOCATE(p%Rindex) IF(ALLOCATED(p%Zindex)) DEALLOCATE(p%Zindex) + IF(ALLOCATED(p%losthole)) DEALLOCATE(p%losthole) + IF(ALLOCATED(p%sendhole)) DEALLOCATE(p%sendhole) IF(ALLOCATED(p%partindex)) DEALLOCATE(p%partindex) if(allocated(p%geomweight)) Deallocate(p%geomweight) if(allocated(p%moments)) Deallocate(p%moments) END SUBROUTINE !________________________________________________________________________________ SUBROUTINE clean_beam(partslist) ! INTEGER:: i type(particles):: partslist(:) Do i=1,size(partslist,1) CALL destroy_parts(partslist(i)) END DO ! END SUBROUTINE clean_beam !________________________________________________________________________________ SUBROUTINE swappointer( pointer1, pointer2) REAL(kind=db), DIMENSION(:), POINTER, INTENT(inout):: pointer1, pointer2 REAL(kind=db), DIMENSION(:), POINTER:: temppointer temppointer=>pointer1 pointer1=>pointer2 pointer2=>temppointer END SUBROUTINE swappointer SUBROUTINE swappointer2( pointer1, pointer2) REAL(kind=db), DIMENSION(:,:), POINTER, INTENT(inout):: pointer1, pointer2 REAL(kind=db), DIMENSION(:,:), POINTER:: temppointer temppointer=>pointer1 pointer1=>pointer2 pointer2=>temppointer END SUBROUTINE swappointer2 !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief Deallocate recursively a linked_paticle linked list ! !> @param [in] l_p linked_part particle to be dallocated. 
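swappointer/swappointer2, defined just above, exchange two array pointers by re-pointing their descriptors instead of copying data — presumably used to roll the current and previous time-step arrays (U/Uold, Gamma/Gammaold). A minimal demonstration of the idiom:

```fortran
program pointer_swap_demo
   implicit none
   integer, parameter :: db = kind(1.0d0)
   real(kind=db), dimension(:), pointer :: unew, uold, tmp

   allocate(unew(3), uold(3))
   unew = [1.0_db, 2.0_db, 3.0_db]
   uold = 0.0_db

   ! swappointer-style exchange: only the descriptors change hands
   tmp  => unew
   unew => uold
   uold => tmp

   print *, 'uold now holds', uold        ! 1 2 3, without any array copy
   deallocate(unew, uold)
end program pointer_swap_demo
```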
!--------------------------------------------------------------------------- RECURSIVE SUBROUTINE destroy_linked_parts(l_p) TYPE(linked_part), POINTER :: l_p IF(associated(l_p%next)) call destroy_linked_parts(l_p%next) deallocate(l_p) END subroutine destroy_linked_parts END MODULE particletypes diff --git a/src/psupply_mod.f90 b/src/psupply_mod.f90 index 798387e..7cf483d 100644 --- a/src/psupply_mod.f90 +++ b/src/psupply_mod.f90 @@ -1,325 +1,345 @@ module psupply use constants implicit none type power_supply logical :: active = .false. ! is the power supply active real(kind=db):: geomcapacitor = 1 ! capacitance of the metalic vessel normalised to the neutral density in vessel used in the neutcol module real(kind=db):: PSresistor = 1 ! internal resistance of the power supply normalised to the actual neutral density in vessel real(kind=db):: targetbias = 0 ! Set voltage on the power supply real(kind=db):: bias = 0 ! current voltage on the power supply integer :: nbbounds = 2 ! number of boundaries defined in the geometry integer :: lststp = 0 ! previous step on which the bias was updated real(kind=db):: current(3) = 0 ! current collected on the boundaries normalised to the simulated collision neutral density set in neutcol module ! 1 is at time i-2nbhdt, 2 is at time i-nbhdt and 3 is at time i integer, allocatable:: bdpos(:) ! sign of each boundary for collected charge to determine direction of current real(kind=db),allocatable:: charge(:) ! Charge collected on each boundary and per nbdt real(kind=db),allocatable:: biases(:) ! Actual potentials at each boundary integer :: nbhdt = 10 ! half of the number of time steps between each calls to RK4 real(kind=db):: expdens ! [m-3] experimental neutral density real(kind=db):: neutcoldens ! [m-3] neutral density used in neutcol module real(kind=db):: frequency ! [Hz] frequency of an imposed oscillation in bias real(kind=db):: deltabias ! [V] Amplitude of the oscillations around targetbias end type type(power_supply):: the_ps contains ! read the input parameters from the input file and setup the necesary variables for the module to work subroutine psupply_init(fileid,cstep,nbbounds,neutcoldens,rstbias) use splinebound use basic, only: phinorm, tnorm, mpirank, qnorm, potinn, potout, dt use constants use mpi use geometry use weighttypes use fields integer:: fileid, cstep, nbbounds, istat, nbhdt, ierr,i real(kind=db),OPTIONAL, INTENT(IN):: rstbias real(kind=db):: neutcoldens ! [m-3] real(kind=db):: expneutdens = 1 ! [m-3] real(kind=db):: PsResistor = 1 ! [Ohm] real(kind=db):: geomcapacitor = 1 ! [F] real(kind=db):: targetbias = 0 ! [V] real(kind=db):: frequency = 0 ! [Hz] real(kind=db):: deltabias = 0 ! [V] integer, allocatable:: bdpos(:) character(len=1000) :: line logical :: active = .false. NAMELIST /psupplyparams/ expneutdens, PsResistor, geomcapacitor, targetbias, nbhdt, active, bdpos, frequency, deltabias the_ps%lststp=cstep the_ps%nbbounds=nbbounds allocate(the_ps%bdpos(nbbounds),bdpos(nbbounds)) allocate(the_ps%charge(nbbounds),the_ps%biases(nbbounds)) the_ps%bdpos=0 bdpos=0 the_ps%charge=0 the_ps%biases=0 the_ps%current=0 ! read the input parameters from file Rewind(fileid) READ(fileid, psupplyparams, iostat=istat) if (istat.gt.0) then backspace(fileid) read(fileid,fmt='(A)') line write(*,'(A)') & 'Invalid line in pssupplyparams: '//trim(line) call MPI_Abort(MPI_COMM_WORLD, -1, ierr) stop end if ! save the parameters on output IF(mpirank .eq. 0) THEN WRITE(*, psupplyparams) END IF ! 
rescale the targetbias set on the power supply the_ps%targetbias=abs(targetbias)/phinorm the_ps%bdpos=bdpos the_ps%frequency=frequency*tnorm*2*pi the_ps%deltabias=deltabias/phinorm ! save the experimental neutral density the_ps%expdens=expneutdens ! save the neutral collision density the_ps%neutcoldens=neutcoldens if(present(rstbias))then ! Initialize the current bias from the restart value the_ps%bias=rstbias else ! initialize with the file input parameters if (the_domain%nbsplines.gt.0) then do i=1,the_ps%nbbounds if(the_ps%bdpos(i) .lt. 0)then the_ps%bias=-the_domain%boundaries(i)%Dirichlet_val exit end if end do else the_ps%bias=(potout-potinn) end if end if ! set the initial bias where(the_ps%bdpos .lt. 0) the_ps%biases=-the_ps%bias end where ! Normalise resistor and capacitor to adapt to experimental pressure the_ps%PSresistor = PSresistor*the_ps%expdens/the_ps%neutcoldens*qnorm/(tnorm*phinorm) the_ps%geomcapacitor = geomcapacitor*phinorm/qnorm the_ps%nbhdt = nbhdt the_ps%active = active if( .not. the_ps%active) return ! Initialize the biases if (the_domain%nbsplines.gt.0) then do i=1,the_ps%nbbounds the_domain%boundaries(i)%Dirichlet_val=the_ps%biases(i)+the_ps%deltabias*sin(the_ps%frequency*cstep*dt) end do else potinn=the_ps%biases(1)+the_ps%deltabias*sin(the_ps%frequency*cstep*dt) Potout=0 Phidown=Potinn Phiup=Potout end if ! recalculate gtilde to adapt for the new biases CALL total_gtilde(vec1, vec2, gtilde, gridwdir) + !$OMP PARALLEL call comp_gradgtilde + ! Recompute the vacuum field call vacuum_field + !$OMP END PARALLEL end subroutine ! save to the result file the parameters of this module read from the input Subroutine psupply_diag(File_handle, str) use mpi Use futils use basic, only: tnorm, phinorm, qnorm implicit none Integer:: File_handle Character(len=*):: str CHARACTER(len=256):: grpname Integer:: ierr, mpirank CALL MPI_COMM_RANK(MPI_COMM_WORLD, mpirank, ierr) IF(mpirank .eq. 0 .and. the_ps%active) THEN Write(grpname,'(a,a)') trim(str),"/psupply" If(.not. isgroup(File_handle, trim(grpname))) THEN CALL creatg(File_handle, trim(grpname)) END IF Call attach(File_handle, trim(grpname), "expdens", the_ps%expdens) Call attach(File_handle, trim(grpname), "targetbias", the_ps%targetbias*phinorm) Call attach(File_handle, trim(grpname), "PSresistor", the_ps%PSresistor/the_ps%expdens*the_ps%neutcoldens/qnorm*(tnorm*phinorm)) Call attach(File_handle, trim(grpname), "geomcapacitor", the_ps%geomcapacitor/phinorm*qnorm) Call attach(File_handle, trim(grpname), "nbhdt", the_ps%nbhdt) Call putarr(File_handle,trim(grpname)//"/bdpos", the_ps%bdpos) END IF End subroutine psupply_diag ! gneral routine called from stepon to update the psupply bias subroutine psupply_step(ps,p,cstep) use particletypes use geometry use weighttypes use fields use basic, only: Potinn, potout type(power_supply):: ps type(particles):: p(:) integer:: cstep, i if (.not. ps%active ) return + !$OMP SINGLE ! calculate the charge collected on each boundary due to the contribution of each specie call add_charge(ps,p) ! calculate the current flowing between the electrodes due to the cloud call calc_current(ps,cstep) ! calculate the bias at the new time step call updt_bias(ps,cstep) + !$OMP END SINGLE NOWAIT + if(mod(cstep-ps%lststp,2*ps%nbhdt) .ne. 0) return - + + !$OMP BARRIER + + !$OMP SINGLE ! update the bias on the geometry for the Dirichlet b.c. 
if (the_domain%nbsplines.gt.0) then do i=1,ps%nbbounds the_domain%boundaries(i)%Dirichlet_val=ps%biases(i) end do else potinn=ps%biases(1) Potout=0 Phidown=Potinn Phiup=Potout end if - ! recalculate gtilde to adapt for the new biases - CALL total_gtilde(vec1, vec2, gtilde, gridwdir) + !$OMP END SINGLE + + + !$OMP DO + Do i=0,nr + ! recalculate gtilde to adapt for the new biases + CALL total_gtilde(vec1(i*(nz+1)+1:(i+1)*(nz+1)), vec2(i*(nz+1)+1:(i+1)*(nz+1)), gtilde(:,i*(nz+1)+1:(i+1)*(nz+1)), gridwdir(:,i*(nz+1)+1:(i+1)*(nz+1))) + end do + !$OMP END DO + call comp_gradgtilde + !$OMP BARRIER + + call vacuum_field end subroutine ! calculates the current flowing between the electrodes due to the cloud subroutine calc_current(ps,cstep) use geometry use basic, only: phinorm, dt use fields type(power_supply):: ps integer:: cstep if(mod(cstep-ps%lststp,ps%nbhdt).eq.0) then ! communicate the charge accumulation in this timestep call reduce_charge(ps) if(mod(cstep-ps%lststp,ps%nbhdt*2).eq.0)then ! calculates the current by adding the contribution of each boundary if (mpirank .eq. 0)then ps%current(3)=sum(-ps%charge*ps%bdpos)/(ps%nbhdt*dt) end if ps%lststp=cstep else ! calculates the current by adding the contribution of each boundary if (mpirank .eq. 0)then ps%current(2)=sum(-ps%charge*ps%bdpos)/(ps%nbhdt*dt) end if end if ps%charge=0 end if end subroutine ! calculate the charge deposited by each specie on the electrodes (used to calculate the resulting current) subroutine add_charge(ps,p) use particletypes use basic, only: qnorm type(power_supply):: ps type(particles):: p(:) integer:: i do i=1,size(p,1) if(.not. p(i)%is_field) cycle !Add the normalised contribution of each specie ps%charge=ps%charge+p(i)%nblost(5:)*p(i)%weight*p(i)%q/qnorm end do end subroutine ! Time integrate the ODE of the actual bias between the accelerating electrodes ! and broadcast it to all the workers subroutine updt_bias(ps,cstep) use basic, only: dt implicit none type(power_supply):: ps integer:: cstep real(kind=db):: bias,k1,k2,k3,k4, hdeltat if(mod(cstep-ps%lststp,2*ps%nbhdt) .ne. 0) return ! half delta t if (ps%PSresistor.gt.0)then hdeltat=dt*ps%nbhdt/(ps%PSresistor*ps%geomcapacitor) bias=ps%bias ! we update the bias using RK4 k1=-(bias+ps%current(1)*ps%PSresistor-ps%targetbias) k2=-(bias+hdeltat*k1+ps%current(2)*ps%PSresistor-ps%targetbias) k3=-(bias+hdeltat*k2+ps%current(2)*ps%PSresistor-ps%targetbias) k4=-(bias+2*hdeltat*k3+ps%current(3)*ps%PSresistor-ps%targetbias) ps%bias=bias+(k1+2*k2+2*k3+k4)*2*hdeltat/6 end if !Write(*,*) " new bias ", ps%bias*phinorm where (ps%bdpos .lt. 0) ps%biases=-ps%bias+ps%deltabias*sin(ps%frequency*cstep*dt) end where where (ps%bdpos .eq. 2) ps%biases=ps%deltabias*sin(ps%frequency*cstep*dt) end where ! broadcast the bias to all the mpi processes call bcast_bias(ps) ps%current(1)=ps%current(3) end subroutine updt_bias ! gather on node 0 the collected charge on each metallic boundary subroutine reduce_charge(ps) use mpi use mpihelper use basic, ONLY: mpirank type(power_supply):: ps integer:: ierr if(mpirank .eq. 0) then call MPI_REDUCE(MPI_IN_PLACE,ps%charge,ps%nbbounds,db_type,db_sum_op,0,MPI_COMM_WORLD,ierr) !Write(*,*) "curr charge ", ps%charge else call MPI_REDUCE(ps%charge,ps%charge,ps%nbbounds,db_type,db_sum_op,0,MPI_COMM_WORLD,ierr) end if end subroutine ! 
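updt_bias integrates the external-circuit equation R*C*dV/dt = -(V + R*I(t) - V_target) with one classical RK4 stage spanning 2*nbhdt simulation steps, using the currents stored at t - 2*nbhdt*dt, t - nbhdt*dt and t for the stage evaluations. A self-contained sketch of the same update with illustrative, dimensional values and zero cloud current (the bias then relaxes towards the target as 1 - exp(-t/RC)):

```fortran
program psupply_rk4_demo
   implicit none
   integer, parameter :: db = kind(1.0d0)
   real(kind=db) :: R, C, Vt, V, I1, I2, I3, h, hdeltat, k1, k2, k3, k4
   integer :: n

   R = 1.0e6_db; C = 1.0e-9_db; Vt = 100.0_db          ! illustrative resistor, capacitor, set voltage
   V = 0.0_db; I1 = 0.0_db; I2 = 0.0_db; I3 = 0.0_db   ! start uncharged, no cloud current
   h = 1.0e-5_db                                       ! full update interval (2*nbhdt*dt in the code)
   hdeltat = 0.5_db*h/(R*C)                            ! normalised half step, as in updt_bias

   do n = 1, 200
      k1 = -(V                     + I1*R - Vt)
      k2 = -(V +        hdeltat*k1 + I2*R - Vt)
      k3 = -(V +        hdeltat*k2 + I2*R - Vt)
      k4 = -(V + 2.0_db*hdeltat*k3 + I3*R - Vt)
      V  = V + (k1 + 2.0_db*k2 + 2.0_db*k3 + k4)*2.0_db*hdeltat/6.0_db
   end do
   print *, 'bias after', 200*h, 's :', V, '  (target', Vt, ')'
end program psupply_rk4_demo
```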
broadcast to all the nodes the new bias imposed by the power supply on the electrodes subroutine bcast_bias(ps) use mpi use mpihelper type(power_supply):: ps integer:: ierr call MPI_BCAST(ps%biases,ps%nbbounds,db_type,0,MPI_COMM_WORLD,ierr) end subroutine end module psupply diff --git a/src/splinebound_mod.f90 b/src/splinebound_mod.f90 index 1bc559c..c375154 100644 --- a/src/splinebound_mod.f90 +++ b/src/splinebound_mod.f90 @@ -1,937 +1,938 @@ MODULE splinebound USE constants USE bsplines USE forSISL, only: newCurve, freeCurve, freeIntCurve, writeSISLcurve, writeSISLpoints Use forSISLdata IMPLICIT NONE INTEGER, PARAMETER :: bd=-1, bd_Dirichletconst=0, bd_Dirichletvar=1, bd_Neumann=2 type cellkind integer:: spldirkind=0 !< -1 outside (return -1) no dist to calculate; 0 boundary calculate dist with linked boundaries; 1 inside (return 1) no dist to calculate integer:: spltotkind=0 !< -1 outside (return -1) no dist to calculate; 0 boundary calculate dist with linked boundaries; 1 inside (return 1) no dist to calculate integer:: linkedboundaries(2)=0 !< stores the spline curve indices in the spline_domain of the spline boundaries that are the closest and at a distance lower than dist_extent (1) !< (1) is for dirichlet boundaries !< (2) is for domain boundaries integer:: leftknot(4)=0 !< knots pointer for s1424 in wtot then wdir real(kind=db):: lguess(2)=-1 !< Spline parameter left limit as start guess real(kind=db):: rguess(2)=-1 !< Spline parameter right limit as start guess end type cellkind TYPE spline_boundary ! all curves assume right handedness to set which side of the curve is inside or outside type(SISLCurve):: curve Real(kind=db):: Dirichlet_val !< Value for the dirichlet boundary condition created by this boundary Real(kind=db):: epsge=1.0e-5 !< geometric resolution used for calculating distances Real(kind=db):: epsce=1.0e-9 !< value of weight below which it is 0 INTEGER(kind(bd)):: type=bd_Dirichletconst !< type of boundary conditions END TYPE spline_boundary type spline_domain integer:: nbsplines = 0 !< number of spline boundaries in the domain type(spline_boundary), allocatable:: boundaries(:) !< List of boundaries in the domain Real(kind=db):: dist_extent=0.1 !< distance used for the merging with the plateau function for the weight type(cellkind), ALLOCATABLE:: cellk(:,:) !< Precomputed parameters at each cell for faster weight computation type(spline2d), pointer:: splrz => null() !< Pointer to the main spline grid used for the FEM solver Integer:: nb1 !< Number of grid points in the 1st dimension Integer:: nb2 !< Number of grid points in the 2nd dimension real(kind=db), ALLOCATABLE:: x1(:) !< Grid points in first direction for weight interpolation real(kind=db), ALLOCATABLE:: x2(:) !< Grid points in 2nd direction for weight interpolation real(kind=db), ALLOCATABLE:: dx1(:) !< inverse cell width in first direction for weight interpolation real(kind=db), ALLOCATABLE:: dx2(:) !< inverse cell width in 2nd direction for weight interpolation !type(SISLsurf):: Dirdomweight !< structure storing precalculated geometric weight for faster evaluation !type(SISLsurf):: totdomweight !< structure storing precalculated total weight for faster evaluation type(spline2d):: Dirdomweightspl !< structure storing precalculated geometric weight for faster evaluation type(spline2d):: totdomweightspl !< structure storing precalculated total weight for faster evaluation end type spline_domain CONTAINS !--------------------------------------------------------------------------- !> @author !> Guillaume Le 
Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief !> Reads a spline domain from the namelist or from a h5 file. Needs to be called for the initialization of the module !> @param[in] Fileid Text file id of the input file containing namelists !> @param[out] spldom spline domain !> @param[in] splrz bspline structure used by the FEM comming form bspline library !> @param[in] rnorm distance normalization constant !> @param[in] phinorm electric potential normalization constant !--------------------------------------------------------------------------- subroutine read_splinebound(Fileid, spldom, splrz, rnorm, Phinorm) use mpi Integer:: Fileid type(spline_domain):: spldom type(spline2d):: splrz real(kind=db):: rnorm, phinorm Integer:: nbsplines, istat, mpirank, ierr real(kind=db):: dist_extent Character(len=128):: h5fname="", line real(kind=db) :: Dvals(30)=0 integer:: i namelist /spldomain/ nbsplines, dist_extent, h5fname, Dvals CALL MPI_COMM_RANK(MPI_COMM_WORLD, mpirank, ierr) REWIND(fileid) READ(fileid,spldomain, iostat=istat) if (istat.gt.0) then if(mpirank .eq. 0) then backspace(fileid) read(fileid,fmt='(A)') line write(*,'(A)') & 'Invalid line in geomparams: '//trim(line) end if call MPI_Abort(MPI_COMM_WORLD, -1, ierr) stop end if if(mpirank .eq. 0) WRITE(*, spldomain) Dvals=Dvals/phinorm dist_extent=dist_extent/rnorm if (.not. trim(h5fname)=='' ) then call setspline_domain(spldom, splrz, dist_extent, 0) call splinebound_readh5domain(h5fname,spldom, rnorm, phinorm) call classifycells(spldom) do i=1,spldom%nbsplines spldom%boundaries(i)%Dirichlet_val=Dvals(i) end do return else WRITE(*,*) "Error the filename h5fname is not defined. No boundary has been set!" call mpi_Abort(MPI_COMM_WORLD, -1, ierr) end if end subroutine !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief !> Saves the spline boundaries to the result file !> @param[in] File_handle futils h5 file id !> @param[in] curr_grp groupname under which the boundaries must be saved !> @param[in] spldom spline domain !--------------------------------------------------------------------------- Subroutine splinebound_diag(File_handle, curr_grp, spldom) use mpi Use futils Use basic, ONLY: rnorm, phinorm Integer:: File_handle type(spline_domain):: spldom Character(len=*):: curr_grp CHARACTER(len=128):: grpname Integer:: ierr, mpirank, i CALL MPI_COMM_RANK(MPI_COMM_WORLD, mpirank, ierr) IF(mpirank .eq. 0) THEN Write(grpname,'(a,a)') trim(curr_grp),"/geometry_spl" If(.not. isgroup(File_handle, trim(grpname))) THEN CALL creatg(File_handle, trim(grpname)) END IF Call attach(File_handle, trim(grpname), "dist_extent",spldom%dist_extent) Call attach(File_handle, trim(grpname), "nbsplines", spldom%nbsplines) do i=1,spldom%nbsplines Write(grpname,'(a,a,i2.2)') trim(curr_grp),"/geometry_spl/",i If(.not. 
isgroup(File_handle, trim(grpname))) THEN CALL creatg(File_handle, trim(grpname)) END IF Call attach(File_handle, trim(grpname), "Dirichlet_val", spldom%boundaries(i)%Dirichlet_val*phinorm) Call attach(File_handle, trim(grpname), "order", spldom%boundaries(i)%curve%ik) Call attach(File_handle, trim(grpname), "kind", spldom%boundaries(i)%curve%ikind) Call attach(File_handle, trim(grpname), "type", spldom%boundaries(i)%type) Call attach(File_handle, trim(grpname), "dim", spldom%boundaries(i)%curve%idim) CALL putarr(File_handle, TRIM(grpname)//"/pos", spldom%boundaries(i)%curve%ecoef*rnorm) CALL putarr(File_handle, TRIM(grpname)//"/knots", spldom%boundaries(i)%curve%et) end do END IF End subroutine splinebound_diag !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief !> Read a spline boundary domain from an h5 file structure !> @param[out] spldom new spline domain !> @param[in] filename filename of the h5 file !> @param[in] rnorm distance normalization constant !> @param[in] phinorm electric potential normalization constant !--------------------------------------------------------------------------- subroutine splinebound_readh5domain(filename, spldom, rnorm, phinorm) use futils use forSISL implicit none Character(len=*),intent(in) :: filename type(spline_domain),intent(inout) :: spldom integer:: h5id, i real(kind=db):: rnorm, phinorm CHARACTER(len=128):: grpname integer:: periodic integer:: order, dim, bdtype INTEGER:: posrank, posdim(2), err real(kind=db):: Dval, epsge, epsce real(kind=db),allocatable:: points(:,:) call openf(filename, h5id,'r','d') call getatt(h5id, '/geometry_spl/','nbsplines', spldom%nbsplines) ! prepare memory if (allocated(spldom%boundaries)) then do i=1,size(spldom%boundaries,1) call free_bsplinecurve(spldom%boundaries(i)) end do DEALLOCATE(spldom%boundaries) end if allocate(spldom%boundaries(spldom%nbsplines)) ! Read each boundary curve individually do i=1,spldom%nbsplines Write(grpname,'(a,i2.2)') "/geometry_spl/",i If(.not. isgroup(h5id, trim(grpname))) THEN Write(*,*) "Error the geometry definition file is invalid" END IF periodic=0 Call getatt(h5id, trim(grpname), "Dirichlet_val", Dval) Call getatt(h5id, trim(grpname), "epsge", epsge) Call getatt(h5id, trim(grpname), "epsce", epsce) Call getatt(h5id, trim(grpname), "order", order) Call getatt(h5id, trim(grpname), "dim", dim) err=0 Call getatt(h5id, trim(grpname), "periodic", periodic,err) if(err .lt.0) periodic=0 CALL getdims(h5id, TRIM(grpname)//"/pos", posrank, posdim) allocate(points(posdim(1),posdim(2))) CALL getarr(h5id, TRIM(grpname)//"/pos", points) points=points/rnorm Call setspline_boundary(spldom%boundaries(i),transpose(points), order-1, Dval/phinorm, epsge,epsce, periodic) bdtype=bd err=0 Call getatt(h5id, trim(grpname), "type", bdtype,err) if(err.ge.0) spldom%boundaries(i)%type=bdtype deallocate(points) end do call closef(h5id) end subroutine splinebound_readh5domain !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! 
DESCRIPTION: !> !> @brief !> Initializes a spline domain and allocates the necessary memory !> @param[out] spldom new spline domain !> @param[in] splrz b-spline structure used by the FEM, coming from the bspline library !> @param[in] dist_extent normalized characteristic fall length of the weight !> @param[in] nb_splines number of boundary splines to allocate !--------------------------------------------------------------------------- subroutine setspline_domain(spldom,splrz,dist_extent, nb_splines) type(spline_domain):: spldom type(spline2d), TARGET:: splrz real(kind=db):: dist_extent integer:: nb_splines, nb1, nb2 ! Store the grid parameters to speed up calculations nb1=splrz%sp1%nints nb2=splrz%sp2%nints spldom%nb1=nb1 spldom%nb2=nb2 spldom%splrz=>splrz allocate(spldom%cellk(0:nb1-1,0:nb2-1)) allocate(spldom%x1(0:nb1)) allocate(spldom%x2(0:nb2)) allocate(spldom%dx1(0:nb1-1)) allocate(spldom%dx2(0:nb2-1)) spldom%x1(0:)=splrz%sp1%knots(0:nb1) spldom%x2(0:)=splrz%sp2%knots(0:nb2) spldom%dx1(0:)=1/(spldom%x1(1:nb1)-spldom%x1(0:nb1-1)) spldom%dx2(0:)=1/(spldom%x2(1:nb2)-spldom%x2(0:nb2-1)) ! Prepare structures to host the individual spline boundaries spldom%nbsplines=nb_splines if(spldom%nbsplines.gt. 0) allocate(spldom%boundaries(nb_splines)) spldom%dist_extent=dist_extent end subroutine setspline_domain !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief Initializes a spline boundary and allocates the necessary memory !> @param[out] b_curve new spline boundary !> @param[in] cpoints control points at the node positions !> @param[in] degree degree of the spline polynomial defining the boundary curve !> @param[in] D_val Normalized value of the Dirichlet boundary condition for this curve !> @param[in] epsge geometric precision used by SISL !> @param[in] epsce arithmetic precision used by SISL !> @param[in] periodic set if the spline curve is periodic !--------------------------------------------------------------------------- subroutine setspline_boundary(b_curve, cpoints, degree, D_val, epsge, epsce, periodic) Use bsplines use forSISL,ONLY: newcurve, s1630 use mpi type(spline_boundary):: b_curve Real(kind=db):: cpoints(:,:) Real(REAL64),ALLOCATABLE:: points(:) Real(REAL64):: astpar integer:: degree integer, optional:: periodic Integer:: order, ierr Real(kind=db):: D_val Real(kind=db),OPTIONAL :: epsge, epsce Integer:: nbpoints, dim, jstat, bsptype integer:: period period=0 if(present(periodic))period=periodic nbpoints= size(cpoints,2) dim=size(cpoints,1) order=degree+1 if(nbpoints .lt. order) then WRITE(*,'(a,i3,a,i5)') "Error: the number of points", nbpoints, " is insufficient for the required order ", order CALL mpi_finalize(ierr) call EXIT(-1) end if allocate(points(dim*nbpoints)) points=reshape(cpoints,(/dim*nbpoints/)) bsptype=1 ! open boundaries b-spline if(period.gt.0) bsptype=-1 ! closed periodic curve astpar=0.0 ! starting parameter for the knots vector ! initialize a new curve using SISL CALL s1630(points, nbpoints, astpar, bsptype, dim, order, b_curve%curve, jstat) if (jstat > 0 ) WRITE(*,*) "Warning ", jstat," in curve initialisation s1630 for splineweight" if (jstat < 0 ) WRITE(*,*) "Error ", jstat," in curve initialisation s1630 for splineweight" b_curve%Dirichlet_val=D_val if(present(epsge)) b_curve%epsge=epsge if(present(epsce)) b_curve%epsce=epsce end subroutine setspline_boundary !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC !
! DESCRIPTION: !> !> @brief !> Calculates the Dirichlet boundary weight from a given spline domain !> @param[in] spldom spline domain containing the information on the boundary conditions !> @param[in] x1(:) array of axial positions where the weights are evaluated !> @param[in] x2(:) array of radial positions where the weights are evaluated !> @param[out] w(0:,:) matrix of weights with first index corresponding to the derivative order and second index to the position !--------------------------------------------------------------------------- SUBROUTINE spline_w(spldom,x1,x2,w) use forSISL,ONLY: s1424 use bsplines type(spline_domain):: spldom Real(kind=db), INTENT(IN):: x2(:),x1(:) Real(kind=db), INTENT(OUT):: w(0:,:) Integer,allocatable::i(:),j(:) allocate(i(size(x2,1)),j(size(x2,1))) call getindex(x1, x2, spldom, i, j) if (size(w,1).gt.1) then CALL speval(spldom%Dirdomweightspl, x1, x2, i, j, w(0,:), w(1,:), w(2,:)) else CALL speval(spldom%Dirdomweightspl, x1, x2, i, j, w(0,:)) end if End SUBROUTINE spline_w !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief !> Calculates the total geometric weight from a given spline domain !> @param[in] spldom spline domain containing the information on the boundary conditions !> @param[in] x1(:) array of axial positions where the weights are evaluated !> @param[in] x2(:) array of radial positions where the weights are evaluated !> @param[out] w(0:,:) matrix of weights with first index corresponding to the derivative order and second index to the position !> @param[out] idwall(:) optional index of the boundary spline linked to each evaluation position (0 if no boundary is linked) !--------------------------------------------------------------------------- SUBROUTINE spline_wtot(spldom,x1,x2,w,idwall) use forSISL,ONLY: s1424 use bsplines type(spline_domain):: spldom Real(kind=db), INTENT(IN):: x2(:),x1(:) Real(kind=db), INTENT(OUT):: w(0:,:) INTEGER, optional, INTENT(OUT):: idwall(:) Integer:: k Integer,allocatable::i(:),j(:) allocate(i(size(x2,1)),j(size(x2,1))) call getindex(x1, x2, spldom, i, j) if(present(idwall)) then Do k=1,size(x2,1) idwall(k)=spldom%cellk(i(k),j(k))%linkedboundaries(2) END DO end if if (size(w,1).gt.1) then CALL speval(spldom%totdomweightspl, x1, x2, i, j, w(0,:), w(1,:), w(2,:)) else CALL speval(spldom%totdomweightspl, x1, x2, i, j, w(0,:)) end if End SUBROUTINE spline_wtot !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! !
DESCRIPTION: !> !> @brief !> Calculates the interpolation in the domain of the Dirichlet boundary conditions from a given spline domain !> @param[in] spldom spline domain containing the information on the boundary conditions !> @param[in] z(:) array of axial positions where the weights are evaluated !> @param[in] r(:) array of radial positions where the weights are evaluated !> @param[out] g(:,0:) matrix of boundary interpolations g with first index corresponding to the position and second index to the derivative !--------------------------------------------------------------------------- SUBROUTINE spline_g(spldom,x1,x2,g,w) use forSISL,ONLY: s1424 use bsplines type(spline_domain):: spldom Real(kind=db), INTENT(IN):: x2(:),x1(:) Real(kind=db), INTENT(OUT):: g(0:,:) Real(kind=db), INTENT(IN),OPTIONAL::w(0:,:) REAL(real64),allocatable:: gtmp(:,:) Integer:: k Integer,allocatable::i(:),j(:) !type(cellkind):: cellk allocate(gtmp(0:size(g,1)-1,size(x2,1))) allocate(i(size(x2,1)),j(size(x2,1))) call getindex(x1, x2, spldom, i, j) if(present(w)) then gtmp=w else CALL speval(spldom%Dirdomweightspl, x1, x2,i,j, gtmp(0,:), gtmp(1,:), gtmp(2,:)) end if Do k=1,size(x2,1) if(spldom%cellk(i(k),j(k))%spldirkind.eq.0)then if(gtmp(0,k) .ge. 0) then if(size(g,1) .gt. 1) then g(1:2,k)=-gtmp(1:2,k)*spldom%boundaries(spldom%cellk(i(k),j(k))%linkedboundaries(1))%Dirichlet_val end if g(0,k)=(1-gtmp(0,k))*spldom%boundaries(spldom%cellk(i(k),j(k))%linkedboundaries(1))%Dirichlet_val else g(0,k)=spldom%boundaries(spldom%cellk(i(k),j(k))%linkedboundaries(1))%Dirichlet_val if(size(g,1).gt. 1) then g(1:2,k)=0 end if end if else g(:,k)=0 end if end DO End SUBROUTINE spline_g !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief !> Evaluates the geometric weight induced by the spline curve defined by b_curve at position (z,r) !> @param[in] b_curve spline_boundary containing the spline curve parameters !> @param[in] z axial position where the weight is evaluated !> @param[in] r radial position where the weight is evaluated !> @param[out] weight(:) weight index defines the order of derivation by r or z !> @param[in] h distance from the spline at which the weight is 1 !> @param[out] distance unscaled distance between evaluation point and spline b_curve !> @param[inout] leftknot initial guess for the closest spline knot of the points (r,z) !--------------------------------------------------------------------------- subroutine splineweight(b_curve, z, r, weight, h, distance, guess, lguess, rguess) Use forSISL, ONLY: s1227,s1221, s1774 type(spline_boundary):: b_curve Real(kind=db)::r,z Real(kind=db):: weight(0:) Real(kind=db),OPTIONAL:: distance real(kind=db),OPTIONAL:: guess real(kind=db),OPTIONAL:: lguess real(kind=db),OPTIONAL:: rguess integer:: sstatus, der, left,siz real(kind=db):: h, d, tpos, proj, norm real(kind=real64):: curvepos(2*b_curve%curve%idim) real(kind=db):: leftpar, rightpar,guesspar weight=0 der=1 sstatus=-1 guesspar=-1.0_db if(present(lguess) .and. present(rguess)) then leftpar=lguess rightpar=rguess guesspar=(lguess+rguess)/2 call s1774(b_curve%curve,(/z,r/),b_curve%curve%idim,b_curve%epsge,leftpar,rightpar,guesspar,tpos,sstatus) if (sstatus < 0 ) WRITE(*,*) "Error ",sstatus," in distance calculation s1774 for splineweight at ", z, r else call dist(b_curve,(/z,r/),d,tpos) end if ! 
position and derivative wrt r,z call s1221(b_curve%curve,der,tpos,left,curvepos,sstatus) if (sstatus > 0 ) WRITE(*,*) "Warning ",sstatus," in distance calculation s1221 for splineweight at ", z, r if (sstatus < 0 ) WRITE(*,*) "Error ",sstatus," in distance calculation s1221 for splineweight at ", z, r d=sqrt((curvepos(1)-z)**2+(curvepos(2)-r)**2) weight(0)=1-max((h-d)/h,0.0_db)**3 norm=sqrt(curvepos(3)**2+curvepos(4)**2) if(norm.gt.0) curvepos(3:4)=curvepos(3:4)/norm ! if the projection of the distance vector on the normal is negative, the weight is negative proj=(-(z-curvepos(1))*curvepos(4)+(r-curvepos(2))*curvepos(3)) if (proj .lt. 0 .or. abs(abs(proj) -sqrt((z-curvepos(1))**2+(r-curvepos(2))**2)).gt.1e-10) weight(0)=-weight(0) !if (proj .lt. 0 ) weight(0)=-weight(0) siz=size(weight,1) if (size(weight,1).gt.1 .and. abs(weight(0)) .lt. 1) then weight(1)=-3*curvepos(4)*(h-d)**2/h**3 weight(2)=+3*curvepos(3)*(h-d)**2/h**3 end if if(present(distance)) distance=d if(present(guess)) guess=tpos end subroutine !--------------------------------------------------------------------------- !> @author !> Guillaume Le Bars EPFL/SPC ! ! DESCRIPTION: !> !> @brief !> Calculates the closest distance between the point and the selected spline b_curve !> @param[in] b_curve spline_boundary containing the spline curve parameters !> @param[in] point(:) array containing the position from which to calculate the distance !> @param[out] distance distance from the point to the spline !> @param[out] pos optional parameter value of the closest point on the spline !--------------------------------------------------------------------------- subroutine dist(b_curve, point, distance, pos) Use forSISL, ONLY: s1957,s1953, s1221,s1227 type(spline_boundary):: b_curve Real(kind=db):: point(:) real(kind=db):: distance Real(kind=db),optional::pos REAL(real64):: posres, epsco, epsge,curvepos(2),d,distmin REAL(real64),allocatable::intpar(:) integer:: numintpt, numintcu,i,left,sstatus type(SISLIntCurve),ALLOCATABLE:: intcurve(:) epsco=1.0e-15 epsge=1.0e-15 !epsco=0 !epsge=b_curve%epsge numintpt=0 sstatus=0 distmin=HUGE(d) call s1953(b_curve%curve,point,b_curve%curve%idim,epsco,epsge,numintpt,intpar,numintcu,intcurve,sstatus) if (sstatus > 0 ) WRITE(*,*) "Warning ",sstatus," in distance calculation s1953 for splineweight at ", point(1), point(2) if (sstatus < 0 ) WRITE(*,*) "Error ",sstatus," in distance calculation s1953 for splineweight at ",point(1), point(2) if(numintpt .gt. 1) then Do i=1,numintpt call s1227(b_curve%curve,0,intpar(i),left,curvepos,sstatus) if (sstatus > 0 ) WRITE(*,*) "Warning ",sstatus," in distance calculation s1227 for splineweight at ", point(1), point(2) if (sstatus < 0 ) WRITE(*,*) "Error ",sstatus," in distance calculation s1227 for splineweight at ",point(1), point(2) d=(curvepos(1)-point(1))**2+(curvepos(2)-point(2))**2 if(d .lt. distmin) then distmin=d posres=intpar(i) end if end do else if(numintpt .gt.
0) then posres=intpar(1) end if distance=distmin if(numintcu.ge.1) then posres=0.5*(intcurve(1)%epar1(1)+intcurve(1)%epar1(2)) end if call s1221(b_curve%curve,0,posres,left,curvepos,sstatus) if (sstatus > 0 ) WRITE(*,*) "Warning ",sstatus," in distance calculation s1227 for splineweight at ", point(1), point(2) if (sstatus < 0 ) WRITE(*,*) "Error ",sstatus," in distance calculation s1227 for splineweight at ", point(1), point(2) distance=sqrt((curvepos(1)-point(1))**2+(curvepos(2)-point(2))**2) if (present(pos)) pos=posres END subroutine SUBROUTINE classify(x1, x2, cellk, spldom, wpredir, wpretot) real(kind=db), INTENT(IN):: x2(2), x1(2) type(cellkind), intent(INOUT):: cellk type(spline_domain)::spldom Real(kind=db):: zeval(4),reval(4), wpretot, wpredir real(kind=db), allocatable:: guess(:,:), w(:,:,:) Real(kind=db):: dmin, insidedir, insidetot, distance integer:: i,k allocate(guess(spldom%nbsplines,4)) allocate(w(0:2,spldom%nbsplines,4)) w=0 cellk%spldirkind=0 guess=-1.0_db dmin=HUGE(spldom%dist_extent) cellk%linkedboundaries=0 zeval=(/ x1(1),x1(2),x1(1),x1(2) /) reval=(/ x2(1),x2(1),x2(2),x2(2) /) insidedir=1 insidetot=1 do i=1,spldom%nbsplines do k=1,4 ! calculate the weight for each spline boundaries at each cell corner call splineweight(spldom%boundaries(i),zeval(k),reval(k),w(:,i,k),spldom%dist_extent,distance,guess(i,k)) ! We find the closest boundary to this point if(distance .lt. dmin) then ! If we are close enough we check if we are below dist_extent and need to calculate the distance each time if(distance .lt. spldom%dist_extent) then if(spldom%boundaries(i)%type .eq. bd_Dirichletconst .or. spldom%boundaries(i)%type .eq.bd_Dirichletvar) then cellk%linkedboundaries(1)=i cellk%spldirkind=0 end if cellk%linkedboundaries(2)=i cellk%spltotkind=0 end if dmin=distance ! Otherwise we define the interior by the closest spline if(spldom%boundaries(i)%type .eq. bd_Dirichletconst .or. spldom%boundaries(i)%type .eq.bd_Dirichletvar) then insidedir=w(0,i,k) end if insidetot=w(0,i,k) end if ! The neumann boundaries take precedence over the dirichlet boundaries ! this is important when they define what is outside of the simulation domain. if(spldom%boundaries(i)%type.eq. bd_Neumann.and. w(0,i,k).lt.0)then insidetot=w(0,i,k) if(distance.lt.spldom%dist_extent)then cellk%linkedboundaries(2) =i else cellk%linkedboundaries(2) =0 end if end if end do end do if(cellk%linkedboundaries(1) .gt. 0) then i=cellk%linkedboundaries(1) cellk%lguess(1)=minval(guess(i,:),1,guess(i,:).ge.0) cellk%rguess(1)=maxval(guess(i,:),1) wpredir=w(0,i,1) else cellk%spldirkind=sign(1,int(insidedir)) wpredir=insidedir end if if(cellk%linkedboundaries(2) .gt. 0) then i=cellk%linkedboundaries(2) wpretot=w(0,i,1) else cellk%spltotkind=sign(1,int(insidetot)) wpretot=insidetot end if end subroutine subroutine classifycells(spldom) use forSISL, ONLY: s1537, s1424 use bsplines type(spline_domain):: spldom integer:: i,j, dims(2), nbeval1, nbeval2,k,l real(kind=db)::val type(cellkind):: cellk real(kind=db), allocatable:: wpretot(:,:,:), wpredir(:,:,:), c(:,:), x1(:), x2(:) allocate(wpretot(1:1,0:spldom%nb1,0:spldom%nb2)) allocate(wpredir(1:1,0:spldom%nb1,0:spldom%nb2)) - nbeval1=spldom%nb1+2 - nbeval2=spldom%nb2+2 + nbeval1=spldom%nb1+3 + nbeval2=spldom%nb2+3 ! 
We set the interpolation points such that the spline interpolation of the weight uses the same knots as the spline interpolation of the electric potential allocate(x1(0:nbeval1-1),x2(0:nbeval2-1)) x1(0)=spldom%x1(0) x1(1)=(spldom%x1(0)+spldom%x1(1))/2.0_db j=0 do i=2,spldom%nb1 j=j+1 - x1(i)=2*spldom%x1(j)-x1(i-1) + x1(i)=spldom%x1(j) + !x1(i)=2*spldom%x1(j)-x1(i-1) end do - !x1(nbeval1-2)=(spldom%x1(spldom%nb1-1)+3*spldom%x1(spldom%nb1))/2.0_db + x1(nbeval1-2)=(spldom%x1(spldom%nb1-1)+3*spldom%x1(spldom%nb1))/2.0_db x1(nbeval1-1)=spldom%x1(spldom%nb1) !write(*,*)"x1", x1 ! We do the same for x2 x2(0)=spldom%x2(0) x2(1)=(spldom%x2(0)+spldom%x2(1))/2.0_db j=0 do i=2,spldom%nb2 j=j+1 - !x2(i)=spldom%x2(j) - x2(i)=2*spldom%x2(j)-x2(i-1) + x2(i)=spldom%x2(j) + !x2(i)=2*spldom%x2(j)-x2(i-1) end do - !x2(nbeval2-2)=(spldom%x2(spldom%nb2-1)+spldom%x2(spldom%nb2))/2.0_db + x2(nbeval2-2)=(spldom%x2(spldom%nb2-1)+spldom%x2(spldom%nb2))/2.0_db x2(nbeval2-1)=spldom%x2(spldom%nb2) !write(*,*)"x2", x2 wpretot=0 wpredir=0 !$OMP PARALLEL DO private(i,j) do i=0,spldom%nb1-1 !DIR$ UNROLL do j=0,spldom%nb2-1 call classify(spldom%x1(i:i+1),spldom%x2(j:j+1),spldom%cellk(i,j),spldom, wpredir(1,i,j),wpretot(1,i,j)) end do end do !$OMP END PARALLEL DO deallocate(wpretot) deallocate(wpredir) allocate(wpretot(1:1,0:nbeval1-1,0:nbeval2-1)) allocate(wpredir(1:1,0:nbeval1-1,0:nbeval2-1)) !$OMP PARALLEL DO private(i,j,cellk,k,l) do i=0,nbeval1-1 call locintv(spldom%splrz%sp1,x1(i),k) do j=0,nbeval2-1 call locintv(spldom%splrz%sp2,x2(j),l) cellk=spldom%cellk(k,l) If(abs(cellk%spldirkind) .eq. 1) Then wpredir(1,i,j)=cellk%spldirkind else call splineweight(spldom%boundaries(cellk%linkedboundaries(1)), x1(i),x2(j), wpredir(:,i,j),spldom%dist_extent) end IF If(abs(cellk%spltotkind) .eq. 1) Then wpretot(1,i,j)=cellk%spltotkind else call splineweight(spldom%boundaries(cellk%linkedboundaries(2)), x1(i),x2(j), wpretot(:,i,j),spldom%dist_extent) end IF end do end do !$OMP END PARALLEL DO ! Set the approximated spline weight for the Dirichlet boundary conditions - CALL set_splcoef((/2,2/),x1,x2,spldom%Dirdomweightspl) + CALL set_splcoef((/3,3/),x1,x2,spldom%Dirdomweightspl) call get_dim(spldom%Dirdomweightspl,dims) !Write(*,*) "size x1, x2 knots", size(x1),size(x2),dims, size(wpredir) allocate(c(dims(1),dims(2))) call get_splcoef(spldom%Dirdomweightspl, wpredir(1,:,:), c) CALL gridval(spldom%Dirdomweightspl,spldom%x1(1),spldom%x2(1), val ,(/0,0/),c) !write(*,*)"x2", x2 !write(*,*)"konot1", spldom%x1 !write(*,*)"konots1 interp", spldom%Dirdomweightspl%sp1% ! Set the approximated spline weight for the Neumann boundary conditions - CALL set_splcoef((/2,2/),x1,x2,spldom%totdomweightspl) + CALL set_splcoef((/3,3/),x1,x2,spldom%totdomweightspl) call get_splcoef(spldom%totdomweightspl, wpretot(1,:,:), c) CALL gridval(spldom%totdomweightspl,spldom%x1(1),spldom%x2(1), val ,(/0,0/),c) deallocate(c) end subroutine subroutine getindex(x1,x2,spldom, i, j) use distrib, ONLY: closest type(spline_domain):: spldom real(kind=db):: x1(:), x2(:) integer:: i(:),j(:) call locintv(spldom%splrz%sp1,x1, i) call locintv(spldom%splrz%sp2,x2, j) end subroutine SUBROUTINE speval(sp, xp, yp, leftx, lefty, f00, f10, f01) ! ! Compute the function f00 and its derivatives ! f10 = d/dx f ! f01 = d/dy f ! assuming that its PPFORM/BCOEFSC was already computed! ! 
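! The evaluation proceeds in two sweeps: for every point, my_ppval1 evaluates along x
! the value and the x-derivative of each of the sp%sp2%order coefficient rows of the local
! piecewise-polynomial patch sp%ppform(:,leftx(i)+1,:,lefty(i)+1); my_ppval0 then combines
! these along y to give f00 and, when requested, f10 (d/dx f) and f01 (d/dy f).
! The interval indices leftx/lefty must already be located by the caller (see getindex/locintv).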
TYPE(spline2d), INTENT(inout) :: sp DOUBLE PRECISION, DIMENSION(:), INTENT(in) :: xp, yp INTEGER, DIMENSION(:), INTENT(in) :: leftx, lefty DOUBLE PRECISION, DIMENSION(:), INTENT(out) :: f00 DOUBLE PRECISION, DIMENSION(:), INTENT(out), OPTIONAL :: f10, f01 ! INTEGER :: np DOUBLE PRECISION :: x(SIZE(xp)), y(SIZE(yp)) INTEGER :: i, nidbas(2) DOUBLE PRECISION :: temp0(SIZE(xp),sp%sp2%order), temp1(SIZE(xp),sp%sp2%order) LOGICAL :: nlppform ! ! Apply periodicity if required ! np = SIZE(xp) nidbas(1) = sp%sp1%order-1 nidbas(2) = sp%sp2%order-1 nlppform = sp%sp1%nlppform .OR. sp%sp2%nlppform ! ! Locate the interval containing x, y ! x(:) = xp(:) - sp%sp1%knots(leftx(:)) y(:) = yp(:) - sp%sp2%knots(lefty(:)) ! ! Compute function/derivatives ! ! Using PPFORM !---------- DO i=1,np CALL my_ppval1(nidbas(1), x(i), sp%ppform(:,leftx(i)+1,:,lefty(i)+1), & & temp0(i,:), temp1(i,:)) END DO ! CALL my_ppval0(nidbas(2), y, temp0, 0, f00) if(present(f01))then CALL my_ppval0(nidbas(2), y, temp0, 1, f01) end if if(present(f10))then CALL my_ppval0(nidbas(2), y, temp1, 0, f10) end if !----------- CONTAINS !+++ SUBROUTINE my_ppval0(p, x, ppform, jder, f) ! ! Compute function and derivatives from the PP representation ! for many points x(:) INTEGER, INTENT(in) :: p DOUBLE PRECISION, INTENT(in) :: x(:) DOUBLE PRECISION, INTENT(in) :: ppform(:,:) INTEGER, INTENT(in) :: jder DOUBLE PRECISION, INTENT(out) :: f(:) DOUBLE PRECISION :: fact INTEGER :: j SELECT CASE (jder) CASE(0) ! function value SELECT CASE(p) CASE(1) f(:) = ppform(:,1) + x(:)*ppform(:,2) CASE(2) f(:) = ppform(:,1) + x(:)*(ppform(:,2)+x(:)*ppform(:,3)) !!$ CASE(3) !!$ f(:) = ppform(:,1) + x(:)*(ppform(:,2)+x(:)*(ppform(:,3)+x(:)*ppform(:,4))) CASE(3:) f(:) = ppform(:,p+1) DO j=p,1,-1 f(:) = f(:)*x(:) + ppform(:,j) END DO END SELECT CASE(1) ! 1st derivative SELECT CASE(p) CASE(1) f(:) = ppform(:,2) CASE(2) f(:) = ppform(:,2) + x(:)*2.d0*ppform(:,3) !!$ CASE(3) !!$ f(:) = ppform(:,2) + x(:)*(2.d0*ppform(:,3)+x(:)*3.0d0*ppform(:,4)) CASE(3:) f(:) = p*ppform(:,p+1) DO j=p-1,1,-1 f(:) = f(:)*x(:) + j*ppform(:,j+1) END DO END SELECT CASE default ! 2nd and higher derivatives f(:) = ppform(:,p+1) fact = p-jder DO j=p,jder+1,-1 f(:) = f(:)/fact*j*x(:) + ppform(:,j) fact = fact-1.0d0 END DO DO j=2,jder f(:) = f(:)*j END DO END SELECT END SUBROUTINE my_ppval0 !+++ SUBROUTINE my_ppval1(p, x, ppform, f0, f1) ! ! Compute function and first derivative from the PP representation INTEGER, INTENT(in) :: p DOUBLE PRECISION, INTENT(in) :: x DOUBLE PRECISION, INTENT(in) :: ppform(:,:) DOUBLE PRECISION, INTENT(out) :: f0(:) DOUBLE PRECISION, INTENT(out) :: f1(:) DOUBLE PRECISION :: fact INTEGER :: j SELECT CASE(p) CASE(1) f0(:) = ppform(1,:) + x*ppform(2,:) f1(:) = ppform(2,:) CASE(2) f0(:) = ppform(1,:) + x*(ppform(2,:)+x*ppform(3,:)) f1(:) = ppform(2,:) + x*2.d0*ppform(3,:) CASE(3) f0(:) = ppform(1,:) + x*(ppform(2,:)+x*(ppform(3,:)+x*ppform(4,:))) f1(:) = ppform(2,:) + x*(2.d0*ppform(3,:)+x*3.0d0*ppform(4,:)) CASE(4:) f0 = ppform(p+1,:) f1 = f0 DO j=p,2,-1 f0(:) = ppform(j,:) + x*f0(:) f1(:) = f0(:) + x*f1(:) END DO f0(:) = ppform(1,:) + x*f0(:) END SELECT END SUBROUTINE my_ppval1 !+++ END SUBROUTINE speval subroutine free_bsplinecurve(b_curve) type(spline_boundary):: b_curve call freeCurve(b_curve%curve) !call freeIntCurve(b_curve%intcurve) end subroutine END MODULE splinebound diff --git a/src/stepon.f90 b/src/stepon.f90 index b819344..32c0c0b 100644 --- a/src/stepon.f90 +++ b/src/stepon.f90 @@ -1,99 +1,130 @@ SUBROUTINE stepon ! ! Advance one time step ! 
USE basic USE constants USE fields USE beam USE maxwsrce USE celldiag USE neutcol USE sort Use psupply + use omp_lib + IMPLICIT NONE INTEGER:: i - DO i=1,nbspecies + DO i=1,nbspecies ! Boundary conditions for plasma particles outside the plasma region CALL bound(partslist(i)) + END DO + !$OMP BARRIER + DO i=1,nbspecies ! Localisation of particles in cells (calculation of the r and z indices) call boundary_loss(partslist(i)) END DO + !$OMP BARRIER ! Cell diag quantities IF(modulo(step,itcelldiag).eq. 0 .or. nlend) THEN CALL celldiag_save(time, fidres) END IF - ! We compute collisions on the main particles IF(modulo(step,itcol).eq. 0) THEN CALL neutcol_step(partslist) END IF - + !$OMP BARRIER + +!$OMP SINGLE ! The particles are injected by the source CALL maxwsrce_inject(time) +!$OMP END SINGLE +!$OMP BARRIER + + ! Sort particles for faster rhscon run time ! DO i=1,nbspecies ! IF(modulo(step,it2d) .eq. 0) THEN ! CALL gridsort(partslist(i),1,partslist(i)%Nploc) ! END IF ! END DO ! Assemble right hand side of Poisson equation CALL rhscon(partslist) - if (.not. nlfreezephi) THEN ! Solve Poisson equation CALL poisson(splrz) + end if + DO i=1,nbspecies ! Compute the electric field at the particle position CALL EFieldscompatparts(partslist(i)) ! Compute the magnetic field at the particle position call comp_mag_p(partslist(i)) + END DO + !$OMP BARRIER + DO i=1,nbspecies ! Solve Newton eq. and advance velocity by delta t CALL comp_velocity(partslist(i)) - + !$OMP SINGLE ! Compute the energy of added particles CALL calc_newparts_energy(partslist(i)) + !$OMP END SINGLE NOWAIT END DO + !$OMP BARRIER + + !$OMP SINGLE ! Calculate main physical quantities CALL partdiagnostics + !$OMP END SINGLE NOWAIT IF (modulo(step,it2d).eq. 0 .or. nlend) THEN Do i=1,nbspecies if(partslist(i)%calc_moments) CALL momentsdiag(partslist(i)) End do END IF + + ! update the power supply voltage if necessary call psupply_step(the_ps,partslist,cstep) + !$OMP BARRIER + !$OMP MASTER ! Save variables to file CALL diagnose(step) + !$OMP END MASTER + + IF (modulo(step,itparts).eq. 0 .or. modulo(step,ittracer).eq. 0 .or. modulo(step,itrestart).eq. 0 .or. nlend) THEN + !$OMP BARRIER + END IF Do i=1,nbspecies ! Calculate new positions of particles at time t+delta t CALL push(partslist(i)) END DO + - - + !$OMP SINGLE ! We recalculate the mpi axial boundaries and we adapt them if necessary IF(modulo(step,50) .eq. 0) THEN + WRITE(*,*) "calc_zbounds" CALL calc_Zbounds(partslist(1),Zbounds, femorder) CALL fields_comm_init(Zbounds) CALL maxwsrce_calcfreq(Zbounds) END IF + !$OMP END SINGLE END SUBROUTINE stepon
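Note on the OpenMP directives introduced in stepon: the routine itself never opens a parallel region, so the !$OMP SINGLE, MASTER and BARRIER constructs above are orphaned directives that only take effect when stepon is called from inside an enclosing !$OMP PARALLEL region in the caller's time loop. The minimal sketch below illustrates such a call site; the loop structure and the stepping variables (step, time, dt, nlend) are assumptions used for illustration only and are not part of this patch.

!$OMP PARALLEL DEFAULT(shared)
DO WHILE (.NOT. nlend)
   !$OMP SINGLE
   step = step + 1        ! advance the step counter once per thread team
   time = time + dt       ! advance physical time once per thread team
   !$OMP END SINGLE
   CALL stepon            ! all threads enter; work is divided by the orphaned directives inside
END DO
!$OMP END PARALLEL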