diff --git a/mods/particleContainer.py b/mods/particleContainer.py
index 8384ca2..5a890fe 100644
--- a/mods/particleContainer.py
+++ b/mods/particleContainer.py
@@ -1,263 +1,257 @@
 ###########################################################################
 #                                                                         #
 #    Copyright 2017 Andrea Cimatoribus                                    #
 #    EPFL ENAC IIE ECOL                                                   #
 #    GR A1 435 (Batiment GR)                                              #
 #    Station 2                                                            #
 #    CH-1015 Lausanne                                                     #
 #    Andrea.Cimatoribus@epfl.ch                                           #
 #                                                                         #
 #    Alexandre Serex                                                      #
 #    alexandre.serex@epfl.ch                                              #
 #                                                                         #
 #    This file is part of ctracker                                        #
 #                                                                         #
 #    ctracker is free software: you can redistribute it and/or modify it  #
 #    under the terms of the GNU General Public License as published by    #
 #    the Free Software Foundation, either version 3 of the License, or    #
 #    (at your option) any later version.                                  #
 #                                                                         #
 #    ctracker is distributed in the hope that it will be useful,          #
 #    but WITHOUT ANY WARRANTY; without even the implied warranty          #
 #    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.              #
 #    See the GNU General Public License for more details.                 #
 #                                                                         #
 #    You should have received a copy of the GNU General Public License    #
 #    along with ctracker.  If not, see <http://www.gnu.org/licenses/>.    #
 #                                                                         #
 ###########################################################################
 
 import numpy as np
 
 from queue import Queue
 
 from core.parameterContainer import ParameterContainer
 from parameters.particle import Particle
 from tools.CFunctions import ext_loop
 
 
 class ParticleContainer(ParameterContainer):
     def __init__(self, name, data, grid):
         super(ParticleContainer, self).__init__(name, data)
 
         '''
         Initializes the seeding frequency and position and seeding steps for the simulation
         '''
 
         self.outfreq = data.outfreq
 
         self.ff = float(data.ff)
 
         self.gcm_geometry = data.gcm_geometry
         self.grid = grid
         self.gcm_dt = data.gcm_dt
         self.out_dt = data.out_dt
         self.gcm_start = data.gcm_start
 
         self.iende = np.asarray(data.iende)
         self.iendw = np.asarray(data.iendw)
         self.jendn = np.asarray(data.jendn)
         self.jends = np.asarray(data.jends)
         self.nend = self.iende.size
 
         self.max_id = 0
         # Total number of active particles
         self.ntact = 0
         # Number of particles which exited the domain
         self.ntout = 0
         # this is a copy in memory of the NETcdf time
         # it is useful to find the index where to write
         # data without reading from disk
         self.nc_time = np.array([], dtype="float64")
 
         self.queue_in = Queue(maxsize=3)
         # self.queue_out = Queue(maxsize=50)
 
         self.ntact = 0
         self.active = np.array([], dtype=Particle)
         self.exited = np.array([], dtype=Particle)
         self._children = np.array([], dtype=Particle)
 
         self.nvals = 1
 
         self.is_running = False
 
         self.inputs["default"] = [self.process_input]
         self.outputs = ['default']
 
     def process_input(self, *args, **kwargs):
         self.queue_in.put(*args, **kwargs)
 
     def process_output(self, *args, **kwargs):
         pass
         """data = [1, 2, 3]
         print("ParticleContainer process output with {0}".format(data))
         self.output('default', data, *args, **kwargs)"""
 
     def is_config_valid(self):
         if self.ff not in [1.0, -1.0]:
             raise ValueError("ff controls the time direction, "
                              "it can only be +1 or -1")
         if self.ff == -1.0:
             raise NotImplementedError("Backward integration not yet supported.")
 
         if (self.nend != self.iendw.size) or \
                 (self.nend != self.jendn.size) or \
                 (self.nend != self.jends.size):
             raise ValueError("Wrong kill area definition")
 
         return True
 
     def run(self, seed_ctx):
         self.is_running = True
 
         #
         # Start time loop over GCM output steps
         #
         # array with particle ids
         active_ids = np.zeros(self.ntact, dtype="short")
         out_code = np.zeros(self.ntact, dtype="short")
 
         # input/output position of particles
         xyz = np.zeros((0, 3), dtype="float64")
         ijk = np.zeros((0, 3), dtype="int16")
 
         self.nvals = seed_ctx.nvals
         out_tijk = np.zeros((self.ntact, self.nvals, 4),
                             dtype="f8")
         out_xyz = np.zeros((self.ntact, self.nvals, 3),
                            dtype="f8")
 
         iters = seed_ctx.iters
         timestamps_in = seed_ctx.timestamps_in
         timestamps_out = seed_ctx.timestamps_out
 
         all_stops = np.sort(np.unique(np.concatenate((iters, timestamps_in, timestamps_out))))
 
         # initialize stuff
         ts = 0.0  # normalised time between two GCM time steps
         tt = 0.0  # time in seconds between two GCM time steps
 
         for ngi in range(1, all_stops.size):
             # we are starting from the start!
             # even if we will read the following
             # gcm output for interpolating
             t = all_stops[ngi - 1]
             ngiQ = uflux = vflux = wflux = 0
             t0 = t * self.gcm_dt
             t1 = all_stops[ngi] * self.gcm_dt
             dt = t1 - t0
 
             # check if we reached a timestamp at which we should extract flux data
             if t in iters:
                 # get data from the queue
                 ngiQ, uflux, vflux, wflux = self.queue_in.get()
                 ts = tt = 0.0
 
             if ngiQ != ngi:
                 raise ValueError("Queue provides wrong timestep")
 
             # remove particles that went out of the domain or other
             del_index = out_code == 0
             n_remove = (~del_index).sum()
 
             for i, o in enumerate(out_code.tolist()):
                 if o != 0:
                     print(i, o)
 
-            self.ntout += n_remove
-            self.ntact = active_ids.size
-
             if n_remove:
                 active_ids = active_ids[del_index]
                 xyz = xyz[del_index, ...]
                 ijk = ijk[del_index, ...]
                 self.ntout += n_remove
                 self.ntact = active_ids.size
                 # the active particles number changed, so
                 # we update the buffer size
                 out_code = np.zeros(self.ntact, dtype="short")
 
                 out_tijk = np.zeros((self.ntact, seed_ctx.nvals, 4),
                                     dtype="f8")
                 out_xyz = np.zeros((self.ntact, seed_ctx.nvals, 3),
                                    dtype="f8")
 
             if t in timestamps_in:
                 new_ids = np.arange(self.max_id + 1,
                                     self.max_id + 1 + seed_ctx.n_seed,
                                     dtype="int64")
                 active_ids = np.append(active_ids, new_ids)
                 xyz = np.vstack([xyz,
                                  seed_ctx.ijk_seed])
                 ijk = np.vstack([ijk,
                                  np.int16(seed_ctx.ijk_indseed)])
                 self.max_id = self.max_id + seed_ctx.n_seed
                 # we store the seeding position
                 self.output('default', (t0, None, None, new_ids.copy(), None, True))
                 self.ntact = active_ids.size
                 # the active particles number changed, so
                 # we update the buffer size
                 out_code = np.zeros(self.ntact, dtype="short")
 
                 out_tijk = np.zeros((self.ntact, seed_ctx.nvals, 4),
                                     dtype="f8")
                 out_xyz = np.zeros((self.ntact, seed_ctx.nvals, 3),
                                    dtype="f8")
 
             print("\nStarting numerical loop at ",
                   np.datetime64(self.gcm_start) +
                   np.timedelta64(int(t0 * 1000), "[ms]"),
                   "\n|.... active: {:d} ....|....  out: {:d} ....|"
                   .format(self.ntact, self.ntout))
 
             if self.outfreq == "gcmstep":
                 outfreq = 1
             elif self.outfreq == "always":
                 outfreq = 2
             elif self.outfreq == "cross":
                 outfreq = 3
             elif self.outfreq == "custom":
                 outfreq = 4
             else:
                 raise ValueError("outfreq {} not recognised"
                                  .format(self.outfreq))
 
-            if self.ntact > 2844:
-                print(xyz[2844])
-
             ext_loop(self.nvals, self.ntact,
                      xyz, ijk, out_xyz, out_tijk,
                      timestamps_in, timestamps_out, t, [ts, tt],
                      dt, outfreq,
                      self.nend, self.iendw, self.iende, self.jendn, self.jends,
                      self.grid.imax, self.grid.jmax, self.grid.kmax,
                      self.grid.dsmax, self.grid.dxyz, self.grid.dtmax,
                      self.grid.dstep, self.grid.CS, self.grid.SN, self.grid.dX,
                      self.grid.dY, self.grid.xG, self.grid.yG, self.grid.Z,
                      uflux, vflux, wflux, out_code)
 
             # instead of doing the netCDF writing here, we rather
             # put the output in a queue, a separate thread will
             # deal with it
             # Note that we must store copies, otherwise we would be
             # changing the output before it's written
             self.output('default',
                         (t0,
                          out_tijk.copy(),
                          out_xyz.copy(),
                          active_ids.copy(),
                          out_code.copy(),
                          False)
                         )
             if t in timestamps_out:
                 timestamps_out.pop(0)
             if t in timestamps_in:
                 timestamps_in.pop(0)
             # end for cycle over iterations
 
         # Tell the output the processing is finished
         self.output('default', None)
 
         # finish the output queue before closing the netcdf output file
         # self.queue_out.join()
diff --git a/tools/CFunctions.py b/tools/CFunctions.py
index 71498c0..9fac9b0 100644
--- a/tools/CFunctions.py
+++ b/tools/CFunctions.py
@@ -1,665 +1,666 @@
 import numba as nb
 from math import log, exp
 
 
 @nb.njit(nogil=True, parallel=True)
 def newpos(ii, r0, ds, uu, um):
     """
         Compute new position along self.ijk direction
 
         um: flux entering the cell
         uu: flux exiting the cell
         """
     if um != uu:
         Du = uu - um
         mDu = um / Du
         # eq. 7.8 Doos et al 2013
         return (r0 - ii + mDu) * exp(Du * ds) + ii - mDu
     else:
         return r0 + uu * ds
 
 
 @nb.njit(nogil=True, parallel=True)
 def cross(ijk, ia, ja, ka, i_futr, i_past, r0, flux):
     """
         Compute crossing times
         """
     # um: flux entering the cell
     # uu: flux exiting the cell
     # i_futr: interpolation coefficient for "future" GCM data
     # i_past: interpolation coefficient for "past" GCM data
     uu, um, ba, sp, sn, Du, ii = 0, 0, 0, 0, 0, 0, 0
     UNDEF = 1.0e20
 
     um = (i_futr * flux[1, ka, ja, ia] +
           i_past * flux[0, ka, ja, ia])
     if ijk == 1:
         ii = ia
         uu = (i_futr * flux[1, ka, ja, ia + 1] +
               i_past * flux[0, ka, ja, ia + 1])
     elif ijk == 2:
         ii = ja
         uu = (i_futr * flux[1, ka, ja + 1, ia] +
               i_past * flux[0, ka, ja + 1, ia])
     elif ijk == 3:
         ii = ka
         uu = (i_futr * flux[1, ka + 1, ja, ia] +
               i_past * flux[0, ka + 1, ja, ia])
 
     # east, north or upward crossing
     if uu > 0 and (r0 != (ii + 1)):
         if um != uu:
             Du = (uu - um)
             ba = (r0 - ii) * Du + um
             if ba > 0:
                 # eq. 7.9 & 7.6 Doos et al. 2013
                 sp = (log(uu) - log(ba)) / Du
                 # It can still be that there is a
                 # zero flux somewhere between r0 and
                 # the face
                 if (sp <= 0.0):
                     sp = UNDEF
             else:
                 sp = UNDEF
         else:
             sp = (1.0 + ii - r0) / uu
     else:
         sp = UNDEF
 
     # west, south or downward crossing
     if um < 0 and r0 != ii:
         if um != uu:
             # negative velocity, so we go from exit (uu)
             # to enter (um)
             Du = (um - uu)
             # (ii)-----(r0)---(ii+1)
             # (um)-----(ba)---(uu)
             ba = (1 + ii - r0) * Du + uu
             if ba < 0:
                 sn = (log(-ba) - log(-um)) / Du
                 # It can still be that there is a
                 # zero flux somewhere between r0 and
                 # the face
                 if sn <= 0:
                     sn = UNDEF
             else:
                 sn = UNDEF
         else:
             sn = (ii - r0) / uu
     else:
         sn = UNDEF
 
     return uu, um, sp, sn
 
 
 @nb.njit(nogil=True, parallel=True)
 def transform(ii, jj, kk, stxyz, CS, SN, dX, dY, xG, yG, Z):
     for m in range(ii.size):
         transform_one(ii[m], jj[m], kk[m], stxyz[m, :], CS, SN, dX, dY, xG, yG, Z)
 
 
 @nb.njit(nogil=True, parallel=True)
 def transform_one(i, j, k, stxyz, CS, SN, dX, dY, xG, yG, Z):  # nogil
     ii = int(i)
     jj = int(j)
     kk = int(k)
 
     nx = i % 1
     ny = j % 1
     nz = k % 1
 
     # inside the cell
     if (nx >= 1e-9) & (ny >= 1e-9):
         cosij = CS[jj, ii]
         sinij = SN[jj, ii]
         dxij = dX[jj, ii]
         dyij = dY[jj, ii]
 
         # x
         stxyz[0] = xG[jj, ii] + \
                    cosij * dxij * nx - sinij * dyij * ny
 
         # y
         stxyz[1] = yG[jj, ii] + \
                    sinij * dxij * nx + cosij * dyij * ny
 
         # z
         if nz >= 1e-9:
             stxyz[2] = (1.0 - nz) * Z[kk, jj, ii] + \
                        nz * Z[kk + 1, jj, ii]
         else:
             stxyz[2] = Z[kk, jj, ii]
     # at the SW corner
     elif (nx < 1e-9) & (ny < 1e-9):
         # x
         stxyz[0] = xG[jj, ii]
 
         # y
         stxyz[1] = yG[jj, ii]
 
         # z
         if nz >= 1e-9:
             stxyz[2] = (1.0 - nz) * Z[kk, jj, ii] + \
                        nz * Z[kk + 1, jj, ii]
         else:
             stxyz[2] = Z[kk, jj, ii]
     # on W face
     elif nx < 1e-9:
         cosij = CS[jj, ii]
         sinij = SN[jj, ii]
         dyij = dY[jj, ii]
 
         # x
         stxyz[0] = xG[jj, ii] - sinij * dyij * ny
 
         # y
         stxyz[1] = yG[jj, ii] + cosij * dyij * ny
 
         # z
         if nz >= 1e-9:
             stxyz[2] = (1.0 - nz) * Z[kk, jj, ii] + \
                        nz * Z[kk + 1, jj, ii]
         else:
             stxyz[2] = Z[kk, jj, ii]
     # on S face
     else:
         cosij = CS[jj, ii]
         sinij = SN[jj, ii]
         dxij = dX[jj, ii]
 
         # x
         stxyz[0] = xG[jj, ii] + cosij * dxij * nx
 
         # y
         stxyz[1] = yG[jj, ii] + sinij * dxij * nx
 
         # z
         if nz >= 1e-9:
             stxyz[2] = (1.0 - nz) * Z[kk, jj, ii] + \
                        nz * Z[kk + 1, jj, ii]
         else:
             stxyz[2] = Z[kk, jj, ii]
 
 
 @nb.njit(nogil=True, parallel=True)
 def ext_loop(nvals, ntact,
              xyz, ijk, out_xyz, out_tijk,
              timestamps_in, timestamps_out, step_t, times,
              out_dt, outfreq,
              nend, iendw, iende, jendn, jends,
              imax, jmax, kmax, Adsmax, Adxyz, dtmax, dstep,
              CS, SN, dX, dY, xG, yG, Z,
              uflux, vflux, wflux, out_code):
     ds, dse, dsw, dsn, dss, dsu, dsd = 0, 0, 0, 0, 0, 0, 0
     for ntrac in range(ntact):
         # initialize stuff
         ts = times[0]  # normalised time between two GCM time steps
         tt = times[1]  # time in seconds between two GCM time steps
 
         # output init
         for kk in range(nvals):
             out_tijk[ntrac, kk, 0] = -1e20
         rec_out = 0
 
         # take coord
         # the coordinate system is as follows:
         #        cell i
         # |---------------------|
         # i    i <= x < i+1    i+1
         x1 = xyz[ntrac, 0]
         y1 = xyz[ntrac, 1]
         z1 = xyz[ntrac, 2]
         ib = ijk[ntrac, 0]
         jb = ijk[ntrac, 1]
         kb = ijk[ntrac, 2]
 
         while True:
             # time interpolation constants
             i_futr = ts % 1.0
             i_past = 1.0 - i_futr
 
             x0 = x1
             y0 = y1
             z0 = z1
             ia = ib
             ja = jb
             ka = kb
 
             dsmax = Adsmax[kb, jb, ib]
             dxyz = Adxyz[kb, jb, ib]
 
             # make sure we do not start from land
             # TODO do we still need this?
             if dxyz == 0.0:
                 print("\nx0: %f\n", x0)
                 print("y0: %f\n", y0)
                 print("z0: %f\n", z0)
                 print("ia: %d\n", ia)
                 print("ja: %d\n", ja)
                 print("ka: %d\n", ka)
                 print("x1: %f\n", x1)
                 print("y1: %f\n", y1)
                 print("z1: %f\n", z1)
                 print("ib: %d\n", ib)
                 print("jb: %d\n", jb)
                 print("kb: %d\n", kb)
                 print("ds: %f\n", ds)
                 print("dsmax: %f\n", dsmax)
                 print("dxyz: %f\n", dxyz)
                 print("dsn: %f\n", dsn)
                 print("dss: %f\n", dss)
                 print("dse: %f\n", dse)
                 print("dsw: %f\n", dsw)
                 print("dsu: %f\n", dsu)
                 print("dsd: %f\n", dsd)
                 raise ValueError("Particule on land")
 
             #
             # calculate the 3 crossing times over the box
             # choose the shortest time and calculate the
             # new positions
             #
             # solving the differential equations
             # note:
             # space variables (x,...) are dimensionless
             # time variables (ds,...) are in seconds/m^3
             #
 
             uu1, um1, dse, dsw = \
                 cross(1, ia, ja, ka, i_futr, i_past, x0, uflux)
             vu1, vm1, dsn, dss = \
                 cross(2, ia, ja, ka, i_futr, i_past, y0, vflux)
             wu1, wm1, dsu, dsd = \
                 cross(3, ia, ja, ka, i_futr, i_past, z0, wflux)
 
             ds = min(min(min(min(min(min(dse, dsw), dsn), dss),
                              dsd), dsu), dsmax)
             if (ds == 0.0) or (ds == 1e20):
                 print("\nx0: %f\n", x0)
                 print("y0: %f\n", y0)
                 print("z0: %f\n", z0)
                 print("ia: %d\n", ia)
                 print("ja: %d\n", ja)
                 print("ka: %d\n", ka)
                 print("x1: %f\n", x1)
                 print("y1: %f\n", y1)
                 print("z1: %f\n", z1)
                 print("ib: %d\n", ib)
                 print("jb: %d\n", jb)
                 print("kb: %d\n", kb)
                 print("ds: %f\n", ds)
                 print("dsmax: %f\n", dsmax)
                 print("dxyz: %f\n", dxyz)
                 print("dsn: %f\n", dsn)
                 print("dss: %f\n", dss)
                 print("dse: %f\n", dse)
                 print("dsw: %f\n", dsw)
                 print("dsu: %f\n", dsu)
                 print("dsd: %f\n", dsd)
                 raise ValueError("Cannot integrate track for unknown reasons\n")
 
             # now update the time
             dt = ds * dxyz
 
             # if time step makes the integration time
             # very close to the GCM output
             if (tt + dt) >= out_dt:
                 dt = out_dt - tt
                 tt = out_dt
                 ts = 1.0
                 ds = dt / dxyz
                 end_loop = True
             else:
                 end_loop = False
                 tt += dt
                 if dt == dtmax:
                     ts += dstep
                 else:
                     ts += dt / out_dt
 
             # compute new time interpolation constant
             in_futr = ts % 1.0
             in_past = 1.0 - in_futr
 
             # now we actually compute the new position
             # eastward grid-cell exit
             if ds == dse:
                 scrivi = 1
                 # if at the "new time" the flow at the cell
                 # face is positive the particle will exit
                 uu = (in_futr * uflux[1, ka, ja, ia + 1] +
                       in_past * uflux[0, ka, ja, ia + 1])
                 if uu > 0.0:
                     ib = ia + 1
                 x1 = float(ia + 1)
                 y1 = newpos(ja, y0, ds, vu1, vm1)
                 z1 = newpos(ka, z0, ds, wu1, wm1)
                 # deal with corner cases (including rounding error)
                 # TODO: corner cases are extremely rare, but
                 # may happen. Is there a better way to deal with them?
                 if (y1 - ja) >= 1:  # y
                     uu = (in_futr * vflux[1, ka, ja + 1, ia] +
                           in_past * vflux[0, ka, ja + 1, ia])
                     if uu > 0.0:
                         jb = ja + 1
                     y1 = float(ja + 1)
                 elif (y1 - ja) <= 0:
                     uu = (in_futr * vflux[1, ka, ja, ia] +
                           in_past * vflux[0, ka, ja, ia])
                     if uu < 0.0:
                         jb = ja - 1
                     y1 = float(ja)
                 if (z1 - ka) >= 1:  # z
                     uu = (in_futr * wflux[1, ka + 1, ja, ia] +
                           in_past * wflux[0, ka + 1, ja, ia])
                     if uu > 0.0:
                         kb = ka + 1
                     z1 = float(ka + 1)
                 elif (z1 - ka) <= 0:
                     uu = (in_futr * wflux[1, ka, ja, ia] +
                           in_past * wflux[0, ka, ja, ia])
                     if uu < 0.0:
                         kb = ka - 1
                     z1 = float(ka)
             # westward grid-cell exit
             elif ds == dsw:
                 scrivi = 1
                 uu = (in_futr * uflux[1, ka, ja, ia] +
                       in_past * uflux[0, ka, ja, ia])
                 if uu < 0.0:
                     ib = ia - 1
                 x1 = float(ia)
                 y1 = newpos(ja, y0, ds, vu1, vm1)
                 z1 = newpos(ka, z0, ds, wu1, wm1)
                 # deal with corner cases (including rounding error)
                 if (y1 - ja) >= 1:  # y
                     uu = (in_futr * vflux[1, ka, ja + 1, ia] +
                           in_past * vflux[0, ka, ja + 1, ia])
                     if uu > 0.0:
                         jb = ja + 1
                     y1 = float(ja + 1)
                 elif (y1 - ja) <= 0:
                     uu = (in_futr * vflux[1, ka, ja, ia] +
                           in_past * vflux[0, ka, ja, ia])
                     if uu < 0.0:
                         jb = ja - 1
                     y1 = float(ja)
                 if (z1 - ka) >= 1:  # z
                     uu = (in_futr * wflux[1, ka + 1, ja, ia] +
                           in_past * wflux[0, ka + 1, ja, ia])
                     if uu > 0.0:
                         kb = ka + 1
                     z1 = float(ka + 1)
                 elif (z1 - ka) <= 0:
                     uu = (in_futr * wflux[1, ka, ja, ia] +
                           in_past * wflux[0, ka, ja, ia])
                     if uu < 0.0:
                         kb = ka - 1
                     z1 = float(ka)
             # northward grid-cell exit
             elif ds == dsn:
                 scrivi = 1
                 uu = (in_futr * vflux[1, ka, ja + 1, ia] +
                       in_past * vflux[0, ka, ja + 1, ia])
                 if uu > 0.0:
                     jb = ja + 1
                 x1 = newpos(ia, x0, ds, uu1, um1)
                 y1 = float(ja + 1)
                 z1 = newpos(ka, z0, ds, wu1, wm1)
                 # deal with corner cases (including rounding error)
                 if (x1 - ia) >= 1:  # x
                     uu = (in_futr * uflux[1, ka, ja, ia + 1] +
                           in_past * uflux[0, ka, ja, ia + 1])
                     if uu > 0.0:
                         ib = ia + 1
                     x1 = float(ia + 1)
                 elif (x1 - ia) <= 0:
                     uu = (in_futr * uflux[1, ka, ja, ia] +
                           in_past * uflux[0, ka, ja, ia])
                     if uu < 0.0:
                         ib = ia - 1
                     x1 = float(ia)
                 if (z1 - ka) >= 1:  # z
                     uu = (in_futr * wflux[1, ka + 1, ja, ia] +
                           in_past * wflux[0, ka + 1, ja, ia])
                     if uu > 0.0:
                         kb = ka + 1
                     z1 = float(ka + 1)
                 elif (z1 - ka) <= 0:
                     uu = (in_futr * wflux[1, ka, ja, ia] +
                           in_past * wflux[0, ka, ja, ia])
                     if uu < 0.0:
                         kb = ka - 1
                     z1 = float(ka)
             # southward grid-cell exit
             elif ds == dss:
                 scrivi = 1
                 uu = (in_futr * vflux[1, ka, ja, ia] +
                       in_past * vflux[0, ka, ja, ia])
                 if uu < 0.0:
                     jb = ja - 1
                 x1 = newpos(ia, x0, ds, uu1, um1)
                 y1 = float(ja)
                 z1 = newpos(ka, z0, ds, wu1, wm1)
                 # deal with corner cases (including rounding error)
                 if (x1 - ia) >= 1:  # x
                     uu = (in_futr * uflux[1, ka, ja, ia + 1] +
                           in_past * uflux[0, ka, ja, ia + 1])
                     if uu > 0.0:
                         ib = ia + 1
                     x1 = float(ia + 1)
                 elif (x1 - ia) <= 0:
                     uu = (in_futr * uflux[1, ka, ja, ia] +
                           in_past * uflux[0, ka, ja, ia])
                     if uu < 0.0:
                         ib = ia - 1
                     x1 = float(ia)
                 if (z1 - ka) >= 1:  # z
                     uu = (in_futr * wflux[1, ka + 1, ja, ia] +
                           in_past * wflux[0, ka + 1, ja, ia])
                     if uu > 0.0:
                         kb = ka + 1
                     z1 = float(ka + 1)
                 elif (z1 - ka) <= 0:
                     uu = (in_futr * wflux[1, ka, ja, ia] +
                           in_past * wflux[0, ka, ja, ia])
                     if uu < 0.0:
                         kb = ka - 1
                     z1 = float(ka)
             # upward grid-cell exit
             elif ds == dsu:
                 scrivi = 1
                 uu = (in_futr * wflux[1, ka + 1, ja, ia] +
                       in_past * wflux[0, ka + 1, ja, ia])
                 if uu > 0.0:
                     kb = ka + 1
                 x1 = newpos(ia, x0, ds, uu1, um1)
                 y1 = newpos(ja, y0, ds, vu1, vm1)
                 z1 = float(ka + 1)
                 # deal with corner cases (including rounding error)
                 if (x1 - ia) >= 1:  # x
                     uu = (in_futr * uflux[1, ka, ja, ia + 1] +
                           in_past * uflux[0, ka, ja, ia + 1])
                     if uu > 0.0:
                         ib = ia + 1
                     x1 = float(ia + 1)
                 elif (x1 - ia) <= 0:
                     uu = (in_futr * uflux[1, ka, ja, ia] +
                           in_past * uflux[0, ka, ja, ia])
                     if uu < 0.0:
                         ib = ia - 1
                     x1 = float(ia)
                 if (y1 - ja) >= 1:  # y
                     uu = (in_futr * vflux[1, ka, ja + 1, ia] +
                           in_past * vflux[0, ka, ja + 1, ia])
                     if uu > 0.0:
                         jb = ja + 1
                     y1 = float(ja + 1)
                 elif (y1 - ja) <= 0:
                     uu = (in_futr * vflux[1, ka, ja, ia] +
                           in_past * vflux[0, ka, ja, ia])
                     if uu < 0.0:
                         jb = ja - 1
                     y1 = float(ja)
             # downward grid-cell exit
             elif ds == dsd:
                 scrivi = 1
                 uu = (in_futr * wflux[1, ka, ja, ia] +
                       in_past * wflux[0, ka, ja, ia])
                 if uu < 0.0:
                     kb = ka - 1
                 x1 = newpos(ia, x0, ds, uu1, um1)
                 y1 = newpos(ja, y0, ds, vu1, vm1)
                 z1 = float(ka)
                 # deal with corner cases (including rounding error)
                 if (x1 - ia) >= 1:  # x
                     uu = (in_futr * uflux[1, ka, ja, ia + 1] +
                           in_past * uflux[0, ka, ja, ia + 1])
                     if uu > 0.0:
                         ib = ia + 1
                     x1 = float(ia + 1)
                 elif (x1 - ia) <= 0:
                     uu = (in_futr * uflux[1, ka, ja, ia] +
                           in_past * uflux[0, ka, ja, ia])
                     if uu < 0.0:
                         ib = ia - 1
                     x1 = float(ia)
                 if (y1 - ja) >= 1:  # y
                     uu = (in_futr * vflux[1, ka, ja + 1, ia] +
                           in_past * vflux[0, ka, ja + 1, ia])
                     if uu > 0.0:
                         jb = ja + 1
                     y1 = float(ja + 1)
                 elif (y1 - ja) <= 0:
                     uu = (in_futr * vflux[1, ka, ja, ia] +
                           in_past * vflux[0, ka, ja, ia])
                     if uu < 0.0:
                         jb = ja - 1
                     y1 = float(ja)
             elif end_loop or (ds == dsmax):
                 scrivi = 1
                 x1 = newpos(ia, x0, ds, uu1, um1)
                 y1 = newpos(ja, y0, ds, vu1, vm1)
                 z1 = newpos(ka, z0, ds, wu1, wm1)
                 # TODO for 2D advection we will need to include
                 # the check for convergence/divergence zones
             else:
                 print("\nUnrecognised ds\n")
                 print("ds: %f\n", ds)
                 print("dsmax: %f\n", dsmax)
                 print("dxyz: %f\n", dxyz)
                 print("dsn: %f\n", dsn)
                 print("dss: %f\n", dss)
                 print("dse: %f\n", dse)
                 print("dsw: %f\n", dsw)
                 print("dsu: %f\n", dsu)
                 print("dsd: %f\n", dsd)
                 raise ValueError("\nUnrecognised ds\n")
 
             # check if particle entered a kill area
             kill_area = 0
             for kk in range(nend):
                 if (iendw[kk] <= x1) and \
                         (x1 <= iende[kk]) and \
                         (jends[kk] <= y1) and \
                         (y1 <= jendn[kk]):
                     # we store the region where it was killed
                     kill_area = 10 + kk
                     break  # break the for loop over kill areas
             if kill_area > 0:
                 # write to buffer
                 if rec_out >= nvals:
                     raise ValueError("Kill region: buffer finished, make it larger!")
                 out_tijk[ntrac, rec_out, 0] = tt
                 out_tijk[ntrac, rec_out, 1] = x1
                 out_tijk[ntrac, rec_out, 2] = y1
                 out_tijk[ntrac, rec_out, 3] = z1
                 out_code[ntrac] = kill_area
+                print(ntrac, x1, y1, z1)
                 transform_one(x1, y1, z1, out_xyz[ntrac, rec_out, :], CS, SN, dX, dY, xG, yG, Z)
                 rec_out += 1
                 break  # break particle loop
 
             # check if the particle is still
             # inside the domain
             if (ib > imax) or (ib < 0) or \
                     (jb > jmax) or (jb < 0):
                 # write to buffer
                 if rec_out >= nvals:
                     raise ValueError("Exit domain: buffer finished, make it larger!")
                 out_tijk[ntrac, rec_out, 0] = tt
                 out_tijk[ntrac, rec_out, 1] = x1
                 out_tijk[ntrac, rec_out, 2] = y1
                 out_tijk[ntrac, rec_out, 3] = z1
                 out_code[ntrac] = 1
                 transform_one(x1, y1, z1, out_xyz[ntrac, rec_out, :], CS, SN, dX, dY, xG, yG, Z)
                 rec_out += 1
                 break
             # check if the particle exited through the top
             if kb >= kmax:
                 # write to buffer
                 # TODO there should be a better way to deal with the
                 # top boundary condition
                 if rec_out >= nvals:
                     raise ValueError("Airborne: buffer finished, make it larger!")
                 out_tijk[ntrac, rec_out, 0] = tt
                 out_tijk[ntrac, rec_out, 1] = x1
                 out_tijk[ntrac, rec_out, 2] = y1
                 out_tijk[ntrac, rec_out, 3] = z1
                 out_code[ntrac] = 2
                 transform_one(x1, y1, z1, out_xyz[ntrac, rec_out, :], CS, SN, dX, dY, xG, yG, Z)
                 rec_out += 1
                 break
 
 
 
             # At the end of the cycle we can write the
             # position of the particle in buffer
             # gcmstep => outfreq == 1
             # always => outfreq == 2
             # cross => outfreq == 3
             # custom => outfreq == 4
             if ((tt == out_dt) and (outfreq == 1 or outfreq == 4)) \
                     or ((outfreq == 3) and (scrivi == 1)) \
                     or ((outfreq == 2) and (ts > 0.0)):
                 # write to buffer
                 if rec_out >= nvals:
                     raise ValueError("Normal write: buffer finished, make it larger!")
                 if step_t + tt in timestamps_out:
                     out_tijk[ntrac, rec_out, 0] = tt
                     out_tijk[ntrac, rec_out, 1] = x1
                     out_tijk[ntrac, rec_out, 2] = y1
                     out_tijk[ntrac, rec_out, 3] = z1
                     transform_one(x1, y1, z1, out_xyz[ntrac, rec_out, :], CS, SN, dX, dY, xG, yG, Z)
 
                 xyz[ntrac, 0] = x1
                 xyz[ntrac, 1] = y1
                 xyz[ntrac, 2] = z1
                 ijk[ntrac, 0] = ib
                 ijk[ntrac, 1] = jb
                 ijk[ntrac, 2] = kb
                 rec_out += 1
                 break
 
 
 @nb.njit(nogil=True, parallel=True)
 def output_to_3d_array(array, dim1, dim2, values):
     for i, val in enumerate(values):
         array[dim1, dim2, i] = val
 
 
 @nb.njit(nogil=True, parallel=True)
 def loc_time_fast(timeax, N, time, is_monotonic):
     loc_time = -9999999
 
     # We go backwards since the time we are looking for
     # is usually towards the end
     for ii in range(N - 1, -1, -1):
         if abs(timeax[ii] - time) <= 1.0e-9:  # we found it
             return ii
         # the axis is monotonic, so if we find a value
         # smaller than time, it means there is no time in
         # the axis
         elif is_monotonic and (timeax[ii] < time):
             return loc_time
 
     return loc_time