diff --git a/mechanical_model/residual_stress_analysis.py b/mechanical_model/residual_stress_analysis.py
index 4c85454..f566af9 100644
--- a/mechanical_model/residual_stress_analysis.py
+++ b/mechanical_model/residual_stress_analysis.py
@@ -1,576 +1,639 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """
 Created on Mon Jul 25 14:38:47 2022
 
 @author: ekinkubilay
 """
 import os
 import sys
 import h5py
 
 sys.path.insert(0, os.path.join(os.getcwd(), "muspectre/build/language_bindings/python"))
 sys.path.insert(0, os.path.join(os.getcwd(), "muspectre/build/language_bindings/libmufft/python"))
 sys.path.insert(0, os.path.join(os.getcwd(), "muspectre/build/language_bindings/libmugrid/python"))
 print("current working directory: '{}'".format(os.getcwd()))
 
 import muFFT
 import muSpectre as msp
 
 import numpy as np
 import muGrid
 from muFFT import Stencils2D
 from muFFT import Stencils3D
 import cProfile, pstats
 import time as timer
 
 from petsc4py import PETSc
 import argparse
 
 import gc
+#from mpi4py import MPI
 
 class EigenStrain3:
     """
     Class whose eigen_strain_func function is used to apply eigen strains
     (thermal expansion and vacuum strain)
     """
 
     def __init__(self,eigenstrain_shape, thermal_expansion,cell):
-
         self.cell = cell
         self.pixels = self.cell.pixels
         self.nb_sub_grid = self.cell.nb_subdomain_grid_pts
         self.sub_loc = self.cell.subdomain_locations
         self.eigenstrain_shape = eigenstrain_shape
         self.alpha = thermal_expansion
         self.temperature_field = self.cell.get_fields().get_field('temperature').array()
         self.material_field = self.cell.get_fields().get_field('material').array()
         self.vacuum_strain_field = self.cell.get_fields().get_field('vacuum_strain').array()
         self.nb_quad_pts = self.cell.nb_quad_pts
 
 
     def __call__(self, strain_field):
         self.eigen_strain_func(strain_field)
 
     def eigen_strain_func(self, strain_field):
+        #dummy_time = MPI.Wtime()
         # Loop over the local pixels and apply the eigen strain based on the
         # global material field
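+        # At every material quad point the isotropic thermal eigenstrain alpha * T * I
+        # and the stored vacuum strain are subtracted from the total strain.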
         for i in range(self.nb_sub_grid[0]):
             for j in range(self.nb_sub_grid[1]):
                 for k in range(self.nb_sub_grid[2]):
                     for q in range(self.nb_quad_pts):
                         if self.material_field[0, q, i,j,k]:
                             strain_field[:, :, q, i, j, k] -= self.alpha*self.eigenstrain_shape*self.temperature_field[0,q,i,j,k] + self.vacuum_strain_field[:,:,q,i,j,k]
+        #print("EIGENSTRAIN CLASS --- Total EIGENSTRAIN time: {}".format(MPI.Wtime()- dummy_time) )
+
 
 def update_temperature_field(cell, temperature_array):
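+    #copy the global (whole-domain, quad-point resolved) temperature array into a
+    #local array restricted to this rank's subdomain of the cell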
     
     sub_domain_loc = cell.subdomain_locations
     
     #temperature = np.zeros((cell.nb_subdomain_grid_pts[0],cell.nb_subdomain_grid_pts[1],cell.nb_subdomain_grid_pts[2]))
     temperature = np.zeros((1,cell.nb_quad_pts, cell.nb_subdomain_grid_pts[0],cell.nb_subdomain_grid_pts[1],cell.nb_subdomain_grid_pts[2]))
 
     for pixel_id, pixel_coord in cell.pixels.enumerate():
         i, j, k = pixel_coord[0], pixel_coord[1], pixel_coord[2]
         a = i - sub_domain_loc[0]
         b = j - sub_domain_loc[1]
         c = k - sub_domain_loc[2] 
         for q in range(cell.nb_quad_pts):
             temperature[0,q,a,b,c] = temperature_array[0,q,i,j,k]
 
     return temperature
     
 def parse_args():
     """
     Function to handle arguments
     """
     parser = argparse.ArgumentParser(description='Run given layer')
     parser.add_argument("layer_no", type=int, help='layer_number')
     parser.add_argument("mu_rank", type=int, help='number of processes for muSpectre')
 
     args = parser.parse_args()
 
     return args
     
 def main():
     
     args = parse_args()
     layer_no = args.layer_no
     os.chdir('/home/ekinkubilay/Documents/AM/mechanical_model/results')
 
 
     #comm_py is the WORLD communicator; it is split below into a FEM group
     #and a muSpectre group
     from mpi4py import MPI
     comm_py = MPI.COMM_WORLD
     rank_py = comm_py.rank
     size_py = comm_py.size
     
     #color tags the two groups: ranks below size_py - mu_rank get color 0 (FEM),
     #the remaining mu_rank ranks get color 1 (muSpectre)
     mu_rank = args.mu_rank
     color = np.sign(int(rank_py/(size_py-mu_rank)))
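+    #e.g. with size_py = 8 and mu_rank = 3: ranks 0-4 give int(r/5) = 0 -> color 0 (FEM),
+    #ranks 5-7 give int(r/5) = 1 -> color 1 (muSpectre)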
     
     
     #split communicator
     comm = comm_py.Split(color, rank_py)
     rank = comm.rank
     size = comm.size
         
     #print(rank_py, size_py, rank, size, color)
 
     #define empty variable for broadcast buffer
     nb_steps = None
     dx, dy, dz = 0.00025, 0.00025, 0.00003
     #z_lim = 0.00216
     element_dimensions = [dx, dy, dz]
     #define global simulation parameters
     dim = 3
     nb_quad = 6
     thermal_expansion = 1e-5
     #nb_domain_grid_pts = [11,11,11]
             
     #define constant material properties
     Youngs_modulus = 10e9
     Poisson_ratio = 1/3
     
     ## define the convergence tolerance for the Newton-Raphson increment
     newton_tol = 1e-6
     equil_tol = newton_tol
     ## tolerance for the solver of the linear cell
     cg_tol = 1e-6
        
     ## macroscopic strain
     strain_step = np.zeros((3,3)) #no applied external strain
     maxiter = 1000  # for linear cell solver
        
     quad_coords = [np.array([0.75,0.5,0.25]), np.array([0.75,0.25,0.5]), np.array([0.5,0.75,0.25]), np.array([0.25,0.75,0.5]), np.array([0.5,0.25,0.75]), np.array([0.25,0.5,0.75])]
     quad_coords = quad_coords*np.array([dx,dy,dz])
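+    #quad_coords: local (fractional) coordinates of the six quadrature points inside a
+    #voxel (presumably one per tetrahedron of the linear finite-element stencil used
+    #below), scaled to physical offsets by the element dimensions dx, dy, dz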
-    
+
     if color == 0:
         #local size and rank on the split communicator, this communicator is called comm
         rank = comm.rank
         procs = comm.size
         other_size = size_py - size
         
         start_time = MPI.Wtime()
         
         send_ranks = np.array_split(np.arange(0,other_size, 1)+size, size)[rank]
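+        #send_ranks: the chunk of muSpectre global ranks (size .. size_py-1) that this
+        #FEM rank is responsible for sending geometry and temperature data to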
         #print(send_ranks)
         
         from dolfin import Mesh, XDMFFile, MeshTransformation, Point, HDF5File, FunctionSpace, Function
         #get the geometry and temperature via fenics
         mesh = Mesh(comm)
 
         #read the mesh and the temperature function at initial timestep
         #using Fenics within the FEM communicator
         filename = "/home/ekinkubilay/Documents/AM/Part_geometry/mesh/layer_"+str(layer_no).zfill(3)+".xdmf"
         temperature_filename = "/home/ekinkubilay/Documents/AM/Thermal_model/results/temperature/temperature_layer_"+str(layer_no).zfill(3)+".h5"
         f = XDMFFile(comm, filename)
         f.read(mesh)
         MeshTransformation.rescale(mesh, 1e-3, Point(0,0,0)) #in m
         f.close()
         del(f)
         gc.collect()
         bounding_box = mesh.bounding_box_tree()
         hdf5_file = HDF5File(comm, temperature_filename, "r")
         V = FunctionSpace(mesh, "CG", 1) 
         temperature_field_store = Function(V)
         no = 0
         vec_name = "/Temperature/vector_%d"%no
         timestamp = hdf5_file.attributes(vec_name)['timestamp']
         nb_steps = hdf5_file.attributes('Temperature')['count']-1
         hdf5_file.read(temperature_field_store, vec_name)
         timestamp_arr = [0.0]*nb_steps
         # timestamp_arr[0] = timestamp
         #obtain local domain dimensions
         xmax = np.max(mesh.coordinates()[:,0])
         xmin = np.min(mesh.coordinates()[:,0])
         ymax = np.max(mesh.coordinates()[:,1])
         ymin = np.min(mesh.coordinates()[:,1])
         zmax = np.max(mesh.coordinates()[:,2])
         zmin = np.min(mesh.coordinates()[:,2])
         #communicate max/min domain values within FEM communicator
         xmax = comm.allreduce(xmax, op=MPI.MAX)
         ymax = comm.allreduce(ymax, op=MPI.MAX)
         zmax = comm.allreduce(zmax, op=MPI.MAX)
         xmin = comm.allreduce(xmin, op=MPI.MIN)
         ymin = comm.allreduce(ymin, op=MPI.MIN)
         zmin = comm.allreduce(zmin, op=MPI.MIN)
 
         #calculate domain lengths and element dimensions
         domain_lengths = [xmax-xmin,ymax-ymin,zmax-zmin]
         nb_domain_grid_pts = list(np.array(domain_lengths)/np.array(element_dimensions)+1)
         nb_domain_grid_pts = [int(i) for i in nb_domain_grid_pts]
 
         #element_dimensions = np.array(domain_lengths)/np.array(nb_domain_grid_pts-np.ones(dim))
         #comm_py.send(domain_lengths, rank_py+2, tag=0)
         #domain_lengths = [x_max-x_min,y_max-y_min,z_max-z_min]
         #element_dimensions = np.array(domain_lengths)/np.array(nb_domain_grid_pts-np.ones(dim))
         
     
         #array to store material (and not vacuum or base plate)
         bulk = np.zeros(nb_domain_grid_pts)
         #domain discretisation TO DO:muspectre has this array already
         x_dim = np.linspace(xmin, xmax, nb_domain_grid_pts[0])
         y_dim = np.linspace(ymin, ymax, nb_domain_grid_pts[1])
         z_dim = np.linspace(zmin, zmax, nb_domain_grid_pts[2])
     
         #determine material (as opposed to internal vacuum) by checking whether each
         #grid point lies inside this rank's sub-mesh. This yields comm.size partially
         #filled arrays, each non-zero only on the sub-mesh owned by that rank.
         #The temperature is obtained by evaluating every point on every sub-mesh,
         #skipping points where fenics raises an error (point not on that rank's domain)
         for i,x in enumerate(x_dim):
             for j,y in enumerate(y_dim):
                 for k,z in enumerate(z_dim):
                     if len(bounding_box.compute_collisions(Point(x+0.5*dx,y+0.5*dy,z+0.5*dz))) != 0:
                         bulk[i,j,k] = 1
 
         del(bounding_box)
         gc.collect()
 
         #gather the material across all FEM ranks and broadcast to all FEM ranks
         #bulk and temperature_array are global (size corresponds to complete domain)
         bulk = comm.allreduce(bulk, op=MPI.SUM)
         bulk[bulk > 1] = 1
 
 
         #add base plate at the bottom and vacuum on top and sides
         #increase the number of grid points and total domain length in each dimension
         # bulk = np.insert(bulk, nb_domain_grid_pts[2], 0, axis=2)
         # nb_domain_grid_pts[2] += 1
         # domain_lengths[2] += element_dimensions[2]
         
         # bulk = np.insert(bulk, nb_domain_grid_pts[1], 0, axis=1)
         # nb_domain_grid_pts[1] += 1
         # domain_lengths[1] += element_dimensions[1]
         
         # bulk = np.insert(bulk, nb_domain_grid_pts[0], 0, axis=0)
         # nb_domain_grid_pts[0] += 1
         # domain_lengths[0] += element_dimensions[0]
         
         bulk = np.insert(bulk, 0, 2, axis=2)
         nb_domain_grid_pts[2] += 1
         domain_lengths[2] += element_dimensions[2]
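+        #bulk now encodes the phase of each voxel: 0 = vacuum, 1 = deposited material,
+        #2 = base plate (the extra layer just inserted at the bottom of the z axis)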
         
         #domain discretisation TO DO:muspectre has this array already
         x_dim = np.linspace(xmin, xmax, nb_domain_grid_pts[0])
         y_dim = np.linspace(ymin, ymax, nb_domain_grid_pts[1])
         z_dim = np.linspace(zmin-element_dimensions[2], zmax, nb_domain_grid_pts[2])
         
         coords = np.zeros([nb_domain_grid_pts[0],nb_domain_grid_pts[1],nb_domain_grid_pts[2],3])
         temperature_array = np.zeros((1,nb_quad)+tuple(nb_domain_grid_pts))
 
         for i in range(nb_domain_grid_pts[0]):
             coords[i,:,:,0] = x_dim[i]
         for i in range(nb_domain_grid_pts[1]):
             coords[:,i,:,1] = y_dim[i]
         for i in range(nb_domain_grid_pts[2]):
             coords[:,:,i,2] = z_dim[i]
           
+            
+        quad_coordinates_array = np.zeros(tuple(np.shape(temperature_array))+(3,))
+        bulk_quadrature = np.zeros_like(temperature_array)
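+        #precompute the physical coordinates of every quadrature point inside material
+        #voxels, so the temperature can be evaluated at these points at every step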
+        for i,x in enumerate(x_dim):
+            for j,y in enumerate(y_dim):
+                for k,z in enumerate(z_dim):
+                    if bulk[i,j,k] == 1:
+                        for q in range(nb_quad):
+                            quad_coordinates_array[0,q,i,j,k] = [x+quad_coords[q,0], y+quad_coords[q,1], z+quad_coords[q,2]]
+        for q in range(nb_quad):
+            bulk_quadrature[0,q,:,:,:] = bulk[:,:,:]
         # print("element_dimensions", element_dimensions)
         # print("element_dimensions", nb_domain_grid_pts)
         # print(x_dim)
         
         #store the temperature on the array with the right index if point is found
         #this creates comm.size number of temperature_arrays each non-zero only on 
         #the sub-mesh present in that rank
         #mesh and temperature are divided differently across communicators!! TO LOOK!
         # for i,x in enumerate(x_dim):
         #     for j,y in enumerate(y_dim):
         #         for k,z in enumerate(z_dim):
         #             if bulk[i,j,k] == 1:
         #                 for q in range(nb_quad):
         #                     try:
         #                         temperature_array[0,q,i,j,k] = temperature_field_store(Point(x,y,z))
         #                     except RuntimeError:
         #                         pass
 
         #send global arrays (material/temperature/coordinates) from FEM communicator
         #to muSpectre communicator
         for proc_rank in send_ranks: 
             
             comm_py.send(rank, proc_rank, tag=0)
             
             comm_py.send(bulk, proc_rank, tag=1)
             
             comm_py.send(nb_domain_grid_pts, proc_rank, tag=3)
     
             comm_py.send(domain_lengths, proc_rank, tag=4)
             
             comm_py.send(coords, proc_rank, tag=5)
             
             comm_py.send(timestamp_arr, proc_rank, tag=6)
             
             comm_py.send(temperature_array, proc_rank, tag=7)
             
+
+        list_subdomain_loc = [None] * size_py
+        list_nb_subdomain_grid_pts = [None] * size_py
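+        #record, per muSpectre global rank, the origin and extent of its subdomain so
+        #that this FEM rank can later send each receiver only the matching slice of
+        #the global temperature array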
+        
+        #print(rank_py, send_ranks)
+        for f, proc_rank in enumerate(send_ranks):
+            list_subdomain_loc[proc_rank] = comm_py.recv(source = proc_rank, tag=proc_rank)
+        
+        # for i,j in enumerate(list_subdomain_loc):
+        #     print(rank, i,j)
+        
+        for f, proc_rank in enumerate(send_ranks):
+            list_nb_subdomain_grid_pts[proc_rank] = comm_py.recv(source = proc_rank, tag=proc_rank)
+        
+        # for i,j in enumerate(list_nb_subdomain_grid_pts):
+        #     print(rank, i,j)
+        
         end_time = MPI.Wtime()
         print("FENICS INIT \n Local rank = {}, took {} seconds".format(rank, end_time-start_time))
     
     #for muSpectre processes
     if color == 1:
         
         #define muSpectre communicator, this communicator is called comm_mu
         comm_mu = muGrid.Communicator(comm)
         #local size and rank on the split communicator
         rank = comm.rank
         procs = comm.size
         
         start_time = MPI.Wtime()
 
         #receive from FEM communicator material arrays and system size parameters
         recieve_rank = comm_py.recv(source = MPI.ANY_SOURCE, tag=0)
         
         bulk = comm_py.recv(source = recieve_rank, tag=1)
                 
         nb_domain_grid_pts = comm_py.recv(source = recieve_rank, tag=3)
 
         domain_lengths = comm_py.recv(source = recieve_rank, tag=4)
         
         coords = comm_py.recv(source = recieve_rank, tag=5)
         
         #receive empty timestamp array from FEM communicator
         timestamp_arr = comm_py.recv(source = recieve_rank, tag=6)
         
         temperature_array = comm_py.recv(source= recieve_rank, tag=7)
         
         #create cell using muSpectre communicator
         cell = msp.cell.CellData.make_parallel(list(nb_domain_grid_pts), list(domain_lengths), comm_mu)
         cell.nb_quad_pts = nb_quad
         
         #define materials
         material = msp.material.MaterialLinearElastic1_3d.make(cell, "material", Youngs_modulus , Poisson_ratio)
         vacuum = msp.material.MaterialLinearElastic1_3d.make(cell, "vacuum", 0.0, Poisson_ratio)
         base = msp.material.MaterialLinearElastic1_3d.make(cell, "base", 2*Youngs_modulus, Poisson_ratio)
         
         #assign materials
         for pixel_index, pixel in enumerate(cell.pixels):
             if bulk[tuple(pixel)]==1:
                 material.add_pixel(pixel_index)
             elif bulk[tuple(pixel)]==0:
                 vacuum.add_pixel(pixel_index)
             elif bulk[tuple(pixel)]==2:
                 base.add_pixel(pixel_index)
           
         #define solver parameters 
         eigenstrain_shape = np.identity(3)
         gradient = Stencils3D.linear_finite_elements
         weights = 6 * [1]
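+        #Stencils3D.linear_finite_elements decomposes each voxel into (presumably six)
+        #tetrahedral elements, hence nb_quad = 6 quadrature points with equal unit weights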
 
         ## use solver 
         krylov_solver = msp.solvers.KrylovSolverCG(cg_tol, maxiter)        
         solver = msp.solvers.SolverNewtonCG(cell, krylov_solver, msp.Verbosity.Silent, 
                                             newton_tol, equil_tol, maxiter, gradient, weights)    
         solver.formulation = msp.Formulation.small_strain
         solver.initialise_cell()
     
         #define file input/output parameters for muSpectre output
         # output_filename = "layer_"+str(layer_no-1).zfill(3)+".nc"
         # if comm_mu.rank == 0 and os.path.exists(output_filename):
         #     os.remove(output_filename)
         output_filename = "layer_"+str(layer_no).zfill(3)+".nc"
         if comm_mu.rank == 0 and os.path.exists(output_filename):
             os.remove(output_filename)
         comm_mu.barrier()
         
         file_io_object_write = muGrid.FileIONetCDF(output_filename, 
                                       muGrid.FileIONetCDF.OpenMode.Write, 
                                       comm_mu)
         
         #register fields and field collection
         cell.get_fields().set_nb_sub_pts("quad", nb_quad)
         material_phase = cell.get_fields().register_int_field("material", 1, 'quad')
         coordinates = cell.get_fields().register_real_field("coordinates", 3, 'quad')
         temperature_field = cell.get_fields().register_real_field("temperature", 1, 'quad')
         vacuum_strain_field = cell.get_fields().register_real_field("vacuum_strain", (3,3), "quad")
 
         file_io_object_write.register_field_collection(field_collection=cell.get_fields(),field_names=cell.get_field_names())
         
         #initialize global time attribute (to be updated once filled)
         
         file_io_object_write.write_global_attribute('timestamp', timestamp_arr)
         
         sub_domain_loc = cell.subdomain_locations
 
         #create pointer-like copies for fields
         material_phase_alias = np.array(material_phase, copy=False)
         coordinates_alias = np.array(coordinates, copy=False)
         temperature_alias = np.array(temperature_field, copy=False)
         vacuum_strain_alias = np.array(vacuum_strain_field, copy=False)
         
         #assign values to the above created aliases
         for pixel_id, pixel_coord in cell.pixels.enumerate():
             i, j, k = pixel_coord[0], pixel_coord[1], pixel_coord[2]
             for q in range(nb_quad):
                 # local coordinates on the processor
                 a = i - sub_domain_loc[0]
                 b = j - sub_domain_loc[1]
                 c = k - sub_domain_loc[2]
         
                 material_phase_alias[0, q, a, b, c] = int(bulk[i, j, k])
                 coordinates_alias[0, q, a, b, c] = coords[i,j,k][0]
                 coordinates_alias[1, q, a, b, c] = coords[i,j,k][1]
                 coordinates_alias[2, q, a, b, c] = coords[i,j,k][2]
         #use the temperature array to change the values using temperature
         #array pointer (alias)
         temperature_alias[:,:,:,:,:] = update_temperature_field(cell, temperature_array)[:,:,:,:,:] 
         vacuum_strain_alias[:,:,:,:,:,:] = 0
 
         if layer_no != 60:
             
             from transfer_previous_layer import transfer_previous_layer
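+            #for every layer but the first one handled here (layer_no == 60 appears to be
+            #the first), copy the converged stress, strain, temperature and vacuum-strain
+            #fields of the previous layer into the new cell for the already-built voxels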
 
             strain_field = cell.get_fields().get_field("grad")
             strain_field_alias = np.array(strain_field, copy=False)
             stress_field = cell.get_fields().get_field("flux")
             stress_field_alias = np.array(stress_field, copy=False)
 
             previous_cell = transfer_previous_layer(layer_no-1)
             previous_stress = previous_cell.get_fields().get_field('flux').array()
             previous_strain = previous_cell.get_fields().get_field('grad').array()
             previous_temperature = previous_cell.get_fields().get_field('temperature').array()
             previous_vacuum_strain = previous_cell.get_fields().get_field('vacuum_strain').array()
             
             #assign values to the above created aliases
             for pixel_id, pixel_coord in cell.pixels.enumerate():
                 i, j, k = pixel_coord[0], pixel_coord[1], pixel_coord[2]
                 if k < nb_domain_grid_pts[2]-1:
                     for q in range(nb_quad):
                         # local coordinates on the processor
                         a = i - sub_domain_loc[0]
                         b = j - sub_domain_loc[1]
                         c = k - sub_domain_loc[2]
                         strain_field_alias[:, :, q, a, b, c] = previous_strain[:, :, q, i, j, k]
                         stress_field_alias[:, :, q, a, b, c] = previous_stress[:, :, q, i, j, k]
                         temperature_alias[0,q,a,b,c] = previous_temperature[0,q,i,j,k]
                         vacuum_strain_alias[:, :, q, a, b, c] = previous_vacuum_strain[:, :, q, i, j, k]
                 if k == nb_domain_grid_pts[2]-2:
                     for q in range(nb_quad):
                         vacuum_strain_alias[:, :, q, a, b, c] = previous_strain[:, :, q, i, j, k]
 
                 #print(np.shape(strain_field_alias), nb_domain_grid_pts, k)
         
         #define eigen_class required by solve_increment
         eigen_class = EigenStrain3(eigenstrain_shape, thermal_expansion, cell)
+
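+        #send this rank's subdomain origin and extent back to the FEM rank that fed it;
+        #the tag rank+size_py-procs equals this process' global rank, matching the
+        #tag=proc_rank used on the receiving (FEM) side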
+        comm_py.send([cell.subdomain_locations[0],cell.subdomain_locations[1],cell.subdomain_locations[2]], recieve_rank, tag=rank+size_py-procs)
         
+        comm_py.send([cell.nb_subdomain_grid_pts[0],cell.nb_subdomain_grid_pts[1],cell.nb_subdomain_grid_pts[2]], recieve_rank, tag=rank+size_py-procs)
+
         end_time = MPI.Wtime()
         print("MUSPECTRE INIT \n Local rank = {}, took {} seconds".format(rank, end_time-start_time))
 
     #make nb_steps global by broadcasting
     nb_steps = comm_py.bcast(nb_steps, root = 0)
     
     start = timer.time()
-    
+    start_MPI = MPI.Wtime()
     #separate operations for FEM and muSpectre communicators depending on ranks
     if color == 1:
         #temperature_array = np.zeros(nb_domain_grid_pts)
-        
+        print_statement = False
         start_time = MPI.Wtime()
         
+        dummy_temperature = np.zeros((1,cell.nb_quad_pts, cell.nb_subdomain_grid_pts[0],cell.nb_subdomain_grid_pts[1],cell.nb_subdomain_grid_pts[2]))
+                    
+        if rank == 0: print_statement = True
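+        #per step: post a non-blocking receive for the next temperature field, solve the
+        #current load increment while it is in flight, write the output frame, then wait
+        #on the receive and update the temperature field for the next step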
         #for all steps
         for s in range(nb_steps):
-            print('=== STEP {}/{} ==='.format(s+1, nb_steps))
+            step_start = MPI.Wtime()
+            if print_statement: print('=== STEP {}/{} ==='.format(s+1, nb_steps))
             
-            #recieve temp_array from constant sender request
-            #temperature_array = np.zeros(nb_domain_grid_pts)
-            print("MUSPECTRE Step {} --- Recieve request created: {}".format(s, MPI.Wtime()))
-            req  = comm_py.Irecv(temperature_array, source = recieve_rank, tag=s)
-            print("MUSPECTRE Step {} --- Recieve request done: {}".format(s, MPI.Wtime()))
+            req  = comm_py.Irecv(dummy_temperature, source = recieve_rank, tag=s)
+            if print_statement: print("MUSPECTRE Step {} --- Receive request done: {}".format(s, MPI.Wtime()-start_MPI))
             
-            #print(np.max(cell.get_fields().get_field("grad").array()))
-
+            solve_start = MPI.Wtime()
             #solve mechanics problem
             res = solver.solve_load_increment(strain_step, eigen_class.eigen_strain_func)
             comm_mu.barrier()
+            if print_statement: print("MUSPECTRE - Step {} --- Solve time: {}".format(s, MPI.Wtime() - solve_start))
 
             #write all fields in .nc format TO DO:not all fields
             file_io_object_write.append_frame().write(["coordinates", "flux",
                          "grad",
                          "material",
                          "temperature", "vacuum_strain"])
             
             #make sure temperature array is received
             req.wait()
-            print("MUSPECTRE Step {} --- Recieve request Wait done: {}".format(s, MPI.Wtime()))
+            if print_statement: print("MUSPECTRE Step {} --- Receive request Wait done: {}".format(s, MPI.Wtime()-start_MPI))
 
             #use the temperature array to change the values using temperature
             #array pointer (alias)
-            temperature_alias[:,:,:] = update_temperature_field(cell, temperature_array)[:,:,:]
-                  
+            temperature_alias[:,:,:,:,:] = dummy_temperature
+
+
             if (s+1)%20 == 0:
-                print('=== STEP {}/{} ==='.format(s+1, nb_steps))
+                if print_statement: print('=== STEP {}/{} ==='.format(s+1, nb_steps))
                 break
-            print("MUSPECTRE Step {} --- Step done: {}".format(s, MPI.Wtime()))
+            if print_statement: print("MUSPECTRE Step {} --- Step done: {}".format(s, MPI.Wtime()-start_MPI))
         end_time = MPI.Wtime()
-        print("MUSPECTRE Step {} --- RUN \n Local rank = {}, took {} seconds".format(s, rank, end_time-start_time))
+        if print_statement: print("MUSPECTRE Step {} --- RUN \n Local rank = {}, took {} seconds".format(s, rank, end_time-start_time))
         
     if color == 0:
-        
+        print_statement = False
         start_time = MPI.Wtime()
-
+        
+        if rank == 0: print_statement = True
+        
         for s in range(nb_steps):
+            
+            step_start = MPI.Wtime()
 
-            #start constant sending request
-            print("FENICS Step {} --- Send Requests created: {}".format(s, MPI.Wtime()))
-            reqs = [None] * len(send_ranks)
-            for p, proc_rank in enumerate(send_ranks):
-                reqs[p] = comm_py.Isend([temperature_array, MPI.DOUBLE], proc_rank, tag=s)
-            print("FENICS Step {} --- Send Requests done: {}".format(s, MPI.Wtime()))
             #re-read the temperature for the next step
             no += 1
             vec_name = "/Temperature/vector_%d"%no
             timestamp = hdf5_file.attributes(vec_name)['timestamp']
             timestamp_arr[s] = timestamp
             hdf5_file.read(temperature_field_store, vec_name)
             #store it on a dummy array since we don't want to change 
             #temperature array before the sending/receiving is done
             #which will be ensured by MPI.wait
             dummy_array = np.zeros((1,nb_quad)+tuple(nb_domain_grid_pts))
             
             #store the temperature on the array with the right index if point is found
             #this creates comm.size number of temperature_arrays each non-zero only on 
             #the sub-mesh present in that rank
             #mesh and temperature are divided differently across communicators!! TO LOOK!
-            for i,x in enumerate(x_dim):
-                for j,y in enumerate(y_dim):
-                    for k,z in enumerate(z_dim):
-                        if bulk[i,j,k] == 1:
-                            for q in range(nb_quad):
-                                try:
-                                    #dummy_array[0,q,i,j,k] = temperature_field_store(Point(x,y,z))
-                                    #if z > 0.00162: print(x,y,z,x+quad_coords[q,0],y+quad_coords[q,1],z+quad_coords[q,2])
-                                    dummy_array[0,q,i,j,k] = temperature_field_store(Point(x+quad_coords[q,0],y+quad_coords[q,1],z+quad_coords[q,2]))
-                                except RuntimeError:
-                                    pass
-
+            
+            # for i,x in enumerate(x_dim):
+            #     for j,y in enumerate(y_dim):
+            #         for k,z in enumerate(z_dim):
+            #             if bulk[i,j,k] == 1:
+            #                 for q in range(nb_quad):
+            #                     # try:
+            #                         #dummy_array[0,q,i,j,k] = temperature_field_store(Point(x,y,z))
+            #                         #if z > 0.00162: print(x,y,z,x+quad_coords[q,0],y+quad_coords[q,1],z+quad_coords[q,2])
+            #                     dummy_array[0,q,i,j,k] = temperature_field_store(quad_coordinates_array[0,q,i,j,k])
+            #                         #dummy_array[0,q,i,j,k] = temperature_field_store(Point(x+quad_coords[q,0],y+quad_coords[q,1],z+quad_coords[q,2]))
+            #                     # except RuntimeError:
+            #                     #     pass
+            #print(np.shape(bulk), np.shape(dummy_array))
+            if print_statement: print("FENICS Step {} --- Temperature Array Fill Start: {}".format(s, MPI.Wtime()-start_MPI))
+            for idx in np.ndindex(*bulk.shape):
+                if bulk[idx] == 1:
+                    for q in range(nb_quad):
+                        try:
+                            dummy_array[(0, q) + idx] = temperature_field_store(quad_coordinates_array[(0, q) + idx])
+                        except RuntimeError:
+                            pass
+            if print_statement: print("FENICS Step {} --- Temperature Array Fill End: {}".format(s, MPI.Wtime()-start_MPI))
+            
             #make sure temperature sending is complete and create the new 
             #temperature array
+            if s != 0:
+                for p, proc_rank in enumerate(send_ranks):
+                    reqs[p].wait()
+            if print_statement: print("FENICS Step {} --- Send Requests Wait done: {}".format(s, MPI.Wtime()-start_MPI))
+            
+            temperature_array = comm.allreduce(dummy_array, op=MPI.SUM)
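+            #after the allreduce every FEM rank holds the full global temperature field;
+            #each muSpectre rank is then sent only the slice matching its subdomain below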
+            
+            #start constant sending request
+            reqs = [None] * len(send_ranks)
             for p, proc_rank in enumerate(send_ranks):
-                reqs[p].wait()
-            print("FENICS Step {} --- Send Requests Wait done: {}".format(s, MPI.Wtime()))
+                send_temp = np.array(temperature_array[:,:,list_subdomain_loc[proc_rank][0]:list_subdomain_loc[proc_rank][0]+list_nb_subdomain_grid_pts[proc_rank][0],list_subdomain_loc[proc_rank][1]:list_subdomain_loc[proc_rank][1]+list_nb_subdomain_grid_pts[proc_rank][1],list_subdomain_loc[proc_rank][2]:list_subdomain_loc[proc_rank][2]+list_nb_subdomain_grid_pts[proc_rank][2]])
+                #print(p,proc_rank, list_subdomain_loc[proc_rank], list_nb_subdomain_grid_pts[proc_rank])
+                #print('send_shape', np.shape(send_temp))
+                reqs[p] = comm_py.Isend([send_temp, MPI.DOUBLE], proc_rank, tag=s)
+            if print_statement: print("FENICS Step {} --- Send Requests done: {}".format(s, MPI.Wtime()-start_MPI))
+            
 
-            temperature_array = comm.allreduce(dummy_array, op=MPI.SUM)
 
             if (s+1)%20 == 0:
-                print('=== STEP {}/{} ==='.format(s+1, nb_steps))
+                if print_statement: print('=== STEP {}/{} ==='.format(s+1, nb_steps))
                 break
-            print("FENICS Step {} --- Step done: {}".format(s, MPI.Wtime()))
+            if print_statement: print("FENICS Step {} --- Step done: {}".format(s, MPI.Wtime()-start_MPI))
 
         end_time = MPI.Wtime()
-        print("FENICS Step {} --- RUN \n Local rank = {}, took {} seconds".format(s, rank, end_time-start_time))
+        if print_statement: print("FENICS Step {} --- RUN \n Local rank = {}, took {} seconds".format(s, rank, end_time-start_time))
      
     #make timestamp array global by broadcasting
     timestamp_arr = comm_py.bcast(timestamp_arr, root = 0)
 
     #if on the muSpectre communicator, update the global timestamp
     if color == 1:
         file_io_object_write.update_global_attribute('timestamp', 'timestamp', timestamp_arr)
         file_io_object_write.close()
         
     print("Total run time:", timer.time()-start)
 
 
     
     
 if __name__=='__main__':
 
 
     main()