diff --git a/python/ibmppp/ibmppp.py b/python/ibmppp/ibmppp.py
new file mode 100644
index 0000000..b50f5b5
--- /dev/null
+++ b/python/ibmppp/ibmppp.py
@@ -0,0 +1,1471 @@
+from mpi4py import MPI
+import numpy as np
+from scipy import ndimage
+import ucf
+import h5py
+import os
+
+class ibmppp:
+    """Parallel Python Postprocessor for the IBM-DNS code"""
+    def __init__(self,comm,dir_base,iseq,flowType,chunksPerProc=(1,1,1),numberOfGhosts=(1,1,1)):
+        """Class constructor"""
+        # Save method arguments for later use
+        self.__comm = comm
+        self.__rank = comm.Get_rank()
+        self.__nproc = comm.Get_size()
+        self.__dir_base = dir_base
+        self.__iseq = iseq
+        self.__nxppp = chunksPerProc[0]
+        self.__nyppp = chunksPerProc[1]
+        self.__nzppp = chunksPerProc[2]
+        self.__nghx = numberOfGhosts[0]
+        self.__nghy = numberOfGhosts[1]
+        self.__nghz = numberOfGhosts[2]
+        # Declare variables which hold fields
+        self.field = {'u': None, 'v': None, 'w': None, 'p': None, 's1': None}
+        # Declare variables for grid (to be set by "loadGrid()")
+        self.grid = {'u': [None,None,None], 'v':[None,None,None], 'w': [None,None,None], 'p': [None,None,None], 's1': [None,None,None]}
+        self.domainBounds = (None,None,None,None,None,None)
+        self.__boundarySymmetries = {'u': None, 'v': None, 'w': None, 'p': None, 's1': None}
+        self.__xperiodic = None
+        self.__yperiodic = None
+        self.__zperiodic = None
+        self.localGrid = {
+          'u':  [None,None,None],
+          'v':  [None,None,None],
+          'w':  [None,None,None],
+          'p':  [None,None,None],
+          's1': [None,None,None]
+        } # (ibeg,iend,jbeg,jend,kbeg,kend) of original processor grid
+        self.__dx = {'u': None, 'v': None, 'w': None, 'p': None, 's1': None}
+        # Declare internal variables for processor grid (to be set by "loadProcGrid()")
+        self.__procGrid1 = {
+          'u':  [None,None,None,None,None,None],
+          'v':  [None,None,None,None,None,None],
+          'w':  [None,None,None,None,None,None],
+          'p':  [None,None,None,None,None,None],
+          's1': [None,None,None,None,None,None]
+        } # (ibeg,iend,jbeg,jend,kbeg,kend) of original processor grid
+        self.__procGrid = {
+          'u':  [None,None,None,None,None,None],
+          'v':  [None,None,None,None,None,None],
+          'w':  [None,None,None,None,None,None],
+          'p':  [None,None,None,None,None,None],
+          's1': [None,None,None,None,None,None]
+        } # (ibeg,iend,jbeg,jend,kbeg,kend) of internal processor grid
+        self.__nproc1 = None
+        self.__nxp1 = None; self.__nyp1 = None; self.__nzp1 = None
+        self.__nxp = None; self.__nyp = None; self.__nzp = None
+        self.__ipbeg = None; self.__jpbeg = None; self.__kpbeg = None
+        self.__nghbr = None
+        # Declare internal variables for local grid indices held by this processor
+        self.__localChunkBounds = {
+          'u':  [None,None,None,None,None,None],
+          'v':  [None,None,None,None,None,None],
+          'w':  [None,None,None,None,None,None],
+          'p':  [None,None,None,None,None,None],
+          's1': [None,None,None,None,None,None]
+        } # (ib,ie,jb,je,kb,ku) grid indices of local chunk
+        self.__localChunkSize = {
+          'u':  [None,None,None],
+          'v':  [None,None,None],
+          'w':  [None,None,None],
+          'p':  [None,None,None],
+          's1': [None,None,None]
+        } # (nxl,nyl,nzl) size of local chunk (without ghost cells)
+        # Declare internal variables for MPI requests
+        self.__mpireq = []
+        self.__mpibufdata = []
+        self.__mpibufidx = []
+        # Evaluate flow type and set boundary conditions
+        self.__setBoundaryConditions(flowType)
+        # Prepare the processor layout
+        self.loadProcGrid()
+        # Load grid
+        self.loadGrid()
+
+    def loadProcGrid(self):
+        '''Read input processor grid, compute processor grid for workers'''
+        if self.__rank==0:
+            # Read the processor grid
+            file_proc = self.__dir_base+'/proc_{:04d}.bin'.format(self.__iseq)
+            (self.__procGrid1['u'][0], self.__procGrid1['u'][1], self.__procGrid1['u'][2], self.__procGrid1['u'][3], self.__procGrid1['u'][4], self.__procGrid1['u'][5],
+             self.__procGrid1['v'][0], self.__procGrid1['v'][1], self.__procGrid1['v'][2], self.__procGrid1['v'][3], self.__procGrid1['v'][4], self.__procGrid1['v'][5],
+             self.__procGrid1['w'][0], self.__procGrid1['w'][1], self.__procGrid1['w'][2], self.__procGrid1['w'][3], self.__procGrid1['w'][4], self.__procGrid1['w'][5],
+             self.__procGrid1['p'][0], self.__procGrid1['p'][1], self.__procGrid1['p'][2], self.__procGrid1['p'][3], self.__procGrid1['p'][4], self.__procGrid1['p'][5],
+             self.__procGrid1['s1'][0],self.__procGrid1['s1'][1],self.__procGrid1['s1'][2],self.__procGrid1['s1'][3],self.__procGrid1['s1'][4],self.__procGrid1['s1'][5]) = ucf.readProcgrid(file_proc)
+            # Verify that this processor grid can be redistributed onto the requested processor layout
+            self.__nxp1 = len(self.__procGrid1['u'][0])
+            self.__nyp1 = len(self.__procGrid1['u'][2])
+            self.__nzp1 = len(self.__procGrid1['u'][4])
+            self.__nproc1 = self.__nxp1*self.__nyp1*self.__nzp1
+            if self.__nxp1%self.__nxppp != 0:
+              raise ValueError('Number of processors must be divisible by the number of processors per process. (nxp={}, nxppp={})'.format(self.__nxp1,self.__nxppp))
+            if self.__nyp1%self.__nyppp != 0:
+              raise ValueError('Number of processors must be divisible by the number of processors per process. (nyp={}, nyppp={})'.format(self.__nyp1,self.__nyppp))
+            if self.__nzp1%self.__nzppp != 0:
+              raise ValueError('Number of processors must be divisible by the number of processors per process. (nzp={}, nzppp={})'.format(self.__nzp1,self.__nzppp))
+            # Determine the new processor layout and verify total number of MPI processes
+            self.__nxp = self.__nxp1//self.__nxppp
+            self.__nyp = self.__nyp1//self.__nyppp
+            self.__nzp = self.__nzp1//self.__nzppp
+            if self.__nproc!=self.__nxp*self.__nyp*self.__nzp:
+              raise ValueError('Number of MPI processes does not match the requested processor layout. (MPI procs: {}, required procs: {}'.format(self.__nproc,self.__nxp*self.__nyp*self.__nzp))
+            self.__ipbeg = range(0,self.__nxp1,self.__nxppp)
+            self.__jpbeg = range(0,self.__nyp1,self.__nyppp)
+            self.__kpbeg = range(0,self.__nzp1,self.__nzppp)
+            for key in self.__procGrid1:
+              self.__procGrid[key][0] = self.__procGrid1[key][0][0::self.__nxppp]
+              self.__procGrid[key][1] = self.__procGrid1[key][1][self.__nxppp-1::self.__nxppp]
+              self.__procGrid[key][2] = self.__procGrid1[key][2][0::self.__nyppp]
+              self.__procGrid[key][3] = self.__procGrid1[key][3][self.__nyppp-1::self.__nyppp]
+              self.__procGrid[key][4] = self.__procGrid1[key][4][0::self.__nzppp]
+              self.__procGrid[key][5] = self.__procGrid1[key][5][self.__nzppp-1::self.__nzppp]
+        # Broadcast the data
+        self.__procGrid1 = self.__comm.bcast(self.__procGrid1,root=0)
+        self.__procGrid  = self.__comm.bcast(self.__procGrid,root=0)
+        (self.__nxp1,self.__nyp1,self.__nzp1) = self.__comm.bcast((self.__nxp1,self.__nyp1,self.__nzp1),root=0)
+        (self.__nxp, self.__nyp ,self.__nzp ) = self.__comm.bcast((self.__nxp ,self.__nyp ,self.__nzp ),root=0)
+        (self.__ipbeg,self.__jpbeg,self.__kpbeg) = self.__comm.bcast((self.__ipbeg,self.__jpbeg,self.__kpbeg),root=0)
+        # Get position in processor grid
+        (self.__icol,self.__jrow,self.__kpln) = self.__positionFromRank(self.__rank,self.__nxp,self.__nyp,self.__nzp)
+        # Determine local grid indices and size
+        for key in self.__procGrid:
+          self.__localChunkBounds[key][0] = self.__procGrid[key][0][self.__icol]
+          self.__localChunkBounds[key][1] = self.__procGrid[key][1][self.__icol]
+          self.__localChunkBounds[key][2] = self.__procGrid[key][2][self.__jrow]
+          self.__localChunkBounds[key][3] = self.__procGrid[key][3][self.__jrow]
+          self.__localChunkBounds[key][4] = self.__procGrid[key][4][self.__kpln]
+          self.__localChunkBounds[key][5] = self.__procGrid[key][5][self.__kpln]
+          self.__localChunkSize[key][0] = self.__localChunkBounds[key][1] - self.__localChunkBounds[key][0] + 1
+          self.__localChunkSize[key][1] = self.__localChunkBounds[key][3] - self.__localChunkBounds[key][2] + 1
+          self.__localChunkSize[key][2] = self.__localChunkBounds[key][5] - self.__localChunkBounds[key][4] + 1
+          # Verify that local grid size is not smaller than ghost cell size
+          if (self.__localChunkSize[key][0]<self.__nghx or 
+              self.__localChunkSize[key][1]<self.__nghy or 
+              self.__localChunkSize[key][2]<self.__nghz):
+            raise ValueError('Local grid size must be greater than number of ghost cells in each direction!')
+        # Initialize neighbor array
+        self.__initializeNeighbors()
+
+    def loadGrid(self):
+        '''Read and distribute physical grid'''
+        buffer = None
+        if self.__rank==0:
+            # Read the grid
+            file_grid = self.__dir_base+'/grid_{:04d}.bin'.format(self.__iseq)
+            buffer = ucf.readGrid(file_grid)
+        # Broadcast the data
+        buffer = self.__comm.bcast(buffer,root=0)
+        # Unpack the buffer
+        (self.grid['u'][0], self.grid['u'][1], self.grid['u'][2],
+         self.grid['v'][0], self.grid['v'][1], self.grid['v'][2],
+         self.grid['w'][0], self.grid['w'][1], self.grid['w'][2],
+         self.grid['p'][0], self.grid['p'][1], self.grid['p'][2],
+         self.grid['s1'][0],self.grid['s1'][1],self.grid['s1'][2]) = buffer
+        # Compute dx per grid
+        for key in self.grid:
+          self.__dx[key] = self.grid[key][0][1]-self.grid[key][0][0]
+        # Compute domain bounds
+        if self.__xperiodic:
+          a,b = 0.0, self.grid['u'][0][-1]+self.__dx['u']
+        else:
+          a,b = self.grid['u'][0][0]+0.5*self.__dx['u'], self.grid['u'][0][-1]-0.5*self.__dx['u']
+        if self.__yperiodic:
+          c,d = 0.0, self.grid['v'][1][-1]+self.__dx['v']
+        else:
+          c,d = self.grid['v'][1][0]+0.5*self.__dx['v'], self.grid['v'][1][-1]-0.5*self.__dx['v']
+        if self.__zperiodic:
+          e,f = 0.0, self.grid['w'][2][-1]+self.__dx['w']
+        else:
+          e,f = self.grid['w'][2][0]+0.5*self.__dx['w'], self.grid['w'][2][-1]-0.5*self.__dx['w']
+        self.domainBounds = (a,b,c,d,e,f)
+        # Determine local grid arrays and dx per field
+        for key in self.grid:
+          self.localGrid[key][0] = np.concatenate(
+            (np.arange(-self.__nghx,0)*self.__dx[key]+self.grid[key][0][self.__localChunkBounds[key][0]-1],  # lower ghost cells
+             self.grid[key][0][self.__localChunkBounds[key][0]-1:self.__localChunkBounds[key][1]],           # chunk grid
+             np.arange(1,self.__nghx+1)*self.__dx[key]+self.grid[key][0][self.__localChunkBounds[key][1]-1]) # upper ghost cells
+            ) 
+          self.localGrid[key][1] = np.concatenate(
+            (np.arange(-self.__nghy,0)*self.__dx[key]+self.grid[key][1][self.__localChunkBounds[key][2]-1],  # lower ghost cells
+             self.grid[key][1][self.__localChunkBounds[key][2]-1:self.__localChunkBounds[key][3]],           # chunk grid
+             np.arange(1,self.__nghy+1)*self.__dx[key]+self.grid[key][1][self.__localChunkBounds[key][3]-1]) # upper ghost cells
+            ) 
+          self.localGrid[key][2] = np.concatenate(
+            (np.arange(-self.__nghz,0)*self.__dx[key]+self.grid[key][2][self.__localChunkBounds[key][4]-1],  # lower ghost cells
+             self.grid[key][2][self.__localChunkBounds[key][4]-1:self.__localChunkBounds[key][5]],           # chunk grid
+             np.arange(1,self.__nghz+1)*self.__dx[key]+self.grid[key][2][self.__localChunkBounds[key][5]-1]) # upper ghost cells
+            ) 
+
+    def loadParticles(self):
+        '''Reads particles from file and distributes them to the correct processor.'''
+        # Rank 0 reads the file and does the redistribution.
+        # There is No need to distinguish between master and slave particles here, since we will
+        # only use them for masking the fields. Also we do not care about particles in ghost cells.
+        if self.__rank==0:
+          file_input = self.__dir_base+'particles_{:04d}.bin'.format(self.__iseq)
+          pp,col = ucf.readParticles(file_input,step=1)
+          # Remove time dimension, because we are dealing with a single time step exclusively
+          pp = pp[:,:,0]
+          # Duplicate particles over periodic boundaries
+          dirkey = ['x','y','z']
+          isPeriodic = (self.__xperiodic,self.__yperiodic,self.__zperiodic)
+          for idir in range(0,3):
+            if not isPeriodic[idir]:
+              continue
+            # Get domain boundaries in this direction
+            bdmin = self.domainBounds[2*idir]
+            bdmax = self.domainBounds[2*idir+1]
+            # Shift and append particles which are within one diameter from the lower bound
+            li_part = (pp[col[dirkey[idir]],:]<=bdmin+2.0*pp[col['r'],:])
+            pptmp = pp[:,li_part].copy()
+            pptmp[col[dirkey[idir]],:] += bdmax-bdmin
+            pp = np.concatenate((pp,pptmp),axis=1)
+            # Shift and append particles which are within one diameter from the upper bound
+            li_part = (pp[col[dirkey[idir]],:]>=bdmax-2.0*pp[col['r'],:])
+            pptmp = pp[:,li_part].copy()
+            pptmp[col[dirkey[idir]],:] -= bdmax-bdmin
+            pp = np.concatenate((pp,pptmp),axis=1)
+          # Go through individual chunks and distribute particles
+          for ip in range(0,self.__nxp):
+            for jp in range(0,self.__nyp):
+              for kp in range(0,self.__nzp):
+                # Determine rank
+                rank_dst = self.__rankFromPosition(ip,jp,kp,self.__nxp,self.__nyp,self.__nzp)
+                pos_dst = (ip,jp,kp)
+                # Determine chunk boundaries
+                bdmin = [ np.inf,  np.inf,  np.inf]
+                bdmax = [-np.inf, -np.inf, -np.inf]
+                for key in self.grid:
+                  for idir in range(0,3):
+                    bdmin[idir] = np.minimum(bdmin[idir],self.grid[key][idir][self.__procGrid[key][2*idir][pos_dst[idir]]-1])
+                    bdmax[idir] = np.maximum(bdmax[idir],self.grid[key][idir][self.__procGrid[key][2*idir+1][pos_dst[idir]]-1]+self.__dx[key])
+                # Find particles which affect that chunk
+                li_part =  ((pp[col['x'],:]>=bdmin[0]-pp[col['r'],:]) &
+                            (pp[col['x'],:]<=bdmax[0]+pp[col['r'],:]) &
+                            (pp[col['y'],:]>=bdmin[1]-pp[col['r'],:]) & 
+                            (pp[col['y'],:]<=bdmax[1]+pp[col['r'],:]) &
+                            (pp[col['z'],:]>=bdmin[2]-pp[col['r'],:]) & 
+                            (pp[col['z'],:]<=bdmax[2]+pp[col['r'],:]))
+                # Send them to that processor
+                sendbuf = (np.ascontiguousarray(pp[:,li_part]),col)
+                self.__mpireq.append(self.__comm.isend(sendbuf,dest=rank_dst))
+        # Every rank needs to receive the particles, rank 0 send them to itself
+        req = self.__comm.irecv(source=0)
+        (self.particles,self.col) = req.wait()
+        if self.__rank==0:
+          # Wait for everyone to finish
+          MPI.Request.waitall(self.__mpireq)
+          # Communication is done! Clear the list of requests
+          self.__mpireq.clear()
+
+    def loadField(self,key,dtype='float64'):
+        '''Reads chunks from files'''
+        # Determine which chunks are to be loaded by this processor
+        (ip,jp,kp) = self.__positionFromRank(self.__rank,self.__nxp,self.__nyp,self.__nzp)
+        ipb1 = self.__ipbeg[ip]
+        jpb1 = self.__jpbeg[jp]
+        kpb1 = self.__kpbeg[kp]
+        ipe1 = ipb1+self.__nxppp-1
+        jpe1 = jpb1+self.__nyppp-1
+        kpe1 = kpb1+self.__nzppp-1
+        #print(ipb1,ipe1,jpb1,jpe1,kpb1,kpe1)
+        # Get grid bounds for the field to be loaded.
+        # Also determine file which contains the desired field as well as the
+        # dataset ID
+        if key[0]=='u':
+          filebase = self.__dir_base+'uvwp_{:04d}.'.format(self.__iseq)
+          dset = 1
+        elif key[0]=='v':
+          filebase = self.__dir_base+'uvwp_{:04d}.'.format(self.__iseq)
+          dset = 2
+        elif key[0]=='w':
+          filebase = self.__dir_base+'uvwp_{:04d}.'.format(self.__iseq)
+          dset = 3
+        elif key[0]=='p':
+          filebase = self.__dir_base+'uvwp_{:04d}.'.format(self.__iseq)
+          dset = 4
+        elif key[0]=='s':
+          filebase = self.__dir_base+'scal_{:04d}.'.format(self.__iseq)
+          dset = int(key[1])
+          if dset!=1:
+            self.fields[key] = None
+            self.grid[key] = self.grid['s1']
+            self.__procGrid1[key] = self.__procGrid1['s1']
+            self.__procGrid[key] = self.__procGrid['s1']
+            self.__localChunkBounds[key] = self.__localChunkBounds['s1']
+            self.__localChunkSize[key] = self.__localChunkSize['s1']
+        else:
+          raise ValueError('Unknown field: {}'.format(field))
+        ib = self.__procGrid1[key][0][ipb1]
+        ie = self.__procGrid1[key][1][ipe1]
+        jb = self.__procGrid1[key][2][jpb1]
+        je = self.__procGrid1[key][3][jpe1]
+        kb = self.__procGrid1[key][4][kpb1]
+        ke = self.__procGrid1[key][5][kpe1]
+        #print(ib,ie,jb,je,kb,ke)
+        nxl = ie-ib+1
+        nyl = je-jb+1
+        nzl = ke-kb+1
+        #print(nxl,nyl,nzl)
+        # Allocate an array to hold the field
+        self.field[key] = np.empty((nxl+2*self.__nghx,nyl+2*self.__nghy,nzl+2*self.__nghz),dtype=dtype,order='F')
+        #print(q.shape)
+        # Go through each chunk to be read and construct the field
+        for ip1 in range(ipb1,ipe1+1):
+          for jp1 in range(jpb1,jpe1+1):
+            for kp1 in range(kpb1,kpe1+1):
+              # Determine rank of the chunk to be read
+              iprank = self.__rankFromPosition(ip1,jp1,kp1,self.__nxp1,self.__nyp1,self.__nzp1)
+              file_input = filebase+'{:05d}'.format(iprank)
+              #print('Loading '+file_input)
+              # Compute bounds of this chunk
+              ib1 = self.__procGrid1[key][0][ip1]
+              ie1 = self.__procGrid1[key][1][ip1]
+              jb1 = self.__procGrid1[key][2][jp1]
+              je1 = self.__procGrid1[key][3][jp1]
+              kb1 = self.__procGrid1[key][4][kp1]
+              ke1 = self.__procGrid1[key][5][kp1]
+              #print(ib1,ie1,jb1,je1,kb1,ke1)
+              # Read chunk and check its validity
+              [chunk] = ucf.readFieldChunk(file_input,dset=dset)
+              if ib1!=chunk['ibeg'] or ie1!=chunk['ibeg']+chunk['nxl']-1:
+                  raise ValueError('Loaded chunk does not correspond to processor grid (ibeg={}, nxl={} vs ibeg={},nxl={}'.format(ib1,ie1-ib1+1,chunk['ibeg'],chunk['nxl']))
+              if jb1!=chunk['jbeg'] or je1!=chunk['jbeg']+chunk['nyl']-1:
+                  raise ValueError('Loaded chunk does not correspond to processor grid (jbeg={}, nyl={} vs jbeg={},nyl={}'.format(jb1,je1-jb1+1,chunk['jbeg'],chunk['nyl']))
+              if kb1!=chunk['kbeg'] or ke1!=chunk['kbeg']+chunk['nzl']-1:
+                  raise ValueError('Loaded chunk does not correspond to processor grid (kbeg={}, nzl={} vs kbeg={},nzl={}'.format(kb1,ke1-kb1+1,chunk['kbeg'],chunk['nzl']))
+              ngh1 = chunk['ighost']
+              # No need to care about ghost cells here, we communicate them later
+              #print('shape(q)',q[ib1-ib+ngh:ie1-ib+ngh+1,jb1-jb+ngh:je1-jb+ngh+1,kb1-kb+ngh:ke1-kb+ngh+1].shape)
+              #print('shape(q1)',chunk['data'][ngh1:-ngh1,ngh1:-ngh1,ngh1:-ngh1].shape)
+              self.field[key][ib1-ib+self.__nghx:ie1-ib+self.__nghx+1,
+                              jb1-jb+self.__nghy:je1-jb+self.__nghy+1,
+                              kb1-kb+self.__nghz:ke1-kb+self.__nghz+1] = chunk['data'][ngh1:-ngh1,ngh1:-ngh1,ngh1:-ngh1]
+        # Exchange ghost cells and set boundary conditions
+        self.exchangeGhostCells(key)
+        self.imposeBoundaryConditions(key)
+
+    def saveField(self,filename,key,append=False,keepAllGhost=False,xdmf=False):
+      '''Writes chunks in new processor layout to h5 files'''
+      # Determine filename of output file
+      file_chunk = filename+'.{:05d}'.format(self.__rank)
+      # Do we write all ghosts, or just a single ghost cell?
+      if keepAllGhost:
+        ii_rng = slice(0,2*self.__nghx+self.__localChunkSize[key][0])
+        jj_rng = slice(0,2*self.__nghy+self.__localChunkSize[key][1])
+        kk_rng = slice(0,2*self.__nghz+self.__localChunkSize[key][2])
+        nghx = self.__nghx
+        nghy = self.__nghy
+        nghz = self.__nghz
+      else:
+        ii_rng = slice(self.__nghx-1,self.__nghx+self.__localChunkSize[key][0]+1)
+        jj_rng = slice(self.__nghy-1,self.__nghy+self.__localChunkSize[key][1]+1)
+        kk_rng = slice(self.__nghz-1,self.__nghz+self.__localChunkSize[key][2]+1)
+        nghx = 1
+        nghy = 1
+        nghz = 1
+      # Compute global nx,ny,nz
+      nxg = self.__procGrid[key][1][-1]-self.__procGrid[key][0][0]
+      nyg = self.__procGrid[key][3][-1]-self.__procGrid[key][2][0]
+      nzg = self.__procGrid[key][5][-1]-self.__procGrid[key][4][0]
+      # If append flag is set, we add a dataset to the existing file
+      if append:
+        ioflag = 'a'
+      else:
+        ioflag = 'w'
+      # Open the file and write data, then close it because we are done
+      fid = h5py.File(file_chunk,ioflag)
+      if not append:
+        fid.create_dataset('rank',data=self.__rank)
+        fid.create_dataset('icol',data=self.__icol)
+        fid.create_dataset('jrow',data=self.__jrow)
+        fid.create_dataset('kpln',data=self.__kpln)
+      gid = fid.create_group('/'+key)
+      gid.create_dataset('ib',data=self.__localChunkBounds[key][0])
+      gid.create_dataset('ie',data=self.__localChunkBounds[key][1])
+      gid.create_dataset('jb',data=self.__localChunkBounds[key][2])
+      gid.create_dataset('je',data=self.__localChunkBounds[key][3])
+      gid.create_dataset('kb',data=self.__localChunkBounds[key][4])
+      gid.create_dataset('ke',data=self.__localChunkBounds[key][5])
+      gid.create_dataset('nxl',data=self.__localChunkSize[key][0])
+      gid.create_dataset('nyl',data=self.__localChunkSize[key][1])
+      gid.create_dataset('nzl',data=self.__localChunkSize[key][2])
+      gid.create_dataset('nx',data=nxg)
+      gid.create_dataset('ny',data=nyg)
+      gid.create_dataset('nz',data=nzg)
+      gid.create_dataset('ighost',data=(1,),dtype='i')
+      gid.create_dataset('nghostx',data=nghx,dtype='i')
+      gid.create_dataset('nghosty',data=nghy,dtype='i')
+      gid.create_dataset('nghostz',data=nghz,dtype='i')
+      did = gid.create_dataset('data',data=self.field[key][ii_rng,jj_rng,kk_rng].flatten('F'))
+      did.attrs['F_CONTIGUOUS'] = 1
+      did.attrs['DIM'] = (self.__localChunkSize[key][0]+2*nghx,self.__localChunkSize[key][1]+2*nghy,self.__localChunkSize[key][2]+2*nghz)
+      fid.close()
+      # Create an XDMF file for the field
+      if xdmf and self.__rank==0:
+        # Construct XDMF filename
+        file_xdmf = filename+'_'+key+'.xdmf'
+        # Open file and write header
+        fid = open(file_xdmf,'w') 
+        fid.write('<?xml version="1.0"?>\n')
+        fid.write('<Xdmf Version="2.0">\n')
+        fid.write('   <Domain>\n')
+        fid.write('      <Grid Name="{}" GridType="Collection" CollectionType="Spatial">\n'.format(key))
+        #fid.write('         <Time Value="%.2f" />\n',params.general.simtime)
+        # Get ghost cell setup
+        if keepAllGhost:
+          nghx,nghy,nghz = self.__nghx,self.__nghy,self.__nghz
+        else:
+          nghx,nghy,nghz = 1,1,1
+        # Loop through chunks
+        for iprank in range(0,self.__nproc):
+          # Get position
+          iproc,jproc,kproc = self.__positionFromRank(iprank,self.__nxp,self.__nyp,self.__nzp)
+          # Get basename of current subfile (output file will be in same directory as HDF5 files)
+          subfile = os.path.basename(filename+'.{:05d}'.format(iprank))
+          # Compute bounds for the local grid
+          ib = self.__procGrid[key][0][iproc]
+          ie = self.__procGrid[key][1][iproc]
+          jb = self.__procGrid[key][2][jproc]
+          je = self.__procGrid[key][3][jproc]
+          kb = self.__procGrid[key][4][kproc]
+          ke = self.__procGrid[key][5][kproc]
+          dx = self.__dx[key]
+          x0 = self.grid[key][0][ib-1]-nghx*dx
+          y0 = self.grid[key][1][jb-1]-nghy*dx
+          z0 = self.grid[key][2][kb-1]-nghz*dx
+          nxl = ie-ib+1+2*nghx
+          nyl = je-jb+1+2*nghy
+          nzl = ke-kb+1+2*nghz
+          # Write XDMF for this processor
+          fid.write('         <Grid Name="rank {:5d} position {:3d},{:3d},{:3d}">\n'.format(iprank,iproc,jproc,kproc))
+          fid.write('            <Topology TopologyType="3DCORECTMesh" NumberOfElements="{:d} {:d} {:d}"/>\n'.format(nzl,nyl,nxl))
+          fid.write('            <Geometry Origin="" Type="ORIGIN_DXDYDZ">\n')
+          fid.write('              <DataItem DataType="Float" Dimensions="3" Format="XML" Precision="8"> {:12f} {:12f} {:12f}</DataItem>\n'.format(z0,y0,x0))
+          fid.write('              <DataItem DataType="Float" Dimensions="3" Format="XML" Precision="8"> {:12f} {:12f} {:12f}</DataItem>\n'.format(dx,dx,dx))
+          fid.write('            </Geometry>\n')
+          fid.write('            <Attribute Name="{}" Dimensions="{:d} {:d} {:d}">\n'.format(key,nzl,nyl,nxl))
+          fid.write('            <DataItem Format="HDF5" DataType="Float" Dimensions="{:d} {:d} {:d}">\n'.format(nzl,nyl,nxl))
+          fid.write('            {}:/{}/data </DataItem>\n'.format(subfile,key))
+          fid.write('            </Attribute>\n')
+          fid.write('         </Grid>\n')
+        # Write footer and close file
+        fid.write('      </Grid>\n')
+        fid.write('   </Domain>\n')
+        fid.write('</Xdmf> \n')
+        fid.close()
+
+    def saveFieldOriginal(self,filename,key,append=False,xdmf=False):
+      '''Writes chunks in original processor layout to h5 files'''
+      # Determine which chunks are to be written by this processor
+      (ip,jp,kp) = self.__positionFromRank(self.__rank,self.__nxp,self.__nyp,self.__nzp)
+      ipb1 = self.__ipbeg[ip]
+      jpb1 = self.__jpbeg[jp]
+      kpb1 = self.__kpbeg[kp]
+      ipe1 = ipb1+self.__nxppp-1
+      jpe1 = jpb1+self.__nyppp-1
+      kpe1 = kpb1+self.__nzppp-1
+      # Compute global nx,ny,nz
+      nxg = self.__procGrid1[key][1][-1]-self.__procGrid1[key][0][0]
+      nyg = self.__procGrid1[key][3][-1]-self.__procGrid1[key][2][0]
+      nzg = self.__procGrid1[key][5][-1]-self.__procGrid1[key][4][0]
+      # If append flag is set, we add a dataset to the existing file
+      if append:
+        ioflag = 'a'
+      else:
+        ioflag = 'w'
+      # Now loop through all subchunks and write one file each
+      for ip1 in range(ipb1,ipe1+1):
+          for jp1 in range(jpb1,jpe1+1):
+            for kp1 in range(kpb1,kpe1+1):
+              # Determine rank of the chunk to be written, and compose filename
+              iprank = self.__rankFromPosition(ip1,jp1,kp1,self.__nxp1,self.__nyp1,self.__nzp1)
+              file_chunk = filename+'.{:05d}'.format(iprank)
+              # Compute bounds of the subchunk
+              ib1 = self.__procGrid1[key][0][ip1]
+              ie1 = self.__procGrid1[key][1][ip1]
+              jb1 = self.__procGrid1[key][2][jp1]
+              je1 = self.__procGrid1[key][3][jp1]
+              kb1 = self.__procGrid1[key][4][kp1]
+              ke1 = self.__procGrid1[key][5][kp1]
+              # Get bounds of the parent chunk
+              ib = self.__localChunkBounds[key][0]
+              jb = self.__localChunkBounds[key][2]
+              kb = self.__localChunkBounds[key][4]
+              # Get the range of subdata
+              ii_rng = slice(ib1-ib+self.__nghx-1,ie1-ib+self.__nghx+2)
+              jj_rng = slice(jb1-jb+self.__nghy-1,je1-jb+self.__nghy+2)
+              kk_rng = slice(kb1-kb+self.__nghz-1,ke1-kb+self.__nghz+2)
+              # Open the file and write data, then close it
+              fid = h5py.File(file_chunk,ioflag)
+              if not append:
+                fid.create_dataset('rank',data=iprank)
+                fid.create_dataset('icol',data=ip1)
+                fid.create_dataset('jrow',data=jp1)
+                fid.create_dataset('kpln',data=kp1)
+              gid = fid.create_group('/'+key)
+              gid.create_dataset('ib',data=ib1)
+              gid.create_dataset('ie',data=ie1)
+              gid.create_dataset('jb',data=jb1)
+              gid.create_dataset('je',data=je1)
+              gid.create_dataset('kb',data=kb1)
+              gid.create_dataset('ke',data=ke1)
+              gid.create_dataset('nxl',data=ie1-ib1+1)
+              gid.create_dataset('nyl',data=je1-jb1+1)
+              gid.create_dataset('nzl',data=ke1-kb1+1)
+              gid.create_dataset('nx',data=nxg)
+              gid.create_dataset('ny',data=nyg)
+              gid.create_dataset('nz',data=nzg)
+              gid.create_dataset('ighost',data=(1,),dtype='i')
+              gid.create_dataset('nghostx',data=(1,),dtype='i')
+              gid.create_dataset('nghosty',data=(1,),dtype='i')
+              gid.create_dataset('nghostz',data=(1,),dtype='i')
+              did = gid.create_dataset('data',data=self.field[key][ii_rng,jj_rng,kk_rng].flatten('F'))
+              did.attrs['F_CONTIGUOUS'] = 1
+              did.attrs['DIM'] = (ie1-ib1+3,je1-jb1+3,ke1-kb1+3)
+              fid.close()
+      # Create an XDMF file for the field
+      if xdmf and self.__rank==0:
+        # Construct XDMF filename
+        file_xdmf = filename+'_'+key+'.xdmf'
+        # Open file and write header
+        fid = open(file_xdmf,'w') 
+        fid.write('<?xml version="1.0"?>\n')
+        fid.write('<Xdmf Version="2.0">\n')
+        fid.write('   <Domain>\n')
+        fid.write('      <Grid Name="{}" GridType="Collection" CollectionType="Spatial">\n'.format(key))
+        #fid.write('         <Time Value="%.2f" />\n',params.general.simtime)
+        # Get ghost cell setup
+        nghx,nghy,nghz = 1,1,1
+        # Loop through chunks
+        for iprank in range(0,self.__nproc1):
+          # Get position
+          iproc,jproc,kproc = self.__positionFromRank(iprank,self.__nxp1,self.__nyp1,self.__nzp1)
+          # Get basename of current subfile (output file will be in same directory as HDF5 files)
+          subfile = os.path.basename(filename+'.{:05d}'.format(iprank))
+          # Compute bounds for the local grid
+          ib = self.__procGrid1[key][0][iproc]
+          ie = self.__procGrid1[key][1][iproc]
+          jb = self.__procGrid1[key][2][jproc]
+          je = self.__procGrid1[key][3][jproc]
+          kb = self.__procGrid1[key][4][kproc]
+          ke = self.__procGrid1[key][5][kproc]
+          dx = self.__dx[key]
+          x0 = self.grid[key][0][ib-1]-nghx*dx
+          y0 = self.grid[key][1][jb-1]-nghy*dx
+          z0 = self.grid[key][2][kb-1]-nghz*dx
+          nxl = ie-ib+1+2*nghx
+          nyl = je-jb+1+2*nghy
+          nzl = ke-kb+1+2*nghz
+          # Write XDMF for this processor
+          fid.write('         <Grid Name="rank {:5d} position {:3d},{:3d},{:3d}">\n'.format(iprank,iproc,jproc,kproc))
+          fid.write('            <Topology TopologyType="3DCORECTMesh" NumberOfElements="{:d} {:d} {:d}"/>\n'.format(nzl,nyl,nxl))
+          fid.write('            <Geometry Origin="" Type="ORIGIN_DXDYDZ">\n')
+          fid.write('              <DataItem DataType="Float" Dimensions="3" Format="XML" Precision="8"> {:12f} {:12f} {:12f}</DataItem>\n'.format(z0,y0,x0))
+          fid.write('              <DataItem DataType="Float" Dimensions="3" Format="XML" Precision="8"> {:12f} {:12f} {:12f}</DataItem>\n'.format(dx,dx,dx))
+          fid.write('            </Geometry>\n')
+          fid.write('            <Attribute Name="{}" Dimensions="{:d} {:d} {:d}">\n'.format(key,nzl,nyl,nxl))
+          fid.write('            <DataItem Format="HDF5" DataType="Float" Dimensions="{:d} {:d} {:d}">\n'.format(nzl,nyl,nxl))
+          fid.write('            {}:/{}/data </DataItem>\n'.format(subfile,key))
+          fid.write('            </Attribute>\n')
+          fid.write('         </Grid>\n')
+        # Write footer and close file
+        fid.write('      </Grid>\n')
+        fid.write('   </Domain>\n')
+        fid.write('</Xdmf> \n')
+        fid.close()
+
+    def saveGrid(self,filename):
+      '''Writes the full grid to a h5 file.'''
+      if self.__rank==0:
+        # Open the file and write data
+        fid = h5py.File(filename,'w')
+        for key in self.grid:
+          gid = fid.create_group('/'+key)
+          gid.create_dataset('x',data=self.grid[key][0])
+          gid.create_dataset('y',data=self.grid[key][1])
+          gid.create_dataset('z',data=self.grid[key][2])
+        # Close the file
+        fid.close()
+
+    def saveProcGrid(self,filename,original=False):
+      '''Writes the full grid to a h5 file.'''
+      if self.__rank==0:
+        # Open the file and write data
+        fid = h5py.File(filename,'w')
+        for key in self.grid:
+          gid = fid.create_group('/'+key)
+          if original:
+            gid.create_dataset('ibeg',data=self.__procGrid1[key][0])
+            gid.create_dataset('iend',data=self.__procGrid1[key][1])
+            gid.create_dataset('jbeg',data=self.__procGrid1[key][2])
+            gid.create_dataset('jend',data=self.__procGrid1[key][3])
+            gid.create_dataset('kbeg',data=self.__procGrid1[key][4])
+            gid.create_dataset('kend',data=self.__procGrid1[key][5])
+          else:
+            gid.create_dataset('ibeg',data=self.__procGrid[key][0])
+            gid.create_dataset('iend',data=self.__procGrid[key][1])
+            gid.create_dataset('jbeg',data=self.__procGrid[key][2])
+            gid.create_dataset('jend',data=self.__procGrid[key][3])
+            gid.create_dataset('kbeg',data=self.__procGrid[key][4])
+            gid.create_dataset('kend',data=self.__procGrid[key][5])
+        # Close the file
+        fid.close()
+
+    def freeField(self,key):
+      '''Removes a field from memory.'''
+      if key not in self.field:
+        raise ValueError('Field {} cannot be freed, because it does not exist.'.format(key))
+      self.field.pop(key)
+      self.grid.pop(key)
+      self.localGrid.pop(key)
+      self.__dx.pop(key)
+      self.__procGrid.pop(key)
+      self.__procGrid1.pop(key)
+      self.__localChunkBounds.pop(key)
+      self.__localChunkSize.pop(key)
+      self.__boundarySymmetries.pop(key)
+
+    def differentiateField(self,key,direction):
+      # Verify that we got a valid direction and assign numeric value for array indexing
+      if direction=='x':
+        idir = 0
+      elif direction=='y':
+        idir = 1
+      elif direction=='z':
+        idir = 2
+      else:
+        raise ValueError('Invalid direction: '+direction)
+      # Find direction in which to "shift": one of -dx/2, +dx/2
+      # The first point should be located on the lower boundary or +dx/2 from it
+      xg  = self.grid[key][idir]         # the global grid in the direction where the shift occurs
+      xb  = self.domainBounds[2*idir]    # the lower domain boundary of that grid
+      dist = xb-xg[0]
+      if np.abs(dist)>0.75*self.__dx[key]:
+        raise ValueError('Cannot determine shift direction, because first grid points is too far from domain boundary.')
+      if np.abs(dist)>0.25*self.__dx[key]:
+        signShift = int(np.sign(dist))
+      else:
+        signShift = +1
+      # Create a new field key ("u" differentiated w.r.t "y" will be "uy")
+      keyout = key+direction
+      shift = [0,0,0]
+      shift[idir] = signShift
+      symm = self.__deriveSymmetry(self.__boundarySymmetries[key],'d'+direction)
+      self.__allocateField(keyout,key,shift=shift,symmetry=symm)
+      # Differentiate using second-order finite difference scheme
+      idx_dst = [
+        slice(self.__nghx,self.__nghx+self.__localChunkSize[key][0]),
+        slice(self.__nghy,self.__nghy+self.__localChunkSize[key][1]),
+        slice(self.__nghz,self.__nghz+self.__localChunkSize[key][2])
+      ]
+      idx_hi = idx_dst.copy()
+      idx_lo = idx_dst.copy()
+      if signShift>0:
+        idx_hi[idir] = slice(idx_hi[idir].start+1,idx_hi[idir].stop+1,idx_hi[idir].step)
+      else:
+        idx_lo[idir] = slice(idx_lo[idir].start-1,idx_lo[idir].stop-1,idx_lo[idir].step)
+      f1odx = 1./self.__dx[key]
+      self.field[keyout][idx_dst[0],idx_dst[1],idx_dst[2]] = f1odx*(self.field[key][idx_hi[0],idx_hi[1],idx_hi[2]] - self.field[key][idx_lo[0],idx_lo[1],idx_lo[2]])
+      # Exchange ghost cells and set boundary conditions
+      self.exchangeGhostCells(keyout)
+      self.imposeBoundaryConditions(keyout)
+
+    def Qcriterion(self,keepDerivatives=False):
+      '''Computes Q criterion on the pressure grid.'''
+      # Q = -0.5 (dudx^2 + dvdy^2 + dwdz^2 + 2*dudy*dvdx + 2*dudz*dwdx + 2*dvdz*dwdy)
+      # Allocate a new output field on pressure grid
+      self.__allocateField('Q','p',shift=(0,0,0))
+      # Compute derivatives on after another and add to field
+      # dudx*dudx
+      if 'ux' not in self.field:
+        self.differentiateField('u','x')
+      symm = self.__deriveSymmetry(self.__boundarySymmetries['ux'],'mult',symm2=self.__boundarySymmetries['ux'])
+      self.__allocateField('tmp','ux',shift=(0,0,0),symmetry=symm)
+      self.field['tmp'] += 0.5*np.square(self.field['ux']) 
+      self.shiftField('tmp',self.__getShiftDirection('tmp','Q'))
+      iin,jin,kin,iout,jout,kout = self.__sliceCollocated('tmp','Q')
+      self.field['Q'][iout,jout,kout] -= self.field['tmp'][iin,jin,kin]
+      self.freeField('tmp')
+      if not keepDerivatives:
+        self.freeField('ux')
+      # dvdy*dvdy
+      if 'vy' not in self.field:
+        self.differentiateField('v','y')
+      symm = self.__deriveSymmetry(self.__boundarySymmetries['vy'],'mult',symm2=self.__boundarySymmetries['vy'])
+      self.__allocateField('tmp','vy',shift=(0,0,0),symmetry=symm)
+      self.field['tmp'] += 0.5*np.square(self.field['vy']) 
+      self.shiftField('tmp',self.__getShiftDirection('tmp','Q'))
+      iin,jin,kin,iout,jout,kout = self.__sliceCollocated('tmp','Q')
+      self.field['Q'][iout,jout,kout] -= self.field['tmp'][iin,jin,kin]
+      self.freeField('tmp')
+      if not keepDerivatives:
+        self.freeField('vy')
+      # dwdz*dwdz
+      if 'wz' not in self.field:
+        self.differentiateField('w','z')
+      symm = self.__deriveSymmetry(self.__boundarySymmetries['wz'],'mult',symm2=self.__boundarySymmetries['wz'])
+      self.__allocateField('tmp','wz',shift=(0,0,0),symmetry=symm)
+      self.field['tmp'] += 0.5*np.square(self.field['wz']) 
+      self.shiftField('tmp',self.__getShiftDirection('tmp','Q'))
+      iin,jin,kin,iout,jout,kout = self.__sliceCollocated('tmp','Q')
+      self.field['Q'][iout,jout,kout] -= self.field['tmp'][iin,jin,kin]
+      self.freeField('tmp')
+      if not keepDerivatives:
+        self.freeField('wz')
+      # dudy*dvdx
+      if 'uy' not in self.field:
+        self.differentiateField('u','y')
+      if 'vx' not in self.field:
+        self.differentiateField('v','x')
+      symm = self.__deriveSymmetry(self.__boundarySymmetries['uy'],'mult',symm2=self.__boundarySymmetries['vx'])
+      self.__allocateField('tmp','Q',shift=(-1,-1,0),symmetry=symm)
+      ii1,jj1,kk1,iout,jout,kout = self.__sliceCollocated('uy','tmp')
+      ii2,jj2,kk2,iout,jout,kout = self.__sliceCollocated('vx','tmp')
+      self.field['tmp'][iout,jout,kout] += self.field['uy'][ii1,jj1,kk1]*self.field['vx'][ii2,jj2,kk2]
+      self.exchangeGhostCells('tmp')
+      self.imposeBoundaryConditions('tmp')
+      self.shiftField('tmp',self.__getShiftDirection('tmp','Q'))
+      iin,jin,kin,iout,jout,kout = self.__sliceCollocated('tmp','Q')
+      self.field['Q'][iout,jout,kout] -= self.field['tmp'][iin,jin,kin]
+      self.freeField('tmp')
+      if not keepDerivatives:
+        self.freeField('uy')
+        self.freeField('vx')
+      # dudz*dwdx
+      if 'uz' not in self.field:
+        self.differentiateField('u','z')
+      if 'wx' not in self.field:
+        self.differentiateField('w','x')
+      symm = self.__deriveSymmetry(self.__boundarySymmetries['uz'],'mult',symm2=self.__boundarySymmetries['wx'])
+      self.__allocateField('tmp','Q',shift=(-1,0,-1),symmetry=symm)
+      ii1,jj1,kk1,iout,jout,kout = self.__sliceCollocated('uz','tmp')
+      ii2,jj2,kk2,iout,jout,kout = self.__sliceCollocated('wx','tmp')
+      self.field['tmp'][iout,jout,kout] += self.field['uz'][ii1,jj1,kk1]*self.field['wx'][ii2,jj2,kk2]
+      self.exchangeGhostCells('tmp')
+      self.imposeBoundaryConditions('tmp')
+      self.shiftField('tmp',self.__getShiftDirection('tmp','Q'))
+      iin,jin,kin,iout,jout,kout = self.__sliceCollocated('tmp','Q')
+      self.field['Q'][iout,jout,kout] -= self.field['tmp'][iin,jin,kin]
+      self.freeField('tmp')
+      if not keepDerivatives:
+        self.freeField('uz')
+        self.freeField('wx')
+      # dvdz*dwdy
+      if 'vz' not in self.field:
+        self.differentiateField('v','z')
+      if 'wy' not in self.field:
+        self.differentiateField('w','y')
+      symm = self.__deriveSymmetry(self.__boundarySymmetries['vz'],'mult',symm2=self.__boundarySymmetries['wy'])
+      self.__allocateField('tmp','Q',shift=(0,-1,-1),symmetry=symm)
+      ii1,jj1,kk1,iout,jout,kout = self.__sliceCollocated('vz','tmp')
+      ii2,jj2,kk2,iout,jout,kout = self.__sliceCollocated('wy','tmp')
+      self.field['tmp'][iout,jout,kout] += self.field['vz'][ii1,jj1,kk1]*self.field['wy'][ii2,jj2,kk2]
+      self.exchangeGhostCells('tmp')
+      self.imposeBoundaryConditions('tmp')
+      self.shiftField('tmp',self.__getShiftDirection('tmp','Q'))
+      iin,jin,kin,iout,jout,kout = self.__sliceCollocated('tmp','Q')
+      self.field['Q'][iout,jout,kout] -= self.field['tmp'][iin,jin,kin]
+      self.freeField('tmp')
+      if not keepDerivatives:
+        self.freeField('vz')
+        self.freeField('wy')
+      # Exchange ghost cells and set boundary conditions
+      self.exchangeGhostCells('Q')
+      self.imposeBoundaryConditions('Q')
+
+    def lambda2(self,keepDerivatives=False):
+      '''Computes lambda2 vortex criterion on the pressure grid'''
+      raise NotImplementedError('TBD')
+
+    def vorticity(self,component,collocate=False,keepDerivatives=False):
+      '''Computes a component of the vorticity vector.'''
+      # Give a name to the output array
+      keyout = 'om'+component
+      # Construct the names of the derivatives that we need.
+      if component=='x':
+        key1,key2,idir = 'wy','vz',0
+      elif component=='y':
+        key1,key2,idir = 'uz','wx',1
+      elif component=='z':
+        key1,key2,idir = 'vx','uy',2
+      else:
+        raise ValueError('Invalid component: '+component)
+      # Differentiate if necessary. The two derivatives should end up on the same grid.
+      # However, the grid is different for the three components. If "collocated" is specified,
+      # the components will be interpolated onto the pressure grid.
+      if key1 not in self.field:
+        self.differentiateField(key1[0],key1[1])
+      if key2 not in self.field:
+        self.differentiateField(key2[0],key2[1])
+      # Allocate an output array which is shifted compared to the pressure grid. Impose correct symmetries.
+      if not np.array_equal(self.__boundarySymmetries[key1],self.__boundarySymmetries[key2]):
+        raise ValueError('Symmetry error: THIS SHOULD NOT HAPPEN.')
+      symm = self.__boundarySymmetries[key1]
+      shift = [-1,-1,-1]
+      shift[idir] = 0
+      self.__allocateField(keyout,'p',shift=shift,symmetry=symm)
+      # Compute vorticity component
+      ii1,jj1,kk1,iout,jout,kout = self.__sliceCollocated(key1,keyout)
+      ii2,jj2,kk2,iout,jout,kout = self.__sliceCollocated(key2,keyout)
+      self.field[keyout][iout,jout,kout] = self.field[key1][ii1,jj1,kk1] - self.field[key2][ii2,jj2,kk2]
+      if not keepDerivatives:
+        self.freeField(key1)
+        self.freeField(key2)
+      # Exchange ghost cells and set boundary conditions
+      self.exchangeGhostCells(keyout)
+      self.imposeBoundaryConditions(keyout)
+      # If the fields are supposed to be collocated, shift the field to pressure grid
+      if collocate:
+        self.shiftField(keyout,self.__getShiftDirection(keyout,'p'))
+
+    def collocateVelocity(self):
+      '''Moves velocity components onto the pressure grid.'''
+      print('WARNING: collocateVelocity is UNTESTED')
+      for key in ('u','v','w'):
+        # Shift field (this is done in-place)
+        self.shiftField(key,self.__getShiftDirection(key,'p'))
+        # Match local domain
+        iin,jin,kin,iout,jout,kout = self.__sliceCollocated(key,'Q')
+        tmp = np.empty_like(self.field['p'])
+        tmp[iout,jout,kout] = self.field[key][iin,jin,kin]
+        self.field[key] = tmp
+        # Get new processor grid
+        self.__procGrid[key]         = [ii.copy() for ii in self.__procGrid['p']]
+        self.__procGrid1[key]        = [ii.copy() for ii in self.__procGrid1['p']]
+        self.__localChunkBounds[key] = [ii.copy() for ii in self.__localChunkBounds['p']]
+        self.__localChunkSize[key]   = [ii.copy() for ii in self.__localChunkSize['p']]
+        # Get new grid
+        self.__dx[key]      = self.__dx['p']
+        self.grid[key]      = [ii.copy() for ii in self.grid['p']]
+        self.localGrid[key] = [ii.copy() for ii in self.localGrid['p']]
+        # Exchange ghost cells and set boundary conditions
+        self.exchangeGhostCells(key)
+        self.imposeBoundaryConditions(key)
+
+    def downsample(self,field,factor=2):
+      '''Coarsens a field by integer factor.'''
+      raise NotImplementedError('TBD')
+
+    def gaussianFilter(self,key,sigma=(1,1,1),truncate=4.0):
+      '''Applies a gaussian filter to a field as in-place operation. Sigma is the std of the filter in terms of grid width.'''
+      # Sanity check
+      if len(sigma)!=3:
+        raise ValueError('Expected one value of sigma for each direction, but only got {}.'.format(len(sigma)))
+      # Compute stencil width and check if we got enough ghost cells
+      stencil = np.empty((3,))
+      for ii in range(0,3):
+        stencil[ii] = 2*int(truncate*sigma[ii]+0.5)+1
+      if self.__nghx<stencil[0] or self.__nghx<stencil[1] or self.__nghx<stencil[2]:
+        raise ValueError('Too few ghost cells for stencil: ',stencil,(self.__nghx,self.__nghy,self.__nghz))
+      print(stencil)
+      # Run scipy gaussian filter
+      self.field[key] = ndimage.gaussian_filter(self.field[key],sigma,truncate=truncate,mode='constant',cval=0.0)
+      # Exchange ghost cells and set boundary conditions
+      self.exchangeGhostCells(key)
+      self.imposeBoundaryConditions(key)
+
+    def maskParticles(self,key,reconstruct=False,cval=np.nan,padding=0.0):
+      '''Fills grid points which lie inside of solid phase with values.'''
+      # Get number of particles in this processeor
+      npart = self.particles.shape[1]
+      # Loop through local particles
+      for ipart in range(0,npart):
+        # Slice a box from the field around the particle
+        xp,yp,zp,rp = (self.particles[self.col['x'],ipart],
+                       self.particles[self.col['y'],ipart],
+                       self.particles[self.col['z'],ipart],
+                       self.particles[self.col['r'],ipart])
+        rp += padding*self.__dx[key]
+        # Get coefficients for reconstruction
+        if reconstruct:
+          coeff_lin = self.particles[self.col[key],ipart]
+          if key=='u':
+            coeff_rotx =  0.0
+            coeff_roty = -self.particles[self.col['oz'],ipart]
+            coeff_rotz =  self.particles[self.col['oy'],ipart]
+          elif key=='v':
+            coeff_rotx =  self.particles[self.col['oz'],ipart]
+            coeff_roty =  0.0
+            coeff_rotz = -self.particles[self.col['ox'],ipart]
+          elif key=='w':
+            coeff_rotx = -self.particles[self.col['oy'],ipart]
+            coeff_roty =  self.particles[self.col['ox'],ipart]
+            coeff_rotz =  0.0
+          else:
+            coeff_rotx =  0.0
+            coeff_roty =  0.0
+            coeff_rotz =  0.0
+        # Get bounding box of particle
+        idx_x = np.nonzero((self.localGrid[key][0]>=xp-rp) & (self.localGrid[key][0]<=xp+rp))[0]
+        idx_y = np.nonzero((self.localGrid[key][1]>=yp-rp) & (self.localGrid[key][1]<=yp+rp))[0]
+        idx_z = np.nonzero((self.localGrid[key][2]>=zp-rp) & (self.localGrid[key][2]<=zp+rp))[0]
+        # Slice the bounding box
+        sli_x = slice(idx_x[0],idx_x[-1]+1)
+        sli_y = slice(idx_y[0],idx_y[-1]+1)
+        sli_z = slice(idx_z[0],idx_z[-1]+1)
+        # Construct a grid of the subarray
+        xg,yg,zg = np.meshgrid(self.localGrid[key][0][sli_x],self.localGrid[key][1][sli_y],self.localGrid[key][2][sli_z],indexing='ij')
+        # Iterate through subarray and mask field
+        it = np.nditer((self.field[key][sli_x,sli_y,sli_z],xg,yg,zg),
+                       op_flags=[['writeonly'],['readonly'],['readonly'],['readonly']]
+                       )
+        while not it.finished:
+          xx,yy,zz = it[1:4]
+          Dx,Dy,Dz = xx-xp, yy-yp, zz-zp
+          isInside = Dx**2+Dy**2+Dz**2 <= rp**2
+          if isInside:
+            if reconstruct:
+              it[0] = coeff_lin + coeff_rotx*Dx + coeff_roty*Dy + coeff_rotz*Dz
+            else:
+              it[0] = cval
+          it.iternext()
+
+    def shiftField(self,key,shift,subsequent=True):
+      '''Shifts a field by dx/2 in specified direction as in-place operation. The new values are determined by linear interpolation.'''
+      # Sanity check
+      if len(shift)!=3:
+        raise ValueError('shift must be provided as three-component list.')
+      if np.max(shift)>1 or np.min(shift)<-1:
+        raise ValueError('Only single shift is allowed.')
+      # If all entries of shift are zero, no shifting needs to be done.
+      if np.count_nonzero(shift)==0:
+        return
+      # Update grid
+      for idir in range(0,3):
+        if shift[idir]!=0:
+          self.grid[key][idir]      += shift[idir]*0.5*self.__dx[key]
+          self.localGrid[key][idir] += shift[idir]*0.5*self.__dx[key]
+      # Perform the interpolation
+      if subsequent: 
+        # Three (at maximum) subsequent 1D interpolations.
+        # NOTE: This algorithm requires ghost cell communication for each substep, which can be avoided if we shifted in all directions simultaneously!
+        # Construct index range of target
+        idx_dst = [
+          slice(self.__nghx,self.__nghx+self.__localChunkSize[key][0]),
+          slice(self.__nghy,self.__nghy+self.__localChunkSize[key][1]),
+          slice(self.__nghz,self.__nghz+self.__localChunkSize[key][2])
+        ]
+        # Perform subsequent 1D interpolations.
+        for idir in range(0,3):
+          # If there is no shift in this direction, continue
+          if shift[idir]==0:
+            continue
+          # Perform linear interpolation
+          idx_hi = idx_dst.copy()
+          idx_lo = idx_dst.copy()
+          if shift[idir]>0:
+            idx_hi[idir] = slice(idx_hi[idir].start+1,idx_hi[idir].stop+1,idx_hi[idir].step)
+          else:
+            idx_lo[idir] = slice(idx_lo[idir].start-1,idx_lo[idir].stop-1,idx_lo[idir].step)
+          self.field[key][idx_dst[0],idx_dst[1],idx_dst[2]] = 0.5*(self.field[key][idx_hi[0],idx_hi[1],idx_hi[2]] + self.field[key][idx_lo[0],idx_lo[1],idx_lo[2]])
+          # Exchange ghost cells and set boundary conditions
+          self.exchangeGhostCells(key)
+          self.imposeBoundaryConditions(key)
+      else:
+        # One trilinear interpolation.
+        # NOTE: We only access the positions in the array which we really need to. Only one communication is required.
+        # Allocate output array: will be assigned to self.field later
+        field = np.zeros_like(self.field[key])
+        # Determine base shift of indexing
+        shiftx = -1 if shift[0]<0 else 0
+        shifty = -1 if shift[1]<0 else 0
+        shiftz = -1 if shift[2]<0 else 0
+        # Determine shift coefficients
+        cx = np.abs(shift[0])
+        cy = np.abs(shift[1])
+        cz = np.abs(shift[2])
+        eps = 1e-8
+        # Construct index range of target
+        ii_dst = slice(self.__nghx,self.__nghx+self.__localChunkSize[key][0])
+        jj_dst = slice(self.__nghy,self.__nghy+self.__localChunkSize[key][1])
+        kk_dst = slice(self.__nghz,self.__nghz+self.__localChunkSize[key][2])
+        # (0,0,0)
+        ii_src = slice(self.__nghx+shiftx,self.__nghx+self.__localChunkSize[key][0]+shiftx)
+        jj_src = slice(self.__nghy+shifty,self.__nghy+self.__localChunkSize[key][1]+shifty)
+        kk_src = slice(self.__nghz+shiftz,self.__nghz+self.__localChunkSize[key][2]+shiftz)
+        coeff = 1-0.5*(cx+cy+cz)+0.25*(cx*cy+cx*cz+cy*cz)-0.125*(cx*cy*cz)
+        if np.abs(coeff)>eps:
+          field[ii_dst,jj_dst,kk_dst] += coeff*self.field[key][ii_src,jj_src,kk_src]
+        # (1,0,0)
+        ii_src = slice(self.__nghx+shiftx+1,self.__nghx+self.__localChunkSize[key][0]+shiftx+1)
+        jj_src = slice(self.__nghy+shifty  ,self.__nghy+self.__localChunkSize[key][1]+shifty  )
+        kk_src = slice(self.__nghz+shiftz  ,self.__nghz+self.__localChunkSize[key][2]+shiftz  )
+        coeff = 0.5*cx-0.25*(cx*cy+cx*cz)+0.125*(cx*cy*cz)
+        if np.abs(coeff)>eps:
+          field[ii_dst,jj_dst,kk_dst] += coeff*self.field[key][ii_src,jj_src,kk_src]
+        # (0,1,0)
+        ii_src = slice(self.__nghx+shiftx  ,self.__nghx+self.__localChunkSize[key][0]+shiftx  )
+        jj_src = slice(self.__nghy+shifty+1,self.__nghy+self.__localChunkSize[key][1]+shifty+1)
+        kk_src = slice(self.__nghz+shiftz  ,self.__nghz+self.__localChunkSize[key][2]+shiftz  )
+        coeff = 0.5*cy-0.25*(cx*cy+cy*cz)+0.125*(cx*cy*cz)
+        if np.abs(coeff)>eps:
+          field[ii_dst,jj_dst,kk_dst] += coeff*self.field[key][ii_src,jj_src,kk_src]
+        # (0,0,1)
+        ii_src = slice(self.__nghx+shiftx  ,self.__nghx+self.__localChunkSize[key][0]+shiftx  )
+        jj_src = slice(self.__nghy+shifty  ,self.__nghy+self.__localChunkSize[key][1]+shifty  )
+        kk_src = slice(self.__nghz+shiftz+1,self.__nghz+self.__localChunkSize[key][2]+shiftz+1)
+        coeff = 0.5*cz-0.25*(cx*cz+cy*cz)+0.125*(cx*cy*cz)
+        if np.abs(coeff)>eps:
+          field[ii_dst,jj_dst,kk_dst] += coeff*self.field[key][ii_src,jj_src,kk_src]
+        # (1,1,0)
+        ii_src = slice(self.__nghx+shiftx+1,self.__nghx+self.__localChunkSize[key][0]+shiftx+1)
+        jj_src = slice(self.__nghy+shifty+1,self.__nghy+self.__localChunkSize[key][1]+shifty+1)
+        kk_src = slice(self.__nghz+shiftz  ,self.__nghz+self.__localChunkSize[key][2]+shiftz  )
+        coeff = 0.25*(cx*cy)-0.125*(cx*cy*cz)
+        if np.abs(coeff)>eps:
+          field[ii_dst,jj_dst,kk_dst] += coeff*self.field[key][ii_src,jj_src,kk_src]
+        # (1,0,1)
+        ii_src = slice(self.__nghx+shiftx+1,self.__nghx+self.__localChunkSize[key][0]+shiftx+1)
+        jj_src = slice(self.__nghy+shifty  ,self.__nghy+self.__localChunkSize[key][1]+shifty  )
+        kk_src = slice(self.__nghz+shiftz+1,self.__nghz+self.__localChunkSize[key][2]+shiftz+1)
+        coeff = 0.25*(cx*cz)-0.125*(cx*cy*cz)
+        if np.abs(coeff)>eps:
+          field[ii_dst,jj_dst,kk_dst] += coeff*self.field[key][ii_src,jj_src,kk_src]
+        # (0,1,1)
+        ii_src = slice(self.__nghx+shiftx  ,self.__nghx+self.__localChunkSize[key][0]+shiftx  )
+        jj_src = slice(self.__nghy+shifty+1,self.__nghy+self.__localChunkSize[key][1]+shifty+1)
+        kk_src = slice(self.__nghz+shiftz+1,self.__nghz+self.__localChunkSize[key][2]+shiftz+1)
+        coeff = 0.25*(cy*cz)-0.125*(cx*cy*cz)
+        if np.abs(coeff)>eps:
+          field[ii_dst,jj_dst,kk_dst] += coeff*self.field[key][ii_src,jj_src,kk_src]
+        # (1,1,1)
+        ii_src = slice(self.__nghx+shiftx+1,self.__nghx+self.__localChunkSize[key][0]+shiftx+1)
+        jj_src = slice(self.__nghy+shifty+1,self.__nghy+self.__localChunkSize[key][1]+shifty+1)
+        kk_src = slice(self.__nghz+shiftz+1,self.__nghz+self.__localChunkSize[key][2]+shiftz+1)
+        coeff = 0.125*(cx*cy*cz)
+        if np.abs(coeff)>eps:
+          field[ii_dst,jj_dst,kk_dst] += coeff*self.field[key][ii_src,jj_src,kk_src]
+        # Assign to field
+        self.field[key] = field
+        # Exchange ghost cells and set boundary conditions
+        self.exchangeGhostCells(key)
+        self.imposeBoundaryConditions(key)
+
+    def exchangeGhostCells(self,key):
+      '''Communicates all ghost cells of specified field'''
+      # Clear previous MPI buffers. They should be empty anyway, but just to be sure.
+      self.__mpireq.clear()
+      self.__mpibufdata.clear()
+      self.__mpibufidx.clear()
+      # Trigger non-blocking communication:
+      # requests will be stored in internal class variable "__mpireq"
+      # Communicate faces (6 faces)
+      self.__communicateGhostCells(key,(-1,0,0)) # left
+      self.__communicateGhostCells(key,(+1,0,0)) # right
+      self.__communicateGhostCells(key,(0,-1,0)) # down
+      self.__communicateGhostCells(key,(0,+1,0)) # up
+      self.__communicateGhostCells(key,(0,0,-1)) # front
+      self.__communicateGhostCells(key,(0,0,+1)) # back
+      # Communicate edges (12 edges)
+      self.__communicateGhostCells(key,(-1,-1,0)) # left,down
+      self.__communicateGhostCells(key,(-1,0,-1)) # left,front
+      self.__communicateGhostCells(key,(-1,+1,0)) # left,up
+      self.__communicateGhostCells(key,(-1,0,+1)) # left,back
+      self.__communicateGhostCells(key,(+1,-1,0)) # right,down
+      self.__communicateGhostCells(key,(+1,0,-1)) # right,front
+      self.__communicateGhostCells(key,(+1,+1,0)) # right,up
+      self.__communicateGhostCells(key,(+1,0,+1)) # right,back
+      self.__communicateGhostCells(key,(0,-1,-1)) # down,front
+      self.__communicateGhostCells(key,(0,-1,+1)) # down,back
+      self.__communicateGhostCells(key,(0,+1,-1)) # up,front
+      self.__communicateGhostCells(key,(0,+1,+1)) # up,back
+      # Communicate corners (8 corners)
+      self.__communicateGhostCells(key,(-1,-1,-1)) # left,down,front
+      self.__communicateGhostCells(key,(-1,-1,+1)) # left,down,back
+      self.__communicateGhostCells(key,(-1,+1,-1)) # left,up,front
+      self.__communicateGhostCells(key,(-1,+1,+1)) # left,up,back
+      self.__communicateGhostCells(key,(+1,-1,-1)) # right,down,front
+      self.__communicateGhostCells(key,(+1,-1,+1)) # right,down,back
+      self.__communicateGhostCells(key,(+1,+1,-1)) # right,up,front
+      self.__communicateGhostCells(key,(+1,+1,+1)) # right,up,back
+      # Wait for communication to finish
+      MPI.Request.waitall(self.__mpireq)
+      # Communication is done! Clear the list of requests
+      self.__mpireq.clear()
+      # Assign buffer to array
+      for ibuf in range(0,len(self.__mpibufdata)):
+        ii,jj,kk = self.__mpibufidx.pop()
+        self.field[key][ii,jj,kk] = self.__mpibufdata.pop()
+      # Verify that buffer is empty
+      if len(self.__mpibufdata)!=0:
+        raise RuntimeError('MPI data buffer not empty after ghost cell exchange!')
+      if len(self.__mpibufidx)!=0:
+        raise RuntimeError('MPI index buffer not empty after ghost cell exchange!')
+
+    def imposeBoundaryConditions(self,key):
+      '''Imposes symmetry boundary conditions on each non-periodic wall'''
+      self.__symmetrizeWall(key,(-1,0,0))
+      self.__symmetrizeWall(key,(+1,0,0))
+      self.__symmetrizeWall(key,(0,-1,0))
+      self.__symmetrizeWall(key,(0,+1,0))
+      self.__symmetrizeWall(key,(0,0,-1))
+      self.__symmetrizeWall(key,(0,0,+1))
+
+    def __setBoundaryConditions(self,flowType):
+      '''Sets the symmetries for ghost cells behind the wall'''
+      # No-slip boundary    Free-slip boundary:
+      #   u  -> -u            u  ->  u
+      #   v  -> -v            v  -> -v
+      #   w  -> -w            w  ->  w
+      #   p  ->  p            p  ->  p
+      self.__boundarySymmetries = {'u':  np.zeros((3,3,3),dtype='i'), 
+                                   'v':  np.zeros((3,3,3),dtype='i'), 
+                                   'w':  np.zeros((3,3,3),dtype='i'),
+                                   'p':  np.zeros((3,3,3),dtype='i'),
+                                   's1': np.zeros((3,3,3),dtype='i'),
+                                   }
+      if flowType in ('channel','cf'):
+          self.__xperiodic = True
+          self.__yperiodic = False
+          self.__zperiodic = True
+          self.__boundarySymmetries['u'][1,0,1]  = -1 # u -> -u at no-slip wall, since u_NS = 0
+          self.__boundarySymmetries['u'][1,2,1]  = -1 # u -> -u at no-slip wall, since u_NS = 0
+          self.__boundarySymmetries['v'][1,0,1]  =  1 # v ->  v at no-slip wall, since v,y_NS = 0 (no slip condition will be numerically wrong!)
+          self.__boundarySymmetries['v'][1,2,1]  =  1 # v ->  v at no-slip wall, since v,y_NS = 0 (no slip condition will be numerically wrong!)
+          self.__boundarySymmetries['w'][1,0,1]  = -1 # w -> -w at no-slip wall, since w_NS = 0
+          self.__boundarySymmetries['w'][1,2,1]  = -1 # w -> -w at no-slip wall, since w_NS = 0
+          self.__boundarySymmetries['p'][1,0,1]  =  1
+          self.__boundarySymmetries['p'][1,2,1]  =  1
+          self.__boundarySymmetries['s1'][1,0,1] =  1 # this depends on the actual BC, here zero gradient is imposed
+          self.__boundarySymmetries['s1'][1,2,1] =  1 # this depends on the actual BC, here zero gradient is imposed
+      elif flowType in ('open-channel','ocf'):
+          self.__xperiodic = True
+          self.__yperiodic = False
+          self.__zperiodic = True
+          self.__boundarySymmetries['u'][1,0,1]  = -1 # u -> -u at no-slip wall, since u_NS = 0
+          self.__boundarySymmetries['u'][1,2,1]  =  1 # u ->  u at free-slip wall, since u,y_FS = 0
+          self.__boundarySymmetries['v'][1,0,1]  =  1 # v ->  v at no-slip wall, since v,y_NS = 0 (no slip condition will be numerically wrong!)
+          self.__boundarySymmetries['v'][1,2,1]  = -1 # v -> -v at free-slip wall, since v_FS = 0
+          self.__boundarySymmetries['w'][1,0,1]  = -1 # w -> -w at no-slip wall, since w_NS = 0
+          self.__boundarySymmetries['w'][1,2,1]  =  1 # w -> -w at no-slip wall, since w_NS = 0
+          self.__boundarySymmetries['p'][1,0,1]  =  1
+          self.__boundarySymmetries['p'][1,2,1]  =  1
+          self.__boundarySymmetries['s1'][1,0,1] =  1 # this depends on the actual BC, here zero gradient is imposed
+          self.__boundarySymmetries['s1'][1,2,1] =  1 # this depends on the actual BC, here zero gradient is imposed
+      elif flowType in ('homiso','hit'):
+          self.__xperiodic = True
+          self.__yperiodic = True
+          self.__zperiodic = True
+      else:
+          raise ValueError('Invalid flow type: '+flowType)
+
+    def __symmetrizeWall(self,key,positionBd):
+      # Sanity check
+      if np.max(positionBd)>1 or np.min(positionBd)<-1:
+        raise ValueError('symmetrizeWall: invalid neighbor position {}'.format(positionBd))
+      if np.count_nonzero(positionBd)!=1:
+        raise ValueError('symmetrizeWall: only face direction is accepted (no edges or corners)')
+      inghbr =  positionBd[0]+1
+      jnghbr =  positionBd[1]+1
+      knghbr =  positionBd[2]+1
+      eps = 1e-3*self.__dx[key]
+      if self.__nghbr[inghbr,jnghbr,knghbr] is None:
+        # x-direction
+        if positionBd[0]==-1:                  # lower boundary
+          xbd = self.domainBounds[0]
+          xgl = self.localGrid[key][0]
+          idx  = np.where(xgl>=xbd-eps)[0][0]  # index of first point within domain
+          dist = np.abs(xgl[idx]-xbd)          # distance first point to wall: should be either 0 or dx/2
+          if dist>=0.25*self.__dx[key]:        # no point on boundary
+            ii_dst = slice(0,idx,1)
+            ii_src = slice(2*idx-1,idx-1,-1)
+          else:                                # point on boundary
+            ii_dst = slice(0,idx,1)
+            ii_src = slice(2*idx,idx,-1)
+          self.field[key][ii_dst,:,:] = self.__boundarySymmetries[key][inghbr,jnghbr,knghbr]*self.field[key][ii_src,:,:]
+        elif positionBd[0]==1:                 # upper boundary
+          xbd = self.domainBounds[1]
+          xgl = self.localGrid[key][0]
+          idx  = np.where(xgl<=xbd+eps)[0][-1] # index of last point within domain
+          dist = np.abs(xgl[idx]-xbd)          # distance first point to wall: should be either 0 or dx/2
+          if dist>=0.25*self.__dx[key]:        # no point on boundary
+            ii_dst = slice(idx+1,len(xgl),1)
+            ii_src = slice(idx,2*idx+1-len(xgl),-1)
+          else:                                # point on boundary
+            ii_dst = slice(idx+1,len(xgl),1)
+            ii_src = slice(idx-1,2*idx-len(xgl),-1)
+          self.field[key][ii_dst,:,:] = self.__boundarySymmetries[key][inghbr,jnghbr,knghbr]*self.field[key][ii_src,:,:]
+        # y-direction
+        if positionBd[1]==-1:                  # lower boundary
+          ybd = self.domainBounds[2]
+          ygl = self.localGrid[key][1]
+          idx  = np.where(ygl>=ybd-eps)[0][0]  # index of first point within domain
+          dist = np.abs(ygl[idx]-ybd)          # distance first point to wall: should be either 0 or dx/2
+          if dist>=0.25*self.__dx[key]:        # no point on boundary
+            jj_dst = slice(0,idx,1)
+            jj_src = slice(2*idx-1,idx-1,-1)
+          else:                                # point on boundary
+            jj_dst = slice(0,idx,1)
+            jj_src = slice(2*idx,idx,-1)
+          self.field[key][:,jj_dst,:] = self.__boundarySymmetries[key][inghbr,jnghbr,knghbr]*self.field[key][:,jj_src,:]
+        elif positionBd[1]==1:                 # upper boundary
+          ybd = self.domainBounds[3]
+          ygl = self.localGrid[key][1]
+          idx  = np.where(ygl<=ybd+eps)[0][-1] # index of last point within domain
+          dist = np.abs(ygl[idx]-ybd)          # distance first point to wall: should be either 0 or dx/2
+          if dist>=0.25*self.__dx[key]:        # no point on boundary
+            jj_dst = slice(idx+1,len(ygl),1)
+            jj_src = slice(idx,2*idx+1-len(ygl),-1)
+          else:                                # point on boundary
+            jj_dst = slice(idx+1,len(ygl),1)
+            jj_src = slice(idx-1,2*idx-len(ygl),-1)
+          self.field[key][:,jj_dst,:] = self.__boundarySymmetries[key][inghbr,jnghbr,knghbr]*self.field[key][:,jj_src,:]
+        # z-direction
+        if positionBd[2]==-1:                  # lower boundary
+          zbd = self.domainBounds[4]
+          zgl = self.localGrid[key][2]
+          idx  = np.where(zgl>=zbd-eps)[0][0]  # index of first point within domain
+          dist = np.abs(zgl[idx]-zbd)          # distance first point to wall: should be either 0 or dx/2
+          if dist>=0.25*self.__dx[key]:        # no point on boundary
+            kk_dst = slice(0,idx,1)
+            kk_src = slice(2*idx-1,idx-1,-1)
+          else:                                # point on boundary
+            kk_dst = slice(0,idx,1)
+            kk_src = slice(2*idx,idx,-1)
+          self.field[key][:,:,kk_dst] = self.__boundarySymmetries[key][inghbr,jnghbr,knghbr]*self.field[key][:,:,kk_src]
+        elif positionBd[2]==1:                 # upper boundary
+          zbd = self.domainBounds[5]
+          zgl = self.localGrid[key][2]
+          idx  = np.where(zgl<=zbd+eps)[0][-1] # index of last point within domain
+          dist = np.abs(zgl[idx]-zbd)          # distance first point to wall: should be either 0 or dx/2
+          if dist>=0.25*self.__dx[key]:        # no point on boundary
+            kk_dst = slice(idx+1,len(zgl),1)
+            kk_src = slice(idx,2*idx+1-len(zgl),-1)
+          else:                                # point on boundary
+            kk_dst = slice(idx+1,len(zgl),1)
+            kk_src = slice(idx-1,2*idx-len(zgl),-1)
+          self.field[key][:,:,kk_dst] = self.__boundarySymmetries[key][inghbr,jnghbr,knghbr]*self.field[key][:,:,kk_src]
+
+    def __initializeNeighbors(self):
+      '''Determine neighboring processors and save them in an array'''
+      nghbr = np.empty((3,3,3),dtype='object')
+      # wrap-around x
+      ipl = self.__icol-1
+      if ipl<0:
+        if self.__xperiodic:
+          ipl = self.__nxp-1
+        else:
+          ipl = None
+      ipr = self.__icol+1
+      if ipr>self.__nxp-1:
+        if self.__xperiodic:
+          ipr = 0
+        else:
+          ipr = None
+      inghbr = (ipl,self.__icol,ipr)
+      # wrap-around y
+      jpl = self.__jrow-1
+      if jpl<0:
+        if self.__yperiodic:
+          jpl = self.__nyp-1
+        else:
+          jpl = None
+      jpr = self.__jrow+1
+      if jpr>self.__nyp-1:
+        if self.__yperiodic:
+          jpr = 0
+        else:
+          jpr = None
+      jnghbr = (jpl,self.__jrow,jpr)
+      # wrap-around z
+      kpl = self.__kpln-1
+      if kpl<0:
+        if self.__zperiodic:
+          kpl = self.__nzp-1
+        else:
+          kpl = None
+      kpr = self.__kpln+1
+      if kpr>self.__nzp-1:
+        if self.__zperiodic:
+          kpr = 0
+        else:
+          kpr = None
+      knghbr = (kpl,self.__kpln,kpr)
+      # Construct array of neighbors
+      for ip in range(0,3):
+        for jp in range(0,3):
+          for kp in range(0,3):
+            # Assign rank to neighbor array
+            if inghbr[ip] is None or jnghbr[jp] is None or knghbr[kp] is None:
+              nghbr[ip,jp,kp] = None
+            else:
+              nghbr[ip,jp,kp] = self.__rankFromPosition(inghbr[ip],jnghbr[jp],knghbr[kp],self.__nxp,self.__nyp,self.__nzp)
+      # Save neighbors as class variable
+      self.__nghbr = nghbr
+
+    def __communicateGhostCells(self,key,positionDst):
+      '''Triggers communication of ghost cells'''
+      # Sanity check
+      if np.max(positionDst)>1 or np.min(positionDst)<-1:
+        raise ValueError('communicateGhostCells: invalid neighbor position {}'.format(positionDst))
+      # [send/recv] get the rank of the neighbor where data is to be sent to
+      # The position is passed as values -1,0,+1, but need to be converted to array indices
+      ip_dst =  positionDst[0]+1
+      jp_dst =  positionDst[1]+1
+      kp_dst =  positionDst[2]+1
+      ip_src = -positionDst[0]+1
+      jp_src = -positionDst[1]+1
+      kp_src = -positionDst[2]+1
+      rank_dst = self.__nghbr[ip_dst,jp_dst,kp_dst]
+      rank_src = self.__nghbr[ip_src,jp_src,kp_src]
+      # [send/recv] get the indices of data to be sent/received
+      (nxl,nyl,nzl) = self.__localChunkSize[key]
+      if positionDst[0]==-1:
+        ii_src = slice(self.__nghx,2*self.__nghx)
+        ii_dst = slice(self.__nghx+nxl,2*self.__nghx+nxl)
+      elif positionDst[0]==0:
+        ii_src = slice(self.__nghx,self.__nghx+nxl)
+        ii_dst = slice(self.__nghx,self.__nghx+nxl)
+      elif positionDst[0]==1:
+        ii_src = slice(nxl,nxl+self.__nghx)
+        ii_dst = slice(0,self.__nghx)
+      else:
+        raise ValueError('Invalid direction for ghost cell exchange: {}'.format(positionDst[0]))
+      if positionDst[1]==-1:
+        jj_src = slice(self.__nghy,2*self.__nghy)
+        jj_dst = slice(self.__nghy+nyl,2*self.__nghy+nyl)
+      elif positionDst[1]==0:
+        jj_src = slice(self.__nghy,self.__nghy+nyl)
+        jj_dst = slice(self.__nghy,self.__nghy+nyl)
+      elif positionDst[1]==1:
+        jj_src = slice(nyl,nyl+self.__nghy)
+        jj_dst = slice(0,self.__nghy)
+      else:
+        raise ValueError('Invalid direction for ghost cell exchange: {}'.format(positionDst[1]))
+      if positionDst[2]==-1:
+        kk_src = slice(self.__nghz,2*self.__nghz)
+        kk_dst = slice(self.__nghz+nzl,2*self.__nghz+nzl)
+      elif positionDst[2]==0:
+        kk_src = slice(self.__nghz,self.__nghz+nzl)
+        kk_dst = slice(self.__nghz,self.__nghz+nzl)
+      elif positionDst[2]==1:
+        kk_src = slice(nzl,nzl+self.__nghz)
+        kk_dst = slice(0,self.__nghz)
+      else:
+        raise ValueError('Invalid direction for ghost cell exchange: {}'.format(positionDst[2]))
+      # [send] now send the data to the neighbor, but only if there is one!
+      # Communication must be done non-blocking, and we can use upper-case routines since this is a numpy array
+      if rank_dst is not None:
+        sendbuf = np.ascontiguousarray(self.field[key][ii_src,jj_src,kk_src])
+        self.__mpireq.append(self.__comm.Isend(sendbuf,dest=rank_dst))
+      # [recv] the corresponding receive: results are stored in a buffer, which will be taken care of in the calling routine
+      if rank_src is not None:
+        recvbuf = np.zeros((ii_dst.stop-ii_dst.start,jj_dst.stop-jj_dst.start,kk_dst.stop-kk_dst.start))
+        self.__mpibufdata.append(recvbuf)
+        self.__mpibufidx.append((ii_dst,jj_dst,kk_dst))
+        self.__mpireq.append(self.__comm.Irecv(recvbuf,source=rank_src))
+
+    def __allocateField(self,key,keytemplate,shift=[0,0,0],symmetry=None):
+      '''Alocates a new field with name key. Copy grid and processor layout from keytemplate
+         and shift it by "shift". By default, the symmetries from the template are copied.
+         If this is not desired, they need to be passed.'''
+      # Sanity check
+      if len(shift)!=3:
+        raise ValueError('Direction must be provided as three-component list.')
+      if np.max(shift)>1 or np.min(shift)<-1:
+        raise ValueError('Only single shift is allowed.')
+      # Assign new processor grid
+      self.__procGrid[key]         = [ii.copy() for ii in self.__procGrid[keytemplate]]
+      self.__procGrid1[key]        = [ii.copy() for ii in self.__procGrid1[keytemplate]]
+      self.__localChunkBounds[key] = [ii.copy() for ii in self.__localChunkBounds[keytemplate]]
+      self.__localChunkSize[key]   = [ii.copy() for ii in self.__localChunkSize[keytemplate]]
+      # Assign the new grid
+      self.__dx[key]             = self.__dx[keytemplate]
+      self.grid[key]             = [ii.copy() for ii in self.grid[keytemplate]]
+      self.localGrid[key]        = [ii.copy() for ii in self.localGrid[keytemplate]]
+      # Allocate memory for the new field
+      self.field[key] = np.zeros(
+        (self.__localChunkSize[key][0]+2*self.__nghx,
+         self.__localChunkSize[key][1]+2*self.__nghy,
+         self.__localChunkSize[key][2]+2*self.__nghz)
+      )
+      # Shift grid
+      for idir in range(0,3):
+        if shift[idir]!=0:
+          self.grid[key][idir] += shift[idir]*0.5*self.__dx[key]
+          self.localGrid[key][idir] += shift[idir]*0.5*self.__dx[key]
+      # Assign symmetries
+      if symmetry is None:
+        self.__boundarySymmetries[key] = self.__boundarySymmetries[keytemplate].copy()
+      else:
+        self.__boundarySymmetries[key] = symmetry
+
+    def __getShiftDirection(self,key1,key2):
+      '''Get a tuple specifying the direction to shift field key1 onto key2.'''
+      # Sanity check
+      if np.abs(self.__dx[key1]-self.__dx[key2])>1e-8:
+        raise ValueError('Cannot determine shift direction, because grid width does not match.')
+      # Get first point of both grids
+      x1 = (self.grid[key1][0][0], self.grid[key1][1][0], self.grid[key1][2][0])
+      x2 = (self.grid[key2][0][0], self.grid[key2][1][0], self.grid[key2][2][0])
+      # Determine shift direction
+      shift = [0,0,0]
+      dx = self.__dx[key1]
+      for idir in range(0,3):
+        dist = x2[idir]-x1[idir]
+        if np.abs(dist)>0.75*dx:
+          raise ValueError('Cannot determine shift direction, because points are more than 0.5dx apart.')
+        if np.abs(dist)>0.25*dx:
+          shift[idir] = int(np.sign(dist))
+      return shift
+
+    def __sliceCollocated(self,key1,key2):
+      '''Get slice object for subdomain. Useful when two fields are to be multiplied.'''
+      # Sanity check
+      if np.abs(self.__dx[key1]-self.__dx[key2])>1e-8:
+        raise ValueError('Cannot create a collocated slice, because grid width does not match.')
+      # Prepare some work arrays
+      eps = 1e-3*self.__dx[key1]
+      ngh = (self.__nghx,self.__nghy,self.__nghz)
+      sli = [None,None,None,None,None,None]
+      # Loop through directions
+      for idir in range(0,3):
+        # Find index in key1 which matched first non-ghost cell in key2
+        x1 = self.localGrid[key1][idir]
+        x2 = self.localGrid[key2][idir]
+        idx = np.argmin(np.abs(x1-x2[ngh[idir]]))
+        # Validate that the points are collocated
+        if np.abs(x1[idx]-x2[ngh[idir]])>eps:
+          raise ValueError('Cannot create collocated slice, because grids are not collocated.')
+        # Check if we got enough grid points in key1
+        if x1[idx]+self.__localChunkSize[key2][idir]>self.__localChunkSize[key1][idir]+2*ngh[idir]:
+          raise ValueError('Cannot create collocated slice, because too few local grid points are available.')
+        # Create slices
+        sli[idir]   = slice(idx,idx+self.__localChunkSize[key2][idir])
+        sli[idir+3] = slice(ngh[idir],ngh[idir]+self.__localChunkSize[key2][idir])
+      return sli
+
+    # Some static helper functions
+    @staticmethod
+    def __rankFromPosition(ip,jp,kp,nxp,nyp,nzp):
+      return ip*nyp*nzp+jp*nzp+kp
+  
+    @staticmethod
+    def __positionFromRank(rank,nxp,nyp,nzp):
+      ip = rank//(nyp*nzp)
+      jp = (rank//nzp)%nyp
+      kp = rank%nzp
+      return (ip,jp,kp)
+
+    @staticmethod
+    def __deriveSymmetry(symm1,operation,symm2=None):
+      symm = symm1.copy()
+      if operation=='dx':
+        symm[0,1,1] = -symm1[0,1,1]
+        symm[2,1,1] = -symm1[2,1,1]
+      elif operation=='dy':
+        symm[1,0,1] = -symm1[1,0,1]
+        symm[1,2,1] = -symm1[1,2,1]
+      elif operation=='dz':
+        symm[1,1,0] = -symm1[1,1,0]
+        symm[1,1,2] = -symm1[1,1,2]
+      elif operation=='mult':
+        if symm2 is None:
+          raise ValueError('Second operand must be provided for multiplication.')
+        symm = np.multiply(symm1,symm2)
+      else:
+        raise ValueError('Invalid operation: ',operation)
+      return symm
\ No newline at end of file