Added memory monitoring, because memory caused problems...
This commit is contained in:
parent
672da18687
commit
9f2262ca7a
|
|
@ -4,10 +4,11 @@ from scipy import ndimage
|
||||||
import ucf
|
import ucf
|
||||||
import h5py
|
import h5py
|
||||||
import os
|
import os
|
||||||
|
import resource
|
||||||
|
|
||||||
class ibmppp:
|
class ibmppp:
|
||||||
"""Parallel Python Postprocessor for the IBM-DNS code"""
|
"""Parallel Python Postprocessor for the IBM-DNS code"""
|
||||||
def __init__(self,comm,dir_base,flowType,chunksPerProc=(1,1,1),numberOfGhosts=(1,1,1)):
|
def __init__(self,comm,dir_base,flowType,chunksPerProc=(1,1,1),numberOfGhosts=(1,1,1),precision='float64'):
|
||||||
"""Class constructor"""
|
"""Class constructor"""
|
||||||
# Save method arguments for later use
|
# Save method arguments for later use
|
||||||
self.__comm = comm
|
self.__comm = comm
|
||||||
|
|
@ -20,6 +21,7 @@ class ibmppp:
|
||||||
self.__nghx = numberOfGhosts[0]
|
self.__nghx = numberOfGhosts[0]
|
||||||
self.__nghy = numberOfGhosts[1]
|
self.__nghy = numberOfGhosts[1]
|
||||||
self.__nghz = numberOfGhosts[2]
|
self.__nghz = numberOfGhosts[2]
|
||||||
|
self.__precision = precision
|
||||||
# Declare variables which hold fields
|
# Declare variables which hold fields
|
||||||
self.field = {'u': None, 'v': None, 'w': None, 'p': None, 's1': None}
|
self.field = {'u': None, 'v': None, 'w': None, 'p': None, 's1': None}
|
||||||
# Declare variables for grid (to be set by "loadGrid()")
|
# Declare variables for grid (to be set by "loadGrid()")
|
||||||
|
|
@ -257,7 +259,7 @@ class ibmppp:
|
||||||
# Wait for everyone to finish
|
# Wait for everyone to finish
|
||||||
MPI.Request.waitall(reqsend)
|
MPI.Request.waitall(reqsend)
|
||||||
|
|
||||||
def loadField(self,key,dtype='float64'):
|
def loadField(self,key):
|
||||||
'''Reads chunks from files'''
|
'''Reads chunks from files'''
|
||||||
# Determine which chunks are to be loaded by this processor
|
# Determine which chunks are to be loaded by this processor
|
||||||
(ip,jp,kp) = self.__positionFromRank(self.__rank,self.__nxp,self.__nyp,self.__nzp)
|
(ip,jp,kp) = self.__positionFromRank(self.__rank,self.__nxp,self.__nyp,self.__nzp)
|
||||||
|
|
@ -267,7 +269,6 @@ class ibmppp:
|
||||||
ipe1 = ipb1+self.__nxppp-1
|
ipe1 = ipb1+self.__nxppp-1
|
||||||
jpe1 = jpb1+self.__nyppp-1
|
jpe1 = jpb1+self.__nyppp-1
|
||||||
kpe1 = kpb1+self.__nzppp-1
|
kpe1 = kpb1+self.__nzppp-1
|
||||||
#print(ipb1,ipe1,jpb1,jpe1,kpb1,kpe1)
|
|
||||||
# Get grid bounds for the field to be loaded.
|
# Get grid bounds for the field to be loaded.
|
||||||
# Also determine file which contains the desired field as well as the
|
# Also determine file which contains the desired field as well as the
|
||||||
# dataset ID
|
# dataset ID
|
||||||
|
|
@ -301,14 +302,11 @@ class ibmppp:
|
||||||
je = self.__procGrid1[key][3][jpe1]
|
je = self.__procGrid1[key][3][jpe1]
|
||||||
kb = self.__procGrid1[key][4][kpb1]
|
kb = self.__procGrid1[key][4][kpb1]
|
||||||
ke = self.__procGrid1[key][5][kpe1]
|
ke = self.__procGrid1[key][5][kpe1]
|
||||||
#print(ib,ie,jb,je,kb,ke)
|
|
||||||
nxl = ie-ib+1
|
nxl = ie-ib+1
|
||||||
nyl = je-jb+1
|
nyl = je-jb+1
|
||||||
nzl = ke-kb+1
|
nzl = ke-kb+1
|
||||||
#print(nxl,nyl,nzl)
|
|
||||||
# Allocate an array to hold the field
|
# Allocate an array to hold the field
|
||||||
self.field[key] = np.empty((nxl+2*self.__nghx,nyl+2*self.__nghy,nzl+2*self.__nghz),dtype=dtype,order='F')
|
self.field[key] = np.empty((nxl+2*self.__nghx,nyl+2*self.__nghy,nzl+2*self.__nghz),dtype=self.__precision,order='F')
|
||||||
#print(q.shape)
|
|
||||||
# Go through each chunk to be read and construct the field
|
# Go through each chunk to be read and construct the field
|
||||||
for ip1 in range(ipb1,ipe1+1):
|
for ip1 in range(ipb1,ipe1+1):
|
||||||
for jp1 in range(jpb1,jpe1+1):
|
for jp1 in range(jpb1,jpe1+1):
|
||||||
|
|
@ -316,7 +314,6 @@ class ibmppp:
|
||||||
# Determine rank of the chunk to be read
|
# Determine rank of the chunk to be read
|
||||||
iprank = self.__rankFromPosition(ip1,jp1,kp1,self.__nxp1,self.__nyp1,self.__nzp1)
|
iprank = self.__rankFromPosition(ip1,jp1,kp1,self.__nxp1,self.__nyp1,self.__nzp1)
|
||||||
file_input = filebase+'{:05d}'.format(iprank)
|
file_input = filebase+'{:05d}'.format(iprank)
|
||||||
#print('Loading '+file_input)
|
|
||||||
# Compute bounds of this chunk
|
# Compute bounds of this chunk
|
||||||
ib1 = self.__procGrid1[key][0][ip1]
|
ib1 = self.__procGrid1[key][0][ip1]
|
||||||
ie1 = self.__procGrid1[key][1][ip1]
|
ie1 = self.__procGrid1[key][1][ip1]
|
||||||
|
|
@ -324,7 +321,6 @@ class ibmppp:
|
||||||
je1 = self.__procGrid1[key][3][jp1]
|
je1 = self.__procGrid1[key][3][jp1]
|
||||||
kb1 = self.__procGrid1[key][4][kp1]
|
kb1 = self.__procGrid1[key][4][kp1]
|
||||||
ke1 = self.__procGrid1[key][5][kp1]
|
ke1 = self.__procGrid1[key][5][kp1]
|
||||||
#print(ib1,ie1,jb1,je1,kb1,ke1)
|
|
||||||
# Read chunk and check its validity
|
# Read chunk and check its validity
|
||||||
[chunk] = ucf.readFieldChunk(file_input,dset=dset)
|
[chunk] = ucf.readFieldChunk(file_input,dset=dset)
|
||||||
if ib1!=chunk['ibeg'] or ie1!=chunk['ibeg']+chunk['nxl']-1:
|
if ib1!=chunk['ibeg'] or ie1!=chunk['ibeg']+chunk['nxl']-1:
|
||||||
|
|
@ -335,8 +331,6 @@ class ibmppp:
|
||||||
raise ValueError('Loaded chunk does not correspond to processor grid (kbeg={}, nzl={} vs kbeg={},nzl={}'.format(kb1,ke1-kb1+1,chunk['kbeg'],chunk['nzl']))
|
raise ValueError('Loaded chunk does not correspond to processor grid (kbeg={}, nzl={} vs kbeg={},nzl={}'.format(kb1,ke1-kb1+1,chunk['kbeg'],chunk['nzl']))
|
||||||
ngh1 = chunk['ighost']
|
ngh1 = chunk['ighost']
|
||||||
# No need to care about ghost cells here, we communicate them later
|
# No need to care about ghost cells here, we communicate them later
|
||||||
#print('shape(q)',q[ib1-ib+ngh:ie1-ib+ngh+1,jb1-jb+ngh:je1-jb+ngh+1,kb1-kb+ngh:ke1-kb+ngh+1].shape)
|
|
||||||
#print('shape(q1)',chunk['data'][ngh1:-ngh1,ngh1:-ngh1,ngh1:-ngh1].shape)
|
|
||||||
self.field[key][ib1-ib+self.__nghx:ie1-ib+self.__nghx+1,
|
self.field[key][ib1-ib+self.__nghx:ie1-ib+self.__nghx+1,
|
||||||
jb1-jb+self.__nghy:je1-jb+self.__nghy+1,
|
jb1-jb+self.__nghy:je1-jb+self.__nghy+1,
|
||||||
kb1-kb+self.__nghz:ke1-kb+self.__nghz+1] = chunk['data'][ngh1:-ngh1,ngh1:-ngh1,ngh1:-ngh1]
|
kb1-kb+self.__nghz:ke1-kb+self.__nghz+1] = chunk['data'][ngh1:-ngh1,ngh1:-ngh1,ngh1:-ngh1]
|
||||||
|
|
@ -820,27 +814,7 @@ class ibmppp:
|
||||||
|
|
||||||
def collocateVelocity(self):
|
def collocateVelocity(self):
|
||||||
'''Moves velocity components onto the pressure grid.'''
|
'''Moves velocity components onto the pressure grid.'''
|
||||||
print('WARNING: collocateVelocity is UNTESTED')
|
raise NotImplementedError('TBD')
|
||||||
for key in ('u','v','w'):
|
|
||||||
# Shift field (this is done in-place)
|
|
||||||
self.shiftField(key,self.__getShiftDirection(key,'p'))
|
|
||||||
# Match local domain
|
|
||||||
iin,jin,kin,iout,jout,kout = self.__sliceCollocated(key,'Q')
|
|
||||||
tmp = np.empty_like(self.field['p'])
|
|
||||||
tmp[iout,jout,kout] = self.field[key][iin,jin,kin]
|
|
||||||
self.field[key] = tmp
|
|
||||||
# Get new processor grid
|
|
||||||
self.__procGrid[key] = [ii.copy() for ii in self.__procGrid['p']]
|
|
||||||
self.__procGrid1[key] = [ii.copy() for ii in self.__procGrid1['p']]
|
|
||||||
self.__localChunkBounds[key] = [ii.copy() for ii in self.__localChunkBounds['p']]
|
|
||||||
self.__localChunkSize[key] = [ii.copy() for ii in self.__localChunkSize['p']]
|
|
||||||
# Get new grid
|
|
||||||
self.__dx[key] = self.__dx['p']
|
|
||||||
self.grid[key] = [ii.copy() for ii in self.grid['p']]
|
|
||||||
self.localGrid[key] = [ii.copy() for ii in self.localGrid['p']]
|
|
||||||
# Exchange ghost cells and set boundary conditions
|
|
||||||
self.exchangeGhostCells(key)
|
|
||||||
self.imposeBoundaryConditions(key)
|
|
||||||
|
|
||||||
def downsample(self,field,factor=2):
|
def downsample(self,field,factor=2):
|
||||||
'''Coarsens a field by integer factor.'''
|
'''Coarsens a field by integer factor.'''
|
||||||
|
|
@ -852,10 +826,10 @@ class ibmppp:
|
||||||
if len(sigma)!=3:
|
if len(sigma)!=3:
|
||||||
raise ValueError('Expected one value of sigma for each direction, but only got {}.'.format(len(sigma)))
|
raise ValueError('Expected one value of sigma for each direction, but only got {}.'.format(len(sigma)))
|
||||||
# Compute stencil width and check if we got enough ghost cells
|
# Compute stencil width and check if we got enough ghost cells
|
||||||
stencil = np.empty((3,))
|
stencil = [0,0,0]
|
||||||
for ii in range(0,3):
|
for ii in range(0,3):
|
||||||
stencil[ii] = 2*int(truncate*sigma[ii]+0.5)+1
|
stencil[ii] = 2*int(truncate*sigma[ii]+0.5)+1
|
||||||
if self.__nghx<stencil[0] or self.__nghx<stencil[1] or self.__nghx<stencil[2]:
|
if self.__nghx<stencil[0] or self.__nghy<stencil[1] or self.__nghz<stencil[2]:
|
||||||
raise ValueError('Too few ghost cells for stencil: ',stencil,(self.__nghx,self.__nghy,self.__nghz))
|
raise ValueError('Too few ghost cells for stencil: ',stencil,(self.__nghx,self.__nghy,self.__nghz))
|
||||||
#print(stencil)
|
#print(stencil)
|
||||||
# Run scipy gaussian filter
|
# Run scipy gaussian filter
|
||||||
|
|
@ -1082,6 +1056,24 @@ class ibmppp:
|
||||||
self.__symmetrizeWall(key,(0,+1,0))
|
self.__symmetrizeWall(key,(0,+1,0))
|
||||||
self.__symmetrizeWall(key,(0,0,-1))
|
self.__symmetrizeWall(key,(0,0,-1))
|
||||||
self.__symmetrizeWall(key,(0,0,+1))
|
self.__symmetrizeWall(key,(0,0,+1))
|
||||||
|
|
||||||
|
def printMemoryUsage(self):
|
||||||
|
'''Print estimation of memory usage (max over all processors)'''
|
||||||
|
mem = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/1024.
|
||||||
|
mem = self.__comm.reduce(sendobj=mem,op=MPI.MAX,root=0)
|
||||||
|
if self.__rank==0:
|
||||||
|
print('Current memory usage: {:8.2f} MiB'.format(mem))
|
||||||
|
|
||||||
|
def printMemoryUsageField(self,key):
|
||||||
|
'''Print estimation of memory usage per allocated field.'''
|
||||||
|
bytesNumpy = np.dtype(self.__precision).itemsize
|
||||||
|
nxl = self.__localChunkSize[key][0]+2*self.__nghx
|
||||||
|
nyl = self.__localChunkSize[key][1]+2*self.__nghy
|
||||||
|
nzl = self.__localChunkSize[key][2]+2*self.__nghz
|
||||||
|
mem = nxl*nyl*nzl*bytesNumpy/1024.**2
|
||||||
|
mem = self.__comm.reduce(sendobj=mem,op=MPI.MAX,root=0)
|
||||||
|
if self.__rank==0:
|
||||||
|
print('Memory usage of {}: {:8.2f} MiB'.format(key,mem))
|
||||||
|
|
||||||
def __setBoundaryConditions(self,flowType):
|
def __setBoundaryConditions(self,flowType):
|
||||||
'''Sets the symmetries for ghost cells behind the wall'''
|
'''Sets the symmetries for ghost cells behind the wall'''
|
||||||
|
|
@ -1290,6 +1282,8 @@ class ibmppp:
|
||||||
kp_src = -positionDst[2]+1
|
kp_src = -positionDst[2]+1
|
||||||
rank_dst = self.__nghbr[ip_dst,jp_dst,kp_dst]
|
rank_dst = self.__nghbr[ip_dst,jp_dst,kp_dst]
|
||||||
rank_src = self.__nghbr[ip_src,jp_src,kp_src]
|
rank_src = self.__nghbr[ip_src,jp_src,kp_src]
|
||||||
|
# [send/recv] create a tag
|
||||||
|
tag = ip_dst*100+jp_dst*10+kp_dst
|
||||||
# [send/recv] get the indices of data to be sent/received
|
# [send/recv] get the indices of data to be sent/received
|
||||||
(nxl,nyl,nzl) = self.__localChunkSize[key]
|
(nxl,nyl,nzl) = self.__localChunkSize[key]
|
||||||
if positionDst[0]==-1:
|
if positionDst[0]==-1:
|
||||||
|
|
@ -1332,11 +1326,11 @@ class ibmppp:
|
||||||
# Communication must be done non-blocking, and we can use upper-case routines since this is a numpy array
|
# Communication must be done non-blocking, and we can use upper-case routines since this is a numpy array
|
||||||
if rank_dst is not None:
|
if rank_dst is not None:
|
||||||
sendbuf = np.ascontiguousarray(self.field[key][ii_src,jj_src,kk_src])
|
sendbuf = np.ascontiguousarray(self.field[key][ii_src,jj_src,kk_src])
|
||||||
reqsend = self.__comm.Isend(sendbuf,dest=rank_dst)
|
reqsend = self.__comm.Isend(sendbuf,dest=rank_dst,tag=tag)
|
||||||
# [recv] the corresponding receive: results are stored in a buffer which will be assigned to the parent array later
|
# [recv] the corresponding receive: results are stored in a buffer which will be assigned to the parent array later
|
||||||
if rank_src is not None:
|
if rank_src is not None:
|
||||||
recvbuf = np.zeros((ii_dst.stop-ii_dst.start,jj_dst.stop-jj_dst.start,kk_dst.stop-kk_dst.start))
|
recvbuf = np.zeros((ii_dst.stop-ii_dst.start,jj_dst.stop-jj_dst.start,kk_dst.stop-kk_dst.start),dtype=self.__precision)
|
||||||
reqrecv = self.__comm.Irecv(recvbuf,source=rank_src)
|
reqrecv = self.__comm.Irecv(recvbuf,source=rank_src,tag=tag)
|
||||||
# [recv] wait for data to be received
|
# [recv] wait for data to be received
|
||||||
if reqrecv is not None:
|
if reqrecv is not None:
|
||||||
reqrecv.wait()
|
reqrecv.wait()
|
||||||
|
|
@ -1367,7 +1361,7 @@ class ibmppp:
|
||||||
self.field[key] = np.zeros(
|
self.field[key] = np.zeros(
|
||||||
(self.__localChunkSize[key][0]+2*self.__nghx,
|
(self.__localChunkSize[key][0]+2*self.__nghx,
|
||||||
self.__localChunkSize[key][1]+2*self.__nghy,
|
self.__localChunkSize[key][1]+2*self.__nghy,
|
||||||
self.__localChunkSize[key][2]+2*self.__nghz)
|
self.__localChunkSize[key][2]+2*self.__nghz),dtype=self.__precision
|
||||||
)
|
)
|
||||||
# Shift grid
|
# Shift grid
|
||||||
for idir in range(0,3):
|
for idir in range(0,3):
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue