241 lines
7.7 KiB
Python
241 lines
7.7 KiB
Python
'''
|
|
Created on 25.07.2012
|
|
|
|
@author: Jakob Steidl
|
|
|
|
module to detect outliers (and for now also to create a cell/matrix)
|
|
|
|
outliers will be marked in a numpy matrix, with each 3D vector representing the outlier state of each velocity component.
|
|
it's currently using the RawProfileData (because they have the 3D components?) but that can be changed by the arguments
|
|
in get_valuematrix_from_cellmatrix(). one needs to change the proper cell matrix as well however.
|
|
|
|
'''
|
|
|
|
import pickle, time
|
|
import numpy as np
|
|
|
|
import warnings
|
|
#import pylab as pl
|
|
#import matplotlib.pyplot as plt
|
|
|
|
#warnings.simplefilter('error')
|
|
|
|
|
|
def get_matrix_size(p):
    ''' determine the necessary size of a matrix that can hold all the cells

    @p must expose ``p.ensembles``, a sequence of ensemble objects. the
    ``cells`` attribute of each ensemble is measured with len(); ensembles
    that have no ``cells`` attribute are ignored.

    returns a tuple (rows, cols):
        rows    largest number of cells found in any ensemble
                (0 if no ensemble has cells)
        cols    number of ensembles
    '''

    size_h = len(p.ensembles)   # columns: one per ensemble

    size_v = []
    # every ensemble has to be inspected, including the last one; otherwise
    # the matrix is too short whenever the last ensemble is the longest one.
    for ensemble in p.ensembles:
        try:
            size_v.append(len(ensemble.cells))
        except AttributeError:
            # this ensemble doesn't have cells, so it cannot raise the row count
            continue

    # max() of an empty list raises, so fall back to zero rows explicitly
    rows = max(size_v) if size_v else 0

    return (rows, size_h)
|
|
|
|
|
|
def get_cell_matrix(p):
    ''' collect the cells of all ensembles in a 2D list (aka matrix)

    the result is indexed as grid[row][col], where col is the index of the
    ensemble in p.ensembles and row is the index of the cell inside that
    ensemble. positions without a cell (void ensembles, or ensembles with
    fewer cells than the matrix has rows) hold False.
    '''

    n_rows, n_cols = get_matrix_size(p)

    # plain python 'matrix', pre-filled with the "no cell here" marker
    grid = [[False] * n_cols for _ in range(n_rows)]

    # one column per ensemble; void ensembles leave their column untouched
    for col in range(n_cols):
        ensemble = p.ensembles[col]
        if ensemble.void:
            continue
        for row, cell in enumerate(ensemble.cells):
            grid[row][col] = cell

    return grid
|
|
|
|
|
|
def get_valuematrix_from_cellmatrix(m, value, outputs=0, nvalues=3):
    ''' extract a designated @value from the matrix @m containing Cell()-objects

    arguments:
        m        2D list, m[row][col]; entries equal to False mark "no cell"
        value    accessor that is appended to the cell, e.g. '.velocity.v'
        outputs  0 ... return (values, mask)
                 1 ... return values only
                 2 ... return mask only
                 (any other number returns None)
        nvalues  length of the vector stored per cell

    returns:
        values   float array of shape (rows, cols, nvalues); zeros where
                 there is no cell
        mask     boolean array of shape (rows, cols); True where a value
                 was extracted, False otherwise
    '''
    import operator
    import re

    rows = len(m)
    cols = len(m[0]) if rows else 0     # an empty matrix has no first row

    npmat = np.zeros(shape=(rows, cols, nvalues))
    # builtin bool instead of np.bool: the alias is missing in some numpy releases
    goodmat = np.zeros(shape=(rows, cols), dtype=bool)

    # a plain dotted attribute path ('.a.b.c') can be resolved without eval
    getter = None
    if isinstance(value, str) and re.match(r'(?:\.[A-Za-z_][A-Za-z0-9_]*)+\Z', value):
        getter = operator.attrgetter(value[1:])

    for r in range(rows):
        for c in range(cols):
            cell = m[r][c]
            if not False == cell:
                if getter is not None:
                    npmat[r][c] = getter(cell)
                else:
                    # NOTE(review): kept for accessors that are not a plain
                    # attribute path (e.g. '.velocity.v[0]'). eval executes
                    # arbitrary code, so never pass an untrusted @value.
                    npmat[r][c] = eval('m[{}][{}]{}'.format(r, c, value))
                goodmat[r][c] = True

    if outputs == 0:
        return npmat, goodmat
    elif outputs == 1:
        return npmat
    elif outputs == 2:
        return goodmat
|
|
|
|
def get_relative_deviation_simple(vmat, vgoodmat, cfg):
    ''' return, for every good cell, how many standard deviations its value
    is away from the mean of its good neighbours (per velocity component)

    arguments:
        vmat      value matrix, shape (rows, cols, 3); with a 1D @vgoodmat
                  a shape of (rows, 3) is accepted as a single column
        vgoodmat  boolean mask, shape (rows, cols) or (rows,); True marks
                  cells that hold a valid value
        cfg       dictionary, see below

    cfg vars: (its a dictionary)
        radius_h    Int     horizontal radius of where we do the averaging
        radius_v    Int     vertical radius of where we do the averaging
                            (optional, defaults to 0)

    returns a float array of shape (rows, cols, 3). cells that are not good,
    or that have no good neighbour besides themselves, keep the value 0 (for
    the latter a RuntimeWarning is issued). a neighbourhood with a standard
    deviation of zero yields inf/nan, as produced by np.divide.
    '''

    vmat = np.asarray(vmat, dtype=float)
    vgoodmat = np.asarray(vgoodmat, dtype=bool)

    if len(vgoodmat.shape) == 2:
        (rows, columns) = vgoodmat.shape
    else:
        rows, = vgoodmat.shape
        columns = 1
        # treat 1D input as a single column so that [r, c] indexing works
        vgoodmat = vgoodmat.reshape(rows, 1)
        vmat = vmat.reshape(rows, 1, -1)

    # builtin float/int: the np.float / np.int aliases were removed from numpy
    rdev = np.zeros(shape=(rows, columns, 3), dtype=float)

    radius_v = int(cfg['radius_v']) if 'radius_v' in cfg else 0
    radius_h = int(cfg['radius_h'])

    # iterate through every cell ...
    for r in range(rows):
        # the window is symmetric and includes r +/- radius_v; the '+ 1'
        # is needed because the upper bound of a slice is exclusive
        r0 = max(r - radius_v, 0)
        r1 = min(r + radius_v + 1, rows)

        for c in range(columns):
            if not vgoodmat[r, c]:
                continue

            c0 = max(c - radius_h, 0)
            c1 = min(c + radius_h + 1, columns)

            # (k, 3) array holding the good cells of the window, which
            # always contains the cell (r, c) itself
            window_good = vgoodmat[r0:r1, c0:c1]
            neighbours = vmat[r0:r1, c0:c1, :3][window_good]

            if len(neighbours) > 1:
                mean = neighbours.mean(axis=0)
                stdev = neighbours.std(axis=0)
                value = vmat[r, c, :3]

                # compute the relative deviation (deviation x-times the standard deviation)
                rdev[r, c] = np.divide(np.abs(mean - value), stdev)
            else:
                warnings.warn('cell {}/{}: not enough neighbors to gather statistics. increase search radius.'.format(r, c), RuntimeWarning)

    return rdev
|
|
|
|
|
|
|
|
def get_outliers(rd, gm, cfg):
    '''
    compare the relative deviations @rd against the limit cfg['limit']

    @gm is accepted but not used.

    meanings for the returned matrix:
        True  (1) ... outlier (deviation is strictly above the limit)
        False (0) ... no outlier
    '''
    threshold = cfg['limit']
    return rd > threshold
|
|
|
|
|
|
def interpolate_outliers(p, cfg):
    '''
    detect outliers in the velocities of @p, interpolate over them and
    return a deep copy of @p that was updated with the interpolated values.
    @p itself is not modified by this function.

    cfg vars: (its a dictionary)
        radius_h    int     horizontal radius of where we do the averaging
        radius_v    int     vertical radius of where we do the averaging
        limit       float   limit above which outliers are recognised as such
    '''

    # --- outlier detection ---
    cells = get_cell_matrix(p)
    # values of '.velocity.v' per cell, plus the mask of cells that had one
    values, valid = get_valuematrix_from_cellmatrix(cells, '.velocity.v')
    deviation = get_relative_deviation_simple(values, valid, cfg)
    is_outlier = get_outliers(deviation, valid, cfg)

    # --- interpolation ---
    from interpolation import interpolate
    # the second argument is the inverted outlier matrix
    not_outlier = ~is_outlier
    interpolated = interpolate(values, not_outlier)

    # --- update a copy of the profile ---
    from copy import deepcopy
    result = deepcopy(p)
    result.update_velocities(interpolated, valid)

    return result
|
|
|
|
|
|
|
|
def plot_matrix(m):
    ''' draw @m with matplotlib's imshow (interpolation='none') and show it '''
    from matplotlib import pyplot

    pyplot.imshow(m, interpolation='none')
    pyplot.show()
|
|
|
|
#if __name__ == '__main__':
|
|
# profile = pickle.load(open('../testfiles/demodata.pickle','rb'))
|
|
#
|
|
#
|
|
## cfg = dict(radius=5, limit=2)
|
|
# cfg = dict(radius_h=15, radius_v=0, limit=2)
|
|
#
|
|
# t1 = time.time()
|
|
# cm = get_cell_matrix(profile)
|
|
# vm, vgm = get_valuematrix_from_cellmatrix(cm, '.velocity_comp.v')
|
|
#
|
|
# rdm = get_relative_deviation_simple(vm, vgm, cfg)
|
|
#
|
|
# dt1 = time.time() - t1
|
|
# t2 = time.time()
|
|
# olm = get_outliers(rdm, vgm, cfg)
|
|
# dt2 = time.time() - t2
|
|
#
|
|
# print(type(olm))
|
|
# print dt1, dt2
|
|
# plot_matrix(olm[:,:,0]-1*vgm)
|
|
#
|
|
# pickle.dump((cm, vm, vgm, rdm, olm), open('../testfiles/outliers.pickle','wb'))
|