AJMR-Python-Baird/NTC_DFM/EFDC_compare.py

424 lines
18 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#%%
# -*- coding: utf-8 -*-
"""
@author: AJMR
Script to compare EFDC results against observations
"""
#%% Import
import os
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
import geopandas as gp
gp.io.file.fiona.drvsupport.supported_drivers['KML'] = 'rw'
from shapely.ops import nearest_points
from shapely.geometry import LineString
from shapely.geometry import Point, MultiPoint
from shapely.ops import nearest_points
from datetime import datetime, timedelta
from dfm_tools.get_nc import get_netdata, get_ncmodeldata, plot_netmapdata
from dfm_tools.get_nc_helpers import get_timesfromnc
from dfm_tools.regulargrid import scatter_to_regulargrid
from pathlib import Path
import netCDF4 as nc
import xarray as xr
import pytz
import pathlib as pl
#%% Function to find nearest point
def get_nearest_values(row, other_gdf, point_column='geometry', value_column="geometry"):
"""Find the nearest point and return the corresponding value from specified value column."""
# Create an union of the other GeoDataFrame's geometries:
other_points = other_gdf["geometry"].unary_union
# Find the nearest points
nearest_geoms = nearest_points(row[point_column], other_points)
# Get corresponding values from the other df
nearest_data = other_gdf.loc[other_gdf["geometry"] == nearest_geoms[1]]
nearest_value = nearest_data[value_column].get_values()[0]
return nearest_value
#%% Load in Water Level Data
# Gauge Locations from map
# gdf_gaugeLoc = gp.read_file('//srv-ott3/Projects/11934.201 Newtown Creek TPP Privileged and Confidential/03_Data/02_Physical/NTC6.kml')
gdf_gaugeLoc = gp.read_file('//srv-ott3.baird.com/Projects/11934.201 Newtown Creek TPP Privileged and Confidential/03_Data/02_Physical/NTC6.kml')
gdf_gaugeLocUSSP = gdf_gaugeLoc.to_crs("EPSG:32118")
# All in Feet NAVD88
# df_wl_FFG = pd.read_excel('//srv-ott3/Projects/11934.201 Newtown Creek TPP Privileged and Confidential/03_Data/02_Physical/04 Gauge Data/NCP1_Gauge_Elev_Data_20130521/NC_Gauge_Elev_Data_20130521.xlsx',
df_wl_FFG = pd.read_excel('//srv-ott3.baird.com/Projects/11934.201 Newtown Creek TPP Privileged and Confidential/03_Data/02_Physical/04 Gauge Data/NCP1_Gauge_Elev_Data_20130521/NC_Gauge_Elev_Data_20130521.xlsx',
sheet_name='Field_Facility_Gauge')
df_wl_FFG.set_index('Date_time', inplace=True)
df_wl_FFG.index = df_wl_FFG.index.tz_localize(
pytz.timezone('America/New_York')).tz_convert(pytz.utc) #Convert to UTC
gdf_wl_FFG = gp.GeoDataFrame(df_wl_FFG,
geometry=gp.points_from_xy(np.ones([len(df_wl_FFG), 1]) * gdf_gaugeLoc['geometry'].x[2],
np.ones([len(df_wl_FFG), 1]) * gdf_gaugeLoc['geometry'].y[2]), crs="EPSG:4326")
gdf_wl_FFG.geometry = gdf_wl_FFG.geometry.to_crs("EPSG:32118")
df_wl_NGG1 = pd.read_excel('//srv-ott3.baird.com/Projects/11934.201 Newtown Creek TPP Privileged and Confidential/03_Data/02_Physical/04 Gauge Data/NCP1_Gauge_Elev_Data_20130521/NC_Gauge_Elev_Data_20130521.xlsx',
sheet_name='National_Grid_Gauge')
df_wl_NGG1.set_index('Date_time', inplace=True)
df_wl_NGG1.index = df_wl_NGG1.index.tz_localize(
pytz.timezone('America/New_York'), ambiguous=True).tz_convert(pytz.utc) #Convert to UTC
gdf_wl_NGG1 = gp.GeoDataFrame(df_wl_NGG1,
geometry=gp.points_from_xy(np.ones([len(df_wl_NGG1), 1]) * gdf_gaugeLoc['geometry'].x[3],
np.ones([len(df_wl_NGG1), 1]) * gdf_gaugeLoc['geometry'].y[3]), crs="EPSG:4326")
gdf_wl_NGG1.geometry = gdf_wl_NGG1.geometry.to_crs("EPSG:32118")
# Also includes temperature
df_wl_NGG2 = pd.read_excel('//srv-ott3.baird.com/Projects/11934.201 Newtown Creek TPP Privileged and Confidential/03_Data/02_Physical/04 Gauge Data/NCP2_NG_TideGauge_20160113/NCP2_NG_TideGauge_20160113.xlsx',
sheet_name='Sheet1')
gdf_wl_NGG2 = gp.GeoDataFrame(df_wl_NGG2,
geometry=gp.points_from_xy(np.ones([len(df_wl_NGG2), 1]) * gdf_gaugeLoc['geometry'].x[3],
np.ones([len(df_wl_NGG2), 1]) * gdf_gaugeLoc['geometry'].y[3]), crs="EPSG:4326")
gdf_wl_NGG2.geometry = gdf_wl_NGG2.geometry.to_crs("EPSG:32118")
#%% Read in EFDC Data to dataframe
# efdc6_df = pd.read_csv('C:/Users/arey/files/Projects/Newtown/RevisedEFDC/2012_Mon6/READ_FROM_EFDC_GRAPHICS_OUT_BIN.CSV')
# efdc7_df = pd.read_csv('C:/Users/arey/files/Projects/Newtown/RevisedEFDC/2012_Mon7/READ_FROM_EFDC_GRAPHICS_OUT_BIN.CSV')
# efdc8_df = pd.read_csv('C:/Users/arey/files/Projects/Newtown/RevisedEFDC/2012_Mon8/READ_FROM_EFDC_GRAPHICS_OUT_BIN.CSV')
# efdc9_df = pd.read_csv('C:/Users/arey/files/Projects/Newtown/RevisedEFDC/2012_Mon9/READ_FROM_EFDC_GRAPHICS_OUT_BIN.CSV')
efdc12_df = pd.read_csv('C:/Users/arey/files/Projects/Newtown/RevisedEFDC/2012_Mon12/READ_FROM_EFDC_GRAPHICS_OUT_BIN.CSV')
#%% Read in EFDC grid
efdc_grid_df = pd.read_csv('//Ott-flavius/j/INPUT_FILES/GRID_FILES/NC_ER_lxly_20140129.inp',
delim_whitespace=True, skiprows=4,
names=['I', 'J', 'X', 'Y', 'CUE', 'CVE', 'CUN', 'CVN'])
efdc_grid_gdf = gp.GeoDataFrame(
efdc_grid_df, geometry=gp.points_from_xy(efdc_grid_df.X, efdc_grid_df.Y), crs="EPSG:32118")
#%% Merge EFDC outputs with grid locations and add datetimes
efdc6_merged = pd.merge(efdc6_df, efdc_grid_df, how='left', left_on=['I_MOD','J_MOD'],
right_on = ['I','J']).drop(columns=[
' DUMPID', 'END_hr', 'I_MOD', 'J_MOD', 'I_MOD', 'X', 'Y', 'CUE', 'CVE', 'CUN', 'CVN'])
efdc6_merged['date'] = pd.to_datetime([datetime(2012, 6, 1, 00, 00, 00) +
timedelta(hours=h) for h in efdc6_merged['ST_hr']])
efdc6_merged.set_index('date', inplace=True)
efdc6_merged.index = efdc6_merged.index.tz_localize(
pytz.timezone('EST')).tz_convert(pytz.utc) #Convert to UTC
del efdc6_df
efdc7_merged = pd.merge(efdc7_df, efdc_grid_df, how='left', left_on=['I_MOD','J_MOD'],
right_on = ['I','J']).drop(columns=[
' DUMPID', 'END_hr', 'I_MOD', 'J_MOD', 'I_MOD', 'X', 'Y', 'CUE', 'CVE', 'CUN', 'CVN'])
efdc7_merged['date'] = pd.to_datetime([datetime(2012, 7, 1, 00, 00, 00) +
timedelta(hours=h) for h in efdc7_merged['ST_hr']])
efdc7_merged.set_index('date', inplace=True)
efdc7_merged.index = efdc7_merged.index.tz_localize(
pytz.timezone('EST')).tz_convert(pytz.utc) #Convert to UTC
del efdc7_df
efdc8_merged = pd.merge(efdc8_df, efdc_grid_df, how='left', left_on=['I_MOD','J_MOD'],
right_on = ['I','J']).drop(columns=[
' DUMPID', 'END_hr', 'I_MOD', 'J_MOD', 'I_MOD', 'X', 'Y', 'CUE', 'CVE', 'CUN', 'CVN'])
efdc8_merged['date'] = pd.to_datetime([datetime(2012, 8, 1, 00, 00, 00) +
timedelta(hours=h) for h in efdc8_merged['ST_hr']])
efdc8_merged.set_index('date', inplace=True)
efdc8_merged.index = efdc8_merged.index.tz_localize(
pytz.timezone('EST')).tz_convert(pytz.utc) #Convert to UTC
del efdc8_df
efdc9_merged = pd.merge(efdc9_df, efdc_grid_df, how='left', left_on=['I_MOD','J_MOD'],
right_on = ['I','J']).drop(columns=[
' DUMPID', 'END_hr', 'I_MOD', 'J_MOD', 'I_MOD', 'X', 'Y', 'CUE', 'CVE', 'CUN', 'CVN'])
efdc9_merged['date'] = pd.to_datetime([datetime(2012, 9, 1, 00, 00, 00) +
timedelta(hours=h) for h in efdc9_merged['ST_hr']])
efdc9_merged.set_index('date', inplace=True)
efdc9_merged.index = efdc9_merged.index.tz_localize(
pytz.timezone('EST')).tz_convert(pytz.utc) #Convert to UTC
del efdc9_df
#%%
efdc12_merged = pd.merge(efdc12_df, efdc_grid_df, how='left', left_on=['I_MOD','J_MOD'],
right_on = ['I','J']).drop(columns=[
' DUMPID', 'END_hr', 'I_MOD', 'J_MOD', 'I_MOD', 'X', 'Y', 'CUE', 'CVE', 'CUN', 'CVN'])
efdc12_merged['date'] = pd.to_datetime([datetime(2012, 12, 1, 00, 00, 00) +
timedelta(hours=h) for h in efdc12_merged['ST_hr']])
efdc12_merged.set_index('date', inplace=True)
efdc12_merged.index = efdc12_merged.index.tz_localize(
pytz.timezone('EST')).tz_convert(pytz.utc) #Convert to UTC
del efdc12_df
#%% Merge EFDC dataframes and convert to geodataframe
efdc_merged = pd.concat([efdc6_merged, efdc7_merged, efdc8_merged, efdc9_merged])
del efdc6_merged
del efdc7_merged
del efdc8_merged
del efdc9_merged
efdc_merged_gdf = gp.GeoDataFrame(
efdc_merged, geometry=efdc_merged['geometry'], crs="EPSG:32118")
del efdc_merged
#%%
efdc_merged = efdc12_merged
efdc_merged_gdf = gp.GeoDataFrame(
efdc_merged, geometry=efdc_merged['geometry'], crs="EPSG:32118")
del efdc_merged
#%% Find nearest grid point to station FFG
nearest_geoms = nearest_points(Point(gdf_wl_FFG.iloc[0,:].geometry), MultiPoint(efdc_grid_gdf.geometry))
station_gdf = efdc_merged_gdf.loc[efdc_merged_gdf['geometry']==nearest_geoms[1]]
#%% Find nearest grid point to station BGG
nearest_geoms = nearest_points(Point(gdf_wl_NGG1.iloc[0,:].geometry), MultiPoint(efdc_grid_gdf.geometry))
stationNGG_gdf = efdc_merged_gdf.loc[efdc_merged_gdf['geometry']==nearest_geoms[1]]
# efdc_grid_gdf.loc[efdc_grid_gdf['geometry']== nearest_geoms[1]].value
#%% Save EFDC as Parquet for faster importing
efdc_merged_gdf.to_parquet('C:/Users/arey/files/Projects/Newtown/RevisedEFDC/efdc_merged_Dec.parquet',
index=True, compression='gzip')
station_gdf.to_parquet('C:/Users/arey/files/Projects/Newtown/RevisedEFDC/station_gdf_Dec.parquet',
index=True, compression='gzip')
stationNGG_gdf.to_parquet('C:/Users/arey/files/Projects/Newtown/RevisedEFDC/stationNGG_gdf_Dec.parquet',
index=True, compression='gzip')
#%% Read in EFDC as Paraquet
stationNGG_gdf = gp.read_parquet('C:/Users/arey/files/Projects/Newtown/RevisedEFDC/stationNGG_gdf.parquet')
station_gdf = gp.read_parquet('C:/Users/arey/files/Projects/Newtown/RevisedEFDC/station_gdf.parquet')
efdc_merged_gdf = gp.read_parquet('C:/Users/arey/files/Projects/Newtown/RevisedEFDC/efdc_merged.parquet')
#%% Read Model Log
runLog = pd.read_excel('C:/Users/arey/files\Projects/Newtown/Model Log NTC.xlsx', 'ModelLog')
dataPath = "FlowFM_map.nc"
#%% Import Delft3D results
modelPlot = [31]
ugrid_all = [None] * (max(modelPlot)+1)
data_frommap_wl = [None] * (max(modelPlot)+1)
facex = [None] * (max(modelPlot)+1)
facey = [None] * (max(modelPlot)+1)
nearest_IDX = list(range(100))
nearest_IDX_NGG = list(range(100))
dsloc = list(range(100))
dsloc_NGG = list(range(100))
# Find nearest point
for i in modelPlot:
dataPath = pl.Path(runLog['Run Location'][i],
'FlowFM', 'dflowfm', 'output', 'FlowFM_map.nc')
file_nc_map = dataPath.as_posix()
tSteps = get_timesfromnc(file_nc=file_nc_map, varname='time')
tStep = len(tSteps)-1
ugrid_all[i] = get_netdata(file_nc=file_nc_map)#,multipart=False)
data_frommap_wl[i] = get_ncmodeldata(file_nc=file_nc_map, varname='mesh2d_s1', timestep=tStep)
facex[i] = get_ncmodeldata(file_nc=file_nc_map, varname='mesh2d_face_x')
facey[i] = get_ncmodeldata(file_nc=file_nc_map, varname='mesh2d_face_y')
# Find matching Delft3D index
s = gp.GeoSeries(map(Point, zip(facex[i], facey[i])))
nearest_geoms = nearest_points(Point(gdf_wl_FFG.iloc[0,:].geometry), MultiPoint(s))
nearest_IDX[i] = np.where(s==nearest_geoms[1])
nearest_geoms = nearest_points(Point(gdf_wl_NGG1.iloc[0,:].geometry), MultiPoint(s))
nearest_IDX[i] = np.where(s==nearest_geoms[1])
#%% Read in time series
modelPlot = [31]
ugrid_all = [None] * (max(modelPlot)+1)
data_frommap_wl = [None] * (max(modelPlot)+1)
facex = [None] * (max(modelPlot)+1)
facey = [None] * (max(modelPlot)+1)
nearest_IDX = list(range(100))
nearest_IDX_NGG = list(range(100))
dsloc = list(range(100))
dsloc_NGG = list(range(100))
# Note, this uses the standard netcdf lib + xarray
for i in modelPlot:
dataPath = pl.Path(runLog['Run Location'][i],
'FlowFM', 'dflowfm', 'output', 'FlowFM_map.nc')
file_nc_map = dataPath.as_posix()
ds = xr.open_dataset(file_nc_map)
# Find nearest point
ds_x = ds['mesh2d_face_x']
ds_y = ds['mesh2d_face_y']
s = gp.GeoSeries(map(Point, zip(ds_x, ds_y)))
nearest_geoms = nearest_points(Point(gdf_wl_FFG.iloc[0,:].geometry), MultiPoint(s))
nearest_IDX[i] = np.where(s==nearest_geoms[1])
nearest_geoms_NGG = nearest_points(Point(gdf_wl_NGG1.iloc[0,:].geometry), MultiPoint(s))
nearest_IDX_NGG[i] = np.where(s==nearest_geoms_NGG[1])
dsloc[i] = ds['mesh2d_s1'].sel(mesh2d_nFaces=nearest_IDX[i][0])
dsloc_NGG[i] = ds['mesh2d_s1'].sel(mesh2d_nFaces=nearest_IDX_NGG[i][0])
#%% Convert D3D Timezone
modelPlot = [31]
df_wl_D3D = list(range(100))
df_wl_D3D_NGG = list(range(100))
d3dTzones = list(range(100))
d3dTzones[25] = 'EST'
d3dTzones[26] = 'EST'
d3dTzones[31] = 'EST'
for i in modelPlot:
df_wl_D3D[i] = dsloc[i].to_dataframe()
df_wl_D3D[i].index = df_wl_D3D[i].index.droplevel(-1)
df_wl_D3D[i].index = df_wl_D3D[i].index.tz_localize(
pytz.timezone(d3dTzones[i])).tz_convert(pytz.utc) #Convert to UTC
df_wl_D3D_NGG[i] = dsloc_NGG[i].to_dataframe()
df_wl_D3D_NGG[i].index = df_wl_D3D_NGG[i].index.droplevel(-1)
df_wl_D3D_NGG[i].index = df_wl_D3D_NGG[i].index.tz_localize(
pytz.timezone(d3dTzones[i])).tz_convert(pytz.utc) #Convert to UTC
#%% Plot water levels at nearest point
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(9, 5))
fig.patch.set_facecolor('white')
for week in range(0, 2):
weekOffset = 0
axes[week+weekOffset].plot(station_gdf.index, station_gdf['WSE_m'], label='EFDC Model')
axes[week+weekOffset].plot(gdf_wl_FFG.index+timedelta(hours=0),
gdf_wl_FFG['Water Surface Elevation (ft)']*0.3048,
label='Field Facility Gauge', color='k')
for i in modelPlot:
axes[week+weekOffset].plot(df_wl_D3D[i].index, df_wl_D3D[i].mesh2d_s1, label=runLog['Run'][i])
axes[week+weekOffset].set_xlim(datetime(2012, 12, 15, 0, 0, 0) + timedelta(days=week*7),
datetime(2012, 12, 15+7, 0, 0, 0) + timedelta(days=week*7))
axes[week+weekOffset].set_ylim(-1, 2.5)
axes[week+weekOffset].set_ylabel('Water Surface Elevation [m]')
axes[0].legend(loc='upper left')
axes[0].title.set_text('Field Facility Gauge')
plt.show()
fig.savefig('C:/Users/arey/files/Projects/Newtown/RevisedEFDC/Figures/DecTS_FFG.png',
bbox_inches='tight')
#%% Plot water levels at National Grid
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(9, 5))
fig.patch.set_facecolor('white')
for week in range(0, 2):
weekOffset = 0
axes[week+weekOffset].plot(stationNGG_gdf.index, stationNGG_gdf['WSE_m'], label='EFDC Model')
axes[week+weekOffset].plot(gdf_wl_NGG1.index+timedelta(hours=0),
gdf_wl_NGG1['Water Surface Elevation (ft)']*0.3048,
label='National Grid Gauge', color='k')
for i in modelPlot:
axes[week+weekOffset].plot(df_wl_D3D_NGG[i].index, df_wl_D3D_NGG[i].mesh2d_s1,
label=runLog['Run'][i])
axes[week+weekOffset].set_xlim(datetime(2012, 12, 15, 0, 0, 0) + timedelta(days=week*7),
datetime(2012, 12, 15+7, 0, 0, 0) + timedelta(days=week*7))
axes[week+weekOffset].set_ylim(-1, 2.5)
axes[week+weekOffset].set_ylabel('Water Surface Elevation [m]')
axes[0].legend(loc='upper left')
axes[0].title.set_text('National Grid Gauge')
plt.show()
fig.savefig('C:/Users/arey/files/Projects/Newtown/RevisedEFDC/Figures/DecTS_NGG.png',
bbox_inches='tight')
#%% Plot scatters
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(10, 10))
fig.patch.set_facecolor('white')
interpTimes = pd.date_range("2012-06-02T00:00:00", "2012-10-01T00:00:00", freq="60min")
interpTimes = pd.date_range("2012-07-07T00:00:00", "2012-07-27T00:00:00", freq="60min")
for i in modelPlot:
axes.scatter(np.interp(interpTimes, gdf_wl_FFG.index,
gdf_wl_FFG['Water Surface Elevation (ft)']*0.3048),
np.interp(interpTimes, df_wl_D3D[i].index,
df_wl_D3D[i].mesh2d_s1),
label=runLog['Run'][i])
axes.scatter(np.interp(interpTimes, gdf_wl_FFG.index,
gdf_wl_FFG['Water Surface Elevation (ft)']*0.3048),
np.interp(interpTimes, station_gdf.index,
station_gdf['WSE_m']),
label='EFDC')
linepts = np.array([-1.0, 1.5])
plt.plot(linepts, linepts, color='k')
axes.set_xlim(-1.0, 1.5)
axes.set_ylim(-1.0, 1.5)
axes.set_aspect('equal')
axes.set_title('Field Facility Gauge')
axes.legend(loc='upper left')
axes.set_ylabel('Model Water Surface Elevation [m]')
axes.set_xlabel('Observed Water Surface Elevation [m]')
plt.show()
fig.savefig('C:/Users/arey/files/Projects/Newtown/RevisedEFDC/Figures/FFG_ScatterUpdated.pdf',
bbox_inches='tight')
#%% Plot scatters NGG
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(10, 10))
fig.patch.set_facecolor('white')
interpTimes = pd.date_range("2012-06-02T00:00:00", "2012-10-01T00:00:00", freq="60min")
interpTimes = pd.date_range("2012-07-07T00:00:00", "2012-07-27T00:00:00", freq="60min")
for i in modelPlot:
axes.scatter(np.interp(interpTimes, gdf_wl_NGG1.index,
gdf_wl_NGG1['Water Surface Elevation (ft)']*0.3048),
np.interp(interpTimes, df_wl_D3D_NGG[i].index,
df_wl_D3D_NGG[i].mesh2d_s1),
label=runLog['Run'][i])
axes.scatter(np.interp(interpTimes, gdf_wl_NGG1.index,
gdf_wl_NGG1['Water Surface Elevation (ft)']*0.3048),
np.interp(interpTimes, stationNGG_gdf.index,
stationNGG_gdf['WSE_m']),
label='EFDC')
linepts = np.array([-1.0, 1.5])
plt.plot(linepts, linepts, color='k')
axes.set_xlim(-1.0, 1.5)
axes.set_ylim(-1.0, 1.5)
axes.set_aspect('equal')
axes.set_title('National Grid Gauge')
axes.legend(loc='upper left')
axes.set_ylabel('Model Water Surface Elevation [m]')
axes.set_xlabel('Observed Water Surface Elevation [m]')
plt.show()
fig.savefig('C:/Users/arey/files/Projects/Newtown/RevisedEFDC/Figures/NGG_ScatterUpdated.pdf',
bbox_inches='tight')